예제 #1
0
파일: db.py 프로젝트: donplei/fantasyDB
class FantasyDB(object):
    def __init__(self, dbname='SOS.db', sheet_name='', drop=False):
        self.drive = Drive(sheet_name)
        self.con = sqlite3.connect(dbname)
        self.con.row_factory = sqlite3.Row

        class Median:
            def __init__(self):
                self.values = []

            def step(self, value):
                self.values.append(value)

            def finalize(self):
                count = len(self.values)
                values = sorted(self.values)
                if count % 2 == 1:
                    return values[math.ceil(count / 2)]
                else:
                    return (values[int(count / 2) - 1] +
                            values[int(count / 2)]) / 2

        self.con.create_aggregate("median", 1, Median)

        self.c = self.con.cursor()
        self.diff_table = False
        self.median_table = False

        self.cats_conf = OrderedDict({
            "R": {
                "precision": 1,
                "close_limit": 3,
                "batting": True,
                "real": True
            }
        })
        self.cats_conf["HR"] = {
                "precision": 1,
                "close_limit": 1,
                "batting": True,
                "real": True
        }
        self.cats_conf["RBI"] = {
                "precision": 1,
                "close_limit": 3,
                "batting": True,
                "real": True
        }
        self.cats_conf["SB"] = {
                "precision": 1,
                "close_limit": 1,
                "batting": True,
                "real": True
        }
        self.cats_conf["H"]=  {
                "precision": 0,
                "batting": True,
                "real": False
        }
        self.cats_conf["AB"] = {
                "precision": 0,
                "batting": True,
                "real": False
        }
        self.cats_conf["AVG"] = {
                "precision": 3,
                "close_limit": 0.020,
                "batting": True,
                "real": True,
                "expr": "ifnull(1.0 * $(H) / $(AB), 0)",
        }
        self.cats_conf["W"] = {
                "precision": 1,
                "close_limit": 1,
                "batting": False,
                "real": True
        }
        self.cats_conf["SV"] = {
                "precision": 1,
                "close_limit": 1,
                "batting": False,
                "real": True
        }
        self.cats_conf["K"] = {
                "precision": 0,
                "close_limit": 5,
                "batting": False,
                "real": True
        }
        self.cats_conf["IP"] = {
                "precision": 1,
                "batting": False,
                "real": False
        }
        self.cats_conf["ER"] = {
                "precision": 1,
                "batting": False,
                "real": False
        }
        self.cats_conf["WH"] = {
                "precision": 0,
                "batting": False,
                "real": False
        }
        self.cats_conf["ERA"] = {
                "precision": 2,
                "close_limit": 0.50,
                "batting": False,
                "real": True,
                "invert": True,
                "expr": "ifnull($(ER) * 9 / $(IP), 0)"
        }
        self.cats_conf["WHIP"] = {
                "precision": 2,
                "close_limit": 0.10,
                "batting": False,
                "real": True,
                "invert": True,
                "expr": "ifnull($(WH) / $(IP), 0)"
        }

        if drop:
            self.drop_db()
            self.create_db()

    def drop_db(self):
        self.c.executescript("""
            DROP TABLE IF EXISTS team;
            DROP TABLE IF EXISTS matchup;
            DROP TABLE IF EXISTS week_stats;
        """)

    def create_db(self):
        self.c.executescript("""
            CREATE TABLE team (team_id integer, team_key text,
                               label text, manager text);

            CREATE TABLE matchup (team_id1 integer, team_id2 integer,
                                  week integer);

            CREATE TABLE week_stats
                (team_id integer, opponent_id integer, week integer, R integer,
                 HR integer, RBI integer, SB integer, H integer, AB integer,
                 W integer, SV integer, K integer, IP real, ER integer,
                 WH integer);
        """)

    def create_tmp_stats_table(self, table_name):
        self.c.executescript("""
            DROP TABLE IF EXISTS %s;
            CREATE TABLE %s
                (team_id integer, opponent_id integer, week integer,
                 R integer, HR integer, RBI integer, SB integer, AVG real,
                 W integer, SV integer, K integer, ERA real, WHIP real);
        """ % (table_name, table_name))

    def create_tmp_matchup_table(self, table_name):
        self.c.executescript("""
            DROP TABLE IF EXISTS %s;
            CREATE TABLE %s (team_id integer, week integer,
                R integer, HR integer, RBI integer, SB integer, AVG real,
                W integer, SV integer, K integer, ERA real, WHIP real);
        """ % (table_name, table_name))

    def insert(self, sql, data=None):
        if data is None:
            self.c.execute(sql)
        else:
            for line in data:
                self.c.execute(sql, line)
        self.con.commit()

    def add_teams(self, teams):
        self.insert("INSERT INTO team VALUES (?, ?, ?, ?)", teams)

    def add_matchups(self, matchups):
        self.insert("INSERT INTO matchup VALUES(?, ?, ?)", matchups)

    def add_stats(self, stats):
        sql = "INSERT INTO week_stats ("
        sql += ", ".join(stats.keys())
        sql += ") VALUES("
        sql += ", ".join(["?"] * len(stats)) + ")"
        self.insert(sql, [tuple(stats.values())])

    def save_data(self, data, name):
        # Save to file
        f = open("export/" + name + ".txt", "w", encoding="utf-8")
        f.write(data)
        f.close()

        # Save to Google Drive
        if self.drive is not None:
            self.drive.login()
            self.drive.create_sheet(name, data)

    def nb_weeks(self):
        sql = "SELECT DISTINCT COUNT(*) FROM week_stats GROUP BY team_id"
        return self.c.execute(sql).fetchone()[0]

    def get_select(self, aggregate="AVG", real_only=True, use_alias=True,
                   use_round=True, as_string=True, table_alias=""):
        columns = OrderedDict({})
        if len(table_alias):
            table_alias += "."
        for cat, conf in self.cats_conf.items():
            if real_only and not conf["real"]:
                continue
            expr = "$(" + cat + ")" if "expr" not in conf else conf["expr"]
            col = expr.replace("$(", aggregate + "(" + table_alias)
            if use_round:
                col = self.apply_round(cat, col)
            if use_alias:
                col = self.apply_alias(cat, col)

            columns[cat] = col

        return ", ".join(columns.values()) if as_string else columns

    def apply_round(self, cat, expr):
        precision = str(self.cats_conf[cat]["precision"])
        return "round(" + expr + "," + precision + ")"

    def apply_alias(self, cat, expr):
        return expr + " AS " + cat

    def stats_avg_query(self):
        sql = "SELECT 'Moyenne' as Moyenne, "
        sql += self.get_select()
        sql += " FROM week_stats w"
        # sql += " WHERE team_id <> 6 OR week < 7 "
        return sql

    def stats_query(self):
        sql = "SELECT t.label AS Team, "
        sql += self.get_select()
        sql += " FROM week_stats w JOIN team t ON(w.team_id = t.team_id) "
        sql += "GROUP BY t.team_id"
        print(sql)
        return sql

    def stats_against_query(self):
        sql = "SELECT t.label AS Team, "
        sql += self.get_select()
        sql += " FROM week_stats w JOIN team t ON(w.opponent_id = t.team_id) "
        sql += "GROUP BY t.team_id"
        return sql

    def stats_diff_query(self):
        sql = "SELECT t.label AS Team"
        cols = self.get_select("AVG", table_alias="for", use_alias=False,
                               use_round=False, as_string=False)

        for cat, col in cols.items():
            col = col + " - " + col.replace("for.", "against.")
            col = self.apply_round(cat, col)
            if "invert" in self.cats_conf[cat]:
                col = "- (" + col + ")"
            col = self.apply_alias(cat, col)
            sql += ", " + col

        sql += """
            FROM week_stats against
            JOIN team t ON(against.opponent_id = t.team_id)
            JOIN week_stats for ON(t.team_id = for.team_id)
            GROUP BY t.team_id
        """
        return sql

    def create_diff_table(self):
        if self.diff_table:
            return

        select = "SELECT for.team_id, against.team_id, for.week"
        cols = self.get_select("AVG", table_alias="for", use_alias=False,
                               use_round=False, as_string=False)

        for cat, col in cols.items():
            col = col + " - " + col.replace("for.", "against.")
            col = self.apply_round(cat, col)
            if "invert" in self.cats_conf[cat]:
                col = "- (" + col + ")"
            col = self.apply_alias(cat, col)
            select += ", " + col

        select += """
            FROM week_stats against
            JOIN week_stats for ON(against.opponent_id = for.team_id
                 AND against.week = for.week)
            GROUP BY for.team_id, for.week
        """

        # Temp table creation
        table = "tmp_stats"
        self.create_tmp_stats_table(table)
        self.insert("INSERT INTO tmp_stats " + select)
        self.diff_table = True

    def create_median_table(self, win=True):
        # if self.median_table:
        #     return

        select = "SELECT team_id, week"
        cols = self.get_select("", use_alias=False,
                               use_round=False, as_string=False)

        for cat, expr in cols.items():
            col = "(SELECT COUNT(*) FROM week_stats "
            col += "WHERE team_id = w1.team_id AND week = w1.week "
            query = "SELECT median(%s) FROM week_stats w3 " % expr
            query += "WHERE w3.week = w1.week AND w1.team_id != w3.team_id"
            invert = "invert" in self.cats_conf[cat]
            lt = invert if win else not invert
            op = "<" if lt else ">"
            col += "AND %s %s (%s))" % (expr, op, query)
            select += ", " + col

        select += " FROM week_stats w1"
        select += " GROUP BY team_id, week"

        # Temp table creation
        table = "tmp_medium_stats_%s" % ("W" if win else "L")
        self.create_tmp_matchup_table(table)
        self.insert("INSERT INTO %s %s" % (table, select))
        self.median_table = True

    def stat_diff_win_query(self, win=True):
        self.create_diff_table()

        op = ">" if win else "<"
        sql = "SELECT t.label AS Team"
        for cat, conf in self.cats_conf.items():
            if not conf["real"]:
                continue
            expr = "AVG(" + cat + ")"
            query = "SELECT ifnull(" + self.apply_round(cat, expr) + ", '') FROM tmp_stats "
            query += "WHERE " + cat + " " + op + " 0 AND team_id = tmp.team_id"
            sql += self.apply_alias(cat, ", (" + query + ")")

        sql += " FROM tmp_stats AS tmp"
        sql += " JOIN team t ON(tmp.team_id = t.team_id)"
        sql += " GROUP BY tmp.team_id"

        return sql

    def close_diff_win_query(self, win=True):
        self.create_diff_table()

        sql = "SELECT t.label AS Team"
        for cat, conf in self.cats_conf.items():
            if not conf["real"]:
                continue

            if win:
                cond = cat + " > 0 AND " + cat + " <= " + str(conf["close_limit"])
            else:
                cond = cat + " >= -" + str(conf["close_limit"]) + " AND " + cat + " < 0"

            query = "SELECT COUNT(*) FROM tmp_stats"
            query += " WHERE " + cond + " AND team_id = tmp.team_id"
            sql += self.apply_alias(cat, ", (" + query + ")")

        sql += " FROM tmp_stats AS tmp"
        sql += " JOIN team t ON(tmp.team_id = t.team_id)"
        sql += " GROUP BY tmp.team_id"

        return sql

    def standings(self, sql, filename):
        self.c.execute(sql)

        results = "\t".join(map(lambda x: str(x[0]), self.c.description)) + "\n"

        for row in self.c.fetchall():
            results += "\t".join(map(str, row)) + "\n"

        self.c.execute(self.stats_avg_query())
        results += "\n" + "\t".join(map(str, self.c.fetchone())) + "\n"

        self.save_data(results, filename)

    def close_standings(self, filename):
        wins = self.con.execute(self.close_diff_win_query())
        losses = self.con.execute(self.close_diff_win_query(False))

        results = "\t".join(map(lambda x: str(x[0]), wins.description))
        results += "\tTotal\tPct\n"

        while True:
            win = wins.fetchone()
            loss = losses.fetchone()
            if win is None:
                break

            results += win[0]
            total_win = 0
            total_loss = 0
            for cat, W in OrderedDict(win).items():
                if cat in self.cats_conf:
                    results += '\t="%d-%d"' % (W, loss[cat])
                    total_win += W
                    total_loss += loss[cat]
            results += '\t="%d-%d"' % (total_win, total_loss)
            if total_win + total_loss == 0:
                results += '\n'
            else:
                results += '\t%.2f\n' % (total_win / (total_win + total_loss))

        results += "\nMargin"
        for cat, conf in self.cats_conf.items():
            if conf["real"]:
                results += "\t%s" % str(conf["close_limit"])

        self.save_data(results, filename)

    def median_standings(self, filename):
        self.create_median_table()
        self.create_median_table(False)

        sql = "SELECT t.label AS Team"
        wins = []
        losses = []

        for cat, conf in self.cats_conf.items():
            if not conf["real"]:
                continue

            win = "SUM(W.%s) / MAX(W.week)" % cat
            loss = "SUM(L.%s) / MAX(W.week)" % cat
            expr = "printf('=\"%%d-%%d\"', %s, %s)" % (win, loss)
            sql += ", " + self.apply_alias(cat, expr)
            wins.append(win)
            losses.append(loss)

        wins_expr = " + ".join(wins)
        losses_expr = " + ".join(losses)
        sql += ", printf('=\"%%d-%%d\"', %s, %s) AS Total" % (wins_expr, losses_expr)
        sql += ", round((%s) / (%s + %s + 0.0), 3) AS Pct" % (wins_expr, wins_expr, losses_expr)
        sql += " FROM tmp_medium_stats_W W JOIN team t USING (team_id)"
        sql += " JOIN tmp_medium_stats_L L USING (team_id)"
        sql += " GROUP BY W.team_id"
        sql += " ORDER BY Pct DESC"

        print(sql)
        self.c.execute(sql)
        results = "\t".join(map(lambda x: str(x[0]), self.c.description)) + "\n"

        for row in self.c.fetchall():
            results += "\t".join(map(str, row)) + "\n"

        self.save_data(results, filename)

    def value_standings(self, sql, filename):
        # Diff par rapport à la moyenne de la league
        avg = self.c.execute(self.stats_avg_query()).fetchone()
        results = self.con.execute(sql)
        min = {}
        max = {}
        data = []

        while(True):
            stats = results.fetchone()
            if stats is None:
                break
            stats = OrderedDict(stats)

            # Calculate min and max values
            for cat, value in stats.items():
                if cat not in min:
                    min[cat] = value
                elif value < min[cat]:
                    min[cat] = value
                if cat not in max:
                    max[cat] = value
                elif value > max[cat]:
                    max[cat] = value

            data.append(stats)

        # Second pass to calculate variations
        variations = []
        for stats in data:
            total = 0
            for cat, value in stats.items():
                if cat == "Team":
                    continue

                if value > avg[cat]:
                    value = 1 - (max[cat] - value) / (max[cat] - avg[cat])
                elif value < avg[cat]:
                    value = (value - min[cat]) / (avg[cat] - min[cat]) - 1
                else:
                    value = 0

                if cat in ["ERA", "WHIP"]:
                    value = 0 - value

                total += value
                stats[cat] = "%.2f" % value

            stats["total"] = "%.2f" % total
            variations.append(stats)

        results = "\t".join(map(lambda x: str(x[0]), results.description)) + "\tTotal\n"

        # Order by total value
        def getTotal(item):
            return float(item["total"])
        variations = sorted(variations, key=getTotal, reverse=True)

        for stats in variations:
            results += "\t".join(stats.values()) + "\n"

        self.save_data(results, filename)

    def calculate_stats_ratio(self):
        avg = self.calculate_row(self.c.execute(self.stats_avg_query()).fetchone(), False)
        R = avg["R"]
        del avg["R"]

        ratio = {}
        for cat, value in avg.items():
            ratio[cat] = R / value

        ratio["R"] = 1
        print(ratio)
        return ratio