Example #1
0
def fix_dup_gamestat(engine):
    """
    For unknown reason (or bug), if one squad has multiple stat for one game,
    find them and delete them.
    Usually this will not happen because SquadGameStat use both squad_id and
    game_id to identify itself.
    :param engine:
    :return: None
    """
    session = settings.create_session(engine)
    dup_game_id = session.query(SquadGameStat.game_id)\
        .group_by(SquadGameStat.game_id).having(func.count(SquadGameStat.game_id) > 2).all()
    print dup_game_id
    for gamestat_list in dup_game_id:
        game_id = gamestat_list[0]
        dup_records = session.query(
            SquadGameStat.id,
            SquadGameStat.squad_id).filter_by(game_id=game_id).all()
        print dup_records
        squad_list = []
        for dup_record in dup_records:
            record_id = dup_record[0]
            squad_id = dup_record[1]
            if squad_id not in squad_list:
                squad_list.append(squad_id)
            else:
                try:
                    session.query(SquadGameStat).filter_by(
                        id=record_id).delete()
                    print record_id, "deleted"
                except:
                    print record_id, "delete failed"

    session.close()
Example #2
0
def fix_dup_gamestat(engine):
    """
    For unknown reason (or bug), if one squad has multiple stat for one game,
    find them and delete them.
    Usually this will not happen because SquadGameStat use both squad_id and
    game_id to identify itself.
    :param engine:
    :return: None
    """
    session = settings.create_session(engine)
    dup_game_id = session.query(SquadGameStat.game_id)\
        .group_by(SquadGameStat.game_id).having(func.count(SquadGameStat.game_id) > 2).all()
    print dup_game_id
    for gamestat_list in dup_game_id:
        game_id = gamestat_list[0]
        dup_records = session.query(SquadGameStat.id, SquadGameStat.squad_id).filter_by(game_id=game_id).all()
        print dup_records
        squad_list = []
        for dup_record in dup_records:
            record_id = dup_record[0]
            squad_id = dup_record[1]
            if squad_id not in squad_list:
                squad_list.append(squad_id)
            else:
                try:
                    session.query(SquadGameStat).filter_by(id=record_id).delete()
                    print record_id, "deleted"
                except:
                    print record_id, "delete failed"

    session.close()
Example #3
0
    def test_player_season_stat(self):
        """Check that no PlayerSeasonStat row exists for squad member 84783."""
        engine = settings.create_engine("men")
        session = settings.create_session(engine)
        # Make sure the session is released even when the assertion fails
        self.addCleanup(session.close)

        squadmember_id = 84783

        player_season_stat_obj = session.query(PlayerSeasonStat).filter_by(
            squadmember_id=squadmember_id).first()

        # .first() yields None when the query matches no row
        self.assertIsNone(player_season_stat_obj)
Example #4
0
    def test_player_season_stat(self):
        """Assert that squad member 84783 has no PlayerSeasonStat row."""
        engine = settings.create_engine("men")
        session = settings.create_session(engine)

        squadmember_id = 84783

        try:
            player_season_stat_obj = session.query(PlayerSeasonStat).filter_by(
                squadmember_id=squadmember_id).first()
        finally:
            # Do not leave the session open after the lookup, pass or fail
            session.close()

        # .first() yields None when the query matches no row
        self.assertIsNone(player_season_stat_obj)
Example #5
0
def fix_game_with_no_date(engine):
    """
    Re-run the game parser on every Game row whose date is missing.

    Some games have no date (and other information) because the scraping
    process was interrupted. Give them a chance to be scraped again.
    Some of them are caused by corrupted ncaa pages; there is no way to fix
    those.

    Games are processed in ascending id order.

    :param engine: engine handed to settings.create_session
    :return: None
    """
    session = settings.create_session(engine)
    try:
        # filter_by(date=None) selects the rows whose date is NULL
        games = session.query(Game).filter_by(date=None)\
            .order_by(asc(Game.id)).all()
        for game_record in games:
            game_parser(session, game_record)
    finally:
        # Close the session even if game_parser raises part-way through
        session.close()
Example #6
0
def fix_game_with_no_date(engine):
    """
    Give every dateless Game another pass through game_parser.

    Some games have no date (and other information) because of an
    interruption of the scraping process, so they are scraped again here.
    Others are caused by corrupted ncaa pages and cannot be fixed.

    The games are handed to game_parser in ascending id order.

    :param engine: engine handed to settings.create_session
    :return: None
    """
    session = settings.create_session(engine)
    try:
        dateless_games = session.query(Game).filter_by(date=None)
        for game_record in dateless_games.order_by(asc(Game.id)).all():
            game_parser(session, game_record)
    finally:
        # A failing game_parser call must not leave the session open
        session.close()
Example #7
0
def fix_dup_game_in_one_day(engine):
    """
    This happens rarely. ncaa posted the first game, later they found the data was wrong,
    so they post the second page (with a new assigned game_id) and took down the first page.
    Our database scrapped both of the games page but only need to keep one (the one with
    larger game_id).
    We will skip squad_id = 1 because that represents all non-ncaa squads
    :param engine:
    :return: None
    """
    session = settings.create_session(engine)
    dup_games = session.query(Game.id, Game.date, Game.winner_id).\
        filter(and_(Game.winner_id != 1, Game.loser_id != 1, Game.date != None)).\
        group_by(Game.date, Game.winner_id).having(func.count(Game.id) > 1).all()
    for game_info in dup_games:
        game_id = game_info[0]
        session.query(Game).filter_by(id=game_id).delete()
        print "Game: ", game_id, " deleted."
Example #8
0
def fix_dup_game_in_one_day(engine):
    """
    This happens rarely. ncaa posted the first game, later they found the data was wrong,
    so they post the second page (with a new assigned game_id) and took down the first page.
    Our database scrapped both of the games page but only need to keep one (the one with
    larger game_id).
    We will skip squad_id = 1 because that represents all non-ncaa squads
    :param engine:
    :return: None
    """
    session = settings.create_session(engine)
    dup_games = session.query(Game.id, Game.date, Game.winner_id).\
        filter(and_(Game.winner_id != 1, Game.loser_id != 1, Game.date != None)).\
        group_by(Game.date, Game.winner_id).having(func.count(Game.id) > 1).all()
    for game_info in dup_games:
        game_id = game_info[0]
        session.query(Game).filter_by(id=game_id).delete()
        print "Game: ", game_id, " deleted."
Example #9
0
def fix_dup_playerseasonstat(engine):
    """
    One squad member should only have one season_stat
    This should be a one time fix.
    :param engine:
    :return: None
    """
    session = settings.create_session(engine)
    distinct_member_id_list = session.query(distinct(PlayerSeasonStat.squadmember_id)).all()
    for member_id_result in distinct_member_id_list:
        member_id = member_id_result[0]
        # Query all duplicates in descending order
        dup_stats = session.query(PlayerSeasonStat).filter_by(squadmember_id=member_id)\
            .order_by(desc(PlayerSeasonStat.id)).all()
        # Delete all other dups except for the most recent one
        for one_stat in dup_stats[1:]:
            session.query(PlayerSeasonStat).filter_by(id=one_stat.id).delete()
        print "Cleaning dup for squad_member: ", str(member_id)
Example #10
0
def fix_dup_playerseasonstat(engine):
    """
    One squad member should only have one season_stat
    This should be a one time fix.
    :param engine:
    :return: None
    """
    session = settings.create_session(engine)
    distinct_member_id_list = session.query(
        distinct(PlayerSeasonStat.squadmember_id)).all()
    for member_id_result in distinct_member_id_list:
        member_id = member_id_result[0]
        # Query all duplicates in descending order
        dup_stats = session.query(PlayerSeasonStat).filter_by(squadmember_id=member_id)\
            .order_by(desc(PlayerSeasonStat.id)).all()
        # Delete all other dups except for the most recent one
        for one_stat in dup_stats[1:]:
            session.query(PlayerSeasonStat).filter_by(id=one_stat.id).delete()
        print "Cleaning dup for squad_member: ", str(member_id)
Example #11
0
def fix_only_one_gamestat(engine):
    """
    Some games might only have one stat record due to the broken scraping process.
    (But if the other team is a non-ncaa team, there is no need to fix)
    :param engine:
    :return: None
    """

    session = settings.create_session(engine)
    game_id_list = session.query(SquadGameStat.game_id)\
        .group_by(SquadGameStat.game_id).having(func.count(SquadGameStat.game_id) == 1).all()
    # Compare each game_id in table Schedule
    for game_id_result in game_id_list:
        game_id = game_id_result[0]
        schedules = session.query(Schedule).filter_by(game_id=game_id).all()
        if len(schedules) == 2:
            # Has two schedules for each team, need to be fixed
            game_record = session.query(Game).filter_by(id=game_id).first()
            print game_record
            game_stat_parser(session, game_record)
            print "added 1 game_stat"

    session.close()
Example #12
0
def fix_only_one_gamestat(engine):
    """
    Some games might only have one stat record due to the broken scraping process.
    (But if the other team is a non-ncaa team, there is no need to fix)
    :param engine:
    :return: None
    """

    session = settings.create_session(engine)
    game_id_list = session.query(SquadGameStat.game_id)\
        .group_by(SquadGameStat.game_id).having(func.count(SquadGameStat.game_id) == 1).all()
    # Compare each game_id in table Schedule
    for game_id_result in game_id_list:
        game_id = game_id_result[0]
        schedules = session.query(Schedule).filter_by(game_id=game_id).all()
        if len(schedules) == 2:
            # Has two schedules for each team, need to be fixed
            game_record = session.query(Game).filter_by(id=game_id).first()
            print game_record
            game_stat_parser(session, game_record)
            print "added 1 game_stat"

    session.close()