# Example #1
def main(table_file, scores_file, out_file):
    '''Update the standings table with the scores of the latest round.

    Parameters
    ----------
    table_file : str
        CSV file with the current standings (must contain 'Points', 'MVP',
        'Name' and 'team' columns).
    scores_file : str
        JSON file mapping user ids to the score of the round.
    out_file : str
        Path where the updated, sorted table is written as CSV.

    Returns
    -------
    pandas.DataFrame
        The updated table, sorted by points, MVPs and name.

    To DO:
        1) Update usernames
        2) Insert simple models for benchmarking
    '''
    # username to user id
    username_id = read_json('usernames.json')
    table = pd.read_csv(table_file)
    scores = read_json(scores_file)

    points = table.Points.values
    mvps = table.MVP.values

    # the highest score of the round defines the MVP(s)
    mvp_score = max(scores.values())
    # NOTE(review): iterates the 'team' column but sorts on 'Name' below —
    # presumably both columns exist in the table; confirm against the CSV.
    for i, name in enumerate(table.team):
        score = scores[username_id[name]]
        points[i] = points[i] + score
        if score == mvp_score:
            mvps[i] += 1

    new_table = table.copy()
    new_table['Points'] = points
    new_table['MVP'] = mvps

    # BUG FIX: the original sorted `table` (not the updated `new_table`) and
    # used `DataFrame.sort`, which was removed in pandas 0.20 — use
    # `sort_values` on the updated table instead.
    new_table = new_table.sort_values(['Points', 'MVP', 'Name'],
                                      ascending=[False, False, True])
    new_table.insert(0, 'Position',
                     np.arange(1, new_table.shape[0] + 1, dtype=int))

    new_table.to_csv(out_file, index=False)

    return new_table
def main(post_id):
    '''
    This function fetches the comments from a facebook post and stores them
    in a json file.

    Parameters
    ----------
    post_id : str
        The id of the facebook post whose comments are fetched.
    '''
    logging.basicConfig(level=logging.INFO)

    # read token's file
    tokens_file = 'config/tokens.json'
    tokens = read_json(tokens_file)
    token = tokens['token']
    user_id = tokens['user_id']

    # set output directory
    config_file = 'config/config_playoff_pred.json'
    if path.isfile(config_file):
        out_dir = read_json(config_file)['output_directory']
    else:
        # BUG FIX: the message was passed as two separate arguments; logging
        # treats extra positional args as lazy %-format values, and with no
        # placeholder in the first string the record fails to render.
        logging.warning('Configuration file not found. '
                        'Save to working directory')
        out_dir = '.'

    # make graph
    graph = fb.GraphAPI(access_token=token, version=3.0)
    # graph id = user_id_post_id
    idd = user_id + '_' + post_id

    # Get the comments from a post.
    comments = graph.get_connections(id=idd, connection_name='comments')
    answers = comments['data']

    # follow pagination until all comments are collected; 'paging' may be
    # absent from the last (or an empty) response.
    while 'next' in comments.get('paging', {}):
        comments = requests.get(comments['paging']['next']).json()
        answers.extend(comments['data'])

    logging.info('%d comments fetched', len(answers))

    # write comments to json file.
    write_json(path.join(out_dir, 'playoff_predictions_fb_comments.json'),
               answers)
    return
# Example #3
def get_results(games_fb, day, season, team_mapping_file):
    '''Return the results of a round's games in the order of the fb post.

    The games scraped from the web are aligned with the games listed on the
    facebook post; each game's outcome is encoded as 1 (home win), 2 (away
    win) or 0 (tie, or a game decided by the judges).

    Returns the encoded results as a numpy array, together with the games'
    datetimes.
    '''
    logger = logging.getLogger(__name__)

    # the round's games may arrive as a plain list of [home, away] pairs
    if isinstance(games_fb, list):
        games_fb = pd.DataFrame(games_fb, columns=['Home Team', 'Away Team'])

    # scrape the round's teams and scores from the web-page
    data = scrap_round_results(day, season)

    if data.shape[0] != games_fb.shape[0]:
        logger.warning('Number of games is inconsistent')

    # map team names written in greek to their official english names
    name_map = read_json(team_mapping_file)
    games_fb.replace(name_map, inplace=True)

    # with names normalised, align the two dataframes game by game
    keys = ['Home Team', 'Away Team']
    final = games_fb.merge(data, how='inner', left_on=keys, right_on=keys)

    if pd.isna(final.values).any():
        logger.error('Nan values appeared after merging the DataFrames.')
        sys.exit('Exit')

    if final.shape[0] != games_fb.shape[0]:
        logger.error("Shape of 'final' variable is inconsistent (%d)" %
                     final.shape[0])
        logger.info("This is likely due to incorrect naming of teams in "
                    "FB post. Check the post for typos in teams' names")
        logger.debug(final)
        sys.exit('Exit')

    home = final['Home Score']
    away = final['Away Score']
    results = np.where(home > away, 1, 2)
    results[(home == away).values] = 0
    # ignore from the score games that were determined by judges
    judged = ((home == 20) & (away == 0)) | ((home == 0) & (away == 20))
    results[judged.values] = 0

    if results.shape[0] != games_fb.shape[0]:
        logger.error("Shape of 'results' variable is inconsistent (%d)" %
                     results.shape[0])
        sys.exit('Exit')

    return results, final['Datetime'].dt.to_pydatetime()
# Example #4
def main(post_id, results_file, out_file):
    '''Score users' predictions left as comments on a facebook post.

    Reads the actual results, parses the deadline timestamp from the post's
    text, fetches all comments, extracts each user's predictions (the digits
    found in the comment) and scores them against the results.  The mapping
    of comment id to score is written to `out_file` and returned.

    TO DO:
        1) check time-zones
    '''
    # configuration file
    config_file = 'config.json'
    configs = read_json(config_file)
    token = configs['token']
    user_id = configs['user_id']

    # read actual results
    results = read_results(results_file)

    # facebook's comment timestamp format
    dt_format = '%Y-%m-%dT%H:%M:%S+0000'

    # make graph
    graph = fb.GraphAPI(access_token=token, version=2.7)
    # graph id = user_id_post_id
    idd = user_id + '_' + post_id
    # get text of post
    post = graph.get_object(id=idd)
    message = post['message']
    end_time = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', message)
    if end_time is None:
        print('Warning: Deadline timestamp not found in post.')
        deadline = datetime.now()
    else:
        # BUG FIX: re.search returns a Match object, not a string; pass the
        # matched text to strptime.
        deadline = datetime.strptime(end_time.group(0), '%Y-%m-%d %H:%M:%S')

    # Get the comments from a post.
    comments = graph.get_connections(id=idd, connection_name='comments')

    users_dict = {}
    while True:
        for comment in comments['data']:
            user = comment['id']
            text = comment['message']
            time = datetime.strptime(comment['created_time'], dt_format)
            pred = np.array([int(s) for s in text if s.isdigit()])

            # check if number of predictions is correct
            if len(pred) != 8:
                print('Warning: Incorrect number of predictions for user %s' % user)
                score = 0
            # check if predictions are either 1 or 2
            elif not ((pred == 1) | (pred == 2)).all():
                print('Warning: Incorrect prediction for user %s' % user)
                score = 0
            # check comment is in time
            elif time > deadline:
                print('Warning: User %s prediction off time' % user)
                print(time, deadline)
                score = 0
            else:
                score = np.sum(pred == results)

            users_dict[user] = int(score)

        # follow pagination; 'paging' may be absent when there are no more
        # comments to fetch.
        if 'next' in comments.get('paging', {}):
            comments = requests.get(comments['paging']['next']).json()
        else:
            break

    write_json(out_file, users_dict)

    return users_dict
def main(comments):
    '''
    Extract the username and the predicted playoff teams from the comments
    of a facebook post, convert them to a dataframe and save it to disk.

    Parameters
    ----------
    comments : list of dict
        Facebook comment objects with 'id', 'message' and 'created_time'
        keys.
    '''
    # BUG FIX: the original block was followed by a duplicated copy of its
    # last lines after the `return`, at broken indentation (IndentationError);
    # the orphan fragment is removed.
    logging.basicConfig(level=logging.INFO)

    # read config file
    config_file = 'config/config_playoff_pred.json'
    config = read_json(config_file)
    n_pred_teams = config['n_pred_teams']
    out_dir = config['output_directory']
    team_mappings_file = config['team_names_mapping_file']
    teams_dict = read_json(team_mappings_file)
    deadline = datetime.strptime(config['deadline'], '%Y-%m-%d %H:%M:%S')
    deadline = convert_timezone(deadline, from_tz='Europe/Athens', to_tz='UTC')
    fb_format = '%Y-%m-%dT%H:%M:%S+0000'

    teams = list(teams_dict.keys())
    # fuzzy regexps: tolerate up to 2 edit errors per team name, except for
    # short names (CSKA, Zenit, Real) where 2 errors would over-match.
    regexps = [
        '(?e)(%s){e<=2}' %
        team if team not in ['CSKA', 'Zenit', 'Real'] else '(?e)(%s){e<1}' %
        team for team in teams
    ]
    header = ['team%d' % i for i in range(1, n_pred_teams + 1)]

    all_predictions = []
    usernames = []
    for comment in comments:
        comment_id = comment['id']
        text = comment['message']
        time = datetime.strptime(comment['created_time'], fb_format)
        # make the time variable datetime aware
        time = time.replace(tzinfo=pytz.UTC)
        # check if comment is off time.
        if time > deadline:
            logging.warning('Comment (id %s) is off time: "%s"', comment_id,
                            text)

        text_copy = text.replace('\n', '')
        pred_teams = []
        for team, rxp in zip(teams, regexps):
            r = regex.search(rxp, text, regex.IGNORECASE)
            if r is not None:
                match = text[r.start():r.end()]
                pred_teams.append(teams_dict[team])
                # strip the matched team name; whatever text remains is
                # assumed to be the username
                text_copy = text_copy.replace(match, '').strip()
        username = (text_copy.replace('-', '').replace(',',
                                                       '').replace('.',
                                                                   '').strip())

        # keep only comments that predicted exactly the expected number of
        # teams
        if len(pred_teams) == n_pred_teams:
            all_predictions.append(dict(zip(header, sorted(pred_teams))))
            usernames.append(username)
        else:
            logging.warning('Comment (id %s) is not valid: "%s"', comment_id,
                            text)

    # convert to dataframe
    df = pd.DataFrame(all_predictions, index=usernames)
    df.index.name = 'username'
    # save to file
    df.to_csv(os.path.join(out_dir, 'playoffs_predictions.csv'), index=True)
    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # BUG FIX: `required` expects a bool; `argparse.FileType('r')` belongs to
    # the `type` argument.  Keep `type=str` since read_json takes a path.
    parser.add_argument('-f',
                        '--file',
                        type=str,
                        required=True,
                        help="the file with facebook comments")
    args = parser.parse_args()

    # read comments
    comments = read_json(args.file)

    main(comments)
# Example #7
def main(day, sort_by_final=0):
    '''Update the standings table after a round of games.

    Reads the previous round's table and the current round's scores, merges
    them, handles new players and players that missed the round, updates
    points and MVP counts, and writes the new table and the round metadata
    to disk.

    Parameters
    ----------
    day : int
        Round number, must be a positive integer.
    sort_by_final : int, optional
        If 1, sort by the final score (points + playoff score); otherwise
        sort by points and append the final rank as an extra column.

    Returns
    -------
    pandas.DataFrame
        The updated standings table.

    To DO:
        1) Insert simple models for benchmarking
    '''
    logging.basicConfig(level=logging.INFO)

    if day < 1:
        logging.error('Round must be non-negative integer')
        sys.exit('Exit')

    config_file = 'config/config.json'
    configs = read_json(config_file)
    out_dir = configs['output_directory']
    fuzzy_thr = configs['fuzzy_threshold']
    season = configs['season']
    # playoff scores are merged into the table further below
    if ('playoff_predictions_file' in configs.keys()
            and os.path.isfile(configs['playoff_predictions_file'])):
        playoff_pred_file = configs['playoff_predictions_file']
        penalties = (configs['playoff_predict_penalties']
                     if 'playoff_predict_penalties' in configs else {})
        playoffs_scores = get_playoffs_scores(playoff_pred_file,
                                              season,
                                              day,
                                              penalties=penalties,
                                              n_playoff_teams=8)
    else:
        logging.warning('Playoff predictions file not available.')
        # BUG FIX: `playoffs_scores` was left undefined on this branch,
        # raising a NameError at the merge below.  An empty frame produces
        # NaN playoff scores, which the existing NaN check reports.
        playoffs_scores = pd.DataFrame(columns=['Playoff_Score'])

    table_file = os.path.join(out_dir, 'table_day_%d.csv' % (day - 1))
    scores_file = os.path.join(out_dir, 'predictions_day_%d.csv' % day)
    out_file = os.path.join(out_dir, 'table_day_%d.csv' % day)

    scores = pd.read_csv(scores_file)
    if day == 1:
        # set initial table, all players have zero points, mvps, etc.
        table = pd.DataFrame(np.arange(1, scores.shape[0] + 1, dtype=int),
                             columns=['Position'])
        table['Name'] = scores['Name'].values
        table['MVP'] = np.zeros((scores.shape[0], 1), dtype=int)
        table['Points'] = np.zeros((scores.shape[0], 1), dtype=int)
        table['Missed Rounds'] = np.zeros((scores.shape[0], 1), dtype=int)
    else:
        # read the table of the previous round
        table = pd.read_csv(table_file)

        # fix usernames via fuzzy search
        new_names = fuzzy_fix_names(scores['Name'].values,
                                    table['Name'].values,
                                    threshold=fuzzy_thr)
        scores['Name'] = new_names

    # check if there is a new user, give the lowest score.
    min_points = np.min(table.Points.values)

    # find mvp score of the round
    mvp_score = np.nanmax(scores['Score'].values)

    # find min score of the round
    min_score = np.nanmin(scores['Score'].values)

    # merge datasets
    df_merged = table.merge(scores,
                            how='outer',
                            left_on='Name',
                            right_on='Name')
    df_new = df_merged.copy()

    # new players: rows that came only from `scores` have NaN 'Points'
    jj = np.isnan(df_merged['Points'])
    if any(jj):
        df_new.loc[jj, 'Points'] = min_points
        df_new.loc[jj, 'MVP'] = 0
        df_new.loc[jj, 'Missed Rounds'] = day - 1
        for name in df_new['Name'][jj].values:
            logging.info('%s is a new player', name)

    # did not play this round: rows that came only from `table`
    ii = np.isnan(df_merged['Score'])
    dnp = []
    if any(ii):
        df_new.loc[ii, 'Score'] = min_score
        df_new.loc[ii, 'Missed Rounds'] += 1
        for name in df_new['Name'][ii].values:
            dnp.append(name)
            logging.warning('(DNP): %s did not play this round' % name)

    # check if user has missed more than four rounds.
    mm = df_new['Missed Rounds'] >= 4
    disq = []
    if any(mm):
        for name in df_new['Name'][mm].values:
            logging.warning(
                '(DIS): User %s has missed four rounds and '
                'is being disqualified', name)
            df_new.drop(df_new[df_new['Name'] == name].index, inplace=True)
            disq.append(name)

    # update points
    df_new['Points'] += df_new['Score'].astype(int)
    # update MVP
    df_new.loc[df_new['Score'] == mvp_score, 'MVP'] += 1
    # convert colums to int type.
    df_new[['Score', 'MVP', 'Points', 'Missed Rounds'
            ]] = df_new[['Score', 'MVP', 'Points',
                         'Missed Rounds']].astype(int)

    new_table = df_new[['Name', 'Score', 'Points', 'MVP',
                        'Missed Rounds']].copy()
    new_table.rename(columns={'Score': 'Round Score'}, inplace=True)
    # merge the score table with the playoff table
    new_table = new_table.merge(playoffs_scores,
                                how='left',
                                left_on='Name',
                                right_index=True)
    new_table['Final_Score'] = new_table['Points'] + new_table['Playoff_Score']

    if sort_by_final == 1:
        # sort by final score (desc), points (desc), MVP (desc)
        # and finally by Name (asc).
        sort_list = ['Final_Score', 'Points', 'MVP', 'Name']
        asc_list = [False, False, False, True]
        new_table = new_table.sort_values(by=sort_list, ascending=asc_list)
        new_table.insert(0, 'Position',
                         np.arange(1, new_table.shape[0] + 1, dtype=int))
    else:
        # sort by points (desc), then by MVP (desc) and finally by name (asc)
        sort_list = ['Points', 'MVP', 'Name']
        asc_list = [False, False, True]
        new_table = new_table.sort_values(by=sort_list, ascending=asc_list)
        new_table.insert(0, 'Position',
                         np.arange(1, new_table.shape[0] + 1, dtype=int))
        # add the final rank
        ii = np.lexsort(
            (new_table['Position'].values, -new_table['Final_Score'].values))
        new_table['Final_Rank'] = ii.argsort() + 1

    if new_table['Playoff_Score'].isna().any():
        logging.warning('Users unknown')

    logging.debug(new_table)

    metadata_file = os.path.join(out_dir, 'metadata_day_%d.json' % day)
    metadata = read_json(metadata_file)
    metadata.update({'dnp': dnp, 'disq': disq})
    write_json(metadata_file, metadata)

    new_table.to_csv(out_file, index=False)

    return new_table
# Example #8
def main(post_id, results_file, nday):
    '''Fetch a round's facebook post, score the users' predictions and save.

    Parses the round's games from the post's text, fetches the results
    (from the web, falling back to the flat `results_file`), scores every
    valid comment's predictions against the results and writes the scores,
    predictions and round metadata to the output directory.

    Parameters
    ----------
    post_id : str
        The id of the facebook post of the round.
    results_file : str
        Flat file with the round's results, used when fetching from the
        web fails.
    nday : int
        Round number; used in the output file names.
    '''
    logging.basicConfig(level=logging.INFO)

    # tokens file
    tokens_file = 'config/tokens.json'
    tokens = read_json(tokens_file)
    token = tokens['token']
    user_id = tokens['user_id']

    # configuration file
    config_file = 'config/config.json'
    configs = read_json(config_file)
    n_games = configs['n_games']
    season = configs['season']
    dt_format = configs['dt_format']
    pattern = configs['dt_pattern']
    out_dir = configs['output_directory']
    team_mapping_file = configs['team_names_mapping_file']

    # facebook's timestamp format
    fb_format = '%Y-%m-%dT%H:%M:%S+0000'

    # make graph
    graph = fb.GraphAPI(access_token=token, version=3.0)
    # graph id = user_id_post_id
    idd = user_id + '_' + post_id
    # get text of post
    post = graph.get_object(id=idd)
    message = post['message']
    post_time = datetime.strptime(post['created_time'], fb_format)

    # fetch actual results from the web -- restrictions apply
    out_results_file = os.path.join(out_dir, 'results_day_%d.csv' % nday)
    repat = re.compile(pattern)
    # extract '<datetime> <home>-<away>' lines from the post, then strip the
    # datetime part to keep only the two team names.
    # NOTE(review): presumably `pattern` matches the datetime prefix of each
    # game line — confirm against the configured 'dt_pattern'.
    games_times = re.findall(pattern + r'[^\d\n]*', message)
    games = [[u.strip() for u in repat.sub('', game).split('-')]
             for game in games_times]

    fb_post_games_df = pd.DataFrame(games, columns=['Home', 'Away'])
    if len(games) != n_games:
        # check if the number of games identified in the post is correct.
        logging.error('Number of games identified on FB post is incorrect')
        sys.exit('Exit')
    else:
        try:
            # fetch results and game-times from the web
            results, game_times_utc = get_results(games, nday, season,
                                                  team_mapping_file)
        except (requests.exceptions.ConnectionError, AssertionError) as e:
            logging.error(e)
            # if there is a connection error, read results from file.
            logging.warning('Unable to fetch results from the internet. '
                            'Try from flat file.')
            # if file does not exist, exit program and ask for file of results.
            if not os.path.isfile(results_file):
                logging.error('Provide correct file with the results.')
                sys.exit('Exit')
            # read actual results
            results = read_results(results_file)

            logging.info('Get game-times from the FB post')
            # extract game times from the post
            game_times_utc = get_game_times_from_post(pattern, message,
                                                      dt_format, post_time,
                                                      n_games)
    # write results to csv file with names of teams
    fb_post_games_df['Result'] = results
    fb_post_games_df.to_csv(out_results_file, index=False)

    logging.info('The results are: {}'.format(results))

    if results.shape[0] != n_games:
        logging.error('Results not valid')
        sys.exit('Exit')

    # Get the comments from a post.
    comments = graph.get_connections(id=idd, connection_name='comments')

    score_dict = {}    # user -> total round score
    predict_dict = {}  # user -> per-game predictions array
    offtime = {}       # user -> number of off-time predictions
    while True:
        for comment in comments['data']:
            comment_id = comment['id']
            text = comment['message']
            time = datetime.strptime(comment['created_time'], fb_format)
            # make the time variable datetime aware
            time = time.replace(tzinfo=pytz.UTC)

            is_valid, user, pred = valid_post(text,
                                              comment_id=comment_id,
                                              n_games=n_games)

            if is_valid is False:
                logging.debug('Comment id %s not valid' % comment_id)
                continue

            # check comment is prior game-times.
            # NOTE(review): element-wise comparison — assumes game_times_utc
            # is an array of timezone-aware datetimes; confirm.
            ii = time < game_times_utc
            if not ii.all():
                offtime.update({user: int(np.sum(~ii))})
                logging.warning(
                    '%d Prediction(s) off time for user %s in '
                    'comment (id %s): %s', np.sum(~ii), user, comment_id, text)
            # if comment after any game started, give 0 points for this game
            pred[~ii] = 0
            jj = results > 0  # include only finished games.
            # E.g. if a player played less than 9 games, they will get 0, which
            # will match with an unfinished game.
            score = np.sum(pred[ii & jj] == results[ii & jj])

            # later comments by the same user overwrite earlier ones
            score_dict[user] = int(score)
            predict_dict[user] = pred

        # NOTE(review): assumes 'paging' is always present in the response —
        # would raise KeyError otherwise; confirm against the Graph API.
        if 'next' in comments['paging']:
            comments = requests.get(comments['paging']['next']).json()
        else:
            break

    # make dataframe from users' score dictionary
    df_scores = pd.DataFrame.from_dict(score_dict,
                                       orient='index',
                                       columns=['Score'])
    # make dataframe from users' predictions dictionary
    df_pred = pd.DataFrame.from_dict(predict_dict,
                                     orient='index',
                                     columns=['-'.join(u) for u in games])

    # merge the two dataframe based on users' names.
    df = df_scores.merge(df_pred, left_index=True, right_index=True)
    # sort by score (descending) and by name (descending)
    df.rename_axis('Name', axis=0, inplace=True)
    df.sort_values(['Score', 'Name'], ascending=[False, True], inplace=True)

    # round metadata: off-time users, MVP(s) and the round's extreme scores
    metadata = {
        'offtime': offtime,
        'mvps': list(df[df['Score'] == df['Score'].max()].index),
        'mvp_score': int(df['Score'].max()),
        'dnp_score': int(df['Score'].min())
    }
    metadata_file = os.path.join(out_dir, 'metadata_day_%d.json' % nday)
    write_json(metadata_file, metadata)

    # save dataframe
    df.to_csv(os.path.join(out_dir, 'predictions_day_%d.csv' % nday),
              sep=',',
              index=True,
              encoding='utf-8')

    return
def main(day, post=False):
    '''
    This function creates a text-based table with info about the round's MVP,
    etc., and posts it on the Facebook group.
    WARNING: After experimentation, the post is not visible to every member
    of the group, therefore this method was ignored. However, it is used for
    producing the text of MVPs, DNPs, etc., to a flat file, from which it is
    copied and manually posted on FB.
    If a user wants to post the table on FB, set post input to True.

    Parameters
    ----------
    day : int
        Round number whose table and metadata files are read.
    post : bool, optional
        If True, post the generated text on the facebook group.
    '''

    # tokens file
    tokens_file = 'config/tokens.json'
    # BUG FIX: duplicated assignment (`tokens = tokens = ...`) removed.
    tokens = read_json(tokens_file)
    token = tokens['token']
    # user_id = tokens['user_id']
    group_id = tokens['mia_syn_mia_id']

    # configuration file
    config_file = 'config/config.json'
    configs = read_json(config_file)
    out_dir = configs['output_directory']
    n_games = configs['n_games']

    # read the latest table file
    table_file = os.path.join(out_dir, 'table_day_%d.csv' % day)
    # load the mvp, dnp and offtime usernames produced by other scripts.
    metadata = read_json(os.path.join(out_dir, 'metadata_day_%d.json' % day))
    mvps = metadata['mvps']
    mvp_score = metadata['mvp_score']
    dnp_score = (metadata['dnp_score']
                 if 'dnp_score' in metadata.keys() else None)
    dnps = metadata['dnp'] if 'dnp' in metadata.keys() else []
    disqs = metadata['disq'] if 'disq' in metadata.keys() else []
    offtime = metadata['offtime']

    # ask for input optional message
    # optional = input('Optional message:')
    optional = ''

    # form the string
    mvp_str = 'MVPs: ' + ', '.join(['@{}'.format(name) for name in mvps]) +\
              ' με {}/{} σωστές προβλέψεις.'.format(mvp_score, n_games) + '\n'

    # pick singular/plural verb forms (greek)
    verb = 'λαμβάνει' if len(dnps) == 1 else 'λαμβάνουν'
    if len(dnps) > 0:
        dnp_str = (
            'DNP: ' + ', '.join(['@{}'.format(name) for name in dnps]) +
            ' και {} τη χαμηλότερη βαθμολογία ({})'.format(verb, dnp_score) +
            '\n')
    else:
        dnp_str = 'DNP: -' + '\n'

    if len(offtime) > 0:
        offtime_str = 'Off time: ' + ', '.join(
            ['@{} ({})'.format(k, v) for k, v in offtime.items()]) + '\n'
    else:
        offtime_str = 'Off time: -' + '\n'

    verb1 = 'αποβάλλεται' if len(disqs) == 1 else 'αποβάλλονται'
    verb2 = 'συμπλήρωσε' if len(disqs) == 1 else 'συμπλήρωσαν'
    if len(disqs) > 0:
        disq_str = ', '.join(['@{}'.format(name) for name in disqs]) +\
                    ' {} καθώς {} 4 απουσίες.'.format(verb1, verb2) + '\n'
    else:
        disq_str = ''

    # form the fixed-width text table
    table = pd.read_csv(table_file)
    maxlen = int(table['Name'].str.len().max())
    header = (table.columns.str.replace('Position', 'Rank').str.replace(
        'Missed Rounds', 'DNP').str.replace('_', ' '))
    widths = [len(u) + 2 if u != 'Name' else maxlen + 2 for u in header]
    s = "".join(u.ljust(i) for u, i in zip(header, widths)) + '\n'
    line_length = len(s) - 1
    for row in table.values:
        s1 = "".join(str(u).ljust(i) for u, i in zip(row, widths)) + '\n'
        s += s1
        # separators after rank 4 (playoff cut) and rank 12
        if row[0] == 4:
            s += '-' * line_length + '\n'
        elif row[0] == 12:
            s += '=' * line_length + '\n'
    table_str = s

    # concatenate the string
    stats = mvp_str + dnp_str + offtime_str + disq_str

    start = 'Αποτελέσματα Euroleague Day %d' % day
    end = '#feeldevotion #euroleague #day%d #mia_syn_mia #oneplusone' % day
    final = (start + '\n\n' + optional + '\n\n' + stats + '\n' + table_str +
             '\n' + end)

    # write the text to a text file for inspection
    with open('temp_table.txt', 'w', encoding='utf-8') as f:
        f.writelines(final)

    # post the final text to facebook.
    if post:
        graph = fb.GraphAPI(access_token=token, version=3.0)

        graph.put_object(parent_object=group_id,
                         connection_name='feed',
                         message=final)

    return