def main(table_file, scores_file, out_file):
    '''Update the league table with the users' scores of the latest round.

    TODO:
        1) Update usernames
        2) Insert simple models for benchmarking

    Parameters
    ----------
    table_file : str
        Path to the CSV file with the current standings.
    scores_file : str
        Path to the JSON file mapping user ids to round scores.
    out_file : str
        Path where the updated table CSV is written.

    Returns
    -------
    pandas.DataFrame
        The updated, re-sorted table.
    '''
    # username to user id
    username_id = read_json('usernames.json')
    table = pd.read_csv(table_file)
    scores = read_json(scores_file)
    points = table.Points.values
    mvps = table.MVP.values
    # the highest score of the round determines the MVP(s)
    mvp_score = max(scores.values())
    for i, name in enumerate(table.team):
        score = scores[username_id[name]]
        points[i] = points[i] + score
        if score == mvp_score:
            mvps[i] += 1
    new_table = table.copy()
    new_table['Points'] = points
    new_table['MVP'] = mvps
    # BUG FIX: the original sorted the *unmodified* `table` (discarding the
    # updated points/MVP counts) and used DataFrame.sort(), which was removed
    # from pandas; sort the updated table with sort_values() instead
    # (points desc, MVP desc, name asc).
    new_table = new_table.sort_values(['Points', 'MVP', 'Name'],
                                      ascending=[False, False, True])
    new_table.insert(0, 'Position',
                     np.arange(1, new_table.shape[0] + 1, dtype=int))
    new_table.to_csv(out_file, index=False)
    return new_table
def main(post_id):
    ''' This function fetches the comments from a facebook post and stores
    them in a json file.

    Parameters
    ----------
    post_id : str
        Id of the Facebook post (the user-id prefix is added internally).
    '''
    logging.basicConfig(level=logging.INFO)
    # read token's file
    tokens_file = 'config/tokens.json'
    tokens = read_json(tokens_file)
    token = tokens['token']
    user_id = tokens['user_id']
    # set output directory
    config_file = 'config/config_playoff_pred.json'
    if path.isfile(config_file):
        out_dir = read_json(config_file)['output_directory']
    else:
        # BUG FIX: the original passed the second sentence as a lazy
        # %-formatting argument, which logging cannot apply to a message
        # without placeholders; log one complete message instead.
        logging.warning('Configuration file not found. '
                        'Save to working directory')
        out_dir = '.'
    # make graph
    graph = fb.GraphAPI(access_token=token, version=3.0)
    # graph id = user_id_post_id
    idd = user_id + '_' + post_id
    # Get the comments from a post.
    comments = graph.get_connections(id=idd, connection_name='comments')
    answers = comments['data']
    # follow the pagination links; the last page carries no 'next' link and
    # a page without comments may carry no 'paging' at all.
    while 'next' in comments.get('paging', {}):
        comments = requests.get(comments['paging']['next']).json()
        answers.extend(comments['data'])
    logging.info('%d comments fetched', len(answers))
    # write comments to json file.
    write_json(path.join(out_dir, 'playoff_predictions_fb_comments.json'),
               answers)
    return
def get_results(games_fb, day, season, team_mapping_file):
    ''' Finds the results of the games of a round as ordered on the fb post.

    Returns the outcomes as a numpy array (1 home win, 2 away win, 0 tie or
    judge-decided game) along with the games' datetimes.
    '''
    log = logging.getLogger(__name__)
    # a plain list of [home, away] pairs is promoted to a dataframe
    if isinstance(games_fb, list):
        games_fb = pd.DataFrame(games_fb, columns=['Home Team', 'Away Team'])
    # teams and games' results scraped from the web-page
    scraped = scrap_round_results(day, season)
    if games_fb.shape[0] != scraped.shape[0]:
        log.warning('Number of games is inconsistent')
    # map the teams' Greek names from the post to official English names
    name_map = read_json(team_mapping_file)
    games_fb.replace(name_map, inplace=True)
    # with the names aligned, join the post's games with the scraped results
    merged = games_fb.merge(scraped, how='inner',
                            left_on=['Home Team', 'Away Team'],
                            right_on=['Home Team', 'Away Team'])
    if pd.isna(merged.values).any():
        log.error('Nan values appeared after merging the DataFrames.')
        sys.exit('Exit')
    if merged.shape[0] != games_fb.shape[0]:
        log.error("Shape of 'final' variable is inconsistent (%d)"
                  % merged.shape[0])
        log.info("This is likely due to incorrect naming of teams in "
                 "FB post. Check the post for typos in teams' names")
        log.debug(merged)
        sys.exit('Exit')
    home = merged['Home Score']
    away = merged['Away Score']
    # 1 = home win, 2 = away win, ties fall through to 0
    results = np.select([home > away, away > home], [1, 2], default=0)
    # ignore from the score games that were determined by judges
    judged = (((home == 20) & (away == 0)) | ((home == 0) & (away == 20)))
    results[judged] = 0
    if results.shape[0] != games_fb.shape[0]:
        log.error("Shape of 'results' variable is inconsistent (%d)"
                  % results.shape[0])
        sys.exit('Exit')
    return results, merged['Datetime'].dt.to_pydatetime()
def main(post_id, results_file, out_file):
    '''Score users' predictions, posted as comments on a Facebook post,
    against the actual results and save the scores to a JSON file.

    TODO:
        1) check time-zones

    Parameters
    ----------
    post_id : str
        Id of the Facebook post (the user-id prefix is added internally).
    results_file : str
        File with the actual results of the round.
    out_file : str
        Path of the output JSON file with the users' scores.

    Returns
    -------
    dict
        Mapping of comment id to the commenter's integer score.
    '''
    # configuration file
    config_file = 'config.json'
    configs = read_json(config_file)
    token = configs['token']
    user_id = configs['user_id']
    # # username to user id
    # username_id = read_json('usernames.json')
    # read actual results
    results = read_results(results_file)
    dt_format = '%Y-%m-%dT%H:%M:%S+0000'
    # make graph
    graph = fb.GraphAPI(access_token=token, version=2.7)
    # graph id = user_id_post_id
    idd = user_id + '_' + post_id
    # get text of post
    post = graph.get_object(id=idd)
    message = post['message']
    # deadline timestamp embedded in the post's text (raw string so the
    # backslashes are real regex escapes)
    end_time = re.search(r'\d{4,4}-\d{2,2}-\d{2,2} \d{2,2}:\d{2,2}:\d{2,2}',
                         message)
    if end_time is None:
        print('Warning: Deadline timestamp not found in post.')
        deadline = datetime.now()
    else:
        # BUG FIX: strptime expects the matched string; the original passed
        # the re.Match object itself, raising TypeError whenever a deadline
        # was actually found.
        deadline = datetime.strptime(end_time.group(0), '%Y-%m-%d %H:%M:%S')
    # Get the comments from a post.
    comments = graph.get_connections(id=idd, connection_name='comments')
    users_dict = {}
    while True:
        for comment in comments['data']:
            user = comment['id']
            text = comment['message']
            time = datetime.strptime(comment['created_time'], dt_format)
            # every digit in the comment is read as one prediction
            pred = np.array([int(s) for s in text if s.isdigit()])
            # check if number of predictions is correct
            if len(pred) != 8:
                print('Warning: Incorrect number of predictions for user %s'
                      % user)
                score = 0
            # check if predictions are either 1 or 2
            elif not ((pred == 1) | (pred == 2)).all():
                print('Warning: Incorrect prediction for user %s' % user)
                score = 0
            # check comment is in time
            elif time > deadline:
                print('Warning: User %s prediction off time' % user)
                print(time, deadline)
                score = 0
            else:
                score = np.sum(pred == results)
            users_dict[user] = int(score)
        # follow pagination; the last page carries no 'next' link
        if 'next' in comments.get('paging', {}):
            comments = requests.get(comments['paging']['next']).json()
        else:
            break
    write_json(out_file, users_dict)
    return users_dict
def main(comments):
    ''' Function that extracts the username and the predicted teams of the
    playoffs from the comments (of a facebok post) and converts them to a
    dataframe, which is saved to the disk.
    '''
    logging.basicConfig(level=logging.INFO)
    # read config file
    config_file = 'config/config_playoff_pred.json'
    config = read_json(config_file)
    n_pred_teams = config['n_pred_teams']
    out_dir = config['output_directory']
    team_mappings_file = config['team_names_mapping_file']
    teams_dict = read_json(team_mappings_file)
    # deadline is given in local (Athens) time; convert it to UTC so it can
    # be compared with the UTC timestamps of the FB comments
    deadline = datetime.strptime(config['deadline'], '%Y-%m-%d %H:%M:%S')
    deadline = convert_timezone(deadline, from_tz='Europe/Athens',
                                to_tz='UTC')
    fb_format = '%Y-%m-%dT%H:%M:%S+0000'
    teams = list(teams_dict.keys())
    # fuzzy regexes (regex module syntax): up to 2 edits per team name,
    # except for the short names CSKA/Zenit/Real, which must match with no
    # edits to avoid false positives
    regexps = [
        '(?e)(%s){e<=2}' % team if team not in ['CSKA', 'Zenit', 'Real'] else
        '(?e)(%s){e<1}' % team for team in teams
    ]
    header = ['team%d' % i for i in range(1, n_pred_teams + 1)]
    all_predictions = []
    usernames = []
    for comment in comments:
        comment_id = comment['id']
        text = comment['message']
        time = datetime.strptime(comment['created_time'], fb_format)
        # make the time variable datetime aware
        time = time.replace(tzinfo=pytz.UTC)
        # check if comment is off time.
        if time > deadline:
            # NOTE(review): off-time comments are only logged here — the
            # prediction is still processed below. Confirm this is intended.
            logging.warning('Comment (id %s) is off time: "%s"', comment_id,
                            text)
        text_copy = text.replace('\n', '')
        pred_teams = []
        for team, rxp in zip(teams, regexps):
            r = regex.search(rxp, text, regex.IGNORECASE)
            if r is not None:
                match = text[r.start():r.end()]
                # print(team, match)
                pred_teams.append(teams_dict[team])
                # strip the matched team name so that, in the end, only the
                # commenter's name remains in text_copy
                text_copy = text_copy.replace(match, '').strip()
        # whatever is left after removing team names and punctuation is
        # taken to be the username
        username = (text_copy.replace('-', '').replace(',', '').replace(
            '.', '').strip())
        # if text.startswith('Malisiovas Andreas'):
        #     embed()
        # a comment is valid only if exactly n_pred_teams teams were found
        if len(pred_teams) == n_pred_teams:
            all_predictions.append(dict(zip(header, sorted(pred_teams))))
            usernames.append(username)
        else:
            logging.warning('Comment (id %s) is not valid: "%s"', comment_id,
                            text)
    # convert to dataframe
    df = pd.DataFrame(all_predictions, index=usernames)
    df.index.name = 'username'
    # save to file
    df.to_csv(os.path.join(out_dir, 'playoffs_predictions.csv'), index=True)
    return
# BUG FIX: this chunk previously began with a duplicated fragment of the
# tail of main(comments) dangling at module level (it referenced names such
# as pred_teams/usernames that do not exist outside the function and would
# break at import time); the duplicate has been removed. Only the script
# entry point remains.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # BUG FIX: `required` expects a bool; the original passed an
    # argparse.FileType instance, which only "worked" because it is truthy.
    parser.add_argument('-f', '--file', type=str, required=True,
                        help="the file with facebook comments")
    args = parser.parse_args()
    # read comments
    comments = read_json(args.file)
    main(comments)
def main(day, sort_by_final=0):
    '''Update the standings table after the given round.

    TODO:
        1) Insert simple models for benchmarking

    Parameters
    ----------
    day : int
        Round number (1-based); selects the input/output files.
    sort_by_final : int, optional
        If 1, sort by the final score (points + playoff score) first;
        otherwise sort by points, MVP count and name.

    Returns
    -------
    pandas.DataFrame
        The updated standings table, which is also written to disk.
    '''
    logging.basicConfig(level=logging.INFO)
    if day < 1:
        logging.error('Round must be non-negative integer')
        sys.exit('Exit')
    config_file = 'config/config.json'
    configs = read_json(config_file)
    out_dir = configs['output_directory']
    fuzzy_thr = configs['fuzzy_threshold']
    season = configs['season']
    # playoff predictions are optional.
    # BUG FIX: `playoffs_scores` was previously assigned only inside the
    # if-branch, so the merge further down raised NameError whenever the
    # playoff predictions file was missing.
    playoffs_scores = None
    if ('playoff_predictions_file' in configs.keys()
            and os.path.isfile(configs['playoff_predictions_file'])):
        playoff_pred_file = configs['playoff_predictions_file']
        penalties = (configs['playoff_predict_penalties']
                     if 'playoff_predict_penalties' in configs else {})
        playoffs_scores = get_playoffs_scores(playoff_pred_file, season, day,
                                              penalties=penalties,
                                              n_playoff_teams=8)
    else:
        logging.warning('Playoff predictions file not available.')
    table_file = os.path.join(out_dir, 'table_day_%d.csv' % (day - 1))
    scores_file = os.path.join(out_dir, 'predictions_day_%d.csv' % day)
    out_file = os.path.join(out_dir, 'table_day_%d.csv' % day)
    scores = pd.read_csv(scores_file)
    if day == 1:
        # set initial table, all players have zero points, mvps, etc.
        table = pd.DataFrame(np.arange(1, scores.shape[0] + 1, dtype=int),
                             columns=['Position'])
        table['Name'] = scores['Name'].values
        table['MVP'] = np.zeros((scores.shape[0], 1), dtype=int)
        table['Points'] = np.zeros((scores.shape[0], 1), dtype=int)
        table['Missed Rounds'] = np.zeros((scores.shape[0], 1), dtype=int)
    else:
        # read the table of the previous round
        table = pd.read_csv(table_file)
    # fix usernames via fuzzy search
    new_names = fuzzy_fix_names(scores['Name'].values, table['Name'].values,
                                threshold=fuzzy_thr)
    scores['Name'] = new_names
    # check if there is a new user, give the lowest score.
    min_points = np.min(table.Points.values)
    # find mvp score of the round
    mvp_score = np.nanmax(scores['Score'].values)
    # find min score of the round
    min_score = np.nanmin(scores['Score'].values)
    # merge datasets
    df_merged = table.merge(scores, how='outer', left_on='Name',
                            right_on='Name')
    df_new = df_merged.copy()
    # new players: present in this round's scores but not in the table
    jj = np.isnan(df_merged['Points'])
    if any(jj):
        df_new.loc[jj, 'Points'] = min_points
        df_new.loc[jj, 'MVP'] = 0
        df_new.loc[jj, 'Missed Rounds'] = day - 1
        for name in df_new['Name'][jj].values:
            logging.info('%s is a new player', name)
    # did not play this round: in the table but with no score this round
    ii = np.isnan(df_merged['Score'])
    dnp = []
    if any(ii):
        df_new.loc[ii, 'Score'] = min_score
        df_new.loc[ii, 'Missed Rounds'] += 1
        for name in df_new['Name'][ii].values:
            dnp.append(name)
            logging.warning('(DNP): %s did not play this round', name)
    # check if user has missed more than four rounds.
    mm = df_new['Missed Rounds'] >= 4
    disq = []
    if any(mm):
        for name in df_new['Name'][mm].values:
            logging.warning('(DIS): User %s has missed four rounds and '
                            'is being disqualified', name)
            df_new.drop(df_new[df_new['Name'] == name].index, inplace=True)
            disq.append(name)
    # update points
    df_new['Points'] += df_new['Score'].astype(int)
    # update MVP
    df_new.loc[df_new['Score'] == mvp_score, 'MVP'] += 1
    # convert columns to int type.
    df_new[['Score', 'MVP', 'Points', 'Missed Rounds']] = df_new[
        ['Score', 'MVP', 'Points', 'Missed Rounds']].astype(int)
    new_table = df_new[['Name', 'Score', 'Points', 'MVP',
                        'Missed Rounds']].copy()
    new_table.rename(columns={'Score': 'Round Score'}, inplace=True)
    # merge the score table with the playoff table (if available)
    if playoffs_scores is not None:
        new_table = new_table.merge(playoffs_scores, how='left',
                                    left_on='Name', right_index=True)
        new_table['Final_Score'] = (new_table['Points'] +
                                    new_table['Playoff_Score'])
    else:
        # no playoff predictions: final score degenerates to the points
        new_table['Playoff_Score'] = 0
        new_table['Final_Score'] = new_table['Points']
    if sort_by_final == 1:
        # sort by final score (desc), points (desc), MVP (desc)
        # and finally by Name (asc).
        sort_list = ['Final_Score', 'Points', 'MVP', 'Name']
        asc_list = [False, False, False, True]
    else:
        # sort by points (desc), then by MVP (desc) and finally by name (asc)
        sort_list = ['Points', 'MVP', 'Name']
        asc_list = [False, False, True]
    new_table = new_table.sort_values(by=sort_list, ascending=asc_list)
    new_table.insert(0, 'Position',
                     np.arange(1, new_table.shape[0] + 1, dtype=int))
    # add the final rank: order by final score (desc), position as
    # tie-breaker, then invert the permutation to get per-row ranks
    ii = np.lexsort(
        (new_table['Position'].values, -new_table['Final_Score'].values))
    new_table['Final_Rank'] = ii.argsort() + 1
    if new_table['Playoff_Score'].isna().any():
        logging.warning('Users unknown')
    logging.debug(new_table)
    metadata_file = os.path.join(out_dir, 'metadata_day_%d.json' % day)
    metadata = read_json(metadata_file)
    metadata.update({'dnp': dnp, 'disq': disq})
    write_json(metadata_file, metadata)
    new_table.to_csv(out_file, index=False)
    return new_table
def main(post_id, results_file, nday):
    '''Fetch a round's predictions from the comments of a Facebook post,
    score them against the round's results and save the scores, predictions
    and metadata to files in the configured output directory.

    Parameters
    ----------
    post_id : str
        Id of the Facebook post (the user-id prefix is added internally).
    results_file : str
        Flat file with the results; used as fallback when fetching the
        results from the web fails.
    nday : int
        Round number; used to name the output files.
    '''
    logging.basicConfig(level=logging.INFO)
    # tokens file
    tokens_file = 'config/tokens.json'
    tokens = read_json(tokens_file)
    token = tokens['token']
    user_id = tokens['user_id']
    # configuration file
    config_file = 'config/config.json'
    configs = read_json(config_file)
    n_games = configs['n_games']
    season = configs['season']
    dt_format = configs['dt_format']
    pattern = configs['dt_pattern']
    out_dir = configs['output_directory']
    team_mapping_file = configs['team_names_mapping_file']
    fb_format = '%Y-%m-%dT%H:%M:%S+0000'
    # make graph
    graph = fb.GraphAPI(access_token=token, version=3.0)
    # graph id = user_id_post_id
    idd = user_id + '_' + post_id
    # get text of post
    post = graph.get_object(id=idd)
    message = post['message']
    post_time = datetime.strptime(post['created_time'], fb_format)
    # fetch actual results from the web -- restrictions apply
    out_results_file = os.path.join(out_dir, 'results_day_%d.csv' % nday)
    repat = re.compile(pattern)
    # each game in the post is a datetime (dt_pattern) followed by the
    # "Home - Away" pair; strip the datetime to keep only the teams
    games_times = re.findall(pattern + r'[^\d\n]*', message)
    games = [[u.strip() for u in repat.sub('', game).split('-')]
             for game in games_times]
    fb_post_games_df = pd.DataFrame(games, columns=['Home', 'Away'])
    if len(games) != n_games:
        # check if the number of games identified in the post is correct.
        logging.error('Number of games identified on FB post is incorrect')
        sys.exit('Exit')
    else:
        try:
            # fetch results and game-times from the web
            results, game_times_utc = get_results(games, nday, season,
                                                  team_mapping_file)
        except (requests.exceptions.ConnectionError, AssertionError) as e:
            logging.error(e)
            # if there is a connection error, read results from file.
            logging.warning('Unable to fetch results from the internet. '
                            'Try from flat file.')
            # if file does not exist, exit program and ask for file of
            # results.
            if not os.path.isfile(results_file):
                logging.error('Provide correct file with the results.')
                sys.exit('Exit')
            # read actual results
            results = read_results(results_file)
            logging.info('Get game-times from the FB post')
            # extract game times from the post
            game_times_utc = get_game_times_from_post(pattern, message,
                                                      dt_format, post_time,
                                                      n_games)
    # write results to csv file with names of teams
    fb_post_games_df['Result'] = results
    fb_post_games_df.to_csv(out_results_file, index=False)
    logging.info('The results are: {}'.format(results))
    if results.shape[0] != n_games:
        logging.error('Results not valid')
        sys.exit('Exit')
    # Get the comments from a post.
    comments = graph.get_connections(id=idd, connection_name='comments')
    score_dict = {}
    predict_dict = {}
    offtime = {}
    while True:
        for comment in comments['data']:
            comment_id = comment['id']
            text = comment['message']
            time = datetime.strptime(comment['created_time'], fb_format)
            # make the time variable datetime aware
            time = time.replace(tzinfo=pytz.UTC)
            is_valid, user, pred = valid_post(text, comment_id=comment_id,
                                              n_games=n_games)
            if is_valid is False:
                logging.debug('Comment id %s not valid' % comment_id)
                continue
            # check comment is prior game-times.
            ii = time < game_times_utc
            if not ii.all():
                offtime.update({user: int(np.sum(~ii))})
                logging.warning(
                    '%d Prediction(s) off time for user %s in '
                    'comment (id %s): %s', np.sum(~ii), user, comment_id,
                    text)
                # if comment after any game started, give 0 points for
                # this game
                pred[~ii] = 0
            jj = results > 0  # include only finished games.
            # E.g. if a player played less than 9 games, they will get 0,
            # which will match with an unfinished game.
            score = np.sum(pred[ii & jj] == results[ii & jj])
            score_dict[user] = int(score)
            predict_dict[user] = pred
        # follow the pagination links until the last page
        if 'next' in comments['paging']:
            comments = requests.get(comments['paging']['next']).json()
        else:
            break
    # make dataframe from users' score dictionary
    df_scores = pd.DataFrame.from_dict(score_dict, orient='index',
                                       columns=['Score'])
    # make dataframe from users' predictions dictionary
    df_pred = pd.DataFrame.from_dict(predict_dict, orient='index',
                                     columns=['-'.join(u) for u in games])
    # merge the two dataframe based on users' names.
    df = df_scores.merge(df_pred, left_index=True, right_index=True)
    # sort by score (descending) and by name (descending)
    df.rename_axis('Name', axis=0, inplace=True)
    df.sort_values(['Score', 'Name'], ascending=[False, True], inplace=True)
    # round metadata consumed by the table-building / posting scripts
    metadata = {
        'offtime': offtime,
        'mvps': list(df[df['Score'] == df['Score'].max()].index),
        'mvp_score': int(df['Score'].max()),
        'dnp_score': int(df['Score'].min())
    }
    metadata_file = os.path.join(out_dir, 'metadata_day_%d.json' % nday)
    write_json(metadata_file, metadata)
    # save dataframe
    df.to_csv(os.path.join(out_dir, 'predictions_day_%d.csv' % nday),
              sep=',', index=True, encoding='utf-8')
    return
def main(day, post=False):
    ''' This function creates a text-based table with info about the round's
    MVP, etc., and posts it on the Facebook group.

    WARNING: After experimentation, the post is not visible to every member
    of the group, therefore this method was ignored. However, it is used for
    producing the text of MVPs, DNPs, etc., to a flat file, from which it is
    copied and manually posted on FB. If a user wants to post the table on
    FB, set post input to True.

    Parameters
    ----------
    day : int
        Round number; selects the table and metadata files to read.
    post : bool, optional
        If True, also post the composed text to the Facebook group.
    '''
    # tokens file
    tokens_file = 'config/tokens.json'
    # BUG FIX: removed the accidental double assignment
    # (`tokens = tokens = read_json(...)`).
    tokens = read_json(tokens_file)
    token = tokens['token']
    # user_id = tokens['user_id']
    group_id = tokens['mia_syn_mia_id']
    # configuration file
    config_file = 'config/config.json'
    configs = read_json(config_file)
    out_dir = configs['output_directory']
    n_games = configs['n_games']
    # read the latest table file
    table_file = os.path.join(out_dir, 'table_day_%d.csv' % day)
    # load the mvp, dnp and offtime usernames produced by other scripts.
    metadata = read_json(os.path.join(out_dir, 'metadata_day_%d.json' % day))
    mvps = metadata['mvps']
    mvp_score = metadata['mvp_score']
    dnp_score = (metadata['dnp_score']
                 if 'dnp_score' in metadata.keys() else None)
    dnps = metadata['dnp'] if 'dnp' in metadata.keys() else []
    disqs = metadata['disq'] if 'disq' in metadata.keys() else []
    offtime = metadata['offtime']
    # ask for input optional message
    # optional = input('Optional message:')
    optional = ''
    # form the string (the Greek fragments are part of the posted text)
    mvp_str = 'MVPs: ' + ', '.join(['@{}'.format(name) for name in mvps]) +\
        ' με {}/{} σωστές προβλέψεις.'.format(mvp_score, n_games) + '\n'
    verb = 'λαμβάνει' if len(dnps) == 1 else 'λαμβάνουν'
    if len(dnps) > 0:
        dnp_str = (
            'DNP: ' + ', '.join(['@{}'.format(name) for name in dnps]) +
            ' και {} τη χαμηλότερη βαθμολογία ({})'.format(verb, dnp_score) +
            '\n')
    else:
        dnp_str = 'DNP: -' + '\n'
    if len(offtime) > 0:
        offtime_str = 'Off time: ' + ', '.join(
            ['@{} ({})'.format(k, v) for k, v in offtime.items()]) + '\n'
    else:
        offtime_str = 'Off time: -' + '\n'
    verb1 = 'αποβάλλεται' if len(disqs) == 1 else 'αποβάλλονται'
    verb2 = 'συμπλήρωσε' if len(disqs) == 1 else 'συμπλήρωσαν'
    if len(disqs) > 0:
        disq_str = ', '.join(['@{}'.format(name) for name in disqs]) +\
            ' {} καθώς {} 4 απουσίες.'.format(verb1, verb2) + '\n'
    else:
        disq_str = ''
    # form the table: fixed-width columns sized by the longest name
    table = pd.read_csv(table_file)
    maxlen = int(table['Name'].str.len().max())
    header = (table.columns.str.replace('Position', 'Rank').str.replace(
        'Missed Rounds', 'DNP').str.replace('_', ' '))
    widths = [len(u) + 2 if u != 'Name' else maxlen + 2 for u in header]
    s = "".join(u.ljust(i) for u, i in zip(header, widths)) + '\n'
    line_length = len(s) - 1
    for row in table.values:
        s1 = "".join(str(u).ljust(i) for u, i in zip(row, widths)) + '\n'
        s += s1
        # separators after ranks 4 and 12
        if row[0] == 4:
            s += '-' * line_length + '\n'
        elif row[0] == 12:
            s += '=' * line_length + '\n'
    table_str = s
    # concatenate the string
    stats = mvp_str + dnp_str + offtime_str + disq_str
    start = 'Αποτελέσματα Euroleague Day %d' % day
    end = '#feeldevotion #euroleague #day%d #mia_syn_mia #oneplusone' % day
    final = (start + '\n\n' + optional + '\n\n' + stats + '\n' + table_str +
             '\n' + end)
    # write the text to a text file for inspection
    with open('temp_table.txt', 'w', encoding='utf-8') as f:
        f.writelines(final)
    # post the final text to facebook.
    if post:
        graph = fb.GraphAPI(access_token=token, version=3.0)
        graph.put_object(parent_object=group_id, connection_name='feed',
                         message=final)
    return