def get_new_results(num_events):
    """Process up to ``num_events`` events whose results are not loaded yet.

    Selects ``(event_link, event_id)`` rows from ``event_table`` where
    ``results_loaded`` is 0, runs ``process_event_link`` on each row and
    stores the status it returns through ``mark_event``.
    """
    pending_sql = "select event_link, event_id from event_table where results_loaded=0 limit {}".format(num_events)

    cursor = Cursor()
    pending_rows = cursor.execute(pending_sql)
    # The cursor is released before the (potentially long) per-event work.
    cursor.close()

    for row in pending_rows:
        status = process_event_link(*row)
        mark_event(*row, result=status)
def process_event_link(event_link, event_id):
    """Reload every round of results for a single event.

    Rows for ``event_id`` are first deleted from ``RAW_TABLE_NAME``. Each
    entry returned by ``all_rounds_info`` is then handed to
    ``process_results_link``; a round that raises is logged and remembered
    but does not stop the remaining rounds. Afterwards ``elim_results`` is
    called with the largest round value (third field of the round entries).

    Returns:
        1 when every round was processed, -1 when at least one round failed
        or when anything at the event level raised.
    """
    soup = event_soup(event_link)
    print('Deleting existing rows for event {}'.format(event_id))

    cursor = Cursor()
    delete_sql = "delete from {} where event_id='{}'".format(RAW_TABLE_NAME, event_id)
    cursor.execute(delete_sql)
    cursor.close()

    failed_links = []
    try:
        rounds_info = all_rounds_info(soup, event_id)
        print('Round info parsed for event {}'.format(rounds_info[0][1]))

        for round_entry in rounds_info:
            try:
                process_results_link(*round_entry)
                print('>>>>>>{} Round {} Successfully Processed<<<<<<'.format(round_entry[1], round_entry[2]))
            except Exception as round_error:
                # Keep going: one bad round should not abort the event.
                print(round_error)
                print('XXXXXX{} Round {} Failed XXXXXXX'.format(round_entry[1], round_entry[2]))
                failed_links.append(round_entry[0])

        last_round = max(entry[2] for entry in rounds_info)
        elim_results(soup, event_id, last_round)
        print('')

        if failed_links:
            print('Event {} Incomplete :('.format(rounds_info[0][1]))
            return -1

        print('Event {} Successfully Processed!'.format(rounds_info[0][1]))
        return 1
    except Exception as event_error:
        print(event_error)
        print('Event Link {} Failed :('.format(event_link))
        return -1
def populate_event_player_table(event_names, event_id):
    """Link an event's normalized player names to player ids.

    Rows of ``player_table`` whose ``norm_name_1``/``norm_name_2``/
    ``norm_name_3`` start with any of ``event_names`` are fetched. A name
    that appears in exactly one fetched row is linked to that row's
    ``player_id``; a name that appears in no row gets a fresh id (current
    maximum + 1, + 2, ...) and a new ``player_table`` entry with
    ``norm_name_1`` set to the name and ``first_event`` set to ``event_id``.
    Every name ends up with one ``event_player_table`` entry.

    Args:
        event_names: normalized player names seen at the event.
        event_id: identifier stored with every inserted row.

    Raises:
        ValueError: if a name appears in more than one fetched player row.
    """
    if not event_names:
        # Nothing to link; an empty list would also leave the WHERE clause
        # without any condition.
        return

    # NOTE(review): names are formatted directly into the SQL text, so a
    # name containing a single quote breaks the statement -- confirm inputs
    # are sanitized, or use bound parameters if Cursor supports them.
    name_clause = "norm_name_1 like '{0}%' or norm_name_2 like '{0}%' or norm_name_3 like '{0}%'"
    query = (
        "select player_id, norm_name_1, norm_name_2, norm_name_3 from player_table where "
        + " or ".join(name_clause.format(name) for name in event_names)
    )

    cursor = Cursor()
    try:
        player_rows = cursor.execute(query)

        found_names = []
        new_names = []
        for name in event_names:
            # The query is a prefix match; only an exact column match counts.
            matches = [row for row in player_rows if name in row]
            if len(matches) > 1:
                raise ValueError('two matches found for name ' + name)
            if matches:
                found_names.append({'player_id': matches[0][0], 'normalized_name': name, 'event_id': event_id})
            else:
                new_names.append(name)

        max_player_id = cursor.execute("select max(player_id) from player_table")[0][0]
        # max() over an empty table comes back as None; start numbering at 1
        # in that case (presumably the intended first id -- TODO confirm).
        player_id = 0 if max_player_id is None else max_player_id

        new_players = []
        for name in new_names:
            player_id += 1
            new_players.append({'player_id': player_id, 'norm_name_1': name, 'first_event': event_id})
            found_names.append({'player_id': player_id, 'normalized_name': name, 'event_id': event_id})

        cursor.insert('event_player_table', found_names)
        cursor.insert('player_table', new_players)
    except Exception:
        # Release the cursor without committing so a failed run does not
        # leak it (assumes close(commit=False) skips the commit).
        cursor.close(commit=False)
        raise
    cursor.close()
def upload_round_results(results_table, event_id, round_num):
    """Insert one round of results into ``RAW_TABLE_NAME`` and print the new row count.

    ``results_table`` must contain rows that all share the same ``round_num``
    and together represent every result for that round.
    """
    print('')
    banner = '==========Processing Results for Event {}, Round {}=========='.format(event_id, round_num)
    print(banner)

    writer = Cursor()
    print('Writing {} rows'.format(len(results_table)))
    writer.insert(RAW_TABLE_NAME, results_table)
    writer.close()

    # The count is read through a second cursor opened after the insert
    # cursor was closed; it is closed without committing.
    reader = Cursor()
    count_rows = reader.execute('select count(1) from {}'.format(RAW_TABLE_NAME))
    print('New {} row count: {}'.format(RAW_TABLE_NAME, count_rows[0][0]))
    reader.close(commit=False)
def normalized_event_names(event_id):
    """Collect the normalized player names recorded for an event.

    For every round number from 0 through the highest ``round_num`` stored
    in ``results_raw_table`` for ``event_id``, the distinct ``p1_name_raw``
    and ``p2_name_raw`` values are fetched, bye placeholders ('* BYE *',
    'Awarded Bye') are dropped, and the rest is passed through
    ``normalize_raw_name`` and de-duplicated. The per-round lists are then
    folded with ``max_name_list``, starting from an empty list.

    Returns:
        The result of the ``max_name_list`` fold, or an empty list when the
        event has no rows in ``results_raw_table``.
    """
    # ``reduce`` is a builtin only on Python 2; functools has it on 2.6+ and 3.
    from functools import reduce

    cursor = Cursor()
    try:
        num_rounds = cursor.execute("select max(round_num) from results_raw_table where event_id = '{}'".format(event_id))[0][0]
        if num_rounds is None:
            # No rows for this event: max() has nothing to aggregate.
            return []

        all_round_names = []
        # The upper bound is inclusive: range(num_rounds) would never visit
        # the highest round, whether rounds are numbered from 0 or from 1.
        for round_num in range(num_rounds + 1):
            p1_rows = cursor.execute("select distinct p1_name_raw from results_raw_table where event_id = '{}' and round_num = {}".format(event_id, round_num))
            p2_rows = cursor.execute("select distinct p2_name_raw from results_raw_table where event_id = '{}' and round_num = {}".format(event_id, round_num))
            round_names = set(
                normalize_raw_name(item)
                for row in list(p1_rows) + list(p2_rows)
                for item in row
                if '* BYE *' not in item and 'Awarded Bye' not in item
            )
            all_round_names.append(list(round_names))
    finally:
        # Always release the cursor, including when a query raises.
        cursor.close()

    return reduce(max_name_list, all_round_names, [])
def update_event(event_info):
    """Insert ``event_info`` into ``event_table`` unless its link is already stored.

    Does nothing when ``event_info`` has no 'event_link' key. When no row
    with the same ``event_link`` exists, 'results_loaded' is set to 0 on
    ``event_info`` itself (the caller's dict is modified) and the dict is
    inserted as a new row. Always returns None.
    """
    if 'event_link' not in event_info:
        return None

    cursor = Cursor()
    lookup_sql = "select * from event_table where event_link = '{}'".format(event_info['event_link'])
    existing_rows = cursor.execute(lookup_sql)

    if len(existing_rows) > 0:
        # Already known: leave the stored row untouched.
        cursor.close()
        return None

    event_info['results_loaded'] = 0
    cursor.insert('event_table', [event_info])
    cursor.close()
    return None
def mark_event(event_link, event_id, result):
    """Write ``result`` into ``results_loaded`` for the matching ``event_table`` row.

    The row is selected by both ``event_id`` and ``event_link``.
    """
    cursor = Cursor()
    update_sql = "UPDATE event_table set results_loaded={} where event_id='{}' and event_link='{}'".format(
        result, event_id, event_link
    )
    cursor.execute(update_sql)
    cursor.close()