def post(self):
    """Delete every datastore record belonging to one swimmer in a single batch."""
    asa_number_str = self.request.get('asa_number')
    if (asa_number_str is None) or (len(asa_number_str) == 0):
        logging.error("Missing asa_number in swimmer nuke request.")
        self.response.set_status(400)
        return
    asa_number = int(asa_number_str)
    logging.info("Nuking " + asa_number_str)

    doomed_keys = []

    # Local helpers that just accumulate keys for one batched delete at the end.
    def collect_swims(swims):
        for entry in swims:
            doomed_keys.append(entry.key)

    def collect_entity(entity):
        if entity is not None:
            doomed_keys.append(entity.key)

    # All of the swimmer's swims, short course then long course.
    for event in short_course_events:
        collect_swims(Swim.fetch_all(asa_number, event))
    for event in long_course_events:
        collect_swims(Swim.fetch_all(asa_number, event))

    # The remaining per-swimmer records (each may legitimately be absent).
    collect_entity(SwimList.get(asa_number))
    collect_entity(Swimmer.get("Winsford", asa_number))
    collect_entity(SwimmerCat1.get("Winsford", asa_number))
    ndb.delete_multi(doomed_keys)
def get(self):
    """Write a '^'-separated table of personal bests for the requested swimmers.

    One line per event (short course events first, then long course); each line
    is the event name followed by one '^'-separated field per swimmer, left
    blank when that swimmer has no PB for the event.
    """
    asa_numbers = self.request.get_all('asa_numbers')
    num_swimmers = len(asa_numbers)
    self.response.headers['Content-Type'] = 'text/plain'
    if num_swimmers == 0:
        # Show error page.
        # Bug fix: self.response.out is an output stream, not a callable —
        # the original `self.response.out(...)` would raise TypeError.
        self.response.out.write("Missing asa_numbers parameters")
    else:
        # Collate the list of swimmers, silently skipping unknown ASA numbers.
        swimmers = []
        for asa_number in asa_numbers:
            swimmer = Swimmer.get("Winsford", int(asa_number))
            if swimmer is not None:
                swimmers.append(swimmer)

        def listEvents(events):
            for event in events:
                self.response.out.write(event.to_string())
                # Write a '^' separated list of PB swims of each requested swimmer.
                # Leave blank if the swimmer has no PB for this event.
                for swimmer in swimmers:
                    self.response.out.write('^')
                    swim = Swim.fetch_pb(swimmer, event)
                    if swim is not None:
                        self.response.out.write(str(swim))
                self.response.out.write('\n')

        listEvents(short_course_events)
        listEvents(long_course_events)
def post(self):
    """Import a JSON body of meets, recording each listed swim as an UnofficialSwim.

    Expected body shape: {"meets": [{"name", "courseCode", "startDate",
    "swimmers": [{"name", "asaNumber", "swims": [{"eventCode", "raceTime"}]}]}]}.
    Swimmers without an "asaNumber", or not found in the datastore, are logged
    and skipped.
    """
    results = json.loads(self.request.body)
    import_meets = results["meets"]
    logging.info("Importing " + str(len(import_meets)) + " meets")
    for import_meet in import_meets:
        import_swimmers = import_meet["swimmers"]
        course_code = import_meet["courseCode"]
        # Parse the ISO-8601 UTC timestamp and rebuild a timezone-aware
        # datetime (fractional seconds are deliberately discarded).
        start_date = datetime.datetime(
            *(time.strptime(import_meet["startDate"], '%Y-%m-%dT%H:%M:%S.%fZ')[0:6]),
            tzinfo=tzutc())
        logging.info("Meet start date: " + start_date.strftime("%d/%m/%Y"))
        meet_name = import_meet["name"]
        for import_swimmer in import_swimmers:
            name = import_swimmer["name"]
            if "asaNumber" in import_swimmer:
                asa_number = import_swimmer["asaNumber"]
                swimmer = Swimmer.get("Winsford", asa_number)
                if swimmer is None:
                    # Fixed log message: was "...<name>ASAA Number:" — typo and
                    # missing separator made the log hard to read.
                    logging.error("Unable to find swimmer: " + name +
                                  ". ASA Number: " + str(asa_number))
                else:
                    import_swims = import_swimmer["swims"]
                    for import_swim in import_swims:
                        event = Event.create_from_code(import_swim["eventCode"], course_code)
                        swim = UnofficialSwim.create(swimmer, event, start_date,
                                                     meet_name, import_swim["raceTime"])
                        if swim is None:
                            logging.error("Failed to create swim")
                        else:
                            swim.put()
            else:
                logging.error("No ASA number for swimmer: " + name)
def post(self):
    """Remove one swimmer and all associated records from the datastore."""
    asa_number_str = self.request.get('asa_number')
    if not asa_number_str:
        logging.error( "Missing asa_number in swimmer nuke request." )
        self.response.set_status( 400 )
        return
    asa_number = int( asa_number_str )
    logging.info( "Nuking " + asa_number_str )

    condemned = []
    # Gather the keys of every swim for every event (both courses).
    for event in short_course_events:
        condemned.extend( swim.key for swim in Swim.fetch_all( asa_number, event ) )
    for event in long_course_events:
        condemned.extend( swim.key for swim in Swim.fetch_all( asa_number, event ) )
    # Then the swimmer-level records, skipping any that do not exist.
    for entity in ( SwimList.get( asa_number ),
                    Swimmer.get( "Winsford", asa_number ),
                    SwimmerCat1.get( "Winsford", asa_number ) ):
        if entity is not None:
            condemned.append( entity.key )
    # One batched delete for everything.
    ndb.delete_multi( condemned )
def update_swimmer_list( cls ):
    """Rebuild the cached plain-text swimmer list and persist it."""
    # One line per club swimmer, each terminated with a newline.
    lines = [str( member ) + "\n" for member in Swimmer.query_club( "Winsford" )]
    swimmer_list = cls( key = swimmer_list_key, data = "".join( lines ) )
    swimmer_list.put()
def post(self):
    """Queue a personal-bests update task for every swimmer in the club."""
    members = Swimmer.query_club( "Winsford" )
    self.response.headers['Content-Type'] = 'text/plain'
    for member in members:
        number = str( member.asa_number )
        note = "Queueing update of personal bests for " + member.full_name()
        logging.info( note )
        self.response.out.write( note + "\n" )
        # One task per swimmer keeps each request small.
        taskqueue.add(url='/admin/update_personal_bests',
                      params={'asa_number': number})
def ReadClubRankingsFile(file_name, swimmers, swimmers_dict):
    """Parse a tab-separated club-rankings report file.

    Appends newly-seen swimmers to `swimmers` and `swimmers_dict` (keyed by
    ASA number), and appends each parsed swim to its swimmer's `swims` list.
    Prints progress to stdout.
    """
    is_male = True
    current_swimmer = None
    current_event = None
    course_code = ''
    num_new_swimmers = 0
    num_new_swims = 0
    print("Reading " + file_name)
    # Fix: use a context manager so the file handle is always closed — the
    # original opened the file and never closed it.
    with open(file_name, 'r') as club_rankings_file:
        for line in club_rankings_file:
            tokens = line.split('\t')
            if line_is_course_code(tokens):
                new_course_code = 'S'
                if tokens[0].strip().endswith('Long Course'):
                    new_course_code = 'L'
                if course_code != new_course_code:
                    # Announce the new course code
                    course_code = new_course_code
                    if course_code == 'S':
                        print("Short Course")
                    else:
                        print("Long Course")
            elif line_is_swimmer(tokens):
                new_swimmer = Swimmer.from_club_rankings(is_male, tokens)
                if current_swimmer is not None:
                    if is_male and (new_swimmer.last_name < current_swimmer.last_name):
                        # Club rankings report has all the male swimmers followed
                        # by all the female swimmers, so a drop backwards in
                        # surname order marks the switch to the female section.
                        is_male = False
                        new_swimmer.is_male = False
                if new_swimmer.asa_number in swimmers_dict:
                    # We've already seen this swimmer
                    current_swimmer = swimmers_dict[new_swimmer.asa_number]
                    new_swimmer = None
                if new_swimmer is not None:
                    current_swimmer = new_swimmer
                    swimmers_dict[current_swimmer.asa_number] = current_swimmer
                    num_new_swimmers = num_new_swimmers + 1
                    swimmers.append(current_swimmer)
                # Any pending event context belongs to the previous swimmer.
                current_event = None
            elif line_is_event_type(tokens):
                assert (course_code != '')
                assert (current_swimmer is not None)
                current_event = parse_event(tokens, course_code)
            elif line_is_swim(tokens):
                assert (current_swimmer is not None)
                assert (current_event is not None)
                swim = Swim(current_swimmer.asa_number, current_event, tokens)
                current_swimmer.swims.append(swim)
                num_new_swims = num_new_swims + 1
    print( str(num_new_swimmers) + " new swimmers and " + str(num_new_swims) + " new swims")
def post(self):
    """Update swims for the requested swimmers, optionally limited to one course/event."""
    self.response.headers['Content-Type'] = 'text/plain'
    asa_numbers = self.request.get_all('asa_number')
    if not asa_numbers:
        logging.error("Missing asa_number(s) in swim update request.")
        self.response.set_status(400)
        return
    course_code = self.request.get('course', default_value=None)
    event_code = self.request.get('event', default_value=None)
    if course_code is None and event_code is None:
        # Full update request: too big to handle inline, so fan out one
        # smaller queued request per swimmer instead.
        for asa_number in asa_numbers:
            QueueUpdateSwimsForSwimmer(asa_number)
        return
    if course_code is None:
        logging.error("Missing course code in swim update request")
        self.response.set_status(400)
        return
    # Pick the event table for the requested course ("s" = short course).
    events = short_course_events if course_code == "s" else long_course_events
    event = None
    if event_code is not None:
        event_code = int(event_code)
        if not (0 <= event_code < len(events)):
            logging.error("Bad event code in swim update request")
            self.response.set_status(400)
            return
        event = events[event_code]
    for asa_number in asa_numbers:
        swimmer = Swimmer.get("Winsford", int(asa_number))
        swimmer_name = swimmer.full_name()
        if event is None:
            ret = UpdateSwimsForEvents(swimmer, events, self.response.out)
        else:
            ret = UpdateSwimsForEvent(swimmer, event, self.response.out)
        if ret is not None:
            # An error status from the updater aborts the whole request.
            self.response.set_status(ret)
            return
def post(self):
    """Kick off a personal-bests refresh task for each club swimmer."""
    club_swimmers = Swimmer.query_club("Winsford")
    self.response.headers['Content-Type'] = 'text/plain'
    for entry in club_swimmers:
        # Log and echo the same message, then enqueue the per-swimmer task.
        message = "Queueing update of personal bests for " + entry.full_name()
        logging.info(message)
        self.response.out.write(message + "\n")
        taskqueue.add(url='/admin/update_personal_bests',
                      params={'asa_number': str(entry.asa_number)})
def post(self):
    """Compute club records for each gender/age/event and publish them.

    For every gender, age 9-16 and event, finds the fastest swim across the
    club (long-course times converted to short-course equivalents), then
    writes a '^'-separated table to the response and caches it in StaticData.
    """
    # Index 0 = male records, index 1 = female records.
    records_for_each_gender=[{},{}]
    swimmers = Swimmer.query_club( "Winsford" );
    num_event_codes = len( short_course_events )
    for gender_idx in range( 0, 2 ):
        gender_code = gender_codes[ gender_idx ]
        is_male = (gender_idx == 0)
        records_for_each_age = records_for_each_gender[gender_idx]
        for age in range( 9, 17 ):
            records_for_this_age={}
            records_for_each_age[ age ] = records_for_this_age
            for event_code in range( 0, num_event_codes ):
                # Short- and long-course variants of the same event share a code.
                sc_event = short_course_events[ event_code ]
                lc_event = long_course_events[ event_code ]
                # Sentinel "infinite" time; any real swim beats it.
                best_race_time = 9999999
                best_swimmer = None
                best_swim = None
                for swimmer in swimmers:
                    if swimmer.is_male == is_male:
                        # Figure out this swimmer's best time at this age converted
                        # to short course time
                        sc_pb_time = 9999999
                        sc_pb_swim = Swim.fetch_pb( swimmer, sc_event, age )
                        pb_swim = sc_pb_swim
                        lc_pb_swim = Swim.fetch_pb( swimmer, lc_event, age )
                        if sc_pb_swim is not None:
                            sc_pb_time = sc_pb_swim.race_time
                        if lc_pb_swim is not None:
                            # Convert the long-course PB and keep whichever is faster.
                            lc_pb_time_converted = lc_event.convertTime( lc_pb_swim.race_time )
                            if lc_pb_time_converted < sc_pb_time:
                                sc_pb_time = lc_pb_time_converted
                                pb_swim = lc_pb_swim
                        if (pb_swim is not None) and (sc_pb_time < best_race_time):
                            # This is the best time we've seen so far
                            best_race_time = sc_pb_time
                            best_swimmer = swimmer
                            best_swim = pb_swim
                if best_swim is not None:
                    record = Record( age, best_swimmer, best_swim, best_race_time )
                    records_for_this_age[ event_code ] = record
                    logging.info( 'Record for ' + gender_code + ' ' + str(age) + ' ' + sc_event.short_name_without_course() + ': ' + str( RaceTime(best_race_time) ) + ', ' + best_swimmer.full_name() )
    # Now tabulate and send a plain text response
    club_records = ''
    self.response.headers['Content-Type'] = 'text/plain'
    for gender_idx in range( 0, 2 ):
        records_for_each_age = records_for_each_gender[gender_idx]
        gender_code = gender_codes[ gender_idx ]
        for age, records_for_age in records_for_each_age.iteritems():
            for event_code, record in records_for_age.iteritems():
                swimmer = record.swimmer
                swim = record.swim
                # One '^'-separated line per record.
                club_records += ( gender_code + '^' + str(record.age) +'^' + str(event_code) + '^'+ swimmer.full_name() + '^' + str(swim) + '\n' )
    self.response.out.write( club_records )
    # Cache the table so later page loads don't recompute it.
    StaticData.set_club_records( club_records )
def get(self):
    """List relay-team candidates matching the request's gender/age criteria."""
    self.response.headers['Content-Type'] = 'text/plain'
    relay = Relay(self.request)
    if relay.error:
        self.response.out.write("Missing args")
        return
    # Filter the club's swimmers down to those meeting the relay criteria.
    eligible = GetCandidates(relay, 0, Swimmer.query_club("Winsford"))
    ListCandidates(eligible, self.response.out)
def get(self):
    """Respond with the relay candidates for the requested criteria."""
    self.response.headers['Content-Type'] = 'text/plain'
    relay = Relay( self.request )
    if relay.error:
        self.response.out.write( "Missing args" )
        return
    # First we filter the swimmers for those who meet the gender and age criteria
    club_swimmers = Swimmer.query_club( "Winsford" )
    ListCandidates( GetCandidates( relay, 0, club_swimmers ), self.response.out )
def post(self):
    """Validate a swim-update request, then run updates per swimmer."""
    self.response.headers['Content-Type'] = 'text/plain'
    requested = self.request.get_all('asa_number')
    if len( requested ) == 0:
        logging.error( "Missing asa_number(s) in swim update request." )
        self.response.set_status( 400 )
        return
    course = self.request.get('course', default_value=None)
    event_param = self.request.get('event', default_value=None)
    if (course is None) and (event_param is None):
        # Full update request: we can't handle that here, so queue up
        # lots of smaller per-swimmer requests instead.
        for number in requested:
            QueueUpdateSwimsForSwimmer( number )
        return
    if course is None:
        logging.error( "Missing course code in swim update request" )
        self.response.set_status( 400 )
        return
    event = None
    if event_param is not None:
        idx = int( event_param )
        table = short_course_events if course == "s" else long_course_events
        if (idx < 0) or (idx >= len( table )):
            logging.error( "Bad event code in swim update request" )
            self.response.set_status( 400 )
            return
        event = table[ idx ]
    for number in requested:
        swimmer = Swimmer.get( "Winsford", int( number ) )
        swimmer_name = swimmer.full_name()
        if event is not None:
            result = UpdateSwimsForEvent( swimmer, event, self.response.out )
        elif course == "s":
            result = UpdateSwimsForEvents( swimmer, short_course_events, self.response.out )
        else:
            result = UpdateSwimsForEvents( swimmer, long_course_events, self.response.out )
        if result is not None:
            # Error status from the updater aborts the whole request.
            self.response.set_status( result )
            return
def post(self):
    """Refresh the personal bests of each requested swimmer."""
    self.response.headers['Content-Type'] = 'text/plain'
    asa_numbers = self.request.get_all('asa_number')
    if not asa_numbers:
        self.response.out.write( "No swimmers specified" )
    for number in asa_numbers:
        swimmer = Swimmer.get( "Winsford", int( number ) )
        if swimmer is None:
            logging.error( "Unable to find swimmer: " + str( number ) )
            continue
        logging.info( "Updating personal bests for " + swimmer.full_name() )
        status = scrape_personal_bests( swimmer, self.response.out )
        if status is not None:
            # Scraper reported an error status; keep processing the rest.
            self.response.set_status( status )
def post(self):
    """Scrape up-to-date personal bests for every swimmer named in the request."""
    self.response.headers['Content-Type'] = 'text/plain'
    targets = self.request.get_all('asa_number')
    if len(targets) == 0:
        self.response.out.write("No swimmers specified")
    for target in targets:
        found = Swimmer.get("Winsford", int(target))
        if found is None:
            logging.error("Unable to find swimmer: " + str(target))
        else:
            name = found.full_name()
            logging.info("Updating personal bests for " + name)
            outcome = scrape_personal_bests(found, self.response.out)
            if outcome is not None:
                # Error
                self.response.set_status(outcome)
def GetNameMatchedSwimmers( name_search ):
    """Return club swimmers whose surname matches `name_search`.

    A three-character pattern like "a-m" selects surnames whose first letter
    falls within that range; anything else is a case-insensitive prefix match.
    """
    matched = []
    if (name_search is None) or (len( name_search ) == 0):
        return matched
    swimmers = Swimmer.query_club( "Winsford" )
    pattern = name_search.lower()
    if (len( pattern ) == 3) and (pattern[1] == '-'):
        # Ranged update: keep surnames whose initial lies within [start, end].
        start, end = pattern[0], pattern[2]
        for swimmer in swimmers:
            initial = swimmer.last_name[0].lower()
            if start <= initial <= end:
                matched.append( swimmer )
    else:
        matched = [s for s in swimmers if s.last_name.lower().startswith( pattern )]
    return matched
def GetNameMatchedSwimmers(name_search):
    """Find club swimmers by surname prefix, or by an initial range like "a-m"."""
    if not name_search:
        return []
    query = name_search.lower()
    results = []
    # A 3-character "x-y" query means "surnames starting with x through y".
    is_range = (len(query) == 3) and (query[1] == '-')
    for member in Swimmer.query_club("Winsford"):
        surname = member.last_name.lower()
        if is_range:
            if query[0] <= surname[0] <= query[2]:
                results.append(member)
        elif surname.startswith(query):
            results.append(member)
    return results
def post(self):
    """Bulk-import meets from the JSON request body as unofficial swims."""
    payload = json.loads(self.request.body)
    meets = payload["meets"]
    logging.info("Importing " + str(len(meets)) + " meets")
    for meet in meets:
        course_code = meet["courseCode"]
        # Rebuild a timezone-aware UTC datetime from the ISO timestamp
        # (fractional seconds are discarded by struct_time).
        parsed = time.strptime(meet["startDate"], '%Y-%m-%dT%H:%M:%S.%fZ')
        start_date = datetime.datetime(*parsed[0:6], tzinfo=tzutc())
        logging.info("Meet start date: " + start_date.strftime("%d/%m/%Y"))
        meet_name = meet["name"]
        for entrant in meet["swimmers"]:
            name = entrant["name"]
            if "asaNumber" not in entrant:
                logging.error("No ASA number for swimmer: " + name)
                continue
            asa_number = entrant["asaNumber"]
            swimmer = Swimmer.get("Winsford", asa_number)
            if swimmer is None:
                logging.error("Unable to find swimmer: " + name +
                              "ASAA Number: " + str(asa_number))
                continue
            for swim_data in entrant["swims"]:
                event = Event.create_from_code(swim_data["eventCode"], course_code)
                swim = UnofficialSwim.create(swimmer, event, start_date,
                                             meet_name, swim_data["raceTime"])
                if swim is None:
                    logging.error("Failed to create swim")
                else:
                    swim.put()
def scrape_meet(asa_meet_code, page_number, meet_name, date, course_code):
    """Scrape one page of a meet's results from swimmingresults.org.

    Queues tasks for any extra pages (when on page 1), for newly-discovered or
    upgradable swimmers, and for split parsing, then stores all scraped swims.
    Returns 503 when the page fetch fails, otherwise None.
    """
    logging.info("Attempting to parse meet " + meet_name + ", meet code: " +
                 str(asa_meet_code) + ", page: " + str(page_number))
    # Load a meet page from a URL like this...
    # https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=19611&targetclub=WINNCHRN
    url = "https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=" + str(
        asa_meet_code) + "&targetclub=WINNCHRN&page=" + str(page_number)
    page = helpers.FetchUrl(url)
    if page is None:
        logging.error("Failed to get page " + url)
        # 503 so the task queue retries later.
        return 503
    tree = html.fromstring(page)
    meet_has_been_parsed(asa_meet_code)
    try:
        table = tree.get_element_by_id("rankTable")
    except:
        # No results table means no club swimmers at this meet — not an error.
        logging.info("No rankTable for " + url + ". Presuming no Winsford swimmers at that meet")
        return
    if page_number == 1:
        # When scraping the first page, one of our jobs is to count how many other pages
        # there are and add tasks to scrape those pages
        num_pages = scrape_num_pages(tree)
        logging.info("Meet contains " + str(num_pages) + " pages ")
        date_str = date.strftime("%d/%m/%y")
        for i in range(2, num_pages + 1):
            logging.info("Queing update of page " + str(i) + " of " + meet_name)
            taskqueue.add(url='/admin/scrape_meet', params={
                'asa_meet_code': str(asa_meet_code),
                'meet_name': meet_name,
                'date': date_str,
                'course_code': course_code,
                'page': str(i)
            })
    swimmers_checked = set()
    update_swimmer_list = False
    # First pass over the rows: classify each swimmer.
    for row in TableRows(table, _meet_headers_of_interest):
        # First we look at the swimmer.
        # Is it one we've already seen while scraping this meet, or is it a new one?
        # If it's a new one, is it a swimmer that's in our database?
        # Perhaps it's a swimmer that's in our database as Cat 1 and needs upgrading.
        asa_number = int(row[0].text)
        if asa_number not in swimmers_checked:
            swimmers_checked.add(asa_number)
            swimmer = Swimmer.get("Winsford", asa_number)
            if swimmer is None:
                swimmer = SwimmerCat1.get("Winsford", asa_number)
                if swimmer is None:
                    # This looks like a new Winsford swimmer that isn't in the database
                    # Add a task to add them
                    logging.info("Found new Winsford swimmer: " + str(asa_number) +
                                 ". Adding task to scrape.")
                    taskqueue.add(url='/admin/update_swimmers',
                                  params={'name_search': str(asa_number)})
                    #QueueUpdateSwimsForSwimmer( str(asa_number) )
                    update_swimmer_list = True
                else:
                    # This is a swimmer that's in our database as Cat1
                    # Add a task to upgrade them
                    logging.info("Found new Cat 2 Winsford swimmer: " + str(asa_number) +
                                 ". Adding task to upgrade.")
                    taskqueue.add(url='/admin/check_for_swimmer_upgrade',
                                  params={'asa_number': str(asa_number)})
                    update_swimmer_list = True
            else:
                logging.info("Found existing Winsford swimmer: " + swimmer.full_name())
    if update_swimmer_list:
        # A new or upgraded swimmer invalidates the cached swimmer list.
        taskqueue.add(url='/admin/update_swimmer_list')
    swims_for_swimmer = {}
    # Second pass over the rows: parse the actual swims.
    for row in TableRows(table, _meet_headers_of_interest):
        # Now look at the actual swims.
        # If there's a swim link, then that means there are some splits. In those
        # cases we also add a task to parse the splits and add them to the Swim.
        asa_number = int(row[0].text)
        event_str = row[1].text
        date_of_birth = helpers.ParseDate_dmy(row[2].text)
        race_time = float(RaceTime(row[3].text))
        event = Event.create_from_str(event_str, course_code)
        asa_swim_id = get_asa_swim_id(row[3])
        swim = Swim.create(asa_number, date_of_birth, event, date, meet_name,
                           race_time, asa_swim_id)
        if asa_swim_id is not None:
            # Swim link. Add a task to parse the splits.
            swim_key_str = swim.create_swim_key_str()
            logging.info("Adding split scraping task for swim " + swim_key_str)
            taskqueue.add(url='/admin/scrape_splits', params={'swim': swim_key_str})
        # Record this swim
        if asa_number not in swims_for_swimmer:
            swims_for_swimmer[asa_number] = []
        swims_for_swimmer[asa_number].append(swim)
    # Persist all swims grouped per swimmer.
    for asa_number, swims in swims_for_swimmer.iteritems():
        num_swims = len(swims)
        logging.info("Putting " + str(num_swims) + " swims for " + str(asa_number))
        put_new_swims(asa_number, swims)
def post(self):
    """Queue a swim-update task for every swimmer in the club."""
    club = Swimmer.query_club("Winsford")
    self.response.headers['Content-Type'] = 'text/plain'
    for member in club:
        QueueUpdateSwimsForSwimmer(str(member.asa_number))
def post(self):
    """Compute club records per gender/age/event and publish them.

    For each gender, age 9-16 and event code, finds the club's fastest swim
    (long-course times converted to short-course equivalents), then writes a
    '^'-separated table to the response and caches it in StaticData.
    """
    # Index 0 = male records, index 1 = female records.
    records_for_each_gender = [{}, {}]
    swimmers = Swimmer.query_club("Winsford")
    num_event_codes = len(short_course_events)
    for gender_idx in range(0, 2):
        gender_code = gender_codes[gender_idx]
        is_male = (gender_idx == 0)
        records_for_each_age = records_for_each_gender[gender_idx]
        for age in range(9, 17):
            records_for_this_age = {}
            records_for_each_age[age] = records_for_this_age
            for event_code in range(0, num_event_codes):
                # Short- and long-course variants of an event share a code.
                sc_event = short_course_events[event_code]
                lc_event = long_course_events[event_code]
                # Sentinel "infinite" time; any real swim beats it.
                best_race_time = 9999999
                best_swimmer = None
                best_swim = None
                for swimmer in swimmers:
                    if swimmer.is_male == is_male:
                        # Figure out this swimmer's best time at this age converted
                        # to short course time
                        sc_pb_time = 9999999
                        sc_pb_swim = Swim.fetch_pb(swimmer, sc_event, age)
                        pb_swim = sc_pb_swim
                        lc_pb_swim = Swim.fetch_pb(swimmer, lc_event, age)
                        if sc_pb_swim is not None:
                            sc_pb_time = sc_pb_swim.race_time
                        if lc_pb_swim is not None:
                            # Convert the long-course PB and keep the faster one.
                            lc_pb_time_converted = lc_event.convertTime(
                                lc_pb_swim.race_time)
                            if lc_pb_time_converted < sc_pb_time:
                                sc_pb_time = lc_pb_time_converted
                                pb_swim = lc_pb_swim
                        if (pb_swim is not None) and (sc_pb_time < best_race_time):
                            # This is the best time we've seen so far
                            best_race_time = sc_pb_time
                            best_swimmer = swimmer
                            best_swim = pb_swim
                if best_swim is not None:
                    record = Record(age, best_swimmer, best_swim, best_race_time)
                    records_for_this_age[event_code] = record
                    logging.info('Record for ' + gender_code + ' ' + str(age) +
                                 ' ' + sc_event.short_name_without_course() + ': ' +
                                 str(RaceTime(best_race_time)) + ', ' +
                                 best_swimmer.full_name())
    # Now tabulate and send a plain text response
    club_records = ''
    self.response.headers['Content-Type'] = 'text/plain'
    for gender_idx in range(0, 2):
        records_for_each_age = records_for_each_gender[gender_idx]
        gender_code = gender_codes[gender_idx]
        for age, records_for_age in records_for_each_age.iteritems():
            for event_code, record in records_for_age.iteritems():
                swimmer = record.swimmer
                swim = record.swim
                # One '^'-separated line per record.
                club_records += (gender_code + '^' + str(record.age) + '^' +
                                 str(event_code) + '^' + swimmer.full_name() +
                                 '^' + str(swim) + '\n')
    self.response.out.write(club_records)
    # Cache the table so later page loads don't recompute it.
    StaticData.set_club_records(club_records)
def post(self):
    """Fan out one queued swim-update request per club swimmer."""
    roster = Swimmer.query_club( "Winsford" )
    self.response.headers['Content-Type'] = 'text/plain'
    for entry in roster:
        asa = str( entry.asa_number )
        QueueUpdateSwimsForSwimmer( asa )
def scrape_meet( asa_meet_code, page_number, meet_name, date, course_code ):
    """Scrape one page of a meet's results from swimmingresults.org.

    Queues follow-up tasks for extra result pages (page 1 only), for new or
    upgradable swimmers, and for split parsing; finally stores all scraped
    swims. Returns 503 when the fetch fails, otherwise None.
    """
    logging.info( "Attempting to parse meet " + meet_name + ", meet code: " + str( asa_meet_code ) + ", page: " + str(page_number) )
    # Load a meet page from a URL like this...
    # https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=19611&targetclub=WINNCHRN
    url = "https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=" + str( asa_meet_code ) + "&targetclub=WINNCHRN&page=" + str( page_number )
    page = helpers.FetchUrl( url )
    if page is None:
        logging.error( "Failed to get page " + url )
        # 503 so the task queue will retry later.
        return 503
    tree = html.fromstring( page )
    meet_has_been_parsed( asa_meet_code )
    try:
        table = tree.get_element_by_id( "rankTable" )
    except:
        # No results table means no club swimmers at this meet — not an error.
        logging.info( "No rankTable for " + url + ". Presuming no Winsford swimmers at that meet" )
        return
    if page_number == 1:
        # When scraping the first page, one of our jobs is to count how many other pages
        # there are and add tasks to scrape those pages
        num_pages = scrape_num_pages( tree )
        logging.info( "Meet contains " + str( num_pages ) + " pages ")
        date_str = date.strftime( "%d/%m/%y" )
        for i in range( 2, num_pages+1 ):
            logging.info( "Queing update of page " + str(i) + " of " + meet_name )
            taskqueue.add(url='/admin/scrape_meet', params={'asa_meet_code': str(asa_meet_code), 'meet_name' : meet_name, 'date' : date_str, 'course_code' : course_code, 'page' : str(i) })
    swimmers_checked = set()
    update_swimmer_list = False
    # First pass over the rows: classify each swimmer.
    for row in TableRows( table, _meet_headers_of_interest ):
        # First we look at the swimmer.
        # Is it one we've already seen while scraping this meet, or is it a new one?
        # If it's a new one, is it a swimmer that's in our database?
        # Perhaps it's a swimmer that's in our database as Cat 1 and needs upgrading.
        asa_number = int( row[0].text )
        if asa_number not in swimmers_checked:
            swimmers_checked.add( asa_number )
            swimmer = Swimmer.get( "Winsford", asa_number )
            if swimmer is None:
                swimmer = SwimmerCat1.get( "Winsford", asa_number )
                if swimmer is None:
                    # This looks like a new Winsford swimmer that isn't in the database
                    # Add a task to add them
                    logging.info( "Found new Winsford swimmer: " + str( asa_number ) + ". Adding task to scrape." )
                    taskqueue.add(url='/admin/update_swimmers', params={'name_search': str(asa_number)})
                    #QueueUpdateSwimsForSwimmer( str(asa_number) )
                    update_swimmer_list = True
                else:
                    # This is a swimmer that's in our database as Cat1
                    # Add a task to upgrade them
                    logging.info( "Found new Cat 2 Winsford swimmer: " + str( asa_number ) + ". Adding task to upgrade." )
                    taskqueue.add(url='/admin/check_for_swimmer_upgrade', params={'asa_number': str(asa_number)})
                    update_swimmer_list = True
            else:
                logging.info( "Found existing Winsford swimmer: " + swimmer.full_name() )
    if update_swimmer_list:
        # A new or upgraded swimmer invalidates the cached swimmer list.
        taskqueue.add(url='/admin/update_swimmer_list')
    swims_for_swimmer = {}
    # Second pass over the rows: parse the actual swims.
    for row in TableRows( table, _meet_headers_of_interest ):
        # Now look at the actual swims.
        # If there's a swim link, then that means there are some splits. In those
        # cases we also add a task to parse the splits and add them to the Swim.
        asa_number = int( row[0].text )
        event_str = row[1].text
        date_of_birth = helpers.ParseDate_dmy( row[2].text )
        race_time = float( RaceTime( row[3].text ) )
        event = Event.create_from_str( event_str, course_code )
        asa_swim_id = get_asa_swim_id( row[3] )
        swim = Swim.create( asa_number, date_of_birth, event, date, meet_name, race_time, asa_swim_id )
        if asa_swim_id is not None:
            # Swim link. Add a task to parse the splits.
            swim_key_str = swim.create_swim_key_str()
            logging.info( "Adding split scraping task for swim " + swim_key_str )
            taskqueue.add(url='/admin/scrape_splits', params={'swim': swim_key_str})
        # Record this swim
        if asa_number not in swims_for_swimmer:
            swims_for_swimmer[ asa_number ] = [];
        swims_for_swimmer[ asa_number ].append( swim )
    # Persist all swims grouped per swimmer.
    for asa_number, swims in swims_for_swimmer.iteritems():
        num_swims = len( swims )
        logging.info( "Putting " + str(num_swims) + " swims for " + str( asa_number ) )
        put_new_swims( asa_number, swims )
swimmer = None races = [] for line in consideration_times_file: if reading_swims: if len(line) <= 1: # Empty line. So we've finished reading all the swims for a swimmer reading_swims = False process_swimmer(swimmer, races) races = [] else: # Expect the line to be a Swim race = EventCodeAndTime(line) races.append(race) else: # Expect the line to be a Swimmer swimmer = Swimmer.from_old_format(line) reading_swims = True # We've read the entire file. if reading_swims: # We won't have processed the final swimmer... reading_swims = False process_swimmer(swimmer, races) races = [] # Read the race times for line in race_times_file: if reading_swims: if len(line) <= 1: # Empty line. So we've finished reading all the swims for a swimmer reading_swims = False