Example 1
def ParsePersonalBestsTable( swimmer, table, course_code, output ):
  swims = []
  for row in TableRows( table, pbs_headers_of_interest ):
    # The event as text is in row[3]
    event = Event.create_from_str( str( row[3] ), course_code )
    if event is None:
      logging.error( "Failed to parse event: " + str( row[3] ) + " " + course_code )
    else:
      swims.append( _create_swim( swimmer, event, row, output ) )
  return swims
Example 2
def __init__(self, line):
  # Each line is pipe-delimited: "event|time" or "event|time|nt"
  tokens = line.split( "|" )
  num_tokens = len( tokens )
  assert( (num_tokens == 2) or (num_tokens == 3) )

  self.event_code = Event.create_from_str( tokens[0], "S" ).get_short_course_event_code()
  self.time = float( RaceTime( tokens[1] ) )
  if num_tokens == 3:
    # A third token of 'nt' marks a "no time" entry; the line keeps its trailing newline
    self.is_nt = (tokens[2] == 'nt\n')
  else:
    self.is_nt = False
Example 3
    def __init__(self, line):
        tokens = line.split("|")
        num_tokens = len(tokens)
        assert ((num_tokens == 2) or (num_tokens == 3))

        self.event_code = Event.create_from_str(
            tokens[0], "S").get_short_course_event_code()
        self.time = float(RaceTime(tokens[1]))
        if num_tokens == 3:
            self.is_nt = (tokens[2] == 'nt\n')
        else:
            self.is_nt = False
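
Both versions of this constructor expect a pipe-delimited record. Below is a minimal, self-contained sketch of just the tokenising step: the sample line is made up, and plain variables stand in for the Event and RaceTime conversions that the real constructor performs.

# Illustrative only: mirrors the tokenising done by the constructors above.
sample_line = "50m Freestyle|31.45|nt\n"   # hypothetical input record

tokens = sample_line.split("|")
assert len(tokens) in (2, 3)

event_text = tokens[0]   # the real code feeds this to Event.create_from_str
time_text = tokens[1]    # the real code wraps this in RaceTime
is_nt = len(tokens) == 3 and tokens[2] == 'nt\n'
print(event_text, time_text, is_nt)   # -> 50m Freestyle 31.45 True
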
Example 4
def _parse_spreadsheet_data( spreadsheet_data ):
  rows = spreadsheet_data.split( '\n' )

  num_events = len( short_course_events )
  nt_times_by_event = [ None ] * num_events
  
  for row in rows:
    columns = row.split( '\t' )
    # Parse the event name
    event_code = Event.create_from_str( columns[0], 'S' ).event_code
    if len( columns ) != 9:
      raise RuntimeError( "Unexpected number of columns in spreadsheet data" )
    nt_times_for_event = []
    nt_times_by_event[ event_code ] = nt_times_for_event
    for i in range( 1, 9 ):
      if len( columns[i] ) == 0:
        nt_times_for_event.append( None )
      else:
        nt_times_for_event.append( float( RaceTime( columns[i] ) ) )
  return nt_times_by_event
Example 5
def _parse_spreadsheet_data(spreadsheet_data):
    rows = spreadsheet_data.split('\n')

    num_events = len(short_course_events)
    seconds_per_point_by_event = [None] * num_events

    for row in rows:
        columns = row.split('\t')
        # Parse the event name
        event_code = Event.create_from_str(columns[0], 'S').event_code
        if len(columns) != 9:
            raise RuntimeError(
                "Unexpected number of columns in spreadsheet data")
        seconds_per_point_for_event = []
        seconds_per_point_by_event[event_code] = seconds_per_point_for_event
        for i in range(1, 9):
            if len(columns[i]) == 0:
                seconds_per_point_for_event.append(None)
            else:
                seconds_per_point_for_event.append(float(columns[i]))
    return seconds_per_point_by_event
Example 6
def _parse_spreadsheet_data( spreadsheet_data ):
  rows = spreadsheet_data.split( '\n' )

  num_events = len( short_course_events )
  qt_by_event = [ None ] * num_events
  expected_num_columns = _max_age - _min_age + 2
  
  for row in rows:
    columns = row.split( '\t' )
    # Parse the event name
    event_code = Event.create_from_str( columns[0], 'S' ).event_code
    if len( columns ) != expected_num_columns:
      raise RuntimeError( "Unexpected number of columns in spreadsheet data" )
    qt_for_event = []
    qt_by_event[ event_code ] = qt_for_event
    for i in range( 1, expected_num_columns ):
      if len( columns[i] ) == 0:
        qt_for_event.append( None )
      else:
        qt_for_event.append( float( RaceTime( columns[i] ) ) )
  return qt_by_event
Example 7
def _parse_spreadsheet_data(spreadsheet_data):
    rows = spreadsheet_data.split('\n')

    num_events = len(short_course_events)
    qt_by_event = [None] * num_events
    expected_num_columns = max_age - min_age + 2

    for row in rows:
        columns = row.split('\t')
        # Parse the event name
        event_code = Event.create_from_str(columns[0], 'S').event_code
        if len(columns) != expected_num_columns:
            raise RuntimeError(
                "Unexpected number of columns in spreadsheet data")
        qt_for_event = []
        qt_by_event[event_code] = qt_for_event
        for i in range(1, expected_num_columns):
            if len(columns[i]) == 0:
                qt_for_event.append(None)
            else:
                qt_for_event.append(float(RaceTime(columns[i])))
    return qt_by_event
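
The three _parse_spreadsheet_data variants above all walk the same tab-separated layout: one row per event, the event name in column 0 and one value per remaining column, with empty cells meaning "no value". A self-contained sketch of that row-and-column decomposition follows; the event names and numbers are made up, and plain float() stands in for the RaceTime conversion and event_code indexing used in the real parsers.

# Hypothetical data: an event column plus three value columns per row.
spreadsheet_data = (
    "50m Freestyle\t45.10\t42.30\t\n"
    "100m Backstroke\t95.00\t\t88.40"
)

for row in spreadsheet_data.split('\n'):
    columns = row.split('\t')
    event_name = columns[0]
    # Empty cells become None, everything else is parsed as a number.
    values = [None if cell == "" else float(cell) for cell in columns[1:]]
    print(event_name, values)
# -> 50m Freestyle [45.1, 42.3, None]
# -> 100m Backstroke [95.0, None, 88.4]
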
Example 8
def parse_event(tokens, course_code):
    event_str = tokens[0][12:]  # Skip the 12-character prefix; they all start with 'Male/Female'
    return Event.create_from_str(event_str, course_code)
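
The [12:] slice assumes every event description starts with the same 12-character prefix (per the comment, 'Male/Female' plus a separator). A one-line illustration with a made-up string:

description = "Male/Female 100m Butterfly"   # hypothetical tokens[0] value
print(description[12:])                       # -> 100m Butterfly
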
Example 9
def scrape_meet( asa_meet_code, page_number, meet_name, date, course_code ):
  logging.info( "Attempting to parse meet " + meet_name + ", meet code: " + str( asa_meet_code ) + ", page: " + str(page_number) )
  # Load a meet page from a URL like this...
  # https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=19611&targetclub=WINNCHRN 
  url = "https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=" + str( asa_meet_code ) + "&targetclub=WINNCHRN&page=" + str( page_number )
  page = helpers.FetchUrl( url )

  if page is None:
    logging.error( "Failed to get page " + url )
    return 503
  tree = html.fromstring( page )
  meet_has_been_parsed( asa_meet_code )
  try:
    table = tree.get_element_by_id( "rankTable" )
  except:
    logging.info( "No rankTable for " + url + ". Presuming no Winsford swimmers at that meet" )
    return
  
  if page_number == 1:
    # When scraping the first page, one of our jobs is to count how many other pages
    # there are and add tasks to scrape those pages
    num_pages = scrape_num_pages( tree )
    logging.info( "Meet contains " + str( num_pages ) + " pages ")
    date_str = date.strftime( "%d/%m/%y" )
    for i in range( 2, num_pages+1 ):
      logging.info( "Queing update of page " + str(i) + " of " + meet_name )
      taskqueue.add(url='/admin/scrape_meet', params={'asa_meet_code': str(asa_meet_code), 'meet_name' : meet_name, 'date' : date_str, 'course_code' : course_code, 'page' : str(i) })

  swimmers_checked = set()
  update_swimmer_list = False
  for row in TableRows( table, _meet_headers_of_interest ):
    # First we look at the swimmer.
    # Is it one we've already seen while scraping this meet, or is it a new one?
    # If it's a new one, is it a swimmer that's in our database?
    # Perhaps it's a swimmer that's in our database as Cat 1 and needs upgrading.
    asa_number = int( row[0].text )
    if asa_number not in swimmers_checked:
      swimmers_checked.add( asa_number )
      swimmer = Swimmer.get( "Winsford", asa_number )
      if swimmer is None:
        swimmer = SwimmerCat1.get( "Winsford", asa_number )
        if swimmer is None:
          # This looks like a new Winsford swimmer that isn't in the database
          # Add a task to add them
          logging.info( "Found new Winsford swimmer: " + str( asa_number ) + ". Adding task to scrape." )
          taskqueue.add(url='/admin/update_swimmers', params={'name_search': str(asa_number)})
          #QueueUpdateSwimsForSwimmer( str(asa_number) )
          update_swimmer_list = True
        else:
          # This is a swimmer that's in our database as Cat1
          # Add a task to upgrade them
          logging.info( "Found new Cat 2 Winsford swimmer: " + str( asa_number ) + ". Adding task to upgrade." )
          taskqueue.add(url='/admin/check_for_swimmer_upgrade', params={'asa_number': str(asa_number)})
          update_swimmer_list = True
      else:
        logging.info( "Found existing Winsford swimmer: " + swimmer.full_name() )

  if update_swimmer_list:
    taskqueue.add(url='/admin/update_swimmer_list')

  swims_for_swimmer = {}
  for row in TableRows( table, _meet_headers_of_interest ):
    # Now look at the actual swims.
    # If there's a swim link, then that means there are some splits. In those
    # cases we also add a task to parse the splits and add them to the Swim.
    asa_number = int( row[0].text )
    event_str = row[1].text
    date_of_birth = helpers.ParseDate_dmy( row[2].text )
    race_time = float( RaceTime( row[3].text ) )
    event = Event.create_from_str( event_str, course_code )
    asa_swim_id = get_asa_swim_id( row[3] )
      
    swim = Swim.create( asa_number, date_of_birth, event, date, meet_name, race_time, asa_swim_id )

    if asa_swim_id is not None:
      # Swim link. Add a task to parse the splits.
      swim_key_str = swim.create_swim_key_str()
      logging.info( "Adding split scraping task for swim " + swim_key_str )
      taskqueue.add(url='/admin/scrape_splits', params={'swim': swim_key_str})

    # Record this swim
    if asa_number not in swims_for_swimmer:
      swims_for_swimmer[ asa_number ] = []
      
    swims_for_swimmer[ asa_number ].append( swim )
    
  for asa_number, swims in swims_for_swimmer.iteritems():
    num_swims = len( swims )
    logging.info( "Putting " + str(num_swims) + " swims for " + str( asa_number ) )
    put_new_swims( asa_number, swims )
Example 10
def scrape_meet(asa_meet_code, page_number, meet_name, date, course_code):
    logging.info("Attempting to parse meet " + meet_name + ", meet code: " +
                 str(asa_meet_code) + ", page: " + str(page_number))
    # Load a meet page from a URL like this...
    # https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=19611&targetclub=WINNCHRN
    url = "https://www.swimmingresults.org/showmeetsbyclub/index.php?meetcode=" + str(
        asa_meet_code) + "&targetclub=WINNCHRN&page=" + str(page_number)
    page = helpers.FetchUrl(url)

    if page is None:
        logging.error("Failed to get page " + url)
        return 503
    tree = html.fromstring(page)
    meet_has_been_parsed(asa_meet_code)
    try:
        table = tree.get_element_by_id("rankTable")
    except:
        logging.info("No rankTable for " + url +
                     ". Presuming no Winsford swimmers at that meet")
        return

    if page_number == 1:
        # When scraping the first page, one of our jobs is to count how many other pages
        # there are and add tasks to scrape those pages
        num_pages = scrape_num_pages(tree)
        logging.info("Meet contains " + str(num_pages) + " pages ")
        date_str = date.strftime("%d/%m/%y")
        for i in range(2, num_pages + 1):
            logging.info("Queing update of page " + str(i) + " of " +
                         meet_name)
            taskqueue.add(url='/admin/scrape_meet',
                          params={
                              'asa_meet_code': str(asa_meet_code),
                              'meet_name': meet_name,
                              'date': date_str,
                              'course_code': course_code,
                              'page': str(i)
                          })

    swimmers_checked = set()
    update_swimmer_list = False
    for row in TableRows(table, _meet_headers_of_interest):
        # First we look at the swimmer.
        # Is it one we've already seen while scraping this meet, or is it a new one?
        # If it's a new one, is it a swimmer that's in our database?
        # Perhaps it's a swimmer that's in our database as Cat 1 and needs upgrading.
        asa_number = int(row[0].text)
        if asa_number not in swimmers_checked:
            swimmers_checked.add(asa_number)
            swimmer = Swimmer.get("Winsford", asa_number)
            if swimmer is None:
                swimmer = SwimmerCat1.get("Winsford", asa_number)
                if swimmer is None:
                    # This looks like a new Winsford swimmer that isn't in the database
                    # Add a task to add them
                    logging.info("Found new Winsford swimmer: " +
                                 str(asa_number) + ". Adding task to scrape.")
                    taskqueue.add(url='/admin/update_swimmers',
                                  params={'name_search': str(asa_number)})
                    #QueueUpdateSwimsForSwimmer( str(asa_number) )
                    update_swimmer_list = True
                else:
                    # This is a swimmer that's in our database as Cat1
                    # Add a task to upgrade them
                    logging.info("Found new Cat 2 Winsford swimmer: " +
                                 str(asa_number) + ". Adding task to upgrade.")
                    taskqueue.add(url='/admin/check_for_swimmer_upgrade',
                                  params={'asa_number': str(asa_number)})
                    update_swimmer_list = True
            else:
                logging.info("Found existing Winsford swimmer: " +
                             swimmer.full_name())

    if update_swimmer_list:
        taskqueue.add(url='/admin/update_swimmer_list')

    swims_for_swimmer = {}
    for row in TableRows(table, _meet_headers_of_interest):
        # Now look at the actual swims.
        # If there's a swim link, then that means there are some splits. In those
        # cases we also add a task to parse the splits and add them to the Swim.
        asa_number = int(row[0].text)
        event_str = row[1].text
        date_of_birth = helpers.ParseDate_dmy(row[2].text)
        race_time = float(RaceTime(row[3].text))
        event = Event.create_from_str(event_str, course_code)
        asa_swim_id = get_asa_swim_id(row[3])

        swim = Swim.create(asa_number, date_of_birth, event, date, meet_name,
                           race_time, asa_swim_id)

        if asa_swim_id is not None:
            # Swim link. Add a task to parse the splits.
            swim_key_str = swim.create_swim_key_str()
            logging.info("Adding split scraping task for swim " + swim_key_str)
            taskqueue.add(url='/admin/scrape_splits',
                          params={'swim': swim_key_str})

        # Record this swim
        if asa_number not in swims_for_swimmer:
            swims_for_swimmer[asa_number] = []

        swims_for_swimmer[asa_number].append(swim)

    for asa_number, swims in swims_for_swimmer.iteritems():
        num_swims = len(swims)
        logging.info("Putting " + str(num_swims) + " swims for " +
                     str(asa_number))
        put_new_swims(asa_number, swims)