class Swim(ndb.Model):
    # A single race swum by one swimmer in one event.
    #
    # We try to minimise the number of NDB properties to minimise the number of read and
    # write operations that we incur, because Google App Engine's pricing is based on those
    # operations rather than something like number of bytes read and written.
    #
    # Only age_on_day, race_time and data are persisted. Everything else
    # (asa_number, event, date, meet, asa_swim_id and, when known, splits) is a plain
    # Python attribute that unpack_data() decodes from the packed data string.

    # We need to search by age_on_day so this needs to be a real property
    age_on_day = ndb.IntegerProperty('AoD', required=True)
    # We need to sort by race_time so this needs to be a real property
    race_time = ndb.FloatProperty('T', required=True)
    # We put everything else into a non-indexed string, so we only pay one read/write-op for it.
    # Don't use JSON because that just adds bloat for something so simple.
    # It pains me to use a string for storing things like float values, I'd much rather use
    # a binary blob, but I suppose I do need to balance coding simplicity vs. efficiency.
    data = ndb.StringProperty("Data", indexed=False, required=True)

    # Creates a string that uniquely identifies this Swim.
    # The key string can be used with get_from_key_str to retrieve this Swim.
    def create_swim_key_str(self):
        # Key string format is <parent_key_id>_<swim_key_id>
        parent_key_id = compute_parent_key_id(self.asa_number, self.event)
        swim_key_id = compute_swim_key_id(self.date, self.asa_number, self.event)
        return str(parent_key_id) + "_" + str(swim_key_id)

    # Retrieves a Swim from the database given a key string that was previously
    # created with create_swim_key_str.
    # Returns the unpacked Swim, or None if the key string is malformed or no
    # entity exists for that key.
    @classmethod
    def get_from_key_str(cls, key_str):
        # Split into parent key id and swim key id
        tokens = key_str.split("_")
        if len(tokens) != 2:
            logging.error("Failed to split swim key string")
            return None
        try:
            parent_key_id = int(tokens[0])
            swim_key_id = int(tokens[1])
        except ValueError:
            # Treat non-numeric ids the same way as a wrong token count: a
            # malformed key string is logged and yields no Swim.
            logging.error("Swim key string contains a non-integer id")
            return None
        swim = ndb.Key("SwimmerEvent", parent_key_id, "Swim", swim_key_id).get()
        if swim is not None:
            swim.unpack_data()
        return swim

    # Internal helper to create most of the packed data string.
    # The result is "V1|<asa_number>|<event int>|<dd/mm/YYYY>|<meet>|<asa_swim_id>|"
    # where a missing asa_swim_id (None) is written as -1. The trailing "|" leaves
    # the splits field empty; repack_data appends the splits after it.
    @classmethod
    def pack_data(cls, asa_number, event, date, meet, asa_swim_id):
        swim_id_str = "-1" if asa_swim_id is None else str(asa_swim_id)
        fields = [
            "V1",
            str(asa_number),
            str(event.to_int()),
            date.strftime("%d/%m/%Y"),
            meet,
            swim_id_str,
        ]
        return "|".join(fields) + "|"

    # Pass split_times_from_asa as a list of known split times as floats.
    # They can have missing splits, and be either 25m or 50m splits.
    #
    # This function will attempt to create a complete list of splits in self.splits
    # by guessing the distances of the provided splits and interpolating
    # any missing splits.
    #
    # It works most of the time but has been known to go wrong on longer IM races
    # because of the non-uniformity of split times. This should be fixable
    # with a little more thought and some heuristic data on typical split ratios
    # for IM legs.
    def fix_splits(self, split_times_from_asa):
        # The splits from the ASA website are often completely wrong in terms of
        # distance. And there are often missing splits.
        race_distance = self.event.getDistance()
        num_25m_splits = int(race_distance / 25)
        last_index = num_25m_splits - 1
        any_splits_on_25m = False
        splits = [None] * num_25m_splits

        for split_time in split_times_from_asa:
            # Guess the distance of this split by looking at the whole race time
            guessed_25m_split = int(round(float(num_25m_splits) * split_time / self.race_time)) - 1
            if guessed_25m_split < 0:
                logging.error("Split time before first 25m")
                guessed_25m_split = 0
            if guessed_25m_split >= num_25m_splits:
                guessed_25m_split = last_index
            # Even slot indices are the 25m, 75m, ... marks, so a split landing
            # on one means the source data has 25m resolution.
            if (guessed_25m_split & 1) == 0:
                any_splits_on_25m = True
            if splits[guessed_25m_split] is not None:
                logging.error("More than one split in same 25m segment")
            splits[guessed_25m_split] = Split((guessed_25m_split + 1) * 25, split_time, False)

        # Now we fill in any missing splits with interpolated values.
        # The final slot is always the race time itself.
        final_split = Split(race_distance, self.race_time, False)
        splits[last_index] = final_split
        previous_good_split = Split(0, 0, False)
        for i in range(num_25m_splits):
            if splits[i] is not None:
                previous_good_split = splits[i]
                continue

            # Find the next known split, falling back to the final split
            next_good_split = final_split
            for j in range(i + 1, last_index):
                if splits[j] is not None:
                    next_good_split = splits[j]
                    break

            # Interpolate linearly between the surrounding known splits
            this_split_distance = (i + 1) * 25
            distance_span = float(next_good_split.distance) - float(previous_good_split.distance)
            interp = (float(this_split_distance) - float(previous_good_split.distance)) / distance_span
            time_span = next_good_split.time - previous_good_split.time
            splits[i] = Split(this_split_distance, previous_good_split.time + (interp * time_span), True)

        # Now we can transfer the splits to self
        if any_splits_on_25m:
            self.splits = splits
        else:
            # Only 50m data was supplied, so keep every second (50m, 100m, ...) slot.
            # Floor division keeps the count an int on Python 3 as well as
            # Python 2; true division would produce a float and break the list
            # construction below.
            num_50m_splits = num_25m_splits // 2
            self.splits = [splits[(i * 2) + 1] for i in range(num_50m_splits)]

    # Internal helper that is called when a Swim has been read from the database.
    # This unpacks the data that is packed into the string, so they're available to read
    # as member variables.
    # Makes heavy use of string tokenising using split.
    #
    # self.splits is only set when the packed data has the "-" marker (empty list)
    # or more than one comma-separated split time; otherwise the attribute is left
    # unset. Data that is not version 1 is re-packed and written back with put().
    def unpack_data(self):
        tokens = self.data.split("|")

        # Figure out what version data we have
        version = 0
        if tokens[0].startswith("V"):
            version = int(tokens[0][1:])

        if version == 0:
            # Old version swim data, missing the version number
            self.asa_number = int(tokens[0])
            self.event = Event(int(tokens[1]))
            self.date = helpers.ParseDate_dmY(tokens[2])
            self.meet = tokens[3]
            self.asa_swim_id = -1
            if len(tokens[4]) > 0:
                self.asa_swim_id = int(tokens[4])
            # Ignore any splits data in version 0 because it's most likely nonsense
        else:
            # Version 1 or higher data.
            # Token 0 is the version number.
            self.asa_number = int(tokens[1])
            self.event = Event(int(tokens[2]))
            self.date = helpers.ParseDate_dmY(tokens[3])
            self.meet = tokens[4]
            self.asa_swim_id = int(tokens[5])
            # Read the splits
            if tokens[6] == "-":
                # There are no splits available from the ASA for this swim
                self.splits = []
            else:
                split_tokens = tokens[6].split(",")
                # An empty splits field tokenises to a single empty string,
                # so only parse when there is more than one entry.
                if len(split_tokens) > 1:
                    split_times_from_asa = [float(token) for token in split_tokens]
                    self.fix_splits(split_times_from_asa)

        if version != 1:
            # Update database with latest version data
            logging.info("Upgrading swim data to latest version")
            self.repack_data()
            self.put()

    # Internal helper to re-pack self.data if we change something, like adding
    # splits for example.
    # Only non-interpolated split times are stored; interpolated ones are
    # regenerated by fix_splits when the data is unpacked.
    def repack_data(self):
        data_str = Swim.pack_data(self.asa_number, self.event, self.date, self.meet, self.asa_swim_id)
        if hasattr(self, 'splits'):
            if len(self.splits) == 0:
                # Write a "-" to mean there are no splits for this swim available.
                # This is to prevent us continually going to the ASA website for splits.
                data_str += "-"
            else:
                data_str += ",".join(
                    str(split.time) for split in self.splits if not split.interpolated
                )
        self.data = data_str

    # Returns the ASA swim id of this swim, or None if it doesn't have one
    # (stored internally as -1).
    def get_asa_swim_id(self):
        if self.asa_swim_id == -1:
            return None
        return self.asa_swim_id

    # Swim creation that takes a swimmer asa number and date-of-birth for cases where
    # we don't have a Swimmer (like when we're scraping a meet and we encounter a swimmer for
    # the first time, or one that needs upgrading from Cat 1).
    # The returned Swim is unpacked but has not been written to the database.
    @classmethod
    def create(cls, asa_number, date_of_birth, event, date, meet, race_time, asa_swim_id=None):
        parent_key = create_parent_key(asa_number, event)
        swim_key_id = compute_swim_key_id(date, asa_number, event)
        age_on_day = helpers.CalcAge(date_of_birth, date)
        data = cls.pack_data(asa_number, event, date, meet, asa_swim_id)
        swim = cls(
            parent=parent_key,
            id=swim_key_id,
            age_on_day=age_on_day,
            race_time=race_time,
            data=data,
        )
        swim.unpack_data()
        return swim

    # Retrieves a swimmer's PB for an event, or None if they have no matching swims.
    # Optionally pass the age that you want the PB for. This can be used to generate
    # club records by finding a swimmer's fastest 200 Free time when they were 11 for example.
    @classmethod
    def fetch_pb(cls, swimmer, event, aod=None):
        key = create_parent_key(swimmer.asa_number, event)
        if aod is None:
            # We want this swimmer's absolute PB
            query = cls.query(ancestor=key)
        else:
            # Fetch this swimmer's PB for the specified age
            query = cls.query(cls.age_on_day == aod, ancestor=key)
        # Ordering by race_time and taking one result gives the fastest swim
        swims = query.order(cls.race_time).fetch(1)
        if (swims is not None) and (len(swims) > 0):
            swims[0].unpack_data()
            return swims[0]
        return None

    # Retrieves all a swimmer's swims for an event, ordered by race time.
    @classmethod
    def fetch_all(cls, asa_number, event):
        key = create_parent_key(asa_number, event)
        swims = cls.query(ancestor=key).order(cls.race_time).fetch()
        for swim in swims:
            swim.unpack_data()
        return swims

    # Convert to string, usually to send to the JS Swim constructor in swim.js
    # Format is
    # <asa_number>|<event int>|<dd/mm/YYYY>|<meet>|<asa_swim_id>|<splits>|<race_time>|<key str>
    # where <splits> is a comma-separated list of per-segment times (each split's
    # time minus the previous split's time), with an "I" suffix on interpolated ones.
    def __str__(self):
        header_fields = [
            str(self.asa_number),
            str(self.event.to_int()),
            self.date.strftime("%d/%m/%Y"),
            self.meet,
            str(self.asa_swim_id),
        ]
        data_str = "|".join(header_fields) + "|"
        if hasattr(self, 'splits'):
            segment_strs = []
            previous_time = float(0)
            for split in self.splits:
                segment = str(split.time - previous_time)
                if split.interpolated:
                    segment += "I"
                segment_strs.append(segment)
                previous_time = split.time
            data_str += ",".join(segment_strs)
        data_str += "|" + str(self.race_time)
        data_str += "|" + self.create_swim_key_str()
        return data_str