Exemple #1
0
def collectrunningahead(aag,runningaheadfile):
#----------------------------------------------------------------------
    '''
    Add every usable runningahead result to the per-runner age grade analysis

    Each record read from the file is filed under the lower-cased runner name.
    Records whose time converts to 0 seconds or less are not added.

    NOTE(review): the result file is opened here but never closed within this
    function -- confirm whether the reader needs an explicit close.

    :param aag: :class:`AnalyzeAgeGrade` objects, by runner name (updated in place)
    :param runningaheadfile: file with runningahead results, output from runningaheadresults
    '''
    reader = runningaheadresults.RunningAheadResultFile(runningaheadfile)
    reader.open()

    # next() returning None marks the end of the file
    record = reader.next()
    while record is not None:
        runnerkey = record.name.lower()

        # make sure this runner has an entry in aag before adding to it
        initaagrunner(aag, runnerkey, record.gender, record.dob)

        # only results with a positive time are recorded
        elapsed = timeu.timesecs(record.time)
        if elapsed > 0:
            aag[runnerkey].add_stat(
                record.date, record.km*1000, elapsed,
                race=record.race,
                source='runningahead', priority=PRIO_RUNNINGAHEAD)

        record = reader.next()
Exemple #2
0
def collectathlinks(aag,athlinksfile):
#----------------------------------------------------------------------
    '''
    Add every usable athlinks result to the per-runner age grade analysis

    Each record read from the file is filed under the lower-cased runner name.
    Records whose result time converts to 0 seconds or less are not added.

    NOTE(review): the result file is opened here but never closed within this
    function -- confirm whether the reader needs an explicit close.

    :param aag: :class:`AnalyzeAgeGrade` objects, by runner name (updated in place)
    :param athlinksfile: file with athlinks results, output from athlinksresults
    '''
    reader = athlinksresults.AthlinksResultFile(athlinksfile)
    reader.open()

    # next() returning None marks the end of the file
    record = reader.next()
    while record is not None:
        runnerkey = record.name.lower()

        # make sure this runner has an entry in aag before adding to it
        initaagrunner(aag, runnerkey, record.gender, record.dob)

        # only results with a positive time are recorded
        elapsed = timeu.timesecs(record.resulttime)
        if elapsed > 0:
            aag[runnerkey].add_stat(
                record.racedate, record.distkm*1000, elapsed,
                race=record.racename,
                loc=record.raceloc,
                fuzzyage=record.fuzzyage,
                source='athlinks', priority=PRIO_ATHLINKS)

        record = reader.next()
Exemple #3
0
def collectultrasignup(aag,ultrasignupfile):
#----------------------------------------------------------------------
    '''
    Add every usable ultrasignup result to the per-runner age grade analysis

    Each record read from the file is filed under the lower-cased runner name.
    Records whose time converts to 0 seconds or less are not added.

    NOTE(review): the result file is opened here but never closed within this
    function -- confirm whether the reader needs an explicit close.

    :param aag: :class:`AnalyzeAgeGrade` objects, by runner name (updated in place)
    :param ultrasignupfile: file with ultrasignup results, output from ultrasignupresults
    '''
    reader = ultrasignupresults.UltraSignupResultFile(ultrasignupfile)
    reader.open()

    # next() returning None marks the end of the file
    record = reader.next()
    while record is not None:
        runnerkey = record.name.lower()

        # make sure this runner has an entry in aag before adding to it
        initaagrunner(aag, runnerkey, record.gender, record.dob)

        # only results with a positive time are recorded
        elapsed = timeu.timesecs(record.time)
        if elapsed > 0:
            aag[runnerkey].add_stat(
                record.date, record.km*1000, elapsed,
                race=record.race,
                loc=record.loc,
                source='ultrasignup', priority=PRIO_ULTRASIGNUP)

        record = reader.next()
Exemple #4
0
 def __init__(self,name,dob,gender,racename,racedate,distmiles,distkm,resulttime,ag):
     '''
     Store one result record, converting the raw field values

     name, gender and racename are stored as given. dob and racedate go
     through tfile.asc2dt, distmiles / distkm / ag through float, and
     resulttime through timeu.timesecs.
     '''
     # fields kept exactly as supplied
     self.name, self.gender, self.racename = name, gender, racename

     # dates
     self.dob = tfile.asc2dt(dob)
     self.racedate = tfile.asc2dt(racedate)

     # distances
     self.distmiles = float(distmiles)
     self.distkm = float(distkm)

     # time and age grade
     self.resulttime = timeu.timesecs(resulttime)
     self.ag = float(ag)
def collect(searchfile,outfile,begindate,enddate):
#----------------------------------------------------------------------
    '''
    collect race results from ultrasignup

    For each runner row in searchfile, the runner's ultrasignup results are
    retrieved and one row is written to outfile for each result which

    * has a race date within the (whole day adjusted) begindate..enddate window
    * has an age equal to the runner's age on race date, computed from DOB
    * has a gender equal to the first character of the runner's Gender
    * has a distance of at least 0.050 km
    * does not have an int race time (most likely a DNF)
    * does not have an age grade below 15 or at/above 100

    Both files are closed even if a lookup or conversion raises.
    
    :param searchfile: path to file containing names, genders, birth dates to search for
        (columns GivenName, FamilyName, DOB, Gender are read)
    :param outfile: output file path, written with header UltraSignupResultFile.filehdr
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''

    # common fields between input and output
    commonfields = 'GivenName,FamilyName,DOB,Gender'.split(',')

    # 'with' guarantees both files are closed, output first, on any exit path
    with open(searchfile,'rb') as _IN, open(outfile,'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT,UltraSignupResultFile.filehdr)
        OUT.writeheader()

        # create ultrasignup access
        ultra = ultrasignup.UltraSignup(debug=True)

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        adj_begindate = datetime.datetime(dt_begindate.year,dt_begindate.month,dt_begindate.day,0,0,0)
        begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        adj_enddate = datetime.datetime(dt_enddate.year,dt_enddate.month,dt_enddate.day,23,59,59)
        enddate = timeu.dt2epoch(adj_enddate)

        # start of collection, for elapsed time report
        start = time.time()

        # loop through runners in the input file
        # NOTE: a "participant too young" pre-filter used to sit here; it was disabled
        for runner in IN:
            fname,lname = runner['GivenName'],runner['FamilyName']
            dt_dob = ftime.asc2dt(runner['DOB'])
            gender = runner['Gender'][0]

            # loop through each result for this athlete
            for result in ultra.listresults(fname,lname):
                e_racedate = ftime.asc2epoch(result.racedate)

                # skip result if outside the desired time window
                if e_racedate < begindate or e_racedate > enddate: continue

                # skip result if runner's age doesn't match the age within the result
                dt_racedate = timeu.epoch2dt(e_racedate)
                racedateage = timeu.age(dt_racedate,dt_dob)
                if result.age != racedateage: continue

                # skip result if runner's gender doesn't match gender within the result
                if result.gender != gender: continue

                # distance check
                distmiles = result.distmiles
                distkm = result.distkm
                if distkm is None or distkm < 0.050: continue # should already be filtered within ultrasignup, but just in case

                # int resulttime means DNF, most likely -- skip this result
                resulttime = result.racetime
                if isinstance(resulttime, int): continue

                # create output record and copy common fields
                outrec = {field: runner[field] for field in commonfields}

                # fill in output record fields from runner, result
                outrec['name'] = '{} {}'.format(fname,lname)
                outrec['age'] = result.age
                outrec['race'] = result.racename
                outrec['date'] = ftime.epoch2asc(e_racedate)
                outrec['loc'] = '{}, {}'.format(result.racecity, result.racestate)
                outrec['miles'] = distmiles
                outrec['km'] = distkm

                # pad time to h:mm:ss form; handles strange case of time = ':00'
                if resulttime[0] == ':':
                    resulttime = '0'+resulttime
                while resulttime.count(':') < 2:
                    resulttime = '0:'+resulttime
                outrec['time'] = resulttime

                # just leave out age grade if exception occurs
                # (Exception rather than bare except, so ctrl-C and SystemExit still stop the run)
                try:
                    agpercent,agresult,agfactor = ag.agegrade(racedateage,gender,distmiles,timeu.timesecs(resulttime))
                except Exception:
                    pass
                else:
                    outrec['ag'] = agpercent
                    if agpercent < 15 or agpercent >= 100: continue # skip obvious outliers

                OUT.writerow(outrec)

    finish = time.time()
    # parenthesized single-argument print prints the same text as the print statement
    print('number of URLs retrieved = {}'.format(ultra.geturlcount()))
    print('elapsed time (min) = {}'.format((finish-start)/60))
Exemple #6
0
    def convertserviceresult(self, result):
    #----------------------------------------------------------------------
        '''
        converts a single athlinks service result to dict suitable to be saved in resultfile

        result must be converted to dict with keys in `resultfilehdr` provided at instance creation

        must be overridden when ResultsCollect is instantiated

        use return value of None for cases when results could not be filtered by `:meth:getresults`

        Participant information (name, fname, lname, dob, gender, dt_dob) is read from
        attributes of self, not passed as parameters.

        Side effects visible in this method: a Course and/or Race row may be added to
        db.session (with flush), a Race may be updated with a location id, and a debug
        row is written to self.RACE when self.racefile is set.

        None is returned when

        * a course being cached has distance under 0.050 km or a category not in `race_category`
        * the result time converts to 0 seconds
        * the computed age grade is below 15 or at/above 100

        :param result: single service result, from list retrieved through `getresults`.
            Keys read here: Race (RaceID, RaceDate), CourseID, Age, fuzzyage, IsMember,
            RacerID (members only), EntryID, TicksString, Gender
        :rtype: dict with keys matching `resultfilehdr`, or None if result is not to be saved
        '''

        # create output record and copy common fields
        outrec = {}

        # copy participant information
        outrec['name'] = self.name
        outrec['GivenName'] = self.fname
        outrec['FamilyName'] = self.lname
        outrec['DOB'] = self.dob
        outrec['Gender'] = self.gender


        # some debug items - assume everything is cached
        # (only used for the status column of the race debug file below)
        coursecached = True
        racecached = True

        # get course used for this result
        # sourceid of a cached athlinks course is '<RaceID>/<CourseID>'
        courseid = '{}/{}'.format(result['Race']['RaceID'], result['CourseID'])
        course = Course.query.filter_by(club_id=self.club_id, source='athlinks', sourceid=courseid).first()

        # cache course if not done already
        # race stays None when the course was already cached; it is looked up further down
        race = None
        if not course:
            coursecached = False

            # course details have to be retrieved from the service
            coursedata = self.service.getcourse(result['Race']['RaceID'], result['CourseID'])

            # only the first entry of coursedata['Courses'] is used
            distmiles = athlinks.dist2miles(coursedata['Courses'][0]['DistUnit'],coursedata['Courses'][0]['DistTypeID'])
            distkm = athlinks.dist2km(coursedata['Courses'][0]['DistUnit'],coursedata['Courses'][0]['DistTypeID'])
            if distkm < 0.050: return None # skip timed events, which seem to be recorded with 0 distance

            # skip result if not Running or Trail Running race
            # note rejected courses are not cached, so they are retrieved again for the next result
            thiscategory = coursedata['Courses'][0]['RaceCatID']
            if thiscategory not in race_category: return None
        
            course = Course()
            course.club_id = self.club_id
            course.source = 'athlinks'
            course.sourceid = courseid

            # strip racename and coursename here to make sure detail file matches what is stored in database
            racename = csvu.unicode2ascii(coursedata['RaceName']).strip()
            coursename = csvu.unicode2ascii(coursedata['Courses'][0]['CourseName']).strip()
            course.name = '{} / {}'.format(racename,coursename)

            # maybe truncate to FIRST part of race name
            if len(course.name) > MAX_RACENAME_LEN:
                course.name = course.name[:MAX_RACENAME_LEN]
            
            course.date = ftime.epoch2asc(athlinks.gettime(coursedata['RaceDate']))
            course.location = csvu.unicode2ascii(coursedata['Home'])
            # maybe truncate to LAST part of location name, to keep most relevant information (state, country)
            if len(course.location) > MAX_LOCATION_LEN:
                course.location = course.location[-MAX_LOCATION_LEN:]

            # TODO: adjust marathon and half marathon distances?
            course.distkm =distkm
            course.distmiles = distmiles

            # surface is whatever race_category maps this category id to
            course.surface = race_category[thiscategory]

            # retrieve or add race
            # flush should allow subsequent query per http://stackoverflow.com/questions/4201455/sqlalchemy-whats-the-difference-between-flush-and-commit
            # Race has uniqueconstraint for club_id/name/year/fixeddist. It's been seen that there are additional races in athlinks, 
            # but just assume the first is the correct one.
            raceyear = ftime.asc2dt(course.date).year
            race = Race.query.filter_by(club_id=self.club_id, name=course.name, year=raceyear, fixeddist=race_fixeddist(course.distmiles)).first()
            ### TODO: should the above be .all() then check for first race within epsilon distance?
            if not race:
                racecached = False
                race = Race(self.club_id, raceyear)
                race.name = course.name
                race.distance = course.distmiles
                race.fixeddist = race_fixeddist(race.distance)
                race.date = course.date
                race.active = True
                race.external = True
                race.surface = course.surface
                loc = self.locsvr.getlocation(course.location)
                race.locationid = loc.id
                db.session.add(race)
                db.session.flush()  # force id to be created

            # race.id is needed for the course, which is why the race is flushed first
            course.raceid = race.id
            db.session.add(course)
            db.session.flush()      # force id to be created

        # maybe course was cached but location of race wasn't
        # update location of result race, if needed, and if supplied
        # this is here to clean up old database data
        # NOTE(review): if this query finds no race, race is None and the
        # race.locationid access below raises AttributeError -- confirm a cached
        # course always has a matching race
        if not race:
            race = Race.query.filter_by(club_id=self.club_id, name=course.name, year=ftime.asc2dt(course.date).year, fixeddist=race_fixeddist(course.distmiles)).first()
        if not race.locationid and course.location:
            # app.logger.debug('updating race with location {}'.format(course.location))
            loc = self.locsvr.getlocation(course.location)
            race.locationid = loc.id
            insert_or_update(db.session, Race, race, skipcolumns=['id'], 
                             club_id=self.club_id, name=course.name, year=ftime.asc2dt(course.date).year, fixeddist=race_fixeddist(course.distmiles))
        # else:
        #     app.logger.debug('race.locationid={} course.location="{}"'.format(race.locationid, course.location))

        # debug races
        # status is 'addcourse', 'addrace', 'addcourse-addrace' or 'cached'
        if self.racefile:
            racestatusl = []
            if not coursecached: racestatusl.append('addcourse')
            if not racecached: racestatusl.append('addrace')
            if not racestatusl: racestatusl.append('cached')
            racestatus = '-'.join(racestatusl)
            racerow = {'status': racestatus, 'runner': self.name}

            # remaining debug fields are read straight from the course record
            for racefield in self.racefields:
                if racefield in ['status', 'runner']: continue
                racerow[racefield] = getattr(course,racefield)
            self.RACE.writerow(racerow)


        # fill in output record fields from result, course
        # age comes from the result as is
        # NOTE(review): assumes 'fuzzyage' was already set on result before this call -- confirm in caller
        outrec['age'] = result['Age']
        outrec['fuzzyage'] = result['fuzzyage']

        # leave athlid blank if result not from an athlink member
        athlmember = result['IsMember']
        if athlmember:
            outrec['athlid'] = result['RacerID']

        # remember the entryid, high water mark of which can be used to limit the work here
        outrec['entryid'] = result['EntryID']

        # race name, location; convert from unicode if necessary
        # TODO: make function to do unicode translation -- apply to runner name as well (or should csv just store unicode?)
        outrec['race'] = course.name
        outrec['date'] = course.date
        outrec['loc'] = course.location
        
        outrec['miles'] = course.distmiles
        outrec['km'] = course.distkm
        outrec['category'] = course.surface
        resulttime = result['TicksString']

        # strange case of TicksString = ':00'
        # times are padded with leading '0:' until they have two colons (h:mm:ss form)
        if resulttime[0] == ':':
            resulttime = '0'+resulttime
        while resulttime.count(':') < 2:
            resulttime = '0:'+resulttime
        outrec['time'] = resulttime
        
        # strange case of 0 time, causes ZeroDivisionError and is clearly not valid
        if timeu.timesecs(resulttime) == 0: return None

        # leave out age grade if exception occurs, skip results which have outliers
        # note the bare except logs and swallows every exception raised in this block,
        # in which case the record is still returned, without 'ag'
        try:
            # age grade uses the runner's age on race date computed from self.dt_dob,
            # and the gender recorded within the result (first character)
            e_racedate = athlinks.gettime(result['Race']['RaceDate'])
            resultgen = result['Gender'][0]
            dt_racedate = timeu.epoch2dt(e_racedate)
            racedateage = timeu.age(dt_racedate,self.dt_dob)
            agpercent,agresult,agfactor = ag.agegrade(racedateage,resultgen,course.distmiles,timeu.timesecs(resulttime))
            outrec['ag'] = agpercent
            if agpercent < 15 or agpercent >= 100: return None # skip obvious outliers
        except:
            app.logger.warning(traceback.format_exc())
            pass

        # and we're done
        return outrec
Exemple #7
0
def collect(searchfile, outfile, begindate, enddate):
    #----------------------------------------------------------------------
    '''
    collect race results from ultrasignup

    For each runner row in searchfile, the runner's ultrasignup results are
    retrieved and one row is written to outfile for each result which

    * has a race date within the (whole day adjusted) begindate..enddate window
    * has an age equal to the runner's age on race date, computed from DOB
    * has a gender equal to the first character of the runner's Gender
    * has a distance of at least 0.050 km
    * does not have an int race time (most likely a DNF)
    * does not have an age grade below 15 or at/above 100

    Both files are closed even if a lookup or conversion raises.
    
    :param searchfile: path to file containing names, genders, birth dates to search for
        (columns GivenName, FamilyName, DOB, Gender are read)
    :param outfile: output file path, written with header UltraSignupResultFile.filehdr
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''

    # common fields between input and output
    commonfields = 'GivenName,FamilyName,DOB,Gender'.split(',')

    # 'with' guarantees both files are closed, output first, on any exit path
    with open(searchfile, 'rb') as _IN, open(outfile, 'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT, UltraSignupResultFile.filehdr)
        OUT.writeheader()

        # create ultrasignup access
        ultra = ultrasignup.UltraSignup(debug=True)

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        adj_begindate = datetime.datetime(dt_begindate.year, dt_begindate.month,
                                          dt_begindate.day, 0, 0, 0)
        begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        adj_enddate = datetime.datetime(dt_enddate.year, dt_enddate.month,
                                        dt_enddate.day, 23, 59, 59)
        enddate = timeu.dt2epoch(adj_enddate)

        # start of collection, for elapsed time report
        start = time.time()

        # loop through runners in the input file
        # NOTE: a "participant too young" pre-filter used to sit here; it was disabled
        for runner in IN:
            fname, lname = runner['GivenName'], runner['FamilyName']
            dt_dob = ftime.asc2dt(runner['DOB'])
            gender = runner['Gender'][0]

            # loop through each result for this athlete
            for result in ultra.listresults(fname, lname):
                e_racedate = ftime.asc2epoch(result.racedate)

                # skip result if outside the desired time window
                if e_racedate < begindate or e_racedate > enddate:
                    continue

                # skip result if runner's age doesn't match the age within the result
                dt_racedate = timeu.epoch2dt(e_racedate)
                racedateage = timeu.age(dt_racedate, dt_dob)
                if result.age != racedateage:
                    continue

                # skip result if runner's gender doesn't match gender within the result
                if result.gender != gender:
                    continue

                # distance check
                distmiles = result.distmiles
                distkm = result.distkm
                if distkm is None or distkm < 0.050:
                    continue  # should already be filtered within ultrasignup, but just in case

                # int resulttime means DNF, most likely -- skip this result
                resulttime = result.racetime
                if isinstance(resulttime, int):
                    continue

                # create output record and copy common fields
                outrec = {field: runner[field] for field in commonfields}

                # fill in output record fields from runner, result
                outrec['name'] = '{} {}'.format(fname, lname)
                outrec['age'] = result.age
                outrec['race'] = result.racename
                outrec['date'] = ftime.epoch2asc(e_racedate)
                outrec['loc'] = '{}, {}'.format(result.racecity, result.racestate)
                outrec['miles'] = distmiles
                outrec['km'] = distkm

                # pad time to h:mm:ss form; handles strange case of time = ':00'
                if resulttime[0] == ':':
                    resulttime = '0' + resulttime
                while resulttime.count(':') < 2:
                    resulttime = '0:' + resulttime
                outrec['time'] = resulttime

                # just leave out age grade if exception occurs
                # (Exception rather than bare except, so ctrl-C and SystemExit still stop the run)
                try:
                    agpercent, agresult, agfactor = ag.agegrade(
                        racedateage, gender, distmiles, timeu.timesecs(resulttime))
                except Exception:
                    pass
                else:
                    outrec['ag'] = agpercent
                    if agpercent < 15 or agpercent >= 100:
                        continue  # skip obvious outliers

                OUT.writerow(outrec)

    finish = time.time()
    # parenthesized single-argument print prints the same text as the print statement
    print('number of URLs retrieved = {}'.format(ultra.geturlcount()))
    print('elapsed time (min) = {}'.format((finish - start) / 60))
    def convertserviceresult(self, result):
    #----------------------------------------------------------------------
        '''
        converts a single service result to dict suitable to be saved in resultfile

        result must be converted to dict with keys in `resultfilehdr` provided at instance creation

        must be overridden when ResultsCollect is instantiated

        use return value of None for cases when results could not be filtered by `:meth:getresults`

        Participant information (name, fname, lname, dob, gender, dt_dob) is read from
        attributes of self, not passed as parameters.

        Side effect visible in this method: when no matching Race exists, one is added
        to db.session and flushed.

        None is returned when the distance is missing or under 0.050 km, or when the
        computed age grade is below 15 or at/above 100.

        :param result: single service result, from list retrieved through `getresults`.
            Keys read here: race, date, miles, km, time, Gender
        :rtype: dict with keys matching `resultfilehdr`, or None if result is not to be saved
        '''

        # create output record and copy common fields
        outrec = {}

        # copy participant information
        outrec['name'] = self.name
        outrec['GivenName'] = self.fname
        outrec['FamilyName'] = self.lname
        outrec['DOB'] = self.dob
        outrec['Gender'] = self.gender

        # get race name, strip white space
        racename = result['race'].strip()
        # maybe truncate to FIRST part of race name
        if len(racename) > MAX_RACENAME_LEN:
            racename = racename[:MAX_RACENAME_LEN]
            
        outrec['race'] = racename
        outrec['date'] = result['date']
        # location is always stored empty here
        # NOTE(review): because loc is '' the truncation below can never trigger
        outrec['loc'] = ''
        if len(outrec['loc']) > MAX_LOCATION_LEN:
            outrec['loc'] = outrec['loc'][:MAX_LOCATION_LEN]
        
        # distance, category, time
        distmiles = result['miles']
        distkm = result['km']
        if distkm is None or distkm < 0.050: return None # should already be filtered within runningahead, but just in case

        outrec['miles'] = distmiles
        outrec['km'] = distkm
        resulttime = result['time']

        # what about surface? would require retrieving course and who knows if asphalt is set correctly?

        # strange case of TicksString = ':00'
        # times are padded with leading '0:' until they have two colons (h:mm:ss form)
        if resulttime[0] == ':':
            resulttime = '0'+resulttime
        while resulttime.count(':') < 2:
            resulttime = '0:'+resulttime
        outrec['time'] = resulttime
        outrec['timesecs'] = timeu.timesecs(resulttime)

        # retrieve or add race
        # flush should allow subsequent query per http://stackoverflow.com/questions/4201455/sqlalchemy-whats-the-difference-between-flush-and-commit
        # Race has uniqueconstraint for club_id/name/year/fixeddist. 
        # racecached is set but not read again within this method
        racecached = True
        raceyear = ftime.asc2dt(result['date']).year
        race = Race.query.filter_by(club_id=self.club_id, name=racename, year=raceyear, fixeddist=race_fixeddist(distmiles)).first()
        ### TODO: should the above be .all() then check for first race within epsilon distance?
        if not race:
            racecached = False
            race = Race(self.club_id, raceyear)
            race.name = racename
            race.distance = distmiles
            race.fixeddist = race_fixeddist(race.distance)
            race.date = result['date']
            race.active = True
            race.external = True
            race.surface = 'trail'  # a guess here, but we really don't know
            db.session.add(race)
            db.session.flush()  # force id to be created

        # age is on date of race
        # uses race.date, which for an already stored race is the stored date, not result['date']
        dt_racedate = ftime.asc2dt(race.date)
        racedateage = timeu.age(dt_racedate, self.dt_dob)
        outrec['age'] = racedateage

        # leave out age grade if exception occurs, skip results which have outliers
        # note the bare except logs and swallows every exception raised in this block,
        # in which case the record is still returned, without 'ag'
        # NOTE(review): gender is read from result['Gender'] rather than self.gender --
        # if the service result has no 'Gender' key every call logs a KeyError; confirm
        try:
            resultgen = result['Gender'][0].upper()
            agpercent,agresult,agfactor = ag.agegrade(racedateage, resultgen, distmiles, timeu.timesecs(resulttime))
            outrec['ag'] = agpercent
            if agpercent < 15 or agpercent >= 100: return None # skip obvious outliers
        except:
            app.logger.warning(traceback.format_exc())
            pass

        # and we're done
        return outrec
Exemple #9
0
def collect(searchfile,outfile,begindate,enddate):
#----------------------------------------------------------------------
    '''
    collect race results from athlinks

    For each runner row in searchfile, the runner's athlinks results are
    retrieved and one row is written to outfile for each result which

    * has a race date within the (whole day adjusted) begindate..enddate window
    * has an age equal to the runner's age on race date, or an age which is a
      multiple of 5 matching the runner's 5 year age group (marked fuzzyage 'Y')
    * has a gender whose first character equals that of the runner's Gender
    * has a course category found in race_category
    * has a distance of at least 0.050 km
    * does not have an age grade below 15 or at/above 100

    Both files are closed even if a lookup or conversion raises.
    
    :param searchfile: path to file containing names, genders, birth dates to search for
        (columns GivenName, FamilyName, DOB, Gender are read)
    :param outfile: output file path, written with header resultfilehdr
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''

    # common fields between input and output
    commonfields = 'GivenName,FamilyName,DOB,Gender'.split(',')

    # 'with' guarantees both files are closed, output first, on any exit path
    with open(searchfile,'rb') as _IN, open(outfile,'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT,resultfilehdr)
        OUT.writeheader()

        # create athlinks
        athl = athlinks.Athlinks(debug=True)

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        adj_begindate = datetime.datetime(dt_begindate.year,dt_begindate.month,dt_begindate.day,0,0,0)
        begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        adj_enddate = datetime.datetime(dt_enddate.year,dt_enddate.month,dt_enddate.day,23,59,59)
        enddate = timeu.dt2epoch(adj_enddate)

        # start of collection, for elapsed time report
        start = time.time()

        # loop through runners in the input file
        # NOTE: a "participant too young" pre-filter used to sit here; it was disabled
        for runner in IN:
            name = ' '.join([runner['GivenName'],runner['FamilyName']])
            dt_dob = ftime.asc2dt(runner['DOB'])

            # loop through each result for this athlete
            for result in athl.listathleteresults(name):
                e_racedate = athlinks.gettime(result['Race']['RaceDate'])

                # skip result if outside the desired time window
                if e_racedate < begindate or e_racedate > enddate: continue

                # create output record and copy common fields
                outrec = {field: runner[field] for field in commonfields}

                # skip result if runner's age doesn't match the age within the result
                # sometimes athlinks stores the age group of the runner, not exact age,
                # so also check if this runner's age is within the age group, and indicate if so
                dt_racedate = timeu.epoch2dt(e_racedate)
                racedateage = timeu.age(dt_racedate,dt_dob)
                resultage = int(result['Age'])
                if resultage != racedateage:
                    # an age which is not a multiple of 5 is not an age group, skip this result
                    # (// keeps the floor division explicit; assumes racedateage is an int)
                    if (resultage//5)*5 != resultage: continue
                    # skip result unless runner's age falls within the result's age group
                    if (racedateage//5)*5 != resultage: continue
                    # runner's age consistent with race age group, use result, but mark "fuzzy"
                    outrec['fuzzyage'] = 'Y'

                # skip result if runner's gender doesn't match gender within the result
                resultgen = result['Gender'][0]
                if resultgen != runner['Gender'][0]: continue

                # get course used for this result
                course = athl.getcourse(result['Race']['RaceID'],result['CourseID'])
                thiscourse = course['Courses'][0]

                # skip result if not Running or Trail Running race
                thiscategory = thiscourse['RaceCatID']
                if thiscategory not in race_category: continue

                # fill in output record fields from runner, result, course
                # combine name, get age
                outrec['name'] = '{} {}'.format(runner['GivenName'],runner['FamilyName'])
                outrec['age'] = result['Age']

                # leave athlmember and athlid blank if result not from an athlink member
                if result['IsMember']:
                    outrec['athlmember'] = 'Y'
                    outrec['athlid'] = result['RacerID']

                # race name, location; convert from unicode if necessary
                # TODO: make function to do unicode translation -- apply to runner name as well (or should csv just store unicode?)
                racename = csvu.unicode2ascii(course['RaceName'])
                coursename = csvu.unicode2ascii(thiscourse['CourseName'])
                outrec['race'] = '{} / {}'.format(racename,coursename)
                outrec['date'] = ftime.epoch2asc(athlinks.gettime(course['RaceDate']))
                outrec['loc'] = csvu.unicode2ascii(course['Home'])

                # distance, category, time
                distmiles = athlinks.dist2miles(thiscourse['DistUnit'],thiscourse['DistTypeID'])
                distkm = athlinks.dist2km(thiscourse['DistUnit'],thiscourse['DistTypeID'])
                if distkm < 0.050: continue # skip timed events, which seem to be recorded with 0 distance

                outrec['miles'] = distmiles
                outrec['km'] = distkm
                outrec['category'] = race_category[thiscategory]
                resulttime = result['TicksString']

                # pad time to h:mm:ss form; handles strange case of TicksString = ':00'
                if resulttime[0] == ':':
                    resulttime = '0'+resulttime
                while resulttime.count(':') < 2:
                    resulttime = '0:'+resulttime
                outrec['time'] = resulttime

                # just leave out age grade if exception occurs
                # (Exception rather than bare except, so ctrl-C and SystemExit still stop the run)
                try:
                    agpercent,agresult,agfactor = ag.agegrade(racedateage,resultgen,distmiles,timeu.timesecs(resulttime))
                except Exception:
                    pass
                else:
                    outrec['ag'] = agpercent
                    if agpercent < 15 or agpercent >= 100: continue # skip obvious outliers

                OUT.writerow(outrec)

    finish = time.time()
    # parenthesized single-argument print prints the same text as the print statement
    print('number of URLs retrieved = {}'.format(athl.geturlcount()))
    print('elapsed time (min) = {}'.format((finish-start)/60))
Exemple #10
0
# set up services to collect and store data from
# normstoreattrs lists the normalized attribute names, in a fixed order. Each service
# below supplies a list of its own names in the SAME order, and the two lists are zipped
# into a {normalized name: service name} transform dict.
# collectservices / storeservices are keyed by service name.
normstoreattrs = 'runnername,dob,gender,sourceid,sourceresultid,racename,date,raceloc,raceage,distmiles,time,timesecs,fuzzyage'.split(',')
collectservices = {}
storeservices = {}

## athlinks handling
from athlinksresults import AthlinksCollect, AthlinksResultFile
athl = AthlinksCollect()
collectservices['athlinks'] = athl.collect
# athlinks names, positionally matched to normstoreattrs (must stay the same length and order)
athlinksattrs = 'name,dob,gender,id,entryid,racename,racedate,raceloc,age,distmiles,resulttime,timesecs,fuzzyage'.split(',')
athlinkstransform = dict(zip(normstoreattrs, athlinksattrs))
# dates come in as datetime, reset to ascii
# athlinkstransform['dob'] = lambda row: ftime.dt2asc(getattr(row, 'dob'))
# athlinkstransform['date'] = lambda row: ftime.dt2asc(getattr(row, 'racedate'))
# a transform value may be a callable taking the row instead of an attribute name
# (presumably StoreServiceResults calls it per row -- confirm)
# NOTE(review): timesecs is referenced as a bare name here, so it must be in scope at
# module level -- its import is not visible in this chunk
athlinkstransform['timesecs'] = lambda row: timesecs(getattr(row, 'resulttime'))    # not provided by athlinks
athlresults = AthlinksResultFile()
storeservices['athlinks'] = StoreServiceResults('athlinks', athlresults, athlinkstransform)

## ultrasignup handling
from ultrasignupresults import UltraSignupCollect, UltraSignupResultFile
us = UltraSignupCollect()
collectservices['ultrasignup'] = us.collect
# ultrasignup names, positionally matched to normstoreattrs (must stay the same length and order)
usattrs = 'name,dob,gender,sourceid,sourceresultid,race,date,raceloc,age,miles,time,timesecs,fuzzyage'.split(',')
ustransform = dict(zip(normstoreattrs, usattrs))
# these three are replaced by constants: sourceid and sourceresultid are always None,
# fuzzyage is always False
ustransform['sourceid'] = lambda row: None
ustransform['sourceresultid'] = lambda row: None
ustransform['fuzzyage'] = lambda row: False
usresults = UltraSignupResultFile()
storeservices['ultrasignup'] = StoreServiceResults('ultrasignup', usresults, ustransform)