def filtermissed(missed, racedate, resultage):
    #----------------------------------------------------------------------
    '''
    filter missed matches whose age on race date differs from the age in
    the result by more than the configured max age delta (AGE_DELTAMAX)

    the caller's list is left untouched, a new list is returned

    :param missed: list of missed matches, as returned from clubmember.xxx().getmissedmatches()
    :param racedate: race date in dbdate format
    :param resultage: age from race result, if None, '', 0, empty list is returned
    :rtype: missed list, including only elements within the allowed age range
    '''
    # if age in result is invalid, empty list is returned
    if not resultage:
        return []

    racedatedt = dbdate.asc2dt(racedate)

    # don't consider 'missed matches' where age difference from result is too large
    # (only the 'dob' field of each match is needed to decide)
    return [
        thismissed for thismissed in missed
        if abs(timeu.age(racedatedt, dbdate.asc2dt(thismissed['dob'])) - resultage) <= AGE_DELTAMAX
    ]
def filtermissed(missed,racedate,resultage):
    #----------------------------------------------------------------------
    '''
    filter missed matches which are greater than a configured max age delta
    (AGE_DELTAMAX) away from the age found in the race result

    the list passed in by the caller is not modified

    :param missed: list of missed matches, as returned from clubmember.xxx().getmissedmatches()
    :param racedate: race date in dbdate format
    :param resultage: age from race result, if None, '', 0, empty list is returned
    :rtype: missed list, including only elements within the allowed age range
    '''
    # if age in result is invalid, empty list is returned
    if not resultage:
        return []

    racedatedt = dbdate.asc2dt(racedate)

    # build the list of matches to keep, rather than removing the rejects
    # one at a time from a copy (each list.remove() is a linear scan)
    kept = []
    for thismissed in missed:
        # don't consider 'missed matches' where age difference from result is too large
        dobdt = dbdate.asc2dt(thismissed['dob'])
        if abs(timeu.age(racedatedt,dobdt) - resultage) <= AGE_DELTAMAX:
            kept.append(thismissed)

    return kept
def _getdivision(member):
    '''
    determine the division for a member, using the member's age as of
    Jan 1 of the current (local time) year

    :param member: Member record
    :rtype: division text
    '''
    # this must match grand prix configuration in membership database
    # TODO: add api to query this information from scoretility
    # each entry is (oldest age in division, division text), youngest first
    brackets = (
        (13, '13 and under'),
        (29, '14-29'),
        (39, '30-39'),
        (49, '40-49'),
        (59, '50-59'),
        (69, '60-69'),
    )

    # use local time
    localnow = epoch2dt(time.time() - time.timezone)
    newyearsday = datetime(localnow.year, 1, 1)
    memberage = age(newyearsday, member.dob)

    for oldest, divtext in brackets:
        if memberage <= oldest:
            return divtext

    # older than every bracket above
    return '70 and over'
def _getdivision(member):
    #----------------------------------------------------------------------
    '''
    determine division from RunningAheadMember record, based on the
    member's age as of Jan 1 of the current (local time) year

    :param member: RunningAheadMember record
    :rtype: division text
    '''
    # use local time
    localyear = timeu.epoch2dt(time.time()-time.timezone).year
    memberage = timeu.age(datetime(localyear, 1, 1), ymd.asc2dt(member.dob))

    # this must match grand prix configuration in membership database
    # TODO: add api to query this information from scoretility
    # (age ceiling, division text) pairs in ascending age order
    divisions = [
        (13, '13 and under'),
        (29, '14-29'),
        (39, '30-39'),
        (49, '40-49'),
        (59, '50-59'),
        (69, '60-69'),
    ]

    # first division whose ceiling covers the member, else the open-ended one
    return next(
        (divtext for ceiling, divtext in divisions if memberage <= ceiling),
        '70 and over')
def get(self):
    '''
    fetch the age grade for a racing team member's race result from scoretility

    request arguments (all are required): name, racedate, dist, units
    ('miles' or 'km'), time

    :rtype: text of the scoretility _agegrade response when it answers with
        status 200, otherwise a json response with status 'fail'
    '''
    try:
        name = request.args['name']
        localuser = LocalUser.query.filter_by(
            name=name, active=True, **localinterest_query_params()).one()
        member = RacingTeamMember.query.filter_by(
            localuser=localuser, **localinterest_query_params()).one()

        racedatedt = isodate.asc2dt(request.args['racedate'])
        memberage = age(racedatedt, member.dateofbirth)

        # request arguments arrive as text; convert dist to a number up front,
        # otherwise the marathon / half marathon comparisons below can never
        # be true and the exact distances are never substituted
        dist = float(request.args['dist'])
        units = request.args['units']
        racetime = request.args['time']

        # convert marathon and half marathon to exact miles
        if (dist == 26.2 and units == 'miles') or (dist == 42.2 and units == 'km'):
            dist = 26.2188
        elif (dist == 13.1 and units == 'miles') or (dist == 21.1 and units == 'km'):
            dist = 13.1094
        # convert dist to miles
        elif units == 'km':
            dist = dist / 1.609344

        # convert parameters to query string
        theparams = {
            'age': memberage,
            'gender': member.gender,
            'distance': dist,
            'time': racetime,
        }

        # get age grade data
        response = requests.get(
            f'https://scoretility.com/_agegrade?{urlencode(theparams)}')
        if response.status_code == 200:
            # no need to jsonify() the text, as scoretility already did that
            return response.text

        # need ERROR, to emulate error string from scoretility
        return jsonify(
            status='fail',
            errorfield='server response',
            errordetail='ERROR,bad response from agegrade fetch')

    except Exception as e:
        # any failure (missing argument, no such member, bad number, network
        # error) is logged with its traceback and reported to the caller
        exc = ''.join(format_exception_only(type(e), e))
        output_result = {
            'status': 'fail',
            'error': 'exception occurred:\n{}'.format(exc)
        }
        current_app.logger.error(format_exc())
        return jsonify(output_result)
def getresults(self, name, fname, lname, gender, dt_dob, begindate, enddate):
    #----------------------------------------------------------------------
    '''
    retrieves a list of results for a single name

    results are requested from self.service by first / last name, then
    kept only if the race date is within the window, the age within the
    result matches the runner's age on race day, and the gender matches

    :param name: name of participant for which results are to be returned
    :param fname: first name of participant
    :param lname: last name of participant
    :param gender: 'M' or 'F'
    :param dt_dob: participant's date of birth, as datetime
    :param begindate: epoch time for start of results, 00:00:00 on date to begin
    :param enddate: epoch time for end of results, 23:59:59 on date to finish
    :rtype: list of serviceresults, each of which can be processed by convertresult
    '''

    def _belongs(result):
        '''True if result is in the time window and matches runner's age and gender'''
        e_racedate = ftime.asc2epoch(result.racedate)

        # outside the desired time window
        if e_racedate < begindate or e_racedate > enddate:
            return False

        # runner's age on race day must match the age within the result
        if result.age != timeu.age(timeu.epoch2dt(e_racedate), dt_dob):
            return False

        # runner's gender must match gender within the result
        return not (result.gender != gender)

    # remember participant data
    self.name, self.fname, self.lname = name, fname, lname
    self.gender = gender
    self.dt_dob = dt_dob
    self.dob = ftime.dt2asc(dt_dob)

    # get results for this athlete, filtered by date, age and gender
    candidates = self.service.listresults(fname, lname)
    return [candidate for candidate in candidates if _belongs(candidate)]
def get(self):
    '''
    look up an active racing team member by name and report the member's
    gender and age on the race date

    request arguments: name, racedate

    :rtype: json response, status 'success' with age and gender, or
        status 'fail' with error text if anything goes wrong
    '''
    try:
        membername = request.args['name']

        # exactly one active user, and one team member for that user, is expected
        userquery = LocalUser.query.filter_by(
            name=membername, active=True, **localinterest_query_params())
        thisuser = userquery.one()
        memberquery = RacingTeamMember.query.filter_by(
            localuser=thisuser, **localinterest_query_params())
        thismember = memberquery.one()

        raceday = isodate.asc2dt(request.args['racedate'])
        return jsonify(
            status='success',
            age=age(raceday, thismember.dateofbirth),
            gender=thismember.gender)

    except Exception as e:
        # report the exception to the caller, full traceback goes to the log
        summary = ''.join(format_exception_only(type(e), e))
        failure = dict(
            status='fail',
            error='exception occurred:\n{}'.format(summary))
        current_app.logger.error(format_exc())
        return jsonify(failure)
def rendermissed(missed, racedate):
    #----------------------------------------------------------------------
    '''
    render missed matches as comma separated text, each match shown as
    dbname(age on race date)

    :param missed: list of missed matches, as returned from clubmember.xxx().getmissedmatches()
    :param racedate: race date in dbdate format
    :rtype: renderable missed list
    '''
    racedatedt = dbdate.asc2dt(racedate)

    pieces = []
    for entry in missed:
        entryage = timeu.age(racedatedt, dbdate.asc2dt(entry['dob']))
        pieces.append('{}({}), '.format(entry['dbname'], entryage))

    # every piece ends with comma/space, drop the last one
    # (slicing is harmless when there were no missed matches)
    return ''.join(pieces)[:-2]
def rendermissed(missed,racedate):
    #----------------------------------------------------------------------
    '''
    render missed matches

    the rendering is dbname(age), dbname(age), ... where age is the age
    of the missed match on the race date

    :param missed: list of missed matches, as returned from clubmember.xxx().getmissedmatches()
    :param racedate: race date in dbdate format
    :rtype: renderable missed list
    '''
    racedatedt = dbdate.asc2dt(racedate)

    def _onemissed(thismissed):
        '''text for a single missed match, including trailing comma/space'''
        missedage = timeu.age(racedatedt,dbdate.asc2dt(thismissed['dob']))
        return '{}({}), '.format(thismissed['dbname'],missedage)

    rendered = ''.join(_onemissed(thismissed) for thismissed in missed)

    # remove trailing comma/space, if anything was rendered
    return rendered[:-2] if rendered else rendered
def splitresults(self, FH, debugfile=None):
    #----------------------------------------------------------------------
    '''
    split input file into separate output files

    output files are named based on Race, Date fields of input file, and
    are written to the current directory, one per input row

    :param FH: DictReader object containing race results
    :param debugfile: name of optional debug file
    :raises memberError: if an athlete cannot be found in self.members
    '''
    # fields in destination results file, and in the debug file
    resultsfields = 'place,name,gender,age,time'.split(',')
    debughdrs = 'date,race,resname,membername,dob'.split(',')

    # only use valid filename characters. See http://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename-in-python
    valid_chars = "-_.() {}{}".format(string.ascii_letters, string.digits)

    # find time at the end of the string, one or more digits, any number of :, any number of .
    # raw string, so that \s reaches the regex engine rather than being an
    # (invalid) string escape; compiled once rather than for every athlete
    resultpattern = re.compile(r'(.+)\s(([0-9]+:*.*)+)')

    # create debug file if specified
    _DEB = open(debugfile, 'w', newline='') if debugfile else None

    try:
        # header is written inside the try so the debug file is closed
        # even if this write fails
        DEB = None
        if _DEB is not None:
            DEB = csv.DictWriter(_DEB, debughdrs)
            DEB.writeheader()

        # each line produces a separate file
        for race in FH:
            racedate = tymd.dt2asc(tmdy.asc2dt(race['Date']))
            racename = race['Race']
            origfname = '{}-{}.csv'.format(racedate, racename)
            fname = ''.join(c for c in origfname if c in valid_chars)

            with open(fname, 'w', newline='') as _RFH:
                RFH = csv.DictWriter(_RFH, resultsfields)
                RFH.writeheader()

                # athletes are listed in finishing order
                place = 1
                athleteresults = race['Athletes / Results'].split(', ')
                for athleteresult in athleteresults:
                    # anything from the first ' (' onward is ignored
                    athletetime = athleteresult.split(' (')[0]
                    ressplit = resultpattern.search(athletetime)
                    thisathlete = ressplit.group(1)
                    thistime = ressplit.group(2)

                    # look up athlete's dob and gender
                    member = self.members.getmember(thisathlete)
                    if not member:
                        raise memberError(
                            'could not find {} - see race {}'.format(
                                thisathlete, race))
                    bestmember = member['matchingmembers'][0]
                    membername = bestmember['name']
                    membergen = bestmember['gender'][0]  # just first character
                    memberdob = bestmember['dob']

                    # debug
                    if DEB is not None:
                        DEB.writerow(dict(zip(debughdrs, [
                            racedate, racename, thisathlete, membername,
                            memberdob
                        ])))

                    # output result, with athlete's age on race date
                    resultage = timeu.age(tymd.asc2dt(racedate),
                                          tymd.asc2dt(memberdob))
                    RFH.writerow(dict(zip(resultsfields, [
                        place, membername, membergen, resultage, thistime
                    ])))
                    place += 1

    finally:
        # debug
        if _DEB is not None:
            _DEB.close()
def main():
#----------------------------------------------------------------------
    '''
    update club membership information

    command line: memberfile [-r racedb] [--debug]

    members are read from memberfile (csv, xls or xlsx) and the Runner
    table of the race database is brought into line with it:

    * a member already in the database (close name match, same date of
      birth) is updated
    * a nonmember of the same name is converted to a member, if there is no
      date of birth to check or the age in the nonmember's first race result
      is consistent with the date of birth in memberfile
    * otherwise a new runner is inserted
    * active members in the database which are not in memberfile are deactivated

    <memberfile base name>-newmem.csv is written next to memberfile, with a
    row for each member which was not matched to an existing member.  with
    --debug, updatemembers.txt is written in the current directory.

    NOTE(review): this is python 2 code (print statement)
    '''
    parser = argparse.ArgumentParser(version='{0} {1}'.format('runningclub',version.__version__))
    parser.add_argument('memberfile',help='csv, xls or xlsx file with member information')
    parser.add_argument('-r','--racedb',help='filename of race database (default is as configured during rcuserconfig)',default=None)
    parser.add_argument('--debug',help='if set, create updatemembers.txt for debugging',action='store_true')
    args = parser.parse_args()

    # OUT stays None unless debug output was requested
    OUT = None
    if args.debug:
        OUT = open('updatemembers.txt','w')

    racedb.setracedb(args.racedb)
    session = racedb.Session()

    # get clubmembers from file
    # NOTE(review): the extension test is case sensitive, e.g. '.CSV' is rejected
    memberfile = args.memberfile
    root,ext = os.path.splitext(memberfile)
    if ext in ['.xls','.xlsx']:
        members = clubmember.XlClubMember(memberfile)
    elif ext in ['.csv']:
        members = clubmember.CsvClubMember(memberfile)
    else:
        # NOTE(review): returns with OUT and session still open
        print '***ERROR: invalid memberfile {}, must be csv, xls or xlsx'.format(memberfile)
        return

    # get old clubmembers from database
    dbmembers = clubmember.DbClubMember()   # use default database

    # get all the member runners currently in the database
    # hash them into dict by (name,dateofbirth)
    # entries are removed below as members are found in memberfile; whatever
    # is left at the end gets deactivated
    allrunners = session.query(racedb.Runner).filter_by(member=True,active=True).all()
    inactiverunners = {}
    for thisrunner in allrunners:
        inactiverunners[thisrunner.name,thisrunner.dateofbirth] = thisrunner
        if OUT:
            OUT.write('found id={0}, runner={1}\n'.format(thisrunner.id,thisrunner))

    # make report for new members found with this memberfile
    logdir = os.path.dirname(args.memberfile)
    memberfilebase = os.path.splitext(os.path.basename(args.memberfile))[0]
    newmemlogname = '{0}-newmem.csv'.format(memberfilebase)
    NEWMEM = open(os.path.join(logdir,newmemlogname),'wb')
    NEWMEMCSV = csv.DictWriter(NEWMEM,['name','dob'])
    NEWMEMCSV.writeheader()

    # prepare for age check
    thisyear = timeu.epoch2dt(time.time()).year
    asofasc = '{}-1-1'.format(thisyear) # jan 1 of current year
    asof = tYmd.asc2dt(asofasc)

    # process each name in new membership list
    allmembers = members.getmembers()
    for name in allmembers:
        thesemembers = allmembers[name]
        # NOTE: may be multiple members with same name
        for thismember in thesemembers:
            thisname = thismember['name']
            thisdob = thismember['dob']
            thisgender = thismember['gender'][0].upper()    # male -> M, female -> F
            thishometown = thismember['hometown']

            # prep for if .. elif below by running some queries
            # handle close matches, if DOB does match
            age = timeu.age(asof,tYmd.asc2dt(thisdob))
            matchingmember = dbmembers.findmember(thisname,age,asofasc)
            dbmember = None
            if matchingmember:
                membername,memberdob = matchingmember
                if memberdob == thisdob:
                    dbmember = racedb.getunique(session,racedb.Runner,member=True,name=membername,dateofbirth=thisdob)

            # TODO: need to handle case where dob transitions from '' to actual date of birth

            # no member found, maybe there is nonmember of same name already in database
            # (dbnonmember is only bound on this path; the elif below is only
            # reached when dbmember is None, so it is bound when tested)
            if dbmember is None:
                dbnonmember = racedb.getunique(session,racedb.Runner,member=False,name=thisname)
                # TODO: there's a slim possibility that there are two nonmembers with the same name, but I'm sure we've already
                # bolloxed that up in importresult as there's no way to discriminate between the two

                # make report for new members
                # NOTE(review): nesting of this statement under the 'if' was
                # reconstructed from flattened source -- confirm
                NEWMEMCSV.writerow({'name':thisname,'dob':thisdob})

            # see if this runner is a member in the database already, or was a member once and make the update
            # add or update runner in database
            # get instance, if it exists, and make any updates
            found = False
            if dbmember is not None:
                thisrunner = racedb.Runner(membername,thisdob,thisgender,thishometown)

                # this is also done down below, but must be done here in case member's name has changed
                if (thisrunner.name,thisrunner.dateofbirth) in inactiverunners:
                    inactiverunners.pop((thisrunner.name,thisrunner.dateofbirth))

                # overwrite member's name if necessary
                thisrunner.name = thisname

                added = racedb.update(session,racedb.Runner,dbmember,thisrunner,skipcolumns=['id'])
                found = True

            # if runner's name is in database, but not a member, see if this runner is a nonmemember which can be converted
            # Check first result for age against age within the input file
            # if ages match, convert nonmember to member
            elif dbnonmember is not None:
                # get dt for date of birth, if specified
                try:
                    dob = tYmd.asc2dt(thisdob)
                except ValueError:
                    dob = None

                # nonmember came into the database due to a nonmember race result, so we can use any race result to check nonmember's age
                # NOTE(review): .first() returns None when the nonmember has
                # no results, and result.agage would then raise -- confirm
                # this cannot happen
                if dob:
                    result = session.query(racedb.RaceResult).filter_by(runnerid=dbnonmember.id).first()
                    resultage = result.agage
                    racedate = tYmd.asc2dt(result.race.date)
                    # age on race date: year difference, less one if the
                    # birthday had not yet been reached that year
                    expectedage = racedate.year - dob.year - int((racedate.month, racedate.day) < (dob.month, dob.day))

                # we found the right person, always if dob isn't specified, but preferably check race result for correct age
                if dob is None or resultage == expectedage:
                    thisrunner = racedb.Runner(thisname,thisdob,thisgender,thishometown)
                    added = racedb.update(session,racedb.Runner,dbnonmember,thisrunner,skipcolumns=['id'])
                    found = True
                else:
                    print '{} found in database, wrong age, expected {} found {} in {}'.format(thisname,expectedage,resultage,result)
                    # TODO: need to make file for these, also need way to force update, because maybe bad date in database for result
                    # currently this will cause a new runner entry

            # if runner was not found in database, just insert new runner
            if not found:
                thisrunner = racedb.Runner(thisname,thisdob,thisgender,thishometown)
                added = racedb.insert_or_update(session,racedb.Runner,thisrunner,skipcolumns=['id'],name=thisname,dateofbirth=thisdob)

            # remove this runner from collection of runners which should be deactivated in database
            if (thisrunner.name,thisrunner.dateofbirth) in inactiverunners:
                inactiverunners.pop((thisrunner.name,thisrunner.dateofbirth))

            if OUT:
                if added:
                    OUT.write('added or updated {0}\n'.format(thisrunner))
                else:
                    OUT.write('no updates necessary {0}\n'.format(thisrunner))

    # any runners remaining in 'inactiverunners' should be deactivated
    for (name,dateofbirth) in inactiverunners:
        thisrunner = session.query(racedb.Runner).filter_by(name=name,dateofbirth=dateofbirth).first() # should be only one returned by filter
        thisrunner.active = False

        if OUT:
            OUT.write('deactivated {0}\n'.format(thisrunner))

    # all of the changes above are committed together
    session.commit()
    session.close()
    NEWMEM.close()

    if OUT:
        OUT.close()
def collect(searchfile,outfile,begindate,enddate):
#----------------------------------------------------------------------
    '''
    collect race results from ultrasignup

    for each runner in searchfile, the runner's ultrasignup results are
    retrieved, and those within the date window which match the runner's
    age on race day and gender are written to outfile.  results are also
    dropped if the distance is missing or tiny, if there is no time (DNF),
    or if the age grade is an obvious outlier.

    :param searchfile: path to file containing names, genders, birth dates to search for
    :param outfile: output file path
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''

    # open files, making sure both are closed even if a lookup or write
    # fails part way through
    with open(searchfile,'rb') as _IN, open(outfile,'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT,UltraSignupResultFile.filehdr)
        OUT.writeheader()

        # common fields between input and output
        commonfields = 'GivenName,FamilyName,DOB,Gender'.split(',')

        # create ultrasignup access
        ultra = ultrasignup.UltraSignup(debug=True)

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        adj_begindate = datetime.datetime(dt_begindate.year,dt_begindate.month,dt_begindate.day,0,0,0)
        begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        adj_enddate = datetime.datetime(dt_enddate.year,dt_enddate.month,dt_enddate.day,23,59,59)
        enddate = timeu.dt2epoch(adj_enddate)

        # remember when we started, for elapsed time report
        start = time.time()

        # loop through runners in the input file
        for runner in IN:
            fname,lname = runner['GivenName'],runner['FamilyName']
            dt_dob = ftime.asc2dt(runner['DOB'])
            gender = runner['Gender'][0]

            # get results for this athlete
            results = ultra.listresults(fname,lname)

            # loop through each result
            for result in results:
                e_racedate = ftime.asc2epoch(result.racedate)

                # skip result if outside the desired time window
                if e_racedate < begindate or e_racedate > enddate: continue

                # skip result if runner's age doesn't match the age within the result
                dt_racedate = timeu.epoch2dt(e_racedate)
                racedateage = timeu.age(dt_racedate,dt_dob)
                if result.age != racedateage: continue

                # skip result if runner's gender doesn't match gender within the result
                if result.gender != gender: continue

                # create output record and copy common fields
                outrec = {}
                for field in commonfields:
                    outrec[field] = runner[field]

                # fill in output record fields from runner, result
                # combine name, get age
                outrec['name'] = '{} {}'.format(runner['GivenName'],runner['FamilyName'])
                outrec['age'] = result.age

                # race name, location
                outrec['race'] = result.racename
                outrec['date'] = ftime.epoch2asc(e_racedate)
                outrec['loc'] = '{}, {}'.format(result.racecity, result.racestate)

                # distance, category, time
                distmiles = result.distmiles
                distkm = result.distkm
                if distkm is None or distkm < 0.050: continue # should already be filtered within ultrasignup, but just in case

                outrec['miles'] = distmiles
                outrec['km'] = distkm
                resulttime = result.racetime

                # int resulttime means DNF, most likely -- skip this result
                if isinstance(resulttime, int): continue

                # strange case of TicksString = ':00'
                if resulttime[0] == ':':
                    resulttime = '0'+resulttime
                # pad to h:mm:ss form
                while resulttime.count(':') < 2:
                    resulttime = '0:'+resulttime
                outrec['time'] = resulttime

                # just leave out age grade if exception occurs
                # (Exception rather than a bare except, so that ctrl-C and
                # interpreter exit are not swallowed along with it)
                try:
                    agpercent,agresult,agfactor = ag.agegrade(racedateage,gender,distmiles,timeu.timesecs(resulttime))
                    outrec['ag'] = agpercent
                    if agpercent < 15 or agpercent >= 100: continue # skip obvious outliers
                except Exception:
                    pass

                OUT.writerow(outrec)

    finish = time.time()
    # parenthesized single argument prints the same text under python 2 and 3
    print('number of URLs retrieved = {}'.format(ultra.geturlcount()))
    print('elapsed time (min) = {}'.format((finish-start)/60))
def post(self):
#----------------------------------------------------------------------
    '''
    import the club's membership list, either from an uploaded file or from
    the club's configured member service api, and bring the club's Runner
    records into line with it

    uses flask.session['club_id'], and request arguments
    useapi ('true' to collect members via the api, otherwise
    request.files['file'] is used) and force ('true' to confirm that
    existing members may be overwritten)

    aborts with 403 if the user does not have update permission for the
    club.  returns failure_response() for configuration / file problems, and
    failure_response(confirm=True) if active members already exist and force
    was not given.  otherwise members are updated / converted from nonmember /
    inserted, active members which are not in the list are deactivated, the
    session is committed and success_response() is returned.

    NOTE(review): this is python 2 code ("except WindowsError,e")
    NOTE(review): no except / finally clause for the outer try is visible in
    this source -- the handler appears to have been cut off, and needs to be
    restored before this can run
    '''
    def allowed_file(filename):
        # NOTE(review): case sensitive, e.g. 'members.CSV' is not allowed
        return '.' in filename and filename.split('.')[-1] in ['csv','xlsx','xls']

    try:
        club_id = flask.session['club_id']
        # NOTE(review): this value is not used, thisyear is reassigned before the age check below
        thisyear = flask.session['year']

        # NOTE(review): readcheck is not used within this function
        readcheck = ViewClubDataPermission(club_id)
        writecheck = UpdateClubDataPermission(club_id)

        # verify user can write the data, otherwise abort
        if not writecheck.can():
            db.session.rollback()
            flask.abort(403)

        # if using api, collect data from api and save in temp directory
        useapi = request.args.get('useapi')=='true'

        # if we're using the api, do some quick checks that the request makes sense
        # save apitype, apiid, apikey, apisecret for later
        if useapi:
            thisclub = Club.query.filter_by(id=club_id).first()
            apitype = thisclub.memberserviceapi
            apiid = thisclub.memberserviceid
            if not apitype or not apiid:
                db.session.rollback()
                cause = 'Unexpected Error: API requested but not configured'
                app.logger.error(cause)
                return failure_response(cause=cause)
            thisapi = ApiCredentials.query.filter_by(name=apitype).first()
            if not thisapi:
                db.session.rollback()
                cause = "Unexpected Error: API credentials for '{}' not configured".format(apitype)
                app.logger.error(cause)
                return failure_response(cause=cause)
            apikey = thisapi.key
            apisecret = thisapi.secret
            if not apikey or not apisecret:
                db.session.rollback()
                cause = "Unexpected Error: API credentials for '{}' not configured with key or secret".format(apitype)
                app.logger.error(cause)
                return failure_response(cause=cause)

        # if we're not using api, file came in with request
        else:
            memberfile = request.files['file']

            # get file extention
            # NOTE(review): memberfile.filename is used here, before the
            # 'missing file' check just below
            root,ext = os.path.splitext(memberfile.filename)

            # make sure valid file
            if not memberfile:
                db.session.rollback()
                cause = 'Unexpected Error: Missing file'
                app.logger.error(cause)
                return failure_response(cause=cause)
            if not allowed_file(memberfile.filename):
                db.session.rollback()
                cause = 'Invalid file type {} for file {}'.format(ext,memberfile.filename)
                app.logger.error(cause)
                return failure_response(cause=cause)

        # get all the member runners currently in the database
        # hash them into dict by (name,dateofbirth)
        # entries are removed below as members are found in the new list;
        # whatever is left at the end gets deactivated
        allrunners = Runner.query.filter_by(club_id=club_id,member=True,active=True).all()
        inactiverunners = {}
        for thisrunner in allrunners:
            inactiverunners[thisrunner.name,thisrunner.dateofbirth] = thisrunner

        # if some members exist, verify user wants to overwrite
        if allrunners and not request.args.get('force')=='true':
            db.session.rollback()
            return failure_response(cause='Overwrite members?',confirm=True)

        # if we're using the api, collect the member information using the appropriate credentials
        # NOTE: only runsignup supported at this time
        if useapi:
            tempdir = tempfile.mkdtemp()
            memberfilename = 'members.csv'
            ext = '.csv'
            memberpathname = os.path.join(tempdir,memberfilename)
            rsu_members2csv(apiid, apikey, apisecret, rsu_api2filemapping, filepath=memberpathname)

        else:
            # save file for import
            tempdir = tempfile.mkdtemp()
            memberfilename = secure_filename(memberfile.filename)
            memberpathname = os.path.join(tempdir,memberfilename)
            memberfile.save(memberpathname)

        # bring in data from the file
        if ext in ['.xls','.xlsx']:
            members = clubmember.XlClubMember(memberpathname)
        elif ext in ['.csv']:
            members = clubmember.CsvClubMember(memberpathname)

        # how did this happen?  check allowed_file() for bugs
        # NOTE(review): on this path the temporary file and directory are left behind
        else:
            db.session.rollback()
            cause = 'Program Error: Invalid file type {} for file {} path {} (unexpected)'.format(ext,memberfilename,memberpathname)
            app.logger.error(cause)
            return failure_response(cause=cause)

        # remove file and temporary directory
        os.remove(memberpathname)
        try:
            os.rmdir(tempdir)
        # no idea why this can happen; hopefully doesn't happen on linux
        except WindowsError,e:
            app.logger.debug('WindowsError exception ignored: {}'.format(e))

        # get old clubmembers from database
        dbmembers = clubmember.DbClubMember(club_id=club_id)   # use default database

        # prepare for age check
        thisyear = timeu.epoch2dt(time.time()).year
        asofasc = '{}-1-1'.format(thisyear) # jan 1 of current year
        asof = tYmd.asc2dt(asofasc)

        # process each name in new membership list
        allmembers = members.getmembers()
        for name in allmembers:
            thesemembers = allmembers[name]
            # NOTE: may be multiple members with same name
            for thismember in thesemembers:
                thisname = thismember['name']
                thisfname = thismember['fname']
                thislname = thismember['lname']
                thisdob = thismember['dob']
                thisgender = thismember['gender'][0].upper()    # male -> M, female -> F
                thishometown = thismember['hometown']
                thisrenewdate = thismember['renewdate']
                thisexpdate = thismember['expdate']

                # prep for if .. elif below by running some queries
                # handle close matches, if DOB does match
                age = timeu.age(asof,tYmd.asc2dt(thisdob))
                matchingmember = dbmembers.findmember(thisname,age,asofasc)
                dbmember = None
                if matchingmember:
                    membername,memberdob = matchingmember
                    if memberdob == thisdob:
                        dbmember = racedb.getunique(db.session,Runner,club_id=club_id,member=True,name=membername,dateofbirth=thisdob)

                # TODO: need to handle case where dob transitions from '' to actual date of birth

                # no member found, maybe there is nonmember of same name already in database
                # (dbnonmember is only bound on this path; the elif below is
                # only reached when dbmember is None, so it is bound when tested)
                if dbmember is None:
                    dbnonmember = racedb.getunique(db.session,Runner,club_id=club_id,member=False,name=thisname)
                    # TODO: there's a slim possibility that there are two nonmembers with the same name, but I'm sure we've already
                    # bolloxed that up in importresult as there's no way to discriminate between the two

                    ## make report for new members
                    #NEWMEMCSV.writerow({'name':thisname,'dob':thisdob})

                # see if this runner is a member in the database already, or was a member once and make the update
                # add or update runner in database
                # get instance, if it exists, and make any updates
                found = False
                if dbmember is not None:
                    thisrunner = Runner(club_id,membername,thisdob,thisgender,thishometown,
                                        fname=thisfname,lname=thislname,
                                        renewdate=thisrenewdate,expdate=thisexpdate)

                    # this is also done down below, but must be done here in case member's name has changed
                    if (thisrunner.name,thisrunner.dateofbirth) in inactiverunners:
                        inactiverunners.pop((thisrunner.name,thisrunner.dateofbirth))

                    # overwrite member's name if necessary
                    thisrunner.name = thisname

                    added = racedb.update(db.session,Runner,dbmember,thisrunner,skipcolumns=['id'])
                    found = True

                # if runner's name is in database, but not a member, see if this runner is a nonmemember which can be converted
                # Check first result for age against age within the input file
                # if ages match, convert nonmember to member
                elif dbnonmember is not None:
                    # get dt for date of birth, if specified
                    try:
                        dob = tYmd.asc2dt(thisdob)
                    except ValueError:
                        dob = None

                    # nonmember came into the database due to a nonmember race result, so we can use any race result to check nonmember's age
                    # NOTE(review): .first() returns None when the nonmember
                    # has no results, and result.agage would then raise --
                    # confirm this cannot happen
                    if dob:
                        result = RaceResult.query.filter_by(runnerid=dbnonmember.id).first()
                        resultage = result.agage
                        racedate = tYmd.asc2dt(result.race.date)
                        expectedage = timeu.age(racedate,dob)
                        #expectedage = racedate.year - dob.year - int((racedate.month, racedate.day) < (dob.month, dob.day))

                    # we found the right person, always if dob isn't specified, but preferably check race result for correct age
                    if dob is None or resultage == expectedage:
                        thisrunner = Runner(club_id,thisname,thisdob,thisgender,thishometown,
                                            fname=thisfname,lname=thislname,
                                            renewdate=thisrenewdate,expdate=thisexpdate)
                        added = racedb.update(db.session,Runner,dbnonmember,thisrunner,skipcolumns=['id'])
                        found = True
                    else:
                        app.logger.warning('{} found in database, wrong age, expected {} found {} in {}'.format(thisname,expectedage,resultage,result))
                        # TODO: need to make file for these, also need way to force update, because maybe bad date in database for result
                        # currently this will cause a new runner entry

                # if runner was not found in database, just insert new runner
                if not found:
                    thisrunner = Runner(club_id,thisname,thisdob,thisgender,thishometown,
                                        fname=thisfname,lname=thislname,
                                        renewdate=thisrenewdate,expdate=thisexpdate)
                    added = racedb.insert_or_update(db.session,Runner,thisrunner,skipcolumns=['id'],club_id=club_id,name=thisname,dateofbirth=thisdob)

                # remove this runner from collection of runners which should be deactivated in database
                if (thisrunner.name,thisrunner.dateofbirth) in inactiverunners:
                    inactiverunners.pop((thisrunner.name,thisrunner.dateofbirth))

        # any runners remaining in 'inactiverunners' should be deactivated
        for (name,dateofbirth) in inactiverunners:
            thisrunner = Runner.query.filter_by(club_id=club_id,name=name,dateofbirth=dateofbirth).first() # should be only one returned by filter
            thisrunner.active = False

        # commit database updates and close transaction
        db.session.commit()
        return success_response()
# home grown from . import version from loutilities.transform import Transform from loutilities.timeu import asctime, age from datetime import date from collections import defaultdict, OrderedDict # time stuff tymd = asctime('%Y-%m-%d') # transform DETAILS file produced by scoretility Results Analysis xform = Transform( { 'name' : 'runnername', 'gender' : 'gender', 'age' : lambda result: age(date.today(), tymd.asc2dt(result['dob'])), 'distmiles' : 'distmiles', 'ag' : lambda result: int(float(result['agpercent'])), 'year' : lambda result: tymd.asc2dt(result['racedate']).year }, sourceattr=False, targetattr=True) # # from https://gist.github.com/shenwei356/71dcc393ec4143f3447d # # from: http://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python # #---------------------------------------------------------------------- # def ddict(): # #---------------------------------------------------------------------- # return defaultdict(ddict) #######################################################################
def getresults(self, name, fname, lname, gender, dt_dob, begindate, enddate):
    #----------------------------------------------------------------------
    '''
    retrieves a list of race results for a single name

    must be overridden when ResultsCollect is instantiated

    the participant is matched against the (user, rauser) pairs in self.rausers
    by full name ('givenName familyName') and birth date; when no pair matches,
    an empty list is returned. for the matching user, workouts within the date
    window are listed and those named 'race' with a nonzero duration are returned

    :param name: name of participant for which results are to be returned
    :param fname: first name of participant
    :param lname: last name of participant
    :param gender: 'M' or 'F'
    :param dt_dob: participant's date of birth, as datetime
    :param begindate: epoch time for start of results, 00:00:00 on date to begin
    :param enddate: epoch time for end of results, 23:59:59 on date to finish
    :rtype: list of serviceresults, each of which can be processed by convertresult
    '''
    # remember participant data
    self.name = name
    self.fname = fname
    self.lname = lname
    self.gender = gender
    self.dt_dob = dt_dob
    self.dob = ftime.dt2asc(dt_dob)

    # find this user
    foundmember = False
    for user, rauser in self.rausers:
        # we need to know the name and birth date
        if 'givenName' not in rauser or 'birthDate' not in rauser:
            continue
        givenName = rauser['givenName']
        familyName = rauser['familyName'] if 'familyName' in rauser else ''

        # the format string needs replacement fields, else the comparison below can never match
        rausername = '{} {}'.format(givenName, familyName)
        if rausername == name and dt_dob == ftime.asc2dt(rauser['birthDate']):
            foundmember = True
            app.logger.debug('found {}'.format(name))
            break

    if not foundmember:
        return []

    # if we're here, found the right user, now let's look at the workouts
    a_begindate = ftime.epoch2asc(begindate)
    a_enddate = ftime.epoch2asc(enddate)
    workouts = self.service.listworkouts(user['token'], begindate=a_begindate, enddate=a_enddate,
                                         getfields=FIELD['workout'].keys())

    # get race results for this athlete
    results = []
    for wo in workouts or []:
        if wo['workoutName'].lower() != 'race':
            continue
        if 'duration' not in wo['details']:
            continue    # seen once, not sure why

        thisdate = wo['date']
        dt_thisdate = ftime.asc2dt(thisdate)
        thisdist = runningahead.dist2meters(wo['details']['distance'])
        thistime = wo['details']['duration']
        # 'in' rather than has_key() so this also works where dict.has_key is unavailable
        thisrace = wo['course']['name'] if 'course' in wo else 'unknown'

        if thistime == 0:
            app.logger.warning('{} has 0 time for {} {}'.format(name, thisrace, thisdate))
            continue

        stat = {'GivenName': fname, 'FamilyName': lname, 'name': name,
                'DOB': self.dob, 'Gender': gender, 'race': thisrace, 'date': thisdate,
                'age': timeu.age(dt_thisdate, dt_dob),
                'miles': thisdist/METERSPERMILE, 'km': thisdist/1000.0,
                'time': render.rendertime(thistime, 0)}
        results.append(stat)

    # already filtered by date and by age
    # send results back to caller
    return results
def convertserviceresult(self, result):
    #----------------------------------------------------------------------
    '''
    converts a single service result to dict suitable to be saved in resultfile

    result must be converted to dict with keys in `resultfilehdr` provided at instance creation

    must be overridden when ResultsCollect is instantiated

    use return value of None for cases when results could not be filtered by `:meth:getresults`

    participant information (name, DOB, gender) is taken from the attributes
    saved on self by `getresults`. as a side effect, the Race row matching this
    result is looked up and, if not found, added to the database session

    :param result: single service result, from list retrieved through `getresults`
    :rtype: dict with keys matching `resultfilehdr`, or None if result is not to be saved
    '''
    # create output record and copy participant information
    outrec = {}
    outrec['name'] = self.name
    outrec['GivenName'] = self.fname
    outrec['FamilyName'] = self.lname
    outrec['DOB'] = self.dob
    outrec['Gender'] = self.gender

    # get race name, strip white space
    racename = result['race'].strip()
    # maybe truncate to FIRST part of race name
    if len(racename) > MAX_RACENAME_LEN:
        racename = racename[:MAX_RACENAME_LEN]

    outrec['race'] = racename
    outrec['date'] = result['date']
    # location is not available from this service
    outrec['loc'] = ''

    # distance, category, time
    distmiles = result['miles']
    distkm = result['km']
    if distkm is None or distkm < 0.050:
        return None     # should already be filtered within runningahead, but just in case

    outrec['miles'] = distmiles
    outrec['km'] = distkm
    resulttime = result['time']

    # what about surface? would require retrieving course and who knows if asphalt is set correctly?

    # strange case of TicksString = ':00'
    if resulttime[0] == ':':
        resulttime = '0'+resulttime
    # normalize to h:mm:ss form
    while resulttime.count(':') < 2:
        resulttime = '0:'+resulttime
    outrec['time'] = resulttime
    outrec['timesecs'] = timeu.timesecs(resulttime)

    # retrieve or add race
    # flush should allow subsequent query per http://stackoverflow.com/questions/4201455/sqlalchemy-whats-the-difference-between-flush-and-commit
    # Race has uniqueconstraint for club_id/name/year/fixeddist.
    raceyear = ftime.asc2dt(result['date']).year
    race = Race.query.filter_by(club_id=self.club_id, name=racename, year=raceyear,
                                fixeddist=race_fixeddist(distmiles)).first()
    ### TODO: should the above be .all() then check for first race within epsilon distance?
    if not race:
        race = Race(self.club_id, raceyear)
        race.name = racename
        race.distance = distmiles
        race.fixeddist = race_fixeddist(race.distance)
        race.date = result['date']
        race.active = True
        race.external = True
        race.surface = 'trail'  # a guess here, but we really don't know
        db.session.add(race)
        db.session.flush()  # force id to be created

    # age is on date of race
    dt_racedate = ftime.asc2dt(race.date)
    racedateage = timeu.age(dt_racedate, self.dt_dob)
    outrec['age'] = racedateage

    # leave out age grade if exception occurs, skip results which have outliers
    try:
        resultgen = result['Gender'][0].upper()
        agpercent, agresult, agfactor = ag.agegrade(racedateage, resultgen, distmiles, outrec['timesecs'])
        outrec['ag'] = agpercent
        if agpercent < 15 or agpercent >= 100:
            return None     # skip obvious outliers
    # Exception rather than a bare except, so SystemExit / KeyboardInterrupt
    # (e.g., a cancelled task) are not swallowed here
    except Exception:
        app.logger.warning(traceback.format_exc())

    # and we're done
    return outrec
def collect(searchfile, outfile, begindate, enddate):
    #----------------------------------------------------------------------
    '''
    collect race results from ultrasignup

    for each runner in searchfile, results are retrieved by first and last name,
    then kept only if the race date is within the window, and the age and gender
    in the result match the runner. kept results are written to outfile

    :param searchfile: path to file containing names, genders, birth dates to search for
    :param outfile: output file path
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''
    # common fields between input and output
    commonfields = 'GivenName,FamilyName,DOB,Gender'.split(',')

    # open files -- context manager makes sure both are closed even if an exception occurs
    with open(searchfile, 'rb') as _IN, open(outfile, 'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT, UltraSignupResultFile.filehdr)
        OUT.writeheader()

        # create ultrasignup access
        ultra = ultrasignup.UltraSignup(debug=True)

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        adj_begindate = datetime.datetime(dt_begindate.year, dt_begindate.month, dt_begindate.day, 0, 0, 0)
        begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        adj_enddate = datetime.datetime(dt_enddate.year, dt_enddate.month, dt_enddate.day, 23, 59, 59)
        enddate = timeu.dt2epoch(adj_enddate)

        # start of collection, for elapsed time report
        start = time.time()

        # loop through runners in the input file
        for runner in IN:
            fname, lname = runner['GivenName'], runner['FamilyName']
            dt_dob = ftime.asc2dt(runner['DOB'])
            gender = runner['Gender'][0]

            # get results for this athlete
            results = ultra.listresults(fname, lname)

            # loop through each result
            for result in results:
                e_racedate = ftime.asc2epoch(result.racedate)

                # skip result if outside the desired time window
                if e_racedate < begindate or e_racedate > enddate:
                    continue

                # skip result if runner's age doesn't match the age within the result
                dt_racedate = timeu.epoch2dt(e_racedate)
                racedateage = timeu.age(dt_racedate, dt_dob)
                if result.age != racedateage:
                    continue

                # skip result if runner's gender doesn't match gender within the result
                if result.gender != gender:
                    continue

                # create output record and copy common fields
                outrec = {field: runner[field] for field in commonfields}

                # fill in output record fields from runner, result
                # combine name, get age
                outrec['name'] = '{} {}'.format(runner['GivenName'], runner['FamilyName'])
                outrec['age'] = result.age

                # race name, location
                outrec['race'] = result.racename
                outrec['date'] = ftime.epoch2asc(e_racedate)
                outrec['loc'] = '{}, {}'.format(result.racecity, result.racestate)

                # distance, category, time
                distmiles = result.distmiles
                distkm = result.distkm
                if distkm is None or distkm < 0.050:
                    continue    # should already be filtered within ultrasignup, but just in case

                outrec['miles'] = distmiles
                outrec['km'] = distkm
                resulttime = result.racetime

                # int resulttime means DNF, most likely -- skip this result
                if isinstance(resulttime, int):
                    continue

                # strange case of TicksString = ':00'
                if resulttime[0] == ':':
                    resulttime = '0' + resulttime
                # normalize to h:mm:ss form
                while resulttime.count(':') < 2:
                    resulttime = '0:' + resulttime
                outrec['time'] = resulttime

                # just leave out age grade if exception occurs
                # (deliberately best effort, but don't swallow SystemExit / KeyboardInterrupt)
                try:
                    agpercent, agresult, agfactor = ag.agegrade(
                        racedateage, gender, distmiles, timeu.timesecs(resulttime))
                    outrec['ag'] = agpercent
                    if agpercent < 15 or agpercent >= 100:
                        continue    # skip obvious outliers
                except Exception:
                    pass

                OUT.writerow(outrec)

    finish = time.time()
    # single-argument print(...) produces the same output under python 2 and python 3
    print('number of URLs retrieved = {}'.format(ultra.geturlcount()))
    print('elapsed time (min) = {}'.format((finish - start) / 60))
def main():
    '''
    initialize racing team application and result records from a csv file

    the csv file named on the command line is read with its detected encoding.
    for each row which has not already been stored (same name and timestamp),
    a RacingTeamApplication record is added, plus a RacingTeamResult record for
    each of race1, race2 which has a date. rows without a date of birth are
    skipped with a warning. changes are committed after all rows are processed
    '''
    descr = ''' Update racing team info volunteer records from csv file '''

    parser = ArgumentParser(description=descr)
    parser.add_argument('inputfile', help='csv file with input records', default=None)
    args = parser.parse_args()

    scriptdir = dirname(__file__)
    # two levels up
    scriptfolder = dirname(dirname(scriptdir))
    configdir = join(scriptfolder, 'config')
    memberconfigfile = "members.cfg"
    memberconfigpath = join(configdir, memberconfigfile)
    userconfigfile = "users.cfg"
    userconfigpath = join(configdir, userconfigfile)

    # create app and get configuration
    # use this order so members.cfg overrides users.cfg
    configfiles = [userconfigpath, memberconfigpath]
    app = create_app(Development(configfiles), configfiles)

    # set up database
    db.init_app(app)

    # determine input file encoding
    with open(args.inputfile, 'rb') as binaryfile:
        rawdata = binaryfile.read()
    detected = detect(rawdata)

    # translate type from old format to new
    applntype = {
        'Returning Racing Team Member': 'renewal',
        'New Racing Team Member': 'new',
    }

    # need app context, open input file
    with app.app_context(), open(args.inputfile, 'r', encoding=detected['encoding'],
                                 newline='', errors='replace') as IN:
        # turn on logging
        setlogging()

        # trick local interest stuff
        g.interest = 'fsrc'

        # initialize database tables from input file
        infile = DictReader(IN)
        for row in infile:
            # this pulls timezone information off of record timestamp, formatted like 'Sun Feb 25 2018 14:07:17 GMT-0500 (EST)'
            timestampasc = ' '.join(row['time'].split(' ')[:-2])
            timestamp = tstamp.asc2dt(timestampasc)

            # if we already have received an application for this name at this timestamp, skip it else we'll get duplicates
            applnrec = RacingTeamApplication.query.filter_by(
                name=row['name'], logtime=timestamp, **localinterest_query_params()).one_or_none()
            if applnrec:
                continue

            # at least one record doesn't have a date of birth
            if not row['dob']:
                app.logger.warning(
                    f"racingteam_appln_init: skipping {row['name']} {row['race1-name']} {row['race1-date']}"
                )
                continue

            # if we've gotten here, we need to add application and result records
            dob = isodate.asc2dt(row['dob']).date()
            applnrec = RacingTeamApplication(
                interest=localinterest(),
                logtime=timestamp,
                name=row['name'],
                type=applntype[row['applntype']],
                comments=row['comments'],
                dateofbirth=dob,
                email=row['email'],
                gender=row['gender'].upper()[0],
            )
            db.session.add(applnrec)

            for race in ['race1', 'race2']:
                # originally, new members were only asked for one race
                # detect this condition and skip this result -- this should only happen for race2
                if not row[f'{race}-date']:
                    continue

                # handle case where age grade was not calculated properly
                # this was due to deficiency in the original script, so these should be early entries
                # it's not worth adding the complexity to fix this data at this point
                # float() is only used to validate the text; the text itself is what gets stored
                rawagegrade = row[f'{race}-agegrade']
                try:
                    float(rawagegrade)
                except ValueError:
                    agegrade = None
                else:
                    agegrade = rawagegrade

                # calculate age
                racedate = isodate.asc2dt(row[f'{race}-date']).date()
                thisage = age(racedate, dob)

                # add result
                resultrec = RacingTeamResult(
                    interest=localinterest(),
                    application=applnrec,
                    eventdate=racedate,
                    eventname=row[f'{race}-name'],
                    age=thisage,
                    agegrade=agegrade,
                    distance=row[f'{race}-distance'],
                    units=row[f'{race}-units'],
                    location=row[f'{race}-location'],
                    url=row[f'{race}-resultslink'],
                    time=row[f'{race}-time'],
                )
                db.session.add(resultrec)

        db.session.commit()
def storeresults(self, thistask, status, club_id, filename):
    #----------------------------------------------------------------------
    '''
    create service accessor and open file
    get location if known
    loop through all results in accessor file, and store in database
    close file

    caller needs to `db.session.commit()` the changes

    :param thistask: this is required for task thistask.update_state()
    :param status: status for updating front end
    :param club_id: identifies club for which results are to be stored
    :param filename: name of csv file which contains service result records
    :raises ParameterError: if the runner or race for a result is not found in the database
    '''
    # create service accessor and open file
    self.serviceaccessor.open(filename)

    # try/finally so the input file is closed even if ParameterError is raised part way through
    try:
        status[self.servicename]['total'] = self.serviceaccessor.count()
        status[self.servicename]['processed'] = 0

        # loop through all results and store in database
        while True:
            filerecord = self.serviceaccessor.next()
            if not filerecord:
                break

            # transform to result attributes
            result = Record()
            result.source = self.servicename
            # app.logger.debug('filerecord = {}'.format(filerecord.__dict__))
            self.service2norm.transform(filerecord, result)
            # app.logger.debug('result = {}'.format(result.__dict__))

            # maybe we have a record in the database which matches this one, if so update the record
            # otherwise create a new database record
            ## first get runner
            runner = Runner.query.filter_by(club_id=club_id, name=result.runnername,
                                            dateofbirth=result.dob, gender=result.gender).first()
            if not runner:
                # +2 accounts for header line and 1-based line numbering -- presumably; verify against file format
                raise ParameterError("could not find runner in database: {} line {} {} {} {}".format(
                    filename, status[self.servicename]['processed']+2,
                    result.runnername, result.dob, result.gender))

            ## next get race
            ### Race has uniqueconstraint for club_id/name/year/fixeddist. It's been seen that there are additional races in athlinks,
            ### but just assume the first is the correct one.
            raceyear = ftime.asc2dt(result.date).year
            race = Race.query.filter_by(club_id=club_id, name=result.racename, year=raceyear,
                                        fixeddist=race_fixeddist(result.distmiles)).first()
            # races = Race.query.filter_by(club_id=club_id, name=result.racename, date=result.date, fixeddist=race_fixeddist(result.distmiles)).all()
            # race = None
            # for thisrace in races:
            #     if abs(thisrace.distance - result.distmiles) < RACEEPSILON:
            #         race = thisrace
            #         break
            if not race:
                raise ParameterError("could not find race in database: {} line {} {} {} {}".format(
                    filename, status[self.servicename]['processed']+2,
                    result.racename, result.date, result.distmiles))

            ## update or create result in database
            try:
                agage = age(ftime.asc2dt(race.date), ftime.asc2dt(runner.dateofbirth))
                result.agpercent, result.agtime, result.agfactor = ag.agegrade(
                    agage, runner.gender, result.distmiles, result.timesecs)

                dbresult = RaceResult(club_id, runner.id, race.id, None, result.timesecs,
                                      runner.gender, agage, instandings=False)
                for attr in ['agfactor', 'agtime', 'agpercent', 'source', 'sourceid', 'sourceresultid', 'fuzzyage']:
                    setattr(dbresult, attr, getattr(result, attr))

                insert_or_update(db.session, RaceResult, dbresult, skipcolumns=['id'],
                                 club_id=club_id, source=self.servicename,
                                 runnerid=runner.id, raceid=race.id)

            # just log and ignore result
            # maybe user is trying to cancel: SystemExit (and KeyboardInterrupt) are not
            # subclasses of Exception, so they propagate to the caller
            except Exception:
                app.logger.warning('exception for "{}", result ignored, processing {} result {}\n{}'.format(
                    runner.name, self.servicename, result.__dict__, traceback.format_exc()))

            # update the number of results processed and pass back the status
            status[self.servicename]['lastname'] = result.runnername
            status[self.servicename]['processed'] += 1
            thistask.update_state(state='PROGRESS', meta={'progress': status})

    # finished reading results, close input file
    finally:
        self.serviceaccessor.close()
def collect(searchfile, outfile, begindate, enddate):
    #----------------------------------------------------------------------
    '''
    collect race results from runningahead

    for each runner in searchfile, the matching runningahead user is found by
    full name and birth date, then that user's workouts named 'race' which are
    within the date window and have a nonzero duration are written to outfile

    :param searchfile: path to file containing names, genders, birth dates to search for
    :param outfile: output file path
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''
    outfilehdr = 'GivenName,FamilyName,name,DOB,Gender,race,date,age,miles,km,time'.split(',')

    # open files -- context manager makes sure both are closed even if an exception occurs
    with open(searchfile, 'rb') as _IN, open(outfile, 'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT, outfilehdr)
        OUT.writeheader()

        # create runningahead access, grab users who have used the steeplechasers.org portal to RA
        ra = runningahead.RunningAhead()
        users = ra.listusers()
        rausers = [(user, ra.getuser(user['token'])) for user in users]

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        a_begindate = fdate.dt2asc(dt_begindate)
        adj_begindate = datetime.datetime(dt_begindate.year, dt_begindate.month, dt_begindate.day, 0, 0, 0)
        e_begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        a_enddate = fdate.dt2asc(dt_enddate)
        adj_enddate = datetime.datetime(dt_enddate.year, dt_enddate.month, dt_enddate.day, 23, 59, 59)
        e_enddate = timeu.dt2epoch(adj_enddate)

        # start of collection, for elapsed time report
        start = time.time()

        # loop through runners in the input file
        for runner in IN:
            fname, lname = runner['GivenName'], runner['FamilyName']
            membername = '{} {}'.format(fname, lname)
            log.debug('looking for {}'.format(membername))
            dt_dob = fdate.asc2dt(runner['DOB'])
            dob = runner['DOB']
            gender = runner['Gender'][0]

            # find this user
            foundmember = False
            for user, rauser in rausers:
                # we need to know the name and birth date
                if 'givenName' not in rauser or 'birthDate' not in rauser:
                    continue
                givenName = rauser['givenName']
                familyName = rauser['familyName'] if 'familyName' in rauser else ''

                # built the same way as membername, else the comparison below can never match
                rausername = '{} {}'.format(givenName, familyName)
                if rausername == membername and dt_dob == fdate.asc2dt(rauser['birthDate']):
                    foundmember = True
                    log.debug('found {}'.format(membername))
                    break
                # member is not this ra user, keep looking

            # if we couldn't find this member in RA, try the next member
            if not foundmember:
                continue

            # if we're here, found the right user, now let's look at the workouts
            workouts = ra.listworkouts(user['token'], begindate=a_begindate, enddate=a_enddate,
                                       getfields=FIELD['workout'].keys())

            # save race workouts, if any found
            results = []
            for wo in workouts or []:
                if wo['workoutName'].lower() != 'race':
                    continue
                if 'duration' not in wo['details']:
                    continue    # seen once, not sure why

                thisdate = wo['date']
                dt_thisdate = fdate.asc2dt(thisdate)
                thisdist = runningahead.dist2meters(wo['details']['distance'])
                thistime = wo['details']['duration']
                # 'in' rather than has_key() so this also works where dict.has_key is unavailable
                thisrace = wo['course']['name'] if 'course' in wo else 'unknown'

                if thistime == 0:
                    log.warning('{} has 0 time for {} {}'.format(membername, thisrace, thisdate))
                    continue

                stat = {
                    'GivenName': fname,
                    'FamilyName': lname,
                    'name': membername,
                    'DOB': dob,
                    'Gender': gender,
                    'race': thisrace,
                    'date': thisdate,
                    'age': timeu.age(dt_thisdate, dt_dob),
                    'miles': thisdist / METERSPERMILE,
                    'km': thisdist / 1000.0,
                    'time': render.rendertime(thistime, 0),
                }
                results.append(stat)

            # loop through each result
            for result in results:
                e_racedate = fdate.asc2epoch(result['date'])

                # skip result if outside the desired time window
                # use the window adjusted to beginning / end of day, so a race on the
                # first or last day isn't dropped because of the time of day in begindate / enddate
                if e_racedate < e_begindate or e_racedate > e_enddate:
                    continue

                # create output record and copy fields
                outrec = result
                resulttime = result['time']

                # strange case of TicksString = ':00'
                if resulttime[0] == ':':
                    resulttime = '0' + resulttime
                # normalize to h:mm:ss form
                while resulttime.count(':') < 2:
                    resulttime = '0:' + resulttime
                outrec['time'] = resulttime

                OUT.writerow(outrec)

    finish = time.time()
    # single-argument print(...) produces the same output under python 2 and python 3
    print('elapsed time (min) = {}'.format((finish - start) / 60))
def render(aag,outfile,summaryfile,detailfile,minagegrade,minraces,mintrend,begindate,enddate):
    #----------------------------------------------------------------------
    '''
    render collected results

    for each runner in aag, all results are written to the detail file. runners
    who are at least 14 on Jan 1 of the last year, and who have at least minraces
    results in the last year, also get a summary file row and a saved plot

    :param aag: dict-like, keyed by runner name, of age grade analysis objects
        (presumably analyzeagegrade.AnalyzeAgeGrade -- confirm against caller)
    :param outfile: output file name template, like '{who}-ag-analysis-{date}-{time}.png'
    :param summaryfile: summary file name template (.csv), may include {date} field
    :param detailfile: detail file name template (.csv), may include {date} field
    :param minagegrade: minimum age grade
    :param minraces: minimum races in the same year as enddate
    :param mintrend: minimum races over the full period for trendline
    :param begindate: render races between begindate and enddate, datetime
    :param enddate: render races between begindate and enddate, datetime
    '''
    firstyear = begindate.year
    lastyear = enddate.year
    yearrange = range(firstyear,lastyear+1)

    # summary file has one column per statistic per distance category,
    # plus one numraces column per year
    summfields = ['name','age','gender']
    distcategories = ['overall'] + [TRENDLIMITS[tlimit][0] for tlimit in TRENDLIMITS]
    for stattype in ['1yr agegrade','avg agegrade','trend','numraces','stderr','r-squared','pvalue']:
        for distcategory in distcategories:
            summfields.append('{}\n{}'.format(stattype,distcategory))
        if stattype == 'numraces':
            for year in yearrange:
                summfields.append('{}\n{}'.format(stattype,year))

    detlfields = ['name','dob','gender'] + analyzeagegrade.AgeGradeStat.attrs + ['distmiles','distkm','rendertime']
    detlfields.remove('priority')   # priority is internal

    tfile = timeu.asctime('%Y-%m-%d')
    summaryfname = summaryfile.format(date=tfile.epoch2asc(time.time()))
    detailfname = detailfile.format(date=tfile.epoch2asc(time.time()))

    # context manager makes sure both files are closed even if an exception occurs
    with open(summaryfname,'wb') as _SUMM, open(detailfname,'wb') as _DETL:
        SUMM = csv.DictWriter(_SUMM,summfields)
        SUMM.writeheader()
        DETL = csv.DictWriter(_DETL,detlfields,extrasaction='ignore')
        DETL.writeheader()

        # create a figure used for everyone -- required to save memory
        fig = plt.figure()

        # loop through each member we've recorded information about
        for thisname in aag:
            rendername = thisname.title()

            # remove duplicate entries
            aag[thisname].deduplicate()

            # crunch the numbers, and remove entries less than minagegrade
            aag[thisname].crunch()  # calculate age grade for each result
            stats = aag[thisname].get_stats()
            #for stat in stats:
            #    if stat.ag < minagegrade:
            #        aag[thisname].del_stat(stat)

            # write detailed file before filtering
            name,gender,dob = aag[thisname].get_runner()
            detlout = {'name':rendername,'gender':gender,'dob':tfile.dt2asc(dob)}
            for stat in stats:
                for attr in analyzeagegrade.AgeGradeStat.attrs:
                    detlout[attr] = getattr(stat,attr)
                    if attr == 'date':
                        detlout[attr] = tfile.dt2asc(detlout[attr])

                # interpret some of the data from the raw stat
                detlout['distkm'] = detlout['dist'] / 1000.0
                detlout['distmiles'] = detlout['dist']/METERSPERMILE
                rendertime = ren.rendertime(detlout['time'],0)
                # normalize to h:mm:ss form
                while len(rendertime.split(':')) < 3:
                    rendertime = '0:'+rendertime
                detlout['rendertime'] = rendertime
                DETL.writerow(detlout)

            jan1 = tfile.asc2dt('{}-1-1'.format(lastyear))
            runnerage = timeu.age(jan1,dob)

            # filter out runners younger than 14
            if runnerage < 14:
                continue

            # filter out runners who have not run enough races
            # (lastyear is the year of enddate, determined above)
            stats = aag[thisname].get_stats()
            lastyearstats = [s for s in stats if s.date.year==lastyear]
            if len(lastyearstats) < minraces:
                continue

            # set up output file name template
            if outfile:
                aag[thisname].set_renderfname(outfile)

            # set up rendering parameters
            aag[thisname].set_xlim(begindate,enddate)
            aag[thisname].set_ylim(minagegrade,100)
            aag[thisname].set_colormap([200,100*METERSPERMILE])

            # clear figure, set up axes
            fig.clear()
            ax = fig.add_subplot(111)

            # render the results
            aag[thisname].render_stats(fig)     # plot statistics

            # set up to collect averages
            avg = collections.OrderedDict()

            # draw trendlines, write output
            allstats = aag[thisname].get_stats()
            avg['overall'] = mean([s.ag for s in allstats])
            trend = aag[thisname].render_trendline(fig,'overall',color='k')

            # retrieve output filename for hyperlink
            # must be called after set_runner and set_renderfname
            thisoutfile = aag[thisname].get_outfilename()

            summout = {}
            summout['name'] = '=HYPERLINK("{}","{}")'.format(thisoutfile,rendername)
            summout['age'] = runnerage
            summout['gender'] = gender
            oneyrstats = [s.ag for s in allstats if s.date.year == lastyear]
            if oneyrstats:
                summout['1yr agegrade\noverall'] = mean(oneyrstats)
            summout['avg agegrade\noverall'] = avg['overall']
            if len(allstats) >= mintrend:
                summout['trend\noverall'] = trend.slope
                summout['stderr\noverall'] = trend.stderr
                summout['r-squared\noverall'] = trend.rvalue**2
                summout['pvalue\noverall'] = trend.pvalue
            summout['numraces\noverall'] = len(allstats)
            for year in yearrange:
                summout['numraces\n{}'.format(year)] = len([s for s in allstats if s.date.year==year])

            # statistics by distance category; categories without enough races are left blank
            for tlimit in TRENDLIMITS:
                distcategory,distcolor = TRENDLIMITS[tlimit]
                tstats = [s for s in allstats if tlimit[0] <= s.dist <= tlimit[1]]
                if len(tstats) < mintrend:
                    continue
                avg[distcategory] = mean([s.ag for s in tstats])
                trend = aag[thisname].render_trendline(fig,distcategory,thesestats=tstats,color=distcolor)

                oneyrcategory = [s.ag for s in tstats if s.date.year == lastyear]
                if oneyrcategory:
                    summout['1yr agegrade\n{}'.format(distcategory)] = mean(oneyrcategory)
                summout['avg agegrade\n{}'.format(distcategory)] = avg[distcategory]
                summout['trend\n{}'.format(distcategory)] = trend.slope
                summout['stderr\n{}'.format(distcategory)] = trend.stderr
                summout['r-squared\n{}'.format(distcategory)] = trend.rvalue**2
                summout['pvalue\n{}'.format(distcategory)] = trend.pvalue
                summout['numraces\n{}'.format(distcategory)] = len(tstats)
            SUMM.writerow(summout)

            # annotate with averages
            avgstr = 'averages\n'
            for lab in avg:
                thisavg = int(round(avg[lab]))
                avgstr += ' {}: {}%\n'.format(lab,thisavg)
            avgstr += 'age (1/1/{}): {}'.format(lastyear,runnerage)

            # TODO: add get_*lim() to aag -- xlim and ylim are currently side-effect of aag.render_stats()
            x1,xn = ax.get_xlim()
            y1,yn = ax.get_ylim()
            xy = (x1+10,y1+10)
            aag[thisname].render_annotate(fig,avgstr,xy)

            # save file
            aag[thisname].save(fig)
def getresults(self, name, fname, lname, gender, dt_dob, begindate, enddate):
    #----------------------------------------------------------------------
    '''
    retrieves a list of results for a single name

    must be overridden when ResultsCollect is instantiated

    use dt_dob to filter errant race results, based on age of runner on race day

    results are kept only if the race date is within the window, the gender
    matches, and the age in the result either matches the runner's age on race
    day, or is the lower bound of the 5 year age group containing that age. in
    the latter case the result's 'fuzzyage' item is set to True, else False

    :param name: name of participant for which results are to be returned
    :param fname: first name of participant
    :param lname: last name of participant
    :param gender: 'M' or 'F'
    :param dt_dob: participant's date of birth, as datetime
    :param begindate: epoch time for start of results, 00:00:00 on date to begin
    :param enddate: epoch time for end of results, 23:59:59 on date to finish
    :rtype: list of serviceresults, each of which can be processed by convertresult
    '''
    # remember participant data
    self.name = name
    self.fname = fname
    self.lname = lname
    self.gender = gender
    self.dt_dob = dt_dob
    self.dob = ftime.dt2asc(dt_dob)

    # get results for this athlete
    allresults = self.service.listathleteresults(name)

    # filter by date and by age
    filteredresults = []
    for result in allresults:
        e_racedate = athlinks.gettime(result['Race']['RaceDate'])

        # skip result if outside the desired time window
        if e_racedate < begindate or e_racedate > enddate:
            continue

        # skip result if wrong gender
        resultgen = result['Gender'][0]
        if resultgen != gender:
            continue

        # skip result if runner's age doesn't match the age within the result
        # sometimes athlinks stores the age group of the runner, not exact age,
        # so also check if this runner's age is within the age group, and indicate if so
        dt_racedate = timeu.epoch2dt(e_racedate)
        racedateage = timeu.age(dt_racedate,dt_dob)
        resultage = int(result['Age'])
        result['fuzzyage'] = False
        if resultage != racedateage:
            # if results are not stored as age group (multiple of 5), skip this result
            if resultage % 5 != 0:
                continue

            # result's age might be age group, not exact age
            # explicit floor division, so this doesn't depend on how '/' divides integers
            # if runner's age is not consistent with the age group, skip result
            if (racedateage // 5) * 5 != resultage:
                continue

            # runner's age consistent with race age group, use result, but mark "fuzzy"
            result['fuzzyage'] = True

        # if we reach here, the result is ok, and is added to filteredresults
        filteredresults.append(result)

    # back to caller
    return filteredresults
def summarize(thistask, club_id, sources, status, summaryfile, detailfile, resultsurl, minage=12, minagegrade=20, minraces=3 , mintrend=2, numyears=3, begindate=None, enddate=None):
    #----------------------------------------------------------------------
    '''
    render collected results

    writes three files: detailfile (one row per result considered),
    summaryfile + '.cols' (json column definitions) and summaryfile
    (one row per runner who passes the minage / minraces filters)

    :param thistask: this is required for task thistask.update_state()
    :param club_id: identifies club for which results are to be stored
    :param sources: list of sources / services we're keeping status for
    :param status: dict keyed by source, each value a dict whose 'status', 'lastname',
        'processed' and 'total' items are updated here and passed to thistask.update_state()
    :param summaryfile: summary file name (.csv)
    :param detailfile: detail file name (.csv)
    :param resultsurl: base url to send results to, for link in summary table
    :param minage: minimum age to keep track of stats
    :param minagegrade: minimum age grade (not referenced within this function)
    :param minraces: minimum races in the same year as enddate
    :param mintrend: minimum races over the full period for trendline
    :param numyears: number of years to render, ending this year, used only if
        begindate and enddate are not both supplied
    :param begindate: render races between begindate and enddate, datetime
    :param enddate: render races between begindate and enddate, datetime
    '''
    # get club slug and location for later
    club = Club.query.filter_by(id=club_id).first()
    clubslug = club.shname
    locsvr = LocationServer()
    clublocation = locsvr.getlocation(club.location)

    # get maxdistance by service
    services = RaceResultService.query.filter_by(club_id=club_id).join(ApiCredentials).all()
    maxdistance = {}
    for service in services:
        attrs = ServiceAttributes(club_id, service.apicredentials.name)
        # app.logger.debug('service {} attrs {}'.format(service, attrs.__dict__))
        if attrs.maxdistance:
            maxdistance[service.apicredentials.name] = attrs.maxdistance
        else:
            maxdistance[service.apicredentials.name] = None
    maxdistance[productname] = None

    # set up date range. begindate and enddate take precedence, else use numyears from today
    if not (begindate and enddate):
        etoday = time.time()
        today = timeu.epoch2dt(etoday)
        begindate = datetime(today.year-numyears+1,1,1)
        enddate = datetime(today.year,12,31)

    # enddate is always set at this point, so lastyear is fixed for the rest of the function
    firstyear = begindate.year
    lastyear = enddate.year
    yearrange = range(firstyear,lastyear+1)

    # get all the requested result data from the database and save in a data structure indexed by runner
    ## first get the data from the database
    results = RaceResult.query.join(Race).join(Runner).filter(
        RaceResult.club_id==club_id,
        Race.date.between(ftime.dt2asc(begindate), ftime.dt2asc(enddate)),
        Runner.member==True,
        Runner.active==True).order_by(Runner.lname, Runner.fname).all()

    ## then set up our status and pass to the front end
    for source in sources:
        status[source]['status'] = 'summarizing'
        status[source]['lastname'] = ''
        status[source]['processed'] = 0
        status[source]['total'] = sum([1 for result in results if result.source==source])
    thistask.update_state(state='PROGRESS', meta={'progress':status})

    ## prepare to save detail file, for debugging
    detlfields = 'runnername,runnerid,dob,gender,resultid,racename,racedate,series,distmiles,distkm,time,timesecs,agpercent,source,sourceid'.split(',')
    detailfname = detailfile

    ## then fill in data structure to hold AnalyzeAgeGrade objects
    ## use OrderedDict to force aag to be in same order as DETL file, for debugging
    aag = collections.OrderedDict()

    # context manager makes sure the detail file is closed even if a result can't be processed
    with open(detailfname,'wb') as _DETL:
        DETL = csv.DictWriter(_DETL,detlfields)
        DETL.writeheader()

        for result in results:
            # skip results which are too far away, if a maxdistance is defined for this source
            if maxdistance[result.source]:
                locationid = result.race.locationid
                if not locationid: continue
                racelocation = Location.query.filter_by(id=locationid).first()
                distance = get_distance(clublocation, racelocation)
                if distance is None or distance > maxdistance[result.source]: continue

            thisname = (result.runner.name.lower(), result.runner.dateofbirth)
            initaagrunner(aag, thisname, result.runner.fname, result.runner.lname, result.runner.gender, ftime.asc2dt(result.runner.dateofbirth), result.runner.id)

            # determine location name. any error gets null string
            locationname = ''
            if result.race.locationid:
                location = Location.query.filter_by(id=result.race.locationid).first()
                if location:
                    locationname = location.name

            aag[thisname].add_stat(ftime.asc2dt(result.race.date), result.race.distance*METERSPERMILE, result.time, race=result.race.name,
                                   loc=locationname, fuzzyage=result.fuzzyage,
                                   source=result.source, priority=priority[result.source])

            ### TODO: store result's agpercent, in AgeGrade.crunch() skip agegrade calculation if already present
            DETL.writerow(dict(
                runnername = result.runner.name,
                runnerid = result.runner.id,
                dob = result.runner.dateofbirth,
                gender = result.runner.gender,
                resultid = result.id,
                racename = result.race.name,
                racedate = result.race.date,
                series = result.series.name if result.seriesid else None,
                distmiles = result.race.distance,
                distkm = result.race.distance*(METERSPERMILE/1000),
                timesecs = result.time,
                time = rendertime(result.time,0),
                agpercent = result.agpercent,
                source = result.source,
                sourceid = result.sourceid,
            ))

    # initialize summary file
    # summfields are the csv headings, datafields are the matching column data names
    summfields = ['name', 'lname', 'fname', 'age', 'gender']
    datafields = copy(summfields)
    distcategories = ['overall'] + [TRENDLIMITS[tlimit][0] for tlimit in TRENDLIMITS]
    datacategories = ['overall'] + [TRENDLIMITS[tlimit][1] for tlimit in TRENDLIMITS]
    stattypes = ['1yr agegrade','avg agegrade','trend','numraces','stderr','r-squared','pvalue']
    statdatatypes = ['1yr-agegrade','avg-agegrade','trend','numraces','stderr','r-squared','pvalue']
    for stattype, statdatatype in zip(stattypes, statdatatypes):
        for distcategory, datacategory in zip(distcategories, datacategories):
            summfields.append('{}\n{}'.format(stattype, distcategory))
            datafields.append('{}-{}'.format(statdatatype, datacategory))
        if stattype == 'numraces':
            for year in yearrange:
                summfields.append('{}\n{}'.format(stattype, year))
                datafields.append('{}-{}'.format(statdatatype, lastyear-year))

    # save summary file columns for resultsanalysissummary
    dtcolumns = json.dumps([{ 'data':d, 'name':d, 'label':l } for d,l in zip(datafields, summfields)])
    columnsfilename = summaryfile + '.cols'
    with open(columnsfilename, 'w') as cols:
        cols.write(dtcolumns)

    # set up summary file
    # context manager makes sure the summary file is closed even if a runner can't be processed
    summaryfname = summaryfile
    with open(summaryfname,'wb') as _SUMM:
        SUMM = csv.DictWriter(_SUMM,summfields)
        SUMM.writeheader()

        # loop through each member we've recorded information about
        for thisname in aag:
            fullname, fname, lname, gender, dob, runnerid = aag[thisname].get_runner()
            rendername = fullname.title()

            # check stats before deduplicating
            statcount = {}
            stats = aag[thisname].get_stats()
            for source in sources:
                statcount[source] = sum([1 for s in stats if s.source == source])

            # remove duplicate entries
            aag[thisname].deduplicate()

            # crunch the numbers
            aag[thisname].crunch()    # calculate age grade for each result
            allstats = aag[thisname].get_stats()

            jan1 = ftime.asc2dt('{}-1-1'.format(lastyear))
            runnerage = timeu.age(jan1, dob)

            # NOTE(review): runners skipped by the two filters below never reach the
            # status update at the bottom of the loop, so 'processed' does not count
            # their results -- confirm this is what the front end expects

            # filter out runners younger than allowed
            if runnerage < minage: continue

            # filter out runners who have not run enough races
            lastyearstats = [s for s in allstats if s.date.year==lastyear]
            if len(lastyearstats) < minraces: continue

            # fill in row for summary output
            summout = {}

            # get link for this runner's results chart
            # see http://stackoverflow.com/questions/2506379/add-params-to-given-url-in-python
            url_parts = list(urlparse(resultsurl))
            query = dict(parse_qsl(url_parts[4]))
            query.update({'club': clubslug, 'runnerid': runnerid, 'begindate': ftime.dt2asc(begindate), 'enddate': ftime.dt2asc(enddate)})
            url_parts[4] = urlencode(query)
            resultslink = urlunparse(url_parts)

            summout['name'] = '<a href={} target=_blank>{}</a>'.format(resultslink, rendername)
            summout['fname'] = fname
            summout['lname'] = lname
            summout['age'] = runnerage
            summout['gender'] = gender

            # set up to collect averages
            avg = collections.OrderedDict()

            # draw trendlines, write output
            oneyrstats = [s.ag for s in allstats if s.date.year == lastyear]
            if len(oneyrstats) > 0:
                summout['1yr agegrade\noverall'] = mean(oneyrstats)
            if len(allstats) > 0:
                avg['overall'] = mean([s.ag for s in allstats])
                summout['avg agegrade\noverall'] = avg['overall']
            if allstats and len(allstats) >= mintrend and allstats[0].date != allstats[-1].date:
                # the overall trendline is only needed here, and gets the same
                # protection as the per category trendlines below
                try:
                    trend = aag[thisname].get_trendline()
                except ZeroDivisionError:
                    app.logger.debug('ZeroDivisionError - processing {}'.format(rendername))
                    trend = None
                # ignore trends which can't be calculated
                if trend is not None:
                    summout['trend\noverall'] = trend.improvement
                    summout['stderr\noverall'] = trend.stderr
                    # NOTE(review): overall stores trend.r2**2 but the distance categories
                    # below store trend.r2 -- confirm which is intended
                    summout['r-squared\noverall'] = trend.r2**2
                    summout['pvalue\noverall'] = trend.pvalue
            summout['numraces\noverall'] = len(allstats)
            for year in yearrange:
                summout['numraces\n{}'.format(year)] = len([s for s in allstats if s.date.year==year])

            for tlimit in TRENDLIMITS:
                distcategory,distcolor = TRENDLIMITS[tlimit]
                tstats = [s for s in allstats if s.dist >= tlimit[0] and s.dist < tlimit[1]]
                if len(tstats) > 0:
                    avg[distcategory] = mean([s.ag for s in tstats])
                    summout['avg agegrade\n{}'.format(distcategory)] = avg[distcategory]
                summout['numraces\n{}'.format(distcategory)] = len(tstats)
                oneyrcategory = [s.ag for s in tstats if s.date.year == lastyear]
                if len(oneyrcategory) > 0:
                    summout['1yr agegrade\n{}'.format(distcategory)] = mean(oneyrcategory)
                if tstats and len(tstats) >= mintrend and tstats[0].date != tstats[-1].date:
                    try:
                        trend = aag[thisname].get_trendline(thesestats=tstats)
                    except ZeroDivisionError:
                        app.logger.debug('ZeroDivisionError - processing {}'.format(rendername))
                        trend = None
                    # ignore trends which can't be calculated
                    if trend:
                        summout['trend\n{}'.format(distcategory)] = trend.improvement
                        summout['stderr\n{}'.format(distcategory)] = trend.stderr
                        summout['r-squared\n{}'.format(distcategory)] = trend.r2
                        summout['pvalue\n{}'.format(distcategory)] = trend.pvalue
            SUMM.writerow(summout)

            # update status
            for source in sources:
                status[source]['processed'] += statcount[source]
                status[source]['lastname'] = rendername
            thistask.update_state(state='PROGRESS', meta={'progress':status})
def convertserviceresult(self, result):
    #----------------------------------------------------------------------
    '''
    converts a single service result to dict suitable to be saved in resultfile

    result must be converted to dict with keys in `resultfilehdr` provided at instance creation

    must be overridden when ResultsCollect is instantiated

    use return value of None for cases when results could not be filtered by `:meth:getresults`

    as a side effect, the course for this result (and its race, if needed) is
    added to the database when not already there, and a row is written to the
    race debug file if self.racefile is set

    :param result: single service result, from list retrieved through `getresults`
    :rtype: dict with keys matching `resultfilehdr`, or None if result is not to be saved
    '''
    # create output record and copy common fields
    outrec = {}

    # copy participant information
    outrec['name'] = self.name
    outrec['GivenName'] = self.fname
    outrec['FamilyName'] = self.lname
    outrec['DOB'] = self.dob
    outrec['Gender'] = self.gender

    # some debug items - assume everything is cached
    coursecached = True
    racecached = True

    # get course used for this result
    courseid = '{}/{}'.format(result['Race']['RaceID'], result['CourseID'])
    course = Course.query.filter_by(club_id=self.club_id, source='athlinks', sourceid=courseid).first()

    # cache course if not done already
    race = None
    if not course:
        coursecached = False

        coursedata = self.service.getcourse(result['Race']['RaceID'], result['CourseID'])

        distmiles = athlinks.dist2miles(coursedata['Courses'][0]['DistUnit'],coursedata['Courses'][0]['DistTypeID'])
        distkm = athlinks.dist2km(coursedata['Courses'][0]['DistUnit'],coursedata['Courses'][0]['DistTypeID'])
        if distkm < 0.050: return None # skip timed events, which seem to be recorded with 0 distance

        # skip result if not Running or Trail Running race
        thiscategory = coursedata['Courses'][0]['RaceCatID']
        if thiscategory not in race_category: return None

        course = Course()
        course.club_id = self.club_id
        course.source = 'athlinks'
        course.sourceid = courseid

        # strip racename and coursename here to make sure detail file matches what is stored in database
        racename = csvu.unicode2ascii(coursedata['RaceName']).strip()
        coursename = csvu.unicode2ascii(coursedata['Courses'][0]['CourseName']).strip()
        course.name = '{} / {}'.format(racename,coursename)

        # maybe truncate to FIRST part of race name
        if len(course.name) > MAX_RACENAME_LEN:
            course.name = course.name[:MAX_RACENAME_LEN]

        course.date = ftime.epoch2asc(athlinks.gettime(coursedata['RaceDate']))
        course.location = csvu.unicode2ascii(coursedata['Home'])

        # maybe truncate to LAST part of location name, to keep most relevant information (state, country)
        if len(course.location) > MAX_LOCATION_LEN:
            course.location = course.location[-MAX_LOCATION_LEN:]

        # TODO: adjust marathon and half marathon distances?
        course.distkm = distkm
        course.distmiles = distmiles
        course.surface = race_category[thiscategory]

        # retrieve or add race
        # flush should allow subsequent query per http://stackoverflow.com/questions/4201455/sqlalchemy-whats-the-difference-between-flush-and-commit
        # Race has uniqueconstraint for club_id/name/year/fixeddist. It's been seen that there are additional races in athlinks,
        # but just assume the first is the correct one.
        raceyear = ftime.asc2dt(course.date).year
        race = Race.query.filter_by(club_id=self.club_id, name=course.name, year=raceyear, fixeddist=race_fixeddist(course.distmiles)).first()
        ### TODO: should the above be .all() then check for first race within epsilon distance?
        if not race:
            racecached = False
            race = Race(self.club_id, raceyear)
            race.name = course.name
            race.distance = course.distmiles
            race.fixeddist = race_fixeddist(race.distance)
            race.date = course.date
            race.active = True
            race.external = True
            race.surface = course.surface
            loc = self.locsvr.getlocation(course.location)
            race.locationid = loc.id
            db.session.add(race)
            db.session.flush()  # force id to be created

        course.raceid = race.id
        db.session.add(course)
        db.session.flush()      # force id to be created

    # maybe course was cached but location of race wasn't
    # update location of result race, if needed, and if supplied
    # this is here to clean up old database data
    if not race:
        race = Race.query.filter_by(club_id=self.club_id, name=course.name, year=ftime.asc2dt(course.date).year, fixeddist=race_fixeddist(course.distmiles)).first()
    # the lookup above returns None if no race matches the cached course, in which
    # case there is nothing to update
    if race is not None and not race.locationid and course.location:
        # app.logger.debug('updating race with location {}'.format(course.location))
        loc = self.locsvr.getlocation(course.location)
        race.locationid = loc.id
        insert_or_update(db.session, Race, race, skipcolumns=['id'],
                         club_id=self.club_id, name=course.name, year=ftime.asc2dt(course.date).year, fixeddist=race_fixeddist(course.distmiles))
    # else:
    #     app.logger.debug('race.locationid={} course.location="{}"'.format(race.locationid, course.location))

    # debug races
    if self.racefile:
        racestatusl = []
        if not coursecached: racestatusl.append('addcourse')
        if not racecached: racestatusl.append('addrace')
        if not racestatusl: racestatusl.append('cached')
        racestatus = '-'.join(racestatusl)
        racerow = {'status': racestatus, 'runner': self.name}

        for racefield in self.racefields:
            if racefield in ['status', 'runner']: continue
            racerow[racefield] = getattr(course,racefield)
        self.RACE.writerow(racerow)

    # fill in output record fields from result, course
    # combine name, get age
    outrec['age'] = result['Age']
    outrec['fuzzyage'] = result['fuzzyage']

    # leave athlid blank if result not from an athlink member
    athlmember = result['IsMember']
    if athlmember:
        outrec['athlid'] = result['RacerID']

    # remember the entryid, high water mark of which can be used to limit the work here
    outrec['entryid'] = result['EntryID']

    # race name, location; convert from unicode if necessary
    # TODO: make function to do unicode translation -- apply to runner name as well (or should csv just store unicode?)
    outrec['race'] = course.name
    outrec['date'] = course.date
    outrec['loc'] = course.location

    outrec['miles'] = course.distmiles
    outrec['km'] = course.distkm
    outrec['category'] = course.surface
    resulttime = result['TicksString']

    # strange case of TicksString = ':00'
    if resulttime[0] == ':':
        resulttime = '0'+resulttime
    # make sure there are always hours, minutes and seconds fields
    while resulttime.count(':') < 2:
        resulttime = '0:'+resulttime
    outrec['time'] = resulttime

    # strange case of 0 time, causes ZeroDivisionError and is clearly not valid
    if timeu.timesecs(resulttime) == 0: return None

    # leave out age grade if exception occurs, skip results which have outliers
    try:
        # age grade is based on the runner's age on race day and the gender within the result
        e_racedate = athlinks.gettime(result['Race']['RaceDate'])
        resultgen = result['Gender'][0]
        dt_racedate = timeu.epoch2dt(e_racedate)
        racedateage = timeu.age(dt_racedate,self.dt_dob)
        agpercent,agresult,agfactor = ag.agegrade(racedateage,resultgen,course.distmiles,timeu.timesecs(resulttime))
        outrec['ag'] = agpercent
        if agpercent < 15 or agpercent >= 100: return None # skip obvious outliers
    # Exception rather than a bare except, so KeyboardInterrupt and SystemExit are not swallowed
    except Exception:
        app.logger.warning(traceback.format_exc())

    # and we're done
    return outrec
def main():
    #----------------------------------------------------------------------
    '''
    update club membership information

    reads the member file named on the command line, adds or updates the
    matching runners in the race database, deactivates member runners which
    are no longer in the member file, and writes <memberfile>-newmem.csv
    listing the members which were not already members in the database
    '''
    # ArgumentParser(version=...) is not accepted by python 3 argparse, so the
    # -v / --version option is declared with the 'version' action instead
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', action='version',
                        version='{0} {1}'.format('runningclub', version.__version__))
    parser.add_argument('memberfile', help='csv, xls or xlsx file with member information')
    parser.add_argument('-r', '--racedb',
                        help='filename of race database (default is as configured during rcuserconfig)',
                        default=None)
    parser.add_argument('--debug', help='if set, create updatemembers.txt for debugging', action='store_true')
    args = parser.parse_args()

    # resources released in the finally clause, whether we finish, return early or fail
    OUT = None
    NEWMEM = None
    session = None
    try:
        if args.debug:
            OUT = open('updatemembers.txt', 'w')

        racedb.setracedb(args.racedb)
        session = racedb.Session()

        # get clubmembers from file
        memberfile = args.memberfile
        ext = os.path.splitext(memberfile)[1]
        if ext in ['.xls', '.xlsx']:
            members = clubmember.XlClubMember(memberfile)
        elif ext in ['.csv']:
            members = clubmember.CsvClubMember(memberfile)
        else:
            print('***ERROR: invalid memberfile {}, must be csv, xls or xlsx'.format(memberfile))
            return

        # get old clubmembers from database
        dbmembers = clubmember.DbClubMember()   # use default database

        # get all the member runners currently in the database
        # hash them into dict by (name,dateofbirth)
        # runners are removed from this dict as they are found in the member file
        allrunners = session.query(racedb.Runner).filter_by(member=True, active=True).all()
        inactiverunners = {}
        for thisrunner in allrunners:
            inactiverunners[thisrunner.name, thisrunner.dateofbirth] = thisrunner
            if OUT:
                OUT.write('found id={0}, runner={1}\n'.format(thisrunner.id, thisrunner))

        # make report for new members found with this memberfile
        logdir = os.path.dirname(args.memberfile)
        memberfilebase = os.path.splitext(os.path.basename(args.memberfile))[0]
        newmemlogname = '{0}-newmem.csv'.format(memberfilebase)
        NEWMEM = open(os.path.join(logdir, newmemlogname), 'w', newline='')
        NEWMEMCSV = csv.DictWriter(NEWMEM, ['name', 'dob'])
        NEWMEMCSV.writeheader()

        # prepare for age check
        thisyear = timeu.epoch2dt(time.time()).year
        asofasc = '{}-1-1'.format(thisyear)     # jan 1 of current year
        asof = tYmd.asc2dt(asofasc)

        # process each name in new membership list
        allmembers = members.getmembers()
        for name in allmembers:
            thesemembers = allmembers[name]
            # NOTE: may be multiple members with same name
            for thismember in thesemembers:
                thisname = thismember['name']
                thisdob = thismember['dob']
                thisgender = thismember['gender'][0].upper()    # male -> M, female -> F
                thishometown = thismember['hometown']

                # prep for if .. elif below by running some queries
                # handle close matches, if DOB does match
                age = timeu.age(asof, tYmd.asc2dt(thisdob))
                matchingmember = dbmembers.findmember(thisname, age, asofasc)
                dbmember = None
                if matchingmember:
                    membername, memberdob = matchingmember
                    if memberdob == thisdob:
                        dbmember = racedb.getunique(session, racedb.Runner, member=True, name=membername, dateofbirth=thisdob)

                # TODO: need to handle case where dob transitions from '' to actual date of birth

                # no member found, maybe there is nonmember of same name already in database
                if dbmember is None:
                    dbnonmember = racedb.getunique(session, racedb.Runner, member=False, name=thisname)
                    # TODO: there's a slim possibility that there are two nonmembers with the same name, but I'm sure we've already
                    # bolloxed that up in importresult as there's no way to discriminate between the two

                    # make report for new members
                    NEWMEMCSV.writerow({'name': thisname, 'dob': thisdob})

                # see if this runner is a member in the database already, or was a member once and make the update
                # add or update runner in database
                # get instance, if it exists, and make any updates
                found = False
                if dbmember is not None:
                    thisrunner = racedb.Runner(membername, thisdob, thisgender, thishometown)

                    # this is also done down below, but must be done here in case member's name has changed
                    if (thisrunner.name, thisrunner.dateofbirth) in inactiverunners:
                        inactiverunners.pop((thisrunner.name, thisrunner.dateofbirth))

                    # overwrite member's name if necessary
                    thisrunner.name = thisname

                    added = racedb.update(session, racedb.Runner, dbmember, thisrunner, skipcolumns=['id'])
                    found = True

                # if runner's name is in database, but not a member, see if this runner is a nonmemember which can be converted
                # Check first result for age against age within the input file
                # if ages match, convert nonmember to member
                # (dbnonmember is always set when this branch is evaluated, because dbmember is None here)
                elif dbnonmember is not None:
                    # get dt for date of birth, if specified
                    try:
                        dob = tYmd.asc2dt(thisdob)
                    except ValueError:
                        dob = None

                    # nonmember came into the database due to a nonmember race result, so we can use any race result to check nonmember's age
                    if dob:
                        result = session.query(racedb.RaceResult).filter_by(runnerid=dbnonmember.id).first()
                        resultage = result.agage
                        racedate = tYmd.asc2dt(result.race.date)
                        expectedage = racedate.year - dob.year - int((racedate.month, racedate.day) < (dob.month, dob.day))

                    # we found the right person, always if dob isn't specified, but preferably check race result for correct age
                    if dob is None or resultage == expectedage:
                        thisrunner = racedb.Runner(thisname, thisdob, thisgender, thishometown)
                        added = racedb.update(session, racedb.Runner, dbnonmember, thisrunner, skipcolumns=['id'])
                        found = True
                    else:
                        print('{} found in database, wrong age, expected {} found {} in {}'.format(thisname, expectedage, resultage, result))
                        # TODO: need to make file for these, also need way to force update, because maybe bad date in database for result
                        # currently this will cause a new runner entry

                # if runner was not found in database, just insert new runner
                if not found:
                    thisrunner = racedb.Runner(thisname, thisdob, thisgender, thishometown)
                    added = racedb.insert_or_update(session, racedb.Runner, thisrunner, skipcolumns=['id'], name=thisname, dateofbirth=thisdob)

                # remove this runner from collection of runners which should be deactivated in database
                if (thisrunner.name, thisrunner.dateofbirth) in inactiverunners:
                    inactiverunners.pop((thisrunner.name, thisrunner.dateofbirth))

                if OUT:
                    if added:
                        OUT.write('added or updated {0}\n'.format(thisrunner))
                    else:
                        OUT.write('no updates necessary {0}\n'.format(thisrunner))

        # any runners remaining in 'inactiverunners' should be deactivated
        for (name, dateofbirth) in inactiverunners:
            thisrunner = session.query(racedb.Runner).filter_by(name=name, dateofbirth=dateofbirth).first()    # should be only one returned by filter
            thisrunner.active = False

            if OUT:
                OUT.write('deactivated {0}\n'.format(thisrunner))

        session.commit()

    finally:
        if session is not None:
            session.close()
        if NEWMEM is not None:
            NEWMEM.close()
        if OUT is not None:
            OUT.close()
def collect(searchfile,outfile,begindate,enddate):
    #----------------------------------------------------------------------
    '''
    collect race results from runningahead

    each runner in searchfile is matched to a runningahead user by name and
    birth date, and that user's 'race' workouts within the date range are
    written to outfile

    :param searchfile: path to file containing names, genders, birth dates to search for
    :param outfile: output file path
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''
    outfilehdr = 'GivenName,FamilyName,name,DOB,Gender,race,date,age,miles,km,time'.split(',')

    # open files, context manager closes them even if collection fails part way through
    with open(searchfile,'rb') as _IN, open(outfile,'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT,outfilehdr)
        OUT.writeheader()

        # create runningahead access, grab users who have used the steeplechasers.org portal to RA
        ra = runningahead.RunningAhead()
        users = ra.listusers()
        rausers = []
        for user in users:
            rauser = ra.getuser(user['token'])
            rausers.append((user,rauser))

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        a_begindate = fdate.dt2asc(dt_begindate)
        adj_begindate = datetime.datetime(dt_begindate.year,dt_begindate.month,dt_begindate.day,0,0,0)
        e_begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        a_enddate = fdate.dt2asc(dt_enddate)
        adj_enddate = datetime.datetime(dt_enddate.year,dt_enddate.month,dt_enddate.day,23,59,59)
        e_enddate = timeu.dt2epoch(adj_enddate)

        # remember when we started, for elapsed time
        start = time.time()

        # loop through runners in the input file
        for runner in IN:
            fname,lname = runner['GivenName'],runner['FamilyName']
            membername = '{} {}'.format(fname,lname)
            log.debug('looking for {}'.format(membername))
            dt_dob = fdate.asc2dt(runner['DOB'])
            dob = runner['DOB']
            gender = runner['Gender'][0]

            # find thisuser
            foundmember = False
            for user,rauser in rausers:
                if 'givenName' not in rauser or 'birthDate' not in rauser: continue  # we need to know the name and birth date
                givenName = rauser['givenName']
                familyName = rauser['familyName'] if 'familyName' in rauser else ''
                # must be built the same way as membername, or the comparison below can never succeed
                rausername = '{} {}'.format(givenName,familyName)
                if rausername == membername and dt_dob == fdate.asc2dt(rauser['birthDate']):
                    foundmember = True
                    log.debug('found {}'.format(membername))
                    break
                # member is not this ra user, keep looking

            # if we couldn't find this member in RA, try the next member
            if not foundmember: continue

            ## skip getting results if participant too young
            #todayage = timeu.age(today,dt_dob)
            #if todayage < 14: continue

            # if we're here, found the right user, now let's look at the workouts
            # (user is still bound to the entry which matched when the search loop was left)
            workouts = ra.listworkouts(user['token'],begindate=a_begindate,enddate=a_enddate,getfields=FIELD['workout'].keys())

            # save race workouts, if any found
            results = []
            if workouts:
                for wo in workouts:
                    if wo['workoutName'].lower() != 'race': continue
                    if 'duration' not in wo['details']: continue    # seen once, not sure why
                    thisdate = wo['date']
                    dt_thisdate = fdate.asc2dt(thisdate)
                    thisdist = runningahead.dist2meters(wo['details']['distance'])
                    thistime = wo['details']['duration']
                    thisrace = wo['course']['name'] if 'course' in wo else 'unknown'
                    if thistime == 0:
                        log.warning('{} has 0 time for {} {}'.format(membername,thisrace,thisdate))
                        continue
                    stat = {'GivenName':fname,'FamilyName':lname,'name':membername,
                            'DOB':dob,'Gender':gender,'race':thisrace,'date':thisdate,'age':timeu.age(dt_thisdate,dt_dob),
                            'miles':thisdist/METERSPERMILE,'km':thisdist/1000.0,'time':render.rendertime(thistime,0)}
                    results.append(stat)

            # loop through each result
            for result in results:
                e_racedate = fdate.asc2epoch(result['date'])

                # skip result if outside the desired time window
                # the window runs from the beginning of the begin day to the end of the end day
                if e_racedate < e_begindate or e_racedate > e_enddate: continue

                # create output record and copy fields
                outrec = result
                resulttime = result['time']

                # strange case of TicksString = ':00'
                if resulttime[0] == ':':
                    resulttime = '0'+resulttime
                # make sure there are always hours, minutes and seconds fields
                while resulttime.count(':') < 2:
                    resulttime = '0:'+resulttime
                outrec['time'] = resulttime

                OUT.writerow(outrec)

    finish = time.time()
    # single argument call form prints the same text under python 2 and python 3
    print('elapsed time (min) = {}'.format((finish-start)/60))
# home grown
import version
from loutilities.transform import Transform
from loutilities.timeu import asctime, age
from datetime import date
from collections import defaultdict, OrderedDict

# time stuff
tymd = asctime('%Y-%m-%d')

# transform DETAILS file produced by scoretility Results Analysis
# each target field is either copied from the named source field, or
# calculated from the source row by the function given
xform = Transform(
    dict(
        name='runnername',
        gender='gender',
        # age as of today, based on the row's date of birth
        age=lambda row: age(date.today(), tymd.asc2dt(row['dob'])),
        distmiles='distmiles',
        # age grade percentage, truncated to an integer
        ag=lambda row: int(float(row['agpercent'])),
        # year in which the race was run
        year=lambda row: tymd.asc2dt(row['racedate']).year,
    ),
    sourceattr=False,
    targetattr=True)

# # from https://gist.github.com/shenwei356/71dcc393ec4143f3447d
# # from: http://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python
# #----------------------------------------------------------------------
# def ddict():
# #----------------------------------------------------------------------
#     return defaultdict(ddict)

#######################################################################
def _agematch(resultage, racedateage):
    #----------------------------------------------------------------------
    '''
    compare the age stored in a result with the runner's age on race day

    sometimes athlinks stores the age group of the runner, not exact age,
    so a result age which is a multiple of 5 is also accepted when it is the
    start of the 5 year group containing the runner's race day age

    :param resultage: integer age taken from the result
    :param racedateage: integer age of the runner on the race date
    :rtype: 'exact' if the ages are equal, 'fuzzy' if only the age group matches, None if neither
    '''
    if resultage == racedateage:
        return 'exact'

    # % and // keep this in integer arithmetic regardless of python version
    if resultage % 5 == 0 and (racedateage // 5) * 5 == resultage:
        return 'fuzzy'

    return None


def _padtime(resulttime):
    #----------------------------------------------------------------------
    '''
    pad a result time string so it has at least two ':' separators

    :param resulttime: non-empty time string, e.g., the TicksString from a result
    :rtype: padded time string
    '''
    # strange case of TicksString = ':00'
    if resulttime.startswith(':'):
        resulttime = '0' + resulttime

    while resulttime.count(':') < 2:
        resulttime = '0:' + resulttime

    return resulttime


def collect(searchfile,outfile,begindate,enddate):
    #----------------------------------------------------------------------
    '''
    collect race results from athlinks

    for each runner in searchfile, results are retrieved by name, then filtered by
    race date, age, gender, race category, distance and age grade before being
    written to outfile

    :param searchfile: path to file containing names, genders, birth dates to search for
    :param outfile: output file path
    :param begindate: epoch time - choose races between begindate and enddate
    :param enddate: epoch time - choose races between begindate and enddate
    '''

    # common fields between input and output
    commonfields = 'GivenName,FamilyName,DOB,Gender'.split(',')

    # open files
    # the with statement closes both files even if a lookup or parse raises part way through
    with open(searchfile,'rb') as _IN, open(outfile,'wb') as _OUT:
        IN = csv.DictReader(_IN)
        OUT = csv.DictWriter(_OUT,resultfilehdr)
        OUT.writeheader()

        # create athlinks
        athl = athlinks.Athlinks(debug=True)

        # reset begindate to beginning of day, enddate to end of day
        dt_begindate = timeu.epoch2dt(begindate)
        adj_begindate = datetime.datetime(dt_begindate.year,dt_begindate.month,dt_begindate.day,0,0,0)
        begindate = timeu.dt2epoch(adj_begindate)
        dt_enddate = timeu.epoch2dt(enddate)
        adj_enddate = datetime.datetime(dt_enddate.year,dt_enddate.month,dt_enddate.day,23,59,59)
        enddate = timeu.dt2epoch(adj_enddate)

        # get today's date for high level age filter
        # (today is only needed by the disabled age filter below)
        start = time.time()
        today = timeu.epoch2dt(start)

        # loop through runners in the input file
        for runner in IN:
            name = ' '.join([runner['GivenName'],runner['FamilyName']])
            dt_dob = ftime.asc2dt(runner['DOB'])

            ## skip getting results if participant too young
            #todayage = timeu.age(today,dt_dob)
            #if todayage < 14: continue

            # get results for this athlete
            results = athl.listathleteresults(name)

            # loop through each result
            for result in results:
                e_racedate = athlinks.gettime(result['Race']['RaceDate'])

                # skip result if outside the desired time window
                if e_racedate < begindate or e_racedate > enddate:
                    continue

                # create output record and copy common fields
                outrec = {field: runner[field] for field in commonfields}

                # skip result if runner's age doesn't match the age within the result
                # if only the age group matches, use result, but mark "fuzzy"
                dt_racedate = timeu.epoch2dt(e_racedate)
                racedateage = timeu.age(dt_racedate,dt_dob)
                agematch = _agematch(int(result['Age']), racedateage)
                if agematch is None:
                    continue
                if agematch == 'fuzzy':
                    outrec['fuzzyage'] = 'Y'

                # skip result if runner's gender doesn't match gender within the result
                resultgen = result['Gender'][0]
                if resultgen != runner['Gender'][0]:
                    continue

                # get course used for this result
                course = athl.getcourse(result['Race']['RaceID'],result['CourseID'])
                thiscourse = course['Courses'][0]

                # skip result if race category is not one of those in race_category
                thiscategory = thiscourse['RaceCatID']
                if thiscategory not in race_category:
                    continue

                # fill in output record fields from runner, result, course
                # combine name, get age
                outrec['name'] = name
                outrec['age'] = result['Age']

                # leave athlmember and athlid blank if result not from an athlink member
                if result['IsMember']:
                    outrec['athlmember'] = 'Y'
                    outrec['athlid'] = result['RacerID']

                # race name, location; convert from unicode if necessary
                # TODO: make function to do unicode translation -- apply to runner name as well (or should csv just store unicode?)
                racename = csvu.unicode2ascii(course['RaceName'])
                coursename = csvu.unicode2ascii(thiscourse['CourseName'])
                outrec['race'] = '{} / {}'.format(racename,coursename)
                outrec['date'] = ftime.epoch2asc(athlinks.gettime(course['RaceDate']))
                outrec['loc'] = csvu.unicode2ascii(course['Home'])

                # distance, category, time
                distmiles = athlinks.dist2miles(thiscourse['DistUnit'],thiscourse['DistTypeID'])
                distkm = athlinks.dist2km(thiscourse['DistUnit'],thiscourse['DistTypeID'])
                if distkm < 0.050:
                    continue    # skip timed events, which seem to be recorded with 0 distance
                outrec['miles'] = distmiles
                outrec['km'] = distkm
                outrec['category'] = race_category[thiscategory]

                # a result with no recorded time can't be reported; skip it
                # rather than abort the whole collection on an empty string
                resulttime = result['TicksString']
                if not resulttime:
                    continue
                resulttime = _padtime(resulttime)
                outrec['time'] = resulttime

                # just leave out age grade if exception occurs
                # Exception (not bare except) so ctrl-C still stops a long collection
                try:
                    agpercent,agresult,agfactor = ag.agegrade(racedateage,resultgen,distmiles,timeu.timesecs(resulttime))
                except Exception:
                    pass
                else:
                    outrec['ag'] = agpercent
                    if agpercent < 15 or agpercent >= 100:
                        continue    # skip obvious outliers

                OUT.writerow(outrec)

    finish = time.time()
    # single-argument print(...) prints the same text under python 2 and python 3
    print('number of URLs retrieved = {}'.format(athl.geturlcount()))
    print('elapsed time (min) = {}'.format((finish-start)/60))