def __update(self, sql, add):
    """Add or remove the parkruns selected by ``sql`` in ``self.__parkruns``.

    A WHERE clause is appended to ``sql`` before it is executed:

    * unless ``self.inactive`` is set, only rows with ``Active = 1`` are
      selected;
    * in ``Mode.NEWEVENTS`` only rows whose ``LastUpdated`` is NULL or more
      than three days old are selected.

    NOTE(review): because the clause is appended verbatim, ``sql`` is
    presumably expected not to carry its own WHERE clause -- confirm
    against the callers.

    Args:
        sql: Base SELECT statement. Each result row must provide the
            columns ``Parkrun``, ``URL``, ``LastEventNumber``,
            ``EventHistoryURL``, ``EventNumberURL`` and ``LatestResultsURL``
            (only ``Parkrun`` is read when removing).
        add: If true, selected parkruns that are not yet known are added;
            otherwise selected parkruns that are known are removed.

    In ``Mode.NEWEVENTS`` every parkrun returned by
    ``getParkrunCancellationsThisWeek`` is additionally removed.
    """
    stale_filter = (
        '(LastUpdated < dateadd(day, -3, getdate()) or LastUpdated IS NULL)')

    if not self.inactive:
        sql += " where Active = 1"
    if self.mode == Mode.NEWEVENTS:
        # Use AND when the Active filter already opened the WHERE clause.
        if not self.inactive:
            sql += ' AND ' + stale_filter
        else:
            sql += ' WHERE ' + stale_filter

    c = Connection(self.config)
    try:
        for row in c.execute(sql):
            name = row['Parkrun']
            if add:
                if name not in self.__parkruns:
                    self.logger.debug('Adding event {}'.format(name))
                    self.__parkruns[name] = {
                        'Name': name,
                        'URL': row['URL'],
                        # Fourth '/'-separated component of the URL.
                        'EventURL': row['URL'].split('/')[3],
                        'lastEvent': row['LastEventNumber'],
                        'EventHistoryURL': row['EventHistoryURL'],
                        'EventNumberURL': row['EventNumberURL'],
                        'LatestResultsURL': row['LatestResultsURL'],
                    }
            elif name in self.__parkruns:
                self.logger.debug('Removing event {}'.format(name))
                del self.__parkruns[name]

        # The cancellation list is only consulted when looking for new
        # events, so the query is skipped in every other mode.
        if self.mode == Mode.NEWEVENTS:
            cancelled = c.execute(
                "SELECT * FROM getParkrunCancellationsThisWeek")
            for row in cancelled:
                name = row['Parkrun']
                if name in self.__parkruns:
                    self.logger.debug(
                        'Removing cancelled event {}'.format(name))
                    del self.__parkruns[name]
    finally:
        # Release the database connection even if a query or a malformed
        # row raises.
        c.close()
) parser.add_argument( '--delay', type=int, default=5, help='Wait n seconds before processing the next athlete') args = parser.parse_args() logger.debug(args) limit = args.limit delay = args.delay data = c.execute( "select * from getAthleteCheckHistoryList({}) ORDER BY EventCount DESC, NextCheckDate, HistoryLastChecked DESC, AthleteID" .format(limit)) baseURL = "http://www.parkrun.com.au/results/athleteeventresultshistory/?athleteNumber={}&eventNumber=0" counter = 0 start = timer() inQ = multiprocessing.Queue() outQ = multiprocessing.Queue() worker = Worker(inQ, outQ, 0, Mode.NORMAL, config, 10) worker.start() try: for athlete in data: tick = timer() while not outQ.empty():
def run(self):
    """Worker loop: process parkrun records from ``self.inQ`` until a ``None`` arrives.

    Opens its own ``Connection``, configures logging from ``self.config``
    and then repeatedly takes a parkrun record (a dict) from ``self.inQ``.
    Progress and errors are reported as ``Message`` objects on
    ``self.msgQ``. What is done with each record depends on ``self.mode``:

    * ``Mode.CHECKURLS`` -- fetch the parkrun's URL and record via
      ``updateParkrunURL`` whether it could be retrieved.
    * ``Mode.NEWEVENTS`` -- fetch the latest results page and, when
      ``checkParkrunEvent`` rejects the stored event, replace the event
      and re-import every result row.
    * ``Mode.NORMAL`` -- fetch the event history and do the same check and
      re-import for every event listed there.

    A ``None`` record is the shutdown signal: the loop ends and the
    connection is closed.

    NOTE(review): the nesting below was reconstructed from the statement
    order; places where more than one indentation level would be
    plausible are marked -- confirm against the original layout.
    """
    c = Connection(self.config)
    logging.config.dictConfig(self.config)
    self.logger = logging.getLogger(__name__)
    self.logger.info('Process {} Running'.format(self.id))
    self.msgQ.put(Message('Process', self.id, 'Running'))
    while True:
        parkrun = self.inQ.get()
        self.logger.debug(parkrun)
        if parkrun is None:
            # None is the stop sentinel: announce the exit and leave the loop.
            self.logger.info('Process {} Exiting'.format(self.id))
            self.msgQ.put(Message('Process', self.id, 'Exiting'))
            break
        self.logger.debug('Process {} got record {}'.format(
            self.id, parkrun['EventURL']))
        # Normalise a missing last event number to 0.
        if parkrun['lastEvent'] is None:
            parkrun['lastEvent'] = 0
        if self.mode == Mode.CHECKURLS:
            # A non-None result from getURL is treated as "URL is valid".
            if self.getURL(parkrun['URL']) is not None:
                c.updateParkrunURL(parkrun['Name'], True, True)
                self.msgQ.put(
                    Message('Process', self.id,
                            'Verified ' + parkrun['Name'] + ' valid'))
            else:
                c.updateParkrunURL(parkrun['Name'], True, False)
                self.msgQ.put(
                    Message(
                        'Error', self.id, 'Could not verify ' +
                        parkrun['Name'] + ' as valid'))
        if self.mode == Mode.NEWEVENTS:
            self.logger.info(
                'Process {} checking for new results for {}'.format(
                    self.id, parkrun['EventURL']))
            self.msgQ.put(
                Message('Process', self.id,
                        'Checking for new results for ' + parkrun['Name']))
            # getLatestEvent yields (event number, event date, result rows).
            parkrun['EventNumber'], parkrun[
                'EventDate'], data = self.getLatestEvent(
                    parkrun['URL'] + parkrun['LatestResultsURL'])
            if data is not None:
                self.logger.debug(
                    'Event {} got {} events in history'.format(
                        parkrun['EventURL'], len(data)))
                # The number of result rows is stored as the runner count.
                parkrun['Runners'] = len(data)
                # Add the event if it's a new event
                # Check the event has the correct number of runners
                if not c.checkParkrunEvent(parkrun):
                    self.logger.info(
                        'Parkrun {} event {}: runners did not match - reimporting.'
                        .format(parkrun['Name'], parkrun['EventNumber']))
                    # If not, delete the old event record and re-import the data.
                    self.msgQ.put(
                        Message(
                            'Process', self.id,
                            'Updating ' + parkrun['Name'] + ' event ' +
                            xstr(parkrun['EventNumber'])))
                    eventID = c.replaceParkrunEvent(parkrun)
                    self.logger.debug(
                        'getLastEvent found {} runners'.format(len(data)))
                    # Attach every result row to the replaced event.
                    for row in data:
                        row['EventID'] = eventID
                        c.addParkrunEventPosition(row)
                    # NOTE(review): assumed to run once per re-import,
                    # after the row loop -- confirm.
                    sleep(self.delay)
        if self.mode == Mode.NORMAL:
            data = self.getEventHistory(parkrun['URL'] +
                                        parkrun['EventHistoryURL'])
            if data is not None:
                self.logger.debug(
                    'Event {} got {} events in history'.format(
                        parkrun['URL'], len(data)))
                for row in data:
                    # Copy the parkrun's identity onto each history row.
                    row['Name'] = parkrun['Name']
                    row['EventURL'] = parkrun['EventURL']
                    # Add the event if it's a new event
                    self.msgQ.put(
                        Message(
                            'Process', self.id,
                            'Checking ' + row['Name'] + ' event ' +
                            xstr(row['EventNumber'])))
                    self.logger.debug(row)
                    self.logger.debug(
                        'Process {} Checking {} event {}'.format(
                            self.id, row['EventURL'],
                            xstr(row['EventNumber'])))
                    # Check the event has the correct number of runners
                    if not c.checkParkrunEvent(row):
                        # If not, delete the old event record and re-import the data.
                        self.logger.info(
                            'Parkrun {} event {}: runners did not match - reimporting.'
                            .format(parkrun['EventURL'], row['EventNumber']))
                        self.msgQ.put(
                            Message(
                                'Process', self.id,
                                'Updating ' + row['Name'] + ' event ' +
                                xstr(row['EventNumber'])))
                        eventID = c.replaceParkrunEvent(row)
                        eData = self.getEvent(
                            parkrun['URL'] + parkrun['EventNumberURL'],
                            row['EventNumber'])
                        if eData is not None:
                            self.logger.debug(
                                'getEvent found {} runners'.format(
                                    len(eData)))
                            for eRow in eData:
                                eRow['EventID'] = eventID
                                c.addParkrunEventPosition(eRow)
                            # NOTE(review): assumed to run once per
                            # re-imported event, after the row loop -- confirm.
                            sleep(self.delay)
                        else:
                            self.logger.debug('getEvent found no runners')
            else:
                self.logger.warning(
                    'Parkrun {} returns no history page.'.format(
                        parkrun['Name']))
        # Set each parkrun's LastUpdated to the date of its most recent event.
        # NOTE(review): assumed to run once per processed record, in every
        # mode -- confirm it was not nested under the NORMAL branch.
        c.execute(
            "update p set p.LastUpdated = e.LastEvent from parkruns as p inner join (select ParkrunID, max(EventDate) as LastEvent from events group by ParkrunID) as e on p.ParkrunID = e.ParkrunID"
        )
        self.logger.debug('Sleeping for {} seconds'.format(self.delay))
        sleep(self.delay)
    # Only reached through the break above.
    c.close()