def parse_log(logfile):
    """Read one downloaded log file and parse it into a game record.

    The file is first opened as gzip-compressed UTF-8 text; if that fails
    for any reason it is re-read as plain UTF-8 text.  The text is then
    handed to ``gokoparse.parse_goko_log``.

    On success the parsed game gets ``logfile`` and ``time`` attributes and
    is appended to ``games``.  On a read or parse failure the current
    ``sys.exc_info()`` triple is stored in ``failed[logfile]`` and the log
    time in ``failed_logtime[logfile]``.

    NOTE(review): ``FILE_REGEX``, ``log_dir``, ``logger``, ``gokoparse``,
    ``games``, ``failed`` and ``failed_logtime`` are not defined in this
    function; they must be supplied by an enclosing or module scope.

    Args:
        logfile: File name (no directory part) that ``FILE_REGEX`` matches.
            Its second regex group is read as an integer number of
            milliseconds and converted with ``datetime.fromtimestamp``.

    Returns:
        None.  Results are reported through ``games``, ``failed`` and
        ``failed_logtime``.

    Raises:
        AttributeError: If ``logfile`` does not match ``FILE_REGEX``
            (``match`` returns ``None``).
    """
    # Group 1 (the hash) is extracted alongside the timestamp but not used.
    _loghash, logtime_ms = FILE_REGEX.match(logfile).group(1, 2)
    logtime = datetime.datetime.fromtimestamp(int(logtime_ms) / 1000)
    logfile_full = log_dir + '/' + logfile

    try:
        try:
            with gzip.open(logfile_full, 'rt', encoding='utf-8') as fh:
                logtext = fh.read()
        except Exception:
            # Not readable as gzip: fall back to treating it as plain text.
            with open(logfile_full, 'rt', encoding='utf-8') as fh:
                logtext = fh.read()
    except Exception:
        logger.warning('Cannot read file: %s', logfile_full)
        failed[logfile] = sys.exc_info()
        failed_logtime[logfile] = logtime
        return

    try:
        game = gokoparse.parse_goko_log(logtext)
        game.logfile = logfile
        game.time = logtime
    except Exception:
        # Keep the full exc_info so the caller can report the traceback.
        failed[logfile] = sys.exc_info()
        failed_logtime[logfile] = logtime
    else:
        games.append(game)
        if len(games) % 100 == 0:
            logger.info('Parsed %d', len(games))
def parse_new_logs(date):
    """Parse the downloaded-but-unparsed logs of one day and store them.

    Steps:
      1. List the files in ``LOG_DIR/<YYYYMMDD>`` and drop every name
         returned by ``db_manager.search_daily_log_filenames(date)``.
      2. Read each remaining file (gzip first, plain text as a fallback)
         and parse it with ``gokoparse.parse_goko_log``.
      3. Pass all parsed games to ``db_manager.inserts`` in a single call.
      4. Log every failure and record it with
         ``db_manager.insert_parsefail``.

    Args:
        date: Object providing ``strftime``; selects the daily log
            directory and is passed to the database filename lookup.

    Returns:
        The number of games handed to ``db_manager.inserts`` when that
        call succeeded, otherwise 0.

    Raises:
        AttributeError: If a file name in the directory does not match
            ``FILE_REGEX`` (``match`` returns ``None``).
    """
    # Determine which logs we have downloaded but not yet parsed
    log_dir = '%s/%s' % (LOG_DIR, date.strftime('%Y%m%d'))
    local_logs = os.listdir(log_dir)
    dblogs = db_manager.search_daily_log_filenames(date)
    not_parsed = set(local_logs) - set(dblogs)
    logger.info('Found %d downloaded logs to be parsed', len(not_parsed))

    # Parse the unparsed logs
    games = []
    failed = {}          # logfile name -> sys.exc_info() triple
    failed_logtime = {}  # logfile name -> datetime taken from the file name
    for logfile in not_parsed:
        # Group 1 (the hash) is extracted but not used; group 2 is read as
        # an integer number of milliseconds.
        _loghash, logtime_ms = FILE_REGEX.match(logfile).group(1, 2)
        logtime = datetime.datetime.fromtimestamp(int(logtime_ms) / 1000)
        logfile_full = log_dir + '/' + logfile

        try:
            try:
                with gzip.open(logfile_full, 'rt', encoding='utf-8') as fh:
                    logtext = fh.read()
            except Exception:
                # Not readable as gzip: fall back to plain text.
                with open(logfile_full, 'rt', encoding='utf-8') as fh:
                    logtext = fh.read()
        except Exception:
            logger.warning('Cannot read file: %s', logfile_full)
            failed[logfile] = sys.exc_info()
            failed_logtime[logfile] = logtime
            continue

        try:
            game = gokoparse.parse_goko_log(logtext)
            game.logfile = logfile
            game.time = logtime
        except Exception:
            # Keep the full exc_info so the traceback can be reported below.
            failed[logfile] = sys.exc_info()
            failed_logtime[logfile] = logtime
        else:
            games.append(game)
            if len(games) % 100 == 0:
                logger.info('Parsed %d', len(games))

    logger.info('Finished parsing')
    logger.info('%d games parsed', len(games))
    logger.info('%d games failed', len(failed))

    # Insert parsed games into database.  All games are passed in one call;
    # any batching is up to db_manager.inserts.
    total_inserted = 0
    try:
        db_manager.inserts(games)
        total_inserted += len(games)
    except Exception:
        # Best effort: report the failure with its traceback and carry on
        # so that parse failures are still recorded below.
        logger.exception('Failed to insert parsed games into DB')
    logger.info('inserted %d parsed games into DB', total_inserted)

    # Notify of failed logs. Record in database.
    for name, exc_info in failed.items():
        exc_name = exc_info[0].__name__
        logger.warning('Failed to parse: %s in %s', exc_name, name)
        for line in traceback.format_tb(exc_info[2]):
            logger.warning(line)
        db_manager.insert_parsefail(failed_logtime[name], name, exc_name)

    return total_inserted
def parse_new_logs(date, log_root='/mnt/raid/media/dominion/logs'):
    """Parse one day's unparsed logs, store them and update ratings.

    Steps:
      1. List the files in ``<log_root>/<YYYYMMDD>`` and drop every name
         returned by ``db_manager.search_daily_log_filenames(date)``.
      2. Read each remaining file (gzip first, plain text as a fallback)
         and parse it with ``gokoparse.parse_goko_log``.
      3. Pass all parsed games to ``db_manager.inserts``, then call
         ``ts.record_ratings`` repeatedly until it returns 0.
      4. Log every failure and pass its file name to
         ``db_manager.insert_parsefail``.

    Args:
        date: Object providing ``strftime``; selects the daily log
            directory and is passed to the database filename lookup.
        log_root: Directory containing the per-day log directories.  The
            default is the previously hard-coded location.

    Returns:
        The sum of the counts returned by ``ts.record_ratings``.  Note
        that this is not ``len(games)``, even though the final log message
        calls it the number of inserted games.  It is 0 (or a partial sum)
        when the insert/rating step raised.

    Raises:
        AttributeError: If a file name in the directory does not match
            ``FILE_REGEX`` (``match`` returns ``None``).
    """
    # Determine which logs we have downloaded but not yet parsed
    log_dir = '%s/%s' % (log_root, date.strftime('%Y%m%d'))
    local_logs = os.listdir(log_dir)
    dblogs = db_manager.search_daily_log_filenames(date)
    not_parsed = set(local_logs) - set(dblogs)
    logging.info('Found %d downloaded logs to be parsed', len(not_parsed))

    # Parse the unparsed logs
    games = []
    failed = {}  # logfile name -> sys.exc_info() triple
    for logfile in not_parsed:
        # Group 1 (the hash) is extracted but not used; group 2 is read as
        # an integer number of milliseconds.
        _loghash, logtime_ms = FILE_REGEX.match(logfile).group(1, 2)
        logtime = datetime.datetime.fromtimestamp(int(logtime_ms) / 1000)
        logfile_full = log_dir + '/' + logfile

        try:
            try:
                with gzip.open(logfile_full, 'rt', encoding='utf-8') as fh:
                    logtext = fh.read()
            except Exception:
                # Not readable as gzip: fall back to plain text.
                with open(logfile_full, 'rt', encoding='utf-8') as fh:
                    logtext = fh.read()
        except Exception:
            logging.warning('Cannot read file: %s', logfile_full)
            failed[logfile] = sys.exc_info()
            continue

        try:
            game = gokoparse.parse_goko_log(logtext)
            game.logfile = logfile
            game.time = logtime
        except Exception:
            # Keep the full exc_info so the traceback can be reported below.
            failed[logfile] = sys.exc_info()
        else:
            games.append(game)
            if len(games) % 100 == 0:
                logging.info('Parsed %d', len(games))

    logging.info('Finished parsing')
    logging.info('%d games parsed', len(games))
    logging.info('%d games failed', len(failed))

    # Insert parsed games into database, then rate them 100 at a time.
    # TODO: Logs are read in file order, not chronological order. This
    # is a (minor) violation of the TrueSkill algorithm: a game played at
    # 11:59 PM might be processed before one played at 12:00 AM on the
    # same day (almost 24 hours earlier).
    total_inserted = 0
    try:
        db_manager.inserts(games)

        # Update ratings.  Always runs at least once and stops when a
        # batch reports a count of 0.
        while True:
            last_rated = db_manager.get_last_rated_game()
            # NOTE(review): the pair is presumably (time, logfile) of the
            # last rated game -- confirm against db_manager.
            if last_rated:
                last_time, last_logfile = last_rated
            else:
                last_time, last_logfile = None, None
            count = ts.record_ratings(100, last_time, last_logfile,
                                      ts.isodominion_env)
            total_inserted += count
            if count == 0:
                break
    except Exception:
        # Best effort: report the failure with its traceback and carry on
        # so that parse failures are still recorded below.
        logging.exception('Failed to insert or rate parsed games')
    logging.info('inserted %d parsed games into DB', total_inserted)

    # Notify of failed logs. Record in database.
    for name, exc_info in failed.items():
        logging.warning('Failed to parse: %s in %s',
                        exc_info[0].__name__, name)
        for line in traceback.format_tb(exc_info[2]):
            logging.warning(line)
        # TODO: record failures
        db_manager.insert_parsefail(name)

    return total_inserted