def main(argv): if len(argv) != 3: print "Usage: apache_count.py configfile logfile" raise SystemExit # Read config file p = ConfigParser.ConfigParser() p.read(argv[1]) # Read server-relative URI prefix files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2] # Setup database connection dbname = p.get('database', 'name') dbuser = p.get('database', 'user') dbpass = p.get('database', 'password') dbconn = psycopg.connect(database=dbname, user=dbuser, password=dbpass) cursor = dbconn.cursor() filename = argv[2] if filename.endswith(".gz"): f = gzip.open(filename) elif filename.endswith(".bz2"): f = bz2.BZ2File(filename) else: f = open(filename) cursor.execute("select value from timestamps where name='http'") last_http = cursor.fetchone()[0] downloads = {} for line in f: m = logre.search(line) if not m: continue path = m.group('path') if not path.startswith(files_url): continue day = int(m.group('day')) month = m.group('month').lower() month = month_index[month] year = int(m.group('year')) hour = int(m.group('hour')) minute = int(m.group('min')) sec = int(m.group('sec')) date = DateTime(year, month, day, hour, minute, sec) zone = utc_offset(m.group('zone')) date = date - zone if date < last_http: continue filename = os.path.basename(path) # see if we have already read the old download count if not downloads.has_key(filename): cursor.execute( "select downloads from release_files " "where filename=%s", (filename, )) record = cursor.fetchone() if not record: # No file entry. 
Could be a .sig file continue # make sure we're working with a number downloads[filename] = record[0] or 0 # add a download downloads[filename] += 1 if not downloads: return # Update the download counts for filename, count in downloads.items(): cursor.execute( "update release_files set downloads=%s " "where filename=%s", (count, filename)) # Update the download timestamp date = psycopg.TimestampFromMx(date) cursor.execute("update timestamps set value=%s " "where name='http'", (date, )) dbconn.commit()
def main(argv): if len(argv) != 3: print "Usage: apache_count.py configfile logfile" raise SystemExit # Read config file p = ConfigParser.ConfigParser() p.read(argv[1]) # Read server-relative URI prefix files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2] # Setup database connection dbname = p.get('database', 'name') dbuser = p.get('database', 'user') dbpass = p.get('database', 'password') dbconn = psycopg.connect(database=dbname, user=dbuser, password=dbpass) cursor = dbconn.cursor() filename = argv[2] if filename.endswith(".gz"): f = gzip.open(filename) elif filename.endswith(".bz2"): f = bz2.BZ2File(filename) else: f = open(filename) cursor.execute("select value from timestamps where name='http'") last_http = cursor.fetchone()[0] downloads = {} for line in f: m = logre.search(line) if not m: continue path = m.group('path') if not path.startswith(files_url): continue day = int(m.group('day')) month = m.group('month').lower() month = month_index[month] year = int(m.group('year')) hour = int(m.group('hour')) minute = int(m.group('min')) sec = int(m.group('sec')) date = DateTime(year, month, day, hour, minute, sec) zone = utc_offset(m.group('zone')) date = date - zone if date < last_http: continue filename = os.path.basename(path) # see if we have already read the old download count if not downloads.has_key(filename): cursor.execute("select downloads from release_files " "where filename=%s", (filename,)) record = cursor.fetchone() if not record: # No file entry. Could be a .sig file continue # make sure we're working with a number downloads[filename] = record[0] or 0 # add a download downloads[filename] += 1 if not downloads: return # Update the download counts for filename, count in downloads.items(): cursor.execute("update release_files set downloads=%s " "where filename=%s", (count, filename)) # Update the download timestamp date = psycopg.TimestampFromMx(date) cursor.execute("update timestamps set value=%s " "where name='http'", (date,)) dbconn.commit()
def main(config_file, logfile):
    """Populate the download counts.

    Reads the local Apache log plus yesterday's stats files from every
    registered mirror, updates per-file download counts in the database,
    then writes the local and global daily stats files.
    """
    # Read config file
    p = ConfigParser.ConfigParser()
    p.read(config_file)

    # Read mirror infos (folder where per-mirror stats caches are kept)
    mirrors_folder = p.get('mirrors', 'folder')

    # Read server-relative URI prefix
    files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2]

    # Setup database connection
    dbconn, cursor = get_cursor(p)

    # create a log reader, that filters on files_url
    # build an iterator here with chain and all distant files
    cursor.execute("select * from mirrors")

    def read_distant_stats(mirror, filename):
        # Fetch (and locally cache) one day's stats file from a mirror.
        mirror_domain = urlparse.urlparse(mirror[0])[1]
        mirror_domain = os.path.join(mirrors_folder, mirror_domain)
        distant_reader = ApacheDistantLocalStats(mirror_domain)
        stat_file_url = '%s/%s/%s' % (mirror[0], mirror[3], filename)
        return distant_reader.read_stats(stat_file_url)

    # it supposes it runs the program at day + 1
    yesterday = datetime.datetime.now() - datetime.timedelta(1)
    # BUG FIX: keep the date-based stats file name in its own variable;
    # previously it lived in "filename", which the log loop below clobbered
    # with the last release file seen, so the stats files were mis-named.
    daily_filename = yesterday.strftime('%Y-%m-%d.bz2')

    mirror_logs = [read_distant_stats(mirror, daily_filename)
                   for mirror in cursor.fetchall()]
    logs = chain(*[ApacheLogReader(logfile, files_url)] + mirror_logs)
    _log('Working with local stats and %d mirror(s)' % len(mirror_logs))

    # get last http access
    cursor.execute("select value from timestamps where name='http'")
    last_http = cursor.fetchone()[0]
    _log('Last time stamp was : %s' % last_http)

    downloads = {}

    # let's read the logs in the apache file
    for line in logs:
        # Missing fields default to yesterday / midnight / one download.
        day = int(line.get('day', yesterday.day))
        month = line.get('month', yesterday.month)
        year = int(line.get('year', yesterday.year))
        hour = int(line.get('hour', 0))
        minute = int(line.get('min', 0))
        sec = int(line.get('sec', 0))
        date = DateTime(year, month, day, hour, minute, sec)
        zone = utc_offset(line.get('zone', 0))
        date = date - zone
        count = int(line.get('count', 1))
        if date < last_http:
            continue
        filename = line['filename']
        _dotlog('.')
        # see if we have already read the old download count
        if not downloads.has_key(filename):
            cursor.execute("select downloads from release_files "
                           "where filename=%s", (filename,))
            record = cursor.fetchone()
            if not record:
                # No file entry. Could be a .sig file
                continue
            # make sure we're working with a number
            downloads[filename] = record[0] or 0
        # add a download
        downloads[filename] += count

    # BUG FIX: the guard used to be "downloads != []" -- a dict compared
    # with a list, which is always true.  Test the dict's truth value.
    if downloads:
        for filename, count in downloads.items():
            # Update the download counts in the DB
            _log('Updating download count for %s: %s' % (filename, count))
            cursor.execute("update release_files set downloads=%s "
                           "where filename=%s", (count, filename))

    # Update the download timestamp.  Kept unconditional: the old
    # always-true guard meant this ran on every invocation.
    date = psycopg.TimestampFromMx(datetime.datetime.now())
    cursor.execute("update timestamps set value=%s "
                   "where name='http'", (date,))
    dbconn.commit()

    # now creating the local stats file
    _log('Building local stats file')
    stats = ApacheLocalStats()
    stats_dir = p.get('mirrors', 'local-stats')
    if not os.path.exists(stats_dir):
        raise ValueError('"%s" folder not found (local-stats in config.ini)' \
                         % stats_dir)
    stats_file = os.path.join(stats_dir, daily_filename)
    stats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                            logfile, stats_file, files_url, 'bz2')

    # now creating the global stats file
    # which is built with the latest database counts
    _log('Building global stats file')
    globalstats_dir = p.get('mirrors', 'global-stats')
    if not os.path.exists(globalstats_dir):
        raise ValueError('"%s" folder not found (global-stats in config.ini)' \
                         % globalstats_dir)

    cursor.execute("select name, filename, downloads from release_files")

    def get_line(files_url):
        # Yield pseudo log records built from the DB so the stats builder
        # writes one entry per release file with its cumulative count.
        for line in cursor:
            data = {}
            data['day'] = yesterday.day
            data['month'] = yesterday.month
            data['year'] = yesterday.year
            data['filename'] = line[1]
            data['useragent'] = 'Unkown'  # not stored yet (typo preserved: it is written into the stats files)
            data['packagename'] = line[0]
            data['count'] = line[2]
            yield data

    gstats = LocalStats()
    stats_file = os.path.join(globalstats_dir, daily_filename)
    gstats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                             get_line, stats_file, files_url, 'bz2')