def main(argv):
    if len(argv) != 3:
        print "Usage: apache_count.py configfile logfile"
        raise SystemExit
    # Read config file
    p = ConfigParser.ConfigParser()
    p.read(argv[1])
    # Read server-relative URI prefix
    files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2]
    # Setup database connection
    dbname = p.get('database', 'name')
    dbuser = p.get('database', 'user')
    dbpass = p.get('database', 'password')
    dbconn = psycopg.connect(database=dbname, user=dbuser, password=dbpass)
    cursor = dbconn.cursor()

    filename = argv[2]
    if filename.endswith(".gz"):
        f = gzip.open(filename)
    elif filename.endswith(".bz2"):
        f = bz2.BZ2File(filename)
    else:
        f = open(filename)

    cursor.execute("select value from timestamps where name='http'")
    last_http = cursor.fetchone()[0]

    downloads = {}
    for line in f:
        m = logre.search(line)
        if not m:
            continue
        path = m.group('path')
        if not path.startswith(files_url):
            continue
        day = int(m.group('day'))
        month = m.group('month').lower()
        month = month_index[month]
        year = int(m.group('year'))
        hour = int(m.group('hour'))
        minute = int(m.group('min'))
        sec = int(m.group('sec'))
        date = DateTime(year, month, day, hour, minute, sec)
        zone = utc_offset(m.group('zone'))
        date = date - zone

        if date < last_http:
            continue

        filename = os.path.basename(path)
        # see if we have already read the old download count
        if not downloads.has_key(filename):
            cursor.execute(
                "select downloads from release_files "
                "where filename=%s", (filename, ))
            record = cursor.fetchone()
            if not record:
                # No file entry. Could be a .sig file
                continue
            # make sure we're working with a number
            downloads[filename] = record[0] or 0
        # add a download
        downloads[filename] += 1

    if not downloads:
        return

    # Update the download counts
    for filename, count in downloads.items():
        cursor.execute(
            "update release_files set downloads=%s "
            "where filename=%s", (count, filename))
    # Update the download timestamp
    date = psycopg.TimestampFromMx(date)
    cursor.execute("update timestamps set value=%s "
                   "where name='http'", (date, ))
    dbconn.commit()
# Example #2 (duplicate of the script above, reproduced verbatim)
def main(argv):
    if len(argv) != 3:
        print "Usage: apache_count.py configfile logfile"
        raise SystemExit
    # Read config file
    p = ConfigParser.ConfigParser()
    p.read(argv[1])
    # Read server-relative URI prefix
    files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2]
    # Setup database connection
    dbname = p.get('database', 'name')
    dbuser = p.get('database', 'user')
    dbpass = p.get('database', 'password')
    dbconn = psycopg.connect(database=dbname, user=dbuser, password=dbpass)
    cursor = dbconn.cursor()

    filename = argv[2]
    if filename.endswith(".gz"):
        f = gzip.open(filename)
    elif filename.endswith(".bz2"):
        f = bz2.BZ2File(filename)
    else:
        f = open(filename)

    cursor.execute("select value from timestamps where name='http'")
    last_http = cursor.fetchone()[0]

    downloads = {}
    for line in f:
        m = logre.search(line)
        if not m:
            continue
        path = m.group('path')
        if not path.startswith(files_url):
            continue
        day = int(m.group('day'))
        month = m.group('month').lower()
        month = month_index[month]
        year = int(m.group('year'))
        hour = int(m.group('hour'))
        minute = int(m.group('min'))
        sec = int(m.group('sec'))
        date = DateTime(year, month, day, hour, minute, sec)
        zone = utc_offset(m.group('zone'))
        date = date - zone
        
        if date < last_http:
            continue

        filename = os.path.basename(path)
        # see if we have already read the old download count
        if not downloads.has_key(filename):
            cursor.execute("select downloads from release_files "
                           "where filename=%s", (filename,))
            record = cursor.fetchone()
            if not record:
                # No file entry. Could be a .sig file
                continue
            # make sure we're working with a number
            downloads[filename] = record[0] or 0
        # add a download
        downloads[filename] += 1

    if not downloads:
        return

    # Update the download counts
    for filename, count in downloads.items():
        cursor.execute("update release_files set downloads=%s "
                       "where filename=%s", (count, filename))
    # Update the download timestamp
    date = psycopg.TimestampFromMx(date)
    cursor.execute("update timestamps set value=%s "
                   "where name='http'", (date,))
    dbconn.commit()
def main(config_file, logfile):
    """Populate the download counts.

    Merges yesterday's stats from each registered mirror with the local
    Apache log, bumps per-file download counts in release_files,
    advances the 'http' timestamp, then writes the daily local and
    global stats files (bz2-compressed, named YYYY-MM-DD.bz2).
    """
    # Read config file
    p = ConfigParser.ConfigParser()
    p.read(config_file)

    # Folder under which per-mirror stat files are cached locally
    mirrors_folder = p.get('mirrors', 'folder')

    # Read server-relative URI prefix
    files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2]
    # Setup database connection
    dbconn, cursor = get_cursor(p)

    # create a log reader, that filters on files_url
    # build an iterator here with chain and all distant files
    cursor.execute("select * from mirrors")

    def read_distant_stats(mirror, stats_name):
        # One stats iterator per mirror; fetched files are cached under
        # mirrors_folder/<mirror domain>/.
        mirror_domain = urlparse.urlparse(mirror[0])[1]
        cache_dir = os.path.join(mirrors_folder, mirror_domain)
        distant_reader = ApacheDistantLocalStats(cache_dir)
        stat_file_url = '%s/%s/%s' % (mirror[0], mirror[3], stats_name)
        return distant_reader.read_stats(stat_file_url)

    # it supposes it runs the program at day + 1
    yesterday = datetime.datetime.now() - datetime.timedelta(1)
    # Date-based name shared by the mirror fetches and both output
    # files.  BUG FIX: the original stored this in `filename`, which the
    # counting loop below then overwrote with release-file names, so the
    # stats files ended up named after the last downloaded file.
    day_file = yesterday.strftime('%Y-%m-%d.bz2')
    mirror_readers = [read_distant_stats(mirror, day_file)
                      for mirror in cursor.fetchall()]

    logs = chain(*[ApacheLogReader(logfile, files_url)] + mirror_readers)
    _log('Working with local stats and %d mirror(s)' % len(mirror_readers))

    # get last http access
    cursor.execute("select value from timestamps where name='http'")
    last_http = cursor.fetchone()[0]
    _log('Last time stamp was : %s' % last_http)

    downloads = {}

    # let's read the logs in the apache file
    for line in logs:
        # Mirror stat entries may omit fields; default to yesterday /
        # midnight / a count of 1.
        day = int(line.get('day', yesterday.day))
        month = line.get('month', yesterday.month)
        year = int(line.get('year', yesterday.year))
        hour = int(line.get('hour', 0))
        minute = int(line.get('min', 0))
        sec = int(line.get('sec', 0))
        date = DateTime(year, month, day, hour, minute, sec)
        zone = utc_offset(line.get('zone', 0))
        date = date - zone
        count = int(line.get('count', 1))
        if date < last_http:
            continue

        release_file = line['filename']

        _dotlog('.')
        # see if we have already read the old download count
        if release_file not in downloads:
            cursor.execute("select downloads from release_files "
                           "where filename=%s", (release_file,))
            record = cursor.fetchone()
            if not record:
                # No file entry. Could be a .sig file
                continue
            # make sure we're working with a number (NULL -> 0)
            downloads[release_file] = record[0] or 0
        # add a download
        downloads[release_file] += count

    # BUG FIX: the original tested `downloads != []`, which compares a
    # dict to a list and is always true -- even for an empty dict.
    if downloads:
        for release_file, count in downloads.items():
            # Update the download counts in the DB
            _log('Updating download count for %s: %s'
                 % (release_file, count))
            cursor.execute("update release_files set downloads=%s "
                           "where filename=%s", (count, release_file))

        # Update the download timestamp
        date = psycopg.TimestampFromMx(datetime.datetime.now())
        cursor.execute("update timestamps set value=%s "
                       "where name='http'", (date,))

        dbconn.commit()

    # now creating the local stats file
    _log('Building local stats file')
    stats = ApacheLocalStats()
    stats_dir = p.get('mirrors', 'local-stats')
    if not os.path.exists(stats_dir):
        raise ValueError('"%s" folder not found (local-stats in config.ini)'
                         % stats_dir)
    stats_file = os.path.join(stats_dir, day_file)
    stats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                            logfile, stats_file, files_url, 'bz2')

    # now creating the global stats file
    # which is built with the latest database counts
    _log('Building global stats file')
    globalstats_dir = p.get('mirrors', 'global-stats')
    if not os.path.exists(globalstats_dir):
        raise ValueError('"%s" folder not found (global-stats in config.ini)'
                         % globalstats_dir)
    cursor.execute("select name, filename, downloads from release_files")

    def get_line(files_url):
        # Adapter generator: yields dicts shaped like Apache log entries
        # so the stats builder can consume database rows.  The parameter
        # is unused but kept for the expected callback signature.
        for line in cursor:
            data = {}
            data['day'] = yesterday.day
            data['month'] = yesterday.month
            data['year'] = yesterday.year
            data['filename'] = line[1]
            data['useragent'] = 'Unkown'  # sic -- not stored yet
            data['packagename'] = line[0]
            data['count'] = line[2]
            yield data

    gstats = LocalStats()
    stats_file = os.path.join(globalstats_dir, day_file)
    gstats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                             get_line, stats_file, files_url, 'bz2')