Esempio n. 1
0
def parse_args():
    """Parse command-line arguments and collect the files for each provider.

    Builds a ``SquidArgumentParser`` with a positional ``providers`` list
    (defaulting to ``DEFAULT_PROVIDERS``) and a ``--name_format`` option, then
    calls ``SquidArgumentParser.get_files(args)`` once per provider with
    ``args.basename`` set to that provider.

    Returns:
        The parsed namespace with an added ``squid_files`` attribute: a dict
        mapping each provider identifier to the value ``get_files`` returned
        for it. ``args.basename`` is left set to the last provider processed
        (if any).
    """
    parser = SquidArgumentParser(
        description="Process a collection of squid logs and write certain extracted metrics to file"
    )
    parser.add_argument(
        "providers",
        metavar="PROVIDER_IDENTIFIER",
        nargs="*",
        default=DEFAULT_PROVIDERS,
        help="list of provider identifiers used in squid log file names",
    )
    parser.add_argument(
        "--name_format",
        dest="name_format",
        type=str,
        # NOTE(review): "%.g" is a float conversion, not "%s"; presumably the
        # date slot was meant to be "%s". This function never applies the
        # format itself, so the default is kept as-is -- confirm with the
        # code that consumes name_format before changing it.
        default="%s.log-%.gz.counts",
        help="a printf style format string which is formatted with the tuple: (provider_name, date_representation)",
    )
    parser.set_defaults(datadir="/home/erosen/src/dashboard/mobile/zero_counts")

    args = parser.parse_args()

    # Custom logic for which files to grab: get_files receives the whole
    # namespace, so the provider is passed to it by overwriting args.basename
    # before each call.
    prov_files = {}
    for prov in args.providers:
        args.basename = prov
        logging.info("args prior to get_files: %s", pprint.pformat(args.__dict__))
        prov_files[prov] = SquidArgumentParser.get_files(args)
    args.squid_files = prov_files

    logging.info(pprint.pformat(args.__dict__))
    return args
Esempio n. 2
0
def parse_args():
    """Parse command-line arguments and collect the files for each provider.

    Builds a ``SquidArgumentParser`` (showing defaults in ``--help``) with a
    positional ``providers`` list (defaulting to ``DEFAULT_PROVIDERS``) and a
    ``--name_format`` option, then calls ``get_files`` once per provider using
    the basename ``"zero-<provider>"``.

    Returns:
        The parsed namespace with an added ``squid_files`` attribute: a dict
        mapping each provider identifier to the value ``get_files`` returned
        for it.
    """
    parser = SquidArgumentParser(
        description="Process a collection of squid logs and write certain extracted metrics to file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "providers",
        metavar="PROVIDER_IDENTIFIER",
        nargs="*",
        default=DEFAULT_PROVIDERS,
        help="list of provider identifiers used in squid log file names",
    )
    # NOTE(review): name_format is parsed but not applied anywhere in this
    # function; the basename below is built from a fixed "zero-%s" pattern.
    parser.add_argument(
        "--name_format",
        dest="name_format",
        type=str,
        default="%s.tab.log-%.gz",
        help="a printf style format string which is formatted with the tuple: (provider_name, date_representation)",
    )
    parser.set_defaults(datadir="/a/squid/archive/zero")

    args = parser.parse_args()

    # Custom logic for which files to grab: one get_files lookup per provider.
    prov_files = {}
    for prov in args.providers:
        basename = "zero-%s" % prov
        logger.debug("basename: %s", basename)
        prov_files[prov] = get_files(args.start, args.end, args.datadir, basename)
    args.squid_files = prov_files

    logger.info(pprint.pformat(args.__dict__))
    return args
Esempio n. 3
0
def main():
    """Count matching squid log rows and write the totals to CSV.

    Parses arguments with ``SquidArgumentParser``, then calls ``count_files``
    on ``args.squid_files``, keeping the fields listed in ``keepers`` and
    only the rows that satisfy every predicate in ``criteria``. The returned
    counts are written to ``country_counts.csv``; ``count_files`` is also
    given ``country_counts_incremental.csv`` as its ``fname``.
    """
    parser = SquidArgumentParser()
    # The option used to be parsed and then ignored in favour of a hard-coded
    # nproc=15. It is now honoured; the default is 15 so runs that do not pass
    # --nprocs behave exactly as before.
    parser.add_argument(
        '--nprocs',
        type=int,
        default=15,
        help='value passed to count_files as nproc',
    )
    args = parser.parse_args()
    logger.info(pprint.pformat(args.__dict__))

    keepers = ['date', 'language', 'project', 'site', 'country', 'na']

    # A row is counted only if all of these predicates hold; the time window
    # is exclusive at both ends.
    criteria = [
        lambda r: r.old_init_request(),
        lambda r: r.site() == 'M',
        lambda r: r.datetime() > args.start,
        lambda r: r.datetime() < args.end,
    ]

    counts = count_files(
        args.squid_files,
        keepers,
        criteria,
        count_event=1000,
        limit=args.max_lines,
        nproc=args.nprocs,
        fname='country_counts_incremental.csv',
    )

    write_counts(counts, 'country_counts.csv')