Example #1
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.WARNING,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('revscoring.dependencies.dependent') \
           .setLevel(logging.WARNING)

    model = MLScorerModel.load(open(args['<model-file>'], 'rb'))

    session = mwapi.Session(
        args['--host'],
        user_agent="Revscoring score utility <*****@*****.**>")
    extractor = api.Extractor(session)

    if len(args['<rev_id>']) > 0:
        rev_ids = (int(rev_id) for rev_id in args['<rev_id>'])
    else:
        if args['--rev-ids'] == "<stdin>":
            rev_ids_f = sys.stdin
        else:
            rev_ids_f = open(args['--rev-ids'])

        rev_ids = (int(row.rev_id) for row in mysqltsv.read(rev_ids_f))

    if args['--caches'] is not None:
        caches = json.loads(args['--caches'])
    else:
        caches = None

    if args['--cache'] is not None:
        cache = json.loads(args['--cache'])
    else:
        cache = None

    batch_size = int(args['--batch-size'])

    if args['--cpu-workers'] == "<cpu-count>":
        cpu_workers = cpu_count()
    else:
        cpu_workers = int(args['--cpu-workers'])

    if args['--io-workers'] == "<auto>":
        io_workers = None
    else:
        io_workers = int(args['--io-workers'])

    verbose = args['--verbose']

    debug = args['--debug']

    score_processor = ScoreProcessor(model,
                                     extractor,
                                     batch_size=batch_size,
                                     cpu_workers=cpu_workers,
                                     io_workers=io_workers)

    run(score_processor, rev_ids, caches, cache, debug, verbose)
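
The option names read above come from the script's docopt usage string, which is not part of this excerpt. Below is a minimal sketch of what such a usage string could look like; the exact wording, ordering and defaults are assumptions, but the sentinel defaults (<stdin>, <cpu-count>, <auto>) match the string comparisons in the code.

"""
Scores a list of revisions with a trained model.

Usage:
    score <model-file> [<rev_id>...]
          [--host=<url>] [--rev-ids=<path>]
          [--caches=<json>] [--cache=<json>] [--batch-size=<num>]
          [--cpu-workers=<num>] [--io-workers=<num>]
          [--debug] [--verbose]

Options:
    <model-file>         Path to a trained scorer model file
    <rev_id>             Revision IDs to score; if omitted, IDs are read
                         from --rev-ids
    --host=<url>         MediaWiki API host to extract features from
    --rev-ids=<path>     TSV file with a rev_id column [default: <stdin>]
    --caches=<json>      JSON blob of per-revision cache values
    --cache=<json>       JSON blob of cache values applied to every revision
    --batch-size=<num>   Revisions per extraction batch [default: 50]
    --cpu-workers=<num>  Scoring processes to run [default: <cpu-count>]
    --io-workers=<num>   Extraction threads to run [default: <auto>]
    --debug              Print debug logging
    --verbose            Print progress information
"""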
Example #2
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.WARNING,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('revscoring.dependencies.dependent') \
           .setLevel(logging.WARNING)

    scoring_model = Model.load(models.open_file(args['<model-file>']))

    session = mwapi.Session(
        args['--host'],
        user_agent="Revscoring score utility <*****@*****.**>")
    extractor = api.Extractor(session)

    if len(args['<rev_id>']) > 0:
        rev_ids = (int(rev_id) for rev_id in args['<rev_id>'])
    else:
        if args['--rev-ids'] == "<stdin>":
            rev_ids_f = sys.stdin
        else:
            rev_ids_f = open(args['--rev-ids'])

        rev_ids = (int(row.rev_id) for row in mysqltsv.read(rev_ids_f))

    if args['--caches'] is not None:
        caches = json.loads(args['--caches'])
    else:
        caches = None

    if args['--cache'] is not None:
        cache = json.loads(args['--cache'])
    else:
        cache = None

    batch_size = int(args['--batch-size'])

    if args['--cpu-workers'] == "<cpu-count>":
        cpu_workers = cpu_count()
    else:
        cpu_workers = int(args['--cpu-workers'])

    if args['--io-workers'] == "<auto>":
        io_workers = None
    else:
        io_workers = int(args['--io-workers'])

    verbose = args['--verbose']

    debug = args['--debug']

    score_processor = ScoreProcessor(
        scoring_model, extractor, batch_size=batch_size,
        cpu_workers=cpu_workers, io_workers=io_workers)

    run(score_processor, rev_ids, caches, cache, debug, verbose)
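
Example #2 differs from Example #1 mainly in how the model file is loaded (Model.load(models.open_file(...)) instead of MLScorerModel.load(open(..., 'rb'))). Because both entry points accept an argv parameter, they can also be driven programmatically, which is handy for testing. Below is a hypothetical invocation, assuming a usage string like the one sketched after Example #1; the model path and revision IDs are placeholders.

if __name__ == "__main__":
    main(argv=[
        "models/enwiki.damaging.model",   # <model-file> -- placeholder path
        "1234567", "7654321",             # <rev_id>... -- placeholder IDs
        "--host=https://en.wikipedia.org",
        "--batch-size=50",
        "--cpu-workers=2",
        "--io-workers=<auto>",
        "--verbose",
    ])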
Example #3
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    periods = mysqltsv.read(sys.stdin, types=[int, str, int, int, int])
    page_periods = {p.page_id: (p.start_rev_id, p.end_rev_id) for p in periods}

    scorer_model = MLScorerModel.load(open(args['<model-file>'], 'rb'))

    dump_paths = args['<dump-file>']

    run(page_periods, scorer_model, dump_paths)
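
This main() builds page_periods from tab-separated rows on stdin. The excerpt only shows that the input has five typed columns and that page_id, start_rev_id and end_rev_id are among them. A minimal standard-library sketch of the equivalent mapping, assuming a header row as the other mysqltsv readers in these examples imply:

import sys

rows = (line.rstrip("\n").split("\t") for line in sys.stdin)
header = next(rows)
col = {name: i for i, name in enumerate(header)}

page_periods = {
    int(row[col["page_id"]]): (int(row[col["start_rev_id"]]),
                               int(row[col["end_rev_id"]]))
    for row in rows
}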
Example #4
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    periods = mysqltsv.read(sys.stdin, types=[int, str, int, int, int])

    my_agent = "Quality Scores Script <*****@*****.**>"
    session = api.Session(args['--mwapi'], user_agent=my_agent)

    ores = ORESScorer(args['--ores'])

    run(periods, ores, session)
Example #5
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    if args['--class-weight'] is not None:
        class_weights = dict(
            map(_parse_class_weight_option, args['--class-weight'])
        )
        global CLASS_WEIGHTS
        CLASS_WEIGHTS.update(class_weights)

    paths = args['<dump-file>']
    with open(args['--model']) as f:
        model = Model.load(f)

    sunset = mwtypes.Timestamp(args['--sunset'])

    if args['--score-at'] not in SCORE_ATS:
        raise ValueError("--score-at value {0} not available in {1}"
                         .format(args['--score-at'], SCORE_ATS))
    else:
        score_at = args['--score-at']

    if args['--rev-scores'] == "<stdout>":
        rev_scores = mysqltsv.Writer(sys.stdout, headers=HEADERS)
    else:
        rev_scores = mysqltsv.Writer(
            open(args['--rev-scores'], "w"), headers=HEADERS)

    if args['--extend'] is None:
        skip_scores_before = {}
    else:
        logger.info("Reading in past scores from {0}".format(args['--extend']))
        skip_scores_before = {}
        rows = mysqltsv.read(
            open(args['--extend']),
            types=[int, str, int, mwtypes.Timestamp, str, float])
        for row in rows:
            skip_scores_before[row.page_id] = row.timestamp
        logger.info("Completed reading scores from old output.")

    if args['--processes'] == "<cpu count>":
        processes = cpu_count()
    else:
        processes = int(args['--processes'])

    verbose = args['--verbose']
    run(paths, model, sunset, score_at, rev_scores, skip_scores_before,
        processes, verbose=verbose)
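
The helper _parse_class_weight_option is referenced above but not shown in this excerpt. A plausible sketch is given below, assuming repeated options of the form --class-weight=<label>=<weight> (e.g. "True=10"); the real implementation may differ.

import json

def _parse_class_weight_option(option):
    # "True=10" -> (True, 10.0); the label is JSON-decoded when possible so
    # that booleans and integers survive, otherwise it is kept as a string.
    label, weight = option.split("=", 1)
    try:
        label = json.loads(label.lower())
    except ValueError:
        pass
    return label, float(weight)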
Example #6
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    paths = args['<dump-file>']
    with open(args['--model']) as f:
        model = ScorerModel.load(f)

    sunset = mwtypes.Timestamp(args['--sunset'])

    if args['--score-at'] not in SCORE_ATS:
        raise ValueError("--score-at value {0} not available in {1}"
                         .format(args['--score-at'], SCORE_ATS))
    else:
        score_at = args['--score-at']

    if args['--rev-scores'] == "<stdout>":
        rev_scores = mysqltsv.Writer(sys.stdout, headers=HEADERS)
    else:
        rev_scores = mysqltsv.Writer(
            open(args['--rev-scores'], "w"), headers=HEADERS)

    if args['--extend'] is None:
        skip_scores_before = {}
    else:
        logger.info("Reading in past scores from {0}".format(args['--extend']))
        skip_scores_before = {}
        rows = mysqltsv.read(
            open(args['--extend']),
            types=[int, str, int, mwtypes.Timestamp, str, float])
        for row in rows:
            skip_scores_before[row.page_id] = row.timestamp
        logger.info("Completed reading scores from old output.")

    if args['--processes'] == "<cpu count>":
        processes = cpu_count()
    else:
        processes = int(args['--processes'])

    verbose = args['--verbose']
    run(paths, model, sunset, score_at, rev_scores, skip_scores_before,
        processes, verbose=verbose)
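
The --extend option lets a run resume from a previous output file: skip_scores_before maps each page_id to the timestamp of the last score already written. run() is not shown here, but the check it presumably performs looks something like this sketch:

def should_score(page_id, timestamp, skip_scores_before):
    # Skip revisions at or before the last timestamp already scored for the
    # page, so an --extend run only appends new rows.
    last_scored = skip_scores_before.get(page_id)
    return last_scored is None or timestamp > last_scored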
Example #7
def main():
    args = docopt.docopt(__doc__)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    logging.getLogger('requests').setLevel(logging.WARNING)

    rev_ids = (int(r.rev_id) for r in mysqltsv.read(sys.stdin))

    scorer_model = MLScorerModel.load(open(args['<model-file>']))
    session = mwapi.Session(
        args['--host'], user_agent="Anon bias study <*****@*****.**>")
    extractor = api.Extractor(session)
    score_processor = ScoreProcessor(scorer_model, extractor)

    cache = json.loads(args['--cache'] or "{}")

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_ids, score_processor, cache, verbose, debug)
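
This variant always takes its revision IDs from stdin. The generator expects tab-separated input whose header row contains a rev_id column, as the attribute access r.rev_id implies. A small self-contained sketch, assuming mysqltsv reads the header from the first line as these examples suggest:

import io
import mysqltsv

fake_stdin = io.StringIO("rev_id\n1234567\n7654321\n")
rev_ids = [int(r.rev_id) for r in mysqltsv.read(fake_stdin)]
print(rev_ids)  # [1234567, 7654321]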