def main(argv=None):
    """Entry point: score revisions through the MediaWiki API.

    Parses docopt arguments, configures logging, loads the scoring model,
    builds a feature extractor over an mwapi session, and hands everything
    to run().

    :param argv: optional argument list for docopt (defaults to sys.argv)
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.WARNING,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # Third-party loggers are noisy at DEBUG; pin them to WARNING.
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('revscoring.dependencies.dependent') \
        .setLevel(logging.WARNING)

    # Fix: close the model file after loading instead of leaking the handle.
    with open(args['<model-file>'], 'rb') as model_f:
        model = MLScorerModel.load(model_f)

    session = mwapi.Session(
        args['--host'],
        user_agent="Revscoring score utility <*****@*****.**>")
    extractor = api.Extractor(session)

    if len(args['<rev_id>']) > 0:
        rev_ids = (int(rev_id) for rev_id in args['<rev_id>'])
    else:
        if args['--rev-ids'] == "<stdin>":
            rev_ids_f = sys.stdin
        else:
            # NOTE(review): intentionally left open for the lifetime of the
            # process -- the generator below reads it lazily.
            rev_ids_f = open(args['--rev-ids'])
        rev_ids = (int(row.rev_id) for row in mysqltsv.read(rev_ids_f))

    if args['--caches'] is not None:
        caches = json.loads(args['--caches'])
    else:
        caches = None

    if args['--cache'] is not None:
        cache = json.loads(args['--cache'])
    else:
        cache = None

    batch_size = int(args['--batch-size'])

    if args['--cpu-workers'] == "<cpu-count>":
        cpu_workers = cpu_count()
    else:
        cpu_workers = int(args['--cpu-workers'])

    if args['--io-workers'] == "<auto>":
        io_workers = None
    else:
        io_workers = int(args['--io-workers'])

    verbose = args['--verbose']
    debug = args['--debug']

    score_processor = ScoreProcessor(
        model, extractor, batch_size=batch_size,
        cpu_workers=cpu_workers, io_workers=io_workers)

    run(score_processor, rev_ids, caches, cache, debug, verbose)
def main(argv=None):
    """Parse CLI arguments, construct a ScoreProcessor, and run scoring.

    :param argv: optional argument list for docopt (defaults to sys.argv)
    """
    args = docopt.docopt(__doc__, argv=argv)

    debug = args['--debug']
    verbose = args['--verbose']

    logging.basicConfig(
        level=logging.DEBUG if debug else logging.WARNING,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    # Quiet chatty third-party loggers.
    for noisy in ('requests', 'revscoring.dependencies.dependent'):
        logging.getLogger(noisy).setLevel(logging.WARNING)

    scoring_model = Model.load(models.open_file(args['<model-file>']))

    session = mwapi.Session(
        args['--host'],
        user_agent="Revscoring score utility <*****@*****.**>")
    extractor = api.Extractor(session)

    if len(args['<rev_id>']) > 0:
        # Revision IDs supplied directly on the command line.
        rev_ids = (int(rev_id) for rev_id in args['<rev_id>'])
    else:
        # Otherwise stream them from stdin or the named TSV file.
        rev_ids_f = (sys.stdin if args['--rev-ids'] == "<stdin>"
                     else open(args['--rev-ids']))
        rev_ids = (int(row.rev_id) for row in mysqltsv.read(rev_ids_f))

    caches = (json.loads(args['--caches'])
              if args['--caches'] is not None else None)
    cache = (json.loads(args['--cache'])
             if args['--cache'] is not None else None)

    batch_size = int(args['--batch-size'])
    cpu_workers = (cpu_count() if args['--cpu-workers'] == "<cpu-count>"
                   else int(args['--cpu-workers']))
    io_workers = (None if args['--io-workers'] == "<auto>"
                  else int(args['--io-workers']))

    score_processor = ScoreProcessor(
        scoring_model, extractor, batch_size=batch_size,
        cpu_workers=cpu_workers, io_workers=io_workers)

    run(score_processor, rev_ids, caches, cache, debug, verbose)
def main(argv=None):
    """Score page periods read from stdin against XML dump files.

    :param argv: optional argument list for docopt (defaults to sys.argv)
    """
    args = docopt.docopt(__doc__, argv=argv)

    # NOTE(review): column semantics inferred from usage below -- only
    # page_id, start_rev_id, and end_rev_id are consumed; confirm the
    # remaining columns against the producing script.
    periods = mysqltsv.read(sys.stdin, types=[int, str, int, int, int])
    page_periods = {p.page_id: (p.start_rev_id, p.end_rev_id)
                    for p in periods}

    # Fix: close the model file after loading instead of leaking the handle.
    with open(args['<model-file>'], 'rb') as model_f:
        scorer_model = MLScorerModel.load(model_f)

    dump_paths = args['<dump-file>']

    run(page_periods, scorer_model, dump_paths)
def main(argv=None):
    """Wire up an API session and an ORES scorer, then delegate to run().

    :param argv: optional argument list for docopt (defaults to sys.argv)
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Page periods arrive as TSV on stdin.
    periods = mysqltsv.read(sys.stdin, types=[int, str, int, int, int])

    session = api.Session(
        args['--mwapi'],
        user_agent="Quality Scores Script <*****@*****.**>")
    ores = ORESScorer(args['--ores'])

    run(periods, ores, session)
def main(argv=None):
    """Score revisions from dump files, writing TSV rows of scores.

    Supports class-weight overrides, resuming from a previous output file
    (--extend), and multiprocessing across dump files.

    :param argv: optional argument list for docopt (defaults to sys.argv)
    :raises ValueError: if --score-at is not one of SCORE_ATS
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    if args['--class-weight'] is not None:
        class_weights = dict(
            map(_parse_class_weight_option, args['--class-weight']))
        # Mutates the module-level table so later scoring sees the override.
        global CLASS_WEIGHTS
        CLASS_WEIGHTS.update(class_weights)

    paths = args['<dump-file>']

    with open(args['--model']) as f:
        model = Model.load(f)

    sunset = mwtypes.Timestamp(args['--sunset'])

    if args['--score-at'] not in SCORE_ATS:
        raise ValueError("--score-at value {0} not available in {1}"
                         .format(args['--score-at'], SCORE_ATS))
    else:
        score_at = args['--score-at']

    if args['--rev-scores'] == "<stdout>":
        rev_scores = mysqltsv.Writer(sys.stdout, headers=HEADERS)
    else:
        # NOTE(review): this handle is handed to run() and stays open for
        # the lifetime of the process; the OS reclaims it at exit.
        rev_scores = mysqltsv.Writer(
            open(args['--rev-scores'], "w"), headers=HEADERS)

    # Map of page_id -> last-scored timestamp used to skip already-done work.
    skip_scores_before = {}
    if args['--extend'] is not None:
        logger.info("Reading in past scores from {0}".format(args['--extend']))
        # Fix: close the old-scores file when done instead of leaking it --
        # it is fully consumed right here.
        with open(args['--extend']) as extend_f:
            rows = mysqltsv.read(
                extend_f,
                types=[int, str, int, mwtypes.Timestamp, str, float])
            for row in rows:
                skip_scores_before[row.page_id] = row.timestamp
        logger.info("Completed reading scores from old output.")

    if args['--processes'] == "<cpu count>":
        processes = cpu_count()
    else:
        processes = int(args['--processes'])

    verbose = args['--verbose']

    run(paths, model, sunset, score_at, rev_scores, skip_scores_before,
        processes, verbose=verbose)
def main(argv=None):
    """Score revisions from dump files, writing TSV rows of scores.

    Supports resuming from a previous output file (--extend) and
    multiprocessing across dump files.

    :param argv: optional argument list for docopt (defaults to sys.argv)
    :raises ValueError: if --score-at is not one of SCORE_ATS
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    paths = args['<dump-file>']

    with open(args['--model']) as f:
        model = ScorerModel.load(f)

    sunset = mwtypes.Timestamp(args['--sunset'])

    if args['--score-at'] not in SCORE_ATS:
        raise ValueError("--score-at value {0} not available in {1}"
                         .format(args['--score-at'], SCORE_ATS))
    else:
        score_at = args['--score-at']

    if args['--rev-scores'] == "<stdout>":
        rev_scores = mysqltsv.Writer(sys.stdout, headers=HEADERS)
    else:
        # NOTE(review): this handle is handed to run() and stays open for
        # the lifetime of the process; the OS reclaims it at exit.
        rev_scores = mysqltsv.Writer(
            open(args['--rev-scores'], "w"), headers=HEADERS)

    # Map of page_id -> last-scored timestamp used to skip already-done work.
    skip_scores_before = {}
    if args['--extend'] is not None:
        logger.info("Reading in past scores from {0}".format(args['--extend']))
        # Fix: close the old-scores file when done instead of leaking it --
        # it is fully consumed right here.
        with open(args['--extend']) as extend_f:
            rows = mysqltsv.read(
                extend_f,
                types=[int, str, int, mwtypes.Timestamp, str, float])
            for row in rows:
                skip_scores_before[row.page_id] = row.timestamp
        logger.info("Completed reading scores from old output.")

    if args['--processes'] == "<cpu count>":
        processes = cpu_count()
    else:
        processes = int(args['--processes'])

    verbose = args['--verbose']

    run(paths, model, sunset, score_at, rev_scores, skip_scores_before,
        processes, verbose=verbose)
def main():
    """Score rev_ids read from stdin for the anon bias study."""
    args = docopt.docopt(__doc__)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # 'requests' is noisy at DEBUG; pin it to WARNING.
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Lazily stream rev_ids from stdin TSV.
    rev_ids = (int(r.rev_id) for r in mysqltsv.read(sys.stdin))

    # Fix: close the model file after loading instead of leaking the handle.
    # NOTE(review): opened in text mode here, while sibling scripts open the
    # model file with 'rb' -- confirm which mode MLScorerModel.load expects.
    with open(args['<model-file>']) as model_f:
        scorer_model = MLScorerModel.load(model_f)

    session = mwapi.Session(
        args['--host'],
        user_agent="Anon bias study <*****@*****.**>")
    extractor = api.Extractor(session)
    score_processor = ScoreProcessor(scorer_model, extractor)

    # Missing --cache degrades gracefully to an empty cache.
    cache = json.loads(args['--cache'] or "{}")

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_ids, score_processor, cache, verbose, debug)
def main():
    """Score rev_ids read from stdin for the anon bias study."""
    args = docopt.docopt(__doc__)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    # 'requests' is noisy at DEBUG; pin it to WARNING.
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Lazily stream rev_ids from stdin TSV.
    rev_ids = (int(r.rev_id) for r in mysqltsv.read(sys.stdin))

    # Fix: close the model file after loading instead of leaking the handle.
    # NOTE(review): opened in text mode here, while sibling scripts open the
    # model file with 'rb' -- confirm which mode MLScorerModel.load expects.
    with open(args['<model-file>']) as model_f:
        scorer_model = MLScorerModel.load(model_f)

    session = mwapi.Session(
        args['--host'],
        user_agent="Anon bias study <*****@*****.**>")
    extractor = api.Extractor(session)
    score_processor = ScoreProcessor(scorer_model, extractor)

    # Missing --cache degrades gracefully to an empty cache.
    cache = json.loads(args['--cache'] or "{}")

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_ids, score_processor, cache, verbose, debug)