Exemple #1
0
def build_config(config_dirs=DEFAULT_DIRS,
                 logging_config="logging_config.yaml"):
    """Load and merge every ``*.yaml`` file found in `config_dirs`.

    Args:
        config_dirs: Iterable of directory paths searched for ``*.yaml``
            files.
        logging_config: Path to a YAML logging configuration that is passed
            to ``logging.config.dictConfig``, or None to skip logging setup.

    Returns:
        The configuration object produced by ``yamlconf.load``.
    """
    from contextlib import ExitStack

    # Sorting (bare filename, full path) pairs makes files load in
    # alphabetical order of their bare filename, across all directories.
    config_file_paths = []
    for directory in config_dirs:
        dir_glob = os.path.join(directory, "*.yaml")
        config_file_paths.extend(
            (os.path.basename(path), path) for path in glob.glob(dir_glob))
    config_file_paths.sort()
    logger.info("Loading configs from {0}".format(config_file_paths))

    # The ExitStack closes every opened file once loading finishes (or
    # fails); previously the handles were never closed.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(path))
                        for _, path in config_file_paths]
        config = yamlconf.load(*config_files)

    # Load logging config if specified
    if logging_config is not None:
        with open(logging_config) as f:
            logging.config.dictConfig(yamlconf.load(f))

    # Add nltk data path if specified.
    if 'data_paths' in config['ores'] and \
       'nltk' in config['ores']['data_paths']:
        import nltk
        nltk.data.path.append(config['ores']['data_paths']['nltk'])

    return config
Exemple #2
0
def configure_logging(verbose=False,
                      debug=False,
                      logging_config=None,
                      **kwargs):
    """Configure logging from a YAML file, or fall back to a basic setup.

    Args:
        verbose: Accepted but not used by this function.
        debug: When true, the root logger level is set to DEBUG.
        logging_config: Path to a YAML logging configuration.  When None,
            DEFAULT_LOGGING_CONFIG is used if that path exists.
        **kwargs: Accepted and ignored.
    """
    # Without an explicit path, try the default config file location.
    config_path = logging_config
    if config_path is None and os.path.exists(DEFAULT_LOGGING_CONFIG):
        config_path = DEFAULT_LOGGING_CONFIG

    if config_path is None:
        # No config file available: configure fallback logging.
        logging.basicConfig(level=logging.INFO, format=DEFAULT_FORMAT)
    else:
        with open(config_path) as config_file:
            logging.config.dictConfig(yamlconf.load(config_file))

        # When stdin is a terminal, additionally mirror log records to stderr.
        if sys.stdin.isatty():
            console = logging.StreamHandler(stream=sys.stderr)
            console.setFormatter(logging.Formatter(fmt=DEFAULT_FORMAT))
            logging.getLogger().addHandler(console)

    if debug:
        logging.getLogger().setLevel(logging.DEBUG)
Exemple #3
0
def main(argv=None):
    """Parse the command line, load the YAML config and run the application.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    # Load every *.yaml file of the --config directory in sorted order; the
    # ExitStack guarantees the handles are closed again (they used to leak).
    config_paths = os.path.join(args['--config'], "*.yaml")
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    processes = int(args['--processes'])
    verbose = args['--verbose']
    debug = args['--debug']

    logging.basicConfig(
        level=logging.DEBUG if debug else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    logging.getLogger('requests').setLevel(logging.INFO)

    # --verbose only affects the dependency-solving logger.
    dependent_level = logging.DEBUG if verbose else logging.INFO
    logging.getLogger('revscoring.dependencies.dependent') \
           .setLevel(dependent_level)

    logging.getLogger("ores.metrics_collectors.logger").setLevel(logging.DEBUG)

    app = server.configure(config)
    # NOTE(review): debug=True is always passed; presumably this entry point
    # is meant for development use only -- confirm.
    app.run(host=args['--host'],
            port=int(args['--port']),
            debug=True,
            processes=processes)
def main(argv=None):
    """Parse the command line, build the DB from the YAML config and run.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    # Open all *.yaml files in sorted order and close them after loading
    # (the original left every handle open).
    config_paths = os.path.join(args['--config'], "*.yaml")
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    db = DB.from_config(config)
    run(db)
Exemple #5
0
def main(argv=None):
    """Parse the command line, set up logging and metrics, then call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # Requests is loud.  Be quiet requests.
    logging.getLogger("requests").setLevel(logging.WARNING)
    requests.packages.urllib3.disable_warnings()
    # If we're using logging for metrics collection, show it.
    logging.getLogger("ores.metrics_collectors").setLevel(logging.DEBUG)

    stream_url = args['<stream-url>']
    ores_url = args['<ores-url>']

    # Load every *.yaml file in sorted order; the ExitStack closes the
    # handles afterwards instead of leaking them.
    config_paths = os.path.join(args['--config'], "*.yaml")
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    delay = float(args['--delay'])
    # NOTE(review): verbosity is derived from --debug here -- confirm this is
    # intended.
    verbose = bool(args['--debug'])

    # Use the metrics collector configured for the active scoring system, or
    # a Null() collector when none is configured.
    ss_name = config['ores']['scoring_system']
    if 'metrics_collector' in config['scoring_systems'][ss_name]:
        metrics_collector = MetricsCollector.from_config(
            config, config['scoring_systems'][ss_name]['metrics_collector'])
    else:
        metrics_collector = Null()

    run(stream_url, ores_url, metrics_collector, config, delay, verbose)
Exemple #6
0
def main(argv=None):
    """Parse the command line, read all inputs and hand them to run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    logging.getLogger("revscoring.scoring.models").setLevel(logging.WARNING)

    # Close the params file as soon as it is parsed (it used to leak).
    with open(args['<params-config>']) as params_file:
        params_config = yamlconf.load(params_file)

    features_path = args['<features>']
    features = yamlconf.import_path(features_path)

    # "<stdin>" is the sentinel for reading observations from standard input.
    if args['--observations'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--observations']))

    logger.info("Reading feature values & labels...")
    label_name = args['<label>']
    value_labels = \
        [(list(solve(features, cache=ob['cache'])), ob[label_name])
         for ob in observations]

    statistic_path = args['<statistic>']
    additional_params = {}

    # Only forward the optional statistic parameters that were provided.
    labels, label_weights, population_rates = \
        util.read_labels_and_population_rates(
            None, args['--label-weight'], args['--pop-rate'])
    if label_weights is not None:
        additional_params['label_weights'] = label_weights
    if population_rates is not None:
        additional_params['population_rates'] = population_rates

    maximize = not args['--minimize']

    folds = int(args['--folds'])

    # The report handle is passed on to run(), so it is not closed here.
    if args['--report'] == "<stdout>":
        report = sys.stdout
    else:
        report = open(args['--report'], "w")

    if args['--processes'] == "<cpu-count>":
        processes = multiprocessing.cpu_count()
    else:
        processes = int(args['--processes'])

    if args['--cv-timeout'] == "<forever>":
        cv_timeout = None
    else:
        cv_timeout = float(args['--cv-timeout']) * 60  # Convert to seconds

    verbose = args['--verbose']

    run(params_config, features, features_path, value_labels, statistic_path,
        additional_params, maximize, folds, report, processes, cv_timeout,
        verbose)
Exemple #7
0
def main(argv=None):
    """Parse the command line, build the DB from its config file and run.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)
    # This expects the database config file path.  The file is closed as
    # soon as it has been parsed (it used to leak).
    with open(args['<config>']) as config_file:
        db_config = yamlconf.load(config_file)
    db = DB.from_params(**db_config)
    reload_test_data = args['--reload-test-data']

    run(db, reload_test_data)
def main(argv=None):
    """Load the database config file named on the command line and run.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # <config> is the path of the database config file; use a context
    # manager so the handle does not stay open after parsing.
    with open(args['<config>']) as db_config_file:
        db_config = yamlconf.load(db_config_file)

    db = DB.from_params(**db_config)
    reload_test_data = args['--reload-test-data']

    run(db, reload_test_data)
def main(argv=None):
    """Load the YAML config directory, build the DB and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    config_paths = os.path.join(args['--config'], "*.yaml")
    # Files are loaded in sorted order and closed afterwards; previously the
    # handles were never closed.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    db = DB.from_config(config)
    run(db)
Exemple #10
0
def main(argv=None):
    """Parse the command line, load the config file and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    stream_url = args['<stream-url>']
    ores_url = args['<ores-url>']
    # Close the config file right after parsing instead of leaking it.
    with open(args['--config']) as config_file:
        config = yamlconf.load(config_file)
    delay = float(args['--delay'])
    verbose = bool(args['--verbose'])
    run(stream_url, ores_url, config, delay,
        verbose)
Exemple #11
0
def main(argv=None):
    """Read JSON tasks from stdin and pass them, with the DB, to run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    campaign_id = int(args['<campaign-id>'])

    # Lazily decode one JSON document per stdin line.
    tasks = (json.loads(line) for line in sys.stdin)

    config_paths = os.path.join(args['--config'], "*.yaml")
    # The ExitStack closes all config files once they are loaded; the
    # original left them open.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    db = DB.from_config(config)
    run(db, campaign_id, tasks)
Exemple #12
0
def main(argv=None):
    """Set up the doc stream, API session and diff engine, then call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    diff_docs = read_docs(sys.stdin)

    session = api.Session(args['--api'])

    # Parse the config and close the file immediately (it used to leak).
    with open(args['--config']) as config_file:
        config_doc = yamlconf.load(config_file)
    diff_engine = DiffEngine.from_config(config_doc, config_doc["diff_engine"])

    run(diff_docs, session, diff_engine)
Exemple #13
0
def main(argv=None):
    """Parse the command line, load the YAML config and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    stream_url = args['<stream-url>']
    ores_url = args['<ores-url>']

    config_paths = os.path.join(args['--config'], "*.yaml")
    # Load the files in sorted order and close every handle afterwards
    # (previously they were leaked).
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    delay = float(args['--delay'])
    verbose = bool(args['--verbose'])
    notify = notify_socket()
    run(stream_url, ores_url, config, delay, notify, verbose)
Exemple #14
0
def main(argv=None):
    """Build the DB from a directory of YAML configs and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    # --config is a directory: every *.yaml file in it is loaded in sorted
    # order.  The ExitStack closes the handles, which used to leak.
    config_paths = os.path.join(args['--config'], "*.yaml")
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    db = DB.from_config(config)
    reload_test_data = args['--reload-test-data']

    run(db, reload_test_data)
Exemple #15
0
def main():
    """Collect ORES and Wikilabels data and dump it to static/data.json.

    Hosts are read from config.yaml next to this module; the merged data is
    written as JSON together with the current timestamp.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))

    with open(module_dir + '/config.yaml', 'r') as config_file:
        config = yamlconf.load(config_file)

    # ORES data is fetched first, then merged with the Wikilabels data.
    merged = deep_merge.merge(get_ores_data(config['ores_host']),
                              get_wikilabels_data(config['wikilabels_host']))
    payload = {'data': merged, 'timestamp': time.time()}

    with open(module_dir + '/static/data.json', 'w') as out_file:
        out_file.write(json.dumps(payload))
Exemple #16
0
def main(argv=None):
    """Load the YAML config directory, build the DB and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    # --config names a directory whose *.yaml files are loaded in sorted
    # order; all handles are closed once loading is done (they used to leak).
    config_paths = os.path.join(args['--config'], "*.yaml")
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    db = DB.from_config(config)
    reload_test_data = args['--reload-test-data']

    run(db, reload_test_data)
Exemple #17
0
def main():
    """Load the taxonomy files named on the command line and print them.

    Returns:
        Whatever ``print_nodes`` returns for the loaded taxonomy.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)

    taxon_paths = args['<taxon>']
    logger.info("Loading taxon from {0}".format(taxon_paths))
    # Open the taxon files in the given order and close them after loading;
    # the original never closed them.
    with ExitStack() as stack:
        taxon_files = [stack.enter_context(open(p)) for p in taxon_paths]
        taxonomy = yamlconf.load(*taxon_files)

    return print_nodes(taxonomy)
Exemple #18
0
def main(argv=None):
    """Load tasks from stdin and the DB from config, then call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    campaign_id = int(args['<campaign-id>'])

    # One JSON document per line of stdin, decoded lazily.
    tasks = (json.loads(line) for line in sys.stdin)

    config_paths = os.path.join(args['--config'], "*.yaml")
    # Close every config file after loading instead of leaking the handles.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    db = DB.from_config(config)
    run(db, campaign_id, tasks)
Exemple #19
0
def build_config(config_dirs=DEFAULT_DIRS, **kwargs):
    """Load and merge every ``*.yaml`` file found in `config_dirs`.

    Args:
        config_dirs: Iterable of directory paths searched for ``*.yaml``
            files.
        **kwargs: Accepted and ignored.

    Returns:
        The configuration object produced by ``yamlconf.load``.
    """
    from contextlib import ExitStack

    # Sorting (bare filename, full path) pairs makes files load in
    # alphabetical order of their bare filename, across all directories.
    config_file_paths = []
    for directory in config_dirs:
        dir_glob = os.path.join(directory, "*.yaml")
        config_file_paths.extend(
            (os.path.basename(path), path) for path in glob.glob(dir_glob))
    config_file_paths.sort()
    logger.info("Loading configs from {0}".format(config_file_paths))

    # Close every opened file once loading is done; the original leaked them.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(path))
                        for _, path in config_file_paths]
        config = yamlconf.load(*config_files)

    return config
Exemple #20
0
def main(argv=None):
    """Load the config and start the score processor's Celery worker.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    logging.basicConfig(level=logging.DEBUG)
    args = docopt.docopt(__doc__, argv=argv)

    # Parse the config and close the file right away (it used to leak).
    with open(args['--config']) as config_file:
        config = yamlconf.load(config_file)

    name = config['ores']['score_processor']
    score_processor = Celery.from_config(config, name)

    score_processor.application.worker_main(
        argv=["celery_worker", "--loglevel=INFO"]
    )
def main(argv=None):
    """Parse the command line, load the YAML config and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    stream_url = args['<stream-url>']
    ores_url = args['<ores-url>']
    config_paths = os.path.join(args['--config'], "*.yaml")
    # All *.yaml files are opened in sorted order and closed after loading;
    # the original never closed them.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)
    delay = float(args['--delay'])
    verbose = bool(args['--verbose'])
    notify = notify_socket()
    run(stream_url, ores_url, config, delay,
        notify, verbose)
Exemple #22
0
def main(argv=None):
    """Configure the application from an optional config file and run it.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # --config is optional; without it the application gets config=None.
    if args['--config'] is not None:
        # Close the file right after parsing instead of leaking the handle.
        with open(args['--config']) as config_file:
            config = yamlconf.load(config_file)
    else:
        config = None

    app = application.configure(config)
    # NOTE(review): debug=True on host 0.0.0.0 -- presumably a development
    # entry point only; confirm before exposing it.
    app.run(host="0.0.0.0",
            port=int(args['--port']),
            debug=True,
            ssl_context="adhoc")
Exemple #23
0
def main(argv=None):
    """Load the YAML config directory and run the configured application.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    config_paths = os.path.join(args['--config'], "*.yaml")
    # Files load in sorted order; the ExitStack closes the handles, which
    # were previously leaked.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    # --ssl switches on an ad-hoc SSL context; otherwise no extra kwargs.
    kwargs = {'ssl_context': "adhoc"} if args['--ssl'] else {}

    app = server.configure(config)
    app.debug = True
    app.run(host="0.0.0.0", port=int(args['--port']), debug=True, **kwargs)
Exemple #24
0
def main(argv=None):
    """Configure the server from an optional config file and run it.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Without --config the server is configured with config=None.
    if args['--config'] is not None:
        # Context manager so the config file is closed after parsing.
        with open(args['--config']) as config_file:
            config = yamlconf.load(config_file)
    else:
        config = None

    app = server.configure(config)
    app.debug = True
    app.run(host="0.0.0.0",
            port=int(args['--port']),
            debug=True,
            ssl_context="adhoc")
Exemple #25
0
def main(argv=None):
    """Parse the command line, read all inputs and hand them to run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    # Close the params file as soon as it is parsed (it used to leak).
    with open(args['<params-config>']) as params_file:
        params_config = yamlconf.load(params_file)

    features_path = args['<features>']
    features = yamlconf.import_path(features_path)

    # "<stdin>" is the sentinel for reading observations from standard input.
    if args['--observations'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--observations']))

    logger.info("Reading feature values & labels...")
    label_name = args['<label>']
    value_labels = \
        [(list(solve(features, cache=ob['cache'])), ob[label_name])
         for ob in observations]

    # Get a specialized scorer if we have one; otherwise pass the name on.
    scoring = metrics.SCORERS.get(args['--scoring'], args['--scoring'])

    folds = int(args['--folds'])

    # The report handle is passed on to run(), so it is not closed here.
    if args['--report'] == "<stdout>":
        report = sys.stdout
    else:
        report = open(args['--report'], "w")

    if args['--processes'] == "<cpu-count>":
        processes = multiprocessing.cpu_count()
    else:
        processes = int(args['--processes'])

    if args['--cv-timeout'] == "<forever>":
        cv_timeout = None
    else:
        cv_timeout = float(args['--cv-timeout']) * 60  # Convert to seconds

    scale_features = args['--scale-features']
    verbose = args['--verbose']

    run(params_config, features_path, value_labels, scoring, folds,
        report, processes, cv_timeout, scale_features, verbose)
Exemple #26
0
def main(argv=None):
    """Collect the campaign arguments and the DB, then call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    wiki = args['<wiki>']
    name = args['<name>']
    form = args['<form>']
    view = args['<view>']
    labels_per_task = args['<labels-per-task>']
    tasks_per_assignment = args['<tasks-per-assignment>']

    config_paths = os.path.join(args['--config'], "*.yaml")
    # Load in sorted order and close every handle afterwards; the original
    # left them open.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    db = DB.from_config(config)
    force = args['--force']
    run(db, wiki, name, form, view, labels_per_task, tasks_per_assignment,
        force)
Exemple #27
0
def main():
    """Load the taxonomy files and check them via ``check_wikiprojects``.

    Returns:
        Whatever ``check_wikiprojects`` returns.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)

    taxon_paths = args['<taxon>']
    logger.info("Loading taxon from {0}".format(taxon_paths))
    # Open the taxon files in the given order and close them after loading
    # (they used to leak).
    with ExitStack() as stack:
        taxon_files = [stack.enter_context(open(p)) for p in taxon_paths]
        taxonomy = yamlconf.load(*taxon_files)

    session = mwapi.Session(ENWIKI_HOST, user_agent=args['--ua-email'])
    threads = int(args['--threads'])

    return check_wikiprojects(taxonomy, session, threads)
def index():
    """Application landing page.

    Renders ``index.html`` from static/data.json and config.yaml, both
    located next to this module.
    """
    SITE_ROOT = os.path.realpath(os.path.dirname(__file__))
    json_url = os.path.join(SITE_ROOT, 'static', 'data.json')
    config_path = os.path.dirname(os.path.abspath(__file__)) + '/config.yaml'
    with open(config_path, 'r') as f:
        config = yamlconf.load(f)
    # Close the data file after reading; it used to be left open on every
    # call of this function.
    with open(json_url) as data_file:
        data = json.load(data_file)
    wikis = transform_data(data['data'])
    # The stored timestamp is rendered as a UTC date string.
    update = time.strftime("%d %B %Y %H:%M:%S UTC",
                           time.gmtime(data['timestamp']))
    return flask.render_template('index.html',
                                 wikis=wikis,
                                 update=update,
                                 **config)
Exemple #29
0
def main(argv=None):
    """Load the config file, set up logging and run the application.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Close the config file right after parsing instead of leaking it.
    with open(args["--config"]) as config_file:
        config = yamlconf.load(config_file)

    processes = int(args["--processes"])

    logging.basicConfig(
        level=logging.INFO if not args["--verbose"] else logging.DEBUG,
        format="%(asctime)s %(levelname)s:%(name)s -- %(message)s",
    )

    logging.getLogger("ores.metrics_collectors.logger").setLevel(logging.DEBUG)

    app = server.configure(config)
    app.run(host=args["--host"], port=int(args["--port"]), debug=True, processes=processes)
Exemple #30
0
def main(argv=None):
    """Load the config file and run the application in threaded mode.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Parse the config and close the file immediately (it used to leak).
    with open(args['--config']) as config_file:
        config = yamlconf.load(config_file)

    # Logging is only configured when --verbose is given.
    if args['--verbose']:
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
        )

    app = server.configure(config)
    app.run(host="0.0.0.0",
            port=int(args['--port']),
            debug=True,
            threaded=True)
def main(argv=None):
    """Load the weights, open the score input and output, then call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt(__doc__, argv=argv)

    # Close the weights file right after parsing instead of leaking it.
    with open(args['<weights>']) as weights_file:
        weights = yamlconf.load(weights_file)

    # "<stdin>" / "<stdout>" are sentinels for the standard streams.
    if args['--scores'] == '<stdin>':
        revision_scores = read_revision_scores(sys.stdin)
    else:
        revision_scores = read_revision_scores(open(args['--scores']))

    if args['--output'] == '<stdout>':
        output = sys.stdout
    else:
        # The output file must be opened for writing; the original opened it
        # in the default read mode, so it could not be written to.
        output = open(args['--output'], "w")

    run(revision_scores, weights, output)
Exemple #32
0
def main(argv=None):
    """Build the diff engine from config and process docs from stdin.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Close the config file as soon as it is parsed (it used to leak).
    with open(args['--config']) as config_file:
        config_doc = yamlconf.load(config_file)
    diff_engine = DiffEngine.from_config(config_doc, config_doc["diff_engine"])

    drop_text = bool(args['--drop-text'])

    # "<infinity>" is the sentinel for "no timeout".
    if args['--timeout'] == "<infinity>":
        timeout = None
    else:
        timeout = float(args['--timeout'])

    verbose = bool(args['--verbose'])

    run(read_docs(sys.stdin), diff_engine, timeout, drop_text, verbose)
Exemple #33
0
def main(argv=None):
    """Collect the campaign arguments and the DB, then call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    wiki = args['<wiki>']
    name = args['<name>']
    form = args['<form>']
    view = args['<view>']
    labels_per_task = args['<labels-per-task>']
    tasks_per_assignment = args['<tasks-per-assignment>']
    config_paths = os.path.join(args['--config'], "*.yaml")
    # The ExitStack closes each config file after loading; previously the
    # handles were never closed.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)
    db = DB.from_config(config)
    force = args['--force']
    run(db, wiki, name, form, view, labels_per_task, tasks_per_assignment,
        force)
Exemple #34
0
def main(argv=None):
    """Parse the command line, read all inputs and hand them to run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    # Close the params file as soon as it is parsed (it used to leak).
    with open(args['<params-config>']) as params_file:
        params_config = yamlconf.load(params_file)

    features_path = args['<features>']
    features = yamlconf.import_module(features_path)

    label_decoder = util.DECODERS[args['--label-type']]
    # "<stdin>" is the sentinel for reading observations from standard input.
    # The handle is passed on to util.read_observations, so it stays open.
    if args['--observations'] == "<stdin>":
        observations_f = sys.stdin
    else:
        observations_f = open(args['--observations'])

    observations = util.read_observations(observations_f, features,
                                          label_decoder)

    # Get a specialized scorer if we have one; otherwise pass the name on.
    scoring = metrics.SCORERS.get(args['--scoring'], args['--scoring'])

    folds = int(args['--folds'])

    # The report handle is passed on to run(), so it is not closed here.
    if args['--report'] == "<stdout>":
        report = sys.stdout
    else:
        report = open(args['--report'], "w")

    if args['--processes'] == "<cpu-count>":
        processes = multiprocessing.cpu_count()
    else:
        processes = int(args['--processes'])

    if args['--cv-timeout'] == "<forever>":
        cv_timeout = None
    else:
        cv_timeout = float(args['--cv-timeout']) * 60  # Convert to seconds

    scale_features = args['--scale-features']
    verbose = args['--verbose']

    run(params_config, features_path, observations, scoring, folds,
        report, processes, cv_timeout, scale_features, verbose)
Exemple #35
0
def main(argv=None):
    """Load the config and start the score processor's Celery worker.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # --verbose raises only the 'ores' logger to DEBUG.
    ores_level = logging.DEBUG if args['--verbose'] else logging.INFO
    logging.getLogger('ores').setLevel(ores_level)

    config_paths = os.path.join(args['--config'], "*.yaml")
    # Load in sorted order; the ExitStack closes the handles, which used to
    # leak.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    name = config['ores']['score_processor']
    score_processor = Celery.from_config(config, name)

    score_processor.application.worker_main(
        argv=["celery_worker", "--loglevel=INFO"])
Exemple #36
0
def main(argv=None):
    """Load the YAML config directory and run the configured application.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    config_paths = os.path.join(args['--config'], "*.yaml")
    # Close all config files once they are loaded instead of leaking them.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)

    # --ssl switches on an ad-hoc SSL context; otherwise no extra kwargs.
    if args['--ssl']:
        kwargs = {'ssl_context': "adhoc"}
    else:
        kwargs = {}

    app = server.configure(config)
    app.debug = True
    app.run(host="0.0.0.0",
            port=int(args['--port']),
            debug=True,
            **kwargs)
Exemple #37
0
def main(argv=None):
    """Parse the command line, read all inputs and hand them to run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    # Parse the params file inside a context manager so it gets closed.
    with open(args['<params-config>']) as params_file:
        params_config = yamlconf.load(params_file)

    features_path = args['<features>']
    features = yamlconf.import_module(features_path)

    label_decoder = util.DECODERS[args['--label-type']]
    # "<stdin>" selects standard input; the handle is passed on to
    # util.read_observations, so it is not closed here.
    if args['--observations'] == "<stdin>":
        observations_f = sys.stdin
    else:
        observations_f = open(args['--observations'])

    observations = util.read_observations(observations_f, features,
                                          label_decoder)

    # Get a specialized scorer if we have one; otherwise pass the name on.
    scoring = metrics.SCORERS.get(args['--scoring'], args['--scoring'])

    folds = int(args['--folds'])

    # The report handle is passed on to run(), so it is not closed here.
    if args['--report'] == "<stdout>":
        report = sys.stdout
    else:
        report = open(args['--report'], "w")

    if args['--processes'] == "<cpu-count>":
        processes = multiprocessing.cpu_count()
    else:
        processes = int(args['--processes'])

    if args['--cv-timeout'] == "<forever>":
        cv_timeout = None
    else:
        cv_timeout = float(args['--cv-timeout']) * 60  # Convert to seconds

    scale_features = args['--scale-features']
    verbose = args['--verbose']

    run(params_config, features_path, observations, scoring, folds, report,
        processes, cv_timeout, scale_features, verbose)
Exemple #38
0
def main(argv=None):
    """Load the config file and start the score processor's Celery worker.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    # --verbose raises only the 'ores' logger to DEBUG.
    ores_level = logging.DEBUG if args['--verbose'] else logging.INFO
    logging.getLogger('ores').setLevel(ores_level)

    # Close the config file right after parsing (it used to leak).
    with open(args['--config']) as config_file:
        config = yamlconf.load(config_file)

    name = config['ores']['score_processor']
    score_processor = Celery.from_config(config, name)

    score_processor.application.worker_main(
        argv=["celery_worker", "--loglevel=INFO"]
    )
Exemple #39
0
def main(argv=None):
    """Configure the server from an optional config file and run it.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Without --config the server is configured with config=None.
    if args['--config'] is not None:
        # Context manager so the file handle is closed after parsing.
        with open(args['--config']) as config_file:
            config = yamlconf.load(config_file)
    else:
        config = None

    # --ssl switches on an ad-hoc SSL context; otherwise no extra kwargs.
    if args['--ssl']:
        kwargs = {'ssl_context': "adhoc"}
    else:
        kwargs = {}

    app = server.configure(config)
    app.debug = True
    app.run(host="0.0.0.0",
            port=int(args['--port']),
            debug=True,
            **kwargs)
Exemple #40
0
def main(argv=None):
    """Parse the command line, set up the metrics collector and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    from contextlib import ExitStack

    args = docopt.docopt(__doc__, argv=argv)

    stream_url = args['<stream-url>']
    ores_url = args['<ores-url>']
    config_paths = os.path.join(args['--config'], "*.yaml")
    # Load the files in sorted order and close every handle afterwards; the
    # original left them open.
    with ExitStack() as stack:
        config_files = [stack.enter_context(open(p))
                        for p in sorted(glob.glob(config_paths))]
        config = yamlconf.load(*config_files)
    delay = float(args['--delay'])
    verbose = bool(args['--verbose'])
    notify = notify_socket()

    # Use the metrics collector configured for the active scoring system, or
    # a Null() collector when none is configured.
    ss_name = config['ores']['scoring_system']
    if 'metrics_collector' in config['scoring_systems'][ss_name]:
        metrics_collector = MetricsCollector.from_config(
            config, config['scoring_systems'][ss_name]['metrics_collector'])
    else:
        metrics_collector = Null()

    run(stream_url, ores_url, metrics_collector, config, delay, notify,
        verbose)
Exemple #41
0
def read_labels_and_population_rates(labels_str, label_weights_strs,
                                     pop_rates_strs, config_path):
    """Resolve labels, label weights and population rates.

    Args:
        labels_str: Comma-separated JSON values (``--labels``), or None.
        label_weights_strs: Sequence of ``<json-label>=<json-weight>``
            strings (``--label-weight``); may be empty.
        pop_rates_strs: Sequence of ``<json-label>=<json-rate>`` strings
            (``--pop-rate``); may be empty.
        config_path: Path to a YAML labels config.  When truthy it takes
            precedence and the result of ``read_labels_config`` is returned.

    Returns:
        A ``(labels, label_weights, population_rates)`` tuple.  Weights and
        rates are ``OrderedDict`` instances, or None when not given.  When
        `labels_str` is None, the labels are taken from the population rates
        if present, otherwise from the label weights.

    Raises:
        RuntimeError: If no labels, weights or rates were supplied.
    """
    # First try config file
    if config_path:
        # Close the config file after parsing instead of leaking the handle.
        with open(os.path.expanduser(config_path)) as config_file:
            labels_config = yamlconf.load(config_file)
        return read_labels_config(labels_config)

    # Try to read --labels
    if labels_str is not None:
        labels = [json.loads(label_json)
                  for label_json in labels_str.strip().split(",")]
    else:
        labels = None

    # Try to read --label-weight (truthiness also tolerates None)
    if label_weights_strs:
        label_weights = OrderedDict()
        for label_weights_str in label_weights_strs:
            label, weight = (
                json.loads(s) for s in label_weights_str.split("=", 1))
            label_weights[label] = weight
    else:
        label_weights = None

    # Try to read --pop-rate (truthiness also tolerates None)
    if pop_rates_strs:
        population_rates = OrderedDict()
        for label_rate_str in pop_rates_strs:
            label, rate = (json.loads(s) for s in label_rate_str.split("=", 1))
            population_rates[label] = rate
    else:
        population_rates = None

    if labels is None and label_weights is None and population_rates is None:
        # Adjacent literals instead of a backslash inside the string, which
        # used to embed a long run of spaces in the message.
        raise RuntimeError("Either --pop-rates or --labels or "
                           "--labels-config must be specified")
    elif labels is None:
        # Derive labels from whichever mapping is available.
        if population_rates is not None:
            labels = list(population_rates.keys())
        else:
            labels = list(label_weights.keys())

    return labels, label_weights, population_rates
Exemple #42
0
def read_labels_and_population_rates(labels_str, label_weights_strs,
                                     pop_rates_strs, config_path):
    """Work out labels, label weights and population rates from CLI input.

    Args:
        labels_str: Comma-separated JSON values (``--labels``), or None.
        label_weights_strs: Sequence of ``<json-label>=<json-weight>``
            strings (``--label-weight``); may be empty.
        pop_rates_strs: Sequence of ``<json-label>=<json-rate>`` strings
            (``--pop-rate``); may be empty.
        config_path: Path to a YAML labels config.  When truthy, the result
            of ``read_labels_config`` for that file is returned directly.

    Returns:
        A ``(labels, label_weights, population_rates)`` tuple.  The two
        mappings are ``OrderedDict`` instances, or None when not supplied.
        Missing labels are derived from the population rates, else from the
        label weights.

    Raises:
        RuntimeError: If no labels, weights or rates were supplied.
    """
    # First try config file
    if config_path:
        # Context manager so the config file handle does not leak.
        with open(os.path.expanduser(config_path)) as config_file:
            labels_config = yamlconf.load(config_file)
        return read_labels_config(labels_config)

    # Try to read --labels
    labels = None
    if labels_str is not None:
        labels = [json.loads(item) for item in labels_str.strip().split(",")]

    # Try to read --label-weight (an empty or None sequence means "not given")
    label_weights = None
    if label_weights_strs:
        label_weights = OrderedDict()
        for label_weights_str in label_weights_strs:
            label, weight = (json.loads(s)
                             for s in label_weights_str.split("=", 1))
            label_weights[label] = weight

    # Try to read --pop-rate (an empty or None sequence means "not given")
    population_rates = None
    if pop_rates_strs:
        population_rates = OrderedDict()
        for label_rate_str in pop_rates_strs:
            label, rate = (json.loads(s) for s in label_rate_str.split("=", 1))
            population_rates[label] = rate

    if labels is None and label_weights is None and population_rates is None:
        # The message is built from adjacent literals; the former backslash
        # continuation inside the string put a run of spaces into it.
        raise RuntimeError("Either --pop-rates or --labels or "
                           "--labels-config must be specified")

    if labels is None:
        # Derive labels from whichever mapping is available.
        source = population_rates if population_rates is not None \
            else label_weights
        labels = list(source.keys())

    return labels, label_weights, population_rates
def main(argv=None):
    """Build the diff engine from config and process docs from stdin.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Close the config file as soon as it is parsed (it used to leak).
    with open(args['--config']) as config_file:
        config_doc = yamlconf.load(config_file)
    diff_engine = DiffEngine.from_config(config_doc, config_doc["diff_engine"])

    drop_text = bool(args['--drop-text'])

    # "<infinity>" is the sentinel for "no timeout".
    if args['--timeout'] == "<infinity>":
        timeout = None
    else:
        timeout = float(args['--timeout'])

    # "<all>" is the sentinel for "no namespace filter"; otherwise parse a
    # comma-separated list of integer namespace ids.
    if args['--namespaces'] == "<all>":
        namespaces = None
    else:
        namespaces = set(int(ns) for ns in args['--namespaces'].split(","))

    verbose = bool(args['--verbose'])

    run(read_docs(sys.stdin), diff_engine, timeout, namespaces, drop_text, verbose)
Exemple #44
0
def main(argv=None):
    """Build the diff engine from config and process the given dump files.

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    if len(args['<dump_file>']) == 0:
        dump_files = []
    else:
        dump_files = args['<dump_file>']

    # Close the config file as soon as it is parsed (it used to leak).
    with open(args['--config']) as config_file:
        config_doc = yamlconf.load(config_file)
    diff_engine = DiffEngine.from_config(config_doc, config_doc['diff_engine'])

    drop_text = bool(args['--drop-text'])

    # "<cpu_count>" is the sentinel for "one thread per CPU".
    if args['--threads'] == "<cpu_count>":
        threads = cpu_count()
    else:
        threads = int(args['--threads'])

    verbose = bool(args['--verbose'])

    run(dump_files, diff_engine, threads, drop_text, verbose)
def main(argv=None):
    """Parse the command line, create the diff engine and call run().

    Args:
        argv: Argument list handed to docopt; None lets docopt use its
            default.
    """
    args = docopt.docopt(__doc__, argv=argv)

    if len(args['<dump_file>']) == 0:
        dump_files = []
    else:
        dump_files = args['<dump_file>']

    # Parse the config inside a context manager so the handle gets closed.
    with open(args['--config']) as config_file:
        config_doc = yamlconf.load(config_file)
    diff_engine = DiffEngine.from_config(config_doc, config_doc['diff_engine'])

    drop_text = bool(args['--drop-text'])

    # "<cpu_count>" is the sentinel for "one thread per CPU".
    if args['--threads'] == "<cpu_count>":
        threads = cpu_count()
    else:
        threads = int(args['--threads'])

    verbose = bool(args['--verbose'])

    run(dump_files, diff_engine, threads, drop_text, verbose)
Exemple #46
0
def configure_logging(verbose=False,
                      debug=False,
                      logging_config=None,
                      **kwargs):
    """Configure logging from a YAML file, or fall back to a basic setup.

    Args:
        verbose: When true, the 'revscoring.dependencies.dependent' logger
            is set to DEBUG.
        debug: When true, the root logger and the
            'ores.metrics_collectors.logger' logger are set to DEBUG.
        logging_config: Path to a YAML logging configuration.  When None,
            DEFAULT_LOGGING_CONFIG is used if that path exists.
        **kwargs: Accepted and ignored.
    """
    # Without an explicit path, try the default config file location.
    config_path = logging_config
    if config_path is None and os.path.exists(DEFAULT_LOGGING_CONFIG):
        config_path = DEFAULT_LOGGING_CONFIG

    if config_path is None:
        # No config file available: configure fallback logging.
        logging.basicConfig(level=logging.INFO, format=DEFAULT_FORMAT)
        logging.getLogger('requests').setLevel(logging.INFO)
        logging.getLogger('stopit').setLevel(logging.ERROR)
    else:
        with open(config_path) as config_file:
            logging.config.dictConfig(yamlconf.load(config_file))

        # When stdin is a terminal, additionally mirror log records to stderr.
        if sys.stdin.isatty():
            console = logging.StreamHandler(stream=sys.stderr)
            console.setFormatter(logging.Formatter(fmt=DEFAULT_FORMAT))
            logging.getLogger().addHandler(console)

    # The command-line flags below are applied whether or not a logging
    # config file was loaded.
    # TODO: Document and reconcile debug vs verbose.
    if debug:
        logging.getLogger().setLevel(logging.DEBUG)
        metrics_logger = logging.getLogger('ores.metrics_collectors.logger')
        metrics_logger.setLevel(logging.DEBUG)

    if verbose:
        dependent_logger = logging.getLogger(
            'revscoring.dependencies.dependent')
        dependent_logger.setLevel(logging.DEBUG)
def main(argv=None):
    """
    Parse the command line and hand the resulting settings to `run()`.

    :Parameters:
        argv : `list` ( `str` )
            Argument vector passed to docopt.  None lets docopt fall back
            to its own default.

    Options read from the parsed arguments:

    * ``--config`` -- path of a YAML file; its 'diff_engine' entry is used
      to construct the `DiffEngine`.
    * ``--drop-text`` -- coerced to `bool`.
    * ``--timeout`` -- the literal "<infinity>" means no timeout (None),
      anything else is parsed as a float.
    * ``--namespaces`` -- the literal "<all>" means no filter (None),
      anything else is parsed as a comma-separated set of ints.
    * ``--verbose`` -- coerced to `bool`.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Use a context manager so the config file handle is released as soon as
    # the document has been parsed instead of lingering until interpreter
    # exit.
    with open(args['--config']) as config_file:
        config_doc = yamlconf.load(config_file)
    diff_engine = DiffEngine.from_config(config_doc, config_doc["diff_engine"])

    drop_text = bool(args['--drop-text'])

    if args['--timeout'] == "<infinity>":
        timeout = None
    else:
        timeout = float(args['--timeout'])

    if args['--namespaces'] == "<all>":
        namespaces = None
    else:
        namespaces = set(int(ns) for ns in args['--namespaces'].split(","))

    verbose = bool(args['--verbose'])

    run(read_docs(sys.stdin), diff_engine, timeout, namespaces, drop_text,
        verbose)
#!/usr/bin/env python3
import glob
import logging
import logging.config
from itertools import chain

import yamlconf
from wikilabels.wsgi import server

# Module-level setup: build the `application` object from the YAML files under
# config/, configure logging, and (when executed directly) start a debug
# server.  Statement order matters here, so the code is left untouched.

# Base configuration files, in sorted filename order.
config_paths = sorted(glob.glob("config/*.yaml"))
if __name__ == '__main__':
    # When run as a script, the localhost files are appended after the base
    # files.
    # NOTE(review): presumably later files override earlier ones in
    # yamlconf.load -- confirm against the yamlconf documentation.
    config_paths += sorted(glob.glob("config/localhost/*.yaml"))

# NOTE(review): the file objects opened by this generator are never closed
# explicitly.
config = yamlconf.load(*(open(p) for p in config_paths))
application = server.configure(config)

# Apply the dictionary-based logging configuration from logging_config.yaml
# (path is relative to the current working directory).
with open("logging_config.yaml") as f:
    logging_config = yamlconf.load(f)
    logging.config.dictConfig(logging_config)

# NOTE(review): logging.basicConfig() does nothing when the root logger
# already has handlers, so this only takes effect if the dictConfig above did
# not install any on the root logger -- confirm this is intended.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
)

if __name__ == '__main__':
    # Direct execution only: turn on DEBUG output for the 'wikilabels' logger
    # and run the application in debug mode on port 8080.
    logging.getLogger('wikilabels').setLevel(logging.DEBUG)

    application.debug = True
    application.run(host="0.0.0.0", port=8080, debug=True)
def app():
    """
    Build the application from every ``*.yaml`` file under ``config/``.

    The files are loaded in sorted path order and passed together to
    `yamlconf.load`; the resulting configuration is handed to
    `server.configure`, whose return value is returned unchanged.
    """
    config_glob = os.path.join('config/', "*.yaml")
    # Open every file up front (as the original argument unpacking did), but
    # keep the handles so that they can be closed once parsing is finished.
    config_files = [open(p) for p in sorted(glob.glob(config_glob))]
    try:
        config = yamlconf.load(*config_files)
    finally:
        for config_file in config_files:
            config_file.close()
    return server.configure(config)
Exemple #50
0
def main(argv=None):
    """
    Parse the command line, load the YAML config and call `run()`.

    :Parameters:
        argv : `list` ( `str` )
            Argument vector passed to docopt.  None lets docopt fall back
            to its own default.

    `run()` receives the ``<stream-url>`` and ``<ores-url>`` arguments, the
    configuration loaded from ``--config`` and the ``--verbose`` flag.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Close the config file once it has been parsed rather than leaving the
    # handle open for the lifetime of run().
    with open(args["--config"]) as config_file:
        config = yamlconf.load(config_file)
    run(args["<stream-url>"], args["<ores-url>"], config, args["--verbose"])
Exemple #51
0
def main(argv=None):
    """
    Parse the command line, gather all inputs and call `run()`.

    :Parameters:
        argv : `list` ( `str` )
            Argument vector passed to docopt.  None lets docopt fall back
            to its own default.

    Sentinel values recognised in the parsed arguments:

    * ``--observations`` == "<stdin>" -- read observations from stdin
      instead of opening a file.
    * ``--report`` == "<stdout>" -- write the report to stdout instead of
      opening a file for writing.
    * ``--processes`` == "<cpu-count>" -- use
      `multiprocessing.cpu_count()`.
    * ``--cv-timeout`` == "<forever>" -- no timeout (None); otherwise the
      value is taken as minutes and converted to seconds.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    logging.getLogger("revscoring.scoring.models").setLevel(logging.WARNING)

    # Close the params config as soon as it has been parsed.
    with open(args['<params-config>']) as params_file:
        params_config = yamlconf.load(params_file)

    features_path = args['<features>']
    features = yamlconf.import_path(features_path)

    # NOTE(review): the observations file is intentionally not wrapped in a
    # `with` block -- read_observations() may be lazy, in which case the
    # handle has to stay open until the list below has been built.
    if args['--observations'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--observations']))

    logger.info("Reading feature values & labels...")
    label_name = args['<label>']
    value_labels = \
        [(list(solve(features, cache=ob['cache'])), ob[label_name])
         for ob in observations]

    statistic_path = args['<statistic>']
    additional_params = {}

    labels, label_weights, population_rates = \
        util.read_labels_and_population_rates(
            args['--labels'], args['--label-weight'], args['--pop-rate'],
            args['--labels-config'])
    if label_weights is not None:
        additional_params['label_weights'] = label_weights
    if population_rates is not None:
        additional_params['population_rates'] = population_rates

    if args['--center']:
        additional_params['center'] = args['--center']
    if args['--scale']:
        # A stray trailing comma used to turn this value into a 1-tuple;
        # store the option value itself, exactly like 'center' above.
        additional_params['scale'] = args['--scale']

    if args['--multilabel']:
        additional_params['multilabel'] = True

    maximize = not args['--minimize']

    folds = int(args['--folds'])

    # The report handle is handed to run(), so it is left open here.
    if args['--report'] == "<stdout>":
        report = sys.stdout
    else:
        report = open(args['--report'], "w")

    if args['--processes'] == "<cpu-count>":
        processes = multiprocessing.cpu_count()
    else:
        processes = int(args['--processes'])

    if args['--cv-timeout'] == "<forever>":
        cv_timeout = None
    else:
        cv_timeout = float(args['--cv-timeout']) * 60  # Convert to seconds

    verbose = args['--verbose']

    run(params_config, features, labels, features_path, value_labels,
        statistic_path, additional_params, maximize, folds, report,
        processes, cv_timeout, verbose)
#!/usr/bin/env python3
import glob
import logging

import yamlconf
from wikilabels.wsgi import server

# Module-level setup: load every config/*.yaml file (sorted by path) into one
# configuration and build the `application` object from it.
# NOTE(review): the file objects opened by this generator are never closed
# explicitly.
config = yamlconf.load(*(open(p) for p in sorted(glob.glob("config/*.yaml"))))

application = server.configure(config)


if __name__ == '__main__':
    # Direct execution only: turn on DEBUG output for the 'wikilabels' logger
    # and run the application in debug mode.
    logging.getLogger('wikilabels').setLevel(logging.DEBUG)

    application.debug = True
    application.run(host="0.0.0.0", debug=True)
#!/usr/bin/env python3
import os

from flask import request

import yamlconf
from wikilabels.wsgi import application

# Module-level setup: resolve the config file relative to this script's real
# location (symlinks resolved) rather than the current working directory.
directory = os.path.dirname(os.path.realpath(__file__))

config_path = os.path.join(directory, "config/wikilabels.yaml")

# NOTE(review): the file object opened here is never closed explicitly.
config = yamlconf.load(open(config_path))

# Debug mode is switched on unconditionally, i.e. also when this module is
# imported rather than executed.
# NOTE(review): confirm that is intended outside local development.
app = application.configure(config)
app.debug = True


if __name__ == '__main__':
    # Direct execution only: run the application in debug mode.
    app.run(host="0.0.0.0", debug = True)