Example #1
def configs_info():
    """Fetch or set the info of run configs"""
    datarun_id = request.args.get('datarun_id', None, type=int)
    result = {'success': False}
    # with datarun_config(datarun_id):
    # run_path = current_app.config['run_config']
    if request.method == 'GET':
        try:
            config = load_datarun_config_dict(datarun_id)
        except FileNotFoundError as e:
            raise ApiError(e, 404)
        return jsonify(config)
        # with open(run_path) as f:
        #     run_args = yaml.load(f)
        #     result.update(run_args)
        #     result.update({'success':True})
    elif request.method == 'POST':
        run_path = current_app.config['RUN_CONFIG']
        # Load the locally saved run configs from the default file
        with open(run_path) as f:
            run_args = yaml.safe_load(f) or {}
        # Update the local run configs with the posted values and save them back to the default file
        configs = json.loads(request.form['configs'])
        run_args.update(configs)
        with open(run_path, 'w') as f:
            yaml.dump(run_args, f)
        # Reload the config and update the current app's config
        config = {'run_config': run_path}
        # Load ATM confs
        _, run_conf, _, _ = load_config(**config)
        current_app.config.update({'RUN_CONF': run_conf})
        result['success'] = True
    return jsonify(result)
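A usage sketch for the handler above (assumed, not from the source): the '/api/configs' route, host, and port are guesses, and 'budget' is only an illustrative run-config key. The handler does expect a GET query parameter 'datarun_id' and a form-encoded POST body whose 'configs' field holds a JSON-encoded dict.

import json
import requests

BASE = 'http://localhost:7777/api/configs'   # hypothetical host, port, and route

# Read the run config for datarun 1
print(requests.get(BASE, params={'datarun_id': 1}).json())

# Overwrite a field and persist it; 'configs' must be a JSON-encoded dict
print(requests.post(BASE, data={'configs': json.dumps({'budget': 200})}).json())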
Example #2
def work(datarun_id, args=None):
    """
    A copy of the code in atm/scripts/worker.py
    A call to this function will start and run a simple worker
    """
    _logger = logging.getLogger('atm_server.worker:work')
    _logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler('logs/works.log')
    fmt = '%(asctime)-12s %(name)s - %(levelname)s  %(message)s'
    fh.setFormatter(logging.Formatter(fmt))
    _logger.addHandler(fh)
    parser = argparse.ArgumentParser(description='Add more classifiers to database')
    add_arguments_sql(parser)
    add_arguments_aws_s3(parser)
    add_arguments_logging(parser)

    # add worker-specific arguments
    parser.add_argument('--cloud-mode', action='store_true', default=False,
                        help='Whether to run this worker in cloud mode')
    parser.add_argument('--time', help='Number of seconds to run worker', type=int)
    parser.add_argument('--choose-randomly', action='store_true',
                        help='Choose dataruns to work on randomly (default = sequential order)')
    parser.add_argument('--no-save', dest='save_files', default=True,
                        action='store_const', const=False,
                        help="don't save models and metrics at all")

    # parse arguments and load configuration
    if args is None:
        args = []
    _args = parser.parse_args(args)

    # default logging config is different if initialized from the command line
    if _args.log_config is None:
        _args.log_config = os.path.join(atm.PROJECT_ROOT,
                                        'config/templates/log-script.yaml')

    sql_config, _, aws_config, log_config = load_config(**vars(_args))
    initialize_logging(log_config)

    # let's go
    _logger.warning('Worker started!')
    with datarun_config(datarun_id) as config:
        _logger.warning('Using configs from ' + config.config_path)
        db = Database(**vars(sql_config))
        try:
            atm_work(db=db,
                     datarun_ids=[datarun_id],
                     choose_randomly=_args.choose_randomly,
                     save_files=_args.save_files,
                     cloud_mode=_args.cloud_mode,
                     aws_config=aws_config,
                     log_config=log_config,
                     total_time=_args.time,
                     wait=False)
        except Exception as e:
            _logger.error(e)
            mark_running_datarun_pending(db, datarun_id)
            raise e
    _logger.warning('Worker exited.')
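A minimal sketch (assumed, not from the snippet) of launching this worker without blocking the caller, for example from a request handler; the datarun id 1 and the 60-second '--time' budget are illustrative values.

from multiprocessing import Process

if __name__ == '__main__':
    # Run work() for datarun 1 in a child process; '--time 60' is parsed by the
    # argparse options defined above and caps the worker at 60 seconds.
    p = Process(target=work, args=(1, ['--time', '60']))
    p.start()
    p.join()   # or poll p.is_alive() and keep doing other work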
Example #3
    def __init__(self, **kwargs):

        if kwargs.get('log_config') is None:
            kwargs['log_config'] = os.path.join(
                PROJECT_ROOT, 'config/templates/log-script.yaml')

        self.sql_conf, self.run_conf, self.aws_conf, self.log_conf = load_config(
            **kwargs)

        self.db = Database(**vars(self.sql_conf))

        initialize_logging(self.log_conf)
Example #4
def create_app(config=None):
    """Create and configure an instance of the Flask application."""
    app = Flask(__name__)
    CORS(app)

    # Update configs
    if app.config['ENV'] == 'production':
        app.config.from_object(ProductionConfig)
    elif app.config['ENV'] == 'development':
        app.config.from_object(DevelopmentConfig)
    else:
        app.config.from_object(Config)

    if config is None:
        config = {}
    config = {key: val for key, val in config.items() if val is not None}
    if config.get('run_config', None) is not None:
        config['RUN_CONFIG'] = config['run_config']
    if config.get('sql_config', None) is not None:
        config['SQL_CONFIG'] = config['sql_config']

    # print(config)
    app.config.update(config)

    # Load ATM confs
    sql_conf, run_conf, aws_conf, log_conf = load_config(**config)
    # print(run_conf.selector)
    # print(sql_conf)
    app.config.update({
        'SQL_CONF': sql_conf,
        'RUN_CONF': run_conf,
        'AWS_CONF': aws_conf,
        'LOG_CONF': log_conf
    })
    # Use .get() so create_app() still works when 'run_per_partition' is not supplied
    app.config.update({'RUN_PER_PARTITION': config.get('run_per_partition', False)})

    @app.route('/hello')
    def hello():
        return 'Hello, World!'

    db.init_app(app)
    app.register_blueprint(api, url_prefix='/api')
    app.register_blueprint(vis, url_prefix='/')
    return app
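A sketch of wiring the factory above into a development server; the yaml paths and the port are placeholders, and 'run_per_partition' is passed explicitly because create_app() stores it in the app config.

app = create_app({
    'run_config': 'config/run.yaml',   # placeholder path
    'sql_config': 'config/sql.yaml',   # placeholder path
    'run_per_partition': False,
})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7777, debug=True)   # port is arbitrary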
Example #5
def btb_test(dataruns=None, datasets=None, processes=1, graph=False, **kwargs):
    """
    Run a test datarun using the chosen tuner and selector, and compare it to
    the baseline performance.

    The tuner and selector are specified in **kwargs, along with the rest of the
    standard datarun arguments.
    """
    sql_conf, run_conf, _, _ = load_config(sql_path=SQL_CONFIG,
                                           run_path=RUN_CONFIG,
                                           **kwargs)

    db = Database(**vars(sql_conf))
    datarun_ids = dataruns or []
    datarun_ids_per_dataset = [[each] for each in dataruns] if dataruns else []
    datasets = datasets or DATASETS_MAX_FIRST

    # if necessary, generate datasets and dataruns
    if not datarun_ids:
        for ds in datasets:
            run_conf.train_path = DATA_URL + ds
            run_conf.dataset_id = None
            print('Creating 10 dataruns for', run_conf.train_path)
            run_ids = [enter_data(sql_conf, run_conf) for i in range(10)]
            datarun_ids_per_dataset.append(run_ids)
            datarun_ids.extend(run_ids)

    # work on the dataruns until they're done
    print('Working on %d dataruns' % len(datarun_ids))
    work_parallel(db=db, datarun_ids=datarun_ids, n_procs=processes)
    print('Finished!')

    results = {}

    # compute and maybe graph the results for each dataset
    for rids in datarun_ids_per_dataset:
        res = report_auc_vs_baseline(db, rids, graph=graph)
        results[tuple(rids)] = {'test': res[0], 'baseline': res[1]}

    return results
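A hedged invocation sketch: the tuner and selector names below are illustrative and must correspond to methods ATM/BTB actually recognizes; they are forwarded through **kwargs to load_config().

results = btb_test(processes=2, graph=False, tuner='gp', selector='ucb1')
for run_ids, res in results.items():
    print(run_ids, res['test'], res['baseline'])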
Example #6
parser = argparse.ArgumentParser(description='''
Run a single end-to-end test with 10 sample datasets.
The script will create a datarun for each dataset, then run a worker until the
jobs are finished.
''')
parser.add_argument('--processes',
                    help='number of processes to run concurrently',
                    type=int,
                    default=4)
parser.add_argument('--total-time',
                    help='total time for each worker to work (in seconds)',
                    type=int,
                    default=None)

args = parser.parse_args()
sql_config, run_config, _, _ = load_config(sql_path=SQL_CONFIG,
                                           run_path=RUN_CONFIG)

db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = os.path.join(DATA_DIR, ds)
    datarun_ids.append(enter_data(sql_config=sql_config,
                                  run_config=run_config))

work_parallel(db=db,
              datarun_ids=datarun_ids,
              n_procs=args.processes,
              total_time=args.total_time)
Example #7
    parser.add_argument('--dataruns', help='Only train on dataruns with these ids',
                        nargs='+')
    parser.add_argument('--time', help='Number of seconds to run worker', type=int)
    parser.add_argument('--choose-randomly', action='store_true',
                        help='Choose dataruns to work on randomly (default = sequential order)')
    parser.add_argument('--no-save', dest='save_files', default=True,
                        action='store_const', const=False,
                        help="don't save models and metrics at all")

    # parse arguments and load configuration
    args = parser.parse_args()

    # default logging config is different if initialized from the command line
    if args.log_config is None:
        args.log_config = os.path.join(PROJECT_ROOT,
                                       'config/templates/log-script.yaml')

    sql_config, _, aws_config, log_config = load_config(**vars(args))
    initialize_logging(log_config)

    # let's go
    work(db=Database(**vars(sql_config)),
         datarun_ids=args.dataruns,
         choose_randomly=args.choose_randomly,
         save_files=args.save_files,
         cloud_mode=args.cloud_mode,
         aws_config=aws_config,
         log_config=log_config,
         total_time=args.time,
         wait=False)
Example #8
You can pass yaml configuration files (--sql-config, --aws-config, --run-config)
instead of passing individual arguments. Any arguments in the config files will
override arguments passed on the command line. See the examples in the config/
folder for more information. """)
    # Add argparse arguments for aws, sql, and datarun config
    add_arguments_aws_s3(parser)
    add_arguments_sql(parser)
    add_arguments_datarun(parser)
    add_arguments_logging(parser)
    parser.add_argument('--run-per-partition', default=False, action='store_true',
                        help='if set, generate a new datarun for each hyperpartition')

    args = parser.parse_args()

    # default logging config is different if initialized from the command line
    if args.log_config is None:
        args.log_config = os.path.join(PROJECT_ROOT,
                                       'config/templates/log-script.yaml')

    # create config objects from the config files and/or command line args
    sql_conf, run_conf, aws_conf, log_conf = load_config(sql_path=args.sql_config,
                                                         run_path=args.run_config,
                                                         aws_path=args.aws_config,
                                                         log_path=args.log_config,
                                                         **vars(args))
    initialize_logging(log_conf)

    # create and save the dataset and datarun
    enter_data(sql_conf, run_conf, aws_conf, args.run_per_partition)
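A programmatic sketch of the same flow the script above drives from the command line; the yaml paths are placeholders, and load_config() fills in defaults for anything the files omit.

sql_conf, run_conf, aws_conf, log_conf = load_config(
    sql_path='config/sql.yaml',   # placeholder path
    run_path='config/run.yaml')   # placeholder path
initialize_logging(log_conf)
enter_data(sql_conf, run_conf, aws_conf, False)   # False: a single datarun, not one per hyperpartition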
Example #9
folder for more information. """)
    # Add argparse arguments for aws, sql, and datarun config
    add_arguments_aws_s3(parser)
    add_arguments_sql(parser)
    add_arguments_datarun(parser)
    add_arguments_logging(parser)
    parser.add_argument(
        '--run-per-partition',
        default=False,
        action='store_true',
        help='if set, generate a new datarun for each hyperpartition')

    args = parser.parse_args()

    # default logging config is different if initialized from the command line
    if args.log_config is None:
        args.log_config = os.path.join(PROJECT_ROOT,
                                       'config/templates/log-script.yaml')

    # create config objects from the config files and/or command line args
    sql_conf, run_conf, aws_conf, log_conf = load_config(
        sql_path=args.sql_config,
        run_path=args.run_config,
        aws_path=args.aws_config,
        log_path=args.log_config,
        **vars(args))
    initialize_logging(log_conf)

    # create and save the dataset and datarun
    enter_data(sql_conf, run_conf, aws_conf, args.run_per_partition)