def configs_info():
    """Fetch or update the run config of a datarun."""
    datarun_id = request.args.get('datarun_id', None, type=int)
    result = {'success': False}
    if request.method == 'GET':
        try:
            config = load_datarun_config_dict(datarun_id)
        except FileNotFoundError as e:
            raise ApiError(e, 404)
        return jsonify(config)
    elif request.method == 'POST':
        run_path = current_app.config['RUN_CONFIG']
        # Read the locally stored run config (safe_load avoids the unsafe
        # default YAML loader).
        with open(run_path) as f:
            run_args = yaml.safe_load(f)
        # Merge in the configs sent by the client and save them back to the
        # default file.
        configs = json.loads(request.form['configs'])
        run_args.update(configs)
        with open(run_path, 'w') as f:
            yaml.dump(run_args, f)
        # Reload the ATM run config and update the current app's config.
        config = {'run_config': run_path}
        _, run_conf, _, _ = load_config(**config)
        current_app.config.update({'RUN_CONF': run_conf})
        result['success'] = True
    return jsonify(result)
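# Usage sketch (not from the original module): how a client could exercise the
# endpoint above. The base URL, port, and the '/api/configs' route are
# assumptions for illustration; the real prefix depends on where the blueprint
# is registered. The 'budget' field is just an example run-config key.
import json

import requests

BASE = 'http://localhost:7777/api/configs'  # hypothetical address and route

# GET: fetch the stored run config of datarun 1
print(requests.get(BASE, params={'datarun_id': 1}).json())

# POST: the endpoint expects a form field named 'configs' containing a
# JSON-encoded dict of run-config overrides
print(requests.post(BASE, data={'configs': json.dumps({'budget': 200})}).json())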
def work(datarun_id, args=None):
    """
    A copy of the code in atm/scripts/worker.py.

    A call to this function will start and run a simple worker.
    """
    _logger = logging.getLogger('atm_server.worker:work')
    _logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler('logs/works.log')
    fmt = '%(asctime)-12s %(name)s - %(levelname)s %(message)s'
    fh.setFormatter(logging.Formatter(fmt))
    _logger.addHandler(fh)

    parser = argparse.ArgumentParser(description='Add more classifiers to database')
    add_arguments_sql(parser)
    add_arguments_aws_s3(parser)
    add_arguments_logging(parser)

    # add worker-specific arguments
    parser.add_argument('--cloud-mode', action='store_true', default=False,
                        help='Whether to run this worker in cloud mode')
    parser.add_argument('--time', help='Number of seconds to run worker', type=int)
    parser.add_argument('--choose-randomly', action='store_true',
                        help='Choose dataruns to work on randomly (default = sequential order)')
    parser.add_argument('--no-save', dest='save_files', default=True,
                        action='store_const', const=False,
                        help="don't save models and metrics at all")

    # parse arguments and load configuration
    if args is None:
        args = []
    _args = parser.parse_args(args)

    # default logging config is different if initialized from the command line
    if _args.log_config is None:
        _args.log_config = os.path.join(atm.PROJECT_ROOT,
                                        'config/templates/log-script.yaml')

    sql_config, _, aws_config, log_config = load_config(**vars(_args))
    initialize_logging(log_config)

    # let's go
    _logger.warning('Worker started!')
    with datarun_config(datarun_id) as config:
        _logger.warning('Using configs from ' + config.config_path)
        db = Database(**vars(sql_config))
        try:
            atm_work(db=db,
                     datarun_ids=[datarun_id],
                     choose_randomly=_args.choose_randomly,
                     save_files=_args.save_files,
                     cloud_mode=_args.cloud_mode,
                     aws_config=aws_config,
                     log_config=log_config,
                     total_time=_args.time,
                     wait=False)
        except Exception as e:
            _logger.error(e)
            mark_running_datarun_pending(db, datarun_id)
            raise e
    _logger.warning('Worker exited.')
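# Usage sketch (assumption, not part of the original source): work() blocks until
# the worker exits, so a caller would typically launch it in a separate process.
# The datarun id and the CLI-style flags below are illustrative.
from multiprocessing import Process

p = Process(target=work, args=(1,), kwargs={'args': ['--time', '60', '--no-save']})
p.start()
p.join()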
def __init__(self, **kwargs):
    if kwargs.get('log_config') is None:
        kwargs['log_config'] = os.path.join(PROJECT_ROOT,
                                            'config/templates/log-script.yaml')
    self.sql_conf, self.run_conf, self.aws_conf, self.log_conf = load_config(**kwargs)
    self.db = Database(**vars(self.sql_conf))
    initialize_logging(self.log_conf)
def create_app(config=None):
    """Create and configure an instance of the Flask application."""
    app = Flask(__name__)
    CORS(app)

    # Update configs
    if app.config['ENV'] == 'production':
        app.config.from_object(ProductionConfig)
    elif app.config['ENV'] == 'development':
        app.config.from_object(DevelopmentConfig)
    else:
        app.config.from_object(Config)

    if config is None:
        config = {}
    config = {key: val for key, val in config.items() if val is not None}
    if config.get('run_config', None) is not None:
        config['RUN_CONFIG'] = config['run_config']
    if config.get('sql_config', None) is not None:
        config['SQL_CONFIG'] = config['sql_config']
    app.config.update(config)

    # Load ATM confs
    sql_conf, run_conf, aws_conf, log_conf = load_config(**config)
    app.config.update({
        'SQL_CONF': sql_conf,
        'RUN_CONF': run_conf,
        'AWS_CONF': aws_conf,
        'LOG_CONF': log_conf
    })
    # Default to False when the caller does not supply run_per_partition
    # (indexing directly would raise a KeyError for a bare create_app()).
    app.config.update({'RUN_PER_PARTITION': config.get('run_per_partition', False)})

    @app.route('/hello')
    def hello():
        return 'Hello, World!'

    db.init_app(app)
    app.register_blueprint(api, url_prefix='/api')
    app.register_blueprint(vis, url_prefix='/')

    return app
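# Usage sketch: building and serving the app with explicit config paths. The
# paths and port are placeholders; FLASK_ENV decides which of the config
# objects above is applied.
app = create_app({
    'run_config': 'config/run.yaml',   # hypothetical path
    'sql_config': 'config/sql.yaml',   # hypothetical path
    'run_per_partition': False,
})
app.run(host='0.0.0.0', port=7777)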
def btb_test(dataruns=None, datasets=None, processes=1, graph=False, **kwargs):
    """
    Run a test datarun using the chosen tuner and selector, and compare it to
    the baseline performance.

    Tuner and selector will be specified in **kwargs, along with the rest of
    the standard datarun arguments.
    """
    sql_conf, run_conf, _, _ = load_config(sql_path=SQL_CONFIG,
                                           run_path=RUN_CONFIG,
                                           **kwargs)
    db = Database(**vars(sql_conf))

    datarun_ids = dataruns or []
    datarun_ids_per_dataset = [[each] for each in dataruns] if dataruns else []
    datasets = datasets or DATASETS_MAX_FIRST

    # if necessary, generate datasets and dataruns
    if not datarun_ids:
        for ds in datasets:
            run_conf.train_path = DATA_URL + ds
            run_conf.dataset_id = None
            print('Creating 10 dataruns for', run_conf.train_path)
            run_ids = [enter_data(sql_conf, run_conf) for _ in range(10)]
            datarun_ids_per_dataset.append(run_ids)
            datarun_ids.extend(run_ids)

    # work on the dataruns til they're done
    print('Working on %d dataruns' % len(datarun_ids))
    work_parallel(db=db, datarun_ids=datarun_ids, n_procs=processes)
    print('Finished!')

    results = {}

    # compute and maybe graph the results for each dataset
    for rids in datarun_ids_per_dataset:
        res = report_auc_vs_baseline(db, rids, graph=graph)
        results[tuple(rids)] = {'test': res[0], 'baseline': res[1]}

    return results
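# Usage sketch: the tuner and selector names below are illustrative BTB choices
# forwarded through **kwargs into the run config; the result maps each group of
# datarun ids to its test and baseline AUC summaries.
results = btb_test(processes=2, graph=False, tuner='gp', selector='ucb1')
for run_ids, scores in results.items():
    print(run_ids, scores['test'], scores['baseline'])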
parser = argparse.ArgumentParser(description='''
Run a single end-to-end test with 10 sample datasets.
The script will create a datarun for each dataset, then run a worker until the
jobs are finished.
''')
parser.add_argument('--processes', help='number of processes to run concurrently',
                    type=int, default=4)
parser.add_argument('--total-time', help='total time for each worker to work (in seconds)',
                    type=int, default=None)
args = parser.parse_args()

sql_config, run_config, _, _ = load_config(sql_path=SQL_CONFIG, run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
    run_config.train_path = os.path.join(DATA_DIR, ds)
    datarun_ids.append(enter_data(sql_config=sql_config, run_config=run_config))

work_parallel(db=db, datarun_ids=datarun_ids, n_procs=args.processes,
              total_time=args.total_time)
parser.add_argument('--dataruns', help='Only train on dataruns with these ids',
                    nargs='+')
parser.add_argument('--time', help='Number of seconds to run worker', type=int)
parser.add_argument('--choose-randomly', action='store_true',
                    help='Choose dataruns to work on randomly (default = sequential order)')
parser.add_argument('--no-save', dest='save_files', default=True,
                    action='store_const', const=False,
                    help="don't save models and metrics at all")

# parse arguments and load configuration
args = parser.parse_args()

# default logging config is different if initialized from the command line
if args.log_config is None:
    args.log_config = os.path.join(PROJECT_ROOT, 'config/templates/log-script.yaml')

sql_config, _, aws_config, log_config = load_config(**vars(args))
initialize_logging(log_config)

# let's go
work(db=Database(**vars(sql_config)),
     datarun_ids=args.dataruns,
     choose_randomly=args.choose_randomly,
     save_files=args.save_files,
     cloud_mode=args.cloud_mode,
     aws_config=aws_config,
     log_config=log_config,
     total_time=args.time,
     wait=False)
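# Example invocation (assumption: the script above is saved as worker.py; the
# flags correspond to the arguments registered above plus the SQL/AWS/logging
# options added earlier in the script):
#
#     python worker.py --dataruns 3 4 --time 600 --choose-randomly --no-save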
You can pass yaml configuration files (--sql-config, --aws-config, --run-config)
instead of passing individual arguments. Any arguments in the config files will
override arguments passed on the command line. See the examples in the config/
folder for more information.
""")

# Add argparse arguments for aws, sql, and datarun config
add_arguments_aws_s3(parser)
add_arguments_sql(parser)
add_arguments_datarun(parser)
add_arguments_logging(parser)
parser.add_argument('--run-per-partition', default=False, action='store_true',
                    help='if set, generate a new datarun for each hyperpartition')
args = parser.parse_args()

# default logging config is different if initialized from the command line
if args.log_config is None:
    args.log_config = os.path.join(PROJECT_ROOT, 'config/templates/log-script.yaml')

# create config objects from the config files and/or command line args
sql_conf, run_conf, aws_conf, log_conf = load_config(sql_path=args.sql_config,
                                                     run_path=args.run_config,
                                                     aws_path=args.aws_config,
                                                     log_path=args.log_config,
                                                     **vars(args))
initialize_logging(log_conf)

# create and save the dataset and datarun
enter_data(sql_conf, run_conf, aws_conf, args.run_per_partition)