def add_workflows(args, other_args, subparser=None):
    """
    Add GeneFlow workflows to database.

    Args:
        args.workflow_yaml: GeneFlow definition with workflows.
        args.config: GeneFlow config file path.
        args.environment: Config environment.
        other_args: extra CLI arguments (unused).
        subparser: argparse subparser (unused).

    Returns:
        On success: True.
        On failure: False.

    """
    # load and validate the config file for the requested environment
    cfg = Config()
    if not cfg.load(args.config):
        Log.an().error('cannot load config file: %s', args.config)
        return False

    config_dict = cfg.config(args.environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', args.environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # import workflow definitions and persist them
    defs = data_source.import_workflows_from_def(args.workflow_yaml)
    if not defs:
        Log.an().error(
            'workflow definition load failed: %s', args.workflow_yaml
        )
        return False

    data_source.commit()

    # display new IDs assigned to each imported workflow
    for name, new_id in defs.items():
        Log.some().info('workflow loaded: %s -> %s', name, new_id)

    return True
def add_apps(args, other_args=None, subparser=None):
    """
    Add GeneFlow apps to database.

    Args:
        args.app_yaml: GeneFlow definition with apps.
        args.config_file: GeneFlow config file path.
        args.environment: Config environment.
        other_args: extra CLI arguments (unused); added with a default so
            the signature matches sibling commands (add_workflows,
            migrate_db, init_db) and stays backward-compatible.
        subparser: argparse subparser (unused).

    Returns:
        On success: True.
        On failure: False.

    """
    app_yaml = args.app_yaml
    config_file = args.config_file
    environment = args.environment

    # load config file
    cfg = Config()
    if not cfg.load(config_file):
        Log.an().error('cannot load config file: %s', config_file)
        return False

    config_dict = cfg.config(environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # import apps
    defs = data_source.import_apps_from_def(app_yaml)
    if not defs:
        Log.an().error('app definition load failed: %s', app_yaml)
        return False

    data_source.commit()

    # display new IDs
    for app in defs:
        Log.some().info('app loaded: %s -> %s', app, defs[app])

    return True
def migrate_db(args, other_args, subparser=None):
    """
    Migrate SQL DB schema.

    Currently only works for MySQL databases.

    Args:
        args.config: GeneFlow config file path.
        args.environment: Config environment.
        other_args: extra CLI arguments (unused).
        subparser: argparse subparser (unused).

    Returns:
        On success: True.
        On failure: False.

    """
    cfg = Config()
    if not cfg.load(args.config):
        Log.an().error('cannot load config file: %s', args.config)
        return False

    config_dict = cfg.config(args.environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', args.environment)
        return False

    db_conf = config_dict['database']
    if db_conf['type'] != 'mysql':
        Log.an().error('only mysql databases can be migrated')
        return False

    # migration scripts ship with the installed package
    migrations_path = str(Path(GF_PACKAGE_PATH, 'data/migrations'))

    # apply any outstanding migrations under a DB-level lock; any failure
    # is logged and reported as False rather than propagated
    try:
        conn_str = '{}://{}:{}@{}/{}'.format(
            db_conf['type'],
            db_conf['user'],
            db_conf['password'],
            db_conf['host'],
            db_conf['database']
        )
        database = get_backend(conn_str)
        migrations = read_migrations(migrations_path)
        with database.lock():
            database.apply_migrations(database.to_apply(migrations))

    except Exception as err:
        Log.an().error('cannot migrate database [%s]', str(err))
        return False

    return True
def before_all(context):
    """
    Behave hook: prepare the shared test context before any features run.

    Loads the GeneFlow test config, clears the database, and initializes
    the dict used for agave connections.

    Args:
        context: behave context object; gains `geneflow_config` and
            `agave` attributes.

    Raises:
        ValueError: if the config file cannot be loaded or the requested
            environment is not defined in it.

    """
    # load workflow config file based on env; raise instead of assert —
    # asserts are stripped under `python -O`, silently skipping validation
    cfg = Config()
    if not cfg.load('./test.conf'):
        raise ValueError('cannot load geneflow config file: ./test.conf')

    geneflow_config = cfg.config(
        env=context.config.userdata.get('environment', 'local')
    )
    if not geneflow_config:
        raise ValueError('Error loading geneflow config file')

    context.geneflow_config = geneflow_config

    # delete all items from database
    clear_database(context)

    # setup dict key for agave connection
    context.agave = {}
def init_db(args, other_args, subparser=None):
    """
    Initialize SQLite DB schema.

    Args:
        args.config: GeneFlow config file path.
        args.environment: Config environment.
        other_args: extra CLI arguments (unused).
        subparser: argparse subparser (unused).

    Returns:
        On success: True.
        On failure: False.

    """
    # read config and resolve the requested environment section
    cfg = Config()
    if not cfg.load(args.config):
        Log.an().error('cannot load config file: %s', args.config)
        return False

    config_dict = cfg.config(args.environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', args.environment)
        return False

    # this command only supports sqlite-backed installs
    db_conf = config_dict['database']
    if db_conf['type'] != 'sqlite':
        Log.an().error('only sqlite databases can be initialized')
        return False

    if not Environment.init_sqlite_db(db_conf['path']):
        Log.an().error(
            'cannot initialize sqlite database: %s', db_conf['path']
        )
        return False

    return True
def install_workflow(args):
    """
    Install a GeneFlow workflow.

    Args:
        args: contains all command-line arguments.

    Returns:
        On success: True.
        On failure: False.

    """
    # load config if specified
    config_dict = None
    cfg = Config()
    if args.config:
        # a config file is only meaningful with an explicit environment
        if not args.environment:
            Log.an().error(
                'must specify environment if specifying a config file')
            return False

        if not cfg.load(Path(args.config).resolve()):
            Log.an().error('cannot load config file: %s', args.config)
            return False

        config_dict = cfg.config(args.environment)
        if not config_dict:
            Log.an().error(
                'invalid config environment: %s', args.environment)
            return False

    else:
        # load default config
        cfg.default('database.db')
        config_dict = cfg.config('local')

    # load agave params if specified
    agave_params = {}
    if args.agave_params:
        try:
            # 'r' replaces deprecated 'rU': universal newlines is the
            # Python 3 default and the 'U' flag was removed in 3.11
            with open(args.agave_params, 'r') as yaml_file:
                yaml_data = yaml_file.read()
        except IOError as err:
            # fixed: error path previously referenced non-existent
            # args.params, which raised AttributeError instead of logging
            Log.an().error(
                'cannot read agave params file: %s [%s]',
                args.agave_params, str(err)
            )
            return False

        try:
            agave_params = yaml.safe_load(yaml_data)
        except yaml.YAMLError as err:
            Log.an().error('invalid yaml: %s [%s]', yaml_data, str(err))
            return False

    if not agave_params.get('agave'):
        agave_params['agave'] = {}

    # override any agave_params keys with command line options
    if args.agave_apps_prefix:
        agave_params['agave']['appsPrefix'] = args.agave_apps_prefix
    if args.agave_execution_system:
        agave_params['agave']['executionSystem'] = args.agave_execution_system
    if args.agave_deployment_system:
        agave_params['agave']['deploymentSystem'] \
            = args.agave_deployment_system
    if args.agave_apps_dir:
        agave_params['agave']['appsDir'] = args.agave_apps_dir
    if args.agave_test_data_dir:
        agave_params['agave']['testDataDir'] = args.agave_test_data_dir

    # initialize workflow installer object and install apps
    wf_installer = WorkflowInstaller(
        str(Path(args.workflow_path).resolve()),
        git=args.git,
        git_branch=args.git_branch,
        force=args.force,
        app_name=args.name,
        app_asset=args.asset,
        copy_prefix=args.prefix,
        clean=args.clean,
        config=config_dict,
        agave_params=agave_params,
        agave_username=args.agave_username,
        agave_publish=args.agave_publish,
        make_apps=args.make_apps
    )

    if not wf_installer.initialize():
        Log.an().error('cannot initialize workflow installer')
        return False

    if not wf_installer.install_apps():
        Log.an().error('cannot install workflow apps')
        return False

    if args.agave_test_data:
        if not wf_installer.upload_agave_test_data():
            Log.an().error('cannot upload agave test data')
            return False

    return True
def run_pending(args):
    """
    Run any jobs in database in the PENDING state.

    Args:
        args.config_file: GeneFlow config file path.
        args.environment: Config environment.
        args.log_location: directory for per-job log files.
        args.log_level: log level passed to each workflow run.

    Returns:
        On success: True (no pending jobs) or the list of per-job results.
        On failure: False.

    """
    config_file = args.config_file
    environment = args.environment
    log_location = args.log_location

    # load config file
    cfg = Config()
    if not cfg.load(config_file):
        Log.an().error('cannot load config file: %s', config_file)
        return False

    config_dict = cfg.config(environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # get pending jobs from database; False signals a query error,
    # an empty result simply means there is nothing to run
    pending_jobs = data_source.get_pending_jobs()
    if pending_jobs is False:
        Log.an().error('cannot query for pending jobs')
        return False

    if not pending_jobs:
        # no jobs found
        return True

    Log.some().info(
        'pending jobs found:\n%s', pprint.pformat(pending_jobs)
    )

    jobs = [{
        'name': job['name'],
        'id': job['id'],
        'log': str(Path(log_location) / (job['id'] + '.log'))
    } for job in pending_jobs]

    # run jobs in parallel (at most 5 workers); the context manager
    # guarantees the pool is torn down even if map() raises, instead of
    # leaking worker processes
    with Pool(min(5, len(pending_jobs))) as pool:
        result = pool.map(
            partial(
                geneflow.cli.common.run_workflow,
                config=config_dict,
                log_level=args.log_level
            ),
            jobs
        )

    if not all(result):
        Log.an().error('some jobs failed')

    return result