Esempio n. 1
0
def add_workflows(args, other_args, subparser=None):
    """
    Import workflow definitions into the configured GeneFlow database.

    Args:
        args.workflow_yaml: GeneFlow definition with workflows.
        args.config: GeneFlow config file path.
        args.environment: Config environment.
        other_args: not used by this function.
        subparser: not used by this function.

    Returns:
        On success: True.
        On failure: False.

    """
    yaml_path, config_path, env_name = (
        args.workflow_yaml, args.config, args.environment
    )

    # read the config file and pick the requested environment
    settings = Config()
    if not settings.load(config_path):
        Log.an().error('cannot load config file: %s', config_path)
        return False

    env_settings = settings.config(env_name)
    if not env_settings:
        Log.an().error('invalid config environment: %s', env_name)
        return False

    # open the database described by the selected environment
    try:
        database = DataSource(env_settings['database'])
    except DataSourceException as exc:
        Log.an().error('data source initialization error [%s]', str(exc))
        return False

    # a falsy import result is treated as a failed load; nothing is committed
    imported = database.import_workflows_from_def(yaml_path)
    if not imported:
        Log.an().error('workflow definition load failed: %s', yaml_path)
        return False

    database.commit()

    # report every imported workflow together with its new ID
    for name in imported:
        Log.some().info('workflow loaded: %s -> %s', name, imported[name])

    return True
Esempio n. 2
0
def add_apps(args):
    """
    Import app definitions into the configured GeneFlow database.

    Args:
        args.app_yaml: GeneFlow definition with apps.
        args.config_file: GeneFlow config file path.
        args.environment: Config environment.

    Returns:
        On success: True.
        On failure: False.

    """
    definition_path = args.app_yaml
    config_path = args.config_file
    env_name = args.environment

    # the config file must load and must contain the requested environment
    loader = Config()
    if not loader.load(config_path):
        Log.an().error('cannot load config file: %s', config_path)
        return False

    env_config = loader.config(env_name)
    if not env_config:
        Log.an().error('invalid config environment: %s', env_name)
        return False

    # connect using the environment's database section
    try:
        source = DataSource(env_config['database'])
    except DataSourceException as exc:
        Log.an().error('data source initialization error [%s]', str(exc))
        return False

    # a falsy import result means the definition could not be loaded
    loaded_apps = source.import_apps_from_def(definition_path)
    if not loaded_apps:
        Log.an().error('app definition load failed: %s', definition_path)
        return False

    # persist the import only after it succeeded
    source.commit()

    # list each imported app with its new ID
    for app_name in loaded_apps:
        Log.some().info('app loaded: %s -> %s', app_name, loaded_apps[app_name])

    return True
Esempio n. 3
0
def migrate_db(args, other_args, subparser=None):
    """
    Apply outstanding schema migrations to a MySQL database.

    Databases of any other configured type are rejected.

    Args:
        args.config: GeneFlow config file path.
        args.environment: Config environment.
        other_args: not used by this function.
        subparser: not used by this function.

    Returns:
        On success: True.
        On failure: False.

    """
    config_path, env_name = args.config, args.environment

    settings = Config()
    if not settings.load(config_path):
        Log.an().error('cannot load config file: %s', config_path)
        return False

    env_settings = settings.config(env_name)
    if not env_settings:
        Log.an().error('invalid config environment: %s', env_name)
        return False

    db_settings = env_settings['database']
    if db_settings['type'] != 'mysql':
        Log.an().error('only mysql databases can be migrated')
        return False

    # migration scripts live under the package's data/migrations directory
    migrations_dir = str(Path(GF_PACKAGE_PATH) / 'data/migrations')

    try:
        # NOTE(review): user and password are placed in the URL as-is;
        # confirm how the backend treats characters such as '@' or '/'.
        connection_url = '{}://{}:{}@{}/{}'.format(
            db_settings['type'],
            db_settings['user'],
            db_settings['password'],
            db_settings['host'],
            db_settings['database'])
        backend = get_backend(connection_url)
        available = read_migrations(migrations_dir)
        # hold the backend lock while the pending migrations are applied
        with backend.lock():
            backend.apply_migrations(backend.to_apply(available))
    except Exception as exc:
        # any failure (connection, missing key, migration error) is reported
        # to the caller as False rather than raised
        Log.an().error('cannot migrate database [%s]', str(exc))
        return False

    return True
Esempio n. 4
0
def before_all(context):
    """
    Set up the shared test context.

    Loads './test.conf', selects the environment named by the 'environment'
    userdata entry (default 'local'), stores the resulting config on the
    context, clears the database, and creates an empty dict for the agave
    connection.

    Args:
        context: test context; context.config.userdata is read, and
            context.geneflow_config and context.agave are set.

    Raises:
        AssertionError: if './test.conf' cannot be loaded.
        ValueError: if the selected environment yields no config.

    """
    # read the test config and select the environment for this run
    settings = Config()
    assert settings.load('./test.conf')

    env_name = context.config.userdata.get('environment', 'local')
    env_settings = settings.config(env=env_name)
    if not env_settings:
        raise ValueError('Error loading geneflow config file')
    context.geneflow_config = env_settings

    # start from an empty database
    clear_database(context)

    # placeholder dict for the agave connection
    context.agave = {}
Esempio n. 5
0
def init_db(args, other_args, subparser=None):
    """
    Create the schema of a SQLite database.

    Databases of any other configured type are rejected.

    Args:
        args.config: GeneFlow config file path.
        args.environment: Config environment.
        other_args: not used by this function.
        subparser: not used by this function.

    Returns:
        On success: True.
        On failure: False.

    """
    config_path, env_name = args.config, args.environment

    settings = Config()
    if not settings.load(config_path):
        Log.an().error('cannot load config file: %s', config_path)
        return False

    env_settings = settings.config(env_name)
    if not env_settings:
        Log.an().error('invalid config environment: %s', env_name)
        return False

    db_settings = env_settings['database']
    if db_settings['type'] != 'sqlite':
        Log.an().error('only sqlite databases can be initialized')
        return False

    # the sqlite database location comes from the 'path' config entry
    sqlite_path = db_settings['path']
    if not Environment.init_sqlite_db(sqlite_path):
        Log.an().error('cannot initialize sqlite database: %s', sqlite_path)
        return False

    return True
Esempio n. 6
0
def install_workflow(args):
    """
    Install a GeneFlow workflow.

    Loads the GeneFlow config (the file given by args.config, or a default
    config when none is given), optionally reads agave parameters from a YAML
    file, applies command-line overrides to them, then initializes a
    WorkflowInstaller, installs the workflow apps and, if requested, uploads
    agave test data.

    Args:
        args: contains all command-line arguments. Attributes read here:
            config, environment, agave_params, agave_apps_prefix,
            agave_execution_system, agave_deployment_system, agave_apps_dir,
            agave_test_data_dir, workflow_path, git, git_branch, force, name,
            asset, prefix, clean, agave_username, agave_publish, make_apps,
            agave_test_data.

    Returns:
        On success: True.
        On failure: False.

    """
    # load config if specified
    cfg = Config()
    if args.config:
        if not args.environment:
            Log.an().error(
                'must specify environment if specifying a config file')
            return False

        if not cfg.load(Path(args.config).resolve()):
            Log.an().error('cannot load config file: %s', args.config)
            return False

        config_dict = cfg.config(args.environment)
        if not config_dict:
            Log.an().error('invalid config environment: %s', args.environment)
            return False

    else:
        # load default config
        cfg.default('database.db')
        config_dict = cfg.config('local')

    # load agave params if specified
    agave_params = {}
    if args.agave_params:
        try:
            # plain 'r': the 'U' mode flag was removed in Python 3.11 and
            # text mode already uses universal newlines on Python 3
            with open(args.agave_params, 'r') as yaml_file:
                yaml_data = yaml_file.read()
        except IOError as err:
            Log.an().error('cannot read agave params file: %s [%s]',
                           args.agave_params, str(err))
            return False

        try:
            agave_params = yaml.safe_load(yaml_data)
        except yaml.YAMLError as err:
            Log.an().error('invalid yaml: %s [%s]', yaml_data, str(err))
            return False

        # an empty YAML document parses to None; treat it as "no params"
        if agave_params is None:
            agave_params = {}

        # the params are accessed as a mapping below, so reject anything else
        if not isinstance(agave_params, dict):
            Log.an().error(
                'agave params file must contain a mapping: %s',
                args.agave_params)
            return False

    if not agave_params.get('agave'):
        agave_params['agave'] = {}

    # override any agave_params keys with command line options
    overrides = (
        ('appsPrefix', args.agave_apps_prefix),
        ('executionSystem', args.agave_execution_system),
        ('deploymentSystem', args.agave_deployment_system),
        ('appsDir', args.agave_apps_dir),
        ('testDataDir', args.agave_test_data_dir),
    )
    for key, value in overrides:
        if value:
            agave_params['agave'][key] = value

    # initialize workflow installer object and install apps
    wf_installer = WorkflowInstaller(str(Path(args.workflow_path).resolve()),
                                     git=args.git,
                                     git_branch=args.git_branch,
                                     force=args.force,
                                     app_name=args.name,
                                     app_asset=args.asset,
                                     copy_prefix=args.prefix,
                                     clean=args.clean,
                                     config=config_dict,
                                     agave_params=agave_params,
                                     agave_username=args.agave_username,
                                     agave_publish=args.agave_publish,
                                     make_apps=args.make_apps)

    if not wf_installer.initialize():
        Log.an().error('cannot initialize workflow installer')
        return False

    if not wf_installer.install_apps():
        Log.an().error('cannot install workflow apps')
        return False

    # test data upload is optional and only attempted when requested
    if args.agave_test_data and not wf_installer.upload_agave_test_data():
        Log.an().error('cannot upload agave test data')
        return False

    return True
Esempio n. 7
0
def run_pending(args):
    """
    Run any jobs in database in the PENDING state.

    Args:
        args.config_file: GeneFlow config file path.
        args.environment: Config environment.
        args.log_location: directory for per-job log files; each job logs to
            <log_location>/<job id>.log.
        args.log_level: log level passed to each workflow run.

    Returns:
        False if the config, the data source, or the pending-jobs query
        fails.
        True if there are no pending jobs.
        Otherwise the list of per-job results returned by the worker pool,
        one entry per pending job. An error is logged if any entry is falsy,
        but the list is still returned.

    """
    config_file = args.config_file
    environment = args.environment
    log_location = args.log_location

    # load config file
    cfg = Config()
    if not cfg.load(config_file):
        Log.an().error('cannot load config file: %s', config_file)
        return False

    config_dict = cfg.config(environment)
    if not config_dict:
        Log.an().error('invalid config environment: %s', environment)
        return False

    # connect to data source
    try:
        data_source = DataSource(config_dict['database'])
    except DataSourceException as err:
        Log.an().error('data source initialization error [%s]', str(err))
        return False

    # get pending jobs from database; False signals a query error, while an
    # empty result just means there is nothing to run
    pending_jobs = data_source.get_pending_jobs()
    if pending_jobs is False:
        Log.an().error('cannot query for pending jobs')
        return False

    if not pending_jobs:
        # no jobs found
        return True

    Log.some().info('pending jobs found:\n%s', pprint.pformat(pending_jobs))

    # build the job descriptions before starting workers so that a malformed
    # job record cannot leave a running pool behind
    jobs = [{
        'name': job['name'],
        'id': job['id'],
        'log': str(Path(log_location) / (job['id'] + '.log'))
    } for job in pending_jobs]

    # at most 5 workers, and never more workers than jobs
    pool = Pool(min(5, len(pending_jobs)))
    try:
        result = pool.map(
            partial(geneflow.cli.common.run_workflow,
                    config=config_dict,
                    log_level=args.log_level), jobs)
    finally:
        # always shut the workers down, even if a job raised
        pool.close()
        pool.join()

    if not all(result):
        Log.an().error('some jobs failed')

    return result