Example #1
0
def get_library_folder(gi, library_folder_name):
    """Return the ids of a data library and of a folder inside it.

    Args:
        gi: Galaxy instance object; its ``libraries`` client is used here.
        library_folder_name: Slash-separated path whose first component is
            the library name (e.g. ``"library/sub/folder"``).

    Returns:
        A ``(library_id, folder_id)`` tuple.

    If no library matches the first path component, ``fail`` is called with
    an explanatory message.
    """
    # make_path is invoked before the lookup; presumably it creates any
    # missing path components (defined in galaxy_fs -- confirm there).
    galaxy_fs.make_path(gi.libraries, library_folder_name)
    folder_id = galaxy_fs.get_path(gi, library_folder_name)["id"]

    # Everything before the first "/" names the library itself.
    library_name, _, _ = library_folder_name.partition("/")
    matches = gi.libraries.get_libraries(name=library_name)
    if not matches:
        fail('''couldn't find library named "%s"''', library_name)

    # When several libraries share the name, the first one returned wins.
    return matches[0]["id"], folder_id
Example #2
0
def upload_dataset(gi, data_path, file_type, folder_name, link="link_to_files"):
    """Make sure a file is present in a data-library folder and describe it.

    The file is looked up in Galaxy under ``folder_name/<basename>``; it is
    uploaded only when that lookup returns nothing.

    Args:
        gi: Galaxy instance object.
        data_path: Path of the file. Absolute paths go through
            ``upload_from_galaxy_filesystem``; relative paths go through
            ``upload_file_from_server``.
        file_type: File type passed to the upload call as ``file_type``.
        folder_name: Library folder path that should contain the file.
        link: Value forwarded as ``link_data_only``.

    Returns:
        A dict with the dataset ``id``, its ``name`` (the file's basename)
        and ``src`` set to ``"ld"``.
    """
    lib_id, folder_id = get_library_folder(gi, folder_name)
    data_name = os.path.basename(data_path)
    galaxy_dataset = "/".join((folder_name, data_name))

    entry = galaxy_fs.get_path(gi, galaxy_dataset)
    if entry:
        logging.info('''Skipping upload, file exists at "{}"'''.format(entry))
    else:
        logging.info("Uploading: %s", galaxy_dataset)
        logging.info("File type: %s", file_type)
        logging.info("File path: %s", data_path)
        # Pick the upload call by path kind; both take the same arguments.
        uploader = (gi.libraries.upload_from_galaxy_filesystem
                    if os.path.isabs(data_path)
                    else gi.libraries.upload_file_from_server)
        # Only the first element of the upload result is used.
        entry = uploader(lib_id, data_path, folder_id,
                         link_data_only=link, file_type=file_type)[0]

    return {"id": entry["id"], "name": data_name, "src": "ld"}
Example #3
0
def _collect_workflow_params(gi, steps_by_label, workflow_params_config):
    """Translate the ``workflow_params`` config section into invocation params.

    Args:
        gi: Galaxy instance object, used to fetch sub-workflow descriptions.
        steps_by_label: Mapping of step label to step description for the
            top-level workflow.
        workflow_params_config: Mapping of step label to parameters. For a
            step of type ``subworkflow`` the value maps sub-step labels to
            their parameters.

    Returns:
        A dict keyed by step id. For regular steps the value is a copy of the
        configured parameters; for sub-workflow steps it maps
        ``"<sub step id>|<param name>"`` to the parameter value.

    ``fail`` is called when a configured label does not match any step.
    """
    params = {}
    for step_label, step_params in workflow_params_config.items():
        if step_label not in steps_by_label:
            fail('configuring workflow params, missing step with label %s',
                 step_label)
        step = steps_by_label[step_label]
        step_id = step['id']

        if step['type'] != 'subworkflow':
            # Copy through .items() rather than dict(...) so the config
            # mapping's own item access is used, as in the original code.
            params[step_id] = {k: v for k, v in step_params.items()}
            continue

        # The description does not depend on the sub-step being configured,
        # so fetch it once per sub-workflow instead of once per sub-step.
        sub_wfdesc = get_workflow_description(gi, step['name'])
        sub_steps_by_label = {}
        for sub_step in sub_wfdesc['steps'].values():
            # setdefault keeps the first step seen for a duplicated label.
            sub_steps_by_label.setdefault(sub_step.get('label'), sub_step)

        sub_dict = {}
        for sub_label, sub_params in step_params.items():
            if sub_label not in sub_steps_by_label:
                fail('configuring workflow params, subworkflow "%s" has no '
                     'step with label "%s"', step_label, sub_label)
            sub_step_id = str(sub_steps_by_label[sub_label]['id'])
            # Apply every configured parameter of the sub-step, not just the
            # first one.
            for param_key, param_value in sub_params.items():
                sub_dict["|".join([sub_step_id, param_key])] = param_value
        params[step_id] = sub_dict
    return params


def collect_inputs(config_path):
    """Run a Galaxy workflow as described by a YAML config file.

    The function loads the config, creates a history, uploads the common
    input files and the sample files to a data library, places the sample
    data in the history, maps workflow steps to their inputs and parameters,
    invokes the workflow and prints the invocation result as JSON.

    Required config keys: ``galaxy_url``, ``common_inputs``, ``workflow``,
    ``library_folder``, ``sample`` and ``common_input_files``. Optional keys:
    ``workflow_params``, ``replacement_params``, ``tags`` and ``publish``.
    The API key is read from the ``GALAXY_API_KEY`` environment variable.

    Args:
        config_path: Path to the YAML config file.

    Returns:
        None. The invocation result is printed to stdout.

    ``fail`` is called on missing configuration or missing Galaxy objects;
    the code after each call assumes ``fail`` does not return.
    """
    # Load YAML config file; the handle is closed as soon as it is parsed.
    logging.info("Config path: %s", config_path)
    with open(config_path) as config_fh:
        config = dynamic_yaml.load(config_fh)
    logging.info("Config: %s", config)

    galaxy_api_key = os.getenv("GALAXY_API_KEY")
    if not galaxy_api_key:
        fail(
            "missing GALAXY_API_KEY; set this in the environment or .env file")

    def require_config(key):
        v = config.get(key)
        if not v:
            fail('missing "%s" in the config', key)
        return v

    # Load config. Every required key is checked here, before any history
    # is created or any file is uploaded.
    galaxy_url = require_config("galaxy_url")
    common_inputs = require_config("common_inputs")
    workflow_name = require_config("workflow")
    library_folder_name = require_config("library_folder")
    sample_conf = require_config("sample")
    cm_files = require_config("common_input_files")
    workflow_params_config = config.get("workflow_params", {})
    replacement_params = config.get("replacement_params", {})
    tags = config.get("tags")
    publish = config.get("publish")

    # Create galaxy object and get workflow description
    gi = bioblend.galaxy.GalaxyInstance(url=galaxy_url, key=galaxy_api_key)
    wfdesc = get_workflow_description(gi, workflow_name)

    # Create History; a random suffix is appended to the history name.
    sample = sample_conf["name"]
    alphanum = string.ascii_lowercase + string.digits
    rand = ''.join(random.choice(alphanum) for _ in range(8))
    history_name = workflow_name + " " + sample + " " + rand

    logging.info("Creating history: %s", history_name)
    history = get_or_create_history(gi, history_name)
    history_id = history["id"]
    create_history_tags(gi, history_id, tags)
    gi.histories.update_history(history_id, published=publish)

    # Upload common_inputs
    logging.info("Uploading Common Inputs")
    for cm_key, cm_val in cm_files.items():
        # The target folder is the directory part of the Galaxy path
        # configured for the same key under common_inputs.
        fd_name = os.path.dirname(common_inputs[cm_key])
        file_type = os.path.splitext(cm_val)[-1].lstrip(".")
        logging.info("Galaxy Common Inputs folder name: %s", fd_name)
        logging.info("Common Input file name: %s", cm_val)
        upload_dataset(gi, cm_val, file_type, fd_name)

    # Find files on filesystem
    logging.info("Collecting files from filesystem")
    sample_files = sample_conf["files"]

    # Upload files to Data Library
    logging.info("Uploading sample data")
    file_type = sample_conf["filetype"]
    sample_ids = [
        upload_dataset(gi, sf, file_type, library_folder_name)["id"]
        for sf in sample_files
    ]

    # Create collection list in history
    logging.info("Populating sample data in history")
    sample_data = create_dataset_or_collection_in_history(
        gi, sample, history_id, sample_ids)
    logging.info("sample collection data: %s", sample_data)

    logging.info("Preparing to invoke workflow")
    common_inputs_library_ids = {}
    for k, v in common_inputs.items():
        logging.info('''Collecting common inputs from Galaxy: "%s" "%s"''', k,
                     v)
        f = galaxy_fs.get_path(gi, v)
        logging.info('''f path: "%s"''', f)
        if not f:
            # Report the missing entry instead of failing on f["id"] below.
            fail('common input "%s" not found in Galaxy at "%s"', k, v)
        common_inputs_library_ids[k] = f["id"]

    logging.info('''Collecting common inputs dict: "%s"''',
                 common_inputs_library_ids)

    # Map workflow steps to their inputs, keyed by step uuid.
    steps_by_label = {}
    inputs = {}
    for step in wfdesc["steps"].values():
        label = step.get("label")
        logging.info('''Step label is: "%s"''', label)
        step_uuid = step.get("uuid")
        steps_by_label[label] = step
        if label in common_inputs_library_ids:
            inputs[step_uuid] = {
                "id": common_inputs_library_ids[label],
                "src": "ld",
            }

        # The step labelled "INPUT" receives the sample data.
        if label == "INPUT":
            inputs[step_uuid] = {
                "id": sample_data["id"],
                "src": sample_data["src"],
            }

    params = {}
    if workflow_params_config:
        params = _collect_workflow_params(gi, steps_by_label,
                                          workflow_params_config)

    # Replacement params
    replace_dict = {}
    for k, v in replacement_params.items():
        logging.info("Collecting Replacement params: %s %s", k, v)
        replace_dict[k] = v

    # Invoke workflow
    logging.info("Invoking workflow")
    workflow_id = wfdesc["uuid"]
    logging.info("Replacement params: %s", replace_dict)
    logging.info("Workflow params: %s", params)
    logging.info("Inputs: %s", inputs)

    res = gi.workflows.invoke_workflow(workflow_id,
                                       inputs,
                                       history_id=history_id,
                                       params=params,
                                       import_inputs_to_history=False,
                                       replacement_params=replace_dict)
    print(json.dumps(res, indent=2))