Example #1
# Assumed imports; transfer_dataset is assumed to come from the project's
# transfer utilities, and exact module paths may vary by project layout.
from dbclients.tantalus import TantalusApi

def transfer_inputs(dataset_ids, results_ids, from_storage, to_storage):
    """Transfer input sequence datasets and results datasets between storages."""
    tantalus_api = TantalusApi()

    # Sequence and results datasets are distinct Tantalus models, so each
    # list is transferred under its own dataset type string.
    for dataset_id in dataset_ids:
        transfer_dataset(tantalus_api, dataset_id, 'sequencedataset', from_storage, to_storage)

    for results_id in results_ids:
        transfer_dataset(tantalus_api, results_id, 'resultsdataset', from_storage, to_storage)
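A minimal usage sketch; the dataset IDs and storage names below are hypothetical placeholders, assuming both storages are registered in Tantalus:

# All IDs and storage names here are hypothetical, for illustration only.
transfer_inputs(
    dataset_ids=[1001, 1002],
    results_ids=[2001],
    from_storage='from_storage_name',
    to_storage='to_storage_name',
)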
Example #2
# Assumed imports; exact module paths may vary by project layout, and
# transfer_dataset is assumed to come from the project's transfer utilities.
import logging

from dbclients.tantalus import TantalusApi

def download_datasets(results_type,
                      from_storage_name,
                      to_storage_name,
                      dataset_id=None,
                      jira_ticket=None):
    ''' Download a set of datasets by type.
    '''

    tantalus_api = TantalusApi()

    if dataset_id is not None:
        datasets = tantalus_api.list('results', id=dataset_id)
    elif jira_ticket is not None:
        datasets = tantalus_api.list('results',
                                     results_type=results_type,
                                     analysis__jira_ticket=jira_ticket)
    else:
        datasets = tantalus_api.list('results', results_type=results_type)

    # Higher IDs are assumed to be more recent; download most recent first
    dataset_ids = sorted((dataset['id'] for dataset in datasets), reverse=True)

    failed = False
    for dataset_id in dataset_ids:
        try:
            transfer_dataset(tantalus_api, dataset_id, 'resultsdataset',
                             from_storage_name, to_storage_name)
        except Exception:
            logging.exception(f'failed to download {dataset_id}')
            failed = True

    if failed:
        raise Exception('one or more downloads failed')
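A hedged usage sketch; the results type, storage names, and JIRA ticket below are hypothetical placeholders:

# Download all results of one type produced by a given analysis ticket
# (all values hypothetical).
download_datasets(
    'hmmcopy',
    from_storage_name='remote_storage',
    to_storage_name='local_storage',
    jira_ticket='SC-1234',
)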
Example #3
# Assumed imports; exact module paths may vary by project layout, and
# transfer_files is assumed to be the project's transfer utilities module.
import logging
import os

from dbclients.basicclient import NotFoundError
from dbclients.tantalus import TantalusApi

def add_generic_results(filepaths,
                        storage_name,
                        results_name,
                        results_type,
                        results_version,
                        sample_ids=(),
                        library_ids=(),
                        analysis_pk=None,
                        recursive=False,
                        tag_name=None,
                        update=False,
                        remote_storage_name=None):
    """Register files on a storage as a generic results dataset in Tantalus."""

    tantalus_api = TantalusApi()

    sample_pks = []
    for sample_id in sample_ids:
        # get() returns a single matching record for each sample_id
        sample = tantalus_api.get(
            "sample",
            sample_id=sample_id,
        )
        sample_pks.append(sample['id'])

    library_pks = []
    for library_id in library_ids:
        # get() returns a single matching record for each library_id
        library = tantalus_api.get(
            "dna_library",
            library_id=library_id,
        )
        library_pks.append(library['id'])

    # Add the file resources to Tantalus
    file_resource_pks = []
    for filepath in filepaths:
        if recursive:
            logging.info("Recursing directory {}".format(filepath))
            add_filepaths = []
            for (dirpath, dirnames, filenames) in os.walk(filepath):
                for filename in filenames:
                    add_filepaths.append(os.path.join(dirpath, filename))

        else:
            add_filepaths = [filepath]

        for add_filepath in add_filepaths:
            logging.info(
                "Adding file resource for {} to Tantalus".format(add_filepath))
            resource, instance = tantalus_api.add_file(
                storage_name=storage_name,
                filepath=add_filepath,
                update=update,
            )
            file_resource_pks.append(resource["id"])

    results_dataset_fields = dict(
        name=results_name,
        results_type=results_type,
        results_version=results_version,
        analysis=analysis_pk,
        samples=sample_pks,
        libraries=library_pks,
        file_resources=file_resource_pks,
    )

    # Add the dataset to Tantalus
    try:
        results_id = tantalus_api.get(
            "results", name=results_dataset_fields["name"])["id"]
    except NotFoundError:
        results_id = None

    if update and results_id is not None:
        logging.warning("results dataset {} exists, updating".format(
            results_dataset_fields["name"]))
        results_dataset = tantalus_api.update("results",
                                              id=results_id,
                                              **results_dataset_fields)

    else:
        logging.info("creating results dataset {}".format(
            results_dataset_fields["name"]))
        results_dataset = tantalus_api.get_or_create("results",
                                                     **results_dataset_fields)

    if tag_name is not None:
        # Tag using the ID from the created/updated record; results_id is
        # None when the dataset was newly created above.
        tantalus_api.tag(tag_name, resultsdataset_set=[results_dataset["id"]])

    logging.info("Succesfully created sequence dataset with ID {}".format(
        results_dataset["id"]))

    if remote_storage_name is not None:
        transfer_files.transfer_dataset(tantalus_api, results_dataset['id'],
                                        "resultsdataset", storage_name,
                                        remote_storage_name)

    return results_dataset
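A usage sketch under assumed values; every literal below is a hypothetical placeholder:

# Register two files as a new results dataset and mirror it to a remote
# storage (all values hypothetical).
add_generic_results(
    filepaths=['/data/project/metrics.csv', '/data/project/plots.pdf'],
    storage_name='local_storage',
    results_name='project_metrics_v1',
    results_type='generic_metrics',
    results_version='v1',
    sample_ids=('SA123',),
    library_ids=('A96123A',),
    remote_storage_name='remote_storage',
)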