Esempio n. 1
0
def fix_bams(jira_ticket=None, dry_run=False):

    tantalus_api = TantalusApi()

    analyses_list = []
    storage_name = "singlecellresults"

    if jira_ticket is not None:
        analyses_list.append(tantalus_api.get('analysis', jira_ticket=jira_ticket, analysis_type__name="align", status="complete"))
    
    else:
        # Get all completed align analyses ran with specific version
        # the bams associated to these analyses are in the wrong storage account
        for version in ('v0.5.2', 'v0.5.3'):
            analyses = tantalus_api.list('analysis', analysis_type__name="align", status="complete", version=version)
            analyses_list += [a for a in analyses]

    for analysis in analyses_list:
        jira_ticket = analysis["jira_ticket"]

        filename = f'{jira_ticket}/results/bams/metadata.yaml'

        logging.info(f'adding file {filename}')
        if not dry_run:
            file_instance, file_resource = tantalus_api.add_file(storage_name, filename)

        # get all bam datasets associated with the jira ticket
        bam_datasets = tantalus_api.list(
            "sequencedataset",
            dataset_type="BAM",
            analysis__jira_ticket=jira_ticket,
        )

        for dataset in bam_datasets:
            dataset_id = dataset['id']

            logging.info(f'adding file to dataset {dataset_id}')
            if not dry_run:
                file_resource_ids = dataset['file_resources']
                file_resource_ids = file_resource_ids.append(file_resource['id'])
                tantalus_api.update('sequencedataset', id=dataset['id'], file_resources=file_resource_ids)
Esempio n. 2
0
def create_fastq_metadata_yaml(library_id, storage_name, dry_run=False):
    """
    Create a metadata.yaml file for a all FQ datasets for a library id.
    """
    tantalus_api = TantalusApi()

    storage = tantalus_api.get_storage(storage_name)
    client = tantalus_api.get_storage_client(storage_name)

    for dataset_info, metadata in create_lane_fastq_metadata(
            tantalus_api, library_id):
        metadata_filename = os.path.join(dataset_info['base_dir'],
                                         'metadata.yaml')
        metadata_filepath = tantalus_api.get_filepath(storage_name,
                                                      metadata_filename)

        metadata_io = io.BytesIO()
        metadata_io.write(
            yaml.dump(metadata, default_flow_style=False).encode())

        logging.info(f'writing metadata to file {metadata_filepath}')
        client.write_data(metadata_filename, metadata_io)

        logging.info(f'adding {metadata_filepath} to tantalus')

        if not dry_run:
            file_resource, file_instance = tantalus_api.add_file(
                storage_name, metadata_filepath, update=True)

            for dataset_id in dataset_info['dataset_ids']:
                dataset = tantalus_api.get('sequencedataset', id=dataset_id)

                new_file_resources = set(dataset['file_resources'])
                new_file_resources.add(file_resource['id'])

                tantalus_api.update('sequencedataset',
                                    id=dataset_id,
                                    file_resources=list(new_file_resources))
Esempio n. 3
0
def add_fastq_metadata_yaml(dataset_id, storage_name, dry_run=False):
    """
    Create a metadata.yaml file for a dataset and add to tantalus.
    """
    tantalus_api = TantalusApi()

    storage = tantalus_api.get_storage(storage_name)
    client = tantalus_api.get_storage_client(storage_name)

    metadata, base_dir = create_lane_fastq_metadata(tantalus_api, dataset_id)

    metadata_filename = os.path.join(base_dir, 'metadata.yaml')
    metadata_filepath = tantalus_api.get_filepath(storage_name,
                                                  metadata_filename)

    metadata_io = io.BytesIO()
    metadata_io.write(yaml.dump(metadata, default_flow_style=False).encode())

    print(f'writing metadata to file {metadata_filepath}')
    client.write_data(metadata_filename, metadata_io)

    print(f'adding {metadata_filepath} to tantalus')

    if not dry_run:
        file_resource, file_instance = tantalus_api.add_file(storage_name,
                                                             metadata_filepath,
                                                             update=True)

        dataset = tantalus_api.get('sequencedataset', id=dataset_id)

        new_file_resources = set(dataset['file_resources'])
        new_file_resources.add(file_resource['id'])

        tantalus_api.update('sequencedataset',
                            id=dataset_id,
                            file_resources=list(new_file_resources))