import io
import logging
import os

import yaml

from dbclients.tantalus import TantalusApi

# Assumption: create_lane_fastq_metadata is a helper from elsewhere in this
# codebase; its module path is not shown in this snippet.


def fix_bams(jira_ticket=None, dry_run=False):
    tantalus_api = TantalusApi()

    analyses_list = []
    storage_name = "singlecellresults"

    if jira_ticket is not None:
        analyses_list.append(tantalus_api.get(
            'analysis',
            jira_ticket=jira_ticket,
            analysis_type__name="align",
            status="complete"))

    else:
        # Get all completed align analyses run with specific versions;
        # the bams associated with these analyses are in the wrong storage account
        for version in ('v0.5.2', 'v0.5.3'):
            analyses = tantalus_api.list(
                'analysis',
                analysis_type__name="align",
                status="complete",
                version=version)
            analyses_list.extend(analyses)

    for analysis in analyses_list:
        jira_ticket = analysis["jira_ticket"]

        filename = f'{jira_ticket}/results/bams/metadata.yaml'

        logging.info(f'adding file {filename}')
        if not dry_run:
            # add_file returns (file_resource, file_instance); the original
            # unpacked these in the reverse order
            file_resource, file_instance = tantalus_api.add_file(storage_name, filename)

        # get all bam datasets associated with the jira ticket
        bam_datasets = tantalus_api.list(
            "sequencedataset",
            dataset_type="BAM",
            analysis__jira_ticket=jira_ticket,
        )

        for dataset in bam_datasets:
            dataset_id = dataset['id']

            logging.info(f'adding file to dataset {dataset_id}')
            if not dry_run:
                file_resource_ids = dataset['file_resources']
                # list.append returns None, so append in place rather than
                # reassigning the result
                file_resource_ids.append(file_resource['id'])
                tantalus_api.update(
                    'sequencedataset',
                    id=dataset['id'],
                    file_resources=file_resource_ids)
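# Usage sketch (the ticket id below is a hypothetical example, not from the
# original script): a dry run logs the files and dataset updates that would
# be made without touching tantalus.
#
#   logging.basicConfig(level=logging.INFO)
#   fix_bams(jira_ticket='SC-1234', dry_run=True)   # preview a single ticket
#   fix_bams()                                      # fix all v0.5.2/v0.5.3 analyses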
def create_fastq_metadata_yaml(library_id, storage_name, dry_run=False):
    """
    Create a metadata.yaml file for all fastq datasets for a library id.
    """
    tantalus_api = TantalusApi()

    storage = tantalus_api.get_storage(storage_name)
    client = tantalus_api.get_storage_client(storage_name)

    for dataset_info, metadata in create_lane_fastq_metadata(tantalus_api, library_id):
        metadata_filename = os.path.join(dataset_info['base_dir'], 'metadata.yaml')
        metadata_filepath = tantalus_api.get_filepath(storage_name, metadata_filename)

        metadata_io = io.BytesIO()
        metadata_io.write(yaml.dump(metadata, default_flow_style=False).encode())

        logging.info(f'writing metadata to file {metadata_filepath}')
        # Note: the metadata file is written to storage even on a dry run;
        # only the tantalus registration below is skipped
        client.write_data(metadata_filename, metadata_io)

        logging.info(f'adding {metadata_filepath} to tantalus')

        if not dry_run:
            file_resource, file_instance = tantalus_api.add_file(
                storage_name, metadata_filepath, update=True)

            for dataset_id in dataset_info['dataset_ids']:
                dataset = tantalus_api.get('sequencedataset', id=dataset_id)

                # use a set so re-running the script does not attach the
                # same file resource twice
                new_file_resources = set(dataset['file_resources'])
                new_file_resources.add(file_resource['id'])

                tantalus_api.update(
                    'sequencedataset',
                    id=dataset_id,
                    file_resources=list(new_file_resources))
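# Usage sketch (library id and storage name are hypothetical examples):
#
#   create_fastq_metadata_yaml('A96213A', 'singlecellblob', dry_run=True)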
def add_fastq_metadata_yaml(dataset_id, storage_name, dry_run=False):
    """
    Create a metadata.yaml file for a dataset and add to tantalus.
    """
    tantalus_api = TantalusApi()

    storage = tantalus_api.get_storage(storage_name)
    client = tantalus_api.get_storage_client(storage_name)

    # here create_lane_fastq_metadata is called with a dataset id and is
    # assumed to return (metadata, base_dir) directly, unlike the per-library
    # generator form used in create_fastq_metadata_yaml above
    metadata, base_dir = create_lane_fastq_metadata(tantalus_api, dataset_id)

    metadata_filename = os.path.join(base_dir, 'metadata.yaml')
    metadata_filepath = tantalus_api.get_filepath(storage_name, metadata_filename)

    metadata_io = io.BytesIO()
    metadata_io.write(yaml.dump(metadata, default_flow_style=False).encode())

    logging.info(f'writing metadata to file {metadata_filepath}')
    client.write_data(metadata_filename, metadata_io)

    logging.info(f'adding {metadata_filepath} to tantalus')

    if not dry_run:
        file_resource, file_instance = tantalus_api.add_file(
            storage_name, metadata_filepath, update=True)

        dataset = tantalus_api.get('sequencedataset', id=dataset_id)

        new_file_resources = set(dataset['file_resources'])
        new_file_resources.add(file_resource['id'])

        tantalus_api.update(
            'sequencedataset',
            id=dataset_id,
            file_resources=list(new_file_resources))
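# Minimal command-line entry point: a sketch assuming this module is run as a
# script. The original repo may wire these functions up differently (e.g. via
# click); argument names below simply mirror the function signature.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Create and register a metadata.yaml for a fastq dataset.')
    parser.add_argument('dataset_id', type=int, help='tantalus sequencedataset id')
    parser.add_argument('storage_name', help='tantalus storage name')
    parser.add_argument('--dry-run', action='store_true',
                        help='write the metadata file but skip tantalus registration')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    add_fastq_metadata_yaml(args.dataset_id, args.storage_name, dry_run=args.dry_run)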