def generate_unique_name(cls, tantalus_api, jira, version, args, input_datasets, input_results):
    """Build a deterministic analysis name for a single-cell QC analysis.

    Queries Tantalus for BAM datasets from the same JIRA ticket / library /
    aligner / reference genome and hashes their lanes, so the resulting name
    is stable for a given set of input lanes.

    Args:
        tantalus_api: Tantalus API client used for the dataset query.
        jira: JIRA ticket id the BAM datasets were produced under.
        version: analysis version (unused here; kept for a uniform signature
            across `generate_unique_name` implementations).
        args: dict with at least 'library_id', 'aligner', 'ref_genome'.
        input_datasets / input_results: unused here; kept for signature parity.

    Returns:
        str: formatted analysis name from SC_QC_ANALYSIS_NAME_TEMPLATE.

    Raises:
        ValueError: if args['aligner'] is not a recognised aligner name.
    """
    # Get hash of lane data based on bams from the same ticket.
    bam_datasets = tantalus_api.list(
        'sequence_dataset',
        analysis__jira_ticket=jira,
        library__library_id=args['library_id'],
        dataset_type='BAM',
        aligner__name__startswith=args['aligner'],
        reference_genome__name=args['ref_genome'],
    )

    # TODO: check aligner and reference genome against bam dataset
    lanes_hashed = get_datasets_lanes_hash(tantalus_api, [d['id'] for d in bam_datasets])

    # Validate explicitly rather than with `assert`: asserts are stripped
    # under `python -O`, silently disabling this check.
    # TODO: control aligner vocabulary elsewhere
    if args['aligner'] not in ('BWA_ALN', 'BWA_MEM'):
        raise ValueError("unrecognised aligner: {}".format(args['aligner']))

    name = templates.SC_QC_ANALYSIS_NAME_TEMPLATE.format(
        analysis_type=cls.analysis_type_,
        aligner=args['aligner'],
        ref_genome=args['ref_genome'],
        library_id=args['library_id'],
        lanes_hashed=lanes_hashed,
    )
    return name
def generate_unique_name(self, jira, version, args, input_datasets, input_results):
    """Return a deterministic name for a tenx analysis.

    The name embeds the reference genome, library id and a hash of the lanes
    covered by ``input_datasets``, so repeated runs over the same inputs yield
    the same name.

    NOTE(review): unlike the sibling implementations, `tantalus_api` is not a
    parameter here — presumably a module-level client; confirm against the
    enclosing module.
    """
    lane_hash = get_datasets_lanes_hash(tantalus_api, input_datasets)
    return templates.TENX_ANALYSIS_NAME_TEMPLATE.format(
        analysis_type="tenx",
        ref_genome=args['ref_genome'],
        library_id=args['library_id'],
        lanes_hashed=lane_hash,
    )
def generate_unique_name(cls, tantalus_api, jira, version, args, input_datasets, input_results):
    """Return a deterministic name for a single-cell QC analysis.

    Hashes the lanes of ``input_datasets`` and interpolates it, together with
    the aligner / reference genome / library id from ``args`` and the class's
    ``analysis_type_``, into SC_QC_ANALYSIS_NAME_TEMPLATE.
    """
    lane_hash = get_datasets_lanes_hash(tantalus_api, input_datasets)
    return templates.SC_QC_ANALYSIS_NAME_TEMPLATE.format(
        analysis_type=cls.analysis_type_,
        aligner=args['aligner'],
        ref_genome=args['ref_genome'],
        library_id=args['library_id'],
        lanes_hashed=lane_hash,
    )
def generate_unique_name(cls, tantalus_api, jira, version, args, input_datasets, input_results):
    """Build a deterministic analysis name for a single-cell QC analysis.

    Args:
        tantalus_api: Tantalus API client passed through to the lane hasher.
        jira / version / input_results: unused here; kept for a uniform
            signature across `generate_unique_name` implementations.
        args: dict with at least 'library_id', 'aligner', 'ref_genome'.
        input_datasets: dataset ids whose lanes are hashed into the name.

    Returns:
        str: formatted analysis name from SC_QC_ANALYSIS_NAME_TEMPLATE.

    Raises:
        ValueError: if args['aligner'] is not a recognised aligner name.
    """
    lanes_hashed = get_datasets_lanes_hash(tantalus_api, input_datasets)

    # Validate explicitly rather than with `assert`: asserts are stripped
    # under `python -O`, silently disabling this check.
    # TODO: control aligner vocabulary elsewhere
    if args['aligner'] not in ('BWA_ALN', 'BWA_MEM'):
        raise ValueError("unrecognised aligner: {}".format(args['aligner']))

    name = templates.SC_QC_ANALYSIS_NAME_TEMPLATE.format(
        analysis_type=cls.analysis_type_,
        aligner=args['aligner'],
        ref_genome=args['ref_genome'],
        library_id=args['library_id'],
        lanes_hashed=lanes_hashed,
    )
    return name
def create_output_datasets(self, tag_name=None, update=False):
    """Register the tarred scRNA BAM output as a Tantalus sequence dataset.

    Resolves the sample via the Colossus tenx library record, registers the
    ``bams.tar.gz`` file in the ``scrna_bams`` storage, and creates (or
    fetches) a BAM sequence dataset named after the library, lane hash and
    reference genome.

    Args:
        tag_name: currently unused — TODO(review): either apply the tag to
            the created dataset or drop the parameter.
        update: forwarded to `tantalus_api.add_file` to control whether an
            existing file record is updated.

    NOTE(review): `tantalus_api`, `colossus_api` and `log` are presumably
    module-level clients/logger; confirm against the enclosing module.
    """
    library_id = self.args["library_id"]
    ref_genome = self.args["ref_genome"]

    dna_library = tantalus_api.get("dna_library", library_id=library_id)

    # The sample is only discoverable through the Colossus tenx library.
    tenx_library = colossus_api.get("tenxlibrary", name=library_id)
    sample_id = tenx_library["sample"]["sample_id"]
    sample = tantalus_api.get("sample", sample_id=sample_id)

    storage_name = "scrna_bams"
    storage_client = tantalus_api.get_storage(storage_name)

    sequence_lanes = self.get_lane_ids()
    lanes_hashed = get_datasets_lanes_hash(tantalus_api, self.analysis["input_datasets"])

    bam_filepath = os.path.join(storage_client["prefix"], library_id, "bams.tar.gz")

    # BUG FIX: `update` was previously ignored — add_file was hard-coded to
    # update=True, so callers could never opt out of clobbering an existing
    # file record. Respect the caller's choice.
    file_resource, file_instance = tantalus_api.add_file(storage_name, bam_filepath, update=update)

    name = "BAM-{}-SC_RNASEQ-lanes_{}-{}".format(library_id, lanes_hashed, ref_genome)

    sequence_dataset = tantalus_api.get_or_create(
        "sequence_dataset",
        name=name,
        dataset_type="BAM",
        sample=sample["id"],
        library=dna_library["id"],
        sequence_lanes=sequence_lanes,
        file_resources=[file_resource["id"]],
        reference_genome=self.args["ref_genome"],
        aligner=None,  # 10x BAMs are not produced by the DNA aligners tracked here
        analysis=self.analysis['id'],
    )

    log.info("Created sequence dataset {}".format(name))