def get_regular_sample(self, sample_data, tumor_type):
    legacy_fg = Q(file__file_group=FileGroup.objects.get(slug="fero-legacy-data"))
    data_files = FileRepository.filter(queryset=self.files, q=legacy_fg)
    sample_id = sample_data["sample_id"]
    sample = FileRepository.filter(
        queryset=data_files,
        metadata={"cmoSampleName": sample_id, "igocomplete": True},
        filter_redact=True,
    )
    if not sample:
        # try dmp sample
        if "patient_id" in sample_data:
            patient_id = sample_data["patient_id"]
            if "bait_set" in sample_data:
                bait_set = sample_data["bait_set"]
                dmp_bam_id = sample_id.replace("s_", "").replace("_", "-")
                data = FileRepository.filter(queryset=self.files, metadata={"external_id": dmp_bam_id})
                sample = list()
                for f in data:
                    f.metadata = build_dmp_sample(f, patient_id, bait_set, tumor_type)["metadata"]
                    sample.append(f)
    return sample
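# A hedged illustration of the CMO-to-DMP id rewrite above (the sample name is
# hypothetical): the leading "s_" prefix is dropped and underscores become
# dashes so the name can be matched against a DMP bam's "external_id".
_example_cmo_name = "s_C_000123_N001_d"  # hypothetical
assert _example_cmo_name.replace("s_", "").replace("_", "-") == "C-000123-N001-d"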
def _get_request_id(self):
    files = FileRepository.all()
    request_ids = set()
    for run_id in self.run_ids:
        run = Run.objects.filter(id=run_id)[0]
        sample_name = run.tags['sampleNameTumor']
        sample_files = FileRepository.filter(queryset=files, metadata={'cmoSampleName': sample_name})
        for f in sample_files:
            metadata = f.metadata
            if 'requestId' in metadata:
                request_ids.add(metadata['requestId'])
    # sort so the joined id is deterministic across calls (set order is not)
    request_id = "_".join(sorted(request_ids))
    return request_id
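# Minimal sketch of the aggregation in _get_request_id with the Django layer
# stripped out (the request ids below are hypothetical):
def _join_request_ids(metadata_records):
    request_ids = {m["requestId"] for m in metadata_records if "requestId" in m}
    return "_".join(sorted(request_ids))

assert _join_request_ids(
    [{"requestId": "08944_B"}, {"requestId": "08944_B"}, {"requestId": "09200_A"}]
) == "08944_B_09200_A"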
def get_dmp_bam(patient_id, bait_set, tumor_type):
    """
    From a patient id and bait set, get the matching DMP BAM for the given tumor type
    """
    file_objs = FileRepository.all()
    dmp_query = build_dmp_query(patient_id, bait_set)
    dmp_bam = FileRepository.filter(queryset=file_objs, q=dmp_query).order_by('file__file_name').first()
    if dmp_bam:
        sample = build_dmp_sample(dmp_bam, patient_id, bait_set, tumor_type)
        built_sample = build_sample([sample], ignore_sample_formatting=True)
        return built_sample
    return None
def _get_samples_data(self):
    files = FileRepository.all()
    f = FileRepository.filter(
        queryset=files,
        metadata={"cmoSampleName": self.tumor_sample_name, "igocomplete": True},
        filter_redact=True,
    )
    sample = None
    if f:
        # retrieve metadata from the first record (there should only be one)
        meta = f[0].metadata
        sample_id = meta["sampleId"]
        sample = SampleData(sample_id)
    return sample
def get_pooled_normal_files(run_ids, preservation_types, bait_set):
    pooled_normals = FileRepository.all()
    query = Q(file__file_group=settings.POOLED_NORMAL_FILE_GROUP)
    run_id_query = build_run_id_query(run_ids)
    preservation_query = build_preservation_query(preservation_types)
    q = query & run_id_query & preservation_query
    pooled_normals = FileRepository.filter(queryset=pooled_normals, q=q)
    pooled_normals, descriptor, sample_name = get_descriptor(bait_set, pooled_normals, preservation_types, run_ids)
    return pooled_normals, descriptor, sample_name
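# A plausible shape for build_run_id_query, shown only as an assumption (the
# real helper lives elsewhere in the repo and may differ): OR one Q object per
# run id, so a pooled normal matches if it came off any of the runs.
from functools import reduce
from operator import or_

from django.db.models import Q

def build_run_id_query_sketch(run_ids):
    # metadata__runId is an assumed JSON-field lookup name
    return reduce(or_, (Q(metadata__runId=r) for r in run_ids), Q())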
def generate_sample_data_content(files, pipeline_name, pipeline_github, pipeline_version):
    result = (
        "SAMPLE_ID\tREQUEST_ID\tPROJECT_ID\tPATIENT_ID\tCOLLAB_ID\tSAMPLE_TYPE\tGENE_PANEL\t"
        "ONCOTREE_CODE\tSAMPLE_CLASS\tSPECIMEN_PRESERVATION_TYPE\tSEX\tTISSUE_SITE\tIGO_ID\t"
        "PIPELINE\tPIPELINE_GITHUB_LINK\tPIPELINE_VERSION\n"
    )
    ret_str = 'metadata__sampleId'
    query = Q(file__file_group_id=settings.IMPORT_FILE_GROUP)
    query |= Q(file__file_group__slug="origin-unknown")
    query = query & Q(file__path__in=files)
    samples = FileRepository.filter(q=query).order_by(ret_str).distinct(ret_str).all()
    for sample in samples:
        metadata = sample.metadata
        result += '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
            metadata.get('cmoSampleName', format_sample_name(metadata['sampleName'], metadata['specimenType'])),
            metadata['requestId'],
            get_project_id(metadata['requestId']),
            metadata['patientId'],
            metadata['investigatorSampleId'],
            MetadataValidator.clean_value(metadata['sampleClass']),
            MetadataValidator.clean_value(metadata['recipe']),
            MetadataValidator.clean_value(metadata['oncoTreeCode']),
            MetadataValidator.clean_value(metadata['specimenType']),
            MetadataValidator.clean_value(metadata['preservation']),
            MetadataValidator.clean_value(metadata['sex']),
            MetadataValidator.clean_value(metadata['tissueLocation']),
            metadata['sampleId'],
            pipeline_name,
            pipeline_github,
            pipeline_version,
        )
    return result
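# Distilled from the row builder above: cmoSampleName is preferred, with a
# formatted fallback. Note that dict.get evaluates its default eagerly, so the
# fallback formatter runs (and must not raise) even when cmoSampleName exists.
def _sample_display_name(metadata, format_sample_name):
    return metadata.get(
        "cmoSampleName",
        format_sample_name(metadata["sampleName"], metadata["specimenType"]),
    )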
def get_samples_from_patient_id(patient_id):
    """
    Retrieves samples from the database based on the patient_id
    """
    files = FileRepository.filter(metadata={"patientId": patient_id}, filter_redact=True)
    data = list()
    for current_file in files:
        sample = dict()
        sample['id'] = current_file.file.id
        sample['path'] = current_file.file.path
        sample['file_name'] = current_file.file.file_name
        sample['metadata'] = current_file.metadata
        data.append(sample)
    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample['metadata']['sampleId']
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)
    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))
    samples, bad_samples = remove_with_caveats(samples)
    number_of_bad_samples = len(bad_samples)
    if number_of_bad_samples > 0:
        LOGGER.warning('Patient query %s returned %i samples with invalid values', patient_id, number_of_bad_samples)
    return samples
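# The igoId grouping above, distilled to pure Python with defaultdict
# (the sample dicts are hypothetical):
from collections import defaultdict

def group_by_igo_id(samples):
    groups = defaultdict(list)
    for sample in samples:
        groups[sample["metadata"]["sampleId"]].append(sample)
    return groups

assert group_by_igo_id([{"metadata": {"sampleId": "IGO_1"}}]) == {"IGO_1": [{"metadata": {"sampleId": "IGO_1"}}]}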
def __init__(
    self,
    model,
    job_group_id=None,
    job_group_notifier_id=None,
    request_id=None,
    run_ids=None,
    pipeline=None,
    pairing=None,
    output_directory_prefix=None,
):
    if not isinstance(model, OperatorModel):
        raise Exception("Must pass an instance of beagle_etl.models.Operator")
    self.model = model
    self.request_id = request_id
    self.job_group_id = job_group_id
    self.job_group_notifier_id = job_group_notifier_id
    # avoid a mutable default argument; fall back to a fresh list per instance
    self.run_ids = run_ids if run_ids is not None else []
    self.files = FileRepository.all()
    # pairing format: {"pairs": [{"tumor": "tumorSampleName", "normal": "normalSampleName"}]}
    self.pairing = pairing
    self.output_directory_prefix = output_directory_prefix
    self._jobs = []
    self._pipeline = pipeline
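# Why run_ids defaults to None above rather than []: a mutable default is
# created once and shared across every call. Minimal demonstration:
def _bad(acc=[]):
    acc.append(1)
    return acc

assert _bad() == [1]
assert _bad() == [1, 1]  # the same list object leaks between calls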
def populate_missing_info_in_notifier(apps, _):
    JobGroupNotifier = apps.get_model("notifier", "JobGroupNotifier")
    job_group_notifiers = JobGroupNotifier.objects.all()
    for jgn in job_group_notifiers:
        # check the jira_id attribute, not the model instance itself
        if jgn.jira_id and jgn.jira_id.startswith("VADEV-"):
            project = jgn.jira_id.split("-")[0]
            jira_client = JiraClient(
                url=settings.JIRA_URL,
                username=settings.JIRA_USERNAME,
                password=settings.JIRA_PASSWORD,
                project=project,
            )
            print("Populating status for ticket %s" % jgn.jira_id)
            jira_ticket = jira_client.get_ticket(jgn.jira_id).json()
            request_id = jira_ticket.get("fields", {}).get("summary")
            if request_id:
                file_obj = FileRepository.filter(metadata={"requestId": request_id}).first()
                if file_obj:
                    jgn.request_id = request_id
                    jgn.PI = file_obj.metadata["labHeadName"]
                    jgn.investigator = file_obj.metadata["investigatorName"]
                    jgn.assay = file_obj.metadata["recipe"]
                    jgn.save(update_fields=("request_id", "PI", "investigator", "assay"))
                else:
                    print("Metadata can't be found")
def get_jobs(self): files = FileRepository.filter(queryset=self.files, metadata={ "requestId": self.request_id, "igocomplete": True }) data = [{ "id": f.file.id, "path": f.file.path, "file_name": f.file.file_name, "metadata": f.metadata } for f in files] sample_inputs = construct_sample_inputs(data) number_of_inputs = len(sample_inputs) return [(RunCreator( **{ "name": "ACCESS M1: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs), "app": self.get_pipeline_id(), "inputs": job, "tags": { "requestId": self.request_id }, }), ) for i, job in enumerate(sample_inputs)]
def get_jobs(self):
    files = FileRepository.filter(
        queryset=self.files,
        metadata={'requestId': self.request_id, 'igocomplete': True},
    )
    data = [
        {"id": f.file.id, "path": f.file.path, "file_name": f.file.file_name, "metadata": f.metadata}
        for f in files
    ]
    inputs = construct_inputs(data, self.request_id)
    number_of_inputs = len(inputs)
    return [
        (
            APIRunCreateSerializer(
                data={
                    'name': "LEGACY FASTQ Merge: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                    'app': self.get_pipeline_id(),
                    'output_metadata': {key: metadata[key] for key in METADATA_OUTPUT_FIELDS if key in metadata},
                    'inputs': job,
                    'tags': {'requestId': self.request_id, 'sampleId': metadata["sampleId"]},
                }
            ),
            job,
        )
        for i, (job, metadata) in enumerate(inputs)
    ]
def get_jobs(self):
    files = FileRepository.filter(
        queryset=self.files,
        metadata={'requestId': self.request_id, 'igocomplete': True},
    )
    data = [
        {"id": f.file.id, "path": f.file.path, "file_name": f.file.file_name, "metadata": f.metadata}
        for f in files
    ]
    sample_inputs = construct_sample_inputs(data)
    number_of_inputs = len(sample_inputs)
    return [
        (
            APIRunCreateSerializer(
                data={
                    'name': "ACCESS M1: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                    'app': self.get_pipeline_id(),
                    'inputs': job,
                    'tags': {'requestId': self.request_id},
                }
            ),
            job,
        )
        for i, job in enumerate(sample_inputs)
    ]
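# The "N of M" run naming shared by the get_jobs variants above, distilled
# (the request id is hypothetical):
def _run_names(prefix, request_id, inputs):
    total = len(inputs)
    return ["%s: %s, %i of %i" % (prefix, request_id, i + 1, total) for i in range(total)]

assert _run_names("ACCESS M1", "10000_A", [None, None]) == [
    "ACCESS M1: 10000_A, 1 of 2",
    "ACCESS M1: 10000_A, 2 of 2",
]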
def __init__(self, dmp_file):
    self.files = FileRepository.all()
    self.dmp_file = dmp_file
    self.bam_path = dmp_file.file.path
    self.metadata = dmp_file.metadata
    self.mutations_extended = self._set_data_muts_txt()
    self.dmp_sample_name = self._set_dmp_sample_name()
def get_samples_from_patient_id(patient_id): files = FileRepository.filter(metadata={"patientId": patient_id}) data = list() for f in files: sample = dict() sample["id"] = f.file.id sample["path"] = f.file.path sample["file_name"] = f.file.file_name sample["metadata"] = f.metadata data.append(sample) samples = list() # group by igoId igo_id_group = dict() for sample in data: igo_id = sample["metadata"]["sampleId"] if igo_id not in igo_id_group: igo_id_group[igo_id] = list() igo_id_group[igo_id].append(sample) for igo_id in igo_id_group: samples.append(build_sample(igo_id_group[igo_id])) samples, bad_samples = remove_with_caveats(samples) if len(bad_samples) > 0: logger.warning( "Some samples for patient query %s have invalid %i values" % (patient_id, len(bad_samples))) return samples
def __init__(self, sample_id):
    self.files = FileRepository.all()
    self.sample_id = sample_id
    self.patient_id, self.cmo_sample_name = self._get_sample_metadata()
    self.dmp_patient_id = self._get_dmp_patient_id()
    self.dmp_bams_tumor = self._find_dmp_bams("T")
    self.dmp_bams_normal = self._find_dmp_bams("N")
def get_dmp_normal(patient_id, bait_set):
    """
    From a patient id and bait set, get matching dmp bam normal
    """
    file_objs = FileRepository.all()
    dmp_query = build_dmp_query(patient_id, bait_set)
    dmp_bam = FileRepository.filter(queryset=file_objs, q=dmp_query).order_by("file__file_name").first()
    if dmp_bam:
        dmp_metadata = dmp_bam.metadata
        specimen_type = "DMP Normal"
        sample_name = dmp_metadata["external_id"]
        sequencing_center = "MSKCC"
        platform = "Illumina"
        sample = dict()
        sample["id"] = dmp_bam.file.id
        sample["path"] = dmp_bam.file.path
        sample["file_name"] = dmp_bam.file.file_name
        sample["file_type"] = dmp_bam.file.file_type
        metadata = init_metadata()
        metadata["sampleId"] = sample_name
        metadata["sampleName"] = format_sample_name(sample_name, specimen_type)
        metadata["requestId"] = sample_name
        metadata["sequencingCenter"] = sequencing_center
        metadata["platform"] = platform
        metadata["baitSet"] = bait_set
        metadata["recipe"] = bait_set
        metadata["run_id"] = ""
        metadata["preservation"] = ""
        metadata["libraryId"] = sample_name + "_1"
        metadata["R"] = "Not applicable"
        # because rgid depends on flowCellId and barcodeIndex, we will
        # spoof barcodeIndex so that pairing can work properly; see
        # build_sample in runner.operator.argos_operator.bin
        metadata["barcodeIndex"] = "DMP_BARCODEIDX"
        metadata["flowCellId"] = "DMP_FCID"
        metadata["tumorOrNormal"] = "Normal"
        metadata["patientId"] = patient_id
        metadata["specimenType"] = specimen_type
        sample["metadata"] = metadata
        built_sample = build_sample([sample], ignore_sample_formatting=True)
        return built_sample
    return None
def get_oncotree_codes(request_id):
    oncotree_dh = OncotreeDataHandler()
    files = FileRepository.all()
    oncotree_codes_tmp = set(
        FileRepository.filter(queryset=files, metadata={"requestId": request_id}).values_list(
            "metadata__oncoTreeCode", flat=True
        )
    )
    oncotree_codes = list()
    for val in oncotree_codes_tmp:
        if val:
            oncotree_codes.append(val)
    if not oncotree_codes:
        # hack; if there are no oncotree codes, just say it's mixed
        return "mixed"
    shared_nodes = oncotree_dh.find_shared_nodes_by_code_list(oncotree_codes)
    common_anc = oncotree_dh.get_highest_level_shared_node(shared_nodes)
    if common_anc.code.lower() == "tissue":
        common_anc.code = "mixed"
    return common_anc.code.lower()
def update(self, request, *args, **kwargs):
    try:
        port = Port.objects.get(id=kwargs.get('pk'))
    except Port.DoesNotExist:
        return Response({'details': 'Not Found'}, status=status.HTTP_404_NOT_FOUND)
    value = request.data
    if isinstance(port.schema.get('type'), dict):
        if port.schema.get('type').get('type') == 'array':
            if port.schema.get('type').get('items') != 'File':
                port.value = {"inputs": value.get('values')}
            else:
                input_ids = []
                files = []
                for val in value.get('values'):
                    try:
                        file = FileRepository.get(id=val)
                    except FileNotFoundException:
                        return Response({'details': 'Not Found'}, status=status.HTTP_404_NOT_FOUND)
                    input_ids.append(val)
                    file_val = self._create_file(file, port.schema.get('secondaryFiles'))
                    files.append(file_val)
                port.value = {"refs": input_ids, "inputs": files}
    else:
        if port.schema.get('type') != 'File':
            port.value = {"inputs": value.get('values')}
        else:
            try:
                file = FileRepository.get(pk=value.get('values')[0])
            except FileNotFoundException:
                return Response({'details': 'Not Found'}, status=status.HTTP_404_NOT_FOUND)
            port.value = {
                "inputs": self._create_file(file, port.schema.get('secondaryFiles')),
                "refs": str(file.id),
            }
    port.save()
    response = PortSerializer(port)
    return Response(response.data, status=status.HTTP_200_OK)
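# The schema dispatch above, distilled: a port's type is either a scalar
# ("File", "string", ...) or a dict like {"type": "array", "items": "File"},
# and only File-typed ports are resolved through FileRepository.
def _is_file_array(schema):
    t = schema.get("type")
    return isinstance(t, dict) and t.get("type") == "array" and t.get("items") == "File"

assert _is_file_array({"type": {"type": "array", "items": "File"}})
assert not _is_file_array({"type": "File"})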
def get_request_pi(run_id_list):
    request_pis = set()
    files = FileRepository.all()
    all_request_ids = set()  # reducing number of queries
    for run_id in run_id_list:
        argos_run = Run.objects.get(id=run_id)
        run_request_id = argos_run.tags["requestId"]
        all_request_ids.add(run_request_id)
    for request_id in all_request_ids:
        investigator_emails = FileRepository.filter(queryset=files, metadata={"requestId": request_id}).values_list(
            "metadata__investigatorEmail", flat=True
        )
        request_pis = request_pis.union(set(investigator_emails))
    request_pis_final = list()
    for request_pi in request_pis:
        if request_pi:
            request_pis_final.append(format_msk_id(request_pi))
    return ",".join(request_pis_final)
def update(self, request, *args, **kwargs): try: port = Port.objects.get(id=kwargs.get("pk")) except Port.DoesNotExist: return Response({"details": "Not Found"}, status=status.HTTP_404_NOT_FOUND) value = request.data if isinstance(port.schema.get("type"), dict): if port.schema.get("type").get("type") == "array": if port.schema.get("type").get("items") != "File": port.value = {"inputs": value.get("values")} else: input_ids = [] files = [] for val in value.get("values"): try: file = FileRepository.get(id=val) except FileNotFoundException: return Response({"details": "Not Found"}, status=status.HTTP_404_NOT_FOUND) input_ids.append(val) file_val = self._create_file( file, port.schema.get("secondaryFiles")) files.append(file_val) port.value = {"refs": input_ids, "inputs": files} else: if port.schema.get("type") != "File": port.value = {"inputs": value.get("values")} else: try: file = FileRepository.get(pk=value.get("values")[0]) except FileNotFoundException: return Response({"details": "Not Found"}, status=status.HTTP_404_NOT_FOUND) port.value = { "inputs": self._create_file(file, port.schema.get("secondaryFiles")), "refs": str(file.id), } port.save() response = PortSerializer(port) return Response(response.data, status=status.HTTP_200_OK)
def create_or_update_file(path, request_id, file_group_id, file_type, igocomplete, data, library, run,
                          sample, request_metadata, r, update=False, job_group_notifier=None):
    logger.info("Creating file %s " % path)
    try:
        file_group_obj = FileGroup.objects.get(id=file_group_id)
        file_type_obj = FileType.objects.filter(name=file_type).first()
        lims_metadata = copy.deepcopy(data)
        library_copy = copy.deepcopy(library)
        lims_metadata['requestId'] = request_id
        lims_metadata['igocomplete'] = igocomplete
        lims_metadata['R'] = r
        for k, v in library_copy.items():
            lims_metadata[k] = v
        for k, v in run.items():
            lims_metadata[k] = v
        for k, v in request_metadata.items():
            lims_metadata[k] = v
        metadata = format_metadata(lims_metadata)
        # validator = MetadataValidator(METADATA_SCHEMA)
    except Exception as e:
        logger.error("Failed to parse metadata for file %s" % path)
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    try:
        logger.info(lims_metadata)
        # validator.validate(metadata)
    except MetadataValidationException as e:
        logger.error("Failed to create file %s. Error %s" % (path, str(e)))
        raise FailedToFetchSampleException("Failed to create file %s. Error %s" % (path, str(e)))
    else:
        f = FileRepository.filter(path=path).first()
        if not f:
            create_file_object(path, file_group_obj, lims_metadata, metadata, file_type_obj, sample)
            if update:
                message = "File registered: %s" % path
                event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                send_notification.delay(event)
        else:
            if update:
                before = f.file.filemetadata_set.order_by('-created_date').count()
                update_file_object(f.file, path, metadata)
                after = f.file.filemetadata_set.order_by('-created_date').count()
                if after != before:
                    all_metadata = f.file.filemetadata_set.order_by('-created_date')
                    ddiff = DeepDiff(all_metadata[1].metadata, all_metadata[0].metadata, ignore_order=True)
                    diff_file_name = "%s_metadata_update.json" % f.file.file_name
                    message = "Updating file metadata: %s, details in file %s\n" % (path, diff_file_name)
                    # avoid shadowing the `update` flag with the event payload
                    update_event = RedeliveryUpdateEvent(job_group_notifier, message).to_dict()
                    diff_details_event = LocalStoreFileEvent(job_group_notifier, diff_file_name, str(ddiff)).to_dict()
                    send_notification.delay(update_event)
                    send_notification.delay(diff_details_event)
            else:
                raise FailedToFetchSampleException("File %s already exists with id %s" % (path, str(f.id)))
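# Toy DeepDiff run mirroring the metadata-diff step above (values are
# hypothetical); ignore_order=True makes list reordering a non-change:
from deepdiff import DeepDiff

_old = {"requestId": "08944_B", "qc": ["pass", "pass"]}
_new = {"requestId": "08944_B", "qc": ["pass", "fail"]}
print(DeepDiff(_old, _new, ignore_order=True))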
def get_descriptor(bait_set, pooled_normals, preservation_types, run_ids):
    """
    Need descriptor to match pooled normal "recipe", which might need to be
    re-labeled as bait_set. Adds a correction for IMPACT505 pooled normals.
    """
    query = Q(file__file_group=settings.POOLED_NORMAL_FILE_GROUP)
    sample_name = None
    descriptor = None
    for pooled_normal in pooled_normals:
        bset_data = pooled_normal.metadata['recipe']
        if bset_data.lower() in bait_set.lower():
            descriptor = bset_data
    if descriptor:
        # From the returned pooled normals, we found the bait set/recipe we're looking for
        pooled_normals = FileRepository.filter(queryset=pooled_normals, metadata={'recipe': descriptor})
        # sample_name is FROZENPOOLEDNORMAL unless FFPE is in any of the preservation types
        preservations_lower_case = set([x.lower() for x in preservation_types])
        run_ids_suffix_list = [i for i in run_ids if i]  # remove empty or falsy string values
        run_ids_suffix = "_".join(set(run_ids_suffix_list))
        sample_name = "FROZENPOOLEDNORMAL_" + run_ids_suffix
        if "ffpe" in preservations_lower_case:
            sample_name = "FFPEPOOLEDNORMAL_" + run_ids_suffix
    elif "impact505" in bait_set.lower():
        # We didn't find a pooled normal for IMPACT505; return a "static" FROZEN or FFPE pooled normal
        descriptor = "IMPACT505"
        preservations_lower_case = set([x.lower() for x in preservation_types])
        sample_name = "FROZENPOOLEDNORMAL_IMPACT505_V1"
        if "ffpe" in preservations_lower_case:
            sample_name = "FFPEPOOLEDNORMAL_IMPACT505_V1"
        q = query & Q(metadata__sampleName=sample_name)
        pooled_normals = FileRepository.filter(queryset=pooled_normals, q=q)
        if not pooled_normals:
            LOGGER.error("Could not find IMPACT505 pooled normal to pair %s", sample_name)
    return pooled_normals, descriptor, sample_name
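# The pooled-normal naming rule above, distilled (run ids are hypothetical;
# like the original, the suffix joins a set, so its order is not guaranteed
# when there are multiple run ids):
def _pooled_normal_name(preservation_types, run_ids):
    suffix = "_".join(set(i for i in run_ids if i))
    ffpe = any(p.lower() == "ffpe" for p in preservation_types)
    return ("FFPEPOOLEDNORMAL_" if ffpe else "FROZENPOOLEDNORMAL_") + suffix

assert _pooled_normal_name(["Frozen"], ["JAX_0001"]) == "FROZENPOOLEDNORMAL_JAX_0001"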
def list(self, request, *args, **kwargs):
    # the client passes the request id under the "project_id" query param
    request_id = request.query_params.get("project_id")
    if not request_id:
        return Response(status=status.HTTP_404_NOT_FOUND)
    sample_ids = list(
        FileRepository.filter(metadata={"requestId": request_id}, values_metadata="sampleId").all()
    )
    samples = Sample.objects.filter(sample_id__in=sample_ids)
    response = FullSampleSerializer(samples, many=True)
    return Response(response.data, status=status.HTTP_200_OK)
def get_files_for_pairs(self):
    all_files = []
    cnt_tumors = 0
    for pair in self.pairing.get('pairs'):
        tumors = FileRepository.filter(
            queryset=self.files,
            metadata={'cmoSampleName': pair['tumor'], 'igocomplete': True},
            filter_redact=True,
        )
        cnt_tumors += len(tumors)
        normals = FileRepository.filter(
            queryset=self.files,
            metadata={'cmoSampleName': pair['normal'], 'igocomplete': True},
            filter_redact=True,
        )
        if not normals and cnt_tumors > 0:
            # get from DMP bams
            patient_id = tumors[0].metadata['patientId']
            bait_set = tumors[0].metadata['baitSet']
            dmp_bam_id = pair['normal'].replace('s_', '').replace('_', '-')
            data = FileRepository.filter(queryset=self.files, metadata={'external_id': dmp_bam_id})
            normals = list()
            for f in data:
                f.metadata = build_dmp_sample(f, patient_id, bait_set)['metadata']
                normals.append(f)
        for file in list(tumors):
            if file not in all_files:
                all_files.append(file)
        for file in list(normals):
            if file not in all_files:
                all_files.append(file)
    return all_files, cnt_tumors
def _get_muts(self, data_id):
    # There should only be one mutations file returned here, one per dmp sample
    query_results = FileRepository.filter(queryset=self.files, metadata={'dmp_link_id': data_id})
    results = list()
    if query_results:
        for item in query_results:
            results.append(item.file.path)
    if len(results) > 1:
        LOGGER.error("More than one mutations file found for %s", data_id)
    # note: returns a path string when found, an empty list otherwise
    if results:
        return results[0]
    return results
def get_samples_from_patient_id(patient_id):
    """
    Retrieves samples from the database based on the patient_id

    Only retrieve patients from LIMS file group
    """
    all_files = FileRepository.all()
    q_pid = Q(metadata__patientId=patient_id)
    q_fg = build_argos_file_groups_query()
    q = q_pid & q_fg
    files = FileRepository.filter(queryset=all_files, q=q, filter_redact=True)
    data = list()
    for current_file in files:
        sample = dict()
        sample["id"] = current_file.file.id
        sample["path"] = current_file.file.path
        sample["file_name"] = current_file.file.file_name
        sample["metadata"] = current_file.metadata
        data.append(sample)
    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample["metadata"]["sampleId"]
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)
    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))
    samples, bad_samples = remove_with_caveats(samples)
    number_of_bad_samples = len(bad_samples)
    if number_of_bad_samples > 0:
        LOGGER.warning("Patient query %s returned %i samples with invalid values", patient_id, number_of_bad_samples)
    return samples
def test_construct_inputs_obj_no_dmp_bams(self):
    """
    Test the creation of the inputs obj with no dmp bams
    """
    file_group_id = FileGroup.objects.get(name="DMP BAMs").pk
    files = FileRepository.filter(file_group=file_group_id)
    for single_file in files:
        single_file.delete()
    single_run = Run.objects.get(id=self.run_ids[0])
    input_obj = InputsObj(single_run)
    input_json = input_obj._build_inputs_json()
    self.assertEqual(input_json["unindexed_bam_files"], [])
    self.assertEqual(input_json["unindexed_sample_ids"], [])
    self.assertEqual(input_json["unindexed_maf_files"], [])
def test_construct_inputs_obj_no_dmp_muts(self):
    """
    Test the creation of the inputs obj with no dmp muts
    """
    file_group_id = FileGroup.objects.get(name="DMP Data Mutations Extended").pk
    files = FileRepository.filter(file_group=file_group_id)
    for single_file in files:
        single_file.delete()
    single_run = Run.objects.get(id=self.run_ids[0])
    input_obj = InputsObj(single_run)
    expected_input_json = self.first_run_expected_inputs
    input_json = input_obj._build_inputs_json()
    self.assertEqual(input_json["unindexed_maf_files"], [])
def _find_dmp_bams(self, tumor_type):
    # Retrieves dmp samples based on dmp bams
    file_list = list()
    if self.dmp_patient_id:
        files = FileRepository.filter(
            queryset=self.files,
            metadata={'patient__cmo': self.dmp_patient_id, 'type': tumor_type},
        )
        if files:
            for f in files:
                file_list.append(BamData(f))
            return file_list
    return None
def get_regular_sample(self, sample_data, tumor_type):
    sample_id = sample_data['sample_id']
    sample = FileRepository.filter(
        queryset=self.files,
        metadata={'cmoSampleName': sample_id, 'igocomplete': True},
        filter_redact=True,
    )
    if not sample:
        # try dmp sample
        if 'patient_id' in sample_data:
            patient_id = sample_data['patient_id']
            if 'bait_set' in sample_data:
                bait_set = sample_data['bait_set']
                dmp_bam_id = sample_id.replace('s_', '').replace('_', '-')
                data = FileRepository.filter(queryset=self.files, metadata={'external_id': dmp_bam_id})
                sample = list()
                for f in data:
                    f.metadata = build_dmp_sample(f, patient_id, bait_set, tumor_type)['metadata']
                    sample.append(f)
    return sample