def build_pooled_normal_sample_by_file(pooled_normal, run_ids, preservation_types, bait_set, sample_name):
    """
    Assemble the sample dictionary for a single pooled normal file.

    The returned dict carries the file's id, path and file name plus a
    "metadata" entry. The metadata starts from init_metadata() and is then
    filled with fixed pooled-normal values and the arguments given here.

    :param pooled_normal: object exposing the underlying file as ``.file``
        (with ``id``, ``path`` and ``file_name`` attributes)
    :param run_ids: value stored under metadata "runId"
    :param preservation_types: value stored under metadata "preservation"
    :param bait_set: value stored under both "baitSet" and "recipe"
    :param sample_name: used for the sample/request identifiers and as the
        prefix of "libraryId"
    :return: dict with keys "id", "path", "file_name" and "metadata"
    """
    pn_file = pooled_normal.file
    sample = {
        "id": pn_file.id,
        "path": pn_file.path,
        "file_name": pn_file.file_name,
    }

    metadata = init_metadata()

    # rgid depends on flowCellId and barcodeIndex, so barcodeIndex is
    # spoofed from the file name and read orientation to let pairing work
    # properly; see build_sample in runner.operator.argos_operator.bin
    orientation = get_r_orientation(pn_file.file_name)
    spoofed_barcode = spoof_barcode(sample["file_name"], orientation)

    overrides = {
        "sampleId": sample_name,
        "sampleName": sample_name,
        "cmoSampleName": sample_name,
        "requestId": sample_name,
        "sequencingCenter": "MSKCC",
        "platform": "Illumina",
        "baitSet": bait_set,
        "recipe": bait_set,
        "runId": run_ids,
        "preservation": preservation_types,
        "libraryId": sample_name + "_1",
        "R": orientation,
        "barcodeIndex": spoofed_barcode,
        "flowCellId": "PN_FCID",
        "tumorOrNormal": "Normal",
        "patientId": "PN_PATIENT_ID",
        "specimenType": "Pooled Normal",
        "runMode": "",
        "sampleClass": "",
    }
    # Item assignment (rather than .update) mirrors how each key is written
    # one at a time onto whatever init_metadata() returned.
    for key, value in overrides.items():
        metadata[key] = value

    sample["metadata"] = metadata
    return sample
def _set_R(self, file_list):
    """
    Split the given files into R1/R2 fastqs and record their paths.

    Every R1 path is appended to self.r1. For each R1, the expected R2
    path is derived by swapping the last "R1" in the path for "R2" and
    looked up among the R2 files via _get_fastq_from_list(). A match is
    appended to self.r2; otherwise a message is printed and self.paired
    is set to False.

    :param file_list: iterable of objects exposing a ``.file`` attribute
        whose ``.path`` is the fastq path
    """
    forward_reads = []
    reverse_reads = []
    for entry in file_list:
        fastq = entry.file
        orientation = get_r_orientation(fastq.path)
        if orientation == "R1":
            forward_reads.append(fastq)
        elif orientation == "R2":
            reverse_reads.append(fastq)

    for forward in forward_reads:
        r1_path = forward.path
        self.r1.append(r1_path)

        # Replace only the right-most "R1" so earlier occurrences in the
        # path are left untouched.
        pieces = r1_path.rsplit("R1", 1)
        expected_r2 = "R2".join(pieces)

        mate = self._get_fastq_from_list(expected_r2, reverse_reads)
        if not mate:
            print("No fastq R2 found for %s" % forward.path)
            self.paired = False
            continue
        self.r2.append(mate.path)
def build_pooled_normal_sample_by_file(pooled_normal, run_ids, preservation_types, bait_set, sample_name):
    """
    Assemble the sample dictionary for a single pooled normal file.

    The returned dict carries the file's id, path and file name plus a
    'metadata' entry. The metadata starts from init_metadata() and is then
    filled with fixed pooled-normal values and the arguments given here.

    :param pooled_normal: object exposing the underlying file as ``.file``
        (with ``id``, ``path`` and ``file_name`` attributes)
    :param run_ids: value stored under metadata 'runId'
    :param preservation_types: value stored under metadata 'preservation'
    :param bait_set: value stored under both 'baitSet' and 'recipe'
    :param sample_name: used for the sample/request identifiers and as the
        prefix of 'libraryId'
    :return: dict with keys 'id', 'path', 'file_name' and 'metadata'
    """
    pn_file = pooled_normal.file
    sample = {
        'id': pn_file.id,
        'path': pn_file.path,
        'file_name': pn_file.file_name,
    }

    metadata = init_metadata()

    # rgid depends on flowCellId and barcodeIndex, so barcodeIndex is
    # spoofed from the file name and read orientation to let pairing work
    # properly; see build_sample in runner.operator.argos_operator.bin
    orientation = get_r_orientation(pn_file.file_name)
    spoofed_barcode = spoof_barcode(sample['file_name'], orientation)

    overrides = {
        'sampleId': sample_name,
        'sampleName': sample_name,
        'cmoSampleName': sample_name,
        'requestId': sample_name,
        'sequencingCenter': "MSKCC",
        'platform': "Illumina",
        'baitSet': bait_set,
        'recipe': bait_set,
        'runId': run_ids,
        'preservation': preservation_types,
        'libraryId': sample_name + "_1",
        'R': orientation,
        'barcodeIndex': spoofed_barcode,
        'flowCellId': 'PN_FCID',
        'tumorOrNormal': 'Normal',
        'patientId': 'PN_PATIENT_ID',
        'specimenType': 'Pooled Normal',
        'runMode': "",
        'sampleClass': "",
    }
    # Item assignment (rather than .update) mirrors how each key is written
    # one at a time onto whatever init_metadata() returned.
    for key, value in overrides.items():
        metadata[key] = value

    sample['metadata'] = metadata
    return sample
def _set_pu(self):
    """
    Build the list of platform unit (PU) values, one per R1 fastq.

    The list is used by the argos pipeline as scatter input. Only self.r1
    is walked, since an R1 and its R2 should share the same metadata.

    For a sample whose name contains "poolednormal" (case-insensitive) the
    flowcell id is the fixed 'PN_FCID' and the barcode index is spoofed
    from the file's base name and read orientation. Otherwise both values
    come from the file's metadata. The PU is "<flowcell>_<barcode>" when a
    barcode index is present, and the bare flowcell id when it is not.

    :return: list of platform unit strings, in self.r1 order
    """
    platform_units = []
    for r1_path in self.r1:
        file_metadata = get_file(r1_path).metadata

        is_pooled_normal = 'poolednormal' in self.sample_name.lower()
        if not is_pooled_normal:
            flowcell_id = file_metadata['flowCellId']
            barcode_index = file_metadata['barcodeIndex']
        else:
            flowcell_id = 'PN_FCID'
            orientation = get_r_orientation(r1_path)
            barcode_index = spoof_barcode(os.path.basename(r1_path), orientation)

        # A falsy barcode index leaves the PU as just the flowcell id.
        if barcode_index:
            platform_units.append('_'.join([flowcell_id, barcode_index]))
        else:
            platform_units.append(flowcell_id)
    return platform_units