Esempio n. 1
0
def build_pooled_normal_sample_by_file(pooled_normal, run_ids,
                                       preservation_types, bait_set,
                                       sample_name):
    """
    Build the sample dictionary describing one pooled normal file.

    The result holds the file's ``id``, ``path`` and ``file_name`` plus a
    ``metadata`` dict that starts from whatever init_metadata() returns
    and is then filled with the values below. ``sample_name`` is reused
    for every identifier-style field, ``bait_set`` is stored as both the
    bait set and the recipe, and flow cell / patient ids are fixed
    placeholders.
    """
    pn_file = pooled_normal.file
    sample = {
        "id": pn_file.id,
        "path": pn_file.path,
        "file_name": pn_file.file_name,
    }

    metadata = init_metadata()

    # because rgid depends on flowCellId and barcodeIndex, we will
    # spoof barcodeIndex so that pairing can work properly; see
    # build_sample in runner.operator.argos_operator.bin
    read_orientation = get_r_orientation(sample["file_name"])
    fake_barcode = spoof_barcode(sample["file_name"], read_orientation)

    # Keys are listed in the order they are written into the metadata.
    metadata.update({
        "sampleId": sample_name,
        "sampleName": sample_name,
        "cmoSampleName": sample_name,
        "requestId": sample_name,
        "sequencingCenter": "MSKCC",
        "platform": "Illumina",
        "baitSet": bait_set,
        "recipe": bait_set,
        "runId": run_ids,
        "preservation": preservation_types,
        "libraryId": sample_name + "_1",
        "R": read_orientation,
        "barcodeIndex": fake_barcode,
        "flowCellId": "PN_FCID",
        "tumorOrNormal": "Normal",
        "patientId": "PN_PATIENT_ID",
        "specimenType": "Pooled Normal",
        "runMode": "",
        "sampleClass": "",
    })

    sample["metadata"] = metadata
    return sample
Esempio n. 2
0
    def _set_R(self, file_list):
        """
        Sort the given files into R1/R2 fastqs and record them on self.

        Each entry's ``file.path`` is classified with get_r_orientation().
        Every R1 path is appended to ``self.r1``. Its expected R2 path is
        the R1 path with the last "R1" replaced by "R2", and is looked up
        among the R2 files via _get_fastq_from_list(). When a match is
        found its path is appended to ``self.r2``; otherwise a message is
        printed and ``self.paired`` is set to False.
        """
        r1_fastqs = []
        r2_fastqs = []
        for entry in file_list:
            fastq = entry.file
            orientation = get_r_orientation(fastq.path)
            if orientation == "R1":
                r1_fastqs.append(fastq)
            elif orientation == "R2":
                r2_fastqs.append(fastq)

        for r1_fastq in r1_fastqs:
            r1_path = r1_fastq.path
            self.r1.append(r1_path)
            # Only the right-most "R1" is swapped, so an "R1" occurring
            # earlier in the path is left alone.
            pieces = r1_path.rsplit('R1', 1)
            mate_path = 'R2'.join(pieces)
            mate = self._get_fastq_from_list(mate_path, r2_fastqs)
            if not mate:
                print("No fastq R2 found for %s" % r1_path)
                self.paired = False
                continue
            self.r2.append(mate.path)
Esempio n. 3
0
def build_pooled_normal_sample_by_file(pooled_normal, run_ids, preservation_types, bait_set, sample_name):
    """
    Assemble the sample dict for a single pooled normal file.

    Returns a dict with the file's ``id``, ``path`` and ``file_name`` and
    a ``metadata`` entry. The metadata starts from init_metadata() and is
    populated from the arguments plus fixed pooled-normal placeholders.
    """
    source = pooled_normal.file
    sample = dict(id=source.id, path=source.path, file_name=source.file_name)

    metadata = init_metadata()

    # The sample name stands in for every identifier-style field.
    for key in ('sampleId', 'sampleName', 'cmoSampleName', 'requestId'):
        metadata[key] = sample_name

    metadata['sequencingCenter'] = "MSKCC"
    metadata['platform'] = "Illumina"

    # The bait set is recorded as the recipe as well.
    for key in ('baitSet', 'recipe'):
        metadata[key] = bait_set

    metadata['runId'] = run_ids
    metadata['preservation'] = preservation_types
    metadata['libraryId'] = sample_name + "_1"

    # because rgid depends on flowCellId and barcodeIndex, we will
    # spoof barcodeIndex so that pairing can work properly; see
    # build_sample in runner.operator.argos_operator.bin
    orientation = get_r_orientation(sample['file_name'])
    metadata['R'] = orientation
    metadata['barcodeIndex'] = spoof_barcode(sample['file_name'], orientation)

    placeholders = (
        ('flowCellId', 'PN_FCID'),
        ('tumorOrNormal', 'Normal'),
        ('patientId', 'PN_PATIENT_ID'),
        ('specimenType', 'Pooled Normal'),
        ('runMode', ""),
        ('sampleClass', ""),
    )
    for key, value in placeholders:
        metadata[key] = value

    sample['metadata'] = metadata
    return sample
Esempio n. 4
0
    def _set_pu(self):
        """
        Return the list of platform unit (PU) values, one per R1 fastq.

        For each path in ``self.r1`` the PU is ``<flowcell>_<barcode>``,
        or just the flow cell id when the barcode is empty. Samples whose
        name contains "poolednormal" (case-insensitive) use the fixed
        flow cell id 'PN_FCID' and a spoofed barcode; all others take
        both values from the file's metadata.

        Per the original author, the list feeds the argos pipeline as
        scatter input, and only R1 files are walked because R1 and R2 are
        expected to carry the same metadata.
        """
        platform_units = []
        for r1_path in self.r1:
            # Looked up for every file, even though the pooled normal
            # branch does not read it.
            file_metadata = get_file(r1_path).metadata
            if 'poolednormal' not in self.sample_name.lower():
                flowcell = file_metadata['flowCellId']
                barcode = file_metadata['barcodeIndex']
            else:
                flowcell = 'PN_FCID'
                orientation = get_r_orientation(r1_path)
                barcode = spoof_barcode(os.path.basename(r1_path), orientation)
            platform_units.append(
                '_'.join([flowcell, barcode]) if barcode else flowcell
            )
        return platform_units