def build_pooled_normal_sample_by_file(pooled_normal, run_ids, preservation_types, bait_set, sample_name):
    """
    Assemble the sample dictionary for a single pooled normal file.

    Args:
        pooled_normal: object exposing a ``file`` attribute that carries
            ``id``, ``path`` and ``file_name``.
        run_ids: value stored unchanged under the ``runId`` metadata key.
        preservation_types: value stored unchanged under ``preservation``.
        bait_set: value stored under both ``baitSet`` and ``recipe``.
        sample_name: used as sample ID, sample name, CMO sample name and
            request ID; ``sample_name + "_1"`` becomes the library ID.

    Returns:
        dict with the keys ``id``, ``path``, ``file_name`` and ``metadata``.
    """
    sample = {
        "id": pooled_normal.file.id,
        "path": pooled_normal.file.path,
        "file_name": pooled_normal.file.file_name,
    }

    # Start from the shared metadata template, then overlay the
    # pooled-normal specific values in the same order as before.
    metadata = init_metadata()
    metadata.update(
        {
            "sampleId": sample_name,
            "sampleName": sample_name,
            "cmoSampleName": sample_name,
            "requestId": sample_name,
            "sequencingCenter": "MSKCC",
            "platform": "Illumina",
            "baitSet": bait_set,
            "recipe": bait_set,
            "runId": run_ids,
            "preservation": preservation_types,
            "libraryId": sample_name + "_1",
        }
    )

    # rgid is derived from flowCellId and barcodeIndex, so the barcode
    # index is spoofed to keep pairing working; see build_sample in
    # runner.operator.argos_operator.bin
    read_orientation = get_r_orientation(pooled_normal.file.file_name)
    metadata["R"] = read_orientation
    metadata["barcodeIndex"] = spoof_barcode(sample["file_name"], read_orientation)

    metadata.update(
        {
            "flowCellId": "PN_FCID",
            "tumorOrNormal": "Normal",
            "patientId": "PN_PATIENT_ID",
            "specimenType": "Pooled Normal",
            "runMode": "",
            "sampleClass": "",
        }
    )

    sample["metadata"] = metadata
    return sample
def build_pooled_normal_sample_by_file(pooled_normal, run_ids, preservation_types, bait_set, sample_name):
    """
    Build the sample record for one pooled normal file.

    The record holds the file's ``id``, ``path`` and ``file_name`` (read
    from ``pooled_normal.file``) plus a ``metadata`` dictionary seeded by
    ``init_metadata()`` and filled with pooled-normal values.

    ``sample_name`` is reused for the sample ID, sample name, CMO sample
    name and request ID, and with a ``_1`` suffix for the library ID.
    ``bait_set`` fills both ``baitSet`` and ``recipe``; ``run_ids`` and
    ``preservation_types`` are stored as given under ``runId`` and
    ``preservation``.

    Returns the assembled sample dictionary.
    """
    pn_file = pooled_normal.file

    sample = dict()
    sample['id'] = pn_file.id
    sample['path'] = pn_file.path
    sample['file_name'] = pn_file.file_name

    metadata = init_metadata()

    # Every identifier-like field carries the same sample name.
    for name_key in ('sampleId', 'sampleName', 'cmoSampleName', 'requestId'):
        metadata[name_key] = sample_name

    metadata['sequencingCenter'] = "MSKCC"
    metadata['platform'] = "Illumina"

    for bait_key in ('baitSet', 'recipe'):
        metadata[bait_key] = bait_set

    metadata['runId'] = run_ids
    metadata['preservation'] = preservation_types
    metadata['libraryId'] = sample_name + "_1"

    # because rgid depends on flowCellId and barcodeIndex, barcodeIndex is
    # spoofed so that pairing can work properly; see build_sample in
    # runner.operator.argos_operator.bin
    orientation = get_r_orientation(pn_file.file_name)
    metadata['R'] = orientation
    metadata['barcodeIndex'] = spoof_barcode(sample['file_name'], orientation)
    metadata['flowCellId'] = 'PN_FCID'

    metadata['tumorOrNormal'] = 'Normal'
    metadata['patientId'] = 'PN_PATIENT_ID'
    metadata['specimenType'] = 'Pooled Normal'

    for blank_key in ('runMode', 'sampleClass'):
        metadata[blank_key] = ""

    sample['metadata'] = metadata
    return sample
def build_dmp_sample(dmp_bam, patient_id, bait_set, tumor_type):
    """
    Assemble the sample dictionary for a DMP BAM.

    Args:
        dmp_bam: object exposing ``metadata`` (a mapping that contains
            ``external_id``) and ``file`` (carrying ``id``, ``path``,
            ``file_name`` and ``file_type``).
        patient_id: value stored under the ``patientId`` metadata key.
        bait_set: value stored under both ``baitSet`` and ``recipe``.
        tumor_type: value stored under ``tumorOrNormal``.

    Returns:
        dict with the keys ``id``, ``path``, ``file_name``, ``file_type``
        and ``metadata``.

    Raises:
        KeyError: if ``dmp_bam.metadata`` has no ``external_id`` entry.
    """
    specimen_type = "DMP"
    sample_name = dmp_bam.metadata["external_id"]

    sample = {
        "id": dmp_bam.file.id,
        "path": dmp_bam.file.path,
        "file_name": dmp_bam.file.file_name,
        "file_type": dmp_bam.file.file_type,
    }

    metadata = init_metadata()
    formatted_name = format_sample_name(sample_name, specimen_type)
    metadata.update(
        {
            "sampleId": sample_name,
            "sampleName": formatted_name,
            "cmoSampleName": formatted_name,
            "requestId": sample_name,
            "sequencingCenter": "MSKCC",
            "platform": "Illumina",
            "baitSet": bait_set,
            "recipe": bait_set,
            # Legacy snake_case key, kept for any consumer that reads it.
            "run_id": "",
            "preservation": "",
            "libraryId": sample_name + "_1",
            "R": "Not applicable",
            # because rgid depends on flowCellId and barcodeIndex, fixed
            # placeholder values are used so that pairing can work
            # properly; see build_sample in
            # runner.operator.argos_operator.bin
            "barcodeIndex": "DMP_BARCODEIDX",
            "flowCellId": "DMP_FCID",
            "tumorOrNormal": tumor_type,
            "patientId": patient_id,
            "specimenType": specimen_type,
            "runMode": "",
            "sampleClass": "",
        }
    )

    # Every other key here is camelCase, and the pooled normal builder in
    # this file stores the run identifier under "runId". Make sure that
    # key exists for DMP samples too, without overwriting a value that
    # init_metadata() may already have provided.
    metadata.setdefault("runId", "")

    sample["metadata"] = metadata
    return sample
def build_dmp_sample(dmp_bam, patient_id, bait_set, tumor_type):
    """
    Build the sample record for a DMP BAM.

    The record holds the BAM file's ``id``, ``path``, ``file_name`` and
    ``file_type`` (read from ``dmp_bam.file``) plus a ``metadata``
    dictionary seeded by ``init_metadata()``.

    The sample name comes from ``dmp_bam.metadata['external_id']``; a
    missing entry raises ``KeyError``. ``bait_set`` fills both ``baitSet``
    and ``recipe``, ``tumor_type`` fills ``tumorOrNormal`` and
    ``patient_id`` fills ``patientId``.

    Returns the assembled sample dictionary.
    """
    specimen_type = "DMP"
    sample_name = dmp_bam.metadata['external_id']
    sequencing_center = "MSKCC"
    platform = "Illumina"

    bam_file = dmp_bam.file
    sample = dict()
    sample['id'] = bam_file.id
    sample['path'] = bam_file.path
    sample['file_name'] = bam_file.file_name
    sample['file_type'] = bam_file.file_type

    metadata = init_metadata()
    metadata['sampleId'] = sample_name
    display_name = format_sample_name(sample_name, specimen_type)
    metadata['sampleName'] = display_name
    metadata['cmoSampleName'] = display_name
    metadata['requestId'] = sample_name
    metadata['sequencingCenter'] = sequencing_center
    metadata['platform'] = platform
    metadata['baitSet'] = bait_set
    metadata['recipe'] = bait_set

    # 'run_id' is the legacy snake_case key and is kept for compatibility.
    # All other keys are camelCase, and the pooled normal builder in this
    # file uses 'runId', so ensure that key is present as well; setdefault
    # leaves any value already supplied by init_metadata() untouched.
    metadata['run_id'] = ""
    metadata.setdefault('runId', "")

    metadata['preservation'] = ""
    metadata['libraryId'] = sample_name + "_1"
    metadata['R'] = 'Not applicable'

    # because rgid depends on flowCellId and barcodeIndex, placeholder
    # values are used so that pairing can work properly; see
    # build_sample in runner.operator.argos_operator.bin
    metadata['barcodeIndex'] = 'DMP_BARCODEIDX'
    metadata['flowCellId'] = 'DMP_FCID'

    metadata['tumorOrNormal'] = tumor_type
    metadata['patientId'] = patient_id
    metadata['specimenType'] = specimen_type
    metadata['runMode'] = ""
    metadata['sampleClass'] = ""

    sample['metadata'] = metadata
    return sample