Ejemplo n.º 1
0
def build_pooled_normal_sample_by_file(pooled_normal, run_ids,
                                       preservation_types, bait_set,
                                       sample_name):
    """Build the sample record for a single pooled-normal file.

    The returned dict carries the file's ``id``, ``path`` and ``file_name``
    plus a ``metadata`` mapping. The mapping starts from ``init_metadata()``
    and is then filled with fixed pooled-normal placeholders and the values
    supplied by the caller.

    Args:
        pooled_normal: object exposing ``file.id``, ``file.path`` and
            ``file.file_name``.
        run_ids: stored unchanged under ``runId``.
        preservation_types: stored unchanged under ``preservation``.
        bait_set: stored under both ``baitSet`` and ``recipe``.
        sample_name: used for the sample id, sample name, CMO sample name
            and request id, and as the prefix of ``libraryId``.

    Returns:
        dict with keys ``id``, ``path``, ``file_name`` and ``metadata``.
    """
    pn_file = pooled_normal.file
    sample = {
        "id": pn_file.id,
        "path": pn_file.path,
        "file_name": pn_file.file_name,
    }

    metadata = init_metadata()
    library_id = sample_name + "_1"
    r_orientation = get_r_orientation(sample["file_name"])
    # because rgid depends on flowCellId and barcodeIndex, we will
    # spoof barcodeIndex so that pairing can work properly; see
    # build_sample in runner.operator.argos_operator.bin
    barcode_index = spoof_barcode(sample["file_name"], r_orientation)

    # Assigned one key at a time, in this order, on top of whatever
    # init_metadata() already provided.
    assignments = (
        ("sampleId", sample_name),
        ("sampleName", sample_name),
        ("cmoSampleName", sample_name),
        ("requestId", sample_name),
        ("sequencingCenter", "MSKCC"),
        ("platform", "Illumina"),
        ("baitSet", bait_set),
        ("recipe", bait_set),
        ("runId", run_ids),
        ("preservation", preservation_types),
        ("libraryId", library_id),
        ("R", r_orientation),
        ("barcodeIndex", barcode_index),
        ("flowCellId", "PN_FCID"),
        ("tumorOrNormal", "Normal"),
        ("patientId", "PN_PATIENT_ID"),
        ("specimenType", "Pooled Normal"),
        ("runMode", ""),
        ("sampleClass", ""),
    )
    for key, value in assignments:
        metadata[key] = value

    sample["metadata"] = metadata
    return sample
Ejemplo n.º 2
0
def build_pooled_normal_sample_by_file(pooled_normal, run_ids, preservation_types, bait_set, sample_name):
    """Assemble a sample dict describing one pooled-normal file.

    File attributes (``id``, ``path``, ``file_name``) are copied from
    ``pooled_normal.file``. The ``metadata`` entry starts from
    ``init_metadata()`` and is then filled with the caller-supplied values
    and hard-coded pooled-normal placeholders.

    Args:
        pooled_normal: object exposing ``file.id``, ``file.path`` and
            ``file.file_name``.
        run_ids: value stored as-is under ``runId``.
        preservation_types: value stored as-is under ``preservation``.
        bait_set: value stored under both ``baitSet`` and ``recipe``.
        sample_name: value stored under ``sampleId``, ``sampleName``,
            ``cmoSampleName`` and ``requestId``; ``libraryId`` is this
            value with ``"_1"`` appended.

    Returns:
        dict with keys ``id``, ``path``, ``file_name`` and ``metadata``.
    """
    source_file = pooled_normal.file
    file_name = source_file.file_name
    sample = {'id': source_file.id, 'path': source_file.path, 'file_name': file_name}

    metadata = init_metadata()
    library_id = sample_name + "_1"
    orientation = get_r_orientation(file_name)
    # because rgid depends on flowCellId and barcodeIndex, we will
    # spoof barcodeIndex so that pairing can work properly; see
    # build_sample in runner.operator.argos_operator.bin
    spoofed_barcode = spoof_barcode(file_name, orientation)

    fields = {
        'sampleId': sample_name,
        'sampleName': sample_name,
        'cmoSampleName': sample_name,
        'requestId': sample_name,
        'sequencingCenter': "MSKCC",
        'platform': "Illumina",
        'baitSet': bait_set,
        'recipe': bait_set,
        'runId': run_ids,
        'preservation': preservation_types,
        'libraryId': library_id,
        'R': orientation,
        'barcodeIndex': spoofed_barcode,
        'flowCellId': 'PN_FCID',
        'tumorOrNormal': 'Normal',
        'patientId': 'PN_PATIENT_ID',
        'specimenType': 'Pooled Normal',
        'runMode': "",
        'sampleClass': "",
    }
    # Item-by-item assignment keeps any other keys init_metadata() supplied.
    for field_name, field_value in fields.items():
        metadata[field_name] = field_value

    sample['metadata'] = metadata
    return sample
Ejemplo n.º 3
0
def build_dmp_sample(dmp_bam, patient_id, bait_set, tumor_type):
    """Build the sample record for a DMP BAM.

    The sample name is read from ``dmp_bam.metadata["external_id"]``. The
    returned dict carries the file's ``id``, ``path``, ``file_name`` and
    ``file_type`` plus a ``metadata`` mapping that starts from
    ``init_metadata()`` and is filled with DMP placeholders and the values
    supplied by the caller.

    Args:
        dmp_bam: object exposing ``metadata`` (indexable by
            ``"external_id"``) and ``file`` (with ``id``, ``path``,
            ``file_name`` and ``file_type``).
        patient_id: stored under ``patientId``.
        bait_set: stored under both ``baitSet`` and ``recipe``.
        tumor_type: stored under ``tumorOrNormal``.

    Returns:
        dict with keys ``id``, ``path``, ``file_name``, ``file_type`` and
        ``metadata``.
    """
    specimen_type = "DMP"
    sample_name = dmp_bam.metadata["external_id"]

    bam_file = dmp_bam.file
    sample = {
        "id": bam_file.id,
        "path": bam_file.path,
        "file_name": bam_file.file_name,
        "file_type": bam_file.file_type,
    }

    metadata = init_metadata()
    formatted_name = format_sample_name(sample_name, specimen_type)
    library_id = sample_name + "_1"

    assignments = (
        ("sampleId", sample_name),
        ("sampleName", formatted_name),
        ("cmoSampleName", formatted_name),
        ("requestId", sample_name),
        ("sequencingCenter", "MSKCC"),
        ("platform", "Illumina"),
        ("baitSet", bait_set),
        ("recipe", bait_set),
        # NOTE(review): this key is spelled "run_id", whereas
        # build_pooled_normal_sample_by_file writes "runId" -- confirm
        # which spelling downstream consumers expect.
        ("run_id", ""),
        ("preservation", ""),
        ("libraryId", library_id),
        ("R", "Not applicable"),
        # because rgid depends on flowCellId and barcodeIndex, we will
        # spoof barcodeIndex so that pairing can work properly; see
        # build_sample in runner.operator.argos_operator.bin
        ("barcodeIndex", "DMP_BARCODEIDX"),
        ("flowCellId", "DMP_FCID"),
        ("tumorOrNormal", tumor_type),
        ("patientId", patient_id),
        ("specimenType", specimen_type),
        ("runMode", ""),
        ("sampleClass", ""),
    )
    for key, value in assignments:
        metadata[key] = value

    sample["metadata"] = metadata
    return sample
Ejemplo n.º 4
0
def build_dmp_sample(dmp_bam, patient_id, bait_set, tumor_type):
    """Assemble a sample dict describing one DMP BAM.

    File attributes (``id``, ``path``, ``file_name``, ``file_type``) are
    copied from ``dmp_bam.file``. The ``metadata`` entry starts from
    ``init_metadata()`` and is then filled with DMP placeholders, the
    caller-supplied values, and names derived from
    ``dmp_bam.metadata['external_id']``.

    Args:
        dmp_bam: object exposing ``metadata`` (indexable by
            ``'external_id'``) and ``file`` (with ``id``, ``path``,
            ``file_name`` and ``file_type``).
        patient_id: value stored under ``patientId``.
        bait_set: value stored under both ``baitSet`` and ``recipe``.
        tumor_type: value stored under ``tumorOrNormal``.

    Returns:
        dict with keys ``id``, ``path``, ``file_name``, ``file_type`` and
        ``metadata``.
    """
    specimen_type = "DMP"
    external_id = dmp_bam.metadata['external_id']

    source_file = dmp_bam.file
    sample = dict(
        id=source_file.id,
        path=source_file.path,
        file_name=source_file.file_name,
        file_type=source_file.file_type,
    )

    metadata = init_metadata()
    display_name = format_sample_name(external_id, specimen_type)
    library_id = external_id + "_1"

    fields = {
        'sampleId': external_id,
        'sampleName': display_name,
        'cmoSampleName': display_name,
        'requestId': external_id,
        'sequencingCenter': "MSKCC",
        'platform': "Illumina",
        'baitSet': bait_set,
        'recipe': bait_set,
        # NOTE(review): this key is spelled 'run_id', whereas
        # build_pooled_normal_sample_by_file writes 'runId' -- confirm
        # which spelling downstream consumers expect.
        'run_id': "",
        'preservation': "",
        'libraryId': library_id,
        'R': 'Not applicable',
        # because rgid depends on flowCellId and barcodeIndex, we will
        # spoof barcodeIndex so that pairing can work properly; see
        # build_sample in runner.operator.argos_operator.bin
        'barcodeIndex': 'DMP_BARCODEIDX',
        'flowCellId': 'DMP_FCID',
        'tumorOrNormal': tumor_type,
        'patientId': patient_id,
        'specimenType': specimen_type,
        'runMode': "",
        'sampleClass': "",
    }
    # Item-by-item assignment keeps any other keys init_metadata() supplied.
    for field_name, field_value in fields.items():
        metadata[field_name] = field_value

    sample['metadata'] = metadata
    return sample