예제 #1
0
def addContext(indelFile, indelSeqFileIns, outFile):
    """Copy a tab-delimited indel table, appending sequence-context columns.

    Every data row of ``indelFile`` is written to ``outFile`` with its
    original columns (in header order) followed by the values stored for
    that row's ``chrom:start`` key in the sequences returned by
    ``loadSeqs``.  The output header gains the column names ``st_seq-1``,
    ``st_seq`` and ``st_seq+1``.

    Args:
        indelFile: Path of the tab-delimited input table.  It needs a header
            row containing at least a ``chrom`` column, plus whatever
            ``mkSnvBedFile.fixCoords`` reads from a row.
        indelSeqFileIns: Path handed to ``loadSeqs`` (wrapped in a 1-tuple).
        outFile: Path of the table to write; an existing file is overwritten.

    Raises:
        KeyError: If a row's ``chrom:start`` key is absent from the loaded
            sequences.
    """
    seqs = loadSeqs((indelSeqFileIns,))
    with open(outFile, 'w') as fout, open(indelFile) as f:
        reader = csv.DictReader(f, delimiter='\t')
        header = reader.fieldnames
        print('\t'.join(header + ['st_seq-1', 'st_seq', 'st_seq+1']),
              file=fout)
        for row in reader:
            # Only the start coordinate takes part in the lookup key.
            st, _ = mkSnvBedFile.fixCoords(row)
            # str() mirrors the key construction in annotateWithMasterVarCall
            # and keeps the join valid even if fixCoords returns an int.
            indelKeySt = ':'.join([row['chrom'], str(st)])
            # NOTE(review): presumably each seqs entry holds three strings,
            # one per added header column -- confirm against loadSeqs.
            print('\t'.join([row[x] for x in header]
                            + list(seqs[indelKeySt])),
                  file=fout)
def annotateWithMasterVarCall(indelFile, indelCovFile,
                              indelTumorCovFile, annOut):
    """Append sample and tumor coverage columns to a tab-delimited indel table.

    Coverage is loaded from ``indelCovFile`` and ``indelTumorCovFile`` with
    ``getIndelCov`` and looked up per row by a ``chrom:start:end`` key, where
    start and end come from ``mkSnvBedFile.fixCoords``.

    Rows whose key is present in both coverage lookups are written to
    ``annOut`` with two extra columns, ``sampleCov`` and ``tumorSampleCov``.
    Rows missing from either lookup are not written; their key is printed
    to standard output instead.

    Args:
        indelFile: Path of the tab-delimited input table (header row with
            at least a ``chrom`` column).
        indelCovFile: Path passed to ``getIndelCov`` for the sample coverage.
        indelTumorCovFile: Path passed to ``getIndelCov`` for the tumor
            coverage.
        annOut: Path of the annotated table to write; overwritten if present.
    """
    normalCov = getIndelCov(indelCovFile)
    tumorCov = getIndelCov(indelTumorCovFile)

    with open(annOut, 'w') as fout, open(indelFile) as f:
        table = csv.DictReader(f, delimiter='\t')
        columns = table.fieldnames
        titleLine = '\t'.join(columns + ['sampleCov', 'tumorSampleCov'])
        print(titleLine, file=fout)
        for record in table:
            start, stop = mkSnvBedFile.fixCoords(record)
            key = ':'.join((record['chrom'], str(start), str(stop)))
            # Without coverage in both lookups the row is dropped from the
            # output and only its key is reported on stdout.
            if key not in normalCov or key not in tumorCov:
                print(key)
                continue
            fields = [record[name] for name in columns]
            fields += [normalCov[key], tumorCov[key]]
            print('\t'.join(fields), file=fout)
        for line in f:
            if line.split('\t')[1] != 'chromStart':
                print(line[3:].strip(), file=fout)
    
    captureDataStrict = bed_tools.load_nb222_exome_capture_driver('tmp.bed', 0)
    captureDataRelaxed = bed_tools.load_nb222_exome_capture_driver('tmp.bed', 100)
    with open(fileToAnnotate) as f, open(outFile, 'w') as fout:
        reader = csv.DictReader(f, delimiter='\t')
        print('\t'.join(reader.fieldnames) + '\t'
              + '\t'.join(['CAPTURE_STRICT_' + title,
                           'CAPTURE_RELAXED_' + title,
                           ]),
              file=fout)
        for row in reader:
            chrom = row['chrom']
            st,end = mkSnvBedFile.fixCoords(row)
            st = int(st)

            status = {}
            for captureData, label in ( (captureDataStrict, 'STRICT'),
                                        (captureDataRelaxed, 'RELAXED') ):
                captureStatusSt = bed_tools.find_location_in_bed("chr" + chrom, st, captureData[1], captureData[0])
                if captureStatusSt:
                    status[label] = 'CAP_%s_TRUE' % (label,)
                else:
                    status[label] = 'CAP_%s_FALSE' % (label,)

            print('\t'.join([row[x] for x in reader.fieldnames]
                            + [status['STRICT'],
                               status['RELAXED'],
                               ]),