f2 = os.popen('samtools faidx ' + refname + ' ' + rname + ':' + str(pos) + '-' + str(pos + len(aligned_read) - 1)) aligned_ref = ''.join([line2.rstrip('\n') for line2 in f2][1:]) f2.close() positions = range(pos, pos + len(aligned_ref)) # reverse complement if reverse_flag: aligned_read = str(Seq(aligned_read).reverse_complement()) aligned_ref = str(Seq(aligned_ref).reverse_complement()) aligned_quality = aligned_quality[::-1] positions = positions[::-1] hs_all, hd_all = homodist(aligned_ref) # index into the read to get this position siteindex = positions.index(i) ref = aligned_ref[siteindex] if not true and not reverse_flag: true = ref call = aligned_read[siteindex] hs = hs_all[siteindex] hd = hd_all[siteindex] q = aligned_quality[siteindex] # q = ord(aligned_quality[siteindex]) - 33 if call == '-': continue
#!/bin/python
"""Generate a homopolymer pickle from a reference FASTA.

Usage: <this script> <fasta> <homoout>

The FASTA is expected to hold a single region (only one ">" line,
presumably from a single amplicon) whose header reads ``>chr:start-end``.
The output pickle maps every ``"chr:position"`` key to a dict with two
entries:

* ``'forward'`` -- ``homodist`` of the sequence, indexed at that base;
* ``'reverse'`` -- ``homodist`` of the reversed sequence, indexed at the
  mirrored offset.
"""
import pickle
import re
import sys

from covariatefuncs import homodist

fasta = sys.argv[1]
homoout = sys.argv[2]

# Read the whole FASTA up front; the context manager closes the handle.
with open(fasta, 'r') as fasta_handle:
    lines = fasta_handle.readlines()

# Split the header (without its leading '>') on '>', ' ', ':' and '-'.
# The trailing newline stays attached to `end`; int() tolerates it.
chrom, start, end = re.split('[> : -]', lines[0][1:])
fullfasta = ''.join(lines[1:]).replace('\n', '')

# One key per reference coordinate, inclusive of both ends of the range.
keys = [chrom + ':' + str(x) for x in range(int(start), int(end) + 1)]

outdict = {}
if fullfasta:
    # Both profiles are loop-invariant, so compute them once instead of once
    # per base.  Assumes homodist is a pure function of its argument --
    # TODO confirm against covariatefuncs.
    # NOTE(review): the result is indexed per base here; verify that this
    # matches what the installed homodist returns.
    forward_profile = homodist(fullfasta)
    reverse_profile = homodist(fullfasta[::-1])
    for pos in range(len(fullfasta)):
        # keys[pos] raises IndexError when the sequence is longer than the
        # header's start-end range, exposing a header/sequence mismatch.
        outdict[keys[pos]] = {
            'forward': forward_profile[pos],
            'reverse': reverse_profile[-pos - 1],
        }

# Pickle streams are bytes: write through a binary handle and close it.
with open(homoout, 'wb') as pickle_handle:
    pickle.dump(outdict, pickle_handle)
#!/bin/python
"""Generate a homopolymer pickle from a reference FASTA.

Usage: <this script> <fasta> <homoout>

The FASTA is expected to hold a single region (only one ">" line,
presumably from a single amplicon) whose header reads ``>chr:start-end``.
The output pickle maps every ``"chr:position"`` key to a dict with two
entries:

* ``'forward'`` -- ``homodist`` of the sequence, indexed at that base;
* ``'reverse'`` -- ``homodist`` of the reversed sequence, indexed at the
  mirrored offset.
"""
import pickle
import re
import sys

from covariatefuncs import homodist

fasta = sys.argv[1]
homoout = sys.argv[2]

# Read the whole FASTA up front; the context manager closes the handle.
with open(fasta, 'r') as fasta_handle:
    lines = fasta_handle.readlines()

# Split the header (without its leading '>') on '>', ' ', ':' and '-'.
# The trailing newline stays attached to `end`; int() tolerates it.
chrom, start, end = re.split('[> : -]', lines[0][1:])
fullfasta = ''.join(lines[1:]).replace('\n', '')

# One key per reference coordinate, inclusive of both ends of the range.
keys = [chrom + ':' + str(x) for x in range(int(start), int(end) + 1)]

outdict = {}
if fullfasta:
    # Both profiles are loop-invariant, so compute them once instead of once
    # per base.  Assumes homodist is a pure function of its argument --
    # TODO confirm against covariatefuncs.
    # NOTE(review): the result is indexed per base here; verify that this
    # matches what the installed homodist returns.
    forward_profile = homodist(fullfasta)
    reverse_profile = homodist(fullfasta[::-1])
    for pos in range(len(fullfasta)):
        # keys[pos] raises IndexError when the sequence is longer than the
        # header's start-end range, exposing a header/sequence mismatch.
        outdict[keys[pos]] = {
            'forward': forward_profile[pos],
            'reverse': reverse_profile[-pos - 1],
        }

# Pickle streams are bytes: write through a binary handle and close it.
with open(homoout, 'wb') as pickle_handle:
    pickle.dump(outdict, pickle_handle)
# Gather the lines available on the already-open handle `f`, drop the first
# one (presumably a FASTA header -- confirm against the code that opens `f`)
# and glue the rest into a single reference string.
stripped_ref_lines = [line.rstrip('\n') for line in f]
aligned_ref = ''.join(stripped_ref_lines[1:])
f.close()

# Reverse-strand reads: reverse-complement both sequences and flip the
# quality string so the three stay index-aligned with each other.
# NOTE(review): `pos` is not reversed here -- confirm that pos[i] still
# lines up with the reversed read in the rows written below.
if reverse_flag:
    aligned_read, aligned_ref = (
        Seq(aligned_read).reverse_complement(),
        Seq(aligned_ref).reverse_complement(),
    )
    aligned_quality = aligned_quality[::-1]

# Deletions from the reference are not filtered out of the arrays up front
# (per the original note, insertions were already removed by cigar2align);
# deleted positions are skipped one by one in the loop below instead.

# Error-pattern covariates for this alignment: G/C fraction of the aligned
# reference, the two homodist outputs, and the read length.
gc_base_count = aligned_ref.count('G') + aligned_ref.count('C')
GC = gc_base_count / float(len(aligned_ref))
hs, hd = homodist(str(aligned_ref))
readlen = len(seq)

# Write one tab-separated row per aligned base.  A previously used dbSNP
# (tabix) lookup that filtered sites before writing is no longer applied.
for i, read_base in enumerate(aligned_read):
    # '-' marks a deletion relative to the reference; no row is written.
    if read_base == '-':
        continue
    # NOTE(review): pos[i] is written twice (columns 2 and 8) -- confirm
    # that this is intentional.
    out_columns = [
        rname,
        pos[i],
        aligned_ref[i],
        read_base,
        readlen,
        int(reverse_flag),
        aligned_quality[i],
        pos[i],
        hs[i],
        hd[i],
        GC,
    ]
    out.write('\t'.join(str(value) for value in out_columns) + '\n')

out.close()
aligned_read, aligned_quality = aligned_data[regionindex].split() f2 = os.popen('samtools faidx '+refname+' '+rname+':'+str(pos)+'-'+str(pos + len(aligned_read) - 1)) aligned_ref = ''.join([line2.rstrip('\n') for line2 in f2][1:]) f2.close() positions = range(pos, pos + len(aligned_ref)) # reverse complement if reverse_flag: aligned_read = str(Seq(aligned_read).reverse_complement()); aligned_ref = str(Seq(aligned_ref).reverse_complement()); aligned_quality = aligned_quality[::-1]; positions = positions[::-1] hs_all, hd_all = homodist(aligned_ref) # index into the read to get this position siteindex = positions.index(i) ref = aligned_ref[siteindex] if not true and not reverse_flag: true = ref call = aligned_read[siteindex] hs = hs_all[siteindex] hd = hd_all[siteindex] q = aligned_quality[siteindex] # q = ord(aligned_quality[siteindex]) - 33 if call == '-': continue