Exemple #1
0
        f2 = os.popen('samtools faidx ' + refname + ' ' + rname + ':' +
                      str(pos) + '-' + str(pos + len(aligned_read) - 1))
        aligned_ref = ''.join([line2.rstrip('\n') for line2 in f2][1:])
        f2.close()

        positions = range(pos, pos + len(aligned_ref))

        # reverse complement
        if reverse_flag:
            aligned_read = str(Seq(aligned_read).reverse_complement())
            aligned_ref = str(Seq(aligned_ref).reverse_complement())
            aligned_quality = aligned_quality[::-1]
            positions = positions[::-1]

        hs_all, hd_all = homodist(aligned_ref)

        # index into the read to get this position
        siteindex = positions.index(i)

        ref = aligned_ref[siteindex]
        if not true and not reverse_flag:
            true = ref
        call = aligned_read[siteindex]
        hs = hs_all[siteindex]
        hd = hd_all[siteindex]
        q = aligned_quality[siteindex]
        # q = ord(aligned_quality[siteindex]) - 33

        if call == '-':
            continue
Exemple #2
0
#!/bin/python

# generates homopolymer pickle based on reference fasta (presumably from a single amplicon)
# this script generates the homopolymer data corresponding to a fasta (only one region ">" line)

import sys, pickle, re
from covariatefuncs import homodist

# Command-line arguments: input reference FASTA and output pickle path.
fasta = sys.argv[1]
homoout = sys.argv[2]

# Read the whole FASTA up front; the context manager closes the handle
# promptly instead of leaving it to the garbage collector.
with open(fasta, 'r') as fasta_handle:
    lines = fasta_handle.readlines()

# The header line is split on '>', ' ', ':' and '-' after dropping its first
# character, and must yield exactly three fields (e.g. ">chrom:start-end").
# int() below tolerates the newline left on the last field.
# `chrom` replaces the former local name `chr`, which shadowed the builtin.
chrom, start, end = re.split('[> : -]', lines[0][1:])
fullfasta = ''.join(lines[1:]).replace('\n', '')

# homodist() does not depend on the loop variable, so evaluate it once per
# orientation instead of twice per base (the original was quadratic).
# NOTE(review): assumes homodist is a pure function of its argument -- confirm.
forward_homo = homodist(fullfasta)
reverse_homo = homodist(fullfasta[::-1])

outdict = {}
# One "chrom:position" key per coordinate of the declared region (end inclusive).
keys = [chrom + ':' + str(x) for x in range(int(start), int(end) + 1)]
for pos in range(len(fullfasta)):
    # Index -pos-1 in the reversed sequence refers to the same base as index
    # pos in the forward sequence.
    outdict[keys[pos]] = {
        'forward': forward_homo[pos],
        'reverse': reverse_homo[-pos - 1],
    }

# Pickle streams are binary: 'wb' is required on Python 3 and avoids newline
# translation on Windows.  The `with` block guarantees a flush and close.
with open(homoout, 'wb') as out_handle:
    pickle.dump(outdict, out_handle)
Exemple #3
0
#!/bin/python

# generates homopolymer pickle based on reference fasta (presumably from a single amplicon)
# this script generates the homopolymer data corresponding to a fasta (only one region ">" line)

import sys, pickle, re
from covariatefuncs import homodist

# Command-line arguments: input reference FASTA and output pickle path.
fasta = sys.argv[1]
homoout = sys.argv[2]

# Read the whole FASTA up front; the context manager closes the handle
# promptly instead of leaving it to the garbage collector.
with open(fasta, 'r') as fasta_handle:
    lines = fasta_handle.readlines()

# The header line is split on '>', ' ', ':' and '-' after dropping its first
# character, and must yield exactly three fields (e.g. ">chrom:start-end").
# int() below tolerates the newline left on the last field.
# `chrom` replaces the former local name `chr`, which shadowed the builtin.
chrom, start, end = re.split('[> : -]', lines[0][1:])
fullfasta = ''.join(lines[1:]).replace('\n', '')

# homodist() does not depend on the loop variable, so evaluate it once per
# orientation instead of twice per base (the original was quadratic).
# NOTE(review): assumes homodist is a pure function of its argument -- confirm.
forward_homo = homodist(fullfasta)
reverse_homo = homodist(fullfasta[::-1])

outdict = {}
# One "chrom:position" key per coordinate of the declared region (end inclusive).
keys = [chrom + ':' + str(x) for x in range(int(start), int(end) + 1)]
for pos in range(len(fullfasta)):
    # Index -pos-1 in the reversed sequence refers to the same base as index
    # pos in the forward sequence.
    outdict[keys[pos]] = {
        'forward': forward_homo[pos],
        'reverse': reverse_homo[-pos - 1],
    }

# Pickle streams are binary: 'wb' is required on Python 3 and avoids newline
# translation on Windows.  The `with` block guarantees a flush and close.
with open(homoout, 'wb') as out_handle:
    pickle.dump(outdict, out_handle)
Exemple #4
0
    aligned_ref = ''.join([line.rstrip('\n') for line in f][1:])
    f.close()

    # reverse complement
    if reverse_flag:
        aligned_read = Seq(aligned_read).reverse_complement();
        aligned_ref = Seq(aligned_ref).reverse_complement();
        aligned_quality = aligned_quality[::-1];

    # ignore deletions from ref (insertions were removed by cigar2align)
    # dels = aligned_read ~= '-';
    # aligned_read = aligned_read(dels);
    # aligned_ref = aligned_ref(dels);
    # aligned_quality = aligned_quality(dels);
    # pos = pos(dels);
    
    # compute the error pattern variables

    GC = (aligned_ref.count('G') + aligned_ref.count('C'))/float(len(aligned_ref))
    hs, hd = homodist(str(aligned_ref))
    readlen = len(seq)
    for i in range(len(aligned_read)):
        if aligned_read[i] == '-': continue
        #region = rname[3:]+':'+str(pos[i])+'-'+str(pos[i])
        #f = os.popen('/ifs/scratch/c2b2/ngs_lab/sz2317/bin/src/tabix-0.2.5/tabix /ifs/scratch/c2b2/ngs_lab/ngs/Projects/IonTorrent/will/xunhai/covariate_table/dbSNP132b37.vcf.gz '+region)
        #if f.readline() == '':
        out.write('\t'.join(map(str,[rname, pos[i], aligned_ref[i], aligned_read[i], readlen, int(reverse_flag), aligned_quality[i], pos[i], hs[i], hd[i], GC]))+'\n')
        #f.close()

out.close()
Exemple #5
0
        aligned_read, aligned_quality = aligned_data[regionindex].split() 

        f2 = os.popen('samtools faidx '+refname+' '+rname+':'+str(pos)+'-'+str(pos + len(aligned_read) - 1))
        aligned_ref = ''.join([line2.rstrip('\n') for line2 in f2][1:])
        f2.close()

        positions = range(pos, pos + len(aligned_ref))

        # reverse complement
        if reverse_flag:
            aligned_read = str(Seq(aligned_read).reverse_complement());
            aligned_ref = str(Seq(aligned_ref).reverse_complement());
            aligned_quality = aligned_quality[::-1];
            positions = positions[::-1]

        hs_all, hd_all = homodist(aligned_ref)

        # index into the read to get this position
        siteindex = positions.index(i)

        ref = aligned_ref[siteindex]        
        if not true and not reverse_flag:
            true = ref
        call = aligned_read[siteindex]
        hs = hs_all[siteindex]
        hd = hd_all[siteindex]
        q = aligned_quality[siteindex]
        # q = ord(aligned_quality[siteindex]) - 33

        if call == '-':
            continue