コード例 #1
0
ファイル: amplitest.py プロジェクト: dridk/sanger_amplitest
def amplitude_from_abif(filename):
    '''
    Return the amplitude recorded for each base of the called sequence.

    filename : sanger data in abif format, opened with ``Trace``

    Returns a list of ``(base, amplitude)`` tuples, one per position of
    ``abif.seq`` and in sequence order. The amplitude is the element of
    the "P1AM1" data field found at the same index as the base.
    '''
    abif = Trace(filename)
    # Read the amplitude field once; it does not change between bases.
    ampl = abif.get_data("P1AM1")

    # Index explicitly instead of zipping: if the amplitude field is shorter
    # than the sequence this still raises IndexError rather than silently
    # dropping the trailing bases.
    return [(base, ampl[index]) for index, base in enumerate(abif.seq)]
コード例 #2
0
def amplitude_from_abif(filename):
    '''
    Pair every called base with its "P1AM1" amplitude value.

    filename : sanger data in abif format, opened with ``Trace``

    Returns a list of ``(base, amplitude)`` tuples. Entry ``i`` combines
    ``abif.seq[i]`` with element ``i`` of the "P1AM1" data field.
    '''
    abif = Trace(filename)
    # The data field is loop-invariant, so look it up a single time.
    amplitudes = abif.get_data("P1AM1")

    # Explicit indexing keeps the IndexError when the amplitude field has
    # fewer entries than the sequence (zip would truncate silently).
    return [
        (base, amplitudes[position])
        for position, base in enumerate(abif.seq)
    ]
コード例 #3
0
ファイル: basecall.py プロジェクト: steves880106/sangerseq
    def __init__(self, filename, ratio, q_set):
        """Read a trace file and immediately run base calling on it.

        filename : path handed to ``Trace``.
        ratio    : converted with ``float`` and forwarded to ``makebasecall``.
        q_set    : converted with ``int`` and forwarded to ``makebasecall``.

        After loading the data fields from the trace, the four values
        returned by ``makebasecall`` are stored as ``primarySeq``,
        ``secondarySeq``, ``trim5`` and ``trim3``.
        """
        trace = Trace(filename)
        self.ratio = float(ratio)
        self.q_set = int(q_set)

        # Placeholders; overwritten by the makebasecall() result below.
        self.primarySeq, self.secondarySeq = [], []
        self.trim5 = self.trim3 = 0

        # The order of the lightwave of nucleotides.
        self.order = list(trace.get_data('FWO_1'))

        # Bases called by the basecaller of the machine.
        self.primary_basecalls = trace.get_data('PBAS2')
        # Base quality reported by the basecaller.
        self.primary_quality = trace.get_data('PCON2')
        # Primary peak location called by the basecaller.
        self.primary_basecallPos = np.array(trace.get_data('PLOC2'))

        # Unprocessed raw data: analyzed color data, one row per channel.
        self.traceMatrix = np.array(
            [trace.get_data(channel)
             for channel in ('DATA9', 'DATA10', 'DATA11', 'DATA12')])

        #self.peakPosMatrix , self.peakAmpMatrix = self.getPeakPosAndAmp()

        # Correction is only enabled when all three peak fields are present;
        # a missing field surfaces as KeyError and switches correction off.
        try:
            # Primary peak amplitude called by the basecaller.
            self.primary_peakAmp = np.array(trace.get_data('P1AM1'))
            self.secondary_peakAmp = np.array(trace.get_data('P2AM1'))
            self.secondary_peakBase = trace.get_data('P2BA1')
        except KeyError:
            correction = False
        else:
            correction = True

        self.background_value = None
        called = self.makebasecall(self.ratio, self.q_set, correction)
        self.primarySeq, self.secondarySeq, self.trim5, self.trim3 = called
コード例 #4
0
def main(trace, seq, gene2locus):
    '''
    Genotype the loci of one gene from a trace file and its sequence.

    trace      : trace file, opened with ``Trace``
    seq        : query passed to ``blastn -query``; trace and seq must
                 correspond to each other correctly
    gene2locus : mapping of BLAST subject id -> {locus id: coordinate on
                 that subject}

    Returns a dict mapping locus id -> two-letter string. For a position
    detected as heterozygous this is the called base followed by the
    alternative base; otherwise it is the aligned query base doubled.
    Loci whose coordinate lies outside the BLAST hit are omitted.

    Raises ValueError when blastn yields no usable hit line, and
    AssertionError when the hit subject id is not a key of ``gene2locus``.
    '''
    heteroSite = {}
    gft2base = {}
    st = Trace(trace)
    startBase = SKIP_BASE_NUM
    endBase = len(st.seq) - SKIP_BASE_NUM
    basePositions = st.get_data('PLOC1')
    baseCalls = st.seq
    traceSamps = {
        'G': st.get_data('DATA9'),
        'A': st.get_data('DATA10'),
        'T': st.get_data('DATA11'),
        'C': st.get_data('DATA12'),
    }

    #detect hetero base
    # heteroPos is the 1-based index of the base within the read, so the
    # matching base call lives at baseCalls[heteroPos - 1].
    for heteroPos, basePos in enumerate(basePositions[startBase:endBase],
                                        startBase + 1):
        #signal at the base position, in G, A, T, C order
        tracePos = [traceSamps[channel][basePos] for channel in 'GATC']
        if max(tracePos) < TRACE_THRESHOLD:
            continue
        #calculate the first two largest trace value's similarity by formula: std / mean
        stdMn = calSimilty(sorted(tracePos, reverse=True)[:2])
        if stdMn > SIMILTY_THRESHOLD:
            continue
        altBase = baseOrderDict[getSecondLarge(tracePos)]
        base = baseCalls[heteroPos - 1]
        # Each heteroPos is visited exactly once, so no membership check is
        # needed before storing it.
        heteroSite[heteroPos] = [base, altBase]
        if args.debug.lower() == 'y':
            print(
                '{orgbasePos}\t{pos}\t{base}\t{G}\t{A}\t{T}\t{C}\t{stdMn}\t{altBase}'
                .format(base=base,
                        G=tracePos[0],
                        A=tracePos[1],
                        T=tracePos[2],
                        C=tracePos[3],
                        stdMn=stdMn,
                        pos=heteroPos,
                        orgbasePos=basePos,
                        altBase=altBase))

    #run blastn
    # NOTE(review): the command is a shell string built from `seq` and
    # `refSeq`; switch to an argument list if either can come from
    # untrusted input.
    cmd = 'blastn -query "{0}" -db {1}   -evalue 0.01 -outfmt  "6 qseqid sseqid qstart qend sstart send qseq sseq btop"   -num_threads 8 -task megablast -max_target_seqs 1'.format(
        seq, refSeq)
    logging.debug(cmd)
    # Close the pipe deterministically instead of leaving it to the GC.
    with os.popen(cmd) as pipe:
        blastRes = pipe.read()

    #extract base content
    # blastn may report several alignments for the single target, one per
    # line; only the first reported line is used.
    hitLines = blastRes.rstrip().splitlines()
    if not hitLines:
        raise ValueError('blastn returned no hit for query {0}'.format(seq))
    fields = hitLines[0].split('\t')
    if len(fields) != 9:
        raise ValueError(
            'unexpected blastn output line: {0!r}'.format(hitLines[0]))
    qseqid, sseqid, qstart, qend, sstart, send, qseq, sseq, btop = fields
    assert sseqid in gene2locus, '{0} not valid gene id'.format(sseqid)
    qstart, sstart, send = int(qstart), int(sstart), int(send)

    for gftid, cord in gene2locus[sseqid].items():
        cord = int(cord)
        if not (sstart <= cord <= send):
            #hit W/O range
            continue
        #hit within range
        offset = cord - sstart
        pos = qstart + offset
        if pos in heteroSite:  #encounter a hetero site
            gft2base[gftid] = ''.join(heteroSite[pos])
        else:
            # qseq[0] is the query base at qstart, so the base at `pos` is
            # qseq[offset]. The former `offset - 1` pointed one base to the
            # left and wrapped around to qseq[-1] when cord == sstart.
            # NOTE(review): both this offset and `pos` assume an ungapped
            # alignment - confirm gaps cannot occur for these references.
            gft2base[gftid] = '{0}{0}'.format(qseq[offset])
    return gft2base