Esempio n. 1
0
def test(r=1, **kw):
    """Time one global pairwise alignment and return its throughput.

    Two fixed 10-character DNA strings are each repeated ``r`` times, turned
    into sequences with ``DNA.makeSequence`` and aligned with
    ``classic_align_pairwise`` (``local=False``).  Extra keyword arguments
    are forwarded to the aligner unchanged.

    Returns:
        ``len(seq1) * len(seq2)`` divided by the elapsed wall-clock seconds
        measured with ``time.time()``.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    t0 = time.time()
    # Only the elapsed time is of interest, so the alignment is discarded.
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    # The trailing ``print t`` of the original sat after this return and
    # could never execute, so it has been removed.
    return (len(seq1) * len(seq2)) / t
def test(r=1, **kw):
    """Time one global pairwise alignment and return its throughput.

    Two fixed 10-character DNA strings are each repeated ``r`` times, turned
    into sequences with ``DNA.makeSequence`` and aligned with
    ``classic_align_pairwise`` (``local=False``).  Extra keyword arguments
    are forwarded to the aligner unchanged.

    Returns:
        ``len(seq1) * len(seq2)`` divided by the elapsed wall-clock seconds
        measured with ``time.time()``.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence("AAAATGCTTA" * r)
    seq1 = DNA.makeSequence("AATTTTGCTG" * r)

    t0 = time.time()
    # Only the elapsed time is of interest, so the alignment is discarded.
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    # The trailing ``print t`` of the original sat after this return and
    # could never execute, so it has been removed.
    return (len(seq1) * len(seq2)) / t
Esempio n. 3
0
def group_to_reference(fulldict, reference, nonref, structscore, norefseq=False):
    """Fold each non-reference structure's alignment into its best reference.

    For every structure in ``nonref`` all structures in ``reference`` are
    scanned.  A reference is considered when its ``ScoreStructures`` score is
    no greater than the current structure threshold (initially
    ``structscore``, tightened to the score of each new best match).  It
    becomes the best match when the pairwise alignment score of the two
    degapped majority-consensus sequences is higher than the best alignment
    score seen so far (starting at 0).

    Args:
        fulldict: mapping from structure to alignment; modified in place.
        reference: iterable of reference structures (keys of ``fulldict``).
        nonref: iterable of structures to be grouped (keys of ``fulldict``).
        structscore: initial upper bound on the structure score.
        norefseq: when true, merged alignments are always rebuilt by
            realigning instead of being combined through "refseq".

    Returns:
        ``(fulldict, nogroup)`` where ``nogroup`` lists the structures for
        which no reference was selected.
    """
    nogroup = []
    scorer = ScoreStructures()
    for candidate in nonref:
        struct_limit = structscore
        top_aln_score = 0
        winner = ""
        # Degapped majority consensus of the candidate is the first sequence.
        consensus = fulldict[candidate].majorityConsensus()
        query = RnaSequence(''.join(consensus).replace('-', ''))
        for ref_name in reference:
            struct_dist = scorer(candidate, ref_name)
            if not (struct_dist <= struct_limit):
                continue
            # Degapped majority consensus of the reference is the second one.
            consensus = fulldict[ref_name].majorityConsensus()
            subject = RnaSequence(''.join(consensus).replace('-', ''))
            # Compare alignment scores: the higher score wins.
            _, aln_score = classic_align_pairwise(
                query, subject, alnscores, -10, -10, False, return_score=True)
            if aln_score > top_aln_score:
                struct_limit = struct_dist
                top_aln_score = aln_score
                winner = ref_name
        if winner != "":
            # Merge the two alignments, using the "refseq" row as the guide
            # whenever one of them has it ungapped.
            if norefseq:
                # No reference sequence available: realign everything.
                pooled = fulldict[winner].degap().addSeqs(fulldict[candidate].degap())
                fulldict[winner] = align_unaligned_seqs(
                    pooled, RNA, params={"-maxiters": 2, "-diags": True})
                # NOTE(review): this path skips the pop below, so the
                # candidate stays in fulldict and is not reported in
                # nogroup either -- confirm that this is intended.
                continue
            if fulldict[candidate].getGappedSeq("refseq").isGapped():
                if fulldict[winner].getGappedSeq("refseq").isGapped():
                    # Both "refseq" rows contain gaps: realign everything.
                    # "refseq" is taken out of Names before degapping and
                    # moved to the front afterwards (original author's
                    # self-described hack, flagged to be fixed later).
                    fulldict[winner].Names.remove("refseq")
                    pooled = fulldict[winner].degap().addSeqs(fulldict[candidate].degap())
                    realigned = align_unaligned_seqs(pooled, RNA)
                    fulldict[winner] = realigned
                    realigned.Names.remove("refseq")
                    realigned.Names.insert(0, "refseq")
                else:
                    # Winner's "refseq" is gapless: add it to the candidate
                    # and store the result under the winner's key.
                    fulldict[candidate].addFromReferenceAln(fulldict[winner])
                    fulldict[winner] = fulldict[candidate]
            else:
                # Candidate's "refseq" is gapless: add it to the winner.
                fulldict[winner].addFromReferenceAln(fulldict[candidate])
            fulldict.pop(candidate)
        else:
            nogroup.append(candidate)
    scorer.end()
    return fulldict, nogroup
def test(r=1, **kw):
    """Benchmark one global pairwise alignment.

    Two fixed 10-character DNA strings are each repeated ``r`` times, turned
    into sequences with ``DNA.makeSequence`` and aligned with
    ``classic_align_pairwise`` (``local=False``, ``return_alignment=False``).
    Extra keyword arguments are forwarded to the aligner unchanged.

    Returns:
        ``'*'`` if the aligner raises ``ArithmeticError``; otherwise
        ``len(seq1) * len(seq2)`` divided by the elapsed seconds and by 1000,
        truncated to an ``int``.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    t0 = timer()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, return_alignment=False, **kw)
    except ArithmeticError:
        return '*'
    else:
        t = timer() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)
Esempio n. 5
0
def group_denovo(fulldict, keys, structscore, norefseq=False):
    """Group structures with each other when no reference set is given.

    Each structure in ``keys`` is compared with the structures that come
    after it.  A later structure is considered when its ``ScoreStructures``
    score is no greater than the current structure threshold (initially
    ``structscore``, tightened to the score of each new best match).  It
    becomes the best match when the pairwise alignment score of the two
    degapped majority-consensus sequences is higher than the best alignment
    score seen so far (starting at 0).  The current structure's alignment is
    then merged into the best match's entry of ``fulldict``.

    Args:
        fulldict: mapping from structure to alignment; modified in place.
        keys: list of structures (keys of ``fulldict``); merged structures
            are popped from it in place.
        structscore: initial upper bound on the structure score.
        norefseq: when true, merged alignments are always rebuilt by
            realigning instead of being combined through "refseq".

    Returns:
        ``(fulldict, keys)`` after merging.
    """
    merged_positions = []
    scorer = ScoreStructures()
    for pos, candidate in enumerate(keys):
        struct_limit = structscore
        top_aln_score = 0
        winner = ""
        # Degapped majority consensus of the candidate is the first sequence.
        consensus = fulldict[candidate].majorityConsensus()
        query = RnaSequence(''.join(consensus).replace('-', ''))
        for later in range(pos + 1, len(keys)):
            partner = keys[later]
            struct_dist = scorer(candidate, partner)
            if not (struct_dist <= struct_limit):
                continue
            # Degapped majority consensus of the partner is the second one.
            consensus = fulldict[partner].majorityConsensus()
            subject = RnaSequence(''.join(consensus).replace('-', ''))
            # Compare alignment scores: the higher score wins.
            _, aln_score = classic_align_pairwise(
                query, subject, alnscores, -10, -10, False, return_score=True)
            if aln_score > top_aln_score:
                struct_limit = struct_dist
                top_aln_score = aln_score
                winner = partner
        if winner != "":
            if norefseq:
                # No reference sequence available: realign everything.
                pooled = fulldict[winner].degap().addSeqs(fulldict[candidate].degap())
                fulldict[winner] = align_unaligned_seqs(pooled, RNA)
                # NOTE(review): this path skips the bookkeeping below, so
                # the candidate stays in both fulldict and keys -- confirm
                # that this is intended.
                continue
            if fulldict[candidate].getGappedSeq("refseq").isGapped():
                if fulldict[winner].getGappedSeq("refseq").isGapped():
                    # Both "refseq" rows contain gaps: realign everything.
                    # "refseq" is taken out of Names before degapping and
                    # moved to the front afterwards (original author's
                    # self-described hack, flagged to be fixed later).
                    fulldict[winner].Names.remove("refseq")
                    pooled = fulldict[winner].degap().addSeqs(fulldict[candidate].degap())
                    realigned = align_unaligned_seqs(pooled, RNA)
                    fulldict[winner] = realigned
                    realigned.Names.remove("refseq")
                    realigned.Names.insert(0, "refseq")
                else:
                    # Winner's "refseq" is gapless: add it to the candidate
                    # and store the result under the winner's key.
                    fulldict[candidate].addFromReferenceAln(fulldict[winner])
                    fulldict[winner] = fulldict[candidate]
            else:
                # Candidate's "refseq" is gapless: add it to the winner.
                fulldict[winner].addFromReferenceAln(fulldict[candidate])
            fulldict.pop(candidate)
            merged_positions.append(pos)
    # Pop from the highest index down so earlier removals do not shift the
    # positions that are still to be removed.
    for pos in sorted(merged_positions, reverse=True):
        keys.pop(pos)
    scorer.end()
    return fulldict, keys
Esempio n. 6
0
def test(r=1, **kw):
    """Benchmark one global pairwise alignment.

    Two fixed 10-character DNA strings are each repeated ``r`` times, turned
    into sequences with ``DNA.makeSequence`` and aligned with
    ``classic_align_pairwise`` (``local=False``, ``return_alignment=False``).
    Extra keyword arguments are forwarded to the aligner unchanged.

    Returns:
        ``'*'`` if the aligner raises ``ArithmeticError``; otherwise
        ``len(seq1) * len(seq2)`` divided by the elapsed seconds and by 1000,
        truncated to an ``int``.
    """
    S = make_dna_scoring_dict(10, -1, -8)

    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)

    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    t0 = timer()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1,
                               seq2,
                               S,
                               10,
                               2,
                               local=False,
                               return_alignment=False,
                               **kw)
    except ArithmeticError:
        return '*'
    else:
        t = timer() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)
Esempio n. 7
0
 def _aligned_both_ways(self, seq1, seq2, **kw):
     """Align the two sequences in both argument orders.

     Calls ``classic_align_pairwise`` once as (seq1, seq2) and once as
     (seq2, seq1) with the same scoring dict and the same extra keyword
     arguments, and returns the two results as a list in that order.
     """
     scoring = make_dna_scoring_dict(10, -1, -8)
     return [
         classic_align_pairwise(first, second, scoring, 10, 2, **kw)
         for first, second in ((seq1, seq2), (seq2, seq1))
     ]
Esempio n. 8
0
 def _aligned_both_ways(self, seq1, seq2, **kw):
     """Align the two sequences in both argument orders.

     Calls ``classic_align_pairwise`` once as (seq1, seq2) and once as
     (seq2, seq1) with the same scoring dict and the same extra keyword
     arguments, and returns the two results as a list in that order.
     """
     scoring = make_dna_scoring_dict(10, -1, -8)
     return [
         classic_align_pairwise(first, second, scoring, 10, 2, **kw)
         for first, second in ((seq1, seq2), (seq2, seq1))
     ]