def test(r=1, **kw):
    """Time one global pairwise alignment and return its throughput.

    Two fixed 10-base DNA motifs are each repeated ``r`` times and aligned
    with ``classic_align_pairwise`` (called with ``local=False``).

    Parameters:
        r: number of times each motif is repeated to build the sequences.
        **kw: extra keyword arguments forwarded unchanged to
            ``classic_align_pairwise``.

    Returns:
        ``len(seq1) * len(seq2)`` divided by the elapsed seconds measured
        with ``time.time()``.

    Raises:
        ZeroDivisionError: if the measured elapsed time is exactly zero.
    """
    S = make_dna_scoring_dict(10, -1, -8)
    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)
    t0 = time.time()
    # Only the elapsed time matters here; the alignment result is discarded.
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    return (len(seq1) * len(seq2)) / t
def test(r=1, **kw):
    """Benchmark a single global pairwise alignment.

    Builds two DNA sequences by repeating fixed 10-base motifs ``r`` times,
    aligns them with ``classic_align_pairwise`` (``local=False``) and reports
    how much work was done per second.

    Parameters:
        r: repeat count applied to each motif.
        **kw: keyword arguments passed straight through to
            ``classic_align_pairwise``.

    Returns:
        The product of the two sequence lengths divided by the elapsed
        seconds, as measured by ``time.time()``.

    Raises:
        ZeroDivisionError: if no time difference is measured.
    """
    S = make_dna_scoring_dict(10, -1, -8)
    seq2 = DNA.makeSequence("AAAATGCTTA" * r)
    seq1 = DNA.makeSequence("AATTTTGCTG" * r)
    t0 = time.time()
    # The alignment itself is not needed, only how long it took.
    classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, **kw)
    t = time.time() - t0
    return (len(seq1) * len(seq2)) / t
def group_to_reference(fulldict, reference, nonref, structscore, norefseq=False):
    """Merge each non-reference group into its best-matching reference group.

    For every key in ``nonref`` the reference keys are scanned. A reference
    is a candidate when ``score_structures(currstruct, teststruct)`` is
    ``<=`` the current structure threshold (initially ``structscore``). For
    candidates, the degapped majority-consensus sequences of both groups are
    aligned pairwise; the candidate with the highest alignment score (which
    must be greater than 0) wins, and the structure threshold is tightened to
    that candidate's structure score.

    Parameters:
        fulldict: mapping from structure key to an alignment object; it is
            modified in place (entries are replaced and popped).
        reference: iterable of keys in ``fulldict`` to group against.
        nonref: iterable of keys in ``fulldict`` to be grouped.
        structscore: initial upper bound on the structure score.
        norefseq: when true, merged groups are always realigned from scratch
            instead of being combined through the "refseq" sequence.

    Returns:
        ``(fulldict, nogroup)`` where ``nogroup`` lists the keys from
        ``nonref`` for which no reference was selected.
    """
    nogroup = []
    score_structures = ScoreStructures()
    for currstruct in nonref:
        strscore = structscore
        seqscore = 0
        bestref = ""
        # remove gaps from majority and set as seq1
        seq = fulldict[currstruct].majorityConsensus()
        seq1 = RnaSequence(''.join(seq).replace('-', ''))
        for teststruct in reference:
            holdscore = score_structures(currstruct, teststruct)
            if holdscore <= strscore:
                # remove gaps from majority and set as seq2
                seq = fulldict[teststruct].majorityConsensus()
                seq2 = RnaSequence(''.join(seq).replace('-', ''))
                # Compare alignment score: a higher alignment score is better
                # (see the `alnscore > seqscore` test below).
                # NOTE(review): `alnscores` is not defined in this function;
                # presumably a module-level scoring matrix -- confirm.
                aln, alnscore = classic_align_pairwise(seq1, seq2, alnscores, -10, -10, False, return_score=True)
                if alnscore > seqscore:
                    # New best: also tighten the structure threshold so later
                    # candidates must score at least as well structurally.
                    strscore = holdscore
                    seqscore = alnscore
                    bestref = teststruct
        if bestref != "":
            # combine the two alignments into one alignment using reference sequence as guide
            # refseq must be ungapped to do this without realigning, hence the checks
            if norefseq:
                # realign all sequences since no refseq available
                combinedseqs = fulldict[bestref].degap().addSeqs(fulldict[currstruct].degap())
                fulldict[bestref] = align_unaligned_seqs(combinedseqs, RNA, params={"-maxiters": 2, "-diags": True})
                # NOTE(review): this `continue` skips `fulldict.pop(currstruct)`
                # below, so currstruct stays in fulldict after being merged
                # into bestref -- confirm this is intended.
                continue
            # if one refseq is gapless, can easily combine them without realigning
            if not fulldict[currstruct].getGappedSeq("refseq").isGapped():
                fulldict[bestref].addFromReferenceAln(fulldict[currstruct])
            elif not fulldict[bestref].getGappedSeq("refseq").isGapped():
                fulldict[currstruct].addFromReferenceAln(fulldict[bestref])
                # The merged alignment lives in currstruct's object; store it
                # under bestref since currstruct is popped below.
                fulldict[bestref] = fulldict[currstruct]
            else:
                # realign all sequences since both refseqs have gaps
                # hacky but it works, need to fix later
                fulldict[bestref].Names.remove("refseq")
                combinedseqs = fulldict[bestref].degap().addSeqs(fulldict[currstruct].degap())
                fulldict[bestref] = align_unaligned_seqs(combinedseqs, RNA)
                # Move "refseq" to the front of the name list.
                fulldict[bestref].Names.remove("refseq")
                fulldict[bestref].Names.insert(0, "refseq")

            fulldict.pop(currstruct)
        else:
            nogroup.append(currstruct)
    score_structures.end()
    return fulldict, nogroup
def test(r=1, **kw):
    """Time one global pairwise alignment and return a scaled throughput.

    Two fixed 10-base DNA motifs are each repeated ``r`` times and passed to
    ``classic_align_pairwise`` with ``local=False`` and
    ``return_alignment=False``.

    Parameters:
        r: number of times each motif is repeated.
        **kw: extra keyword arguments forwarded to
            ``classic_align_pairwise``.

    Returns:
        ``int(len(seq1) * len(seq2) / elapsed / 1000)``, or the string
        ``'*'`` when the alignment call raises ``ArithmeticError``.

    Raises:
        ZeroDivisionError: if the measured elapsed time is exactly zero
            (the division happens outside the guarded call).
    """
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    S = make_dna_scoring_dict(10, -1, -8)
    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)
    t0 = timer()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, return_alignment=False, **kw)
    except ArithmeticError:
        return '*'
    else:
        t = timer() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)
def group_denovo(fulldict, keys, structscore, norefseq=False):
    """Merge structure groups with later groups in ``keys``, without a reference set.

    Each key is compared only against the keys that follow it in ``keys``.
    A later key is a candidate when ``score_structures(currstruct, other)``
    is ``<=`` the current structure threshold (initially ``structscore``).
    For candidates, the degapped majority-consensus sequences are aligned
    pairwise; the candidate with the highest alignment score (greater than 0)
    is selected and the threshold is tightened to its structure score.

    Parameters:
        fulldict: mapping from structure key to an alignment object; it is
            modified in place (entries are replaced and popped).
        keys: list of keys in ``fulldict``; merged-away keys are removed from
            this list in place.
        structscore: initial upper bound on the structure score.
        norefseq: when true, merged groups are always realigned from scratch
            instead of being combined through the "refseq" sequence.

    Returns:
        ``(fulldict, keys)`` after merging.
    """
    topop = []
    score_structures = ScoreStructures()
    for pos, currstruct in enumerate(keys):
        strscore = structscore
        seqscore = 0
        bestref = ""
        # remove gaps from majority and set as seq1
        seq = fulldict[currstruct].majorityConsensus()
        seq1 = RnaSequence(''.join(seq).replace('-', ''))
        # Only look forward so every pair of keys is compared once.
        for secpos in range(pos+1, len(keys)):
            holdscore = score_structures(currstruct, keys[secpos])
            if holdscore <= strscore:
                # remove gaps from majority and set as seq2
                seq = fulldict[keys[secpos]].majorityConsensus()
                seq2 = RnaSequence(''.join(seq).replace('-', ''))
                # compare alignment score. Higher is better.
                # NOTE(review): `alnscores` is not defined in this function;
                # presumably a module-level scoring matrix -- confirm.
                aln, alnscore = classic_align_pairwise(seq1, seq2, alnscores, -10, -10, False, return_score=True)
                if alnscore > seqscore:
                    # New best: also tighten the structure threshold.
                    strscore = holdscore
                    seqscore = alnscore
                    bestref = keys[secpos]
        if bestref != "":
            if norefseq:
                # realign all sequences since no refseq available
                combinedseqs = fulldict[bestref].degap().addSeqs(fulldict[currstruct].degap())
                fulldict[bestref] = align_unaligned_seqs(combinedseqs, RNA)
                # NOTE(review): this `continue` skips both
                # `fulldict.pop(currstruct)` and `topop.append(pos)` below, so
                # currstruct remains in fulldict and keys after being merged
                # into bestref -- confirm this is intended.
                continue
            # if one refseq is gapless, the groups can be combined without realigning
            if not fulldict[currstruct].getGappedSeq("refseq").isGapped():
                fulldict[bestref].addFromReferenceAln(fulldict[currstruct])
            elif not fulldict[bestref].getGappedSeq("refseq").isGapped():
                fulldict[currstruct].addFromReferenceAln(fulldict[bestref])
                # The merged alignment lives in currstruct's object; store it
                # under bestref since currstruct is popped below.
                fulldict[bestref] = fulldict[currstruct]
            else:
                # realign all sequences since both refseqs have gaps
                # hacky but it works, need to fix later
                fulldict[bestref].Names.remove("refseq")
                combinedseqs = fulldict[bestref].degap().addSeqs(fulldict[currstruct].degap())
                fulldict[bestref] = align_unaligned_seqs(combinedseqs, RNA)
                # Move "refseq" to the front of the name list.
                fulldict[bestref].Names.remove("refseq")
                fulldict[bestref].Names.insert(0, "refseq")
            fulldict.pop(currstruct)
            topop.append(pos)
    # Remove merged keys from the highest index down so earlier removals do
    # not shift the positions still waiting to be removed.
    topop.sort(reverse=True)
    for pos in topop:
        keys.pop(pos)
    score_structures.end()
    return fulldict, keys
def test(r=1, **kw):
    """Benchmark one global pairwise alignment and report scaled throughput.

    Builds two DNA sequences by repeating fixed 10-base motifs ``r`` times
    and runs ``classic_align_pairwise`` on them with ``local=False`` and
    ``return_alignment=False``.

    Parameters:
        r: repeat count applied to each motif.
        **kw: keyword arguments passed straight through to
            ``classic_align_pairwise``.

    Returns:
        ``int(len(seq1) * len(seq2) / elapsed / 1000)``, or ``'*'`` if the
        alignment call raises ``ArithmeticError``.

    Raises:
        ZeroDivisionError: if no time difference is measured; the division
            is performed after the guarded call, so it is not caught.
    """
    # time.clock() no longer exists on Python 3.8+; prefer perf_counter()
    # and only fall back to clock() where perf_counter() is unavailable.
    timer = getattr(time, 'perf_counter', None) or time.clock
    S = make_dna_scoring_dict(10, -1, -8)
    seq2 = DNA.makeSequence('AAAATGCTTA' * r)
    seq1 = DNA.makeSequence('AATTTTGCTG' * r)
    t0 = timer()
    try:
        # return_alignment is False in order to emphasise the quadratic part of the work.
        classic_align_pairwise(seq1, seq2, S, 10, 2, local=False, return_alignment=False, **kw)
    except ArithmeticError:
        return '*'
    else:
        t = timer() - t0
        return int((len(seq1) * len(seq2)) / t / 1000)
def _aligned_both_ways(self, seq1, seq2, **kw):
    """Align the two sequences in both argument orders.

    Returns a two-item list: the result of aligning ``(seq1, seq2)``
    followed by the result of aligning ``(seq2, seq1)``. Both calls use the
    same scoring dict and forward ``**kw`` to ``classic_align_pairwise``.
    """
    scoring = make_dna_scoring_dict(10, -1, -8)
    orderings = ((seq1, seq2), (seq2, seq1))
    return [
        classic_align_pairwise(first, second, scoring, 10, 2, **kw)
        for first, second in orderings
    ]