def test_paralinear_for_determinant_lte_zero(self):
    """returns distance of None if the determinant is <= 0"""
    data = dict(
        seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
        seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
    aln = LoadSeqs(data=data, moltype=DNA)
    paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)
    # run() is exercised twice on the same calculator; both passes must
    # report None for the single pairwise distance.
    for _ in range(2):
        paralinear_calc.run(show_progress=False)
        dists = paralinear_calc.getPairwiseDistances()
        # list(...) keeps the indexing valid when dict.values() is a view
        # object (Python 3) rather than a list (Python 2).
        self.assertTrue(list(dists.values())[0] is None)
def test_paralinear_for_determinant_lte_zero(self):
    """returns distance of None if the determinant is <= 0"""
    data = dict(
        seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
        seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
    aln = LoadSeqs(data=data, moltype=DNA)
    paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)

    # First pass.
    paralinear_calc.run(show_progress=False)
    dists = paralinear_calc.getPairwiseDistances()
    # dict.values() is not subscriptable on Python 3; materialise it first.
    self.assertTrue(list(dists.values())[0] is None)

    # Second pass on the same calculator must give the same answer.
    paralinear_calc.run(show_progress=False)
    dists = paralinear_calc.getPairwiseDistances()
    self.assertTrue(list(dists.values())[0] is None)
def test_paralinear_pair_aa(self):
    """paralinear should produce distances for aa seqs without raising"""
    dna_aln = LoadSeqs('data/brca1_5.paml', moltype=DNA)
    protein_aln = dna_aln.getTranslation()
    calc = ParalinearPair(moltype=PROTEIN, alignment=protein_aln)
    calc.run(show_progress=False)
    # Nothing is asserted about the values: the test passes as long as
    # computing and fetching the distances does not raise.
    calc.getPairwiseDistances()
def get_paralinear_distances(gene, data_directory=None, third_position=False, **kw):
    """Return pairwise paralinear distances for the alignment file of ``gene``.

    Exactly one file matching ``<gene>.fasta*`` must exist in
    ``data_directory``; it is read as plain text (``.fasta``) or through
    ``GzipFile`` (``.fasta.gz``) and handed to ``LoadSeqs``.

    Parameters:
        gene: file name stem; also used in the error messages.
        data_directory: directory searched for the alignment file.
            NOTE(review): the default of None is passed straight to
            ``os.path.join``, which rejects it, so callers must supply a path.
        third_position: when true, only the slices ``(i, i+1)`` for
            i = 2, 5, 8, ... are kept (via a 'pos3' feature) before the
            distances are computed.
        **kw: accepted and ignored.

    Returns:
        dict mapping ``frozenset(key)`` to distance for every item returned
        by ``ParalinearPair.getPairwiseDistances()``.

    Raises:
        AssertionError: if the glob does not match exactly one file.
        RuntimeError: if the matched file ends in neither ``.fasta`` nor
            ``.fasta.gz``.
    """
    filenames = glob.glob(os.path.join(data_directory, gene + '.fasta*'))
    if len(filenames) != 1:
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; the exception type and message are unchanged.
        raise AssertionError('Wrong number of alignment files for ' + gene)
    filename = filenames[0]

    if filename.endswith('.fasta'):
        with open(filename) as fastafile:
            fastadata = fastafile.read()
    elif filename.endswith('.fasta.gz'):
        with GzipFile(filename) as fastafile:
            fastadata = fastafile.read()
    else:
        # The glob also matches names such as "<gene>.fasta.bak".
        raise RuntimeError(gene + ' file could not be read')

    sequences = LoadSeqs(data=fastadata)
    if third_position:
        # presumably len(sequences) is the alignment length - TODO confirm
        indices = [(i, i + 1) for i in range(2, len(sequences), 3)]
        pos3 = sequences.addFeature('pos3', 'pos3', indices)
        sequences = pos3.getSlice()

    # Build the allowed character set once rather than once per predicate call.
    dna_chars = set(DNA)
    sequences = sequences.filtered(lambda x: set(''.join(x)) <= dna_chars)

    paralinear_calc = ParalinearPair(moltype=DNA, alignment=sequences)
    paralinear_calc.run(show_progress=False)
    dists = paralinear_calc.getPairwiseDistances()
    # frozenset keys make the lookup independent of the order within each key.
    return {frozenset(k): v for k, v in dists.items()}