def test_scorecons(self):
    """Run ``ScoreconsRunner`` on the example FASTA file and check its result.

    Verifies the reported ``dops`` value and that one score is returned
    for every position of the alignment loaded from the same file.
    """
    runner = util.ScoreconsRunner()
    aln = Align.new_from_fasta(self.example_fasta_file)
    sc_res = runner.run_fasta(self.example_fasta_file)

    # ``dops`` is a float: compare to three decimal places (the precision of
    # the expected value) instead of relying on exact float equality.
    self.assertAlmostEqual(sc_res.dops, 92.889, places=3)
    self.assertEqual(len(sc_res.scores), aln.aln_positions)
def get_funfam_alignment(cls, *, funfam_id, cath_version):
    """
    Retrieve the FunFam alignment over HTTP and parse it as STOCKHOLM.

    Args:
        funfam_id: FunFam identifier that splits on ``-`` or ``/`` into
            exactly three fields (e.g. ``1.10.8.10-ff-10980`` gives
            ``1.10.8.10``, ``ff``, ``10980``). The first field is used as
            the superfamily id and the third as the FunFam number; the
            middle field is ignored.
        cath_version: value substituted for ``version`` in
            ``CATH_FUNFAM_STOCKHOLM_URL``.

    Returns:
        The alignment built by ``Align.from_stockholm`` from the response
        body (decoded as UTF-8).

    Raises:
        ValueError: if ``funfam_id`` does not split into exactly three
            fields (logged, then re-raised).
        requests.HTTPError: if the server answers with an error status.

    TODO: move this functionality to :class:`cathpy`
    """

    # Example of the URL being built:
    # http://www.cathdb.info/version/v4_2_0/superfamily/1.10.8.10/funfam/10980/files/stockholm?task_id=&max_sequences=200&onlyseq=1

    try:
        sfam_id, _, ff_num = re.split(r'[\-/]', funfam_id)
    except ValueError as e:
        LOG.error('failed to parse funfam id "%s": %s', funfam_id, e)
        raise

    ff_url = CATH_FUNFAM_STOCKHOLM_URL.format(
        version=cath_version,
        sfam_id=sfam_id,
        ff_num=ff_num,
    )

    # Pass the URL as a logger argument so the message is only formatted
    # when the INFO level is enabled (same rendered text as before).
    LOG.info("GET %s", ff_url)
    res = requests.get(ff_url)
    res.raise_for_status()

    sto_io = io.StringIO(res.content.decode('utf-8'))
    aln = Align.from_stockholm(sto_io)
    return aln
def setUp(self):
    """Prepare the query sequences and the two example FunFam alignments."""
    super().setUp()

    # Full query sequence plus a sub-sequence restricted to two segments.
    self.query_seq = Sequence(DEFAULT_QUERY_ID, DEFAULT_QUERY_SEQ)
    segments = [[50, 150], [200, 250]]
    self.query_subseq = self.query_seq.apply_segments(segments)

    # Example data lives in a sibling directory of this test module.
    here = os.path.dirname(__file__)
    example_dir = os.path.join(here, '..', 'example_data')

    self.ff1_file = os.path.join(example_dir, '3.20.20.10-ff-9715.sto.gz')
    self.ff2_file = os.path.join(example_dir, '2.40.37.10-ff-6607.sto.gz')

    self.ff1_aln = Align.from_stockholm(self.ff1_file)
    self.ff2_aln = Align.from_stockholm(self.ff2_file)
def run(self):
    """Add ``self.sequence`` to ``self.align`` with ``mafft --add``.

    The sequence and the existing alignment are written to temporary FASTA
    files, MAFFT is run on them, and its output is parsed as the merged
    alignment. The temporary files are removed before returning, whether
    or not MAFFT succeeds.

    Returns:
        A copy of ``self.align`` with the new sequence inserted at offset 0
        and every sequence replaced by its counterpart from the merged
        alignment.

    Raises:
        subprocess.CalledProcessError: if MAFFT exits with a non-zero
            status (``check=True``).
        Exception: if a sequence of the new alignment cannot be looked up
            in the merged alignment (the underlying error is chained).
    """
    # Local import: only needed here to remove the temporary files.
    import os

    tmp_files = []
    try:
        for suffix in ('.seqs.fa', '.aln.fa', '.merged.fa'):
            tmp_files.append(tempfile.NamedTemporaryFile(
                suffix=suffix, mode='wt', delete=False))
        seqfile, alnfile, outfile = tmp_files

        # write ungapped sequence(s) to file
        seqfile.write(self.sequence.to_fasta())
        seqfile.close()

        # write existing alignment to file; release our own handle first,
        # the alignment is written through the file *name*
        alnfile.close()
        self.align.write_fasta(alnfile.name)

        # use mafft to add sequences to alignment
        mafft_cmds = (self.mafft_exe, '--add', seqfile.name, alnfile.name)
        LOG.info("Running MAFFT: %s", " ".join(mafft_cmds))
        subprocess.run(mafft_cmds, check=True,
                       stdout=outfile, stderr=subprocess.PIPE)
        outfile.close()

        # NOTE(review): assumes Align.from_fasta reads the file eagerly, so
        # it is safe to delete the file once this call returns - confirm.
        merged_aln = Align.from_fasta(outfile.name)
    finally:
        # delete=False means nothing else will ever remove these files
        for tmp in tmp_files:
            tmp.close()
            try:
                os.unlink(tmp.name)
            except OSError:
                pass

    # transfer meta data from original alignment
    new_aln = self.align.copy()

    # set the length of the alignment (avoids complaints when adding sequences)
    new_seq = merged_aln.find_seq_by_id(self.sequence.uid)
    new_aln.aln_positions = len(new_seq)

    # add reference sequence to the start of the alignment
    new_aln.add_sequence(new_seq, offset=0)

    # update the newly aligned sequences for the rest of the entries
    for seq in new_aln.sequences:
        try:
            merged_seq = merged_aln.find_seq_by_id(seq.uid)
        except Exception as err:
            # keep the original cause, and let KeyboardInterrupt/SystemExit
            # propagate untouched
            raise Exception(
                "failed to find sequence id {} in merged alignment".format(
                    seq.uid)) from err
        LOG.debug("Sub merged sequence from:%s: %s", seq.uid, seq.seq)
        LOG.debug(" to:%s: %s", merged_seq.uid, merged_seq.seq)
        seq.set_sequence(merged_seq.seq)

    return new_aln
def test_merge_aln_with_correspondence(self):
    """Merge two alignments into a reference alignment, one after another.

    The first merge is guided by a correspondence loaded from a GCF file;
    the second is done without one. After each merge the FASTA output of
    the reference alignment is compared with the expected alignment file.
    """
    reference = Align.new_from_fasta(self.aln_structure)
    self.assertEqual(reference.count_sequences, 2)

    first_merge = Align.new_from_fasta(self.aln_merge1)
    self.assertEqual(first_merge.count_sequences, 3)

    second_merge = Align.new_from_fasta(self.aln_merge2)
    self.assertEqual(second_merge.count_sequences, 3)

    # First merge: uses the correspondence for 'ref1'.
    correspondence = Correspondence.new_from_gcf(self.gcf_ref1)
    reference.merge_alignment(first_merge, 'ref1', correspondence)
    expected_first = Align.new_from_fasta(self.aln_after_merge1)
    merged_ids = [s.id for s in reference.seqs]
    self.assertIn('ref1_merge', merged_ids)
    self.assertEqual(reference.to_fasta(), expected_first.to_fasta())

    # Second merge: no correspondence supplied for 'ref2'.
    reference.merge_alignment(second_merge, 'ref2')
    expected_second = Align.new_from_fasta(self.aln_after_merge2)
    self.assertEqual(reference.to_fasta(), expected_second.to_fasta())
def test_groupsim(self):
    """Run ``GroupsimRunner`` on an alignment split into two clusters.

    The first two sequences are assigned to cluster '0001' and the rest to
    '0002'; the result must report one position per alignment column.
    """
    runner = util.GroupsimRunner()
    aln = Align.new_from_fasta(self.example_fasta_file)

    for index, member in enumerate(aln.seqs):
        cluster_id = '0001' if index < 2 else '0002'
        member.set_cluster_id(cluster_id)

    gs_res = runner.run_alignment(aln)
    self.assertEqual(gs_res.count_positions, aln.aln_positions)

    # Dump the raw result for manual inspection.
    result_dump = repr(gs_res.__dict__)
    print("GS: {}".format(result_dump))
def get_funfam_alignment(*, cath_version, superfamily_id, funfam_number, **kwargs):
    """
    Fetch a FunFam alignment over HTTP and parse it as STOCKHOLM.

    The three named arguments fill the ``API_FUNFAM_ALN`` URL template.
    Any additional keyword arguments are accepted and ignored.

    Returns the object built by ``Align.from_stockholm``. An HTTP error
    status is raised by ``response.raise_for_status()``.
    """
    url_fields = {
        'cath_version': cath_version,
        'superfamily_id': superfamily_id,
        'funfam_number': funfam_number,
    }
    ff_url = API_FUNFAM_ALN.format(**url_fields)

    LOG.info("GET %s", ff_url)
    response = requests.get(ff_url)
    response.raise_for_status()

    # NOTE(review): the response body is handed over as a plain ``str``;
    # confirm that Align.from_stockholm treats a string as STOCKHOLM content
    # rather than as a file name.
    return Align.from_stockholm(response.text)