Ejemplo n.º 1
0
    def test_scorecons(self):
        """Run scorecons on the example FASTA file and check its results.

        Checks the reported ``dops`` value and that the number of scores
        returned equals the number of positions in the alignment loaded from
        the same file.
        """
        sc = util.ScoreconsRunner()
        aln = Align.new_from_fasta(self.example_fasta_file)

        sc_res = sc.run_fasta(self.example_fasta_file)
        # The expected value is a float literal quoted to three decimal
        # places, so compare to that precision instead of requiring exact
        # floating-point equality.
        self.assertAlmostEqual(sc_res.dops, 92.889, places=3)
        self.assertEqual(len(sc_res.scores), aln.aln_positions)
Ejemplo n.º 2
0
    def get_funfam_alignment(cls, *, funfam_id, cath_version):
        """
        Retrieves the FunFam alignment

        Builds a URL from ``CATH_FUNFAM_STOCKHOLM_URL``, downloads it and
        parses the response body with ``Align.from_stockholm``.

        Args:
            funfam_id: identifier that splits on ``-`` or ``/`` into exactly
                three parts; the first is used as the superfamily id and the
                third as the FunFam number (presumably of the form
                ``1.10.8.10-ff-10980`` -- TODO confirm against callers).
            cath_version: value substituted into the ``version`` field of
                the URL template.

        Returns:
            The alignment object produced by ``Align.from_stockholm``.

        Raises:
            ValueError: if ``funfam_id`` does not split into three parts
                (logged before being re-raised).
            requests.HTTPError: if the server answers with an error status.

        TODO:
            move this functionality to :class:`cathpy`
        """

        # http://www.cathdb.info/version/v4_2_0/superfamily/1.10.8.10/funfam/10980/files/stockholm?task_id=&max_sequences=200&onlyseq=1

        try:
            sfam_id, _, ff_num = re.split(r'[\-/]', funfam_id)
        except ValueError as e:
            LOG.error('failed to parse funfam id "%s": %s', funfam_id, e)
            raise

        ff_url = CATH_FUNFAM_STOCKHOLM_URL.format(
            version=cath_version,
            sfam_id=sfam_id,
            ff_num=ff_num,
        )
        # Pass the URL as a logging argument so the message is only
        # formatted when the INFO level is actually enabled.
        LOG.info("GET %s", ff_url)
        # NOTE(review): no timeout is given, so this call can block
        # indefinitely if the server stops responding -- consider adding one.
        res = requests.get(ff_url)
        res.raise_for_status()
        # The parser is handed a text stream, so decode the raw bytes first.
        sto_io = io.StringIO(res.content.decode('utf-8'))
        aln = Align.from_stockholm(sto_io)
        return aln
Ejemplo n.º 3
0
    def setUp(self):
        """Prepare the query sequences and FunFam alignments used by the tests."""

        super().setUp()

        # full query sequence plus a two-segment sub-sequence cut from it
        full_query = Sequence(DEFAULT_QUERY_ID, DEFAULT_QUERY_SEQ)
        self.query_seq = full_query
        self.query_subseq = full_query.apply_segments([[50, 150], [200, 250]])

        # example data lives one directory above this test module
        here = os.path.dirname(__file__)
        example_dir = os.path.join(here, '..', 'example_data')

        self.ff1_file = os.path.join(example_dir, '3.20.20.10-ff-9715.sto.gz')
        self.ff2_file = os.path.join(example_dir, '2.40.37.10-ff-6607.sto.gz')

        # parse both STOCKHOLM files up front so each test can use them
        self.ff1_aln = Align.from_stockholm(self.ff1_file)
        self.ff2_aln = Align.from_stockholm(self.ff2_file)
Ejemplo n.º 4
0
    def run(self):
        """
        Add ``self.sequence`` to ``self.align`` with MAFFT and return the result.

        The sequence and the existing alignment are written to temporary
        FASTA files, ``mafft --add`` is run on them, and the merged output is
        read back.  A copy of the original alignment is then updated with the
        newly aligned sequence strings, so the copy keeps the original
        entries (and whatever meta data ``copy()`` preserves) while the new
        sequence is added at offset 0.

        Returns:
            A new alignment object; ``self.align`` itself is not modified by
            this method.

        Raises:
            subprocess.CalledProcessError: if MAFFT exits with a non-zero
                status.
            Exception: if a sequence of the original alignment cannot be
                found in the merged alignment.
        """
        import os  # local import: only needed to remove the temporary files

        tmp_opts = {'mode': 'wt', 'delete': False}
        tmp_paths = []

        try:
            # write ungapped sequence(s) to file
            with tempfile.NamedTemporaryFile(suffix='.seqs.fa',
                                             **tmp_opts) as seqfile:
                tmp_paths.append(seqfile.name)
                seqfile.write(self.sequence.to_fasta())

            # write existing alignment to file; only the path is needed, so
            # the handle is closed straight away and the file written by name
            with tempfile.NamedTemporaryFile(suffix='.aln.fa',
                                             **tmp_opts) as alnfile:
                tmp_paths.append(alnfile.name)
            self.align.write_fasta(alnfile.name)

            # use mafft to add sequences to alignment
            mafft_cmds = (self.mafft_exe, '--add', seqfile.name, alnfile.name)
            LOG.info("Running MAFFT: %s", " ".join(mafft_cmds))
            with tempfile.NamedTemporaryFile(suffix='.merged.fa',
                                             **tmp_opts) as outfile:
                tmp_paths.append(outfile.name)
                subprocess.run(mafft_cmds,
                               check=True,
                               stdout=outfile,
                               stderr=subprocess.PIPE)

            # read the merged alignment back before the files are removed
            merged_aln = Align.from_fasta(outfile.name)
        finally:
            # the files are created with delete=False, so they have to be
            # removed explicitly -- also when MAFFT or the parsing fails
            for tmp_path in tmp_paths:
                try:
                    os.remove(tmp_path)
                except OSError as err:
                    LOG.warning("failed to remove temporary file %s: %s",
                                tmp_path, err)

        # transfer meta data from original alignment
        new_aln = self.align.copy()

        # set the length of the alignment (avoids complaints when adding sequences)
        new_seq = merged_aln.find_seq_by_id(self.sequence.uid)
        new_aln.aln_positions = len(new_seq)

        # add reference sequence to the start of the alignment
        new_aln.add_sequence(new_seq, offset=0)

        # update the newly aligned sequences for the rest of the entries
        for seq in new_aln.sequences:
            try:
                merged_seq = merged_aln.find_seq_by_id(seq.uid)
            except Exception as err:
                # catch Exception rather than everything, so that
                # KeyboardInterrupt / SystemExit are not masked
                raise Exception(
                    "failed to find sequence id {} in merged alignment".format(
                        seq.uid)) from err

            LOG.debug("Sub merged sequence from:%s: %s", seq.uid, seq.seq)
            LOG.debug("                      to:%s: %s", merged_seq.uid,
                      merged_seq.seq)
            seq.set_sequence(merged_seq.seq)

        return new_aln
Ejemplo n.º 5
0
    def test_merge_aln_with_correspondence(self):
        """Merge two alignments into a reference and compare with expected FASTA.

        The first merge goes through 'ref1' using a correspondence loaded
        from a GCF file; the second goes through 'ref2' without one.  After
        each merge the reference alignment must serialise to the same FASTA
        as the stored expected alignment.
        """
        reference = Align.new_from_fasta(self.aln_structure)
        self.assertEqual(reference.count_sequences, 2)
        first_input = Align.new_from_fasta(self.aln_merge1)
        self.assertEqual(first_input.count_sequences, 3)
        second_input = Align.new_from_fasta(self.aln_merge2)
        self.assertEqual(second_input.count_sequences, 3)

        correspondence = Correspondence.new_from_gcf(self.gcf_ref1)

        # first merge: via 'ref1', with the residue correspondence
        reference.merge_alignment(first_input, 'ref1', correspondence)
        expected_first = Align.new_from_fasta(self.aln_after_merge1)
        merged_ids = [entry.id for entry in reference.seqs]
        self.assertIn('ref1_merge', merged_ids)
        self.assertEqual(reference.to_fasta(), expected_first.to_fasta())

        # second merge: via 'ref2', no correspondence supplied
        reference.merge_alignment(second_input, 'ref2')
        expected_second = Align.new_from_fasta(self.aln_after_merge2)
        self.assertEqual(reference.to_fasta(), expected_second.to_fasta())
Ejemplo n.º 6
0
    def test_groupsim(self):
        """Run groupsim on a two-cluster alignment and check the position count.

        The first two sequences are assigned to cluster '0001' and all
        remaining ones to cluster '0002' before the alignment is scored.
        """
        runner = util.GroupsimRunner()
        alignment = Align.new_from_fasta(self.example_fasta_file)

        members = alignment.seqs
        clusters = (
            ('0001', members[:2]),
            ('0002', members[2:]),
        )
        for cluster_id, cluster_seqs in clusters:
            for entry in cluster_seqs:
                entry.set_cluster_id(cluster_id)

        result = runner.run_alignment(alignment)
        self.assertEqual(result.count_positions, alignment.aln_positions)
        # dump the result attributes to stdout to help when debugging
        print("GS: {}".format(repr(vars(result))))
Ejemplo n.º 7
0
def get_funfam_alignment(*, cath_version, superfamily_id, funfam_number,
                         **kwargs):
    """
    Download and parse the alignment of a FunFam.

    The URL is built from ``API_FUNFAM_ALN`` using the three named
    arguments.  Any additional keyword arguments are accepted and ignored
    (presumably so that all fields of a FunfamInfo can be passed with
    ``**`` -- TODO confirm against callers).

    Args:
        cath_version: value for the ``cath_version`` field of the URL.
        superfamily_id: value for the ``superfamily_id`` field of the URL.
        funfam_number: value for the ``funfam_number`` field of the URL.

    Returns:
        The alignment object produced by ``Align.from_stockholm``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    import io  # local import: only needed to wrap the response body

    ff_url = API_FUNFAM_ALN.format(cath_version=cath_version,
                                   superfamily_id=superfamily_id,
                                   funfam_number=funfam_number)

    LOG.info("GET %s", ff_url)
    response = requests.get(ff_url)
    response.raise_for_status()

    # Hand the parser a text stream rather than a bare string, so the
    # downloaded content cannot be mistaken for a file name.
    # NOTE(review): assumes Align.from_stockholm accepts a file-like
    # object -- confirm against its implementation.
    aln = Align.from_stockholm(io.StringIO(response.text))
    return aln