Python SeqGroup.SeqGroup Examples

Programming Language: Python

Namespace/Package Name: epac.ete2

Class/Type: SeqGroup

Method/Function: SeqGroup

Examples at hotexamples.com: 5

Python SeqGroup.SeqGroup - 5 examples found. These are the top-rated real-world Python examples of epac.ete2.SeqGroup.SeqGroup extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SeqGroup(5)

set_seq(3)

get_entries(2)

add_name_prefix(1)

get_seqbyid(1)

iter_entries(1)

Example #1

Show file

File: epa_classifier.py Project: pbordron/sativa

    def run_ptp(self, jp, fout=None):
        """Run ``epa_2_ptp`` on a placement result and report the clusters.

        The alignment stored in ``self.epa_alignment`` is loaded as a
        ``SeqGroup`` and passed, together with ``jp`` and ``self.refjson``,
        to ``epa_2_ptp``.  Every returned cluster is logged at debug level as
        a comma-separated list of names, each name first being passed through
        ``EpacConfig.strip_query_prefix``.

        Args:
            jp: placement result forwarded to ``epa_2_ptp`` as ``epa_jp``.
            fout: optional output path prefix.  When truthy, the clusters are
                also written, one per line, to ``fout + ".species"``.
                Previously ``fout`` was read without being defined anywhere in
                this method; it is now an explicit optional argument.
                NOTE(review): if the module defined a global ``fout``, callers
                must now pass it explicitly - confirm against the full file.
        """
        full_aln = SeqGroup(self.epa_alignment)
        species_list = epa_2_ptp(epa_jp=jp,
                                 ref_jp=self.refjson,
                                 full_alignment=full_aln,
                                 min_lw=0.5,
                                 debug=self.cfg.debug)

        self.cfg.log.debug("Species clusters:")

        fo2 = open(fout + ".species", "w") if fout else None
        # try/finally guarantees the file is closed even if translating or
        # logging one of the clusters raises.
        try:
            for sp_cluster in species_list:
                translated_taxa = [
                    EpacConfig.strip_query_prefix(taxon)
                    for taxon in sp_cluster
                ]
                s = ",".join(translated_taxa)
                if fo2:
                    fo2.write(s + "\n")
                self.cfg.log.debug(s)
        finally:
            if fo2:
                fo2.close()

Example #2

Show file

File: epa_classifier.py Project: pbordron/sativa

    def checkinput(self, query_fname, minp=0.9):
        """Load the query sequences and combine them with the reference alignment.

        The file format is guessed by trying each supported format in turn
        until ``SeqGroup`` accepts the file.  If none works, a user error is
        reported through ``self.cfg.exit_user_error``.

        When ``self.ignore_refalign`` is set, the combined alignment is
        written immediately and the method returns.  Otherwise the query names
        are prefixed with ``EpacConfig.QUERY_SEQ_PREFIX``, the sequences are
        written as FASTA to ``self.tmpquery``, and one of three strategies is
        chosen:

        * more than one entry, all of equal length, and that length equals
          the reference alignment length: ``self.merge_alignment``;
        * more than one entry, all of equal length, but a different length
          from the reference: merge with MUSCLE into ``self.epa_alignment``;
        * otherwise: align to the reference with HMMER.

        Args:
            query_fname: path of the query sequence file.
            minp: forwarded unchanged to ``self.align_to_refenence``.
        """
        formats = [
            "fasta", "phylip", "iphylip", "phylip_relaxed", "iphylip_relaxed"
        ]
        # Reset first so the "nothing parsed" check below cannot be satisfied
        # by a value left over from an earlier call (or fail with
        # AttributeError if the attribute was never initialised).
        self.seqs = None
        for fmt in formats:
            try:
                self.seqs = SeqGroup(sequences=query_fname, format=fmt)
                break
            except Exception:
                # A parse failure only means the guess was wrong; unlike a
                # bare ``except`` this lets KeyboardInterrupt/SystemExit
                # propagate.
                self.cfg.log.debug("Guessing input format: not " + fmt)
        if self.seqs is None:
            self.cfg.exit_user_error(
                "Invalid input file format: %s\nThe supported input formats are fasta and phylip"
                % query_fname)

        if self.ignore_refalign:
            self.cfg.log.info(
                "Assuming query file contains reference sequences, skipping the alignment step...\n"
            )
            self.write_combined_alignment()
            return

        self.query_count = len(self.seqs)

        # add query seq name prefix to avoid confusion between reference and query sequences
        self.seqs.add_name_prefix(EpacConfig.QUERY_SEQ_PREFIX)

        self.seqs.write(format="fasta", outfile=self.tmpquery)
        self.cfg.log.info("Checking if query sequences are aligned ...")
        entries = self.seqs.get_entries()
        # NOTE(review): entries[0] raises IndexError for a file with no
        # sequences - confirm whether SeqGroup can return an empty list here.
        # Element [1] of an entry is presumably the sequence string - verify.
        seql = len(entries[0][1])
        # The queries count as aligned when every entry has the same length.
        aligned = all(len(entry[1]) == seql for entry in entries[1:])

        if aligned and len(self.seqs) > 1:
            self.cfg.log.info("Query sequences are aligned")
            refalnl = self.refjson.get_alignment_length()
            if refalnl == seql:
                self.cfg.log.info(
                    "Merging query alignment with reference alignment")
                self.merge_alignment(self.seqs)
            else:
                self.cfg.log.info(
                    "Merging query alignment with reference alignment using MUSCLE"
                )
                self.require_muscle()
                refaln = self.refjson.get_alignment(fout=self.tmp_refaln)
                m = muscle(self.cfg)
                self.epa_alignment = m.merge(refaln, self.tmpquery)
        else:
            self.cfg.log.info("Query sequences are not aligned")
            self.cfg.log.info(
                "Align query sequences to the reference alignment using HMMER")
            self.require_hmmer()
            self.align_to_refenence(self.noalign, minp=minp)

Example #3

Show file

 def load_reduced_refalign(self):
     """Load ``self.reduced_refalign_fname`` into ``self.reduced_refalign_seqs``.

     The file is tried as "fasta" and then as "phylip_relaxed"; the first
     format ``SeqGroup`` accepts wins.  If neither works, a fatal error is
     reported through ``self.cfg.exit_fatal_error``.
     """
     formats = ["fasta", "phylip_relaxed"]
     # Reset first so the failure check below reflects this call only and
     # never trips over a missing or stale attribute.
     self.reduced_refalign_seqs = None
     for fmt in formats:
         try:
             self.reduced_refalign_seqs = SeqGroup(
                 sequences=self.reduced_refalign_fname, format=fmt)
             break
         except Exception:
             # Wrong format guess: try the next one.  Unlike a bare
             # ``except`` this does not swallow KeyboardInterrupt/SystemExit.
             continue
     if self.reduced_refalign_seqs is None:
         errmsg = "FATAL ERROR: Invalid input file format in %s! (load_reduced_refalign)" % self.reduced_refalign_fname
         self.cfg.exit_fatal_error(errmsg)

Example #4

Show file

 def load_alignment(self):
     """Load the alignment file ``self.cfg.align_fname`` into ``self.input_seqs``.

     Each format in the list below is tried in order until ``SeqGroup``
     accepts the file; every rejected guess is logged at debug level.  If no
     format works, a user error is reported through
     ``self.cfg.exit_user_error``.
     """
     in_file = self.cfg.align_fname
     self.input_seqs = None
     formats = [
         "fasta", "phylip_relaxed", "iphylip_relaxed", "phylip", "iphylip"
     ]
     for fmt in formats:
         try:
             self.input_seqs = SeqGroup(sequences=in_file, format=fmt)
             break
         except Exception:
             # A parse failure only means the guess was wrong.  Unlike a bare
             # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
             self.cfg.log.debug("Guessing input format: not " + fmt)
     if self.input_seqs is None:
         self.cfg.exit_user_error(
             "Invalid input file format: %s\nThe supported input formats are fasta and phylip"
             % in_file)

Example #5

Show file

File: test_input_validator.py Project: eliasOnAWS/sativa

    def setUp(self):
        """Build the ``InputValidator`` fixture and the expected result lists.

        The taxonomy and PHYLIP alignment are read from the ``testfiles``
        directory located next to this test module.
        """
        config = EpacTrainerConfig()
        config.debug = True

        # Resolve the fixture directory relative to this file.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        fixtures = os.path.join(module_dir, "testfiles")
        taxonomy_path = os.path.join(fixtures, "test.tax")
        alignment_path = os.path.join(fixtures, "test.phy")

        taxonomy = Taxonomy(EpacConfig.REF_SEQ_PREFIX, taxonomy_path)
        alignment = SeqGroup(sequences=alignment_path, format="phylip")
        self.inval = InputValidator(cfg=None or config, tax=None or taxonomy,
                                    seqs=None or alignment,
                                    verbose=False) if False else InputValidator(
                                        config, taxonomy, alignment, False)

        self.expected_mis_ids = ["Missing1", "Missing2"]
        self.expected_dups = ["DupSeq(01)", "DupSeq02"]

        # Map every expected duplicate id to its rank id in the taxonomy.
        merges = []
        for seq_id in self.expected_dups:
            merges.append(self.inval.taxonomy.seq_rank_id(seq_id))
        self.expected_merges = merges