예제 #1
0
파일: testConfig.py 프로젝트: nvt-1009/sepp
    def testConfigFileMissingFile(self):
        # Clear the options singleton so that test cases stay independent of
        # each other.
        config._options_singelton = None
        # Replace the main config path with self.fp_config for this test.
        config.main_config_path = self.fp_config

        program = sys.argv[0]
        config_arg = get_data_path("configs/test2.config")
        fragment_arg = get_data_path("simulated/test.fas")
        alignment_arg = get_data_path("simulated/test.small.fas")
        sys.argv = [program,
                    "-c", config_arg,
                    "-f", fragment_arg,
                    "-a", alignment_arg]

        # (option attribute, expected end of the file name, failure message)
        expectations = (
            ("config_file", "data/configs/test2.config",
             "Commandline option -c not read properly"),
            ("alignment_file", "data/simulated/test.small.fas",
             "Config file option alignment not read properly"),
            ("fragment_file", "data/simulated/test.fas",
             "Command-line option -f alignment not read properly"),
        )
        for attribute, name_suffix, failure_message in expectations:
            handle = getattr(options(), attribute)
            # Each option must be an open file object with the expected name.
            assert isinstance(handle, filetypes), failure_message
            assert handle.name.endswith(name_suffix), failure_message
예제 #2
0
파일: testUPP.py 프로젝트: nvt-1009/sepp
    def setUp(self):
        # Reset the scheduler's job pool and install a command line that only
        # names the test configuration file.
        sepp.scheduler._jobPool = None
        config_path = get_data_path("configs/test3.config")
        sys.argv = [sys.argv[0], "-c", config_path]

        self.x = UPPExhaustiveAlgorithm()
        opts = self.x.options

        # Plain option values used by the tests.
        fixed_settings = (
            ("backbone_size", 407),
            ("alignment_size", 50),
            ("placement_size", None),
            ("backtranslation_sequence_file", None),
            ("long_branch_filter", 100000),
            ("molecule", 'amino'),
        )
        for option_name, option_value in fixed_settings:
            setattr(opts, option_name, option_value)
        self.x.molecule = 'amino'

        # Input files, opened for reading.
        input_files = (
            ("alignment_file", "upp_frag/backbone_pasta.fasta"),
            ("sequence_file", "upp_frag/query.fas"),
            ("tree_file", "upp_frag/backbone_pasta.fasttree"),
        )
        for option_name, data_file in input_files:
            setattr(opts, option_name, open(get_data_path(data_file), "r"))
        opts.outdir = tempfile.mkdtemp()

        # Bundled binaries carry a "-64"/"-32" suffix, except on Darwin where
        # no suffix is used.
        system_name = platform.system()
        if system_name == 'Darwin':
            suffix = ""
        elif sys.maxsize > 2**32:
            suffix = "-64"
        else:
            suffix = "-32"
        for tool in ('hmmalign', 'hmmbuild', 'hmmsearch'):
            binary = "../../../tools/bundled/%s/%s%s" % (
                system_name, tool, suffix)
            setattr(opts, tool, Namespace(path=get_data_path(binary)))
예제 #3
0
파일: testConfig.py 프로젝트: smirarab/sepp
    def testMainConfigFile(self):
        # Checks that the pplacer, hmmalign and hmmsearch sections are present
        # on the parsed options and that, once each one is pointed at the
        # file under tools/bundled/Darwin, that path exists on disk.

        # Just to make different test cases independent of each other.
        config._options_singelton = None

        sys.argv = [sys.argv[0], "-c", get_data_path("configs/test2.config")]

        for tool in ("pplacer", "hmmalign", "hmmsearch"):
            tool_options = getattr(options(), tool)
            # Check for a missing section before touching .path, so that it
            # is reported by the assertion rather than by an AttributeError
            # raised on None.
            assert tool_options is not None, \
                "main config file options not read properly: %s" % tool

            # set the tool's filepath to a file shipped with the code base
            tool_options.path = get_data_path(
                "../../../tools/bundled/Darwin/%s" % tool)
            assert os.path.exists(tool_options.path), \
                ("main config file options not read properly, or nonexistent "
                 "binaries: %s = %s" % (tool, tool_options.path))
예제 #4
0
파일: testSepp.py 프로젝트: sfeng1030/Sepp
    def setUp(self):
        # Reset the scheduler's job pool and install a command line that only
        # names the test configuration file.
        sepp.scheduler._jobPool = None
        config_path = get_data_path("configs/test2.config")
        sys.argv = [sys.argv[0], "-c", config_path]

        self.x = ExhaustiveAlgorithm()
        opts = self.x.options

        # Reference inputs, opened for reading.
        reference_inputs = (
            ("alignment_file",
             "q2-fragment-insertion/reference_alignment_tiny.fasta"),
            ("info_file",
             "q2-fragment-insertion/RAxML_info-reference-gg-raxml-bl.info"),
            ("tree_file",
             "q2-fragment-insertion/reference_phylogeny_tiny.nwk"),
        )
        for option_name, data_file in reference_inputs:
            setattr(opts, option_name, open(get_data_path(data_file), "r"))
        opts.outdir = tempfile.mkdtemp()

        # Bundled binaries carry a "-64"/"-32" suffix, except on Darwin where
        # no suffix is used.
        system_name = platform.system()
        if system_name == 'Darwin':
            suffix = ""
        elif sys.maxsize > 2**32:
            suffix = "-64"
        else:
            suffix = "-32"
        for tool in ('hmmalign', 'hmmbuild', 'hmmsearch', 'pplacer'):
            binary = "../../../tools/bundled/%s/%s%s" % (
                system_name, tool, suffix)
            setattr(opts, tool, Namespace(path=get_data_path(binary)))
예제 #5
0
파일: testConfig.py 프로젝트: nvt-1009/sepp
    def testMainConfigFile(self):
        # Checks that the pplacer, hmmalign and hmmsearch sections are present
        # on the parsed options and that, once each one is pointed at the
        # file under tools/bundled/Darwin, that path exists on disk.

        # Just to make different test cases independent of each other.
        config._options_singelton = None

        sys.argv = [sys.argv[0], "-c", get_data_path("configs/test2.config")]

        for tool in ("pplacer", "hmmalign", "hmmsearch"):
            tool_options = getattr(options(), tool)
            # Check for a missing section before touching .path, so that it
            # is reported by the assertion rather than by an AttributeError
            # raised on None.
            assert tool_options is not None, \
                "main config file options not read properly: %s" % tool

            # set the tool's filepath to a file shipped with the code base
            tool_options.path = get_data_path(
                "../../../tools/bundled/Darwin/%s" % tool)
            assert os.path.exists(tool_options.path), \
                ("main config file options not read properly, or nonexistent "
                 "binaries: %s = %s" % (tool, tool_options.path))
예제 #6
0
    def testMerge(self):
        # Runs a MergeJsonJob on the contents of tmp/tempmerge, passing
        # tmp/mergedfile as the second setup argument (presumably the output
        # file -- confirm against MergeJsonJob.setup).
        sys.argv = [sys.argv[0]]
        merge_input_path = get_data_path("tmp/tempmerge")
        # Read through a context manager so the file handle is closed again
        # instead of being left to the garbage collector.
        with open(merge_input_path) as merge_input:
            stdindata = merge_input.read()

        # write path of seppJsonMerger.jar into configuration
        sepp.config.options().jsonmerger = Namespace(
            path=get_data_path("../../../tools/merge/seppJsonMerger.jar"))

        # Rewrite the 'data/tmp/pplacer.extended' prefix found in the input
        # so that it points into the directory holding tmp/tempmerge.
        mergeJsonJob = MergeJsonJob()
        mergeJsonJob.setup(
            stdindata.replace(
                'data/tmp/pplacer.extended',
                os.path.dirname(merge_input_path) + '/pplacer.extended'),
            get_data_path("tmp/mergedfile"))
        mergeJsonJob.run()
예제 #7
0
    def testAlignmentReadFasta(self):
        # Reads the mock FASTA file, then checks that len() of the alignment
        # is 65 and that no column index reports as all-gap.
        alg = MutableAlignment()
        fasta_path = get_data_path("mock/pyrg/sate.fasta")
        alg.read_filepath(fasta_path)

        entry_count = len(alg)
        assert entry_count == 65, \
            "MutableAlignment length is %s" % entry_count

        column_has_data = [
            not alg.is_all_gap(column) for column in range(alg.get_length())]
        assert all(column_has_data)
예제 #8
0
 def test_notpiped_jobs(self):
     # Runs the algorithm on the tiny fragment file with the hmmsearch
     # "piped" option set to the string "False" and expects results.
     sepp.config.options().hmmsearch.piped = "False"
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments_tiny.fasta"), "r")
     self.x.run()
     # assertIsNotNone states the intent directly and fails with a
     # descriptive message, unlike assertTrue(... is not None).
     self.assertIsNotNone(self.x.results)
예제 #9
0
파일: testConfig.py 프로젝트: nvt-1009/sepp
    def testConfigFile(self):
        # Clear the options singleton so that test cases stay independent of
        # each other.
        config._options_singelton = None
        # Replace the main config path with self.fp_config for this test.
        config.main_config_path = self.fp_config

        command_line = [sys.argv[0]]
        command_line += ["-A", "2"]
        command_line += ["-c", get_data_path("configs/test.config")]
        command_line += ["--outdir", "dir_form_commandline"]
        sys.argv = command_line

        alignment_size = options().alignment_size
        assert alignment_size == 2, \
            "Commandline option -A not read properly"

        config_file = options().config_file
        assert (isinstance(config_file, filetypes) and
                config_file.name.endswith("data/configs/test.config")), \
            "Commandline option -c not read properly"

        pplacer = options().pplacer
        assert pplacer is not None and pplacer.path == "pplacer", \
            "config file options not read properly"

        placement_size = options().placement_size
        assert placement_size == 10, \
            "Config file option placementSize not read properly"

        # The --outdir given on the command line must be the one in effect.
        outdir = options().outdir
        assert outdir.endswith("dir_form_commandline"), \
            "Config file value outdir is not properly overwritten:%s " % outdir

        tempdir = options().tempdir
        assert tempdir is not None, \
            "Default value not properly set for tempfile attribute"
예제 #10
0
    def testAlignmentReadFasta(self):
        # Reads the mock FASTA file, then checks that len() of the alignment
        # is 65 and that no column index reports as all-gap.
        alg = MutableAlignment()
        fasta_path = get_data_path("mock/pyrg/sate.fasta")
        alg.read_filepath(fasta_path)

        entry_count = len(alg)
        assert entry_count == 65, \
            "MutableAlignment length is %s" % entry_count

        column_has_data = [
            not alg.is_all_gap(column) for column in range(alg.get_length())]
        assert all(column_has_data)
예제 #11
0
파일: testConfig.py 프로젝트: smirarab/sepp
    def testConfigFile(self):
        # Clear the options singleton so that test cases stay independent of
        # each other.
        config._options_singelton = None
        # Replace the main config path with self.fp_config for this test.
        config.main_config_path = self.fp_config

        command_line = [sys.argv[0]]
        command_line += ["-A", "2"]
        command_line += ["-c", get_data_path("configs/test.config")]
        command_line += ["--outdir", "dir_form_commandline"]
        sys.argv = command_line

        alignment_size = options().alignment_size
        assert alignment_size == 2, \
            "Commandline option -A not read properly"

        config_file = options().config_file
        assert (isinstance(config_file, filetypes) and
                config_file.name.endswith("data/configs/test.config")), \
            "Commandline option -c not read properly"

        pplacer = options().pplacer
        assert pplacer is not None and pplacer.path == "pplacer", \
            "config file options not read properly"

        placement_size = options().placement_size
        assert placement_size == 10, \
            "Config file option placementSize not read properly"

        # The --outdir given on the command line must be the one in effect.
        outdir = options().outdir
        assert outdir.endswith("dir_form_commandline"), \
            "Config file value outdir is not properly overwritten:%s " % outdir

        tempdir = options().tempdir
        assert tempdir is not None, \
            "Default value not properly set for tempfile attribute"
예제 #12
0
 def test_diamCent(self):
     # Runs the algorithm with the "centroid" decomposition strategy, a
     # maxDiam of 0.1 and a fragmentChunkSize of 1, and expects results.
     self.x.options.fragment_file = open(
         get_data_path("q2-fragment-insertion/input_fragments.fasta"), "r")
     self.x.options.maxDiam = 0.1
     self.x.options.fragmentChunkSize = 1
     self.x.options.decomp_strategy = "centroid"
     self.x.run()
     # assertIsNotNone states the intent directly and fails with a
     # descriptive message, unlike assertTrue(... is not None).
     self.assertIsNotNone(self.x.results)
예제 #13
0
파일: testSepp.py 프로젝트: sfeng1030/Sepp
 def test_seqnames_whitespaces(self):
     # Running on input_fragments_spaces.fasta must raise a ValueError whose
     # text matches "contain either whitespaces: ", and must leave
     # self.x.results unset.
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments_spaces.fasta"), "r")
     with self.assertRaisesRegex(ValueError,
                                 "contain either whitespaces: "):
         self.x.run()
     # assertIsNone shows the unexpected value on failure, unlike
     # assertTrue(... is None).
     self.assertIsNone(self.x.results)
예제 #14
0
파일: testSepp.py 프로젝트: sfeng1030/Sepp
 def test_id_collision_collision(self):
     # Running on input_fragments_collide.fasta must raise a ValueError
     # whose text matches the name-overlap message, and must leave
     # self.x.results unset.
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments_collide.fasta"), "r")
     with self.assertRaisesRegex(
             ValueError,
             ' whose names overlap with names in your reference'):
         self.x.run()
     # assertIsNone shows the unexpected value on failure, unlike
     # assertTrue(... is None).
     self.assertIsNone(self.x.results)
예제 #15
0
파일: testSepp.py 프로젝트: smirarab/sepp
 def test_seqnames_whitespaces(self):
     # Running on input_fragments_spaces.fasta must raise a ValueError whose
     # text matches "contain either whitespaces: ", and must leave
     # self.x.results unset.
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments_spaces.fasta"), "r")
     with self.assertRaisesRegex(
             ValueError,
             "contain either whitespaces: "):
         self.x.run()
     # assertIsNone shows the unexpected value on failure, unlike
     # assertTrue(... is None).
     self.assertIsNone(self.x.results)
예제 #16
0
파일: testSepp.py 프로젝트: smirarab/sepp
 def test_id_collision_collision(self):
     # Running on input_fragments_collide.fasta must raise a ValueError
     # whose text matches the name-overlap message, and must leave
     # self.x.results unset.
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments_collide.fasta"), "r")
     with self.assertRaisesRegex(
             ValueError,
             ' whose names overlap with names in your reference'):
         self.x.run()
     # assertIsNone shows the unexpected value on failure, unlike
     # assertTrue(... is None).
     self.assertIsNone(self.x.results)
예제 #17
0
 def test_diamCent(self):
     # Runs the algorithm with the "centroid" decomposition strategy, a
     # maxDiam of 0.1 and a fragmentChunkSize of 1, and expects results.
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments.fasta"), "r")
     self.x.options.maxDiam = 0.1
     self.x.options.fragmentChunkSize = 1
     self.x.options.decomp_strategy = "centroid"
     self.x.run()
     # assertIsNotNone states the intent directly and fails with a
     # descriptive message, unlike assertTrue(... is not None).
     self.assertIsNotNone(self.x.results)
예제 #18
0
 def resetSepp(self):
     # Clear the scheduler's job pool and parser, then create a new
     # ExhaustiveAlgorithm configured with the q2-fragment-insertion data.
     sepp.scheduler._jobPool = None
     sepp.scheduler._parser = None

     self.x = ExhaustiveAlgorithm()
     opts = self.x.options

     # Input files, opened for reading.
     input_files = (
         ("alignment_file",
          "q2-fragment-insertion/reference_alignment_tiny.fasta"),
         ("info_file",
          "q2-fragment-insertion/RAxML_info-reference-gg-raxml-bl.info"),
         ("tree_file",
          "q2-fragment-insertion/reference_phylogeny_tiny.nwk"),
         ("fragment_file",
          "q2-fragment-insertion/input_fragments.fasta"),
     )
     for option_name, data_file in input_files:
         setattr(opts, option_name, open(get_data_path(data_file), "r"))

     opts.outdir = tempfile.mkdtemp()
     opts.placement_size = 20
예제 #19
0
    def resetSepp(self):
        # Clear the scheduler's job pool, then create a new
        # ExhaustiveAlgorithm configured with the reference data and the
        # bundled tool binaries.
        sepp.scheduler._jobPool = None

        self.x = ExhaustiveAlgorithm()
        opts = self.x.options

        # Reference inputs, opened for reading.
        reference_inputs = (
            ("alignment_file",
             "q2-fragment-insertion/reference_alignment_tiny.fasta"),
            ("info_file",
             "q2-fragment-insertion/RAxML_info-reference-gg-raxml-bl.info"),
            ("tree_file",
             "q2-fragment-insertion/reference_phylogeny_tiny.nwk"),
        )
        for option_name, data_file in reference_inputs:
            setattr(opts, option_name, open(get_data_path(data_file), "r"))
        opts.outdir = tempfile.mkdtemp()

        # Bundled binaries carry a "-64"/"-32" suffix, except on Darwin where
        # no suffix is used.
        system_name = platform.system()
        if system_name == 'Darwin':
            suffix = ""
        elif sys.maxsize > 2**32:
            suffix = "-64"
        else:
            suffix = "-32"
        for tool in ('hmmalign', 'hmmbuild', 'hmmsearch', 'pplacer'):
            binary = "../../../tools/bundled/%s/%s%s" % (
                system_name, tool, suffix)
            setattr(opts, tool, Namespace(path=get_data_path(binary)))
예제 #20
0
    def testReadOnlySubAlignment(self):
        # Builds a ReadonlySubalignment over three sequences of the mock
        # alignment and checks its keys, its sequences, that item assignment
        # is rejected, its gap queries, writing it to self.fp_dummy1 and
        # converting it to a mutable alignment.
        alg = MutableAlignment()
        alg.read_filepath(get_data_path("mock/pyrg/sate.fasta"))

        subset = [
            'NC_008701_720717_722309', 'NC_013156_149033_150643',
            'NC_013887_802739_801129'
        ]
        readonly_subalignment = ReadonlySubalignment(subset, alg)

        assert len(readonly_subalignment) == 3, len(readonly_subalignment)

        assert set(readonly_subalignment.keys()) == set(
            readonly_subalignment.get_sequence_names()) == set(subset), \
            "Subalignment keys not matching given keys %s vs %s" % (
            list(readonly_subalignment.keys()), subset)

        # Every returned entry must belong to the subset and carry the same
        # sequence as the parent alignment.
        for (k, s) in readonly_subalignment.items():
            assert k in subset, \
                "%s not found in subset but returned by subalignment" % k
            assert s == alg[k], \
                "sequence associated with %s not matching parent alignment" % k

        # Item assignment must be rejected with a TypeError.  The failure is
        # raised explicitly so that it is not stripped under "python -O".
        try:
            readonly_subalignment[2] = "ACGT"
        except TypeError:
            pass
        else:
            raise AssertionError(
                "Readonly alignment is successfully modified. ")

        assert readonly_subalignment.get_length() == alg.get_length(), \
            "alignment length should not change"

        assert readonly_subalignment.is_aligned() is True

        assert readonly_subalignment.is_all_gap(2) is True, \
            "Site 2 should be all gaps"
        assert readonly_subalignment.is_all_gap(150) is False, \
            "Site 150 should not be all gaps"

        readonly_subalignment.write_to_path(
            self.fp_dummy1)  # "mock/pyrg/sate.sub.fasta"

        # After delete_all_gap() no column of the mutable copy may report as
        # all-gap.
        mutable_subalignment = readonly_subalignment.get_mutable_alignment()
        mutable_subalignment.delete_all_gap()

        assert all([
            not mutable_subalignment.is_all_gap(i)
            for i in range(0, mutable_subalignment.get_length())
        ])
예제 #21
0
파일: testConfig.py 프로젝트: smirarab/sepp
    def testConfigFileMissingFile(self):
        # Clear the options singleton so that test cases stay independent of
        # each other.
        config._options_singelton = None
        # Replace the main config path with self.fp_config for this test.
        config.main_config_path = self.fp_config

        program = sys.argv[0]
        config_arg = get_data_path("configs/test2.config")
        fragment_arg = get_data_path("simulated/test.fas")
        alignment_arg = get_data_path("simulated/test.small.fas")
        command_line = [program]
        command_line.extend(("-c", config_arg))
        command_line.extend(("-f", fragment_arg))
        command_line.extend(("-a", alignment_arg))
        sys.argv = command_line

        # (option attribute, expected end of the file name, failure message)
        expectations = (
            ("config_file", "data/configs/test2.config",
             "Commandline option -c not read properly"),
            ("alignment_file", "data/simulated/test.small.fas",
             "Config file option alignment not read properly"),
            ("fragment_file", "data/simulated/test.fas",
             "Command-line option -f alignment not read properly"),
        )
        for attribute, name_suffix, failure_message in expectations:
            handle = getattr(options(), attribute)
            # Each option must be an open file object with the expected name.
            assert isinstance(handle, filetypes), failure_message
            assert handle.name.endswith(name_suffix), failure_message
예제 #22
0
    def testReadOnlySubAlignment(self):
        # Builds a ReadonlySubalignment over three sequences of the mock
        # alignment and checks its keys, its sequences, that item assignment
        # is rejected, its gap queries, writing it to self.fp_dummy1 and
        # converting it to a mutable alignment.
        alg = MutableAlignment()
        alg.read_filepath(get_data_path("mock/pyrg/sate.fasta"))

        subset = ['NC_008701_720717_722309', 'NC_013156_149033_150643',
                  'NC_013887_802739_801129']
        readonly_subalignment = ReadonlySubalignment(subset, alg)

        assert len(readonly_subalignment) == 3, len(readonly_subalignment)

        assert set(readonly_subalignment.keys()) == set(
            readonly_subalignment.get_sequence_names()) == set(subset), \
            "Subalignment keys not matching given keys %s vs %s" % (
            list(readonly_subalignment.keys()), subset)

        # Every returned entry must belong to the subset and carry the same
        # sequence as the parent alignment.
        for (k, s) in readonly_subalignment.items():
            assert k in subset, \
                "%s not found in subset but returned by subalignment" % k
            # "%s" is required here: "%k" is not a valid conversion and would
            # raise ValueError instead of reporting the mismatch.
            assert s == alg[k], \
                "sequence associated with %s not matching parent alignment" % k

        # Item assignment must be rejected with a TypeError.  The failure is
        # raised explicitly so that it is not stripped under "python -O".
        try:
            readonly_subalignment[2] = "ACGT"
        except TypeError:
            pass
        else:
            raise AssertionError(
                "Readonly alignment is successfully modified. ")

        assert readonly_subalignment.get_length() == alg.get_length(), \
            "alignment length should not change"

        assert readonly_subalignment.is_aligned() is True

        assert readonly_subalignment.is_all_gap(2) is True, \
            "Site 2 should be all gaps"
        assert readonly_subalignment.is_all_gap(150) is False, \
            "Site 150 should not be all gaps"

        readonly_subalignment.write_to_path(
            self.fp_dummy1)  # "mock/pyrg/sate.sub.fasta"

        # After delete_all_gap() no column of the mutable copy may report as
        # all-gap.
        mutable_subalignment = readonly_subalignment.get_mutable_alignment()
        mutable_subalignment.delete_all_gap()

        assert all([not mutable_subalignment.is_all_gap(i)
                    for i in range(0, mutable_subalignment.get_length())])
예제 #23
0
파일: testSepp.py 프로젝트: sfeng1030/Sepp
 def test_id_collision_working(self):
     # Running on input_fragments.fasta must complete and produce results.
     self.x.options.fragment_file = open(
         get_data_path("q2-fragment-insertion/input_fragments.fasta"), "r")
     self.x.run()
     # assertIsNotNone states the intent directly and fails with a
     # descriptive message, unlike assertTrue(... is not None).
     self.assertIsNotNone(self.x.results)
예제 #24
0
파일: testSepp.py 프로젝트: smirarab/sepp
 def test_id_collision_working(self):
     # Running on input_fragments.fasta must complete and produce results.
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments.fasta"), "r")
     self.x.run()
     # assertIsNotNone states the intent directly and fails with a
     # descriptive message, unlike assertTrue(... is not None).
     self.assertIsNotNone(self.x.results)
예제 #25
0
    def testExtendedAlignment(self):
        # Splits the simulated alignment into two sub-problems, builds an
        # ExtendedAlignment for each from the .sto files kept under tmp/,
        # merges the two and checks the merged result against the original
        # alignment.
        subset = [
            "SFIF", "SFII", "SCFC", "SGHD", "SDCC", "SBGE", "SFBB", "SDI",
            "SCGB", "SJGF", "SGBI", "SCJA", "SGAD", "SHEB", "SFHB", "SDJI",
            "SHED", "SJJJ", "SBBE", "SCCH", "SDJB", "SDAC", "SHEH", "SFDC",
            "SFEI", "SHHB", "SC", "SIAB", "SDDI", "SBCB", "SJB", "SEBD",
            "SFGD", "SHA", "SIDA", "SGHI", "SGIB", "SBFJ", "SFIE", "SCJF",
            "SJHJ", "SJBG", "SEJI", "SFFF", "SJ", "SIII", "SJHH", "SEIH",
            "SBDC", "SHDJ", "SJDD", "SGDB", "SIHA", "SIBB", "SECC", "SCAD",
            "SGBB", "SGIF", "SJHC", "SFCD", "SEAA", "SEFF", "SDFG", "SDJE",
            "SCFG", "SFH", "SCJ", "SDDD", "SEGD", "SCIH", "SDAG", "SCJE",
            "SFAJ", "SIDJ", "SE", "SHBC", "SJFF", "SCHD", "SBHA", "SEDF",
            "SFAF", "SEDD", "SDHD", "SGJD", "SIBH", "SGDF", "SIFA", "SJGA",
            "SIJB", "SFI", "SGA", "SBFC", "SBJA", "SFFC", "SFDH", "SFEE",
            "SBDF", "SGBJ", "SDHE", "SJIB", "SHHI", "SIDE", "SJII"]

        # Backbone alignment with all-gap columns removed; tlen is its length
        # and serves as the reference length in the checks below.
        alg = MutableAlignment()
        alg.read_filepath(get_data_path("simulated/test.fasta"))
        alg.delete_all_gap()
        tlen = alg.get_length()

        frg = MutableAlignment()
        frg.read_filepath(get_data_path("simulated/test.fas"))
        # print frg.get_num_taxa()

        # Parent problem over all sequences, with two children: one for
        # `subset`, one for the remaining sequences.
        pp = SeppProblem(list(alg.keys()))
        pp.fragments = frg
        pp.subalignment = alg

        cp1 = SeppProblem(subset, pp)
        cp2 = SeppProblem(list(set(alg.keys()) - set(subset)), pp)
        # Fragments are assigned by the last character of their name:
        # digit 9 goes to cp1, digits 0 and 1 go to cp2.
        cp1.fragments = ReadonlySubalignment(
            [k for k in list(frg.keys()) if int(k[-1]) >= 9], frg)
        cp2.fragments = ReadonlySubalignment(
            [k for k in list(frg.keys()) if int(k[-1]) <= 1], frg)

        cp1labels = cp1.write_subalignment_without_allgap_columns(
            self.fp_dummy1)  # tmp/cp1.fasta
        cp2labels = cp2.write_subalignment_without_allgap_columns(
            self.fp_dummy2)  # tmp/cp2.fasta
        # The files just written must not contain any all-gap column.
        tmp = MutableAlignment().read_filepath(self.fp_dummy1)
        assert all([not tmp.is_all_gap(pos)
                    for pos in range(0, tmp.get_length())])
        tmp = MutableAlignment().read_filepath(self.fp_dummy2)
        assert all([not tmp.is_all_gap(pos)
                    for pos in range(0, tmp.get_length())])

        cp1.fragments.write_to_path(self.fp_dummy3)  # tmp/cp1.frags.fas
        cp2.fragments.write_to_path(self.fp_dummy4)  # tmp/cp2.frags.fas

        # We have done the hmmalign before.
        # don't worry about that right now

        ext1 = ExtendedAlignment(cp1.fragments)
        ext1.build_extended_alignment(
            self.fp_dummy1,
            get_data_path("tmp/cp1.extended.sto"))
        ext1.relabel_original_columns(cp1labels)
        ext2 = ExtendedAlignment(cp2.fragments)
        ext2.build_extended_alignment(
            self.fp_dummy2,
            get_data_path("tmp/cp2.extended.sto"))
        ext2.relabel_original_columns(cp2labels)

        # merge_in returns a count that the checks below subtract as the
        # "mixed" insertion columns.
        extmerger = ExtendedAlignment([])
        extmerger.merge_in(ext1)
        mixed = extmerger.merge_in(ext2)

        extmerger.write_to_path(self.fp_dummy5)  # tmp/extended.merged.fasta

        assert extmerger.is_aligned(), "Merged alignment is not aligned"
        # Columns with a negative label are the ones reported as insertions.
        in1 = len([x for x in ext1._col_labels if x < 0])
        in2 = len([x for x in ext2._col_labels if x < 0])
        assert (in1 + in2 + tlen - mixed) == extmerger.get_length(), \
            ("Lengths don't match up after merging. Merged:%d. Insertion1:%d "
             "Insertion2:%d BaseLen:%d Mixed-insertion: %d") % (
                extmerger.get_length(), in1, in2, tlen, mixed)
        # The message reports in1 and in2 (not in1 twice) so that a failure
        # shows the real insertion count of the second alignment.
        assert (in1 + in2 - mixed) == len(extmerger.get_insertion_columns()), \
            ("Columns are not correctly labeled after merging. Merged "
             "insertion count:%d. Insertion1:%d Insertion2:%d Mixed-insertion:"
             " %d") % (
             len(list(extmerger.iter_insertion_columns())), in1, in2, mixed)

        # The base alignment of the merge, stripped of all-gap columns, must
        # be identical to the original backbone alignment.
        tmp = extmerger.get_base_readonly_alignment().get_mutable_alignment()
        tmp.delete_all_gap()
        assert tmp.is_aligned(), "merged alignment should be aligned!"
        assert tmp.get_length() == tlen, "merged alignment has wrong length"
        assert all([alg[k] == s for (k, s) in list(tmp.items())]), \
            "merged alignment should match original alignment"
예제 #26
0
 def test_fake_jobs(self):
     # Running on the tiny fragment file must complete and produce results.
     self.x.options.fragment_file = open(
         get_data_path(
             "q2-fragment-insertion/input_fragments_tiny.fasta"), "r")
     self.x.run()
     # assertIsNotNone states the intent directly and fails with a
     # descriptive message, unlike assertTrue(... is not None).
     self.assertIsNotNone(self.x.results)
예제 #27
0
 def setUp(self):
     # Install a command line that only names the test configuration file,
     # then rebuild the algorithm under test through resetSepp().
     config_path = get_data_path("configs/test2.config")
     program = sys.argv[0]
     sys.argv = [program, "-c", config_path]
     self.resetSepp()
예제 #28
0
    def testExtendedAlignment(self):
        # Splits the simulated alignment into two sub-problems, builds an
        # ExtendedAlignment for each from the .sto files kept under tmp/,
        # merges the two and checks the merged result against the original
        # alignment.
        subset = [
            "SFIF", "SFII", "SCFC", "SGHD", "SDCC", "SBGE", "SFBB", "SDI",
            "SCGB", "SJGF", "SGBI", "SCJA", "SGAD", "SHEB", "SFHB", "SDJI",
            "SHED", "SJJJ", "SBBE", "SCCH", "SDJB", "SDAC", "SHEH", "SFDC",
            "SFEI", "SHHB", "SC", "SIAB", "SDDI", "SBCB", "SJB", "SEBD",
            "SFGD", "SHA", "SIDA", "SGHI", "SGIB", "SBFJ", "SFIE", "SCJF",
            "SJHJ", "SJBG", "SEJI", "SFFF", "SJ", "SIII", "SJHH", "SEIH",
            "SBDC", "SHDJ", "SJDD", "SGDB", "SIHA", "SIBB", "SECC", "SCAD",
            "SGBB", "SGIF", "SJHC", "SFCD", "SEAA", "SEFF", "SDFG", "SDJE",
            "SCFG", "SFH", "SCJ", "SDDD", "SEGD", "SCIH", "SDAG", "SCJE",
            "SFAJ", "SIDJ", "SE", "SHBC", "SJFF", "SCHD", "SBHA", "SEDF",
            "SFAF", "SEDD", "SDHD", "SGJD", "SIBH", "SGDF", "SIFA", "SJGA",
            "SIJB", "SFI", "SGA", "SBFC", "SBJA", "SFFC", "SFDH", "SFEE",
            "SBDF", "SGBJ", "SDHE", "SJIB", "SHHI", "SIDE", "SJII"
        ]

        # Backbone alignment with all-gap columns removed; tlen is its length
        # and serves as the reference length in the checks below.
        alg = MutableAlignment()
        alg.read_filepath(get_data_path("simulated/test.fasta"))
        alg.delete_all_gap()
        tlen = alg.get_length()

        frg = MutableAlignment()
        frg.read_filepath(get_data_path("simulated/test.fas"))
        # print frg.get_num_taxa()

        # Parent problem over all sequences, with two children: one for
        # `subset`, one for the remaining sequences.
        pp = SeppProblem(list(alg.keys()))
        pp.fragments = frg
        pp.subalignment = alg

        cp1 = SeppProblem(subset, pp)
        cp2 = SeppProblem(list(set(alg.keys()) - set(subset)), pp)
        # Fragments are assigned by the last character of their name:
        # digit 9 goes to cp1, digits 0 and 1 go to cp2.
        cp1.fragments = ReadonlySubalignment(
            [k for k in list(frg.keys()) if int(k[-1]) >= 9], frg)
        cp2.fragments = ReadonlySubalignment(
            [k for k in list(frg.keys()) if int(k[-1]) <= 1], frg)

        cp1labels = cp1.write_subalignment_without_allgap_columns(
            self.fp_dummy1)  # tmp/cp1.fasta
        cp2labels = cp2.write_subalignment_without_allgap_columns(
            self.fp_dummy2)  # tmp/cp2.fasta
        # The files just written must not contain any all-gap column.
        tmp = MutableAlignment().read_filepath(self.fp_dummy1)
        assert all(
            [not tmp.is_all_gap(pos) for pos in range(0, tmp.get_length())])
        tmp = MutableAlignment().read_filepath(self.fp_dummy2)
        assert all(
            [not tmp.is_all_gap(pos) for pos in range(0, tmp.get_length())])

        cp1.fragments.write_to_path(self.fp_dummy3)  # tmp/cp1.frags.fas
        cp2.fragments.write_to_path(self.fp_dummy4)  # tmp/cp2.frags.fas

        # We have done the hmmalign before.
        # don't worry about that right now

        ext1 = ExtendedAlignment(cp1.fragments)
        ext1.build_extended_alignment(self.fp_dummy1,
                                      get_data_path("tmp/cp1.extended.sto"))
        ext1.relabel_original_columns(cp1labels)
        ext2 = ExtendedAlignment(cp2.fragments)
        ext2.build_extended_alignment(self.fp_dummy2,
                                      get_data_path("tmp/cp2.extended.sto"))
        ext2.relabel_original_columns(cp2labels)

        # merge_in returns a count that the checks below subtract as the
        # "mixed" insertion columns.
        extmerger = ExtendedAlignment([])
        extmerger.merge_in(ext1)
        mixed = extmerger.merge_in(ext2)

        extmerger.write_to_path(self.fp_dummy5)  # tmp/extended.merged.fasta

        assert extmerger.is_aligned(), "Merged alignment is not aligned"
        # Columns with a negative label are the ones reported as insertions.
        in1 = len([x for x in ext1._col_labels if x < 0])
        in2 = len([x for x in ext2._col_labels if x < 0])
        assert (in1 + in2 + tlen - mixed) == extmerger.get_length(), \
            ("Lengths don't match up after merging. Merged:%d. Insertion1:%d "
             "Insertion2:%d BaseLen:%d Mixed-insertion: %d") % (
                extmerger.get_length(), in1, in2, tlen, mixed)
        # The message reports in1 and in2 (not in1 twice) so that a failure
        # shows the real insertion count of the second alignment.
        assert (in1 + in2 - mixed) == len(extmerger.get_insertion_columns()), \
            ("Columns are not correctly labeled after merging. Merged "
             "insertion count:%d. Insertion1:%d Insertion2:%d Mixed-insertion:"
             " %d") % (
             len(list(extmerger.iter_insertion_columns())), in1, in2, mixed)

        # The base alignment of the merge, stripped of all-gap columns, must
        # be identical to the original backbone alignment.
        tmp = extmerger.get_base_readonly_alignment().get_mutable_alignment()
        tmp.delete_all_gap()
        assert tmp.is_aligned(), "merged alignment should be aligned!"
        assert tmp.get_length() == tlen, "merged alignment has wrong length"
        assert all([alg[k] == s for (k, s) in list(tmp.items())]), \
            "merged alignment should match original alignment"
예제 #29
0
 def setUp(self):
     # Install a command line that only names the test configuration file,
     # then rebuild the algorithm under test through resetSepp().
     config_path = get_data_path("configs/test2.config")
     program = sys.argv[0]
     sys.argv = [program, "-c", config_path]
     self.resetSepp()