Ejemplo n.º 1
0
    def setUp(self):
        """Run the vtam install helper and make sure ``<test_path>/outdir`` exists."""
        pip_install_vtam_for_tests()

        tests_root = PathManager.get_test_path()
        self.test_path = tests_root
        self.outdir_path = os.path.join(tests_root, 'outdir')

        # Missing parents are created; an already existing directory is accepted.
        out_dir = pathlib.Path(self.outdir_path)
        out_dir.mkdir(parents=True, exist_ok=True)
Ejemplo n.º 2
0
 def setUpClass(cls):
     """Store the paths of the ``test_files/FilesInputCutadapt`` inputs on the class."""
     tests_root = PathManager.get_test_path()
     input_dir = os.path.join(tests_root, "test_files", "FilesInputCutadapt")

     cls.test_path = tests_root
     cls.tags_file_path = input_dir
     # Both fastainfo tables sit directly inside the input directory.
     cls.fastainfo = os.path.join(input_dir, "fastainfo.tsv")
     cls.fastainfoNoDuplicates = os.path.join(input_dir,
                                              "fastainfoNoDuplicates.tsv")
     # Bare file name, not joined to any directory here.
     cls.mergedFasta1 = "14Ben01_1_fw_48.fasta"
Ejemplo n.º 3
0
    def setUpClass(cls):
        """Install the package, reset the output directory and fetch the sorted reads.

        Steps performed, in order:

        1. ``pip install .`` is run with the current interpreter from the
           project path (its exit status is not checked).
        2. ``<test_path>/outdir`` is deleted and ``<test_path>/outdir/data``
           is recreated.
        3. ``sorted.tar.gz`` is downloaded into the data directory, trying the
           three mirror URLs in order, and extracted there.

        Raises:
            Exception: the error raised by the last mirror when all three
                downloads fail.
        """

        cmd = '{} -m pip install . -q --upgrade --use-feature=in-tree-build'.format(
            sys.executable)
        # Windows receives the command as a single string, other platforms
        # receive an argv list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        subprocess.run(args=args, cwd=PathManager.get_project_path())

        cls.package_path = os.path.join(PathManager.get_package_path())
        cls.test_path = os.path.join(PathManager.get_test_path())
        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        ############################################################################################
        #
        # Download sorted reads dataset
        #
        ############################################################################################

        # The archive lives directly in outdir_data_path, which was created above.
        sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")

        # Reuse a local archive of at least 1,000,000 bytes, otherwise download.
        # NOTE(review): outdir is wiped just above, so a cached archive is
        # normally never present at this point - confirm whether that is intended.
        if not os.path.isfile(sorted_tar_path) or pathlib.Path(
                sorted_tar_path).stat().st_size < 1000000:
            mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2,
                           sorted_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(sorted_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   sorted_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Fall through to the next mirror; only the failure of the
                    # last one is propagated to the caller.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(sorted_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_data_path)
Ejemplo n.º 4
0
    def setUpClass(cls):
        """Run the install helper, reset the output directory and fetch the sorted reads.

        Steps performed, in order:

        1. ``pip_install_vtam_for_tests()`` is called (these tests need the
           vtam command in the path).
        2. ``<test_path>/outdir`` is deleted and ``<test_path>/outdir/data``
           is recreated.
        3. ``sorted.tar.gz`` is downloaded into the data directory, trying the
           three mirror URLs in order, and extracted there.

        Raises:
            Exception: the error raised by the last mirror when all three
                downloads fail.
        """

        ########################################################################
        #
        # These tests need the vtam command in the path
        #
        ########################################################################

        pip_install_vtam_for_tests()

        cls.package_path = os.path.join(PathManager.get_package_path())
        cls.test_path = os.path.join(PathManager.get_test_path())
        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        ############################################################################################
        #
        # Download sorted reads dataset (Updated Oct 10, 2020)
        #
        ############################################################################################

        sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")

        # Reuse a local archive of at least 1,000,000 bytes, otherwise download.
        # NOTE(review): outdir is wiped just above, so a cached archive is
        # normally never present at this point - confirm whether that is intended.
        if not os.path.isfile(sorted_tar_path) or pathlib.Path(
                sorted_tar_path).stat().st_size < 1000000:
            mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2,
                           sorted_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(sorted_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   sorted_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Fall through to the next mirror; only the failure of the
                    # last one is propagated to the caller.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(sorted_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_data_path)
Ejemplo n.º 5
0
    def setUp(self):
        """Load the LTG-selection fixture tables and the precomputed taxonomy.

        Reads ``variantid_identity_lineage.tsv`` and ``ltg_bak.tsv`` from
        ``<test_path>/test_runner_ltg_selection``, downloads the precomputed
        taxonomy to ``<test_path>/outdir/taxonomy.tsv`` and stores the
        ``df`` attribute of ``Taxonomy(taxonomy_tsv_path)`` in
        ``self.taxonomy_df``.
        """

        test_path = os.path.join(PathManager.get_test_path())
        fixture_dir = os.path.join(test_path, "test_runner_ltg_selection")
        self.variantid_identity_lineage_df = pandas.read_csv(
            os.path.join(fixture_dir, "variantid_identity_lineage.tsv"),
            sep="\t", header=0)
        self.ltg_bak_df = pandas.read_csv(
            os.path.join(fixture_dir, "ltg_bak.tsv"), sep="\t")

        self.outdir_path = os.path.join(test_path, "outdir")
        pathlib.Path(self.outdir_path).mkdir(exist_ok=True, parents=True)
        taxonomy_tsv_path = os.path.join(self.outdir_path, "taxonomy.tsv")
        CommandTaxonomy(
            taxonomy_tsv=taxonomy_tsv_path).download_precomputed_taxonomy()

        # A DataFrame used to be assembled by hand with pandas here and was then
        # overwritten unconditionally by the assignment below; only the value
        # that was actually kept is computed now.
        self.taxonomy_df = Taxonomy(taxonomy_tsv_path).df
    def setUp(self):
        """Store the package path and a dict of relative placeholder paths on the test."""
        package_root = PathManager.get_package_path()
        self.package_path = package_root
        test_path = PathManager.get_test_path()  # not used further in this method

        # Paths are relative: the first two to the package path, the rest literal.
        self.foopaths = {
            'foofile': os.path.relpath(__file__, package_root),
            'foodir': os.path.relpath(os.path.dirname(__file__), package_root),
            'outdir': 'tests/output',
            'sortedinfo_tsv': "data/example/sortedinfo_mfzr.tsv",
            'tsv_path': "data/example/sortedinfo_mfzr.tsv",
            'known_occurrences': "data/example/known_occurrences.tsv",
        }
Ejemplo n.º 7
0
    def setUpClass(cls):
        """Store paths, a dict of placeholder paths and numeric parameters on the class."""
        package_root = PathManager.get_package_path()
        cls.package_path = package_root
        cls.test_path = PathManager.get_test_path()

        # The first two entries are relative to the package path; the others
        # are literal relative paths.
        cls.foopaths = {
            'foofile': os.path.relpath(__file__, package_root),
            'foodir': os.path.relpath(os.path.dirname(__file__), package_root),
            'sorteddir': 'output',
            'sortedinfo_tsv': "data/example/sortedinfo_mfzr.tsv",
            'optimize_lfn_variant_specific':
                "tests/test_files_dryad.f40v5_small/run1_mfzr_zfzr/optimize_lfn_variant_specific.tsv",
        }

        # Parameter values shared by the tests of this class.
        cls.minseqlength_value_32 = 32
        cls.minseqlength_value_40 = 40
        cls.lfn_variant_replicate_cutoff = 0.002
Ejemplo n.º 8
0
    def setUpClass(cls):
        """Run the install helper, then ``vtam taxonomy`` and ``vtam coi_blast_db``.

        Both commands write below ``<test_path>/outdir``: the taxonomy goes to
        ``taxonomy.tsv`` and the BLAST database to ``coi_blast_db_dir``, which
        is created beforehand. Exit statuses of the commands are not checked.
        """

        # The vtam command is invoked below, so it has to be installed first.
        pip_install_vtam_for_tests()

        cls.test_path = os.path.join(PathManager.get_test_path())
        cls.outdir_path = os.path.join(cls.test_path, 'outdir')

        blast_db_dir = os.path.join(cls.outdir_path, "coi_blast_db_dir")
        cls.args = {
            'taxonomy': os.path.join(cls.outdir_path, "taxonomy.tsv"),
            'coi_blast_db_dir': blast_db_dir,
        }
        pathlib.Path(blast_db_dir).mkdir(parents=True, exist_ok=True)

        # Command templates, executed in this order.
        command_templates = (
            "vtam taxonomy --output {taxonomy} --precomputed",
            "vtam coi_blast_db --blastdbdir {coi_blast_db_dir} --blastdbname coi_blast_db_20200420 ",
        )
        for template in command_templates:
            command_line = template.format(**cls.args)
            # Windows receives the command as a single string, other platforms
            # receive an argv list.
            if sys.platform.startswith("win"):
                subprocess.run(args=command_line)
            else:
                subprocess.run(args=shlex.split(command_line))
Ejemplo n.º 9
0
    def setUp(self):
        """Build the in-memory variant tables and the ``RunnerFilterLFN`` under test.

        ``self.variant_read_count_df`` has 150 rows: one run, one marker,
        variant ids 1..25, and for each variant six rows covering samples
        1 and 2 with replicates 1..3.
        """

        self.__testdir_path = os.path.join(PathManager.get_test_path())

        # Two-row variant table
        self.variant_df = pandas.DataFrame({
            'id': [1, 22],
            'sequence_': ["tata", "tgtg"],
        })
        self.variant_read_count_df = pandas.DataFrame({
            'run_id': [1] * 150,
            'marker_id':
            150 * [1],
            'sample_id': [1, 1, 1, 2, 2, 2] * 25,
            'replicate': [1, 2, 3] * 50,
            'variant_id': [*itertools.chain(*[[l] * 6 for l in range(1, 26)])],
            # [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, ..
            # One read count per row, in the same row order as the columns
            # above (six consecutive values per variant id).
            'read_count': [
                10,
                5,
                0,
                249,
                58,
                185,
                68,
                54,
                100,
                0,
                0,
                0,
                0,
                0,
                0,
                258,
                126,
                500,
                0,
                0,
                0,
                0,
                1,
                0,
                0,
                0,
                1,
                0,
                0,
                0,
                1524,
                1815,
                789,
                118,
                98,
                50,
                1,
                0,
                0,
                0,
                0,
                0,
                0,
                1,
                0,
                0,
                0,
                0,
                125,
                214,
                20,
                1284,
                1789,
                1913,
                0,
                1,
                0,
                0,
                1,
                0,
                15,
                0,
                1,
                0,
                0,
                25,
                0,
                0,
                2,
                598,
                50,
                875,
                2,
                60,
                12,
                1,
                0,
                0,
                1,
                0,
                0,
                0,
                0,
                2,
                0,
                3,
                0,
                0,
                5,
                0,
                65,
                98,
                152,
                2,
                0,
                1,
                52,
                74,
                85,
                0,
                0,
                0,
                1,
                0,
                0,
                5,
                0,
                8,
                5,
                0,
                1,
                0,
                0,
                21,
                0,
                0,
                0,
                524,
                658,
                125,
                0,
                0,
                0,
                2,
                0,
                10,
                25,
                58,
                23,
                10980,
                8999,
                13814,
                0,
                5,
                0,
                0,
                2,
                0,
                1,
                0,
                1,
                1,
                0,
                284,
                0,
                2,
                0,
                0,
                5,
                0,
            ],
        })
        self.marker_id = 1
        # Runner under test, constructed from the read-count table above
        self.filter_lfn_runner = RunnerFilterLFN(self.variant_read_count_df)
Ejemplo n.º 10
0
    def setUp(self):
        """Create a fresh SQLite database under ``outdir`` and fill it with fixture tables.

        The output directory is wiped and recreated, then the sample
        information, run, marker, sample, variant, codon-stop filter and
        chimera-borderline filter tables are written to
        ``<outdir>/db.sqlite``, each replacing any existing table of the
        same name.
        """

        pip_install_vtam_for_tests()

        self.test_path = PathManager.get_test_path()
        self.package_path = PathManager.get_package_path()
        self.outdir_path = os.path.join(self.test_path, 'outdir')
        shutil.rmtree(self.outdir_path, ignore_errors=True)
        pathlib.Path(self.outdir_path).mkdir(parents=True, exist_ok=True)

        self.args = {}
        self.args['runmarker'] = os.path.join(self.package_path, "data",
                                              "example", "pool_run_marker.tsv")
        self.args['db'] = os.path.join(self.outdir_path, "db.sqlite")

        ############################################################################################
        #
        # Init DB
        #
        ############################################################################################

        # All TSV fixtures used below live in the same directory.
        fixture_dir = os.path.join(self.test_path,
                                   "test_files_dryad.f40v5_small",
                                   "run1_mfzr_zfzr")
        filter_codon_stop_path = os.path.join(fixture_dir,
                                              "filter_codon_stop.tsv")
        variant_path = os.path.join(fixture_dir,
                                    "variant_filter_codon_stop.tsv")
        sample_information_path = os.path.join(fixture_dir,
                                               "sample_information.tsv")

        self.engine = sqlalchemy.create_engine(
            'sqlite:///{}'.format(self.args['db']), echo=False)

        # The engine itself is handed to pandas for every write, so pandas
        # opens and closes its own connection each time. Calling
        # self.engine.connect() per table left every connection open.
        sample_information_df = pandas.read_csv(sample_information_path,
                                                sep="\t",
                                                header=0)
        sample_information_df.to_sql(name=SampleInformation.__tablename__,
                                     con=self.engine,
                                     if_exists='replace')

        run_df = pandas.DataFrame({'name': ['run1']}, index=range(1, 2))
        run_df.to_sql(name=Run.__tablename__,
                      con=self.engine,
                      index_label='id',
                      if_exists='replace')

        marker_df = pandas.DataFrame({'name': ['MFZR', 'ZFZR']},
                                     index=range(1, 3))
        marker_df.to_sql(name=Marker.__tablename__,
                         con=self.engine,
                         index_label='id',
                         if_exists='replace')

        sample_df = pandas.DataFrame(
            {'name': ['tpos1_run1', 'tnegtag_run1', '14ben01', '14ben02']},
            index=range(1, 5))
        sample_df.to_sql(name=Sample.__tablename__,
                         con=self.engine,
                         index_label='id',
                         if_exists='replace')

        variant_df = pandas.read_csv(variant_path,
                                     sep="\t",
                                     header=0,
                                     index_col='id')
        variant_df.to_sql(name=Variant.__tablename__,
                          con=self.engine,
                          index_label='id',
                          if_exists='replace')

        filter_codon_stop_df = pandas.read_csv(filter_codon_stop_path,
                                               sep="\t",
                                               header=0)
        filter_codon_stop_df.to_sql(name=FilterCodonStop.__tablename__,
                                    con=self.engine,
                                    if_exists='replace')

        filter_chimera_borderline_path = os.path.join(
            fixture_dir,
            "filter_chimera_borderline_and_filter_codon_stop.tsv")
        filter_chimera_borderline_db = pandas.read_csv(
            filter_chimera_borderline_path, sep="\t", header=0)
        filter_chimera_borderline_db.to_sql(
            name=FilterChimeraBorderline.__tablename__,
            con=self.engine,
            if_exists='replace')

        self.sample_list = ['tpos1_run1', 'tnegtag_run1', '14ben01', '14ben02']
Ejemplo n.º 11
0
    def setUp(self):

        pip_install_vtam_for_tests()

        self.test_path = PathManager.get_test_path()
        self.package_path = PathManager.get_package_path()
        self.outdir_path = os.path.join(self.test_path, 'outdir')
        shutil.rmtree(self.outdir_path, ignore_errors=True)
        pathlib.Path(self.outdir_path).mkdir(exist_ok=True, parents=True)

        marker_str = "id name\n1 IIICBR"
        run_str = "id name\n1 TAS2"
        sample_str = "id name\n1 S21"
        variant_str = """id sequence
1 ATTGTCAGACACTCCGTACCATTAGGGTGCTGCAGTCGACTAGTCTATTTTAAGCTTACACGTAGCCGGAATTAGTTCATTACTGGGGTCAATTAATATCATAACAACGATCATTAACTAGAGGGCCCCAGGAATGACCTGGGAGAACTTACCGTTATTCGTGTGGGCTGTATTTATTACAGCGTGGTTACTTGTACTGTCTTTACCAGTACTAGCTGGTGCGATTACCATGCTGCTAACAGATAGGAACTAGAATACTAGTTTCTACGACCCGAACGGAGGAGGAGATCCTCTGCTATACCAGCATCTATTC"""
        filter_indel_str = """id run_id marker_id variant_id sample_id replicate read_count filter_delete
1 1 1 1 1 1 50 0"""

        from sqlalchemy import create_engine

        db_path = os.path.join(self.outdir_path, 'db.sqlite')
        self.engine = create_engine('sqlite:///{}'.format(db_path), echo=True)

        from wopmars.Base import Base
        Session = sqlalchemy.orm.sessionmaker(bind=self.engine)
        self.session = Session()

        Base.metadata.create_all(self.engine)

        from io import StringIO

        run_df = pandas.read_csv(StringIO(run_str), sep=" ")
        run_df.to_sql(name=Run.__tablename__,
                      con=self.engine.connect(),
                      if_exists='append',
                      index=False)

        marker_df = pandas.read_csv(StringIO(marker_str), sep=" ")
        marker_df.to_sql(name=Marker.__tablename__,
                         con=self.engine.connect(),
                         if_exists='append',
                         index=False)

        sample_df = pandas.read_csv(StringIO(sample_str), sep=" ")
        sample_df.to_sql(name=Sample.__tablename__,
                         con=self.engine.connect(),
                         if_exists='append',
                         index=False)

        filter_indel_df = pandas.read_csv(StringIO(filter_indel_str), sep=" ")
        filter_indel_df.to_sql(name=FilterIndel.__tablename__,
                               con=self.engine.connect(),
                               if_exists='append',
                               index=False)

        variant_df = pandas.read_csv(StringIO(variant_str), sep=" ")
        variant_df.to_sql(name=Variant.__tablename__,
                          con=self.engine.connect(),
                          if_exists='append',
                          index=False)

        pathlib.Path(os.path.join(self.outdir_path, "params.yml")).touch()

        sortereadinfo_str = """run	marker	sample	replicate	sortedfasta
TAS2	IIICBR	S21	1	TAS2-R1_S1_L001_R1_001_000.fasta"""

        with open(os.path.join(self.outdir_path, "sortedinfo.tsv"),
                  'w') as fout:
            fout.write(sortereadinfo_str)
Ejemplo n.º 12
0
    def setUp(self):
        """Install the package, fetch the sorted fasta dataset and set the argument paths.

        Steps performed, in order:

        1. ``pip install .`` is run with the current interpreter from the
           project path (its exit status is not checked).
        2. ``<test_path>/outdir`` is deleted, ``<test_path>/outdir/data`` is
           recreated and ``VTAM_LOG_VERBOSITY`` is set to ``"10"``.
        3. ``sorted.tar.gz`` is downloaded into the data directory, trying the
           three mirror URLs in order, and extracted into ``outdir``.
        4. ``self.asvtable_path`` and ``self.args`` are filled with file paths.

        Raises:
            Exception: the error raised by the last mirror when all three
                downloads fail.
        """

        # The vtam package is installed before anything else runs.
        cmd = '{} -m pip install . -q --upgrade --use-feature=in-tree-build'.format(
            sys.executable)
        # Windows receives the command as a single string, other platforms
        # receive an argv list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        subprocess.run(args=args, cwd=PathManager.get_project_path())

        self.package_path = os.path.join(PathManager.get_package_path())
        self.test_path = os.path.join(PathManager.get_test_path())
        self.outdir_path = os.path.join(self.test_path, 'outdir')
        self.outdir_data_path = os.path.join(self.outdir_path, 'data')
        # during development of the test, this prevents errors
        shutil.rmtree(self.outdir_path, ignore_errors=True)
        pathlib.Path(self.outdir_data_path).mkdir(parents=True, exist_ok=True)
        os.environ['VTAM_LOG_VERBOSITY'] = str(10)

        ############################################################################################
        #
        # Download sorted fasta test dataset
        #
        ############################################################################################

        sorted_tar_path = os.path.join(self.outdir_data_path, "sorted.tar.gz")

        # Reuse a local archive of at least 1,000,000 bytes, otherwise download.
        # NOTE(review): outdir is wiped just above, so a cached archive is
        # normally never present at this point - confirm whether that is intended.
        if not os.path.isfile(sorted_tar_path) or pathlib.Path(
                sorted_tar_path).stat().st_size < 1000000:
            mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2,
                           sorted_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(sorted_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   sorted_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Fall through to the next mirror; only the failure of the
                    # last one is propagated to the caller.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The archive is unpacked into outdir (not outdir/data). The context
        # manager closes it even if extraction fails.
        with tarfile.open(sorted_tar_path, "r:gz") as tar:
            tar.extractall(path=self.outdir_path)

        ############################################################################################
        #
        # Paths
        #
        ############################################################################################

        self.asvtable_path = os.path.join(self.outdir_path,
                                          "asvtable_default.tsv")

        # Input files located next to this test module.
        self.args = {}
        self.args['sortedinfo'] = os.path.join(os.path.dirname(__file__),
                                               "sortedinfo.tsv")
        self.args['params'] = os.path.join(os.path.dirname(__file__),
                                           "params_min_replicate_number1.yml")
        self.args['params_lfn_variant'] = os.path.join(
            os.path.dirname(__file__), "params_lfn_variant.yml")
        self.args['params_lfn_variant_replicate'] = os.path.join(
            os.path.dirname(__file__), "params_lfn_variant_replicate.yml")
Ejemplo n.º 13
0
    def setUp(self):
        """Create the main argument parser and the dict of paths handed to it in tests.

        ``self.foopaths`` mixes names that are plain strings
        (``filedoesnotexist``, ``dirdoesnotexist``) with paths below the
        package path and the test path. ``emptydir`` is created on disk.
        """
        self.parser = ArgParser.get_main_arg_parser()

        package_path = PathManager.get_package_path()
        test_path = PathManager.get_test_path()
        self.test_path = test_path
        outdir_path = os.path.join(test_path, "outdir")
        empty_dir = os.path.join(outdir_path, 'emptydir')

        self.foopaths = {
            # Plain names, not joined to any directory
            'filedoesnotexist': "filedoesnotexist",
            'dirdoesnotexist': "dirdoesnotexist",
            'fileisempty': os.path.join(test_path, "test_files", "emptyfile"),
            # This module itself is used as the non-TSV file
            'filenottsv': __file__,
            'fastainfo_tsv': os.path.join(
                test_path, "data/example/sortedinfo_mfzr.tsv"),
            'sortedinfo_tsv': os.path.join(
                package_path, "data/example/sortedinfo_mfzr.tsv"),
            'sortedinfo_duplicated_sample_names': os.path.join(
                test_path, "test_files",
                "sortedinfo_mfzr_duplicated_sample_names.tsv"),
            'params_yml': os.path.join(
                package_path, "data/example/params_mfzr.yml"),
            'params_wrong_yml': os.path.join(
                test_path, "test_params_file/params_wrong.yml"),
            'known_occurrences': os.path.join(
                package_path, "data/example/known_occurrences.tsv"),
            'asvtable_tsv': os.path.join(
                test_path, "test_files_dryad.f40v5_small",
                "run1_mfzr_zfzr/asvtable_default.tsv"),
            # fastq inputs
            'fastqinfo': os.path.join(
                test_path, "test_files", "fastqinfo.tsv"),
            'fastqinfo_duplicated_sample_names': os.path.join(
                test_path, "test_files",
                "mergedinfo_duplicated_sample_names.tsv"),
            'fastqdir': os.path.join(test_path, "test_files", "fastq"),
            # merged inputs
            'mergedinfo': os.path.join(
                test_path, "test_files", "mergedinfo.tsv"),
            'mergedinfo_duplicated_sample_names': os.path.join(
                test_path, "test_files",
                "mergedinfo_duplicated_sample_names.tsv"),
            'mergeddir': os.path.join(test_path, "test_files", "merged"),
            'runmarker_tsv': os.path.join(
                package_path, "data/example", "pool_run_marker.tsv"),
            'taxonomy_tsv': os.path.join(
                PathManager.get_test_path(), "test_files_dryad.f40v5_small",
                "taxonomy.tsv"),
            'foodir': package_path,
            'sorteddir': outdir_path,
            'emptydir': empty_dir,
        }

        # The empty directory has to exist on disk.
        pathlib.Path(empty_dir).mkdir(exist_ok=True, parents=True)

        # The BLAST database path is expressed relative to the package path.
        blastdb_abs = os.path.join(
            PathManager.get_test_path(), 'test_files', 'blastdb')
        self.foopaths['blastdb'] = os.path.relpath(
            blastdb_abs, PathManager.get_package_path())
Ejemplo n.º 14
0
    def setUp(self):
        """Create a fresh SQLite database under ``outdir`` and load the fixture tables.

        The output directory is wiped and recreated, then the run, marker,
        sample, variant and chimera-borderline filter tables are written to
        ``<outdir>/db.sqlite``. The codon-stop filter table is only read into
        ``self.filter_codon_stop_df``; it is not written to the database here.
        """

        self.test_path = os.path.join(PathManager.get_test_path())
        self.outdir_path = os.path.join(self.test_path, 'outdir')

        # during development of the test, this prevents errors
        shutil.rmtree(self.outdir_path, ignore_errors=True)
        pathlib.Path(self.outdir_path).mkdir(parents=True, exist_ok=True)

        db_path = os.path.join(self.outdir_path, "db.sqlite")
        filter_codon_stop_path = os.path.join(
            self.test_path,
            "test_files_dryad.f40v5_small/run1_mfzr_zfzr/filter_codon_stop.tsv"
        )
        variant_path = os.path.join(
            self.test_path,
            "test_files_dryad.f40v5_small/run1_mfzr_zfzr/variant_filter_codon_stop.tsv"
        )
        filter_chimera_borderline_path = os.path.join(
            self.test_path,
            "test_files_dryad.f40v5_small/run1_mfzr_zfzr/filter_chimera_borderline_and_filter_codon_stop.tsv"
        )

        self.engine = sqlalchemy.create_engine('sqlite:///{}'.format(db_path),
                                               echo=False)

        # The engine itself is handed to pandas for every write, so pandas
        # opens and closes its own connection each time. Calling
        # self.engine.connect() per table left every connection open.
        run_df = pandas.DataFrame({'name': ['run1']}, index=range(1, 2))
        run_df.to_sql(name=Run.__tablename__,
                      con=self.engine,
                      index_label='id')

        marker_df = pandas.DataFrame({'name': ['MFZR', 'ZFZR']},
                                     index=range(1, 3))
        marker_df.to_sql(name=Marker.__tablename__,
                         con=self.engine,
                         index_label='id')

        sample_df = pandas.DataFrame(
            {'name': ['tpos1_run1', 'tnegtag_run1', '14ben01', '14ben02']},
            index=range(1, 5))
        sample_df.to_sql(name=Sample.__tablename__,
                         con=self.engine,
                         index_label='id')

        variant_df = pandas.read_csv(variant_path,
                                     sep="\t",
                                     header=0,
                                     index_col='id')
        variant_df.to_sql(name=Variant.__tablename__,
                          con=self.engine,
                          index_label='id')

        filter_chimera_borderline_db = pandas.read_csv(
            filter_chimera_borderline_path, sep="\t", header=0)
        filter_chimera_borderline_db.to_sql(
            name=FilterChimeraBorderline.__tablename__,
            con=self.engine)

        self.filter_codon_stop_df = pandas.read_csv(filter_codon_stop_path,
                                                    sep="\t",
                                                    header=0)
        self.sample_list = ['tpos1_run1', 'tnegtag_run1', '14ben01', '14ben02']
Ejemplo n.º 15
0
 def setUpClass(cls):
     """Store the test path and its ``outdir`` sub-path on the class."""
     tests_root = PathManager.get_test_path()
     cls.test_path = tests_root
     cls.outdir_path = os.path.join(tests_root, 'outdir')
Ejemplo n.º 16
0
import shutil
import pandas
import sqlalchemy

from vtam.models.FilterCodonStop import FilterCodonStop
from vtam.models.Marker import Marker
from vtam.models.Run import Run
from vtam.models.Sample import Sample
from vtam.models.SampleInformation import SampleInformation
from vtam.models.Variant import Variant
from vtam.utils.PathManager import PathManager
from sqlalchemy.orm import sessionmaker

#%% Set variables

# Scratch output directory under the test root: removed (errors ignored),
# then recreated empty.
test_path = os.path.join(PathManager.get_test_path())
outdir_path = os.path.join(test_path, 'outdir')
shutil.rmtree(outdir_path, ignore_errors=True)
pathlib.Path(outdir_path).mkdir(parents=True, exist_ok=True)

# Directories shared by several of the input paths below.
_dryad_run_dir = os.path.join(test_path, "test_files_dryad.f40v5_small",
                              "run1_mfzr_zfzr")
_tools_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tools")

# Inputs located in the dryad run directory of the test tree.
asv_table_tsv = os.path.join(_dryad_run_dir, "asvtable_default.tsv")
sample_information_path = os.path.join(_dryad_run_dir,
                                       "sample_information.tsv")

# Inputs located in the "tools" directory next to this script.
occurrences_keep_tsv = os.path.join(_tools_dir, "known_occurrences_keep.tsv")
sample_type_tsv = os.path.join(_tools_dir, "sample_types.tsv")
filter_codon_stop_path = os.path.join(test_path,
Ejemplo n.º 17
0
    def setUpClass(cls):
        """Record the test root and the 'outdir_gz' path on the class.

        Only path strings are computed here; nothing is created on disk.
        """
        # Per the original note, PathManager reports the package's "tests"
        # directory here.
        tests_root = PathManager.get_test_path()
        cls.test_path = tests_root
        cls.outdir_path = os.path.join(tests_root, 'outdir_gz')
Ejemplo n.º 18
0
    def setUpClass(cls):
        """Build the shared fixture: fetch the fastq dataset and run ``vtam merge``.

        Steps, in order:

        1. Call ``pip_install_vtam_for_tests()`` so the ``vtam`` command is
           available.
        2. Remove ``outdir`` under the test path and recreate ``outdir/data``.
        3. Download ``fastq.tar.gz``, trying each mirror URL in turn, and
           extract it into ``outdir``.
        4. Store the input/output paths on the class and in ``cls.args``.
        5. Run ``vtam merge`` with those arguments.

        Raises:
            Exception: the error raised by the last mirror when every
                download attempt fails.
            subprocess.CalledProcessError: when ``vtam merge`` exits with a
                non-zero status.
        """

        pip_install_vtam_for_tests()  # vtam needs to be in the path

        cls.package_path = PathManager.get_package_path()
        cls.test_path = os.path.join(PathManager.get_test_path())

        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        # During development of the test, this prevents errors
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        ############################################################################################
        #
        # Download fastq test dataset
        #
        ############################################################################################

        fastq_tar_path = os.path.join(cls.outdir_data_path, "fastq.tar.gz")
        # Test first in local dir, otherwise in the remote URLs.
        # NOTE(review): 'outdir' (which contains this file's directory) was
        # removed just above, so unless that removal failed the local copy
        # never exists here and the download always runs - confirm whether a
        # persistent download directory is wanted.
        if not os.path.isfile(fastq_tar_path) or pathlib.Path(
                fastq_tar_path).stat().st_size < 1000000:
            mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2,
                           fastq_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(fastq_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   fastq_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Move on to the next mirror; only the failure of the
                    # last one is allowed to propagate.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(fastq_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_path)

        # Set test paths
        cls.fastqinfo_path = os.path.join(PathManager.get_package_path(),
                                          "data/example/fastqinfo.tsv")
        cls.fastqdir_path = os.path.join(cls.outdir_path, "fastq")
        cls.fastainfo_path = os.path.join(cls.outdir_path, "fastainfo.tsv")
        cls.fastadir_path = os.path.join(cls.outdir_path, "merged")

        cls.sorted_dir_path = os.path.join(cls.outdir_path, "sorted")
        cls.sortedinfo_path = os.path.join(cls.sorted_dir_path,
                                           "sortedinfo.tsv")

        cls.log_path = os.path.join(cls.outdir_path, "vtam.log")

        cls.asvtable_path = os.path.join(cls.outdir_path,
                                         "asvtable_default.tsv")

        # Values substituted into the command templates.
        cls.args = {
            'fastqinfo': cls.fastqinfo_path,
            'fastqdir': cls.fastqdir_path,
            'fastainfo': cls.fastainfo_path,
            'fastadir': cls.fastadir_path,
            'sorted': cls.sorted_dir_path,
            'db': os.path.join(cls.outdir_path, "db.sqlite"),
            'sortedinfo': cls.sortedinfo_path,
            'sorteddir': cls.sorted_dir_path,
            'asvtable': cls.asvtable_path,
            'log': cls.log_path,
        }

        ################################################################################################################
        #
        # Command Merge
        #
        ################################################################################################################

        cmd = "vtam merge --fastqinfo {fastqinfo} --fastqdir {fastqdir} --fastainfo {fastainfo} --fastadir {fastadir} " \
              "-v --log {log}".format(**cls.args)

        # On Windows the command is passed as a single string; elsewhere it
        # is split into an argument list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        # check=True: a failed merge aborts the fixture here instead of
        # leaving the tests to run against missing output.
        subprocess.run(args=args, check=True)
Ejemplo n.º 19
0
    def setUp(self):
        """Record the test root path on the instance."""
        tests_root = PathManager.get_test_path()
        self.test_path = os.path.join(tests_root)
Ejemplo n.º 20
0
    def setUpClass(cls):
        """Build the shared fixture: fetch the fastq dataset and run snakemake.

        Steps, in order:

        1. Call ``pip_install_vtam_for_tests()`` so the ``vtam`` command is
           available.
        2. Remove ``outdir`` under the test path, recreate ``outdir/data``
           and make sure ``outdir_download`` exists.
        3. Download ``fastq.tar.gz`` into ``outdir_download`` unless a copy
           of at least 1000000 bytes is already there, trying each mirror
           URL in turn, then extract it into ``outdir``.
        4. Run snakemake on the tutorial snakefile from inside ``outdir``.

        Raises:
            Exception: the error raised by the last mirror when every
                download attempt fails.
            subprocess.CalledProcessError: when snakemake exits with a
                non-zero status.
        """

        ########################################################################
        #
        # These tests need the vtam command in the path
        #
        ########################################################################

        pip_install_vtam_for_tests()  # vtam needs to be in the path

        cls.package_path = PathManager.get_package_path()
        cls.test_path = PathManager.get_test_path()

        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        # Kept outside 'outdir', so it is not removed by the rmtree above.
        cls.outdir_download_path = os.path.join(cls.test_path,
                                                'outdir_download')
        pathlib.Path(cls.outdir_download_path).mkdir(parents=True,
                                                     exist_ok=True)

        cls.snakefile_tuto_data = os.path.join(
            cls.package_path, "data/snake.tuto.data_makeknownoccurrences.yml")

        ############################################################################################
        #
        # Set command args
        #
        ############################################################################################

        cls.args = {
            'package_path': cls.package_path,
            'snake_tuto_data': cls.snakefile_tuto_data,
        }

        ############################################################################################
        #
        # Download fastq test dataset
        #
        ############################################################################################

        fastq_tar_path = os.path.join(cls.outdir_download_path, "fastq.tar.gz")
        # Test first in local dir, otherwise in the remote URLs
        if not os.path.isfile(fastq_tar_path) or pathlib.Path(
                fastq_tar_path).stat().st_size < 1000000:
            mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2,
                           fastq_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(fastq_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   fastq_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Move on to the next mirror; only the failure of the
                    # last one is allowed to propagate.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(fastq_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_path)

        ############################################################################################
        #
        # Copy data to directory tree
        #
        ############################################################################################

        cmd = "snakemake --cores 1 -s {snake_tuto_data} --config MARKER=mfzr " \
              "PROJECT=asper1 PACKAGE_PATH={package_path} --until all_one_marker_makeknownoccurrences".format(**cls.args)

        # On Windows the command is passed as a single string; elsewhere it
        # is split into an argument list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        subprocess.run(args=args, check=True, cwd=cls.outdir_path)
Ejemplo n.º 21
0
    def setUp(self):
        """Record the test paths on the instance and start from an empty 'outdir'."""

        tests_root = os.path.join(PathManager.get_test_path())
        scratch_dir = os.path.join(tests_root, 'outdir')
        self.test_path = tests_root
        self.outdir_path = scratch_dir

        # Remove whatever is there (errors during removal are ignored), then
        # recreate the directory.
        shutil.rmtree(scratch_dir, ignore_errors=True)
        pathlib.Path(scratch_dir).mkdir(parents=True, exist_ok=True)