Example #1
0
    def download_precomputed_taxonomy(self):
        """
        Download the precomputed TSV taxonomy DB to ``self.taxonomy_tsv_path``.

        Nothing is downloaded when a file of at least 1,000,000 bytes already
        exists at ``self.taxonomy_tsv_path``. Otherwise the gzipped TSV is
        fetched from the first of ``taxonomy_tsv_gz_url1``,
        ``taxonomy_tsv_gz_url2`` and ``taxonomy_tsv_gz_url3`` that succeeds,
        decompressed to ``self.taxonomy_tsv_path``, and the ``.gz`` file is
        deleted afterwards.

        Raises:
            Exception: whatever the last mirror raised when every mirror fails.
        """
        Logger.instance().debug(
            "file: {}; line: {}; Downloading taxonomy tsv".format(
                __file__,
                inspect.currentframe().f_lineno,
            ))

        ############################################################################################
        #
        # Download the gzipped taxonomy TSV
        #
        ############################################################################################

        taxonomy_tsv_gz_path = '{}.gz'.format(self.taxonomy_tsv_path)

        # Reuse the local copy when it is present and large enough; a smaller
        # file is fetched again (presumably it is a truncated copy).
        if (os.path.isfile(self.taxonomy_tsv_path)
                and pathlib.Path(
                    self.taxonomy_tsv_path).stat().st_size >= 1000000):
            return

        # Try each mirror in order and stop at the first one that works.
        mirror_urls = (taxonomy_tsv_gz_url1, taxonomy_tsv_gz_url2,
                       taxonomy_tsv_gz_url3)
        for attempt, mirror_url in enumerate(mirror_urls, start=1):
            try:
                with tqdm(...) as t:
                    # Label the progress bar with the mirror actually in use.
                    t.set_description(os.path.basename(mirror_url))
                    urllib.request.urlretrieve(mirror_url,
                                               taxonomy_tsv_gz_path,
                                               reporthook=tqdm_hook(t))
            except Exception:
                # Only the failure of the last mirror is propagated.
                if attempt == len(mirror_urls):
                    raise
            else:
                break

        # Decompress the archive next to its final location.
        with gzip.open(taxonomy_tsv_gz_path, 'rb') as fin:
            with open(self.taxonomy_tsv_path, 'wb') as fout:
                shutil.copyfileobj(fin, fout)

        # The compressed file is no longer needed; ignore it if already gone.
        try:
            pathlib.Path(taxonomy_tsv_gz_path).unlink()
        except FileNotFoundError:
            pass
Example #2
0
    def setUpClass(cls):
        """
        Install the package with pip and unpack the sorted reads dataset.

        Runs ``pip install .`` from the project directory, recreates
        ``<test_path>/outdir/data`` from scratch, downloads ``sorted.tar.gz``
        there from the first of ``sorted_tar_gz_url1..3`` that succeeds, and
        extracts it into the same directory.

        Raises:
            Exception: whatever the last mirror raised when every mirror fails.
        """
        cmd = '{} -m pip install . -q --upgrade --use-feature=in-tree-build'.format(
            sys.executable)
        # On Windows the command is passed as a single string, elsewhere as an
        # argv list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        # NOTE(review): the exit status is not checked, so a failed install
        # goes unnoticed here.
        subprocess.run(args=args, cwd=PathManager.get_project_path())

        cls.package_path = os.path.join(PathManager.get_package_path())
        cls.test_path = os.path.join(PathManager.get_test_path())
        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        # Start from an empty output tree.
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        ############################################################################################
        #
        # Download sorted reads dataset
        #
        ############################################################################################

        # The archive lives in outdir_data_path, which was created just above.
        sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")

        # Test first in local dir, otherwise in the remote URLs
        if not os.path.isfile(sorted_tar_path) or pathlib.Path(
                sorted_tar_path).stat().st_size < 1000000:
            mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2,
                           sorted_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(sorted_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   sorted_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Only the failure of the last mirror is propagated.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(sorted_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_data_path)
Example #3
0
    def setUpClass(cls):
        """
        Prepare the output tree and unpack the sorted reads dataset.

        Calls ``pip_install_vtam_for_tests()``, recreates
        ``<test_path>/outdir/data`` from scratch, downloads ``sorted.tar.gz``
        there from the first of ``sorted_tar_gz_url1..3`` that succeeds, and
        extracts it into the same directory.

        Raises:
            Exception: whatever the last mirror raised when every mirror fails.
        """

        ########################################################################
        #
        # These tests need the vtam command in the path
        #
        ########################################################################

        pip_install_vtam_for_tests()

        cls.package_path = os.path.join(PathManager.get_package_path())
        cls.test_path = os.path.join(PathManager.get_test_path())
        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        # Start from an empty output tree.
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        ############################################################################################
        #
        # Download sorted reads dataset (Updated Oct 10, 2020)
        #
        ############################################################################################

        sorted_tar_path = os.path.join(cls.outdir_data_path, "sorted.tar.gz")

        # Test first in local dir, otherwise in the remote URLs
        if not os.path.isfile(sorted_tar_path) or pathlib.Path(
                sorted_tar_path).stat().st_size < 1000000:
            mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2,
                           sorted_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(sorted_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   sorted_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Only the failure of the last mirror is propagated.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(sorted_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_data_path)
Example #4
0
 def __download_ncbi_taxonomy_dump(self):
     """
     Fetch the NCBI ``new_taxdump.tar.gz`` archive into ``self.tempdir``.

     The archive is downloaded only when no file with that name exists in
     ``self.tempdir`` yet.

     Returns:
         The local path of the archive inside ``self.tempdir``.
     """
     dump_url = "ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz"
     archive_name = os.path.basename(dump_url)
     local_archive = os.path.join(self.tempdir, archive_name)

     Logger.instance().debug(
         "file: {}; line: {}; Downloading NCBI taxonomy dump".format(
             __file__, inspect.currentframe().f_lineno))

     # An archive that is already on disk is reused as is.
     if os.path.isfile(local_archive):
         return local_archive

     Logger.instance().info("Downloading NCBI taxonomy dump")
     with tqdm(...) as progress:
         progress.set_description(archive_name)
         urllib.request.urlretrieve(dump_url,
                                    local_archive,
                                    reporthook=tqdm_hook(progress))
     return local_archive
Example #5
0
    def setUpClass(cls):
        """
        Unpack the fastq test dataset and lay out the data tree with snakemake.

        Calls ``pip_install_vtam_for_tests()``, recreates
        ``<test_path>/outdir/data`` from scratch, downloads ``fastq.tar.gz``
        into ``<test_path>/outdir_download`` from the first of
        ``fastq_tar_gz_url1..3`` that succeeds, extracts it into
        ``<test_path>/outdir`` and finally runs snakemake there.

        Raises:
            Exception: whatever the last mirror raised when every mirror fails.
            subprocess.CalledProcessError: when snakemake exits non-zero.
        """

        ########################################################################
        #
        # These tests need the vtam command in the path
        #
        ########################################################################

        pip_install_vtam_for_tests()  # vtam needs to be in the path

        cls.package_path = PathManager.get_package_path()
        cls.test_path = PathManager.get_test_path()

        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        # Start from an empty output tree.
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        # The download directory is outside outdir, so it is not wiped above
        # and a previously downloaded archive can be reused.
        cls.outdir_download_path = os.path.join(cls.test_path,
                                                'outdir_download')
        pathlib.Path(cls.outdir_download_path).mkdir(parents=True,
                                                     exist_ok=True)

        cls.snakefile_tuto_data = os.path.join(
            cls.package_path, "data/snake.tuto.data_makeknownoccurrences.yml")

        ############################################################################################
        #
        # Set command args
        #
        ############################################################################################

        cls.args = {
            'package_path': cls.package_path,
            'snake_tuto_data': cls.snakefile_tuto_data,
        }

        ############################################################################################
        #
        # Download fastq test dataset
        #
        ############################################################################################

        fastq_tar_path = os.path.join(cls.outdir_download_path, "fastq.tar.gz")

        # Test first in local dir, otherwise in the remote URLs
        if not os.path.isfile(fastq_tar_path) or pathlib.Path(
                fastq_tar_path).stat().st_size < 1000000:
            mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2,
                           fastq_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(fastq_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   fastq_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Only the failure of the last mirror is propagated.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(fastq_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_path)

        ############################################################################################
        #
        # Copy data to directory tree
        #
        ############################################################################################

        cmd = "snakemake --cores 1 -s {snake_tuto_data} --config MARKER=mfzr " \
              "PROJECT=asper1 PACKAGE_PATH={package_path} --until all_one_marker_makeknownoccurrences".format(**cls.args)

        # On Windows the command is passed as a single string, elsewhere as an
        # argv list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        subprocess.run(args=args, check=True, cwd=cls.outdir_path)
Example #6
0
    def setUp(self):
        """
        Install the package with pip and unpack the sorted fasta test dataset.

        Runs ``pip install .`` from the project directory, recreates
        ``<test_path>/outdir/data`` from scratch, sets ``VTAM_LOG_VERBOSITY``
        to ``10``, downloads ``sorted.tar.gz`` into the data directory from
        the first of ``sorted_tar_gz_url1..3`` that succeeds, extracts it into
        ``<test_path>/outdir`` and fills ``self.args`` with the input paths
        located next to this test file.

        Raises:
            Exception: whatever the last mirror raised when every mirror fails.
        """

        # vtam needs to be in the path
        cmd = '{} -m pip install . -q --upgrade --use-feature=in-tree-build'.format(
            sys.executable)
        # On Windows the command is passed as a single string, elsewhere as an
        # argv list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        # NOTE(review): the exit status is not checked, so a failed install
        # goes unnoticed here.
        subprocess.run(args=args, cwd=PathManager.get_project_path())

        self.package_path = os.path.join(PathManager.get_package_path())
        self.test_path = os.path.join(PathManager.get_test_path())
        self.outdir_path = os.path.join(self.test_path, 'outdir')
        self.outdir_data_path = os.path.join(self.outdir_path, 'data')
        # during development of the test, this prevents errors
        shutil.rmtree(self.outdir_path, ignore_errors=True)
        pathlib.Path(self.outdir_data_path).mkdir(parents=True, exist_ok=True)
        os.environ['VTAM_LOG_VERBOSITY'] = str(10)

        ############################################################################################
        #
        # Download sorted fasta test dataset
        #
        ############################################################################################

        sorted_tar_path = os.path.join(self.outdir_data_path, "sorted.tar.gz")

        # Test first in local dir, otherwise in the remote URLs
        if not os.path.isfile(sorted_tar_path) or pathlib.Path(
                sorted_tar_path).stat().st_size < 1000000:
            mirror_urls = (sorted_tar_gz_url1, sorted_tar_gz_url2,
                           sorted_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(sorted_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   sorted_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Only the failure of the last mirror is propagated.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(sorted_tar_path, "r:gz") as tar:
            tar.extractall(path=self.outdir_path)

        ############################################################################################
        #
        # Paths
        #
        ############################################################################################

        self.asvtable_path = os.path.join(self.outdir_path,
                                          "asvtable_default.tsv")

        # Input files are looked up in the directory of this test module.
        here = os.path.dirname(__file__)
        self.args = {
            'sortedinfo': os.path.join(here, "sortedinfo.tsv"),
            'params': os.path.join(here, "params_min_replicate_number1.yml"),
            'params_lfn_variant': os.path.join(here,
                                               "params_lfn_variant.yml"),
            'params_lfn_variant_replicate': os.path.join(
                here, "params_lfn_variant_replicate.yml"),
        }
    def setUpClass(cls):
        """
        Unpack the fastq test dataset and run ``vtam merge`` on it.

        Calls ``pip_install_vtam_for_tests()``, recreates
        ``<test_path>/outdir/data`` from scratch, downloads ``fastq.tar.gz``
        there from the first of ``fastq_tar_gz_url1..3`` that succeeds,
        extracts it into ``<test_path>/outdir``, fills ``cls.args`` with the
        paths used by the commands and runs ``vtam merge``.

        Raises:
            Exception: whatever the last mirror raised when every mirror fails.
        """

        pip_install_vtam_for_tests()  # vtam needs to be in the path

        cls.package_path = PathManager.get_package_path()
        cls.test_path = os.path.join(PathManager.get_test_path())

        cls.outdir_path = os.path.join(cls.test_path, 'outdir')
        cls.outdir_data_path = os.path.join(cls.outdir_path, 'data')
        # during development of the test, this prevents errors
        shutil.rmtree(cls.outdir_path, ignore_errors=True)
        pathlib.Path(cls.outdir_data_path).mkdir(parents=True, exist_ok=True)

        ############################################################################################
        #
        # Download fastq test dataset
        #
        ############################################################################################

        fastq_tar_path = os.path.join(cls.outdir_data_path, "fastq.tar.gz")

        # Test first in local dir, otherwise in the remote URLs
        if not os.path.isfile(fastq_tar_path) or pathlib.Path(
                fastq_tar_path).stat().st_size < 1000000:
            mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2,
                           fastq_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(fastq_tar_path))
                        urllib.request.urlretrieve(mirror_url,
                                                   fastq_tar_path,
                                                   reporthook=tqdm_hook(t))
                except Exception:
                    # Only the failure of the last mirror is propagated.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(fastq_tar_path, "r:gz") as tar:
            tar.extractall(path=cls.outdir_path)

        # Set test paths
        cls.fastqinfo_path = os.path.join(PathManager.get_package_path(),
                                          "data/example/fastqinfo.tsv")
        cls.fastqdir_path = os.path.join(cls.outdir_path, "fastq")
        cls.fastainfo_path = os.path.join(cls.outdir_path, "fastainfo.tsv")
        cls.fastadir_path = os.path.join(cls.outdir_path, "merged")

        cls.sorted_dir_path = os.path.join(cls.outdir_path, "sorted")
        cls.sortedinfo_path = os.path.join(cls.sorted_dir_path,
                                           "sortedinfo.tsv")

        cls.log_path = os.path.join(cls.outdir_path, "vtam.log")

        cls.asvtable_path = os.path.join(cls.outdir_path,
                                         "asvtable_default.tsv")

        # Placeholders available to the command templates.
        cls.args = {
            'fastqinfo': cls.fastqinfo_path,
            'fastqdir': cls.fastqdir_path,
            'fastainfo': cls.fastainfo_path,
            'fastadir': cls.fastadir_path,
            'sorted': cls.sorted_dir_path,
            'db': os.path.join(cls.outdir_path, "db.sqlite"),
            'sortedinfo': cls.sortedinfo_path,
            'sorteddir': cls.sorted_dir_path,
            'asvtable': cls.asvtable_path,
            'log': cls.log_path,
        }

        ################################################################################################################
        #
        # Command Merge
        #
        ################################################################################################################

        cmd = "vtam merge --fastqinfo {fastqinfo} --fastqdir {fastqdir} --fastainfo {fastainfo} --fastadir {fastadir} " \
              "-v --log {log}".format(**cls.args)

        # On Windows the command is passed as a single string, elsewhere as an
        # argv list.
        if sys.platform.startswith("win"):
            args = cmd
        else:
            args = shlex.split(cmd)
        # NOTE(review): the exit status is not checked, so a failed merge goes
        # unnoticed here.
        subprocess.run(args=args)
Example #8
0
    def main(outdir):
        """
        Download the fastq dataset into *outdir* and lay out the data tree.

        Creates *outdir* if needed, downloads ``fastq.tar.gz`` there from the
        first of ``fastq_tar_gz_url1..3`` that succeeds (unless a copy of at
        least 1,000,000 bytes is already present), extracts it into *outdir*,
        deletes the archive and runs snakemake with *outdir* as the working
        directory.

        Args:
            outdir: directory that receives the extracted data.

        Raises:
            Exception: whatever the last mirror raised when every mirror fails.
            subprocess.CalledProcessError: when snakemake exits non-zero.
        """

        package_path = PathManager.get_package_path()
        pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)

        #######################################################################
        #
        # Download fastq
        #
        #######################################################################

        fastq_tar_path = os.path.join(outdir, "fastq.tar.gz")

        # Test first in local dir, otherwise in the remote URLs
        if not os.path.isfile(fastq_tar_path) or pathlib.Path(fastq_tar_path).stat().st_size < 1000000:
            mirror_urls = (fastq_tar_gz_url1, fastq_tar_gz_url2, fastq_tar_gz_url3)
            for attempt, mirror_url in enumerate(mirror_urls, start=1):
                try:
                    with tqdm(...) as t:
                        t.set_description(os.path.basename(fastq_tar_path))
                        urllib.request.urlretrieve(mirror_url, fastq_tar_path, reporthook=tqdm_hook(t))
                except Exception:
                    # Only the failure of the last mirror is propagated.
                    if attempt == len(mirror_urls):
                        raise
                else:
                    break

        # The context manager guarantees the archive is closed before it is
        # removed below, even if extraction fails.
        # NOTE(review): extractall() trusts the member paths of the downloaded
        # archive; consider an extraction filter where the Python version
        # supports it.
        with tarfile.open(fastq_tar_path, "r:gz") as tar:
            tar.extractall(path=outdir)

        os.remove(fastq_tar_path)

        #######################################################################
        #
        # Set command args
        #
        #######################################################################

        # Kept separate from the argv built below so the two are not confused.
        cmd_params = {
            'package_path': package_path,
            'snake_tuto_data': os.path.join(package_path, "data/snake.tuto.data.yml"),
        }

        #######################################################################
        #
        # Copy data to directory tree
        #
        #######################################################################

        cmd = "snakemake --cores 1 -s {snake_tuto_data} --config MARKER=mfzr " \
              "PROJECT=asper1 PACKAGE_PATH={package_path} --until all_one_marker".format(**cmd_params)

        # On Windows the command is passed as a single string, elsewhere as an
        # argv list.
        if sys.platform.startswith("win"):
            argv = cmd
        else:
            argv = shlex.split(cmd)
        subprocess.run(args=argv, check=True, cwd=outdir)