コード例 #1
0
ファイル: kraken.py プロジェクト: brwnj/sequana
 def to_js(self, output="krona.html", onweb=False):
     """Create an interactive Krona HTML page from the kraken results.

     :param str output: output HTML filename passed to ``ktImportText``
     :param bool onweb: if True, open the created page in a browser

     Requires the KronaTools ``ktImportText`` executable on the PATH.
     """
     # Lazily produce the Krona input file on first use.
     # (was `== False`, PEP8 E712; the unused `status` local is dropped)
     if not self._data_created:
         self.kraken_to_krona()
     execute("ktImportText %s -o %s" % (self.output_filename, output))
     if onweb is True:
         import easydev
         easydev.onweb(output)
コード例 #2
0
ファイル: kraken.py プロジェクト: sequana/sequana
 def to_js(self, output="krona.html", onweb=False):
     """Create an interactive Krona HTML page from the kraken results.

     :param str output: output HTML filename passed to ``ktImportText``
     :param bool onweb: if True, open the created page in a browser

     Requires the KronaTools ``ktImportText`` executable on the PATH.
     """
     # Lazily produce the Krona input file on first use.
     # (was `== False`, PEP8 E712; the unused `status` local is dropped)
     if not self._data_created:
         self.kraken_to_krona()
     execute("ktImportText %s -o %s" % (self.output_filename, output))
     if onweb is True:
         import easydev
         easydev.onweb(output)
コード例 #3
0
    def _build_kraken(self):
        """Rebuild the kraken database with ``kraken-build --rebuild``.

        All command-line parameters are taken from ``self.params``.
        """
        print('Building the kraken db ')
        # jellyfish expects an integer hash size on the command line
        self.params['hash_size'] = int(self.params["hash_size"])

        command = """kraken-build  --rebuild -db %(dbname)s \
            --minimizer-len %(minimizer_len)s\
            --max-db-size %(max_db_size)s \
            --threads %(threads)s\
            --kmer-len %(kmer_length)s \
            --jellyfish-hash-size %(hash_size)s""" % self.params

        # kraken-build prints on stderr, so easydev.shellcmd is not suitable;
        # execute() handles that.
        execute(command)
コード例 #4
0
ファイル: busco.py プロジェクト: sequana/sequana
    def download(self, uncompress=True):
        """Download the datasets (tar.gz) and uncompress them

        :param bool uncompress: if True, uncompress the tar.gz and delete it
        """
        url = "http://busco.ezlab.org/v2/datasets"
        for name in self.filenames:
            archive = name + ".tar.gz"
            target = self.base + "/" + archive
            source = url + "/" + archive
            print(source)
            wget(source, target)
            if uncompress:
                # untar the dataset into the base directory, then drop the
                # tarball to save disk space
                execute("tar xvfz %s -C %s" % (target, self.base))
                execute("rm -f %s" % (target))
コード例 #5
0
ファイル: busco.py プロジェクト: sequana/sequana
    def download(self, uncompress=True):
        """Download the datasets (tar.gz) and uncompress them

        :param bool uncompress: if True, uncompress the tar.gz and delete it
        """
        url = "http://busco.ezlab.org/v2/datasets"
        for filename in self.filenames:
            remote = url + "/" + filename + ".tar.gz"
            local = self.base + "/" + filename + ".tar.gz"
            print(remote)
            wget(remote, local)
            if uncompress:
                # unpack in place, then remove the archive
                execute("tar xvfz %s -C %s" % (local, self.base))
                execute("rm -f %s" % (local))
コード例 #6
0
    def clean_db(self):
        """Shrink the taxonomy dump files to the taxons actually present in
        the DB, then run ``kraken-build --clean``.

        Once called, you will not be able to append more FASTA files
        """
        # Now we can clean the kraken db:
        print('Cleaning the kraken db ')
        # Clean the nodes.dmp and names.dmp
        print('Identifying the GI numbers')
        # Collect the GI numbers stored in the DB and map them to taxons;
        # only those taxons are kept in the filtered dump files below.
        gis = self.get_gis()
        taxons = self.get_taxons_from_gis(gis)
        print("")

        # kept as attributes for later inspection/debugging
        self.gis = gis
        self.taxons = taxons

        # This cleans the nodes.dmp and names.dmp. This must be done
        # before kraken-build --clean since it requires the gi_taxid_nucl.dmp
        # file
        names_file = self.taxon_path + os.sep + "names.dmp"
        nodes_file = self.taxon_path + os.sep + "nodes.dmp"
        # filtered copies are written to temporaries first, then moved over
        # the originals (os.rename below), so a failure mid-filter does not
        # corrupt the original dump files
        names_file_temp = self.taxon_path + os.sep + "names_temp.dmp"
        nodes_file_temp = self.taxon_path + os.sep + "nodes_temp.dmp"

        taxon_file_reader = NCBITaxonReader(names=names_file,
                                            nodes=nodes_file,
                                            verbose=True)
        print("Filtering")
        taxon_file_reader.filter_nodes_dmp_file(nodes_file,
                                                nodes_file_temp,
                                                taxons=taxons)
        taxon_file_reader.filter_names_dmp_file(names_file,
                                                names_file_temp,
                                                taxons=taxons)

        # mv the new files into the old ones
        os.rename(names_file_temp, names_file)
        os.rename(nodes_file_temp, nodes_file)

        # Finally, the kraken cleaning itself
        cmd = "kraken-build --clean --db %s" % self.params['dbname']
        execute(cmd)
コード例 #7
0
    def download_taxonomy(self, force=False):
        """Download kraken taxonomy data, once for all instead of doing it
        for each build.

        The downloaded file is large (1.3Gb) and the unzipped file is about 9Gb.

        If already present (and its MD5 matches), a file is not downloaded
        again, except if the *force* parameter is set to True.

        :param bool force: re-download even if local files look valid
        """
        # valid with kraken 1.1
        # Source files on the NCBI FTP server:
        #ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_gb.accession2taxid.gz
        #ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_wgs.accession2taxid.gz
        #ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz
        # NOTE: a stray bare `urls` expression used to sit here and raised
        # NameError at runtime; it has been removed.

        # Define the host once so BOTH branches can use it (previously it was
        # bound only inside the first branch, raising UnboundLocalError when
        # only the second file needed downloading).
        FTP = "ftp.ncbi.nih.gov"

        # If the requested file exists (with the right checksum), nothing to do
        expected_filename = self.taxon_path + os.sep + "gi_taxid_nucl.dmp"
        expected_md5 = "8c182ac2df452d836206ad13275cd8af"
        print(
            '\nDownloading taxonomy files. Takes a while depending on your connection'
        )

        if force or os.path.exists(expected_filename) is False or \
                md5(expected_filename) != expected_md5:
            # download taxonomy
            # We could use kraken-build --download-taxonomy + a subprocess but
            # even simpler to get the file via ftp
            execute(
                "wget %s/pub/taxonomy/gi_taxid_nucl.dmp.gz --directory-prefix %s"
                % (FTP, self.taxon_path))
            # Unzip the files
            execute('unpigz %s/gi_taxid_nucl.dmp.gz' % self.taxon_path)
        else:
            print("Found local expected file %s " % expected_filename)

        expected_filename = self.taxon_path + os.sep + "names.dmp"
        expected_md5 = "90d88912ad4c94f6ac07dfab0443da9b"
        if force or os.path.exists(expected_filename) is False or \
                md5(expected_filename) != expected_md5:

            execute(
                "wget %s/pub/taxonomy/taxdump.tar.gz --directory-prefix %s" %
                (FTP, self.taxon_path))

            execute('tar xvfz %s/taxdump.tar.gz -C %s' %
                    (self.taxon_path, self.taxon_path))
        else:
            print("Found local expected file %s " % expected_filename)
コード例 #8
0
def main(args=None):
    """Entry point of the ``sequana`` compressor tool.

    Validates the command-line options, writes a temporary Snakemake YAML
    configuration and runs the ``compressor`` rules with snakemake.

    :param list args: command-line arguments; defaults to ``sys.argv``
    """
    user_options = Options(prog="sequana")

    if args is None:
        args = sys.argv

    # If --help or no options provided, show the help
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    if options.version:
        import sequana
        print(sequana.version)
        sys.exit()

    if options.jobs > 20 and options.bypass is False:
        raise ValueError('The number of jobs is limited to 20. You can ' +
            'force this limit by using --bypass-job-limit')

    if misc.on_cluster("tars-") and options.unlock is False:
        if options.cluster is None:
            raise ValueError("You are on TARS (Institut Pasteur). You " +
                " must use --cluster option to provide the scheduler " +
                " options (typically ' --cluster 'sbatch --qos normal' )")

    # valid codecs: fastq/fq, optionally compressed with bz2, gz or dsrc
    valid_extensions = [("fastq." + ext2).rstrip(".")
                        for ext2 in ['', 'bz2', 'gz', 'dsrc']]

    valid_extensions += [("fq." + ext2).rstrip(".")
                        for ext2 in ['', 'bz2', 'gz', 'dsrc']]

    # any (source, target) pair of distinct extensions is allowed
    valid_combos = [(x, y) for x in valid_extensions
                           for y in valid_extensions
                           if x != y]

    if (options.source, options.target) not in valid_combos:
        raise ValueError("""--target and --source combo not valid.
Must be one of fastq, fastq.gz, fastq.bz2 or fastq.dsrc""")

    # Create the config file locally
    module = Module("compressor")

    with TempFile(suffix=".yaml", dir=".") as temp:
        cfg = SequanaConfig(module.config)
        cfg.config.compressor.source = options.source
        cfg.config.compressor.target = options.target
        cfg.config.compressor.recursive = options.recursive
        cfg.config.compressor.verbose = options.verbose
        cfg.config.compressor.threads = options.threads
        cfg._update_yaml()
        cfg.save(filename=temp.name)

        # The Snakefile can stay in its original place:
        rule = module.path + os.sep + "compressor.rules"

        # Run the snakemake command itself.
        cmd = 'snakemake -s %s  --configfile %s -j %s ' % \
                (rule, temp.name, options.jobs)

        if options.dryrun:
            cmd += " --dryrun "

        if options.verbose is False:
            cmd += " --quiet "
        else:
            cmd += " -p "

        # for slurm only: --cores-per-socket
        if options.cluster:
            cluster = ' --cluster "%s" ' % options.cluster
            cmd += cluster

        if options.snakemake:
            # -s and -j are set internally above; forbid them in pass-through
            if " -s " in options.snakemake or " -j " in options.snakemake:
                raise ValueError("-s or -j cannot be used in " +
                    " --snakemake-options    (already used internally)")
            cmd += options.snakemake

        if options.unlock:
            cmd += " --unlock "

        if options.verbose:
            print(cmd)

        # On travis, snakemake.shell command from snakemake fails.
        # Most probably because travis itself uses a subprocess.
        # execute from easydev uses pexpect.spawn, which seems to work well
        from easydev import execute
        execute(cmd, showcmd=False)
コード例 #9
0
ファイル: test_sequana_gui.py プロジェクト: ranjit58/sequana
def test_only(qtbot):
    """Smoke-test: launch sequanix in testing mode without the splash screen."""
    import easydev
    easydev.execute("sequanix --no-splash --testing")
コード例 #10
0
ファイル: databases.py プロジェクト: naveen584/sequana
 def download_list(self):
     """Download all standard lists of accession numbers from ENA.

     Each value of ``self._metadata`` is a sequence whose first element is
     the remote file name; it is saved locally under the same name.
     """
     # Only the values are needed -- the keys were previously iterated and
     # ignored (PERF102), so iterate .values() directly.
     for values in self._metadata.values():
         execute("wget -q -t 3 http://www.ebi.ac.uk/genomes/%s -O %s" %
                 (values[0], values[0]))