Beispiel #1
0
def main(args=None):
    """Command-line entry point running Kraken on one or two FastQ files.

    The function parses the command line, optionally refreshes the local
    taxonomy file or downloads a Kraken database (both of which terminate the
    program), resolves the requested databases, runs Kraken (a single
    database goes through ``KrakenPipeline``, several through
    ``KrakenSequential``), builds the HTML summary and, if requested, opens
    it in a web browser.

    :param list args: command-line arguments including the program name as
        first item. Defaults to a copy of ``sys.argv``.
    :raises ValueError: if a database is found neither in the sequana
        configuration path nor as a local path.
    """
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
        # The parser is expected to exit after printing the help. Return
        # explicitly so that ``options`` can never be used while unbound.
        return
    options = user_options.parse_args(args[1:])

    logger.level = options.level

    if options.update_taxonomy is True:
        from sequana.taxonomy import Taxonomy
        tax = Taxonomy()
        from sequana import sequana_config_path as cfg
        logger.info(
            "Will overwrite the local database taxonomy.dat in {}".format(cfg))
        tax.download_taxonomic_file(overwrite=True)
        sys.exit(0)

    # We put the import here to make the --help faster
    from sequana import KrakenPipeline
    from sequana.kraken import KrakenSequential
    devtools = DevTools()

    if options.download:
        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download(options.download)
        sys.exit()

    # Collect the input files (file2 is only set for paired data)
    fastq = []
    if options.file1:
        devtools.check_exists(options.file1)
        fastq.append(options.file1)
    if options.file2:
        devtools.check_exists(options.file2)
        fastq.append(options.file2)

    from sequana import sequana_config_path as scfg
    if options.databases is None:
        logger.critical("You must provide a database")
        sys.exit(1)

    databases = []
    for database in options.databases:
        # Short aliases for the databases known to sequana
        if database == "toydb":
            database = "kraken_toydb"
        elif database == "minikraken":
            database = "minikraken_20141208"

        # A database stored in the sequana path takes precedence over a
        # local path with the same name.
        in_sequana_path = scfg + os.sep + database
        if os.path.exists(in_sequana_path):
            databases.append(in_sequana_path)
        elif os.path.exists(database):
            databases.append(database)
        else:
            msg = "Invalid database name (%s). Neither found locally "
            msg += "or in the sequana path %s; Use the --download option"
            raise ValueError(msg % (database, scfg))

    output_directory = options.directory + os.sep + "kraken"
    devtools.mkdirs(output_directory)

    def _pathto(filename):
        # Optional output files are placed in <directory>/kraken; an unset
        # (falsy) filename is passed through unchanged.
        if not filename:
            return filename
        return "{}/kraken/{}".format(options.directory, filename)

    # if there is only one database, use the pipeline else KrakenSequential
    if len(databases) == 1:
        logger.info("Using 1 database")
        k = KrakenPipeline(fastq,
                           databases[0],
                           threads=options.thread,
                           output_directory=output_directory,
                           confidence=options.confidence)

        k.run(output_filename_classified=_pathto(options.classified_out),
              output_filename_unclassified=_pathto(options.unclassified_out))
    else:
        logger.info("Using %s databases" % len(databases))
        k = KrakenSequential(fastq,
                             databases,
                             threads=options.thread,
                             output_directory=output_directory + os.sep,
                             force=True,
                             keep_temp_files=options.keep_temp_files,
                             output_filename_unclassified=_pathto(
                                 options.unclassified_out),
                             confidence=options.confidence)
        k.run(output_prefix="kraken")

    # This statements sets the directory where HTML will be saved
    from sequana.utils import config
    config.output_dir = options.directory

    # output_directory first argument: the directory where to find the data
    # output_filename is relative to the config.output_dir defined above
    KrakenModule(output_directory, output_filename="summary.html")

    logger.info("Open ./%s/summary.html" % options.directory)
    logger.info("or ./%s/kraken/kraken.html" % options.directory)

    if options.html is True:
        # The previous code called ``ss.onweb()`` on an undefined name and
        # always failed with NameError. Open the summary page announced in
        # the log messages above with the standard library instead.
        import webbrowser
        from pathlib import Path
        summary = Path(options.directory, "summary.html").resolve()
        webbrowser.open(summary.as_uri())
Beispiel #2
0
def main(args=None):
    """Command-line entry point running Kraken on one or two FastQ files.

    The function parses the command line, optionally downloads a Kraken
    database (which terminates the program), resolves the requested
    databases, runs Kraken (a single database goes through
    ``KrakenPipeline``, several through ``KrakenHierarchical``), builds the
    HTML summary and, if requested, opens it in a web browser.

    :param list args: command-line arguments including the program name as
        first item. Defaults to a copy of ``sys.argv``.
    :raises ValueError: if a database is found neither in the sequana
        configuration path nor as a local path.
    """
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
        # The parser is expected to exit after printing the help. Return
        # explicitly so that ``options`` can never be used while unbound.
        return
    options = user_options.parse_args(args[1:])

    logger.level = options.level

    # We put the import here to make the --help faster
    from sequana import KrakenPipeline
    from sequana.kraken import KrakenHierarchical
    devtools = DevTools()

    if options.download:
        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download(options.download)
        sys.exit()

    # Collect the input files (file2 is only set for paired data)
    fastq = []
    if options.file1:
        devtools.check_exists(options.file1)
        fastq.append(options.file1)
    if options.file2:
        devtools.check_exists(options.file2)
        fastq.append(options.file2)

    from sequana import sequana_config_path as scfg
    if options.databases is None:
        _log.critical("You must provide a database")
        sys.exit(1)

    databases = []
    for database in options.databases:
        # Short aliases for the databases known to sequana
        if database == "toydb":
            database = "kraken_toydb"
        elif database == "minikraken":
            database = "minikraken_20141208"

        # A database stored in the sequana path takes precedence over a
        # local path with the same name.
        in_sequana_path = scfg + os.sep + database
        if os.path.exists(in_sequana_path):
            databases.append(in_sequana_path)
        elif os.path.exists(database):
            databases.append(database)
        else:
            msg = "Invalid database name (%s). Neither found locally "
            msg += "or in the sequana path %s; Use the --download option"
            raise ValueError(msg % (database, scfg))

    output_directory = options.directory + os.sep + "kraken"
    devtools.mkdirs(output_directory)

    # if there is only one database, use the pipeline else KrakenHierarchical
    if len(databases) == 1:
        _log.info("Using 1 database")
        k = KrakenPipeline(fastq, databases[0], threads=options.thread,
                           output_directory=output_directory)

        def _pathto(filename):
            # Optional output files are placed in <directory>/kraken; an
            # unset (falsy) filename is passed through unchanged.
            if not filename:
                return filename
            return "{}/kraken/{}".format(options.directory, filename)

        k.run(output_filename_classified=_pathto(options.classified_out),
              output_filename_unclassified=_pathto(options.unclassified_out))
    else:
        _log.info("Using %s databases" % len(databases))
        k = KrakenHierarchical(fastq, databases, threads=options.thread,
                               output_directory=output_directory + os.sep,
                               force=True,
                               keep_temp_files=options.keep_temp_files)
        k.run(output_prefix="kraken")

    # This statements sets the directory where HTML will be saved
    from sequana.utils import config
    config.output_dir = options.directory

    # output_directory first argument: the directory where to find the data
    # output_filename is relative to the config.output_dir defined above
    KrakenModule(output_directory, output_filename="summary.html")

    _log.info("Open ./%s/summary.html" % options.directory)
    _log.info("or ./%s/kraken/kraken.html" % options.directory)

    if options.html is True:
        # The previous code called ``ss.onweb()`` on an undefined name and
        # always failed with NameError. Open the summary page announced in
        # the log messages above with the standard library instead.
        import webbrowser
        from pathlib import Path
        summary = Path(options.directory, "summary.html").resolve()
        webbrowser.open(summary.as_uri())
Beispiel #3
0
class KrakenAnalysis(object):
    """Run kraken on a set of FastQ files

    In order to run a Kraken analysis, we first need a local database.
    We provide a Toy example. The ToyDB is downloadable as follows (you will
    need to run the following code only once)::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download_kraken_toydb()

    .. seealso:: :class:`KrakenDownload`  for more database and
        :class:`sequana.kraken_builder.KrakenBuilder` to build your own
        databases

    The path to the database is required to run the analysis. It has been
    stored in the directory ./config/sequana/kraken_toydb under Linux platforms
    The following code should be platform independent::

        import os
        from sequana import sequana_config_path
        database = sequana_config_path + os.sep + "kraken_toydb"

    Finally, we can run the analysis on the toy data set::

        from sequana import sequana_data
        data = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz", "data")
        ka = KrakenAnalysis(data, database=database)
        ka.run()

    The name of the file written by Kraken is stored in
    :attr:`kraken_output` (a temporary file unless *output_filename* is given
    to :meth:`run`). It can be interpreted with :class:`KrakenResults`
    """
    def __init__(self, fastq, database, threads=4):
        """.. rubric:: Constructor

        :param fastq: either a fastq filename or a list of 2 fastq filenames
        :param database: the path to a valid Kraken database
        :param threads: number of threads to be used by Kraken
        :raises ValueError: if *fastq* is neither a string nor a list, or if
            it is an empty list.

        The database and every FastQ file are passed to
        ``DevTools.check_exists``.
        """
        self._devtools = DevTools()
        self._devtools.check_exists(database)

        self.database = database
        self.threads = threads

        # Fastq input
        if isinstance(fastq, str):
            self.paired = False
            self.fastq = [fastq]
        elif isinstance(fastq, list):
            if not fastq:
                # Fail early: run() needs at least one input file.
                raise ValueError(
                    "Expected a fastq filename or list of 2 fastq filenames")
            # Only a list of exactly two files is treated as paired data
            self.paired = len(fastq) == 2
            self.fastq = fastq
        else:
            raise ValueError(
                "Expected a fastq filename or list of 2 fastq filenames")

        # Check each input file (the previous code re-checked the database
        # here, so the FastQ files were never validated).
        for filename in self.fastq:
            self._devtools.check_exists(filename)

    def run(self,
            output_filename=None,
            output_filename_classified=None,
            output_filename_unclassified=None,
            only_classified_output=False):
        """Performs the kraken analysis

        :param str output_filename: if not provided, a temporary file is used
            and stored in :attr:`kraken_output`.
        :param str output_filename_classified: not compressed
        :param str output_filename_unclassified: not compressed
        :param bool only_classified_output: if True, the
            ``--only-classified-output`` option is added to the command.

        """
        if output_filename is None:
            self.kraken_output = TempFile().name
        else:
            self.kraken_output = output_filename

        params = {
            "database": self.database,
            "thread": self.threads,
            "file1": self.fastq[0],
            "kraken_output": self.kraken_output,
            "output_filename_unclassified": output_filename_unclassified,
            "output_filename_classified": output_filename_classified,
        }

        if self.paired:
            params["file2"] = self.fastq[1]

        # The command is assembled as a template and filled in at the end
        command = "kraken -db %(database)s %(file1)s "

        if self.paired:
            command += " %(file2)s --paired"
        command += " --threads %(thread)s --out %(kraken_output)s"

        if output_filename_unclassified:
            command += " --unclassified-out %(output_filename_unclassified)s "

        if only_classified_output is True:
            command += " --only-classified-output"

        if output_filename_classified:
            command += " --classified-out %(output_filename_classified)s "

        command = command % params
        # Somehow there is an error using easydev.execute with pigz
        from snakemake import shell
        shell(command)
Beispiel #4
0
class KrakenAnalysis(object):
    """Run kraken on a set of FastQ files

    In order to run a Kraken analysis, we first need a local database.
    We provide a Toy example. The ToyDB is downloadable as follows (you will
    need to run the following code only once)::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download_kraken_toydb()

    .. seealso:: :class:`KrakenDownload`  for more database and
        :class:`sequana.kraken_builder.KrakenBuilder` to build your own
        databases

    The path to the database is required to run the analysis. It has been
    stored in the directory ./config/sequana/kraken_toydb under Linux platforms
    The following code should be platform independent::

        import os
        from sequana import sequana_config_path
        database = sequana_config_path + os.sep + "kraken_toydb"

    Finally, we can run the analysis on the toy data set::

        from sequana import sequana_data
        data = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz", "data")
        ka = KrakenAnalysis(data, database=database)
        ka.run()

    The name of the file written by Kraken is stored in
    :attr:`kraken_output` (a temporary file unless *output_filename* is given
    to :meth:`run`). It can be interpreted with :class:`KrakenResults`
    """
    def __init__(self, fastq, database, threads=4):
        """.. rubric:: Constructor

        :param fastq: either a fastq filename or a list of 2 fastq filenames
        :param database: the path to a valid Kraken database
        :param threads: number of threads to be used by Kraken
        :raises ValueError: if *fastq* is neither a string nor a list, or if
            it is an empty list.

        The database and every FastQ file are passed to
        ``DevTools.check_exists``.
        """
        self._devtools = DevTools()
        self._devtools.check_exists(database)

        self.database = database
        self.threads = threads

        # Fastq input
        if isinstance(fastq, str):
            self.paired = False
            self.fastq = [fastq]
        elif isinstance(fastq, list):
            if not fastq:
                # Fail early: run() needs at least one input file.
                raise ValueError(
                    "Expected a fastq filename or list of 2 fastq filenames")
            # Only a list of exactly two files is treated as paired data
            self.paired = len(fastq) == 2
            self.fastq = fastq
        else:
            raise ValueError(
                "Expected a fastq filename or list of 2 fastq filenames")

        # Check each input file (the previous code re-checked the database
        # here, so the FastQ files were never validated).
        for filename in self.fastq:
            self._devtools.check_exists(filename)

    def run(self, output_filename=None, output_filename_classified=None,
            output_filename_unclassified=None, only_classified_output=False):
        """Performs the kraken analysis

        :param str output_filename: if not provided, a temporary file is used
            and stored in :attr:`kraken_output`.
        :param str output_filename_classified: not compressed
        :param str output_filename_unclassified: not compressed
        :param bool only_classified_output: if True, the
            ``--only-classified-output`` option is added to the command.

        """
        if output_filename is None:
            self.kraken_output = TempFile().name
        else:
            self.kraken_output = output_filename

        params = {
            "database": self.database,
            "thread": self.threads,
            "file1": self.fastq[0],
            "kraken_output": self.kraken_output,
            "output_filename_unclassified": output_filename_unclassified,
            "output_filename_classified": output_filename_classified,
        }

        if self.paired:
            params["file2"] = self.fastq[1]

        # The command is assembled as a template and filled in at the end
        command = "kraken -db %(database)s %(file1)s "

        if self.paired:
            command += " %(file2)s --paired"
        command += " --threads %(thread)s --output %(kraken_output)s "
        command += " --out-fmt legacy"

        if output_filename_unclassified:
            command += " --unclassified-out %(output_filename_unclassified)s "

        if only_classified_output is True:
            command += " --only-classified-output"

        if output_filename_classified:
            command += " --classified-out %(output_filename_classified)s "

        command = command % params
        # Somehow there is an error using easydev.execute with pigz
        from snakemake import shell
        shell(command)