Beispiel #1
0
    def __init__(self, infile, outfile, reference=None, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input BAM file
        :param str outfile: output CRAM filename
        :param str reference: reference file in :term:`FASTA` format. When
            omitted, a file named like *infile* but with a ``.fa`` extension
            is looked for; if it does not exist the user is prompted for a
            path on standard input.
        :raises IOError: if no reference was given, none could be inferred
            and the path typed by the user does not exist.

        """
        super(BAM2CRAM, self).__init__(infile, outfile, *args, **kargs)

        self._default_method = "samtools"

        self.reference = reference
        if self.reference is None:
            logger.debug("No reference provided. Infering from input file")
            # Swap only the trailing extension for .fa. A plain
            # str.replace() on a suffix the input does not carry returns the
            # input path unchanged, which exists and would then be mistaken
            # for the reference.
            reference = os.path.splitext(infile)[0] + ".fa"
            if os.path.exists(reference):
                logger.debug(
                    "Reference found from inference ({})".format(reference))
            else:
                logger.debug("No reference found.")
                msg = "Please enter the reference corresponding "
                msg += "to the input BAM file:"
                reference = input(msg)
                if not os.path.exists(reference):
                    raise IOError("Reference required")
                logger.debug("Reference exist ({}).".format(reference))

            self.reference = reference
        self.threads = cpu_count()
Beispiel #2
0
    def __init__(self, infile, outfile, reference=None, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input SAM file
        :param str outfile: output filename
        :param str reference: reference file in :term:`FASTA` format. When
            omitted, a file named like *infile* but with a ``.fa`` extension
            is looked for; if it does not exist the user is prompted for a
            path on standard input.
        :raises IOError: if no reference was given, none could be inferred
            and the path typed by the user does not exist.

        command used::

            samtools view -SCh

        .. note:: the API related to the third argument may change in the future.
        """
        super(SAM2CRAM, self).__init__(infile, outfile, *args, **kargs)
        self.reference = reference
        if self.reference is None:
            logger.debug("No reference provided. Infering from input file")
            # Swap only the trailing extension for .fa; str.replace() would
            # also rewrite ".sam" occurring earlier in the path (for example
            # in a directory called "my.sample").
            reference = os.path.splitext(infile)[0] + ".fa"
            if os.path.exists(reference):
                logger.debug("Reference found from inference ({})".format(reference))
            else:
                logger.debug("No reference found.")
                msg = "Please enter the reference corresponding "
                msg += "to the input SAM file:"
                # raw_input only exists on Python 2; fall back to input on
                # Python 3 so the prompt works on both.
                try:
                    ask = raw_input  # noqa: F821
                except NameError:
                    ask = input
                reference = ask(msg)
                if not os.path.exists(reference):
                    raise IOError("Reference required")
                logger.debug("Reference exist ({}).".format(reference))

            self.reference = reference
        self.threads = cpu_count()
Beispiel #3
0
    def __init__(self, infile, outfile):
        """.. rubric:: constructor

        Record the input and output paths after checking that the input
        exists.

        :param str infile: path of the file to convert; it must exist.
        :param str outfile: path where the result is to be written.
        :raises ValueError: if *infile* does not exist (the message is also
            logged as an error).
        """
        input_missing = not os.path.exists(infile)
        if input_missing:
            error_text = "Incorrect input file: {}".format(infile)
            _log.error(error_text)
            raise ValueError(error_text)

        self.infile, self.outfile = infile, outfile
        self.threads = cpu_count()
        # Commands go through the subprocess-based runner; switch this to
        # "shell" to route them through the shell() method instead.
        self._execute_mode = "subprocess"
Beispiel #4
0
    def __init__(self, infile, outfile, reference=None, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input CRAM file
        :param str outfile: output filename
        :param str reference: reference file in :term:`FASTA` format. It is
            only stored on the instance as :attr:`reference`; this
            constructor neither infers nor validates it.

        command used::

            samtools view -@ <thread> -Sh -T <reference> in.cram > out.sam

        .. note:: the API related to the third argument may change in the future.

        .. note:: NOTE(review): the command shown above produces SAM, not
            FASTQ; it looks copied from another converter -- confirm.
        """
        super(CRAM2FASTQ, self).__init__(infile, outfile, *args, **kargs)
        # Keep the caller-supplied reference instead of silently dropping
        # it, as the other reference-aware converters do.
        self.reference = reference
        self.threads = cpu_count()
Beispiel #5
0
    def __init__(self, infile, outfile):
        """.. rubric:: constructor

        Record the input and output paths. A falsy *outfile* is replaced by
        a name generated from *infile* and the first declared output
        extension.

        :param str infile: path of the input file, or just a prefix.
        :param str outfile: path of the output file; may be empty or None.
        """
        # The existence of infile is deliberately not checked here because
        # it could be just a prefix rather than a real file.
        self.infile = infile
        self.outfile = outfile or generate_outfile_name(
            infile, self.output_ext[0])
        self.threads = cpu_count()
        # "shell" routes commands through the shell() method; the
        # alternative value is "subprocess".
        self._execute_mode = "shell"
        self.logger = logger
Beispiel #6
0
class ConvBase(metaclass=ConvMeta):
    """Base class for all converters.

    To build a new converter, create a new class which inherits from
    :class:`ConvBase` and implement method that performs the conversion.
    The name of the converter method must start with ``_method_``.

    For instance: ::

        class Fastq2Fasta(ConvBase):

            def _method_python(self, *args, **kwargs):
                # include your code here. You can use the infile and outfile
                # attributes.
                self.infile
                self.outfile

    """
    # specify the extensions of the input file, can be a sequence (must be
    # overridden in subclasses)
    input_ext = None

    # specify the extensions of the output file, can be a sequence (must be
    # overridden in subclasses)
    output_ext = None

    # list available methods
    available_methods = []

    # default method should be provided
    _default_method = None
    _library_to_install = None
    _is_compressor = False

    # threads to be used by default if argument is required in a method
    threads = cpu_count()

    def __init__(self, infile, outfile):
        """.. rubric:: constructor

        :param str infile: the path of the input file.
        :param str outfile: the path of the output file. When falsy, a name
            is generated from *infile* and the first entry of
            :attr:`output_ext`.
        """
        if not outfile:
            outfile = generate_outfile_name(infile, self.output_ext[0])

        self.infile = infile
        self.outfile = outfile

        # execute mode can be shell or subprocess.
        self._execute_mode = "shell"

        # The logger to be set to INFO, DEBUG, WARNING, ERROR, CRITICAL
        self.logger = logger

    def __call__(self, *args, method_name=None, **kwargs):
        """Run one conversion method and log how long it took.

        :param str method_name: name of the method to run (without the
            ``_method_`` prefix). Falls back to :attr:`default` when falsy.
        :param str method: legacy keyword alias of *method_name*; when
            present it takes precedence and is removed from *kwargs*.
        :param args: positional arguments forwarded to the method
        :param kwargs: keyword arguments forwarded to the method
        :raises ValueError: if the requested method is neither in
            :attr:`available_methods` nor ``"dummy"``.

        """
        # An explicit "method" keyword wins over method_name and must not be
        # forwarded to the conversion method itself.
        method_name = kwargs.pop("method", method_name)

        # Nothing requested: use the converter's default method.
        method_name = method_name or self.default

        # "dummy" is a method used to evaluate the cost of the
        # execute() method for the benchmark
        if method_name not in self.available_methods + ['dummy']:
            msg = "Methods available are {}".format(self.available_methods)
            _log.error(msg)
            raise ValueError(msg)

        _log.info("{}> Executing {} method ".format(self.name, method_name))
        # reference to the method requested
        method_reference = getattr(self, "_method_{}".format(method_name))

        started = time.time()
        method_reference(*args, **kwargs)
        finished = time.time()
        _log.info("Took {} seconds ".format(finished - started))

    # Used by __call__ to prefix its log messages.
    @property
    def name(self):
        """
        The name of the class
        """
        return type(self).__name__

    def _method_dummy(self, *args, **kwargs):
        # The execute commands has a large initialisation cost (about a second)
        # This commands does not and can be used to evaluate that cost
        self.execute("")

    def shell(self, cmd):
        """Log *cmd* and run it through :func:`bioconvert.core.shell.shell`.

        :param str cmd: the command line to run
        """
        from bioconvert.core.shell import shell
        _log.info("CMD: {}".format(cmd))
        shell(cmd)

    def execute(self, cmd, ignore_errors=False, verbose=False, shell=False):
        """Run *cmd* with the runner selected by the execute mode.

        :param str cmd: the command line to run
        :param bool ignore_errors: forwarded to :meth:`_execute`
        :param bool verbose: forwarded to :meth:`_execute`
        :param bool shell: if True, use :meth:`shell` regardless of the
            instance's execute mode.

        Nothing is returned, whichever runner is used.
        """
        if shell is True or self._execute_mode == "shell":
            self.shell(cmd)
            return
        _log.info("CMD: {}".format(cmd))
        self._execute(cmd, ignore_errors, verbose)

    def _execute(self, cmd, ignore_errors=False, verbose=False):
        """
        Execute a command in a sub-shell

        :param str cmd: the command to execute
        :param ignore_errors: If True the result is returned whatever the
                              return value of the sub-shell.
                              Otherwise a Runtime error is raised when the sub-shell
                              return a non zero value
        :param verbose: If true displays errors on standard error
        :return: the result of the command
        :rtype: a :class:`StringIO` instance
        :raises RuntimeError: if the command cannot be started, or if it
                              exits with a non zero value while
                              *ignore_errors* is False (the message is the
                              captured standard error).
        """
        try:
            process_ = Popen(cmd,
                             shell=True,
                             stdout=PIPE,
                             stderr=PIPE,
                             stdin=None)
        except Exception as err:
            msg = "Failed to execute Command: '{}'. error: '{}'".format(cmd, err)
            raise RuntimeError(msg) from err

        # communicate() reads both pipes until end-of-file and then waits
        # for the process, so nothing written just before exit is lost and
        # neither pipe can fill up and stall the child.
        raw_output, raw_errors = process_.communicate()

        # Fill the buffer with write() so that, as before, callers get a
        # StringIO positioned at its end and read it with getvalue().
        output = StringIO()
        output.write(raw_output.decode("utf-8", errors="replace"))
        errors = raw_errors.decode("utf-8", errors="replace").strip()

        if verbose and errors:
            print(errors, file=sys.stderr)

        if process_.returncode != 0 and not ignore_errors:
            raise RuntimeError(errors)
        return output

    def boxplot_benchmark(self, N=5, rerun=True, include_dummy=False,
                          to_exclude=None, to_include=None, rot_xticks=90,
                          boxplot_args=None):
        """Simple wrapper to call :class:`Benchmark` and plot the results

        see :class:`~bioconvert.core.benchmark.Benchmark` for details.

        :param int N: passed to :class:`Benchmark` as ``N``
        :param rerun: passed to the benchmark's ``plot`` method
        :param include_dummy: stored on the benchmark as ``include_dummy``
        :param to_exclude: passed to :class:`Benchmark`; an empty list is
            used when omitted.
        :param to_include: passed to :class:`Benchmark`; an empty list is
            used when omitted.
        :param rot_xticks: passed to the benchmark's ``plot`` method
        :param boxplot_args: passed to the benchmark's ``plot`` method; an
            empty dict is used when omitted.
        :return: whatever the benchmark's ``plot`` method returns
        """
        # Fresh containers per call: mutable default arguments would be
        # shared between every call and every instance.
        to_exclude = [] if to_exclude is None else to_exclude
        to_include = [] if to_include is None else to_include
        boxplot_args = {} if boxplot_args is None else boxplot_args

        self._benchmark = Benchmark(self, N=N, to_exclude=to_exclude,
                                    to_include=to_include)
        self._benchmark.include_dummy = include_dummy
        data = self._benchmark.plot(rerun=rerun, rot_xticks=rot_xticks,
                                    boxplot_args=boxplot_args)
        return data

    def _get_default_method(self):
        """Return the declared default method if it is available, otherwise
        the first entry of :attr:`available_methods`.

        An IndexError propagates when :attr:`available_methods` is empty.
        """
        declared = self._default_method
        if declared is not None and declared in self.available_methods:
            return declared
        return self.available_methods[0]
    default = property(_get_default_method)

    def install_tool(self, executable):
        """Install the given tool, using the script:
        bioconvert/install_script/install_executable.sh
        if the executable is not already present

        :param executable: executable to install
        :return: nothing

        """
        from bioconvert import bioconvert_data

        # Only install when the executable cannot be found on the PATH.
        if shutil.which(executable) is None:
            logger.info("Installing tool : " + executable)
            script = bioconvert_data(
                'install_' + executable + '.sh', where="../misc")
            subprocess.call(['sh', script])

    @classmethod
    def add_argument_to_parser(cls, sub_parser):
        """Set the description of *sub_parser* and register on it the common
        arguments followed by the converter's additional arguments.

        :param sub_parser: the parser to populate
        """
        sub_parser.description = cls.get_description()
        all_arguments = itertools.chain(
            cls.get_common_arguments_for_converter(),
            cls.get_additional_arguments())
        for arg in all_arguments:
            arg.add_to_sub_parser(sub_parser)

    @classmethod
    def get_description(cls):
        """Return the one-line description built from the pair of formats
        that the class name splits into."""
        return "Allow to convert file in '%s' to '%s' format." % ConvMeta.split_converter_to_format(cls.__name__)

    @classmethod
    def get_additional_arguments(cls):
        """Return the converter-specific arguments; none in the base class."""
        return []

    @staticmethod
    def get_common_arguments():
        """Yield the :class:`ConvArg` definitions shared by every command:
        input and output file, force, verbosity, raise-exception, batch,
        benchmark options and indirect conversion."""
        yield ConvArg(
            names="input_file",
            nargs="?",
            default=None,
            type=ConvArg.file,
            help="The path to the file to convert.",
        )
        yield ConvArg(
            names="output_file",
            nargs="?",
            default=None,
            type=ConvArg.file,
            output_argument=True,
            help="The path where the result will be stored.",
        )
        yield ConvArg(
            names=["-f", "--force", ],
            action="store_true",
            help="if outfile exists, it is overwritten with this option",
        )
        yield ConvArg(
            names=["-v", "--verbosity", ],
            default=bioconvert.logger.level,
            help="Set the outpout verbosity.",
            choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        )
        yield ConvArg(
            names=["--raise-exception", ],
            action="store_true",
            help="Let exception ending the execution be raised and displayed",
        )
        yield ConvArg(
            names=["-m", "--batch", ],
            default=False,
            action="store_true",
            help="Allow conversion of a set of files using wildcards. You "
                 "must use quotes to escape the wildcards. For instance: "
                 "--batch 'test*fastq' ")
        yield ConvArg(
            names=["-b", "--benchmark", ],
            default=False,
            action="store_true",
            help="Running all available methods",
        )
        yield ConvArg(
            names=["-N", "--benchmark-N", ],
            default=5,
            type=int,
            help="Number of trials for each methods",
        )
        yield ConvArg(
            names=["-a", "--allow-indirect-conversion", ],
            default=False,
            action="store_true",
            help="Allow to chain converter when direct conversion is absent",
        )

    @classmethod
    def get_common_arguments_for_converter(cls):
        """Yield the common arguments, then the ``--method`` argument (when
        it can be built) and finally ``--show-methods``."""
        for a in ConvBase.get_common_arguments():
            yield a
        try:
            # Some converter does not have any method and work in __call__,
            # so building the argument may fail; only the construction is
            # guarded, the yield itself stays outside the try block.
            method_argument = ConvArg(
                names=["-c", "--method", ],
                nargs="?",
                default=cls._get_default_method(cls),
                help="The method to use to do the conversion.",
                choices=cls.available_methods,
            )
        except Exception as e:
            _log.warning("converter '{}' does not seems to have methods: {}".format(cls.__name__, e))
        else:
            yield method_argument
        yield ConvArg(
            names=["-s", "--show-methods", ],
            default=False,
            action="store_true",
            help="A converter may have several methods",
        )