コード例 #1
0
ファイル: converter.py プロジェクト: zhaotao1987/bioconvert
def main(args=None):
    """Command-line entry point of the ``bioconvert`` executable.

    The function proceeds in three stages:

    1. *Implicit mode*: when at least two arguments are given, the first one
       contains a dot and is not the name of a registered converter, it is
       treated as an input file name. The converter is looked up in the
       registry from the (input, output) extension pair and its name is
       inserted in front of the arguments.
    2. An ``argparse`` parser is built with one sub-command per conversion
       yielded by the registry and the arguments are parsed. When parsing
       fails, the closest converter names (Levenshtein distance) are
       suggested to the user.
    3. The informational options (``--version``, ``--dependency-report``,
       ``--conversion-graph``) are served, or ``analysis`` is called on the
       input file (on every file matching the pattern in batch mode).

    :param args: command-line arguments without the program name. Defaults
        to ``sys.argv[1:]``. The sequence given by the caller is copied and
        never modified.
    :raises SystemExit: on invalid usage, after an informational option has
        been served, or when a conversion fails.
    """
    registry = Registry()

    # Work on a private copy: implicit mode inserts the converter name in
    # front of the arguments and must not alter the caller's list.
    args = sys.argv[1:] if args is None else list(args)

    # Implicit mode needs an input AND an output file name. The arguments
    # themselves are tested (not sys.argv) so that main() behaves the same
    # when it is called programmatically. With a single argument we fall
    # through and let argparse report the problem.
    if len(args) >= 2 \
            and args[0].lower() not in list(registry.get_converters_names()) \
            and "." in args[0]:

        in_ext = utils.get_extension(args[0], remove_compression=True)
        out_ext = utils.get_extension(args[1], remove_compression=True)

        # Check that the input file exists
        # Fixes https://github.com/bioconvert/bioconvert/issues/204
        if not os.path.exists(args[0]):
            _log.error("Input file {} does not exist".format(args[0]))
            sys.exit(1)

        # converter(s) registered for the pair (in_ext, out_ext)
        try:
            converter = registry.get_ext((in_ext, out_ext))
        except KeyError:
            converter = []

        if not converter:
            # no converter is found
            _log.error(
                '\n Bioconvert does not support conversion {} -> {}. \n'
                'Please specify the converter'
                '\n Usage : \n\n'
                '\t bioconvert converter input_file output_file \n '
                '\n To see all the converter : '
                '\n \t bioconvert --help '.format(in_ext, out_ext))
            sys.exit(1)
        elif len(converter) == 1:
            # the extension pair matches a single converter: make the
            # command line explicit
            args.insert(0, converter[0].__name__.lower())
        else:
            # the extension pair matches multiple converters
            _log.error("Ambiguous extension.\n"
                       "You must specify the right conversion  Please "
                       "choose a conversion from: \n\n"
                       "{}".format("\n".join(
                           [c.__name__.lower() for c in converter])))
            sys.exit(1)

    # Set the default level
    bioconvert.logger.level = "ERROR"

    # Changing the log level before argparse is run. Flags are scanned in
    # this order, so the last one found wins.
    for flag in ("-l", "--level", "-v", "--verbosity"):
        if flag not in args:
            continue
        value_pos = args.index(flag) + 1
        if value_pos >= len(args):
            # flag given without a value
            continue
        try:
            bioconvert.logger.level = args[value_pos]
        except Exception:
            # Best effort only: the logger may reject the value. Unlike a
            # bare ``except``, SystemExit/KeyboardInterrupt still propagate.
            pass

    allow_indirect_conversion = (
        "--allow-indirect-conversion" in args or "-a" in args)

    arg_parser = argparse.ArgumentParser(
        prog="bioconvert",
        description="""Convertor infer the
                                         formats from the first command. We do
                                         not scan the input file. Therefore
                                         users must ensure that their input
                                         format files are properly
                                         formatted.""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Bioconvert contains tens of converters whose list is available as follows:

    bioconvert --help

Each conversion has its own sub-command and dedicated help. For instance:

    bioconvert fastq2fasta --help

Because the subcommand contains the format, extensions are not important
for the conversion itself. This would convert the test.txt file (fastq
format) into a fasta file:

    bioconvert fastq2fasta test.txt test.fasta

Users must ensure that their input format files are properly formatted.

If there is a conversion from A to B and another for B to C, you can also
perform indirect conversion using -a argument (experimental). This command
shows all possible indirect conversions:

    bioconvert --help -a

Please visit http://bioconvert.readthedocs.org for more information about the
project or formats available.

Bioconvert is an open source collaborative project. Please feel free to 
join us at https://github/biokit/bioconvert
""")

    subparsers = arg_parser.add_subparsers(
        help='sub-command help',
        dest='converter',
    )

    # ``default`` keeps an empty registry from raising ValueError
    max_converter_width = 2 + max(
        (len(in_fmt) for in_fmt, _, _, _ in registry.iter_converters()),
        default=0)

    # One sub-command per possible conversion
    for in_fmt, out_fmt, converter, path in \
            sorted(registry.iter_converters(allow_indirect_conversion)):

        sub_parser_name = "{}2{}".format(in_fmt.lower(), out_fmt.lower())

        if converter:
            # direct conversion
            link_char = '-'
            n_methods = len(converter.available_methods)
            if n_methods >= 1:
                help_details = " (%i methods)" % n_methods
            elif converter._library_to_install is None:
                help_details = " (no available methods please see the doc" \
                               " for install the necessary libraries) "
            else:
                help_details = " (no available methods please install {} \n" \
                               "see the doc for more details) ".format(converter._library_to_install)
        else:
            # indirect conversion: path holds both ends plus intermediates
            link_char = '~'
            if len(path) == 3:
                help_details = " (w/ 1 intermediate)"
            else:
                help_details = " (w/ %i intermediates)" % (len(path) - 2)

        help_text = '{}to{}> {}{}'.format(
            (in_fmt + ' ').ljust(max_converter_width, link_char),
            link_char,
            out_fmt,
            help_details,
        )
        sub_parser = subparsers.add_parser(
            sub_parser_name,
            help=help_text,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            epilog="""Bioconvert is an open source collaborative project. 
Please feel free to join us at https://github/biokit/bioconvert
""",
        )

        if converter:
            converter.add_argument_to_parser(sub_parser=sub_parser)
        elif path:
            for a in ConvBase.get_common_arguments():
                a.add_to_sub_parser(sub_parser)

    arg_parser.add_argument(
        "-v",
        "--verbosity",
        default=bioconvert.logger.level,
        help="Set the outpout verbosity.",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )

    arg_parser.add_argument(
        "--dependency-report",
        action="store_true",
        default=False,
        help="Output all bioconvert dependencies in json and exit")

    arg_parser.add_argument("-a",
                            "--allow-indirect-conversion",
                            action="store_true",
                            help="Show all possible indirect conversions "
                            "(labelled as intermediate) (EXPERIMENTAL)")

    arg_parser.add_argument("--version",
                            action="store_true",
                            default=False,
                            help="Show version")

    arg_parser.add_argument(
        "--conversion-graph",
        nargs="?",
        default=None,
        choices=[
            "cytoscape",
            "cytoscape-all",
        ],
    )

    try:
        args = arg_parser.parse_args(args)
    except SystemExit as e:
        # parsing asked to stop, maybe a normal exit (e.g. --help)
        if e.code == 0:
            raise

        # Parsing failed, trying to guess the converter. ``args`` is still
        # the raw list here since parse_args() did not return.
        from bioconvert.core.levenshtein import wf_levenshtein as lev

        # Options whose following token is a value, not a sub-command.
        # '--verbose' is kept from the previous implementation although the
        # registered long option is '--verbosity'.
        value_flags = ('-v', '--verbose', '--verbosity', '-l', '--level',
                       '--conversion-graph')
        sub_command = None
        for args_i, token in enumerate(args):
            if token.startswith('-'):
                continue
            if args_i > 0 and args[args_i - 1] in value_flags:
                continue
            sub_command = token
            break

        if sub_command is None:
            # No sub_command found, so letting the initial exception be
            # raised
            raise

        conversions = []
        for in_fmt, out_fmt, converter, path in registry.iter_converters(
                allow_indirect_conversion):
            conversion_name = "{}2{}".format(in_fmt.lower(), out_fmt.lower())
            conversions.append((lev(conversion_name,
                                    sub_command), conversion_name))
        matches = sorted(conversions)[:5]
        if not matches or matches[0][0] == 0:
            # nothing to suggest, or sub_command was ok and the problem
            # comes from elsewhere
            raise
        arg_parser.exit(
            e.code, '\n\nYour converter {}() was not found. \n'
            'Here is a list of possible matches: {} ... '
            '\nYou may also add the -a argument to enfore a '
            'transitive conversion. The whole list is available using\n\n'
            '    bioconvert --help -a \n'.format(
                sub_command, ', '.join([v for _, v in matches])))

    if args.version:
        print("{}".format(bioconvert.version))
        sys.exit(0)

    if args.dependency_report:
        print(
            json.dumps(
                get_known_dependencies_with_availability(as_dict=True),
                sort_keys=True,
                indent=4,
            ))
        sys.exit(0)

    if args.conversion_graph:
        if args.conversion_graph.startswith("cytoscape"):
            all_converter = args.conversion_graph == "cytoscape-all"
            print(
                json.dumps(
                    graph.create_graph_for_cytoscape(
                        all_converter=all_converter),
                    indent=4,
                ))
        sys.exit(0)

    if args.converter is None:
        msg = 'No converter specified. You can list converter by doing bioconvert --help'
        arg_parser.error(msg)

    if not (getattr(args, "show_methods", False) or args.input_file):
        arg_parser.error('Either specify an input_file (<INPUT_FILE>) or '
                         'ask for available methods (--show-method)')

    if not args.allow_indirect_conversion and \
        ConvMeta.split_converter_to_format(args.converter) not in registry:

        arg_parser.error('The conversion {} is not available directly, '
                         'you have to accept that we chain converter to do'
                         ' so (--allow-indirect-conversion or -a)'.format(
                             args.converter))

    # DEBUG verbosity implies that exceptions are propagated
    args.raise_exception = args.raise_exception or args.verbosity == "DEBUG"

    # Set the logging level
    bioconvert.logger.level = args.verbosity

    # Figure out whether we have several input files or not
    # Are we in batch mode ?
    import glob
    if args.batch:
        filenames = glob.glob(args.input_file)
    else:
        filenames = [args.input_file]

    for filename in filenames:
        args.input_file = filename
        try:
            analysis(args)
        except Exception as e:
            if args.raise_exception:
                # bare raise keeps the original traceback
                raise
            bioconvert.logger.error(e)
            sys.exit(1)
コード例 #2
0
def main(args=None):
    """Command-line entry point of the ``bioconvert`` executable.

    The function proceeds in three stages:

    1. *Implicit mode* (as reported by ``ParserHelper``): the converter is
       inferred from the extensions of the file names found on the command
       line, trying every split of the list into inputs and outputs, and
       its name is inserted in front of the arguments. When nothing
       matches, an error is reported, with a hint when an indirect
       conversion path exists in the registry.
    2. An ``argparse`` parser is built with one sub-command per conversion
       yielded by the registry and the arguments are parsed. When parsing
       fails, the closest converter names (Levenshtein distance) are
       suggested to the user.
    3. The informational options (``--version``, ``--dependency-report``,
       ``--conversion-graph``) are served, or ``analysis`` is called on the
       input file (on every file matching the pattern in batch mode).

    :param args: command-line arguments without the program name. Defaults
        to ``sys.argv[1:]``. The sequence given by the caller is copied and
        never modified.
    :raises SystemExit: on invalid usage, after an informational option has
        been served, or when a conversion fails.
    """
    # used later on
    registry = Registry()

    # Work on a private copy: implicit mode inserts the converter name in
    # front of the arguments and must not alter the caller's list.
    args = sys.argv[1:] if args is None else list(args)

    # convenient variable to check implicit/explicit mode and
    # get information about the arguments.
    ph = ParserHelper(args)

    # The arguments themselves are tested (not sys.argv) so that main()
    # behaves the same when it is called programmatically.
    if args and ph.mode == "implicit":

        # Check that the input file exists
        # Fixes https://github.com/bioconvert/bioconvert/issues/204
        if not os.path.exists(args[0]):
            _log.error("First input file {} does not exist".format(
                args[0]))
            sys.exit(1)

        # list of filenames from which we get the extensions
        filenames = ph.get_filelist()
        exts = [
            utils.get_extension(x, remove_compression=True)
            for x in filenames
        ]

        # Defined up front so that the error message below can always be
        # built, even when fewer than two file names were given.
        in_ext, out_ext = tuple(exts[:1]), tuple(exts[1:])

        # We need to get the corresponding converter if any.

        # We assume that the input formats are ordered alphabetically
        # (bioconvert API).
        # For instance fasta,qual to fastq can be
        # found but qual,fasta to fastq cannot. Indeed, in more complex
        # cases such as a,b -> c,d we cannot know whether there are 1 or 3
        # inputs, so every split is tried: for a,b,c,d we try a->(b,c,d),
        # (a,b)->(c,d) and (a,b,c)->d, i.e. len(exts) - 1 cases.
        converter = []
        for i in range(1, len(exts)):
            in_ext = tuple(exts[0:i])
            out_ext = tuple(exts[i:])
            try:
                converter.extend(registry.get_ext((in_ext, out_ext)))
            except KeyError:
                pass

        # For 1-to-1, if the extensions are identical but different
        # compression, this means we just want to decompress and
        # re-compress in another format.
        if not converter and len(exts) >= 2 and exts[0] == exts[1]:
            exts_with_comp = [
                utils.get_extension(x, remove_compression=False)
                for x in filenames
            ]
            in_ext, out_ext = exts_with_comp[0], exts_with_comp[1]
            comps = ['gz', 'dsrc', 'bz2']
            if in_ext in comps and out_ext in comps:
                try:
                    converter.extend(
                        registry.get_ext(((in_ext, ), (out_ext, ))))
                except KeyError:
                    # no converter registered for this compression pair;
                    # reported below as an unsupported conversion
                    pass

        # if no converter is found, print information
        if not converter:
            msg = '\nBioconvert does not support conversion {} -> {}. \n\n'
            msg = msg.format(in_ext, out_ext)

            # maybe it is an indirect conversion ? let us look at the
            # digraph
            try:
                _path = list(registry._path_dict_ext[in_ext][out_ext])
                # Here, we have a transitive list of tuples to go from A to C
                # example from fq to clustal returns:
                # [('fq',), ('fa',), ('clustal',)]
                # If we naively build the converter from those names
                # (fq2clustal), this is a non official converter name. The
                # official one is fastq2clustal, so we take the input name
                # of the first hop and the output name of the last hop.
                # Indexing (rather than unpacking three values) also copes
                # with paths holding several intermediates.
                first_hop = registry._ext_registry[_path[0], _path[1]][0]
                last_hop = registry._ext_registry[_path[-2], _path[-1]][0]
                a = first_hop.__name__.split("2")[0]
                b = last_hop.__name__.split("2")[1]

                convname = "2".join([a, b]).lower()

                msg += "\n".join(
                    textwrap.wrap(
                        "Note, however, that an indirect conversion through"
                        " an intermediate format is possible for your input and "
                        " output format. To do so, you need to use the -a option "
                        " and be explicit about the type of conversion. To get "
                        " the list of possible direct and indirect conversion, "
                        " please use:\n\n"))
                msg += "\n\n    bioconvert --help -a\n\n"
                # trailing space added: the two fragments used to be fused
                # into "probablythe"
                msg += "For help and with your input/output most probably "
                msg += "the command should be: \n\n    bioconvert {} {} -a\n\n ".format(
                    convname, " ".join(ph.get_filelist()))
            except (KeyError, IndexError):
                pass  # no usable converter found in the path
            error(msg)

        # if the ext_pair matches a single converter
        elif len(converter) == 1:
            args.insert(0, converter[0].__name__.lower())
        # if the ext_pair matches multiple converters
        else:
            _log.error("Ambiguous extension.\n"
                       "You must specify the right conversion  Please "
                       "choose a conversion from: \n\n"
                       "{}".format("\n".join(
                           [c.__name__.lower() for c in converter])))
            sys.exit(1)

    # Set the default level
    bioconvert.logger.level = "ERROR"

    # Changing the log level before argparse is run. Flags are scanned in
    # this order, so the last one found wins.
    for flag in ("-l", "--level", "-v", "--verbosity"):
        if flag not in args:
            continue
        value_pos = args.index(flag) + 1
        if value_pos >= len(args):
            # flag given without a value
            continue
        try:
            bioconvert.logger.level = args[value_pos]
        except Exception:
            # Best effort only: the logger may reject the value. Unlike a
            # bare ``except``, SystemExit/KeyboardInterrupt still propagate.
            pass

    # if there is the ability to convert from A to B to C, we must set
    # the option -a (--allow_indirect_conversion)
    allow_indirect_conversion = (
        "--allow-indirect-conversion" in args or "-a" in args)

    # Now, the instantiation of the main bioconvert user interface
    # (the description is intentionally left empty)
    arg_parser = argparse.ArgumentParser(
        prog="bioconvert",
        description="",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Bioconvert contains tens of converters whose list is available as follows:

    bioconvert --help

Each conversion has its own sub-command and dedicated help. For instance:

    bioconvert fastq2fasta --help

Because the subcommand contains the format, extensions are not important
for the conversion itself. This would convert the test.txt file (fastq
format) into a fasta file:

    bioconvert fastq2fasta test.txt test.fasta

If you use known extensions, the converter may be omitted::

    bioconvert test.fastq test.fasta

Users must ensure that their input format files are properly formatted.

If there is a conversion from A to B and another for B to C, you can also
perform indirect conversion using -a argument (experimental). This command
shows all possible indirect conversions:

    bioconvert --help -a

Please visit http://bioconvert.readthedocs.org for more information about the
project or formats available. Would you wish to help, please join our open 
source collaborative project at https://github/bioconvert/bioconvert
""")

    subparsers = arg_parser.add_subparsers(
        help='sub-command help',
        dest='converter',
    )

    # ``default`` keeps an empty registry from raising ValueError.
    # NOTE(review): len(in_fmt) is measured on the raw registry value while
    # the name displayed below is the "_"-joined lowered tuple; if in_fmt is
    # a tuple this counts formats rather than characters - confirm intent.
    max_converter_width = 2 + max(
        (len(in_fmt) for in_fmt, _, _, _ in registry.iter_converters()),
        default=0)

    def sorting_tuple_string(item):
        """Sort key: first element of the first field of *item*."""
        if type(item) is tuple:
            return item[0][0]
        if type(item) is str:
            return item[0]

    # show all possible conversion including indirect conversion
    for in_fmt, out_fmt, converter, path in \
            sorted(registry.iter_converters(allow_indirect_conversion), key=sorting_tuple_string):
        # e.g. inputs (FASTA, QUAL) are displayed as "fasta_qual"
        in_name = "_".join(ConvBase.lower_tuple(in_fmt))
        out_name = "_".join(ConvBase.lower_tuple(out_fmt))

        sub_parser_name = "{}2{}".format(in_name, out_name)

        if converter:
            # direct conversion
            link_char = '-'
            n_methods = len(converter.available_methods)
            if n_methods < 1:
                help_details = " (no available methods please see the doc" \
                               " for install the necessary libraries) "
            else:
                help_details = " (%i methods)" % n_methods
        else:
            # indirect conversion: path holds both ends plus intermediates
            link_char = '~'
            if len(path) == 3:
                help_details = " (w/ 1 intermediate)"
            else:
                help_details = " (w/ %i intermediates)" % (len(path) - 2)

        help_text = '{}to{}> {}{}'.format(
            (in_name + ' ').ljust(max_converter_width, link_char),
            link_char,
            out_name,
            help_details,
        )
        sub_parser = subparsers.add_parser(
            sub_parser_name,
            help=help_text,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            epilog="""Bioconvert is an open source collaborative project. 
Please feel free to join us at https://github/biokit/bioconvert
""",
        )
        if converter:
            converter.add_argument_to_parser(sub_parser=sub_parser)
        elif path:
            for a in ConvBase.get_IO_arguments():
                a.add_to_sub_parser(sub_parser)
            for a in ConvBase.get_common_arguments():
                a.add_to_sub_parser(sub_parser)

    # arguments when no explicit conversion provided.

    arg_parser.add_argument(
        "-v",
        "--verbosity",
        default=bioconvert.logger.level,
        help="Set the outpout verbosity.",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )

    arg_parser.add_argument(
        "-l",
        "--level",
        default=bioconvert.logger.level,
        help="Set the outpout verbosity. Same as --verbosity",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )

    arg_parser.add_argument(
        "--dependency-report",
        action="store_true",
        default=False,
        help="Output all bioconvert dependencies in json and exit")

    arg_parser.add_argument("-a",
                            "--allow-indirect-conversion",
                            action="store_true",
                            help="Show all possible indirect conversions "
                            "(labelled as intermediate)")

    arg_parser.add_argument("--version",
                            action="store_true",
                            default=False,
                            help="Show version")

    arg_parser.add_argument(
        "--conversion-graph",
        nargs="?",
        default=None,
        choices=[
            "cytoscape",
            "cytoscape-all",
        ],
    )

    try:
        args = arg_parser.parse_args(args)
    except SystemExit as e:
        # parsing asked to stop, maybe a normal exit (e.g. --help)
        if e.code == 0:
            raise

        # Parsing failed, trying to guess the converter. ``args`` is still
        # the raw list here since parse_args() did not return.
        from bioconvert.core.levenshtein import wf_levenshtein as lev

        # Options whose following token is a value, not a sub-command.
        # '--verbose' is kept from the previous implementation although the
        # registered long option is '--verbosity'.
        value_flags = ('-v', '--verbose', '--verbosity', '-l', '--level',
                       '--conversion-graph')
        sub_command = None
        for args_i, token in enumerate(args):
            if token.startswith('-'):
                continue
            if args_i > 0 and args[args_i - 1] in value_flags:
                continue
            sub_command = token
            break

        if sub_command is None:
            # No sub_command found, so letting the initial exception be
            # raised
            raise

        conversions = []
        for in_fmt, out_fmt, converter, path in registry.iter_converters(
                allow_indirect_conversion):
            conversion_name = "{}2{}".format(
                "_".join(ConvBase.lower_tuple(in_fmt)),
                "_".join(ConvBase.lower_tuple(out_fmt)))
            conversions.append((lev(conversion_name,
                                    sub_command), conversion_name))
        matches = sorted(conversions)[:5]
        if not matches or matches[0][0] == 0:
            # nothing to suggest, or sub_command was ok and the problem
            # comes from elsewhere
            raise
        arg_parser.exit(
            e.code, '\n\nYour converter {}() was not found. \n'
            'Here is a list of possible matches: {} ... '
            '\nYou may also add the -a argument to enfore a '
            'transitive conversion. The whole list is available using\n\n'
            '    bioconvert --help -a \n'.format(
                sub_command, ', '.join([v for _, v in matches])))

    if args.version:
        print("{}".format(bioconvert.version))
        sys.exit(0)

    if args.dependency_report:
        print(
            json.dumps(
                get_known_dependencies_with_availability(as_dict=True),
                sort_keys=True,
                indent=4,
            ))
        sys.exit(0)

    if args.conversion_graph:
        if args.conversion_graph.startswith("cytoscape"):
            all_converter = args.conversion_graph == "cytoscape-all"
            print(
                json.dumps(
                    graph.create_graph_for_cytoscape(
                        all_converter=all_converter),
                    indent=4,
                ))
        sys.exit(0)

    if args.converter is None:
        msg = "No converter specified. "
        msg += "You can list all converters by using:\n\n\tbioconvert --help"
        arg_parser.error(msg)

    if not (getattr(args, "show_methods", False) or args.input_file):
        arg_parser.error('Either specify an input_file (<INPUT_FILE>) or '
                         'ask for available methods (--show-method)')

    if not args.allow_indirect_conversion and \
        ConvMeta.split_converter_to_format(args.converter) not in registry:

        arg_parser.error('The conversion {} is not available directly, '
                         'you have to accept that we chain converter to do'
                         ' so (--allow-indirect-conversion or -a)'.format(
                             args.converter))

    # DEBUG verbosity implies that exceptions are propagated
    args.raise_exception = args.raise_exception or args.verbosity == "DEBUG"

    # Set the logging level
    bioconvert.logger.level = args.verbosity

    # Figure out whether we have several input files or not
    # Are we in batch mode ?
    if args.batch:
        filenames = glob.glob(args.input_file)
    else:
        filenames = [args.input_file]

    N = len(filenames)
    for i, filename in enumerate(filenames):
        if N > 1:
            _log.info("Converting {} ({}/{})".format(filename, i + 1, N))
        args.input_file = filename
        try:
            analysis(args)
        except Exception as e:
            if args.raise_exception:
                # bare raise keeps the original traceback
                raise
            bioconvert.logger.error(e)
            sys.exit(1)