def analysis(args):
    """Run one conversion (or its benchmark) from parsed CLI arguments.

    :param args: namespace produced by the bioconvert argument parser. The
        attributes read here are ``command``, ``input_file``,
        ``output_file``, ``force``, ``raise_exception``, ``benchmark``,
        ``benchmark_N`` and, optionally, ``show_methods``.
    :raises RuntimeError: if the :class:`Bioconvert` wrapper ends up with an
        empty input or output format.

    When ``show_methods`` is set, the methods of the converter are printed
    and the process exits with status 0 (the function simply returns
    instead when ``raise_exception`` is set).
    """
    in_fmt, out_fmt = ConvMeta.split_converter_to_extensions(args.command)

    # do we want to know the available methods ? If so, print info and quit
    if getattr(args, "show_methods", False):
        class_converter = Registry()[(in_fmt, out_fmt)]
        print("Methods available: {}".format(
            class_converter.available_methods))
        print("\nPlease see http://bioconvert.readthedocs.io/en/master/"
              "references.html#{} for details ".format(
                  str(class_converter).split("'")[1]))
        if args.raise_exception:
            return
        sys.exit(0)

    # Input and output filename
    infile = args.input_file
    if args.output_file is None and infile:
        # No output given: swap the last extension of the input file for the
        # output format of the sub-command (already parsed above).
        outfile = infile.rsplit(".", 1)[0] + "." + out_fmt.lower()
    else:
        outfile = args.output_file

    # Call a generic wrapper of all available conversion
    conv = Bioconvert(
        infile,
        outfile,
        in_fmt=in_fmt,
        out_fmt=out_fmt,
        force=args.force,
    )

    if not conv.in_fmt:
        raise RuntimeError("convert infer the format from the extension name."
                           " So add extension to the input file name or use"
                           " --input-format option.")

    if not conv.out_fmt:
        raise RuntimeError("convert infer the format from the extension name."
                           " So add extension to the output file name or use"
                           " --output-format option.")

    bioconvert.logger.info("Converting from %s to %s" %
                           (conv.in_fmt, conv.out_fmt))

    if args.benchmark:
        conv.boxplot_benchmark(N=args.benchmark_N)
        import pylab
        # Only the missing-attribute case triggers the fallback; a bare
        # ``except`` here used to hide KeyboardInterrupt and genuine savefig
        # failures as well.
        # NOTE(review): presumably ``conv.name`` is absent for some wrapper
        # configurations -- confirm against Bioconvert.
        try:
            benchmark_name = conv.name
        except AttributeError:
            benchmark_name = conv.converter.name
        pylab.savefig("benchmark_{}.png".format(benchmark_name))
    else:
        # every parsed option is forwarded as a keyword argument
        conv(**vars(args))
def main(args=None):
    """Entry point of the ``bioconvert`` command line interface.

    Builds one sub-parser per registered conversion, parses ``args``,
    handles the informational options (``--version``,
    ``--dependency-report``, ``--conversion-graph``) and finally calls
    :func:`analysis` for every input file.

    :param list args: command line tokens without the program name. When
        ``None``, ``sys.argv[1:]`` is used.

    The function terminates the process through ``sys.exit`` or the
    ``argparse`` error helpers in most failure paths. When parsing fails and
    the first positional token is not a known converter name, the five
    closest names (Levenshtein distance) are suggested.
    """
    if args is None:
        args = sys.argv[1:]

    # Set the default level
    bioconvert.logger.level = "ERROR"

    # Changing the log level before argparse is run, so that messages
    # emitted while the parser is built already honour it. Later flags
    # override earlier ones, as before.
    for flag in ("-l", "--level", "-v", "--verbosity"):
        if flag not in args:
            continue
        value_position = args.index(flag) + 1
        if value_position >= len(args):
            continue
        try:
            bioconvert.logger.level = args[value_position]
        except Exception:
            # Best effort only: an invalid level is reported by argparse
            # (``choices``) once the real parsing takes place.
            pass

    allow_indirect_conversion = (
        "--allow-indirect-conversion" in args or "-a" in args)

    # NOTE(review): the description/epilog texts below are single-line in
    # this copy of the file although RawDescriptionHelpFormatter is used;
    # the original line layout may have been lost -- confirm upstream.
    arg_parser = argparse.ArgumentParser(
        prog="bioconvert",
        description=(
            "Convertor infer the formats from the first command. We do not "
            "scan the input file. Therefore users must ensure that their "
            "input format files are properly formatted."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            " Bioconvert contains tens of converters whose list is available "
            "as follows: bioconvert --help "
            "Each conversion has its own sub-command and dedicated help. "
            "For instance: bioconvert fastq2fasta --help "
            "Because the subcommand contains the format, extensions are not "
            "important for the conversion itself. This would convert the "
            "test.txt file (fastq format) into a fasta file: "
            "bioconvert fastq2fasta test.txt test.fasta "
            "Users must ensure that their input format files are properly "
            "formatted. "
            "If there is a conversion from A to B and another for B to C, "
            "you can also perform indirect conversion using -a argument "
            "(experimental). This command shows all possible indirect "
            "conversions: bioconvert --help -a "
            "Please visit http://bioconvert.readthedocs.org for more "
            "information about the project or formats available. "
            "Bioconvert is an open source collaborative project. "
            "Please feel free to join us at https://github/biokit/bioconvert "
        ),
    )

    registry = Registry()
    subparsers = arg_parser.add_subparsers(
        help='sub-command help',
        dest='command',
    )
    max_converter_width = 2 + max(
        [len(in_fmt) for in_fmt, _, _, _ in registry.iter_converters()])

    # show all possible conversion
    for in_fmt, out_fmt, converter, path in \
            sorted(registry.iter_converters(allow_indirect_conversion)):
        sub_parser_name = "{}2{}".format(in_fmt.lower(), out_fmt.lower())

        if converter:
            # direct conversion
            link_char = '-'
            if len(converter.available_methods) <= 1:
                help_details = ""
            else:
                help_details = " (%i methods)" % len(
                    converter.available_methods)
        else:
            # indirect conversion: ``path`` lists the formats to go through
            link_char = '~'
            if len(path) == 3:
                help_details = " (w/ 1 intermediate)"
            else:
                help_details = " (w/ %i intermediates)" % (len(path) - 2)

        help_text = '%sto%s> %s%s' % (
            (in_fmt + ' ').ljust(max_converter_width, link_char),
            link_char,
            out_fmt,
            help_details,
        )
        sub_parser = subparsers.add_parser(
            sub_parser_name,
            help=help_text,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            epilog=(
                "Bioconvert is an open source collaborative project. "
                "Please feel free to join us at "
                "https://github/biokit/bioconvert "
            ),
        )

        if converter:
            converter.add_argument_to_parser(sub_parser=sub_parser)
        elif path:
            for a in ConvBase.get_common_arguments():
                a.add_to_sub_parser(sub_parser)

    arg_parser.add_argument(
        "-v",
        "--verbosity",
        default=bioconvert.logger.level,
        help="Set the outpout verbosity.",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )
    arg_parser.add_argument(
        "--dependency-report",
        action="store_true",
        default=False,
        help="Output all bioconvert dependencies in json and exit")
    arg_parser.add_argument("-a",
                            "--allow-indirect-conversion",
                            action="store_true",
                            help="Show all possible indirect conversions "
                            "(labelled as intermediate) (EXPERIMENTAL)")
    arg_parser.add_argument("--version",
                            action="store_true",
                            default=False,
                            help="Show version")
    arg_parser.add_argument(
        "--conversion-graph",
        nargs="?",
        default=None,
        choices=[
            "cytoscape",
            "cytoscape-all",
        ],
    )

    try:
        args = arg_parser.parse_args(args)
    except SystemExit as e:
        # parsing ask to stop, maybe a normal exit
        if e.code == 0:
            raise
        # Parsing failed, trying to guess command. ``args`` is still the raw
        # token list here because the assignment above did not happen.
        from bioconvert.core.levenshtein import wf_levenshtein as lev

        # Options whose following token is a value, not a sub-command. The
        # real option is ``--verbosity``; ``--verbose`` is kept so that this
        # frequent typo does not turn its value into the guessed command.
        value_flags = ("-l", "--level", "-v", "--verbosity", "--verbose",
                       "--conversion-graph")
        sub_command = None
        for position, token in enumerate(args):
            if not token or token.startswith("-"):
                continue
            if position > 0 and args[position - 1] in value_flags:
                continue
            sub_command = token
            break

        if sub_command is None:
            # No sub_command found, so letting the initial exception be risen
            raise

        conversions = []
        for in_fmt, out_fmt, converter, path in registry.iter_converters(
                allow_indirect_conversion):
            conversion_name = "{}2{}".format(in_fmt.lower(), out_fmt.lower())
            conversions.append((lev(conversion_name, sub_command),
                                conversion_name))
        matches = sorted(conversions)[:5]
        if not matches or matches[0][0] == 0:
            # sub_command was ok (or nothing to suggest), problem comes
            # from elsewhere
            raise
        arg_parser.exit(
            e.code, '\n\nYour converter {}() was not found. \n'
            'Here is a list of possible matches: {} ... '
            '\nYou may also add the -a argument to enfore a '
            'transitive conversion. The whole list is available using\n\n'
            ' bioconvert --help -a \n'.format(
                sub_command, ', '.join([v for _, v in matches])))

    if args.version:
        print("{}".format(bioconvert.version))
        sys.exit(0)

    if args.dependency_report:
        print(
            json.dumps(
                get_known_dependencies_with_availability(as_dict=True),
                sort_keys=True,
                indent=4,
            ))
        sys.exit(0)

    if args.conversion_graph:
        if args.conversion_graph.startswith("cytoscape"):
            all_converter = args.conversion_graph == "cytoscape-all"
            print(
                json.dumps(
                    graph.create_graph_for_cytoscape(
                        all_converter=all_converter),
                    indent=4,
                ))
        sys.exit(0)

    if args.command is None:
        msg = 'No converter specified. You can list converter by doing bioconvert --help'
        arg_parser.error(msg)

    if not (getattr(args, "show_methods", False) or args.input_file):
        arg_parser.error('Either specify an input_file (<INPUT_FILE>) or '
                         'ask for available methods (--show-method)')

    if not args.allow_indirect_conversion and \
            ConvMeta.split_converter_to_extensions(args.command) not in registry:
        arg_parser.error('The conversion %s is not available directly, '
                         'you have to accept that we chain converter to do'
                         ' so (--allow-indirect-conversion or -a)' %
                         args.command)

    # DEBUG verbosity implies that exceptions are propagated to the caller
    args.raise_exception = args.raise_exception or args.verbosity == "DEBUG"

    # Set the logging level
    bioconvert.logger.level = args.verbosity

    # Figure out whether we have several input files or not
    # Are we in batch mode ?
    import glob
    if args.batch:
        filenames = glob.glob(args.input_file)
    else:
        filenames = [args.input_file]

    for filename in filenames:
        args.input_file = filename
        try:
            analysis(args)
        except Exception as e:
            if args.raise_exception:
                raise
            bioconvert.logger.error(e)
            sys.exit(1)
def __init__(self, infile, outfile, force=False, threads=None, extra=None):
    """.. rubric:: constructor

    :param str infile: The path of the input file (or a sequence of paths).
    :param str outfile: The path of the output file (or a sequence of
        paths).
    :param bool force: overwrite output file if it exists already
        otherwise raises an error
    :param threads: when not ``None``, stored in the ``threads`` attribute
        of the underlying converter.
    :param extra: when truthy, stored in the ``_extra_arguments`` attribute
        of the underlying converter.
    :raises ValueError: if an output file exists and ``force`` is False.
    :raises IOError: if an output file ends with ``.dsrc`` but not with
        ``.fastq.dsrc``.
    :raises Exception: if neither a direct converter nor a chain of
        converters exists for the requested formats.

    The input file is deliberately not checked for existence because some
    converters take a prefix rather than a file name.
    """
    # Accept a single path as well as a sequence of paths.
    if isinstance(outfile, str):
        outfile = [outfile]
    if isinstance(infile, str):
        infile = [infile]

    # some checking on the output files (existence, special case of dsrc)
    for filename in outfile:
        if os.path.exists(filename):
            msg = "output file {} exists already.".format(filename)
            if force is False:
                _log.critical(
                    "output file exists. If you are using bioconvert, use --force "
                )
                raise ValueError(msg)
            _log.warning(msg + " --force used so will be over written")

        # Only fastq files can be compressed with dsrc
        if filename.endswith(".dsrc") and \
                not filename.endswith(".fastq.dsrc"):
            msg = ("When compressing with .dsrc extension, "
                   "only files ending with .fastq extension are "
                   "accepted. This is due to the way dsrc executable "
                   "is implemented.")
            _log.critical(msg)
            # the message is now attached to the exception as well
            raise IOError(msg)

    Lin = len(infile)
    Lout = len(outfile)

    # Extensions without the compression suffix: for fastq.gz -> fasta.bz2
    # we want to decompress, convert and compress, so the conversion itself
    # is looked up from "fastq" and "fasta".
    self.inext = [getext(name, remove_compression=True) for name in infile]
    self.outext = [getext(name, remove_compression=True) for name in outfile]

    # special case one to one for compression/decompression, e.g.
    # fastq.gz to fastq.bz2: data is not changed, just the type of
    # compression, so the full extensions are kept instead.
    if Lin == Lout == 1 and self.inext == self.outext:
        _log.info("decompression/compression mode")
        self.inext = [getext(infile[0])]
        self.outext = [getext(outfile[0])]

    self.mapper = Registry()

    # A converter name present on the command line takes precedence over
    # the formats guessed from the file extensions.
    named_converters = list(
        set(self.mapper.get_converters_names()).intersection(sys.argv))
    if named_converters:
        in_fmt, out_fmt = ConvMeta.split_converter_to_format(
            named_converters[0])
    else:
        # get format from extensions
        in_fmt = [get_format_from_extension(x) for x in self.inext]
        out_fmt = [get_format_from_extension(x) for x in self.outext]

    # NOTE(review): the previous code built lower-cased lists here and
    # immediately overwrote them with these tuples, so the formats have
    # always kept their original case; that effective behaviour is kept
    # because the registry lookup below depends on it.
    self.in_fmt = tuple(in_fmt)
    self.out_fmt = tuple(out_fmt)

    _log.info("Input: {}".format(self.in_fmt))
    _log.info("Output: {}".format(self.out_fmt))

    try:
        class_converter = self.mapper[(self.in_fmt, self.out_fmt)]
    except KeyError:
        # This module name was not found
        # Try to find path of converters
        conv_path = self.mapper.conversion_path(self.in_fmt, self.out_fmt)
        _log.debug("path: {}".format(conv_path))
        if not conv_path:
            msg = "Requested input format ('{}') to output format ('{}') is not available in bioconvert".format(
                self.in_fmt,
                self.out_fmt,
            )
            _log.critical(msg)
            _log.critical(
                "Use --formats to know the available formats and --help for examples"
            )
            raise Exception(msg)
        _log.info("Direct conversion not implemented. "
                  "Chaining converters.")
        # implemented in bioconvert/core/base.py
        # using temporary files
        class_converter = make_chain([(pair, self.mapper[pair])
                                      for pair in conv_path])
    else:
        # ``name`` is only defined for direct conversions
        self.name = class_converter.__name__

    # FIXME: hack for the compression/decompression decorators: a single
    # file is passed as a plain string rather than a one-element list.
    if Lin == 1:
        infile = infile[0]
    if Lout == 1:
        outfile = outfile[0]

    self.converter = class_converter(infile, outfile)

    # If --threads provided, we update the threads attribute
    if threads is not None:
        self.converter.threads = threads
    if extra:
        self.converter._extra_arguments = extra

    _log.info("Using {} class (with {} threads if needed)".format(
        self.converter.name, self.converter.threads))
def analysis(args):
    """Run one conversion (or its benchmark) from parsed CLI arguments.

    :param args: namespace produced by the bioconvert argument parser. The
        attributes read here are ``converter``, ``input_file``,
        ``output_file``, ``force``, ``raise_exception``, ``benchmark``,
        ``benchmark_N`` and, optionally, ``show_methods``.
    :raises RuntimeError: if the :class:`Bioconvert` wrapper ends up with an
        empty input or output format.

    When ``show_methods`` is set, the methods of the converter are printed
    and the process exits with status 0 (the function simply returns
    instead when ``raise_exception`` is set). A missing input file ends the
    process with status 1, except for converters whose name contains
    ``plink``.
    """
    in_fmt, out_fmt = ConvMeta.split_converter_to_format(args.converter)

    # do we want to know the available methods ? If so, print info and quit
    if getattr(args, "show_methods", False):
        class_converter = Registry()[(in_fmt, out_fmt)]
        print("Methods available: {}".format(
            class_converter.available_methods))
        print("\nPlease see http://bioconvert.readthedocs.io/en/master/"
              "references.html#{} for details ".format(
                  str(class_converter).split("'")[1]))
        if args.raise_exception:
            return
        sys.exit(0)

    # Input and output filename
    infile = args.input_file

    # Check that the input file exists
    # Fixes https://github.com/bioconvert/bioconvert/issues/204
    # Some convertors use a prefix instead of a filename (this is true for
    # the plink families), so those are not checked.
    if not os.path.exists(infile) and "plink" not in args.converter:
        _log.error(
            "Input file {} does not exist (analysis)".format(infile))
        sys.exit(1)

    if args.output_file is None and infile:
        # No output given: swap the last extension of the input file for the
        # output format of the converter (already parsed above).
        outfile = infile.rsplit(".", 1)[0] + "." + out_fmt.lower()
    else:
        outfile = args.output_file

    # Call a generic wrapper of all available conversion
    conv = Bioconvert(
        infile,
        outfile,
        in_fmt=in_fmt,
        out_fmt=out_fmt,
        force=args.force,
    )

    if not conv.in_fmt:
        raise RuntimeError("convert infer the format from the extension name."
                           " So add extension to the input file name or use"
                           " --input-format option.")

    if not conv.out_fmt:
        raise RuntimeError("convert infer the format from the extension name."
                           " So add extension to the output file name or use"
                           " --output-format option.")

    bioconvert.logger.info("Converting from {} to {}".format(
        conv.in_fmt, conv.out_fmt))

    if args.benchmark:
        conv.boxplot_benchmark(N=args.benchmark_N)
        import pylab
        # Only the missing-attribute case triggers the fallback; a bare
        # ``except`` here used to hide KeyboardInterrupt and genuine savefig
        # failures as well.
        # NOTE(review): presumably ``conv.name`` is absent for some wrapper
        # configurations -- confirm against Bioconvert.
        try:
            benchmark_name = conv.name
        except AttributeError:
            benchmark_name = conv.converter.name
        outpng = "benchmark_{}.png".format(benchmark_name)
        pylab.savefig(outpng, dpi=200)
        # the placeholder was previously logged verbatim ("File {} created")
        bioconvert.logger.info("File {} created".format(outpng))
    else:
        # every parsed option is forwarded as a keyword argument
        conv(**vars(args))
def main(args=None):
    """Entry point of the ``bioconvert`` command line interface.

    Two calling modes are handled. In explicit mode the first token is a
    converter name (``bioconvert fastq2fasta in.fastq out.fasta``). In
    implicit mode (as reported by ``ParserHelper``) only file names are
    given and the converter name is deduced from their extensions, then
    inserted in front of ``args``.

    Afterwards one sub-parser per registered conversion is built, ``args``
    is parsed, the informational options (``--version``,
    ``--dependency-report``, ``--conversion-graph``) are handled and
    :func:`analysis` is called for every input file.

    :param list args: command line tokens without the program name. When
        ``None``, ``sys.argv[1:]`` is used. In implicit mode the list is
        modified in place.

    The function terminates the process through ``sys.exit`` or the
    ``argparse`` error helpers in most failure paths.
    """
    # used later on
    registry = Registry()

    if args is None:
        args = sys.argv[1:]

    # convenient variable to check implicit/explicit mode and
    # get information about the arguments.
    ph = ParserHelper(args)

    # NOTE(review): this tests sys.argv, not ``args``; implicit mode is
    # therefore skipped when main() is called programmatically from a
    # process started without arguments -- confirm this is intended.
    if len(sys.argv) != 1 and ph.mode == "implicit":
        # Check that the input file exists
        # Fixes https://github.com/bioconvert/bioconvert/issues/204
        if not os.path.exists(args[0]):
            _log.error("First input file {} does not exist".format(args[0]))
            sys.exit(1)

        # list of filenames from which we get the extensions
        filenames = ph.get_filelist()
        exts = [
            utils.get_extension(x, remove_compression=True)
            for x in filenames
        ]

        # We need to get the corresponding converter if any.
        # We assume that the input formats are ordered alphabetically
        # (bioconvert API). For instance fasta,qual to fastq can be found
        # but qual,fasta to fastq cannot. Indeed, in more complex cases
        # such as a,b -> c,d we cannot know whether there are 1 or 3
        # inputs.
        converter = []
        # Bound up front so that the messages below also work when fewer
        # than two files were given (the loop then never runs).
        in_ext, out_ext = tuple(exts), ()
        # if input is a,b,c,d we want to try a->(b,c,d) and
        # (a,b)->(c,d) and (a,b,c)->d so L-1 cases
        for i in range(1, len(exts)):
            in_ext = tuple(exts[0:i])
            out_ext = tuple(exts[i:])
            try:
                converter.extend(registry.get_ext((in_ext, out_ext)))
            except KeyError:
                pass

        # For 1-to-1, if the extensions are identical but different
        # compression, this means we just want to decompress and
        # re-compress in another format.
        if not converter and len(exts) >= 2 and exts[0] == exts[1]:
            exts_with_comp = [
                utils.get_extension(x, remove_compression=False)
                for x in filenames
            ]
            in_ext, out_ext = exts_with_comp[0], exts_with_comp[1]
            comps = ['gz', 'dsrc', 'bz2']
            if in_ext in comps and out_ext in comps:
                converter.extend(
                    registry.get_ext(((in_ext, ), (out_ext, ))))

        if not converter:
            # no converter is found, print information
            msg = '\nBioconvert does not support conversion {} -> {}. \n\n'
            msg = msg.format(in_ext, out_ext)
            # maybe it is an indirect conversion ? let us look at the
            # digraph
            try:
                _path = registry._path_dict_ext[in_ext][out_ext]
                # Here, we have a transitive list of tuples to go from A to
                # C, e.g. from fq to clustal:
                #     [('fq',), ('fa',), ('clustal',)]
                # Naively joining those names (fq2clustal) gives a non
                # official converter name (the official one is
                # fastq2clustal), hence the names are taken from the
                # converter classes of the first and of the last hop. This
                # also works for paths with several intermediates, which
                # used to fail on tuple unpacking.
                first_hop = registry._ext_registry[_path[0], _path[1]][0]
                last_hop = registry._ext_registry[_path[-2], _path[-1]][0]
                a = first_hop.__name__.split("2")[0]
                b = last_hop.__name__.split("2")[1]
                convname = "2".join([a, b]).lower()

                msg += "\n".join(
                    textwrap.wrap(
                        "Note, however, that an indirect conversion through"
                        " an intermediate format is possible for your input and "
                        " output format. To do so, you need to use the -a option "
                        " and be explicit about the type of conversion. To get "
                        " the list of possible direct and indirect conversion, "
                        " please use:\n\n"))
                msg += "\n\n bioconvert --help -a\n\n"
                # trailing space added: the two sentences were glued
                # together ("probablythe")
                msg += "For help and with your input/output most probably "
                msg += "the command should be: \n\n bioconvert {} {} -a\n\n ".format(
                    convname, " ".join(ph.get_filelist()))
            except (KeyError, IndexError):
                # not converter found in the path
                pass
            error(msg)
        elif len(converter) == 1:
            # the ext_pair matches a single converter
            args.insert(0, converter[0].__name__.lower())
        else:
            # the ext_pair matches multiple converters
            _log.error("Ambiguous extension.\n"
                       "You must specify the right conversion Please "
                       "choose a conversion from: \n\n"
                       "{}".format("\n".join(
                           [c.__name__.lower() for c in converter])))
            sys.exit(1)

    # Set the default level
    bioconvert.logger.level = "ERROR"

    # Changing the log level before argparse is run, so that messages
    # emitted while the parser is built already honour it. Later flags
    # override earlier ones, as before.
    for flag in ("-l", "--level", "-v", "--verbosity"):
        if flag not in args:
            continue
        value_position = args.index(flag) + 1
        if value_position >= len(args):
            continue
        try:
            bioconvert.logger.level = args[value_position]
        except Exception:
            # Best effort only: an invalid level is reported by argparse
            # (``choices``) once the real parsing takes place.
            pass

    # if there is the ability to convert from A to B to C, we must set
    # the option -a (--allow_indirect_conversion)
    allow_indirect_conversion = (
        "--allow-indirect-conversion" in args or "-a" in args)

    # Now, the instanciation of the main bioconvert user interface
    # NOTE(review): the epilog text below is single-line in this copy of the
    # file although RawDescriptionHelpFormatter is used; the original line
    # layout may have been lost -- confirm upstream.
    arg_parser = argparse.ArgumentParser(
        prog="bioconvert",
        description="",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            " Bioconvert contains tens of converters whose list is available "
            "as follows: bioconvert --help "
            "Each conversion has its own sub-command and dedicated help. "
            "For instance: bioconvert fastq2fasta --help "
            "Because the subcommand contains the format, extensions are not "
            "important for the conversion itself. This would convert the "
            "test.txt file (fastq format) into a fasta file: "
            "bioconvert fastq2fasta test.txt test.fasta "
            "If you use known extensions, the converter may be omitted:: "
            "bioconvert test.fastq test.fasta "
            "Users must ensure that their input format files are properly "
            "formatted. "
            "If there is a conversion from A to B and another for B to C, "
            "you can also perform indirect conversion using -a argument "
            "(experimental). This command shows all possible indirect "
            "conversions: bioconvert --help -a "
            "Please visit http://bioconvert.readthedocs.org for more "
            "information about the project or formats available. "
            "Would you wish to help, please join our open source "
            "collaborative project at https://github/bioconvert/bioconvert "
        ),
    )

    subparsers = arg_parser.add_subparsers(
        help='sub-command help',
        dest='converter',
    )

    # Width of the widest input name as displayed ("a_b" for a multi-format
    # input). It used to be the number of formats in the tuple, which made
    # the alignment padding ineffective.
    max_converter_width = 2 + max(
        (len("_".join(in_fmt))
         for in_fmt, _, _, _ in registry.iter_converters()),
        default=0)

    def sorting_tuple_string(item):
        # sort key: first element of a tuple entry, first character of a str
        if isinstance(item, tuple):
            return item[0][0]
        if isinstance(item, str):
            return item[0]

    # show all possible conversion including indirect conversion
    for in_fmt, out_fmt, converter, path in \
            sorted(registry.iter_converters(allow_indirect_conversion),
                   key=sorting_tuple_string):
        in_name = "_".join(ConvBase.lower_tuple(in_fmt))
        out_name = "_".join(ConvBase.lower_tuple(out_fmt))
        sub_parser_name = "{}2{}".format(in_name, out_name)

        if converter:
            # direct conversion
            link_char = '-'
            if len(converter.available_methods) < 1:
                help_details = " (no available methods please see the doc" \
                               " for install the necessary libraries) "
            else:
                help_details = " (%i methods)" % len(
                    converter.available_methods)
        else:
            # indirect conversion: ``path`` lists the formats to go through
            link_char = '~'
            if len(path) == 3:
                help_details = " (w/ 1 intermediate)"
            else:
                help_details = " (w/ %i intermediates)" % (len(path) - 2)

        help_text = '{}to{}> {}{}'.format(
            (in_name + ' ').ljust(max_converter_width, link_char),
            link_char,
            out_name,
            help_details,
        )
        sub_parser = subparsers.add_parser(
            sub_parser_name,
            help=help_text,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            epilog=(
                "Bioconvert is an open source collaborative project. "
                "Please feel free to join us at "
                "https://github/biokit/bioconvert "
            ),
        )

        if converter:
            converter.add_argument_to_parser(sub_parser=sub_parser)
        elif path:
            for a in ConvBase.get_IO_arguments():
                a.add_to_sub_parser(sub_parser)
            for a in ConvBase.get_common_arguments():
                a.add_to_sub_parser(sub_parser)

    # arguments when no explicit conversion provided.
    arg_parser.add_argument(
        "-v",
        "--verbosity",
        default=bioconvert.logger.level,
        help="Set the outpout verbosity.",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )
    arg_parser.add_argument(
        "-l",
        "--level",
        default=bioconvert.logger.level,
        help="Set the outpout verbosity. Same as --verbosity",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
    )
    arg_parser.add_argument(
        "--dependency-report",
        action="store_true",
        default=False,
        help="Output all bioconvert dependencies in json and exit")
    arg_parser.add_argument("-a",
                            "--allow-indirect-conversion",
                            action="store_true",
                            help="Show all possible indirect conversions "
                            "(labelled as intermediate)")
    arg_parser.add_argument("--version",
                            action="store_true",
                            default=False,
                            help="Show version")
    arg_parser.add_argument(
        "--conversion-graph",
        nargs="?",
        default=None,
        choices=[
            "cytoscape",
            "cytoscape-all",
        ],
    )

    try:
        args = arg_parser.parse_args(args)
    except SystemExit as e:
        # parsing ask to stop, maybe a normal exit
        if e.code == 0:
            raise
        # Parsing failed, trying to guess converter. ``args`` is still the
        # raw token list here because the assignment above did not happen.
        from bioconvert.core.levenshtein import wf_levenshtein as lev

        # Options whose following token is a value, not a converter name.
        # The real options are ``--verbosity``/``--level``; ``--verbose`` is
        # kept so that this frequent typo does not turn its value into the
        # guessed converter.
        value_flags = ("-l", "--level", "-v", "--verbosity", "--verbose",
                       "--conversion-graph")
        sub_command = None
        for position, token in enumerate(args):
            if not token or token.startswith("-"):
                continue
            if position > 0 and args[position - 1] in value_flags:
                continue
            sub_command = token
            break

        if sub_command is None:
            # No sub_command found, so letting the initial exception be risen
            raise

        conversions = []
        for in_fmt, out_fmt, converter, path in registry.iter_converters(
                allow_indirect_conversion):
            conversion_name = "{}2{}".format(
                "_".join(ConvBase.lower_tuple(in_fmt)),
                "_".join(ConvBase.lower_tuple(out_fmt)))
            conversions.append((lev(conversion_name, sub_command),
                                conversion_name))
        matches = sorted(conversions)[:5]
        if not matches or matches[0][0] == 0:
            # sub_command was ok (or nothing to suggest), problem comes
            # from elsewhere
            raise
        arg_parser.exit(
            e.code, '\n\nYour converter {}() was not found. \n'
            'Here is a list of possible matches: {} ... '
            '\nYou may also add the -a argument to enfore a '
            'transitive conversion. The whole list is available using\n\n'
            ' bioconvert --help -a \n'.format(
                sub_command, ', '.join([v for _, v in matches])))

    if args.version:
        print("{}".format(bioconvert.version))
        sys.exit(0)

    if args.dependency_report:
        print(
            json.dumps(
                get_known_dependencies_with_availability(as_dict=True),
                sort_keys=True,
                indent=4,
            ))
        sys.exit(0)

    if args.conversion_graph:
        if args.conversion_graph.startswith("cytoscape"):
            all_converter = args.conversion_graph == "cytoscape-all"
            print(
                json.dumps(
                    graph.create_graph_for_cytoscape(
                        all_converter=all_converter),
                    indent=4,
                ))
        sys.exit(0)

    if args.converter is None:
        msg = "No converter specified. "
        msg += "You can list all converters by using:\n\n\tbioconvert --help"
        arg_parser.error(msg)

    if not (getattr(args, "show_methods", False) or args.input_file):
        arg_parser.error('Either specify an input_file (<INPUT_FILE>) or '
                         'ask for available methods (--show-method)')

    if not args.allow_indirect_conversion and \
            ConvMeta.split_converter_to_format(args.converter) not in registry:
        arg_parser.error('The conversion {} is not available directly, '
                         'you have to accept that we chain converter to do'
                         ' so (--allow-indirect-conversion or -a)'.format(
                             args.converter))

    # DEBUG verbosity implies that exceptions are propagated to the caller
    args.raise_exception = args.raise_exception or args.verbosity == "DEBUG"

    # Set the logging level
    bioconvert.logger.level = args.verbosity

    # Figure out whether we have several input files or not
    # Are we in batch mode ?
    if args.batch:
        filenames = glob.glob(args.input_file)
    else:
        filenames = [args.input_file]

    N = len(filenames)
    for i, filename in enumerate(filenames):
        if N > 1:
            _log.info("Converting {} ({}/{})".format(filename, i + 1, N))
        args.input_file = filename
        try:
            analysis(args)
        except Exception as e:
            if args.raise_exception:
                raise
            bioconvert.logger.error(e)
            sys.exit(1)
def analysis(args):
    """Run one conversion (or its benchmark) from parsed CLI arguments.

    :param args: namespace produced by the bioconvert argument parser. The
        attributes read here are ``converter``, ``input_file``,
        ``output_file``, ``force``, ``raise_exception``, ``benchmark``,
        ``benchmark_N``, ``benchmark_methods`` and, optionally,
        ``show_methods``, ``threads`` and ``extra_arguments``.

    When ``show_methods`` is set, the methods of the converter are printed
    and the process exits with status 0 (the function simply returns
    instead when ``raise_exception`` is set). When ``input_file`` is a
    tuple, a missing file ends the process with status 1, except for
    converters whose name contains ``plink``.
    """
    in_fmt, out_fmt = ConvMeta.split_converter_to_format(args.converter)

    # do we want to know the available methods ? If so, print info and quit
    if getattr(args, "show_methods", False):
        class_converter = Registry()[(in_fmt, out_fmt)]
        print("Methods available: {}".format(
            class_converter.available_methods))
        print("\nPlease see http://bioconvert.readthedocs.io/en/master/"
              "references.html#{} for details ".format(
                  str(class_converter).split("'")[1]))
        if args.raise_exception:
            return
        sys.exit(0)

    # Input and output filename
    infile = args.input_file

    # Check that the input file exists
    # Fixes https://github.com/bioconvert/bioconvert/issues/204
    # Some convertors use a prefix instead of a filename (this is true for
    # the plink families), so those are not checked.
    if isinstance(infile, tuple) and "plink" not in args.converter:
        for file in infile:
            if not os.path.exists(file):
                _log.error(
                    "Input file {} does not exist (analysis)".format(file))
                sys.exit(1)

    if args.output_file is None and infile:
        # No output given: derive it from the input name and the first
        # output format of the converter (already parsed above).
        stem = infile.rsplit(".", 1)[0]
        if infile.split(".")[-1] in ["gz", "dsrc", "bz2"]:
            # The compression suffix is gone, now drop the format extension
            # itself. Splitting from the right keeps dots that belong to
            # the directory or base name ("./x.fastq.gz", "a.b.fastq.gz");
            # splitting from the left used to cut at the first dot.
            stem = stem.rsplit(".", 1)[0]
        outfile = stem + "." + out_fmt[0].lower()
    else:
        outfile = args.output_file

    # --threads and the extra arguments only exist for some sub-commands;
    # fall back to None (Bioconvert's own default) instead of leaving the
    # name unbound.
    threads = getattr(args, "threads", None)
    extra_arguments = getattr(args, "extra_arguments", None)

    # Call a generic wrapper of all available conversion
    conv = Bioconvert(
        infile,
        outfile,
        force=args.force,
        threads=threads,
        extra=extra_arguments)

    if args.benchmark:
        conv.boxplot_benchmark(N=args.benchmark_N,
                               to_include=args.benchmark_methods)
        print(args.benchmark_methods)
        import pylab
        # Only the missing-attribute case triggers the fallback; a bare
        # ``except`` here used to hide KeyboardInterrupt and genuine savefig
        # failures as well.
        # NOTE(review): presumably ``conv.name`` is absent for some wrapper
        # configurations -- confirm against Bioconvert.
        try:
            benchmark_name = conv.name
        except AttributeError:
            benchmark_name = conv.converter.name
        outpng = "benchmark_{}.png".format(benchmark_name)
        pylab.savefig(outpng, dpi=200)
        # the placeholder was previously logged verbatim ("File {} created")
        bioconvert.logger.info("File {} created".format(outpng))
    else:
        # every parsed option is forwarded as a keyword argument
        conv(**vars(args))
def analysis(args):
    """Run one conversion (or its benchmark) from parsed CLI arguments.

    :param args: namespace produced by the bioconvert argument parser. The
        attributes read here are ``converter``, ``input_file``,
        ``output_file``, ``force``, ``benchmark``, ``benchmark_N``,
        ``benchmark_methods`` and, optionally, ``threads`` and
        ``extra_arguments``.

    When ``input_file`` is a tuple, a missing file ends the process with
    status 1, except for converters whose name contains ``plink``.
    """
    # Only the output format is needed below; the call also fails early on
    # a malformed converter name.
    _, out_fmt = ConvMeta.split_converter_to_format(args.converter)

    # Input and output filename
    infile = args.input_file

    # Check that the input file exists
    # Fixes https://github.com/bioconvert/bioconvert/issues/204
    # Some convertors use a prefix instead of a filename (this is true for
    # the plink families), so those are not checked.
    if isinstance(infile, tuple) and "plink" not in args.converter:
        for file in infile:
            if not os.path.exists(file):
                _log.error(
                    "Input file {} does not exist (analysis)".format(file))
                sys.exit(1)

    if args.output_file is None and infile:
        # No output given: derive it from the input name and the first
        # output format of the converter.
        stem = infile.rsplit(".", 1)[0]
        if infile.split(".")[-1] in ["gz", "dsrc", "bz2"]:
            # the compression suffix is gone, now get rid of the format
            # extension itself
            stem = stem.rsplit(".", 1)[0]
        outfile = stem + "." + out_fmt[0].lower()
    else:
        outfile = args.output_file

    # --threads and the extra arguments only exist for some sub-commands;
    # fall back to None (Bioconvert's own default) instead of leaving the
    # name unbound.
    threads = getattr(args, "threads", None)
    extra_arguments = getattr(args, "extra_arguments", None)

    # Call a generic wrapper of all available conversion
    conv = Bioconvert(
        infile,
        outfile,
        force=args.force,
        threads=threads,
        extra=extra_arguments,
    )

    if args.benchmark:
        conv.boxplot_benchmark(N=args.benchmark_N,
                               to_include=args.benchmark_methods)
        print(args.benchmark_methods)
        import pylab
        # Only the missing-attribute case triggers the fallback; a bare
        # ``except`` here used to hide KeyboardInterrupt and genuine savefig
        # failures as well.
        # NOTE(review): presumably ``conv.name`` is absent for some wrapper
        # configurations -- confirm against Bioconvert.
        try:
            benchmark_name = conv.name
        except AttributeError:
            benchmark_name = conv.converter.name
        outpng = "benchmark_{}.png".format(benchmark_name)
        pylab.savefig(outpng, dpi=200)
        # the placeholder was previously logged verbatim ("File {} created")
        bioconvert.logger.info("File {} created".format(outpng))
    else:
        # every parsed option is forwarded as a keyword argument
        conv(**vars(args))