def main(argv=None):
    """Command-line entry point for mtsv-plugin.

    Builds the argparse tree from the registered plugin COMMANDS, prints
    help when invoked with no arguments, and otherwise dispatches to
    setup_and_run. A run timestamp is stored as a parser default so every
    command inherits it.
    """
    argv = sys.argv if argv is None else argv
    cli = argparse.ArgumentParser(
        prog="mtsv-plugin",
        description="Plugins and extensions to MTSv",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Single timestamp shared by all subcommands of this invocation.
    cli.set_defaults(
        timestamp=datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    sub = cli.add_subparsers(title="commands", metavar="COMMANDS",
                             help="Plugin Commands")
    for name, handler in COMMANDS.items():
        make_sub_parser(sub, name, handler)
    # No command given: show usage and exit successfully.
    if len(argv) == 1:
        cli.print_help(sys.stdout)
        sys.exit(0)
    try:
        setup_and_run(argv, cli)
    except KeyboardInterrupt:
        error("\n-- Stopped by user --", exception=False)
def modify_params(self):
    """Reconstruct run parameters by tracing metadata back through the
    hidden ``.params`` JSON files stored next to each input file
    (summary -> merge -> readprep), then redirect all normal outputs
    into the analysis subdirectory.

    Calls ``error()`` (which aborts) when any metadata file is missing,
    unreadable, or lacks the expected keys — typically because an input
    file was moved out of its original directory.
    """
    def _read_meta(path):
        # Load the ".params" JSON stored alongside *path*; the with-block
        # fixes the original's leaked file handles from bare open().read().
        meta_path = os.path.join(os.path.dirname(path), ".params")
        with open(meta_path, 'r') as handle:
            return json.loads(handle.read())

    # trace back params from input files
    self.params['analyze_outpath'] = os.path.dirname(
        self.params['analysis_file'])
    try:
        summary_params = _read_meta(
            self.params['summary_file'])[
                'summary_file'][self.params['summary_file']]
        merge_file = summary_params['merge_file']
        bin_params = _read_meta(merge_file)['merge_file'][merge_file]
        kmer = _read_meta(
            bin_params['fasta'])['readprep'][bin_params['fasta']]['kmer']
    except (IOError, ValueError, KeyError) as e:
        # BUG FIX: the original did `"...metadata" + e`, concatenating a
        # str with an exception object, which raises TypeError and masks
        # the real problem. Format the exception into the message instead.
        error(
            "Problem with input file metadata. "
            "Avoid moving input files from their original directory "
            "because the directory contains required metadata: "
            "{}".format(e))
    self.params['kmer'] = kmer
    # Binning parameters use dashes in the metadata but underscores here.
    for key in ['seed-size', 'min-seeds', 'seed-gap', 'edits',
                'fm_index_paths', 'binning_mode']:
        self.params[key.replace("-", "_")] = bin_params[key]
    self.params['database_config'] = file_type(
        bin_params['database_config'])
    self.params['header'] = []
    self.params['candidate_taxa'] = outfile_type(
        os.path.join(self.params['analyze_outpath'], "candidate_taxa.txt"))
    self.params['candidate_taxa_req'] = outfile_type(
        os.path.join(self.params['analyze_outpath'],
                     "candidate_taxa_required.txt"))
    self.params['summary_file_in'] = self.params['summary_file']
    self.params['tax_level'] = summary_params['tax_level']
    # self.params['lca'] = summary_params['lca']
    # move all normal output into analysis subdirectory
    self.params['binning_outpath'] = self.modify_helper("Binning")
    self.params['fasta'] = self.modify_helper("analysis_queries.fasta")
    self.params['merge_file'] = self.modify_helper("Binning/merged.clp")
    self.params['signature_file'] = self.modify_helper("signature.txt")
    self.params['summary_file'] = self.modify_helper("summary.csv")
    try:
        self.params['fasta_db'] = file_type(
            get_database_params(self.params['database_config'],
                                "fasta-path"))
        self.params['serial_path'] = file_type(
            get_database_params(self.params['database_config'],
                                "serialization-path"))
    except argparse.ArgumentTypeError:
        # file_type() raises ArgumentTypeError when a path no longer exists.
        error("""Database paths used to produce summary file
        have been moved or deleted""")
def main(argv=None):
    """Command-line entry point for mtsv-setup.

    Builds the argparse tree: the registered COMMANDS plus several
    hand-wired subcommands (json_update, oneclick, json_combine, ff_list),
    then mirrors the global config options onto a subparser before
    dispatching to setup_and_run.
    """
    if argv is None:
        argv = sys.argv
    parser = argparse.ArgumentParser(
        prog="mtsv-setup",
        description="Download and build sequence databases and indices",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    subparsers = parser.add_subparsers(title="commands", metavar="COMMAND",
                                       help="Setup Commands")
    for command, cmd_class in COMMANDS.items():
        make_sub_parser(subparsers, command, cmd_class)
    # Extra subcommands wired up manually rather than via COMMANDS.
    p = subparsers.add_parser("json_update")
    p.add_argument("--path")
    p = subparsers.add_parser("oneclick")
    p = subparsers.add_parser("json_combine")
    p.add_argument("--path", type=str)
    p.add_argument("--custom_db", nargs='+', type=str)
    p.add_argument("--partitions", type=str)
    p.add_argument("--output", type=str)
    p = subparsers.add_parser("ff_list")
    p.add_argument("--path")
    # NOTE(review): after the block above, `p` is the "ff_list" subparser,
    # so the loop below attaches every global config option to it only —
    # confirm this is intentional and not meant to target each subparser.
    for command, cmd_class in COMMANDS.items():
        for arg, desc in get_global_config(cmd_class.config_section).items():
            if "_meta" in arg:
                # Meta entries describe sections, not CLI options.
                continue
            if 'type' in desc:
                # Config stores type names; resolve to callables.
                desc['type'] = TYPES[desc['type']]
            if 'default' in desc and 'help' in desc:
                desc['help'] += " (default: {})".format(desc['default'])
            if 'action' in desc and desc['action'] in ACTIONS:
                # Resolve action name to the class defined in this module.
                desc['action'] = getattr(
                    sys.modules[__name__], desc['action'])
            arg = "--{}".format(arg)
            if 'positional' in desc:
                # 'positional' is config metadata, not an argparse kwarg.
                del desc['positional']
            try:
                p.add_argument(arg, **desc)
            except argparse.ArgumentError:
                # Duplicate option across sections; first definition wins.
                continue
        try:
            add_default_arguments(p)
        except argparse.ArgumentError:
            # Defaults already added on a previous iteration.
            pass
    # No command given: print usage.
    # NOTE(review): exits with status 1 here, while the sibling entry
    # points (mtsv, mtsv-plugin) exit 0 in the same situation — confirm.
    if len(argv) == 1:
        parser.print_help(sys.stdout)
        sys.exit(1)
    try:
        # NOTE(review): called without `argv`, unlike the other mains'
        # setup_and_run(argv, parser) — presumably a different module's
        # setup_and_run; verify the signature.
        setup_and_run(parser)
    except KeyboardInterrupt:
        error("\n-- Stopped by user --", exception=False)
def remove_datastore(datastore, ds_path_id):
    """Remove the 'expected' and 'bloom' datasets addressed by
    *ds_path_id* (a dash-separated tuple of hash, kmer size, edits,
    seed size, seed gap and min seeds) from the given datastore.

    Reports via error() when the datasets are absent; the store is
    always closed.
    """
    store = DataStore(datastore)
    dataset_path = "hash_{0}/kmer_size_{1}/edits_{2}/" \
        "seed_size_{3}/seed_gap_{4}/min_seeds_{5}".format(
            *ds_path_id.split("-"))
    try:
        store.remove("{}/expected".format(dataset_path))
        store.remove("{}/bloom".format(dataset_path))
    except KeyError:
        error("Dataset not found in datastore")
    finally:
        store.close()
def project_dir_type(input_path):
    """Resolve *input_path* to an absolute project directory.

    Creates the directory when it does not exist (error() is invoked on
    PermissionError) and switches the process working directory into it.
    Returns the absolute path.
    """
    target = os.path.abspath(input_path)
    if not os.path.isdir(target):
        try:
            os.mkdir(target)
        except PermissionError:
            error("No permission to make directory: {}".format(target))
    if os.getcwd() != target:
        os.chdir(target)
    return target
def outfile_type(input_file):
    '''Checks that path to file exists, if it doesn't, the path
    is created (including intermediate directories), returns abs path
    to file. error() is called on PermissionError when there is no
    permission to create the directory.'''
    input_file = os.path.abspath(input_file)
    path = os.path.dirname(input_file)
    if not os.path.isdir(path):
        try:
            # BUG FIX: os.mkdir raised an uncaught FileNotFoundError when
            # intermediate directories were missing; makedirs creates the
            # whole tree.
            os.makedirs(path)
        except PermissionError:
            error("No permission to create file: {}".format(input_file))
    return input_file
def outpath_type(input_path):
    '''Outpath_type creates a directory (including intermediate
    directories) if one does not exist. error() is called on
    PermissionError when there are no permissions to create the
    directory. If the path already exists and it is not empty,
    a warning is issued. Returns absolute path to directory.'''
    input_path = os.path.abspath(input_path)
    try:
        # BUG FIX: os.mkdir could not create nested paths; makedirs can.
        os.makedirs(input_path)
        logger.info("Creating directory: {}".format(input_path))
    except PermissionError:
        error("No permission to make directory: {}".format(input_path))
    except FileExistsError:
        # BUG FIX: was `except OSError`, which also swallowed unrelated
        # failures (invalid names, missing parents) and mislabeled them
        # as "already exists" before crashing in os.listdir below.
        logger.info("Directory already exists: {}. ".format(input_path))
        if os.listdir(input_path):
            warn("Files in {} may be overwritten!".format(input_path))
    return input_path
def run(self):
    """Run each snakemake rule in self.rules as a subprocess.

    Non-None entries of self.params are passed via --config and
    self.snake_params are appended verbatim. On failure or interrupt the
    working directory is unlocked (snakemake --unlock) before error()
    reports the original exception. Parameters are persisted via
    self._params after each successful rule.
    """
    for rule in self.rules:
        cmd = ["snakemake", "--snakefile", rule, "--config"]
        # Only forward parameters that actually have a value.
        cmd += ["{0}={1}".format(key, value)
                for key, value in self.params.items()
                if value is not None]
        cmd += self.snake_params
        try:
            # FIX: result was bound to an unused local `p`; drop it.
            sp.run(cmd, check=True)
            self._params.write_parameters()
        except (KeyboardInterrupt, sp.CalledProcessError) as e:
            warn("Unlocking directory after failed snakemake")
            sp.run(cmd + ["--unlock"], check=True)
            error(e)
def get_unaligned_queries(collapse_file, query_fasta, outpath, sample_names=None): LOGGER.info("Reading query hits from: {}".format(collapse_file)) # read_hits(collapse_file, threads) n_samples = peek_at_samples_in_hits(collapse_file) # n_samples = len(parse_query_id(hits[0])) LOGGER.info("{} sample(s) in file.".format(n_samples)) if n_samples != peek_at_samples_in_fasta(query_fasta): msg = """ The number of samples in query fasta does not match the number of sequences in merge file """ LOGGER.error(msg) error(msg) total_dict = init_total_dict() # total_dict = add_hit_counts(total_dict, hits, threads) total_queries = get_total_queries(query_fasta) total_dict['total_unique_queries'] = total_queries unaligned_query_ids = get_unaligned_query_ids(collapse_file, total_queries, total_dict) unaligned_queries_file = os.path.join(outpath, "unaligned_queries.fasta") LOGGER.info("Finding unaligned queries, writing to {}".format( unaligned_queries_file)) write_unaligned_query_seqs(query_fasta, unaligned_query_ids, unaligned_queries_file) LOGGER.info("Finished finding unaligned queries") LOGGER.info("Sorting unaligned queries by sample") outfiles = get_outfiles(sample_names, n_samples, outpath) LOGGER.info("Sorting unaligned queries by sample, writing to: {}".format( ", ".join([f.name for f in outfiles]))) write_unaligned_queries_by_sample(unaligned_queries_file, outfiles, total_dict) LOGGER.info("Finished sorting unaligned queries by sample") calculate_final_totals(total_dict) log_results(total_dict) LOGGER.info("Writing summary json") write_summary_json(outpath, total_dict) LOGGER.info("FINISHED finding unaligned queries")
def parse_config_sections(config_file, sections):
    """Gather option/value pairs from the requested *sections* of an INI
    config file into a single flat dict.

    Missing sections are silently skipped and empty values are dropped
    (so optional parameters are not added). error() is invoked when the
    file cannot be parsed.
    """
    parser = configparser.ConfigParser()
    parser.read(config_file)
    collected = {}
    try:
        for section in sections:
            # A section may legitimately be absent; just move on.
            with suppress(configparser.NoSectionError):
                for option, value in parser.items(section):
                    # Only keep options that actually have a value,
                    # which avoids adding optional params.
                    if value:
                        collected[option] = value
    except configparser.ParsingError:
        error(
            "Cannot parse config file: {}".format(
                config_file.name))
    return collected
def report(configfile, report, snakemake_args):
    """
    Generate report.\n
    Additional Snakemake parameters should be passed at the command line.
    Runs:\n
    'snakemake --snakefile SNAKEPATH --configfile CONFIGFILE --report results/report.html'
    """
    # Build the snakemake invocation with the report flag plus any
    # pass-through arguments from the command line.
    extra_args = ["--report", report] + list(snakemake_args)
    cmd = get_cmd(configfile.name, extra_args)
    try:
        run_command(cmd, "Report", check_result=True)
    except sp.CalledProcessError:
        error("Report Failed")
    else:
        info("")
        info("Report written to {}".format(report))
def main(argv=None):
    """Command-line entry point for the mtsv pipeline.

    Registers the 'init' command plus every entry in COMMANDS, prints
    help when called without arguments, and otherwise hands control to
    setup_and_run. A shared run timestamp is stored as a parser default.
    """
    argv = sys.argv if argv is None else argv
    cli = argparse.ArgumentParser(
        prog="mtsv",
        description="Metagenomic analysis pipeline",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # One timestamp for the whole invocation, inherited by all commands.
    cli.set_defaults(
        timestamp=datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    commands = cli.add_subparsers(title="commands", metavar="COMMAND",
                                  help="Pipeline Commands")
    # 'init' is wired up by hand rather than through COMMANDS.
    init_cmd = commands.add_parser(
        'init',
        help="Initializes a directory with a pre-filled parameters file")
    init_cmd.add_argument(
        "-c", "--config",
        type=TYPES['write_handle_type'],
        default=DEFAULT_CFG_FNAME,
        help="Specify path to write config file, "
             "not required if using default config (Default: ./mtsv.cfg)")
    init_cmd.add_argument(
        '-wd', "--working_dir",
        type=str,
        default=os.getcwd(),
        help="Specify working directory to place output. "
             "(default: {})".format(os.getcwd()))
    init_cmd.set_defaults(cmd_class=Init)
    for name, handler in COMMANDS.items():
        make_sub_parser(commands, name, handler)
    # No command given: show usage and exit successfully.
    if len(argv) == 1:
        cli.print_help(sys.stdout)
        sys.exit(0)
    try:
        setup_and_run(argv, cli)
    except KeyboardInterrupt:
        error("\n-- Stopped by user --", exception=False)
def run_subprocess(cmd):
    """Run *cmd* (argument list) with stdout captured and return the
    CompletedProcess.

    On non-zero exit the failure is reported via error(). Returns None
    if error() ever returns instead of aborting.
    """
    try:
        result = sp.run(cmd, stdout=sp.PIPE, check=True)
    except sp.CalledProcessError as e:
        error("Unexpected error running command: {0}".format(
            "\n".join(e.cmd)))
        # BUG FIX: the original fell through to `return result` here,
        # which would raise UnboundLocalError if error() returned.
        return None
    return result