def datastore(datastore, listing, remove):
    """Manage datastore of expected value calculations."""
    if listing:
        table = get_datastore_info_table(datastore)
        click.secho(table.to_string(), fg="blue")
    elif remove is not None:
        warn("""
        The MTSv datastores keep previously collected expected values
        for faster statistical analysis. Deleting datastores will
        require these to be recalculated. Datastores should be deleted
        if a new sequence database has been built and the expected
        values calculated from the previous database are no longer
        needed.
        """)
        while True:
            click.secho(
                'Are you sure you want to remove? [y/n] ',
                nl=False, fg="red")
            c = click.getchar()
            click.echo()
            if c.lower() == 'y':
                remove_datastore(datastore, remove)
                click.secho('Deleting: {0}'.format(remove), fg="red")
                break
            elif c.lower() == 'n':
                click.secho('Abort!', fg="red")
                break
            else:
                click.secho('Invalid input', fg="red")
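# Hedged aside, not from the original source: click ships a built-in
# yes/no prompt, so the manual getchar() loop above could be written as
# the sketch below. The hand-rolled loop keeps the colored prompt and an
# explicit "Invalid input" branch, which click.confirm instead handles by
# printing an error and re-asking.
def _confirm_and_remove(datastore, remove):
    # click.confirm returns True on 'y'/'yes' and re-prompts on bad input.
    if click.confirm(click.style('Are you sure you want to remove?',
                                 fg="red")):
        remove_datastore(datastore, remove)
        click.secho('Deleting: {0}'.format(remove), fg="red")
    else:
        click.secho('Abort!', fg="red")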
def run_command(cmd, cmd_name, ignore_changed=False, final_target=False,
                config=False, check_result=False):
    if "--unlock" in cmd:
        # Bypass run command if --unlock is passed
        info("Unlocking working directory", color="blue")
        run_subprocess(cmd)
        return
    info("Running MTSv {0}".format(cmd_name))
    if ignore_changed:
        changed_targets = []
        warn("""
        Ignoring parameter changes. This may cause downstream analysis
        to be wrong. Do not ignore parameter changes if binning
        parameters, kmer size, or the sequence database have been
        modified.
        """)
    else:
        # try to capture snakemake utility options
        passthrough = set(PASSTHROUGH_ARGS).intersection(set(cmd))
        if passthrough:
            sp.run(cmd, check=check_result)
            return
        info("Checking if parameters have changed.", color="blue")
        changed_targets = get_targets_with_changed_params(cmd)
        if changed_targets:
            warn("Parameters changed for targets: {0}\nRerunning".format(
                "\n".join(changed_targets)))
        else:
            info("No parameters have changed.", color="blue")
    # add filter_candidate_taxa because it is not re-evaluated after
    # the checkpoint is hit.
    if final_target:
        changed_targets.append(final_target)
    cmd = cmd if not changed_targets else add_force_targets(
        cmd, changed_targets)
    if config:
        cmd = add_config(cmd, config)
    dryrun_flag = set(["--dryrun", "--dry-run", "-n"]).intersection(set(cmd))
    if dryrun_flag:
        # don't capture standard out for dryrun
        sp.run(cmd)
    else:
        run_subprocess(cmd)
    info("Finished running MTSv {0}".format(cmd_name))
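# Hypothetical usage sketch (snakefile and option values are illustrative,
# not taken from the source). run_command expects the snakemake invocation
# as an argv-style list; final_target forces a rule that snakemake would
# not re-evaluate after a checkpoint:
#
#     cmd = ["snakemake", "--snakefile", "binning.smk", "--cores", "4"]
#     run_command(cmd, "binning", final_target="filter_candidate_taxa")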
def setup_and_run(argv, parser):
    """Setup and run a command."""
    change_wkdir(argv)
    if '--config' in argv or '-c' in argv:
        args, snake_args, missing = add_cfg_to_args(argv, parser)
        if missing:
            warn("Section(s) missing in config file, "
                 "using defaults: {}".format(", ".join(missing)))
    else:
        args, snake_args = parser.parse_known_args()
    args.log_file = set_log_file(
        args.log_file, args.cmd_class.__name__, args.timestamp)
    params = Parameters(args, snake_args)
    cmd = args.cmd_class(params)
    cmd.run()
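# Minimal wiring sketch (the subcommand name and Binning class are
# assumptions for illustration, not from the source): args.cmd_class
# resolves because each subparser sets it as a default, the standard
# argparse pattern:
#
#     subparsers = parser.add_subparsers()
#     binning = subparsers.add_parser("binning")
#     binning.set_defaults(cmd_class=Binning)
#     setup_and_run(sys.argv[1:], parser)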
def outpath_type(input_path):
    '''Creates a directory if one does not exist. Reports an error
    if there is no permission to create the directory. If the path
    already exists and is not empty, a warning is issued. Returns
    the absolute path to the directory.'''
    input_path = os.path.abspath(input_path)
    try:
        os.mkdir(input_path)
        logger.info("Creating directory: {}".format(input_path))
    except PermissionError:
        error("No permission to make directory: {}".format(input_path))
    except OSError:
        logger.info("Directory already exists: {}".format(input_path))
        if os.listdir(input_path):
            warn("Files in {} may be overwritten!".format(input_path))
    return input_path
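# Usage sketch (hypothetical flag and default, not from the source):
# outpath_type works as an argparse ``type`` so the output directory is
# created, and any overwrite warning issued, while arguments are parsed:
#
#     parser = argparse.ArgumentParser()
#     parser.add_argument("-o", "--output", type=outpath_type,
#                         default="./mtsv_results")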
def run(self):
    for rule in self.rules:
        cmd = ["snakemake", "--snakefile", rule, "--config"]
        config = [
            "{0}={1}".format(k, v) for k, v in self.params.items()
            if v is not None]
        cmd += config
        cmd += self.snake_params
        try:
            sp.run(cmd, check=True)
            self._params.write_parameters()
        except (KeyboardInterrupt, sp.CalledProcessError) as e:
            warn("Unlocking directory after failed snakemake")
            sp.run(cmd + ["--unlock"], check=True)
            error(e)
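# For orientation (illustrative values, not from the source), the list
# built above expands to a shell command shaped like:
#
#     snakemake --snakefile binning.smk --config kmer=50 threads=4
#
# with the key=value pairs coming from self.params and any trailing
# flags from self.snake_params.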
def heatmap_figure(analysis_files, output, table_output, kwargs):
    df = cat_analysis_files(analysis_files)
    if len(df['sample'].unique()) == 1:
        msg = """
        Only one sample; a duplicate sample is added
        for clustermap to work.
        """
        logging.warning(msg)
        warn(msg)
        # need to have more than one column to work
        df = duplicate_column(df)
    if len(df['Scientific_Name'].unique()) == 1:
        msg = """
        Only one taxon is significant; a duplicate of this taxon
        is added for clustermap to work.
        """
        logging.warning(msg)
        warn(msg)
        # need to have more than one row to work
        df = duplicate_row(df)
    df = get_pivot_table(df)
    df.to_csv(table_output)
    fig = draw_figure(df, kwargs)
    fig.savefig(output, bbox_inches="tight")
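# Hedged sketch of what the duplication step might look like; the real
# duplicate_column implementation is not shown in this excerpt, and the
# "_copy" suffix is an assumption. Re-adding the rows under a renamed
# sample gives the pivot table a second column so clustermap has
# something to cluster:
def _duplicate_column_sketch(df):
    import pandas as pd
    dup = df.copy()
    dup['sample'] = dup['sample'] + "_copy"  # hypothetical duplicate name
    return pd.concat([df, dup], ignore_index=True)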
    # Continues the try block opened earlier in this script; ``snakemake``
    # is only defined when the script is executed by snakemake, so the
    # NameError branch below handles standalone command-line runs.
    PRECALC_CANTAXA = '{}'
    if snakemake.params['use_data']:
        LOGGER.info("Removing taxa that already have estimates")
        CANTAXA, PRECALC_CANTAXA = remove_prev_calculated(
            CANTAXA, **snakemake.params['exp_db_params'])
        LOGGER.info(
            "The following taxa still need expected "
            "value estimates\n{}".format(",".join(
                [str(c) for c in CANTAXA])))
    write_to_file(CANTAXA, snakemake.output[1])
    with open(snakemake.output[0], 'w') as json_out:
        json_out.write(PRECALC_CANTAXA)
    if len(CANTAXA) > 300:
        warn("The number of candidate taxa is very large, "
             "which may result in a very large query fasta file "
             "that will take a long time to process and require a "
             "lot of memory. You may want to rerun with a stricter "
             "cutoff to reduce the size of the query fasta file, "
             "or break up the candidate taxa into chunks and run them "
             "individually by passing each chunk to the analyze command "
             "using the --can_taxa_list option.")
    LOGGER.info("Finished collecting candidate taxa")
except NameError:
    # Running outside snakemake: fall back to command-line arguments.
    PARSER = argparse.ArgumentParser(
        prog="MTSv Candidate Taxa",
        description="Get list of candidate taxa from summary.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    PARSER.add_argument(
        "summary", metavar="SUMMARY_FILE",
        type=file_type,
        help="Path to summary output file.")