def main():
    """Aggregates the outputs of all the tools for a specific dataset.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, _ = common.process_args(args)
    if not dataset:
        return

    # Collect scores into scored_targets, a nested mapping whose first key
    # is the target spacer, second is the tool and third is the score name.
    targets = dataset.get_targets(args.chr)
    scored_targets = {
        get_spacer(target): defaultdict(defaultdict) for target in targets
    }
    for tool in TOOLS.values():
        add_scores(args, tool, scored_targets)

    # Flatten the mapping into one record per target, carrying all of that
    # target's scores.
    records = aggregate(targets, scored_targets)

    # Write the aggregated CSV: a header line followed by one line per record.
    aggregate_path = join_path(
        dataset.get_out_path(), consts.AGG_OUT_NAME % args.chr)
    with open(aggregate_path, 'w') as out:
        out.write(common.to_csv_line(*get_headers()))
        for record in records:
            out.write(common.to_csv_line(*record))
def main():
    """Runs SSC.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return

    tool_path = tool.get_dir_path()
    out_path = tool.get_out_dir(dataset.name)

    # Step 1: extract candidate spacers from the chromosome sequence.
    spacer = os.path.join(tool_path, SPACER_PATH)
    spacer_out = os.path.join(out_path, "%s_spacer.txt" % chrom.name)
    tool.run_bash(["-i", chrom.get_path(), "-o", spacer_out], spacer)

    # Step 2: score the extracted spacers with the SSC binary using the
    # tool's scoring matrix and a spacer length of 30.
    ssc = tool.get_bin_path()
    matrix = os.path.join(tool_path, MATRIX_PATH)
    ssc_out = os.path.join(out_path, RAW_OUT_NAME % chrom.name)
    stdout, stderr = tool.run_bash(
        ["-i", spacer_out, "-o", ssc_out, "-m", matrix, "-l", "30"], ssc)
    # Surface the tool's own diagnostics. print() calls (rather than the
    # py2-only print statement) keep this consistent with the rest of the
    # codebase and valid under Python 3.
    if stderr:
        print(stderr)
    if stdout:
        print(stdout)

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)
def main():
    """Runs sgRNA Scorer 2.0.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return

    tool.chdir()
    script_path = os.path.join(tool.get_dir_path(), tool.bin_path)

    # Scorer parameters: dataset, chromosome, raw output name, PAM
    # orientation/length settings and the NGG PAM sequence.
    params = [
        "-d", args.dataset,
        "-c", str(args.chr),
        "-o", RAW_OUT_NAME % chrom.name,
        "-p", "3",
        "-s", "20",
        "-l", "NGG",
    ]
    proc = subprocess.Popen(
        ["python", script_path] + params,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    out_data, err_data = proc.communicate()
    print(out_data)
    print(err_data)

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)
def main(argv=None):
    """Loads and normalises a word-vector file given command line options.

    Returns 1 on load failure.
    """
    if argv is None:
        argv = sys.argv
    options = process_args(argv[1:])
    try:
        wv = wvlib.load(options.vectors, max_rank=options.max_rank)
        wv = wv.normalize()
    # Python-3-compatible exception/print syntax, matching the sibling
    # wvlib entry points elsewhere in this codebase.
    except Exception as e:
        print('Error: %s' % str(e), file=sys.stderr)
        return 1
def main(argv=None):
    """Loads a word-vector file given command line options.

    Returns 1 on load failure.
    """
    if argv is None:
        argv = sys.argv
    # TODO: remove irrelevant options
    options = process_args(argv[1:])
    try:
        wv = wvlib.load(options.vectors, max_rank=options.max_rank)
    # Python-3-compatible exception/print syntax, matching the sibling
    # wvlib entry points elsewhere in this codebase.
    except Exception as e:
        print('Error: %s' % str(e), file=sys.stderr)
        return 1
def main(argv=None):
    """Loads and normalises word vectors, then runs the query loop.

    Returns 1 on load failure, otherwise the query loop's result.
    """
    argv = sys.argv if argv is None else argv
    options = process_args(argv[1:])
    try:
        word_vectors = wvlib.load(options.vectors, max_rank=options.max_rank)
        word_vectors = word_vectors.normalize()
    except Exception as e:
        print('Error: %s' % str(e), file=sys.stderr)
        return 1
    return query_loop(word_vectors, options, process_query, query_count=3)
def main():
    """Returns the length of a chromosome sequence.

    For command line help, run with the '-h' flag.

    Prints:
        The length of the sequence.
    """
    args = get_args()
    dataset, chrom, _ = common.process_args(args)
    if not dataset:
        return

    # Sum the sequence line lengths, skipping FASTA header lines ('>').
    length = 0
    with open(chrom.get_path(), 'r') as fd:
        for line in fd:
            if line.startswith('>'):
                continue
            length += len(line.strip())
    # print() call (rather than the py2-only print statement) keeps this
    # consistent with the rest of the codebase and valid under Python 3.
    print(length)
def main():
    """Runs phytoCRISP-Ex.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return

    tool.run_bash(["-g", args.dataset, "-c", chrom.name, "NGG", "G"])

    # The tool writes into the dataset's data directory; relocate the raw
    # output into the tool's own output directory before normalising.
    src = os.path.join(dataset.get_data_path(), SRC_OUT_NAME % chrom.name)
    dst = os.path.join(
        tool.get_out_dir(dataset.name), DST_OUT_NAME % chrom.name)
    shutil.move(src, dst)

    tools_common.normalise(chrom, tool)
def main():
    """Gets precision stats for a tool against experimental data.

    For command line help, run with the '-h' flag.

    Prints:
        Precision statistics for the tool.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=args.tool)
    if not dataset or not tool:
        return

    # If a specific scoring signal is wanted rather than just the first one,
    # adjust the column.
    score_start_idx = consts.SCORE_COLUMN
    score_shift_idx = 0
    if args.score:
        score_shift_idx = tool.get_score_idx(args.score)
        if score_shift_idx == -1:
            # print() call (rather than the py2-only print statement) keeps
            # this consistent with the rest of the codebase and Python 3.
            print("unknown score %s for tool %s" % (args.score, args.tool))
            return
    score_idx = score_start_idx + score_shift_idx

    # Reads the normalised output of the tool into {target: score}.
    found = {}
    output_path = tool.get_normalised_out_path(chrom.num, dataset.name)
    with open(output_path, 'r') as fd:
        for line in fd:
            values = line.split(',')
            target = values[consts.TARGET_COLUMN]
            # Missing score column counts as 0.
            score = values[score_idx] if len(values) > score_idx else 0
            if args.no_pam:
                target = target[:-consts.PAM_LEN]
            found[target] = float(score)
    print_stat("Number of targets found", len(found))

    # Compare against the experimentally (in)efficient target sets.
    efficient = set(dataset.get_efficient_targets(args.chr, args.no_pam))
    inefficient = set(dataset.get_inefficient_targets(args.chr, args.no_pam))
    if args.relative:
        precision = relative_precision(found, efficient, inefficient)
    else:
        threshold = -EPSILON if args.non_negative else args.threshold
        precision = abs_precision(found, efficient, inefficient, threshold)
    print_stat("\nPrecision", "%.02f%%" % precision)
def main():
    """Runs mm10db.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return

    tool.chdir()
    script_params = ["-g", args.dataset, "-c", chrom.name]
    out_path = tool.get_out_dir(dataset.name)

    # Run each stage of the mm10db pipeline in order with the same
    # dataset/chromosome parameters.
    for script in SCRIPTS:
        tool.run_python(script_params, script)

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    add_rejected(chrom, out_path)
    tools_common.normalise(chrom, tool)
def main():
    """Runs CHOPCHOP.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return

    tool.chdir()
    chr_name = chrom.get_name()
    out_path = tool.get_out_dir(dataset.name)
    config_path = os.path.join(tool.get_dir_path(), CONFIG_FILE)

    # Point CHOPCHOP's configuration at the dataset, run it with every
    # scoring method, then restore the configuration file.
    config(dataset.get_data_path(), config_path)
    params = [
        "-G", dataset.name,
        "-o", os.path.join(out_path, chr_name),
        "-F",
        "--targets", chrom.get_path(),
        "--chr", chr_name,
        "--scoringMethod", "ALL",
    ]
    output, _ = tool.run_python(params)
    unconfig(config_path)

    # Persist CHOPCHOP's stdout as the raw output file.
    raw_path = os.path.join(out_path, RAW_OUT_NAME % chr_name)
    with open(raw_path, 'w') as txt:
        txt.write(output)

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)
def main(raw_args=None):
    """Normalises the output of the various prediction tools.

    For command line help, run with the '-h' flag.
    """
    args = get_args(raw_args)
    dataset, chrom, tool = common.process_args(args, tool_name=args.tool)
    if not dataset or not tool:
        return

    # Explicit paths on the command line override the tool's defaults.
    if args.input:
        input_path = args.input
    else:
        input_path = tool.get_csv_out_path(args.chr, args.dataset)
    if args.output:
        output_path = args.output
    else:
        output_path = tool.get_normalised_out_path(args.chr, args.dataset)

    # Reads the unnormalised output, skipping the header line if present.
    first_entry = 1 if tool.has_headers else 0
    with open(input_path, 'r') as in_fd:
        entries = [line.strip() for line in in_fd.readlines()[first_entry:]]

    # Writes the normalised output.
    with open(output_path, 'w') as out_fd:
        for record in tool.normalise(entries):
            out_fd.write(record)
def main():
    """Produces a features file for models to be trained on.

    For command line help, run with the '-h' flag.

    Writes:
        An output CSV file with the targets, labels and feature
        representations.
    """
    args = get_args()
    dataset, chrom, _ = common.process_args(args)
    if not dataset:
        return

    out_file = join_path(
        args.path, consts.DATA_DIR, OUT_FILE % (dataset.name, chrom.num))
    handlers = get_column_handlers()
    out_dir = dataset.get_out_path()
    in_file = join_path(out_dir, consts.AGG_OUT_NAME % chrom.num)

    def label_getter(target):
        # Looks up the experimental efficiency label for a target.
        return dataset.get_value(args.chr, target, dataset.genome_label_idx)

    # A with-block guarantees the output file is closed even if
    # parse_features raises (the original leaked the descriptor on error).
    with open(out_file, 'w') as fd_out:
        parse_features(in_file, handlers, fd_out, label_getter)
def main():
    """Runs FlashFry.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return

    chr_path = chrom.get_path()
    out_path = tool.get_out_dir(dataset.name)
    bin_path = tool.get_bin_path()
    chr_name = chrom.get_name()
    index_path = os.path.join(out_path, INDEX_NAME % chr_name)
    discover_path = os.path.join(out_path, DISCOVER_NAME % chr_name)
    targets_path = os.path.join(out_path, RAW_OUT_NAME % chr_name)

    # Build the off-target database for the chromosome.
    cmd_index = [
        "java", "-Xmx4g", "-jar", bin_path,
        "index",
        "--tmpLocation", out_path,
        "--database", index_path,
        "--reference", chr_path,
        "--enzyme", "spcas9ngg",
    ]
    # Discover candidate targets in the chromosome.
    cmd_discover = [
        "java", "-Xmx4g", "-jar", bin_path,
        "discover",
        "--database", index_path,
        "--fasta", chr_path,
        "--output", discover_path,
    ]
    # Score the discovered targets.
    cmd_score = [
        "java", "-Xmx4g", "-jar", bin_path,
        "score",
        "--input", discover_path,
        "--output", targets_path,
        "--scoringMetrics", "doench2014ontarget,moreno2015",
        "--database", index_path,
    ]

    tool.chdir()
    # Each step depends on the previous one's output, so fail fast instead
    # of silently ignoring a non-zero exit status (the original discarded
    # the return codes of Popen(...).wait()).
    subprocess.check_call(cmd_index)
    subprocess.check_call(cmd_discover)
    subprocess.check_call(cmd_score)

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)