Esempio n. 1
0
def main():
    """Collects the scores of every tool for one dataset into a single CSV.

    For command line help, run with the '-h' flag.

    Writes:
        A CSV file in the dataset's output directory: one header line
        followed by one line per aggregated record.
    """
    args = get_args()
    dataset, _, _ = common.process_args(args)
    if not dataset:
        return

    # scored_targets is a three-level mapping: the first key is the target
    # spacer, the second is the tool and the third is the score's name.
    targets = dataset.get_targets(args.chr)
    scored_targets = {
        get_spacer(target): defaultdict(defaultdict) for target in targets
    }
    for tool in TOOLS.values():
        add_scores(args, tool, scored_targets)

    # Flattens the mapping into one record per target, each record holding
    # all of the scores gathered for that target.
    records = aggregate(targets, scored_targets)

    # Writes the header line first and then every record.
    out_dir = dataset.get_out_path()
    out_name = consts.AGG_OUT_NAME % args.chr
    aggregate_path = join_path(out_dir, out_name)
    with open(aggregate_path, 'w') as out:
        header_line = common.to_csv_line(*get_headers())
        out.write(header_line)
        for record in records:
            record_line = common.to_csv_line(*record)
            out.write(record_line)
Esempio n. 2
0
def main():
    """Runs SSC.

    For command line help, run with the '-h' flag.

    Two commands are run through the tool wrapper: the helper found at
    SPACER_PATH, which reads the chromosome file and writes a spacer file,
    and then the tool's binary, which reads that spacer file together with
    the matrix at MATRIX_PATH. The raw output is then converted to CSV and
    normalised.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return

    tool_path = tool.get_dir_path()
    out_path = tool.get_out_dir(dataset.name)

    # First pass: produces the spacer file consumed by the second pass.
    spacer = os.path.join(tool_path, SPACER_PATH)
    spacer_out = os.path.join(out_path, "%s_spacer.txt" % chrom.name)
    tool.run_bash(["-i", chrom.get_path(), "-o", spacer_out], spacer)

    # Second pass: runs the tool binary on the spacer file.
    ssc = tool.get_bin_path()
    matrix = os.path.join(tool_path, MATRIX_PATH)
    ssc_out = os.path.join(out_path, RAW_OUT_NAME % chrom.name)
    stdout, stderr = tool.run_bash(
        ["-i", spacer_out, "-o", ssc_out, "-m", matrix, "-l", "30"], ssc)
    # Parenthesised single-argument print behaves identically on Python 2
    # and is valid syntax on Python 3.
    if stderr:
        print(stderr)
    if stdout:
        print(stdout)

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)
Esempio n. 3
0
def main():
    """Launches sgRNA Scorer 2.0 as a child Python process.

    For command line help, run with the '-h' flag.

    The captured standard output and standard error of the child process are
    printed, after which the raw output is converted to CSV and normalised.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not (dataset and tool):
        return
    tool.chdir()

    # Full command line: interpreter, script and the script's parameters.
    command = [
        "python",
        os.path.join(tool.get_dir_path(), tool.bin_path),
        "-d", args.dataset,
        "-c", str(args.chr),
        "-o", RAW_OUT_NAME % chrom.name,
        "-p", "3",
        "-s", "20",
        "-l", "NGG",
    ]
    scorer = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    captured_out, captured_err = scorer.communicate()
    print(captured_out)
    print(captured_err)

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)
Esempio n. 4
0
def main(argv=None):
    """Loads the word vectors selected on the command line and normalizes them.

    Args:
        argv: Full argument vector, program name first. Defaults to sys.argv.

    Returns:
        1 if loading or normalizing the vectors raised an exception; otherwise
        the function falls through and returns None.
    """
    if argv is None:
        argv = sys.argv
    options = process_args(argv[1:])
    try:
        wv = wvlib.load(options.vectors, max_rank=options.max_rank)
        wv = wv.normalize()
    except Exception as e:
        # 'except ... as' and an explicit write to stderr are valid on both
        # Python 2.6+ and Python 3, unlike 'except X, e' and 'print >>'.
        sys.stderr.write('Error: %s\n' % str(e))
        return 1
Esempio n. 5
0
def main(argv=None):
    """Loads and normalizes the word vectors named by the parsed options.

    Args:
        argv: Full argument vector, program name first. Defaults to sys.argv.

    Returns:
        1 if loading or normalizing the vectors raised an exception; otherwise
        the function falls through and returns None.
    """
    if argv is None:
        argv = sys.argv
    options = process_args(argv[1:])
    try:
        wv = wvlib.load(options.vectors, max_rank=options.max_rank)
        wv = wv.normalize()
    except Exception as e:
        # Portable across Python 2.6+ and Python 3: the original comma form of
        # 'except' and the 'print >>' statement only parse on Python 2.
        sys.stderr.write('Error: %s\n' % str(e))
        return 1
Esempio n. 6
0
def main(argv=None):
    """Loads the word vectors named by the parsed command line options.

    Args:
        argv: Full argument vector, program name first. Defaults to sys.argv.

    Returns:
        1 if loading the vectors raised an exception; otherwise the function
        falls through and returns None.
    """
    if argv is None:
        argv = sys.argv
    # TODO: remove irrelevant options
    options = process_args(argv[1:])
    try:
        wv = wvlib.load(options.vectors, max_rank=options.max_rank)
    except Exception as e:
        # 'except ... as' and sys.stderr.write parse on Python 2.6+ and
        # Python 3; the original 'except X, e' / 'print >>' are Python 2 only.
        sys.stderr.write('Error: %s\n' % str(e))
        return 1
Esempio n. 7
0
def main(argv=None):
    """Loads normalized word vectors and hands them to the query loop.

    Args:
        argv: Full argument vector, program name first. Defaults to sys.argv.

    Returns:
        1 if loading or normalizing the vectors raised an exception; otherwise
        whatever query_loop returns.
    """
    arguments = sys.argv if argv is None else argv
    options = process_args(arguments[1:])
    try:
        loaded = wvlib.load(options.vectors, max_rank=options.max_rank)
        vectors = loaded.normalize()
    except Exception as err:
        print('Error: %s' % str(err), file=sys.stderr)
        return 1
    else:
        return query_loop(vectors, options, process_query, query_count=3)
Esempio n. 8
0
def main():
    """Returns the length of a chromosome sequence.

    For command line help, run with the '-h' flag.

    Lines starting with '>' are skipped; every other line contributes its
    length after surrounding whitespace is stripped.

    Prints:
        The length of the sequence.
    """
    args = get_args()
    dataset, chrom, _ = common.process_args(args)
    if not dataset:
        return

    with open(chrom.get_path(), 'r') as fd:
        length = sum(
            len(line.strip()) for line in fd if not line.startswith('>'))
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print(length)
def main():
    """Runs phytoCRISP-Ex and files its output under the tool's directory.

    For command line help, run with the '-h' flag.

    After the tool has run, its output file is moved from the dataset's data
    directory into the tool's output directory for the dataset, and the
    result is normalised.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not (dataset and tool):
        return

    tool.run_bash(["-g", args.dataset, "-c", chrom.name, "NGG", "G"])

    # Relocates the produced file, renaming it on the way.
    produced = os.path.join(
        dataset.get_data_path(), SRC_OUT_NAME % chrom.name)
    destination = os.path.join(
        tool.get_out_dir(dataset.name), DST_OUT_NAME % chrom.name)
    shutil.move(produced, destination)

    tools_common.normalise(chrom, tool)
Esempio n. 10
0
def main():
    """Gets precision stats for a tool against experimental data.

    For command line help, run with the '-h' flag.

    Prints:
        Precision statistics for the tool.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=args.tool)
    if not dataset or not tool:
        return

    # If a specific scoring signal is wanted rather than just the first one,
    # adjust the column.
    score_start_idx = consts.SCORE_COLUMN
    score_shift_idx = 0
    if args.score:
        score_shift_idx = tool.get_score_idx(args.score)
    if score_shift_idx == -1:
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print("unknown score %s for tool %s" % (args.score, args.tool))
        return
    score_idx = score_start_idx + score_shift_idx

    # Reads the normalised output of the tool into a target -> score mapping.
    found = {}
    output_path = tool.get_normalised_out_path(chrom.num, dataset.name)
    with open(output_path, 'r') as fd:
        for line in fd:
            values = line.split(',')
            target = values[consts.TARGET_COLUMN]
            # Rows too short to hold the requested column score as 0.
            score = values[score_idx] if len(values) > score_idx else 0
            # Optionally drops the trailing PAM_LEN characters of the target.
            if args.no_pam:
                target = target[:-consts.PAM_LEN]
            found[target] = float(score)
    print_stat("Number of targets found", len(found))

    efficient = set(dataset.get_efficient_targets(args.chr, args.no_pam))
    inefficient = set(dataset.get_inefficient_targets(args.chr, args.no_pam))

    if args.relative:
        precision = relative_precision(found, efficient, inefficient)
    else:
        # With --non_negative the threshold sits just below zero (-EPSILON);
        # otherwise the user-supplied threshold is used.
        threshold = -EPSILON if args.non_negative else args.threshold
        precision = abs_precision(found, efficient, inefficient, threshold)
    print_stat("\nPrecision", "%.02f%%" % precision)
Esempio n. 11
0
def main():
    """Runs every script of the mm10db pipeline and post-processes the output.

    For command line help, run with the '-h' flag.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not (dataset and tool):
        return
    tool.chdir()

    pipeline_args = ["-g", args.dataset, "-c", chrom.name]
    results_dir = tool.get_out_dir(dataset.name)

    # Each entry of SCRIPTS is run in order with the same parameters.
    for stage in SCRIPTS:
        tool.run_python(pipeline_args, stage)

    # Converts the raw output to CSV, appends the rejected targets and
    # normalises the result.
    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    add_rejected(chrom, results_dir)
    tools_common.normalise(chrom, tool)
def main():
    """Runs CHOPCHOP.

    For command line help, run with the '-h' flag.

    The tool's configuration file is adjusted before the run and reverted
    afterwards, even when the run itself raises. The captured output is
    written to the raw output file, converted to CSV and normalised.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not dataset or not tool:
        return
    tool.chdir()

    chr_name = chrom.get_name()
    out_path = tool.get_out_dir(dataset.name)
    config_path = os.path.join(tool.get_dir_path(), CONFIG_FILE)

    # Configures and runs CHOPCHOP, and then reverts the configuration file.
    # The try/finally guarantees the revert happens even if building the
    # parameters or running the tool fails, so the file is never left altered.
    config(dataset.get_data_path(), config_path)
    try:
        params = [
            "-G",
            dataset.name,
            "-o",
            os.path.join(out_path, chr_name),
            "-F",
            "--targets",
            chrom.get_path(),
            "--chr",
            chr_name,
            "--scoringMethod",
            "ALL",
        ]
        output, _ = tool.run_python(params)
    finally:
        unconfig(config_path)

    # Writes the output.
    with open(os.path.join(out_path, RAW_OUT_NAME % chr_name), 'w') as txt:
        txt.write(output)
    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)
def main(raw_args=None):
    """Rewrites a prediction tool's CSV output in normalised form.

    For command line help, run with the '-h' flag.

    Args:
        raw_args: Passed straight through to get_args.
    """
    args = get_args(raw_args)
    dataset, chrom, tool = common.process_args(args, tool_name=args.tool)
    if not (dataset and tool):
        return

    # Explicit paths from the command line win over the tool's default ones.
    input_path = args.input or tool.get_csv_out_path(args.chr, args.dataset)
    output_path = args.output or tool.get_normalised_out_path(
        args.chr, args.dataset)

    # Loads the unnormalised entries, leaving out the header line if the tool
    # writes one.
    if tool.has_headers:
        skipped = 1
    else:
        skipped = 0
    with open(input_path, 'r') as in_fd:
        raw_lines = in_fd.readlines()
        entries = [raw.strip() for raw in raw_lines[skipped:]]

    # Normalises the entries and writes each resulting record as-is.
    with open(output_path, 'w') as out_fd:
        for record in tool.normalise(entries):
            out_fd.write(record)
Esempio n. 14
0
def main():
    """Produces a features file for models to be trained on.

    For command line help, run with the '-h' flag.

    Writes:
        An output CSV file with the targets, labels and feature representations.
    """
    args = get_args()
    dataset, chrom, _ = common.process_args(args)
    if not dataset:
        return

    out_file = join_path(
        args.path, consts.DATA_DIR, OUT_FILE % (dataset.name, chrom.num))
    handlers = get_column_handlers()

    # The aggregated scores file in the dataset's output directory is the
    # input that the features are parsed from.
    out_dir = dataset.get_out_path()
    in_file = join_path(out_dir, consts.AGG_OUT_NAME % chrom.num)

    def label_getter(target):
        """Looks up the label of a target in the dataset."""
        return dataset.get_value(args.chr, target, dataset.genome_label_idx)

    # The context manager closes the output file even if parsing raises,
    # which the original open()/close() pair did not.
    with open(out_file, 'w') as fd_out:
        parse_features(in_file, handlers, fd_out, label_getter)
Esempio n. 15
0
def main():
    """Runs the FlashFry index, discover and score commands for a chromosome.

    For command line help, run with the '-h' flag.

    The three commands are run one after the other, each being waited on
    before the next starts; their exit codes are not inspected. The raw
    output is then converted to CSV and normalised.
    """
    args = get_args()
    dataset, chrom, tool = common.process_args(args, tool_name=TOOL)
    if not (dataset and tool):
        return

    chr_path = chrom.get_path()
    out_path = tool.get_out_dir(dataset.name)
    bin_path = tool.get_bin_path()
    chr_name = chrom.get_name()

    index_path = os.path.join(out_path, INDEX_NAME % chr_name)
    discover_path = os.path.join(out_path, DISCOVER_NAME % chr_name)
    targets_path = os.path.join(out_path, RAW_OUT_NAME % chr_name)

    # Every command shares the same JVM invocation of the tool's jar.
    java_prefix = ["java", "-Xmx4g", "-jar", bin_path]
    stages = (
        java_prefix + [
            "index",
            "--tmpLocation", out_path,
            "--database", index_path,
            "--reference", chr_path,
            "--enzyme", "spcas9ngg",
        ],
        java_prefix + [
            "discover",
            "--database", index_path,
            "--fasta", chr_path,
            "--output", discover_path,
        ],
        java_prefix + [
            "score",
            "--input", discover_path,
            "--output", targets_path,
            "--scoringMetrics", "doench2014ontarget,moreno2015",
            "--database", index_path,
        ],
    )

    tool.chdir()
    for command in stages:
        subprocess.Popen(command).wait()

    tools_common.create_csv(chrom, tool, RAW_OUT_NAME)
    tools_common.normalise(chrom, tool)