Esempio n. 1
0
def is_tRNA(clus_obj, out_dir, args):
    """
    Iterates through cluster precursors to predict sRNA types

    For every cluster in ``clus_obj[0]`` whose first locus satisfies
    ``loci[0][3] - loci[0][2] < 500``, ``get_loci_fasta`` and
    ``_run_tRNA_scan`` are run inside a directory obtained from
    ``make_temp_directory``; the value returned by ``_read_tRNA_scan`` is
    stored under ``c['predictions']['tRNA']`` and the two files reported by
    ``_run_tRNA_scan`` are moved into ``out_dir``. Any other cluster gets
    "precursor too long" added to ``c['errors']``.

    :param clus_obj: indexable whose first item maps cluster ids to cluster
        dicts; each dict needs a ``'loci'`` entry, and an ``'errors'``
        entry supporting ``add`` when the precursor is too long.
    :param out_dir: directory that receives the scan output files.
    :param args: object exposing ``reference`` (path to the reference).
    :returns: the same ``clus_obj``, updated in place.
    """
    ref = os.path.abspath(args.reference)
    utils.safe_dirs(out_dir)
    # Resolve now: after os.chdir a relative out_dir would point inside the
    # temporary directory instead of where the caller asked for.
    out_dir = os.path.abspath(out_dir)
    for nc in clus_obj[0]:
        c = clus_obj[0][nc]
        loci = c['loci']
        out_fa = "cluster_" + nc
        if loci[0][3] - loci[0][2] < 500:
            with make_temp_directory() as tmpdir:
                previous_cwd = os.getcwd()
                os.chdir(tmpdir)
                try:
                    get_loci_fasta({loci[0][0]: [loci[0][0:5]]}, out_fa, ref)
                    summary_file, str_file = _run_tRNA_scan(out_fa)
                    # Parse the summary once and reuse the result for both
                    # the stored prediction and the debug log.
                    score = _read_tRNA_scan(summary_file)
                    c.setdefault('predictions', {})['tRNA'] = score
                    logger.debug(score)
                    # basename keeps the destination inside out_dir even if
                    # the scan reports its files with a directory part.
                    shutil.move(summary_file,
                                op.join(out_dir, op.basename(summary_file)))
                    shutil.move(str_file,
                                op.join(out_dir, op.basename(str_file)))
                finally:
                    # Never leave the process sitting in the temporary
                    # directory once the context manager is done with it.
                    os.chdir(previous_cwd)
        else:
            c['errors'].add("precursor too long")
        clus_obj[0][nc] = c

    return clus_obj
Esempio n. 2
0
def make_predictions(clus_obj, out_dir, args):
    """
    Iterates through cluster precursors to predict sRNA types

    For every cluster in ``clus_obj[0]`` whose first locus satisfies
    ``loci[0][3] - loci[0][2] < 500``, ``get_loci_fasta`` and
    ``_run_tRNA_scan`` are run inside a directory obtained from
    ``make_temp_directory``; the value returned by ``_read_tRNA_scan`` is
    stored under ``c['predictions']['tRNA']`` and the two files reported by
    ``_run_tRNA_scan`` are moved into ``out_dir``. Any other cluster gets
    "precursor too long" added to ``c['errors']``.

    :param clus_obj: indexable whose first item maps cluster ids to cluster
        dicts; each dict needs a ``'loci'`` entry, and an ``'errors'``
        entry supporting ``add`` when the precursor is too long.
    :param out_dir: directory that receives the scan output files.
    :param args: object exposing ``reference`` (path to the reference).
    :returns: the same ``clus_obj``, updated in place.
    """
    ref = os.path.abspath(args.reference)
    utils.safe_dirs(out_dir)
    # Resolve now: after os.chdir a relative out_dir would point inside the
    # temporary directory instead of where the caller asked for.
    out_dir = os.path.abspath(out_dir)
    for nc in clus_obj[0]:
        c = clus_obj[0][nc]
        loci = c['loci']
        out_fa = "cluster_" + nc
        if loci[0][3] - loci[0][2] < 500:
            with make_temp_directory() as tmpdir:
                previous_cwd = os.getcwd()
                os.chdir(tmpdir)
                try:
                    get_loci_fasta({loci[0][0]: [loci[0][0:5]]}, out_fa, ref)
                    summary_file, str_file = _run_tRNA_scan(out_fa)
                    # Parse the summary once and reuse the result for both
                    # the stored prediction and the debug log.
                    score = _read_tRNA_scan(summary_file)
                    c.setdefault('predictions', {})['tRNA'] = score
                    logger.debug(score)
                    # basename keeps the destination inside out_dir even if
                    # the scan reports its files with a directory part.
                    shutil.move(summary_file,
                                op.join(out_dir, op.basename(summary_file)))
                    shutil.move(str_file,
                                op.join(out_dir, op.basename(str_file)))
                finally:
                    # Never leave the process sitting in the temporary
                    # directory once the context manager is done with it.
                    os.chdir(previous_cwd)
        else:
            c['errors'].add("precursor too long")
        clus_obj[0][nc] = c

    return clus_obj
Esempio n. 3
0
def make_profile(data, out_dir, args):
    """
    Make data report for each cluster

    For every cluster id in ``data[0]`` a sub-directory of ``out_dir`` is
    created, ``_single_cluster`` is run with ``maps.tsv`` inside it as
    output path, and the cluster entry gains:

    * ``'profile'``: third value returned by ``_single_cluster``;
    * ``'precursor'``: dict with ``'seq'`` (from ``precursor_sequence``),
      ``'colors'`` (list built from ``_parse``) and whatever keys
      ``run_rnafold`` returns.

    :param data: indexable whose first item maps cluster ids to dicts that
        contain a ``'loci'`` entry.
    :param out_dir: base directory for the per-cluster output.
    :param args: object exposing ``ref``; also forwarded to
        ``_single_cluster``.
    :returns: the same ``data``, updated in place.
    """
    safe_dirs(out_dir)
    n = len(data[0])
    bar = ProgressBar(maxval=n).start()
    bar.update(0)
    for itern, c in enumerate(data[0]):
        bar.update(itern)
        logger.debug("creating cluser: {}".format(c))
        cluster_dir = os.path.join(out_dir, c)
        safe_dirs(cluster_dir)
        # Only the third returned value is needed here.
        _, _, pos_structure = _single_cluster(
            c, data, os.path.join(cluster_dir, "maps.tsv"), args)
        cluster = data[0][c]
        cluster.update({'profile': pos_structure})
        loci = cluster['loci']
        logger.debug("precursor_sequence")
        precursor = {"seq": precursor_sequence(loci[0][0:5], args.ref)}
        cluster['precursor'] = precursor
        logger.debug("parse alignments")
        precursor["colors"] = list(_parse(cluster['profile'], precursor["seq"]))
        logger.debug("update rnafold")
        precursor.update(run_rnafold(precursor['seq']))
    # Close the bar so it reaches its final state instead of being left
    # one step short of maxval.
    bar.finish()

    return data
Esempio n. 4
0
def make_profile(data, out_dir, args):
    """
    Make data report for each cluster

    For every cluster id in ``data[0]`` a sub-directory of ``out_dir`` is
    created, ``_single_cluster`` is run with ``maps.tsv`` inside it as
    output path, and the cluster entry gains:

    * ``'profile'``: third value returned by ``_single_cluster``;
    * ``'precursor'``: dict with ``'seq'`` (from ``precursor_sequence``),
      ``'colors'`` (value returned by ``_parse``) and whatever keys
      ``run_rnafold`` returns.

    :param data: indexable whose first item maps cluster ids to dicts that
        contain a ``'loci'`` entry.
    :param out_dir: base directory for the per-cluster output.
    :param args: object exposing ``ref``; also forwarded to
        ``_single_cluster``.
    :returns: the same ``data``, updated in place.
    """
    safe_dirs(out_dir)
    n = len(data[0])
    bar = ProgressBar(maxval=n)
    bar.start()
    bar.update(0)
    for itern, c in enumerate(data[0]):
        bar.update(itern)
        logger.debug("creating cluser: {}".format(c))
        cluster_dir = os.path.join(out_dir, c)
        safe_dirs(cluster_dir)
        # Only the third returned value is needed here.
        _, _, pos_structure = _single_cluster(
            c, data, os.path.join(cluster_dir, "maps.tsv"), args)
        cluster = data[0][c]
        cluster.update({'profile': pos_structure})
        loci = cluster['loci']
        precursor = {
            "seq": precursor_sequence(loci[0][0:5], args.ref)
        }
        cluster['precursor'] = precursor
        precursor["colors"] = _parse(cluster['profile'], precursor["seq"])
        precursor.update(run_rnafold(precursor['seq']))
    # Close the bar so it reaches its final state instead of being left
    # one step short of maxval.
    bar.finish()

    return data
Esempio n. 5
0
def run_coral(clus_obj, out_dir, args):
    """
    Run the CoRaL steps used to predict small RNA function.

    Works inside ``<args.out>/coral``: ``coral.prepare_bam`` is run first,
    then ``coral.detect_regions`` and ``coral.create_features`` write into
    the ``regions`` sub-directory. ``clus_obj`` and the incoming
    ``out_dir`` value are not read.

    :param args: object exposing ``bed``, ``bam``, ``ref`` and ``out``.
    :raises ValueError: when ``args.bed`` is empty or missing a value.
    """
    if not args.bed:
        raise ValueError("This module needs the bed file output from cluster subcmd.")

    coral_dir = op.abspath(op.join(args.out, 'coral'))
    safe_dirs(coral_dir)

    # Absolute paths, because the working directory changes below.
    bam_path = op.abspath(args.bam)
    bed_path = op.abspath(args.bed)
    ref_path = op.abspath(args.ref)

    with chdir(coral_dir):
        cleaned_bam = coral.prepare_bam(bam_path, bed_path)
        regions_dir = op.join(coral_dir, "regions")
        safe_dirs(regions_dir)
        regions_file = coral.detect_regions(
            cleaned_bam, bed_path, regions_dir, "seqcluster")
        coral.create_features(cleaned_bam, regions_file, ref_path, regions_dir)
Esempio n. 6
0
def run_coral(clus_obj, out_dir, args):
    """
    Run the CoRaL steps used to predict small RNA function.

    Works inside ``<args.out>/coral``: ``coral.prepare_bam`` is run first,
    then ``coral.detect_regions`` and ``coral.create_features`` write into
    the ``regions`` sub-directory. ``clus_obj`` and the incoming
    ``out_dir`` value are not read.

    :param args: object exposing ``bed``, ``bam``, ``ref`` and ``out``.
    :raises ValueError: when ``args.bed`` is empty or missing a value.
    """
    if not args.bed:
        message = "This module needs the bed file output from cluster subcmd."
        raise ValueError(message)

    coral_dir = op.abspath(op.join(args.out, 'coral'))
    safe_dirs(coral_dir)

    # Absolute paths, because the working directory changes below.
    bam_path = op.abspath(args.bam)
    bed_path = op.abspath(args.bed)
    ref_path = op.abspath(args.ref)

    with chdir(coral_dir):
        cleaned_bam = coral.prepare_bam(bam_path, bed_path)
        regions_dir = op.join(coral_dir, "regions")
        safe_dirs(regions_dir)
        regions_file = coral.detect_regions(
            cleaned_bam, bed_path, regions_dir, "seqcluster")
        coral.create_features(cleaned_bam, regions_file, ref_path, regions_dir)
Esempio n. 7
0
def predictions(args):
    """
    Compute cluster predictions and write them to a JSON file.

    Loads the data from ``args.json``, passes it through ``is_tRNA`` (and
    ``run_coral`` when ``args.coral`` is truthy) using
    ``<args.out>/predictions`` as output directory, then writes ``data[0]``
    to the ``args.json`` path with its extension replaced by
    ``_prediction.json``.

    :param args: object exposing ``json``, ``out`` and ``coral``; it is
        also forwarded to ``is_tRNA`` and ``run_coral``.
    """
    logger.info(args)
    logger.info("Reading sequences")

    json_stem, _ext = os.path.splitext(args.json)
    out_file = os.path.abspath(json_stem + "_prediction.json")
    data = load_data(args.json)
    created_dir = safe_dirs(os.path.join(args.out, "predictions"))
    out_dir = os.path.abspath(created_dir)

    logger.info("Make predictions")
    data = is_tRNA(data, out_dir, args)

    if args.coral:
        logger.info("Make CoRaL predictions")
        run_coral(data, out_dir, args)

    write_data(data[0], out_file)
    logger.info("Done")
Esempio n. 8
0
def predictions(args):
    """
    Compute cluster predictions and write them to a JSON file.

    Loads the data from ``args.json``, passes it through ``is_tRNA`` (and
    ``run_coral`` when ``args.coral`` is truthy) using
    ``<args.out>/predictions`` as output directory, then writes ``data[0]``
    to the ``args.json`` path with its extension replaced by
    ``_prediction.json``.

    :param args: object exposing ``json``, ``out`` and ``coral``; it is
        also forwarded to ``is_tRNA`` and ``run_coral``.
    """
    logger.info(args)
    logger.info("reading sequeces")

    json_stem, _ext = os.path.splitext(args.json)
    out_file = os.path.abspath(json_stem + "_prediction.json")
    data = load_data(args.json)
    created_dir = safe_dirs(os.path.join(args.out, "predictions"))
    out_dir = os.path.abspath(created_dir)

    logger.info("make predictions")
    data = is_tRNA(data, out_dir, args)

    if args.coral:
        logger.info("make CoRaL predictions")
        run_coral(data, out_dir, args)

    write_data(data[0], out_file)
    logger.info("Done")
Esempio n. 9
0
def make_database(data, name="seqcluster.db", out_dir="database"):
    """
    Create the database file ``name`` inside ``out_dir`` and insert ``data``.

    :param data: object handed to ``_insert_data``.
    :param name: file name of the database, joined onto ``out_dir``.
    :param out_dir: directory for the database; passed through
        ``safe_dirs`` first and then made absolute.
    """
    # The absolute path used to be computed and thrown away; keep it so the
    # database location no longer depends on later working-directory changes.
    out_dir = op.abspath(safe_dirs(out_dir))
    con = _create_db(op.join(out_dir, name))
    try:
        _insert_data(con, data)
    finally:
        # Release the connection even when the insert fails.
        # NOTE(review): assumes _close only releases the handle - confirm
        # it does not commit a partially inserted data set.
        _close(con)
Esempio n. 10
0
def make_database(data, name="seqcluster.db", out_dir="database"):
    """
    Create the database file ``name`` inside ``out_dir`` and insert ``data``.

    :param data: object handed to ``_insert_data``.
    :param name: file name of the database, joined onto ``out_dir``.
    :param out_dir: directory for the database; passed through
        ``safe_dirs`` first and then made absolute.
    """
    # The absolute path used to be computed and thrown away; keep it so the
    # database location no longer depends on later working-directory changes.
    out_dir = op.abspath(safe_dirs(out_dir))
    con = _create_db(op.join(out_dir, name))
    try:
        _insert_data(con, data)
    finally:
        # Release the connection even when the insert fails.
        # NOTE(review): assumes _close only releases the handle - confirm
        # it does not commit a partially inserted data set.
        _close(con)