Esempio n. 1
0
def scan_normal(s, inputfile, fa, motifs, cutoff, bgfile, nreport, scan_rc,
                pvalue, moods, bed):
    """Yield one ``format_line`` result per reported motif match.

    When ``moods`` is false the matches come from ``s.scan(fa, nreport,
    scan_rc)``, which is expected to produce one result per sequence in
    the order of ``fa.ids``; each result is paired with ``motifs`` and
    holds ``(score, pos, strand)`` tuples.

    When ``moods`` is true the matches come from ``scan_it_moods``, which
    is expected to produce ``(motif, {seq_id: matches})`` pairs whose
    matches are ``(pos, score, strand)`` tuples.  ``inputfile``,
    ``cutoff``, ``bgfile`` and ``pvalue`` are only used on this path.

    ``bed`` is forwarded unchanged to ``format_line``.
    """
    if not moods:
        for idx, per_motif in enumerate(s.scan(fa, nreport, scan_rc)):
            name = fa.ids[idx]
            sequence = fa[name]
            for motif, hits in zip(motifs, per_motif):
                for score, pos, strand in hits:
                    yield format_line(sequence, name, motif, score, pos,
                                      strand, bed=bed)
        return

    # The trailing False is the "table" flag: individual matches are
    # requested here rather than a count table.
    moods_results = scan_it_moods(inputfile, motifs, cutoff, bgfile, nreport,
                                  scan_rc, pvalue, False)
    for motif, by_sequence in moods_results:
        for name, hits in by_sequence.items():
            for pos, score, strand in hits:
                yield format_line(fa[name], name, motif, score, pos, strand,
                                  bed=bed)
Esempio n. 2
0
def scan_table(s, inputfile, fa, motifs, cutoff, bgfile, nreport, scan_rc,
               pvalue, moods):
    """Yield the lines of a tab-separated motif count table.

    The first line is the header: an empty first column followed by the
    ``id`` of every motif.  Each following line holds a sequence id and
    one count per motif.

    With ``moods`` true the ``(seq_id, counts)`` rows are taken from
    ``scan_it_moods`` (called with its table flag set); otherwise the
    counts come from ``s.count(fa, nreport, scan_rc)`` and are labelled
    with ``fa.ids`` by position.  ``inputfile``, ``cutoff``, ``bgfile``
    and ``pvalue`` are only used on the ``moods`` path.
    """
    motif_ids = [motif.id for motif in motifs]
    yield "\t{}".format("\t".join(motif_ids))

    if moods:
        rows = scan_it_moods(inputfile, motifs, cutoff, bgfile, nreport,
                             scan_rc, pvalue, True)
    else:
        # Pair every count vector with the id of the sequence at the same
        # position in the FASTA object.
        rows = ((fa.ids[idx], counts)
                for idx, counts in enumerate(s.count(fa, nreport, scan_rc)))

    for label, counts in rows:
        yield "{}\t{}".format(label, "\t".join(map(str, counts)))
Esempio n. 3
0
def scan_table(s, inputfile, fa, motifs, cutoff, bgfile, nreport, scan_rc, pvalue, moods):
    """Generate a tab-separated table of per-sequence motif counts.

    Yields a header line (leading tab, then every motif ``id``) followed
    by one line per sequence: the sequence id and its counts.

    The counts are produced by ``s.count(fa, nreport, scan_rc)`` and
    labelled with ``fa.ids`` by position, unless ``moods`` is true, in
    which case ``scan_it_moods`` supplies ``(seq_id, counts)`` pairs
    directly.  ``inputfile``, ``cutoff``, ``bgfile`` and ``pvalue`` are
    only passed to ``scan_it_moods``.
    """
    def _row(label, values):
        # One table line: row label, then the stringified values.
        return "{}\t{}".format(label, "\t".join(str(v) for v in values))

    yield "\t{}".format("\t".join(m.id for m in motifs))

    if not moods:
        for idx, counts in enumerate(s.count(fa, nreport, scan_rc)):
            yield _row(fa.ids[idx], counts)
        return

    # The final True asks scan_it_moods for table-style output.
    for seq_id, counts in scan_it_moods(inputfile, motifs, cutoff, bgfile,
                                        nreport, scan_rc, pvalue, True):
        yield _row(seq_id, counts)
Esempio n. 4
0
def scan_normal(s, inputfile, fa, motifs, cutoff, bgfile, nreport, scan_rc, pvalue, moods, bed, normalize):
    """Generate formatted output lines for individual motif matches.

    Two back ends are supported:

    * ``moods`` true: ``scan_it_moods`` is expected to produce
      ``(motif, {seq_id: matches})`` pairs, with matches given as
      ``(pos, score, strand)`` tuples.  ``inputfile``, ``cutoff``,
      ``bgfile`` and ``pvalue`` are only used here.
    * ``moods`` false: ``s.scan(fa, nreport, scan_rc, normalize)`` is
      expected to produce one result per sequence, in the order of
      ``fa.ids``; each result is paired with ``motifs`` and holds
      ``(score, pos, strand)`` tuples.

    Every match is rendered by ``format_line`` with ``bed`` forwarded.
    """
    def _render(sequence, seq_id, motif, score, pos, strand):
        # Single place where the output format option is applied.
        return format_line(sequence, seq_id, motif, score, pos, strand, bed=bed)

    if moods:
        # The last positional argument is the table flag (off here).
        moods_hits = scan_it_moods(inputfile, motifs, cutoff, bgfile,
                                   nreport, scan_rc, pvalue, False)
        for motif, per_sequence in moods_hits:
            for seq_id, hits in per_sequence.items():
                for hit in hits:
                    pos, score, strand = hit
                    yield _render(fa[seq_id], seq_id, motif, score, pos, strand)
    else:
        scanner_hits = s.scan(fa, nreport, scan_rc, normalize)
        for index, per_motif in enumerate(scanner_hits):
            seq_id = fa.ids[index]
            sequence = fa[seq_id]
            for motif, hits in zip(motifs, per_motif):
                for hit in hits:
                    score, pos, strand = hit
                    yield _render(sequence, seq_id, motif, score, pos, strand)
Esempio n. 5
0
def command_scan(inputfile, pwmfile, nreport=1, cutoff=0.9, bed=False,
        scan_rc=True, table=False, score_table=False, moods=False,
        pvalue=None, bgfile=None, genome=None):
    """Scan sequences with a set of motifs and yield the output line by line.

    Exactly one of three output modes is produced:

    * ``table``: a header line followed by one line of per-motif counts
      per sequence.  Takes precedence over ``score_table``.
    * ``score_table``: a header line followed by one line of per-motif
      values from ``Scanner.best_score`` per sequence.  ``moods`` is
      ignored in this mode.
    * otherwise: one ``format_line`` result per individual match, with
      ``bed`` forwarded to ``format_line``.

    With ``moods`` true, the ``table`` and match modes obtain their data
    from ``scan_it_moods`` instead of the ``Scanner`` instance; this is
    the only path that uses ``pvalue`` and ``bgfile``.

    If ``genome`` is given, the index directory passed to ``as_fasta`` is
    the configured index directory joined with ``genome``; otherwise
    ``None`` is passed.

    The underlying scan/count iterator is created only in the branch that
    consumes it, so no scan is set up for a mode that will not use it and
    ``scan_it_moods`` is invoked at most once per call.
    """
    motifs = pwmfile_to_motifs(pwmfile)

    index_dir = None
    if genome is not None:
        index_dir = os.path.join(MotifConfig().get_index_dir(), genome)

    # initialize scanner
    s = Scanner()
    s.set_motifs(pwmfile)

    fa = as_fasta(inputfile, index_dir)

    if table:
        # header: empty first column, then one column per motif
        yield "\t{}".format("\t".join([m.id for m in motifs]))

        if moods:
            result_it = scan_it_moods(inputfile, motifs, cutoff, bgfile,
                                      nreport, scan_rc, pvalue, table)
            for seq_id, counts in result_it:
                yield "{}\t{}".format(
                    seq_id, "\t".join([str(x) for x in counts]))
        else:
            result_it = s.count(fa, nreport, scan_rc, cutoff)
            # rows are labelled with the sequence id at the same position
            for i, counts in enumerate(result_it):
                yield "{}\t{}".format(
                    fa.ids[i], "\t".join([str(x) for x in counts]))

    elif score_table:
        result_it = s.best_score(fa, scan_rc)
        # header
        yield "\t{}".format("\t".join([m.id for m in motifs]))
        # score table
        for i, scores in enumerate(result_it):
            yield "{}\t{}".format(
                fa.ids[i], "\t".join([str(x) for x in scores]))

    elif moods:
        # scan_it_moods results are unpacked as (motif, {seq_id: matches});
        # its match tuples are ordered (pos, score, strand).
        result_it = scan_it_moods(inputfile, motifs, cutoff, bgfile,
                                  nreport, scan_rc, pvalue, table)
        for motif, d in result_it:
            for seq_id, matches in d.items():
                for pos, score, strand in matches:
                    yield format_line(fa, seq_id, motif,
                                      score, pos, strand, bed=bed)

    else:
        # Scanner results are taken per sequence, in the order of fa.ids;
        # its match tuples are ordered (score, pos, strand).
        result_it = s.scan(fa, nreport, scan_rc, cutoff)
        for i, result in enumerate(result_it):
            seq_id = fa.ids[i]
            for motif, matches in zip(motifs, result):
                for (score, pos, strand) in matches:
                    yield format_line(fa, seq_id, motif,
                                      score, pos, strand, bed=bed)