Example #1
0
def main():
    """Run MetaAce on a FASTA file, score the resulting motifs and print them.

    Command line: ``<fasta_file> [options]``.  Recognised options:

      -w N          model width (default 10)
      -iter N       number of runs handed to MetaAce (default 10)
      -genome NAME  name handed to ``MotifMetrics.ProbeSet`` (default 'YEAST')
      -gcback F     GC background (default 0.38)
      -valid TF     value handed to ``Validate.validate``; may be repeated
      -H250         shortcut: genome 'HUMAN_250', GC background 0.44
      -Ch22         shortcut: genome 'Ch22', GC background 0.44

    Every motif in ``MetaAce(...).results`` receives ``pvalue`` and ``church``
    attributes computed by the ProbeSet; the motifs are then sorted by
    ascending ``church`` and passed to ``print_motifs``.

    Exits with status 1 when no FASTA file is given, when an option that
    needs a value is the last argument, or when a numeric option value
    cannot be converted.
    """
    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as a Python 3 function call.
    if len(sys.argv) < 2:
        print("Usage: %s <fasta_file>" % (re.sub('^.*/', '', sys.argv[0])))
        print('       [-w width (10)]    Model Width (note AlignACE allows gaps)')
        print('       [-iter    (10)]    Number of times to run AlignACE ')
        print('       [-genome  fsafile] Genome (for computing background)')
        print('       [-gcback  (.38)    GC background (use 0.44 for human, 0.38 for yeast)]')
        print('       [-valid   tf]      TF passed to Validate.validate (repeatable)')
        print('       [-H250]            Genome HUMAN_250, GC background 0.44')
        print('       [-Ch22]            Genome Ch22, GC background 0.44')
        sys.exit(1)

    # Echo the invocation as a '#' comment line, escaping embedded spaces.
    print("#" + ' '.join([x.replace(' ', '\\ ') for x in sys.argv]))

    def _value_after(pos, convert=str):
        """Return the converted argument that follows sys.argv[pos], or exit(1)."""
        flag = sys.argv[pos]
        if pos + 1 >= len(sys.argv):
            sys.stderr.write("Error: option %s requires a value\n" % flag)
            sys.exit(1)
        raw = sys.argv[pos + 1]
        try:
            return convert(raw)
        except ValueError:
            sys.stderr.write("Error: invalid value %r for option %s\n" % (raw, flag))
            sys.exit(1)

    fastafile  = sys.argv[1]
    width      = 10
    valid_tfs  = []
    iterations = 10
    genome     = 'YEAST'
    gcback     = 0.38

    # Every token is inspected (as before), so the last occurrence of an
    # option wins and -valid accumulates.
    for i, tok in enumerate(sys.argv):
        if tok == '-w':
            width = _value_after(i, int)
        elif tok == '-valid':
            valid_tfs.append(_value_after(i))
        elif tok == '-iter':
            iterations = _value_after(i, int)
        elif tok == '-gcback':
            gcback = _value_after(i, float)
        elif tok == '-genome':
            genome = _value_after(i)
        elif tok == '-H250':
            genome = 'HUMAN_250'
            gcback = 0.44
        elif tok == '-Ch22':
            genome = 'Ch22'
            gcback = 0.44

    # Imported only once the arguments are known to be usable, so usage
    # errors are reported without loading TAMO.
    from TAMO import MotifMetrics

    theMeta = MetaAce(fastafile, width, iterations, gcback)

    Genome = MotifMetrics.ProbeSet(genome)
    ids    = Genome.ids_from_file(fastafile)
    ids    = Genome.filter(ids)  # Only uses IDs that are actually in the Genome file

    motifs = list(theMeta.results)  # copy, so sorting leaves theMeta.results alone

    for motif in motifs:
        motif.pvalue = Genome.p_value(motif, ids, factor=0.7)
        motif.church = Genome.church(motif, ids)
        # NOTE(review): each -valid TF overwrites motif.valid, so only the
        # result for the last TF survives -- confirm this is intended.
        for valid_tf in valid_tfs:
            motif.valid = Validate.validate(motif, valid_tf, '', 'Want Tuple')

    # list.sort is stable, so a key sort orders exactly like the former
    # cmp(x.church, y.church) comparison.
    motifs.sort(key=lambda m: m.church)
    print_motifs(motifs, kmer_count=-1)
Example #2
0
def main():
    """Run metaMDscan on a FASTA file and print its motifs ordered by p-value.

    Options (parsed with getopt):

      -f FILE        FASTA file; its keys are used as probe ids (required)
      --genome NAME  name handed to ``MotifMetrics.ProbeSet`` (default 'YEAST')
      --top N        lower bound on the top count given to metaMDscan (default 10)
      --pcnt P       percentage of the filtered probe ids; used as the top
                     count when it exceeds --top
      --range A,B    the two width arguments given to metaMDscan (default 8,15)
      --bgfile FILE  accepted and stored, but not forwarded (see NOTE below)

    Each motif in ``metaMDscan(...).motifs`` receives ``pvalue`` and
    ``church`` attributes from the ProbeSet; the list is sorted in place by
    ascending ``pvalue`` and passed to ``print_motifs``.

    On a malformed command line the problem is written to stderr and
    ``usage()`` is called; if ``usage()`` returns, the process exits with
    status 2.
    """
    short_opts = 'f:'
    long_opts = ['genome=', 'range=', 'top=', 'pcnt=', 'bgfile=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError:
        # Report the actual parse error (the instance), not the exception
        # class's __dict__.  sys.exc_info() keeps this valid on every
        # Python 2 release as well as on Python 3.
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        usage()
        # Only reached if usage() returns; `opts` would be unbound below.
        sys.exit(2)
    if not opts:
        usage()

    fastafile = ''
    top_count = 10
    top_pcnt = None
    genome = 'YEAST'
    w_start = 8
    w_stop = 15
    # NOTE(review): bgfile is parsed but never handed to metaMDscan in this
    # function -- confirm whether metaMDscan should receive it.
    bgfile = MDSCAN_DIR + 'yeast_int.bg'
    try:
        for opt, value in opts:
            if opt == '-f':
                fastafile = value
            elif opt == '--genome':
                genome = value
            elif opt == '--top':
                top_count = int(value)
            elif opt == '--pcnt':
                top_pcnt = float(value)
            elif opt == '--range':
                w_start, w_stop = [int(x) for x in value.split(',')]
            elif opt == '--bgfile':
                bgfile = value
    except ValueError:
        # Non-numeric --top/--pcnt, or --range not of the form "A,B".
        sys.stderr.write("Error: invalid option value: %s\n" % sys.exc_info()[1])
        usage()
        sys.exit(2)

    if not fastafile:
        sys.stderr.write("Error: no FASTA file given (-f)\n")
        usage()
        sys.exit(2)

    # Single-argument print(...) works as a Python 2 statement and a
    # Python 3 call alike.
    print("#" + ' '.join(sys.argv))
    probeids = Fasta.keys(fastafile)
    Genome = MotifMetrics.ProbeSet(genome)

    # Result of Genome.filter replaces the raw FASTA key list.
    probeids = Genome.filter(probeids)

    if top_pcnt:
        top_count = max(top_count, int(top_pcnt / 100.0 * len(probeids)))

    theMeta = metaMDscan(fastafile, w_start, w_stop, top_count)

    for m in theMeta.motifs:
        # NOTE(review): the third argument 'v' presumably requests verbose
        # output (hence the flush) -- confirm against MotifMetrics.
        m.pvalue = Genome.p_value(m, probeids, 'v')
        m.church = Genome.church(m, probeids, 'v')
        sys.stdout.flush()

    # Stable key sort; orders exactly like the former cmp(x.pvalue, y.pvalue).
    theMeta.motifs.sort(key=lambda m: m.pvalue)
    print_motifs(theMeta.motifs)
Example #3
0
def main():
    """Run metaMDscan on a FASTA file and print its motifs ordered by p-value.

    Options (parsed with getopt):

      -f FILE        FASTA file; its keys are used as probe ids (required)
      --genome NAME  name handed to ``MotifMetrics.ProbeSet`` (default 'YEAST')
      --top N        lower bound on the top count given to metaMDscan (default 10)
      --pcnt P       percentage of the filtered probe ids; used as the top
                     count when it exceeds --top
      --range A,B    the two width arguments given to metaMDscan (default 8,15)
      --bgfile FILE  accepted and stored, but not forwarded (see NOTE below)

    Each motif in ``metaMDscan(...).motifs`` receives ``pvalue`` and
    ``church`` attributes from the ProbeSet; the list is sorted in place by
    ascending ``pvalue`` and passed to ``print_motifs``.

    On a malformed command line the problem is written to stderr and
    ``usage()`` is called; if ``usage()`` returns, the process exits with
    status 2.
    """
    def _bad_command_line(message):
        """Write the problem to stderr, show usage, and make sure we stop."""
        sys.stderr.write("Error: %s\n" % message)
        usage()
        sys.exit(2)  # only reached if usage() itself returns

    short_opts = 'f:'
    long_opts  = ['genome=', 'range=', 'top=', 'pcnt=', 'bgfile=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError:
        # Show the caught error itself rather than the exception class's
        # __dict__.  sys.exc_info() is valid on all Python 2 and 3 releases.
        _bad_command_line(sys.exc_info()[1])
    if not opts:
        usage()

    fastafile = ''
    top_count = 10
    top_pcnt  = None
    genome    = 'YEAST'
    w_start   = 8
    w_stop    = 15
    # NOTE(review): bgfile is parsed but never handed to metaMDscan in this
    # function -- confirm whether metaMDscan should receive it.
    bgfile    = MDSCAN_DIR + 'yeast_int.bg'
    try:
        for opt, value in opts:
            if   opt == '-f':       fastafile = value
            elif opt == '--genome': genome    = value
            elif opt == '--top':    top_count = int(value)
            elif opt == '--pcnt':   top_pcnt  = float(value)
            elif opt == '--bgfile': bgfile    = value
            elif opt == '--range':
                w_start, w_stop = [int(x) for x in value.split(',')]
    except ValueError:
        # Non-numeric --top/--pcnt, or --range not of the form "A,B".
        _bad_command_line("invalid option value: %s" % sys.exc_info()[1])

    if not fastafile:
        _bad_command_line("no FASTA file given (-f)")

    # Single-argument print(...) works as a Python 2 statement and a
    # Python 3 call alike.
    print("#" + ' '.join(sys.argv))
    probeids = Fasta.keys(fastafile)
    Genome = MotifMetrics.ProbeSet(genome)

    # Result of Genome.filter replaces the raw FASTA key list.
    probeids = Genome.filter(probeids)

    if top_pcnt:
        top_count = max(top_count, int(top_pcnt / 100.0 * len(probeids)))

    theMeta = metaMDscan(fastafile, w_start, w_stop, top_count)

    for m in theMeta.motifs:
        # NOTE(review): the third argument 'v' presumably requests verbose
        # output (hence the flush) -- confirm against MotifMetrics.
        m.pvalue = Genome.p_value(m, probeids, 'v')
        m.church = Genome.church(m, probeids, 'v')
        sys.stdout.flush()

    # Stable key sort; orders exactly like the former cmp(x.pvalue, y.pvalue).
    theMeta.motifs.sort(key=lambda m: m.pvalue)
    print_motifs(theMeta.motifs)