Exemple #1
def tamo2tf(TAMO_file):
    '''Converts TAMO files to the TRANSFAC format.

    Every motif returned by MotifTools.txt2motifs(TAMO_file) is written to
    "<TAMO_file>.tf" as:

        DE <name> <name>
        <pos> <A> <C> <G> <T>      (one tab-separated row per position)
        XX

    <name> is the motif's `source`; when `source` is empty the name is
    "<basename of TAMO_file>_<n>", n being the 1-based index of the motif.
    Each matrix cell is int(2 ** logP[pos][letter] * 100).
    '''
    ml = MotifTools.txt2motifs(TAMO_file)
    TAMO_file_name = TAMO_file.split("/")[-1]
    ACGT = ["A", "C", "G", "T"]

    # "with" guarantees the output is flushed and closed even on error.
    with open("%s.tf" % (TAMO_file), "w") as oup:
        for n, m in enumerate(ml, 1):
            # Exactly one DE line per motif: fall back to a generated name
            # only when the motif carries no source label.
            if m.source == "":
                name = "%s_%s" % (TAMO_file_name, n)
            else:
                name = m.source
            oup.write("DE\t%s\t%s\n" % (name, name))

            for i in range(m.width):
                oup.write("%s\t" % i)
                if m.logP:
                    for letter in ACGT:
                        # logP holds log2 probabilities; convert back to a
                        # percentage-like integer.
                        Pij = pow(2.0, m.logP[i][letter])
                        oup.write("%s\t" % int(Pij * 100))
                # Terminate the row so each position is on its own line.
                oup.write("\n")

            # NOTE(review): "XX" is the usual TRANSFAC record terminator;
            # confirm the downstream reader (STAMP / tf2tamo) expects it.
            oup.write("XX\n")
Exemple #2
def parse_opts():
    """Parse the command line into GLOBALS and select the distance metric.

    Recognised options:
        -m <motifs>     handed to MotifTools.txt2motifs; the result is stored
                        in GLOBALS['motifs'] (defaults to an empty list).
        -d <float>      stored in the module-level DMAX.
        --dfunc <name>  name of a function in MotifCompare; 'NCB' is accepted
                        as an alias for MotifCompare.negcommonbits.

    Remaining positional arguments are stored in GLOBALS['args'].
    usage() is called when no option is given (or the command line cannot be
    parsed) and when --dfunc names a metric MotifCompare does not provide.
    """
    global GLOBALS
    global DFUNC, DMAX
    # '-d' takes a value and must be declared, otherwise getopt rejects it.
    short_opts = 'm:d:'
    # getopt marks long options that take a value with '=', not ':'.
    long_opts = ['dfunc=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # Report the real problem and fall through to the "no options" path
        # instead of hitting a NameError on the unbound `opts`.
        print("%s" % err)
        opts, args = [], []
    if not opts:
        usage()

    GLOBALS['args'] = args
    GLOBALS['motifs'] = []
    DFUNCtxt = None
    for opt, value in opts:
        if opt == '-m':
            GLOBALS['motifs'] = MotifTools.txt2motifs(value)
        elif opt == '--dfunc':
            DFUNCtxt = value
        elif opt == '-d':
            DMAX = float(value)

    # Deal with DFUNC and DMAX
    _DFUNC = None
    if DFUNCtxt == 'NCB':
        _DFUNC = MotifCompare.negcommonbits
    elif DFUNCtxt:
        # Attribute lookup instead of exec(): no arbitrary code execution on
        # user-supplied text, and an unknown name is detected explicitly.
        _DFUNC = getattr(MotifCompare, DFUNCtxt, None)
        if _DFUNC is None:
            usage("No such distance metric: %s" % DFUNCtxt)
    if _DFUNC:
        set_dfunc(_DFUNC, DMAX)
Exemple #3
def pick_chunk_score(wdir, TAMO_file, target, genome):
    '''Trims the motifs of a cluster and writes the top motif to a file.

    This function takes the TAMO file holding the motifs of a single cluster.
    It trims the low-information ends from each motif, then identifies the
    motif that is most significantly represented in the target genes in your
    genome. The result is written to "<TAMO_file>.TOP"; if no motif is
    significantly represented, a blank ".TOP" file is created.

    wdir      -- working directory (see the note on the "cd" call below)
    TAMO_file -- TAMO file with the motifs of one cluster
    target    -- genes of interest, passed to MotifMetrics.py
    genome    -- FASTA of promoter sequence, passed to MotifMetrics.py
    '''
    # NOTE(review): os.system runs "cd" in a child shell, so this does not
    # change the working directory of this process. Left as in the original;
    # confirm whether os.chdir(wdir) was intended.
    os.system("cd %s" % wdir)

    # Directory that holds this script (and MotifMetrics.py next to it).
    script_dir = '/'.join(os.path.abspath(__file__).split('/')[:-1])

    trimmed_file = "%s_0.1.trim" % TAMO_file
    spec_file = "%s_0.1.trim_Cout" % TAMO_file
    top_file = "%s.TOP" % TAMO_file

    # Step 1: trim the TAMO to eliminate low-information flanking sequence.
    trim_motif(TAMO_file, 0.1)

    # Step 2: "Group Specificity Score" from the Church lab.
    # MotifMetrics.py checks whether the motifs appear disproportionately in
    # the targets compared to the rest of the genes.
    os.system(
        "python %s/MotifMetrics.py %s -genome %s -t %s_0.1.trim -spec > %s_0.1.trim_Cout"
        % (script_dir, target, genome, TAMO_file, TAMO_file))

    # Motif most significantly represented in the target genes; the string
    # "None" signals that no motif passed the cut-off in parse_out_pcs.
    topm = parse_out_pcs(spec_file)
    print("topm %s" % (topm,))

    if topm != "None":
        # Keep only the motif whose one-letter summary matches the winner.
        newdic = {}
        for m in MotifTools.txt2motifs(trimmed_file):
            if m.oneletter == topm:
                newdic[m.oneletter] = m

        save_motifs(list(newdic.values()), top_file)
        os.system("rm %s_0.1.trim" % TAMO_file)
        os.system("rm %s_0.1.trim_Cout" % TAMO_file)
    else:
        # No top motif: create an empty file and close the handle at once.
        open(top_file, "w").close()
Exemple #4
def TAMO_split(TAMO_file, motifs_per_file=190):
    '''Splits a TAMO file into smaller files for create_cc.

    The motifs of TAMO_file are saved in consecutive chunks of
    motifs_per_file motifs to "<TAMO_file>_n0", "<TAMO_file>_n1", ...
    The remainder goes to "<TAMO_file>_n<total>"; that last file is written
    even when it is empty (motif count is an exact multiple of the chunk size).

    Returns the number of full chunks, which is also the index of the last
    (remainder) file.
    '''
    ml = MotifTools.txt2motifs(TAMO_file)

    # Convert once so the count and the slice stride agree even when the
    # value arrives as a string (e.g. straight from the command line).
    by = int(motifs_per_file)
    total = len(ml) // by  # Number of full chunks

    for i in range(total):
        chunk_name = TAMO_file + '_n%s' % i
        print("%s" % i)
        print("%s %s" % (i * by + by, chunk_name))
        save_motifs(ml[i * by:i * by + by], chunk_name)

    # Whatever is left over after the full chunks.
    rest_name = TAMO_file + '_n%s' % (total)
    print("%s %s %s" % (total * by, len(ml), rest_name))
    save_motifs(ml[total * by:], rest_name)
    return (total)
Exemple #5
def combine_distance_matrix_for_2(wdir, TAMO_file_1, TAMO_file_2):
    '''Combines matrices made from two TAMO files.

    This function is used to create the final matrix after all jobs from
    create_cc_for_2 are complete. For every chunk i of TAMO_file_1 the files
    "<TAMO_file_1>_n<i>-<TAMO_file_2>_n<j>.dm" are pasted side by side into
    "distance_<i>"; the "distance_<i>" files are then concatenated into
    "<TAMO_file_1>-<TAMO_file_2>.dm".
    '''
    ml_1 = MotifTools.txt2motifs(TAMO_file_1)
    ml_2 = MotifTools.txt2motifs(TAMO_file_2)

    # NOTE(review): assumes both TAMO files were split into chunks of 100
    # motifs -- confirm against the caller of TAMO_split.
    n_split_1 = len(ml_1) // 100
    n_split_2 = len(ml_2) // 100

    print("%s %s" % (n_split_1, len(ml_1)))
    print("%s" % n_split_2)

    # NOTE(review): "cd" in a child shell does not change this process'
    # working directory; kept as in the original.
    os.system("cd %s" % wdir)

    # Paste together the matrices of each row of chunks.
    for i in range(n_split_1 + 1):
        parts = ["%s_n%s-%s_n%s.dm " % (TAMO_file_1, i, TAMO_file_2, j)
                 for j in range(n_split_2 + 1)]
        com = "paste " + "".join(parts) + "> distance_%s" % i
        print(com)
        # The command was only printed before; run it so the matrix is
        # actually built, as the docstring promises.
        os.system(com)

    # Stack the row files into the final matrix.
    rows = ["distance_%s " % i for i in range(n_split_1 + 1)]
    com = "cat " + "".join(rows) + "> %s-%s.dm" % (TAMO_file_1, TAMO_file_2)
    print(com)
    os.system(com)
Exemple #6
def parse_opts():
    """Parse the command line into the module-level GLOBALS dictionary.

    Recognised options:
        -m <motifs>            handed to MotifTools.txt2motifs; stored in
                               GLOBALS['motifs'].
        -g / --genome <file>   stored in GLOBALS['genomefile'].
        --top <int>            stored in GLOBALS['top'].

    Remaining positional arguments are stored in GLOBALS['args']. usage() is
    called when no option is given or the command line cannot be parsed.
    """
    global GLOBALS
    short_opts = 'm:g:'
    long_opts = ['genome=', 'top=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # Show the actual parse error and continue on the "no options" path;
        # previously `opts` stayed unbound and the next line raised NameError.
        print("%s" % err)
        opts, args = [], []
    if not opts:
        usage()

    GLOBALS['args'] = args
    for opt, value in opts:
        if opt == '-m':
            GLOBALS['motifs'] = MotifTools.txt2motifs(value)
        elif opt in ('-g', '--genome'):
            GLOBALS['genomefile'] = value
        elif opt == '--top':
            GLOBALS['top'] = int(value)
Exemple #7
def main():
    """Scan FASTA probes with a set of motifs and print a text map of hits.

    Command-line options handled below:
        -f  FASTA file, loaded with Fasta.load
        -m  motif text, converted with MotifTools.txt2motifs
        -n  comma-separated indices selecting motifs from the motif list
        -L  label characters, one character per selected motif
        -t  threshold as a fraction of a motif's maxscore (default 0.75)
        -a  comma-separated ambiguity-code motifs (Motif_from_text(..., 0.1))
        -S  scale of the drawn map in characters per base (default 50/1000)
        -i  comma-separated motif file names

    NOTE(review): several statements of this function appear to be missing
    from this copy (marked below); it does not parse as shown.
    """
    # NOTE(review): a `try:` line appears to be missing before this call.
        opts, args = getopt.getopt(sys.argv[1:], "f:m:n:L:t:a:S:i:", ["help", "output="])  # AD added 'i'
    except getopt.GetoptError:
    # NOTE(review): the handler body is missing (presumably usage/exit).
    if not opts:
    # NOTE(review): the body of this guard is missing (presumably usage/exit).

    print "#" + ' '.join(sys.argv)
    fastafile, motiffile, motifnums, labels, thresh = (None, None, [], None, 0.75) # AD changed thresh val to 0.75 from 0.7
    ambigs = []

    # Map width in characters per sequence position.
    scale   = 50.0 / 1000.0
    motifs = []
    for opt, value in opts:
        #print opt, value
        if   opt ==  '-f':  fastafile = value
        elif opt ==  '-m':  motifs.extend(MotifTools.txt2motifs(value))
        elif opt ==  '-n':  motifnums = [int(x) for x in value.split(',')]
        elif opt ==  '-L':  labels    = list(value)
        elif opt ==  '-t':  thresh    = float(value)
        elif opt ==  '-a':  ambigs.extend(value.split(','))
        elif opt ==  '-S':  scale     = float(value)
        elif opt ==  '-i':  motiffile = value  # AD added this option to ACTUALLY supply the tamo motif file at the command-line.  The code to deal with motiffiles already existed. There was just no code for User to supply one.
    probes = Fasta.load(fastafile)
    if motiffile:
        for f in motiffile.split(','):      # AD added this to allow supplying multiple tamo files at the prompt like you can supply multiple motifs
        # NOTE(review): the loop body that loads each motif file is missing.
    if ambigs:
        for ambig in ambigs:
            motifs.append( MotifTools.Motif_from_text(ambig,0.1) )
    # Default: use every loaded motif, in order.
    if not motifnums:  motifnums = range(len(motifs))
    print '# %d: %s'%(len(motifs),motifnums)
    # Legend: one line per selected motif with its label and score cut-off.
    for i in range(len(motifnums)):
        motif = motifs[motifnums[i]]
        if labels and i < len(labels):
            txt = labels[i]
            # NOTE(review): an `else:` appears to be missing here; as written
            # the numeric label always overwrites the user label.
            txt = '%d'%i
        print '%-3s : %s %5.2f (%4.2f)'%(txt,motif,thresh*motif.maxscore,thresh)

    # probehits maps a probe key to its per-motif scan results.
    probehits = {}
    for key in probes.keys():
        hits_by_motif = []
        save_flag     = 0
        # Skip sequences containing any of the characters B, D, H or U.
        if re.search('[BDHU]',probes[key]): continue
        for num in motifnums:
            # NOTE(review): `motif` is left over from the legend loop above
            # (the last selected motif), so every motif is scanned with that
            # motif's maxscore; motifs[num].maxscore looks intended.
            result = motifs[num].scan(probes[key],thresh*motif.maxscore)
            if result[0]:
                save_flag = 1
        # NOTE(review): nothing is appended to hits_by_motif and the body of
        # the `if save_flag:` below is missing; presumably the results are
        # collected and stored in probehits[key] -- confirm.
        if save_flag:

    #scale   = .1
    maxw = 40
    for key in probehits.keys():
        l       = len(probes[key])
        # One '-' per scaled position, plus 10 blanks of padding so a label
        # run starting near the end does not index past the list.
        a       = list('-'* int(scale*l) )
        a.extend( list(' '*10 ) )
        desc    = []
        matches = probehits[key]
        for i in range(len(matches)):
            if matches[i]:
                subseqs,endpoints,scores = matches[i]
                for idx in range(len(subseqs)):
                    start,stop = endpoints[idx]
                    subseq     = subseqs[idx]
                    score      = scores[idx]
                    if labels and (i<len(labels)): ID = labels[i]
                    else                         : ID = '%d'%i
                    desc.append('%s %s %d-%d %4.2f '%(ID,subseq,start,stop,score))
                    start = int(start*scale)
                    # Mark up to 10 map cells from the hit start, without
                    # overwriting cells already claimed by another hit.
                    for offset in range(10):
                        if a[start+offset] == '-':
                            if labels and (i < len(labels)):
                                a[start+offset] = labels[i]
                                # NOTE(review): an `else:` appears to be
                                # missing before the next line.
                                a[start+offset] = '%d'%i
        print '%-14s %s'%(key,''.join(a)),
        print ' '*max(0,maxw-len(a)), '| '.join(['%-27s'%x for x in desc])
    print "Found matches in %d of %d input probes"%(len(probehits),len(probes))
Exemple #8
def merge_runs_cc(TAMO_file, wdir, height, distance, ancestor, target, genome):
    '''Writes one merge job per multi-motif cluster to the file 'merge_runs_cc'.

    This function is used to merge motifs with the PCC matrix of all motifs.
    It was originally written by Cheng Zou, and then converted to a
    function by Alex Seddon.

    Cluster membership is read from "<TAMO_file>.dm_UPGMA_Cl_<height>", which
    has one cluster index per line, in the order of the motifs in TAMO_file.
    For every cluster with more than one motif the members are saved to
    "<TAMO_file>_sub_<cluster>.tm" and a command line calling
    pcc_merge_CC.py (merge_runs_no_ancestor when ancestor == 0,
    merge_runs_ancestor when ancestor == 1) is appended to 'merge_runs_cc'.
    Motifs that are alone in their cluster are saved to
    "<TAMO_file>_sub_old.tm".

    Raises ValueError when TAMO_file or wdir is empty.
    '''
    print("Here are the parameters you specified in this run ")
    print("-tamo        %s" % TAMO_file)
    print("-wdir        %s" % wdir)
    print("-h        height to cut the tree, %s" % height)
    print("-distance    %s" % distance)
    print("-ancestor    %s" % ancestor)
    print("-target    %s" % target)
    print("-genome    %s" % genome)

    # NOTE(review): the original body of this guard is not visible; raising
    # is a reconstruction -- confirm against the caller.
    if TAMO_file == '' or wdir == '':
        raise ValueError("TAMO_file and wdir must both be specified")

    # NOTE(review): "cd" in a child shell does not change this process'
    # working directory; kept as in the original.
    os.system("cd %s" % wdir)

    # Get the directory where the script is located.
    script_dir = '/'.join(os.path.abspath(__file__).split('/')[:-1])

    # cl_dic = {cluster_index: {motif_index: '1'}}; the line number in the
    # cluster file is the index of the motif in TAMO_file.
    cl_dic = {}
    with open("%s.dm_UPGMA_Cl_%s" % (TAMO_file, height), "r") as cl_file:
        for n, line in enumerate(cl_file):
            cl_dic.setdefault(line.strip(), {})[n] = "1"

    ml = MotifTools.txt2motifs(TAMO_file)
    old = []  # List of motifs that are the sole members of a cluster.

    print("%s %s" % (ancestor, ancestor == 0))

    # The two modes differ only in the command written for each cluster.
    if ancestor == 0:
        job = ('module load TAMO; python %s/pcc_merge_CC.py merge_runs_no_ancestor -t %s/%s -i %s -target %s -genome %s\n')
    elif ancestor == 1:
        job = ('module load TAMO; module load STAMPmotif; python %s/pcc_merge_CC.py merge_runs_ancestor -t %s/%s -i %s -target %s -genome %s\n')
    else:
        job = None  # Any other value: no clusters are processed.

    cc_output = open('merge_runs_cc', 'w')
    try:
        if job is not None:
            for i in cl_dic.keys():
                print("%s %s" % (i, cl_dic[i]))

                if len(cl_dic[i]) > 1:
                    # Several motifs in the cluster: save them to their own
                    # TAMO file and queue a merge job for that file.
                    mlist = [ml[j] for j in cl_dic[i]]
                    save_motifs(mlist, "%s_sub_%s.tm" % (TAMO_file, i))
                    cc_output.write(
                        job % (script_dir, wdir, TAMO_file, i, target, genome))
                else:
                    # Only one motif in the cluster: leave it alone and
                    # add it to old.
                    key = list(cl_dic[i].keys())[0]
                    old.append(ml[key])
    finally:
        cc_output.close()

    # Save the motifs that are in their own cluster.
    save_motifs(old, "%s_sub_old.tm" % (TAMO_file))
Exemple #9
def merge_runs(TAMO_file, wdir, height, distance, ancestor, target, genome):
    '''Merges the motifs of each cluster and collects the results in one file.

    This function is used to merge motifs with the PCC matrix of all motifs.
    It was originally written by Cheng Zou, and then converted to a
    function by Alex Seddon.

    Cluster membership is read from "<TAMO_file>.dm_UPGMA_Cl_<height>", which
    has one cluster index per line, in the order of the motifs in TAMO_file.
    For every cluster with more than one motif, the members are written to
    "<TAMO_file>_sub_<cluster>.tm", converted to TRANSFAC (and, when
    ancestor == 1, extended with the STAMP matches), converted back to TAMO
    and handed to pick_chunk_score, which leaves a ".TOP" file. Motifs that
    are alone in their cluster are kept unchanged. The final output is
    "<TAMO_file>_P1.tm" = singleton motifs + all ".TOP" motifs.

    Raises ValueError when TAMO_file or wdir is empty.
    '''
    print("Here are the parameters you specified in this run ")
    print("-tamo        %s" % TAMO_file)
    print("-wdir        %s" % wdir)
    print("-h        height to cut the tree, %s" % height)
    print("-distance    %s" % distance)
    print("-ancestor    %s" % ancestor)
    print("-target    %s" % target)
    print("-genome    %s" % genome)

    # NOTE(review): the original body of this guard is not visible; raising
    # is a reconstruction -- confirm against the caller.
    if TAMO_file == '' or wdir == '':
        raise ValueError("TAMO_file and wdir must both be specified")

    # NOTE(review): "cd" in a child shell does not change this process'
    # working directory; kept as in the original.
    os.system("cd %s" % wdir)

    # cl_dic = {cluster_index: {motif_index: '1'}}; the line number in the
    # cluster file is the index of the motif in TAMO_file.
    cl_dic = {}
    with open("%s.dm_UPGMA_Cl_%s" % (TAMO_file, height), "r") as cl_file:
        for n, line in enumerate(cl_file):
            cl_dic.setdefault(line.strip(), {})[n] = "1"

    ml = MotifTools.txt2motifs(TAMO_file)
    old = []  # List of motifs that are the sole members of a cluster.

    print("%s %s" % (ancestor, ancestor == 0))

    # Any value other than 0 or 1 processes no clusters at all.
    if ancestor == 0 or ancestor == 1:
        for i in cl_dic.keys():
            print("%s %s" % (i, cl_dic[i]))

            if len(cl_dic[i]) > 1:
                # Save the motifs of this cluster to their own TAMO file.
                mlist = [ml[j] for j in cl_dic[i]]
                save_motifs(mlist, "%s_sub_%s.tm" % (TAMO_file, i))
                tamo2tf("%s_sub_%s.tm" % (TAMO_file, i))

                if ancestor == 0:
                    # Plain round trip through the TRANSFAC format.
                    os.system("cat  %s_sub_%s.tm.tf > %s_sub_%s_sum.tm.tf" %
                              (TAMO_file, i, TAMO_file, i))
                else:
                    # Gets the JASPAR motifs that best match the motifs from
                    # within the cluster.
                    # NOTE(review): the score-distribution path is hard-coded
                    # to one user's home directory.
                    os.system(
                        "STAMP -tf  %s_sub_%s.tm.tf  -sd /home/chengzou/bin/STAMP/ScoreDists/JaspRand_PCC_SWU.scores  \
                 -go  1000 -ge 1000 -cc PCC -align SWU -out %s_sub_%s.tm.tf_STout -chp > %s_sub_%s.tm.tf_STout.log"
                        % (TAMO_file, i, TAMO_file, i, TAMO_file, i))
                    parse_out_STAMP(TAMO_file, i)

                    # Combines the STAMP results with the cluster motifs.
                    os.system(
                        "cat  %s_sub_%s.tm.tf %s_sub_%s.tm.tf_SToutFBP.txt.mod %s_sub_%s.tm.tf_STout_tree_clusters.txt > %s_sub_%s_sum.tm.tf"
                        % (TAMO_file, i, TAMO_file, i, TAMO_file, i, TAMO_file, i))

                tf2tamo("%s_sub_%s_sum.tm.tf" % (TAMO_file, i))

                # Gets the top motif in the cluster; this leaves
                # "<...>_sum.tm.tf.tm.TOP" behind for the final "cat".
                pick_chunk_score(wdir,
                                 '%s_sub_%s_sum.tm.tf.tm' % (TAMO_file, i),
                                 target, genome)

                # Removes the files that were created.
                os.system("rm  %s_sub_%s_sum.tm.tf.tm" % (TAMO_file, i))
                os.system("rm %s_sub_%s_sum.tm.tf" % (TAMO_file, i))
                os.system("rm -R %s_sub_%s.tm.tf_ST*" % (TAMO_file, i))
            else:
                # Only one motif in the cluster: leave it alone and add it
                # to old.
                key = list(cl_dic[i].keys())[0]
                old.append(ml[key])

    # Combine together the top motifs from every cluster.
    os.system("cat %s_sub_*_sum.tm.tf.tm.TOP > %s_sub_new.tm" %
              (TAMO_file, TAMO_file))
    save_motifs(old, "%s_sub_old.tm" % (TAMO_file))
    os.system("cat %s_sub_old.tm %s_sub_new.tm > %s_P1.tm" %
              (TAMO_file, TAMO_file, TAMO_file))
Exemple #10
def combine_distance_matrix(wdir, TAMO_file):
    '''Combines the PCC score matrices and outputs them as a single matrix.

    Originally written by Cheng Zou, and converted to a function by Alex
    Seddon.

    Expects the chunk matrices "<TAMO_file>_n<i>.dm" (self comparison) and
    "<TAMO_file>_n<i>-<TAMO_file>_n<j>.dm" for j > i to exist. The missing
    lower-triangle files (j < i) are filled with "-" so the result is
    square, each row of chunks is pasted into "distance_<i>", and the rows
    are concatenated into "<TAMO_file>.dm".
    '''
    ml = MotifTools.txt2motifs(TAMO_file)
    # NOTE(review): assumes the TAMO file was split into chunks of 100
    # motifs -- confirm against the caller of TAMO_split.
    n_split = len(ml) // 100

    # NOTE(review): "cd" in a child shell does not change this process'
    # working directory; kept as in the original.
    os.system("cd %s" % wdir)

    # Number of lines in each self-comparison matrix.
    lendic = {}
    for i in range(n_split + 1):
        lendic[i] = line_count("%s_n%s.dm" % (TAMO_file, i))
    print("%s" % (lendic,))

    # Create the blank lower-triangle files: lendic[i] rows, each made of
    # lendic[j] "-" placeholders.
    for i in range(n_split + 1):
        for j in range(0, i):
            print("%s %s" % (lendic[j], lendic[i]))
            blank_row = "\t".join(["-"] * lendic[j])
            with open("%s_n%s-%s_n%s.dm" % (TAMO_file, i, TAMO_file, j),
                      "w") as oup:
                for _ in range(lendic[i]):
                    oup.write("%s\n" % blank_row)

    # Copy each self-comparison file to the "<i>-<i>" name so the paste
    # loop below can address every cell of the grid uniformly.
    for i in range(n_split + 1):
        os.system("cp %s_n%s.dm %s_n%s-%s_n%s.dm" %
                  (TAMO_file, i, TAMO_file, i, TAMO_file, i))

    # Paste the chunk matrices of each row side by side.
    for i in range(n_split + 1):
        parts = ["%s_n%s-%s_n%s.dm " % (TAMO_file, i, TAMO_file, j)
                 for j in range(n_split + 1)]
        com = "paste " + "".join(parts) + "> distance_%s" % i
        print(com)
        # The command was only printed before; run it so "distance_<i>"
        # actually exists for the next step.
        os.system(com)

    # Concatenate all the row matrices.
    rows = ["distance_%s " % i for i in range(n_split + 1)]
    com = "cat " + "".join(rows) + "> %s.dm" % TAMO_file
    print(com)
    os.system(com)

    # Ad hoc clean-up of the double tabs left behind by paste.
    remove_double_tabs("%s.dm" % TAMO_file)
    threshold = math.pow(10, -float(sys.argv[2]))
    maxthreshold = float(sys.argv[3])  # for strong score, using 0.9*max score
    ATbias = float(sys.argv[4])  # 0.33
    GCbias = float(sys.argv[5])  # 0.17
    seq_file = sys.argv[6]  # FASTA file of the sequence
    tar_dir = ""  # Target directory for the output file

    for i in range(1, len(sys.argv)):
        if sys.argv[i] == "-d":
            tar_dir = sys.argv[i + 1].rstrip("/")
    print tar_dir
    ml = MotifTools.txt2motifs(file)
    n = 0
    new_list = []

    # Looks at each motif from the TAMO file. Uses the find function from
    # motility to find the sequences with that motif.
    for Ikey in range(len(ml)):
        #print m.ll

        time1 = time.time()

        m = ml[Ikey]  # Pull out the motif from the motif list.
        save_motifs([m], file + '_' + str(Ikey))  # Save the motif as a file.

Exemple #12
def main():
    """Scan FASTA probes with a set of motifs and print a text map of hits.

    Command-line options handled below:
        -f  FASTA file, loaded with Fasta.load
        -m  motif text, converted with MotifTools.txt2motifs
        -n  comma-separated indices selecting motifs from the motif list
        -L  label characters, one character per selected motif
        -t  threshold as a fraction of a motif's maxscore (default 0.7)
        -a  comma-separated ambiguity-code motifs (Motif_from_text(..., 0.1))
        -S  scale of the drawn map in characters per base (default 50/1000)

    NOTE(review): several statements of this function appear to be missing
    from this copy (marked below); it does not parse as shown.
    """
    # NOTE(review): a `try:` line appears to be missing before this call.
        opts, args = getopt.getopt(sys.argv[1:], "f:m:n:L:t:a:S:", ["help", "output="])
    except getopt.GetoptError:
    # NOTE(review): the handler body is missing (presumably usage/exit).
    if not opts:
    # NOTE(review): the body of this guard is missing (presumably usage/exit).

    print "#" + ' '.join(sys.argv)
    fastafile, motiffile, motifnums, labels, thresh = (None, None, [], None, 0.7)
    ambigs = []

    # Map width in characters per sequence position.
    scale   = 50.0 / 1000.0
    motifs = []
    for opt, value in opts:
        #print opt, value
        if   opt == '-f':  fastafile = value
        elif opt == '-m':  motifs.extend(MotifTools.txt2motifs(value))
        elif opt == '-n':  motifnums = [int(x) for x in value.split(',')]
        elif opt == '-L':  labels    = list(value)
        elif opt == '-t':  thresh    = float(value)
        elif opt == '-a':  ambigs.extend(value.split(','))
        elif opt == '-S':  scale     = float(value)
    probes = Fasta.load(fastafile)
    # NOTE(review): no option above ever sets motiffile, and the body of
    # this `if` is missing.
    if motiffile:
    if ambigs:
        for ambig in ambigs:
            motifs.append( MotifTools.Motif_from_text(ambig,0.1) )
    # Default: use every loaded motif, in order.
    if not motifnums:  motifnums = range(len(motifs))
    print '# %d: %s'%(len(motifs),motifnums)
    # Legend: one line per selected motif with its label and score cut-off.
    for i in range(len(motifnums)):
        motif = motifs[motifnums[i]]
        if labels and i < len(labels):
            txt = labels[i]
            # NOTE(review): an `else:` appears to be missing here; as written
            # the numeric label always overwrites the user label.
            txt = '%d'%i
        print '%-3s : %s %5.2f (%4.2f)'%(txt,motif,thresh*motif.maxscore,thresh)

    # probehits maps a probe key to its per-motif scan results.
    probehits = {}
    for key in probes.keys():
        hits_by_motif = []
        save_flag     = 0
        # Skip sequences containing any of the characters B, D, H or U.
        if re.search('[BDHU]',probes[key]): continue
        for num in motifnums:
            # NOTE(review): `motif` is left over from the legend loop above
            # (the last selected motif), so every motif is scanned with that
            # motif's maxscore; motifs[num].maxscore looks intended.
            result = motifs[num].scan(probes[key],thresh*motif.maxscore)
            if result[0]:
                save_flag = 1
        # NOTE(review): nothing is appended to hits_by_motif and the body of
        # the `if save_flag:` below is missing; presumably the results are
        # collected and stored in probehits[key] -- confirm.
        if save_flag:

    #scale   = .1
    maxw = 40
    for key in probehits.keys():
        l       = len(probes[key])
        # One '-' per scaled position, plus 10 blanks of padding so a label
        # run starting near the end does not index past the list.
        a       = list('-'* int(scale*l) )
        a.extend( list(' '*10 ) )
        desc    = []
        matches = probehits[key]
        for i in range(len(matches)):
            if matches[i]:
                subseqs,endpoints,scores = matches[i]
                for idx in range(len(subseqs)):
                    start,stop = endpoints[idx]
                    subseq     = subseqs[idx]
                    score      = scores[idx]
                    if labels and (i<len(labels)): ID = labels[i]
                    else                         : ID = '%d'%i
                    desc.append('%s %s %d-%d %4.2f '%(ID,subseq,start,stop,score))
                    start = int(start*scale)
                    # Mark up to 10 map cells from the hit start, without
                    # overwriting cells already claimed by another hit.
                    for offset in range(10):
                        if a[start+offset] == '-':
                            if labels and (i < len(labels)):
                                a[start+offset] = labels[i]
                                # NOTE(review): an `else:` appears to be
                                # missing before the next line.
                                a[start+offset] = '%d'%i
        print '%-14s %s'%(key,''.join(a)),
        print ' '*max(0,maxw-len(a)), '| '.join(['%-27s'%x for x in desc])
    print "Found matches in %d of %d input probes"%(len(probehits),len(probes))