Python makePWMFromIUPACの例

プログラミング言語: Python

名前空間/パッケージ名: hts_waterworks.utils.sequence_motif

メソッド/関数: makePWMFromIUPAC

hotexamples.comのコード掲載数: 4

Python makePWMFromIUPAC - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのhts_waterworks.utils.sequence_motif.makePWMFromIUPACの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: motif_discovery.py プロジェクト: bunbun/HTS-waterworks

def _read_peaks_by_score(in_peaks):
    """Return the rows of a tab-separated peaks file, highest score first.

    The score is taken from the 5th column.  If a score is not a number the
    offending line is printed and the ValueError is re-raised.
    """
    peaks = []
    with open(in_peaks) as peak_file:
        for line in peak_file:
            fields = line.strip().split('\t')
            try:
                float(fields[4])
            except ValueError:
                print('here is the error! %s' % line.strip())
                raise
            peaks.append(fields)
    # sorted() is stable, so peaks with equal scores keep their file order
    return sorted(peaks, key=lambda fields: float(fields[4]), reverse=True)


def _load_motif_file(m_file):
    """Return a dict of motif name -> motif read from m_file.

    The file is first tried as a pickled dict; if that fails it is re-read
    as "name<TAB>IUPAC consensus" lines.
    """
    motifs = {}
    with open(m_file) as infile:
        try:
            # NOTE: unpickling can execute arbitrary code -- only use
            # motif files from a trusted source
            motifs.update(pickle.load(infile))
        except Exception:
            infile.seek(0)
            for line in infile:
                line = line.strip('\n')
                if not line.strip():
                    continue  # tolerate blank lines
                name, consensus = line.split('\t')
                motifs[name] = sequence_motif.makePWMFromIUPAC(consensus)
    return motifs


def motif_presence_sorted_peaks(in_files, out_patterns, in_prefix, in_suffix):
    """Plot the running motif presence, starting at most significant peaks.

    For each motif file and each z-score listed in the ``motif_zscores``
    option of the ``motifs`` config section, every peak (sorted by
    descending score) is searched for every motif.  Per z-score this writes
    a summary of the running fraction of peaks containing each motif, a
    table of peaks with their motif instances, a BED file of the instances
    and two plots (all peaks, and the top 10% of peaks).

    in_files -- sequence whose first item is the peaks file (tab-separated,
        score in the 5th column) and whose remaining items are motif files,
        each either a pickled dict of motifs or "name<TAB>IUPAC consensus"
        lines.
    out_patterns -- not used; output names are built from
        in_prefix + in_suffix.
    in_prefix, in_suffix -- concatenated to form the base of every output
        file name.

    Raises ValueError if a peak's score column is not a number.
    """
    in_peaks, in_motifs = in_files[0], in_files[1:]
    out_base = in_prefix + in_suffix
    out_summary = out_base + '.%s.peak_motif_presence'
    out_png = out_base + '.%s.peak_motif_presence.png'
    out_locations = out_base + '.%s.peak_motif_locations'
    out_locations_bed = out_base + '.%s.peak_motif_locations.bed'
    wb_genome = worldbase(cfg.get('DEFAULT', 'worldbase_genome'))

    # read in the peaks file, sorting it by *score*
    print(in_peaks)
    peaks = _read_peaks_by_score(in_peaks)

    old_size = matplotlib.rcParams['font.size']
    matplotlib.rcParams['font.size'] = 6
    try:
        for m_file in in_motifs:
            m_file_short = re.sub(r'((treat|fastq|fastq_illumina|min_qual|bowtie|' +
                                  r'maq|peaks|with_mean_sd|discovered|' +
                                  r'motifs_meme_out|motifs|matched_size_[0-9]|sorted|[0-9]+_around|small_sample)\.)+(motifs\.*)*',
                                  '', m_file)
            cur_motifs = _load_motif_file(m_file)
            motif_names = sorted(cur_motifs)
            all_motif_percent = {}
            for zscore in cfg.get('motifs', 'motif_zscores').strip().split(','):
                z_label = 'z' + zscore
                # collect hits per z-score so that each location file only
                # holds the instances found at its own threshold
                motifs_in_peaks = dict((tuple(p), defaultdict(list))
                                       for p in peaks)
                for name, pwm in cur_motifs.items():
                    with_motif = 0
                    percent_with = []  # percent with motif at each peak
                    for total, p in enumerate(peaks):
                        chrom, start, stop = p[0], int(p[1]), int(p[2])
                        region = wb_genome[chrom][start:stop]
                        # extend peaks to at least pwm length
                        while len(region) < len(pwm):
                            region = wb_genome[chrom][region.start - 5:
                                                      region.stop + 5]
                            # catch nasty infinite loops for very short
                            # scaffolds
                            if len(region) == len(wb_genome[chrom]):
                                break
                        # check if the motif occurs in the region
                        try:
                            hits = list(pwm.find_in_region(
                                region, zscore=float(zscore)))
                        except Exception as e:
                            log.debug('issue with sequence %r %s %s',
                                      region, name, e)
                            hits = []
                        if hits:
                            with_motif += 1
                            # add all peak locations to the list
                            # NOTE(review): offsets are added to the original
                            # peak start even when the region was widened
                            # above -- confirm find_in_region coordinates
                            motifs_in_peaks[tuple(p)][name].extend(
                                (h[0] + start, h[1] + start,
                                 '+' if h[2] == 1 else '-') for h in hits)
                        percent_with.append(float(with_motif) / (total + 1))
                    all_motif_percent[name] = percent_with

                # having calculated for all motifs in this file,
                # plot a figure and give a summary
                with open(out_summary % z_label, 'w') as outfile:
                    outfile.writelines(
                        '%s\t%s\n' % (name, percent)
                        for name, percent in all_motif_percent.items())

                # write the peak locations along with the motif instances
                # that occur in them
                with open(out_locations % z_label, 'w') as outfile:
                    with open(out_locations_bed % z_label, 'w') as out_bed:
                        # header is 6 columns of peak info, then motif info
                        outfile.write('\t'.join([
                            'p_chrom', 'p_start', 'p_stop',
                            'p_name', 'p_score', 'p_strand']))
                        for motif_name in motif_names:
                            outfile.write('\t%s\t#instances_%s' % (
                                motif_name, motif_name))
                        outfile.write('\n')

                        # write one line per peak, then the motif counts and
                        # instances in the peak
                        # instances for each motif are all in one column
                        for p in peaks:
                            outfile.write('\t'.join(map(str, p)))
                            for motif_name in motif_names:
                                hits = motifs_in_peaks[tuple(p)][motif_name]
                                outfile.write('\t%s\t%s' % (len(hits), hits))
                                for h in hits:
                                    out_bed.write('\t'.join(map(str, [
                                        p[0], h[0], h[1], motif_name, 1000,
                                        h[2]])) + '\n')
                            outfile.write('\n')

                all_motif_percent_dict = sorted(all_motif_percent.items())
                names = [k for k, v in all_motif_percent_dict]
                # one row per peak, one column per motif
                datapoints = numpy.array(
                    [v for k, v in all_motif_percent_dict]).T

                # plot original data
                pyplot.plot(datapoints)
                pyplot.legend(names)
                pyplot.title('Motifs from\n%s\nPresence in\n%s' % (
                    m_file_short, in_peaks))
                pyplot.savefig(out_png % z_label)
                pyplot.close()

                # plot top 10% of data; floor division keeps the slice
                # index an integer
                plot_top = len(datapoints) // 10
                pyplot.plot(datapoints[:plot_top, :])
                pyplot.legend(names)
                pyplot.title('Top 10%% of Motifs from\n%s\nPresence in\n%s' % (
                    m_file_short, in_peaks))
                pyplot.savefig(out_png % (z_label + '.top10percent'))
                pyplot.close()
    finally:
        # restore the global font size even if searching or plotting failed
        matplotlib.rcParams['font.size'] = old_size

コード例 #2

ファイルを表示

ファイル: motif_significance.py プロジェクト: kdaily/HTS-waterworks

def main(argv=None):
    """ Calculate significance of a motif in peaks with genomic background
    Can use restricted annotationDB, such as only promoter regions """
    # NOTE: the docstring above is appended to the --help usage text, so it
    # is runtime output and is kept as-is; developer notes live here.
    #
    # argv -- list of command-line arguments (defaults to sys.argv[1:]).
    #     Exactly one positional argument (the peaks BED file) is required,
    #     plus at least one of --motif_file / --consensus_file.
    # For each selected motif the peak regions are searched for motif
    # instances and, unless --no_bg is given, compared with background
    # regions.  One tab-separated result line per motif is appended to
    # --output_file, or written to stderr when no output file is given.
    # Exits with status -1 (after printing the help) when the peaks file
    # or the motif source is missing.
    # Raises RuntimeError when --bg_samples is neither a pickle nor fasta.

    parser = optparse.OptionParser("%prog [options] peaks.bed [outfile] \n"+main.__doc__)
    parser.add_option("--genome", '-g', dest="genome_resource", type="string",
                      help="""The pygr resource for the genome""")
    parser.add_option("--motif_file", '-m', dest="motif_file", type="string",
                      help="""The index file for all motifs, as a pickled dictionary, of pwm's or Motifs e.g.,
                      {"LRH_1":[[.25,.25,.1,.4],[.2,.2,.3,.3]]}""")
    parser.add_option("--consensus_file", '-c', dest="consensus_file", type="string",
                      help="""index file for consensus motifs (IUPAC format, one
                      per line in the file""")
    parser.add_option("--motif_key", '-k', dest="motif_key", type="string",
                      help="""The key for the current motif in motif_file, default=all""")
    parser.add_option('--zscore', '-z', dest='zscore', type='float', default=4.29,
                      help="""Calculate threshold score estimate from this Z-score. [default=%default]""")
    parser.add_option('--overlap_resource', dest='overlap_resource', type='string',
                      help="""Only count fg and bg that overlap with pygr resource""")
    parser.add_option('--bg_samples', dest='bg_samples', type='string',
                      help="""Pickled or Fasta file of background sequences to use instead of sampling the genome""")
    parser.add_option('--no_bg', dest='no_bg', action='store_true',
                      help="""skip sampling in the background""")
    parser.add_option('--report_region', type='string', help='Report the genomic regions of peaks with motif instances to this file')
    parser.add_option("--output_file", '-f', dest="output_file", type="string",
                      help="""Append the zscore information to the given file""")
    if argv is None:
        argv = sys.argv[1:]
    opts, args = parser.parse_args(argv)
    if len(args) != 1:
        parser.print_help()
        print('Specify the peaks bed file!')
        sys.exit(-1)
    if not opts.motif_file and not opts.consensus_file:
        parser.print_help()
        print('Specify the motif file!')
        sys.exit(-1)

    def emit_result(fields):
        """Append one tab-separated result row to the output file or stderr."""
        outstr = '\t'.join(fields)
        if opts.output_file:
            with open(opts.output_file, 'a') as outfile:
                outfile.write(outstr + '\n')
        else:
            sys.stderr.write(outstr + '\n')

    updated_motifs = False
    print('# Loading resources...')
    opts.genome_resource = getFullGenomeName(opts.genome_resource)
    genome = worldbase(opts.genome_resource)
    if opts.overlap_resource:
        annotMap = worldbase(opts.overlap_resource)
        annotDB = worldbase(opts.overlap_resource + '_db')

    allMotifs = {}
    # load pickled dict of motifs
    if opts.motif_file:
        # NOTE: unpickling can execute arbitrary code -- only use motif
        # files from a trusted source
        with open(opts.motif_file, 'rb') as infile:
            allMotifs.update(pickle.load(infile))
    # create consensus dict of motifs
    if opts.consensus_file:
        with open(opts.consensus_file) as infile:
            for line in infile:
                if not line.strip():
                    continue  # tolerate blank lines
                name, consensus = line.strip().split('\t')
                allMotifs[name] = makePWMFromIUPAC(consensus)

    if opts.motif_key:
        allKeys = [opts.motif_key]
    else:
        allKeys = list(allMotifs.keys())

    # write a header; the trailing newline keeps the first result row from
    # being appended onto the header line
    if opts.output_file:
        outstr = '\t'.join(['peaks', 'motif', 'threshold_z', 'vs_bg_normal_Z',
                            'hypergeo_pvalue', 'fgMatches', 'fgSize',
                            'fgMatches/fgSize', 'bgMatches', 'bgSize'])
        with open(opts.output_file, 'w') as outfile:
            outfile.write(outstr + '\n')

    for motifKey in allKeys:
        print('# Loaded motif %s...' % motifKey)
        pwm = allMotifs[motifKey]
        if isinstance(pwm, list):
            # raw matrices are wrapped so they offer the Motif search API
            pwm = Motif(pwm)
            allMotifs[motifKey] = pwm
        if not pwm.bg_calculated():
            print('# Calculating motif background distribution...')
            pwm.calculate_background(genome)
            updated_motifs = True
        print('motif %s: length=%s threshold=%s mean=%s sd=%s' % (
            motifKey, len(pwm), pwm.get_threshold(opts.zscore),
            pwm._mean, pwm._sd))
        with open(args[0]) as peak_file:
            allPeaks = list(readBedLines(peak_file.readlines()))
        peakSizes = [stop - start for _, start, stop, _ in allPeaks]

        print('# Searching foreground sequence...')
        sys.stdout.flush()
        peakRegions = (genome[chrom][start:stop]
                       for chrom, start, stop, _ in allPeaks)
        if opts.overlap_resource:
            # only peaks that overlap the resource count as foreground
            candidateRegions = [region for region in peakRegions
                                if len(annotMap[region]) > 0]
            fgSize = len(candidateRegions)
        else:
            candidateRegions = peakRegions
            fgSize = len(allPeaks)
        # peaks with at least one motif instance; built in both modes so
        # that --report_region also works with --overlap_resource
        matchingPeaks = [
            region for region in candidateRegions
            if len(pwm.find_in_region(region, zscore=opts.zscore)) > 0]
        fgMatches = len(matchingPeaks)

        if opts.report_region is not None:
            with open(opts.report_region, 'w') as outfile:
                outfile.writelines(
                    '%s\t%s\t%s\n' % (region.id, region.start, region.stop)
                    for region in matchingPeaks)

        if opts.no_bg:
            emit_result([args[0], motifKey] + [
                str(value) for value in (opts.zscore, fgMatches, fgSize,
                                         float(fgMatches) / fgSize)])
        else:
            print('# Searching background sequence...')
            sys.stdout.flush()
            if opts.bg_samples:
                try:
                    with open(opts.bg_samples, 'rb') as infile:
                        bgSamples = pickle.load(infile)
                except Exception:
                    try:
                        # handle is left open on purpose: the parser may
                        # read it lazily while the samples are iterated
                        # below (TODO confirm against parseFastaLines)
                        bgSamples = parseFastaLines(open(opts.bg_samples))
                    except Exception:
                        raise RuntimeError(
                            "specified background samples file %s "
                            "was neither a pickled file nor a fasta file!" %
                            opts.bg_samples)
            elif opts.overlap_resource:
                bgSamples = sample_resource(annotDB, peakSizes, sampleSize=100000)
            else:
                bgSamples = sample_genome(genome, peakSizes, sampleSize=100000)
            bgSize = 0
            bgMatches = 0
            for region in bgSamples:
                bgSize += 1
                if len(pwm.find_in_region(region, zscore=opts.zscore)) > 0:
                    bgMatches += 1

            # calculate significance of foreground vs. background
            zscore = zscore_normal(fgMatches, fgSize, bgMatches, bgSize)
            pvalue = pvalue_hypergeometric(fgMatches, fgSize, bgMatches, bgSize)
            emit_result([args[0], motifKey] + [
                str(value) for value in (
                    'thesh_z=' + str(opts.zscore), zscore, pvalue,
                    fgMatches, fgSize, float(fgMatches) / fgSize,
                    bgMatches, bgSize)])
    # background statistics can only be cached when a pickle file was given;
    # with --consensus_file alone there is nowhere to save them
    if updated_motifs and opts.motif_file:
        print('# Saving motif info back to %s' % opts.motif_file)
        with open(opts.motif_file, 'wb') as outfile:
            pickle.dump(allMotifs, outfile)

コード例 #3

ファイルを表示

def main(argv=None):
    """ Calculate significance of a motif in peaks with genomic background
    Can use restricted annotationDB, such as only promoter regions """
    # NOTE: the docstring above is appended to the --help usage text, so it
    # is runtime output and is kept as-is; developer notes live here.
    #
    # argv -- list of command-line arguments (defaults to sys.argv[1:]).
    #     Exactly one positional argument (the peaks BED file) is required,
    #     plus at least one of --motif_file / --consensus_file.
    # For each selected motif either the whole genome is scanned
    # (--search_genome together with --report_region) or the peak regions
    # are searched and, unless --no_bg is given, compared with background
    # regions.  One tab-separated result line per motif is appended to
    # --output_file, or written to stderr when no output file is given.
    # Exits with status -1 (after printing the help) when the peaks file
    # or the motif source is missing.
    # Raises RuntimeError when --bg_samples is neither a pickle nor fasta.

    parser = optparse.OptionParser("%prog [options] peaks.bed [outfile] \n" +
                                   main.__doc__)
    parser.add_option("--genome", '-g', dest="genome_resource", type="string",
                      help="""The pygr resource for the genome""")
    parser.add_option("--motif_file", '-m', dest="motif_file", type="string",
                      help="""The index file for all motifs, as a pickled dictionary, of pwm's or Motifs e.g.,
                      {"LRH_1":[[.25,.25,.1,.4],[.2,.2,.3,.3]]}""")
    parser.add_option("--consensus_file", '-c', dest="consensus_file", type="string",
                      help="""index file for consensus motifs (IUPAC format, one
                      per line in the file""")
    parser.add_option("--motif_key", '-k', dest="motif_key", type="string",
                      help="""The key for the current motif in motif_file, default=all""")
    parser.add_option('--zscore', '-z', dest='zscore', type='float', default=4.29,
                      help="""Calculate threshold score estimate from this Z-score. [default=%default]""")
    parser.add_option('--overlap_resource', dest='overlap_resource', type='string',
                      help="""Only count fg and bg that overlap with pygr resource""")
    parser.add_option('--bg_samples', dest='bg_samples', type='string',
                      help="""Pickled or Fasta file of background sequences to use instead of sampling the genome""")
    parser.add_option('--no_bg', dest='no_bg', action='store_true',
                      help="""skip sampling in the background""")
    parser.add_option('--report_region', type='string',
                      help='Report the genomic regions of peaks with motif instances to this file')
    parser.add_option("--output_file", '-f', dest="output_file", type="string",
                      help="""Append the zscore information to the given file""")
    parser.add_option('--search_genome', action='store_true')
    if argv is None:
        argv = sys.argv[1:]
    opts, args = parser.parse_args(argv)
    if len(args) != 1:
        parser.print_help()
        print('Specify the peaks bed file!')
        sys.exit(-1)
    if not opts.motif_file and not opts.consensus_file:
        parser.print_help()
        print('Specify the motif file!')
        sys.exit(-1)

    def emit_result(fields):
        """Append one tab-separated result row to the output file or stderr."""
        outstr = '\t'.join(fields)
        if opts.output_file:
            with open(opts.output_file, 'a') as outfile:
                outfile.write(outstr + '\n')
        else:
            sys.stderr.write(outstr + '\n')

    updated_motifs = False
    print('# Loading resources...')
    opts.genome_resource = getFullGenomeName(opts.genome_resource)
    genome = worldbase(opts.genome_resource)
    if opts.overlap_resource:
        annotMap = worldbase(opts.overlap_resource)
        annotDB = worldbase(opts.overlap_resource + '_db')

    allMotifs = {}
    # transfac motif files are plain text; anything else is a pickled dict
    motifs_are_transfac = bool(opts.motif_file) and \
        opts.motif_file.endswith('.transfac')
    if opts.motif_file:
        if motifs_are_transfac:
            with open(opts.motif_file, 'r') as infile:
                allMotifs.update(parseMotifsFromTransfac(infile.read()))
        else:
            # NOTE: unpickling can execute arbitrary code -- only use motif
            # files from a trusted source
            with open(opts.motif_file, 'rb') as infile:
                allMotifs.update(pickle.load(infile))
    # create consensus dict of motifs
    if opts.consensus_file:
        with open(opts.consensus_file) as infile:
            for line in infile:
                if not line.strip():
                    continue  # tolerate blank lines
                name, consensus = line.strip().split('\t')
                allMotifs[name] = makePWMFromIUPAC(consensus)

    if opts.motif_key:
        allKeys = [opts.motif_key]
    else:
        allKeys = list(allMotifs.keys())

    # write a header; the trailing newline keeps the first result row from
    # being appended onto the header line
    if opts.output_file:
        outstr = '\t'.join([
            'peaks', 'motif', 'threshold_z', 'vs_bg_normal_Z',
            'hypergeo_pvalue', 'fgMatches', 'fgSize', 'fgMatches/fgSize',
            'bgMatches', 'bgSize'
        ])
        with open(opts.output_file, 'w') as outfile:
            outfile.write(outstr + '\n')

    for motifKey in allKeys:
        print('# Loaded motif %s...' % motifKey)
        pwm = allMotifs[motifKey]
        if isinstance(pwm, list):
            # raw matrices are wrapped so they offer the Motif search API
            pwm = Motif(pwm)
            allMotifs[motifKey] = pwm
        if not pwm.bg_calculated():
            print('# Calculating motif background distribution...')
            pwm.calculate_background(genome)
            updated_motifs = True
        print('motif %s: length=%s threshold=%s mean=%s sd=%s max_score=%s' % (
            motifKey, len(pwm), pwm.get_threshold(opts.zscore),
            pwm._mean, pwm._sd, pwm.max_score()))

        if opts.search_genome and opts.report_region is not None:
            # search the genome with the motif
            # NOTE(review): the report file is reopened in 'w' mode for each
            # motif, so only the last motif's matches remain -- confirm
            print('searching genome!')
            with open(opts.report_region, 'w') as outfile:
                for chrom in genome:
                    for match in pwm.find_in_region(genome[chrom]):
                        outstr = '{chrom}\t{start}\t{stop}\t{name}\t{score}\t{strand}\n'.format(
                            chrom=chrom,
                            start=match[0],
                            stop=match[1],
                            name=motifKey,
                            score=pwm.calc_score(match[3]),
                            strand='+' if match[2] == 1 else '-')
                        outfile.write(outstr)
            continue

        with open(args[0]) as peak_file:
            allPeaks = list(readBedLines(peak_file.readlines()))
        peakSizes = [stop - start for _, start, stop, _ in allPeaks]

        print('# Searching foreground sequence...')
        sys.stdout.flush()
        peakRegions = (genome[chrom][start:stop]
                       for chrom, start, stop, _ in allPeaks)
        if opts.overlap_resource:
            # only peaks that overlap the resource count as foreground
            candidateRegions = [region for region in peakRegions
                                if len(annotMap[region]) > 0]
            fgSize = len(candidateRegions)
        else:
            candidateRegions = peakRegions
            fgSize = len(allPeaks)
        # peaks with at least one motif instance; built in both modes so
        # that --report_region also works with --overlap_resource
        matchingPeaks = [
            region for region in candidateRegions
            if len(pwm.find_in_region(region, zscore=opts.zscore)) > 0]
        fgMatches = len(matchingPeaks)

        if opts.report_region is not None:
            with open(opts.report_region, 'w') as outfile:
                outfile.writelines(
                    '%s\t%s\t%s\n' % (region.id, region.start, region.stop)
                    for region in matchingPeaks)

        if opts.no_bg:
            emit_result([args[0], motifKey] + [
                str(value) for value in (opts.zscore, fgMatches, fgSize,
                                         float(fgMatches) / fgSize)])
        else:
            print('# Searching background sequence...')
            sys.stdout.flush()
            if opts.bg_samples:
                try:
                    with open(opts.bg_samples, 'rb') as infile:
                        bgSamples = pickle.load(infile)
                except Exception:
                    try:
                        # handle is left open on purpose: the parser may
                        # read it lazily while the samples are iterated
                        # below (TODO confirm against parseFastaLines)
                        bgSamples = parseFastaLines(open(opts.bg_samples))
                    except Exception:
                        raise RuntimeError(
                            "specified background samples file %s "
                            "was neither a pickled file nor a fasta file!" %
                            opts.bg_samples)
            elif opts.overlap_resource:
                bgSamples = sample_resource(annotDB,
                                            peakSizes,
                                            sampleSize=100000)
            else:
                bgSamples = sample_genome(genome, peakSizes, sampleSize=100000)
            bgSize = 0
            bgMatches = 0
            for region in bgSamples:
                bgSize += 1
                if len(pwm.find_in_region(region, zscore=opts.zscore)) > 0:
                    bgMatches += 1

            # calculate significance of foreground vs. background
            zscore = zscore_normal(fgMatches, fgSize, bgMatches, bgSize)
            pvalue = pvalue_hypergeometric(fgMatches, fgSize, bgMatches,
                                           bgSize)
            emit_result([args[0], motifKey] + [
                str(value) for value in (
                    'thesh_z=' + str(opts.zscore), zscore, pvalue,
                    fgMatches, fgSize, float(fgMatches) / fgSize,
                    bgMatches, bgSize)])
    if updated_motifs:
        if not opts.motif_file:
            # only --consensus_file was given: there is no pickle to update
            print('# No motif file given, motif info not saved')
        elif motifs_are_transfac:
            # writing a pickle here would clobber the text transfac file
            print('# Not saving motif info over transfac file %s' %
                  opts.motif_file)
        else:
            print('# Saving motif info back to %s' % opts.motif_file)
            with open(opts.motif_file, 'wb') as outfile:
                pickle.dump(allMotifs, outfile)

コード例 #4

ファイルを表示

def _read_peaks_by_score(in_peaks):
    """Return the rows of a tab-separated peaks file, highest score first.

    The score is taken from the 5th column.  If a score is not a number the
    offending line is printed and the ValueError is re-raised.
    """
    peaks = []
    with open(in_peaks) as peak_file:
        for line in peak_file:
            fields = line.strip().split('\t')
            try:
                float(fields[4])
            except ValueError:
                print('here is the error! %s' % line.strip())
                raise
            peaks.append(fields)
    # sorted() is stable, so peaks with equal scores keep their file order
    return sorted(peaks, key=lambda fields: float(fields[4]), reverse=True)


def _load_motif_file(m_file):
    """Return a dict of motif name -> motif read from m_file.

    The file is first tried as a pickled dict; if that fails it is re-read
    as "name<TAB>IUPAC consensus" lines.
    """
    motifs = {}
    with open(m_file) as infile:
        try:
            # NOTE: unpickling can execute arbitrary code -- only use
            # motif files from a trusted source
            motifs.update(pickle.load(infile))
        except Exception:
            infile.seek(0)
            for line in infile:
                line = line.strip('\n')
                if not line.strip():
                    continue  # tolerate blank lines
                name, consensus = line.split('\t')
                motifs[name] = sequence_motif.makePWMFromIUPAC(consensus)
    return motifs


def motif_presence_sorted_peaks(in_files, out_patterns, in_prefix, in_suffix):
    """Plot the running motif presence, starting at most significant peaks.

    For each motif file and each z-score listed in the ``motif_zscores``
    option of the ``motifs`` config section, every peak (sorted by
    descending score) is searched for every motif.  Per z-score this writes
    a summary of the running fraction of peaks containing each motif, a
    table of peaks with their motif instances, a BED file of the instances
    and two plots (all peaks, and the top 10% of peaks).

    in_files -- sequence whose first item is the peaks file (tab-separated,
        score in the 5th column) and whose remaining items are motif files,
        each either a pickled dict of motifs or "name<TAB>IUPAC consensus"
        lines.
    out_patterns -- not used; output names are built from
        in_prefix + in_suffix.
    in_prefix, in_suffix -- concatenated to form the base of every output
        file name.

    Raises ValueError if a peak's score column is not a number.
    """
    in_peaks, in_motifs = in_files[0], in_files[1:]
    out_base = in_prefix + in_suffix
    out_summary = out_base + '.%s.peak_motif_presence'
    out_png = out_base + '.%s.peak_motif_presence.png'
    out_locations = out_base + '.%s.peak_motif_locations'
    out_locations_bed = out_base + '.%s.peak_motif_locations.bed'
    wb_genome = worldbase(cfg.get('DEFAULT', 'worldbase_genome'))

    # read in the peaks file, sorting it by *score*
    print(in_peaks)
    peaks = _read_peaks_by_score(in_peaks)

    old_size = matplotlib.rcParams['font.size']
    matplotlib.rcParams['font.size'] = 6
    try:
        for m_file in in_motifs:
            m_file_short = re.sub(
                r'((treat|fastq|fastq_illumina|min_qual|bowtie|' +
                r'maq|peaks|with_mean_sd|discovered|' +
                r'motifs_meme_out|motifs|matched_size_[0-9]|sorted|[0-9]+_around|small_sample)\.)+(motifs\.*)*',
                '', m_file)
            cur_motifs = _load_motif_file(m_file)
            motif_names = sorted(cur_motifs)
            all_motif_percent = {}
            for zscore in cfg.get('motifs',
                                  'motif_zscores').strip().split(','):
                z_label = 'z' + zscore
                # collect hits per z-score so that each location file only
                # holds the instances found at its own threshold
                motifs_in_peaks = dict((tuple(p), defaultdict(list))
                                       for p in peaks)
                for name, pwm in cur_motifs.items():
                    with_motif = 0
                    percent_with = []  # percent with motif at each peak
                    for total, p in enumerate(peaks):
                        chrom, start, stop = p[0], int(p[1]), int(p[2])
                        region = wb_genome[chrom][start:stop]
                        # extend peaks to at least pwm length
                        while len(region) < len(pwm):
                            region = wb_genome[chrom][region.start - 5:
                                                      region.stop + 5]
                            # catch nasty infinite loops for very short
                            # scaffolds
                            if len(region) == len(wb_genome[chrom]):
                                break
                        # check if the motif occurs in the region
                        try:
                            hits = list(pwm.find_in_region(
                                region, zscore=float(zscore)))
                        except Exception as e:
                            log.debug('issue with sequence %r %s %s',
                                      region, name, e)
                            hits = []
                        if hits:
                            with_motif += 1
                            # add all peak locations to the list
                            # NOTE(review): offsets are added to the original
                            # peak start even when the region was widened
                            # above -- confirm find_in_region coordinates
                            motifs_in_peaks[tuple(p)][name].extend(
                                (h[0] + start, h[1] + start,
                                 '+' if h[2] == 1 else '-') for h in hits)
                        percent_with.append(float(with_motif) / (total + 1))
                    all_motif_percent[name] = percent_with

                # having calculated for all motifs in this file,
                # plot a figure and give a summary
                with open(out_summary % z_label, 'w') as outfile:
                    outfile.writelines(
                        '%s\t%s\n' % (name, percent)
                        for name, percent in all_motif_percent.items())

                # write the peak locations along with the motif instances
                # that occur in them
                with open(out_locations % z_label, 'w') as outfile:
                    with open(out_locations_bed % z_label, 'w') as out_bed:
                        # header is 6 columns of peak info, then motif info
                        outfile.write('\t'.join([
                            'p_chrom', 'p_start', 'p_stop', 'p_name',
                            'p_score', 'p_strand'
                        ]))
                        for motif_name in motif_names:
                            outfile.write('\t%s\t#instances_%s' %
                                          (motif_name, motif_name))
                        outfile.write('\n')

                        # write one line per peak, then the motif counts and
                        # instances in the peak
                        # instances for each motif are all in one column
                        for p in peaks:
                            outfile.write('\t'.join(map(str, p)))
                            for motif_name in motif_names:
                                hits = motifs_in_peaks[tuple(p)][motif_name]
                                outfile.write('\t%s\t%s' % (len(hits), hits))
                                for h in hits:
                                    out_bed.write('\t'.join(
                                        map(str, [
                                            p[0], h[0], h[1], motif_name,
                                            1000, h[2]
                                        ])) + '\n')
                            outfile.write('\n')

                all_motif_percent_dict = sorted(all_motif_percent.items())
                names = [k for k, v in all_motif_percent_dict]
                # one row per peak, one column per motif
                datapoints = numpy.array(
                    [v for k, v in all_motif_percent_dict]).T

                # plot original data
                pyplot.plot(datapoints)
                pyplot.legend(names)
                pyplot.title('Motifs from\n%s\nPresence in\n%s' %
                             (m_file_short, in_peaks))
                pyplot.savefig(out_png % z_label)
                pyplot.close()

                # plot top 10% of data; floor division keeps the slice
                # index an integer
                plot_top = len(datapoints) // 10
                pyplot.plot(datapoints[:plot_top, :])
                pyplot.legend(names)
                pyplot.title('Top 10%% of Motifs from\n%s\nPresence in\n%s' %
                             (m_file_short, in_peaks))
                pyplot.savefig(out_png % (z_label + '.top10percent'))
                pyplot.close()
    finally:
        # restore the global font size even if searching or plotting failed
        matplotlib.rcParams['font.size'] = old_size