Example #1
0
def plot_footprint_profile(ex, bedlist, signals, chrnames, groups, logfile):
    """Plot the average signal profile around motif occurrences, per group.

    For every group id in ``bedlist`` the motif regions are read chromosome
    by chromosome and grouped by ``(name, nbins)``, where both values come
    from the 4th field of each record split on ``":"`` (``name`` is the first
    part, ``nbins`` the length of the second part).  Scores yielded by
    ``score_by_feature`` are summed per bin and per signal, then divided by
    the number of regions.  Each motif gets one ``lineplot`` call (all sent
    to the same output file of the group) and one tab-delimited text dump of
    the averaged matrix.

    Args:
        ex: Not used by this function; kept so callers need not change.
        bedlist: Mapping ``group id -> motif file``; each file is opened
            with ``track(..., format='bed')``.
        signals: Mapping ``group id -> list of signal tracks``; each track
            must expose ``.name`` and ``.read(chrom)``.
        chrnames: Iterable of chromosome names to process.
        groups: Not used by this function; kept so callers need not change.
        logfile: Writable file-like object that receives a progress line.

    Returns:
        dict: ``{gid: {'pdf': plot_file_name,
        'mat': [(motif_name, data_file_name), ...]}}``.
    """
    files = dict((gid, {'pdf': "", 'mat': []}) for gid in bedlist)
    logfile.write("Plotting footprints:\n")
    logfile.flush()
    # .items() (not .iteritems()) so the loop runs on Python 2 and Python 3.
    for gid, motifbed in bedlist.items():
        group_signals = signals[gid]
        snames = [sig.name for sig in group_signals]
        tmotif = track(motifbed, format='bed')
        flank_bins = _plot_flank[1]
        data = {}     # (motif name, nbins) -> summed score matrix
        numregs = {}  # (motif name, nbins) -> number of regions summed
        for chrom in chrnames:
            # Collect the (start, end) pairs of this chromosome per motif key.
            fread = {}
            for r in tmotif.read(chrom):
                r2 = r[3].split(":")
                fread.setdefault((r2[0], len(r2[1])), []).append(r[1:3])
            for motif, regs in fread.items():
                if motif not in data:
                    # One row per bin (motif bins plus both flanks), one
                    # column per signal.
                    data[motif] = zeros(shape=(motif[1] + 2 * flank_bins,
                                               len(group_signals)))
                    numregs[motif] = 0
                numregs[motif] += len(regs)
                tFeat = sorted_stream(
                    segment_features(FeatureStream(regs,
                                                   fields=['start', 'end']),
                                     nbins=motif[1],
                                     upstream=_plot_flank,
                                     downstream=_plot_flank))
                for t in score_by_feature(
                        [s.read(chrom) for s in group_signals], tFeat):
                    # t[2] is used as the row index, t[3:] as the scores of
                    # the signals for that row.
                    data[motif][t[2]] += t[3:]
        files[gid]['pdf'] = unique_filename_in()
        last_index = len(data) - 1
        for index, (motif, dat) in enumerate(data.items()):
            mname, nbins = motif
            # Turn the per-bin sums into averages (in place).
            dat /= float(numregs[motif])
            # X labels: integers on the flanks, "1".."nbins" (as strings) on
            # the motif itself.  list() is required because a Python 3 range
            # object does not support item assignment.
            X = list(range(-flank_bins, flank_bins + nbins))
            X[flank_bins:flank_bins + nbins] = [str(k + 1)
                                                for k in range(nbins)]
            ####### Could do a heatmap (sort by intensity)...
            lineplot(X, [dat[:, n] for n in range(dat.shape[-1])],
                     mfrow=[4, 2],
                     output=files[gid]['pdf'],
                     new=(index == 0),
                     last=(index == last_index),
                     legend=snames,
                     main=mname)
            _datf = unique_filename_in()
            with open(_datf, "w") as dff:
                dff.write("\t".join([""] + [str(x) for x in X]) + "\n")
                for n, sn in enumerate(snames):
                    dff.write("\t".join([sn] + [str(x)
                                                for x in dat[:, n]]) + "\n")
            files[gid]['mat'].append((mname, _datf))
    return files
Example #2
0
def plot_footprint_profile( ex, bedlist, signals, chrnames, groups, logfile ):
    """Plot the average signal profile around motif occurrences, per group.

    For every group id in ``bedlist`` the motif regions are read chromosome
    by chromosome and grouped by ``(name, nbins)``, where both values come
    from the 4th field of each record split on ``":"`` (``name`` is the first
    part, ``nbins`` the length of the second part).  Scores yielded by
    ``score_by_feature`` are summed per bin and per signal, then divided by
    the number of regions.  Each motif gets one ``lineplot`` call (all sent
    to the same output file of the group) and one tab-delimited text dump of
    the averaged matrix.

    Args:
        ex: Not used by this function; kept so callers need not change.
        bedlist: Mapping ``group id -> motif file``; each file is opened
            with ``track(..., format='bed')``.
        signals: Mapping ``group id -> list of signal tracks``; each track
            must expose ``.name`` and ``.read(chrom)``.
        chrnames: Iterable of chromosome names to process.
        groups: Not used by this function; kept so callers need not change.
        logfile: Writable file-like object that receives a progress line.

    Returns:
        dict: ``{gid: {'pdf': plot_file_name,
        'mat': [(motif_name, data_file_name), ...]}}``.
    """
    files = dict((gid, {'pdf': "", 'mat': []}) for gid in bedlist)
    logfile.write("Plotting footprints:\n")
    logfile.flush()
    # .items() (not .iteritems()) so the loop runs on Python 2 and Python 3.
    for gid, motifbed in bedlist.items():
        group_signals = signals[gid]
        snames = [sig.name for sig in group_signals]
        tmotif = track(motifbed, format='bed')
        flank_bins = _plot_flank[1]
        data = {}     # (motif name, nbins) -> summed score matrix
        numregs = {}  # (motif name, nbins) -> number of regions summed
        for chrom in chrnames:
            # Collect the (start, end) pairs of this chromosome per motif key.
            fread = {}
            for r in tmotif.read(chrom):
                r2 = r[3].split(":")
                fread.setdefault((r2[0], len(r2[1])), []).append(r[1:3])
            for motif, regs in fread.items():
                if motif not in data:
                    # One row per bin (motif bins plus both flanks), one
                    # column per signal.
                    data[motif] = zeros(shape=(motif[1] + 2 * flank_bins,
                                               len(group_signals)))
                    numregs[motif] = 0
                numregs[motif] += len(regs)
                tFeat = sorted_stream(
                    segment_features(FeatureStream(regs,
                                                   fields=['start', 'end']),
                                     nbins=motif[1],
                                     upstream=_plot_flank,
                                     downstream=_plot_flank))
                for t in score_by_feature(
                        [s.read(chrom) for s in group_signals], tFeat):
                    # t[2] is used as the row index, t[3:] as the scores of
                    # the signals for that row.
                    data[motif][t[2]] += t[3:]
        files[gid]['pdf'] = unique_filename_in()
        last_index = len(data) - 1
        for index, (motif, dat) in enumerate(data.items()):
            mname, nbins = motif
            # Turn the per-bin sums into averages (in place).
            dat /= float(numregs[motif])
            # X labels: integers on the flanks, "1".."nbins" (as strings) on
            # the motif itself.  list() is required because a Python 3 range
            # object does not support item assignment.
            X = list(range(-flank_bins, flank_bins + nbins))
            X[flank_bins:flank_bins + nbins] = [str(k + 1)
                                                for k in range(nbins)]
            ####### Could do a heatmap (sort by intensity)...
            lineplot(X, [dat[:, n] for n in range(dat.shape[-1])],
                     mfrow=[4, 2],
                     output=files[gid]['pdf'],
                     new=(index == 0),
                     last=(index == last_index),
                     legend=snames,
                     main=mname)
            _datf = unique_filename_in()
            with open(_datf, "w") as dff:
                dff.write("\t".join([""] + [str(x) for x in X]) + "\n")
                for n, sn in enumerate(snames):
                    dff.write("\t".join([sn] + [str(x)
                                                for x in dat[:, n]]) + "\n")
            files[gid]['mat'].append((mname, _datf))
    return files
Example #3
0
    def __call__(self, **kw):
        """Plot signal scores over a set of features and register the result.

        The signals are scored over the features chromosome by chromosome
        with ``feature_matrix(..., segment=True)``; the resulting matrices
        are stacked and drawn as a heatmap, an average lineplot or a mosaic
        of per-feature lineplots depending on ``mode``.

        Keyword Args:
            features: Passed to ``track`` with ``chrmeta="guess"``.
            signals: One signal or a list of signals, each passed to
                ``track``.
            upstream, downstream: Optional, converted with ``int()``.  A
                value above 50 gives ``(value, 5)``, a value above 0 gives
                ``(value, 1)``, anything else ``(0, 0)``.  When absent the
                module defaults ``_upstr`` / ``_downstr`` are used.
            nbins: Optional, converted with ``int()`` and clamped to at
                least 1; defaults to ``_nbins``.
            noclust: Optional; true when its lower-cased string form is one
                of ``'1'``, ``'true'``, ``'t'``, ``'on'``.  Its negation is
                passed to ``heatmap`` as ``orderRows``.
            ymin, ymax: Optional; values ``float()`` cannot convert are
                treated as not given.
            output: Output type; values not in ``output_list`` fall back to
                ``output_list[0]``.  ``'archive'`` produces a ``tar.gz``
                holding the PDF plus tab-delimited data tables.
            mode: Plot type, matched by membership against the entries of
                ``plot_types`` (defaults to 0).

        Returns:
            Whatever ``self.display_time()`` returns.

        Raises:
            ValueError: If no chromosome produced any data, or if ``mode``
                matches none of the implemented plot types.
        """

        def make_X_labels(X, start, end, strand, down, up):
            """Convert the relative bin positions ``X`` to coordinates of one feature."""
            flen = end - start
            # i0: index just past the bin where X == 0 (0 when there is
            # none); the bins before it form the upstream part.
            i0 = (list(where(X == 0)[0]) + [-1])[0] + 1
            # i1: index just past the bin where X == 1 (len(X) when there
            # is none); the i2 bins from there on form the downstream part.
            i1 = (list(where(X == 1)[0]) + [len(X) - 1])[0] + 1
            i2 = len(X) - i1
            istep = 0.5 / (i1 - i0)
            # Flank sizes below 1 are scaled by the feature length.
            if down < 1:
                down *= flen
            if up < 1:
                up *= flen
            Xup = (array(range(-i0, 0)) + .5) * up / i0
            Xb = (X[i0:i1] + istep) * flen
            Xdown = flen + (array(range(i2)) + .5) * down / i2
            offsets = concatenate([Xup, Xb, Xdown])
            if strand is None or strand > 0:
                return start + offsets
            return end - offsets

        def flank_option(value, default):
            """Turn an upstream/downstream option into a 2-tuple flank setting."""
            if value is None:
                return default
            size = int(value)
            if size > 50:
                return (size, 5)
            if size > 0:
                return (size, 1)
            return (0, 0)

        def optional_float(value):
            """Return ``float(value)``, or None when it cannot be converted."""
            try:
                return float(value)
            except (ValueError, TypeError):
                return None

        def archive_table(fname, columns, rows):
            """Write a tab-delimited table to a temporary file and add it to the archive."""
            datf = self.temporary_path(fname=fname)
            with open(datf, "w") as dff:
                dff.write("\t".join([""] + [str(x) for x in columns]) + "\n")
                for rname, values in rows:
                    dff.write("\t".join([rname] + [str(x) for x in values]) + "\n")
            tarfh.add(datf, arcname=os.path.basename(datf))

        features = track(kw.get('features'), chrmeta="guess")
        #signals = kw.get('SigMulti',{}).get('signals', [])
        signals = kw.get('signals', [])
        if not isinstance(signals, list):
            signals = [signals]
        signals = [track(sig) for sig in signals]
        snames = [sig.name for sig in signals]

        upstr = flank_option(kw.get("upstream"), _upstr)
        downstr = flank_option(kw.get("downstream"), _downstr)
        if kw.get("nbins") is not None:
            nbins = max(1, int(kw["nbins"]))
        else:
            nbins = _nbins
        if kw.get("noclust") is not None:
            noclust = str(kw["noclust"]).lower() in ['1', 'true', 't', 'on']
        else:
            noclust = False
        ymin = optional_float(kw.get('ymin'))
        ymax = optional_float(kw.get('ymax'))

        # Score every chromosome and stack the non-empty results.
        labels = None
        data = None
        for chrom in features.chrmeta:
            if 'name' in features.fields:
                _fread = features.read(chrom)
            else:
                _fread = add_name_field(features.read(chrom))
            _l, _d = feature_matrix([s.read(chrom) for s in signals], _fread,
                                    segment=True, nbins=nbins,
                                    upstream=upstr, downstream=downstr)
            if _d.size == 0:
                continue
            if data is None:
                labels = _l
                data = _d
            else:
                labels = concatenate((labels, _l))
                data = vstack((data, _d))

        outf = str(kw.get('output'))
        if outf not in output_list:
            outf = output_list[0]
        pdf = self.temporary_path(fname='plot_features.pdf')
        # Fail before the archive is opened so no file handle is left behind.
        if data is None:
            raise ValueError("No data")

        tarname = None
        tarfh = None
        if outf == 'archive':
            tarname = self.temporary_path(fname='plot_features.tar.gz')
            tarfh = tarfile.open(tarname, "w:gz")
        try:
            mode = kw.get('mode', 0)
            if str(mode) in [str(x[0]) for x in plot_types]:
                mode = int(mode)
            X = array(range(-upstr[1] + 1, nbins + downstr[1] + 1)) / (1.0 * nbins)
            if mode in plot_types[0]:  # heatmap
                if 'name' in features.fields:
                    _fread = features.read(fields=['chr', 'start', 'end', 'name'])
                else:
                    _fread = add_name_field(features.read(fields=['chr', 'start', 'end']))
                # Row of `data` holding each feature, in the order of the file.
                order = [where(labels == feat[3])[0][0] for feat in _fread]
                new = True
                for n in range(data.shape[-1] - 1):
                    heatmap(data[order, :, n], output=pdf, new=new, last=False,
                            rows=labels[order], columns=X, main=snames[n],
                            orderRows=not(noclust), orderCols=False,
                            ymin=ymin, ymax=ymax)
                    new = False
                heatmap(data[order, :, -1], output=pdf, new=new, last=True,
                        rows=labels[order], columns=X, main=snames[-1],
                        orderRows=not(noclust), orderCols=False,
                        ymin=ymin, ymax=ymax)
                if tarfh is not None:
                    for n, sn in enumerate(snames):
                        archive_table(sn + "_data.txt", X,
                                      ((labels[k], data[k, :, n]) for k in order))
            elif mode in plot_types[1]:  # average lineplot
                Y = data.mean(axis=0)
                if ymin is None:
                    ymin = min([x.min() for x in Y] + [0])
                if ymax is None:
                    ymax = max([x.max() for x in Y])
                lineplot(X, [Y[:, n] for n in range(data.shape[-1])],
                         output=pdf, new=True, last=True, legend=snames,
                         ylim=(ymin, ymax))
                if tarfh is not None:
                    archive_table("lineplot_data.txt", X,
                                  ((sn, Y[:, n]) for n, sn in enumerate(snames)))
            elif mode in plot_types[2]:  # mosaic
                mfrow = [4, 3]
                nplot = min(data.shape[0], max_pages * mfrow[0] * mfrow[1])
                if ymin is None:
                    ymin = min([data.min(), 0])
                if ymax is None:
                    ymax = data.max()
                _f = ['chr', 'start', 'end']
                _si = None
                if 'strand' in features.fields:
                    _f.append('strand')
                    _si = 3
                if 'name' in features.fields:
                    _fread = features.read(fields=_f + ['name'])
                else:
                    _fread = add_name_field(features.read(fields=_f))
                order = []
                for nf, feat in enumerate(_fread):
                    reg = where(labels == feat[-1])[0][0]
                    order.append(reg)
                    X1 = make_X_labels(X, feat[1], feat[2],
                                       feat[_si] if _si else None,
                                       downstr[0], upstr[0])
                    xlim = (X1[0], X1[-1])
                    Y = [data[reg, :, n] for n in range(data.shape[-1])]
                    # First/last are decided independently so that a single
                    # plot is both: it opens the figure AND finalises it.
                    is_first = (nf == 0)
                    is_last = (nf >= nplot - 1)
                    extra = {}
                    if is_first:
                        extra['mfrow'] = mfrow
                    if is_last:
                        extra['legend'] = snames
                    lineplot(X1, Y, output=pdf, new=is_first, last=is_last,
                             main=labels[reg], ylim=(ymin, ymax), xlim=xlim,
                             **extra)
                    if is_last:
                        break
                if tarfh is not None:
                    for n, sn in enumerate(snames):
                        archive_table(sn + "_data.txt", X,
                                      ((labels[k], data[k, :, n]) for k in order))
            else:
                raise ValueError("Mode not implemented: %s" % mode)
            if tarfh is not None:
                tarfh.add(pdf, arcname=os.path.basename(pdf))
        finally:
            # Close the archive on success and on any error raised above.
            if tarfh is not None:
                tarfh.close()
        if outf == 'archive':
            self.new_file(tarname, 'data_archive')
        else:
            self.new_file(pdf, 'plot_features')
        return self.display_time()
Example #4
0
    def __call__(self, **kw):
        """Plot signal scores over a set of features and register the result.

        The signals are scored over the features chromosome by chromosome
        with ``feature_matrix(..., segment=True)``; the resulting matrices
        are stacked and drawn as a heatmap, an average lineplot or a mosaic
        of per-feature lineplots depending on ``mode``.

        Keyword Args:
            features: Passed to ``track`` with ``chrmeta="guess"``.
            SigMulti: Optional mapping whose ``'signals'`` entry is one
                signal or a list of signals, each passed to ``track``.
            upstream, downstream: Optional, converted with ``int()``.  A
                value above 50 gives ``(value, 5)``, a value above 0 gives
                ``(value, 1)``, anything else ``(0, 0)``.  When absent the
                module defaults ``_upstr`` / ``_downstr`` are used.
            nbins: Optional, converted with ``int()`` and clamped to at
                least 1; defaults to ``_nbins``.
            noclust: Optional; true when its lower-cased string form is one
                of ``'1'``, ``'true'``, ``'t'``, ``'on'``.  Its negation is
                passed to ``heatmap`` as ``orderRows``.
            ymin, ymax: Optional; values ``float()`` cannot convert are
                treated as not given.
            output: Output type; values not in ``output_list`` fall back to
                ``output_list[0]``.  ``'archive'`` produces a ``tar.gz``
                holding the PDF plus tab-delimited data tables.
            mode: Plot type, matched by membership against the entries of
                ``plot_types`` (defaults to 0).

        Returns:
            Whatever ``self.display_time()`` returns.

        Raises:
            ValueError: If no chromosome produced any data, or if ``mode``
                matches none of the implemented plot types.
        """

        def make_X_labels(X, start, end, strand, down, up):
            """Convert the relative bin positions ``X`` to coordinates of one feature."""
            flen = end - start
            # i0: index just past the bin where X == 0 (0 when there is
            # none); the bins before it form the upstream part.
            i0 = (list(where(X == 0)[0]) + [-1])[0] + 1
            # i1: index just past the bin where X == 1 (len(X) when there
            # is none); the i2 bins from there on form the downstream part.
            i1 = (list(where(X == 1)[0]) + [len(X) - 1])[0] + 1
            i2 = len(X) - i1
            istep = 0.5 / (i1 - i0)
            # Flank sizes below 1 are scaled by the feature length.
            if down < 1:
                down *= flen
            if up < 1:
                up *= flen
            Xup = (array(range(-i0, 0)) + .5) * up / i0
            Xb = (X[i0:i1] + istep) * flen
            Xdown = flen + (array(range(i2)) + .5) * down / i2
            offsets = concatenate([Xup, Xb, Xdown])
            if strand is None or strand > 0:
                return start + offsets
            return end - offsets

        def flank_option(value, default):
            """Turn an upstream/downstream option into a 2-tuple flank setting."""
            if value is None:
                return default
            size = int(value)
            if size > 50:
                return (size, 5)
            if size > 0:
                return (size, 1)
            return (0, 0)

        def optional_float(value):
            """Return ``float(value)``, or None when it cannot be converted."""
            try:
                return float(value)
            except (ValueError, TypeError):
                return None

        def archive_table(fname, columns, rows):
            """Write a tab-delimited table to a temporary file and add it to the archive."""
            datf = self.temporary_path(fname=fname)
            with open(datf, "w") as dff:
                dff.write("\t".join([""] + [str(x) for x in columns]) + "\n")
                for rname, values in rows:
                    dff.write("\t".join([rname] + [str(x) for x in values]) + "\n")
            tarfh.add(datf, arcname=os.path.basename(datf))

        features = track(kw.get('features'), chrmeta="guess")
        signals = kw.get('SigMulti', {}).get('signals', [])
        if not isinstance(signals, list):
            signals = [signals]
        signals = [track(sig) for sig in signals]
        snames = [sig.name for sig in signals]

        upstr = flank_option(kw.get("upstream"), _upstr)
        downstr = flank_option(kw.get("downstream"), _downstr)
        if kw.get("nbins") is not None:
            nbins = max(1, int(kw["nbins"]))
        else:
            nbins = _nbins
        if kw.get("noclust") is not None:
            noclust = str(kw["noclust"]).lower() in ['1', 'true', 't', 'on']
        else:
            noclust = False
        ymin = optional_float(kw.get('ymin'))
        ymax = optional_float(kw.get('ymax'))

        # Score every chromosome and stack the non-empty results.
        labels = None
        data = None
        for chrom in features.chrmeta:
            if 'name' in features.fields:
                _fread = features.read(chrom)
            else:
                _fread = add_name_field(features.read(chrom))
            _l, _d = feature_matrix([s.read(chrom) for s in signals],
                                    _fread,
                                    segment=True,
                                    nbins=nbins,
                                    upstream=upstr,
                                    downstream=downstr)
            if _d.size == 0:
                continue
            if data is None:
                labels = _l
                data = _d
            else:
                labels = concatenate((labels, _l))
                data = vstack((data, _d))

        outf = str(kw.get('output'))
        if outf not in output_list:
            outf = output_list[0]
        pdf = self.temporary_path(fname='plot_features.pdf')
        # Fail before the archive is opened so no file handle is left behind.
        if data is None:
            raise ValueError("No data")

        tarname = None
        tarfh = None
        if outf == 'archive':
            tarname = self.temporary_path(fname='plot_features.tar.gz')
            tarfh = tarfile.open(tarname, "w:gz")
        try:
            mode = kw.get('mode', 0)
            if str(mode) in [str(x[0]) for x in plot_types]:
                mode = int(mode)
            X = array(range(-upstr[1] + 1, nbins + downstr[1] + 1)) / (1.0 * nbins)
            if mode in plot_types[0]:  #heatmap
                if 'name' in features.fields:
                    _fread = features.read(fields=['chr', 'start', 'end', 'name'])
                else:
                    _fread = add_name_field(
                        features.read(fields=['chr', 'start', 'end']))
                # Row of `data` holding each feature, in the order of the file.
                order = [where(labels == feat[3])[0][0] for feat in _fread]
                new = True
                for n in range(data.shape[-1] - 1):
                    heatmap(data[order, :, n],
                            output=pdf,
                            new=new,
                            last=False,
                            rows=labels[order],
                            columns=X,
                            main=snames[n],
                            orderRows=not (noclust),
                            orderCols=False,
                            ymin=ymin,
                            ymax=ymax)
                    new = False
                heatmap(data[order, :, -1],
                        output=pdf,
                        new=new,
                        last=True,
                        rows=labels[order],
                        columns=X,
                        main=snames[-1],
                        orderRows=not (noclust),
                        orderCols=False,
                        ymin=ymin,
                        ymax=ymax)
                if tarfh is not None:
                    for n, sn in enumerate(snames):
                        archive_table(sn + "_data.txt", X,
                                      ((labels[k], data[k, :, n])
                                       for k in order))
            elif mode in plot_types[1]:  #average lineplot
                Y = data.mean(axis=0)
                if ymin is None:
                    ymin = min([x.min() for x in Y] + [0])
                if ymax is None:
                    ymax = max([x.max() for x in Y])
                lineplot(X, [Y[:, n] for n in range(data.shape[-1])],
                         output=pdf,
                         new=True,
                         last=True,
                         legend=snames,
                         ylim=(ymin, ymax))
                if tarfh is not None:
                    archive_table("lineplot_data.txt", X,
                                  ((sn, Y[:, n])
                                   for n, sn in enumerate(snames)))
            elif mode in plot_types[2]:  #mosaic
                mfrow = [4, 3]
                nplot = min(data.shape[0], max_pages * mfrow[0] * mfrow[1])
                if ymin is None:
                    ymin = min([data.min(), 0])
                if ymax is None:
                    ymax = data.max()
                _f = ['chr', 'start', 'end']
                _si = None
                if 'strand' in features.fields:
                    _f.append('strand')
                    _si = 3
                if 'name' in features.fields:
                    _fread = features.read(fields=_f + ['name'])
                else:
                    _fread = add_name_field(features.read(fields=_f))
                order = []
                for nf, feat in enumerate(_fread):
                    reg = where(labels == feat[-1])[0][0]
                    order.append(reg)
                    X1 = make_X_labels(X, feat[1], feat[2],
                                       feat[_si] if _si else None,
                                       downstr[0], upstr[0])
                    xlim = (X1[0], X1[-1])
                    Y = [data[reg, :, n] for n in range(data.shape[-1])]
                    # First/last are decided independently so that a single
                    # plot is both: it opens the figure AND finalises it.
                    is_first = (nf == 0)
                    is_last = (nf >= nplot - 1)
                    extra = {}
                    if is_first:
                        extra['mfrow'] = mfrow
                    if is_last:
                        extra['legend'] = snames
                    lineplot(X1,
                             Y,
                             output=pdf,
                             new=is_first,
                             last=is_last,
                             main=labels[reg],
                             ylim=(ymin, ymax),
                             xlim=xlim,
                             **extra)
                    if is_last:
                        break
                if tarfh is not None:
                    for n, sn in enumerate(snames):
                        archive_table(sn + "_data.txt", X,
                                      ((labels[k], data[k, :, n])
                                       for k in order))
            else:
                raise ValueError("Mode not implemented: %s" % mode)
            if tarfh is not None:
                tarfh.add(pdf, arcname=os.path.basename(pdf))
        finally:
            # Close the archive on success and on any error raised above.
            if tarfh is not None:
                tarfh.close()
        if outf == 'archive':
            self.new_file(tarname, 'data_archive')
        else:
            self.new_file(pdf, 'plot_features')
        return self.display_time()