Exemple #1
0
    def __call__(self, **kw):
        """Merge two density tracks into one, shifting them in opposite directions.

        Keyword arguments read from ``kw``:
            forward, reverse: inputs opened with ``track.track``.
            assembly: optional; forwarded as ``chrmeta`` when opening the inputs.
            shift: integer added to 'start'/'end' of the forward track and
                subtracted from those of the reverse track (default 0).
                A negative value requests automatic detection from the
                cross-correlation of the two tracks.

        The merged scores are written to a temporary 'sql' track which is then
        registered through ``self.new_file``.

        Returns:
            1 on completion.

        Raises:
            ValueError: if an assembly is given but neither input exposes any
                chrmeta, or if the shift cannot be detected automatically.
        """
        def _shift(stream, shift):
            # Rebuild every row with `shift` added to its 'start' and 'end' items.
            istart = stream.fields.index('start')
            iend = stream.fields.index('end')
            i1 = min(istart, iend)
            i2 = max(istart, iend)

            def _apply_shift(x):
                return x[:i1] + (x[i1] + shift,) + x[i1 + 1:i2] + (x[i2] + shift,) + x[i2 + 1:]
            return track.FeatureStream((_apply_shift(x) for x in stream),
                                       fields=stream.fields)

        assembly = kw.get('assembly') or None
        tfwd = track.track(kw.get('forward'), chrmeta=assembly)
        trev = track.track(kw.get('reverse'), chrmeta=assembly)
        try:
            if not assembly:  # btrack does the job, take the max of both chromosome lengths
                chrmeta = tfwd.chrmeta
                for k, v in trev.chrmeta.iteritems():
                    chrmeta.setdefault(k, {})['length'] = max(v['length'], chrmeta.get(k, {}).get('length', 0))
            elif tfwd.chrmeta:
                chrmeta = tfwd.chrmeta  # For sql files, btrack doesn't make it,
            elif trev.chrmeta:
                chrmeta = trev.chrmeta  # so one can contain the info while the second does not.
            else:
                raise ValueError("Must specify an assembly.")  # In case nothing works - should not happen

            shiftval = int(kw.get('shift', 0))
            if shiftval < 0:  # Determine shift automatically
                shiftval = None
                xcor_lim = 300
                for chrom, v in chrmeta.iteritems():
                    chrsize = v['length']
                    # The lag window never grows back once a small chromosome shrank it.
                    xcor_lim = min(xcor_lim, 0.01 * chrsize)
                    xcor = correlation([tfwd.read(chrom), trev.read(chrom)], regions=(1, chrsize),
                                       limits=(-xcor_lim, xcor_lim))
                    max_xcor_idx = xcor.argmax()
                    # The first chromosome whose peak correlation exceeds 0.2 decides the shift.
                    if xcor[max_xcor_idx] > 0.2:
                        shiftval = (max_xcor_idx - xcor_lim - 1) / 2
                        break
                # NOTE(review): this also rejects a detected shift of 0 - confirm that is intended.
                if not shiftval:
                    raise ValueError("Unable to detect shift automatically. Must specify a shift value.")

            output = self.temporary_path(fname='density_merged', ext='sql')
            fields = ['chr', 'start', 'end', 'score']
            tout = track.track(output, format='sql', fields=fields, chrmeta=chrmeta,
                               info={'datatype': 'quantitative'})
            try:
                mode = 'write'
                for chrom in chrmeta:
                    # `shiftval` is a single number shared by all chromosomes, not a
                    # per-chromosome mapping, so it must not be indexed by `chrom`.
                    tout.write(merge_scores([_shift(tfwd.read(selection=chrom), shiftval),
                                             _shift(trev.read(selection=chrom), -shiftval)]),
                               chrom=chrom, mode=mode, clip=True)
                    mode = 'append'
            finally:
                tout.close()
        finally:
            # Release the inputs even when detection or writing fails.
            trev.close()
            tfwd.close()
        self.new_file(output, 'density_merged')
        return 1
Exemple #2
0
 def __call__(self, **kw):
     """Draw a pairs plot of the signal tracks over a set of features.

     Keyword arguments read from ``kw``:
         feature_type: 0 gene bodies (default), 1 promoters, 2 exons,
             3 custom features track.
         assembly: assembly identifier; required unless ``feature_type`` is 3,
             since genes and exons are taken from the assembly.
         signals: one signal file or a list of them.
         upstream, downstream: promoter window for ``feature_type`` 1;
             fall back to ``prom_up_def`` / ``prom_down_def``.
         features: the custom features track for ``feature_type`` 3.
         mode: 0 for correlation, 1 for density (required key).

     The plot is written to a temporary PDF registered via ``self.new_file``.

     Returns:
         The value of ``self.display_time()``.

     Raises:
         ValueError: missing assembly, unknown feature type, unimplemented
             mode, or no data to plot.
     """
     feature_type = int(kw.get('feature_type') or 0)
     assembly_id = kw.get('assembly') or None
     chrmeta = "guess"
     if assembly_id:
         assembly = genrep.Assembly(assembly_id)
         chrmeta = assembly.chrmeta
         genes = assembly.gene_track
         exons = assembly.exon_track
     elif feature_type != 3:
         # Only a custom features track (type 3) supplies its own chrmeta and
         # features; genes, promoters and exons all come from the assembly.
         raise ValueError("Please specify an assembly")
     signals = kw.get('signals', [])
     if not isinstance(signals, list):
         signals = [signals]
     snames = [os.path.splitext(os.path.basename(sig))[0] for sig in signals]
     signals = [track(sig, chrmeta=chrmeta) for sig in signals]
     if feature_type == 0:  # bodies
         features = genes
     elif feature_type == 1:  # promoters
         prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                      'after_start': int(kw.get('downstream') or prom_down_def),
                      'on_strand': True}
         features = lambda c: neighborhood(genes(c), **prom_pars)
     elif feature_type == 2:  # exons
         features = exons
     elif feature_type == 3:  # custom track
         _t = track(kw.get('features'), chrmeta=chrmeta)
         chrmeta = _t.chrmeta
         features = _t.read
     else:
         raise ValueError("Feature type not known: %i" % feature_type)
     pdf = self.temporary_path(fname='plot_pairs.pdf')
     narr = None
     mode = int(kw['mode'])
     if mode == 0:  # correl
         xarr = array(range(-cormax, cormax + 1))
         # Flatten the features of all chromosomes, in sorted chromosome order,
         # keeping only the first three items of each.
         regions = [x[:3] for chrom in sorted(chrmeta.keys())
                    for x in sorted_stream(features(chrom))]
         _f = ['chr', 'start', 'end', 'score']
         narr = correlation([s.read(fields=_f) for s in signals],
                            regions, (-cormax, cormax), True)
     elif mode == 1:  # density
         xarr = None
         for chrom in chrmeta:
             feat = features(chrom)
             means = score_by_feature([s.read(chrom) for s in signals], feat)
             # The score columns are those appended after the feature's own fields.
             mf = means.fields[len(feat.fields):]
             _n, _l = score_array(means, mf)
             if _n.size == 0:
                 continue
             # Stack the per-chromosome score arrays row-wise.
             narr = _n if narr is None else vstack((narr, _n))
     else:
         raise ValueError("Mode not implemented: %s" % kw['mode'])
     if narr is None:
         raise ValueError("No data")
     pairs(narr, xarr, labels=snames, output=pdf)
     self.new_file(pdf, 'plot_pairs')
     return self.display_time()