예제 #1
0
 def __call__(self, **kw):
     """Draw a pairwise plot of signal tracks over a set of genomic features.

     Keyword arguments (all read from ``kw``):
         feature_type: 0 gene bodies (default), 1 promoters, 2 exons,
             3 custom features track.
         assembly: identifier passed to ``genrep.Assembly``; required unless
             ``feature_type`` is 3.
         signals: one signal track path, or a list of them.
         upstream, downstream: promoter window sizes (feature_type 1); fall
             back to ``prom_up_def`` / ``prom_down_def`` when missing/empty.
         features: path of the custom features track (feature_type 3).
         mode: 0 for correlation, 1 for density (required key).

     Returns:
         The value of ``self.display_time()``.

     Raises:
         ValueError: no assembly given for an assembly-based feature type,
             unknown feature type, unknown mode, or no data to plot.
     """
     feature_type = int(kw.get('feature_type') or 0)
     assembly_id = kw.get('assembly') or None
     chrmeta = "guess"
     if assembly_id:
         assembly = genrep.Assembly(assembly_id)
         chrmeta = assembly.chrmeta
         genes = assembly.gene_track
         exons = assembly.exon_track
     elif feature_type != 3:
         # Only a custom features track (type 3) can work without an assembly:
         # genes, promoters and exons are all taken from the assembly.
         raise ValueError("Please specify an assembly")
     signals = kw.get('signals', [])
     if not isinstance(signals, list):
         signals = [signals]
     # Plot labels: file names without directory and extension.
     snames = [os.path.splitext(os.path.basename(sig))[0] for sig in signals]
     signals = [track(sig, chrmeta=chrmeta) for sig in signals]
     if feature_type == 0:  # bodies
         features = genes
     elif feature_type == 1:  # promoters
         prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                      'after_start': int(kw.get('downstream') or prom_down_def),
                      'on_strand': True}
         features = lambda c: neighborhood(genes(c), **prom_pars)
     elif feature_type == 2:  # exons
         features = exons
     elif feature_type == 3:  # custom track
         _t = track(kw.get('features'), chrmeta=chrmeta)
         # The custom track's own chromosome metadata replaces the default.
         chrmeta = _t.chrmeta
         features = _t.read
     else:
         raise ValueError("Feature type not known: %i" % feature_type)
     pdf = self.temporary_path(fname='plot_pairs.pdf')
     narr = None
     mode = int(kw['mode'])
     if mode == 0:  # correl
         xarr = array(range(-cormax, cormax + 1))
         srtdchrom = sorted(chrmeta.keys())
         # Flatten the features of all chromosomes, keeping the first three
         # fields of each one.
         features = [x[:3] for chrom in srtdchrom
                     for x in sorted_stream(features(chrom))]
         _f = ['chr', 'start', 'end', 'score']
         narr = correlation([s.read(fields=_f) for s in signals],
                            features, (-cormax, cormax), True)
     elif mode == 1:  # density
         xarr = None
         for chrom in chrmeta:
             feat = features(chrom)
             means = score_by_feature([s.read(chrom) for s in signals], feat)
             # Fields beyond the feature's own ones (presumably one score per
             # signal) are the ones turned into an array.
             mf = means.fields[len(feat.fields):]
             _n, _l = score_array(means, mf)
             if _n.size == 0:
                 continue
             # Stack the per-chromosome arrays into a single one.
             if narr is None:
                 narr = _n
             else:
                 narr = vstack((narr, _n))
     else:
         raise ValueError("Mode not implemented: %s" % kw['mode'])
     if narr is None:
         raise ValueError("No data")
     pairs(narr, xarr, labels=snames, output=pdf)
     self.new_file(pdf, 'plot_pairs')
     return self.display_time()
예제 #2
0
 def quantify(self, **kw):
     """Score a set of genomic features with one or more signal tracks.

     Keyword arguments (all read from ``kw``):
         feature_type: 0 genes (default), 1 promoters, 2 exons,
             3 custom features track. ``None`` or an empty value means 0.
         score_op: name of the scoring function handed to
             ``score_by_feature`` as ``fn`` (default ``'mean'``).
         assembly: identifier passed to ``genrep.Assembly``; required unless
             ``feature_type`` is 3.
         format: format of the output track (default ``'sql'``).
         signals: one signal track path, or a list of them.
         upstream, downstream: promoter window sizes (feature_type 1); fall
             back to ``prom_up_def`` / ``prom_down_def`` when missing/empty.
         features: path of the custom features track (feature_type 3).

     Returns:
         Path of the written output track.

     Raises:
         ValueError: no assembly given for an assembly-based feature type,
             or unknown feature type.
         AssertionError: the custom features file does not exist.
     """
     # `or 0` tolerates None / empty values instead of failing inside int().
     feature_type = int(kw.get('feature_type') or 0)
     func = str(kw.get('score_op', 'mean'))
     assembly_id = kw.get('assembly')
     out_format = kw.get('format', 'sql')
     chrmeta = "guess"
     if assembly_id:
         assembly = genrep.Assembly(assembly_id)
         chrmeta = assembly.chrmeta
         genes = assembly.gene_track
         exons = assembly.exon_track
     elif feature_type != 3:
         # Only a custom features track can work without an assembly.
         raise ValueError("Please specify an assembly")
     signals = kw.get('signals', [])
     if not isinstance(signals, list):
         signals = [signals]
     signals = [track(sig, chrmeta=chrmeta) for sig in signals]
     if feature_type == 0:
         features = genes
     elif feature_type == 1:
         prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                      'after_start': int(kw.get('downstream') or prom_down_def),
                      'on_strand': True}
         features = lambda c: neighborhood(genes(c), **prom_pars)
     elif feature_type == 2:
         features = exons
     elif feature_type == 3:
         assert os.path.exists(str(kw.get('features'))), "Features file not found: '%s'" % kw.get("features")
         _t = track(kw.get('features'), chrmeta=chrmeta)
         # The custom track's own chromosome metadata replaces the default.
         chrmeta = _t.chrmeta
         features = _t.read
     else:
         raise ValueError("Take feature_type in %s." % ftypes)
     output = self.temporary_path(fname='features_quantification.' + out_format)
     # One score column per signal; a lone signal gets a plain "score" column.
     if len(signals) > 1:
         _f = ["score" + str(i) for i in range(len(signals))]
     else:
         _f = ["score"]
     tout = track(output, out_format, fields=['chr', 'start', 'end', 'name'] + _f,
                  chrmeta=chrmeta, info={'datatype': 'qualitative'})
     try:
         for chrom in chrmeta:
             sread = [sig.read(chrom) for sig in signals]
             tout.write(score_by_feature(sread, features(chrom), fn=func),
                        chrom=chrom, clip=True)
     finally:
         # Close the output track even when scoring or writing fails.
         tout.close()
     return output
예제 #3
0
 def __call__(self, **kw):
     """Score genomic features with signal tracks and register the result.

     Keyword arguments (all read from ``kw``):
         feature_type: 0 genes (default), 1 promoters, 2 custom features
             track. ``None`` or an empty value means 0.
         score_op: name of the scoring function handed to
             ``score_by_feature`` as ``fn`` (default ``'mean'``).
         assembly: identifier passed to ``genrep.Assembly``; required unless
             ``feature_type`` is 2.
         signals: one signal track path, or a list of them.
         upstream, downstream: promoter window sizes (feature_type 1); fall
             back to ``prom_up_def`` / ``prom_down_def`` when missing/empty.
         features: path of the custom features track (feature_type 2).

     Returns:
         1 once the output has been written and passed to ``self.new_file``;
         2 when ``feature_type`` is not one of 0, 1, 2.

     Raises:
         ValueError: no assembly given for an assembly-based feature type.
     """
     # `or 0` tolerates None / empty values instead of failing inside int().
     feature_type = int(kw.get('feature_type') or 0)
     func = str(kw.get('score_op', 'mean'))
     assembly_id = kw.get('assembly') or None
     chrmeta = "guess"
     if assembly_id:
         assembly = genrep.Assembly(assembly_id)
         chrmeta = assembly.chrmeta
         genes = assembly.gene_track
     elif feature_type != 2:
         # Only a custom features track can work without an assembly.
         raise ValueError("Please specify an assembly")
     signals = kw.get('signals', [])
     # A single path must not be iterated character by character.
     if not isinstance(signals, list):
         signals = [signals]
     signals = [track(sig, chrmeta=chrmeta) for sig in signals]
     if feature_type == 0:
         features = genes
     elif feature_type == 1:
         prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                      'after_start': int(kw.get('downstream') or prom_down_def),
                      'on_strand': True}
         features = lambda c: neighborhood(genes(c), **prom_pars)
     elif feature_type == 2:
         _t = track(kw.get('features'), chrmeta=chrmeta)
         # The custom track's own chromosome metadata replaces the default.
         chrmeta = _t.chrmeta
         features = _t.read
     else:
         return 2
     output = self.temporary_path(fname='features_quantification.sql')
     # One score column per signal; a lone signal gets a plain "score" column.
     if len(signals) > 1:
         _f = ["score" + str(i) for i in range(len(signals))]
     else:
         _f = ["score"]
     tout = track(output, format='sql', fields=['start', 'end', 'name'] + _f,
                  chrmeta=chrmeta, info={'datatype': 'qualitative'})
     try:
         for chrom in chrmeta:
             sread = [sig.read(chrom) for sig in signals]
             tout.write(score_by_feature(sread, features(chrom), fn=func),
                        chrom=chrom, clip=True)
     finally:
         # Close the output track even when scoring or writing fails.
         tout.close()
     self.new_file(output, 'features_quantification')
     return 1
예제 #4
0
    def __call__(self, **kw):
        """Run a DESeq differential analysis of signal tracks over features.

        Scores are summed per feature for every signal, scaled by each
        signal's normalization factor, rounded to integers and handed to R
        (DESeq). One result table is written per pair of distinct signal
        names and passed to ``self.new_file``.

        Keyword arguments (all read from ``kw``):
            feature_type: 0 genes (default), 1 promoters, 2 exons,
                3 custom features track. ``None`` or empty means 0.
            assembly: identifier passed to ``genrep.Assembly``; required
                unless ``feature_type`` is 3.
            signals: one signal track path, or a list of them.
            upstream, downstream: promoter window sizes (feature_type 1);
                fall back to ``prom_up_def`` / ``prom_down_def``.
            features: path of the custom features track (feature_type 3).

        Returns:
            1 on success; 2 when ``feature_type`` is not one of 0-3.

        Raises:
            ValueError: no assembly given for an assembly-based feature
                type, or no scored feature was produced.
            AssertionError: a features or signal file does not exist.
        """
        # `or 0` tolerates None / empty values instead of failing in int().
        feature_type = int(kw.get('feature_type') or 0)
        assembly_id = kw.get('assembly')
        chrmeta = "guess"
        if assembly_id:
            assembly = genrep.Assembly(assembly_id)
            chrmeta = assembly.chrmeta
            genes = assembly.gene_track
            exons = assembly.exon_track
        elif feature_type != 3:
            # Only a custom features track can work without an assembly.
            raise ValueError("Please specify an assembly")
        if feature_type == 0:
            features = genes
        elif feature_type == 1:
            prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                         'after_start': int(kw.get('downstream') or prom_down_def),
                         'on_strand': True}
            features = lambda c: neighborhood(genes(c), **prom_pars)
        elif feature_type == 2:
            features = exons
        elif feature_type == 3:
            assert os.path.exists(kw.get('features'))
            _t = track(kw.get('features'), chrmeta=chrmeta)
            # The custom track's own chromosome metadata replaces the default.
            chrmeta = _t.chrmeta
            features = _t.read
        else:
            return 2

        signal_paths = kw.get('signals', [])
        # A single path must not be iterated character by character.
        if not isinstance(signal_paths, list):
            signal_paths = [signal_paths]
        signals = []
        norm_factors = []
        for sig in signal_paths:
            assert os.path.exists(sig), "File not found: %s." % sig
            _t = track(sig, chrmeta=chrmeta)
            # Per-signal scaling: an explicit 'normalization' entry wins,
            # otherwise it is derived from 'nreads' (and 'read_extension'),
            # otherwise scores are left unscaled.
            if 'normalization' in _t.info:
                _nf = float(_t.info['normalization'])
            elif 'nreads' in _t.info:
                _nf = float(_t.info['nreads']) * 1e-7 / float(_t.info.get('read_extension', 1))
            else:
                _nf = 1
            signals.append(_t)
            norm_factors.append(_nf)
        # One score column per signal; a lone signal gets a plain "score".
        if len(signals) > 1:
            _f = ["score" + str(i) for i in range(len(signals))]
        else:
            _f = ["score"]
        de_list = []
        name_idx = None
        for chrom in chrmeta:
            sread = [sig.read(chrom) for sig in signals]
            mread = score_by_feature(sread, features(chrom), fn='sum')
            name_idx = mread.fields.index("name")
            de_list.extend(mread)
        if not de_list:
            # Nothing was scored (no chromosome or no feature): there is no
            # matrix to hand over to R.
            raise ValueError("No data")
        # Turn all scores into integers. The trailing len(_f) items of a row
        # are its scores, one per signal, so each one is scaled by the
        # normalization factor of its own signal (column), not of its row.
        nscores = len(_f)
        de_matrix = numpy.asarray(
            [[int(s * nf + .5) for s, nf in zip(x[-nscores:], norm_factors)]
             for x in de_list], dtype=float)
        rownames = numpy.asarray([x[name_idx] for x in de_list])
        colnames = numpy.asarray([os.path.splitext(os.path.basename(s.path))[0]
                                  for s in signals])
        del de_list
        output = self.temporary_path(fname='DE')

        robjects.r.assign('Mdata', numpy2ri.numpy2ri(de_matrix))
        robjects.r.assign('row_names', numpy2ri.numpy2ri(rownames))
        robjects.r.assign('col_names', numpy2ri.numpy2ri(colnames))
        # NOTE(review): the R snippet computes `method` and `couples` but
        # then calls estimateVarianceFunctions with method='blind' and never
        # uses `couples` -- confirm whether that is intended.
        robjects.r("""
        Mdata <- as.data.frame(Mdata,row.names=row_names)
        conds <- unlist(strsplit(col_names,".",fixed=T))
        colnames(Mdata) <- conds
        groups <- unique(conds)
        couples <- combn(groups,2)

        # Still need to check that replicates are not identical - lfproc would fail
        if (any(table(conds)>1)){ method = 'normal' # if replicates
        } else { method = 'blind' }

        library(DESeq)
        cds <- newCountDataSet(Mdata, conds)
        cds <- estimateSizeFactors(cds)
        cds <- estimateVarianceFunctions(cds,method='blind')
        """)

        # One comparison per unordered pair of distinct signal names.
        groups = list(set(colnames))
        couples = itertools.combinations(groups, 2)
        for c in couples:
            out = output + '_' + c[0] + '-' + c[1] + '.txt'
            print(out)
            r_cmd = """
            res <- nbinomTest(cds, '%s', '%s')
            res <- res[order(res[,8]),]
            write.table(res, '%s', row.names=F)
            """ % (c[0], c[1], out)
            robjects.r(r_cmd)
            self.new_file(out, 'differential_expression')
        return 1