def __call__(self, **kw):
    """Plot pairwise comparisons of signal tracks over a set of features.

    Keyword arguments read from ``kw``:

    * ``feature_type``: 0 = gene bodies (default), 1 = promoters,
      2 = exons, 3 = custom features track given by ``features``.
    * ``assembly``: identifier handed to ``genrep.Assembly``; required
      unless ``feature_type`` is 3.
    * ``signals``: one path, or a list of paths, of signal tracks.
    * ``upstream`` / ``downstream``: passed to ``neighborhood`` as
      ``before_start`` / ``after_start`` for promoters; default to
      ``prom_up_def`` / ``prom_down_def``.
    * ``features``: path of the custom features track (type 3).
    * ``mode``: 0 = correlation, 1 = density (mandatory key).

    The plot is written to a temporary ``plot_pairs.pdf`` which is then
    registered through ``self.new_file``.

    Returns whatever ``self.display_time()`` returns.

    Raises ``ValueError`` when an assembly is required but missing, when
    the feature type or the mode is unknown, or when no data was collected.
    """
    feature_type = int(kw.get('feature_type') or 0)
    assembly_id = kw.get('assembly') or None
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
        exons = assembly.exon_track
    elif feature_type != 3:
        # Only the custom track (type 3) brings its own features; gene
        # bodies, promoters and exons all come from the assembly.
        raise ValueError("Please specify an assembly")

    signals = kw.get('signals', [])
    if not isinstance(signals, list):
        signals = [signals]
    # Plot labels: file names without directory and extension.
    snames = [os.path.splitext(os.path.basename(sig))[0] for sig in signals]
    signals = [track(sig, chrmeta=chrmeta) for sig in signals]

    if feature_type == 0:  # bodies
        features = genes
    elif feature_type == 1:  # promoters
        prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                     'after_start': int(kw.get('downstream') or prom_down_def),
                     'on_strand': True}
        features = lambda c: neighborhood(genes(c), **prom_pars)
    elif feature_type == 2:  # exons
        features = exons
    elif feature_type == 3:  # custom track
        _t = track(kw.get('features'), chrmeta=chrmeta)
        # The custom track's own chromosome list drives the loops below.
        chrmeta = _t.chrmeta
        features = _t.read
    else:
        raise ValueError("Feature type not known: %i" % feature_type)

    pdf = self.temporary_path(fname='plot_pairs.pdf')
    mode = int(kw['mode'])
    narr = None
    if mode == 0:  # correl
        xarr = array(range(-cormax, cormax + 1))
        # Keep only the first three fields of every feature, chromosome
        # by chromosome in sorted order.
        regions = [x[:3]
                   for chrom in sorted(chrmeta.keys())
                   for x in sorted_stream(features(chrom))]
        _f = ['chr', 'start', 'end', 'score']
        narr = correlation([s.read(fields=_f) for s in signals],
                           regions, (-cormax, cormax), True)
    elif mode == 1:  # density
        xarr = None
        for chrom in chrmeta:
            feat = features(chrom)
            means = score_by_feature([s.read(chrom) for s in signals], feat)
            # Fields appended after the feature's own fields are the scores.
            mf = means.fields[len(feat.fields):]
            _n, _ = score_array(means, mf)
            if _n.size == 0:
                continue
            if narr is None:
                narr = _n
            else:
                narr = vstack((narr, _n))
    else:
        raise ValueError("Mode not implemented: %s" % kw['mode'])

    if narr is None:
        raise ValueError("No data")
    pairs(narr, xarr, labels=snames, output=pdf)
    self.new_file(pdf, 'plot_pairs')
    return self.display_time()
def quantify(self, **kw):
    """Score signal tracks over a set of features and write them to a track.

    Keyword arguments read from ``kw``:

    * ``feature_type``: 0 = genes (default), 1 = promoters, 2 = exons,
      3 = custom features track given by ``features``.
    * ``score_op``: name passed as ``fn`` to ``score_by_feature``
      (default ``'mean'``).
    * ``assembly``: identifier handed to ``genrep.Assembly``; required
      unless ``feature_type`` is 3.
    * ``format``: format of the output track (default ``'sql'``); also
      used as the output file extension.
    * ``signals``: one path, or a list of paths, of signal tracks.
    * ``upstream`` / ``downstream``: passed to ``neighborhood`` as
      ``before_start`` / ``after_start`` for promoters; default to
      ``prom_up_def`` / ``prom_down_def``.
    * ``features``: path of the custom features track (type 3).

    Missing, ``None`` or empty values of ``feature_type``, ``score_op``
    and ``format`` fall back to their defaults.

    Returns the path of the temporary output track.

    Raises ``ValueError`` when an assembly is required but missing or when
    the feature type is unknown, and ``AssertionError`` when the custom
    features file does not exist.
    """
    feature_type = int(kw.get('feature_type') or 0)
    func = str(kw.get('score_op') or 'mean')
    assembly_id = kw.get('assembly')
    out_format = kw.get('format') or 'sql'
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
        exons = assembly.exon_track
    elif feature_type != 3:
        # Only the custom track (type 3) brings its own features.
        raise ValueError("Please specify an assembly")

    signals = kw.get('signals', [])
    if not isinstance(signals, list):
        signals = [signals]
    signals = [track(sig, chrmeta=chrmeta) for sig in signals]

    if feature_type == 0:
        features = genes
    elif feature_type == 1:
        prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                     'after_start': int(kw.get('downstream') or prom_down_def),
                     'on_strand': True}
        features = lambda c: neighborhood(genes(c), **prom_pars)
    elif feature_type == 2:
        features = exons
    elif feature_type == 3:
        assert os.path.exists(str(kw.get('features'))), "Features file not found: '%s'" % kw.get("features")
        _t = track(kw.get('features'), chrmeta=chrmeta)
        # The custom track's own chromosome list drives the loop below.
        chrmeta = _t.chrmeta
        features = _t.read
    else:
        raise ValueError("Take feature_type in %s." % ftypes)

    output = self.temporary_path(fname='features_quantification.' + out_format)
    # One score column per signal; a lone signal gets the plain "score" name.
    if len(signals) > 1:
        _f = ["score" + str(i) for i in range(len(signals))]
    else:
        _f = ["score"]
    tout = track(output, out_format,
                 fields=['chr', 'start', 'end', 'name'] + _f,
                 chrmeta=chrmeta, info={'datatype': 'qualitative'})
    try:
        for chrom in chrmeta:
            sread = [sig.read(chrom) for sig in signals]
            tout.write(score_by_feature(sread, features(chrom), fn=func),
                       chrom=chrom, clip=True)
    finally:
        # Close the output track even if scoring or writing fails.
        tout.close()
    return output
def __call__(self, **kw):
    """Score signal tracks over a set of features and save an sql track.

    Keyword arguments read from ``kw``:

    * ``feature_type``: 0 = genes (default), 1 = promoters,
      2 = custom features track given by ``features``.
    * ``score_op``: name passed as ``fn`` to ``score_by_feature``
      (default ``'mean'``).
    * ``assembly``: identifier handed to ``genrep.Assembly``; required
      unless ``feature_type`` is 2.
    * ``signals``: one path, or a list/tuple of paths, of signal tracks.
    * ``upstream`` / ``downstream``: passed to ``neighborhood`` as
      ``before_start`` / ``after_start`` for promoters; default to
      ``prom_up_def`` / ``prom_down_def``.
    * ``features``: path of the custom features track (type 2).

    The result is written to a temporary ``features_quantification.sql``
    which is then registered through ``self.new_file``.

    Returns 1 on success, or 2 (before producing any output) when the
    feature type is unknown.

    Raises ``ValueError`` when an assembly is required but missing.
    """
    feature_type = int(kw.get('feature_type') or 0)
    func = str(kw.get('score_op') or 'mean')
    assembly_id = kw.get('assembly') or None
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
    elif feature_type != 2:
        # Only the custom track (type 2 here) brings its own features.
        raise ValueError("Please specify an assembly")

    signals = kw.get('signals', [])
    if not isinstance(signals, (list, tuple)):
        # A lone path must not be iterated character by character.
        signals = [signals]
    signals = [track(sig, chrmeta=chrmeta) for sig in signals]

    if feature_type == 0:
        features = genes
    elif feature_type == 1:
        prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                     'after_start': int(kw.get('downstream') or prom_down_def),
                     'on_strand': True}
        features = lambda c: neighborhood(genes(c), **prom_pars)
    elif feature_type == 2:
        _t = track(kw.get('features'), chrmeta=chrmeta)
        # The custom track's own chromosome list drives the loop below.
        chrmeta = _t.chrmeta
        features = _t.read
    else:
        return 2

    output = self.temporary_path(fname='features_quantification.sql')
    # One score column per signal; a lone signal gets the plain "score" name.
    if len(signals) > 1:
        _f = ["score" + str(i) for i in range(len(signals))]
    else:
        _f = ["score"]
    tout = track(output, format='sql',
                 fields=['start', 'end', 'name'] + _f,
                 chrmeta=chrmeta, info={'datatype': 'qualitative'})
    try:
        for chrom in chrmeta:
            sread = [sig.read(chrom) for sig in signals]
            tout.write(score_by_feature(sread, features(chrom), fn=func),
                       chrom=chrom, clip=True)
    finally:
        # Close the output track even if scoring or writing fails.
        tout.close()
    self.new_file(output, 'features_quantification')
    return 1
def __call__(self, **kw):
    """Run DESeq on per-feature summed signal scores, one test per pair.

    Keyword arguments read from ``kw``:

    * ``feature_type``: 0 = genes (default), 1 = promoters, 2 = exons,
      3 = custom features track given by ``features``.
    * ``assembly``: identifier handed to ``genrep.Assembly``; required
      unless ``feature_type`` is 3.
    * ``signals``: one path, or a list/tuple of paths, of signal tracks.
    * ``upstream`` / ``downstream``: passed to ``neighborhood`` as
      ``before_start`` / ``after_start`` for promoters; default to
      ``prom_up_def`` / ``prom_down_def``.
    * ``features``: path of the custom features track (type 3).

    Each signal's scores are summed per feature, multiplied by that
    signal's normalisation factor and rounded to whole numbers before being
    sent to R. For every pair of distinct signal names, the ``nbinomTest``
    table is written to ``<tmp>_<a>-<b>.txt`` and registered through
    ``self.new_file``.

    Returns 1 on success, or 2 when the feature type is unknown.

    Raises ``ValueError`` when an assembly is required but missing and
    ``AssertionError`` when an input file does not exist.
    """
    feature_type = int(kw.get('feature_type') or 0)
    assembly_id = kw.get('assembly')
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
        exons = assembly.exon_track
    elif feature_type != 3:
        # Only the custom track (type 3) brings its own features.
        raise ValueError("Please specify an assembly")

    if feature_type == 0:
        features = genes
    elif feature_type == 1:
        prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                     'after_start': int(kw.get('downstream') or prom_down_def),
                     'on_strand': True}
        features = lambda c: neighborhood(genes(c), **prom_pars)
    elif feature_type == 2:
        features = exons
    elif feature_type == 3:
        # str() so that a missing value fails the assertion with a message
        # instead of raising TypeError inside os.path.exists.
        assert os.path.exists(str(kw.get('features'))), "Features file not found: '%s'" % kw.get("features")
        _t = track(kw.get('features'), chrmeta=chrmeta)
        # The custom track's own chromosome list drives the loop below.
        chrmeta = _t.chrmeta
        features = _t.read
    else:
        return 2

    signal_paths = kw.get('signals', [])
    if not isinstance(signal_paths, (list, tuple)):
        # A lone path must not be iterated character by character.
        signal_paths = [signal_paths]
    signals = []
    norm_factors = []
    for sig in signal_paths:
        assert os.path.exists(sig), "File not found: %s." % sig
        _t = track(sig, chrmeta=chrmeta)
        # Prefer an explicit factor stored in the track info, then one
        # derived from the read count, else leave the scores unscaled.
        if 'normalization' in _t.info:
            _nf = float(_t.info['normalization'])
        elif 'nreads' in _t.info:
            _nf = float(_t.info['nreads']) * 1e-7 / float(_t.info.get('read_extension', 1))
        else:
            _nf = 1
        signals.append(_t)
        norm_factors.append(_nf)

    # One score column per signal (a single column when there is one signal).
    n_scores = max(1, len(signals))
    de_list = []
    name_idx = None
    for chrom in chrmeta:
        sread = [sig.read(chrom) for sig in signals]
        mread = score_by_feature(sread, features(chrom), fn='sum')
        de_list.extend(list(mread))
        name_idx = mread.fields.index("name")

    # Turn all scores into integers. The normalisation factor belongs to
    # the signal (column), so it is indexed by the position of the score
    # within the row, not by the row number.
    de_matrix = numpy.asarray(
        [[int(score * norm_factors[col] + .5)
          for col, score in enumerate(row[-n_scores:])]
         for row in de_list],
        dtype=float)
    rownames = numpy.asarray([row[name_idx] for row in de_list])
    colnames = numpy.asarray([os.path.splitext(os.path.basename(s.path))[0]
                              for s in signals])
    del de_list  # free the per-feature rows before handing data to R

    output = self.temporary_path(fname='DE')
    robjects.r.assign('Mdata', numpy2ri.numpy2ri(de_matrix))
    robjects.r.assign('row_names', numpy2ri.numpy2ri(rownames))
    robjects.r.assign('col_names', numpy2ri.numpy2ri(colnames))
    # NOTE(review): `method` is computed in the R snippet but the call to
    # estimateVarianceFunctions always uses 'blind' - confirm this is intended.
    robjects.r("""
        Mdata <- as.data.frame(Mdata,row.names=row_names)
        conds <- unlist(strsplit(col_names,".",fixed=T))
        colnames(Mdata) <- conds
        groups <- unique(conds)
        couples <- combn(groups,2)
        # Still need to check that replicates are not identical - lfproc would fail
        if (any(table(conds)>1)){ method = 'normal' # if replicates
        } else { method = 'blind' }
        library(DESeq)
        cds <- newCountDataSet(Mdata, conds)
        cds <- estimateSizeFactors(cds)
        cds <- estimateVarianceFunctions(cds,method='blind')
        """)

    # NOTE(review): the pairs are built from the full column names while the
    # R side splits names on "." - presumably names contain no dot; verify.
    groups = list(set(colnames))
    for c in itertools.combinations(groups, 2):
        out = output + '_' + c[0] + '-' + c[1] + '.txt'
        print(out)
        r_cmd = """
        res <- nbinomTest(cds, '%s', '%s')
        res <- res[order(res[,8]),]
        write.table(res, '%s', row.names=F)
        """ % (c[0], c[1], out)
        robjects.r(r_cmd)
        self.new_file(out, 'differential_expression')
    return 1