def __call__(self, track, slice=None):
    """Build a reST snippet with the summary plots exported for *track*.

    For every level in ``self.levels`` the heatmap, scvplot and
    pvalue_vs_length images below ``EXPORTDIR/<method>/`` are looked up;
    images that do not exist on disk are skipped.

    Returns an ordered dict with a single ``"text"`` entry: a section
    titled with the gene set followed by one ``figure`` directive per
    image found, or the empty string when no image exists.
    """
    edir = EXPORTDIR
    geneset = track
    method = self.method
    plot_kinds = ("heatmap", "scvplot", "pvalue_vs_length")

    figures = []
    for level in self.levels:
        for kind in plot_kinds:
            path = "%s/%s/%s_%s_%s_%s.png" % (
                edir, method, geneset, method, level, kind)
            if os.path.exists(path):
                figures.append(".. figure:: %s" % path)

    if not figures:
        return odict((("text", ""),))

    # reST only recognises a section title when the text and its
    # underline sit on consecutive lines, and a directive must start
    # on its own line after a blank line.
    underline = "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
    header = "\n%s\n%s\n\n" % (geneset, underline)
    return odict((("text", header + "\n\n".join(figures)),))
def __call__(self, track, slice=None):
    """Return reST showing the gene-level fit and residual plots.

    *slice* is used as the gene set name and *track* as the last part
    of the image file names below ``EXPORTDIR/<method>/``.

    Returns ``None`` when the fit image is missing, otherwise an
    ordered dict with a single ``"text"`` entry holding a titled
    section with two ``figure`` directives (fit and residuals).
    """
    edir = EXPORTDIR
    level = "gene"
    geneset = slice
    method = self.method

    prefix = "%s/%s/%s_%s_%s" % (edir, method, geneset, method, level)
    filename = "%s_fit_%s.png" % (prefix, track)

    # fitting information will not exist if there are no replicates
    if not os.path.exists(filename):
        return None

    # Title, underline and each directive need their own line for the
    # snippet to be valid reST.
    rst_lines = (
        "",
        "%s %s %s" % (level, track, geneset),
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++",
        "",
        ".. figure:: %s" % filename,
        "",
        ".. figure:: %s_residuals_%s.png" % (prefix, track),
        "",
        "",
    )
    return odict((("text", "\n".join(rst_lines)),))
def __call__(self, track, slice=None):
    """Count replicated intervals of *track* per feature class.

    Five single-value queries are run, one per feature class. A query
    that raises (presumably because its table does not exist for this
    track) contributes the string ``"0"`` instead of aborting the
    whole tracker.

    Returns an ordered dict mapping the feature class label to the
    value returned by ``self.getValue``.
    """
    ANNOTATIONS_NAME = P['annotations_name']
    names = {"track": track, "ANNOTATIONS_NAME": ANNOTATIONS_NAME}

    queries = (
        ("Protein-coding TSS",
         "SELECT count(distinct gene_id) as intervals "
         "FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_overlap "
         "where (genes_nover>0 OR downstream_flank_nover>0 "
         "OR upstream_flank_nover>0)"),
        ("Non-coding TSS",
         "SELECT count(distinct gene_id) as intervals "
         "FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_noncoding_tss_distance "
         "where closest_dist < 1000"),
        ("H3K4Me1 Enhancer",
         'SELECT distinct count(distinct interval_id) as intervals, '
         '"enhancer" as feature_class '
         'FROM %(track)s_replicated_h3k4me1_intervals'),
        ("RNAseq transcript",
         "SELECT count(distinct gene_id) as intervals "
         "FROM %(track)s_replicated_rnaseq_tss_distance "
         "where closest_dist < 1000"),
        ("lincRNA TSS",
         "SELECT count(distinct gene_id) as intervals "
         "FROM %(track)s_replicated_lncrna_tss_distance "
         "where closest_dist < 1000"),
    )

    result = odict()
    for label, query in queries:
        try:
            result[label] = self.getValue(query % names)
        except Exception:
            # Best-effort fallback, but KeyboardInterrupt and SystemExit
            # are no longer swallowed as they were by a bare ``except:``.
            result[label] = "0"
    return result
def __call__(self, track, slice=None):
    """Build reST with the pairwise significance plots for *track*.

    Every level in ``self.levels`` is combined with every unordered
    pair from ``EXPERIMENTS``; a section is emitted for each pair whose
    ``*_significance.png`` image exists below ``EXPORTDIR/<method>/``.

    Returns an ordered dict with a single ``"text"`` entry (the empty
    string when no image exists).
    """
    edir, method = EXPORTDIR, self.method
    geneset = track
    underline = "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"

    sections = []
    for level in self.levels:
        for x, y in itertools.combinations(EXPERIMENTS, 2):
            filename = "%s/%s/%s_%s_%s_%s_vs_%s_significance.png" % (
                edir, method, geneset, method, level, x, y)
            if not os.path.exists(filename):
                continue
            # One line each for title, underline and directive; reST
            # does not recognise them when they share a line.
            sections.append("\n".join((
                "",
                "%s %s %s vs %s" % (geneset, level, x, y),
                underline,
                "",
                ".. figure:: %s" % filename,
                "",
                "",
            )))

    return odict((("text", "\n".join(sections)),))
def asMatrix(rownames, colnames, data):
    """Tally (row, column) index pairs into a labelled count table.

    *data* is an iterable of ``(x, y)`` index pairs. Each pair adds one
    to cell ``[x, y]`` of a ``len(rownames)`` by ``len(colnames)``
    matrix; pairs that raise ``IndexError`` are ignored.

    Returns an ordered dict keyed by ``str(rowname)`` whose values are
    ordered dicts keyed by ``str(colname)`` holding the cell counts.
    """
    shape = (len(rownames), len(colnames))
    counts = numpy.zeros(shape)

    for row, col in data:
        try:
            counts[row, col] += 1
        except IndexError:
            # pairs outside the table are skipped on purpose
            continue

    table = odict()
    for row, rowname in enumerate(rownames):
        cells = odict()
        for col, colname in enumerate(colnames):
            cells[str(colname)] = counts[row, col]
        table[str(rowname)] = cells
    return table
def __call__(self, track, slice=None):
    """Return the ``long_intervals_gat_results`` table column by column.

    The rows returned by ``self.get`` are transposed so that each of
    the six display labels maps to the tuple of values in that column.
    *track* and *slice* are not used.
    """
    labels = ("Dataset1", "Dataset2", "Expected overlap",
              "Observed overlap", "Fold Enrichment", "P-value")
    rows = self.get(
        "SELECT track, annotation, round(expected,0) as expected, observed, round(fold,1) as fold, pvalue FROM long_intervals_gat_results ")
    columns = zip(*rows)
    return odict(zip(labels, columns))
def __call__(self, track, slice=None):
    """Return the FPKM values of *slice* from the ``<track>_levels`` table.

    Returns ``None`` when the table has no ``<slice>_FPKM`` column,
    otherwise an ordered dict with a single ``"fpkm"`` entry.

    The statement's placeholders are not filled in here; presumably
    ``self.getValues`` resolves them from this method's locals and the
    tracker's attributes (confirm against the base class), which is why
    the local names ``table`` and ``slice`` are kept.
    """
    table = track + "_levels"
    fpkm_column = "%s_FPKM" % slice

    if fpkm_column in self.getColumns(table):
        statement = "SELECT %(slice)s_fpkm FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f"
        values = self.getValues(statement)
        return odict([("fpkm", values)])

    return None
def __call__(self, track, slice=None):
    """Collect the FPKM values stored for *slice* in ``<track>_levels``.

    Returns ``None`` if ``<slice>_FPKM`` is not among the table's
    columns; otherwise an ordered dict ``{"fpkm": values}``.

    NOTE(review): the statement is passed on with its ``%(...)``
    placeholders intact, so the accessor presumably fills them in from
    the locals ``slice`` and ``table`` and an attribute ``min_fpkm``.
    """
    table = track + "_levels"
    available = self.getColumns(table)
    if ("%s_FPKM" % slice) not in available:
        return None

    statement = (
        "SELECT %(slice)s_fpkm FROM %(table)s "
        "WHERE %(slice)s_fpkm > %(min_fpkm)f")
    data = self.getValues(statement)
    return odict({"fpkm": data})
def __call__(self, track, slice=None):
    """Return the average FPKM per transfrag with class code *slice*.

    The query joins ``<track>_cuffcompare_tracking`` with
    ``<track>_cuffcompare_transcripts`` on ``transfrag_id`` and groups
    by transfrag. Returns an ordered dict with a single ``"fpkm"``
    entry holding the values from ``self.getValues``.
    """
    template = (
        "SELECT avg(FPKM) "
        "FROM %(track)s_cuffcompare_tracking AS t, "
        "%(track)s_cuffcompare_transcripts AS a "
        "WHERE code = '%(slice)s' AND a.transfrag_id = t.transfrag_id "
        "GROUP BY a.transfrag_id")
    statement = template % {"track": track, "slice": slice}
    vals = self.getValues(statement)
    return odict([("fpkm", vals)])
def __call__(self, track, slice=None):
    """Average FPKM of each transfrag of *track* with class code *slice*.

    Returns an ordered dict ``{"fpkm": values}`` where ``values`` is
    whatever ``self.getValues`` yields for the grouped query.
    """
    fields = dict(track=track, slice=slice)
    statement = """SELECT avg(FPKM) FROM %(track)s_cuffcompare_tracking AS t, %(track)s_cuffcompare_transcripts AS a WHERE code = '%(slice)s' AND a.transfrag_id = t.transfrag_id GROUP BY a.transfrag_id""" % fields
    result = odict()
    result["fpkm"] = self.getValues(statement)
    return result
def __call__(self, track, slice=None):
    """Load the matrix stored in ``ortholog_pairs_with_feature.matrix2``.

    The file is looked up relative to the current working directory;
    *track* and *slice* are not used.

    Returns ``None`` when the file does not exist, otherwise an
    ordered dict with the entries ``"matrix"``, ``"rows"`` and
    ``"columns"`` as returned by ``IOTools.readMatrix``.
    """
    fn = "ortholog_pairs_with_feature.matrix2"
    if not os.path.exists(fn):
        return None

    infile = IOTools.openFile(fn)
    try:
        matrix, rownames, colnames = IOTools.readMatrix(infile)
    finally:
        # the handle used to be left open; close it even if parsing fails
        infile.close()

    return odict((('matrix', matrix),
                  ('rows', rownames),
                  ('columns', colnames)))
def __call__(self, track, slice=None):
    """Return paired-end read counts for every track in the picard tables.

    Each row returned by ``self.get`` starts with the track name,
    followed by four values that are stored under the keys
    ``"total"``, ``"mapped"``, ``"reverse"`` and ``"duplicates"``.
    The *track* and *slice* arguments are not used.

    Returns an ordered dict ``{track: {column: value}}``.
    """
    columns = ("total", "mapped", "reverse", "duplicates")
    statement = (
        "SELECT pas.TRACK, pas.total_reads/2 as Total_read_pairs, "
        "pas.reads_aligned_in_pairs/2 as Aligned_pairs, "
        "ROUND((pas.reads_aligned_in_pairs/2)*(1-strand_balance),0) as reverse, "
        "pds.read_pair_duplicates as duplicate_pairs "
        "FROM picard_stats_alignment_summary_metrics pas, "
        "picard_duplicates_duplicate_metrics pds "
        "WHERE pas.track=pds.track and pas.category='PAIR';")
    rows = self.get(statement)

    result = odict()
    for row in rows:
        # first column names the track, the rest are its values
        result[row[0]] = odict(zip(columns, row[1:]))
    return result
def __call__(self, track, slice=None):
    """Read ``ortholog_pairs_with_feature.matrix2`` from the working directory.

    Returns ``None`` if the file is absent. Otherwise the file is
    parsed with ``IOTools.readMatrix`` and the result is returned as
    an ordered dict with the keys ``"matrix"``, ``"rows"`` and
    ``"columns"``. *track* and *slice* are ignored.
    """
    fn = "ortholog_pairs_with_feature.matrix2"
    if not os.path.exists(fn):
        return None

    handle = IOTools.openFile(fn)
    try:
        matrix, rownames, colnames = IOTools.readMatrix(handle)
    finally:
        # release the file handle, which was previously never closed
        handle.close()

    result = odict()
    result['matrix'] = matrix
    result['rows'] = rownames
    result['columns'] = colnames
    return result
def __call__(self, track, slice=None):
    """Return the relative FPKM error of *slice* in ``<track>_levels``.

    The value selected per row is half the width of the confidence
    interval (``conf_hi - conf_lo``) divided by the FPKM.

    Returns ``None`` when the table lacks a ``<slice>_FPKM`` column,
    otherwise an ordered dict with a single ``"relative_error"`` entry.

    The placeholders in the statement are left for the accessor to
    resolve (presumably from the locals ``slice``/``table`` and an
    attribute ``min_fpkm`` -- confirm against the base class).
    """
    table = track + "_levels"
    fpkm_column = "%s_FPKM" % slice

    if fpkm_column in self.getColumns(table):
        # divide by two to get relative error
        statement = (
            "SELECT (%(slice)s_conf_hi - %(slice)s_conf_lo ) / %(slice)s_fpkm / 2 "
            "FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f")
        values = self.getValues(statement)
        return odict([("relative_error", values)])

    return None
def __call__(self, track, slice=None):
    """Return the FPKM values of *slice* scaled by the table maximum.

    Returns ``None`` when ``<track>_levels`` has no ``<slice>_FPKM``
    column, otherwise an ordered dict with a single
    ``"percent of max(fpkm)"`` entry.

    The statements are passed on with their placeholders; presumably
    the accessor fills them in from this method's locals, so
    ``max_fpkm`` must keep its name because the second statement
    refers to it.
    """
    table = track + "_levels"
    if ("%s_FPKM" % slice) not in self.getColumns(table):
        return None

    largest = self.getValue("SELECT max(%(slice)s_fpkm) FROM %(table)s")
    max_fpkm = float(largest)

    statement = (
        "SELECT CAST( %(slice)s_fpkm AS FLOAT) / %(max_fpkm)f "
        "FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f")
    values = self.getValues(statement)
    return odict([("percent of max(fpkm)", values)])
def __call__(self, track, slice=None):
    """Relative error of the FPKM estimates of *slice* for *track*.

    Selects ``(conf_hi - conf_lo) / fpkm / 2`` from ``<track>_levels``.
    Returns ``None`` if the ``<slice>_FPKM`` column is missing, else an
    ordered dict ``{"relative_error": values}``.

    NOTE(review): placeholder substitution happens outside this
    method; the locals ``slice`` and ``table`` are presumably read by
    the accessor.
    """
    table = track + "_levels"
    known_columns = self.getColumns(table)
    if ("%s_FPKM" % slice) not in known_columns:
        return None

    # divide by two to get relative error
    statement = "SELECT (%(slice)s_conf_hi - %(slice)s_conf_lo ) / %(slice)s_fpkm / 2 FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f"
    result = odict()
    result["relative_error"] = self.getValues(statement)
    return result
def __call__(self, track, slice=None):
    """FPKM values of *slice* expressed as a fraction of the maximum.

    Returns ``None`` if ``<track>_levels`` does not contain a
    ``<slice>_FPKM`` column; otherwise an ordered dict with the single
    key ``"percent of max(fpkm)"``.

    NOTE(review): the statements keep their ``%(...)`` placeholders,
    so the accessor presumably substitutes this method's locals; the
    name ``max_fpkm`` is part of that contract.
    """
    table = track + "_levels"
    fpkm_column = "%s_FPKM" % slice
    if fpkm_column in self.getColumns(table):
        max_fpkm = float(
            self.getValue("SELECT max(%(slice)s_fpkm) FROM %(table)s"))
        statement = "SELECT CAST( %(slice)s_fpkm AS FLOAT) / %(max_fpkm)f FROM %(table)s WHERE %(slice)s_fpkm > %(min_fpkm)f"
        data = self.getValues(statement)
        return odict({"percent of max(fpkm)": data})
    return None
def __call__(self, track, slice=None):
    """Parse ``ortholog_pairs_with_feature.matrix2`` into name -> scores.

    The file is read from the current working directory. Each line is
    split on whitespace; the first field becomes the key and the
    remaining fields (kept as strings) the value. *track* and *slice*
    are not used.

    Returns ``None`` when the file does not exist, otherwise an
    ordered dict in file order.
    """
    fn = "ortholog_pairs_with_feature.matrix2"
    if not os.path.exists(fn):
        return None

    data = odict()
    # ``with`` closes the handle, which used to be left open
    with open(fn) as infile:
        for line in infile:
            fields = line.split()
            if not fields:
                # a blank line used to raise IndexError on fields[0]
                continue
            data[fields[0]] = fields[1:]
    return data
def __call__(self, track, slice=None):
    """Return the first row of the configured query for *track*.

    The statement is assembled from ``self.mSelect``, the table
    ``<track>_<self.mTable>`` and ``self.mWhere``. When *slice* is
    given and is not ``"all"``, the condition ``is_<slice>`` is added.

    Returns an ordered dict pairing ``self.mColumns`` with the values
    of the row returned by ``self.getFirstRow``.
    """
    where = self.mWhere
    select = self.mSelect
    table = self.mTable

    statement = "%(select)s FROM %(track)s_%(table)s WHERE %(where)s" % locals()
    if slice is not None and slice != "all":
        # This used to read ``is_%slices``, which formatted the whole
        # locals() dict into the statement instead of the slice name.
        statement += " AND is_%(slice)s" % locals()

    data = self.getFirstRow(statement)
    return odict(list(zip(self.mColumns, data)))
def __call__(self, track, slice=None):
    """Count significant tests in two comparisons and their overlap.

    *track* is unpacked into two names, ``pair1`` and ``pair2``. For
    each, the significant rows of ``<slice>_<pair>_gene_diff`` are
    fetched and compared by their string representation.

    Returns an ordered dict with the number of rows for ``pair1``,
    for ``pair2`` and under ``"shared"`` the number present in both.

    The statements are not interpolated here; presumably ``self.get``
    resolves ``slice``, ``pair1`` and ``pair2`` from this method's
    locals, so those names must stay as they are.
    """
    pair1, pair2 = track

    rows1 = self.get(
        "SELECT test_id, treatment_name, control_name "
        "FROM %(slice)s_%(pair1)s_gene_diff WHERE significant")
    rows2 = self.get(
        "SELECT test_id, treatment_name, control_name "
        "FROM %(slice)s_%(pair2)s_gene_diff WHERE significant")

    hits1 = set(map(str, rows1))
    hits2 = set(map(str, rows2))

    counts = odict()
    counts[pair1] = len(hits1)
    counts[pair2] = len(hits2)
    counts["shared"] = len(hits1 & hits2)
    return counts
def __call__(self, track, slice=None):
    """Histogram the values of ``self.mColumn`` for *track*.

    Without a slice (or with ``"all"``) the values come from
    ``<track>_<self.mTable>`` filtered by ``self.mWhere``. Otherwise
    the table is joined on ``gene_id`` with
    ``<track>_<self.mAnnotations>`` and restricted to ``a.is_<slice>``.

    The values are binned with unit-width edges from 0 to the maximum.

    Returns an ordered dict mapping each bin's lower edge (as a
    string) to its count; an empty ordered dict when the query yields
    no values.
    """
    annotations = self.mAnnotations
    table = self.mTable
    column, where = self.mColumn, self.mWhere

    if not slice or slice == "all":
        statement = (
            "SELECT %(column)s FROM %(track)s_%(table)s AS d "
            "WHERE %(where)s" % locals())
    else:
        statement = (
            "SELECT %(column)s FROM %(track)s_%(table)s AS d, "
            "%(track)s_%(annotations)s as a "
            "WHERE d.gene_id = a.gene_id AND a.is_%(slice)s "
            "AND %(where)s" % locals())

    data = self.getValues(statement)
    if len(data) == 0:
        # max() of an empty sequence raises ValueError
        return odict()

    hist, bins = numpy.histogram(
        data, bins=numpy.arange(0, max(data) + 1, 1))
    return odict(list(zip(list(map(str, bins[:-1])), hist)))
def __call__(self, track, slice=None):
    """Count genes of *track* per overlap class.

    Every gene in ``<track>_merged_ensembl_gene_overlap`` is assigned
    the first matching class of TSS, Upstream, Gene, Downstream, or
    else Intergenic, and distinct genes are counted per class.

    Returns an ordered dict with the keys ``"Downstream"``, ``"Gene"``,
    ``"Intergenic"``, ``"TSS"`` and ``"Upstream"``; classes without
    any gene are reported as 0.
    """
    labels = ("Downstream", "Gene", "Intergenic", "TSS", "Upstream")

    # The class name is selected together with its count. Pairing bare
    # counts with a fixed label list mislabels every count that follows
    # a class without rows, because GROUP BY omits empty classes.
    rows = self.get(
        "SELECT feature_class, count(distinct gene_id) as intervals "
        "FROM ( SELECT gene_id, "
        "CASE WHEN tss_gene_extended_pover1 > 0 THEN 'TSS' "
        "WHEN upstream_flank_pover1 >0 THEN 'Upstream' "
        "WHEN genes_pover1 > 0 THEN 'Gene' "
        "WHEN downstream_flank_pover1 >0 THEN 'Downstream' "
        "ELSE 'Intergenic' END AS feature_class "
        "FROM %(track)s_merged_ensembl_gene_overlap) "
        "group by feature_class order by feature_class asc" % locals())

    result = odict((label, 0) for label in labels)
    for feature_class, count in rows:
        result[feature_class] = count
    return result
def __call__(self, track, slice=None):
    """Number of distinct genes of *track* in each overlap class.

    Genes from ``<track>_merged_ensembl_gene_overlap`` are classified
    by the first positive overlap column (TSS, Upstream, Gene,
    Downstream) and as Intergenic otherwise.

    Returns an ordered dict keyed ``"Downstream"``, ``"Gene"``,
    ``"Intergenic"``, ``"TSS"``, ``"Upstream"``; a class with no genes
    maps to 0.
    """
    statement = (
        "SELECT feature_class, count(distinct gene_id) as intervals "
        "FROM ( SELECT gene_id, "
        "CASE WHEN tss_gene_extended_pover1 > 0 THEN 'TSS' "
        "WHEN upstream_flank_pover1 >0 THEN 'Upstream' "
        "WHEN genes_pover1 > 0 THEN 'Gene' "
        "WHEN downstream_flank_pover1 >0 THEN 'Downstream' "
        "ELSE 'Intergenic' END AS feature_class "
        "FROM %(track)s_merged_ensembl_gene_overlap) "
        "group by feature_class order by feature_class asc"
    ) % {"track": track}
    rows = self.get(statement)

    # Start from zero for every class and fill in by name. Zipping
    # counts against a fixed label order is wrong as soon as one class
    # is absent from the grouped result.
    result = odict()
    for label in ("Downstream", "Gene", "Intergenic", "TSS", "Upstream"):
        result[label] = 0
    for feature_class, count in rows:
        result[feature_class] = count
    return result
def __call__(self, track, slice=None):
    """Return length-weighted GC and CpG statistics of the genome.

    The query runs on ``annotations.genome`` and excludes the
    ``total`` row and ids matching chrX, chrY, chrW, chrZ, chrM and
    ``random``. *track* and *slice* are not used.

    Returns an ordered dict with the keys ``"GC content"``,
    ``"CpG Obs/Exp"`` and ``"CpG density"``.
    """
    # The doubled percent signs suggest the accessor applies
    # %-formatting to the statement -- confirm against the base class.
    statement = (
        "SELECT round(sum(length*pGC)/sum(length),3) as mean_GC, "
        "round(sum(length*CpG_ObsExp)/sum(length),3) as mean_CpG_ObsExp, "
        "round(sum(length*pCpG)/sum(length),3) as mean_CpG "
        "FROM annotations.genome "
        "WHERE id <> 'total' "
        "AND id not like 'chrX%%' "
        "AND id not like 'chrY%%' "
        "AND id not like 'chrW%%' "
        "AND id not like 'chrZ%%' "
        "AND id not like 'chrM%%' "
        "AND id not like '%%random%%' ")
    row = self.getFirstRow(statement)
    labels = ("GC content", "CpG Obs/Exp", "CpG density")
    return odict(zip(labels, row))
def __call__(self, track):
    """Return read totals and sense/antisense coverage sums for *track*.

    ``"total"`` and ``"mapped"`` are read from ``bam_stats``; the
    remaining entries are whatever ``self.getRow`` returns for the
    coverage summary statement and are merged into the result.

    None of the statements are interpolated here. Presumably the
    accessors fill in ``track`` and ``mapped`` from this method's
    locals and ``section`` from the tracker (confirm against the base
    class), so the local name ``mapped`` must be kept.
    """
    total = self.getValue(
        "SELECT reads_total FROM bam_stats WHERE track = '%(track)s' ")
    mapped = self.getValue(
        "SELECT reads_mapped FROM bam_stats WHERE track = '%(track)s' ")

    result = odict((("total", total), ("mapped", mapped)))

    statement = (
        "SELECT SUM(sense_nreads) + SUM(antisense_nreads) AS anysense, "
        "CAST((SUM(sense_nreads) + SUM(antisense_nreads)) AS FLOAT) / %(mapped)i AS anysense_percent, "
        "SUM(antisense_nreads) AS antisense, "
        "CAST(SUM(antisense_nreads) AS FLOAT) / %(mapped)i AS antisense_percent, "
        "SUM(sense_nreads) AS sense, "
        "CAST(SUM(sense_nreads) AS FLOAT) / %(mapped)i AS sense_percent, "
        "CAST(SUM(antisense_nreads) AS FLOAT)/ (SUM(antisense_nreads) + SUM(sense_nreads) ) AS ratio "
        "FROM %(track)s_%(section)s_coverage ")
    summary = self.getRow(statement)
    result.update(summary)
    return result
def __call__(self, track, slice=None):
    """Genome-wide, length-weighted GC content and CpG measures.

    Reads one row from ``annotations.genome``, leaving out the
    ``total`` entry and ids matching chrX/chrY/chrW/chrZ/chrM and
    ``random``. The arguments are not used.

    Returns an ordered dict: ``"GC content"``, ``"CpG Obs/Exp"``,
    ``"CpG density"``.
    """
    mColumns = ["GC content", "CpG Obs/Exp", "CpG density"]
    # NOTE(review): ``%%`` presumably survives as a literal ``%`` once
    # the accessor has %-formatted the statement.
    data = self.getFirstRow("""SELECT round(sum(length*pGC)/sum(length),3) as mean_GC, round(sum(length*CpG_ObsExp)/sum(length),3) as mean_CpG_ObsExp, round(sum(length*pCpG)/sum(length),3) as mean_CpG FROM annotations.genome WHERE id <> 'total' AND id not like 'chrX%%' AND id not like 'chrY%%' AND id not like 'chrW%%' AND id not like 'chrZ%%' AND id not like 'chrM%%' AND id not like '%%random%%' """)
    result = odict()
    for label, value in zip(mColumns, data):
        result[label] = value
    return result
def __call__(self, track, slice=None):
    """Return per-feature-class interval counts for replicated *track*.

    One single-value query is issued per feature class. If a query
    raises (presumably the table is missing for this track), the
    string ``"0"`` is reported for that class.

    Returns an ordered dict keyed by ``"Protein-coding TSS"``,
    ``"Non-coding TSS"``, ``"H3K4Me1 Enhancer"``,
    ``"RNAseq transcript"`` and ``"lincRNA TSS"``.
    """
    ANNOTATIONS_NAME = P['annotations_name']
    count_genes = "SELECT count(distinct gene_id) as intervals "
    near_tss = "where closest_dist < 1000"

    labelled_statements = [
        ("Protein-coding TSS",
         count_genes
         + "FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_overlap "
         + "where (genes_nover>0 OR downstream_flank_nover>0 "
         + "OR upstream_flank_nover>0)"),
        ("Non-coding TSS",
         count_genes
         + "FROM %(track)s_replicated_%(ANNOTATIONS_NAME)s_noncoding_tss_distance "
         + near_tss),
        ("H3K4Me1 Enhancer",
         'SELECT distinct count(distinct interval_id) as intervals, '
         '"enhancer" as feature_class '
         'FROM %(track)s_replicated_h3k4me1_intervals'),
        ("RNAseq transcript",
         count_genes
         + "FROM %(track)s_replicated_rnaseq_tss_distance "
         + near_tss),
        ("lincRNA TSS",
         count_genes
         + "FROM %(track)s_replicated_lncrna_tss_distance "
         + near_tss),
    ]

    counts = odict()
    for label, template in labelled_statements:
        try:
            statement = template % {
                "track": track, "ANNOTATIONS_NAME": ANNOTATIONS_NAME}
            counts[label] = self.getValue(statement)
        except Exception:
            # Unlike the former bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            counts[label] = "0"
    return counts
def __call__(self, track, slice=None):
    """Count genes of *track* per overlap class in a configurable table.

    Genes are read from the table ``<track><self.mTable>``. A gene is
    classed as TSS when the column named by ``self.mWhere`` is
    positive, otherwise as Gene, Upstream or Downstream by the first
    positive overlap column, else as Intergenic.

    Returns an ordered dict with the keys ``"Downstream"``, ``"Gene"``,
    ``"Intergenic"``, ``"TSS"`` and ``"Upstream"``; classes without
    any gene are reported as 0.
    """
    table = self.mTable
    where = self.mWhere
    labels = ("Downstream", "Gene", "Intergenic", "TSS", "Upstream")

    # Select the class name with each count. GROUP BY drops empty
    # classes, so matching bare counts to a fixed label order would
    # attach counts to the wrong labels.
    rows = self.get(
        "SELECT feature_class, count(distinct gene_id) as intervals "
        "FROM ( SELECT gene_id, "
        "CASE WHEN %(where)s > 0 THEN 'TSS' "
        "WHEN genes_pover1 > 0 THEN 'Gene' "
        "WHEN upstream_flank_pover1 >0 THEN 'Upstream' "
        "WHEN downstream_flank_pover1 >0 THEN 'Downstream' "
        "ELSE 'Intergenic' END AS feature_class "
        "FROM %(track)s%(table)s) "
        "group by feature_class order by feature_class asc" % locals())

    result = odict((label, 0) for label in labels)
    for feature_class, count in rows:
        result[feature_class] = count
    return result
def __call__(self, track, slice=None):
    """Return the per-category sums from ``cgi_annotations``.

    A single row with the sums of the ``is_*`` flag columns is
    fetched; *track* and *slice* are not used.

    Returns an ordered dict keyed by category name (``"cds"``,
    ``"utr"``, ``"upstream"``, ``"downstream"``, ``"intronic"``,
    ``"intergenic"``, ``"flank"``, ``"ambiguous"``).
    """
    categories = ("cds", "utr", "upstream", "downstream",
                  "intronic", "intergenic", "flank", "ambiguous")
    statement = (
        "SELECT sum(is_cds) AS cds, sum(is_utr) AS utr, "
        "sum(is_upstream) AS upstream, sum(is_downstream) AS downstream, "
        "sum(is_intronic) AS intronic, sum(is_intergenic) AS intergenic, "
        "sum(is_flank) AS flank, sum(is_ambiguous) AS ambiguous "
        "FROM cgi_annotations")
    row = self.getFirstRow(statement)
    return odict(zip(categories, row))
def __call__(self, track, slice=None):
    """Distinct genes of *track* per overlap class.

    The table ``<track><self.mTable>`` is classified row by row: TSS
    when the column given by ``self.mWhere`` is positive, then Gene,
    Upstream, Downstream by the first positive overlap column, and
    Intergenic otherwise.

    Returns an ordered dict keyed ``"Downstream"``, ``"Gene"``,
    ``"Intergenic"``, ``"TSS"``, ``"Upstream"``; a class with no genes
    maps to 0.
    """
    table = self.mTable
    where = self.mWhere

    statement = (
        "SELECT feature_class, count(distinct gene_id) as intervals "
        "FROM ( SELECT gene_id, "
        "CASE WHEN %(where)s > 0 THEN 'TSS' "
        "WHEN genes_pover1 > 0 THEN 'Gene' "
        "WHEN upstream_flank_pover1 >0 THEN 'Upstream' "
        "WHEN downstream_flank_pover1 >0 THEN 'Downstream' "
        "ELSE 'Intergenic' END AS feature_class "
        "FROM %(track)s%(table)s) "
        "group by feature_class order by feature_class asc"
    ) % {"where": where, "track": track, "table": table}
    rows = self.get(statement)

    # Fill in by class name. Positional pairing with a fixed label
    # list breaks whenever a class is missing from the grouped result.
    result = odict()
    for label in ("Downstream", "Gene", "Intergenic", "TSS", "Upstream"):
        result[label] = 0
    for feature_class, count in rows:
        result[feature_class] = count
    return result
def __call__(self, track, slice=None):
    """Return reST sections for the existing pairwise significance plots.

    For each level in ``self.levels`` and each unordered pair of
    ``EXPERIMENTS`` the image
    ``EXPORTDIR/<method>/<track>_<method>_<level>_<x>_vs_<y>_significance.png``
    is checked; pairs without an image are skipped.

    Returns an ordered dict with a single ``"text"`` entry, which is
    empty when no image exists.
    """
    edir, method = EXPORTDIR, self.method
    geneset = track

    # Built with explicit newlines: the title must be directly above
    # its underline and the directive on a line of its own to be
    # parsed as reST.
    section = (
        "\n"
        "%(geneset)s %(level)s %(x)s vs %(y)s\n"
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
        "\n"
        ".. figure:: %(filename)s\n"
        "\n")

    rst_text = []
    for level in self.levels:
        for x, y in itertools.combinations(EXPERIMENTS, 2):
            filename = ("%(edir)s/%(method)s/%(geneset)s_%(method)s_"
                        "%(level)s_%(x)s_vs_%(y)s_significance.png" % locals())
            if os.path.exists(filename):
                rst_text.append(section % locals())

    return odict((("text", "\n".join(rst_text)),))
def __call__(self, track, slice=None):
    """Sum each annotation flag column of ``cgi_annotations``.

    The arguments are not used. Returns an ordered dict that pairs
    the eight category names with the values of the single row
    returned by ``self.getFirstRow``.
    """
    mColumns = ["cds", "utr", "upstream", "downstream",
                "intronic", "intergenic", "flank", "ambiguous"]
    data = self.getFirstRow("""SELECT sum(is_cds) AS cds, sum(is_utr) AS utr, sum(is_upstream) AS upstream, sum(is_downstream) AS downstream, sum(is_intronic) AS intronic, sum(is_intergenic) AS intergenic, sum(is_flank) AS flank, sum(is_ambiguous) AS ambiguous FROM cgi_annotations""")
    result = odict()
    for name, value in zip(mColumns, data):
        result[name] = value
    return result
def __call__(self, track, slice=None):
    """Build reST embedding the gene-level fit and residual images.

    The gene set name is taken from *slice*; *track* completes the
    image file names below ``EXPORTDIR/<method>/``.

    Returns ``None`` if the fit image does not exist, otherwise an
    ordered dict with a single ``"text"`` entry containing a titled
    section followed by the fit and the residuals figure.
    """
    edir = EXPORTDIR
    level = "gene"
    geneset = slice
    method = self.method

    filename = ("%(edir)s/%(method)s/%(geneset)s_%(method)s_"
                "%(level)s_fit_%(track)s.png" % locals())

    # fitting information will not exist if there are no replicates
    if not os.path.exists(filename):
        return None

    # Explicit newlines keep the title above its underline and each
    # directive on its own line, as reST requires.
    rst_text = (
        "\n"
        "%(level)s %(track)s %(geneset)s\n"
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
        "\n"
        ".. figure:: %(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_fit_%(track)s.png\n"
        "\n"
        ".. figure:: %(edir)s/%(method)s/%(geneset)s_%(method)s_%(level)s_residuals_%(track)s.png\n"
        "\n") % locals()

    return odict((("text", rst_text),))
def __call__(self, track, slice=None):
    """Count rows of ``<track>_merged_repeats`` with and without overlap.

    A row counts as "with" when ``nover > 0`` and as "without" when
    ``nover = 0``. Returns an ordered dict with the keys ``"with"``
    and ``"without"``.

    The ``%(track)s`` placeholder is not filled in here; presumably
    ``self.getFirstRow`` resolves it (confirm against the base class).
    """
    statement = (
        "SELECT SUM(CASE WHEN nover>0 THEN 1 ELSE 0 END) as with, "
        "SUM(CASE WHEN nover=0 THEN 1 ELSE 0 END) AS without "
        "FROM %(track)s_merged_repeats ")
    row = self.getFirstRow(statement)
    return odict(zip(("with", "without"), row))
def __call__(self, track, slice=None):
    """Number of ``<track>_merged_repeats`` rows with / without overlap.

    Returns an ordered dict ``{"with": n, "without": m}`` built from
    the single row returned by ``self.getFirstRow``.

    NOTE(review): the statement still contains ``%(track)s`` when it
    is handed over, so the accessor presumably substitutes it.
    """
    statement = '''SELECT SUM(CASE WHEN nover>0 THEN 1 ELSE 0 END) as with, SUM(CASE WHEN nover=0 THEN 1 ELSE 0 END) AS without FROM %(track)s_merged_repeats '''
    counts = self.getFirstRow(statement)
    labels = ("with", "without")
    return odict(list(zip(labels, counts)))
def __call__(self, track, slice=None):
    """Return with/without counts for the statement of *track*/*slice*.

    The statement comes from ``self.getStatement``. A falsy statement
    yields an empty list; otherwise the first row of the query is
    returned as an ordered dict with the keys ``"with"`` and
    ``"without"``.
    """
    statement = self.getStatement(track, slice)
    if statement:
        row = self.getFirstRow(statement)
        return odict(zip(("with", "without"), row))
    return []
def __call__(self, track, slice=None):
    """Return the ``overlapped_genes_gat_results`` table column by column.

    The rows from ``self.get`` are transposed; each of the six display
    labels maps to the tuple of values in the corresponding column.
    The arguments are not used.
    """
    rows = self.get(
        "SELECT track, annotation, round(expected,0) as expected, observed, round(fold,1) as fold, pvalue FROM overlapped_genes_gat_results ")
    columns = list(zip(*rows))

    result = odict()
    for label, values in zip(
            ("Dataset1", "Dataset2", "Expected overlap",
             "Observed overlap", "Fold Enrichment", "P-value"),
            columns):
        result[label] = values
    return result
def __call__(self, track):
    """Return the sense read counts of *track*, ordered by gene id.

    Returns an ordered dict with a single ``"nreads"`` entry.

    The placeholders ``%(track)s`` and ``%(section)s`` are not filled
    in here; presumably ``self.getValues`` resolves them (confirm
    against the base class).
    """
    statement = (
        "SELECT sense_nreads FROM %(track)s_%(section)s_coverage "
        "ORDER BY gene_id")
    values = self.getValues(statement)
    return odict([("nreads", values)])
def __call__(self, track):
    """Return the antisense fraction of reads per gene for *track*.

    The fraction is ``antisense_nreads / (antisense_nreads +
    sense_nreads)``, ordered by gene id. Returns an ordered dict with
    a single ``"proportion"`` entry.

    The statement is handed over with its placeholders; the accessor
    presumably supplies ``track`` and ``section``.
    """
    statement = (
        "SELECT CAST(antisense_nreads AS FLOAT)/ "
        "(antisense_nreads + sense_nreads ) AS proportion "
        "FROM %(track)s_%(section)s_coverage ORDER BY gene_id")
    result = odict()
    result["proportion"] = self.getValues(statement)
    return result
def __call__(self, track, slice=None):
    """Return the ``mean`` column of ``<track>_mappability``.

    Returns an ordered dict with a single ``'mean'`` entry. The
    ``%(track)s`` placeholder is presumably resolved by
    ``self.getValues`` (confirm against the base class).
    """
    statement = "SELECT mean FROM %(track)s_mappability"
    values = self.getValues(statement)
    return odict([('mean', values)])
def __call__(self, track):
    """Return ``nfailed`` for rows where ``<track>_nh`` is positive.

    Returns an ordered dict with a single entry keyed by *track*.

    ``%(table)s`` and ``%(limit)i`` are not locals of this method;
    presumably the accessor takes them from the tracker's attributes
    (confirm against the base class).
    """
    statement = (
        "SELECT nfailed FROM %(table)s "
        "WHERE %(track)s_nh > 0 LIMIT %(limit)i")
    result = odict()
    result[track] = self.getValues(statement)
    return result
def __call__(self, track, slice=None):
    """With/without counts for the query built by ``self.getStatement``.

    Returns an empty list when ``getStatement`` gives a falsy value.
    Otherwise the first result row is mapped onto the keys ``"with"``
    and ``"without"`` of an ordered dict.
    """
    statement = self.getStatement(track, slice)
    if not statement:
        return []

    counts = self.getFirstRow(statement)
    result = odict()
    for label, value in zip(("with", "without"), counts):
        result[label] = value
    return result
def __call__(self, track, slice=None):
    """Mean mappability values stored for *track*.

    Selects the ``mean`` column from ``<track>_mappability`` and
    returns it as an ordered dict ``{'mean': values}``.

    NOTE(review): ``%(track)s`` is left in the statement, so the
    accessor presumably fills it in.
    """
    result = odict()
    result['mean'] = self.getValues(
        '''SELECT mean FROM %(track)s_mappability''')
    return result