def test_allkinds(self):
    """Call ``venn`` once per sample dictionary (one, two, three and four sets).

    Each diagram is written to ``path + <stem> + 'pdf'``. ``path`` is not
    defined in this function; presumably it is a module-level output
    prefix -- confirm against the rest of the file.
    """
    from bbcflib.gfminer.figure import venn

    ext = 'pdf'
    one_set = {'A': 126}
    two_sets = {'Albert': 126, 'Barthur': 247, 'Albert|Barthur': 50}
    three_sets_a = {
        'Ar': 521, 'Bi': 14, 'Co': 290,
        'Ar|Bi': 11, 'Ar|Co': 100, 'Bi|Co': 4,
        'Ar|Bi|Co': 1,
    }
    three_sets_b = {
        'A': 521, 'B': 300, 'C': 290,
        'A|B': 11, 'A|C': 100, 'B|C': 44,
        'A|B|C': 5,
    }
    four_sets = {
        'A': 210, 'B': 220, 'C': 230, 'D': 240,
        'A|B': 80, 'A|C': 80, 'A|D': 80, 'B|C': 80, 'B|D': 80, 'C|D': 80,
        'A|B|C': 30, 'A|B|D': 30, 'A|C|D': 30, 'B|C|D': 30,
        'A|B|C|D': 10,
    }

    # (file stem, counts, legend); a legend of None means the keyword is
    # not passed at all, so venn() falls back to its own default.
    cases = [
        ('d1.', one_set, ['file1.bed']),
        ('d2.', two_sets, None),
        ('d3.1.', three_sets_a, None),
        ('d3.2.', three_sets_b, ['file1', 'file2', 'file3']),
        ('d4.', four_sets, ['file1', 'file2', 'file3', 'file4']),
    ]
    for stem, counts, names in cases:
        extra = {}
        if names is not None:
            extra['legend'] = names
        venn(counts, output=path + stem + ext, format=ext, **extra)
def __call__(self, **kw):
    """Compare the input files, archive the result and optionally draw a Venn diagram.

    Reads from ``kw``:
      * ``kw['SigMulti']['files']`` -- the list of input files;
      * ``kw['column']`` -- a column number, converted to ``int`` and
        decremented by one before being passed to ``self.compare``
        (presumably a 1-based column turned into a 0-based index).

    The output of ``self.compare`` is added to a gzip-compressed tar archive
    which is registered as ``'intersections'``. When there are at most four
    input files a PNG Venn diagram is also produced and registered as
    ``'venn_diagram'``.

    Returns whatever ``self.display_time()`` returns.
    """
    files_list = kw['SigMulti']['files']
    column = int(kw['column']) - 1
    output = self.temporary_path(fname='intersections.')
    # `legend` is returned by compare() but is not forwarded: venn() below
    # is deliberately left with legend=None, as in the original code.
    counts, legend = self.compare(files_list, output, column)

    # Compress. Close the archive even if adding the member fails.
    output_targz = self.temporary_path(fname=output + 'tar.gz')
    tar = tarfile.open(output_targz, 'w:gz')
    try:
        tar.add(output)
    finally:
        tar.close()
    # Register the archive that was actually written. The previous code
    # registered output + '.tar.gz', a different (double-dotted) path.
    self.new_file(output_targz, 'intersections')

    if len(files_list) <= 4:
        # Venn diagram
        venn_format = 'png'
        # A '.' separates the stem from the extension ('venn.png', not 'vennpng').
        venn_outname = self.temporary_path(fname='venn.' + venn_format)
        venn(counts, legend=None, options={}, output=venn_outname,
             format=venn_format)
        self.new_file(venn_outname, 'venn_diagram')
    return self.display_time()
def __call__(self, **kw):
    """Compare the input files, archive the result and optionally draw a Venn diagram.

    Reads from ``kw``:
      * ``kw['SigMulti']['files']`` -- the list of input files;
      * ``kw['column']`` -- a column number, converted to ``int`` and
        decremented by one before being passed to ``self.compare``
        (presumably a 1-based column turned into a 0-based index).

    The output of ``self.compare`` is added to a gzip-compressed tar archive
    which is registered as ``'intersections'``. When there are at most four
    input files a PNG Venn diagram is also produced and registered as
    ``'venn_diagram'``.

    Returns whatever ``self.display_time()`` returns.
    """
    files_list = kw['SigMulti']['files']
    column = int(kw['column']) - 1
    output = self.temporary_path(fname='intersections.')
    # `legend` is returned by compare() but is not forwarded: venn() below
    # is deliberately left with legend=None, as in the original code.
    counts, legend = self.compare(files_list, output, column)

    # Compress. Close the archive even if adding the member fails.
    output_targz = self.temporary_path(fname=output + 'tar.gz')
    tar = tarfile.open(output_targz, 'w:gz')
    try:
        tar.add(output)
    finally:
        tar.close()
    # Register the archive that was actually written. The previous code
    # registered output + '.tar.gz', a different (double-dotted) path.
    self.new_file(output_targz, 'intersections')

    if len(files_list) <= 4:
        # Venn diagram
        venn_format = 'png'
        # A '.' separates the stem from the extension ('venn.png', not 'vennpng').
        venn_outname = self.temporary_path(fname='venn.' + venn_format)
        venn(counts, legend=None, options={}, output=venn_outname,
             format=venn_format)
        self.new_file(venn_outname, 'venn_diagram')
    return self.display_time()
def __call__(self, **kw):
    """Count group overlaps, then write a Venn diagram and a text summary.

    Two input modes are selected by ``kw['input_type']`` (default ``"Table"``):

    * ``"Table"`` -- reads the table ``kw['table']``; ``kw['id_columns']`` is
      a comma-separated list of column numbers (each decremented by one to
      index a row) and ``kw['filters']`` a comma-separated list of numeric
      conditions, one per column. Columns without a filter get the
      always-true condition ``"1"``. Rows are counted per combination of
      satisfied conditions.
    * ``"Tracks"`` -- reads the files in ``kw['files']``; the length (or the
      score, when ``kw['type'] == 'score'``) of each cobbled feature is
      accumulated per combination of tracks. Unless scored, values are
      converted to percentages of the total covered length.

    ``c1`` holds the value for the exact combination only ("Coverage" in the
    summary); ``c2`` holds the value for every sub-combination as well
    ("Cumulative coverage"), and is what is drawn.

    A diagram is drawn only when there are at most four groups; its format is
    ``kw['output']`` (default ``'pdf'``).

    Returns ``self.display_time()``, or ``None`` when the Tracks input has a
    total coverage below 1 (only a short summary file is written then).

    Raises ``ValueError`` for any other ``input_type``.
    """
    # sys.maxint exists on Python 2 only; fall back to sys.maxsize elsewhere.
    max_int = getattr(sys, 'maxint', sys.maxsize)

    def _parse_logic(string):
        # Turn e.g. ">2 AND <5" into the template "(%f >2)and(%f <5)".
        # Everything except word characters, digits, "!=><." and spaces is
        # stripped first.
        s = re.sub(r'[^\w\d!=><\. ]', '', string)
        s = re.sub(r' OR ', ')or(%f ', s)
        s = re.sub(r' AND ', ')and(%f ', s)
        return "(%f " + s + ")"

    def _run_test(row, indx, cond):
        num = float(row[col_ind[indx]])
        # Clamp so that infinities are never formatted into the expression.
        num = max(-max_int, min(max_int, num))
        # One value per "%f" placeholder of *this* condition. The previous
        # code counted placeholders on an outer variable `c`, which only
        # worked through Python 2 list-comprehension variable leakage.
        num = (num,) * cond.count("%f")
        # NOTE(security): eval() runs on text derived from the user-supplied
        # filters. _parse_logic strips parentheses, quotes and operators
        # other than "!=><.", but this remains worth reviewing.
        return eval(cond % num)

    def _add_label(s, x):
        # Append the constant label `x` as an extra 'track_name' field.
        _f = s.fields + ['track_name']
        return FeatureStream((y + (x,) for y in s), fields=_f)

    venn_options = {}  # tune it here
    intype = kw.get("input_type") or "Table"
    if intype == "Table":
        s_cols = kw.get('id_columns', '')
        s_filters = kw.get('filters', '')
        infile = track(kw.get('table', ''), format='txt', header=True)
        col_ind = [int(i) - 1 for i in s_cols.split(",")]
        legend = [infile.fields[i] if i < len(infile.fields) else str(i)
                  for i in col_ind]
        conds = [_parse_logic(x) for x in s_filters.split(",")]
        tlabels = [chr(k + 65) for k in range(len(col_ind))]  # 'A', 'B', ...
        conds += ["1"] * (len(col_ind) - len(conds))
        combn = [tuple(sorted(x))
                 for k in range(len(tlabels))
                 for x in combinations(tlabels, k + 1)]
        c1 = dict(("|".join(c), 0) for c in combn)
        c2 = dict(("|".join(c), 0) for c in combn)
        indx = dict((c, [tlabels.index(x) for x in c]) for c in combn)
        for row in infile:
            tests = [_run_test(row, i, cond) for i, cond in enumerate(conds)]
            # Exact group of this row: counted once per row. It used to be
            # incremented inside the loop below, i.e. len(combn) times.
            key = "|".join([tlabels[n] for n, t in enumerate(tests) if t])
            if key:  # a row passing no test belongs to no group
                c1[key] += 1
            for c in combn:
                c2["|".join(c)] += all([tests[i] for i in indx[c]])
        nsamples = len(col_ind)
        # Join each label tuple into its key. The previous nested loop
        # iterated over the labels of each tuple and lost 'A|B'-style keys.
        combn = ['|'.join(x) for x in combn]
    elif intype == "Tracks":
        filenames = kw['files']
        if not isinstance(filenames, (list, tuple)):
            filenames = [filenames]
        for f in filenames:
            assert os.path.exists(f), "File not found: %s ." % f
        tracks = [track(f, chrmeta='guess') for f in filenames]
        nsamples = len(tracks)
        tlabels = [chr(k + 65) for k in range(len(tracks))]  # 'A', 'B', ...
        combn = [combinations(tlabels, k + 1) for k in range(len(tlabels))]
        combn = ['|'.join(sorted(y)) for x in combn for y in x]
        c1 = dict(zip(combn, [0] * len(combn)))
        c2 = dict(zip(combn, [0] * len(combn)))
        total_cov = 0.0
        _scored = (kw.get('type') == 'score')
        chromset = set(c for t in tracks for c in t.chrmeta)
        for chrom in chromset:
            streams = [_add_label(t.read(chrom), tlabels[n])
                       for n, t in enumerate(tracks)]
            s = cobble(concatenate(streams), scored=_scored)
            name_idx = s.fields.index('track_name')
            start_idx = s.fields.index('start')
            end_idx = s.fields.index('end')
            if _scored:
                score_idx = s.fields.index('score')
            for x in s:
                length = x[end_idx] - x[start_idx]
                total_cov += length
                sub = sorted(set(x[name_idx].split('|')))  # avoid 'A|A'
                cb = [combinations(sub, k) for k in range(1, len(sub) + 1)]
                cb = ['|'.join(sorted(y)) for c in cb for y in c]
                weight = x[score_idx] if _scored else length
                c1['|'.join(sub)] += weight
                for c in cb:
                    c2[c] += weight
        if total_cov < 1:
            output = self.temporary_path(fname='venn_summary.txt')
            # Text mode, like the regular summary below.
            with open(output, 'w') as summary:
                summary.write("Empty content (no coverage) on %s."
                              % (",".join(chromset)))
            self.new_file(output, 'venn_summary')
            return
        legend = [t.name for t in tracks]
        if _scored:
            for c in combn:
                c2[c] = round(c2[c])
        else:
            for c in combn:
                c2[c] = round((100 * c2[c]) / total_cov)
                c1[c] = (100 * c1[c]) / total_cov
    else:
        raise ValueError("Input type '%s' not supported." % intype)

    if nsamples <= 4:
        fmt = kw.get('output') or 'pdf'
        output = self.temporary_path(fname='venn_diagram.' + fmt)
        venn(c2, legend=legend, options=venn_options, output=output,
             format=fmt)
        self.new_file(output, 'venn_diagram')

    # Text summary
    output = self.temporary_path(fname='venn_summary.txt')
    with open(output, 'w') as summary:
        summary.write("%s\t%s\t%s\n"
                      % ("Group", "Coverage", "Cumulative coverage"))
        record = "%s\t%.2f\t%d\n"
        # Shorter keys first, then alphabetical.
        for c in sorted(combn, key=lambda x: (len(x), x)):
            summary.write(record % (c, c1[c], c2[c]))
    self.new_file(output, 'venn_summary')
    return self.display_time()
def __call__(self, **kw):
    """Count group overlaps, then write a Venn diagram and a text summary.

    Two input modes are selected by ``kw['input_type']`` (default ``"Table"``):

    * ``"Table"`` -- reads the table ``kw['table']``; ``kw['id_columns']`` is
      a comma-separated list of column numbers (each decremented by one to
      index a row) and ``kw['filters']`` a comma-separated list of numeric
      conditions, one per column. Columns without a filter get the
      always-true condition ``"1"``. Rows are counted per combination of
      satisfied conditions.
    * ``"Tracks"`` -- reads the files in ``kw['TrMulti']['files']``; the
      length (or the score, when ``kw['type'] == 'score'``) of each cobbled
      feature is accumulated per combination of tracks. Unless scored,
      values are converted to percentages of the total covered length.

    ``c1`` holds the value for the exact combination only ("Coverage" in the
    summary); ``c2`` holds the value for every sub-combination as well
    ("Cumulative coverage"), and is what is drawn.

    A diagram is drawn only when there are at most four groups; its format is
    ``kw['format']`` (default ``'pdf'``).

    Returns ``self.display_time()``, or ``None`` when the Tracks input has a
    total coverage below 1 (only a short summary file is written then).

    Raises ``ValueError`` for any other ``input_type``.
    """
    # sys.maxint exists on Python 2 only; fall back to sys.maxsize elsewhere.
    max_int = getattr(sys, 'maxint', sys.maxsize)

    def _parse_logic(string):
        # Turn e.g. ">2 AND <5" into the template "(%f >2)and(%f <5)".
        # Everything except word characters, digits, "!=><." and spaces is
        # stripped first.
        s = re.sub(r'[^\w\d!=><\. ]', '', string)
        s = re.sub(r' OR ', ')or(%f ', s)
        s = re.sub(r' AND ', ')and(%f ', s)
        return "(%f " + s + ")"

    def _run_test(row, indx, cond):
        num = float(row[col_ind[indx]])
        # Clamp so that infinities are never formatted into the expression.
        num = max(-max_int, min(max_int, num))
        # One value per "%f" placeholder of *this* condition. The previous
        # code counted placeholders on an outer variable `c`, which only
        # worked through Python 2 list-comprehension variable leakage.
        num = (num,) * cond.count("%f")
        # NOTE(security): eval() runs on text derived from the user-supplied
        # filters. _parse_logic strips parentheses, quotes and operators
        # other than "!=><.", but this remains worth reviewing.
        return eval(cond % num)

    def _add_label(s, x):
        # Append the constant label `x` as an extra 'track_name' field.
        _f = s.fields + ['track_name']
        return FeatureStream((y + (x,) for y in s), fields=_f)

    venn_options = {}  # tune it here
    intype = kw.get("input_type") or "Table"
    if intype == "Table":
        s_cols = kw.get('id_columns', '')
        s_filters = kw.get('filters', '')
        infile = track(kw.get('table', ''), format='txt', header=True)
        col_ind = [int(i) - 1 for i in s_cols.split(",")]
        legend = [infile.fields[i] if i < len(infile.fields) else str(i)
                  for i in col_ind]
        conds = [_parse_logic(x) for x in s_filters.split(",")]
        tlabels = [chr(k + 65) for k in range(len(col_ind))]  # 'A', 'B', ...
        conds += ["1"] * (len(col_ind) - len(conds))
        combn = [tuple(sorted(x))
                 for k in range(len(tlabels))
                 for x in combinations(tlabels, k + 1)]
        c1 = dict(("|".join(c), 0) for c in combn)
        c2 = dict(("|".join(c), 0) for c in combn)
        indx = dict((c, [tlabels.index(x) for x in c]) for c in combn)
        for row in infile:
            tests = [_run_test(row, i, cond) for i, cond in enumerate(conds)]
            # Exact group of this row: counted once per row. It used to be
            # incremented inside the loop below, i.e. len(combn) times.
            key = "|".join([tlabels[n] for n, t in enumerate(tests) if t])
            if key:  # a row passing no test belongs to no group
                c1[key] += 1
            for c in combn:
                c2["|".join(c)] += all([tests[i] for i in indx[c]])
        nsamples = len(col_ind)
        # Join each label tuple into its key. The previous nested loop
        # iterated over the labels of each tuple and lost 'A|B'-style keys.
        combn = ['|'.join(x) for x in combn]
    elif intype == "Tracks":
        filenames = kw['TrMulti']['files']
        if not isinstance(filenames, (list, tuple)):
            filenames = [filenames]
        for f in filenames:
            assert os.path.exists(f), "File not found: %s ." % f
        tracks = [track(f, chrmeta='guess') for f in filenames]
        nsamples = len(tracks)
        tlabels = [chr(k + 65) for k in range(len(tracks))]  # 'A', 'B', ...
        combn = [combinations(tlabels, k + 1) for k in range(len(tlabels))]
        combn = ['|'.join(sorted(y)) for x in combn for y in x]
        c1 = dict(zip(combn, [0] * len(combn)))
        c2 = dict(zip(combn, [0] * len(combn)))
        total_cov = 0.0
        _scored = (kw.get('type') == 'score')
        chromset = set(c for t in tracks for c in t.chrmeta)
        for chrom in chromset:
            streams = [_add_label(t.read(chrom), tlabels[n])
                       for n, t in enumerate(tracks)]
            s = cobble(concatenate(streams), scored=_scored)
            name_idx = s.fields.index('track_name')
            start_idx = s.fields.index('start')
            end_idx = s.fields.index('end')
            if _scored:
                score_idx = s.fields.index('score')
            for x in s:
                length = x[end_idx] - x[start_idx]
                total_cov += length
                sub = sorted(set(x[name_idx].split('|')))  # avoid 'A|A'
                cb = [combinations(sub, k) for k in range(1, len(sub) + 1)]
                cb = ['|'.join(sorted(y)) for c in cb for y in c]
                weight = x[score_idx] if _scored else length
                c1['|'.join(sub)] += weight
                for c in cb:
                    c2[c] += weight
        if total_cov < 1:
            output = self.temporary_path(fname='venn_summary.txt')
            # Text mode, like the regular summary below.
            with open(output, 'w') as summary:
                summary.write("Empty content (no coverage) on %s."
                              % (",".join(chromset)))
            self.new_file(output, 'venn_summary')
            return
        legend = [t.name for t in tracks]
        if _scored:
            for c in combn:
                c2[c] = round(c2[c])
        else:
            for c in combn:
                c2[c] = round((100 * c2[c]) / total_cov)
                c1[c] = (100 * c1[c]) / total_cov
    else:
        raise ValueError("Input type '%s' not supported." % intype)

    if nsamples <= 4:
        fmt = kw.get('format') or 'pdf'
        output = self.temporary_path(fname='venn_diagram.' + fmt)
        venn(c2, legend=legend, options=venn_options, output=output,
             format=fmt)
        self.new_file(output, 'venn_diagram')

    # Text summary
    output = self.temporary_path(fname='venn_summary.txt')
    with open(output, 'w') as summary:
        summary.write("%s\t%s\t%s\n"
                      % ("Group", "Coverage", "Cumulative coverage"))
        record = "%s\t%.2f\t%d\n"
        # Shorter keys first, then alphabetical.
        for c in sorted(combn, key=lambda x: (len(x), x)):
            summary.write(record % (c, c1[c], c2[c]))
    self.new_file(output, 'venn_summary')
    return self.display_time()