def make_venn(direc, file1, file2, filename, text1, text2, color1, color2, color3):
    """Plot and save a two-set Venn diagram of the overlap between two files.

    The file that is smaller on disk is handed to ``peak_overlap`` first.
    ``peak_overlap`` is defined elsewhere; its result is read here as
    ``(shared, first_total, second_total)`` -- presumably peak counts,
    TODO confirm against that helper.

    Parameters
    ----------
    direc : str
        Prefix prepended verbatim to ``file1`` and ``file2`` (no path
        separator is inserted).
    file1, file2 : str
        Names of the two input files.
    filename : str
        Used both as the plot title and as the output path.
    text1, text2 : str
        Display labels for ``file1`` and ``file2``.
    color1, color2, color3 : color
        Colours of the left-only, right-only and shared regions.
    """
    fileinput1 = direc + file1
    fileinput2 = direc + file2
    if os.stat(fileinput1).st_size > os.stat(fileinput2).st_size:
        smallerpath, biggerpath = fileinput2, fileinput1
        smallername, biggername = text2, text1
    else:
        smallerpath, biggerpath = fileinput1, fileinput2
        smallername, biggername = text1, text2

    # Context managers make sure both handles are closed even when
    # peak_overlap raises (the handles used to be leaked).
    with open(smallerpath) as smallerfile:
        with open(biggerpath) as biggerfile:
            overlap_info = peak_overlap(smallerfile, biggerfile)

    file1_nums = overlap_info[1] - overlap_info[0]
    file2_nums = overlap_info[2] - overlap_info[0]
    print("%s %s" % (file1_nums, file2_nums))
    if biggername == text1:
        v = venn2(subsets=(file2_nums, file1_nums, overlap_info[0]),
                  set_labels=(biggername, smallername))
    else:
        v = venn2(subsets=(file1_nums, file2_nums, overlap_info[0]),
                  set_labels=(smallername, biggername))
    print("%s %s %s" % (file2_nums, file1_nums, overlap_info[0]))

    # matplotlib-venn yields None for a region of size zero, so each patch is
    # checked before it is styled.
    for region, color, alpha in (('10', color1, 1.0),
                                 ('01', color2, 1.0),
                                 ('11', color3, 0.7)):
        patch = v.get_patch_by_id(region)
        if patch is not None:
            patch.set_color(color)
            patch.set_alpha(alpha)

    plt.title(filename)
    savefig(filename)
    plt.close()
def compre_two(ans_file1, ans_file2):
    """Compare two JSON answer files and plot the overlap of their exact matches.

    Each file holds a list of records with ``id``, ``em`` and ``f1`` keys.
    A Venn diagram of the ids with a truthy ``em`` is drawn, and up to 20
    ids that are ``em``-correct in the second file but have a falsy ``f1``
    in the first file are printed.
    """
    with open(ans_file1) as handle:
        answers_a = json.load(handle)
    with open(ans_file2) as handle:
        answers_b = json.load(handle)

    correct_a = [rec['id'] for rec in answers_a if rec['em']]
    missed_a = [rec['id'] for rec in answers_a if not rec['f1']]

    # Collected like its counterpart above; nothing below reads it.
    missed_b = [rec['id'] for rec in answers_b if not rec['f1']]
    correct_b = [rec['id'] for rec in answers_b if rec['em']]

    name_a = get_label(ans_file1)
    name_b = get_label(ans_file2)
    venn2([set(correct_a), set(correct_b)], set_labels=(name_a, name_b))

    gained = set(correct_b).intersection(set(missed_a))
    if len(gained) > 20:
        # Keep the printout short; which 20 ids survive is arbitrary.
        gained = list(gained)[:20]
    print('true in b, but wrong in a:')
    print(gained)
def plot_venn(t1=None, t2=None, t3=None, ax=None, set_colors=('r', 'b', 'k')):
    """Draw a two- or three-set Venn diagram from ``(items, name)`` pairs.

    Each of ``t1``, ``t2`` (and optionally ``t3``) is a pair whose first
    element is a list/set of members and whose second element is the name to
    display.  Every label shows the name followed by the number of distinct
    members.  With a falsy ``t3`` only the first two colours are used.
    """
    assert len(t1) == len(t2) == 2

    if t3:
        groups = [t1, t2, t3]
    else:
        groups = [t1, t2]

    members = [set(group[0]) for group in groups]
    captions = tuple(
        ['%s\n(%s)' % (group[1], len(set(group[0]))) for group in groups]
    )

    if t3:
        venn3(members, captions, set_colors=set_colors, alpha=0.5, ax=ax)
    else:
        venn2(members, captions, set_colors=set_colors[0:2], alpha=0.5, ax=ax)
def func_plot( self, json_data, str_output_figure ):
    """
    Plot the two data sets described in ``json_data`` as a venn diagram.

    Returns False without plotting unless exactly two data entries are
    present; otherwise saves the figure to ``str_output_figure`` and
    returns True.
    """

    ldict_data = json_data[ qp.c_STR_DATA ]
    str_title = json_data.get( qp.c_STR_TITLE, qp.c_STR_TITLE_DEFAULT )

    # Only the two-set mode is supported
    if len( ldict_data ) != 2:
        return False

    dict_first = ldict_data[ 0 ]
    dict_second = ldict_data[ 1 ]

    # Labels are optional; colors fall back to red / green
    lstr_labels = [ dict_first.get( qp.c_STR_DATA_LABEL ),
                    dict_second.get( qp.c_STR_DATA_LABEL ) ]
    lstr_colors = [ dict_first.get( qp.c_C_PLOT_COLOR, 'r' ),
                    dict_second.get( qp.c_C_PLOT_COLOR, 'g' ) ]

    venn2( [ set( dict_first[ qp.c_STR_DATA ] ), set( dict_second[ qp.c_STR_DATA ] ) ],
           set_labels = lstr_labels,
           set_colors = lstr_colors )

    plt.title( str_title )
    plt.tight_layout()
    plt.savefig( str_output_figure )
    plt.close()
    return True
def draw_venn_diag(venn_dataset, first_name, second_name, out_dir):
    """Save a Venn diagram comparing the genes hit in two datasets.

    Parameters
    ----------
    venn_dataset : list of integers
        ``[first only, second only, both]`` gene counts.
    first_name, second_name : string
        Names of the two input hits files; used as circle labels and in
        the output file name.
    out_dir : string
        Directory the image is written to.

    Writes
    ------
    venn_diag.<first_name>_and_<second_name>.png : png file
        Venn diagram image
    """
    only_first = venn_dataset[0]
    only_second = venn_dataset[1]
    in_both = venn_dataset[2]

    image_name = 'venn_diag.%s_and_%s.png' % (first_name, second_name)
    image_path = os.path.join(out_dir, image_name)

    venn2(subsets=(only_first, only_second, in_both),
          set_labels=(first_name, second_name))
    plt.title('$C.$ $albicans$ genes with hits')
    plt.savefig(image_path)
    plt.close()
def venn_pair(set1, set2, mask):
    """Plot the overlap between the entries listed for two ethnicities.

    ``mask`` is a ``%s``-style path template; it is filled with
    ``ethnicity_code[set1]`` and ``ethnicity_code[set2]`` to locate the two
    input files.  Every stripped line of a file is one set member.  The plot
    is saved as ``<set1>_<set2>_venn<suffix>.png``, where ``<suffix>`` is
    the part of ``mask`` after its first ``%s``.
    """
    # ``with`` closes each input file; they were previously left open.
    with open(mask % ethnicity_code[set1]) as handle:
        set_a = set(line.strip() for line in handle)
    with open(mask % ethnicity_code[set2]) as handle:
        set_b = set(line.strip() for line in handle)

    plt.cla()
    venn2((set_a, set_b), set_labels=(set1, set2))
    plt.savefig("%s_%s_%s.png" % (set1, set2, 'venn' + mask.split('%s')[1]))
def venn_diagram(df1, df2, labels=['A', 'B'], save=False):
    """Draw a Venn diagram of the index values shared by two DataFrames.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames whose indexes are compared.
    labels : sequence of str
        Circle labels; a trailing ``.txt`` is removed from each.  The default
        list is never mutated.
    save : bool
        When true, the outer-merged rows are written to ``inter.txt``,
        ``s1_only.txt`` and ``s2_only.txt`` (tab separated) and the figure is
        saved as ``<label0>_vs_<label1>``.
    """
    from matplotlib_venn import venn2
    import pandas as pd

    suffix = '.txt'
    try:
        # str.strip('.txt') removes any leading/trailing '.', 't' or 'x'
        # characters ('test.txt' -> 'es'); only the real suffix is dropped.
        labels = [s[:-len(suffix)] if s.endswith(suffix) else s for s in labels]
    except (AttributeError, TypeError):
        # Non-string labels are used unchanged.
        pass

    s1 = set(df1.index)
    s2 = set(df2.index)
    venn2([s1, s2], set_colors=['navy', 'lightblue'], set_labels=labels)

    # NOTE(review): the collapsed source does not show whether savefig was
    # meant to run when save is False; it is kept under ``save`` here.
    if save:
        inter = list(s1 & s2)
        s1_only = list(s1 - s2)
        s2_only = list(s2 - s1)
        merge = pd.merge(df1, df2, how='outer', left_index=True, right_index=True)
        merge.loc[inter].to_csv('inter.txt', sep='\t', index=True, header=True)
        merge.loc[s1_only].to_csv('s1_only.txt', sep='\t', index=True, header=True)
        merge.loc[s2_only].to_csv('s2_only.txt', sep='\t', index=True, header=True)
        plt.savefig(labels[0] + '_vs_' + labels[1], dpi=100)
def draw_venn2(A, B, sets):
    """Show a two-set Venn diagram built from precomputed member collections.

    Parameters
    ----------
    A, B : str
        Names of the two sets; each label also shows the set's total size.
    sets : mapping
        Must provide ``"A"``, ``"B"`` and ``"AB"`` (the intersection); only
        the lengths of these collections are used.
    """
    total_a = len(sets["A"])
    total_b = len(sets["B"])
    shared = len(sets["AB"])

    # venn2 expects (A only, B only, both)
    venn = [total_a - shared, total_b - shared, shared]
    labelA = A + " (" + str(total_a) + ")"
    labelB = B + " (" + str(total_b) + ")"

    # print() call instead of the Python 2 print statement
    print(venn)
    venn2(subsets=venn, set_labels=(labelA, labelB))
    plt.show()
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # The docstring above is passed to OptionParser as the usage text, so
    # the notes live in comments instead.
    #
    # Each benchmark file holds whitespace-separated rows of
    # ``prog pcounts tcounts shared``.  One row of panels is drawn per file
    # and one Venn diagram per row of that file; the result is saved to
    # "venn.pdf".  Only the first three files get a tag ("Bowers", "YGOB",
    # "Schnable") because the files are zipped with that fixed tuple.
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        data = []
        # Close the benchmark file once parsed (it used to stay open).
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Same output as the former ``print >> sys.stderr, message``
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def plot_two_set_venn_diagram(Ab, aB, AB, set_labels=None):
    '''
    Plots a two-set venn diagram, A and B.

    Inputs:
        Ab: counts in set A but not in B.
        aB: counts in set B but not in A.
        AB: counts in both set A and B (intersection).
        set_labels: a list of length 2 for set A (index 0) and set B (index 1).
    '''
    # The B-only region must use ``aB``; ``Ab`` used to be passed twice, so
    # both circles were always drawn with the size of set A's exclusive part.
    venn2(subsets=(Ab, aB, AB), set_labels=set_labels)
    plt.show()
def venn_pair(set1, set2, mask):
    """Plot the overlap between the entries of two linguistic groups.

    ``linguistic_groups[set1]`` and ``linguistic_groups[set2]`` list the
    members of each group.  For every member, ``mask`` (a ``%s``-style path
    template) is filled with ``ethnicity_code[member]`` and the stripped
    lines of that file are merged into the group's set.  The plot is saved
    as ``<set1>_<set2>_venn<suffix>.svg``, where ``<suffix>`` is the part of
    ``mask`` after its first ``%s``.
    """
    set_a = set()
    for s in linguistic_groups[set1]:
        # ``with`` closes each file; the handles were previously leaked.
        with open(mask % ethnicity_code[s]) as handle:
            set_a.update(line.strip() for line in handle)

    set_b = set()
    for s in linguistic_groups[set2]:
        with open(mask % ethnicity_code[s]) as handle:
            set_b.update(line.strip() for line in handle)

    plt.cla()
    venn2((set_a, set_b), set_labels=(set1, set2))
    plt.savefig("%s_%s_%s.svg" % (set1, set2, 'venn' + mask.split('%s')[1]))
def main(*fields):
    """Plot the overlap of the top-25 ranked entries of two fields.

    For each name in ``fields`` the matching document is read from the
    ``influences`` collection of the ``github`` MongoDB database; the first
    element of each of its first 25 ``ranks`` entries forms that field's
    set.  The diagram is saved under ``images/`` and the intersection of the
    first two sets is printed.  At least two field names are required.
    """
    client = MongoClient()
    influences = client['github']['influences']

    # A real list is needed: the result is indexed below, which a Python 3
    # ``map`` object does not support.
    sets = [
        set(entry[0] for entry in influences.find_one({'field': field})['ranks'][:25])
        for field in fields
    ]

    venn2(sets, fields)
    plt.savefig('images/' + fields[0] + '-' + fields[1] + '-venn2.png')
    print(sets[0].intersection(sets[1]))
def plot_venn(List_of_sets, Set_labels, Main = "I forgot to give this plot a name.", Out_File = "", Custom_overlap_numbers = []):
    """
    Given a list of sets, generate a venn diagram and save it to Out_File.

    Arguments:
        List_of_sets (two or three only!)
        Set_labels: Label for each circle
        Main: Title of plot
        Out_File: Where should plot be saved? And what should the file be named?
                  Parent directory expected to already exist; a bare file
                  name is saved in the current directory.
                  This will overwrite plots if they already exist
        Custom_overlap_numbers: optional. If you want to supply your own 3 overlap sets:
                  [# in first, # in second, # in both]

    Raises:
        ValueError: Out_File is empty or its directory is missing,
                  Custom_overlap_numbers has a length other than 0 or 3,
                  List_of_sets is not of length 2 or 3, or Set_labels does
                  not match the number of sets.
    """
    out_dir = os.path.dirname(Out_File)
    # dirname() is '' for a bare file name; that means "current directory"
    # and must not be reported as a missing path.
    if not Out_File or (out_dir and not os.path.isdir(out_dir)):
        raise ValueError(out_dir + " <--- PATH DOESN'T EXIST")

    n_custom = len(Custom_overlap_numbers)
    if n_custom not in (0, 3):
        raise ValueError("Custom overlap only works for 2 circle venn diagrams at the moment...")

    if n_custom == 3:
        plt.figure()
        venn2(subsets={'10': Custom_overlap_numbers[0],
                       '01': Custom_overlap_numbers[1],
                       '11': Custom_overlap_numbers[2]},
              set_labels=Set_labels)
        plt.title(Main)
        plt.savefig(Out_File)
        return

    n_sets = len(List_of_sets)
    if n_sets not in (2, 3):
        raise ValueError("List_of_sets needs to be of length 2 or 3.")
    if len(Set_labels) != n_sets:
        raise ValueError("Set_labels needs to be the same length as the number of sets...")

    # Default figure dimensions...
    plt.figure()
    if n_sets == 2:
        venn2(List_of_sets, Set_labels)
    else:
        venn3(List_of_sets, Set_labels)
    plt.title(Main)
    plt.savefig(Out_File)
def main(epistasis_file):
    """Summarise an epistasis CSV as a stacked plot, a series plot and a Venn diagram.

    The header line is skipped.  Each remaining line is split on commas and
    stored under ``(tokens[0], tokens[2])`` unless the reversed pair is
    already present.  For mutation counts 2..5 the cases are tallied by sign
    of epistasis and by "CLEAVED" status, then plotted through the
    project's ``pconv`` / ``scatterplot`` / ``plot_epi`` helpers and saved
    next to ``epistasis_file``.
    """
    dict_epistasis = {}  # list of list of sequences, where each item represents a label
    with open(epistasis_file) as e:
        lines = e.readlines()

    for l in lines[1:]:  # ignore header line
        tokens = l.split(',')
        # value consists of Starting Ratio, Ending Ratio, Epistasis,
        # Ending Fitness, # of Mutations, list of InterSeqs,
        # list of InterFits, list of InterRatios
        if dict_epistasis.get((tokens[2], tokens[0])) is None:
            dict_epistasis[(tokens[0], tokens[2])] = [
                float(tokens[1]), float(tokens[3]), float(tokens[5]),
                tokens[4], len(tokens[6::3]),
                tokens[6::3], tokens[7::3], tokens[8::3]]

    neg_epistasis = [0] * 4
    no_epistasis = [0] * 4
    pos_epistasis = [0] * 4
    n_functional = [0] * 4
    n_should_be_functional = [0] * 4
    n_total = [0] * 4

    # ``range`` replaces the Python 2-only ``xrange``.
    for i in range(2, 6):
        ind = i - 2
        # Select the records with i mutations once instead of rescanning the
        # whole dictionary for each of the six tallies.
        bucket = [value for value in dict_epistasis.values() if value[4] == i]
        neg_epistasis[ind] = sum(1 for value in bucket if value[2] < -0.000005)
        no_epistasis[ind] = sum(1 for value in bucket if abs(value[2]) < 0.000005)
        pos_epistasis[ind] = sum(1 for value in bucket if value[2] > 0.000005)
        n_functional[ind] = sum(1 for value in bucket if value[3] == "CLEAVED")
        n_should_be_functional[ind] = sum(
            1 for value in bucket if all(v == "CLEAVED" for v in value[6]))
        n_total[ind] = float(len(bucket))

    seq_func = set([key[1] for key, val in dict_epistasis.items()
                    if val[3] == "CLEAVED"])
    seq_pred_func = set([key[1] for key, val in dict_epistasis.items()
                         if all(v == "CLEAVED" for v in val[6])])

    fig, axarr = pconv.create_ax(1, 1, shx=True, shy=True)
    fig2, axarr2 = pconv.create_ax(1, 1)

    artists = []
    artists.extend(plot_epi(no_epistasis, n_total, axarr[0, 0], "No", color="gray"))
    artists.extend(plot_epi(neg_epistasis, n_total, axarr[0, 0], "Neg.",
                            bottom=no_epistasis, color="white"))
    artists.extend(plot_epi(pos_epistasis, n_total, axarr[0, 0], "Pos.",
                            bottom=[no + neg for no, neg in zip(no_epistasis, neg_epistasis)],
                            color="black"))

    # NOTE(review): a mutation count with no cases leaves a 0.0 total and
    # these divisions raise ZeroDivisionError -- unchanged from before.
    n_func_frac = [func / total for func, total in zip(n_functional, n_total)]
    n_pred_frac = [pred / total for pred, total in zip(n_should_be_functional, n_total)]

    scatterplot.plot_series(axarr2[0, 0],
                            [(range(2, 6), n_func_frac, "% Cleaved"),
                             (range(2, 6), n_pred_frac, "% Pred Cleaved")],
                            "", "Number of Mutations", "Fraction of Total Cases",
                            size=40, connect_dots=True, alpha=1.0)
    axarr2[0, 0].set_ylim([0, 4.0])

    fig_venn, axarr_venn = pconv.create_ax(1, 1)
    venn2([seq_func, seq_pred_func], set_labels=["Cleaved", "Pred Cleaved"],
          ax=axarr_venn[0, 0])

    lgd = axarr[0, 0].legend(artists, ["No", "Neg.", "Pos."], loc="center left",
                             bbox_to_anchor=(1.05, 0.5), borderaxespad=0.,
                             prop={'size': 9}, ncol=1, fancybox=True)

    pconv.save_fig(fig, epistasis_file, "plot", 3, 2.5, tight=False, size=9, extra_artists=lgd)
    pconv.save_fig(fig2, epistasis_file, "pred_v_cl", 5, 5, tight=True, size=10)
    pconv.save_fig(fig_venn, epistasis_file, "venn", 5, 5, tight=True, size=14)
def test_pr_28():
    """Check ``subset_label_formatter`` on the weighted and unweighted venn2/venn3 calls."""
    import matplotlib_venn as mv

    def as_value(x):
        return 'Value: %+0.3f' % (x / 100.0)

    seven_regions = (1, 2, 3, 4, 5, 6, 7)
    three_regions = (1, 2, 3)

    # Three-set diagram: default formatter, then a custom one
    v = mv.venn3(seven_regions, subset_label_formatter=None)
    assert v.get_label_by_id('010').get_text() == '2'
    v = mv.venn3(seven_regions, subset_label_formatter=as_value)
    assert v.get_label_by_id('010').get_text() == 'Value: +0.020'

    # Two-set diagram: default formatter, then a custom one
    v = mv.venn2(three_regions, subset_label_formatter=None)
    assert v.get_label_by_id('01').get_text() == '2'
    v = mv.venn2(three_regions, subset_label_formatter=as_value)
    assert v.get_label_by_id('01').get_text() == 'Value: +0.020'

    # Unweighted variants honour the custom formatter as well
    v = mv.venn3_unweighted(seven_regions, subset_label_formatter=as_value)
    assert v.get_label_by_id('010').get_text() == 'Value: +0.020'
    v = mv.venn2_unweighted(three_regions, subset_label_formatter=as_value)
    assert v.get_label_by_id('01').get_text() == 'Value: +0.020'
def draw(set1, set2, set3, label1, label2, label3):
    """Show a colour-coded Venn diagram of two or three hub collections.

    A three-set diagram is drawn when ``label3`` is truthy; otherwise
    ``set3`` is ignored and a two-set diagram is drawn.  Regions are
    coloured blue (first only), red (second only), purple (first and second)
    and, for three sets, green (third only) and black (all three).
    """
    set1 = set(set1)
    set2 = set(set2)
    region_colors = [('100', "blue"), ('010', "red"), ('110', "purple")]
    if label3:
        set3 = set(set3)
        v = venn3([set1, set2, set3], (label1, label2, label3))
        plt.title('Venn diagram for hubs: ' + label1 + "," + label2 + "," + label3, fontsize=20)
        # These regions exist only in a three-set diagram; asking a two-set
        # diagram for '111' raises IndexError, so they are added only here.
        region_colors += [('001', "green"), ('111', "black")]
    else:
        v = venn2([set1, set2], (label1, label2))
        plt.title('Venn diagram for hubs:' + label1 + "," + label2, fontsize=20)

    for region, color in region_colors:
        patch = v.get_patch_by_id(region)
        # Empty regions have no patch (None)
        if patch is not None:
            patch.set_color(color)

    ax = gca()
    # set_axis_bgcolor was removed from newer matplotlib in favour of
    # set_facecolor; fall back to the old name on old installations.
    if hasattr(ax, 'set_facecolor'):
        ax.set_facecolor('white')
    else:
        ax.set_axis_bgcolor('white')
    ax.set_axis_on()
    plt.show()
def venn_diagram(questions, output_dir):
    """Report and plot which questions two models answer exactly right.

    ``questions`` maps ids to objects exposing ``em`` (a pair of
    exact-match flags, one per model) and ``model_names``.  Summary counts
    are printed, the diagram is saved as ``venn_diagram.png`` in
    ``output_dir`` and five id lists are returned: correct for model 1,
    correct for model 2, correct for both, only model 1, only model 2.
    """
    correct_model1 = []
    correct_model2 = []
    for question_id in questions:
        flags = questions[question_id].em
        if flags[0] == 1:
            correct_model1.append(question_id)
        if flags[1] == 1:
            correct_model2.append(question_id)

    model_names = questions[list(questions.keys())[0]].model_names
    first_name = model_names[0]
    second_name = model_names[1]

    print('\nVenn diagram')

    ids_model1 = set(correct_model1)
    ids_model2 = set(correct_model2)
    correct_model1_and_model2 = list(ids_model1.intersection(ids_model2))
    correct_model1_and_not_model2 = list(ids_model1 - ids_model2)
    correct_model2_and_not_model1 = list(ids_model2 - ids_model1)

    print('{0} answers correctly = {1}'.format(first_name, len(correct_model1)))
    print('{0} answers correctly = {1}'.format(second_name, len(correct_model2)))
    print('Both answer correctly = {1}'.format(first_name, len(correct_model1_and_model2)))
    print('{0} correct & {1} incorrect = {2}'.format(first_name, second_name, len(correct_model1_and_not_model2)))
    print('{0} correct & {1} incorrect = {2}'.format(second_name, first_name, len(correct_model2_and_not_model1)))

    plt.clf()
    region_sizes = (len(correct_model1_and_not_model2),
                    len(correct_model2_and_not_model1),
                    len(correct_model1_and_model2))
    region_names = ('{0} correct'.format(first_name),
                    '{0} correct'.format(second_name),
                    'Both correct')
    venn_diagram_plot = venn2(subsets=region_sizes, set_labels=region_names,
                              set_colors=('r', 'b'), alpha=0.3, normalize_to=1)
    plt.savefig(os.path.join(output_dir, 'venn_diagram.png'))
    plt.close()

    return (correct_model1, correct_model2, correct_model1_and_model2,
            correct_model1_and_not_model2, correct_model2_and_not_model1)
def vdoc_plot(overlap):
    """Show a rotated, annotated two-circle diagram with a given overlap size.

    The circles have exclusive sizes 5 and 2; ``overlap`` is the size of the
    shared region, which is styled and labelled only when it is non-zero.
    The finished drawing is rotated by 90 degrees about the axes centre.
    """
    from matplotlib.transforms import Affine2D

    plt.figure(figsize=(13, 13), facecolor="white")

    # syntax: set1, set2, set1x2...
    subset_tuple = (5, 2, overlap)
    v = venn2(subsets=subset_tuple, set_labels=('A', 'B', 'C'))

    first_only = v.get_patch_by_id('100')
    first_only.set_alpha(0.1)
    first_only.set_color('gray')

    if overlap != 0:
        shared = v.get_patch_by_id('110')
        shared.set_color('green')
        shared.set_alpha(0.7)
        v.get_label_by_id('110').set_text('Consciousnes')

    v.get_patch_by_id('010').set_alpha(0.4)
    v.get_label_by_id('100').set_text('Set of all qualia')
    v.get_label_by_id('010').set_text('Set of all concurrent\n mental processes')
    # Blank out the per-set captions
    for set_id in ('A', 'B'):
        v.get_label_by_id(set_id).set_text('')

    outlines = venn2_circles(subsets=subset_tuple)
    outlines[0].set_ls('dotted')
    outlines[1].set_ls('dashed')
    plt.title("Venn Diagram of Consciousnes")

    ax = plt.gca()
    center_x = np.mean(ax.get_xlim())
    center_y = np.mean(ax.get_ylim())
    rotation = Affine2D().rotate_deg_around(center_x, center_y, 90) + ax.transData
    for artist in ax.patches + ax.texts:
        artist.set_transform(rotation)

    lower, upper = ax.get_ylim()[0], ax.get_ylim()[1]
    plt.ylim(lower - 0.2, upper + 0.2)
    plt.show()
def main():
    """Plot the overlap between two BED peak sets as a Venn diagram PDF.

    Command line: ``[options] <peaks1_bed> <peaks2_bed> <out_pdf>``.
    Peaks are counted per file with ``count_peaks``; the shared count is the
    number of lines printed by ``intersectBed -u -a peaks1 -b peaks2``.
    """
    usage = 'usage: %prog [options] <peaks1_bed> <peaks2_bed> <out_pdf>'
    parser = OptionParser(usage)
    parser.add_option('--l1', dest='label1', default='peaks1', help='Label for peak set 1')
    parser.add_option('--l2', dest='label2', default='peaks2', help='Label for peak set 2')
    (options, args) = parser.parse_args()

    if len(args) != 3:
        # parser.error() prints the message and exits
        parser.error('Must provide two peaks BED files and output PDF')
    peaks1_bed, peaks2_bed, out_pdf = args

    # count individual
    peaks1_count = count_peaks(peaks1_bed)
    peaks2_count = count_peaks(peaks2_bed)

    # count overlap
    # An argument list with the default shell=False: file names containing
    # spaces or shell metacharacters can no longer be split or interpreted
    # by a shell.
    p = subprocess.Popen(['intersectBed', '-u', '-a', peaks1_bed, '-b', peaks2_bed],
                         stdout=subprocess.PIPE)
    copeaks_count = sum(1 for _ in p.stdout)
    p.communicate()

    plt.figure()
    venn2(subsets=(peaks1_count - copeaks_count, peaks2_count - copeaks_count, copeaks_count),
          set_labels=[options.label1, options.label2],
          set_colors=['#e41a1c', '#A1A838'])
    plt.savefig(out_pdf)
    plt.close()
def main():
    """Command-line entry point: draw a two-circle Venn diagram of two gene lists.

    The figure is written to ``<labelA>-<labelB>-<timestamp>.png``.  A third
    list (``--c``) is not supported: a warning is logged and the program
    exits.  Genes given through ``--interest`` are plotted as points.
    """
    # args
    parser = argparse.ArgumentParser(
        description=(" Generate a venn diagram from Gene Lists "),
    )
    parser.add_argument("--a", help="first list file", required=True)
    parser.add_argument("--b", help="second list file", required=True)
    parser.add_argument("--labelA", help="first list label", default="A")
    parser.add_argument("--labelB", help="second list label", default="B")
    parser.add_argument("--c", help="third optional list file")
    parser.add_argument("--interest", help=" interested genes to be plotted as points")
    parser.add_argument("--title", help=" Text for plot title", default="A vs B Venn Diagram Plot")
    args = parser.parse_args()

    if args.c:
        log.warn("Three circle not supported yet")
        raise SystemExit

    gene_lists = [load_gene_list(args.a), load_gene_list(args.b)]
    circle_labels = (args.labelA, args.labelB)

    plt.figure(figsize=(10, 10))
    v = venn2(gene_lists, circle_labels)
    plt.title(args.title)

    if args.interest:
        interest_genes = load_gene_list(args.interest)
        plot_point_on_circle(v, 0, interest_genes, plt)

    # Timestamp without the decimal point keeps output names distinct
    ts = str(time.time()).replace(".", "")
    plt.savefig(args.labelA + "-" + args.labelB + "-" + ts + ".png")
def plot_venn_ligands(output_path, qu2, mobp2, intersection_mobp):
    """
    Plots venn diagram of mOBP vs. Qaio et al. ligands tested in their binding-assays

    Args:
        output_path = file the figure is saved to,
                      e.g. "../../data/obp_output/venn_mobp_vs_qaio.png"
        qu2 = sized collection; its length is the first region size
        mobp2 = sized collection; its length is the second region size
        intersection_mobp = sized collection; its length is the shared region size

    Usage:
        plot_venn_ligands("../../data/obp_output/venn_mobp_vs_qaio.png", qu2, mobp2, intersection)
    """
    plt.figure(figsize=(4, 4))

    region_sizes = (len(qu2), len(mobp2), len(intersection_mobp))
    v = venn2(subsets=region_sizes)

    # Replace the default set captions
    for set_id, caption in (('A', 'Exp'), ('B', 'MD')):
        v.get_label_by_id(set_id).set_text(caption)

    plt.savefig(output_path)
    plt.close()
def plot_two_set_venn(set1, set2, mycolors=('r', 'g'), mylabels=None, title='Plot title'):
    '''
    Show a two-circle venn diagram for two Python sets.

    Region sizes come from set arithmetic on ``set1`` and ``set2``.  Note
    that the matplotlib font settings are changed globally.
    '''
    # Set matplotlib font size globally
    matplotlib.rc('font', **{'family': 'sans',
                             'sans-serif': 'Arial',
                             'weight': 'bold',
                             'size': 25})

    only_first = len(set1 - set2)
    only_second = len(set2 - set1)
    in_both = len(set1 & set2)

    fig = plt.figure()
    # Fill whitespace in the margins by adjusting subplot
    fig.subplots_adjust(bottom=0.10, left=0.12, right=0.90, top=0.90)
    ax = fig.add_subplot(111)

    p = venn2(subsets=(only_first, only_second, in_both),
              set_colors=mycolors, set_labels=mylabels)
    plt.title(title)
    plt.show()
def venn2(subsets, title, unit_title, filename, set_labels=None, normalize=1.0, annotation=None):
    """Draw a two-set diagram with unit captions under each count and save it.

    Every region listed in the module-level ``venn2_keys`` gets
    ``unit_title`` appended below its count (with an ``s`` unless the count
    text is ``'1'``), is nudged upward and shrunk by two points.
    ``annotation`` optionally maps region ids to red, bold text placed below
    the region label.  ``normalize`` is accepted but not used.
    """
    plt.figure()
    v = venn.venn2(subsets=subsets, set_labels=set_labels)

    lift = np.array([0, 0.02])
    for label_id in venn2_keys:
        text = v.get_label_by_id(label_id)
        text.set_position(text.get_position() + lift)

        # TEMPORALLY COUPLED WITH CREATION OF DIAGRAM
        count_text = text.get_text()
        subtitle = unit_title if count_text == '1' else unit_title + 's'
        text.set_text(text.get_text() + '\n' + subtitle)
        text.set_size(text.get_size() - 2)

    if annotation is not None:
        drop = np.array([0, 0.085])
        for region in annotation:
            anchor = v.get_label_by_id(region)
            xy = anchor.get_position() - drop
            plt.annotate(annotation[region], xy=xy, xytext=xy, ha='center',
                         textcoords='offset points', color='r', weight='bold')

    for label in v.subset_labels:
        label.set_fontname('serif')

    if title is not None:
        plt.title(title)

    plt.savefig(filename)
    plt.close()
def draw_venn(title, names, numbers, out):
    """Save a two- or three-set Venn diagram with dashed circle outlines.

    ``numbers`` holds region sizes in matplotlib-venn order: seven values
    (Abc, aBc, ABc, abC, AbC, aBC, ABC) for three sets, otherwise the values
    are passed to the two-set functions.  When one of three sets is empty
    (all four regions that include it sum to zero) the diagram is reduced to
    the two remaining sets.  The figure is saved to ``out``.
    """
    if len(numbers) == 7:
        if numbers[0] + numbers[2] + numbers[4] + numbers[6] == 0:
            # first set is empty -> keep second and third
            numbers = [numbers[1], numbers[3], numbers[5]]
            names = [names[1], names[2]]
        elif numbers[1] + numbers[2] + numbers[5] + numbers[6] == 0:
            # second set is empty -> keep first and third
            numbers = [numbers[0], numbers[3], numbers[4]]
            names = [names[0], names[2]]
        elif numbers[3] + numbers[4] + numbers[5] + numbers[6] == 0:
            # third set is empty -> keep first and second
            numbers = [numbers[0], numbers[1], numbers[2]]
            names = [names[0], names[1]]

    # One figure per call.  The three-set branch used to repeat these two
    # calls, leaving an extra empty 10x10 figure open each time.
    plt.cla()
    plt.figure(figsize=(10, 10))
    if len(numbers) == 7:
        venn3(subsets=numbers, set_labels=names)
        venn3_circles(subsets=numbers, linestyle='dashed')
    else:
        venn2(subsets=numbers, set_labels=names)
        venn2_circles(subsets=numbers, linestyle='dashed')

    plt.title(title)
    plt.savefig(out)
def venn(df1, df2, df3=None, labels=None, ix1=None, ix2=None, ix3=None, return_intersection=False):
    """Draw a Venn diagram of the index entries of two or three DataFrames.

    Parameters
    ----------
    df1, df2, df3 : objects with an ``index`` attribute
        ``df3`` is optional; when given a three-set diagram is drawn.
    labels : sequence of str, optional
        Set labels; defaults to "A", "B" (and "C" for three sets).
    ix1, ix2, ix3 :
        Passed with the matching index to ``_process_ix`` (defined
        elsewhere), whose result is used as the set for that frame.
    return_intersection : bool
        Also return the intersection of all sets.

    Returns
    -------
    The current matplotlib axes, or ``(axes, intersection)``.

    Raises
    ------
    ImportError
        If the matplotlib-venn package is not installed.
    """
    try:
        import matplotlib_venn as mplv
    except ImportError:
        # The exception used to be built but never raised, so a missing
        # package only surfaced later as a confusing NameError.
        raise ImportError("To plot venn diagrams, install matplotlib-venn package: pip install matplotlib-venn")

    three_sets = df3 is not None
    if labels is None:
        labels = ["A", "B", "C"] if three_sets else ["A", "B"]

    s1 = _process_ix(df1.index, ix1)
    s2 = _process_ix(df2.index, ix2)

    if three_sets:
        s3 = _process_ix(df3.index, ix3)
        vn = mplv.venn3([s1, s2, s3], set_labels=labels)
        intersection = s1 & s2 & s3
    else:
        vn = mplv.venn2([s1, s2], set_labels=labels)
        intersection = s1 & s2

    ax = plt.gca()

    if return_intersection:
        return ax, intersection
    return ax
def overlap_bedpe():
    """Plot the overlap of two hard-coded call-set sizes and print a summary.

    The counts (35778 "All", 10959 "<52x", 10949 shared) are fixed in the
    code.  The figure is written below ``DIR/Figures``.
    """
    # Values
    a = 35778
    b = 10959
    a_and_b = 10949

    # Plot
    plt.close('all')
    fig, ax = plt.subplots(figsize=(8, 8))
    venn2(subsets=(a - a_and_b, b - a_and_b, a_and_b), set_labels=('All', '<52x'))
    outfile = '%s/Figures/venn.diagram.overlap.exclude.include.regions.high.coverage.png' % DIR
    savefig(outfile, bbox_inches='tight', dpi=400)

    # Print summary statistics (print() calls instead of Python 2 statements)
    print("Not excluding regions with coverage >52x identified %f times more sites" % ((a * 1.) / b))
    print("Virtually all regions in the more-restrictive call set are included in the more flexible one")
def process_multiple(names):
    """Merge several uploaded workbooks, annotate each sheet and plot gene overlap.

    ``names[:-1]`` are input workbooks in the media directory and
    ``names[-1]`` is the combined workbook that is written.  The first sheet
    of every input becomes ``Sheet<i>`` of the combined file; each sheet is
    then processed by ``get_gid`` / ``add_annotation`` / ``add_cols`` and
    written back with ``save_excel``.  A Venn diagram is saved when there
    are exactly two or three sheets.

    Returns ``(data, path)``: the processed sheets and the image path
    relative to the media directory.
    """
    def media_file(file_name):
        return '/Users/agatorano/Code/metascape/metascape.org/media/%s' % file_name

    name = names[-1]
    writer = pd.ExcelWriter(media_file(name))

    # print(names[:len(names)-1])
    files_ = [pd.ExcelFile(media_file(n)) for n in names[:len(names) - 1]]

    for i, f in enumerate(files_):
        df = f.parse(f.sheet_names[0])
        df.to_excel(writer, 'Sheet%s' % i)
    writer.save()

    xls = xlrd.open_workbook(media_file(name), on_demand=True)

    data = []
    genes = []
    for sheet in xls.sheet_names():
        list_ = add_annotation(get_gid(name, sheet))
        genes.append(set([x[0] for x in list_]))
        # print(genes)
        data.append(add_cols(list_, name, sheet))

    writer = pd.ExcelWriter('/Users/agatorano/Code/metascape/metascape.org/media/' + name)
    for position, table in enumerate(data, 1):
        save_excel(table, writer, position)

    plt.figure(figsize=(7, 7))

    # Images are filed under img/<year>/<month>/<day>/
    now = datetime.datetime.now()
    path = 'img/' + now.strftime("%Y/%m/%d/venn%H_%M_%S.png")
    img = '/Users/agatorano/Code/metascape/metascape.org/media/' + path
    output_directory = os.path.dirname(img)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    sheet_count = len(genes)
    if sheet_count == 3:
        venn3(genes, ('File1', 'File2', 'File3'))
        plt.savefig(img)
    elif sheet_count == 2:
        venn2(genes, ('File1', 'File2'))
        plt.savefig(img)

    writer.save()
    return data, path
def get_plot(result, colors, outdir):
    """Save a two-set Venn diagram described by a five-element ``result``.

    ``result`` is laid out as
    ``(label_a, label_b, total_a, total_b, shared)``.  The figure is written
    to ``outdir`` under the name ``<label_a>_VS_<label_b>``.
    """
    labels = result[:2]
    shared = result[4]
    only_a = result[2] - shared
    only_b = result[3] - shared

    venn2(subsets=(only_a, only_b, shared), set_labels=labels, set_colors=colors)

    figure_name = "_VS_".join(labels)
    plt.savefig(os.path.join(outdir, figure_name))
    plt.close()
def makeavenndiagram(folder):
    """Save a Venn diagram comparing AprA and HspX hits to ``folder``.

    Rows of the module-level ``C1_SD + C2_SD + C3_SD`` data are counted as
    common hits when column 11 starts with 'HspX' and column 10 starts with
    'AprA', and as HspX-only hits when only column 11 matches.
    """
    hspxcount = 0
    bothcount = 0
    for i in C1_SD + C2_SD + C3_SD:
        if i[11].startswith('HspX') and i[10].startswith('AprA'):
            bothcount += 1
        elif i[11].startswith('HspX'):
            hspxcount += 1

    print("...Making a Venn Diagram of hits...")
    # NOTE(review): the AprA-only size is the hard-coded value 166 rather
    # than a count taken from the data -- confirm it is still correct.
    venn2(subsets=(166, hspxcount, bothcount),
          set_labels=('AprA HTS-specific hits', 'HspX HTS-specific hits', 'Common hits'))
    plt.title('AprA and HspX HTS hit comparisons')
    # savefig creates the file itself.  The extra text-mode handle that used
    # to be opened on the same path beforehand served no purpose and could
    # clash with savefig's own write.
    plt.savefig(folder + "/" + 'Venn Diagram.png', dpi=300)
    plt.clf()
    return
def make_venn(sizes, title, minp):
    """Show a venn diagram of DMP counts.

    ``title`` is expected to look like ``a_b_c_d.ext``: the part before the
    first dot is split on underscores and items 1 and 3 name the two
    samples.  When that pattern does not match, the raw title is used and
    the circles are labelled as WGBS generations 30 and 31.  ``minp`` is the
    p-value threshold shown in the plot title.
    """
    from matplotlib import pyplot as plt
    from matplotlib_venn import venn2

    try:
        name_parts = title.split('.')[0].split('_')
        sample_a = name_parts[1]
        sample_b = name_parts[3]
        title = 'DMPs with p < %.2E between %s and %s' % (minp, sample_a, sample_b)
        c = venn2(subsets=sizes, set_labels=('WGBS', 'epiGBS', ''))
    except IndexError:
        # Fewer than four underscore-separated parts in the title
        title = 'DMPs with p < %.2E in %s' % (minp, title)
        c = venn2(subsets=sizes, set_labels=('WGBS gen30', 'WGBS gen31'))

    plt.title(title)
    plt.show()
def retFig(key, key2):
    '''
    Build a figure comparing two BioID datasets.

    The proteins under ``dict1[key]`` and ``dict2[key2]`` are compared; the
    returned (already closed) figure holds a venn diagram of the two sets
    plus a text block giving the number and the sorted names of the
    proteins they share.
    '''
    proteins_a = set(dict1[key])
    proteins_b = set(dict2[key2])

    # Identification of the common proteins between the two datasets
    common = proteins_a & proteins_b
    common_names = " ".join(sorted(common))

    # Generation of the Venn diagram
    c = plt.figure(figsize=(10, 10))
    plt.title("Comparing " + key + " vs " + key2 + " BioID Datasets:",
              fontname="Helvetica", fontsize=18, fontweight='bold')

    txt = ("There are a total of " + str(len(common))
           + " proteins common between the two datasets.\n Common: "
           + TextWrap(common_names, 70))
    plt.text(0.05, 0.07, txt, transform=c.transFigure, size=10)

    venn2([set(dict1[key]), set(dict2[key2])], set_labels=(key, key2))
    plt.close()
    return c
def make_ven2_plot(benchmark_set, compare_set, benchmark, file_to_compare):
    """Plot, save and show a venn diagram of a benchmark set against another set.

    Drawing happens only when the module-level ``venn_bool`` equals True.
    The last four characters of both file names are cut off to form the
    labels, the title and the output name ``<benchmark>_<other>_venn``.
    """
    # Explicit comparisons: values equal to neither True nor False skip
    # both branches.
    if venn_bool == True:
        benchmark_stem = benchmark[0:-4]
        compare_stem = file_to_compare[0:-4]
        plot_name = benchmark_stem + '_' + compare_stem + '_venn'  # +str(filter_y))

        plt.figure(figsize=(4, 4))
        v = venn2([benchmark_set, compare_set],
                  set_labels=(benchmark_stem, compare_stem))
        plt.title(plot_name)
        plt.savefig(plot_name)
        plt.tight_layout()
        plt.show()
    elif venn_bool == False:
        plt.show()
def test_pr_28():
    """Exercise ``subset_label_formatter`` for every venn2/venn3 entry point."""
    import matplotlib_venn as mv

    percent_style = lambda x: 'Value: %+0.3f' % (x / 100.0)
    triple = (1, 2, 3, 4, 5, 6, 7)
    pair = (1, 2, 3)

    # (plot function, subset sizes, formatter, region id, expected label)
    cases = [
        (mv.venn3, triple, None, '010', '2'),
        (mv.venn3, triple, percent_style, '010', 'Value: +0.020'),
        (mv.venn2, pair, None, '01', '2'),
        (mv.venn2, pair, percent_style, '01', 'Value: +0.020'),
        (mv.venn3_unweighted, triple, percent_style, '010', 'Value: +0.020'),
        (mv.venn2_unweighted, pair, percent_style, '01', 'Value: +0.020'),
    ]
    for plot, sizes, formatter, region, expected in cases:
        v = plot(sizes, subset_label_formatter=formatter)
        assert v.get_label_by_id(region).get_text() == expected
def plotVennDiag(geneset1, geneset2, labels, filename):
    """Save a colour-coded two-set Venn diagram of two gene collections.

    Parameters
    ----------
    geneset1, geneset2 : iterable
        Gene identifiers; duplicates are collapsed.
    labels : sequence of str
        Labels for the first and second set.
    filename : str
        Output image path.
    """
    v = venn2([set(geneset1), set(geneset2)], set_labels=labels)

    region_colors = (
        ('10', '#a6cee3'),  # human body map color
        ('01', '#b2df8a'),  # gtex color
        ('11', 'grey'),     # overlap color - make this prettier
    )
    for region, color in region_colors:
        patch = v.get_patch_by_id(region)
        # An empty region (disjoint or nested gene sets) has no patch.
        if patch is not None:
            patch.set_color(color)

    plt.savefig(filename, bbox_inches='tight')
    plt.close()
    print('wrote venn diagram figure to', filename)
def plot_sumfiles(fig, fig_venn, only_data, not_data):
    """Add the "all files" bar chart and, for up to three annotators, a Venn diagram.

    Relies on the module-level ``nrows``, ``ncols``, ``sorted_hm`` (annotator
    names) and ``pr`` (the name displayed as 'songexplorer').

    ``only_data`` is ordered ``[everyone, only <1>, only <2>, ...]`` and
    ``not_data`` holds the matching "not <k>" counts; the latter is used
    only with more than two annotators.  Neither argument is modified.
    """
    shown_names = [x if x != pr else 'songexplorer' for x in sorted_hm]
    n_annotators = len(sorted_hm)

    ax = fig.add_subplot(nrows, ncols, 1)
    if n_annotators < 4:
        ax_venn = fig_venn.add_subplot(nrows, ncols, 1)

    xdata = ['everyone', *['only ' + name for name in shown_names]]
    # Work on a copy: extending the alias used to grow the caller's list.
    ydata = list(only_data)
    if n_annotators > 2:
        xdata.extend(['not ' + name for name in shown_names])
        ydata.extend(not_data)

    if n_annotators == 2:
        idx = [1, 2, 0]  # Ab, aB, AB
        venn2(subsets=[ydata[x] for x in idx], set_labels=shown_names, ax=ax_venn)
    elif n_annotators == 3:
        idx = [1, 2, 6, 3, 5, 4, 0]  # Abc, aBc, ABc, abC, AbC, aBC, ABC
        venn3(subsets=[ydata[x] for x in idx], set_labels=shown_names, ax=ax_venn)

    # ax_venn exists only for fewer than four annotators; the guard keeps
    # larger groups from hitting an unbound name.
    if n_annotators < 4:
        ax_venn.set_title('all files', fontsize=8)

    ax.bar(xdata, ydata, color='k')
    ax.set_xticklabels(xdata, rotation=40, ha='right')
    ax.set_title('all files', fontsize=8)
def plot_file(fig, fig_venn, only_data, not_data):
    """Add one file's bar chart and, for up to three annotators, its Venn diagram.

    Relies on the module-level ``nrows``, ``ncols``, ``iplot`` (subplot
    position), ``sorted_hm`` (annotator names), ``pr`` (the name displayed
    as 'songexplorer') and ``csvbase`` (its base name is the title).

    ``only_data`` is ordered ``[everyone, only <1>, only <2>, ...]`` and
    ``not_data`` holds the matching "not <k>" counts; the latter is used
    only with more than two annotators.  Neither argument is modified.
    """
    shown_names = [x if x != pr else 'songexplorer' for x in sorted_hm]
    n_annotators = len(sorted_hm)

    ax = fig.add_subplot(nrows, ncols, iplot)
    if n_annotators < 4:
        ax_venn = fig_venn.add_subplot(nrows, ncols, iplot)

    xdata = ['everyone', *['only ' + name for name in shown_names]]
    # Work on a copy: extending the alias used to grow the caller's list.
    ydata = list(only_data)
    if n_annotators > 2:
        xdata.extend(['not ' + name for name in shown_names])
        ydata.extend(not_data)

    if n_annotators == 2:
        idx = [1, 2, 0]  # Ab, aB, AB
        venn2(subsets=[ydata[x] for x in idx], set_labels=shown_names, ax=ax_venn)
    elif n_annotators == 3:
        idx = [1, 2, 6, 3, 5, 4, 0]  # Abc, aBc, ABc, abC, AbC, aBC, ABC
        venn3(subsets=[ydata[x] for x in idx], set_labels=shown_names, ax=ax_venn)

    # ax_venn exists only for fewer than four annotators; the guard keeps
    # larger groups from hitting an unbound name.
    if n_annotators < 4:
        ax_venn.set_title(os.path.basename(csvbase), fontsize=8)

    ax.bar(xdata, ydata, color='k')
    ax.set_title(os.path.basename(csvbase), fontsize=8)
    ax.set_xticklabels(xdata, rotation=40, ha='right')
def make_venn_plot(A=None, B=None, AB=None):
    """Draw a black-outlined two-set Venn diagram with a sky-blue intersection.

    Args:
        A: Size of the region belonging only to the first set.
        B: Size of the region belonging only to the second set.
        AB: Size of the intersection.

    Returns:
        The ``pyplot`` module, so the caller can show or save the figure.
    """
    ven = venn2(subsets=(A, B, AB))
    fills = (('10', 'white'), ('01', 'white'), ('11', 'skyblue'))
    for region_id, face in fills:
        patch = ven.get_patch_by_id(region_id)
        # A zero-sized region has no patch (None); skip it instead of crashing.
        if patch is None:
            continue
        patch.set_color(face)
        patch.set_edgecolor('black')

    # Change the background color.
    axes = pyplot.gca()
    axes.set_axis_on()
    axes.set_facecolor('white')
    return pyplot
def getVenn2plot(sets, labels, title, caption, path_to_img):
    """Render a captioned two-set Venn diagram, save it and show it.

    Args:
        sets: Value forwarded to ``venn2`` as ``subsets``.
        labels: Value forwarded to ``venn2`` as ``set_labels``.
        title: Axes title.
        caption: Text written near the bottom-left of the figure.
        path_to_img: Output path handed to ``plt.savefig``.
    """
    fig = plt.figure(figsize=(7, 7))
    plt.xlim(0, 3)
    plt.ylim(0, 3)
    fig.text(0.15, .05, caption, fontsize=12)

    v = venn2(subsets=sets, set_labels=labels)
    for region_id, color in (('11', 'purple'), ('01', 'blue')):
        patch = v.get_patch_by_id(region_id)
        # Empty regions have no patch object (None) in matplotlib-venn.
        if patch is not None:
            patch.set_color(color)

    plt.title(title)
    plt.savefig(path_to_img, bbox_inches='tight', transparent=False)
    plt.show()
def main(args):
    """Compare the first sequences of two FASTA inputs and plot their overlap.

    Parses ``args`` with ``parse_args``, converts the first record of each
    input with ``nuc_to_pep``, builds a set from the elements of each first
    converted result, saves a two-set Venn diagram to ``<output>.png``,
    prints the intersection and exits with status 0.

    Args:
        args: Raw command-line arguments handed to ``parse_args``.
    """
    args = parse_args(args)
    seq1 = nuc_to_pep(parse_fasta(args.input1)[0])
    seq2 = nuc_to_pep(parse_fasta(args.input2)[0])

    # Sets of the individual elements of each first converted sequence
    # (presumably peptide residues -- confirm against nuc_to_pep).
    A = set(seq1[0])
    B = set(seq2[0])

    venn2([A, B], set_labels=(args.A_label, args.B_label))
    plt.savefig(str(args.output + '.png'))

    print(A.intersection(B))
    sys.exit(0)
def create_venn2(df, comparison_pair):
    """
    Create a 2 circle Venn Diagram whose regions list their members.

    Each selected column is turned into a set of ``"<index>:<value>"``
    strings; the diagram regions are labelled with the strings they contain.
    If only one column is selected, it is compared against itself.

    Parameters
    ----------
    df: DataFrame
        df contains all option ratings for each feature
    comparison_pair: list
        Two strings. Determines which options to compare.
    """
    records = df[comparison_pair].T.to_dict('records')

    def as_item_set(record):
        # One "<key>:<value>" string per entry of the record.
        return {str(key) + ':' + str(value) for key, value in record.items()}

    set_A = as_item_set(records[0])
    # Fall back to comparing A with itself when there is no second record.
    set_B = as_item_set(records[1]) if len(records) > 1 else set_A

    plt.figure(figsize=(10, 10))
    v = venn2([set_A, set_B], set_labels=comparison_pair)

    region_members = (
        ('01', set_B.difference(set_A)),
        ('10', set_A.difference(set_B)),
        ('11', set_B.intersection(set_A)),
    )
    for region_id, members in region_members:
        label = v.get_label_by_id(region_id)
        # Empty regions have no label object (None), e.g. both exclusive
        # regions when set_B is set_A; skip them rather than raising.
        if label is not None:
            label.set_text('\n'.join(members))

    plt.title('Venn Diagram')
    plt.show()
    return
def performance_visual(estimated, test_y):
    '''
    Draw a Venn diagram of false negatives, false positives and true positives.

    Args:
        estimated: estimated labels/probabilities, passed through ``classify``
        test_y: reference labels handed to ``confusion_matrix``
    Returns:
        None; draws the performance venn diagram on the current axes
    '''
    tp, tn, fp, fn = confusion_matrix(classify(estimated), test_y)
    fn_circle = max(fn, fn - tp)
    fp_circle = max(fp, fp - tp)
    diagram = venn2(subsets=(fn_circle, fp_circle, tp),
                    set_labels=('', '', ''),
                    set_colors=('#5791c6', '#d8b66e'))
    for region_id, text in (('10', 'FN'), ('11', 'TP'), ('01', 'FP')):
        label = diagram.get_label_by_id(region_id)
        # A region of size 0 has no label object (None); nothing to rename.
        if label is not None:
            label.set_text(text)
def plot_venn_two(
    sizes: List[int],
    labels: List[str],
    figpath: str = 'venn_two.pdf',
    title: str = '',
    **kwargs,
) -> None:
    """Plot a single Venn Diagram with two terms.

    The current pyplot figure is saved as ``<figpath>.pdf``; a trailing
    ``.pdf`` already present in ``figpath`` is not duplicated.

    Args:
        sizes (List[int]): List of ints of length 3. First two elements
            correspond to the labels, third one to the intersection.
        labels (List[str]): List of str of length 2, containing names of
            circles.
        figpath (str): Name under which figure is saved. Defaults to
            'venn_two.pdf'. Pass '' to derive the name from the title.
        title (str): Title of the plot. Defaults to '', i.e. it is inferred
            from labels via ``get_name``.
        **kwargs: Additional keyword arguments forwarded to ``venn2`` and
            ``venn2_circles``. If ``ax`` is given, the title is set on it.
    """
    assert len(sizes) == 3, 'Incorrect type/length of sizes'
    assert len(labels) == 2, 'Incorrect type/length of labels'

    title = get_name(labels) if title == '' else title
    figname = title.lower().replace(' vs. ', '_') if figpath == '' else figpath
    # The extension is appended on save; strip an existing one so the default
    # 'venn_two.pdf' is not written as 'venn_two.pdf.pdf'.
    if figname.lower().endswith('.pdf'):
        figname = figname[:-len('.pdf')]

    venn2(subsets=sizes, set_labels=labels, alpha=0.6, **kwargs)
    venn2_circles(
        subsets=sizes,
        linestyle='solid',
        linewidth=0.6,
        color='grey',
        **kwargs,
    )

    target_ax = kwargs.get('ax', False)
    if target_ax:
        target_ax.set_title(title, fontdict={'fontweight': 'bold'}, size=15)
    else:
        plt.title(title, fontdict={'fontweight': 'bold'}, size=15)
    plt.savefig(f'{figname}.pdf')
def comparePredictors(pred1, pred2, names=None, allele='HLA-DRB1*0101'):
    """Compare 2 predictors and plot the result.

    Produces one figure with three panels: a scatter of the matched
    predictions, a Venn diagram of the protein names that have binders, and
    a Venn diagram of the binder peptides themselves.
    Input is a dataframe with sequence records (per the original note).
    """

    from matplotlib_venn import venn2
    import pylab as plt

    figure = plt.figure(figsize=(10, 10))
    scatter_ax = figure.add_subplot(221)

    binders1 = pred1.getBinders('cutoff')
    binders2 = pred2.getBinders('cutoff')
    names = dict(list(pred1.data.groupby('name'))).keys()

    # merged binders: x/y are the paired values plotted against each other
    m, x, y = getMatchingPredictions(pred1, pred2, method='cutoff')
    scatter_ax.plot(x, y, 'o', ms=3, alpha=0.5)
    scatter_ax.set_xlabel(pred1.name)
    scatter_ax.set_ylabel(pred2.name)
    ind = np.arange(len(names))

    peptides1 = list(binders1.peptide)
    peptides2 = list(binders2.peptide)
    proteins1 = dict(list(binders1.groupby('name'))).keys()
    proteins2 = dict(list(binders2.groupby('name'))).keys()
    predictor_labels = (pred1.name, pred2.name)

    # venn2 draws on the current axes, i.e. the subplot added just before it
    protein_ax = figure.add_subplot(222)
    protein_ax.set_title('proteins overlap')
    venn2([set(proteins1), set(proteins2)], set_labels=predictor_labels)

    peptide_ax = figure.add_subplot(212)
    venn2([set(peptides1), set(peptides2)], set_labels=predictor_labels)

    figure.suptitle('%s vs. %s' % (pred1.name, pred2.name), fontsize=16)
    plt.show()
    return
def myvenn2(data, parameters, output):
    """Count set membership from a CSV file and save a two-set Venn diagram as SVG.

    Columns 1 and 2 of every data row are parsed as floats; a non-zero value
    means the row belongs to that set.  Rows that are zero in both columns
    are ignored.  The header cells of columns 1 onward become the set labels.

    Args:
        data: Path of the CSV file (first row is the header).
        parameters: Mapping of optional settings: 'figsize' (a literal such
            as "(5, 5)" or an already-built value), 'title', 'colorA',
            'colorB', 'colorAB'.
        output: Destination handed to ``savefig`` (written with format 'svg').
    """
    import ast

    subset_Ab = 0
    subset_aB = 0
    subset_AB = 0
    with open(data) as f:
        f_csv = csv.reader(f)
        headers = next(f_csv)
        for row in f_csv:
            in_a = float(row[1]) != 0
            in_b = float(row[2]) != 0
            if in_a and in_b:
                subset_AB += 1
            elif in_a:
                subset_Ab += 1
            elif in_b:
                subset_aB += 1

    figure_kwargs = {'facecolor': 'w'}
    if 'figsize' in parameters:
        figsize = parameters['figsize']
        # Parse the textual figsize as a literal instead of exec()-ing a
        # string assembled from caller-supplied data.
        if isinstance(figsize, str):
            figsize = ast.literal_eval(figsize)
        figure_kwargs['figsize'] = figsize
    plt.figure(**figure_kwargs)

    if 'title' in parameters:
        plt.title(parameters['title'])
    colorA = parameters.get('colorA', '#E8B601')
    colorB = parameters.get('colorB', '#7AC3B1')
    colorAB = parameters.get('colorAB', '#6B7E23')

    v = matplotlib_venn.venn2(subsets=(subset_Ab, subset_aB, subset_AB),
                              set_labels=headers[1:])
    region_styles = (
        ('10', colorA, 0.6),
        ('01', colorB, 0.6),
        ('11', colorAB, 0.7),
    )
    for region_id, color, alpha in region_styles:
        patch = v.get_patch_by_id(region_id)
        # Regions with a count of zero have no patch (None).
        if patch is not None:
            patch.set_color(color)
            patch.set_alpha(alpha)
    savefig(output, format='svg')
def __drawVenn(self):
    """Redraw the pairwise property-overlap Venn diagrams in the Venn widget.

    For each of five properties, counts the distinct NAME values in the
    BATTERY table, then for every pair of properties counts the names they
    share and draws a two-set Venn diagram into a 5x2 subplot grid.
    """
    hf = self.ui.widgetVenn.figure
    hf.clear()  # Clear figure
    hf.set_size_inches((10, 10))

    pro_list = [
        "Capacity", "Conductivity", "Coulombic Efficiency", "Energy",
        "Voltage"
    ]
    color_dic = dict(zip(pro_list, ['C0', 'C2', 'C6', 'C8', 'C9']))

    # Number of distinct names recorded for each property.
    data_dic = {}
    for prop in pro_list:
        query = QSqlQuery(
            db=self.DB,
            query=
            "SELECT COUNT(DISTINCT NAME) FROM BATTERY WHERE PROPERTY LIKE '%s' AND NUM_RECORDS LIKE 'NONE'"
            % prop)
        while query.next():
            data_dic[prop] = query.value(0)

    for i, combo in enumerate(itertools.combinations(pro_list, 2)):
        query = QSqlQuery(
            db=self.DB,
            query=
            "SELECT COUNT() FROM (SELECT DISTINCT NAME AS PRO1 FROM BATTERY WHERE PROPERTY LIKE '%s'AND NUM_RECORDS LIKE 'NONE') INNER JOIN (SELECT DISTINCT NAME AS PRO2 FROM BATTERY WHERE PROPERTY LIKE '%s'AND NUM_RECORDS LIKE 'NONE') ON PRO1 = PRO2"
            % combo)
        # Start from 0 so a query without rows cannot reuse the previous
        # pair's count.
        x3 = 0
        while query.next():
            x3 = query.value(0)
        x1 = data_dic[combo[0]] - x3
        x2 = data_dic[combo[1]] - x3

        ax1 = hf.add_subplot(5, 2, i + 1)
        v = venn2(subsets=(x1, x2, x3),
                  set_labels=(combo[0], combo[1]),
                  ax=ax1)
        # Patches of empty regions are None in matplotlib-venn.
        patch_a = v.get_patch_by_id('A')
        if patch_a is not None:
            patch_a.set_alpha(1)
            patch_a.set_color(color_dic[combo[0]])
        patch_b = v.get_patch_by_id('B')
        if patch_b is not None:
            patch_b.set_color(color_dic[combo[1]])

    # One redraw after all subplots are in place.
    hf.canvas.draw()
def venn2_plot(set1, set2, label1, label2, size_label, size_num):
    """Draw a dashed-outline two-set Venn diagram with a white, bold-labelled overlap.

    Args:
        set1: First set.
        set2: Second set.
        label1: Label of the first set.
        label2: Label of the second set.
        size_label: Font size of the set labels.
        size_num: Font size of the region count labels.
    """
    v = venn2([set1, set2], ([label1, label2]))
    venn2_circles(subsets=[set1, set2],
                  linestyle='dashed',
                  linewidth=2,
                  color="k")

    # Patch and label objects are None for regions of size zero, so each
    # lookup is checked before styling.
    overlap_patch = v.get_patch_by_id('11')
    if overlap_patch is not None:
        overlap_patch.set_color('w')
        overlap_patch.set_alpha(1)
    overlap_label = v.get_label_by_id('11')
    if overlap_label is not None:
        overlap_label.set_color('k')
        overlap_label.set_fontweight('bold')

    for text in v.set_labels:
        if text is not None:
            text.set_fontsize(size_label)
            text.set_fontweight('bold')
    for text in v.subset_labels:
        if text is not None:
            text.set_fontsize(size_num)
def make_venn2(set_list, title, output_file):
    """
    Draw multiple stacked 2-circle venn diagrams with all pairs of sets.

    With exactly two sets a single diagram is drawn.  Otherwise one subplot
    row is created per pair of sets (3 rows for 3 sets, 6 for 4 sets, ...).
    Set labels come from the module-level ``base_vcf_file_name_list``.

    Args:
        set_list: List of sets to compare.
        title: Title of the figure (placed on the first diagram).
        output_file: Path handed to ``plt.savefig``.
    """
    num_sets = len(set_list)
    if num_sets == 2:
        c = venn2(set_list, set_labels=base_vcf_file_name_list)
        colorize_venn2(c)
        plt.title(title)
    else:
        pairs = list(itertools.combinations(range(num_sets), 2))
        # One row per pair instead of a fixed 3 rows; squeeze=False keeps
        # `axes` a 2-D array for any row count, so column 0 is always a
        # sequence of axes.
        fig, axes = plt.subplots(max(len(pairs), 1), squeeze=False)
        axes = axes[:, 0]
        for ax, pair in zip(axes, pairs):
            sets = [set_list[k] for k in pair]
            names = [base_vcf_file_name_list[k] for k in pair]
            c = venn2(sets, set_labels=names, ax=ax)
            colorize_venn2(c)
        axes[0].set_title(title)
    plt.savefig(output_file)
    plt.close()
def _read_bed_positions(path):
    """Return the set of integer positions covered by the intervals of a BED file."""
    positions = set()
    # Read-only access is enough; the file is never written.
    with open(path) as bed:
        for line in bed:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            # Columns 1 and 2 hold the start and (exclusive) end coordinate.
            positions.update(range(int(fields[1]), int(fields[2])))
    return positions


def main():
    """Plot how many genomic positions two accelerated-region BED files share.

    Expands every interval of both files into individual positions, prints
    the (only-Nick, only-Katie, shared) counts and shows them as a two-set
    Venn diagram.
    """
    nicks = _read_bed_positions(
        "constraint_primate_subs_1000_scc_accelerated_chr8.sorted.merged.bed")
    katies = _read_bed_positions("hg19_accelerated_regions_chr8p.bed")

    nick_and_katie = nicks.intersection(katies)
    nicks = nicks - nick_and_katie
    katies = katies - nick_and_katie

    s = (len(nicks), len(katies), len(nick_and_katie))
    print(s)

    venn2(subsets=s, set_labels=('Nick', 'Katie'))
    plt.title("Genomic Positions in Nicks-HARs intersected with Katies-HARs")
    plt.show()
def plot_group(data):
    '''
    Show one Venn diagram per consecutive group of names.

    The names are taken three at a time in dictionary order; a full group of
    three is drawn with venn3, a trailing group of two with venn2, and a
    trailing single name is skipped.

    :param data: is a dict where k is name and v is the snp position
    :return: None
    '''
    names = list(data.keys())
    for start in range(0, len(names), 3):
        chunk = names[start:start + 3]
        if len(chunk) == 3:
            draw = venn3
        elif len(chunk) == 2:
            draw = venn2
        else:
            continue
        member_sets = [set(data[name]) for name in chunk]
        draw(member_sets, set_labels=tuple(chunk))
        plt.show()
def plot_venn(dflist):
    """
    Create Venn diagrams for the total number of transcripts with detectable
    readthrough, one per pair of samples.

    dflist[0] vs dflist[1] (untreated) and dflist[2] vs dflist[3] (G418) are
    each merged on "#transcript"; the transcripts with RRTS > 0 in either
    member of the pair form the two sets.  Output goes to
    <rootDir>/figures/Fig3S1B_untr.pdf and Fig3S1B_g418.pdf.  Reads the
    module-level names rootDir and samplelist.
    """
    comparisons = (
        ("untr", 0, 1),  # Untreated
        ("g418", 2, 3),  # G418
    )
    for tag, first, second in comparisons:
        figout = "%s/figures/Fig3S1B_%s.pdf" % (rootDir, tag)
        left_df = dflist[first]
        right_df = dflist[second]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("plotting Venn Diagrams of %s vs %s" % (samplelist[first], samplelist[second]))

        merged = left_df.merge(right_df, on="#transcript")
        left_hits = merged.loc[merged['RRTS_x'] > 0]
        right_hits = merged.loc[merged['RRTS_y'] > 0]

        fig, ax = plt.subplots(figsize=(4, 4))
        venn2([set(left_hits['#transcript']), set(right_hits['#transcript'])], ax=ax)
        plt.savefig(figout, format='pdf', bbox_inches="tight")
        # Release the figure once it is on disk so repeated calls do not
        # accumulate open figures.
        plt.close(fig)
def venn_diagram(*args, **kwargs):
    """Draw a 2-, 3- or 4-way Venn diagram of the given collections.

    Args:
        *args: Two, three or four collections, forwarded to
            ``setops.venn_from_arrays``.
        **kwargs: ``ax`` selects the axes to draw on (defaults to the current
            axes); all other keywords are forwarded to both
            ``setops.venn_from_arrays`` and the plotting function.

    Returns:
        Tuple ``(venn, venn_sets, venn_counts)``: the object returned by the
        plotting function and the two values returned by
        ``setops.venn_from_arrays``.

    Raises:
        NotImplementedError: If the number of collections is not 2, 3 or 4.
    """
    n = len(args)
    if n not in {2, 3, 4}:
        raise NotImplementedError(
            "At present, we only support 2, 3 and 4 way Venn diagrams")

    # Look up the current axes lazily: plt.gca() as a pop() default would run
    # (and possibly create a figure) even when the caller supplies `ax`.
    ax = kwargs.pop('ax', None)
    if ax is None:
        ax = plt.gca()

    venn_sets, venn_counts = setops.venn_from_arrays(*args, **kwargs)
    if n == 2:
        venn = venn2(subsets=venn_counts, ax=ax, **kwargs)
    elif n == 3:
        venn = venn3(subsets=venn_counts, ax=ax, **kwargs)
    else:
        venn = venn4(venn_counts, ax=ax, **kwargs)
    return venn, venn_sets, venn_counts
def test(cls):
    """Smoke test: print two overlapping integer sets and show their Venn diagram."""
    print("Starting test...")

    a = set(range(0, 6))
    b = set(range(4, 10))
    a_dif = a - b
    b_dif = b - a

    for shown in (a, b, a_dif, b_dif):
        print(shown)

    venn2(subsets=(a, b))
    plt.show()
    print("Ending test...")
def main(curated_sleuth_1: "Path to a curated Sleuth file",
         set1_name: "Name of the set1",
         curated_sleuth_2: "Path to a second curated Sleuth file",
         set2_name: "Name of the second set",
         output: "Output prefix" = "Venn",
         q_threashold: "Threashold above which qval is significant" = 0.05,
         fc_threashold: "Threashold above which FC is significant" = 1,
         show: "Show graph instead of saving it to a file" = False):
    """
    This script builds a venn diagram among two curated Sleuth files.

    The diagram is shown (show=True) or saved to "<output>.png"; the members
    of each region are always written to "<output>.txt".

    Example of curated Sleuth file:

    GeneIdentifier  TranscriptIdentifier  Q-Value        stat_change   FC(Test_D1/Ref_D0)
    RNA5S1          ENSG00000199352.1     1.6520570e-06  -2.987555871  -101.72037083
    SPP1            ENSG00000118785.13    9.9955378e-05  1.9717884427  7.79610821734
    EGR1            ENSG00000120738.7     9.9955325e-05  2.3872604942  15.8468125203
    CCR2            ENSG00000121807.5     9.9955325e-05  -2.683142349  -8.1815541162
    """
    sl1 = read_curated(curated_sleuth_1, q_threashold, fc_threashold)
    sl2 = read_curated(curated_sleuth_2, q_threashold, fc_threashold)

    only_1 = sl1 - sl2
    only_2 = sl2 - sl1
    shared = sl1 & sl2

    # venn2 expects the sizes as (only first, only second, both); passing the
    # intersection in the middle draws the regions with the wrong areas.
    g = venn2((len(only_1), len(only_2), len(shared)),
              set_labels=(set1_name if len(only_1) > 0 else "",
                          set2_name if len(only_2) > 0 else ""))
    for region_id, color in (("01", "blue"), ("11", "#DCDCDC"), ("10", "red")):
        patch = g.get_patch_by_id(region_id)
        # Empty regions have no patch object (None).
        if patch is not None:
            patch.set_color(color)

    if show:
        plt.show()
    else:
        plt.savefig("%s.png" % output, bbox_inches='tight')

    with open("%s.txt" % output, 'w') as outfile:
        outfile.write("%s: %s\n%s: %s\n%s: %s" %
                      (set1_name, ", ".join(list(only_1)),
                       "%s & %s" % (set1_name, set2_name),
                       ", ".join(list(shared)),
                       set2_name, ", ".join(list(only_2))))
def SimpleMatplotVenn(names, data, outputDir=False, display=True):
    """ Uses http://pypi.python.org/pypi/matplotlib-venn (code combined into one module)
    to export simple or complex, overlap weighted venn diagrams as an alternative to the
    default methods in this module.

    names: file names of the compared sets (a '.txt' suffix is stripped for the labels);
           two or three names are supported
    data: input handed to get_labels() to obtain the per-region counts
    outputDir: directory receiving <venn_export_weighted>.pdf/.png, or False to skip saving
    display: when true, show the figure interactively
    """
    # Keep a handle on the figure so it can be cleared during clean-up.
    fig = pylab.figure(figsize=(11, 7), facecolor='w')

    vd = get_labels(data, fill="number")

    set_labels = [name.replace('.txt', '') for name in names]

    if len(set_labels) == 2:
        from matplotlib_venn import venn2, venn2_circles
        set_colors = ('r', 'g')
        subsets = (vd['10'], vd['01'], vd['11'])
        v = venn2(subsets=subsets, set_labels=set_labels, set_colors=set_colors)
        c = venn2_circles(subsets=subsets, alpha=0.5, linewidth=1.5, linestyle='dashed')

    if len(set_labels) == 3:
        from matplotlib_venn import venn3, venn3_circles
        set_colors = ('r', 'g', 'b')
        subsets = (vd['100'], vd['010'], vd['110'], vd['001'], vd['101'], vd['011'], vd['111'])
        v = venn3(subsets=subsets, set_labels=set_labels, set_colors=set_colors)
        c = venn3_circles(subsets=subsets, alpha=0.5, linewidth=1.5, linestyle='dashed')

    pylab.title("Overlap Weighted Venn Diagram", fontsize=24)

    try:
        if outputDir != False:
            filename = outputDir + '/%s.pdf' % venn_export_weighted
            pylab.savefig(filename)
            filename = outputDir + '/%s.png' % venn_export_weighted
            pylab.savefig(filename, dpi=100)  # ,dpi=200
    except Exception:
        # Saving is best-effort; report and carry on (works on Python 2 and 3).
        print('Image file not saved...')

    if display:
        pylab.show()

    try:
        # Best-effort clean-up of the figure and its memory.
        import gc
        fig.clf()
        pylab.close()
        gc.collect()
    except Exception:
        pass
def two_sets_process(user_input: str, sets: dict) -> None:
    """Evaluate a two-set expression and show the resulting region shaded in red.

    The expression is read left to right: the first set letter seeds the
    result, then each ``<set> <operator> <set>`` step applies difference
    (``-``), union (``U``), intersection (``∩``) or symmetric difference
    (``Δ``) to the running result.  Regions are tracked by their diagram
    ids ("100", "010", "110").

    Args:
        user_input: The expression typed by the user; also used as the title.
        sets: Mapping whose (sorted) keys name the two sets.
    """
    plt.title(user_input)

    set_names = sorted(sets.keys())
    set_zones = {
        set_names[0]: {"100", "110"},
        set_names[1]: {"110", "010"},
    }

    expression = user_input.replace("(", "").replace(" ", "")
    # Raw string: the previous literal relied on invalid escape sequences.
    # Each match is a single operand or operator character.
    elements = findall(r"[ABCU∩\-'Δ]+?", expression)
    elements_chunk = len(elements)

    # Start empty so an expression without any operand shades nothing.
    set_result = set()
    for token in elements:
        if token in "ABC":
            set_result = set_zones[token]
            break

    for i in range(elements_chunk):
        if elements[i] in "ABC" and i + 1 < elements_chunk:
            operator = elements[i + 1]
            if operator == "-":
                set_result = set_result - set_zones[elements[i + 2]]
            elif operator == "U":
                set_result = set_result.union(set_zones[elements[i + 2]])
            elif operator == "∩":
                set_result = set_result.intersection(set_zones[elements[i + 2]])
            elif operator == "Δ":
                set_result = set_result.symmetric_difference(
                    set_zones[elements[i + 2]])

    v = venn2(subsets=(1, 1, 1), set_labels=(set_names[0], set_names[1]))
    # Blank every region first (zones2 is defined at module level).
    for zone in zones2:
        v.get_patch_by_id(zone).set_color("white")
        v.get_label_by_id(zone).set_text("")

    c = venn2_circles(subsets=(1, 1, 1))
    for circle in c:
        circle.set_lw(1.0)
        circle.set_ls("solid")

    for zone in set_result:
        v.get_patch_by_id(zone).set_color("red")
    plt.show()
def venn_pandas(a, b, colors=None, alpha=.7):
    """Draw a two-set Venn diagram from two aligned indicator Series.

    Rows where either Series is missing are dropped; the remaining values
    are cast to int and each row is classified by its "<a><b>" digit pair.
    Rows coded "01" (only ``b``), "10" (only ``a``) and "11" (both) are
    counted; all other codes are ignored.

    Args:
        a: Series of 0/1 indicators; its ``name`` labels the second set.
        b: Series of 0/1 indicators; its ``name`` labels the first set.
        colors: Three colours for the only-b, only-a and shared regions.
            Defaults to entries 0, 2 and 4 of the matplotlib colour cycle.
        alpha: Opacity applied to every region.

    Returns:
        The diagram object returned by ``venn2``.
    """
    from matplotlib_venn import venn2
    if colors is None:
        try:
            cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
        except KeyError:
            # Very old matplotlib that only knows the legacy setting.
            cycle = plt.rcParams['axes.color_cycle']
        colors = np.array(cycle)[[0, 2, 4]]

    codes = pd.concat([a, b], axis=1).dropna().astype(int).astype(str).apply(
        lambda s: ''.join(s), axis=1)
    counts = codes.value_counts()
    # Look the three codes up explicitly so a missing category (for example
    # no "00" rows) cannot shift the sizes into the wrong regions.
    sizes = tuple(int(counts.get(code, 0)) for code in ('01', '10', '11'))

    v = venn2(sizes, set_labels=[b.name, a.name], normalize_to=1.0)
    for patch, color in zip(v.patches, colors):
        # Zero-sized regions have no patch (None).
        if patch is None:
            continue
        patch.set_facecolor(color)
        patch.set_alpha(alpha)
        patch.set_lw(2)
    for l in v.subset_labels:
        if l is not None:
            l.set_fontsize(12)
    return v
def two_way_venn(bedfiles, names, colors):
    """Build a two-set Venn figure from the interval overlap of two BED files.

    Args:
        bedfiles: Sequence whose first two entries are BED inputs accepted by
            ``pybedtools.BedTool``.
        names: Set labels forwarded to ``venn2``.
        colors: Three colours for the first-only, second-only and shared
            regions, in that order.

    Returns:
        The matplotlib figure containing the diagram.
    """
    Site1 = pybedtools.BedTool(bedfiles[0])
    Site2 = pybedtools.BedTool(bedfiles[1])
    # NOTE(review): with wa=True an interval of Site1 that overlaps several
    # Site2 intervals is presumably reported once per overlap, which would
    # inflate len(Int) -- confirm whether u=True is wanted here.
    Int = Site1.intersect(Site2, wa=True)
    Sets = (len(Site1) - len(Int), len(Site2) - len(Int), len(Int))

    fig = plt.figure(figsize=(5, 5))
    v = venn2(subsets=Sets, set_labels=names)
    for region_id, color in zip(('10', '01', '11'), (colors[0], colors[1], colors[2])):
        patch = v.get_patch_by_id(region_id)
        # Regions of size zero (for example no overlap at all) have no patch.
        if patch is not None:
            patch.set_color(color)
    venn2_circles(subsets=Sets, linestyle='solid')
    return fig
def example():
    """Show a minimal two-set Venn diagram with region sizes 1, 2 and 3."""
    plt.figure(figsize=(4, 4))
    region_sizes = (1, 2, 3)
    set_names = ('A', 'B')
    v = venn2(subsets=region_sizes, set_labels=set_names)
    plt.show()
def _matp():
    """Demo: Venn diagram of two name sets, each region labelled with its members.

    Requires the matplotlib and matplotlib-venn packages.
    """
    import matplotlib.pyplot as plt
    from matplotlib_venn import venn2

    admins = {'Moose', 'Joker', 'Joker'}
    moderators = {'Ann', 'Chris', 'Jane', 'Moose', 'Zero'}

    v = venn2(subsets=(admins, moderators), set_labels=('admins', 'moderators'))

    # Replace the default counts with the member names, one per line.
    region_members = (
        ('11', admins & moderators),
        ('10', admins - moderators),
        ('01', moderators - admins),
    )
    for region_id, members in region_members:
        v.get_label_by_id(region_id).set_text('\n'.join(members))

    plt.show()
def venn4overlap(df1, df2, overlap, dirPath, name):
    """Save an equal-circle Venn diagram annotated with sample overlap counts.

    The circles are drawn with fixed sizes; only the text inside each region
    carries the counts: ``df1 - noverlap``, ``df2 - noverlap`` and
    ``noverlap``, where ``noverlap`` is the number of distinct values in
    ``overlap['Sample1_row']``.  The image is written to
    ``<dirPath>/venn_diagram.png``.
    """
    from matplotlib import pyplot as plt
    from matplotlib_venn import venn2, venn2_circles

    # Only the first space-separated word of each name is used as the label.
    first_label = name[0].split(' ')[0]
    second_label = name[1].split(' ')[0]
    noverlap = len(set(overlap['Sample1_row']))

    plt.figure(figsize=(5, 5))
    v = venn2(subsets=(1, 1, 1), set_labels=(first_label, second_label))
    region_texts = (
        ('10', str(df1 - noverlap)),
        ('01', str(df2 - noverlap)),
        ('11', str(noverlap)),
    )
    for region_id, text in region_texts:
        v.get_label_by_id(region_id).set_text(text)

    plt.title("Sample overlap")
    plt.tight_layout()
    out_path = os.path.join(dirPath, 'venn_diagram.png')
    plt.savefig(out_path)
    plt.close()