Exemple #1
0
def venn(inp, names, title="venn", folder=''):
    """
    Draw a Venn diagram of 2 to 6 collections with the pyvenn package.

    Args:
    -----
      inp: list of collections of values, one per leaf of the diagram
        (e.g. [(1,2,3,4),(2,3),(1,3,4,5)]); handed to pyvenn.get_labels
      names: list[str] with the name of each leaf
      title: str used as the axes title and as the stem of the saved file
      folder: str prepended verbatim to the output file name; nothing is
        saved when it is empty

    Raises:
    -------
      ValueError: when inp does not hold between 2 and 6 collections
    """
    region_labels = pyvenn.get_labels(inp, fill=['number', 'logic'])
    n_sets = len(inp)
    if not 2 <= n_sets <= 6:
        raise ValueError('need to be between 2 to 6')
    # One drawing function exists per supported size: venn2 ... venn6.
    draw = getattr(pyvenn, 'venn%d' % n_sets)
    fig, ax = draw(region_labels, names=names)
    ax.set_title(title)
    if folder:
        fig.savefig(folder + title + '_venn.pdf')
    fig.show()
    plt.pause(0.1)
def generate_venn_diagram(nb_dim, files, names, sep, label):
    """Draw the Venn diagram comparing the results of several models.

    The drawing itself is delegated to the ``venn`` module (venn.py, which
    must sit in the same folder as generate_venn.py).

    :param nb_dim: number of sets shown in the diagram (1 < nb_dim <= 6)
    :param files: paths of the files holding the results of each model
    :param names: model names, in the same order as ``files``
    :param sep: separator between instance ids and labels inside each file
    :param label: label to extract
    """
    unsupported_msg = 'Le diagramme de Venn ne peut être généré que pour un nombre de dimensions supérieur à un et inférieur ou égal à 6.'

    if nb_dim < 2 or nb_dim > 6:
        print(unsupported_msg)
        return

    # nb_dim is only ever compared, never iterated, so any numeric value
    # inside the accepted range drives this loop.
    models = []
    position = 0
    while position < nb_dim:
        models.append(
            get_id_instances_title(files[position], sep=sep, label=label))
        position += 1

    labels = venn.get_labels(models)

    # Name of the venn.py drawing function for each supported set count.
    drawer_name = {
        2: 'venn2',
        3: 'venn3',
        4: 'venn4',
        5: 'venn5',
        6: 'venn6',
    }.get(nb_dim)
    if drawer_name is None:
        # In range but not one of the supported whole numbers.
        print(unsupported_msg)
        return

    fig, ax = getattr(venn, drawer_name)(labels, names=names)
    plt.show()
    def uniq_share_cal(self, infile):
        """Tally which combination of known ids each line covers, then plot.

        Every line of ``infile`` is split on tabs; the text before the first
        '|' of each field is kept when it is listed in ``self.ids``. Each key
        of ``self.data`` whose elements form exactly that set of ids has its
        counter incremented.

        The counters are then re-keyed by a string of '1'/'0' flags (one per
        entry of ``self.ids``, in order). When ``self.ids`` holds 3 to 6
        entries, the matching ``venn.venn<N>`` function draws the diagram,
        which is saved as ``gene_family_<N>taxa.pdf``. Other sizes produce no
        plot.

        Args:
            infile: iterable of text lines (e.g. an open file).
        """
        for line in infile:
            fields = line.rstrip().split('\t')
            prefixes = (field.split('|')[0] for field in fields)
            target = set(prefix for prefix in prefixes if prefix in self.ids)
            for key in self.data:
                if set(key) == target:
                    self.data[key] += 1

        # Direct iteration replaces the xrange index loops, so the method
        # runs on Python 3 as well as Python 2.
        labels = {}
        for key in self.data:
            flags = ''.join(
                '1' if taxon in key else '0' for taxon in self.ids)
            labels[flags] = self.data[key]

        n_taxa = len(self.ids)
        if 3 <= n_taxa <= 6:
            draw = getattr(venn, 'venn%d' % n_taxa)
            fig, ax = draw(labels, names=self.ids)
            fig.savefig('gene_family_%dtaxa.pdf' % n_taxa)
  n34 = len(intersection(tp[ids[2]], tp[ids[3]])) 
  n35 = len(intersection(tp[ids[2]], tp[ids[4]]))
  n45 = len(intersection(tp[ids[3]], tp[ids[4]])) 
  area5 = len(tp[ids[4]]) 
  area4 = len(tp[ids[3]])
  area3 = len(tp[ids[2]]) 
  area2 = len(tp[ids[1]]) 
  area1 = len(tp[ids[0]])  
  venn_diag_list = [area1, area2, area3, area4, area5, n12, n13, n14, n15,n23, n24, n25,
   n34, n35, n45, n123, n124, n125, n134, n135, n145, n234, n235, n245, n345, n1234, 
   n1235, n1245, n1345, n2345, n12345]

# Report the overlap counts on stdout and in the out1 report.
print('Overlap list needed for VennDiagram in R')
print(venn_diag_list)
out1.write('\n\n## Overlap list needed for VennDiagram in R:\n\n%s' % venn_diag_list)

# Plotting is opt-in: only 't' / 'true' (in any letter case) enable it.
if plot.lower() in ('t', 'true'):
  # Pick the supported set count (2 to 5) equal to n_comparing; any other
  # value draws nothing here and `fig` keeps whatever it held before.
  for set_count in (5, 4, 3, 2):
    if n_comparing == set_count:
      compared = [tp[ids[k]] for k in range(set_count)]
      labels = venn.get_labels(compared, fill = ['number'])
      fig, ax = getattr(venn, 'venn%d' % set_count)(labels, names = ids)
      break
  filename = save+'_pred_compared.pdf'
  fig.savefig(filename)
Exemple #5
0
  n25 = len(intersection(comp[ids[1]], comp[ids[4]])) 
  n34 = len(intersection(comp[ids[2]], comp[ids[3]])) 
  n35 = len(intersection(comp[ids[2]], comp[ids[4]]))
  n45 = len(intersection(comp[ids[3]], comp[ids[4]])) 
  area5 = len(comp[ids[4]]) 
  area4 = len(comp[ids[3]])
  area3 = len(comp[ids[2]]) 
  area2 = len(comp[ids[1]]) 
  area1 = len(comp[ids[0]])  
  venn_diag_list = [area1, area2, area3, area4, area5, n12, n13, n14, n15,n23, n24, n25,
   n34, n35, n45, n123, n124, n125, n134, n135, n145, n234, n235, n245, n345, n1234, 
   n1235, n1245, n1345, n2345, n12345]

# Report the overlap counts on stdout; writing them to out1 is disabled.
print('Overlap list needed for VennDiagram in R')
print(venn_diag_list)
#out1.write('\n\n## Overlap list needed for VennDiagram in R:\n\n%s' % venn_diag_list)

# Pick the supported set count (2 to 5) equal to n_comparing; any other
# value draws nothing here and `fig` keeps whatever it held before.
for set_count in (5, 4, 3, 2):
  if n_comparing == set_count:
    compared = [comp[ids[k]] for k in range(set_count)]
    labels = venn.get_labels(compared, fill = ['number'])
    fig, ax = getattr(venn, 'venn%d' % set_count)(labels, names = ids)
    break
filename = save+'_pred_compared.pdf'
fig.savefig(filename)
Exemple #6
0
# ipython notebook requires this
# %matplotlib inline

# python console requires this
import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt

import sys
sys.path.append('lib')

import venn

def _read_stripped_lines(path):
    """Return the set of lines of the text file at *path*, whitespace-stripped."""
    # The context manager closes the file as soon as it has been read,
    # instead of leaving the handle open until it is garbage collected.
    with open(path) as handle:
        return set(line.strip() for line in handle)


sparqldata = _read_stripped_lines('out/dataset-full_name-sparql-total.csv')
cypherdata = _read_stripped_lines('out/dataset-full_name-cypher-total.csv')
graphqldata = _read_stripped_lines('out/dataset-full_name-graphql-total.csv')
gremlindata = _read_stripped_lines('out/dataset-full_name-gremlin-total.csv')

data = [sparqldata, cypherdata, graphqldata, gremlindata]

# Label every region with its element count, then draw and save the diagram.
labels = venn.get_labels(data, fill=['number'])
fig, ax = venn.venn4(labels, names=['SPARQL', 'Cypher', 'GraphQL', 'Gremlin'])
fig.savefig('vennrepos.pdf', bbox_inches='tight')
plt.close()
# Finish the figure in progress: y-axis label, legend, then display.
plt.ylabel('Number of kinase')
plt.legend()
plt.show()

infl = pd.read_excel('results/Staturosporine_TPP_data_Savitski/Inflect.xlsx')

# Inflect hits: values present in both of the sheet's first two columns.
first_column = infl.iloc[:, 0].values.astype(str)
second_column = infl.iloc[:, 1].values.astype(str)
infl_sig = np.intersect1d(first_column, second_column)
tpp_m_sig = tpp_m['Protein_ID'].values
# Only the first 100 entries of these two tables are compared.
tpp_s_sig = tpp_s['Protein_ID'].values[:100]
npp_sig = npp['id'].values[:100]

METHOD_NAMES = ('Inflect', 'TPP_MeltCurve', 'TPP_Spline', 'NPARC')

significant = [infl_sig, tpp_m_sig, tpp_s_sig, npp_sig]
labels = venn.get_labels(significant, fill=['number'])
plt.figure(dpi=300)
fig, ax = venn.venn4(labels, names=list(METHOD_NAMES))
fig.show()

# Same comparison, restricted to the entries that are found in `kins`.
infl_kin = [hit for hit in infl_sig if hit in kins]
tpp_m_kin = [hit for hit in tpp_m_sig if hit in kins]
tpp_s_kin = [hit for hit in tpp_s_sig if hit in kins]
npp_kin = [hit for hit in npp_sig if hit in kins]

kinase_hits = [infl_kin, tpp_m_kin, tpp_s_kin, npp_kin]
labels = venn.get_labels(kinase_hits, fill=['number'])
plt.figure(dpi=300)
fig, ax = venn.venn4(labels, names=list(METHOD_NAMES))
fig.show()
# Region labels (element counts) for the up-regulated, down-regulated and
# total gene sets
labels_up = venn.get_labels(pos, fill=['number'])
labels_down = venn.get_labels(neg, fill=['number'])
labels_tot = venn.get_labels(tot, fill=['number'])

# Merge both directions into a single two-line label per region
labels_combined = {
    key: f'⇧{up_count}\n⇩{labels_down[key]}'
    for key, up_count in labels_up.items()
}

# Venn diagram showing the up- and downregulated counts side by side
fig, ax = venn.venn4(labels_combined, names=order, colors=colors)
# Manual recolouring of the ellipses, currently disabled:
#for i in range(4):
#    curr_ell = ax.get_children()[i]
#    curr_ell.set_facecolor(colors[i])
#    curr_ell.set_edgecolor(colors[i])
#plt.draw()
for extension in ('svg', 'pdf'):
    fig.savefig('venn_up_and_downregulated.' + extension)

fig.clear()

# Venn diagram for all differentially expressed genes, whatever the direction
fig, ax = venn.venn4(labels_tot, names=order, colors=colors)
for extension in ('svg', 'pdf'):
    fig.savefig('venn_total.' + extension)
#together_4 = len(zhang_2018 & wang_2012 & lukasik_2013 & ours)
#
#zwl_4 = len(zhang_2018 & wang_2012 & lukasik_2013) - together_4
#zwo_4 = len(zhang_2018 & wang_2012 & ours) - together_4
#zlo_4 = len(zhang_2018 & ours & lukasik_2013) - together_4
#wlo_4 = len(ours & wang_2012 & lukasik_2013) - together_4

#%%
import venn

plt.figure(figsize=(7, 7))

# Compare the present study with the three earlier miRNA studies.
compared_sets = [ours, zhang_2018, wang_2012, lukasik_2013]
labels = venn.get_labels(compared_sets)
study_names = [
    'Present Study',
    'Zhang et al. 2018',
    'Wang et al. 2012',
    'Lukasik et al. 2013',
]
fig, ax = venn.venn4(labels, names=study_names)
fig.savefig(
    'previous_bol_mirnas_venn_wours.svg', bbox_inches='tight', format='svg')
fig.show()

#%%

# Use eulerAPE instead for a proportional diagram

from matplotlib_venn import venn3
from matplotlib import pyplot as plt

plt.figure(figsize=(7, 7))
Exemple #10
0
        elif "full_model" in results_file:
            all_labels.append("Full model")

    for r_label in results:
        # print unique TP to this result
        tps = set(results[r_label]["all"]) & set(gold_pairs["all"])
        for r_label2 in results:
            if r_label2 == r_label:
                continue
            tps = tps - set(results[r_label2]["all"])
        print()
        print("unique to {}:".format(r_label))
        print(len(tps))
        print(tps)
        print()
    print(all_labels)
    # print(results)
    labels = venn.get_labels(all_pairs, fill=["number"])
    if len(all_pairs) == 2:
        fig, ax = venn.venn2(labels, names=all_labels)
    elif len(all_pairs) == 3:
        fig, ax = venn.venn3(labels, names=all_labels)
    elif len(all_pairs) == 4:
        fig, ax = venn.venn4(labels, names=all_labels)
    elif len(all_pairs) == 5:
        fig, ax = venn.venn5(labels, names=all_labels)
    elif len(all_pairs) == 6:
        fig, ax = venn.venn6(labels, names=all_labels)
    fig.savefig("{}.png".format("_".join(all_labels)), bbox_inches="tight")
    plt.close()
Exemple #11
0
     ]
 for i, loc in enumerate(cancer_locs):
     relevant_tmp_df = sub_tmp_df_both[sub_tmp_df_both.cancer_loc == loc]
     labels = venn.get_labels(
         [relevant_tmp_df[c].dropna().index for c in relevant_nb_clones],
         fill=['number', 'logic'])
     labels_short = {k: v.split(': ')[1] for k, v in labels.items()}
     label_names = [
         c.split('_')[0].replace('pyclone', 'PyClone').replace(
             'sciclone', 'SciClone').replace('expands', 'Expands')
         for c in relevant_nb_clones
     ]
     label_names_fig = ['' for l in label_names]
     ax = venn.venn4(labels_short,
                     names=label_names_fig,
                     colors=[colors_protected[i] for i in label_names],
                     fontsize_text=22,
                     fontsize_number=25,
                     ax=axes[ii, i])
     nb_samples_total = relevant_tmp_df[relevant_nb_clones].dropna(
         axis='index', how='all').shape[0]
     ax.set_title('{loc} (n={n})'.format(loc=loc, n=nb_samples_total),
                  fontsize=40)
     if i == 0:
         ax.text(-0.1,
                 0.4,
                 '{}'.format(folder).replace(
                     'merged', 'public and protected\nintersection'),
                 fontsize=40,
                 rotation=90,
                 va='center',
                 ha='center')
def linear_regression_venn4(gene, grouping, subgroups, showplot=False):
	"""Compare the gene lists of four subgroups with a 4-set Venn diagram.

	Changes into the 'Linear_Regression' folder of ``grouping``, reads the
	'short_summary_*.csv' files found there, and writes next to them:
	'Venn_Diagram.png' / '.eps' (the plot), 'Venn_Diagram.csv' (one boolean
	membership column per subgroup) and 'Venn_Summary.txt' (genes unique to
	each subgroup plus the genes shared by all four).

	Args:
		gene: name inserted in the plot title.
		grouping: folder that contains the 'Linear_Regression' directory.
		subgroups: subgroup names; the first four select the CSV files.
		showplot: the plot is displayed when this equals True.

	Ends with two calls to ``elevate()``, which is defined elsewhere
	(presumably it restores the working directory - confirm).
	"""
	# Every input and output lives in the grouping's Linear_Regression folder.
	os.chdir('%s\\Linear_Regression' % grouping)

	# The row index of each summary file is used as its gene list.
	allpatients = pd.read_csv('short_summary_All Patients.csv', index_col = 0)
	subgroup_tables = [
		pd.read_csv('short_summary_%s.csv' % subgroups[k], index_col = 0)
		for k in range(4)
	]
	allgenes = set(allpatients.index.tolist())
	gene_sets = [set(table.index.tolist()) for table in subgroup_tables]

	# Venn diagram of the four gene lists
	fig, ax = plt.subplots(figsize = (12,12))
	venn_style = {'font': 'Arial', 'group_labels': 24, 'patch_labels': 19}
	venn4(list(gene_sets), (subgroups), ax=ax, fig=fig, **venn_style)
	plt.title('Genes Correlated to %s by Subgroup' % gene, fontname='Arial', fontsize=30)
	caption_style = dict(horizontalalignment = 'center', verticalalignment = 'center', fontname = 'Arial', fontsize = 22)
	plt.text(0.5, 0.97, '%d total genes' % (len(allgenes)), caption_style, transform=ax.transAxes)
	plt.tight_layout()

	if showplot == True:
		plt.show()
	for extension in ('png', 'eps'):
		fig.savefig('Venn_Diagram.' + extension, transparent = True)

	# Membership table: one True/False column per subgroup, one row per gene
	groups = {subgroups[k]: set(gene_sets[k]) for k in range(4)}

	summary = {'All Patients': True}
	for group in subgroups:
		members = groups[group]
		summary[group] = {name: name in members for name in allgenes}

	df = pd.DataFrame.from_dict(summary)
	df.to_csv("Venn_Diagram.csv")

	def genes_only_in(position):
		# Genes flagged for the subgroup at `position` and for no other one.
		mask = df[subgroups[position]] == True
		for other in range(4):
			if other != position:
				mask = mask & (df[subgroups[other]] == False)
		return df[mask].index.values.tolist()

	exclusive = [genes_only_in(k) for k in range(4)]

	# Genes flagged for all four subgroups at once
	shared_mask = df[subgroups[0]] == True
	for k in range(1, 4):
		shared_mask = shared_mask & (df[subgroups[k]] == True)
	shared = df[shared_mask].index.values.tolist()

	venn_summary = {subgroups[k]: exclusive[k] for k in range(4)}
	venn_summary['intersection'] = shared

	# One tab-separated line per subgroup, then one for the intersection
	with open('Venn_Summary.txt', 'w') as f:
		for key in subgroups:
			listed = venn_summary[key]
			f.write('%s\t%d genes\t' % (key, len(listed)))
			f.write(''.join('%s\t' % name for name in listed))
			f.write('\n')
		f.write('Intersection\t%d genes\t' % (len(venn_summary['intersection'])))
		f.write(''.join('%s\t' % name for name in venn_summary['intersection']))
		f.write('\n')

	elevate()
	elevate()
def create_venn4(labels, names):
    """Draw a four-set Venn diagram with ``venn.venn4`` and display it.

    ``labels`` and ``names`` are forwarded unchanged to ``venn.venn4``.
    """
    # venn4 hands back a (figure, axes) pair; neither is used afterwards.
    _figure, _axes = venn.venn4(labels, names=names)

    #fig.savefig('venn4.png', bbox_inches='tight')
    plt.show()
Exemple #14
0
def create_venn_diagram(df, venn_labels, figsize=[10, 9], percent_only=False):
    """
    Plot a Venn diagram of 0/1 criteria columns and print summary counts.

    df - dataframe with data; its 'icustay_id' column identifies the rows
    venn_labels - ordered dictionary
       keys are the column names to be used in the venn diagram - columns should only have 0s/1s
       values are the "pretty" label for the column
    figsize - figure size passed on to the plotting call
    percent_only - selects the percentage-only label format
       (fill='percent_only' for four sets, a percentage-only formatter
       otherwise) instead of count plus percentage

    Four columns are drawn with venn4, fewer with venn3. More than four are
    not supported: a message is printed and nothing else happens.
    """
    sets = []
    set_names = []
    for column, pretty_name in venn_labels.items():
        flagged = df[column] == 1
        sets.append(set(df.loc[flagged, 'icustay_id']))
        set_names.append(pretty_name)

    n_sets = len(venn_labels)
    if n_sets > 4:
        print('Only supports up to a 4 set venn diagrams')
        return

    n_rows = df.shape[0]

    if n_sets == 4:
        venn4(sets,
              set_names,
              show_plot=False,
              fontdict={
                  'fontsize': 15,
                  'fontweight': 'normal'
              },
              fill='percent_only' if percent_only else 'percent',
              figsize=figsize)
        # A legend is created only so that it can be removed right away.
        plt.legend('off').remove()
        plt.show()
    else:

        def percent_label(count):
            return '{:2.1f}%'.format(count * 100.0 / n_rows)

        def count_and_percent_label(count):
            return '{:,}\n{:2.1f}%'.format(count, count * 100.0 / n_rows)

        if percent_only:
            string_formatter = percent_label
        else:
            string_formatter = count_and_percent_label

        plt.figure(figsize=figsize)
        plt.rcParams.update({'font.size': 15})
        venn3(sets, set_names, subset_label_formatter=string_formatter)
        plt.show()

    # IDs that belong to none of the sets
    set_other = set(df['icustay_id'].values).difference(*sets)
    # IDs that belong to every set
    set_all = set.intersection(*sets)

    # Print other numbers for above venn diagram
    print('{} patients ({:2.1f}%) satisfied all criteria.'.format(
        len(set_all),
        len(set_all) * 100.0 / n_rows))
    print('{} patients ({:2.1f}%) satisfied no criteria.'.format(
        len(set_other),
        len(set_other) * 100.0 / n_rows))

    # pair-wise counts: each column against every column listed before it
    columns = list(venn_labels)
    for i, c1 in enumerate(columns):
        for j in range(i):
            c2 = columns[j]
            set_both = sets[i] & sets[j]
            print('{:2.1f}% ({}) - {} & {}'.format(
                len(set_both) * 100.0 / n_rows, len(set_both), c1, c2))
    """
Exemple #15
0
    rgi_set = set(
        pandas.read_csv(rgi,
                        sep="\t",
                        header=None,
                        names=["db", "sample", "antibiotic"]).query(
                            'antibiotic == "%s"' % antibio)["sample"].tolist())
    mykrobe_set = set(
        pandas.read_csv(mykrobe,
                        sep="\t",
                        header=None,
                        names=["db", "sample", "antibiotic"]).query(
                            'antibiotic == "%s"' % antibio)["sample"].tolist())
    tbprofiler_set = set(
        pandas.read_csv(tbprofiler,
                        sep="\t",
                        header=None,
                        names=["db", "sample", "antibiotic"]).query(
                            'antibiotic == "%s"' % antibio)["sample"].tolist())
    walker_set = set(
        pandas.read_csv(walker,
                        sep="\t",
                        header=None,
                        names=["db", "sample", "antibiotic"]).query(
                            'antibiotic == "%s"' % antibio)["sample"].tolist())

# Compare the sample sets reported by the four tools in one 4-set diagram.
tool_sets = [rgi_set, mykrobe_set, tbprofiler_set, walker_set]
tool_names = ['rgi', 'mykrobe', 'tbprofiler', 'walker']

labels = venn.get_labels(tool_sets, fill=['number'])

fig, ax = venn.venn4(labels, names=tool_names)

# The output path is supplied by the snakemake object.
fig.savefig(snakemake.output[0])
Exemple #16
0
from collections import Counter
from itertools import combinations
import ast
from operator import itemgetter
import operator
import pandas as pd
import csv
import numpy as np

import matplotlib.pyplot as plt
import venn

# Four overlapping integer ranges serve as demo input.
sample_lists = [range(10), range(5, 15), range(3, 8), range(8, 17)]
sample_names = ['list %d' % position for position in range(1, 5)]

labels = venn.get_labels(sample_lists, fill=['number', 'logic'])
fig, ax = venn.venn4(labels, names=sample_names)
fig.show()
Exemple #17
0
matplotlib.use('Agg')

import matplotlib.pyplot as plt
import venn

# Demo input: the diagram with N sets uses the first N of these ranges.
sample_ranges = [
    range(10),
    range(5, 15),
    range(3, 8),
    range(8, 17),
    range(10, 20),
    range(13, 25),
]

# Draw and save one diagram per supported size: venn2.png ... venn6.png.
for n_sets in range(2, 7):
    labels = venn.get_labels(sample_ranges[:n_sets], fill=['number', 'logic'])
    list_names = ['list %d' % position for position in range(1, n_sets + 1)]
    fig, ax = getattr(venn, 'venn%d' % n_sets)(labels, names=list_names)
    fig.savefig('venn%d.png' % n_sets, bbox_inches='tight')
    plt.close()