Exemple #1
0
def venn2_plot(set1 = set(),
               set2 = set(),
               lab_set1 = 'Set1',
               lab_set2 = 'Set2',
               linewidth = 1,
               color_line = 'black',
               alpha_sets = 0.3,
               font_sets = False, # False or 'bold'
               size_vals_sets = 12,
               alpha_inter = 0.3,
               font_inter = False, # False or 'bold'
               size_vals_inter = 12,
               size_label = 12,
               font_label = False): # False or 'bold'
    """Draw an unweighted two-set Venn diagram with dashed outlines.

    Parameters
    ----------
    set1, set2 : the two sets handed to ``venn2_unweighted``. The default
        ``set()`` objects are only read here, never mutated.
    lab_set1, lab_set2 : captions of the two sets.
    linewidth, color_line : width and colour of the dashed circle outlines.
    alpha_sets, font_sets, size_vals_sets : patch transparency, font weight
        and font size applied to the two exclusive regions ('10' and '01').
    alpha_inter, font_inter, size_vals_inter : the same three settings for
        the intersection region ('11').
    size_label, font_label : font size and weight of the set captions.

    The diagram is drawn through matplotlib_venn's default axes handling;
    nothing is returned.
    """
    v = venn2_unweighted(subsets=(set1, set2),
                         set_labels=(lab_set1, lab_set2))
    # Outline circles are drawn for equal-sized placeholder subsets so they
    # line up with the unweighted diagram.
    venn2_circles(subsets=(1, 1, 1),
                  linestyle='--', linewidth=linewidth, color=color_line)
    # Region '10' starts fully transparent; the loop below overrides this
    # whenever that region has a label.
    v.get_patch_by_id('10').set_alpha(0)

    # (alpha, font weight, font size) per region id; the intersection is
    # configured independently of the two exclusive regions.
    region_styles = {
        '10': (alpha_sets, font_sets, size_vals_sets),
        '01': (alpha_sets, font_sets, size_vals_sets),
        '11': (alpha_inter, font_inter, size_vals_inter),
    }
    # v.subset_labels is ordered '10', '01', '11'.
    for label, region_id in zip(v.subset_labels, ('10', '01', '11')):
        if label is None:
            continue
        alpha, weight, size = region_styles[region_id]
        v.get_patch_by_id(region_id).set_alpha(alpha)
        label.set_fontweight(weight)
        label.set_fontsize(size)

    for text in v.set_labels:
        text.set_fontsize(size_label)
        text.set_fontweight(font_label)
Exemple #2
0
def init_compare(input1, input2, labels=None):
    """Compare two tables by a key built from their ``Otherinfo`` column.

    Each row key is the first five tab-separated fields of ``Otherinfo``
    joined with ``';'``.

    :param input1: path of the first CSV file.
    :param input2: path of the second CSV file.
    :param labels: optional pair of set labels. When truthy, a two-set
        unweighted Venn diagram of the row keys is drawn and the object
        returned by ``venn2_unweighted`` is returned.
    :return: the Venn diagram object when ``labels`` is given, otherwise a
        dict with the entries ``'small_one_unique'`` (rows of input1 whose
        key is absent from input2), ``'big_one_should_in_small'`` (rows of
        input2 selected by ``gene_filter``) and ``'shared'`` (rows of input1
        whose key also occurs in input2).

    NOTE(review): ``df`` is presumably ``pandas.DataFrame``; its ``from_csv``
    constructor was removed in pandas 1.0 - confirm the pinned version.
    ``gene_filter`` and ``gene_list`` are defined outside this function.
    """
    input1_df = df.from_csv(input1, index_col=False)
    input2_df = df.from_csv(input2, index_col=False)
    input1_df_index = [
        ';'.join(_i.split('\t')[:5]) for _i in input1_df.Otherinfo
    ]
    input2_df_index = [
        ';'.join(_i.split('\t')[:5]) for _i in input2_df.Otherinfo
    ]
    keys1 = set(input1_df_index)
    keys2 = set(input2_df_index)

    if labels:
        return venn2_unweighted([keys1, keys2], set_labels=labels)

    input1_df.index = input1_df_index
    input2_df.index = input2_df_index
    # .loc is given lists, not sets: set indexers were deprecated in
    # pandas 1.5 and raise TypeError from pandas 2.0 on.
    result = {
        'small_one_unique': input1_df.loc[list(keys1 - keys2), :],
        'big_one_should_in_small': input2_df.loc[
            gene_filter(input2_df, _in=gene_list), :],
        'shared': input1_df.loc[list(keys1 & keys2), :],
    }
    return result
def process_venn(out_dir, venn):
    """Plot the overlap of significant genes between two disorders.

    For every biofluid region and disorder, ``data/out/<region>/<disorder>/lrt.tsv``
    is read if it exists; rows whose ``pvalue`` is below the cutoff contribute
    their index labels to that disorder's gene set. The first two disorders
    are then compared in an unweighted Venn diagram, the gene names of each
    region are written into coloured text boxes, and the figure is saved as
    ``<out_dir>/venn.png``.

    :param out_dir: directory the PNG is written to.
    :param venn: mapping with the keys ``"pvalue_cutoff"``,
        ``"biofluid_regions"``, ``"disorders"`` and ``"title"``.
    :return: None
    """
    from matplotlib_venn import venn2_unweighted

    plt.clf()
    pvalue_cutoff = venn["pvalue_cutoff"]
    biofluid_regions = venn["biofluid_regions"]
    disorders = venn["disorders"]

    # Start every disorder with an empty set so that a missing file never
    # discards genes already collected from another region, and so that the
    # lookups below cannot fail when no region is listed.
    genes = {disorder: set() for disorder in disorders}
    for biofluid_region in biofluid_regions:
        for disorder in disorders:
            filename = "data/out/" + biofluid_region + "/" + disorder + "/lrt.tsv"
            if not os.path.exists(filename):
                continue
            table = pd.read_csv(filename, sep='\t', index_col=0)
            # Keep only genes with pvalue less than cutoff
            significant = table[table["pvalue"] < pvalue_cutoff]
            genes[disorder].update(significant.index.tolist())

    a = genes[disorders[0]]
    b = genes[disorders[1]]

    intersection = a & b
    a_only = a - intersection
    b_only = b - intersection

    venn2_unweighted(subsets=(len(a_only), len(b_only), len(intersection)),
                     set_labels=tuple(disorders),
                     alpha=0.5)

    # (x, y, gene names, box colour) for each text box; names are sorted so
    # the rendered figure is reproducible between runs.
    text_boxes = (
        (0.05, -0.2, intersection, 'orange'),
        (-0.84, -0.25, a_only, 'red'),
        (0.66, -0.2, b_only, 'green'),
    )
    for x, y, names, facecolor in text_boxes:
        plt.text(x=x,
                 y=y,
                 s="\n".join(sorted(names)),
                 color='black',
                 bbox=dict(facecolor=facecolor, alpha=0.5))

    plt.title(venn["title"])
    plt.savefig(out_dir + "/venn.png")
Exemple #4
0
def motifplot(G, proatom_order, lig_name, MotifFolder):
    '''Plot the biochemical properties of a ligand's binding motifs.

       One figure holds, per motif, a bar chart (amino acid distribution) next
       to a pie chart (atom property distribution). When the ligand has 2 or 3
       function groups a second figure shows a Venn plot of the overlap of
       protein pockets containing the different motifs.
       o input: (1) G: the number of function groups of the ligand, type: int
                (2) proatom_order: a dict of different binding motifs for a ligand
                (3) lig_name: the ligand name
                (4) MotifFolder: the location of a folder to place plots that display motif features
       o output: pictures saved into MotifFolder:
                 "<lig_name>_analysis.png" always, and "<lig_name>_provenn.png"
                 only when G is 2 or 3. Both figures are closed afterwards.
    '''
    # squeeze=False keeps `axes` two-dimensional even for G == 1, so the
    # [row, column] indexing below works for every G.
    fig, axes = plt.subplots(G, 2, figsize=(8, 3 * G), dpi=300, squeeze=False)
    fig1, axes1 = plt.subplots(1, 1)
    cluster_set = []
    labels = []
    # plot bar and pie chart displaying motif amino acids distribution and atom properties distribution respectively
    for row, (motif_name, motif) in enumerate(sorted(proatom_order.items())):
        A, aminoAcids, props = GetMotif(motif)
        PlotBarChart(axes[row, 0], aminoAcids)
        PlotPieChart(axes[row, 1], props)
        cluster_set.append(set(A))
        labels.append(motif_name)
    fig.savefig(os.path.join(MotifFolder, "%s_analysis.png" % lig_name),
                dpi=300,
                bbox_inches="tight")

    # make a venn plot to show overlaps of protein pockets containing 2 or 3 motifs for one ligand
    if G == 2:
        draw_venn = venn2_unweighted
    elif G == 3:
        draw_venn = venn3_unweighted
    else:
        draw_venn = None

    if draw_venn is not None:
        draw_venn(cluster_set, labels, ax=axes1)
        fig1.savefig(os.path.join(MotifFolder, "%s_provenn.png" % lig_name),
                     dpi=300)

    # Release both figures; otherwise every call leaves two figures open in
    # pyplot's registry.
    plt.close(fig)
    plt.close(fig1)
def test_pr_28():
    """Check that ``subset_label_formatter`` is honoured by all venn helpers.

    With ``None`` the region label is the plain count; with a custom callable
    the label is whatever the callable returns.
    """
    import matplotlib_venn as mv

    def as_value(x):
        return 'Value: %+0.3f' % (x / 100.0)

    sizes3 = (1, 2, 3, 4, 5, 6, 7)
    sizes2 = (1, 2, 3)

    # Weighted diagrams: default formatter, then the custom one.
    diagram = mv.venn3(sizes3, subset_label_formatter=None)
    assert diagram.get_label_by_id('010').get_text() == '2'
    diagram = mv.venn3(sizes3, subset_label_formatter=as_value)
    assert diagram.get_label_by_id('010').get_text() == 'Value: +0.020'
    diagram = mv.venn2(sizes2, subset_label_formatter=None)
    assert diagram.get_label_by_id('01').get_text() == '2'
    diagram = mv.venn2(sizes2, subset_label_formatter=as_value)
    assert diagram.get_label_by_id('01').get_text() == 'Value: +0.020'

    # Unweighted diagrams with the custom formatter.
    diagram = mv.venn3_unweighted(sizes3, subset_label_formatter=as_value)
    assert diagram.get_label_by_id('010').get_text() == 'Value: +0.020'
    diagram = mv.venn2_unweighted(sizes2, subset_label_formatter=as_value)
    assert diagram.get_label_by_id('01').get_text() == 'Value: +0.020'
Exemple #6
0
    def venn(self, amplicons, primers, mismatch):
        """Save an unweighted Venn diagram of the amplicons hit by each primer.

        :param amplicons: mapping from primer to an iterable of amplicons.
        :param primers: sequence of primers; only the first three are drawn
            when more are given. Nothing is drawn for fewer than two.
        :param mismatch: value embedded in the output file name
            ``venn_mismatch<mismatch>.pdf``.
        :return: None
        """
        n = len(primers)
        # Decide before creating a figure, so the early exit leaves nothing
        # open in pyplot.
        if n < 2:
            return

        fig, _ = plt.subplots()

        if n > 2:
            primers = primers[:3]
            draw_venn = venn3_unweighted
        else:
            draw_venn = venn2_unweighted

        # A real list is required: a lazy `map` object has no len() and
        # cannot be indexed on Python 3.
        amplicon_sets = [set(amplicons.get(k)) for k in primers]
        venn_data = draw_venn(amplicon_sets, set_labels = primers)

        for text in venn_data.set_labels:
            text.set_fontsize(16)
        fig_fname = '.'.join(['venn_mismatch{}'.format(mismatch),'pdf'])
        plt.tight_layout()
        plt.savefig(fig_fname, dpi=1000)
        # Closing is enough; a following plt.clf() would create a fresh empty
        # figure that is never released.
        plt.close(fig)
Exemple #7
0
def test_pr_28():
    """Exercise ``subset_label_formatter`` on weighted and unweighted diagrams.

    Each case draws one diagram and checks the text of a single region label.
    """
    import matplotlib_venn as mv

    formatter = lambda x: 'Value: %+0.3f' % (x / 100.0)
    three_sets = (1, 2, 3, 4, 5, 6, 7)
    two_sets = (1, 2, 3)

    # (drawing function, subset sizes, formatter, region id, expected text)
    cases = [
        (mv.venn3, three_sets, None, '010', '2'),
        (mv.venn3, three_sets, formatter, '010', 'Value: +0.020'),
        (mv.venn2, two_sets, None, '01', '2'),
        (mv.venn2, two_sets, formatter, '01', 'Value: +0.020'),
        (mv.venn3_unweighted, three_sets, formatter, '010', 'Value: +0.020'),
        (mv.venn2_unweighted, two_sets, formatter, '01', 'Value: +0.020'),
    ]
    for draw, sizes, label_formatter, region, expected in cases:
        v = draw(sizes, subset_label_formatter=label_formatter)
        assert v.get_label_by_id(region).get_text() == expected
Exemple #8
0
### Venn diagram of clonal sequences for each sample
fig = plt.figure()
ax = fig.add_subplot(111)

# Every sample is drawn in white. An earlier per-technology palette was:
# {'10X': 'skyblue', 'Immunoseq': 'purple', 'lcRNA-Seq': 'red', 'mRNA-Seq': 'green', '1ng': 'orange'}
colors = defaultdict(lambda: 'white')

# Shared count as a whole-number percentage of each sample's total.
abundance1 = int(100. * sharedcount[0] / totals[0])
abundance2 = int(100. * sharedcount[1] / totals[1])
lowest_abun = min(abundance1, abundance2)
intersect_label = "(%s%%/%s%%)" % (abundance1, abundance2)

# Sample names are the command-line arguments up to their first dot.
sample1 = argv[2].split('.')[0]
sample2 = argv[3].split('.')[0]
only_in_first = len(set_list[0]) - intersect
only_in_second = len(set_list[1]) - intersect

figure = venn2_unweighted(subsets=(only_in_first, only_in_second, intersect),
                          set_labels=(sample1, sample2),
                          set_colors=(colors[sample1], colors[sample2]))
# Outlines for equal-sized circles, matching the unweighted layout.
venn2_circles(subsets=(1, 1, 1), linewidth=3)
print(only_in_first, only_in_second, intersect)

# Same font size for the set captions and the region counts.
for text in list(figure.set_labels) + list(figure.subset_labels):
    text.set_fontsize(36)

# Positions of the two set captions ("A" and "B").
lbl1, lbl2 = (figure.get_label_by_id(set_id) for set_id in ("A", "B"))

x1, y1 = lbl1.get_position()
x2, y2 = lbl2.get_position()
Exemple #9
0
    
    # Three-set case (the enclosing `if` header is outside this excerpt).
    # Unweighted: all regions are drawn equal-sized, so the outline circles
    # are built from unit sizes for every region id.
    if args.unweighted:
        venn3_unweighted(venn_sizes, set_labels=args.labels)
        venn3_circles({'100': 1, '010': 1, '001': 1, '110': 1, 
                       '101': 1, '011': 1, '111': 1})
    else:
        # Weighted: diagram and outlines both use the real region sizes.
        venn3(venn_sizes, set_labels=args.labels)
        venn3_circles(venn_sizes)
else:
    from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
    
    # One positional argument per entry of set_info; venn_sizes maps every
    # key of venn_data to the number of members behind it.
    venn_data = get_venn2_subsets(*[set_info[x] for x in set_info])
    venn_sizes = {x:len(venn_data[x]) for x in venn_data}
    
    if args.unweighted:
        venn2_unweighted(venn_sizes, set_labels=args.labels)
        venn2_circles({'10': 1, '01': 1, '11': 1})
    else:
        venn2(venn_sizes, set_labels=args.labels)
        venn2_circles(venn_sizes)

# Write the figure to disk only when an output path was supplied.
if args.savefig:
    # without bbox_inches, the saved figure has truncated axes.
    plt.savefig(args.savefig, bbox_inches='tight')

# print parsed output. fancy lambda basically enforces the order of keys
# being printed out, i.e. '111', '110', '101', '011', '100', '010', '001'.
for v in sorted(venn_data, key=lambda x: (x.count('1'), int(x)), reverse=True):
    print (', '.join([args.labels[n] for n, x in enumerate(v) if x == '1']),
           end=':\n')
    if args.summary:
Exemple #10
0
def vis():
    """Tabulate the recently played tracks and plot their mood.

    Steps, in order:
      1. Fetch up to 50 recently played items and dump the raw response to
         ``recentsongs.json``.
      2. Collect, per item, the play time, track name/id, album, first
         artist, nine audio features and the track popularity.
      3. Build a DataFrame (rows reversed relative to the API order), print it.
      4. Count tracks by valence (< 0.5, 0.5 to < 0.6, everything else), draw
         a two-set Venn diagram of the counts and a line plot of
         danceability / energy / valence over time, then show the figures.
      5. If more than half of the tracks fall in the low-valence group, ask
         on stdin whether a mood-lifting playlist is wanted.

    NOTE(review): the history is fetched through ``spotify`` while features
    and popularity go through ``sp``; both names come from module scope -
    confirm they are the intended clients.
    """
    print("Extracting history... ")
    recentsongs = spotify.current_user_recently_played(limit=50)
    # Keep the raw API response on disk.
    with open("recentsongs.json", "w") as out_file:
        out_file.write(json.dumps(recentsongs, sort_keys=True, indent=2))

    # One dict per played item, filled in two passes.
    rows = []
    for item in recentsongs['items']:
        track = item['track']
        rows.append({
            # The pattern matches <any><digit><any><digit>Z, which removes a
            # trailing ".NNNZ" style suffix from the timestamp when present.
            'time': re.sub('.[0-9].[000-999]Z', '', item['played_at']),
            'name': track['name'],
            'id': track['id'],
            'album': track['album']['name'],
            'artist': track['artists'][0]['name'],
        })

    print("Accessing features of all 50 tracks... ")
    features = [sp.audio_features(row['id']) for row in rows]

    audio_keys = (
        'acousticness', 'danceability', 'energy', 'instrumentalness',
        'liveness', 'loudness', 'speechiness', 'tempo', 'valence',
    )
    for row, track_features in zip(rows, features):
        # Each response is indexed with [0] to reach the feature mapping of
        # the single requested track.
        for key in audio_keys:
            row[key] = track_features[0][key]
        row['popularity'] = sp.track(row['id'])['popularity']

    # Column order of the resulting dataframe.
    feature = [
        'time', 'name', 'id', 'album', 'artist', 'acousticness',
        'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness',
        'speechiness', 'tempo', 'valence', 'popularity'
    ]
    print("Converting into dataframe...")
    time.sleep(2)

    # Rows are reversed relative to the order returned by the API.
    dataframe = pd.DataFrame(rows, columns=feature).iloc[::-1]
    pd.set_option('display.width', None)
    print(dataframe)

    less_count, more_count, middle_count = 0, 0, 0
    for num in dataframe['valence'].tolist():
        if 0 <= num < 0.5:
            less_count += 1
        elif 0.5 <= num < 0.6:
            middle_count += 1
        else:
            more_count += 1

    print("Plotting Graphs... ")
    venn2_unweighted(subsets=(less_count, more_count, middle_count),
                     set_labels=('Low Spirit', 'High Spirit'),
                     set_colors=('navy', 'lime'),
                     alpha=0.5)
    dataframe.plot.line(x='time', y=['danceability', 'energy', 'valence'])
    plt.xticks(rotation=90)
    plt.show()

    # Guard the ratio against an empty history (no tracks counted).
    total_count = less_count + more_count + middle_count
    if total_count and less_count / total_count > 0.5:
        val = input(
            "We've noticed you've been listening to quite a few sad songs lately.\nWould you like us to suggest you a curated playlist to lift your mood?(y/n)"
        )
        if val == "y":
            print("Evaluating...")
            time.sleep(5)
            print(
                "In your spotify library, you'll find a playlist named 'Mood Booster'. Enjoy!!"
            )
Exemple #11
0
        sets,
        [
            "{} (Total {})".format(name, length)
            for (name, length) in zip(names, lengths)
        ],
    )

# Two sets: unweighted two-circle diagram, each caption carrying its total.
if len(sets) == 2:
    try:
        from matplotlib_venn import venn2_unweighted
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    venn_captions = [
        "{} (Total {})".format(name, length)
        for (name, length) in zip(names, lengths)
    ]
    venn2_unweighted(sets, venn_captions)

# Single set: matplotlib_venn has no one-circle diagram, so the set is paired
# with an empty, unlabelled second set to stay close to the old behavior
# (rpy and Limma).
if len(sets) == 1:
    try:
        from matplotlib_venn import venn2
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    venn2(
        (sets[0], set()),
        [set_data[0][2], ""],
    )

plt.title(all_label)
Exemple #12
0
    print(bucket_detailsD)

    bucketAB = bucket_detailsB.merge(bucket_detailsA,
                                     how="outer",
                                     on="Bucket_Name",
                                     indicator=True)
    print(bucketAB)
    New_Bucket = bucketAB.loc[bucketAB._merge ==
                              "right_only"].iloc[:, [0, 4, 5, 6]]
    New_Bucket.columns = bucket_header
    print(New_Bucket)

    v = vpit.venn2_unweighted(subsets={
        "10": len(bucket_detailsB.index),
        '01': len(New_Bucket.index),
        '11': len(bucket_detailsD.index)
    },
                              set_labels=('Before', 'New'),
                              alpha=0.5)
    matplotlib.pyplot.savefig("temp.png")
    data_uri = base64.b64encode(open('temp.png', 'rb').read()).decode('utf-8')
    venn_image = '<img src="data:image/png;base64,{0}">'.format(data_uri)

    HEADER = """
    <html>
    <head>
    <style>


    .panel{margin-bottom20px;background-color:#fff;border:3px solid;border-radius;border-color:#1f0f66;}
    .panel-body{margin: 30px;}
# Gene lists read with header=None, so the first CSV line is treated as data
# and the single column is addressed by position.
# NOTE(review): `deet`, used further down, is not loaded in this excerpt -
# presumably it is read just above; confirm.
deetcont = pd.read_csv("edgeR-Genes/genelists/downDeet.csv", header=None)
perm = pd.read_csv("edgeR-Genes/genelists/upPerm.csv", header=None)
permcont = pd.read_csv("edgeR-Genes/genelists/downPerm.csv", header=None)
genes = pd.read_csv("edgeR-Genes/genelists/listofgenes.csv", header=None)


def compare(s1, s2):
    """Return how many distinct first-column values s1 and s2 share.

    Both arguments are DataFrames; only their first column (by position) is
    looked at, and duplicates within a column are counted once.
    """
    first_values = set(s1.iloc[:, 0])
    second_values = set(s2.iloc[:, 0])
    return len(first_values & second_values)


diff = compare(deet, genes)
len(deet)  # bare expression; its value is discarded

# Two stacked panels: DEET on top, permethrin below.
fig, axes = plt.subplots(2)
regulation_labels = ("Up Regulated", "Down Regulated")

# Third subset value is what remains of the full gene list once both
# regulated lists are subtracted.
deet_up, deet_down = len(deet), len(deetcont)
v1 = venn2_unweighted(
    subsets=(deet_up, deet_down, len(genes) - deet_down - deet_up),
    set_labels=regulation_labels,
    ax=axes[0],
)

perm_up, perm_down = len(perm), len(permcont)
v2 = venn2_unweighted(
    subsets=(perm_up, perm_down, len(genes) - perm_down - perm_up),
    set_labels=regulation_labels,
    ax=axes[1],
)

axes[0].set_title("Deet")
axes[1].set_title("Perm")
fig.tight_layout()
fig.savefig(
    "figures/EdgeR/DeetAndPermRegulation.png",
    bbox_inches="tight",
    dpi=250,
)
plt.show()

# Reference tables for the next figure; `genes` is rebound here.
genes = pd.read_csv("GeneReference/listofgenes.csv", header=None)
body = pd.read_csv("GeneReference/Body.csv")
Exemple #14
0
        sets,
        [
            "{} (Total {})".format(name, length)
            for (name, length) in zip(names, lengths)
        ],
    )

n_input_sets = len(sets)

# Exactly two sets: draw the unweighted pair with "<name> (Total <n>)" captions.
if n_input_sets == 2:
    try:
        from matplotlib_venn import venn2_unweighted
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    pair_captions = [
        "{} (Total {})".format(*name_and_total)
        for name_and_total in zip(names, lengths)
    ]
    venn2_unweighted(sets, pair_captions)

# Exactly one set: matplotlib_venn cannot draw a lone circle, so an empty
# companion set with a blank caption is added, mirroring the old behavior
# (rpy and Limma) as much as possible.
if n_input_sets == 1:
    try:
        from matplotlib_venn import venn2
    except ImportError:
        sys.exit("Requires the Python library matplotlib_venn")
    lone_set_and_empty = (sets[0], set())
    venn2(lone_set_and_empty, [set_data[0][2], ""])

plt.title(all_label)
Exemple #15
0
# Region sizes in the order given by set_to_title; regions that are missing
# from venn_diagram count as 0.
sub = []

for condition_nb in set_to_title.values():
    sub.append(venn_diagram[condition_nb] if condition_nb in venn_diagram else 0)

# print tuple(sub)
# With the 3 test files, the expected result is:
# Only in test1	Only in test2	Common test1 test2	Only in test3	Common test1 test3	Common test2 test3	Common test1 test2 test3
# (8, 6, 1, 7, 0, 2, 0)

plt.figure(figsize=(14, 10))  # (width, height)
nb_sets = len(liste_sets)
if nb_sets == 2:
    v = venn2_unweighted(subsets=tuple(sub),
                         set_labels=(liste_sets[0], liste_sets[1]))
elif nb_sets == 3:
    v = venn3_unweighted(subsets=tuple(sub),
                         set_labels=(liste_sets[0], liste_sets[1], liste_sets[2]))
    # Font sizes are only adjusted for the three-set diagram.
    for text in v.set_labels:  # file names
        text.set_fontsize(12)
    for text in v.subset_labels:  # numbers inside the circles
        text.set_fontsize(16)

if args.venn_title is not None:
    plt.title(args.venn_title)

# Disabled alternative: use the joined set names as the title.
#liste_sets_string = ', '.join(liste_sets)
#print liste_sets_string
#plt.title(liste_sets_string)
Exemple #16
0
genelist_output_location = '../gene_lists'

# findOutputFilename yields the cutoff id first and the zone id second.
namingscheme_parts = list(findOutputFilename(WT_filepath))
WT_cutoff_id = namingscheme_parts[0]
WT_zone_id = namingscheme_parts[1]
venn_output_filename = 'fig1_' + WT_zone_id + '_' + WT_cutoff_id + '.png'

WT_expr = set(readGeneList(WT_filepath))
dgt_expr = set(readGeneList(dgt_filepath))

# Single-letter zone id -> figure title.
zone_id_key = {'D': 'Differentiation Zone', 'E': 'Elongation Zone',
               'M': 'Meristem'}
zone = zone_id_key[WT_zone_id]

#Generate venn diagram
venn = venn2_unweighted([WT_expr, dgt_expr], set_labels=("WT", "dgt"))
plt.title(str(zone))
plt.savefig(os.path.join(venn_output_location, venn_output_filename))

#Create gene lists for each section of venn diagram
intersections_dict = {'WT_dgt': WT_expr.intersection(dgt_expr),
                      'WT_only': WT_expr.difference(dgt_expr),
                      'dgt_only': dgt_expr.difference(WT_expr)}

# One text file per region, one gene per line.
for inter_key in intersections_dict:
    filename = inter_key + '_' + WT_zone_id + '_' + WT_cutoff_id + '.txt'
    filepath = os.path.join(genelist_output_location, filename)
    # `with` closes the file even if a write fails part-way through.
    with open(filepath, 'w') as IF:
        for member in intersections_dict[inter_key]:
            IF.write(member + '\n')
                    help='Each one representing one set. [2 or 3 files].')
parser.add_argument("-o",
                    "--output",
                    required=True,
                    help='Output basename to write the figure.')
args = parser.parse_args()

# Only two- and three-set diagrams can be drawn, so both too few and too
# many input files are rejected with a message that says so.
if len(args.inputfiles) < 2 or len(args.inputfiles) > 3:
    print("Please provide 2 or 3 sets to compare.")
    # SystemExit is a builtin; exit() is only injected by the site module.
    raise SystemExit(1)

list_sets = []
labels = []
for file in args.inputfiles:
    # Label = file name up to its first dot; set = the file's lines with
    # trailing whitespace removed.
    labels.append(os.path.basename(file).split(".")[0])
    with open(file, 'r') as infile:
        list_sets.append({line.rstrip() for line in infile})

if len(list_sets) == 2:
    plt.figure(figsize=(7, 4))
    venn2_unweighted(list_sets, labels)
elif len(list_sets) == 3:
    plt.figure(figsize=(9, 5))
    venn3_unweighted(list_sets, labels)
plt.savefig(args.output + ".png")
Exemple #18
0
# Collect one size per region, following the order of set_to_title.
sub = []

for condition_nb in set_to_title.values():
    if condition_nb not in venn_diagram:
        # Region absent from the computed diagram: count it as empty.
        sub.append(0)
        continue
    sub.append(venn_diagram[condition_nb])

# print tuple(sub)
# Expected result with the 3 test files:
# Only in test1	Only in test2	Common test1 test2	Only in test3	Common test1 test3	Common test2 test3	Common test1 test2 test3
# (8, 6, 1, 7, 0, 2, 0)

# figsize: first number is the width, second the height
plt.figure(figsize=(14, 10))
region_sizes = tuple(sub)
if len(liste_sets) == 2:
    v = venn2_unweighted(
        subsets=region_sizes,
        set_labels=(liste_sets[0], liste_sets[1]),
    )
elif len(liste_sets) == 3:
    v = venn3_unweighted(
        subsets=region_sizes,
        set_labels=(liste_sets[0], liste_sets[1], liste_sets[2]),
    )
    # size of the file names
    for text in v.set_labels:
        text.set_fontsize(12)
    # size of the numbers inside the circles
    for text in v.subset_labels:
        text.set_fontsize(16)

if args.venn_title is not None:
    plt.title(args.venn_title)

# Title built from the set names (disabled):
#liste_sets_string = ', '.join(liste_sets)
#print liste_sets_string
#plt.title(liste_sets_string)
Exemple #19
0
# Headerless CSV gene lists: header=None makes the first line a data row.
# NOTE(review): `deet` is used below but not read in this excerpt -
# presumably loaded on a preceding line; confirm.
deetcont = pd.read_csv("Genes/deetcont.csv", header=None)
perm = pd.read_csv("Genes/perm.csv", header=None)
permcont = pd.read_csv("Genes/permcont.csv", header=None)
genes = pd.read_csv("Genes/listofgenes.csv", header=None)


def compare(s1, s2):
    """Count the distinct values present in the first column of both frames.

    s1 and s2 are DataFrames; the first column is selected by position and
    repeated values are counted once.
    """
    return len(set(s1.iloc[:, 0]).intersection(s2.iloc[:, 0]))


diff = compare(deet, genes)
len(deet)  # bare expression; its value is discarded

# Two stacked panels: DEET on top, permethrin below. In each diagram the
# third value is what remains of the full gene list after subtracting both
# regulated lists.
fig, axes = plt.subplots(2)
v1 = venn2_unweighted(subsets=(len(deet), len(deetcont),
                               len(genes) - len(deetcont) - len(deet)),
                      set_labels=("Up Regulated", "Down Regulated"),
                      ax=axes[0])
v2 = venn2_unweighted(subsets=(len(perm), len(permcont),
                               len(genes) - len(permcont) - len(perm)),
                      set_labels=("Up Regulated", "Down Regulated"),
                      ax=axes[1])
axes[0].set_title("Deet")
axes[1].set_title("Perm")
fig.tight_layout()
# The keyword is `bbox_inches`; the earlier misspelling is not a savefig
# option, so tight cropping was never applied.
fig.savefig("VennDiagrams/RevisedDiagrams/DeetnPerm.png",
            bbox_inches="tight",
            dpi=250)
plt.show()

#%% fig3
Exemple #20
0
    def feature_overlap(self,
                        exclude=None,
                        weighted=True,
                        label_fontsize=10,
                        count_fontsize=10,
                        save_path=None):
        """
        Plots a venn diagram illustrating the overlap in features between the datasets.

        :param exclude: Passed through to ``get_feature_sets``.
        :param weighted: If True, region areas reflect the set sizes. Only
            honoured when two datasets are compared; three datasets are
            always drawn unweighted.
        :param label_fontsize: Font size of the dataset labels.
        :param count_fontsize: Font size of the counts shown in the regions.
        :param save_path: Path to which venn diagram shall be saved. If it
            is not given, the figure is shown instead.
        :raises ValueError: If the collection holds fewer than 2 or more
            than 3 datasets.
        :return:
        """
        feat_set = self.get_feature_sets(exclude)
        n_datasets = len(self)

        # Plotting when two datasets are compared
        if n_datasets == 2:
            # set variables needed to assign new color scheme
            colors = ["blue", "green"]
            ids = ["A", "B"]
            plt.figure(figsize=[3, 4])

            if weighted:
                # create circles
                v = mv.venn2(feat_set, set_labels=self.df_names)
                # create lines around circles
                circles = mv.venn2_circles(feat_set)
            else:
                # create circles
                v = mv.venn2_unweighted(feat_set, set_labels=self.df_names)
                # create lines around circles
                circles = mv.venn2_circles(subsets=(1, 1, 1))

        # Plotting when three datasets are compared
        elif n_datasets == 3:
            # set variables needed to assign new color scheme
            colors = ["blue", "green", "purple"]
            ids = ["A", "B", "001"]
            # create circles
            v = mv.venn3_unweighted(feat_set, set_labels=self.df_names)
            # create lines around circles
            circles = mv.venn3_circles(subsets=(1, 1, 1, 1, 1, 1, 1))

        elif n_datasets > 3:
            raise ValueError(
                "Too many datasets in DataCollection. Venn diagram only supported for up to 3 datasets."
            )
        else:
            raise ValueError(
                "Too few datasets in DataCollection. Venn diagram requires at least 2 datasets."
            )

        # set colors for the circles in venn diagram; a region without area
        # has no patch (None), so it is skipped
        for region_id, color in zip(ids, colors):
            patch = v.get_patch_by_id(region_id)
            if patch is not None:
                patch.set_color(color)

        # reduce line width around circles
        for c in circles:
            c.set_lw(1.0)

        # change font size of dataset labels
        for text in v.set_labels:
            if text is not None:
                text.set_fontsize(label_fontsize)
        # change font size of displayed counts (empty regions have no label)
        for text in v.subset_labels:
            if text is not None:
                text.set_fontsize(count_fontsize)

        plt.title("Feature Overlap")

        if save_path:
            plt.savefig(save_path, dpi=300)
        else:
            plt.show()
# Side-by-side panels; the proton diagram below goes on the left axes.
figure_pida_depErangE, axes_pida_depErangE = plt.subplots(1, 2)

# Protons: counts per region, plus drawn areas that are never smaller than
# 3% of the true proton count.
subsets_pida_depErangE_p = (nooverlap_pida_depErangeE_p0,
                            nooverlap_pida_depErangeE_p1,
                            overlap_pida_depErangeE_p01)
areas_pida_depErangE_p = [1, 1, 1]

for i in range(0, 3):
    areas_pida_depErangE_p[i] = (
        0.03 * true_protons
        if subsets_pida_depErangE_p[i] < 0.03 * true_protons
        else subsets_pida_depErangE_p[i]
    )

venn2_unweighted(
    subsets_pida_depErangE_p,
    set_labels=('PIDA', r'$E_{DEP} - E_{RANGE}$'),
    ax=axes_pida_depErangE[0],
    subset_areas=areas_pida_depErangE_p,
)

# Muons: same construction, with 3% of the true muon count as the floor.
subsets_pida_depErangE_mu = (nooverlap_pida_depErangeE_mu0,
                             nooverlap_pida_depErangeE_mu1,
                             overlap_pida_depErangeE_mu01)
areas_pida_depErangE_mu = [1, 1, 1]

for i in range(0, 3):
    areas_pida_depErangE_mu[i] = (
        0.03 * true_muons
        if subsets_pida_depErangE_mu[i] < 0.03 * true_muons
        else subsets_pida_depErangE_mu[i]
    )

venn2_unweighted(subsets_pida_depErangE_mu,
                 set_labels=('PIDA', r'$E_{DEP} - E_{RANGE}$'),
Exemple #22
0
# Build the dataframe from the per-column dictionary, keeping the first row
# for every track name.
dataframe = pd.DataFrame.from_dict(dic_df)
dataframe = dataframe.drop_duplicates(subset='name')
pd.set_option('display.width', None)
print(dataframe)

# Bucket the tracks by valence: [0, 0.5) low, [0.5, 0.6) middle, rest high.
valence_vals = dataframe['valence'].tolist()
less_count = more_count = middle_count = 0
for num in valence_vals:
    if 0 <= num < 0.5:
        less_count += 1
    elif 0.5 <= num < 0.6:
        middle_count += 1
    else:
        more_count += 1

# The middle bucket is passed as the third (overlap) subset.
venn2_unweighted(
    subsets=(less_count, more_count, middle_count),
    set_labels=('Low Spirit', 'High Spirit'),
    set_colors=('navy', 'lime'),
    alpha=0.5,
)
dataframe.plot.line(x='time', y=['danceability', 'energy', 'valence'])
plt.xticks(rotation=90)
plt.show()

# If the graph is erratic, that may be because a particular artist/album was
# streamed, since an album contains a mixture of sad and energetic songs. So
# we take a look at how often each album and artist occurs in the history:

print(dataframe['album'].value_counts(ascending=False))
print(dataframe['artist'].value_counts(ascending=False))