def get_node_overlap_image():
    """Render an UpSet plot of the node overlap between networks as a PNG.

    The network identifiers are read from the ``networks`` query argument as
    a comma-separated list of integers. Each network is loaded through
    ``api.get_network`` and its nodes are handed to ``pyupset`` keyed by the
    network name (underscores replaced by spaces).

    Returns:
        A response whose body is the PNG image and whose ``Content-type``
        header is ``image/png``. When the ``networks`` argument is missing
        or empty the request is aborted with status 500.
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    import pyupset as pyu
    from six import BytesIO

    network_ids = request.args.get('networks')
    if not network_ids:
        return flask.abort(500)

    networks = [
        api.get_network(int(network_id.strip()))
        for network_id in network_ids.split(',')
    ]
    data_dict = {
        network.name.replace('_', ' '): pd.DataFrame(network.nodes())
        for network in networks
    }

    buf = BytesIO()
    try:
        pyu.plot(data_dict)
        plt.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each request would leave one more figure in memory.
        plt.close()

    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek is required before reading it.
    output = make_response(buf.getvalue())
    output.headers["Content-type"] = "image/png"
    return output
def _save_upset_plots(mydata, colNames, title, resultdir, prefix, test):
    """Draw and save one UpSet plot per sort order ('size', then 'degree')."""
    for arg in ['size', 'degree']:
        # NOTE(review): the positional (colNames, title, resultdir) arguments
        # do not match upstream pyupset's keyword-only signature; presumably
        # a project-local fork of pyupset is in use - confirm.
        pyu.plot(mydata, colNames, title, resultdir, sort_by=arg)
        pl.savefig('{}/{}_{}_{}.pdf'.format(resultdir, prefix, arg, test))


def upset(sets=None, resultdir='../../results/validation/EGFR'):
    """Create UpSet overlap plots for each membership table in *sets*.

    For every ``(file name, label)`` pair the tab-separated file
    ``resultdir/file name`` is read. Every column from the third one onwards
    is treated as a set; the rows of the ``Genes`` column where that column
    equals 1 are its members. Two rounds of plots are written to *resultdir*:

    * ``UpsetOverlap_None_<sort>_<label>.pdf`` - all sets included;
    * ``UpsetOverlap_<sort>_<label>.pdf`` - the first set whose column name
      contains ``'None'`` removed.

    Args:
        sets: iterable of ``[file name, label]`` pairs. ``None`` (the
            default) means there is nothing to plot.
        resultdir: directory the tables are read from and the PDFs are
            written to.

    Raises:
        IndexError: if a table has no column whose name contains ``'None'``.
    """
    # A mutable default such as [[]] is shared between calls and, worse, its
    # empty inner list cannot be unpacked into (file name, label).
    if sets is None:
        sets = []

    for fl, test in sets:
        df = pd.read_csv('{}/{}'.format(resultdir, fl), sep='\t')
        title = fl
        colNames = list(df.columns[2:])
        # Looked up before anything is plotted so a table without a 'None'
        # column fails before any file is written.
        sNone = [x for x in colNames if 'None' in x][0]

        mydata = {
            col: pd.DataFrame(df['Genes'][df[col] == 1])
            for col in colNames
        }
        _save_upset_plots(mydata, colNames, title, resultdir,
                          'UpsetOverlap_None', test)

        # Second round: same plots without the 'None' set.
        mydata.pop(sNone, None)
        colNames.remove(sNone)
        _save_upset_plots(mydata, colNames, title, resultdir,
                          'UpsetOverlap', test)
def plot_element_by_source(self, element, filter_func=lambda x: bool(x), min_bound=1, max_bound=1000000000):
    """Draw an UpSet plot of *element* values grouped by source.

    The mapping returned by ``self.get_element_by_source(element)`` is
    turned into one single-column (``'attribute'``) DataFrame per source,
    keeping only the values accepted by *filter_func*.

    Args:
        element: passed through to ``self.get_element_by_source``.
        filter_func: predicate handed to ``filter`` for every source's values.
        min_bound: lower intersection-size bound given to ``pyu.plot``.
        max_bound: upper intersection-size bound given to ``pyu.plot``.

    Returns:
        Whatever ``pyu.plot`` returns, with the unfiltered per-source mapping
        stored under the extra key ``'input_data'``.
    """
    column_name = ['attribute']
    element_by_source = self.get_element_by_source(element)

    df_dict = {
        source: pd.DataFrame(
            list(filter(filter_func, element_by_source[source])),
            columns=column_name,
        )
        for source in element_by_source
    }

    size_bounds = (min_bound, max_bound)
    result = pyu.plot(df_dict, unique_keys=column_name,
                      inters_size_bounds=size_bounds)
    result['input_data'] = element_by_source
    return result
# Add the information of the dataset to the dictionary data_dict[row["TypeTerm"]] = pd.DataFrame({'Property': subjTermList}) print(row["TypeTerm"]) print(subjTermList) print() tock = datetime.now() diff = tock - tick # the result is a datetime.timedelta object print(str(diff.total_seconds()) + " seconds") print("Plot") tick = datetime.now() # Create the UpSet Plot using the given dictionary pyu.plot(data_dict, unique_keys=['Property'], sort_by='degree', inters_size_bounds=(10, 20)) # Plot the UpSet Plot plt.show(pyu) #current_figure = plt.gcf() #current_figure.savefig("test.png") tock = datetime.now() diff = tock - tick # the result is a datetime.timedelta object print(str(diff.total_seconds()) + " seconds") """ PLOT THE VOCABULARY AS A GRAPH import rdflib from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph #from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph import networkx as nx
def _ccode_matches(code, venn_type):
    """Tell whether class code *code* belongs in the set for *venn_type*."""
    if venn_type == "missing":
        return code.lower() in ("na", "x", "p", "i", "ri")
    if venn_type == "full":
        return code in ("=", "_")
    if venn_type == "fusion":
        # startswith() instead of code[0] so an empty class code is simply
        # "no match" rather than an IndexError.
        return code.startswith("f")
    return False


def main():
    """Command-line entry point: draw an UpSet plot of per-method feature sets.

    For every aligner ("STAR", "TopHat") and every method in the parsed
    configuration, the method's ``.refmap`` file (the configured path with a
    trailing ``.stats`` replaced by ``.refmap``) is read as a tab-separated
    table. Rows whose class code matches the requested ``--type`` contribute
    their gene (or transcript, with ``--transcripts``) identifier to that
    method's set. The sets are then plotted with ``pyupset`` and the figure
    is saved to ``--out`` in ``--format`` (``svg`` when not given).
    """
    parser = argparse.ArgumentParser("Script to create the Venn Plots")
    parser.add_argument("-t", "--type",
                        choices=["missing", "full", "fusion"], required=True)
    parser.add_argument("-c", "--configuration", required=True,
                        type=argparse.FileType("r"))
    parser.add_argument("-em", "--exclude-mikado", dest="exclude",
                        action="store_true", default=False,
                        help="Flag. If set, Mikado results will be excluded")
    parser.add_argument("-o", "--out", type=str, help="Output file",
                        required=True)
    parser.add_argument("--format", choices=["svg", "tiff", "png"],
                        default=None)
    parser.add_argument(
        "--transcripts", action="store_true", default=False,
        help="Flag. If set, Venn plotted against transcripts, not genes.")
    parser.add_argument("--title", default="Venn Diagram")
    args = parser.parse_args()

    options = parse_configuration(args.configuration,
                                  exclude_mikado=args.exclude)

    sets = OrderedDict()
    # NOTE(review): `total` is filled from the first refmap and then zeroed,
    # but nothing in this function reads it afterwards.
    total = Counter()
    first = True

    # Column holding the identifier, and column holding the class code.
    if args.transcripts is True:
        colname, ccode = "ref_id", "ccode"
    else:
        colname, ccode = "ref_gene", "best_ccode"

    for aligner in ["STAR", "TopHat"]:
        for method in options["methods"]:
            # Raw string with an escaped dot: only a literal ".stats" suffix
            # is stripped, not any character followed by "stats".
            refmap = "{}.refmap".format(
                re.sub(r"\.stats$", "",
                       options["methods"][method][aligner][0]))
            with open(refmap) as ref:
                tsv = csv.DictReader(ref, delimiter="\t")
                meth = "{} ({})".format(method, aligner)
                sets[meth] = set()
                for row in tsv:
                    if first:
                        total.update([row[colname]])
                    if _ccode_matches(row[ccode], args.type):
                        sets[meth].add(row[colname])
                if first:
                    for gid in total:
                        total[gid] = 0
                    first = False

    # pyupset expects one DataFrame per set.
    for aligner in ["STAR", "TopHat"]:
        for method in sorted(options["methods"]):
            set_name = "{} ({})".format(method, aligner)
            sets[set_name] = pd.DataFrame(list(sets[set_name]),
                                          columns=["TID"])

    pyu.plot(
        sets,
        inters_size_bounds=(100, 20000),
    )
    if args.format is None:
        args.format = "svg"
    plt.savefig(args.out, format=args.format)
#! /usr/bin/env python
# Build an UpSet plot comparing the 'name' column of every CSV file in the
# current directory and save it as plot-gather.png.
import matplotlib
# The backend has to be selected before pyplot is imported.
matplotlib.use('agg')
import matplotlib.pyplot as plt
import pyupset as pyu
import matplotlib as mpl
from pickle import load
import pandas as pd
import glob

# One DataFrame per CSV file, keyed by the file name up to its first dot.
genus_dict = {}
for csv_path in glob.glob('*csv'):
    sample_name = csv_path.split('.')[0]
    genus_dict[sample_name] = pd.read_csv(csv_path, delimiter=",")
    print(sample_name)

# Give the two expected inputs short labels for the plot; a missing input
# raises KeyError here.
relabel = (
    ('reads', 'ERR1719497_paired_gather_all'),
    ('assembly', 'tara_f135_full_megahit'),
)
for short_label, original_label in relabel:
    genus_dict[short_label] = genus_dict.pop(original_label)

pplot = pyu.plot(genus_dict, unique_keys=['name'])
figure = pplot['figure']
figure.savefig('plot-gather.png')
import matplotlib.pyplot as plt

matplotlib.rcParams['figure.figsize'] = (20.0, 5.0)


def filter_model_name(model_name):
    """Return *model_name* with the long model-family names abbreviated."""
    # Applied in order: 'ConsensusDocking' has to be rewritten before the
    # bare 'Docking' rule gets a chance to match inside it.
    abbreviations = (
        ('SingleClassification', 'STNN-C'),
        ('SingleRegression', 'STNN-R'),
        ('MultiClassification', 'MTNN-C'),
        ('RandomForest', 'RF'),
        ('ConsensusDocking', 'CD'),
        ('Docking', 'Dock'),
    )
    for long_form, short_form in abbreviations:
        model_name = model_name.replace(long_form, short_form)
    return model_name


selected_names = [
    'Baseline',
    'ConsensusDocking_efr1_opt',
    'IRV_d',
    'RandomForest_h',
    'SingleClassification_a',
    'SingleRegression_b',
    'MultiClassification_b',
    'LSTM_b',
]

# Rows with a positive label; each selected column of this frame holds the
# rank that model assigned to those rows.
positive_rows = rank_df[rank_df['label'] > 0]

plot_dict = {}
for model_name in selected_names:
    # Keep only the positives this model ranked below 250.
    model_ranks = positive_rows[model_name]
    top_ranked = model_ranks.where(model_ranks < 250).dropna()
    plot_dict[filter_model_name(model_name)] = pd.DataFrame(
        data=top_ranked.keys(), columns=['Items'])

matplotlib.rcParams.update({'font.size': 15})
fig = pyu.plot(plot_dict, inters_size_bounds=(1, 50))
# NOTE(review): indexing the result with [0] assumes pyu.plot returns a
# sequence here; upstream pyupset returns a dict - confirm which version
# is installed.
fig[0]['figure'].savefig('./plottings/prospective_screening_venn/venn_diagram', bbox_inches='tight')
'../out/Generic-production/feature_selection/boruta/Confirmed.boruta_features.csv', header=None) generic_confirmed_df.columns = ['Features'] generic_confirmed_df = homogenise_tissue_specific_features( generic_confirmed_df, reset_index=False) #print(generic_confirmed_df.head()) confirmed_features_dict = {} confirmed_features_dict['CKD'] = ckd_confirmed_df confirmed_features_dict['Epilepsy'] = epilepsy_confirmed_df confirmed_features_dict['ALS'] = als_confirmed_df confirmed_features_dict['Generic'] = generic_confirmed_df min_inters_size = 1 pyu.plot(confirmed_features_dict, sort_by='degree', inters_size_bounds=(min_inters_size, np.inf)) cur_fig = matplotlib.pyplot.gcf() cur_fig.savefig('Confirmed_features_intersection_between_classifiers.pdf', bbox_inches='tight') # === Print intersection / union sets === # Degree 4 intersection_disease_features = list( set(ckd_confirmed_df['Features'].tolist()) & set(epilepsy_confirmed_df['Features'].tolist()) & set(als_confirmed_df['Features'].tolist())) #print('intersection_disease_features:', intersection_disease_features) disease_generic_intersection = list( set(intersection_disease_features)
def upset_plots_gen(self, output_dir='/Users/mskirk/Documents/Conferences/AGU 2017'):
    """Generate and save the spike-coincidence UpSet plots.

    Regenerates the spike dataframe with 300 sample groups, converts it with
    ``self.pyupset_format()`` and writes five PNG files into *output_dir*:
    ``2-way.png``, ``2-way_c.png`` (same plot with highlighted queries),
    ``3-way.png``, ``4-way.png`` and ``567-way.png``.

    Args:
        output_dir: directory the PNG files are written to. Defaults to the
            location that used to be hard-coded.
    """
    import os

    # Takes about 3.5 hours to process in total.
    self.spikes_dataframe_gen(n_sample_groups=300)
    ups = self.pyupset_format()

    # (file name, title, intersection degree bounds, sort order, query)
    plot_specs = [
        ('2-way.png', 'Pairwise Spike Coincidences', (2, 2), 'size', None),
        ('2-way_c.png', 'Pairwise Spike Coincidences', (2, 2), 'size',
         [('304', '94'), ('211', '193'), ('335', '131')]),
        ('3-way.png', '3-Way Spike Coincidences', (3, 3), 'size', None),
        ('4-way.png', '4-way Spike Coincidences', (4, 4), 'size', None),
        ('567-way.png', '5, 6, and 7-way Spike Coincidences', (5, 7),
         'degree',
         [('304', '94', '211', '193', '335', '131', '171')]),
    ]

    for filename, title, degree_bounds, sort_by, query in plot_specs:
        plt.rc('font', size=12)
        plot_kwargs = {
            'unique_keys': ['SpaceGroup', 'TimeGroup'],
            'inters_degree_bounds': degree_bounds,
            'sort_by': sort_by,
        }
        # Only pass `query` when one is defined, so the plots without
        # highlights are called exactly as before.
        if query is not None:
            plot_kwargs['query'] = query
        pyu.plot(ups, **plot_kwargs)
        plt.title(title, {'fontsize': 18, 'fontweight': 'bold'})
        plt.savefig(os.path.join(output_dir, filename))
c_str = '' a_str = '' for a_key, a_value in tp.items(): a_str += a_key + "," for b_key, b_value in tp.items(): print(a_key + " tp of " + b_key + " tp: " + str(len(a_value.intersection(b_value)) / float(len(b_value)))) c_str += "{:0.2f}".format( round(len(a_value.intersection(b_value)) / float(len(b_value)), 2)) + "," #c_str += str(len(a_value.intersection(b_value))) + "," c_str += '\n' print(a_str) print(c_str) ''' #store true positives in dict true_positives_dict['ED2'] = generate_truepositives(ground_truth, ed2) true_positives_dict['NADEEF'] = generate_truepositives(ground_truth, nadeef) true_positives_dict['KATARA'] = generate_truepositives(ground_truth, katara) true_positives_dict['Gaussian'] = generate_truepositives(ground_truth, gaussian) true_positives_dict['Histogram'] = generate_truepositives(ground_truth, histogram) true_positives_dict['Mixture'] = generate_truepositives(ground_truth, mixture) true_positives_dict['ActiveClean'] = generate_truepositives(ground_truth, active_clean) true_positives_dict['BoostClean'] = generate_truepositives(ground_truth, boost_clean) pyu.plot(true_positives_dict, sort_by='degree', inters_size_bounds=(2500, np.inf))
df3 = df2[df2['Count']>2] df3.to_excel(writer,"More than 2") df3 = df2[df2['Count']>3] df3.to_excel(writer,"More than 3") writer.save() modes = ["APCI","APPI","ESI","LDI"] ######## Negative ########## df_dict_neg = {'APPI':df2[(df2['Polarity']=='Neg') & (df2['Mode']=='APPI')], 'APCI':df2[(df2['Polarity']=='Neg') & (df2['Mode']=='APCI')], 'ESI':df2[(df2['Polarity']=='Neg') & (df2['Mode']=='ESI')], 'LDI':df2[(df2['Polarity']=='Neg') & (df2['Mode']=='LDI')]} upset = pyu.plot(df_dict_neg,unique_keys=['Formula'],sort_by='degree') plt.savefig(outputdata+"UpSetNeg.png",dpi=300) intsets = upset['intersection_sets'] intsetkeys = [] for y in intsets: intsetkeys.append(y) def plotcommontoall(): df_common = intsets[intsetkeys[-1]] df_common = df_common.sort_values('Mass') glocmap = cm.viridis_r sns.set_style("white") sns.set_context("paper",font_scale=2)