def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues, filename='viz\\confusion_matrix.png'):
    """Draw a confusion matrix as an annotated heatmap and save it to a file.

    Args:
        cm: 2-D NumPy array; cell ``[i, j]`` is annotated at row ``i``
            (y axis, labelled "True label") and column ``j`` (x axis,
            labelled "Predicted label").
        classes: Sequence of tick labels, used for both axes.
        normalize: When true, every row is divided by its row sum before
            drawing.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heatmap.
        filename: Path handed to ``plt.savefig``.  NOTE(review): the default
            uses a backslash separator and assumes a ``viz`` directory
            already exists -- confirm against callers.
    """
    if normalize:
        # Normalise *before* drawing so that the heatmap colours, the cell
        # annotations and the contrast threshold all describe the same values.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        # Ratios are shown with two decimals; raw values are shown unchanged.
        label = format(value, '.2f') if normalize else str(value)
        plt.text(j, i, label,
                 horizontalalignment="center",
                 color="white" if value > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig(filename)
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heatmap.

    Args:
        cm: 2-D NumPy array of values to display.
        classes: Tick labels, used for both axes.
        normalize: When true, each row is divided by its row sum and cells
            are shown with two decimals; otherwise cells are formatted as
            integers.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heatmap.
    """
    if not normalize:
        print("Confusion Matrix, without normalization")
        cell_format = 'd'
    else:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
        cell_format = '.2f'
    print(cm)

    # Show the matrix as an image on the current axes.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # One evenly spaced tick per class.
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Text switches to white on cells above half of the maximum value.
    half_max = cm.max() / 2.
    row_count, col_count = cm.shape[0], cm.shape[1]
    for row in range(row_count):
        for col in range(col_count):
            text_color = 'white' if cm[row, col] > half_max else "black"
            plt.text(col, row, format(cm[row, col], cell_format),
                     horizontalalignment='center', color=text_color)

    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
def plotConfusionMatrix(lbllist, predlist, classes, type):
    """Compute a confusion matrix, plot it and save the image under LOG_PATH.

    Args:
        lbllist: Labels passed as first argument to ``confusion_matrix``.
        predlist: Predictions passed as second argument to
            ``confusion_matrix``.
        classes: Tick labels, used for both axes.
        type: ``'train'`` or ``'test'``; selects the title and the output
            file name.  Any other value produces neither title nor file.
    """
    matrix = confusion_matrix(lbllist, predlist)

    plt.imshow(matrix, interpolation="nearest", cmap=plt.cm.Blues)

    # (title, file name) per supported run type.
    variants = {
        'train': ("Confusion matrix training", 'Confusion matrix training.png'),
        'test': ("Confusion matrix testing", 'Confusion matrix testing.png'),
    }
    variant = variants.get(type) if type in ('train', 'test') else None

    if variant is not None:
        plt.title(variant[0])
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Integer cell labels; white text on cells above half of the maximum.
    half_max = matrix.max() / 2.
    for row in range(matrix.shape[0]):
        for col in range(matrix.shape[1]):
            text_color = "white" if matrix[row, col] > half_max else "black"
            plt.text(col, row, format(matrix[row, col], "d"),
                     horizontalalignment="center", color=text_color)

    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

    if variant is not None:
        plt.savefig(LOG_PATH + variant[1])
    plt.close()
def plot_confusion_matrix(cm, classes, title='混淆矩阵', cmap=plt.cm.Greens):
    """Draw a confusion matrix as an annotated heatmap on the current figure.

    Args:
        cm: 2-D array-like of values; converted with ``np.asarray``.  Cell
            ``[row][col]`` is written at x=``col``, y=``row``.
        classes: Tick labels, used for both axes.  The number of ticks now
            follows ``len(classes)`` instead of being fixed at two.
        title: Title of the plot.
        cmap: Matplotlib colormap (e.g. ``plt.cm.Blues``, ``plt.cm.Reds``,
            ``plt.cm.Greens``).
    """
    cm = np.asarray(cm)

    # Global font settings: SimHei is selected as the sans-serif font
    # (presumably so the Chinese title/axis labels render -- confirm the font
    # is installed), and 'axes.unicode_minus' is switched off.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # interpolation="nearest" draws each cell as a flat block
    # ("bilinear" would blend neighbouring cells).
    plt.imshow(cm, cmap=cmap, interpolation="nearest")
    plt.title(title)
    plt.colorbar()  # colour scale next to the heatmap

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)  # class names along the x axis
    plt.yticks(tick_marks, classes)  # class names along the y axis

    # Cells at or above the mean get white text for contrast.
    thresh = np.mean(cm)
    n_rows, n_cols = cm.shape
    for row in range(n_rows):
        for col in range(n_cols):
            value = cm[row][col]
            # The colour must be decided from the same cell whose value is
            # printed (the previous code tested the transposed cell).
            plt.text(col, row, value,
                     horizontalalignment='center',
                     color='white' if value >= thresh else 'black')

    plt.xlabel('预测值')
    plt.ylabel('真实值')
def plot_confusion_matrix(confusion_matrix, class_labels, normalize=False, title='Confusion Matrix', cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heatmap.

    Code courtesy of Abinav Sagar:
    https://towardsdatascience.com/convolutional-neural-network-for-breast-cancer-classification-52f1213dcc9

    Args:
        confusion_matrix: 2-D NumPy array of values to display.
        class_labels: Tick labels, used for both axes.
        normalize: When true, each row is divided by its row sum and cells
            are shown with two decimals; otherwise cells are formatted as
            integers.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heatmap.
    """
    if not normalize:
        print('Confusion matrix, without normalization')
        cell_format = 'd'
    else:
        row_totals = confusion_matrix.sum(axis=1)[:, np.newaxis]
        confusion_matrix = confusion_matrix.astype('float') / row_totals
        print("Normalized confusion matrix")
        cell_format = '.2f'
    print(confusion_matrix)

    plt.imshow(confusion_matrix, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(class_labels))
    plt.xticks(positions, class_labels, rotation=55)
    plt.yticks(positions, class_labels)

    # White text on cells above half of the maximum, black otherwise.
    half_max = confusion_matrix.max() / 2.
    for row in range(confusion_matrix.shape[0]):
        for col in range(confusion_matrix.shape[1]):
            cell = confusion_matrix[row, col]
            text_color = "white" if cell > half_max else "black"
            plt.text(col, row, format(cell, cell_format),
                     horizontalalignment="center", color=text_color)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print and plot the confusion matrix.

    Normalization can be applied by setting `normalize=True`.

    Args:
        cm: 2-D NumPy array; cell ``[i, j]`` is annotated at row ``i``
            (y axis, "True label") and column ``j`` (x axis,
            "Predicted label").
        classes: Tick labels, used for both axes.
        normalize: When true, every row is divided by its row sum.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heatmap.
    """
    if normalize:
        # Normalise *before* drawing so that the heatmap colours, the cell
        # annotations and the contrast threshold all describe the same values.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        # Ratios are shown with two decimals; raw values are shown unchanged.
        label = format(value, '.2f') if normalize else str(value)
        plt.text(j, i, label,
                 horizontalalignment="center",
                 color="white" if value > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
def silhouette():
    """Compute silhouette scores for the 3rd-pass clustering and plot them.

    Reads ``Stardust_results/visualization_output/3_pass/data.csv`` (columns
    ``data``, ``x``, ``y`` and ``cluster`` are used), prints the average
    silhouette score and writes ``silhouette.pdf`` / ``silhouette.png`` into
    ``Stardust_results/analysis``.

    Exits the process with a non-zero status when ``Stardust_results`` does
    not exist.
    """
    if not os.path.exists("Stardust_results"):
        print(
            "The directory structure Stardust_results doest not exist. Please run run_stardust first"
        )
        # This is an error path, so report failure to the calling shell.
        sys.exit(1)

    output_path = "Stardust_results/analysis/"
    os.makedirs(output_path, exist_ok=True)

    from sklearn.metrics import silhouette_samples, silhouette_score
    import matplotlib.cm as cm

    data_df = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data.csv',
        delimiter=",",
        index_col=False)
    data_df.set_index('data', inplace=True)

    coords = data_df[['x', 'y']]
    labels = data_df['cluster']
    silhouette_avg = silhouette_score(coords, labels)
    sample_silhouette_values = silhouette_samples(coords, labels)
    print("silhouette score ", silhouette_avg)

    fig = plt.figure(figsize=(4, 7))

    # Iterate over the labels that actually occur; they are not guaranteed to
    # be exactly 0..n-1, and range(n) would silently skip any other label.
    cluster_labels = sorted(labels.unique())
    n_clusters = len(cluster_labels)

    y_lower = 10
    for position, label in enumerate(cluster_labels):
        # Collect and sort the silhouette scores of this cluster's samples.
        cluster_values = sample_silhouette_values[(labels == label).values]
        cluster_values.sort()

        size_cluster = cluster_values.shape[0]
        y_upper = y_lower + size_cluster

        color = cm.nipy_spectral(float(position) / n_clusters)
        plt.fill_betweenx(np.arange(y_lower, y_upper),
                          0,
                          cluster_values,
                          facecolor=color,
                          edgecolor=color,
                          alpha=0.7)

        # Write the cluster label at the vertical middle of its band.
        plt.text(-0.05, y_lower + 0.5 * size_cluster, str(label))

        # Leave a gap of 10 rows before the next band.
        y_lower = y_upper + 10

    plt.title("The silhouette plot for the various clusters.")
    plt.xlabel("silhouette coefficient", fontsize=20)
    plt.ylabel("Cluster label", fontsize=20)
    # Dashed red line marks the average score over all samples.
    plt.axvline(x=silhouette_avg, color="red", linestyle="--")
    plt.yticks([])  # clear the y-axis labels / ticks
    plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
    sns.despine(bottom=False, left=False)

    fig.savefig(os.path.join(output_path, "silhouette.pdf"),
                bbox_inches='tight',
                dpi=600)
    fig.savefig(os.path.join(output_path, "silhouette.png"),
                bbox_inches='tight',
                dpi=600)
def add_labels(rects):
    """Write each bar's height above it and give the bar a white edge.

    Args:
        rects: Iterable of bar objects exposing ``get_height``, ``get_x``,
            ``get_width`` and ``set_edgecolor``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Horizontal centre of the bar.
        x_center = bar.get_x() + bar.get_width() / 2
        plt.text(x_center, bar_height, bar_height, ha='center', va='bottom')
        bar.set_edgecolor('white')
def plottrace(self, point):
    """Animate two vessel trajectories with matplotlib's pyplot.

    For each of the first ``point`` time steps the positions are read from
    ``sim_res.SHIP1POS`` / ``sim_res.SHIP2POS`` (index 0 is plotted on the
    x axis, index 1 on the y axis), the tracks so far are redrawn and the
    matching ``sim_res.RISKVALUE`` entry is shown as text.

    Args:
        point: Number of time steps to draw.
    """

    def initial(ax):
        ax.axis("equal")  # same scale on the X and Y axes
        ax.set_xlabel('Horizontal Position')
        ax.set_ylabel('Vertical Position')
        ax.set_title('Vessel trajectory')
        plt.grid(True)  # add grid lines
        return ax

    # Allocated for per-step timing; the timing code itself is disabled.
    es_time = np.zeros([point])

    fig = plt.figure()
    ax = initial(fig.add_subplot(1, 1, 1))
    # test
    ax2 = initial(fig.add_subplot(1, 1, 1))

    plt.ion()  # interactive mode on, for incremental drawing

    obsX, obsX2 = [], []
    obsY, obsY2 = [], []

    for t in range(point):
        # Extend both tracks with the positions at step t.
        obsX.append(sim_res.SHIP1POS[t][0])
        obsX2.append(sim_res.SHIP2POS[t][0])
        obsY.append(sim_res.SHIP1POS[t][1])
        obsY2.append(sim_res.SHIP2POS[t][1])

        plt.cla()

        ax = initial(ax)
        ax.plot(obsX, obsY, '-g', marker='*')
        # test
        ax2 = initial(ax2)
        ax2.plot(obsX2, obsY2, '-r', marker='o')

        risk_value_text = 'Risk value: ' + str(sim_res.RISKVALUE[t])
        plt.text(0, 7, risk_value_text)

        plt.pause(0.5)

    plt.pause(0)
def vertical_mean_line(x, **kwargs):
    """Draw a dashed vertical line at the mean of ``x`` with a mean/std label.

    Args:
        x: Object exposing ``mean()`` and ``std()``.
        **kwargs: Only ``color`` is read (default ``"r"``); it is used for
            both the line and the label.
    """
    line_color = kwargs.get("color", "r")
    mean_value = x.mean()

    plt.axvline(mean_value, linestyle="--", color=line_color)

    text_style = {"size": 15, "color": line_color}

    # The offsets and the cut-off below need customization based on your data.
    label_y = 5
    if mean_value < 6:
        # Small means: put the label just to the right of the line.
        label_x = mean_value + 0.08
        template = "mean: {:.2f}\n(std: {:.2f})"
    else:
        # Larger means: put the label to the left of the line.
        label_x = mean_value - 1.4
        template = "mean: {:.2f}\n (std: {:.2f})"

    caption = template.format(mean_value, x.std())
    plt.text(label_x, label_y, caption, **text_style)
def get_regression_report(y_true=None, prediction=None, show_r2_plot=True, save_plot=False):
    '''
    Generates performance report for a regression problem.

    Prints the mean absolute error, mean squared error, mean squared log
    error and R-squared, then optionally draws a truth-vs-prediction scatter
    plot.

    Parameters:
    ------------------
    y_true: Array, series, list.

        The truth/ground value from the train data set.

    prediction: Array, series, list.

        The predicted value by a trained model.

    show_r2_plot: Bool, default True.

        Show the r-squared curve.

    save_plot: Bool, default False.

        Save the plot to the current working directory as "r2_plot.png".
    '''
    mae = mean_absolute_error(y_true, prediction)
    mse = mean_squared_error(y_true, prediction)
    msle = mean_squared_log_error(y_true, prediction)
    r2 = r2_score(y_true, prediction)

    print("Mean Absolute Error: ", round(mae, 5))
    print("Mean Squared Error: ", round(mse, 5))
    print("Mean Squared Log Error: ", round(msle, 5))
    print("R-squared Error: ", round(r2, 5))
    print("*" * 100)

    if show_r2_plot or save_plot:
        plt.scatter(y_true, prediction)
        plt.xlabel('Truth values')
        plt.ylabel('Predicted values')

        # Reference line "prediction == truth".  The previous code obtained
        # it by fitting y_true against itself, which is the identity line.
        truth_values = np.unique(y_true)
        plt.plot(truth_values, truth_values)

        plt.text(0.7, 0.2, 'R-squared = %0.2f' % r2)

        # Save before showing: once plt.show() returns, the figure may
        # already be closed and savefig would write an empty image.
        if save_plot:
            plt.savefig("r2_plot.png")
        if show_r2_plot:
            plt.show()
def plotMultipleNumpylist(plotDict, yLabel, xLable, markerX=120, markerLabel=" GRAMs Launching"):
    """Plot several series as lines and mark one x position.

    Args:
        plotDict: Mapping of legend label -> sequence of values; one line is
            drawn per entry.
        yLabel: Label of the y axis.
        xLable: Label of the x axis.
        markerX: x position of the dashed vertical marker line (default 120,
            the previously hard-coded value).
        markerLabel: Text written next to the marker line at the height of
            the largest plotted value.
    """
    peaks = []
    for series in plotDict.values():
        plt.plot(series, linewidth=.7)
        peaks.append(max(series))

    plt.ylabel(yLabel)
    plt.xlabel(xLable)

    # With nothing plotted there is no peak; fall back to 0 instead of
    # letting max() fail on an empty list.
    text_loc_y = max(peaks) if peaks else 0

    # ymin/ymax are fractions of the axes height, so 0..1 spans the full plot.
    plt.axvline(markerX, ymin=0, ymax=1, linestyle='dashed', color='maroon')
    plt.text(markerX, text_loc_y, markerLabel,
             {'color': 'maroon', 'fontsize': 10})

    plt.legend(plotDict.keys(), loc='upper left')
    plt.show()
def RysujGeom(x_range, WEZLY, ELEMENTY, types):
    """Draw a 1-D geometry: the interval, its nodes and their numbering.

    Args:
        x_range: Sequence whose first two items are the interval end points.
        WEZLY: Sequence of node x coordinates.
        ELEMENTY: Not used by this function.
        types: Sequence whose first two items are written next to the left
            and right end point respectively.
    """
    left, right = x_range[0], x_range[1]

    # End points, the interval itself and the nodes, all on the line y = 0.
    plt.plot(left, 0, '*')
    plt.plot(right, 0, '*')
    plt.plot(x_range, [0, 0])
    plt.plot(WEZLY, np.zeros(len(WEZLY)), '*')

    plt.text(left - 0.15, 0, types[0])
    plt.text(right + 0.15, 0, types[1])

    # Above each node: its coordinate; below it: its 1-based number.
    for index in range(len(WEZLY)):
        node = WEZLY[index]
        plt.text(node - 0.03, 0.01, str(node))
        plt.text(node - 0.05, -0.05, str(index + 1))

    # 1-based number at the midpoint of every pair of consecutive nodes.
    for index in range(len(WEZLY) - 1):
        node, next_node = WEZLY[index], WEZLY[index + 1]
        print((node - next_node) / 2)
        plt.text(node / 2 + next_node / 2, 0.05, str(index + 1))

    plt.xlim([left - 0.3, right + 0.3])
    plt.ylim([-0.2, 0.42])
def plot_confusion_matrix(cm, classes, title='Confusion Matrix', cmap=plt.cm.Blues):
    """Draw a row-normalised confusion matrix as an annotated heatmap.

    Every row of ``cm`` is divided by its row sum, and the result is drawn
    on a new 10x10 figure with two-decimal cell labels.

    Args:
        cm: 2-D NumPy array of values.
        classes: Tick labels, used for both axes.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heatmap.
    """
    ratios = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(10, 10))
    plt.imshow(ratios, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # White text on cells above half of the maximum, black otherwise.
    half_max = ratios.max() / 2.
    for row in range(ratios.shape[0]):
        for col in range(ratios.shape[1]):
            cell = ratios[row, col]
            text_color = "white" if cell > half_max else "black"
            plt.text(col, row, format(cell, '.2f'),
                     horizontalalignment="center", color=text_color)

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
def display(self, data, candidates, fname, display):
    """Plot a frequency-distribution estimate and save it as a PNG.

    The first items of ``candidates`` are split into two halves; from the
    means and ranges of the halves the function derives a "one distribution"
    or "two distributions" verdict, which is written onto the plot together
    with a histogram of ``data``.

    Args:
        data: Sequence of numbers for the histogram.
        candidates: Sequence of pairs; item 0 is plotted on the x axis and
            item 1 on the y axis.
        fname: Input file name; the last three characters are replaced by
            ``png`` to form the output name.
        display: When equal to ``True`` the plot is also shown on screen.
    """
    finallist = [c[0] for c in candidates]

    # Integer division: a float is not a valid slice index on Python 3.
    half = len(finallist) // 2
    part1 = finallist[:half]
    part2 = finallist[half:]

    meandiff = int(np.sqrt(np.power(np.mean(part2), 2) - np.power(np.mean(part1), 2)))
    rangeA = max(part1) - min(part1)
    rangeB = max(part2) - min(part2)
    span = int((rangeA + rangeB) / 2)
    dspan = int(meandiff / span)
    theta = float(meandiff / (rangeA + rangeB))

    # Parentheses spell out the precedence the expression always had.
    if (dspan > 3 and meandiff > 20) or meandiff > 36:
        oneortwo = "Two distributions \n\n MD: %d \n Span: %d \n Dspan: %d \n theta: %d" % (meandiff, span, dspan, theta)
    else:
        oneortwo = "One distribution \n\n MD: %d \n Span: %d \n Dspan: %d \n theta: %d" % (meandiff, span, dspan, theta)

    cans = np.array(candidates)
    peak = max(cans[:, 1])
    plt.plot(cans[:, 0], cans[:, 1], 'ro')
    plt.axhline(peak / 4, color='r')
    plt.axhline(peak / 2, color='r')
    plt.axhline(int(peak) * 0.75, color='r')
    red_patch = mpatches.Patch(color='red', label='75%, 50% and 25% \nof maximum frequency')
    plt.legend(handles=[red_patch])
    plt.ylabel('Frequency of occurence')
    plt.xlabel('separate items')
    plt.title('Frequency distribution estimation graph: %s' % (fname))
    plt.text(max(data) * 1.1, peak * 0.62, oneortwo, fontsize=11, color='r')
    plt.hist(data, range(int(min(data)), int(max(data)), 1))

    ofile = fname[0:-3] + "png"
    # Format inside the call: on Python 3 print() returns None, so applying
    # "%" to its result raises TypeError.
    print("Writing outfile: %s" % (ofile))
    plt.savefig(ofile, bbox_inches='tight')

    if display == True:
        plt.show()
    return
def Plant(RTE_NBR, TOTAL_PKG_STOP_LIST, TOTAL_STOP, Current_MGR, Current_DATE):
    """Draw a per-route bar chart and save it as a PNG.

    Args:
        RTE_NBR: Route identifiers, used as x tick labels.
        TOTAL_PKG_STOP_LIST: Bar heights drawn in red, one per route.
        TOTAL_STOP: Bar heights drawn in green, one per route.
        Current_MGR: Manager identifier; used in the title and the file name.
        Current_DATE: Date; used in the title and the file name.

    The ``v/z`` text above each bar additionally reads ``TOTAL_PKG``, which
    is not a parameter and must be available in the enclosing scope.
    """
    print("=======================================")
    print("测试绘制1")

    manager_text = str(Current_MGR)
    date_text = str(Current_DATE)

    # File-name safe variants: no blanks, and ":" "/" replaced by "_".
    manager_tag = manager_text.replace(" ", "") + "_"
    date_tag = date_text.replace(" ", "")
    for forbidden in (":", "/"):
        date_tag = date_tag.replace(forbidden, "_")
    date_tag = date_tag + "_"

    fig, ax = plt.subplots()

    # Explicit tick positions keep the routes in the given order.
    plt.xticks(arange(len(RTE_NBR)), RTE_NBR)

    # Mind the drawing order: the green bars are drawn over the red ones.
    ax.bar(arange(len(RTE_NBR)), TOTAL_PKG_STOP_LIST, color="red")
    ax.bar(arange(len(RTE_NBR)), TOTAL_STOP, color="green")

    chart_title = "(MGR:" + manager_text + ")" + "Fedex" + date_text + "Working_Detail"
    ax.set(xlabel="RTE NBR", title=chart_title)

    # Empty text artist, then moved; both coordinates must not exceed 1.
    text = ax.text(0.02, 0.90, "")
    text.set_position((0.9, .9))

    rows = zip(arange(len(RTE_NBR)), TOTAL_PKG_STOP_LIST, TOTAL_PKG, TOTAL_STOP)
    for x, y, z, v in rows:
        plt.text(x - 0.5, y, '%d/%d' % (v, z))

    ax.legend(["TOTAL PKG", "TOTAL STOP"])  # set the legend

    fig.savefig("MGR_" + manager_tag + date_tag + "Working_Detail.png")
def make_bar_graph(data=housing):
    '''
    Draw one 10-bin bar chart per column of ``data`` on a grid of subplots.

    :param data: table exposing ``columns`` and column access by name
        (defaults to the module-level ``housing``).  NOTE(review): every
        column is assumed to be numeric and free of NaN -- confirm.
    :return: None
    '''
    columns = list(data.columns)

    # Four charts per row; at least three rows as before, more when the
    # table has more than twelve columns.
    n_cols = 4
    n_rows = max(3, -(-len(columns) // n_cols))

    mp.figure('Bar', facecolor='lightgray')
    mp.title('Bar', fontsize=20)
    gs = mg.GridSpec(n_rows, n_cols)

    for position, column in enumerate(columns):
        # Row/column of this chart inside the grid.
        i, j = divmod(position, n_cols)

        # Create the subplot.
        mp.subplot(gs[i, j])
        # Text inside the chart: position, content, alignment, size, colour
        # and transparency.
        mp.text(0.5, 0.5, str(i) + '+' + str(j), ha='center', va='center', size=35, color='red', alpha=0.5)
        # Remove the axis ticks.
        mp.xticks(())
        mp.yticks(())

        # Count the values falling into ten equal-width bins between the
        # column's minimum and maximum.
        counts, _ = np.histogram(data[column], bins=10)
        x = np.arange(len(counts))
        mp.bar(x, counts, 0.4, color='dodgerblue', label=column, alpha=0.75)

    # Switch to a compact layout.
    mp.tight_layout()
def qqPlot(theoreticalQ, sampleQ, name):
    """Draw a QQ plot with a least-squares trend line and its equation.

    Args:
        theoreticalQ: Sequence of theoretical quantiles (x axis).
        sampleQ: Sequence of sample quantiles (y axis).
        name: Used as the scatter label and in the title.
    """
    # Local import so the block does not depend on a file-level import.
    from scipy.stats import linregress

    # The previous code unpacked five names from the integer 0, which raises
    # TypeError; fit the regression line for real instead.
    fit = linregress(theoreticalQ, sampleQ)
    slope, intercept, r_value = fit.slope, fit.intercept, fit.rvalue

    plt.figure()
    plt.scatter(theoreticalQ, sampleQ, s=0.8, label=name, c='blue')

    y = [x * slope + intercept for x in theoreticalQ]
    plt.plot(theoreticalQ, y, 'r', label='Trend line')

    plt.text(0, max(sampleQ) * 0.6, '\n\n$R^2$ = ' + str('%.6f' % r_value**2))

    # A positive intercept needs an explicit "+"; a negative one already
    # carries its own sign.
    if intercept > 0:
        equation = 'y = ' + str('%.6f' % slope) + 'x + ' + str('%.6f' % intercept)
    else:
        equation = 'y = ' + str('%.6f' % slope) + 'x ' + str('%.6f' % intercept)
    plt.text(0, max(sampleQ) * 0.55, equation)

    plt.xlabel('Theoretical Quantile')
    plt.ylabel('Sample Quantile')
    plt.title('QQ plot ' + name)
    plt.grid(True)
    plt.legend()
hbars = ax3.barh(h['bins'][wlow], h['area'][wlow] / N.max(h['area']), left=i - 0.2, color=(0.85, 0.85, 0.85, 1), height=30, edgecolor=(0.85, 0.85, 0.85, 1), zorder=0, linewidth=0) #PLOTTING ASTERISKS BY GLACIERS WITH RECORDS SHORTER THAN 5 YEARS ax2.plot(N.array([12, 13, 47, 48]) + 0.2, N.zeros(4) - 3, 'k*') ax3.set_ylim(-4000, 4000) ax3.yaxis plt.text(51, 2600, "Elevation (m)", rotation=-90) for i, m in enumerate(tidemedian): bar = ax3.plot([i - 0.2, i + 0.8], [m, m], '-k', zorder=0.4) print fig2.axes print fig2.axes #ax2.set_frame_on(False) #ax3.set_frame_on(False) ax2.plot([-5, 60], [-0.921, -0.921], 'r-', zorder=0, alpha=0.5) ax2.plot([-5, 60], [0., 0], 'k-', zorder=0) ax3.set_yticks([0, 1000, 2000, 3000, 4000]) plt.show() if myr:
def compute(inp_dataset, input_path, output_path, de_analysis, n_pass): print("Current pass ", n_pass) import json import matplotlib as plt import csv from sklearn.manifold import TSNE import matplotlib.pyplot as plt from sklearn.decomposition import PCA from decimal import Decimal import seaborn as sns import pandas as pd import networkx as nx from sklearn.cluster import DBSCAN from sklearn.cluster import KMeans import operator import numpy as np import random import sys #csvData=[['data','x','y','type']] print("Processing the input data into datafames....") csvData = [] count = 0 #filename = "G:/Thesis/Dropclust/plots/output_normalized_own_cc.csv" filename = "G:/Thesis/Dropclust/plots/PCA_GENES/output_normalized_own_cc.csv" filename = #"G:/Thesis/Dropclust/output_normalized_zscore_cc1.csv" filename = "C:/Users/Swagatam/IdeaProjects/openOrd/output_normalized_own_cc.csv" filename = input_path + "/output_normalized_own_cc.csv" coord_data = pd.read_csv(filename, names=['data', 'x', 'y']) coord_data.set_index('data', inplace=True) data = [] data_outlier = [] with open(filename, 'r') as csvfile: csvreader = csv.reader(csvfile) for row in csvreader: #f=0 #row=[float(i) for i in row] data.append(row) temp_outlier = [] temp_outlier.append(row[1]) temp_outlier.append(row[2]) data_outlier.append(temp_outlier) temp = row #if row[0].isnumeric(): # temp.append('cell') if len(row[0]) >= 16: temp.append('cell') else: temp.append('gene') count = count + 1 csvData.append(temp) # # DB SCAN # In[20]: if n_pass != 4: noise = [] print("Performing clustering....") db = DBSCAN(eps=180, min_samples=55).fit_predict(data_outlier) final_data = [] csvData = [['data', 'x', 'y', 'type']] for i in range(0, len(list(db))): if db[i] != -1: final_data.append(data[i]) csvData.append(data[i]) if db[i] == -1: noise.append(data[i][0]) data = final_data n_clusters = len(set(db)) - (1 if -1 in list(db) else 0) print("Clustering done. 
the number of obtained clusters: ", n_clusters) else: remove_data = [] prev_df = pd.read_csv( "Stardust_results/visualization_output/3_pass/data.csv", delimiter=",", index_col=False) prev_df.set_index('data', inplace=True) clusters_info = [] for i in range(0, len(csvData)): if csvData[i][3] == 'cell': if csvData[i][0] in (prev_df.index): clusters_info.append(prev_df.loc[csvData[i][0]]['cluster']) else: remove_data.append(csvData[i]) else: f = 0 import pickle with open( 'Stardust_results/visualization_output/3_pass/de_genes_cluster.txt', 'rb') as fp: de_gene_cluster = pickle.load(fp) for rank in range(0, len(de_gene_cluster)): if csvData[i][0] in de_gene_cluster[rank]: f = 1 clusters_info.append(de_gene_cluster[rank].index( csvData[i][0])) break if f == 0: remove_data.append(csvData[i]) for r in remove_data: csvData.remove(r) temp = [['data', 'x', 'y', 'type']] temp.extend(csvData) csvData = temp # In[13]: # # OUTLIER VISUALIZATION # In[21]: if n_pass != 4: print("Starting outlier detection....") data_type = [] c = 0 g = 0 for i in range(0, len(coord_data)): if db[i] != -1: data_type.append("data") else: if len(coord_data.index[i]) >= 16: data_type.append("cell_outliers") else: g = g + 1 data_type.append("gene_outliers") coord_data["data_type"] = data_type data_colors = ["lightblue"] if g > 0: noise_colors = ['blue', 'red'] else: noise_colors = ['blue'] coord_data["alpha"] = np.where(coord_data['data_type'] == 'data', 0.5, 1.0) plt.figure(figsize=(6, 4.5)) #ax = sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha']==0.5],hue="data_type",palette=sns.xkcd_palette(data_colors),sizes=(50,100),size="data_type",alpha=0.3) #sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha']==1.0],hue="data_type",palette=sns.xkcd_palette(noise_colors),sizes=(50,100),size="data_type",marker="^",alpha=1.0,ax=ax) marker = {"gene_outliers": "^", "cell_outliers": "^"} ax = sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha'] == 0.5], hue="data_type", 
palette=sns.xkcd_palette(data_colors), sizes=(50, 100), size="data_type", linewidth=0.0, s=10, alpha=0.3) sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha'] == 1.0], hue="data_type", palette=sns.xkcd_palette(noise_colors), sizes=(100, 50), size="data_type", style="data_type", markers=marker, alpha=1.0, linewidth=0.0, s=10, legend='brief', ax=ax) #plt.legend(title=='') ax.legend(bbox_to_anchor=(1.1, 1.05), frameon=False) sns.despine(bottom=False, left=False) plt.xlabel("dim1") plt.ylabel("dim2") plt.savefig(output_path + 'outliers_visualization.png', bbox_inches='tight') print("Outliers removed from the dataset....") # # POST-HOC CLUSTER ASSIGNMENT # In[23]: print("Starting post hoc clustering....") neighbor_df = pd.read_hdf( 'Stardust_results/build_output/1_pass/neighbor.h5', 'df') if 'Unnamed: 0' in list(neighbor_df.columns): neighbor_df.set_index('Unnamed: 0', inplace=True) p = 0 col = list(neighbor_df.columns) index = list(neighbor_df.index) cell_dict = dict() column_dict = dict() for i in range(len(col)): column_dict[i] = col[i] for i in range(len(list(neighbor_df.index))): row = neighbor_df.iloc[i] col_ind = list(row.to_numpy().nonzero())[0] for ind in col_ind: if index[i] in cell_dict.keys(): cell_dict[index[i]].append(column_dict[ind]) else: temp = [] temp.append(column_dict[ind]) cell_dict[index[i]] = temp cluster_assign = [] for key_cell in cell_dict.keys(): clust = dict() cells = cell_dict[key_cell] for cell in cells: if n_pass == 4: if cell in list(prev_df.index): cluster = prev_df.loc[cell]['cluster'] else: cluster = -1 else: cluster = db[list(coord_data.index).index(cell)] if cluster not in clust.keys(): clust[cluster] = 1 else: clust[cluster] = clust[cluster] + 1 max_cluster = max(clust.items(), key=operator.itemgetter(1))[0] if max_cluster == -1: continue cluster_assign.append(max_cluster) x_total = 0 y_total = 0 count = 0 for cell in cells: if (n_pass != 4 and db[list(coord_data.index).index(cell)] == max_cluster ) or (n_pass == 4 and 
cell in list(prev_df.index) and prev_df.loc[cell]['cluster'] == max_cluster): count = count + 1 x_total = x_total + coord_data.loc[cell]['x'] y_total = y_total + coord_data.loc[cell]['y'] temp = [] temp.append(key_cell) temp.append(x_total / count) temp.append(y_total / count) temp.append('cell') p = p + 1 csvData.append(temp) print("Post hoc clustering done....") # In[24]: with open(output_path + 'data.csv', 'w') as csvFile: writer = csv.writer(csvFile) writer.writerows(csvData) csvFile.close() data_df = pd.read_csv(output_path + "data.csv", delimiter=",", index_col=False) if n_pass != 4: clusters_info = [x for x in db if x != -1] clusters_info = clusters_info + cluster_assign else: clusters_info = clusters_info + cluster_assign data_df['cluster'] = clusters_info data_df.to_csv(output_path + 'data.csv') n_clusters = len(list(set(clusters_info))) print("cluster saved ....") n_clusters = len(data_df['cluster'].unique()) colors = random.sample(seaborn_colors, n_clusters) colors = random.sample(seaborn_colors, n_clusters) plt.figure(figsize=(5, 5)) #cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True) ax = sns.scatterplot(x="x", y="y", data=data_df, hue="cluster", palette=sns.xkcd_palette(colors), linewidth=0.0, s=2) ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False) for cl in range(n_clusters): plt.annotate(cl, data_df.loc[data_df['cluster'] == cl, ['x', 'y']].mean(), horizontalalignment='center', verticalalignment='center', size=10, weight='bold', color="black") sns.despine(bottom=False, left=False) plt.xlabel("sd1", fontsize=20) plt.ylabel("sd2", fontsize=20) plt.setp(ax.spines.values(), linewidth=2) plt.yticks([], linewidth=20) plt.xticks([]) plt.savefig(output_path + "cluster_visualization.png", bbox_inches='tight', dpi=600) plt.savefig(output_path + "cluster_visualization.pdf", bbox_inches='tight', dpi=600) if n_pass == 3: from sklearn.datasets import make_blobs from sklearn.metrics import silhouette_samples, silhouette_score silhouette_avg = 
silhouette_score(data_df[['x', 'y']], data_df['cluster']) sample_silhouette_values = silhouette_samples(data_df[['x', 'y']], data_df['cluster']) print(silhouette_avg) y_lower = 10 import matplotlib.cm as cm #fig, (ax1, ax2) = plt.subplots(1, 2) fig = plt.figure(figsize=(4, 7)) #fig.set_size_inches(18, 7) for i in range(n_clusters): # Aggregate the silhouette scores for samples belonging to # cluster i, and sort them ith_cluster_silhouette_values = \ sample_silhouette_values[data_df['cluster'] == i] ith_cluster_silhouette_values.sort() size_cluster_i = ith_cluster_silhouette_values.shape[0] y_upper = y_lower + size_cluster_i color = cm.nipy_spectral(float(i) / n_clusters) plt.fill_betweenx(np.arange(y_lower, y_upper), 0, ith_cluster_silhouette_values, facecolor=color, edgecolor=color, alpha=0.7) # Label the silhouette plots with their cluster numbers at the middle plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i)) # Compute the new y_lower for next plot y_lower = y_upper + 10 # 10 for the 0 samples plt.title("The silhouette plot for the various clusters.") plt.xlabel("silhouette coefficient", fontsize=20) plt.ylabel("Cluster label", fontsize=20) plt.axvline(x=silhouette_avg, color="red", linestyle="--") plt.yticks([]) # Clear the yaxis labels / ticks plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1]) sns.despine(bottom=False, left=False) fig.savefig(output_path + "/silhouette.pdf", bbox_inches='tight', dpi=600) fig.savefig(output_path + "/silhouette.png", bbox_inches='tight', dpi=600) # # MARKER FINDING data_df = pd.read_csv(output_path + "data.csv", delimiter=",", index_col=False) data_df.set_index('data', inplace=True) import pickle if n_pass == 2: path = 'Stardust_results/visualization_output/1_pass' if n_pass == 3: path = 'Stardust_results/visualization_output/2_pass' if n_pass == 4: path = 'Stardust_results/visualization_output/3_pass' if n_pass != 1: with open(path + '/de_genes_cluster.txt', 'rb') as fp: de_gene_cluster = pickle.load(fp) marker = [] disp_marker 
= [] for cl in range(n_clusters): cls = data_df[data_df['cluster'] == cl] gene_df = cls[cls['type'] == 'gene'] f = 0 for rank in range(len(de_gene_cluster)): if f == 1: break for gene in de_gene_cluster[rank]: if gene in list(gene_df.index): disp_marker.append(gene) #print(cl) f = 1 break marker = disp_marker #sys.exit(0) # # CELL GENE MARKER # In[28]: from sklearn.neighbors import KNeighborsRegressor prev_pass_data = pd.read_csv( 'Stardust_results/visualization_output/3_pass/data_openOrd.csv') prev_pass_data.set_index('data', inplace=True) data_df = pd.read_csv(output_path + '/data.csv') data_df.set_index('data', inplace=True) gene_df = data_df[data_df['type'] == 'gene'] x_gene_fit = list(gene_df['x']) y_gene_fit = list(gene_df['y']) cells = list(prev_pass_data.index) cell_list = [] x_coord = [] y_coord = [] for i in range(len(cells)): if cells[i] in list(data_df.index): cell_list.append(cells[i]) x_coord.append(prev_pass_data.iloc[i]['x']) y_coord.append(prev_pass_data.iloc[i]['y']) prev_df = pd.DataFrame(index=cell_list) prev_df['x'] = x_coord prev_df['y'] = y_coord import numpy as np from sklearn.linear_model import Lasso from sklearn.neighbors import KNeighborsRegressor import pickle cells = [] genes = [] gene_coord_x = [] gene_coord_y = [] for i in range(n_clusters): clust_data = data_df[data_df['cluster'] == i] clust_cells = clust_data[clust_data['type'] == 'cell'] clust_genes = clust_data[clust_data['type'] == 'gene'] cells.extend(list(clust_cells.index)) genes.extend(list(clust_genes.index)) if len(list(clust_genes.index)) == 0: continue model1 = KNeighborsRegressor(n_neighbors=4) model2 = KNeighborsRegressor(n_neighbors=4) temp = [] for cell in list(clust_cells.index): if cell in list(prev_df.index): temp.append(cell) clust_cells = clust_cells.loc[temp] model1.fit( np.array(list(clust_cells['x'])).reshape((-1, 1)), np.array(list(prev_df.loc[list(clust_cells.index)]['x'])).reshape( (-1, 1))) filename = output_path + '/sd_x_KNN_model.sav' 
pickle.dump(model1, open(filename, 'wb')) #model1 = pickle.load(open(filename, 'rb')) x_gene_pred = model1.predict( np.array(list(clust_genes['x'])).reshape((-1, 1))) gene_coord_x.extend(x_gene_pred) model2.fit( np.array(list(clust_cells['y'])).reshape((-1, 1)), np.array(list(prev_df.loc[list(clust_cells.index)]['y'])).reshape( (-1, 1))) filename = output_path + '/sd_y_KNN_model.sav' pickle.dump(model2, open(filename, 'wb')) #model2 = pickle.load(open(filename, 'rb')) y_gene_pred = model2.predict( np.array(list(clust_genes['y'])).reshape((-1, 1))) gene_coord_y.extend(y_gene_pred) with open(output_path + "/sd_gene_coord_x.txt", 'wb') as fp: pickle.dump(gene_coord_x, fp) with open(output_path + "/sd_gene_coord_y.txt", 'wb') as fp: pickle.dump(gene_coord_y, fp) #with open (output_path+"/sd_gene_coord_x.txt", 'rb') as fp: # gene_coord_x = pickle.load(fp) #with open (output_path+"/sd_gene_coord_y.txt", 'rb') as fp: # gene_coord_y = pickle.load(fp) import matplotlib.pyplot as plt, mpld3 from scipy.spatial import ConvexHull, convex_hull_plot_2d prev_pass_data = pd.read_csv( 'Stardust_results/visualization_output/3_pass/data_openOrd.csv') prev_pass_data["alpha"] = np.where(prev_pass_data['type'] == 'gene', 1.0, 0.5) color_gene = ["light blue"] color_cell = ["red"] #fig,ax1 = plt.subplots() plt.figure(figsize=(6, 6)) ax = sns.scatterplot(x="x", y="y", data=prev_pass_data[prev_pass_data['alpha'] == 0.5], hue="type", palette=sns.xkcd_palette(color_gene), sizes=(10, 5), size="type", alpha=0.3, s=10) #sns.scatterplot(x="x", y="y", data=data_df[data_df['alpha']==1.0],hue="type",palette=sns.xkcd_palette(color_cell),sizes=(20,5),size="type",marker="^",alpha=1.0,ax=ax,s=10) sns.scatterplot(x=gene_coord_x, y=gene_coord_y, palette=sns.xkcd_palette(color_cell), sizes=(20, 5), marker="^", alpha=1.0, ax=ax, s=10) for c in range(n_clusters): p = data_df[data_df["cluster"] == c] p = p[['x', 'y']] points = p.values hull = ConvexHull(points) #for simplex in hull.simplices: # 
sns.lineplot(points[simplex, 0], points[simplex, 1]) x_list = [] y_list = [] if n_pass != 1: for m in marker: #x_list.append(data_df.loc[m]['x']) x_list.append(gene_coord_x[genes.index(m)]) #y_list.append(data_df.loc[m]['y']) y_list.append(gene_coord_y[genes.index(m)]) for label, x, y in zip(marker, x_list, y_list): plt.annotate( label, xy=(x, y), xytext=(-20, 20), textcoords='offset points', ha='right', va='bottom', #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5), arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0')) ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False) sns.despine(bottom=False, left=False) plt.xlabel("sd1", fontsize=20) plt.ylabel("sd2", fontsize=20) plt.setp(ax.spines.values(), linewidth=2) plt.yticks([], linewidth=20) plt.xticks([]) plt.savefig(output_path + "sd_embedding.png", bbox_inches='tight', dpi=600) plt.savefig(output_path + "sd_embedding.pdf", bbox_inches='tight', dpi=600) import matplotlib.pyplot as plt, mpld3 from scipy.spatial import ConvexHull, convex_hull_plot_2d #data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5) prev_pass_data.set_index('data', inplace=True) temp_data = prev_pass_data[prev_pass_data['type'] == 'cell'] temp_genes = data_df[data_df['type'] == 'gene'] for pos in range(0, len(genes)): temp_genes.at[genes[pos], 'x'] = gene_coord_x[pos] temp_genes.at[genes[pos], 'y'] = gene_coord_y[pos] temp_data.append(temp_genes) color_gene = ["light blue"] color_cell = ["red"] n_clusters = len(data_df['cluster'].unique()) colors = random.sample(seaborn_colors, n_clusters) #fig,ax1 = plt.subplots() plt.figure(figsize=(6, 6)) ax = sns.scatterplot(x="x", y="y", data=temp_data, hue="cluster", palette=sns.xkcd_palette(colors), s=2, linewidth=0.0) #sns.scatterplot(x="x", y="y", data=data_df[data_df['alpha']==1.0],hue="type",palette=sns.xkcd_palette(color_cell),sizes=(20,5),size="type",marker="^",alpha=1.0,ax=ax,s=10) #sns.scatterplot(x=gene_coord_x, 
y=gene_coord_y,palette=sns.xkcd_palette(color_cell),sizes=(20,5),marker="^",alpha=1.0,ax=ax,s=20) for c in range(n_clusters): p = data_df[data_df["cluster"] == c] p = p[['x', 'y']] points = p.values hull = ConvexHull(points) #for simplex in hull.simplices: # sns.lineplot(points[simplex, 0], points[simplex, 1]) x_list = [] y_list = [] d1 = prev_pass_data[prev_pass_data['alpha'] == 0.5] for cl in range(n_clusters): plt.annotate(cl, d1.loc[d1['cluster'] == cl, ['x', 'y']].mean(), horizontalalignment='center', verticalalignment='center', size=10, weight='bold', color="black") if n_pass != 1: for m in marker: #x_list.append(data_df.loc[m]['x']) x_list.append(gene_coord_x[genes.index(m)]) #y_list.append(data_df.loc[m]['y']) y_list.append(gene_coord_y[genes.index(m)]) for label, x, y in zip(marker, x_list, y_list): plt.annotate( label, xy=(x, y), xytext=(-20, 20), textcoords='offset points', ha='right', va='bottom', #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5), arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0')) ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False) sns.despine(bottom=False, left=False) plt.xlabel("sd1", fontsize=20) plt.ylabel("sd2", fontsize=20) plt.setp(ax.spines.values(), linewidth=2) plt.yticks([], linewidth=20) plt.xticks([]) plt.savefig(output_path + "sd_color_embedding.png", bbox_inches='tight', dpi=600) plt.savefig(output_path + "sd_color_embedding.pdf", bbox_inches='tight', dpi=600) #sys.exit(0) # # UMAP CELL GENE MARKER # # if n_pass == 4: import pickle with open('Stardust_results/build_output/1_pass/umap_coord.txt', 'rb') as fp: umap_coord = pickle.load(fp) louvain_df = pd.read_csv( 'Stardust_results/build_output/1_pass/louvain_cluster_df.csv') louvain_df.set_index('Unnamed: 0', inplace=True) #data_df = pd.read_csv('F:/output/output_visualize_melanoma_pca/3rd_pass/data.csv') data_df = pd.read_csv(output_path + '/data.csv') data_df.set_index('data', inplace=True) gene_df = data_df[data_df['type'] == 'gene'] x_gene_fit = 
list(gene_df['x']) y_gene_fit = list(gene_df['y']) cells = list(louvain_df.index) cell_list = [] x_coord = [] y_coord = [] for i in range(len(cells)): if cells[i] in list(data_df.index): cell_list.append(cells[i]) x_coord.append(umap_coord[i][0]) y_coord.append(umap_coord[i][1]) umap_df = pd.DataFrame(index=cell_list) umap_df['x'] = x_coord umap_df['y'] = y_coord import numpy as np from sklearn.linear_model import Lasso from sklearn.neighbors import KNeighborsRegressor import pickle cells = [] genes = [] gene_coord_x = [] gene_coord_y = [] for i in range(n_clusters): clust_data = data_df[data_df['cluster'] == i] clust_cells = clust_data[clust_data['type'] == 'cell'] clust_genes = clust_data[clust_data['type'] == 'gene'] cells.extend(list(clust_cells.index)) genes.extend(list(clust_genes.index)) if len(list(clust_genes.index)) == 0: continue model1 = KNeighborsRegressor(n_neighbors=5) model2 = KNeighborsRegressor(n_neighbors=5) model1.fit( np.array(list(clust_cells['x'])).reshape((-1, 1)), np.array(list(umap_df.loc[list( clust_cells.index)]['x'])).reshape((-1, 1))) filename = output_path + '/scanpy_x_KNN_model.sav' pickle.dump(model1, open(filename, 'wb')) #model1 = pickle.load(open(filename, 'rb')) x_gene_pred = model1.predict( np.array(list(clust_genes['x'])).reshape((-1, 1))) gene_coord_x.extend(x_gene_pred) model2.fit( np.array(list(clust_cells['y'])).reshape((-1, 1)), np.array(list(umap_df.loc[list( clust_cells.index)]['y'])).reshape((-1, 1))) filename = output_path + '/scanpy_y_KNN_model.sav' pickle.dump(model2, open(filename, 'wb')) #model2 = pickle.load(open(filename, 'rb')) y_gene_pred = model2.predict( np.array(list(clust_genes['y'])).reshape((-1, 1))) gene_coord_y.extend(y_gene_pred) with open(output_path + "/scanpy_gene_coord_x.txt", 'wb') as fp: pickle.dump(gene_coord_x, fp) with open(output_path + "/scanpy_gene_coord_y.txt", 'wb') as fp: pickle.dump(gene_coord_y, fp) #with open (output_path+"/scanpy_gene_coord_x.txt", 'rb') as fp: # gene_coord_x = 
pickle.load(fp) #with open (output_path+"/scanpy_gene_coord_y.txt", 'rb') as fp: # gene_coord_y = pickle.load(fp) #n_clusters = len(list(data_df['cluster'].unique())) u_map_x = [] u_map_y = [] for ind in list(data_df.index): if ind in list(louvain_df.index): u_map_x.append(umap_coord[list( louvain_df.index).index(ind)][0]) u_map_y.append(umap_coord[list( louvain_df.index).index(ind)][1]) else: u_map_x.append(gene_coord_x[genes.index(ind)]) u_map_y.append(gene_coord_y[genes.index(ind)]) data_df['umap_x'] = u_map_x data_df['umap_y'] = u_map_y # colors = random.sample(seaborn_colors,n_clusters) #colors = colors3 plt.figure(figsize=(5, 5)) #cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True) ax = sns.scatterplot(x="umap_x", y="umap_y", data=data_df, hue="cluster", palette=sns.xkcd_palette(colors), linewidth=0.0, s=2) ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False) for cl in range(n_clusters): plt.annotate(cl, data_df.loc[data_df['cluster'] == cl, ['umap_x', 'umap_y']].mean(), horizontalalignment='center', verticalalignment='center', size=10, weight='bold', color="black") sns.despine(bottom=False, left=False) plt.xlabel("umap1", fontsize=20) plt.ylabel("umap2", fontsize=20) plt.setp(ax.spines.values(), linewidth=2) plt.yticks([], linewidth=20) plt.xticks([]) plt.savefig(output_path + 'umap_clustering.png', bbox_inches='tight', dpi=600) plt.savefig(output_path + 'umap_clustering.pdf', bbox_inches='tight', dpi=600) import matplotlib.pyplot as plt, mpld3 from scipy.spatial import ConvexHull, convex_hull_plot_2d data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5) color_gene = ["light grey"] color_cell = ["red"] #fig,ax1 = plt.subplots() plt.figure(figsize=(6, 6)) ax = sns.scatterplot(x="umap_x", y="umap_y", data=data_df[data_df['alpha'] == 0.5], hue="type", palette=sns.xkcd_palette(color_gene), sizes=(10, 5), size="type", alpha=0.3, s=10) sns.scatterplot(x="umap_x", y="umap_y", data=data_df[data_df['alpha'] == 1.0], hue="type", 
palette=sns.xkcd_palette(color_cell), sizes=(20, 5), size="type", marker="^", alpha=1.0, ax=ax, s=10) for c in range(n_clusters): p = data_df[data_df["cluster"] == c] p = p[['umap_x', 'umap_y']] points = p.values hull = ConvexHull(points) #for simplex in hull.simplices: # sns.lineplot(points[simplex, 0], points[simplex, 1]) x_list = [] y_list = [] for m in marker: x_list.append(data_df.loc[m]['umap_x']) #x_list.append(gene_coord_x[genes.index(m)]) y_list.append(data_df.loc[m]['umap_y']) #y_list.append(gene_coord_y[genes.index(m)]) for cl in range(n_clusters): plt.annotate(cl, data_df.loc[data_df['cluster'] == cl, ['umap_x', 'umap_y']].mean(), horizontalalignment='center', verticalalignment='center', size=10, weight='bold', color="black") for label, x, y in zip(marker, x_list, y_list): plt.annotate( label, xy=(x, y), xytext=(-20, 20), textcoords='offset points', ha='right', va='bottom', #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5), arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0')) ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False) sns.despine(bottom=False, left=False) plt.xlabel("umap1", fontsize=20) plt.ylabel("umap2", fontsize=20) plt.setp(ax.spines.values(), linewidth=2) plt.yticks([], linewidth=20) plt.xticks([]) plt.savefig(output_path + 'umap_embedding.png', bbox_inches='tight', dpi=600) plt.savefig(output_path + 'umap_embedding.pdf', bbox_inches='tight', dpi=600) import matplotlib.pyplot as plt, mpld3 from scipy.spatial import ConvexHull, convex_hull_plot_2d data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5) color_gene = ["light grey"] color_cell = ["red"] #fig,ax1 = plt.subplots() plt.figure(figsize=(6, 6)) # colors = color ax = sns.scatterplot(x="umap_x", y="umap_y", data=data_df[data_df['alpha'] == 0.5], hue="cluster", linewidth=0.0, sizes=(2, 5), size="type", palette=sns.xkcd_palette(colors), s=2) sns.scatterplot(x="umap_x", y="umap_y", data=data_df[data_df['alpha'] == 1.0], hue="type", 
palette=sns.xkcd_palette(color_cell), linewidth=0.1, marker="^", ax=ax, alpha=1.0, s=10) for c in range(n_clusters): p = data_df[data_df["cluster"] == c] p = p[['umap_x', 'umap_y']] points = p.values hull = ConvexHull(points) #for simplex in hull.simplices: # sns.lineplot(points[simplex, 0], points[simplex, 1]) x_list = [] y_list = [] for m in marker: x_list.append(data_df.loc[m]['umap_x']) y_list.append(data_df.loc[m]['umap_y']) for cl in range(n_clusters): plt.annotate(cl, data_df.loc[data_df['cluster'] == cl, ['umap_x', 'umap_y']].mean(), horizontalalignment='center', verticalalignment='center', size=10, weight='bold', color="black") for label, x, y in zip(marker, x_list, y_list): plt.annotate( label, xy=(x, y), xytext=(-20, 20), textcoords='offset points', ha='right', va='bottom', #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5), arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0')) ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False) sns.despine(bottom=False, left=False) plt.xlabel("umap1", fontsize=20) plt.ylabel("umap2", fontsize=20) plt.setp(ax.spines.values(), linewidth=2) plt.yticks([], linewidth=20) plt.xticks([]) plt.savefig(output_path + 'umap_color_embedding.png', bbox_inches='tight', dpi=600) plt.savefig(output_path + 'umap_color_embedding.pdf', bbox_inches='tight', dpi=600)
"Decision Tree": decTreeScore, "Random Forest": rfScore, "Naive Bayes": nbScore } methods = [ "Logistic Regression", "SVM", "KNN", "Naive Bayes", "Decision Tree", "Random Forest" ] accuracy = [logRegScore, svmScore, knnScore, nbScore, decTreeScore, rfScore] sns.set() plt.figure(figsize=(14, 5)) plt.ylabel("Başarı %") plt.xlabel("Algoritmalar") sns.barplot(x=methods, y=accuracy, palette="deep") for line in range(len(methods)): plt.text( line - 0.20, # x 0.85, # y "{:.3f}%".format(accuracy[line] * 100), horizontalalignment='left', size='large', color="black", ) plt.savefig('karşılaştır.png', transparent=True) plt.show()
# Pie chart of the pollutant shares.
plt.figure(figsize=(8, 6))
plt.pie(
    values,
    labels=pollutants,
    explode=explode,
    autopct='%1.1f%%',
    shadow=True,
)
plt.title('Air pollutants and their probable amount in atmosphere [India]')
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()

# # showing INDIA AQI on world map using cartopy

# In[82]:

import cartopy.crs as ccrs

# In[83]:

# geo is indexed as [0] -> y coordinate, [1] -> x coordinate below.
geo = data['city']['geo']

fig = plt.figure(figsize=(12, 10))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.stock_img()

# Mark the station and label it slightly offset from the marker.
plt.scatter(geo[1], geo[0], color='blue')
plt.text(geo[1] + 3, geo[0] - 2, f'{name} AQI \n {aqi}', color='red')
plt.show()
# Placeholders shaped like the training arrays x and y.
tf_x = tf.placeholder(tf.float32, x.shape)  # input x
tf_y = tf.placeholder(tf.float32, y.shape)  # input y

# neural network layers
l1 = tf.layers.dense(tf_x, 10, tf.nn.relu)
output = tf.layers.dense(l1, 1)

loss = tf.losses.mean_squared_error(tf_y, output)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_op = optimizer.minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialize variables in graph

plt.ion()  # interactive mode so the plot refreshes while training

for step in range(100):
    # train and net output
    _, l, pred = sess.run([train_op, loss, output], {tf_x: x, tf_y: y})
    if step % 5 == 0:
        # plot and show learning process
        print(l)
        plt.cla()
        plt.scatter(x, y)
        plt.plot(x, pred, 'r-', lw=5)
        plt.text(0.5, 0, 'Loss=%.4f' % l,
                 fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.1)

plt.ioff()
plt.show()
def g_histfitting(colourFilter, metadata, cluster_ids, blink_ids, N_bins):
    """Fit a geometric model to the blinks-per-cluster histogram.

    Blinks per cluster is not available as a pipeline column, so it is
    derived here by counting how often each distinct value occurs in
    ``colourFilter[cluster_ids]``.

    Parameters
    ----------
    colourFilter
        Data source indexable by column name.
    metadata
        Currently unused.
    cluster_ids
        Key of the cluster-id column in ``colourFilter``.
    blink_ids
        Currently unused.
    N_bins
        Currently unused; the bin count is derived from the largest
        blinks-per-cluster value (one bin per 5 blinks).

    Returns
    -------
    tuple
        ``(hist_data, params, errors)``: the histogram wrapped in a
        ``tabular.RecArraySource`` (fields ``y_hist`` / ``x_hist``), the
        fitted parameter vector ``geofit.x`` and the parameter errors derived
        from the fit Jacobian.
    """
    # geometric (and, currently disabled, negative binomial) fitting
    from PYME.IO import tabular

    _, bpc_2 = np.unique(colourFilter[cluster_ids], return_counts=True)
    print('any points where bps = 0?', np.where(bpc_2 == 0))

    if USE_GUI:
        # Raw distribution on the current figure; the fit gets a new figure.
        plt.hist(bpc_2, bins=20)
        plt.figure()

    # np.histogram needs an integer bin count; true division yields a float.
    n_bins = max(1, int(np.max(bpc_2) // 5))
    vals, bin_edges = np.histogram(bpc_2, bins=n_bins, density=True)
    print('hist stuff', vals, bin_edges)

    y_hist = vals
    bin_starts = bin_edges[:-1]
    bin_ends = bin_edges[1:]
    x_hist = (bin_starts + bin_ends) / 2  # bin centres

    params_on = np.array([.1])
    # NOTE(review): FitModel_N is assumed to return an object exposing
    # .x, .jac and .fun (as used below) -- confirm against its definition.
    geofit = FitModel_N(grff, params_on, x_hist, y_hist)

    # np.linspace requires an integer number of samples.
    n_fit_points = int(max(bin_edges) * 10)
    x_fit = np.linspace(min(bin_edges), max(bin_edges), n_fit_points)
    y_fit = gfm(x_fit, geofit.x)

    # Package the histogram for the caller; previously `hist_data` was
    # returned without ever being assigned.
    hist_datasource = np.rec.fromarrays(
        (y_hist, x_hist), dtype=[('y_hist', '<f4'), ('x_hist', '<f4')])
    hist_data = tabular.RecArraySource(hist_datasource.view(np.recarray))

    # Parameter covariance estimated as inv(J^T J) scaled by the summed
    # squared residuals.
    geo_cov = np.linalg.inv(np.matmul(geofit.jac.T, geofit.jac)) * np.mean(
        (geofit.fun * geofit.fun).sum())
    fitErrors_geo = np.sqrt(np.diag(geo_cov))

    if USE_GUI:
        # Use the edges for the width so a single-bin histogram still works.
        plt.bar(bin_starts, vals, width=bin_edges[1] - bin_edges[0], alpha=.4)
        plt.plot(x_fit, y_fit)
        plt.xlim([0, 200])
        xx3 = max(plt.xlim())
        yy3 = max(plt.ylim())
        cv = min(plt.xlim())
        text_x = ((xx3 - cv) * .3) + cv
        plt.text(text_x, yy3 * .7, r'$y = p(1-p)^{x}$')
        # Format the single fitted parameter as a scalar, not as an array.
        plt.text(text_x, yy3 * .6,
                 'p= %5.2f +/- %5.2f' % (geofit.x[0], fitErrors_geo[0]))
        plt.title(r'$mE0s3.2-CAAX$')
        plt.xlabel('Number of blinks per molecule')
        plt.ylabel('Probability')

    return hist_data, geofit.x, fitErrors_geo
print t print v initialize(v, s, t, dt, n) calculate(v, s, t, dt, n) store(v, t, n) #plot plt.figure(1) plt.subplot(211) plt.plot(t, v,"g-", linewidth=2.0) plt.scatter(t, v) plt.title('The Velocity of a Free Falling Object') plt.xlabel('Time($t$)', fontsize=14) plt.ylabel('Velocity($m/s$)', fontsize=14) plt.text(3,-60,r'$g = 9.8 m/s^2$', fontsize=16) plt.grid(True) plt.subplot(212) plt.plot(t, s,"g-", linewidth=2.0) plt.scatter(t, s) plt.title('The Displacement of a Free Falling Object') plt.xlabel('Time($t$)', fontsize=14) plt.ylabel('Displacement($m$)', fontsize=14) plt.text(3,-300,r'$g = 9.8 m/s^2$', fontsize=16) plt.grid(True) plt.show() plt.savefig("ex1.jpg") read()
import numpy as np
# The plotting API lives in matplotlib.pyplot; the bare `matplotlib` package
# has no bar/text/show functions.
import matplotlib.pyplot as plt

n = 12
X = np.arange(n)
Y1 = (1 - X / float(n)) * np.random.uniform(0.5, 1, n)
Y2 = (1 - X / float(n)) * np.random.uniform(0.5, 1, n)

# Upper bars show Y1, lower (mirrored) bars show Y2.
plt.bar(X, Y1, facecolor='#9999ff', edgecolor='white')
plt.bar(X, -Y2, facecolor='#ff9999', edgecolor='white')

for x, y in zip(X, Y1):
    # ha: horizontal alignment
    plt.text(x + 0.4, y + .05, '%.2f' % y, ha='center', va='bottom')

for x, y in zip(X, Y2):
    # ha: horizontal alignment
    plt.text(x + 0.4, -y - .15, '-%.2f' % y, ha='center', va='bottom')

plt.xlim(-.5, n)
plt.xticks(())
plt.ylim(-1.25, 1.25)
plt.yticks(())

plt.show()
import numpy as np
# The plotting API lives in matplotlib.pyplot; the bare `matplotlib` package
# has no text() function.
import matplotlib.pyplot as plt

la = np.linalg

words = ["I", "like", "enjoy", "deep", "learing", "NLP", "flying", "."]
# One row/column per entry of `words`.
x = np.array([[0, 2, 1, 0, 0, 0, 0, 0],
              [2, 0, 0, 1, 0, 1, 0, 0],
              [1, 0, 0, 0, 0, 0, 1, 0],
              [0, 1, 0, 0, 1, 0, 0, 0],
              [0, 0, 0, 1, 0, 0, 0, 1],
              [0, 1, 0, 0, 0, 0, 0, 1],
              [0, 0, 1, 0, 0, 0, 0, 1],
              [0, 0, 0, 0, 1, 1, 1, 0]])

U, s, Vh = la.svd(x, full_matrices=False)

# Place each word at its first two left-singular-vector coordinates.
# (The original iterated over `x(len(words))`, which calls the array.)
for i, word in enumerate(words):
    plt.text(U[i, 0], U[i, 1], word)
alpha=opacity, color='m', yerr=std_lc, error_kw=error_config, label='LC') rect_RLC = axs.bar(test_suites + 2 * bar_width, avg_rlc, bar_width, alpha=opacity, color='c', yerr=std_rlc, error_kw=error_config, label='RLC') # Only for PR (since the bar is invisible) x = test_suites - bar_width y = avg_pr for a, b in zip(x, y): plt.text(a, b + 0.05, '%.04f' % b, ha='center', va='bottom', fontsize=6) axs.set_xticks(test_suites + bar_width / 4) axs.set_xticklabels(test_suites) axs.set_xlabel(x_label) axs.set_ylabel(y_label) axs.set_title(title) axs.legend(loc="upper right") plt.savefig('./data_analytics/figures/' + file_name) plt.show()
t = pickle.load(pickle_file) v = pickle.load(pickle_file) initialize(v, s, t, dt, n) calculate(v, s, t, dt, n) store(v, t, n) #plot plt.figure(1) plt.subplot(211) plt.plot(t, v,"g-", linewidth=1.0) plt.scatter(t, v) plt.title('The Velocity of a Free Falling Object') #plt.xlabel('Time($t$)', fontsize=12) (cancel because words overlap) plt.ylabel('Velocity($m/s$)', fontsize=12) plt.text(3,0,r'$g = 9.8 m/s^2$', fontsize=16) plt.grid(True) plt.subplot(212) plt.plot(t, s,"g-", linewidth=1.0) plt.scatter(t, s) plt.title('The Displacement of a Free Falling Object') plt.xlabel('Time($t$)', fontsize=12) plt.ylabel('Displacement($m$)', fontsize=12) plt.text(3,0,r'$g = 9.8 m/s^2$', fontsize=16) plt.grid(True) plt.show() plt.savefig("ex4.jpg") read()
score = mnist_classifier.evaluate(X_test_one_class, y_test_one_class, verbose=0) class_test_accuracy[class_index] = 100 * score[1] # Print test accuracy for each digit print("Test accuracy for label " + str(classes[class_index]) + ": " + str(class_test_accuracy[class_index]) + "%\n") ## Generate confusion matrix cm = confusion_matrix(y_test, predictions) plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds) tick_marks = np.arange(num_classes) plt.xticks(tick_marks, num_classes) plt.yticks(tick_marks, num_classes) fmt = 'd' thresh = cm.max() / 2. for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") plt.ylabel('True labels') plt.xlabel('Predicted labels') plt.title('MNIST Confusion Matrix') plt.show()
def plot_dataset_distribution(stats,
                              num_cols=5,
                              width=10,
                              height=5,
                              histogram_bins=10,
                              histogram_range=(0, 1000),
                              figure_padding=4):
    """Plot image-size histograms per class and a bar chart of class counts.

    Parameters
    ----------
    stats
        Rows of ``[Class, Filename, Width, Height, Size_in_KB]``.
    num_cols
        Maximum number of histogram panels per row.
    width, height
        Size of the histogram figure, passed to ``figsize``.
    histogram_bins
        ``bins`` argument for every histogram.
    histogram_range
        ``range`` argument for every histogram (immutable default).
    figure_padding
        ``pad`` argument for ``tight_layout``.

    The first panel covers the whole dataset, followed by one panel per class.
    Summary statistics are printed along the way and the figures are shown
    with ``plt.show()``.
    """
    # convert the list into a dataframe
    stats_frame = pd.DataFrame(
        stats, columns=['Class', 'Filename', 'Width', 'Height', 'Size_in_KB'])

    # sizes of all images, and the number of classes in the dataset
    list_sizes = stats_frame['Size_in_KB']
    number_of_classes = stats_frame['Class'].nunique()
    print(number_of_classes, " classes found in the dataset")

    # one entry per panel: the whole dataset first, then each class
    list_sizes_per_class = [list_sizes]
    class_names = ['whole dataset']
    print("Images of the whole dataset have an average size of ",
          list_sizes.mean())

    for c in stats_frame['Class'].unique():
        class_sizes = list_sizes.loc[stats_frame['Class'] == c]
        print("sizes of class [", c, "] have an average size of ",
              class_sizes.mean())
        list_sizes_per_class.append(class_sizes)
        class_names.append(c)

    # number of images per class, reusing the selections made above
    class_count_dict = {}
    for c, class_sizes in zip(class_names[1:], list_sizes_per_class[1:]):
        class_count_dict[c] = len(class_sizes)
        print("number of instances in class [", c, "] is ",
              class_count_dict[c])

    num_rows = math.ceil((number_of_classes + 1) / num_cols)
    if number_of_classes < num_cols:
        num_cols = number_of_classes + 1

    # squeeze=False always yields a 2-D axes array, so single-row,
    # single-column and 1x1 grids are all indexed the same way.
    fig, axes = plt.subplots(num_rows,
                             num_cols,
                             figsize=(width, height),
                             squeeze=False)
    fig.tight_layout(pad=figure_padding)

    num_panels = number_of_classes + 1
    for position, axis in enumerate(axes.flat):
        if position >= num_panels:
            # Unused cells in the last row stay blank.
            axis.set_visible(False)
            continue
        axis.hist(list_sizes_per_class[position],
                  bins=histogram_bins,
                  range=histogram_range)
        axis.set_xlabel('Image size (in KB)', fontweight='bold')
        # str() so non-string class labels do not break the title.
        axis.set_title(str(class_names[position]) + ' images ',
                       fontweight='bold')

    # Separate figure with the number of images per class.
    plt.figure()
    print(class_count_dict)
    if class_count_dict:
        plt.bar(list(class_count_dict.keys()),
                list(class_count_dict.values()))
        for label, count in class_count_dict.items():
            plt.text(label, count + 1, str(count))
    plt.show()
def _annotate_fit_params(plt, x_pos, y_max, equation, fractions, params,
                         errors):
    """Write the fit equation and its 'A= .. +/- ..' lines onto the current axes.

    ``fractions[0]`` positions the equation; the remaining fractions position
    one parameter line each (labelled A, B, C, D), all relative to ``y_max``.
    """
    plt.text(x_pos, y_max * fractions[0], equation)
    for label, frac, value, err in zip('ABCD', fractions[1:], params, errors):
        plt.text(x_pos, y_max * frac,
                 '%s= %5.2f +/- %5.2f' % (label, value, err))


def histfitting(colourFilter,
                metadata,
                cluster_idxs,
                fit_order,
                num_bins,
                blink_on_label,
                blink_off_label,
                to_json=False,
                log_bins=False,
                n_on_bins=1,
                n_off_bins=1,
                fixed_on_max=-1,
                fixed_off_max=-1):
    """Histogram blink on/off durations and fit exponential decays to them.

    Parameters
    ----------
    colourFilter
        Data source indexable by column name.
    metadata
        Object providing ``getEntry``; ``'Camera.IntegrationTime'`` is read
        as the frame duration.
    cluster_idxs, num_bins, n_on_bins, fixed_on_max
        Currently unused.
    fit_order
        1 for ``A*e^(-B*x)``, 2 for ``A*e^(-B*x) + C*e^(-D*x)``.
    blink_on_label, blink_off_label
        Column keys of the on-duration and time-to-next-blink values.
    to_json
        When true, write the histograms to a timestamped
        ``empirical_histograms_*.json`` file in the working directory.
    log_bins
        Only affects the ``tlog`` flag written to the JSON file.
    n_off_bins
        Step between off-time bin edges.
    fixed_off_max
        Upper bound for the off-time bins; -1 means the largest off time.

    Returns
    -------
    tuple
        ``(filt_on, filt_off, params_on, params_off, fitErrors_on,
        fitErrors_off, fit_eqn)`` where the two ``filt_*`` entries are
        ``tabular.RecArraySource`` histograms with fields
        ``y_hist`` / ``x_hist``.

    Raises
    ------
    ValueError
        If ``fit_order`` is neither 1 nor 2.
    """
    # pyplot, not the bare matplotlib package, provides figure/bar/text.
    import matplotlib.pyplot as plt
    from PYME.IO import tabular

    N = fit_order
    frame_duration = metadata.getEntry('Camera.IntegrationTime')
    np.set_printoptions(threshold=100000, suppress=True)

    # ---- on times (blink duration) -------------------------------------
    on_times = colourFilter[blink_on_label].astype('f')
    # np.int was removed from NumPy; the builtin int truncates identically.
    on_times = [int(i) for i in on_times]

    # NOTE(review): the original flags this binning as possibly incorrect.
    binning = np.arange(frame_duration, max(on_times), 1)
    print('max on time', max(on_times))

    vals, bin_edges = np.histogram(on_times, bins=binning)
    # The first two bins are dropped. The matching edges must be dropped as
    # well, otherwise every later use of (edges, vals) has mismatched lengths
    # and np.rec.fromarrays raises.
    # NOTE(review): confirm that clipping the edges is the intended repair.
    vals = np.array(vals[2:])
    bin_edges = bin_edges[2:] * frame_duration

    logonbins = np.logspace(0, 1.49136169383, num=max(on_times))
    logvals, logbinedges = np.histogram(on_times, bins=logonbins)
    print('log scale on vals', logvals)

    y_hist = vals
    bin_starts = bin_edges[:-1]
    x_hist = bin_starts
    max_xaxis = max(bin_edges)

    # ---- off times (time to next blink) --------------------------------
    off_times = colourFilter[blink_off_label].astype('f')

    logoffbins = np.logspace(0, 4.92471852852, num=30)
    logoff_vals, log_bin_edges_off = np.histogram(off_times, bins=logoffbins)
    print('log scale off vals', logoff_vals)

    if fixed_off_max == -1:
        fixed_off_max = off_times.max()
    binning_off = np.arange(0.5, fixed_off_max + 1, n_off_bins)
    vals_off, bin_edges_off = np.histogram(off_times, bins=binning_off)
    bin_edges_off = bin_edges_off * frame_duration

    y_hist_off = vals_off
    bin_starts_off = bin_edges_off[:-1]
    x_hist_off = bin_starts_off
    max_xaxis_off = max(bin_edges_off)

    # ---- start parameters ----------------------------------------------
    on_t_in_t = [i * frame_duration for i in on_times]
    off_t_in_t = [i * frame_duration for i in off_times]

    if N == 1:
        res_fxn = tfoef
        fit_fxn = sefm
        start_params = [max(vals), 1.0 / np.mean(on_t_in_t)]
        start_params_off = [max(vals_off), 1.0 / np.mean(off_t_in_t)]
        fit_eqn = 'A*e^(-B*x)'
        plot_eqn = r'$y = Ae ^{-Bx} + C$'
        on_text_fracs = (.5, .45, .40)
        off_text_fracs = (.6, .55, .50)
    elif N == 2:
        res_fxn = tsoef
        fit_fxn = defm
        start_params = [(np.max(vals), 1.0 / np.mean(on_t_in_t),
                         np.max(vals), 5.0 / np.mean(on_t_in_t))]
        start_params_off = [(np.max(vals_off), 1.0 / np.mean(off_t_in_t),
                             np.max(vals_off), 5.0 / np.mean(off_t_in_t))]
        fit_eqn = 'A*e^(-B*x) + C*e^(-D*x)'
        plot_eqn = r'$y = Ae^{-B*x} + C e^{-D*x}$'
        on_text_fracs = (.6, .55, .5, .45, .4)
        off_text_fracs = (.6, .55, .5, .45, .4)
    else:
        # Previously this fell through to an unbound-name error.
        raise ValueError('fit_order must be 1 or 2, got %r' % (fit_order,))

    # ---- fits ----------------------------------------------------------
    # The first remaining bin is excluded from both fits.
    # NOTE(review): FitModel_N is assumed to return an object exposing
    # .x, .jac and .fun (as used below) -- confirm against its definition.
    fit_results = FitModel_N(res_fxn, start_params, x_hist[1:], y_hist[1:])
    fit_results_off = FitModel_N(res_fxn, start_params_off, x_hist_off[1:],
                                 y_hist_off[1:])

    # Parameter covariance estimated as inv(J^T J) scaled by the summed
    # squared residuals.
    cov = np.linalg.inv(np.matmul(
        fit_results.jac.T, fit_results.jac)) * np.mean(
            (fit_results.fun * fit_results.fun).sum())
    cov_off = np.linalg.inv(
        np.matmul(fit_results_off.jac.T, fit_results_off.jac)) * np.mean(
            (fit_results_off.fun * fit_results_off.fun).sum())
    fitErrors_on = np.sqrt(np.diag(cov))
    fitErrors_off = np.sqrt(np.diag(cov_off))

    x_on_fit = np.linspace(0, max_xaxis, 100)
    y_on_fit = fit_fxn(x_on_fit, *fit_results.x)
    x_off_fit = np.linspace(0, max_xaxis_off, 100)
    y_off_fit = fit_fxn(x_off_fit, *fit_results_off.x)

    # ---- tabular outputs -----------------------------------------------
    hist_dtype = [('y_hist', '<f4'), ('x_hist', '<f4')]
    on_hist_datasource = np.rec.fromarrays((y_hist, bin_edges[1:]),
                                           dtype=hist_dtype)
    off_hist_datasource = np.rec.fromarrays((y_hist_off, bin_edges_off[1:]),
                                            dtype=hist_dtype)
    filt_on = tabular.RecArraySource(on_hist_datasource.view(np.recarray))
    filt_off = tabular.RecArraySource(off_hist_datasource.view(np.recarray))

    if USE_GUI:
        # On-duration histogram with fitted curve.
        plt.figure()
        plt.bar(bin_starts,
                vals,
                width=bin_starts[1] - bin_starts[0],
                alpha=.4)
        plt.plot(x_on_fit, y_on_fit)
        xx = max(plt.xlim())
        yy = max(plt.ylim())
        _annotate_fit_params(plt, xx * .3, yy, plot_eqn, on_text_fracs,
                             fit_results.x, fitErrors_on)
        plt.ylabel('Events')
        plt.xlabel('Blink duration')
        plt.title('Blink On Duration')

        fig, ax = plt.subplots(1, 1)
        ax.scatter(x_hist, y_hist - fit_fxn(x_hist, *fit_results.x))
        ax.set_ylabel('Residual')
        ax.set_xlabel('Blink duration')
        ax.set_title('Blink On Duration residuals')

        # now, this is getting hists & fits for time to next blink values
        plt.figure()
        plt.bar(bin_starts_off,
                vals_off,
                width=bin_starts_off[1] - bin_starts_off[0],
                alpha=.4)
        plt.plot(x_off_fit, y_off_fit)
        xx2 = max(plt.xlim())
        yy2 = max(plt.ylim())
        _annotate_fit_params(plt, xx2 * .3, yy2, plot_eqn, off_text_fracs,
                             fit_results_off.x, fitErrors_off)
        plt.ylabel('Events')
        plt.xlabel('Time to next blink')
        plt.title('Time to next blink in the same cluster')

        fig, ax = plt.subplots(1, 1)
        ax.scatter(x_hist_off,
                   y_hist_off - fit_fxn(x_hist_off, *fit_results_off.x))
        ax.set_ylabel('Residual')
        ax.set_xlabel('Time to next blink')
        ax.set_title('Time to next blink residuals')

    if to_json:
        import io
        import time
        import json
        try:
            to_unicode = unicode
        except NameError:
            to_unicode = str

        # Generate dye kinetics structure for JSON file
        dk = {}
        dk['plog'] = [0, 0]
        dk['tlog'] = [1, 1] if log_bins else [0, 0]
        # float() keeps NumPy scalar types out of json.dumps.
        dk['tmin'] = [float(bin_edges[0]), float(bin_edges_off[0])]
        dk['tmax'] = [float(bin_edges[-1]), float(bin_edges_off[-1])]
        dk['off'] = y_hist_off.tolist()
        dk['on'] = y_hist.tolist()

        # Write JSON file
        timestr = time.strftime("%Y%m%d-%H%M%S")
        with io.open('empirical_histograms_' + timestr + '.json',
                     'w',
                     encoding='utf8') as outfile:
            str_ = '{"dk": ' + json.dumps(dk) + '}'
            outfile.write(to_unicode(str_))

    params_on = fit_results.x
    params_off = fit_results_off.x
    return filt_on, filt_off, params_on, params_off, fitErrors_on, fitErrors_off, fit_eqn
#print(firstCollisionPosition)
#print(collisions)
fmt = '%d%%'

# Line plot of the first collision position (left axis) overlaid with a bar
# chart of the total collisions (right axis), both plotted against `l`.
fig = plt.figure()

# Create the axes first and set title/label on it explicitly. Calling
# plt.title()/plt.xlabel() before fig.add_subplot(111) makes pyplot create an
# implicit axes that add_subplot then duplicates (source of the warnings).
ax1 = fig.add_subplot(111)
ax1.set_title('COLLISION (with original data)')
ax1.set_xlabel(u"SHA3 bits", fontproperties='Comic Sans MS')

ax1.plot(l, b, 'or-', label=u'First collision position')
# Label every point with its value.
for _x, _y in zip(l, b):
    ax1.text(_x, _y, _y, color='black', fontsize=10)
ax1.legend(loc=1)
ax1.set_ylim([0, 100000])
ax1.set_ylabel('First collision position')

ax2 = ax1.twinx()  # second y axis sharing the same x axis
ax2.bar(l, a, alpha=0.3, color='blue', label=u'Total collisions')
# Single legend for the bars; the original created one with ax2.legend(loc=2)
# and immediately replaced it via plt.legend at the same location.
ax2.legend(prop={'family': 'Comic Sans MS', 'size': 8}, loc="upper left")
ax2.set_ylim([0, 100000])  # y axis range
ax2.set_ylabel('Total collisions')