Ejemplo n.º 1
0
def plot_confusion_matrix(cm,
                          classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          filename='viz\\confusion_matrix.png'):
    """Render a confusion matrix as an annotated heat map and save it to disk.

    Args:
        cm: 2-D NumPy array; rows are drawn along the 'True label' axis and
            columns along the 'Predicted label' axis.
        classes: Sequence of tick labels, one per row/column of ``cm``.
        normalize: When true, divide every row by its row sum before plotting.
        title: Title placed above the heat map.
        cmap: Matplotlib colormap used for the heat map.
        filename: Path handed to ``plt.savefig``.
    """
    if normalize:
        # Normalize BEFORE drawing so the colours, the cell annotations and
        # the contrast threshold all describe the same matrix.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed verbatim; any other dtype (normalized or
    # caller-supplied floats) is rounded to two decimals to stay readable.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j,
                 i,
                 format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run tight_layout last so it can make room for the axis labels too.
    plt.tight_layout()
    plt.savefig(filename)
Ejemplo n.º 2
0
def plot_confusion_matrix(cm,
                          classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: 2-D NumPy array; rows go on the 'True Label' axis, columns on the
            'Predicted Label' axis.
        classes: Tick labels, one per row/column.
        normalize: Divide each row by its sum before printing and plotting.
        title: Plot title.
        cmap: Matplotlib colormap for the heat map.
    """
    if not normalize:
        print("Confusion Matrix, without normalization")
    else:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    print(cm)

    # Draw the matrix as an image, one coloured cell per entry.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # One tick per class, at integer positions 0..len(classes)-1.
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            # White text on dark cells, black text on light ones.
            ink = 'white' if value > half_max else "black"
            plt.text(col,
                     row,
                     format(value, cell_format),
                     horizontalalignment='center',
                     color=ink)

    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
Ejemplo n.º 3
0
def plotConfusionMatrix(lbllist, predlist, classes, type):
    """Compute, draw and save the confusion matrix for one data split.

    Args:
        lbllist: Labels passed as the first argument to ``confusion_matrix``.
        predlist: Predictions passed as the second argument.
        classes: Tick labels, one per row/column of the matrix.
        type: ``'train'`` or ``'test'``; selects the title and the output
            file name under ``LOG_PATH``. Any other value draws the matrix
            without a title and closes the figure without saving.
    """
    matrix = confusion_matrix(lbllist, predlist)

    # Resolve the split-specific title and file name once.
    if type == 'train':
        heading = "Confusion matrix training"
        out_name = 'Confusion matrix training.png'
    elif type == 'test':
        heading = "Confusion matrix testing"
        out_name = 'Confusion matrix testing.png'
    else:
        heading = None
        out_name = None

    plt.imshow(matrix, interpolation="nearest", cmap=plt.cm.Blues)
    if heading is not None:
        plt.title(heading)
    plt.colorbar()
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Cells above half of the maximum count get white text for contrast.
    half_max = matrix.max() / 2.
    for row in range(matrix.shape[0]):
        for col in range(matrix.shape[1]):
            count = matrix[row, col]
            plt.text(col,
                     row,
                     format(count, "d"),
                     horizontalalignment="center",
                     color="white" if count > half_max else "black")

    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    if out_name is not None:
        plt.savefig(LOG_PATH + out_name)
    plt.close()
def plot_confusion_matrix(cm, classes, title='混淆矩阵', cmap=plt.cm.Greens):
    """Draw a confusion matrix as an annotated heat map on the current axes.

    Args:
        cm: 2-D array-like; ``cm[row][col]`` is drawn at x=col, y=row, i.e.
            rows follow the y axis ('真实值') and columns the x axis ('预测值').
        classes: Tick labels, one per row/column of ``cm``.
        title: Plot title.
        cmap: Matplotlib colormap, e.g. ``plt.cm.Blues``, ``plt.cm.Reds`` or
            ``plt.cm.Greens``.

    Note:
        This function changes the global ``plt.rcParams`` font settings.
    """
    cm = np.asarray(cm)

    # Select the SimHei font so the Chinese title and axis labels can render;
    # 'axes.unicode_minus' is switched off together with it.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # interpolation="nearest" keeps every matrix cell a solid square.
    plt.imshow(cm, cmap=cmap, interpolation="nearest")
    plt.title(title)
    plt.colorbar()  # colour scale next to the heat map

    # One tick per class instead of a hard-coded pair.
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)

    thresh = np.mean(cm)
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            # The colour must be chosen from the SAME cell whose value is
            # printed; the previous code looked at the transposed cell.
            plt.text(col,
                     row,
                     value,
                     horizontalalignment='center',
                     color='white' if value >= thresh else 'black')

    plt.xlabel('预测值')
    plt.ylabel('真实值')
def plot_confusion_matrix(confusion_matrix,
                          class_labels,
                          normalize=False,
                          title='Confusion Matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Code courtesy of Abinav Sagar:
    https://towardsdatascience.com/convolutional-neural-network-for-breast-cancer-classification-52f1213dcc9

    Args:
        confusion_matrix: 2-D NumPy array; rows go on the 'True label' axis.
        class_labels: Tick labels, one per row/column.
        normalize: Divide each row by its sum before printing and plotting.
        title: Plot title.
        cmap: Matplotlib colormap for the heat map.
    """
    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        row_totals = confusion_matrix.sum(axis=1)[:, np.newaxis]
        confusion_matrix = confusion_matrix.astype('float') / row_totals
        print("Normalized confusion matrix")

    print(confusion_matrix)

    plt.imshow(confusion_matrix, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(class_labels))
    plt.xticks(positions, class_labels, rotation=55)
    plt.yticks(positions, class_labels)

    cell_format = '.2f' if normalize else 'd'
    half_max = confusion_matrix.max() / 2.
    n_rows = confusion_matrix.shape[0]
    n_cols = confusion_matrix.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            value = confusion_matrix[row, col]
            # White text on dark cells, black text on light ones.
            if value > half_max:
                ink = "white"
            else:
                ink = "black"
            plt.text(col,
                     row,
                     format(value, cell_format),
                     horizontalalignment="center",
                     color=ink)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
Ejemplo n.º 6
0
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and plot it as an annotated heat map.

    Normalization can be applied by setting `normalize=True`; each row is then
    divided by its row sum before the matrix is printed and drawn.

    Args:
        cm: 2-D NumPy array; rows go on the 'True label' axis, columns on the
            'Predicted label' axis.
        classes: Tick labels, one per row/column.
        normalize: Whether to row-normalize ``cm`` first.
        title: Plot title.
        cmap: Matplotlib colormap for the heat map.
    """
    if normalize:
        # Normalize BEFORE drawing so the colours, the cell annotations and
        # the contrast threshold all describe the same matrix.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed verbatim; any other dtype is rounded to two
    # decimals so the annotations stay readable.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run tight_layout last so it can make room for the axis labels too.
    plt.tight_layout()
Ejemplo n.º 7
0
def silhouette():
    """Plot per-cluster silhouette profiles for the 3rd-pass Stardust output.

    Reads 'Stardust_results/visualization_output/3_pass/data.csv' (columns
    'data', 'x', 'y', 'cluster' are used), prints the average silhouette
    score, and writes 'silhouette.pdf' and 'silhouette.png' into
    'Stardust_results/analysis/'. Exits the interpreter via ``sys.exit()``
    when the 'Stardust_results' directory is missing.
    """
    if not os.path.exists("Stardust_results"):
        print(
            "The directory structure Stardust_results doest not exist. Please run run_stardust first"
        )
        sys.exit()
    analysis_dir = "Stardust_results/analysis"
    if not os.path.exists(analysis_dir):
        os.mkdir(analysis_dir)
    output_path = "Stardust_results/analysis/"

    from sklearn.metrics import silhouette_samples, silhouette_score
    data_df = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data.csv',
        delimiter=",",
        index_col=False)
    data_df.set_index('data', inplace=True)

    coords = data_df[['x', 'y']]
    labels = data_df['cluster']
    silhouette_avg = silhouette_score(coords, labels)
    per_sample_scores = silhouette_samples(coords, labels)
    print("silhouette score ", silhouette_avg)

    import matplotlib.cm as colormaps
    fig = plt.figure(figsize=(4, 7))
    n_clusters = len(labels.unique())

    # Each cluster is drawn as a horizontal band; bands are stacked upwards
    # with a 10-unit gap between them. Cluster ids 0..n_clusters-1 are assumed.
    band_bottom = 10
    for cluster_id in range(n_clusters):
        # Sorted silhouette scores of the samples in this cluster.
        band_scores = np.sort(per_sample_scores[labels == cluster_id])
        band_height = band_scores.shape[0]
        band_top = band_bottom + band_height

        shade = colormaps.nipy_spectral(float(cluster_id) / n_clusters)
        plt.fill_betweenx(np.arange(band_bottom, band_top),
                          0,
                          band_scores,
                          facecolor=shade,
                          edgecolor=shade,
                          alpha=0.7)

        # Put the cluster number at the vertical middle of its band.
        plt.text(-0.05, band_bottom + 0.5 * band_height, str(cluster_id))

        # Leave a 10-unit gap before the next band.
        band_bottom = band_top + 10

    plt.title("The silhouette plot for the various clusters.")
    plt.xlabel("silhouette coefficient", fontsize=20)
    plt.ylabel("Cluster label", fontsize=20)
    # Dashed red line marks the average score over all samples.
    plt.axvline(x=silhouette_avg, color="red", linestyle="--")

    plt.yticks([])  # no y ticks: the band labels carry the information
    plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
    sns.despine(bottom=False, left=False)
    for extension in ("pdf", "png"):
        fig.savefig(output_path + "/silhouette." + extension,
                    bbox_inches='tight',
                    dpi=600)
def add_labels(rects):
    """Write each bar's height just above it and give the bar a white edge.

    Args:
        rects: Iterable of bar patches exposing ``get_height``, ``get_x``,
            ``get_width`` and ``set_edgecolor``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Horizontal centre of the bar.
        center_x = bar.get_x() + bar.get_width() / 2
        plt.text(center_x, bar_height, bar_height, ha='center', va='bottom')
        bar.set_edgecolor('white')
Ejemplo n.º 9
0
            def plottrace(self, point):
                """Animate the tracks of two vessels over ``point`` time steps.

                At every step the positions ``sim_res.SHIP1POS[t]`` and
                ``sim_res.SHIP2POS[t]`` are appended to the tracks, the axes
                are cleared and redrawn, the value ``sim_res.RISKVALUE[t]`` is
                shown as text, and the figure pauses for 0.5 s.

                Args:
                    point: Number of time steps to draw.
                """
                # Draw the vessel trajectories with matplotlib's pyplot
                # point = 38
                def initial(ax):
                    # (Re)apply the static decoration: scaling, labels, title, grid.
                    ax.axis("equal")  # use equal scaling for the X and Y axes
                    ax.set_xlabel('Horizontal Position')
                    ax.set_ylabel('Vertical Position')
                    ax.set_title('Vessel trajectory')
                    plt.grid(True)  # add a grid
                    return ax

                # Only used by the commented-out timing code below.
                es_time = np.zeros([point])
                fig = plt.figure()
                ax = fig.add_subplot(1, 1, 1)
                ax = initial(ax)

                # test
                # NOTE(review): requests the same (1, 1, 1) slot again; whether
                # this returns the existing axes or a new overlapping one
                # depends on the Matplotlib version -- confirm.
                ax2 = fig.add_subplot(1, 1, 1)
                ax2 = initial(ax2)

                plt.ion()  # interactive mode on, for dynamic drawing

                # IniObsX=0000
                # IniObsY=4000
                # IniObsAngle=135
                # IniObsSpeed=10*math.sqrt(2)   # metres per second
                # print('start simulation')
                obsX = []
                obsX2 = []
                # obsY = [4000,]
                obsY = []
                obsY2 = []
                for t in range(point):
                    # t0 = time.time()
                    # Trajectory of the obstacle vessel
                    # obsX.append(IniObsX+IniObsSpeed*math.sin(IniObsAngle/180*math.pi)*t)
                    obsX.append(sim_res.SHIP1POS[t][0])
                    obsX2.append(sim_res.SHIP2POS[t][0])
                    # obsY.append(IniObsY+IniObsSpeed*math.cos(IniObsAngle/180*math.pi)*t)
                    obsY.append(sim_res.SHIP1POS[t][1])
                    obsY2.append(sim_res.SHIP2POS[t][1])
                    # Clear the current axes, then redraw both full tracks.
                    plt.cla()
                    ax = initial(ax)
                    ax.plot(obsX, obsY, '-g', marker='*')  # green line, star markers

                    # test
                    ax2 = initial(ax2)
                    ax2.plot(obsX2, obsY2, '-r', marker='o')
                    risk_value_text = 'Risk value: ' + str(
                        sim_res.RISKVALUE[t])
                    plt.text(0, 7, risk_value_text)
                    plt.pause(0.5)
                    # es_time[t] = 1000*(time.time() - t0)
                plt.pause(0)
                # return es_time
                pass
Ejemplo n.º 10
0
def vertical_mean_line(x, **kwargs):
    """Mark the mean of ``x`` with a dashed vertical line and a text label.

    Args:
        x: Object exposing ``mean()`` and ``std()``.
        **kwargs: Only ``color`` is read (default ``"r"``); it is used for
            both the line and the label.
    """
    line_color = kwargs.get("color", "r")
    center = x.mean()
    spread = x.std()

    plt.axvline(center, linestyle="--", color=line_color)
    text_style = {"size": 15, "color": line_color}

    # The offsets and the cut-off below need customization based on your data.
    label_y = 5
    if center < 6:
        # Small mean: put the label just right of the line.
        caption = "mean: {:.2f}\n(std: {:.2f})".format(center, spread)
        label_x = center + 0.08
    else:
        # Otherwise put the label to the left of the line.
        caption = "mean: {:.2f}\n  (std: {:.2f})".format(center, spread)
        label_x = center - 1.4
    plt.text(label_x, label_y, caption, **text_style)
Ejemplo n.º 11
0
def get_regression_report(y_true=None,
                          prediction=None,
                          show_r2_plot=True,
                          save_plot=False):
    '''
    Generates performance report for a regression problem.

    Prints the mean absolute error, mean squared error, mean squared log
    error and R-squared score, and optionally draws a scatter plot of the
    predictions against the true values.

    Parameters:
    ------------------
    y_true: Array, series, list.

        The truth/ground value from the train data set.

    prediction: Array, series, list.

        The predicted value by a trained model.

    show_r2_plot: Bool, default True.

        Show the scatter plot annotated with the R-squared score.

    save_plot: Bool, default False.

        Save the plot as "r2_plot.png" in the current working directory.
        Only takes effect when ``show_r2_plot`` is also true.

    '''
    mae = mean_absolute_error(y_true, prediction)
    mse = mean_squared_error(y_true, prediction)
    msle = mean_squared_log_error(y_true, prediction)
    r2 = r2_score(y_true, prediction)

    print("Mean Absolute Error: ", round(mae, 5))
    print("Mean Squared Error: ", round(mse, 5))
    print("Mean Squared Log Error: ", round(msle, 5))
    print("R-squared Error:  ", round(r2, 5))
    print("*" * 100)

    if show_r2_plot:
        plt.scatter(y_true, prediction)
        plt.xlabel('Truth values')
        plt.ylabel('Predicted values')
        # Fitting y_true against itself yields the y = x reference line on
        # which perfect predictions would fall.
        unique_truth = np.unique(y_true)
        identity_line = np.poly1d(np.polyfit(y_true, y_true, 1))
        plt.plot(unique_truth, identity_line(unique_truth))
        plt.text(0.7, 0.2, 'R-squared = %0.2f' % r2)

        # Save BEFORE show(): once show() returns the figure has normally
        # been released, so saving afterwards writes an empty image.
        if save_plot:
            plt.savefig("r2_plot.png")
        plt.show()
Ejemplo n.º 12
0
def plotMultipleNumpylist(plotDict, yLabel, xLable):
    """Plot every series in ``plotDict`` as a thin line on one chart.

    A dashed maroon vertical line is drawn at x = 120 and labelled at the
    height of the largest value found in any series; the dictionary keys are
    used as legend entries.

    Args:
        plotDict: Mapping of legend label -> sequence of values.
        yLabel: Text for the y axis.
        xLable: Text for the x axis.
    """
    series_peaks = []
    for series in plotDict.values():
        plt.plot(series, linewidth=.7)
        series_peaks.append(max(series))

    plt.ylabel(yLabel)
    plt.xlabel(xLable)

    # Anchor the marker label at the overall maximum of all series.
    label_height = max(series_peaks)
    plt.axvline(120, ymin=0, ymax=100, linestyle='dashed', color='maroon')
    marker_style = {'color': 'maroon', 'fontsize': 10}
    plt.text(120, label_height, "   GRAMs Launching", marker_style)

    plt.legend(plotDict.keys(), loc='upper left')
    plt.show()
Ejemplo n.º 13
0
def RysujGeom(x_range, WEZLY, ELEMENTY, types):
    """Sketch a 1-D domain with its points and numbered intervals.

    Args:
        x_range: Two-element sequence with the left and right end of the
            domain.
        WEZLY: Sequence of x positions drawn as stars on the axis
            (presumably mesh nodes -- confirm with the caller).
        ELEMENTY: Not used by this function.
        types: Two-element sequence of labels printed next to the left and
            right end of the domain.
    """
    left_end, right_end = x_range[0], x_range[1]

    # Domain end points, the axis line itself, then every point in WEZLY.
    plt.plot(left_end, 0, '*')
    plt.plot(right_end, 0, '*')
    plt.plot(x_range, [0, 0])
    plt.plot(WEZLY, np.zeros(len(WEZLY)), '*')

    plt.text(left_end - 0.15, 0, types[0])
    plt.text(right_end + 0.15, 0, types[1])

    # Coordinate above each point, 1-based point number below it.
    for number, position in enumerate(WEZLY, start=1):
        plt.text(position - 0.03, 0.01, str(position))
        plt.text(position - 0.05, -0.05, str(number))

    # 1-based interval number above the midpoint of neighbouring points.
    for number in range(1, len(WEZLY)):
        start, end = WEZLY[number - 1], WEZLY[number]
        print((start - end) / 2)
        plt.text(start / 2 + end / 2, 0.05, str(number))

    plt.xlim([left_end - 0.3, right_end + 0.3])
    plt.ylim([-0.2, 0.42])
Ejemplo n.º 14
0
def plot_confusion_matrix(cm, classes,
    normalize=False, title='Confusion matrix',
    cmap=plt.cm.Blues, filename='viz\\confusion_matrix.png'):
  """Render a confusion matrix as an annotated heat map and save it to disk.

  Args:
      cm: 2-D NumPy array; rows are drawn along the 'True label' axis and
          columns along the 'Predicted label' axis.
      classes: Sequence of tick labels, one per row/column of ``cm``.
      normalize: When true, divide every row by its row sum before plotting.
      title: Title placed above the heat map.
      cmap: Matplotlib colormap used for the heat map.
      filename: Path handed to ``plt.savefig``.
  """
  if normalize:
    # Normalize BEFORE drawing so the colours, the cell annotations and the
    # contrast threshold all describe the same matrix.
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

  plt.figure()
  plt.imshow(cm, interpolation='nearest', cmap=cmap)
  plt.title(title)
  plt.colorbar()
  tick_marks = np.arange(len(classes))
  plt.xticks(tick_marks, classes, rotation=45)
  plt.yticks(tick_marks, classes)

  # Integer counts are printed verbatim; any other dtype (normalized or
  # caller-supplied floats) is rounded to two decimals to stay readable.
  fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

  # Cells darker than half of the maximum get white text for contrast.
  thresh = cm.max() / 2.
  for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, format(cm[i, j], fmt),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

  plt.ylabel('True label')
  plt.xlabel('Predicted label')
  # Run tight_layout last so it can make room for the axis labels too.
  plt.tight_layout()
  plt.savefig(filename)
Ejemplo n.º 15
0
def plot_confusion_matrix(cm, classes, title='Confusion Matrix', cmap=plt.cm.Blues):
    """Draw a row-normalized confusion matrix on a new 10x10 inch figure.

    Every row of ``cm`` is divided by its row sum, the result is shown as a
    heat map and each cell is annotated with its value to two decimals.

    Args:
        cm: 2-D NumPy array; rows go on the 'True Label' axis.
        classes: Tick labels, one per row/column.
        title: Plot title.
        cmap: Matplotlib colormap for the heat map.
    """
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    cm = cm.astype('float') / row_totals

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    half_max = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            share = cm[row, col]
            # White text on dark cells, black text on light ones.
            ink = "white" if share > half_max else "black"
            plt.text(col, row, format(share, '.2f'),
                     horizontalalignment="center",
                     color=ink)

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
Ejemplo n.º 16
0
    def display(self, data, candidates, fname, display):
        """Plot candidate frequencies over a histogram of ``data`` and save it.

        The first components of ``candidates`` are split into a lower and an
        upper half; from their means and ranges a simple heuristic decides
        whether the plot is annotated as showing one or two distributions.
        The figure is written next to ``fname`` with the last three
        characters of the name replaced by "png".

        Args:
            data: Sequence of numeric values drawn as a histogram with
                unit-width bins.
            candidates: Sequence of (value, frequency) pairs; at least two
                are required.
            fname: Input file name; used in the title and to derive the
                output file name.
            display: When equal to True, the figure is also shown on screen.

        Raises:
            ValueError: If fewer than two candidates are given.
        """
        finallist = [c[0] for c in candidates]
        if len(finallist) < 2:
            raise ValueError("display() needs at least two candidates")

        # Floor division: a float slice index raises TypeError on Python 3.
        half = len(finallist) // 2
        part1 = finallist[:half]
        part2 = finallist[half:]

        meandiff = int(np.sqrt(np.power(np.mean(part2), 2) - np.power(np.mean(part1), 2)))
        rangeA = max(part1) - min(part1)
        rangeB = max(part2) - min(part2)
        span = int((rangeA + rangeB) / 2)
        dspan = int(meandiff / span)
        theta = float(meandiff / (rangeA + rangeB))

        # Parentheses only make the original operator precedence explicit.
        if (dspan > 3 and meandiff > 20) or meandiff > 36:
            oneortwo = "Two distributions \n\n MD: %d \n Span: %d \n Dspan: %d \n theta: %d" % (meandiff, span, dspan, theta)
        else:
            oneortwo = "One distribution \n\n MD: %d \n Span: %d \n Dspan: %d \n theta: %d" % (meandiff, span, dspan, theta)

        cans = np.array(candidates)
        top_frequency = max(cans[:, 1])
        plt.plot(cans[:, 0], cans[:, 1], 'ro')
        # Guide lines at 25%, 50% and 75% of the maximum frequency.
        plt.axhline(top_frequency / 4, color='r')
        plt.axhline(max(cans[:, 1] / 2), color='r')
        plt.axhline(int(top_frequency) * 0.75, color='r')
        red_patch = mpatches.Patch(color='red', label='75%, 50% and 25% \nof maximum frequency')
        plt.legend(handles=[red_patch])
        plt.ylabel('Frequency of occurence')
        plt.xlabel('separate items')
        plt.title('Frequency distribution estimation graph: %s' % (fname))
        plt.text(max(data) * 1.1, top_frequency * 0.62, oneortwo, fontsize=11, color='r')
        plt.hist(data, range(int(min(data)), int(max(data)), 1))

        # Assumes fname ends in a three-character extension.
        ofile = fname[0:-3] + "png"
        # Format inside the call: `print(...) % x` applies % to None on
        # Python 3 and raises TypeError.
        print("Writing outfile: %s" % (ofile))
        plt.savefig(ofile, bbox_inches='tight')
        if display == True:
            plt.show()
        return
Ejemplo n.º 17
0
def Plant(RTE_NBR, TOTAL_PKG_STOP_LIST, TOTAL_STOP, Current_MGR, Current_DATE):
    """Draw an overlaid bar chart per route and save it as a PNG file.

    Args:
        RTE_NBR: Sequence of route labels used as x tick labels.
        TOTAL_PKG_STOP_LIST: Bar heights drawn in red (first, underneath).
        TOTAL_STOP: Bar heights drawn in green (second, on top).
        Current_MGR: Value shown in the title and used in the file name.
        Current_DATE: Value shown in the title and used in the file name.

    Note:
        ``TOTAL_PKG`` is not a parameter; it is read from the enclosing scope.
    """
    print("=======================================")
    print("测试绘制1")

    manager_label = str(Current_MGR)
    date_label = str(Current_DATE)

    # File-name friendly variants: no spaces, ':' and '/' turned into '_'.
    manager_tag = manager_label.replace(" ", "") + "_"
    date_tag = date_label.replace(" ", "")
    date_tag = date_tag.replace(":", "_").replace("/", "_") + "_"

    fig, ax = plt.subplots()
    bar_slots = arange(len(RTE_NBR))
    # Use positional ticks so the routes keep the given order instead of
    # being sorted automatically.
    plt.xticks(bar_slots, RTE_NBR)

    # Drawing order matters: the green bars are painted over the red ones.
    ax.bar(bar_slots, TOTAL_PKG_STOP_LIST, color="red")
    ax.bar(bar_slots, TOTAL_STOP, color="green")

    chart_title = "(MGR:" + manager_label + ")" + "Fedex" + date_label + "Working_Detail"
    ax.set(xlabel="RTE NBR", title=chart_title)

    # Work-around for bars being spaced too narrowly: an empty text whose
    # position components must not exceed 1.
    spacer = ax.text(0.02, 0.90, "")
    spacer.set_position((0.9, .9))

    # Annotate every bar with "<stops>/<packages>".
    for x, y, z, v in zip(bar_slots, TOTAL_PKG_STOP_LIST, TOTAL_PKG,
                          TOTAL_STOP):
        plt.text(x - 0.5, y, '%d/%d' % (v, z))

    ax.legend(["TOTAL PKG", "TOTAL STOP"])  # legend entries

    fig.savefig("MGR_" + manager_tag + date_tag + "Working_Detail.png")
Ejemplo n.º 18
0
def make_bar_graph(data=housing):
    '''
    Draw one 10-bin histogram, rendered as a bar chart, per column of ``data``.

    The charts are placed row by row on a 3 x 4 grid inside a figure named
    'Bar'. Each chart also carries a large translucent "row+column" marker.
    Columns beyond the twelfth are ignored because the grid has no room for
    them.

    :param data: table whose ``columns`` attribute lists the column names and
        whose ``data[column]`` yields the values of that column (presumably a
        pandas DataFrame -- confirm with the caller). Every column must be
        numeric and free of NaN. Defaults to the module-level ``housing``.
    :return: None
    '''
    n_rows, n_cols, n_bins = 3, 4, 10
    mp.figure('Bar', facecolor='lightgray')
    mp.title('Bar', fontsize=20)
    gs = mg.GridSpec(n_rows, n_cols)
    for position, column in enumerate(data.columns):
        if position >= n_rows * n_cols:
            break
        # Row-major placement: the row advances after every n_cols charts.
        i, j = divmod(position, n_cols)
        # Create the subplot.
        mp.subplot(gs[i, j])
        # Add a text marker inside the chart: position, content, alignment,
        # size, colour and transparency.
        mp.text(0.5, 0.5, str(i) + '+' + str(j), ha='center', va='center', size=35, color='red', alpha=0.5)
        # Remove the axis ticks.
        mp.xticks(())
        mp.yticks(())
        # Count the values falling into n_bins equal-width bins between the
        # column's minimum and maximum, then draw the counts as bars.
        single_data = np.asarray(data[column])
        y, _ = np.histogram(single_data, bins=n_bins)
        x = np.arange(len(y))
        mp.bar(x, y, 0.4, color='dodgerblue', label=column, alpha=0.75)

    # Switch to a compact layout.
    mp.tight_layout()
Ejemplo n.º 19
0
def qqPlot(theoreticalQ, sampleQ, name):
    """Draw a Q-Q scatter plot with a trend line, its equation and R^2.

    Args:
        theoreticalQ: Sequence of x values (theoretical quantiles).
        sampleQ: Sequence of y values (sample quantiles).
        name: Label for the scatter series; also appended to the title.
    """
    # NOTE(review): unpacking the integer 0 into five names raises TypeError,
    # so this function cannot currently run. The trailing comment suggests the
    # values were meant to come from regr(theoreticalQ, sampleQ) -- confirm.
    slope, intercept, r_value, p_value, std_err = 0  # regr(theoreticalQ, sampleQ)

    plt.figure()
    plt.scatter(theoreticalQ, sampleQ, s=0.8, label=name, c='blue')
    # Trend line evaluated at every theoretical quantile.
    y = [x * slope + intercept for x in theoreticalQ]
    plt.plot(theoreticalQ, y, 'r', label='Trend line')
    plt.text(0, max(sampleQ) * 0.6, '\n\n$R^2$ = ' + str('%.6f' % r_value**2))
    # Print the line equation; a non-positive intercept is appended without
    # an explicit '+' (a negative one brings its own minus sign).
    if intercept > 0:
        plt.text(
            0,
            max(sampleQ) * 0.55,
            'y = ' + str('%.6f' % slope) + 'x + ' + str('%.6f' % intercept))
    else:
        plt.text(0,
                 max(sampleQ) * 0.55,
                 'y = ' + str('%.6f' % slope) + 'x ' + str('%.6f' % intercept))

    plt.xlabel('Theoretical Quantile')
    plt.ylabel('Sample Quantile')
    plt.title('QQ plot ' + name)
    plt.grid(True)
    plt.legend()
    # NOTE(review): ax3, h, wlow, N and i are not defined in this function;
    # this statement looks like it was spliced in from a different script --
    # confirm and remove.
    hbars = ax3.barh(h['bins'][wlow],
                     h['area'][wlow] / N.max(h['area']),
                     left=i - 0.2,
                     color=(0.85, 0.85, 0.85, 1),
                     height=30,
                     edgecolor=(0.85, 0.85, 0.85, 1),
                     zorder=0,
                     linewidth=0)

# Plot asterisks by glaciers with records shorter than 5 years:
# four black stars at y = -3, shifted 0.2 to the right of x = 12, 13, 47, 48.

ax2.plot(N.array([12, 13, 47, 48]) + 0.2, N.zeros(4) - 3, 'k*')

ax3.set_ylim(-4000, 4000)
# NOTE(review): the next line is a bare attribute access with no effect.
ax3.yaxis
plt.text(51, 2600, "Elevation (m)", rotation=-90)

# Draw a short horizontal black line at height m for every entry of tidemedian.
for i, m in enumerate(tidemedian):
    bar = ax3.plot([i - 0.2, i + 0.8], [m, m], '-k', zorder=0.4)

# NOTE(review): Python 2 print statements; the same debug output is emitted
# twice.
print fig2.axes

print fig2.axes
#ax2.set_frame_on(False)
#ax3.set_frame_on(False)

# Horizontal reference lines on ax2: translucent red at y = -0.921 and black
# at y = 0, both drawn behind the other artists (zorder=0).
ax2.plot([-5, 60], [-0.921, -0.921], 'r-', zorder=0, alpha=0.5)
ax2.plot([-5, 60], [0., 0], 'k-', zorder=0)
ax3.set_yticks([0, 1000, 2000, 3000, 4000])
plt.show()
if myr:
Ejemplo n.º 21
0
def compute(inp_dataset, input_path, output_path, de_analysis, n_pass):

    print("Current pass ", n_pass)
    import json
    import matplotlib as plt
    import csv
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt
    from sklearn.decomposition import PCA
    from decimal import Decimal
    import seaborn as sns
    import pandas as pd
    import networkx as nx
    from sklearn.cluster import DBSCAN
    from sklearn.cluster import KMeans
    import operator
    import numpy as np
    import random
    import sys

    #csvData=[['data','x','y','type']]
    print("Processing the input data into datafames....")
    csvData = []
    count = 0
    #filename = "G:/Thesis/Dropclust/plots/output_normalized_own_cc.csv" filename = "G:/Thesis/Dropclust/plots/PCA_GENES/output_normalized_own_cc.csv" filename =
    #"G:/Thesis/Dropclust/output_normalized_zscore_cc1.csv" filename = "C:/Users/Swagatam/IdeaProjects/openOrd/output_normalized_own_cc.csv"
    filename = input_path + "/output_normalized_own_cc.csv"
    coord_data = pd.read_csv(filename, names=['data', 'x', 'y'])
    coord_data.set_index('data', inplace=True)
    data = []
    data_outlier = []
    with open(filename, 'r') as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            #f=0
            #row=[float(i) for i in row]
            data.append(row)
            temp_outlier = []
            temp_outlier.append(row[1])
            temp_outlier.append(row[2])
            data_outlier.append(temp_outlier)
            temp = row
            #if row[0].isnumeric():
            #    temp.append('cell')
            if len(row[0]) >= 16:
                temp.append('cell')
            else:
                temp.append('gene')
                count = count + 1
            csvData.append(temp)

    # # DB SCAN

    # In[20]:

    if n_pass != 4:
        noise = []
        print("Performing clustering....")
        db = DBSCAN(eps=180, min_samples=55).fit_predict(data_outlier)
        final_data = []
        csvData = [['data', 'x', 'y', 'type']]
        for i in range(0, len(list(db))):
            if db[i] != -1:
                final_data.append(data[i])
                csvData.append(data[i])
            if db[i] == -1:
                noise.append(data[i][0])
        data = final_data

        n_clusters = len(set(db)) - (1 if -1 in list(db) else 0)
        print("Clustering done. the number of obtained clusters: ", n_clusters)
    else:
        remove_data = []

        prev_df = pd.read_csv(
            "Stardust_results/visualization_output/3_pass/data.csv",
            delimiter=",",
            index_col=False)
        prev_df.set_index('data', inplace=True)
        clusters_info = []
        for i in range(0, len(csvData)):
            if csvData[i][3] == 'cell':
                if csvData[i][0] in (prev_df.index):
                    clusters_info.append(prev_df.loc[csvData[i][0]]['cluster'])
                else:
                    remove_data.append(csvData[i])
            else:
                f = 0
                import pickle
                with open(
                        'Stardust_results/visualization_output/3_pass/de_genes_cluster.txt',
                        'rb') as fp:
                    de_gene_cluster = pickle.load(fp)
                for rank in range(0, len(de_gene_cluster)):
                    if csvData[i][0] in de_gene_cluster[rank]:
                        f = 1
                        clusters_info.append(de_gene_cluster[rank].index(
                            csvData[i][0]))
                        break
                if f == 0:
                    remove_data.append(csvData[i])
        for r in remove_data:
            csvData.remove(r)
        temp = [['data', 'x', 'y', 'type']]
        temp.extend(csvData)
        csvData = temp

    # In[13]:

    # # OUTLIER VISUALIZATION

    # In[21]:
    if n_pass != 4:
        print("Starting outlier detection....")
        data_type = []
        c = 0
        g = 0
        for i in range(0, len(coord_data)):
            if db[i] != -1:
                data_type.append("data")
            else:
                if len(coord_data.index[i]) >= 16:
                    data_type.append("cell_outliers")
                else:
                    g = g + 1
                    data_type.append("gene_outliers")
        coord_data["data_type"] = data_type
        data_colors = ["lightblue"]
        if g > 0:
            noise_colors = ['blue', 'red']
        else:
            noise_colors = ['blue']
        coord_data["alpha"] = np.where(coord_data['data_type'] == 'data', 0.5,
                                       1.0)
        plt.figure(figsize=(6, 4.5))
        #ax = sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha']==0.5],hue="data_type",palette=sns.xkcd_palette(data_colors),sizes=(50,100),size="data_type",alpha=0.3)
        #sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha']==1.0],hue="data_type",palette=sns.xkcd_palette(noise_colors),sizes=(50,100),size="data_type",marker="^",alpha=1.0,ax=ax)
        marker = {"gene_outliers": "^", "cell_outliers": "^"}
        ax = sns.scatterplot(x="x",
                             y="y",
                             data=coord_data[coord_data['alpha'] == 0.5],
                             hue="data_type",
                             palette=sns.xkcd_palette(data_colors),
                             sizes=(50, 100),
                             size="data_type",
                             linewidth=0.0,
                             s=10,
                             alpha=0.3)
        sns.scatterplot(x="x",
                        y="y",
                        data=coord_data[coord_data['alpha'] == 1.0],
                        hue="data_type",
                        palette=sns.xkcd_palette(noise_colors),
                        sizes=(100, 50),
                        size="data_type",
                        style="data_type",
                        markers=marker,
                        alpha=1.0,
                        linewidth=0.0,
                        s=10,
                        legend='brief',
                        ax=ax)
        #plt.legend(title=='')
        ax.legend(bbox_to_anchor=(1.1, 1.05), frameon=False)
        sns.despine(bottom=False, left=False)
        plt.xlabel("dim1")
        plt.ylabel("dim2")
        plt.savefig(output_path + 'outliers_visualization.png',
                    bbox_inches='tight')
        print("Outliers removed from the dataset....")

    # # POST-HOC CLUSTER ASSIGNMENT

    # In[23]:

    print("Starting post hoc clustering....")
    neighbor_df = pd.read_hdf(
        'Stardust_results/build_output/1_pass/neighbor.h5', 'df')
    if 'Unnamed: 0' in list(neighbor_df.columns):
        neighbor_df.set_index('Unnamed: 0', inplace=True)
    p = 0
    col = list(neighbor_df.columns)
    index = list(neighbor_df.index)
    cell_dict = dict()
    column_dict = dict()
    for i in range(len(col)):
        column_dict[i] = col[i]
    for i in range(len(list(neighbor_df.index))):
        row = neighbor_df.iloc[i]
        col_ind = list(row.to_numpy().nonzero())[0]
        for ind in col_ind:
            if index[i] in cell_dict.keys():
                cell_dict[index[i]].append(column_dict[ind])
            else:
                temp = []
                temp.append(column_dict[ind])
                cell_dict[index[i]] = temp
    cluster_assign = []
    for key_cell in cell_dict.keys():
        clust = dict()
        cells = cell_dict[key_cell]
        for cell in cells:
            if n_pass == 4:
                if cell in list(prev_df.index):
                    cluster = prev_df.loc[cell]['cluster']
                else:
                    cluster = -1
            else:
                cluster = db[list(coord_data.index).index(cell)]
            if cluster not in clust.keys():
                clust[cluster] = 1
            else:
                clust[cluster] = clust[cluster] + 1
        max_cluster = max(clust.items(), key=operator.itemgetter(1))[0]
        if max_cluster == -1:
            continue
        cluster_assign.append(max_cluster)
        x_total = 0
        y_total = 0
        count = 0
        for cell in cells:
            if (n_pass != 4
                    and db[list(coord_data.index).index(cell)] == max_cluster
                ) or (n_pass == 4 and cell in list(prev_df.index)
                      and prev_df.loc[cell]['cluster'] == max_cluster):
                count = count + 1
                x_total = x_total + coord_data.loc[cell]['x']
                y_total = y_total + coord_data.loc[cell]['y']
        temp = []
        temp.append(key_cell)
        temp.append(x_total / count)
        temp.append(y_total / count)
        temp.append('cell')
        p = p + 1
        csvData.append(temp)
    print("Post hoc clustering done....")

    # In[24]:

    with open(output_path + 'data.csv', 'w') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(csvData)
    csvFile.close()
    data_df = pd.read_csv(output_path + "data.csv",
                          delimiter=",",
                          index_col=False)
    if n_pass != 4:
        clusters_info = [x for x in db if x != -1]
        clusters_info = clusters_info + cluster_assign
    else:
        clusters_info = clusters_info + cluster_assign
        data_df['cluster'] = clusters_info
    data_df.to_csv(output_path + 'data.csv')
    n_clusters = len(list(set(clusters_info)))
    print("cluster saved ....")

    n_clusters = len(data_df['cluster'].unique())
    colors = random.sample(seaborn_colors, n_clusters)

    colors = random.sample(seaborn_colors, n_clusters)
    plt.figure(figsize=(5, 5))
    #cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=data_df,
                         hue="cluster",
                         palette=sns.xkcd_palette(colors),
                         linewidth=0.0,
                         s=2)
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    for cl in range(n_clusters):
        plt.annotate(cl,
                     data_df.loc[data_df['cluster'] == cl, ['x', 'y']].mean(),
                     horizontalalignment='center',
                     verticalalignment='center',
                     size=10,
                     weight='bold',
                     color="black")
    sns.despine(bottom=False, left=False)
    plt.xlabel("sd1", fontsize=20)
    plt.ylabel("sd2", fontsize=20)
    plt.setp(ax.spines.values(), linewidth=2)
    plt.yticks([], linewidth=20)
    plt.xticks([])
    plt.savefig(output_path + "cluster_visualization.png",
                bbox_inches='tight',
                dpi=600)
    plt.savefig(output_path + "cluster_visualization.pdf",
                bbox_inches='tight',
                dpi=600)

    if n_pass == 3:
        from sklearn.datasets import make_blobs
        from sklearn.metrics import silhouette_samples, silhouette_score
        silhouette_avg = silhouette_score(data_df[['x', 'y']],
                                          data_df['cluster'])
        sample_silhouette_values = silhouette_samples(data_df[['x', 'y']],
                                                      data_df['cluster'])
        print(silhouette_avg)

        y_lower = 10
        import matplotlib.cm as cm
        #fig, (ax1, ax2) = plt.subplots(1, 2)
        fig = plt.figure(figsize=(4, 7))
        #fig.set_size_inches(18, 7)
        for i in range(n_clusters):
            # Aggregate the silhouette scores for samples belonging to
            # cluster i, and sort them
            ith_cluster_silhouette_values = \
                sample_silhouette_values[data_df['cluster'] == i]

            ith_cluster_silhouette_values.sort()

            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i

            color = cm.nipy_spectral(float(i) / n_clusters)
            plt.fill_betweenx(np.arange(y_lower, y_upper),
                              0,
                              ith_cluster_silhouette_values,
                              facecolor=color,
                              edgecolor=color,
                              alpha=0.7)

            # Label the silhouette plots with their cluster numbers at the middle
            plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

            # Compute the new y_lower for next plot
            y_lower = y_upper + 10  # 10 for the 0 samples

        plt.title("The silhouette plot for the various clusters.")
        plt.xlabel("silhouette coefficient", fontsize=20)
        plt.ylabel("Cluster label", fontsize=20)
        plt.axvline(x=silhouette_avg, color="red", linestyle="--")

        plt.yticks([])  # Clear the yaxis labels / ticks
        plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
        sns.despine(bottom=False, left=False)
        fig.savefig(output_path + "/silhouette.pdf",
                    bbox_inches='tight',
                    dpi=600)
        fig.savefig(output_path + "/silhouette.png",
                    bbox_inches='tight',
                    dpi=600)

    #  #  MARKER FINDING
    data_df = pd.read_csv(output_path + "data.csv",
                          delimiter=",",
                          index_col=False)
    data_df.set_index('data', inplace=True)
    import pickle
    if n_pass == 2:
        path = 'Stardust_results/visualization_output/1_pass'
    if n_pass == 3:
        path = 'Stardust_results/visualization_output/2_pass'
    if n_pass == 4:
        path = 'Stardust_results/visualization_output/3_pass'
    if n_pass != 1:
        with open(path + '/de_genes_cluster.txt', 'rb') as fp:
            de_gene_cluster = pickle.load(fp)

        marker = []
        disp_marker = []
        for cl in range(n_clusters):
            cls = data_df[data_df['cluster'] == cl]
            gene_df = cls[cls['type'] == 'gene']
            f = 0
            for rank in range(len(de_gene_cluster)):
                if f == 1:
                    break
                for gene in de_gene_cluster[rank]:
                    if gene in list(gene_df.index):
                        disp_marker.append(gene)
                        #print(cl)
                        f = 1
                        break
        marker = disp_marker

        #sys.exit(0)

    # # CELL GENE MARKER

    # In[28]:
    from sklearn.neighbors import KNeighborsRegressor
    prev_pass_data = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data_openOrd.csv')
    prev_pass_data.set_index('data', inplace=True)
    data_df = pd.read_csv(output_path + '/data.csv')
    data_df.set_index('data', inplace=True)
    gene_df = data_df[data_df['type'] == 'gene']
    x_gene_fit = list(gene_df['x'])
    y_gene_fit = list(gene_df['y'])
    cells = list(prev_pass_data.index)
    cell_list = []
    x_coord = []
    y_coord = []

    for i in range(len(cells)):
        if cells[i] in list(data_df.index):
            cell_list.append(cells[i])
            x_coord.append(prev_pass_data.iloc[i]['x'])
            y_coord.append(prev_pass_data.iloc[i]['y'])

    prev_df = pd.DataFrame(index=cell_list)
    prev_df['x'] = x_coord
    prev_df['y'] = y_coord

    import numpy as np
    from sklearn.linear_model import Lasso
    from sklearn.neighbors import KNeighborsRegressor
    import pickle
    cells = []
    genes = []
    gene_coord_x = []
    gene_coord_y = []

    for i in range(n_clusters):
        clust_data = data_df[data_df['cluster'] == i]
        clust_cells = clust_data[clust_data['type'] == 'cell']
        clust_genes = clust_data[clust_data['type'] == 'gene']
        cells.extend(list(clust_cells.index))
        genes.extend(list(clust_genes.index))
        if len(list(clust_genes.index)) == 0:
            continue
        model1 = KNeighborsRegressor(n_neighbors=4)

        model2 = KNeighborsRegressor(n_neighbors=4)
        temp = []
        for cell in list(clust_cells.index):
            if cell in list(prev_df.index):
                temp.append(cell)
        clust_cells = clust_cells.loc[temp]
        model1.fit(
            np.array(list(clust_cells['x'])).reshape((-1, 1)),
            np.array(list(prev_df.loc[list(clust_cells.index)]['x'])).reshape(
                (-1, 1)))

        filename = output_path + '/sd_x_KNN_model.sav'
        pickle.dump(model1, open(filename, 'wb'))
        #model1 = pickle.load(open(filename, 'rb'))
        x_gene_pred = model1.predict(
            np.array(list(clust_genes['x'])).reshape((-1, 1)))
        gene_coord_x.extend(x_gene_pred)
        model2.fit(
            np.array(list(clust_cells['y'])).reshape((-1, 1)),
            np.array(list(prev_df.loc[list(clust_cells.index)]['y'])).reshape(
                (-1, 1)))

        filename = output_path + '/sd_y_KNN_model.sav'
        pickle.dump(model2, open(filename, 'wb'))
        #model2 = pickle.load(open(filename, 'rb'))
        y_gene_pred = model2.predict(
            np.array(list(clust_genes['y'])).reshape((-1, 1)))
        gene_coord_y.extend(y_gene_pred)

    with open(output_path + "/sd_gene_coord_x.txt", 'wb') as fp:
        pickle.dump(gene_coord_x, fp)
    with open(output_path + "/sd_gene_coord_y.txt", 'wb') as fp:
        pickle.dump(gene_coord_y, fp)

    #with open (output_path+"/sd_gene_coord_x.txt", 'rb') as fp:
    #        gene_coord_x = pickle.load(fp)
    #with open (output_path+"/sd_gene_coord_y.txt", 'rb') as fp:
    #        gene_coord_y = pickle.load(fp)

    import matplotlib.pyplot as plt, mpld3
    from scipy.spatial import ConvexHull, convex_hull_plot_2d
    prev_pass_data = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data_openOrd.csv')
    prev_pass_data["alpha"] = np.where(prev_pass_data['type'] == 'gene', 1.0,
                                       0.5)
    color_gene = ["light blue"]
    color_cell = ["red"]
    #fig,ax1 = plt.subplots()
    plt.figure(figsize=(6, 6))
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=prev_pass_data[prev_pass_data['alpha'] == 0.5],
                         hue="type",
                         palette=sns.xkcd_palette(color_gene),
                         sizes=(10, 5),
                         size="type",
                         alpha=0.3,
                         s=10)
    #sns.scatterplot(x="x", y="y", data=data_df[data_df['alpha']==1.0],hue="type",palette=sns.xkcd_palette(color_cell),sizes=(20,5),size="type",marker="^",alpha=1.0,ax=ax,s=10)
    sns.scatterplot(x=gene_coord_x,
                    y=gene_coord_y,
                    palette=sns.xkcd_palette(color_cell),
                    sizes=(20, 5),
                    marker="^",
                    alpha=1.0,
                    ax=ax,
                    s=10)
    for c in range(n_clusters):
        p = data_df[data_df["cluster"] == c]
        p = p[['x', 'y']]
        points = p.values
        hull = ConvexHull(points)
        #for simplex in hull.simplices:
    #    sns.lineplot(points[simplex, 0], points[simplex, 1])
    x_list = []
    y_list = []
    if n_pass != 1:
        for m in marker:
            #x_list.append(data_df.loc[m]['x'])
            x_list.append(gene_coord_x[genes.index(m)])
            #y_list.append(data_df.loc[m]['y'])
            y_list.append(gene_coord_y[genes.index(m)])
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    sns.despine(bottom=False, left=False)
    plt.xlabel("sd1", fontsize=20)
    plt.ylabel("sd2", fontsize=20)
    plt.setp(ax.spines.values(), linewidth=2)
    plt.yticks([], linewidth=20)
    plt.xticks([])
    plt.savefig(output_path + "sd_embedding.png", bbox_inches='tight', dpi=600)
    plt.savefig(output_path + "sd_embedding.pdf", bbox_inches='tight', dpi=600)

    import matplotlib.pyplot as plt, mpld3
    from scipy.spatial import ConvexHull, convex_hull_plot_2d
    #data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5)
    prev_pass_data.set_index('data', inplace=True)
    temp_data = prev_pass_data[prev_pass_data['type'] == 'cell']
    temp_genes = data_df[data_df['type'] == 'gene']
    for pos in range(0, len(genes)):
        temp_genes.at[genes[pos], 'x'] = gene_coord_x[pos]
        temp_genes.at[genes[pos], 'y'] = gene_coord_y[pos]
    temp_data.append(temp_genes)
    color_gene = ["light blue"]
    color_cell = ["red"]
    n_clusters = len(data_df['cluster'].unique())
    colors = random.sample(seaborn_colors, n_clusters)
    #fig,ax1 = plt.subplots()
    plt.figure(figsize=(6, 6))
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=temp_data,
                         hue="cluster",
                         palette=sns.xkcd_palette(colors),
                         s=2,
                         linewidth=0.0)
    #sns.scatterplot(x="x", y="y", data=data_df[data_df['alpha']==1.0],hue="type",palette=sns.xkcd_palette(color_cell),sizes=(20,5),size="type",marker="^",alpha=1.0,ax=ax,s=10)
    #sns.scatterplot(x=gene_coord_x, y=gene_coord_y,palette=sns.xkcd_palette(color_cell),sizes=(20,5),marker="^",alpha=1.0,ax=ax,s=20)
    for c in range(n_clusters):
        p = data_df[data_df["cluster"] == c]
        p = p[['x', 'y']]
        points = p.values
        hull = ConvexHull(points)
        #for simplex in hull.simplices:
    #    sns.lineplot(points[simplex, 0], points[simplex, 1])
    x_list = []
    y_list = []
    d1 = prev_pass_data[prev_pass_data['alpha'] == 0.5]
    for cl in range(n_clusters):
        plt.annotate(cl,
                     d1.loc[d1['cluster'] == cl, ['x', 'y']].mean(),
                     horizontalalignment='center',
                     verticalalignment='center',
                     size=10,
                     weight='bold',
                     color="black")
    if n_pass != 1:
        for m in marker:
            #x_list.append(data_df.loc[m]['x'])
            x_list.append(gene_coord_x[genes.index(m)])
            #y_list.append(data_df.loc[m]['y'])
            y_list.append(gene_coord_y[genes.index(m)])
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    sns.despine(bottom=False, left=False)
    plt.xlabel("sd1", fontsize=20)
    plt.ylabel("sd2", fontsize=20)
    plt.setp(ax.spines.values(), linewidth=2)
    plt.yticks([], linewidth=20)
    plt.xticks([])
    plt.savefig(output_path + "sd_color_embedding.png",
                bbox_inches='tight',
                dpi=600)
    plt.savefig(output_path + "sd_color_embedding.pdf",
                bbox_inches='tight',
                dpi=600)
    #sys.exit(0)
    # # UMAP CELL GENE MARKER # #

    if n_pass == 4:

        import pickle
        with open('Stardust_results/build_output/1_pass/umap_coord.txt',
                  'rb') as fp:
            umap_coord = pickle.load(fp)
        louvain_df = pd.read_csv(
            'Stardust_results/build_output/1_pass/louvain_cluster_df.csv')
        louvain_df.set_index('Unnamed: 0', inplace=True)
        #data_df = pd.read_csv('F:/output/output_visualize_melanoma_pca/3rd_pass/data.csv')
        data_df = pd.read_csv(output_path + '/data.csv')
        data_df.set_index('data', inplace=True)
        gene_df = data_df[data_df['type'] == 'gene']
        x_gene_fit = list(gene_df['x'])
        y_gene_fit = list(gene_df['y'])
        cells = list(louvain_df.index)
        cell_list = []
        x_coord = []
        y_coord = []
        for i in range(len(cells)):
            if cells[i] in list(data_df.index):
                cell_list.append(cells[i])
                x_coord.append(umap_coord[i][0])
                y_coord.append(umap_coord[i][1])
        umap_df = pd.DataFrame(index=cell_list)
        umap_df['x'] = x_coord
        umap_df['y'] = y_coord

        import numpy as np
        from sklearn.linear_model import Lasso
        from sklearn.neighbors import KNeighborsRegressor
        import pickle
        cells = []
        genes = []
        gene_coord_x = []
        gene_coord_y = []
        for i in range(n_clusters):
            clust_data = data_df[data_df['cluster'] == i]
            clust_cells = clust_data[clust_data['type'] == 'cell']
            clust_genes = clust_data[clust_data['type'] == 'gene']
            cells.extend(list(clust_cells.index))
            genes.extend(list(clust_genes.index))
            if len(list(clust_genes.index)) == 0:
                continue
            model1 = KNeighborsRegressor(n_neighbors=5)

            model2 = KNeighborsRegressor(n_neighbors=5)

            model1.fit(
                np.array(list(clust_cells['x'])).reshape((-1, 1)),
                np.array(list(umap_df.loc[list(
                    clust_cells.index)]['x'])).reshape((-1, 1)))

            filename = output_path + '/scanpy_x_KNN_model.sav'
            pickle.dump(model1, open(filename, 'wb'))
            #model1 = pickle.load(open(filename, 'rb'))
            x_gene_pred = model1.predict(
                np.array(list(clust_genes['x'])).reshape((-1, 1)))
            gene_coord_x.extend(x_gene_pred)
            model2.fit(
                np.array(list(clust_cells['y'])).reshape((-1, 1)),
                np.array(list(umap_df.loc[list(
                    clust_cells.index)]['y'])).reshape((-1, 1)))

            filename = output_path + '/scanpy_y_KNN_model.sav'
            pickle.dump(model2, open(filename, 'wb'))
            #model2 = pickle.load(open(filename, 'rb'))
            y_gene_pred = model2.predict(
                np.array(list(clust_genes['y'])).reshape((-1, 1)))
            gene_coord_y.extend(y_gene_pred)

        with open(output_path + "/scanpy_gene_coord_x.txt", 'wb') as fp:
            pickle.dump(gene_coord_x, fp)
        with open(output_path + "/scanpy_gene_coord_y.txt", 'wb') as fp:
            pickle.dump(gene_coord_y, fp)

        #with open (output_path+"/scanpy_gene_coord_x.txt", 'rb') as fp:
        #    gene_coord_x = pickle.load(fp)
        #with open (output_path+"/scanpy_gene_coord_y.txt", 'rb') as fp:
        #    gene_coord_y = pickle.load(fp)

        #n_clusters = len(list(data_df['cluster'].unique()))

        u_map_x = []
        u_map_y = []
        for ind in list(data_df.index):
            if ind in list(louvain_df.index):

                u_map_x.append(umap_coord[list(
                    louvain_df.index).index(ind)][0])
                u_map_y.append(umap_coord[list(
                    louvain_df.index).index(ind)][1])
            else:
                u_map_x.append(gene_coord_x[genes.index(ind)])
                u_map_y.append(gene_coord_y[genes.index(ind)])
        data_df['umap_x'] = u_map_x
        data_df['umap_y'] = u_map_y

        #        colors = random.sample(seaborn_colors,n_clusters)
        #colors = colors3
        plt.figure(figsize=(5, 5))
        #cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df,
                             hue="cluster",
                             palette=sns.xkcd_palette(colors),
                             linewidth=0.0,
                             s=2)
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        for cl in range(n_clusters):
            plt.annotate(cl,
                         data_df.loc[data_df['cluster'] == cl,
                                     ['umap_x', 'umap_y']].mean(),
                         horizontalalignment='center',
                         verticalalignment='center',
                         size=10,
                         weight='bold',
                         color="black")
        sns.despine(bottom=False, left=False)
        plt.xlabel("umap1", fontsize=20)
        plt.ylabel("umap2", fontsize=20)
        plt.setp(ax.spines.values(), linewidth=2)
        plt.yticks([], linewidth=20)
        plt.xticks([])
        plt.savefig(output_path + 'umap_clustering.png',
                    bbox_inches='tight',
                    dpi=600)
        plt.savefig(output_path + 'umap_clustering.pdf',
                    bbox_inches='tight',
                    dpi=600)

        import matplotlib.pyplot as plt, mpld3
        from scipy.spatial import ConvexHull, convex_hull_plot_2d
        data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5)
        color_gene = ["light grey"]
        color_cell = ["red"]
        #fig,ax1 = plt.subplots()
        plt.figure(figsize=(6, 6))

        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df[data_df['alpha'] == 0.5],
                             hue="type",
                             palette=sns.xkcd_palette(color_gene),
                             sizes=(10, 5),
                             size="type",
                             alpha=0.3,
                             s=10)
        sns.scatterplot(x="umap_x",
                        y="umap_y",
                        data=data_df[data_df['alpha'] == 1.0],
                        hue="type",
                        palette=sns.xkcd_palette(color_cell),
                        sizes=(20, 5),
                        size="type",
                        marker="^",
                        alpha=1.0,
                        ax=ax,
                        s=10)
        for c in range(n_clusters):
            p = data_df[data_df["cluster"] == c]
            p = p[['umap_x', 'umap_y']]
            points = p.values
            hull = ConvexHull(points)
            #for simplex in hull.simplices:
            #    sns.lineplot(points[simplex, 0], points[simplex, 1])
        x_list = []
        y_list = []
        for m in marker:
            x_list.append(data_df.loc[m]['umap_x'])
            #x_list.append(gene_coord_x[genes.index(m)])
            y_list.append(data_df.loc[m]['umap_y'])
            #y_list.append(gene_coord_y[genes.index(m)])
        for cl in range(n_clusters):
            plt.annotate(cl,
                         data_df.loc[data_df['cluster'] == cl,
                                     ['umap_x', 'umap_y']].mean(),
                         horizontalalignment='center',
                         verticalalignment='center',
                         size=10,
                         weight='bold',
                         color="black")
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        sns.despine(bottom=False, left=False)
        plt.xlabel("umap1", fontsize=20)
        plt.ylabel("umap2", fontsize=20)
        plt.setp(ax.spines.values(), linewidth=2)
        plt.yticks([], linewidth=20)
        plt.xticks([])
        plt.savefig(output_path + 'umap_embedding.png',
                    bbox_inches='tight',
                    dpi=600)
        plt.savefig(output_path + 'umap_embedding.pdf',
                    bbox_inches='tight',
                    dpi=600)

        import matplotlib.pyplot as plt, mpld3
        from scipy.spatial import ConvexHull, convex_hull_plot_2d
        data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5)
        color_gene = ["light grey"]
        color_cell = ["red"]
        #fig,ax1 = plt.subplots()
        plt.figure(figsize=(6, 6))
        #       colors = color
        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df[data_df['alpha'] == 0.5],
                             hue="cluster",
                             linewidth=0.0,
                             sizes=(2, 5),
                             size="type",
                             palette=sns.xkcd_palette(colors),
                             s=2)
        sns.scatterplot(x="umap_x",
                        y="umap_y",
                        data=data_df[data_df['alpha'] == 1.0],
                        hue="type",
                        palette=sns.xkcd_palette(color_cell),
                        linewidth=0.1,
                        marker="^",
                        ax=ax,
                        alpha=1.0,
                        s=10)
        for c in range(n_clusters):
            p = data_df[data_df["cluster"] == c]
            p = p[['umap_x', 'umap_y']]
            points = p.values
            hull = ConvexHull(points)
            #for simplex in hull.simplices:
            #    sns.lineplot(points[simplex, 0], points[simplex, 1])
        x_list = []
        y_list = []
        for m in marker:
            x_list.append(data_df.loc[m]['umap_x'])
            y_list.append(data_df.loc[m]['umap_y'])
        for cl in range(n_clusters):
            plt.annotate(cl,
                         data_df.loc[data_df['cluster'] == cl,
                                     ['umap_x', 'umap_y']].mean(),
                         horizontalalignment='center',
                         verticalalignment='center',
                         size=10,
                         weight='bold',
                         color="black")
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        sns.despine(bottom=False, left=False)
        plt.xlabel("umap1", fontsize=20)
        plt.ylabel("umap2", fontsize=20)
        plt.setp(ax.spines.values(), linewidth=2)
        plt.yticks([], linewidth=20)
        plt.xticks([])
        plt.savefig(output_path + 'umap_color_embedding.png',
                    bbox_inches='tight',
                    dpi=600)
        plt.savefig(output_path + 'umap_color_embedding.pdf',
                    bbox_inches='tight',
                    dpi=600)
Ejemplo n.º 22
0
    "Decision Tree": decTreeScore,
    "Random Forest": rfScore,
    "Naive Bayes": nbScore
}

# Classifier display names, in the same order as the scores in `accuracy`.
methods = [
    "Logistic Regression",
    "SVM",
    "KNN",
    "Naive Bayes",
    "Decision Tree",
    "Random Forest",
]
accuracy = [logRegScore, svmScore, knnScore, nbScore, decTreeScore, rfScore]

# One bar per classifier.
sns.set()
plt.figure(figsize=(14, 5))
plt.xlabel("Algoritmalar")
plt.ylabel("Başarı %")
sns.barplot(x=methods, y=accuracy, palette="deep")

# Write each score as a percentage at a fixed height (y = 0.85), shifted
# slightly left of the bar's integer x position.
label_style = dict(horizontalalignment='left', size='large', color="black")
for line, score in enumerate(accuracy):
    plt.text(line - 0.20, 0.85, "{:.3f}%".format(score * 100), **label_style)

# Save the chart to disk first, then display it.
plt.savefig('karşılaştır.png', transparent=True)
plt.show()
# Pie chart of `values`, one wedge per entry of `pollutants`, with the
# percentage printed on each wedge.
pie_style = dict(labels=pollutants,
                 explode=explode,
                 autopct='%1.1f%%',
                 shadow=True)

plt.figure(figsize=(8, 6))
plt.pie(values, **pie_style)
plt.axis('equal')
plt.title('Air pollutants and their probable amount in atmosphere [India]')
plt.show()

# # showing INDIA AQI on world map using cartopy

# In[82]:

import cartopy.crs as ccrs

# In[83]:

# Position of the station as stored under data['city']['geo'].  Element 0 is
# drawn on the y axis and element 1 on the x axis (presumably latitude and
# longitude respectively -- confirm against the data source).
geo = data['city']['geo']
marker_y, marker_x = geo[0], geo[1]

# World map backdrop in a plate carree projection.
fig = plt.figure(figsize=(12, 10))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.stock_img()

# Mark the station and label it with its name and AQI value, offset a little
# from the marker.
aqi_label = f'{name} AQI \n    {aqi}'
plt.scatter(marker_x, marker_y, color='blue')
plt.text(marker_x + 3, marker_y - 2, aqi_label, color='red')

plt.show()
Ejemplo n.º 24
0
# Placeholders shaped like the training arrays `x` and `y`.
tf_x = tf.placeholder(tf.float32, x.shape)  # input x
tf_y = tf.placeholder(tf.float32, y.shape)  # input y

# neural network layers: a 10-unit ReLU hidden layer and a 1-unit output
l1 = tf.layers.dense(tf_x, 10, tf.nn.relu)
output = tf.layers.dense(l1, 1)

# Mean squared error between targets and predictions, minimised by plain
# gradient descent.
loss = tf.losses.mean_squared_error(tf_y, output)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_op = optimizer.minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialize variables in graph

plt.ion()

for step in range(100):
    # train and fetch the current loss and network output
    _, l, pred = sess.run([train_op, loss, output], {tf_x: x, tf_y: y})
    # The original `if` had no colon and an unindented body (a SyntaxError);
    # the reporting/plotting below is meant to run every fifth step only.
    if step % 5 == 0:
        # plot and show learning process
        print(l)
        plt.cla()
        plt.scatter(x, y)
        plt.plot(x, pred, 'r-', lw=5)
        plt.text(0.5, 0, 'Loss=%.4f' % l,
                 fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.1)

plt.ioff()
plt.show()
Ejemplo n.º 25
0
def g_histfitting(colourFilter, metadata, cluster_ids, blink_ids,
                  N_bins):  #geometric and negative binomial fitting
    """Fit a geometric model to the histogram of blinks per cluster.

    Blinks per cluster does not exist as a ready-made value in the pipeline,
    so it is computed here by counting how often each distinct label occurs
    in ``colourFilter[cluster_ids]``.  The counts are binned into a
    density-normalised histogram and the module-level residual function
    ``grff`` is fitted to the bin centres through ``FitModel_N``, starting
    from ``p = 0.1``.  Negative binomial fitting is currently disabled.

    Parameters
    ----------
    colourFilter
        Data source indexed with ``cluster_ids`` to obtain one cluster label
        per event.
    metadata
        Currently unused.
    cluster_ids
        Key into ``colourFilter`` selecting the cluster label column.
    blink_ids
        Currently unused.
    N_bins
        Currently unused; the bin count is derived from the largest
        blinks-per-cluster value instead.

    Returns
    -------
    tuple
        ``(hist_data, geofit.x, fitErrors_geo)``: the histogram wrapped in a
        ``tabular.RecArraySource`` (fields ``y_hist`` and ``x_hist``), the
        fitted parameter array, and the parameter uncertainties taken from
        the diagonal of the estimated covariance matrix.
    """
    from PYME.IO import tabular

    # Number of entries carrying each distinct cluster label.
    _, bpc_2 = np.unique(colourFilter[cluster_ids], return_counts=True)
    print('any points where bps = 0?', np.where(bpc_2 == 0))

    plt.hist(bpc_2, bins=20)
    plt.figure()

    # np.histogram only accepts an integer bin count.  Floor division keeps
    # the intended "one bin per five blinks" and max() guarantees at least
    # one bin when every cluster has fewer than five blinks.
    n_hist_bins = max(1, int(np.max(bpc_2) // 5))
    vals, bin_edges = np.histogram(bpc_2, bins=n_hist_bins, density=True)
    print('hist stuff', vals, bin_edges)

    y_hist = vals
    bin_starts = bin_edges[:-1]
    bin_ends = bin_edges[1:]
    x_hist = (bin_starts + bin_ends) / 2  # bin centres
    params_on = np.array([.1])  # initial guess for p
    geofit = FitModel_N(grff, params_on, x_hist, y_hist)

    # Model curve for display (presumably gfm is the model matching grff --
    # confirm).  np.linspace needs an integer number of samples.
    n_fit_points = int(max(bin_edges) * 10)
    x_fit = np.linspace(min(bin_edges), max(bin_edges), n_fit_points)
    y_fit = gfm(x_fit, geofit.x)

    hist_datasource = np.rec.fromarrays((y_hist, x_hist),
                                        dtype=[('y_hist', '<f4'),
                                               ('x_hist', '<f4')])
    hist_data = tabular.RecArraySource(hist_datasource.view(np.recarray))

    # NOTE: a simultaneous fit of N and p (negative binomial), and a variant
    # that reuses the p found by the geometric fit, were planned but are not
    # implemented here.

    # Covariance estimate: inverse of J^T J scaled by the summed squares of
    # geofit.fun.
    geo_cov = np.linalg.inv(np.matmul(geofit.jac.T, geofit.jac)) * np.mean(
        (geofit.fun * geofit.fun).sum())
    fitErrors_geo = np.sqrt(np.diag(geo_cov))

    if USE_GUI:
        # Take the width from the edges so a single-bin histogram also works.
        plt.bar(bin_starts,
                vals,
                width=bin_edges[1] - bin_edges[0],
                alpha=.4)
        plt.plot(x_fit, y_fit)
        plt.xlim([0, 200])
        xx3 = max(plt.xlim())
        yy3 = max(plt.ylim())
        cv = min(plt.xlim())
        plt.text(((xx3 - cv) * .3) + cv, yy3 * .7, r'$y = p(1-p)^{x}$')
        # Format scalars rather than 1-element arrays.
        plt.text(((xx3 - cv) * .3) + cv, yy3 * .6,
                 'p= %5.2f +/- %5.2f' % (geofit.x[0], fitErrors_geo[0]))

        plt.title(r'$mE0s3.2-CAAX$')
        plt.xlabel('Number of blinks per molecule')
        plt.ylabel('Probability')

    return hist_data, geofit.x, fitErrors_geo
	print t
	print v

# Run the simulation: set up the arrays, integrate, and store the results.
initialize(v, s, t, dt, n)
calculate(v, s, t, dt, n)
store(v, t, n)

# plot: velocity against time in the upper panel
plt.figure(1)
plt.subplot(211)
plt.plot(t, v, "g-", linewidth=2.0)
plt.scatter(t, v)
plt.title('The Velocity of a Free Falling Object')
plt.xlabel('Time($t$)', fontsize=14)
plt.ylabel('Velocity($m/s$)', fontsize=14)
plt.text(3, -60, r'$g = 9.8 m/s^2$', fontsize=16)
plt.grid(True)

# displacement against time in the lower panel
plt.subplot(212)
plt.plot(t, s, "g-", linewidth=2.0)
plt.scatter(t, s)
plt.title('The Displacement of a Free Falling Object')
plt.xlabel('Time($t$)', fontsize=14)
plt.ylabel('Displacement($m$)', fontsize=14)
plt.text(3, -300, r'$g = 9.8 m/s^2$', fontsize=16)
plt.grid(True)

# Save before showing: once a blocking show() returns the figure window has
# been closed, so a later savefig() would write an empty image.
plt.savefig("ex1.jpg")
plt.show()
read()
Ejemplo n.º 27
0
import numpy as np
# The plotting API (bar, text, xlim, show, ...) lives in matplotlib.pyplot;
# the top-level matplotlib package does not provide these functions.
import matplotlib.pyplot as plt

# Two random series that shrink linearly towards the right-hand side.
n = 12
X = np.arange(n)
Y1 = (1 - X / float(n)) * np.random.uniform(0.5, 1, n)
Y2 = (1 - X / float(n)) * np.random.uniform(0.5, 1, n)

# Y1 is drawn upwards and Y2 mirrored downwards around y = 0.
plt.bar(X, Y1, facecolor='#9999ff', edgecolor='white')
plt.bar(X, -Y2, facecolor='#ff9999', edgecolor='white')

# Label every bar with its value.
for x, y in zip(X, Y1):
    # ha: horizontal alignment
    plt.text(x + 0.4, y + .05, '%.2f' % y, ha='center', va='bottom')
for x, y in zip(X, Y2):
    # ha: horizontal alignment
    plt.text(x + 0.4, -y - .15, '-%.2f' % y, ha='center', va='bottom')

# Fix the view and hide both axes' ticks.
plt.xlim(-.5, n)
plt.xticks(())
plt.ylim(-1.25, 1.25)
plt.yticks(())

plt.show()
Ejemplo n.º 28
0
import numpy as np
# The plotting API (text, ...) lives in matplotlib.pyplot, not in the
# top-level matplotlib package.
import matplotlib.pyplot as plt
la = np.linalg
words = ["I", "like", "enjoy", "deep", "learing", "NLP", "flying", "."]
# 8x8 symmetric matrix with one row/column per entry of `words`.
x = np.array([[0, 2, 1, 0, 0, 0, 0, 0], [2, 0, 0, 1, 0, 1, 0, 0],
              [1, 0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 0, 1, 0, 0, 0],
              [0, 0, 0, 1, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0, 0, 1],
              [0, 0, 1, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 1, 1, 0]])
U, s, Vh = la.svd(x, full_matrices=False)
# Place each word at the first two columns of its row of U.
# (The original iterated over `x(len(words))`, which tries to call the
# ndarray `x`; the intent is to visit every word index.)
for i, word in enumerate(words):
    plt.text(U[i, 0], U[i, 1], word)
Ejemplo n.º 29
0
                  alpha=opacity,
                  color='m',
                  yerr=std_lc,
                  error_kw=error_config,
                  label='LC')
# RLC series, drawn two bar widths to the right of the tick positions.
rect_RLC = axs.bar(test_suites + 2 * bar_width, avg_rlc, bar_width,
                   label='RLC', color='c', alpha=opacity,
                   yerr=std_rlc, error_kw=error_config)

# Only for PR (since the bar is invisible): write each value as text
# slightly above where the bar's top would be.
x, y = test_suites - bar_width, avg_pr
for x_pos, value in zip(x, y):
    plt.text(x_pos,
             value + 0.05,
             '%.04f' % value,
             ha='center',
             va='bottom',
             fontsize=6)

# Axis decoration.
axs.set_xticks(test_suites + bar_width / 4)
axs.set_xticklabels(test_suites)
axs.set_xlabel(x_label)
axs.set_ylabel(y_label)
axs.set_title(title)
axs.legend(loc="upper right")

# Write the figure to disk first, then display it.
plt.savefig('./data_analytics/figures/' + file_name)
plt.show()
	t = pickle.load(pickle_file)
	v = pickle.load(pickle_file)

# Driver: run the helpers defined earlier in this script on the shared
# v (velocity), s (displacement) and t (time) sequences, then plot them.
initialize(v, s, t, dt, n)
calculate(v, s, t, dt, n)
store(v, t, n)

# plot: velocity on the upper panel, displacement on the lower one
plt.figure(1)
plt.subplot(211)
plt.plot(t, v, "g-", linewidth=1.0)
plt.scatter(t, v)
plt.title('The Velocity of a Free Falling Object')
#plt.xlabel('Time($t$)', fontsize=12) (cancel because words overlap)
plt.ylabel('Velocity($m/s$)', fontsize=12)
plt.text(3, 0, r'$g = 9.8 m/s^2$', fontsize=16)
plt.grid(True)

plt.subplot(212)
plt.plot(t, s, "g-", linewidth=1.0)
plt.scatter(t, s)
plt.title('The Displacement of a Free Falling Object')
plt.xlabel('Time($t$)', fontsize=12)
plt.ylabel('Displacement($m$)', fontsize=12)
plt.text(3, 0, r'$g = 9.8 m/s^2$', fontsize=16)
plt.grid(True)

# Save BEFORE show(): in a blocking (non-interactive) session show() only
# returns once the window is closed, and the figure is discarded at that
# point, so a later savefig() would write an empty image.
plt.savefig("ex4.jpg")
plt.show()
read()
Ejemplo n.º 31
0
    score = mnist_classifier.evaluate(X_test_one_class, y_test_one_class, verbose=0)
    class_test_accuracy[class_index] = 100 * score[1]

    # Print test accuracy for each digit
    print("Test accuracy for label " + str(classes[class_index]) + ": " + str(class_test_accuracy[class_index]) + "%\n")



## Generate confusion matrix

cm = confusion_matrix(y_test, predictions)

plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds)

# One tick per class. The tick labels must be a sequence with one entry per
# tick; the original passed the scalar `num_classes`, which is not iterable.
# The class indices themselves are used as labels.
tick_marks = np.arange(num_classes)
plt.xticks(tick_marks, tick_marks)
plt.yticks(tick_marks, tick_marks)

# Write every count into its cell; use white text on dark cells (above half
# of the maximum count) and black text elsewhere so the number stays legible.
fmt = 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, format(cm[i, j], fmt),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

plt.ylabel('True labels')
plt.xlabel('Predicted labels')
plt.title('MNIST Confusion Matrix')
plt.show()
Ejemplo n.º 32
0
def plot_dataset_distribution(stats,
                              num_cols=5,
                              width=10,
                              height=5,
                              histogram_bins=10,
                              histogram_range=(0, 1000),
                              figure_padding=4):
    """Plot image-size histograms per class and a bar chart of class counts.

    A grid of histograms is drawn first: one panel for the whole dataset,
    followed by one panel per class. A second figure then shows the number
    of instances in each class, with the count written above every bar.
    Summary statistics are printed along the way.

    Args:
        stats: Rows convertible to a DataFrame with the columns
            ``Class, Filename, Width, Height, Size_in_KB`` (in that order).
        num_cols: Maximum number of histogram panels per row.
        width: Width of the histogram figure (passed in ``figsize``).
        height: Height of the histogram figure (passed in ``figsize``).
        histogram_bins: ``bins`` argument forwarded to ``Axes.hist``.
        histogram_range: ``range`` argument forwarded to ``Axes.hist``.
        figure_padding: ``pad`` argument forwarded to ``tight_layout``.

    Returns:
        None. The figures are displayed with ``plt.show()``.
    """
    # convert the list into a dataframe
    stats_frame = pd.DataFrame(
        stats, columns=['Class', 'Filename', 'Width', 'Height', 'Size_in_KB'])

    # extract the column related to sizes only
    list_sizes = stats_frame['Size_in_KB']

    class_labels = stats_frame['Class'].unique()
    number_of_classes = stats_frame['Class'].nunique()
    print(number_of_classes, " classes found in the dataset")

    # First entry is the whole dataset, then one entry per class.
    list_sizes_per_class = [list_sizes]
    class_names = ['whole dataset']
    print("Images of the whole dataset have an average size of ",
          list_sizes.mean())

    class_count_dict = {}
    for c in class_labels:
        in_class = stats_frame['Class'] == c
        class_sizes = list_sizes.loc[in_class]
        print("sizes of class [", c, "] have an average size of ",
              class_sizes.mean())
        list_sizes_per_class.append(class_sizes)
        class_names.append(c)
    for c in class_labels:
        class_count_dict[c] = stats_frame.loc[stats_frame['Class'] ==
                                              c].count()['Class']
        print("number of instances in class [", c, "] is ",
              class_count_dict[c])

    # Grid shape. This used to be computed inside the per-class loop, which
    # left num_rows undefined when the dataset contained no classes.
    num_rows = math.ceil((number_of_classes + 1) / num_cols)
    if number_of_classes < num_cols:
        num_cols = number_of_classes + 1

    # squeeze=False always yields a 2-D array of axes, so single-row,
    # single-column and single-panel grids need no special casing.
    fig, axes = plt.subplots(num_rows,
                             num_cols,
                             figsize=(width, height),
                             squeeze=False)
    fig.tight_layout(pad=figure_padding)

    # Fill panels in row-major order; surplus panels are left empty.
    for ax, sizes, name in zip(axes.flat, list_sizes_per_class, class_names):
        ax.hist(sizes, bins=histogram_bins, range=histogram_range)
        ax.set_xlabel('Image size (in KB)', fontweight='bold')
        ax.set_title(str(name) + ' images ', fontweight='bold')

    # Class-count bar chart on its own figure. It was previously nested in
    # the multi-row branch only and created via an unqualified `figure()`.
    plt.figure()
    print(class_count_dict)
    bar_labels = list(class_count_dict.keys())
    bar_counts = list(class_count_dict.values())
    plt.bar(bar_labels, bar_counts)

    # Write each count just above its bar.
    for label, count in zip(bar_labels, bar_counts):
        plt.text(label, count + 1, str(count))
    plt.show()
Ejemplo n.º 33
0
def _fit_errors(fit):
    """Return one-sigma parameter errors estimated from a least-squares result.

    ``fit`` must expose ``jac`` (Jacobian at the solution) and ``fun``
    (residual vector). The covariance is ``inv(J^T J)`` scaled by the
    residual sum of squares (``np.mean`` of the already-summed scalar is a
    no-op and is kept only to mirror the original expression).
    """
    cov = np.linalg.inv(np.matmul(fit.jac.T, fit.jac)) * np.mean(
        (fit.fun * fit.fun).sum())
    return np.sqrt(np.diag(cov))


def _annotate_fit(plt, x_max, y_max, fractions, equation, params, errors):
    """Write the fit equation and 'name= value +/- error' lines on the plot.

    ``fractions`` holds the vertical position of every text line as a
    fraction of ``y_max``: the first entry places the equation, the
    following entries place parameters A, B, C, D in that order.
    """
    x_text = x_max * .3
    plt.text(x_text, y_max * fractions[0], equation)
    for name, frac, value, err in zip('ABCD', fractions[1:], params, errors):
        plt.text(x_text, y_max * frac,
                 '%s= %5.2f +/- %5.2f' % (name, value, err))


def histfitting(colourFilter,
                metadata,
                cluster_idxs,
                fit_order,
                num_bins,
                blink_on_label,
                blink_off_label,
                to_json=False,
                log_bins=False,
                n_on_bins=1,
                n_off_bins=1,
                fixed_on_max=-1,
                fixed_off_max=-1):
    """Histogram blink on/off durations and fit them with exponentials.

    Args:
        colourFilter: Mapping indexed by column label; the on and off
            duration columns are read from it.
        metadata: Object whose ``getEntry('Camera.IntegrationTime')`` gives
            the frame duration used to scale the bin edges.
        cluster_idxs: Unused.
        fit_order: 1 for a single exponential, 2 for a double exponential.
        num_bins: Unused.
        blink_on_label: Key of the on-duration column in ``colourFilter``.
        blink_off_label: Key of the off-duration column in ``colourFilter``.
        to_json: When true, also write the two histograms to a time-stamped
            ``empirical_histograms_*.json`` file in the working directory.
        log_bins: Only affects the ``tlog`` flag written to the JSON file.
        n_on_bins: Unused.
        n_off_bins: Bin width (in frames) of the off-duration histogram.
        fixed_on_max: Unused.
        fixed_off_max: Upper limit of the off-duration histogram; ``-1``
            means use the largest observed off duration.

    Returns:
        Tuple ``(filt_on, filt_off, params_on, params_off, fitErrors_on,
        fitErrors_off, fit_eqn)``: the two histograms wrapped as tabular
        sources, the fitted parameters and their errors for each, and a
        string describing the fitted equation.

    Raises:
        ValueError: If ``fit_order`` is neither 1 nor 2.
    """
    # pyplot (not the top-level matplotlib package) provides figure/bar/text.
    import matplotlib.pyplot as plt
    from PYME.IO import tabular

    N = fit_order
    frame_duration = metadata.getEntry('Camera.IntegrationTime')

    # On durations, truncated to whole frames. np.int was only an alias of
    # the builtin int and has been removed from NumPy.
    on_times = colourFilter[blink_on_label].astype('f')  #* frame_duration
    on_times = [int(i) for i in on_times]
    longest_on = max(on_times)

    # --- on-duration histogram (unit-width bins) ---
    # NOTE(review): the original author flagged this binning as possibly
    # incorrect; it is kept unchanged.
    binning = np.arange(frame_duration, longest_on, 1)
    print('max on time', longest_on)

    vals, bin_edges = np.histogram(on_times, bins=binning)
    np.set_printoptions(threshold=100000, suppress=True)

    # Log-spaced histogram of the on durations, printed for inspection only.
    logonbins = np.logspace(0, 1.49136169383, num=longest_on)
    logvals, _ = np.histogram(on_times, bins=logonbins)
    print('log scale on vals', logvals)

    # Convert bin edges from frames to time.
    bin_edges = bin_edges * frame_duration
    y_hist = vals
    bin_starts = bin_edges[:-1]
    x_hist = bin_starts
    max_xaxis = max(bin_edges)

    # --- off-duration histogram ---
    off_times = colourFilter[blink_off_label].astype('f')

    # Log-spaced histogram of the off durations, printed for inspection only.
    logoffbins = np.logspace(0, 4.92471852852, num=30)
    logoff_vals, _ = np.histogram(off_times, bins=logoffbins)
    print('log scale off vals', logoff_vals)

    if fixed_off_max == -1:
        fixed_off_max = off_times.max()

    binning_off = np.arange(0.5, fixed_off_max + 1, n_off_bins)
    vals_off, bin_edges_off = np.histogram(off_times, bins=binning_off)
    bin_edges_off = bin_edges_off * frame_duration

    y_hist_off = vals_off
    bin_starts_off = bin_edges_off[:-1]
    x_hist_off = bin_starts_off
    max_xaxis_off = max(bin_edges_off)

    # --- model selection and start parameters ---
    # Initial rates are derived from the mean durations expressed in time.
    mean_on = np.mean([i * frame_duration for i in on_times])
    mean_off = np.mean([i * frame_duration for i in off_times])
    if N == 1:
        res_fxn = tfoef
        fit_fxn = sefm
        start_params = [max(vals), 1.0 / mean_on]
        start_params_off = [max(vals_off), 1.0 / mean_off]
        fit_eqn = 'A*e^(-B*x)'
    elif N == 2:
        res_fxn = tsoef
        fit_fxn = defm
        # NOTE(review): these are one-element lists wrapping a 4-tuple, as
        # in the original; confirm this is the shape FitModel_N expects.
        start_params = [(np.max(vals), 1.0 / mean_on, np.max(vals),
                         5.0 / mean_on)]
        start_params_off = [(np.max(vals_off), 1.0 / mean_off,
                             np.max(vals_off), 5.0 / mean_off)]
        fit_eqn = 'A*e^(-B*x) + C*e^(-D*x)'
    else:
        # Previously this fell through and failed later with an
        # unbound-local error on start_params.
        raise ValueError('fit_order must be 1 or 2, got %r' % (fit_order,))

    # --- fitting (the first bin is excluded from both fits) ---
    fit_results = FitModel_N(res_fxn, start_params, x_hist[1:], y_hist[1:])
    fit_results_off = FitModel_N(res_fxn, start_params_off, x_hist_off[1:],
                                 y_hist_off[1:])

    fitErrors_on = _fit_errors(fit_results)
    fitErrors_off = _fit_errors(fit_results_off)

    # Smooth model curves for plotting.
    x_on_fit = np.linspace(0, max_xaxis, 100)
    y_on_fit = fit_fxn(x_on_fit, *fit_results.x)

    x_off_fit = np.linspace(0, max_xaxis_off, 100)
    y_off_fit = fit_fxn(x_off_fit, *fit_results_off.x)

    # Wrap the histograms (counts against upper bin edges) as tabular sources.
    on_hist_datasource = np.rec.fromarrays((y_hist, bin_edges[1:]),
                                           dtype=[('y_hist', '<f4'),
                                                  ('x_hist', '<f4')])
    off_hist_datasource = np.rec.fromarrays((y_hist_off, bin_edges_off[1:]),
                                            dtype=[('y_hist', '<f4'),
                                                   ('x_hist', '<f4')])

    filt_on = tabular.RecArraySource(on_hist_datasource.view(np.recarray))
    filt_off = tabular.RecArraySource(off_hist_datasource.view(np.recarray))

    if USE_GUI:
        # Equation label and text heights (fractions of the y-axis maximum).
        if N == 1:
            equation_label = r'$y = Ae ^{-Bx} + C$'
            on_fractions = (.5, .45, .40)
            off_fractions = (.6, .55, .50)
        else:
            equation_label = r'$y =  Ae^{-B*x} + C e^{-D*x}$'
            on_fractions = (.6, .55, .5, .45, .4)
            off_fractions = (.6, .55, .5, .45, .4)

        # On-duration histogram with the fitted curve.
        plt.figure()
        plt.bar(bin_starts,
                vals,
                width=bin_starts[1] - bin_starts[0],
                alpha=.4)
        plt.plot(x_on_fit, y_on_fit)
        xx = max(plt.xlim())
        yy = max(plt.ylim())
        _annotate_fit(plt, xx, yy, on_fractions, equation_label,
                      fit_results.x, fitErrors_on)

        plt.ylabel('Events')
        plt.xlabel('Blink duration')
        plt.title('Blink On Duration')

        # On-duration residuals.
        fig, ax = plt.subplots(1, 1)
        ax.scatter(x_hist, y_hist - fit_fxn(x_hist, *fit_results.x))
        ax.set_ylabel('Residual')
        ax.set_xlabel('Blink duration')
        ax.set_title('Blink On Duration residuals')

        # Histogram and fit for the time-to-next-blink values.
        plt.figure()
        plt.bar(bin_starts_off,
                vals_off,
                width=bin_starts_off[1] - bin_starts_off[0],
                alpha=.4)
        plt.plot(x_off_fit, y_off_fit)
        xx2 = max(plt.xlim())
        yy2 = max(plt.ylim())
        _annotate_fit(plt, xx2, yy2, off_fractions, equation_label,
                      fit_results_off.x, fitErrors_off)

        plt.ylabel('Events')
        plt.xlabel('Time to next blink')
        plt.title('Time to next blink in the same cluster')

        # Time-to-next-blink residuals.
        fig, ax = plt.subplots(1, 1)
        ax.scatter(x_hist_off,
                   y_hist_off - fit_fxn(x_hist_off, *fit_results_off.x))
        ax.set_ylabel('Residual')
        ax.set_xlabel('Time to next blink')
        ax.set_title('Time to next blink residuals')

    if to_json:
        import io
        import time
        import json
        # Python 2/3 compatibility: io.open in text mode needs unicode text.
        try:
            to_unicode = unicode
        except NameError:
            to_unicode = str

        # Generate dye kinetics structure for JSON file; each pair is
        # ordered [on, off].
        dk = {}
        dk['plog'] = [0, 0]
        dk['tlog'] = [1, 1] if log_bins else [0, 0]
        dk['tmin'] = [bin_edges[0], bin_edges_off[0]]
        dk['tmax'] = [bin_edges[-1], bin_edges_off[-1]]
        dk['off'] = y_hist_off.tolist()
        dk['on'] = y_hist.tolist()

        # Write JSON file
        timestr = time.strftime("%Y%m%d-%H%M%S")
        with io.open('empirical_histograms_' + timestr + '.json',
                     'w',
                     encoding='utf8') as outfile:
            str_ = '{"dk": ' + json.dumps(dk) + '}'
            outfile.write(to_unicode(str_))

    params_on = fit_results.x
    params_off = fit_results_off.x
    return filt_on, filt_off, params_on, params_off, fitErrors_on, fitErrors_off, fit_eqn
Ejemplo n.º 34
0
    #print(firstCollisionPosition)
    #print(collisions)

    fmt = '%d%%'
    fig = plt.figure()
    plt.title('COLLISION (with original data)')  # warning
    plt.xlabel(u"SHA3 bits", fontproperties='Comic Sans MS')  # warning

    ax1 = fig.add_subplot(111)
    ax1.plot(l, b, 'or-', label=u'First collision position')
    for i, (_x, _y) in enumerate(zip(l, b)):
        plt.text(
            _x,
            _y,
            b[i],
            color='black',
            fontsize=10,
        )
    ax1.legend(loc=1)
    ax1.set_ylim([0, 100000])
    ax1.set_ylabel('First collision position')

    #fisrt collision position
    ax2 = ax1.twinx()  # second axel
    plt.bar(l, a, alpha=0.3, color='blue', label=u'Total collisions')
    ax2.legend(loc=2)
    ax2.set_ylim([0, 100000])  #y axel range
    ax2.set_ylabel('Total collisions')
    plt.legend(prop={'family': 'Comic Sans MS', 'size': 8}, loc="upper left")
    #ax.set_xlabel('First collision position');