Example #1
def visualization(self):
    feature_1 = self.x_pca[:, 0]
    feature_2 = self.x_pca[:, 1]
    labels = self.y
    cdict = {0: 'red', 1: 'green'}
    labl = {0: 'without bug', 1: 'bug'}
    marker = {0: '*', 1: 'o'}
    alpha = {0: .3, 1: .5}
    fig, ax = plt.subplots(figsize=(7, 5))
    fig.patch.set_facecolor('white')
    for l in np.unique(labels):
        ix = np.where(labels == l)
        ax.scatter(feature_1[ix],
                   feature_2[ix],
                   c=cdict[l],
                   s=100,
                   label=labl[l],
                   marker=marker[l],
                   alpha=alpha[l])
    # for loop ends
    plt.xlabel("First Principal Component", fontsize=14)
    plt.ylabel("Second Principal Component", fontsize=14)
    plt.legend()
    plt.savefig(
        str(pathlib.Path().absolute()) + "/File/PCA_visualization.png")
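The method above assumes a surrounding class that has already stored a two-component PCA projection in self.x_pca and the matching binary labels in self.y. A minimal sketch of that context follows; the class name, the scikit-learn calls, and the sample data are illustrative assumptions, not part of the original example.

import pathlib  # needed by visualization()

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA


class BugVisualizer:
    """Hypothetical container; visualization() from Example #1 would be a method here."""

    def __init__(self, x, y):
        self.y = np.asarray(y)                             # 0 = without bug, 1 = bug
        self.x_pca = PCA(n_components=2).fit_transform(x)  # 2-D PCA projection

    # def visualization(self): ...   (the method shown above goes here)


if __name__ == "__main__":
    x, y = make_classification(n_samples=200, n_features=10, random_state=0)
    viz = BugVisualizer(x, y)
    # viz.visualization()  # writes ./File/PCA_visualization.png (the File/ directory must already exist)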
Example #2
def threshold_search(true, prob, criteria):
    true = true.to_numpy()
    prob_train, prob_test, true_train, true_test = train_test_split(prob, true, test_size=0.2, random_state=1234)

    thresholds = np.linspace(0, 1, 101)
    all_f1_train = np.zeros(len(thresholds))
    for j in range(len(thresholds)):
        predictions_train = np.ones(len(prob_train))
        predictions_train[prob_train < thresholds[j]] = 0

        macro_f1_train = macro_weighted_f1(true_train, predictions_train, [0, 1])
        all_f1_train[j] = macro_f1_train

    # pick the threshold that maximises the training F1 (first one if tied)
    best_threshold = thresholds[np.argmax(all_f1_train)]
    predictions_test = np.ones(len(prob_test))
    predictions_test[prob_test < best_threshold] = 0
    print("The best threshold for this prediction is: %s" % best_threshold)

    plt.plot(thresholds, all_f1_train, 'b')
    plt.axvline(x=0.5, linestyle=':', color='r')
    plt.axvline(x=best_threshold, linestyle='--', color='g')
    plt.axhline(y=all_f1_train[np.where(thresholds == 0.5)], linestyle=':', color='r')
    plt.axhline(y=max(all_f1_train), linestyle='--', color='g')
    plt.xlabel("Threshold")
    plt.ylabel("Macro F1")
    plt.savefig('{0} threshold plot.png'.format(criteria), bbox_inches='tight')
    plt.clf()

    return best_threshold
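macro_weighted_f1 is not defined in this snippet; its name suggests a macro-averaged F1 computed over the given label set. A plausible stand-in built on sklearn.metrics.f1_score is sketched below, together with a toy call to threshold_search; this is an assumption about the helper, not its original implementation.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split


def macro_weighted_f1(true, pred, labels):
    """Assumed behaviour: macro-averaged F1 restricted to the listed labels."""
    return f1_score(true, pred, labels=labels, average='macro')


# toy usage: random probabilities scored against random binary labels
rng = np.random.default_rng(0)
true = pd.Series(rng.integers(0, 2, size=500))
prob = rng.random(500)
best = threshold_search(true, prob, criteria='demo')  # also writes 'demo threshold plot.png'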
Example #3
def get_hist(data, col):
    columndates, orderdates = column_and_order_dates(data, col)
    difference_dates, date_values = days_difference(columndates, orderdates, 31)

    plt.hist(x=difference_dates, bins=100)
    plt.xlabel('Day')
    plt.ylabel('Amount')
    plt.show()
    print(date_values)
Example #4
    def create_plot(logbook, name_file):
        maxFitnessValues, meanFitnessValues, minFitnessValues, medianFitnessValues, stdFitnessValues = \
            logbook.select("max", "avg", "min", "median", "std")
        plt.plot(maxFitnessValues, color='red', label="Worst Fitness")
        plt.plot(meanFitnessValues, color='green', label="Mean Fitness")
        plt.plot(minFitnessValues, color='orange', label="Best Fitness")
        plt.plot(medianFitnessValues, color='blue', label="Median Fitness")
        plt.plot(stdFitnessValues, color='pink', label="Std. Fitness")

        plt.xlabel('Generation')
        plt.ylabel('Max / Average / Min / Median / Std Fitness')
        plt.title('Max, Average, Min, Median and Std Fitness over Generations')
        plt.legend(loc='lower right')
        plt.savefig(name_file)
        plt.close()
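logbook here is a DEAP tools.Logbook whose entries were recorded with statistics registered under the same names that create_plot() selects. A sketch of that setup follows (standard DEAP usage; the evolutionary loop and population handling are omitted and only hinted at in comments).

import numpy as np
from deap import tools

# statistics registered under the names create_plot() expects
stats = tools.Statistics(key=lambda ind: ind.fitness.values)
stats.register("max", np.max)
stats.register("avg", np.mean)
stats.register("min", np.min)
stats.register("median", np.median)
stats.register("std", np.std)

logbook = tools.Logbook()
logbook.header = ["gen", "max", "avg", "min", "median", "std"]

# inside the generational loop (not shown):
# record = stats.compile(population)
# logbook.record(gen=gen, **record)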
Example #5
def show_images(images, labels, preds):
    plt.figure(figsize=(8, 4))
    for i, image in enumerate(images):
        plt.subplot(1, 6, i + 1, xticks=[], yticks=[])
        image = image.numpy().transpose((1, 2, 0))
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        image = image * std + mean
        image = np.clip(image, 0., 1.)
        plt.imshow(image)
        col = 'green'
        if preds[i] != labels[i]:
            col = 'red'

        plt.xlabel(f'{class_names[int(labels[i].numpy())]}')
        plt.ylabel(f'{class_names[int(preds[i].numpy())]}', color=col)
    plt.tight_layout()
    plt.show()
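The image tensors, preds, and the global class_names come from a PyTorch pipeline that is not shown; the mean/std arrays being undone before display are the standard ImageNet normalisation constants. A minimal sketch of the assumed context follows; the dataset path and the model are placeholders chosen for illustration.

import matplotlib.pyplot as plt  # used by show_images()
import numpy as np               # used by show_images()
import torch
import torchvision
from torchvision import datasets, transforms

# preprocessing that matches the un-normalisation inside show_images()
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

dataset = datasets.ImageFolder('data/val', transform=transform)  # hypothetical path
class_names = dataset.classes
loader = torch.utils.data.DataLoader(dataset, batch_size=6, shuffle=True)

model = torchvision.models.resnet18(weights='DEFAULT')            # placeholder backbone
model.fc = torch.nn.Linear(model.fc.in_features, len(class_names))  # untrained head sized to the dataset
model.eval()

images, labels = next(iter(loader))
with torch.no_grad():
    preds = torch.argmax(model(images), dim=1)
show_images(images, labels, preds)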
Example #6
with open(file_graph) as f:
    content = f.readlines()
# read each line
content = [x.strip() for x in content]
original_cc = []
supergraph_cc = []
for line in content:
    value = line.split(" ")
    if len(value):
        supergraph_cc.append(float(value.pop()))
        original_cc.append(float(value.pop()))

plt.clf()
plt.plot(k_array, original_cc, 'r--', k_array, supergraph_cc, 'g-')
plt.ylabel("Clustering Coefficient")
plt.xlabel("k_degree")
plt.legend(('Original Graph', 'Supergraph'),
           loc='lower center',
           shadow=True)
plt.title(str(sys.argv[4]))
plt.savefig("metric_cc_web.png")  # if the web dataset was chosen
# plt.savefig("metric_cc_socfb.png")
plt.clf()
list_norm = []
for i in norm:
    list_norm.append(float(i))
list_k_array = []
for i in k_array:
    list_k_array.append(float(i))
plt.plot(list_k_array, list_norm, 'r--')
Example #7
        if size == 1:
            for line in content:
                k_array = line.split(" ")

            file_graph = str(sys.argv[2])

            if os.path.exists(file_graph):
                # if the file exists
                with open(file_graph) as f:
                    content = f.readlines()
                # read each line
                content = [x.strip() for x in content]
                ratio = None
                size = len(content)
                if size == 1:
                    for line in content:
                        ratio = line.split(" ")
                    list_ratio = []
                    for i in ratio:
                        list_ratio.append(float(i))
                    list_k_array = []
                    for i in k_array:
                        list_k_array.append(float(i))
                    plt.clf()
                    plt.figure(figsize=(16, 10))
                    plt.ylabel("Ratio")
                    plt.xlabel("k_degree")
                    plt.plot(list_k_array, list_ratio, 'r--')
                    plt.title("Graph friend 1000 10 100")
                    plt.savefig("ratio_fakedataset.png", dpi=120)
Example #8
epochs = 5  # train for 5 epochs
model1.summary()  # print the model summary
model1.compile(
    loss='sparse_categorical_crossentropy',  # compile the model
    optimizer='adam',
    metrics=['accuracy'])
# hold out 20% of the training set for validation
history = model1.fit(x_train,
                     y_train,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_split=0.2)

#----------------------------------------------- save the model and visualize it --------------------------
# save the model
model1.save('model_CNN_text.h5')
# visualize the model architecture
plot_model(model1, to_file='model_CNN_text.png', show_shapes=True)
# load the model back
model = load_model('model_CNN_text.h5')
y_new = model.predict(x_train[0].reshape(1, 50))
# visualize the training results
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Valid'], loc='upper left')
plt.savefig('Valid_acc.png')
plt.show()
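Depending on the Keras/TensorFlow version, the training history records accuracy under 'accuracy'/'val_accuracy' rather than 'acc'/'val_acc', so the keys above only work on older versions. A small guard (an addition, not part of the original snippet) keeps the plot version-agnostic:

# use whichever accuracy key this Keras version actually recorded
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])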
Example #9
#----------------------------------------------------- inspect the decoded output --------------------------------------------
decoded_imgs = model.predict(x_test)
n = 10
plt.figure(figsize=(20, 6))
for i in range(n):
    # original image
    ax = plt.subplot(3, n, i+1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)


    # reconstructed (decoded) image
    ax = plt.subplot(3, n, i+n+1)
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

plt.show()
#---------------------------------------------------- visualize the training process ---------------------------------------------
print(history.history.keys())
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
Example #10
                              workers=10)

#------------------------------------------------------ save the model ---------------------------------------
model.summary()
# create the save directory if it does not already exist
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)  # save the model

#------------------------------------------------------ visualize the training process -----------------------------------
# plot training and validation accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Valid'], loc='upper left')
plt.savefig('tradition_cnn_valid_acc.png')
plt.show()

# plot training and validation loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Valid'], loc='upper left')
plt.savefig('tradition_cnn_valid_loss.png')
plt.show()