Esempio n. 1
0
    def _has_data_status(self):
        """Load the data file and switch the UI into its "data loaded" state.

        Sets ``self.has_data``, reads sheet 0 of ``self.data_file_path`` into
        ``self.raw_data``, rewrites ``-1`` entries of the last column to
        ``0.``, enables the configuration/training widgets and updates the
        text of ``label7``.
        """
        self.has_data = True
        loader = LabeledDatasetFromFile(self.data_file_path)
        self.raw_data = loader.get_data_by_sheet(0)

        # 0/1 is the default encoding for a binary classification problem,
        # so -1 in the last column becomes 0.  More classes are fine too,
        # e.g. four classes are encoded as 0, 1, 2, 3.
        negative_rows = self.raw_data[:, -1] == -1
        self.raw_data[negative_rows, -1] = 0.

        # Widgets are looked up and enabled one at a time, in this order.
        widget_names = (
            'output_nodes_spin_box',
            'layer_pos_spin_box',
            'insert_layer_btn',
            'remove_layer_btn',
            'node_pos_spin_box',
            'insert_node_btn',
            'remove_node_btn',
            'ratio_slider',
            'learning_rate_line_edit',
            'max_epoch_line_edit',
            'train_btn',
            'save_btn',
            'load_btn',
        )
        for widget_name in widget_names:
            getattr(self, widget_name).setEnabled(True)

        self.label7.setText("Now you can train your train data.")
                plt.plot(x, y, '*r')
            else:
                plt.plot(x, y, '*g')

        plt.title(title)
        plt.show()
        end_time = time.time()
        print('cost time for visualization (title: %s) is %d sec.' %
              (title, end_time - start_time))


#%%
if __name__ == '__main__':
    # Demo script: load the demo dataset, split it and build an SVM model.

    # Prepare the data.
    data_address = r'..\dataset\demodata.xls'
    datasetff = LabeledDatasetFromFile(data_address).get_data_by_sheet(0)
    # astype() returns a converted copy rather than converting in place, so
    # the result must be rebound.  The builtin float is used because the
    # np.float alias was removed in NumPy 1.24.
    datasetff = datasetff.astype(float)
    datasetff = datasetff[:, -3:]  # use only the continuous attribute values
    # Replace 0 with -1 in the last column.
    datasetff[datasetff[:, -1] == 0, -1] = -1

    # The first 100 rows are used for training, the remainder for testing.
    train_data = datasetff[:100, :]
    test_data = datasetff[100:, :]
    dataset = LabeledTrainAndTestDataset(train_data, test_data=test_data)
    # dataset.visual_data(train_data)

    # Build the model.
    svm = SupportVectorMachine(train_data,
                               test_data,
                               epsilon=0.0001,
                               C=200,
                               kernel_option=('rbf', 1.3))
Esempio n. 3
0
        turtle.goto(end_x, end_y)
        turtle.penup()
        turtle.goto((start_x + end_x) / 2., (start_y + end_y) / 2.)
        turtle.pendown()
        turtle.write('final model')

        turtle.mainloop()


if __name__ == '__main__':
    # Demo script: train a Perceptron on a hand-picked subset of the
    # watermelon dataset, draw the result with turtle and print the error.

    def f(x):
        """Return 1 for a strictly positive input, otherwise 0."""
        return 1 if x > 0 else 0

    data_address = r'..\dataset\watermelon3.xlsx'
    datasetff = LabeledDatasetFromFile(data_address).get_data_by_sheet(
        0, mode=CONSTANT.TRANS)
    # astype() returns a converted copy rather than converting in place, so
    # the result must be rebound.  The builtin float is used because the
    # np.float alias was removed in NumPy 1.24.
    datasetff = datasetff.astype(float)
    train_data = datasetff[:, -3:]  # use only the continuous attribute values
    # Rows selected by index; per the variable name these are meant to form
    # a linearly separable subset.
    linear_separable_data = train_data[
        [0, 1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 16], :]

    # The same subset is passed as both the second and the first data
    # argument of Perceptron.
    classifier = Perceptron(linear_separable_data,
                            linear_separable_data,
                            f,
                            epsilon=0.000001)
    classifier.train(max_epoch=1000, learning_rate=0.00001)
    # classifier.visual_train_data_and_model(visual_process=True, step=200)
    classifier.visualize_train_data_and_model_with_turtle()
    error = classifier.evaluate_train_data()
    # -1 is skipped here; presumably a "no result" sentinel of
    # evaluate_train_data -- TODO confirm against its implementation.
    if error != -1:
        print('error rate: %f' % error)
            projected_y = projected_x * k
            plt.plot(projected_x, projected_y, '*g')
            plt.plot([x[0], projected_x], [x[1], projected_y],
                     color="green",
                     linestyle=":")

        plt.title('Linear Discrimenant Analysis')
        plt.xlabel('密度')
        plt.ylabel('含糖率')
        plt.show()


#%%
if __name__ == '__main__':
    data_address = r'..\dataset\watermelon3.xlsx'
    datasetff = LabeledDatasetFromFile(data_address)
    entire_train_data = datasetff.get_data_by_sheet(
        0, mode=MyMachineLearning.utils.CONSTANT.TRANS)
    entire_train_data.astype(np.float)
    selected_train_data = datasetff.get_data_by_sheet(
        0,
        mode=MyMachineLearning.utils.CONSTANT.TRANS,
        feat_indces=[6, 7],
        label_indces=[8])
    selected_train_data.astype(np.float)

    # 线性回归
    print('1. Linear Regression:')
    regression_data_address = r'D:\Project\Github\LearningMachineLearning\dataset\salary_data.csv'
    regrsssion_data = LabeledDatasetFromFile(
        regression_data_address,
    def test(self):
        if not self._is_trained:
            return

        x = self.test_data[:, :2]
        y = self.test_data[:, 2]

        out = self.net(x)
        pred = torch.max(out, 1)[1]
        pred_y = pred.data.numpy()
        target_y = y.data.numpy()
        accuracy = float(
            (pred_y == target_y).astype(int).sum()) / float(target_y.size)

        return accuracy


if __name__ == '__main__':
    # Demo script: train SimpleFCNN on the demo dataset and print the value
    # returned by its test() method.
    data_address = r'..\..\dataset\demodata.xls'
    train_data = LabeledDatasetFromFile(data_address).get_data_by_sheet(0)
    train_data[train_data[:, 2] == -1, 2] = 0.  # preprocess: -1 -> 0 in column 2
    # astype() returns a converted copy rather than converting in place, so
    # the result must be rebound.  The builtin float is used because the
    # np.float alias was removed in NumPy 1.24.
    # NOTE(review): this yields float64, so torch.from_numpy below produces
    # double tensors -- confirm SimpleFCNN handles that dtype.
    train_data = train_data.astype(float)
    np.random.shuffle(train_data)

    # After shuffling, the first 150 rows are used for training and the
    # remaining rows for testing.
    test_data = torch.from_numpy(train_data[150:, :])
    train_data = torch.from_numpy(train_data[:150, :])

    fcnn = SimpleFCNN([2, 7, 5, 2], train_data, test_data)
    fcnn.train(max_epoch=20000)
    print(fcnn.test())