Example 1
def predict_SVM():
    '''
    Create the data and fit the model
    '''
    N = 2000
    cls1 = np.random.randn(1000, 2)
    cls2 = np.random.randn(1000, 2) + np.array([5, 5])

    # Build the data matrix X
    X = np.vstack((cls1, cls2))
    # Labels: +1 for the first half (cls1), -1 for the second half (cls2)
    T = []
    for i in range(int(N / 2)):
        T.append(1.0)

    for i in range(int(N / 2)):
        T.append(-1.0)
    T = np.array(T)

    ## Model fitting
    model = SVM()
    model.fit(X, T)
    pred_list = np.sign(model.w0 + np.dot(X, model.w))

    ## Predict: count how many labels the fitted hyperplane reproduces
    ok = []
    for i in range(len(X)):
        if T[i] == pred_list[i]:
            ok.append(1)

        else:
            ok.append(0)

    acc_SVM = np.sum(ok) / len(ok)
    print('Accuracy is {}'.format(acc_SVM))
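The two label-building loops and the accuracy loop above can each be collapsed into one vectorized numpy expression. A minimal sketch of the same computation, where `w0` and `w` are hypothetical stand-ins for the fitted `model.w0` and `model.w`:

import numpy as np

N = 2000
X = np.vstack((np.random.randn(N // 2, 2), np.random.randn(N // 2, 2) + 5.0))
T = np.concatenate((np.ones(N // 2), -np.ones(N // 2)))   # replaces both append loops
w0, w = 5.0, np.array([-1.0, -1.0])                       # hypothetical fitted parameters

pred_list = np.sign(w0 + X @ w)                           # same decision rule
acc_SVM = np.mean(pred_list == T)                         # replaces the ok loop
print('Accuracy is {}'.format(acc_SVM))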
Example 2
def build(kernel, metric, keys_limit, svm_C, logs):
    trainX = genfromtxt('input/arcene_train.data', delimiter=' ')
    trainY = genfromtxt('input/arcene_train.labels', delimiter=' ')
    validX = genfromtxt('input/arcene_valid.data', delimiter=' ')
    validY = genfromtxt('input/arcene_valid.labels', delimiter=' ')

    keys = metric.build(trainX.transpose(),
                        trainY,
                        logs=logs,
                        limit=keys_limit)

    tX = []
    for x in trainX:
        tX.append(np.take(x, keys))

    tX = np.array(tX)

    clf = SVM(kernel=kernel.kernel, C=svm_C)
    clf.fit(tX, trainY)

    vX = []
    for x in validX:
        vX.append(np.take(x, keys))

    vX = np.array(vX)

    predict_arr = [clf.predict(x) for x in vX]

    confusion_matrix = Statistic.get_metrics(predict_arr, validY)
    f_measure = Statistic.get_f_measure(confusion_matrix)

    return keys, confusion_matrix, f_measure
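The two `np.take` loops in `build` amount to selecting the chosen feature columns from each data matrix. A minimal equivalence check, assuming `keys` is a 1-D array of column indices (the small matrix here is purely illustrative):

import numpy as np

trainX = np.arange(12.0).reshape(3, 4)   # hypothetical (n_samples, n_features) matrix
keys = np.array([0, 2])                  # hypothetical selected feature indices

tX_loop = np.array([np.take(x, keys) for x in trainX])  # per-row, as in build()
tX_fancy = trainX[:, keys]                              # single fancy-indexing step
assert np.array_equal(tX_loop, tX_fancy)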
Example 3
    def Step_B(self, T0_list, T1_list):
        '''
        Use the SVM to partition the design space, then refine the
        hyperplane according to the parameters in T1_list
        '''
        if len(T0_list) != len(T1_list):
            raise ValueError('T0_list and T1_list differ in length')

        # theoretical separating function
        f = self.f
        data = np.loadtxt(self.logPath + '/A_Samples.txt')
        samples = data[:, 0:f.dim]
        mark = data[:, f.dim + 1]
        Kernel_Gau = lambda x, y: np.exp((-np.linalg.norm(x - y)**2) / 80)
        Kernel_Poly = lambda x, y: (np.dot(x, y) + 1)**7
        svm = SVM(5,
                  kernal=Kernel_Gau,
                  path=self.logPath,
                  fileName='SVM_Step_B.txt')
        print('Training the initial SVM...')
        svm.fit(samples, mark, maxIter=20000, maxAcc=1.1)
        test28(svm=svm)

        # record the sample count after each infill round
        pointNum = np.zeros(len(T1_list) + 1)
        pointNum[0] = samples.shape[0]

        for k in range(len(T1_list)):
            print('\nInfill round %d...' % (k + 1))
            new_x = svm.infillSample4(T0_list[k], T1_list[k], f.min, f.max,
                                      [12, 6, 9, 9, 9])
            if new_x is None:
                print('With T1 set to %.2f, no points were added' % T1_list[k])
                pointNum[k + 1] = samples.shape[0]
                continue
            else:
                num = new_x.shape[0]

            new_mark = np.zeros(num)
            for i in range(num):
                new_mark[i] = f.isOK(new_x[i, :])
            samples = np.vstack((samples, new_x))
            mark = np.append(mark, new_mark)
            print('Training the SVM...')
            svm.fit(samples, mark, 20000, maxAcc=1.1)

            test28(svm=svm)
            pointNum[k + 1] = samples.shape[0]
            print('Sample count after this round: %d' % pointNum[k + 1])

        value = np.zeros(samples.shape[0])
        for i in range(samples.shape[0]):
            value[i] = f.aim(samples[i, :])
        value = value.reshape((-1, 1))
        mark = mark.reshape((-1, 1))
        storeData = np.hstack((samples, value, mark))
        np.savetxt(self.logPath + '/B_Samples.txt', storeData)

        print('Sample counts per round:')
        print(pointNum)
        print('Infill finished')
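The lambda `Kernel_Gau` above evaluates exp(-||x - y||^2 / 80) one pair at a time. A vectorized Gram-matrix sketch of the same kernel (the bandwidth 80 is taken from the code; everything else here is illustrative):

import numpy as np

def gram_gaussian(A, B, bandwidth=80.0):
    """Pairwise exp(-||a - b||^2 / bandwidth) between rows of A and rows of B."""
    sq = (np.sum(A**2, axis=1)[:, None]
          + np.sum(B**2, axis=1)[None, :]
          - 2.0 * A @ B.T)
    return np.exp(-np.maximum(sq, 0.0) / bandwidth)  # clamp tiny negatives from round-off

A = np.random.randn(5, 3)
K = gram_gaussian(A, A)
assert np.allclose(np.diag(K), 1.0)   # k(x, x) = 1 for a Gaussian kernel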
Example 4
def cross_validation(x_train, y_train, C, gamma):
    model = SVM(C=C, kernel='rbf', gamma=gamma, tol=1e-2)
    cross = lambda arr, sz: [arr[i:i + sz] for i in range(0, len(arr), sz)]
    x_cross_val = np.array(cross(x_train, 160))
    y_cross_val = np.array(cross(y_train, 160))
    indices = np.array(range(5))
    score = 0
    for i in range(5):
        curr_indices = np.delete(indices, i)
        x_curr_valid = x_cross_val[i]
        y_curr_valid = y_cross_val[i]
        x_curr_train = np.vstack(x_cross_val[curr_indices])
        y_curr_train = y_cross_val[curr_indices].ravel()
        model.fit(x_curr_train, y_curr_train)
        model.number_support_vectors()
        y_curr_valid_predict = model.predict(x_curr_valid, x_curr_train,
                                             y_curr_train)
        curr_score = model.score_error(y_curr_valid_predict, y_curr_valid)
        print("i =", i, ". Score error =", curr_score)
        score += curr_score
    print("Average score: ", score / 5)
    return score / 5
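The chunking lambda above assumes exactly 800 training rows (5 folds of 160). `np.array_split` gives the same 5-fold structure without hard-coding the fold size; a sketch, assuming only that `x_train` and `y_train` have matching lengths (the random data below is a placeholder):

import numpy as np

x_train = np.random.randn(800, 4)        # hypothetical training data
y_train = np.sign(np.random.randn(800))

x_folds = np.array_split(x_train, 5)
y_folds = np.array_split(y_train, 5)
for i in range(5):
    x_valid, y_valid = x_folds[i], y_folds[i]
    x_fit = np.vstack([f for j, f in enumerate(x_folds) if j != i])
    y_fit = np.concatenate([f for j, f in enumerate(y_folds) if j != i])
    # fit a model on (x_fit, y_fit) and score it on (x_valid, y_valid) here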
Example 5
def real_data_train():
    x, y = create_array_real_data()
    shuffle_index = np.random.permutation(len(y))
    x = x[shuffle_index]
    y = y[shuffle_index]
    # 1000 elements: 800 for training, 200 for testing
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)
    # C = [1, 10]
    # gamma = [0.01, 0.1, 0.5, 1.0]
    # average_error = np.zeros((len(C), len(gamma)))
    # for i in range(len(C)):
    #     for j in range(len(gamma)):
    #         print("Cross-validation for parameters C = ", C[i], ", gamma = ", gamma[j])
    #         average_error[i][j] = cross_validation(x_train, y_train, C=C[i], gamma=gamma[j])
    # find C = 1, gamma = 0.01
    print("Create model C = ", 1000, ", gamma = ", 1)
    model = SVM(C=1, kernel='rbf', gamma=0.01, tol=1e-2)
    print("Fit model with train sequence")
    model.fit(x_train, y_train)
    model.number_support_vectors()
    print("Predict model on test sequence")
    y_test_predict = model.predict(x_test, x_train, y_train)
    score = model.score_error(y_test_predict, y_test)
    print("Score error = ", score)
Example 6
    def run_test(self, X, y, kernel):
        n = int(X.shape[0] * 0.8)
        K = self.gram_matrix(X, kernel)

        svm = SVM(kernel, 1.0, K)
        svm.fit(np.arange(n), y[:n])
        score = svm.score(np.arange(n, X.shape[0]), y[n:])

        return score
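`run_test` passes index arrays rather than raw samples because this SVM works on a precomputed Gram matrix `K`: `K[i, j]` already holds `kernel(X[i], X[j])`, so train and test rows are addressed by position. A minimal sketch of that precomputation, assuming `kernel` is any function of two vectors:

import numpy as np

def gram_matrix(X, kernel):
    """Precompute K[i, j] = kernel(X[i], X[j]) for all sample pairs."""
    n = X.shape[0]
    K = np.empty((n, n))
    for i in range(n):
        for j in range(n):
            K[i, j] = kernel(X[i], X[j])
    return K

linear = lambda a, b: float(np.dot(a, b))
X = np.random.randn(6, 2)
K = gram_matrix(X, linear)
assert np.allclose(K, X @ X.T)   # for the linear kernel, K is just X X^T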
Example 8
def test_image():
    X, y = get_image_data()
    X = np.column_stack([[1] * X.shape[0], X])
    X_train,X_test,y_train,y_test = \
       train_test_split(X,y,test_size=0.2,random_state = np.random.RandomState(42))
    clf = SVM()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    correct_rate = 1 - np.mean(y_test != y_pred)
    print('correct_rate:', correct_rate)
Example 10
    def test_simple(self):
        X = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0],
                      [1.0, 1.0], [2.4, 2.4], [2.6, 2.6], [4.0, 4.0]])
        y = np.array([0.0, 0.0, 1.0, 1.0])
        K = self.gram_matrix(X, kernels.linear)
        svm = SVM(kernels.linear, 1.0, K)
        svm.fit(np.arange(4), y)

        result = svm.predict(np.arange(4, 8))
        np.testing.assert_allclose(result, [0, 0, 1, 1])
Example 11
def non_linear_train():
    model = SVM(C=10, kernel='rbf', gamma=0.1, tol=1e-3)
    x, y = circle_model()
    print("-----------------------")
    print("Training set: ")
    print("x = \n", x)
    print("y = ", y)
    print("-----------------------")
    model.fit(x, y)
    model.find_support_vectors(x, y)
    model.draw_decision(x, y)
Example 12
    def fit(self, X_idx, y):
        self.classes = np.unique(y)
        logging.debug('Fitting %s data points with %s different classes '\
                      'with multiclass svm', X_idx.shape[0], len(self.classes))

        self.svms = []
        for class_a, class_b in itertools.combinations(self.classes, 2):
            filtered_X_idx, filtered_y = self.filter_data(X_idx, y, class_a, class_b)

            svm = SVM(self.kernel, self.C, self.K)
            svm.fit(filtered_X_idx, filtered_y)
            self.svms.append((class_a, class_b, svm))
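`fit` above trains one binary SVM per unordered class pair. At prediction time the usual counterpart is majority voting over those pairwise machines. A hedged sketch of that voting step, assuming each stored `svm.predict` returns +1 for `class_a` and -1 for `class_b` (this codebase's actual sign convention may differ):

import numpy as np

def predict_one_vs_one(svms, X_idx, n_samples):
    """Majority vote over pairwise SVMs; svms is a list of (class_a, class_b, svm)."""
    votes = {}
    for class_a, class_b, svm in svms:
        pred = np.asarray(svm.predict(X_idx))        # assumed: +1 -> class_a, -1 -> class_b
        for cls in (class_a, class_b):
            votes.setdefault(cls, np.zeros(n_samples))
        votes[class_a] += (pred == 1)
        votes[class_b] += (pred == -1)
    classes = sorted(votes)
    counts = np.stack([votes[c] for c in classes])   # (n_classes, n_samples) vote tallies
    return np.array(classes)[np.argmax(counts, axis=0)]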
Example 13
def main():
    dataset_dir = '../data/student-mat.csv'
    select_col = ['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu', 'Mjob', 'Fjob', 'reason',
                  'guardian', 'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid', 'activities',
                  'nursery', 'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health',
                  'absences',]
    #               'G1', 'G2']
    # keep only the two prior-grade columns (this overrides the full list above)
    select_col = ['G1', 'G2']
    train_x, train_y, test_x, test_y = data_loader(dataset_dir, select_col=select_col)
    svm = SVM()
    svm.fit(train_x, train_y)
    predict_y = svm.predict(test_x)
    result = evaluate(test_y, predict_y)
    print(result)
Example 14
def linear_train():
    model = SVM(C=0.5, kernel='linear', gamma='auto', tol=1e-3)
    length = 30
    x, y = make_blobs(n_samples=length, centers=2, random_state=6)
    y[y == 0] = -1
    print("-----------------------")
    print("Training set: ")
    print("x = \n", x)
    print("y = ", y)
    print("-----------------------")
    model.fit(x, y)
    model.find_support_vectors(x, y)
    model.draw_decision(x, y)
Example 15
def test_multi():
    X, y = get_multi_data()
    X = np.column_stack([[1] * X.shape[0], X])
    X_train,X_test,y_train,y_test = \
       train_test_split(X,y,test_size=0.2,random_state = np.random.RandomState(42))
    clf = SVM()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    correct_rate = 1 - np.mean(y_test != y_pred)
    print('correct_rate:', correct_rate)

    plot_samples(X, y)
    print(clf.w)
    for w in clf.w:
        plot_line(X[:, 1:], w)
Example 16
def main(C=1.0, epsilon=0.001):
    # Split data
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    class_chosen = 1  # one-vs-rest: this class becomes +1, everything else -1
    y = np.where(y == class_chosen, 1, -1)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

    # Initialize model
    model = SVM(X_train, y_train, C=C, tolerance=epsilon)

    # Fit model
    support_vectors, iterations = model.fit()
    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction
    y_hat = model.predict(X_test)

    # print(y_hat.shape, y_test.shape)

    # Calculate accuracy
    acc = calc_acc(y_test, y_hat)

    print("Support vector count: %d" % (sv_count))
    # print("bias:\t\t%.3f" % (model.b))
    # print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
Example 17
def main(filename='data\\iris-virginica.txt',
         C=1.0,
         kernel_type='linear',
         epsilon=0.001):
    # Load data
    (data, _) = readData('%s\\%s' % (filepath, filename), header=False)
    data = data.astype(float)

    # Split data
    X, y = data[:, 0:-1], data[:, -1].astype(int)

    # Initialize model
    model = SVM()

    # Fit model
    support_vectors, iterations = model.fit(X, y)

    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction
    y_hat = model.predict(X)

    # Calculate accuracy
    acc = calc_acc(y, y_hat)

    print("Support vector count: %d" % (sv_count))
    print("bias:\t\t%.3f" % (model.b))
    print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
Example 18
def best_params():
    # assumes module-level X_train, Y_train, X_test, Y_test and an accuracy() helper
    lr_list = [0.1, 0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005]
    acc_max = 0
    lr_max = 0
    lamda_max = 0
    lambda_list = [0.1, 0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005]
    for lr_val in lr_list:
        for lmda in lambda_list:
            clf = SVM(lr=lr_val, lamda=lmda)
            clf.fit(X_train, Y_train)
            predictions = clf.predict(X_test)
            acc = accuracy(Y_test, predictions)
            if acc > acc_max:
                acc_max = acc
                lr_max = lr_val
                lamda_max = lmda
    return (lr_max, lamda_max, acc_max)
Example 19
def stepB_1():
    '''
    First version of step B: a fixed number of infill rounds.
    Tests the SVM infill algorithm with a fixed number of rounds.
    '''

    # theoretical separating function
    f = TestFunction_G8()
    data = np.loadtxt('./Data/约束优化算法测试1/samples1.txt')
    samples = data[:, 0:2]
    mark = data[:, 3]

    # polynomial kernel of degree 5
    svm = SVM(5, kernal=Kernal_Polynomial, path='./Data/约束优化算法测试1')
    svm.fit(samples, mark, maxIter=50000)
    svm.show()

    T1_list = [1, 0.8, 0.6, 0.4]
    pointNum = np.zeros(len(T1_list) + 1)
    pointNum[0] = samples.shape[0]

    for k in range(len(T1_list)):
        if k < 4:  # with four rounds this branch always fires; infillSample2 is never reached
            new_x = svm.infillSample1(0.5, T1_list[k], f.min, f.max, [40, 40])
        else:
            new_x = svm.infillSample2(0.5, T1_list[k], f.min, f.max, [40, 40])
        num = new_x.shape[0]
        if num == 0:
            print('With T1 set to %.2f, no points were added' % T1_list[k])
            pointNum[k + 1] = samples.shape[0]
            continue
        new_y = np.zeros(num)
        for i in range(num):
            new_y[i] = f.isOK(new_x[i, :])
        samples = np.vstack((samples, new_x))
        mark = np.append(mark, new_y)
        svm.fit(samples, mark, 100000)
        svm.show()
        pointNum[k + 1] = samples.shape[0]

    print('Sample counts per round:')
    print(pointNum)
    print('Infill finished')
Example 20
def main():
    dirList = list()
    for year in ('2010', '2011', '2012', '2013', '2014'):
        dirList.append(listdir('./data/Asthma/%s/' % year))

    pre = PreProcessor()
    clf = SVM(kernel=GaussianKernel(5.0), C=1.0)

    X_train, y_train = pre.loadTrainingSet(
        "training_data/Asthma_Sample_Tokenized.csv")
    clf.fit(X_train, y_train)
Example 21
def test_binary():
    X, y = get_binary_data()
    y[y == 0] = -1
    # X = np.column_stack([[1]*X.shape[0],X])
    X_train,X_test,y_train,y_test = \
       train_test_split(X,y,test_size=0.2,random_state = np.random.RandomState(42))
    plot_samples(X_train, y_train)
    plt.show()

    C = [0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3]
    for c in C:
        clf = SVM(C=c)
        clf.fit(X_train, y_train)

        # y_pred = clf.predict(X_test)
        # correct_rate = 1 - np.mean(y_test != y_pred)
        # print('correct_rate:', correct_rate)

        print(clf.b, clf.w)
        pre = clf.predict(X_test)
        print('correct:', np.mean((pre == y_test).astype(int)))
        plot_samples(X_train, y_train, clf.alpha, c)
        plot_line(X_train, [clf.b, clf.w[0], clf.w[1]])
Example 22
def main(filename='iris-virginica.txt', C=1.0, kernel_type='linear', epsilon=0.001):

    # Load data (the original file-based loader is kept for reference)
    # (data, _) = readData('%s/%s' % (filepath, filename), header=False)
    # data = data.astype(float)
    data = pd.read_excel("C:/Users/Niku/Documents/dataset/arrays.xlsx")
    print(data.shape)

    # Split data: first 1500 rows; column 35 holds the label, columns 0-34 the features
    X = np.array(data[0:1500])
    y = np.array(X[:, 35])
    X = X[:, 0:35]
    y[y == 0] = -1
    print(y.shape, X.shape)
    # Initialize model
    model = SVM()

    # Fit model
    support_vectors, iterations = model.fit(X, y)

    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction
    y_hat = model.predict(X)

    # Calculate accuracy
    acc = calc_acc(y, y_hat)

    print("Support vector count: %d" % (sv_count))
    print("bias:\t\t%.3f" % (model.b))
    print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
Example 23
def main(filename='data/data_banknote_authentication.txt',
         C=1.0,
         kernel_type='linear',
         epsilon=0.001):
    # Load data
    (data, _) = readData('%s/%s' % (filepath, filename), header=False)
    data = data.astype(float)

    # Shuffle the rows randomly (np.random.shuffle: random.shuffle mis-swaps rows of 2-D numpy arrays)
    np.random.shuffle(data)
    split = int(len(data) * 0.80)
    train_data = data[:split]  # 80% for the training set
    test_data = data[split:]   # remaining 20% for the test set

    # Split features and labels
    X, y = train_data[:, 0:-1], train_data[:, -1].astype(int)    # for training
    X1, y1 = test_data[:, 0:-1], test_data[:, -1].astype(int)    # for testing

    # Split data
    #X, y = data[:,0:-1], data[:,-1].astype(int)

    # Initialize model
    model = SVM()

    # Fit model
    support_vectors, iterations = model.fit(X, y)  # the training data goes in here

    # Support vector count
    sv_count = support_vectors.shape[0]

    # Make prediction
    y_hat = model.predict(X1)  # the test data is used here

    # Calculate accuracy
    acc = calc_acc(y1, y_hat)

    print("Support vector count: %d" % (sv_count))
    print("bias:\t\t%.3f" % (model.b))
    print("w:\t\t" + str(model.w))
    print("accuracy:\t%.3f" % (acc))
    print("Converged after %d iterations" % (iterations))
Example 24
lengthes_subseq = [8, 6, 7]
tab_c = [1e-2, 1e-3, 1e-2]

ytest = []  # collect (Id, prediction) blocks for the three datasets
for k in range(3):
    preindex = preindexation(
        lengthes_subseq[k])  # Compute a preindexation dictionary
    Xtrain_emb = Spectrum_embedding(
        Xtrain[k]['seq'], lengthes_subseq[k],
        preindex=preindex)  # Compute the spectrum embedding
    Ktrain = kernel_spectrum(Xtrain_emb, Xtrain_emb, {})  # Compute the kernel

    ytrain = y[k]['Bound'].to_numpy()[:, None]
    ytrain[ytrain == 0] = -1

    model = SVM(c=tab_c[k])
    model.fit(Ktrain, ytrain)  # Fit the model

    Xtest_emb = Spectrum_embedding(Xtest[k]['seq'],
                                   lengthes_subseq[k],
                                   preindex=preindex)
    Ktest = kernel_spectrum(Xtrain_emb, Xtest_emb,
                            {})  # Compute the test kernel

    ypred = model.predict_class(Ktest)  # Prediction
    ypred[ypred == -1] = 0
    ytest.append(
        np.hstack((Xtest[k]['Id'].to_numpy()[:, None], ypred[:, None])))

y = np.vstack(ytest)

pd.DataFrame(y, columns=["Id", "Bound"]).set_index('Id').to_csv("Yte.csv")
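`Spectrum_embedding` and `kernel_spectrum` are this repo's own helpers; the underlying idea is a k-mer count vector, so the spectrum kernel between two sequences is a dot product of their k-mer counts. A minimal sketch under that assumption (function names here are illustrative, not the repo's):

import numpy as np
from collections import Counter

def spectrum_counts(seq, k):
    """Count every length-k substring (k-mer) of seq."""
    return Counter(seq[i:i + k] for i in range(len(seq) - k + 1))

def spectrum_kernel(seq_a, seq_b, k):
    """Dot product of the two k-mer count vectors (the spectrum kernel)."""
    ca, cb = spectrum_counts(seq_a, k), spectrum_counts(seq_b, k)
    return float(sum(ca[kmer] * cb[kmer] for kmer in ca.keys() & cb.keys()))

print(spectrum_kernel("ACGTACGT", "CGTACG", 3))   # shared 3-mers: ACG, CGT, GTA, TAC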
Example 25
def init():

    def plot(arg):
        plt.xlabel('Iterations')
        plt.ylabel(arg)
        plt.legend()
        plt.show()
        plt.clf()

    # Arguments Parser Structure
    parser = argparse.ArgumentParser(description='Classification models: Logistic Regression, SVM, Naive Bayes Training.')
    parser.add_argument('-p', '--preprocess', action='store_true', help='perform preprocessing of emails')
    parser.add_argument('-f', '--figure', action='store_true', help='plot training figures (performs validation)')

    # Parse Arguments
    parsed = parser.parse_args()
    preprocess = parsed.preprocess
    figure = parsed.figure

    dataHandler = DataHandler()

    if preprocess:
        print('Extracting Features ............. ', end='', flush=True)
        start = time.time()
        dataHandler.saveFeatures()
        print('done -- ' + str(round(time.time() - start, 3)) + 's')

    print('Loading Data .................... ')
    start = time.time()
    x_train, y_train, x_test, y_test = dataHandler.loadTrainingData()
    x_val = deepcopy(x_test)
    y_val = deepcopy(y_test)

    # Logistic Regression
    logistic = LogisticRegression(lr=0.2, num_iter=1000, val=figure)
    start = time.time()
    train_history, val_history = logistic.fit(x_train, y_train, x_val, y_val)
    plt.plot(range(len(train_history)), train_history, label='Training Loss')
    plt.plot(range(len(val_history)), val_history, label='Validation Loss')

    accuracy = logistic.test(x_test, y_test)
    print('Test Accuracy: {}%'.format(round(100 * accuracy, 2)))

    # Plot
    if figure: plot("Loss")

    # SVM Training
    svm = SVM(lr=0.1, num_iter=420, val=figure)
    start = time.time()
    train_history, val_history = svm.fit(x_train, y_train, x_val, y_val)
    plt.plot(range(len(train_history)), train_history, label='Training Misclassification Ratio')
    plt.plot(range(len(val_history)), val_history, label='Validation Misclassification Ratio')
    accuracy = svm.test(x_test, y_test)
    print('Test Accuracy: {}%'.format(round(100 * accuracy, 2)))

    # Plot
    if figure: plot("Misclassification Ratio")

    # Naive Bayes
    bayes = NaiveBayes()
    start = time.time()
    bayes.fit(x_train, y_train)
    accuracy = bayes.test(x_test, y_test)
    print('Test Accuracy: {}%'.format(round(100 * accuracy, 2)))
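The `SVM(lr=0.1, num_iter=420, ...)` interface above suggests a (sub)gradient-trained linear SVM. A common formulation minimizes the regularized hinge loss; the sketch below is that textbook update under stated assumptions, not necessarily what this repo's SVM class implements:

import numpy as np

def fit_linear_svm(X, y, lr=0.1, num_iter=1000, lam=0.01):
    """Subgradient descent on lam*||w||^2 + mean(max(0, 1 - y*(Xw + b)))."""
    n, d = X.shape
    w, b = np.zeros(d), 0.0
    for _ in range(num_iter):
        margins = y * (X @ w + b)
        active = margins < 1                        # samples violating the margin
        grad_w = 2 * lam * w - (y[active] @ X[active]) / n
        grad_b = -np.sum(y[active]) / n
        w -= lr * grad_w
        b -= lr * grad_b
    return w, b

X = np.vstack((np.random.randn(50, 2) + 2, np.random.randn(50, 2) - 2))
y = np.concatenate((np.ones(50), -np.ones(50)))
w, b = fit_linear_svm(X, y)
print(np.mean(np.sign(X @ w + b) == y))   # training accuracy on toy data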
Example 26
def predict():
    rootdir = 'E:/python/stockdata/train'
    stock_num = os.path.join(rootdir, str(input("Enter the stock code to predict: ")) + '.csv')
    stock_path = stock_num.replace("\\", "/")
    if os.path.isfile(stock_path):
        data = pd.read_csv("%s" % stock_path, header=0)
    else:
        raise FileNotFoundError(stock_path)

    # SVM(data) returns (accuracy, classifier, features, targets); call it once
    acc, clf, data_x, data_y = SVM(data)[0:4]
    print("Prediction accuracy for this stock:", acc)

    in_close = float(input("Enter yesterday's closing price: "))
    in_ma5 = float(input("Enter yesterday's 5-day moving average: "))
    in_ma10 = float(input("Enter yesterday's 10-day moving average: "))
    in_ma20 = float(input("Enter yesterday's 20-day moving average: "))
    in_volume = float(input("Enter yesterday's trading volume: "))
    v_ma5 = float(input("Enter yesterday's 5-day average volume: "))
    v_ma10 = float(input("Enter yesterday's 10-day average volume: "))
    turnover = float(input("Enter yesterday's turnover: "))
    #v_ma20 = float(input("Enter yesterday's 20-day average volume: "))

    ma5 = (in_close - in_ma5) / in_ma5
    ma10 = (in_close - in_ma10) / in_ma10
    ma20 = (in_close - in_ma20) / in_ma20
    vma5 = (in_volume - v_ma5) / v_ma5
    vma10 = (in_volume - v_ma10) / v_ma10
    #vma20 = in_volume - v_ma20

    ##### optional standardization with preprocessing.scale() #####
    #    in_close = np.vstack((np.array(data['close'])[1::].reshape(dimension-1,1), in_close))
    #    ma5 = np.vstack((np.array(data['ma5_trend'])[1::].reshape(dimension-1,1), ma5))
    #    ma10 = np.vstack((np.array(data['ma10_trend'])[1::].reshape(dimension-1,1), ma10))
    #    ma20 = np.vstack((np.array(data['ma20_trend'])[1::].reshape(dimension-1,1), ma20))
    #    in_close = preprocessing.scale(np.vstack((np.array(data['close'])[1::].reshape(dimension-1,1), in_close)))
    #    ma5 = preprocessing.scale(np.vstack((np.array(data['ma5_trend'])[1::].reshape(dimension-1,1), ma5)))
    #    ma10 = preprocessing.scale(np.vstack((np.array(data['ma10_trend'])[1::].reshape(dimension-1,1), ma10)))
    #    ma20 = preprocessing.scale(np.vstack((np.array(data['ma20_trend'])[1::].reshape(dimension-1,1), ma20)))
    #    in_volume = preprocessing.scale(np.vstack((np.array(data['volume'])[1::].reshape(dimension-1,1), in_volume)))
    #    vma5 = preprocessing.scale(np.vstack((np.array(data['v_ma5_trend'])[1::].reshape(dimension-1,1), vma5)))
    #    vma10 = preprocessing.scale(np.vstack((np.array(data['v_ma10_trend'])[1::].reshape(dimension-1,1), vma10)))
    #    vma20 = preprocessing.scale(np.vstack((np.array(data['v_ma20_trend'])[1::].reshape(dimension-1,1), vma20)))

    x = np.hstack((ma5, ma10, ma20, vma5, vma10, turnover))  #, vma20[-1]]
    x = x.reshape(1, 6)
    #x = np.array(x).reshape(1,7)  # with vma20: (1, 8)

    pred = clf.predict(x)  # if standardization is not needed; otherwise use clf.predict(train_x)
    if pred < 0:
        print("Prediction for today: down", pred)
    else:
        print("Prediction for today: up", pred)

    # visualization (plt.hold was removed from matplotlib; overlaying is the default)
    plot_x = data['date'].values[0:20]
    plot_y = data_y[0:20]
    plt.scatter(data['date'].values[0:20],
                data_y[0:20],
                color='darkorange',
                label='data')
    plt.plot([0, len(plot_x)], [0, 0], '--', color='g')
    plt.plot(plot_x, plot_y, color='navy', lw=2, label='Origin model')
    plt.plot(plot_x,
             clf.fit(data_x, data_y).predict(data_x)[0:20],
             color='cornflowerblue',
             lw=2,
             label='Linear model')
    plt.xlabel('data')
    plt.ylabel('target')
    plt.title('SVM-Stock-Prediction')
    plt.legend()
    plt.show()
Example 27
import numpy as np

from SVM import SVM, polynomial_kernel
from SVM_utils import *

X1, y1, X2, y2 = gen_non_lin_separable_data()
X_train, y_train = split_train(X1, y1, X2, y2)
X_test, y_test = split_test(X1, y1, X2, y2)

model = SVM(polynomial_kernel)
model.fit(X_train, y_train)

y_predict = model.predict(X_test)
correct = np.sum(y_predict == y_test)
print("%d out of %d predictions correct" % (correct, len(y_predict)))

plot_contour(X_train[y_train == 1], X_train[y_train == -1], model)
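`polynomial_kernel` here comes from the repo's own SVM module; a common definition is k(x, y) = (x·y + c)^d. A sketch under that assumption (the constant c and degree d below are hypothetical defaults, not read from the repo):

import numpy as np

def polynomial_kernel_sketch(x, y, c=1.0, d=3):
    """k(x, y) = (x . y + c)^d -- the usual polynomial kernel form."""
    return (np.dot(x, y) + c) ** d

a, b = np.array([1.0, 2.0]), np.array([0.5, -1.0])
print(polynomial_kernel_sketch(a, b))   # (0.5 - 2 + 1)^3 = (-0.5)^3 = -0.125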
Example 28
# (the head of this snippet is truncated; X, Y, size and name are defined upstream,
#  presumably X of shape (2*size, 2), Y of shape (2*size, 1), and name a kernel name)
X[size:, 0] = np.random.normal(2, 1, size)
X[size:, 1] = np.random.normal(2, 1, size)
Y[size:] = -1

if name == 'gaussian':
    r = 5.0
    a, b = (0., 0.)
    theta = np.arange(0, 2 * np.pi, 2 * np.pi / size)
    x = a + r * np.cos(theta)
    y = b + r * np.sin(theta)

    for i in range(size):
        X[i, 0] = x[i] + np.random.normal(0, 0.5, 1)
        X[i, 1] = y[i] + np.random.normal(0, 0.5, 1)
        Y[i, 0] = 1

    X[size:, 0] = np.random.normal(0, 1, size)
    X[size:, 1] = np.random.normal(0, 1, size)
    Y[size:, 0] = -1

model = SVM()
x, y = model.fit(X, Y, kernel=name, histroy=True)
# print(x.shape)
# print(y.shape)
# print(z.shape)
# plt.contour(x, y, z, cmap=plt.cm.coolwarm, alpha=0.8)
plt.plot(x, y)
plt.scatter(X[:size, 0], X[:size, 1], c='b')
plt.scatter(X[size:, 0], X[size:, 1], c='r')
plt.show()
Example 29
# (head truncated; these are the trailing arguments of sklearn's make_classification)
X, y = make_classification(
    n_features=10,
    n_informative=5,
    random_state=1111,
    n_classes=2,
    class_sep=1.75,
)
# map the y labels from {0, 1} to {-1, 1}
y = (y * 2) - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# normalize the data to remove scale effects
# X_train, X_test = normalize(X_train), normalize(X_test)

# build the SVM classifier
model = SVM(X_train, y_train, kernel="RBF")
model.fit()
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# print(y_pred, y_test)
print("Accuracy on the randomly generated classification data:", accuracy)
print()

print('--------------------------------------------------------------')
print()
# Binary task: iris samples of classes 1 and 2 get label = 1; class 0 gets label = -1
data = datasets.load_iris()
X = data.data
Y = data.target
temp = []
for label in Y:
    if label == 0:
        temp.append(-1)   # class 0 -> -1, per the comment above
    else:
        temp.append(1)    # classes 1 and 2 -> +1
Example 30
numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

combination = list(itertools.combinations(numbers, 2))

evaluate_matrix = []
prediction = []

for pair in combination:
    X_positive, X_negative, Y_positive, Y_negative = one_versus_one(
        X_clustered_np, pair)

    training_data = np.vstack((X_positive, X_negative))
    training_labels = np.hstack((Y_positive, Y_negative))

    clf = SVM(C=10)
    clf.fit(training_data, training_labels)

    Y_evaluate = clf.evaluate(X_test)
    evaluate_matrix.append(Y_evaluate)

# evaluate_matrix: one row per class pair, shape (45, 1000); transpose to (samples, pairs)
evaluate_matrix = np.array(evaluate_matrix)
evaluate_matrix = evaluate_matrix.T

prediction = DAG_decide(combination, evaluate_matrix)

# Creating Confusion Matrix
confusion_matrix = np.zeros((len(numbers), len(numbers)))

for i in range(len(prediction)):
    confusion_matrix[prediction[i]][Y_test[i]] += 1
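`DAG_decide` is this snippet's own reducer over the transposed (1000, 45) score matrix; the simpler classical alternative is per-sample majority voting over all C(10, 2) = 45 pairwise results. A sketch of that voting variant, assuming each column holds signed pairwise scores with positive values favoring the first class of the pair:

import numpy as np
import itertools

def vote_decide(combination, evaluate_matrix, n_classes=10):
    """Majority vote: each pairwise score adds one vote to the favored class."""
    n_samples = evaluate_matrix.shape[0]      # evaluate_matrix: (n_samples, n_pairs)
    votes = np.zeros((n_samples, n_classes))
    for col, (a, b) in enumerate(combination):
        scores = evaluate_matrix[:, col]
        votes[scores >= 0, a] += 1
        votes[scores < 0, b] += 1
    return np.argmax(votes, axis=1)

pairs = list(itertools.combinations(range(10), 2))
fake_scores = np.random.randn(8, len(pairs))   # hypothetical pairwise scores
print(vote_decide(pairs, fake_scores))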
Example 31
from SVM import SVM
import numpy as np
np.random.seed(2018)


def load_data(path):
    arr = np.loadtxt(path)
    x, y = arr[:, :2], arr[:, -1]
    return x, y


if __name__ == '__main__':
    x, y = load_data('testSet.txt')
    train_cnt = 90
    train_x, train_y = x[:train_cnt], y[:train_cnt]
    test_x, test_y = x[train_cnt:], y[train_cnt:]
    # svm = SVM()
    # svm = SVM('poly')
    # svm = SVM('poly', gamma=0.01, d=3, r=0.5)
    # svm = SVM('sigmoid')
    # svm = SVM('sigmoid', gamma=0.001, r=0)
    # svm = SVM('rbf', gamma=1e-2)  # the rbf kernel seems to have an issue?
    svm = SVM('rbf', gamma=1)  # the rbf kernel seems to have an issue?
    # svm.fit(x, y, C=1, epsilon=1e-4, max_iter=40)
    svm.fit(train_x, train_y, C=1, epsilon=1e-4, max_iter=40)
    predict = svm.predict_class(test_x)
    print(predict)
    print(test_y)
    acc = 1 - np.count_nonzero(predict - test_y) / len(test_y)
    print('Accuracy:', acc)
    svm.visualize_2d(x, y)
Example 32
import numpy as np
from SVM import SVM


def load_data(file):
    X = []
    y = []
    with open(file) as f:
        for each_line in f.readlines():
            data = each_line.strip().split()
            y.append(float(data[0]))
            x = [0.0] * 784
            for s in data[1:]:  # parse every ind:val feature token after the label
                ind, color = map(int, s.split(":"))
                x[ind - 1] = float(color)
            X.append(x)
    return np.array(X), np.array(y)


if __name__ == "__main__":
    X_train, y_train = load_data("train-01-images.svm")
    X_test, y_test = load_data("test-01-images.svm")

    clf = SVM()
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)
    correct = np.sum(y_predict == y_test)
    print("%d out of %d predictions correct(%.2f%%)." %
          (correct, len(y_test), correct / len(y_test) * 100))
Example 33
import itertools

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

# SVM and Kernel are assumed to be provided by this repo's own module


def plot(predictor, X, y, grid_size):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, grid_size), np.linspace(y_min, y_max, grid_size), indexing="ij")
    flatten = lambda m: np.array(m).reshape(-1)

    result = []
    for (i, j) in itertools.product(range(grid_size), range(grid_size)):
        point = np.array([xx[i, j], yy[i, j]]).reshape(1, 2)
        result.append(predictor.predict(point))

    Z = np.array(result).reshape(xx.shape)

    plt.contourf(xx, yy, Z, cmap=cm.Paired, levels=[-0.001, 0.001], extend="both", alpha=0.8)
    plt.scatter(flatten(X[:, 0]), flatten(X[:, 1]), c=flatten(y), cmap=cm.Paired)
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.show()


num_samples = 500
num_features = 2
grid_size = 20
samples = np.matrix(np.random.normal(size=num_samples * num_features).reshape(num_samples, num_features))
labels = 2 * (samples.sum(axis=1) > 0) - 1.0
model = SVM(1.0, Kernel.linear())
print(samples[0])
model.fit(samples, labels)
plot(model, samples, labels, grid_size)
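The per-point loop over the grid in `plot` can usually be replaced by one batched predict call. A sketch, assuming the predictor accepts an (n, 2) array (the lambda below is a stand-in for `predictor.predict`):

import numpy as np

predict = lambda pts: np.sign(pts.sum(axis=1))   # hypothetical stand-in predictor
xx, yy = np.meshgrid(np.linspace(-3, 3, 20), np.linspace(-3, 3, 20), indexing="ij")
grid = np.c_[xx.ravel(), yy.ravel()]             # (grid_size**2, 2) query points
Z = predict(grid).reshape(xx.shape)              # one batched call replaces the loop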