Example #1
def testSpeed():
    startIndex, endIndex = 93, 118;
    data = [];
    y = np.mat(data).T;

    h, timespan, M = 1, 6, 2;
    X = np.mat(np.arange(y.shape[0])).T;
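    # Presumably int(h * 60 / timespan) + M - 2 sizes the spline basis for an
    # h-hour window with one segment every `timespan` minutes (an assumption;
    # RegressionSplineFunction is a project-local class of order M).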
    f = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M);
    m = LinearRegression.LinearRegression().fit(X, y, [f]);
    yHeat = m.predictValue(X);

    # speed1 = getSpeedM3(m.beta, f.knots, X[startIndex: endIndex, :]);
    # print(speed1.A.flatten().tolist());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.maximize();
    plt.subplot(211);
    plt.plot(X.A.flatten(), y.A.flatten(), "-xb");
    plt.plot(X.A.flatten(), yHeat.A.flatten(), "-r");
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "or");
    plt.subplot(212);
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "-xb");
    plt.plot(X[startIndex: endIndex, :].A.flatten(), yHeat[startIndex: endIndex, :].A.flatten(), "-r");
    plt.show(block=True);
    plt.close();
Example #2
def update_figure_polar(value_main_tab, value_analysis, gene_name):
    if gene_name is None:
        raise Exception()
    else:
        if value_main_tab == 'main-tab-2' and value_analysis == 'temporal':
            array_gene_time = np.concatenate(
                (gene_data[gene_name]['rep1'], gene_data[gene_name]['rep2'],
                 gene_data[gene_name]['rep3'][:, [0, 2]]),
                axis=1)

            l_time_reg = []
            for x in range(8):
                l_time_reg.append(
                    LinearRegression.make_time_regression(
                        array_gene_time[x, :], simple=False, predict=True))
            l_time_reg_simple = []
            for x in range(8):
                l_time_reg_simple.append(
                    LinearRegression.make_time_regression(
                        array_gene_time[x, :], simple=True, predict=False))

            figure_polar = Figures.compute_figure_polar_tab_3(l_time_reg)
            figure_mean = Figures.compute_figure_mean_tab_3(
                l_time_reg)  #, yaxis_type, yaxis_scale)

            return figure_polar, figure_mean
        else:
            raise PreventUpdate
Example #3
def main():

    data_set = load_boston()

    train_data, train_target, test_data, test_target = LR.split_data(data_set)
    num_features = train_data.shape[1]

    new_train_data = train_data.copy()
    new_test_data = test_data.copy()
    
    for i in range(num_features):
        for j in range(i, num_features):
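            # append the degree-2 interaction term x_i * x_j (j >= i) as a new
            # column of both the train and test matrices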
            new_column = train_data[:, i] * train_data[:, j]
            new_column = new_column.reshape(new_column.shape[0], 1)
            new_train_data = np.append(new_train_data, new_column, 1)

            new_test_column = test_data[:, i] * test_data[:, j]
            new_test_column = new_test_column.reshape(new_test_column.shape[0], 1)
            new_test_data = np.append(new_test_data, new_test_column, 1)
    
    lr = LR.LinearRegression()
    lr.fit(new_train_data, train_target)
    
    mse_test = lr.mse(new_test_data, test_target)
    mse_train = lr.mse(new_train_data, train_target)

    print "\nSol. 3.4"
    print "Linear Regression"
    print "{:^15}|{:^15}".format("Input Data", "MSE")
    print "-"*30
    print "{:^15}|{:^15.7}".format("test_data", mse_test)
    print "{:^15}|{:^15.7}".format("train_data", mse_train)
    print "\n"
Example #4
def main():

    data_set = load_boston()

    train_data, train_target, test_data, test_target = LR.split_data(data_set)

    min_MSE = sys.maxint
    min_combo = None

    calculated_combos = []

    for combo in set(permutations(range(train_data.shape[1]), 4)):
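        # permutations() yields ordered 4-tuples, so each 4-feature subset
        # appears 4! times; tracking sorted(combo) fits each subset only once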
        if sorted(combo) not in calculated_combos:
            calculated_combos.append(sorted(combo))
            lr = LR.LinearRegression()
            lr.fit(train_data[:, sorted(combo)], train_target)
            MSE = lr.mse(test_data[:, sorted(combo)], test_target)

            if min_MSE > MSE:
                min_MSE = MSE
                min_combo = combo

    print "Brute Force"
    print "Best Combination : [{}], by 1-index: {} with MSE = {:.7}".format(
        ", ".join([data_set.feature_names[x] for x in min_combo]),
        [x + 1 for x in min_combo], min_MSE)
Example #5
def detectChange(j, f):
    print("change {0} started".format(j));

    M = 2;
    h = 12;
    timespan = 6;
    size, speed = int(3600 / f), [];
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/data.npy".format(f)));
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/marks.npy".format(f)));
    y1 = data[:size * (h + 1), j];
    X1 = np.mat(np.arange(y1.shape[0])).T;
    f1 = LinearRegression.RegressionSplineFunction(int((h + 1) * 60 / timespan) + M - 2, M);
    m1 = LinearRegression.LinearRegression().fit(X1, y1, [f1]);
    sY1 = m1.predictValue(X1);
    X1 = X1[:-size, :];
    y1 = y1[:-size, :];
    sY1 = sY1[:-size, :];
    if M == 3:
        speed.extend(getSpeedM3(m1.beta, f1.knots, X1).A.flatten().tolist());
    else:
        speed.extend(getSpeedM2(m1.beta, f1.knots, X1).A.flatten().tolist());

    for i in range(1, math.floor((data.shape[0] - size) / (size * h))):
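        # slide an h-hour window padded with one extra hour (size samples) on
        # each side; the padding is trimmed again below, presumably to limit
        # spline boundary effects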
        y2 = data[i * size * h - size:(i + 1) * size * h + size, j];
        X2 = np.mat(np.arange(y2.shape[0])).T;
        f2 = LinearRegression.RegressionSplineFunction(int((h + 2) * 60 / timespan) + M - 2, M);
        m2 = LinearRegression.LinearRegression().fit(X2, y2, [f2]);
        sY2 = m2.predictValue(X2);
        X2 = X2[size:-size, :];
        y2 = y2[size:-size, :];
        sY2 = sY2[size:-size, :];
        if M == 3:
            speed.extend(getSpeedM3(m2.beta, f2.knots, X2).A.flatten().tolist());
        else:
            speed.extend(getSpeedM2(m2.beta, f2.knots, X2).A.flatten().tolist());

        plt.figure(1, (12, 8));
        plt.get_current_fig_manager().window.maximize();
        plt.subplot(211);
        plt.title(str(i - 1));
        plt.plot(X1.A.flatten(), y1.A.flatten(), "-x");
        plt.plot(X1.A.flatten(), sY1.A.flatten(), color = "red");
        plt.subplot(212);
        plt.title(str(i));
        plt.plot(X2.A.flatten(), y2.A.flatten(), "-x");
        plt.plot(X2.A.flatten(), sY2.A.flatten(), color = "red");
        plt.show(block = True);
        plt.close();

        X1, y1, sY1 = X2, y2, sY2;
    print("change history completed.");

    speed = np.mat(speed).T;
    speedMean, speedStd = speed.mean(), speed.std();
    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.maximize();
    plt.hist(speed.A.flatten(), bins = 1000);
    plt.show(block = True);
    plt.close();
Example #6
 def getLinearFactors(self, node):
     """
 get linear factor for Q_h and Q_a, set node.f_linear=[[Q_h factors], [Q_a factors]]
 :param node: the leaf node which all instances under it are used to generate linear q_value model
 :return:
 """
     train_X = [instance.currentObs for instance in node.instances]
     train_Y = [instance.qValue for instance in node.instances]
     l_rate = 0.0001
     n_epochs = 1000
     count = 0
     max_diff = 10000
     tot = None
     if node.f_linear is not None:
         tot = np.transpose(node.f_linear)
         W = np.delete(tot, self.n_dim, 0)
         b = np.array([tot[self.n_dim]])
         count += 1
     elif node.parent and node.parent.f_linear is not None:
         tot = np.transpose(node.parent.f_linear)
         W = np.delete(tot, self.n_dim, 0)
         b = np.array([tot[self.n_dim]])
     while count < TRIES:
         if tot is not None:
             with tf.Session() as sess:
                 LR = lr.LinearRegression(training_epochs=int(n_epochs /
                                                              10**count),
                                          learning_rate=l_rate / 10**count)
                 LR.read_weights(weights=W, bias=b)
                 LR.linear_regression_model()
                 temp_diff, temp_W, temp_b = LR.gradient_descent(
                     sess=sess, train_X=train_X, train_Y=train_Y)
         else:
             with tf.Session() as sess:
                 LR = lr.LinearRegression(training_epochs=n_epochs,
                                          learning_rate=l_rate)
                 LR.read_weights()
                 LR.linear_regression_model()
                 temp_diff, temp_W, temp_b = LR.gradient_descent(
                     sess=sess, train_X=train_X, train_Y=train_Y)
         if temp_diff < max_diff:
             W = temp_W
             b = temp_b
             max_diff = temp_diff
         count += 1
     node.f_linear = np.concatenate((np.transpose(W), np.transpose(b)),
                                    axis=1)
     print("finish linear, node: " + str(node.idx))
Example #7
def getRandomData(mcount):
	seed(1)
	inputs = np.matrix([
		[gauss(0, 1) for i in range(1, mcount + 1)], 
		[gauss(0, 1)  for i in range(1, mcount + 1)]])
	outputs = LinearRegression.addOneRow(inputs).T * np.matrix([[5], [3], [4]]) + gauss(0, 0.1)  # weights = [5, 3, 4]
	return inputs, outputs
Example #8
def cross_validator(k, train_data, feature_names, classifier):
    for index, item in enumerate(train_data):
        item.append(feature_names[index])
    random.shuffle(train_data)
    k_splits = np.array_split(train_data, k)
    feature_splits = [[in_item[-1] for in_item in item] for item in k_splits]
    all_accuracy = 0
    for fold in range(k):
        print("For %s fold" % (fold + 1))
        trainX = []
        trainY = []
        testX = k_splits[fold]
        testY = feature_splits[fold]
        trainX_temp = k_splits[:fold] + k_splits[(fold + 1):]
        trainY_temp = feature_splits[:fold] + feature_splits[(fold + 1):]
        for x in range(len(trainX_temp)):
            trainX.extend(trainX_temp[x])
            trainY.extend(trainY_temp[x])
        if classifier == 1:
            accuracy = (kn_classifier.knn_driver(trainX,  testX, 4))
        elif classifier == 2:
            accuracy = (centroid_classifier.predict(trainX, trainY, testX, testY, 4))
        elif classifier == 3:
            matrix, accuracy = (LinearRegression.predict(trainX, trainY, testX, testY))
        print (abs(accuracy))
        all_accuracy += accuracy
    k_accuracy = float(all_accuracy)/k
    return abs(k_accuracy)
Example #9
def test1():
    X, y = LR.make_data()
    image = scatter2image(X, y)
    accumulator, thetas, rhos = transform.hough_line(image)
    show_transform(accumulator, 'hough_transform')
    show_line(image, accumulator, thetas, rhos, 50, 'hough_line')
Example #10
def train(xs, ys, n):

    w0 = 0.5
    w1 = 0.5

    ldw0 = 0.00001
    ldw1 = 0.0001

    pp = pprint.PrettyPrinter(indent=0)

    d = []

    for i in range(n):
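        # h is the current hypothesis; pdw0 and pdw1 are the per-sample partial
        # derivatives of the squared-error cost with respect to w0 and w1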

        def h(x):
            return w0 + w1 * x

        def pdw0(xi, yi):
            return h(xi) - yi

        def pdw1(xi, yi):
            return (h(xi) - yi) * xi

        j = LinearRegression.cost(xs, ys, h)

        dw0 = LinearRegression.partial_derivative(xs, ys, pdw0)
        dw1 = LinearRegression.partial_derivative(xs, ys, pdw1)

        d.append([i, j, w0, dw0, ldw0, ldw0 * dw0, w1, dw1, ldw1, ldw1 * dw1])

        w0 = w0 - (ldw0 * dw0)
        w1 = w1 - (ldw1 * dw1)

        previous_dw0 = dw0
        previous_dw1 = dw1

        add_hypothensies_trace(xs, w0, w1, 'h' + str(i))

    print(
        tabulate(d,
                 headers=[
                     '#', 'J', 'w0', 'dw0', 'lw0', 'lw0 * dw0', 'w1', 'dw1',
                     'lw1', 'lw1 * dw1'
                 ]))

    return (w0, w1)
Example #11
 def testOne(self):
   X = np.array([[3],
                 [4],
                 [5]])
   expected = np.array([[-1.],
                        [ 0.],
                        [ 1.]])
   np.testing.assert_almost_equal(LR.featureNormalize(X), expected)
Example #12
def data_handler():
    index_list = list()
    train_list = list()
    test_list = list()

    train_num, test_num = data_partitions[0], data_partitions[1]

    print(train_num, test_num)

    data = pd.read_csv(filename, delimiter=',', dtype=None, header=None)

    # Create a numpy array for manipulation
    numpy_data = np.array(data)
    labels = np.array(data.head(1))

    for data_class in classes:

        # index_list is a list of numpy array int64 type
        index_list.append(np.where(labels == data_class)[1])

    for one_class in index_list:
        train_list.extend(one_class[0:train_num])
        test_list.extend(one_class[train_num:])

    print(train_list)
    print(test_list)

    train = np.array(numpy_data[:, train_list])
    test = np.array(numpy_data[:, test_list])

    print(train.shape, test.shape)

    np.savetxt(train_filename, train, delimiter=',', fmt='%i')
    np.savetxt(test_filename, test, delimiter=',', fmt='%i')

    # Call programs
    # Knn
    print("KNN classifier")
    obj = KnnClassification.KnnClassification(10, train_filename,
                                              test_filename)
    obj.train()

    # Centroid method
    print("Centroid classifier")
    obj = CentroidMethod.CentroidMethod(train_filename, test_filename)
    obj.pre_process()
    obj.train()

    # Linear Regression
    print("Linear regression")
    obj = LinearRegression.LinearRegression(train_filename, test_filename)
    obj.compute_coefficients()

    # SVM
    print("SVM classifier")
    obj = Svm.Svm(train_filename, test_filename)
    obj.train()
Example #13
def checkCostFunc():
    np.random.seed(2)
    m = 10
    y = np.random.rand(m, )
    y_predicted = np.random.rand(m, )
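    # lm.costFunc presumably implements the squared-error cost
    # J = 1/(2m) * sum((y_predicted - y)**2); the reference value below was
    # generated with this seed (an assumption about the module under test)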
    calculatedCost = lm.costFunc(m, y, y_predicted)
    realCost = 0.075000505675425072
    if np.isclose(calculatedCost, realCost):
        print("PASSED : CostFunc Function")
    else:
        print("FAILED : CostFunc Function")
Example #14
def main():
    data, heights = import_and_scale_training_data(sys.argv[1])
    of = open_output(sys.argv[2])
    for iterations, alpha in [(100, 0.001), (100, 0.005), (100, 0.01),
                              (100, 0.05), (100, 0.1), (100, 0.5), (100, 1),
                              (100, 5), (100, 10), (1000, 0.0005)]:
        lr = LinearRegression.LinearRegressor(iterations=iterations,
                                              alpha=alpha,
                                              of=of)
        lr.fit(data, heights)
    of.close()
Example #15
def checkPredict():
    np.random.seed(6)
    theta = np.random.rand(7, )
    X = np.random.rand(10, 7)
    calcX = lm.predict(X, theta)
    realX = np.array([
        1.7089558, 2.20884418, 2.18216447, 1.80692415, 2.12231727, 1.41312956,
        1.82242337, 2.11752865, 1.70792641, 0.8332109
    ])
    if np.all(np.isclose(calcX, realX)):
        print("PASSED : predict Function")
    else:
        print("FAILED : predict Function")
Example #16
 def testOne(self):
   X = np.array([[1, 2],
                 [1, 3],
                 [1, 4],
                 [1, 5]])
   y = np.array([[ 7.],
                 [ 6.], 
                 [ 5.], 
                 [ 4.]]);
   theta = np.array([[0.1],
                     [0.2]])
   expected = 11.9450
   np.testing.assert_almost_equal(LR.computeCost(X, y, theta), expected)
Example #17
 def testTwo(self):
   X = np.array([[1, 2, 3],
                 [1, 3, 4],
                 [1, 4, 5],
                 [1, 5, 6]])
   y = np.array([[ 7.],
                 [ 6.],
                 [ 5.],
                 [ 4.]])
   theta = np.array([[0.1],
                     [0.2],
                     [0.3]])
   expected = 7.0175;
   np.testing.assert_almost_equal(LR.computeCost(X, y, theta), expected)
Example #18
 def test_analyze_linreg(self):
     X, y = gen_regression_data()
     solver = linReg.LinearRegressionSolver()
     with CapturedStdout():
         analyzerResults = analyze(
             solver,
             X,
             y,
             optimizationParams={
                 "nnTopology":
                 "",
                 "Lambda":
                 0.1,
                 "functions":
                 [lambda x: x[0]**2, lambda x: x[1]**2, lambda x: x[2]**2]
             },
             iterations=40,
             bins=3,
             tries=4,
             sample_iterations=40)
     npt.assert_equal(analyzerResults.sampleCountAnalyzis.sampleCount, [
         1, 4, 7, 10, 19, 28, 54, 80, 159, 238, 475, 712, 1423, 2134, 4267,
         6400
     ])
     npt.assert_almost_equal(
         analyzerResults.sampleCountAnalyzis.errorTrain, [
             0.00000000e+00, 2.33847837e+01, 3.00222512e+01, 7.96243961e+00,
             1.37056787e+00, 5.14152946e-01, 1.26362331e-01, 6.45051925e-02,
             1.46205243e-02, 7.25704802e-03, 1.67555343e-03, 7.51420424e-04,
             1.82472270e-04, 8.16512666e-05, 2.04713359e-05, 9.08089772e-06
         ], 5)
     npt.assert_almost_equal(analyzerResults.sampleCountAnalyzis.errorCV, [
         4.99885725e+04, 1.75878006e+04, 4.13273401e+03, 4.01030109e+01,
         3.82169238e+00, 6.81074172e-01, 1.43006176e-01, 7.01556101e-02,
         1.44597141e-02, 8.57240292e-03, 1.67551231e-03, 7.42103605e-04,
         1.73911141e-04, 7.83993282e-05, 1.98093851e-05, 8.65960541e-06
     ], 3)
     npt.assert_equal(analyzerResults.iterationCountAnalyzis.iterationCount,
                      [2, 4, 6, 10, 14, 27, 40])
     npt.assert_almost_equal(
         analyzerResults.iterationCountAnalyzis.errorTrain, [
             2.18325422e-01, 1.04552670e-05, 2.27461723e-06, 2.27461723e-06,
             2.27461723e-06, 2.27461723e-06, 2.27461723e-06
         ], 5)
     npt.assert_almost_equal(
         analyzerResults.iterationCountAnalyzis.errorCV, [
             2.14481838e-01, 1.01920583e-05, 2.16874107e-06, 2.16874107e-06,
             2.16874107e-06, 2.16874107e-06, 2.16874107e-06
         ], 5)
Example #19
def testAmplitude():
    startIndex, endIndex = 2855, 2880;
    data = [];
    y = np.mat(data).T;

    h, M = 24, 3;
    X = np.mat(np.arange(y.shape[0])).T;
    # m = LinearRegression.LinearRegression().fit(X, y, [LinearRegression.RegressionSplineFunction(h + M - 2, M)]);
    m = LinearRegression.LinearRegression().fit(X, y, [LinearRegression.RegressionSplineFunction(int(h * 60 / 60) + M - 2, M)]);
    yHeat = m.predictValue(X);

    amplitude = y[startIndex: endIndex, :] - yHeat[startIndex: endIndex, :];
    print(amplitude.A.flatten().tolist());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.maximize();
    plt.subplot(211);
    plt.plot(X.A.flatten(), y.A.flatten(), "-xb");
    plt.plot(X.A.flatten(), yHeat.A.flatten(), "-r");
    plt.subplot(212);
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "-xb");
    plt.plot(X[startIndex: endIndex, :].A.flatten(), yHeat[startIndex: endIndex, :].A.flatten(), "-r");
    plt.show(block=True);
    plt.close();
Example #20
def crossValidation(xArr,yArr,numVal=10):
    '''
    Cross-validation test for ridge regression
    '''
    m=len(yArr)
    indexList=list(range(m))
    errorMat=zeros((numVal,30))
    #cross-validation loop
    for i in range(numVal):
        #randomly split the data
        trainX=[];trainY=[]
        testX=[];testY=[]
        #shuffle the index list
        random.shuffle(indexList)
        for j in range(m):
            if j<m*0.9:
                trainX.append(xArr[indexList[j]])
                trainY.append(yArr[indexList[j]])
            else:
                testX.append(xArr[indexList[j]])
                testY.append(yArr[indexList[j]])
        #matrix of ridge regression coefficients
        wMat=LR.ridgeTest(trainX,trainY)
        #loop over the 30 sets of ridge coefficients
        for k in range(30):
            matTestX=mat(testX)
            matTrainX=mat(trainX)
            #standardize the test data with the training statistics
            meanTrain=mean(matTrainX,0)
            varTrain=var(matTrainX,0)
            matTestX=(matTestX-meanTrain)/varTrain
            #evaluate the regression on the held-out fold
            yEst=matTestX*mat(wMat[k,:]).T+mean(trainY)
            #accumulate the squared error
            errorMat[i,k]=((yEst.T.A-array(testY))**2).sum()
    #mean error of each coefficient set across the folds
    meanErrors=mean(errorMat,0)
    minMean=float(min(meanErrors))
    bestWeights=wMat[nonzero(meanErrors==minMean)]
    #undo the standardization to express the weights on the raw scale
    xMat=mat(xArr)
    yMat=mat(yArr).T
    meanX=mean(xMat,0)
    varX=var(xMat,0)
    unReg=bestWeights/varX

    print("the best model from ridge regression is:\n",unReg)
    print("with constant term: ",-1*sum(multiply(meanX,unReg))+mean(yMat))
Example #21
def checkAppendIntercept():
    np.random.seed(1)
    X = np.random.rand(10, 5)
    calc_X = lm.appendIntercept(X)
    real_X = np.array([[
        1.00000000e+00, 4.17022005e-01, 7.20324493e-01, 1.14374817e-04,
        3.02332573e-01, 1.46755891e-01
    ],
                       [
                           1.00000000e+00, 9.23385948e-02, 1.86260211e-01,
                           3.45560727e-01, 3.96767474e-01, 5.38816734e-01
                       ],
                       [
                           1.00000000e+00, 4.19194514e-01, 6.85219500e-01,
                           2.04452250e-01, 8.78117436e-01, 2.73875932e-02
                       ],
                       [
                           1.00000000e+00, 6.70467510e-01, 4.17304802e-01,
                           5.58689828e-01, 1.40386939e-01, 1.98101489e-01
                       ],
                       [
                           1.00000000e+00, 8.00744569e-01, 9.68261576e-01,
                           3.13424178e-01, 6.92322616e-01, 8.76389152e-01
                       ],
                       [
                           1.00000000e+00, 8.94606664e-01, 8.50442114e-02,
                           3.90547832e-02, 1.69830420e-01, 8.78142503e-01
                       ],
                       [
                           1.00000000e+00, 9.83468338e-02, 4.21107625e-01,
                           9.57889530e-01, 5.33165285e-01, 6.91877114e-01
                       ],
                       [
                           1.00000000e+00, 3.15515631e-01, 6.86500928e-01,
                           8.34625672e-01, 1.82882773e-02, 7.50144315e-01
                       ],
                       [
                           1.00000000e+00, 9.88861089e-01, 7.48165654e-01,
                           2.80443992e-01, 7.89279328e-01, 1.03226007e-01
                       ],
                       [
                           1.00000000e+00, 4.47893526e-01, 9.08595503e-01,
                           2.93614148e-01, 2.87775339e-01, 1.30028572e-01
                       ]])
    if np.all(np.isclose(real_X, calc_X)):
        print("PASSED : appendIntercept Function")
    else:
        print("FAILED : appendIntercept Function")
Example #22
def checkMakeGradientUpdate():
    np.random.seed(4)
    theta = np.random.rand(20, )
    grads = np.random.rand(20, )
    calcUpdate = lm.makeGradientUpdate(theta, grads)
    realUpdate = np.array([
        0.96702984, 0.54723225, 0.97268436, 0.71481599, 0.69772882, 0.2160895,
        0.97627445, 0.00623026, 0.25298236, 0.43479153, 0.77938292, 0.19768507,
        0.86299324, 0.98340068, 0.16384224, 0.59733394, 0.0089861, 0.38657128,
        0.04416006, 0.95665297
    ])

    if calcUpdate is not None and np.all(np.isclose(calcUpdate, realUpdate)):
        print("PASSED : makeGradientUpdate Function")
    else:
        print("FAILED : makeGradientUpdate Function")
Example #23
def checkCalcGradients():
    np.random.seed(3)
    m = 10
    x = np.random.rand(m, 20)
    y = np.random.rand(m, )
    y_p = np.random.rand(m, )
    calcGrad = lm.calcGradients(x, y, y_p, m)
    realGrad = np.array([
        -0.05425541, -0.04381124, -0.05959325, -0.03675508, -0.01118115,
        -0.05390415, -0.09321702, -0.01038522, -0.00185729, -0.04773877,
        -0.03408592, 0.00746619, 0.00090633, -0.01870412, -0.00821488,
        -0.01664091, -0.11836125, -0.03610672, -0.08967235, -0.02161973
    ])
    if np.all(np.isclose(calcGrad, realGrad)):
        print("PASSED : calcGradients Function")
    else:
        print("FAILED : calcGradients Function")
Example #24
def isConstant(y, periods, alpha):
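    # A series counts as constant only if it survives three checks: its
    # residuals look like white noise, the linear trend is not significant,
    # and the runs test on the above-median indicator passes.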
    if y.var() == 0:
        return True;

    p1 = [DataHelper.testWhiteNoise(y - y.mean(), m) for m in periods];
    if np.any(np.mat(p1) <= alpha):
        return False;

    p2 = LinearRegression.LinearRegression().fit(np.mat(range(0, y.shape[0])).T, y).betaP;
    if p2[1, 0] <= alpha:
        return False;

    p3 = DataHelper.testRunsLeft((y > np.quantile(y, 0.5)) - 0);
    if p3 <= alpha:
        return False;

    print("{0}, {1}, {2}".format(p1, p2.T, p3));
    return True;
Example #25
 def testOne(self):
   X = np.array([[1, 5],
                 [1, 2],
                 [1, 4],
                 [1, 5]])
   y = np.array([[1],
                 [6],
                 [4],
                 [2]])
   theta = np.array([[0],
                     [0]])
   alpha = 0.01;
   numOfIter = 1000;
   expectedTheta = np.array([[ 5.2148],
                             [-0.5733]])
   # expectedJHist[0] = 0.85426; 
   [actualTheta, actualJHist] = LR.gradientDescent(X, y, theta, alpha, numOfIter)
   np.testing.assert_almost_equal(actualTheta, expectedTheta, decimal=4);
Example #26
def checkTrain():
    np.random.seed(5)
    theta = np.random.rand(5, )
    X = np.random.rand(10, 5)
    y = np.random.rand(10, )
    model = {}
    calcModel = lm.train(theta, X, y, model)
    calcModel['J'] = calcModel['J'][:50]
    realModel = {}
    realModel['J'] = [
        0.23849093475226227, 0.23849093474760394, 0.23849093474294566,
        0.23849093473828731, 0.23849093473362895, 0.23849093472897059,
        0.23849093472431226, 0.23849093471965394, 0.23849093471499558,
        0.23849093471033728, 0.23849093470567886, 0.23849093470102059,
        0.23849093469636218, 0.23849093469170385, 0.23849093468704555,
        0.23849093468238722, 0.23849093467772886, 0.2384909346730705,
        0.23849093466841217, 0.23849093466375382, 0.23849093465909549,
        0.23849093465443719, 0.23849093464977877, 0.23849093464512042,
        0.23849093464046217, 0.23849093463580379, 0.2384909346311454,
        0.2384909346264871, 0.23849093462182877, 0.23849093461717041,
        0.23849093461251208, 0.23849093460785373, 0.23849093460319537,
        0.2384909345985371, 0.23849093459387868, 0.23849093458922033,
        0.23849093458456197, 0.23849093457990361, 0.23849093457524534,
        0.23849093457058701, 0.23849093456592868, 0.23849093456127032,
        0.23849093455661202, 0.23849093455195361, 0.23849093454729525,
        0.23849093454263687, 0.23849093453797865, 0.23849093453332024,
        0.23849093452866194, 0.23849093452400352
    ]

    realModel['theta'] = [
        0.22199316135627545, 0.87073228953402304, 0.20671913831457267,
        0.91861088834692606, 0.48841117787717347
    ]

    if realModel == calcModel:
        print("PASSED : train Function")
    else:
        print("FAILED : train Function")
Example #27
 def test_find_solution(self):
     X, y = gen_regression_data()
     solver = linReg.LinearRegressionSolver()
     with CapturedStdout():
         optimizationResults = find_solution(
             solver,
             X,
             y,
             showFailureRateTrain=True,
             optimizationParams={
                 "nnTopology":
                 "",
                 "Lambda": [0.01, 0.1, 1],
                 "functions":
                 [[], [lambda x: x[0]**2, lambda x: x[1]**2],
                  [lambda x: x[0]**2, lambda x: x[1]**2, lambda x: x[2]**2]]
             },
             files=[],
             log={
                 "log_dir": "out",
                 "log_file_name": "mlak"
             })
     self.assertAlmostEqual(optimizationResults.failureRateTest, 1e-07, 6)
Example #28
def main():
    #load dataset
    data = np.genfromtxt("../datasets/mdataset.csv", delimiter=",")

    #create model
    linreg = LinearRegression(data, 2, 'test')
    linreg.describeModel()

    #training model
    epochs = 60
    linreg.training(epochs, 0.001)


    ##plot data with result lines
    plt.figure(1)


    axis1 = [min(data[:,0]), max(data[:,0]), min(data[:,2]), max(data[:,2])]
    axis2 = [min(data[:,1]), max(data[:,1]), min(data[:,2]), max(data[:,2])]
    axis = [min([axis1[0], axis2[0]]), max([axis1[1], axis2[1]]), 
            min([axis1[2], axis2[2]]), max([axis1[3], axis2[3]])]

    setx = np.asmatrix(np.linspace(axis[0], axis[1])).T
    x0 = np.ones((setx.size, 1)) 
    x = np.concatenate((x0, setx, setx), axis=1)

    plt.scatter(data[:,0], data[:,2])
    plt.scatter(data[:,1], data[:,2])
    plt.plot(setx, linreg.modelFunction(x))
    plt.axis(axis)

    #plt.subplot(212)
    
    #setx = np.linspace(axis[0], axis[1])
    
    #plt.plot(setx, linreg.modelFunction(setx))
    #plt.axis(axis)

    plt.show()
Example #29
wrongIn = 0.0
runs = 1000
d = 100
aveW = np.zeros((1,6))
show = False

for i in range(0,runs):
    print "Running test # " + str(i)
    
    x = np.insert(np.random.random((d,2)) * 2 - 1,0,np.ones((1,d)), axis=1)
    x = np.append(x, x[:,1:2] * x[:,2:3], axis=1)
    x = np.append(x, np.square(x[:,1:3]), axis=1)
    truth = np.sign(np.square(x[:,1]) + np.square(x[:,2]) - .6)
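    # flip the sign of 10% of the labels to simulate classification noise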
    noise = np.append(np.ones(int(d*.9)), np.ones(d - int(d*.9)) * -1, axis=0)
    np.random.shuffle(noise)
    truth *= noise
    i,o,w = lr.runLR(x, truth, show=show)
    
    plotX = np.linspace(-1,1,1000)
    plotY = np.sqrt(np.square(plotX) * -1 + .6) 
    plotX = np.append(plotX, plotX, axis=0)
    plotY = np.append(plotY, -plotY, axis=0)
    green = x[(truth == 1), 1:]
    red = x[(truth < 1), 1:]  
    plot(green, red, [7,1,7], axis=311, show=show, other=[plotX, plotY,'b-'])
    pause(.1)
    
    wrongIn += i
    aveW = aveW + w
print "Average of " + str(wrongIn/runs) + " wrong in sample per run"
fractionWrong = (wrongIn/runs)/d
print "%f incorrect on average in sample"%(fractionWrong)
Example #30
	def test_regression(self):
		#checks a simple x, y data set against known values
		self.assertEqual((0,[12.706204736432095, -12.706204736432095],1),LinearRegression.MyFun(np.array([1,0]),np.array([0,1])))
Example #31
    def __init__(self,learning_rate=0.13, n_epochs=50000,
                           dataset='sum.pkl',
                           batch_size=100, feature_num=282):
        """
        stochastic gradient descent optimization of a log-linear model

        :type learning_rate: float
        :param learning_rate: learning rate used (factor for the stochastic
                              gradient)

        :type n_epochs: int
        :param n_epochs: maximal number of epochs to run the optimizer

        :type dataset: string
        :param dataset: the path of the summary dataset file
        """

        ######################
        #   Preparing Data   #
        ######################
        print('\n... Preparing Data')

        datasets = self.load_data(dataset,batch_size)

        train_set_x, train_set_y, train_set_z = datasets[0]
        valid_set_x, valid_set_y, valid_set_z = datasets[1]
        test_set_x, test_set_y, test_set_z = datasets[2]
        
        print( 'train_set_x dimensions ' + str(train_set_x.get_value(borrow=True).shape[0]) + ' ' + 
            str(train_set_x.get_value(borrow=True).shape[1]) )
        print( 'valid_set_x dimensions ' + str(valid_set_x.get_value(borrow=True).shape[0]) + ' ' + 
            str(valid_set_x.get_value(borrow=True).shape[1]) )
        print( 'test_set_x dimensions ' + str(test_set_x.get_value(borrow=True).shape[0]) + ' ' + 
            str(test_set_x.get_value(borrow=True).shape[1]) )
        
        # compute number of minibatches for training, validation and testing
        self.n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
        self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
        self.n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

        #print( 'n_train_batches ' + str(self.n_train_batches) )
        #print( 'n_valid_batches ' + str(self.n_valid_batches) )
        #print( 'n_test_batches ' + str(self.n_test_batches) +'\n' )

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print('... building the model')

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        # generate symbolic variables for input (x and y represent a minibatch)
        x = T.matrix('x')  # data from features
        wv = T.matrix('wv') # data from vectors
        y = T.ivector('y')  # probs, presented as 1D vector of [int] labels

        ###### feature ###### word2vec ######
        word2vec_num = 300

        ####################### start of CNN #########################
        
        # Initialize parameters
        rng = numpy.random.RandomState(23455)
        nkerns=200
        v_height = word2vec_num # to be changed
        image_height = v_height
        image_width = 1
        filter_height = 2 if v_height%2==1 else 3
        filter_width = 1
        pool_height = 2
        pool_width = 1

        # Reshape the matrix of rasterized vectors of shape (batch_size, image_height)
        # to a 4D tensor compatible with our LeNetConvPoolLayer;
        # image_height (= word2vec_num = 300) is the size of the word vectors.
        conv_layer_input = wv.reshape((batch_size, 1, image_height, image_width))

        # Construct the first convolutional pooling layer:
        # filtering reduces the input to (image_height - filter_height + 1, 1),
        # and maxpooling shrinks it further by pool_height; the 4D output tensor
        # has shape (batch_size, nkerns, (image_height - filter_height + 1) / pool_height, 1)
        conv_layer = LeNetConvPoolLayer(
            rng,
            input=conv_layer_input,
            image_shape=(batch_size, 1, image_height, image_width),
            filter_shape=(nkerns, 1, filter_height, filter_width),
            poolsize=(pool_height, pool_width)
        )

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images),
        # so flatten the conv output to (batch_size, nkerns * output_height * 1)
        conv_layer_output = conv_layer.output.flatten(2)
        
        ####################### End of CNN ##############################
        ####################### Start of concatenation ##################

        word2vec_in = nkerns*(v_height-filter_height+1)/pool_height
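        # (word2vec_in is the flattened conv+pool output size: nkerns feature
        # maps of height (v_height - filter_height + 1) / pool_height, width 1)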
        feature_in = feature_num
        n_out = 100

        # first fully-connected tanh layer
        word2vec_hidden = HiddenLayer(
            rng,
            input=conv_layer_output,
            n_in=word2vec_in,
            n_out=n_out,
            activation=T.tanh
        )
        
        # first fully-connected tanh layer
        feature_hidden = HiddenLayer(
            rng,
            input=x,
            n_in=feature_in,
            n_out=n_out,
            activation=T.tanh
        )
        
        concat = word2vec_hidden.output + feature_hidden.output

        ####################### End of concatenation #####################

        ## Concatenate x with word2vec ##
        input_x = concat
        
        # Set up vars
        rng = numpy.random.RandomState(23455)
        n_in_0 = n_out
        #n_in_0 = nkerns*(v_height-filter_height+1)/pool_height + feature_num
        layer_dim = [ n_in_0/3*2, n_in_0/9*4 ]
        #layer_dim = [ 100, 50 ]
        n_out_0 = layer_dim[0]

        # first fully-connected tanh layer
        layer0 = HiddenLayer(
            rng,
            input=input_x,
            n_in=n_in_0,
            n_out=n_out_0,
            activation=T.tanh
        )

        # second fully-connected tanh layer
        n_in_1 = n_out_0
        n_out_1 = layer_dim[1]
        layer1 = HiddenLayer(
            rng,
            input=layer0.output,
            n_in=n_in_1,
            n_out=n_out_1,
            activation=T.tanh
        )

        # third fully-connected tanh layer
        n_in_2 = n_out_1
        n_out_2 = feature_num
        layer2 = HiddenLayer(
            rng,
            input=layer1.output,
            n_in=n_in_2,
            n_out=n_out_2,
            activation=T.tanh
        )

        # classify the values of the fully-connected tanh layer
        classes = 30  # number of target classes (unused; the regression layer below has n_out=1)
        self.classifier = LinearRegression(input=layer2.output, n_in=n_out_2, n_out=1)

        # cost: the classifier's error on the minibatch, in symbolic form
        cost = self.classifier.errors(y)

        # batch_size == row size == weight vector row size
        self.test_model = theano.function(
            inputs=[index],
            outputs=self.classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size],
                wv: test_set_z[index * batch_size: (index + 1) * batch_size]
            }
        )

        self.validate_model = theano.function(
            inputs=[index],
            outputs=self.classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size],
                wv: valid_set_z[index * batch_size: (index + 1) * batch_size]
            }
        )

        # create a update list by gradient descent
        params = feature_hidden.params + word2vec_hidden.params + layer2.params + layer1.params + layer0.params + [self.classifier.W, self.classifier.b] + conv_layer.params 
        grads = T.grad(cost, params)
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)
        ]

        # train model
        self.train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size],
                wv: train_set_z[index * batch_size: (index + 1) * batch_size]
            }
        )
Example #32
import numpy as np
from random import seed, gauss
import LinearRegression

def getRandomData(mcount):
	seed(1)
	inputs = np.matrix([
		[gauss(0, 1) for i in range(1, mcount + 1)], 
		[gauss(0, 1)  for i in range(1, mcount + 1)]])
	outputs = LinearRegression.addOneRow(inputs).T * np.matrix([[5], [3], [4]]) + gauss(0, 0.1)  # weights = [5, 3, 4]
	return inputs, outputs

a, b = getRandomData(3000)

weights = LinearRegression.teachLinReg(a, b)
#w2 = LinearRegression.batchGradientDescent(linder, a, b, np.matrix([[0, 0, 0]]).T, 0.0005, 4000)
print weights
print LinearRegression.calcLinRegError(a, b, weights)
#print LinearRegression.calcLinRegError(a, b, w2)
Example #33
def detectSpeed(j, f):
    print("speed {0} started".format(j));

    M = 2;
    h = 1;
    timespan = 6;
    size, speed = int(3600 / f), [];
    # data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/Realtime_30/__JNRTDB_YCH_LIC6205.PV.npy")).T;
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)));
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)));
    # y1 = data[:size * (h + 0), j];
    # X1 = np.mat(np.arange(y1.shape[0])).T;
    # knots = findKnots2(y1.A.flatten());
    # f1 = LinearRegression.RegressionSplineFunction(int((h + 0) * 60 / timespan) + M - 2, M, knots);
    # m1 = LinearRegression.LinearRegression().fit(X1, y1, [f1]);
    # sY1 = m1.predictValue(X1);
    # X1 = X1[:, :];
    # y1 = y1[:, :];
    # sY1 = sY1[:, :];
    # speed.extend(getSpeedM2(m1.beta, f1.knots, X1).A.flatten().tolist());

    if not os.path.isfile(f"{f}/speed_{j}_speed.npy"):
        totalCount = math.floor((data.shape[0] - 0) / (size * h));

        for i in range(0, totalCount):
            y2 = data[i * size * h - 0:(i + 1) * size * h + 0, j];
            X2 = np.mat(np.arange(y2.shape[0])).T;
            knots = findKnots3(y2.A.flatten());
            f2 = LinearRegression.RegressionSplineFunction(int((h + 0) * 60 / timespan) + M - 2, M, knots);
            m2 = LinearRegression.LinearRegression().fit(X2, y2, [f2]);
            sY2 = m2.predictValue(X2);
            X2 = X2[:, :];
            y2 = y2[:, :];
            sY2 = sY2[:, :];
            speed.extend(getSpeedM2(m2.beta, f2.knots, X2).A.flatten().tolist());

            # plt.figure(1, (12, 8));
            # # plt.get_current_fig_manager().window.showMaximized();
            # plt.subplot(111);
            # plt.title(f"{i}, {m2.r2}");
            # plt.plot(X2.A.flatten(), y2.A.flatten(), "-xk");
            # plt.plot(X2.A.flatten(), sY2.A.flatten(), "-or");
            # for x in f2.knots:
            #     plt.axvline(x, color = "b");
            # # plt.scatter(f1.knots, [y1.mean()] * len(f1.knots), marker="*", color = "b");
            # # plt.subplot(212);
            # # plt.title(str(i));
            # # plt.plot(X2.A.flatten(), y2.A.flatten(), "-x");
            # # plt.plot(X2.A.flatten(), sY2.A.flatten(), color = "red");
            # plt.show(block = True);
            # plt.savefig(f"/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/speed_images_history_YCH_LIC6206.PV/{i}.png");
            # print(f"{i}/{totalCount} saved.");
            # plt.close();

            # X1, y1, sY1, f1 = X2, y2, sY2, f2;

        print("speed history completed.");
        speed = np.array(speed);
        np.save(f"{f}/speed_{j}_speed.npy", speed);
    else:
        speed = np.load(f"{f}/speed_{j}_speed.npy");

    speedMean, speedStd = speed.mean(), speed.std();
    print(np.logical_or((speed - speedMean) / speedStd < -6, (speed - speedMean) / speedStd > 6).sum());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.showMaximized();
    plt.hist(speed, bins = 1000);
    for x in [speedMean, speedMean - 6 * speedStd, speedMean + 6 * speedStd]:
        plt.axvline(x, color = "b");
    plt.show(block = True);
    plt.close();

    deltaValues = np.diff(data[:, j], 1, 0);
    deltaMean, deltaStd = deltaValues.mean(), deltaValues.std();
    print(np.logical_or((deltaValues - deltaMean) / deltaStd < -6, (deltaValues - deltaMean) / deltaStd > 6).sum());

    plt.figure(1, (12, 8));
    plt.get_current_fig_manager().window.showMaximized();
    plt.hist(deltaValues.A.flatten(), bins = 1000);
    for x in [deltaMean, deltaMean - 6 * deltaStd, deltaMean + 6 * deltaStd]:
        plt.axvline(x, color = "b");
    plt.show(block = True);
    plt.close();

    indices1 = np.argwhere(speed < (speedMean - 6 * speedStd))[:, 0].flatten().tolist() + np.argwhere(speed > (speedMean + 6 * speedStd))[:, 0].flatten().tolist();
    indices1.sort();
    # showAnomaly(indices1, j, size, data, marks);

    # h = 1;
    # startIndex, offset, values = size * h, int(12 * 60 / f), None;
    # if not os.path.isfile("{0}/speed_{1}_values.npy".format(f, j)):
    #     ftn = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M);
    #     X = ftn.getX(np.mat(np.arange(size * h)).T);
    #     x = np.mat([size * h - 1 - offset]);
    #
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         if M == 3:
    #             T = np.multiply(np.hstack(tuple([x - k for k in ftn.knots])), np.hstack(tuple([(x > k) - 0 for k in ftn.knots])));
    #
    #             # values = [calcSpeedM3(i, j, offset, size, h, data, X, x, T) for i in range(startIndex, size * 24 * 10)];
    #             # showDiff(speed[startIndex: startIndex + len(values)].A.flatten().tolist(), values, size * 6);
    #
    #             values = pool.starmap(calcSpeedM3, [(i, j, offset, size, h, data, X, x, T) for i in range(startIndex, data.shape[0] - offset)]);
    #         else:
    #             T = np.hstack(tuple([(x > k) - 0 for k in ftn.knots]));
    #
    #             # values = [calcSpeedM2(i, j, offset, size, h, data, X, T) for i in range(startIndex, size * 24 * 10)];
    #             # showDiff(speed[startIndex: startIndex + len(values)].A.flatten().tolist(), values, size * 6);
    #
    #             values = pool.starmap(calcSpeedM2, [(i, j, offset, size, h, data, X, T) for i in range(startIndex, data.shape[0] - offset)]);
    #     np.save("{0}/speed_{1}_values.npy".format(f, j), np.mat(values).T);
    #     print("realtime speed completed.");
    #
    # values = np.load(f"{f}/speed_{j}_values.npy");
    # valuesMean, valuesStd = values.mean(), values.std();
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(values, bins = 1000);
    # plt.show(block = True);
    # plt.close();

    # indices2 = (np.argwhere(values < (speedMean - 6 * speedStd))[:, 0].flatten() + startIndex).tolist() + (np.argwhere(values > (speedMean + 6 * speedStd))[:, 0].flatten() + startIndex).tolist();
    # indices2.sort();
    # showAnomaly(indices2, j, size, data, marks);

    forest = None;
    if not os.path.isfile("{0}/speed_{1}_forest.npy".format(f, j)):
        dataSet = np.mat(speed).T;
        forest = IsolationForest(200, 2 ** 9, CurvesThresholdFinder(0.65, 0.68, 0.73, False));
        forest.fill(dataSet);
        print("forest fill completed");
        forest.train(dataSet);
        print("forest train completed");

        with open("{0}/speed_{1}_forest.npy".format(f, j), "wb") as file:
            pickle.dump(forest, file, protocol = pickle.DEFAULT_PROTOCOL);
    else:
        with open("{0}/speed_{1}_forest.npy".format(f, j), "rb") as file:
            forest = pickle.load(file);

    # scores = None;
    # if not os.path.isfile("{0}/speed_{1}_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         scores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in values.A.flatten().tolist()]);
    #     np.save("{0}/speed_{1}_scores.npy".format(f, j), np.mat(scores).T);
    #     print("realtime score completed.");
    #
    # scores = np.mat(np.load("{0}/speed_{1}_scores.npy".format(f, j)));
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(scores.A.flatten(), bins = 1000);
    # plt.show(block = True);
    # plt.close();

    scores = np.array(forest.scores);
    indices3 = np.argwhere(scores >= forest.threshold)[:, 0].flatten().tolist();
    indices3.sort();
    # showAnomaly(indices3, j, size, data, marks);

    # indices4 = (np.argwhere(values < (speedMean - 3 * speedStd))[:, 0].flatten()).tolist() + (np.argwhere(values > (speedMean + 3 * speedStd))[:, 0].flatten()).tolist();
    # indices4 = [i + startIndex for i in indices4 if values[i, 0] < speedMean - 6 * speedStd or values[i, 0] > speedMean + 6 * speedStd or scores[i] >= forest.threshold];
    # indices4.sort();
    # showAnomaly(indices4, j, size, data, marks);

    # deltaScores = None;
    # if not os.path.isfile("{0}/speed_{1}_delta_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         deltaScores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in deltaValues.A.flatten().tolist()]);
    #     np.save("{0}/speed_{1}_delta_scores.npy".format(f, j), np.mat(deltaScores).T);
    #
    # deltaScores = np.mat(np.load("{0}/speed_{1}_delta_scores.npy".format(f, j)));

    # indices5 = [i + 1 for i in range(0, deltaValues.shape[0]) if deltaValues[i, 0] < deltaMean - 6 * deltaStd or deltaValues[i, 0] > deltaMean + 6 * deltaStd];
    indices5 = np.argwhere(deltaValues < (deltaMean - 6 * deltaStd))[:, 0].flatten().tolist() + np.argwhere(deltaValues > (deltaMean + 6 * deltaStd))[:, 0].flatten().tolist();
    indices5 = [i + 1 for i in indices5];
    indices5.sort();
    # showAnomaly(indices5, j, size, data, marks);
    # showAnomaly2(indices4, indices5, j, size, data, marks);

    print("speed {0} completed".format(j));
Example #34
wrongIn = 0.0
runs = 1000
d = 100
aveW = np.zeros((6))

for i in range(0,runs):
    print "Running test # " + str(i)
    
    x = np.insert(np.random.random((d,2)) * 2 - 1,0,np.ones((1,d)), axis=1)
    x = np.append(x, x[:,1:2] * x[:,2:3], axis=1)
    x = np.append(x, np.square(x[:,1:3]), axis=1)
    truth = np.sign(np.square(x[:,1]) + np.square(x[:,2]) - .6)
    noise = np.append(np.ones(int(d*.9)), np.ones(d - int(d*.9)) * -1, axis=0)
    np.random.shuffle(noise)
    truth *= noise
    i,o,w = lr.runLR(x, truth, show=False)
    
    wrongIn += i
    aveW = aveW + w

wrongIn = 0.0    
for i in range(0,runs):
    print "Running test # " + str(i)
    
    x = np.insert(np.random.random((d,2)) * 2 - 1,0,np.ones((1,d)), axis=1)
    x = np.append(x, x[:,1:2] * x[:,2:3], axis=1)
    x = np.append(x, np.square(x[:,1:3]), axis=1)
    truth = np.sign(np.square(x[:,1]) + np.square(x[:,2]) - .6)
    noise = np.append(np.ones(int(d*.9)), np.ones(d - int(d*.9)) * -1, axis=0)
    np.random.shuffle(noise)
    truth *= noise
Example #35
def calcSpeedM3(i, j, offset, size, h, data, X, x, T):
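    # Fit on the trailing size*h window that ends `offset` samples after i,
    # then evaluate the fitted spline's derivative (the speed) at the position
    # corresponding to sample i (an assumption; getSpeedM3Internal is a
    # project-local helper).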
    y = data[i + 1 + offset - size * h:i + 1 + offset, j];
    m = LinearRegression.LinearRegression().fit(X, y);
    return getSpeedM3Internal(m.beta, x, T)[0, 0];
Example #36
def detectAmplitude(j, f):
    print("amplitude {0} started".format(j));

    M = 3;
    h = 24;
    size, sY = int(3600 / f),  [];
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)));
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)));
    # y1 = data[:size * (h + 1), j];
    # X1 = np.mat(np.arange(y1.shape[0])).T;
    # m1 = LinearRegression.LinearRegression().fit(X1, y1, [LinearRegression.RegressionSplineFunction((h + 1) + M - 2, M)]);
    # sY1 = m1.predictValue(X1);
    # X1 = X1[:-size, :];
    # y1 = y1[:-size, :];
    # sY1 = sY1[:-size, :];
    # sY.extend(sY1.A.flatten().tolist());

    if not os.path.isfile(f"{f}/amplitude_{j}_amplitude.npy"):
        totalCount = math.floor((data.shape[0] - 0) / (size * h));

        for i in range(0, totalCount):
            y2 = data[i * size * h - 0:(i + 1) * size * h + 0, j];
            X2 = np.mat(np.arange(y2.shape[0])).T;
            m2 = LinearRegression.LinearRegression().fit(X2, y2, [LinearRegression.RegressionSplineFunction(h + M - 2, M)]);
            sY2 = m2.predictValue(X2);
            X2 = X2[:, :];
            y2 = y2[:, :];
            sY2 = sY2[:, :];
            sY.extend(sY2.A.flatten().tolist());

            # plt.figure(1, (12, 8));
            # # plt.get_current_fig_manager().window.showMaximized();
            # plt.subplot(111);
            # plt.title(f"{i}, {m2.r2}");
            # plt.plot(X2.A.flatten(), y2.A.flatten(), "-xk");
            # plt.plot(X2.A.flatten(), sY2.A.flatten(), "-or");
            # for x in f2.knots:
            #     plt.axvline(x, color = "b");
            # # plt.scatter(f1.knots, [y1.mean()] * len(f1.knots), marker="*", color = "b");
            # # plt.subplot(212);
            # # plt.title(str(i));
            # # plt.plot(X2.A.flatten(), y2.A.flatten(), "-x");
            # # plt.plot(X2.A.flatten(), sY2.A.flatten(), color = "red");
            # plt.show(block = True);
            # plt.savefig(f"/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/amplitude_images_history_YCH_FI6221.PV/{i}.png");
            # print(f"{i}/{totalCount} saved.");
            # plt.close();

            # X1, y1, sY1, f1 = X2, y2, sY2, f2;

        print("amplitude history completed.");
        amplitude = data[: len(sY), j].A.flatten() - np.array(sY);
        np.save(f"{f}/amplitude_{j}_amplitude.npy", amplitude);
    else:
        amplitude = np.load(f"{f}/amplitude_{j}_amplitude.npy");

    amplitudeMean, amplitudeStd = amplitude.mean(), amplitude.std();
    print(DataHelper.testNormalDistribution(amplitude));
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(amplitude, bins = 1000);
    # plt.show(block = True);
    # plt.close();

    indices1 = np.argwhere(amplitude < (amplitudeMean - 6 * amplitudeStd))[:, 0].flatten().tolist() + np.argwhere(amplitude > (amplitudeMean + 6 * amplitudeStd))[:, 0].flatten().tolist();
    indices1.sort();
    showAnomaly(indices1, j, size, data, marks);

    h, m = 24, 12; # 24 hours, 12 minutes
    startIndex, offset, values = size * h, int(m * 60 / f), None;
    if not os.path.isfile(f"{f}/amplitude_{j}_values.npy"):
        with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
            values = pool.starmap(calcAmplitude, [(i, j, offset, size, h, M, data) for i in range(startIndex, data.shape[0] - offset)]);
        np.save("{0}/amplitude_{1}_values.npy".format(f, j), np.array(values));
    else:
        values = np.load(f"{f}/amplitude_{j}_values.npy");

    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.showMaximized();
    # plt.hist(values, bins = 1000);
    # plt.show(block = True);
    # plt.close();

    indices2 = (np.argwhere(values < (amplitudeMean - 6 * amplitudeStd))[:, 0] + startIndex).tolist() + (np.argwhere(values > (amplitudeMean + 6 * amplitudeStd))[:, 0] + startIndex).tolist();
    indices2.sort();
    showAnomaly(indices2, j, size, data, marks);

    # forest = None;
    # if not os.path.isfile("{0}/amplitude_{1}_forest.npy".format(f, j)):
    #     forest = IsolationForest(200, 2 ** 9, CurvesThresholdFinder(0.65, 0.68, 0.73, False));
    #     forest.fill(amplitude);
    #     print("forest fill completed");
    #     forest.train(amplitude);
    #     print("forest train completed");
    #
    #     with open("{0}/amplitude_{1}_forest.npy".format(f, j), "wb") as file:
    #         pickle.dump(forest, file, protocol = pickle.DEFAULT_PROTOCOL);
    # else:
    #     with open("{0}/amplitude_{1}_forest.npy".format(f, j), "rb") as file:
    #         forest = pickle.load(file);
    #
    # scores = None;
    # if not os.path.isfile("{0}/amplitude_{1}_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         scores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in values.A.flatten().tolist()]);
    #     np.save("{0}/amplitude_{1}_scores.npy".format(f, j), np.mat(scores).T);
    #
    # scores = np.mat(np.load("{0}/amplitude_{1}_scores.npy".format(f, j)));
    # plt.figure(1, (12, 8));
    # plt.get_current_fig_manager().window.maximize();
    # plt.hist(scores.A.flatten(), bins = 1000);
    # plt.show(block = True);
    # plt.close();

    # indices3 = (np.argwhere(scores >= forest.threshold)[:, 0].flatten() + startIndex).tolist();
    # indices3.sort();
    # showAnomaly(indices3, j, size, data, marks);

    # indices4 = (np.argwhere(values < (amplitudeMean - 3 * amplitudeStd))[:, 0].flatten()).tolist() + (np.argwhere(values > (amplitudeMean + 3 * amplitudeStd))[:, 0].flatten()).tolist();
    # indices4 = [i + startIndex for i in indices4 if values[i, 0] < amplitudeMean - 6 * amplitudeStd or values[i, 0] > amplitudeMean + 6 * amplitudeStd or scores[i] >= forest.threshold];
    # indices4.sort();
    # showAnomaly(indices4, j, size, data, marks);

    print("amplitude {0} completed".format(j));
Example #37
import numpy as np
import LinearRegression as lr
from utils import points2weights

wrongIn = 0.0
runs = 1000
d = 100

for i in range(0,runs):
    print "Running test # " + str(i)
    
    f = points2weights(np.random.random((2,2)) * 2 - 1)
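    # points2weights presumably turns two random points into the weight vector
    # of the line through them (a helper from the local utils module)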
    x = np.insert(np.random.random((d,2)) * 2 - 1,0,np.ones((1,d)), axis=1)
    truth = np.sign(np.dot(x,f))
    i,o,w = lr.runLR(x, truth)
    
    wrongIn += i
print "Average of " + str(wrongIn/runs) + " wrong in sample per run"
fractionWrong = (wrongIn/runs)/d
print "%f incorrect on average in sample"%(fractionWrong)

a = abs(fractionWrong - 0)
b = abs(fractionWrong - 0.001)
c = abs(fractionWrong - 0.01)
d = abs(fractionWrong - 0.1)

if min([a,b,c,d]) == a :
    print "Answer is A"
elif min([a,b,c,d]) == b :
    print "Answer is B"
elif min([a,b,c,d]) == c :
    print "Answer is C"
else :
    print "Answer is D"
Example #38
 def type_error(self):
     #checks that a string argument raises TypeError
     with self.assertRaises(TypeError):
         LinearRegression.MyFun('b',5)
Example #39
import numpy as np
import LinearRegression as lr
from utils import points2weights

wrongOut = 0.0
runs = 1000
d = 100
D = 1000
for i in range(0,runs):
    print "Running test # " + str(i)
    
    f = points2weights(np.random.random((2,2)) * 2 - 1)
    x = np.insert(np.random.random((d,2)) * 2 - 1,0,np.ones((1,d)), axis=1)    
    truth = np.sign(np.dot(x,f))
    X = np.insert(np.random.random((D,2)) * 2 - 1,0,np.ones((1,D)),axis=1)
    truthX = np.sign(np.dot(X,f))    
    
    i,o,w = lr.runLR(x, truth, X=X, truthX=truthX)
    wrongOut += o
print "Average of " + str(wrongOut/runs) + " wrong out of sample per run"
print "%f incorrect on average out of sample"%((wrongOut/runs/D))
fractionWrong = (wrongOut/runs)/D
print "%f incorrect on average out of sample"%(fractionWrong)

a = abs(fractionWrong - 0)
b = abs(fractionWrong - 0.001)
c = abs(fractionWrong - 0.01)
d = abs(fractionWrong - 0.1)

if min([a,b,c,d]) == a :
    print "Answer is A"
elif min([a,b,c,d]) == b :
    print "Answer is B"
elif min([a,b,c,d]) == c :
    print "Answer is C"
else :
    print "Answer is D"
Example #40
def main():
    loaddata()

    feature_sqft_living = np.array(train_data['sqft_living'])
    feature_bedrooms = np.array(train_data['bedrooms'])
    outputs = np.array(train_data['price'])

    # Model 1 features
    Model1_features = ['sqft_living']
    Model1_output = ['price']

    feature_matrix1, output_vector1 = LR.extract_data_from_features(
        train_data, Model1_features, Model1_output)

    feature_matrix1_targets, output_vector1_targets = LR.extract_data_from_features(
        test_data, Model1_features, Model1_output)

    step_size1 = 7.0e-12
    tolerance1 = 2.5e7
    # you can set your own initial weights for this question, but it will take some time to train
    init_weights1 = np.array([-47000.0, 1.0]).reshape((2, 1))

    # Model 1 training
    Model1_weights = LR.regression_gradient_descent(feature_matrix1,
                                                    output_vector1,
                                                    init_weights1, step_size1,
                                                    tolerance1)

    test1_predictions = LR.predict(feature_matrix1_targets, Model1_weights)
    print "The first house prediction price of test data", test1_predictions[0]
    # Model 1 RSS
    RSS1 = LR.get_residual_sum_of_squares(feature_matrix1_targets,
                                          Model1_weights,
                                          output_vector1_targets)
    print "RSS of model 1 ", RSS1

    # Model 2 features

    Model2_features = ['sqft_living', 'sqft_living15']
    Model2_output = ['price']
    # extract matrix from training data correspond to features and output
    feature_matrix2, output_vector2 = LR.extract_data_from_features(
        train_data, Model2_features, Model2_output)

    feature_matrix2_targets, output_vector2_targets = LR.extract_data_from_features(
        test_data, Model2_features, Model2_output)

    # Set parameters
    step_size2 = 4.0e-12
    tolerance2 = 1.0e9
    init_weights2 = np.array([-100000.0, 1.0, 1.0]).reshape((3, 1))

    # Model 2 training
    Model2_weights = LR.regression_gradient_descent(feature_matrix2,
                                                    output_vector2,
                                                    init_weights2, step_size2,
                                                    tolerance2)

    test2_predictions = LR.predict(feature_matrix2_targets, Model2_weights)
    print "The first house prediction price of test data", test2_predictions[0]
    RSS2 = LR.get_residual_sum_of_squares(feature_matrix2_targets,
                                          Model2_weights,
                                          output_vector2_targets)
    print "RSS of model 2 ", RSS2
Example #41
def calcAmplitude(i, j, offset, size, h, M, data):
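    # Fit a spline over the size*h window ending `offset` samples after i and
    # return the residual of sample i against the smoothed value, i.e. the
    # instantaneous amplitude.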
    X = np.mat(np.arange(size * h)).T;
    y = data[i + 1 + offset - size * h: i + 1 + offset, j];
    return data[i, j] - LinearRegression.LinearRegression().fit(X, y, [LinearRegression.RegressionSplineFunction(h + M - 2, M)]).predictValue(np.mat([size * h - 1 - offset]))[0, 0];