def testSpeed():
    startIndex, endIndex = 93, 118
    data = []  # placeholder in the source; fill with the series to analyse
    y = np.mat(data).T
    h, timespan, M = 1, 6, 2
    X = np.mat(np.arange(y.shape[0])).T
    f = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M)
    m = LinearRegression.LinearRegression().fit(X, y, [f])
    yHeat = m.predictValue(X)

    # speed1 = getSpeedM3(m.beta, f.knots, X[startIndex: endIndex, :])
    # print(speed1.A.flatten().tolist())

    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.maximize()
    plt.subplot(211)
    plt.plot(X.A.flatten(), y.A.flatten(), "-xb")
    plt.plot(X.A.flatten(), yHeat.A.flatten(), "-r")
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "or")
    plt.subplot(212)
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "-xb")
    plt.plot(X[startIndex: endIndex, :].A.flatten(), yHeat[startIndex: endIndex, :].A.flatten(), "-r")
    plt.show(block=True)
    plt.close()
def update_figure_polar(value_main_tab, value_analysis, gene_name):
    if gene_name is None:
        raise Exception()
    if value_main_tab == 'main-tab-2' and value_analysis == 'temporal':
        array_gene_time = np.concatenate(
            (gene_data[gene_name]['rep1'],
             gene_data[gene_name]['rep2'],
             gene_data[gene_name]['rep3'][:, [0, 2]]),
            axis=1)
        l_time_reg = []
        for x in range(8):
            l_time_reg.append(
                LinearRegression.make_time_regression(
                    array_gene_time[x, :], simple=False, predict=True))
        l_time_reg_simple = []
        for x in range(8):
            l_time_reg_simple.append(
                LinearRegression.make_time_regression(
                    array_gene_time[x, :], simple=True, predict=False))
        figure_polar = Figures.compute_figure_polar_tab_3(l_time_reg)
        figure_mean = Figures.compute_figure_mean_tab_3(l_time_reg)  # , yaxis_type, yaxis_scale)
        return figure_polar, figure_mean
    raise PreventUpdate
def main():
    data_set = load_boston()
    train_data, train_target, test_data, test_target = LR.split_data(data_set)
    num_features = train_data.shape[1]
    new_train_data = train_data.copy()
    new_test_data = test_data.copy()
    # Augment the features with all pairwise products (including squares).
    for i in range(num_features):
        for j in range(i, num_features):
            new_column = train_data[:, i] * train_data[:, j]
            new_column = new_column.reshape(new_column.shape[0], 1)
            new_train_data = np.append(new_train_data, new_column, 1)
            new_test_column = test_data[:, i] * test_data[:, j]
            new_test_column = new_test_column.reshape(new_test_column.shape[0], 1)
            new_test_data = np.append(new_test_data, new_test_column, 1)
    lr = LR.LinearRegression()
    lr.fit(new_train_data, train_target)
    mse_test = lr.mse(new_test_data, test_target)
    mse_train = lr.mse(new_train_data, train_target)
    print("\nSol. 3.4")
    print("Linear Regression")
    print("{:^15}|{:^15}".format("Input Data", "MSE"))
    print("-" * 30)
    print("{:^15}|{:^15.7}".format("test_data", mse_test))
    print("{:^15}|{:^15.7}".format("train_data", mse_train))
    print("\n")
def main():
    from itertools import combinations

    data_set = load_boston()
    train_data, train_target, test_data, test_target = LR.split_data(data_set)
    min_MSE = sys.maxsize  # sys.maxint does not exist in Python 3
    min_combo = None
    # Exhaustively evaluate every 4-feature subset. combinations() already
    # yields each sorted subset exactly once, so the original dedup of
    # permutations via sorted() is unnecessary.
    for combo in combinations(range(train_data.shape[1]), 4):
        lr = LR.LinearRegression()
        lr.fit(train_data[:, list(combo)], train_target)
        MSE = lr.mse(test_data[:, list(combo)], test_target)
        if min_MSE > MSE:
            min_MSE = MSE
            min_combo = combo
    print("Brute Force")
    print("Best Combination : [{}], by 1-index: {} with MSE = {:.7}".format(
        ", ".join([data_set.feature_names[x] for x in min_combo]),
        [x + 1 for x in min_combo], min_MSE))
def detectChange(j, f):
    print("change {0} started".format(j))
    M = 2
    h = 12
    timespan = 6
    size, speed = int(3600 / f), []
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/data.npy".format(f)))
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/marks.npy".format(f)))

    y1 = data[:size * (h + 1), j]
    X1 = np.mat(np.arange(y1.shape[0])).T
    f1 = LinearRegression.RegressionSplineFunction(int((h + 1) * 60 / timespan) + M - 2, M)
    m1 = LinearRegression.LinearRegression().fit(X1, y1, [f1])
    sY1 = m1.predictValue(X1)
    X1 = X1[:-size, :]
    y1 = y1[:-size, :]
    sY1 = sY1[:-size, :]
    if M == 3:
        speed.extend(getSpeedM3(m1.beta, f1.knots, X1).A.flatten().tolist())
    else:
        speed.extend(getSpeedM2(m1.beta, f1.knots, X1).A.flatten().tolist())

    for i in range(1, math.floor((data.shape[0] - size) / (size * h))):
        y2 = data[i * size * h - size:(i + 1) * size * h + size, j]
        X2 = np.mat(np.arange(y2.shape[0])).T
        f2 = LinearRegression.RegressionSplineFunction(int((h + 2) * 60 / timespan) + M - 2, M)
        m2 = LinearRegression.LinearRegression().fit(X2, y2, [f2])
        sY2 = m2.predictValue(X2)
        X2 = X2[size:-size, :]
        y2 = y2[size:-size, :]
        sY2 = sY2[size:-size, :]
        if M == 3:
            speed.extend(getSpeedM3(m2.beta, f2.knots, X2).A.flatten().tolist())
        else:
            speed.extend(getSpeedM2(m2.beta, f2.knots, X2).A.flatten().tolist())

        plt.figure(1, (12, 8))
        plt.get_current_fig_manager().window.maximize()
        plt.subplot(211)
        plt.title(str(i - 1))
        plt.plot(X1.A.flatten(), y1.A.flatten(), "-x")
        plt.plot(X1.A.flatten(), sY1.A.flatten(), color="red")
        plt.subplot(212)
        plt.title(str(i))
        plt.plot(X2.A.flatten(), y2.A.flatten(), "-x")
        plt.plot(X2.A.flatten(), sY2.A.flatten(), color="red")
        plt.show(block=True)
        plt.close()
        X1, y1, sY1 = X2, y2, sY2

    print("change history completed.")
    speed = np.mat(speed).T
    speedMean, speedStd = speed.mean(), speed.std()
    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.maximize()
    plt.hist(speed.A.flatten(), bins=1000)
    plt.show(block=True)
    plt.close()
def getLinearFactors(self, node):
    """
    Get linear factors for Q_h and Q_a and set node.f_linear = [[Q_h factors], [Q_a factors]].
    :param node: the leaf node whose instances are used to fit the linear q-value model
    :return:
    """
    train_X = [instance.currentObs for instance in node.instances]
    train_Y = [instance.qValue for instance in node.instances]
    l_rate = 0.0001
    n_epochs = 1000
    count = 0
    max_diff = 10000
    tot = None
    # Warm-start from the node's own factors if present, else from its parent's.
    if node.f_linear is not None:
        tot = np.transpose(node.f_linear)
        W = np.delete(tot, self.n_dim, 0)
        b = np.array([tot[self.n_dim]])
        count += 1
    elif node.parent and node.parent.f_linear is not None:
        tot = np.transpose(node.parent.f_linear)
        W = np.delete(tot, self.n_dim, 0)
        b = np.array([tot[self.n_dim]])
    while count < TRIES:
        if tot is not None:
            with tf.Session() as sess:
                LR = lr.LinearRegression(training_epochs=int(n_epochs / 10**count),
                                         learning_rate=l_rate / 10**count)
                LR.read_weights(weights=W, bias=b)
                LR.linear_regression_model()
                temp_diff, temp_W, temp_b = LR.gradient_descent(
                    sess=sess, train_X=train_X, train_Y=train_Y)
        else:
            with tf.Session() as sess:
                LR = lr.LinearRegression(training_epochs=n_epochs, learning_rate=l_rate)
                LR.read_weights()
                LR.linear_regression_model()
                temp_diff, temp_W, temp_b = LR.gradient_descent(
                    sess=sess, train_X=train_X, train_Y=train_Y)
        # Keep the best weights seen so far.
        if temp_diff < max_diff:
            W = temp_W
            b = temp_b
            max_diff = temp_diff
        count += 1
    node.f_linear = np.concatenate((np.transpose(W), np.transpose(b)), axis=1)
    print("finish linear, node: " + str(node.idx))
def getRandomData(mcount):
    seed(1)
    inputs = np.matrix([
        [gauss(0, 1) for i in range(1, mcount + 1)],
        [gauss(0, 1) for i in range(1, mcount + 1)]])
    # True weights are [5, 3, 4]. Note that gauss(0, 0.1) is a single scalar,
    # so the same noise value is added to every output row.
    outputs = LinearRegression.addOneRow(inputs).T * np.matrix([[5], [3], [4]]) + gauss(0, 0.1)
    return inputs, outputs
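# Hedged sketch: addOneRow is not shown in this section. From its use above
# (a 2 x m input matrix becomes 3 x m before being multiplied by a 3 x 1
# weight vector), it plausibly adds a row of ones for the intercept term;
# whether the ones row goes first or last is an assumption here.
def addOneRow_sketch(inputs):
    """Prepend a row of ones to a 2-D matrix (assumed behaviour, not the original)."""
    ones = np.ones((1, inputs.shape[1]))
    return np.vstack((ones, inputs))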
def cross_validator(k, train_data, feature_names, classifier):
    for index, item in enumerate(train_data):
        item.append(feature_names[index])
    random.shuffle(train_data)
    k_splits = np.array_split(train_data, k)
    feature_splits = [[in_item[-1] for in_item in item] for item in k_splits]
    all_accuracy = 0
    num_folds = k  # keep the fold count; the original loop variable shadowed k
    for fold in range(num_folds):
        print("For %s fold" % (fold + 1))
        trainX = []
        trainY = []
        testX = k_splits[fold]
        testY = feature_splits[fold]
        trainX_temp = k_splits[:fold] + k_splits[(fold + 1):]
        trainY_temp = feature_splits[:fold] + feature_splits[(fold + 1):]
        for x in range(len(trainX_temp)):
            trainX.extend(trainX_temp[x])
            trainY.extend(trainY_temp[x])
        if classifier == 1:
            accuracy = kn_classifier.knn_driver(trainX, testX, 4)
        elif classifier == 2:
            accuracy = centroid_classifier.predict(trainX, trainY, testX, testY, 4)
        elif classifier == 3:
            matrix, accuracy = LinearRegression.predict(trainX, trainY, testX, testY)
        print(abs(accuracy))
        all_accuracy += accuracy
    # Average over the number of folds rather than a hard-coded 5.
    k_accuracy = float(all_accuracy) / num_folds
    return abs(k_accuracy)
def test1():
    X, y = LR.make_data()
    image = scatter2image(X, y)
    accumulator, thetas, rhos = transform.hough_line(image)
    show_transform(accumulator, 'hough_transform')
    show_line(image, accumulator, thetas, rhos, 50, 'hough_line')
def train(xs, ys, n):
    w0 = 0.5
    w1 = 0.5
    ldw0 = 0.00001
    ldw1 = 0.0001
    d = []
    for i in range(n):
        def h(x):
            return w0 + w1 * x

        def pdw0(xi, yi):
            return h(xi) - yi

        def pdw1(xi, yi):
            return (h(xi) - yi) * xi

        j = LinearRegression.cost(xs, ys, h)
        dw0 = LinearRegression.partial_derivative(xs, ys, pdw0)
        dw1 = LinearRegression.partial_derivative(xs, ys, pdw1)
        d.append([i, j, w0, dw0, ldw0, ldw0 * dw0, w1, dw1, ldw1, ldw1 * dw1])
        w0 = w0 - (ldw0 * dw0)
        w1 = w1 - (ldw1 * dw1)
        add_hypothensies_trace(xs, w0, w1, 'h' + str(i))
    # Last header fixed: it read 'lw0 * dw0' twice in the original.
    print(tabulate(d, headers=[
        '#', 'J', 'w0', 'dw0', 'lw0', 'lw0 * dw0',
        'w1', 'dw1', 'lw1', 'lw1 * dw1'
    ]))
    return (w0, w1)
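# Hedged sketch of the two helpers assumed above; the actual LinearRegression
# module is not shown in this section. cost is taken to be the usual half
# mean squared error, and partial_derivative the average of a per-sample term.
def cost_sketch(xs, ys, h):
    """J(w) = 1/(2m) * sum((h(x_i) - y_i)^2) -- assumed form."""
    m = len(xs)
    return sum((h(x) - y) ** 2 for x, y in zip(xs, ys)) / (2 * m)


def partial_derivative_sketch(xs, ys, pd):
    """Average the per-sample partial-derivative term pd(x_i, y_i)."""
    m = len(xs)
    return sum(pd(x, y) for x, y in zip(xs, ys)) / m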
def testOne(self):
    X = np.array([[3], [4], [5]])
    expected = np.array([[-1.], [0.], [1.]])
    np.testing.assert_almost_equal(LR.featureNormalize(X), expected)
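# Hedged sketch of featureNormalize consistent with the fixture above:
# (3, 4, 5) maps to (-1, 0, 1) only with the sample standard deviation
# (ddof=1), so that is what this assumed implementation uses.
def featureNormalize_sketch(X):
    """Column-wise standardization: (X - mean) / sample std."""
    return (X - X.mean(axis=0)) / X.std(axis=0, ddof=1)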
def data_handler():
    index_list = list()
    train_list = list()
    test_list = list()
    train_num, test_num = data_partitions[0], data_partitions[1]
    print(train_num, test_num)
    data = pd.read_csv(filename, delimiter=',', dtype=None, header=None)
    # Create a numpy array for manipulation.
    numpy_data = np.array(data)
    labels = np.array(data.head(1))
    for data_class in classes:
        # index_list is a list of int64 numpy arrays.
        index_list.append(np.where(labels == data_class)[1])
    for one_class in index_list:
        train_list.extend(one_class[0:train_num])
        test_list.extend(one_class[train_num:])
    print(train_list)
    print(test_list)
    train = np.array(numpy_data[:, train_list])
    test = np.array(numpy_data[:, test_list])
    print(train.shape, test.shape)
    np.savetxt(train_filename, train, delimiter=',', fmt='%i')
    np.savetxt(test_filename, test, delimiter=',', fmt='%i')

    # KNN
    print("KNN classifier")
    obj = KnnClassification.KnnClassification(10, train_filename, test_filename)
    obj.train()
    # Centroid method
    print("Centroid classifier")
    obj = CentroidMethod.CentroidMethod(train_filename, test_filename)
    obj.pre_process()
    obj.train()
    # Linear regression
    print("Linear regression")
    obj = LinearRegression.LinearRegression(train_filename, test_filename)
    obj.compute_coefficients()
    # SVM
    print("SVM classifier")
    obj = Svm.Svm(train_filename, test_filename)
    obj.train()
def checkCostFunc():
    np.random.seed(2)
    m = 10
    y = np.random.rand(m, )
    y_predicted = np.random.rand(m, )
    calculatedCost = lm.costFunc(m, y, y_predicted)
    realCost = 0.075000505675425072
    # Compare floats with a tolerance rather than exact equality.
    if np.isclose(calculatedCost, realCost):
        print("PASSED : CostFunc Function")
    else:
        print("FAILED : CostFunc Function")
def main():
    data, heights = import_and_scale_training_data(sys.argv[1])
    of = open_output(sys.argv[2])
    for iterations, alpha in [(100, 0.001), (100, 0.005), (100, 0.01), (100, 0.05),
                              (100, 0.1), (100, 0.5), (100, 1), (100, 5), (100, 10),
                              (1000, 0.0005)]:
        lr = LinearRegression.LinearRegressor(iterations=iterations, alpha=alpha, of=of)
        lr.fit(data, heights)
    of.close()
def checkPredict():
    np.random.seed(6)
    theta = np.random.rand(7, )
    X = np.random.rand(10, 7)
    calcX = lm.predict(X, theta)
    realX = np.array([
        1.7089558, 2.20884418, 2.18216447, 1.80692415, 2.12231727,
        1.41312956, 1.82242337, 2.11752865, 1.70792641, 0.8332109
    ])
    if np.all(np.isclose(calcX, realX)):
        print("PASSED : predict Function")
    else:
        print("FAILED : predict Function")
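# Hedged sketch of lm.predict as used above: with X of shape (10, 7) and theta
# of shape (7,), a plain matrix-vector product yields the (10,) prediction
# vector the fixture expects (assumed; the original lm module is not shown).
def predict_sketch(X, theta):
    """Linear predictions y_hat = X @ theta."""
    return X @ theta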
def testOne(self):
    X = np.array([[1, 2], [1, 3], [1, 4], [1, 5]])
    y = np.array([[7.], [6.], [5.], [4.]])
    theta = np.array([[0.1], [0.2]])
    expected = 11.9450
    np.testing.assert_almost_equal(LR.computeCost(X, y, theta), expected)
def testTwo(self):
    X = np.array([[1, 2, 3], [1, 3, 4], [1, 4, 5], [1, 5, 6]])
    y = np.array([[7.], [6.], [5.], [4.]])
    theta = np.array([[0.1], [0.2], [0.3]])
    expected = 7.0175
    np.testing.assert_almost_equal(LR.computeCost(X, y, theta), expected)
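# Hedged sketch of computeCost: the standard half-MSE cost reproduces both
# fixtures above (11.9450 and 7.0175), so this is very likely the intended
# form, though the original LR module is not shown here.
def computeCost_sketch(X, y, theta):
    """J(theta) = 1/(2m) * sum((X @ theta - y)^2)."""
    m = y.shape[0]
    errors = X @ theta - y
    return float((errors ** 2).sum() / (2 * m))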
def test_analyze_linreg(self):
    X, y = gen_regression_data()
    solver = linReg.LinearRegressionSolver()
    with CapturedStdout():
        analyzerResults = analyze(
            solver, X, y,
            optimizationParams={
                "nnTopology": "",
                "Lambda": 0.1,
                "functions": [lambda x: x[0]**2, lambda x: x[1]**2, lambda x: x[2]**2]
            },
            iterations=40, bins=3, tries=4, sample_iterations=40)
    npt.assert_equal(analyzerResults.sampleCountAnalyzis.sampleCount, [
        1, 4, 7, 10, 19, 28, 54, 80, 159, 238, 475, 712, 1423, 2134, 4267, 6400
    ])
    npt.assert_almost_equal(
        analyzerResults.sampleCountAnalyzis.errorTrain, [
            0.00000000e+00, 2.33847837e+01, 3.00222512e+01, 7.96243961e+00,
            1.37056787e+00, 5.14152946e-01, 1.26362331e-01, 6.45051925e-02,
            1.46205243e-02, 7.25704802e-03, 1.67555343e-03, 7.51420424e-04,
            1.82472270e-04, 8.16512666e-05, 2.04713359e-05, 9.08089772e-06
        ], 5)
    npt.assert_almost_equal(analyzerResults.sampleCountAnalyzis.errorCV, [
        4.99885725e+04, 1.75878006e+04, 4.13273401e+03, 4.01030109e+01,
        3.82169238e+00, 6.81074172e-01, 1.43006176e-01, 7.01556101e-02,
        1.44597141e-02, 8.57240292e-03, 1.67551231e-03, 7.42103605e-04,
        1.73911141e-04, 7.83993282e-05, 1.98093851e-05, 8.65960541e-06
    ], 3)
    npt.assert_equal(analyzerResults.iterationCountAnalyzis.iterationCount,
                     [2, 4, 6, 10, 14, 27, 40])
    npt.assert_almost_equal(
        analyzerResults.iterationCountAnalyzis.errorTrain, [
            2.18325422e-01, 1.04552670e-05, 2.27461723e-06, 2.27461723e-06,
            2.27461723e-06, 2.27461723e-06, 2.27461723e-06
        ], 5)
    npt.assert_almost_equal(
        analyzerResults.iterationCountAnalyzis.errorCV, [
            2.14481838e-01, 1.01920583e-05, 2.16874107e-06, 2.16874107e-06,
            2.16874107e-06, 2.16874107e-06, 2.16874107e-06
        ], 5)
def testAmplitude():
    startIndex, endIndex = 2855, 2880
    data = []  # placeholder in the source; fill with the series to analyse
    y = np.mat(data).T
    h, M = 24, 3
    X = np.mat(np.arange(y.shape[0])).T
    # m = LinearRegression.LinearRegression().fit(X, y, [LinearRegression.RegressionSplineFunction(h + M - 2, M)])
    m = LinearRegression.LinearRegression().fit(
        X, y, [LinearRegression.RegressionSplineFunction(int(h * 60 / 60) + M - 2, M)])
    yHeat = m.predictValue(X)
    amplitude = y[startIndex: endIndex, :] - yHeat[startIndex: endIndex, :]
    print(amplitude.A.flatten().tolist())

    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.maximize()
    plt.subplot(211)
    plt.plot(X.A.flatten(), y.A.flatten(), "-xb")
    plt.plot(X.A.flatten(), yHeat.A.flatten(), "-r")
    plt.subplot(212)
    plt.plot(X[startIndex: endIndex, :].A.flatten(), y[startIndex: endIndex, :].A.flatten(), "-xb")
    plt.plot(X[startIndex: endIndex, :].A.flatten(), yHeat[startIndex: endIndex, :].A.flatten(), "-r")
    plt.show(block=True)
    plt.close()
def crossValidation(xArr, yArr, numVal=10):
    '''
    Cross-validation test for ridge regression.
    '''
    m = len(yArr)
    indexList = list(range(m))  # list() so random.shuffle works under Python 3
    errorMat = zeros((numVal, 30))
    # Cross-validation loop.
    for i in range(numVal):
        # Randomly split the data: 90% train, 10% test.
        trainX = []; trainY = []
        testX = []; testY = []
        # Shuffle the indices.
        random.shuffle(indexList)
        for j in range(m):
            if j < m * 0.9:
                trainX.append(xArr[indexList[j]])
                trainY.append(yArr[indexList[j]])
            else:
                testX.append(xArr[indexList[j]])
                testY.append(yArr[indexList[j]])
        # Matrix of regression coefficients, one row per ridge lambda.
        wMat = LR.ridgeTest(trainX, trainY)
        # Evaluate each of the 30 sets of ridge coefficients.
        for k in range(30):
            matTestX = mat(testX)
            matTrainX = mat(trainX)
            # Standardize the test data with the training statistics.
            meanTrain = mean(matTrainX, 0)
            varTrain = var(matTrainX, 0)
            matTestX = (matTestX - meanTrain) / varTrain
            # Test the regression fit.
            yEst = matTestX * mat(wMat[k, :]).T + mean(trainY)
            # Accumulate the squared error.
            errorMat[i, k] = ((yEst.T.A - array(testY)) ** 2).sum()
    # Mean error across the validation runs.
    meanErrors = mean(errorMat, 0)
    minMean = float(min(meanErrors))
    bestWeights = wMat[nonzero(meanErrors == minMean)]
    # Undo the standardization to recover coefficients on the original scale.
    xMat = mat(xArr)
    yMat = mat(yArr).T
    meanX = mean(xMat, 0)
    varX = var(xMat, 0)
    unReg = bestWeights / varX
    print("the best model from ridge regression is:\n", unReg)
    print("with constant term: ", -1 * sum(multiply(meanX, unReg)) + mean(yMat))
def checkAppendIntercept():
    np.random.seed(1)
    X = np.random.rand(10, 5)
    calc_X = lm.appendIntercept(X)
    real_X = np.array([
        [1.00000000e+00, 4.17022005e-01, 7.20324493e-01, 1.14374817e-04, 3.02332573e-01, 1.46755891e-01],
        [1.00000000e+00, 9.23385948e-02, 1.86260211e-01, 3.45560727e-01, 3.96767474e-01, 5.38816734e-01],
        [1.00000000e+00, 4.19194514e-01, 6.85219500e-01, 2.04452250e-01, 8.78117436e-01, 2.73875932e-02],
        [1.00000000e+00, 6.70467510e-01, 4.17304802e-01, 5.58689828e-01, 1.40386939e-01, 1.98101489e-01],
        [1.00000000e+00, 8.00744569e-01, 9.68261576e-01, 3.13424178e-01, 6.92322616e-01, 8.76389152e-01],
        [1.00000000e+00, 8.94606664e-01, 8.50442114e-02, 3.90547832e-02, 1.69830420e-01, 8.78142503e-01],
        [1.00000000e+00, 9.83468338e-02, 4.21107625e-01, 9.57889530e-01, 5.33165285e-01, 6.91877114e-01],
        [1.00000000e+00, 3.15515631e-01, 6.86500928e-01, 8.34625672e-01, 1.82882773e-02, 7.50144315e-01],
        [1.00000000e+00, 9.88861089e-01, 7.48165654e-01, 2.80443992e-01, 7.89279328e-01, 1.03226007e-01],
        [1.00000000e+00, 4.47893526e-01, 9.08595503e-01, 2.93614148e-01, 2.87775339e-01, 1.30028572e-01]
    ])
    if np.all(np.isclose(real_X, calc_X)):
        print("PASSED : appendIntercept Function")
    else:
        print("FAILED : appendIntercept Function")
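# Hedged sketch of appendIntercept consistent with the fixture above: it
# prepends a column of ones to X (the original lm module is not shown).
def appendIntercept_sketch(X):
    """Prepend a bias column of ones: (n, d) -> (n, d + 1)."""
    return np.hstack((np.ones((X.shape[0], 1)), X))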
def checkMakeGradientUpdate():
    np.random.seed(4)
    theta = np.random.rand(20, )
    grads = np.random.rand(20, )
    calcUpdate = lm.makeGradientUpdate(theta, grads)
    realUpdate = np.array([
        0.96702984, 0.54723225, 0.97268436, 0.71481599, 0.69772882,
        0.2160895, 0.97627445, 0.00623026, 0.25298236, 0.43479153,
        0.77938292, 0.19768507, 0.86299324, 0.98340068, 0.16384224,
        0.59733394, 0.0089861, 0.38657128, 0.04416006, 0.95665297
    ])
    if calcUpdate is not None and np.all(np.isclose(calcUpdate, realUpdate)):
        print("PASSED : makeGradientUpdate Function")
    else:
        print("FAILED : makeGradientUpdate Function")
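# Hedged sketch of makeGradientUpdate as one plain gradient step. The learning
# rate is an assumption here; the fixture above was generated with whatever
# value the original lm module hard-codes internally.
def makeGradientUpdate_sketch(theta, grads, alpha=0.001):
    """Vanilla gradient descent update: theta <- theta - alpha * grads."""
    return theta - alpha * grads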
def checkCalcGradients():
    np.random.seed(3)
    m = 10
    x = np.random.rand(m, 20)
    y = np.random.rand(m, )
    y_p = np.random.rand(m, )
    calcGrad = lm.calcGradients(x, y, y_p, m)
    realGrad = np.array([
        -0.05425541, -0.04381124, -0.05959325, -0.03675508, -0.01118115,
        -0.05390415, -0.09321702, -0.01038522, -0.00185729, -0.04773877,
        -0.03408592, 0.00746619, 0.00090633, -0.01870412, -0.00821488,
        -0.01664091, -0.11836125, -0.03610672, -0.08967235, -0.02161973
    ])
    if np.all(np.isclose(calcGrad, realGrad)):
        print("PASSED : calcGradients Function")
    else:
        print("FAILED : calcGradients Function")
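# Hedged sketch of calcGradients: the usual mean-squared-error gradient,
# averaging the per-sample terms x_i * (y_pred_i - y_i) over the m samples
# (assumed form; the original lm module is not shown).
def calcGradients_sketch(x, y, y_p, m):
    """Gradient of half-MSE: (1/m) * x.T @ (y_p - y)."""
    return x.T @ (y_p - y) / m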
def isConstant(y, periods, alpha):
    if y.var() == 0:
        return True
    p1 = [DataHelper.testWhiteNoise(y - y.mean(), m) for m in periods]
    if np.any(np.mat(p1) <= alpha):
        return False
    p2 = LinearRegression.LinearRegression().fit(np.mat(range(0, y.shape[0])).T, y).betaP
    if p2[1, 0] <= alpha:
        return False
    p3 = DataHelper.testRunsLeft((y > np.quantile(y, 0.5)) - 0)
    if p3 <= alpha:
        return False
    print("{0}, {1}, {2}".format(p1, p2.T, p3))
    return True
def testOne(self):
    X = np.array([[1, 5], [1, 2], [1, 4], [1, 5]])
    y = np.array([[1], [6], [4], [2]])
    theta = np.array([[0], [0]])
    alpha = 0.01
    numOfIter = 1000
    expectedTheta = np.array([[5.2148], [-0.5733]])
    # expectedJHist[0] = 0.85426
    [actualTheta, actualJHist] = LR.gradientDescent(X, y, theta, alpha, numOfIter)
    np.testing.assert_almost_equal(actualTheta, expectedTheta, decimal=4)
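# Hedged sketch of gradientDescent consistent with the test above: batch
# gradient descent on the half-MSE cost, returning the final theta plus the
# per-iteration cost history (assumed structure; the LR module is not shown).
def gradientDescent_sketch(X, y, theta, alpha, numOfIter):
    """Batch gradient descent; returns (theta, J_history)."""
    m = y.shape[0]
    J_history = []
    for _ in range(numOfIter):
        errors = X @ theta - y
        theta = theta - (alpha / m) * (X.T @ errors)
        J_history.append(float((errors ** 2).sum() / (2 * m)))
    return theta, J_history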
def checkTrain():
    np.random.seed(5)
    theta = np.random.rand(5, )
    X = np.random.rand(10, 5)
    y = np.random.rand(10, )
    model = {}
    calcModel = lm.train(theta, X, y, model)
    calcModel['J'] = calcModel['J'][:50]
    realModel = {}
    realModel['J'] = [
        0.23849093475226227, 0.23849093474760394, 0.23849093474294566,
        0.23849093473828731, 0.23849093473362895, 0.23849093472897059,
        0.23849093472431226, 0.23849093471965394, 0.23849093471499558,
        0.23849093471033728, 0.23849093470567886, 0.23849093470102059,
        0.23849093469636218, 0.23849093469170385, 0.23849093468704555,
        0.23849093468238722, 0.23849093467772886, 0.2384909346730705,
        0.23849093466841217, 0.23849093466375382, 0.23849093465909549,
        0.23849093465443719, 0.23849093464977877, 0.23849093464512042,
        0.23849093464046217, 0.23849093463580379, 0.2384909346311454,
        0.2384909346264871, 0.23849093462182877, 0.23849093461717041,
        0.23849093461251208, 0.23849093460785373, 0.23849093460319537,
        0.2384909345985371, 0.23849093459387868, 0.23849093458922033,
        0.23849093458456197, 0.23849093457990361, 0.23849093457524534,
        0.23849093457058701, 0.23849093456592868, 0.23849093456127032,
        0.23849093455661202, 0.23849093455195361, 0.23849093454729525,
        0.23849093454263687, 0.23849093453797865, 0.23849093453332024,
        0.23849093452866194, 0.23849093452400352
    ]
    realModel['theta'] = [
        0.22199316135627545, 0.87073228953402304, 0.20671913831457267,
        0.91861088834692606, 0.48841117787717347
    ]
    if realModel == calcModel:
        print("PASSED : test Function")
    else:
        print("FAILED : test Function")
def test_find_solution(self):
    X, y = gen_regression_data()
    solver = linReg.LinearRegressionSolver()
    with CapturedStdout():
        optimizationResults = find_solution(
            solver, X, y,
            showFailureRateTrain=True,
            optimizationParams={
                "nnTopology": "",
                "Lambda": [0.01, 0.1, 1],
                "functions": [
                    [],
                    [lambda x: x[0]**2, lambda x: x[1]**2],
                    [lambda x: x[0]**2, lambda x: x[1]**2, lambda x: x[2]**2]
                ]
            },
            files=[],
            log={"log_dir": "out", "log_file_name": "mlak"})
    self.assertAlmostEqual(optimizationResults.failureRateTest, 1e-07, 6)
def main():
    # Load dataset.
    data = np.genfromtxt("../datasets/mdataset.csv", delimiter=",")
    # Create model.
    linreg = LinearRegression(data, 2, 'test')
    linreg.describeModel()
    # Train model.
    epochs = 60
    linreg.training(epochs, 0.001)
    # Plot data with the fitted line.
    plt.figure(1)
    axis1 = [min(data[:, 0]), max(data[:, 0]), min(data[:, 2]), max(data[:, 2])]
    axis2 = [min(data[:, 1]), max(data[:, 1]), min(data[:, 2]), max(data[:, 2])]
    axis = [min([axis1[0], axis2[0]]), max([axis1[1], axis2[1]]),
            min([axis1[2], axis2[2]]), max([axis1[3], axis2[3]])]
    setx = np.asmatrix(np.linspace(axis[0], axis[1])).T
    x0 = np.ones((setx.size, 1))
    x = np.concatenate((x0, setx, setx), axis=1)
    plt.scatter(data[:, 0], data[:, 2])
    plt.scatter(data[:, 1], data[:, 2])
    plt.plot(setx, linreg.modelFunction(x))
    plt.axis(axis)
    # plt.subplot(212)
    # setx = np.linspace(axis[0], axis[1])
    # plt.plot(setx, linreg.modelFunction(setx))
    # plt.axis(axis)
    plt.show()
runs = 1000
d = 100
aveW = np.zeros((1, 6))
show = False
wrongIn = 0.0  # accumulator was missing in the original
for i in range(0, runs):
    print("Running test # " + str(i))
    x = np.insert(np.random.random((d, 2)) * 2 - 1, 0, np.ones((1, d)), axis=1)
    x = np.append(x, x[:, 1:2] * x[:, 2:3], axis=1)
    x = np.append(x, np.square(x[:, 1:3]), axis=1)
    truth = np.sign(np.square(x[:, 1]) + np.square(x[:, 2]) - .6)
    # Flip the labels on 10% of the points as noise (sizes must be ints).
    noise = np.append(np.ones(int(d * .9)), np.ones(d - int(d * .9)) * -1, axis=0)
    np.random.shuffle(noise)
    truth *= noise
    i, o, w = lr.runLR(x, truth, show=show)
    # Target boundary x1^2 + x2^2 = .6 for plotting (1-D arrays, so axis=0).
    plotX = np.linspace(-1, 1, 1000)
    plotY = np.sqrt(np.square(plotX) * -1 + .6)
    plotX = np.append(plotX, plotX, axis=0)
    plotY = np.append(plotY, -plotY, axis=0)
    green = x[(truth == 1), 1:]
    red = x[(truth < 1), 1:]
    plot(green, red, [7, 1, 7], axis=311, show=show, other=[plotX, plotY, 'b-'])
    pause(.1)
    wrongIn += i
    aveW = aveW + w
print("Average of " + str(wrongIn / runs) + " wrong in sample per run")
fractionWrong = (wrongIn / runs) / d
print("%f incorrect on average in sample" % (fractionWrong))
def test_regression(self):
    # Checks a small, fixed x, y data set.
    self.assertEqual(
        (0, [12.706204736432095, -12.706204736432095], 1),
        LinearRegression.MyFun(np.array([1, 0]), np.array([0, 1])))
def __init__(self, learning_rate=0.13, n_epochs=50000, dataset='sum.pkl',
             batch_size=100, feature_num=282):
    """
    Stochastic gradient descent optimization of a log-linear model.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the summary dataset file
    """
    ######################
    #   Preparing Data   #
    ######################
    print('\n... Preparing Data')

    datasets = self.load_data(dataset, batch_size)
    train_set_x, train_set_y, train_set_z = datasets[0]
    valid_set_x, valid_set_y, valid_set_z = datasets[1]
    test_set_x, test_set_y, test_set_z = datasets[2]

    print('train_set_x dimensions ' + str(train_set_x.get_value(borrow=True).shape[0])
          + ' ' + str(train_set_x.get_value(borrow=True).shape[1]))
    print('valid_set_x dimensions ' + str(valid_set_x.get_value(borrow=True).shape[0])
          + ' ' + str(valid_set_x.get_value(borrow=True).shape[1]))
    print('test_set_x dimensions ' + str(test_set_x.get_value(borrow=True).shape[0])
          + ' ' + str(test_set_x.get_value(borrow=True).shape[1]))

    # Compute the number of minibatches for training, validation and testing.
    self.n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    self.n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    # print('n_train_batches ' + str(self.n_train_batches))
    # print('n_valid_batches ' + str(self.n_valid_batches))
    # print('n_test_batches ' + str(self.n_test_batches) + '\n')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Allocate symbolic variables for the data.
    index = T.lscalar()  # index to a [mini]batch

    # Generate symbolic variables for the input (x and y represent a minibatch).
    x = T.matrix('x')    # data from hand-crafted features
    wv = T.matrix('wv')  # data from word2vec vectors
    y = T.ivector('y')   # probs, presented as a 1-D vector of [int] labels

    word2vec_num = 300

    ####################### start of CNN #########################
    # Initialize parameters.
    rng = numpy.random.RandomState(23455)
    nkerns = 200
    v_height = word2vec_num
    image_height = v_height
    image_width = 1
    filter_height = 2 if v_height % 2 == 1 else 3
    filter_width = 1
    pool_height = 2
    pool_width = 1

    # Reshape the matrix of rasterized vectors of shape (batch_size, v_height)
    # to a 4-D tensor compatible with LeNetConvPoolLayer.
    conv_layer_input = wv.reshape((batch_size, 1, image_height, image_width))

    # Construct the first convolutional pooling layer:
    # filtering reduces the input to (v_height - filter_height + 1, 1),
    # maxpooling reduces this further by a factor of pool_height, so the
    # 4-D output tensor has shape (batch_size, nkerns, reduced_height, 1).
    conv_layer = LeNetConvPoolLayer(
        rng,
        input=conv_layer_input,
        image_shape=(batch_size, 1, image_height, image_width),
        filter_shape=(nkerns, 1, filter_height, filter_width),
        poolsize=(pool_height, pool_width)
    )

    # The HiddenLayer is fully connected, so it operates on 2-D matrices of
    # shape (batch_size, num_pixels), i.e. a matrix of rasterized images.
    conv_layer_output = conv_layer.output.flatten(2)
    ####################### end of CNN ##############################

    ####################### start of concatenation ##################
    # Integer division so the layer sizes stay ints under Python 3.
    word2vec_in = nkerns * (v_height - filter_height + 1) // pool_height
    feature_in = feature_num
    n_out = 100

    # Fully-connected tanh layer over the CNN output.
    word2vec_hidden = HiddenLayer(
        rng,
        input=conv_layer_output,
        n_in=word2vec_in,
        n_out=n_out,
        activation=T.tanh
    )

    # Fully-connected tanh layer over the hand-crafted features.
    feature_hidden = HiddenLayer(
        rng,
        input=x,
        n_in=feature_in,
        n_out=n_out,
        activation=T.tanh
    )

    # Merge the two branches (element-wise sum rather than concatenation).
    concat = word2vec_hidden.output + feature_hidden.output
    ####################### end of concatenation #####################

    input_x = concat

    # Set up variables.
    rng = numpy.random.RandomState(23455)
    n_in_0 = n_out
    # n_in_0 = nkerns * (v_height - filter_height + 1) // pool_height + feature_num
    layer_dim = [n_in_0 // 3 * 2, n_in_0 // 9 * 4]
    # layer_dim = [100, 50]
    n_out_0 = layer_dim[0]

    # First fully-connected tanh layer.
    layer0 = HiddenLayer(
        rng,
        input=input_x,
        n_in=n_in_0,
        n_out=n_out_0,
        activation=T.tanh
    )

    # Second fully-connected tanh layer.
    n_in_1 = n_out_0
    n_out_1 = layer_dim[1]
    layer1 = HiddenLayer(
        rng,
        input=layer0.output,
        n_in=n_in_1,
        n_out=n_out_1,
        activation=T.tanh
    )

    # Third fully-connected tanh layer.
    n_in_2 = n_out_1
    n_out_2 = feature_num
    layer2 = HiddenLayer(
        rng,
        input=layer1.output,
        n_in=n_in_2,
        n_out=n_out_2,
        activation=T.tanh
    )

    # Regress on the values of the last fully-connected tanh layer.
    # (classes is unused here: the output layer is a single-output regression.)
    classes = 30
    self.classifier = LinearRegression(input=layer2.output, n_in=n_out_2, n_out=1)

    # Cost: the regression error of the model, in symbolic form.
    cost = self.classifier.errors(y)

    self.test_model = theano.function(
        inputs=[index],
        outputs=self.classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            wv: test_set_z[index * batch_size: (index + 1) * batch_size]
        }
    )

    self.validate_model = theano.function(
        inputs=[index],
        outputs=self.classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            wv: valid_set_z[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Build the update list by gradient descent.
    params = (feature_hidden.params + word2vec_hidden.params + layer2.params
              + layer1.params + layer0.params
              + [self.classifier.W, self.classifier.b] + conv_layer.params)
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # Train model.
    self.train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            wv: train_set_z[index * batch_size: (index + 1) * batch_size]
        }
    )
import numpy as np
from random import seed, gauss

import LinearRegression


def getRandomData(mcount):
    seed(1)
    inputs = np.matrix([
        [gauss(0, 1) for i in range(1, mcount + 1)],
        [gauss(0, 1) for i in range(1, mcount + 1)]])
    # True weights are [5, 3, 4].
    outputs = LinearRegression.addOneRow(inputs).T * np.matrix([[5], [3], [4]]) + gauss(0, 0.1)
    return inputs, outputs


a, b = getRandomData(3000)
weights = LinearRegression.teachLinReg(a, b)
# w2 = LinearRegression.batchGradientDescent(linder, a, b, np.matrix([[0, 0, 0]]).T, 0.0005, 4000)
print(weights)
print(LinearRegression.calcLinRegError(a, b, weights))
# print(LinearRegression.calcLinRegError(a, b, w2))
def detectSpeed(j, f):
    print("speed {0} started".format(j))
    M = 2
    h = 1
    timespan = 6
    size, speed = int(3600 / f), []
    # data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/Realtime_30/__JNRTDB_YCH_LIC6205.PV.npy")).T
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)))
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)))

    # y1 = data[:size * h, j]
    # X1 = np.mat(np.arange(y1.shape[0])).T
    # knots = findKnots2(y1.A.flatten())
    # f1 = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M, knots)
    # m1 = LinearRegression.LinearRegression().fit(X1, y1, [f1])
    # sY1 = m1.predictValue(X1)
    # speed.extend(getSpeedM2(m1.beta, f1.knots, X1).A.flatten().tolist())

    if not os.path.isfile(f"{f}/speed_{j}_speed.npy"):
        totalCount = math.floor(data.shape[0] / (size * h))
        for i in range(0, totalCount):
            y2 = data[i * size * h:(i + 1) * size * h, j]
            X2 = np.mat(np.arange(y2.shape[0])).T
            knots = findKnots3(y2.A.flatten())
            f2 = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M, knots)
            m2 = LinearRegression.LinearRegression().fit(X2, y2, [f2])
            sY2 = m2.predictValue(X2)
            speed.extend(getSpeedM2(m2.beta, f2.knots, X2).A.flatten().tolist())

            # plt.figure(1, (12, 8))
            # # plt.get_current_fig_manager().window.showMaximized()
            # plt.subplot(111)
            # plt.title(f"{i}, {m2.r2}")
            # plt.plot(X2.A.flatten(), y2.A.flatten(), "-xk")
            # plt.plot(X2.A.flatten(), sY2.A.flatten(), "-or")
            # for x in f2.knots:
            #     plt.axvline(x, color="b")
            # # plt.scatter(f1.knots, [y1.mean()] * len(f1.knots), marker="*", color="b")
            # # plt.subplot(212)
            # # plt.title(str(i))
            # # plt.plot(X2.A.flatten(), y2.A.flatten(), "-x")
            # # plt.plot(X2.A.flatten(), sY2.A.flatten(), color="red")
            # plt.show(block=True)
            # plt.savefig(f"/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/speed_images_history_YCH_LIC6206.PV/{i}.png")
            # print(f"{i}/{totalCount} saved.")
            # plt.close()
            # X1, y1, sY1, f1 = X2, y2, sY2, f2

        print("speed history completed.")
        speed = np.array(speed)
        np.save(f"{f}/speed_{j}_speed.npy", speed)
    else:
        speed = np.load(f"{f}/speed_{j}_speed.npy")

    speedMean, speedStd = speed.mean(), speed.std()
    print(np.logical_or((speed - speedMean) / speedStd < -6,
                        (speed - speedMean) / speedStd > 6).sum())

    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.showMaximized()
    plt.hist(speed, bins=1000)
    for x in [speedMean, speedMean - 6 * speedStd, speedMean + 6 * speedStd]:
        plt.axvline(x, color="b")
    plt.show(block=True)
    plt.close()

    deltaValues = np.diff(data[:, j], 1, 0)
    deltaMean, deltaStd = deltaValues.mean(), deltaValues.std()
    print(np.logical_or((deltaValues - deltaMean) / deltaStd < -6,
                        (deltaValues - deltaMean) / deltaStd > 6).sum())

    plt.figure(1, (12, 8))
    plt.get_current_fig_manager().window.showMaximized()
    plt.hist(deltaValues.A.flatten(), bins=1000)
    for x in [deltaMean, deltaMean - 6 * deltaStd, deltaMean + 6 * deltaStd]:
        plt.axvline(x, color="b")
    plt.show(block=True)
    plt.close()

    indices1 = (np.argwhere(speed < (speedMean - 6 * speedStd))[:, 0].flatten().tolist()
                + np.argwhere(speed > (speedMean + 6 * speedStd))[:, 0].flatten().tolist())
    indices1.sort()
    # showAnomaly(indices1, j, size, data, marks)

    # h = 1
    # startIndex, offset, values = size * h, int(12 * 60 / f), None
    # if not os.path.isfile("{0}/speed_{1}_values.npy".format(f, j)):
    #     ftn = LinearRegression.RegressionSplineFunction(int(h * 60 / timespan) + M - 2, M)
    #     X = ftn.getX(np.mat(np.arange(size * h)).T)
    #     x = np.mat([size * h - 1 - offset])
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         if M == 3:
    #             T = np.multiply(np.hstack(tuple([x - k for k in ftn.knots])),
    #                             np.hstack(tuple([(x > k) - 0 for k in ftn.knots])))
    #             # values = [calcSpeedM3(i, j, offset, size, h, data, X, x, T) for i in range(startIndex, size * 24 * 10)]
    #             # showDiff(speed[startIndex: startIndex + len(values)].A.flatten().tolist(), values, size * 6)
    #             values = pool.starmap(calcSpeedM3, [(i, j, offset, size, h, data, X, x, T)
    #                                                 for i in range(startIndex, data.shape[0] - offset)])
    #         else:
    #             T = np.hstack(tuple([(x > k) - 0 for k in ftn.knots]))
    #             # values = [calcSpeedM2(i, j, offset, size, h, data, X, T) for i in range(startIndex, size * 24 * 10)]
    #             # showDiff(speed[startIndex: startIndex + len(values)].A.flatten().tolist(), values, size * 6)
    #             values = pool.starmap(calcSpeedM2, [(i, j, offset, size, h, data, X, T)
    #                                                 for i in range(startIndex, data.shape[0] - offset)])
    #     np.save("{0}/speed_{1}_values.npy".format(f, j), np.mat(values).T)
    #     print("realtime speed completed.")
    #
    # values = np.load(f"{f}/speed_{j}_values.npy")
    # valuesMean, valuesStd = values.mean(), values.std()
    # plt.figure(1, (12, 8))
    # plt.get_current_fig_manager().window.showMaximized()
    # plt.hist(values, bins=1000)
    # plt.show(block=True)
    # plt.close()
    # indices2 = ((np.argwhere(values < (speedMean - 6 * speedStd))[:, 0].flatten() + startIndex).tolist()
    #             + (np.argwhere(values > (speedMean + 6 * speedStd))[:, 0].flatten() + startIndex).tolist())
    # indices2.sort()
    # showAnomaly(indices2, j, size, data, marks)

    forest = None
    if not os.path.isfile("{0}/speed_{1}_forest.npy".format(f, j)):
        dataSet = np.mat(speed).T
        forest = IsolationForest(200, 2 ** 9, CurvesThresholdFinder(0.65, 0.68, 0.73, False))
        forest.fill(dataSet)
        print("forest fill completed")
        forest.train(dataSet)
        print("forest train completed")
        with open("{0}/speed_{1}_forest.npy".format(f, j), "wb") as file:
            pickle.dump(forest, file, protocol=pickle.DEFAULT_PROTOCOL)
    else:
        with open("{0}/speed_{1}_forest.npy".format(f, j), "rb") as file:
            forest = pickle.load(file)

    # scores = None
    # if not os.path.isfile("{0}/speed_{1}_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         scores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in values.A.flatten().tolist()])
    #     np.save("{0}/speed_{1}_scores.npy".format(f, j), np.mat(scores).T)
    #     print("realtime score completed.")
    #
    # scores = np.mat(np.load("{0}/speed_{1}_scores.npy".format(f, j)))
    # plt.figure(1, (12, 8))
    # plt.get_current_fig_manager().window.showMaximized()
    # plt.hist(scores.A.flatten(), bins=1000)
    # plt.show(block=True)
    # plt.close()

    scores = np.array(forest.scores)
    indices3 = np.argwhere(scores >= forest.threshold)[:, 0].flatten().tolist()
    indices3.sort()
    # showAnomaly(indices3, j, size, data, marks)

    # indices4 = ((np.argwhere(values < (speedMean - 3 * speedStd))[:, 0].flatten()).tolist()
    #             + (np.argwhere(values > (speedMean + 3 * speedStd))[:, 0].flatten()).tolist())
    # indices4 = [i + startIndex for i in indices4
    #             if values[i, 0] < speedMean - 6 * speedStd
    #             or values[i, 0] > speedMean + 6 * speedStd
    #             or scores[i] >= forest.threshold]
    # indices4.sort()
    # showAnomaly(indices4, j, size, data, marks)

    # deltaScores = None
    # if not os.path.isfile("{0}/speed_{1}_delta_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         deltaScores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in deltaValues.A.flatten().tolist()])
    #     np.save("{0}/speed_{1}_delta_scores.npy".format(f, j), np.mat(deltaScores).T)
    #
    # deltaScores = np.mat(np.load("{0}/speed_{1}_delta_scores.npy".format(f, j)))

    # indices5 = [i + 1 for i in range(0, deltaValues.shape[0])
    #             if deltaValues[i, 0] < deltaMean - 6 * deltaStd or deltaValues[i, 0] > deltaMean + 6 * deltaStd]
    indices5 = (np.argwhere(deltaValues < (deltaMean - 6 * deltaStd))[:, 0].flatten().tolist()
                + np.argwhere(deltaValues > (deltaMean + 6 * deltaStd))[:, 0].flatten().tolist())
    indices5 = [i + 1 for i in indices5]
    indices5.sort()
    # showAnomaly(indices5, j, size, data, marks)
    # showAnomaly2(indices4, indices5, j, size, data, marks)

    print("speed {0} completed".format(j))
wrongIn = 0.0
runs = 1000
d = 100
aveW = np.zeros(6)
for i in range(0, runs):
    print("Running test # " + str(i))
    x = np.insert(np.random.random((d, 2)) * 2 - 1, 0, np.ones((1, d)), axis=1)
    x = np.append(x, x[:, 1:2] * x[:, 2:3], axis=1)
    x = np.append(x, np.square(x[:, 1:3]), axis=1)
    truth = np.sign(np.square(x[:, 1]) + np.square(x[:, 2]) - .6)
    noise = np.append(np.ones(int(d * .9)), np.ones(d - int(d * .9)) * -1, axis=0)
    np.random.shuffle(noise)
    truth *= noise
    i, o, w = lr.runLR(x, truth, show=False)
    wrongIn += i
    aveW = aveW + w

wrongIn = 0.0
for i in range(0, runs):
    print("Running test # " + str(i))
    x = np.insert(np.random.random((d, 2)) * 2 - 1, 0, np.ones((1, d)), axis=1)
    x = np.append(x, x[:, 1:2] * x[:, 2:3], axis=1)
    x = np.append(x, np.square(x[:, 1:3]), axis=1)
    truth = np.sign(np.square(x[:, 1]) + np.square(x[:, 2]) - .6)
    noise = np.append(np.ones(int(d * .9)), np.ones(d - int(d * .9)) * -1, axis=0)
    np.random.shuffle(noise)
    truth *= noise
def calcSpeedM3(i, j, offset, size, h, data, X, x, T):
    y = data[i + 1 + offset - size * h:i + 1 + offset, j]
    m = LinearRegression.LinearRegression().fit(X, y)
    return getSpeedM3Internal(m.beta, x, T)[0, 0]
def detectAmplitude(j, f):
    print("amplitude {0} started".format(j))
    M = 3
    h = 24
    size, sY = int(3600 / f), []
    data = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/data.npy".format(f)))
    marks = np.mat(np.load("/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/{0}/2020-08-01/marks.npy".format(f)))

    # y1 = data[:size * (h + 1), j]
    # X1 = np.mat(np.arange(y1.shape[0])).T
    # m1 = LinearRegression.LinearRegression().fit(X1, y1, [LinearRegression.RegressionSplineFunction((h + 1) + M - 2, M)])
    # sY1 = m1.predictValue(X1)
    # X1 = X1[:-size, :]; y1 = y1[:-size, :]; sY1 = sY1[:-size, :]
    # sY.extend(sY1.A.flatten().tolist())

    if not os.path.isfile(f"{f}/amplitude_{j}_amplitude.npy"):
        totalCount = math.floor(data.shape[0] / (size * h))
        for i in range(0, totalCount):
            y2 = data[i * size * h:(i + 1) * size * h, j]
            X2 = np.mat(np.arange(y2.shape[0])).T
            m2 = LinearRegression.LinearRegression().fit(
                X2, y2, [LinearRegression.RegressionSplineFunction(h + M - 2, M)])
            sY2 = m2.predictValue(X2)
            sY.extend(sY2.A.flatten().tolist())

            # plt.figure(1, (12, 8))
            # # plt.get_current_fig_manager().window.showMaximized()
            # plt.subplot(111)
            # plt.title(f"{i}, {m2.r2}")
            # plt.plot(X2.A.flatten(), y2.A.flatten(), "-xk")
            # plt.plot(X2.A.flatten(), sY2.A.flatten(), "-or")
            # plt.show(block=True)
            # plt.savefig(f"/media/WindowsE/Data/PARS/JNLH/ReasonAnalysis/amplitude_images_history_YCH_FI6221.PV/{i}.png")
            # print(f"{i}/{totalCount} saved.")
            # plt.close()

        print("amplitude history completed.")
        amplitude = data[:len(sY), j].A.flatten() - np.array(sY)
        np.save(f"{f}/amplitude_{j}_amplitude.npy", amplitude)
    else:
        amplitude = np.load(f"{f}/amplitude_{j}_amplitude.npy")

    amplitudeMean, amplitudeStd = amplitude.mean(), amplitude.std()
    print(DataHelper.testNormalDistribution(amplitude))

    # plt.figure(1, (12, 8))
    # plt.get_current_fig_manager().window.showMaximized()
    # plt.hist(amplitude, bins=1000)
    # plt.show(block=True)
    # plt.close()

    indices1 = (np.argwhere(amplitude < (amplitudeMean - 6 * amplitudeStd))[:, 0].flatten().tolist()
                + np.argwhere(amplitude > (amplitudeMean + 6 * amplitudeStd))[:, 0].flatten().tolist())
    indices1.sort()
    showAnomaly(indices1, j, size, data, marks)

    h, m = 24, 12  # 24-hour window, 12-minute offset
    startIndex, offset, values = size * h, int(m * 60 / f), None
    if not os.path.isfile(f"{f}/amplitude_{j}_values.npy"):
        with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
            values = pool.starmap(calcAmplitude,
                                  [(i, j, offset, size, h, M, data)
                                   for i in range(startIndex, data.shape[0] - offset)])
        np.save("{0}/amplitude_{1}_values.npy".format(f, j), np.array(values))
    else:
        values = np.load(f"{f}/amplitude_{j}_values.npy")

    # plt.figure(1, (12, 8))
    # plt.get_current_fig_manager().window.showMaximized()
    # plt.hist(values, bins=1000)
    # plt.show(block=True)
    # plt.close()

    indices2 = ((np.argwhere(values < (amplitudeMean - 6 * amplitudeStd))[:, 0] + startIndex).tolist()
                + (np.argwhere(values > (amplitudeMean + 6 * amplitudeStd))[:, 0] + startIndex).tolist())
    indices2.sort()
    showAnomaly(indices2, j, size, data, marks)

    # forest = None
    # if not os.path.isfile("{0}/amplitude_{1}_forest.npy".format(f, j)):
    #     forest = IsolationForest(200, 2 ** 9, CurvesThresholdFinder(0.65, 0.68, 0.73, False))
    #     forest.fill(amplitude)
    #     print("forest fill completed")
    #     forest.train(amplitude)
    #     print("forest train completed")
    #     with open("{0}/amplitude_{1}_forest.npy".format(f, j), "wb") as file:
    #         pickle.dump(forest, file, protocol=pickle.DEFAULT_PROTOCOL)
    # else:
    #     with open("{0}/amplitude_{1}_forest.npy".format(f, j), "rb") as file:
    #         forest = pickle.load(file)
    #
    # scores = None
    # if not os.path.isfile("{0}/amplitude_{1}_scores.npy".format(f, j)):
    #     with multiprocessing.Pool(psutil.cpu_count(False) - 2) as pool:
    #         scores = pool.map(forest.getAnomalyScore, [np.mat([v]) for v in values.A.flatten().tolist()])
    #     np.save("{0}/amplitude_{1}_scores.npy".format(f, j), np.mat(scores).T)
    #
    # scores = np.mat(np.load("{0}/amplitude_{1}_scores.npy".format(f, j)))
    # plt.figure(1, (12, 8))
    # plt.get_current_fig_manager().window.maximize()
    # plt.hist(scores.A.flatten(), bins=1000)
    # plt.show(block=True)
    # plt.close()
    # indices3 = (np.argwhere(scores >= forest.threshold)[:, 0].flatten() + startIndex).tolist()
    # indices3.sort()
    # showAnomaly(indices3, j, size, data, marks)
    #
    # indices4 = ((np.argwhere(values < (amplitudeMean - 3 * amplitudeStd))[:, 0].flatten()).tolist()
    #             + (np.argwhere(values > (amplitudeMean + 3 * amplitudeStd))[:, 0].flatten()).tolist())
    # indices4 = [i + startIndex for i in indices4
    #             if values[i, 0] < amplitudeMean - 6 * amplitudeStd
    #             or values[i, 0] > amplitudeMean + 6 * amplitudeStd
    #             or scores[i] >= forest.threshold]
    # indices4.sort()
    # showAnomaly(indices4, j, size, data, marks)

    print("amplitude {0} completed".format(j))
import numpy as np
import LinearRegression as lr
from utils import points2weights

wrongIn = 0.0
runs = 1000
d = 100
for i in range(0, runs):
    print("Running test # " + str(i))
    f = points2weights(np.random.random((2, 2)) * 2 - 1)
    x = np.insert(np.random.random((d, 2)) * 2 - 1, 0, np.ones((1, d)), axis=1)
    truth = np.sign(np.dot(x, f))
    i, o, w = lr.runLR(x, truth)
    wrongIn += i

print("Average of " + str(wrongIn / runs) + " wrong in sample per run")
fractionWrong = (wrongIn / runs) / d
print("%f incorrect on average in sample" % (fractionWrong))

a = abs(fractionWrong - 0)
b = abs(fractionWrong - 0.001)
c = abs(fractionWrong - 0.01)
d = abs(fractionWrong - 0.1)
if min([a, b, c, d]) == a:
    print("Answer is A")
elif min([a, b, c, d]) == b:
    print("Answer is B")
elif min([a, b, c, d]) == c:
    print("Answer is C")
else:
    print("Answer is D")
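# Hedged sketch of points2weights from utils (not shown in this section).
# It takes a 2 x 2 array of two random points and returns a length-3 weight
# vector [w0, w1, w2] for the line through them, so that np.sign(np.dot(x, f))
# labels which side of the line each augmented point [1, x1, x2] falls on.
def points2weights_sketch(points):
    """Line through (x1, y1), (x2, y2) as homogeneous weights (assumed form)."""
    (x1, y1), (x2, y2) = points
    # Line equation: (y2 - y1) * (x - x1) - (x2 - x1) * (y - y1) = 0
    w1 = y2 - y1
    w2 = -(x2 - x1)
    w0 = -(w1 * x1 + w2 * y1)
    return np.array([w0, w1, w2])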
def test_type_error(self):  # renamed from type_error so unittest discovers it
    # Checks that a string argument raises TypeError.
    with self.assertRaises(TypeError):
        LinearRegression.MyFun('b', 5)
import numpy as np
import LinearRegression as lr
from utils import points2weights

wrongOut = 0.0
runs = 1000
d = 100
D = 1000
for i in range(0, runs):
    print("Running test # " + str(i))
    f = points2weights(np.random.random((2, 2)) * 2 - 1)
    x = np.insert(np.random.random((d, 2)) * 2 - 1, 0, np.ones((1, d)), axis=1)
    truth = np.sign(np.dot(x, f))
    X = np.insert(np.random.random((D, 2)) * 2 - 1, 0, np.ones((1, D)), axis=1)
    truthX = np.sign(np.dot(X, f))
    i, o, w = lr.runLR(x, truth, X=X, truthX=truthX)
    wrongOut += o

print("Average of " + str(wrongOut / runs) + " wrong out of sample per run")
fractionWrong = (wrongOut / runs) / D
print("%f incorrect on average out of sample" % (fractionWrong))

a = abs(fractionWrong - 0)
b = abs(fractionWrong - 0.001)
c = abs(fractionWrong - 0.01)
d = abs(fractionWrong - 0.1)
if min([a, b, c, d]) == a:
    print("Answer is A")
elif min([a, b, c, d]) == b:
    print("Answer is B")
elif min([a, b, c, d]) == c:
    print("Answer is C")
else:
    print("Answer is D")
def main():
    loaddata()
    feature_sqft_living = np.array(train_data['sqft_living'])
    feature_bedrooms = np.array(train_data['bedrooms'])
    outputs = np.array(train_data['price'])

    # Model 1 features
    Model1_features = ['sqft_living']
    Model1_output = ['price']
    feature_matrix1, output_vector1 = LR.extract_data_from_features(
        train_data, Model1_features, Model1_output)
    feature_matrix1_targets, output_vector1_targets = LR.extract_data_from_features(
        test_data, Model1_features, Model1_output)
    step_size1 = 7.0e-12
    tolerance1 = 2.5e7
    # You can set your own initial weights here, but training may take a while.
    init_weights1 = np.array([-47000.0, 1.0]).reshape((2, 1))

    # Model 1 training
    Model1_weights = LR.regression_gradient_descent(feature_matrix1, output_vector1,
                                                    init_weights1, step_size1, tolerance1)
    test1_predictions = LR.predict(feature_matrix1_targets, Model1_weights)
    print("The first house prediction price of test data", test1_predictions[0])

    # Model 1 RSS
    RSS1 = LR.get_residual_sum_of_squares(feature_matrix1_targets, Model1_weights,
                                          output_vector1_targets)
    print("RSS of model 1 ", RSS1)

    # Model 2 features
    Model2_features = ['sqft_living', 'sqft_living15']
    Model2_output = ['price']
    # Extract the matrices for the chosen features and output.
    feature_matrix2, output_vector2 = LR.extract_data_from_features(
        train_data, Model2_features, Model2_output)
    feature_matrix2_targets, output_vector2_targets = LR.extract_data_from_features(
        test_data, Model2_features, Model2_output)
    # Set parameters.
    step_size2 = 4.0e-12
    tolerance2 = 1.0e9
    init_weights2 = np.array([-100000.0, 1.0, 1.0]).reshape((3, 1))

    # Model 2 training
    Model2_weights = LR.regression_gradient_descent(feature_matrix2, output_vector2,
                                                    init_weights2, step_size2, tolerance2)
    test2_predictions = LR.predict(feature_matrix2_targets, Model2_weights)
    print("The first house prediction price of test data", test2_predictions[0])
    RSS2 = LR.get_residual_sum_of_squares(feature_matrix2_targets, Model2_weights,
                                          output_vector2_targets)
    print("RSS of model 2 ", RSS2)
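# Hedged sketch of get_residual_sum_of_squares as used above (the LR module
# itself is not shown): predict, take residuals against the true outputs,
# square, and sum.
def get_residual_sum_of_squares_sketch(feature_matrix, weights, output):
    """RSS = sum((y - X @ w)^2)."""
    residuals = output - feature_matrix @ weights
    return float((residuals ** 2).sum())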
def calcAmplitude(i, j, offset, size, h, M, data):
    X = np.mat(np.arange(size * h)).T
    y = data[i + 1 + offset - size * h: i + 1 + offset, j]
    m = LinearRegression.LinearRegression().fit(
        X, y, [LinearRegression.RegressionSplineFunction(h + M - 2, M)])
    return data[i, j] - m.predictValue(np.mat([size * h - 1 - offset]))[0, 0]