# Assumed imports for this test snippet; rng is taken here to be numpy's random
# module (an assumption, since its definition is not shown in the original).
import numpy as np
from sgcrf import SparseGaussianCRF

rng = np.random


def test_theta_0():
    rng.seed(0)
    n_samples = 100
    # X and Y are independent, so the fitted output precision Lam should be
    # close to the identity (and the coupling Theta close to zero).
    Y = rng.randn(n_samples, 5)
    X = rng.randn(n_samples, 5)
    sgcrf = SparseGaussianCRF(lamL=0.01, lamT=0.01)
    sgcrf.fit(X, Y)
    assert np.allclose(sgcrf.Lam, np.eye(5), .1, .2)  # rtol=0.1, atol=0.2
def sgcrf(task):
    import sys
    sys.path.insert(0, '/home/sikun/Documents/sgcrfpy/')
    from sgcrf import SparseGaussianCRF

    # data_prep and pearsonr (scipy.stats) are assumed to be defined/imported
    # in the enclosing module.
    vec_s, vec_f = data_prep(task)
    sgcrf = SparseGaussianCRF(learning_rate=0.1)
    sgcrf.fit(vec_s, vec_f)
    # loss = sgcrf.lnll
    pred_f = sgcrf.predict(vec_s)

    # Per-sample Pearson correlation between prediction and target.
    for k in range(vec_s.shape[0]):
        print(pearsonr(pred_f[k], vec_f[k]))
def train_sgcrf(feature, gt, n_iter):
    """
    Input: normalized feature and gt, number of iterations.
    Output: SGCRF model
    """
    print("Train SGCRF based on the output of RNN.")
    model = SparseGaussianCRF(learning_rate=0.1, lamL=0.01, lamT=0.001, n_iter=n_iter)
    model.fit(feature, gt)
    return model
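A minimal usage sketch for train_sgcrf, assuming the sgcrfpy import shown in the earlier snippets; the synthetic data, shapes, and iteration count below are illustrative assumptions, not part of the original pipeline.

# Hypothetical smoke test for train_sgcrf on synthetic, pre-normalized data.
# The array shapes and n_iter value below are assumptions for illustration.
import numpy as np
from sgcrf import SparseGaussianCRF  # assumes sgcrfpy is on the path

rng = np.random.RandomState(0)
feature = rng.randn(200, 10)                    # stand-in for normalized RNN outputs
gt = feature[:, :5] + 0.1 * rng.randn(200, 5)   # targets loosely tied to the features

model = train_sgcrf(feature, gt, n_iter=100)
pred = model.predict(feature)
print(pred.shape)                               # expected: (200, 5)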
def crf_models(df, df1, train_start_date, train_end_date, test_start_date,
               test_end_date, request_type, CD, predictor_num):
    """
    Trains 2 GCRF models on data from the specified CD and the Request Type
    assigned to fulfill the request. Uses the specified start and end dates for
    training and testing to create train and test sets.
    Put this in a file called LAGCRF.py
    """
    # Create training and testing sets
    dftrain = preprocessing(df, train_start_date, train_end_date)
    dftrain = dftrain.reset_index(drop=True)
    dftest = preprocessing(df1, test_start_date, test_end_date)
    dftest = dftest.reset_index(drop=True)

    # Reserve the test set for evaluating both models.
    y_train, y_test = CreateTestSet(dftest, predictor_num)
    y_test = y_test.reshape((-1, 1))

    ## 2 Models
    # Model 1: CD
    modelCD = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)
    dftrainCD = dftrain[dftrain['CD'] == CD].reset_index(drop=True)
    X_trainCD, X_testCD = CreateTrainSet(dftrainCD, predictor_num)
    X_testCD = X_testCD.reshape((-1, 1))
    modelCD.fit(X_trainCD, X_testCD)
    y_predCD = modelCD.predict(y_train)

    # Model 2: Request type
    modelRT = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)
    dftrainRT = dftrain[dftrain['RequestType'] == request_type].reset_index(drop=True)
    X_trainRT, X_testRT = CreateTrainSet(dftrainRT, predictor_num)
    X_testRT = X_testRT.reshape((-1, 1))
    modelRT.fit(X_trainRT, X_testRT)
    y_predRT = modelRT.predict(y_train)

    # Average out all predictions
    y_predFinal = (y_predCD + y_predRT) / 2

    # Return models
    return modelCD, modelRT
def Strukturni_predict_fun(train_index, test_index, ModelSTNo):
    atribute = pd.read_csv('atribute')
    output = pd.read_csv('output')
    atribute = atribute.iloc[:, 1:].values
    output = output.iloc[:, 1:].values

    timeST = np.zeros([ModelSTNo])
    R2 = np.zeros([ModelSTNo])

    x_train, x_test = atribute[train_index, :], atribute[test_index, :]
    y_train, y_test = output[train_index, :], output[test_index, :]

    std_scl = StandardScaler()
    std_scl.fit(x_train)
    x_train = std_scl.transform(x_train)
    x_test = std_scl.transform(x_test)

    # Model 1: SGCRF
    model = SparseGaussianCRF()
    start_time = time.time()
    model.fit(x_train, y_train)
    y_SGCRF = model.predict(x_test).reshape(-1)
    timeST[0] = time.time() - start_time

    # Model 2: neural network (30-25-20-7)
    start_time = time.time()
    model2 = Sequential()
    model2.add(Dense(30, input_dim=x_train.shape[1], activation='relu'))
    model2.add(Dense(25, activation='relu'))
    model2.add(Dense(20, activation='relu'))
    model2.add(Dense(7, activation='linear'))
    model2.compile(loss='mean_absolute_error', optimizer='SGD')
    ES = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=50,
                                       verbose=0, mode='auto', baseline=None)
    model2.fit(x_train, y_train, epochs=1200, batch_size=200,
               validation_data=(x_test, y_test), callbacks=[ES])
    y_NN1 = model2.predict(x_test).reshape(-1)
    timeST[1] = time.time() - start_time

    # Model 3: neural network (35-26-22-7)
    start_time = time.time()
    model3 = Sequential()
    model3.add(Dense(35, input_dim=x_train.shape[1], activation='relu'))
    model3.add(Dense(26, activation='relu'))
    model3.add(Dense(22, activation='relu'))
    model3.add(Dense(7, activation='linear'))
    model3.compile(loss='mean_absolute_error', optimizer='SGD')
    ES = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=50,
                                       verbose=0, mode='auto', baseline=None)
    model3.fit(x_train, y_train, epochs=1200, batch_size=200,
               validation_data=(x_test, y_test), callbacks=[ES])
    y_NN2 = model3.predict(x_test).reshape(-1)
    timeST[2] = time.time() - start_time

    # Model 4: neural network (36-27-21-7)
    start_time = time.time()
    model4 = Sequential()
    model4.add(Dense(36, input_dim=x_train.shape[1], activation='relu'))
    model4.add(Dense(27, activation='relu'))
    model4.add(Dense(21, activation='relu'))
    model4.add(Dense(7, activation='linear'))
    model4.compile(loss='mean_absolute_error', optimizer='SGD')
    ES = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=50,
                                       verbose=0, mode='auto', baseline=None)
    model4.fit(x_train, y_train, epochs=1200, batch_size=200,
               validation_data=(x_test, y_test), callbacks=[ES])
    y_NN3 = model4.predict(x_test).reshape(-1)
    timeST[3] = time.time() - start_time

    R2[0] = r2_score(y_test.reshape(-1), y_SGCRF)
    R2[1] = r2_score(y_test.reshape(-1), y_NN1)
    R2[2] = r2_score(y_test.reshape(-1), y_NN2)
    R2[3] = r2_score(y_test.reshape(-1), y_NN3)

    return timeST, R2
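A possible driver for Strukturni_predict_fun, assuming the 'atribute' and 'output' CSVs read inside the function exist on disk; the 5-fold split and ModelSTNo=4 are assumptions chosen to match the four models timed above.

# Hypothetical cross-validation driver; the fold count and ModelSTNo are assumptions.
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

n_rows = pd.read_csv('atribute').shape[0]       # same file the function reads
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for train_index, test_index in kf.split(np.arange(n_rows)):
    timeST, R2 = Strukturni_predict_fun(train_index, test_index, ModelSTNo=4)
    print("times:", timeST, "R2:", R2)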
iterations = 3  # number of GCRF models
data_path = '/home/mllab/Desktop/defazio-311/Data/SameDatapoints/Complaint/'
results_path = "/home/mllab/Desktop/defazio-311/Results/complaint/predicted_data_water.csv"
fh = open(results_path, "w")
fh.write("Predicted1,Real1,Predicted7,Real7\n")

filename = data_path + 'Water System.csv'
response_times, X_train, Y_train, X_test, Y_test = gs.getSamples(
    filename, x_length, y_length)

model = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)  # lamL and lamT are regularization parameters

predictions = []
# Run the model a few times and store the predictions
for i in range(0, iterations):
    model.fit(X_train, Y_train)
    prediction = model.predict(X_test)
    predictions.append(prediction)
predictions = np.array(predictions)

# Average the different GCRFs' predictions
predictions = np.mean(predictions, axis=0)
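A hedged continuation of the script above: assuming predictions and Y_test are (n_samples, y_length) arrays with the 1-step horizon in column 0 and the 7-step horizon in the last column (both column positions are assumptions), the rows advertised in the CSV header could be written like this.

# Assumed continuation: write predicted vs. real response times for the first
# and last horizons to the file opened above. Column indices 0 and -1 are assumptions.
for pred_row, real_row in zip(predictions, Y_test):
    fh.write("{},{},{},{}\n".format(pred_row[0], real_row[0], pred_row[-1], real_row[-1]))
fh.close()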
def lacer(df, df1, train_start_date, train_end_date, test_start_date,
          test_end_date, request_type, owner, CD, predictor_num):
    # Once the model is ready, replace df with a csv
    """
    Trains 3 GCRF models on data from the specified CD, Request Type, and the
    Owner assigned to fulfill the request. Uses the specified start and end
    dates for training and testing to create train and test sets.
    """
    # Create training and testing sets
    dftrain = preprocessing(df, train_start_date, train_end_date)
    dftrain = dftrain.reset_index(drop=True)
    dftest = preprocessing(df1, test_start_date, test_end_date)
    dftest = dftest.reset_index(drop=True)

    # Reserve the test set for evaluating all 3 models.
    y_train, y_test = CreateTestSet(dftest, predictor_num)
    y_test = y_test.reshape((-1, 1))

    ## 3 Models
    # Model 1: CD
    modelCD = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)
    dftrainCD = dftrain[dftrain['CD'] == CD].reset_index(drop=True)
    X_trainCD, X_testCD = CreateTrainSet(dftrainCD, predictor_num)
    X_testCD = X_testCD.reshape((-1, 1))
    modelCD.fit(X_trainCD, X_testCD)
    y_predCD = modelCD.predict(y_train)

    # Model 2: Request type
    modelRT = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)
    dftrainRT = dftrain[dftrain['RequestType'] == request_type].reset_index(drop=True)
    X_trainRT, X_testRT = CreateTrainSet(dftrainRT, predictor_num)
    X_testRT = X_testRT.reshape((-1, 1))
    modelRT.fit(X_trainRT, X_testRT)
    y_predRT = modelRT.predict(y_train)

    # Model 3: Owner
    modelOwner = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)
    dftrainOwner = dftrain[dftrain['Owner'] == owner].reset_index(drop=True)
    X_trainOwner, X_testOwner = CreateTrainSet(dftrainOwner, predictor_num)
    X_testOwner = X_testOwner.reshape((-1, 1))
    modelOwner.fit(X_trainOwner, X_testOwner)
    y_predOwner = modelOwner.predict(y_train)

    # Average out all predictions
    y_predFinal = (y_predCD + y_predRT + y_predOwner) / 3

    # Return metrics
    return metrics(y_predFinal, y_test)
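An illustrative call to lacer; every argument value below is a placeholder assumption, and df/df1 are assumed to be 311-request dataframes already compatible with the preprocessing helper used inside the function.

# Hypothetical invocation of lacer; file names, dates, and filter values are placeholders.
import pandas as pd

df = pd.read_csv('311_requests_train.csv')   # assumed training export
df1 = pd.read_csv('311_requests_test.csv')   # assumed testing export
results = lacer(df, df1,
                train_start_date='2018-01-01', train_end_date='2018-06-30',
                test_start_date='2018-07-01', test_end_date='2018-07-31',
                request_type='Bulky Items', owner='BOS', CD=10,
                predictor_num=7)
print(results)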