Exemple #1
0
def test_theta_0():
    """Fit a SparseGaussianCRF on independent noise and check ``Lam``.

    ``Y`` and ``X`` are drawn separately from ``rng.randn`` after seeding, so
    the inputs carry no information about the outputs. The fitted ``Lam`` is
    then compared against the 5x5 identity matrix with loose tolerances.
    """
    rng.seed(0)
    n_samples = 100
    n_dims = 5
    # Y is drawn before X: the draw order determines which seeded values
    # each matrix receives.
    Y = rng.randn(n_samples, n_dims)
    X = rng.randn(n_samples, n_dims)

    model = SparseGaussianCRF(lamL=0.01, lamT=0.01)
    model.fit(X, Y)

    assert np.allclose(model.Lam, np.eye(n_dims), rtol=.1, atol=.2)
Exemple #2
0
def sgcrf(task):
    """Fit a SparseGaussianCRF for ``task`` and print per-row correlations.

    The data pair returned by ``data_prep(task)`` is used both to fit the
    model and to predict from it; for every row index of the first array the
    result of ``pearsonr(prediction_row, target_row)`` is printed.

    Args:
        task: Passed straight through to ``data_prep``.

    Returns:
        None. Results are written to stdout only.
    """
    # The sgcrf package is loaded from a hard-coded local checkout, so it is
    # pushed to the front of the import path before importing.
    import sys
    sys.path.insert(0, '/home/sikun/Documents/sgcrfpy/')
    from sgcrf import SparseGaussianCRF

    inputs, targets = data_prep(task)

    model = SparseGaussianCRF(learning_rate=0.1)
    model.fit(inputs, targets)
    predicted = model.predict(inputs)

    n_rows = inputs.shape[0]
    for row in range(n_rows):
        print(pearsonr(predicted[row], targets[row]))
Exemple #3
0
def train_sgcrf(feature, gt, n_iter):
    """Fit a SparseGaussianCRF on ``feature`` / ``gt`` and return it.

    Args:
        feature: Model inputs (the original note says these are expected to
            be normalized by the caller).
        gt: Ground-truth targets (likewise expected to be normalized).
        n_iter: Number of iterations, forwarded to the model constructor.

    Returns:
        The fitted ``SparseGaussianCRF`` instance.
    """
    print("Train SGCRF based on the output of RNN.")
    hyperparams = dict(learning_rate=0.1, lamL=0.01, lamT=0.001, n_iter=n_iter)
    crf = SparseGaussianCRF(**hyperparams)
    crf.fit(feature, gt)
    return crf
Exemple #4
0
def crf_models(df, df1, train_start_date, train_end_date, test_start_date,
               test_end_date, request_type, CD, predictor_num):
    """Fit two SparseGaussianCRF models: one per CD, one per request type.

    ``df`` and ``df1`` are run through ``preprocessing`` with the training and
    testing date ranges respectively. One model is fitted on the training rows
    whose ``'CD'`` column equals ``CD``, the other on the rows whose
    ``'RequestType'`` column equals ``request_type``. Both models also predict
    on the test inputs and the predictions are averaged, but that average is
    not returned.

    Original note: this function is meant to live in a file called LAGCRF.py.

    Returns:
        Tuple ``(model_for_CD, model_for_request_type)`` of fitted models.
    """
    # Build the training and testing frames (train first, then test).
    train_frame = preprocessing(
        df, train_start_date, train_end_date).reset_index(drop=True)
    test_frame = preprocessing(
        df1, test_start_date, test_end_date).reset_index(drop=True)

    # The same test split is shared by both models.
    eval_inputs, eval_targets = CreateTestSet(test_frame, predictor_num)
    eval_targets = eval_targets.reshape((-1, 1))

    def fit_on_subset(column, value):
        # Fit one model on the training rows where `column` == `value` and
        # return it together with its predictions on the shared test inputs.
        crf = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)
        subset = train_frame[train_frame[column] == value].reset_index(
            drop=True)
        inputs, targets = CreateTrainSet(subset, predictor_num)
        targets = targets.reshape((-1, 1))
        crf.fit(inputs, targets)
        return crf, crf.predict(eval_inputs)

    cd_model, cd_pred = fit_on_subset('CD', CD)
    rt_model, rt_pred = fit_on_subset('RequestType', request_type)

    # Ensemble average of both predictions; computed but not part of the
    # return value.
    mean_pred = (cd_pred + rt_pred) / 2

    return cd_model, rt_model
def _fit_predict_dense(hidden_units, x_train, y_train, x_test, y_test):
    """Build, train and apply one fully connected Keras regressor.

    Args:
        hidden_units: Sizes of the ReLU hidden layers, in order.
        x_train, y_train: Training inputs and targets.
        x_test, y_test: Used as the validation data for early stopping and
            as the inputs to predict on.

    Returns:
        The network's predictions for ``x_test``, flattened to 1-D.
    """
    net = Sequential()
    net.add(Dense(hidden_units[0], input_dim=x_train.shape[1],
                  activation='relu'))
    for units in hidden_units[1:]:
        net.add(Dense(units, activation='relu'))
    # The output width is fixed at 7, as in the original networks.
    net.add(Dense(7, activation='linear'))
    net.compile(loss='mean_absolute_error', optimizer='SGD')
    early_stop = keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=50, verbose=0,
        mode='auto', baseline=None)
    # NOTE(review): the test split doubles as the validation set for early
    # stopping, so the reported scores are not from fully held-out data.
    net.fit(x_train, y_train, epochs=1200, batch_size=200,
            validation_data=(x_test, y_test), callbacks=[early_stop])
    return net.predict(x_test).reshape(-1)


def Strukturni_predict_fun(train_index, test_index, ModelSTNo):
    """Time and score one SparseGaussianCRF and three dense networks.

    Reads the files ``'atribute'`` and ``'output'`` from the working
    directory (dropping the first column of each), splits them by the given
    row indices, standardizes the inputs with a scaler fitted on the training
    rows only, then fits each model and predicts on the test rows.

    Args:
        train_index: Row indices used for training.
        test_index: Row indices used for testing.
        ModelSTNo: Length of the returned arrays. Slots 0-3 are written, so
            values below 4 raise ``IndexError``.

    Returns:
        Tuple ``(timeST, R2)`` of 1-D arrays of length ``ModelSTNo``: elapsed
        wall-clock seconds and R^2 score per model (slot 0 is the
        SparseGaussianCRF, slots 1-3 the dense networks).
    """
    # `.ix` was removed from pandas; `.iloc` performs the same positional
    # "drop the first column" selection.
    atribute = pd.read_csv('atribute').iloc[:, 1:].values
    output = pd.read_csv('output').iloc[:, 1:].values

    timeST = np.zeros([ModelSTNo])
    R2 = np.zeros([ModelSTNo])

    x_train, x_test = atribute[train_index, :], atribute[test_index, :]
    y_train, y_test = output[train_index, :], output[test_index, :]

    # Fit the scaler on the training rows only, then apply it to both splits.
    std_scl = StandardScaler()
    std_scl.fit(x_train)
    x_train = std_scl.transform(x_train)
    x_test = std_scl.transform(x_test)

    # Slot 0: structured model.
    model = SparseGaussianCRF()
    start_time = time.time()
    model.fit(x_train, y_train)
    predictions = [model.predict(x_test).reshape(-1)]
    timeST[0] = time.time() - start_time

    # Slots 1-3: dense networks that differ only in hidden-layer sizes. Each
    # network is trained itself (the original trained the first network twice
    # and left the second one untrained).
    architectures = ((30, 25, 20), (35, 26, 22), (36, 27, 21))
    for slot, hidden_units in enumerate(architectures, start=1):
        start_time = time.time()
        predictions.append(
            _fit_predict_dense(hidden_units, x_train, y_train, x_test, y_test))
        timeST[slot] = time.time() - start_time

    y_true = y_test.reshape(-1)
    for slot, y_pred in enumerate(predictions):
        R2[slot] = r2_score(y_true, y_pred)

    return timeST, R2
    
Exemple #6
0
# Number of times the GCRF model is fitted; the runs are averaged below.
iterations = 3

data_path = '/home/mllab/Desktop/defazio-311/Data/SameDatapoints/Complaint/'
results_path = "/home/mllab/Desktop/defazio-311/Results/complaint/predicted_data_water.csv"
filename = f"{data_path}Water System.csv"

# Results file: the CSV header is written straight away and the handle is
# left open for whatever follows.
fh = open(results_path, "w")
fh.write("Predicted1,Real1,Predicted7,Real7\n")

response_times, X_train, Y_train, X_test, Y_test = gs.getSamples(
    filename, x_length, y_length)

# lamL and lamT are regularization parameters
model = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)

# Fit the same model object repeatedly and keep each run's predictions.
predictions = []
for i in range(iterations):
    model.fit(X_train, Y_train)
    prediction = model.predict(X_test)
    predictions.append(prediction)

# Average the predictions of the individual runs element-wise.
predictions = np.mean(np.array(predictions), axis=0)
Exemple #7
0
def lacer(df, df1, train_start_date, train_end_date, test_start_date,
          test_end_date, request_type, owner, CD,
          predictor_num):  # Once model is ready, replace df with csv
    """Fit three SparseGaussianCRF models and score their averaged prediction.

    ``df`` and ``df1`` are run through ``preprocessing`` with the training and
    testing date ranges respectively. One model is fitted per training subset:
    rows whose ``'CD'`` equals ``CD``, rows whose ``'RequestType'`` equals
    ``request_type``, and rows whose ``'Owner'`` equals ``owner``. Each model
    predicts on the shared test inputs, and the three predictions are
    averaged.

    Returns:
        Whatever ``metrics(averaged_prediction, test_targets)`` returns.
    """
    # Build the training and testing frames (train first, then test).
    train_frame = preprocessing(
        df, train_start_date, train_end_date).reset_index(drop=True)
    test_frame = preprocessing(
        df1, test_start_date, test_end_date).reset_index(drop=True)

    # The same test split is shared by all three models.
    test_inputs, test_targets = CreateTestSet(test_frame, predictor_num)
    test_targets = test_targets.reshape((-1, 1))

    # One model per (column, value) filter, in the order CD, request type,
    # owner.
    filters = (('CD', CD), ('RequestType', request_type), ('Owner', owner))
    per_model_preds = []
    for column, value in filters:
        crf = SparseGaussianCRF(lamL=0.1, lamT=0.1, n_iter=10000)
        subset = train_frame[train_frame[column] == value].reset_index(
            drop=True)

        inputs, targets = CreateTrainSet(subset, predictor_num)
        targets = targets.reshape((-1, 1))

        crf.fit(inputs, targets)
        per_model_preds.append(crf.predict(test_inputs))

    # Unweighted mean of the three models' predictions.
    pred_cd, pred_rt, pred_owner = per_model_preds
    ensemble_pred = (pred_cd + pred_rt + pred_owner) / 3

    return metrics(ensemble_pred, test_targets)