def train_model(self):
    if self.verbose:
        print("training GP model ...")
    self.gpMdl = gp.GPR()
    m = gp.mean.Zero()
    k = gp.cov.RBFard(D=None, log_ell_list=self.gp_hyp[:-1], log_sigma=self.gp_hyp[-1])
    # An earlier variant tried a quasi-Newton ('BFGS') optimizer inside a
    # try/except and fell back on failure; only the Minimize path is kept here.
    self.gpMdl.setPrior(mean=m, kernel=k)
    self.gpMdl.setNoise(log_sigma=np.log(0.7))
    self.gpMdl.setOptimizer('Minimize')
    self.gpMdl.optimize(self.gprX, self.gprY, numIterations=10)
    return self.gpMdl.covfunc.hyp
def compute_gp_regression(X_train, y_train, X_test):
    model = pyGPs.GPR()
    m = pyGPs.mean.Const(0)
    k = pyGPs.cov.RBF()
    model.setPrior(mean=m, kernel=k)
    model.optimize(X_train, y_train)
    y_pred, _, _, _, _ = model.predict(X_test)
    return y_pred
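# A minimal usage sketch for compute_gp_regression above, on synthetic 1-D
# data; the toy arrays are illustrative and not part of the original code.
import numpy as np

X_train = np.linspace(0, 5, 20).reshape(-1, 1)
y_train = np.sin(X_train).ravel() + 0.1 * np.random.randn(20)
X_test = np.linspace(0, 5, 50).reshape(-1, 1)
y_pred = compute_gp_regression(X_train, y_train, X_test)
print(y_pred.shape)  # one predictive mean per test point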
def test_GPR(self):
    print("testing GP regression...")
    model = pyGPs.GPR()
    m = pyGPs.mean.Zero()
    k = pyGPs.cov.RBF()
    model.setPrior(mean=m, kernel=k)
    model.setOptimizer("Minimize", num_restarts=10)
    model.optimize(self.xr, self.yr)
    model.predict(self.zr)
    self.checkRegressionOutput(model)
def run():
    model = pyGPs.GPR()
    x, y, z = generate_toy_data()
    # per-dimension standard deviations of the inputs
    stds = np.apply_along_axis(np.std, 0, x)
    print("parameters")
    print(stds)
    lengthscale = edistance_at_percentile(x, 50)
    print(lengthscale)
    # TODO: set non-default parameters
    k = pyGPs.cov.RBFard(log_ell_list=[0.01, 0.01], log_sigma=0.01)  # D=x.shape[1]
    m = pyGPs.mean.Const()
    model.setPrior(mean=m, kernel=k)
    print("hyperparameters")
    print(k.hyp)
    model.optimize(x, y)
    print("posterior", model.posterior)
    print("Negative log marginal likelihood optimized:", round(model.nlZ, 3))

    def objective(x):
        # lower 5% quantile of the predictive distribution
        ymu, ys2, fmu, fs2, lp = model.predict(x.reshape((1, len(x))))
        ret = ymu - 1.645 * np.sqrt(ys2)
        return ret[0][0]

    x_opt = fmin_bfgs(lambda x: -objective(x), np.arange(0, 0.2, 0.1))
    print("Optimized value of x:", x_opt)
    ymu, ys2, fmu, fs2, lp = model.predict(z)
    q_95 = ymu + 1.645 * np.sqrt(ys2)
    q_5 = ymu - 1.645 * np.sqrt(ys2)
    t1 = sort_for_plotting(z[:, -1].reshape(len(z), 1), q_95, q_5)
    t2 = sort_for_plotting(z[:, 0].reshape(len(z), 1), q_95, q_5)
    plt.figure()
    ymu = np.reshape(ymu, (ymu.shape[0],))
    plt.plot(z[:, -1], ymu, ls='None', marker='+')
    plt.fill_between(t1[:, 0], t1[:, 1], t1[:, 2],
                     facecolor=[0.7539, 0.89453125, 0.62890625, 1.0], linewidths=0)
    plt.show()
    plt.figure()
    plt.plot(z[:, 0], ymu, ls='None', marker='+')
    plt.fill_between(t2[:, 0], t2[:, 1], t2[:, 2],
                     facecolor=[0.7539, 0.89453125, 0.62890625, 1.0], linewidths=0)
    plt.show()
def setUp(self):
    # fix random seed
    np.random.seed(0)
    # random data for testing
    n = 20  # number of inputs
    D = 3   # dimension of inputs
    self.x = np.random.normal(loc=0.0, scale=1.0, size=(n, D))
    self.y = np.random.random((n,))
    self.model = pyGPs.GPR()
    nlZ, dnlZ, post = self.model.getPosterior(self.x, self.y)
    self.nlZ_beforeOpt = nlZ
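# A hedged companion test for the setUp above: after optimize(), the negative
# log marginal likelihood should not exceed its pre-optimization value. The
# method name is illustrative, not from the original test suite.
def test_optimize_reduces_nlZ(self):
    self.model.optimize(self.x, self.y)
    self.assertLessEqual(self.model.nlZ, self.nlZ_beforeOpt)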
def optimize_max_possible_value(x, y, grid, func):
    z = grid  # candidate points to evaluate (the original referenced `z` directly)
    model = pyGPs.GPR()
    np_x = np.array(x)
    np_y = np.array(y)
    np_z = np.array(z)
    model.getPosterior(np_x, np_y)
    model.optimize(np_x, np_y)
    used_points = set()
    N = 2  # number of standard deviations in the upper confidence bound
    for step in range(100):
        l = model.predict(np_z)
        possible_max_point = None
        possible_max_value = None
        possible_max_index = None
        for i in range(len(z)):
            point = z[i]
            value = l[0][i][0]      # predictive mean
            std = sqrt(l[1][i][0])  # predictive standard deviation
            if possible_max_value is None or possible_max_value < value + std * N:
                possible_max_point = point
                possible_max_value = value + std * N
                possible_max_index = i
        print(possible_max_index, possible_max_point, possible_max_value)
        if possible_max_index in used_points:
            return possible_max_point, func(possible_max_point)
        used_points.add(possible_max_index)
        x.append(possible_max_point)
        y.append(func(possible_max_point))
        np_x = np.array(x)
        np_y = np.array(y)
        model = pyGPs.GPR()
        model.getPosterior(np_x, np_y)
        model.optimize(np_x, np_y)
    return possible_max_point, func(possible_max_point)
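# A hedged usage sketch for the UCB-style search above; the toy objective,
# initial designs, and candidate grid are illustrative stand-ins.
from math import sqrt
import numpy as np

f = lambda p: -(p[0] - 0.3) ** 2           # toy objective, maximum at 0.3
grid = [[v] for v in np.linspace(0.0, 1.0, 50)]
x0 = [[0.0], [1.0]]                        # initial designs
y0 = [f(p) for p in x0]
best_point, best_value = optimize_max_possible_value(x0, y0, grid, f)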
def test(self, features, version, label):
    """
    Learns GPR and KNR models from the given training set and evaluates them
    on the test set. Here the training set consists of every even-indexed
    feature and the test set of every odd-indexed one.
    :param features:
    :param version:
    :param label:
    :return:
    """
    groundtruth = np.load(self.param_path + '/v' + str(version) + '_' + self.GT)
    _trainX = np.concatenate(features[0:features.shape[0]:2])
    _trainY = np.concatenate(groundtruth[0:groundtruth.size:2])
    testX = features[1:features.shape[0]:2]
    testY = groundtruth[1:groundtruth.size:2]
    print('features.shape: ', features.shape, ', groundtruth.shape: ', groundtruth.shape)
    print('_trainX.shape: ', _trainX.shape, ', _trainY.shape: ', _trainY.shape)
    trainX, trainY = self.exclude_label(_trainX, _trainY, c=0)
    PYGPR = 'gpr_' + label
    KNR = 'knr_' + label
    if files.isExist(self.model_path, PYGPR):
        gprmodel = self.loadf(self.model_path, PYGPR)
        knrmodel = self.loadf(self.model_path, KNR)
    else:
        print('Learning GPR model')
        gprmodel = pyGPs.GPR()
        gprmodel.getPosterior(trainX, trainY)
        gprmodel.optimize(trainX, trainY)
        self.savef(self.model_path, PYGPR, gprmodel)
        print('Learning KNR model')
        knrmodel = knr(trainX, trainY)
        self.savef(self.model_path, KNR, knrmodel)
        print('Learning both GPR and KNR models is DONE.')
    self.plot_gpr(gprmodel, testX, testY, label, 'odd_feature')
    self.plot_knr(knrmodel, testX, testY, label, 'odd_feature')
def calculateRMSEPyGP(vectorX, vectorY, labelList):
    """
    Calculate the root mean squared error.

    Parameters:
    -----------
    vectorX: timestamps of the timeseries
    vectorY: valueSet of the timeseries
    labelList: labels of the timeseries

    Returns:
    --------
    list of (household, rmse) tuples
    """
    # setX = [preprocessing.scale(element) for element in vectorX]
    setY = preprocessing.scale(vectorY, axis=1)
    model = pyGPs.GPR()  # specify model (GP regression)
    # hyperparams will be set with the optimizeHyperparameters method
    k = pyGPs.cov.Linear() + pyGPs.cov.RBF()
    model.setPrior(kernel=k)
    hyperparams, model2 = GPE.optimizeHyperparameters(
        [0.0000001, 0.0000001, 0.0000001], model, vectorX, setY,
        bounds=[(None, 5), (None, 5), (None, 5)], method='L-BFGS-B')
    print('hyperparameters used:', hyperparams)
    y_pred, ys2, fm, fs2, lp = model2.predict(vectorX[0])
    # plot the general model after normalizing the input timeseries
    plt.plot(y_pred, color='red')
    for i in setY:
        plt.plot(i, color='blue')
    plt.show(block=True)
    rmseData = []
    for i in range(len(vectorY)):
        rmse = mean_squared_error(vectorY[i], y_pred) ** 0.5
        HH = labelList[i]
        rmseData.append((HH, rmse))
    return rmseData
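# For reference, a minimal sketch of what an optimizeHyperparameters-style
# helper could look like for a single series: minimize the negative log
# marginal likelihood over the kernel hyperparameters with SciPy. This is an
# illustrative assumption, not the actual GPE implementation used above.
import numpy as np
from scipy.optimize import minimize

def optimize_hyperparameters_sketch(init_hyp, model, x, y, bounds=None, method='L-BFGS-B'):
    x, y = np.array(x), np.array(y)

    def nlml(hyp):
        # write the candidate hyperparameters into the kernel, then
        # recompute the negative log marginal likelihood
        model.covfunc.hyp = list(hyp)
        nlZ, dnlZ, post = model.getPosterior(x, y)
        return nlZ

    res = minimize(nlml, init_hyp, bounds=bounds, method=method)
    model.covfunc.hyp = list(res.x)
    model.getPosterior(x, y)
    return list(res.x), model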
def test_trainset_test_same(self, features, version, label):
    """
    Learns GPR and KNR models from the given training set and evaluates them
    on the test set. Here the training set is equal to the test set.
    :param features:
    :param version:
    :param label:
    :return:
    """
    groundtruth = np.load(self.param_path + '/v' + str(version) + '_' + self.GT)
    _trainX = np.concatenate(features)
    _trainY = np.concatenate(groundtruth)
    trainX, trainY = self.exclude_label(_trainX, _trainY, c=0)
    testX = features
    testY = groundtruth
    PYGPR = 'gpr_all_' + label
    KNR = 'knr_all_' + label
    if files.isExist(self.model_path, PYGPR):
        gprmodel = self.loadf(self.model_path, PYGPR)
        knrmodel = self.loadf(self.model_path, KNR)
    else:
        print('Learning GPR model')
        gprmodel = pyGPs.GPR()
        gprmodel.getPosterior(trainX, trainY)
        gprmodel.optimize(trainX, trainY)
        self.savef(self.model_path, PYGPR, gprmodel)
        print('Learning KNR model')
        knrmodel = knr(trainX, trainY)
        self.savef(self.model_path, KNR, knrmodel)
        print('Learning both GPR and KNR models is DONE.')
    self.plot_gpr(gprmodel, testX, testY, label, 'all_feature')
    self.plot_knr(knrmodel, testX, testY, label, 'all_feature')
def InitModel(self):
    # initialize search space
    self.x = np.array([[sum(n) / 2 for n in self.domain]])
    self.y = np.array([self.func(self.x[0])])
    self.regret = np.array([np.linalg.norm(self.x[0] - self.optima)])
    self.regretBound = np.array([1])
    self.covF = np.array([0])
    self.covTr = np.array([0])
    # specify model (GP regression)
    self.model = pyGPs.GPR()
    m = pyGPs.mean.Zero()
    k = pyGPs.cov.Linear()
    # the original read `model.kernel`, with `model` undefined in this scope;
    # assuming the kernel name is stored on self
    if self.kernel == 'RBF':
        k = pyGPs.cov.RBF()
    if self.kernel == 'Matern':
        k = pyGPs.cov.Matern()
    self.model.setPrior(mean=m, kernel=k)
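# A possible cleanup of the kernel selection above, sketched as a dict
# dispatch; the name argument is assumed to be one of the three strings
# handled in InitModel.
import pyGPs

def make_kernel(name):
    kernel_by_name = {
        'Linear': pyGPs.cov.Linear,
        'RBF': pyGPs.cov.RBF,
        'Matern': pyGPs.cov.Matern,
    }
    # fall back to a linear kernel, mirroring the default above
    return kernel_by_name.get(name, pyGPs.cov.Linear)()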
def gpr(p, *args):
    p1, p2, p3 = p[0], p[1], p[2]
    # (an alternative signature unpacking a separate test-input set `xs`
    # from args was commented out in the original)
    x, y, ys = args
    min_max_scaler1 = preprocessing.MinMaxScaler()
    min_max_scaler2 = preprocessing.MinMaxScaler()
    min_max_scaler3 = preprocessing.MinMaxScaler()
    x = min_max_scaler1.fit_transform(x)
    y = min_max_scaler2.fit_transform(y)
    ys = min_max_scaler3.fit_transform(ys)
    pca = PCA(n_components='mle')
    x_pca = pca.fit_transform(x)
    k1 = pyGPs.cov.RBF(np.log(p1), np.log(p2)) + pyGPs.cov.Noise(np.log(p3))
    # STANDARD GP (prediction)
    m = pyGPs.mean.Linear(D=x_pca[0:600, :].shape[1]) + pyGPs.mean.Const()
    model = pyGPs.GPR()
    model.setData(x_pca[0:600, :], y)
    model.setPrior(mean=m, kernel=k1)
    # STANDARD GP (training)
    # model.optimize(x, y)
    ymu, ys2, fmu, fs2, lp = model.predict(x_pca[600:660, :])
    ymu = min_max_scaler3.inverse_transform(ymu)
    ys = min_max_scaler3.inverse_transform(ys)
    rmse = pyGPs.Validation.valid.RMSE(ymu, ys)
    return rmse
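# A hedged sketch of driving the RMSE objective above with a SciPy optimizer.
# The synthetic data shapes (660 samples, matching the 0:600/600:660 slices
# inside gpr) and the starting point are illustrative assumptions. The call
# is left commented because the simplex search can wander into non-positive
# hyperparameters, where the np.log parameterization inside gpr breaks down.
import numpy as np
from scipy.optimize import fmin

rng = np.random.RandomState(0)
x_all = rng.rand(660, 5)
y_train = np.sin(x_all[:600].sum(axis=1, keepdims=True))
y_test = np.sin(x_all[600:660].sum(axis=1, keepdims=True))
p0 = [1.0, 1.0, 0.1]  # initial length-scale, signal std, noise std
# p_opt = fmin(gpr, p0, args=(x_all, y_train, y_test), maxiter=5)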
def __init__(self, input, n_in, n_out, output):
    """
    Initialize the parameters of the regression layer; despite the layout
    borrowed from a logistic-regression class, this fits a GP regression
    model to the given input/output data.

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)
    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie
    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie
    """
    input = np.asarray(input)
    self.y = np.asarray(output)
    model = pyGPs.GPR()
    model.setData(input, self.y)
    model.optimize()
    ymu, ys2, fmu, fs2, lp = model.predict(input)
    self.lp = lp
    self.ymu = ymu
def __init__(self, winSize=500, gpWinSize=40, gpin_winSize=40, nu_cluster=3,
             controlFeedback=1, alpha=7, beta=2, sigma=0.9, controllerDelay=6):
    # online learning parameters and variables
    self.__winSize = winSize
    self.__gpWinSize = gpWinSize
    self.__gpin_winSize = gpin_winSize
    self.__fileName = "data_init.csv"
    # system data
    self.__inData = []
    self.__outData = []
    self.__statData = []
    self.__clusData = []
    # uncontrolled-input forecast model
    self.__gpfX = []
    self.__gpfY = []
    self.__sysState = []
    # number of clusters
    self.__Nc = nu_cluster
    self.__onlineCluster = 1
    # model status
    self.__modelInitilization = 0  # 0: initial, 1: learn and predict
    self.__modelTimeIdx = 0
    self.__modelstate = 0
    # model control output
    self.__controlOut = []
    self.__controlFeedback = controlFeedback
    self.__alpha = alpha
    self.__beta = beta
    self.__sigma = sigma
    self.__controllerDelay = controllerDelay
    self.__controllerCounter = 0
    # model objects
    self.__GPs = []           # state models
    self.__gpFmdl = gp.GPR()  # forecast model
    # classifier model
    self.__myCluster = KMeans(n_clusters=self.__Nc, init='random', random_state=0)
    # variables to store prediction data for evaluation and plotting:
    # forecasting workload variables
    self.__In_mean = []
    self.__In_si = []
    self.__In_pred_data = []
    # classification variables
    self.__sysModes = []
    # output variables
    self.__out_mean = []
    self.__out_si = []
    self.__out_pred_data = []
    # other
    self.verbose = 1
    # pass the offline data to initialize the model;
    # input, output and model-state indices in the data
    indxIn = 0                          # input index
    indxOut = 18                        # output index
    indxS = [14, 17]                    # state indices
    indxQ = [2, 3, 4, 5, 6, 8, 12, 13]  # clustering-data indices
    inData = 0
    outData = 0
    statData = []
    clusData = []
    with open(self.__fileName, 'rt') as dataFile:
        reader = csv.reader(dataFile, delimiter=',')
        for row in reader:
            inData = float(row[indxIn])
            outData = float(row[indxOut])
            statData = [float(row[i]) for i in indxS]
            clusData = [float(row[i]) for i in indxQ]
            self.__initModel(inData, statData, outData, clusData)
            if self.__modelInitilization:
                break
def __initModel(self, sysIn, sysStat, sysOut, sysClus):
    # buffer data until the window size is reached
    self.__inData.append(sysIn)
    self.__outData.append(sysOut)
    self.__statData.append(sysStat)
    self.__clusData.append(sysClus)
    # learn the forecast model and advance the system index
    self.__gpfX.append(self.__modelTimeIdx)
    self.__gpfY.append(sysIn)
    while len(self.__gpfX) > self.__gpin_winSize:
        # delete the oldest data
        self.__gpfX.pop(0)
        self.__gpfY.pop(0)
    self.__modelTimeIdx += 1
    if len(self.__inData) >= self.__winSize:
        # learn the models (align the data to the input/output format
        # Y(k+1) = f(x(k), u(k+1)))
        self.__inData.pop(0)
        self.__outData.pop(0)
        # classify the data
        clustersX = self.__myCluster.fit_predict(self.__clusData)
        if self.verbose:
            print("training state-space models using GPs")
        for i in range(self.__Nc):
            gprX = []
            gprY = []
            for j in range(len(clustersX) - 1):
                if clustersX[j] == i:
                    gprX.append([self.__inData[j]] + self.__statData[j])
                    gprY.append(self.__outData[j])
            gprX = np.array(gprX)
            gprY = np.array(gprY)
            gpmdl = gp.GPR()
            m = gp.mean.Zero()
            RBF_hyp_init = [0.5] * (len(sysStat) + 2)
            k = gp.cov.RBFard(D=None, log_ell_list=RBF_hyp_init[:-1],
                              log_sigma=RBF_hyp_init[-1])
            gpmdl.setPrior(mean=m, kernel=k)
            if self.verbose:
                print("training GP of mode: " + str(i))
            gpmdl.setNoise(log_sigma=np.log(0.8))
            gpmdl.setOptimizer('Minimize')
            gpmdl.optimize(gprX, gprY)
            self.__GPs.append(gpmdl)
        if self.verbose:
            print("training forecast model using GP")
        try:
            k_f = gp.cov.RBF(log_ell=1, log_sigma=1)
            self.__gpFmdl.setPrior(mean=gp.mean.Zero(), kernel=k_f)
            self.__gpFmdl.setNoise(log_sigma=np.log(0.8))
            self.__gpFmdl.setOptimizer('BFGS')
            self.__gpFmdl.optimize(np.array(self.__gpfX), np.array(self.__gpfY))
        except Exception:
            print('cannot quasi-Newton it (forecast); falling back to Minimize')
            self.__gpFmdl = gp.GPR()
            k_f = gp.cov.RBF(log_ell=1, log_sigma=1)
            self.__gpFmdl.setPrior(mean=gp.mean.Zero(), kernel=k_f)
            self.__gpFmdl.setNoise(log_sigma=np.log(0.8))
            self.__gpFmdl.setOptimizer('Minimize')
            self.__gpFmdl.optimize(np.array(self.__gpfX), np.array(self.__gpfY))
        self.__sysState = self.__statData[-1]
        self.__modelInitilization = 1
def __initModel(self, sysIn, sysStat, sysOut):
    self.__inData.append(sysIn)
    self.__outData.append(sysOut)
    self.__statData.append(sysStat)
    self.__featurewin.append(sysStat)
    while len(self.__featurewin) < self.__featureWinSize:
        self.__featurewin.append(sysStat)
    while len(self.__featurewin) > self.__featureWinSize:
        self.__featurewin.pop(0)
    curr_feature = self.__featureExtraction(self.__featurewin)
    # update the feature data (slide the window)
    self.__clusterFeatures.append(curr_feature)
    # update the forecast model and the system index:
    # add the new data (slide the window)
    self.__gpfX.append(self.__modelTimeIdx)
    self.__gpfY.append(sysIn)
    while len(self.__gpfX) > self.__gpWinSize:
        # delete the oldest data
        self.__gpfX.pop(0)
        self.__gpfY.pop(0)
    self.__modelTimeIdx += 1
    if len(self.__clusterFeatures) >= self.__winSize:
        # learn the models (align the data to the input/output format
        # Y(k+1) = f(x(k), u(k+1)))
        self.__inData.pop(0)
        self.__outData.pop(0)
        # classify the data
        clustersX = self.__myCluster.fit_predict(self.__clusterFeatures)
        if self.verbose:
            print("training state-space models using GPs")
        for i in range(self.__Nc):
            gprX = []
            gprY = []
            for j in range(len(clustersX) - 1):
                if clustersX[j] == i:
                    gprX.append([self.__inData[j]] + self.__statData[j])
                    gprY.append(self.__outData[j])
            gprX = np.array(gprX)
            gprY = np.array(gprY)
            gpmdl = gp.GPR()
            m = gp.mean.Zero()
            RBF_hyp_init = [0.5] * (len(sysStat) + 2)
            k = gp.cov.RBFard(D=None, log_ell_list=RBF_hyp_init[:-1],
                              log_sigma=RBF_hyp_init[-1])
            gpmdl.setPrior(mean=m, kernel=k)
            if self.verbose:
                print("training GP of mode: " + str(i))
            gpmdl.setNoise(log_sigma=np.log(0.8))
            gpmdl.setOptimizer('Minimize')
            gpmdl.optimize(gprX, gprY)
            self.__GPs.append(gpmdl)
        if self.verbose:
            print("training forecast model using GP")
        try:
            k_f = gp.cov.RBF(log_ell=1, log_sigma=1)
            self.__gpFmdl.setPrior(mean=gp.mean.Zero(), kernel=k_f)
            self.__gpFmdl.setNoise(log_sigma=np.log(0.8))
            self.__gpFmdl.setOptimizer('BFGS')
            self.__gpFmdl.optimize(np.array(self.__gpfX), np.array(self.__gpfY))
        except Exception:
            print('cannot quasi-Newton it (forecast); falling back to Minimize')
            self.__gpFmdl = gp.GPR()
            k_f = gp.cov.RBF(log_ell=1, log_sigma=1)
            self.__gpFmdl.setPrior(mean=gp.mean.Zero(), kernel=k_f)
            self.__gpFmdl.setNoise(log_sigma=np.log(0.8))
            self.__gpFmdl.setOptimizer('Minimize')
            self.__gpFmdl.optimize(np.array(self.__gpfX), np.array(self.__gpfY))
        self.__sysState = self.__statData[-2]
        self.__modelInitilization = 1
# plt.plot(Ttrain, Ytrain, 'r')
Ttest = np.atleast_2d(range(nTtrain, nT)).T
# plt.plot(Ttest, Ytest, 'b', Ttest, predict, 'g.')
# plt.show()

# Baseline error
# err = getError.getError(Ytest, predict, muTIM, S2TIM)
# print('NLL, MSE, MAE:')
# print(err[0], err[1], err[2])

# Train GPTS
covFunc = pyGPs.cov.RQ() + pyGPs.cov.Const() + pyGPs.cov.Noise()
model = pyGPs.GPR()
model.setPrior(kernel=covFunc)
# model.setScalePrior([1.0, 1.0])

# Learn the hyperparameters on the training data
model.setOptimizer("RTMinimize", 10)
model.optimize(Ttrain, Ytrain)

# Do the extrapolation
logthetaGPTS = model.covfunc.hyp
# (mu, sig2, df) = GPTSonline(Ytest, covFunc, logthetaGPTS, model.ScalePrior)

# Plot the stuff
plt.axis([0.0, 7.0, -4.0, 5.0])
plt.plot(Ttrain, Ytrain, 'r')
def __init__(self, winSize=600, gpWinSize=50, featureWinSize=3, nu_cluster=2,
             controlFeedback=1, alpha=3, beta=1, sigma=0.9, controllerDelay=5):
    # (the original stub returned immediately, leaving the body below
    # unreachable; the parameterized signature is restored here from the
    # commented-out definition)
    # online learning parameters and variables
    self.__winSize = winSize
    self.__gpWinSize = gpWinSize
    self.__featureWinSize = featureWinSize
    self.__modelInitilization = 0  # 0: not initialized, 1: learn and predict
    self.offlineInit = 0  # 1: init the model from offline data, 0: init online by buffering data
    self.__fileName = "result04.csv"  # data to load
    self.__inData = []
    self.__outData = []
    self.__statData = []
    self.__gpfX = []
    self.__gpfY = []
    self.__sysState = []
    # number of clusters
    self.__Nc = nu_cluster
    self.__clusterFeatures = []
    self.__featurewin = []
    self.__onlineCluster = 1
    # model status
    self.__modelTimeIdx = 0
    self.__modelstate = 0
    # model control output
    self.__controlOut = []
    self.__controlFeedback = controlFeedback
    self.__alpha = alpha
    self.__beta = beta
    self.__sigma = sigma
    self.__controllerDelay = controllerDelay
    self.__controllerCounter = 0
    # model objects
    self.__GPs = []           # state models
    self.__gpFmdl = gp.GPR()  # forecast model
    # classifier model
    self.__myCluster = KMeans(n_clusters=self.__Nc, init='random', random_state=0)
    # variables to store prediction data for evaluation and plotting:
    # forecasting workload variables
    self.__In_mean = []
    self.__In_si = []
    self.__In_pred_data = []
    # classification variables
    self.__sysModes = []
    # output variables
    self.__out_mean = []
    self.__out_si = []
    self.__out_pred_data = []
    # other
    self.verbose = 0
    if self.offlineInit:
        # pass the offline data to initialize the model;
        # input, output and model-state indices in the data
        indxIn = 36               # input index
        indxOut = 38              # output index
        indxS = [18, 26, 27, 35]  # state indices
        inData = 0
        outData = 0
        statData = []
        with open(self.__fileName, 'rt') as dataFile:
            reader = csv.reader(dataFile, delimiter=',')
            for row in reader:
                inData = float(row[indxIn])
                outData = float(row[indxOut])
                statData = [float(row[i]) for i in indxS]
                self.__initModel(inData, statData, outData)
                if self.__modelInitilization:
                    break
def visualize_video(self, features, version, label, _fgset, _colordp, param):
    groundtruth = np.load(self.param_path + '/v' + str(version) + '_' + self.GT)
    _trainX = np.concatenate(features[0:features.shape[0]:2])
    _trainY = np.concatenate(groundtruth[0:groundtruth.size:2])
    testX = features[1:features.shape[0]:2]
    testY = groundtruth[1:groundtruth.size:2]
    np.savetxt(self.res_path + '/feature_' + label + '.txt',
               np.hstack((_trainX, _trainY.reshape(-1, 1))), fmt='%d')
    print('features.shape: ', features.shape, ', groundtruth.shape: ', groundtruth.shape)
    print('_trainX.shape: ', _trainX.shape, ', _trainY.shape: ', _trainY.shape)
    trainX, trainY = self.exclude_label(_trainX, _trainY, c=0)
    PYGPR = 'gpr_' + label
    KNR = 'knr_' + label
    if files.isExist(self.res_path, PYGPR):
        gprmodel = self.loadf(self.res_path, PYGPR)
        knrmodel = self.loadf(self.res_path, KNR)
    else:
        print('Learning GPR model')
        gprmodel = pyGPs.GPR()
        gprmodel.getPosterior(trainX, trainY)
        gprmodel.optimize(trainX, trainY)
        self.savef(self.res_path, PYGPR, gprmodel)
        print('Learning KNR model')
        knrmodel = knr(trainX, trainY)
        self.savef(self.res_path, KNR, knrmodel)
        print('Learning both GPR and KNR models is DONE.')
    Y_pred = np.array([])
    Y_sum_pred = []
    Y_pred_frame = []
    for x in testX:
        ym, ys2, fm, fs2, lp = gprmodel.predict(np.array(x))
        Y_pred = np.hstack((Y_pred, ym.reshape(ym.size)))
        ym = ym.reshape(ym.size)
        Y_sum_pred.append(sum(ym))
        Y_pred_frame.append(ym)
    Y_label = []
    Y_sum_label = []
    for y in testY:
        Y_label += y
        Y_sum_label.append(sum(y))
    imgset = []
    fgset = _fgset[1:len(_fgset) - 1]
    colordp = _colordp[1:len(_colordp) - 1]
    for i in range(len(fgset)):
        rect, cont = self.segmentation_blob(fgset[i], param)
        tmp = colordp[i].copy()
        pred = Y_pred_frame[i]
        gt = groundtruth[i]
        for j in range(len(rect)):
            r = rect[j]
            cv2.rectangle(tmp, (r[0], r[2]), (r[1], r[3]), tools.green, 1)
            msg_pred = 'Pred: ' + str(pred[j])
            msg_gt = 'GT: ' + str(gt[j])
            cv2.putText(tmp, msg_pred, (r[0], r[2]),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.0, tools.blue)
            cv2.putText(tmp, msg_gt, (r[0] + 10, r[2]),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.0, tools.red)
        imgset.append(tmp)
    images.display_img(imgset, 300)
def ANM_predict_causality(self, train_size=0.5, independence_criterion='HSIC', metric='linear'):
    '''
    Prediction of causality based on the bivariate additive noise model.

    Parameters
    ----------
    independence_criterion :
        'kruskal' for the Kruskal-Wallis H-test,
        'HSIC' for the Hilbert-Schmidt Independence Criterion

    Returns
    -------
    causal_direction: 1 if X causes Y, or -1 if Y causes X
    '''
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(self.X, self.Y, train_size=train_size)
    # forward case: regress Y on X and test residual independence
    _gp = pyGPs.GPR()
    _gp.getPosterior(Xtrain, Ytrain)
    _gp.optimize(Xtrain, Ytrain)
    ym, ys2, fm, fs2, lp = _gp.predict(Xtest)
    errors_forward = ym - Ytest
    # backward case: regress X on Y
    _gp = pyGPs.GPR()
    _gp.getPosterior(Ytrain, Xtrain)
    _gp.optimize(Ytrain, Xtrain)
    ym, ys2, fm, fs2, lp = _gp.predict(Ytest)
    errors_backward = ym - Xtest
    # independence scores (p-values of residuals vs. inputs)
    forward_indep_pval = {
        'kruskal': kruskal(errors_forward, Xtest)[1],
        'HSIC': self.HilbertSchmidtNormIC(errors_forward, Xtest)[1]
    }[independence_criterion]
    backward_indep_pval = {
        'kruskal': kruskal(errors_backward, Ytest)[1],
        'HSIC': self.HilbertSchmidtNormIC(errors_backward, Ytest)[1]
    }[independence_criterion]
    # print('Scores:', forward_indep_pval, backward_indep_pval)
    # warning (from the original author): it should arguably be '<'
    if forward_indep_pval > backward_indep_pval:
        self.causal_direction = 1
        self.pvalscore = forward_indep_pval
    else:
        self.causal_direction = -1
        self.pvalscore = backward_indep_pval
    return {'causal_direction': self.causal_direction,
            'pvalscore': self.pvalscore,
            'difways': abs(forward_indep_pval - backward_indep_pval)}
def ANM_causation_score(self, train_size=0.5, independence_criterion='HSIC',
                        metric='linear', regression_method='GP'):
    '''
    Measure how likely a given causal direction is true.

    Parameters
    ----------
    train_size : fraction of the given data used for the training phase
    independence_criterion :
        'kruskal' for the Kruskal-Wallis H-test,
        'HSIC' for the Hilbert-Schmidt Independence Criterion
    metric : linear, sigmoid, rbf, poly
        kernel function used to compute the Gram matrix for HSIC;
        a Gaussian kernel is used in "Nonlinear causal discovery with
        additive noise models", Patrik O. Hoyer et al.

    Returns
    -------
    causal_strength: a float between 0 and 1
    '''
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(self.X, self.Y, train_size=train_size)
    if regression_method == 'GP':
        # forward case
        _gp = pyGPs.GPR()  # specify model (GP regression)
        _gp.getPosterior(Xtrain, Ytrain)  # fit default model (zero mean & RBF kernel)
        _gp.optimize(Xtrain, Ytrain)  # optimize hyperparameters (default: single-run minimize)
        ym, ys2, fm, fs2, lp = _gp.predict(Xtest)
        errors_forward = ym - Ytest
    else:
        _gp = KernelRidge(kernel='sigmoid')
        _gp.fit(Xtrain, Ytrain)
        errors_forward = _gp.predict(Xtest) - Ytest
    # independence score
    forward_indep_pval = {
        'kruskal': kruskal(errors_forward, Xtest)[1],
        'HSIC': self.HilbertSchmidtNormIC(errors_forward, Xtest, metric=metric)[1]
    }[independence_criterion]
    return {'causal_strength': forward_indep_pval}
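# A hedged usage sketch for the two ANM methods above; `BivariateANM` is a
# hypothetical name for the class they belong to, which is assumed to store
# the paired samples as self.X and self.Y.
import numpy as np

rng = np.random.RandomState(1)
X = rng.uniform(-1, 1, (200, 1))
Y = X ** 3 + 0.1 * rng.randn(200, 1)  # X causes Y with additive noise
anm = BivariateANM(X, Y)
print(anm.ANM_predict_causality(independence_criterion='HSIC'))
print(anm.ANM_causation_score(regression_method='GP'))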
        # (fragment: continues an enclosing loop that collects per-target columns)
        temp = [trainingSet[i][5]]
        y5.append(temp)
        temp = [trainingSet[i][6]]
        y6.append(temp)

x = np.array(X)
y0 = np.array(y0)
y1 = np.array(y1)
y2 = np.array(y2)
# m = pyGPs.mean.Zero()
# k = pyGPs.cov.RBFard(log_ell_list=[0.05, 0.17], log_sigma=1.)
# model.setPrior(mean=m, kernel=k)
# model.setNoise(log_sigma=np.log(0.1))
model1 = pyGPs.GPR()  # one model per target dimension
model1.setData(x, y0)
model2 = pyGPs.GPR()
model2.setData(x, y1)
model3 = pyGPs.GPR()
model3.setData(x, y2)

host = ''
port = 8221
address = (host, port)
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(address)
server_socket.listen(5)
def calculate_rmse_gp(vector_x, vector_y, weighted=True, plot=False, context=None,
                      optimization_params=None, signed=False, sample=None):
    """Calculate the root mean squared error.

    :param vector_x: timestamps of the timeseries
    :param vector_y: valueSet of the timeseries
    :param weighted: weight RMSE wrt variance of prediction
    :param plot: plot the expected function
    :param context: (internal)
    :param optimization_params:
    :param signed: add a sign to the RMSE based on whether the prediction is
        on average higher or lower than the observed series
    :param sample: learn from a sample of the data (int for a minimum number,
        float for a fraction, list for indices)
    :returns: list(idx, rmse), hyperparams, model
    """
    if optimization_params is None:
        optimization_params = {}
    # setX = [preprocessing.scale(element) for element in vector_x]
    # setY = preprocessing.scale(vector_y, axis=1)
    vector_y_train = vector_y
    vector_x_train = vector_x
    if sample:
        if type(sample) == float:
            logger.debug("Sample series for training (ratio)")
            vector_y_train = []
            vector_x_train = []
            for idx in random.sample(range(len(vector_y)), k=int(len(vector_y) * sample)):
                vector_y_train.append(vector_y[idx])
                vector_x_train.append(vector_x[idx])
        elif type(sample) == int:
            logger.debug("Sample series for training (number)")
            if len(vector_y) <= sample:
                vector_y_train = vector_y
                vector_x_train = vector_x
            else:
                vector_y_train = []
                vector_x_train = []
                for idx in random.sample(range(len(vector_y)), k=sample):
                    vector_y_train.append(vector_y[idx])
                    vector_x_train.append(vector_x[idx])
        elif type(sample) == list:
            logger.debug("Sample series for training (indices)")
            vector_y_train = []
            vector_x_train = []
            for idx in sample:
                vector_y_train.append(vector_y[idx])
                vector_x_train.append(vector_x[idx])
    model = pyGPs.GPR()  # specify model (GP regression)
    # hyperparams will be set with the optimizeHyperparameters method
    k = pyGPs.cov.Linear() + pyGPs.cov.RBF()
    model.setPrior(kernel=k)
    hyperparams, model2 = gpe.optimizeHyperparameters(
        optimization_params.get("initialHyperParameters", [0.0000001, 0.0000001, 0.0000001]),
        model, vector_x_train, vector_y_train,
        bounds=optimization_params.get("bounds", [(None, 5), (None, 5), (None, 5)]),
        method=optimization_params.get("method", 'L-BFGS-B'))
    logger.info('Hyperparameters used: {}'.format(hyperparams))
    # mean (y_pred), variance (ys2), latent mean (fm) and variance (fs2),
    # log predictive probability (lp)
    y_pred, ys2, fm, fs2, lp = model2.predict(vector_x[0])
    last_vector_x = vector_x[0]
    rmse_data = []
    for i in range(len(vector_y)):
        if not np.all(np.equal(last_vector_x, vector_x[i])):
            logger.debug("Recomputing prediction")
            y_pred, ys2, fm, fs2, lp = model2.predict(vector_x[i])
            last_vector_x = vector_x[i]
        if weighted:
            rmse = math.sqrt(mean_squared_error(vector_y[i], y_pred,
                                                (np.max(ys2) - ys2)) / np.max(ys2))
        else:
            rmse = math.sqrt(mean_squared_error(vector_y[i], y_pred))
        if signed:
            if np.mean(vector_y[i] - y_pred) < 0:
                rmse = -rmse
        rmse_data.append((i, rmse))
    if plot:
        fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 2))
        xs = vector_x[0]
        ym = y_pred
        xss = np.reshape(xs, (xs.shape[0],))
        ymm = np.reshape(ym, (ym.shape[0],))
        ys22 = np.reshape(ys2, (ys2.shape[0],))
        for i in vector_y:
            ax[0].plot(i, color='blue', alpha=0.2)
        ax[0].set_title("Node {}".format(context["cum_depth"]))
        ax[0].fill_between(xss, ymm + 3. * np.sqrt(ys22), ymm - 3. * np.sqrt(ys22),
                           facecolor=[0.7539, 0.89453125, 0.62890625, 1.0],
                           linewidth=0.5)
        ax[0].plot(xss, ym, color='red', label="Prediction")
        ax[0].legend()
        rmse_list = [t[1] for t in rmse_data]
        ax[1].hist(rmse_list, bins=100)
        ax[1].vlines(np.mean(rmse_list), 0, 2, color="red")
        ax[1].set_xlabel("RMSE")
        ax[1].set_ylabel("#")
        # plt.show(block=True)
    return rmse_data, hyperparams, model2
def __updateModel(self, sysIn, sysStat, sysOut, sysClus):
    # update the forecast model
    if self.verbose:
        print("update forecast model")
    # add the new data and delete the oldest (slide the window)
    gpfX = np.append(self.__gpFmdl.x, np.array([self.__modelTimeIdx]).reshape(-1, 1), axis=0)
    gpfY = np.append(self.__gpFmdl.y, np.array([sysIn]).reshape(-1, 1), axis=0)
    while gpfY.size > self.__gpin_winSize:
        gpfX = np.delete(gpfX, 0, 0)
        gpfY = np.delete(gpfY, 0, 0)
    self.__modelTimeIdx += 1
    # get the old hyperparameters
    hyp_f = self.__gpFmdl.covfunc.hyp
    # relearn the model with the old hyperparameters as a prior model
    try:
        self.__gpFmdl = gp.GPR()
        k_f = gp.cov.RBF(log_ell=hyp_f[0], log_sigma=hyp_f[1])
        self.__gpFmdl.setPrior(mean=gp.mean.Zero(), kernel=k_f)
        self.__gpFmdl.setNoise(log_sigma=np.log(0.8))
        self.__gpFmdl.setOptimizer('BFGS')
        self.__gpFmdl.optimize(gpfX, gpfY)
    except Exception:
        print('cannot BFGS it (forecast); falling back to Minimize')
        self.__gpFmdl = gp.GPR()
        k_f = gp.cov.RBF(log_ell=hyp_f[0], log_sigma=hyp_f[1])
        self.__gpFmdl.setPrior(mean=gp.mean.Zero(), kernel=k_f)
        self.__gpFmdl.setNoise(log_sigma=np.log(0.8))
        self.__gpFmdl.setOptimizer('Minimize')
        self.__gpFmdl.optimize(gpfX, gpfY)
    # update the cluster model
    if self.__onlineCluster:
        if self.verbose:
            print("update cluster model ...")
        self.__myCluster = KMeans(n_clusters=self.__Nc,
                                  init=self.__myCluster.cluster_centers_,
                                  random_state=0, n_init=1)
        # update the cluster data (slide the window)
        self.__clusData.append(sysClus)
        self.__clusData.pop(0)
        # refit the clusterer
        self.__myCluster.fit(self.__clusData)
    # update the system-state GP model
    if self.verbose:
        print("update system models ...")
    # estimate the discrete mode
    predCluster = self.__myCluster.predict(np.array(sysClus).reshape(1, -1))
    self.__modelstate = np.asscalar(predCluster[0])
    self.__sysModes.append(self.__modelstate)
    # pull the model used for the last prediction
    gprMdl = self.__GPs[self.__modelstate]
    newgprX = np.array([sysIn] + self.__sysState).reshape(1, -1)
    gprX = np.append(gprMdl.x, newgprX, axis=0)
    gprY = np.append(gprMdl.y, np.array([sysOut]).reshape(1, -1), axis=0)
    while gprY.size > self.__gpWinSize:
        gprX = np.delete(gprX, 0, 0)
        gprY = np.delete(gprY, 0, 0)
    hyp = gprMdl.covfunc.hyp
    gprMdl = gp.GPR()
    m = gp.mean.Zero()
    k = gp.cov.RBFard(D=None, log_ell_list=hyp[:-1], log_sigma=hyp[-1])
    gprMdl.setPrior(mean=m, kernel=k)
    gprMdl.setNoise(log_sigma=np.log(0.81))
    try:
        gprMdl.setOptimizer('BFGS')
        gprMdl.optimize(gprX, gprY)
    except Exception:
        print('cannot BFGS it; falling back to Minimize')
        gprMdl = gp.GPR()
        m = gp.mean.Zero()
        k = gp.cov.RBFard(D=None, log_ell_list=hyp[:-1], log_sigma=hyp[-1])
        gprMdl.setPrior(mean=m, kernel=k)
        gprMdl.setNoise(log_sigma=np.log(0.81))
        gprMdl.setOptimizer('Minimize')
        gprMdl.optimize(gprX, gprY)
    self.__GPs[self.__modelstate] = gprMdl
    # update the system state
    self.__sysState = sysStat
    # save the data for error calculation and prediction evaluation
    self.__In_pred_data.append(sysIn)
    self.__out_pred_data.append(sysOut)
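# The sliding-window updates above delete the oldest row one at a time with
# np.delete; an equivalent, slightly more idiomatic trim keeps only the
# newest rows in one step. A minimal sketch, assuming `window` is a positive
# int and the arrays are row-aligned:
def trim_window(X, y, window):
    # keep the most recent `window` rows of the paired arrays
    return X[-window:], y[-window:]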
def remove_confounds_fast(training_predictors, testing_predictors, training_data,
                          testing_data, training_label, training_group_labels,
                          normalisation, verbose):
    # start by checking all inputs
    assert (np.shape(training_predictors)[0] == np.shape(training_data)[0]), \
        'Training predictors and training data must have same number of subjects'
    assert (np.shape(testing_predictors)[0] == np.shape(testing_data)[0]), \
        'Testing predictors and testing data must have same number of subjects'
    assert (np.shape(training_predictors)[1] == np.shape(testing_predictors)[1]), \
        'Training and testing predictors must have same number of variables'
    assert (np.shape(training_data)[1] == np.shape(testing_data)[1]), \
        'Training and testing data must have same number of variables'
    assert (len(training_group_labels) == np.shape(training_data)[0]), \
        'Training group labels must have length equal to number of training subjects'
    # initialise corrected data
    corrected_testing_data = np.zeros_like(testing_data)
    corrected_training_data = np.zeros_like(training_data)
    # normalise the training and testing predictors
    if normalisation:
        testing_predictors = testing_predictors - np.min(training_predictors, axis=0)
        training_predictors = training_predictors - np.min(training_predictors, axis=0)
        testing_predictors = testing_predictors.astype(float) / np.max(training_predictors, axis=0)
        training_predictors = training_predictors.astype(float) / np.max(training_predictors, axis=0)
    # do regression
    n_variables = np.shape(training_data)[1]
    # if the training group label is not 0, train only on subjects with the
    # given label; first copy the original training predictors and data for
    # predictions
    testing_training_predictors = training_predictors
    testing_training_data = training_data
    if not training_label == 0:
        training_predictors = training_predictors[training_group_labels == training_label, :]
        training_data = training_data[training_group_labels == training_label, :]
    # set up the GP:
    # calculate the distance matrix of the training predictors to initialise the RBF
    dists = squareform(pdist(training_predictors))
    # covariance is linear + RBF + noise; these all have a built-in scale, so
    # there is no need to introduce extra hyperparameters. Set the scale hyps
    # to unity and the RBF length hyp to the log of the median distance.
    k = pyGPs.cov.Linear(log_sigma=np.log(1.0)) \
        + pyGPs.cov.RBF(log_ell=np.log(np.median(dists[:])), log_sigma=np.log(1.0)) \
        + pyGPs.cov.Noise(log_sigma=np.log(1.0))
    # zero mean
    m = pyGPs.mean.Zero()
    model = pyGPs.GPR()
    model.setPrior(mean=m, kernel=k)
    model.setNoise(log_sigma=np.log(np.std(training_data[:])))
    # optimize the hyperparameters by maximizing log-likelihood over all variables
    if verbose:
        print('Optimizing hyperparameters...')
    hyps_opt = minimize_Kostro.minimize_Kostro(model, training_predictors, training_data, 200)
    if verbose:
        print('Hyperparameters optimized!')
    # set the GP with the optimized hyperparameters (must convert arrays to a list)
    model.covfunc.hyp = list(hyps_opt[:-1])
    model.setNoise(log_sigma=np.log(np.std(training_data[:])))
    # loop through variables, removing the effects of confounds on each one
    for i in range(n_variables):
        if (i % 1000) == 0 and verbose:
            print('%i features processed' % i)
        # targets are the i'th column of features
        training_targets = training_data[:, i]
        # set training data
        model.setData(training_predictors, training_targets)
        # make predictions on the training data and store residuals
        ym, ys2, fm, fs2, lp = model.predict(testing_training_predictors)
        corrected_training_data[:, i] = testing_training_data[:, i] - np.squeeze(ym)
        # make predictions on the testing data and store residuals
        ym, ys2, fm, fs2, lp = model.predict(testing_predictors)
        corrected_testing_data[:, i] = testing_data[:, i] - np.squeeze(ym)
    return corrected_training_data, corrected_testing_data
t_training = t[points_training]
w_training = workload[points_training]
# w_training = w_training / max(w_training)
tr_training = tr1[points_training]

# Data for validation
points_validation = np.arange(0, len(t), 5)
t_validation = t[points_validation]
w_validation = workload[points_validation]
tr_validation = tr1[points_validation]

# Training of the GP model
gp_system = gp.GPR()
gp_system.setOptimizer("Minimize", num_restarts=10)
gp_system.getPosterior(w_training, tr_training)
gp_system.optimize(w_training, tr_training)
# plt.figure()
gp_system.predict(np.sort(w_validation))
# gp_system.plot()

# Validation
tr_predicted = np.zeros(len(t_validation))
for i in np.arange(len(t_validation)):
    gp_system.predict(np.array([w_validation[i]]))
    tr_predicted[i] = np.asscalar(gp_system.ym)
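# A small hedged follow-up to the validation loop above: the RMSE of the
# predictions against the held-out responses in tr_validation.
rmse = np.sqrt(np.mean((tr_predicted - tr_validation) ** 2))
print("validation RMSE:", rmse)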
if __name__ == '__main__':
    data = sio.loadmat('airlinedata.mat')
    x = np.atleast_2d(data['xtrain'])
    y = np.atleast_2d(data['ytrain'])
    xt = np.atleast_2d(data['xtest'])
    yt = np.atleast_2d(data['ytest'])
    # To get interpolation too:
    # xt = np.concatenate((x, xt))
    # yt = np.concatenate((y, yt))

    # Set some parameters
    Q = 10
    model = pyGPs.GPR()  # start from a new model
    # Specify non-default mean and covariance functions
    # @SEE doc_kernel_mean for documentation of all kernels/means
    m = pyGPs.mean.Zero()
    for _ in range(10):
        hyps = pyGPs.cov.initSMhypers(Q, x, y)
        k = pyGPs.cov.SM(Q, hyps)
        model.setPrior(kernel=k)
        # Noise std. deviation
        sn = 0.1
        model.setNoise(log_sigma=np.log(sn))
        # Instead of getPosterior(), which only fits the data using the given
        # hyperparameters, optimize() would also optimize them.
import matplotlib.pyplot as plt
import numpy as np
import pyGPs

demoData = np.load('../../data/regression_data.npz')
x = demoData['x']
y = demoData['y']
z = demoData['xstar']

model_full = pyGPs.GPR()
model_full.getPosterior(x, y)
model_full.optimize(x, y)
model_full.predict(z)
model_full.plot()

# Training error
prediction_x = model_full.predict(x)[0]
error_x = np.linalg.norm(prediction_x - y, 2) / np.linalg.norm(y, 2)
print('Training Error: %e' % error_x)

# Spectrum
covariance = model_full.covfunc.getCovMatrix(x, x, mode='train')
u, s, v = np.linalg.svd(covariance)
x_axis = np.arange(1, covariance.shape[0] + 1)
plt.plot(x_axis, s, '-r')
plt.title('Spectrum for Full GP')
plt.xlabel('Dimension')
plt.ylabel('Singular Value')
plt.show()
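# For comparison with the full GP above, a hedged sketch of pyGPs' sparse
# FITC approximation on the same data; the inducing-point grid is an
# illustrative choice, not part of the original demo.
u = np.linspace(x.min(), x.max(), 10).reshape(-1, 1)  # 10 inducing points
model_fitc = pyGPs.GPR_FITC()
model_fitc.setPrior(mean=pyGPs.mean.Zero(), kernel=pyGPs.cov.RBF(), inducing_points=u)
model_fitc.optimize(x, y)
model_fitc.predict(z)
model_fitc.plot()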