def test_linear_regression_gradient(self):
    """Run the shared computation-graph check on a linear-regression graph.

    Random values are drawn for the input ``x``, the outcome ``y`` and the
    parameters ``w`` and ``b``; the estimator's graph and those values are
    handed to ``test_utils.test_ComputationGraphFunction``.
    """
    n_features = 5
    model = linear.LinearRegression()

    # Draw in the order x, y, w, b so that a seeded RNG produces the same
    # values for each quantity as before.
    x = np.random.randn(n_features)
    y = np.array(np.random.randn())
    w = np.random.randn(n_features)
    b = np.array(np.random.randn())

    test_utils.test_ComputationGraphFunction(
        model.graph,
        {"x": x},
        {"y": y},
        {"w": w, "b": b},
    )
    # Always-true assertion: it is only reached if the helper above returned.
    self.assertTrue(1 == 1)
def get_Q_values_linear_tree(self, currentObs, nextObs, action, home_reward,
                             away_reward, qValue, home_identifier=None,
                             smooth_flag=None, merge_count=None):
    """Predict Q-values for one transition with the linear model of its leaf.

    The transition is wrapped in a ``C_UTree.Instance`` and routed to a leaf
    with ``self.utree.getAbsInstanceLeaf``.  The leaf's stored ``weight`` and
    ``bias`` are loaded into a fresh ``linear_regression.LinearRegression``,
    which is evaluated on the instance's current observation.

    :param currentObs: observation before the action
    :param nextObs: observation after the action
    :param action: action taken; also used to index the leaf's tabular values
    :param home_reward: passed through to the instance
    :param away_reward: passed through to the instance
    :param qValue: reference Q-values; entries 0, 1 and 2 are compared with
        the matching predictions (only read when ``smooth_flag`` is truthy)
    :param home_identifier: not used by this method
    :param smooth_flag: when truthy, a prediction that differs from its
        reference by at least 0.15 is replaced with the leaf's tabular value
        (``qValues_home`` / ``qValues_away`` / ``qValues_end``) for ``action``
    :param merge_count: running count of calls in which at least one value
        was replaced; ``None`` is treated as 0 when it has to be incremented
    :return: ``(q_values, merge_count)``
    """
    inst = C_UTree.Instance(-1, currentObs, action, nextObs, home_reward,
                            away_reward)

    sess = tf.Session()
    try:
        node = self.utree.getAbsInstanceLeaf(inst)
        LR = linear_regression.LinearRegression()
        LR.read_weights(weights=node.weight, bias=node.bias)
        LR.readout_linear_regression_model()
        sess.run(LR.init)
        predicted = sess.run(
            LR.pred, feed_dict={LR.X: [inst.currentObs]}).tolist()[0]
        LR.delete_para()
        del LR
    finally:
        # Release the session even when building or running the model fails.
        sess.close()
        del sess
    gc.collect()

    if not smooth_flag:
        return predicted, merge_count

    tolerance_level = 0.15
    smoothed = []
    replaced_any = False
    # Position i of the prediction pairs with the i-th tabular attribute.
    # The attribute is looked up lazily, only when a replacement is needed.
    tabular_attrs = ('qValues_home', 'qValues_away', 'qValues_end')
    for position, attr_name in enumerate(tabular_attrs):
        deviation = abs(qValue[position] - predicted[position])
        if deviation >= tolerance_level:
            replaced_any = True
            smoothed.append(getattr(node, attr_name)[action])
        else:
            smoothed.append(predicted[position])

    if replaced_any:
        merge_count = (0 if merge_count is None else merge_count) + 1
    return smoothed, merge_count
def get_action_linear_regression(observation, CUTreeAgent):
    """Pick the action whose leaf model gives the largest prediction.

    For every action in ``ACTION_LIST`` an instance is built from
    ``observation`` (used as both current and next observation), the matching
    leaf is looked up in ``CUTreeAgent.utree`` and that leaf's linear model is
    evaluated on the observation.

    :param observation: current observation
    :param CUTreeAgent: agent exposing ``utree.getAbsInstanceLeaf``
    :return: the element of ``ACTION_LIST`` with the maximal prediction; the
        raw (nested-list) predictions are compared with ``max``
    """
    predictions = []
    for candidate_action in ACTION_LIST:
        sess = tf.Session()
        try:
            # The leaf is located by the current observation.
            inst = C_UTree.Instance(-1, observation, candidate_action,
                                    observation, None, None)
            node = CUTreeAgent.utree.getAbsInstanceLeaf(inst)
            LR = linear_regression.LinearRegression()
            LR.read_weights(weights=node.weight, bias=node.bias)
            LR.readout_linear_regression_model()
            sess.run(LR.init)
            predictions.append(
                sess.run(LR.pred,
                         feed_dict={LR.X: [inst.currentObs]}).tolist())
        finally:
            # One session is opened per action; close each one so they do
            # not accumulate across calls.
            sess.close()
    return ACTION_LIST[predictions.index(max(predictions))]
def data_test():
    """Fit a model on ``data.csv`` and print its diagnostic statistics.

    ``read_data`` is expected to return an indexable whose element 0 holds
    the features and element 1 the targets.  The statistics are requested in
    a fixed order, which is kept because the model may cache intermediate
    results between calls.
    """
    training_data = read_data("data.csv")
    features = training_data[0]
    targets = training_data[1]

    model = linear_regression.LinearRegression()
    model.fit(features, targets)
    print(model.coefficients)

    print("Minimized RSS: ", end=" ")
    print(model.calculate_rss(features, targets, True))

    print("Squared RSE: ", end=" ")
    print(model.calculate_squared_rse())

    print("99% Confidence Interval: ", end=" ")
    print(model.calculate_coefficient_ci(0.99))

    print("R^2: ", end=" ")
    print(model.calculate_r2(features, targets, True))

    print("F-Statistic: ", end=" ")
    print(model.calculate_f_statistic(True))
    print(model.calculate_tss(targets))

    print("T-statics for the coefficients: ", end=" ")
    print(model.calculate_t_statistic())

    print("P-Values for the coefficients: ", end=" ")
    t_statistics = model.calculate_t_statistic()
    print(model.calculate_p_values(t_statistics))

    print("Leverage Statistics for the training data: ")
    print(model.calculate_leverage_statistic())
    print(model.calculate_vif_statistic())
def boost_tree_testing_performance(self, save_path, read_game_number,
                                   save_correlation_dir, save_mse_dir,
                                   save_mae_dir, save_rae_dir, save_rse_dir):
    """Evaluate the stored tree on recorded games 301-400.

    The pickled tree for ``read_game_number`` is loaded into ``self.utree``.
    Every recorded transition is routed to its leaf, each leaf's linear model
    is evaluated on the observations that reached it, and four parallel
    Q-value lists are collected:

    * ``test_q``   - Q-values stored in the game records
    * ``output_q`` - first output of the leaf's linear model
    * ``oracle_q`` - the leaf's tabular ``qValues`` for the recorded action
    * ``merge_q``  - result of ``self.merge_oracle_linear_q``

    :param save_path: directory prefix containing ``pickle_Game_File_<n>.p``
    :param read_game_number: game number of the tree snapshot to load
    :param save_correlation_dir: currently unused (its metric is disabled)
    :param save_mse_dir: currently unused (its metric is disabled)
    :param save_mae_dir: currently unused (its metric is disabled)
    :param save_rae_dir: forwarded to ``self.compute_rae``
    :param save_rse_dir: forwarded to ``self.compute_rse``
    """
    sys.stderr.write('starting from {0}\n'.format(read_game_number))
    tree_path = (save_path + 'pickle_Game_File_' + str(read_game_number) +
                 '.p')
    # NOTE: unpickling can execute arbitrary code; only load snapshots that
    # this project wrote itself.
    with open(tree_path, 'rb') as tree_file:
        self.utree = pickle.load(tree_file)
    sys.stderr.write('finishing read tree\n')

    # leaf -> (observations, recorded Q-values, actions), kept as plain lists
    # so that no ragged NumPy array has to be built or repeatedly copied.
    records_by_leaf = {}
    for game_number in range(301, 401):
        game_record = self.read_csv_game_record(
            self.problem.games_directory +
            'record_cartpole_transition_game{0}.csv'.format(
                int(game_number)))
        for transition in game_record:
            currentObs = transition.get('observation').split('$')
            nextObs = transition.get('newObservation').split('$')
            reward = float(transition.get('reward'))
            action = float(transition.get('action'))
            qValue = float(transition.get('qValue'))
            inst = C_UTree.Instance(-1, currentObs, action, nextObs, reward,
                                    None)
            node = self.utree.getAbsInstanceLeaf(inst)
            observations, recorded_q, actions = records_by_leaf.setdefault(
                node, ([], [], []))
            observations.append(currentObs)
            recorded_q.append(qValue)
            actions.append(action)

    all_q_values_record = {
        'output_q': [],
        'test_q': [],
        'oracle_q': [],
        'merge_q': []
    }
    for node, (observations, recorded_q, actions) in records_by_leaf.items():
        test_append_q = list(recorded_q)
        all_q_values_record['test_q'].extend(test_append_q)

        sess = tf.Session()
        try:
            LR = linear_regression.LinearRegression()
            LR.read_weights(weights=node.weight, bias=node.bias)
            LR.readout_linear_regression_model()
            sess.run(LR.init)
            qValues_output = sess.run(LR.pred,
                                      feed_dict={LR.X: observations}).tolist()
        finally:
            # One session per leaf: close it so sessions do not pile up.
            sess.close()

        output_append_q = [row[0] for row in qValues_output]
        all_q_values_record['output_q'].extend(output_append_q)

        oracle_append_q = [node.qValues[int(action)] for action in actions]
        all_q_values_record['oracle_q'].extend(oracle_append_q)

        merge_append_q = self.merge_oracle_linear_q(
            test_append_q, output_append_q, oracle_append_q)
        all_q_values_record['merge_q'].extend(merge_append_q)

    # The MSE, MAE and correlation metrics are currently disabled; only RAE
    # and RSE are computed.
    self.compute_rae(all_q_values_record, save_rae_dir)
    self.compute_rse(all_q_values_record, save_rse_dir)
def __init__(self, rng, input, n_in, n_hidden, n_out, final_layer='sigmoid'):
    """Build a one-hidden-layer perceptron with a selectable output layer.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
        which the labels lie

    :type final_layer: str
    :param final_layer: ``'sigmoid'`` selects
        ``logistic_regression.LogisticRegressionCrossEnt``, ``'tanh'``
        selects ``logistic_regression_tanh.LogisticRegressionCrossEnt``;
        any other value selects ``linear_regression.LinearRegression``
    """
    # The hidden layer always uses tanh; the activation could be swapped
    # for sigmoid or any other nonlinearity.
    self.hiddenLayer = HiddenLayer(rng=rng,
                                   input=input,
                                   n_in=n_in,
                                   n_out=n_hidden,
                                   activation=T.tanh)

    # Pick the output-layer class.  Each module is imported only when its
    # branch is taken.
    if final_layer == 'sigmoid':
        import logistic_regression
        output_layer_cls = logistic_regression.LogisticRegressionCrossEnt
    elif final_layer == 'tanh':
        import logistic_regression_tanh
        output_layer_cls = logistic_regression_tanh.LogisticRegressionCrossEnt
    else:
        import linear_regression
        output_layer_cls = linear_regression.LinearRegression

    # Whatever its type, the output layer reads the hidden layer's output.
    self.logRegressionLayer = output_layer_cls(input=self.hiddenLayer.output,
                                               n_in=n_hidden,
                                               n_out=n_out)

    hidden_W = self.hiddenLayer.W
    output_W = self.logRegressionLayer.W

    # L1 norm; one regularization option is to keep it small.
    self.L1 = abs(hidden_W).sum() + abs(output_W).sum()

    # Square of the L2 norm; another regularization option.
    self.L2_sqr = (hidden_W ** 2).sum() + (output_W ** 2).sum()

    # The losses of the whole network are those of the output layer.
    self.negative_log_likelihood = \
        self.logRegressionLayer.negative_log_likelihood
    self.euclidean_loss = self.logRegressionLayer.euclidean_loss

    # The model's parameters are the parameters of its two layers.
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
## Step 3: Perform PCA. pca = PCA(n_components=150).fit(x_train) plt.plot(np.cumsum(pca.explained_variance_ratio_)) plt.xlabel('number of components') plt.ylabel('cumulative explained variance') plt.show() show_eigenfaces(pca) ## Step 4: Project Training data to PCA print("Projecting the input data on the eigenfaces orthonormal basis") Xtrain_pca = pca.transform(x_train) Xtest_pca = pca.transform(x_test) our_regressor = lr.LinearRegression(Xtrain_pca, y_train).fit() # sklearn_regressor = LinearRegression().fit(Xtrain_pca, y_train) our_train_accuracy = our_regressor.score() print("train accuracy ....", our_train_accuracy) # Linear Regression model class LinearRegression(): def __init__(self, X, y, alpha=0.03, n_iter=1500): self.alpha = alpha self.n_iter = n_iter self.n_samples = len(y) self.n_features = np.size(X, 1) self.X = np.hstack((np.ones(
def generate_linear_b_u_tree_one_way_decision(input_all):
    """Predict the Q-values of actions 0, 1 and 2 for every observation.

    The tree saved after game 200 is loaded through ``Agent.CUTreeAgent``.
    Each observation is paired with each of the three actions and routed to
    a leaf; every leaf's linear model is then evaluated once on all the
    observations that reached it.

    :param input_all: iterable of observations
    :return: list of ``[q_action0, q_action1, q_action2]`` entries, one per
        observation, each value being the first model output
    """
    train_game_number = 200
    ice_hockey_problem = Problem_moutaincar.MoutainCar()
    CUTreeAgent = Agent.CUTreeAgent(problem=ice_hockey_problem,
                                    max_hist=3000,
                                    check_fringe_freq=1200,
                                    is_episodic=0,
                                    training_mode='_linear_epoch_decay_lr')
    CUTreeAgent.read_Utree(
        game_number=train_game_number,
        save_path=
        '/Local-Scratch/UTree model/mountaincar/model_boost_linear_qsplit_noabs_save_linear_epoch_decay_lr/'
    )

    # leaf -> (observations, actions, input indices), kept as plain lists so
    # that no ragged NumPy array has to be built or repeatedly copied.
    records_by_leaf = {}
    for index_number, observation in enumerate(input_all):
        # The next observation is not important, so the current one is
        # reused for it.
        instances = [
            C_UTree_boost_Galen.Instance(-1, observation, action,
                                         observation, None, None)
            for action in (0, 1, 2)
        ]
        leaves = [
            CUTreeAgent.utree.getAbsInstanceLeaf(inst) for inst in instances
        ]
        for action, leaf in enumerate(leaves):
            observations, actions, indices = records_by_leaf.setdefault(
                leaf, ([], [], []))
            observations.append(observation)
            actions.append(action)
            indices.append(index_number)

    # input index -> {action: model output row}
    index_qvalue_record = {}
    for node, (observations, actions, indices) in records_by_leaf.items():
        sess = tf.Session()
        try:
            LR = linear_regression.LinearRegression()
            LR.read_weights(weights=node.weight, bias=node.bias)
            LR.readout_linear_regression_model()
            sess.run(LR.init)
            qValues_output = sess.run(LR.pred,
                                      feed_dict={LR.X: observations})
        finally:
            # One session per leaf: close it so sessions do not pile up.
            sess.close()

        for row, (action, index) in enumerate(zip(actions, indices)):
            index_qvalue_record.setdefault(index, {})[action] = \
                qValues_output[row]

    # NOTE(review): entries come out in the order in which indices were first
    # recorded while walking the leaves, which is not necessarily the order
    # of input_all -- confirm that callers do not rely on positional
    # alignment with the input.
    decision_all = []
    for per_action in index_qvalue_record.values():
        q_left = per_action.get(0)
        q_middle = per_action.get(1)
        q_right = per_action.get(2)
        decision_all.append([q_left[0], q_middle[0], q_right[0]])
    return decision_all
def train_linear_regression_on_leaves(self, node):
    """Fit a linear Q-value model on every leaf below ``node``.

    Inner nodes recurse into their children.  A leaf trains a
    ``linear_regression.LinearRegression`` on its instances
    (``currentObs`` -> ``[qValue]``), warm-starting from the leaf's stored
    ``weight``/``bias`` when both are set, and stores the trained values
    back on the leaf.

    ``self.training_mode`` selects the hyper-parameters:

    * ``'_epoch_linear'`` - one epoch per stored instance
    * ``'_linear_epoch_decay_lr'`` - ``3 * max(len(instances), 50)`` epochs
      with a learning rate that decays with the leaf's update count and its
      number of instances (``node.update_times`` is incremented)
    * ``''`` - the model's defaults

    :param node: root of the subtree to train
    :return: number of leaves visited (a leaf without instances still
        counts as 1)
    :raises ValueError: if ``self.training_mode`` is not one of the above
    """
    if node.nodeType != NodeLeaf:
        return sum(
            self.train_linear_regression_on_leaves(child)
            for child in node.children)

    train_x = [instance.currentObs for instance in node.instances]
    train_y = [[instance.qValue] for instance in node.instances]
    if not train_x:
        return 1

    sess = tf.InteractiveSession(config=config)
    try:
        if self.training_mode == '_epoch_linear':
            LR = linear_regression.LinearRegression(
                training_epochs=len(node.instances))
        elif self.training_mode == '_linear_epoch_decay_lr':
            node.update_times += 1
            times = node.update_times
            # NOTE: `len(...) / 30` is floor division under Python 2 and
            # true division under Python 3; the expression is kept as
            # written.
            lr = 0.05 * float(1) / (1 + 0.0225 * times) * math.pow(
                0.977, len(node.instances) / 30)
            training_epochs = max(len(node.instances), 50) * 3
            LR = linear_regression.LinearRegression(
                training_epochs=training_epochs, learning_rate=lr)
        elif len(self.training_mode) == 0:
            LR = linear_regression.LinearRegression()
        else:
            raise ValueError("undefined training mode")

        if node.weight is None or node.bias is None:
            LR.read_weights()
        else:
            LR.read_weights(node.weight, node.bias)
        LR.linear_regression_model()
        trained_weights, trained_bias = LR.gradient_descent(
            sess=sess, train_X=train_x, train_Y=train_y)
        sys.stderr.write('node index is {0}\n'.format(node.idx))
        LR.delete_para()
        node.weight = trained_weights
        node.bias = trained_bias
        del LR
    finally:
        # Close the session on every path, including the ValueError above
        # and failures during training.
        sess.close()
        del sess
    gc.collect()
    return 1
#! /usr/bin/python3
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plot
import linear_regression

# Load the Boston housing data set.
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before running.
boston = datasets.load_boston()
X = boston.data
Y = boston.target

# Keep only the samples whose target is below 50.
keep = Y < 50
X = X[keep]
Y = Y[keep]

import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split.split(X, Y)

# Fit with the project's own implementation and report the fitted
# parameters followed by the score on the held-out split.
reg = linear_regression.LinearRegression()
reg.fit_normal(X_train, Y_train)
Y_predict = reg.predict(X_test)
print(reg.theta_)
print(reg.score(Y_test, Y_predict))
import numpy as np
import os
from sklearn.linear_model import LinearRegression
from data.data_generators import gen_sinusoidal
from data.data_processing import create_design_matrix
import linear_regression

# Generate the sample data and show the flattened targets.
x_mat, y = gen_sinusoidal(10, seed=12)
print(y.flatten())

# Build the design matrix and show it flattened.
x_design = create_design_matrix(2, x_mat)
print(x_design.flatten())

# Reference fit with scikit-learn (fit() returns the estimator itself).
model = LinearRegression().fit(x_design, y)
print(model.intercept_)
print(model.coef_)

# Same data fitted with the project's own implementation.
print("My model")
model2 = linear_regression.LinearRegression()
model2.fit(x_design, y)
print(model2.weights_)
import linear_regression, partitionSetTest
import sys

# Sample sizes 1000, 2000, ..., 6000.
xAxis = [step * 1000 for step in range(1, 7)]

# One linear_regression.LinearRegression(...) result per sample size, using
# the path given as the first command-line argument.
yAxis1 = [
    linear_regression.LinearRegression(sys.argv[1], size, 1000.0)
    for size in xAxis
]

# The boosting comparison is currently disabled, so this list stays empty:
#yAxis2.append(partitionSetTest.BoostingTest(sys.argv[1],i*1000,sys.argv[2]))
yAxis2 = []

print(xAxis, yAxis1)
def _build_rating_features(uid, mid, user, movie, user_avg_rating,
                           movie_avg_rating, all_avg):
    """Return the GetFeature() vector for one (user, movie) pair."""
    # Work on copies so that the shared user/movie tables are not modified.
    user_feature = copy.deepcopy(user[uid])
    user_feature.append(
        user_avg_rating[uid] if uid in user_avg_rating else all_avg)
    movie_feature = copy.deepcopy(movie[mid])
    movie_feature.append(
        movie_avg_rating[mid] if mid in movie_avg_rating else all_avg)
    return GetFeature(user_feature, movie_feature)


def main():
    """Train a rating model and write predictions to ../out/submit.txt.

    Each training row ``(uid, mid, rating)`` becomes a feature vector made
    of the user's and the movie's features, each extended with its average
    rating (the global average when no per-id average exists).  The fitted
    model then predicts a rating for each test row ``(tid, uid, mid)``; the
    prediction is rounded to the nearest integer and written as
    ``Id,rating`` lines.
    """
    user_avg_rating, movie_avg_rating, all_avg = preprocess.GetAvgRatingMap()
    user = preprocess.readUser()
    movie = preprocess.readMovie()
    train_data = preprocess.readTrain()
    test_data = preprocess.readTest()

    X = []
    y = []
    for uid, mid, rating in train_data:
        X.append(
            _build_rating_features(uid, mid, user, movie, user_avg_rating,
                                   movie_avg_rating, all_avg))
        y.append(float(rating))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Begin training model:')
    model = linear_regression.LinearRegression()
    model.fit(X, y)
    print(model.theta_)
    print(model.intercept_)
    print('End training model.')

    test_y = []
    for tid, uid, mid in test_data:
        features = _build_rating_features(uid, mid, user, movie,
                                          user_avg_rating, movie_avg_rating,
                                          all_avg)
        predicted = model.predict(np.array([features]))
        test_y.append((tid, int(round(predicted[0]))))

    with open("../out/submit.txt", "w") as f:
        f.write("Id,rating\n")
        f.writelines(
            str(tid) + "," + str(rating) + "\n" for tid, rating in test_y)
ncols=5) # 縦横の数を設定し、そのfigインスタンスとaxesインスタンスを作成。 for i in range(5): axes[0, i].set_xlim([xmin, xmax]) axes[0, i].set_ylim([ymin, ymax]) axes[1, i].set_ylim([ymin, ymax]) axes[1, i].set_ylim([ymin, ymax]) # 徐々にサンプル数を増やしたいため、iの値によってデータをスプリット xx = x[:2 + i * 2] yy = y[:2 + i * 2] # 一行目、二行目ともに同じデータを散文図で設定。 axes[0, i].scatter(xx, yy, color="k") axes[1, i].scatter(xx, yy, color="k") # 普通の線形回帰 model = linearreg.LinearRegression() model.fit(xx, yy) # 線形の図の始端、終端を定義 xs = [xmin, xmax] ys = [model.w_[0] + model.w_[1] * xmin, model.w_[0] + model.w_[1] * xmax] # 図示するため0行目のi列に代入 axes[0, i].plot(xs, ys, color="k") # リッジ回帰 model = ridge.RidgeRegression(lambda_=10.) model.fit(xx, yy) xs = [xmin, xmax] ys = [model.w_[0] + model.w_[1] * xmin, model.w_[0] + model.w_[1] * xmax] axes[1, i].plot(xs, ys, color="k") plt.show()