import numpy as np
import chainer.functions as F
from chainer import optimizers
# MLP and Regressor are project-local modules


class NeuralAgent(object):

    def __init__(self, env, actualQ):
        """
        Args:
            env: an environment
            actualQ: table of ground-truth Q-values used as regression targets
        """
        n_hidden = 10
        self.MLP = MLP(n_hidden, env.n_action)
        self.model = Regressor(self.MLP, lossfun=F.squared_error, accfun=None)
        self.env = env
        # one row per discrete state, one column per action
        self.Q = np.zeros(
            (env.n_action**env.n_input, env.n_action)).astype(np.float32)
        self.actualQ = actualQ
        self.optimizer = optimizers.SGD()
        self.optimizer.setup(self.model)

    def compute_loss(self, y, t):
        """
        We define the loss as the sum of the squared errors between the
        actual and predicted Q-values.

        :param y: predicted Q-values
        :param t: actual Q-values
        :return: loss
        """
        return np.sum(np.square(np.subtract(y, t)))

    def act(self, observation):
        """
        Act greedily on the current observation.

        :param observation: new observation
        :return: action with the highest predicted Q-value
        """
        x = self.model.predictor(observation).data
        action = np.argmax(x)
        return action

    def train(self, a, old_obs, r, new_obs):
        """
        Update the Q-table from the network's prediction and take one
        optimizer step towards the ground-truth Q-values.

        :param a: action
        :param old_obs: old observation
        :param r: reward
        :param new_obs: new observation
        """
        newQ = self.model.predictor(old_obs).data
        _old_obs = self.env.asint(old_obs)
        self.Q[_old_obs, 0] = newQ[0, 0]
        self.Q[_old_obs, 1] = newQ[0, 1]
        self.optimizer.update(self.model.lossfun,
                              self.Q[_old_obs, :],
                              self.actualQ[_old_obs, :])
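# A minimal interaction-loop sketch for NeuralAgent (not from the original
# source): it assumes a gym-style environment exposing reset() and
# step(action) -> (observation, reward, done, info) in addition to the
# attributes used above, and that observations are already in the batched
# float32 array format the predictor expects.
agent = NeuralAgent(env, actualQ)
for episode in range(100):
    obs = env.reset()
    done = False
    while not done:
        action = agent.act(obs)
        new_obs, reward, done, _ = env.step(action)
        agent.train(action, obs, reward, new_obs)
        obs = new_obs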
import torch.nn as nn


class Model(nn.Module):

    def __init__(self, input_dim, embed_dim, output_dim, data):
        super(Model, self).__init__()
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.output_dim = output_dim
        self.num_layer = 5
        self.embedding = nn.Conv1d(input_dim, embed_dim, 1)
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()
        self.residuals = nn.ModuleList()
        # self.residuals_bns = nn.ModuleList()
        self.skip = nn.ModuleList()
        self.skip.append(nn.Conv1d(embed_dim, embed_dim, 1))
        self.batch_norm = False
        self.data = data
        if self.batch_norm:
            self.filter_conv_bns = nn.ModuleList()
            self.gate_conv_bns = nn.ModuleList()
            self.residual_bns = nn.ModuleList()
            self.skip_bns = nn.ModuleList()
            self.skip_bns.append(nn.BatchNorm1d(embed_dim))
        dilate = 1
        self.kernel_size = 2
        # stack of dilated convolution blocks; the dilation doubles per layer
        for i in range(self.num_layer):
            self.filter_convs.append(
                nn.Conv1d(embed_dim, embed_dim, self.kernel_size,
                          dilation=dilate))
            self.gate_convs.append(
                nn.Conv1d(embed_dim, embed_dim, self.kernel_size,
                          dilation=dilate))
            self.residuals.append(nn.Conv1d(embed_dim, embed_dim, 1))
            self.skip.append(nn.Conv1d(embed_dim, embed_dim, 1))
            if self.batch_norm:
                self.filter_conv_bns.append(nn.BatchNorm1d(embed_dim))
                self.gate_conv_bns.append(nn.BatchNorm1d(embed_dim))
                self.residual_bns.append(nn.BatchNorm1d(embed_dim))
                self.skip_bns.append(nn.BatchNorm1d(embed_dim))
            dilate *= 2
        self.final1 = nn.Conv1d(embed_dim, embed_dim, 1)
        self.final2 = nn.Conv1d(embed_dim, embed_dim, 1)
        if self.batch_norm:
            self.final1_bn = nn.BatchNorm1d(embed_dim)
            self.final2_bn = nn.BatchNorm1d(embed_dim)
        # self.loss = 'Gaussian'
        self.loss = 'mul-Gaussian@20'
        self.regressor = Regressor(self.loss, embed_dim, input_dim)
        self.dropout = nn.Dropout(0.)
import torch.nn as nn


class Model(nn.Module):

    def __init__(self, input_dim, embed_dim, output_dim, data=None):
        super(Model, self).__init__()
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.output_dim = output_dim
        self.embedding1 = nn.Linear(input_dim, embed_dim)
        self.embedding2 = nn.Linear(embed_dim, embed_dim)
        self.rnn = nn.LSTM(embed_dim, output_dim)
        self.loss = 'mul-Gaussian@20'
        self.regressor = Regressor(self.loss, embed_dim, input_dim)
        self.final1 = nn.Linear(output_dim, embed_dim)
        self.final2 = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(0.)
def get_model(model_name):
    """
    Load the named model if it exists, otherwise train it.

    :param model_name: Name of the model
    :return: Network, Regressor, Optimizer and results
    """
    try:
        with open("{}_rnn.pickle".format(model_name), 'rb') as f:
            rnn = pickle.load(f)
        with open("{}_model.pickle".format(model_name), 'rb') as f:
            model = pickle.load(f)
        with open("{}_optimizer.pickle".format(model_name), 'rb') as f:
            optimizer = pickle.load(f)
        with open("{}_results.pickle".format(model_name), 'rb') as f:
            results = pickle.load(f)
        tqdm.write("Model '{}' Loaded!".format(model_name))
    except FileNotFoundError:
        # hidden_units and train_iter are module-level globals here
        rnn = RNN(n_hidden=hidden_units)
        model = Regressor(rnn, accfun=compute_accuracy, lossfun=compute_loss)
        # Set up the optimizer
        optimizer = optimizers.SGD()
        optimizer.setup(model)
        tqdm.write("Model not found! Starting training ...")
        results = train_network(train_iter, rnn, model, optimizer)
        with open('{}_rnn.pickle'.format(model_name), 'wb') as f:
            pickle.dump(rnn, f)
        with open('{}_model.pickle'.format(model_name), 'wb') as f:
            pickle.dump(model, f)
        with open('{}_optimizer.pickle'.format(model_name), 'wb') as f:
            pickle.dump(optimizer, f)
        with open('{}_results.pickle'.format(model_name), 'wb') as f:
            pickle.dump(results, f)

    # Plot the training and test loss as a function of epochs
    plt.plot(results[0], label='train loss')
    plt.plot(results[1], label='test loss')
    plt.legend()
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.show()
    return rnn, model, optimizer, results
class LinearRegression:
    '''Class for training data and graphing results'''

    def __init__(self, epochs=1000, l_rate=0.001) -> None:
        self.epochs: int = epochs
        self.l_rate: float = l_rate
        self.estimator = Regressor(0, 0)

    def graph(self, xg, yg, dots: np.ndarray, c='b', title='Graph'):
        '''Visualize scattered dots plus xg and yg as a line (prediction)'''
        plt.plot(xg, yg)
        plt.xlabel('mileage')
        plt.ylabel('estimated price')
        plt.xlim(xg.min() - 1, xg.max() + 1)
        plt.scatter(dots[:, 0], dots[:, 1], marker='x')
        plt.title(title)
        plt.show()
        return

    def loss_function(self, data: np.ndarray):
        m = data.shape[0]
        return sum([(self.estimator.predict(data[j][0]) - data[j][1])**2
                    for j in range(m)])

    def run_epoch(self, data: np.ndarray):
        # one step of batch gradient descent: tmp0 updates the intercept,
        # tmp1 the slope
        m = data.shape[0]
        tmp0 = self.l_rate * sum([
            self.estimator.predict(data[j][0]) - data[j][1] for j in range(m)
        ]) / m
        tmp1 = self.l_rate * sum(
            [(self.estimator.predict(data[j][0]) - data[j][1]) * data[j][0]
             for j in range(m)]) / m
        self.estimator.weights_update(tmp0, tmp1)
        return

    def animated_training(self, data: np.ndarray, df: pd.DataFrame, mus,
                          sigmas):
        fig, ax = plt.subplots(figsize=(16, 9), dpi=70)

        def animate(epoch: int):
            self.run_epoch(data)
            ax.clear()
            plt.title(f'epoch = {epoch}')
            ax.set_xlabel('km')
            ax.set_ylabel('price')
            ax.set_xlim(data.min(axis=0)[0] - 1, data.max(axis=0)[0] + 1)
            ax.set_ylim(-4, 4)
            x = np.linspace(start=data.min(axis=0)[0] - 1,
                            stop=data.max(axis=0)[0] + 1,
                            num=100)
            y = self.estimator.predict(x)
            line = plt.plot(x, y, label='prediction')
            plt.scatter(data[:, 0], data[:, 1], label='raw data', marker='x')
            plt.legend()
            return line,

        ani = animation.FuncAnimation(fig,
                                      animate,
                                      frames=self.epochs,
                                      interval=10,
                                      blit=False)
        plt.show()
        for epoch in range(self.epochs):
            self.run_epoch(data)
        scaled_x = np.linspace(start=data.min(axis=0)[0] - 1,
                               stop=data.max(axis=0)[0] + 1,
                               num=100)
        self.graph(scaled_x, self.estimator.predict(scaled_x), data, 'k',
                   f'Scaled data ({self.epochs})')
        x_lin = np.linspace(start=df.min(axis=0)[0] - 1,
                            stop=df.max(axis=0)[0] + 1,
                            num=100)
        y_lin = self.estimator.predict(scaled_x) * sigmas[1] + mus[1]
        self.graph(x_lin, y_lin, (np.matrix([df.km, df.price]).T).A, 'b',
                   'Resulting unscaled prediction')
        return

    def train(self, df: pd.DataFrame, plot=True):
        np.random.seed(7171)
        scaled_data, mus, sigmas = Scaler.rescale(df)
        self.estimator.set_scaling_parameters(mus, sigmas)
        data = (np.matrix([scaled_data.km, scaled_data.price]).T).A
        if not plot:
            for epoch in range(self.epochs):
                self.run_epoch(data)
        else:
            self.animated_training(data, df, mus, sigmas)
        pickle.dump(self.estimator, open('weights.sav', 'wb'))
        print(f'Resulting loss function: {self.loss_function(data)}')
        return
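# A hypothetical usage sketch (not part of the original source): it assumes
# a CSV file 'data.csv' with 'km' and 'price' columns, matching the
# df.km / df.price accesses above.
df = pd.read_csv('data.csv')
lr = LinearRegression(epochs=1000, l_rate=0.001)
lr.train(df, plot=False)  # set plot=True for the animated version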
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from DataManager import DataManager
from Regressor import Regressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

input_dir = "../public_data"
output_dir = "../res"
basename = 'movierec'

D = DataManager(basename, input_dir)  # Load data
print(D)

myRegressor = Regressor()

# Train
Ytrue_tr = D.data['Y_train']
myRegressor.fit(D.data['X_train'], Ytrue_tr)

# Making predictions
Ypred_tr = myRegressor.predict(D.data['X_train'])
Ypred_va = myRegressor.predict(D.data['X_valid'])
Ypred_te = myRegressor.predict(D.data['X_test'])

# We can compute the training success rate
acc_tr = accuracy_score(Ytrue_tr, Ypred_tr)
# But it might be optimistic compared to the validation and test accuracy
# that we cannot compute (except by making submissions to Codalab).
# So, we can use cross-validation:
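# A minimal cross-validation sketch completing the comment above (an
# assumed completion, not the original author's code); it presumes the
# custom Regressor follows the sklearn estimator API, as its fit/predict
# calls already suggest.
scores = cross_val_score(Regressor(), D.data['X_train'], Ytrue_tr, cv=5)
print("CV score: {:.4f} +/- {:.4f}".format(scores.mean(), scores.std()))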
def test(n_data, benchmark, violated_const_ratio):
    model_1_mae, model_2_mae, model_3_mae = [], [], []
    model_1_violated_const, model_2_violated_const, model_3_violated_const = [], [], []

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    train_split = 0.5
    test_split = 0.4
    val_split = 0.1

    for seed in seeds:
        params = {
            'epochs': 150,
            'n_data': n_data,
            'batch_size': 256,
            # used to create a training set with a specific
            # amount of constraint violations
            'violated_const_ratio': violated_const_ratio,
            'benchmark': benchmark,
            'split': [train_split, test_split, val_split],
            'seed': seed
        }

        # the 3 approaches are tested on the same sample of data
        d_train = Dataset(params, 'train', device)
        d_test = Dataset(params, 'test', device)
        d_val = Dataset(params, 'valid', device)

        # plain regressor
        model_1 = Regressor(params, d_train, d_test, d_val)
        model_1.train()
        tmp = model_1.test()
        model_1_mae.append(tmp[0])
        model_1_violated_const.append(tmp[1])

        # regularization with a single multiplier
        model_2 = SBRregressor(params, d_train, d_test, d_val)
        model_2.train()
        tmp = model_2.test()
        model_2_mae.append(tmp[0])
        model_2_violated_const.append(tmp[1])

        # regularization with a multiplier for each constraint
        model_3 = SBRregressor2(params, d_train, d_test, d_val)
        model_3.train()
        tmp = model_3.test()
        model_3_mae.append(tmp[0])
        model_3_violated_const.append(tmp[1])

    mae = list(zip(model_1_mae, model_2_mae, model_3_mae))
    violated_const = list(
        zip(model_1_violated_const, model_2_violated_const,
            model_3_violated_const))

    base_filename = str(benchmark) +\
        "_tr" + str(int(n_data * train_split)) +\
        "_ts" + str(int(n_data * test_split)) +\
        "_v" + str(int(n_data * val_split)) +\
        "_vconst" + str(violated_const_ratio)
    store(base_filename, mae, violated_const)
import numpy as np
import pandas as pd
from Regressor import Regressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('diamonds.csv')
data.drop(['Unnamed: 0'], axis=1, inplace=True)
data = data[(data[['x', 'y', 'z']] != 0).all(axis=1)]

encoder = LabelEncoder()
data['cut'] = encoder.fit_transform(data['cut'])
data['color'] = encoder.fit_transform(data['color'])
data['clarity'] = encoder.fit_transform(data['clarity'])

y = data['price']
data = data.drop(['price'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(data, y,
                                                    test_size=0.2,
                                                    random_state=66)

reg = Regressor('KNN')
reg.fit(X_train, y_train)
print(reg.score(X_test, y_test))
import torch.nn as nn


class Model(nn.Module):

    def __init__(self, input_dim, embed_dim, z_dim, data):
        super(Model, self).__init__()
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.output_dim = embed_dim
        output_dim = embed_dim
        self.mlp_dim = z_dim
        mlp_dim = z_dim
        self.num_layer = 4
        self.pre_layer = 0
        self.z_dim = z_dim // self.num_layer
        self.embedding = nn.Conv1d(input_dim, embed_dim, 1)
        self.batch_norm = False
        self.data = data
        self.fwds = nn.ModuleList()
        self.skip_gates = nn.ModuleList()
        self.forward_gates = nn.ModuleList()
        self.inference_gates = nn.ModuleList()
        self.backward = nn.ModuleList()
        self.bwd_gates = nn.ModuleList()
        self.pri_gates = nn.ModuleList()
        dilate = 1
        self.fwds_first = nn.ModuleList()
        self.skip0 = Gates(embed_dim, output_dim, mlp_dim)
        self.skip_first = nn.ModuleList()
        for i in range(self.pre_layer):
            self.fwds_first.append(
                WaveNetGate(embed_dim, embed_dim, dilate, batch_norm=True))
            self.skip_first.append(Gates(embed_dim, output_dim, mlp_dim))
            dilate *= 2
        for i in range(self.num_layer):
            self.fwds.append(
                WaveNetGate(embed_dim, embed_dim, dilate, batch_norm=True))
            self.backward.append(
                WaveNetGate(embed_dim, embed_dim, dilate, batch_norm=True))
            self.pri_gates.append(Gates(embed_dim, z_dim * 2, mlp_dim))
            self.forward_gates.append(
                Gates(embed_dim + z_dim, embed_dim, mlp_dim))
            self.inference_gates.append(
                Gates(embed_dim * 2, z_dim * 2, mlp_dim))
            self.skip_gates.append(
                Gates(embed_dim + z_dim, output_dim, mlp_dim))
            self.bwd_gates.append(Gates(embed_dim * 2, embed_dim, mlp_dim))
            dilate *= 2
        self.final_dilate = dilate // 2
        self.final1 = nn.Conv1d(output_dim, output_dim, 1)
        self.final2 = nn.Conv1d(output_dim, output_dim, 1)
        # self.bwd_fc1 = nn.Conv1d(embed_dim, output_dim, 1)
        # self.bwd_fc2 = nn.Conv1d(output_dim, output_dim, 1)
        if self.batch_norm:
            self.final1_bn = nn.BatchNorm1d(output_dim)
            self.final2_bn = nn.BatchNorm1d(output_dim)
        self.loss = 'Gaussian'
        self.regressor = Regressor(self.loss, output_dim, input_dim)
        self.dropout = nn.Dropout(0.)
if __name__ == "__main__": epochs = 10 train = create_data(n=400) test = create_data(n=400) train_iter = iterators.SerialIterator(train, batch_size=100, shuffle=False) test_iter = iterators.SerialIterator(test, batch_size=100, repeat=False, shuffle=False) rnn = RNN(n_hidden=50) model = Regressor(predictor=rnn, lossfun=compute_loss, accfun=compute_accuracy) # Set up the optimizer optimizer = optimizers.SGD() optimizer.setup(rnn) # Set up the trainer updater = training.StandardUpdater(train_iter, optimizer) trainer = training.Trainer(updater, (epochs, 'epoch')) # Evaluate the model with the test dataset for each epoch trainer.extend(extensions.Evaluator(test_iter, model)) trainer.extend(extensions.LogReport()) trainer.extend(
def buildRegressor(self):
    self.__learner = Regressor()
    return self.__buildCommonLearner(self.__learner)
from Regressor import Regressor


def scan_test_cases_n(f):
    """Read the number of test cases from the file."""
    return int(f.readline())


def scan_input(f):
    """Read n, x, y, degree and return them."""
    line = list(map(int, f.readline().split()))
    points_n = line[0]
    degree = line[1]
    x, y = [], []
    for j in range(points_n):
        line = list(map(float, f.readline().split()))
        x.append(line[0])
        y.append(line[1])
    return points_n, degree, x, y


if __name__ == '__main__':
    with open('input.txt') as f:
        test_cases_n = scan_test_cases_n(f)
        for i in range(test_cases_n):
            # print(scan_input(f))
            regressor = Regressor(scan_input(f))
from Regressor import Regressor

regressor = Regressor()
# note: both results currently come from identical fit_and_predict(2015) calls
outputLR = regressor.fit_and_predict(2015)
outputNN = regressor.fit_and_predict(2015)

print(outputLR)
print(outputNN)
# Params XGBOOST
eta_temp = np.linspace(0.01, 0.3, 2)  # 10
max_depth_temp = np.linspace(3, 8, 2, dtype=np.dtype(np.int16))  # 8
gamma_temp = np.linspace(0, 0.2, 2)  # 10
subsample_temp = np.linspace(0.5, 1, 2)  # 6
colsample_bytree_temp = np.linspace(0.5, 1, 2)  # 6
alpha_temp = np.linspace(0, 0.1, 2)  # 11
min_child_weight_temp = np.linspace(1, 20, 2)  # 10
params_xgb = [{'objective': 'reg:squarederror', 'eta': i, 'max_depth': j,
               'gamma': k, 'subsample': l, 'colsample_bytree': m,
               'alpha': n, 'min_child_weight': o}
              for i in eta_temp
              for j in max_depth_temp
              for k in gamma_temp
              for l in subsample_temp
              for m in colsample_bytree_temp
              for n in alpha_temp
              for o in min_child_weight_temp]

models = []

# ## ADD MODELS
# # add linear models
models += [Regressor("OLS", LinearRegression, [{}])]
models += [Regressor("ThSen", TheilSenRegressor, [{}])]  # (very slow)
models += [Regressor("Huber", HuberRegressor, params_huber)]
# # ridge and lasso
models += [Regressor("Ridge", Ridge, params_ridge)]
models += [Regressor("Lasso", Lasso, params_lasso)]
# # Bayesian Ridge
models += [Regressor("BayRidge", BayesianRidge, [{}])]
# # KNN Regressor
models += [Regressor("KNN", KNeighborsRegressor, params_knn)]
# add tree
models += [Regressor("DecTree", DecisionTreeRegressor, params_dt)]
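# A hedged sketch (not in the original excerpt): params_xgb is built above
# but never consumed in this snippet, so presumably it feeds an XGBoost
# entry using the same Regressor(name, estimator_class, param_grid)
# signature as the other models.
from xgboost import XGBRegressor
models += [Regressor("XGB", XGBRegressor, params_xgb)]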
if st.sidebar.checkbox('Barras Agrupadas 2 variáveis'):
    geradorGrafico.plotar_grafico_barras_agrupadas_2D()
    st.markdown('<hr/>', unsafe_allow_html=True)
if st.sidebar.checkbox('Barras Agrupadas 3 variáveis'):
    geradorGrafico.plotar_grafico_barras_agrupadas_3D()
    st.markdown('<hr/>', unsafe_allow_html=True)
if st.sidebar.checkbox('Pairplot'):
    geradorGrafico.plotar_pairplot()
    st.markdown('<hr/>', unsafe_allow_html=True)
if st.sidebar.checkbox('Correlação'):
    geradorGrafico.plotar_correlacao()
    st.markdown('<hr/>', unsafe_allow_html=True)

# Linear and logistic regressions (UI strings kept in Portuguese)
regressor = Regressor(df_limpo)
st.sidebar.write('Regressões')
if st.sidebar.checkbox('Linear'):
    st.header('Regressão Linear')
    regressor.linear()
    st.markdown('<hr/>', unsafe_allow_html=True)
if st.sidebar.checkbox('Logística'):
    st.header('Regressão Logística')
    regressor.logistica()
    st.markdown('<hr/>', unsafe_allow_html=True)