class Model:
    def __init__(self):
        self.layers = []
        self.optimizer = None

    def get_layers(self):
        return self.layers

    def get_output_shape(self):
        return self.layers[-1].get_output_shape()

    def add(self, layer):
        self.layers.append(layer)
        num_layers = len(self.layers)
        if num_layers > 1:
            prev_layer = self.layers[num_layers - 2]
            layer.set_input(prev_layer)

    def compile(self, cost, optimizer='sgd', num_epochs=10, batch_size=4, lr=0.15):
        if optimizer == 'sgd':
            self.optimizer = SGD(self, cost=cost, num_epochs=num_epochs, batch_size=batch_size, lr=lr)

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def train(self, train_x, train_y):
        self.optimizer.optimize(train_x, train_y)
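# Usage sketch for the Model class above. `Dense` and `MSECost` are placeholder
# names for a layer class and a cost object; they are not confirmed by this
# codebase, so substitute whatever layer and cost classes it actually provides.
model = Model()
model.add(Dense(units=16, input_shape=(8,)))   # hypothetical hidden layer
model.add(Dense(units=1))                      # hypothetical output layer
model.compile(cost=MSECost(), optimizer='sgd', num_epochs=20, batch_size=8, lr=0.05)
model.train(train_x, train_y)                  # train_x/train_y: training arrays
predictions = model.predict(test_x)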
def get_network_mse_1(x_all, y_all, num_of_neurons=(2, 25, 2), activation='relu',
                      lr=0.001, momentum_coef=0.0, weight_decay=0.0, p_dropout=0.0,
                      num_of_epochs=100, val_split=0.2, verbose=0):
    """ model with 1 hidden layer, loss is MSE """
    mse = LossMSE()
    model = Sequential()
    model.add(
        Linear(out=num_of_neurons[1],
               input_size=num_of_neurons[0],
               activation=activation))
    model.add(Dropout(prob=p_dropout))
    model.add(Linear(out=num_of_neurons[2], activation=activation))
    model.loss = mse
    sgd = SGD(lr, momentum_coef, weight_decay=weight_decay)
    report = sgd.train(model, x_all, y_all, num_of_epochs, val_split=val_split, verbose=verbose)
    return model, report
def run_training_and_evaluation():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()
    hidden_dims = [100]
    activation_functions = [sigmoid, sigmoid]
    init_parameters_sd = 1
    learning_rate = 2e-1
    batch_size = 50
    max_epochs = 20
    mlp_model = MLP(input_dim=784,
                    output_dim=10,
                    hidden_dims=hidden_dims,
                    activation_functions=activation_functions,
                    init_parameters_sd=init_parameters_sd,
                    optimizer=SGD(learning_rate=learning_rate))
    print(mlp_model)
    train_model(mlp_model,
                x_train,
                y_train,
                batch_size=batch_size,
                max_epochs=max_epochs,
                x_val=x_val,
                y_val=y_val,
                plot=True,
                early_stop=True,
                patience=2)
    file_name = f'mlp_model_{hidden_dims}_sd={init_parameters_sd}' + \
                f'_lr={learning_rate}_b={batch_size}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    mlp_model.save_model(file_name)
    evaluate_model(mlp_model, x_test, y_test)
def __init__(self,
             input_dim: int,
             output_dim: int,
             hidden_dims: List[int],
             init_parameters_sd: float = 1.0,
             activation_functions: Optional[List[Callable]] = None,
             optimizer: Optimizer = SGD(),
             initializer: Optional[Initializer] = None):
    self.input_dim = input_dim
    sizes = [input_dim] + hidden_dims + [output_dim]
    if initializer is None:
        initializer = NormalDistributionInitializer(init_parameters_sd)
    self.weights = initializer.init_weights(sizes)
    self.biases = initializer.init_biases(sizes)
    if activation_functions is None:
        self.activation_functions = [sigmoid] * (len(self.weights) - 1) + [softmax]
    else:
        self.activation_functions = activation_functions + [softmax]
    self.optimizer = optimizer
    self.optimizer.set_parameters({
        'weights': self.weights,
        'biases': self.biases
    })
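# Sketch of how the weights/biases/activation_functions initialised above are
# typically consumed by a forward pass. Illustration only, not the MLP class's
# actual forward method; it assumes weight matrices shaped (fan_in, fan_out)
# so that `a @ w` is valid.
def mlp_forward_sketch(x, weights, biases, activation_functions):
    a = x
    for w, b, activation in zip(weights, biases, activation_functions):
        # affine transform followed by the layer's activation
        # (sigmoid on hidden layers, softmax on the output layer by default)
        a = activation(a @ w + b)
    return a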
def run_training():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()
    learning_rate = 1e-1
    batch_size = 50
    max_epochs = 8
    mlp_model = MLP(input_dim=784,
                    output_dim=10,
                    hidden_dims=[30],
                    activation_functions=[sigmoid],
                    init_parameters_sd=1,
                    optimizer=SGD(learning_rate=learning_rate))
    print(mlp_model)
    train_model(mlp_model,
                x_train,
                y_train,
                lr=learning_rate,
                batch_size=batch_size,
                max_epochs=max_epochs,
                x_val=x_val,
                y_val=y_val,
                plot=True)
def _get_optimizer_by_name(optimizer_name: str) -> Optimizer:
    if optimizer_name == 'SGD':
        optimizer = SGD(learning_rate=1e-1)
    elif optimizer_name == 'Momentum':
        optimizer = Momentum(learning_rate=1e-1, momentum_rate=0.7)
    elif optimizer_name == 'Nestorov':
        optimizer = NestorovMomentum(learning_rate=1e-1, momentum_rate=0.7)
    elif optimizer_name == 'Adagrad':
        optimizer = Adagrad(learning_rate=1e-1)
    elif optimizer_name == 'Adadelta':
        optimizer = Adadelta()
    elif optimizer_name == 'Adam':
        optimizer = Adam(learning_rate=1e-2)
    else:
        optimizer = SGD(learning_rate=1e-1)
    return optimizer
def init_data():
    X, y = import_power_plant_data()
    X, y = X.to_numpy(), y.to_numpy()
    # print(X, y)
    # exit()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=1234)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    opt = SGD(lr=0.01)
    epoch = 10000
    regressor = LinearRegression(opt, epoch=epoch)
    x_plot = list(range(1, epoch + 1))
    all_mse = regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)
    # print(len(predicted))
    # exit()
    mse_value = Metrics.mse(y_test, predicted)
    # print(len(x_plot), len(all_mse))
    # print(mse_value)
    # y_pred_line = regressor.predict(X)
    # cmap = plt.get_cmap('viridis')
    # fig = plt.figure(figsize=(8, 6))
    # m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
    # m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
    # plt.plot(x_plot, all_mse, color="blue", linewidth=2)
    Plot.plot_time_series(x_plot, all_mse, "mse_plot", "number of iterations",
                          "Mean Square Error (MSE)", "MSE vs Number of iterations")
    plt.show()
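# For context: the SGD(lr=0.01) optimizer handed to LinearRegression above is not
# shown in this snippet. A single plain gradient-descent step for linear
# regression with an MSE loss typically looks like the sketch below; sgd_step is
# a hypothetical helper, not this repo's SGD class.
import numpy as np

def sgd_step(weights, bias, X_batch, y_batch, lr=0.01):
    n = X_batch.shape[0]
    preds = X_batch @ weights + bias          # current predictions
    error = preds - y_batch
    grad_w = (2.0 / n) * (X_batch.T @ error)  # gradient of MSE w.r.t. weights
    grad_b = (2.0 / n) * np.sum(error)        # gradient of MSE w.r.t. bias
    return weights - lr * grad_w, bias - lr * grad_b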
def __init__(self, train_loader, val_loader, criterion, val_criterion, afunc,
             x_shape, dim_out, args, features_net=None):
    assert args.model in globals(), \
        'models/{}.py has no definition of model {}'.format(args.algorithm, args.model)
    dim_in = 1
    for size in x_shape:
        dim_in *= size
    self.net = globals()[args.model](dim_in, dim_out, args, afunc)
    self.args = args
    self.features_net = features_net
    self.optimizer = SGD(self.net,
                         train_loader,
                         val_loader,
                         criterion,
                         val_criterion,
                         args,
                         transform_net=features_net)
def analyze_batch_sizes():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()
    simulation_number = 5
    learning_rate = 1e-1
    max_epochs = 7
    batch_sizes = [10, 50, 100, 500]
    training_data_dictionary = {}
    for batch_size in batch_sizes:
        epochs_num = []
        training_losses = []
        validation_losses = []
        validation_accuracies = []
        for i in range(simulation_number):
            print(f'\nBatch size : {batch_size}, simulation {i + 1}/{simulation_number}')
            mlp_model = MLP(input_dim=784,
                            output_dim=10,
                            hidden_dims=[30],
                            activation_functions=[sigmoid],
                            init_parameters_sd=1,
                            optimizer=SGD(learning_rate=learning_rate))
            sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies = \
                train_model(mlp_model,
                            x_train,
                            y_train,
                            batch_size=batch_size,
                            max_epochs=max_epochs,
                            x_val=x_val,
                            y_val=y_val,
                            plot=False)
            epochs_num.append(sim_overall_epoch_num)
            training_losses.append(sim_training_losses)
            validation_losses.append(sim_validation_losses)
            validation_accuracies.append(sim_validation_accuracies)
        training_data_dictionary[batch_size] = {
            'epochs': epochs_num,
            'train_losses': training_losses,
            'val_losses': validation_losses,
            'val_acc': validation_accuracies
        }
    file_name = f'batch_sizes_analysis_data_{batch_sizes}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    with open(file_name, 'wb') as handle:
        pkl.dump(training_data_dictionary, handle, protocol=pkl.HIGHEST_PROTOCOL)
    plot_losses_results(training_data_dictionary)
    plot_accuracies_results(training_data_dictionary)
def get_network_ce_1(x_all, y_all, num_of_neurons=(2, 25, 2), activation='relu',
                     lr=0.1, momentum_coef=0.0, weight_decay=0.0, p_dropout=0.0,
                     num_of_epochs=100, val_split=0.2, verbose=0):
    """ 1 hidden layer, CE """
    ce = LossCrossEntropy()
    model = Sequential()
    model.add(
        Linear(out=num_of_neurons[1],
               input_size=num_of_neurons[0],
               activation=activation))
    model.add(Dropout(prob=p_dropout))
    model.add(Linear(out=num_of_neurons[2], activation='softmax'))
    model.loss = ce
    # initialize the SGD optimizer with the given learning rate,
    # momentum coefficient and weight-decay parameter
    sgd = SGD(lr, momentum_coef, weight_decay=weight_decay)
    # train the model and collect the training report
    report = sgd.train(model, x_all, y_all, num_of_epochs, val_split=val_split, verbose=verbose)
    # return model and report
    return model, report
def get_network_ce_4(x_all, y_all, num_of_neurons=(2, 25, 25, 25, 2), activation='relu',
                     lr=0.1, momentum_coef=0.0, weight_decay=0.0, p_dropout=0.0,
                     num_of_epochs=100, val_split=0.2, verbose=0):
    """ model with 3 hidden layers, loss is CE """
    ce = LossCrossEntropy()
    model = Sequential()
    model.add(
        Linear(out=num_of_neurons[1],
               input_size=num_of_neurons[0],
               activation=activation))
    model.add(Dropout(prob=p_dropout))
    model.add(Linear(out=num_of_neurons[2], activation=activation))
    model.add(Dropout(prob=p_dropout))
    model.add(Linear(out=num_of_neurons[3], activation=activation))
    model.add(Dropout(prob=p_dropout))
    model.add(Linear(out=num_of_neurons[4], activation='softmax'))
    model.loss = ce
    sgd = SGD(lr, momentum_coef, weight_decay=weight_decay)
    report = sgd.train(model, x_all, y_all, num_of_epochs, val_split=val_split, verbose=verbose)
    return model, report
def __init__(self,
             input_dim: Tuple[int, int],
             fc_input_dim: int,
             output_dim: int,
             hidden_dims: List[int],
             kernel_number: int,
             kernel_size: int,
             init_parameters_sd: float = 1.0,
             activation_functions: Optional[List[Callable]] = None,
             optimizer: Optimizer = SGD(),
             initializer: Optional[Initializer] = None,
             max_pooling_bool: bool = True):
    self.max_pooling_bool = max_pooling_bool
    if initializer is None:
        initializer = NormalDistributionInitializer(init_parameters_sd)
    self.convolutional_layer = Conv2D(out_channels=kernel_number,
                                      kernel_size=kernel_size,
                                      padding=1,
                                      stride=1)
    self.convolutional_layer.init_parameters(input_dim)
    if self.max_pooling_bool:
        self.max_pooling = MaxPool2D(kernel_size=2, padding=0, stride=2)
    self.fc_input_dim = fc_input_dim
    sizes = [fc_input_dim] + hidden_dims + [output_dim]
    self.weights = initializer.init_weights(sizes)
    self.biases = initializer.init_biases(sizes)
    if activation_functions is None:
        self.activation_functions = [sigmoid] * (len(self.weights) - 1) + [softmax]
    else:
        self.activation_functions = activation_functions + [softmax]
    self.optimizer = optimizer
    self.optimizer.set_parameters({
        'weights': self.weights,
        'biases': self.biases,
        'conv_weights': self.convolutional_layer.weights,
        'conv_biases': self.convolutional_layer.biases
    })
def __init__(self, train_loader, val_loader, criterion, val_criterion, afunc,
             x_shape, n_classes, args, features_net=None):
    assert len(x_shape) == 3, \
        'Unexpected x_shape {}. This model only works with image datasets.'.format(x_shape)
    assert args.model in globals(), \
        'models/{}.py has no definition of model {}'.format(args.algorithm, args.model)
    assert afunc == F.relu, 'Unexpected activation. FeaturesNet is to be trained using ReLU only.'
    assert n_classes == 10, 'Unexpected input. FeaturesNet is to be trained for MNIST and CIFAR only.'
    assert features_net is None, 'features_net must be None for the features_net_test.py module!'
    self.net = globals()[args.model]()
    self.optimizer = SGD(self.net, train_loader, val_loader, criterion, val_criterion, args)
l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}

epochs = 20
# Total number of examples
m = X_.shape[0]
batch_size = 11
steps_per_epoch = m // batch_size

graph = Graph(feed_dict)
sgd = SGD(1e-2)
trainables = [W1, b1, W2, b2]

print("Total number of examples = {}".format(m))

for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1
        # Randomly sample a batch of examples
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Reset value of X and y Inputs
        X.output = X_batch
        y.output = y_batch
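        # Continuation sketch for the remaining loop steps: graph.forward(),
        # graph.backward() and sgd.update() are assumed method names for this
        # miniflow-style API and are not confirmed by the snippet above.

        # Step 2: run the forward and backward passes over the whole graph
        graph.forward()
        graph.backward()

        # Step 3: apply one SGD step to every trainable parameter node
        sgd.update(trainables)

        # Step 4: accumulate the batch loss held on the MSE cost node
        loss += cost.output

    print("Epoch: {}, Loss: {:.3f}".format(i + 1, loss / steps_per_epoch))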
def train(args):
    if args.log:
        log_dir = args.log
    else:
        log_dir = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            '{}'.format(datetime.now().strftime('%Y%m%d_%H:%M')))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # setting for logging
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(log_dir, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    logger.info('Arguments...')
    for arg, val in vars(args).items():
        logger.info('{} : {}'.format(arg, val))

    logger.info('Preparing dataset...')
    if not args.entity or not args.relation:
        # make vocab from train set
        logger.info('Making entity/relation vocab from train data...')
        raise NotImplementedError()
    else:
        ent_vocab = Vocab.load(args.entity)
        rel_vocab = Vocab.load(args.relation)

    n_entity, n_relation = len(ent_vocab), len(rel_vocab)
    train_dat = TripletDataset.load(args.train, ent_vocab, rel_vocab)
    logger.info('')

    if args.valid:
        assert args.metric, 'Please indicate an evaluation metric for validation'
        assert args.metric in ['mrr', 'hits'], 'Invalid evaluation metric: {}'.format(args.metric)
        if args.metric == 'hits':
            assert args.nbest, 'Please indicate nbest for hits'
        valid_dat = TripletDataset.load(args.valid, ent_vocab, rel_vocab)

    if args.restart:
        logger.info('Restarting training: {}'.format(args.restart))
        model = GaussianBilinearModel.load_model(args.restart)
    else:
        logger.info('Building new model')
        opt = SGD(args.lr, args.gradclip)
        model = GaussianBilinearModel(n_entity, n_relation, args.dim, args.cmin,
                                      args.cmax, opt, args.tri, args.init_sigma)

    best_model = None
    best_val = -1
    for epoch in range(args.epoch):
        logger.info('start {} epoch'.format(epoch + 1))
        sum_loss = 0
        start = time.time()
        for i, pos_sample in enumerate(data_iter(train_dat)):
            neg_samples = [(pos_sample[0], pos_sample[1], np.random.randint(n_entity))
                           for _ in range(args.num_negative)]
            for neg_sample in neg_samples:
                loss = model.update(pos_sample, neg_sample)
                sum_loss += loss
                # logger.info('loss: {}'.format(loss))
            # logger.info('processing {} samples in this epoch'.format(i+1))
            print('processing {} samples in this epoch'.format(i + 1))
        logger.info('sum loss: {}'.format(sum_loss))
        logger.info('{} sec/epoch for training'.format(time.time() - start))
        model_path = os.path.join(log_dir, 'model{}'.format(epoch + 1))
        model.save_model(model_path)
        if args.valid and (epoch + 1) % args.evalstep == 0:
            val = evaluation(model, valid_dat, args.metric, args.nbest)
            logger.info('{} in validation: {}'.format(args.metric, val))
            if val > best_val:
                best_model = copy.deepcopy(model)
                best_val = val
                best_epoch = epoch + 1

    if args.valid:
        logger.info('best model is {} epoch'.format(best_epoch))
        model_path = os.path.join(log_dir, 'bestmodel')
        best_model.save_model(model_path)

    logger.info('done all')
def get_network(x_all, y_all, num_of_hidden_layers=3, loss='ce',
                num_of_neurons=(2, 25, 25, 25, 2), activation='relu', lr=0.1,
                momentum_coef=0.0, weight_decay=0.0, p_dropout=0.0,
                num_of_epochs=100, val_split=0.2, verbose=0):
    """
    creates a model with the given parameters

    x_all - features
    y_all - targets
    num_of_hidden_layers - int, number of hidden layers in the model
    loss - 'ce' for Cross Entropy, 'mse' for Mean Squared Error
    num_of_neurons - tuple of ints with size num_of_hidden_layers + 2; the first element
        is the number of features in x_all and the last element is the number of possible targets
    activation - 'relu' for ReLU, 'tanh' for Tanh
    lr - float, learning rate
    momentum_coef - float in range (0, 1), momentum coefficient
    weight_decay - float, L2-regularization parameter
    p_dropout - float in range [0, 1), probability of dropout
    num_of_epochs - int, number of epochs
    val_split - float in range [0, 1), ratio of validation set
    verbose - 0 or 1, for printing out results
    """
    # set loss and last activation
    if loss == 'ce':
        loss = LossCrossEntropy()
        last_activation = 'softmax'
    else:
        loss = LossMSE()
        last_activation = activation

    # initialize an empty Sequential model
    model = Sequential()

    # add linear layers with the given activation, each followed by a
    # dropout layer with the given p_dropout
    if num_of_hidden_layers > 0:
        model.add(
            Linear(out=num_of_neurons[1],
                   input_size=num_of_neurons[0],
                   activation=activation))
        model.add(Dropout(prob=p_dropout))
        for i in range(num_of_hidden_layers - 1):
            model.add(Linear(out=num_of_neurons[i + 2], activation=activation))
            model.add(Dropout(prob=p_dropout))
        model.add(Linear(out=num_of_neurons[-1], activation=last_activation))
    else:
        model.add(
            Linear(out=num_of_neurons[-1],
                   input_size=num_of_neurons[0],
                   activation=last_activation))

    # set the loss of the model
    model.loss = loss

    sgd = SGD(lr, momentum_coef, weight_decay=weight_decay)
    report = sgd.train(model, x_all, y_all, num_of_epochs, val_split=val_split, verbose=verbose)
    return model, report
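# Usage sketch for get_network(). The random numpy data below is illustrative
# only; the framework may expect a different tensor type (e.g. torch tensors),
# and the one-hot targets assume the default two-class setup from
# num_of_neurons=(2, 25, 25, 25, 2).
import numpy as np

x_all = np.random.randn(1000, 2)                        # 1000 samples, 2 features
labels = (np.linalg.norm(x_all, axis=1) < 1.0).astype(int)
y_all = np.eye(2)[labels]                               # one-hot targets, shape (1000, 2)

model, report = get_network(x_all, y_all,
                            num_of_hidden_layers=3,
                            loss='ce',
                            activation='relu',
                            lr=0.1,
                            p_dropout=0.2,
                            num_of_epochs=100,
                            val_split=0.2,
                            verbose=1)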
import multiprocessing

from functions.activation_functions import sigmoid, relu
from models.neural_network_models.mlp import MLP
from models.neural_network_models.train_model import train_model
from optimizers.sgd import SGD
from weight_initilization.basa_initializer import Initializer
from weight_initilization.he_initializer import HeInitializer
from weight_initilization.normal_distr_initilizer import NormalDistributionInitializer
from weight_initilization.xavier_initializer import XavierInitializer

simulation_number = 10
act_function = sigmoid
max_epochs = 7
batch_size = 50
hidden_dims = [100]
weight_sd = 1.0
optimizer = SGD(learning_rate=1e-2)


def analyze_initializers():
    # load_data_wrapper and get_results_for_initializer are defined elsewhere in this module
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()
    initializers_names = ['Normal', 'Xavier', 'He']
    data_dictionary = {}
    processes = min(len(initializers_names), multiprocessing.cpu_count() - 1)
    with multiprocessing.Pool(processes=processes) as pool:
        results = pool.starmap(get_results_for_initializer,
                               [(initializer_name, x_train, x_val, y_train, y_val)
                                for initializer_name in initializers_names])
    for name, res in zip(initializers_names, results):
        data_dictionary[name] = res