def main():
    lr = 0.001
    input_size = 50
    output_size = 10
    n_iterations = 100

    # Both input and "ground truth" are random vectors
    x = np.random.random(input_size)
    y = np.random.random(output_size)

    # Randomly initialize neural network weights
    # weights = to_value(np.random.random((input_size, output_size)))
    nn = MLP(input_size, output_size, [5, 10, 20])
    print(nn.layers[0])

    losses = []
    for i in tqdm(range(n_iterations)):
        y_pred = nn(x)
        loss = np.sum((y - y_pred) * (y - y_pred))
        losses.append(loss.data)
        loss.backward()
        for p in nn.parameters():
            p.data -= lr * p.grad
        nn.zero_grad()

    plt.plot(losses)
    plt.ylabel('Loss')
    plt.xlabel('Iteration')
    plt.title('Multilayer perceptron fitting random noise')
    plt.show()
def __init__(self, generator=MLP(), discriminator=MLP()):
    super().__init__()
    if generator is not None and discriminator is not None:
        self.generator = generator
        self.discriminator = discriminator
        self.layers = self.generator.layers + self.discriminator.layers
        self.generator.loss = CrossEntropy()
        self.discriminator.loss = CrossEntropy()
def __init__(self, encoder=MLP(), decoder=MLP(), noise=None):
    """
    An Autoencoder consists of an Encoder network and a Decoder network.
    The constructor merges these two networks.
    """
    super().__init__()
    self.layers += encoder.layers + decoder.layers
    self.encoder = encoder
    self.decoder = decoder
    self.noise = noise
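# Hedged construction sketch (not from the original source): it assumes the enclosing
# class is named Autoencoder and reuses the MLP/Dense/Adam/LeakyReLU API shown later
# in this collection; the layer sizes are illustrative only.

optimizer = Adam(0.0002, 0.5)

encoder = MLP()
encoder.addLayer(Dense(inputDim=784, outputDim=64, activation=LeakyReLU(0.2), optimizer=optimizer))

decoder = MLP()
decoder.addLayer(Dense(inputDim=64, outputDim=784, activation=LeakyReLU(0.2), optimizer=optimizer))

# `Autoencoder` is an assumed class name for the __init__ above
autoencoder = Autoencoder(encoder=encoder, decoder=decoder, noise=None)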
def load(self, name):
    modelDir = f"./models/{name}"
    self.encoder = MLP()
    self.decoder = MLP()
    self.decoder.loss = MSE()

    # load encoder and decoder
    for part, model in [("encoder", self.encoder), ("decoder", self.decoder)]:
        layerDir = [
            dir for dir in os.listdir(modelDir)
            if os.path.isdir(os.path.join(modelDir, dir)) and part in dir
        ]
        layerDir.sort(key=lambda x: int(x.strip(f"{part}_layer")))
        for dir in layerDir:
            layerFolder = os.path.join(modelDir, dir)
            if "dense.json" in os.listdir(layerFolder):
                # this is a dense layer
                newLayer = Dense()
                newLayer.load(layerFolder)
                model.layers.append(newLayer)

    # load additional information about the sampler
    with open(f"{modelDir}/sampler.json", "r") as file:
        data = json.load(file)
    inputDim = data["inputDim"]
    outputDim = data["outputDim"]
    self.sampler = Sampler(inputDim, outputDim)

    # load mean and logvar layers
    self.sampler.mean = Dense()
    self.sampler.mean.load(os.path.join(modelDir, "sampler_mean"))
    self.sampler.logVar = Dense()
    self.sampler.logVar.load(os.path.join(modelDir, "sampler_logvar"))

    self.layers = self.encoder.layers + [
        self.sampler.mean, self.sampler.logVar
    ] + self.decoder.layers
def init_model():
    # Select the model
    if FLAGS.model == 'mlp':
        model = MLP()
    elif FLAGS.model == 'shallow':
        model = Shallow_CNN()
    elif FLAGS.model == 'deep':
        model = Deep_CNN()
    else:
        raise ValueError('--model should be "mlp", "shallow", or "deep"')
    return model
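# Hedged usage sketch (not from the original source): FLAGS is assumed to be an
# argparse-style namespace with a `model` attribute; the flag definition below is
# hypothetical, since the original script defines FLAGS elsewhere.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model', default='mlp', choices=['mlp', 'shallow', 'deep'])
FLAGS = parser.parse_args()

model = init_model()  # returns MLP(), Shallow_CNN() or Deep_CNN() depending on --model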
def _init_model(self, model_name):
    # Select the model
    if model_name == 'mlp':
        model = MLP()
    elif model_name == 'shallow':
        model = Shallow_CNN()
    elif model_name == 'deep':
        model = Deep_CNN()
    else:
        raise ValueError('--model should be "mlp", "shallow", or "deep"')
    return model
def get_evals(approx):
    if approx:
        model = MLP(params.neural_net_hiddens)
        model.load_state_dict(torch.load('./model.pth'))
        model.eval()

    # Make data.
    X = np.arange(params.x_0[0] - params.delta_0, params.x_0[0] + params.delta_0, 0.05)
    Y = np.arange(params.x_0[1] - params.delta_0, params.x_0[1] + params.delta_0, 0.05)
    X, Y = np.meshgrid(X, Y)

    Z = []
    for j in range(len(X)):
        for i in range(len(X[0])):
            point = np.array([[X[j][i], Y[j][i]]], dtype=np.float32)
            if approx:
                inp = torch.from_numpy(point)
                val = model(inp).data.numpy()
            else:
                val = f(point)
            Z.append(val)
    Z = np.array(Z).reshape(X.shape)
    return X, Y, Z
def __init__(self, embedding, encoder, encoder_type,
             mlp_input, mlp_arc_hidden, mlp_lab_hidden, mlp_dropout,
             num_labels, criterion):
    super(BiAffineParser, self).__init__()
    self.embedding = embedding
    self.encoder = encoder
    self.encoder_type = encoder_type

    # Arc MLPs
    self.arc_mlp_h = MLP(mlp_input, mlp_arc_hidden, 2, 'ReLU', mlp_dropout)
    self.arc_mlp_d = MLP(mlp_input, mlp_arc_hidden, 2, 'ReLU', mlp_dropout)
    # Label MLPs
    self.lab_mlp_h = MLP(mlp_input, mlp_lab_hidden, 2, 'ReLU', mlp_dropout)
    self.lab_mlp_d = MLP(mlp_input, mlp_lab_hidden, 2, 'ReLU', mlp_dropout)

    # BiAffine layers
    self.arc_biaffine = BiAffine(mlp_arc_hidden, 1)
    self.lab_biaffine = BiAffine(mlp_lab_hidden, num_labels)

    # Loss criterion
    self.criterion = criterion()
def __init__(self, layer_num=2, hidden_dim=200):
    super().__init__()
    self.action_size = 3
    self.state_size = 768 * 2
    self.memory = deque(maxlen=2000)
    self.gamma = 0.95
    self.epsilon = 0.900
    self.epsilon_min = 0.03
    self.epsilon_decay = 0.995
    self.temperature = 768
    self.selection_epsilon = 0.900
    self.selection_epsilon_min = 0.03
    self.selection_epsilon_decay = 0.995
    self.encoder = LocalGlobalEncoder()
    self.mlp = MLP(in_dim=self.state_size, hid_dim=hidden_dim,
                   out_dim=self.action_size, layer_num=layer_num)
    self.to(DEVICE)
class Policy(object):
    def __init__(self, input_dim, n_actions, gamma=0.9):
        self.input_dim = input_dim
        self.n_actions = n_actions
        self.gamma = gamma
        self.model = MLP(input_dim, [32, 32], n_actions)
        self.optim = optim.Adam(self.model.parameters(), lr=1e-2)
        self.action_reward = []

    def get_action(self, observation, stochastic=True):
        pred = self.model(observation)
        if stochastic:
            return pred.multinomial()
        return pred[0].argmax()

    def update(self):
        R = 0
        rewards = []
        # iterate in reverse so R accumulates the discounted return from the end of the episode
        for action, reward in reversed(self.action_reward):
            R = reward + self.gamma * R
            rewards.insert(0, R)
        rewards = T.Tensor(rewards)
        rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)

        actions = []
        for (action, _), reward in zip(self.action_reward, rewards):
            action.reinforce(reward)
            actions.append(action)

        self.optim.zero_grad()
        T.autograd.backward(actions, [None for _ in actions])
        self.optim.step()
        self.action_reward = []

    def record(self, action, reward):
        self.action_reward.append((action, reward))
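# Hedged usage sketch (not from the original source): `env` is a hypothetical
# gym-style environment (reset/step); depending on how MLP.forward is written,
# observations may need to be wrapped in tensors and the sampled action converted
# to a plain int before being passed to env.step.

policy = Policy(input_dim=4, n_actions=2)

for episode in range(500):
    observation = env.reset()
    done = False
    while not done:
        action = policy.get_action(observation)
        observation, reward, done, _ = env.step(action)
        policy.record(action, reward)   # store (action, reward) for the update
    policy.update()                     # REINFORCE update over the finished episode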
def main():
    # Learning the XOR
    # (obs.: sensitive to the standard deviation used to initialize the weights)
    # X = np.array([[.1, .1], [.1, .9], [.9, .1], [.9, .9]], dtype=float)
    # Y = np.array([[.1], [.9], [.9], [.1]], dtype=float)
    # xor = MLP(learning_rate=1.0, layers=[2, 2, 1], loss=MeanSquaredError, sigma=0.05)
    # perf_xor = xor.fit(X, Y)
    # plot_perf(perf_xor, 'perf_xor')

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--bench_name",
        type=str,
        help="To choose the task. Available: monks-1, monks-2, monks-3."
    )
    parser.add_argument(
        "--do_grid_search",
        action="store_true",
        help="Whether to do hyper-parameter search via grid search."
    )
    parser.add_argument(
        "--lr",
        default=1e-1,
        type=float,
        help="The learning rate. Default: 1e-1.",
    )
    parser.add_argument(
        "--momentum",
        default=0.0,
        type=float,
        help="The momentum. Default: 0.0.",
    )
    parser.add_argument(
        "--reg",
        default=0.0,
        type=float,
        help="The regularization term. Default: 0.0.",
    )
    parser.add_argument(
        "--max_epochs",
        default=100,
        type=int,
        help="The maximum number of epochs.",
    )
    parser.add_argument(
        "--sigma",
        default=1.0,
        type=float,
        help="To init weights with normal distribution N(mu, sigma), with scale sigma.",
    )
    parser.add_argument(
        "--mu",
        default=0.0,
        type=float,
        help="To init weights with normal distribution N(mu, sigma), with center mu.",
    )
    parser.add_argument(
        '--layers',
        nargs='+',
        help="The layers' sizes, e.g. n_input n_hid_1 ... n_hid_k n_class, where n_input is the "
             "size of the MLP input, n_hid_j is the size of the j-th hidden layer, and n_class is "
             "the size of the last layer for regression/classification tasks."
    )
    parser.add_argument(
        "--activation",
        default='Linear',
        type=str,
        help="The activation function. Available: Linear, Sigmoid."
    )
    parser.add_argument(
        "--activation_last_layer",
        default='Sigmoid',
        type=str,
        help="The activation function applied after the classification or regression layer. Available: Linear, Sigmoid."
    )
    parser.add_argument(
        "--task",
        type=str,
        help="The type of task to be solved. Available: classification, regression."
    )
    parser.add_argument(
        "--verbose",
        default='no',
        type=str,
        help="Whether to output in verbose mode."
    )
    parser.add_argument(
        "--debug_interval",
        default=100,
        type=int,
        help="To output training info every debug_interval epochs."
    )
    parser.add_argument(
        "--do_early_stopping",
        action="store_true",
        help="Whether to do early stopping."
    )
    args = parser.parse_args()

    X_train, Y_train = retrieveSamplesFromMonk(
        f"monks-data/{args.bench_name}.train")
    X_test, Y_test = retrieveSamplesFromMonk(
        f"monks-data/{args.bench_name}.test")

    logger.info(f"Training set len = {len(X_train)}")
    logger.info(f"Test set len = {len(X_test)}")
    logger.warning("do_grid_search: %s", bool(args.do_grid_search))

    if args.do_grid_search:
        n_input, n_output = 17, 1
        hyperspace = {
            'learning_rate': [i / 10 for i in range(1, 10)],
            'momentum': [i / 10 for i in range(1, 10)],
            'lambda': [0.0],  # [10**(-i) for i in range(1, 6)],
            'topology': [[n_input, 2**j, n_output] for j in range(1, 6)]
        }
        logger.info('Starting hyper-parameter grid search.')
        start = timer()
        monk = HyperOptimizer(
            hyperspace,
            X_train,
            Y_train,
            loss=MeanSquaredError,
            p=.70,
            k=5,
            scoring=['valid_err', 'test_err', 'accuracy'],
            task=args.task,
            X_ts=X_test,
            Y_ts=Y_test
        )
        monk.hoptimize()
        end = timer()
        logger.info(
            f'Grid search for task {args.bench_name} has taken = {np.ceil((end - start) * 1000)} ms')

    monk = MLP(
        learning_rate=args.lr,
        momentum=args.momentum,
        lamda=args.reg,
        max_epochs=args.max_epochs,
        layers=[int(l) for l in args.layers],
        task=args.task,
        verbose=args.verbose,
        activation=Linear if args.activation == 'Linear' else Sigmoid,
        activation_out=Linear if args.activation_last_layer == 'Linear' else Sigmoid,
        do_early_stopping=args.do_early_stopping,
        debug_interval=args.debug_interval
    )

    logger.info(f'Starting training for task {args.bench_name}.')
    start = timer()
    perf_monk = monk.fit(X_train, Y_train)
    end = timer()
    logger.info(
        f'Training for task {args.bench_name} has taken = {np.ceil((end - start) * 1000)} ms')

    test_err = monk.score(X_test, Y_test)
    logger.info(f'Test error = {test_err}')

    plot_perf(perf_monk, f'perf_{args.bench_name}')
    plot_accuracy(perf_monk, f'acc_{args.bench_name}')
def accuracy(predictions, label):
    total_corr = 0
    index = 0
    for c in predictions.flatten():
        if c.item() > 0.5:
            r = 1.0
        else:
            r = 0.0
        if r == label[index].item():
            total_corr += 1
        index += 1
    return total_corr / len(label)


# Takes as input states (9 inputs), and outputs Q_value(a|s) for all possible a
Qnet = MLP()
# Target net is the same as Qnet, except it is only updated every n runs
Tnet = MLP()

# MDP hyperparameters
numep = 10
# Agent can make at MOST 5 actions before forced termination
numt = 5
epsilon = 0.90

# Episode loop
for ep in range(numep):
    print('new game')
    # Initialize tic-tac-toe game
    x = tictactoe()
    terminate = False
                newLayer.load(layerFolder)
                model.layers.append(newLayer)

    self.layers = self.generator.layers + self.discriminator.layers


if __name__ == "__main__":
    dataset = Dataset(name="mnist", train_size=60000, test_size=10000, batch_size=128)
    LATENT_SIZE = 28 * 28

    # set the learning rate and optimizer for training
    optimizer = Adam(0.0002, 0.5)

    generator = MLP()
    generator.addLayer(
        Dense(inputDim=LATENT_SIZE, outputDim=256, activation=LeakyReLU(0.2), optimizer=optimizer))
    generator.addLayer(
        Dense(inputDim=256, outputDim=512, activation=LeakyReLU(0.2), optimizer=optimizer))
    generator.addLayer(
        Dense(inputDim=512, outputDim=1024, activation=LeakyReLU(0.2), optimizer=optimizer))
from nn import MLP
from datasets import BreastCancer
import matplotlib.pyplot as plt

x_train, y_train, x_test, y_test, _ = BreastCancer.load_data(pp='mms')

# Training
in_shape = (519, 30)  # input format -> (number_of_samples, number_of_attributes)
layers = (30, 30, 1)  # three layers with 30-30-1 neurons
functions = ('relu', 'relu', 'sigmoid')  # activation function of each layer
epochs = 300

model = MLP(input_shape=in_shape, layers=layers, activations=functions, initializer='he')
history = model.run(x_train, y_train, epochs=epochs, batch_size=32)

# Testing
prediction = model.predict(x_test)

# Plotting the loss
plt.plot(range(epochs), history)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()
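# Hedged evaluation sketch (not from the original source): the script above stops
# after computing `prediction`; this assumes model.predict returns one sigmoid
# probability per test sample, which may need adjusting for the real MLP API.

import numpy as np

pred_labels = (np.asarray(prediction).reshape(-1) > 0.5).astype(int)
accuracy = np.mean(pred_labels == np.asarray(y_test).reshape(-1))
print(f"Test accuracy: {accuracy:.3f}")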
def crossValidate(data, meta, folds, topology, iterations, weights, graph=None):
    """k-fold cross validation"""
    if folds <= 1:
        raise Exception("Cross validation folds must be > 1")

    averageError = 0.0
    for counter, (training, validation) in enumerate(
        crossValidateIndices(items=range(data.shape[0]), k=folds, randomize=True)
    ):
        # set up training and validation matrices
        train = data.iloc[training].reset_index(drop=True)
        validate = data.iloc[validation].reset_index(drop=True)

        trainingFeatures = train.drop(meta.categoricalLabelColumns, axis=1)       # remove output columns
        validationFeatures = validate.drop(meta.categoricalLabelColumns, axis=1)  # remove output columns
        trainingLabels = train[meta.categoricalLabelColumns]                      # use only output columns
        validationLabels = validate[meta.categoricalLabelColumns]                 # use only output columns

        # set up MLP and start training
        li(
            "Fold {2}/{3} - Training with {1}/{4} rows ({0} epochs)".format(
                iterations, trainingFeatures.shape[0], counter + 1, folds, data.shape[0]
            )
        )
        mlp = MLP()
        mlp.trainingIterations = iterations
        mlp.initalWeightsMultiplier = weights
        # convert from pandas dataframes to numpy arrays; they are faster for the
        # computationally intensive training phase
        mlp.features = trainingFeatures.values
        mlp.labels = trainingLabels.values
        # for validation, send in pandas dataframes
        mlp.validationFeatures = validationFeatures
        mlp.validationLabels = validationLabels
        mlp.meta = meta
        mlp.topology = topology
        if graph:
            mlp.trackLearning = True
        mlp.setupHiddenLayers()
        mlp.train()
        if graph:
            li("Plotting Learning to file '{0}'".format(graph))
            mlp.plotLearning(graph)

        # validate model
        li(
            "Fold {0}/{1} - Testing with {2}/{3} rows".format(
                counter + 1, folds, validationFeatures.shape[0], data.shape[0]
            )
        )
        error = mlp.validateModel(printToScreen=True)
        averageError += error

    averageError = averageError / folds
    li("Average error across all folds: {0}".format(averageError))
    return averageError
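# Hedged call sketch (not from the original source): `data` is assumed to be a
# pandas DataFrame whose label columns are listed in meta.categoricalLabelColumns;
# the topology, iteration count, and weights multiplier below are illustrative only.

avg_error = crossValidate(
    data, meta,
    folds=5,
    topology=[17, 8, 1],        # illustrative layer sizes
    iterations=500,             # illustrative epoch count
    weights=0.1,                # illustrative initial-weights multiplier
    graph="learning_curve.png"  # optional: plot learning curves to this file
)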
class PointNetPartSeg(nn.Module):
    """PointNet for part segmentation

    References:
        https://github.com/charlesq34/pointnet/blob/master/part_seg/pointnet_part_seg.py
    """

    def __init__(self,
                 in_channels,
                 num_classes,
                 num_seg_classes,
                 stem_channels=(64, 128, 128),
                 local_channels=(512, 2048),
                 cls_channels=(256, 256),
                 seg_channels=(256, 256, 128),
                 dropout_prob_cls=0.3,
                 dropout_prob_seg=0.2,
                 with_transform=True):
        """
        Args:
            in_channels (int): the number of input channels
            num_classes (int): the number of shape classes (classification head and one-hot class embedding)
            num_seg_classes (int): the number of part segmentation classes
            stem_channels (tuple of int): the numbers of channels in stem feature extractor
            local_channels (tuple of int): the numbers of channels in local mlp
            cls_channels (tuple of int): the numbers of channels in classification mlp
            seg_channels (tuple of int): the numbers of channels in segmentation mlp
            dropout_prob_cls (float): the probability to dropout in classification mlp
            dropout_prob_seg (float): the probability to dropout in segmentation mlp
            with_transform (bool): whether to use TNet to transform features
        """
        super(PointNetPartSeg, self).__init__()

        self.in_channels = in_channels
        self.num_classes = num_classes
        self.num_seg_classes = num_seg_classes

        # stem
        self.stem = Stem(in_channels, stem_channels, with_transform=with_transform)
        self.mlp_local = SharedMLP(stem_channels[-1], local_channels)

        # classification
        # Notice that we apply dropout to each classification mlp.
        self.mlp_cls = MLP(local_channels[-1], cls_channels, dropout=dropout_prob_cls)
        self.cls_logit = nn.Linear(cls_channels[-1], num_classes, bias=True)

        # part segmentation
        # Notice that the original repo concatenates the global feature, one-hot class embedding,
        # stem features and local features. However, the paper does not use the last local feature.
        # Here, we follow the released repo.
        in_channels_seg = local_channels[-1] + num_classes + sum(stem_channels) + sum(local_channels)
        self.mlp_seg = SharedMLP(in_channels_seg, seg_channels[:-1], dropout=dropout_prob_seg)
        self.conv_seg = Conv1d(seg_channels[-2], seg_channels[-1], 1)
        self.seg_logit = nn.Conv1d(seg_channels[-1], num_seg_classes, 1, bias=True)

        self.init_weights()

    def forward(self, data_batch):
        x = data_batch["points"]
        cls_label = data_batch["cls_label"]
        num_points = x.shape[2]
        end_points = {}

        # stem
        stem_feature, end_points_stem = self.stem(x)
        end_points["trans_input"] = end_points_stem["trans_input"]
        end_points["trans_feature"] = end_points_stem["trans_feature"]
        stem_features = end_points_stem["stem_features"]

        # mlp for local features
        local_features = []
        x = stem_feature
        for ind, mlp in enumerate(self.mlp_local):
            x = mlp(x)
            local_features.append(x)

        # max pool over points
        global_feature, max_indices = torch.max(x, 2)  # (batch_size, local_channels[-1])
        end_points['key_point_inds'] = max_indices

        # classification
        x = global_feature
        x = self.mlp_cls(x)
        cls_logit = self.cls_logit(x)

        # segmentation
        global_feature_expand = global_feature.unsqueeze(2).expand(-1, -1, num_points)
        with torch.no_grad():
            I = torch.eye(self.num_classes, dtype=global_feature.dtype, device=global_feature.device)
            one_hot = I[cls_label]  # (batch_size, num_classes)
            one_hot_expand = one_hot.unsqueeze(2).expand(-1, -1, num_points)

        x = torch.cat(stem_features + local_features + [global_feature_expand, one_hot_expand], dim=1)
        x = self.mlp_seg(x)
        x = self.conv_seg(x)
        seg_logit = self.seg_logit(x)

        preds = {
            "cls_logit": cls_logit,
            "seg_logit": seg_logit
        }
        preds.update(end_points)

        return preds

    def init_weights(self):
        self.mlp_local.init_weights(xavier_uniform)
        self.mlp_cls.init_weights(xavier_uniform)
        self.mlp_seg.init_weights(xavier_uniform)
        self.conv_seg.init_weights(xavier_uniform)
        nn.init.xavier_uniform_(self.cls_logit.weight)
        nn.init.zeros_(self.cls_logit.bias)
        nn.init.xavier_uniform_(self.seg_logit.weight)
        nn.init.zeros_(self.seg_logit.bias)
        # Set batch normalization momentum to 0.01 by default
        set_bn(self, momentum=0.01)
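# Hedged forward-pass sketch (not from the original source): the batch layout
# (points as batch x channels x num_points, integer class labels) follows the
# forward() code above, while the concrete sizes here are illustrative only.

import torch

model = PointNetPartSeg(in_channels=3, num_classes=16, num_seg_classes=50)
data_batch = {
    "points": torch.randn(4, 3, 1024),        # (batch, in_channels, num_points)
    "cls_label": torch.randint(0, 16, (4,)),  # one shape-class id per sample
}
preds = model(data_batch)
# expected shapes: cls_logit (4, 16), seg_logit (4, 50, 1024)
print(preds["cls_logit"].shape, preds["seg_logit"].shape)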
num_layers = 2
network = "RBF"

if network == "RBF":
    net = RBF(input_size, output_size, hidden_size)

    # training of the centres and variances
    X = torch.randn(train_size // 2, input_size)
    Y = 2 * X
    loss_hist = fit_model(net, X, Y, 5)

    # training of the weights
    X = torch.randn(train_size // 2, input_size)
    Y = 2 * X
    loss_hist = fit_model(net, X, Y, 5)
else:
    net = MLP(input_size, output_size, num_layers, hidden_size)
    X = torch.randn(train_size, input_size)
    Y = 2 * X
    loss_hist = fit_model(net, X, Y, 5)


# In[3]:

# You should see the loss get to effectively zero
loss_hist = fit_model(net, X, Y, 5)
plt.plot(loss_hist)


# In[4]:

# Test error should be close to zero; the MLP got to ~.0002 after 5 epochs of training on 4000 examples
X_test = torch.randn(train_size, input_size)
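# Hedged test-error sketch (not from the original source): the data-generating
# rule above is Y = 2*X, so the model's MSE on fresh samples can be checked
# directly, assuming output_size == input_size as in the training data.

with torch.no_grad():
    Y_test_pred = net(X_test)
    test_mse = ((Y_test_pred - 2 * X_test) ** 2).mean().item()
print(f"Test MSE: {test_mse:.6f}")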