def _createAllButLastLayer(self, nUnits, *otherargs):
    inputSize, windowSizes, windowStrides = otherargs
    if len(windowSizes) != len(windowStrides):
        print("NeuralNetworkConvolutional: ERROR. len(windowSizes) != len(windowStrides)")
        return
    # Check that the number of dimensions to convolve over is consistent
    # across inputSize, windowSizes and windowStrides.
    allSizes = [len(inputSize)] + [len(a) for a in windowSizes] + [len(a) for a in windowStrides]
    if allSizes[1:] != allSizes[:-1]:
        print("NeuralNetworkConvolutional: ERROR. len(inputSize) and length of each windowSizes and windowStrides are not equal.")
        return
    nLayers = len(nUnits) - 1
    nConvLayers = len(windowSizes)
    if nLayers < nConvLayers:
        print("NeuralNetworkConvolutional: ERROR. len(nUnits)-1 not greater than or equal to number of convolutional layers.")
        return
    if nConvLayers > 0:
        layers = [ConvolutionalLayer(list(inputSize) + [nUnits[0]],
                                     windowSizes[0], windowStrides[0], nUnits[1])]
        for layeri in range(1, nConvLayers):
            layers.append(ConvolutionalLayer(layers[-1].nWindows.tolist() + [nUnits[layeri]],
                                             windowSizes[layeri], windowStrides[layeri],
                                             nUnits[layeri + 1]))
        nInputsNextLayer = np.prod(layers[-1].nWindows) * layers[-1].nUnits
    else:
        layers = []  # no convolutional layers; start with an empty stack
        nInputsNextLayer = nUnits[0]
    # Remaining hidden layers are fully connected tanh layers.
    for layeri in range(nConvLayers, nLayers - 1):
        layers.append(TanhLayer(nInputsNextLayer, nUnits[layeri + 1]))
        nInputsNextLayer = nUnits[layeri + 1]
    return layers, nInputsNextLayer
def _createAllButLastLayer(self, nUnits):  # *args):
    layers = []
    ni = nUnits[0]
    for nu in nUnits[1:-1]:
        layers.append(TanhLayer(ni, nu))
        ni = nu
    return layers, ni
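# The TanhLayer class instantiated above is not shown in these snippets, and its
# exact interface differs between repositories. For orientation only, here is a
# minimal NumPy sketch of a fully connected tanh layer constructed with
# (nInputs, nUnits); the class name and forward/backward methods are hypothetical,
# not any of the authors' implementations:
import numpy as np

class MinimalTanhLayer:
    def __init__(self, nInputs, nUnits):
        # small random weights plus a row of biases
        self.W = 0.1 * np.random.randn(nInputs, nUnits)
        self.b = np.zeros((1, nUnits))

    def forward(self, X):
        # cache the activation for reuse in the backward pass
        self.Y = np.tanh(X @ self.W + self.b)
        return self.Y

    def backward(self, X, dY):
        # tanh'(z) = 1 - tanh(z)**2
        dZ = dY * (1.0 - self.Y ** 2)
        self.dW = X.T @ dZ
        self.db = dZ.sum(axis=0, keepdims=True)
        return dZ @ self.W.T  # gradient with respect to the layer input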
def view_rec_test(x_curr, prev_s_tensor, prev_in_gate_tensor):
    count = 0
    params = get_trainable_params()
    for p in params:
        count += 1
    print('view rec test : num of params %d' % count)

    rect8_ = InputLayer(view_features_shape, x_curr)
    prev_s_ = InputLayer(s_shape, prev_s_tensor)

    t_x_s_update_ = FCConv3DLayer(
        prev_s_, rect8_,
        (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
        params=t_x_s_update.params, isTrainable=True)
    t_x_s_reset_ = FCConv3DLayer(
        prev_s_, rect8_,
        (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
        params=t_x_s_reset.params, isTrainable=True)

    update_gate_ = SigmoidLayer(t_x_s_update_)
    comp_update_gate_ = ComplementLayer(update_gate_)
    reset_gate_ = SigmoidLayer(t_x_s_reset_)

    rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
    t_x_rs_ = FCConv3DLayer(
        rs_, rect8_,
        (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
        params=t_x_rs.params, isTrainable=True)
    tanh_t_x_rs_ = TanhLayer(t_x_rs_)

    # GRU update: new state = u * prev_s + (1 - u) * tanh(candidate)
    gru_out_ = AddLayer(
        EltwiseMultiplyLayer(update_gate_, prev_s_),
        EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

    return gru_out_.output, update_gate_.output
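# The layer graph above (SigmoidLayer gates, ComplementLayer, TanhLayer candidate,
# EltwiseMultiplyLayer/AddLayer combination) wires up a standard GRU update.
# Stripped of the layer classes, the per-step computation is roughly as below.
# This is a conceptual dense NumPy sketch, not the library's API: the parameter
# names (Wu, Uu, bu, ...) are hypothetical, and the real layers are FCConv3D/1D
# convolutions rather than plain matrix products.
import numpy as np

def gru_step(x, prev_s, p):
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    u = sigmoid(x @ p['Wu'] + prev_s @ p['Uu'] + p['bu'])            # update gate
    r = sigmoid(x @ p['Wr'] + prev_s @ p['Ur'] + p['br'])            # reset gate
    h = np.tanh(x @ p['Wh'] + (r * prev_s) @ p['Uh'] + p['bh'])      # candidate state
    # ComplementLayer in the graph corresponds to (1 - u)
    return u * prev_s + (1.0 - u) * h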
def test_tanh_layer():
    x_train = np.array([[5.1, 3.5, 1.4, 0.2],
                        [4.9, 3.0, 1.4, 0.2],
                        [7.0, 3.2, 4.7, 1.4],
                        [6.4, 3.2, 4.5, 1.5],
                        [6.3, 3.3, 6.0, 2.5],
                        [5.8, 2.7, 5.1, 1.9]])
    y_train = np.array([0, 0, 1, 1, 2, 2])
    W1 = np.random.randn(4, 10) * 0.001
    b1 = np.zeros((1, 10))
    W2 = np.random.randn(10, 6) * 0.001
    b2 = np.zeros((1, 6))
    softmax = SoftmaxLayer()
    tanh = TanhLayer()
    reg_parameter = 0.001
    g_numerical_W = eval_hidden_numerical_gradient(tanh, softmax, x_train, y_train,
                                                   W1, b1, W2, b2, reg_parameter)
    g_analytical_W = eval_hidden_analytical_gradient(tanh, softmax, x_train, y_train,
                                                     W1, b1, W2, b2, reg_parameter)
    assert check_gradient(g_numerical_W, g_analytical_W) <= 1e-7, \
        "Error in calculating gradient of the TanhLayer"
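# check_gradient above is assumed to return a scalar error between the numerical
# and analytical gradients. One common definition (a sketch under that assumption,
# not necessarily the repo's own helper) is the maximum elementwise relative error:
import numpy as np

def relative_error(g_num, g_ana, eps=1e-12):
    # elementwise |a - b| / max(|a|, |b|), guarded against division by zero
    den = np.maximum(np.maximum(np.abs(g_num), np.abs(g_ana)), eps)
    return np.max(np.abs(g_num - g_ana) / den)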
    # note: the train and test slices overlap (first 90% vs last 90% of the data)
    train = list(zip(
        np.array(data[:n * 9 // 10]).astype(float),
        np.array(targets[:n * 9 // 10]).astype(float)))
    test = list(zip(
        np.array(data[n // 10:]).astype(float),
        np.array(targets[n // 10:]).astype(float)))
    return train, test


train, test = gen_data()

model = Sequential([
    LinearLayer(2, 20, weights='random'),
    TanhLayer(),
    # SigmoidLayer(),
    # HeavisideLayer(),
    # LinearLayer(10, 20, weights='random'),
    # SigmoidLayer(),
    LinearLayer(20, num_classes, weights='random', L1=0.001),
    # ReluLayer(),
    # SigmoidLayer()
    SoftMaxLayer()
])
# model = Sequential([
#     LinearLayer(2, 5, weights='random'),
#     SigmoidLayer(),
#     # LinearLayer(3, 3, weights='random'),
#     # SigmoidLayer(),
def main():
    # NOTE: I include the output layer when counting num_layers,
    # so num_layers=3 implies 2 hidden layers.
    config = {
        'loss': 'VnceLoss',
        'truncate_gaussian': False,
        'num_layers': 3,
        'input_dim': 2,
        'output_dim': 4,
        'hidden_dim': 100,
        'learning_rate': 0.01,
        'num_data': 10000,
        'batch_size': 100,
        'num_epochs': 100,
        'stats_interval': 1,
        'weights_init': UniformInit(-0.05, 0.05, rng=rng),
        'biases_init': ConstantInit(0.),
        'final_biases_init': ConstantVectorInit(np.array([0., 0., -1, -1])),
        'activation_layer': TanhLayer(),
        'c': 0.3,
        'nz': 1,
        'noise': 'bad_noise',
        'nu': 10
    }

    if config['noise']:
        exp_name = (config['loss'] + '_' + 'truncate_gaussian=' +
                    str(config['truncate_gaussian']) + '_' + config['noise'] +
                    '_' + 'nu' + str(config['nu']))
    else:
        exp_name = (config['loss'] + '_' + 'truncate_gaussian=' +
                    str(config['truncate_gaussian']))

    train_data = StarsAndMoonsDataProvider(
        config['c'], 'train',
        size=config['num_data'],
        batch_size=config['batch_size'],
        truncate_gaussian=config['truncate_gaussian'],
        rng=rng)
    valid_data = StarsAndMoonsDataProvider(
        config['c'], 'valid',
        batch_size=config['batch_size'],
        truncate_gaussian=config['truncate_gaussian'],
        rng=rng)

    model, var_dist = train(train_data, valid_data, config, log=False, plot=True)

    with open(os.path.join(SAVE_DIR, "{}_config.txt".format(exp_name)), 'w') as f:
        for key, value in config.items():
            f.write("{}: {}\n".format(key, value))

    pickle.dump(
        model,
        open(os.path.join(SAVE_DIR, "truncate={}_model.p".format(
            str(config['truncate_gaussian']))), "wb"))
    pickle.dump(
        var_dist,
        open(os.path.join(SAVE_DIR, "{}_var_dist.p".format(exp_name)), "wb"))
                    help='path to pre-trained weights')
parser.add_argument('--d', type=int, default=2,
                    help='dimension of visibles for synthetic dataset')
parser.add_argument('--num_layers', type=int, default=2,
                    help='number of layers in the neural network')
parser.add_argument('--hidden_dim', type=int, default=100,
                    help='dimension of each hidden layer')
parser.add_argument('--activation_layer', type=object, default=TanhLayer(),
                    help='activation layer used in the neural network')

# Latent NCE optimisation arguments
parser.add_argument(
    '--opt_method', type=str, default='SGD',
    help='optimisation method. L-BFGS-B and CG both seem to work')
parser.add_argument(
    '--maxiter', type=int, default=5,
    help='number of iterations performed by L-BFGS-B optimiser inside each M step of EM')
x_train, y_train = load_mnist(MNIST_TRAINING_X, MNIST_TRAINING_y)
x_train = x_train.reshape(MNIST_NUM_TRAINING, MNIST_NUM_FEATURES)
y_train = y_train.reshape(MNIST_NUM_TRAINING)

# initialize parameters randomly
HIDDEN_LAYER_SIZE = 500
W1 = 0.1 * np.random.randn(MNIST_NUM_FEATURES, HIDDEN_LAYER_SIZE)
b1 = np.zeros((1, HIDDEN_LAYER_SIZE))
W2 = 0.1 * np.random.randn(HIDDEN_LAYER_SIZE, MNIST_NUM_OUTPUT)
b2 = np.zeros((1, MNIST_NUM_OUTPUT))

learning_rate = 0.1    # step size of the gradient descent algorithm
reg_parameter = 0.001  # regularization strength

softmax = SoftmaxLayer()
hidden = TanhLayer()

num_iter = 12000
BATCH_SIZE = 100
for i in range(num_iter):
    # sample a minibatch
    idx = np.random.choice(MNIST_NUM_TRAINING, BATCH_SIZE, replace=True)
    x_batch = x_train[idx, :]
    y_batch = y_train[idx]
    # forward pass through the tanh hidden layer and the softmax output layer
    pre_activation, hidden_output = hidden.forward_pass(x_batch, W1, b1)
    hidden_layer_weights = [W1]
    output_prob, loss = softmax.forward_pass(hidden_output, y_batch, W2, b2,
                                             reg_parameter, hidden_layer_weights)
    # backward pass through the softmax output layer
    g_W2, g_b2, g_output = softmax.backward_pass(output_prob, hidden_output,
                                                 y_batch, W2, b2, reg_parameter)
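    # The remainder of the training step (backprop through the tanh hidden layer
    # and the parameter update) is not shown above. Assuming the forward pass was
    # hidden_output = tanh(x_batch @ W1 + b1) and that g_output is the gradient of
    # the loss with respect to hidden_output, the missing piece would look roughly
    # like this plain-NumPy sketch (not the repo's own code):
    g_pre = g_output * (1.0 - hidden_output ** 2)   # tanh'(z) = 1 - tanh(z)**2
    g_W1 = x_batch.T @ g_pre + reg_parameter * W1   # assumes an L2 penalty of 0.5*reg*||W1||^2
    g_b1 = np.sum(g_pre, axis=0, keepdims=True)
    # vanilla gradient descent update
    W1 -= learning_rate * g_W1
    b1 -= learning_rate * g_b1
    W2 -= learning_rate * g_W2
    b2 -= learning_rate * g_b2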
path = 'C:/Users/wojtek/Desktop/projekt1-oddanie/clasification/data.circles.train.1000.csv'
path_r = 'C:/Users/wojtek/Desktop/projekt1-oddanie/regression/data.multimodal.train.500.csv'

X, y = reader.read_data(path_r)
output = len(np.unique(y))

# circles / XOR classification
# network = MultilayerPerceptron(100, 2, 0.05, 0.009, ProblemType.CLASSIFICATION, ErrorType.CLASSIC, True)
# network.add_layer(ReluLayer(32, 2))
# network.add_layer(TanhLayer(32, 32))
# network.add_layer(TanhLayer(output, 32))

# regression
network = MultilayerPerceptron(30, 2, 0.05, 0.009, ProblemType.REGRESSION, ErrorType.CLASSIC, True)
network.add_layer(TanhLayer(64, 1))
network.add_layer(SigmoidLayer(64, 64))
network.add_layer(TanhLayer(64, 64))
network.add_layer(SigmoidLayer(1, 64))

vs.plot_network(network)
network.fit(X, y)
pred = network.pred_for_show(X)
# network.predict(X[0])

accuracy = np.sum(y == pred) / len(y) * 100
# print(y)
# print(pred)
print(accuracy)
# print(np.mean(y - pred))

plt.plot(X, y, 'bo', X, pred, 'ro')
plt.show()
def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
    # Scan function cannot use compiled function.
    input_ = InputLayer(input_shape, x_curr)
    conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params)
    rect1a_ = LeakyReLU(conv1a_)
    conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params)
    rect1_ = LeakyReLU(conv1b_)
    pool1_ = PoolLayer(rect1_)

    conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params)
    rect2a_ = LeakyReLU(conv2a_)
    conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params)
    rect2_ = LeakyReLU(conv2b_)
    conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params)
    res2_ = AddLayer(conv2c_, rect2_)
    pool2_ = PoolLayer(res2_)

    conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params)
    rect3a_ = LeakyReLU(conv3a_)
    conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params)
    rect3_ = LeakyReLU(conv3b_)
    conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params)
    res3_ = AddLayer(conv3c_, rect3_)
    pool3_ = PoolLayer(res3_)

    conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), params=conv4a.params)
    rect4a_ = LeakyReLU(conv4a_)
    conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params)
    rect4_ = LeakyReLU(conv4b_)
    pool4_ = PoolLayer(rect4_)

    conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params)
    rect5a_ = LeakyReLU(conv5a_)
    conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params)
    rect5_ = LeakyReLU(conv5b_)
    conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params)
    res5_ = AddLayer(conv5c_, rect5_)
    pool5_ = PoolLayer(res5_)

    conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3), params=conv6a.params)
    rect6a_ = LeakyReLU(conv6a_)
    conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3), params=conv6b.params)
    rect6_ = LeakyReLU(conv6b_)
    res6_ = AddLayer(pool5_, rect6_)
    pool6_ = PoolLayer(res6_)

    flat6_ = FlattenLayer(pool6_)
    fc7_ = TensorProductLayer(flat6_, n_fc_filters[0], params=fc7.params)
    rect7_ = LeakyReLU(fc7_)

    prev_s_ = InputLayer(s_shape_1d, prev_s_tensor)
    # print(self.prev_s_._output_shape)
    t_x_s_update_ = FCConv1DLayer(prev_s_, rect7_, n_fc_filters[0],
                                  params=self.t_x_s_update.params, isTrainable=True)
    t_x_s_reset_ = FCConv1DLayer(prev_s_, rect7_, n_fc_filters[0],
                                 params=self.t_x_s_reset.params, isTrainable=True)

    update_gate_ = SigmoidLayer(t_x_s_update_)
    comp_update_gate_ = ComplementLayer(update_gate_)
    reset_gate_ = SigmoidLayer(t_x_s_reset_)

    rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
    t_x_rs_ = FCConv1DLayer(rs_, rect7_, n_fc_filters[0],
                            params=self.t_x_rs.params, isTrainable=True)
    tanh_t_x_rs_ = TanhLayer(t_x_rs_)

    # GRU update: new state = u * prev_s + (1 - u) * tanh(candidate)
    gru_out_ = AddLayer(
        EltwiseMultiplyLayer(update_gate_, prev_s_),
        EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

    return gru_out_.output, update_gate_.output
if __name__ == '__main__':
    from mnist import MNIST

    # Load MNIST dataset
    mndata = MNIST('./mnist')
    train_img, train_label = mndata.load_training()
    train_img = np.array(train_img, dtype=float) / 255.0
    train_label = np.array(train_label, dtype=float)

    # Input vector (Layer 0)
    n_output_0 = len(train_img[0])

    # Middle layer (Layer 1)
    n_output_1 = 200
    layer1 = TanhLayer(n_output_1, n_output_0)

    # Output layer (Layer 2)
    n_output_2 = 10
    layer2 = TanhLayer(n_output_2, n_output_1)

    # FP, BP and learning
    epsilon = 0.15
    n_training_data = 1000
    se_history = []
    y1_history = []
    y2_history = []
    W1_history = []
    W2_history = []
    cpr_history = []
    for loop in range(100):
parser.add_argument('--exp_name', type=str, default='4d-1000n',
                    help='name of set of experiments this one belongs to')
parser.add_argument('--name', type=str, default='cross-val',
                    help='name of this exact experiment')

# Data arguments
parser.add_argument('--n', type=int, default=5000, help='Number of datapoints')
parser.add_argument('--nz', type=int, default=1, help='Number of latent samples per datapoint')
parser.add_argument('--nu', type=int, default=1, help='ratio of noise to data samples in NCE')
parser.add_argument('--load_data', dest='load_data', action='store_true',
                    help='load 100d data generated in matlab')
parser.add_argument('--no-load_data', dest='load_data', action='store_false')
parser.set_defaults(load_data=False)

# Model arguments
parser.add_argument('--d', type=int, default=4,
                    help='dimension of visibles for synthetic dataset')
parser.add_argument('--num_layers', type=int, default=2,
                    help='number of layers in the neural network')
parser.add_argument('--hidden_dim', type=int, default=100,
                    help='dimension of each hidden layer')
parser.add_argument('--activation_layer', type=object, default=TanhLayer(),
                    help='activation layer used in the neural network')

# Latent NCE optimisation arguments
parser.add_argument('--opt_method', type=str, default='SGD',
                    help='optimisation method. L-BFGS-B and CG both seem to work')
parser.add_argument('--maxiter', type=int, default=5,
                    help='number of iterations performed by L-BFGS-B optimiser inside each M step of EM')
parser.add_argument('--stop_threshold', type=float, default=0,
                    help='Tolerance used as stopping criterion in EM loop')
parser.add_argument('--max_num_epochs', type=int, default=50,
                    help='Maximum number of loops through the dataset during training')
parser.add_argument('--model_learn_rate', type=float, default=0.1,
                    help='if opt_method=SGD, this is the learning rate used to train the model')
parser.add_argument('--var_learn_rate', type=float, default=0.1,
                    help='if opt_method=SGD, this is the learning rate used to train the variational dist')
parser.add_argument('--batch_size', type=int, default=10,
                    help='if opt_method=SGD, this is the size of a minibatch')
parser.add_argument('--num_batch_per_em_step', type=int, default=1,
                    help='if opt_method=SGD, this is the number of batches per EM step')
parser.add_argument('--track_loss', dest='track_loss', action='store_true',
                    help='track VNCE loss in E & M steps')
parser.add_argument('--no-track_loss', dest='track_loss', action='store_false')
parser.set_defaults(track_loss=True)

# nce optimisation arguments
if mode:
    print("Creating network...")
    network = MultilayerPerceptron(30, 2, 0.05, 0.009, ProblemType.CLASSIFICATION, ErrorType.MSE, True)
    network = MultilayerPerceptron(16, 2, 0.09, 0.009, ProblemType.CLASSIFICATION, ErrorType.MSE, True)
    input_size = len(images[0])

    print("Adding layers...")
    network.add_layer(TanhLayer(32, input_size))
    network.add_layer(SigmoidLayer(32, 32))
    network.add_layer(SigmoidLayer(10, 32))

    print("Learning...")
    network.fit(test_images, test_labels)
    ser.save_to_file("a_30.p", network)
else:
    print("Reading network from file...")
    network = ser.read_from_file("a.p")

print("Classification...")
pred = network.pred_for_show(images)
pred_values = [v[0] for v in pred]
counter = 0
for i in range(len(pred_values)):