def test_fully_connected():
    data = torch.tensor([1.0, 2.0, 3.0, 4.0])
    layer = FullyConnected(4, 2)
    layer.weights = torch.tensor([
        [1.0, 2.0],
        [3.0, 4.0],
        [5.0, 6.0],
        [-1.0, -40.0],
    ])
    layer.bias = torch.tensor([1.0, 2.0])
    out = layer(data[None, :])
    expected = torch.tensor([[19.0, 0.0]])
    assert torch.allclose(out, expected)
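# A sketch of where the expected value [[19.0, 0.0]] comes from, assuming
# FullyConnected applies a ReLU nonlinearity by default (consistent with the
# explicit nonlinearity="linear" overrides used elsewhere in this code).
# `_expected_by_hand` is a hypothetical helper added for illustration only.
def _expected_by_hand():
    data = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
    weights = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [-1.0, -40.0]])
    bias = torch.tensor([1.0, 2.0])
    # pre-activation: [1 + 6 + 15 - 4 + 1, 2 + 8 + 18 - 160 + 2] = [19.0, -130.0]
    pre_activation = data @ weights + bias
    # ReLU clamps the negative logit to zero -> tensor([[19.0, 0.0]])
    return torch.clamp(pre_activation, min=0.0)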
def _build_encoder(self):
    """
    CNN encoder

    Conv1 -> ReLU -> MaxPool1 -> Conv2 -> ReLU -> MaxPool2 ->
        Flatten -> FC1 -> ReLU -> FC2
    """
    self.encoder = OrderedDict()
    self.encoder["Conv1"] = Conv2D(
        act_fn=ReLU(),
        init=self.init,
        pad=self.enc_conv1_pad,
        optimizer=self.optimizer,
        out_ch=self.enc_conv1_out_ch,
        stride=self.enc_conv1_stride,
        kernel_shape=self.enc_conv1_kernel_shape,
    )
    self.encoder["Pool1"] = Pool2D(
        mode="max",
        optimizer=self.optimizer,
        stride=self.enc_pool1_stride,
        kernel_shape=self.enc_pool1_kernel_shape,
    )
    self.encoder["Conv2"] = Conv2D(
        act_fn=ReLU(),
        init=self.init,
        pad=self.enc_conv2_pad,
        optimizer=self.optimizer,
        out_ch=self.enc_conv2_out_ch,
        stride=self.enc_conv2_stride,
        kernel_shape=self.enc_conv2_kernel_shape,
    )
    self.encoder["Pool2"] = Pool2D(
        mode="max",
        optimizer=self.optimizer,
        stride=self.enc_pool2_stride,
        kernel_shape=self.enc_pool2_kernel_shape,
    )
    self.encoder["Flatten3"] = Flatten(optimizer=self.optimizer)
    self.encoder["FC4"] = FullyConnected(
        n_out=self.latent_dim, act_fn=ReLU(), optimizer=self.optimizer
    )
    self.encoder["FC5"] = FullyConnected(
        n_out=self.T * 2,
        optimizer=self.optimizer,
        act_fn=Affine(slope=1, intercept=0),
        init=self.init,
    )
def __init__(self, embeddings_name, word_layers, word_pool, sent_layers,
             sent_pool, fc_layers, trainable=False, vocab_size=None,
             num_features=None, numpy_embeddings=False):
    super(HAN, self).__init__(embeddings_name,
                              trainable=trainable,
                              vocab_size=vocab_size,
                              num_features=num_features,
                              numpy_embeddings=numpy_embeddings)
    self.word_layers = nn.ModuleList(
        [build_layer(**word_layer) for word_layer in word_layers])
    self.word_pool = build_layer(**word_pool)
    self.sent_layers = nn.ModuleList(
        [build_layer(**sent_layer) for sent_layer in sent_layers])
    self.sent_pool = build_layer(**sent_pool)
    self.fc_layers = nn.ModuleList(
        [FullyConnected(**fc_layer) for fc_layer in fc_layers])
    self.min_len = 1
    self.default_sentence = np.zeros((self.min_len, self.num_features))
def _build_decoder(self):
    """
    MLP decoder

    FC1 -> ReLU -> FC2 -> Sigmoid
    """
    self.decoder = OrderedDict()
    self.decoder["FC1"] = FullyConnected(
        act_fn=ReLU(),
        init=self.init,
        n_out=self.latent_dim,
        optimizer=self.optimizer,
    )
    # NB. `n_out` is dependent on the dimensionality of X. we use a
    # placeholder for now, and update it within the `forward` method
    self.decoder["FC2"] = FullyConnected(
        n_out=None, act_fn=Sigmoid(), optimizer=self.optimizer, init=self.init
    )
def _build_generator(self):
    """
    FC1 -> ReLU -> FC2 -> ReLU -> FC3 -> ReLU -> FC4
    """
    self.generator = OrderedDict()
    self.generator["FC1"] = FullyConnected(
        self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, init=self.init
    )
    self.generator["FC2"] = FullyConnected(
        self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, init=self.init
    )
    self.generator["FC3"] = FullyConnected(
        self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, init=self.init
    )
    self.generator["FC4"] = FullyConnected(
        self.n_feats,
        act_fn="Affine(slope=1, intercept=0)",
        optimizer=self.optimizer,
        init=self.init,
    )
def test_initialisation():
    torch.manual_seed(100)
    layer = FullyConnected(4, 4)
    expected = torch.tensor([
        [0.0897, 0.9591, 0.3983, -0.0735],
        [-0.2528, 0.2770, -0.4809, 0.1704],
        [0.3322, 0.8787, 0.3821, -0.8099],
        [-1.0318, -1.1512, 0.2711, -0.1215],
    ])
    assert torch.allclose(layer.weights, expected, atol=1e-4)
def _build_critic(self):
    """
    FC1 -> ReLU -> FC2 -> ReLU -> FC3 -> ReLU -> FC4
    """
    self.critic = OrderedDict()
    self.critic["FC1"] = FullyConnected(
        self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, init=self.init
    )
    self.critic["FC2"] = FullyConnected(
        self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, init=self.init
    )
    self.critic["FC3"] = FullyConnected(
        self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, init=self.init
    )
    self.critic["FC4"] = FullyConnected(
        1,
        act_fn="Affine(slope=1, intercept=0)",
        optimizer=self.optimizer,
        init=self.init,
    )
def inner_model(trainable, x):
    layers_list = [
        Reshape([-1, 28, 28, 1]),
        Conv(32),
        BatchNormalization(),
        Relu(),
        MaxPool(),
        Conv(64),
        BatchNormalization(),
        Relu(),
        MaxPool(),
        Reshape([-1, 7 * 7 * 64]),
        FullyConnected(1024),
        Relu(),
        FullyConnected(10),
    ]
    variable_saver = VariableSaver()
    signal = x
    print('shape', signal.get_shape())
    for idx, layer in enumerate(layers_list):
        signal = layer.contribute(signal, idx, trainable,
                                  variable_saver.save_variable)
        print('shape', signal.get_shape())
    return signal, variable_saver.var_list
def __init__(self, sizes, batch_size, epoch_num, learning_rate,
             use_trained_params=False, filename=None):
    self.num_layers = len(sizes)
    self.sizes = sizes
    self.batch_size = batch_size
    self.epoch_num = epoch_num
    self.learning_rate = learning_rate

    if use_trained_params:
        path = os.path.dirname(os.path.abspath(__file__))
        loaded_params = np.load(os.path.join(path, filename))
        self.W1 = loaded_params['W1']
        self.b1 = loaded_params['b1']
        self.W2 = loaded_params['W2']
        self.b2 = loaded_params['b2']
    else:
        np.random.seed(12)
        self.W1 = np.sqrt(1 / sizes[0]) * np.random.randn(
            sizes[0], sizes[1])  # (784, 50)
        self.b1 = np.sqrt(1 / sizes[0]) * np.random.randn(sizes[1])
        self.W2 = np.sqrt(1 / sizes[1]) * np.random.randn(
            sizes[1], sizes[2])  # (50, 10)
        self.b2 = np.sqrt(1 / sizes[1]) * np.random.randn(sizes[2])

    # layers of network
    self.layers = {}
    self.layers['FullyConnected1'] = FullyConnected(self.W1, self.b1)
    self.layers['Activation'] = Sigmoid()
    self.layers['FullyConnected2'] = FullyConnected(self.W2, self.b2)
    self.lastLayer = SoftmaxLoss()
def __init__(self, pattern, input_num, range_num=None):
    if range_num is not None and input_num - 1 < range_num:
        raise ValueError('range_num is over input_num - 1')
    if range_num is None:
        range_num = input_num - 1

    input_size = pattern.shape[1] * range_num * (2 * input_num - range_num - 1) + 1

    sd = SelectiveDesensitization(pattern, input_num, range_num)
    fc = FullyConnected(input_size, pattern.shape[1])
    activation_func = SGN()
    loss_func = PotentialLoss()

    self.pattern = pattern
    self.layers = [sd, fc, activation_func]
    self.loss_func = loss_func
def test_optimiser_step():
    layers = [FullyConnected(4, 2)]
    model = Model(layers)
    model.layers[0].weights = torch.tensor(
        [[9.0, 10.0], [11.0, 12.0], [13.0, 14.0], [15.0, 16.0]],
        requires_grad=True)
    optim = RMSProp(model.parameters(), lr=2)
    data = torch.tensor([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]])
    out = model(data)
    total = torch.sum(out)
    total.backward()
    optim.step()
    expected = torch.tensor([[2.6754, 3.6754], [4.6754, 5.6754],
                             [6.6754, 7.6754], [8.6754, 9.6754]])
    assert torch.allclose(expected, model.layers[0].weights, atol=1e-4)
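# Why the weights land near 2.6754 after one step: with total = sum(model(data)),
# every weight's gradient is the corresponding column sum of `data`, so assuming
# this RMSProp uses a decay of 0.9 and a negligible epsilon (inferred from the
# expected values, not stated in the source), the update magnitude is
# lr * g / sqrt(0.1 * g**2) = 2 / sqrt(0.1) ≈ 6.3246 for every entry,
# e.g. 9.0 - 6.3246 ≈ 2.6754.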
def make_cnn(X_dim, num_class):
    conv = Conv(X_dim, n_filter=16, h_filter=5, w_filter=5, stride=1, padding=2)
    relu = ReLU()
    maxpool = Maxpool(conv.out_dim, size=2, stride=2)
    conv2 = Conv(maxpool.out_dim, n_filter=20, h_filter=5, w_filter=5, stride=1,
                 padding=2)
    relu2 = ReLU()
    maxpool2 = Maxpool(conv2.out_dim, size=2, stride=2)
    flat = Flatten()
    fc = FullyConnected(np.prod(maxpool2.out_dim), num_class)
    return [conv, relu, maxpool, conv2, relu2, maxpool2, flat, fc]
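# A minimal usage sketch for make_cnn; the (channels, height, width) shape and
# class count below are illustrative assumptions, not values from the source:
#   layers = make_cnn(X_dim=(1, 28, 28), num_class=10)
# Conv and Maxpool expose out_dim, so the final FullyConnected consumes
# np.prod(maxpool2.out_dim) flattened features and emits num_class scores.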
def __init__(self, batch_size, mc, kernel_type, classes):
    super(Net, self).__init__()
    self.batch_size = batch_size
    self.mc = mc
    self.kernel_type = kernel_type
    self.classes = classes
    # in_channels, out_channels, k_size=3, stride=1, padding=0, batch_size=8,
    # mc=10, group, F0=True / local_param=True
    # MNIST
    self.rff0 = Conv_RFF(1, 16, 3, 2, 1, self.batch_size, self.mc,
                         self.kernel_type, 1, True)
    self.linear0 = Conv_Linear(16 * 2, 16, 3, 2, 1, self.batch_size, self.mc,
                               1, False)
    self.rff1 = Conv_RFF(16, 16, 3, 1, 1, self.batch_size, self.mc,
                         self.kernel_type, 1, False)
    self.linear1 = Conv_Linear(16 * 2, 16, 3, 2, 1, self.batch_size, self.mc,
                               1, False)
    self.rff2 = Conv_RFF(16, 16, 3, 2, 1, self.batch_size, self.mc,
                         self.kernel_type, 1, False)
    self.fully = FullyConnected(2 * 2 * 16 * 2, self.classes, self.batch_size,
                                self.mc, False)
def _init_params(self):
    self._dv = {}

    # assume dim(keys) = dim(query) = dim(values)
    assert self.kqv_dim % self.n_heads == 0
    self.latent_dim = self.kqv_dim // self.n_heads

    self.attention = DotProductAttention(scale=True, dropout_p=self.dropout_p)
    self.projections = {
        k: Dropout(
            FullyConnected(
                init=self.init,
                n_out=self.kqv_dim,
                optimizer=self.optimizer,
                act_fn="Affine(slope=1, intercept=0)",
            ),
            self.dropout_p,
        )
        for k in ["Q", "K", "V", "O"]
    }
    self.is_initialized = True
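# Quick check of the head-splitting arithmetic with hypothetical sizes
# (512 and 8 are illustrative values, not taken from the source):
kqv_dim, n_heads = 512, 8
assert kqv_dim % n_heads == 0      # same divisibility check as _init_params above
latent_dim = kqv_dim // n_heads    # each of the 8 heads attends over 64 dimensions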
def _deserialize(self, params):
    layers_attrs = ['layers']
    if params['best_layers_']:
        layers_attrs.append('best_layers_')
    for layers_attr in layers_attrs:
        for i, layer_dict in enumerate(params[layers_attr]):
            if layer_dict['layer'] == 'activation':
                params[layers_attr][i] = Activation(**layer_dict)
            if layer_dict['layer'] == 'dropout':
                params[layers_attr][i] = Dropout(**layer_dict)
            if layer_dict['layer'] == 'fully_connected':
                fc = FullyConnected(**layer_dict)
                fc.W = np.asarray(layer_dict['W'])
                fc.b = np.asarray(layer_dict['b'])
                fc.dW = np.asarray(layer_dict['dW'])
                fc.db = np.asarray(layer_dict['db'])
                params[layers_attr][i] = fc
    return params
def __init__(self, sizes, batch_size, epoch_num, use_trained_params=False,
             filename=None, img_dim=(3, 32, 32),
             conv_param={'filter_num': 32, 'filter_size': 3,
                         'padding': 1, 'stride': 1},
             optimizer='Adam', activation='ReLU', use_dropout=True,
             dropout_p=0.2, use_bn=True):
    self.num_layers = len(sizes)
    self.sizes = sizes
    self.batch_size = batch_size
    self.epoch_num = epoch_num
    # self.learning_rate = learning_rate
    self.activation = activation
    self.use_dropout = use_dropout
    self.dropout_p = dropout_p
    self.use_bn = use_bn

    self.filter_num = conv_param['filter_num']
    self.filter_size = conv_param['filter_size']
    self.filter_padding = conv_param['padding']
    self.filter_stride = conv_param['stride']
    self.img_c = img_dim[0]
    self.img_wh = img_dim[1]
    self.conv_output_size = int(
        (img_dim[1] - self.filter_size + 2 * self.filter_padding)
        / self.filter_stride) + 1
    self.pool_output_size = int(self.filter_num * (self.conv_output_size / 2)
                                * (self.conv_output_size / 2))

    self.opt = optimizer
    optimizers = {
        'SGD': SGD,
        'Momentum_SGD': Momentum_SGD,
        'AdaGrad': AdaGrad,
        'RMSProp': RMSProp,
        'AdaDelta': AdaDelta,
        'Adam': Adam
    }
    self.optimizer = optimizers[self.opt]()

    if use_trained_params:
        path = os.path.dirname(os.path.abspath(__file__))
        loaded_params = np.load(os.path.join(path, filename))
        self.W1 = loaded_params['W1']
        self.b1 = loaded_params['b1']
        self.W2 = loaded_params['W2']
        self.b2 = loaded_params['b2']
        self.W3 = loaded_params['W3']
        self.b3 = loaded_params['b3']
        self.gamma = loaded_params['gamma']
        self.beta = loaded_params['beta']
        if use_bn:
            self.running_mean = loaded_params['running_mean']
            self.running_var = loaded_params['running_var']
    else:
        np.random.seed(12)
        # Conv layer weights
        self.W1 = np.sqrt(1 / sizes[0]) * np.random.randn(
            self.filter_num, img_dim[0], self.filter_size, self.filter_size)
        self.b1 = np.sqrt(1 / sizes[0]) * np.random.randn(self.filter_num)
        # BatchNorm layer
        self.gamma = np.ones(self.filter_num * self.conv_output_size
                             * self.conv_output_size)
        self.beta = np.zeros(self.filter_num * self.conv_output_size
                             * self.conv_output_size)
        # FullyConnected layer weights (hidden layer)
        self.W2 = np.sqrt(1 / self.pool_output_size) * np.random.randn(
            self.pool_output_size, self.sizes[1])  # (pool, 100)
        self.b2 = np.sqrt(1 / self.pool_output_size) * np.random.randn(
            self.sizes[1])
        # FullyConnected layer weights (output layer)
        self.W3 = np.sqrt(1 / sizes[1]) * np.random.randn(
            self.sizes[1], self.sizes[2])
        self.b3 = np.sqrt(1 / sizes[1]) * np.random.randn(self.sizes[2])

    # layers of network
    activation_function = {'Sigmoid': Sigmoid, 'ReLU': ReLU}
    self.layers = {}
    self.layers['Conv'] = Conv2D(self.W1, self.b1, self.filter_stride,
                                 self.filter_padding)
    if self.use_bn:
        if use_trained_params:
            self.layers['BatchNorm'] = BatchNorm(
                self.gamma, self.beta,
                running_mean=self.running_mean, running_var=self.running_var)
        else:
            self.layers['BatchNorm'] = BatchNorm(self.gamma, self.beta)
    self.layers['Activation'] = activation_function[self.activation]()
    if self.use_dropout:
        self.layers['Dropout'] = Dropout(self.dropout_p)
    self.layers['Pool'] = MaxPool(pool_h=2, pool_w=2, stride=2)
    self.layers['FullyConnected1'] = FullyConnected(self.W2, self.b2)
    self.layers['Activation2'] = activation_function[self.activation]()
    self.layers['FullyConnected2'] = FullyConnected(self.W3, self.b3)
    self.lastLayer = SoftmaxLoss()
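# Shape arithmetic implied by the default img_dim=(3, 32, 32) and conv_param:
#   conv_output_size = (32 - 3 + 2 * 1) // 1 + 1 = 32
#   pool_output_size = 32 filters * (32 / 2) * (32 / 2) = 8192
# so FullyConnected1 maps 8192 pooled features to sizes[1] hidden units.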
def __init__(self, sizes, batch_size, epoch_num, use_trained_params=False,
             filename=None, optimizer='SGD', activation='ReLU',
             use_dropout=True, dropout_p=0.2, use_bn=True):
    self.num_layers = len(sizes)
    self.sizes = sizes
    self.batch_size = batch_size
    self.epoch_num = epoch_num
    self.activation = activation
    self.use_dropout = use_dropout
    self.dropout_p = dropout_p
    self.use_bn = use_bn

    self.opt = optimizer
    optimizers = {
        'SGD': SGD,
        'Momentum_SGD': Momentum_SGD,
        'AdaGrad': AdaGrad,
        'RMSProp': RMSProp,
        'AdaDelta': AdaDelta,
        'Adam': Adam
    }
    self.optimizer = optimizers[self.opt]()

    if use_trained_params:
        path = os.path.dirname(os.path.abspath(__file__))
        loaded_params = np.load(os.path.join(path, filename))
        self.W1 = loaded_params['W1']
        self.b1 = loaded_params['b1']
        self.W2 = loaded_params['W2']
        self.b2 = loaded_params['b2']
        self.gamma = loaded_params['gamma']
        self.beta = loaded_params['beta']
        # when Batch Normalization is used
        if self.use_bn:
            self.running_mean = loaded_params['running_mean']
            self.running_var = loaded_params['running_var']
    else:
        np.random.seed(12)
        self.W1 = np.sqrt(1 / sizes[0]) * np.random.randn(
            sizes[0], sizes[1])  # (784, 50)
        self.b1 = np.sqrt(1 / sizes[0]) * np.random.randn(sizes[1])
        self.W2 = np.sqrt(1 / sizes[1]) * np.random.randn(
            sizes[1], sizes[2])  # (50, 10)
        self.b2 = np.sqrt(1 / sizes[1]) * np.random.randn(sizes[2])
        self.gamma = np.ones(self.W1.shape[1])
        self.beta = np.zeros(self.W1.shape[1])

    # layers of network
    activation_function = {'Sigmoid': Sigmoid, 'ReLU': ReLU}
    self.layers = {}
    self.layers['FullyConnected1'] = FullyConnected(self.W1, self.b1)
    if self.use_bn:
        if use_trained_params:
            self.layers['BatchNorm'] = BatchNorm(
                self.gamma, self.beta,
                running_mean=self.running_mean, running_var=self.running_var)
        else:
            self.layers['BatchNorm'] = BatchNorm(self.gamma, self.beta)
    self.layers['Activation'] = activation_function[self.activation]()
    if self.use_dropout:
        self.layers['Dropout'] = Dropout(self.dropout_p)
    self.layers['FullyConnected2'] = FullyConnected(self.W2, self.b2)
    self.lastLayer = SoftmaxLoss()
def main(args):
    parser = argparse.ArgumentParser(
        description="Trains a simple neural net on simple data"
    )
    parser.add_argument(
        "--dataset", type=Dataset, choices=Dataset, default=Dataset.IRIS
    )
    parser.add_argument("--plot-image-grid", action="store_true")
    parser.add_argument("--lr", default=1e-3, type=float)
    parser.add_argument("--epochs", default=100, type=int)
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Do not show any of the generated plots",
    )
    parser.add_argument(
        "--savefig", action="store_true", help="Save the figures as pdfs"
    )
    parsed_args, _ = parser.parse_known_args()

    epochs = parsed_args.epochs
    if parsed_args.dataset == Dataset.IRIS:
        train_data, test_data, train_targets, test_targets, classes = load_iris_data()
    elif parsed_args.dataset == Dataset.KMNIST:
        train_data, test_data, train_targets, test_targets, classes = load_kmnist_data()

    if parsed_args.plot_image_grid:
        make_image_grid(train_data, train_targets, savefig=parsed_args.savefig)
        plt.show()
        return

    features = train_data.size(1)
    no_classes = train_targets.max() + 1
    if parsed_args.dataset == Dataset.IRIS:
        layers = [
            FullyConnected(features, 32),
            FullyConnected(32, no_classes, nonlinearity="linear"),
        ]
    elif parsed_args.dataset == Dataset.KMNIST:
        layers = [
            Conv2d(1, 8, 3),
            Conv2d(8, 8, 3),
            MaxPool2d(2),
            Conv2d(8, 16, 3),
            Conv2d(16, 16, 3),
            MaxPool2d(2),
            Flatten(start_dim=1, end_dim=-1),
            FullyConnected(256, no_classes, nonlinearity="linear"),
        ]

    model = Model(layers)
    optimiser = RMSProp(model.parameters(), parsed_args.lr)

    it = range(epochs)
    if parsed_args.dataset != Dataset.KMNIST:
        it = tqdm(it)

    epochs_loss = []
    for epoch in it:
        epochs_loss.append(
            train(model, optimiser, train_data, train_targets, size=128, use_tqdm=True)
        )
    plot_loss(epochs_loss, dataset=parsed_args.dataset, savefig=parsed_args.savefig)

    loss, accuracy, accuracy_per_class = test(model, test_data, test_targets)
    plot_bar(
        accuracy_per_class,
        classes,
        dataset=parsed_args.dataset,
        savefig=parsed_args.savefig,
    )
    if not parsed_args.quiet:
        plt.show()
    print(f"Final Loss: {loss}, Final Accuracy: {accuracy}")
for row in data:
    inputs.append([int(i) for i in row[1:]])
inputs = np.array(inputs)

with open('boolean_functions.csv') as f:
    data = csv.reader(f, delimiter=',', quotechar='|')
    all_targets = []
    for row in data:
        all_targets.append([[int(i)] for i in row])
all_targets = np.array(all_targets)

learning_rate = 0.02
batch_size = 1
updates = 10000

layers = [FullyConnected(4, 1, activation=ScaledTanh(), threshold=True)]
for layer in layers:
    size = (layer.output_dim, layer.input_dim)
    layer.weights = np.random.uniform(low=-0.2, high=0.2, size=size)
    size = layer.output_dim
    layer.threshold = np.random.uniform(low=-1, high=1, size=size)

network = StandardNetwork(layers)
optimizer = SGD(layers, learning_rate)
error_function = MSE()

repeats = 10
is_linearly_separable = []
for targets in all_targets:
def _fit(self, X):
    if not self._initialized:
        layer = FullyConnected(self.n_hidden,
                               bias=0.,
                               random_seed=self.random_seed)
        layer.setup_weights(X.shape)
        self.W = layer.W
        self.vb = np.zeros(X.shape[1])
        self.hb = layer.b
        self._dW = np.zeros_like(self.W)
        self._dvb = np.zeros_like(self.vb)
        self._dhb = np.zeros_like(self.hb)
        self._rng = RNG(self.random_seed)
    self._rng.reseed()
    timer = Stopwatch(verbose=False).start()
    for _ in xrange(self.n_epochs):
        self.epoch += 1
        if self.verbose:
            print_inline('Epoch {0:>{1}}/{2} '.format(
                self.epoch, len(str(self.n_epochs)), self.n_epochs))

        if isinstance(self.learning_rate, str):
            S, F = map(float, self.learning_rate.split('->'))
            self._learning_rate = S + (F - S) * (
                1. - np.exp(-(self.epoch - 1.) / 8.)) / (
                    1. - np.exp(-(self.n_epochs - 1.) / 8.))
        else:
            self._learning_rate = self.learning_rate

        if isinstance(self.momentum, str):
            S, F = map(float, self.momentum.split('->'))
            self._momentum = S + (F - S) * (
                1. - np.exp(-(self.epoch - 1) / 4.)) / (
                    1. - np.exp(-(self.n_epochs - 1) / 4.))
        else:
            self._momentum = self.momentum

        mean_recon = self.train_epoch(X)
        if mean_recon < self.best_recon:
            self.best_recon = mean_recon
            self.best_epoch = self.epoch
            self.best_W = self.W.copy()
            self.best_vb = self.vb.copy()
            self.best_hb = self.hb.copy()
            self._early_stopping = self.early_stopping

        msg = 'elapsed: {0} sec'.format(
            width_format(timer.elapsed(), default_width=5, max_precision=2))
        msg += ' - recon. mse: {0}'.format(
            width_format(mean_recon, default_width=6, max_precision=4))
        msg += ' - best r-mse: {0}'.format(
            width_format(self.best_recon, default_width=6, max_precision=4))
        if self.early_stopping:
            msg += ' {0}*'.format(self._early_stopping)
        if self.verbose:
            print msg
        if self._early_stopping == 0:
            return
        if self.early_stopping:
            self._early_stopping -= 1
data.val_size = val_inputs.shape[0]

plot_updates = True
plot_decision_boundary = True
plot_parameter_changes = True
save_parameters = False

learning_rate = 0.01
batch_size = 1
updates = 2000000

M1 = 25
M2 = 10
layers = [
    FullyConnected(2, M1, activation=Tanh(), threshold=True),
    FullyConnected(M1, M2, activation=Tanh(), threshold=True),
    FullyConnected(M2, 1, activation=Tanh(), threshold=True)
]
if plot_parameter_changes:
    old_layers = deepcopy(layers)

network = StandardNetwork(layers)
optimizer = SGD(layers, learning_rate)
error_function = MSE()

errors = {'training': [], 'validation': []}
for i in range(updates):
    input, target = data.sample(batch_size)
import config
from initializers import xavier_uniform_init, he_uniform_init, he_normal_init
from layers import FullyConnected
from layers import LeakyReLU, ReLU, Sigmoid, Tanh, SoftmaxCrossEntropy
from layers import BatchNorm, Dropout

'''
Model and its output activation and loss function
Currently the output gate assumes Softmax + CrossEntropy
'''
softmax_crossentropy = SoftmaxCrossEntropy()

# CURRENTLY THE BEST MODEL
he_and_relu = [
    FullyConnected(config.INPUT_DIM, 192, he_uniform_init, use_weight_norm=True),
    BatchNorm(input_dim=192),
    ReLU(),
    Dropout(0.3),
    FullyConnected(192, 96, he_uniform_init, use_weight_norm=True),
    BatchNorm(input_dim=96),
    ReLU(),
    Dropout(0.3),
    FullyConnected(96, 48, he_uniform_init, use_weight_norm=True),
    BatchNorm(input_dim=48),
    ReLU(),
    Dropout(0.3),
    FullyConnected(48, config.NUM_CLASSES, he_uniform_init,
X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')
X_train = X_train.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255

r = np.random.permutation(len(y_train))
X_train = X_train[r]
y_train = y_train[r]
X_dev = X_train[:12000]
y_dev = y_train[:12000]
X_train = X_train[10000:]
y_train = y_train[10000:]
LOG.info("finish data preprocessing.")

FCs = [
    FullyConnected(784, 256, opts.batch_size, relu()),
    FullyConnected(256, 128, opts.batch_size, relu()),
    FullyConnected(128, 64, opts.batch_size, relu()),
    FullyConnected(64, 10, opts.batch_size, softmax())
]
LOG.info("finish initialization.")

n_samples = len(y_train)
order = np.arange(n_samples)
best_precision, test_precision = 0, 0
for epochs in range(0, opts.epochs):
    np.random.shuffle(order)
    cost = 0.
    for batch_start in range(0, n_samples, opts.batch_size):
        batch_end = batch_start + opts.batch_size if batch_start \