def __init__(self, number_of_vectors, number_of_input_elements,
             number_of_output_elements, number_of_neurons,
             number_of_hidden_layers, problem_type, function_types, seed):
    """Build the network for the given problem type.

    Generates input/output data (classification or regression), then stacks
    the layer list: input data, alternating hidden/activation layers, the
    output data layer, and finally the loss function.
    """
    self.number_of_vectors = number_of_vectors
    self.number_of_input_elements = number_of_input_elements
    self.number_of_output_elements = number_of_output_elements
    self.number_of_neurons = number_of_neurons
    self.number_of_hidden_layers = number_of_hidden_layers
    self.function_types = function_types
    self.problem_type = problem_type

    # Generate input and output data for a classification task ...
    if problem_type == 'classification':
        self.input_layer = lrs.InputLayer(number_of_vectors,
                                          number_of_input_elements, seed)
        self.input_layer.generate_classification()
        self.output_layer = lrs.OutputLayer(number_of_vectors,
                                            number_of_output_elements)
        self.output_layer.generate_classification(self.input_layer)
    # ... or for a regression task.
    else:
        self.input_layer = lrs.InputLayer(number_of_vectors,
                                          number_of_input_elements, seed)
        self.input_layer.generate_regression()
        self.output_layer = lrs.OutputLayer(number_of_vectors,
                                            number_of_output_elements)
        self.output_layer.generate_regression(self.input_layer)

    # Full layer list, including the data layers and activation layers.
    self.layers = [self.input_layer]
    for index in range(number_of_hidden_layers):
        self.layers.append(
            lrs.HiddenLayer(number_of_vectors, number_of_input_elements,
                            number_of_neurons, index))
        self.layers.append(
            lrs.ActivationFunction(number_of_vectors,
                                   number_of_input_elements,
                                   number_of_neurons,
                                   function_types[index]))
    # Output layer.
    self.layers.append(self.output_layer)
    # Loss function.
    self.layers.append(
        lrs.ActivationFunction(number_of_vectors, number_of_input_elements,
                               number_of_neurons, function_types[-1]))
    # The output-layer weights are generated here, sized to the neuron
    # count of the last hidden layer (layers[-4]).
    self.output_layer.generate_weights(self.layers[-4].number_of_neurons)
def _build_model_components(self):
    """Instantiates model components.

    Builds encoder/decoder first (the output layer needs the decoder's
    hidden size), then source/target/output embeddings from shared weights,
    and finally the output projection layer.
    """
    # encoder & decoder first (to know the decoder depth)
    self.encoder = encoder.get_encoder(self.config.config_encoder)
    self.decoder = decoder.get_decoder(self.config.config_decoder)

    # source & target embeddings (weights may be shared/tied)
    (embed_weight_source,
     embed_weight_target,
     out_weight_target) = self._get_embed_weights()

    assert isinstance(self.config.config_embed_source,
                      encoder.EmbeddingConfig)
    assert isinstance(self.config.config_embed_target,
                      encoder.EmbeddingConfig)

    self.embedding_source = encoder.Embedding(
        self.config.config_embed_source,
        prefix=C.SOURCE_EMBEDDING_PREFIX,
        embed_weight=embed_weight_source)
    self.embedding_target = encoder.Embedding(
        self.config.config_embed_target,
        prefix=C.TARGET_EMBEDDING_PREFIX,
        embed_weight=embed_weight_target)

    # Optional extra output-side embedding; absent in most configurations.
    if self.config.config_embed_output is not None:
        self.embedding_output = encoder.Embedding(
            self.config.config_embed_output,
            prefix=C.OUTPUT_EMBEDDING_PREFIX,
            embed_weight=out_weight_target)
    else:
        self.embedding_output = None

    # output layer (shares out_weight_target with the target embedding)
    self.output_layer = layers.OutputLayer(
        hidden_size=self.decoder.get_num_hidden(),
        vocab_size=self.config.output_layer_size,
        weight=out_weight_target,
        weight_normalization=self.config.weight_normalization)

    self._is_built = True
def test_mnist():
    """Train a 784-500-10 sigmoid MLP on MNIST for a sweep of learning
    rates, writing per-run training losses to losses.csv and printing each
    run's test-set accuracy.  (Python 2 / Theano code.)
    """
    # Load the pickled MNIST dataset (train/valid/test splits).
    with gzip.open('data/mnist.pkl.gz', 'rb') as f:
        train_set, valid_set, test_set = cPickle.load(f)

    test_set_x, test_set_y = test_set
    train_set_x, train_set_y = train_set

    test_set_x = np.asmatrix(test_set_x, dtype=theano.config.floatX)
    train_set_x = np.asmatrix(train_set_x, dtype=theano.config.floatX)

    # One-hot encode training labels: bit `num` of (1 << num), reversed so
    # the set bit lands at index `num`.
    train_set_y_vect = np.asmatrix(
        [[int(b) for b in list("{0:010b}".format(1 << num))[::-1]]
         for num in train_set_y],
        dtype=theano.config.floatX)

    batch_size = 500  # size of the minibatch

    import csv
    # Geometric learning-rate sweep: 0.001 * 3**i.
    alphas = [0.001 * 3**i for i in range(10)]
    with open('losses.csv', 'wb') as out_file:  # 'wb': Python 2 csv idiom
        writer = csv.writer(out_file)
        for alpha in alphas:
            # Fresh network per learning rate.
            layer1 = layers.FlatInputLayer(
                batch_size, test_set_x.shape[1],
                ranges=np.asarray([[0, 255]], dtype=theano.config.floatX))
            layer2 = layers.DenseLayer(layer1, 500, 0.1, 0, layers.sigmoid)
            layer3 = layers.DenseLayer(layer2, 10, 0.1, 0, layers.sigmoid)
            layer4 = layers.OutputLayer(layer3)
            mlp = NN([layer1, layer2, layer3, layer4],
                     learning_rate=alpha, L2_reg=1)

            train_losses = mlp.train_model_batch(train_set_x,
                                                 train_set_y_vect,
                                                 epochs=50)
            print(alpha)
            print(train_losses)
            writer.writerow(train_losses)

            probabilities = mlp.fprop(test_set_x)
            predicted_labels = np.argmax(probabilities, 1)
            # BUG FIX: the original named this count `miss`, but the
            # comparison counts *matching* labels — the printed ratio is
            # the accuracy, not the miss rate.
            correct = sum(
                [y1 == y2 for y1, y2 in zip(predicted_labels, test_set_y)])
            print(float(correct) / len(predicted_labels))
def __init__(self, args, input_channels):
    """Four-view breast model: a shared four-view trunk, one 256->256 FC
    layer and one (4, 2)-shaped output head per view, plus average-pooling
    and Gaussian-noise helper layers."""
    super(ImageBreastModel, self).__init__()
    self.args = args
    self.four_view_model = FourViewModel(input_channels, args)

    # Per-view fully-connected layers: in_features=256, out_features=256.
    self.fc1_LMLO = nn.Linear(256, 256)
    self.fc1_RMLO = nn.Linear(256, 256)
    self.fc1_LCC = nn.Linear(256, 256)
    self.fc1_RCC = nn.Linear(256, 256)

    # Per-view output heads: in_features=256, output shape (4, 2).
    self.output_layer_lcc = layers.OutputLayer(256, (4, 2))
    self.output_layer_rcc = layers.OutputLayer(256, (4, 2))
    self.output_layer_lmlo = layers.OutputLayer(256, (4, 2))
    self.output_layer_rmlo = layers.OutputLayer(256, (4, 2))

    self.all_views_avg_pool = layers.AllViewsAvgPool()
    self.all_views_gaussian_noise_layer = layers.AllViewsGaussianNoise(0.01)
def __init__(self, data_dim, target_dim, hidden_dims):
    """Chain hidden layers of the widths in `hidden_dims`, starting from
    `data_dim`, then attach an output layer mapping the last hidden width
    to `target_dim`."""
    self._hidden_layers = []
    self._output_layer = None

    width = data_dim
    for next_width in hidden_dims:
        self._hidden_layers.append(layers.HiddenLayer(width, next_width))
        width = next_width

    self._output_layer = layers.OutputLayer(width, target_dim)
def _init_value_network(self, input_dims, output_dims, minibatch_size=32):
    """A subclass may override this if a different sort of network is
    desired."""
    # Input -> 15 sigmoid units -> sigmoid outputs -> output layer.
    input_layer = layers.FlatInputLayer(minibatch_size, input_dims)
    hidden_layer = layers.DenseLayer(input_layer, 15, 0.1, 0,
                                     layers.sigmoid)
    readout_layer = layers.DenseLayer(hidden_layer, output_dims, 0.1, 0,
                                      layers.sigmoid)
    top_layer = layers.OutputLayer(readout_layer)
    return nn.NN([input_layer, hidden_layer, readout_layer, top_layer],
                 batch_size=minibatch_size,
                 learning_rate=self.value_learning_rate)
def _init_value_network(self, input_dims, output_dims, minibatch_size=32):
    """A subclass may override this if a different sort of network is
    desired."""
    scale_factor = 2
    # Input (scaled by observation ranges) -> 15 tanh units ->
    # linear readout -> output layer.
    input_layer = layers.FlatInputLayer(
        minibatch_size, input_dims,
        np.asarray(self.observation_ranges, dtype='float32'),
        scale_factor)
    hidden_layer = layers.DenseLayer(input_layer, 15, 0.1, 0, layers.tanh)
    readout_layer = layers.DenseLayer(hidden_layer, output_dims, 0.1, 0,
                                      layers.identity)
    top_layer = layers.OutputLayer(readout_layer)
    return nn.NN([input_layer, hidden_layer, readout_layer, top_layer],
                 batch_size=minibatch_size,
                 learning_rate=self.value_learning_rate)
def setup_architecture(self):
    """Size and wire up the network layers.

    Reads features from the first .png in the training folder and its label
    from trainLabels.csv to determine the input and output layer widths,
    then connects input -> hidden layers -> output.
    """
    expected_output = []
    feature_vector = []
    count = 0
    # Sample exactly one training image to discover the feature-vector and
    # expected-output sizes.
    for f in os.listdir(self.training_folder):
        if count > 0:
            break
        if '.png' in f:
            feature_vector = feature.get_features('train/' + f)
            # Filename stem is the 1-based row index into trainLabels.csv
            # (plus a header line).
            stem = f.split('.')[0]
            theline = linecache.getline('trainLabels.csv', int(stem) + 1)
            label = theline.strip(' ').strip('\n').split(',')[1]
            expected_output = self.get_expected_output(label)
            count += 1

    self.input_layer = layers.InputLayer(len(feature_vector))
    self.get_hidden_layers(self.hidden_layer_sizes)
    self.output_layer = layers.OutputLayer(len(expected_output))

    # Wire the input layer to its successor.
    if len(self.hidden_layer_sizes) == 0:
        self.input_layer.set_architecture(self.output_layer)
    else:
        self.input_layer.set_architecture(self.hidden_layers[0])

    # Wire each hidden layer to its neighbours.
    for i, hidden_layer in enumerate(self.hidden_layers):
        prev_layer = self.input_layer if i == 0 else self.hidden_layers[i - 1]
        next_layer = (self.output_layer
                      if i == len(self.hidden_layers) - 1
                      else self.hidden_layers[i + 1])
        hidden_layer.set_architecture(prev_layer, next_layer)

    # BUG FIX: the original unconditionally used self.hidden_layers[-1],
    # which raises IndexError when there are no hidden layers even though
    # that configuration is explicitly supported above.
    if self.hidden_layers:
        self.output_layer.set_architecture(self.hidden_layers[-1])
    else:
        self.output_layer.set_architecture(self.input_layer)
def __init__(self, num_actions, phi_length, width, height, discount=.9, learning_rate=.01, batch_size=32, approximator='none'): self._batch_size = batch_size self._num_input_features = phi_length self._phi_length = phi_length self._img_width = width self._img_height = height self._discount = discount self.num_actions = num_actions self.learning_rate = learning_rate self.scale_input_by = 255.0 print "neural net initialization, lr is: ", self.learning_rate, approximator # CONSTRUCT THE LAYERS self.q_layers = [] self.q_layers.append( layers.Input2DLayer(self._batch_size, self._num_input_features, self._img_height, self._img_width, self.scale_input_by)) if approximator == 'cuda_conv': self.q_layers.append( cc_layers.ShuffleBC01ToC01BLayer(self.q_layers[-1])) self.q_layers.append( cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1], n_filters=16, filter_size=8, stride=4, weights_std=.01, init_bias_value=0.1)) self.q_layers.append( cc_layers.CudaConvnetConv2DLayer(self.q_layers[-1], n_filters=32, filter_size=4, stride=2, weights_std=.01, init_bias_value=0.1)) self.q_layers.append( cc_layers.ShuffleC01BToBC01Layer(self.q_layers[-1])) elif approximator == 'conv': self.q_layers.append( layers.StridedConv2DLayer(self.q_layers[-1], n_filters=16, filter_width=8, filter_height=8, stride_x=4, stride_y=4, weights_std=.01, init_bias_value=0.01)) self.q_layers.append( layers.StridedConv2DLayer(self.q_layers[-1], n_filters=32, filter_width=4, filter_height=4, stride_x=2, stride_y=2, weights_std=.01, init_bias_value=0.01)) if approximator == 'cuda_conv' or approximator == 'conv': self.q_layers.append( layers.DenseLayer(self.q_layers[-1], n_outputs=256, weights_std=0.01, init_bias_value=0.1, dropout=0, nonlinearity=layers.rectify)) self.q_layers.append( layers.DenseLayer(self.q_layers[-1], n_outputs=num_actions, weights_std=0.01, init_bias_value=0.1, dropout=0, nonlinearity=layers.identity)) if approximator == 'none': self.q_layers.append(\ layers.DenseLayerNoBias(self.q_layers[-1], 
n_outputs=num_actions, weights_std=0.00, dropout=0, nonlinearity=layers.identity)) self.q_layers.append(layers.OutputLayer(self.q_layers[-1])) for i in range(len(self.q_layers) - 1): print self.q_layers[i].get_output_shape() # Now create a network (using the same weights) # for next state q values self.next_layers = copy_layers(self.q_layers) self.next_layers[0] = layers.Input2DLayer(self._batch_size, self._num_input_features, self._img_width, self._img_height, self.scale_input_by) self.next_layers[1].input_layer = self.next_layers[0] self.rewards = T.col() self.actions = T.icol() # Build the loss function ... print "building loss funtion" q_vals = self.q_layers[-1].predictions() next_q_vals = self.next_layers[-1].predictions() next_maxes = T.max(next_q_vals, axis=1, keepdims=True) target = self.rewards + discount * next_maxes target = theano.gradient.consider_constant(target) diff = target - q_vals # Zero out all entries for actions that were not chosen... mask = build_mask(T.zeros_like(diff), self.actions, 1.0) diff_masked = diff * mask error = T.mean(diff_masked**2) self._loss = error * diff_masked.shape[1] # self._parameters = layers.all_parameters(self.q_layers[-1]) self._idx = T.lscalar('idx') # CREATE VARIABLES FOR INPUT AND OUTPUT self.states_shared = theano.shared( np.zeros((1, 1, 1, 1), dtype=theano.config.floatX)) self.states_shared_next = theano.shared( np.zeros((1, 1, 1, 1), dtype=theano.config.floatX)) self.rewards_shared = theano.shared(np.zeros( (1, 1), dtype=theano.config.floatX), broadcastable=(False, True)) self.actions_shared = theano.shared(np.zeros((1, 1), dtype='int32'), broadcastable=(False, True)) self._givens = \ {self.q_layers[0].input_var: self.states_shared[self._idx*self._batch_size: (self._idx+1)*self._batch_size, :, :, :], self.next_layers[0].input_var: self.states_shared_next[self._idx*self._batch_size: (self._idx+1)*self._batch_size, :, :, :], self.rewards: self.rewards_shared[self._idx*self._batch_size: 
(self._idx+1)*self._batch_size, :], self.actions: self.actions_shared[self._idx*self._batch_size: (self._idx+1)*self._batch_size, :] } self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(\ self._loss, self._parameters, learning_rate=self.learning_rate, rho=0.9, momentum=0.9, epsilon=1e-6) self._train = theano.function([self._idx], self._loss, givens=self._givens, updates=self._updates) self._compute_loss = theano.function([self._idx], self._loss, givens=self._givens) self._compute_q_vals = \ theano.function([self.q_layers[0].input_var], self.q_layers[-1].predictions(), on_unused_input='ignore')
x = sparse.csr_matrix( name='x', dtype='int64') # the data is presented as rasterized images # y = sparse.csr_matrix(name='y', dtype='int8') # the labels are presented as 1D vector of multi y = T.bmatrix('y') # the labels are presented as 1D vector of multi print '... building the computional Graph' rng = numpy.random.RandomState(23455) if MLP: layer1 = layers.HiddenLayer(rng, input=x, n_in=in_trn_matrix.shape[1], n_out=num_of_hidden_units, sparse=True) out_layer = layers.OutputLayer(input=layer1.output, n_in=num_of_hidden_units, n_out=numtype) params = layer1.params + out_layer.params #weights = T.concatenate[layer1.W, out_layer.W] else: out_layer = layers.OutputLayer(input=x, n_in=in_trn_matrix.shape[1], n_out=numtype, sparse=True) params = out_layer.params #weights = [out_layer.W] scorelayer = layers.SigmoidLoss(input=out_layer.score_y_given_x, n_in=numtype, n_out=numtype) cost = scorelayer.cross_entropy_loss(y)
# NOTE(review): this fragment relies on names defined elsewhere in the file
# (rng, layer2_input, ishape, num_of_hidden_units, n_targets, y, logger) —
# verify against the surrounding script.
# Fully-connected tanh hidden layer over the flattened image input.
layer2 = layers.HiddenLayer(rng,
                            input=layer2_input,
                            n_in=ishape[0] * ishape[1],
                            n_out=num_of_hidden_units,
                            activation=T.tanh)

outlayers = []
cost = 0.
out_errors = []
total_errs = 0
params = layer2.params

logger.info('n_in in each softmax: %d and n_out: %d', num_of_hidden_units,
            2)

# One independent binary softmax head per target; costs and error counts
# are accumulated across all heads, and each head's parameters join the
# shared parameter list.
for i in range(n_targets):
    oneOutLayer = layers.OutputLayer(input=layer2.output,
                                     n_in=num_of_hidden_units,
                                     n_out=2)
    # oneOutLayer = MyLogisticRegression(input=layer1.output, n_in=num_of_hidden_units, n_out=2)
    onelogistic = layers.SoftmaxLoss(input=oneOutLayer.score_y_given_x,
                                     n_in=2,
                                     n_out=2)
    params += oneOutLayer.params
    outlayers.append(oneOutLayer)
    # Per-head error and negative log-likelihood against column i of y.
    total_errs += onelogistic.errors(y[:, i])
    cost += onelogistic.negative_log_likelihood(y[:, i])
# total_errors
total_errs /= n_targets
# the cost we minimize during training is the NLL of the model
def test(self, number_of_vectors):
    """Evaluate the trained network on freshly generated test data.

    Generates a test set matching the training task (seed fixed to 2),
    reuses the trained hidden layers and output-layer weights, runs a
    forward pass, and prints either the mean squared error (when the final
    function type is 'l2') or the classification accuracy.
    """
    # Generate test input/output data for a classification task ...
    if self.problem_type == 'classification':
        test_input_layer = lrs.InputLayer(number_of_vectors,
                                          self.number_of_input_elements, 2)
        test_input_layer.generate_classification()
        test_output_layer = lrs.OutputLayer(number_of_vectors,
                                            self.number_of_output_elements)
        test_output_layer.generate_classification(test_input_layer)
    # ... or for a regression task.
    else:
        test_input_layer = lrs.InputLayer(number_of_vectors,
                                          self.number_of_input_elements, 2)
        test_input_layer.generate_regression()
        test_output_layer = lrs.OutputLayer(number_of_vectors,
                                            self.number_of_output_elements)
        test_output_layer.generate_regression(test_input_layer)

    # Reuse the weights learned during training.
    test_output_layer.weights = self.output_layer.weights

    # Rebuild the stack around the trained hidden/activation layers
    # (everything between the input layer and the output/loss layers),
    # resized to the test batch.
    test_layers = [test_input_layer]
    for layer in self.layers[1:-2]:
        layer.number_of_vectors = number_of_vectors
        test_layers.append(layer)
    test_layers.append(test_output_layer)
    test_layers.append(
        lrs.ActivationFunction(number_of_vectors,
                               self.number_of_input_elements,
                               self.number_of_neurons,
                               self.function_types[-1]))

    prediction = self.forward(test_layers)
    classes = test_layers[-1].array
    print('\nPredicted array (test): ')
    print(prediction)

    if self.function_types[-1] == 'l2':
        # Regression: mean squared error over all output elements.
        squared_error = np.power(
            (test_layers[-2].array - test_layers[-2].predicted_array),
            2).sum()
        mean_squared_error = squared_error / (
            number_of_vectors * self.number_of_input_elements)
        print('\nMean squared error: ')
        print(mean_squared_error)
    else:
        # Classification: fraction of rounded predictions that match the
        # expected outputs.
        correct = 0.0
        rounded = np.round(classes, 0)
        for row in range(number_of_vectors):
            if rounded[row][0] == test_layers[-2].array[row][0]:
                correct += 1
        accuracy = correct * 100.0 / number_of_vectors
        print('\nAccuracy: %.2f%%' % accuracy)