def build_model1(self):
    # LookupTable to Embedding
    src_embedding_layer = EmbeddingLayer(input_dim=self.n_src_vocab,
                                         output_dim=self.src_embed_dim,
                                         name='src_embedding')
    tgt_embedding_layer = EmbeddingLayer(input_dim=self.n_tgt_vocab,
                                         output_dim=self.tgt_embed_dim,
                                         name='tgt_embedding')

    # LSTMs
    src_lstm_forward = LSTM(input_dim=self.src_embed_dim, output_dim=self.src_lstm_op_dim)
    src_lstm_backward = LSTM(input_dim=self.src_embed_dim, output_dim=self.src_lstm_op_dim)
    tgt_lstm = LSTM(input_dim=self.tgt_embed_dim, output_dim=self.tgt_lstm_op_dim)
    sys.stderr.write(str(tgt_lstm.params) + "\n")  # TODO

    # From target LSTM to target word indexes
    # Input: target LSTM output dim + attention over the BiLSTM encoder states
    proj_layer = FullyConnectedLayer(input_dim=self.tgt_lstm_op_dim + 2 * self.src_lstm_op_dim,
                                     output_dim=self.n_tgt_vocab,
                                     activation='softmax')

    params = (src_embedding_layer.params + tgt_embedding_layer.params
              + src_lstm_forward.params + src_lstm_backward.params
              + tgt_lstm.params[:-1] + proj_layer.params)

    # Declare input variables
    src_ip = T.ivector()
    tgt_ip = T.ivector()
    tgt_op = T.ivector()

    # Lookup table -> embedding
    src_embed_ip = src_embedding_layer.fprop(src_ip)
    tgt_embed_ip = tgt_embedding_layer.fprop(tgt_ip)

    # Embedding -> source BiLSTM
    src_lstm_forward.fprop(src_embed_ip)
    src_lstm_backward.fprop(src_embed_ip[::-1, :])

    # Concatenate forward/backward states (flip the backward pass again so
    # each row corresponds to the same source word)
    encoderh = T.concatenate((src_lstm_forward.h, src_lstm_backward.h[::-1, :]), axis=1)

    # End of source BiLSTM -> initial state of the target LSTM
    tgt_lstm.h_0 = encoderh[-1]
    tgt_lstm.fprop(tgt_embed_ip)

    # Attention
    # Read http://arxiv.org/abs/1508.04025
    attention = tgt_lstm.h.dot(encoderh.transpose())
    attention = attention.dot(encoderh)

    decoderh = T.concatenate((attention, tgt_lstm.h), axis=1)  # Order preference?

    # LSTM output -> target word distribution
    proj_op = proj_layer.fprop(decoderh)

    # Cost + regularization
    cost = T.nnet.categorical_crossentropy(proj_op, tgt_op).mean()
    cost += beta * T.mean((tgt_lstm.h[:-1] ** 2 - tgt_lstm.h[1:] ** 2) ** 2)

    return {'cost': cost, 'src_ip': src_ip, 'tgt_ip': tgt_ip,
            'tgt_op': tgt_op, 'params': params, 'proj_op': proj_op}
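# A minimal sketch of how the dict returned by build_model1 might be compiled
# into a training function, assuming the layer params are Theano shared
# variables and plain SGD; `nmt` (a hypothetical instance of the model class)
# and the 0.01 learning rate are illustrative, not part of the original code.
import theano
import theano.tensor as T

model = nmt.build_model1()
grads = T.grad(model['cost'], wrt=model['params'])
updates = [(p, p - 0.01 * g) for p, g in zip(model['params'], grads)]
f_train = theano.function(
    inputs=[model['src_ip'], model['tgt_ip'], model['tgt_op']],
    outputs=model['cost'],
    updates=updates)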
def _gen_layers(self):
    self.RNN = ScratchRNN(n_nodes=self.n_nodes1,
                          initializer=HeInitializer(),
                          optimizer=AdaGrad(self.lr),
                          activator=TanH(),
                          lr=self.lr)
    self.FC = FullyConnectedLayer(self.n_nodes1, self.n_output,
                                  initializer=HeInitializer(),
                                  optimizer=AdaGrad(self.lr))
    self.activation = Softmax()
class DAEModel(object):
    def __init__(self, layers=[200, 120, 200]):
        self.layers = layers
        self.layer1 = FullyConnectedLayer(self.layers[0], self.layers[1])
        self.layer2 = FullyConnectedLayer(self.layers[1], self.layers[2])

    def forward(self, inputs):
        c1 = self.layer1.forward(inputs)
        x2 = activation.sigmoid(c1)
        c2 = self.layer2.forward(x2)
        outputs = activation.sigmoid(c2)
        return x2, outputs
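# A minimal usage sketch of DAEModel, assuming FullyConnectedLayer.forward
# accepts an (n_samples, n_features) array; the random batch below is
# illustrative only.
import numpy as np

dae = DAEModel(layers=[200, 120, 200])
x = np.random.rand(32, 200)                  # fake batch of 32 samples
hidden, reconstruction = dae.forward(x)
print(hidden.shape, reconstruction.shape)    # expected: (32, 120) (32, 200)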
def fit(self, X, y, X_val=None, y_val=None):
    X = np.array(X)
    y = np.array(y)
    optimizer = AdaGrad(self.lr)
    # self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1, SimpleInitializer(self.sigma), optimizer)
    self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1, HeInitializer(), optimizer)
    self.activation1 = ReLU()
    self.FC2 = FullyConnectedLayer(self.n_nodes1, self.n_nodes2, HeInitializer(), optimizer)
    self.activation2 = ReLU()
    self.FC3 = FullyConnectedLayer(self.n_nodes2, self.n_output, HeInitializer(), optimizer)
    self.activation3 = Softmax()

    eye = np.eye(len(np.unique(y)))
    start = time.time()
    # For each epoch
    for epoch in range(self.n_epochs):
        # Generate mini-batches
        mini_batch = GetMiniBatch(X, y, self.batch_size)
        # For each mini-batch
        train_entrpy = []
        for mini_X, mini_y in mini_batch:
            # Train (forward and backward propagation)
            mini_entrpy = self._propagate(X=mini_X,
                                          y=eye[mini_y.reshape(-1, )],
                                          predict=False)
            # Store the entropy for this mini-batch
            train_entrpy.append(mini_entrpy)
        # Store the average entropy over the mini-batches
        self.entropy["training"].append(sum(train_entrpy) / len(train_entrpy))
        # Do the same for the validation data, if provided
        if (X_val is not None) and (y_val is not None):
            val_entrpy = self._propagate(X=X_val,
                                         y=eye[y_val.reshape(-1, )],
                                         predict=False,
                                         validation=True)
            self.entropy["validation"].append(val_entrpy)
        lap = time.time()
        print("epoch: ", epoch)
        print("process time: ", lap - start, "sec")
    return self.entropy
src_embedding_layer = EmbeddingLayer(input_dim=src_embedding.shape[0],
                                     output_dim=src_embedding.shape[1],
                                     pretrained=src_embedding,
                                     name='src_embedding')
tgt_embedding_layer = EmbeddingLayer(input_dim=src_embedding.shape[0],
                                     output_dim=src_embedding.shape[1],
                                     name='tgt_embedding')

tgt_lstm_0 = FastLSTM(input_dim=1024, output_dim=1024, name='tgt_lstm_0')
tgt_lstm_1 = FastLSTM(input_dim=1024, output_dim=1024, name='tgt_lstm_1')
tgt_lstm_2 = FastLSTM(input_dim=1024, output_dim=1024, name='tgt_lstm_2')

tgt_lstm_h_to_vocab = FullyConnectedLayer(
    input_dim=1024,
    output_dim=tgt_embedding_layer.input_dim,
    batch_normalization=False,
    activation='softmax',
    name='tgt_lstm_h_to_vocab')

# Set model parameters
params = tgt_embedding_layer.params
params += [
    src_lstm_0.h_0, src_lstm_0.c_0,
    src_lstm_1.h_0, src_lstm_1.c_0,
    src_lstm_2.h_0, src_lstm_2.c_0
]
for ind, rnn in enumerate([tgt_lstm_0, tgt_lstm_1, tgt_lstm_2]):
    if ind == 0:
        params += rnn.params[:-1]
    else:
        params += rnn.params
def plot(error_curve):
    plt.plot([x for x in range(0, len(error_curve))], error_curve)
    plt.show()


def testing(net, data, labels):
    output = net.activate(data)
    answer = output.argmax(1)
    percentage = (answer == labels).sum() / float(data.shape[0]) * 100
    return percentage


net = Network()
layer_1 = FullyConnectedLayer(Logistic(), 4, 256)
layer_2 = FullyConnectedLayer(Logistic(), 256, 3)
net.append_layer(layer_1)
net.append_layer(layer_2)
params = Backprop_params(100, 1e-5, 1, 0.9, True, [0.01, 0.01], 0)
method = Backpropagation(params, net)

data_all = prepareData()
train_data = data_all[0]
test_data = data_all[1]
train_labels = data_all[2]
test_labels = data_all[3]

error_curve = method.train(train_data, train_labels)
print "Train efficiency: " + str(testing(net, train_data, train_labels))
print "Test efficiency: " + str(testing(net, test_data, test_labels))
plot(error_curve)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.plot(points[:, 0], points[:, 1], lw=2, label='ROC curve')
plt.plot([0.0, 1.0], [0.0, 1.0], lw=2)
plt.show()

# Approximate the area under the ROC curve with a rectangle rule
auc = 0
for i in xrange(1, len(points)):
    auc += (points[i, 0] - points[i - 1, 0]) * points[i, 1]
print 'auc = ' + str(auc)

# Load data from file
data = loadDataFromFile("../../Datasets/tic-tac-toe.data.txt")

# Configure the network
net = Network()
layer_1 = FullyConnectedLayer(Logistic(), 9, 9)
layer_2 = FullyConnectedLayer(Logistic(), 9, 1)
net.append_layer(layer_1)
net.append_layer(layer_2)
params = Backprop_params(500, 1e-5, 10, 0.9, False, [0.01, 0.01], 0)
method = Backpropagation(params, net)

train_data = data[0]
test_data = data[1]
train_labels = data[2]
test_labels = data[3]

# Training
error_curve = method.train(train_data, train_labels)
plot(error_curve)

# Print the results
print "Train efficiency: " + str(testing(net, train_data, train_labels))
peek_dim = 0 if not peek_encoder else src_rnn_op_dim
decoder = [
    FastLSTM(input_dim=tgt_emb_dim + peek_dim,
             output_dim=tgt_rnn_op_dim,
             name='tgt_rnn_0')
]
for i in range(int(args.num_layers) - 1):
    decoder.append(
        FastLSTM(input_dim=tgt_rnn_op_dim + peek_dim,
                 output_dim=tgt_rnn_op_dim,
                 name='tgt_rnn_%d' % (i + 1)))

# Projection layers
tgt_rnn_h_to_vocab = FullyConnectedLayer(input_dim=tgt_rnn_op_dim + src_rnn_op_dim,
                                         output_dim=n_tgt,
                                         batch_normalization=False,
                                         activation='softmax',
                                         name='tgt_rnn_h_to_vocab')

if args.attention == 'mlp':
    attention_layer_1 = FullyConnectedLayer(input_dim=tgt_rnn_op_dim,
                                            output_dim=tgt_rnn_op_dim,
                                            batch_normalization=False,
                                            activation='relu',
                                            name='attention_layer_1')
    attention_layer_2 = FullyConnectedLayer(input_dim=tgt_rnn_op_dim,
                                            output_dim=tgt_rnn_op_dim,
                                            batch_normalization=False,
                                            activation='relu',
                                            name='attention_layer_2')
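# A minimal NumPy sketch of why tgt_rnn_h_to_vocab takes
# tgt_rnn_op_dim + src_rnn_op_dim inputs: an attention-weighted encoder
# context is concatenated with the decoder hidden state before projecting
# to the vocabulary. This shows plain dot-product attention; the snippet
# above also configures an MLP variant. All shapes and names here are
# illustrative, not taken from the original code.
import numpy as np

src_len, src_dim = 7, 512
tgt_len, tgt_dim = 5, 512
enc_h = np.random.rand(src_len, src_dim)       # encoder hidden states
dec_h = np.random.rand(tgt_len, tgt_dim)       # decoder hidden states

scores = dec_h @ enc_h.T                       # (tgt_len, src_len) attention scores
scores -= scores.max(axis=1, keepdims=True)    # stabilize the softmax
weights = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
context = weights @ enc_h                      # (tgt_len, src_dim) context vectors

proj_input = np.concatenate([dec_h, context], axis=1)  # (tgt_len, tgt_dim + src_dim)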
def get_learning_set():
    _dict = load_dictionary('Datasets/dictionary.txt')
    ngrams = get_ngrams()
    data = np.zeros((len(_dict), len(ngrams)))
    for i in xrange(0, len(_dict)):
        for j in xrange(0, len(ngrams)):
            if ngrams[j] in _dict[i]:
                data[i, j] += 1
    return data


if __name__ == "__main__":
    data = get_learning_set()
    net = Network()
    layer_1 = FullyConnectedLayer(Logistic(), 1089, 100)
    layer_2 = FullyConnectedLayer(Linear(), 100, 1089)
    net.append_layer(layer_1)
    net.append_layer(layer_2)
    params = Backprop_params(200, 1e-5, 1000, 0.9, False, [0.01, 0.01], 0)
    method = Backpropagation(params, net)
    rnd_index = np.random.permutation(len(data))
    data = data[rnd_index]
    # Train as an autoencoder: the inputs are also the targets
    method.train(data, data)
    Network.save_network(net, 'nets/network.net')
import dataset_mnist
from activation import SigmoidActivation, SoftmaxActivation, ReluActivation, TanhActivation, LinearActivation
from cost import QuadraticCost, CrossEntropyCost
from initializer import GaussInitializer, GaussSqrtInitializer
from layer import FullyConnectedLayer
from network import Network
import optimizer

X_train, y_train, X_test, y_test = dataset_mnist.load_mnist()

layer1 = FullyConnectedLayer(784, 100, activation=SigmoidActivation())
layer2 = FullyConnectedLayer(100, 10, activation=SigmoidActivation())
layer1.init_weigths(GaussInitializer())
layer2.init_weigths(GaussInitializer())

net = Network([layer1, layer2],
              cost_function=CrossEntropyCost(),
              test_function=dataset_mnist.output_test)
opti = optimizer.SGDOptimizer(lr=0.5, batch_size=10, lambda1=0.0, lambda2=5.0)

net.evaluate(X_test, y_test)
net.train(X_train, y_train, X_test, y_test, opti, 30, disp_train_cost=True)
def __init__(self, layers=[200, 120, 200]):
    self.layers = layers
    self.layer1 = FullyConnectedLayer(self.layers[0], self.layers[1])
    self.layer2 = FullyConnectedLayer(self.layers[1], self.layers[2])
class ScratchDeepNeuralNetrowkClassifier():
    def __init__(self, n_features=784, n_nodes1=400, n_nodes2=200,
                 n_output=10, sigma=0.01, lr=0.001, batch_size=20, n_epochs=30):
        self.n_features = n_features
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.n_output = n_output
        self.sigma = sigma
        self.lr = lr
        self.batch_size = batch_size
        self.entropy = {"training": [], "validation": []}
        self.n_epochs = n_epochs

    def fit(self, X, y, X_val=None, y_val=None):
        X = np.array(X)
        y = np.array(y)
        optimizer = AdaGrad(self.lr)
        # self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1, SimpleInitializer(self.sigma), optimizer)
        self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1, HeInitializer(), optimizer)
        self.activation1 = ReLU()
        self.FC2 = FullyConnectedLayer(self.n_nodes1, self.n_nodes2, HeInitializer(), optimizer)
        self.activation2 = ReLU()
        self.FC3 = FullyConnectedLayer(self.n_nodes2, self.n_output, HeInitializer(), optimizer)
        self.activation3 = Softmax()

        eye = np.eye(len(np.unique(y)))
        start = time.time()
        # For each epoch
        for epoch in range(self.n_epochs):
            # Generate mini-batches
            mini_batch = GetMiniBatch(X, y, self.batch_size)
            # For each mini-batch
            train_entrpy = []
            for mini_X, mini_y in mini_batch:
                # Train (forward and backward propagation)
                mini_entrpy = self._propagate(X=mini_X,
                                              y=eye[mini_y.reshape(-1, )],
                                              predict=False)
                # Store the entropy for this mini-batch
                train_entrpy.append(mini_entrpy)
            # Store the average entropy over the mini-batches
            self.entropy["training"].append(sum(train_entrpy) / len(train_entrpy))
            # Do the same for the validation data, if provided
            if (X_val is not None) and (y_val is not None):
                val_entrpy = self._propagate(X=X_val,
                                             y=eye[y_val.reshape(-1, )],
                                             predict=False,
                                             validation=True)
                self.entropy["validation"].append(val_entrpy)
            lap = time.time()
            print("epoch: ", epoch)
            print("process time: ", lap - start, "sec")
        return self.entropy

    def predict(self, X):
        X = np.array(X)
        # Forward propagation only
        self._propagate(X, y=None, predict=True)
        return np.argmax(self.Z3, axis=1)

    def _propagate(self, X, y=None, predict=False, validation=False):
        # Forward propagation
        A1 = self.FC1.forward(X)
        Z1 = self.activation1.forward(A1)
        A2 = self.FC2.forward(Z1)
        Z2 = self.activation2.forward(A2)
        A3 = self.FC3.forward(Z2)
        self.Z3 = self.activation3.forward(A3)
        if predict:
            return
        # Cross-entropy loss
        entropy = self.activation3.cross_entropy(self.Z3, y)
        if validation:
            return entropy
        # Backward propagation
        dA3 = self.activation3.backward(self.Z3, y)
        dZ2 = self.FC3.backward(dA3)
        dA2 = self.activation2.backward(dZ2)
        dZ1 = self.FC2.backward(dA2)
        dA1 = self.activation1.backward(dZ1)
        dZ0 = self.FC1.backward(dA1)  # dZ0 is not used
        return entropy
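# A minimal usage sketch, assuming 784-dim flattened inputs, integer labels
# 0-9, and that the project's FullyConnectedLayer, HeInitializer, AdaGrad,
# ReLU, Softmax and GetMiniBatch classes are importable; the data below is
# illustrative only.
import numpy as np

X_demo = np.random.rand(100, 784)
y_demo = np.arange(100) % 10          # fake labels covering all 10 classes

clf = ScratchDeepNeuralNetrowkClassifier(n_epochs=3, batch_size=20)
entropy = clf.fit(X_demo, y_demo)
print(clf.predict(X_demo)[:5])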
class ScratchRNNClassifier():
    def __init__(self, n_nodes1=400, n_output=10, sigma=0.01, lr=0.001,
                 batch_size=20, n_epochs=30):
        self.n_nodes1 = n_nodes1
        self.n_output = n_output
        self.n_sequences = None
        self.n_features = None
        self.sigma = sigma
        self.lr = lr
        self.batch_size = batch_size
        self.entropy = {"training": [], "validation": []}
        self.n_epochs = n_epochs

    def fit(self, X, y, X_val=None, y_val=None):
        X = np.array(X)
        y = np.array(y)
        _, _, self.n_features = X.shape
        # Build the layers
        self._gen_layers()
        # One-hot encoding of the labels
        eye = np.eye(len(np.unique(y)))
        start = time.time()
        # For each epoch
        for epoch in range(self.n_epochs):
            # Generate mini-batches
            mini_batch = GetMiniBatch(X, y, self.batch_size)
            # For each mini-batch
            train_entrpy = []
            for mini_X, mini_y in mini_batch:
                # Forward propagation
                self._propagate_forward(X=mini_X)
                # Compute the entropy
                mini_entrpy = self._calc_entropy(y=eye[mini_y.reshape(-1, )])
                # Backward propagation
                self._propagate_backward(y=eye[mini_y.reshape(-1, )])
                # Store the entropy for this mini-batch
                train_entrpy.append(mini_entrpy)
            # Store the average entropy over the mini-batches
            self.entropy["training"].append(sum(train_entrpy) / len(train_entrpy))
            # Do the same for the validation data, if provided
            if (X_val is not None) and (y_val is not None):
                self._propagate_forward(X=X_val)
                val_entrpy = self._calc_entropy(y=eye[y_val.reshape(-1, )])
                self._propagate_backward(y=eye[y_val.reshape(-1, )])
                self.entropy["validation"].append(val_entrpy)
            lap = time.time()
            print("epoch: ", epoch)
            print("process time: ", lap - start, "sec")
        return self.entropy

    def predict(self, X):
        X = np.array(X)
        self._propagate_forward(X)
        return np.argmax(self.H2, axis=1)

    def _gen_layers(self):
        self.RNN = ScratchRNN(n_nodes=self.n_nodes1,
                              initializer=HeInitializer(),
                              optimizer=AdaGrad(self.lr),
                              activator=TanH(),
                              lr=self.lr)
        self.FC = FullyConnectedLayer(self.n_nodes1, self.n_output,
                                      initializer=HeInitializer(),
                                      optimizer=AdaGrad(self.lr))
        self.activation = Softmax()

    def _propagate_forward(self, X):
        H1 = self.RNN.forward(X)
        # Classify from the hidden state at the last time step
        A1 = self.FC.forward(H1[:, -1, :])
        self.H2 = self.activation.forward(A1)

    def _propagate_backward(self, y):
        dA1 = self.activation.backward(self.H2, y)
        dH1 = self.FC.backward(dA1)
        dH0 = self.RNN.backward(dH1)  # dH0 is not used

    def _calc_entropy(self, y):
        return self.activation.cross_entropy(self.H2, y)
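# A minimal usage sketch, assuming inputs of shape (n_samples, n_sequences,
# n_features) and that ScratchRNN, FullyConnectedLayer, GetMiniBatch and the
# other project classes are importable; the data below is illustrative only.
import numpy as np

X_seq = np.random.rand(60, 10, 8)     # 60 sequences of length 10 with 8 features
y_seq = np.arange(60) % 10            # fake labels covering all 10 classes

rnn_clf = ScratchRNNClassifier(n_nodes1=50, n_output=10, n_epochs=2)
rnn_clf.fit(X_seq, y_seq)
print(rnn_clf.predict(X_seq)[:5])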
def network2():
    layer1 = FullyConnectedLayer(4, 6, activation=SigmoidActivation())
    layer2 = FullyConnectedLayer(6, 10, activation=SigmoidActivation())
    layer3 = FullyConnectedLayer(10, 2, activation=SigmoidActivation())
    layer1.init_weigths(GaussInitializer())
    layer2.init_weigths(GaussInitializer())
    layer3.init_weigths(GaussInitializer())
    return 'sigmoid_cross_entropy', Network(
        [layer1, layer2, layer3],
        cost_function=CrossEntropyCost(),
        test_function=dataset_norm4.output_test)