Example #1
File: nmt.py Project: rooa/sp2016.11-731
    def build_model1(self):
        # LookupTable to Embedding
        src_embedding_layer = EmbeddingLayer(input_dim=self.n_src_vocab, output_dim=self.src_embed_dim, name='src_embedding')
        tgt_embedding_layer = EmbeddingLayer(input_dim=self.n_tgt_vocab, output_dim=self.tgt_embed_dim, name='tgt_embedding')

        # LSTMs
        src_lstm_forward = LSTM(input_dim=self.src_embed_dim, output_dim=self.src_lstm_op_dim)
        src_lstm_backward = LSTM(input_dim=self.src_embed_dim, output_dim=self.src_lstm_op_dim)
        tgt_lstm = LSTM(input_dim=self.tgt_embed_dim, output_dim=self.tgt_lstm_op_dim)
        sys.stderr.write(str(tgt_lstm.params) + "\n")  # TODO

        # From target LSTM to target word indexes
        # Input: target LSTM output dim + Attention from BiLSTM
        proj_layer = FullyConnectedLayer(input_dim=self.tgt_lstm_op_dim + 2 * self.src_lstm_op_dim, output_dim=self.n_tgt_vocab, activation='softmax')

        params = src_embedding_layer.params + tgt_embedding_layer.params + src_lstm_forward.params + src_lstm_backward.params + tgt_lstm.params[:-1] + proj_layer.params

        # declare input variables
        src_ip = T.ivector()
        tgt_ip = T.ivector()
        tgt_op = T.ivector()

        # lookup table -> embedding
        src_embed_ip = src_embedding_layer.fprop(src_ip)
        tgt_embed_ip = tgt_embedding_layer.fprop(tgt_ip)

        # embedding -> source BiLSTM
        src_lstm_forward.fprop(src_embed_ip)
        src_lstm_backward.fprop(src_embed_ip[::-1, :])
        # Concatenate forward/backward states. (Flip the backward states again so each row aligns with the same source word)
        encoderh = T.concatenate((src_lstm_forward.h, src_lstm_backward.h[::-1, :]), axis=1)

        # End of source BiLSTM -> target LSTM
        tgt_lstm.h_0 = encoderh[-1]
        tgt_lstm.fprop(tgt_embed_ip)

        # Attention (dot-product scores; see http://arxiv.org/abs/1508.04025)
        # Scores: decoder states against encoder states; context: score-weighted sum of encoder states.
        # Note that the raw scores are used directly here, without softmax normalization.
        attention = tgt_lstm.h.dot(encoderh.transpose())
        attention = attention.dot(encoderh)

        # Concatenate the attention context with the decoder states (concatenation order is a matter of preference)
        decoderh = T.concatenate((attention, tgt_lstm.h), axis=1)

        # LSTM output -> target word
        proj_op = proj_layer.fprop(decoderh)

        # Cost + regularization (beta is a regularization coefficient assumed to be defined elsewhere)
        cost = T.nnet.categorical_crossentropy(proj_op, tgt_op).mean()
        cost += beta * T.mean((tgt_lstm.h[:-1] ** 2 - tgt_lstm.h[1:] ** 2) ** 2)

        return dict({'cost': cost,
                     'src_ip': src_ip,
                     'tgt_ip': tgt_ip,
                     'tgt_op': tgt_op,
                     'params': params,
                     'proj_op': proj_op})
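The dictionary returned by build_model1 exposes the cost, the symbolic input variables, and the trainable parameters, so it can be compiled into a Theano training function. A minimal sketch of that step, assuming an instance nmt of the surrounding model class and plain SGD with a learning rate of 0.01 (both are illustrative assumptions, not part of the original code):

import theano
import theano.tensor as T

model = nmt.build_model1()  # nmt is a hypothetical instance of the class above
# Gradients of the cost with respect to every trainable parameter
grads = T.grad(model['cost'], model['params'])
# Plain SGD updates; the 0.01 learning rate is an illustrative choice
updates = [(p, p - 0.01 * g) for p, g in zip(model['params'], grads)]
train_fn = theano.function(
    inputs=[model['src_ip'], model['tgt_ip'], model['tgt_op']],
    outputs=model['cost'],
    updates=updates)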
    def _gen_layers(self):
        self.RNN = ScratchRNN(n_nodes=self.n_nodes1,
                              initializer=HeInitializer(),
                              optimizer=AdaGrad(self.lr),
                              activator=TanH(),
                              lr=self.lr)

        self.FC = FullyConnectedLayer(self.n_nodes1,
                                      self.n_output,
                                      initializer=HeInitializer(),
                                      optimizer=AdaGrad(self.lr))
        self.activation = Softmax()
Example #3
class DAEModel(object):
    def __init__(self, layers=[200, 120, 200]):
        self.layers = layers

        self.layer1 = FullyConnectedLayer(self.layers[0], self.layers[1])
        self.layer2 = FullyConnectedLayer(self.layers[1], self.layers[2])

    def forward(self, inputs):
        c1 = self.layer1.forward(inputs)
        x2 = activation.sigmoid(c1)
        c2 = self.layer2.forward(x2)
        outputs = activation.sigmoid(c2)
        return x2, outputs
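A brief usage sketch for the autoencoder above. It assumes the project's FullyConnectedLayer and activation modules are importable and that forward accepts NumPy batches; both are assumptions, since the original snippet does not show its imports:

import numpy as np

model = DAEModel(layers=[200, 120, 200])
x = np.random.rand(32, 200)          # hypothetical batch of 32 inputs with 200 features
hidden, recon = model.forward(x)     # bottleneck activations and reconstruction
mse = np.mean((recon - x) ** 2)      # reconstruction error of the autoencoder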
Example #4
    def fit(self, X, y, X_val=None, y_val=None):
        X = np.array(X)
        y = np.array(y)

        optimizer = AdaGrad(self.lr)
        #self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1, SimpleInitializer(self.sigma), optimizer)
        self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1,
                                       HeInitializer(), optimizer)
        self.activation1 = ReLU()
        self.FC2 = FullyConnectedLayer(self.n_nodes1, self.n_nodes2,
                                       HeInitializer(), optimizer)
        self.activation2 = ReLU()
        self.FC3 = FullyConnectedLayer(self.n_nodes2, self.n_output,
                                       HeInitializer(), optimizer)
        self.activation3 = Softmax()

        eye = np.eye(len(np.unique(y)))
        start = time.time()
        # for each epoch
        for epoch in range(self.n_epochs):
            # generate mini batches
            mini_batch = GetMiniBatch(X, y, self.batch_size)

            # for each mini batch
            train_entrpy = []
            for mini_X, mini_y in mini_batch:
                # training step (forward and/or backward propagation)
                mini_entrpy = self._propagate(X=mini_X,
                                              y=eye[mini_y.reshape(-1, )],
                                              predict=False)
                # store the entropy of each mini batch
                train_entrpy.append(mini_entrpy)
            # store the average entropy over the mini batches
            self.entropy["training"].append(
                sum(train_entrpy) / len(train_entrpy))
            # do the same for validation data, if provided
            if (X_val is not None) and (y_val is not None):
                val_entrpy = self._propagate(X=X_val,
                                             y=eye[y_val.reshape(-1, )],
                                             predict=False,
                                             validation=True)
                self.entropy["validation"].append(val_entrpy)

            lap = time.time()
            print("epoch: ", epoch)
            print("process time: ", lap - start, "sec")
        return self.entropy
Example #5
                                     pretrained=src_embedding,
                                     name='src_embedding')

tgt_embedding_layer = EmbeddingLayer(input_dim=src_embedding.shape[0],
                                     output_dim=src_embedding.shape[1],
                                     name='tgt_embedding')

tgt_lstm_0 = FastLSTM(input_dim=1024, output_dim=1024, name='tgt_lstm_0')

tgt_lstm_1 = FastLSTM(input_dim=1024, output_dim=1024, name='tgt_lstm_1')

tgt_lstm_2 = FastLSTM(input_dim=1024, output_dim=1024, name='tgt_lstm_2')

tgt_lstm_h_to_vocab = FullyConnectedLayer(
    input_dim=1024,
    output_dim=tgt_embedding_layer.input_dim,
    batch_normalization=False,
    activation='softmax',
    name='tgt_lstm_h_to_vocab')

# Set model parameters
params = tgt_embedding_layer.params
params += [
    src_lstm_0.h_0, src_lstm_0.c_0, src_lstm_1.h_0, src_lstm_1.c_0,
    src_lstm_2.h_0, src_lstm_2.c_0
]

for ind, rnn in enumerate([tgt_lstm_0, tgt_lstm_1, tgt_lstm_2]):
    if ind == 0:
        params += rnn.params[:-1]
    else:
        params += rnn.params

def plot(error_curve):
    plt.plot([x for x in range(0, len(error_curve))], error_curve)
    plt.show()


def testing(net, data, labels):
    output = net.activate(data)
    answer = output.argmax(1)
    percentage = (answer == labels).sum() / float(data.shape[0]) * 100
    return percentage


net = Network()
layer_1 = FullyConnectedLayer(Logistic(), 4, 256)
layer_2 = FullyConnectedLayer(Logistic(), 256, 3)
net.append_layer(layer_1)
net.append_layer(layer_2)
params = Backprop_params(100, 1e-5, 1, 0.9, True, [0.01, 0.01], 0)
method = Backpropagation(params, net)
data_all = prepareData()
train_data = data_all[0]
test_data = data_all[1]
train_labels = data_all[2]
test_labels = data_all[3]

error_curve = method.train(train_data, train_labels)
print "Train efficiency: " + str(testing(net, train_data, train_labels))
print "Test efficiency: " + str(testing(net, test_data, test_labels))
plot(error_curve)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.plot(points[:, 0], points[:, 1], lw=2, label='ROC curve')
    plt.plot([0.0, 1.0], [0.0, 1.0], lw=2)
    plt.show()
    auc = 0
    for i in xrange(1, len(points)):
        auc += (points[i, 0] - points[i - 1, 0]) * points[i, 1]
    print 'auc = ' + str(auc)


# load the data from a file
data = loadDataFromFile("../../Datasets/tic-tac-toe.data.txt")
# configure the network
net = Network()
layer_1 = FullyConnectedLayer(Logistic(), 9, 9)
layer_2 = FullyConnectedLayer(Logistic(), 9, 1)
net.append_layer(layer_1)
net.append_layer(layer_2)
params = Backprop_params(500, 1e-5, 10, 0.9, False, [0.01, 0.01], 0)
method = Backpropagation(params, net)
train_data = data[0]
test_data = data[1]
train_labels = data[2]
test_labels = data[3]

# training
error_curve = method.train(train_data, train_labels)
plot(error_curve)
# print the results
print "Train efficiency: " + str(testing(net, train_data, train_labels))
Example #8
peek_dim = 0 if not peek_encoder else src_rnn_op_dim
decoder = [
    FastLSTM(input_dim=tgt_emb_dim + peek_dim,
             output_dim=tgt_rnn_op_dim,
             name='tgt_rnn_0')
]

for i in range(int(args.num_layers) - 1):
    decoder.append(
        FastLSTM(input_dim=tgt_rnn_op_dim + peek_dim,
                 output_dim=tgt_rnn_op_dim,
                 name='tgt_rnn_%d' % (i + 1)))  # i + 1 so the name 'tgt_rnn_0' is not reused for the first appended layer
# Projection layers
tgt_rnn_h_to_vocab = FullyConnectedLayer(input_dim=tgt_rnn_op_dim +
                                         src_rnn_op_dim,
                                         output_dim=n_tgt,
                                         batch_normalization=False,
                                         activation='softmax',
                                         name='tgt_rnn_h_to_vocab')

if args.attention == 'mlp':
    attention_layer_1 = FullyConnectedLayer(input_dim=tgt_rnn_op_dim,
                                            output_dim=tgt_rnn_op_dim,
                                            batch_normalization=False,
                                            activation='relu',
                                            name='attention_layer_1')

    attention_layer_2 = FullyConnectedLayer(input_dim=tgt_rnn_op_dim,
                                            output_dim=tgt_rnn_op_dim,
                                            batch_normalization=False,
                                            activation='relu',
                                            name='attention_layer_2')

def get_learning_set():
    _dict = load_dictionary('Datasets/dictionary.txt')
    ngrams = get_ngrams()
    data = np.zeros((len(_dict), len(ngrams)))
    for i in xrange(0, len(_dict)):
        for j in xrange(0, len(ngrams)):
            if ngrams[j] in _dict[i]:
                data[i, j] += 1
    return data


if __name__ == "__main__":
    data = get_learning_set()

    net = Network()
    layer_1 = FullyConnectedLayer(Logistic(), 1089, 100)
    layer_2 = FullyConnectedLayer(Linear(), 100, 1089)
    net.append_layer(layer_1)
    net.append_layer(layer_2)
    params = Backprop_params(200, 1e-5, 1000, 0.9, False, [0.01, 0.01], 0)
    method = Backpropagation(params, net)

    rnd_index = np.random.permutation(len(data))
    data = data[rnd_index]

    method.train(data, data)

    Network.save_network(net, 'nets/network.net')
Example #10
import dataset_mnist

from activation import SigmoidActivation, SoftmaxActivation, ReluActivation, TanhActivation, LinearActivation
from cost import QuadraticCost, CrossEntropyCost
from initializer import GaussInitializer, GaussSqrtInitializer
from layer import FullyConnectedLayer
from network import Network
import optimizer

X_train, y_train, X_test, y_test = dataset_mnist.load_mnist()

layer1 = FullyConnectedLayer(784, 100, activation=SigmoidActivation())
layer2 = FullyConnectedLayer(100, 10, activation=SigmoidActivation())
layer1.init_weigths(GaussInitializer())
layer2.init_weigths(GaussInitializer())
net = Network([layer1, layer2],
              cost_function=CrossEntropyCost(),
              test_function=dataset_mnist.output_test)
opti = optimizer.SGDOptimizer(lr=0.5, batch_size=10, lambda1=0.0, lambda2=5.0)

net.evaluate(X_test, y_test)
net.train(X_train, y_train, X_test, y_test, opti, 30, disp_train_cost=True)
Example #11
    def __init__(self, layers=[200, 120, 200]):
        self.layers = layers

        self.layer1 = FullyConnectedLayer(self.layers[0], self.layers[1])
        self.layer2 = FullyConnectedLayer(self.layers[1], self.layers[2])
Example #12
class ScratchDeepNeuralNetrowkClassifier():
    def __init__(self,
                 n_features=784,
                 n_nodes1=400,
                 n_nodes2=200,
                 n_output=10,
                 sigma=0.01,
                 lr=0.001,
                 batch_size=20,
                 n_epochs=30):
        self.n_features = n_features
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.n_output = n_output
        self.sigma = sigma
        self.lr = lr
        self.batch_size = batch_size
        self.entropy = {"training": [], "validation": []}
        self.n_epochs = n_epochs

    def fit(self, X, y, X_val=None, y_val=None):
        X = np.array(X)
        y = np.array(y)

        optimizer = AdaGrad(self.lr)
        #self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1, SimpleInitializer(self.sigma), optimizer)
        self.FC1 = FullyConnectedLayer(self.n_features, self.n_nodes1,
                                       HeInitializer(), optimizer)
        self.activation1 = ReLU()
        self.FC2 = FullyConnectedLayer(self.n_nodes1, self.n_nodes2,
                                       HeInitializer(), optimizer)
        self.activation2 = ReLU()
        self.FC3 = FullyConnectedLayer(self.n_nodes2, self.n_output,
                                       HeInitializer(), optimizer)
        self.activation3 = Softmax()

        eye = np.eye(len(np.unique(y)))
        start = time.time()
        # for each epoch
        for epoch in range(self.n_epochs):
            # generate mini batches
            mini_batch = GetMiniBatch(X, y, self.batch_size)

            # for each mini batch
            train_entrpy = []
            for mini_X, mini_y in mini_batch:
                # training step (forward and/or backward propagation)
                mini_entrpy = self._propagate(X=mini_X,
                                              y=eye[mini_y.reshape(-1, )],
                                              predict=False)
                # store the entropy of each mini batch
                train_entrpy.append(mini_entrpy)
            # store the average entropy over the mini batches
            self.entropy["training"].append(
                sum(train_entrpy) / len(train_entrpy))
            # do the same for validation data, if provided
            if (X_val is not None) and (y_val is not None):
                val_entrpy = self._propagate(X=X_val,
                                             y=eye[y_val.reshape(-1, )],
                                             predict=False,
                                             validation=True)
                self.entropy["validation"].append(val_entrpy)

            lap = time.time()
            print("epoch: ", epoch)
            print("process time: ", lap - start, "sec")
        return self.entropy

    def predict(self, X):
        X = np.array(X)
        # forward propagation only
        self._propagate(X, y=None, predict=True)
        return np.argmax(self.Z3, axis=1)

    def _propagate(self, X, y=None, predict=False, validation=False):

        #forward propagation
        A1 = self.FC1.forward(X)
        Z1 = self.activation1.forward(A1)
        A2 = self.FC2.forward(Z1)
        Z2 = self.activation2.forward(A2)
        A3 = self.FC3.forward(Z2)
        self.Z3 = self.activation3.forward(A3)
        if predict:
            return
        #entropy
        entropy = self.activation3.cross_entropy(self.Z3, y)

        if validation:
            return entropy

        #backward propagation
        dA3 = self.activation3.backward(self.Z3, y)
        dZ2 = self.FC3.backward(dA3)
        dA2 = self.activation2.backward(dZ2)
        dZ1 = self.FC2.backward(dA2)
        dA1 = self.activation1.backward(dZ1)
        dZ0 = self.FC1.backward(dA1)  # dZ0 is not used

        return entropy
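A short usage sketch for the fully connected classifier above, assuming the scratch components it relies on (FullyConnectedLayer, HeInitializer, AdaGrad, ReLU, Softmax, GetMiniBatch) are importable. The dummy data below is hypothetical and stands in for flattened 28x28 MNIST images:

import numpy as np

X_train = np.random.rand(200, 784)            # hypothetical flattened images
y_train = np.random.randint(0, 10, 200)       # hypothetical labels for 10 classes

clf = ScratchDeepNeuralNetrowkClassifier(n_epochs=5, batch_size=20)
history = clf.fit(X_train, y_train)           # returns the recorded entropy per epoch
pred = clf.predict(X_train)                   # class indices via argmax over the softmax output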
class ScratchRNNClassifier():
    def __init__(self,
                 n_nodes1=400,
                 n_output=10,
                 sigma=0.01,
                 lr=0.001,
                 batch_size=20,
                 n_epochs=30):
        self.n_nodes1 = n_nodes1
        self.n_output = n_output
        self.n_sequences = None
        self.n_features = None
        self.sigma = sigma
        self.lr = lr
        self.batch_size = batch_size
        self.entropy = {"training": [], "validation": []}
        self.n_epochs = n_epochs

    def fit(self, X, y, X_val=None, y_val=None):
        X = np.array(X)
        y = np.array(y)

        _, _, self.n_features = X.shape

        # build the layers
        self._gen_layers()
        #one-hot
        eye = np.eye(len(np.unique(y)))
        start = time.time()
        # for each epoch
        for epoch in range(self.n_epochs):
            # generate mini batches
            mini_batch = GetMiniBatch(X, y, self.batch_size)

            # for each mini batch
            train_entrpy = []
            for mini_X, mini_y in mini_batch:
                #forward propagation
                self._propagate_forward(X=mini_X)
                # compute entropy
                mini_entrpy = self._calc_entropy(y=eye[mini_y.reshape(-1, )])
                #backward propagation
                self._propagate_backward(y=eye[mini_y.reshape(-1, )])
                # store the entropy of each mini batch
                train_entrpy.append(mini_entrpy)

            # store the average entropy over the mini batches
            self.entropy["training"].append(
                sum(train_entrpy) / len(train_entrpy))
            # do the same for validation data, if provided
            if (X_val is not None) and (y_val is not None):
                self._propagate_forward(X=X_val)
                val_entrpy = self._calc_entropy(y=eye[y_val.reshape(-1, )])
                self._propagate_backward(y=eye[y_val.reshape(-1, )])
                self.entropy["validation"].append(val_entrpy)

            lap = time.time()
            print("epoch: ", epoch)
            print("process time: ", lap - start, "sec")
        return self.entropy

    def predict(self, X):
        X = np.array(X)
        self._propagate_forward(X)
        return np.argmax(self.H2, axis=1)

    def _gen_layers(self):
        self.RNN = ScratchRNN(n_nodes=self.n_nodes1,
                              initializer=HeInitializer(),
                              optimizer=AdaGrad(self.lr),
                              activator=TanH(),
                              lr=self.lr)

        self.FC = FullyConnectedLayer(self.n_nodes1,
                                      self.n_output,
                                      initializer=HeInitializer(),
                                      optimizer=AdaGrad(self.lr))
        self.activation = Softmax()

    def _propagate_forward(self, X):
        H1 = self.RNN.forward(X)
        A1 = self.FC.forward(H1[:, -1, :])
        self.H2 = self.activation.forward(A1)

    def _propagate_backward(self, y):
        dA1 = self.activation.backward(self.H2, y)
        dH1 = self.FC.backward(dA1)
        dH0 = self.RNN.backward(dH1)  # dH0 is not used

    def _calc_entropy(self, y):
        return self.activation.cross_entropy(self.H2, y)
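The RNN classifier expects 3-D input of shape (samples, sequence length, features), as implied by the unpacking in fit. A minimal usage sketch with hypothetical dummy sequences, again assuming ScratchRNN and the other scratch layers are importable:

import numpy as np

X_seq = np.random.rand(100, 10, 8)            # 100 hypothetical sequences, 10 steps, 8 features
y_seq = np.random.randint(0, 10, 100)         # hypothetical labels for 10 classes

rnn_clf = ScratchRNNClassifier(n_nodes1=50, batch_size=20, n_epochs=5)
history = rnn_clf.fit(X_seq, y_seq)           # entropy per epoch
pred = rnn_clf.predict(X_seq)                 # argmax over the softmax output of the last step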
Example #14
def network2():

    layer1 = FullyConnectedLayer(4, 6, activation=SigmoidActivation())
    layer2 = FullyConnectedLayer(6, 10, activation=SigmoidActivation())
    layer3 = FullyConnectedLayer(10, 2, activation=SigmoidActivation())
    layer1.init_weigths(GaussInitializer())
    layer2.init_weigths(GaussInitializer())
    layer3.init_weigths(GaussInitializer())
    return 'sigmoid_cross_entropy', Network(
        [layer1, layer2, layer3],
        cost_function=CrossEntropyCost(),
        test_function=dataset_norm4.output_test)
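network2 only builds the model; training would follow the same pattern as Example #10. A sketch of that call, where dataset_norm4.load_data and the optimizer settings are assumptions made for illustration:

name, net = network2()
X_train, y_train, X_test, y_test = dataset_norm4.load_data()   # hypothetical loader
opti = optimizer.SGDOptimizer(lr=0.5, batch_size=10, lambda1=0.0, lambda2=5.0)
net.evaluate(X_test, y_test)
net.train(X_train, y_train, X_test, y_test, opti, 30, disp_train_cost=True)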