Example #1
def test_set_weights_without_biases():
    my_cnn = CNN()
    image_size = (np.random.randint(32, 100), np.random.randint(20, 100), np.random.randint(3, 10))
    number_of_conv_layers = np.random.randint(2, 10)
    my_cnn.add_input_layer(shape=image_size, name="input")
    previous_depth = image_size[2]
    for k in range(number_of_conv_layers):
        number_of_filters = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_conv2d_layer(num_of_filters=number_of_filters,
                                   kernel_size=(kernel_size, kernel_size),
                                   padding="same", activation='linear')

        w = my_cnn.get_weights_without_biases(layer_number=k + 1)
        w_set = np.full_like(w, 0.2)
        my_cnn.set_weights_without_biases(w_set, layer_number=k + 1)
        w_get = my_cnn.get_weights_without_biases(layer_number=k + 1)
        assert w_get.shape == w_set.shape
        previous_depth = number_of_filters
    pool_size = np.random.randint(2, 5)
    my_cnn.append_maxpooling2d_layer(pool_size=pool_size, padding="same",
                                     strides=2, name="pool1")
    my_cnn.append_flatten_layer(name="flat1")
    my_cnn.append_dense_layer(num_nodes=10)
    number_of_dense_layers = np.random.randint(2, 10)
    previous_nodes = 10
    for k in range(number_of_dense_layers):
        number_of_nodes = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_dense_layer(num_nodes=number_of_nodes)

        w = my_cnn.get_weights_without_biases(layer_number=k + number_of_conv_layers + 4)
        w_set = np.full_like(w, 0.8)
        my_cnn.set_weights_without_biases(w_set, layer_number=k + number_of_conv_layers + 4)
        w_get = my_cnn.get_weights_without_biases(layer_number=k + number_of_conv_layers + 4)
        assert w_get.shape == w_set.shape
        previous_nodes = number_of_nodes
Example #2
    def __init__(self, embed_size, vocab: VocabEntry):
        """
        Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        #pad_token_idx = vocab.src['<pad>']
        #self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        self.char_embed_size = 50
        self.embed_size = embed_size
        self.char_embedding = nn.Embedding(len(vocab.char2id),
                                           self.char_embed_size,
                                           padding_idx=0)
        self.cnn = CNN(char_embed_size=self.char_embed_size,
                       word_embed_size=embed_size,
                       kernel_size=5)
        self.highway = Highway(embed_size=embed_size)
        self.dropout = nn.Dropout(0.3)
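
A hedged aside: several of these examples pair the CNN with a Highway(embed_size) module. A minimal sketch of such a module, using the standard highway-network gating (gate * ReLU(proj(x)) + (1 - gate) * x), is shown below; the assignment classes used in the examples may take extra arguments such as a dropout rate.

import torch
import torch.nn as nn

class Highway(nn.Module):
    def __init__(self, embed_size):
        super().__init__()
        self.proj = nn.Linear(embed_size, embed_size)
        self.gate = nn.Linear(embed_size, embed_size)

    def forward(self, x_conv_out):
        # project, then let a learned gate blend projection and skip connection
        x_proj = torch.relu(self.proj(x_conv_out))
        x_gate = torch.sigmoid(self.gate(x_conv_out))
        return x_gate * x_proj + (1.0 - x_gate) * x_conv_out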
Example #3
    def __init__(self, embed_size, vocab):
        """
        Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output 
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        self.pad_token_idx = vocab.char2id['<pad>'] 
        self.e_char = self.char_embed_size = 50 
        self.e_word = self.embed_size = embed_size #e_word
        self.embeddings = nn.Embedding(len(vocab.char2id), self.char_embed_size, padding_idx=self.pad_token_idx)
        self.vocab = vocab #vocab object 
        
        self.dropoutp = 0.3 
        self.dropout = nn.Dropout(self.dropoutp)
        self.cnnlay = CNN(self.e_char, f=self.e_word)
        self.highwaylay = Highway(self.e_word) 
Example #4
    def __init__(self, embed_size, vocab):
        """
        Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        self.CHAR_EMBED_SIZE = 50
        self.dropout_rate = 0.3
        self.embed_size = embed_size
        pad_token_idx = vocab['<pad>']
        self.conv = CNN(self.CHAR_EMBED_SIZE, embed_size)
        self.highway = Highway(embed_size, embed_size)
        self.charEmbedding = nn.Embedding(num_embeddings=len(vocab.char2id),
                                          embedding_dim=self.CHAR_EMBED_SIZE,
                                          padding_idx=pad_token_idx)
        self.dropout = nn.Dropout(self.dropout_rate)
Example #5
    def __init__(self, settings):
        super().__init__()
        self.enc_type = settings['conv_type']
        self.out_c = settings['out_c']
        if self.enc_type == 'CNN':
            self.enc = CNN(settings)
        else:
            self.enc = PCNN(settings)
            self.out_c *= 3
        self.out_feature_size = self.out_c

        self.n_rel = settings['n_rel']
        self.r_embed = nn.Parameter(torch.zeros(self.n_rel,
                                                self.out_feature_size),
                                    requires_grad=True)
        self.r_bias = nn.Parameter(torch.zeros(self.n_rel), requires_grad=True)

        # attention module
        self.att_sm = nn.Softmax(dim=-1)
        eye = torch.eye(self.out_feature_size, self.out_feature_size)
        # self.att_W = nn.Parameter(eye.expand(self.n_rel, self.out_c, self.out_c), requires_grad=True)
        # n_rel * out_feature_size * out_c
        self.att_W = nn.Parameter(eye.unsqueeze(0).repeat([self.n_rel, 1, 1]),
                                  requires_grad=True)
        # out_feature_size * out_c
        self.att_W_small = nn.Parameter(eye, requires_grad=True)
        # pcnn
        # self.linear = nn.Linear(settings['out_feature_size'] * 3, settings['n_rel'])
        self.linear = nn.Linear(self.out_feature_size, 1)
        self.dropout = nn.Dropout(p=settings['dropout_p'])

        # con = math.sqrt(6.0/(self.out_feature_size + self.n_rel))
        con = 0.01
        nn.init.uniform_(self.r_embed, a=-con, b=con)
        nn.init.uniform_(self.r_bias, a=-con, b=con)
        nn.init.uniform_(self.att_W_small, a=-con, b=con)
Example #6
def question_1i_sanity_check():
    """ Sanity check for model_embeddings.py
        basic shape check
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1i: Convolutional Network")
    print("-" * 80)
    char_embed_size = 50
    max_word_length = 21
    kernel_size = 5
    inpt = torch.randn(BATCH_SIZE,
                       char_embed_size,
                       max_word_length,
                       dtype=torch.float32)
    conv = CNN(char_embed_size, EMBED_SIZE, kernel_size)

    output = conv(inpt)
    output_expected_size = [BATCH_SIZE, EMBED_SIZE]
    assert (
        list(output.size()) == output_expected_size
    ), "output shape is incorrect: it should be:\n {} but is:\n{}".format(
        output_expected_size, list(output.size()))
    print("Sanity Check Passed for Question 1i: Convolutional Network!")
    print("-" * 80)
Example #7
def play(OPTIONS):
    """main method"""
    trained_agent = lambda x: np.argmax(mcts.get_raw_action_prob(x))
    random_player = lambda x: np.random.choice(
        np.where(x.get_valid_moves() == 1)[0])
    num_games = 100

    if OPTIONS.optimal:
        opponent = get_optimal_move
    elif OPTIONS.suboptimal:
        opponent = (
            lambda x: get_optimal_move(x)
            if random.random() > Config.suboptimality else random_player(x))
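    # NOTE: this condition repeats OPTIONS.suboptimal from the branch above,
    # so the trained_agent opponent below is unreachable as written.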
    elif OPTIONS.suboptimal:
        opponent = trained_agent
    else:
        opponent = random_player

    network = CNN(Config.levels)
    network.load_checkpoint("./temp/", "best.pth.tar")
    mcts = MCTS(network)

    a = gym.Gym(trained_agent, opponent, potential=0.99)
    print(a.play_games(num_games, mode=2, verbose=True))
Example #8
    def __init__(self, word_embed_size, vocab):
        """
        Init the Embedding layer for one language
        @param word_embed_size (int): Embedding size (dimensionality) for the output word
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.

        Hints: - You may find len(self.vocab.char2id) useful when create the embedding
        """
        super(ModelEmbeddings, self).__init__()

        ### YOUR CODE HERE for part 1h
        self.word_embed_size = word_embed_size
        self.char_embed_size = 50
        self.vocab = vocab
        self.char_embed = nn.Embedding(len(self.vocab.char2id),
                                       self.char_embed_size,
                                       padding_idx=self.vocab.char_pad)
        self.cnn = CNN(self.char_embed_size,
                       self.word_embed_size,
                       kernel_size=5,
                       padding=1)
        self.highway = Highway(self.word_embed_size,
                               self.word_embed_size,
                               dropout_rate=0.3)
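
Example #8 shows only the constructor. A hedged sketch of a matching forward pass, following the shape contract documented in the forward() example further below (character indices of shape (sentence_length, batch_size, max_word_length) in, word embeddings of shape (sentence_length, batch_size, word_embed_size) out), assuming self.cnn consumes (batch, char_embed_size, max_word_length) and self.highway applies its own dropout:

    def forward(self, input):
        # input: (sentence_length, batch_size, max_word_length) character indices
        sent_len, batch_size, max_word_len = input.shape
        x_emb = self.char_embed(input)                    # (sent_len, batch, m_word, e_char)
        x_reshaped = (x_emb.view(sent_len * batch_size, max_word_len, -1)
                      .permute(0, 2, 1))                  # (sent_len * batch, e_char, m_word)
        x_conv_out = self.cnn(x_reshaped)                 # (sent_len * batch, word_embed_size)
        x_word_emb = self.highway(x_conv_out)
        return x_word_emb.view(sent_len, batch_size, -1)  # (sent_len, batch, word_embed_size)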
Example #9
def question_1g_sanity_check():
    """ Sanity check for the class `CNN`.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: CNN")
    print("-" * 80)
    SENTENCE_LENGTH = 20
    BATCH_SIZE = 5
    E_CHAR = 50
    M_WORD = 21
    F = 3
    # model = CNN(f=F, e_char = E_CHAR, m_word=M_WORD)
    model = CNN(f=F, e_char=E_CHAR)
    x_reshaped = torch.randn((SENTENCE_LENGTH, BATCH_SIZE, E_CHAR, M_WORD))

    print("Running test on a batch of x_reshaped")
    x_conv_out = model.forward(x_reshaped)
    assert list(x_conv_out.size()) == [
        SENTENCE_LENGTH, BATCH_SIZE, F
    ], "Output size should be: {}, but got {}".format(
        (SENTENCE_LENGTH, BATCH_SIZE, F), x_conv_out.size())

    print("Sanity Check Passed for Question 1g: CNN!")
    print("-" * 80)
Example #10
    def __init__(self, embed_size, vocab):
        """
        Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output 
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        # A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(
        #     len(vocab.src), embed_size, padding_idx=pad_token_idx)
        # End A4 code

        # YOUR CODE HERE for part 1j
        self.char_embed_size = 50
        self.embed_size = embed_size
        pad_token_idx = vocab.char2id['<pad>']
        self.embeddings = nn.Embedding(len(vocab.char2id),
                                       self.char_embed_size,
                                       padding_idx=pad_token_idx)
        # print(embed_size)
        self.convolution = CNN(self.char_embed_size, self.embed_size)
        self.highway_layer = Highway(self.embed_size, dropout_rate=0.3)
Example #11
    def __init__(self, embed_size, vocab):
        """
        Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output 
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        self.embed_size = embed_size
        self.vocab = vocab
        self.num_char = len(vocab.char2id)
        self.char_embed_size = 50
        self.max_word_length = 21
        self.dropout_rate = 0.3
        self.kernel_size = 5
        self.embeddings = nn.Embedding(self.num_char, self.char_embed_size, padding_idx=self.vocab.char2id['<pad>'])

        self.cnn = CNN(self.kernel_size, self.char_embed_size, self.embed_size, self.max_word_length)
        self.highway = Highway(self.embed_size, self.dropout_rate)
Example #12
    def __init__(self, embed_size, vocab):
        """
        Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output 
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        e_char = 50  #character embedding size
        max_word_length = 21
        dropout_rate = 0.3
        self.embed_size = embed_size
        pad_token_idx = vocab.char2id['<pad>']
        self.char_embed_layer = nn.Embedding(len(vocab.char2id),
                                             e_char,
                                             padding_idx=pad_token_idx)
        self.cnn_layer = CNN(max_word_length, e_char, self.embed_size)
        self.highway_layer = Highway(self.embed_size, dropout_rate)
Example #13
    def forward(self, input):
        """
        Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length) where
            each integer is an index into the character vocabulary

        @param output: Tensor of shape (sentence_length, batch_size, embed_size), containing the 
            CNN-based embeddings for each word of the sentences in the batch
        """
        ## A4 code
        # output = self.embeddings(input)
        # return output
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        #x_char_embedding = []
        # for x_padded in input:
        x_padded = input
        x_embedded = self.embeddings(x_padded)
        x_padded_dim = list(x_embedded.size())
        #        print(x_embedded.size())
        # need to convert 4d to 3d
        x_embedded = x_embedded.reshape(-1, x_padded_dim[3], x_padded_dim[2])
        #        print(x_embedded.size())
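        # note: constructing CNN and Highway inside forward() creates freshly
        # initialized layers on every call, so their weights are never trained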
        x_conv_out = CNN(in_channel=self.embed_size,
                         out_channel=self.embed_size).forward(x_embedded)
        x_conv_out = torch.squeeze(x_conv_out, -1)
        #        print(x_conv_out.size())
        x_conv_out = x_conv_out.reshape(x_padded_dim[0], x_padded_dim[1], -1)
        #        print(x_conv_out.size())
        x_highway = Highway(self.embed_size).forward(x_conv_out)
        #print(x_highway.size())
        x_dout = self.dropout(x_highway)
        #        x_char_embedding.append(x_dout)
        #x_char_embedding = torch.stack(x_char_embedding  )
        return x_dout
Example #14
    def __init__(self, embed_size, vocab):
        """
        Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output 
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        char_emb_dim = 50
        max_sentence_len = 21
        p_drop = 0.3
        kernel_size = 5
        pad_token_idx = vocab.char2id['<pad>']
        self.vocab = vocab
        self.embed_size = embed_size
        self.char_emb = nn.Embedding(len(vocab.char2id), char_emb_dim, pad_token_idx)
        self.cnn = CNN(kernel_size, embed_size, char_emb_dim, max_sentence_len)
        self.hwy = Highway(embed_size, p_drop)
Example #15
def test_get_weights_without_biases_3():
    my_cnn = CNN()
    image_size = (np.random.randint(32, 100), np.random.randint(20, 100), np.random.randint(3, 10))
    number_of_conv_layers = np.random.randint(2, 10)
    my_cnn.add_input_layer(shape=image_size, name="input")
    previous_depth = image_size[2]
    for k in range(number_of_conv_layers):
        number_of_filters = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_conv2d_layer(num_of_filters=number_of_filters,
                                   kernel_size=(kernel_size, kernel_size),
                                   padding="same", activation='linear')

        actual = my_cnn.get_weights_without_biases(layer_number=k + 1)
        assert actual.shape == (kernel_size, kernel_size, previous_depth, number_of_filters)
        previous_depth = number_of_filters
    actual = my_cnn.get_weights_without_biases(layer_number=0)
    assert actual is None
    pool_size = np.random.randint(2, 5)
    my_cnn.append_maxpooling2d_layer(pool_size=pool_size, padding="same",
                                     strides=2, name="pool1")
    actual = my_cnn.get_weights_without_biases(layer_name="pool1")
    assert actual is None
    my_cnn.append_flatten_layer(name="flat1")
    actual=my_cnn.get_weights_without_biases(layer_name="flat1")
    assert actual is None
    my_cnn.append_dense_layer(num_nodes=10)
    number_of_dense_layers = np.random.randint(2, 10)
    previous_nodes = 10
    for k in range(number_of_dense_layers):
        number_of_nodes = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_dense_layer(num_nodes=number_of_nodes)
        actual = my_cnn.get_weights_without_biases(layer_number=k + number_of_conv_layers + 4)
        # assert actual.shape == (previous_nodes, number_of_nodes)
        previous_nodes = number_of_nodes
Example #16
    def __init__(self,
                 word_embed_size,
                 vocab,
                 char_embed_size=50,
                 dropout_prob=0.3):
        """
        Init the Embedding layer for one language
        @param word_embed_size (int): Embedding size (dimensionality) for the output word
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.

        Hints: - You may find len(self.vocab.char2id) useful when create the embedding
        """
        super(ModelEmbeddings, self).__init__()

        ### YOUR CODE HERE for part 1h

        self.word_embed_size = word_embed_size  # for the autograder
        self.e_word = word_embed_size
        self.e_char = char_embed_size
        self.dropout_prob = dropout_prob
        self.embedding = nn.Embedding(len(vocab.char2id), self.e_char,
                                      vocab.char_pad)
        self.cnn = CNN(self.e_char, self.e_word)
        self.highway = Highway(self.e_word)
Example #17
def test_remove_last_layer():
    from tensorflow.keras.datasets import cifar10
    batch_size = 32
    num_classes = 10
    epochs = 100
    data_augmentation = True
    num_predictions = 20
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    model_name = 'keras_cifar10_trained_model.h5'
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    number_of_train_samples_to_use = 100
    X_train = X_train[0:number_of_train_samples_to_use, :]
    y_train = y_train[0:number_of_train_samples_to_use]
    my_cnn = CNN()
    my_cnn.add_input_layer(shape=(32, 32, 3), name="input")
    my_cnn.append_conv2d_layer(num_of_filters=16,
                               kernel_size=(3, 3),
                               padding="same",
                               activation='linear',
                               name="conv1")
    my_cnn.append_maxpooling2d_layer(pool_size=2,
                                     padding="same",
                                     strides=2,
                                     name="pool1")
    my_cnn.append_conv2d_layer(num_of_filters=8,
                               kernel_size=3,
                               activation='relu',
                               name="conv2")
    my_cnn.append_flatten_layer(name="flat1")
    my_cnn.append_dense_layer(num_nodes=10, activation="relu", name="dense1")
    my_cnn.append_dense_layer(num_nodes=2, activation="relu", name="dense2")
    out = my_cnn.predict(X_train)
    assert out.shape == (number_of_train_samples_to_use, 2)
    my_cnn.remove_last_layer()
    out = my_cnn.predict(X_train)
    assert out.shape == (number_of_train_samples_to_use, 10)
Example #18
import mnist
import numpy as np
from cnn import CNN
import matplotlib.pyplot as plt
import pickle

np.set_printoptions(edgeitems=100, linewidth=200000)

cnn = CNN(6, 12)

train_images = (mnist.train_images() / 255) - 0.5
train_labels = mnist.train_labels()

test_images = (mnist.test_images() / 255) - 0.5
test_labels = mnist.test_labels()

stats = cnn.train(train_images[:10000], train_labels[:10000],
                  test_images[:1000], test_labels[:1000], 10, 0.005)
epochs = stats[0]
avg_losses = stats[1]
accuracies = stats[2]

with open("artifacts/model.bin", "wb") as f:
    pickle.dump(cnn, f)

fig = plt.figure()
plt.subplots_adjust(hspace=0.5)

g1 = fig.add_subplot(2, 1, 1, ylabel="Loss", xlabel="Epoch")
g1.plot(epochs, avg_losses, label="Avg loss", color="red")
g1.legend(loc="center")
Example #19
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # embed()
        if (MODEL_TO_RUN == 0):
            model = CNN_LSTM(x_train.shape[1], y_train.shape[1],
                             len(vocab_processor.vocabulary_), embedding_dim,
                             filter_sizes, num_filters, l2_reg_lambda)
        elif (MODEL_TO_RUN == 1):
            model = LSTM_CNN(x_train.shape[1], y_train.shape[1],
                             len(vocab_processor.vocabulary_), embedding_dim,
                             filter_sizes, num_filters, l2_reg_lambda)
        elif (MODEL_TO_RUN == 2):
            model = CNN(x_train.shape[1], y_train.shape[1],
                        len(vocab_processor.vocabulary_), embedding_dim,
                        filter_sizes, num_filters, l2_reg_lambda)
        elif (MODEL_TO_RUN == 3):
            model = LSTM(x_train.shape[1], y_train.shape[1],
                         len(vocab_processor.vocabulary_), embedding_dim)
        else:
            print("PLEASE CHOOSE A VALID MODEL!\n"
                  "0 = CNN_LSTM\n1 = LSTM_CNN\n2 = CNN\n3 = LSTM\n")
            exit()

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(model.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
Example #20
def train(learning_rate, learning_rate_decay, dropout_rate, mini_batch_size,
          epochs, optimizer, random_seed, model_directory, model_filename,
          log_directory):

    np.random.seed(random_seed)

    if not os.path.exists(log_directory):
        os.makedirs(log_directory)

    # Load CIFAR10 dataset
    cifar10 = CIFAR10()
    x_train = cifar10.x_train
    y_train = cifar10.y_train
    y_train_onehot = cifar10.y_train_onehot
    x_valid = cifar10.x_valid
    y_valid = cifar10.y_valid
    y_valid_onehot = cifar10.y_valid_onehot

    num_classes = cifar10.num_classes
    input_size = cifar10.input_size

    print('CIFAR10 Input Image Size: {}'.format(input_size))

    model = CNN(input_size=input_size,
                num_classes=num_classes,
                optimizer=optimizer)

    train_accuracy_log = list()
    valid_accuracy_log = list()
    train_loss_log = list()

    for epoch in range(epochs):
        print('Epoch: %d' % epoch)

        learning_rate *= learning_rate_decay
        # Prepare mini batches on train set
        shuffled_idx = np.arange(len(x_train))
        np.random.shuffle(shuffled_idx)
        mini_batch_idx = [
            shuffled_idx[k:k + mini_batch_size]
            for k in range(0, len(x_train), mini_batch_size)
        ]

        # Validate on validation set
        valid_prediction_onehot = model.test(data=x_valid)
        valid_prediction = np.argmax(valid_prediction_onehot, axis=1).reshape(
            (-1, 1))
        valid_accuracy = model_accuracy(label=y_valid,
                                        prediction=valid_prediction)
        print('Validation Accuracy: %f' % valid_accuracy)
        valid_accuracy_log.append(valid_accuracy)

        # Train on train set
        for i, idx in enumerate(mini_batch_idx):
            train_loss = model.train(data=x_train[idx],
                                     label=y_train_onehot[idx],
                                     learning_rate=learning_rate,
                                     dropout_rate=dropout_rate)
            if i % 200 == 0:
                train_prediction_onehot = model.test(data=x_train[idx])
                train_prediction = np.argmax(train_prediction_onehot,
                                             axis=1).reshape((-1, 1))
                train_accuracy = model_accuracy(label=y_train[idx],
                                                prediction=train_prediction)
                print('Training Loss: %f, Training Accuracy: %f' %
                      (train_loss, train_accuracy))
                if i == 0:
                    train_accuracy_log.append(train_accuracy)
                    train_loss_log.append(train_loss)

    model.save(directory=model_directory, filename=model_filename)
    print('Trained model saved successfully')

    model.save_as_pb(directory=model_directory, filename=model_filename)
    print('Trained model saved as pb successfully')

    # The directory should not exist before calling this method
    signature_dir = os.path.join(model_directory, 'signature')
    assert (not os.path.exists(signature_dir))
    model.save_signature(directory=signature_dir)
    print('Trained model with signature saved successfully')

    plot_curve(train_losses=train_loss_log,
               train_accuracies=train_accuracy_log,
               valid_accuracies=valid_accuracy_log,
               filename=os.path.join(log_directory, 'training_curve.png'))
Example #21
def test_add_input_layer():
    model = CNN()
    out = model.add_input_layer(shape=(256, 256, 3), name="input0")
    # no tests for this?
    assert True
Example #22
    def init_CNN(self):
        net = CNN(load_glove=False)
        net.num_words = self.num_words
        net.glove_dim = INPUT_DIM
        return net
Example #23
def main(model_name, new_scan=False, preprocess=True):
    config = Config()
    plot = config.plot
    cut = config.cut
    bandpass = config.bandpass
    resample = config.resample

    # read data folders
    file_list = os.listdir(config.root + '/data/after')
    file_list.sort()
    if new_scan == True:
        print('start new scan!')
        start_point = 0
        event_num = 0

        try:
            os.system('rm -rf %s/event_detect/detect_result/cut/*' %
                      config.root)
            os.system('rm -rf %s/event_detect/detect_result/png/*' %
                      config.root)
            os.system('rm -rf %s/event_detect/detect_result/png2/*' %
                      config.root)
            os.system('rm -rf %s/event_detect/detect_result/cnn/*.csv' %
                      config.root)
        except:
            pass
        # file_list_len = len(file_list)
    else:
        with open(config.root + '/event_detect/detect_result/' + model_name +
                  '/checkpoint') as file:
            start_point = int(file.readline())
            event_num = int(file.readline())
            file_list = file_list[start_point:]
            # file_list_len = len(file_list)
            print('restart from {}'.format(file_list[0]))

    # load CNN model
    if model_name == 'cnn':
        from cnn import CNN
        import tensorflow as tf
        from tflib.models import Model

        model = CNN()
        # sess = tf.Session(config=tf.ConfigProto(device_count={"CPU":20},inter_op_parallelism_threads=0,intra_op_parallelism_threads=0))
        sess = tf.Session()
        saver, global_step = Model.continue_previous_session(
            sess,
            model_file='cnn',
            ckpt_file=config.root + '/event_detect/saver/cnn/checkpoint')

    # read group info
    group = []
    with open(config.root + '/config/group_info', 'r') as f:
        for line in f.readlines():
            if line != '\n':
                if line[0] == '#':
                    group.append([])
                else:
                    group[-1].append(line.split()[0])
    # read data & detect eq
    for file in file_list:
        sac_file_name = [[], [], []]
        all_group_sta_num = [0] * len(group)
        path = os.path.join(config.root + '/data/after', file)
        begin = datetime.datetime.now()
        group_E = [[] for _ in range(len(group))]
        group_N = [[] for _ in range(len(group))]
        group_Z = [[] for _ in range(len(group))]
        print('Start reading data: %s.' % file)
        for i in range(len(group)):
            for sta in group[i]:
                if len(glob.glob(path + '/' + '*' + sta + '.*')) == 3:
                    all_group_sta_num[i] += 1
                    sacfile_E = glob.glob(path + '/' + '*' + sta + '.*' +
                                          'E')[0]
                    sacfile_N = glob.glob(path + '/' + '*' + sta + '.*' +
                                          'N')[0]
                    sacfile_Z = glob.glob(path + '/' + '*' + sta + '.*' +
                                          'Z')[0]
                    sac_file_name[0].append(sacfile_E.split('/')[-1])
                    sac_file_name[1].append(sacfile_N.split('/')[-1])
                    sac_file_name[2].append(sacfile_Z.split('/')[-1])
                    group_E[i].append(obspy.read(sacfile_E))
                    group_N[i].append(obspy.read(sacfile_N))
                    group_Z[i].append(obspy.read(sacfile_Z))
        flatten_group_E = [st for each_group in group_E for st in each_group]
        flatten_group_N = [st for each_group in group_N for st in each_group]
        flatten_group_Z = [st for each_group in group_Z for st in each_group]
        st_E = reduce(lambda st1, st2: st1 + st2, flatten_group_E)
        st_N = reduce(lambda st1, st2: st1 + st2, flatten_group_N)
        st_Z = reduce(lambda st1, st2: st1 + st2, flatten_group_Z)
        st_all = st_E + st_N + st_Z
        all_sta_num = len(flatten_group_Z)
        if resample:
            st_all = st_all.resample(sampling_rate=resample)
        if bandpass:
            st_all = st_all.filter('bandpass',
                                   freqmin=bandpass[0],
                                   freqmax=bandpass[1],
                                   corners=4,
                                   zerophase=True)
        endtime = st_all[0].stats.endtime

        start_flag = -1
        end_flag = -1
        event_list = []
        confidence_total = {}
        start_total = []
        end_total = []
        pos_num_total = []
        samples = 1.0 / st_all[0].stats.delta
        # npts = st_all[0].stats.npts
        print('Finish reading data.')

        print('Start detection.')
        for windowed_st in st_all.slide(window_length=(config.winsize - 1) /
                                        samples,
                                        step=config.winlag / samples):
            cur_sta = 0
            len_group_conf = 0
            group_class, group_conf = [], []
            # windowed_E = windowed_st[:all_sta_num]
            # windowed_N = windowed_st[all_sta_num:2*all_sta_num]
            # windowed_Z = windowed_st[2*all_sta_num:]
            start = len(windowed_st) / 3 * 2
            end = len(windowed_st)
            group_max_conf = 0
            for i in range(len(group)):
                data_input = [[], [], []]
                group_sta_num = all_group_sta_num[i]
                if group_sta_num > 0:
                    for j in range(cur_sta, cur_sta + group_sta_num):
                        if len(windowed_st[j].data) < config.winsize:
                            windowed_st[j].data = np.concatenate([
                                windowed_st[j].data,
                                np.zeros(config.winsize -
                                         len(windowed_st[j].data))
                            ])
                        data_input[0].append(
                            windowed_st[j].data[:config.winsize])
                        # print(j, windowed_st[j])
                    for j in range(all_sta_num + cur_sta,
                                   all_sta_num + cur_sta + group_sta_num):
                        if len(windowed_st[j].data) < config.winsize:
                            windowed_st[j].data = np.concatenate([
                                windowed_st[j].data,
                                np.zeros(config.winsize -
                                         len(windowed_st[j].data))
                            ])
                        data_input[1].append(
                            windowed_st[j].data[:config.winsize])
                        # print(j, windowed_st[j])
                    for j in range(2 * all_sta_num + cur_sta,
                                   2 * all_sta_num + cur_sta + group_sta_num):
                        if len(windowed_st[j].data) < config.winsize:
                            windowed_st[j].data = np.concatenate([
                                windowed_st[j].data,
                                np.zeros(config.winsize -
                                         len(windowed_st[j].data))
                            ])
                        data_input[2].append(
                            windowed_st[j].data[:config.winsize])
                        # print(j, windowed_st[j])
                    plot_b = 2 * all_sta_num + cur_sta
                    plot_e = 2 * all_sta_num + cur_sta + group_sta_num
                    cur_sta += group_sta_num

                    if preprocess:
                        for i in range(3):
                            for j in range(group_sta_num):
                                data_input[i][j] = data_preprocess(
                                    data_input[i][j])
                    data_input = np.array(data_input)

                    if len(data_input[0][0]) < config.winsize:
                        concat = np.zeros([
                            3, group_sta_num,
                            config.winsize - len(data_input[0][0])
                        ])
                        data_input = np.concatenate([data_input, concat],
                                                    axis=2)
                    else:
                        data_input = data_input[:, :, :config.winsize]
                    data_input = data_input.transpose((1, 2, 0))

                    j = 0
                    while j < len(data_input):
                        if np.max(data_input[j]) == 0 or np.isnan(
                                np.max(data_input[j])):
                            data_input = np.delete(data_input, j, axis=0)
                        else:
                            j += 1

                    if len(data_input) >= 3:
                        len_group_conf += 1
                        class_pred, confidence = model.classify(
                            sess=sess, input_=[data_input])
                        group_class.append(class_pred)
                        group_conf.append(confidence[0])
                        if confidence[0] > group_max_conf:
                            start = plot_b
                            end = plot_e
                            group_max_conf = confidence[0]
                    else:
                        group_class.append(0)
                        group_conf.append(0)
                else:
                    group_class.append(0)
                    group_conf.append(0)

            # consider the result of multiple groups
            pos_num = 0
            for each in group_class:
                if each == 1:
                    pos_num += 1
            if pos_num >= config.group_num_thrd:
                class_pred = 1
            else:
                class_pred = 0

            confidence = sum(
                group_conf) / len_group_conf if len_group_conf else 0

            # calculate the window range
            if class_pred == 1:
                confidence_total[confidence] = [group_max_conf, start, end]
                start_total.append(windowed_st[0].stats.starttime)
                end_total.append(windowed_st[0].stats.endtime)
                pos_num_total.append(pos_num)

                if start_flag == -1:
                    start_flag = windowed_st[0].stats.starttime
                    end_flag = windowed_st[0].stats.endtime
                else:
                    end_flag = windowed_st[0].stats.endtime
            print("{} {} {} {} {:.8f} {:.8f}".format(class_pred,start_flag,end_flag, \
                windowed_st[0].stats.starttime,confidence, group_max_conf))

            if class_pred == 0 and start_flag != -1:  #end_flag < windowed_st[0].stats.starttime:
                confidence = max(list(confidence_total.keys()))
                # for j in range(len(confidence_total)):
                #     if confidence == confidence_total[j]:
                #         break
                # start_local = start_total[j]
                # end_local = end_total[j]
                # event = [file, start_flag, end_flag,
                #          confidence, start_local, end_local]
                event_num += 1
                group_max_conf = confidence_total[confidence][0]
                start = confidence_total[confidence][1]
                end = confidence_total[confidence][2]
                event = [event_num, file, start_flag, end_flag, confidence, \
                    max(pos_num_total), start, end, group_max_conf]

                confidence_total = {}
                start_total = []
                end_total = []
                pos_num_total = []

                event_list.append(event)
                #print(event_list)

                start_flag = -1
                end_flag = -1

            if class_pred == 1 and end_flag + config.winlag / samples >= endtime:
                confidence = max(list(confidence_total.keys()))
                # for j in range(len(confidence_total)):
                #     if confidence == confidence_total[j]:
                #         break
                # start_local = start_total[j]
                # end_local = end_total[j]
                # event = [file.split('/')[-2], start_flag, endtime,
                #          confidence, start_total, end_total]
                event_num += 1
                group_max_conf = confidence_total[confidence][0]
                start = confidence_total[confidence][1]
                end = confidence_total[confidence][2]
                event = [event_num, file, start_flag, endtime, confidence, \
                    max(pos_num_total), start, end, group_max_conf]

                event_list.append(event)
                start_flag = -1
                end_flag = -1

        if event_list:
            new_event_list = [event_list[0]]
            for i in range(1, len(event_list)):
                if event_list[i][1] > new_event_list[-1][1] and \
                event_list[i][1] < new_event_list[-1][1]+1000/(config.resample if config.resample else 200):
                    # if event_list[i][1] > new_event_list[-1][1] and event_list[i][1] < new_event_list[-1][2]:
                    new_event_list[-1][2] = event_list[i][2]
                else:
                    new_event_list.append(event_list[i])
        else:
            new_event_list = []

        # write event list
        if len(event_list) != 0:
            with open(config.root + '/event_detect/detect_result/' +
                      model_name + '/events_list.csv',
                      mode='a',
                      newline='') as f:
                csvwriter = csv.writer(f)
                for event in event_list:
                    csvwriter.writerow(event)
                f.close()

        if plot:
            print('Plot detected events.')
            for event in new_event_list:
                plot_traces = st_Z
                event_num, _, start_flag, end_flag, confidence, pos_num, start, end, group_max_conf = event
                name = config.root + '/event_detect/detect_result/png/' \
                        + str(int(event_num)) + '_' + str(confidence)[:4] + '.png'
                plot_traces.plot(starttime=start_flag,
                                 endtime=end_flag,
                                 size=(800, 800),
                                 automerge=False,
                                 equal_scale=False,
                                 linewidth=0.8,
                                 outfile=name)

                plot_traces2 = st_all[start:end]
                name2 = config.root + '/event_detect/detect_result/png2/' \
                        + str(int(event_num)) + '_' + str(group_max_conf)[:4] + '.png'
                plot_traces2.plot(starttime=start_flag,
                                  endtime=end_flag,
                                  size=(800, 800),
                                  automerge=False,
                                  equal_scale=False,
                                  linewidth=0.8,
                                  outfile=name2)

        ## cut use Obspy, processed data
        # if cut:
        #     print('Cut detected events.')
        #     for event in new_event_list:
        #         event_num, _, start_flag, end_flag, confidence, pos_num, start, end, group_max_conf = event
        #         slice_E = st_E.slice(start_flag, end_flag)
        #         slice_N = st_N.slice(start_flag, end_flag)
        #         slice_Z = st_Z.slice(start_flag, end_flag)
        #         save_path = config.root + '/event_detect/detect_result/cut/' \
        #                 + str(int(event_num)) + '_' + str(confidence)[:4]
        #         os.system('mkdir %s'%save_path)
        #         for i in range(len(slice_E)):
        #             slice_E[i].write(save_path+'/'+sac_file_name[0][i], format='SAC')
        #             slice_N[i].write(save_path+'/'+sac_file_name[1][i], format='SAC')
        #             slice_Z[i].write(save_path+'/'+sac_file_name[2][i], format='SAC')

        ## cut use SAC, raw data
        if cut:
            print('Cut detected events.')
            for event in new_event_list:
                event_num, _, start_flag, end_flag, confidence, pos_num, start, end, group_max_conf = event
                save_path = config.root + '/event_detect/detect_result/cut/' \
                    + str(int(event_num)) + '_' + str(confidence)[:4] + '/'
                os.system('mkdir %s' % save_path)
                cut_b = 60 * 60 * int(start_flag.hour) + 60 * int(
                    start_flag.minute) + float(start_flag.second)
                cut_e = 60 * 60 * int(end_flag.hour) + 60 * int(
                    end_flag.minute) + float(end_flag.second)
                ## SAC
                os.putenv("SAC_DISPLAY_COPYRIGHT", "0")
                p = subprocess.Popen(['sac'], stdin=subprocess.PIPE)

                s = ''

                s += "cut %s %s \n" % (cut_b, cut_e)
                s += "r %s/* \n" % (config.root + '/data/after/' + file)
                s += "w dir %s over \n" % (save_path)
                s += "quit \n"

                p.communicate(s.encode())

        start_point += 1
        with open(config.root + '/event_detect/detect_result/' + model_name +
                  '/checkpoint',
                  mode='w') as f:
            f.write(str(start_point) + '\n')
            f.write(str(event_num))
            end = datetime.datetime.now()
            print('{} completed, num {}, time {}.'.format(
                file, start_point, end - begin))
            print('Checkpoint saved.')
Example #24
    #logger.info("%s, evaluation loss:%s, acc:%s"%(timestr, total_loss/step, total_right/(total_right + total_wrong)))


#---------------------------------- execute valid model end --------------------------------------

#----------------------------------- begin to train -----------------------------------
with tf.Graph().as_default():
    with tf.device("/gpu:3"):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_options)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        with tf.Session(config=session_conf).as_default() as sess:
            cnn = CNN(FLAGS.sequence_len, embedding, FLAGS.embedding_size,
                      filter_sizes, FLAGS.num_filters)
            global_step = tf.Variable(0, name="global_step", trainable=False)
            #optimizer = tf.train.AdamOptimizer(5e-2)
            optimizer = tf.train.GradientDescentOptimizer(1e-1)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            sess.run(tf.initialize_all_variables())

            #ori_quests, cand_quests = zip(*train_quests)
            #valid_ori_quests, valid_cand_quests = zip(*valid_quests)

            for ori_train, cand_train, neg_train in batch_iter(
                    ori_quests, cand_quests, FLAGS.batch_size, FLAGS.epoches):
                run_step(sess, ori_train, cand_train, neg_train, cnn,
Example #25
def main():
    print('Starting process...')

    SEED = 111
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    TEXT = torchtext.data.Field(tokenize='spacy', batch_first=True)
    LABEL = torchtext.data.LabelField(dtype=torch.float)

    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    train_data, valid_data = train_data.split(random_state=random.seed(SEED))

    max_vocab_size = 25_000
    TEXT.build_vocab(
        train_data,
        max_size=max_vocab_size,
        vectors="glove.6B.100d",
        unk_init=torch.Tensor.normal_,
    )
    LABEL.build_vocab(train_data)

    train_iter, valid_iter, test_iter = torchtext.data.BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_size=ARGS.batch_size,
        device=DEVICE)

    vocab_size = len(TEXT.vocab)
    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    filter_sizes = np.array(ARGS.filter_sizes.split(','), dtype=int)
    model = CNN(vocab_size, ARGS.binary_neuron, ARGS.embed_dim, ARGS.n_filters,
                filter_sizes, ARGS.output_dim, ARGS.dropout_rate, pad_idx)

    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
    model.embedding.weight.data[UNK_IDX] = torch.zeros(ARGS.embed_dim)
    model.embedding.weight.data[pad_idx] = torch.zeros(ARGS.embed_dim)

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    model.to(DEVICE)
    criterion.to(DEVICE)

    min_valid_loss = float('inf')
    for epoch in range(1, ARGS.epochs + 1):
        start_time = time.time()

        model.train()
        train_loss, train_acc, train_p, train_tn, train_fp, train_fn = run_epoch(
            model, train_iter, criterion, optimizer)

        model.eval()
        with torch.no_grad():
            valid_loss, valid_acc, val_p, val_tn, val_fp, val_fn = run_epoch(
                model, valid_iter, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < min_valid_loss:
            min_valid_loss = valid_loss
            torch.save(model.state_dict(), 'model.pt')

        print(
          f'Epoch: {epoch:02} | Epoch Time: {epoch_mins}m {epoch_secs}s\n' \
          f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%\n' \
          f'\tVal. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%'
        )

    test_loss, test_acc, test_tp, test_tn, test_fp, test_fn = run_epoch(
        model, test_iter, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
    sns.heatmap(np.array([[test_tp, test_fp], [test_fn, test_tn]]),
                vmax=.5,
                linewidth=0.5,
                cmap="Blues",
                xticklabels=["Positive", "Negative"],
                yticklabels=["True", "False"])
    print(np.array([[test_tp, test_fp], [test_fn, test_tn]]))
    plt.show()
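
For context, the CNN in this example is a sentence classifier in the usual multi-filter-size text-CNN style. A minimal sketch compatible with how main() uses it (batch-first token indices in, logits out, an embedding attribute whose weights are overwritten with GloVe vectors) follows; the real constructor also takes ARGS.binary_neuron, which is not modelled here, so this argument list is an assumption.

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, n_filters, filter_sizes,
                 output_dim, dropout_rate, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        # one Conv1d per filter size, each sliding over the token dimension
        self.convs = nn.ModuleList(
            nn.Conv1d(embed_dim, n_filters, int(fs)) for fs in filter_sizes)
        self.fc = nn.Linear(n_filters * len(filter_sizes), output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, text):
        # text: (batch, seq_len) integer token ids
        emb = self.embedding(text).permute(0, 2, 1)       # (batch, embed_dim, seq_len)
        pooled = [F.relu(conv(emb)).max(dim=2).values for conv in self.convs]
        cat = self.dropout(torch.cat(pooled, dim=1))      # (batch, n_filters * len(filter_sizes))
        return self.fc(cat)                               # (batch, output_dim)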
Example #26
print(n(m(char_embedding_i)).size())
x=n(m(char_embedding_i))
x = x.squeeze(2,3)
print(x.size())
"""
##cnn = CNN()
learning_rate = 1.0
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr = learning_rate)


for i in range(0, 44376-1, 35):
    #print("charembed")
    #print(char_embed[:,i:i+35,:])
    #print(char_embed[:,i:i+35,:].size())
    cnn = CNN()
    char_embedx = char_embed[:,i:i+35,:].contiguous().view(-1,1,21,15)
    #print("char_embedx")
    #print(char_embedx)
    #print(char_embedx.size())
    cnnoutput = cnn(char_embedx)
    #print('CNN')
    #print(cnnoutput)
    #print(cnnoutput.size())

    input_size = cnnoutput.size()[1]
    output_size = cnnoutput.size()[1]
    highway = Highway(input_size, output_size)
    highwayoutput = highway(cnnoutput)
    #print("highway")
    #print(highwayoutput)
Example #27
def main():
    # loads, encodes and normalizes the dataset
    X_train, y_train, X_test, y_test, N_class = load_data()
    encoder = OneHotEncoder(sparse=True)
    y_train = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()
    X_train = normalize(X_train)
    X_train, y_train = shuffle(X_train, y_train)
    X_test = normalize(X_test)
    X_test, X_validation, y_test, y_validation = train_test_split(X_test, y_test, test_size=0.50, random_state=0)

    """ IN THIS SECTION WE COMPARE DIFFENT REGULARIZATIONS """

    nn1 = CNN(
        name="No_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="None"
    )
    cost1, accuracy1 = nn1.train(X_train, y_train, X_validation, y_validation, batchSize=128, epochs=20)


    nn2 = CNN(
        name="dropout_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="dropout"
    )

    cost2, accuracy2 = nn2.train(X_train, y_train, X_validation, y_validation, batchSize=128, epochs=20)


    nn3 = CNN(
        name="l1_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="l1"
    )

    cost3, accuracy3 = nn3.train(X_train, y_train, X_validation, y_validation, batchSize=128, epochs=20)
    
    
    nn4 = CNN(
        name="l2_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="l2"
    )

    cost4, accuracy4 = nn4.train(X_train, y_train, X_validation, y_validation, batchSize=128, epochs=20)
    
    plt.xlabel("Epochs")
    plt.ylabel("Cost")
    plt.plot(cost1, label='None')
    plt.plot(cost2, label='dropout')
    plt.plot(cost3, label='l1')
    plt.plot(cost4, label='l2')
    plt.legend(loc='upper left')
    plt.show()
    
    plt.xlabel("Epochs")
    plt.ylabel("Validation Accuracy")
    plt.plot(accuracy1, label='None')
    plt.plot(accuracy2, label='dropout')
    plt.plot(accuracy3, label='l1')
    plt.plot(accuracy4, label='l2')
    plt.legend(loc='upper left')
    plt.show()
    
    
    count1 = np.zeros((4, 4), dtype=int)
    count2 = np.zeros((4, 4), dtype=int)
    count3 = np.zeros((4, 4), dtype=int)
    count4 = np.zeros((4, 4), dtype=int)
    for i in range(len(X_test)):
        k = np.argmax(y_test[i])
        j1 = nn1.predictOne(X_test[i])
        j2 = nn2.predictOne(X_test[i])
        j3 = nn3.predictOne(X_test[i])
        j4 = nn4.predictOne(X_test[i])
        if j1 != k:
            count1[k][j1] += 1
        if j2 != k:
            count2[k][j2] += 1
        if j3 != k:
            count3[k][j3] += 1
        if j4 != k:
            count4[k][j4] += 1
    
    for i in range(N_class):
        p = np.zeros(N_class)
        p[i] = 1
        print(i,':',encoder.inverse_transform([p])) 
    print("Test phase")
    print("------")
    print("None:")
    print("mistakes")
    print(count1)
    print("Test accuracy:",(len(X_test)-np.sum(count1))/len(X_test))
    print("------")
    print("dropout:")
    print("mistakes")
    print(count2)
    print("Test accuracy:",(len(X_test)-np.sum(count2))/len(X_test))
    print("------")
    print("l1:")
    print("mistakes")
    print(count3)
    print("Test accuracy:",(len(X_test)-np.sum(count3))/len(X_test))
    print("------")
    print("l2:")
    print("mistakes")
    print(count4)
    print("Test accuracy:",(len(X_test)-np.sum(count4))/len(X_test))
Example #28
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = CNN(sequence_length=x_train.shape[1],
                  num_classes=y_train.shape[1],
                  vocab_size=len(vocab_processor.vocabulary_),
                  embedding_size=FLAGS.embedding_dim,
                  filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                  num_filters=FLAGS.num_filters,
                  l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
Example #29
FilesList = LoadList(cfg.TEST_LIST)

WND_SHIFT = WND_WIDTH - 2

VEC_PER_WND = WND_WIDTH / math.pow(2, MPoolLayers_H)

phase_train = tf.Variable(True, name='phase_train')

x = tf.placeholder(tf.float32, shape=[None, WND_HEIGHT, WND_WIDTH])

SeqLens = tf.placeholder(shape=[cfg.BatchSize], dtype=tf.int32)

x_expanded = tf.expand_dims(x, 3)

Inputs = CNN(x_expanded, phase_train, 'CNN_1')

logits = RNN(Inputs, SeqLens, 'RNN_1')

# CTC Beam Search Decoder to decode pred string from the prob map
decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, SeqLens)

#Reading test data...
InputListTest, SeqLensTest, _ = ReadData(cfg.TEST_LOCATION, cfg.TEST_LIST,
                                         cfg.TEST_NB, WND_HEIGHT, WND_WIDTH,
                                         WND_SHIFT, VEC_PER_WND, '')

print('Initializing...')

session = tf.Session()
Example #30
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--optimizer', type=str, default='sgd',
                        help='which optimizer to use in training. Valid options are' + \
                            '\'sgd\' or \'kfac\'.')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument(
        '--save-stats',
        type=str,
        default=None,
        help='name of file to save training loss and test loss and accuracy.')
    parser.add_argument('--test-every',
                        type=int,
                        default=None,
                        help='test the model roughly every n examples')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '.',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '.',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = CNN().to(device)
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)
    elif args.optimizer == 'kfac':
        optimizer = KFAC(model, F.nll_loss)

    train_stats = {}
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, test_loader, optimizer, epoch,
              train_stats)
        test(args, model, device, test_loader)

    if (args.save_model):
        torch.save(model.state_dict(), "mnist_cnn.pt")