Example #1
    def create(self):
        DataUtils.message("Creating The Model...", new=True)

        word_input = Input(shape=(self.look_back, 300))

        tag_input = Input(shape=(self.look_back, ))
        tag_emb = Embedding(self.distinct_tags + 1,
                            30,
                            input_length=self.look_back,
                            mask_zero=True,
                            trainable=False)(tag_input)

        concat_emb = Concatenate()([word_input, tag_emb])

        bilstm = Bidirectional(
            LSTM(300,
                 dropout=0.35,
                 recurrent_dropout=0.1,
                 return_sequences=True))(concat_emb)
        hidden = TimeDistributed(Dense(800, activation="tanh"))(bilstm)
        output = TimeDistributed(
            Dense(self.distinct_words, activation="softmax"))(hidden)

        model = Model(inputs=[word_input, tag_input], outputs=output)
        model.compile(loss='categorical_crossentropy',
                      optimizer="adam",
                      metrics=['accuracy'])

        self.model = model
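For orientation, below is a minimal, self-contained sketch (not part of the original class) of the array shapes this two-input model consumes, with illustrative values assumed for look_back, distinct_tags and distinct_words: the first input carries pre-computed 300-dimensional word vectors, the second carries integer tag indices (0 reserved for masking), and the targets are one-hot word distributions per timestep.

import numpy as np

# Illustrative sizes only; the real values come from the data preparation step.
look_back, distinct_tags, distinct_words = 10, 17, 5000
batch = 4

word_x = np.random.rand(batch, look_back, 300)        # 300-d word vectors per position
tag_x = np.random.randint(1, distinct_tags + 1,       # tag indices; 0 is the mask value
                          size=(batch, look_back))
y = np.zeros((batch, look_back, distinct_words))      # one-hot target word per position
y[..., 0] = 1.0

# With an instance `m` of the class above (hypothetical setup):
#   m.look_back, m.distinct_tags, m.distinct_words = look_back, distinct_tags, distinct_words
#   m.create()
#   m.model.fit([word_x, tag_x], y, epochs=1, batch_size=2)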
Example #2
    def create_xy_test(self,
                       tag_file,
                       embedding_file,
                       data_size=1,
                       window_size=5,
                       available_tags=[],
                       suffix=None,
                       mode="create",
                       load=None):
        DataUtils.message("Prepearing Test Data...", new=True)

        if mode == "create" or mode == "save":
            x_test, y_test = self.__create_xy(tag_file, embedding_file,
                                              data_size, window_size,
                                              available_tags, suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "X_TEST" + "_" + str(window_size)),
                x_test)
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "Y_TEST" + "_" + str(window_size)),
                y_test)

        if mode == "load" and load is not None:
            x_test = DataUtils.load_array(load[0])
            y_test = DataUtils.load_array(load[1])

        self.x_test = np.array(x_test)
        self.y_test = np.array(y_test)
Example #3
    def create_xy_test(self,
                       embedding_file,
                       data_size=1,
                       look_back=0,
                       mode="create",
                       load=None):
        DataUtils.message("Prepearing Test Data...", new=True)

        if mode == "create" or mode == "save":
            word_test, head_test, tag_test = self.__create_xy(embedding_file,
                                                              data_size,
                                                              look_back,
                                                              test=True)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("DP_W", "TEST" + "_" + str(look_back)),
                word_test)
            DataUtils.save_array(
                DataUtils.get_filename("DP_H", "TEST" + "_" + str(look_back)),
                head_test)
            DataUtils.save_array(
                DataUtils.get_filename("DP_T", "TEST" + "_" + str(look_back)),
                tag_test)

        if mode == "load" and load is not None:
            word_test = DataUtils.load_array(load[0])
            head_test = DataUtils.load_array(load[1])
            tag_test = DataUtils.load_array(load[2])

        self.word_test = np.array(word_test)
        self.head_test = np.array(head_test)
        self.tag_test = np.array(tag_test)
Example #4
    def create_xy_train(self,
                        tag_file,
                        embedding_file,
                        data_size=1,
                        look_back=5,
                        threshold=0,
                        suffix=None,
                        mode="create",
                        load=None):
        DataUtils.message("Prepearing Training Data...", new=True)

        if mode == "create" or mode == "save":
            x_train, y_train = self.__create_xy_train(tag_file, embedding_file,
                                                      data_size, look_back,
                                                      threshold, suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_X",
                                       "TRAIN" + "_" + str(look_back)),
                x_train)
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_Y",
                                       "TRAIN" + "_" + str(look_back)),
                y_train)

        if mode == "load" and load is not None:
            x_train = DataUtils.load_array(load[0])
            y_train = DataUtils.load_array(load[1])

        self.x_train = x_train
        self.y_train = y_train

        self.INPUT_SHAPE = x_train.shape
        self.OUTPUT_SHAPE = y_train.shape
Example #5
    def create_xy_train(self,
                        tag_file,
                        embedding_file,
                        data_size=1,
                        window_size=5,
                        available_tags=[],
                        suffix=None,
                        mode="create",
                        load=None):
        DataUtils.message("Prepearing Training Data...", new=True)

        if mode == "create" or mode == "save":
            x_train, y_train = self.__create_xy(tag_file, embedding_file,
                                                data_size, window_size,
                                                available_tags, suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "X_TRAIN" + "_" + str(window_size)),
                x_train)
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "Y_TRAIN" + "_" + str(window_size)),
                y_train)

        if mode == "load" and load is not None:
            x_train = DataUtils.load_array(load[0])
            y_train = DataUtils.load_array(load[1])

        self.x_train = np.array(x_train)
        self.y_train = np.array(y_train)

        self.INPUT_SHAPE = self.x_train.shape
        self.OUTPUT_SHAPE = self.y_train.shape
Example #6
    def create_xy_test(self,
                       tag_file,
                       embedding_file,
                       data_size=1,
                       look_back=5,
                       suffix=None,
                       mode="create",
                       load=None):
        DataUtils.message("Prepearing Test Data...", new=True)

        if mode == "create" or mode == "save":
            x_test, y_test = self.__create_xy_test(tag_file, embedding_file,
                                                   data_size, look_back,
                                                   suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_X",
                                       "TEST" + "_" + str(look_back)), x_test)
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_Y",
                                       "TEST" + "_" + str(look_back)), y_test)

        if mode == "load" and load is not None:
            x_test = DataUtils.load_array(load[0])
            y_test = DataUtils.load_array(load[1])

        self.x_test = np.array(x_test)
        self.y_test = np.array(y_test)
Example #7
    def create_xy_train(self, parse_tree_file, data_size=1, seq_len=10):
        DataUtils.message("Prepearing Training Data...", new=True)

        x_train, y_train = self.__create_xy(parse_tree_file, data_size,
                                            seq_len)

        self.x_train = x_train
        self.y_train = y_train
Example #8
    def create_xy_train(self,
                        dependency_tree,
                        embedding_file,
                        data_size=1,
                        look_back=0,
                        mode="create",
                        load=None):
        DataUtils.message("Preparing Training Data...", new=True)

        if mode == "create" or mode == "save":
            word_train, tag_train, probability_train = self.__create_xy(
                dependency_tree, embedding_file, data_size, look_back,
                test=False)

        self.word_train = word_train
        self.tag_train = tag_train
        self.probability_train = probability_train
Example #9
    def create(self):
        DataUtils.message("Creating The Model...", new=True)

        input_forward = Input(shape=(self.seq_len, ))
        input_backward = Input(shape=(self.seq_len, ))

        head_forward = Input(shape=(self.seq_len, ))
        head_backward = Input(shape=(self.seq_len, ))

        word_embedding = Embedding(self.distinct_words,
                                   128,
                                   input_length=self.seq_len,
                                   trainable=True)
        input_forward_embedding = word_embedding(input_forward)
        input_backward_embedding = word_embedding(input_backward)

        head_forward_embedding = word_embedding(head_forward)
        head_backward_embedding = word_embedding(head_backward)

        lstm_forward = LSTM(128)
        lstm_backward = LSTM(128)

        input_forward_lstm = lstm_forward(input_forward_embedding)
        input_backward_lstm = lstm_backward(input_backward_embedding)
        input_lstm = Concatenate()([input_forward_lstm, input_backward_lstm])

        head_forward_lstm = lstm_forward(head_forward_embedding)
        head_backward_lstm = lstm_backward(head_backward_embedding)
        head_lstm = Concatenate()([head_forward_lstm, head_backward_lstm])

        tag_output = Dense(18, activation="softmax")(input_lstm)

        input_hidden = Dense(100, activation=None)
        input_forward_hidden = input_hidden(input_lstm)

        head_hidden = Dense(100, activation=None)
        head_forward_hidden = head_hidden(head_lstm)

        sum_hidden = Add()([input_forward_hidden, head_forward_hidden])
        tanh_hidden = Activation("tanh")(sum_hidden)

        arc_output = Dense(1, activation=None)(tanh_hidden)

        model = Model(inputs=[
            input_forward, input_backward, head_forward, head_backward
        ],
                      outputs=[tag_output, arc_output])

        def nll1(y_true, y_pred):
            # keras.losses.binary_crossentropy gives the mean over the
            # last axis; we need the sum.
            return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)

        model.compile(loss=['categorical_crossentropy', nll1],
                      optimizer="adam",
                      metrics=['accuracy'])
        self.model = model
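The custom nll1 loss differs from the built-in binary cross-entropy only in reducing with a sum instead of a mean over the last axis, as the comment notes. A small self-contained check of that difference (assuming a Keras backend import, not code from the original project):

import numpy as np
from keras import backend as K

y_true = K.constant(np.array([[1.0, 0.0, 1.0]]))
y_pred = K.constant(np.array([[0.9, 0.2, 0.8]]))

per_element = K.binary_crossentropy(y_true, y_pred)   # shape (1, 3)
summed = K.eval(K.sum(per_element, axis=-1))          # what nll1 returns
averaged = K.eval(K.mean(per_element, axis=-1))       # what the built-in loss returns
print(summed, averaged)                               # summed is 3x averaged for a length-3 axis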
Example #10
    def save(self, note=""):
        DataUtils.message("Saving Model...", new=True)
        directory = "weights/"

        DataUtils.create_dir(directory)

        file = DataUtils.get_filename("UFF", note)+".h5"

        self.model.save(directory+file)
Example #11
    def plot(self, note=""):
        DataUtils.message("Ploting Model...", new=True)
        directory = "plot/"

        DataUtils.create_dir(directory)

        file = DataUtils.get_filename("UFF", note)+".png"

        plot_model(self.model, to_file=directory+file, show_shapes=True, show_layer_names=False)
Example #12
    def create_xy_test(self, parse_tree_file, data_size=1, seq_len=10):
        DataUtils.message("Prepearing Validation Data...", new=True)

        x_test, y_test = self.__create_xy(parse_tree_file,
                                          data_size,
                                          seq_len,
                                          test=True)

        self.x_test = x_test
        self.y_test = y_test
Example #13
    def train(self, epochs, batch_size=32):
        DataUtils.message("Training...", new=True)
        self.model.fit([
            self.word_train[0][0], self.word_train[0][1], self.tag_train[0][0],
            self.tag_train[0][1], self.word_train[1][0], self.word_train[1][1],
            self.tag_train[1][0], self.tag_train[1][1]
        ],
                       self.head_train,
                       epochs=epochs,
                       batch_size=batch_size)
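The fit call above unpacks word_train and tag_train through two levels of [0]/[1] indexing, so each must be a 2x2 nested structure of arrays, eight arrays in total, one per Input layer of the underlying model. A runnable sketch of that layout with invented sizes (all shapes here are assumptions for illustration):

import numpy as np

n, look_back = 16, 10   # sample count and context length, assumed for illustration

# word_train[i][j] and tag_train[i][j] for i, j in {0, 1}: eight arrays in total
word_train = [[np.random.rand(n, look_back, 300) for _ in range(2)] for _ in range(2)]
tag_train = [[np.random.randint(0, 17, size=(n, look_back)) for _ in range(2)] for _ in range(2)]

inputs = [word_train[0][0], word_train[0][1], tag_train[0][0], tag_train[0][1],
          word_train[1][0], word_train[1][1], tag_train[1][0], tag_train[1][1]]
print(len(inputs))      # -> 8, matching the eight Input() layers of the model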
Example #14
    def create(self):
        DataUtils.message("Creating The Model...", new=True)
        word_input_forward = Input(shape=(self.look_back,300))
        word_input_backward = Input(shape=(self.look_back,300))

        tag_input_forward = Input(shape=(self.look_back,))
        tag_input_backward = Input(shape=(self.look_back,))

        tag_emb = Embedding(self.distinct_tags, 30, input_length=self.look_back, trainable=True)
        tag_input_forward_output = tag_emb(tag_input_forward)
        tag_input_backward_output = tag_emb(tag_input_backward)

        input_forward = Concatenate()([word_input_forward, tag_input_forward_output])
        input_backward = Concatenate()([word_input_backward, tag_input_backward_output])

        word_head_forward = Input(shape=(self.look_back,300))
        word_head_backward = Input(shape=(self.look_back,300))

        tag_head_forward = Input(shape=(self.look_back,))
        tag_head_backward = Input(shape=(self.look_back,))

        tag_head_forward_output = tag_emb(tag_head_forward)
        tag_head_backward_output = tag_emb(tag_head_backward)

        head_forward = Concatenate()([word_head_forward, tag_head_forward_output])
        head_backward = Concatenate()([word_head_backward, tag_head_backward_output])

        bilstm = BiLSTM(300)

        bilstm_input = bilstm([input_forward,input_backward])
        dense_input = Dense(600, activation="linear")(bilstm_input)

        bilstm_head = bilstm([head_forward,head_backward])
        dense_head = Dense(600, activation="linear")(bilstm_head)

        sum_dense = Add()([dense_input,dense_head])

        dense_tanh = Dense(600, activation="tanh")(sum_dense)
        # single arc score in (0, 1): a sigmoid unit pairs with binary cross-entropy
        output = Dense(1, activation="sigmoid")(dense_tanh)

        model = Model(inputs=[
            word_input_forward, word_input_backward, tag_input_forward,
            tag_input_backward, word_head_forward, word_head_backward,
            tag_head_forward, tag_head_backward
        ],
                      outputs=output)
        model.compile(loss='binary_crossentropy',
                      optimizer="adam",
                      metrics=['accuracy'])
        self.model = model
Example #15
    def create(self):
        DataUtils.message("Creating The Model...", new=True)
        model = Sequential()
        model.add(
            Masking(input_shape=(self.INPUT_SHAPE[1], self.INPUT_SHAPE[2])))
        model.add(Dropout(.2))
        model.add(
            Bidirectional(
                LSTM(32,
                     dropout=0.2,
                     recurrent_dropout=0.2,
                     return_sequences=True)))
        model.add(TimeDistributed(Dense(32, activation="tanh")))
        model.add(
            TimeDistributed(Dense(self.OUTPUT_SHAPE[2], activation="softmax")))
        model.compile(loss="categorical_crossentropy",
                      optimizer="adam",
                      metrics=["accuracy"])

        self.model = model
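The Masking input shape and the output width above come straight from the arrays prepared in the create_xy_train methods shown earlier: INPUT_SHAPE[1] and INPUT_SHAPE[2] are the padded sequence length and feature size, and OUTPUT_SHAPE[2] is the number of tag classes. A minimal sketch with invented sizes (not taken from the original data):

import numpy as np

sentences, timesteps, classes = 8, 20, 12               # assumed sizes for illustration
x_train = np.random.rand(sentences, timesteps, 300)     # padded 300-d word vectors
y_train = np.zeros((sentences, timesteps, classes))     # one-hot tags per timestep
y_train[..., 0] = 1.0

INPUT_SHAPE, OUTPUT_SHAPE = x_train.shape, y_train.shape
print(INPUT_SHAPE[1], INPUT_SHAPE[2], OUTPUT_SHAPE[2])  # -> 20 300 12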
Example #16
    def create_xy_train(self,
                        dependency_tree,
                        embedding_file,
                        data_size=1,
                        look_back=0,
                        mode="create",
                        load=None):
        DataUtils.message("Prepearing Training Data...", new=True)

        if mode == "create" or mode == "save":
            word_train, head_train, tag_train = self.__create_xy(
                dependency_tree,
                embedding_file,
                data_size,
                look_back,
                test=False)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("DP_W", "TRAIN" + "_" + str(look_back)),
                word_train)
            DataUtils.save_array(
                DataUtils.get_filename("DP_H", "TRAIN" + "_" + str(look_back)),
                head_train)
            DataUtils.save_array(
                DataUtils.get_filename("DP_T", "TRAIN" + "_" + str(look_back)),
                tag_train)

        if mode == "load" and load is not None:
            word_train = DataUtils.load_array(load[0])
            head_train = DataUtils.load_array(load[1])
            tag_train = DataUtils.load_array(load[2])

        self.word_train = np.array(word_train)
        self.head_train = np.array(head_train)
        self.tag_train = np.array(tag_train)
Example #17
    def validate(self, batch_size=16):
        DataUtils.message("Validation...")
        return self.model.evaluate(self.x_test, self.y_test, batch_size=batch_size)
Example #18
 def save(self, note=""):
     DataUtils.message("Saving Model...", new=True)
     self.model.save(DataUtils.get_filename("DP", note) + ".h5")
Example #19
    def train(self, epochs, batch_size=16):
        DataUtils.message("Training...", new=True)
        self.model.fit(self.x_train, self.y_train, epochs=epochs, batch_size=batch_size)
Example #20
 def plot(self, note=""):
     DataUtils.message("Ploting Model...", new=True)
     plot_model(self.model,
                to_file=DataUtils.get_filename("DP", note) + ".png",
                show_shapes=True,
                show_layer_names=False)
Example #21
    def load(self, file):
        DataUtils.message("Loading Model...", new=True)
        self.model = load_model(file)
Example #22
        self.model = model

    def train(self, epochs, batch_size=16):
        DataUtils.message("Training...", new=True)
        self.model.fit(self.x_train, self.y_train, epochs=epochs, batch_size=batch_size)

    def validate(self, batch_size=16):
        DataUtils.message("Validation...")
        return self.model.evaluate(self.x_test, self.y_test, batch_size=batch_size)

    def predict(self, x):
        return self.model.predict(x)

    def summary(self):
        self.model.summary()

if __name__ == "__main__":
    test_file = "data/Brown_tagged_train.txt"
    train_file = "data/Brown_tagged_test.txt"
    embedding_file = "embeddings/GoogleNews-vectors-negative300-SLIM.bin"
    epochs = 10

    model = UnsupervisedFF()
    model.create_xy_train(train_file, embedding_file, 1, threshold=0)
    model.create_xy_test(test_file, embedding_file, 1)
    model.create()
    model.train(epochs)

    DataUtils.message(model.validate())
Example #23
    def validate(self, batch_size=16):
        DataUtils.message("Validation...")
        return self.model.evaluate([self.word_test, self.tag_test],
                                   self.head_test,
                                   batch_size=batch_size)
Example #24
    def train(self, epochs, batch_size=32):
        DataUtils.message("Training...", new=True)
        history = self.model.fit([
            self.word_train[0][0], self.word_train[0][1], self.tag_train[0][0],
            self.tag_train[0][1], self.word_train[1][0], self.word_train[1][1],
            self.tag_train[1][0], self.tag_train[1][1]
        ],
                                 self.probability_train,
                                 epochs=epochs,
                                 batch_size=batch_size)
        print(history.history)
Example #25
    def validate(self, batch_size=32):
        DataUtils.message("Validation...", new=True)
        return self.model.evaluate(self.x_test, self.y_test, batch_size=batch_size)
Example #26
    def create(self):
        DataUtils.message("Creating The Model...", new=True)
        word_full_forward = Input(shape=(self.look_back, 300))
        word_full_backward = Input(shape=(self.look_back, 300))

        tag_full_forward_input = Input(shape=(self.look_back, ))
        tag_full_backward_input = Input(shape=(self.look_back, ))
        tag_emb = Embedding(self.distinct_tags,
                            30,
                            input_length=self.look_back,
                            trainable=True)

        tag_full_forward = tag_emb(tag_full_forward_input)
        tag_full_backward = tag_emb(tag_full_backward_input)

        full_forward = Concatenate()([word_full_forward, tag_full_forward])
        full_backward = Concatenate()([word_full_backward, tag_full_backward])

        bilstm = BiLSTM(300)([full_forward, full_backward])
        dense_output = TimeDistributed(Dense(600, activation="linear"))(bilstm)

        word_instance_forward = Input(shape=(self.look_back, 300))
        word_instance_backward = Input(shape=(self.look_back, 300))

        tag_instance_forward_input = Input(shape=(self.look_back, ))
        tag_instance_backward_input = Input(shape=(self.look_back, ))

        tag_instance_forward = tag_emb(tag_instance_forward_input)
        tag_instance_backward = tag_emb(tag_instance_backward_input)

        instance_forward = Concatenate()(
            [word_instance_forward, tag_instance_forward])
        instance_backward = Concatenate()(
            [word_instance_backward, tag_instance_backward])

        f_ilstm = LSTM(300, dropout=0.35, recurrent_dropout=0.1)
        b_ilstm = LSTM(300, dropout=0.35, recurrent_dropout=0.1)

        forward_ioutput = f_ilstm(full_forward)
        backward_ioutput = b_ilstm(full_backward)
        bilstm_ioutput = Concatenate()([forward_ioutput, backward_ioutput])

        dense_ioutput = Dense(600, activation="linear")(bilstm_ioutput)
        repeat_ioutput = RepeatVector(self.look_back)(dense_ioutput)

        sum_output = Add()([dense_output, repeat_ioutput])

        hidden = TimeDistributed(Dense(600, activation="tanh"))(sum_output)
        # one arc score per timestep in (0, 1): sigmoid pairs with binary cross-entropy
        output = TimeDistributed(Dense(1, activation="sigmoid"))(hidden)

        model = Model(inputs=[
            word_full_forward, word_full_backward, tag_full_forward_input,
            tag_full_backward_input, word_instance_forward,
            word_instance_backward, tag_instance_forward_input,
            tag_instance_backward_input
        ],
                      outputs=output)
        model.compile(loss='binary_crossentropy',
                      optimizer="adam",
                      metrics=['accuracy'])

        self.model = model