def load_model(model_path, max_features, word_embedding_dim, maxlen,
               nb_seg_tags, lstm_dim):

    model = Sequential()
    model.add(
        Embedding(max_features,
                  word_embedding_dim,
                  input_length=maxlen,
                  name='word_emb',
                  mask_zero=True))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(nb_seg_tags)))
    crf = ChainCRF()
    model.add(crf)
    model.compile(loss=crf.sparse_loss,
                  optimizer=RMSprop(0.01),
                  metrics=['sparse_categorical_accuracy'])
    #model.compile('adam', loss=crf.sparse_loss, metrics=['sparse_categorical_accuracy'])
    #early_stopping = EarlyStopping(patience=10, verbose=1)
    #checkpointer = ModelCheckpoint(options.model + "/seg_keras_weights.hdf5",verbose=1,save_best_only=True)
    eprint(
        strftime("%Y-%m-%d %H:%M:%S", gmtime()) + ' Loading saved model:' +
        model_path + '/seg_keras_weights.hdf5')

    model.load_weights(model_path + '/seg_keras_weights.hdf5')

    return model
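
# Usage sketch (illustrative only; the argument values below are assumptions, not
# taken from the code above):
#
#     seg_model = load_model(model_path='./models',
#                            max_features=20000,
#                            word_embedding_dim=300,
#                            maxlen=100,
#                            nb_seg_tags=5,
#                            lstm_dim=100)
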
def test_generate_transition_matrix():
    # Generate data

    n_samples, n_steps, n_classes = 20000, 16, 3
    U_true = get_test_transition_matrix(n_classes)
    (X_train, y_train), (X_test, y_test) = get_test_sequences(n_samples=n_samples,
                                                              n_steps=n_steps,
                                                              U=U_true)
    model = Sequential()
    crf = ChainCRF(input_shape=X_train[0].shape)
    model.add(crf)

    sgd = SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)
    model.compile(loss=crf.loss, optimizer=sgd, metrics=['accuracy'])

    model.fit(X_train, y_train, nb_epoch=1, batch_size=32,
              validation_data=(X_test, y_test))

    print('Example predictions:')
    y_pred = model.predict_classes(X_test)
    for i in range(10):
        print(i)
        print('y_true', np.argmax(y_test[i], axis=1))
        print('y_pred', y_pred[i])
    U_pred = K.get_value(crf.U)
    print('U:\n', U_pred)
    print('b_start:\n', K.get_value(crf.b_start))
    print('b_end:\n', K.get_value(crf.b_end))

    U_pred = np.exp(U_pred)
    U_pred /= np.sum(U_pred, axis=1, keepdims=True)
    print('transitions_true:\n', U_true)
    print('transitions_pred:\n', U_pred)
    assert_allclose(U_pred, U_true, atol=5e-2)
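
# Worked sketch of the normalization above (illustrative; assumes `import numpy as np`):
# the learned transition energies U are exponentiated and row-normalized so every row
# sums to 1, which makes them directly comparable to the true transition matrix U_true.
#
#     energies = np.array([[ 1.0, 0.0, -1.0],
#                          [ 0.0, 2.0,  0.0],
#                          [-1.0, 0.0,  1.0]])
#     probs = np.exp(energies)
#     probs /= probs.sum(axis=1, keepdims=True)   # rows now sum to 1
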
def segment_file():
    embeddings = build_embeddings(args.max_features)
    #
    print('Loading data...')
    X_chars, y_test = load_file_as_words(args.test_set)
    X_idxs = np.array([[word2index.get(w, word2index['<UNK>']) for w in words] for words in X_chars])
    X_idxs_padded = sequence.pad_sequences(X_idxs, maxlen=args.maxlen, padding='post')

    print('loading model...')
    word_input = Input(shape=(args.maxlen,), dtype='int32', name='word_input')
    word_emb = Embedding(embeddings.shape[0], args.word_embedding_dim, input_length=args.maxlen, name='word_emb',
                         weights=[embeddings])(word_input)
    word_emb_d = Dropout(0.5)(word_emb)
    bilstm = Bidirectional(LSTM(args.lstm_dim, return_sequences=True))(word_emb_d)
    bilstm_d = Dropout(0.5)(bilstm)
    dense = TimeDistributed(Dense(args.nb_pos_tags))(bilstm_d)
    crf = ChainCRF()
    crf_output = crf(dense)
    model = load_model("model/keras_weights_0921.hdf5",
                       custom_objects={'ChainCRF': ChainCRF, 'sparse_loss': crf.sparse_loss}, compile=False)
    model.compile(loss=crf.sparse_loss, optimizer='adam', metrics=['sparse_categorical_accuracy'])

    prediction = model.predict(X_idxs_padded, args.batch_size, verbose=0)

    # TODO - 01. the function should return a segmented string

    with codecs.open(args.output_file, mode='w', encoding='utf-8') as results:
        for pred, word in zip(np.argmax(prediction, axis=2), X_chars):
            assert len(pred) >= len(word)

            for ch, est in zip(word, pred):
                results.write(ch + '\t' + index2pos[est] + '\n')
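            # for/else: the loop above never breaks, so this always runs once the
            # word's characters are written, emitting a word-boundary line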
            else:
                results.write('WB\tWB\n')
Example #4
    def build_model(self,
                    word_embedding_dim=200,
                    lstm_dim=100,
                    batch_size=10,
                    nb_epoch=1,
                    optimizer='adam'):
        self.lstm_dim = lstm_dim
        # cut texts after this number of words (among top max_features most common words)
        self.epoches = nb_epoch
        self.batch_size = batch_size
        self.embedding_dim = word_embedding_dim
        self.embeddings = self.build_embeddings()

        word_input = Input(shape=(self.maxlen, ),
                           dtype='int32',
                           name='word_input')
        word_emb = Embedding(self.embeddings.shape[0],
                             self.embedding_dim,
                             input_length=self.maxlen,
                             name='word_emb',
                             weights=[self.embeddings])(word_input)
        word_emb_d = Dropout(0.5)(word_emb)
        bilstm = Bidirectional(LSTM(self.lstm_dim,
                                    return_sequences=True))(word_emb_d)
        bilstm_d = Dropout(0.5)(bilstm)
        dense = TimeDistributed(Dense(len(self.index2pos)))(bilstm_d)
        crf = ChainCRF()
        crf_output = crf(dense)
        self.segmentation_model = Model(inputs=[word_input],
                                        outputs=[crf_output])
        self.segmentation_model.compile(
            loss=crf.sparse_loss,
            optimizer=optimizer,
            metrics=['sparse_categorical_accuracy'])
def test_tag_sequence():
    # Generate data

    n_samples, n_steps, n_classes = 1000, 16, 3
    U_true = get_test_transition_matrix(n_classes)
    (X_train, y_train), (X_test, y_test) = get_test_sequences(n_samples,
                                                              n_steps,
                                                              U_true)
    model = Sequential()
    crf = ChainCRF(input_shape=(n_steps, n_classes))
    model.add(crf)

    sgd = SGD(lr=0.2, momentum=0.0, decay=0.0, nesterov=False)
    model.compile(loss=crf.loss, optimizer=sgd, metrics=['accuracy'])
    history = model.fit(X_train, y_train, nb_epoch=1, batch_size=32,
                        validation_data=(X_test, y_test))

    assert(history.history['val_acc'][-1] >= 0.94)
Example #6
def terminate_task(shared_layer_output, task):
    """Terminate Task
    Terminate the provided task by sending the LSTM output through hidden layers
    first (if they are defined) and then sending the result to a softmax classifier.

    Args:
        shared_layer_output (object): Output of an LSTM layer.
        task (TaskConfig): Task configuration

    Returns:
        `tuple` of object: Reference to CRF layer, output layer, and task in case of CRF classifier, None, output
            layer, and task otherwise.
    """
    assert isinstance(task, TaskConfig)

    input_layer = shared_layer_output

    # Add hidden layers
    for i, hidden_layer_config in enumerate(task.hidden_layers):
        input_layer = Dense(
            units=hidden_layer_config.units,
            activation=hidden_layer_config.activation,
            name="hidden_%s_%d" % (task.name, i + 1)
        )(input_layer)

    if task.classifier == CLASSIFIER_SOFTMAX:
        # Add softmax layer
        return None, TimeDistributed(Dense(
            units=len(task.data_reader.get_labels()),
            activation=softmax
        ), name="softmax_output_%s" % task.name)(input_layer), task
    else:
        # Add dense layer to achieve the correct size
        input_layer = TimeDistributed(Dense(
            units=len(task.data_reader.get_labels())
        ))(input_layer)

        crf = ChainCRF(name="CRF_output_%s" % task.name)

        return crf, crf(input_layer), task
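
# Usage sketch (illustrative; `shared_bilstm` and `task_cfg` are hypothetical objects,
# and the loss selection simply follows the tuple contract described in the docstring):
#
#     crf, output_layer, task_cfg = terminate_task(shared_bilstm, task_cfg)
#     if crf is not None:
#         loss = crf.loss                      # CRF classifier
#     else:
#         loss = 'categorical_crossentropy'    # softmax classifier (assumed choice)
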
    bilstm_embedding = Embedding(len(chars) + 1,
                                 embedding_dim,
                                 input_length=sequence_length,
                                 mask_zero=True)(bilstm_inputs)
    # concatenate the CNN-extracted features with the plain character embeddings
    total_emb = merge([bilstm_embedding, cnn_max_pooling],
                      mode='concat',
                      concat_axis=2,
                      name='total_emb')
    emb_droput = Dropout(dropout)(total_emb)
    #blstm = Bidirectional(LSTM(64, return_sequences=True), merge_mode='sum')(emb_droput)
    blstm = Bidirectional(LSTM(64, return_sequences=True),
                          merge_mode='sum')(bilstm_embedding)
    drop = Dropout(dropout)(blstm)
    output = TimeDistributed(Dense(5))(drop)
    crf = ChainCRF()
    crf_output = crf(output)
    model = Model(input=[bilstm_inputs, cnn_inputs], output=crf_output)
    # checkpoint = ModelCheckpoint('./model/weights.{epoch:03d}-{val_acc:.4f}.hdf5', monitor='val_acc', verbose=1,
    #                             save_best_only=True, mode='auto')
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    batch_size = 1024
    history = model.fit([np.array(list(d['x'])),
                         np.array(list(d['x']))],
                        np.array(list(d['y'])).reshape((-1, maxlen, 5)),
                        batch_size=batch_size,
                        nb_epoch=50)
    model.save('./model/cnn_bilstm_crf_model.h5')
Example #8
def build_model(parameters,embedding_matrix =None, weightsPath = None):
	lstm_dim = parameters['word_lstm_dim']
	word_vocab_size = parameters['word_vocab_size'] 
	char_vocab_size = parameters['char_vocab_size']
	char_embedding_dim = parameters['char_dim']
	word_embedding_dim = parameters['word_dim']
	maxCharSize = parameters['maxCharSize']	
	cap_size = 	parameters['cap_size']
	cap_embed_size = parameters['cap_dim']
	max_words = parameters['max_words']
	nb_filters = parameters['cnn_nb_filters']
	window_length = parameters['cnn_window_length']
	learning_rate = parameters['learning_rate']
	decay_rate = parameters['decay_rate'] 
	momentum = parameters['momentum']
	clipvalue = parameters['clipvalue']
	tag_label_size = parameters['tag_label_size']
	dropout = parameters['dropout']

	char_input = Input(shape=(maxCharSize * max_words,), dtype='int32', name='char_input')
	char_emb = Embedding(char_vocab_size, char_embedding_dim, input_length=max_words*maxCharSize, dropout=dropout, name='char_emb')(char_input)
	char_cnn = Convolution1D(nb_filter=nb_filters,filter_length= window_length, activation='tanh', border_mode='full') (char_emb) 
	char_max_pooling = MaxPooling1D(pool_length=maxCharSize) (char_cnn) #  get output per word. this is the size of the hidden layer

	"""

	Summary for char layer alone.
	____________________________________________________________________________________________________
	Layer (type)                     Output Shape          Param #     Connected to 
	====================================================================================================
	char_input (InputLayer)          (None, 2000)          0           None refers to batch size             
	____________________________________________________________________________________________________
	char_emb (Embedding)             (None, 2000, 25)      1250        char_input[0][0] 
	____________________________________________________________________________________________25 is embedding dimension
	convolution1d_1 (Convolution1D)  (None, 2002, 30)      2280        char_emb[0][0]
	____________________________________________________________________________________________30 is the number of filters plus 2 because we use full padding
	maxpooling1d_1 (MaxPooling1D)    (None, 100, 30)        0           convolution1d_1[0][0]
	=============================================================================================max poolign to get 100 hidden units which will be carried over 
	Total params: 3530

	"""


	#based on https://github.com/pressrelations/keras/blob/a2d358e17ea7979983c3c6704390fe2d4b29bbbf/examples/conll2000_bi_lstm_crf.py
	word_input = Input(shape=(max_words,), dtype='int32', name='word_input')
	if (embedding_matrix is not None):
		word_emb = Embedding(word_vocab_size+1, word_embedding_dim,weights=[embedding_matrix], input_length=max_words, dropout=0, name='word_emb')(word_input)
	else:
		word_emb = Embedding(word_vocab_size+1, word_embedding_dim, input_length=max_words, dropout=0, name='word_emb')(word_input)

	caps_input = Input(shape=(max_words,), dtype='int32', name='caps_input')
	caps_emb = Embedding(cap_size, cap_embed_size, input_length=None, dropout=dropout, name='caps_emb')(caps_input)
	#concat axis refers to the axis whose dimension can be different
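	# e.g. word_emb is (batch, max_words, word_dim), caps_emb is (batch, max_words, cap_dim) and,
	# per the layer summary above, char_max_pooling is (batch, max_words, nb_filters); concatenating
	# on axis 2 yields (batch, max_words, word_dim + cap_dim + nb_filters)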
	total_emb = merge([word_emb, caps_emb,char_max_pooling], mode='concat', concat_axis=2,name ='total_emb')
	emb_droput = Dropout(dropout)(total_emb)
	# inner_init: initialization function of the inner (recurrent) cells, presumably the LSTM cell state
	bilstm_word  = Bidirectional(LSTM(lstm_dim,inner_init='uniform', forget_bias_init='one',return_sequences=True))(emb_droput)
	bilstm_word_d = Dropout(dropout)(bilstm_word)

	dense = TimeDistributed(Dense(tag_label_size))(bilstm_word_d)
	crf = ChainCRF()
	crf_output = crf(dense)
	# to account for gradient clipping
	#info on nesterov http://stats.stackexchange.com/questions/211334/keras-how-does-sgd-learning-rate-decay-work
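	# with decay > 0, Keras scales the learning rate per update: lr_t = lr / (1 + decay * iterations)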
	sgd = SGD(lr=learning_rate, decay=decay_rate, momentum=momentum, nesterov=False,clipvalue = clipvalue)



	model = Model(input=[word_input,caps_input,char_input], output=[crf_output])
	if(weightsPath):
		model.load_weights(weightsPath)
	model.compile(loss=crf.sparse_loss,
	              optimizer=sgd,
	              metrics=['sparse_categorical_accuracy'])

	model.summary()
	return model

def train_model (model,parameters,Words_id_train,caps_train,char_train,tag_train,Words_id_dev=None,caps_dev=None,char_dev = None,tag_dev=None):
	
	# define the checkpoint
	filepath="weights-improvement-BiLSTM-All-no-wd-{epoch:02d}-{loss:.4f}.hdf5"
	checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
	callbacks_list = [checkpoint]
	batch_size = parameters['batch_size']
	epoch_number = parameters['epoch_number']
	model.fit([Words_id_train,caps_train,char_train], tag_train,
          batch_size=batch_size,
          validation_data=([Words_id_dev,caps_dev,char_dev], tag_dev), nb_epoch=epoch_number,callbacks=callbacks_list)
	return model
Example #9
def save_result_to_hbase(x):
    nb_word = len(index_word)  # 1008
    nb_tag = len(index_tag)  # 16/14
    maxlen = 100
    word_embedding_dim = 100
    lstm_dim = 100
    batch_size = 64

    word_input = Input(shape=(maxlen, ), dtype='float32', name='word_input')
    word_emb = Embedding(nb_word,
                         word_embedding_dim,
                         input_length=maxlen,
                         dropout=0.2,
                         name='word_emb')(word_input)
    bilstm = Bidirectional(
        LSTM(lstm_dim, dropout_W=0.1, dropout_U=0.1,
             return_sequences=True))(word_emb)
    bilstm_d = Dropout(0.1)(bilstm)

    half_window_size = 5

    paddinglayer = ZeroPadding1D(padding=half_window_size)(word_emb)
    conv = Conv1D(nb_filter=50,
                  filter_length=(2 * half_window_size + 1),
                  border_mode='valid')(paddinglayer)
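    # padding by half_window_size on each side followed by a 'valid' conv with window
    # 2*half_window_size + 1 keeps the output sequence length equal to maxlen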
    conv_d = Dropout(0.1)(conv)
    dense_conv = TimeDistributed(Dense(50))(conv_d)
    rnn_cnn_merge = merge([bilstm_d, dense_conv], mode='concat', concat_axis=2)

    dense = TimeDistributed(Dense(nb_tag))(rnn_cnn_merge)
    crf = ChainCRF()
    crf_output = crf(dense)

    model = Model(input=[word_input], output=[crf_output])

    model.compile(loss=crf.sparse_loss,
                  optimizer=RMSprop(0.001),
                  metrics=['sparse_categorical_accuracy'])

    # model.load_weights('/home/weiwc/pkl/model.weights')
    model.load_weights('model.weights')
    X_test_cut = x[0]
    X_test_len = x[1]
    X_word = x[2]
    rowkey = str(x[3])

    # print type(X_test_cut)
    # print len(X_test_cut)
    # print X_test_cut

    # print (X_test_len)
    # print len(X_test_len)
    # print X_test_cut[0],X_test_cut[1],X_test_cut[2]

    Y_pred = model.predict(X_test_cut)
    # print "Y_pred",len(Y_pred),len(Y_pred[0]),len(Y_pred[1]),Y_pred
    # print "X_word",len(X_word),X_word

    j2 = 0
    i2 = 0
    t = []
    # tt = []
    # for i in range(12):
    #     tt.append([])
    for j1 in range(len(X_word)):
        # index_tag {0: 'PAD', 1: 'O', 2: 'B-ROLE', 3: 'I-ROLE', 4: 'B-PER', 5: 'I-PER', 6: 'B-CRIME', 7: 'I-CRIME', 8: 'B-TIME',
        #  9: 'I-TIME', 10: 'B-ORG', 11: 'I-ORG', 12: 'B-LOC', 13: 'I-LOC'}
        w = X_word[j1]
        tags = Y_pred[i2][j2]
        tag_flag = False
        t_tmp = []
        for i in range(14):
            if (tags[i] == 1) and i > 0:
                t_tmp.append(index_tag[i])
                t_tmp.append(w)
                t.append(t_tmp)
                break
        j2 += 1
        if j2 == X_test_len[
                i2]:  #X_test_len = [89, 37, 95, 86, 90, 100, 90, 94, 80, 79, 44, 59]
            j2 = 0
            i2 += 1
    for i in t:
        print i[0], i[1]
    # l2 = []
    # l3 = []
    # l22 = []
    # l23 = []
    # c = 0
    # ttl = ""
    # for i in t:
    #     if  i[0].startswith('B') and c == 0:
    #         l2.append(i[0])
    #         l3.append(i[1].decode("utf-8"))
    #         ttl = i[0].replace('B','I')
    #         c = c + 1
    #
    #     elif i[0] == ttl:
    #         l2.append(i[0])
    #         l3.append(i[1].decode("utf-8"))
    #     elif i[0].startswith('B') and c != 0:
    #         l22.append(l2)
    #         l23.append("".join(l3))
    #         l2 = []
    #         l3 = []
    #         l2.append(i[0])
    #         l3.append(i[1].decode("utf-8"))
    #         ttl = i[0].replace('B', 'I')
    # l22.append(l2)
    # l23.append("".join(l3))
    # taglist = ['B_ROLE','I_ROLE','B_PER','I_PER','B_CRIME','I_CRIME','B_TIME','I_TIME','B_ORG','I_ORG','B_LOC','I_LOC']
    # ret_t = {'PER': [], 'LOC': [], 'ORG': [], 'TIME': [], 'ROLE': [], 'CRIME': []}
    # index_tag {0: 'PAD', 1: 'O', 2: 'B-ROLE', 3: 'I-ROLE', 4: 'B-PER', 5: 'I-PER', 6: 'B-CRIME', 7: 'I-CRIME', 8: 'B-TIME',
    #  9: 'I-TIME', 10: 'B-ORG', 11: 'I-ORG', 12: 'B-LOC', 13: 'I-LOC'}
    # id = 0
    # for i in l22:
    #     ret_t[i[0].split("-")[1]].append(l23[id])
    #     id += 1
    #
    # t2 = []
    # for i in ret_t.keys():
    #     tmp = (rowkey, [rowkey, "d", i, ",".join(ret_t[i])])
    #     t2.append(tmp)
    # for i in t2:
    #     print i[1][2],i[1][3]

    # return t2
    return "-"
Example #10
def get_X(o_content):

    nb_word = len(index_word)  # 1008
    nb_tag = len(index_tag)  # 16/14
    maxlen = 100
    word_embedding_dim = 100
    lstm_dim = 100
    batch_size = 64

    word_input = Input(shape=(maxlen,), dtype='float32', name='word_input')
    word_emb = Embedding(nb_word, word_embedding_dim, input_length=maxlen, dropout=0.2, name='word_emb')(word_input)
    bilstm = Bidirectional(LSTM(lstm_dim, dropout_W=0.1, dropout_U=0.1, return_sequences=True))(word_emb)
    bilstm_d = Dropout(0.1)(bilstm)

    half_window_size = 5

    paddinglayer = ZeroPadding1D(padding=half_window_size)(word_emb)
    conv = Conv1D(nb_filter=50, filter_length=(2 * half_window_size + 1), border_mode='valid')(paddinglayer)
    conv_d = Dropout(0.1)(conv)
    dense_conv = TimeDistributed(Dense(50))(conv_d)
    rnn_cnn_merge = merge([bilstm_d, dense_conv], mode='concat', concat_axis=2)

    dense = TimeDistributed(Dense(nb_tag))(rnn_cnn_merge)
    crf = ChainCRF()
    crf_output = crf(dense)

    model = Model(input=[word_input], output=[crf_output])

    model.compile(loss=crf.sparse_loss,
                  optimizer=RMSprop(0.001),
                  metrics=['sparse_categorical_accuracy'])
    # model.load_weights('/home/weiwc/pkl/model.weights')
    model.load_weights('model.weights')

    x_sen=[]
    word_sen=[]
    content_re = o_content.replace(" ","")
    for line in content_re:
        word_sen.append(line)
        if line in dict_word:
            x_sen.append(dict_word[line])
        else:
            x_sen.append(1)
    X_test_cut=[]
    X_test_len=[]
    max_sen_len=100
    if len(x_sen) <= max_sen_len:
        X_test_cut.append(x_sen)
        X_test_len.append(len(x_sen))

    X_test_cut=pad_sequences(X_test_cut,maxlen=max_sen_len,padding='post')
    Y_pred = model.predict(X_test_cut)

    j2=0
    i2=0
    t = []
    for j1 in range(len(word_sen)):
        w = word_sen[j1]
        tags = Y_pred[i2][j2]
        t_tmp = []
        for i in range(14):
            if (tags[i] == 1):
                # t_tmp.append(index_tag[i])
                # t_tmp.append(w)
                # t.append(t_tmp)
                t.append(index_tag[i])
                break
        j2 += 1
        # if j2 == X_test_len[i2]:       #X_test_len = [89, 37, 95, 86, 90, 100, 90, 94, 80, 79, 44, 59]
        #     j2 = 0
        #     i2 += 1
    wl = re.split("[ ]{1,100}", o_content)
    tt = []
    start = 0
    end = 0
    for i in wl:
        end += len(i)
        tt.append(t[start:end])
        start += len(i)
    tt2 = []
    for i in range(len(tt)):
        flag = False
        for j in tt[i]:
            if j.startswith('B'):
                flag = True
                tt2.append("".join(wl[i]) + "|" + j.split("-")[1])
                break
        if not flag:
            for j in tt[i]:
                if j.startswith('I'):
                    flag = True
                    tt2.append("".join(wl[i]) + "|" + j.split("-")[1])
                    break
        if not flag:
            for j in tt[i]:
                tt2.append("".join(wl[i]) + "|" + j)
                break

    return "   ".join(tt2)
def main():

    # parse user input
    parser = argparse.ArgumentParser()

    #file related args
    parser.add_argument("-m",
                        "--model-dir",
                        default="./models/",
                        help="directory to save the best models")

    parser.add_argument(
        "-t",
        "--train-set",
        default="./data/EG.txt-train.txt",
        help="maximul sentence length (for fixed size input)")  #
    parser.add_argument("-v",
                        "--dev-set",
                        default="./data/EG.txt-dev.txt",
                        help="source vocabulary size")  #
    parser.add_argument("-s",
                        "--test-set",
                        default="./data/EG.txt-test.txt",
                        help="target vocabulary size")  #

    parser.add_argument("-i",
                        "--input",
                        default="./data/EG.txt-test.sample-eng.txt",
                        help="a sample input segmened file")  #
    parser.add_argument("-o", "--output", default="", help="POS output")  #

    # network related
    #input
    parser.add_argument("-e",
                        "--emb-size",
                        default=300,
                        type=int,
                        help="dimension of embedding")  # emb matrix col size
    parser.add_argument("-w",
                        "--window-size",
                        default=10,
                        type=int,
                        help="dimension of embedding")  #
    parser.add_argument("-d",
                        "--vocab-emb",
                        default="./data/segmented-vectors",
                        help="vocabulatry pre-trained embeddings")  #
    parser.add_argument("-r",
                        "--final_layer",
                        default="lstm",
                        help="Final optimization layer 'crf' or 'lstm'")

    #learning related
    parser.add_argument(
        "-a",
        "--learning-algorithm",
        default="adam",
        help="optimization algorithm (adam, sgd, adagrad, rmsprop, adadelta)")
    parser.add_argument("-b",
                        "--batch-size",
                        default=128,
                        type=int,
                        help="batch size")
    parser.add_argument("-n",
                        "--epochs",
                        default=100,
                        type=int,
                        help="nb of epochs")

    #others
    parser.add_argument("-V",
                        "--verbose-level",
                        default=1,
                        type=int,
                        help="verbosity level (0 < 1 < 2)")
    parser.add_argument("-g",
                        "--showGraph",
                        default=False,
                        help="show precision and accuracy graphs")  #
    parser.add_argument("-l",
                        "--train-model",
                        default=False,
                        type=lambda x: (str(x).lower() == 'true'),
                        help="Train the model, default False")

    args = parser.parse_args()

    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)

    # 5 to the left, 5 to the right
    windowSize = args.window_size

    print("Pos with Keras, only token, window size %d" % (windowSize))
    print("Train the model: %s" % (args.train_model))

    # Read in the vocab
    #print("Read in the vocab")
    vocabPath = args.vocab_emb

    word2Idx = {}  # Maps a word to the index in the embeddings matrix
    idx2word = {}
    embeddings = []  # Embeddings matrix

    with open(vocabPath, 'r') as fIn:
        idx = 0
        for line in fIn:
            split = line.strip().split(' ')
            embeddings.append(np.array([float(num) for num in split[1:]]))
            word2Idx[split[0]] = idx
            idx += 1

    idx2word = {v: k for k, v in word2Idx.items()}

    embeddings = np.asarray(embeddings, dtype='float32')

    embedding_size = embeddings.shape[1]

    # Create a mapping for our labels
    labels_list = getLabels(args.train_set)
    labels_list = set(labels_list + getLabels(args.dev_set))

    label2Idx = dict((l, i) for i, l in enumerate(labels_list))
    idx2Label = {v: k for k, v in label2Idx.items()}

    if (args.train_model == False):
        word2Idx = load_pickled_file(args.model_dir + '/word2Idx')
        label2Idx = load_pickled_file(args.model_dir + '/label2Idx')
        idx2Label = {v: k for k, v in label2Idx.items()}
    elif (not os.path.isfile(args.model_dir + '/list2idx.pkl')):
        save_pickled_file(word2Idx, args.model_dir + '/word2Idx')
        save_pickled_file(label2Idx, args.model_dir + '/label2Idx')

    print("Idx2Label:", idx2Label)

    if (args.train_model == True):
        # Read in data
        print("Read in data and create matrices")
        train_sentences = readFile(args.train_set)
        dev_sentences = readFile(args.dev_set)
        test_sentences = readFile(args.test_set)
    else:
        test_sentences = readTestFile(args.input)

    test_src = []
    test_trg = []
    for sentence in test_sentences:
        for word in sentence:
            if (args.train_model == True):
                test_src.append(word[0])
                test_trg.append(word[1])
            else:
                test_src.append(word.split('\t')[0])

    if (args.train_model == True):
        # Create numpy arrays
        X_train, y_train = createNumpyArray(train_sentences, windowSize,
                                            word2Idx, label2Idx)
        X_dev, y_dev = createNumpyArray(dev_sentences, windowSize, word2Idx,
                                        label2Idx)
        X_test, y_test = createNumpyArray(test_sentences, windowSize, word2Idx,
                                          label2Idx)
    else:
        X_test = createTestArray(test_sentences, windowSize, word2Idx,
                                 label2Idx)

    # print(test_src)

    # Create the network

    n_in = 2 * windowSize + 1
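    # e.g. a window of 5 tokens on each side gives 2*5 + 1 = 11 input positions per example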
    n_out = len(label2Idx)
    batch_size = args.batch_size
    epochs = args.epochs

    # If using the CRF, pad and expand the data into 3-D tensors of shape (batch, maxlen, ...)
    if (args.final_layer == 'crf'):
        maxlen = n_in
        if (args.train_model == True):
            X_train = sequence.pad_sequences(X_train,
                                             maxlen=maxlen,
                                             padding='post')
            y_train = sequence.pad_sequences(y_train,
                                             maxlen=maxlen,
                                             padding='post')
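            # add a trailing axis so the labels are (n_samples, maxlen, 1), the
            # integer-label shape consumed by the CRF's sparse loss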
            y_train = np.expand_dims(y_train, -1)

            X_dev = sequence.pad_sequences(X_dev,
                                           maxlen=maxlen,
                                           padding='post')
            y_dev = sequence.pad_sequences(y_dev,
                                           maxlen=maxlen,
                                           padding='post')
            y_dev = np.expand_dims(y_dev, -1)

            X_test = sequence.pad_sequences(X_test,
                                            maxlen=maxlen,
                                            padding='post')
            y_test = sequence.pad_sequences(y_test,
                                            maxlen=maxlen,
                                            padding='post')
            y_test = np.expand_dims(y_test, -1)
        else:
            X_test = sequence.pad_sequences(X_test,
                                            maxlen=maxlen,
                                            padding='post')

    print('number of classes:', n_out)
    print("Embeddings shape", embeddings.shape)
    print("input dim", embeddings.shape[0], embeddings.shape[1])

    if (args.final_layer == 'crf'):
        model = Sequential()
        model.add(
            Embedding(output_dim=embeddings.shape[1],
                      input_dim=embeddings.shape[0],
                      input_length=n_in,
                      weights=[embeddings],
                      trainable=False))
        model.add(Dropout(0.5))
        model.add(Bidirectional(LSTM(300, return_sequences=True)))
        model.add(Dropout(0.5))
        model.add(TimeDistributed(Dense(n_out)))
        crf = ChainCRF()
        model.add(crf)
        model.compile(loss=crf.sparse_loss,
                      optimizer=RMSprop(0.01),
                      metrics=['sparse_categorical_accuracy'])
    else:
        model = Sequential()
        model.add(
            Embedding(output_dim=embeddings.shape[1],
                      input_dim=embeddings.shape[0],
                      input_length=n_in,
                      weights=[embeddings],
                      trainable=False))

        model.add(Dropout(0.5))
        #model.add(LSTM(300, return_sequences=False))
        model.add(Bidirectional(LSTM(embedding_size, return_sequences=False)))
        model.add(Dropout(0.5))
        model.add(Dense(output_dim=n_out, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=args.learning_algorithm,
                      metrics=['accuracy'])

    model.summary()

    if (os.path.isfile(args.model_dir + '/keras_weights.hdf5')):
        model.load_weights(args.model_dir + '/keras_weights.hdf5')

    if (args.train_model == True):
        early_stopping = EarlyStopping(patience=5, verbose=1)
        checkpointer = ModelCheckpoint(args.model_dir + "/keras_weights.hdf5",
                                       verbose=1,
                                       save_best_only=True)

        history = model.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            #epochs=epochs,
            nb_epoch=epochs,
            verbose=1,
            shuffle=True,
            callbacks=[early_stopping, checkpointer],
            validation_data=[X_dev, y_dev])

    model.load_weights(args.model_dir + '/keras_weights.hdf5')

    if (args.train_model == True):
        preds_dev = model.predict_classes(X_dev, batch_size=64, verbose=0)
        if (args.final_layer == 'crf'):
            preds_dev = preds_dev.argmax(-1)

    if (args.final_layer == 'crf'):
        preds_test = model.predict_classes(X_test, batch_size=512,
                                           verbose=0).argmax(-1)
    else:
        preds_test = model.predict_classes(X_test, batch_size=512, verbose=0)

    # print("test_src:",len(test_src))
    # print("X_test", len(X_test))
    # print("preds_test",len(preds_test))
    if (args.output != ''):
        fout = open(args.output, 'w')
    else:
        fout = sys.stdout

    for w, p in zip(test_src, preds_test):
        #print("W:",w," P:",p)
        fout.write(w + '\t' +
                   (idx2Label[p] if (p < len(idx2Label)) else 'UNKNOWN') +
                   '\n')

    #print(score_test[1])
    if (args.train_model == True):
        from sklearn.metrics import confusion_matrix, classification_report
        score_test = model.evaluate(X_test, y_test, batch_size=500)
        print("Test Score:", score_test[1])
        score_dev = model.evaluate(X_dev, y_dev, batch_size=500)
        print("Dev Score:", score_dev[1])

        print('')
        print(
            classification_report(np.argmax(y_dev, axis=1),
                                  preds_dev,
                                  target_names=labels_list))

        if (args.showGraph):
            print('')
            print(confusion_matrix(np.argmax(y_dev, axis=1), preds_dev))

            print('')
            print(
                classification_report(np.argmax(y_test, axis=1),
                                      preds_test,
                                      target_names=labels_list))
            print('')
            print(confusion_matrix(np.argmax(y_test, axis=1), preds_test))

            # # list all data in history
            print(history.history.keys())
            import matplotlib.pyplot as plt
            # summarize history for accuracy
            plt.plot(history.history['acc'])
            plt.plot(history.history['val_acc'])
            plt.title('model accuracy')
            plt.ylabel('accuracy')
            plt.xlabel('epoch')
            plt.legend(['train', 'test'], loc='upper left')
            plt.show()
            #summarize history for loss
            plt.plot(history.history['loss'])
            plt.plot(history.history['val_loss'])
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(['train', 'test'], loc='upper right')
            plt.show()

        score, y_true_word, y_pred_word = computeWordLevelAccuracy(
            test_trg, preds_test, idx2Label)
        print(score)