def prediction(path, mode="bert_bilstm", is_eval=False):
    """Run NER inference over the file at *path* with the model chosen by *mode*.

    Supported modes: "lstm", "bilstm", "bert_bilstm", "rnn"; any other value
    falls through to the word2vec-BiLSTM-CRF model.

    :param path: input file handed to NERInference.
    :param mode: which trained model/checkpoint to load.
    :param is_eval: forwarded to NERInference.predict_all.
    :return: the predicted strings from predict_all (the index list is dropped).
    """
    labels_to_ix, ix_to_label = NER_pre_data.build_label(normal_param.labels)
    vocab = process_data_for_keras.read_vocab(normal_param.lstm_vocab)
    num_labels = len(labels_to_ix)

    if mode == "lstm":
        model = keras_LSTM_CRF.load_embedding_bilstm2_crf_model(
            normal_param.save_path_lstm, len(vocab), num_labels,
            normal_param.max_length)
    elif mode == "bilstm":
        model = keras_BILSTM_CEF.load_embedding_bilstm2_crf_model(
            normal_param.save_path_bilstm, len(vocab), num_labels,
            normal_param.max_length)
    elif mode == "bert_bilstm":
        model = keras_Bert_bilstm_crf.load_embedding_bilstm2_crf_model(
            normal_param.save_path_bert_bilstm, num_labels)
    elif mode == "rnn":
        # NOTE(review): an input length of 0 is passed here, mirroring the
        # original call — confirm keras_RNN_CRF treats 0 as "unspecified".
        model = keras_RNN_CRF.load_embedding_bilstm2_crf_model(
            normal_param.save_path_gru, len(vocab), num_labels, 0)
    else:
        # word2vec fallback: the word-level vocab replaces the char vocab
        # for everything downstream, including NERInference.
        embeddings_matrix, vocab = process_data_for_keras.txtpad_use_word2vec()
        model = keras_word2vec_bilstm_crf.load_embedding_bilstm2_crf_model(
            normal_param.save_path_wordVEC_bilstm, num_labels,
            embeddings_matrix, normal_param.max_length)

    inferer = NERInference.NERInference(
        model, vocab, ix_to_label, len(vocab), path, mode=mode)
    predicted, _ = inferer.predict_all(is_eval)
    return predicted
def process_data(embeding = None, is_train = True, vocab2 = None):
    """Prepare padded train/test arrays for the chosen embedding method.

    :param embeding: embedding method: "bert", "wordvec", or None.
    :param is_train: True -> read the training file, split into train/test
                     and return both; False -> read only the held-out test
                     file.
    :param vocab2: vocabulary used in place of the char vocab when
                   embeding == "wordvec".
    :return: (x_train, y_train, x_test, y_test, vocab_size, n_labels) when
             is_train is True, otherwise (x_test, y_test).
    """
    labels_to_ix, _ = NER_pre_data.build_label(normal_param.labels)
    vocab = read_vocab(normal_param.lstm_vocab)

    if not is_train:
        x, y = read_data(normal_param.head_test_path, vocab, labels_to_ix)
        length = gain_max_length(x, [])
        y_test, x_test = deal_txt_label_to_array(
            x, y, vocab, labels_to_ix, length, mode=embeding)
        return x_test, y_test

    x, y = read_data(normal_param.head_path, vocab, labels_to_ix)
    x_train, y_train, x_test, y_test = split_tst_trn(x, y, 50)
    length = gain_max_length(x_train, x_test)

    # "wordvec" looks tokens up in the caller-supplied word vocabulary;
    # every other mode uses the character vocab read above.
    lookup = vocab2 if embeding == "wordvec" else vocab
    x_train, y_train, x_test, y_test = list_to_array(
        x_train, y_train, x_test, y_test, lookup, labels_to_ix, length,
        wordembeding=embeding)

    # CRF targets need a trailing singleton dimension (…, 1).
    y_test = np.expand_dims(y_test, 2)
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))

    return x_train, y_train, x_test, y_test, len(vocab), len(labels_to_ix)
def process_test_data():
    """Load the held-out test set and convert it to padded arrays (bert mode).

    :return: (x_test, y_test) arrays built from normal_param.head_test_path,
             padded to normal_param.max_length.
    """
    labels_to_ix, _ = NER_pre_data.build_label(normal_param.labels)
    vocab = read_vocab(normal_param.lstm_vocab)
    texts, tags = read_data(normal_param.head_test_path, vocab, labels_to_ix)
    y_test, x_test = deal_txt_label_to_array(
        texts, tags, vocab, labels_to_ix, normal_param.max_length, mode="bert")
    return x_test, y_test
def process_data_gen(data, label, embeding = None):
    """Shuffle one generator batch of (data, label) and convert it to arrays.

    :param data: raw token sequences.
    :param label: matching tag sequences.
    :param embeding: kept for interface compatibility — the conversion below
                     always runs with mode=None regardless of its value.
    :return: (x_train, y_train, vocab_size, n_labels); y_train carries a
             trailing singleton dimension for the CRF layer.
    """
    labels_to_ix, _ = NER_pre_data.build_label(normal_param.labels)
    vocab = read_vocab(normal_param.lstm_vocab)

    data, label = normal_util.shuffle(data, label)
    x_train, y_train = deal_txt_label_to_array(
        data, label, vocab, labels_to_ix, normal_param.max_length, mode=None)
    y_train = np.expand_dims(y_train, 2)
    return x_train, y_train, len(vocab), len(labels_to_ix)