コード例 #1
0
def handle_data():
    """Serve the search form (POST) or a sentiment image for a query (GET).

    POST renders ``search.html``.  GET reads the ``content`` query
    parameter, loads the pickled classifier and vectorizer from
    ``lib/models``, runs the model on the query, and sends ``cry.jpg`` when
    the prediction equals 0 and ``smile.jpg`` otherwise (presumably 0 is the
    negative-sentiment class -- confirm against the trained model).

    Returns:
        The rendered template, an image response, a ``(message, 400)`` pair
        when ``content`` is missing, or ``None`` for any other HTTP method.
    """
    if request.method == 'POST':
        return render_template("search.html")
    if request.method != 'GET':
        # Other methods produce no response here, exactly as before.
        return None

    query = request.args.get('content')
    if query is None:
        # Without this guard a missing parameter is handed to the vectorizer
        # as ``None`` and surfaces as an unhandled server error instead of a
        # client error.
        return "Missing required query parameter 'content'.", 400

    model = NLPModel()

    # NOTE(security): pickle.load can execute arbitrary code stored in the
    # file; these model files must only ever come from a trusted source.
    # NOTE(review): both files are re-read on every request; consider loading
    # them once at start-up.
    with open('lib/models/SentimentClassifier.pkl', 'rb') as f:
        model.clf = pickle.load(f)
    with open('lib/models/TFIDFVectorizer.pkl', 'rb') as f:
        model.vectorizer = pickle.load(f)

    uq_vectorized = model.vectorizer_transform(np.array([query]))
    prediction = model.predict(uq_vectorized)
    pred_proba = model.predict_proba(uq_vectorized)

    confidence = round(pred_proba[0], 3)
    print(prediction, confidence)

    # NOTE(review): the registered MIME type for JPEG is 'image/jpeg'; the
    # original value is kept so the response header does not change.
    filename = 'cry.jpg' if prediction == 0 else 'smile.jpg'
    return send_file(filename, mimetype='image/jpg')
コード例 #2
0
ファイル: predict.py プロジェクト: suryansh2020/mynlp
    # Load the vocabulary; the third return value is stored on the config as
    # vocabulary_length.  (char2idx / idx2char are presumably token->index
    # and index->token lookups -- confirm in load_vocabulary.)
    char2idx, idx2char, configs.vocabulary_length = load_vocabulary(configs.vocabulary_path, configs.data_path, configs.tokenize_as_morph)

    # The query is every command-line argument joined by single spaces.
    # NOTE(review): the name `input` shadows the builtin in this scope.
    input = " ".join(sys.argv[1:])
    print(input)
    # Encode the query once.  The decoder input/target start from an empty
    # string because no answer has been generated yet.
    predict_input_enc, predict_input_enc_length = enc_processing([input], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
    predict_output_dec, predict_output_dec_length = dec_output_processing([""], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
    predict_target_dec = dec_target_processing([""], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)

    # Restore previously saved weights only when the file exists; otherwise
    # the freshly constructed model is used as-is.
    model = NLPModel(configs)
    if os.path.exists(configs.f_name):
        model.load_model(configs.f_name)

    # Iterative decoding, at most max_sequence_length steps: after the first
    # step, the answer produced so far is fed back in as the decoder
    # input/target.
    for i in range(configs.max_sequence_length):
        if i > 0:
            predict_output_dec, predict_output_decLength = dec_output_processing([answer], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
            predict_target_dec = dec_target_processing([answer], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
        # This is the part that runs the prediction.

        # Wrap the single example as a dataset with batch size 1 and run the
        # model on its first element.
        dataset_test = dataset_process(predict_input_enc, predict_output_dec, predict_target_dec, 1)
        for (feature, _) in dataset_test.take(1):
            predictions = model.predict(feature)

        # pred_next_string returns the answer text plus a flag that ends the
        # loop (presumably set when an end token is produced -- confirm).
        answer, finished = pred_next_string(predictions.numpy(), idx2char)

        if finished:
            break

    # The predicted values have been converted to text above so that they
    # are human-readable; print the final answer.
    print("answer: ", answer)
コード例 #3
0
ファイル: main.py プロジェクト: suryansh2020/mynlp
def _decode_answer(model, configs, char2idx, idx2char, question):
    """Generate the model's reply to ``question`` step by step.

    The question is encoded once.  On every step the answer produced so far
    (initially the empty string) is used as decoder input/target, the model
    is run on that single example, and ``pred_next_string`` turns the
    prediction into the updated answer.  Decoding stops when
    ``pred_next_string`` reports completion or after
    ``configs.max_sequence_length`` steps.

    Returns:
        The last answer returned by ``pred_next_string``, or ``""`` if no
        decoding step ran.
    """
    predict_input_enc, _ = enc_processing(
        [question], char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)

    # Starting from "" makes the first step use the empty decoder
    # input/target without special-casing it, and keeps the result defined
    # even when max_sequence_length is 0.
    answer = ""
    for _step in range(configs.max_sequence_length):
        # This is not training, so there is no real decoder input/target;
        # they are supplied only to match the structure the model expects.
        predict_output_dec, _ = dec_output_processing(
            [answer], char2idx, configs.max_sequence_length,
            configs.tokenize_as_morph)
        predict_target_dec = dec_target_processing(
            [answer], char2idx, configs.max_sequence_length,
            configs.tokenize_as_morph)

        # Run the prediction on a one-example dataset (batch size 1).
        dataset_test = dataset_process(predict_input_enc,
                                       predict_output_dec,
                                       predict_target_dec, 1)
        for (feature, _) in dataset_test.take(1):
            predictions = model.predict(feature)

        # Convert the predicted values to readable text.
        answer, finished = pred_next_string(predictions.numpy(), idx2char)
        if finished:
            break

    return answer


def main():
    """Train the model and periodically checkpoint and print a sample reply.

    Builds the vocabulary, preprocesses the training and evaluation data,
    restores saved weights when ``configs.f_name`` exists, then trains for
    up to ``configs.train_steps`` batches.  Every 200th step (including
    step 0) the model is saved and a reply to a fixed sample question is
    decoded and printed.
    """
    configs = Config()

    data_out_path = os.path.join(os.getcwd(), './out')
    os.makedirs(data_out_path, exist_ok=True)

    # Build the vocabulary from the data.
    char2idx, idx2char, configs.vocabulary_length = load_vocabulary(
        configs.vocabulary_path, configs.data_path, configs.tokenize_as_morph)
    # Load the training and evaluation data.
    train_input, train_label, eval_input, eval_label = load_data(
        configs.data_path)

    # Training set: encoder input, decoder input, decoder target.
    train_input_enc, _ = enc_processing(
        train_input, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    train_output_dec, _ = dec_output_processing(
        train_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    train_target_dec = dec_target_processing(
        train_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)

    # Evaluation set: encoder input, decoder input, decoder target.
    eval_input_enc, _ = enc_processing(
        eval_input, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    eval_output_dec, _ = dec_output_processing(
        eval_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    eval_target_dec = dec_target_processing(
        eval_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)

    dataset_train = dataset_process(train_input_enc, train_output_dec,
                                    train_target_dec, configs.batch_size)
    # NOTE(review): the evaluation dataset is built but never consumed by
    # the loop below; kept so the preprocessing calls still run as before.
    dataset_eval = dataset_process(eval_input_enc, eval_output_dec,
                                   eval_target_dec, configs.batch_size)

    model = NLPModel(configs)
    if os.path.exists(configs.f_name):
        model.load_model(configs.f_name)

    # `step` is kept distinct from any index used while decoding so the
    # training counter is never shadowed.
    for step, (features, labels) in enumerate(
            dataset_train.take(configs.train_steps)):
        model.train(features, labels)
        if step % 200 != 0:
            continue

        model.save_model(configs.f_name)

        # Print a sample reply as a quick qualitative progress check.
        answer = _decode_answer(model, configs, char2idx, idx2char,
                                "가끔 궁금해")
        print("answer: ", answer)