def handle_data():
    """Flask view: classify the sentiment of a query and return an image.

    POST -> re-renders the search page.
    GET  -> reads ``content`` from the query string, loads the pickled
            classifier and TF-IDF vectorizer, predicts sentiment, and sends
            back ``cry.jpg`` (prediction == 0) or ``smile.jpg`` (otherwise).
    """
    if request.method == 'POST':
        return render_template("search.html")
    if request.method == 'GET':
        query = request.args.get('content')

        # NOTE(review): models are re-loaded from disk on every request;
        # consider loading once at startup if this becomes a hot path.
        # Also: pickle.load is only safe because these are our own model
        # files — never point these paths at untrusted data.
        model = NLPModel()
        with open('lib/models/SentimentClassifier.pkl', 'rb') as f:
            model.clf = pickle.load(f)
        with open('lib/models/TFIDFVectorizer.pkl', 'rb') as f:
            model.vectorizer = pickle.load(f)

        uq_vectorized = model.vectorizer_transform(np.array([query]))
        prediction = model.predict(uq_vectorized)
        pred_proba = model.predict_proba(uq_vectorized)
        confidence = round(pred_proba[0], 3)
        print(prediction, confidence)

        # 0 is treated as negative sentiment; anything else as positive.
        # NOTE(review): 'image/jpg' is not a registered MIME type
        # ('image/jpeg' is standard) — kept as-is to preserve behavior.
        filename = 'cry.jpg' if prediction == 0 else 'smile.jpg'
        return send_file(filename, mimetype='image/jpg')
# Build the vocabulary mappings from the configured data.
char2idx, idx2char, configs.vocabulary_length = load_vocabulary(
    configs.vocabulary_path, configs.data_path, configs.tokenize_as_morph)

# The query sentence comes from the command line. Renamed from `input`
# to avoid shadowing the builtin.
sentence = " ".join(sys.argv[1:])
print(sentence)

# Encoder input for the query. The decoder input/target start empty —
# they exist only to satisfy the dataset structure (not used for training).
predict_input_enc, _ = enc_processing(
    [sentence], char2idx, configs.max_sequence_length,
    configs.tokenize_as_morph)
predict_output_dec, _ = dec_output_processing(
    [""], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)
predict_target_dec = dec_target_processing(
    [""], char2idx, configs.max_sequence_length, configs.tokenize_as_morph)

model = NLPModel(configs)
if os.path.exists(configs.f_name):
    model.load_model(configs.f_name)

# Autoregressive decoding: feed the answer-so-far back in as the decoder
# input until the model signals it is finished or max length is reached.
for i in range(configs.max_sequence_length):
    if i > 0:
        predict_output_dec, _ = dec_output_processing(
            [answer], char2idx, configs.max_sequence_length,
            configs.tokenize_as_morph)
        predict_target_dec = dec_target_processing(
            [answer], char2idx, configs.max_sequence_length,
            configs.tokenize_as_morph)
    dataset_test = dataset_process(
        predict_input_enc, predict_output_dec, predict_target_dec, 1)
    for (feature, _) in dataset_test.take(1):
        predictions = model.predict(feature)
    # Convert the predicted indices back to text.
    answer, finished = pred_next_string(predictions.numpy(), idx2char)
    if finished:
        break

print("answer: ", answer)
def main():
    """Train the seq2seq chat model, then run one sample autoregressive decode."""
    configs = Config()

    # Ensure the output directory exists.
    data_out_path = os.path.join(os.getcwd(), './out')
    os.makedirs(data_out_path, exist_ok=True)

    # Build the vocabulary mappings from the data.
    char2idx, idx2char, configs.vocabulary_length = load_vocabulary(
        configs.vocabulary_path, configs.data_path, configs.tokenize_as_morph)

    # Load the train/eval splits.
    train_input, train_label, eval_input, eval_label = load_data(
        configs.data_path)

    # Training set: encoder input, decoder input, decoder target.
    # Sequence lengths are returned but unused here, hence `_`.
    train_input_enc, _ = enc_processing(
        train_input, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    train_output_dec, _ = dec_output_processing(
        train_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    train_target_dec = dec_target_processing(
        train_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)

    # Evaluation set, encoded the same way.
    eval_input_enc, _ = enc_processing(
        eval_input, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    eval_output_dec, _ = dec_output_processing(
        eval_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    eval_target_dec = dec_target_processing(
        eval_label, char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)

    dataset_train = dataset_process(
        train_input_enc, train_output_dec, train_target_dec,
        configs.batch_size)
    # NOTE(review): dataset_eval is built but never consumed in this
    # function — kept for behavior parity; confirm whether evaluation
    # was meant to run here.
    dataset_eval = dataset_process(
        eval_input_enc, eval_output_dec, eval_target_dec,
        configs.batch_size)

    model = NLPModel(configs)
    if os.path.exists(configs.f_name):
        model.load_model(configs.f_name)

    # Train, checkpointing every 200 steps.
    for i, (features, labels) in enumerate(
            dataset_train.take(configs.train_steps)):
        model.train(features, labels)
        if i % 200 == 0:
            model.save_model(configs.f_name)

    # Sample inference on a fixed query. The decoder input/target start
    # empty — they exist only to satisfy the dataset structure.
    predict_input_enc, _ = enc_processing(
        ["가끔 궁금해"], char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    predict_output_dec, _ = dec_output_processing(
        [""], char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)
    predict_target_dec = dec_target_processing(
        [""], char2idx, configs.max_sequence_length,
        configs.tokenize_as_morph)

    # Autoregressive decoding: feed the answer-so-far back into the
    # decoder until the model reports it is finished or max length hits.
    for i in range(configs.max_sequence_length):
        if i > 0:
            predict_output_dec, _ = dec_output_processing(
                [answer], char2idx, configs.max_sequence_length,
                configs.tokenize_as_morph)
            predict_target_dec = dec_target_processing(
                [answer], char2idx, configs.max_sequence_length,
                configs.tokenize_as_morph)
        dataset_test = dataset_process(
            predict_input_enc, predict_output_dec, predict_target_dec, 1)
        for (feature, _) in dataset_test.take(1):
            predictions = model.predict(feature)
        # Convert the predicted indices back to text.
        answer, finished = pred_next_string(predictions.numpy(), idx2char)
        if finished:
            break

    print("answer: ", answer)