Exemple #1
0
import os
import sys
# Put the directory one level above this script's directory on the import
# path (presumably where the `utils` package imported below lives), and echo
# that directory for debugging.
script_dir, _ = os.path.split(os.path.realpath(__file__))
project_root, _ = os.path.split(script_dir)
sys.path.append(project_root)
print(project_root)

# This import must come after the sys.path tweak above.
from utils.Preprocess import Preprocess

sentence = "i love potato, i have a party. so i need your help. i'm happy."

preprocessor = Preprocess()

# Tag the sentence once, then extract keywords twice from the same tagging:
# first with without_tag=False, then with without_tag=True.
tagged = preprocessor.pos(sentence)

for without_tag in (False, True):
    keywords = preprocessor.get_keywords(tagged, without_tag=without_tag)
    print(keywords)
Exemple #2
0
from utils.Preprocess import Preprocess
from tensorflow.keras import preprocessing

# Sample utterance (roughly: "I'd like to order tangsuyuk at 10 a.m. tomorrow").
# Alternative input kept for experimentation:
#   "내일 오전 10시에 짬뽕 주문하고 싶어ㅋㅋ"
sentence = "내일 오전 10시에 탕수육 주문하고 싶어"

# Build the preprocessor from the word-index dictionary and the user dictionary.
preprocessor = Preprocess(
    userdic='../utils/user_dic.tsv',
    word2index_dic='../train_tools/dict/chatbot_dict.bin',
)

# Tag the sentence and extract its keywords with without_tag=False.
tagged = preprocessor.pos(sentence)
keywords = preprocessor.get_keywords(tagged, without_tag=False)
print(keywords)

# Disabled follow-up: turn the keywords into a word-index sequence and pad it.
#
# w2i = preprocessor.get_wordidx_sequence(keywords)
# sequences = [w2i]
#
# MAX_SEQ_LEN = 15    # embedding vector size
# padded_seqs = preprocessing.sequence.pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post')
#
# print(keywords)
# print(sequences)
# print(padded_seqs)
# Prepare padded word-index sequences for the intent training data.
#
# pandas was used below without ever being imported (NameError on `pd`), so
# the imports this snippet needs are brought into scope here.
import pandas as pd
from tensorflow.keras import preprocessing

from config.GlobalParams import MAX_SEQ_LEN
from utils.Preprocess import Preprocess

# Read the training data: one query string and one intent label per row.
train_file = "drive/MyDrive/deep-chatbot/models/intent/total_train_data.csv"
data = pd.read_csv(train_file, delimiter=',')
queries = data['query'].tolist()
intents = data['intent'].tolist()

p = Preprocess(word2index_dic=
               'drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin')

# Build one word-index sequence per query (step 1).
sequences = []
for sentence in queries:
    # str() guards against non-string cells (e.g. NaN from empty CSV fields).
    pos = p.pos(str(sentence))
    keywords = p.get_keywords(pos, without_tag=True)
    seq = p.get_wordidx_sequence(keywords)
    sequences.append(seq)

# Pad the word-index sequences to a common length (step 2).
# MAX_SEQ_LEN is the length of each padded sequence; padding='post' pads at
# the end of each sequence.
padded_seqs = preprocessing.sequence.pad_sequences(sequences,
                                                   maxlen=MAX_SEQ_LEN,
                                                   padding='post')

# Shape noted by the original author for their dataset: (105658, 15)
print(padded_seqs.shape)
print(len(intents))  # 105658 on the original author's dataset

# Build the training, validation, and test datasets (step 3)