"""Smoke-test Preprocess keyword extraction on an English sentence."""
import os
import sys

# Make the project root (the parent of this file's directory) importable.
current_dir = os.path.dirname(os.path.realpath(__file__))
parent_dir = os.path.dirname(current_dir)
# grandparent_dir = os.path.dirname(parent_dir)
sys.path.append(parent_dir)
print(parent_dir)

from utils.Preprocess import Preprocess

sent = "i love potato, i have a party. so i need your help. i'm happy."

p = Preprocess()
pos = p.pos(sent)

# Keywords with their POS tags attached.
ret = p.get_keywords(pos, without_tag=False)
print(ret)

# Keywords only, tags stripped.
ret = p.get_keywords(pos, without_tag=True)
print(ret)
"""Check keyword extraction (and, optionally, word-index padding) for a Korean order sentence."""
from utils.Preprocess import Preprocess
from tensorflow.keras import preprocessing

# sent = "내일 오전 10시에 짬뽕 주문하고 싶어ㅋㅋ"
sent = "내일 오전 10시에 탕수육 주문하고 싶어"

p = Preprocess(
    word2index_dic='../train_tools/dict/chatbot_dict.bin',
    userdic='../utils/user_dic.tsv',
)

pos = p.pos(sent)
keywords = p.get_keywords(pos, without_tag=False)
print(keywords)

# w2i = p.get_wordidx_sequence(keywords)
# sequences = [w2i]
#
# MAX_SEQ_LEN = 15  # embedding vector length
# padded_seqs = preprocessing.sequence.pad_sequences(
#     sequences, maxlen=MAX_SEQ_LEN, padding='post')
#
# print(keywords)
# print(sequences)
# print(padded_seqs)
"""Build padded word-index sequences from the intent training CSV.

NOTE(review): this chunk originally referenced `pd` and `preprocessing`
without importing them anywhere in the visible file; the imports below
make the script self-contained.
"""
import pandas as pd
from tensorflow.keras import preprocessing

from utils.Preprocess import Preprocess
from config.GlobalParams import MAX_SEQ_LEN  # fixed model input length

# Load the training data (expects 'query' and 'intent' columns).
train_file = "drive/MyDrive/deep-chatbot/models/intent/total_train_data.csv"
data = pd.read_csv(train_file, delimiter=',')
queries = data['query'].tolist()
intents = data['intent'].tolist()

p = Preprocess(word2index_dic=
               'drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin')

# Convert each query into a word-index sequence.
sequences = []
for sentence in queries:
    pos = p.pos(str(sentence))
    keywords = p.get_keywords(pos, without_tag=True)
    seq = p.get_wordidx_sequence(keywords)
    sequences.append(seq)

# Pad every sequence to MAX_SEQ_LEN ('post' = zeros appended at the end).
padded_seqs = preprocessing.sequence.pad_sequences(
    sequences, maxlen=MAX_SEQ_LEN, padding='post')

print(padded_seqs.shape)  # e.g. (105658, 15)
print(len(intents))       # e.g. 105658

# Next step: split into train / validation / test datasets.