Example #1
def __init__(self, img_size=32, stacks=4, skips=4, return_seq=False):
    env = gym_super_mario_bros.make('SuperMarioBros-v2')
    self.env = JoypadSpace(env, SIMPLE_MOVEMENT)
    self.preprocess = Preprocess(img_size, stacks, return_seq)
    self.skips = skips
    self.action_space = self.env.action_space
    self.observation_space = (img_size, img_size, stacks)
Example #2
class Mario():
    def __init__(self, img_size=32, stacks=4, skips=4, return_seq=False):
        env = gym_super_mario_bros.make('SuperMarioBros-v2')
        self.env = JoypadSpace(env, SIMPLE_MOVEMENT)
        self.preprocess = Preprocess(img_size, stacks, return_seq)
        self.skips = skips
        self.action_space = self.env.action_space
        self.observation_space = (img_size, img_size, stacks)

    def reset(self):
        self.preprocess.reset()
        s = self.env.reset()
        s = self.preprocess(s)
        return s

    def step(self, a):
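        # Frame skipping: repeat the chosen action for `skips` frames, summing
        # the reward; only the last preprocessed frame is returned.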
        total_r = 0
        for i in range(self.skips):
            self.env.render()

            n_s, r, done, info = self.env.step(a)
            n_s = self.preprocess(n_s)
            total_r += r

            if done: break

        return n_s, total_r, done, info
Example #3
class Pong():
    def __init__(self, img_size=32, stacks=4, skips=4, return_seq=False):
        self.env = gym.make('Pong-v0')
        self.preprocess = Preprocess(img_size, stacks, return_seq)
        self.skips = skips
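        # Map the reduced 3-action space onto the underlying Atari Pong actions
        # (0 is NOOP; 2 and 3 move the paddle).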
        self.pong_action = {0: 0, 1: 2, 2: 3}
        self.action_space = self.env.action_space
        self.action_space.n = 3
        self.observation_space = (img_size, img_size, stacks)

    def reset(self):
        self.preprocess.reset()
        s = self.env.reset()
        s = self.preprocess(s)
        return s

    def step(self, a):
        total_r = 0
        for i in range(self.skips):
            self.env.render()

            n_s, r, done, info = self.env.step(self.pong_action[a])
            n_s = self.preprocess(n_s)
            total_r += r

            if done: break

        return n_s, total_r, done, info
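
# Added usage sketch (not part of the original examples): drive one of the
# wrappers above with random actions. Assumes the Pong class from Example #3
# is importable; only the reset/step API shown above is used.
import numpy as np

env = Pong(img_size=32, stacks=4, skips=4)
s = env.reset()
done = False
total_reward = 0
while not done:
    a = np.random.randint(env.action_space.n)  # random action in {0, 1, 2}
    s, r, done, info = env.step(a)             # the action is repeated for `skips` frames
    total_reward += r
print("episode reward:", total_reward)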
Example #4
def __init__(self, img_size=32, stacks=4, skips=4, return_seq=False):
    self.env = gym.make('Pong-v0')
    self.preprocess = Preprocess(img_size, stacks, return_seq)
    self.skips = skips
    self.pong_action = {0: 0, 1: 2, 2: 3}
    self.action_space = self.env.action_space
    self.action_space.n = 3
    self.observation_space = (img_size, img_size, stacks)
Example #5
import os
import sys
currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
#grandparentdir = os.path.dirname(parentdir)
sys.path.append(parentdir)
print(parentdir)

from utils.Preprocess import Preprocess

sent = "i love potato, i have a party. so i need your help. i'm happy."

p = Preprocess()

pos = p.pos(sent)

ret = p.get_keywords(pos, without_tag=False)
print(ret)

ret = p.get_keywords(pos, without_tag=True)
print(ret)
Example #6
import os
import sys
currentdir = os.path.dirname(os.path.realpath(__file__))
grandparentdir = os.path.dirname(os.path.dirname(currentdir))
sys.path.append(grandparentdir)
from utils.Preprocess import Preprocess
from tensorflow.keras import preprocessing
import pickle


def read_corpus_data(filename):
    with open(filename, 'r') as f:
        data = [line.split('\t') for line in f.read().splitlines()]
        data = data[1:]
    return data


corpus_data = read_corpus_data('./corpus.txt')

p = Preprocess()
dict = []
for c in corpus_data:
    pos = p.pos(c[1])
    for k in pos:
        dict.append(k[0])

# Build the word2index mapping used as the dictionary
# The first index of the dictionary is reserved for OOV
tokenizer = preprocessing.text.Tokenizer(oov_token='OOV')
tokenizer.fit_on_texts(dict)
word_index = tokenizer.word_index

# Save the dictionary file
f = open("chatbot_dict.bin", "wb")
try:
    # Persist word_index so it can be reloaded later (see the examples below)
    pickle.dump(word_index, f)
except Exception as e:
    print(e)
finally:
    f.close()
Example #7
import pandas as pd
import tensorflow as tf
from tensorflow.keras import preprocessing
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout, Conv1D, GlobalMaxPool1D, concatenate
from tensorflow.keras.utils import plot_model

# Load the training data
train_file = "drive/MyDrive/deep-chatbot/models/intent/total_train_data.csv"
data = pd.read_csv(train_file, delimiter=',')
queries = data['query'].tolist()
intents = data['intent'].tolist()

from utils.Preprocess import Preprocess
p = Preprocess(word2index_dic=
               'drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin')

# Build the word-index sequences
sequences = []
for sentence in queries:
    pos = p.pos(str(sentence))
    keywords = p.get_keywords(pos, without_tag=True)
    seq = p.get_wordidx_sequence(keywords)
    sequences.append(seq)

# Pad the word-index sequence vectors to a fixed length (MAX_SEQ_LEN)
from config.GlobalParams import MAX_SEQ_LEN
padded_seqs = preprocessing.sequence.pad_sequences(sequences,
                                                   maxlen=MAX_SEQ_LEN,
                                                   padding='post')
Example #8
from utils.Preprocess import Preprocess

sent = input()

# Create the preprocessing object
p = Preprocess(userdic='../utils/user_dic.tsv')

# Run the morphological analyzer
pos = p.pos(sent)

# Print keywords together with their POS tags
ret = p.get_keywords(pos, without_tag=False)
print(ret)

# Print keywords without POS tags
ret = p.get_keywords(pos, without_tag=True)
print(ret)
Example #9
# Import the required modules
import pandas as pd
import tensorflow as tf
from tensorflow.keras import preprocessing
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout, Conv1D, GlobalMaxPool1D, concatenate

# Load the training data
train_file = "total_train_data.csv"
data = pd.read_csv(train_file, delimiter=',', encoding='cp949')
queries = data['query'].tolist()
intents = data['intent'].tolist()

from utils.Preprocess import Preprocess
p = Preprocess(word2index_dic='../../train_tools/dict/chatbot_dict.bin',
               userdic='../../utils/user_dic.tsv')

# Build the word-index sequences
sequences = []
for sentence in queries:
    pos = p.pos(sentence)
    keywords = p.get_keywords(pos, without_tag=True)
    seq = p.get_wordidx_sequence(keywords)
    sequences.append(seq)

# Pad the word-index sequence vectors to a fixed length (15)
padded_seqs = preprocessing.sequence.pad_sequences(sequences,
                                                   maxlen=15,
                                                   padding='post')
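
# Added illustration (not part of the original example): how pad_sequences
# with padding='post' behaves on toy data; shorter sequences are zero-padded
# on the right up to maxlen.
demo = preprocessing.sequence.pad_sequences([[5, 8], [3, 1, 4]],
                                            maxlen=5, padding='post')
print(demo)
# [[5 8 0 0 0]
#  [3 1 4 0 0]]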
Example #10
from utils.Preprocess import Preprocess
from tensorflow.keras import preprocessing
import pickle

# Read the corpus data
def read_corpus_data(filename):
    with open(filename, 'r') as f:
        data = [line.split('\t') for line in f.read().splitlines()]
    return data


# Load the corpus data
corpus_data = read_corpus_data('./corpus.txt')


# Extract only the keywords from the corpus data and build the dictionary list
p = Preprocess(word2index_dic='chatbot_dict.bin',
               userdic = '../../utils/user_dic.tsv')
dict = []
for c in corpus_data:
    pos = p.pos(c[1])
    for k in pos:
        dict.append(k[0])
    # keywords = p.get_keywords(pos, without_tag=True)
    # for k in keywords:
    #     dict.append(k)

# Build the word2index mapping used as the dictionary
# The first index of the dictionary is reserved for OOV
tokenizer = preprocessing.text.Tokenizer(oov_token='OOV')
tokenizer.fit_on_texts(dict)
word_index = tokenizer.word_index
Example #11
from utils.Preprocess import Preprocess


def read_file(file_name):
    sents = []
    with open(file_name, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for idx, l in enumerate(lines):
            if l[0] == ';' and lines[idx + 1][0] == '$':
                this_sent = []
            elif l[0] == '$' and lines[idx - 1][0] == ';':
                continue
            elif l[0] == '\n':
                sents.append(this_sent)
            else:
                this_sent.append(tuple(l.split()))
    return sents

p = Preprocess(word2index_dic='drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin')

# Load the training corpus
corpus = read_file('drive/MyDrive/deep-chatbot/models/ner/ner_train.txt')

# Keep only the words and BIO tags from the corpus to build the training dataset
sentences, tags = [], []
for t in corpus:
    tagged_sentence = []
    sentence, bio_tag = [], []
    for w in t:
        tagged_sentence.append((w[1], w[3]))
        sentence.append(w[1])
        bio_tag.append(w[3])
    
    sentences.append(sentence)
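
# Hedged sketch of a typical next step (the original snippet is truncated here):
# index the words and BIO tags and pad them to equal length for training.
# Assumes `tags` is filled with each sentence's bio_tag list, mirroring how
# `sentences` is filled above, and that get_wordidx_sequence accepts a plain
# word list as in the other examples.
from tensorflow.keras import preprocessing

tag_tokenizer = preprocessing.text.Tokenizer(lower=False)  # keep the BIO tag casing
tag_tokenizer.fit_on_texts(tags)
y = preprocessing.sequence.pad_sequences(tag_tokenizer.texts_to_sequences(tags), padding='post')

x = preprocessing.sequence.pad_sequences([p.get_wordidx_sequence(s) for s in sentences], padding='post')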
Example #12
import sys
sys.path.append('/content/drive/MyDrive/deep-chatbot/')

import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import preprocessing

intent_labels = {0: "인사", 1: "질문"}  # 0: greeting, 1: question

# Load the intent classification model
model = load_model('drive/MyDrive/deep-chatbot/models/intent/intent_model.h5')

query = "데이터베이스"

from utils.Preprocess import Preprocess
p = Preprocess(word2index_dic='drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin',
               userdic='drive/MyDrive/deep-chatbot/utils/user_dic.tsv')
pos = p.pos(query)
keywords = p.get_keywords(pos, without_tag=True)
seq = p.get_wordidx_sequence(keywords)
sequences = [seq]

# Pad the word sequence to a fixed length (MAX_SEQ_LEN)
from config.GlobalParams import MAX_SEQ_LEN
padded_seqs = preprocessing.sequence.pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post')

predict = model.predict(padded_seqs)
predict_class = tf.math.argmax(predict, axis=1)
print(query)
print("의도 예측 점수 : ", predict)
print("의도 예측 클래스 : ", predict_class.numpy())
print("의도  : ", intent_labels[predict_class.numpy()[0]])
Example #13
from utils.Preprocess import Preprocess

sent = "김포시에 있는 고려병원 위치 알려줘"

p = Preprocess(userdic='../utils/user_dic.txt')  # user dictionary

pos = p.pos(sent)  # morpheme-level tokens; the POS tags to exclude are not filtered out yet
# print(pos)
# ret = p.get_keywords(pos, without_tag=False)
# print(ret)

# Extract keywords only, without POS tags
ret = p.get_keywords(pos, without_tag=True)
print(ret)
Example #14
from models.intent.IntentModel import IntentModel
from utils.Preprocess import Preprocess

p = Preprocess(word2index_dic='train_tools/dict/chatbot_dict.bin',
               userdic='../Tokenizing/user_dic.txt')
intent = IntentModel(model_name='models/intent/intent_model.h5', proprocess=p)
query = "안녕하세요."
intent_predict = intent.predict_class(query)
print(intent_predict)
Example #15
import sys
sys.path.append('/content/drive/MyDrive/deep-chatbot/')

import pickle
from utils.Preprocess import Preprocess

# Load the word dictionary
f = open("drive/MyDrive/deep-chatbot/train_tools/dict/chatbot_dict.bin", "rb")
word_index = pickle.load(f)
f.close()

sent = "데이터베이스가 뭔지 알려줘"

# Create the preprocessing object
p = Preprocess(userdic='drive/MyDrive/deep-chatbot/utils/user_dic.tsv')

# Run the morphological analyzer
pos = p.pos(sent)

# Extract keywords without POS tags
keywords = p.get_keywords(pos, without_tag=True)
for word in keywords:
    try:
        print(word, word_index[word])
    except KeyError:
# The word is not in the dictionary, so fall back to the OOV index
        print(word, word_index['OOV'])
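
# Added note: the same lookup can be collected into an index sequence. This is
# presumably what Preprocess.get_wordidx_sequence (used in other examples)
# returns, but that is an assumption about its implementation.
seq = [word_index.get(word, word_index['OOV']) for word in keywords]
print(seq)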

Example #16
from utils.Preprocess import Preprocess
from tensorflow.keras import preprocessing

#sent = "내일 오전 10시에 짬뽕 주문하고 싶어ㅋㅋ"
sent = "내일 오전 10시에 탕수육 주문하고 싶어"
p = Preprocess(word2index_dic='../train_tools/dict/chatbot_dict.bin',
               userdic='../utils/user_dic.tsv')

pos = p.pos(sent)
keywords = p.get_keywords(pos, without_tag=False)

print(keywords)

# w2i = p.get_wordidx_sequence(keywords)
# sequences = [w2i]
#
# MAX_SEQ_LEN = 15    # maximum sequence length
# padded_seqs = preprocessing.sequence.pad_sequences(sequences, maxlen=MAX_SEQ_LEN, padding='post')
#
# print(keywords)
# print(sequences)
# print(padded_seqs)
Example #17
from utils.Preprocess import Preprocess
from models.intent.IntentModel import IntentModel

p = Preprocess(word2index_dic='../train_tools/dict/chatbot_dict.bin',
               userdic='../utils/user_dic.tsv')

intent = IntentModel(model_name='../models/intent/intent_model.h5',
                     proprocess=p)
query = "오늘 탕수육 주문 가능한가요?"
predict = intent.predict_class(query)
predict_label = intent.labels[predict]

print(query)
print("의도 예측 클래스 : ", predict)
print("의도 예측 레이블 : ", predict_label)
Example #18
from utils.Preprocess import Preprocess


def read_file(file_name):
    sents = []
    with open(file_name, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for idx, l in enumerate(lines):
            if l[0] == ';' and lines[idx + 1][0] == '$':
                this_sent = []
            elif l[0] == '$' and lines[idx - 1][0] == ';':
                continue
            elif l[0] == '\n':
                sents.append(this_sent)
            else:
                this_sent.append(tuple(l.split()))
    return sents


# Create the preprocessing object
p = Preprocess(word2index_dic='../../train_tools/dict/chatbot_dict.bin',
               userdic='../../utils/user_dic.tsv')

# Load the training corpus
corpus = read_file('ner_train.txt')

# Keep only the words and BIO tags from the corpus to build the training dataset
sentences, tags = [], []
for t in corpus:
    tagged_sentence = []
    sentence, bio_tag = [], []
    for w in t:
        tagged_sentence.append((w[1], w[3]))
        sentence.append(w[1])
        bio_tag.append(w[3])

    sentences.append(sentence)