Python Preprocess Examples

Programming Language: Python

Namespace/Package Name: utilss.Preprocess

Class/Type: Preprocess

Examples at hotexamples.com: 4

Python Preprocess - 4 examples found. These are the top-rated real-world Python examples of utilss.Preprocess.Preprocess, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Preprocess(3)

get_keywords(2)

pos(2)

get_wordidx_sequence(1)

Example #1

Show file

File: train_model.py Project: Byeongryul/Book_Chatbot

    with open(file_name, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for idx, l in enumerate(lines):
            if l[0] == ';' and lines[idx + 1][0] == '$':
                this_sent = []
            elif l[0] == '$' and lines[idx - 1][0] == ';':
                continue
            elif l[0] == '\n':
                sents.append(this_sent)
            else:
                this_sent.append(tuple(l.split()))
    return sents


# Create the preprocessing object.
p = Preprocess(word2index_dic='train_tools/dict/chatbot_dict.bin',
               userdic='utilss/user_dic.tsv')

# Load the training corpus.
corpus = read_file('models/ner/ner_train.txt')

# Extract only the words and BIO tags from the corpus to build the training dataset.
sentences, tags = [], []
for t in corpus:
    tagged_sentence = []
    sentence, bio_tag = [], []
    for w in t:
        # Field 1 of each item is taken as the word and field 3 as its BIO tag
        # (per the comment above; presumably w is one split corpus line -- confirm
        # against read_file).
        tagged_sentence.append((w[1], w[3]))
        sentence.append(w[1])
        bio_tag.append(w[3])

    # NOTE(review): `tags`, `bio_tag` and `tagged_sentence` are not consumed in the
    # visible excerpt; the snippet appears to be truncated after this line.
    sentences.append(sentence)

Example #2

Show file

File: create_dict.py Project: Byeongryul/Book_Chatbot

from tensorflow.keras import preprocessing
import pickle


# Read the corpus data.
def read_corpus_data(filename):
    """Load a tab-separated corpus file into a list of rows.

    Args:
        filename: Path to the corpus text file.

    Returns:
        A list with one entry per line of the file; each entry is the list
        of fields obtained by splitting that line on tab characters.
    """
    # Decode explicitly as UTF-8 so the corpus is read the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        data = [line.split('\t') for line in f.read().splitlines()]
    return data


# Load the corpus data.
corpus_data = read_corpus_data('train_tools/dict/corpus.txt')

# Extract only the keywords from the corpus to build the dictionary word list.
p = Preprocess()
# NOTE(review): this name shadows the built-in `dict`; it is left unchanged
# because code outside this excerpt may refer to it.
dict = []
for C in corpus_data:
    # C[1] is the second tab-separated field of the corpus row.
    pos = p.pos(C[1])
    for k in pos:
        # Keep only the first element of each item returned by pos().
        dict.append(k[0])

# Build the word2index mapping used by the dictionary.
# The first index of the dictionary is used for OOV.
tokenizer = preprocessing.text.Tokenizer(oov_token='OOV')
tokenizer.fit_on_texts(dict)
word_index = tokenizer.word_index

# 사전 파일 생성
f = open('train_tools/dict/chatbot_dict.bin', 'wb')
try:

Example #3

Show file

import sys
import os

sys.path.append(os.getcwd())

import pickle
from utilss.Preprocess import Preprocess

# Load the word dictionary.
# The context manager closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a dictionary file that comes from a trusted source.
with open('train_tools/dict/chatbot_dict.bin', 'rb') as f:
    word_index = pickle.load(f)

sent = "내일 오전 10시에 탕수육 주문하고 싶어 ㅋㅋ"

# Create the preprocessing object.
p = Preprocess(userdic='utilss/user_dic.tsv')

# Run the morphological analyzer.
pos = p.pos(sent)

# Print the keywords without their part-of-speech tags.
keywords = p.get_keywords(pos, without_tag=True)
for word in keywords:
    try:
        print(word, word_index[word])
    except KeyError:
        # The word is not in the dictionary: fall back to the OOV index.
        print(word, word_index['OOV'])

Example #4

Show file

import sys
import os

sys.path.append(os.getcwd())

from config.DatabaseConfig import *
from utilss.Database import Database
from utilss.Preprocess import Preprocess

# Create the preprocessing object.
p = Preprocess(word2index_dic='train_tools/dict/chatbot_dict.bin',
               userdic='utilss/user_dic.tsv')

# Open the database connection using the settings star-imported from
# config.DatabaseConfig.
# NOTE(review): no matching close/disconnect call is visible in this excerpt --
# confirm the connection is released further down.
db = Database(host=DB_HOST,
              user=DB_USER,
              password=DB_PASSWARD,
              db_name=DB_NAME)
db.connect()

query = "오전에 탕수육 10개 주문합니다."

# Predict the intent class of the query and look up its label.
from models.intent.IntentModel import IntentModel
intent = IntentModel(model_name='models/intent/intent_model.h5', preprocess=p)
predict = intent.predict_class(query)
intent_name = intent.labels[predict]

# Run the NER model on the same query.
from models.ner.NerModel import NerModel
ner = NerModel(model_name='models/ner/ner_model.h5', preprocess=p)
predicts = ner.predict(query)
ner_tags = ner.predict_tags(query)