Ejemplo n.º 1
0
def pipeline() -> None:
    """Read one sample from standard input and run the saved model on it.

    Three lines are read from stdin, in this order: ``about``, ``ing`` and
    ``category``.  The two text fields are passed through ``preprocess`` and
    hashed into integer sequences (hash spaces of 91 and 56 respectively);
    the category is looked up in the last element of the object stored in
    ``points.pkl`` and one-hot encoded over 19 classes.  The three arrays are
    then handed to ``predict`` on the model loaded from ``model.h5``.

    The prediction is neither returned nor printed.

    Raises:
        KeyError: if the category read from stdin is not a key of the
            mapping stored last in ``points.pkl`` (presumably the
            label -> index dict written at training time; confirm).
    """
    about = input()
    ing = input()
    category = input()

    # Use a context manager so the file handle is closed deterministically.
    # NOTE: unpickling can execute arbitrary code; points.pkl must come from
    # a trusted source.
    with open('points.pkl', 'rb') as points_file:
        points = pickle.load(points_file, encoding='utf-8')
    mdl = models.load_model('model.h5')

    # NOTE(review): the hashed sequences are passed on without padding and
    # without a leading batch axis; confirm the model accepts inputs of
    # that shape.
    about = np.array(text.hashing_trick(preprocess(about), 91))
    ing = np.array(text.hashing_trick(preprocess(ing), 56))
    category = np.array(to_categorical(points[-1][category], 19))
    mdl.predict([ing, about, category])
Ejemplo n.º 2
0
    def hashing_method(self):
        """Hash-encode all quotes and pad the encodings for training.

        Steps performed:

        1. Build ``self.vocab`` (the set of distinct words in the flattened
           motivational and demotivational quotes) and ``self.vocab_size``.
        2. Concatenate both quote collections into ``self.quotes``.
        3. Encode every quote with ``hashing_trick`` using MD5 and a hash
           space of ``round(vocab_size * 1.5)``, appending each result to
           ``self.encoded_quotes``; the elapsed wall-clock time is printed
           in milliseconds.
        4. Pad the encodings to length 280, store them in
           ``self.padded_encoded_quotes`` and print them.
        """
        # Vocabulary: distinct words across both quote collections.
        motiv_text = self.flatten(self.motiv_quotes)
        demotiv_text = self.flatten(self.demotiv_quotes)
        self.vocab = set(text_to_word_sequence(motiv_text + " " + demotiv_text))
        self.vocab_size = len(self.vocab)

        # Hash-encode every quote, timing the whole pass.
        self.quotes = self.motiv_quotes + self.demotiv_quotes
        hash_space = round(self.vocab_size * 1.5)
        started = time.time()
        self.encoded_quotes.extend(
            hashing_trick(quote, hash_space, hash_function='md5')
            for quote in self.quotes)
        elapsed_ms = (time.time() - started) * 1000
        print("hashing trick time: " + str(elapsed_ms) + " ms")

        # Padded hash data used for training.
        self.padded_encoded_quotes = pad_sequences(self.encoded_quotes,
                                                   maxlen=280)
        print(self.padded_encoded_quotes)
Ejemplo n.º 3
0
def new() -> None:
    """Build training arrays from ``dataset.csv``, save them and train.

    The CSV is read once.  For every row:

    * ``about`` and ``ing`` are hashed into integer sequences (hash spaces
      of 91 and 56) and post-padded to lengths 91 and 56;
    * ``category`` is mapped to an integer index and one-hot encoded over
      19 classes;
    * ``score`` is converted to ``float``.

    The tuple ``(about, ing, category, hist)`` -- where ``hist`` is the
    category-label -> index dict -- is pickled to ``points.pkl``.  Finally
    ``new_model`` is called with the four arrays and ``model.h5`` is loaded.

    Raises:
        KeyError: if a required column is missing from the CSV.
        ValueError: if a ``score`` value cannot be parsed as a float.
    """
    # Parse the file a single time instead of once per column.
    with open('dataset.csv', newline='') as csvfile:
        rows = list(csv.DictReader(csvfile))

    about_texts = [row['about'] for row in rows]
    ing_texts = [row['ing'] for row in rows]
    labels = [row['category'] for row in rows]

    # Every row is processed (no hard-coded row count), so the feature
    # arrays always line up with ``score``.  The variable-length hashed
    # sequences are given to pad_sequences as plain lists: wrapping ragged
    # lists in np.array first raises ValueError on recent NumPy releases.
    ing = sequence.pad_sequences(
        [text.hashing_trick(doc, 56) for doc in ing_texts],
        maxlen=56, padding='post')
    about = sequence.pad_sequences(
        [text.hashing_trick(doc, 91) for doc in about_texts],
        maxlen=91, padding='post')

    # Sort the distinct labels so the label -> index mapping is the same on
    # every run; iterating a bare set of strings is order-unstable.
    hist = {label: index for index, label in enumerate(sorted(set(labels)))}

    # 19 is the fixed number of classes; a dataset with more distinct
    # categories than that will make to_categorical fail.
    category = np.array(to_categorical([hist[label] for label in labels], 19))

    score = np.array([float(row['score']) for row in rows])

    # Context manager guarantees the pickle is flushed and closed.
    with open('points.pkl', 'wb') as points_file:
        pickle.dump((about, ing, category, hist), points_file)

    new_model(about, ing, category, score)
    # presumably new_model writes model.h5 and this reload checks that the
    # saved file is readable -- confirm against new_model.
    models.load_model('model.h5')
Ejemplo n.º 4
0
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import hashing_trick

# Toy sentiment data: six short Korean sentences with binary labels.
samples = ['너 오늘 이뻐 보인다', 
           '나는 오늘 기분이 더러워', 
           '끝내주는데, 좋은 일이 있나봐', 
           '나 좋은 일이 생겼어', 
           '아 오늘 진짜 짜증나', 
           '환상적인데, 정말 좋은거 같아']
labels = [[1], [0], [1], [1], [0], [1]]

# Convert the documents to numeric form with a hash table.
VOCAB_SIZE = 10  # limit the vocabulary (hash table) size to 10
sequences = [hashing_trick(s, VOCAB_SIZE) for s in samples]
sequences = np.array(sequences)
labels = np.array(labels)
print(sequences)

# Number of output units inside the Embedding layer, i.e. the number of
# latent features per word.
EMB_SIZE = 8

# Build the deep-learning model.
xInput = Input(batch_shape=(None, sequences.shape[1]))
embed_input = Embedding(input_dim=VOCAB_SIZE + 1, output_dim=EMB_SIZE)(xInput)
# Mean over the last axis (the EMB_SIZE embedding dimension), leaving one
# value per token position.
# NOTE(review): averaging over the token axis (axis=1) is the more common
# pooling choice -- confirm that axis=-1 is intended.
embed_input1 = tf.reduce_mean(embed_input, axis=-1)

hidden_layer = Dense(128, activation=tf.nn.relu)(embed_input1)
output = Dense(1, activation='sigmoid')(hidden_layer)
model = Model(xInput, output)