예제 #1
0
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 19 14:38:15 2017

@author: kcarnold
"""
import numpy as np
#%%

# Load the review table through the project helper.
from suggestion.analyzers import load_reviews
reviews = load_reviews()
#%%
from collections import Counter
import itertools
import tqdm
# Token frequencies over the rows selected by the `is_train` mask: each
# `tokenized` text is lower-cased and split on whitespace, and every
# resulting token is counted.  tqdm only reports progress.
vocab = Counter(
    token
    for text in tqdm.tqdm(reviews[reviews.is_train].tokenized)
    for token in text.lower().split())
#%%
MAX_SEQ_LEN = 100
#%%
NUM_WORDS = 20000
# Indices 0 and 1 hold the '<PAD>' and '<UNK>' placeholder entries; the
# NUM_WORDS most frequent tokens follow in descending-count order.
id2str = ['<PAD>', '<UNK>']
id2str.extend(word for word, _ in vocab.most_common(NUM_WORDS))
# Inverse lookup: token string -> its position in id2str.
str2id = dict(zip(id2str, range(len(id2str))))
#%%
from suggestion import clustering
cnnb = clustering.ConceptNetNumberBatch.load()
#%%
# NOTE(review): presumably the dimensionality of the vectors loaded into
# `cnnb` -- confirm against ConceptNetNumberBatch.
EMBEDDING_DIM = 300
예제 #2
0
from suggestion import analyzers
from suggestion.paths import paths
import pickle

# Build a WordPairAnalyzer from the loaded reviews.
reviews = analyzers.load_reviews()
wordpair_analyzer = analyzers.WordPairAnalyzer.build(reviews)

# Persist the analyzer under the models directory, using the newest pickle
# protocol the running interpreter supports.
with open(paths.models / 'wordpair_analyzer.pkl', mode='wb') as out_file:
    pickle.dump(wordpair_analyzer, out_file,
                protocol=pickle.HIGHEST_PROTOCOL)