# Build word features for the single sample `xx`.
# Original note: the input must be a 2D list. `xx` is wrapped in an outer
# list before being handed to utils.prepare_words (presumably so that one
# sample becomes a one-element batch -- TODO confirm against utils).
xxx = utils.prepare_words([xx], contractions)
# Bare expression: only useful as a notebook cell's displayed value; the
# resulting array is not stored anywhere.
np.array(xxx)


# In[ ]:





# In[13]:


# Two-step preprocessing of `xx`: run utils.clean_sentence first, then feed
# its result to utils.replace_contr together with the contractions table.
cleaned_xx = utils.clean_sentence(xx)
xx_clean = utils.replace_contr(cleaned_xx, contractions)
# Bare expression: shown as the cell output when run in a notebook.
xx_clean


# In[14]:


# Replace contractions directly on the raw `xx` (no utils.clean_sentence pass
# in this cell). NOTE: this rebinds `xx_clean`, overwriting any earlier value.
xx_clean = utils.replace_contr(xx, contractions)
# Bare expression: shown as the cell output when run in a notebook.
xx_clean


# In[15]:


# Run utils.token_sens on the contraction-replaced text with a sentence size
# of 30 and the `word_to_idx` lookup (presumably word -> index; verify in utils).
x = utils.token_sens(
    xx_clean,
    sentence_size=30,
    word_to_idx=word_to_idx,
)
# Bare expression: displays the shape of the result in a notebook.
x.shape
# Example #2 -- a separate snippet begins here (cell numbering restarts below)
# 0
# In[7]:

# Load the contractions table from its pickle file.
# NOTE: pickle.load can execute arbitrary code while unpickling; only point
# this at a file you created or otherwise trust.
with open('../model/contractions.pkl', mode='rb') as pkl_file:
    contractions = pickle.load(pkl_file)

# In[8]:

# Pipeline: clean the text, replace contractions, split into sentences, then
# tokenize -- a token found in the dictionary is kept as-is; a token not in
# the dictionary is replaced with <digit>.

# In[9]:

def _clean_documents(documents, contraction_map):
    """Clean every raw document and replace its contractions.

    Each document is passed through ``utils.clean_sentence`` and then
    ``utils.replace_contr``; element ``[0]`` of that result is collected.
    Progress is reported through ``tqdm``.

    NOTE(review): the original loops carried two disabled steps,
    ``utils.split_document(x[0])`` followed by ``utils.refine_document(x)``,
    with the remark 'use this if need to train "phrase"'. They are not
    applied here; re-introduce them if phrase training is required.

    Args:
        documents: sized iterable of raw documents (``len()`` is used for
            the progress-bar total).
        contraction_map: contractions table forwarded to
            ``utils.replace_contr``.

    Returns:
        A list with one cleaned entry per input document, in input order.
    """
    # A comprehension keeps the per-document temporary local instead of
    # rebinding (and leaking) a module-level loop variable.
    return [
        utils.replace_contr(utils.clean_sentence(document), contraction_map)[0]
        for document in tqdm(documents, total=len(documents))
    ]


# Positive and negative corpora go through the exact same cleaning steps.
pos_data_clean = _clean_documents(pos_data, contractions)
neg_data_clean = _clean_documents(neg_data, contractions)

# Sanity check: number of cleaned documents in each class.
print(len(pos_data_clean), len(neg_data_clean))