Example #1
print("### Loading Train Data ###")
data_agent = data_manager(train_path, train=True)

# In[6]:

print("### Loading Test Data ###")
test_agent = data_manager(test_path, train=False)
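
# data_manager is defined elsewhere in this repo. Purely as a hypothetical
# sketch of the interface used here (a list of CSV paths in, .orig_data and
# .out_sen attributes out), it might look roughly like the class below; the
# column layout is an assumption, not taken from the source.
import csv

class DataManagerSketch:
    def __init__(self, paths, train=True):
        self.orig_data, self.out_sen = [], []
        for p in paths:
            with open(p, newline="") as f:
                for row in csv.reader(f):
                    # Assumed layout: column 0 = input sentence,
                    # column 1 = target sentence (train data only).
                    self.orig_data.append(row[0])
                    if train:
                        self.out_sen.append(row[1])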

# ## Preprocessing and Padding

# In[7]:

idx_in_sen, idx_out_sen, mask_in, mask_out, length_in, idx2word, word2idx, remain_idx = transform_orig(
    [data_agent.orig_data, data_agent.out_sen],
    min_count=min_count,
    max_len=[Encoder_max_len, Decoder_max_len],
    path="Attn_ver1/tmp/tokenizer.pkl")

# In[8]:

pickle.dump({"orig_word": [idx2word, word2idx]},
            open(os.path.join(tmp_path, "tokenizer.pkl"), "wb"))
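
# Minimal round-trip check (an added sketch): reload the tokenizer that was
# just written and confirm the vocabulary survives serialization.
with open(os.path.join(tmp_path, "tokenizer.pkl"), "rb") as f:
    restored_idx2word, restored_word2idx = pickle.load(f)["orig_word"]
assert restored_word2idx == word2idx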

# ## Build Model

# In[9]:


def Encoder(inputs,
            dim,
Example #2
# In[6]:

train_path = ["data/{}/train.csv".format(x) for x in ["all"]]
test_path = ["data/{}/test.csv".format(x) for x in ["all"]]

print("### Loading Train Data ###")
data_agent = data_manager(train_path, train=True)

print("### Loading Test Data ###")
test_agent = data_manager(test_path, train=False)

# In[7]:

print("\n### Preprocessing ###")
idx_in_sen, idx_out_sen, mask_in, mask_out, length_in, length_out, idx2word, word2idx, remain_idx = transform_orig(
    [data_agent.orig_data, data_agent.out_sen],
    min_count=min_count,
    max_len=[Encoder_max_len, Decoder_max_len])
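
# transform_orig's internals live elsewhere in the repo. As a rough,
# hypothetical illustration of the padding and masking it is expected to
# perform (the pad index of 0 is an assumption), the core step usually
# looks like this:
import numpy as np

def pad_and_mask(seqs, max_len, pad_idx=0):
    """Pad index sequences to max_len and build 0/1 validity masks."""
    padded = np.full((len(seqs), max_len), pad_idx, dtype=np.int32)
    mask = np.zeros((len(seqs), max_len), dtype=np.float32)
    lengths = np.zeros(len(seqs), dtype=np.int32)
    for i, s in enumerate(seqs):
        n = min(len(s), max_len)
        padded[i, :n] = s[:n]   # truncate anything longer than max_len
        mask[i, :n] = 1.0       # 1 marks real tokens, 0 marks padding
        lengths[i] = n
    return padded, mask, lengths

# e.g. pad_and_mask([[4, 9, 2], [7]], max_len=4) pads the second sequence
# with three zeros and records lengths [3, 1].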

# In[8]:

pickle.dump({"orig_word": [idx2word, word2idx]},
            open(os.path.join(tmp_path, "tokenizer.pkl"), "wb"))

# In[9]:

print("""
##################################################################################################
#######################################  Building Model  #########################################
##################################################################################################
""")
start_time = time.time()
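
# start_time anchors a wall-clock measurement of graph construction; the
# matching read-out is not shown in this excerpt, but would typically be:
#   print("Model built in {:.1f}s".format(time.time() - start_time))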