Code example #1 — file: train.py, project: jogonba2/SHAN
# --- Training setup (train.py): hyperparameters, model build, data, generator ---

# Embedding dimensionality, taken from the loaded word2vec model.
d = w2v.vector_size
# Target similarity labels for positive / negative (document, summary) pairs.
similar_val = 0.9999
non_similar_val = 0.0001
steps_per_epoch = 500  # 196961 samples
epochs = 200
validation_steps = 150

# Siamese Hierarchical Attention Network wrapper; sequence-length limits and
# model paths are defined earlier in the file.
shann_obj = SHANN(max_len_doc_sents, max_len_doc_sent_words,
                  max_len_summ_sents, max_len_summ_sent_words, d, path_models,
                  name_models)

shann_obj._set_model()
# NOTE(review): train_file points at dev.csv, identical to dev_file — this
# looks like a debugging leftover; confirm whether it should be train.csv.
train_file = "../../Corpora/CNNDM/dev.csv"
dev_file = "../../Corpora/CNNDM/dev.csv"

x_tr, y_tr = ut.load_csv_samples(train_file)
x_dv, y_dv = ut.load_csv_samples(dev_file)

# Training batch generator: pads documents/summaries to the fixed lengths and
# labels pairs with similar_val / non_similar_val.
generator_train = ut.generator_2(x_tr,
                                 y_tr,
                                 max_len_doc_sents,
                                 max_len_summ_sents,
                                 max_len_doc_sent_words,
                                 max_len_summ_sent_words,
                                 padding_val,
                                 pos_pairs,
                                 neg_pairs,
                                 w2v,
                                 d,
                                 similar_val=similar_val,
                                 non_similar_val=non_similar_val)
Code example #2 — file: test.py, project: jogonba2/SHAN
# --- Inference setup (test.py): build model, load weights, decode test set ---

# Number of highest-attention sentences to extract per document.
topk_sentences = 3

# Rebuild the SHANN model with the same configuration used for training.
# (Continuation lines below were tab-indented in the original, inconsistent
# with the rest of the file's space indentation — normalized per PEP 8.)
shann_obj = SHANN(max_len_doc_sents, max_len_doc_sent_words,
                  max_len_summ_sents, max_len_summ_sent_words,
                  d, path_models, name_models)

shann_obj._set_model()
shann_obj.load_weights(path_weights)

# Decoder wraps the attention sub-model and selects the top-k attended
# sentences of each document as its extractive summary.
decoder = Decoder(max_len_doc_sents, max_len_doc_sent_words,
                  w2v, d, shann_obj.get_all_att_model(),
                  topk_sentences=topk_sentences)

test_file = "../../Corpora/CNNDM/test.csv"

x_ts, y_ts = ut.load_csv_samples(test_file)

# Word-level decoding experiment, disabled via string literal; kept verbatim
# for reference. NOTE(review): consider deleting this dead code outright.
"""
# Word Level #
print(len(x_ts))
summaries = decoder._word_decoder(x_ts[15])
print(summaries)
print(y_ts[15])
exit()
with open(output_file_words, "w", encoding="utf8") as fw:
    for i in range(len(summaries)):
        fw.write(summaries[i].strip() + "\t" + y_ts.iloc[i].strip() + "\n")
"""

# Sentence Level #
print(len(x_ts))