def copied_code_from_translate_Akkadian():
    """
    Load previously trained HMM, MEMM and biLSTM artifacts from disk.

    Rebuilds the vocabularies via preprocess(), re-runs hmm_train (kept for its
    side effects — the interpolation weights are actually reloaded from the
    pickle below), then loads every pickled model needed for decoding.

    :return: (lambda1, lambda2, logreg, vec, idx_to_tag_dict,
              extra_decoding_arguments, sign_to_id, id_to_tran,
              predictor_from_file, model_from_file)
    """
    train_texts, dev_texts, sign_to_id, tran_to_id, id_to_sign, id_to_tran = preprocess()

    # Run the HMM; its trained weights are reloaded from hmm_model.pkl below.
    hmm_train(train_texts, dev_texts)
    (lambda1, lambda2, _, _, _, _) = load_object_from_file(Path("../output/hmm_model.pkl"))

    # MEMM artifacts: logistic-regression model, vectorizer, tag dictionary.
    memm_path = Path("../output/memm_model.pkl")
    (logreg, vec, idx_to_tag_dict) = load_object_from_file(memm_path)

    extra_decoding_arguments = build_extra_decoding_arguments(train_texts)

    # biLSTM artifacts previously dumped by the training flow.
    pred_path = Path("../output/predictor_lr_03_test_96_8.pkl")
    predictor_from_file = load_object_from_file(pred_path)
    model_path = Path("../output/model_lr_03_test_96_8.pkl")
    model_from_file = load_object_from_file(model_path)

    return lambda1, lambda2, logreg, vec, idx_to_tag_dict, extra_decoding_arguments, \
        sign_to_id, id_to_tran, predictor_from_file, model_from_file
def memm_train_and_store(corpora):
    """
    Train a MEMM model on *corpora* and persist everything needed to reuse it.

    :param corpora: corpora identifiers forwarded to preprocess()
    :return: nothing; artifacts are pickled to memm_model.pkl
    """
    # NOTE(review): memm_path is presumably a module-level constant — confirm.
    training, development, held_out, _, _, _, _ = preprocess(corpora)
    classifier, vectorizer, idx_to_tag = memm_train(training, development)
    payload = (classifier, vectorizer, idx_to_tag, held_out)
    dump_object_to_file(payload, memm_path)
def hmm_train_and_store(corpora):
    """
    Train an HMM on *corpora* and persist all data needed for decoding.

    :param corpora: corpora identifiers forwarded to preprocess()
    :return: nothing; artifacts are pickled to hmm_model.pkl
    """
    training, development, held_out, _, _, _, _ = preprocess(corpora)
    (most_common_tag, possible_tags, q, e, S, total_tokens,
     q_bi_counts, q_uni_counts, lambda1, lambda2) = hmm_train(training, development)
    dump_object_to_file(
        (most_common_tag, possible_tags, q, e, S, total_tokens,
         q_bi_counts, q_uni_counts, lambda1, lambda2, held_out),
        hmm_path)
def biLSTM_train_and_store(corpora):
    """
    Train a biLSTM tagger on *corpora* and persist it with its predictor.

    :param corpora: corpora identifiers forwarded to preprocess()
    :return: nothing; artifacts are pickled to bilstm_model.pkl
    """
    training, development, held_out, sign_to_id, _, _, id_to_tran = preprocess(corpora)

    model, vocab, train_ds, validation_ds, device, reader = prepare1()
    trainer, model, reader, vocab = prepare2(model, vocab, train_ds,
                                             validation_ds, device, reader)
    trainer.train()

    predictor = SentenceTaggerPredictor(model, dataset_reader=reader)
    dump_object_to_file((model, predictor, sign_to_id, id_to_tran, held_out),
                        bilstm_path)
def main():
    """
    Smoke-test the HMM: train on the RINAP corpora and print accuracy on the
    train, dev and test splits.

    NOTE(review): several functions in this file are named ``main``; only the
    last definition survives at import time — confirm which one is intended.

    :return: nothing
    """
    train_texts, dev_texts, test_texts, sign_to_id, tran_to_id, id_to_sign, id_to_tran = \
        preprocess(['rinap/rinap1', 'rinap/rinap3', 'rinap/rinap4', 'rinap/rinap5'])

    most_common_tag, possible_tags, q, e, S, total_tokens, q_bi_counts, q_uni_counts, lambda1, lambda2 = \
        hmm_train(train_texts, dev_texts)
    print("Done training, now computing accuracy!")

    def _accuracy(texts):
        # Viterbi-decoding accuracy of the freshly trained HMM on one split.
        return compute_accuracy(texts, hmm_viterbi, total_tokens, q_bi_counts,
                                q_uni_counts, q, e, S, most_common_tag,
                                possible_tags, lambda1, lambda2)

    # Same evaluation on each split, in the original train/dev/test order.
    for split in (train_texts, dev_texts, test_texts):
        print(_accuracy(split))
def main():
    """
    Exercise the biLSTM model end to end: preprocess, build, train, evaluate.

    NOTE(review): several functions in this file are named ``main``; only the
    last definition survives at import time — confirm which one is intended.

    :return: nothing
    """
    corpora = ['rinap/rinap1', 'rinap/rinap3', 'rinap/rinap4', 'rinap/rinap5']
    (train_texts, dev_texts, test_texts,
     sign_to_id, tran_to_id, id_to_sign, id_to_tran) = preprocess(corpora)

    model, vocab, train_dataset, validation_dataset, cuda_device, reader = prepare1()
    trainer, model, reader, vocab = prepare2(
        model, vocab, train_dataset, validation_dataset, cuda_device, reader)

    train(trainer, model, reader)
    check_results(train_texts, dev_texts, test_texts, sign_to_id, id_to_tran)
def main():
    """
    Test the run of MEMM and HMM: train both on the RINAP corpora, then report
    dev-set accuracy for HMM, greedy MEMM and Viterbi MEMM decoding.

    NOTE(review): several functions in this file are named ``main``; only the
    last definition survives at import time — confirm which one is intended.

    :return: nothing
    """
    full_flow_start = time.time()
    train_sents, dev_sents, _, _, _, _ = preprocess(
        ['rinap/rinap1', 'rinap/rinap3', 'rinap/rinap4', 'rinap/rinap5'])
    print(len(train_sents))
    print(len(dev_sents))

    vocab = compute_vocab_count(train_sents)
    extra_decoding_arguments = build_extra_decoding_arguments(train_sents)

    total_tokens, q_tri_counts, q_bi_counts, q_uni_counts, e_word_tag_counts, e_tag_counts = hmm_preprocess(
        train_sents)
    print("HMM trained")

    tag_to_idx_dict = build_tag_to_idx_dict(train_sents)
    index_to_tag_dict = invert_dict(tag_to_idx_dict)

    vec = DictVectorizer()
    print("Create train examples")
    train_examples, train_labels = create_examples(train_sents, tag_to_idx_dict)
    num_train_examples = len(train_examples)
    print("#example: " + str(num_train_examples))
    print("Done")

    print("Create dev examples")
    dev_examples, dev_labels = create_examples(dev_sents, tag_to_idx_dict)
    num_dev_examples = len(dev_examples)
    print("#example: " + str(num_dev_examples))
    print("Done")

    # extend() mutates train_examples in place; the slice boundaries below
    # rely on num_train_examples having been captured beforehand.
    all_examples = train_examples
    all_examples.extend(dev_examples)

    print("Vectorize examples")
    all_examples_vectorized = vec.fit_transform(all_examples)
    train_examples_vectorized = all_examples_vectorized[:num_train_examples]
    dev_examples_vectorized = all_examples_vectorized[num_train_examples:]
    print("Done")

    logreg = linear_model.LogisticRegression(
        multi_class='multinomial', max_iter=1, solver='lbfgs', C=100000,
        verbose=1, n_jobs=2)
    print("Fitting...")
    start = time.time()
    logreg.fit(train_examples_vectorized, train_labels)
    end = time.time()
    print("End training, elapsed " + str(end - start) + " seconds")
    # End of log linear model training

    start = time.time()
    print("Start evaluation on dev set")
    acc_viterbi, acc_greedy, acc_hmm = memm_hmm_eval(
        dev_sents, logreg, vec, index_to_tag_dict, extra_decoding_arguments,
        total_tokens, q_tri_counts, q_bi_counts, q_uni_counts,
        e_word_tag_counts, e_tag_counts)
    end = time.time()
    # Bug fix: wrap accuracies in str() before concatenation — a non-string
    # accuracy would raise TypeError here (matches str() usage elsewhere in
    # this function); output is unchanged when the values are already strings.
    print("Dev: Accuracy hmm : " + str(acc_hmm))
    print("Dev: Accuracy greedy memm : " + str(acc_greedy))
    print("Dev: Accuracy Viterbi memm : " + str(acc_viterbi))
    print("Evaluation on dev set elapsed: " + str(end - start) + " seconds")