def patent_utraining_test_data(patent_dir, lang, version="1", use_annotated_chunks_p=True):
    """Build the "utest" mallet file for a patent directory and classify it.

    Steps, in order:
      1. load the phrase annotations via load_phrase_labels(patent_dir, lang)
      2. write the .mallet test file via make_utraining_test_file(...)
      3. run a MaxEnt classifier through mallet.Mallet_test

    Arguments:
      patent_dir -- root directory; the test and train directories are
                    <patent_dir>/<lang>/test and <patent_dir>/<lang>/train
      lang -- language subdirectory name
      version -- version identifier handed to the mallet helpers
      use_annotated_chunks_p -- passed through unchanged to
                    make_utraining_test_file
    """
    # Dictionary of annotations (phrase -> label, judging by the helper name).
    phrase_labels = load_phrase_labels(patent_dir, lang)

    # Create the .mallet file.
    make_utraining_test_file(patent_dir, lang, version, phrase_labels,
                             use_annotated_chunks_p)

    # The Mallet_test instance does the rest; for now all the work happens
    # in the test directory.
    test_dir = os.path.join(patent_dir, lang, "test")
    train_dir = os.path.join(patent_dir, lang, "train")
    tester = mallet.Mallet_test("utest", version, test_dir, "utrain", train_dir)

    # NOTE: creating the mallet vectors file from the mallet file is disabled
    # in this function:
    # tester.write_test_mallet_vectors_file()
    tester.mallet_test_classifier("MaxEnt")
def pipeline_utraining_test_data(root, lang, patent_dir, version="1"):
    """Build the "utest" mallet file for a pipeline run and classify it.

    Steps, in order:
      1. print a trace line with the arguments
      2. write the test file via pipeline_make_utraining_test_file(root, lang, version)
      3. create the mallet vectors file and run a MaxEnt classifier through
         mallet.Mallet_test

    Arguments:
      root -- pipeline root directory; test output goes to <root>/test
      lang -- language subdirectory name
      patent_dir -- directory holding the training data; the training vectors
                    and model file are expected in <patent_dir>/<lang>/train
      version -- version identifier handed to the mallet helpers
    """
    # Single-argument parenthesised form: emits the same text under the
    # Python 2 print statement and also parses as a Python 3 print call.
    print("[pipeline_utraining_test_data]root %s, lang %s, patent_dir %s, version |%s|"
          % (root, lang, patent_dir, version))

    # Location of the corresponding training vectors and model file.
    train_output_dir = os.path.join(patent_dir, lang, "train")
    test_output_dir = os.path.join(root, "test")

    pipeline_make_utraining_test_file(root, lang, version)

    # The Mallet_test instance does the rest; for now all the work happens
    # in the test directory.
    mtest = mallet.Mallet_test("utest", version, test_output_dir,
                               "utrain", train_output_dir)

    # Create the mallet vectors file from the mallet file, then classify.
    mtest.write_test_mallet_vectors_file()
    mtest.mallet_test_classifier("MaxEnt")
def _run_classifier(train_dir, test_dir, version, classifier, mallet_file, results_file):
    """Create an instance of the classifier and run it.

    A mallet.Mallet_test object is built for the "utest"/"utrain" pair using
    test_dir and train_dir, and its mallet_test_classifier method is then
    invoked with classifier, mallet_file and results_file.
    """
    tester = mallet.Mallet_test("utest", version, test_dir, "utrain", train_dir)
    tester.mallet_test_classifier(classifier, mallet_file, results_file)