예제 #1
0
def patent_utraining_test_data(patent_dir, lang, version="1", use_annotated_chunks_p=True):
    """Create the utraining test .mallet file for a patent set and classify it.

    Arguments:
    patent_dir -- directory holding the per-language subdirectories
    lang -- language subdirectory name under patent_dir
    version -- version identifier handed to the file builder and to Mallet_test
    use_annotated_chunks_p -- flag forwarded unchanged to
        make_utraining_test_file
    """
    # Phrase -> label annotations for this patent set and language.
    phrase_labels = load_phrase_labels(patent_dir, lang)

    # Write the .mallet file from the annotations.
    make_utraining_test_file(patent_dir, lang, version, phrase_labels, use_annotated_chunks_p)

    # Mallet_test does the rest; the work is done in the test directory for
    # now, with the train directory supplied alongside it.
    test_dir = os.path.join(patent_dir, lang, "test")
    train_dir = os.path.join(patent_dir, lang, "train")
    tester = mallet.Mallet_test("utest", version, test_dir, "utrain", train_dir)

    # The write_test_mallet_vectors_file() step is currently disabled for
    # this entry point; only the classifier run is performed.
    tester.mallet_test_classifier("MaxEnt")
예제 #2
0
def pipeline_utraining_test_data(root, lang, patent_dir, version="1"):
    """Create the pipeline utraining test file, vectorize it and classify it.

    Arguments:
    root -- pipeline root directory; the test output goes in <root>/test
    lang -- language subdirectory name under patent_dir
    patent_dir -- directory whose <lang>/train subdirectory is passed to
        Mallet_test as the training location
    version -- version identifier handed to the file builder and to Mallet_test
    """
    # Parenthesized single-argument form: prints the same text under the
    # Python 2 print statement and is also valid as a Python 3 function call.
    print("[pipeline_utraining_test_data]root %s, lang %s, patent_dir %s, version |%s|"
          % (root, lang, patent_dir, version))

    # location of the corresponding training vectors and model file
    train_output_dir = os.path.join(patent_dir, lang, "train")
    test_output_dir = os.path.join(root, "test")

    # create the .mallet test file
    pipeline_make_utraining_test_file(root, lang, version)

    # create an instance of Mallet_test class to do the rest;
    # the work is done in the test directory for now.
    mtest = mallet.Mallet_test("utest", version, test_output_dir, "utrain", train_output_dir)
    # create the mallet vectors file from the mallet file, then classify
    mtest.write_test_mallet_vectors_file()
    mtest.mallet_test_classifier("MaxEnt")
예제 #3
0
def _run_classifier(train_dir, test_dir, version, classifier, mallet_file, results_file):
    """Instantiate a Mallet_test for the given directories and run the classifier.

    The "utest"/"utrain" names and the version are handed to Mallet_test
    together with test_dir and train_dir; classifier, mallet_file and
    results_file are passed straight through to mallet_test_classifier.
    """
    tester = mallet.Mallet_test("utest", version, test_dir, "utrain", train_dir)
    tester.mallet_test_classifier(classifier, mallet_file, results_file)