Ejemplo n.º 1
0
def get_features(model_file, output_file):
    """\
    Write the non-zero feature coefficients of a stored model to a text file.

    The model is loaded from model_file. For every label of the model's class
    attribute, the output contains a 'LABEL == <label>' header line, then one
    '<feature> - <coefficient>' line for each feature whose coefficient is
    non-zero (sorted by descending coefficient), and finally a few blank
    lines. The output file is written in UTF-8 and overwritten if it exists.
    """
    m = Model.load_from_file(model_file)
    labels = m.data_headers.get_attrib(m.class_attr).labels
    feats = m.vectorizer.get_feature_names()
    # the context manager closes the file even if an error occurs part-way
    with codecs.open(output_file, 'w', 'UTF-8') as fh:
        for i, label in enumerate(labels):
            log_info('Enumerating features for label %d (\'%s\')'
                     % (i, label))
            # NOTE(review): assumes coef_ holds one row per label -- confirm
            # that this is true for the classifier type stored in the model
            coefs = m.classifier.coef_[i]
            nonzero = [(f, c) for (f, c) in zip(feats, coefs) if c != 0]
            fh.write('LABEL == %s\n' % label)
            # highest coefficients first
            for f, c in sorted(nonzero, key=itemgetter(1), reverse=True):
                fh.write('%s - %f\n' % (f, c))
            # blank lines separating the sections of the individual labels
            fh.write("\n\n\n")
Ejemplo n.º 2
0
def get_features(model_file, output_file):
    """\
    Write the non-zero feature coefficients of a stored model to a text file.

    The model is loaded from model_file. For every label of the model's class
    attribute, the output contains a 'LABEL == <label>' header line, then one
    '<feature> - <coefficient>' line for each feature whose coefficient is
    non-zero (sorted by descending coefficient), and finally a few blank
    lines. The output file is written in UTF-8 and overwritten if it exists.
    """
    m = Model.load_from_file(model_file)
    labels = m.data_headers.get_attrib(m.class_attr).labels
    feats = m.vectorizer.get_feature_names()
    # the context manager closes the file even if an error occurs part-way
    with codecs.open(output_file, 'w', 'UTF-8') as fh:
        for i, label in enumerate(labels):
            log_info('Enumerating features for label %d (\'%s\')'
                     % (i, label))
            # NOTE(review): assumes coef_ holds one row per label -- confirm
            # that this is true for the classifier type stored in the model
            coefs = m.classifier.coef_[i]
            nonzero = [(f, c) for (f, c) in zip(feats, coefs) if c != 0]
            fh.write('LABEL == %s\n' % label)
            # highest coefficients first
            for f, c in sorted(nonzero, key=itemgetter(1), reverse=True):
                fh.write('%s - %f\n' % (f, c))
            # blank lines separating the sections of the individual labels
            fh.write("\n\n\n")
Ejemplo n.º 3
0
def test_models(file_in, file_out, model_files, source_attr, target_attr,
                oov_test_file, oov_part, pos_attr, test_indiv):
    """\
    Test all the given models on the selected file and save the target.

    The models are applied in the given order: each one classifies the data
    and its output rules are used to inflect the forms produced by the
    previous model (the first model starts from the source_attr values).
    The rules and forms of model number N are stored in the data set as
    the attributes OUTPUT_M<N> and FORMS_M<N>; the forms of the last model
    are then evaluated against target_attr and the data are saved to file_out.

    If test_indiv is set, prints also the accuracy of each individual model.
    If oov_test_file is set, performs also OOV evaluation.
    If pos_attr is set, prints detailed results for various POSs.

    Raises ValueError if model_files is empty.
    """
    # load testing data
    log_info('Loading data: ' + file_in)
    data = DataSet()
    data.load_from_arff(file_in)
    forms = data[source_attr]
    # stays None unless at least one model is applied
    forms_attr = None
    # apply all models
    for model_num, model_file in enumerate(model_files, start=1):
        model = Model.load_from_file(model_file)
        log_info('Applying model: ' + model_file)
        rules = model.classify(data)
        output_attr = 'OUTPUT_M' + str(model_num)
        data.add_attrib(Attribute(output_attr, 'string'), rules)
        if test_indiv:
            good = count_correct(data, model.class_attr, output_attr)
            print_score(good, len(data), 'Model accuracy')
        # each model inflects the forms created by the previous one
        forms = [inflect(form, rule) for form, rule in zip(forms, rules)]
        forms_attr = 'FORMS_M' + str(model_num)
        data.add_attrib(Attribute(forms_attr, 'string'), forms)
    # without any model, there is no output attribute to evaluate
    if forms_attr is None:
        raise ValueError('No models given: model_files is empty')
    # test the final performance
    log_info('Evaluating...')
    good = count_correct(data, target_attr, forms_attr)
    print_score(good, len(data), 'ALL')
    # evaluate without punctuation
    evaluate_nopunct(data, source_attr, target_attr, forms_attr)
    # evaluate forms different from lemma
    evaluate_nolemma(data, source_attr, target_attr, forms_attr)
    # load training data for OOV tests, evaluate on OOV
    if oov_test_file:
        evaluate_oov(data, source_attr, target_attr, forms_attr,
                     oov_test_file, oov_part)
    # test on different POSes
    if pos_attr:
        evaluate_poses(data, target_attr, forms_attr, pos_attr)
    # save the classification results
    log_info('Saving data: ' + file_out)
    data.save_to_arff(file_out)
Ejemplo n.º 4
0
def test_models(file_in, file_out, model_files, source_attr, target_attr,
                oov_test_file, oov_part, pos_attr, test_indiv):
    """\
    Test all the given models on the selected file and save the target.

    The models are applied in the given order: each one classifies the data
    and its output rules are used to inflect the forms produced by the
    previous model (the first model starts from the source_attr values).
    The rules and forms of model number N are stored in the data set as
    the attributes OUTPUT_M<N> and FORMS_M<N>; the forms of the last model
    are then evaluated against target_attr and the data are saved to file_out.

    If test_indiv is set, prints also the accuracy of each individual model.
    If oov_test_file is set, performs also OOV evaluation.
    If pos_attr is set, prints detailed results for various POSs.

    Raises ValueError if model_files is empty.
    """
    # load testing data
    log_info('Loading data: ' + file_in)
    data = DataSet()
    data.load_from_arff(file_in)
    forms = data[source_attr]
    # stays None unless at least one model is applied
    forms_attr = None
    # apply all models
    for model_num, model_file in enumerate(model_files, start=1):
        model = Model.load_from_file(model_file)
        log_info('Applying model: ' + model_file)
        rules = model.classify(data)
        output_attr = 'OUTPUT_M' + str(model_num)
        data.add_attrib(Attribute(output_attr, 'string'), rules)
        if test_indiv:
            good = count_correct(data, model.class_attr, output_attr)
            print_score(good, len(data), 'Model accuracy')
        # each model inflects the forms created by the previous one
        forms = [inflect(form, rule) for form, rule in zip(forms, rules)]
        forms_attr = 'FORMS_M' + str(model_num)
        data.add_attrib(Attribute(forms_attr, 'string'), forms)
    # without any model, there is no output attribute to evaluate
    if forms_attr is None:
        raise ValueError('No models given: model_files is empty')
    # test the final performance
    log_info('Evaluating...')
    good = count_correct(data, target_attr, forms_attr)
    print_score(good, len(data), 'ALL')
    # evaluate without punctuation
    evaluate_nopunct(data, source_attr, target_attr, forms_attr)
    # evaluate forms different from lemma
    evaluate_nolemma(data, source_attr, target_attr, forms_attr)
    # load training data for OOV tests, evaluate on OOV
    if oov_test_file:
        evaluate_oov(data, source_attr, target_attr, forms_attr, oov_test_file,
                     oov_part)
    # test on different POSes
    if pos_attr:
        evaluate_poses(data, target_attr, forms_attr, pos_attr)
    # save the classification results
    log_info('Saving data: ' + file_out)
    data.save_to_arff(file_out)