Beispiel #1
0
def classify_with_expert_knowledge(paths):
    """Classify the data loaded from ``paths`` using TARGET_FEATURES and print an evaluation.

    The loaded sentences go through ``extract_features_and_vocabulary``,
    every entry of ``TARGET_FEATURES`` is run through ``stem_words``, and the
    output of ``classify`` is handed to ``evaluate_classification`` together
    with the loaded labels, sentences and class names. Nothing is returned.
    """
    sentences, labels, class_names = load_test_data(paths)
    # The third element of the extraction result is not needed here.
    feature_matrix, vocab, _ = extract_features_and_vocabulary(sentences)

    stemmed_targets = list(map(stem_words, TARGET_FEATURES))
    dense_features = feature_matrix.toarray()
    predictions = classify(stemmed_targets, dense_features, vocab)

    print('EXPERT KNOWLEDGE:')
    evaluate_classification(predictions, labels, sentences, class_names)
Beispiel #2
0
def test_extract_features(input_sentences, expected_vocabulary):
    """Check the vocabulary and feature matrix extracted from ``input_sentences``.

    The vocabulary must equal ``expected_vocabulary`` and every cell of the
    extracted features must equal the hard-coded 2 x 10 reference matrix.
    """
    # The third element of the extraction result is irrelevant for this check.
    feature_matrix, vocab, _ = extract_features_and_vocabulary(input_sentences)

    first_row = [1, 1, 2, 0, 1, 1, 1, 1, 0, 0]
    second_row = [0, 0, 0, 1, 0, 0, 0, 0, 1, 1]
    reference = np.array([first_row, second_row])

    assert vocab == expected_vocabulary
    # Element-wise comparison; every cell has to match.
    cells_match = feature_matrix == reference
    assert cells_match.all()
Beispiel #3
0
def classify_with_tf_idf(paths):
    """Run a 3-fold stratified cross-validation over the data loaded from ``paths``.

    For every fold the training sentences go through
    ``extract_features_and_vocabulary`` and ``transform_to_tfidf``; the test
    sentences are then predicted with ``predict_with_svc`` and scored with
    ``evaluate_classification``. The per-fold precision and recall values are
    collected and finally passed to ``evaluate_complete_classification``.

    Args:
        paths: Forwarded unchanged to ``load_test_data``.

    Returns:
        None. Results are reported by the evaluation helpers.
    """
    sentences, labels, class_names = load_test_data(paths)
    # Convert to arrays so the fold index arrays can be used for selection.
    sentences = np.array(sentences)
    labels = np.array(labels)
    average_precisions = []
    average_recalls = []
    # NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
    # newer versions need sklearn.model_selection.StratifiedKFold(n_splits=3)
    # with .split(sentences, labels). Left as-is to match the file's imports.
    for train_index, test_index in sklearn.cross_validation.StratifiedKFold(labels, n_folds=3):
        sentences_train, sentences_test = sentences[train_index], sentences[test_index]
        labels_train, labels_test = labels[train_index], labels[test_index]
        # The vocabulary returned by the extraction is not needed in this flow.
        features_train, _vocabulary, count_vectorizer = extract_features_and_vocabulary(sentences_train)
        tfidf_features_train = transform_to_tfidf(features_train)
        predicted = predict_with_svc(tfidf_features_train, labels_train, sentences_test, count_vectorizer)

        print('TF-IDF')
        # Evaluate against the sentences of this fold: `predicted` and
        # `labels_test` are aligned with `sentences_test`, not with the full set.
        average_precision, average_recall = evaluate_classification(
            predicted, labels_test, sentences_test, class_names)
        average_precisions.append(average_precision)
        average_recalls.append(average_recall)
    evaluate_complete_classification(average_precisions, average_recalls)
Beispiel #4
0
def vocabulary(input_sentences):
    """Return the second element of ``extract_features_and_vocabulary(input_sentences)``."""
    extraction = extract_features_and_vocabulary(input_sentences)
    return extraction[1]
Beispiel #5
0
def features(input_sentences):
    """Return the extracted features for ``input_sentences`` after calling ``.toarray()`` on them."""
    extraction = extract_features_and_vocabulary(input_sentences)
    feature_matrix = extraction[0]
    return feature_matrix.toarray()