Ejemplo n.º 1
0
prepare_sent_features()


def get_features(aid):
    return tuple(meta[aid][fn] for fn in feature_names)


qa_X = np.asarray([get_features(aid) for aid in all_answers])
# Score > 0 tests => positive class is good answer
# Score <= 0 tests => positive class is poor answer
qa_Y = np.asarray([meta[aid]['Score'] > 0 for aid in all_answers])
classifying_answer = "good"

for idx, feat in enumerate(feature_names):
    plot_feat_hist([(qa_X[:, idx], feat)])
"""
plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [1,0]], 'feat_hist_two.png')
plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [3,4,5,6]], 'feat_hist_four.png')
"""
avg_scores_summary = []


def measure(clf_class, parameters, name, data_size=None, plot=False):
    start_time_clf = time.time()
    if data_size is None:
        X = qa_X
        Y = qa_Y
    else:
        X = qa_X[:data_size]
        Y = qa_Y[:data_size]

prepare_sent_features()


def get_features(aid):
    return tuple(meta[aid][fn] for fn in feature_names)

qa_X = np.asarray([get_features(aid) for aid in all_answers])
# Score > 0 tests => positive class is good answer
# Score <= 0 tests => positive class is poor answer
qa_Y = np.asarray([meta[aid]['Score'] > 0 for aid in all_answers])
classifying_answer = "good"

for idx, feat in enumerate(feature_names):
    plot_feat_hist([(qa_X[:, idx], feat)])
"""
plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [1,0]], 'feat_hist_two.png')
plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [3,4,5,6]], 'feat_hist_four.png')
"""
avg_scores_summary = []


def measure(clf_class, parameters, name, data_size=None, plot=False):
    start_time_clf = time.time()
    if data_size is None:
        X = qa_X
        Y = qa_Y
    else:
        X = qa_X[:data_size]
        Y = qa_Y[:data_size]