# Defined elsewhere in the project; presumably fills in the sentiment-related
# entries of `meta` that are read below -- confirm against its definition.
prepare_sent_features()


def get_features(aid):
    """Return the feature values of answer *aid* as a tuple.

    The values are read from ``meta[aid]`` in the order given by the
    module-level ``feature_names`` sequence.
    """
    return tuple([meta[aid][name] for name in feature_names])


# One row per answer in `all_answers`, one column per entry of `feature_names`.
qa_X = np.asarray(list(map(get_features, all_answers)))

# Boolean labels. Testing `Score > 0` makes the good answers the positive
# class; testing `Score <= 0` instead would make the poor answers positive.
qa_Y = np.asarray([meta[aid]['Score'] > 0 for aid in all_answers])
classifying_answer = "good"

# Plot one histogram per feature column.
for idx, feat in enumerate(feature_names):
    plot_feat_hist([(qa_X[:, idx], feat)])

# The bare string below is disabled plotting code; as an expression statement
# it has no effect at runtime.
"""
plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [1,0]], 'feat_hist_two.png')
plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [3,4,5,6]], 'feat_hist_four.png')
"""

avg_scores_summary = []


def measure(clf_class, parameters, name, data_size=None, plot=False):
    start_time_clf = time.time()
    # Work on the full data set unless `data_size` restricts the run to the
    # first `data_size` samples.
    if data_size is None:
        X = qa_X
        Y = qa_Y
    else:
        X = qa_X[:data_size]
        Y = qa_Y[:data_size]