print "\tf1 macro:", f1macro f1micro = f1_score(y_test, y_pred, average='micro') print "\tf1 micro:", f1micro rank_options = [False] if test['method'] == 'dummy': rank_options = [True, False] for preshuffle in rank_options: df_with_ranking = rk.ranking(data_test, y_pred, y_prob, preshuffle=preshuffle, target = True) search_ids = df_with_ranking['srch_id'] diff_search_ids = search_ids.drop_duplicates() k = 0 ndcg_list = [] for id in diff_search_ids: mask = (df_with_ranking['srch_id'] == id) result_df = df_with_ranking.loc[mask] ndcg_result = ndcg.ndcg(result_df) ndcg_list.append(ndcg_result) meanndcg = sum(ndcg_list) / float(len(ndcg_list)) f.write('%s; %s; %s; %s; %s; %f; %f; %f; %f; %f; %f\n' % ( test['method'], str(boosting), str(params), str(preshuffle), str(traintime), accuracy, recallmacro, recallmicro, f1macro, f1micro, meanndcg)) print "\tmean ndcg", meanndcg
clf = ensemble.RandomForestClassifier(**params)

start_time = datetime.now()
clf.fit(X_train, y_train)
print clf.classes_
print "trained in", datetime.now() - start_time

# evaluate the fit on the training set itself
y_pred = clf.predict(X_train)
y_prob = clf.predict_proba(X_train)
print "class probs", y_prob
print "classes found", np.unique(y_pred)
print "accuracy:", clf.score(X_train, y_train)
print "recall macro:", recall_score(y_train, y_pred, average='macro')
print "recall micro:", recall_score(y_train, y_pred, average='micro')
print "f1 macro:", f1_score(y_train, y_pred, average='macro')
print "f1 micro:", f1_score(y_train, y_pred, average='micro')

df_with_ranking = rk.ranking(traindf, y_pred, y_prob)
search_ids = df_with_ranking['srch_id']
diff_search_ids = search_ids.drop_duplicates()

k = 0
ndcg_list = []
for search_id in diff_search_ids:
    mask = (df_with_ranking['srch_id'] == search_id)
    result_df = df_with_ranking.loc[mask]
    # keep the result under its own name: rebinding 'ndcg' would shadow the
    # ndcg module and break the call on the next iteration
    ndcg_result = ndcg.ndcg(result_df, k)
    ndcg_list.append(ndcg_result)
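# --- Hypothetical sketch, not the project's actual rk module ---
# rk.ranking is assumed to collapse the per-class probabilities into a single
# relevance score and sort the rows of each srch_id by it. The class values
# (0, 1, 5), the scoring rule, and the preshuffle behaviour are assumptions.
import numpy as np

def ranking(df, y_pred, y_prob, classes=(0, 1, 5), preshuffle=False, target=True):
    # y_pred and target are accepted only to mirror the call sites above
    out = df.copy()
    # expected relevance: sum over classes of P(class) * class value
    out['score'] = np.dot(y_prob, np.asarray(classes, dtype=float))
    if preshuffle:
        # a dummy model gives constant probabilities, so all scores tie;
        # shuffling first turns the stable sort below into a random baseline
        out = out.sample(frac=1)
    return out.sort_values(['srch_id', 'score'], ascending=[True, False],
                           kind='mergesort')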
start_time = datetime.now()
clf.fit(X_train, y_train)
print clf.classes_
print "trained in", datetime.now() - start_time

# evaluate the fit on the training set itself
y_pred = clf.predict(X_train)
y_prob = clf.predict_proba(X_train)
print "class probs", y_prob
print "classes found", np.unique(y_pred)
print "accuracy:", clf.score(X_train, y_train)
print "recall macro:", recall_score(y_train, y_pred, average='macro')
print "recall micro:", recall_score(y_train, y_pred, average='micro')
print "f1 macro:", f1_score(y_train, y_pred, average='macro')
print "f1 micro:", f1_score(y_train, y_pred, average='micro')

df_with_ranking = rk.ranking(traindf, y_pred, y_prob)
search_ids = df_with_ranking['srch_id']
diff_search_ids = search_ids.drop_duplicates()

ndcg_list = []
for search_id in diff_search_ids:
    mask = (df_with_ranking['srch_id'] == search_id)
    result_df = df_with_ranking.loc[mask]
    ndcg_result = ndcg.ndcg(result_df)
    ndcg_list.append(ndcg_result)
print ndcg_list
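# --- Optional alternative, sketch only ---
# The drop_duplicates/mask loop above scans the full frame once per query; the
# same per-query mean NDCG can be obtained with a single groupby, assuming the
# hypothetical ndcg_for_query helper sketched earlier.
mean_ndcg = df_with_ranking.groupby('srch_id', sort=False).apply(ndcg_for_query).mean()
print mean_ndcg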