print "\tf1 macro:", f1macro
            f1micro = f1_score(y_test, y_pred, average='micro')
            print "\tf1 micro:", f1micro

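            # The dummy method is evaluated with preshuffle both on and off;
            # all other methods are ranked once, without pre-shuffling.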
            rank_options = [False]
            if test['method'] == 'dummy':
                rank_options = [True, False]

            for preshuffle in rank_options:
                df_with_ranking = rk.ranking(data_test, y_pred, y_prob, preshuffle=preshuffle, target=True)

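                # Score the ranking per search query: group rows by srch_id and compute NDCG for each.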
                search_ids = df_with_ranking['srch_id']
                diff_search_ids = search_ids.drop_duplicates()

                k = 0
                ndcg_list = []

                for id in diff_search_ids:
                    mask = (df_with_ranking['srch_id'] == id)
                    result_df = df_with_ranking.loc[mask]
                    ndcg_result = ndcg.ndcg(result_df)
                    ndcg_list.append(ndcg_result)

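                # Average NDCG over all search queries in the test set.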
                meanndcg = sum(ndcg_list) / float(len(ndcg_list))
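                # Append one semicolon-separated result row for this method/params/preshuffle run.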
                f.write('%s; %s; %s; %s; %s; %f; %f; %f; %f; %f; %f\n' % (
                    test['method'], str(boosting), str(params), str(preshuffle), str(traintime),
                    accuracy, recallmacro, recallmicro, f1macro, f1micro, meanndcg))

                print "\tmean ndcg", meanndcg
    clf = ensemble.RandomForestClassifier(**params)
    start_time = datetime.now()
    clf.fit(X_train, y_train)
    print clf.classes_
    print "trained in", datetime.now() - start_time

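    # In-sample check: predictions and scores are computed on the training data itself.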
    y_pred = clf.predict(X_train)
    y_prob = clf.predict_proba(X_train)
    print "class probs", y_prob
    print "classes found", np.unique(y_pred)
    print "accuracy:", clf.score(X_train, y_train)
    print "recall macro:", recall_score(y_train, y_pred, average='macro')
    print "recall micro:", recall_score(y_train, y_pred, average='micro')
    print "f1 macro:", f1_score(y_train, y_pred, average='macro')
    print "f1 micro:", f1_score(y_train, y_pred, average='micro')

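    # Rank the training dataframe with the fitted model and score NDCG per srch_id.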
    df_with_ranking = rk.ranking(traindf, y_pred, y_prob)

    search_ids = df_with_ranking['srch_id']
    diff_search_ids = search_ids.drop_duplicates()

    k = 0
    ndcg_list = []

    for id in diff_search_ids:
        mask = (df_with_ranking['srch_id'] == id)
        result_df = df_with_ranking.loc[mask]
        # Do not reassign the name `ndcg`: it shadows the ndcg module and breaks the next iteration.
        ndcg_result = ndcg.ndcg(result_df)
        ndcg_list.append(ndcg_result)

    print ndcg_list
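    # Also report the mean NDCG over all searches, mirroring the summary written above.
    print "mean ndcg", sum(ndcg_list) / float(len(ndcg_list))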