Example #1
0
            # Choose one value, based on val_avg, to assign to every key in `keys`.
            # NOTE(review): the `> 0.4` branch and the `else` branch both pick
            # val_max, so only `val_avg < 0.01` changes the outcome -- confirm
            # whether the middle range was meant to use a different value.
            if val_avg > 0.4:
                val = val_max
            elif val_avg < 0.01:
                val = val_min
            else:
                val = val_max
            # `keys` is rebound here from an iterable of keys to a
            # {key: val} mapping, which is then merged into map_result.
            keys = {k: val for k in keys}
            map_result.update(keys)

        # Collect (label, original prediction, adjusted prediction) for every
        # key in map_score2 that also has a label in map_dup.
        list_res = []
        for key, pred in map_score2.items():
            # Looked up outside the try on purpose: a key missing from
            # map_result still raises, exactly as it did before.
            new_pred = map_result[key]
            try:
                label = map_dup[key]
            except KeyError:
                # No label for this key, so it cannot be scored; skip it.
                continue
            list_res.append((label, pred, new_pred))

        aaa = pandas.DataFrame(list_res, columns=['label', 'pred', 'new'])
        from tfidf_k import calc_weight
        from sklearn.metrics import log_loss, roc_auc_score
        sw = calc_weight(aaa['label'].values)
        print(ppp)
        # Weighted AUC and log-loss of the original predictions ...
        print(roc_auc_score(aaa['label'].values, aaa['pred'].values, sample_weight=sw))
        print(log_loss(aaa['label'].values, aaa['pred'].values, sample_weight=sw))
        # ... followed by the same two metrics for the adjusted predictions.
        print(roc_auc_score(aaa['label'].values, aaa['new'].values, sample_weight=sw))
        print(log_loss(aaa['label'].values, aaa['new'].values, sample_weight=sw))
        print('-------')
Example #2
0
from logging import StreamHandler, DEBUG, Formatter, FileHandler

from logging import getLogger

# Record layout shared by the handlers attached to this module's logger.
log_fmt = Formatter(
    '%(asctime)s %(name)s %(lineno)d [%(levelname)s][%(funcName)s] %(message)s '
)

# Module-level logger; records below INFO are dropped at the logger.
logger = getLogger(__name__)
logger.setLevel('INFO')

# Stream handler (StreamHandler's default stream), also filtered at INFO.
handler = StreamHandler()
handler.setFormatter(log_fmt)
handler.setLevel('INFO')
logger.addHandler(handler)


# Load the data set from 'clique_data.csv' and compute weights from its labels
# via calc_weight (defined outside this section).
aaa = pandas.read_csv('clique_data.csv')
sample_weight = calc_weight(aaa['label'].values)

# Columns selected into x_train.  Candidate columns currently disabled:
# 'new', 'emax', 'emin', 'l_score', 'r_score', 'm_score',
# 'l_num', 'r_num', 'm_num'.
use_cols = ['cnum', 'pred', 'vmax', 'vmin', 'vavg']

x_train = aaa[use_cols].values
y_train = aaa['label'].values


all_params = {'max_depth': [5],  # [14],
              'learning_rate': [0.02],  # [0.06, 0.1, 0.2],
              'n_estimators': [10000],
              'min_child_weight': [1],
              'colsample_bytree': [0.7],