Code example #1
    def eval(self, T, dev_data, hparams, sess):
        preds = self.infer(dev_data)
        if hparams.metric == 'logloss':
            log_loss = metrics.log_loss(dev_data[1], preds)
            if self.best_score > log_loss:
                self.best_score = log_loss
                try:
                    os.makedirs('model_tmp/')
                except OSError:
                    # Directory already exists.
                    pass
                self.saver.save(sess, 'model_tmp/model')
            utils.print_out("# Epoch-time %.2fs Eval logloss %.6f. Best logloss %.6f."
                            % (T, log_loss, self.best_score))
        elif hparams.metric == 'auc':
            # Labels shift from {0, 1} to {1, 2}, so pos_label=2 marks the positives.
            fpr, tpr, thresholds = metrics.roc_curve(dev_data[1] + 1,
                                                     preds,
                                                     pos_label=2)
            auc = metrics.auc(fpr, tpr)
            if self.best_score < auc:
                self.best_score = auc
                try:
                    os.makedirs('model_tmp/')
                except OSError:
                    pass
                self.saver.save(sess, 'model_tmp/model')
            utils.print_out("# Epoch-time %.2fs Eval AUC %.6f. Best AUC %.6f."
                            % (T, auc, self.best_score))
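A note on the AUC branch above: shifting the {0, 1} labels to {1, 2} and passing pos_label=2 gives exactly the usual AUC. A minimal self-contained check with made-up data:

import numpy as np
from sklearn import metrics

labels = np.array([0, 1, 1, 0, 1])
preds = np.array([0.2, 0.8, 0.6, 0.4, 0.9])

# The shifted-label roc_curve/auc pair matches roc_auc_score on the raw labels.
fpr, tpr, _ = metrics.roc_curve(labels + 1, preds, pos_label=2)
assert np.isclose(metrics.auc(fpr, tpr), metrics.roc_auc_score(labels, preds))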
Code example #2
    def eval(self, T, dev_data, hparams, sess, test_data=None):
        preds = self.infer(dev_data)

        import pickle
        if test_data:
            # Test inference moved inside the check so eval also works
            # when no test data is supplied.
            preds1 = self.infer(test_data)
            # Dump test labels and predictions for later use.
            pk = [test_data[1], preds1]
            with open(str(T) + '.pk', 'wb') as f:
                pickle.dump(pk, f)
            print('saved test pickle file')
        if hparams.metric == 'logloss':
            log_loss = metrics.log_loss(dev_data[1], preds)
            if self.best_score > log_loss:
                self.best_score = log_loss
                try:
                    os.makedirs('model_tmp/')
                except OSError:
                    pass
                self.saver.save(sess, 'model_tmp/model')
            utils.print_out("# Epoch-time %.2fs Eval logloss %.6f. Best logloss %.6f."
                            % (T, log_loss, self.best_score))
        elif hparams.metric == 'auc':
            fpr, tpr, thresholds = metrics.roc_curve(dev_data[1],
                                                     preds,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            if self.best_score < auc:
                self.best_score = auc
                try:
                    os.makedirs('model_tmp/')
                except OSError:
                    pass
                self.saver.save(sess, 'model_tmp/model')
            utils.print_out("# Epoch-time %.2fs Eval AUC %.6f. Best AUC %.6f."
                            % (T, auc, self.best_score))
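The pickle dump above can be read back with a matching load. The file name is just str(T) + '.pk', where T is whatever value was passed to eval; a minimal sketch with a hypothetical file name:

import pickle

with open('3.14.pk', 'rb') as f:  # '3.14' is a hypothetical T value
    test_labels, test_preds = pickle.load(f)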
Code example #3
File: base_model.py  Project: zaf05/ICME2019-CTR
    def eval(self, T, dev, hparams, sess):
        preds = self.infer(dev)
        # Same label-shift idiom as example #1: {0, 1} becomes {1, 2}, pos_label=2.
        fpr, tpr, thresholds = metrics.roc_curve(dev[hparams.label] + 1, preds, pos_label=2)
        logloss = metrics.log_loss(dev[hparams.label], preds)
        auc = metrics.auc(fpr, tpr)
        if self.best_score < auc:
            self.best_score = auc
        utils.print_out(
            ("# Epoch-time %.2fs Eval AUC %.6f. Eval logloss %.6f. Best AUC %.6f. " + hparams.label)
            % (T, auc, logloss, self.best_score))
Code example #4
    def eval(self, T, dev, hparams, sess):
        preds = self.infer(dev)
        dev['predict_imp'] = preds

        # Rank rows within each aid by bid (0-based); used below as a tie-breaker.
        dev['rank'] = dev[['aid', 'bid']].groupby('aid')['bid'].apply(
            lambda row: pd.Series(dict(zip(row.index, row.rank())))) - 1
        # Predictions are on a log1p scale: invert, round, and clip negatives to zero.
        dev['predict_imp'] = dev['predict_imp'].apply(lambda x: np.exp(x) - 1)
        dev['predict_imp'] = dev['predict_imp'].apply(round)
        dev['predict_imp'] = dev['predict_imp'].apply(lambda x: 0 if x < 0 else x)
        # A tiny rank-based offset keeps equal predictions monotone in bid.
        dev['predict_imp'] = dev['predict_imp'] + dev['rank'] * 0.0001

        dev['predict_imp'] = dev['predict_imp'].apply(lambda x: round(x, 4))
        # SMAPE on the gold rows; the epsilon guards against division by zero.
        gold_dev = dev[dev['gold'] == True]
        score = abs(gold_dev['gold_imp'] - gold_dev['predict_imp']) / (
            (gold_dev['gold_imp'] + gold_dev['predict_imp']) / 2 + 1e-15)
        SMAPE = score.mean()

        try:
            # Monotonicity check: within each aid, predictions should move in
            # the same direction as the bid relative to the gold row.
            last_aid = None
            gold_imp = None
            gold_bid = None
            s = None
            cont = 0
            score = []
            for item in dev[['aid', 'bid', 'predict_imp']].values:
                item = list(item)
                if item[0] != last_aid:
                    # First row of a new aid group is treated as the gold row.
                    last_aid = item[0]
                    gold_bid = item[1]
                    gold_imp = item[2]
                    if s is not None and cont:
                        score.append(s / cont)
                    s = 0
                    cont = 0
                else:
                    if (gold_imp - item[2]) * (gold_bid - item[1]) == 0:
                        # Ties count as violations.
                        s += -1
                    else:
                        # +1 if prediction and bid move in the same direction,
                        # -1 otherwise (sign of the product).
                        s += ((gold_imp - item[2]) * (gold_bid - item[1])) / \
                             abs((gold_imp - item[2]) * (gold_bid - item[1]))
                    cont += 1
            # Include the final aid group, which the loop above would otherwise drop.
            if s is not None and cont:
                score.append(s / cont)

            MonoScore = np.mean(score)
            score = 0.4 * (1 - SMAPE / 2) + 0.6 * (MonoScore + 1) / 2
        except Exception:
            MonoScore = 0
        if SMAPE < self.best_score:
            self.best_score = SMAPE
        utils.print_out((
            "# Epoch-time %.2fs AVG %.4f. Eval SMAPE %.4f. #Eval MonoScore %.4f. Best Score %.4f"
        ) % (T, dev['predict_imp'].mean(), SMAPE, MonoScore, self.best_score))
        return SMAPE
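For reference, a toy check of the SMAPE formula computed above (all values made up):

import numpy as np

gold_imp = np.array([100.0, 50.0])
predict_imp = np.array([110.0, 40.0])
score = np.abs(gold_imp - predict_imp) / ((gold_imp + predict_imp) / 2 + 1e-15)
print(score.mean())  # (10/105 + 10/45) / 2, roughly 0.1587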
Code example #5
File: ffm.py  Project: misads/ctr
    def __init__(self, hparams):
        self.hparams = hparams
        # Lower is better for logloss, so start high; higher is better otherwise.
        if hparams.metric in ['logloss']:
            self.best_score = 100000
        else:
            self.best_score = 0
        self.build_graph(hparams)
        self.optimizer(hparams)
        params = tf.trainable_variables()
        utils.print_out("# Trainable variables")
        for param in params:
            utils.print_out("  %s, %s, %s" % (param.name, str(param.get_shape()), param.op.device))
Code example #6
def train(hparams):
    tf.set_random_seed(2018)
    random.seed(2018)
    train_iterator = TextIterator(hparams, mode="train")
    dev_iterator = TextIterator(hparams,
                                mode="dev",
                                batch_size=hparams.evl_batch_size)
    test_iterator = TextIterator(hparams,
                                 mode="test",
                                 batch_size=hparams.evl_batch_size)
    model = Model(hparams)
    config_proto = tf.ConfigProto(log_device_placement=0,
                                  allow_soft_placement=0)
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)
    sess.run(tf.global_variables_initializer())

    global_step = 0
    train_loss = 0
    train_norm = 0
    best_loss = 0
    dey_cont = 0  # number of evaluations without improvement
    pay_cont = 0
    epoch = False
    epoch_cont = 0
    # Not defined in the original snippet; initialized here so the code runs.
    gloab_auc = {}
    start_time = time.time()
    while True:
        try:
            cost, _, norm = model.train(sess, train_iterator)
        except StopIteration:
            # One full pass over the training data is done.
            epoch = True
            epoch_cont += 1
            continue
        global_step += 1
        train_loss += cost
        train_norm += norm
        if global_step % hparams.num_display_steps == 0 or epoch:
            info = {}
            info['learning_rate'] = hparams.learning_rate
            info["train_ppl"] = train_loss / hparams.num_display_steps
            info["avg_grad_norm"] = train_norm / hparams.num_display_steps
            train_loss = 0
            train_norm = 0
            print_step_info("  ", global_step, info)
            if global_step % hparams.num_eval_steps == 0 or epoch:
                epoch = False
                preds = []
                label = []
                aid = []
                while True:
                    try:
                        pred = model.infer(sess, dev_iterator, label, aid)
                        preds += list(pred)
                    except StopIteration:
                        break
                # Group labels and predictions by aid to compute a per-aid AUC.
                res = {}
                for i in range(len(aid)):
                    if aid[i] not in res:
                        res[aid[i]] = {}
                        res[aid[i]]['label'] = []
                        res[aid[i]]['pred'] = []
                    res[aid[i]]['label'].append(label[i] + 1)
                    res[aid[i]]['pred'].append(preds[i])
                auc = []
                for u in res:
                    fpr, tpr, thresholds = metrics.roc_curve(
                        res[u]['label'], res[u]['pred'], pos_label=2)
                    loss_ = metrics.auc(fpr, tpr)
                    if np.isnan(loss_):
                        # AUC is undefined when an aid has only one class.
                        continue
                    gloab_auc[u] = loss_
                    auc.append(loss_)
                loss_ = np.mean(auc)
                # Compute T here so both branches below print a fresh value.
                T = (time.time() - start_time)
                start_time = time.time()
                if best_loss <= loss_:
                    # New best: checkpoint the model.
                    model.saver_ffm.save(
                        sess,
                        os.path.join(hparams.path,
                                     'model_' + str(hparams.sub_name)))
                    best_loss = loss_
                    utils.print_out(
                        "# Epoch-time %.2fs Eval AUC %.6f. Best AUC %.6f."
                        % (T, loss_, best_loss))
                else:
                    utils.print_out(
                        "# Epoch-time %.2fs Eval AUC %.6f. Best AUC %.6f."
                        % (T, loss_, best_loss))
                    # No improvement: roll back to the best checkpoint.
                    model.saver_ffm.restore(
                        sess,
                        os.path.join(hparams.path,
                                     'model_' + str(hparams.sub_name)))
                    dey_cont += 1
                if epoch_cont == hparams.epoch or dey_cont == hparams.dey_cont:
                    # Stop after the epoch budget or too many non-improving evals.
                    model.saver_ffm.restore(
                        sess,
                        os.path.join(hparams.path,
                                     'model_' + str(hparams.sub_name)))
                    break
    print("Dev inference ...")
    preds = []
    label = []
    aid = []
    while True:
        try:
            pred = model.infer(sess, dev_iterator, label, aid)
            preds += list(pred)
        except StopIteration:
            break
    data = []
    for i in range(len(preds)):
        data.append([aid[i], label[i], preds[i]])
    df = pd.DataFrame(data)
    df.columns = ['aid', 'label', 'score']
    df.to_csv('result_sub/submission_dev_' + str(hparams.sub_name) + '.csv',
              index=False)
    print('Dev inference done!')
    res = {}
    for i in range(len(aid)):
        if aid[i] not in res:
            res[aid[i]] = {}
            res[aid[i]]['label'] = []
            res[aid[i]]['pred'] = []
        res[aid[i]]['label'].append(label[i] + 1)
        res[aid[i]]['pred'].append(preds[i])
    auc = []
    for u in res:
        fpr, tpr, thresholds = metrics.roc_curve(res[u]['label'],
                                                 res[u]['pred'],
                                                 pos_label=2)
        loss_ = metrics.auc(fpr, tpr)
        if np.isnan(loss_):
            continue
        auc.append(loss_)
    loss_ = np.mean(auc)
    print("Dev auc:", loss_)
    print("Test inference ...")
    preds = []
    label = []
    aid = []
    while True:
        try:
            pred = model.infer(sess, test_iterator, label, aid)
            preds += list(pred)
        except StopIteration:
            break
    print('Test inference done!')
    return preds
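The per-aid averaged AUC that train() computes twice above can be factored into a standalone helper. A sketch under the same assumptions (parallel lists of aid, 0/1 label, and prediction), using roc_auc_score in place of the roc_curve/auc pair:

import numpy as np
from sklearn import metrics

def group_auc(aid, label, pred):
    # Group labels and predictions by aid.
    groups = {}
    for a, y, p in zip(aid, label, pred):
        ys, ps = groups.setdefault(a, ([], []))
        ys.append(y)
        ps.append(p)
    aucs = []
    for ys, ps in groups.values():
        if len(set(ys)) < 2:
            # AUC is undefined when a group contains a single class.
            continue
        aucs.append(metrics.roc_auc_score(ys, ps))
    return np.mean(aucs)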
Code example #7
def print_step_info(prefix, global_step, info):
    utils.print_out("%sstep %d lr %g logloss %.6f gN %.2f, %s" %
                    (prefix, global_step, info["learning_rate"],
                     info["train_ppl"], info["avg_grad_norm"], time.ctime()))
Code example #8
    def __init__(self, hparams):
        self.f1 = hparams.aid.copy()
        self.f2 = hparams.user.copy()
        self.batch_norm_decay = 0.9
        self.single_ids = {}
        self.num_ids = {}
        self.mulit_ids = {}
        self.mulit_mask = {}
        self.emb_v1 = {}
        self.emb_v2 = {}
        self.emb_combine_aid_v2 = {}
        self.norm_num = {}
        self.cross_params = []
        self.layer_params = []
        self.embed_params = []
        self.length = {}
        self.bias = tf.Variable(tf.truncated_normal(shape=[1],
                                                    mean=0.0,
                                                    stddev=0.0001),
                                name='bias')
        self.use_dropout = tf.placeholder(tf.bool)
        initializer = tf.random_uniform_initializer(-0.1, 0.1)
        self.feature_all_length = len(hparams.single_features) + len(
            hparams.mutil_features)
        feature_all_length = self.feature_all_length
        self.label = tf.placeholder(shape=(None), dtype=tf.float32)
        norm = [
            'uid_count', 'interest1_len', 'interest2_len', 'interest3_len',
            'interest4_len', 'interest5_len', 'kw1_len', 'kw2_len', 'kw3_len',
            'topic1_len', 'topic2_len', 'topic3_len'
        ]
        # Numeric features: count-like columns listed in norm get batch norm,
        # the rest pass through with an added feature axis.
        for s in hparams.num_features:
            self.num_ids[s] = tf.placeholder(shape=(None, ), dtype=tf.float32)
            if s in norm:
                self.norm_num[s] = self.batch_norm_layer(
                    tf.reshape(self.num_ids[s], [-1, 1]), self.use_dropout, s)
            else:
                self.norm_num[s] = self.num_ids[s][:, None]

        # Single-value categorical features: FM-style first-order (emb_v1) and
        # second-order (emb_v2) embedding tables; the two extra rows are
        # presumably for padding/unknown ids.
        for s in hparams.single_features:
            self.single_ids[s] = tf.placeholder(shape=(None, ), dtype=tf.int32)
            self.emb_v1[s] = tf.Variable(tf.truncated_normal(
                shape=[len(hparams.dict[s]) + 2, 1], mean=0.0, stddev=0.0001),
                                         name='emb_v1_' + s)
            self.embed_params.append(self.emb_v1[s])
            self.emb_v2[s] = tf.Variable(tf.truncated_normal(
                shape=[len(hparams.dict[s]) + 2, hparams.k],
                mean=0.0,
                stddev=0.0001),
                                         name='emb_v2_' + s)
            self.embed_params.append(self.emb_v2[s])

        # Multi-value categorical features: padded id matrices plus a sequence
        # mask (padded to length 100) that zeros out the padding positions.
        for s in hparams.mutil_features:
            self.mulit_ids[s] = tf.placeholder(shape=(None, None),
                                               dtype=tf.int32)
            self.length[s] = tf.placeholder(shape=(None, ), dtype=tf.int32)
            self.mulit_mask[s] = tf.sequence_mask(self.length[s],
                                                  100,
                                                  dtype=tf.float32)
            self.emb_v1[s] = tf.Variable(tf.truncated_normal(
                shape=[len(hparams.dict[s]) + 2, 1], mean=0.0, stddev=0.0001),
                                         name='emb_v1_' + s)
            self.embed_params.append(self.emb_v1[s])
            self.emb_v2[s] = tf.Variable(tf.truncated_normal(
                shape=[len(hparams.dict[s]) + 2, hparams.k],
                mean=0.0,
                stddev=0.0001),
                                         name='emb_v2_' + s)
            self.embed_params.append(self.emb_v2[s])
        self.build_graph(hparams)

        params = tf.trainable_variables()
        utils.print_out("# Trainable variables")
        for param in params:
            if 'W_' in param.name or 'b_' in param.name or 'emb' in param.name:
                utils.print_out(
                    "  %s, %s, %s" %
                    (param.name, str(param.get_shape()), param.op.device))

        self.optimizer(hparams)
        params = tf.trainable_variables()
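The multi-value masks built above come from tf.sequence_mask. A small TF1-style sketch of what the mask looks like, with assumed lengths and a max length of 5 instead of 100:

import tensorflow as tf

lengths = tf.constant([2, 4])
mask = tf.sequence_mask(lengths, 5, dtype=tf.float32)
with tf.Session() as sess:
    print(sess.run(mask))
# [[1. 1. 0. 0. 0.]
#  [1. 1. 1. 1. 0.]]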