Code Example #1

import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score

# FLAGS, cross_entropy_loss, and cal_group_auc are project-level names
# defined elsewhere in deep-ctr-prediction.


def model_predict(model, eval_input_fn, epoch):
    """Display evaluate result."""
    prediction_result = model.predict(eval_input_fn)

    click_sum = 0.0
    predictions = []
    user_id_list = []
    labels = []
    num_samples = FLAGS.batch_size * FLAGS.predict_steps
    num_pre_samples = 0  # number of examples predicted positive (p >= 0.5)
    print('expected eval samples: %d' % num_samples)
    for pred_dict in prediction_result:
        # predict() yields one dict per example; values are length-1 arrays.
        user_id = pred_dict['user_id'][0]
        p = pred_dict['probabilities'][0]
        label = float(pred_dict['label'][0])
        click_sum += p
        predictions.append(p)
        user_id_list.append(user_id)
        labels.append(label)
        if p >= 0.5:
            num_pre_samples += 1

        # Log progress roughly every tenth of the expected sample count.
        if len(predictions) % max(1, num_samples // 10) == 0:
            tf.logging.info(
                'predict at step %d/%d',
                int(float(len(predictions)) / num_samples *
                    FLAGS.predict_steps), FLAGS.predict_steps)
        if len(predictions) >= num_samples:
            break

    # The prediction generator may yield fewer examples than requested.
    num_samples = len(predictions)
    print('predicted positive samples: %d' % num_pre_samples)
    # Display evaluation metrics

    label_mean = sum(labels) / num_samples
    prediction_mean = sum(predictions) / num_samples
    loss = sum(cross_entropy_loss(
        labels, predictions)) / num_samples * FLAGS.batch_size
    auc = roc_auc_score(labels, predictions)
    group_auc = cal_group_auc(labels, predictions, user_id_list)

    predict_diff = np.array(predictions) - prediction_mean
    predict_diff_square_sum = sum(np.square(predict_diff))
    s_deviation = np.sqrt(predict_diff_square_sum / num_samples)
    c_deviation = s_deviation / prediction_mean

    # With binary labels, p * label >= 0.5 holds only when label == 1 and
    # p >= 0.5, so the products below isolate true and false positives.
    true_positive_samples = (np.array(predictions) * np.array(labels) >=
                             0.5).tolist().count(True)
    false_positive_samples = (np.array(predictions) * (1 - np.array(labels)) >=
                              0.5).tolist().count(True)
    # A false negative is a positive example scored below the threshold.
    false_negative_samples = int(np.logical_and(
        np.array(predictions) < 0.5, np.array(labels) >= 0.5).sum())
    print('TP: %d  FP: %d  FN: %d' % (true_positive_samples,
                                      false_positive_samples,
                                      false_negative_samples))
    predicted_positives = true_positive_samples + false_positive_samples
    actual_positives = true_positive_samples + false_negative_samples
    precision = (float(true_positive_samples) / predicted_positives
                 if predicted_positives else 0.0)
    recall = (float(true_positive_samples) / actual_positives
              if actual_positives else 0.0)
    tf.logging.info('Results at epoch %d/%d', (epoch + 1), FLAGS.num_epochs)
    tf.logging.info('-' * 60)
    tf.logging.info('label/mean: %s' % label_mean)
    tf.logging.info('predictions/mean: %s' % prediction_mean)
    tf.logging.info('average loss per batch: %s' % loss)
    tf.logging.info('standard deviation: %s' % s_deviation)
    tf.logging.info('coefficient of variation: %s' % c_deviation)
    tf.logging.info('precision: %s' % precision)
    tf.logging.info('recall: %s' % recall)
    tf.logging.info('auc: %s' % auc)
    tf.logging.info('group auc: %s' % group_auc)
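
The snippet leans on two helpers defined elsewhere in the project: cross_entropy_loss, which must return a per-example loss sequence, and cal_group_auc, which computes an impression-weighted average of per-user AUC. The sketch below is inferred from the call sites above and is an assumption, not the project's actual code:

# Hedged sketches of the undefined helpers, inferred from their call sites.
from collections import defaultdict

import numpy as np
from sklearn.metrics import roc_auc_score


def cross_entropy_loss(labels, predictions, eps=1e-7):
    """Per-example binary cross-entropy: -[y*log(p) + (1-y)*log(1-p)]."""
    losses = []
    for y, p in zip(labels, predictions):
        p = min(max(p, eps), 1.0 - eps)  # clip so log() stays finite
        losses.append(-(y * np.log(p) + (1.0 - y) * np.log(1.0 - p)))
    return losses


def cal_group_auc(labels, predictions, user_id_list):
    """Group AUC: per-user AUC averaged, weighted by impression count."""
    groups = defaultdict(lambda: ([], []))
    for y, p, uid in zip(labels, predictions, user_id_list):
        groups[uid][0].append(y)
        groups[uid][1].append(p)
    total_auc = 0.0
    total_impressions = 0
    for ys, ps in groups.values():
        if min(ys) == max(ys):
            continue  # AUC is undefined when a user has one class only
        total_auc += roc_auc_score(ys, ps) * len(ys)
        total_impressions += len(ys)
    return total_auc / total_impressions if total_impressions else 0.0

Group AUC is typically reported alongside plain AUC in CTR work because a global AUC also rewards easy cross-user comparisons; averaging within users measures how well each user's own candidates are ranked, which is why the loop tracks user_id next to every prediction.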
Code Example #2
File: train.py  Project: zyfnhct/deep-ctr-prediction

import random

import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score

# list_hdfs_dir, build_model_columns, dfm_model_fn, feature_input_fn,
# export_model, cross_entropy_loss, cal_group_auc, and FLAGS are
# project-level names defined elsewhere in deep-ctr-prediction.


def main(unused_argv):
    train_files = []
    eval_files = []
    if isinstance(FLAGS.train_data_dir, str):
        train_files = list_hdfs_dir(FLAGS.train_data_dir)

    if isinstance(FLAGS.eval_data_dir, str):
        eval_files = list_hdfs_dir(FLAGS.eval_data_dir)

    random.shuffle(train_files)
    feature_columns = build_model_columns()

    session_config = tf.ConfigProto(
        device_count={'GPU': 1, 'CPU': 10},
        inter_op_parallelism_threads=10,
        intra_op_parallelism_threads=10,
        # log_device_placement=True,
    )
    # Claim only part of the GPU's memory so the device can be shared.
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.32
    run_config = tf.estimator.RunConfig().replace(
        model_dir=FLAGS.model_dir,
        session_config=session_config,
        log_step_count_steps=1000,
        save_summary_steps=20000,
        save_checkpoints_secs=1000)

    model = tf.estimator.Estimator(
        model_fn=dfm_model_fn,
        params={
            'feature_columns': feature_columns,
            'hidden_units': FLAGS.hidden_units.split(','),
            'learning_rate': FLAGS.learning_rate,
            'use_fm': FLAGS.use_fm,
        },
        config=run_config)
    train_input_fn = lambda: feature_input_fn(
        train_files, 1, True, FLAGS.batch_size)
    # Do not shuffle data for evaluation.
    eval_input_fn = lambda: feature_input_fn(
        eval_files, 1, False, FLAGS.batch_size)

    for epoch in range(FLAGS.num_epochs):
        if not FLAGS.evaluate_only:
            model.train(train_input_fn)
        print("*" * 100)
        #results = model.evaluate(input_fn=eval_input_fn, steps=200)

        prediction_result = model.predict(eval_input_fn)

        click_sum = 0.0
        predictions = []
        user_id_list = []
        labels = []
        num_samples = FLAGS.batch_size * FLAGS.predict_steps
        num_pre_samples = 0  # number of examples predicted positive (p >= 0.5)
        print('expected eval samples: %d' % num_samples)
        for pred_dict in prediction_result:
            # predict() yields one dict per example; values are length-1 arrays.
            user_id = pred_dict['user_id'][0]
            p = pred_dict['probabilities'][0]
            label = float(pred_dict['label'][0])
            click_sum += p
            predictions.append(p)
            user_id_list.append(user_id)
            labels.append(label)
            if p >= 0.5:
                num_pre_samples += 1

            # Log progress roughly every tenth of the expected sample count.
            if len(predictions) % max(1, num_samples // 10) == 0:
                tf.logging.info(
                    'predict at step %d/%d',
                    int(float(len(predictions)) / num_samples *
                        FLAGS.predict_steps), FLAGS.predict_steps)
            if len(predictions) >= num_samples:
                break

        # The prediction generator may yield fewer examples than requested.
        num_samples = len(predictions)
        print('predicted positive samples: %d' % num_pre_samples)
        # Display evaluation metrics

        label_mean = sum(labels) / num_samples
        prediction_mean = sum(predictions) / num_samples
        loss = sum(cross_entropy_loss(
            labels, predictions)) / num_samples * FLAGS.batch_size
        auc = roc_auc_score(labels, predictions)
        group_auc = cal_group_auc(labels, predictions, user_id_list)

        predict_diff = np.array(predictions) - prediction_mean
        predict_diff_square_sum = sum(np.square(predict_diff))
        s_deviation = np.sqrt(predict_diff_square_sum / num_samples)
        c_deviation = s_deviation / prediction_mean

        # With binary labels, p * label >= 0.5 holds only when label == 1 and
        # p >= 0.5, so the products below isolate true and false positives.
        true_positive_samples = (np.array(predictions) * np.array(labels) >=
                                 0.5).tolist().count(True)
        false_positive_samples = (np.array(predictions) *
                                  (1 - np.array(labels)) >=
                                  0.5).tolist().count(True)
        # A false negative is a positive example scored below the threshold.
        false_negative_samples = int(np.logical_and(
            np.array(predictions) < 0.5, np.array(labels) >= 0.5).sum())
        print('TP: %d  FP: %d  FN: %d' % (true_positive_samples,
                                          false_positive_samples,
                                          false_negative_samples))
        predicted_positives = true_positive_samples + false_positive_samples
        actual_positives = true_positive_samples + false_negative_samples
        precision = (float(true_positive_samples) / predicted_positives
                     if predicted_positives else 0.0)
        recall = (float(true_positive_samples) / actual_positives
                  if actual_positives else 0.0)
        tf.logging.info('Results at epoch %d/%d', (epoch + 1),
                        FLAGS.num_epochs)
        tf.logging.info('-' * 60)
        tf.logging.info('label/mean: %s' % label_mean)
        tf.logging.info('predictions/mean: %s' % prediction_mean)
        tf.logging.info('average loss per batch: %s' % loss)
        tf.logging.info('standard deviation: %s' % s_deviation)
        tf.logging.info('coefficient of variation: %s' % c_deviation)
        tf.logging.info('precision: %s' % precision)
        tf.logging.info('recall: %s' % recall)
        tf.logging.info('auc: %s' % auc)
        tf.logging.info('group auc: %s' % group_auc)

    # Export the model
    if FLAGS.export_dir is not None:
        export_model(model, FLAGS.export_dir, feature_columns)
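
export_model is the last undefined helper above. Assuming the model is served serialized tf.Example protos, a minimal TF 1.x sketch (Estimator.export_saved_model needs TF >= 1.13; older releases spell it export_savedmodel) might look like this; the project's actual helper may differ:

import tensorflow as tf


def export_model(model, export_dir, feature_columns):
    """Sketch: export the Estimator as a SavedModel for serving."""
    # Reuse the training-time feature columns to build the parse spec.
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_fn = (
        tf.estimator.export.build_parsing_serving_input_receiver_fn(
            feature_spec))
    model.export_saved_model(export_dir, serving_input_fn)

Building the serving-input receiver from the same feature_columns used in training keeps the SavedModel's parse spec consistent with what the model_fn expects at inference time.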