예제 #1
0
def train(train_part_list, eval_part_list, train_params, eval_params):
    """Train a DIN model over the training parts, evaluating periodically.

    Every 10th part processed, and once after all epochs finish, the model
    is evaluated via the module-level eval() helper and the AUC/loss are
    logged.

    Args:
        train_part_list: iterable of training data partition identifiers.
        eval_part_list: iterable of evaluation data partition identifiers,
            forwarded to eval().
        train_params: dict of training configuration; must contain the key
            "train_epoch" (number of passes over train_part_list).
        eval_params: dict of evaluation configuration, forwarded to eval().
    """
    data_iterator = DataIterator(train_params)
    din = DIN(train_params)
    config = tf.ConfigProto()
    # Grow GPU memory on demand, capped at half the device.
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        i = 1  # 1-based count of parts processed, across all epochs
        for epoch in range(train_params["train_epoch"]):
            for part in train_part_list:
                logging.info("training part %s" % part)
                iterator = data_iterator.input_fn(part)
                sess.run(iterator.initializer)
                (user_input, cross_input, ad_input, hist_click_seq,
                 hist_click_length, target_ad, labels) = iterator.get_next()
                while True:
                    try:
                        (batch_user_input, batch_cross_input, batch_ad_input,
                         batch_hist_click_seq, batch_hist_click_length,
                         batch_target_ad, batch_label) = sess.run([
                             user_input, cross_input, ad_input, hist_click_seq,
                             hist_click_length, target_ad, labels
                         ])
                        feed_dict = {
                            din.user_input: batch_user_input,
                            din.cross_input: batch_cross_input,
                            din.ad_input: batch_ad_input,
                            din.hist_click_seq: batch_hist_click_seq,
                            din.hist_click_length: batch_hist_click_length,
                            din.target_ad: batch_target_ad,
                            din.label: batch_label
                        }
                        # PERF FIX: the original issued two sess.run calls per
                        # batch with the identical feed (one for loss/pred_score
                        # whose results were discarded, one for train_op),
                        # running the forward pass twice. One call fetches the
                        # pre-update loss and applies the gradient step.
                        loss, pred_score, _ = sess.run(
                            [din.loss, din.pred_score, din.train_op],
                            feed_dict=feed_dict)
                    except tf.errors.OutOfRangeError:
                        # Iterator exhausted: this part is done.
                        break
                if i % 10 == 0:
                    auc, avg_loss = eval(sess, din, eval_part_list,
                                         eval_params)
                    logging.info("epoch %d, auc %.4f, loss %.4f" %
                                 (epoch, auc, avg_loss[0]))
                i += 1
        # Final evaluation after the last epoch.
        auc, avg_loss = eval(sess, din, eval_part_list, eval_params)
        logging.info("epoch %d, auc %.4f, loss %.4f" %
                     (epoch, auc, avg_loss[0]))
예제 #2
0
def eval(sess, esmm, eval_part_list, eval_params):
    """Evaluate an ESMM model on the given parts and return (auc, avg_loss).

    Accumulates CTR scores, per-batch losses, and CTR labels over all parts,
    writes the (label, score) pairs to ./pred/predictions_<epoch_i>.txt
    (indexed by the module-level counter ``epoch_i``, incremented here), and
    computes the ROC-AUC of the CTR predictions.

    Args:
        sess: active tf.Session with variables already initialized.
        esmm: ESMM model exposing input placeholders plus ctr_score and
            loss tensors.
        eval_part_list: iterable of evaluation data partition identifiers.
        eval_params: dict of configuration forwarded to DataIterator.

    Returns:
        (auc, avg_loss): ROC-AUC float and mean loss as a length-1 array
        (callers index avg_loss[0]).
    """
    # Seed rows so vstack always has a base; stripped via [:-1] before use.
    scores = np.zeros((1, 1))
    losses = np.zeros((1, 1))
    all_labels = np.zeros((1, 1))
    cnt = 0

    data_iterator = DataIterator(eval_params)
    for part in eval_part_list:
        iterator = data_iterator.input_fn(part, shuffle=False)
        sess.run(iterator.initializer)
        (user_input, cross_input, ad_input, hist_click_seq, hist_click_length,
         target_ad, ctr_labels, cvr_labels) = iterator.get_next()
        while True:
            try:
                # BUG FIX: last element was unpacked as batch_cvr_labels but
                # fed below as batch_cvr_label, raising NameError on the very
                # first batch. Use one consistent name.
                (batch_user_input, batch_cross_input, batch_ad_input,
                 batch_hist_click_seq, batch_hist_click_length,
                 batch_target_ad, batch_ctr_label,
                 batch_cvr_label) = sess.run([
                     user_input, cross_input, ad_input, hist_click_seq,
                     hist_click_length, target_ad, ctr_labels, cvr_labels
                 ])
                ctr_score, loss = sess.run(
                    [esmm.ctr_score, esmm.loss],
                    feed_dict={
                        esmm.user_input: batch_user_input,
                        esmm.cross_input: batch_cross_input,
                        esmm.ad_input: batch_ad_input,
                        esmm.hist_click_seq: batch_hist_click_seq,
                        esmm.hist_click_length: batch_hist_click_length,
                        esmm.target_ad: batch_target_ad,
                        esmm.ctr_label: batch_ctr_label,
                        esmm.cvr_label: batch_cvr_label
                    })
                scores = np.vstack([ctr_score, scores])
                losses = np.vstack([loss, losses])
                # BUG FIX: was len(scores) — the growing accumulator — so the
                # count compounded quadratically. Count only this batch.
                cnt += len(ctr_score)
                # BUG FIX: was undefined name batch_label; AUC is computed
                # against the CTR labels.
                all_labels = np.vstack([batch_ctr_label, all_labels])
            except ValueError:
                # Best-effort skip of a malformed batch, as in the original.
                continue
            except tf.errors.OutOfRangeError:
                break
    # Build the output once after accumulation (was rebuilt on every batch).
    true_and_pred = np.concatenate((all_labels[:-1], scores[:-1]), axis=1)
    global epoch_i
    np.savetxt('./pred/predictions_' + str(epoch_i) + '.txt', true_and_pred,
               fmt='%7.6f')
    epoch_i += 1
    avg_loss = np.sum(losses[:-1], axis=0) / len(losses[:-1])
    auc = roc_auc_score(all_labels[:-1], scores[:-1])
    return auc, avg_loss
예제 #3
0
def eval(sess, dnn, eval_part_list, eval_params):
    """Evaluate a DNN ranking model on the given parts; return (auc, avg_loss).

    Accumulates prediction scores, per-batch losses, and labels across all
    evaluation parts, writes the (label, score) pairs to
    ./pred/predictions_<e>.txt (indexed by the module-level counter ``e``,
    incremented here), and computes the ROC-AUC.

    Args:
        sess: active tf.Session with variables already initialized.
        dnn: model exposing input placeholders plus pred_score and loss
            tensors.
        eval_part_list: iterable of evaluation data partition identifiers.
        eval_params: dict of configuration forwarded to DataIterator.

    Returns:
        (auc, avg_loss): ROC-AUC float and mean loss as a length-1 array
        (callers index avg_loss[0]).
    """
    # Seed rows so vstack always has a base; stripped via [:-1] before use.
    scores = np.zeros((1, 1))
    losses = np.zeros((1, 1))
    all_labels = np.zeros((1, 1))
    cnt = 0

    data_iterator = DataIterator(eval_params)
    for part in eval_part_list:
        iterator = data_iterator.input_fn(part, shuffle=False)
        sess.run(iterator.initializer)
        (user_features, topic_features, ad_features,
         labels) = iterator.get_next()
        while True:
            try:
                (batch_user_feature, batch_topic_feature, batch_ad_feature,
                 batch_label) = sess.run(
                     [user_features, topic_features, ad_features, labels])
                pred_score, loss = sess.run(
                    [dnn.pred_score, dnn.loss],
                    feed_dict={
                        dnn.user_input: batch_user_feature,
                        dnn.cross_input: batch_topic_feature,
                        dnn.ad_input: batch_ad_feature,
                        dnn.label: batch_label
                    })
                scores = np.vstack([pred_score, scores])
                losses = np.vstack([loss, losses])
                # BUG FIX: was len(scores) — the growing accumulator — so the
                # count compounded quadratically. Count only this batch.
                cnt += len(pred_score)
                all_labels = np.vstack([batch_label, all_labels])
            except ValueError:
                # Best-effort skip of a malformed batch, as in the original.
                continue
            except tf.errors.OutOfRangeError:
                break
    # BUG FIX: was computed inside the while loop on every batch, and raised
    # NameError at savetxt below if no batch ever succeeded. Build it once
    # after accumulation.
    true_and_pred = np.concatenate((all_labels[:-1], scores[:-1]), axis=1)
    global e
    np.savetxt('./pred/predictions_' + str(e) + '.txt',
               true_and_pred,
               fmt='%7.6f')
    e += 1
    avg_loss = np.sum(losses[:-1], axis=0) / len(losses[:-1])
    auc = roc_auc_score(all_labels[:-1], scores[:-1])
    return auc, avg_loss
예제 #4
0
def train(train_part_list, eval_part_list, train_params, eval_params):
    """Train a DSSM model over the training parts read from HDFS.

    Evaluates via the module-level eval() helper after every 10th part and
    once more after the final epoch, logging AUC and loss each time.

    Args:
        train_part_list: iterable of training data partition identifiers.
        eval_part_list: iterable of evaluation partition identifiers,
            forwarded to eval().
        train_params: dict of training configuration; must contain the key
            "train_epoch".
        eval_params: dict of evaluation configuration, forwarded to eval().
    """
    data_iterator = DataIterator(train_params)
    dssm = DSSM(train_params)
    sess_config = tf.ConfigProto()
    # Grow GPU memory on demand, capped at half the device.
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5
    with tf.Session(config=sess_config) as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        part_counter = 1  # counts parts processed across all epochs
        for epoch in range(train_params["train_epoch"]):
            for part in train_part_list:
                logging.info("training part %s" % part)
                iterator = data_iterator.hdfs_input_fn(part)
                sess.run(iterator.initializer)
                user_features, topic_features, ad_features, labels = \
                    iterator.get_next()
                while True:
                    try:
                        batch = sess.run(
                            [user_features, topic_features, ad_features,
                             labels])
                    except tf.errors.OutOfRangeError:
                        # Iterator exhausted: this part is done.
                        break
                    feed = {
                        dssm.user_input: batch[0],
                        dssm.cross_input: batch[1],
                        dssm.ad_input: batch[2],
                        dssm.label: batch[3],
                    }
                    sess.run(dssm.train_op, feed_dict=feed)
                if part_counter % 10 == 0:
                    auc, avg_loss = eval(sess, dssm, eval_part_list,
                                         eval_params)
                    logging.info("epoch %d, auc %.4f, loss %.4f" %
                                 (epoch, auc, avg_loss[0]))
                part_counter += 1
        # Final evaluation after the last epoch.
        auc, avg_loss = eval(sess, dssm, eval_part_list, eval_params)
        logging.info("epoch %d, auc %.4f, loss %.4f" %
                     (epoch, auc, avg_loss[0]))
예제 #5
0
    def __init__(self,
                 configs,
                 model: NET3,
                 iterator: DataIterator,
                 is_eval=False):
        """Set up a NET3 training harness: configs, device, model weights,
        per-network device placement, loss/optimizer, save directory, and
        (optionally) an evaluation iterator.

        Args:
            configs: dict of overrides merged onto default_configs() via
                utils.update_configs.
            model: the NET3 model instance to train.
            iterator: DataIterator providing training data and the networks
                returned by get_networks().
            is_eval: when True, a second DataIterator is built from the
                training iterator's configs with mode forced to "eval".
        """
        # configs
        self.configs = self.default_configs()
        utils.update_configs(configs, self.configs)

        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.model = model
        # Optionally warm-start from a saved state dict.
        if self.configs["pretrained_path"] is not None:
            self.model.load_state_dict(
                torch.load(self.configs["pretrained_path"]))

        # iterator
        self.iterator = iterator
        self.networks = self.iterator.get_networks()
        # Move every network tensor/module onto the selected device.
        for n in self.networks:
            self.networks[n] = self.networks[n].to(self.device)

        # loss and optimizer
        self.loss_calc = utils.mse_loss
        self.opt = optim.Adam(params=self.model.parameters(),
                              lr=self.configs["lr"])
        # Early-stopping bookkeeping: best loss seen and patience counter.
        self.loss_min = np.inf
        self.n_tolerance = 0

        # path
        if not os.path.exists(self.configs["save_dir"]):
            os.makedirs(self.configs["save_dir"])

        # evaluation
        self.is_eval = is_eval
        if is_eval:
            # NOTE(review): this mutates the training iterator's configs
            # dict in place before cloning it for eval — confirm intended.
            configs = self.iterator.configs
            configs["mode"] = "eval"
            self.iterator_eval = DataIterator(configs=configs)
    # Read data paths and the model-selection switch from the [Default]
    # section of the already-loaded Config parser.
    P_TRAIN = Config.get("Default", "P_TRAIN")
    P_TEST = Config.get("Default", "P_TEST")
    P_LABELS = Config.get("Default", "P_LABELS")
    P_CLIP_PARAMS = Config.get("Default", "P_CLIP_PARAMS")
    METHOD = Config.get("Default", "METHOD")

    # Timestamped run directories so repeated runs never collide.
    ts = time.time()
    dt = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    result_path = os.path.join("results", "archive", METHOD, dt)
    os.makedirs(result_path)
    logging.basicConfig(filename=os.path.join(result_path, "info.log"),
                        level=logging.INFO)
    model_path = os.path.join("models", METHOD, dt)
    os.makedirs(model_path)

    d = DataIterator(P_TRAIN, P_TEST, P_LABELS, batch_size=BATCH_SIZE)
    # Per-clip normalization parameters saved by the preprocessing step
    # (presumably — confirm against the pipeline that writes P_CLIP_PARAMS).
    clip_params = np.load(P_CLIP_PARAMS)
    # Select the network architecture by config; both share hyperparameters.
    if METHOD == 'STAE':
        net = SpatialTemporalAutoencoder(tvol=TVOL,
                                         alpha=ALPHA,
                                         batch_size=BATCH_SIZE,
                                         lambd=LAMBDA)
    elif METHOD == 'CONVAE2D':
        net = ConvAE2d(tvol=TVOL,
                       alpha=ALPHA,
                       batch_size=BATCH_SIZE,
                       lambd=LAMBDA)
    else:
        raise ValueError('Incorrect method specification')
    area_under_roc, equal_error_rate = train(data=d,
from src.plots import plot_loss, plot_auc, plot_regularity
from src.train import train

if __name__ == "__main__":
    # Load hyperparameters and data paths from the [Default] section.
    Config = ConfigParser.ConfigParser()
    Config.read('config/config.ini')
    num_iteration = int(Config.get("Default", "NUM_ITER"))
    batch_size = int(Config.get("Default", "BATCH_SIZE"))
    train_path = Config.get("Default", "P_TRAIN")
    test_path = Config.get("Default", "P_TEST")
    label_path = Config.get("Default", "P_LABELS")
    t_volume = int(Config.get("Default", "TVOL"))
    learning_rate = float(Config.get("Default", "ALPHA"))

    logging.basicConfig(filename=os.path.join("results", "STAE.log"),
                        level=logging.INFO)

    # manually modify the batch_size
    # NOTE(review): this deliberately overrides the BATCH_SIZE read from
    # config above — remove the config read or this override if one of the
    # two is no longer wanted.
    batch_size = 50
    data = DataIterator(train_path,
                        test_path,
                        label_path,
                        batch_size=batch_size)
    # Input volume: t_volume frames of 224x224 grayscale.
    data_shape = (t_volume, 224, 224, 1)
    logging.info('The learning rate is {} and batch_size is {}'.format(
        learning_rate, batch_size))
    stae = STAE(data_shape=data_shape,
                learning_rate=learning_rate,
                optimizer='adam')

    train(data, stae, num_iteration=num_iteration, result_path="results/")