Example 1
def runall():
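    # One full training run: fit the model, write the learned embedding
    # matrices to disk, and log NDCG/MRR/precision@10 on a random sample
    # of validation users.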
    res = defaultdict(list)
    with open('results.txt', 'a') as f:
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            session_conf.gpu_options.allow_growth = True
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                model = PredictionModel(
                    num_users=ratings.num_users,
                    num_items=ratings.num_items,
                    num_ratings=len(ratings.train),
                    embedding_dim=FLAGS.embedding_dim,
                    mu=np.mean(ratings.train['rating']),
                    alpha=FLAGS.alpha,
                    reg_lambda=FLAGS.reg_lambda,
                )
                for i in range(1):
                    last_loss = train(model, sess, 1e0, 40000, 0.5,
                                      FLAGS.training_stop_after)
                    f.write('loss: {}\n'.format(last_loss))
                    f.flush()
                    res['loss'].append(last_loss)
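                    # U: user embeddings, V: item embeddings, Vb: per-item bias terms.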
                    U, V, Vb = sess.run(model.get_embedding_mats())
                    np.savetxt('results-' + dataset_name + '/ours2.u.txt',
                               U,
                               delimiter=',')
                    np.savetxt('results-' + dataset_name + '/ours2.v.txt',
                               V,
                               delimiter=',')
                    np.savetxt('results-' + dataset_name + '/ours2.vb.txt',
                               Vb,
                               delimiter=',')
                    numtest = 1000
                    testids = np.random.permutation(
                        list(set(ratings.val['user_id'])))[:numtest]
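                    # Score every item for each sampled user: U V^T plus the item bias.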
                    predictions = np.matmul(U[testids],
                                            np.transpose(V)) + np.transpose(Vb)
                    ndcg, mrr, precision = calc_scores.calc_scores(
                        ratings.val, testids, predictions, 10)
                    f.write(repr((ndcg, mrr, precision)) + '\n')
                    f.write('\n')
                    f.flush()
                    res['ndcg_at_10'].append(ndcg)
                    res['mrr_at_10'].append(mrr)
                    res['precision_at_10'].append(precision)
        print_flush(res)
    return res
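
As a side note, here is a minimal sketch of how a top-10 ranking (the input to
NDCG@10/MRR@10 above) could be read off the predictions matrix; top_k_items is
a hypothetical helper, not part of the source:

import numpy as np

def top_k_items(predictions, k=10):
    # Sort scores ascending per row, keep the last k columns, then reverse
    # them so the highest-scored item ids come first.
    return np.argsort(predictions, axis=1)[:, -k:][:, ::-1]

# Rows of `predictions` are aligned with `testids`; result shape: (numtest, k).
top10 = top_k_items(predictions, k=10)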
Example 2
def runall():
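    # Sweep the alpha hyperparameter (a single value here; earlier sweeps are
    # kept in the comments below), training one model per setting and
    # recording its loss and ranking metrics.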
    res = defaultdict(lambda: defaultdict(list))
    with open('results.txt', 'a') as f:
        # for alpha in [0.0, 0.1, 0.2, 0.3, 0.4]:
        # for alpha in [0.0]:
        for alpha in [1.0]:
            with tf.Graph().as_default():
                session_conf = tf.ConfigProto(
                    allow_soft_placement=FLAGS.allow_soft_placement,
                    log_device_placement=FLAGS.log_device_placement)
                session_conf.gpu_options.allow_growth = True
                sess = tf.Session(config=session_conf)
                with sess.as_default():
                    model = PredictionModel(
                        num_users=ratings.num_users,
                        num_items=ratings.num_items,
                        num_ratings=len(ratings.train),
                        embedding_dim=FLAGS.embedding_dim,
                        alpha=alpha,
                        reg_lambda=FLAGS.reg_lambda,
                    )
                    for i in range(1):
                        f.write('alpha: {}\n'.format(alpha))
                        last_loss = train(model, sess, 1e0, 40000, 0.5,
                                          FLAGS.training_stop_after)
                        f.write('loss: {}\n'.format(last_loss))
                        f.flush()
                        res[alpha]['loss'].append(last_loss)
                        U, V = sess.run(model.get_embedding_mats())
                        np.savetxt('ml-100k-take1.pmf.u.txt', U, delimiter=',')
                        np.savetxt('ml-100k-take1.pmf.v.txt', V, delimiter=',')
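                        # PMF-style scores: plain inner product of user and
                        # item embeddings (no bias term in this variant).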
                        predictions = np.matmul(U, np.transpose(V))
                        ndcg, mrr = calc_scores.calc_scores(
                            ratings.val, predictions, 10)
                        f.write(repr((ndcg, mrr)) + '\n')
                        f.write('\n')
                        f.flush()
                        # res[alpha]['precision_at_10'].append(precision_at_10)
                        res[alpha]['ndcg_at_10'].append(ndcg)
                        res[alpha]['mrr_at_10'].append(mrr)
            print_flush(res)
    return res
Example 3
def train(model, sess, starter_learning_rate, learning_rate_decay_every,
          learning_rate_decay_by, stop_after):
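    """Run the training loop.

    The learning rate starts at `starter_learning_rate` and decays by a
    factor of `learning_rate_decay_by` every `learning_rate_decay_every`
    steps; when `stop_after` is truthy, training halts after that many
    global steps. Returns (last_train_loss, last_val_loss).
    """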
    # Define the training procedure
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # optimizer = tf.train.AdamOptimizer(1e-3)
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               learning_rate_decay_every,
                                               learning_rate_decay_by,
                                               staircase=True)
    # optimizer = tf.train.AdamOptimizer(learning_rate)
    optimizer = tf.train.AdagradOptimizer(learning_rate)

    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    # Keep track of gradient values and sparsity (optional). These summaries
    # are currently disabled: iterate over grads_and_vars instead of the
    # empty list to re-enable them.
    grad_summaries = []
    for g, v in []:  # for g, v in grads_and_vars:
        if g is not None:
            grad_hist_summary = tf.summary.histogram(
                "{}/grad/hist".format(v.name), g)
            sparsity_summary = tf.summary.scalar(
                "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    # grad_summaries_merged = tf.summary.merge(grad_summaries)

    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print_flush("Writing to {}\n".format(out_dir))

    # Summaries for loss
    loss_summary = tf.summary.scalar("loss", model.loss)
    learning_rate_summary = tf.summary.scalar("learning_rate", learning_rate)

    # Train Summaries
    train_summary_op = tf.summary.merge(
        [loss_summary, learning_rate_summary])  # , grad_summaries_merged
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    # Val summaries
    val_summary_op = tf.summary.merge([loss_summary, learning_rate_summary])
    val_summary_dir = os.path.join(out_dir, "summaries", "val")
    val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph)

    # Checkpoint directory. TensorFlow assumes it already exists, so create it first.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=FLAGS.num_checkpoints)

    # Initialize all variables
    sess.run(tf.global_variables_initializer())

    def train_step(user_ids, per_user_count, per_user_item_ids,
                   per_user_ratings):
        """
        A single training step 
        """
        feed_dict = {
            model.input_user_ids: user_ids,
            model.input_per_user_count: per_user_count,
            model.input_per_user_item_ids: per_user_item_ids,
            model.input_per_user_ratings: per_user_ratings,
        }
        sess.run(train_op, feed_dict)
        step, loss, rate = sess.run([global_step, model.loss, learning_rate],
                                    feed_dict)
        if step % FLAGS.summary_every == 0:
            summaries = sess.run(train_summary_op, feed_dict)
            train_summary_writer.add_summary(summaries, step)
            time_str = datetime.now().isoformat()
            print_flush("{}: step {}, loss {:g}, rate {:g}".format(
                time_str, step, loss, rate))
        return loss

    def val_step(user_ids,
                 per_user_count,
                 per_user_item_ids,
                 per_user_ratings,
                 writer=None):
        """
        Evaluates the model on a batch from the validation set.
        """
        feed_dict = {
            model.input_user_ids: user_ids,
            model.input_per_user_count: per_user_count,
            model.input_per_user_item_ids: per_user_item_ids,
            model.input_per_user_ratings: per_user_ratings,
        }
        step, summaries, loss = sess.run(
            [global_step, val_summary_op, model.loss], feed_dict)
        time_str = datetime.now().isoformat()
        print_flush("{}: step {}, loss {:g}".format(time_str, step, loss))
        if writer:
            writer.add_summary(summaries, step)
        return loss

    # Generate batches
    batches = ratings.train_batch_iter(FLAGS.batch_size, FLAGS.num_epochs)
    last_train_loss = 0
    last_val_loss = 0
    # Training loop. For each batch...
    for (user_ids, per_user_count, per_user_item_ids,
         per_user_ratings) in batches:
        last_train_loss = train_step(user_ids, per_user_count,
                                     per_user_item_ids, per_user_ratings)
        current_step = tf.train.global_step(sess, global_step)
        if stop_after and current_step > stop_after:
            print_flush('Stopping after {} training steps'.format(stop_after))
            break
        if current_step % FLAGS.evaluate_every == 0:
            print_flush("\nEvaluation:")
            (val_user_ids, val_per_user_count, val_per_user_item_ids,
             val_per_user_ratings) = ratings.get_batch(
                 ratings.val[:FLAGS.batch_size])
            last_val_loss = val_step(val_user_ids,
                                     val_per_user_count,
                                     val_per_user_item_ids,
                                     val_per_user_ratings,
                                     writer=val_summary_writer)
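            # Rebuild the full score matrix from the current embeddings and
            # report ranking metrics on the validation set.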
            U, V = sess.run(model.get_embedding_mats())
            predictions = np.matmul(U, np.transpose(V))
            ndcg, mrr = calc_scores.calc_scores(ratings.val, predictions, 10)
            print_flush(
                ' NDCG@10 and MRR@10 for val set: {:.4f}, {:.4f}'.format(
                    ndcg, mrr))
            print_flush("")
        if current_step % FLAGS.checkpoint_every == 0:
            path = saver.save(sess,
                              checkpoint_prefix,
                              global_step=current_step)
            print_flush("Saved model checkpoint to {}\n".format(path))
    return (last_train_loss, last_val_loss)
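
The examples above reference a module-level FLAGS object. Below is a minimal
sketch of flag definitions consistent with the names used here; the default
values are illustrative assumptions only, not taken from the source:

import tensorflow as tf

tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")
tf.flags.DEFINE_integer("embedding_dim", 64, "Embedding dimensionality")
tf.flags.DEFINE_float("alpha", 1.0, "Model weighting hyperparameter")
tf.flags.DEFINE_float("reg_lambda", 0.01, "Regularization strength")
tf.flags.DEFINE_integer("batch_size", 64, "Users per training batch")
tf.flags.DEFINE_integer("num_epochs", 200, "Passes over the training data")
tf.flags.DEFINE_integer("summary_every", 100, "Steps between summary writes")
tf.flags.DEFINE_integer("evaluate_every", 100, "Steps between evaluations")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Steps between checkpoints")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Max checkpoints to keep")
tf.flags.DEFINE_integer("training_stop_after", 0,
                        "Stop after this many steps (0 disables the cap)")

FLAGS = tf.flags.FLAGS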