Example #1
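These snippets target the TensorFlow 1.x API (tf.placeholder, tf.Session) and rely on a few project-level helpers and hyper-parameters. A sketch of the imports they need is shown below; the standard-library and third-party imports are certain, while the module paths for the project helpers are assumptions.

# Imports required by the snippets below.
import os
import logging
import pickle

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API

# Project-specific helpers and hyper-parameters used by the examples.
# Their home modules are assumptions; adjust to the actual project layout.
# from network import full_network, trainable_variable_summaries
# from dataset import Dataset, decompression_feature
# from config import (BATCH_SIZE, EPOCH, DROPOUT_PROB, DROPOUT_KEEP_PROB,
#                     LEARNING_RATE_BASE, LEARNING_RATE_DECAY,
#                     SAVE_MODEL_STEPS, SHOW_LOG_STEPS)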
def test(test_X, test_y, model_path):
    user_id = tf.placeholder(tf.int32, [None, 1], name='user_id')
    user_gender = tf.placeholder(tf.int32, [None, 1], name='user_gender')
    user_age = tf.placeholder(tf.int32, [None, 1], name='user_age')
    user_job = tf.placeholder(tf.int32, [None, 1], name='user_job')

    movie_id = tf.placeholder(tf.int32, [None, 1], name='movie_id')
    movie_genres = tf.placeholder(tf.float32, [None, 18], name='movie_genres')
    movie_titles = tf.placeholder(tf.int32, [None, 15], name='movie_titles')
    movie_title_length = tf.placeholder(tf.float32, [None], name='movie_title_length')
    targets = tf.placeholder(tf.int32, [None, 1], name='targets')
    dropout_keep_prob = tf.constant(DROPOUT_PROB, dtype=tf.float32, name='dropout_keep_prob')

    _, _, predicted = full_network(user_id, user_gender, user_age, user_job, movie_id,
                                   movie_genres, movie_titles, movie_title_length,
                                   dropout_keep_prob)

    with tf.name_scope('loss'):
        # MSE loss: regress the predicted value onto the rating
        loss = tf.losses.mean_squared_error(targets, predicted)
        tf.summary.scalar('loss', loss)

    dataset = Dataset(test_X.values, test_y.values)
    batch_per_epoch = (len(test_X) + BATCH_SIZE - 1) // BATCH_SIZE

    saver = tf.train.Saver()

    summaries_merged = tf.summary.merge_all()

    with tf.Session() as sess:
        test_summary_dir = os.path.join('./data', 'summaries', 'test')
        test_summary_writer = tf.summary.FileWriter(test_summary_dir, sess.graph)

        sess.run(tf.global_variables_initializer())

        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        avg_loss = 0
        for batch_i in range(batch_per_epoch):
            Xs, ys = dataset.next_batch(BATCH_SIZE)
            users, movies = decompression_feature(Xs)

            feed = {
                user_id: users.id,
                user_gender: users.gender,
                user_age: users.age,
                user_job: users.job,
                movie_id: movies.id,
                movie_genres: movies.genres,
                movie_titles: movies.titles,
                movie_title_length: movies.title_length,
                targets: ys}

            test_loss, summaries = sess.run([loss, summaries_merged], feed)
            test_summary_writer.add_summary(summaries, batch_i)
            show_message = 'Batch {:>4}/{}   test_loss = {:.3f}'.format(batch_i, batch_per_epoch, test_loss)
            logging.info(show_message)
            avg_loss = avg_loss + test_loss * len(users.id)
        avg_loss = avg_loss / dataset.size
        logging.info('Loss on the test set is {:.3f}'.format(avg_loss))
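A minimal way to drive this evaluation, assuming the held-out split was pickled as pandas objects (the `.values` accesses above suggest DataFrames); the file paths below are assumptions, and model_path must be the directory containing the checkpoint written by train().

# Hypothetical evaluation driver; the data paths are assumptions.
logging.basicConfig(level=logging.INFO)
with open('./data/test_X.p', 'rb') as f:   # assumed path
    test_X = pickle.load(f)
with open('./data/test_y.p', 'rb') as f:   # assumed path
    test_y = pickle.load(f)
test(test_X, test_y, './data/model/')      # directory holding the checkpoint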
Example #2
def train(train_X, train_y, save_dir):
    user_id = tf.placeholder(tf.int32, [None, 1], name='user_id')
    user_gender = tf.placeholder(tf.int32, [None, 1], name='user_gender')
    user_age = tf.placeholder(tf.int32, [None, 1], name='user_age')
    user_job = tf.placeholder(tf.int32, [None, 1], name='user_job')

    movie_id = tf.placeholder(tf.int32, [None, 1], name='movie_id')
    movie_genres = tf.placeholder(tf.float32, [None, 18], name='movie_genres')
    movie_titles = tf.placeholder(tf.int32, [None, 15], name='movie_titles')
    movie_title_length = tf.placeholder(tf.float32, [None],
                                        name='movie_title_length')
    targets = tf.placeholder(tf.int32, [None, 1], name='targets')
    dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

    _, _, predicted = full_network(user_id, user_gender, user_age, user_job,
                                   movie_id, movie_genres, movie_titles,
                                   movie_title_length, dropout_keep_prob)

    trainable_variable_summaries()
    with tf.name_scope('loss'):
        # MSE loss: regress the predicted value onto the rating
        loss = tf.losses.mean_squared_error(targets, predicted)
        tf.summary.scalar('loss', loss)

    dataset = Dataset(train_X.values, train_y.values)
    batch_per_epoch = (len(train_X) + BATCH_SIZE - 1) // BATCH_SIZE

    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,
                                               batch_per_epoch,
                                               LEARNING_RATE_DECAY)  # decay the learning rate once per epoch
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(
        loss, global_step=global_step)  # minimize the MSE loss

    saver = tf.train.Saver(
        max_to_keep=(batch_per_epoch * EPOCH + SAVE_MODEL_STEPS - 1) //
        SAVE_MODEL_STEPS)

    summaries_merged = tf.summary.merge_all()

    with tf.Session() as sess:
        train_summary_dir = os.path.join('./data', 'summaries', 'train')
        train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                     sess.graph)

        sess.run(tf.global_variables_initializer())
        for epoch_i in range(EPOCH):
            # iterate over the training batches and record the training loss
            for batch_i in range(batch_per_epoch):
                Xs, ys = dataset.next_batch(BATCH_SIZE)
                users, movies = decompression_feature(Xs)

                feed = {
                    user_id: users.id,
                    user_gender: users.gender,
                    user_age: users.age,
                    user_job: users.job,
                    movie_id: movies.id,
                    movie_genres: movies.genres,
                    movie_titles: movies.titles,
                    movie_title_length: movies.title_length,
                    targets: ys,
                    dropout_keep_prob: DROPOUT_KEEP_PROB
                }

                step, train_loss, summaries, _ = sess.run(
                    [global_step, loss, summaries_merged, train_op], feed)
                train_summary_writer.add_summary(summaries, step)

                if step % SHOW_LOG_STEPS == 0:
                    show_message = 'Epoch {:>3} Batch {:>4}/{}   train_loss = {:.3f}'.format(
                        epoch_i, batch_i, batch_per_epoch, train_loss)
                    logging.info(show_message)
                if step % SAVE_MODEL_STEPS == 0:
                    saver.save(sess, save_dir, global_step=global_step)
        saver.save(sess, save_dir, global_step=global_step)
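Note that save_dir is handed directly to saver.save, so it acts as a checkpoint path prefix rather than a bare directory. A minimal driver along those lines, with assumed file names:

# Hypothetical training driver; the data paths are assumptions.
logging.basicConfig(level=logging.INFO)
with open('./data/train_X.p', 'rb') as f:               # assumed path
    train_X = pickle.load(f)
with open('./data/train_y.p', 'rb') as f:               # assumed path
    train_y = pickle.load(f)
train(train_X, train_y, './data/model/movie-rating')    # checkpoint path prefix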
def main(model_path):
    user_id = tf.placeholder(tf.int32, [None, 1], name='user_id')
    user_gender = tf.placeholder(tf.int32, [None, 1], name='user_gender')
    user_age = tf.placeholder(tf.int32, [None, 1], name='user_age')
    user_job = tf.placeholder(tf.int32, [None, 1], name='user_job')

    movie_id = tf.placeholder(tf.int32, [None, 1], name='movie_id')
    movie_genres = tf.placeholder(tf.float32, [None, 18], name='movie_genres')
    movie_titles = tf.placeholder(tf.int32, [None, 15], name='movie_titles')
    movie_title_length = tf.placeholder(tf.float32, [None], name='movie_title_length')
    dropout_keep_prob = tf.constant(DROPOUT_PROB, dtype=tf.float32, name='dropout_keep_prob')

    user_feature, movie_feature, _ = full_network(user_id, user_gender, user_age, user_job, movie_id,
                                                  movie_genres, movie_titles, movie_title_length,
                                                  dropout_keep_prob)

    with tf.variable_scope('user_movie_fc', reuse=True):
        user_movie_fc_kernel = tf.get_variable('kernel')
        user_movie_fc_bias = tf.get_variable('bias')

    with open('./data/users.p', 'rb') as users:
        user_Xs = pickle.load(users)
    with open('./data/movies.p', 'rb') as movies:
        movie_Xs = pickle.load(movies)

    user_dataset = Dataset(user_Xs.values, shuffle=False)
    movie_dataset = Dataset(movie_Xs.values, shuffle=False)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cpkt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, cpkt.model_checkpoint_path)

        # Extract user feature vectors
        user_features = {}
        for batch in range((user_dataset.size + BATCH_SIZE - 1) // BATCH_SIZE):
            data = user_dataset.next_batch(BATCH_SIZE)
            feed = {
                user_id: np.reshape(data.take(0, 1), [len(data), 1]),
                user_gender: np.reshape(data.take(4, 1), [len(data), 1]),
                user_age: np.reshape(data.take(5, 1), [len(data), 1]),
                user_job: np.reshape(data.take(3, 1), [len(data), 1]),
            }
            feature = sess.run(user_feature, feed_dict=feed)
            user_features.update({key: value for (key, value) in zip(data.take(0, 1), feature)})
        with open('./data/user-features.p', 'wb') as uf:
            pickle.dump(user_features, uf)

        # Extract movie feature vectors
        movie_features = {}
        for batch in range((movie_dataset.size + BATCH_SIZE - 1) // BATCH_SIZE):
            data = movie_dataset.next_batch(BATCH_SIZE)
            titles = np.array(list(data.take(5, 1)))
            feed = {
                movie_id: np.reshape(data.take(0, 1), [len(data), 1]),
                movie_genres: np.array(list(data.take(4, 1))),
                movie_titles: titles,
                movie_title_length: (titles != 0).sum(axis=1)
            }
            feature = sess.run(movie_feature, feed_dict=feed)
            movie_features.update({key: value for (key, value) in zip(data.take(0, 1), feature)})
        with open('./data/movie-features.p', 'wb') as mf:
            pickle.dump(movie_features, mf)

        # Save the kernel and bias of the user_movie_fc output layer
        kernel, bias = sess.run([user_movie_fc_kernel, user_movie_fc_bias])
        with open('./data/user-movie-fc-param.p', 'wb') as params:
            pickle.dump((kernel, bias), params)
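With user-features.p, movie-features.p and user-movie-fc-param.p written, a rating can be scored offline without rebuilding the graph. The sketch below assumes user_movie_fc is a dense layer applied to the concatenation of the two feature vectors with a linear output; if full_network combines them differently (for example with an element-wise product or an output activation), the scoring line must be adapted.

# Offline scoring sketch; assumes user_movie_fc acts on the concatenated
# [user_feature, movie_feature] vector with a linear output.
with open('./data/user-features.p', 'rb') as uf:
    user_features = pickle.load(uf)
with open('./data/movie-features.p', 'rb') as mf:
    movie_features = pickle.load(mf)
with open('./data/user-movie-fc-param.p', 'rb') as params:
    kernel, bias = pickle.load(params)

def predict_rating(uid, mid):
    # Dense layer applied to the concatenated feature vector.
    x = np.concatenate([user_features[uid], movie_features[mid]])
    return float(np.dot(x, kernel) + bias)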