Example 1
def train(args, data, show_loss, show_topk):

    # These embedding matrices are excluded from the checkpoint restore
    # below; they are re-loaded from text files and padded by hand instead
    var_to_restore = [
        "user_emb_matrix", "item_emb_matrix", "relation_emb_matrix",
        "entity_emb_matrix"
    ]
    n_user, n_item, n_entity, n_relation = data[0], data[1], data[2], data[3]
    train_data, eval_data, test_data = data[4], data[5], data[6]
    kg = data[7]

    # top-K evaluation settings
    user_num = 100
    k_list = [1, 2, 5, 10, 20, 50, 100]
    train_record = get_user_record(train_data, True)
    test_record = get_user_record(test_data, False)
    user_list = list(set(train_record.keys()) & set(test_record.keys()))
    if len(user_list) > user_num:
        user_list = np.random.choice(user_list, size=user_num, replace=False)
    item_set = set(range(n_item))

    export_version = int(time.time())

    try:
        # Pick the most recent checkpoint directory (names are Unix timestamps)
        restore_path = max(
            os.listdir('./model/' + args.dataset + '/' + args.restore))
    except (FileNotFoundError, ValueError):
        # No restore directory yet, or it is empty: train from scratch
        restore_path = None

    model = MKR(args, n_user, n_item, n_entity, n_relation, restore_path)

    with tf.Session() as sess:
        if restore_path is None:
            sess.run(tf.global_variables_initializer())
        else:
            # Warm start: initialize everything, restore the old weights,
            # then extend the embedding tables if new users, items,
            # entities or relations have appeared since the last run
            sess.run(tf.global_variables_initializer())
            user_emb = np.loadtxt('./model/' + args.dataset +
                                  '/vocab/user_emb_matrix.txt',
                                  dtype=np.float32)
            item_emb = np.loadtxt('./model/' + args.dataset +
                                  '/vocab/item_emb_matrix.txt',
                                  dtype=np.float32)
            entity_emb = np.loadtxt('./model/' + args.dataset +
                                    '/vocab/entity_emb_matrix.txt',
                                    dtype=np.float32)
            relation_emb = np.loadtxt('./model/' + args.dataset +
                                      '/vocab/relation_emb_matrix.txt',
                                      dtype=np.float32)
            print('new users to initialize: %d' % (n_user - len(user_emb)))
            # Pad each embedding matrix with random rows for the new ids
            user_emb = np.vstack([
                user_emb,
                np.random.normal(size=[n_user - len(user_emb), args.dim])
            ])

            item_emb = np.vstack([
                item_emb,
                np.random.normal(size=[n_item - len(item_emb), args.dim])
            ])
            entity_emb = np.vstack([
                entity_emb,
                np.random.normal(size=[n_entity - len(entity_emb), args.dim])
            ])
            relation_emb = np.vstack([
                relation_emb,
                np.random.normal(
                    size=[n_relation - len(relation_emb), args.dim])
            ])

            # Restore every variable except the embedding matrices,
            # which were re-loaded and padded above
            var_to_restore = slim.get_variables_to_restore(
                exclude=var_to_restore)
            saver = tf.train.Saver(var_to_restore)
            saver.restore(
                sess,
                tf.train.latest_checkpoint('./model/' + args.dataset + '/' +
                                           args.restore + '/' + restore_path))
            model.init_embeding(
                sess, {
                    model.user_emb: user_emb,
                    model.item_emb: item_emb,
                    model.entity_emb: entity_emb,
                    model.relation_emb: relation_emb
                })

        for step in range(args.n_epochs):
            # RS training
            np.random.shuffle(train_data)
            start = 0
            while start < train_data.shape[0]:
                _, loss = model.train_rs(
                    sess,
                    get_feed_dict_for_rs(model, train_data, start,
                                         start + args.batch_size))
                start += args.batch_size
                if show_loss:
                    print(loss)

            # KGE training
            if step % args.kge_interval == 0:
                np.random.shuffle(kg)
                start = 0
                while start < kg.shape[0]:
                    _, rmse = model.train_kge(
                        sess,
                        get_feed_dict_for_kge(model, kg, start,
                                              start + args.batch_size))
                    start += args.batch_size
                    if show_loss:
                        print(rmse)

            # CTR evaluation
            train_auc, train_acc = model.eval(
                sess,
                get_feed_dict_for_rs(model, train_data, 0,
                                     train_data.shape[0]))
            eval_auc, eval_acc = model.eval(
                sess,
                get_feed_dict_for_rs(model, eval_data, 0, eval_data.shape[0]))
            test_auc, test_acc = model.eval(
                sess,
                get_feed_dict_for_rs(model, test_data, 0, test_data.shape[0]))

            print(
                'epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
                % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc,
                   test_acc))

            # top-K evaluation
            if show_topk:
                precision, recall, f1 = topk_eval(sess, model, user_list,
                                                  train_record, test_record,
                                                  item_set, k_list)
                print('precision: ', end='')
                for i in precision:
                    print('%.4f\t' % i, end='')
                print()
                print('recall: ', end='')
                for i in recall:
                    print('%.4f\t' % i, end='')
                print()
                print('f1: ', end='')
                for i in f1:
                    print('%.4f\t' % i, end='')
                print('\n')

        # save embeddings so a later run can warm-start from them
        np.savetxt('./model/' + args.dataset + '/vocab/user_emb_matrix.txt',
                   model.user_emb_matrix.eval())
        np.savetxt('./model/' + args.dataset + '/vocab/item_emb_matrix.txt',
                   model.item_emb_matrix.eval())
        np.savetxt('./model/' + args.dataset + '/vocab/entity_emb_matrix.txt',
                   model.entity_emb_matrix.eval())
        np.savetxt(
            './model/' + args.dataset + '/vocab/relation_emb_matrix.txt',
            model.relation_emb_matrix.eval())

        # Save a checkpoint so later runs can warm-start from these weights
        saver = tf.train.Saver()
        wts_name = './model/{}/restore/{}/mkr.ckpt'.format(
            args.dataset, export_version)
        saver.save(sess, wts_name)

        # Export a SavedModel (.pb) for deployment with TensorFlow Serving
        inputs = {
            "user_id": model.user_indices,
            "item_id": model.item_indices,
            "head_id": model.head_indices,
            "is_dropout": model.dropout_param
        }

        outputs = {"ctr_predict": model.scores_normalized}

        export_path = './model/' + args.dataset + '/result'
        signature = tf.saved_model.signature_def_utils.predict_signature_def(
            inputs=inputs, outputs=outputs)

        export_path = os.path.join(tf.compat.as_bytes(export_path),
                                   tf.compat.as_bytes(str(export_version)))
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)
        legacy_init_op = tf.group(tf.tables_initializer(),
                                  name='legacy_init_op')
        builder.add_meta_graph_and_variables(
            sess=sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                'ctr_scores': signature,
            },
            legacy_init_op=legacy_init_op)
        builder.save()
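
All six examples call the helpers get_feed_dict_for_rs and get_feed_dict_for_kge, which are not shown here. A minimal sketch of what they might look like, assuming train_data rows are (user, item, label) triples and kg rows are (head, relation, tail) triples; model.labels, model.relation_indices and model.tail_indices are assumed placeholder names (model.user_indices, model.item_indices and model.head_indices appear in the examples themselves). NumPy slicing past the end of an array simply truncates, so the final partial batch needs no special handling:

def get_feed_dict_for_rs(model, data, start, end):
    # Feed a batch of (user, item, label) rows; in MKR the item id also
    # plays the role of the head entity id on the KGE side
    return {model.user_indices: data[start:end, 0],
            model.item_indices: data[start:end, 1],
            model.labels: data[start:end, 2],
            model.head_indices: data[start:end, 1]}

def get_feed_dict_for_kge(model, kg, start, end):
    # Feed a batch of (head, relation, tail) triples; the head id doubles
    # as the item id for the shared cross&compress units
    return {model.item_indices: kg[start:end, 0],
            model.head_indices: kg[start:end, 0],
            model.relation_indices: kg[start:end, 1],
            model.tail_indices: kg[start:end, 2]}
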
Example 2
def train(args, data, show_loss, show_topk):
    n_user, n_item, n_entity, n_relation = data[0], data[1], data[2], data[3]
    train_data, eval_data, test_data = data[4], data[5], data[6]
    kg = data[7]

    model = MKR(args, n_user, n_item, n_entity, n_relation)

    # top-K evaluation settings
    user_num = 100
    k_list = [1, 2, 5, 10, 20, 50, 100]
    train_record = get_user_record(train_data, True)
    test_record = get_user_record(test_data, False)
    user_list = list(set(train_record.keys()) & set(test_record.keys()))
    if len(user_list) > user_num:
        user_list = np.random.choice(user_list, size=user_num, replace=False)
    item_set = set(range(n_item))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epochs):
            # RS training
            np.random.shuffle(train_data)
            start = 0
            while start < train_data.shape[0]:
                _, loss = model.train_rs(sess, get_feed_dict_for_rs(model, train_data, start, start + args.batch_size))
                start += args.batch_size
                if show_loss:
                    print(loss)

            # KGE training
            if step % args.kge_interval == 0:
                np.random.shuffle(kg)
                start = 0
                while start < kg.shape[0]:
                    _, rmse = model.train_kge(sess, get_feed_dict_for_kge(model, kg, start, start + args.batch_size))
                    start += args.batch_size
                    if show_loss:
                        print(rmse)

            # CTR evaluation
            train_auc, train_acc = model.eval(sess, get_feed_dict_for_rs(model, train_data, 0, train_data.shape[0]))
            eval_auc, eval_acc = model.eval(sess, get_feed_dict_for_rs(model, eval_data, 0, eval_data.shape[0]))
            test_auc, test_acc = model.eval(sess, get_feed_dict_for_rs(model, test_data, 0, test_data.shape[0]))

            print('epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
                  % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))

            # top-K evaluation
            if show_topk:
                precision, recall, f1 = topk_eval(
                    sess, model, user_list, train_record, test_record, item_set, k_list)
                print('precision: ', end='')
                for i in precision:
                    print('%.4f\t' % i, end='')
                print()
                print('recall: ', end='')
                for i in recall:
                    print('%.4f\t' % i, end='')
                print()
                print('f1: ', end='')
                for i in f1:
                    print('%.4f\t' % i, end='')
                print('\n')
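
get_user_record, shared by all the examples, is not shown either. A plausible implementation, assuming each interaction row is (user, item, label): for the training split every interaction counts as history (it is used to mask candidates during ranking), while for the test split only positive interactions (label == 1) count as ground truth:

def get_user_record(data, is_train):
    # Map each user id to the set of item ids in their history
    user_history_dict = dict()
    for user, item, label in data:
        if is_train or label == 1:
            user_history_dict.setdefault(user, set()).add(item)
    return user_history_dict
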
Example 3
def train(args, data, show_loss, show_topk):
    n_user, n_item, n_entity, n_relation = data[0], data[1], data[2], data[3]
    train_data, eval_data, test_data = data[4], data[5], data[6]
    kg = data[7]
    adj_entity, adj_relation = data[8], data[9]

    model = MKR(args, n_user, n_item, n_entity, n_relation, adj_entity,
                adj_relation)
    # Optional warm start: model.load_weights('model_weights')
    # top-K evaluation settings
    user_num = 100
    k_list = [1, 2, 5, 10, 20, 50, 100]
    train_record = get_user_record(train_data, True)
    test_record = get_user_record(test_data, False)
    user_list = list(set(train_record.keys()) & set(test_record.keys()))
    if len(user_list) > user_num:
        user_list = np.random.choice(user_list, size=user_num, replace=False)
    item_set = set(list(range(n_item)))

    # Create the optimizers once, outside the training loop, so that
    # Adam's internal state persists across epochs instead of being reset
    rs_optimizer = tf.keras.optimizers.Adam(learning_rate=model.args.lr_rs)
    kge_optimizer = tf.keras.optimizers.Adam(learning_rate=model.args.lr_kge)

    for step in range(args.n_epochs):
        # RS training
        np.random.shuffle(train_data)
        start = 0
        while start < train_data.shape[0]:
            with tf.GradientTape() as tape:
                _, loss = model.train_rs(
                    get_feed_dict_for_rs(train_data, start,
                                         start + args.batch_size))
            g = tape.gradient(loss, model.trainable_variables)
            rs_optimizer.apply_gradients(
                grads_and_vars=zip(g, model.trainable_variables))
            start += args.batch_size
            if show_loss:
                print(loss)

        # KGE training
        if step % args.kge_interval == 0:
            np.random.shuffle(kg)
            start = 0
            while start < kg.shape[0]:
                with tf.GradientTape() as tape:
                    loss, rmse = model.train_kge(
                        get_feed_dict_for_kge(kg, start,
                                              start + args.batch_size))
                g = tape.gradient(loss, model.trainable_variables)
                kge_optimizer.apply_gradients(
                    zip(g, model.trainable_variables))
                start += args.batch_size
                if show_loss:
                    print(rmse)

        # CTR evaluation (batched, to keep memory bounded)
        train_auc, train_acc = batch_eval(model, train_data, args.batch_size)
        eval_auc, eval_acc = batch_eval(model, eval_data, args.batch_size)
        test_auc, test_acc = batch_eval(model, test_data, args.batch_size)

        print(
            'epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
            % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc,
               test_acc))

    model.save_weights('model_weights')
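
Example 3 replaces the single full-split model.eval call with a batch_eval helper. A sketch under the assumption that model.eval returns (auc, acc) for one batch in this eager-mode variant; the per-batch metrics are averaged with weights proportional to batch size, which is exact for accuracy and a close approximation for AUC:

def batch_eval(model, data, batch_size):
    # Evaluate the split in batches to keep memory bounded
    aucs, accs, weights = [], [], []
    start = 0
    while start < data.shape[0]:
        end = min(start + batch_size, data.shape[0])
        auc, acc = model.eval(get_feed_dict_for_rs(data, start, end))
        aucs.append(auc)
        accs.append(acc)
        weights.append(end - start)
        start = end
    return (np.average(aucs, weights=weights),
            np.average(accs, weights=weights))
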
Example 4
def train(args, data, show_loss, show_topk):
    n_user, n_item, n_entity, n_relation = data[0], data[1], data[2], data[3]
    train_data, eval_data, test_data = data[4], data[5], data[6]
    kg = data[7]

    model = MKR(args, n_user, n_item, n_entity, n_relation)

    # top-K evaluation settings
    user_num = 100
    k_list = [1, 2, 5, 10, 20, 50, 100]
    train_record = get_user_record(train_data, True)
    test_record = get_user_record(test_data, False)
    user_list = list(set(train_record.keys()) & set(test_record.keys()))
    if len(user_list) > user_num:
        user_list = np.random.choice(user_list, size=user_num, replace=False)
    item_set = set(range(n_item))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epochs):
            # RS training
            np.random.shuffle(train_data)
            start = 0
            while start < train_data.shape[0]:
                # Build the feed dict once and reuse it for this batch
                feed_dict = get_feed_dict_for_rs(model, train_data, start,
                                                 start + args.batch_size)
                _, loss = model.train_rs(sess, feed_dict)
                start += args.batch_size
                if show_loss:
                    print(loss)

            # KGE training
            if step % args.kge_interval == 0:
                np.random.shuffle(kg)
                start = 0
                while start < kg.shape[0]:
                    _, rmse = model.train_kge(sess, get_feed_dict_for_kge(model, kg, start, start + args.batch_size))
                    start += args.batch_size
                    if show_loss:
                        print(rmse)

            # CTR evaluation
            train_auc, train_acc = model.eval(sess, get_feed_dict_for_rs(model, train_data, 0, train_data.shape[0]))
            eval_auc, eval_acc = model.eval(sess, get_feed_dict_for_rs(model, eval_data, 0, eval_data.shape[0]))
            test_auc, test_acc = model.eval(sess, get_feed_dict_for_rs(model, test_data, 0, test_data.shape[0]))

            print('epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
                  % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))

            # top-K evaluation
            if show_topk:
                precision, recall, f1 = topk_eval(
                    sess, model, user_list, train_record, test_record, item_set, k_list)
                print('precision: ', end='')
                for i in precision:
                    print('%.4f\t' % i, end='')
                print()
                print('recall: ', end='')
                for i in recall:
                    print('%.4f\t' % i, end='')
                print()
                print('f1: ', end='')
                for i in f1:
                    print('%.4f\t' % i, end='')
                print('\n')
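
The topk_eval routine used by Examples 1, 2, 4 and 5 is also not shown. A sketch matching how it is called above: for each sampled user, score every item outside the user's training history with model.get_scores (the same entry point Example 5 uses for its baseline dump), rank by score, and compute precision, recall and F1 at each cutoff in k_list:

def topk_eval(sess, model, user_list, train_record, test_record,
              item_set, k_list):
    precision_list = {k: [] for k in k_list}
    recall_list = {k: [] for k in k_list}
    for user in user_list:
        # Rank only items the user has not already seen during training
        test_item_list = list(item_set - train_record[user])
        items, scores = model.get_scores(
            sess, {model.user_indices: [user] * len(test_item_list),
                   model.item_indices: test_item_list,
                   model.head_indices: test_item_list})
        item_sorted = [i for i, _ in
                       sorted(zip(items, scores), key=lambda x: x[1],
                              reverse=True)]
        for k in k_list:
            hit_num = len(set(item_sorted[:k]) & test_record[user])
            precision_list[k].append(hit_num / k)
            recall_list[k].append(hit_num / len(test_record[user]))
    precision = [np.mean(precision_list[k]) for k in k_list]
    recall = [np.mean(recall_list[k]) for k in k_list]
    f1 = [2 * p * r / (p + r) if p + r > 0 else 0.0
          for p, r in zip(precision, recall)]
    return precision, recall, f1
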
Example 5
def train(args, data, show_loss, show_topk):
    n_user, n_item, n_entity, n_relation = data[0], data[1], data[2], data[3]
    train_data, eval_data, test_data = data[4], data[5], data[6]
    kg = data[7]
    user_set = data[8]
    user_item_dict = data[9]

    BASELINE_OUTPUT_FILE = 'baseline_output.txt'
    OUTPUT_PATH = os.path.join('..', 'data', args.dataset,
                               BASELINE_OUTPUT_FILE)
    model = MKR(args, n_user, n_item, n_entity, n_relation)

    # top-K evaluation settings
    user_num = 100
    k_list = [1, 2, 5, 10, 20, 50, 100]
    train_record = get_user_record(train_data, True)
    test_record = get_user_record(test_data, False)
    user_list = list(set(train_record.keys()) & set(test_record.keys()))
    if len(user_list) > user_num:
        user_list = np.random.choice(user_list, size=user_num, replace=False)
    item_set = set(range(n_item))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epochs):
            # RS training
            np.random.shuffle(train_data)
            start = 0
            while start < train_data.shape[0]:
                _, loss = model.train_rs(
                    sess,
                    get_feed_dict_for_rs(model, train_data, start,
                                         start + args.batch_size))
                start += args.batch_size
                if show_loss:
                    print(loss)

            # KGE training
            if step % args.kge_interval == 0:
                np.random.shuffle(kg)
                start = 0
                while start < kg.shape[0]:
                    _, rmse = model.train_kge(
                        sess,
                        get_feed_dict_for_kge(model, kg, start,
                                              start + args.batch_size))
                    start += args.batch_size
                    if show_loss:
                        print(rmse)

            # CTR evaluation
            train_auc, train_acc = model.eval(
                sess,
                get_feed_dict_for_rs(model, train_data, 0,
                                     train_data.shape[0]))
            eval_auc, eval_acc = model.eval(
                sess,
                get_feed_dict_for_rs(model, eval_data, 0, eval_data.shape[0]))
            test_auc, test_acc = model.eval(
                sess,
                get_feed_dict_for_rs(model, test_data, 0, test_data.shape[0]))

            print(
                'epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
                % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc,
                   test_acc))

            # top-K evaluation
            if show_topk:
                precision, recall, f1 = topk_eval(sess, model, user_list,
                                                  train_record, test_record,
                                                  item_set, k_list)
                print('precision: ', end='')
                for i in precision:
                    print('%.4f\t' % i, end='')
                print()
                print('recall: ', end='')
                for i in recall:
                    print('%.4f\t' % i, end='')
                print()
                print('f1: ', end='')
                for i in f1:
                    print('%.4f\t' % i, end='')
                print('\n')

        user_list = list(user_set)

        with open(OUTPUT_PATH, 'w') as writer:
            for user in user_list:

                test_item_list = list(user_item_dict[user])
                items, scores = model.get_scores(
                    sess, {
                        model.user_indices: [user] * len(test_item_list),
                        model.item_indices: test_item_list,
                        model.head_indices: test_item_list
                    })
                for item, score in zip(items, scores):
                    writer.write('%d\t%d\t%f\n' % (user, item, score))

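
The baseline file written above is a plain tab-separated dump of (user, item, score) rows; the with statement closes it automatically. A small sketch of how a downstream consumer might read it back into per-user ranked lists (the file layout is the only assumption):

from collections import defaultdict

def load_baseline_scores(path):
    # Rebuild {user: [(item, score), ...]} sorted by descending score
    user_scores = defaultdict(list)
    with open(path) as f:
        for line in f:
            user, item, score = line.split('\t')
            user_scores[int(user)].append((int(item), float(score)))
    for ranked in user_scores.values():
        ranked.sort(key=lambda x: x[1], reverse=True)
    return user_scores
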
Example 6
def train(args, rs_dataset, kg_dataset):

    show_loss = args.show_loss
    show_topk = args.show_topk

    # Get RS data
    n_user = rs_dataset.n_user
    n_item = rs_dataset.n_item
    train_data, eval_data, test_data = rs_dataset.data
    train_indices, eval_indices, test_indices = rs_dataset.indices

    # Get KG data
    n_entity = kg_dataset.n_entity
    n_relation = kg_dataset.n_relation
    kg = kg_dataset.kg

    # Init train sampler
    train_sampler = SubsetRandomSampler(train_indices)

    # Init MKR model
    model = MKR(args, n_user, n_item, n_entity, n_relation)

    # Init SummaryWriter
    writer = SummaryWriter(args.summary_path)

    # Top-K evaluation settings
    user_num = 100
    k_list = [1, 2, 5, 10, 20, 50, 100]
    train_record = get_user_record(train_data, True)
    test_record = get_user_record(test_data, False)
    user_list = list(set(train_record.keys()) & set(test_record.keys()))
    if len(user_list) > user_num:
        user_list = np.random.choice(user_list, size=user_num, replace=False)
    item_set = set(range(n_item))
    step = 0
    for epoch in range(args.n_epochs):
        print("Train RS")
        train_loader = DataLoader(rs_dataset, batch_size=args.batch_size,
                                  num_workers=args.workers, sampler=train_sampler)
        for i, rs_batch_data in enumerate(train_loader):
            loss, base_loss_rs, l2_loss_rs = model.train_rs(rs_batch_data)
            writer.add_scalar("rs_loss", loss.cpu().detach().numpy(), global_step=step)
            writer.add_scalar("rs_base_loss", base_loss_rs.cpu().detach().numpy(), global_step=step)
            writer.add_scalar("rs_l2_loss", l2_loss_rs.cpu().detach().numpy(), global_step=step)
            step += 1
            if show_loss:
                print(loss)

        if epoch % args.kge_interval == 0:
            print("Train KGE")
            kg_train_loader = DataLoader(kg_dataset, batch_size=args.batch_size,
                                         num_workers=args.workers, shuffle=True)
            for i, kg_batch_data in enumerate(kg_train_loader):
                rmse, loss_kge, base_loss_kge, l2_loss_kge = model.train_kge(kg_batch_data)
                writer.add_scalar("kge_rmse_loss", rmse.cpu().detach().numpy(), global_step=step)
                writer.add_scalar("kge_loss", loss_kge.cpu().detach().numpy(), global_step=step)
                writer.add_scalar("kge_base_loss", base_loss_kge.cpu().detach().numpy(), global_step=step)
                writer.add_scalar("kge_l2_loss", l2_loss_kge.cpu().detach().numpy(), global_step=step)
                step += 1
                if show_loss:
                    print(rmse)

        # CTR evaluation
        train_auc, train_acc = model.eval(train_data)
        eval_auc, eval_acc = model.eval(eval_data)
        test_auc, test_acc = model.eval(test_data)

        print('epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
              % (epoch, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))

        # top-K evaluation
        if show_topk:
            precision, recall, f1 = model.topk_eval(user_list, train_record, test_record, item_set, k_list)
            print('precision: ', end='')
            for i in precision:
                print('%.4f\t' % i, end='')
            print()
            print('recall: ', end='')
            for i in recall:
                print('%.4f\t' % i, end='')
            print()
            print('f1: ', end='')
            for i in f1:
                print('%.4f\t' % i, end='')
            print('\n')
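
Example 6 is a PyTorch-style rewrite of the same loop: batching comes from DataLoader instead of manual slicing, the optimizer step presumably happens inside model.train_rs/train_kge (which return the losses), and metrics are logged to TensorBoard. The imports it depends on, inferred from the names used (SummaryWriter may equally come from tensorboardX), would be roughly:

import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
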