Example #1
import os

import numpy as np
import tensorflow as tf
from scipy.stats import logistic

from config import Config
from entity.data import DataProvider
from entity.model_e2v_ntm import build_ntm_model

flag = "ntm_model.new_trained.freq=100.word=22548"
conf = Config(flag, "prod", 200)

# Wrong (kept for reference): do not load the embeddings from the saved
# numpy snapshot; restore the trained checkpoint below instead.
# model = np.load(conf.path_model_npy + ".npy")
# word_embed = model[0]
# prod_embed = model[1]
# transfer_w = model[2]
# transfer_b = model[3]

print('Start loading data')
dp = DataProvider(conf)
print('Data load complete')
model, word_embed, item_embed = build_ntm_model(conf, dp)
print('Start loading model weights')
model.load_weights(conf.path_checkpoint)
print('Loading model weights complete')
# Weight order as exposed by the model built in build_ntm_model:
prod_embed = model.weights[0]
transfer_w = model.weights[1]
transfer_b = model.weights[2]
word_embed = model.weights[3]

# tf.initialize_all_variables() is deprecated; the replacement op is below.
# Do not actually run it after load_weights, or the restored values are reset.
init_op = tf.global_variables_initializer()
sess = tf.InteractiveSession()
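To read the restored embeddings back as numpy arrays, a minimal sketch, assuming the TF 1.x Keras backend session is the one in which load_weights assigned the values (keras_sess is a name introduced here):

# Sketch: evaluate the weight tensors through Keras' own backend session,
# which is where model.load_weights placed the restored values.
from keras import backend as K

keras_sess = K.get_session()
prod_embed_values, word_embed_values = keras_sess.run([prod_embed, word_embed])
print(prod_embed_values.shape, word_embed_values.shape)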
Example #2
    # Cap the BLAS/OpenMP thread pools at the requested worker count.
    os.environ['GOTO_NUM_THREADS'] = str(n_processer)
    os.environ['OMP_NUM_THREADS'] = str(n_processer)
    # os.environ['THEANO_FLAGS'] = 'device=gpu,blas.ldflags=-lblas -lgfortran'
    os.environ['THEANO_FLAGS'] = 'device=gpu'
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    # Let TF fall back to other devices and grow GPU memory on demand.
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 1
    session = tf.Session(config=config)
    K.set_session(session)

    with tf.device('/gpu:0'):
        # get data
        dp = DataProvider(conf)
        model, word_embed, item_embed = build_ntm_model(conf, dp)

        print(model.summary())

        # target = np.array([9999] * len(word_data))  # unused: the loss function multiplies it by 0
        if os.path.exists(conf.path_checkpoint):
            print("load previous checkpoint")
            # model.load_weights(conf.path_checker)

        # model.fit(
        #     {"word_idx": word_data, "item_pos_idx": item_pos_data, "item_neg_idx": item_neg_data},
        #     {"merge_layer": target, "pos_layer": target},
        #     batch_size=conf.batch_size, nb_epoch=conf.n_epoch, validation_split=0.1,
        #     callbacks=[my_checker_point(item_embed, word_embed, model, conf),
        #                # my_value_checker([word_embed_, item_pos_embed_, item_neg_embed_, pos_layer_, neg_layer_, merge_layer_]),
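my_checker_point is referenced here and in Example #3 but defined in none of these snippets. A hypothetical minimal sketch of such a Keras callback, with the save path assumed to be the conf.path_doc_npy file that Example #4 reads back:

# Hypothetical sketch only: snapshot the embedding variables after each epoch.
import numpy as np
from keras import backend as K
from keras.callbacks import Callback

class my_checker_point(Callback):
    def __init__(self, item_embed, word_embed, model, conf):
        super(my_checker_point, self).__init__()
        self.item_embed = item_embed
        self.word_embed = word_embed
        self.conf = conf

    def on_epoch_end(self, epoch, logs=None):
        # batch_get_value evaluates backend variables into numpy arrays.
        item_values, word_values = K.batch_get_value(
            [self.item_embed, self.word_embed])
        # np.save appends ".npy", matching the np.load(... + ".npy") pattern above.
        np.save(self.conf.path_doc_npy, [item_values, word_values])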
Example #3
    print(args)
    flag = args[1]
    n_processer = int(args[4])

    import os
    config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'
    session = tf.Session(config=config)
    K.set_session(session)

    conf = Config(flag, args[2], int(args[3]))
    print(flag)

    # get data
    dp = DataProvider(conf)

    if os.path.exists(conf.path_checkpoint):
        print("load previous checkpoint")

    dp.generate_init()
    model, item_embed, word_embed = build_doc2vec_model(conf, dp)
    # fit_generator with consistent Keras 2 argument names
    # (workers / use_multiprocessing / epochs / steps_per_epoch).
    model.fit_generator(generator=dp.generate_data(batch_size=conf.batch_size, is_validate=False),
                        workers=1, use_multiprocessing=False,
                        epochs=conf.n_epoch,
                        steps_per_epoch=int(np.ceil(conf.sample_per_epoch / conf.batch_size)),
                        validation_data=dp.generate_data(batch_size=conf.batch_size, is_validate=True),
                        validation_steps=1,
                        verbose=1, callbacks=[
                            my_checker_point(item_embed, word_embed, model, conf),
                            ModelCheckpoint(filepath=conf.path_checkpoint, verbose=1, save_best_only=True)
                        ])
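DataProvider.generate_data is not shown in these snippets; whatever it does internally, fit_generator only requires an iterator that loops forever and yields one (inputs, targets) batch per step. A hypothetical sketch, with the input and output names borrowed from the commented-out fit call in Example #2:

# Hypothetical sketch of a generator compatible with fit_generator.
import numpy as np

def generate_data(word_idx, item_pos_idx, item_neg_idx, batch_size):
    n = len(word_idx)
    while True:  # never terminate; Keras pulls batches for as long as it trains
        sel = np.random.randint(0, n, size=batch_size)
        inputs = {"word_idx": word_idx[sel],
                  "item_pos_idx": item_pos_idx[sel],
                  "item_neg_idx": item_neg_idx[sel]}
        # Dummy targets: the custom loss multiplies them by 0 anyway.
        targets = {"merge_layer": np.zeros(batch_size),
                   "pos_layer": np.zeros(batch_size)}
        yield inputs, targets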
Example #4
from config import Config
from entity.data import DataProvider
from gensim.models import KeyedVectors
import numpy as np
import os

flag = "ntm_model.new_trained.freq=100.word=22548"
conf = Config(flag, "tag" , 200)

if not os.path.exists(conf.path_word_w2c) and not os.path.exists(conf.path_doc_w2c):
    doc_embed = np.load(conf.path_doc_npy + ".npy")[0]
    dp = DataProvider(conf)

    # Write the doc embeddings in word2vec text format: a "count dim" header
    # line, then one "token v1 v2 ... vN" line per entry.
    f = open(conf.path_doc_w2c, "w")
    f.write("%d %d\n" % (len(dp.idx2prod), conf.dim_item))
    batch = ""
    for idx, word in enumerate(dp.idx2prod):
        batch += word + " "
        for i in range(conf.dim_item):
            batch += str(doc_embed[idx][i]) + " "
        batch += "\n"
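The snippet is cut off before the accumulated batch string is written out and the file closed; once that happens, the file is in plain word2vec text format, so it can be read back through the KeyedVectors import above. A minimal sketch, assuming a gensim version that exposes this loader:

# Sketch: load the embedding file written above (text format, not binary).
doc_vectors = KeyedVectors.load_word2vec_format(conf.path_doc_w2c, binary=False)
print(doc_vectors.most_similar(dp.idx2prod[0]))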