import os

import numpy as np
import tensorflow as tf
from scipy.stats import logistic

from config import Config
from entity.data import DataProvider
from entity.model_e2v_ntm import build_ntm_model

flag = "ntm_model.new_trained.freq=100.word=22548"
conf = Config(flag, "prod", 200)  # wrong!

# model = np.load(conf.path_model_npy + ".npy")
# word_embed = model[0]
# prod_embed = model[1]
# transfer_w = model[2]
# transfer_b = model[3]

print('Start loading data')
dp = DataProvider(conf)
print('Data load complete')

model, word_embed, item_embed = build_ntm_model(conf, dp)
print('Start loading model weights')
model.load_weights(conf.path_checkpoint)
print('Loading model weights complete')

# trained weight tensors, in the order they appear in model.weights
prod_embed = model.weights[0]
transfer_w = model.weights[1]
transfer_b = model.weights[2]
word_embed = model.weights[3]

init_op = tf.initialize_all_variables()
sess = tf.InteractiveSession()
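# A minimal sketch (not part of the original script): materialise the loaded weights as
# NumPy arrays. Keras' get_weights() returns arrays in the same order as model.weights,
# so the unpacking below assumes the ordering used above (prod embedding, transfer W,
# transfer b, word embedding).
prod_embed_np, transfer_w_np, transfer_b_np, word_embed_np = model.get_weights()[:4]
print(prod_embed_np.shape, word_embed_np.shape)
# could be dumped in the layout the commented-out np.load above expects:
# np.save(conf.path_model_npy, [word_embed_np, prod_embed_np, transfer_w_np, transfer_b_np])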
os.environ['GOTO_NUM_THREADS'] = str(n_processer)
os.environ['OMP_NUM_THREADS'] = str(n_processer)
# os.environ['THEANO_FLAGS'] = 'device=gpu,blas.ldflags=-lblas -lgfortran'
os.environ['THEANO_FLAGS'] = 'device=gpu'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 1
session = tf.Session(config=config)
K.set_session(session)

with tf.device('/gpu:0'):
    # get data
    dp = DataProvider(conf)
    model, word_embed, item_embed = build_ntm_model(conf, dp)
    print(model.summary())

    # target = np.array([9999] * len(word_data))  # unused: the loss function multiplies it by 0
    if os.path.exists(conf.path_checkpoint):
        print("load previous checker")
        # model.load_weights(conf.path_checker)

    # model.fit(
    #     {"word_idx": word_data, "item_pos_idx": item_pos_data, "item_neg_idx": item_neg_data},
    #     {"merge_layer": target, "pos_layer": target},
    #     batch_size=conf.batch_size, nb_epoch=conf.n_epoch, validation_split=0.1,
    #     callbacks=[my_checker_point(item_embed, word_embed, model, conf),
    #                # my_value_checker([word_embed_, item_pos_embed_, item_neg_embed_, pos_layer_, neg_layer_, merge_layer_]),
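# A minimal sketch (assumption, not in the original script): sanity-check that the
# CUDA_VISIBLE_DEVICES / allow_growth settings above actually expose the intended
# devices. device_lib ships with TF 1.x; with allow_soft_placement=True the graph
# still runs on CPU if no GPU is visible.
from tensorflow.python.client import device_lib

print([d.name for d in device_lib.list_local_devices()])  # e.g. '/device:GPU:0' ... '/device:GPU:3'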
print(args)
flag = args[1]
n_processer = int(args[4])

import os

config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
config.gpu_options.allow_growth = True
config.gpu_options.allocator_type = 'BFC'
session = tf.Session(config=config)
K.set_session(session)

conf = Config(flag, args[2], int(args[3]))
print(flag)

# get data
dp = DataProvider(conf)
if os.path.exists(conf.path_checkpoint):
    print("load previous checker")
dp.generate_init()

model, item_embed, word_embed = build_doc2vec_model(conf, dp)
model.fit_generator(generator=dp.generate_data(batch_size=conf.batch_size, is_validate=False),
                    nb_worker=1, pickle_safe=False, nb_epoch=conf.n_epoch,
                    steps_per_epoch=int(np.ceil(conf.sample_per_epoch / conf.batch_size)),
                    validation_data=dp.generate_data(batch_size=conf.batch_size, is_validate=True),
                    validation_steps=1, verbose=1,
                    callbacks=[my_checker_point(item_embed, word_embed, model, conf),
                               ModelCheckpoint(filepath=conf.path_checkpoint, verbose=1,
                                               save_best_only=True)])
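# Hypothetical invocation (assumption: args is sys.argv and the script name below is
# illustrative only). Positions follow the parsing above: args[1]=flag, args[2]=mode
# passed to Config (e.g. "prod" or "tag"), args[3]=embedding dimension,
# args[4]=number of worker threads.
#
#   python train_doc2vec.py ntm_model.new_trained.freq=100.word=22548 prod 200 8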
from config import Config
from entity.data import DataProvider
from gensim.models import KeyedVectors
import numpy as np
import os

flag = "ntm_model.new_trained.freq=100.word=22548"
conf = Config(flag, "tag", 200)

if not os.path.exists(conf.path_word_w2c) and not os.path.exists(conf.path_doc_w2c):
    doc_embed = np.load(conf.path_doc_npy + ".npy")[0]
    dp = DataProvider(conf)

    # generate doc embedding file
    f = open(conf.path_doc_w2c, "w")
    f.write(str(len(dp.idx2prod)))
    f.write(" ")
    f.write(str(conf.dim_item))
    f.write("\n")

    idx = 0
    batch = ""
    for word in dp.idx2prod:
        batch = "".join([batch, word])
        batch = "".join([batch, " "])
        for i in range(conf.dim_item):
            batch = "".join([batch, str(doc_embed[idx][i])])
            batch = "".join([batch, " "])
        batch = "".join([batch, "\n"])
        idx += 1
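# A minimal sketch (assumption, not in the original script): the file built above uses
# the plain-text word2vec layout ("<count> <dim>" header, then one "<token> <values...>"
# row per item), so once the buffer is written to conf.path_doc_w2c and the file is
# closed, gensim (already imported above) can read the item vectors back for inspection.
doc_vectors = KeyedVectors.load_word2vec_format(conf.path_doc_w2c, binary=False)
some_item = doc_vectors.index2word[0]          # index2word is the gensim 3.x attribute
print(doc_vectors.most_similar(some_item, topn=5))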