def __init__(self, args, word_dict, char_dict):
    """Build the DSSM matching network and its optimizer.

    Args:
        args: configuration namespace; must expose ``cuda`` (bool); other
            fields are consumed by DSSM itself.
        word_dict: word-level vocabulary handed through to DSSM.
        char_dict: character-level vocabulary handed through to DSSM.
    """
    self.args = args
    self.word_dict = word_dict
    self.char_dict = char_dict
    # The trainable network; DSSM takes both vocabularies.
    self.network = DSSM(args, word_dict, char_dict)
    if args.cuda:
        self.network.cuda()
    # Adamax with its default learning rate; weight decay explicitly off.
    self.optimizer = optim.Adamax(self.network.parameters(), weight_decay=0)
class CrossModal(nn.Module):
    """Two-tower cross-modal (text/image) matching model.

    Image tower: ResNet-18 backbone (classification head removed) ->
    linear -> BatchNorm -> tanh. Text tower: DSSM encoder (optionally
    initialized from a pretrained checkpoint) -> linear -> BatchNorm -> tanh.
    Both towers project into a shared ``hidden_size`` space compared with
    cosine similarity.
    """

    def __init__(self, vocab_size=250000, embed_size=128, hidden_size=512,
                 pretrain_path=None):
        """
        Args:
            vocab_size: vocabulary size for the DSSM text encoder.
            embed_size: output dimension of ``DSSM.predict``.
            hidden_size: shared projection dimension for both towers.
            pretrain_path: optional path to a DSSM state_dict; when None the
                text encoder keeps its random initialization.
        """
        super(CrossModal, self).__init__()
        # --- image tower ---
        resnet = models.resnet18(pretrained=True)
        modules = list(resnet.children())[:-1]  # drop the final fc layer
        self.resnet = nn.Sequential(*modules)
        self.resnet_linear = nn.Linear(resnet.fc.in_features, hidden_size)
        self.resnet_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)
        # --- text tower ---
        self.dssm = DSSM(vocab_size=vocab_size)
        # BUGFIX: the original called torch.load(pretrain_path) unconditionally,
        # which crashes with the documented default pretrain_path=None.
        if pretrain_path is not None:
            self.dssm.load_state_dict(torch.load(pretrain_path))
        self.dssm_linear = nn.Linear(embed_size, hidden_size)
        self.dssm_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)
        # shared non-linearity
        self.tanh = nn.Tanh()

    def forward(self, query, pos_img, neg_img):
        """Score (query, positive image, negative image) triplets.

        Returns:
            (left, right): cosine similarity of the query embedding against
            the positive and the negative image embedding, respectively.
        """
        text_feature = self.dssm.predict(query)
        pos_img_feature = self.resnet(pos_img)
        neg_img_feature = self.resnet(neg_img)
        text_feature = self.tanh(self.dssm_bn(self.dssm_linear(text_feature)))
        pos_img_feature = pos_img_feature.reshape(pos_img_feature.size(0), -1)
        pos_img_feature = self.tanh(
            self.resnet_bn(self.resnet_linear(pos_img_feature)))
        neg_img_feature = neg_img_feature.reshape(neg_img_feature.size(0), -1)
        neg_img_feature = self.tanh(
            self.resnet_bn(self.resnet_linear(neg_img_feature)))
        left = torch.cosine_similarity(text_feature, pos_img_feature)
        right = torch.cosine_similarity(text_feature, neg_img_feature)
        return left, right

    def query_emb(self, query):
        """Embed a query through the text tower (for offline retrieval)."""
        text_feature = self.dssm.predict(query)
        text_feature = self.tanh(self.dssm_bn(self.dssm_linear(text_feature)))
        return text_feature

    def img_emb(self, pos_img):
        """Embed an image batch through the image tower."""
        pos_img_feature = self.resnet(pos_img)
        pos_img_feature = pos_img_feature.reshape(pos_img_feature.size(0), -1)
        pos_img_feature = self.tanh(
            self.resnet_bn(self.resnet_linear(pos_img_feature)))
        return pos_img_feature
def predict(data_params):
    """Restore a trained DSSM checkpoint and score every batch in a file.

    Args:
        data_params: parameters forwarded to DataIterator.

    Prints the model score for each batch and the total elapsed time.
    """
    meta_path = "./model/dssm.ckpt.meta"
    ckpt_path = "./model/dssm.ckpt"
    data_file = "./data/train.txt.10"
    dssm = DSSM()
    data_iterator = DataIterator(data_params)
    iterator = data_iterator.input_fn(data_file)
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(meta_path)
        saver.restore(sess, ckpt_path)
        # BUGFIX: the original ran tf.global_variables_initializer() AFTER
        # restore, re-randomizing every weight the checkpoint just loaded.
        sess.run(iterator.initializer)
        # BUGFIX: get_next() must be called ONCE; calling it inside the loop
        # added new ops to the graph on every iteration (memory leak).
        (query_features, creative_ids, labels) = iterator.get_next()
        s = time.time()
        while True:
            try:
                (batch_query, batch_creative_ids, batch_labels) = sess.run(
                    [query_features, creative_ids, labels])
                prediction = sess.run(dssm.score,
                                      feed_dict={
                                          dssm.query: batch_query,
                                          dssm.doc: batch_creative_ids
                                      })
                print(prediction)
            except tf.errors.OutOfRangeError:
                break
        e = time.time()
        # roughly 0.0001s per record on average (original measurement)
        print(e - s)
def train():
    """Train the DSSM over one pass of the dataset and save a checkpoint.

    Relies on module-level globals: ``data_iterator``, ``data_file`` and
    ``model_path``.
    """
    dssm = DSSM()
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        iterator = data_iterator.input_fn(data_file)
        sess.run(iterator.initializer)
        # BUGFIX: hoist get_next() out of the loop -- calling it every
        # iteration kept adding ops to the graph (memory leak).
        (query_features, creative_ids, labels) = iterator.get_next()
        while True:
            try:
                (batch_query, batch_creative_ids, batch_labels) = sess.run(
                    [query_features, creative_ids, labels])
                sess.run(dssm.train_step,
                         feed_dict={
                             dssm.query: batch_query,
                             dssm.doc: batch_creative_ids,
                             dssm.label: batch_labels
                         })
                # score after the update, for progress monitoring
                print('score:',
                      sess.run(dssm.score,
                               feed_dict={
                                   dssm.query: batch_query,
                                   dssm.doc: batch_creative_ids
                               }))
            except tf.errors.OutOfRangeError:
                break
        saver.save(sess, model_path)
def train():
    """Train the DSSM for FLAGS.epoch epochs over the "0107" dataset.

    Relies on module-level globals: ``FLAGS``, ``data_utils``,
    ``get_batch_data`` and ``model_path``. Saves one checkpoint at the end.
    """
    dssm = DSSM()
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        for i in range(FLAGS.epoch):
            train_raw_data = data_utils.load_all_dataset("0107")
            if train_raw_data is None:
                continue  # dataset failed to load; skip this epoch
            epoch_steps = int(len(train_raw_data) / FLAGS.batch_size)
            for step in range(epoch_steps):
                query_batch, doc_batch, label_batch = get_batch_data(
                    step, FLAGS.batch_size, train_raw_data)
                feed = {
                    dssm.query: query_batch,
                    dssm.doc: doc_batch,
                    dssm.label: label_batch
                }
                # PERF: fetch the loss and run the update in a single
                # sess.run -- the original did two separate forward passes
                # per step (one for the loss print, one for train_step).
                # The printed value is identical (pre-update loss).
                _, loss_val = sess.run([dssm.train_step, dssm.loss],
                                       feed_dict=feed)
                print('loss:', loss_val)
        saver.save(sess, model_path)
def debug():
    """Step through fake training examples one at a time, printing every
    intermediate tensor of the DSSM graph for manual inspection."""
    query, doc, Y = fake_train_data()
    dssm = DSSM()
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        for i in range(len(Y)):
            # keep batch dimension (shape [1, ...]) with slice notation
            q = query[i:i + 1]
            d = doc[i:i + 1]
            label = Y[i:i + 1]
            print('query:', sess.run(dssm.query, feed_dict={dssm.query: q}))
            print('doc:', sess.run(dssm.doc, feed_dict={dssm.doc: d}))
            print('label:',
                  sess.run(dssm.label, feed_dict={dssm.label: label}))
            # embedding table
            print('embedding:', sess.run(dssm.embedding))
            # debug query side
            print('query_embedding:',
                  sess.run(dssm.query_embeddings, feed_dict={dssm.query: q}))
            print('query_flatten:',
                  sess.run(dssm.query_flatten, feed_dict={dssm.query: q}))
            # debug doc side
            # BUGFIX: the original printed dssm.doc (the raw input placeholder)
            # under the 'doc_embedding:' label; print the embedding tensor,
            # mirroring the query side above.
            print('doc_embedding:',
                  sess.run(dssm.doc_embeddings, feed_dict={dssm.doc: d}))
            print('doc_flatten:',
                  sess.run(dssm.doc_flatten, feed_dict={dssm.doc: d}))
            # debug dense layers
            print('query_layer_1_out:',
                  sess.run(dssm.query_layer_1_out, feed_dict={dssm.query: q}))
            print('doc_layer_1_out:',
                  sess.run(dssm.doc_layer_1_out, feed_dict={dssm.doc: d}))
            # debug cosine_similarity, score, loss
            print(
                'cosine_similarity:',
                sess.run(dssm.cosine_similarity,
                         feed_dict={
                             dssm.query: q,
                             dssm.doc: d
                         }))
            print('score:',
                  sess.run(dssm.score, feed_dict={
                      dssm.query: q,
                      dssm.doc: d
                  }))
            print(
                'loss:',
                sess.run(dssm.loss,
                         feed_dict={
                             dssm.query: q,
                             dssm.doc: d,
                             dssm.label: label
                         }))
def __init__(self, vocab_size=250000, embed_size=128, hidden_size=512,
             pretrain_path=None):
    """Build the image (ResNet-18) and text (DSSM) towers.

    Args:
        vocab_size: vocabulary size for the DSSM text encoder.
        embed_size: output dimension of the DSSM encoder.
        hidden_size: shared projection dimension for both towers.
        pretrain_path: optional path to a DSSM state_dict; when None the
            text encoder keeps its random initialization.
    """
    super(CrossModal, self).__init__()
    # --- image tower ---
    resnet = models.resnet18(pretrained=True)
    modules = list(resnet.children())[:-1]  # drop the final fc layer
    self.resnet = nn.Sequential(*modules)
    self.resnet_linear = nn.Linear(resnet.fc.in_features, hidden_size)
    self.resnet_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)
    # --- text tower ---
    self.dssm = DSSM(vocab_size=vocab_size)
    # BUGFIX: the original called torch.load(pretrain_path) unconditionally,
    # which crashes with the documented default pretrain_path=None.
    if pretrain_path is not None:
        self.dssm.load_state_dict(torch.load(pretrain_path))
    self.dssm_linear = nn.Linear(embed_size, hidden_size)
    self.dssm_bn = nn.BatchNorm1d(hidden_size, momentum=0.01)
    # shared non-linearity
    self.tanh = nn.Tanh()
class TMmodel(object):
    """Text-matching model wrapper: owns a DSSM network and its optimizer."""

    def __init__(self, args, word_dict, char_dict):
        """Build the network and optimizer.

        Args:
            args: configuration namespace; must expose ``cuda`` (bool).
            word_dict: word-level vocabulary handed through to DSSM.
            char_dict: character-level vocabulary handed through to DSSM.
        """
        self.args = args
        self.word_dict = word_dict
        self.char_dict = char_dict
        self.network = DSSM(args, word_dict, char_dict)
        if args.cuda:
            self.network.cuda()
        # Adamax with its default learning rate; weight decay explicitly off.
        self.optimizer = optim.Adamax(self.network.parameters(),
                                      weight_decay=0)

    def update(self, ex):
        """Run one training update on example batch ``ex``.

        ``ex[0]`` is the label tensor; ``ex[1:9]`` are the input tensors
        (entries may be None for absent features).
        """
        self.network.train()
        if self.args.cuda:
            # BUGFIX: `async` is a reserved keyword since Python 3.7 (the
            # original was a SyntaxError there); PyTorch renamed the
            # Tensor.cuda() argument to `non_blocking`.
            inputs = [
                e if e is None else Variable(e.cuda(non_blocking=True))
                for e in ex[1:9]
            ]
            label = Variable(ex[0].cuda(non_blocking=True))
        # NOTE(review): the method appears truncated in this chunk -- no
        # forward/backward step is visible here; confirm against full source.
def main(cfg):
    """Run multi-GPU test-set inference with a saved checkpoint.

    Loads the news/word dictionaries, restores the model from
    ``./checkpoint/model.ep{cfg.epoch}``, then for each test shard spawns one
    process per GPU, gathers per-shard results, and finally aggregates all
    shards into ``cfg.result_path``.
    """
    set_seed(7)
    file_num = cfg.filenum
    cfg.result_path = './result/'
    print('load dict')
    news_dict = json.load(
        open('./{}/news.json'.format(cfg.root), 'r', encoding='utf-8'))
    cfg.news_num = len(news_dict)
    print('load words dict')
    word_dict = json.load(
        open('./{}/word.json'.format(cfg.root), 'r', encoding='utf-8'))
    cfg.word_num = len(word_dict)
    if cfg.model == 'dssm':
        model = DSSM(cfg)
    elif cfg.model == 'gru':
        model = GRURec(cfg)
    else:
        # BUGFIX: an unknown model name previously fell through and raised a
        # confusing NameError on `model` below; fail fast instead, matching
        # the check in run().
        raise Exception('model error')
    saved_model_path = os.path.join('./checkpoint/',
                                    'model.ep{0}'.format(cfg.epoch))
    print("Load from:", saved_model_path)
    if not os.path.exists(saved_model_path):
        print("Not Exist: {}".format(saved_model_path))
        return []
    model.cpu()
    pretrained_model = torch.load(saved_model_path, map_location='cpu')
    # load_state_dict returns the missing/unexpected keys; print them for
    # visibility since strict=False hides mismatches otherwise.
    print(model.load_state_dict(pretrained_model, strict=False))
    for point_num in range(file_num):
        print("processing {}/raw/test-{}.npy".format(cfg.root, point_num))
        valid_dataset = FMData(
            np.load("{}/raw/test-{}.npy".format(cfg.root, point_num)))
        dataset_list = split_dataset(valid_dataset, cfg.gpus)
        processes = []
        for rank in range(cfg.gpus):
            cur_device = torch.device("cuda:{}".format(rank))
            p = mp.Process(target=run,
                           args=(cfg, rank, dataset_list[rank], cur_device,
                                 model))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        gather(cfg, point_num)
    gather_all(cfg.result_path, file_num, validate=True, save=True)
# Vocabulary size and train/eval splits from the prepared dataset object.
nwords = dataset._vocab_size
trainData, evalData = dataset.dataGen()
# Steps per epoch; the -1 drops the possibly-partial trailing batch.
train_epoch_steps = int(len(trainData) / Config.batchSize) - 1
eval_epoch_steps = int(len(evalData) / Config.batchSize) - 1

# Define the computation graph
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False,
                                  device_count={"CPU": 78})
    sess = tf.Session(config=session_conf)
    # Create the session
    with sess.as_default():
        dssm = DSSM(config, nwords)
        globalStep = tf.Variable(0, name="globalStep", trainable=False)
        # Build the optimizer with the configured learning rate
        optimizer = tf.train.AdamOptimizer(config.learningRate)
        # Compute gradients, yielding (gradient, variable) pairs
        gradsAndVars = optimizer.compute_gradients(dssm.losses)
        # Apply the gradients to the variables, producing the train op
        trainOp = optimizer.apply_gradients(gradsAndVars,
                                            global_step=globalStep)
        # Gradient histograms for TensorBoard
        # NOTE(review): gradSummaries is declared but not appended to within
        # this chunk -- the rest of the summary wiring presumably follows
        # outside this view.
        gradSummaries = []
        for g, v in gradsAndVars:
            if g is not None:
                tf.summary.histogram("{}/grad/hist".format(v.name), g)
# Build vocabulary from the training corpus, then load train/dev splits.
dictionary = data.Dictionary()
train_corpus = data.Corpus(args.data, 'session_train.txt', dictionary)
print('train set size = ', len(train_corpus.data))
print('vocabulary size = ', len(dictionary))
# is_test_corpus=True: dev data must not extend the vocabulary.
dev_corpus = data.Corpus(args.data, 'session_dev.txt', dictionary,
                         is_test_corpus=True)
print('dev set size = ', len(dev_corpus.data))

# save the dictionary object to use during testing
helper.save_object(dictionary, args.save_path + 'dictionary.p')

# ###############################################################################
# # Build the model
# ###############################################################################

model = DSSM(dictionary, args)
optimizer = optim.SGD(model.parameters(), args.lr)
# Sentinel: -1 means "no best loss recorded yet".
best_loss = -1

param_dict = helper.count_parameters(model)
print('Number of trainable parameters = ',
      numpy.sum(list(param_dict.values())))

# for training on multiple GPUs. use CUDA_VISIBLE_DEVICES=0,1 to specify
# which GPUs to use
if 'CUDA_VISIBLE_DEVICES' in os.environ:
    cuda_visible_devices = [
        int(x) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
    ]
    if len(cuda_visible_devices) > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=cuda_visible_devices)
if args.cuda:
    model = model.cuda()

# NOTE(review): the resume branch is truncated at this chunk boundary.
if args.resume:
# NOTE(review): fragment -- the line below closes a DataLoader(...) call
# whose opening arguments lie outside this chunk.
shuffle=False, drop_last=False, collate_fn=my_collate_fn)

x_raw_path = save_path / 'x_raw.pkl'
kmeans_path = save_path / 'kmeans.pkl'
# Check for cached clustering results
if x_raw_path.exists() and kmeans_path.exists():
    print('Loading precomputed clustering')
    with open(x_raw_path, 'rb') as f:
        x_raw = pickle.load(f)
    with open(kmeans_path, 'rb') as f:
        kmeans = pickle.load(f)
else:
    print('Clustering')
    # Rebuild the model and load the best checkpoint before embedding.
    model = DSSM(**model_kwargs)
    model.eval()
    model.load_state_dict(
        torch.load(experiment_path_base / experiment_name /
                   'best_model.pth'))
    embeds = []
    with torch.no_grad():
        # Embed each (s, s_prime) transition; the labels are ignored here.
        for (s, s_prime), (_, _) in tqdm(train_dataloader):
            if isinstance(model, DSSM):
                # DSSM.phi2 consumes the raw state difference directly.
                embeds.append(model.phi2(s_prime - s).numpy())
            else:
                # Other models embed states first, then difference them.
                embeds.append(
                    model.phi2(model.embed(s_prime) -
                               model.embed(s)).numpy())
    x_raw = np.concatenate(embeds)
    # NOTE(review): kmeans is constructed but not fitted within this chunk;
    # the fit presumably happens past this boundary.
    kmeans = KMeans(n_clusters=n_clusters, verbose=0, random_state=42)
import os
import time

import tensorflow as tf

from dssm import DSSM
import data_utils

batch_size = 100

# Build the session and restore the trained DSSM checkpoint.
sess = tf.Session()
dssm = DSSM()
meta_path = "./model/dssm.ckpt.meta"
ckpt_path = "./model/dssm.ckpt"
saver = tf.train.import_meta_graph(meta_path)
saver.restore(sess, ckpt_path)
# BUGFIX: the original ran tf.global_variables_initializer() AFTER restore,
# which re-randomized every weight the checkpoint had just loaded; restored
# variables need no initialization, so the call is removed.


def get_batch_data(step, batch_size, raw_data):
    """Return the `step`-th batch slice bounds of `raw_data`.

    NOTE(review): function appears truncated in this chunk -- only the slice
    bounds are computed here.
    """
    start = step * batch_size
    end = (step + 1) * batch_size
# NOTE(review): fragment -- `map`, `ndcg_*` and `num_batches` are accumulated
# in code outside this chunk; here the sums are averaged over batches.
# (`map` shadows the builtin; left unchanged to preserve the code.)
map = map / num_batches
ndcg_1 = ndcg_1 / num_batches
ndcg_3 = ndcg_3 / num_batches
ndcg_10 = ndcg_10 / num_batches

print('MAP - ', map)
print('NDCG@1 - ', ndcg_1)
print('NDCG@3 - ', ndcg_3)
print('NDCG@10 - ', ndcg_10)


if __name__ == "__main__":
    # Load the saved pre-trained model
    dictionary = helper.load_object(args.save_path + 'dictionary.p')
    model = DSSM(dictionary, args)
    # Wrap in DataParallel only when more than one GPU is visible.
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        cuda_visible_devices = [
            int(x) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
        ]
        if len(cuda_visible_devices) > 1:
            model = torch.nn.DataParallel(model,
                                          device_ids=cuda_visible_devices)
    if args.cuda:
        model = model.cuda()
    # Restore the best checkpoint's weights into the (possibly wrapped) model.
    helper.load_model_states_from_checkpoint(
        model, os.path.join(args.save_path, 'model_best.pth.tar'),
        'state_dict')
    print('Model and dictionary loaded.')
def run(cfg, rank, device, finished, train_dataset_path, valid_dataset):
    """Train and evaluate one worker process.

    :param cfg: config namespace (batch_size, epoch, lr, weight_decay, gpus,
        model, result_path, checkpoint_path, ...)
    :param rank: process id of this worker
    :param device: torch device this worker trains on
    :param finished: shared counter used to synchronize workers at the end
    :param train_dataset_path: path to this worker's .npy training shard
    :param valid_dataset: validation dataset instance (shared)
    :return: None
    """
    set_seed(7)
    print("Worker %d is setting dataset ... " % rank)
    # Build Dataloader
    train_dataset = FMData(np.load(train_dataset_path))
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=cfg.batch_size,
                                   shuffle=True,
                                   drop_last=True)
    valid_data_loader = DataLoader(valid_dataset,
                                   batch_size=cfg.batch_size,
                                   shuffle=False)

    # # Build model.
    if cfg.model == 'dssm':
        model = DSSM(cfg)
    elif cfg.model == 'gru':
        model = GRURec(cfg)
    else:
        raise Exception('model error')
    model.to(device)
    # Build optimizer.
    steps_one_epoch = len(train_data_loader)
    train_steps = cfg.epoch * steps_one_epoch
    print("Total train steps: ", train_steps)
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)
    print("Worker %d is working ... " % rank)
    # Fast check the validation process (single-GPU run, or rank 0 only)
    if (cfg.gpus < 2) or (cfg.gpus > 1 and rank == 0):
        validate(cfg,
                 -1,
                 model,
                 device,
                 rank,
                 valid_data_loader,
                 fast_dev=True)
        logging.warning(model)
        gather_all(cfg.result_path, 1, validate=True, save=False)

    # Training and validation
    for epoch in range(cfg.epoch):
        # print(model.match_prediction_layer.state_dict()['2.bias'])
        train(cfg, epoch, rank, model, train_data_loader, optimizer,
              steps_one_epoch, device)
        validate(cfg, epoch, model, device, rank, valid_data_loader)
        # add finished count
        finished.value += 1
        # Only one process persists the checkpoint per epoch.
        if (cfg.gpus < 2) or (cfg.gpus > 1 and rank == 0):
            save_checkpoint_by_epoch(model.state_dict(), epoch,
                                     cfg.checkpoint_path)
        # Barrier: wait until every worker finished this epoch before
        # gathering results, then reset the counter for the next epoch.
        while finished.value < cfg.gpus:
            time.sleep(1)
        gather_all(cfg.result_path, cfg.gpus, validate=True, save=False)
        finished.value = 0