def get_data(self, begin, batch_size, dssmDim):
    """Assemble one mini-batch of input/output features.

    Takes the samples ``self.X[begin:begin + batch_size]`` and
    ``self.Y[begin:begin + batch_size]``, collects their ``vids`` and
    concatenates their ``title`` entries in sample order, then converts each
    concatenated title list with ``TrainingData.toSparseTensorValue``.

    Args:
        begin: Index of the first sample of the batch.
        batch_size: Number of samples to take; the slice is simply shorter
            when it runs past the end of the data.
        dssmDim: Passed through unchanged to
            ``TrainingData.toSparseTensorValue``.

    Returns:
        Tuple ``(input_vids, input_title, output_vids, output_title)`` where
        the ``*_vids`` members are lists with one entry per sample and the
        ``*_title`` members are whatever ``toSparseTensorValue`` returns.
    """
    end = begin + batch_size
    # Slice each side once and reuse it for both the vids and the titles.
    batch_x = self.X[begin:end]
    batch_y = self.Y[begin:end]

    input_vids = [x.vids for x in batch_x]
    # Flatten in a single linear pass; ``sum(lists, [])`` re-copies the
    # accumulated list on every addition and is quadratic in the batch size.
    input_title = [entry for x in batch_x for entry in x.title]
    input_title = TrainingData.toSparseTensorValue(input_title, dssmDim)

    output_vids = [y.vids for y in batch_y]
    output_title = [entry for y in batch_y for entry in y.title]
    output_title = TrainingData.toSparseTensorValue(output_title, dssmDim)

    return (input_vids, input_title, output_vids, output_title)
# Command-line flags, data loading and model constants for the training script.
FLAGS = flags.FLAGS

flags.DEFINE_string('summaries_dir', '/tmp/dssm-400-120-relu', 'Summaries directory')
flags.DEFINE_float('learning_rate', 0.1, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 900000, 'Number of steps to run trainer.')
# Disabled flags, kept for reference:
#flags.DEFINE_integer('epoch_steps', 18000, "Number of steps in one epoch.")
#flags.DEFINE_integer('pack_size', 2000, "Number of batches in one pickle pack.")
# Boolean flag: use a real bool as the default rather than the integer 1.
flags.DEFINE_bool('gpu', True, "Enable GPU or not")
flags.DEFINE_string('testdata', '../data/test', "Test Data path")
# NOTE(review): the training default currently points at the *test* prefix, so
# without an explicit --traindata the model trains and evaluates on the same
# files. The previous default was:
#flags.DEFINE_string('traindata','../data/train',"Training data path")
flags.DEFINE_string('traindata', '../data/test', "Training data path")

# Load both data sets fully into memory up front and report how long it took.
# Each data set is read from two files sharing a prefix: <prefix>.queryvec and
# <prefix>.docvec.
start = time.time()

test_data = TrainingData()
test_data.load_data('{}.queryvec'.format(FLAGS.testdata),
                    '{}.docvec'.format(FLAGS.testdata))

train_data = TrainingData()
train_data.load_data('{}.queryvec'.format(FLAGS.traindata),
                     '{}.docvec'.format(FLAGS.traindata))

end = time.time()
print("Loading data from HDD to memory: %.2fs" % (end - start))

# Model/batching constants.
# NOTE(review): presumably the trigram vocabulary size, the number of negative
# samples and the batch size respectively -- confirm against the model code.
TRIGRAM_D = 7415
NEG = 50
BS = 512
import tensorflow as tf
from tqdm import tqdm
from datautil import TrainingData

# Command-line interface of the evaluation script: the model to load and the
# directory that holds the test data files.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, help="input model file", default=None)
parser.add_argument('--dir', type=str,
                    help="directory containing query.test and docvec.test",
                    default=None)
args = parser.parse_args()

# Both arguments are needed below; fail early with a clear message instead of
# a bare ValueError (model) or a later TypeError on ``None + str`` (dir).
if args.model is None:
    raise ValueError("--model is required")
if args.dir is None:
    raise ValueError("--dir is required")

model_path = args.model
path = args.dir

# load training data for now
start = time.time()
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('Start to loading test data ')
test_data = TrainingData()
test_data.load_data(path + '/query.test', path + '/docvec.test')
end = time.time()
print("Loading data from HDD to memory: %.2fs" % (end - start))

# ---------------------config--------------------
# NOTE(review): presumably vocabulary size, negative-sample count, batch size
# and the two hidden-layer widths -- confirm against the model definition.
TRIGRAM_D = 8710
NEG = 50
BS = 512
L1_N = 256
L2_N = 128

# Number of complete batches; floor division keeps this an integer on both
# Python 2 and Python 3 (a trailing partial batch is not counted).
batch_num = test_data.size() // BS
querynum = 10