Exemple #1
0
 def get_data(self, begin, batch_size, dssmDim):
     """Assemble one batch of paired inputs/outputs starting at ``begin``.

     Takes the slice ``[begin:begin + batch_size]`` of ``self.X`` and
     ``self.Y``. For each side, the ``vids`` attributes are collected into a
     list (one entry per sample) and the ``title`` sequences are concatenated
     into a single flat list, which is then passed through
     ``TrainingData.toSparseTensorValue`` together with ``dssmDim``.

     Args:
         begin: index of the first sample of the batch.
         batch_size: maximum number of samples to take; the slice is simply
             shorter near the end of the data.
         dssmDim: forwarded unchanged to ``toSparseTensorValue``
             (presumably the feature dimension -- confirm against that helper).

     Returns:
         Tuple ``(input_vids, input_title, output_vids, output_title)``.
     """
     end = begin + batch_size
     # Slice once per side instead of once per derived field.
     batch_x = self.X[begin:end]
     batch_y = self.Y[begin:end]

     input_vids = [x.vids for x in batch_x]
     # Flatten with a nested comprehension: sum(list_of_lists, []) rebuilds
     # the accumulator list on every addition and is quadratic in batch size.
     input_title = [entry for x in batch_x for entry in x.title]
     input_title = TrainingData.toSparseTensorValue(input_title, dssmDim)

     output_vids = [y.vids for y in batch_y]
     output_title = [entry for y in batch_y for entry in y.title]
     output_title = TrainingData.toSparseTensorValue(output_title, dssmDim)

     return (input_vids, input_title, output_vids, output_title)
Exemple #2
0
FLAGS = flags.FLAGS

# Command-line flags controlling the run.
flags.DEFINE_string('summaries_dir', '/tmp/dssm-400-120-relu',
                    'Summaries directory')
flags.DEFINE_float('learning_rate', 0.1, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 900000, 'Number of steps to run trainer.')
# A boolean flag gets a real boolean default (previously the integer 1).
flags.DEFINE_bool('gpu', True, "Enable GPU or not")
flags.DEFINE_string('testdata', '../data/test', "Test Data path")
# NOTE(review): the training path defaults to the same prefix as the test
# data; an earlier '../data/train' default was commented out. Confirm this is
# intentional and not a debugging leftover.
flags.DEFINE_string('traindata', '../data/test', "Training data path")

# Load the test and training sets into memory up front and time the load.
# Each set is read from '<prefix>.queryvec' and '<prefix>.docvec'.
start = time.time()
test_data = TrainingData()
test_data.load_data('{}.queryvec'.format(FLAGS.testdata),
                    '{}.docvec'.format(FLAGS.testdata))

train_data = TrainingData()
train_data.load_data('{}.queryvec'.format(FLAGS.traindata),
                     '{}.docvec'.format(FLAGS.traindata))

end = time.time()
print("Loading data from HDD to memory: %.2fs" % (end - start))

# Model/batching constants. Their use is outside this section.
# NOTE(review): presumably TRIGRAM_D is the input feature dimension, NEG the
# number of negative samples and BS the batch size -- confirm against the
# model-building code.
TRIGRAM_D = 7415

NEG = 50
BS = 512
Exemple #3
0
import tensorflow as tf
from tqdm import tqdm
from datautil import TrainingData

# Command-line interface: both --model and --dir must be supplied.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, help="input model file", default=None)
parser.add_argument('--dir', type=str,
                    help="directory containing query.test and docvec.test",
                    default=None)
args = parser.parse_args()
if args.model is None:
    raise ValueError("--model is required")
if args.dir is None:
    # Fail early with a clear message instead of a TypeError when the
    # directory is later concatenated with the file names.
    raise ValueError("--dir is required")
model_path = args.model
path = args.dir

# Load the test data into memory and time the load.
start = time.time()
# Function-call form of print works on both Python 2 and Python 3.
print('Start to loading test data ')
test_data = TrainingData()
test_data.load_data(path + '/query.test', path + '/docvec.test')


end = time.time()
print("Loading data from HDD to memory: %.2fs" % (end - start))

# ---------------------config--------------------
# NOTE(review): presumably TRIGRAM_D is the input feature dimension, NEG the
# number of negative samples, and L1_N / L2_N hidden-layer widths -- confirm
# against the model-building code, which is outside this section.
TRIGRAM_D = 8710
NEG = 50
BS = 512

L1_N = 256
L2_N = 128
# Number of whole batches of size BS in the test set. Floor division keeps
# this an integer on Python 3 as well (same result as '/' on Python 2 ints).
batch_num = test_data.size() // BS
querynum = 10