def __init__(self):
    """Initialize a bag-of-words scoring network.

    NOTE(review): this fragment duplicates FNN.__init__ defined in the
    class body below — confirm it is not dead code left over from a
    refactor/extraction.
    """
    NN.__init__(self)
    self.d_loader = DataLoader()
    # Input is a vocabulary-sized bag-of-words vector.
    self.input_vector_size = self.d_loader.d_handler.get_vocab_size()
    # output vector size = 1 for scoring model
    self.output_vector_size = 1
    self.train_dataset, self.train_labels, self.valid_dataset, \
        self.valid_labels, self.test_dataset, self.test_labels = self.d_loader.get_ttv()
def __init__(self, qsize="1k"):
    """Initialize the pretrained-embedding scoring network (FPNN).

    qsize: query-set size tag forwarded to the DataLoader (e.g. "1k").
    """
    super(FPNN, self).__init__()
    # Inputs are fixed-length padded token-id sequences.
    self.input_vector_size = NNConfig.max_doc_size
    # output vector size = 1 for scoring model
    self.output_vector_size = 1
    self.d_loader = DataLoader(pretrained=True, qsize=qsize)
    self.train_dataset, self.train_labels, self.valid_dataset, \
        self.valid_labels, self.test_dataset, self.test_labels = self.d_loader.get_ttv()
    # Embedding tensors are created later, when the graph is built.
    self.embedding = None
    self.embedding_init = None
    self.embedding_placeholder = None
    self.pretrained = True
def __init__(self, embedding, pretrained, qsize="1k"):
    """Initialize the embedding-based scoring network (FENN).

    NOTE(review): this fragment duplicates FENN.__init__ in the class body
    below — confirm it is not dead code left over from extraction.
    """
    super(FENN, self).__init__()
    # Pooled-embedding tensors, filled in when the graph is constructed.
    self.embedded_train_expanded = None
    self.embedded_valid_expanded = None
    self.embedded_test_expanded = None
    self.mode = None
    self.cache = None
    self.pretrained = pretrained
    self.d_loader = DataLoader(embedding=embedding,
                               pretrained=pretrained,
                               qsize=qsize)
    self.input_vector_size = NNConfig.max_doc_size
    # output vector size = 1 for scoring model (10 when ordinal output)
    self.output_vector_size = 10 if self.ordinal else 1
    # NOTE(review): self.cache is always None at this point, so the
    # get_ttv2() branch looks unreachable unless a caller mutates cache
    # first — confirm against the base class.
    self.train_dataset, self.train_labels, self.train_queries, self.valid_dataset, \
        self.valid_labels, self.valid_queries, \
        self.test_dataset, self.test_labels, self.test_queries = self.d_loader.get_ttv2() if self.cache \
        else self.d_loader.get_ttv()
class FNN(NN):
    """Fully-connected feed-forward scoring network over bag-of-words vectors.

    Input is a vocabulary-sized document vector; output is a single
    relevance score trained as a regression.
    """

    def __init__(self):
        NN.__init__(self)
        self.d_loader = DataLoader()
        self.input_vector_size = self.d_loader.d_handler.get_vocab_size()
        # output vector size = 1 for scoring model
        self.output_vector_size = 1
        self.train_dataset, self.train_labels, self.valid_dataset, \
            self.valid_labels, self.test_dataset, self.test_labels = self.d_loader.get_ttv()

    def simple_NN(self, mode="/cpu:0"):
        """Build and train a one-hidden-layer regression network.

        mode: TF device string the graph is pinned to (e.g. "/cpu:0").
        """
        logger.info("creating the computational graph...")
        graph = tf.Graph()
        with graph.as_default():
            with tf.device(mode):
                # Input data
                tf_train_dataset = tf.placeholder(
                    tf.float32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_train_labels = tf.placeholder(
                    tf.float32,
                    shape=(NNConfig.batch_size, self.output_vector_size))
                # Do not load data to constant!
                # tf_valid_dataset = tf.constant(self.valid_dataset)
                # tf_test_dataset = tf.constant(self.test_dataset)
                # create a placeholder: the (large) eval sets are fed once at
                # variable-initialization time instead of being baked into
                # the graph as constants.
                tf_valid_dataset_init = tf.placeholder(
                    tf.float32, shape=self.valid_dataset.shape)
                tf_valid_dataset = tf.Variable(tf_valid_dataset_init)
                tf_test_dataset_init = tf.placeholder(
                    tf.float32, shape=self.test_dataset.shape)
                tf_test_dataset = tf.Variable(tf_test_dataset_init)
                # FIX: these two guards originally tested
                # self.cfg['NNConfig']['regularization'] / ['learning_rate_decay']
                # while the rest of this method (the loss terms and the
                # training loop's feed_dict below) tests NNConfig.* — if the
                # two flags disagreed, beta_regu/global_step were used without
                # ever being created.  Use NNConfig consistently, as every
                # other method in this file does.
                if NNConfig.regularization:
                    beta_regu = tf.placeholder(tf.float32)
                if NNConfig.learning_rate_decay:
                    global_step = tf.Variable(0)

                # Variables.
                def init_weights(shape):
                    # return tf.Variable(tf.random_normal(shape, stddev=0.01))
                    return tf.Variable(tf.truncated_normal(shape))

                def init_biases(shape):
                    return tf.Variable(tf.zeros(shape))

                w_h = init_weights(
                    [self.input_vector_size, NNConfig.num_hidden_nodes])
                b_h = init_biases([NNConfig.num_hidden_nodes])
                w_o = init_weights(
                    [NNConfig.num_hidden_nodes, self.output_vector_size])
                b_o = init_biases([self.output_vector_size])

                # Training computation
                def model(dataset, w_h, b_h, w_o, b_o, train):
                    # Dropout is applied only while training.
                    if NNConfig.dropout and train:
                        drop_i = tf.nn.dropout(
                            dataset, NNConfig.dropout_keep_prob_input)
                        h_lay_train = tf.nn.relu(tf.matmul(drop_i, w_h) + b_h)
                        drop_h = tf.nn.dropout(
                            h_lay_train, NNConfig.dropout_keep_prob_hidden)
                        return tf.matmul(drop_h, w_o) + b_o
                    else:
                        h_lay_train = tf.nn.relu(
                            tf.matmul(dataset, w_h) + b_h)  # or tf.nn.sigmoid
                        return tf.matmul(h_lay_train, w_o) + b_o

                logits = model(tf_train_dataset, w_h, b_h, w_o, b_o, True)
                # Halved mean squared error over the batch.
                loss = tf.reduce_sum(tf.pow(logits - tf_train_labels, 2)) / \
                    (2 * tf.cast(tf.shape(tf_train_labels)[0], tf.float32))
                # loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
                if NNConfig.regularization:
                    loss += beta_regu * (tf.nn.l2_loss(w_h) +
                                         tf.nn.l2_loss(w_o))
                if NNConfig.learning_rate_decay:
                    learning_rate = tf.train.exponential_decay(
                        NNConfig.learning_rate,
                        global_step,
                        NNConfig.decay_steps,
                        NNConfig.decay_rate,
                        staircase=True)
                    optimizer = tf.train.GradientDescentOptimizer(
                        learning_rate).minimize(loss, global_step=global_step)
                else:
                    optimizer = tf.train.GradientDescentOptimizer(
                        NNConfig.learning_rate).minimize(loss)

                # linear activation
                train_prediction = logits
                valid_prediction = model(tf_valid_dataset, w_h, b_h, w_o, b_o,
                                         False)
                test_prediction = model(tf_test_dataset, w_h, b_h, w_o, b_o,
                                        False)
            '''
            run accuracy scope
            '''
            with tf.name_scope('accuracy'):
                pre = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                lbl = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                # compute the mean of all predictions
                accuracy = tf.reduce_sum(tf.pow(pre - lbl, 2)) / (
                    2 * tf.cast(tf.shape(lbl)[0], tf.float32))

        logger.info('running the session...')
        with tf.Session(
                graph=graph,
                config=tf.ConfigProto(log_device_placement=True)) as session:
            # Feed the eval sets into their init placeholders exactly once.
            session.run(tf.global_variables_initializer(),
                        feed_dict={
                            tf_valid_dataset_init: self.valid_dataset,
                            tf_test_dataset_init: self.test_dataset
                        })
            logger.info('Initialized')
            for step in range(NNConfig.num_steps):
                # Cycle through the training set in mini-batches.
                offset = (step * NNConfig.batch_size) % (
                    self.train_labels.shape[0] - NNConfig.batch_size)
                batch_data = self.train_dataset[offset:(
                    offset + NNConfig.batch_size), :]
                batch_labels = self.train_labels[offset:(offset +
                                                         NNConfig.batch_size)]
                # Labels arrive 1-D; the loss expects a column vector.
                batch_labels = batch_labels.reshape(len(batch_labels), 1)
                feed_dict = {
                    tf_train_dataset: batch_data,
                    tf_train_labels: batch_labels
                }
                if NNConfig.regularization:
                    feed_dict[beta_regu] = NNConfig.beta_regu
                _, l, predictions = session.run(
                    [optimizer, loss, train_prediction], feed_dict=feed_dict)
                if step % NNConfig.summary_steps == 0:
                    logger.info("Minibatch loss at step %d: %f" % (step, l))
                    logger.info("Minibatch accuracy: %.3f%%" %
                                session.run(accuracy,
                                            feed_dict={
                                                pre: predictions,
                                                lbl: batch_labels
                                            }))
                    # self.print_words(predictions, batch_labels)
                    logger.info('Validation accuracy: %.3f%%' %
                                session.run(accuracy,
                                            feed_dict={
                                                pre: valid_prediction.eval(),
                                                lbl: self.valid_labels
                                            }))
            logger.info('Test accuracy: %.3f%%' %
                        session.run(accuracy,
                                    feed_dict={
                                        pre: test_prediction.eval(),
                                        lbl: self.test_labels
                                    }))
            self.print_words(test_prediction.eval(), self.test_labels)

    def simple_nn_w_candidate_sampling(self):
        """Train the same network with candidate-sampling losses (NCE or
        sampled softmax), falling back to sigmoid cross-entropy."""
        print("creating the computational graph...")
        graph = tf.Graph()
        with graph.as_default():
            with tf.device("/gpu:0"):
                # Input data
                tf_train_dataset = tf.placeholder(
                    tf.float32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_train_labels = tf.placeholder(
                    tf.float32,
                    shape=(NNConfig.batch_size, self.output_vector_size))
                tf_valid_dataset = tf.constant(self.valid_dataset)
                tf_test_dataset = tf.constant(self.test_dataset)
                if NNConfig.regularization:
                    beta_regu = tf.placeholder(tf.float32)
                if NNConfig.learning_rate_decay:
                    global_step = tf.Variable(0)

                def init_biases(shape):
                    return tf.Variable(tf.zeros(shape))

                # Truncated-normal init scaled by 1/sqrt(fan).
                w_h = tf.Variable(
                    tf.truncated_normal(
                        [self.input_vector_size, NNConfig.num_hidden_nodes],
                        stddev=1.0 / math.sqrt(NNConfig.num_hidden_nodes)))
                b_h = init_biases([NNConfig.num_hidden_nodes])
                w_o = tf.Variable(
                    tf.truncated_normal(
                        [NNConfig.num_hidden_nodes, self.output_vector_size],
                        stddev=1.0 / math.sqrt(self.output_vector_size)))
                b_o = init_biases([self.output_vector_size])

                # Training computation
                def model(dataset, w_h, b_h, w_o, b_o, train):
                    if NNConfig.dropout and train:
                        drop_i = tf.nn.dropout(
                            dataset, NNConfig.dropout_keep_prob_input)
                        h_lay_train = tf.nn.relu(tf.matmul(drop_i, w_h) + b_h)
                        drop_h = tf.nn.dropout(
                            h_lay_train, NNConfig.dropout_keep_prob_hidden)
                        return tf.matmul(drop_h, w_o) + b_o
                    else:
                        h_lay_train = tf.nn.relu(
                            tf.matmul(dataset, w_h) + b_h)  # or tf.nn.sigmoid
                        return tf.matmul(h_lay_train, w_o) + b_o

                logits = model(tf_train_dataset, w_h, b_h, w_o, b_o, True)
                if NNConfig.candidate_sampling == 'nce_loss':
                    # Noise-contrastive estimation.
                    # NOTE(review): w_o is [num_hidden, output] while
                    # tf.nn.nce_loss expects class weights shaped
                    # [num_classes, dim] matching its inputs, and newer TF
                    # takes labels before inputs — verify these arguments
                    # against the TF version in use.
                    instances_loss = tf.nn.nce_loss(w_o, b_o, tf_train_dataset,
                                                    tf_train_labels,
                                                    NNConfig.num_sampled,
                                                    self.output_vector_size,
                                                    num_true=10)
                elif NNConfig.candidate_sampling == 'softmax_loss':
                    instances_loss = tf.nn.sampled_softmax_loss(
                        w_o, b_o, tf_train_dataset, tf_train_labels,
                        NNConfig.num_sampled, self.output_vector_size,
                        num_true=10)
                else:
                    instances_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logits, labels=tf_train_labels)
                    print('no candidate sampling....')
                loss = tf.reduce_mean(instances_loss)
                if NNConfig.regularization:
                    loss += beta_regu * (tf.nn.l2_loss(w_h) +
                                         tf.nn.l2_loss(w_o))
                if NNConfig.learning_rate_decay:
                    learning_rate = tf.train.exponential_decay(
                        NNConfig.learning_rate, global_step,
                        NNConfig.decay_steps, NNConfig.decay_rate,
                        staircase=True)
                    optimizer = tf.train.GradientDescentOptimizer(
                        learning_rate).minimize(loss, global_step=global_step)
                else:
                    optimizer = tf.train.GradientDescentOptimizer(
                        NNConfig.learning_rate).minimize(loss)

                train_prediction = tf.nn.softmax(logits)
                valid_prediction = tf.nn.softmax(
                    model(tf_valid_dataset, w_h, b_h, w_o, b_o, False))
                test_prediction = tf.nn.softmax(
                    model(tf_test_dataset, w_h, b_h, w_o, b_o, False))

            with tf.name_scope('accuracy'):
                pre = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                lbl = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                accuracy = tf.reduce_mean(
                    tf.cast(
                        tf.nn.sigmoid_cross_entropy_with_logits(logits=pre,
                                                                labels=lbl),
                        "float"))

        logger.info('running the session...')
        with tf.Session(
                graph=graph,
                config=tf.ConfigProto(log_device_placement=True)) as session:
            tf.initialize_all_variables().run()
            logger.info('Initialized')
            for step in range(NNConfig.num_steps):
                offset = (step * NNConfig.batch_size) % (
                    self.train_labels.shape[0] - NNConfig.batch_size)
                batch_data = self.train_dataset[offset:(
                    offset + NNConfig.batch_size), :]
                batch_labels = self.train_labels[offset:(
                    offset + NNConfig.batch_size), :]
                feed_dict = {
                    tf_train_dataset: batch_data,
                    tf_train_labels: batch_labels
                }
                if NNConfig.regularization:
                    feed_dict[beta_regu] = NNConfig.beta_regu
                _, l, predictions = session.run(
                    [optimizer, loss, train_prediction], feed_dict=feed_dict)
                if step % NNConfig.summary_steps == 0:
                    logger.info("Minibatch loss at step %d: %f" % (step, l))
                    logger.info("Minibatch accuracy: %.3f%%" %
                                session.run(accuracy,
                                            feed_dict={
                                                pre: predictions,
                                                lbl: batch_labels
                                            }))
                    # self.print_words(predictions, batch_labels)
                    logger.info('Validation accuracy: %.3f%%' %
                                session.run(accuracy,
                                            feed_dict={
                                                pre: valid_prediction.eval(),
                                                lbl: self.valid_labels
                                            }))
            logger.info('Test accuracy: %.3f%%' %
                        session.run(accuracy,
                                    feed_dict={
                                        pre: test_prediction.eval(),
                                        lbl: self.test_labels
                                    }))
class FENN(NN):
    """Feed-forward scoring network over (optionally pretrained) embeddings.

    Documents and queries are fixed-length padded token-id sequences; each
    sequence is embedded and sum-pooled into a single vector before the
    dense layers.  Supports pointwise, pairwise-hinge, and pairwise-
    probability training.
    """

    def __init__(self, embedding, pretrained, qsize="1k"):
        super(FENN, self).__init__()
        # Pooled-embedding tensors, filled in during graph construction.
        self.embedded_train_expanded = None
        self.embedded_valid_expanded = None
        self.embedded_test_expanded = None
        self.mode = None
        self.cache = None
        self.pretrained = pretrained
        self.d_loader = DataLoader(embedding=embedding,
                                   pretrained=pretrained,
                                   qsize=qsize)
        self.input_vector_size = NNConfig.max_doc_size
        # output vector size = 1 for scoring model (10 when ordinal output)
        self.output_vector_size = 10 if self.ordinal else 1
        self.train_dataset, self.train_labels, self.train_queries, self.valid_dataset, \
            self.valid_labels, self.valid_queries, \
            self.test_dataset, self.test_labels, self.test_queries = self.d_loader.get_ttv2() if self.cache \
            else self.d_loader.get_ttv()

    def simple_NN_prob(self):
        """Train a pairwise *probability* model: one network scores the
        concatenation [query, left, right] with a sigmoid cross-entropy
        objective."""
        self.log.info("creating the computational graph...")
        graph = tf.Graph()
        with graph.as_default():
            with tf.device(self.mode):
                # Input data: token-id matrices for each side of the pair.
                tf_train_left = tf.placeholder(tf.int32,
                                               shape=(NNConfig.batch_size,
                                                      self.input_vector_size))
                tf_train_right = tf.placeholder(tf.int32,
                                                shape=(NNConfig.batch_size,
                                                       self.input_vector_size))
                tf_train_labels = tf.placeholder(
                    tf.float32,
                    shape=(NNConfig.batch_size, self.output_vector_size))
                tf_train_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_valid_left = tf.placeholder(tf.int32,
                                               shape=(NNConfig.batch_size,
                                                      self.input_vector_size))
                tf_valid_right = tf.placeholder(tf.int32,
                                                shape=(NNConfig.batch_size,
                                                       self.input_vector_size))
                tf_valid_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                # Test placeholders are named so Evaluation can look them up.
                tf_test_left = tf.placeholder(tf.int32,
                                              shape=(NNConfig.batch_size,
                                                     self.input_vector_size),
                                              name="test_left")
                tf_test_right = tf.placeholder(tf.int32,
                                               shape=(NNConfig.batch_size,
                                                      self.input_vector_size),
                                               name="test_right")
                tf_test_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size),
                    name="test_queries")
                if NNConfig.regularization:
                    beta_regu = tf.placeholder(tf.float32)
                if NNConfig.learning_rate_decay:
                    global_step = tf.Variable(0)

                # Variables.
                def init_weights(shape):
                    return tf.Variable(tf.truncated_normal(shape))

                def init_biases(shape):
                    return tf.Variable(tf.zeros(shape))

                w_o = init_weights(
                    [NNConfig.num_hidden_nodes, self.output_vector_size])
                b_o = init_biases([self.output_vector_size])

                # Embedding layer: sum-pool token embeddings per sequence.
                with tf.device('/gpu:0'), tf.name_scope("embedding"):
                    W = self.embedding_W()
                    embedded_left = tf.nn.embedding_lookup(W, tf_train_left)
                    self.embedded_train_left = tf.reduce_sum(
                        embedded_left, [1])
                    embedded_right = tf.nn.embedding_lookup(W, tf_train_right)
                    self.embedded_train_right = tf.reduce_sum(
                        embedded_right, [1])
                    embedded_queries = tf.nn.embedding_lookup(
                        W, tf_train_queries)
                    self.embedded_train_queries = tf.reduce_sum(
                        embedded_queries, [1])
                    embedded_valid_left = tf.nn.embedding_lookup(
                        W, tf_valid_left)
                    self.embedded_valid_left = tf.reduce_sum(
                        embedded_valid_left, [1])
                    embedded_valid_right = tf.nn.embedding_lookup(
                        W, tf_valid_right)
                    self.embedded_valid_right = tf.reduce_sum(
                        embedded_valid_right, [1])
                    embedded_valid_queries = tf.nn.embedding_lookup(
                        W, tf_valid_queries)
                    self.embedded_valid_queries = tf.reduce_sum(
                        embedded_valid_queries, [1])
                    embedded_test = tf.nn.embedding_lookup(W, tf_test_left)
                    self.embedded_test_left = tf.reduce_sum(embedded_test, [1])
                    embedded_test = tf.nn.embedding_lookup(W, tf_test_right)
                    self.embedded_test_right = tf.reduce_sum(
                        embedded_test, [1])
                    embedded_test_queries = tf.nn.embedding_lookup(
                        W, tf_test_queries)
                    self.embedded_test_queries = tf.reduce_sum(
                        embedded_test_queries, [1])

                # Training computation
                def model(data_left, data_right, queries, w_o, b_o,
                          train_mode, name=None):
                    # One joint network over [query, left, right].
                    dataset = tf.concat([queries, data_left, data_right],
                                        axis=1)
                    w_hs = []
                    if NNConfig.dropout and train_mode:
                        drop_h = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            drop_i = tf.nn.dropout(
                                drop_h, NNConfig.dropout_keep_prob_input)
                            w_h = init_weights([
                                drop_h.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(
                                tf.matmul(drop_i, w_h) + b_h)
                            drop_h = tf.nn.dropout(
                                h_lay_train,
                                NNConfig.dropout_keep_prob_hidden)
                            w_hs.append(w_h)
                        return tf.add(tf.matmul(drop_h, w_o), b_o,
                                      name=name), w_hs
                    else:
                        h_lay_train = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            w_h = init_weights([
                                h_lay_train.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(
                                tf.matmul(h_lay_train, w_h) +
                                b_h)  # or tf.nn.sigmoid
                            w_hs.append(w_h)
                        return tf.add(tf.matmul(h_lay_train, w_o), b_o,
                                      name=name), w_hs

                logits, w_hs = model(self.embedded_train_left,
                                     self.embedded_train_right,
                                     self.embedded_train_queries, w_o, b_o,
                                     True)
                loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logits, labels=tf_train_labels))
                self.log.info("embedded_train shape: {}".format(
                    tf.shape(self.embedded_train_left)))
                if NNConfig.regularization:
                    loss += beta_regu * (
                        sum(tf.nn.l2_loss(w_h)
                            for w_h in w_hs) + tf.nn.l2_loss(w_o))
                if NNConfig.learning_rate_decay:
                    learning_rate = tf.train.exponential_decay(
                        NNConfig.learning_rate,
                        global_step,
                        NNConfig.decay_steps,
                        NNConfig.decay_rate,
                        staircase=True)
                    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
                        loss, global_step=global_step)
                else:
                    optimizer = tf.train.AdamOptimizer(
                        NNConfig.learning_rate).minimize(loss)

                # score model: linear activation
                train_prediction = logits
                valid_prediction, wv = model(self.embedded_valid_left,
                                             self.embedded_valid_right,
                                             self.embedded_valid_queries, w_o,
                                             b_o, False)
                with tf.name_scope("test_prediction"):
                    test_prediction, tv = model(self.embedded_test_left,
                                                self.embedded_test_right,
                                                self.embedded_test_queries,
                                                w_o, b_o, False,
                                                name="test_prediction")
            '''
            run accuracy scope
            '''
            with tf.name_scope('accuracy'):
                pre = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                lbl = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                # compute the mean of all predictions
                accuracy = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(logits=pre,
                                                            labels=lbl))

        self.log.info('running the session...')
        self.train_pairwise(graph, self.train_dataset, self.train_labels,
                            self.train_queries, self.valid_dataset,
                            self.valid_labels, self.valid_queries,
                            self.test_dataset, self.test_labels,
                            self.test_queries, tf_train_left, tf_train_right,
                            tf_train_labels, tf_train_queries, tf_valid_left,
                            tf_valid_right, tf_valid_queries, tf_test_left,
                            tf_test_right, tf_test_queries, train_prediction,
                            valid_prediction, test_prediction, loss,
                            optimizer, accuracy, pre, lbl, beta_regu,
                            prob=True)

    def simple_NN_pairwise(self):
        """Train a pairwise ranking model with a hinge loss on the score
        difference between the left and right documents."""
        self.log.info("creating the computational graph...")
        graph = tf.Graph()
        with graph.as_default():
            with tf.device(self.mode):
                # Input data
                tf_train_left = tf.placeholder(tf.int32,
                                               shape=(NNConfig.batch_size,
                                                      self.input_vector_size))
                tf_train_right = tf.placeholder(tf.int32,
                                                shape=(NNConfig.batch_size,
                                                       self.input_vector_size))
                tf_train_labels = tf.placeholder(
                    tf.float32,
                    shape=(NNConfig.batch_size, self.output_vector_size))
                tf_train_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_valid_left = tf.placeholder(tf.int32,
                                               shape=(NNConfig.batch_size,
                                                      self.input_vector_size))
                tf_valid_right = tf.placeholder(tf.int32,
                                                shape=(NNConfig.batch_size,
                                                       self.input_vector_size))
                tf_valid_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_test_left = tf.placeholder(tf.int32,
                                              shape=(NNConfig.batch_size,
                                                     self.input_vector_size),
                                              name="test_left")
                tf_test_right = tf.placeholder(tf.int32,
                                               shape=(NNConfig.batch_size,
                                                      self.input_vector_size),
                                               name="test_right")
                tf_test_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size),
                    name="test_queries")
                if NNConfig.regularization:
                    beta_regu = tf.placeholder(tf.float32)
                if NNConfig.learning_rate_decay:
                    global_step = tf.Variable(0)

                # Variables.
                def init_weights(shape):
                    # return tf.Variable(tf.random_normal(shape, stddev=0.01))
                    return tf.Variable(tf.truncated_normal(shape))

                def init_biases(shape):
                    return tf.Variable(tf.zeros(shape))

                w_o = init_weights(
                    [NNConfig.num_hidden_nodes, self.output_vector_size])
                b_o = init_biases([self.output_vector_size])

                # Embedding layer
                with tf.device('/cpu:0'), tf.name_scope("embedding"):
                    W = self.embedding_W()
                    embedded_left = tf.nn.embedding_lookup(W, tf_train_left)
                    self.embedded_train_left = tf.reduce_sum(
                        embedded_left, [1])
                    embedded_right = tf.nn.embedding_lookup(W, tf_train_right)
                    self.embedded_train_right = tf.reduce_sum(
                        embedded_right, [1])
                    embedded_queries = tf.nn.embedding_lookup(
                        W, tf_train_queries)
                    self.embedded_train_queries = tf.reduce_sum(
                        embedded_queries, [1])
                    embedded_valid_left = tf.nn.embedding_lookup(
                        W, tf_valid_left)
                    self.embedded_valid_left = tf.reduce_sum(
                        embedded_valid_left, [1])
                    embedded_valid_right = tf.nn.embedding_lookup(
                        W, tf_valid_right)
                    self.embedded_valid_right = tf.reduce_sum(
                        embedded_valid_right, [1])
                    embedded_valid_queries = tf.nn.embedding_lookup(
                        W, tf_valid_queries)
                    self.embedded_valid_queries = tf.reduce_sum(
                        embedded_valid_queries, [1])
                    embedded_test = tf.nn.embedding_lookup(W, tf_test_left)
                    self.embedded_test_left = tf.reduce_sum(embedded_test, [1])
                    embedded_test = tf.nn.embedding_lookup(W, tf_test_right)
                    self.embedded_test_right = tf.reduce_sum(
                        embedded_test, [1])
                    embedded_test_queries = tf.nn.embedding_lookup(
                        W, tf_test_queries)
                    self.embedded_test_queries = tf.reduce_sum(
                        embedded_test_queries, [1])

                # Training computation
                def model(dataset, queries, w_o, b_o, train_mode):
                    # Score a single document given its query.
                    dataset = tf.concat([queries, dataset], axis=1)
                    w_hs = []
                    if NNConfig.dropout and train_mode:
                        drop_h = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            drop_i = tf.nn.dropout(
                                drop_h, NNConfig.dropout_keep_prob_input)
                            w_h = init_weights([
                                drop_h.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(
                                tf.matmul(drop_i, w_h) + b_h)
                            drop_h = tf.nn.dropout(
                                h_lay_train,
                                NNConfig.dropout_keep_prob_hidden)
                            w_hs.append(w_h)
                        return tf.matmul(drop_h, w_o) + b_o, w_hs
                    else:
                        h_lay_train = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            w_h = init_weights([
                                h_lay_train.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(
                                tf.matmul(h_lay_train, w_h) +
                                b_h)  # or tf.nn.sigmoid
                            w_hs.append(w_h)
                        return tf.matmul(h_lay_train, w_o) + b_o, w_hs

                def pairwise_model(data_left, data_right, queries, w_o, b_o,
                                   train_mode):
                    # RankNet-style: the pairwise logit is the difference of
                    # the two independent document scores.
                    logits_left, w_hs_left = model(data_left, queries, w_o,
                                                   b_o, train_mode)
                    logits_right, w_hs_right = model(data_right, queries, w_o,
                                                     b_o, train_mode)
                    logits = logits_left - logits_right
                    return logits, w_hs_left + w_hs_right

                logits, w_hs = pairwise_model(self.embedded_train_left,
                                              self.embedded_train_right,
                                              self.embedded_train_queries,
                                              w_o, b_o, True)
                loss = tf.losses.hinge_loss(labels=tf_train_labels,
                                            logits=logits)
                self.log.info("embedded_train shape: {}".format(
                    tf.shape(self.embedded_train_left)))
                if NNConfig.regularization:
                    loss += beta_regu * (
                        sum(tf.nn.l2_loss(w_h)
                            for w_h in w_hs) + tf.nn.l2_loss(w_o))
                if NNConfig.learning_rate_decay:
                    learning_rate = tf.train.exponential_decay(
                        NNConfig.learning_rate,
                        global_step,
                        NNConfig.decay_steps,
                        NNConfig.decay_rate,
                        staircase=True)
                    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
                        loss, global_step=global_step)
                else:
                    optimizer = tf.train.AdamOptimizer(
                        NNConfig.learning_rate).minimize(loss)

                # score model: linear activation
                train_prediction = logits
                valid_prediction, w_v = pairwise_model(
                    self.embedded_valid_left, self.embedded_valid_right,
                    self.embedded_valid_queries, w_o, b_o, False)
                with tf.name_scope("test_prediction"):
                    # FIX: the original called model() here with six
                    # positional arguments (left, right, queries, w_o, b_o,
                    # train_mode) although model() takes five — a TypeError at
                    # graph-construction time.  The pairwise network is what
                    # valid_prediction uses, so build the test head with
                    # pairwise_model() as well.
                    test_prediction, tv = pairwise_model(
                        self.embedded_test_left, self.embedded_test_right,
                        self.embedded_test_queries, w_o, b_o, False)
                    test_prediction = tf.identity(test_prediction,
                                                  name="test_prediction")
            '''
            run accuracy scope
            '''
            with tf.name_scope('accuracy'):
                pre = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                lbl = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                # compute the mean of all predictions
                accuracy = tf.losses.hinge_loss(labels=lbl, logits=pre)

        self.log.info('running the session...')
        # NOTE(review): this call passes fewer arguments than the
        # train_pairwise call in simple_NN_prob (no query datasets/
        # placeholders) — confirm train_pairwise supports both signatures.
        self.train_pairwise(graph, self.train_dataset, self.train_labels,
                            self.valid_dataset, self.valid_labels,
                            self.test_dataset, self.test_labels,
                            tf_train_left, tf_train_right, tf_train_labels,
                            tf_valid_left, tf_valid_right, tf_test_left,
                            tf_test_right, train_prediction, valid_prediction,
                            test_prediction, loss, optimizer, accuracy, pre,
                            lbl, beta_regu)

    def simple_NN(self):
        """Train a pointwise scoring model on (query, document) inputs.

        With self.ordinal the head is a 10-way sigmoid (ordinal targets);
        otherwise a single linear score trained with squared error.
        """
        self.log.info("creating the computational graph...")
        graph = tf.Graph()
        with graph.as_default():
            with tf.device(self.mode):
                # Input data
                tf_train_dataset = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_train_labels = tf.placeholder(
                    tf.float32,
                    shape=(NNConfig.batch_size, self.output_vector_size))
                tf_train_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                # create a placeholder
                tf_valid_dataset = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_valid_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size))
                tf_test_dataset = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size),
                    name="test")
                tf_test_queries = tf.placeholder(
                    tf.int32,
                    shape=(NNConfig.batch_size, self.input_vector_size),
                    name="test_queries")
                if NNConfig.regularization:
                    beta_regu = tf.placeholder(tf.float32)
                if NNConfig.learning_rate_decay:
                    global_step = tf.Variable(0)

                # Variables.
                def init_weights(shape):
                    # return tf.Variable(tf.random_normal(shape, stddev=0.01))
                    return tf.Variable(tf.truncated_normal(shape))

                def init_biases(shape):
                    return tf.Variable(tf.zeros(shape))

                w_o = init_weights(
                    [NNConfig.num_hidden_nodes, self.output_vector_size])
                b_o = init_biases([self.output_vector_size])
                '''
                Embedding layer
                '''
                with tf.device('/gpu:0'), tf.name_scope("embedding"):
                    self.embedding_layer(tf_train_dataset, tf_train_queries,
                                         tf_valid_dataset, tf_valid_queries,
                                         tf_test_dataset, tf_test_queries)

                # Training computation
                def model(dataset, queries, w_o, b_o, train_mode):
                    dataset = tf.concat([queries, dataset], axis=1)
                    w_hs = []
                    if NNConfig.dropout and train_mode:
                        drop_h = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            drop_i = tf.nn.dropout(
                                drop_h, NNConfig.dropout_keep_prob_input)
                            w_h = init_weights([
                                drop_h.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(
                                tf.matmul(drop_i, w_h) + b_h)
                            drop_h = tf.nn.dropout(
                                h_lay_train,
                                NNConfig.dropout_keep_prob_hidden)
                            w_hs.append(w_h)
                        if self.ordinal:
                            return tf.nn.sigmoid(
                                tf.matmul(drop_h, w_o) + b_o), w_hs
                        else:
                            # FIX: the original computed
                            # tf.matmul(drop_h, w_o + b_o), broadcasting the
                            # bias into the weight matrix instead of adding it
                            # to the product; aligned with the non-dropout
                            # branch below.
                            return tf.matmul(drop_h, w_o) + b_o, w_hs
                    else:
                        h_lay_train = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            w_h = init_weights([
                                h_lay_train.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(
                                tf.matmul(h_lay_train, w_h) +
                                b_h)  # or tf.nn.sigmoid
                            w_hs.append(w_h)
                        if self.ordinal:
                            return tf.nn.sigmoid(
                                tf.matmul(h_lay_train, w_o) + b_o), w_hs
                        else:
                            return tf.matmul(h_lay_train, w_o) + b_o, w_hs

                self.log.info("embedded_train shape: {}".format(
                    tf.shape(self.embedded_train_expanded)))
                logits, w_hs = model(self.embedded_train_expanded,
                                     self.embedded_train_queries, w_o, b_o,
                                     True)
                if self.ordinal:
                    loss = tf.reduce_mean(
                        tf.nn.sigmoid_cross_entropy_with_logits(
                            logits=logits, labels=tf_train_labels))
                else:
                    # Halved mean squared error over the batch.
                    loss = tf.reduce_sum(
                        tf.pow(logits - tf_train_labels,
                               2)) / (2 * NNConfig.batch_size)
                if NNConfig.regularization:
                    loss += beta_regu * (
                        sum(tf.nn.l2_loss(w_h)
                            for w_h in w_hs) + tf.nn.l2_loss(w_o))
                if NNConfig.learning_rate_decay:
                    learning_rate = tf.train.exponential_decay(
                        NNConfig.learning_rate,
                        global_step,
                        NNConfig.decay_steps,
                        NNConfig.decay_rate,
                        staircase=True)
                    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
                        loss, global_step=global_step)
                else:
                    optimizer = tf.train.AdamOptimizer(
                        NNConfig.learning_rate).minimize(loss)

                # score model: linear activation
                train_prediction = logits
                valid_prediction, w_v = model(self.embedded_valid_expanded,
                                              self.embedded_valid_queries,
                                              w_o, b_o, False)
                with tf.name_scope("test_prediction"):
                    test_prediction, w_t = model(self.embedded_test_expanded,
                                                 self.embedded_test_queries,
                                                 w_o, b_o, False)
                    test_prediction = tf.identity(test_prediction,
                                                  name="test_prediction")
            '''
            run accuracy scope
            '''
            with tf.name_scope('accuracy'):
                pre = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                lbl = tf.placeholder("float",
                                     shape=[None, self.output_vector_size])
                # compute the mean of all predictions
                if self.ordinal:
                    accuracy = tf.reduce_mean(
                        tf.cast(
                            tf.nn.sigmoid_cross_entropy_with_logits(
                                logits=pre, labels=lbl), "float"))
                else:
                    accuracy = tf.reduce_sum(tf.pow(pre - lbl, 2)) / (
                        2 * tf.cast(tf.shape(lbl)[0], tf.float32))

        self.log.info('running the session...')
        self.train(graph, tf_train_dataset, tf_train_labels,
                   tf_train_queries, tf_valid_dataset, tf_valid_queries,
                   tf_test_dataset, tf_test_queries, train_prediction,
                   valid_prediction, test_prediction, loss, optimizer,
                   accuracy, pre, lbl, beta_regu)

    def embedding_W(self):
        """Return the embedding matrix: randomly initialized, hybrid
        (pretrained + trainable for train-only vocabulary), or fully
        pretrained/frozen."""
        if not self.pretrained:
            # Embedding layer
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                W = tf.Variable(tf.random_uniform([
                    self.d_loader.d_handler.get_vocab_size(),
                    NNConfig.embedding_dim
                ], -1.0, 1.0),
                                name="W")
        elif self.hybrid:
            # NOTE(review): self.hybrid is not assigned in __init__ — confirm
            # the NN base class defines it before this runs.
            only_in_train = self.d_loader.d_handler.get_vocab(
            ) - self.d_loader.pretrain_vocab
            train_embeddings = tf.get_variable(
                name="embs_only_in_train",
                shape=[len(only_in_train), NNConfig.embedding_dim],
                initializer=tf.random_uniform_initializer(-1.0, 1.0),
                trainable=True)
            embed_vocab_size = len(self.d_loader.pretrain_vocab)
            embedding_dim = len(self.d_loader.embd[0])
            pretrained_embs = tf.Variable(tf.constant(
                0.0, shape=[embed_vocab_size, embedding_dim]),
                                          trainable=False,
                                          name="pretrained")
            # The actual pretrained values are fed in via embedding_init.
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [embed_vocab_size, NNConfig.embedding_dim])
            self.embedding_init = pretrained_embs.assign(
                self.embedding_placeholder)
            W = tf.concat([pretrained_embs, train_embeddings], axis=0)
        else:
            embed_vocab_size = len(self.d_loader.pretrain_vocab)
            embedding_dim = len(self.d_loader.embd[0])
            self.embedding = np.asarray(self.d_loader.embd)
            W = tf.Variable(tf.constant(
                0.0, shape=[embed_vocab_size, embedding_dim]),
                            trainable=False,
                            name="W")
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [embed_vocab_size, NNConfig.embedding_dim])
            self.embedding_init = W.assign(self.embedding_placeholder)
        return W

    def embedding_layer(self, tf_train_dataset, tf_train_queries,
                        tf_valid_dataset, tf_valid_queries, tf_test_dataset,
                        tf_test_queries):
        """Build sum-pooled embedding tensors for all six input placeholders
        and store them on self (embedded_*_expanded / embedded_*_queries)."""
        if not self.pretrained:
            # Embedding layer
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                self.W = tf.Variable(tf.random_uniform([
                    self.d_loader.d_handler.get_vocab_size(),
                    NNConfig.embedding_dim
                ], -1.0, 1.0),
                                     name="W")
                embedded_train = tf.nn.embedding_lookup(
                    self.W, tf_train_dataset)
                self.embedded_train_expanded = tf.reduce_sum(
                    embedded_train, [1])
                embedded_train_queries = tf.nn.embedding_lookup(
                    self.W, tf_train_queries)
                self.embedded_train_queries = tf.reduce_sum(
                    embedded_train_queries, [1])
                embedded_valid = tf.nn.embedding_lookup(
                    self.W, tf_valid_dataset)
                self.embedded_valid_expanded = tf.reduce_sum(
                    embedded_valid, [1])
                embedded_valid_queries = tf.nn.embedding_lookup(
                    self.W, tf_valid_queries)
                self.embedded_valid_queries = tf.reduce_sum(
                    embedded_valid_queries, [1])
                embedded_test = tf.nn.embedding_lookup(
                    self.W, tf_test_dataset)
                self.embedded_test_expanded = tf.reduce_sum(
                    embedded_test, [1])
                embedded_test_queries = tf.nn.embedding_lookup(
                    self.W, tf_test_queries)
                self.embedded_test_queries = tf.reduce_sum(
                    embedded_test_queries, [1])
        elif self.hybrid:
            # Mix frozen pretrained rows with trainable rows for words that
            # appear only in the training vocabulary.
            only_in_train = self.d_loader.d_handler.get_vocab(
            ) - self.d_loader.pretrain_vocab
            train_embeddings = tf.get_variable(
                name="embs_only_in_train",
                shape=[len(only_in_train), NNConfig.embedding_dim],
                initializer=tf.random_uniform_initializer(-1.0, 1.0),
                trainable=True)
            embed_vocab_size = len(self.d_loader.pretrain_vocab)
            embedding_dim = len(self.d_loader.embd[0])
            pretrained_embs = tf.Variable(tf.constant(
                0.0, shape=[embed_vocab_size, embedding_dim]),
                                          trainable=False,
                                          name="pretrained")
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [embed_vocab_size, NNConfig.embedding_dim])
            self.embedding_init = pretrained_embs.assign(
                self.embedding_placeholder)
            W = tf.concat([pretrained_embs, train_embeddings], axis=0)
            # Reduce along dimension 1 (`n_input`) to get a single vector (row)
            # per input example. It's fairly typical to do this for bag-of-words type problems.
            self.embedded_train_expanded = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_train_dataset), [1])
            self.embedded_train_queries = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_train_queries), [1])
            self.embedded_valid_expanded = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_valid_dataset), [1])
            self.embedded_valid_queries = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_valid_queries), [1])
            self.embedded_test_expanded = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_test_dataset), [1])
            self.embedded_test_queries = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_test_queries), [1])
        else:
            embed_vocab_size = len(self.d_loader.pretrain_vocab)
            embedding_dim = len(self.d_loader.embd[0])
            self.embedding = np.asarray(self.d_loader.embd)
            W = tf.Variable(tf.constant(
                0.0, shape=[embed_vocab_size, embedding_dim]),
                            trainable=False,
                            name="W")
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [embed_vocab_size, NNConfig.embedding_dim])
            self.embedding_init = W.assign(self.embedding_placeholder)
            # Reduce along dimension 1 (`n_input`) to get a single vector (row)
            # per input example. It's fairly typical to do this for bag-of-words type problems.
            self.embedded_train_expanded = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_train_dataset), [1])
            self.embedded_train_queries = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_train_queries), [1])
            self.embedded_valid_expanded = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_valid_dataset), [1])
            self.embedded_valid_queries = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_valid_queries), [1])
            self.embedded_test_expanded = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_test_dataset), [1])
            self.embedded_test_queries = tf.reduce_sum(
                tf.nn.embedding_lookup(W, tf_test_queries), [1])
class Evaluation:
    """Restores trained ranking models from TF checkpoints and evaluates them
    on the held-out test split (pointwise scoring or pairwise Bradley-Terry
    ranking with NDCG reporting)."""

    def __init__(self, embedding=False, pretrained=False):
        print("Eval init.")
        # qsize="15" selects the evaluation-sized query set — presumably a
        # DataLoader-specific size tag; confirm against DataLoader.
        self.d_loader = DataLoader(embedding=embedding, pretrained=pretrained,
                                   qsize="15")
        self.test_dataset, self.test_labels, self.test_queries = \
            self.d_loader.load_test_data()

    def get_lbl_dict(self):
        """Build {query: {url: relevance_label}} from test label rows.

        Each row of self.test_labels is (query, label, url):
        row[0] = query id, row[1] = relevance label, row[2] = url.
        """
        lbl_dict = {}
        for row in self.test_labels:
            lbl_dict.setdefault(row[0], {})[row[2]] = row[1]
        return lbl_dict

    def predict_pointwise(self, model_path):
        """Restore a pointwise scoring model and score every test example.

        Returns a flat list with one prediction per test label row.
        """
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        with tf.Session() as session:
            # Restore variables from disk.
            saver.restore(session, model_path)
            print("Model restored.")
            graph = tf.get_default_graph()
            test_prediction = graph.get_tensor_by_name('test_prediction:0')
            tf_test_dataset = graph.get_tensor_by_name('test:0')
            tf_test_queries = graph.get_tensor_by_name('test_queries:0')
            vsteps, test_data_batches, test_label_batches, test_query_batches = \
                NN.batch_data(data=self.test_dataset,
                              labels=self.test_labels,
                              queries=self.test_queries,
                              batch_size=NNConfig.batch_size)
            predictions = []
            for step in range(0, vsteps):
                batch_predictions = session.run(
                    test_prediction,
                    feed_dict={tf_test_dataset: test_data_batches[step],
                               tf_test_queries: test_query_batches[step]})
                # BUG FIX: the original did predictions.append(batch_predictions),
                # producing a list of *batch arrays*, and then asserted its length
                # against the number of *examples* — flatten instead.
                predictions.extend(batch_predictions)
            assert len(predictions) == len(self.test_labels)
            return predictions

    def mle(self, pmat, max_iter=100):
        """Fit Bradley-Terry strengths by the standard MM fixed-point iteration.

        pmat is an (n, n) pairwise matrix; wins are column sums, so
        pmat[i][j] is read as evidence of item j beating item i — confirm
        the orientation against Utilities.transform_pairwise.
        Returns strengths normalized to mean 1; raises RuntimeError if the
        iteration does not converge within max_iter sweeps.
        """
        n = pmat.shape[0]  # number of items
        wins = np.sum(pmat, axis=0)
        params = np.ones(n, dtype=float)
        for _ in range(max_iter):
            tiled = np.tile(params, (n, 1))
            combined = 1.0 / (tiled + tiled.T)
            # Self-comparisons carry no information.
            np.fill_diagonal(combined, 0)
            nxt = wins / np.sum(combined, axis=0)
            nxt = nxt / np.mean(nxt)
            if np.linalg.norm(nxt - params, ord=np.inf) < 1e-6:
                return nxt
            params = nxt
        raise RuntimeError('did not converge')

    def predict_prob(self, model_path, model_meta):
        """Restore a pairwise-probability model, predict all document pairs,
        fit per-query Bradley-Terry parameters and print NDCG@{10,20,30}."""
        # Add ops to save and restore all the variables.
        saver = tf.train.import_meta_graph(model_meta)
        with tf.Session() as session:
            # Restore variables from disk.
            saver.restore(session, model_path)
            print("Model restored.")
            graph = tf.get_default_graph()
            test_prediction = graph.get_tensor_by_name(
                'test_prediction/test_prediction:0')
            tf_test_left = graph.get_tensor_by_name('test_left:0')
            tf_test_right = graph.get_tensor_by_name('test_right:0')
            tf_test_queries = graph.get_tensor_by_name('test_queries:0')
            test_data_left, test_data_right, test_labels_new, test_data_queries, \
                q_pmat_dict = Utilities.transform_pairwise(
                    self.test_dataset, self.test_labels, self.test_queries,
                    prob=True, eval=True)
            vsteps, test_left_batches, test_right_batches, test_label_batches, \
                test_query_batches = NN.batch_data_pairwise(
                    data_left=test_data_left,
                    data_right=test_data_right,
                    labels=test_labels_new,
                    queries=test_data_queries,
                    batch_size=NNConfig.batch_size)
            predictions = np.zeros(len(test_labels_new))
            idx = 0
            for step in range(0, vsteps):
                batch_predictions = session.run(
                    test_prediction,
                    feed_dict={tf_test_left: test_left_batches[step],
                               tf_test_right: test_right_batches[step],
                               tf_test_queries: test_query_batches[step]})
                for i in range(0, len(batch_predictions)):
                    predictions[idx] = batch_predictions[i][0]
                    idx += 1
            lbl_dict = self.get_lbl_dict()
            for q, v in q_pmat_dict.items():
                # q: query; v[0]: {prediction index -> (i, j) doc pair};
                # v[1]: docs (urls) belonging to q.
                print("len v: {}".format(len(v[1])))
                print(len(predictions))
                _pmat = np.zeros((len(v[1]), len(v[1])))
                for i, j in v[0].items():
                    _pmat[j[0]][j[1]] = predictions[i]
                # Estimating Bradley-Terry model parameters.
                params = self.mle(_pmat)
                # Ranking (best to worst).
                ranking = np.argsort(params)[::-1]
                r = []
                url_labels = lbl_dict[q]
                for urlr in ranking:
                    url = v[1][urlr]
                    if url not in url_labels:
                        # Unlabeled document: report and skip (best-effort).
                        print(url)
                    else:
                        r.append(url_labels[url])
                print('NDCG@{}: {}'.format(10, ndcg_at_k(r, 10)))
                print('NDCG@{}: {}'.format(20, ndcg_at_k(r, 20)))
                print('NDCG@{}: {}'.format(30, ndcg_at_k(r, 30)))
def __init__(self, embedding=False, pretrained=False):
    """Set up the evaluation data loader and pull in the held-out test split."""
    print("Eval init.")
    loader = DataLoader(embedding=embedding, pretrained=pretrained, qsize="15")
    self.d_loader = loader
    self.test_dataset, self.test_labels, self.test_queries = loader.load_test_data()
class FPNN(NN):
    """Feed-forward scoring network over a frozen pretrained embedding matrix.

    Documents are bags of token ids; each document is embedded by summing its
    token vectors, then scored by an MLP with a single linear output.
    """

    def __init__(self, qsize="1k"):
        super(FPNN, self).__init__()
        self.input_vector_size = NNConfig.max_doc_size
        # output vector size = 1 for scoring model
        self.output_vector_size = 1
        self.d_loader = DataLoader(pretrained=True, qsize=qsize)
        self.train_dataset, self.train_labels, self.valid_dataset, \
            self.valid_labels, self.test_dataset, self.test_labels = \
            self.d_loader.get_ttv()
        # Graph handles filled in by simple_NN().
        self.embedding = None
        self.embedding_init = None
        self.embedding_placeholder = None
        self.pretrained = True

    def simple_NN(self, mode="/cpu:0"):
        """Build the computation graph and launch training via NN.train().

        mode: TF device string ("/cpu:0", "/gpu:0", ...) to place the graph on.
        """
        self.log.info("creating the computational graph...")
        graph = tf.Graph()
        with graph.as_default():
            # BUG FIX: honor the `mode` argument; it was previously ignored
            # and the device was hard-coded to "/cpu:0".
            with tf.device(mode):
                # Input data: token-id matrices, so int32 (unlike the dense
                # float inputs of FNN).
                tf_train_dataset = tf.placeholder(
                    tf.int32, shape=(NNConfig.batch_size, self.input_vector_size))
                tf_train_labels = tf.placeholder(
                    tf.float32, shape=(NNConfig.batch_size, self.output_vector_size))
                # create a placeholder
                tf_valid_dataset = tf.placeholder(
                    tf.int32, shape=(NNConfig.batch_size, self.input_vector_size))
                tf_test_dataset = tf.placeholder(
                    tf.int32, shape=(NNConfig.batch_size, self.input_vector_size))
                if NNConfig.regularization:
                    beta_regu = tf.placeholder(tf.float32)
                if NNConfig.learning_rate_decay:
                    global_step = tf.Variable(0)

                # Variables.
                def init_weights(shape):
                    return tf.Variable(tf.truncated_normal(shape))

                def init_biases(shape):
                    return tf.Variable(tf.zeros(shape))

                w_o = init_weights(
                    [NNConfig.num_hidden_nodes, self.output_vector_size])
                b_o = init_biases([self.output_vector_size])

                embed_vocab_size = len(self.d_loader.pretrain_vocab)
                embedding_dim = len(self.d_loader.embd[0])
                self.embedding = np.asarray(self.d_loader.embd)
                # Frozen (trainable=False) embedding table; the actual values
                # are loaded at session time through embedding_init.
                W = tf.Variable(
                    tf.constant(0.0, shape=[embed_vocab_size, embedding_dim]),
                    trainable=False, name="W")
                self.embedding_placeholder = tf.placeholder(
                    tf.float32, [embed_vocab_size, NNConfig.embedding_dim])
                self.embedding_init = W.assign(self.embedding_placeholder)

                # Reduce along dimension 1 (`n_input`) to get a single vector
                # (row) per input example. It's fairly typical to do this for
                # bag-of-words type problems.
                # (SYNTAX FIX: the second sentence was stray uncommented text.)
                self.embedded_train_expanded = tf.reduce_sum(
                    tf.nn.embedding_lookup(W, tf_train_dataset), [1])
                self.embedded_valid_expanded = tf.reduce_sum(
                    tf.nn.embedding_lookup(W, tf_valid_dataset), [1])
                self.embedded_test_expanded = tf.reduce_sum(
                    tf.nn.embedding_lookup(W, tf_test_dataset), [1])

                def model(dataset, w_o, b_o, train_mode):
                    """MLP over `dataset`: num_hidden_layers ReLU layers plus a
                    linear output. Dropout applies only when train_mode is True.
                    Returns (logits, list of hidden weight matrices for L2)."""
                    w_hs = []
                    if NNConfig.dropout and train_mode:
                        drop_h = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            drop_i = tf.nn.dropout(
                                drop_h, NNConfig.dropout_keep_prob_input)
                            w_h = init_weights([
                                drop_h.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(tf.matmul(drop_i, w_h) + b_h)
                            drop_h = tf.nn.dropout(
                                h_lay_train, NNConfig.dropout_keep_prob_hidden)
                            w_hs.append(w_h)
                        return tf.matmul(drop_h, w_o) + b_o, w_hs
                    else:
                        h_lay_train = dataset
                        for i in range(0, NNConfig.num_hidden_layers):
                            w_h = init_weights([
                                h_lay_train.shape.as_list()[1],
                                NNConfig.num_hidden_nodes
                            ])
                            b_h = init_biases([NNConfig.num_hidden_nodes])
                            h_lay_train = tf.nn.relu(
                                tf.matmul(h_lay_train, w_h) + b_h)  # or tf.nn.sigmoid
                            w_hs.append(w_h)
                        return tf.matmul(h_lay_train, w_o) + b_o, w_hs

                self.log.info("embedded_train shape: {}".format(
                    tf.shape(self.embedded_train_expanded)))
                logits, w_hs = model(self.embedded_train_expanded, w_o, b_o, True)
                # Half mean squared error over the batch.
                loss = tf.reduce_sum(tf.pow(logits - tf_train_labels, 2)) / \
                    (2 * NNConfig.batch_size)
                if NNConfig.regularization:
                    loss += beta_regu * (sum(tf.nn.l2_loss(w_h) for w_h in w_hs)
                                         + tf.nn.l2_loss(w_o))
                if NNConfig.learning_rate_decay:
                    learning_rate = tf.train.exponential_decay(
                        NNConfig.learning_rate, global_step,
                        NNConfig.decay_steps, NNConfig.decay_rate, staircase=True)
                    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
                        loss, global_step=global_step)
                else:
                    optimizer = tf.train.AdamOptimizer(
                        NNConfig.learning_rate).minimize(loss)

                # score model: linear activation
                train_prediction = logits
                # Unused hidden-weight lists dropped (were w_v / w_t).
                valid_prediction, _ = model(self.embedded_valid_expanded,
                                            w_o, b_o, False)
                test_prediction, _ = model(self.embedded_test_expanded,
                                           w_o, b_o, False)
                '''
                run accuracy scope
                '''
                with tf.name_scope('accuracy'):
                    pre = tf.placeholder("float",
                                         shape=[None, self.output_vector_size])
                    lbl = tf.placeholder("float",
                                         shape=[None, self.output_vector_size])
                    # compute the mean of all predictions
                    accuracy = tf.reduce_sum(tf.pow(pre - lbl, 2)) / (
                        2 * tf.cast(tf.shape(lbl)[0], tf.float32))

            self.log.info('running the session...')
            # NOTE(review): beta_regu only exists when NNConfig.regularization
            # is set; this call would NameError otherwise — confirm the config
            # always enables regularization (same pattern as FNN.simple_NN).
            self.train(graph, tf_train_dataset, tf_train_labels,
                       tf_valid_dataset, tf_test_dataset, train_prediction,
                       valid_prediction, test_prediction, loss, optimizer,
                       accuracy, pre, lbl, beta_regu)