def parse(serialized):
    """Decode one serialized example into dense word and formula tensors.

    Arguments:
      serialized: a scalar string tensor holding a serialized example
        (like the one returned from the `encode()` method) that carries
        the features stored under `SENTENCE_LENGTH_KEY`,
        `FORMULA_LENGTH_KEY`, `WORDS_KEY` and `FORMULA_KEY`.

    Returns:
      a tuple of 4 tensors:
        `words`: 1D int64 tensor of shape [sentence_length].
        `sentence_length`: 0D int64 tensor (i.e. scalar) read from the
          example, representing the sentence length.
        `formula`: 1D int64 tensor of shape [formula_length].
        `formula_length`: 0D int64 tensor (i.e. scalar) read from the
          example, representing the formula length.
    """
    feature_spec = {}
    # The two lengths are stored as scalars ...
    for length_key in (SENTENCE_LENGTH_KEY, FORMULA_LENGTH_KEY):
        feature_spec[length_key] = tf.FixedLenFeature([], tf.int64)
    # ... while the id sequences have a per-example length.
    for sequence_key in (WORDS_KEY, FORMULA_KEY):
        feature_spec[sequence_key] = tf.VarLenFeature(tf.int64)

    example = tf.parse_single_example(
        serialized=serialized, features=feature_spec)

    # Variable-length features come back sparse; densify them for callers.
    words = tf.sparse_tensor_to_dense(example[WORDS_KEY])
    formula = tf.sparse_tensor_to_dense(example[FORMULA_KEY])
    return (words, example[SENTENCE_LENGTH_KEY],
            formula, example[FORMULA_LENGTH_KEY])
def build_model(self):
    """Assemble the encode/decode graph, its masked loss and summaries.

    The sparse input `self.X` is encoded with `self.W`/`self.b`, decoded
    with `self.W_prime`/`self.b_prime`, and compared against the densified
    `self.Y`. Only positions selected by `self.mask` contribute to the
    error; an L2 penalty weighted by `self.lambda_w` is added for every
    entry of `self.params`.

    Returns:
      A 7-tuple `(encoded_values, decoded_values, masked_decoded_values,
      error, loss, train_step, merged_summary)`.
    """
    mask = tf.sparse_tensor_to_dense(self.mask)

    with tf.name_scope('encoding'):
        projected = tf.sparse_tensor_dense_matmul(self.X, self.W)
        raw_encoding = tf.add(projected, self.b, name='raw_values')
        encoded_values = self.enc_func(raw_encoding, name='encoded_values')

    with tf.name_scope('decoding'):
        raw_decoding = tf.nn.xw_plus_b(
            encoded_values, self.W_prime, self.b_prime)
        decoded_values = self.dec_func(raw_decoding, name='decoded_values')
        masked_decoded_values = tf.multiply(mask, decoded_values)

    with tf.name_scope('training_process'):
        targets = tf.sparse_tensor_to_dense(self.Y, default_value=0)
        squared_error = tf.squared_difference(targets, decoded_values)
        # Unobserved entries are zeroed out before summing.
        error = tf.reduce_sum(tf.multiply(mask, squared_error))

        reg = 0
        for _, tensor in self.params.items():
            reg += tf.nn.l2_loss(tensor) * self.lambda_w
        loss = error + reg

        model_params = list(self.params.values())
        train_step = self._optimize(loss, model_params)

    tf.summary.scalar('error', error)
    tf.summary.scalar('loss', loss)
    for name, tensor in self.params.items():
        tf.summary.histogram(name, tensor)
    merged_summary = tf.summary.merge_all()

    return (encoded_values, decoded_values, masked_decoded_values,
            error, loss, train_step, merged_summary)
def accuracy_instance(predictions, targets, n=[1, 2, 3, 4, 5, 10], nb_classes=5, nb_samples_per_class=10, batch_size=1):
    """Set up per-instance accuracy accumulators and their update step.

    Args:
      predictions: tensor of predicted class ids; its dtype is the one
        `targets` is cast to.
      targets: tensor of target class ids.
      n: not referenced by the code visible here.
      nb_classes: number of classes; the counter table has
        `nb_classes + 1` columns.
      nb_samples_per_class: number of columns of the accuracy table.
      batch_size: number of rows of both tables.

    NOTE(review): in this excerpt neither a call of `step_` (e.g. through a
    scan) nor a return statement of the outer function is visible; the
    definition may continue elsewhere -- confirm before relying on it.
    NOTE(review): `step_` uses tuple parameter unpacking, which is
    Python 2-only syntax (removed by PEP 3113).
    """
    targets = tf.cast(targets, predictions.dtype)

    # Zero-initialised accumulators: `accuracy` is
    # [batch_size, nb_samples_per_class], `indices` is
    # [batch_size, nb_classes + 1].
    accuracy = tf.constant(value=0, shape=(batch_size, nb_samples_per_class), dtype=tf.float32)
    indices = tf.constant(value=0, shape=(batch_size, nb_classes+1), dtype=tf.float32)

    def step_((accuracy, indices), (p, t)):
        """Fold one (prediction, target) pair into the accumulators.

        `indices[b, c]` is incremented every time class `c` is the target
        for batch row `b`. `accuracy[b, j]` receives 1.0 when the
        prediction equals the target, where `j` is the value of
        `indices[b, t[b]]` before this step's increment.

        Returns the updated `[accuracy, indices]`.
        """
        p = tf.cast(p, tf.int32)
        t = tf.cast(t, tf.int32)
        # Accuracy update: look up how often each row's target class has
        # been seen so far and use that count as the column to write to.
        batch_range = tf.cast(tf.range(0, batch_size), dtype=tf.int32)
        gather = tf.cast(tf.gather_nd(indices,tf.stack([tf.range(0,p.get_shape().as_list()[0]), t], axis=1)), tf.int32)
        index = tf.cast(tf.stack([batch_range, gather], axis=1), dtype=tf.int64)
        val = tf.cast(tf.equal(p, t), tf.float32)
        # A one-entry-per-row sparse tensor is densified and added, which
        # acts as a scatter-add into `accuracy`.
        delta = tf.SparseTensor(indices=index, values=val, dense_shape=tf.cast(accuracy.get_shape().as_list(), tf.int64))
        accuracy = accuracy + tf.sparse_tensor_to_dense(delta)
        # Index update: bump the seen-counter of each row's target class.
        index = tf.cast(tf.stack([batch_range, t], axis=1), dtype=tf.int64)
        val = tf.constant(1.0, shape=[batch_size])
        delta = tf.SparseTensor(indices=index, values=val, dense_shape=tf.cast(indices.get_shape().as_list(), dtype=tf.int64))
        indices = indices + tf.sparse_tensor_to_dense(delta)
        return [accuracy, indices]
def testPrintSparseTensorPassthrough(self):
    """`print_op` on a SparseTensor must leave its contents unchanged."""
    sparse_spec = dict(indices=[[0, 0], [1, 2]], values=[1, 2], shape=[3, 4])
    printed = tf.SparseTensor(**sparse_spec)
    reference = tf.SparseTensor(**sparse_spec)
    printed = tf.contrib.framework.print_op(printed)
    with self.test_session():
        printed_dense = tf.sparse_tensor_to_dense(printed).eval()
        reference_dense = tf.sparse_tensor_to_dense(reference).eval()
        self.assertAllEqual(printed_dense, reference_dense)
def _parse_example(serialized_example):
    """Return dense `inputs` and `targets` Tensors from a serialized tf.Example.

    Both features are variable-length int64 lists; they are parsed as
    sparse tensors and densified before being returned as a 2-tuple.
    """
    feature_names = ("inputs", "targets")
    feature_spec = {name: tf.VarLenFeature(tf.int64) for name in feature_names}
    example = tf.parse_single_example(serialized_example, feature_spec)
    inputs, targets = [
        tf.sparse_tensor_to_dense(example[name]) for name in feature_names
    ]
    return inputs, targets
def unpool_layer2x2(self, x, raveled_argmax, out_shape):
    """Scatter pooled values back to the positions recorded by max-pooling.

    Args:
      x: pooled activations; squeezed and flattened into the values that
        get scattered. Presumably a single-example batch -- the leading
        axis is removed with `tf.squeeze` and re-added at the end;
        TODO confirm.
      raveled_argmax: flat argmax indices, decoded by
        `self.unravel_argmax` (helper not shown here).
      out_shape: shape of the unpooled tensor; entries 1..3 are used as
        height, width and channels.

    Returns:
      A dense tensor of shape `[1, out_shape[1], out_shape[2],
      out_shape[3]]` that is zero everywhere except at the scattered
      positions.
    """
    argmax = self.unravel_argmax(raveled_argmax, tf.to_int64(out_shape))
    # Only used as a template for the output's (height, width, channels).
    output = tf.zeros([out_shape[1], out_shape[2], out_shape[3]])

    height = tf.shape(output)[0]
    width = tf.shape(output)[1]
    channels = tf.shape(output)[2]

    # t1: the channel coordinate of every pooled element, laid out as
    # [channels, ceil(height / 2), ceil(width / 2), 1].
    t1 = tf.to_int64(tf.range(channels))
    t1 = tf.tile(t1, [((width + 1) // 2) * ((height + 1) // 2)])
    t1 = tf.reshape(t1, [-1, channels])
    t1 = tf.transpose(t1, perm=[1, 0])
    t1 = tf.reshape(t1, [channels, (height + 1) // 2, (width + 1) // 2, 1])

    # t2: the first two components of the unravelled argmax, moved to the
    # last axis so they line up with t1 (presumably the row and column
    # coordinates -- depends on `unravel_argmax`; TODO confirm).
    t2 = tf.squeeze(argmax)
    t2 = tf.pack((t2[0], t2[1]), axis=0)
    t2 = tf.transpose(t2, perm=[3, 1, 2, 0])

    # One 3-component index per pooled element. Note the pre-1.0
    # `tf.concat(axis, values)` argument order.
    t = tf.concat(3, [t2, t1])
    indices = tf.reshape(t, [((height + 1) // 2) * ((width + 1) // 2) * channels, 3])

    # Flatten the pooled values channel-major, matching the index layout.
    x1 = tf.squeeze(x)
    x1 = tf.reshape(x1, [-1, channels])
    x1 = tf.transpose(x1, perm=[1, 0])
    values = tf.reshape(x1, [-1])

    # `sparse_reorder` puts the entries into canonical row-major order,
    # which `sparse_tensor_to_dense` validates by default.
    delta = tf.SparseTensor(indices, values, tf.to_int64(tf.shape(output)))
    return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_reorder(delta)), 0)
def input_fn():
    """Read a batch of features and split off the dense answer-id target.

    Returns:
      A `(features, target)` pair: `features` is the batch read from
      `filename` without the 'answer_ids' entry, and `target` is that
      entry densified and passed through `utils.resize_axis(..., 1, 1)`.
    """
    batch = learn.read_batch_features(
        filename, BATCH_SIZE, feature_info, reader=tf.TFRecordReader)
    sparse_answers = batch.pop('answer_ids')
    dense_answers = tf.sparse_tensor_to_dense(sparse_answers)
    return batch, utils.resize_axis(dense_answers, 1, 1)
def to_matrix(sparse_indices, values, dense_shape):
    """Build a dense indicator matrix with 1.0 at every given index.

    Args:
      sparse_indices: index tensor whose static first dimension gives the
        number of entries.
      values: currently ignored -- every entry is written as 1.0.
      dense_shape: shape of the resulting dense tensor.

    Returns:
      A dense float tensor of shape `dense_shape`.
    """
    entry_count = sparse_indices.get_shape().as_list()[0]
    unit_values = tf.ones(entry_count)
    indicator = tf.SparseTensor(
        indices=sparse_indices,
        values=unit_values,
        dense_shape=dense_shape)
    # Reorder first so the indices are in canonical row-major order.
    ordered = tf.sparse_reorder(indicator)
    return tf.sparse_tensor_to_dense(ordered)
def _slice_with_actions(embeddings, actions):
    """Pick, for every batch row, the embedding of that row's action.

    A boolean mask of shape [batch_size, num_actions] with exactly one
    True per row (at the row's action) is built through a sparse tensor
    and applied with `tf.boolean_mask`.

    Args:
      embeddings: Tensor of embeddings to index, of the form
        [batch_size, num_actions, embed_dim]; the first two dimensions
        must be statically known.
      actions: int Tensor to use as index into embeddings. NOTE(review):
        the `[:, None]` expansion and the concat with a [batch_size, 1]
        row index imply a rank-1 tensor of shape [batch_size] -- confirm
        against callers.

    Returns:
      Tensor of embeddings indexed by actions.
    """
    static_dims = [dim.value for dim in embeddings.get_shape()]
    batch_size = static_dims[0]
    num_actions = static_dims[1]

    # Column of action ids, one per batch row.
    action_column = tf.cast(actions, tf.int64)[:, None]
    # One True per action: the values stored in the sparse mask.
    all_ones = tf.ones(tf.shape(actions))
    selected = tf.reshape(tf.cast(all_ones, tf.bool), [-1])
    # Column of row numbers, paired with the action ids into coordinates.
    row_column = tf.range(0, batch_size, dtype=tf.int64)[:, None]
    coordinates = tf.concat([row_column, action_column], 1)

    sparse_mask = tf.SparseTensor(
        coordinates, selected, [batch_size, num_actions])
    dense_mask = tf.sparse_tensor_to_dense(sparse_mask, default_value=False)
    dense_mask = tf.stop_gradient(dense_mask)
    return tf.boolean_mask(embeddings, dense_mask)
def test(self):
    """Decode ten single-utterance batches and print reference vs. result.

    Batches of size 1 are pulled with `utils.next_batch`, starting at
    index 20. For each one the first decoder output and the label error
    are evaluated with dropout disabled (keep probability 1.0), and the
    original and recognised texts are printed. The session is closed at
    the end.

    NOTE(review): the source formatting does not show whether `break`
    belongs to the inner or the outer loop; the inner loop is assumed here
    (print only the first pair of each batch) -- confirm.
    NOTE(review): `tf.sparse_tensor_to_dense` is called inside the loop, so
    a new graph op is added on every iteration.
    """
    index = 0
    next_idx = 20

    for index in range(10):
        next_idx, self.audio_features, self.audio_features_len, self.sparse_labels, wav_files = utils.next_batch(
            next_idx, 1, n_input, n_context, self.text_labels, self.wav_files, self.word_num_map)

        print('读入语音文件: ', wav_files[0])
        print('开始识别语音数据......')

        d, train_ler = self.sess.run([self.decoded[0], self.label_err], feed_dict=self.get_feed_dict(dropout=1.0))
        # Positions without a decoded symbol are filled with -1.
        dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
        dense_labels = utils.trans_tuple_to_texts_ch(self.sparse_labels, self.words)

        for orig, decoded_array in zip(dense_labels, dense_decoded):
            # Convert the decoded id array to a string.
            decoded_str = utils.trans_array_to_text_ch(decoded_array, self.words)
            print('语音原始文本: {}'.format(orig))
            print('识别出来的文本: {}'.format(decoded_str))
            break

    self.sess.close()
def __init__(self, config):
    """Build the input pipeline that yields `self.x` and its target `self.y`.

    Record files are collected from `config.path`, shuffled into a
    filename queue and read one record at a time. Each record's 'data'
    feature is reshaped to rows of `dimension_count` values; with a batch
    size above 1 the sequences are batched by length into
    `config.buckets`. `self.y` is `self.x` shifted one step ahead in time
    and zero-padded at the end.

    Raises:
      Exception: if the sample count is not divisible by the batch size.
    """
    paths, meta = Input._collect(config.path)
    self.dimension_count = meta['dimension_count']
    self.sample_count = meta['sample_count']
    self.batch_size = config.get('batch_size', 1)
    if self.sample_count % self.batch_size > 0:
        message = ('expected the number of samples ({}) to be '
                   'divisible by the batch size ({})')
        raise Exception(message.format(self.sample_count, self.batch_size))

    with tf.variable_scope('state'):
        self.state = State()

    with tf.variable_scope('source'):
        path_variable = tf.Variable(
            paths, name='paths', dtype=tf.string, trainable=False)
        path_queue = tf.FIFOQueue(meta['path_count'], [tf.string])
        # Every refill of the queue uses a freshly shuffled path order.
        shuffled_paths = tf.random_shuffle(path_variable)
        refill = path_queue.enqueue_many([shuffled_paths])
        tf.train.add_queue_runner(tf.train.QueueRunner(path_queue, [refill]))
        _, record = tf.TFRecordReader().read(path_queue)

    with tf.variable_scope('x'):
        feature_spec = {'data': tf.VarLenFeature(tf.float32)}
        example = tf.parse_single_example(record, feature_spec)
        flat = tf.sparse_tensor_to_dense(example['data'])
        if self.batch_size == 1:
            self.x = tf.reshape(flat, [1, -1, self.dimension_count])
        else:
            sequence = tf.reshape(flat, [-1, self.dimension_count])
            _, batched = tf.contrib.training.bucket_by_sequence_length(
                tf.shape(sequence)[0], [sequence], self.batch_size,
                config.buckets, dynamic_pad=True)
            self.x = batched[0]

    with tf.variable_scope('y'):
        # Drop the first time step and pad one step of zeros at the end.
        shifted = self.x[:, 1:, :]
        self.y = tf.pad(shifted, [[0, 0], [0, 1], [0, 0]])
def __init__(self,args):
    """Create the model's placeholders, load the type hierarchy, build losses.

    Args:
      args: configuration object; `batch_size`, `sentence_length`,
        `word_dim` and `class_size` are read from it.

    Side effects:
      Reads 'data/figer/figerhierarchical.p' (a pickle produced by this
      project; never point this at untrusted data) and prints the two loss
      tensors.
    """
    super(seqMLP, self).__init__()
    self.args = args
    self.batch_size = args.batch_size

    self.input_data = tf.placeholder(
        tf.float32,
        [self.args.batch_size, self.args.sentence_length, self.args.word_dim],
        name='inputdata')
    # Sparse labels, densified once for downstream use.
    # (original note: [None, 114] -- presumably batch x number of types)
    self.output_data = tf.sparse_placeholder(tf.float32, name='outputdata')
    self.dense_outputdata = tf.sparse_tensor_to_dense(self.output_data)

    self.keep_prob = tf.placeholder(tf.float32, name='keep_prob_NER')

    self.entMentIndex = tf.placeholder(tf.int32, [None, 5], name='ent_mention_index')
    self.entCtxLeftIndex = tf.placeholder(tf.int32, [None, 10], name='ent_ctxleft_index')
    self.entCtxRightIndex = tf.placeholder(tf.int32, [None, 10], name='ent_ctxright_index')

    self.pos_f1 = tf.placeholder(tf.float32, [None, 5, 1])
    self.pos_f2 = tf.placeholder(tf.float32, [None, 10, 1])
    self.pos_f3 = tf.placeholder(tf.float32, [None, 10, 1])

    # Hierarchy features. The file is opened in a `with` block so the
    # handle is closed deterministically instead of being leaked.
    with open('data/figer/figerhierarchical.p', 'rb') as hierarchy_file:
        hierarchy = cPickle.load(hierarchy_file)
    self.figerHier = np.asarray(hierarchy, np.float32)

    self.layers = {}
    self.layers['fullyConnect'] = layers_lib.FullyConnection(self.args.class_size)

    # A time step counts as used when any of its features is non-zero;
    # the per-sentence length is the number of used steps.
    used = tf.sign(tf.reduce_max(tf.abs(self.input_data), reduction_indices=2))
    self.length = tf.cast(tf.reduce_sum(used, reduction_indices=1), tf.int32)

    # NOTE(review): the extent of the device block is not visible in the
    # source formatting; all loss construction is assumed to live in it.
    with tf.device('/gpu:0'):
        self.prediction, self.loss_lm = self.cl_loss_from_embedding(self.input_data)
        # %-formatting prints the same text under Python 2 and Python 3.
        print('self.loss_lm: %s' % (self.loss_lm,))

        _, self.adv_loss = self.adversarial_loss()
        print('self.adv_loss: %s' % (self.adv_loss,))

        self.loss = tf.add(self.loss_lm, self.adv_loss)
def _decode_png_instance_masks(self, keys_to_tensors):
    """Decode PNG instance segmentation masks and stack into dense tensor.

    Each encoded mask is decoded to a single channel and binarised
    (pixel > 0). When there are no masks, an empty tensor with zero
    instances is returned instead.

    Args:
      keys_to_tensors: a dictionary from keys to tensors; the entries
        'image/object/mask', 'image/height' and 'image/width' are read.

    Returns:
      A 3-D float tensor of shape [num_instances, height, width] with
      values in {0, 1}.
    """
    def decode_png_mask(image_buffer):
        decoded = tf.image.decode_image(image_buffer, channels=1)
        image = tf.squeeze(decoded, axis=2)
        image.set_shape([None, None])
        return tf.to_float(tf.greater(image, 0))

    png_masks = keys_to_tensors['image/object/mask']
    height = keys_to_tensors['image/height']
    width = keys_to_tensors['image/width']
    if isinstance(png_masks, tf.SparseTensor):
        png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='')

    def decode_all():
        return tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32)

    def no_instances():
        return tf.zeros(tf.to_int32(tf.stack([0, height, width])))

    has_masks = tf.greater(tf.size(png_masks), 0)
    return tf.cond(has_masks, decode_all, no_instances)
def tensors_to_item(self, keys_to_tensors):
    """Maps the given dictionary of tensors to a concatenated list of bboxes.

    Every tensor named in `self._full_keys` gets a trailing axis of size 1
    and the results are concatenated along axis 2, so the last dimension
    holds one coordinate per key, in `self._full_keys` order.

    Args:
      keys_to_tensors: a mapping of TF-Example keys to parsed tensors.

    Returns:
      The concatenated bounding box coordinates. A SparseTensor unless
      `self._return_dense` is set, in which case a dense Tensor whose empty
      positions are filled with `self._default_value`.
    """
    def with_trailing_axis(sparse_value):
        new_shape = tf.concat(
            [tf.to_int64(tf.shape(sparse_value)),
             tf.constant([1], dtype=tf.int64)], 0)
        return tf.sparse_reshape(sparse_value, new_shape)

    sides = [with_trailing_axis(keys_to_tensors[key])
             for key in self._full_keys]

    bounding_boxes = tf.sparse_concat(2, sides)
    if not self._return_dense:
        return bounding_boxes
    return tf.sparse_tensor_to_dense(
        bounding_boxes, default_value=self._default_value)
def read_data_int64(input_fname):
    """Read every record of a TFRecord file once, timing the whole pass.

    Each record's 'bit_features' int64 list is densified and reshaped to
    rows of 62 values; the tensor is evaluated until the single-epoch
    filename queue is exhausted. Progress is printed every 10000 records.
    Nothing is returned -- the function only exercises the read path.

    Args:
      input_fname: path of the TFRecord file to read.
    """
    with tictoc():
        filename_queue = tf.train.string_input_producer(
            [input_fname], num_epochs=1)
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        features = {'bit_features': tf.VarLenFeature(tf.int64)}
        parsed_example = tf.parse_single_example(serialized_example, features)
        bit_features = tf.sparse_tensor_to_dense(parsed_example['bit_features'])
        bit_features = tf.reshape(bit_features, [-1, 62])

        with tf.Session() as sess:
            tf.initialize_all_variables().run()
            # `num_epochs` is tracked in a local variable, which needs its
            # own initializer.
            tf.initialize_local_variables().run()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                i = 0
                while not coord.should_stop():
                    # Evaluated only to pull the next record; the value
                    # itself is not needed.
                    bit_features.eval()
                    if i % 10000 == 0:
                        print("substance {}".format(i))
                    i += 1
            except tf.errors.OutOfRangeError:
                # Expected: raised once the single epoch is exhausted.
                pass
            finally:
                coord.request_stop()
                coord.join(threads)
def custom_fast_text(features, labels, mode, params):
    """Estimator `model_fn` for a fastText-style multi-label classifier.

    Text is split on whitespace, mapped to vocabulary ids, padded/cut to
    `commons.CNN_MAX_DOCUMENT_LENGTH`, embedded, max-pooled over time and
    projected to `commons.TARGET_SIZE` sigmoid outputs.

    Args:
      features: dict of input tensors; `commons.FEATURE_COL` holds the text.
      labels: int32 tensor of 0/1 targets (unused in PREDICT mode).
      mode: a `tf.estimator.ModeKeys` value.
      params: hyper-parameters; `N_WORDS` is the embedding vocabulary size.

    Returns:
      A `tf.estimator.EstimatorSpec` for the given mode.
    """
    # Probability above which a class counts as predicted. The same
    # threshold is used for the exported classes and for accuracy.
    decision_threshold = 0.30

    vocab_table = lookup.index_table_from_file(
        vocabulary_file='data/vocab.csv', num_oov_buckets=1, default_value=-1)
    text = features[commons.FEATURE_COL]
    words = tf.string_split(text)
    dense_words = tf.sparse_tensor_to_dense(
        words, default_value=commons.PAD_WORD)
    word_ids = vocab_table.lookup(dense_words)

    # Pad all the word_ids entries to the maximum document length, then cut
    # back so every document ends up exactly that long.
    padding = tf.constant([[0, 0], [0, commons.CNN_MAX_DOCUMENT_LENGTH]])
    word_ids_padded = tf.pad(word_ids, padding)
    word_id_vector = tf.slice(
        word_ids_padded, [0, 0], [-1, commons.CNN_MAX_DOCUMENT_LENGTH])

    tf.keras.backend.set_learning_phase(mode == tf.estimator.ModeKeys.TRAIN)

    embedded_sequences = tf.keras.layers.Embedding(
        params.N_WORDS, 20,
        input_length=commons.CNN_MAX_DOCUMENT_LENGTH)(word_id_vector)
    pooled = tf.keras.layers.GlobalMaxPooling1D()(embedded_sequences)
    logits = tf.keras.layers.Dense(commons.TARGET_SIZE, activation=None)(pooled)

    predictions = tf.nn.sigmoid(logits)
    # Thresholded 0/1 decisions. Casting the probabilities straight to int
    # would truncate every value below 1.0 to 0.
    predicted_classes = tf.cast(
        tf.greater(predictions, decision_threshold), tf.int32)

    if mode == tf.estimator.ModeKeys.PREDICT:
        prediction_dict = {
            'class': tf.squeeze(predicted_classes),
        }
        export_outputs = {
            'predictions': tf.estimator.export.PredictOutput(prediction_dict)
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions=predictions, export_outputs=export_outputs)

    loss = tf.losses.sigmoid_cross_entropy(
        multi_class_labels=labels, logits=logits)
    tf.summary.scalar('loss', loss)

    acc = tf.equal(predicted_classes, labels)
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))
    tf.summary.scalar('acc', acc)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, train_op=train_op, loss=loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics_ops = {
            # The metric compares for equality, so it needs the thresholded
            # classes rather than raw probabilities.
            'accuracy': tf.metrics.accuracy(
                labels=labels, predictions=predicted_classes)
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metrics_ops)
def cdk(self, visibles, k, learning_rate=0.001):
    """Build the contrastive-divergence (CD-k) update ops.

    Args:
      visibles: sparse tensor of visible units (it is densified here).
      k: number of Gibbs steps; must be at least 1.
      learning_rate: step size applied to every accumulated delta.

    Returns:
      `[update_w, update_vb, update_hb]`: ops that add the scaled
      positive-minus-negative statistics to `self.delta_w`,
      `self.delta_vb` and `self.delta_hb`.

    Raises:
      ValueError: if `k` is smaller than 1 (no reconstruction would exist).
    """
    if k < 1:
        raise ValueError('k must be at least 1, got {}'.format(k))

    h_start = self.propup(visibles)
    h_t = h_start
    for _ in range(k):
        v_t = self.propdown(h_t, visibles)
        h_t = self.propup(v_t)

    # Densify once and reuse for both the weight and visible-bias updates.
    dense_visibles = tf.sparse_tensor_to_dense(visibles)

    # adapt to 3D tensor here
    w_positive_grad = matmul_to3D(
        h_start, dense_visibles, self.m, self.F, self.K)  # formula (5) of the paper
    w_negative_grad = matmul_to3D(
        h_t, v_t, self.m, self.F, self.K)  # formula (5) of the paper

    update_w = self.delta_w.assign_add(
        learning_rate * (w_positive_grad - w_negative_grad))
    update_vb = self.delta_vb.assign_add(
        learning_rate * (dense_visibles - v_t))
    update_hb = self.delta_hb.assign_add(learning_rate * (h_start - h_t))

    return [update_w, update_vb, update_hb]
def _call(self, inputs):
    """Apply the layer: dropout, weight multiplication, adjacency product.

    Args:
      inputs: dense or sparse feature tensor; it is cast to `self.dtype`.

    Returns:
      `self.act(self.adj @ (dropout(inputs) @ weights))`.
    """
    x = tf.cast(inputs, self.dtype)
    # isinstance (rather than an exact type comparison) also accepts
    # SparseTensor subclasses.
    if isinstance(x, tf.SparseTensor):
        # convert to dense if necessary
        x = tf.sparse_tensor_to_dense(x, validate_indices=False)
    # The second argument is the keep probability, hence 1 - dropout.
    x = tf.nn.dropout(x, tf.cast(1 - self.dropout, self.dtype))
    x = tf.matmul(x, self.vars['weights'])
    x = tf.matmul(self.adj, x)
    outputs = self.act(x)
    return outputs
def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, dtype=tf.float32, **kwargs):
    """Create the layer's weight matrix and store its configuration.

    Args:
      input_dim: number of input features per node.
      output_dim: number of output features per node.
      adj: adjacency tensor, dense or sparse; stored densely.
      dropout: dropout rate applied to the inputs.
      act: activation function applied to the output.
      dtype: dtype of the weights and of the computation.
      **kwargs: forwarded to the base layer constructor.
    """
    super(GraphConvolution, self).__init__(**kwargs)
    with tf.variable_scope(self.name + '_vars'):
        self.vars['weights'] = weight_variable_glorot(
            input_dim, output_dim, dtype=dtype, name="weights")
    self.dropout = dropout
    self.adj = adj
    # isinstance (rather than an exact type comparison) also accepts
    # SparseTensor subclasses.
    if isinstance(self.adj, tf.SparseTensor):
        # convert to dense if necessary
        self.adj = tf.sparse_tensor_to_dense(self.adj, validate_indices=False)
    self.act = act
    self.dtype = dtype
def ImageInput(input_pattern, num_threads, shape, using_ctc, reader=None):
    """Creates batched input tensors from the files matching input_pattern.

    TODO(rays) Expand for 2-d labels, 0-d labels, and logistic targets.

    Args:
      input_pattern: Filenames of the dataset(s) to read.
      num_threads: Number of preprocessing threads; one reader subgraph is
        built per thread.
      shape: ImageShape with the desired shape of the input; its
        `batch_size` sets the batch size and queue capacities.
      using_ctc: Passed through to `_ReadExamples` (take the unpadded_class
        labels instead of padded).
      reader: Passed through to `_ReadExamples`: function that returns an
        actual reader to read Examples from input files.

    Returns:
      images: The batched image tensor, named 'Images'.
      heights: Rank-1 tensor of the image heights, named 'Heights'.
      widths: Rank-1 tensor of the image widths, named 'Widths'.
      labels: Dense int64 labels reshaped to [batch_size, -1], named
        'Labels'.
      sparse_labels: The same labels as an int32 SparseTensor.
      truths: Rank-1 tensor of the truth texts, named 'Truths'.

    Raises:
      AssertionError: if no file matches input_pattern.
    """
    data_files = tf.gfile.Glob(input_pattern)
    assert data_files, 'no files found for dataset ' + input_pattern
    filename_queue = tf.train.string_input_producer(
        data_files, capacity=shape.batch_size * num_threads * 2)

    # One reader subgraph per preprocessing thread, all fed from the same
    # filename queue.
    per_thread_examples = []
    for _ in range(num_threads):
        image, height, width, labels, text = _ReadExamples(
            filename_queue, shape, using_ctc, reader)
        per_thread_examples.append([image, height, width, labels, text])

    # Batch the examples coming out of the reader subgraphs.
    batch = tf.train.batch_join(
        per_thread_examples,
        batch_size=shape.batch_size,
        capacity=16 * shape.batch_size,
        dynamic_pad=True)
    images, heights, widths, labels, truths = batch

    # Labels travel through the batcher serialized (it cannot batch sparse
    # tensors), so turn them back into a SparseTensor here.
    labels = tf.deserialize_many_sparse(labels, tf.int64)
    sparse_labels = tf.cast(labels, tf.int32)
    labels = tf.sparse_tensor_to_dense(labels)
    labels = tf.reshape(labels, [shape.batch_size, -1], name='Labels')

    # Flatten the remaining per-example tensors to just the batch dimension.
    heights = tf.reshape(heights, [-1], name='Heights')
    widths = tf.reshape(widths, [-1], name='Widths')
    truths = tf.reshape(truths, [-1], name='Truths')

    # Name the images as well and export them as a summary.
    images = tf.identity(images, name='Images')
    tf.image_summary('Images', images)
    return images, heights, widths, labels, sparse_labels, truths
def one_label_tensor(label):
    """Return a dense one-hot batch in which every row selects `label`.

    The result has shape [self.num_ulab_batch, self.dim_y], with 1.0 in
    column `label` of every row and 0.0 elsewhere.
    """
    rows = range(self.num_ulab_batch)
    indices = [[row, label] for row in rows]
    values = [1. for _ in rows]

    one_hot = tf.SparseTensor(
        indices=indices,
        values=values,
        shape=[self.num_ulab_batch, self.dim_y])
    _y_ulab = tf.sparse_tensor_to_dense(one_hot, 0.0)
    return _y_ulab
def get_prediction(self,items,v_arr,M):
    """Predict the expected rating of one or several items for one user.

    When predicting several items for the same user, pass them together:
    the visible matrix `V` and the hidden-unit term shared by all ratings
    are then built only once.

    Args:
      items: a single item index (`int` or NumPy integer), or a `list`,
        `numpy.ndarray` or `range` of item indices.
      v_arr: passed to `createV` to build the user's visible units.
      M: passed to `createV` to build the user's visible units.

    Returns:
      A scalar tensor for a single item, or a list of scalar tensors (one
      per item, in order) for a collection.

    Raises:
      TypeError: if `items` is of none of the supported types.
    """
    V = createV(v_arr, M, self.m, self.K)
    V = tf.sparse_tensor_to_dense(V)

    # Hidden-unit activation term that is common to every rating value k.
    Exp_c = self.h_bias
    for l in range(self.K):
        Exp_c = tf.add(
            Exp_c,
            tf.matmul(tf.reshape(V[:, l], (1, self.m)), self.weights[l, :, :]))

    def expected_rating(item):
        """Return sum_k (k + 1) * p_k / sum_k p_k for a single item."""
        Gamma = tf.exp(tf.mul(V[item, :], self.v_bias[item, :]))
        numerator = 0
        # The normaliser belongs to this item only; carrying it over from
        # one item to the next would skew every prediction after the first.
        den = 0
        for k in range(self.K):
            tmp = tf.add(
                tf.exp(-Exp_c),
                tf.exp(V[item, k] * self.weights[k, item, :]))
            tmp = Gamma[k] * tf.reduce_prod(tmp)
            den = den + tmp
            numerator = numerator + (k + 1) * tmp
        return numerator / den

    if isinstance(items, (list, np.ndarray, range)):
        if not isinstance(items, range):
            items = list(map(int, items))  # it must be int and not np.int32
        return [expected_rating(item) for item in items]
    if isinstance(items, (int, np.integer)):
        return expected_rating(int(items))
    raise TypeError(
        'items must be an int or a list/ndarray/range of ints, got {}'.format(
            type(items).__name__))
def test_one_hot(self):
    """`labels_to_one_hot` must produce one 5-way indicator row per label."""
    label_ids = [[1, 2], [3, 4]]
    # Row i of the identity matrix is the one-hot vector of label i.
    ref = np.eye(5, dtype=np.float32)[np.asarray(label_ids)]
    with self.test_session():
        labels = tf.constant(label_ids)
        sparse_one_hot = labels_to_one_hot(labels, 5)
        one_hot = tf.sparse_tensor_to_dense(sparse_one_hot).eval()
        self.assertAllEqual(one_hot, ref)
def testRandom(self):
    """sparse_maximum/sparse_minimum must match NumPy on random inputs."""
    np.random.seed(1618)
    dtypes = [np.int32, np.int64, np.float16, np.float32, np.float64]
    for shape in [(13,), (6, 8), (1, 7, 1)]:
        for dtype in dtypes:
            dense_a = np.random.randn(*shape).astype(dtype)
            dense_b = np.random.randn(*shape).astype(dtype)
            sp_a, _ = _sparsify(dense_a, thresh=-0.5)
            sp_b, _ = _sparsify(dense_b, thresh=-0.5)

            with self.test_session(use_gpu=False):
                sparse_max = tf.sparse_maximum(sp_a, sp_b)
                max_dense = tf.sparse_tensor_to_dense(sparse_max).eval()
                sparse_min = tf.sparse_minimum(sp_a, sp_b)
                min_dense = tf.sparse_tensor_to_dense(sparse_min).eval()

                # Compare against NumPy on the densified sparse operands.
                a_dense = tf.sparse_tensor_to_dense(sp_a).eval()
                b_dense = tf.sparse_tensor_to_dense(sp_b).eval()

                self.assertAllEqual(np.maximum(a_dense, b_dense), max_dense)
                self.assertAllEqual(np.minimum(a_dense, b_dense), min_dense)
def parser(self, record):
    """Parse one serialized example into labels, ids and profile tensors.

    Raw-bytes features are decoded and reshaped, variable-length features
    are densified, and the two shape features are returned as parsed.

    Returns:
      An 11-tuple: labels, userIds, itemIds, then indices, values, weights
      and shape of the user profiles, followed by the same four tensors of
      the item profiles.
    """
    raw_bytes = tf.FixedLenFeature([], tf.string)
    int_list = tf.VarLenFeature(tf.int64)
    float_list = tf.VarLenFeature(tf.float32)
    shape_pair = tf.FixedLenFeature([2], tf.int64)
    keys_to_features = {
        'labels': raw_bytes,
        'userIds': int_list,
        'itemIds': int_list,
        'user_profiles_indices': raw_bytes,
        'user_profiles_values': int_list,
        'user_profiles_weights': float_list,
        'user_profiles_shape': shape_pair,
        'item_profiles_indices': raw_bytes,
        'item_profiles_values': int_list,
        'item_profiles_weights': float_list,
        'item_profiles_shape': shape_pair,
    }
    parsed = tf.parse_single_example(record, keys_to_features)

    def dense(key):
        return tf.sparse_tensor_to_dense(parsed[key])

    def decoded(key, dtype, new_shape):
        return tf.reshape(tf.decode_raw(parsed[key], dtype), new_shape)

    labels = decoded('labels', tf.float32, [-1, 1])
    user_ids = dense('userIds')
    item_ids = dense('itemIds')

    user_indices = decoded('user_profiles_indices', tf.int64, [-1, 2])
    user_values = dense('user_profiles_values')
    user_weights = dense('user_profiles_weights')
    user_shape = parsed['user_profiles_shape']

    item_indices = decoded('item_profiles_indices', tf.int64, [-1, 2])
    item_values = dense('item_profiles_values')
    item_weights = dense('item_profiles_weights')
    item_shape = parsed['item_profiles_shape']

    return (labels, user_ids, item_ids,
            user_indices, user_values, user_weights, user_shape,
            item_indices, item_values, item_weights, item_shape)
def gen_train_loss(self, scores, predicted_rewards):
    """Build the generator's policy-gradient loss and the baseline loss.

    Args:
      scores: discriminator scores; axis 2 is squeezed away after the
        sigmoid/log transform.
      predicted_rewards: baseline predictions, only used when
        `self.opts.with_baseline` is set.

    Returns:
      `(total_loss, baseline_loss)`: the per-row loss summed over all
      timesteps, and the mean squared baseline error (a constant 0 without
      baseline).
    """
    # The generator wants the discriminator's probabilities to be high, so
    # it minimises the negative log of those probabilities.
    rewards = -tf.log(tf.nn.sigmoid(scores))
    rewards = tf.squeeze(rewards, squeeze_dims=[2])

    use_baseline = self.opts.with_baseline
    baseline_loss = tf.constant(0.)
    effective_rewards = rewards
    if use_baseline:
        effective_rewards = rewards - predicted_rewards
        baseline_loss = tf.reduce_mean(effective_rewards ** 2)

    batch_size = self.opts.batch_size
    sequence_length = self.opts.sequence_length
    vocab_dim = self.dataset.vocab_dim  # read as in the original; unused

    # Policy-gradient loss: each reward is weighted by the probability of
    # the action (sampled word) that led to it.
    total_loss = tf.constant(0.)
    for step in range(sequence_length):
        step_probs = self.timestep_probs[:, step, :]

        # One (row, sampled word) coordinate per batch row -- the graph
        # version of NumPy's `probs[arange(n), word_idxs]` indexing.
        sampled_words = tf.expand_dims(self.generated[:, step], 1)
        row_numbers = tf.to_int64(tf.expand_dims(tf.range(batch_size), 1))
        coordinates = tf.concat(1, (row_numbers, sampled_words))

        selector = tf.SparseTensor(
            coordinates, tf.ones((batch_size,)), step_probs.get_shape())
        selector = tf.sparse_tensor_to_dense(selector)
        chosen_probs = tf.reduce_sum(
            tf.mul(selector, step_probs), reduction_indices=1)

        step_rewards = effective_rewards[:, step]
        total_loss = total_loss + tf.mul(step_rewards, chosen_probs)

    return total_loss, baseline_loss
def test_hashed_output_v1_has_collision(self):
    """Tests that the old fingerprint concatenation has collisions."""
    # 359 and 1024 + 359 share their last 10 bits, so with 1024 buckets
    # every cross of the first row collides with the second row's.
    ten_ids = list(range(10))
    t1 = tf.constant([[359], [359 + 1024]])
    t2 = tf.constant([ten_ids, ten_ids])
    cross = tf.contrib.layers.sparse_feature_cross(
        [t2, t1], hashed_output=True, num_buckets=1024)
    cross_dense = tf.sparse_tensor_to_dense(cross)
    with tf.Session():
        values = cross_dense.eval()
        first_row, second_row = values[0], values[1]
        self.assertTrue(numpy.equal(first_row, second_row).all())
def test_hashed_output_v2_has_no_collision(self):
    """Tests that the new fingerprint concatenation has no collisions."""
    # 359 and 1024 + 359 share their last 10 bits; with an explicit hash
    # key the crosses of the two rows must nevertheless all differ.
    ten_ids = list(range(10))
    t1 = tf.constant([[359], [359 + 1024]])
    t2 = tf.constant([ten_ids, ten_ids])
    cross = tf.contrib.layers.sparse_feature_cross(
        [t2, t1],
        hashed_output=True,
        num_buckets=1024,
        hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
    cross_dense = tf.sparse_tensor_to_dense(cross)
    with tf.Session():
        values = cross_dense.eval()
        first_row, second_row = values[0], values[1]
        self.assertTrue(numpy.not_equal(first_row, second_row).all())
def parser(self, record):
    """Parse one serialized example into FM and DNN feature tensors.

    Raw-bytes features are decoded and reshaped, variable-length features
    are densified, and the two shape features are returned as parsed.

    Returns:
      An 8-tuple: fm_feat_indices, fm_feat_values, fm_feat_shape, labels,
      dnn_feat_indices, dnn_feat_values, dnn_feat_weights, dnn_feat_shape.
    """
    raw_bytes = tf.FixedLenFeature([], tf.string)
    shape_pair = tf.FixedLenFeature([2], tf.int64)
    keys_to_features = {
        'fm_feat_indices': raw_bytes,
        'fm_feat_values': tf.VarLenFeature(tf.float32),
        'fm_feat_shape': shape_pair,
        'labels': raw_bytes,
        'dnn_feat_indices': raw_bytes,
        'dnn_feat_values': tf.VarLenFeature(tf.int64),
        'dnn_feat_weights': tf.VarLenFeature(tf.float32),
        'dnn_feat_shape': shape_pair,
    }
    parsed = tf.parse_single_example(record, keys_to_features)

    def dense(key):
        return tf.sparse_tensor_to_dense(parsed[key])

    def decoded(key, dtype, new_shape):
        return tf.reshape(tf.decode_raw(parsed[key], dtype), new_shape)

    fm_indices = decoded('fm_feat_indices', tf.int64, [-1, 2])
    fm_values = dense('fm_feat_values')
    fm_shape = parsed['fm_feat_shape']

    labels = decoded('labels', tf.float32, [-1, 1])

    dnn_indices = decoded('dnn_feat_indices', tf.int64, [-1, 2])
    dnn_values = dense('dnn_feat_values')
    dnn_weights = dense('dnn_feat_weights')
    dnn_shape = parsed['dnn_feat_shape']

    return (fm_indices, fm_values, fm_shape, labels,
            dnn_indices, dnn_values, dnn_weights, dnn_shape)
def testTranspose(self):
    """sparse_transpose must agree with the dense transpose on random data."""
    with self.test_session(use_gpu=False):
        np.random.seed(1618)
        # One random shape for each rank from 1 to 5.
        shapes = [np.random.randint(1, 10, size=rank) for rank in range(1, 6)]
        dtypes = [np.int32, np.int64, np.float32, np.float64]
        for shape in shapes:
            for dtype in dtypes:
                dense_input = np.random.randn(*shape).astype(dtype)
                rank = tf.rank(dense_input).eval()
                # A random permutation of the axes.
                perm = np.random.choice(rank, rank, False)
                sparse_input, _ = _sparsify(dense_input)

                sparse_transposed = tf.sparse_transpose(sparse_input, perm=perm)
                actual = tf.sparse_tensor_to_dense(sparse_transposed).eval()
                expected = tf.transpose(dense_input, perm=perm).eval()
                self.assertAllEqual(actual, expected)
def _make_train_op(self):
    """Build the training op, the metric ops and their summaries.

    The method only adds ops to the current graph; it has no return
    statement.  Ops are published through graph collections:
    'transform_op', 'train_op', 'l2_loss', 'msre', 'pll' and
    'free_energy_op'.

    Steps, in order:
      1. optionally apply dropout to `self._X_batch`;
      2. run the Gibbs chain (`self._make_gibbs_chain`);
      3. estimate the gradients dW, dvb, dhb, then rebuild dW from two
         averaged scalars scattered at the index sets derived from
         `self._ind_filter` and `self._ind_center`;
      4. create the momentum assign ops -- note that only `W_update` is
         grouped into `train_op`; the bias assign ops are created but are
         not part of `train_op`;
      5. create the metric ops (L2 loss, mean squared reconstruction
         error, pseudo log-likelihood, free energy) and scalar summaries
         gated by `self.metrics_config`.
    """
    # apply dropout if necessary
    if self.dropout is not None:
        self._X_batch = tf.nn.dropout(self._X_batch, keep_prob=self._dropout)

    # Run Gibbs chain for specified number of steps.
    with tf.name_scope('gibbs_chain'):
        h0_means = self._means_h_given_v(self._X_batch)
        h0_samples = self._sample_h_given_v(h0_means)
        # start the chain from sampled states or from the means
        h_states = h0_samples if self.sample_h_states else h0_means

        v_states, v_means, _, h_means = self._make_gibbs_chain(h_states)

    # visualize hidden activation means
    if self.display_hidden_activations:
        with tf.name_scope('hidden_activations_visualization'):
            # keep only the first `display_hidden_activations` columns
            h_means_display = h_means[:, :self.display_hidden_activations]
            h_means_display = tf.cast(h_means_display, tf.float32)
            # add leading and trailing singleton axes before tf.summary.image
            h_means_display = tf.expand_dims(h_means_display, 0)
            h_means_display = tf.expand_dims(h_means_display, -1)
            tf.summary.image('hidden_activation_means', h_means_display)

    # encoded data, used by the transform method
    with tf.name_scope('transform'):
        transform_op = tf.identity(h_means)
        tf.add_to_collection('transform_op', transform_op)

    # compute gradients estimates (= positive - negative associations)
    with tf.name_scope('grads_estimates'):
        # number of training examples might not be divisible by batch size
        N = tf.cast(tf.shape(self._X_batch)[0], dtype=self._tf_dtype)
        # the following are equation (31-33) in the tex pdf file: CD-k algorithm
        with tf.variable_scope('dW'):
            # outer product of two horizontal vectors
            dW_positive = tf.matmul(self._X_batch, h0_means, transpose_a=True)
            dW_negative = tf.matmul(v_states, h_means, transpose_a=True)
            # update step of equation (31), with l2 regularization.
            # The self._l2 is the "weight decay" parameter, which is just a fancy way
            # of stating lambda in l2 regularization: lambda*||W||_2^2
            # Shape of W: n_visible x n_hidden; N is the batch size, default is 10.
            # Multiplication of self._X_batch and h0_means will generate a sum, so
            # division by N gives the batch mean of dW.
            #dW = (dW_positive - dW_negative) / N - self._l2 * self._W # original update step
            #dW_temp = self._dW_temp
            #dW1_temp = self._dW1_temp
            #dW0_temp = self._dW0_temp
            dW = (dW_positive - dW_negative) / N - self._l2 * self._W
            with tf.name_scope('filter_update'):
                # NOTE(review): the meaning of the second argument (4 here,
                # 1 below) is defined by `gather_indices`, not visible here.
                temp_ind_1 = self.gather_indices(self._ind_filter, 4)
                temp_values = tf.gather_nd(dW, temp_ind_1)
                # tf.scatter_nd_update(ref, ind, a) takes a column vector a and assign to ref according to indexes "ind" in ref
                #dW1_temp = tf.scatter_nd_update(self._dW1_temp, temp_ind_1, temp_values)
                # zero tensor shaped like dW holding only the gathered entries
                dW1_temp = tf.scatter_nd(temp_ind_1, temp_values, tf.shape(dW))
            with tf.name_scope('center_update'):
                temp_ind_2 = self.gather_indices(self._ind_center, 1)
                temp_values_2 = tf.gather_nd(dW, temp_ind_2)
                #dW0_temp = tf.scatter_nd_update(self._dW0_temp, temp_ind_2, temp_values_2)
                dW0_temp = tf.scatter_nd(temp_ind_2, temp_values_2, tf.shape(dW))
            # take average over the filters, accounting for rotational and translational symmetry
            with tf.name_scope('cal_mean'):
                #dw1 = tf.reduce_mean(self._dW1_temp)
                #dw0 = tf.reduce_mean(self._dW0_temp)
                # reduce_mean runs over the whole scattered tensor (zeros
                # included); the result is then rescaled by n_visible (/ 4).
                dw1 = tf.reduce_mean(dW1_temp) * self.n_visible / 4
                dw0 = tf.reduce_mean(dW0_temp) * self.n_visible
                #tf.scatter_nd_update(self._dW_temp, temp_ind_1, dw1*tf.ones(temp_ind_1.shape[0])) # assign non-zero values to a zero matrix
                #tf.scatter_nd_update(self._dW_temp, temp_ind_2, dw0*tf.ones(temp_ind_2.shape[0]))
                # dW is rebuilt from scratch: the scalar dw1 at every filter
                # index plus the scalar dw0 at every center index, zero elsewhere.
                dW = tf.scatter_nd(temp_ind_1, dw1*tf.ones(tf.shape(temp_ind_1)[0], dtype=dw1.dtype), tf.shape(self._dW_temp)) + \
                     tf.scatter_nd(temp_ind_2, dw0*tf.ones(tf.shape(temp_ind_2)[0], dtype=dw1.dtype), tf.shape(self._dW_temp))
        with tf.name_scope('dvb'):
            dvb = tf.reduce_mean(self._X_batch - v_states, axis=0) # == sum / N
        with tf.name_scope('dhb'):
            dhb = tf.reduce_mean(h0_means - h_means, axis=0) # == sum / N

    # apply sparsity targets if needed
    with tf.name_scope('sparsity_targets'):
        q_means = tf.reduce_sum(h_means, axis=0)
        # damped running value of q_means, stored in self._q_means
        q_update = self._q_means.assign(self._sparsity_damping * self._q_means + \
                                        (1 - self._sparsity_damping) * q_means)
        # NOTE(review): sparsity_penalty is computed but not applied -- the
        # two updates below are commented out.
        sparsity_penalty = self._sparsity_cost * (q_update - self._sparsity_target)
        #dhb -= sparsity_penalty
        #dW -= sparsity_penalty

    # update parameters
    with tf.name_scope('momentum_updates'):
        with tf.name_scope('dW_update'):
            # momentum method
            dW_update = self._dW.assign(self._learning_rate * (self._momentum * self._dW + dW))
            W_update = self._W.assign_add(dW_update)
        with tf.name_scope('dvb_update'):
            dvb_update = self._dvb.assign(
                self._learning_rate * (self._momentum * self._dvb + dvb))
            #vb_update = self._vb.assign_add(dvb_update)
        with tf.name_scope('dhb_update'):
            dhb_update = self._dhb.assign(
                self._learning_rate * (self._momentum * self._dhb + dhb))
            #hb_update = self._hb.assign_add(dhb_update)

    # assemble train_op
    with tf.name_scope('training_step'):
        #train_op = tf.group(W_update, vb_update, hb_update)
        # only the weight update is part of the training op
        train_op = tf.group(W_update)
        tf.add_to_collection('train_op', train_op)

    # compute metrics
    with tf.name_scope('L2_loss'):
        l2_loss = self._l2 * tf.nn.l2_loss(self._W)
        tf.add_to_collection('l2_loss', l2_loss)

    with tf.name_scope('mean_squared_recon_error'):
        msre = tf.reduce_mean(tf.square(self._X_batch - v_means))
        tf.add_to_collection('msre', msre)

    # Since reconstruction error is fairly poor measure of performance,
    # as this is not what CD-k learning algorithm aims to minimize [2],
    # compute (per sample average) pseudo-loglikelihood (proxy to likelihood)
    # instead, which not only is much more cheaper to compute, but also
    # learning with PLL is asymptotically consistent [1].
    # More specifically, PLL computed using approximation as in [3].
    with tf.name_scope('pseudo_loglik'):
        x = self._X_batch
        # randomly corrupt one feature in each sample
        x_ = tf.identity(x)
        batch_size = tf.shape(x)[0]
        pll_rand = tf.random_uniform([batch_size], minval=0,
                                     maxval=self._n_visible,
                                     dtype=tf.int32)
        # one (row, random column) index pair per sample
        ind = tf.transpose([tf.range(batch_size), pll_rand])
        #m = tf.SparseTensor(indices=tf.to_int64(ind),
        #                    values=tf.ones_like(pll_rand, dtype=self._tf_dtype),
        #                    dense_shape=tf.to_int64(tf.shape(x_)))
        m = tf.SparseTensor(indices=tf.cast(ind, dtype='int64'),
                            values=tf.ones_like(pll_rand, dtype=self._tf_dtype),
                            dense_shape=tf.cast(tf.shape(x_), dtype='int64'))
        # The densified mask is +1 at the chosen entry and -1 elsewhere, so
        # the multiplier is -1 / +1: the chosen entry becomes -x, the rest
        # stay x.  Adding m then turns the chosen entry into 1 - x.
        x_ = tf.multiply(x_, -tf.sparse_tensor_to_dense(m, default_value=-1))
        x_ = tf.sparse_add(x_, m)
        x_ = tf.identity(x_, name='x_corrupted')

        pll = tf.cast(self._n_visible, dtype=self._tf_dtype) *\
              tf.log_sigmoid(self._free_energy(x_)-self._free_energy(x))
        tf.add_to_collection('pll', pll)

    # add also free energy of input batch to collection (for feg)
    free_energy_op = self._free_energy(self._X_batch)
    tf.add_to_collection('free_energy_op', free_energy_op)

    # collect summaries
    if self.metrics_config['l2_loss']:
        tf.summary.scalar(self._metrics_names_map['l2_loss'], l2_loss)
    if self.metrics_config['msre']:
        tf.summary.scalar(self._metrics_names_map['msre'], msre)
    if self.metrics_config['pll']:
        tf.summary.scalar(self._metrics_names_map['pll'], pll)
def buildModel(self, inputShape):
    """Build the graph: inputs, hidden conv layer, pooled classifier, loss,
    optimizers and summaries.

    Args:
        inputShape: tuple of four ints describing one example.  It is
            concatenated with `(self.batchSize,)` to size the input node, so
            it must be a tuple.  `inputShape[0]` is the time dimension; in
            stereo mode it is split in two and the stereo pair is folded
            into the last (feature) dimension, so it must be even.

    Side effects:
        Sets many attributes on `self` (input nodes, intermediate tensors,
        `loss`, `accuracy`, `classF1`, the four optimizer ops,
        `eval_vals` / `eval_idx`) and registers scalar and histogram
        summaries.  Nothing is returned.
    """
    #Running on GPU
    with tf.device(self.device):
        self.defineVars()
        with tf.name_scope("inputOps"):
            self.inputImage = node_variable((self.batchSize,)+inputShape, "inputImage")
            #We split the time dimension to stereo and concatenate with feature dim
            if(self.stereo):
                # Floor division keeps numTime an int; true division would
                # yield a float on Python 3, which is not a valid reshape
                # dimension.
                numTime = inputShape[0]//2
                self.reshapeImage = tf.reshape(self.inputImage, [self.batchSize, numTime, 2, inputShape[1], inputShape[2], inputShape[3]])
                # Move the stereo axis last so it can be merged with features.
                self.permuteImage = tf.transpose(self.reshapeImage, [0, 1, 3, 4, 5, 2])
                imageShape = [self.batchSize, numTime, inputShape[1], inputShape[2], inputShape[3]*2]
                self.image = tf.reshape(self.permuteImage, imageShape)
            else:
                imageShape = [self.batchSize, inputShape[0], inputShape[1], inputShape[2], inputShape[3]]
                self.image = tf.reshape(self.inputImage, imageShape)

            if(self.augment):
                #Add noise to image
                #Image has mean 0, std 1
                randMean = tf.random_uniform([], minval=-self.augMean, maxval=self.augMean)
                self.augNoise = tf.random_normal(imageShape, mean=randMean, stddev=self.augStd)
                #Placeholder for augmentation: the noise is scaled by this
                #scalar, so feeding 0 disables it.
                self.doAug = tf.placeholder("float32", [], "doAug")
                self.image = self.image + (self.augNoise * self.doAug)

            self.padInput = tf.pad(self.image, [[0, 0], [0, 0], [7, 7], [15, 15], [0, 0]])

            if(self.gtSparse):
                # Ground truth is fed as (indices, values) of a 2-D sparse
                # matrix and densified/reshaped inside the graph.
                self.gtIndices = tf.placeholder("int64", [2, None], "gtIndices")
                self.gtValues = node_variable([None], "gtValues")
                self.pre_gt = tf.sparse_tensor_to_dense(tf.SparseTensor(
                    tf.transpose(self.gtIndices, [1, 0]),
                    self.gtValues,
                    [self.batchSize*self.gtShape[0], self.gtShape[1]*self.gtShape[2]*self.numClasses]),
                    validate_indices=False)
                self.gt = tf.reshape(self.pre_gt, [self.batchSize, self.gtShape[0], self.gtShape[1], self.gtShape[2], self.numClasses])
            else:
                self.gt = tf.placeholder("float32", [self.batchSize, self.gtShape[0], self.gtShape[1], self.gtShape[2], self.numClasses])
            # Drop axis 1 of the ground truth (it must have size 1).
            self.select_gt = tf.squeeze(self.gt[:, :, :, :, :], squeeze_dims=[1])

        with tf.name_scope("Hidden"):
            if(self.time):
                # 3-D convolution over time, then max over the time axis.
                self.h_hidden = tf.nn.relu(tf.nn.conv3d(self.padInput, self.h_weight, [1, 1, 4, 4, 1], padding="VALID") + self.h_bias)
                self.timePooled = tf.reduce_max(self.h_hidden, reduction_indices=1)
            else:
                # No time axis: squeeze it from input and weights, use conv2d.
                self.squeezeInput = tf.squeeze(self.padInput, axis=1)
                self.squeezeWeight = tf.squeeze(self.h_weight, axis=0)
                self.h_hidden = tf.nn.relu(tf.nn.conv2d(self.squeezeInput, self.squeezeWeight, [1, 4, 4, 1], padding="VALID") + self.h_bias)
                self.timePooled = self.h_hidden

        with tf.name_scope("Pool"):
            yPool = int(np.ceil(float(16)/self.gtShape[1]))
            xPool = int(np.ceil(float(64)/self.gtShape[2]))
            self.inputPooled = tf.nn.max_pool(self.timePooled, ksize=[1, yPool, xPool, 1], strides=[1, yPool, xPool, 1], padding="SAME")
            # Same pooling window but stride 1, used for the `cam` output.
            self.camPooled = tf.nn.max_pool(self.timePooled, ksize=[1, yPool, xPool, 1], strides=[1, 1, 1, 1], padding="SAME")
            self.h_conv = tf.nn.conv2d(self.inputPooled, self.class_weight, [1, 1, 1, 1], padding="VALID") + self.class_bias
            self.cam = tf.nn.conv2d(self.camPooled, self.class_weight, [1, 1, 1, 1], padding="VALID") + self.class_bias

            # Put the class axis second so the spatial axes can be averaged.
            self.reshape_cam = tf.transpose(self.cam, [0, 3, 1, 2])
            #Per-class score used for ranking: spatial mean of cam
            self.classRank = tf.reduce_mean(self.reshape_cam, reduction_indices=[2, 3])
            self.est = pixelSoftmax(self.h_conv)

        with tf.name_scope("Loss"):
            self.flat_gt = tf.reshape(self.select_gt, [-1, self.numClasses])
            self.flat_est = tf.reshape(self.est, [-1, self.numClasses])
            gtClass = tf.argmax(self.flat_gt, 1)
            estClass = tf.argmax(self.flat_est, 1)
            correct = tf.equal(gtClass, estClass)
            self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

            # Per-class F1; epsilon guards the divisions against zero counts.
            self.classF1 = []
            for c in range(self.numClasses):
                classGT = tf.equal(gtClass, c)
                classEst = tf.equal(estClass, c)
                classTP = tf.reduce_sum(tf.cast(tf.logical_and(classGT, classEst), tf.float32))
                classFP = tf.reduce_sum(tf.cast(tf.logical_and(tf.logical_not(classGT), classEst), tf.float32))
                classFN = tf.reduce_sum(tf.cast(tf.logical_and(classGT, tf.logical_not(classEst)), tf.float32))
                precision = classTP/(classTP+classFP+self.epsilon)
                recall = classTP/(classTP+classFN+self.epsilon)
                self.classF1.append((2*precision*recall)/(precision+recall+self.epsilon))

            self.weightRegLoss = tf.reduce_sum(tf.square(self.h_weight)) + tf.reduce_sum(tf.square(self.class_weight))
            # Class-weighted cross entropy plus the weight regularizer.
            self.loss = tf.reduce_mean(-tf.reduce_sum(self.lossWeight[0:self.numClasses] * self.select_gt * tf.log(self.est+self.epsilon), reduction_indices=3)) + self.regWeight * self.weightRegLoss

        with tf.name_scope("Opt"):
            # Weights use Adam, biases use plain gradient descent.
            self.optimizerAll = tf.train.AdamOptimizer(self.learningRate, beta1=self.beta1, beta2=self.beta2, epsilon=self.epsilon).minimize(
                self.loss,
                var_list=[
                    self.h_weight,
                    self.class_weight,
                ])
            self.optimizerBias = tf.train.GradientDescentOptimizer(self.learningRateBias).minimize(
                self.loss,
                var_list=[
                    self.h_bias,
                    self.class_bias,
                ])
            # The "Pre" optimizers update only the classification layer.
            self.optimizerPre = tf.train.AdamOptimizer(self.learningRate, beta1=self.beta1, beta2=self.beta2, epsilon=self.epsilon).minimize(
                self.loss,
                var_list=[
                    self.class_weight,
                ])
            self.optimizerPreBias = tf.train.GradientDescentOptimizer(self.learningRateBias).minimize(
                self.loss,
                var_list=[
                    self.class_bias,
                ])

        # Top-k classes by classRank (k capped at the number of classes).
        numK = min(5, self.numClasses)
        (self.eval_vals, self.eval_idx) = tf.nn.top_k(self.classRank, k=numK)

    #Summaries
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('accuracy', self.accuracy)
    for c in range(self.numClasses):
        className = self.idxToName[c]
        tf.summary.scalar(className+' F1', self.classF1[c])

    tf.summary.histogram('input', self.inputImage)
    tf.summary.histogram('inputPooled', self.inputPooled)
    tf.summary.histogram('gt', self.select_gt)
    if(self.augment):
        tf.summary.histogram('augNoise', self.augNoise)
    #Conv layer histograms
    tf.summary.histogram('h_conv', self.h_conv)
    tf.summary.histogram('h_hidden', self.h_hidden)
    # NOTE(review): 'h_norm_hidden' logs the same tensor as 'h_hidden'.
    tf.summary.histogram('h_norm_hidden', self.h_hidden)
    tf.summary.histogram('est', self.est)
    #Weight and bias hists
    tf.summary.histogram('h_weight', self.h_weight)
    tf.summary.histogram('h_bias', self.h_bias)
    tf.summary.histogram('class_weight', self.class_weight)
    tf.summary.histogram('class_bias', self.class_bias)
def train(config, init_checkpoint):
    """Build the CTC training graph and run the training loop.

    Args:
        config: configuration object.  The attributes read here are
            `vocab`, `r_vocab`, `input_shape`, `rnn_cells_num`,
            `num_classes`, `model_dir` and the `train` sub-object
            (batch size, augmentation flags, learning rate, optimizer type,
            step counts, logging/checkpoint switches, optional
            `random_seed` and `execution` settings).
        init_checkpoint: checkpoint path to restore from.  When falsy, the
            latest checkpoint found in `config.model_dir` (if any) is
            restored instead.

    Side effects:
        May set CUDA environment variables, assigns the vocabularies on
        `CTCUtils`, writes summaries and checkpoints under
        `config.model_dir`.
    """
    has_seed = hasattr(config.train, 'random_seed')
    if has_seed:
        np.random.seed(config.train.random_seed)
        tf.set_random_seed(config.train.random_seed)
        random.seed(config.train.random_seed)

    if hasattr(config.train.execution, 'CUDA_VISIBLE_DEVICES'):
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = config.train.execution.CUDA_VISIBLE_DEVICES

    CTCUtils.vocab = config.vocab
    CTCUtils.r_vocab = config.r_vocab

    input_train_data = InputData(batch_size=config.train.batch_size,
                                 input_shape=config.input_shape,
                                 file_list_path=config.train.file_list_path,
                                 apply_basic_aug=config.train.apply_basic_aug,
                                 apply_stn_aug=config.train.apply_stn_aug,
                                 apply_blur_aug=config.train.apply_blur_aug)

    graph = tf.Graph()
    with graph.as_default():
        if has_seed:
            # The graph-level seed belongs to the graph that is default at
            # call time.  The call above ran before this graph existed, so
            # it has to be repeated here (before any op is created) for the
            # seed to affect the training graph.
            tf.set_random_seed(config.train.random_seed)

        global_step = tf.Variable(0, name='global_step', trainable=False)

        input_data, input_labels = input_train_data.input_fn()

        prob = inference(config.rnn_cells_num, input_data, config.num_classes)
        prob = tf.transpose(prob, (1, 0, 2))  # prepare for CTC

        data_length = tf.fill([tf.shape(prob)[1]], tf.shape(prob)[0])  # input seq length, batch size
        ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [input_labels], [tf.int64, tf.int64, tf.int64])
        ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))

        predictions = tf.to_int32(
            tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False, beam_width=10)[0][0])
        # The next two ops are created only so they exist in the graph under
        # their names; their Python handles are not used in this function.
        tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
        tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False), name='error_rate')

        loss = tf.reduce_mean(
            tf.nn.ctc_loss(inputs=prob, labels=ctc_labels, sequence_length=data_length, ctc_merge_repeated=True),
            name='loss')

        # Learning rate drops by 10x at steps 150000 and 200000.
        learning_rate = tf.train.piecewise_constant(global_step, [150000, 200000],
                                                    [config.train.learning_rate,
                                                     0.1 * config.train.learning_rate,
                                                     0.01 * config.train.learning_rate])
        opt_loss = tf.contrib.layers.optimize_loss(loss, global_step, learning_rate, config.train.opt_type,
                                                   config.train.grad_noise_scale, name='train_step')

        # Creates the op that is run below by its default name, 'init'.
        tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=1000, write_version=tf.train.SaverDef.V2, save_relative_paths=True)

    conf = tf.ConfigProto()
    if hasattr(config.train.execution, 'per_process_gpu_memory_fraction'):
        conf.gpu_options.per_process_gpu_memory_fraction = config.train.execution.per_process_gpu_memory_fraction
    if hasattr(config.train.execution, 'allow_growth'):
        conf.gpu_options.allow_growth = config.train.execution.allow_growth

    session = tf.Session(graph=graph, config=conf)
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coordinator)
    writer = None

    # Everything from here on runs with live queue threads and an open
    # session; the finally block releases them even when a step fails.
    try:
        session.run('init')

        if init_checkpoint:
            tf.logging.info('Initialize from: ' + init_checkpoint)
            saver.restore(session, init_checkpoint)
        else:
            latest_checkpoint = tf.train.latest_checkpoint(config.model_dir)
            if latest_checkpoint:
                tf.logging.info('Restore from: ' + latest_checkpoint)
                saver.restore(session, latest_checkpoint)

        if config.train.need_to_save_log:
            writer = tf.summary.FileWriter(config.model_dir, session.graph)

        graph.finalize()

        for i in range(config.train.steps):
            curr_step, curr_learning_rate, curr_loss, curr_opt_loss = session.run(
                [global_step, learning_rate, loss, opt_loss])

            if i % config.train.display_iter == 0:
                if config.train.need_to_save_log:
                    writer.add_summary(tf.Summary(value=[
                        tf.Summary.Value(tag='train/loss', simple_value=float(curr_loss)),
                        tf.Summary.Value(tag='train/learning_rate', simple_value=float(curr_learning_rate)),
                        tf.Summary.Value(tag='train/optimization_loss', simple_value=float(curr_opt_loss))
                    ]), curr_step)
                    writer.flush()

                tf.logging.info('Iteration: ' + str(curr_step) + ', Train loss: ' + str(curr_loss))

            if ((curr_step % config.train.save_checkpoints_steps == 0 or curr_step == config.train.steps)
                    and config.train.need_to_save_weights):
                saver.save(session, config.model_dir + '/model.ckpt-{:d}.ckpt'.format(curr_step))
    finally:
        coordinator.request_stop()
        try:
            coordinator.join(threads)
        finally:
            if writer is not None:
                writer.close()
            session.close()
} ) label = features['label'] # casting so we can multiply with dot product label = tf.cast(label, tf.float32) index = features['index'] sparse_features = features['value'] # dense_feature = tf.sparse_to_dense(tf.sparse_tensor_to_dense(index), # [num_features,], # tf.sparse_tensor_to_dense(sparse_features)) # sparse gathered_w = tf.gather(w, tf.sparse_tensor_to_dense(index)) gathered_features = sparse_features.values dot = tf.reduce_sum(tf.multiply(gathered_w, tf.transpose(gathered_features))) # dots.append(dot) local_gradient = label * (tf.sigmoid(label * dot) - 1) * gathered_features # print("local grad::: ", local_gradient) local_gradient = tf.Print(local_gradient, [local_gradient], "local gradient") # -- sparse with tf.device("/job:worker/task:0"): print "updating value of w" # Sparse value assign_op = w.assign_sub((tf.sparse_to_dense(sparse_indices=tf.sparse_tensor_to_dense(index), output_shape=[num_features,], sparse_values=tf.reshape(local_gradient, [-1])))*eta) # -- sparse assign_op = tf.Print(assign_op, [assign_op], "new value of w")
def refine_detections_graph(rois, probs, deltas, window, config):
    '''Turn classified proposals into the final, padded detection tensor.

    Each ROI is assigned its top-scoring class, refined with that class's
    box deltas and clipped to the window.  Background ROIs (class 0) and,
    when a minimum confidence is configured, low-scoring ROIs are dropped.
    Per-class non-maximum suppression is applied and the best
    config.DETECTION_MAX_INSTANCES detections by score are kept.

    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
                bounding box deltas.
        window: (y1, x1, y2, x2) in normalized coordinates. The part of the
                image that contains the image excluding the padding.
        config: object providing BBOX_STD_DEV, DETECTION_MIN_CONFIDENCE,
                DETECTION_MAX_INSTANCES and DETECTION_NMS_THRESHOLD.

    Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id,
        score)] where coordinates are normalized; rows are zero-padded up to
        DETECTION_MAX_INSTANCES.
    '''
    max_instances = config.DETECTION_MAX_INSTANCES

    def _common_indices(first, second):
        '''Return the 1-D index tensor holding values present in both inputs.'''
        shared = tf.sets.set_intersection(tf.expand_dims(first, 0),
                                          tf.expand_dims(second, 0))
        return tf.sparse_tensor_to_dense(shared)[0]

    # Top class of every ROI, plus its probability and its box deltas.
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    top_class_index = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, top_class_index)
    top_class_deltas = tf.gather_nd(deltas, top_class_index)

    # Refine the boxes ([boxes, (y1, x1, y2, x2)], normalized) and clip them
    # to the image window.
    refined_rois = apply_box_deltas_graph(
        rois, top_class_deltas * config.BBOX_STD_DEV)
    refined_rois = clip_boxes_graph(refined_rois, window)

    # TODO: Filter out boxes with zero area

    # Candidate set: non-background, optionally confident enough.
    candidates = tf.where(class_ids > 0)[:, 0]
    if config.DETECTION_MIN_CONFIDENCE:
        confident = tf.where(
            class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        candidates = _common_indices(candidates, confident)

    # Per-class NMS, step 1: gather the candidates' attributes.
    candidate_class_ids = tf.gather(class_ids, candidates)
    candidate_scores = tf.gather(class_scores, candidates)
    candidate_rois = tf.gather(refined_rois, candidates)
    present_class_ids = tf.unique(candidate_class_ids)[0]

    def nms_keep_map(class_id):
        '''Run NMS over the candidates of one class; return padded ROI ids.'''
        # Positions (within the candidate set) that belong to this class.
        of_class = tf.where(tf.equal(candidate_class_ids, class_id))[:, 0]
        selected = tf.image.non_max_suppression(
            tf.gather(candidate_rois, of_class),
            tf.gather(candidate_scores, of_class),
            max_output_size=max_instances,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        # Translate positions back to indices into the original ROI list.
        selected = tf.gather(candidates, tf.gather(of_class, selected))
        # Pad with -1 so every class yields a tensor of the same length,
        # then fix the static shape so map_fn() can infer the result shape.
        missing = max_instances - tf.shape(selected)[0]
        selected = tf.pad(selected, [(0, missing)],
                          mode='CONSTANT', constant_values=-1)
        selected.set_shape([max_instances])
        return selected

    # Step 2: run NMS for every class that occurs among the candidates.
    nms_survivors = tf.map_fn(nms_keep_map, present_class_ids,
                              dtype=tf.int64)
    # Step 3: flatten and strip the -1 padding.
    nms_survivors = tf.reshape(nms_survivors, [-1])
    nms_survivors = tf.gather(nms_survivors,
                              tf.where(nms_survivors > -1)[:, 0])
    # Step 4: keep only candidates that survived NMS.
    kept = _common_indices(candidates, nms_survivors)

    # Keep the highest-scoring detections, at most max_instances of them.
    kept_scores = tf.gather(class_scores, kept)
    n_kept = tf.minimum(tf.shape(kept_scores)[0], max_instances)
    best = tf.nn.top_k(kept_scores, k=n_kept, sorted=True)[1]
    kept = tf.gather(kept, best)

    # Assemble [N, (y1, x1, y2, x2, class_id, score)]; coordinates are
    # normalized.
    box_columns = tf.gather(refined_rois, kept)
    class_column = tf.to_float(tf.gather(class_ids, kept))[..., tf.newaxis]
    score_column = tf.gather(class_scores, kept)[..., tf.newaxis]
    detections = tf.concat([box_columns, class_column, score_column], axis=1)

    # Zero-pad the rows when fewer than max_instances detections remain.
    shortfall = max_instances - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, shortfall), (0, 0)], 'CONSTANT')
    return detections
def buildModel(self, inputShape):
    """Build the graph: inputs, hidden conv layer, two residual conv stages
    with dropout, pooled classifier, loss, optimizers and summaries.

    Args:
        inputShape: tuple of four ints describing one example.  It is
            concatenated with `(self.batchSize,)` to size the input node, so
            it must be a tuple.  `inputShape[0]` is the time dimension; in
            stereo mode it is split in two and the stereo pair is folded
            into the last (feature) dimension, so it must be even.

    Side effects:
        Sets many attributes on `self` (input nodes, `keep_prob`
        placeholder, intermediate tensors, `loss`, `accuracy`, `classF1`,
        the four optimizer ops, `eval_vals` / `eval_idx`) and registers
        scalar and histogram summaries.  Nothing is returned.
    """
    #Running on GPU
    with tf.device(self.device):
        self.defineVars()
        with tf.name_scope("inputOps"):
            self.inputImage = node_variable(
                (self.batchSize, ) + inputShape, "inputImage")
            if (self.stereo):
                #We split the time dimension to stereo and concatenate with feature dim.
                # Floor division keeps numTime an int; true division would
                # yield a float on Python 3, which is not a valid reshape
                # dimension.
                numTime = inputShape[0] // 2
                self.reshapeImage = tf.reshape(self.inputImage, [
                    self.batchSize, numTime, 2, inputShape[1],
                    inputShape[2], inputShape[3]
                ])
                # Move the stereo axis last so it can be merged with features.
                self.permuteImage = tf.transpose(self.reshapeImage,
                                                 [0, 1, 3, 4, 5, 2])
                self.image = tf.reshape(self.permuteImage, [
                    self.batchSize, numTime, inputShape[1], inputShape[2],
                    inputShape[3] * 2
                ])
            else:
                self.image = tf.reshape(self.inputImage, [
                    self.batchSize, inputShape[0], inputShape[1],
                    inputShape[2], inputShape[3]
                ])

            self.padInput = tf.pad(
                self.image, [[0, 0], [0, 0], [7, 7], [15, 15], [0, 0]])

            if (self.gtSparse):
                # Ground truth is fed as (indices, values) of a 2-D sparse
                # matrix and densified/reshaped inside the graph.
                self.gtIndices = tf.placeholder("int64", [2, None],
                                                "gtIndices")
                self.gtValues = node_variable([None], "gtValues")
                self.pre_gt = tf.sparse_tensor_to_dense(
                    tf.SparseTensor(tf.transpose(self.gtIndices, [1, 0]),
                                    self.gtValues, [
                                        self.batchSize * self.gtShape[0],
                                        self.gtShape[1] * self.gtShape[2] *
                                        self.numClasses
                                    ]),
                    validate_indices=False)
                self.gt = tf.reshape(self.pre_gt, [
                    self.batchSize, self.gtShape[0], self.gtShape[1],
                    self.gtShape[2], self.numClasses
                ])
            else:
                self.gt = tf.placeholder("float32", [
                    self.batchSize, self.gtShape[0], self.gtShape[1],
                    self.gtShape[2], self.numClasses
                ])
            # Keep the first numClasses channels and drop axis 1 (size 1).
            self.select_gt = tf.squeeze(self.gt[:, :, :, :,
                                                0:self.numClasses],
                                        squeeze_dims=[1])

        with tf.name_scope("Hidden"):
            if (self.time):
                # 3-D convolution over time, then max over the time axis.
                self.h_hidden = tf.nn.relu(
                    tf.nn.conv3d(self.padInput,
                                 self.h_weight, [1, 1, 4, 4, 1],
                                 padding="VALID") + self.h_bias)
                self.timePooled = tf.reduce_max(self.h_hidden,
                                                reduction_indices=1)
            else:
                # No time axis: squeeze it from input and weights, use conv2d.
                self.squeezeInput = tf.squeeze(self.padInput, axis=1)
                self.squeezeWeight = tf.squeeze(self.h_weight, axis=0)
                self.h_hidden = tf.nn.relu(
                    tf.nn.conv2d(self.squeezeInput,
                                 self.squeezeWeight, [1, 4, 4, 1],
                                 padding="VALID") + self.h_bias)
                self.timePooled = self.h_hidden

        with tf.name_scope("conv1"):
            yPool = 2
            xPool = 2
            self.hiddenPooled = tf.nn.max_pool(
                self.timePooled,
                ksize=[1, yPool, xPool, 1],
                strides=[1, yPool, xPool, 1],
                padding="SAME")
            self.h_res = tf.nn.relu(
                tf.nn.conv2d(self.hiddenPooled,
                             self.conv1_w, [1, 1, 1, 1],
                             padding="SAME") + self.conv1_b)
            # Residual connection around the conv.
            self.h_conv1 = self.hiddenPooled + self.h_res

        with tf.name_scope("reg1"):
            # One keep-probability placeholder shared by both dropout layers.
            self.keep_prob = tf.placeholder(tf.float32)
            self.h_dropout1 = tf.nn.dropout(self.h_conv1, self.keep_prob)

        with tf.name_scope("conv2"):
            yPool = 2
            xPool = 2
            #Pool over spatial dimensions to be 2x2
            self.h_conv1_pool = tf.nn.max_pool(
                self.h_dropout1,
                ksize=[1, yPool, xPool, 1],
                strides=[1, yPool, xPool, 1],
                padding="SAME")
            self.h_res2 = tf.nn.relu(
                tf.nn.conv2d(self.h_conv1_pool,
                             self.conv2_w, [1, 1, 1, 1],
                             padding="SAME") + self.conv2_b)
            # Residual connection around the conv.
            self.h_conv2 = self.h_conv1_pool + self.h_res2

        with tf.name_scope("reg2"):
            self.h_dropout2 = tf.nn.dropout(self.h_conv2, self.keep_prob)

        with tf.name_scope("conv3"):
            # The two earlier 2x2 pools already reduced each spatial axis
            # by 4, hence the extra factor of 4 in the denominators.
            yPool = int(np.ceil(float(16) / (self.gtShape[1] * 4)))
            xPool = int(np.ceil(float(64) / (self.gtShape[2] * 4)))
            self.h_conv2_pool = tf.nn.max_pool(
                self.h_dropout2,
                ksize=[1, yPool, xPool, 1],
                strides=[1, yPool, xPool, 1],
                padding="SAME")
            self.camPooled = tf.nn.max_pool(self.h_dropout2,
                                            ksize=[1, yPool, xPool, 1],
                                            strides=[1, 1, 1, 1],
                                            padding="SAME")
            self.h_conv3 = tf.nn.conv2d(self.h_conv2_pool,
                                        self.class_weight, [1, 1, 1, 1],
                                        padding="SAME") + self.class_bias
            #We evaluate pooling with smaller stride here
            self.cam = tf.nn.conv2d(self.camPooled,
                                    self.class_weight, [1, 1, 1, 1],
                                    padding="SAME") + self.class_bias

            # Put the class axis second so the spatial axes can be averaged.
            self.reshape_cam = tf.transpose(self.cam, [0, 3, 1, 2])
            #Per-class score used for ranking: spatial mean of cam
            self.classRank = tf.reduce_mean(self.reshape_cam,
                                            reduction_indices=[2, 3])
            self.est = pixelSoftmax(self.h_conv3)

        with tf.name_scope("Loss"):
            self.flat_gt = tf.reshape(self.select_gt,
                                      [-1, self.numClasses])
            self.flat_est = tf.reshape(self.est, [-1, self.numClasses])
            gtClass = tf.argmax(self.flat_gt, 1)
            estClass = tf.argmax(self.flat_est, 1)
            correct = tf.equal(gtClass, estClass)
            self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

            # Per-class F1; epsilon guards the divisions against zero counts.
            self.classF1 = []
            for c in range(self.numClasses):
                classGT = tf.equal(gtClass, c)
                classEst = tf.equal(estClass, c)
                classTP = tf.reduce_sum(
                    tf.cast(tf.logical_and(classGT, classEst), tf.float32))
                classFP = tf.reduce_sum(
                    tf.cast(
                        tf.logical_and(tf.logical_not(classGT), classEst),
                        tf.float32))
                classFN = tf.reduce_sum(
                    tf.cast(
                        tf.logical_and(classGT, tf.logical_not(classEst)),
                        tf.float32))
                precision = classTP / (classTP + classFP + self.epsilon)
                recall = classTP / (classTP + classFN + self.epsilon)
                self.classF1.append((2 * precision * recall) /
                                    (precision + recall + self.epsilon))

            self.weightRegLoss = tf.reduce_sum(
                tf.square(self.class_weight)) + tf.reduce_sum(
                    tf.square(self.conv2_w)) + tf.reduce_sum(
                        tf.square(self.conv1_w)) + tf.reduce_sum(
                            tf.square(self.h_weight))
            # Class-weighted cross entropy plus the weight regularizer.
            self.loss = tf.reduce_mean(-tf.reduce_sum(
                self.lossWeight[0:self.numClasses] * self.select_gt *
                tf.log(self.est + self.epsilon),
                reduction_indices=3)) + self.regWeight * self.weightRegLoss

        with tf.name_scope("Opt"):
            # Weights use Adam, biases use plain gradient descent.
            self.optimizerAll = tf.train.AdamOptimizer(
                self.learningRate,
                beta1=self.beta1,
                beta2=self.beta2,
                epsilon=self.epsilon).minimize(self.loss,
                                               var_list=[
                                                   self.h_weight,
                                                   self.conv1_w,
                                                   self.conv2_w,
                                                   self.class_weight
                                               ])
            self.optimizerBias = tf.train.GradientDescentOptimizer(
                self.learningRateBias).minimize(self.loss,
                                                var_list=[
                                                    self.h_bias,
                                                    self.conv1_b,
                                                    self.conv2_b,
                                                    self.class_bias
                                                ])
            # The "Pre" optimizers leave the first layer (h_weight / h_bias)
            # untouched and update only the later layers.
            self.optimizerPre = tf.train.AdamOptimizer(
                self.learningRate,
                beta1=self.beta1,
                beta2=self.beta2,
                epsilon=self.epsilon).minimize(
                    self.loss,
                    var_list=[
                        self.conv1_w, self.conv2_w, self.class_weight
                    ])
            self.optimizerPreBias = tf.train.GradientDescentOptimizer(
                self.learningRateBias).minimize(
                    self.loss,
                    var_list=[
                        self.conv1_b, self.conv2_b, self.class_bias
                    ])

        # Top-k classes by classRank (k capped at the number of classes).
        numK = min(5, self.numClasses)
        (self.eval_vals, self.eval_idx) = tf.nn.top_k(self.classRank,
                                                      k=numK)

    #Summaries
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('accuracy', self.accuracy)
    for c in range(self.numClasses):
        className = self.idxToName[c]
        tf.summary.scalar(className + ' F1', self.classF1[c])

    tf.summary.histogram('input', self.inputImage)
    tf.summary.histogram('hiddenPooled', self.hiddenPooled)
    tf.summary.histogram('gt', self.select_gt)
    #Conv layer histograms
    tf.summary.histogram('h_hidden', self.h_hidden)
    tf.summary.histogram('h_conv1', self.h_conv1)
    tf.summary.histogram('h_conv2', self.h_conv2)
    tf.summary.histogram('h_conv3', self.h_conv3)
    tf.summary.histogram('est', self.est)
    #Weight and bias hists
    tf.summary.histogram('h_weight', self.h_weight)
    tf.summary.histogram('h_bias', self.h_bias)
    tf.summary.histogram('conv1_w', self.conv1_w)
    tf.summary.histogram('conv1_b', self.conv1_b)
    tf.summary.histogram('conv2_w', self.conv2_w)
    tf.summary.histogram('conv2_b', self.conv2_b)
    tf.summary.histogram('class_weight', self.class_weight)
    tf.summary.histogram('class_bias', self.class_bias)
def sparse_to_tensor(self, sparse_tensor, dtype=tf.int32, axis=[1]):
    """Densify `sparse_tensor`, cast it to `dtype` and squeeze `axis`.

    Args:
        sparse_tensor: the sparse tensor to convert.
        dtype: target dtype of the dense result (default `tf.int32`).
        axis: axes passed to `tf.squeeze` (default `[1]`).

    Returns:
        The dense, cast tensor with the given axes removed.
    """
    dense = tf.sparse_tensor_to_dense(sparse_tensor)
    converted = tf.cast(dense, dtype)
    return tf.squeeze(converted, axis=axis)
def test_ctc_data_transform(self):
    ''' test ctc_data_transform '''
    with self.cached_session():
        # In this test case the shape of inputs is (B, T, D) = (1, 3, 6)
        # and the shape of labels is (B, T) = (1, 3).
        inputs = np.asarray([[[
            0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553
        ], [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436],
                              [
                                  0.0357786, 0.633813, 0.321418, 0.00249248,
                                  0.00272882, 0.0037688
                              ]]],
                            dtype=np.float32)
        labels = np.asarray([[1, 2, 3]], dtype=np.int64)

        # (blank_index, expected labels, expected inputs), checked in order.
        valid_cases = [
            (0, [[0, 1, 2]],
             [[[
                 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553,
                 0.633766
             ], [0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436,
                 0.111121],
               [
                   0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688,
                   0.0357786
               ]]]),
            (2, [[1, 5, 2]],
             [[[
                 0.633766, 0.221185, 0.0129757, 0.0142857, 0.0260553,
                 0.0917319
             ], [0.111121, 0.588392, 0.0055756, 0.00569609, 0.010436,
                 0.278779],
               [
                   0.0357786, 0.633813, 0.00249248, 0.00272882, 0.0037688,
                   0.321418
               ]]]),
            (5, [[1, 2, 3]],
             [[[
                 0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857,
                 0.0260553
             ], [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609,
                 0.010436],
               [
                   0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882,
                   0.0037688
               ]]]),
        ]
        for blank_index, new_labels, new_inputs in valid_cases:
            transformed = loss_utils.ctc_data_transform(
                labels, inputs, blank_index)
            labels_after_transform, inputs_after_transform = transformed
            labels_after_transform = tf.sparse_tensor_to_dense(
                labels_after_transform)
            self.assertAllEqual(labels_after_transform, new_labels)
            self.assertAllClose(inputs_after_transform, new_inputs)

        # Out-of-range blank indices must raise ValueError with these
        # exact messages.
        invalid_cases = [
            (-1, 'blank_index must be greater than or equal to zero'),
            (10, 'blank_index must be less than or equal to num_class - 1'),
        ]
        for blank_index, expected_message in invalid_cases:
            with self.assertRaises(ValueError) as valueErr:
                loss_utils.ctc_data_transform(labels, inputs, blank_index)
            self.assertEqual(str(valueErr.exception), expected_message)
def build_ctc(self):
    """Build the CTC training/inference graph inside `self._graph`.

    Creates the input placeholders, the RNN output (via `self.rnn_cell()`),
    a linear projection to `self.n_classes`, the CTC loss, an edit-distance
    based accuracy, the optimizer op (via `self.optimizer_fn`), histogram
    summaries for the projection variables, and saver/initializer ops.
    Everything is stored on `self`; nothing is returned.
    """
    with self._graph.as_default():
        # Scalar hyper-parameter placeholders.
        self.lr = tf.placeholder(name='lr', shape=(), dtype=tf.float32)
        # NOTE(review): the dropout placeholder is literally named 'placeholder'
        # in the graph - confirm nothing feeds it by name before renaming.
        self.dropout_ph = tf.placeholder(name='placeholder',
                                         shape=(),
                                         dtype=tf.float32)
        with tf.name_scope("Inputs"):
            self.X = tf.placeholder(
                name='X', shape=[None, None, self.n_feats],
                dtype=tf.float32)  #[batchsize, seqlen, input_dims]
            self.len_x = tf.placeholder(name='lens_x',
                                        shape=[None],
                                        dtype=tf.int32)
            # The sparse label tensor is fed as its three components.
            self.yIdx = tf.placeholder(tf.int64)
            self.yVals = tf.placeholder(tf.int32)
            self.yShape = tf.placeholder(tf.int64)
            self.y = tf.SparseTensor(self.yIdx, self.yVals, self.yShape)
            batchsize = tf.shape(self.X)[0]
        with tf.name_scope("rnn"):
            rnn_out = self.rnn_cell()  #[batch_size, seqlen, n_hidden]
        with tf.name_scope("variables"):
            # Output projection: n_hidden -> n_classes.
            w = tf.get_variable(
                name="W",
                shape=[rnn_out.get_shape()[2], self.n_classes],
                dtype=tf.float32,
                initializer=tf.random_normal_initializer(mean=0.0,
                                                         stddev=0.01,
                                                         dtype=tf.float32))
            b = tf.get_variable(name='b',
                                shape=[self.n_classes],
                                initializer=tf.constant_initializer(0))
        with tf.name_scope("softmask"):
            # Flatten time into the batch axis so one matmul projects every frame.
            rnn_out = tf.reshape(
                rnn_out,
                [-1, tf.shape(rnn_out)[2]])  #[batchsize*seqlen, n_hidden]
            logits = tf.matmul(rnn_out, w) + b  #[batchsize*seqlen, n_classes]
            logits = tf.reshape(logits, [batchsize, -1, self.n_classes
                                         ])  #[batchsize, seqlen, n_classes]
            logits = tf.transpose(
                logits, [1, 0, 2]
            )  #[seqlen, batchsize, n_classes] <- ctc_greedy likes it like this
            self.framewise_probs = tf.nn.softmax(logits, axis=2)
            self.framewise_output = tf.argmax(self.framewise_probs, axis=2)
            # presumably `self.decoded` is a SparseTensor of decoded labels,
            # since it is densified below - verify against `logits_decode`.
            self.decoded, self.probs = self.logits_decode(
                logits, self.len_x)
            self.ctc_output = tf.sparse_tensor_to_dense(self.decoded)
        with tf.name_scope("calc_losses"):
            sparse_y = self.y
            # Logits were transposed to time-major above, hence time_major=True.
            self.loss = tf.nn.ctc_loss(
                labels=sparse_y,
                inputs=logits,
                sequence_length=self.len_x,
                time_major=True
            )
            # Average the per-example CTC losses.
            self.loss = tf.reduce_mean(self.loss)
        with tf.name_scope("calc_accuracies"):
            # Accuracy is defined as 1 - mean edit distance between decoded
            # output and the sparse labels.
            self.accuracy = tf.constant(1.0) - tf.reduce_mean(
                tf.edit_distance(tf.to_int32(self.decoded), sparse_y))
        with tf.name_scope("optimizer"):
            self.optimizer = self.optimizer_fn(lr=self.lr, loss=self.loss)
            self.saver = tf.train.Saver()
            self.init = tf.global_variables_initializer()
        with tf.name_scope("vars_summaries"):
            hist_b = tf.summary.histogram('b', b)
            hist_w = tf.summary.histogram("w", w)
            self.hists_vars = tf.summary.merge([hist_w] + [hist_b])
        with tf.name_scope("extras"):
            # NOTE(review): a second Saver and a second initializer op are
            # created here; `self.saver` from the "optimizer" scope is
            # overwritten, and both `self.init` and `self.init_variables` exist.
            self.saver = tf.train.Saver()
            self.init_variables = tf.global_variables_initializer()
def test(beam_length, seq_len_to_test, batch_size, n):
    """Restore a pretrained model and print greedily CTC-decoded word sequences.

    Builds the inference graph with `define_objective`, restores the latest
    checkpoint under `pretrain/seq-<n>`, combines the two inference outputs
    into a joint score over (row, col) word-map indices, decodes with
    `tf.nn.ctc_greedy_decoder`, and prints the decoded words.

    Args:
        beam_length: not used in this body.
        seq_len_to_test: sequence length used for placeholders and decoding.
        batch_size: not used in this body; the module-level `BATCH_SIZE` is
            used instead.
        n: selects the checkpoint directory `pretrain/seq-<n>`.
    """
    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    seq_length = seq_len_to_test
    _, wordmap, inv_wordmap = load_dataset(seq_length=0,
                                           n_examples=FLAGS.MAX_N_EXAMPLES)
    real_inputs_discrete = [tf.placeholder(tf.int32, shape=[BATCH_SIZE, seq_length]), \
                            tf.placeholder(tf.int32, shape=[BATCH_SIZE, seq_length])]
    # global step
    global_step = tf.Variable(0, trainable=False, name='global-step')
    # index of <naw> in the map
    naw_r, naw_c = wordmap['<naw>'][0], wordmap['<naw>'][1]
    session = tf.Session(config=config)
    _, _, ops, _, _ = define_objective(session, wordmap, real_inputs_discrete,
                                       seq_length, naw_r, naw_c, None)
    fake, inference_op, _ = ops
    with session.as_default():
        optimistic_restore(
            session,
            tf.train.latest_checkpoint('pretrain/seq-%d' % n, 'checkpoint'))
        restore_config.set_restore_dir(load_from_curr_session=True)
        # For every (batch, time) position, build a len(inv_wordmap)^2 score
        # vector: softmax of the tiled second output plus softmax of the first
        # output broadcast as a column.
        logits = []
        for b in range(BATCH_SIZE):
            buff = []
            for t in range(seq_len_to_test):
                tmp_col = tf.reshape(tf.tile( tf.reshape(inference_op[1][b][t], [-1]), [len(inv_wordmap)]), \
                                     [len(inv_wordmap), len(inv_wordmap)])
                tmp_col = tf.nn.softmax(tmp_col)
                tmp_row = tf.reshape(tf.nn.softmax(inference_op[0][b][t]),
                                     [-1, 1])
                tmp = tmp_col + tmp_row
                tmp = tf.reshape(tmp, [1, -1])
                buff.append(tmp)
            logits.append(tf.reshape(buff, [-1, len(inv_wordmap)**2]))
        logits = tf.reshape(
            logits, [BATCH_SIZE, seq_len_to_test, len(inv_wordmap)**2])
        # Reorder to [time, batch, classes] before handing to the CTC decoder.
        logits = tf.transpose(logits, [1, 0, 2])
        _logits = logits
        # Every sequence in the batch has the full length `seq_len_to_test`.
        length = tf.multiply(tf.ones([BATCH_SIZE], dtype=tf.int32),
                             tf.constant(seq_len_to_test, dtype=tf.int32))
        print(session.run(length))
        res = tf.nn.ctc_greedy_decoder(_logits, length, merge_repeated=False)
        # -1 marks padding after the end of each decoded path.
        paths = tf.sparse_tensor_to_dense(
            res[0][0], default_value=-1)  # Shape: [batch_size, max_sequence_len]
        # NOTE(review): this loop runs the same fetches BATCH_SIZE times with no
        # feed_dict and prints the whole batch each time - confirm the repetition
        # is intended.
        for batch in range(BATCH_SIZE):
            infer, logs, logit, i_op = session.run(
                [paths, res[1], _logits, inference_op[0]])
            print(logit)
            for i in range(len(infer)):
                for j in range(len(infer[0])):
                    ind = infer[i][j]
                    if infer[i][j] == -1:
                        break
                    # Split the flat joint index back into (row, col).
                    row = ind // len(inv_wordmap)
                    col = ind % len(inv_wordmap)
                    print( inv_wordmap[row][col] , end=' ')
                print('')
    session.close()
def convert_string_neighbors(string_neighbors):
    """Turn a tensor of digit strings into a dense boolean tensor.

    Each string is split with an empty delimiter, the ragged result is padded
    with "0", every piece is parsed as an int32 and finally cast to bool.

    Args:
        string_neighbors: string tensor passed to `tf.string_split`.

    Returns:
        A boolean tensor; padded positions parse as 0 and therefore become False.
    """
    pieces = tf.string_split(string_neighbors, "")
    padded = tf.sparse_tensor_to_dense(pieces, default_value="0")
    return tf.cast(tf.string_to_number(padded, out_type=tf.int32), tf.bool)
} num_features = features.shape[1] num_nodes = features.shape[0] model = GraphSCI(placeholders, num_features, num_nodes) pos_weight_adj = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm_adj = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2) global_step = tf.Variable(0, trainable=False) # Optimizer with tf.name_scope('optimizer'): opt = OptimizerSCI(preds=(tf.reshape(model.z_adj, [-1]), tf.reshape(model.z_express, [-1])), labels=(tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'], validate_indices=False), [-1]), tf.reshape(placeholders['features_orig'], [-1])), model=model, num_nodes=num_nodes, num_features=num_features, pos_weight_adj=pos_weight_adj, norm_adj=norm_adj, global_step=global_step) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Initialize session gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) saver = tf.train.Saver(var_list=tf.global_variables())
def cropAndOccludeCenter(self, parsed_features):
    '''
    Crop an image to its first bounding box, resize it to 227x227, occlude a
    centered square and return it with a one-hot label.

    @params:
        parsed_features: dict of parsed example features. Keys read here:
            'image/encoded': JPEG image data as a string tensor
            'image/height', 'image/width', 'image/channels': image shape
            'image/object/bbox/{xmin,xmax,ymin,ymax}': sparse tensors of box
                coordinates; they are multiplied by width/height, so they are
                presumably normalized to [0, 1] - confirm against the writer
            'image/class/label': class label; 1 is subtracted before one-hot
                encoding
    @return:
        (image, one_hot): the float32 image with channels reversed (RGB -> BGR)
        and IMAGENET_MEAN subtracted, and the one-hot label of depth
        self.num_classes.
    '''
    image_data = parsed_features['image/encoded']
    height = parsed_features['image/height']
    width = parsed_features['image/width']
    channels = parsed_features['image/channels']

    # In graph mode an assert op only runs if something depends on it, so tie
    # the encoded image to the check; previously the op was created and dropped.
    channels_ok = tf.assert_equal(channels,
                                  tf.constant([3], channels.dtype),
                                  message="Channels not equal 3")
    with tf.control_dependencies([channels_ok]):
        image_data = tf.identity(image_data)

    # Only the first bounding box of each image is used.
    xmin_scaled = tf.sparse_tensor_to_dense(
        parsed_features['image/object/bbox/xmin'])[0]
    xmax_scaled = tf.sparse_tensor_to_dense(
        parsed_features['image/object/bbox/xmax'])[0]
    ymin_scaled = tf.sparse_tensor_to_dense(
        parsed_features['image/object/bbox/ymin'])[0]
    ymax_scaled = tf.sparse_tensor_to_dense(
        parsed_features['image/object/bbox/ymax'])[0]

    # Convert the scaled box into a pixel crop window.
    offset_height = tf.cast(ymin_scaled * tf.to_float(height), tf.int32)
    offset_width = tf.cast(xmin_scaled * tf.to_float(width), tf.int32)
    target_height = tf.cast(
        (ymax_scaled - ymin_scaled) * tf.to_float(height), tf.int32)
    target_width = tf.cast(
        (xmax_scaled - xmin_scaled) * tf.to_float(width), tf.int32)
    # NOTE(review): a zero-sized crop window is not guarded against here.

    imageCropped = tf.image.decode_and_crop_jpeg(
        image_data,
        [offset_height, offset_width, target_height, target_width])
    imageResized = tf.image.resize_images(
        imageCropped, [227, 227], tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # The occluded square covers `occlusionRatio` of the area, so its side is
    # sqrt(ratio) of the image side; it is centered in the 227x227 image.
    side = tf.sqrt(self.occlusionRatio)
    offset_height2 = tf.cast(((1.0 - side) / 2) * 227, tf.int32)
    target_height2 = tf.cast((side) * 227, tf.int32)
    imageOccluded = self.occlude(imageResized, offset_height2,
                                 offset_height2, target_height2,
                                 target_height2)

    # RGB -> BGR
    bgrImageOccluded = imageOccluded[:, :, ::-1]
    image = tf.subtract(tf.cast(bgrImageOccluded, tf.float32), IMAGENET_MEAN)

    label = tf.cast(parsed_features['image/class/label'], tf.int32)
    one_hot = tf.one_hot(label - 1, self.num_classes)
    return image, one_hot
# Build the dense view of the target once. Creating this op inside the loop
# would add a new node to the graph on every iteration.
dense_target_op = tf.sparse_tensor_to_dense(model_targetY)

batch_no = 1  # set to get in the loop
while batch_no > 0:
    feed_dict = {
        model_input: trainX,
        model_target_ixs: batchTargetIxs,
        model_target_vals: batchTargetVals,
        model_target_shape: batchTargetShape,
        model_seq_lengths: batchSeqLengths,
        model_keep_prob: 0.8
    }
    # One optimisation step; the other fetches are only pulled for inspection.
    loss, dense, _, logits3d, ctc_loss = session.run([
        model_loss, dense_target_op, model_optimizer, model_logits3d,
        model_ctc_loss
    ], feed_dict)
    print("loss({0})".format(loss))

    # Fetch the next batch; the loop ends when the iterator reports batch_no <= 0.
    X, Y, batch_no = next(batch)
    trainX, trainY = X, Y
    testX, testY = X, Y  #overfit for now
    batchTargetIxs, batchTargetVals, batchTargetShape, batchSeqLengths = dense_to_sparse(
        trainY)
def buildErrorEM(self):
    """Build the phrase-composition graph and its squared-error loss.

    Predicts a phrase embedding as `w1v @ M1 + w2v @ M2`, compares it with the
    looked-up phrase embedding and stores the per-instance squared error, the
    top-k errors, the summed loss, an Adagrad train op, an init op and a
    saver on `self`. Written against a legacy TensorFlow API (`tf.sub`,
    `tf.mul`, `keep_dims`, `tf.initialize_all_variables`).

    NOTE(review): `self.numClasses` is decremented on every call, so calling
    this method twice changes the value again.
    """
    self.w1 = tf.placeholder(
        tf.int32, self.numDatInst
    )  #input embedding id 1 #Currently assuming batch size is 50
    self.w2 = tf.placeholder(tf.int32,
                             self.numDatInst)  #input embedding id 2
    self.w12 = tf.placeholder(
        tf.int32, self.numDatInst)  #input embedding id phrase12
    self.y = tf.placeholder(tf.int32, self.numDatInst)  #output class
    w1v = tf.nn.embedding_lookup(self.embedstf,
                                 self.w1)  #Embedding table for words
    w2v = tf.nn.embedding_lookup(self.embedstf, self.w2)
    w12v = tf.nn.embedding_lookup(self.embedstf, self.w12)
    yh = tf.nn.embedding_lookup(
        self.oneHot43,
        self.y)  #One hot encodings embedding table for classes
    self.numClasses -= 1  #To take first (n-1) classes as comp
    # The string below is a disabled alternative model ("N1"); it is a no-op
    # expression statement and is kept verbatim.
    ''' #N1 matSize = self.dim * (self.numClasses) stddev = 1. / math.sqrt(matSize) W = tf.Variable(tf.random_normal(shape=[self.dim, self.numClasses],mean=0,stddev=stddev)) matSize = self.numClasses stddev = 1. / math.sqrt(matSize) b = tf.Variable(tf.random_normal(shape=[self.numClasses],mean=0,stddev=stddev)) #Can be improved matSize = 2 * self.dim * self.dim stddev = 1. / math.sqrt(matSize) Wcomp = tf.Variable(tf.random_normal(shape=[2*self.dim,self.dim],mean=0,stddev=stddev)) matSize = self.dim stddev = 1. / math.sqrt(matSize) bcomp = tf.Variable(tf.random_normal(shape=[self.dim],mean=0,stddev=stddev)) pred1 = tf.concat(1,[w1v,w2v]) #Concatenate pred = tf.matmul(pred1,Wcomp) + bcomp predcompclass = tf.nn.softmax(tf.matmul(pred, W) + b) #will be used for evaluation '''
    #N2
    matSize = self.dim * self.dim  #numDim->dim for glove
    stddev = 1. / math.sqrt(matSize)
    M1 = self.buildVariable([self.dim, self.dim], stddev,
                            'M1')  #numDim->dim for glove
    M2 = self.buildVariable([self.dim, self.dim], stddev,
                            'M2')  #numDim->dim for glove
    # Additive composition of the two word embeddings.
    predPhrase = tf.matmul(w1v, M1) + tf.matmul(w2v, M2)
    # Per-instance squared error against the phrase embedding.
    diffs = tf.sub(predPhrase, w12v)
    diffs2 = tf.mul(diffs, diffs)
    self.errors = tf.reduce_sum(diffs2, 1, keep_dims=True)
    self.errors = tf.cast(self.errors, tf.float32)
    self.k = 10
    # Indices (unsorted) and values/indices (sorted) of the k largest errors.
    val, idic = tf.nn.top_k(tf.transpose(self.errors), self.k,
                            sorted=False)  #doubt,declare idx, self word?
    self.fvl, self.fid = tf.nn.top_k(tf.transpose(self.errors),
                                     self.k,
                                     sorted=True)
    # Sort the selected indices descending, then reverse along the second
    # axis (legacy boolean-mask form of tf.reverse).
    idx, dummy = tf.nn.top_k(idic, self.k, sorted=True)
    idx = tf.reverse(idx, [False, True])
    self.label = tf.ones([self.numDatInst, 1])  #doubt,valuates error also?
    # Dense indicator vector with ones at the top-k error positions.
    self.delta = tf.sparse_tensor_to_dense(
        tf.SparseTensor(tf.transpose(tf.to_int64(idx)),
                        values=tf.ones(self.k),
                        shape=[self.numDatInst]))
    # label1 is zero at the top-k error positions and one elsewhere.
    self.label1 = tf.sub(self.label, tf.expand_dims(self.delta, 1))
    # NOTE(review): the loss weights errors by `self.label` (all ones), not by
    # `self.label1`, so the top-k masking above does not affect training -
    # confirm whether that is intended.
    self.loss = tf.reduce_sum(tf.mul(self.label, self.errors))
    sgd = tf.train.AdagradOptimizer(.1)
    # Only the two composition matrices are trained.
    self.trainop = sgd.minimize(self.loss, var_list=[M1, M2])
    self.initop = tf.initialize_all_variables()  #doubt--which variable?
    self.saver = tf.train.Saver(tf.trainable_variables())
    logging.info("Built Cascaded network.")
def parser_dense_tensor(tensor):
    """Return the dense form of `tensor` via `tf.sparse_tensor_to_dense`.

    Args:
        tensor: value accepted by `tf.sparse_tensor_to_dense`.

    Returns:
        The dense tensor, using that function's default fill value.
    """
    dense = tf.sparse_tensor_to_dense(tensor)
    return dense
def _sparse_to_dense(self, parsed_example): for key in self.int_list_column: if "var" not in key: parsed_example[key] = tf.sparse_tensor_to_dense( parsed_example[key]) return parsed_example
def forward(self, input_sequences, forced_types=None):
    """Predict labels, protein types and topologies for a batch of sequences.

    The decoding strategy depends on `self.model_mode`:
      * LSTM / LSTM_CTC: per-position argmax over the first five classes of
        the log-softmaxed emissions; for LSTM_CTC without forced types, the
        type is taken from a TensorFlow CTC beam-search decode instead.
      * CRF modes: labels come from `self.crfModel.decode`, then are remapped
        (LSTM_CRF_HMM), marginalised (LSTM_CRF_MARG) or used as is.

    Args:
        input_sequences: batch passed to `self._get_network_emissions`.
        forced_types: optional types returned instead of the predicted ones.

    Returns:
        Tuple `(predicted_labels, types, predicted_topologies)`, where `types`
        is `forced_types` when given and the predicted types otherwise.
    """
    emissions, batch_sizes = self._get_network_emissions(input_sequences)

    if self.model_mode == TMHMM3Mode.LSTM_CTC or self.model_mode == TMHMM3Mode.LSTM:
        output = torch.nn.functional.log_softmax(emissions, dim=2)
        _, predicted_labels = output[:, :, 0:5].max(dim=2)
        # Trim every sequence to its true length.
        predicted_labels = [
            list(map(int, x[:batch_sizes[idx]]))
            for idx, x in enumerate(predicted_labels.transpose(0, 1))
        ]
        predicted_labels = [
            torch.cuda.LongTensor(l) if self.use_gpu else torch.LongTensor(l)
            for l in predicted_labels
        ]

        if forced_types is None and self.model_mode == TMHMM3Mode.LSTM_CTC:
            # Build the decoder in a throwaway graph: adding these ops to the
            # global default graph on every call makes it grow without bound.
            with tf.Graph().as_default():
                tf_output = tf.placeholder(tf.float32, shape=emissions.size())
                tf_batch_sizes = tf.placeholder(
                    tf.int32, shape=[emissions.size()[1]])
                beam_decoded, _ = tf.nn.ctc_beam_search_decoder(
                    tf_output, sequence_length=tf_batch_sizes, beam_width=10)
                decoded_topology = tf.sparse_tensor_to_dense(beam_decoded[0])
                # beam search is much faster on the CPU, disable GPU for this part
                config = tf.ConfigProto(device_count={'GPU': 0})
                # The decode graph has no variables, so no initializer is run.
                with tf.Session(config=config) as tf_session:
                    decoded_topology = tf_session.run(
                        decoded_topology,
                        feed_dict={
                            tf_output: output.detach().cpu().numpy(),
                            tf_batch_sizes: batch_sizes
                        })
            predicted_types = torch.LongTensor(
                list(map(get_predicted_type_from_labels, decoded_topology)))
        else:
            predicted_types = torch.LongTensor(
                list(map(get_predicted_type_from_labels, predicted_labels)))
    else:
        mask = self.batch_sizes_to_mask(batch_sizes)
        labels_predicted = [
            torch.cuda.LongTensor(l) if self.use_gpu else torch.LongTensor(l)
            for l in self.crfModel.decode(emissions, mask=mask)
        ]

        if self.model_mode == TMHMM3Mode.LSTM_CRF_HMM:
            predicted_labels = list(
                map(remapped_labels_hmm_to_orginal_labels, labels_predicted))
            predicted_types = torch.LongTensor(
                list(map(get_predicted_type_from_labels, predicted_labels)))
        elif self.model_mode == TMHMM3Mode.LSTM_CRF_MARG:
            alpha = self.crfModel._compute_log_alpha(emissions,
                                                     mask,
                                                     run_backwards=False)
            z = alpha[alpha.size(0) - 1] + self.crfModel.end_transitions
            # View the final scores as (batch, 4, 5), log-sum-exp over the
            # last axis and pick the best of the four groups as the type.
            type_scores = self.logsumexp(z.view((-1, 4, 5)), dim=2)
            _, predicted_types = torch.max(type_scores, dim=1)
            predicted_labels = [l % 5 for l in labels_predicted]  # remap
        else:
            predicted_labels = labels_predicted
            predicted_types = torch.LongTensor(
                list(map(get_predicted_type_from_labels, predicted_labels)))

    if self.use_gpu:
        predicted_types = predicted_types.cuda()

    # Topologies are derived before the all-O -> all-I relabeling below.
    predicted_topologies = list(
        map(label_list_to_topology, predicted_labels))

    # if all O's, change to all I's (by convention)
    for idx, labels in enumerate(predicted_labels):
        if torch.eq(labels, 4).all():
            predicted_labels[idx] = labels - 1

    return predicted_labels, predicted_types if forced_types is None else forced_types, predicted_topologies
def train(tfrecords_file_list):
    """Train the acoustic model with CTC loss on TFRecord data.

    Builds a `tf.data` input pipeline from `tfrecords_file_list`, the acoustic
    model logits, the CTC loss plus regularization loss, a beam-search decode
    of the logits and a gradient-clipped Adam step. It then runs
    `timit_parameter.MAX_EPOCH` epochs, printing the loss and decoded result
    for every mini-batch and saving a checkpoint after each epoch.

    Args:
        tfrecords_file_list: TFRecord file names passed to `TFRecordDataset`.

    NOTE(review): `config` (session config) is not defined in this function;
    it is presumably a module-level name - confirm.
    """
    # ---------------------------- data set API ----------------------------
    # Create the dataset object.
    dataset = tf.data.TFRecordDataset(filenames=tfrecords_file_list)
    # Map the parsing function over the records to get a new dataset.
    dataset = dataset.map(map_func=_parse_data, num_parallel_calls=4)
    dataset = dataset.shuffle(buffer_size=1000).batch(
        timit_parameter.BATCH_SIZE).repeat()
    # Create the iterator.
    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()
    # -----------------------------------------------------------------------
    # Sequence lengths.
    mfcc_len = next_element[1]
    # Mask meant to remove padding on the MFCC feature frames.
    # NOTE(review): not used later in this function.
    mfcc_mask = tf.sequence_mask(lengths=mfcc_len,
                                 maxlen=timit_parameter.MAX_FRAME_SIZE,
                                 name="mfcc_mask")
    label_len = next_element[3]
    # Mask meant to remove padding on the label sequence.
    # NOTE(review): not used later in this function.
    label_mask = tf.sequence_mask(lengths=label_len,
                                  maxlen=timit_parameter.MAX_LABEL_SIZE,
                                  name="label_mask")
    # Inputs and labels.
    mfcc = next_element[0]
    label = next_element[2]
    # keep prob
    keep_prob = timit_parameter.KEEP_PROB
    # Regularizer intended to control the weights.
    # NOTE(review): `regularizer` is created but not passed to anything here.
    regularizer = tf.contrib.layers.l2_regularizer(0.0001)
    acoustic_model = model.AcousticModel()
    # ------------------------------- logits --------------------------------
    logits = acoustic_model.forward(mfcc, mfcc_len, keep_prob, False)
    print("logits.shape:", logits.shape)
    # ------------------------------ CTC loss -------------------------------
    # Logits are passed batch-major (logits_time_major=False); blank_index=-1
    # is forwarded as given.
    negative_log_probability = tf.nn.ctc_loss_v2(labels=label,
                                                 logits=logits,
                                                 label_length=label_len,
                                                 logit_length=mfcc_len,
                                                 logits_time_major=False,
                                                 blank_index=-1,
                                                 name="ctc_loss")
    # ----------------------------- prediction ------------------------------
    # The decoder is given the logits transposed with axes (1, 0, 2); only the
    # first returned path is kept.
    result = tf.nn.ctc_beam_search_decoder_v2(inputs=tf.transpose(
        logits, (1, 0, 2)),
                                              sequence_length=mfcc_len)[0][0]
    result_dense = tf.sparse_tensor_to_dense(sp_input=result)
    # loss: mean CTC loss plus whatever regularization losses are registered.
    l2_loss = tf.losses.get_regularization_loss()
    loss = tf.reduce_mean(negative_log_probability) + l2_loss
    # optimizer and gradient clip
    var_trainable_op = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, var_trainable_op),
                                      1.0)
    optimizer = tf.train.AdamOptimizer(
        timit_parameter.LEARNING_RATE).apply_gradients(
            zip(grads, var_trainable_op))
    init_op = tf.global_variables_initializer()
    # saver
    saver = tf.train.Saver()
    # ------------------------------- Session -------------------------------
    with tf.Session(config=config) as sess:
        sess.run(init_op)  # initialize all variables
        # Make sure the graph-def output directory exists.
        if not os.path.exists(GRAPH_DEF_SAVING_DIR):
            os.mkdir(GRAPH_DEF_SAVING_DIR)
        # save graph def
        print("save graph def.....")
        tf.train.write_graph(graph_or_graph_def=sess.graph_def,
                             logdir=GRAPH_DEF_SAVING_DIR,
                             name=GRAPH_DEF_SAVING_NAME,
                             as_text=True)
        for epoch in range(1, timit_parameter.MAX_EPOCH + 1):
            print("Epoch:", epoch)
            # time evaluation
            start_time = time.time()
            train_losses = []
            train_accus = []  # training loss/accuracy in every mini-batch
            # mini batch
            for i in range(
                    0,
                (timit_parameter.TRAIN_SIZE // timit_parameter.BATCH_SIZE)):
                # One training step; also fetch the decoded result and labels.
                train_loss, result_, label_, _ = sess.run(
                    fetches=[loss, result_dense, label, optimizer], )
                print("train_loss:", train_loss)
                print("result_:\n", result_)
                recover(result=result_, label=label_)
                # add to list
                train_losses.append(train_loss)
            end_time = time.time()
            print("spend: ", (end_time - start_time) / 60, " mins")
            print("average train loss:",
                  sum(train_losses) / len(train_losses))
            # Checkpoint once per epoch, tagged with the epoch number.
            print("model saving....")
            saver.save(sess=sess,
                       save_path=MODEL_SAVING_PATH,
                       global_step=epoch)
            print("model saving done!")
if (batch + 1) % 500 == 0: print('loop:', batch, 'Train cost: ', train_cost / (batch + 1)) print('loop:', batch, 'Train cost: ', train_cost / (batch + 1), file=floss) feed2 = { input_tensor: source, targets: sparse_labels, seq_length: source_lengths, keep_dropout: 1.0 } d, train_ler = sess.run([decoded[0], ler], feed_dict=feed2) dense_decoded = tf.sparse_tensor_to_dense( d, default_value=-1).eval(session=sess) dense_labels = sparse_tuple_to_texts_ch(sparse_labels, words) counter = 0 print('Label err rate: ', train_ler) for orig, decoded_arr in zip(dense_labels, dense_decoded): # convert to strings decoded_str = ndarray_to_text_ch(decoded_arr, words) print(' file {}'.format(counter)) print('Original: {}'.format(orig)) print('Decoded: {}'.format(decoded_str)) counter = counter + 1 break # 每训练100次保存一下模型 if (batch + 1) % 100 == 0:
def _parse_proto(buf):
    """Parse a serialized example into a dictionary of tensors.

    Fields read from the protocol buffer:

      * frames: `_NUM_VIEWS` encoded views of the scene.
      * top_down_frame: a single encoded view of the scene from above.
      * cameras: `_NUM_VIEWS * _NUM_RAW_CAMERA_PARAMS` floats, reshaped to one
        row of camera parameters per view.
      * captions / simplified_captions: variable-length string descriptions
        of the scene.
      * meta_shape / meta_color / meta_size: variable-length string vectors
        describing the objects.
      * meta_obj_positions: floats reshaped to rows of 3.
      * meta_obj_rotations: floats reshaped to rows of 4.
      * meta_obj_colors: floats reshaped to rows of 4.

    Args:
      buf: A string containing the serialized protocol buffer.

    Returns:
      A dictionary with keys "images", "cameras", "captions",
      "simplified_captions" and "top_down"; the "meta_*" entries are added
      only when `_PARSE_METADATA` is truthy.
    """
    string_keys = ("captions", "simplified_captions", "meta_shape",
                   "meta_color", "meta_size")
    float_keys = ("meta_obj_positions", "meta_obj_rotations",
                  "meta_obj_colors")

    feature_map = {
        "frames":
            tf.io.FixedLenFeature(shape=[_NUM_VIEWS], dtype=tf.string),
        "top_down_frame":
            tf.io.FixedLenFeature(shape=[1], dtype=tf.string),
        "cameras":
            tf.io.FixedLenFeature(
                shape=[_NUM_VIEWS * _NUM_RAW_CAMERA_PARAMS],
                dtype=tf.float32),
    }
    for key in string_keys:
        feature_map[key] = tf.io.VarLenFeature(dtype=tf.string)
    for key in float_keys:
        feature_map[key] = tf.io.VarLenFeature(dtype=tf.float32)

    example = tf.io.parse_single_example(buf, feature_map)

    # Decode every view, then the single top-down frame.
    encoded_views = tf.reshape(tf.concat(example["frames"], axis=0), [-1])
    images = tf.map_fn(tf.image.decode_jpeg,
                       encoded_views,
                       dtype=tf.uint8,
                       back_prop=False)
    top_down = tf.image.decode_jpeg(tf.squeeze(example["top_down_frame"]))
    cameras = tf.reshape(example["cameras"],
                         shape=[-1, _NUM_RAW_CAMERA_PARAMS])

    # Variable-length string features are padded with the empty string.
    strings = {
        key: tf.sparse_tensor_to_dense(example[key], default_value="")
        for key in string_keys
    }

    # Variable-length float features are padded with 0 and reshaped to rows
    # of a fixed width.
    row_width = {
        "meta_obj_positions": 3,
        "meta_obj_rotations": 4,
        "meta_obj_colors": 4,
    }
    matrices = {}
    for key in float_keys:
        flat = tf.sparse_tensor_to_dense(example[key], default_value=0)
        matrices[key] = tf.reshape(flat, shape=[-1, row_width[key]])

    data_tensors = {
        "images": images,
        "cameras": cameras,
        "captions": strings["captions"],
        "simplified_captions": strings["simplified_captions"],
        "top_down": top_down
    }
    if _PARSE_METADATA:
        for key in ("meta_shape", "meta_color", "meta_size"):
            data_tensors[key] = strings[key]
        for key in float_keys:
            data_tensors[key] = matrices[key]
    return data_tensors
def GetEmbeddingLookupList(signals_list,
                           embedding_vars,
                           sparse_ids,
                           sparse_weights=None,
                           combiners='sqrtn',
                           partition_strategies='mod'):
    """Build one weighted embedding-lookup tensor per signal.

    Args:
      signals_list: A list of strings, representing names of features.
      embedding_vars: Dict mapping feature names to full embedding variables.
      sparse_ids: Dict mapping feature names to SparseTensors of their ids.
      sparse_weights: Either None, or a dict mapping feature names to
        SparseTensors of their weights (individual entries may be None).
      combiners: Either one combiner for all features ('mean', 'sqrtn' or
        'sum') or a dict mapping each feature name to a combiner.
      partition_strategies: Either one partition strategy for all features
        ('mod' or 'div') or a dict mapping feature names to strategies.

    Returns:
      A list of embedding lookup tensors, aligned with `signals_list`. Each
      lookup is scaled by per-id weights derived from the combiner; any
      combiner other than 'mean' or 'sqrtn' leaves the weights unnormalized.
    """
    assert isinstance(embedding_vars, dict) and isinstance(sparse_ids, dict)
    assert sparse_weights is None or isinstance(sparse_weights, dict)
    assert combiners in ('mean', 'sqrtn', 'sum') or isinstance(combiners, dict)
    assert (partition_strategies in ('mod', 'div') or
            isinstance(partition_strategies, dict))

    def _for_signal(setting, name):
        # A dict holds per-signal values; anything else applies to all signals.
        return setting[name] if isinstance(setting, dict) else setting

    lookups = []
    for signal in signals_list:
        combiner = _for_signal(combiners, signal)
        partition_strategy = _for_signal(partition_strategies, signal)

        # Batch dimension should be 1 for attribution.
        ids_batch_is_one = tf.assert_equal(tf.shape(sparse_ids[signal])[0], 1)
        with tf.control_dependencies([ids_batch_is_one]):
            lookup = tf.nn.embedding_lookup(
                params=embedding_vars[signal],
                ids=tf.sparse_tensor_to_dense(sparse_ids[signal]),
                partition_strategy=partition_strategy)

        if sparse_weights is None or sparse_weights[signal] is None:
            # No explicit weights: every id gets the same weight.
            count = tf.size(sparse_ids[signal].values)
            if combiner == 'mean':
                weights = tf.fill([1, count], 1.0 / tf.to_float(count))
            elif combiner == 'sqrtn':
                weights = tf.fill([1, count],
                                  1.0 / tf.sqrt(tf.to_float(count)))
            else:
                weights = tf.ones([1, count], dtype=tf.float32)
        else:
            # Batch dimension should be 1 for attribution.
            weights_batch_is_one = tf.assert_equal(
                tf.shape(sparse_weights[signal])[0], 1)
            with tf.control_dependencies([weights_batch_is_one]):
                dense_weights = tf.sparse_tensor_to_dense(
                    sparse_weights[signal])
            if combiner == 'mean':
                weights = dense_weights / tf.reduce_sum(dense_weights)
            elif combiner == 'sqrtn':
                squared_norm = tf.reduce_sum(tf.pow(dense_weights, 2))
                weights = dense_weights / tf.sqrt(squared_norm)
            else:
                weights = dense_weights

        lookup *= tf.expand_dims(weights, -1)
        lookups.append(lookup)
    return lookups
def _sparse_to_tensor(sparse_tensor, dtype, shape=(-1,), default_value=0):
    """Densify a sparse tensor, reshape it and cast it to `dtype`.

    Args:
        sparse_tensor: input passed to `tf.sparse_tensor_to_dense`.
        dtype: dtype of the returned tensor.
        shape: target shape for `tf.reshape`; defaults to a flat vector.
        default_value: fill value for positions absent from the sparse input.

    Returns:
        The dense, reshaped tensor cast to `dtype`.
    """
    dense = tf.sparse_tensor_to_dense(sparse_tensor,
                                      default_value=default_value)
    reshaped = tf.reshape(dense, shape=shape)
    return tf.cast(reshaped, dtype=dtype)
def sample_body(n, sample, n_produced=0, n_total_drawn=0, eff=1.0, is_sampled=None, weights_scaling=0.):
    """Draw one batch of candidates and keep those passing accept-reject.

    The names `limits`, `dynamic_array_shape`, `multiple_limits`,
    `sample_and_weights`, `prob`, `prob_max`, `dtype`, `run`, `settings`,
    `ztf`, `EventSpace` and `DueToLazynessNotImplementedError` are not
    defined in this block; presumably they come from an enclosing scope.

    NOTE(review): the return tuple mirrors the parameter list one-to-one,
    which looks like a `tf.while_loop` body -- confirm against the caller.

    Args:
        n: Total number of samples requested.
        sample: Container exposing `scatter(indices=..., value=...)` into
            which accepted samples are written.
        n_produced: Number of samples accepted so far.
        n_total_drawn: Number of candidates drawn so far.
        eff: Current efficiency estimate; floored at 1e-6 before use.
        is_sampled: Boolean mask of already-filled positions; only read when
            `dynamic_array_shape` is false.
        weights_scaling: Running scale factor applied to the proposal
            weights.

    Returns:
        Tuple `(n, sample_new, n_produced_new, n_total_drawn, eff,
        is_sampled, weights_scaling)` with the updated loop state.
    """
    # Floor the efficiency so the division below cannot blow up.
    eff = tf.reduce_max([eff, ztf.to_real(1e-6)])

    n_to_produce = n - n_produced

    if isinstance(limits, EventSpace):  # EXPERIMENTAL(Mayou36): added to test EventSpace
        limits.create_limits(n=n)

    do_print = settings.get_verbosity() > 5
    if do_print:
        print_op = tf.print("Number of samples to produce:", n_to_produce, " with efficiency ", eff,
                            " with total produced ", n_produced, " and total drawn ", n_total_drawn,
                            " with weights scaling", weights_scaling)
    # The identity op only exists to hang the optional print op on the graph.
    with tf.control_dependencies([print_op] if do_print else []):
        n_to_produce = tf.identity(n_to_produce)
    if dynamic_array_shape:
        # Over-request by the inverse efficiency plus a 10% / +10 margin.
        n_to_produce = tf.to_int32(ztf.to_real(n_to_produce) / eff * (1.1)) + 10  # just to make sure
        # TODO: adjustable efficiency cap for memory efficiency (prevent too many samples at once produced)
        max_produce_cap = tf.to_int32(8e5)
        # NOTE(review): max(cap, x) followed by min(., cap) always evaluates
        # to `max_produce_cap`, so the request computed above is discarded
        # and exactly the cap is drawn on every call -- confirm whether this
        # is intended before changing it.
        safe_to_produce = tf.maximum(max_produce_cap, n_to_produce)  # protect against overflow, n_to_prod -> neg.
        n_to_produce = tf.minimum(safe_to_produce, max_produce_cap)  # introduce a cap to force serial
        new_limits = limits
    else:
        # TODO(Mayou36): add cap for n_to_produce here as well
        if multiple_limits:
            raise DueToLazynessNotImplementedError(
                "Multiple limits for fixed event space not yet implemented"
            )
        # Restrict the limits to the positions that still need a sample.
        is_not_sampled = tf.logical_not(is_sampled)
        (lower, ), (upper, ) = limits.limits
        lower = tuple(tf.boolean_mask(low, is_not_sampled) for low in lower)
        upper = tuple(tf.boolean_mask(up, is_not_sampled) for up in upper)
        new_limits = limits.with_limits(limits=((lower, ), (upper, )))
        draw_indices = tf.where(is_not_sampled)

    with tf.control_dependencies([n_to_produce]):
        rnd_sample, thresholds_unscaled, weights, weights_max, n_drawn = sample_and_weights(
            n_to_produce=n_to_produce, limits=new_limits, dtype=dtype)

    n_drawn = tf.cast(n_drawn, dtype=tf.int32)
    if run.numeric_checks:
        assert_op_n_drawn = tf.assert_non_negative(n_drawn)
        tfdeps = [assert_op_n_drawn]
    else:
        tfdeps = []
    with tf.control_dependencies(tfdeps):
        n_total_drawn += n_drawn

        probabilities = prob(rnd_sample)
    shape_rnd_sample = tf.shape(rnd_sample)[0]
    if run.numeric_checks:
        assert_prob_rnd_sample_op = tf.assert_equal(
            tf.shape(probabilities), shape_rnd_sample)
        tfdeps = [assert_prob_rnd_sample_op]
    else:
        tfdeps = []
    # assert_weights_rnd_sample_op = tf.assert_equal(tf.shape(weights), shape_rnd_sample)
    # print_op = tf.print("shapes: ", tf.shape(weights), shape_rnd_sample, "shapes end")
    with tf.control_dependencies(tfdeps):
        probabilities = tf.identity(probabilities)
    if prob_max is None or weights_max is None:  # TODO(performance): estimate prob_max, after enough estimations -> fix it?
        # TODO(Mayou36): This control dependency is needed because otherwise the max won't be determined
        # correctly. A bug report on will be filled (WIP).
        # The behavior is very odd: if we do not force a kind of copy, the `reduce_max` returns
        # a value smaller by a factor of 1e-14
        # with tf.control_dependencies([probabilities]):
        # UPDATE: this works now? Was it just a one-time bug?

        # safety margin, predicting future, improve for small samples?
        weights_maximum = tf.reduce_max(weights)
        # Clip tiny weights to 1e-5 of the maximum before forming the ratio.
        weights_clipped = tf.maximum(weights, weights_maximum * 1e-5)
        # prob_weights_ratio = probabilities / weights
        prob_weights_ratio = probabilities / weights_clipped
        # min_prob_weights_ratio = tf.reduce_min(prob_weights_ratio)
        max_prob_weights_ratio = tf.reduce_max(prob_weights_ratio)
        # Only referenced by the commented-out code below.
        ratio_threshold = 50000000.
        # clipping means that we don't scale more for a certain threshold
        # to properly account for very small numbers, the thresholds should be scaled to match the ratio
        # but if a weight of a sample is very low (compared to the other weights), this would force the acceptance
        # of other samples to decrease strongly. We introduce a cut here, meaning that any event with an acceptance
        # chance of less then 1 in ratio_threshold will be underestimated.
        # TODO(Mayou36): make ratio_threshold a global setting
        # max_prob_weights_ratio_clipped = tf.minimum(max_prob_weights_ratio,
        #                                             min_prob_weights_ratio * ratio_threshold)
        max_prob_weights_ratio_clipped = max_prob_weights_ratio
        # The scale never shrinks between calls; 1% head-room is added.
        weights_scaling = tf.maximum(
            weights_scaling, max_prob_weights_ratio_clipped * (1 + 1e-2))
    else:
        weights_scaling = prob_max / weights_max
        # Only referenced by the commented-out scaling check below.
        min_prob_weights_ratio = weights_scaling

    weights_scaled = weights_scaling * weights * (1 + 1e-8)  # numerical epsilon
    random_thresholds = thresholds_unscaled * weights_scaled
    if run.numeric_checks:
        invalid_probs_weights = tf.greater(probabilities, weights_scaled)
        failed_weights = tf.boolean_mask(weights_scaled, mask=invalid_probs_weights)
        failed_probs = tf.boolean_mask(probabilities, mask=invalid_probs_weights)

        # `print_op` is built here but not added to `assert_op` below.
        print_op = tf.print(
            "HACK WARNING: if the following is NOT empty, your sampling _may_ be biased."
            " Failed weights:", failed_weights, " failed probs", failed_probs)
        assert_no_failed_probs = tf.assert_equal(tf.shape(failed_weights), [0])
        # assert_op = [print_op]
        assert_op = [assert_no_failed_probs]
        # for weights scaled more then ratio_threshold
        # assert_op = [tf.assert_greater_equal(x=weights_scaled, y=probabilities,
        #                                      data=[tf.shape(failed_weights), failed_weights, failed_probs],
        #                                      message="Not all weights are >= probs so the sampling "
        #                                              "will be biased. If a custom `sample_and_weights` "
        #                                              "was used, make sure that either the shape of the "
        #                                              "custom sampler (resp. it's weights) overlap better "
        #                                              "or decrease the `max_weight`")]
        #
        # # check disabled (below not added to deps)
        # assert_scaling_op = tf.assert_less(weights_scaling / min_prob_weights_ratio, ztf.constant(ratio_threshold),
        #                                    data=[weights_scaling, min_prob_weights_ratio],
        #                                    message="The ratio between the probabilities from the pdf and the"
        #                                    f"probability from the sampler is higher "
        #                                    f" then {ratio_threshold}. This will most probably bias the sampling. "
        #                                    f"Use importance sampling or, to disable this check, do"
        #                                    f"zfit.run.numeric_checks = False")
        # assert_op.append(assert_scaling_op)
    else:
        assert_op = []
    with tf.control_dependencies(assert_op):
        # Accept a candidate when its probability exceeds its threshold.
        take_or_not = probabilities > random_thresholds
    # Drop a leading axis when the mask comes back two-dimensional.
    take_or_not = take_or_not[0] if len(
        take_or_not.shape) == 2 else take_or_not
    filtered_sample = tf.boolean_mask(rnd_sample, mask=take_or_not, axis=0)

    n_accepted = tf.shape(filtered_sample)[0]
    n_produced_new = n_produced + n_accepted
    if not dynamic_array_shape:
        # Accepted samples go back to the positions they were drawn for;
        # mark those positions as filled.
        indices = tf.boolean_mask(draw_indices, mask=take_or_not)
        current_sampled = tf.sparse_tensor_to_dense(tf.SparseTensor(
            indices=indices,
            values=tf.broadcast_to(input=(True, ), shape=(n_accepted, )),
            dense_shape=(tf.cast(n, dtype=tf.int64), )),
            default_value=False)
        is_sampled = tf.logical_or(is_sampled, current_sampled)
        indices = indices[:, 0]
    else:
        # Accepted samples are appended after the ones already produced.
        indices = tf.range(n_produced, n_produced_new)

    sample_new = sample.scatter(indices=tf.cast(indices, dtype=tf.int32),
                                value=filtered_sample)

    # efficiency (estimate) of how many samples we get
    eff = tf.reduce_max([ztf.to_real(n_produced_new), ztf.to_real(1.)]) / tf.reduce_max(
        [ztf.to_real(n_total_drawn), ztf.to_real(1.)])
    return n, sample_new, n_produced_new, n_total_drawn, eff, is_sampled, weights_scaling
# Side length of the square voxel grid; the flattened grid therefore has
# VOXEL_VALS_SIZE ** 2 cells.
VOXEL_VALS_SIZE = 19

# If no conv layer here
voxel_vals_target = voxel_embedded_vals
voxel_vals_target_flat = tf.reshape(
    voxel_vals_target, [-1, VOXEL_VALS_SIZE ** 2, word_emb_size])
voxel_vals_target  # notebook-style inspection; no effect when run as a script

# %%
# Per-example word count: sum a sparse tensor of ones over the word axis.
ones_like_words = tf.SparseTensor(
    words.indices, tf.ones_like(words.values), words.shape)
words_len = tf.stop_gradient(tf.sparse_reduce_sum(ones_like_words, 1))
words_len.set_shape([batch_size])

words_dense = tf.sparse_tensor_to_dense(words)
words_dense.set_shape([batch_size, None])
words_dense  # notebook-style inspection; no effect when run as a script

word_vals = tf.nn.embedding_lookup(word_embedding, words_dense)

# %%
# Batched product of word embeddings with the transposed voxel embeddings.
match_logits = tf.batch_matmul(
    word_vals,
    voxel_vals_target_flat,
    adj_y=True,
)
# True where the voxel id is 0, flattened per example with a singleton
# axis inserted at position 1.
air_mask = tf.expand_dims(
    tf.reshape(tf.equal(voxels, 0), [-1, VOXEL_VALS_SIZE ** 2]), 1)
def reshape_indices(indices, shape):
    """Reset the dense shape of sparse `indices` to `shape` and densify it."""
    return tf.sparse_tensor_to_dense(
        tf.sparse_reset_shape(indices, new_shape=shape))
def _make_train_op(self):
    """Assemble the training step and the metric ops of the model.

    Builds, in order: optional dropout on the input batch, the Gibbs chain,
    an optional image summary of the hidden means, the transform op, the
    gradient estimates with the sparsity penalty, the momentum parameter
    updates and the L2 / reconstruction-error / pseudo-log-likelihood /
    free-energy metrics.

    Results are published through graph collections ('transform_op',
    'train_op', 'l2_loss', 'msre', 'pll', 'free_energy_op') and scalar
    summaries; nothing is returned.
    """
    # Dropout, when configured, replaces the visible batch in place.
    if self.dropout is not None:
        self._X_batch = tf.nn.dropout(self._X_batch, keep_prob=self._dropout)

    # Run Gibbs chain for specified number of steps.
    with tf.name_scope('gibbs_chain'):
        hid0_means = self._means_h_given_v(self._X_batch)
        hid0_samples = self._sample_h_given_v(hid0_means)
        if self.sample_h_states:
            chain_start = hid0_samples
        else:
            chain_start = hid0_means
        vis_states, vis_means, _, hid_means = self._make_gibbs_chain(chain_start)

    # Image summary of the first `display_hidden_activations` hidden means.
    if self.display_hidden_activations:
        with tf.name_scope('hidden_activations_visualization'):
            shown = hid_means[:, :self.display_hidden_activations]
            shown = tf.cast(shown, tf.float32)
            # Add leading and trailing singleton axes for tf.summary.image.
            shown = tf.expand_dims(tf.expand_dims(shown, 0), -1)
            tf.summary.image('hidden_activation_means', shown)

    # Encoded data, used by the transform method.
    with tf.name_scope('transform'):
        tf.add_to_collection('transform_op', tf.identity(hid_means))

    # Gradient estimates (= positive - negative associations).
    with tf.name_scope('grads_estimates'):
        # The last batch may be smaller, so take the size from the tensor.
        n_samples = tf.cast(tf.shape(self._X_batch)[0], dtype=self._tf_dtype)
        with tf.name_scope('dW'):
            positive_assoc = tf.matmul(self._X_batch, hid0_means,
                                       transpose_a=True)
            negative_assoc = tf.matmul(vis_states, hid_means,
                                       transpose_a=True)
            grad_W = ((positive_assoc - negative_assoc) / n_samples
                      - self._l2 * self._W)
        with tf.name_scope('dvb'):
            grad_vb = tf.reduce_mean(self._X_batch - vis_states, axis=0)
        with tf.name_scope('dhb'):
            grad_hb = tf.reduce_mean(hid0_means - hid_means, axis=0)

    # Sparsity targets: damped running statistic of the hidden means.
    with tf.name_scope('sparsity_targets'):
        batch_activation = tf.reduce_sum(hid_means, axis=0)
        damped = (self._sparsity_damping * self._q_means
                  + (1 - self._sparsity_damping) * batch_activation)
        q_update = self._q_means.assign(damped)
        penalty = self._sparsity_cost * (q_update - self._sparsity_target)
        grad_hb -= penalty
        grad_W -= penalty

    # Momentum updates: refresh each velocity, then add it to its parameter.
    with tf.name_scope('momentum_updates'):
        with tf.name_scope('dW'):
            step_W = self._dW.assign(
                self._learning_rate * (self._momentum * self._dW + grad_W))
            W_update = self._W.assign_add(step_W)
        with tf.name_scope('dvb'):
            step_vb = self._dvb.assign(
                self._learning_rate * (self._momentum * self._dvb + grad_vb))
            vb_update = self._vb.assign_add(step_vb)
        with tf.name_scope('dhb'):
            step_hb = self._dhb.assign(
                self._learning_rate * (self._momentum * self._dhb + grad_hb))
            hb_update = self._hb.assign_add(step_hb)

    # Single op that applies all three parameter updates.
    with tf.name_scope('training_step'):
        tf.add_to_collection(
            'train_op', tf.group(W_update, vb_update, hb_update))

    # Metrics.
    with tf.name_scope('L2_loss'):
        l2_loss = self._l2 * tf.nn.l2_loss(self._W)
        tf.add_to_collection('l2_loss', l2_loss)

    with tf.name_scope('mean_squared_recon_error'):
        msre = tf.reduce_mean(tf.square(self._X_batch - vis_means))
        tf.add_to_collection('msre', msre)

    # Since reconstruction error is fairly poor measure of performance,
    # as this is not what CD-k learning algorithm aims to minimize [2],
    # compute (per sample average) pseudo-loglikelihood (proxy to likelihood)
    # instead, which not only is much more cheaper to compute, but also
    # learning with PLL is asymptotically consistent [1].
    # More specifically, PLL computed using approximation as in [3].
    with tf.name_scope('pseudo_loglik'):
        clean = self._X_batch
        corrupted = tf.identity(clean)
        n_rows = tf.shape(clean)[0]
        # Pick one random feature per sample ...
        flip_cols = tf.random_uniform([n_rows], minval=0,
                                      maxval=self._n_visible, dtype=tf.int32)
        flip_idx = tf.transpose([tf.range(n_rows), flip_cols])
        flip_mask = tf.SparseTensor(
            indices=tf.to_int64(flip_idx),
            values=tf.ones_like(flip_cols, dtype=self._tf_dtype),
            dense_shape=tf.to_int64(tf.shape(corrupted)))
        # ... and replace it by (1 - value): multiply by -1 at the chosen
        # position (by +1 elsewhere), then add 1 at the chosen position.
        corrupted = tf.multiply(
            corrupted, -tf.sparse_tensor_to_dense(flip_mask, default_value=-1))
        corrupted = tf.sparse_add(corrupted, flip_mask)
        corrupted = tf.identity(corrupted, name='x_corrupted')

        n_visible = tf.cast(self._n_visible, dtype=self._tf_dtype)
        energy_corrupted = self._free_energy(corrupted)
        energy_clean = self._free_energy(clean)
        pll = n_visible * tf.log_sigmoid(energy_corrupted - energy_clean)
        tf.add_to_collection('pll', pll)

    # Free energy of the input batch is also exposed (for feg).
    tf.add_to_collection('free_energy_op', self._free_energy(self._X_batch))

    # Scalar summaries for whichever metrics are enabled.
    for metric_key, metric_op in (('l2_loss', l2_loss),
                                  ('msre', msre),
                                  ('pll', pll)):
        if self.metrics_config[metric_key]:
            tf.summary.scalar(self._metrics_names_map[metric_key], metric_op)
def run_batches(self, dataset, is_training, decode, write_to_file, epoch):
    """Run one pass over `dataset`, optionally training and decoding.

    Args:
        dataset: Batch provider exposing `_txt_files`, `_batch_size` and
            `next_batch()`, which returns
            `(source, source_lengths, sparse_labels)`.
        is_training: When true, the optimizer is run and the loss is
            accumulated; otherwise only the label error rate is evaluated.
        decode: When true, the first batch is decoded and a few examples
            are logged.
        write_to_file: Not used by this method.
        epoch: Zero-based epoch index; used for logging and as the summary
            step.

    Returns:
        Tuple `(train_cost, train_ler)`: the accumulated cost and label
        error rate, each divided by the number of examples (the cost stays
        0 when `is_training` is false).
    """
    n_examples = len(dataset._txt_files)
    n_batches_per_epoch = int(np.ceil(n_examples / dataset._batch_size))

    self.train_cost = 0
    self.train_ler = 0

    for batch in range(n_batches_per_epoch):
        # Get next batch of training data (audio features) and transcripts
        source, source_lengths, sparse_labels = dataset.next_batch()

        feed = {
            self.input_tensor: source,
            self.targets: sparse_labels,
            self.seq_length: source_lengths
        }

        # If is_training is false, this means straight decoding without
        # computing loss
        if is_training:
            # avg_loss is the loss_op, optimizer is the train_op;
            # running these pushes tensors (data) through graph
            batch_cost, _ = self.sess.run(
                [self.avg_loss, self.optimizer], feed)
            self.train_cost += batch_cost * dataset._batch_size
            logger.debug('Batch cost: %.2f \t| Train cost: %.2f',
                         batch_cost, self.train_cost / (batch + 1))

        self.train_ler += self.sess.run(
            self.ler, feed_dict=feed) * dataset._batch_size
        logger.debug('epoch:%2d %5d/%5d \t| Label error rate: %.2f',
                     epoch + 1, batch + 1, n_batches_per_epoch,
                     self.train_ler / (batch + 1))

        # Turn on decode only 1 batch per epoch
        if decode and batch == 0:
            d = self.sess.run(self.decoded[0], feed_dict=feed)
            # `d` is already a fetched sparse value, so densify it on the
            # host. Building a `tf.sparse_tensor_to_dense` op here would add
            # a new node to the graph on every call.
            dense_decoded = np.full(
                tuple(d.dense_shape), -1, dtype=d.values.dtype)
            dense_decoded[tuple(np.asarray(d.indices).T)] = d.values
            dense_labels = sparse_tuple_to_texts(sparse_labels)

            # only print a set number of example translations
            counter_max = 4
            for counter, (orig, decoded_arr) in enumerate(
                    zip(dense_labels, dense_decoded)):
                # The limit has to be checked per example; a single check
                # before the loop never stops the logging.
                if counter >= counter_max:
                    break
                # convert to strings
                decoded_str = ndarray_to_text(decoded_arr)
                logger.info('Batch {}, file {}'.format(batch, counter))
                logger.info('Original: {}'.format(orig))
                logger.info('Decoded: {}'.format(decoded_str))

            # save out variables for testing
            self.dense_decoded = dense_decoded
            self.dense_labels = dense_labels

    # Metrics mean
    if is_training:
        self.train_cost /= n_examples
    self.train_ler /= n_examples

    # Populate summary for histograms and distributions in tensorboard
    # (evaluated on the feed of the last batch).
    self.accuracy, summary_line = self.sess.run(
        [self.avg_loss, self.summary_op], feed)
    self.writer.add_summary(summary_line, epoch)

    return self.train_cost, self.train_ler
def buildModel(self, inputShape):
    """Build the pooling + 1x1-classifier graph, its loss and summaries.

    The input (and optionally the ground truth) is fed as sparse
    index/value placeholders and densified inside the graph. The input is
    max-reduced over axis 1, max-pooled spatially, passed through a
    convolution with `class_weight` / `class_bias` and `pixelSoftmax`, and
    trained with a (optionally class-weighted) cross-entropy plus a weight
    regularizer.

    Args:
        inputShape: Sequence of four ints; the input is reshaped to
            `[batchSize, inputShape[0], inputShape[1], inputShape[2],
            inputShape[3]]`.
            NOTE(review): presumably (time, y, x, features) -- confirm.
    """
    #Running on GPU
    with tf.device(self.device):
        self.defineVars()
        with tf.name_scope("inputOps"):
            #We represent inputImage and gt as sparse matrices, with indices/values
            self.dataIndices = tf.placeholder("int64", [2, None],
                                              "dataIndices")
            self.dataValues = node_variable([None], "dataValues")
            # Indices arrive as [2, nnz]; SparseTensor wants [nnz, 2].
            self.pre_inputImage = tf.sparse_tensor_to_dense(
                tf.SparseTensor(
                    tf.transpose(self.dataIndices, [1, 0]),
                    self.dataValues,
                    [
                        self.batchSize * inputShape[0],
                        inputShape[1] * inputShape[2] * inputShape[3]
                    ]),
                validate_indices=False)
            self.inputImage = self.inputScale * tf.reshape(
                self.pre_inputImage, [
                    self.batchSize, inputShape[0], inputShape[1],
                    inputShape[2], inputShape[3]
                ])

            if self.gtSparse:
                self.gtIndices = tf.placeholder("int64", [2, None],
                                                "gtIndices")
                self.gtValues = node_variable([None], "gtValues")
                self.pre_gt = tf.sparse_tensor_to_dense(
                    tf.SparseTensor(
                        tf.transpose(self.gtIndices, [1, 0]),
                        self.gtValues,
                        [
                            self.batchSize * self.gtShape[0],
                            self.gtShape[1] * self.gtShape[2] *
                            self.numClasses
                        ]),
                    validate_indices=False)
                self.gt = tf.reshape(self.pre_gt, [
                    self.batchSize, self.gtShape[0], self.gtShape[1],
                    self.gtShape[2], self.numClasses
                ])
            else:
                self.gt = tf.placeholder("float32", [
                    self.batchSize, self.gtShape[0], self.gtShape[1],
                    self.gtShape[2], self.numClasses
                ])

            self.select_gt = tf.squeeze(self.gt[:, :, :, :, :],
                                        squeeze_dims=[1])
            #self.norm_gt = self.gt/tf.reduce_sum(self.gt, reduction_indices=4, keep_dims=True)

        with tf.name_scope("Pool"):
            # Pool window chosen so the pooled map matches the gt grid.
            yPool = int(np.ceil(float(inputShape[1]) / self.gtShape[1]))
            xPool = int(np.ceil(float(inputShape[2]) / self.gtShape[2]))
            #Pool over spatial dimensions to be 2x2
            self.timePooled = tf.reduce_max(self.inputImage,
                                            reduction_indices=1)
            self.inputPooled = tf.nn.max_pool(self.timePooled,
                                              ksize=[1, yPool, xPool, 1],
                                              strides=[1, yPool, xPool, 1],
                                              padding="SAME")
            self.camPooled = tf.nn.max_pool(self.timePooled,
                                            ksize=[1, yPool, xPool, 1],
                                            strides=[1, 1, 1, 1],
                                            padding="SAME")
            self.h_conv = tf.nn.conv2d(self.inputPooled,
                                       self.class_weight, [1, 1, 1, 1],
                                       padding="SAME") + self.class_bias
            #We evaluate pooling with smaller stride here
            self.cam = tf.nn.conv2d(self.camPooled,
                                    self.class_weight, [1, 1, 1, 1],
                                    padding="SAME") + self.class_bias
            self.reshape_cam = tf.transpose(self.cam, [0, 3, 1, 2])
            #Get ranking from h_conv
            self.classRank = tf.reduce_mean(self.reshape_cam,
                                            reduction_indices=[2, 3])
            self.est = pixelSoftmax(self.h_conv)
            #self.est = self.h_conv

        with tf.name_scope("Loss"):
            self.flat_gt = tf.reshape(self.select_gt, [-1, self.numClasses])
            self.flat_est = tf.reshape(self.est, [-1, self.numClasses])
            gtClass = tf.argmax(self.flat_gt, 1)
            estClass = tf.argmax(self.flat_est, 1)
            correct = tf.equal(gtClass, estClass)
            self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

            # Per-class F1 from true/false positives and false negatives.
            self.classF1 = []
            for c in range(self.numClasses):
                classGT = tf.equal(gtClass, c)
                classEst = tf.equal(estClass, c)
                classTP = tf.reduce_sum(
                    tf.cast(tf.logical_and(classGT, classEst), tf.float32))
                classFP = tf.reduce_sum(
                    tf.cast(
                        tf.logical_and(tf.logical_not(classGT), classEst),
                        tf.float32))
                classFN = tf.reduce_sum(
                    tf.cast(
                        tf.logical_and(classGT, tf.logical_not(classEst)),
                        tf.float32))
                precision = classTP / (classTP + classFP + self.epsilon)
                recall = classTP / (classTP + classFN + self.epsilon)
                self.classF1.append((2 * precision * recall) /
                                    (precision + recall + self.epsilon))

            self.weightRegLoss = tf.reduce_sum(tf.square(self.class_weight))
            # Identity test: `== None` on an array-like lossWeight is an
            # elementwise comparison and cannot be used as an `if` condition.
            if self.lossWeight is None:
                self.loss = tf.reduce_mean(-tf.reduce_sum(
                    self.select_gt * tf.log(self.est + self.epsilon),
                    reduction_indices=3
                )) + self.regWeight * self.weightRegLoss
            else:
                self.loss = tf.reduce_mean(-tf.reduce_sum(
                    self.lossWeight[0:self.numClasses] * self.select_gt *
                    tf.log(self.est + self.epsilon),
                    reduction_indices=3
                )) + self.regWeight * self.weightRegLoss
            #self.loss = 0.5 * tf.reduce_mean(tf.reduce_sum(tf.square(self.gt - self.est), reduction_indices=[1, 2, 3, 4]))

        with tf.name_scope("Opt"):
            # Weights and bias are trained by separate optimizers.
            self.optimizerAll = tf.train.AdamOptimizer(
                self.learningRate,
                beta1=self.beta1,
                beta2=self.beta2,
                epsilon=self.epsilon).minimize(self.loss,
                                               var_list=[
                                                   self.class_weight,
                                               ])
            self.optimizerBias = tf.train.GradientDescentOptimizer(
                self.learningRateBias).minimize(self.loss,
                                                var_list=[
                                                    self.class_bias,
                                                ])

        # Top-k classes by mean activation (at most 5).
        numK = min(5, self.numClasses)
        (self.eval_vals, self.eval_idx) = tf.nn.top_k(self.classRank, k=numK)

    #Summaries
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('accuracy', self.accuracy)
    for c in range(self.numClasses):
        className = self.idxToName[c]
        tf.summary.scalar(className + ' F1', self.classF1[c])

    tf.summary.histogram('input', self.inputImage)
    tf.summary.histogram('inputPooled', self.inputPooled)
    tf.summary.histogram('gt', self.select_gt)
    #Conv layer histograms
    tf.summary.histogram('h_conv', self.h_conv)
    tf.summary.histogram('est', self.est)
    #Weight and bias hists
    tf.summary.histogram('class_weight', self.class_weight)
    tf.summary.histogram('class_bias', self.class_bias)
def _build(self, inputs, prev_hidden=None):
    """Apply multi-head attention in sequence mode or single-step mode.

    Rank-3 `inputs` are processed as a whole sequence: either only the last
    timestep attends over the sequence (`_last_timestep_only`), or every
    timestep attends causally within `_buffer_size` steps. Rank-2 `inputs`
    are a single step that attends over the key/value/mask buffers in
    `prev_hidden` and pushes the current step to the front of them.

    Args:
        inputs: Rank-2 or rank-3 tensor, or a tuple whose first element is
            such a tensor.
        prev_hidden: `(key_seq, val_seq, mask_seq)` buffers; required when
            `inputs` is rank 2.

    Returns:
        Result of `_discard_trailing_nones(output, next_hidden)`, where
        `output` is the attentive read (concatenated onto the inputs when
        `_dense` is set) and `next_hidden` is the updated buffer triple in
        single-step mode and None in sequence mode.

    Raises:
        ValueError: If `inputs` is neither rank 2 nor rank 3.
    """
    if isinstance(inputs, tuple):
        # snt.Sequential passing (obs, hidden_state) as inputs, where hidden_state is None
        inputs = inputs[0]
    batch_size = shape_if_known(inputs, 0)
    # Identity test: the key dimension may later hold a non-int shape value.
    if self._keys_dim is None:
        self._keys_dim = shape_if_known(inputs, -1)

    if inputs.get_shape().ndims == 3:
        key_seq = self._cores.keys(inputs)
        val_seq = self._cores.vals(inputs)
        d, D = self._keys_dim, self._vals_dim
        t = shape_if_known(key_seq, 1)
        key_seq = tf.reshape(key_seq, [batch_size, t, self._num_heads, d])
        val_seq = tf.reshape(val_seq, [batch_size, t, self._num_heads, D])
        if self._last_timestep_only:
            query = self._cores.query(inputs[:, -1])
            query = tf.reshape(query, [batch_size, self._num_heads, d])
            if not self._attend_over_self:
                # do not want to attend over current timestep
                key_seq = key_seq[:, :-1]
                val_seq = val_seq[:, :-1]
            logits = tf.einsum('bkhd,bhd->bkh', key_seq, query) / np.sqrt(d)
            probs = tf.nn.softmax(logits, dim=1)
            tf.add_to_collection('pr', probs)
            read = tf.einsum('bkh,bkhd->bhd', probs, val_seq)
            read = tf.reshape(
                read, [batch_size, self._num_heads * self._vals_dim])
        else:
            # Do the entire sequence of attention in one pass.
            # We compute a [T, T] matrix of `logits`, and mask out a triangular section to make it causal.
            # Then we softmax over the last dimension to a [T] vector of `probs`.
            # Note that if you compare `probs` or `logits` in conv vs rnn mode, they will differ by a permutation.
            # However, because of how rnn-mode updates its hidden state, the keys/values will also differ by the same permutation,
            # so the results of the attentive read will be the same. When num_heads > 1, we repeat this process a num_heads
            # number of times, and concatenate the respective outputs
            if self._attend_over_self:
                def cond(i, j):
                    return j <= i
            else:
                def cond(i, j):
                    return j < i

            def get_idx(t):
                # (0, key j, 0, query i) entries allowed by causality and
                # by the buffer window.
                return np.array(
                    sorted([(0, j, 0, i) for j in range(t) for i in range(t)
                            if i - self._buffer_size <= j and cond(i, j)]))

            query_seq = self._cores.query(inputs)
            query_seq = tf.reshape(query_seq,
                                   [batch_size, t, self._num_heads, d])
            logits = tf.einsum('bkhd,bqhd->bkhq', key_seq,
                               query_seq) / np.sqrt(d)
            # Use a py_func to compute mask if sequence length is not known yet.
            if isinstance(t, tf.Tensor):
                indices = tf.py_func(get_idx, [t], tf.int64, stateful=False)
                indices.set_shape([None, 4])
                shape = tf.to_int64(tf.stack([1, t, 1, t], axis=0))
            else:
                indices = tf.constant(get_idx(t), tf.int64)
                shape = [1, t, 1, t]
            idx_sparse = tf.SparseTensor(
                indices,
                tf.ones([shape_if_known(indices, 0)], tf.bool),
                shape,
            )
            # Tile the [1, t, 1, t] mask over batch and heads.
            mask = tf.tile(
                tf.sparse_tensor_to_dense(idx_sparse, False),
                [shape_if_known(logits, 0), 1, self._num_heads, 1])
            # Masked positions get a logit of -32 rather than -inf.
            logits = tf.where(mask, logits, -32 * tf.ones_like(logits))
            probs = tf.nn.softmax(logits, dim=1)
            read = tf.einsum('bkhq,bkhd->bhqd', probs, val_seq)
            if not self._attend_over_self:
                # A slight weirdness but needed for correctness.
                # The first query has nothing to attend to; its read is
                # replaced by zeros.
                read = tf.concat([
                    tf.zeros([
                        batch_size, self._num_heads, 1, self._vals_dim
                    ]), read[:, :, 1:]
                ], axis=2)
            # transposing to (B,T,H,d) so we can reshape to (B,T,d*H)
            read = tf.transpose(read, [0, 2, 1, 3])
            read = tf.reshape(
                read, [batch_size, t, self._num_heads * self._vals_dim])
        next_hidden = None
    elif inputs.get_shape().ndims == 2:
        assert prev_hidden is not None
        next_hidden = []
        key_seq, val_seq, mask_seq = prev_hidden
        d, D = self._keys_dim, self._vals_dim
        query = self._cores.query(inputs)
        key_seq = tf.reshape(
            key_seq, [batch_size, self._buffer_size, self._num_heads, d])
        val_seq = tf.reshape(
            val_seq, [batch_size, self._buffer_size, self._num_heads, D])
        query = tf.reshape(query, [batch_size, self._num_heads, d])
        logits = tf.einsum('bkhd,bhd->bkh', key_seq, query) / np.sqrt(d)
        logits = tf.where(mask_seq, logits, -32 * tf.ones_like(logits))
        probs = tf.nn.softmax(logits, dim=1)
        tf.add_to_collection('pr', probs)
        read = tf.einsum('bkh,bkhd->bhd', probs, val_seq)

        # Push the current step to the front of the buffers and drop the
        # oldest entry.
        inputs_expanded = tf.expand_dims(inputs, 1)
        curr_keys = tf.reshape(self._cores.keys(inputs_expanded),
                               [batch_size, 1, self._num_heads, d])
        curr_vals = tf.reshape(self._cores.vals(inputs_expanded),
                               [batch_size, 1, self._num_heads, D])
        key_seq = tf.concat([curr_keys, key_seq[:, :-1]], axis=1)
        val_seq = tf.concat([curr_vals, val_seq[:, :-1]], axis=1)
        mask_seq = tf.concat([
            tf.ones([batch_size, 1, self._num_heads], tf.bool),
            mask_seq[:, :-1]
        ], axis=1)
        next_hidden = key_seq, val_seq, mask_seq
        read = tf.reshape(read,
                          [batch_size, self._num_heads * self._vals_dim])
    else:
        raise ValueError(
            'inputs must have rank 2 or 3, got rank {}'.format(
                inputs.get_shape().ndims))

    if self._cores.postprocess:
        read = self._cores.postprocess(read)

    if self._dense:
        if self._last_timestep_only and inputs.get_shape().ndims == 3:
            output = tf.concat([inputs[:, -1], read], axis=-1)
        else:
            output = tf.concat([inputs, read], axis=-1)
    else:
        output = read

    return _discard_trailing_nones(output, next_hidden)