def read_cnn(filename_queue):
    class CNNRecord(object):
        pass

    result = CNNRecord()
    # Each example in the file ('megabatch') is a label + (300, 42) data in
    # float32 (4 bytes per value): (1 + 12600) * 4 = 50404 bytes per example.
    bytes_per_value = 4  # float32
    label_bytes = 1 * bytes_per_value
    result.height = EXAMPLE_HEIGHT
    result.width = EXAMPLE_WIDTH
    result.depth = 1
    example_bytes = result.height * result.width * result.depth * bytes_per_value
    # Every record consists of a label followed by the example, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + example_bytes
    # record_bytes should be 4 + 300*42*4 = 50404
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value_bytes = reader.read(filename_queue)

    # Convert from a string to a vector of float32 that is record_bytes long.
    value = tf.decode_raw(value_bytes, tf.float32)

    # The first value is the label. Integer division (//) keeps the slice
    # arguments int-typed under Python 3.
    result.label = tf.slice(value, [0], [label_bytes // bytes_per_value])

    # The remaining values after the label represent the example, which is
    # left flattened as [height * width * depth].
    result.example = tf.slice(value, [label_bytes // bytes_per_value],
                              [example_bytes // bytes_per_value])
    return result
def cell_locate(size, bbox, S):
    """Locate which grid cell contains the center of each ground-truth box."""
    x = tf.cast(tf.slice(bbox, [0, 0], [-1, 1]), tf.float32)
    y = tf.cast(tf.slice(bbox, [0, 1], [-1, 1]), tf.float32)
    w = tf.cast(tf.slice(bbox, [0, 2], [-1, 1]), tf.float32)
    h = tf.cast(tf.slice(bbox, [0, 3], [-1, 1]), tf.float32)

    height, width = size
    cell_w = width / S
    cell_h = height / S

    center_y = tf.add(y, tf.mul(h, 0.5))
    center_x = tf.add(x, tf.mul(w, 0.5))

    cell_coord_x = tf.cast(tf.div(center_x, cell_w), tf.int32)
    cell_coord_y = tf.cast(tf.div(center_y, cell_h), tf.int32)

    cell_num = tf.add(tf.mul(cell_coord_y, S), cell_coord_x)

    return cell_num
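# A minimal usage sketch for cell_locate, assuming the pre-1.0 TensorFlow API
# used above (tf.mul/tf.div) with tensorflow imported as tf. The box
# [x, y, w, h] = [10, 10, 20, 20] on a 100x100 image with S=10 has its center
# at (20, 20), which falls in grid row 2, column 2, so cell_num should
# evaluate to 2 * 10 + 2 = 22.
bbox = tf.constant([[10, 10, 20, 20]], dtype=tf.float32)
cell = cell_locate(size=(100, 100), bbox=bbox, S=10)
with tf.Session() as sess:
    print(sess.run(cell))  # expected: [[22]]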
def get_idx_map(shape):
    """Get index map for an image.

    Args:
        shape: [B, T, H, W] or [B, H, W]

    Returns:
        idx: [B, T, H, W, 2] or [B, H, W, 2]
    """
    s = shape
    ndims = tf.shape(s)
    wdim = ndims - 1
    hdim = ndims - 2
    idx_shape = tf.concat(0, [s, tf.constant([1])])
    ones_h = tf.ones(hdim - 1, dtype='int32')
    ones_w = tf.ones(wdim - 1, dtype='int32')
    h_shape = tf.concat(0, [ones_h, tf.constant([-1]), tf.constant([1, 1])])
    w_shape = tf.concat(0, [ones_w, tf.constant([-1]), tf.constant([1])])

    idx_y = tf.zeros(idx_shape, dtype='float')
    idx_x = tf.zeros(idx_shape, dtype='float')

    h = tf.slice(s, ndims - 2, [1])
    w = tf.slice(s, ndims - 1, [1])
    idx_y += tf.reshape(tf.to_float(tf.range(h[0])), h_shape)
    idx_x += tf.reshape(tf.to_float(tf.range(w[0])), w_shape)
    idx = tf.concat(ndims[0], [idx_y, idx_x])

    return idx
def read_record(filename_queue):
    class FCNRecord(object):
        pass

    result = FCNRecord()
    result.mask_height = int(420 / DOWNSAMPLE_FACTOR)
    result.mask_width = int(580 / DOWNSAMPLE_FACTOR)
    result.mask_depth = 1
    result.img_depth = 1
    img_len = result.mask_height * result.mask_width * result.img_depth
    mask_len = result.mask_height * result.mask_width * result.mask_depth
    record_len = img_len + mask_len

    reader = tf.FixedLengthRecordReader(record_bytes=record_len)
    result.key, value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(value, tf.uint8)
    #print(record_bytes.get_shape())

    int_image = tf.reshape(tf.slice(record_bytes, [0], [img_len]),
                           [result.mask_height, result.mask_width])
    rgb_image = tf.pack([int_image, int_image, int_image])
    rgb_img = tf.transpose(rgb_image, (1, 2, 0))
    result.image = tf.cast(rgb_img, tf.float32)

    bool_mask = tf.cast(
        tf.reshape(tf.slice(record_bytes, [img_len], [mask_len]),
                   [result.mask_height, result.mask_width]), tf.bool)
    hot_mask = tf.pack([bool_mask, tf.logical_not(bool_mask)])
    h_mask = tf.transpose(hot_mask, (1, 2, 0))
    result.mask = tf.cast(h_mask, tf.float32)

    return result
def BatchClipByL2norm(t, upper_bound, name=None):
    """Clip an array of tensors by L2 norm.

    Shrink each dimension-0 slice of tensor (for matrix it is each row) such
    that the L2 norm is at most upper_bound. Here we clip each row as it
    corresponds to each example in the batch.

    Args:
        t: the input tensor.
        upper_bound: the upper bound of the L2 norm.
        name: optional name.

    Returns:
        the clipped tensor.
    """
    assert upper_bound > 0
    with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
        saved_shape = tf.shape(t)
        batch_size = tf.slice(saved_shape, [0], [1])
        t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
        upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                                  tf.constant(1.0 / upper_bound))
        # Add a small number to avoid divide by 0
        l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001)
        scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound
        clipped_t = tf.matmul(tf.diag(scale), t2)
        clipped_t = tf.reshape(clipped_t, saved_shape, name=name)
    return clipped_t
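# A quick numeric check for BatchClipByL2norm (assumes the pre-1.0 tf.op_scope
# API used above). The first row has norm 5 and is rescaled to norm 1; the
# second row already has norm ~0.71 <= 1 and passes through unchanged.
t = tf.constant([[3.0, 4.0, 0.0],
                 [0.5, 0.5, 0.0]])
clipped = BatchClipByL2norm(t, upper_bound=1.0)
with tf.Session() as sess:
    print(sess.run(clipped))  # rows ~[0.6, 0.8, 0.0] and [0.5, 0.5, 0.0]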
def embed_sequences(self, embed_sequence_batch):
    """Return sentence embeddings as a tensor with shape
    [batch_size, hidden_size * 2].
    """
    forward_values = embed_sequence_batch.values
    forward_mask = embed_sequence_batch.mask
    backward_values = tf.reverse(forward_values, [False, True, False])
    backward_mask = tf.reverse(forward_mask, [False, True])

    # Initialize LSTMs
    self._forward_lstm = LSTM(self.hidden_size, return_sequences=True)
    self._backward_lstm = LSTM(self.hidden_size, return_sequences=True)

    # Pass input through the LSTMs
    # Shape: (batch_size, seq_length, hidden_size)
    forward_seq = self._forward_lstm(forward_values, forward_mask)
    forward_seq.set_shape((None, self.seq_length, self.hidden_size))
    backward_seq = self._backward_lstm(backward_values, backward_mask)
    backward_seq.set_shape((None, self.seq_length, self.hidden_size))

    # Stitch the outputs together --> hidden states (for computing attention)
    # Final dimension: (batch_size, seq_length, hidden_size * 2)
    lstm_states = tf.concat(2, [forward_seq,
                                tf.reverse(backward_seq, [False, True, False])])
    self._hidden_states = SequenceBatch(lstm_states, forward_mask)

    # Stitch the final outputs together --> sequence embedding
    # Final dimension: (batch_size, hidden_size * 2)
    seq_length = tf.shape(forward_values)[1]
    forward_final = tf.slice(forward_seq, [0, seq_length - 1, 0],
                             [-1, 1, self.hidden_size])
    backward_final = tf.slice(backward_seq, [0, seq_length - 1, 0],
                              [-1, 1, self.hidden_size])
    return tf.squeeze(tf.concat(2, [forward_final, backward_final]), [1])
def knn_point(k, xyz1, xyz2):
    '''
    Input:
        k: int32, number of k in k-nn search
        xyz1: (batch_size, ndataset, c) float32 array, input points
        xyz2: (batch_size, npoint, c) float32 array, query points
    Output:
        val: (batch_size, npoint, k) float32 array, L2 distances
        idx: (batch_size, npoint, k) int32 array, indices to input points
    '''
    b = xyz1.get_shape()[0].value
    n = xyz1.get_shape()[1].value
    c = xyz1.get_shape()[2].value
    m = xyz2.get_shape()[1].value
    print(b, n, c, m)
    print(xyz1, (b, 1, n, c))
    xyz1 = tf.tile(tf.reshape(xyz1, (b, 1, n, c)), [1, m, 1, 1])
    xyz2 = tf.tile(tf.reshape(xyz2, (b, m, 1, c)), [1, 1, n, 1])
    dist = tf.reduce_sum((xyz1 - xyz2) ** 2, -1)
    print(dist, k)
    outi, out = select_top_k(k, dist)
    idx = tf.slice(outi, [0, 0, 0], [-1, -1, k])
    val = tf.slice(out, [0, 0, 0], [-1, -1, k])
    print(idx, val)
    #val, idx = tf.nn.top_k(-dist, k=k)  # ONLY SUPPORTS CPU
    return val, idx
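# A pure-TensorFlow cross-check of the same distance + top-k logic, mirroring
# the commented-out tf.nn.top_k fallback above (CPU-friendly, no custom
# select_top_k op). For the query point (0.1, 0, 0) the two nearest inputs
# are points 0 and 1.
xyz1 = tf.constant([[[0., 0., 0.], [1., 0., 0.], [2., 0., 0.]]])  # (1, 3, 3)
xyz2 = tf.constant([[[0.1, 0., 0.]]])                             # (1, 1, 3)
dist = tf.reduce_sum((tf.expand_dims(xyz2, 2) - tf.expand_dims(xyz1, 1)) ** 2, -1)
neg_val, idx = tf.nn.top_k(-dist, k=2)  # idx -> [[[0, 1]]], distances = -neg_val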
def input_fn():
    starts = tf.random_uniform([batch_size], maxval=(2 * np.pi), seed=seed)
    sin_curves = tf.map_fn(_sin_fn, (starts,), dtype=tf.float32)
    inputs = tf.expand_dims(
        tf.slice(sin_curves, [0, 0], [batch_size, sequence_length]), 2)
    labels = tf.slice(sin_curves, [0, 1], [batch_size, sequence_length])
    return {'inputs': inputs}, labels
def diff(x, axis=-1):
    """Take the finite difference of a tensor along an axis.

    Args:
        x: Input tensor of any dimension.
        axis: Axis on which to take the finite difference.

    Returns:
        d: Tensor with size less than x by 1 along the difference dimension.

    Raises:
        ValueError: Axis out of range for tensor.
    """
    shape = x.get_shape()
    if axis >= len(shape):
        raise ValueError('Invalid axis index: %d for tensor with only %d axes.'
                         % (axis, len(shape)))

    begin_back = [0 for unused_s in range(len(shape))]
    begin_front = [0 for unused_s in range(len(shape))]
    begin_front[axis] = 1

    size = shape.as_list()
    size[axis] -= 1
    slice_front = tf.slice(x, begin_front, size)
    slice_back = tf.slice(x, begin_back, size)
    d = slice_front - slice_back
    return d
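# Usage sketch for diff: the finite difference of [1, 3, 6, 10] along axis 1
# is [2, 3, 4], with the differenced axis shrinking by one.
x = tf.constant([[1.0, 3.0, 6.0, 10.0]])
d = diff(x, axis=1)
with tf.Session() as sess:
    print(sess.run(d))  # [[2. 3. 4.]]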
def fast_rcnn_minibatch(self, reference_boxes):
    with tf.variable_scope('fast_rcnn_minibatch'):

        reference_boxes_mattached_gtboxes, object_mask, label = \
            self.fast_rcnn_find_positive_negative_samples(reference_boxes)

        positive_indices = tf.reshape(tf.where(tf.not_equal(object_mask, 0.)), [-1])
        num_of_positives = tf.minimum(
            tf.shape(positive_indices)[0],
            tf.cast(self.fast_rcnn_minibatch_size * self.fast_rcnn_positives_ratio,
                    tf.int32))
        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices, begin=[0],
                                    size=[num_of_positives])

        negative_indices = tf.reshape(tf.where(tf.equal(object_mask, 0.)), [-1])
        num_of_negatives = tf.minimum(
            tf.shape(negative_indices)[0],
            self.fast_rcnn_minibatch_size - num_of_positives)
        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(negative_indices, begin=[0],
                                    size=[num_of_negatives])

        minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
        minibatch_indices = tf.random_shuffle(minibatch_indices)

        minibatch_reference_boxes_mattached_gtboxes = tf.gather(
            reference_boxes_mattached_gtboxes, minibatch_indices)
        object_mask = tf.gather(object_mask, minibatch_indices)
        label = tf.gather(label, minibatch_indices)
        label_one_hot = tf.one_hot(label, self.num_classes + 1)

        return minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, \
            object_mask, label_one_hot
def forward(self, state, autoencoder):
    '''
    state: vector
    '''
    if autoencoder is None:
        _input = state
    else:
        _input, _ = autoencoder.forward(state)

    state_ = _input

    # clip observation variables from full state
    x_H_ = tf.slice(state_, [0, 0], [-1, 6])
    v_ct = tf.slice(state_, [0, 1], [-1, 1]) - tf.slice(state_, [0, 3], [-1, 1])
    # x_H_ = tf.concat(concat_dim=1, values=[x_H_, v_ct])

    h0 = tf.nn.xw_plus_b(x_H_, self.weights['0'], self.biases['0'], name='h0')
    relu0 = tf.nn.relu(h0)

    h1 = tf.nn.xw_plus_b(relu0, self.weights['1'], self.biases['1'], name='h1')
    relu1 = tf.nn.relu(h1)

    relu1_do = tf.nn.dropout(relu1, self.arch_params['do_keep_prob'])

    a = tf.nn.xw_plus_b(relu1_do, self.weights['c'], self.biases['c'], name='a')

    return a
def read_cifar_files(filename_queue, distort_images=True):
    reader = tf.FixedLengthRecordReader(record_bytes=record_length)
    key, record_string = reader.read(filename_queue)
    record_bytes = tf.decode_raw(record_string, tf.uint8)
    image_label = tf.cast(tf.slice(record_bytes, [0], [1]), tf.int32)

    # Extract image
    image_extracted = tf.reshape(
        tf.slice(record_bytes, [1], [image_vec_length]),
        [num_channels, image_height, image_width])

    # Reshape image
    image_uint8image = tf.transpose(image_extracted, [1, 2, 0])
    reshaped_image = tf.cast(image_uint8image, tf.float32)

    # Crop or pad the image to the target size (a central crop, not a random one)
    final_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                         crop_width, crop_height)
    if distort_images:
        # Randomly flip the image horizontally, change the brightness and contrast
        final_image = tf.image.random_flip_left_right(final_image)
        final_image = tf.image.random_brightness(final_image, max_delta=63)
        final_image = tf.image.random_contrast(final_image, lower=0.2, upper=1.8)

    # Normalize whitening
    final_image = tf.image.per_image_standardization(final_image)
    return final_image, image_label
def get_image(filename_queue):
    # CIFAR10Record is a 'C struct' bundling tensorflow input data
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()
    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename_queue. No
    # header or footer in the CIFAR-10 format, so we leave header_bytes
    # and footer_bytes at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                             [result.depth, result.height, result.width])
    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
def read_cifar10(filename_queue):
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()
    label_bytes = 1
    result.height = 32
    result.width = 32
    result.depth = 3
    images_bytes = result.height * result.width * result.depth
    record_bytes = label_bytes + images_bytes
    print(record_bytes)

    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)
    record = tf.decode_raw(value, tf.uint8)
    result.label = tf.cast(tf.slice(record, [0], [label_bytes]), tf.int32)
    depth_major = tf.reshape(
        tf.slice(record, [label_bytes], [images_bytes]),
        [result.depth, result.height, result.width]
    )
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])
    return result
def input_fn():
    random_sequence = tf.random_uniform([batch_size, sequence_length + 1],
                                        0, 2, dtype=tf.int32, seed=seed)
    labels = tf.slice(random_sequence, [0, 0], [batch_size, sequence_length])
    inputs = tf.expand_dims(
        tf.to_float(tf.slice(random_sequence, [0, 1],
                             [batch_size, sequence_length])), 2)
    return {"inputs": inputs}, labels
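# Shape sketch for the input_fn above: from a random 0/1 sequence of length
# sequence_length + 1, labels take elements [:-1] and inputs take elements
# [1:], so the target at step t is the value that preceded the current input.
# E.g. for batch_size=2, sequence_length=3 (values are illustrative):
#   random_sequence = [[1, 0, 1, 1], [0, 1, 1, 0]]
#   labels          = [[1, 0, 1],    [0, 1, 1]]
#   inputs[..., 0]  = [[0, 1, 1],    [1, 1, 0]]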
def read_cifar10(filename_queue):
    """Read CIFAR-10 data.

    Args:
        filename_queue: A queue of strings with the filenames to read from.
    """
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()

    # Dimensions of the CIFAR-10 data
    label_bytes = 1
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    record_bytes = label_bytes + image_bytes

    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(value, tf.uint8)
    result.label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)
    depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                             [result.depth, result.height, result.width])
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])
    return result
def read_cifar10(filename_queue):
    """Reads and parses examples from CIFAR10 data files.

    Recommendation: if you want N-way read parallelism, call this function
    N times. This will give you N independent Readers reading different
    files & positions within those files, which will give better mixing of
    examples.

    Args:
        filename_queue: A queue of strings with the filenames to read from.

    Returns:
        An object representing a single example, with the following fields:
            height: number of rows in the result (32)
            width: number of columns in the result (32)
            depth: number of color channels in the result (3)
            key: a scalar string Tensor describing the filename & record number
                for this example.
            label: an int32 Tensor with the label in the range 0..9.
            uint8image: a [height, width, depth] uint8 Tensor with the image data
    """
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()

    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename_queue. No
    # header or footer in the CIFAR-10 format, so we leave header_bytes
    # and footer_bytes at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                             [result.depth, result.height, result.width])
    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
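# Typical usage sketch for the reader above (TF 1.x queue-based input
# pipeline; the file paths are assumptions): build a filename queue, read one
# example, then start queue runners before evaluating any tensors.
filenames = ['cifar-10-batches-bin/data_batch_%d.bin' % i for i in range(1, 6)]
filename_queue = tf.train.string_input_producer(filenames)
read_input = read_cifar10(filename_queue)
float_image = tf.cast(read_input.uint8image, tf.float32)
# ... then: sess = tf.Session(); tf.train.start_queue_runners(sess=sess)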
def _concat_top_nodes(self, old_beam_path, cur_top_nodes):
    cur_path_id = tf.reshape(tf.slice(cur_top_nodes, [0, 0], [-1, 1]), [-1])
    cur_node = tf.reshape(tf.slice(cur_top_nodes, [0, 1], [-1, 1]), [-1, 1])
    old_path = tf.gather(old_beam_path, cur_path_id)
    new_beam_path = tf.concat(1, [old_path, cur_node])
    return new_beam_path
def slice_constant(data, batch_size=32, name='constant_data', global_step=None):
    """Provide a slice based on the global_step.

    This is useful when the entire data array can be stored in memory because
    it allows you to feed the data very efficiently.

    Args:
        data: A numpy array or tensor.
        batch_size: The batch size for the produced data.
        name: An optional name for this data.
        global_step: A global step variable that is used to read the data. If
            None then the default prettytensor global_step is used.

    Returns:
        A tensor that produces the given data.
    """
    with tf.name_scope(name):
        all_data = tf.convert_to_tensor(data)
        global_step = global_step or bookkeeper.global_step()

        count = len(data) // batch_size  # integer division for Python 3
        extra = len(data) - count * batch_size

        if extra:
            offset = tf.mod(global_step, count)
            return tf.slice(all_data, offset * batch_size, batch_size)
        else:
            offset = tf.mod(global_step, count + 1)
            return tf.slice(all_data, offset * batch_size,
                            tf.where(tf.equal(offset, count), extra, batch_size))
def make_minibatch(self, valid_anchors):
    with tf.variable_scope('rpn_minibatch'):

        # in labels (shape is [N, ]): 1 is positive, 0 is negative, -1 is ignored
        labels, anchor_matched_gtboxes, object_mask = \
            self.rpn_find_positive_negative_samples(valid_anchors)  # [num_of_valid_anchors, ]

        # using labels here is the same as using object_mask
        positive_indices = tf.reshape(tf.where(tf.equal(labels, 1.0)), [-1])
        # num of positives <= minibatch_size * 0.5
        num_of_positives = tf.minimum(
            tf.shape(positive_indices)[0],
            tf.cast(self.rpn_mini_batch_size * self.rpn_positives_ratio, tf.int32))
        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices, begin=[0],
                                    size=[num_of_positives])
        # positive_anchors = tf.gather(self.anchors, positive_indices)

        negative_indices = tf.reshape(tf.where(tf.equal(labels, 0.0)), [-1])
        num_of_negatives = tf.minimum(self.rpn_mini_batch_size - num_of_positives,
                                      tf.shape(negative_indices)[0])
        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(negative_indices, begin=[0],
                                    size=[num_of_negatives])
        # negative_anchors = tf.gather(self.anchors, negative_indices)

        minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
        minibatch_indices = tf.random_shuffle(minibatch_indices)

        minibatch_anchor_matched_gtboxes = tf.gather(anchor_matched_gtboxes,
                                                     minibatch_indices)
        object_mask = tf.gather(object_mask, minibatch_indices)
        labels = tf.cast(tf.gather(labels, minibatch_indices), tf.int32)
        labels_one_hot = tf.one_hot(labels, depth=2)
        return minibatch_indices, minibatch_anchor_matched_gtboxes, \
            object_mask, labels_one_hot
def diagonal_bilstm(inputs, conf, scope='diagonal_bilstm'):
    with tf.variable_scope(scope):
        def reverse(inputs):
            return tf.reverse(inputs, [False, False, True, False])

        output_state_fw = diagonal_lstm(inputs, conf, scope='output_state_fw')
        output_state_bw = reverse(diagonal_lstm(reverse(inputs), conf,
                                                scope='output_state_bw'))

        tf.add_to_collection('output_state_fw', output_state_fw)
        tf.add_to_collection('output_state_bw', output_state_bw)

        if conf.use_residual:
            residual_state_fw = conv2d(output_state_fw, conf.hidden_dims * 2,
                                       [1, 1], "B", scope="residual_fw")
            output_state_fw = residual_state_fw + inputs

            residual_state_bw = conv2d(output_state_bw, conf.hidden_dims * 2,
                                       [1, 1], "B", scope="residual_bw")
            output_state_bw = residual_state_bw + inputs

            tf.add_to_collection('residual_state_fw', residual_state_fw)
            tf.add_to_collection('residual_state_bw', residual_state_bw)
            tf.add_to_collection('residual_output_state_fw', output_state_fw)
            tf.add_to_collection('residual_output_state_bw', output_state_bw)

        batch, height, width, channel = get_shape(output_state_bw)

        output_state_bw_except_last = tf.slice(output_state_bw, [0, 0, 0, 0],
                                               [-1, height - 1, -1, -1])
        output_state_bw_only_last = tf.slice(output_state_bw, [0, height - 1, 0, 0],
                                             [-1, 1, -1, -1])
        dummy_zeros = tf.zeros_like(output_state_bw_only_last)
        output_state_bw_with_last_zeros = tf.concat(
            1, [output_state_bw_except_last, dummy_zeros])

        tf.add_to_collection('output_state_bw_with_last_zeros',
                             output_state_bw_with_last_zeros)

        return output_state_fw + output_state_bw_with_last_zeros
def process(_, current):
    count = tf.cast(current[0], tf.int32)
    current = tf.slice(current, [1], [-1])
    max = tf.shape(current)[0]
    sm = tf.expand_dims(tf.slice(current, [max - count], [-1]), 0)
    sm = tf.nn.softmax(sm)
    return tf.concat(0, [tf.zeros([max - count]), tf.squeeze(sm, [0])])
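# Usage sketch: the first element of each row is a count, and softmax is
# applied only over the last `count` entries of the remainder, with zeros in
# front. (The unused first argument suggests this was written as a tf.scan
# callback; that framing is an assumption. Uses the pre-1.0 tf.concat
# signature seen above.)
row = tf.constant([2.0, 5.0, 1.0, 1.0])  # count=2, data=[5, 1, 1]
out = process(None, row)  # -> [0.0, 0.5, 0.5] == [zeros, softmax([1, 1])]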
def get_model(point_cloud, is_training, bn_decay=None):
    """ Part segmentation PointNet, input is BxNx6 (XYZ NormalX NormalY NormalZ),
        output Bx50 """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])
    l0_points = tf.slice(point_cloud, [0, 0, 3], [-1, -1, 3])

    # Set Abstraction layers
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(
        l0_xyz, l0_points, npoint=512, radius=0.2, nsample=64,
        mlp=[64, 64, 128], mlp2=None, group_all=False,
        is_training=is_training, bn_decay=bn_decay, scope='layer1')
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(
        l1_xyz, l1_points, npoint=128, radius=0.4, nsample=64,
        mlp=[128, 128, 256], mlp2=None, group_all=False,
        is_training=is_training, bn_decay=bn_decay, scope='layer2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(
        l2_xyz, l2_points, npoint=None, radius=None, nsample=None,
        mlp=[256, 512, 1024], mlp2=None, group_all=True,
        is_training=is_training, bn_decay=bn_decay, scope='layer3')

    # Feature Propagation layers
    l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points,
                                   [256, 256], is_training, bn_decay,
                                   scope='fa_layer1')
    l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points,
                                   [256, 128], is_training, bn_decay,
                                   scope='fa_layer2')
    l0_points = pointnet_fp_module(l0_xyz, l1_xyz,
                                   tf.concat([l0_xyz, l0_points], axis=-1),
                                   l1_points, [128, 128, 128], is_training,
                                   bn_decay, scope='fa_layer3')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True,
                         is_training=is_training, scope='fc1', bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, 50, 1, padding='VALID', activation_fn=None,
                         scope='fc2')

    return net, end_points
def read_cifar10(filename_queue):
    result = CIFAR10Record()

    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    record_bytes = label_bytes + image_bytes

    # Reader for fixed-length records
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Decode value with decode_raw
    record_bytes = tf.decode_raw(value, tf.uint8)

    # Label data (slice off the first byte)
    result.label = tf.cast(tf.slice(record_bytes, [0], [label_bytes]), tf.int32)

    # Image data
    depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                             [result.depth, result.height, result.width])

    # Rearrange with transpose into [1, 2, 0] order
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result
def make_input_output_histogramm(inp, labels):
    x_slice = tf.slice(input_=inp, begin=[0, 0], size=[-1, 1])
    v_slice = tf.slice(input_=inp, begin=[0, 1], size=[-1, 1])
    tf.histogram_summary('x_input', x_slice)
    tf.histogram_summary('v_input', v_slice)
    tf.histogram_summary('labels hist', labels)
def __call__(self, inputs, state, scope=None):
    '''Runs vanilla LSTM Cell and applies zoneout.'''
    # Apply vanilla LSTM
    output, new_state = self._cell(inputs, state, scope)

    if self.state_is_tuple:
        (prev_c, prev_h) = state
        (new_c, new_h) = new_state
    else:
        num_proj = self._cell._num_units if self._cell._num_proj is None \
            else self._cell._num_proj
        prev_c = tf.slice(state, [0, 0], [-1, self._cell._num_units])
        prev_h = tf.slice(state, [0, self._cell._num_units], [-1, num_proj])
        new_c = tf.slice(new_state, [0, 0], [-1, self._cell._num_units])
        new_h = tf.slice(new_state, [0, self._cell._num_units], [-1, num_proj])

    # Apply zoneout
    if self.is_training:
        # nn.dropout takes keep_prob (probability to keep activations),
        # not drop_prob (probability to mask activations)!
        c = (1 - self._zoneout_cell) * tf.nn.dropout(
            new_c - prev_c, (1 - self._zoneout_cell)) + prev_c
        h = (1 - self._zoneout_outputs) * tf.nn.dropout(
            new_h - prev_h, (1 - self._zoneout_outputs)) + prev_h
    else:
        c = (1 - self._zoneout_cell) * new_c + self._zoneout_cell * prev_c
        h = (1 - self._zoneout_outputs) * new_h + self._zoneout_outputs * prev_h

    new_state = tf.nn.rnn_cell.LSTMStateTuple(c, h) if self.state_is_tuple \
        else tf.concat(1, [c, h])

    return output, new_state
def compute_first_or_last(self, select, first=True):
    # perform first or last operation on row select with probabilistic row selection
    answer = tf.zeros_like(select)
    running_sum = tf.zeros([self.batch_size, 1], self.data_type)
    for i in range(self.max_elements):
        if first:
            current = tf.slice(select, [0, i], [self.batch_size, 1])
        else:
            current = tf.slice(select, [0, self.max_elements - 1 - i],
                               [self.batch_size, 1])
        curr_prob = current * (1 - running_sum)
        curr_prob = curr_prob * tf.cast(curr_prob >= 0.0, self.data_type)
        running_sum += curr_prob
        temp_ans = []
        curr_prob = tf.expand_dims(tf.reshape(curr_prob, [self.batch_size]), 0)
        for i_ans in range(self.max_elements):
            if not first and i_ans == self.max_elements - 1 - i:
                temp_ans.append(curr_prob)
            elif first and i_ans == i:
                temp_ans.append(curr_prob)
            else:
                temp_ans.append(tf.zeros_like(curr_prob))
        temp_ans = tf.transpose(tf.concat(axis=0, values=temp_ans))
        answer += temp_ans
    return answer
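# Worked example for the loop above (batch_size=1, max_elements=3,
# first=True): for a soft row selection select = [0.3, 0.5, 0.2], the loop
# assigns curr_prob = 0.3 at position 0, then 0.5 * (1 - 0.3) = 0.35 at
# position 1, then 0.2 * (1 - 0.65) = 0.07 at position 2, so the probability
# mass concentrates on the earliest selected rows (the reverse for last).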
def tf_format_mnist_images(X, Y, Y_, n=100, lines=10):
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    # indices of correctly recognised images
    correctly_recognised_indices = tf.squeeze(tf.where(correct_prediction), [1])
    # indices of incorrectly recognised images
    incorrectly_recognised_indices = tf.squeeze(
        tf.where(tf.logical_not(correct_prediction)), [1])
    # images reordered with indices of unrecognised images first
    everything_incorrect_first = tf.concat(
        [incorrectly_recognised_indices, correctly_recognised_indices], 0)
    # compute first 100 only - no space to display more anyway
    everything_incorrect_first = tf.slice(everything_incorrect_first, [0], [n])

    # compute n=100 digits to display only
    Xs = tf.gather(X, everything_incorrect_first)
    Ys = tf.gather(Y, everything_incorrect_first)
    Ys_ = tf.gather(Y_, everything_incorrect_first)
    correct_prediction_s = tf.gather(correct_prediction, everything_incorrect_first)

    digits_left = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_left())
    # correct digits to be printed on the images
    correct_tags = tf.gather(digits_left, tf.argmax(Ys_, 1))
    digits_right = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_right())
    # computed digits to be printed on the images
    computed_tags = tf.gather(digits_right, tf.argmax(Ys, 1))
    #superimposed_digits = correct_tags + computed_tags
    # only print the correct and computed digits on unrecognised images
    superimposed_digits = tf.where(correct_prediction_s,
                                   tf.zeros_like(correct_tags),
                                   correct_tags + computed_tags)
    correct_bkg = tf.reshape(tf.tile([1.3, 1.3, 1.3], [28 * 28]),
                             [1, 28, 28, 3])  # white background
    incorrect_bkg = tf.reshape(tf.tile([1.3, 1.0, 1.0], [28 * 28]),
                               [1, 28, 28, 3])  # red background
    # pick either the red or the white background depending on recognised status
    recognised_bkg = tf.gather(tf.concat([incorrect_bkg, correct_bkg], 0),
                               tf.cast(correct_prediction_s, tf.int32))

    I = tf.image.grayscale_to_rgb(Xs)
    # stencil extra data on top of images and reorder them unrecognised first
    I = ((1 - (I + superimposed_digits)) * recognised_bkg) / 1.3
    I = tf.image.convert_image_dtype(I, tf.uint8, saturate=True)
    Islices = []  # 100 images => 10x10 image block
    for imslice in range(lines):
        Islices.append(tf.concat(tf.unstack(
            tf.slice(I, [imslice * n // lines, 0, 0, 0],
                     [n // lines, 28, 28, 3])), 1))
    I = tf.concat(Islices, 0)
    return I
def _transform(theta, input_dim, out_size):
    num_batch = tf.shape(input=input_dim)[0]
    num_channels = tf.shape(input=input_dim)[3]
    theta = tf.reshape(theta, (-1, 2, 3))
    theta = tf.cast(theta, 'float32')

    # grid of (x_t, y_t, 1), eq (1) in ref [1]
    out_height = out_size[0]
    out_width = out_size[1]
    grid = _meshgrid(out_height, out_width)
    grid = tf.expand_dims(grid, 0)
    grid = tf.reshape(grid, [-1])
    grid = tf.tile(grid, tf.stack([num_batch]))
    grid = tf.reshape(grid, tf.stack([num_batch, 3, -1]))

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = tf.matmul(theta, grid)
    x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1])
    y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1])
    x_s_flat = tf.reshape(x_s, [-1])
    y_s_flat = tf.reshape(y_s, [-1])

    input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, out_size)

    output = tf.reshape(input_transformed,
                        tf.stack([num_batch, out_height, out_width, num_channels]))
    return output
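# Usage sketch (assumes the companion _meshgrid/_interpolate helpers of this
# spatial transformer module are in scope, and a fixed batch of 4 for
# simplicity): the identity affine parameters theta = [1, 0, 0, 0, 1, 0]
# should reproduce the input up to bilinear resampling.
images = tf.placeholder(tf.float32, [4, 64, 64, 3])
theta = tf.tile(tf.constant([[1., 0., 0., 0., 1., 0.]]), [4, 1])
warped = _transform(theta, images, out_size=(64, 64))  # -> [4, 64, 64, 3]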
def ApplyPcaAndWhitening(data,
                         pca_matrix,
                         pca_mean,
                         output_dim,
                         use_whitening=False,
                         pca_variances=None):
    """Applies PCA/whitening to data.

    Args:
        data: [N, dim] float tensor containing data which undergoes
            PCA/whitening.
        pca_matrix: [dim, dim] float tensor PCA matrix, row-major.
        pca_mean: [dim] float tensor, mean to subtract before projection.
        output_dim: Number of dimensions to use in output data, of type int.
        use_whitening: Whether whitening is to be used.
        pca_variances: [dim] float tensor containing PCA variances. Only used
            if use_whitening is True.

    Returns:
        output: [N, output_dim] float tensor with output of PCA/whitening
            operation.
    """
    output = tf.matmul(
        tf.subtract(data, pca_mean),
        tf.slice(pca_matrix, [0, 0], [output_dim, -1]),
        transpose_b=True,
        name='pca_matmul')

    # Apply whitening if desired.
    if use_whitening:
        output = tf.divide(
            output,
            tf.sqrt(tf.slice(pca_variances, [0], [output_dim])),
            name='whitening')

    return output
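# Usage sketch with hypothetical shapes: project 128-D descriptors down to
# 40-D. The identity matrix and zero mean are stand-ins for a PCA learned
# offline; with them the output is simply the first 40 dimensions.
data = tf.random_normal([100, 128])
pca_matrix = tf.eye(128)   # stand-in for a learned PCA matrix
pca_mean = tf.zeros([128])
reduced = ApplyPcaAndWhitening(data, pca_matrix, pca_mean, output_dim=40)
# reduced has shape [100, 40]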
def _compute_loss(self, input_dict):
    """Computes cross-entropy based sequence-to-sequence loss.

    :param input_dict: inputs to compute loss
        {
            "logits": logits tensor of shape [batch_size, T, dim]
            "target_sequence": tensor of shape [batch_size, T]
            "tgt_lengths": tensor of shape [batch_size] or None
        }
    :return: Singleton loss tensor
    """
    logits = input_dict["decoder_output"]["logits"]
    #target_sequence = input_dict["tgt_sequence"]
    #tgt_lengths = input_dict["tgt_length"]
    target_sequence = input_dict['target_tensors'][0]
    tgt_lengths = input_dict['target_tensors'][1]

    if self._offset_target_by_one:
        # this is necessary for auto-regressive models
        current_ts = tf.to_int32(tf.minimum(
            tf.shape(target_sequence)[1],
            tf.shape(logits)[1],
        )) - 1

        logits = tf.slice(
            logits,
            begin=[0, 0, 0],
            size=[-1, current_ts, -1],
        )
        target_sequence = tf.slice(target_sequence,
                                   begin=[0, 1],
                                   size=[-1, current_ts])
    else:
        current_ts = tf.to_int32(tf.minimum(
            tf.shape(target_sequence)[1],
            tf.shape(logits)[1],
        ))

    # Cast logits after potential slice
    if logits.dtype.base_dtype != tf.float32:
        logits = tf.cast(logits, tf.float32)

    if self._do_mask:
        if tgt_lengths is None:
            raise ValueError("If you are masking loss, tgt_lengths can't be None")
        mask = tf.sequence_mask(lengths=tgt_lengths - 1,
                                maxlen=current_ts,
                                dtype=logits.dtype)  # TODO: why store in float?
    else:
        mask = tf.cast(tf.ones_like(target_sequence), logits.dtype)

    """
    if self._average_across_timestep:
        loss = tf.contrib.seq2seq.sequence_loss(
            logits=logits,
            targets=target_sequence,
            weights=mask,
            average_across_timesteps=self._average_across_timestep,
            average_across_batch=True,
            softmax_loss_function=tf.nn.sparse_softmax_cross_entropy_with_logits,
        )
    else:
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(target_sequence, shape=[-1]),
            logits=tf.reshape(logits, shape=[-1, self._tgt_vocab_size]),
        )
        loss = tf.reduce_sum(crossent * tf.reshape(mask, shape=[-1]))
        loss /= self._batch_size_per_gpu
    """
    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(target_sequence, shape=[-1]),
        logits=tf.reshape(logits, shape=[-1, self._tgt_vocab_size]),
    )
    if self._average_across_timestep:
        loss = tf.reduce_mean(crossent * tf.reshape(mask, shape=[-1]))
    else:
        loss = tf.reduce_sum(crossent * tf.reshape(mask, shape=[-1]))
        loss /= self._batch_size
    return loss
def embedding_postprocessor(input_tensor,
                            use_token_type=False,
                            token_type_ids=None,
                            token_type_vocab_size=16,
                            token_type_embedding_name="token_type_embeddings",
                            use_position_embeddings=True,
                            position_embedding_name="position_embeddings",
                            initializer_range=0.02,
                            max_position_embeddings=512,
                            dropout_prob=0.1):
    """Performs various post-processing on a word embedding tensor.

    Args:
        input_tensor: float Tensor of shape [batch_size, seq_length,
            embedding_size].
        use_token_type: bool. Whether to add embeddings for `token_type_ids`.
        token_type_ids: (optional) int32 Tensor of shape [batch_size,
            seq_length]. Must be specified if `use_token_type` is True.
        token_type_vocab_size: int. The vocabulary size of `token_type_ids`.
        token_type_embedding_name: string. The name of the embedding table
            variable for token type ids.
        use_position_embeddings: bool. Whether to add position embeddings for
            the position of each token in the sequence.
        position_embedding_name: string. The name of the embedding table
            variable for positional embeddings.
        initializer_range: float. Range of the weight initialization.
        max_position_embeddings: int. Maximum sequence length that might ever
            be used with this model. This can be longer than the sequence
            length of input_tensor, but cannot be shorter.
        dropout_prob: float. Dropout probability applied to the final output
            tensor.

    Returns:
        float tensor with same shape as `input_tensor`.

    Raises:
        ValueError: One of the tensor shapes or input values is invalid.
    """
    input_shape = get_shape_list(input_tensor, expected_rank=3)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    width = input_shape[2]

    output = input_tensor

    if use_token_type:
        if token_type_ids is None:
            raise ValueError("`token_type_ids` must be specified if "
                             "`use_token_type` is True.")
        token_type_table = tf.get_variable(
            name=token_type_embedding_name,
            shape=[token_type_vocab_size, width],
            initializer=create_initializer(initializer_range))
        # This vocab will be small so we always do one-hot here, since it is
        # always faster for a small vocabulary.
        flat_token_type_ids = tf.reshape(token_type_ids, [-1])
        one_hot_ids = tf.one_hot(flat_token_type_ids,
                                 depth=token_type_vocab_size)
        token_type_embeddings = tf.matmul(one_hot_ids, token_type_table)
        token_type_embeddings = tf.reshape(token_type_embeddings,
                                           [batch_size, seq_length, width])
        output += token_type_embeddings

    if use_position_embeddings:
        assert_op = tf.assert_less_equal(seq_length, max_position_embeddings)
        with tf.control_dependencies([assert_op]):
            full_position_embeddings = tf.get_variable(
                name=position_embedding_name,
                shape=[max_position_embeddings, width],
                initializer=create_initializer(initializer_range))
            # Since the position embedding table is a learned variable, we
            # create it using a (long) sequence length
            # `max_position_embeddings`. The actual sequence length might be
            # shorter than this, for faster training of tasks that do not have
            # long sequences.
            #
            # So `full_position_embeddings` is effectively an embedding table
            # for position [0, 1, 2, ..., max_position_embeddings-1], and the
            # current sequence has positions [0, 1, 2, ..., seq_length-1], so
            # we can just perform a slice.
            position_embeddings = tf.slice(full_position_embeddings, [0, 0],
                                           [seq_length, -1])
            num_dims = len(output.shape.as_list())

            # Only the last two dimensions are relevant (`seq_length` and
            # `width`), so we broadcast among the first dimensions, which is
            # typically just the batch size.
            position_broadcast_shape = []
            for _ in range(num_dims - 2):
                position_broadcast_shape.append(1)
            position_broadcast_shape.extend([seq_length, width])
            position_embeddings = tf.reshape(position_embeddings,
                                             position_broadcast_shape)
            output += position_embeddings

    output = layer_norm_and_dropout(output, dropout_prob)
    return output
def winograd_conv(image, kernel, transformed_image, output_channel, mask=None):
    batch_size_d, image_width_d, image_height_d, input_channel_d = image.get_shape()
    batch_size = batch_size_d.value
    image_width = image_width_d.value
    image_height = image_height_d.value
    input_channel = input_channel_d.value
    n_patch_width = (image_width + 1 + 2 - 4) // 2 + 1
    n_patch_height = (image_height + 1 + 2 - 4) // 2 + 1
    if batch_size is None:
        batch_size = -1

    # The 16 Winograd tile positions of the transformed input (C^T d C),
    # each flattened to a [-1, input_channel] matrix. This replaces the 16
    # hand-unrolled strided-slice statements with an equivalent loop.
    CTdC = [tf.reshape(tf.strided_slice(transformed_image, [i, 0, 0],
                                        [i + 1, 0, 0], [1, 1, 1], end_mask=6),
                       [-1, input_channel])
            for i in range(16)]

    # The corresponding 16 tiles of the transformed kernel (G g G^T),
    # shape (16, input_channel, output_channel), flattened per tile.
    GgGT = kernel
    GgGT_tiles = [tf.reshape(tf.slice(GgGT, [i, 0, 0],
                                      [1, input_channel, output_channel]),
                             [input_channel, output_channel])
                  for i in range(16)]

    # Per-tile products as plain matmuls.
    prod = [tf.matmul(CTdC[i], GgGT_tiles[i]) for i in range(16)]

    # Inverse transform A^T (product) A, written out element-wise.
    ATprodA_0 = (prod[0] + prod[1] + prod[2] + prod[4] + prod[5] + prod[6]
                 + prod[8] + prod[9] + prod[10])
    ATprodA_1 = (prod[1] - prod[2] - prod[3] + prod[5] - prod[6] - prod[7]
                 + prod[9] - prod[10] - prod[11])
    ATprodA_2 = (prod[4] + prod[5] + prod[6] - prod[8] - prod[9] - prod[10]
                 - prod[12] - prod[13] - prod[14])
    ATprodA_3 = (prod[5] - prod[6] - prod[7] - prod[9] + prod[10] + prod[11]
                 - prod[13] + prod[14] + prod[15])

    ATprodA = tf.concat([ATprodA_0, ATprodA_1, ATprodA_2, ATprodA_3], 0)
    ATprodA = tf.reshape(ATprodA, [2, 2, batch_size, n_patch_width,
                                   n_patch_height, output_channel])
    ATprodA = tf.transpose(ATprodA, perm=[3, 0, 4, 1, 5, 2])
    Image = tf.reshape(ATprodA, [2 * n_patch_width, 2 * n_patch_height, -1])
    if image_width % 2 != 0 or image_height % 2 != 0:
        Image = tf.image.crop_to_bounding_box(Image, 0, 0,
                                              image_width, image_height)
    Image = tf.reshape(Image, [image_width, image_height, output_channel,
                               batch_size])
    Image = tf.transpose(Image, perm=[3, 0, 1, 2])
    return Image
def model_fn(features, labels, mode, params):
    """model_fn constructs the HypER model used for link prediction on WN18."""
    del params

    targets = labels
    targets = ((1.0 - 0.1) * targets) + (1.0 / targets.shape[1].value)  # label smoothing

    print('inputs: {}'.format(features))
    print('features shape: {}'.format(features.shape))
    e1_idx = tf.slice(features, [0, 0], [features.shape[0].value, 1])
    print('e1_idx: {}'.format(e1_idx))
    r_idx = tf.slice(features, [0, 1], [features.shape[0].value, 1])
    labels = tf.slice(features, [0, 2], [features.shape[0].value, 1])

    data = Data(dataset='WN18', reverse=False)
    model = HyperER(len(data.entities), len(data.relations))

    if mode == tf.estimator.ModeKeys.PREDICT:
        logits = model(e1_idx, r_idx, training=False)
        predictions = {
            'class_ids': tf.argmax(logits, axis=1),
            'probabilities': tf.sigmoid(logits),
        }
        return tf.contrib.tpu.TPUEstimatorSpec(mode, predictions=predictions)

    logits = model(e1_idx, r_idx, training=(mode == tf.estimator.ModeKeys.TRAIN))
    predictions = tf.sigmoid(logits)

    print('number of inputs: {}'.format(features.shape[0].value))

    loss = tf.keras.losses.binary_crossentropy(
        tf.cast(targets, tf.float32), tf.cast(predictions, tf.float32))
    loss = tf.reduce_mean(loss)

    global_step = tf.train.get_or_create_global_step()

    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   global_step,
                                                   decay_steps=1337,
                                                   decay_rate=0.99,
                                                   staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        if FLAGS.use_tpu:
            optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
        # Batch normalization requires UPDATE_OPS to be added as a dependency
        # to the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, loss=loss,
                                               train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss,
            eval_metrics=(metric_fn, [features, labels, logits]))
def __init__(self, buckets, isTraining, max_gradient_norm, batch_size,
             learning_rate, learning_rate_decay_factor, encoder_attribs,
             decoder_attribs):
    """Initializer of class that defines the computational graph.

    Args:
        buckets: List of input-output sizes that limit the amount of
            sequence padding (http://goo.gl/d8ybpl).
        isTraining: boolean that denotes training v/s evaluation.
        max_gradient_norm: Maximum value of gradient norm.
        batch_size: Minibatch size used for doing SGD.
        learning_rate: Initial learning rate of optimizer.
        learning_rate_decay_factor: Multiplicative learning rate decay factor.
        {encoder, decoder}_attribs: Dictionary containing attributes for
            {encoder, decoder} RNN.
    """
    self.buckets = buckets
    self.isTraining = isTraining
    self.batch_size = batch_size

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    # Number of gradient updates performed
    self.global_step = tf.Variable(0, trainable=False)
    # Number of epochs done
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_incr = self.epoch.assign(self.epoch + 1)

    self.encoder_attribs = encoder_attribs
    self.decoder_attribs = decoder_attribs

    # Placeholder for encoder input IDs
    self.encoder_inputs = {}
    for feat_type in self.encoder_attribs.feat_types:
        if feat_type == "speech_frames":
            self.encoder_inputs[feat_type] = tf.placeholder(
                tf.float32,
                # T * B * num_frame_per_word * frame_dimension
                shape=[None, None,
                       self.encoder_attribs.fixed_word_length,
                       self.encoder_attribs.feat_dim],
                name=feat_type + '_encoder')
        elif feat_type == "word_dur":
            self.encoder_inputs[feat_type] = tf.placeholder(
                tf.float32, shape=[None, None], name=feat_type + '_encoder')
        else:
            self.encoder_inputs[feat_type] = tf.placeholder(
                tf.int32, shape=[None, None], name=feat_type + '_encoder')

    _batch_size = self.encoder_inputs["word"].get_shape()[1].value
    # Input sequence length placeholder
    self.seq_len = tf.placeholder(tf.int32, shape=[_batch_size],
                                  name="seq_len")
    # Output sequence length placeholder
    self.seq_len_target = tf.placeholder(tf.int32, shape=[_batch_size],
                                         name="seq_len_target")

    # Input to decoder RNN. This input has an initial extra symbol - GO -
    # that initiates the decoding process.
    self.decoder_inputs = tf.placeholder(tf.int32, shape=[None, None],
                                         name="decoder")
    # Targets are decoder inputs shifted by one, thus ignoring the GO symbol
    self.targets = tf.slice(self.decoder_inputs, [1, 0], [-1, -1])

    # Initialize the encoder and decoder RNNs
    self.encoder = Encoder(isTraining, encoder_attribs)
    self.decoder = Decoder(isTraining, decoder_attribs)

    # First encode input
    self.encoder_hidden_states, self.final_state = \
        self.encoder.encode_input(self.encoder_inputs, self.seq_len)
    # Then decode
    self.outputs = \
        self.decoder.decode(self.decoder_inputs, self.seq_len_target,
                            self.encoder_hidden_states, self.final_state,
                            self.seq_len)
    if isTraining:
        # Training outputs and losses.
        self.losses = self.seq2seq_loss(self.outputs, self.targets,
                                        self.seq_len_target)
        # Gradients and parameter updates for training the model.
        params = tf.trainable_variables()
        print("\nModel parameters:\n")
        for var in params:
            print(("{0}: {1}").format(var.name, var.get_shape()))
        print()
        # Initialize optimizer
        opt = tf.train.AdamOptimizer(self.learning_rate)
        # Get gradients from loss
        gradients = tf.gradients(self.losses, params)
        # Clip the gradients to avoid the problem of gradient explosion
        # possible early in training
        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.gradient_norms = norm
        # Apply gradients
        self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                           global_step=self.global_step)

    # Model saver functions
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
    self.best_saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
# rows
x = tf.placeholder(dtype='float', shape=[None, 3])
y = x * 2

with tf.Session() as session:
    x_data = [[1, 2, 3], [4, 5, 6]]
    result = session.run(y, feed_dict={x: x_data})
    print(result)

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

raw_image_data = mpimg.imread('MarshOrchid.jpg')

image = tf.placeholder(dtype='uint8', shape=[None, None, 3])
sliced = tf.slice(image, begin=[1000, 0, 0], size=[3000, -1, -1])

# Note: no global_variables_initializer() is needed here, because there are
# no Variables in this graph.
with tf.Session() as session:
    result = session.run(sliced, feed_dict={image: raw_image_data})

plt.imshow(result)

# =============================================================================
# Exercise
# =============================================================================
# 1) Take a look at the other functions for arrays in TensorFlow at the
#    official documentation.
# 2) Break the image apart into four "corners", then stitch it back together.
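# A possible sketch for exercise 2 (assumes the TF >= 1.0 tf.concat
# signature): split the image into four quadrants with tf.slice, then stitch
# them back together with tf.concat (axis 0 = rows, axis 1 = columns).
h = tf.shape(image)[0] // 2
w = tf.shape(image)[1] // 2
top_left = tf.slice(image, [0, 0, 0], [h, w, -1])
top_right = tf.slice(image, [0, w, 0], [h, -1, -1])
bottom_left = tf.slice(image, [h, 0, 0], [-1, w, -1])
bottom_right = tf.slice(image, [h, w, 0], [-1, -1, -1])
stitched = tf.concat([tf.concat([top_left, top_right], axis=1),
                      tf.concat([bottom_left, bottom_right], axis=1)], axis=0)
with tf.Session() as session:
    result = session.run(stitched, feed_dict={image: raw_image_data})
plt.imshow(result)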
def create_model(inputs, backbone_fn):
    # box_ft, mask_ft, gt_masks, gt_anchors, gt_anchors_weight, gt_params,
    # gt_params_weight, gt_boxes, config
    # ft: B * H' * W' * 3 input feature, H' W' is feature map size
    # gt_counts: B number of boxes in each sample of the batch
    # gt_boxes: ? * 4 boxes
    bb, _ = backbone_fn(inputs.X - PIXEL_MEANS, global_pool=False,
                        output_stride=FLAGS.backbone_stride)
    #bb2, _ = backbone_fn(inputs.X - PIXEL_MEANS, global_pool=False,
    #                     output_stride=FLAGS.backbone_stride, scope='bb2')

    gt_matcher = cpp.GTMatcher(FLAGS.match_th, FLAGS.max_masks)
    mask_extractor = cpp.MaskExtractor(FLAGS.mask_size, FLAGS.mask_size)

    end_points = {}
    with tf.variable_scope('boxnet'):
        assert FLAGS.backbone_stride % FLAGS.anchor_stride == 0
        ss = FLAGS.backbone_stride // FLAGS.anchor_stride
        # generate anchor feature
        anchor_logits_ft = slim.conv2d_transpose(bb, FLAGS.anchor_logit_filters,
                                                 ss * 2, ss)
        anchor_params_ft = slim.conv2d_transpose(bb, FLAGS.anchor_params_filters,
                                                 ss * 2, ss)

        assert FLAGS.backbone_stride % FLAGS.mask_stride == 0
        ss = FLAGS.backbone_stride // FLAGS.mask_stride
        mask_ft = slim.conv2d_transpose(bb, FLAGS.mask_filters, ss * 2, ss)

        anchor_logits = slim.conv2d(anchor_logits_ft, 2 * len(PRIORS), 3, 1,
                                    activation_fn=None)
        anchor_logits2 = tf.reshape(anchor_logits, (-1, 2))  # ? * 2
        # anchor probabilities
        anchor_prob = tf.squeeze(
            tf.slice(tf.nn.softmax(anchor_logits2), [0, 1], [-1, 1]), 1)

        gt_anchors = tf.reshape(inputs.gt_anchors, (-1,))
        gt_anchors_weight = tf.reshape(inputs.gt_anchors_weight, (-1,))

        # anchor cross-entropy
        axe = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=anchor_logits2, labels=gt_anchors)
        axe = axe * gt_anchors_weight
        axe = tf.reduce_sum(axe) / (tf.reduce_sum(gt_anchors_weight) + 1)

        params = slim.conv2d(anchor_params_ft, 4 * len(PRIORS), 3, 1,
                             activation_fn=None)
        params = tf.reshape(params, (-1, 4))  # ? * 4
        gt_params = tf.reshape(inputs.gt_params, (-1, 4))
        gt_params_weight = tf.reshape(inputs.gt_params_weight, (-1,))

        # params loss
        if True:
            dxy, wh = tf.split(params, [2, 2], 1)
            dxy_gt, wh_gt = tf.split(gt_params, [2, 2], 1)
            #wh = tf.log(tf.nn.relu(wh) + 1)
            wh_gt = tf.log(wh_gt + 1)
            pl = tf.losses.huber_loss(dxy, dxy_gt,
                                      reduction=tf.losses.Reduction.NONE) + \
                 tf.losses.huber_loss(wh, wh_gt,
                                      reduction=tf.losses.Reduction.NONE)
            pl = tf.reduce_sum(pl, axis=1)
            pl = tf.reduce_sum(pl * gt_params_weight) / \
                 (tf.reduce_sum(gt_params_weight) + 1)

        # generate boxes from anchor params
        boxes, box_ind = anchors2boxes(tf.shape(anchor_logits_ft), params)
        boxes_pre = boxes

        sel = tf.greater_equal(anchor_prob, inputs.anchor_th)
        # sel is a boolean mask
        # select only boxes with prob > th for nms
        anchor_prob = tf.boolean_mask(anchor_prob, sel)
        boxes = tf.boolean_mask(boxes, sel)
        box_ind = tf.boolean_mask(box_ind, sel)

        sel = tf.image.non_max_suppression(shift_boxes(boxes, box_ind),
                                           anchor_prob, 100000,
                                           iou_threshold=inputs.nms_th)
        # sel is a list of indices
        if True:  # prediction head, not used in training
            psel = tf.slice(sel, [0],
                            [tf.minimum(inputs.nms_max, tf.shape(sel)[0])])
            boxes_predicted = tf.gather(boxes, psel)
            box_ind_predicted = tf.gather(box_ind, psel)
            mlogits = mask_net(inputs.X, mask_ft, boxes_predicted,
                               box_ind_predicted)
            masks_predicted = tf.squeeze(
                tf.slice(tf.nn.softmax(mlogits), [0, 0, 0, 1], [-1, -1, -1, 1]), 3)
            pass
        anchor_prob = None  # discard

        boxes = tf.gather(boxes, sel)
        box_ind = tf.gather(box_ind, sel)

        hit, index, gt_index = tf.py_func(
            gt_matcher.apply, [boxes, box_ind, inputs.gt_boxes],
            [tf.float32, tf.int32, tf.int32])

        # % boxes found
        precision = hit / tf.cast(tf.shape(boxes)[0] + 1, tf.float32)
        recall = hit / tf.cast(tf.shape(inputs.gt_boxes)[0] + 1, tf.float32)

        boxes = tf.gather(boxes, index)
        box_ind = tf.gather(box_ind, index)
        gt_boxes = tf.gather(inputs.gt_boxes, gt_index)

        # normalize boxes to [0-1]
        nboxes = normalize_boxes(tf.shape(inputs.X), boxes)
        mlogits = mask_net(inputs.X, mask_ft, boxes, box_ind)

        gt_masks, = tf.py_func(mask_extractor.apply,
                               [inputs.gt_masks, gt_boxes, boxes], [tf.float32])
        #gt_masks, = tf.py_func(mask_extractor.apply,
        #                       [inputs.gt_masks, gt_boxes,
        #                        tf.slice(gt_boxes, [0, 3], [-1, 4])],
        #                       [tf.float32])
        end_points['gt_boxes'] = gt_boxes
        end_points['boxes'] = boxes
        gt_masks = tf.cast(tf.round(gt_masks), tf.int32)
        end_points['gt_masks'] = gt_masks

        # mask cross entropy
        mxe = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=mlogits, labels=gt_masks)
        mxe = tf.reshape(mxe, (-1,))
        mxe = tf.reduce_sum(mxe) / tf.cast(tf.shape(mxe)[0] + 1, tf.float32)

    #tf.identity(logits, name='logits')
    #tf.identity(params, name='params')
    #tf.identity(boxes_pre, name='boxes_pre')
    tf.identity(boxes_predicted, name='boxes')
    tf.identity(masks_predicted, name='masks')
    #tf.identity(mlogits, name='mlogits')

    axe = tf.identity(axe, name='ax')  # cross-entropy
    mxe = tf.identity(mxe, name='mx')  # cross-entropy
    pl = tf.identity(pl * FLAGS.pl_weight, name='pl')  # params-loss
    reg = tf.identity(tf.reduce_sum(tf.losses.get_regularization_losses())
                      * FLAGS.re_weight, name='re')
    precision = tf.identity(precision, name='p')
    recall = tf.identity(recall, name='r')

    loss = tf.identity(axe + mxe + pl + reg, name='lo')

    return loss, [axe, mxe, pl, reg, precision, recall], end_points
def build(self, for_deploy, variants=""): conf = self.conf name = self.name job_type = self.job_type dtype = self.dtype self.beam_size = 1 if (not for_deploy or variants == "score") else sum( self.conf.beam_splits) graphlg.info("Creating placeholders...") self.enc_str_inps = tf.placeholder(tf.string, shape=(None, conf.input_max_len), name="enc_inps") self.enc_lens = tf.placeholder(tf.int32, shape=[None], name="enc_lens") self.dec_str_inps = tf.placeholder( tf.string, shape=[None, conf.output_max_len + 2], name="dec_inps") self.dec_lens = tf.placeholder(tf.int32, shape=[None], name="dec_lens") self.down_wgts = tf.placeholder(tf.float32, shape=[None], name="down_wgts") with tf.name_scope("TableLookup"): # lookup tables self.in_table = lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64, default_value=UNK_ID, shared_name="in_table", name="in_table", checkpoint=True) self.out_table = lookup.MutableHashTable(key_dtype=tf.int64, value_dtype=tf.string, default_value="_UNK", shared_name="out_table", name="out_table", checkpoint=True) self.enc_inps = self.in_table.lookup(self.enc_str_inps) self.dec_inps = self.in_table.lookup(self.dec_str_inps) # Create encode graph and get attn states graphlg.info("Creating embeddings and embedding enc_inps.") with ops.device("/cpu:0"): self.embedding = variable_scope.get_variable( "embedding", [conf.output_vocab_size, conf.embedding_size]) with tf.name_scope("Embed") as scope: dec_inps = tf.slice(self.dec_inps, [0, 0], [-1, conf.output_max_len + 1]) with ops.device("/cpu:0"): self.emb_inps = embedding_lookup_unique( self.embedding, self.enc_inps) emb_dec_inps = embedding_lookup_unique(self.embedding, dec_inps) # output projector (w, b) with tf.variable_scope("OutProj"): if conf.out_layer_size: w = tf.get_variable( "proj_w", [conf.out_layer_size, conf.output_vocab_size], dtype=dtype) elif conf.bidirectional: w = tf.get_variable( "proj_w", [conf.num_units * 2, conf.output_vocab_size], dtype=dtype) else: w = tf.get_variable("proj_w", [conf.num_units, conf.output_vocab_size], dtype=dtype) b = tf.get_variable("proj_b", [conf.output_vocab_size], dtype=dtype) graphlg.info("Creating dynamic rnn...") self.enc_outs, self.enc_states, mem_size, enc_state_size = DynRNN( conf.cell_model, conf.num_units, conf.num_layers, self.emb_inps, self.enc_lens, keep_prob=1.0, bidi=conf.bidirectional, name_scope="DynRNNEncoder") batch_size = tf.shape(self.enc_outs)[0] # Do vae on the state of the last layer of the encoder final_enc_states = [] KLDs = 0.0 for each in self.enc_states: z, KLD, l2 = CreateVAE([each], self.conf.enc_latent_dim, name_scope="VAE") if isinstance(each, LSTMStateTuple): final_enc_states.append( LSTMStateTuple(each.c, tf.concat([each.h, z], 1))) else: final_enc_state.append(tf.concat([z, each], 1)) KLDs += KLD / self.conf.num_layers with tf.name_scope("DynRNNDecode") as scope: with tf.name_scope("ShapeToBeam") as scope: beam_memory = tf.reshape( tf.tile(self.enc_outs, [1, 1, self.beam_size]), [-1, conf.input_max_len, mem_size]) beam_memory_lens = tf.squeeze( tf.reshape( tf.tile(tf.expand_dims(self.enc_lens, 1), [1, self.beam_size]), [-1, 1]), 1) def _to_beam(t): return tf.reshape(tf.tile(t, [1, self.beam_size]), [-1, int(t.get_shape()[1])]) beam_init_states = tf.contrib.framework.nest.map_structure( _to_beam, final_enc_states) max_mem_size = self.conf.input_max_len + self.conf.output_max_len + 2 cell = AttnCell(cell_model=conf.cell_model, num_units=mem_size, num_layers=conf.num_layers, attn_type=self.conf.attention, memory=beam_memory, 
mem_lens=beam_memory_lens, max_mem_size=max_mem_size, addmem=self.conf.addmem, keep_prob=conf.keep_prob, dtype=tf.float32, name_scope="AttnCell") dec_init_state = DecStateInit(all_enc_states=beam_init_states, decoder_cell=cell, batch_size=batch_size * self.beam_size, init_type="each2each") if not for_deploy: hp_train = helper.ScheduledEmbeddingTrainingHelper( inputs=emb_dec_inps, sequence_length=self.dec_lens, embedding=self.embedding, sampling_probability=self.conf.sample_prob, out_proj=(w, b)) output_layer = layers_core.Dense( self.conf.out_layer_size, use_bias=True) if self.conf.out_layer_size else None my_decoder = basic_decoder.BasicDecoder( cell=cell, helper=hp_train, initial_state=dec_init_state, output_layer=output_layer) cell_outs, final_state = decoder.dynamic_decode( decoder=my_decoder, impute_finished=False, maximum_iterations=conf.output_max_len + 1, scope=scope) elif variants == "score": dec_init_state = zero_attn_states hp_train = helper.ScheduledEmbeddingTrainingHelper( inputs=emb_dec_inps, sequence_length=self.dec_lens, embedding=self.embedding, sampling_probability=0.0, out_proj=(w, b)) output_layer = layers_core.Dense( self.conf.out_layer_size, use_bias=True) if self.conf.out_layer_size else None my_decoder = score_decoder.ScoreDecoder( cell=cell, helper=hp_train, out_proj=(w, b), initial_state=dec_init_state, output_layer=output_layer) cell_outs, final_state = decoder.dynamic_decode( decoder=my_decoder, scope=scope, maximum_iterations=self.conf.output_max_len, impute_finished=False) else: hp_infer = helper.GreedyEmbeddingHelper( embedding=self.embedding, start_tokens=tf.ones(shape=[batch_size * self.beam_size], dtype=tf.int32), end_token=EOS_ID, out_proj=(w, b)) output_layer = layers_core.Dense( self.conf.out_layer_size, use_bias=True) if self.conf.out_layer_size else None my_decoder = beam_decoder.BeamDecoder( cell=cell, helper=hp_infer, out_proj=(w, b), initial_state=dec_init_state, beam_splits=self.conf.beam_splits, max_res_num=self.conf.max_res_num, output_layer=output_layer) cell_outs, final_state = decoder.dynamic_decode( decoder=my_decoder, scope=scope, maximum_iterations=self.conf.output_max_len, impute_finished=True) if not for_deploy: outputs = cell_outs.rnn_output # Outputs projected to logits L = tf.shape(outputs)[1] outputs = tf.reshape(outputs, [-1, int(w.shape[0])]) outputs = tf.matmul(outputs, w) + b logits = tf.reshape(outputs, [-1, L, int(w.shape[1])]) # branch 1 for debugging, doesn't have to be called with tf.name_scope("DebugOutputs") as scope: self.outputs = tf.argmax(logits, axis=2) self.outputs = tf.reshape(self.outputs, [-1, L]) self.outputs = self.out_table.lookup( tf.cast(self.outputs, tf.int64)) with tf.name_scope("Loss") as scope: tars = tf.slice(self.dec_inps, [0, 1], [-1, L]) wgts = tf.cumsum(tf.one_hot(self.dec_lens, L), axis=1, reverse=True) #wgts = wgts * tf.expand_dims(self.down_wgts, 1) self.loss = loss.sequence_loss(logits=logits, targets=tars, weights=wgts, average_across_timesteps=False, average_across_batch=False) example_losses = tf.reduce_sum(self.loss, 1) batch_wgt = tf.reduce_sum(self.down_wgts) see_KLD = tf.reduce_sum(KLDs * self.down_wgts) / batch_wgt see_loss = tf.reduce_sum(example_losses / tf.cast( self.dec_lens, tf.float32) * self.down_wgts) / batch_wgt # not averaged over length self.loss = tf.reduce_sum( (example_losses + self.conf.kld_ratio * KLDs) * self.down_wgts) / batch_wgt with tf.name_scope(self.model_kind): tf.summary.scalar("loss", see_loss) tf.summary.scalar("kld", see_KLD) graph_nodes = { "loss":
self.loss, "inputs": {}, "outputs": {}, "debug_outputs": self.outputs } elif variants == "score": L = tf.shape(cell_outs.logprobs)[1] one_hot = tf.one_hot(tf.slice(self.dec_inps, [0, 1], [-1, L]), depth=self.conf.output_vocab_size, axis=-1, on_value=1.0, off_value=0.0) outputs = tf.reduce_sum(cell_outs.logprobs * one_hot, 2) outputs = tf.reduce_sum(outputs, axis=1) inputs = { "enc_inps:0": self.enc_str_inps, "enc_lens:0": self.enc_lens, "dec_inps:0": self.dec_str_inps, "dec_lens:0": self.dec_lens } graph_nodes = { "loss": None, "inputs": inputs, "outputs": { "logprobs": outputs }, "visualize": None } else: L = tf.shape(cell_outs.beam_ends)[1] beam_symbols = cell_outs.beam_symbols beam_parents = cell_outs.beam_parents beam_ends = cell_outs.beam_ends beam_end_parents = cell_outs.beam_end_parents beam_end_probs = cell_outs.beam_end_probs alignments = cell_outs.alignments beam_ends = tf.reshape(tf.transpose(beam_ends, [0, 2, 1]), [-1, L]) beam_end_parents = tf.reshape( tf.transpose(beam_end_parents, [0, 2, 1]), [-1, L]) beam_end_probs = tf.reshape( tf.transpose(beam_end_probs, [0, 2, 1]), [-1, L]) ## Creating tail_ids batch_size = tf.Print(batch_size, [batch_size], message="VAERNN2 batch") batch_offset = tf.expand_dims( tf.cumsum( tf.ones([batch_size, self.beam_size], dtype=tf.int32) * self.beam_size, axis=0, exclusive=True), 2) offset2 = tf.expand_dims( tf.cumsum( tf.ones([batch_size, self.beam_size * 2], dtype=tf.int32) * self.beam_size, axis=0, exclusive=True), 2) out_len = tf.shape(beam_symbols)[1] self.beam_symbol_strs = tf.reshape( self.out_table.lookup(tf.cast(beam_symbols, tf.int64)), [batch_size, self.beam_size, -1]) self.beam_parents = tf.reshape( beam_parents, [batch_size, self.beam_size, -1]) - batch_offset self.beam_ends = tf.reshape(beam_ends, [batch_size, self.beam_size * 2, -1]) self.beam_end_parents = tf.reshape( beam_end_parents, [batch_size, self.beam_size * 2, -1]) - offset2 self.beam_end_probs = tf.reshape( beam_end_probs, [batch_size, self.beam_size * 2, -1]) self.beam_attns = tf.reshape( alignments, [batch_size, self.beam_size, out_len, -1]) inputs = { "enc_inps:0": self.enc_str_inps, "enc_lens:0": self.enc_lens } outputs = { "beam_symbols": self.beam_symbol_strs, "beam_parents": self.beam_parents, "beam_ends": self.beam_ends, "beam_end_parents": self.beam_end_parents, "beam_end_probs": self.beam_end_probs, "beam_attns": self.beam_attns } graph_nodes = { "loss": None, "inputs": inputs, "outputs": outputs, "visualize": { "z": z } } return graph_nodes
import numpy as np import matplotlib.pyplot as plt from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) batch_size = 512 learning_rate = 1e-3 epoch = 10000 discrete_latent_size = 10 contin_latent_size = 2 x = tf.placeholder(tf.float32, shape=[None, 784]) x_image = tf.reshape(x, [-1, 28, 28, 1]) y_ = tf.placeholder(tf.float32, shape=[None, 10]) z_in = tf.placeholder(tf.float32, shape=[batch_size, 112]) z_label_check = tf.slice(z_in, begin=[0, 100], size=[batch_size, 10]) z_contin_check = tf.slice(z_in, begin=[0, 110], size=[batch_size, 2]) initializer = tf.truncated_normal_initializer(stddev=0.02) def int_to_onehot(z_label): one_hot_array = np.zeros(shape=[len(z_label), discrete_latent_size]) one_hot_array[np.arange(len(z_label)), z_label] = 1 return one_hot_array def lrelu(x, leak=0.2, name="lrelu"):
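    # (body truncated in the original; the standard leaky-ReLU form is assumed)
    return tf.maximum(x, leak * x, name=name)

# A small, hedged sketch of the latent layout the tf.slice calls above assume:
# z_in is [batch, 112] = 100 noise dims + 10 one-hot (discrete) dims + 2
# continuous dims, so slicing at offsets 100 and 110 recovers the two codes.
# Toy batch size; not the generator graph itself.
import numpy as np
import tensorflow as tf

z = np.zeros((4, 112), dtype=np.float32)
z[:, 100 + 3] = 1.0                  # discrete code: class 3
z[:, 110:] = [0.5, -0.5]             # continuous codes
z_ph = tf.placeholder(tf.float32, [4, 112])
z_label = tf.slice(z_ph, begin=[0, 100], size=[4, 10])
z_contin = tf.slice(z_ph, begin=[0, 110], size=[4, 2])
with tf.Session() as sess:
    print(sess.run([z_label, z_contin], {z_ph: z}))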
def _compute_loss(self, input_dict): """ Computes cross entropy based sequence-to-sequence loss with label smoothing :param input_dict: inputs to compute loss { "logits": logits tensor of shape [batch_size, T, dim] "target_sequence": tensor of shape [batch_size, T] "tgt_lengths": tensor of shape [batch_size] or None } :return: Singleton loss tensor """ logits = input_dict["decoder_output"]["logits"] target_sequence = input_dict["target_tensors"][0] tgt_lengths = input_dict["target_tensors"][1] #target_sequence = input_dict["tgt_sequence"] #tgt_lengths = input_dict["tgt_length"] if self._offset_target_by_one: # this is necessary for auto-regressive models current_ts = tf.to_int32( tf.minimum( tf.shape(target_sequence)[1], tf.shape(logits)[1], )) - 1 logits = tf.slice( logits, begin=[0, 0, 0], size=[-1, current_ts, -1], ) target_sequence = tf.slice(target_sequence, begin=[0, 1], size=[-1, current_ts]) else: current_ts = tf.to_int32( tf.minimum( tf.shape(target_sequence)[1], tf.shape(logits)[1], )) # Cast logits after potential slice if logits.dtype.base_dtype != tf.float32: logits = tf.cast(logits, tf.float32) if self._do_mask: if tgt_lengths is None: raise ValueError( "If you are masking loss, tgt_lengths can't be None") mask = tf.sequence_mask(lengths=tgt_lengths - 1, maxlen=current_ts, dtype=tf.float32) else: mask = tf.cast(tf.ones_like(target_sequence), logits.dtype) labels = tf.one_hot(indices=tf.reshape(target_sequence, shape=[-1]), depth=self._tgt_vocab_size) logits = tf.reshape(logits, shape=[-1, self._tgt_vocab_size]) mask = tf.reshape(mask, shape=[-1]) loss = tf.losses.softmax_cross_entropy( onehot_labels=labels, logits=logits, weights=mask, label_smoothing=self._label_smoothing, reduction=tf.losses.Reduction.NONE) loss = tf.reduce_sum(loss * tf.reshape(mask, shape=[-1])) if self._average_across_timestep: loss /= tf.reduce_sum(mask) else: loss /= self._batch_size return loss
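# A minimal sketch (toy shapes, illustrative names) of the offset-by-one
# alignment used above for auto-regressive models: logits at step t are scored
# against the target token at step t + 1, so both tensors are first sliced to a
# common length current_ts.
import tensorflow as tf

logits_t = tf.placeholder(tf.float32, [None, None, 8])   # [B, T_logits, vocab]
targets_t = tf.placeholder(tf.int32, [None, None])       # [B, T_targets]

current_ts = tf.to_int32(
    tf.minimum(tf.shape(targets_t)[1], tf.shape(logits_t)[1])) - 1
logits_sl = tf.slice(logits_t, [0, 0, 0], [-1, current_ts, -1])
targets_sl = tf.slice(targets_t, [0, 1], [-1, current_ts])   # drop <s> token
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets_sl,
                                                      logits=logits_sl)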
def inference_mem(images, cams, depth_num, depth_start, depth_interval, is_master_gpu=True): """ infer depth image from multi-view images and cameras """ # dynamic gpu params depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval feature_c = 32 feature_h = FLAGS.max_h // 4 feature_w = FLAGS.max_w // 4 # reference image ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1) ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) # image feature extraction if is_master_gpu: ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=False) else: ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=True) ref_feature = ref_tower.get_output() ref_feature2 = tf.square(ref_feature) view_features = [] for view in range(1, FLAGS.view_num): view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1) view_tower = UniNetDS2({'data': view_image}, is_training=True, reuse=True) view_features.append(view_tower.get_output()) view_features = tf.stack(view_features, axis=0) # get all homographies view_homographies = [] for view in range(1, FLAGS.view_num): view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num, depth_start=depth_start, depth_interval=depth_interval) view_homographies.append(homographies) view_homographies = tf.stack(view_homographies, axis=0) # build cost volume by differentiable homography with tf.name_scope('cost_volume_homography'): depth_costs = [] for d in range(depth_num): # compute cost (standard deviation feature) ave_feature = tf.Variable( tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]), name='ave', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) ave_feature2 = tf.Variable( tf.zeros([FLAGS.batch_size, feature_h, feature_w, feature_c]), name='ave2', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES]) ave_feature = tf.assign(ave_feature, ref_feature) ave_feature2 = tf.assign(ave_feature2, ref_feature2) def body(view, ave_feature, ave_feature2): """Loop body.""" homography = tf.slice(view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3]) homography = tf.squeeze(homography, axis=1) warped_view_feature = homography_warping( view_features[view], homography) ave_feature = tf.assign_add(ave_feature, warped_view_feature) ave_feature2 = tf.assign_add(ave_feature2, tf.square(warped_view_feature)) view = tf.add(view, 1) return view, ave_feature, ave_feature2 view = tf.constant(0) cond = lambda view, *_: tf.less(view, FLAGS.view_num - 1) _, ave_feature, ave_feature2 = tf.while_loop( cond, body, [view, ave_feature, ave_feature2], back_prop=False, parallel_iterations=1) ave_feature = tf.assign( ave_feature, tf.square(ave_feature) / (FLAGS.view_num * FLAGS.view_num)) ave_feature2 = tf.assign( ave_feature2, ave_feature2 / FLAGS.view_num - ave_feature) depth_costs.append(ave_feature2) cost_volume = tf.stack(depth_costs, axis=1) # filtered cost volume, size of (B, D, H, W, 1) if is_master_gpu: filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=False) else: filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=True) filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(), axis=-1) # depth map by softArgmin with tf.name_scope('soft_arg_min'): # probability volume by soft max probability_volume = tf.nn.softmax(tf.scalar_mul( -1,
filtered_cost_volume), axis=1, name='prob_volume') # depth image by soft argmin volume_shape = tf.shape(probability_volume) soft_2d = [] for i in range(FLAGS.batch_size): soft_1d = tf.linspace(depth_start[i], depth_end[i], tf.cast(depth_num, tf.int32)) soft_2d.append(soft_1d) soft_2d = tf.reshape(tf.stack(soft_2d, axis=0), [volume_shape[0], volume_shape[1], 1, 1]) soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]]) estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume, axis=1) estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3) # probability map prob_map = get_propability_map(probability_volume, estimated_depth_map, depth_start, depth_interval) filtered_depth_map = tf.cast(tf.greater_equal(prob_map, 0.8), dtype='float32') * estimated_depth_map return filtered_depth_map, prob_map
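# A hedged, toy-shape sketch of the soft argmin step above: turn a per-depth
# cost into a probability volume with softmax(-cost) along the depth axis, then
# take the expectation over the depth values. The depth range here is an
# assumption for illustration, not the values used by the model.
import tensorflow as tf

costs = tf.placeholder(tf.float32, [None, 64, 32, 32])  # [B, D, H, W]
depth_values = tf.linspace(425.0, 937.0, 64)            # assumed depth range
prob = tf.nn.softmax(-costs, axis=1)                    # probability volume
depth = tf.reduce_sum(tf.reshape(depth_values, [1, 64, 1, 1]) * prob, axis=1)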
def _init_graph(self): self._init_placeholders() self.word_emb = tf.get_variable("word_emb", initializer=tf.constant( self.init_word_emb, dtype=tf.float32), trainable=False) self.char_emb = tf.get_variable("char_emb", initializer=tf.constant( self.init_char_emb, dtype=tf.float32), trainable=self.opt['train_char_emb']) self.c_mask = tf.cast(self.c_ph, tf.bool) self.q_mask = tf.cast(self.q_ph, tf.bool) self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1) self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1) bs = tf.shape(self.c_ph)[0] self.c_maxlen = tf.reduce_max(self.c_len) self.q_maxlen = tf.reduce_max(self.q_len) self.c = tf.slice(self.c_ph, [0, 0], [bs, self.c_maxlen]) self.q = tf.slice(self.q_ph, [0, 0], [bs, self.q_maxlen]) self.c_mask = tf.slice(self.c_mask, [0, 0], [bs, self.c_maxlen]) self.q_mask = tf.slice(self.q_mask, [0, 0], [bs, self.q_maxlen]) self.cc = tf.slice(self.cc_ph, [0, 0, 0], [bs, self.c_maxlen, self.char_limit]) self.qc = tf.slice(self.qc_ph, [0, 0, 0], [bs, self.q_maxlen, self.char_limit]) self.cc_len = tf.reshape( tf.reduce_sum(tf.cast(tf.cast(self.cc, tf.bool), tf.int32), axis=2), [-1]) self.qc_len = tf.reshape( tf.reduce_sum(tf.cast(tf.cast(self.qc, tf.bool), tf.int32), axis=2), [-1]) self.y1 = tf.one_hot(self.y1_ph, depth=self.context_limit) self.y2 = tf.one_hot(self.y2_ph, depth=self.context_limit) self.y1 = tf.slice(self.y1, [0, 0], [bs, self.c_maxlen]) self.y2 = tf.slice(self.y2, [0, 0], [bs, self.c_maxlen]) with tf.variable_scope("emb"): with tf.variable_scope("char"): cc_emb = tf.reshape( tf.nn.embedding_lookup(self.char_emb, self.cc), [bs * self.c_maxlen, self.char_limit, self.char_emb_dim]) qc_emb = tf.reshape( tf.nn.embedding_lookup(self.char_emb, self.qc), [bs * self.q_maxlen, self.char_limit, self.char_emb_dim]) cc_emb = variational_dropout(cc_emb, keep_prob=self.keep_prob_ph) qc_emb = variational_dropout(qc_emb, keep_prob=self.keep_prob_ph) _, (state_fw, state_bw) = cudnn_bi_gru(cc_emb, self.char_hidden_size, seq_lengths=self.cc_len) cc_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = cudnn_bi_gru(qc_emb, self.char_hidden_size, seq_lengths=self.qc_len, reuse=True) qc_emb = tf.concat([state_fw, state_bw], axis=1) cc_emb = tf.reshape( cc_emb, [bs, self.c_maxlen, 2 * self.char_hidden_size]) qc_emb = tf.reshape( qc_emb, [bs, self.q_maxlen, 2 * self.char_hidden_size]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_emb, self.c) q_emb = tf.nn.embedding_lookup(self.word_emb, self.q) c_emb = tf.concat([c_emb, cc_emb], axis=2) q_emb = tf.concat([q_emb, qc_emb], axis=2) with tf.variable_scope("encoding"): rnn = CudnnGRU(num_layers=3, num_units=self.hidden_size, batch_size=bs, input_size=c_emb.get_shape().as_list()[-1], keep_prob=self.keep_prob_ph) c = rnn(c_emb, seq_len=self.c_len) q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope("attention"): qc_att = dot_attention(c, q, mask=self.q_mask, att_size=self.attention_hidden_size, keep_prob=self.keep_prob_ph) rnn = CudnnGRU(num_layers=1, num_units=self.hidden_size, batch_size=bs, input_size=qc_att.get_shape().as_list()[-1], keep_prob=self.keep_prob_ph) att = rnn(qc_att, seq_len=self.c_len) with tf.variable_scope("match"): self_att = dot_attention(att, att, mask=self.c_mask, att_size=self.attention_hidden_size, keep_prob=self.keep_prob_ph) rnn = CudnnGRU(num_layers=1, num_units=self.hidden_size, batch_size=bs, input_size=self_att.get_shape().as_list()[-1], keep_prob=self.keep_prob_ph) match = rnn(self_att, seq_len=self.c_len) with 
tf.variable_scope("pointer"): init = simple_attention(q, self.hidden_size, mask=self.q_mask, keep_prob=self.keep_prob_ph) pointer = PtrNet(cell_size=init.get_shape().as_list()[-1], keep_prob=self.keep_prob_ph) logits1, logits2 = pointer(init, match, self.hidden_size, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part( outer, 0, tf.cast(tf.minimum(15, self.c_maxlen), tf.int64)) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) loss_1 = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1) loss_2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2) self.loss = tf.reduce_mean(loss_1 + loss_2) if self.weight_decay < 1.0: self.var_ema = tf.train.ExponentialMovingAverage(self.weight_decay) ema_op = self.var_ema.apply(tf.trainable_variables()) with tf.control_dependencies([ema_op]): self.loss = tf.identity(self.loss) self.shadow_vars = [] self.global_vars = [] for var in tf.global_variables(): v = self.var_ema.average(var) if v: self.shadow_vars.append(v) self.global_vars.append(var) self.assign_vars = [] for g, v in zip(self.global_vars, self.shadow_vars): self.assign_vars.append(tf.assign(g, v))
def _call(self, inputs): ids, num_samples = inputs adj_lists = tf.nn.embedding_lookup(self.adj_info, ids) adj_lists = tf.transpose(tf.random_shuffle(tf.transpose(adj_lists))) adj_lists = tf.slice(adj_lists, [0,0], [-1, num_samples]) return adj_lists
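# A hedged sketch of the uniform neighbor sampler above: tf.random_shuffle only
# permutes the first dimension, so the transpose-shuffle-transpose trick
# permutes the neighbor axis instead (one random permutation shared across all
# rows per call); slicing the first num_samples columns then yields a random
# neighbor sample per node.
import tensorflow as tf

adj = tf.constant([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=tf.int32)  # toy adj lists
num_samples = 2
shuffled = tf.transpose(tf.random_shuffle(tf.transpose(adj)))
sampled = tf.slice(shuffled, [0, 0], [-1, num_samples])
with tf.Session() as sess:
    print(sess.run(sampled))   # e.g. [[3 1], [7 5]]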
def inference(images, cams, depth_num, depth_start, depth_interval, is_master_gpu=True): """ infer depth image from multi-view images and cameras """ # dynamic gpu params depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval # reference image ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1) ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) # image feature extraction if is_master_gpu: ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=False) else: ref_tower = UniNetDS2({'data': ref_image}, is_training=True, reuse=True) view_towers = [] for view in range(1, FLAGS.view_num): view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1) view_tower = UniNetDS2({'data': view_image}, is_training=True, reuse=True) view_towers.append(view_tower) # get all homographies view_homographies = [] for view in range(1, FLAGS.view_num): view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1) homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num, depth_start=depth_start, depth_interval=depth_interval) view_homographies.append(homographies) # build cost volume by differentiable homography with tf.name_scope('cost_volume_homography'): depth_costs = [] for d in range(depth_num): # compute cost (variation metric) ave_feature = ref_tower.get_output() ave_feature2 = tf.square(ref_tower.get_output()) for view in range(0, FLAGS.view_num - 1): homography = tf.slice(view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3]) homography = tf.squeeze(homography, axis=1) warped_view_feature = homography_warping( view_towers[view].get_output(), homography) ave_feature = ave_feature + warped_view_feature ave_feature2 = ave_feature2 + tf.square(warped_view_feature) ave_feature = ave_feature / FLAGS.view_num ave_feature2 = ave_feature2 / FLAGS.view_num cost = ave_feature2 - tf.square(ave_feature) depth_costs.append(cost) cost_volume = tf.stack(depth_costs, axis=1) # filtered cost volume, size of (B, D, H, W, 1) if is_master_gpu: filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=False) else: filtered_cost_volume_tower = RegNetUS0({'data': cost_volume}, is_training=True, reuse=True) filtered_cost_volume = tf.squeeze(filtered_cost_volume_tower.get_output(), axis=-1) # depth map by softArgmin with tf.name_scope('soft_arg_min'): # probability volume by soft max probability_volume = tf.nn.softmax(tf.scalar_mul( -1, filtered_cost_volume), axis=1, name='prob_volume') # depth image by soft argmin volume_shape = tf.shape(probability_volume) soft_2d = [] for i in range(FLAGS.batch_size): soft_1d = tf.linspace(depth_start[i], depth_end[i], tf.cast(depth_num, tf.int32)) soft_2d.append(soft_1d) soft_2d = tf.reshape(tf.stack(soft_2d, axis=0), [volume_shape[0], volume_shape[1], 1, 1]) soft_4d = tf.tile(soft_2d, [1, 1, volume_shape[2], volume_shape[3]]) estimated_depth_map = tf.reduce_sum(soft_4d * probability_volume, axis=1) estimated_depth_map = tf.expand_dims(estimated_depth_map, axis=3) # probability map prob_map = get_propability_map(probability_volume, estimated_depth_map, depth_start, depth_interval) return estimated_depth_map, prob_map #, filtered_depth_map, probability_volume
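# A hedged sketch of the variance cost above: with ave_feature = E[f] and
# ave_feature2 = E[f^2] accumulated over views, the matching cost is the
# per-pixel feature variance E[f^2] - (E[f])^2; low variance means the views
# are photo-consistent at that depth. Toy shapes, not the warped features.
import tensorflow as tf

features = tf.placeholder(tf.float32, [None, 3, 32, 32, 8])  # [B, views, H, W, C]
mean = tf.reduce_mean(features, axis=1)
mean_sq = tf.reduce_mean(tf.square(features), axis=1)
cost = mean_sq - tf.square(mean)   # variance across views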
def read_cifar10(filename_queue): """Reads and parses examples from CIFAR10 data files. Recommendation: if you want N-way read parallelism, call this function N times. This will give you N independent Readers reading different files & positions within those files, which will give better mixing of examples. Args: filename_queue: A queue of strings with the filenames to read from. Returns: An object representing a single example, with the following fields: height: number of rows in the result (32) width: number of columns in the result (32) depth: number of color channels in the result (3) key: a scalar string Tensor describing the filename & record number for this example. label: an int32 Tensor with the label in the range 0..9. uint8image: a [height, width, depth] uint8 Tensor with the image data """ class CIFAR10Record(object): pass result = CIFAR10Record() # Dimensions of the images in the CIFAR-10 dataset. # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the # input format. label_bytes = 1 # 2 for CIFAR-100 result.height = 32 result.width = 32 result.depth = 3 image_bytes = result.height * result.width * result.depth # Every record consists of a label followed by the image, with a # fixed number of bytes for each. record_bytes = label_bytes + image_bytes # Read a record, getting filenames from the filename_queue. No # header or footer in the CIFAR-10 format, so we leave header_bytes # and footer_bytes at their default of 0. '''Reads a single record; the reader then advances to the next position to be read.''' reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) result.key, value = reader.read(filename_queue) # Convert from a string to a vector of uint8 that is record_bytes long. '''The raw data is binary, so it needs to be converted to 8-bit unsigned integers.''' record_bytes = tf.decode_raw(value, tf.uint8) # The first bytes represent the label, which we convert from uint8->int32. result.label = tf.cast( tf.slice(record_bytes, [0], [label_bytes]), tf.int32) # The remaining bytes after the label represent the image, which we reshape # from [depth * height * width] to [depth, height, width]. depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]), [result.depth, result.height, result.width]) # Convert from [depth, height, width] to [height, width, depth]. '''Axis order was [0, 1, 2]; now it is [1, 2, 0].''' result.uint8image = tf.transpose(depth_major, [1, 2, 0]) return result
def eval_loop(preprocess_fn, network_factory, data_x, data_y, camera_indices, log_dir, eval_log_dir, image_shape=None, run_id=None, loss_mode="cosine-softmax", num_galleries=10, random_seed=4321): """Evaluate a running training session using CMC metric averaged over `num_galleries` galleries where each gallery contains for every identity a randomly selected image-pair. A call to this function will block indefinitely, monitoring the `log_dir/run_id` for saved checkpoints. It then creates summaries in `eval_log_dir/run_id` that can be monitored with tensorboard. Parameters ---------- preprocess_fn : Callable[tf.Tensor] -> tf.Tensor A callable that applies preprocessing to a given input image tensor of dtype tf.uint8 and returns a floating point representation (tf.float32). network_factory : Callable[tf.Tensor] -> (tf.Tensor, tf.Tensor) A callable that takes as argument a preprocessed input image of dtype tf.float32 and returns the feature representation as well as a logits tensor. The logits may be set to None if not required by the loss. data_x : List[str] | np.ndarray A list of image filenames or a tensor of images. data_y : List[int] | np.ndarray A list or one-dimensional array of labels for the images in `data_x`. camera_indices : Optional[List[int] | np.ndarray] A list or one-dimensional array of camera indices for the images in `data_x`. If not None, CMC galleries are created such that image pairs are collected from different cameras. log_dir : str Should be equivalent to the `log_dir` passed into `train_loop` of the training run to monitor. eval_log_dir : str Used to construct the tensorboard log directory where metrics are summarized. image_shape : Tuple[int, int, int] | NoneType Image shape (height, width, channels) or None. If None, `train_x` must be an array of images such that the shape can be queried from this variable. run_id : str A string that identifies the training run; must be set to the same `run_id` passed into `train_loop`. loss_mode : Optional[str] A string that identifies the loss function used for training; must be one of 'cosine-softmax', 'magnet', 'triplet'. This value defaults to 'cosine-softmax'. num_galleries : int The number of galleries to be constructed for evaluation of CMC metrics. random_seed : Optional[int] If not None, the NumPy random seed is fixed to this number; can be used to produce the same galleries over multiple runs. """ if image_shape is None: # If image_shape is not set, train_x must be an image array. Here we # query the image shape from the array of images. assert type(data_x) == np.ndarray image_shape = data_x.shape[1:] elif type(data_x) == np.ndarray: assert data_x.shape[1:] == image_shape read_from_file = type(data_x) != np.ndarray # Create num_galleries random CMC galleries to average CMC top-k over. probes, galleries = [], [] for i in range(num_galleries): probe_indices, gallery_indices = util.create_cmc_probe_and_gallery( data_y, camera_indices, seed=random_seed + i) probes.append(probe_indices) galleries.append(gallery_indices) probes, galleries = np.asarray(probes), np.asarray(galleries) # Set up the data feed. with tf.device("/cpu:0"): # Feed probe and gallery indices to the trainer. num_probes, num_gallery_images = probes.shape[1], galleries.shape[1] probe_idx_var = tf.placeholder(tf.int64, (None, num_probes)) gallery_idx_var = tf.placeholder(tf.int64, (None, num_gallery_images)) trainer = queued_trainer.QueuedTrainer( [probe_idx_var, gallery_idx_var]) # Retrieve indices from trainer and gather data from constant memory.
data_x_var = tf.constant(data_x) data_y_var = tf.constant(data_y) probe_idx_var, gallery_idx_var = trainer.get_input_vars(batch_size=1) probe_idx_var = tf.squeeze(probe_idx_var) gallery_idx_var = tf.squeeze(gallery_idx_var) # Apply preprocessing. probe_x_var = tf.gather(data_x_var, probe_idx_var) if read_from_file: # NOTE(nwojke): tf.image.decode_jpg handles various image types. num_channels = image_shape[-1] if len(image_shape) == 3 else 1 probe_x_var = tf.map_fn(lambda x: tf.image.decode_jpeg( tf.read_file(x), channels=num_channels), probe_x_var, dtype=tf.uint8) probe_x_var = tf.image.resize_images(probe_x_var, image_shape[:2]) probe_x_var = tf.map_fn(lambda x: preprocess_fn(x, is_training=False), probe_x_var, back_prop=False, dtype=tf.float32) probe_y_var = tf.gather(data_y_var, probe_idx_var) gallery_x_var = tf.gather(data_x_var, gallery_idx_var) if read_from_file: # NOTE(nwojke): tf.image.decode_jpg handles various image types. num_channels = image_shape[-1] if len(image_shape) == 3 else 1 gallery_x_var = tf.map_fn(lambda x: tf.image.decode_jpeg( tf.read_file(x), channels=num_channels), gallery_x_var, dtype=tf.uint8) gallery_x_var = tf.image.resize_images(gallery_x_var, image_shape[:2]) gallery_x_var = tf.map_fn( lambda x: preprocess_fn(x, is_training=False), gallery_x_var, back_prop=False, dtype=tf.float32) gallery_y_var = tf.gather(data_y_var, gallery_idx_var) # Construct the network and compute features. probe_and_gallery_x_var = tf.concat(axis=0, values=[probe_x_var, gallery_x_var]) probe_and_gallery_x_var, _ = network_factory(probe_and_gallery_x_var) num_probe = tf.shape(probe_x_var)[0] probe_x_var = tf.slice(probe_and_gallery_x_var, [0, 0], [num_probe, -1]) gallery_x_var = tf.slice(probe_and_gallery_x_var, [num_probe, 0], [-1, -1]) # Set up the metrics. distance_measure = (metrics.cosine_distance if loss_mode == "cosine-softmax" else metrics.pdist) def cmc_metric_at_k(k): return metrics.streaming_mean_cmc_at_k(probe_x_var, probe_y_var, gallery_x_var, gallery_y_var, k=k, measure=distance_measure) names_to_values, names_to_updates = slim.metrics.aggregate_metric_map( {"Precision@%d" % k: cmc_metric_at_k(k) for k in [1, 5, 10, 20]}) for metric_name, metric_value in names_to_values.items(): tf.summary.scalar(metric_name, metric_value) # Start evaluation loop. trainer.evaluate((probes, galleries), log_dir, eval_log_dir, run_id=run_id, eval_op=list(names_to_updates.values()), eval_interval_secs=60)
nce_weights = tf.Variable(tf.truncated_normal([vocabulary_size, concatenated_size], stddev=1.0 / np.sqrt(concatenated_size))) nce_biases = tf.Variable(tf.zeros([vocabulary_size])) # Create data/target placeholders x_inputs = tf.placeholder(tf.int32, shape=[None, window_size + 1]) # plus 1 for doc index y_target = tf.placeholder(tf.int32, shape=[None, 1]) valid_dataset = tf.constant(valid_examples, dtype=tf.int32) # Lookup the word embedding # Add together element embeddings in window: embed = tf.zeros([batch_size, embedding_size]) for element in range(window_size): embed += tf.nn.embedding_lookup(embeddings, x_inputs[:, element]) doc_indices = tf.slice(x_inputs, [0,window_size],[batch_size,1]) doc_embed = tf.nn.embedding_lookup(doc_embeddings,doc_indices) # After summing the word embeddings, concatenate the document embedding behind them # concatenate embeddings final_embed = tf.concat(1, [embed, tf.squeeze(doc_embed)]) # Get loss from prediction loss = tf.reduce_mean(tf.nn.nce_loss(nce_weights, nce_biases, y_target,final_embed, num_sampled, vocabulary_size)) # Create optimizer optimizer = tf.train.GradientDescentOptimizer(learning_rate=model_learning_rate) train_step = optimizer.minimize(loss) # Cosine similarity between words
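# A hedged, self-contained sketch of the doc2vec input layout above: each row
# of x_inputs holds window_size word ids followed by one document id; the word
# embeddings are summed and the document embedding is concatenated afterwards.
# Toy sizes; note the snippet above uses the pre-TF-1.0 tf.concat(axis, values)
# argument order, while this sketch uses the modern order.
import tensorflow as tf

window_size, batch = 3, 4
emb_w = tf.get_variable("emb_w_demo", [1000, 16])  # word embeddings (toy)
emb_d = tf.get_variable("emb_d_demo", [200, 16])   # document embeddings (toy)
x = tf.placeholder(tf.int32, [batch, window_size + 1])

summed = tf.add_n([tf.nn.embedding_lookup(emb_w, x[:, i])
                   for i in range(window_size)])
doc_idx = tf.slice(x, [0, window_size], [batch, 1])
doc_emb = tf.squeeze(tf.nn.embedding_lookup(emb_d, doc_idx), axis=1)
final = tf.concat([summed, doc_emb], axis=1)       # [batch, 32]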
def pnn_rec(features, labels, mode, params): ################################################## #weight variables for pnn modle fm_b = tf.get_variable(name='fm_b', shape=[1], initializer=tf.glorot_normal_initializer()) fm_w = tf.get_variable(name='fm_w', shape=[params['feature_size']], initializer=tf.glorot_normal_initializer()) fm_v = tf.get_variable( name='fm_v', shape=[params['feature_size'], params['embedding_size']], initializer=tf.glorot_normal_initializer()) ################################################## #feature index and vals feature_index = tf.reshape(features['feature_index'], shape=[-1, params['field_size']]) feature_value = tf.reshape(features['feature_value'], shape=[-1, params['field_size']]) #linear part embeddings_w = tf.nn.embedding_lookup(fm_w, feature_index) #N,F embeddings_w = tf.multiply(embeddings_w, feature_value) embeddings_w = tf.reshape(embeddings_w, shape=[-1, params['field_size'], 1]) #N,F,1 #cross part embeddings_v = tf.nn.embedding_lookup(fm_v, feature_index) #N, F, k feature_value = tf.reshape(feature_value, shape=[-1, params['field_size'], 1]) embeddings_v = tf.multiply(embeddings_v, feature_value) #N, F, k #inner product pnn if params['product_type'] == 'in_product': embeddings_p = tf.matmul(embeddings_v, embeddings_v, transpose_b=True) feat_arr = [] for i in range(params['field_size'] - 1): row_len = params['field_size'] - 1 - i row = tf.slice(embeddings_p, [0, i, i + 1], [-1, 1, row_len]) feat = tf.reshape(row, [-1, row_len]) feat_arr.append(feat) product = tf.concat(feat_arr, 1) # N, F*(F-1) #outer product pnn else: assert params['product_type'] == 'out_product' embeddings_p = tf.reshape( embeddings_v, [-1, params['field_size'] * params['embedding_size']]) embeddings_r = tf.reshape( tf.einsum('ni,nj->nij', embeddings_p, embeddings_p), [ -1, params['field_size'], params['embedding_size'], params['field_size'], params['embedding_size'] ]) embeddings_r = tf.transpose(embeddings_r, perm=[0, 1, 3, 2, 4]) embeddings_r = tf.reshape(embeddings_r, [ -1, params['field_size'], params['field_size'], params['embedding_size'] * params['embedding_size'] ]) feat_arr = [] for i in range(params['field_size'] - 1): row_len = params['field_size'] - 1 - i row = tf.slice(embeddings_r, [0, i, i + 1, 0], [-1, 1, row_len, -1]) feat = tf.reshape(row, [ -1, row_len * params['embedding_size'] * params['embedding_size'] ]) feat_arr.append(feat) product = tf.concat(feat_arr, 1) #N,F*(F-1)*K*K/2 #concat linear part and cross part embeddings = tf.concat([embeddings_w, embeddings_v], 2) #reshape concat result net = tf.reshape( embeddings, shape=[-1, params['field_size'] * (params['embedding_size'] + 1)]) b = fm_b * tf.ones_like(tf.reshape(tf.slice(net, [0, 0], [-1, 1]), [-1, 1])) net = tf.concat([net, b, product], 1) ################################################## #deep part of pnn model, perform weight decay, batch normlization, drop_out with each hidden layer hidden_index = 0 for units in params['hidden_units']: net = tf.layers.dense( net, units=int(units), activation=tf.nn.relu, kernel_regularizer=tf.contrib.layers.l2_regularizer( params['l2_reg'])) if params['batch_norm']: net = tf.layers.batch_normalization( net, training=(mode == tf.estimator.ModeKeys.TRAIN)) if params['drop_out']: net = tf.layers.dropout( net, rate=float(params['drop_rates'][hidden_index]), training=(mode == tf.estimator.ModeKeys.TRAIN)) hidden_index = hidden_index + 1 logits = tf.layers.dense(net, 1, activation=tf.nn.relu) #predict and export if mode == tf.estimator.ModeKeys.PREDICT: predictions = 
{'prob': tf.nn.sigmoid(logits), 'logits': logits} export_outputs = { tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions) } return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs) #evaluate and train labels = tf.reshape(labels, (-1, 1)) update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_op): loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)) auc = tf.metrics.auc(labels=labels, predictions=tf.nn.sigmoid(logits), name='auc_op') metrics = {'auc': auc} tf.summary.scalar('auc', auc[1]) if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics) # train. assert mode == tf.estimator.ModeKeys.TRAIN if 'Adam' == params['optimizer']: optimizer = tf.train.AdamOptimizer( learning_rate=params['learning_rate'], beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam') elif 'RMSProp' == params['optimizer']: optimizer = tf.train.RMSPropOptimizer( learning_rate=params['learning_rate'], decay=0.9, momentum=0.0, epsilon=1e-10) elif 'Momentum' == params['optimizer']: optimizer = tf.train.MomentumOptimizer( learning_rate=params['learning_rate'], momentum=0.9) elif 'Adagrad' == params['optimizer']: optimizer = tf.train.AdagradOptimizer( learning_rate=params['learning_rate'], initial_accumulator_value=0.1) else: assert 'GD' == params['optimizer'] optimizer = tf.train.GradientDescentOptimizer( learning_rate=params['learning_rate']) train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
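# A hedged sketch of the inner-product part of the PNN above: for per-field
# embeddings v of shape [N, F, K], embeddings_p[n, i, j] is the dot product of
# field i and field j; the strictly upper-triangular entries (i < j) are
# gathered row by row with tf.slice, giving F*(F-1)/2 pair features.
import tensorflow as tf

F, K = 4, 8
v = tf.placeholder(tf.float32, [None, F, K])       # per-field embeddings
p = tf.matmul(v, v, transpose_b=True)              # [N, F, F] pairwise dots
rows = []
for i in range(F - 1):
    row_len = F - 1 - i
    rows.append(tf.reshape(tf.slice(p, [0, i, i + 1], [-1, 1, row_len]),
                           [-1, row_len]))
pairs = tf.concat(rows, axis=1)                    # [N, F*(F-1)/2]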
def run_inference_for_image_list(ordered_test_set, graph): output_dict_list = [] with graph.as_default(): with tf.Session() as sess: for i in range(len(ordered_test_set)): im_current_path = os.path.join(PATH_TO_LABEL_IMAGES_DIR, ordered_test_set[i]) im_prev_path = im_current_path if i == 0 else os.path.join( PATH_TO_LABEL_IMAGES_DIR, ordered_test_set[i - 1]) current_frame = skimage.io.imread(im_current_path) prev_frame = skimage.io.imread(im_prev_path) image_s = cv2.subtract(current_frame, prev_frame) six_channels_im = np.concatenate((current_frame, image_s), axis=2) # Get handles to input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = { output.name for op in ops for output in op.outputs } tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph( ).get_tensor_by_name(tensor_name) if 'detection_masks' in tensor_dict: print("we have masks! :)") # The following processing is only for single image detection_boxes = tf.squeeze( tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze( tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast( tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_s.shape[0], image_s.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name( 'image_tensor:0') # Run inference print("running {}/{}".format(i + 1, len(ordered_test_set))) output_dict = sess.run(tensor_dict, feed_dict={ image_tensor: np.expand_dims(six_channels_im, 0) }) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int( output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict[ 'detection_boxes'][0] output_dict['detection_scores'] = output_dict[ 'detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict[ 'detection_masks'][0] output_dict_list.append(output_dict) return output_dict_list
def createComputationalGraph(self): """ Create the computational graph in tensorflow """ ###################################### # Variables that are needed self.u = tf.Variable(np.zeros(self.N), dtype=self.dtype) self.rLowPass = tf.Variable(np.zeros(self.N), dtype=self.dtype) uNoise = tf.Variable(np.zeros(self.N), dtype=self.dtype) self.uNoiseLowPass = tf.Variable(np.zeros(self.N), dtype=self.dtype) self.uDotOld = tf.Variable(np.zeros(self.N), dtype=self.dtype) self.eligNow = tf.Variable( np.zeros((self.N, self.N)), dtype=self.dtype) self.eligibility = tf.Variable( np.zeros((self.N, self.N)), dtype=self.dtype) self.regEligibility = tf.Variable( np.zeros((self.N, self.N)), dtype=self.dtype) self.regEligibilityBorder = tf.Variable( np.zeros((self.N, self.N)), dtype=self.dtype) self.wTfNoWta = tf.Variable(self.WnoWta, dtype=self.dtype) self.wTfOnlyWta = tf.Variable(self.onlyWta, dtype=self.dtype) inputMask = self.input.getInput(0.)[2] self.inputMaskTf = tf.Variable(inputMask, dtype=self.dtype) outputMask = self.target.getTarget(self.T)[2] self.outputMaskTf = tf.Variable(outputMask, dtype=self.dtype) self.biasTf = tf.Variable(self.biasVector, dtype=self.dtype) # set up a mask for the learned weights in self.wTfNoWta # note that W.mask must omit the WTA network self.wNoWtaMask = tf.Variable(self.Wplastic.astype(float), dtype=self.dtype) ##################################### # Variables for debugging self.error = tf.Variable(np.zeros(self.N), dtype=self.dtype) ##################################### # Placeholders # The only datatransfer between the GPU and the CPU should be the # input to the input layer and the modulatory signal self.inputTf = tf.placeholder(dtype=self.dtype, shape=(self.N)) self.inputPrimeTf = tf.placeholder(dtype=self.dtype, shape=(self.N)) self.modulator = tf.placeholder(dtype=self.dtype, shape=()) #################################### # Aux variables for the calculations nInput = len(np.where(inputMask == 1)[0]) nOutput = len(np.where(outputMask == 1)[0]) nFull = len(inputMask) ##################################################### # Start the actual calculations for the comp graph # ##################################################### #################################### # Calculate the activations functions using the updated values self.rho = self.actFunc(self.u) rhoPrime = self.actFuncPrime(self.u) rhoPrimePrime = self.actFuncPrimePrime(self.u) self.rhoOutput = self.actFunc(self.u) ################################### # Update the exploration noise on the output neurons uNoiseOut = tf.slice(uNoise, [nFull - nOutput], [-1]) duOutNoise = self.noiseTheta * (self.noiseMean - uNoiseOut) * self.timeStep + self.noiseSigma * \ np.sqrt(self.timeStep) * \ tf.random_normal([nOutput], mean=0., stddev=1.0, dtype=self.dtype) updateNoise = tf.scatter_update(uNoise, np.arange(nFull - nOutput, nFull), uNoiseOut + duOutNoise) # Update the low-pass noise with tf.control_dependencies([updateNoise]): self.uNoiseLowPass = self.uNoiseLowPass + (self.timeStep/self.tau) * (uNoise - self.uNoiseLowPass) #################################### # Calculate the updates for the membrane potential and for the # eligibility trace with tf.control_dependencies([self.uNoiseLowPass, updateNoise, rhoPrime, rhoPrimePrime]): # frequently used tensors are claculated early on wNoWtaT = tf.transpose(self.wTfNoWta) wNoWtaRho = tfTools.tf_mat_vec_dot(self.wTfNoWta, self.rho) c = tfTools.tf_mat_vec_dot(wNoWtaT, self.u - wNoWtaRho - self.biasTf - self.inputTf - self.uNoiseLowPass) # get the matrix side of the equation A1 = 
tf.matmul(self.wTfNoWta, tf.diag(rhoPrime)) A2 = tf.matmul(tf.diag(c), tf.diag(rhoPrimePrime)) A3 = tf.matmul(tf.diag(rhoPrime), wNoWtaT) A4 = tf.matmul(tf.matmul(tf.diag(rhoPrime),wNoWtaT), tf.matmul(self.wTfNoWta, tf.diag(rhoPrime))) A5 = self.beta * self.alphaWna * tf.matmul(self.wTfOnlyWta, tf.diag(rhoPrime)) A = self.tau*(tf.eye(self.N) - A1 - A2 - A3 + A4 - A5) # get the vector side of the equation y1 = wNoWtaRho + self.biasTf + self.inputTf + self.alphaNoise * self.beta * uNoise - self.u y2 = self.tau * self.inputPrimeTf y3 = rhoPrime * c y4 = self.tau * rhoPrime * tfTools.tf_mat_vec_dot( wNoWtaT, self.inputPrimeTf) y5 = self.beta * self.alphaWna * tfTools.tf_mat_vec_dot( self.wTfOnlyWta, self.rho) y6 = rhoPrime * tfTools.tf_mat_vec_dot(wNoWtaT, uNoise - self.uNoiseLowPass) y = y1 + y2 + y3 - y4 + y5 - y6 # Solve the equation for uDot #self.uDiff = (1. / self.tau) * tf.linalg.solve(A, y) uDiff = tf.linalg.solve(A, tf.expand_dims(y, 1))[:, 0] #chol = tf.cholesky(A) #uDiff = tf.cholesky_solve(tf.cholesky(A), tf.expand_dims(y, 1))[:, 0] """ # The regular component with lookahead reg = tfTools.tf_mat_vec_dot( self.wTfNoWta, self.rho + rhoPrime * self.uDotOld * self.tau) - self.u + self.biasTf # Error term from the vanilla lagrange eVfirst = rhoPrime * c eVsecond = (rhoPrimePrime * self.uDotOld) * c eVthird = rhoPrime * \ tfTools.tf_mat_vec_dot( wNoWtaT, self.uDotOld - tfTools.tf_mat_vec_dot( self.wTfNoWta, rhoPrime * self.uDotOld) ) eV = eVfirst + self.tau * (eVsecond + eVthird) # terms from the winner nudges all circuit regWna = self.beta * self.alphaWna * tfTools.tf_mat_vec_dot( self.wTfOnlyWta, self.rho + self.tau * rhoPrime * self.uDotOld) # Terms from the exploration noise term #eNoise = self.alphaNoise * self.beta * \ # ((uNoise) - (uOut + self.tau * uDotOut)) eNoise = self.alphaNoise * self.beta * uNoise """ #uDiff = (1. / self.tau) * (reg + eV + regWna + eNoise) saveOldUDot = self.uDotOld.assign(uDiff) updateLowPassActivity = self.rLowPass.assign((self.rLowPass + self.timeStep / self.tauEligibility * self.rho) * tf.exp(-1. * self.timeStep / self.tauEligibility)) self.eligNowUpdate = self.eligNow.assign(tfTools.tf_outer_product(self.u - tfTools.tf_mat_vec_dot(self.wTfNoWta, self.rho) - self.biasTf, self.rho)) errorUpdate = self.error.assign(self.u - tfTools.tf_mat_vec_dot(self.wTfNoWta, self.rho) - self.biasTf - self.inputTf) with tf.control_dependencies([saveOldUDot, updateLowPassActivity, self.eligNowUpdate, errorUpdate]): self.updateEligiblity = self.eligibility.assign( (self.eligibility + self.timeStep * tfTools.tf_outer_product( self.u - tfTools.tf_mat_vec_dot(self.wTfNoWta, self.rho) - self.biasTf - self.inputTf - self.uNoiseLowPass, self.rho)) * tf.exp(-1. * self.timeStep / self.tauEligibility) ) self.updateRegEligibility = self.regEligibility.assign( (self.regEligibility + self.timeStep * tfTools.tf_outer_product( tf.nn.relu(self.uTarget - self.u), self.rho)) * tf.exp(-1. * self.timeStep / self.tauEligibility) ) self.updateRegEligibilityBorder = self.regEligibilityBorder.assign( (self.regEligibilityBorder + self.timeStep * tfTools.tf_outer_product( tf.nn.relu(self.uLow - self.u) - tf.nn.relu(self.u - self.uHigh), self.rho)) * tf.exp(-1. 
* self.timeStep / self.tauEligibility) ) #self.applyMembranePot = tf.scatter_update(self.u, np.arange( # nInput, nFull), tf.slice(self.u, [nInput], [-1]) + self.timeStep * tf.slice(uDiff, [nInput], [-1])) with tf.control_dependencies([saveOldUDot, updateLowPassActivity, self.eligNowUpdate, errorUpdate, self.updateEligiblity, self.updateRegEligibility, self.updateRegEligibilityBorder]): self.applyMembranePot = self.u.assign(self.u + self.timeStep * uDiff) ############################################### ## Node to update the weights of the network ## ############################################### self.updateW = self.wTfNoWta.assign(self.wTfNoWta + ( 1. / self.tauEligibility) * ( self.modulator * self.learningRate * self.eligibility * self.Wplastic + tf.math.abs(self.modulator) * self.learningRateH * self.regEligibility * self.noWnaMask + self.learningRateB * self.regEligibilityBorder * self.wNoWtaMask))
tf.reset_default_graph() #These two lines establish the feed-forward part of the network. This does #the actual choosing. weights = tf.Variable(tf.ones([num_bandits])) chosen_action = tf.argmax(weights, 0) #The next six lines establish the training procedure. We feed the reward and #chosen action into the network #to compute the loss, and use it to update the network. reward_holder = tf.placeholder(shape=[1], dtype=tf.float32) action_holder = tf.placeholder( shape=[1], dtype=tf.int32) #index of slot machine to choose responsible_weight = tf.slice(weights, action_holder, [1]) #scalar weight responsible for reward loss = -(tf.log(responsible_weight) * reward_holder) optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) update = optimizer.minimize(loss) total_episodes = 2000 #Set total number of episodes to train agent on. total_reward = np.zeros(num_bandits) #Set scoreboard for bandits to 0. choices = np.zeros(num_bandits) #How many times each bandit was chosen e = 0.1 #Set the chance of taking a random action (exploration rate) init = tf.global_variables_initializer() # Launch the tensorflow graph with tf.Session() as sess: sess.run(init) for i in range(total_episodes):
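        # The loop body is truncated in the original; below is a hedged sketch
        # of one epsilon-greedy step it presumably performs. pullBandit and
        # bandits are assumed names for the environment, not defined here.
        if np.random.rand(1) < e:
            action = np.random.randint(num_bandits)   # explore
        else:
            action = sess.run(chosen_action)          # exploit current weights
        reward = pullBandit(bandits[action])          # hypothetical env call
        _, resp, ww = sess.run([update, responsible_weight, weights],
                               feed_dict={reward_holder: [reward],
                                          action_holder: [action]})
        total_reward[action] += reward
        choices[action] += 1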
def convLSTM_anim(self, graph, log_count): print("Python version :", sys.version) print("TensorFlow version: ", tf.VERSION) print("Current directory : ", os.getcwd()) LOGDIR = "/tmp/convLSTM/" NUM_UNROLLINGS = 3 NUM_TRAINING_STEPS = 801 counter = 4 anim = pickle.load(open(self.file, 'rb')) anim = tf.expand_dims(anim, axis=-1) image = tf.slice(anim, [counter, 0, 0, 0], [1, self.IM_SZ_LEN, self.IM_SZ_WID, 1]) def newEmpty4Dtensor_1channel(): emptyTensor = tf.zeros([self.IM_SZ_LEN, self.IM_SZ_WID]) emptyTensor = tf.expand_dims(emptyTensor, 0) emptyTensor = tf.expand_dims(emptyTensor, -1) return emptyTensor def newEmpty4Dtensor_2channels(): emptyTensor = tf.zeros([self.IM_SZ_LEN, self.IM_SZ_WID, 2]) emptyTensor = tf.expand_dims(emptyTensor, 0) return emptyTensor # create some initializations initial_lstm_state = newEmpty4Dtensor_1channel() initial_lstm_output = newEmpty4Dtensor_1channel() initial_err_input = newEmpty4Dtensor_2channels() # The above weights are global to this definition. def convLstmLayer(err_inp, prev_s, prev_h): with tf.name_scope("LSTM"): inp = tf.nn.tanh( tf.nn.conv2d(err_inp, self.U, [1, 1, 1, 1], padding='SAME') + tf.nn.conv2d( prev_h, self.W, [1, 1, 1, 1], padding='SAME') + self.B, name="inp") g_gate = tf.nn.tanh( tf.nn.conv2d( err_inp, self.Ug, [1, 1, 1, 1], padding='SAME') + tf.nn.conv2d(prev_h, self.Wg, [1, 1, 1, 1], padding='SAME') + self.Bg, name="g_gate") # i_gate is more common name f_gate = tf.nn.tanh( tf.nn.conv2d( err_inp, self.Uf, [1, 1, 1, 1], padding='SAME') + tf.nn.conv2d(prev_h, self.Wf, [1, 1, 1, 1], padding='SAME') + self.Bf, name="f_gate") q_gate = tf.nn.tanh( tf.nn.conv2d( err_inp, self.Uo, [1, 1, 1, 1], padding='SAME') + tf.nn.conv2d(prev_h, self.Wo, [1, 1, 1, 1], padding='SAME') + self.Bo, name="q_gate") # o_gate is more common name s = tf.add(tf.multiply(f_gate, prev_s), tf.multiply(g_gate, inp), name="state") h = tf.multiply(q_gate, tf.nn.relu6(s), name="output") # Also try relu return s, h # normally above is tanh # errorModule doesn't use variables, so doesn't undergo training def errorModule(image, predict): with tf.name_scope("ErrMod"): err1 = tf.nn.relu(image - predict, name="E1") err2 = tf.nn.relu(predict - image, name="E2") tensor5D = tf.stack([err1, err2], axis=3) tensor4D = tf.reshape(tensor5D, [1, self.IM_SZ_LEN, self.IM_SZ_WID, 2], name="PrdErr") return tensor4D # Build LSTM lstm_state = initial_lstm_state lstm_output = initial_lstm_output err_input = initial_err_input with tf.name_scope("full_model"): for _ in range(NUM_UNROLLINGS): # three unrollings lstm_state, lstm_output = convLstmLayer( err_input, lstm_state, lstm_output) err_input = errorModule(image, lstm_output) counter = counter + 1 image = tf.slice(anim, [counter, 0, 0, 0], [1, self.IM_SZ_LEN, self.IM_SZ_WID, 1]) #New optimizer block, uses exp decay on learning rate, added clip_by_global_norm loss = tf.reduce_sum( err_input) # sums the values across each component of the tensor global_step = tf.Variable(0) #learning rate starts at 10, decreases by 90% every 300 steps learning_rate = tf.train.exponential_decay(10.0, global_step, 300, 0.1, staircase=True, name='LearningRate') optimizer = tf.train.GradientDescentOptimizer(learning_rate) gradients, v = zip(*optimizer.compute_gradients(loss)) gradients, _ = tf.clip_by_global_norm(gradients, 1.25) optimizer = optimizer.apply_gradients(zip(gradients, v), global_step=global_step) with tf.name_scope("initializations"): tf.summary.image("initial_lstm_state", initial_lstm_state, 3) tf.summary.image("initial_lstm_output", 
initial_lstm_output, 3) tf.summary.image( "initial_error1", tf.slice(initial_err_input, [0, 0, 0, 0], [1, self.IM_SZ_LEN, self.IM_SZ_WID, 1]), 3) tf.summary.image( "initial_error2", tf.slice(initial_err_input, [0, 0, 0, 1], [1, self.IM_SZ_LEN, self.IM_SZ_WID, 1]), 3) with tf.name_scope("input"): tf.summary.image("image", image, 3) with tf.name_scope("lstm"): tf.summary.image("lstm_out", lstm_output, 3) tf.summary.image("lstm_state", lstm_state, 3) with tf.name_scope("error"): tf.summary.image( "perror_1", tf.slice(err_input, [0, 0, 0, 0], [1, self.IM_SZ_LEN, self.IM_SZ_WID, 1]), 3) tf.summary.image( "perror_2", tf.slice(err_input, [0, 0, 0, 1], [1, self.IM_SZ_LEN, self.IM_SZ_WID, 1]), 3) with tf.name_scope('optimizer'): tf.summary.scalar('loss', loss) tf.summary.scalar('learning_rate', learning_rate) with tf.name_scope('weights'): with tf.name_scope('input_update'): newU1 = tf.slice(self.U, [0, 0, 0, 0], [5, 5, 1, 1]) newU2 = tf.slice(self.U, [0, 0, 1, 0], [5, 5, 1, 1]) newW = tf.slice(self.W, [0, 0, 0, 0], [5, 5, 1, 1]) newU1 = tf.squeeze(newU1) #now a viewable [5x5] matrix newU2 = tf.squeeze(newU2) newW = tf.squeeze(newW) newU1 = tf.reshape(newU1, [1, 5, 5, 1]) newU2 = tf.reshape(newU2, [1, 5, 5, 1]) newW = tf.reshape(newW, [1, 5, 5, 1]) tf.summary.image('U1', newU1) tf.summary.image('U2', newU2) tf.summary.image('W', newW) tf.summary.image('B', self.B) with tf.name_scope('input_gate'): newUg1 = tf.slice(self.Ug, [0, 0, 0, 0], [5, 5, 1, 1]) newUg2 = tf.slice(self.Ug, [0, 0, 1, 0], [5, 5, 1, 1]) newWg = tf.slice(self.Wg, [0, 0, 0, 0], [5, 5, 1, 1]) newUg1 = tf.squeeze(newUg1) #now a viewable [5x5] matrix newUg2 = tf.squeeze(newUg2) newWg = tf.squeeze(newWg) newUg1 = tf.reshape(newUg1, [1, 5, 5, 1]) newUg2 = tf.reshape(newUg2, [1, 5, 5, 1]) newWg = tf.reshape(newWg, [1, 5, 5, 1]) tf.summary.image('Ug1', newUg1) tf.summary.image('Ug2', newUg2) tf.summary.image('Wg', newWg) tf.summary.image('Bg', self.Bg) with tf.name_scope('forget_gate'): newUf1 = tf.slice(self.Uf, [0, 0, 0, 0], [5, 5, 1, 1]) newUf2 = tf.slice(self.Uf, [0, 0, 1, 0], [5, 5, 1, 1]) newWf = tf.slice(self.Wf, [0, 0, 0, 0], [5, 5, 1, 1]) newUf1 = tf.squeeze(newUf1) #now a viewable [5x5] matrix newUf2 = tf.squeeze(newUf2) newWf = tf.squeeze(newWf) newUf1 = tf.reshape(newUf1, [1, 5, 5, 1]) newUf2 = tf.reshape(newUf2, [1, 5, 5, 1]) newWf = tf.reshape(newWf, [1, 5, 5, 1]) tf.summary.image('Uf1', newUf1) tf.summary.image('Uf2', newUf2) tf.summary.image('Wf', newWf) tf.summary.image('Bf', self.Bf) with tf.name_scope('output_gate'): newUo1 = tf.slice(self.Uo, [0, 0, 0, 0], [5, 5, 1, 1]) newUo2 = tf.slice(self.Uo, [0, 0, 1, 0], [5, 5, 1, 1]) newWo = tf.slice(self.Wo, [0, 0, 0, 0], [5, 5, 1, 1]) newUo1 = tf.squeeze(newUo1) #now a viewable [5x5] matrix newUo2 = tf.squeeze(newUo2) newWo = tf.squeeze(newWo) newUo1 = tf.reshape(newUo1, [1, 5, 5, 1]) newUo2 = tf.reshape(newUo2, [1, 5, 5, 1]) newWo = tf.reshape(newWo, [1, 5, 5, 1]) tf.summary.image('Uo1', newUo1) tf.summary.image('Uo2', newUo2) tf.summary.image('Wo', newWo) tf.summary.image('Bo', self.Bo) # Start training with tf.Session(graph=graph) as sess: tf.global_variables_initializer().run() # Create graph summary # Use a different log file each time you run the program. 
msumm = tf.summary.merge_all() writer = tf.summary.FileWriter( LOGDIR + str(log_count)) # += 1 for each run till /tmp is cleared log_count += 1 writer.add_graph(sess.graph) for step in range(NUM_TRAINING_STEPS): # 0 to NUM_TRAINING_STEPS - 1 if step % 1 == 0: ms = sess.run(msumm) writer.add_summary(ms, step) _, l, predictions = sess.run([optimizer, loss, lstm_output]) print("Step: ", step) print("Loss: ", l)
def main(unused_argv): tf.logging.set_verbosity(tf.logging.INFO) # Get dataset-dependent information. dataset = data_generator.Dataset( dataset_name=FLAGS.dataset, split_name=FLAGS.vis_split, dataset_dir=FLAGS.dataset_dir, batch_size=FLAGS.vis_batch_size, crop_size=[int(sz) for sz in FLAGS.vis_crop_size], min_resize_value=FLAGS.min_resize_value, max_resize_value=FLAGS.max_resize_value, resize_factor=FLAGS.resize_factor, model_variant=FLAGS.model_variant, is_training=False, should_shuffle=False, should_repeat=False) train_id_to_eval_id = None if dataset.dataset_name == data_generator.get_cityscapes_dataset_name(): tf.logging.info('Cityscapes requires converting train_id to eval_id.') train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID # Prepare for visualization. tf.gfile.MakeDirs(FLAGS.vis_logdir) save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER) tf.gfile.MakeDirs(save_dir) raw_save_dir = os.path.join(FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER) tf.gfile.MakeDirs(raw_save_dir) tf.logging.info('Visualizing on %s set', FLAGS.vis_split) with tf.Graph().as_default(): samples = dataset.get_one_shot_iterator().get_next() model_options = common.ModelOptions( outputs_to_num_classes={ common.OUTPUT_TYPE: dataset.num_of_classes }, crop_size=[int(sz) for sz in FLAGS.vis_crop_size], atrous_rates=FLAGS.atrous_rates, output_stride=FLAGS.output_stride) if tuple(FLAGS.eval_scales) == (1.0, ): tf.logging.info('Performing single-scale test.') predictions = model.predict_labels( samples[common.IMAGE], model_options=model_options, image_pyramid=FLAGS.image_pyramid) else: tf.logging.info('Performing multi-scale test.') if FLAGS.quantize_delay_step >= 0: raise ValueError( 'Quantize mode is not supported with multi-scale test.') predictions = model.predict_labels_multi_scale( samples[common.IMAGE], model_options=model_options, eval_scales=FLAGS.eval_scales, add_flipped_images=FLAGS.add_flipped_images) predictions = predictions[common.OUTPUT_TYPE] if FLAGS.min_resize_value and FLAGS.max_resize_value: # Only support batch_size = 1, since we assume the dimensions of original # image after tf.squeeze is [height, width, 3]. assert FLAGS.vis_batch_size == 1 # Reverse the resizing and padding operations performed in preprocessing. # First, we slice the valid regions (i.e., remove padded region) and then # we resize the predictions back. 
original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE]) original_image_shape = tf.shape(original_image) predictions = tf.slice( predictions, [0, 0, 0], [1, original_image_shape[0], original_image_shape[1]]) resized_shape = tf.to_int32([ tf.squeeze(samples[common.HEIGHT]), tf.squeeze(samples[common.WIDTH]) ]) predictions = tf.squeeze( tf.image.resize_images( tf.expand_dims(predictions, 3), resized_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, align_corners=True), 3) tf.train.get_or_create_global_step() if FLAGS.quantize_delay_step >= 0: contrib_quantize.create_eval_graph() num_iteration = 0 max_num_iteration = FLAGS.max_number_of_iterations checkpoints_iterator = contrib_training.checkpoints_iterator( FLAGS.checkpoint_dir, min_interval_secs=FLAGS.eval_interval_secs) for checkpoint_path in checkpoints_iterator: num_iteration += 1 tf.logging.info('Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())) tf.logging.info('Visualizing with model %s', checkpoint_path) scaffold = tf.train.Scaffold( init_op=tf.global_variables_initializer()) session_creator = tf.train.ChiefSessionCreator( scaffold=scaffold, master=FLAGS.master, checkpoint_filename_with_path=checkpoint_path) with tf.train.MonitoredSession(session_creator=session_creator, hooks=None) as sess: batch = 0 image_id_offset = 0 while not sess.should_stop(): tf.logging.info('Visualizing batch %d', batch + 1) _process_batch( sess=sess, original_images=samples[common.ORIGINAL_IMAGE], semantic_predictions=predictions, image_names=samples[common.IMAGE_NAME], image_heights=samples[common.HEIGHT], image_widths=samples[common.WIDTH], image_id_offset=image_id_offset, save_dir=save_dir, raw_save_dir=raw_save_dir, train_id_to_eval_id=train_id_to_eval_id) image_id_offset += FLAGS.vis_batch_size batch += 1 tf.logging.info('Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())) if max_num_iteration > 0 and num_iteration >= max_num_iteration: break
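The un-padding step above is easy to get wrong, so here is the same idea isolated into a self-contained sketch (restore_predictions is a hypothetical name; the slice/resize calls mirror the snippet):

def restore_predictions(predictions, valid_shape, original_shape):
    """Undo resize-then-pad preprocessing for a [1, H, W] label map.

    valid_shape: [h, w] of the un-padded (but still resized) region.
    original_shape: [h, w] of the original input image.
    """
    # Keep only the valid top-left region, dropping the padded border.
    valid = tf.slice(predictions, [0, 0, 0],
                     [1, valid_shape[0], valid_shape[1]])
    # Nearest-neighbor resize so no new label ids are interpolated in.
    resized = tf.image.resize_images(
        tf.expand_dims(valid, 3), original_shape,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, align_corners=True)
    return tf.squeeze(resized, 3)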
def _build_model(self): # filename_queue = tf.train.string_input_producer(['american_train.tfrecords', 'british_train.tfrecords'], num_epochs=200) # reader = tf.TFRecordReader() count = tf.Variable(0, name='counter', dtype=tf.float32, trainable=False) # step counter, not a weight: keep it out of trainable_variables() inc = tf.assign_add(count, 1, name='increment') # self.token = tf.Variable(0, name='token', dtype=tf.float32) # self.X_source = train_source_x # self.y_source = train_source_y # self.X_target = train_target_x # self.y_target = train_target_y # source_x, source_y = custom_runner.queue.dequeue_many(128) # target_x, target_y = custom_runner.queue2.dequeue_many(128) # self.token2 = source_x + 1 source_x, source_y, target_x, target_y = input_pipeline() # must stay live: these four names are consumed just below self.X = tf.concat([source_x, target_x], 0) self.y = tf.concat([source_y, target_y], 0) self.y = tf.cast(self.y, dtype=tf.int32) self.y = tf.one_hot(self.y, depth=1677) # self.X = tf.reshape(self.X, [None, 31*160, 1, 1]) self.train = tf.constant(True, dtype=tf.bool) # DANN schedules: the adaptation weight l ramps from 0 to 1 and the learning rate decays with training progress self.l = 2 / (1 + tf.exp(-10 * (inc / num_steps))) - 1 self.lr = 0.005 / (tf.pow(1 + 10 * (inc / num_steps), 0.75)) # self.l = tf.constant(0.4, dtype=tf.float32) # self.lr = tf.constant(0.01, dtype=tf.float32) self.domain_0 = tf.zeros([128], dtype=tf.int32) self.domain_1 = tf.ones([128], dtype=tf.int32) rand = tf.random_uniform(shape=[], maxval=1) # with probability 0.1, swap the domain labels of the two batch halves self.domain1 = lambda: tf.concat([self.domain_0, self.domain_1], 0) self.domain2 = lambda: tf.concat([self.domain_1, self.domain_0], 0) self.domain = tf.cond(tf.greater(rand, 0.9), self.domain2, self.domain1) self.domain = tf.one_hot(self.domain, depth=2) self.keep_prob = tf.constant(0.5, dtype=tf.float32) # X_input = (tf.cast(self.X, tf.float32) - pixel_mean) / 255.
# CNN model for feature extraction with tf.name_scope('feature_extractor'): self.W_conv0 = weight_variable([256, 1, 1, 64], 'W_Conv0') self.b_conv0 = bias_variable([64], 'b_Conv0') # h_conv0 = conv2d(self.X, W_conv0, [1, 31,1,1])+ b_conv0 # h_conv0 = tf.contrib.layers.batch_norm(h_conv0, center=True, scale=True, is_training=self.train) # h_conv0 = tf.nn.relu(h_conv0) # print(self.X.get_shape().as_list()) h_conv0 = tf.nn.relu( conv2d(self.X, self.W_conv0, [1, 31, 1, 1]) + self.b_conv0) h_conv0 = tf.Print(h_conv0, [h_conv0]) # tf.Print is a no-op unless its output is consumed h_pool0 = max_pool_2x2(h_conv0, 2, 2) tf.summary.histogram('Conv 0 weight', self.W_conv0) tf.summary.histogram('Conv 0 bias', self.b_conv0) tf.summary.histogram('Conv 0 activation', h_conv0) self.W_conv1 = weight_variable([15, 1, 64, 128], 'W_conv1') self.b_conv1 = bias_variable([128], 'b_conv1') # h_conv1 = conv2d(h_pool0, W_conv1, [1,1,1,1])+b_conv1 # h_conv1 = tf.contrib.layers.batch_norm(h_conv1, center=True, scale=True, is_training=self.train) # h_conv1 = tf.nn.relu(h_conv1) h_conv1 = tf.nn.tanh( conv2d(h_pool0, self.W_conv1, [1, 1, 1, 1]) + self.b_conv1) h_pool1 = max_pool_2x2(h_conv1, 2, 2) tf.summary.histogram('Conv 1 weight', self.W_conv1) tf.summary.histogram('Conv 1 bias', self.b_conv1) tf.summary.histogram('Conv 1 activation', h_conv1) a, b, c, d = h_pool1.get_shape().as_list() # The domain-invariant feature self.feature = tf.reshape(h_pool1, [-1, b * c * d]) # MLP for class prediction with tf.name_scope('label_predictor'): # Switches to route target examples (second half of batch) differently # depending on train or test mode. all_features = lambda: self.feature source_features = lambda: tf.slice(self.feature, [0, 0], [batch_size, -1]) # classify_feats = tf.cond(self.train, source_features, all_features) classify_feats = self.feature all_labels = lambda: self.y source_labels = lambda: tf.slice(self.y, [0, 0], [batch_size, -1]) # self.classify_labels = tf.cond(self.train, source_labels, all_labels) self.classify_labels = self.y self.W_fc0 = weight_variable([b * c * d, 1024 * 2], 'W_fc0') self.b_fc0 = bias_variable([1024 * 2], 'b_fc0') h_fc0 = tf.nn.relu( tf.matmul(classify_feats, self.W_fc0) + self.b_fc0) tf.summary.histogram('fc0 weight', self.W_fc0) tf.summary.histogram('fc0 bias', self.b_fc0) tf.summary.histogram('fc0 activation', h_fc0) h_fc0 = tf.nn.dropout(h_fc0, self.keep_prob) self.W_fc1 = weight_variable([1024 * 2, 1024 * 2], 'W_fc1') self.b_fc1 = bias_variable([1024 * 2], 'b_fc1') h_fc1 = tf.nn.relu(tf.matmul(h_fc0, self.W_fc1) + self.b_fc1) tf.summary.histogram('fc1 weight', self.W_fc1) tf.summary.histogram('fc1 bias', self.b_fc1) tf.summary.histogram('fc1 activation', h_fc1) h_fc1 = tf.nn.dropout(h_fc1, self.keep_prob) self.W_fc2 = weight_variable([1024 * 2, 1024 * 2], 'W_fc2') self.b_fc2 = bias_variable([1024 * 2], 'b_fc2') h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fc2) + self.b_fc2) tf.summary.histogram('fc2 weight', self.W_fc2) tf.summary.histogram('fc2 bias', self.b_fc2) tf.summary.histogram('fc2 activation', h_fc2) h_fc2 = tf.nn.dropout(h_fc2, self.keep_prob) self.W_fc3 = weight_variable([1024 * 2, 1024 * 2], 'W_fc3') self.b_fc3 = bias_variable([1024 * 2], 'b_fc3') h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fc3) + self.b_fc3) tf.summary.histogram('fc3 weight', self.W_fc3) tf.summary.histogram('fc3 bias', self.b_fc3) tf.summary.histogram('fc3 activation', h_fc3) h_fc3 = tf.nn.dropout(h_fc3, self.keep_prob) self.W_fc4 = weight_variable([1024 * 2, 1024 * 2], 'W_fc4') self.b_fc4 = bias_variable([1024 * 2], 'b_fc4') h_fc4 = tf.nn.relu(tf.matmul(h_fc3,
self.W_fc4) + self.b_fc4) tf.summary.histogram('fc4 weight', self.W_fc4) tf.summary.histogram('fc4 bias', self.b_fc4) tf.summary.histogram('fc4 activation', h_fc4) h_fc4 = tf.nn.dropout(h_fc4, self.keep_prob) self.W_fc5 = weight_variable([1024 * 2, 1024 * 2], 'W_fc5') self.b_fc5 = bias_variable([1024 * 2], 'b_fc5') h_fc5 = tf.nn.relu(tf.matmul(h_fc4, self.W_fc5) + self.b_fc5) tf.summary.histogram('fc5 weight', self.W_fc5) tf.summary.histogram('fc5 bias', self.b_fc5) tf.summary.histogram('fc5 activation', h_fc5) h_fc5 = tf.nn.dropout(h_fc5, self.keep_prob) self.W_fc6 = weight_variable([1024 * 2, 1024 * 2], 'W_fc6') self.b_fc6 = bias_variable([1024 * 2], 'b_fc6') h_fc6 = tf.nn.relu(tf.matmul(h_fc5, self.W_fc6) + self.b_fc6) tf.summary.histogram('fc6 weight', self.W_fc6) tf.summary.histogram('fc6 bias', self.b_fc6) tf.summary.histogram('fc6 activation', h_fc6) h_fc6 = tf.nn.dropout(h_fc6, self.keep_prob) self.W_fc7 = weight_variable([1024 * 2, 1677], 'W_fc7') self.b_fc7 = bias_variable([1677], 'b_fc7') logits = tf.matmul(h_fc6, self.W_fc7) + self.b_fc7 tf.summary.histogram('fc7 weight', self.W_fc7) tf.summary.histogram('fc7 bias', self.b_fc7) tf.summary.histogram('fc7 activation', logits) with tf.name_scope('predicted_loss'): self.pred = tf.nn.softmax(logits) self.pred_loss = tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=self.classify_labels) # Small MLP for domain prediction with adversarial loss with tf.name_scope('domain_predictor'): # Flip the gradient when backpropagating through this operation feat = flip_gradient(self.feature, self.l) d_W_fc0 = weight_variable([b * c * d, 1024 * 2], 'd_W_fc0') d_b_fc0 = bias_variable([1024 * 2], 'd_b_fc0') d_h_fc0 = tf.nn.relu(tf.matmul(feat, d_W_fc0) + d_b_fc0) tf.summary.histogram('domain fc0 weights', d_W_fc0) tf.summary.histogram('domain fc0 bias', d_b_fc0) tf.summary.histogram('domain fc0 activation', d_h_fc0) d_h_fc0 = tf.nn.dropout(d_h_fc0, keep_prob=self.keep_prob) d_W_fc1 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc1') d_b_fc1 = bias_variable([1024 * 2], 'd_b_fc1') d_h_fc1 = tf.nn.relu(tf.matmul(d_h_fc0, d_W_fc1) + d_b_fc1) tf.summary.histogram('domain fc1 weights', d_W_fc1) tf.summary.histogram('domain fc1 bias', d_b_fc1) tf.summary.histogram('domain fc1 activation', d_h_fc1) d_h_fc1 = tf.nn.dropout(d_h_fc1, keep_prob=self.keep_prob) d_W_fc2 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc2') d_b_fc2 = bias_variable([1024 * 2], 'd_b_fc2') d_h_fc2 = tf.nn.relu(tf.matmul(d_h_fc1, d_W_fc2) + d_b_fc2) tf.summary.histogram('domain fc2 weights', d_W_fc2) tf.summary.histogram('domain fc2 bias', d_b_fc2) tf.summary.histogram('domain fc2 activation', d_h_fc2) d_h_fc2 = tf.nn.dropout(d_h_fc2, keep_prob=self.keep_prob) d_W_fc3 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc3') d_b_fc3 = bias_variable([1024 * 2], 'd_b_fc3') d_h_fc3 = tf.nn.relu(tf.matmul(d_h_fc2, d_W_fc3) + d_b_fc3) tf.summary.histogram('domain fc3 weights', d_W_fc3) tf.summary.histogram('domain fc3 bias', d_b_fc3) tf.summary.histogram('domain fc3 activation', d_h_fc3) d_h_fc3 = tf.nn.dropout(d_h_fc3, keep_prob=self.keep_prob) d_W_fc4 = weight_variable([1024 * 2, 1024 * 2], 'd_W_fc4') d_b_fc4 = bias_variable([1024 * 2], 'd_b_fc4') d_h_fc4 = tf.nn.relu(tf.matmul(d_h_fc3, d_W_fc4) + d_b_fc4) tf.summary.histogram('domain fc4 weights', d_W_fc4) tf.summary.histogram('domain fc4 bias', d_b_fc4) tf.summary.histogram('domain fc4 activation', d_h_fc4) d_h_fc4 = tf.nn.dropout(d_h_fc4, keep_prob=self.keep_prob) d_W_fc5 = weight_variable([1024 * 2, 2], 'd_W_fc5') d_b_fc5 = 
bias_variable([2], 'd_b_fc5') d_logits = tf.matmul(d_h_fc4, d_W_fc5) + d_b_fc5 tf.summary.histogram('domain fc5 weight', d_W_fc5) tf.summary.histogram('domain fc5 bias', d_b_fc5) tf.summary.histogram('domain fc5 activation', d_logits) with tf.name_scope('domain_loss'): self.domain_pred = tf.nn.softmax(d_logits) self.domain_loss = tf.nn.softmax_cross_entropy_with_logits( logits=d_logits, labels=self.domain)
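flip_gradient is used in the domain predictor above but not defined in this excerpt. One common TF1 implementation of the gradient-reversal layer (a sketch, not necessarily the author's version) uses the stop_gradient identity trick:

def flip_gradient(x, l=1.0):
    """Identity on the forward pass; scales the gradient by -l on backprop."""
    # Forward value: (1 + l) * x - l * x == x.
    # Backward: the stop_gradient term has no gradient, so dy/dx == -l.
    return tf.stop_gradient((1.0 + l) * x) - l * x

The widely published variant instead registers a custom gradient via tf.RegisterGradient plus gradient_override_map; the arithmetic form above behaves the same when l is a schedule value rather than a trained variable.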
def _dynamic_slice(operand, *start_indices, slice_sizes=None): return tf.slice(operand, tf.stack(start_indices), slice_sizes)
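_dynamic_slice mirrors XLA's dynamic_slice: the start indices may be scalar tensors computed at run time, while slice_sizes must be static. A small usage sketch:

x = tf.reshape(tf.range(12), [3, 4])   # [[0..3], [4..7], [8..11]]
row, col = tf.constant(1), tf.constant(2)
# Take the 2x2 window whose top-left corner is (row, col).
window = _dynamic_slice(x, row, col, slice_sizes=[2, 2])
# window evaluates to [[6, 7], [10, 11]]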
def decode_record_seg(record, level_num, is_point_feature): mesh_name = record['mesh_name'] label = tf.decode_raw(record['label'], tf.int32) label = tf.cast(label, tf.int32) shape_list = tf.decode_raw(record['shape'], tf.int32) shape_list = tf.reshape(shape_list, (-1, 4)) if (is_point_feature): input_feature = tf.decode_raw(record['input_feature'], tf.float32) feature_channel = record['feature_channel'] feature_channel = tf.cast(feature_channel, tf.int32) input_feature = tf.reshape(input_feature, (shape_list[0][0], feature_channel)) else: feature_channel = record['feature_channel'] feature_channel = tf.cast(feature_channel, tf.int32) input_feature = tf.decode_raw(record['input_feature'], tf.float32) input_feature = tf.reshape( input_feature, (shape_list[0][0] * shape_list[0][1] * shape_list[0][2] * shape_list[0][3], feature_channel)) conv_offset = tf.decode_raw(record['conv/offset'], tf.int32) conv_indices_list = tf.decode_raw(record['conv/indices'], tf.int32) conv_weights_list = tf.decode_raw(record['conv/weights'], tf.float32) conv_begin = 0 conv_matrix_list = [] for level_i in range(level_num): conv_indices = tf.slice(conv_indices_list, [2 * conv_begin], [2 * conv_offset[level_i]]) conv_indices = tf.cast( tf.reshape(conv_indices, (conv_offset[level_i], 2)), tf.int64) conv_weights = tf.slice(conv_weights_list, [conv_begin], [conv_offset[level_i]]) conv_matrix_list.append( Conv_Matrix(shape_list[level_i][0], shape_list[level_i][1], shape_list[level_i][2], shape_list[level_i][3], conv_indices, conv_weights)) conv_begin = conv_begin + conv_offset[level_i] maxpooling_offset = tf.decode_raw(record['maxpool/offset'], tf.int32) maxpooling_arg_list = tf.decode_raw(record['maxpool/arg'], tf.int32) maxpooling_indices_list = tf.decode_raw(record['maxpool/indices'], tf.int32) unpooling_indices_list = tf.decode_raw(record['unpooling/indices'], tf.int32) maxpooling_matrix_list = [] unpooling_list = [] pool_begin = 0 for level_i in range(level_num - 1): maxpooling_indices = tf.slice(maxpooling_indices_list, [2 * pool_begin], [2 * maxpooling_offset[level_i]]) maxpooling_indices = tf.cast( tf.reshape(maxpooling_indices, (maxpooling_offset[level_i], 2)), tf.int64) maxpooling_values = tf.ones(shape=[maxpooling_offset[level_i]], dtype=tf.float32) maxpooling_matrix_list.append( MaxPooling_Matrix(shape_list[level_i][0], shape_list[level_i + 1][0], shape_list[level_i][1], maxpooling_indices, maxpooling_values, maxpooling_arg_list[level_i])) unpooling_indices = tf.slice(unpooling_indices_list, [2 * pool_begin], [2 * maxpooling_offset[level_i]]) unpooling_values = tf.ones(shape=[maxpooling_offset[level_i]], dtype=tf.float32) unpooling_indices = tf.cast( tf.reshape(unpooling_indices, (maxpooling_offset[level_i], 2)), tf.int64) unpooling_list.append( UnPooling_Matrix(shape_list[level_i][0], shape_list[level_i + 1][0], shape_list[level_i][1], unpooling_indices, unpooling_values)) pool_begin = pool_begin + maxpooling_offset[level_i] return mesh_name, label, shape_list, input_feature, maxpooling_matrix_list, maxpooling_arg_list, conv_matrix_list, unpooling_list
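The Conv_Matrix and MaxPooling_Matrix tuples built above carry COO-style (indices, values) pairs. The consuming code is not shown here, but presumably each level is applied as a TensorFlow sparse-dense product, along these lines (a sketch; num_out and num_in stand for the operator's dense shape):

# Hypothetical use of one decoded level: multiply the sparse conv
# operator against the dense per-point feature matrix.
sparse_conv = tf.SparseTensor(indices=conv_indices, values=conv_weights,
                              dense_shape=[num_out, num_in])
out_feature = tf.sparse_tensor_dense_matmul(sparse_conv, input_feature)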
def train_image(image_buffer, height, width, bbox, batch_position, resize_method, distortions, scope=None, summary_verbosity=0, distort_color_in_yiq=False, fuse_decode_and_crop=False): """Distort one image for training a network. Distorting images provides a useful technique for augmenting the data set during training in order to make the network invariant to aspects of the image that do not affect the label. Args: image_buffer: scalar string Tensor representing the raw JPEG image buffer. height: integer, height of the output image. width: integer, width of the output image. bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] where each coordinate is [0, 1) and the coordinates are arranged as [ymin, xmin, ymax, xmax]. batch_position: position of the image in a batch, which affects how images are distorted and resized. NOTE: this argument can be an integer or a tensor resize_method: round_robin, nearest, bilinear, bicubic, or area. distortions: If true, apply full distortions for image colors. scope: Optional scope for op_scope. summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both summaries and checkpoints. distort_color_in_yiq: distort color of input images in YIQ space. fuse_decode_and_crop: fuse the decode/crop operation. Returns: 3-D float Tensor of distorted image used for training. """ # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): with tf.name_scope(scope or 'distort_image'): # A large fraction of image datasets contain a human-annotated bounding box # delineating the region of the image containing the object of interest. We # choose to create a new bounding box for the object which is a randomly # distorted version of the human-annotated bounding box that obeys an # allowed range of aspect ratios, sizes and overlap with the human-annotated # bounding box. If no box is supplied, then we assume the bounding box is # the entire image. sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( tf.image.extract_jpeg_shape(image_buffer), bounding_boxes=bbox, min_object_covered=0.1, aspect_ratio_range=[0.75, 1.33], area_range=[0.05, 1.0], max_attempts=100, use_image_if_no_bounding_boxes=True) bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box if summary_verbosity >= 3: image = tf.image.decode_jpeg(image_buffer, channels=3, dct_method='INTEGER_FAST') image = tf.image.convert_image_dtype(image, dtype=tf.float32) image_with_distorted_box = tf.image.draw_bounding_boxes( tf.expand_dims(image, 0), distort_bbox) tf.summary.image('images_with_distorted_bounding_box', image_with_distorted_box) # Crop the image to the specified bounding box. if fuse_decode_and_crop: offset_y, offset_x, _ = tf.unstack(bbox_begin) target_height, target_width, _ = tf.unstack(bbox_size) crop_window = tf.stack( [offset_y, offset_x, target_height, target_width]) image = tf.image.decode_and_crop_jpeg(image_buffer, crop_window, channels=3) else: image = tf.image.decode_jpeg(image_buffer, channels=3, dct_method='INTEGER_FAST') image = tf.slice(image, bbox_begin, bbox_size) distorted_image = tf.image.random_flip_left_right(image) # This resizing operation may distort the images because the aspect # ratio is not respected.
image_resize_method = get_image_resize_method(resize_method, batch_position) if cnn_util.tensorflow_version() >= 11: distorted_image = tf.image.resize_images(distorted_image, [height, width], image_resize_method, align_corners=False) else: distorted_image = tf.image.resize_images(distorted_image, height, width, image_resize_method, align_corners=False) # Restore the shape since the dynamic slice based upon the bbox_size loses # the third dimension. distorted_image.set_shape([height, width, 3]) if summary_verbosity >= 3: tf.summary.image('cropped_resized_maybe_flipped_image', tf.expand_dims(distorted_image, 0)) if distortions: distorted_image = tf.cast(distorted_image, dtype=tf.float32) # Images values are expected to be in [0,1] for color distortion. distorted_image /= 255. # Randomly distort the colors. distorted_image = distort_color( distorted_image, batch_position, distort_color_in_yiq=distort_color_in_yiq) # Note: This ensures the scaling matches the output of eval_image distorted_image *= 255 if summary_verbosity >= 3: tf.summary.image('final_distorted_image', tf.expand_dims(distorted_image, 0)) return distorted_image
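A minimal (hypothetical) invocation of train_image, using the whole-image fallback box described in the docstring; the file path is a placeholder of mine:

image_buffer = tf.read_file('/path/to/example.jpg')  # raw JPEG bytes
# [1, num_boxes, 4] as [ymin, xmin, ymax, xmax]; one box covering everything.
bbox = tf.constant([[[0.0, 0.0, 1.0, 1.0]]], dtype=tf.float32)
distorted = train_image(image_buffer, height=224, width=224, bbox=bbox,
                        batch_position=0, resize_method='bilinear',
                        distortions=True)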
axis=1) gen_loss = tf.reduce_mean(gen_loss0) #average over N gen_opt = tf.train.AdamOptimizer(lr) gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='decoder') gen_gradvars = gen_opt.compute_gradients(gen_loss, var_list=gen_vars) gen_train_op = gen_opt.apply_gradients(gen_gradvars) #provide encoder q(b|x) gradient by data augmentation Dir = Dirichlet([1.0] * n_class) pai = Dir.sample(sample_shape=[N, K_u, n_cv]) #[N,K_u,n_cv,n_class] x_star_u = tf.tile(tf.expand_dims(x_binary, axis=1), [1, K_u, 1]) #N*K_u*d_x jj = 0 pai_slice_j = tf.slice(pai, begin=[0, 0, 0, jj], size=[-1, -1, -1, 1]) #N,K_u,n_cv,1 pai_j = swap(pai, jj, 0) F_j = Fn(pai_j, prior_logit0, z_concate, x_star_u) #N*K_u for mm in range(1, n_class): pai_m = swap(pai, jj, mm) F_m = Fn(pai_m, prior_logit0, z_concate, x_star_u) grad_m = tf.expand_dims(tf.expand_dims(F_m - F_j, 2), 3) * (1 - n_class * pai_slice_j) if mm == 1: alpha_grads = grad_m else: alpha_grads = tf.concat([alpha_grads, grad_m], 3) alpha_grads = tf.reduce_mean(alpha_grads, axis=1) #N*n_cv*d_b, expectation over pai alpha_grads = tf.reshape(alpha_grads, [-1, z_dim])
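swap is not defined in this excerpt; from its call sites it plausibly exchanges components jj and mm of pai along the class axis. A guess at that helper (hedged; pai is rank-4 here, and tf.gather with an axis argument needs TF >= 1.3):

def swap(pai, j, m):
    """Exchange entries j and m along the last (class) axis of a 4-D tensor."""
    idx = list(range(n_class))
    idx[j], idx[m] = idx[m], idx[j]
    return tf.gather(pai, idx, axis=3)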
def eval_image(image, height, width, batch_position, resize_method, summary_verbosity=0): """Get the image for model evaluation. We preprocess the image similarly to Slim, see https://github.com/tensorflow/models/blob/master/slim/preprocessing/vgg_preprocessing.py Validation images do not have bounding boxes, so to crop the image, we first resize the image such that the aspect ratio is maintained and the resized height and width are both at least 1.15 times `height` and `width` respectively. Then, we do a central crop to size (`height`, `width`). Args: image: 3-D float Tensor representing the image. height: The height of the image that will be returned. width: The width of the image that will be returned. batch_position: position of the image in a batch, which affects how images are distorted and resized. NOTE: this argument can be an integer or a tensor resize_method: one of the strings 'round_robin', 'nearest', 'bilinear', 'bicubic', or 'area'. summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both summaries and checkpoints. Returns: An image of size (`height`, `width`, 3) that is resized and cropped as described above. """ # TODO(reedwm): Currently we resize then crop. Investigate if it's faster to # crop then resize. with tf.name_scope('eval_image'): if summary_verbosity >= 3: tf.summary.image('original_image', tf.expand_dims(image, 0)) shape = tf.shape(image) image_height = shape[0] image_width = shape[1] image_height_float = tf.cast(image_height, tf.float32) image_width_float = tf.cast(image_width, tf.float32) scale_factor = 1.15 # Compute resize_height and resize_width to be the minimum values such that # 1. The aspect ratio is maintained (i.e. resize_height / resize_width is # image_height / image_width), and # 2. resize_height >= height * `scale_factor`, and # 3. resize_width >= width * `scale_factor` max_ratio = tf.maximum(height / image_height_float, width / image_width_float) resize_height = tf.cast(image_height_float * max_ratio * scale_factor, tf.int32) resize_width = tf.cast(image_width_float * max_ratio * scale_factor, tf.int32) # Resize the image to shape (`resize_height`, `resize_width`) image_resize_method = get_image_resize_method(resize_method, batch_position) distorted_image = tf.image.resize_images(image, [resize_height, resize_width], image_resize_method, align_corners=False) # Do a central crop of the image to size (height, width). total_crop_height = (resize_height - height) crop_top = total_crop_height // 2 total_crop_width = (resize_width - width) crop_left = total_crop_width // 2 distorted_image = tf.slice(distorted_image, [crop_top, crop_left, 0], [height, width, 3]) distorted_image.set_shape([height, width, 3]) if summary_verbosity >= 3: tf.summary.image('cropped_resized_image', tf.expand_dims(distorted_image, 0)) image = distorted_image return image
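A worked instance of the resize arithmetic above, with illustrative numbers of mine (not from the source): a 300x500 image evaluated at 224x224.

max_ratio = max(224 / 300.0, 224 / 500.0)   # 0.7467: height is the binding side
resize_h = int(300 * max_ratio * 1.15)      # 257
resize_w = int(500 * max_ratio * 1.15)      # 429, aspect ratio preserved
crop_top = (resize_h - 224) // 2            # 16
crop_left = (resize_w - 224) // 2           # 102

Note the int32 cast truncates, so resize_h can land just under height * scale_factor (257 vs. 257.6 here); the central 224x224 crop still fits.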
def main(argv=None): keep_probability = tf.placeholder(tf.float32, name="keep_probabilty") image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="input_image") annotation = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 2], name="annotation") z = tf.placeholder(tf.float32, shape=[None, 1, 1, 10], name="z") # mask = tf.placeholder(tf.float32, shape=[None, 64, 64, 1], name="mask") # mask2 = tf.placeholder(tf.float32, shape=[None, 64, 64, 1], name="mask2") z_new = tf.placeholder(tf.float32, shape=[None, 1, 1, 10], name="z_new") istrain = tf.placeholder(tf.bool) #z_lip = tf.placeholder(tf.float32, shape=[None, 1, 1, 10], name="z_lip") #z_lip_inv = tf.placeholder(tf.float32, shape=[None, 1, 1, 10], name="z_lip_inv") e = tf.placeholder(tf.float32, shape=[None, 4, 4, 522], name="e") e_p = tf.placeholder(tf.float32, shape=[None, 1, 1, 8202], name="e_p") save_itr = 0 # pred_annotation, logits = inference(image, keep_probability,z) # tf.summary.image("input_image", image, max_outputs=2) # tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2) # tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2) # loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, # labels=tf.squeeze(annotation, squeeze_dims=[3]), # name="entropy"))) # mask_ = tf.ones([FLAGS.batch_size,32,64,3]) # mask = tf.pad(mask_, [[0,0],[0,32],[0,0],[0,0]]) # mask2__ = tf.ones([FLAGS.batch_size,78,78,3]) # mask2_ = tf.pad(mask2__, [[0,0],[25,25],[25,25],[0,0]]) # mask2 = mask2_ - mask zero = tf.zeros([FLAGS.batch_size,1,1,8202]) logits, h = inference(image, keep_probability,z,0.0,istrain) logits_e, h_e = inference(image, keep_probability,z,e,istrain) #logits_lip,_ = inference((1-mask)*image + mask*0.0, keep_probability,z_lip,istrain ) #logits_lip_inv,_ = inference((1-mask)*image + mask*0.0, keep_probability,z_lip_inv,istrain ) z_pred = predictor(h,z,zero,istrain) z_pred_e = predictor(h,z,e_p,istrain) # z_pred_lip = predictor(h,z_lip,istrain) # z_pred_lip_inv = predictor(h,z_lip_inv,istrain) # logits = inference(image, keep_probability,z,istrain) tf.summary.image("input_image", image, max_outputs=2) tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2) # tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2) # lossz = 0.1 * tf.reduce_mean(tf.reduce_sum(tf.abs(z),[1,2,3])) # lossz = 0.1 * tf.reduce_mean(tf.abs(z)) # loss_all = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square((image - logits)),[1,2,3]))) # loss_all = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs(image - logits)),1)) # loss_mask = 0.8*tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square((image - logits)*mask),[1,2,3]))) g_k = gaussian_kernel(2,0.0,1.0) gauss_kernel = tf.tile(g_k[ :, :, tf.newaxis, tf.newaxis],[1,1,1,1]) # Convolve. 
logits_a = tf.slice(logits, [0,0,0,0],[-1,-1,-1,1]) logits_b = tf.slice(logits, [0,0,0,1],[-1,-1,-1,1]) logits_smooth_a_ = tf.nn.conv2d(tf.abs(logits_a), gauss_kernel, strides=[1, 1, 1, 1], padding="SAME") logits_smooth_b_ = tf.nn.conv2d(tf.abs(logits_b), gauss_kernel, strides=[1, 1, 1, 1], padding="SAME") #logits_smooth_ = tf.concat([logits_smooth_a,logits_smooth_b], axis = 3) logits_smooth_a = tf.maximum(logits_smooth_a_, 0.0001) logits_smooth_b = tf.maximum(logits_smooth_b_, 0.0001) # logits_smooth_norm = tf.contrib.layers.flatten(logits_smooth)/tf.reduce_sum(logits_smooth,axis=[1,2,3], keep_dims = True) logits_smooth_norm_a = tf.nn.softmax(tf.contrib.layers.flatten(logits_smooth_a), axis = 1) logits_smooth_norm_b = tf.nn.softmax(tf.contrib.layers.flatten(logits_smooth_b), axis = 1) ones = tf.ones([FLAGS.batch_size,IMAGE_SIZE,IMAGE_SIZE,1]) zeros = tf.zeros([FLAGS.batch_size,IMAGE_SIZE,IMAGE_SIZE,1]) normal_dist_d = tf.distributions.Uniform(low = zeros, high = ones) normal_dist_a_ = normal_dist_d.sample() normal_dist_a = tf.maximum(normal_dist_a_, 0.0001) #normal_dist_norm = tf.contrib.layers.flatten(normal_dist)/tf.reduce_sum(normal_dist,axis=[1,2,3], keep_dims = True) normal_dist_norm_a =tf.nn.softmax(tf.contrib.layers.flatten(normal_dist_a), axis = 1) normal_dist_b_ = normal_dist_d.sample() normal_dist_b = tf.maximum(normal_dist_b_, 0.0001) normal_dist_norm_b =tf.nn.softmax(tf.contrib.layers.flatten(normal_dist_b), axis = 1) X_a = tf.distributions.Categorical(probs=logits_smooth_norm_a) X_b = tf.distributions.Categorical(probs=logits_smooth_norm_b) Y_a = tf.distributions.Categorical(probs=normal_dist_norm_a) Y_b = tf.distributions.Categorical(probs=normal_dist_norm_b) kl_dist_a = tf.reduce_mean(tf.distributions.kl_divergence(X_a, Y_a)) kl_dist_b = tf.reduce_mean(tf.distributions.kl_divergence(X_b, Y_b)) # kl_dist = tf.reduce_sum(logits_smooth_norm * tf.log(logits_smooth_norm/normal_dist_norm)) # kl_dist = tf.contrib.distributions.kl_divergence(logits_smooth_norm,normal_dist_norm) # logits_std = tf.reduce_std(logits_smooth, axis =[1,2],keep_dims=True ) # logits_mean = tf.reduce_mean(logits_smooth, axis =[1,2], keep_dims=True) # logits_normalized = (logits_smooth - logits_mean)/logits_std # annotation_weights_norm = tf.reduce_sum(tf.exp(tf.abs(annotation))/tf.exp(1.0)) # annotation_weights = (tf.exp(tf.abs(annotation))/tf.exp(1.0)) loss_ = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((annotation - logits)) ),1)) +0.5* kl_dist_a + 0.5*kl_dist_b # loss_ = 0.4*loss_mask + loss_mask2 # loss = tf.reduce_mean(tf.squared_difference(logits ,annotation )) loss_summary = tf.summary.scalar("entropy", loss_) # zloss = tf.reduce_mean(tf.losses.cosine_distance(tf.contrib.layers.flatten(z_new) ,tf.contrib.layers.flatten(z_pred),axis =1)) zloss_ = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_new))),1)) # zloss_lip = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_pred_lip))),1)) # zloss_lip_inv = -tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_pred_lip_inv))),1)) # z_loss = zloss_ + 0.1* zloss_lip# + zloss_lip_inv lip_loss_dec = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((logits - logits_e))),1)) loss = loss_ + 0.1*lip_loss_dec lip_loss_pred = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_pred_e))),1)) zloss = zloss_ + 0.1*lip_loss_pred grads = train_z(loss_,z) trainable_var = tf.trainable_variables() trainable_z_pred_var = 
tf.trainable_variables(scope="predictor") trainable_d_pred_var = tf.trainable_variables(scope="decoder") print(trainable_z_pred_var) if FLAGS.debug: for var in trainable_var: utils.add_to_regularization_and_summary(var) train_op = train(loss, trainable_var) train_pred = train_predictor(zloss,trainable_z_pred_var) print("Setting up summary op...") summary_op = tf.summary.merge_all() print("Setting up image reader...") train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir) print(len(train_records)) print(len(valid_records)) print("Setting up dataset reader") image_options = {'resize': True, 'resize_size': IMAGE_SIZE} if FLAGS.mode == 'train': train_dataset_reader = dataset.BatchDatset(train_records, image_options) validation_dataset_reader = dataset.BatchDatset(valid_records, image_options) sess = tf.Session() print("Setting up Saver...") saver = tf.train.Saver() # create two summary writers to show training loss and validation loss in the same graph # need to create two folders 'train' and 'validation' inside FLAGS.logs_dir train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/validation') sess.run(tf.global_variables_initializer()) ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) print("Model restored...") saved =True if FLAGS.mode == "train": for itr in xrange(MAX_ITERATION): train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size) print("$$$$$$$$$$$$$$$$$$$$") print(train_images.shape) # z_ = np.reshape(signal.gaussian(200, std=1),(FLAGS.batch_size,1,1,10))-0.5 z_ = np.random.uniform(low=-1.0, high=1.0, size=(FLAGS.batch_size,1,1,10)) # train_images[train_images < 0.] = -1. # train_annotations[train_annotations < 0.] = -1. # train_images[train_images >= 0.] = 1.0 # train_annotations[train_annotations >= 0.] = 1.0 x1 = random.randint(0, 10) w1 = random.randint(30, 54) y1 = random.randint(0, 10) h1 = random.randint(30, 54) cond = random.randint(0, 10) # saved = True if False: saved = False train_images_m, train_annotations_m = train_dataset_reader.get_random_batch(FLAGS.batch_size) train_images_m[train_images_m < 0.] = -1. train_annotations_m[train_annotations_m < 0.] = -1. train_images_m[train_images_m >= 0.] = 1.0 train_annotations_m[train_annotations_m >= 0.] = 1.0 train_images = (train_images + 1.)/2.0*255.0 train_annotations = (train_annotations + 1.)/2.0*255.0 train_images_m = (train_images_m + 1.)/2.0*255.0 train_annotations_m = (train_annotations_m + 1.)/2.0*255.0 train_images_m[:,32:,:,:] = 0 train_annotations_m[:,32:,:,:] = 0 train_images = np.clip((train_images + train_images_m),0.0,255.0) train_annotations = np.clip((train_annotations + train_annotations_m),0.0,255.0) ''' train_images[train_images < 0.] = -1. train_annotations[train_annotations < 0.] = -1. train_images[train_images >= 0.] = 1.0 train_annotations[train_annotations >= 0.] 
= 1.0 ''' train_annotations_ = np.squeeze(train_annotations,axis = 3) train_images_ = train_images train_images = train_images/127.5 - 1.0 train_annotations = train_annotations/127.5 - 1.0 # for itr_ in range(FLAGS.batch_size): # utils.save_image(train_images_[itr_].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr_) ) # utils.save_image(train_annotations_[itr_].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr_) ) # train_images[:,x1:w1,y1:h1,:] = 0 # print(train_images) r_m, r_m2 = random_mask(64) #feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85, z: z_,mask:r_m, istrain:True } #train_images[:,50:100,50:100,:] =0 v = 0 # print(train_images) error_dec = np.random.normal(0.0,0.001,(FLAGS.batch_size,4,4,522)) error_dec_ = np.random.normal(0.0,0.001,(FLAGS.batch_size,1,1,8202)) # z_l_inv = z_ + np.random.normal(0.0,0.1) # feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85, z: z_, e:error_dec, mask:r_m, istrain:True } # z_l = z_ + np.random.normal(0.0,0.001) # lloss,_ = sess.run([lip_loss, train_lip ], feed_dict=feed_dict) # z_l = z_ + np.random.normal(0.0,0.001) # print("Step: %d, lip_loss:%g" % (itr,lloss)) for p in range(20): z_ol = np.copy(z_) # z_l = z_ol + np.random.normal(0.0,0.001) # print("666666666666666666666666666666666666666") feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85, z: z_,e:error_dec, istrain:True } # lloss,_ = sess.run([lip_loss, train_lip ], feed_dict=feed_dict) # print("Step: %d, z_step: %d, lip_loss:%g" % (itr,p,lloss)) z_loss, summ = sess.run([loss,loss_summary], feed_dict=feed_dict) print("Step: %d, z_step: %d, Train_loss:%g" % (itr,p,z_loss)) # print(z_) g = sess.run([grads],feed_dict=feed_dict) v_prev = np.copy(v) # print(g[0][0].shape) v = 0.001*v - 0.1*g[0][0] z_ += 0.001 * v_prev + (1+0.001)*v #z_ = np.clip(z_, -10.0, 10.0) ''' m = interp1d([-10.0,10.0],[-1.0,1.0]) print(np.max(z_)) print(np.min(z_)) z_ol_interp = m(z_ol) z_interp = m(z_) _,z_pred_loss =sess.run([train_pred,zloss],feed_dict={image: train_images,mask:r_m,z:z_ol_interp,z_new:z_interp,e_p:error_dec_,istrain:True,keep_probability: 0.85}) print("Step: %d, z_step: %d, z_pred_loss:%g" % (itr,p,z_pred_loss)) ''' # _,z_pred_loss =sess.run([train_pred,zloss],feed_dict={image: train_images,mask:r_m,z:z_ol,z_new:z_,istrain:True,keep_probability: 0.85}) # print("Step: %d, z_step: %d, z_pred_loss:%g" % (itr,p,z_pred_loss)) # z_ = np.clip(z_, -1.0, 1.0) # print(v.shape) # print(z_.shape) feed_dict = {image: train_images, annotation: train_annotations, keep_probability:0.85,e:error_dec, z: z_, istrain:True } sess.run(train_op, feed_dict=feed_dict) if itr % 10 == 0: train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict) print("Step: %d, Train_loss:%g" % (itr, train_loss)) train_writer.add_summary(summary_str, itr) if itr % 500 == 0: valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size) # valid_annotations[valid_annotations < 0.] = -1. # valid_images[valid_images < 0.] = -1. # valid_annotations[valid_annotations >= 0.] = 1.0 # valid_images[valid_images >= 0.] 
= 1.0 x1 = random.randint(0, 10) w1 = random.randint(30, 54) y1 = random.randint(0, 10) h1 = random.randint(30, 54) # valid_images[:,x1:w1,y1:h1,:] = 0 valid_loss, summary_sva = sess.run([loss, loss_summary], feed_dict={image: valid_images, annotation: valid_annotations, keep_probability: 1.0, z: z_,e:error_dec, istrain:False }) print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss)) # add validation loss to TensorBoard validation_writer.add_summary(summary_sva, itr) if itr % 3000 == 0: save_itr = save_itr + 3000 saver.save(sess, FLAGS.logs_dir + "model_fuse.ckpt", save_itr) elif FLAGS.mode == "visualize": valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size) # valid_annotations[valid_annotations < 0.] = -1.0 # valid_images[valid_images < 0.] = -1.0 # valid_annotations[valid_annotations >= 0.] = 1.0 # valid_images[valid_images >= 0.] = 1.0 x1 = random.randint(0, 10) w1 = random.randint(30, 54) y1 = random.randint(0, 10) h1 = random.randint(30, 54) # valid_images[:,x1:w1,y1:h1,:] = 0 r_m, r_m2 = random_mask(64) # z_ = np.zeros(low=-1.0, high=1.0, size=(FLAGS.batch_size,1,1,10)) # z_ = np.reshape(signal.gaussian(200, std=1),(FLAGS.batch_size,1,1,10))-0.5 z_ = np.random.uniform(low=-1.0, high=1.0, size=(FLAGS.batch_size,1,1,10)) feed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z: z_, istrain:False } v= 0 m__ = interp1d([-10.0,10.0],[-1.0,1.0]) z_ = m__(z_) # feed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z: z_, istrain:False,mask:r_m } for p in range(20): z_ol = np.copy(z_) # print("666666666666666666666666666666666666666") # print(z_) # feed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z: z_, istrain:False,mask:r_m } # z_loss, summ = sess.run([loss,loss_summary], feed_dict=feed_dict) # print("z_step: %d, Train_loss:%g" % (p,z_loss)) # z_, z_pred_loss = sess.run(z_pred,zlossfeed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 1.0, z:z_ol, istrain:False,mask:r_m}) # print(z_) g = sess.run([grads],feed_dict=feed_dict) v_prev = np.copy(v) # print(g[0][0].shape) v = 0.001*v - 0.1*g[0][0] z_ = z_ol + 0.001 * v_prev + (1+0.001)*v # z_ = z_ol + 0.001 * v_prev + (1+0.001)*v # print("z_____________") # print(z__) # print("z_") # print(z_) # m__ = interp1d([-10.0,10.0],[-1.0,1.0]) # z_ol = m__(z_ol) # z_ = sess.run(z_pred,feed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z:z_ol, istrain:False,mask:r_m}) # m_ = interp1d([-1.0,1.0],[-10.0,10.0]) # z_ = m_(z_) # z_ = np.clip(z_, -1.0, 1.0) # print(z_pred_loss) # m_ = interp1d([-1.0,1.0],[-10.0,10.0]) # z_ = m_(z_) pred = sess.run(logits, feed_dict={image: valid_images, annotation: valid_annotations,z:z_, istrain:False, keep_probability: 0.85}) # print(sess.run(logits_smooth_norm, feed_dict={image: valid_images, annotation: valid_annotations,z:z_, istrain:False, # keep_probability: 0.85} ) ) print("#######################") # print(sess.run(normal_dist_norm)) valid_images = (valid_images +1.)/2.0*100.0 # predicted_patch = sess.run(mask) * pred # pred = valid_images_masked + predicted_patch pred_ = pred * 128.0 # pred = pred + 1./2.0*255 print(np.max(pred_)) print(np.min(pred_)) pred = np.reshape(np.concatenate((valid_images,pred_), axis=3),(-1,64,64,3)) valid_annotations_ = valid_annotations * 128.0 valid_annotations = np.reshape(np.concatenate((valid_images, valid_annotations_), axis=3),(-1,64,64,3)) 
valid_images_gray = np.squeeze(valid_images) # for itr in range(FLAGS.batch_size): # utils.save_image(valid_images_masked[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr)) # utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr)) # utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="predz_" + str(5+itr)) # utils.save_image(valid_images_masked[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr)+'_' + str(p) ) # utils.save_image(valid_annotations_[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr)+'_' + str(p) ) # utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="predz_" + str(5+itr)+'_' + str(p) ) # print("Saved image: %d" % itr) for itr in range(FLAGS.batch_size): utils.save_image(valid_images_gray[itr], FLAGS.logs_dir, name="inp_" + str(5+itr) ) utils.save_image(color.lab2rgb(pred[itr]), FLAGS.logs_dir, name="predz_" + str(5+itr) ) utils.save_image(color.lab2rgb(valid_annotations[itr]), FLAGS.logs_dir, name="gt_" + str(5+itr) )
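Two helpers used in main above are not defined in this excerpt. First, gaussian_kernel(2, 0.0, 1.0): a common TF1 construction (a guess at its implementation, not the author's confirmed code) is the outer product of a discretized 1-D normal density:

def gaussian_kernel(size, mean, std):
    """[2*size+1, 2*size+1] Gaussian kernel, normalized to sum to 1."""
    d = tf.distributions.Normal(mean, std)
    # Density sampled at integer offsets -size..size.
    vals = d.prob(tf.range(start=-size, limit=size + 1, dtype=tf.float32))
    kernel = tf.einsum('i,j->ij', vals, vals)  # outer product
    return kernel / tf.reduce_sum(kernel)

Second, the training and visualization branches repeat the same heavy-ball update on the latent code z_. Factored out as a sketch (coefficients copied from the loops above; latent_step is my name):

def latent_step(z, v, grad, lr=0.1, momentum=0.001):
    """One momentum update on the latent code, as in the inner loops above."""
    v_prev = np.copy(v)
    v = momentum * v - lr * grad
    return z + momentum * v_prev + (1 + momentum) * v, v

Each of the 20 inner iterations would then be z_, v = latent_step(z_, v, g[0][0]).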