def recurrent(self, node, current_level, postfix, is_training):
    """
    Applies the recurrent cell of the given level to node and records the resulting state.
    :param node: The input node that is fed into the recurrent cell.
    :param current_level: The current level. Selects the number of filters as well as the
                          entries of self.lstm_input_states and self.lstm_output_states.
    :param postfix: Postfix that is forwarded to self.recurrent_cell.
    :param is_training: Training flag that is forwarded to self.recurrent_cell.
    :return: The output node of the recurrent cell.
    """
    feature_count = self.num_filters(current_level)
    # Only the image size is needed here; batch and channel sizes are ignored.
    _, _, spatial_size = get_batch_channel_image_size(node, data_format=self.data_format)
    level_cell = self.recurrent_cell(spatial_size, feature_count, postfix, is_training)
    cell_output, state_out = level_cell(node, self.lstm_input_states[current_level])
    # Keep the output state so that it is available per level after the call.
    self.lstm_output_states[current_level] = state_out
    return cell_output
def __init__(self, cv, network_id):
    """
    Initializes the training parameters, the datasets and the network function.
    :param cv: The cross validation index. Used for the output folder name and passed to the Dataset.
    :param network_id: The network id. Must be one of 'scn', 'unet', 'downsampling', 'conv', 'scn_mmwhs';
                       it selects the image size, the heatmap size, the sigma and the network function.
    """
    super().__init__()
    self.cv = cv
    self.network_id = network_id
    experiment_name = '{}_cv{}'.format(network_id, cv)
    self.output_folder = experiment_name + '/' + self.output_folder_timestamp()

    # Optimization and schedule parameters.
    self.batch_size = 8
    # TODO adapt learning rates for different networks for faster training
    self.learning_rate = 1e-7
    self.max_iter = 20000
    self.test_iter = 5000
    self.disp_iter = 100
    # Snapshots are written with the same interval as the tests.
    self.snapshot_iter = self.test_iter
    self.test_initialization = True
    self.current_iter = 0
    self.reg_constant = 5e-5
    self.invert_transformation = False

    # (image size, heatmap size, sigma) for every supported network id.
    per_network_settings = {
        'scn': ([256, 256], [256, 256], 3.0),
        'unet': ([256, 256], [256, 256], 3.0),
        'downsampling': ([256, 256], [64, 64], 1.5),
        'conv': ([128, 128], [128, 128], 3.0),
        'scn_mmwhs': ([256, 256], [256, 256], 3.0),
    }
    self.image_size, self.heatmap_size, self.sigma = per_network_settings[self.network_id]

    self.image_channels = 1
    self.num_landmarks = 37
    self.data_format = 'channels_first'
    self.save_debug_images = False
    self.base_folder = 'hand_xray_dataset/'

    dataset = Dataset(self.image_size,
                      self.heatmap_size,
                      self.num_landmarks,
                      self.sigma,
                      self.base_folder,
                      self.cv,
                      self.data_format,
                      self.save_debug_images)
    self.dataset_train = dataset.dataset_train()
    # One entry is requested from the training dataset here; its result is discarded.
    self.dataset_train.get_next()
    self.dataset_val = dataset.dataset_val()

    network_functions = {
        'scn': network_scn,
        'unet': network_unet,
        'downsampling': network_downsampling,
        'conv': network_conv,
        'scn_mmwhs': network_scn_mmwhs,
    }
    self.network = network_functions[self.network_id]

    def l2_loss_per_batch_entry(x, y):
        # L2 loss of (x - y), divided by the batch size of x.
        total_loss = tf.nn.l2_loss(x - y)
        return total_loss / get_batch_channel_image_size(x, self.data_format)[0]

    self.loss_function = l2_loss_per_batch_entry
    self.files_to_copy = ['main.py', 'network.py', 'dataset.py']
def loss_function(self, pred, target):
    """
    Batch-size normalized L2 loss between prediction and target.
    :param pred: The predicted image. Its batch size is used for the normalization.
    :param target: The target image.
    :return: tf.nn.l2_loss(pred - target) divided by the batch size of pred.
    """
    batch_size, _, _ = get_batch_channel_image_size(pred, self.data_format)
    unnormalized_loss = tf.nn.l2_loss(pred - target)
    return unnormalized_loss / batch_size
def loss_function(self, pred, target, mask=None):
    """
    Batch-size normalized L2 loss between prediction and target with an optional mask.
    :param pred: The predicted image. Its batch size is used for the normalization.
    :param target: The target image.
    :param mask: If not None, the difference (pred - target) is multiplied with mask
                 before the loss is calculated, e.g., to only consider pixels where mask == 1.
    :return: L2 loss of the (optionally masked) difference divided by the batch size.
    """
    batch_size, _, _ = get_batch_channel_image_size(pred, self.data_format)
    residual = pred - target
    if mask is not None:
        # Weight the residual, such that pixels with mask == 0 do not contribute.
        residual = residual * mask
    return tf.nn.l2_loss(residual) / batch_size
def recurrent(self, node, current_level, postfix, is_training):
    """
    Applies the recurrent cell of the given level to node and records input and output states.
    The cell input, the cell output and the output state are added to the 'checkpoints' collection.
    :param node: The input node that is fed into the recurrent cell.
    :param current_level: The current level. Selects the number of filters as well as the
                          entries of self.lstm_input_states and self.lstm_output_states.
    :param postfix: Postfix that is forwarded to self.recurrent_cell.
    :param is_training: Training flag that is forwarded to self.recurrent_cell.
    :return: The output node of the recurrent cell.
    """
    checkpoint_collection = 'checkpoints'
    tf.add_to_collection(checkpoint_collection, node)

    feature_count = self.num_filters(current_level)
    batch_size, _, spatial_size = get_batch_channel_image_size(node,
                                                               data_format=self.data_format)
    level_cell = self.recurrent_cell(spatial_size, feature_count, postfix, is_training)

    if not self.use_lstm_input_state:
        # No external state is used: start from the zero state of the cell and remember it.
        state_in = level_cell.zero_state(batch_size, tf.float32)
        self.lstm_input_states[current_level] = state_in
    else:
        state_in = self.lstm_input_states[current_level]

    cell_output, state_out = level_cell(node, state_in)
    for checkpoint in (cell_output, state_out):
        tf.add_to_collection(checkpoint_collection, checkpoint)
    self.lstm_output_states[current_level] = state_out
    return cell_output
def __init__(self, cv, modality):
    """
    Initializes the training parameters, the datasets and the network function.
    :param cv: The cross validation index. Used for the output folder name and passed to the Dataset.
    :param modality: The modality. 'ct' uses an image spacing of 10 per dimension, every other
                     value uses 12. Also used for the output folder name and passed to the Dataset.
    """
    super().__init__()
    self.cv = cv
    experiment_folder = './mmwhs_localization/{}_{}'.format(modality, cv)
    self.output_folder = experiment_folder + '/' + self.output_folder_timestamp()

    # Optimization and schedule parameters.
    self.batch_size = 1
    self.learning_rate = 1e-5
    # The second learning rate is a tenth of the first one; the boundary is at iteration 10000.
    self.learning_rates = [self.learning_rate, self.learning_rate * 0.1]
    self.learning_rate_boundaries = [10000]
    self.max_iter = 20000
    self.test_iter = 5000
    self.disp_iter = 100
    # Snapshots are written with the same interval as the tests.
    self.snapshot_iter = self.test_iter
    self.test_initialization = False
    self.current_iter = 0
    self.reg_constant = 5e-5
    self.invert_transformation = False

    # Image geometry.
    self.image_size = [32] * 3
    self.image_spacing = [10] * 3 if modality == 'ct' else [12] * 3
    self.sigma = [1.5] * 3
    self.image_channels = 1
    self.num_landmarks = 1
    self.data_format = 'channels_first'
    self.save_debug_images = False
    self.local_base_folder = '../../semantic_segmentation/mmwhs/mmwhs_dataset'

    dataset = Dataset(base_folder=self.local_base_folder,
                      image_size=self.image_size,
                      image_spacing=self.image_spacing,
                      cv=cv,
                      input_gaussian_sigma=4.0,
                      modality=modality,
                      save_debug_images=self.save_debug_images)
    self.dataset_train = dataset.dataset_train()
    self.dataset_val = dataset.dataset_val()
    self.network = network_unet

    def l2_loss_per_batch_entry(x, y):
        # L2 loss of (x - y), divided by the batch size of x.
        total_loss = tf.nn.l2_loss(x - y)
        return total_loss / get_batch_channel_image_size(x, self.data_format)[0]

    self.loss_function = l2_loss_per_batch_entry
def resize_trilinear(inputs, factors=None, output_size=None, name=None, data_format='channels_first'):
    """
    Trilinearly resizes an input volume to either a given size or by given scale factors.
    The resize is performed as two consecutive bilinear resizes: first over (height, width),
    then over depth.
    :param inputs: 5D tensor.
    :param factors: Scale factors for (depth, height, width). Only used, if output_size is None.
    :param output_size: Output size as (depth, height, width). Takes precedence over factors.
    :param name: Name of the name scope. Defaults to 'upsample'.
    :param data_format: Data format. 'channels_first' or anything else for channels last.
    :return: The resized tensor in the same data format as inputs.
    :raises ValueError: If neither factors nor output_size is given.
    """
    if factors is None and output_size is None:
        raise ValueError('Either factors or output_size must be given.')

    _, num_channels, [depth, height, width] = get_batch_channel_image_size(inputs, data_format)
    dtype = inputs.dtype
    name = name or 'upsample'
    with tf.name_scope(name):
        # The 2D resize op works on channels last tensors.
        if data_format == 'channels_first':
            inputs_channels_last = tf.transpose(inputs, [0, 2, 3, 4, 1])
        else:
            inputs_channels_last = inputs

        if output_size is None:
            output_depth, output_height, output_width = [int(s * f) for s, f in zip([depth, height, width], factors)]
        else:
            output_depth, output_height, output_width = output_size

        # Step 1: resize height and width.
        # Fold depth into the batch dimension, such that every depth slice is resized as a 2D image.
        squeeze_b_x = tf.reshape(inputs_channels_last, [-1, height, width, num_channels])
        resize_b_x = tf.cast(tf.image.resize_bilinear(squeeze_b_x, [output_height, output_width], align_corners=False, half_pixel_centers=True), dtype=dtype)
        # -1 is used for the batch dimension, such that an unknown batch size is supported as well.
        resume_b_x = tf.reshape(resize_b_x, [-1, depth, output_height, output_width, num_channels])

        # Step 2: resize depth.
        # Swap depth and width, such that depth becomes the last spatial dimension.
        reoriented = tf.transpose(resume_b_x, [0, 3, 2, 1, 4])
        # Fold width into the batch dimension and resize (height, depth); height is already final.
        squeeze_b_z = tf.reshape(reoriented, [-1, output_height, depth, num_channels])
        resize_b_z = tf.cast(tf.image.resize_bilinear(squeeze_b_z, [output_height, output_depth], align_corners=False, half_pixel_centers=True), dtype=dtype)
        resume_b_z = tf.reshape(resize_b_z, [-1, output_width, output_height, output_depth, num_channels])

        # Restore the (depth, height, width) order and the requested data format.
        if data_format == 'channels_first':
            output = tf.transpose(resume_b_z, [0, 4, 3, 2, 1])
        else:
            output = tf.transpose(resume_b_z, [0, 3, 2, 1, 4])
        return output
def network2d(input, is_training, num_outputs_embedding, actual_network, filters=64, levels=5, activation='relu', normalize=True, data_format='channels_first', padding='same'):
    """
    Builds two consecutive 2D networks ('unet_0' and 'unet_1') that predict embeddings for all frames.
    Both networks end in a 1x1 convolution with num_outputs_embedding * num_frames filters, whose output is
    reshaped to [batch_size, num_outputs_embedding, num_frames, height, width]. The second network
    receives the input slice concatenated with the 2D embeddings of the first network.
    :param input: The input tensor. Its sizes are unpacked as (batch, channels, (num_frames, height, width));
                  only index 0 of axis 1 is fed into the networks.
    :param is_training: Training flag that is forwarded to the networks and convolutions.
    :param num_outputs_embedding: Number of embedding outputs per frame.
    :param actual_network: Callable that creates the network object used for both stages.
    :param filters: Forwarded to actual_network as num_filters_base.
    :param levels: Forwarded to actual_network as num_levels.
    :param activation: The activation. One of 'selu', 'relu', 'tanh'.
    :param normalize: If True, the embedding convolutions use L2 normalization as activation.
    :param data_format: The data format.
    :param padding: The padding that is forwarded to the networks and convolutions.
    :return: Tuple (embeddings, embeddings_2) of the first and second stage.
    :raises ValueError: If activation is not one of the supported strings.
    """
    if activation == 'selu':
        activation = tf.nn.selu
        kernel_initializer = selu_initializer
    elif activation == 'relu':
        activation = tf.nn.relu
        kernel_initializer = he_initializer
    elif activation == 'tanh':
        activation = tf.nn.tanh
        kernel_initializer = selu_initializer
    else:
        # Fail early; otherwise kernel_initializer would be unbound when it is first used below.
        raise ValueError('Unsupported activation: {!r}. Expected one of \'selu\', \'relu\', \'tanh\'.'.format(activation))
    # NOTE(review): the slicing and reshapes below look like they assume 'channels_first' - confirm
    # before using any other data format.
    embedding_axis = 1 if data_format == 'channels_first' else 4
    if normalize:
        embeddings_activation = lambda x, name: tf.nn.l2_normalize(x, dim=embedding_axis, name=name, epsilon=1e-4)
    else:
        if activation == tf.nn.selu:
            embeddings_activation = tf.nn.selu
        else:
            embeddings_activation = None
    embeddings_normalization = lambda x, name: tf.nn.l2_normalize(x, dim=embedding_axis, name=name, epsilon=1e-4)
    batch_size, channels, (num_frames, height, width) = get_batch_channel_image_size(input, data_format=data_format)
    with tf.variable_scope('unet_0'):
        unet = actual_network(num_filters_base=filters, kernel=[3, 3], num_levels=levels, data_format=data_format, kernel_initializer=kernel_initializer, activation=activation, is_training=is_training, name='unet', padding=padding)
        unet_out = unet(input[:, 0, :, :, :], is_training)
        embeddings_2d = conv2d(unet_out, kernel_size=[1, 1], name='embeddings', filters=num_outputs_embedding * num_frames, kernel_initializer=kernel_initializer, activation=embeddings_activation, data_format=data_format, is_training=is_training, padding=padding)
        embeddings = tf.reshape(embeddings_2d, [batch_size, num_outputs_embedding, num_frames, height, width])
    with tf.variable_scope('unet_1'):
        # NOTE(review): normalized_embeddings is computed, but not used afterwards. The tensor that is
        # fed into the second network is a reshape of embeddings_2d, not of normalized_embeddings.
        # This looks unintended, but is kept as is, because changing it would alter the inputs of
        # the second network for already trained models - confirm the intent before changing.
        normalized_embeddings = embeddings_normalization(embeddings, 'embeddings_normalized')
        normalized_embeddings_2d = tf.reshape(embeddings_2d, [batch_size, num_outputs_embedding * num_frames, height, width])
        input_concat = concat_channels([input[:, 0, :, :, :], normalized_embeddings_2d], name='input_concat', data_format=data_format)
        unet = actual_network(num_filters_base=filters, kernel=[3, 3], num_levels=levels, data_format=data_format, kernel_initializer=kernel_initializer, activation=activation, is_training=is_training, name='unet', padding=padding)
        unet_out = unet(input_concat, is_training)
        embeddings_2_2d = conv2d(unet_out, kernel_size=[1, 1], name='embeddings', filters=num_outputs_embedding * num_frames, kernel_initializer=kernel_initializer, activation=embeddings_activation, data_format=data_format, is_training=is_training, padding=padding)
        embeddings_2 = tf.reshape(embeddings_2_2d, [batch_size, num_outputs_embedding, num_frames, height, width])
    return embeddings, embeddings_2
def loss_function(self, pred, target):
    """
    Batch-size normalized L2 loss between prediction and target.
    :param pred: The prediction. Its batch size is used for the normalization.
    :param target: The target.
    :return: tf.nn.l2_loss(pred - target) divided by the batch size of pred.
    """
    sizes = get_batch_channel_image_size(pred, self.data_format)
    # Only the batch size is used; channel and image sizes are ignored.
    batch_size, _, _ = sizes
    difference = pred - target
    return tf.nn.l2_loss(difference) / batch_size
def loss_function(self, target, prediction):
    """
    Batch-size normalized L2 loss between target and prediction.
    :param target: The target. Its batch size is used for the normalization.
    :param prediction: The prediction.
    :return: tf.nn.l2_loss(target - prediction) divided by the batch size of target.
    """
    unnormalized_loss = tf.nn.l2_loss(target - prediction)
    batch_size = get_batch_channel_image_size(target, self.data_format)[0]
    return unnormalized_loss / batch_size