def test_metrics(self):
    """Check that the per-sample accuracy metrics keep the batch dimension."""
    with self.test_session():
        # Two random (6, 7) tensors stand in for targets and predictions.
        targets = K.variable(np.random.random((6, 7)))
        predictions = K.variable(np.random.random((6, 7)))
        accuracy_fns = [metrics.binary_accuracy, metrics.categorical_accuracy]
        for accuracy_fn in accuracy_fns:
            scores = K.eval(accuracy_fn(targets, predictions))
            # One score is expected for each of the six rows.
            self.assertEqual(scores.shape, (6,))
def test_merge_add(self):
    """Cover `Add`: output shape, predicted values, masks and bad arguments."""
    in_a = keras.layers.Input(shape=(4, 5))
    in_b = keras.layers.Input(shape=(4, 5))
    in_c = keras.layers.Input(shape=(4, 5))
    add_layer = keras.layers.Add()
    summed = add_layer([in_a, in_b, in_c])
    self.assertListEqual(summed.shape.as_list(), [None, 4, 5])

    model = keras.models.Model([in_a, in_b, in_c], summed)
    model.run_eagerly = testing_utils.should_run_eagerly()

    # The prediction must match the plain numpy sum of the three batches.
    data_a = np.random.random((2, 4, 5))
    data_b = np.random.random((2, 4, 5))
    data_c = np.random.random((2, 4, 5))
    prediction = model.predict([data_a, data_b, data_c])
    self.assertEqual(prediction.shape, (2, 4, 5))
    self.assertAllClose(prediction, data_a + data_b + data_c, atol=1e-4)

    # Without any input mask there is no output mask.
    unmasked = add_layer.compute_mask([in_a, in_b, in_c], [None, None, None])
    self.assertEqual(unmasked, None)

    # With masks supplied, every entry of the evaluated mask is truthy.
    merged_mask = add_layer.compute_mask(
        [in_a, in_b], [K.variable(data_a), K.variable(data_b)])
    self.assertTrue(np.all(K.eval(merged_mask)))

    # Malformed arguments are rejected with descriptive errors.
    with self.assertRaisesRegexp(ValueError, "`mask` should be a list."):
        add_layer.compute_mask([in_a, in_b, in_c], data_a)
    with self.assertRaisesRegexp(ValueError, "`inputs` should be a list."):
        add_layer.compute_mask(in_a, [None, None, None])
    with self.assertRaisesRegexp(ValueError, " should have the same length."):
        add_layer.compute_mask([in_a, in_b, in_c], [None, None])
def test_merge_subtract(self):
    """Cover `Subtract`: output shape, values, masks and argument checks."""
    in_a = keras.layers.Input(shape=(4, 5))
    in_b = keras.layers.Input(shape=(4, 5))
    # Third input exists only to trigger the "exactly 2 inputs" error below.
    in_extra = keras.layers.Input(shape=(4, 5))
    subtract_layer = keras.layers.Subtract()
    difference = subtract_layer([in_a, in_b])
    self.assertListEqual(difference.shape.as_list(), [None, 4, 5])

    model = keras.models.Model([in_a, in_b], difference)
    model.run_eagerly = testing_utils.should_run_eagerly()

    # The prediction must match the numpy difference of the two batches.
    data_a = np.random.random((2, 4, 5))
    data_b = np.random.random((2, 4, 5))
    prediction = model.predict([data_a, data_b])
    self.assertEqual(prediction.shape, (2, 4, 5))
    self.assertAllClose(prediction, data_a - data_b, atol=1e-4)

    # No input masks -> no output mask.
    unmasked = subtract_layer.compute_mask([in_a, in_b], [None, None])
    self.assertEqual(unmasked, None)

    # With masks supplied, every entry of the evaluated mask is truthy.
    merged_mask = subtract_layer.compute_mask(
        [in_a, in_b], [K.variable(data_a), K.variable(data_b)])
    self.assertTrue(np.all(K.eval(merged_mask)))

    # Malformed mask arguments.
    with self.assertRaisesRegexp(ValueError, "`mask` should be a list."):
        subtract_layer.compute_mask([in_a, in_b], data_a)
    with self.assertRaisesRegexp(ValueError, "`inputs` should be a list."):
        subtract_layer.compute_mask(in_a, [None, None])

    # The layer only accepts exactly two inputs.
    with self.assertRaisesRegexp(ValueError,
                                 "layer should be called on exactly 2 inputs"):
        subtract_layer([in_a, in_b, in_extra])
    with self.assertRaisesRegexp(ValueError,
                                 "layer should be called on exactly 2 inputs"):
        subtract_layer([in_a])
def test_sparse_top_k_categorical_accuracy(self):
    """Verify sparse top-k accuracy for 2-D and 1-D integer label tensors."""
    # (k, expected mean accuracy) pairs checked for each label layout.
    expected_by_k = ((3, 1), (2, 0.5), (1, 0.))
    # Labels shaped (num_samples, 1) first, then (num_samples,).
    label_layouts = ([[1], [0]], [1, 0])

    with self.cached_session():
        for labels in label_layouts:
            y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
            y_true = K.variable(np.array(labels))
            for k, expected in expected_by_k:
                result = K.eval(
                    metrics.sparse_top_k_categorical_accuracy(
                        y_true, y_pred, k=k))
                self.assertEqual(result, expected)
def test_merge_concatenate(self):
    """Cover `Concatenate(axis=1)`: shape, values, masks and argument checks."""
    in_a = keras.layers.Input(shape=(4, 5))
    in_b = keras.layers.Input(shape=(4, 5))
    concat_layer = keras.layers.Concatenate(axis=1)
    joined = concat_layer([in_a, in_b])
    # Axis 1 doubles from 4 to 8.
    self.assertListEqual(joined.shape.as_list(), [None, 8, 5])

    model = keras.models.Model([in_a, in_b], joined)
    model.run_eagerly = testing_utils.should_run_eagerly()

    # The prediction must match numpy's concatenation along the same axis.
    data_a = np.random.random((2, 4, 5))
    data_b = np.random.random((2, 4, 5))
    prediction = model.predict([data_a, data_b])
    self.assertEqual(prediction.shape, (2, 8, 5))
    self.assertAllClose(
        prediction, np.concatenate([data_a, data_b], axis=1), atol=1e-4)

    # No input masks -> no output mask.
    unmasked = concat_layer.compute_mask([in_a, in_b], [None, None])
    self.assertEqual(unmasked, None)

    # With masks supplied, every entry of the evaluated mask is truthy.
    merged_mask = concat_layer.compute_mask(
        [in_a, in_b], [K.variable(data_a), K.variable(data_b)])
    self.assertTrue(np.all(K.eval(merged_mask)))

    # Malformed arguments are rejected with descriptive errors.
    with self.assertRaisesRegexp(ValueError, "`mask` should be a list."):
        concat_layer.compute_mask([in_a, in_b], data_a)
    with self.assertRaisesRegexp(ValueError, "`inputs` should be a list."):
        concat_layer.compute_mask(in_a, [None, None])
    with self.assertRaisesRegexp(ValueError, "should have the same length"):
        concat_layer.compute_mask([in_a, in_b], [None])
    with self.assertRaisesRegexp(ValueError,
                                 "layer should be called on a list of inputs"):
        concat_layer(in_a)
def offset_sep_conv2d_eval(depth, padding, x):
    """Apply a stride-3 separable 2-D convolution to `x`.

    Args:
        depth: Number of entries in each kernel's third axis.
        padding: Padding mode forwarded to `K.separable_conv2d`.
        x: Input tensor to convolve.

    Returns:
        The tensor returned by `K.separable_conv2d`.
    """
    # All-ones depthwise kernel of shape (1, 1, depth, 1).
    depthwise_values = np.array([[[[1]] * depth]])
    depthwise_kernel = K.variable(value=depthwise_values, dtype='float32')

    # Pointwise kernel of the same shape: 1 for the first channel, 0 elsewhere.
    pointwise_values = np.array([[[[1]] + [[0]] * (depth - 1)]])
    pointwise_kernel = K.variable(value=pointwise_values, dtype='float32')

    return K.separable_conv2d(
        x,
        depthwise_kernel,
        pointwise_kernel,
        strides=(3, 3),
        padding=padding)
def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, **kwargs):
    """Create the SGD hyper-parameter variables.

    Args:
        lr: Initial value of the `lr` variable.
        momentum: Initial value of the `momentum` variable.
        decay: Initial value of the `decay` variable; also kept unchanged in
            `initial_decay`.
        nesterov: Stored as-is on `self.nesterov`.
        **kwargs: Forwarded to the parent constructor.
    """
    super(SGD, self).__init__(**kwargs)
    scope_name = self.__class__.__name__
    # Group the variables under a scope named after the concrete class.
    with K.name_scope(scope_name):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.momentum = K.variable(momentum, name='momentum')
        self.decay = K.variable(decay, name='decay')
    # Plain Python copies, kept alongside the backend variables.
    self.initial_decay = decay
    self.nesterov = nesterov
def test_top_k_categorical_accuracy(self):
    """Verify top-k categorical accuracy for k = 3, 2 and 1."""
    # (k, expected mean accuracy) pairs.
    expected_by_k = ((3, 1), (2, 0.5), (1, 0.))
    with self.test_session():
        y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
        y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]]))
        for k, expected in expected_by_k:
            accuracy = metrics.top_k_categorical_accuracy(y_true, y_pred, k=k)
            self.assertEqual(K.eval(accuracy), expected)
def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs):
    """Create the Adagrad hyper-parameter variables.

    Args:
        lr: Initial value of the `lr` variable.
        epsilon: Stored on `self.epsilon`; when `None`, `K.epsilon()` is used.
        decay: Initial value of the `decay` variable; also kept unchanged in
            `initial_decay`.
        **kwargs: Forwarded to the parent constructor.
    """
    super(Adagrad, self).__init__(**kwargs)
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.lr = K.variable(lr, name='lr')
        self.decay = K.variable(decay, name='decay')
        self.iterations = K.variable(0, dtype='int64', name='iterations')
    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = K.epsilon() if epsilon is None else epsilon
    self.initial_decay = decay
def test_simple_3d(self):
    """Check `ClipBoxes` on boxes with an extra leading axis, in both data formats."""
    img_h, img_w = np.random.randint(2, 5), np.random.randint(5, 9)

    # Boxes outside, exactly on, and just inside the image bounds.
    raw_boxes = np.array([[
        [9, 9, 9, 9],
        [-1, -1, -1, -1],
        [0, 0, img_w, img_h],
        [0, 0, img_w + 1, img_h + 1],
        [0, 0, img_w - 1, img_h - 1],
    ]], dtype='int')
    boxes = K.variable(np.expand_dims(raw_boxes, axis=0))

    # compute expected output
    expected = np.expand_dims(
        np.array([[
            [img_w - 1, img_h - 1, img_w - 1, img_h - 1],
            [0, 0, 0, 0],
            [0, 0, img_w - 1, img_h - 1],
            [0, 0, img_w - 1, img_h - 1],
            [0, 0, img_w - 1, img_h - 1],
        ]], dtype=K.floatx()),
        axis=0)

    # The same checks run for channels_last and then channels_first images.
    cases = (
        ('channels_last', (1, 1, img_h, img_w, 3)),
        ('channels_first', (1, 6, 1, img_h, img_w)),
    )
    for data_format, image_shape in cases:
        # create input
        image = K.variable(np.random.random(image_shape))

        # create simple ClipBoxes layer
        layer = layers.ClipBoxes(data_format=data_format)

        # compute output
        computed_shape = layer.compute_output_shape([image.shape, boxes.shape])
        actual = K.get_value(layer.call([image, boxes]))

        self.assertEqual(actual.shape, tuple(computed_shape))
        self.assertAllClose(actual, expected)
def __init__(self, lr=0.001, rho=0.9, epsilon=None, decay=0., **kwargs):
    """Create the RMSprop hyper-parameter variables.

    Args:
        lr: Initial value of the `lr` variable.
        rho: Initial value of the `rho` variable.
        epsilon: Stored on `self.epsilon`; when `None`, `K.epsilon()` is used.
        decay: Initial value of the `decay` variable; also kept unchanged in
            `initial_decay`.
        **kwargs: Forwarded to the parent constructor.
    """
    super(RMSprop, self).__init__(**kwargs)
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.lr = K.variable(lr, name='lr')
        self.rho = K.variable(rho, name='rho')
        self.decay = K.variable(decay, name='decay')
        self.iterations = K.variable(0, dtype='int64', name='iterations')
    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = K.epsilon() if epsilon is None else epsilon
    self.initial_decay = decay
def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs):
    """Create the Adagrad hyper-parameter variables.

    Args:
        lr: Initial value of the `lr` variable.
        epsilon: Stored on `self.epsilon`; when `None`, `backend.epsilon()`
            is used.
        decay: Initial value of the `decay` variable; also kept unchanged in
            `initial_decay`.
        **kwargs: Forwarded to the parent constructor.
    """
    super(Adagrad, self).__init__(**kwargs)
    scope_name = self.__class__.__name__
    with backend.name_scope(scope_name):
        self.lr = backend.variable(lr, name='lr')
        self.decay = backend.variable(decay, name='decay')
        self.iterations = backend.variable(0, dtype='int64', name='iterations')
    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = backend.epsilon() if epsilon is None else epsilon
    self.initial_decay = decay
def offset_sep_conv2d_eval(depth, padding, x):
    """Run a separable 2-D convolution with stride 3 over `x`.

    Args:
        depth: Number of entries in each kernel's third axis.
        padding: Padding mode forwarded to `K.separable_conv2d`.
        x: Input tensor to convolve.

    Returns:
        The tensor returned by `K.separable_conv2d`.
    """
    # Depthwise kernel: ones, shape (1, 1, depth, 1).
    ones_per_channel = [[1]] * depth
    depthwise_kernel = K.variable(
        value=np.array([[ones_per_channel]]), dtype='float32')

    # Pointwise kernel: picks the first channel only, shape (1, 1, depth, 1).
    first_channel_only = [[1]] + [[0]] * (depth - 1)
    pointwise_kernel = K.variable(
        value=np.array([[first_channel_only]]), dtype='float32')

    return K.separable_conv2d(
        x, depthwise_kernel, pointwise_kernel, strides=(3, 3), padding=padding)
def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
             epsilon=None, decay=0., amsgrad=False,
             model=None, zero_penalties=True,
             total_iterations=0, total_iterations_wd=None,
             use_cosine_annealing=False, lr_multipliers=None,
             weight_decays=None, autorestart=None, init_verbose=True,
             eta_min=0, eta_max=1, t_cur=0, name="CustomOptimizer", **kwargs):
    """Configure hyper-parameters, schedule state and bookkeeping attributes.

    Args:
        learning_rate: Value registered as the 'learning_rate' hyper unless
            an `lr` entry is present in `kwargs`, which takes precedence.
        beta_1: Value registered as the 'beta_1' hyper.
        beta_2: Value registered as the 'beta_2' hyper.
        epsilon: Stored on `self.epsilon`; a falsy value is replaced by
            `backend_config.epsilon()`.
        decay: Not referenced in this body (see the review note below).
        amsgrad: Stored on `self.amsgrad`.
        model: Passed to `_init_weight_decays` when `total_iterations > 1`.
        zero_penalties: Passed to `_init_weight_decays` in the same case.
        total_iterations: Stored on `self.total_iterations`; also the
            fallback for `total_iterations_wd`.
        total_iterations_wd: Stored on `self.total_iterations_wd`; a falsy
            value falls back to `total_iterations`.
        use_cosine_annealing: Stored on `self.use_cosine_annealing` and
            passed to `_set_autorestart` and `_check_args`.
        lr_multipliers: Stored on `self.lr_multipliers`.
        weight_decays: Stored on `self.weight_decays` (falsy becomes `{}`);
            replaced by the result of `_init_weight_decays` when
            `total_iterations > 1`.
        autorestart: Passed to `_set_autorestart`.
        init_verbose: Stored on `self.init_verbose`.
        eta_min: Wrapped in a backend constant named 'eta_min'.
        eta_max: Wrapped in a backend constant named 'eta_max'.
        t_cur: Initial value of the int64 `t_cur` variable.
        name: Passed positionally to the parent constructor.
        **kwargs: Forwarded to the parent constructor after `eta_t` is
            popped; `lr` is also read from it via `kwargs.get`.
    """
    if total_iterations > 1:
        weight_decays = _init_weight_decays(model, zero_penalties,
                                            weight_decays)
    # 'eta_t' is removed from kwargs before they reach the parent constructor.
    eta_t = kwargs.pop('eta_t', 1.)

    super(CustomOptimizer, self).__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    # NOTE(review): the `decay` parameter is never read here; the hyper is
    # taken from `self._initial_decay`, presumably set by the parent
    # constructor from kwargs -- confirm an explicit `decay=` is honoured.
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)

    self.eta_min = K.constant(eta_min, name='eta_min')
    self.eta_max = K.constant(eta_max, name='eta_max')
    self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
    self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')
    self.total_iterations = total_iterations
    self.total_iterations_wd = total_iterations_wd or total_iterations
    self.lr_multipliers = lr_multipliers
    self.weight_decays = weight_decays or {}
    self.init_verbose = init_verbose
    self.use_cosine_annealing = use_cosine_annealing
    self.epsilon = epsilon or backend_config.epsilon()
    self.amsgrad = amsgrad

    # These helpers receive the instance after the attributes above are set.
    _set_autorestart(self, autorestart, use_cosine_annealing)
    _check_args(self, total_iterations, use_cosine_annealing, weight_decays)
    self._init_lr = kwargs.get('lr', learning_rate)  # to print lr_mult setup
    self._updates_processed = 0  # to track num calls to '_resource_apply_...'
    self._init_notified = False
def test_top_k_categorical_accuracy(self):
    """Verify top-k categorical accuracy for k = 3, 2 and 1."""
    # (k, expected mean accuracy) pairs.
    expected_by_k = ((3, 1), (2, 0.5), (1, 0.))
    with self.cached_session():
        y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
        y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]]))
        for k, expected in expected_by_k:
            result = K.eval(
                metrics.top_k_categorical_accuracy(y_true, y_pred, k=k))
            self.assertEqual(result, expected)
def test_sparse_categorical_accuracy_int(self):
    """Check sparse categorical accuracy for several label/prediction shapes."""
    with self.cached_session():
        metric = metrics.sparse_categorical_accuracy

        # Shape check on random data: six samples, seven classes.
        random_labels = K.variable(np.random.randint(0, 7, (6, )))
        random_scores = K.variable(np.random.random((6, 7)))
        self.assertEqual(
            K.eval(metric(random_labels, random_scores)).shape, (6, ))

        # Test correctness if the shape of y_true is (num_samples,) and then
        # (num_samples, 1); both layouts must give the same result.
        for labels in ([1., 0., 0., 0.], [[1.], [0.], [0.], [0.]]):
            y_true = K.variable(labels)
            y_pred = K.variable(
                [[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
            self.assertAllEqual(
                K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])

        # Test correctness if the shape of y_true is (batch_size, seq_length)
        # and y_pred is (batch_size, seq_length, num_classes)
        seq_pred = K.variable(
            np.array([[[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]],
                      [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]]]))
        seq_true = K.variable(np.array([[1, 0], [1, 0]]))
        self.assertAllEqual(
            K.eval(metric(seq_true, seq_pred)), [[1., 0.], [0., 1.]])
def __init__(self, lr=0.002, lr_boost=10.0, gamma=1e-3, beta_1=0.9,
             beta_2=0.999, epsilon=None, decay=0., schedule_decay=0.004,
             amsgrad=False, sgdcorr=True, **kwargs):
    """Create the Nadabound hyper-parameter variables.

    Args:
        lr: Initial value of the `lr` variable.
        lr_boost: Initial value of the `lr_boost` variable.
        gamma: Initial value of the `gamma` variable.
        beta_1: Initial value of the `beta_1` variable.
        beta_2: Initial value of the `beta_2` variable.
        epsilon: Stored on `self.epsilon`; when `None`, `K.epsilon()` is used.
        decay: Initial value of the `decay` variable; also kept unchanged in
            `initial_decay`.
        schedule_decay: Stored as-is on `self.schedule_decay`.
        amsgrad: Stored as-is on `self.amsgrad`.
        sgdcorr: Stored as-is on `self.sgdcorr`.
        **kwargs: Forwarded to the parent constructor.
    """
    super(Nadabound, self).__init__(**kwargs)
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        # `m_schedule` always starts at 1.
        self.m_schedule = K.variable(1., name='m_schedule')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
        self.lr_boost = K.variable(lr_boost, name='lr_boost')
        self.gamma = K.variable(gamma, name='gamma')
    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = K.epsilon() if epsilon is None else epsilon
    self.initial_decay = decay
    self.schedule_decay = schedule_decay
    self.amsgrad = amsgrad
    self.sgdcorr = sgdcorr
def do_2d_convolution(feature_matrix, kernel_matrix, pad_edges=False,
                      stride_length_px=1):
    """Convolves 2-D feature maps with a 2-D kernel.

    M = number of rows in each input feature map
    N = number of columns in each input feature map
    C = number of input feature maps (channels)
    m = number of rows in kernel
    n = number of columns in kernel
    c = number of output feature maps (channels)

    :param feature_matrix: Input feature maps (numpy array without NaN).
        Dimensions must be M x N x C or 1 x M x N x C; a 3-D array gets a
        leading axis of length 1.
    :param kernel_matrix: Kernel (4-D numpy array without NaN).  Dimensions
        must be m x n x C x c.
    :param pad_edges: Boolean flag.  If True, the convolution uses 'same'
        padding; if False, 'valid' padding.
    :param stride_length_px: Stride length (integer >= 1), applied to both
        spatial dimensions.
    :return: feature_matrix: Output feature maps (numpy array), obtained by
        calling `.numpy()` on the result of `K.conv2d`.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_numpy_array_without_nan(kernel_matrix)
    error_checking.assert_is_numpy_array(kernel_matrix, num_dimensions=4)
    error_checking.assert_is_boolean(pad_edges)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 1)

    # Promote a single example (M x N x C) to a batch of one.
    if len(feature_matrix.shape) == 3:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)
    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=4)

    padding_string = 'same' if pad_edges else 'valid'
    strides = (stride_length_px, stride_length_px)

    feature_tensor = K.conv2d(
        x=K.variable(feature_matrix),
        kernel=K.variable(kernel_matrix),
        strides=strides,
        padding=padding_string,
        data_format='channels_last')

    return feature_tensor.numpy()
def __init__(self, lr=0.001, lr_boost=10.0, beta_1=0.9, beta_2=0.999,
             epsilon=None, decay=0., amsgrad=False, switch_flag=False,
             **kwargs):
    """Create the Adam2SGD hyper-parameter variables.

    Args:
        lr: Initial value of the `lr` variable.
        lr_boost: Stored as-is on `self.lr_boost`.
        beta_1: Initial value of the `beta_1` variable.
        beta_2: Initial value of the `beta_2` variable.
        epsilon: Stored on `self.epsilon`; when `None`, `K.epsilon()` is used.
        decay: Initial value of the `decay` variable; also kept unchanged in
            `initial_decay`.
        amsgrad: Stored as-is on `self.amsgrad`.
        switch_flag: Initial value of the boolean `switch_flag` variable; it
            also selects the initial value of `beta_g`.
        **kwargs: Forwarded to the parent constructor.
    """
    super(Adam2SGD, self).__init__(**kwargs)
    # beta_g starts at 1.0 when using SGD, and at 1 - beta_1 when using Adam.
    initial_beta_g = 1.0 if switch_flag else 1.0 - beta_1
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_g = K.variable(initial_beta_g, name='beta_g')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
        self.switch_flag = K.variable(
            switch_flag, dtype='bool', name='switch_flag')
    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = K.epsilon() if epsilon is None else epsilon
    self.initial_decay = decay
    self.amsgrad = amsgrad
    self.lr_boost = lr_boost
def __init__(self, decay_steps, warmup_steps, min_lr=0.0,
             learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
             weight_decay=0., weight_decay_pattern=None, amsgrad=False,
             **kwargs):
    """Create the AdamWarmup hyper-parameter variables.

    Args:
        decay_steps: Initial value of the `decay_steps` variable.
        warmup_steps: Initial value of the `warmup_steps` variable.
        min_lr: Initial value of the `min_lr` variable.
        learning_rate: Initial value of `self.learning_rate` (the variable is
            named 'lr'); an `lr` entry in `kwargs` takes precedence.
        beta_1: Initial value of the `beta_1` variable.
        beta_2: Initial value of the `beta_2` variable.
        epsilon: Stored on `self.epsilon`; when `None`, `K.epsilon()` is used.
        weight_decay: Initial value of the `weight_decay` variable; also kept
            unchanged in `initial_weight_decay`.
        weight_decay_pattern: Stored as-is on `self.weight_decay_pattern`.
        amsgrad: Stored as-is on `self.amsgrad`.
        **kwargs: Forwarded to the parent constructor once `lr` is removed.
    """
    # Accept the legacy `lr` keyword; it must not reach the parent class.
    learning_rate = kwargs.pop('lr', learning_rate)
    super(AdamWarmup, self).__init__(**kwargs)
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.decay_steps = K.variable(decay_steps, name='decay_steps')
        self.warmup_steps = K.variable(warmup_steps, name='warmup_steps')
        self.min_lr = K.variable(min_lr, name='min_lr')
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.learning_rate = K.variable(learning_rate, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.weight_decay = K.variable(weight_decay, name='weight_decay')
    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = K.epsilon() if epsilon is None else epsilon
    self.initial_weight_decay = weight_decay
    self.weight_decay_pattern = weight_decay_pattern
    self.amsgrad = amsgrad
def test_max_norm(self):
    """Check `max_norm` on the example array and on a hand-computed case."""
    array = get_example_array()
    for max_value in get_test_values():
        constraint = constraints.max_norm(max_value)
        constrained = constraint(backend.variable(array))
        # Every constrained entry must stay below the limit.
        assert np.all(backend.eval(constrained) < max_value)

    # a more explicit example
    constraint = constraints.max_norm(2.0)
    x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T
    # The last vector is rescaled so that each component is 2 / sqrt(3).
    x_normed_target = np.array(
        [[0, 0, 0],
         [1.0, 0, 0],
         [2.0, 0, 0],
         [2. / np.sqrt(3), 2. / np.sqrt(3), 2. / np.sqrt(3)]]).T
    x_normed_actual = backend.eval(constraint(backend.variable(x)))
    self.assertAllClose(x_normed_actual, x_normed_target, rtol=1e-05)
def loss(y_true, y_pred):
    """Clipped probability-ratio objective built from Gaussian densities.

    `old_prediction` and `advantage` are free variables resolved outside this
    function.

    Args:
        y_true: Target tensor.
        y_pred: Current prediction tensor.

    Returns:
        Negated mean of the element-wise minimum between the advantage-weighted
        ratio and its clipped counterpart.
    """
    variance = K.variable(1.0)
    pi = K.variable(3.1415926)
    normalizer = K.sqrt(2 * pi * variance)

    def gaussian_density(mean):
        # Density of y_true under a Gaussian centred on `mean`.
        return K.exp(-K.square(y_true - mean) / (2 * variance)) / normalizer

    prob = gaussian_density(y_pred)
    old_prob = gaussian_density(old_prediction)

    # The small constant keeps the ratio finite when old_prob is zero.
    ratio = prob / (old_prob + 1e-10)
    unclipped = ratio * advantage
    clipped = K.clip(ratio, min_value=1 - 1e-5, max_value=1 + 1e-5) * advantage
    return -K.mean(K.minimum(unclipped, clipped))
def _runner(self, init, shape, target_mean=None, target_std=None,
            target_max=None, target_min=None):
    """Check that an initializer survives a config round trip.

    The `target_*` arguments are accepted but not used in this body.

    Args:
        init: Initializer instance, called with `shape`.
        shape: Shape passed to the initializer.
    """
    original_values = backend.get_value(backend.variable(init(shape)))

    # Test serialization (assumes deterministic behavior).
    rebuilt_init = init.__class__.from_config(init.get_config())
    rebuilt_values = backend.get_value(backend.variable(rebuilt_init(shape)))

    self.assertAllClose(original_values, rebuilt_values, atol=1e-4)
def do_3d_convolution(feature_matrix, kernel_matrix, pad_edges=False,
                      stride_length_px=1):
    """Convolves 3-D feature maps with a 3-D kernel.

    M = number of rows in each input feature map
    N = number of columns in each input feature map
    H = number of heights in each input feature map
    C = number of input feature maps (channels)
    m = number of rows in kernel
    n = number of columns in kernel
    h = number of heights in kernel
    c = number of output feature maps (channels)

    :param feature_matrix: Input feature maps (numpy array without NaN).
        Dimensions must be M x N x H x C or 1 x M x N x H x C; a 4-D array
        gets a leading axis of length 1.
    :param kernel_matrix: Kernel (5-D numpy array without NaN).  Dimensions
        must be m x n x h x C x c.
    :param pad_edges: Boolean flag.  If True, the convolution uses 'same'
        padding; if False, 'valid' padding.
    :param stride_length_px: Stride length (integer >= 1), applied to all
        three spatial dimensions.
    :return: feature_matrix: Output feature maps (numpy array), obtained by
        calling `.numpy()` on the result of `K.conv3d`.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_numpy_array_without_nan(kernel_matrix)
    error_checking.assert_is_numpy_array(kernel_matrix, num_dimensions=5)
    error_checking.assert_is_boolean(pad_edges)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 1)

    # Promote a single example (M x N x H x C) to a batch of one.
    if len(feature_matrix.shape) == 4:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)
    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=5)

    padding_string = 'same' if pad_edges else 'valid'
    strides = (stride_length_px, stride_length_px, stride_length_px)

    feature_tensor = K.conv3d(
        x=K.variable(feature_matrix),
        kernel=K.variable(kernel_matrix),
        strides=strides,
        padding=padding_string,
        data_format='channels_last')

    return feature_tensor.numpy()
def testConditionalMaskUpdate(self):
    """Prune 100 weights over ten steps and track how many stay non-zero."""
    weight = K.variable(np.linspace(1.0, 100.0, 100), name="weights")
    mask = K.ones(weight.get_shape())
    threshold = K.zeros([])

    def linear_sparsity(step):
        # Schedule: always signal an update, with a step-indexed sparsity.
        sparsity_val = ops.convert_to_tensor(
            [0.0, 0.1, 0.1, 0.3, 0.3, 0.5, 0.5, 0.5, 0.5, 0.5])
        return ops.convert_to_tensor(True), sparsity_val[step]

    # Set up pruning
    pruner = pruning_impl.Pruning(
        pruning_vars=[(weight, mask, threshold)],
        training_step_fn=self.training_step_fn,
        pruning_schedule=linear_sparsity,
        block_size=self.block_size,
        block_pooling_type=self.block_pooling_type)

    observed_counts = []
    for _ in range(10):
        if context.executing_eagerly():
            # Eager mode: the calls take effect immediately.
            pruner.conditional_mask_update()
            pruner.weight_mask_op()
            state_ops.assign_add(self.global_step, 1)
        else:
            # Graph mode: each returned op has to be run in the session.
            K.get_session().run(pruner.conditional_mask_update())
            K.get_session().run(pruner.weight_mask_op())
            K.get_session().run(state_ops.assign_add(self.global_step, 1))

        observed_counts.append(np.count_nonzero(K.get_value(weight)))

    # Weights pruned at steps 1,3,5
    expected_non_zero_count = [100, 90, 90, 70, 70, 50, 50, 50, 50, 50]
    self.assertAllEqual(expected_non_zero_count, observed_counts)
def define_deepDream_model_layerBased(model):
    """Build the loss and gradient function for the configured feature layers.

    The layer names and their coefficients are read from the module-level
    `settings['features']` mapping.

    Args:
        model: Model whose input is optimised and whose layers are looked up
            by name.

    Raises:
        ValueError: If a configured layer name is not present in `model`.
    """
    dream = model.input
    print('Model loaded.')

    # Get the symbolic outputs of each "key" layer (we gave them unique names).
    layer_dict = {layer.name: layer for layer in model.layers}

    # On channels_first data the spatial axes are 2 and 3, otherwise 1 and 2.
    channels_first = K.image_data_format() == 'channels_first'

    # Define the loss.
    loss = K.variable(0.)
    for layer_name in settings['features']:
        # Add the L2 norm of the features of a layer to the loss.
        if layer_name not in layer_dict:
            raise ValueError('Layer ' + layer_name + ' not found in model.')
        coeff = settings['features'][layer_name]
        x = layer_dict[layer_name].output

        # We avoid border artifacts by only involving non-border pixels in the
        # loss.
        scaling = K.prod(K.cast(K.shape(x), 'float32'))
        if channels_first:
            interior = x[:, :, 2:-2, 2:-2]
        else:
            interior = x[:, 2:-2, 2:-2, :]
        loss = loss + coeff * K.sum(K.square(interior)) / scaling

    # Compute the gradients of the dream wrt the loss.
    grads = K.gradients(loss, dream)[0]
    # Normalize gradients.
    grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon())

    # Set up function to retrieve the value
    # of the loss and gradients given an input image.
    # NOTE(review): the function built here is neither returned nor stored in
    # the visible code -- confirm whether a `return` is missing.
    fetch_loss_and_grads = K.function([dream], [loss, grads])
def _horovod_average_metrics_in_place(self, logs):
    """Replace each metric in `logs` with its Horovod all-reduced value.

    Args:
        logs: Mapping of metric name to value; updated in place when it is
            non-empty.
    """
    logs = logs or {}
    reduced_logs = {}

    import horovod.tensorflow as hvd

    # Read the worker count from Horovod while the stored value is still <= 1.
    if self._allreduce_ranks <= 1.:
        self._allreduce_ranks = float(hvd.size())

    # Reduce every metric among workers. Sort metrics by name
    # to ensure consistent order.
    for metric, value in sorted(logs.items()):
        from tensorflow.python.eager import context

        if context.executing_eagerly():
            reduced_logs[metric] = hvd.allreduce(
                K.constant(value, name=metric)).numpy()
            continue

        if metric in self._m_vars:
            # Reuse the variable created on an earlier call.
            K.set_value(self._m_vars[metric], value)
        else:
            # First time this metric is seen: build its variable and the
            # all-reduce op once, and cache both.
            with K.name_scope('MetricAverageCallback'):
                var = K.variable(value, name=metric)
                K.get_session().run(var.initializer)
                self._m_vars[metric] = var
                self._allreduce_ops[metric] = hvd.allreduce(
                    var, device_dense=self._device)
        reduced_logs[metric] = K.get_session().run(
            self._allreduce_ops[metric])

    # Override the reduced values back into logs dictionary
    # for other callbacks to use.
    logs.update(reduced_logs)
def __init__(self, model, layer_name):
    """Build the loss and gradient function for one named layer.

    Args:
        model: Model whose input is optimised.
        layer_name: Name of the layer whose activations form the loss.

    Raises:
        ValueError: If `layer_name` is not the name of a layer in `model`.
    """
    self.model = model
    self.layer_name = layer_name
    dream = model.input

    # Get the symbolic outputs of each "key" layer (we gave them unique names).
    known_names = [layer.name for layer in model.layers]
    if layer_name not in known_names:
        raise ValueError('Layer ' + layer_name + ' not found in model.')

    # Define the loss.
    loss = K.variable(0.)
    for candidate in model.layers:
        if candidate.name != layer_name:
            continue
        x = candidate.output
        # We avoid border artifacts by only involving non-border pixels in the
        # loss.
        scaling = K.prod(K.cast(K.shape(x), 'float32'))
        if K.image_data_format() == 'channels_first':
            interior = x[:, :, 2:-2, 2:-2]
        else:
            interior = x[:, 2:-2, 2:-2, :]
        loss = loss + K.sum(K.square(interior)) / scaling

    # Compute the gradients of the dream wrt the loss.
    grads = K.gradients(loss, dream)[0]
    # Normalize gradients.
    grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon())

    # Set up function to retrieve the value
    # of the loss and gradients given an input image.
    self.fetch_loss_and_grads = K.function([dream], [loss, grads])
def test_simple(self):
    """Check the anchors produced for a single 2x2 feature map."""
    with self.test_session():
        # create simple Anchors layer
        unit = np.array([1], K.floatx())
        anchors_layer = layers.Anchors(
            size=32,
            stride=8,
            ratios=unit,
            scales=np.array([1], K.floatx()),
        )

        # create fake features input (only shape is used anyway)
        features = K.variable(np.zeros((1, 2, 2, 1024), dtype=K.floatx()))

        # call the Anchors layer
        anchors = K.get_value(anchors_layer.call(features))

        # expected anchor values
        expected = np.array([[
            [-12, -12, 20, 20],
            [-4, -12, 28, 20],
            [-12, -4, 20, 28],
            [-4, -4, 28, 28],
        ]], dtype=K.floatx())

        # test anchor values
        self.assertAllEqual(anchors, expected)
def binary_PFA(y_true, y_pred, threshold=None):
    """Probability of false alert: false positives over all negative labels.

    Args:
        y_true: Tensor of binary ground-truth labels.
        y_pred: Tensor of predicted scores.
        threshold: Scores at or above this value count as alerts. Defaults to
            0.5 when `None`.

    Returns:
        Scalar tensor FP / N. No guard is applied for N == 0.
    """
    # The default is resolved per call. Building a backend variable in the
    # signature would create it once at definition time and tie it to the
    # graph that was active then.
    if threshold is None:
        threshold = 0.5
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # N = total number of negative labels
    N = K.sum(1 - y_true)
    # FP = total number of false alerts, alerts from the negative class labels
    FP = K.sum(y_pred - y_pred * y_true)
    return FP / N
def binary_PTA(y_true, y_pred, threshold=None):
    """Probability of true alert: true positives over all positive labels.

    Args:
        y_true: Tensor of binary ground-truth labels.
        y_pred: Tensor of predicted scores.
        threshold: Scores at or above this value count as alerts. Defaults to
            0.5 when `None`.

    Returns:
        Scalar tensor TP / P. No guard is applied for P == 0.
    """
    # The default is resolved per call. Building a backend variable in the
    # signature would create it once at definition time and tie it to the
    # graph that was active then.
    if threshold is None:
        threshold = 0.5
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # P = total number of positive labels
    P = K.sum(y_true)
    # TP = total number of correct alerts, alerts from the positive class labels
    TP = K.sum(y_pred * y_true)
    return TP / P
def test_mini_batch(self):
    """Check that anchors are repeated for each item of a batch of two."""
    batch_size = 2

    # create simple Anchors layer
    anchors_layer = layers.Anchors(
        size=32,
        stride=8,
        ratios=np.array([1], dtype=K.floatx()),
        scales=np.array([1], dtype=K.floatx()),
    )

    # create fake features input with batch_size=2
    features = K.variable(
        np.zeros((batch_size, 2, 2, 1024), dtype=K.floatx()))

    # call the Anchors layer
    anchors = K.get_value(anchors_layer.call(features))

    # expected anchor values
    per_image = np.array([[
        [-12, -12, 20, 20],
        [-4, -12, 28, 20],
        [-12, -4, 20, 28],
        [-4, -4, 28, 28],
    ]], dtype=K.floatx())
    # The same anchors are expected for every image in the batch.
    expected = np.tile(per_image, (batch_size, 1, 1))

    # test anchor values
    self.assertAllEqual(anchors, expected)
def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
    """Store the anchor configuration and precompute the base anchors.

    Args:
        size: Base size passed to `generate_anchors`.
        stride: Stored as-is on `self.stride`.
        ratios: Anchor ratios. `None` selects `[0.5, 1, 2]`; a list is
            converted to a numpy array.
        scales: Anchor scales. `None` selects
            `[2**0, 2**(1/3), 2**(2/3)]`; a list is converted to a numpy
            array.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    self.size = size
    self.stride = stride

    # Resolve the defaults into plain arrays. A trailing comma here would
    # wrap the array in a 1-tuple.
    if ratios is None:
        ratios = np.array([0.5, 1, 2], K.floatx())
    elif isinstance(ratios, list):
        ratios = np.array(ratios)
    if scales is None:
        scales = np.array([2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)], K.floatx())
    elif isinstance(scales, list):
        scales = np.array(scales)
    self.ratios = ratios
    self.scales = scales

    # Use the resolved values so that the `None` defaults work as well.
    self.num_anchors = len(self.ratios) * len(self.scales)
    self.anchors = K.variable(
        generate_anchors(
            base_size=size,
            ratios=self.ratios,
            scales=self.scales,
        ))

    super(Anchors, self).__init__(*args, **kwargs)
def __init__(self, size, stride, ratios=None, scales=None, data_format=None,
             *args, **kwargs):
    """Store the anchor configuration and precompute the base anchors.

    Args:
        size: Base size passed to `generate_anchors`.
        stride: Stored as-is on `self.stride`.
        ratios: Anchor ratios. `None` selects
            `AnchorParameters.default.ratios`; a list is converted to a numpy
            array.
        scales: Anchor scales. `None` selects
            `AnchorParameters.default.scales`; a list is converted to a numpy
            array.
        data_format: Normalised with `conv_utils.normalize_data_format`.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    # The parent constructor runs exactly once, before any attribute is set.
    # A second call after the attributes are assigned would re-run the
    # parent's setup on an already configured layer.
    super(Anchors, self).__init__(*args, **kwargs)
    self.data_format = conv_utils.normalize_data_format(data_format)

    self.size = size
    self.stride = stride
    self.ratios = ratios
    self.scales = scales

    if self.ratios is None:
        self.ratios = retinanet_anchor_utils.AnchorParameters.default.ratios
    if isinstance(self.ratios, list):
        self.ratios = np.array(self.ratios)
    if self.scales is None:
        self.scales = retinanet_anchor_utils.AnchorParameters.default.scales
    if isinstance(self.scales, list):
        self.scales = np.array(self.scales)

    self.num_anchors = len(self.ratios) * len(self.scales)
    # Generate from the resolved values so the stored anchors always agree
    # with `num_anchors`, `self.ratios` and `self.scales`.
    self.anchors = K.variable(
        retinanet_anchor_utils.generate_anchors(
            base_size=size, ratios=self.ratios, scales=self.scales))
def add_style_loss(vgg, style_image_path, vgg_layers, vgg_output_dict,
                   img_width, img_height, weight):
    """Attach a style reconstruction loss to four VGG layers.

    Args:
        vgg: Model whose `layers[-19].input` feeds the feature extractor.
        style_image_path: Path handed to `img_util.preprocess_image`.
        vgg_layers: Mapping from layer name to the layer receiving the loss.
        vgg_output_dict: Mapping from layer name to that layer's output.
        img_width: Width handed to `img_util.preprocess_image`.
        img_height: Height handed to `img_util.preprocess_image`.
        weight: Weight passed to `StyleReconstructionRegularizer`.
    """
    style_img = img_util.preprocess_image(
        style_image_path, img_width, img_height)
    print('Getting style features from VGG network.')

    style_layers = [
        'block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3'
    ]

    # Evaluate the chosen layer outputs once for the style image.
    style_layer_outputs = [vgg_output_dict[name] for name in style_layers]
    vgg_style_func = K.function([vgg.layers[-19].input], style_layer_outputs)
    style_features = vgg_style_func([style_img])

    # Style Reconstruction Loss
    for index, layer_name in enumerate(style_layers):
        target_layer = vgg_layers[layer_name]
        # Element 0 drops the leading axis of the extracted features.
        feature_var = K.variable(value=style_features[index][0])
        regularizer = StyleReconstructionRegularizer(
            style_feature_target=feature_var, weight=weight)
        target_layer.add_loss(regularizer(target_layer))
def GetGradient(data_x_win,model,perturbation):
    """Return the sign of the MSE-loss gradient w.r.t. the input, per window.

    Reads the module-level names `df_YY_actual`, `Y` and `header`.

    Args:
        data_x_win: Indexable collection of input windows; each one is fed to
            `model.input` as a batch of one.
        model: Model providing `input` and `output` tensors.
        perturbation: Not used in this body.

    Returns:
        List with one numpy array per window, holding the gradient signs.
    """
    y_pred = model.output
    # Placeholder-like variable for the target; overridden through feed_dict
    # in the loop below.
    y_true = K.variable(np.array(df_YY_actual.iloc[0]))
    #ten_x_scale = K.variable(np.array(data_x_win[0]))
    loss = keras.losses.mean_squared_error(y_true, y_pred)
    grads = K.gradients(loss,model.input)[0]
    x_adv = K.sign(grads)
    sess =K.get_session()
    # NOTE(review): this runs the initializer of every global variable,
    # which presumably includes the model's weights -- confirm that
    # resetting them here is intended.
    init = tf.compat.v1.global_variables_initializer()
    sess.run(init)
    # Only the first element of the batch axis is fetched for each window.
    x_adv_0 = x_adv[0]
    adv = []
    if len(Y) != len(data_x_win):
        print("WARNING!!!! Unequal length of X and Y")
    #len(df_x_scale)
    for i in range(len(data_x_win)):
        adv_i = sess.run(x_adv_0, feed_dict={model.input:[data_x_win[i]],y_true:np.array(df_YY_actual.iloc[i])})
        # Progress output every 1000 windows.
        if i%1000 == 0:
            print(i)
            print(datetime.datetime.now().isoformat())
        # Round trip through a DataFrame labelled with `header`.
        df_grd_i = pd.DataFrame(adv_i,columns = header)
        adv.append(np.array(df_grd_i))
    return adv
def do_3d_pooling(feature_matrix, stride_length_px=2,
                  pooling_type_string=MAX_POOLING_TYPE_STRING):
    """Pools 3-D feature maps.

    :param feature_matrix: Input feature maps (numpy array without NaN).
        Dimensions must be M x N x H x C or 1 x M x N x H x C; a 4-D array
        gets a leading axis of length 1.
    :param stride_length_px: Integer >= 2, used both as the pooling-window
        size and as the stride in all three spatial dimensions.
    :param pooling_type_string: Pooling type (must be accepted by
        `_check_pooling_type`).
    :return: feature_matrix: Output feature maps (numpy array), obtained by
        calling `.numpy()` on the result of `K.pool3d`.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 2)
    _check_pooling_type(pooling_type_string)

    # Promote a single example (M x N x H x C) to a batch of one.
    if len(feature_matrix.shape) == 4:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)
    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=5)

    # Window and stride are equal, so pooling windows do not overlap.
    window = (stride_length_px, stride_length_px, stride_length_px)

    feature_tensor = K.pool3d(
        x=K.variable(feature_matrix),
        pool_mode=pooling_type_string,
        pool_size=window,
        strides=(stride_length_px, stride_length_px, stride_length_px),
        padding='valid',
        data_format='channels_last')

    return feature_tensor.numpy()
def build_predictor(self, predict_activation=None):
    """Construct the predictor network from the list of layers.

    The layers in `self.predictorLayers_` are applied in order. Two Dense
    heads with `self.predDim_` units follow: one emits the prediction and the
    other (named 'log_var_y') emits its log-variance. When
    `self.learnUncertainty_` is false, the log-variance head is overridden
    with the constant `log(self.predVar_)`.

    Args:
        predict_activation: activation function for the prediction head

    Raises:
        ValueError: If no predictor hidden layer has been added.
    """
    if len(self.predictorLayers_) == 0:
        raise ValueError("Must add at least one predictor hidden layer")

    pred_in = self._build_decoder_inputs()
    hidden = self._edit_decoder_inputs(pred_in)
    for hidden_layer in self.predictorLayers_:
        hidden = hidden_layer(hidden)

    prediction_head = Dense(units=self.predDim_, activation=predict_activation)
    y_pred = prediction_head(hidden)
    log_var_y = Dense(self.predDim_, name='log_var_y')(hidden)

    if not self.learnUncertainty_:
        # Multiplying by zero discards the learned value; the layer then
        # outputs log(predVar_) everywhere.
        log_var_y = Lambda(
            lambda lv: 0 * lv + K.ones_like(lv) * K.log(
                K.variable(self.predVar_)))(log_var_y)

    self.predictor_ = Model(
        inputs=pred_in, outputs=[y_pred, log_var_y], name='predictor')
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None,
             schedule_decay=0.004, **kwargs):
    """Create the Nadam hyper-parameter variables.

    Args:
        lr: Initial value of the `lr` variable.
        beta_1: Initial value of the `beta_1` variable.
        beta_2: Initial value of the `beta_2` variable.
        epsilon: Stored on `self.epsilon`; when `None`, `K.epsilon()` is used.
        schedule_decay: Stored as-is on `self.schedule_decay`.
        **kwargs: Forwarded to the parent constructor.
    """
    super(Nadam, self).__init__(**kwargs)
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        # `m_schedule` always starts at 1.
        self.m_schedule = K.variable(1., name='m_schedule')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = K.epsilon() if epsilon is None else epsilon
    self.schedule_decay = schedule_decay
def __init__(self, optimizer, iterations=None):  # pylint: disable=super-init-not-called
    """Wrap `optimizer` and set up the iteration counter.

    Args:
        optimizer: Optimizer to wrap; tracked under the name 'optimizer'.
        iterations: Optional existing counter. When `None`, a new int64
            variable starting at 0 is created.
    """
    self.optimizer = optimizer
    self._track_checkpointable(optimizer, name='optimizer')

    if iterations is not None:
        self.iterations = iterations
    else:
        # No counter supplied: create one under a scope named after the class.
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')

    # The counter is tracked under the name 'global_step'.
    self._track_checkpointable(self.iterations, name='global_step')
def __init__(self,
             lr=0.002,
             beta_1=0.9,
             beta_2=0.999,
             epsilon=None,
             decay=0.,
             **kwargs):
    """Create the Adamax optimizer state.

    `iterations`, `lr`, `beta_1`, `beta_2` and `decay` are stored as backend
    variables created under a name scope equal to the class name.  The raw
    `decay` argument is additionally kept as `self.initial_decay`.

    Args:
        lr: initial value of the `lr` variable.
        beta_1: initial value of the `beta_1` variable.
        beta_2: initial value of the `beta_2` variable.
        epsilon: stored as `self.epsilon`; `None` selects `K.epsilon()`.
        decay: initial value of the `decay` variable.
        **kwargs: forwarded to the parent constructor.
    """
    super(Adamax, self).__init__(**kwargs)

    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')

    # Fall back to the backend-wide fuzz factor when none is given.
    self.epsilon = K.epsilon() if epsilon is None else epsilon
    self.initial_decay = decay
def check_operation_offset(depth, eval_f, padding):
    """Report whether the backend offset the filter while placing it.

    TensorFlow is inconsistent about applying such an offset (e.g. during a
    convolution): it depends on the type of operation, the device used
    (CPU/GPU) and the input depth.

    Args:
        depth: number of channels of the probe input.
        eval_f: callable invoked as `eval_f(depth, padding, input_data)`
            that returns the backend tensor to evaluate.
        padding: forwarded unchanged to `eval_f`.

    Returns:
        True when the flattened result is [1, 4], False when it is [0, 3].
    """
    # Six positions along one axis; position i holds the value i in every
    # one of its `depth` channels.
    positions = [[index] * depth for index in range(6)]
    probe = K.variable(value=np.array([[positions]]), dtype='float32')

    evaluated = K.eval(eval_f(depth, padding, probe))
    result = evaluated.flatten().tolist()

    without_offset, with_offset = [0, 3], [1, 4]
    assert result in [without_offset, with_offset]
    return result == with_offset
def test_sparse_categorical_accuracy(self):
    """Checks the output shape and values of sparse_categorical_accuracy."""
    with self.cached_session():
        metric = metrics.sparse_categorical_accuracy

        # Shape check on random integer labels and random scores.
        random_labels = K.variable(np.random.randint(0, 7, (6,)))
        random_scores = K.variable(np.random.random((6, 7)))
        per_sample = K.eval(metric(random_labels, random_scores))
        self.assertEqual(per_sample.shape, (6,))

        # Test correctness if the shape of y_true is (num_samples,) and then
        # if the shape of y_true is (num_samples, 1).
        label_layouts = (
            [1., 0., 0., 0.],
            [[1.], [0.], [0.], [0.]],
        )
        for label_values in label_layouts:
            y_true = K.variable(label_values)
            y_pred = K.variable(
                [[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
            print(K.eval(metric(y_true, y_pred)))
            self.assertAllEqual(
                K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
    """Trains a one-variable float16 model under a dynamic loss scale.

    The model's gradient is checked inside the graph against
    `expected_gradient`, and `have_nan_gradients` toggles NaN gradients on
    and off.  After every `fit` call the test asserts the value of `layer.v`.

    Args:
        strategy_fn: zero-argument callable returning the distribution
            strategy whose scope the model is built in.
        cloning: forwarded to `model.compile`.
    """
    strategy = strategy_fn()
    initial_loss_scale = 2.
    batch_size = 4
    # Gradient value the in-graph check compares against; it is updated below
    # whenever the loss scale is expected to change.
    expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                         dtype=dtypes.float16)
    # If this variable is set to True, the model below will have NaN gradients
    have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
    with strategy.scope():
        with policy.policy_scope(policy.Policy('infer_float32_vars')):
            x = layers.Input(shape=(1,), batch_size=batch_size,
                             dtype=dtypes.float16)
            layer = AddLayer(assert_type=dtypes.float16)
            y = layer(x)
            # Identity layer that is handed the `have_nan_gradients` switch.
            identity_with_nan_grads = (
                mp_test_util.create_identity_with_nan_gradients_fn(
                    have_nan_gradients))
            y = core.Lambda(identity_with_nan_grads)(y)
            # Identity layer that is handed the expected gradient dtype/value.
            identity_with_grad_check_fn = (
                mp_test_util.create_identity_with_grad_check_fn(
                    expected_dtype=dtypes.float16,
                    expected_gradient=expected_gradient))
            y = core.Lambda(identity_with_grad_check_fn)(y)
            y = math_ops.cast(y, dtypes.float32)
            model = models.Model(inputs=x, outputs=y)

            def loss_fn(y_true, y_pred):
                # The loss ignores the targets and is the mean model output.
                del y_true
                return math_ops.reduce_mean(y_pred)

            opt = gradient_descent.SGD(1.)
            loss_scale = loss_scale_module.DynamicLossScale(
                initial_loss_scale=initial_loss_scale, increment_period=2)
            opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
            model.compile(opt, loss=loss_fn, cloning=cloning)

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((batch_size, 1))
    y = np.ones((batch_size, 1))
    # A single batch, so each `fit` call below performs one training step.
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    model.fit(dataset)
    # The variable starts at 1 and has a gradient of 1, so it goes down by 1
    # each step.
    self.assertEqual(backend.eval(layer.v), 0)

    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -1)

    # There have been two steps without NaNs, so the loss scale will double
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient * 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -2)

    # Next test with NaN gradients.
    backend.set_value(have_nan_gradients, True)
    model.fit(dataset)
    # Variable should not be updated
    self.assertEqual(backend.eval(layer.v), -2)

    # Test with finite gradients again
    backend.set_value(have_nan_gradients, False)
    # The loss scale will be halved due to the NaNs, so the gradient will also
    # be halved
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient / 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -3)
def experimental_tpu_fit_loop(model,
                              dataset,
                              epochs=100,
                              verbose=1,
                              callbacks=None,
                              initial_epoch=0,
                              steps_per_epoch=None,
                              val_dataset=None,
                              validation_steps=None,
                              validation_freq=1):
    """Fit loop for training with TPU DistributionStrategy.

    Arguments:
        model: Keras Model instance.
        dataset: Dataset that returns inputs and targets
        epochs: Number of times to iterate over the data
        verbose: Integer, Verbosity mode, 0, 1 or 2
        callbacks: List of callbacks to be called during training
        initial_epoch: Epoch at which to start training
            (useful for resuming a previous training run)
        steps_per_epoch: Total number of steps (batches of samples)
            before declaring one epoch finished and starting the
            next epoch. The value is first passed through
            `training_utils.infer_steps_for_dataset`; if it is still `None`
            afterwards, each epoch runs until the dataset iterator is
            exhausted (only allowed when the strategy's `steps_per_run`
            is 1).
        val_dataset: Dataset for validation data.
        validation_steps: Number of steps to run validation for
            (only if doing validation from data tensors).
            Validation is skipped when this is falsy.
        validation_freq: Only relevant if validation data is provided. Integer
            or `collections.Container` instance (e.g. list, tuple, etc.). If
            an integer, specifies how many training epochs to run before a new
            validation run is performed, e.g. `validation_freq=2` runs
            validation every 2 epochs. If a Container, specifies the epochs on
            which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
            validation at the end of the 1st, 2nd, and 10th epochs.

    Returns:
        `model.history`.

    Raises:
        ValueError: if `steps_per_epoch` is `None` (after inference) while
            the strategy's `steps_per_run` is not 1.
    """
    mode = ModeKeys.TRAIN
    # TODO(fchollet): add support for `steps_per_epoch=None` in TPU loops.
    current_strategy = model._distribution_strategy
    iterator = distributed_training_utils.get_iterator(dataset,
                                                       current_strategy)
    steps_per_epoch = training_utils.infer_steps_for_dataset(
        dataset, steps_per_epoch, epochs, steps_name='steps_per_epoch')
    if (current_strategy.extended.steps_per_run != 1 and
            steps_per_epoch is None):
        raise ValueError('`steps_per_epoch` should be specified when calling '
                         '`fit` on the model with TPUStrategy when '
                         '`steps_per_run` != 1 .')

    # The scope is entered manually and exited just before returning.
    scope = distributed_training_utils.distributed_scope(
        strategy=current_strategy, learning_phase=1)
    scope.__enter__()

    out_labels = model.metrics_names or []

    step_fn = _make_step_fn(model, ModeKeys.TRAIN, current_strategy,
                            out_labels)

    # Add initial dummy values for loss and other metric tensors.
    initial_loop_values = {}
    initial_loop_values['loss'] = constant_op.constant(1e7)
    for name in model.metrics_names[1:]:
        tensor = model._all_stateful_metrics_tensors[name]
        initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)

    use_steps = steps_per_epoch is not None
    if use_steps:
        iteration_value = min(steps_per_epoch,
                              current_strategy.extended.steps_per_run)
    else:
        iteration_value = current_strategy.extended.steps_per_run

    # Backend variable so the number of steps per device run can be changed
    # between runs (see `steps_per_run.load` below).
    steps_per_run = K.variable(
        value=iteration_value,
        dtype='int32',
        name='steps_per_run')
    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
        step_fn, iterator, iterations=steps_per_run,
        initial_loop_values=initial_loop_values)
    train_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    do_validation = bool(validation_steps)

    if model._compile_distribution:
        distributed_training_utils._copy_weights_to_distributed_model(
            model, mode)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=do_validation,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        verbose=verbose,
        count_mode='steps',
        mode=mode)

    # Calculate the steps each time on the device.
    if use_steps:
        steps_to_run = ([current_strategy.extended.steps_per_run] *
                        (steps_per_epoch //
                         current_strategy.extended.steps_per_run))
        if steps_per_epoch % current_strategy.extended.steps_per_run:
            steps_to_run.append(
                steps_per_epoch % current_strategy.extended.steps_per_run)
        target_steps = len(steps_to_run)
    else:
        # Unknown epoch length: run until the iterator is exhausted.
        target_steps = np.inf

    callbacks._call_begin_hook(mode)
    for epoch in range(initial_epoch, epochs):
        distributed_training_utils._reset_metrics(model)
        callbacks.on_epoch_begin(epoch)
        epoch_logs = {}
        step_index = 0
        prev_step_count = None
        current_step = 0
        while current_step < target_steps:
            step_count = steps_to_run[current_step] if use_steps else 1
            batch_logs = {'batch': step_index, 'size': 1,
                          'num_steps': step_count}
            callbacks._call_batch_hook(mode, 'begin', step_index, batch_logs)
            # Only reload the variable when the step count actually changes.
            if prev_step_count is None or step_count != prev_step_count:
                steps_per_run.load(step_count, K.get_session())
                prev_step_count = step_count
            try:
                _, outputs = K.batch_get_value([train_op, output_tensors])
            except errors.OutOfRangeError:
                if use_steps:
                    # The product is passed as a logging argument: with
                    # `msg % steps_per_epoch * epochs` the `%` binds first
                    # and the formatted message is repeated `epochs` times.
                    logging.warning(
                        'Your dataset iterator ran out of data; '
                        'interrupting training. Make sure that your dataset '
                        'can generate at least `steps_per_epoch * epochs` '
                        'batches (in this case, %d batches).',
                        steps_per_epoch * epochs)
                else:
                    # The epoch length is now known; reuse it for later
                    # epochs and rewind the iterator.
                    target_steps = current_step
                    logging.info('Dataset iterator ran out of data. Inferring '
                                 'the value of `steps_per_epoch` as %s .'
                                 % target_steps)
                    distributed_training_utils.initialize_iterator(
                        iterator, current_strategy)
                break

            batch_logs.update(outputs)
            callbacks._call_batch_hook(mode, 'end', step_index, batch_logs)
            step_index = step_index + step_count
            current_step += 1

            if callbacks.model.stop_training:
                break

        if (do_validation and
                training_utils.should_run_validation(validation_freq, epoch)):
            logging.info('Running validation at fit epoch: %s', epoch)

            if model._compile_distribution:
                # Since we create a new clone from the original model we need
                # to copy the weights back to the original model before we
                # can run validation.
                distributed_training_utils._copy_weights_to_original_model(
                    model, ModeKeys.TRAIN)

            val_outs = experimental_tpu_test_loop(  # pylint: disable=undefined-variable
                model,
                val_dataset,
                steps=validation_steps,
                verbose=verbose,
                callbacks=callbacks)
            if not isinstance(val_outs, list):
                val_outs = [val_outs]
            # Same labels assumed.
            for label, val_out in zip(out_labels, val_outs):
                epoch_logs['val_' + label] = val_out

        callbacks.on_epoch_end(epoch, epoch_logs)
        if callbacks.model.stop_training:
            break
    callbacks._call_end_hook(mode)

    if model._compile_distribution:
        # Copy the weights back from the replicated model to the original
        # model.
        distributed_training_utils._copy_weights_to_original_model(
            model, ModeKeys.TRAIN)
    scope.__exit__(None, None, None)
    return model.history
def experimental_tpu_fit_loop(model,
                              dataset,
                              epochs=100,
                              verbose=1,
                              callbacks=None,
                              initial_epoch=0,
                              steps_per_epoch=None,
                              val_dataset=None,
                              validation_steps=None,
                              validation_freq=1):
    """Fit loop for training with TPU DistributionStrategy.

    Arguments:
        model: Keras Model instance.
        dataset: Dataset that returns inputs and targets
        epochs: Number of times to iterate over the data
        verbose: Integer, Verbosity mode, 0, 1 or 2
        callbacks: List of callbacks to be called during training
        initial_epoch: Epoch at which to start training
            (useful for resuming a previous training run)
        steps_per_epoch: Total number of steps (batches of samples)
            before declaring one epoch finished and starting the
            next epoch. Required: `None` raises `ValueError`.
        val_dataset: Dataset for validation data.
        validation_steps: Number of steps to run validation for
            (only if doing validation from data tensors).
            Validation is skipped when this is falsy.
        validation_freq: Only relevant if validation data is provided. Integer
            or `collections.Container` instance (e.g. list, tuple, etc.). If
            an integer, specifies how many training epochs to run before a new
            validation run is performed, e.g. `validation_freq=2` runs
            validation every 2 epochs. If a Container, specifies the epochs on
            which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
            validation at the end of the 1st, 2nd, and 10th epochs.

    Returns:
        `model.history`.

    Raises:
        ValueError: if `steps_per_epoch` is `None`.
    """
    mode = ModeKeys.TRAIN
    # TODO(fchollet): add support for `steps_per_epoch=None` in TPU loops.
    current_strategy = model._distribution_strategy
    iterator = distributed_training_utils.get_iterator(dataset,
                                                       current_strategy)

    # The scope is entered manually and exited just before returning.
    scope = distributed_training_utils.distributed_scope(
        strategy=current_strategy, learning_phase=1)
    scope.__enter__()

    def _per_device_fit_function(model):
        model._make_fit_function()
        return (model._fit_function.inputs, model._fit_function.outputs,
                model._fit_function.updates_op,
                model._fit_function.session_kwargs)

    out_labels = model.metrics_names or []

    def step_fn(ctx, inputs):
        """Clones the model and calls make_fit_function."""
        inputs, targets = inputs
        if model._compile_distribution:
            distributed_training_utils.clone_model_on_replicas(
                model, current_strategy, mode, inputs=inputs, targets=targets)
        else:
            distributed_training_utils._build_distributed_network(
                model, current_strategy, mode, inputs, targets)

        (grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args) = (
             current_strategy.extended.call_for_each_replica(
                 _per_device_fit_function,
                 args=(distributed_training_utils.get_distributed_model(
                     model, ModeKeys.TRAIN),)))
        (all_inputs, all_outputs, all_updates,
         all_session_args) = distributed_training_utils.unwrap_values(
             current_strategy, grouped_inputs, grouped_outputs,
             grouped_updates, grouped_session_args)
        combined_fn = K.function(
            all_inputs,
            all_outputs,
            updates=all_updates,
            name='distributed_fit_function',
            **all_session_args)

        for label, output in zip(out_labels, combined_fn.outputs):
            if label == 'loss':
                reduce_op = ds_reduce_util.ReduceOp.SUM
            else:
                # We reduce all other metrics using mean for now. This is
                # temporary workaround until new metrics are in place.
                reduce_op = ds_reduce_util.ReduceOp.MEAN
            ctx.set_last_step_output(label, output, reduce_op)

        # TODO(priyag, sourabhbajaj): Ignoring these things from the
        # combined_fn: feed_dict, session kwargs, run options, run_metadata
        # for now. These should be handled appropriately
        return combined_fn.updates_op

    # Add initial dummy values for loss and other metric tensors.
    initial_loop_values = {}
    initial_loop_values['loss'] = constant_op.constant(1e7)
    for name in model.metrics_names[1:]:
        tensor = model._all_stateful_metrics_tensors[name]
        initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)

    if steps_per_epoch is None:
        raise ValueError('`steps_per_epoch` should be specified when calling '
                         '`fit` on the model.')
    # Backend variable so the number of steps per device run can be changed
    # between runs (see `steps_per_run.load` below).
    steps_per_run = K.variable(
        value=min(steps_per_epoch, current_strategy.extended.steps_per_run),
        dtype='int32',
        name='steps_per_run')

    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
        step_fn, iterator, iterations=steps_per_run,
        initial_loop_values=initial_loop_values)

    train_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    do_validation = bool(validation_steps)

    if model._compile_distribution:
        distributed_training_utils._copy_weights_to_distributed_model(
            model, mode)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=do_validation,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        verbose=verbose,
        count_mode='steps',
        mode=mode)

    # Calculate the steps each time on the device.
    steps_to_run = [current_strategy.extended.steps_per_run] * (
        steps_per_epoch // current_strategy.extended.steps_per_run)
    if steps_per_epoch % current_strategy.extended.steps_per_run:
        steps_to_run.append(
            steps_per_epoch % current_strategy.extended.steps_per_run)

    callbacks._call_begin_hook(mode)
    for epoch in range(initial_epoch, epochs):
        distributed_training_utils._reset_metrics(model)
        callbacks.on_epoch_begin(epoch)
        epoch_logs = {}
        step_index = 0
        prev_step_count = None
        for step_count in steps_to_run:
            batch_logs = {'batch': step_index, 'size': 1,
                          'num_steps': step_count}
            callbacks._call_batch_hook(mode, 'begin', step_index, batch_logs)
            # Only reload the variable when the step count actually changes.
            if prev_step_count is None or step_count != prev_step_count:
                steps_per_run.load(step_count, K.get_session())
                prev_step_count = step_count
            try:
                _, outputs = K.get_session().run([train_op, output_tensors])
            except errors.OutOfRangeError:
                # The product is passed as a logging argument: with
                # `msg % steps_per_epoch * epochs` the `%` binds first and
                # the formatted message is repeated `epochs` times.
                logging.warning(
                    'Your dataset iterator ran out of data; '
                    'interrupting training. Make sure that your dataset '
                    'can generate at least `steps_per_epoch * epochs` '
                    'batches (in this case, %d batches).',
                    steps_per_epoch * epochs)
                break

            batch_logs.update(outputs)
            callbacks._call_batch_hook(mode, 'end', step_index, batch_logs)
            step_index = step_index + step_count
            if callbacks.model.stop_training:
                break

        if (do_validation and
                training_utils.should_run_validation(validation_freq, epoch)):
            logging.info('Running validation at fit epoch: %s', epoch)

            if model._compile_distribution:
                # Since we create a new clone from the original model we need
                # to copy the weights back to the original model before we
                # can run validation.
                distributed_training_utils._copy_weights_to_original_model(
                    model, ModeKeys.TRAIN)

            val_outs = experimental_tpu_test_loop(  # pylint: disable=undefined-variable
                model,
                val_dataset,
                steps=validation_steps,
                verbose=verbose,
                callbacks=callbacks)
            if not isinstance(val_outs, list):
                val_outs = [val_outs]
            # Same labels assumed.
            for label, val_out in zip(out_labels, val_outs):
                epoch_logs['val_' + label] = val_out

        callbacks.on_epoch_end(epoch, epoch_logs)
        if callbacks.model.stop_training:
            break
    callbacks._call_end_hook(mode)

    if model._compile_distribution:
        # Copy the weights back from the replicated model to the original
        # model.
        distributed_training_utils._copy_weights_to_original_model(
            model, ModeKeys.TRAIN)
    scope.__exit__(None, None, None)
    return model.history
def __init__(self, name='true_positives', **kwargs):
    """Create the metric and its int32 counter, initialised to 0.

    Args:
        name: forwarded to the parent constructor as `name`.
        **kwargs: forwarded to the parent constructor.
    """
    super(BinaryTruePositives, self).__init__(name=name, **kwargs)
    # Flag the instance as stateful and allocate the running count.
    self.stateful = True
    self.true_positives = K.variable(value=0, dtype='int32')
def test_sparse_categorical_accuracy_float(self):
    """Checks the output shape when the labels are random floats."""
    with self.cached_session():
        accuracy_fn = metrics.sparse_categorical_accuracy
        float_labels = K.variable(np.random.random((6,)))
        scores = K.variable(np.random.random((6, 7)))
        # One accuracy value is expected per sample.
        per_sample = K.eval(accuracy_fn(float_labels, scores))
        self.assertEqual(per_sample.shape, (6,))
def test_sparse_categorical_accuracy(self):
    """Checks the output shape of sparse_categorical_accuracy."""
    with self.test_session():
        accuracy_fn = metrics.sparse_categorical_accuracy
        int_labels = K.variable(np.random.randint(0, 7, (6,)))
        scores = K.variable(np.random.random((6, 7)))
        # One accuracy value is expected per sample.
        per_sample = K.eval(accuracy_fn(int_labels, scores))
        self.assertEqual(per_sample.shape, (6,))
def __init__(self, optimizer):  # pylint: disable=super-init-not-called
    """Wrap `optimizer` and create a step counter for it.

    Args:
        optimizer: the wrapped optimizer; tracked under the name 'optimizer'.
    """
    self.optimizer = optimizer
    self._track_checkpointable(optimizer, name='optimizer')

    # int64 counter starting at 0, created under a name scope equal to the
    # class name.
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
def _experimental_fit_loop(
        model,
        iterator,
        epochs=100,
        verbose=1,
        callbacks=None,
        initial_epoch=0,
        steps_per_epoch=None,
        val_iterator=None,
        validation_steps=None):
    """Fit loop for training with TPU DistributionStrategy.

    Arguments:
        model: Keras Model instance.
        iterator: Iterator that returns inputs and targets
        epochs: Number of times to iterate over the data
        verbose: Integer, Verbosity mode, 0, 1 or 2
        callbacks: List of callbacks to be called during training
        initial_epoch: Epoch at which to start training
            (useful for resuming a previous training run)
        steps_per_epoch: Total number of steps (batches of samples)
            before declaring one epoch finished and starting the
            next epoch. Required: `None` raises `ValueError`.
        val_iterator: Iterator for validation data.
        validation_steps: Number of steps to run validation for
            (only if doing validation from data tensors).
            Validation is skipped when this is falsy.

    Returns:
        `model.history`.

    Raises:
        ValueError: if `steps_per_epoch` is `None`.
    """
    current_strategy = model._distribution_strategy

    K.get_session().run(current_strategy.initialize())

    def _per_device_fit_function(model):
        model._make_fit_function()
        return (model._fit_function.inputs, model._fit_function.outputs,
                model._fit_function.updates_op,
                model._fit_function.session_kwargs)

    # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
    K.set_learning_phase(1)

    out_labels = model.metrics_names or []

    def step_fn(ctx, inputs, targets):
        """Clones the model and calls make_fit_function."""
        # TODO(priyag, sourabhbajaj): The model gets cloned every time
        # fit/test/predict is called. We should look into caching this keyed
        # on input shapes.
        clone_model_on_replicas(
            model,
            current_strategy,
            make_callback_model=True,
            inputs=inputs,
            targets=targets,
            mode=_Mode.TRAIN)

        (grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args) = current_strategy.call_for_each_replica(
             _per_device_fit_function, args=(model._grouped_model_train,))
        (all_inputs, all_outputs, all_updates,
         all_session_args) = distributed_training_utils.unwrap_values(
             current_strategy, grouped_inputs, grouped_outputs,
             grouped_updates, grouped_session_args)
        combined_fn = K.function(
            all_inputs,
            all_outputs,
            updates=all_updates,
            name='distributed_fit_function',
            **all_session_args)

        for label, output in zip(out_labels, combined_fn.outputs):
            if label == 'loss':
                aggregation = distribute_lib.get_loss_reduction()
            else:
                # We aggregate all other metrics using mean for now. This is
                # temporary workaround until new metrics are in place.
                aggregation = variable_scope.VariableAggregation.MEAN
            ctx.set_last_step_output(label, output, aggregation)

        # TODO(priyag, sourabhbajaj): Ignoring these things from the
        # combined_fn: feed_dict, session kwargs, run options, run_metadata
        # for now. These should be handled appropriately
        return combined_fn.updates_op

    # Add initial dummy values for loss and other metric tensors.
    initial_loop_values = {}
    initial_loop_values['loss'] = constant_op.constant(1e7)
    for name, tensor in zip(model.metrics_names[1:], model.metrics_tensors):
        initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)

    if steps_per_epoch is None:
        raise ValueError('`steps_per_epoch` should be specified when calling '
                         '`fit` on the model.')
    # Backend variable so the number of steps per device run can be changed
    # between runs (see `steps_per_run.load` below).
    steps_per_run = K.variable(
        value=min(steps_per_epoch, current_strategy.steps_per_run),
        dtype='int32',
        name='steps_per_run')

    with current_strategy.scope():
        ctx = current_strategy.run_steps_on_dataset(
            step_fn, iterator, iterations=steps_per_run,
            initial_loop_values=initial_loop_values)

    train_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    do_validation = bool(validation_steps)

    # Copy the weights from the original model to each of the replicated
    # models.
    orig_model_weights = model.get_weights()
    with current_strategy.scope():
        distributed_model = current_strategy.unwrap(
            model._grouped_model_train)[0]
        distributed_training_utils.set_weights(
            current_strategy, distributed_model, orig_model_weights)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=do_validation,
        val_inputs=None,
        val_targets=None,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        verbose=verbose)

    # Calculate the steps each time on the device.
    steps_to_run = [current_strategy.steps_per_run] * (
        steps_per_epoch // current_strategy.steps_per_run)
    if steps_per_epoch % current_strategy.steps_per_run:
        steps_to_run.append(steps_per_epoch % current_strategy.steps_per_run)

    callbacks.on_train_begin()
    for epoch in range(initial_epoch, epochs):
        callbacks.on_epoch_begin(epoch)
        epoch_logs = {}
        step_index = 0
        prev_step_count = None
        for step_count in steps_to_run:
            batch_logs = {'batch': step_index, 'size': 1,
                          'num_steps': step_count}
            callbacks.on_batch_begin(step_index, batch_logs)
            # Only reload the variable when the step count actually changes.
            if prev_step_count is None or step_count != prev_step_count:
                steps_per_run.load(step_count, K.get_session())
                prev_step_count = step_count
            try:
                _, outputs = K.get_session().run([train_op, output_tensors])
            except errors.OutOfRangeError:
                # The product is passed as a logging argument: with
                # `msg % steps_per_epoch * epochs` the `%` binds first and
                # the formatted message is repeated `epochs` times.
                logging.warning(
                    'Your dataset iterator ran out of data; '
                    'interrupting training. Make sure that your dataset '
                    'can generate at least `steps_per_epoch * epochs` '
                    'batches (in this case, %d batches).',
                    steps_per_epoch * epochs)
                break

            batch_logs.update(outputs)
            callbacks.on_batch_end(step_index, batch_logs)
            step_index = step_index + step_count
            if callbacks.model.stop_training:
                break

        if do_validation:
            logging.info('Running validation at fit epoch: %s', epoch)

            # Since we create a new clone from the original model we need to
            # copy the weights back to the original model before we can run
            # validation.
            with current_strategy.scope():
                updated_weights = current_strategy.unwrap(
                    model._grouped_model_train)[0].get_weights()
                model.set_weights(updated_weights)

            val_outs = _experimental_test_loop(
                model,
                val_iterator,
                steps=validation_steps,
                verbose=verbose,
                initialize_finalize_strategy=False)
            if not isinstance(val_outs, list):
                val_outs = [val_outs]
            # Same labels assumed.
            for label, val_out in zip(out_labels, val_outs):
                epoch_logs['val_' + label] = val_out

        callbacks.on_epoch_end(epoch, epoch_logs)
        if callbacks.model.stop_training:
            break
    callbacks.on_train_end()

    # Copy the weights back from the replicated model to the original model.
    with current_strategy.scope():
        updated_weights = current_strategy.unwrap(
            model._grouped_model_train)[0].get_weights()
        model.set_weights(updated_weights)

    K.get_session().run(current_strategy.finalize())
    return model.history
def offset_conv2d_eval(depth, padding, x):
    """Run a stride-3 conv2d over `x` with a one-hot kernel.

    Args:
        depth: number of entries along the kernel's third axis; only the
            first one is 1, the remaining `depth - 1` are 0.
        padding: forwarded to `K.conv2d`.
        x: backend tensor to convolve.

    Returns:
        The tensor produced by `K.conv2d`.
    """
    # A single 1 followed by (depth - 1) zeros, wrapped into a 4-D array of
    # shape (1, 1, depth, 1).
    taps = [[1]] + [[0]] * (depth - 1)
    kernel_values = np.array([[taps]])
    kernel = K.variable(value=kernel_values, dtype='float32')
    return K.conv2d(x, kernel, strides=(3, 3), padding=padding)
def get_locallyconnected_mask(input_shape,
                              kernel_shape,
                              strides,
                              padding,
                              data_format,
                              dtype):
    """Return a mask representing connectivity of a locally-connected operation.

    This method returns a masking tensor of 0s and 1s (of type `dtype`) that,
    when element-wise multiplied with a fully-connected weight tensor, masks
    out the weights between disconnected input-output pairs and thus
    implements local connectivity through a sparse fully-connected weight
    tensor.

    Assume an unshared convolution with given parameters is applied to an
    input having N spatial dimensions with
    `input_shape = (d_in1, ..., d_inN)` to produce an output with spatial
    shape `(d_out1, ..., d_outN)` (determined by layer parameters such as
    `strides`).

    This method returns a mask which can be broadcast-multiplied
    (element-wise) with a 2*(N+1)-D weight matrix (equivalent to a
    fully-connected layer between (N+1)-D activations, i.e. N spatial + 1
    channel dimensions for input and output) to make it perform an unshared
    convolution with given `kernel_shape`, `strides`, `padding` and
    `data_format`.

    Arguments:
        input_shape: tuple of size N: `(d_in1, ..., d_inN)`
            spatial shape of the input.
        kernel_shape: tuple of size N, spatial shape of the convolutional
            kernel / receptive field.
        strides: tuple of size N, strides along each spatial dimension.
        padding: type of padding, string `"same"` or `"valid"`.
        data_format: a string, `"channels_first"` or `"channels_last"`.
        dtype: type of the layer operation, e.g. `tf.float64`.

    Returns:
        a `dtype`-tensor of shape
        `(1, d_in1, ..., d_inN, 1, d_out1, ..., d_outN)`
        if `data_format == "channels_first"`, or
        `(d_in1, ..., d_inN, 1, d_out1, ..., d_outN, 1)`
        if `data_format == "channels_last"`.

    Raises:
        ValueError: if `data_format` is neither `"channels_first"` nor
            `"channels_last"`.
    """
    connectivity = conv_utils.conv_kernel_mask(
        input_shape=input_shape,
        kernel_shape=kernel_shape,
        strides=strides,
        padding=padding)

    # The raw mask has N input axes followed by N output axes.
    ndims = connectivity.ndim // 2

    mask = K.variable(connectivity, dtype)

    # Positions at which the two singleton channel axes are inserted, in the
    # order they are applied.
    if data_format == 'channels_first':
        channel_axes = (0, -ndims - 1)
    elif data_format == 'channels_last':
        channel_axes = (ndims, -1)
    else:
        raise ValueError('Unrecognized data_format: ' + str(data_format))

    for axis in channel_axes:
        mask = K.expand_dims(mask, axis)
    return mask