def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  accumulators = [
      K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
  ]
  self.weights = accumulators
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  for p, g, a in zip(params, grads, accumulators):
    # Update accumulator with the decayed mean of squared gradients.
    new_a = self.rho * a + (1. - self.rho) * math_ops.square(g)
    self.updates.append(state_ops.assign(a, new_a))
    new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
  """Convert final layer features to bounding box parameters."""
  num_anchors = len(anchors)
  # Reshape to batch, height, width, num_anchors, box_params.
  anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

  grid_shape = K.shape(feats)[1:3]  # height, width
  grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                  [1, grid_shape[1], 1, 1])
  grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                  [grid_shape[0], 1, 1, 1])
  grid = K.concatenate([grid_x, grid_y])
  grid = K.cast(grid, K.dtype(feats))

  feats = K.reshape(
      feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

  # Adjust predictions to each spatial grid point and anchor size.
  box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
      grid_shape[..., ::-1], K.dtype(feats))
  box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
      input_shape[..., ::-1], K.dtype(feats))
  box_confidence = K.sigmoid(feats[..., 4:5])
  box_class_probs = K.sigmoid(feats[..., 5:])

  if calc_loss:
    return grid, feats, box_xy, box_wh
  return box_xy, box_wh, box_confidence, box_class_probs
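# A hedged usage sketch for yolo_head above: decode a dummy 13x13 feature map
# with 3 anchors and 80 classes. The anchor sizes, the 416x416 input, and the
# imports are illustrative assumptions, not values taken from the original.
import numpy as np
from tensorflow.keras import backend as K

anchors = np.array([(116, 90), (156, 198), (373, 326)], dtype='float32')
feats = K.random_uniform((1, 13, 13, 3 * (80 + 5)))  # one image, 255 channels
input_shape = K.constant([416, 416])  # network input height, width
box_xy, box_wh, box_conf, box_cls = yolo_head(
    feats, anchors, num_classes=80, input_shape=input_shape)
# box_xy and box_wh are normalized to the network input; box_conf and
# box_cls are sigmoid activations in [0, 1].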
def YOLOCorrectBoxes(box_xy, box_wh, input_shape, image_shape):
  '''Get corrected boxes.'''
  box_yx = box_xy[..., ::-1]
  box_hw = box_wh[..., ::-1]
  input_shape = K.cast(input_shape, K.dtype(box_yx))
  image_shape = K.cast(image_shape, K.dtype(box_yx))
  new_shape = K.round(image_shape * K.min(input_shape / image_shape))
  offset = (input_shape - new_shape) / 2. / input_shape
  scale = input_shape / new_shape
  box_yx = (box_yx - offset) * scale
  box_hw *= scale

  box_mins = box_yx - (box_hw / 2.)
  box_max = box_yx + (box_hw / 2.)
  boxes = K.concatenate([
      box_mins[..., 0:1],  # y_min
      box_mins[..., 1:2],  # x_min
      box_max[..., 0:1],   # y_max
      box_max[..., 1:2]    # x_max
  ])

  # Scale boxes back to original image shape.
  boxes *= K.concatenate([image_shape, image_shape])
  return boxes
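# Continuing the sketch above: map the normalized yolo_head outputs back to
# pixel coordinates on a hypothetical 480x640 source image that was
# letterboxed to the 416x416 network input.
image_shape = K.constant([480, 640])  # original image height, width
boxes = YOLOCorrectBoxes(box_xy, box_wh, input_shape, image_shape)
# boxes holds [..., (y_min, x_min, y_max, x_max)] in original-image pixels.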
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))

  t = K.cast(self.iterations, K.floatx()) + 1

  # Applies bounds on actual learning rate.
  step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                    (1. - K.pow(self.beta_1, t)))
  final_lr = self.final_lr * lr / self.base_lr
  lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
  upper_bound = final_lr * (1. + 1. / (self.gamma * t))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsbound:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    # Apply weight decay.
    if self.weight_decay != 0.:
      g += self.weight_decay * K.stop_gradient(p)

    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
    if self.amsbound:
      vhat_t = K.maximum(vhat, v_t)
      denom = (K.sqrt(vhat_t) + self.epsilon)
      self.updates.append(K.update(vhat, vhat_t))
    else:
      denom = (K.sqrt(v_t) + self.epsilon)

    # Compute the bounds and clip the per-parameter step into them.
    step_size_p = step_size * K.ones_like(denom)
    step_size_p_bound = step_size_p / denom
    bounded_lr_t = m_t * K.minimum(
        K.maximum(step_size_p_bound, lower_bound), upper_bound)

    p_t = p - bounded_lr_t

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
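# A hedged numeric sketch of the AdaBound-style clipping schedule above: with
# an assumed final_lr of 0.1 and gamma of 1e-3, the [lower, upper] interval
# starts essentially unconstrained and tightens around final_lr as t grows.
final_lr, gamma = 0.1, 1e-3
for t in (1., 1e3, 1e5):
  lower = final_lr * (1. - 1. / (gamma * t + 1.))
  upper = final_lr * (1. + 1. / (gamma * t))
  print(t, lower, upper)
# t=1e0 -> (0.0001, 100.1); t=1e3 -> (0.05, 0.2); t=1e5 -> (~0.099, ~0.101)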
def _create_all_weights(self, params):
  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats
  return ms, vs, vhats
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                     K.dtype(self.decay))))

  t = math_ops.cast(self.iterations, K.floatx()) + 1

  # Due to the recommendations in [2], i.e. warming momentum schedule.
  momentum_cache_t = self.beta_1 * (
      1. - 0.5 *
      (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
  momentum_cache_t_1 = self.beta_1 * (
      1. - 0.5 *
      (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
  m_schedule_new = self.m_schedule * momentum_cache_t
  m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
  self.updates.append((self.m_schedule, m_schedule_new))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    # The following equations are given in [1].
    g_prime = g / (1. - m_schedule_new)
    m_t = self.beta_1 * m + (1. - self.beta_1) * g
    m_t_prime = m_t / (1. - m_schedule_next)
    v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      self.updates.append(state_ops.assign(vhat, vhat_t))
      v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t))
    else:
      v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
    m_t_bar = (1. -
               momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))

    p_t = p - lr * m_t_bar / (gen_math_ops.sqrt(v_t_prime) + self.epsilon)
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a tensor encoding a batch of images.

  Arguments:
    x: Input tensor, 3D or 4D.
    data_format: Data format of the image tensor.
    mode: One of "caffe", "tf" or "torch".
      - caffe: will convert the images from RGB to BGR,
        then will zero-center each color channel with
        respect to the ImageNet dataset, without scaling.
      - tf: will scale pixels between -1 and 1, sample-wise.
      - torch: will scale pixels between 0 and 1 and then
        will normalize each channel with respect to the
        ImageNet dataset.

  Returns:
    Preprocessed tensor.
  """
  global _IMAGENET_MEAN

  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  if mode == 'torch':
    x /= 255.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
  else:
    if data_format == 'channels_first':
      # 'RGB'->'BGR'
      if K.ndim(x) == 3:
        x = x[::-1, ...]
      else:
        x = x[:, ::-1, ...]
    else:
      # 'RGB'->'BGR'
      x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None

  if _IMAGENET_MEAN is None:
    _IMAGENET_MEAN = constant_op.constant(-np.array(mean), dtype=K.floatx())

  # Zero-center by mean pixel.
  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
    x = K.bias_add(x, math_ops.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
  else:
    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
  if std is not None:
    x /= std
  return x
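# A hedged numpy check of the per-mode arithmetic documented above, applied
# to a single RGB pixel (values illustrative).
import numpy as np

pixel = np.array([255., 128., 0.])  # R, G, B
tf_mode = pixel / 127.5 - 1.        # scaled sample-wise into [-1, 1]
torch_mode = (pixel / 255. - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
caffe_mode = pixel[::-1] - [103.939, 116.779, 123.68]  # BGR, mean-centered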
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, K.floatx())

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  beta_1_power = math_ops.pow(self.beta_1, t)
  beta_2_power = math_ops.pow(self.beta_2, t)
  rho_t = self.rho_inf - 2.0 * t * beta_2_power / (1.0 - beta_2_power)
  lr_t = tf.where(
      rho_t >= 5.0,
      K.sqrt((rho_t - 4.) * (rho_t - 2.) * self.rho_inf /
             ((self.rho_inf - 4.) * (self.rho_inf - 2.) * rho_t)) * lr *
      (K.sqrt(1. - beta_2_power) / (1. - beta_1_power)),
      self.warmup_coef * lr / (1. - beta_1_power))

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      p_t = p - lr_t * tf.where(rho_t >= 5.0,
                                m_t / (K.sqrt(vhat_t) + self.epsilon), m_t)
      self.updates.append(state_ops.assign(vhat, vhat_t))
    else:
      p_t = p - lr_t * tf.where(rho_t >= 5.0,
                                m_t / (K.sqrt(v_t) + self.epsilon), m_t)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def get_updates_ADAM(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  base_lr = self._optimizer.lr
  if self._optimizer._initial_decay > 0:
    base_lr = base_lr * (  # pylint: disable=g-no-augmented-assignment
        1. / (1. + self._optimizer.decay * math_ops.cast(
            self._optimizer.iterations, K.dtype(self._optimizer.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self._optimizer.iterations, K.floatx())
  base_lr_t = base_lr * (
      K.sqrt(1. - math_ops.pow(self._optimizer.beta_2, t)) /
      (1. - math_ops.pow(self._optimizer.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self._optimizer.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    if self._get_multiplier(p) is None:
      multiplier = 1.0
    else:
      multiplier = self._get_multiplier(p)

    m_t = (self._optimizer.beta_1 * m) + (1. - self._optimizer.beta_1) * g
    v_t = (self._optimizer.beta_2 * v) + (
        1. - self._optimizer.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      p_t = p - base_lr_t * multiplier * m_t / (
          K.sqrt(vhat_t) + self._optimizer.epsilon)
      self.updates.append(state_ops.assign(vhat, vhat_t))
    else:
      p_t = p - base_lr_t * multiplier * m_t / (
          K.sqrt(v_t) + self._optimizer.epsilon)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))

  t = K.cast(self.iterations, K.floatx()) + 1
  # Bias corrections according to the Adam paper.
  lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
               (1. - K.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  self.weights = [self.iterations] + ms + vs

  for p, g, m, v in zip(params, grads, ms, vs):
    # Add a lr multiplier for vars outside excluded_vars.
    if p.name in self.excluded_vars:
      multiplied_lr_t = lr_t
    else:
      multiplied_lr_t = lr_t * self.lr_mult

    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

    # Schedule multiplier eta_t = 1 for simple AdamW. According to the
    # AdamW paper, eta_t can be fixed, decay, or also be used for warm
    # restarts (AdamWR to come).
    eta_t = 1.
    p_t = p - eta_t * (multiplied_lr_t * m_t / (K.sqrt(v_t) + self.epsilon))
    if self.weight_decay != 0:
      # Normalized weight decay according to the AdamW paper.
      w_d = self.weight_decay * K.sqrt(self.batch_size /
                                       (self.samples_per_epoch * self.epochs))
      p_t = p_t - eta_t * (w_d * p)

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
def multi_prototype_categorical_accuracy(y_true_one_hot, y_pred, K):
  y_pred = ops.convert_to_tensor_v2_with_dispatch(y_pred)
  y_true = ops.convert_to_tensor_v2_with_dispatch(y_true_one_hot)
  y_true = math_ops.argmax(y_true, axis=1)
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the predicted output and actual output types don't match, force cast
  # them to match.
  if backend.dtype(y_pred) != backend.dtype(y_true):
    y_pred = math_ops.cast(y_pred, backend.dtype(y_true))

  # Each class owns K consecutive prototypes, so integer division by K maps
  # the predicted prototype id back to its class id.
  return math_ops.cast(math_ops.equal(y_true, y_pred // K), backend.floatx())
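# A hedged toy check of the multi-prototype accuracy above: with K=2
# prototypes per class, prototype ids {0, 1} belong to class 0 and {2, 3}
# to class 1, so floor division by K recovers the class id.
y_true_one_hot = backend.constant([[0., 1.], [1., 0.]])  # classes 1, 0
y_pred = backend.constant([[.1, .2, .3, .4],   # argmax 3 -> 3 // 2 = class 1
                           [.6, .1, .2, .1]])  # argmax 0 -> 0 // 2 = class 0
acc = multi_prototype_categorical_accuracy(y_true_one_hot, y_pred, K=2)
# -> [1., 1.]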
def sparse_categorical_accuracy(y_true, y_pred):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,).
  if len(K.int_shape(y_true)) == len(K.int_shape(y_pred)):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the predicted output and actual output types don't match, force cast
  # them to match.
  if K.dtype(y_pred) != K.dtype(y_true):
    y_pred = math_ops.cast(y_pred, K.dtype(y_true))

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
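# A hedged toy check of sparse_categorical_accuracy above: (batch, 1) labels
# are squeezed, and argmax over the two-class predictions yields [1, 0].
y_true = K.constant([[1.], [0.]])
y_pred = K.constant([[0.1, 0.9], [0.8, 0.2]])
acc = sparse_categorical_accuracy(y_true, y_pred)  # -> [1., 1.]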
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  t = math_ops.cast(self.iterations, K.floatx()) + 1
  lr_t = lr * (
      K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
      (1. - math_ops.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    # Add a lr multiplier for vars outside excluded_vars.
    if p.name in self.excluded_vars:
      multiplied_lr_t = lr_t
    else:
      multiplied_lr_t = lr_t * self.lr_mult

    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      p_t = p - multiplied_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
      self.updates.append(state_ops.assign(vhat, vhat_t))
    else:
      p_t = p - multiplied_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def new_sparse_categorical_accuracy(y_true, y_pred):
  y_pred_rank = ops.convert_to_tensor(y_pred).get_shape().ndims
  y_true_rank = ops.convert_to_tensor(y_true).get_shape().ndims
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,).
  if (y_true_rank is not None) and (y_pred_rank is not None) and (
      len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the predicted output and actual output types don't match, force cast
  # them to match.
  if K.dtype(y_pred) != K.dtype(y_true):
    y_pred = math_ops.cast(y_pred, K.dtype(y_true))

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                     K.dtype(self.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, K.floatx())
  lr_t = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (
      1. - math_ops.pow(self.beta_1, t))
  lower_bound = self.lr_boost * (1. - 1. / (self.gamma * t + 1.))
  upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t))
  if self.sgdcorr:
    m_rate = 1. - self.beta_1 / (self.gamma * t + 1.)
  else:
    m_rate = 1. - self.beta_1

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    m_t = (self.beta_1 * m) + m_rate * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(vhat_t) + self.epsilon)
      self.updates.append(state_ops.assign(vhat, vhat_t))
    else:
      lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(v_t) + self.epsilon)
    lr_bound = gen_math_ops.minimum(
        gen_math_ops.maximum(lr_t * lr_v, lower_bound), upper_bound)
    p_t = p - lr * lr_bound * m_t

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def get_updates_Padam(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  base_lr = self._optimizer.learning_rate
  if self.initial_decay > 0:
    base_lr = base_lr * (1. / (1. + self.decay * K.cast(
        self.iterations, K.dtype(self.decay))))

  t = K.cast(self.iterations, K.floatx()) + 1
  lr_t = base_lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                    (1. - K.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    if self._get_multiplier(p) is None:
      multiplier = 1.0
    else:
      multiplier = self._get_multiplier(p)

    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
    if self.amsgrad:
      vhat_t = K.maximum(vhat, v_t)
      denom = (K.sqrt(vhat_t) + self.epsilon)
      self.updates.append(K.update(vhat, vhat_t))
    else:
      denom = (K.sqrt(v_t) + self.epsilon)

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(v, v_t))

    # Partial momentum adaption.
    new_p = p - (lr_t * multiplier * (m_t / (denom**(self.partial * 2))))

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                     K.dtype(self.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, K.floatx())
  lr_t = lr * (gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) /
               (1. - math_ops.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    m_t = (self.beta_1 * m) + self.beta_g * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      p_t_ada = p - lr_t * m_t / (gen_math_ops.sqrt(vhat_t) + self.epsilon)
      self.updates.append(state_ops.assign(vhat, vhat_t))
    else:
      p_t_ada = p - lr_t * m_t / (gen_math_ops.sqrt(v_t) + self.epsilon)
    p_t_sgd = p - self.lr_boost * lr * m_t

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))

    # Switch between the plain SGD step and the adaptive step.
    new_p = m_switch(self.switch_flag, p_t_sgd, p_t_ada)

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def call(self, inputs):
  dtype = K.dtype(inputs)
  if dtype != 'int32' and dtype != 'int64':
    inputs = math_ops.cast(inputs, 'int32')
  # Ids below `input_dim` index the pretrained table; the remaining ids
  # index the additional table, shifted down by `input_dim`. Clamp each
  # branch's ids into range, look both up, and blend with a mask so every
  # position gets the embedding from the table that owns its id.
  is_pre = inputs < self.input_dim
  zeros = tf.zeros_like(inputs)
  pre_ids = tf.where(is_pre, inputs, zeros)
  add_ids = tf.where(is_pre, zeros, inputs - self.input_dim)
  pre_out = embedding_ops.embedding_lookup(self.embeddings, pre_ids)
  add_out = embedding_ops.embedding_lookup(self.additional_embeddings,
                                           add_ids)
  mask = tf.expand_dims(tf.cast(is_pre, K.dtype(pre_out)), -1)
  return mask * pre_out + (1. - mask) * add_out
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [state_ops.assign_add(self.iterations, 1)]
  accumulators, delta_accumulators = self._create_all_weights(params)

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. / (1. + self.decay * math_ops.cast(
        self.iterations, backend.dtype(self.decay))))

  for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
    # Update accumulator.
    new_a = self.rho * a + (1. - self.rho) * math_ops.square(g)
    self.updates.append(state_ops.assign(a, new_a))

    # Use the new accumulator and the *old* delta_accumulator.
    update = g * backend.sqrt(d_a + self.epsilon) / backend.sqrt(
        new_a + self.epsilon)
    new_p = p - lr * update

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))

    # Update delta_accumulator.
    new_d_a = self.rho * d_a + (1 - self.rho) * math_ops.square(update)
    self.updates.append(state_ops.assign(d_a, new_d_a))
  return self.updates
def __init__(self,
             lr=0.001,
             beta_1=0.9,
             beta_2=0.999,
             epsilon=None,
             decay=0.,
             amsgrad=False,
             accumulation_steps=2,
             **kwargs):
  super(AccumulatedAdam, self).__init__(**kwargs)
  with K.name_scope(self.__class__.__name__):
    self.iterations = K.variable(0, dtype='int64', name='iterations')
    self.loss = K.variable(0, dtype='float32', name='loss')
    self.lr = K.variable(lr, name='lr')
    self.beta_1 = K.variable(beta_1, name='beta_1')
    self.beta_2 = K.variable(beta_2, name='beta_2')
    self.decay = K.variable(decay, name='decay')
  if epsilon is None:
    epsilon = K.epsilon()
  self.epsilon = epsilon
  self.initial_decay = decay
  self.amsgrad = amsgrad
  self.accum_iters = K.variable(accumulation_steps, K.dtype(self.iterations))
  self.accum_iters_float = K.cast(self.accum_iters, K.floatx())
def call(self, inputs, training=None):
  if K.dtype(inputs) != 'int32':
    inputs = K.cast(inputs, 'int32')

  def _l2normalize(v, eps=1e-12):
    return v / (K.sum(v ** 2) ** 0.5 + eps)

  def power_iteration(W, u):
    # According to the paper, one power-iteration step is enough.
    _u = u
    _v = _l2normalize(K.dot(_u, K.transpose(W)))
    _u = _l2normalize(K.dot(_v, W))
    return _u, _v

  W_shape = self.embeddings.shape.as_list()
  # Flatten the tensor.
  W_reshaped = K.reshape(self.embeddings, [-1, W_shape[-1]])
  _u, _v = power_iteration(W_reshaped, self.u)
  # Calculate sigma, the estimate of the largest singular value.
  sigma = K.dot(_v, W_reshaped)
  sigma = K.dot(sigma, K.transpose(_u))
  # Normalize the weight by it.
  W_bar = W_reshaped / sigma
  # Reshape the weight tensor back.
  if training in {0, False}:
    W_bar = K.reshape(W_bar, W_shape)
  else:
    with tf.control_dependencies([self.u.assign(_u)]):
      W_bar = K.reshape(W_bar, W_shape)
  self.embeddings = W_bar
  out = K.gather(self.embeddings, inputs)
  return out
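# A hedged numpy check of the power iteration used above: a single step from
# a random start already gives a usable estimate of the largest singular
# value of W (shapes here are illustrative).
import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(64, 32))
u = rng.normal(size=(1, 32))
v = u @ W.T
v /= np.linalg.norm(v) + 1e-12
u = v @ W
u /= np.linalg.norm(u) + 1e-12
sigma = float(v @ W @ u.T)
print(sigma, np.linalg.svd(W, compute_uv=False)[0])  # estimate vs. exact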
def _prep_predictions(y_true, y_pred):
  y_pred = ops.convert_to_tensor_v2_with_dispatch(y_pred)
  y_true = ops.convert_to_tensor_v2_with_dispatch(y_true)
  y_pred_rank = y_pred.shape.ndims
  y_true_rank = y_true.shape.ndims
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,).
  if (y_true_rank is not None) and (y_pred_rank is not None) and (
      len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the predicted output and actual output types don't match, force cast
  # them to match.
  if K.dtype(y_pred) != K.dtype(y_true):
    y_pred = math_ops.cast(y_pred, K.dtype(y_true))

  return y_true, y_pred
def call(self, inputs):
  dtype = K.dtype(inputs)
  if dtype != 'int32' and dtype != 'int64':
    inputs = math_ops.cast(inputs, 'int32')
  out = embedding_ops.embedding_lookup(self.embeddings, inputs)
  return out
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  # Calculate the truncated step size.
  global_grad_norm = tf.global_norm(grads)**2  # TODO: verify this
  lr_trunc = tf.cond(0 < global_grad_norm,
                     lambda: tf.divide(loss, global_grad_norm),
                     lambda: lr)
  lr = tf.minimum(lr, lr_trunc)

  # Momentum.
  shapes = [K.int_shape(p) for p in params]
  moments = [K.zeros(shape) for shape in shapes]
  self.weights = [self.iterations] + moments
  for p, g, m in zip(params, grads, moments):
    v = self.momentum * m - lr * g  # velocity
    self.updates.append(state_ops.assign(m, v))

    if self.nesterov:
      new_p = p + self.momentum * v - lr * g
    else:
      new_p = p + v

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def _create_all_weights(self, params):
  accumulators = [
      backend.zeros(backend.int_shape(p), dtype=backend.dtype(p))
      for p in params
  ]
  self.weights = accumulators
  return accumulators
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  shapes = [K.int_shape(p) for p in params]
  accumulators = [K.zeros(shape) for shape in shapes]
  delta_accumulators = [K.zeros(shape) for shape in shapes]
  self.weights = accumulators + delta_accumulators
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
    # Update accumulator.
    new_a = self.rho * a + (1. - self.rho) * math_ops.square(g)
    self.updates.append(state_ops.assign(a, new_a))

    # Use the new accumulator and the *old* delta_accumulator.
    update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
    new_p = p - lr * update

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))

    # Update delta_accumulator.
    new_d_a = self.rho * d_a + (1 - self.rho) * math_ops.square(update)
    self.updates.append(state_ops.assign(d_a, new_d_a))
  return self.updates
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  # Momentum.
  shapes = [K.int_shape(p) for p in params]
  moments = [K.zeros(shape) for shape in shapes]
  self.weights = [self.iterations] + moments
  for p, g, m in zip(params, grads, moments):
    v = self.momentum * m - lr * g  # velocity
    self.updates.append(state_ops.assign(m, v))

    if self.nesterov:
      new_p = p + self.momentum * v - lr * g
    else:
      new_p = p + v

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
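# A hedged numeric trace of the momentum update above with lr=0.1,
# momentum=0.9 and a constant gradient g=1: the velocity converges toward
# the steady state -lr * g / (1 - momentum) = -1.0.
lr, momentum, g, m = 0.1, 0.9, 1.0, 0.0
for _ in range(3):
  m = momentum * m - lr * g
  print(m)  # -0.1, -0.19, -0.271, ...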
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  shapes = [K.int_shape(p) for p in params]
  accumulators = [K.zeros(shape) for shape in shapes]
  self.weights = accumulators
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  # Calculate the truncated-adagrad step size.
  rotated_grad_norm = 0
  for g, a in zip(grads, accumulators):
    rotated_grad_norm += tf.reduce_sum(
        tf.multiply(tf.truediv(g, tf.sqrt(a) + self.epsilon), g))  # TODO: verify this
  lr_trunc = tf.cond(0 < rotated_grad_norm,
                     lambda: tf.divide(loss, rotated_grad_norm),
                     lambda: lr)
  lr = tf.minimum(lr, lr_trunc)

  for p, g, a in zip(params, grads, accumulators):
    new_a = a + math_ops.square(g)  # update accumulator
    self.updates.append(state_ops.assign(a, new_a))
    new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. / (1. + self.decay * math_ops.cast(self.iterations,
                                              backend.dtype(self.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, backend.floatx())
  lr_t = lr / (1. - math_ops.pow(self.beta_1, t))

  ms, us = self._create_all_weights(params)

  for p, g, m, u in zip(params, grads, ms, us):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    u_t = math_ops.maximum(self.beta_2 * u, math_ops.abs(g))
    p_t = p - lr_t * m_t / (u_t + self.epsilon)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(u, u_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [state_ops.assign_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. / (1. + self.decay * math_ops.cast(
        self.iterations, backend.dtype(self.decay))))
  # Momentum.
  moments = self._create_all_weights(params)
  for p, g, m in zip(params, grads, moments):
    v = self.momentum * m - lr * g  # velocity
    self.updates.append(state_ops.assign(m, v))

    if self.nesterov:
      new_p = p + self.momentum * v - lr * g
    else:
      new_p = p + v

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, K.floatx())
  lr_t = lr * (
      K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
      (1. - math_ops.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
      self.updates.append(state_ops.assign(vhat, vhat_t))
    else:
      p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def sparse_categorical_accuracy(y_true, y_pred):
  y_true = math_ops.reduce_max(y_true, axis=-1)
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def sparse_categorical_accuracy(y_true, y_pred):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,).
  if len(K.int_shape(y_true)) == len(K.int_shape(y_pred)):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, K.floatx())
  lr_t = lr / (1. - math_ops.pow(self.beta_1, t))

  shapes = [K.int_shape(p) for p in params]
  # Zero init of 1st moment.
  ms = [K.zeros(shape) for shape in shapes]
  # Zero init of exponentially weighted infinity norm.
  us = [K.zeros(shape) for shape in shapes]
  self.weights = [self.iterations] + ms + us

  for p, g, m, u in zip(params, grads, ms, us):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    u_t = math_ops.maximum(self.beta_2 * u, math_ops.abs(g))
    p_t = p - lr_t * m_t / (u_t + self.epsilon)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(u, u_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def __init__(self,
             input_shape=None,
             batch_size=None,
             dtype=None,
             input_tensor=None,
             sparse=False,
             name=None,
             **kwargs):
  strategy = distribution_strategy_context.get_strategy()
  if strategy and batch_size is not None and \
      distributed_training_utils.global_batch_size_supported(strategy):
    if batch_size % strategy.num_replicas_in_sync != 0:
      raise ValueError('The `batch_size` argument value {} is not '
                       'divisible by the number of replicas {}'.format(
                           batch_size, strategy.num_replicas_in_sync))
    batch_size = batch_size // strategy.num_replicas_in_sync

  if 'batch_input_shape' in kwargs:
    batch_input_shape = kwargs.pop('batch_input_shape')
    if input_shape and batch_input_shape:
      raise ValueError('Only provide the input_shape OR '
                       'batch_input_shape argument to '
                       'InputLayer, not both at the same time.')
    batch_size = batch_input_shape[0]
    input_shape = batch_input_shape[1:]
  if kwargs:
    raise ValueError('Unrecognized keyword arguments:', kwargs.keys())

  if not name:
    prefix = 'input'
    name = prefix + '_' + str(backend.get_uid(prefix))

  if not dtype:
    if input_tensor is None:
      dtype = backend.floatx()
    else:
      dtype = backend.dtype(input_tensor)
  elif input_tensor is not None and input_tensor.dtype != dtype:
    raise ValueError('`input_tensor.dtype` differs from `dtype`: %s vs. %s' %
                     (input_tensor.dtype, dtype))
  super(InputLayer, self).__init__(dtype=dtype, name=name)
  self.built = True
  self.sparse = sparse
  self.batch_size = batch_size
  self.supports_masking = True

  if isinstance(input_shape, tensor_shape.TensorShape):
    input_shape = tuple(input_shape.as_list())
  elif isinstance(input_shape, int):
    input_shape = (input_shape,)

  if input_tensor is None:
    if input_shape is not None:
      batch_input_shape = (batch_size,) + tuple(input_shape)
    else:
      batch_input_shape = None
    graph = backend.get_graph()
    with graph.as_default():
      # In graph mode, create a graph placeholder to call the layer on.
      if sparse:
        input_tensor = backend.placeholder(
            shape=batch_input_shape,
            dtype=dtype,
            name=self.name,
            sparse=True)
      else:
        input_tensor = backend.placeholder(
            shape=batch_input_shape,
            dtype=dtype,
            name=self.name)

    self.is_placeholder = True
    self._batch_input_shape = batch_input_shape
  else:
    if not tf_utils.is_symbolic_tensor(input_tensor):
      raise ValueError('You should not pass an EagerTensor to `Input`. '
                       'For example, instead of creating an '
                       'InputLayer, you should instantiate your model and '
                       'directly call it on your input.')
    self.is_placeholder = False
    self._batch_input_shape = tuple(input_tensor.shape.as_list())

  # Create an input node to add to self.outbound_node
  # and set output_tensors' _keras_history.
  input_tensor._keras_history = (self, 0, 0)  # pylint: disable=protected-access
  input_tensor._keras_mask = None
  base_layer.Node(
      self,
      inbound_layers=[],
      node_indices=[],
      tensor_indices=[],
      input_tensors=[input_tensor],
      output_tensors=[input_tensor])
def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
               input_data=None, expected_output=None,
               expected_output_dtype=None, fixed_batch_size=False):
  # Avoid a mutable default argument for kwargs.
  kwargs = kwargs or {}

  # Generate input data.
  if input_data is None:
    if not input_shape:
      raise AssertionError()
    if not input_dtype:
      input_dtype = K.floatx()
    input_data_shape = list(input_shape)
    for i, e in enumerate(input_data_shape):
      if e is None:
        input_data_shape[i] = np.random.randint(1, 4)

    if all(isinstance(e, tuple) for e in input_data_shape):
      input_data = []
      for e in input_data_shape:
        input_data.append((10 * np.random.random(e)).astype(input_dtype))
    else:
      input_data = (10 * np.random.random(input_data_shape))
      input_data = input_data.astype(input_dtype)
  else:
    if input_shape is None:
      input_shape = input_data.shape
    if input_dtype is None:
      input_dtype = input_data.dtype
  if expected_output_dtype is None:
    expected_output_dtype = input_dtype

  # Instantiation.
  layer = layer_cls(**kwargs)

  # Test get_weights, set_weights at layer level.
  weights = layer.get_weights()
  layer.set_weights(weights)

  try:
    expected_output_shape = layer.compute_output_shape(input_shape)
  except Exception:
    expected_output_shape = layer._compute_output_shape(input_shape)

  # Test in functional API.
  if isinstance(input_shape, list):
    if fixed_batch_size:
      x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]
    else:
      x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
  else:
    if fixed_batch_size:
      x = Input(batch_shape=input_shape, dtype=input_dtype)
    else:
      x = Input(shape=input_shape[1:], dtype=input_dtype)
  y = layer(x)
  if not (K.dtype(y) == expected_output_dtype):
    raise AssertionError()

  # Check with the functional API.
  model = Model(x, y)

  actual_output = model.predict(input_data)
  actual_output_shape = actual_output.shape
  for expected_dim, actual_dim in zip(expected_output_shape,
                                      actual_output_shape):
    if expected_dim is not None:
      if not (expected_dim == actual_dim):
        raise AssertionError()
  if expected_output is not None:
    assert_allclose(actual_output, expected_output, rtol=1e-3)

  # Test serialization and weight setting at model level.
  model_config = model.get_config()
  recovered_model = model.__class__.from_config(model_config)
  if model.weights:
    weights = model.get_weights()
    recovered_model.set_weights(weights)
    _output = recovered_model.predict(input_data)
    assert_allclose(_output, actual_output, rtol=1e-3)

  # Test training mode (e.g. useful when the layer has a
  # different behavior at training and testing time).
  if has_arg(layer.call, 'training'):
    model.compile('rmsprop', 'mse')
    model.train_on_batch(input_data, actual_output)

  # Test instantiation from layer config.
  layer_config = layer.get_config()
  layer_config['batch_input_shape'] = input_shape
  layer = layer.__class__.from_config(layer_config)

  # For further checks in the caller function.
  return actual_output
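# A hedged usage sketch for layer_test above, exercising a stock Dense layer
# on a batch of 2 samples with 4 features; assumes the surrounding test
# module already imports Dense alongside Input and Model.
output = layer_test(Dense, kwargs={'units': 3}, input_shape=(2, 4))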
def call(self, inputs):
  if K.dtype(inputs) != 'int32':
    inputs = math_ops.cast(inputs, 'int32')
  inputs = array_ops.one_hot(inputs, self.input_dim)
  return math_ops.tensordot(inputs, self.embeddings, 1)
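# A hedged numpy check that the one-hot/tensordot lookup above is equivalent
# to a plain row gather from the embedding matrix.
import numpy as np

emb = np.arange(12, dtype='float32').reshape(4, 3)  # input_dim=4, output_dim=3
ids = np.array([2, 0])
one_hot = np.eye(4, dtype='float32')[ids]
assert np.allclose(one_hot @ emb, emb[ids])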
def __init__(self,
             input_shape=None,
             batch_size=None,
             dtype=None,
             input_tensor=None,
             sparse=False,
             name=None,
             **kwargs):
  if 'batch_input_shape' in kwargs:
    batch_input_shape = kwargs.pop('batch_input_shape')
    if input_shape and batch_input_shape:
      raise ValueError('Only provide the input_shape OR '
                       'batch_input_shape argument to '
                       'InputLayer, not both at the same time.')
    batch_size = batch_input_shape[0]
    input_shape = batch_input_shape[1:]
  if kwargs:
    raise ValueError('Unrecognized keyword arguments:', kwargs.keys())

  if not name:
    prefix = 'input'
    name = prefix + '_' + str(backend.get_uid(prefix))

  if not dtype:
    if input_tensor is None:
      dtype = backend.floatx()
    else:
      dtype = backend.dtype(input_tensor)
  super(InputLayer, self).__init__(dtype=dtype, name=name)
  self.built = True
  self.sparse = sparse
  self.batch_size = batch_size
  self.supports_masking = True

  if isinstance(input_shape, tensor_shape.TensorShape):
    input_shape = tuple(input_shape.as_list())

  if input_tensor is None:
    if input_shape is not None:
      batch_input_shape = (batch_size,) + tuple(input_shape)
    else:
      batch_input_shape = None
    graph = backend.get_graph()
    with graph.as_default():
      # In graph mode, create a graph placeholder to call the layer on.
      if sparse:
        input_tensor = backend.placeholder(
            shape=batch_input_shape,
            dtype=dtype,
            name=self.name,
            sparse=True)
      else:
        input_tensor = backend.placeholder(
            shape=batch_input_shape,
            dtype=dtype,
            name=self.name)

    self.is_placeholder = True
    self._batch_input_shape = batch_input_shape
  else:
    if not tf_utils.is_symbolic_tensor(input_tensor):
      raise ValueError('You should not pass an EagerTensor to `Input`. '
                       'For example, instead of creating an '
                       'InputLayer, you should instantiate your model and '
                       'directly call it on your input.')
    self.is_placeholder = False
    self._batch_input_shape = tuple(input_tensor.get_shape().as_list())

  # Create an input node to add to self.outbound_node
  # and set output_tensors' _keras_history.
  input_tensor._keras_history = (self, 0, 0)  # pylint: disable=protected-access
  base_layer.Node(
      self,
      inbound_layers=[],
      node_indices=[],
      tensor_indices=[],
      input_tensors=[input_tensor],
      output_tensors=[input_tensor])
def __init__(self,
             input_shape=None,
             batch_size=None,
             dtype=None,
             input_tensor=None,
             sparse=False,
             name=None,
             **kwargs):
  if 'batch_input_shape' in kwargs:
    batch_input_shape = kwargs.pop('batch_input_shape')
    if input_shape and batch_input_shape:
      raise ValueError('Only provide the input_shape OR '
                       'batch_input_shape argument to '
                       'InputLayer, not both at the same time.')
    batch_size = batch_input_shape[0]
    input_shape = batch_input_shape[1:]
  if kwargs:
    raise ValueError('Unrecognized keyword arguments:', kwargs.keys())

  if not name:
    prefix = 'input'
    name = prefix + '_' + str(K.get_uid(prefix))

  if not dtype:
    if input_tensor is None:
      dtype = K.floatx()
    else:
      dtype = K.dtype(input_tensor)
  super(InputLayer, self).__init__(dtype=dtype, name=name)
  self.built = True
  self.sparse = sparse
  self.batch_size = batch_size

  if isinstance(input_shape, tensor_shape.TensorShape):
    input_shape = tuple(input_shape.as_list())

  if input_tensor is None:
    if input_shape is not None:
      batch_input_shape = (batch_size,) + tuple(input_shape)
    else:
      batch_input_shape = None

    if context.executing_eagerly():
      # In eager mode, create a temporary placeholder to call the layer on.
      input_tensor = base_layer.DeferredTensor(  # pylint: disable=protected-access
          shape=batch_input_shape,
          dtype=dtype,
          name=self.name)
    else:
      # In graph mode, create a graph placeholder to call the layer on.
      if sparse:
        input_tensor = array_ops.sparse_placeholder(
            shape=batch_input_shape,
            dtype=dtype,
            name=self.name)
      else:
        input_tensor = array_ops.placeholder(
            shape=batch_input_shape,
            dtype=dtype,
            name=self.name)

    # For compatibility with Keras API.
    self.is_placeholder = True
    self._batch_input_shape = batch_input_shape
  else:
    # For compatibility with Keras API.
    self.is_placeholder = False
    self._batch_input_shape = tuple(input_tensor.get_shape().as_list())

    if context.executing_eagerly():
      raise ValueError('You should not pass an input tensor when executing '
                       'in eager mode. For example, instead of creating an '
                       'InputLayer, you should instantiate your model and '
                       'directly call it on your input.')

  # Create an input node to add to self.outbound_node
  # and set output_tensors' _keras_history.
  input_tensor._keras_history = (self, 0, 0)  # pylint: disable=protected-access
  base_layer.Node(
      self,
      inbound_layers=[],
      node_indices=[],
      tensor_indices=[],
      input_tensors=[input_tensor],
      output_tensors=[input_tensor])