def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    t = self.iterations + 1
    lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        p_t = p - self.get_param_learning_rate_t(p, t, lr_t) * m_t / (K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
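# The Adam variant above (and the Adamax-style variant further down) calls
# self.get_param_learning_rate_t without showing its definition. The sketch below is
# only an assumption about its intent: a per-parameter scaling of the bias-corrected
# learning rate, looked up from a hypothetical `lr_multipliers` dict on the optimizer
# (not part of the original code).
def get_param_learning_rate_t(self, p, t, lr_t):
    # Fall back to the global bias-corrected rate when no multiplier is registered.
    multipliers = getattr(self, 'lr_multipliers', None) or {}
    return lr_t * multipliers.get(getattr(p, 'name', None), 1.0)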
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    lr = self.lr * (1. / (1. + self.decay * self.iterations))
    self.updates = [K.update_add(self.iterations, 1)]

    # momentum
    shapes = [K.get_variable_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments

    for p, g, m in zip(params, grads, moments):
        v = self.momentum * m - self.get_param_learning_rate(p, lr) * g  # velocity
        self.updates.append(K.update(m, v))

        if self.nesterov:
            new_p = p + self.momentum * v - lr * g
        else:
            new_p = p + v

        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))

    vs = [K.zeros(K.get_variable_shape(p)) for p in params]
    self.weights = [self.iterations] + vs

    for p, g, v in zip(params, grads, vs):
        v_t = v + K.square(g)
        p_t = p - self.lr * g / (v_t + self.xi_2 * K.exp(-self.xi_1 * v_t))
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def perceptual_loss(y_true, y_predict):
    n_batches, y_dim, x_dim, n_channels = K.get_variable_shape(y_true)
    vgg = vgg16.VGG16(include_top=False, weights='imagenet',
                      input_shape=(y_dim, x_dim, 3))
    loss_model = models.Model(inputs=vgg.input,
                              outputs=vgg.get_layer('block3_conv3').output)
    loss_model.trainable = False
    loss = 0
    # Compare VGG features channel by channel: each of the four channels is turned
    # into a 3-channel grayscale image before being fed through the loss model.
    for ii in range(4):
        y_true_slice = tf.expand_dims(y_true[:, :, :, ii], -1)
        y_true_rgb = tf.image.grayscale_to_rgb(y_true_slice, name=None)
        y_predict_slice = tf.expand_dims(y_predict[:, :, :, ii], -1)
        y_predict_rgb = tf.image.grayscale_to_rgb(y_predict_slice, name=None)
        loss += K.mean(K.square(loss_model(y_true_rgb) - loss_model(y_predict_rgb)))
    return loss
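# Hedged usage sketch for perceptual_loss above: the model passed in is assumed to be
# a Keras image-to-image model whose output has exactly 4 channels and statically
# known height/width, since the loss reads K.get_variable_shape(y_true) to size the
# VGG16 feature extractor. The wrapper name and learning rate are illustrative only.
def compile_with_perceptual_loss(model, learning_rate=1e-4):
    from keras.optimizers import Adam
    model.compile(optimizer=Adam(lr=learning_rate), loss=perceptual_loss)
    return model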
def get_updates(self, params, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    gs = [K.zeros(shape) for shape in shapes]
    moms = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))
    self.updates.append(K.update_add(self.iterations, 1))

    for p, grad, g, mom, a in zip(params, grads, gs, moms, accumulators):
        # update accumulator
        new_a = self.rho * a + (1. - self.rho) * K.square(grad)
        self.updates.append(K.update(a, new_a))
        new_g = self.rho * g + (1 - self.rho) * grad
        self.updates.append(K.update(g, new_g))
        # new_p = p - lr * grad / K.sqrt(new_a - K.square(new_g) + self.epsilon)
        new_mom = self.momentum * mom - lr * grad / K.sqrt(
            new_a - K.square(new_g) + self.epsilon)
        new_p = p + new_mom
        # new_p = p - lr * grad / (K.sqrt(new_a) + self.epsilon)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = self.iterations + 1
    lr_t = self.lr / (1. - K.pow(self.beta_1, t))

    shapes = [K.get_variable_shape(p) for p in params]
    # zero init of 1st moment
    ms = [K.zeros(shape) for shape in shapes]
    # zero init of exponentially weighted infinity norm
    us = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + us

    for p, g, m, u in zip(params, grads, ms, us):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        u_t = K.maximum(self.beta_2 * u, K.abs(g))
        p_t = p - self.get_param_learning_rate_t(p, t, lr_t) * m_t / (u_t + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(u, u_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_vae(encoder, decoder, embeding_loss_weight, layer_for_z_sigma,
            recon_loss_func, constant_sigma):
    last_but_one_encoder_layer_output = encoder.get_layer(index=-2).output
    with tf.name_scope('encoder'):
        # log_sigma = _Dense(bottleneck_size, activation='tanh')(last_but_one_encoder_layer_output)
        e_in = encoder.inputs
        if constant_sigma is None:
            log_sigma = layer_for_z_sigma(last_but_one_encoder_layer_output)
            log_sigma = Lambda(lambda x: 5 * x, name='z_sigma')(log_sigma)
            encoder = Model(inputs=e_in, outputs=encoder.outputs + [log_sigma],
                            name='std_vae_encoder_model')
        else:
            # Very nasty hack. Takes an input but always returns the same constant value!
            log_sigma = Lambda(lambda x: K.log(constant_sigma))(
                last_but_one_encoder_layer_output)

    with tf.name_scope('full_VAE'):
        mu = encoder.outputs[0]
        bottleneck_size = K.get_variable_shape(encoder.outputs[0])[-1]
        z = Lambda(loss_functions.get_sampler(bottleneck_size))([mu, log_sigma])
        vae_out = decoder(z)
        vae = Model(inputs=e_in, outputs=vae_out, name='vae')
        vae.compile(optimizer=Adam(lr=1e-4),
                    loss=loss_functions.total_loss(mu, log_sigma,
                                                   kl_weight=embeding_loss_weight,
                                                   recon_loss_func=recon_loss_func),
                    metrics=[loss_functions.loss_kl_divergence(mu, log_sigma), 'mse'])
    vae.summary()
    return encoder, decoder, vae
def get_wae(encoder, decoder, embeding_loss_weight, batch_size, recon_loss_func):
    with tf.name_scope('full_WAE'):
        bottleneck_size = K.get_variable_shape(encoder.outputs[0])[-1]
        opts = {
            'mmd_kernel': 'IMQ',
            'pz_scale': 1.0,
            'pz': 'normal',
            'zdim': bottleneck_size
        }
        e_in = encoder.inputs
        if bottleneck_size == 64:
            # Dirty hack: the WAE for CelebA is trained using noisy input, as suggested
            # in the WAE paper, while the others are not. Make it uniform.
            e_in_noisy = Lambda(lambda x: x + K.clip(
                K.random_normal(K.shape(x), mean=0.0, stddev=0.01), -0.01, 0.01))(e_in[0])
            q_zs = encoder(e_in_noisy)
        else:
            q_zs = encoder.outputs[0]
        vae_out = decoder(q_zs)
        vae = Model(inputs=e_in, outputs=vae_out, name='vae')
        vae.compile(optimizer=Adam(lr=1e-3),
                    loss=loss_functions.total_loss(opts, q_zs, batch_size,
                                                   embeding_loss_weight, recon_loss_func),
                    metrics=[loss_functions.mmd_loss(q_zs, batch_size, opts), 'mse'])
    vae.summary()
    return encoder, decoder, vae
def tversky_loss(y_true, y_pred, alpha=0.5, beta=0.8, weight=(0.25, 1, 1, 1.5)):  # , 3, 3
    """
    :param y_true: ground-truth tensor, one-hot encoded along the last axis
    :param y_pred: predicted probability tensor
    :param alpha: to be tuned; adjusts the false-positive term, default 0.5
    :param beta: to be tuned; adjusts the false-negative term, default 0.5
    :param weight: to be tuned; manually chosen per-class weights
    :return: weighted Tversky loss summed over classes and normalized by sum(weight)
    """
    class_n = K.get_variable_shape(y_pred)[-1]  # to be tuned; total number of classes
    print('number of class %d' % class_n)
    total_loss = 0.
    for i in range(class_n):
        temp_true = y_true[..., i]  # G
        temp_pred = y_pred[..., i]  # P
        TP = K.sum(temp_true * temp_pred)  # G∩P, true positives
        FN = K.sum(temp_true) - K.sum(temp_true * temp_pred)  # G-(G∩P), false negatives
        FP = K.sum(temp_pred) - K.sum(temp_true * temp_pred)  # P-(G∩P), false positives
        temp_loss = 1 - (TP + 1e-10) / (TP + alpha * FN + beta * FP + 1e-10)
        if weight is not None:
            temp_loss *= weight[i]
        total_loss += temp_loss
    tversky_loss = total_loss / sum(weight)
    return tversky_loss
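# Hedged usage sketch for tversky_loss above: y_pred is assumed to come from a softmax
# over as many channels as there are entries in `weight` (four by default), and y_true
# is assumed to be one-hot encoded along the last axis. The wrapper name and optimizer
# choice are illustrative only.
def compile_with_tversky_loss(model):
    model.compile(optimizer='adam', loss=tversky_loss, metrics=['accuracy'])
    return model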
def get_updates(self, params, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    alphas = [K.variable(K.ones(shape) * self.init_alpha) for shape in shapes]
    old_grads = [K.zeros(shape) for shape in shapes]
    self.weights = alphas + old_grads
    self.updates = []

    for p, grad, old_grad, alpha in zip(params, grads, old_grads, alphas):
        grad = K.sign(grad)
        new_alpha = K.switch(
            K.greater(grad * old_grad, 0),
            K.minimum(alpha * self.scale_up, self.max_alpha),
            K.switch(K.less(grad * old_grad, 0),
                     K.maximum(alpha * self.scale_down, self.min_alpha),
                     alpha))
        grad = K.switch(K.less(grad * old_grad, 0), K.zeros_like(grad), grad)
        new_p = p - grad * new_alpha

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        self.updates.append(K.update(p, new_p))
        self.updates.append(K.update(alpha, new_alpha))
        self.updates.append(K.update(old_grad, grad))
    return self.updates
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    mems = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs + mems
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))

    for p, g, m, v, mem in zip(params, grads, ms, vs, mems):
        r = 1. / (1. + mem)
        new_m = (1. - r) * m + r * g
        new_v = (1. - r) * v + r * K.square(g)
        denoise = K.square(new_m) / (new_v + self.epsilon)
        new_p = p - g * K.minimum(lr, denoise) / (K.sqrt(new_v) + self.epsilon)
        new_mem = 1. + mem * (1. - denoise)

        self.updates.append(K.update(m, new_m))
        self.updates.append(K.update(v, new_v))
        self.updates.append(K.update(mem, new_mem))

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    delta_accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators + delta_accumulators
    self.updates = []

    for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
        # update accumulator
        new_a = self.rho * a + (1. - self.rho) * K.square(g)
        self.updates.append(K.update(a, new_a))

        # use the new accumulator and the *old* delta_accumulator
        update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)

        new_p = p - get_learing_rate(p, self.lr) * update
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))

        # update delta_accumulator
        new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
        self.updates.append(K.update(d_a, new_d_a))
    return self.updates
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    self.updates = [K.update_add(self.iterations, 1)]
    g1s = [K.zeros(shape) for shape in shapes]
    g2s = [K.zeros(shape) for shape in shapes]
    mems = [K.ones(shape) for shape in shapes]

    lr = self.lr
    if self.inital_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))

    self.weights = [self.iterations] + g1s + g2s + mems

    for p, g, g1, g2, m in zip(params, grads, g1s, g2s, mems):
        r = 1. / (m + 1)
        new_g1 = (1. - r) * g1 + r * g
        new_g2 = (1. - r) * g2 + r * K.square(g)

        # update accumulators
        self.updates.append(K.update(g1, new_g1))
        self.updates.append(K.update(g2, new_g2))

        new_p = p - g * K.minimum(
            lr, K.square(new_g1) / (new_g2 + self.epsilon)) / (K.sqrt(new_g2) + self.epsilon)
        new_m = 1 + m * (1 - K.square(new_g1) / (new_g2 + self.epsilon))

        # update memory term
        self.updates.append(K.update(m, new_m))

        # apply constraints
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def sparsity_level(x):
    _shape = K.get_variable_shape(x)
    shape = K.shape(x)
    total = K.cast(K.prod(shape[1:]), K.floatx())
    # Fraction of strictly positive entries per sample, returned with shape (batch, 1).
    return K.reshape(
        K.sum(K.cast(x > 0.0, K.floatx()), axis=list(range(1, len(_shape)))),
        (-1, 1)) / total
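# Hedged usage sketch for sparsity_level above: it can be exposed inside the graph by
# wrapping it in a Lambda layer, e.g. to monitor how many ReLU units fire per sample.
# The helper name and layer name are illustrative only.
def attach_sparsity_monitor(feature_tensor):
    from keras.layers import Lambda
    # Produces a (batch, 1) tensor holding the fraction of strictly positive activations.
    return Lambda(sparsity_level, name='sparsity_level')(feature_tensor)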
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = []

    lr = self.lr
    if self.inital_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations)) ** 0.75
    self.updates.append(K.update_add(self.iterations, 1))

    # momentum
    shapes = [K.get_variable_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments

    # zip the iterables together and iterate over the resulting tuples
    for p, g, m in zip(params, grads, moments):
        v = self.momentum * m - lr * g  # velocity
        self.updates.append(K.update(m, v))

        if self.nesterov:
            new_p = p + self.momentum * v - lr * g
        else:
            new_p = p + v

        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def adadelta(mlsl_obj, dist, parameters, gradients, rho=0.95, eps=1e-6):
    # create variables to store intermediate updates
    if mlsl_obj is not None:
        reduce_op = AllReduce(mlsl_obj, dist, 1)
    shapes = [K.get_variable_shape(p) for p in parameters]
    gradients_sq = [K.zeros(shape) for shape in shapes]
    deltas_sq = [K.zeros(shape) for shape in shapes]

    if mlsl_obj is not None:
        # collect the gradients across workers first
        gradients = [reduce_op(grad) for grad in gradients]

    # calculate the new "average" of the squared gradients for the next iteration
    gradients_sq_new = [rho * g_sq + (1 - rho) * K.square(g)
                        for g_sq, g in izip(gradients_sq, gradients)]

    # calculate the step direction; the square root approximates the RMS of the average value
    deltas = [(K.sqrt(d_sq + eps) / K.sqrt(g_sq + eps)) * grad
              for d_sq, g_sq, grad in izip(deltas_sq, gradients_sq_new, gradients)]

    # calculate the new "average" of the squared deltas for the next step
    deltas_sq_new = [rho * d_sq + (1 - rho) * K.square(d)
                     for d_sq, d in izip(deltas_sq, deltas)]

    gradient_sq_updates = [K.update(p, new_p)
                           for (p, new_p) in zip(gradients_sq, gradients_sq_new)]
    deltas_sq_updates = [K.update(p, new_p)
                         for (p, new_p) in zip(deltas_sq, deltas_sq_new)]
    parameters_updates = [K.update(p, p - d)
                          for p, d in izip(parameters, deltas)]
    return gradient_sq_updates + deltas_sq_updates + parameters_updates
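# Hedged usage sketch for the adadelta() helper above in the single-node case
# (mlsl_obj=None skips the AllReduce branch). The placeholders `inputs`, `targets`
# and `loss` are assumed to belong to an already-built graph; names are illustrative.
def make_adadelta_train_fn(inputs, targets, loss, parameters):
    gradients = K.gradients(loss, parameters)
    updates = adadelta(None, None, parameters, gradients)
    return K.function([inputs, targets], [loss], updates=updates)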
def brier_skill(y_true, y_pred, use_true):
    """
    Calculate the Brier score relative to either the true class or the predicted class.

    If use_true = True, it is relative to the y_true class.
    If use_true = False, it is relative to the y_pred class (how confident we are in
    the prediction, with no knowledge of the true class).
    """
    do_eval = False  # We use this function later on for static values
    if isinstance(y_pred, np.ndarray):
        do_eval = True
        y_pred = K.variable(y_pred)
    num_classes = K.get_variable_shape(y_pred)[1]
    if use_true:
        y_pick = y_true
    else:
        y_pick = y_pred
    pick_classes = K.argmax(y_pick, axis=1)
    brier_out = _brier(num_classes, y_pred, pick_classes)
    if do_eval:
        brier_out = K.get_value(brier_out)
    return brier_out
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    mems = [K.zeros(shape) for shape in shapes]
    denoises = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs + mems + denoises

    for p, g, m, v, mem, denoise in zip(params, grads, ms, vs, mems, denoises):
        r = K.minimum(0.2, K.maximum(0.005, 1. / (1. + mem)))
        mem_t = 1. / r - 1.
        m_t = (1. - r) * m + r * g
        v_t = (1. - r) * v + r * K.square(g)
        denoise_t = 0.99 * denoise + 0.01 * K.square(m_t) / (v_t + self.epsilon)
        p_t = p - g * denoise_t / (K.sqrt(v_t) + self.epsilon)
        mem_t = K.maximum(0., 1. + mem_t * (1. - denoise_t))

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update(mem, mem_t))
        self.updates.append(K.update(denoise, denoise_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    mems = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs + mems

    for p, g, m, v, mem in zip(params, grads, ms, vs, mems):
        r = 1. / (1. + mem)
        m_t = (1. - r) * m + r * g
        v_t = (1. - r) * v + r * K.square(g)
        denoise = K.square(m_t) / (v_t + self.epsilon)
        p_t = p - g * K.minimum(lr, denoise) / (K.sqrt(v_t) + self.epsilon)
        mem_t = 1. + mem * (1. - denoise)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update(mem, mem_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def create_cnn_dense(seq_length, word_num, embedding_dim, x_dim, sparse=True):
    # CNN part, input: text sequence
    cnn_input = Input(shape=(seq_length, ))
    embed = Embedding(
        input_shape=(seq_length, ),
        input_dim=word_num + 1,
        output_dim=embedding_dim,
        embeddings_initializer=initializers.glorot_uniform(seed=9))(cnn_input)
    cnn_hidden = Conv1D(filters=128, kernel_size=5, activation='relu')(embed)
    cnn_hidden = MaxPooling1D(
        pool_size=K.get_variable_shape(cnn_hidden)[1])(cnn_hidden)
    cnn_hidden = Flatten()(cnn_hidden)
    cnn_hidden = BatchNormalization()(cnn_hidden)

    # Dense part, input: tf-idf
    dense_input = Input(shape=(x_dim, ), sparse=sparse)
    dense_hidden = Dense(units=64, activation='relu')(dense_input)
    dense_hidden = BatchNormalization()(dense_hidden)

    # merge the two parts above
    hidden = concatenate([dense_hidden, cnn_hidden])
    # hidden = Dropout(0.3)(hidden)
    hidden = Dense(units=128, activation='tanh')(hidden)
    hidden = Dropout(0.3)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dense(units=64, activation='tanh')(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dense(units=32, activation='tanh')(hidden)
    output = Dense(units=1, activation='sigmoid')(hidden)
    model = Model(inputs=[cnn_input, dense_input], outputs=output)
    return model
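# Hedged usage sketch for create_cnn_dense above, with illustrative dimensions:
# sequences padded to 100 tokens, a 20k-word vocabulary, 128-d embeddings and a
# 5000-d tf-idf vector per sample. The loss/optimizer choices are assumptions,
# not taken from the original code.
def build_and_compile_cnn_dense():
    model = create_cnn_dense(seq_length=100, word_num=20000,
                             embedding_dim=128, x_dim=5000, sparse=False)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model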
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = []

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))
    self.updates.append(K.update_add(self.iterations, 1))

    # momentum
    shapes = [K.get_variable_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments

    for p, g, m in zip(params, grads, moments):
        if self.noise > 0:
            g = g + K.random_normal(g.shape, mean=0, stddev=self.noise)
        v = self.momentum * m - lr * g  # velocity
        self.updates.append(K.update(m, v))

        if self.nesterov:
            new_p = p + self.momentum * v - lr * g
        else:
            new_p = p + v

        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))

    self.sanitized_gradients = None
    return self.updates
def get_weightnorm_params_and_grads(p, g):
    ps = K.get_variable_shape(p)

    # construct weight scaler: V_scaler = g/||V||
    V_scaler_shape = (ps[-1], )  # assumes we're using tensorflow!
    print(ps)
    print(V_scaler_shape)
    # import code
    # code.interact(local=locals())
    V_scaler = K.ones(V_scaler_shape)  # init to ones, so effective parameters don't change

    # get V parameters = ||V||/g * W
    norm_axes = [i for i in range(len(ps) - 1)]
    V = p / tf.reshape(V_scaler, [1] * len(norm_axes) + [-1])

    # split V_scaler into ||V|| and g parameters
    V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), norm_axes))
    g_param = V_scaler * V_norm

    # get grad in V, g parameters
    grad_g = tf.reduce_sum(g * V, norm_axes) / V_norm
    grad_V = tf.reshape(V_scaler, [1] * len(norm_axes) + [-1]) * \
        (g - tf.reshape(grad_g / V_norm, [1] * len(norm_axes) + [-1]) * V)

    return V, V_norm, V_scaler, g_param, grad_g, grad_V
def get_pad_shape(target, refer):
    ch = K.get_variable_shape(refer)[1] - K.get_variable_shape(target)[1]
    assert ch >= 0
    if ch % 2 != 0:
        ch1, ch2 = int(ch / 2), int(ch / 2) + 1
    else:
        ch1, ch2 = int(ch / 2), int(ch / 2)

    cw = K.get_variable_shape(refer)[2] - K.get_variable_shape(target)[2]
    assert cw >= 0
    if cw % 2 != 0:
        cw1, cw2 = int(cw / 2), int(cw / 2) + 1
    else:
        cw1, cw2 = int(cw / 2), int(cw / 2)

    return (ch1, ch2), (cw1, cw2)
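# Hedged usage sketch for get_pad_shape above, e.g. in a U-Net style decoder: pad the
# upsampled tensor so its spatial size matches the skip connection before
# concatenation. The helper name and tensor names are illustrative only.
def pad_to_match(upsampled, skip_connection):
    from keras.layers import ZeroPadding2D
    ch, cw = get_pad_shape(upsampled, skip_connection)
    return ZeroPadding2D(padding=(ch, cw))(upsampled)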
def set_param(self, p):
    if self.p is not None:
        raise Exception('Regularizers cannot be reused. '
                        'Instantiate one regularizer per layer.')
    self.p = p

    # make matrix
    p_shape = K.get_variable_shape(p)
    w_rows = np.prod(p_shape[0:3])  # todo: append theano pattern
    w_cols = self.b.shape[1]
    if self.wtype == 'random':
        self.w = np.random.randn(w_rows, w_cols)
    elif self.wtype == 'direct':
        self.w = np.zeros((w_rows, w_cols), dtype=None)
        rand_idx_gen = random_index_generator(w_rows)
        for col in range(w_cols):
            self.w[next(rand_idx_gen)][col] = 1.
    elif self.wtype == 'diff':
        self.w = np.zeros((w_rows, w_cols), dtype=None)
        rand_idx_gen = random_index_generator(w_rows)
        for col in range(w_cols):
            self.w[next(rand_idx_gen)][col] = 1.
            self.w[next(rand_idx_gen)][col] = -1.
    else:
        raise Exception('wtype="{}" is not supported'.format(self.wtype))
def gan_loss(self, d_logit_real, d_logit_fake):
    """Define the discriminator and generator loss functions."""
    d_target_real = K.ones_like(d_logit_real)
    d_target_fake = K.zeros_like(d_logit_fake)

    if self.label_flipping > 0:
        # flip some real targets to zeros
        flip_val = K.random_binomial(K.get_variable_shape(d_logit_real),
                                     p=self.label_flipping)
        d_target_real -= flip_val
        # flip some fake targets to ones
        flip_val = K.random_binomial(K.get_variable_shape(d_logit_fake),
                                     p=self.label_flipping)
        d_target_fake += flip_val

    # if self.oneside_smooth is True:
    if self.oneside_smooth:
        # smooth some real targets into the 0.9-1.0 range
        smooth_val = K.random_uniform_variable(
            K.get_variable_shape(d_logit_real), low=0.9, high=0.99)
        d_target_real *= smooth_val
        # smooth some fake targets into the 0.9-1.0 range
        # (when label_flipping = 0, this has no effect)
        smooth_val = K.random_uniform_variable(
            K.get_variable_shape(d_logit_fake), low=0.9, high=0.99)
        d_target_fake *= smooth_val

    d_loss_real = K.mean(K.binary_crossentropy(output=d_logit_real,
                                               target=d_target_real,
                                               from_logits=True), axis=1)
    d_loss_fake = K.mean(K.binary_crossentropy(output=d_logit_fake,
                                               target=d_target_fake,
                                               from_logits=True), axis=1)
    d_loss = K.mean(d_loss_real + d_loss_fake)
    g_loss = K.mean(K.binary_crossentropy(output=d_logit_fake,
                                          target=K.ones_like(d_logit_fake),
                                          from_logits=True))
    return d_loss, g_loss
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    old_grads = [K.zeros(shape) for shape in shapes]
    self.updates = []

    new_zeta = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='new_zeta_' + str(i))
                for (i, p) in enumerate(params)]
    Z = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='Z_' + str(i))
         for (i, p) in enumerate(params)]
    theta = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='theta_' + str(i))
             for (i, p) in enumerate(params)]

    for param, grad, old_grad, new_zeta, Z, theta in zip(
            params, grads, old_grads, new_zeta, Z, theta):
        new_step = K.switch(
            K.greater(grad * old_grad, 0),
            K.minimum(new_zeta * self.scale_up, self.zeta_max),
            K.maximum(new_zeta * self.scale_down, self.zeta_min))
        Z_updated = (self.alpha * Z) + ((1. - self.alpha) * new_step)  # was new_zeta before
        theta_updated = (self.alpha * theta) + ((1. - self.alpha) * ((grad * self.t) ** 2))
        new_t = -self.lr * Z_updated * grad * (math.sqrt(1 / theta_updated))
        new_param = param + new_t

        # Apply constraints
        if param in constraints:
            c = constraints[param]
            new_param = c(new_param)

        self.updates.append(K.update(param, new_param))
        self.updates.append(K.update(old_grad, grad))
        self.updates.append(K.update(new_zeta, new_step))
        self.updates.append(K.update(Z, Z_updated))
        self.updates.append(K.update(theta, theta_updated))
    return self.updates
def update_state(losstot, x, state_c, state_h):
    with tf.name_scope("gradients"):
        shapes = [K.get_variable_shape(p) for p in x]
        grads = nest.flatten([
            K.gradients(losstot[a], x[num_var[b]:num_var[b] + num_var[b + 1]])
            for a, b in zip(range(len(losstot)), range(len(num_var) - 1))
        ])
        grads = [tf.stop_gradient(g) for g in grads]

    with tf.variable_scope('MetaNetwork'):
        cell_count = 0
        delta = [[] for _ in range(len(grads))]
        S_C_out = [[] for _ in range(len(opt_var))]
        S_H_out = [[] for _ in range(len(opt_var))]

        for i in range(len(grads)):
            g = grads[i]
            n_param = int(np.prod(shapes[i]))
            flat_g = tf.reshape(g, [n_param, -1])
            flat_g_mod = tf.reshape(Preprocess.log_encode(flat_g), [n_param, -1])
            rnn_new_c = [[] for _ in range(num_layer)]
            rnn_new_h = [[] for _ in range(num_layer)]

            # Apply RNN cell for each parameter
            with tf.variable_scope("RNN"):
                rnn_state_c = [[] for _ in range(num_layer)]
                rnn_state_h = [[] for _ in range(num_layer)]
                input_loop = []
                for ii in range(n_param):
                    input_loop.append([
                        flat_g_mod[ii:ii + 1, :], state_c[i][ii],
                        state_h[i][ii], cell_count
                    ])
                pool = multiprocessing.Pool()
                rnn_outputs, state_out, cell_count = zip(
                    *pool.map(rnn_cell, input_loop))
                for j in range(num_layer):
                    rnn_state_c[j].append(state_out[j].c)
                    rnn_state_h[j].append(state_out[j].h)

            # Form output as tensor
            rnn_outputs = tf.reshape(tf.stack(rnn_outputs, axis=1), g.get_shape())
            for j in range(num_layer):
                rnn_new_c[j] = tf.reshape(tf.stack(rnn_state_c[j], axis=1),
                                          (n_param, hidden_size))
                rnn_new_h[j] = tf.reshape(tf.stack(rnn_state_h[j], axis=1),
                                          (n_param, hidden_size))

            # Dense output from state
            delta[i] = rnn_outputs
            S_C_out[i] = rnn_new_c
            S_H_out[i] = rnn_new_h

    return delta, S_C_out, S_H_out
def crnn_melspect_2D(input_shape):
    pair_two = [2, 2]
    pair_three = [3, 3]
    pair_four = [4, 4]
    num_classes = 10
    drop_ratio = 0.1
    channel_axis = 1
    # activation_func = LeakyReLU()
    activation_func = Activation('relu')
    inputs = Input(input_shape)
    print('input_shape: ', input_shape)

    # Convolutional block_1
    conv1 = Conv2D(64, kernel_size=pair_three, name='conv1')(inputs)
    bn1 = BatchNormalization(axis=channel_axis, mode=0, name='bn1')(conv1)
    elu1 = ELU()(bn1)
    pool1 = MaxPooling2D(pool_size=pair_two, strides=pair_two, name='pool1')(elu1)
    dr1 = Dropout(drop_ratio, name='dropout1')(pool1)

    # Convolutional block_2
    conv2 = Conv2D(128, kernel_size=pair_three, name='conv2')(dr1)
    bn2 = BatchNormalization(axis=channel_axis, mode=0, name='bn2')(conv2)
    elu2 = ELU()(bn2)
    pool2 = MaxPooling2D(pool_size=pair_two, strides=pair_two, name='pool2')(elu2)
    dr2 = Dropout(drop_ratio, name='dropout2')(pool2)

    # Convolutional block_3
    conv3 = Conv2D(128, kernel_size=pair_three, name='conv3')(dr2)
    bn3 = BatchNormalization(axis=channel_axis, mode=0, name='bn3')(conv3)
    elu3 = ELU()(bn3)
    pool3 = MaxPooling2D(pool_size=pair_three, strides=pair_three, name='pool3')(elu3)
    dr3 = Dropout(drop_ratio, name='dropout3')(pool3)

    # Convolutional block_4
    conv4 = Conv2D(128, kernel_size=pair_three, name='conv4')(dr3)
    bn4 = BatchNormalization(axis=channel_axis, mode=0, name='bn4')(conv4)
    elu4 = ELU()(bn4)
    pool4 = MaxPooling2D(pool_size=pair_four, strides=pair_four, name='pool4')(elu4)
    dr4 = Dropout(drop_ratio, name='dropout4')(pool4)
    print('dr4shape:', K.get_variable_shape(dr4))

    # Reshaping
    # x = Permute((3, 1, 2))(dr4)
    rs = Reshape((25, 128))(dr4)  # 15, 128

    # GRU block 1, 2, output
    gru1 = GRU(32, return_sequences=True, name='gru1')(rs)
    gru2 = GRU(32, return_sequences=False, name='gru2')(gru1)
    reg = Dropout(0.3)(gru2)
    dense2 = Dense(num_classes, activation='sigmoid', name='output')(reg)

    model = Model(inputs=[inputs], outputs=[dense2])
    model.summary()
    return model
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)  # compute the gradients g_t
    self.updates = [K.update_add(self.iterations, 1)]  # increment the iteration count by 1

    lr = self.lr
    if self.initial_decay > 0:
        # If the initial learning-rate decay factor is non-zero, the learning rate
        # keeps shrinking as the number of iterations grows.
        lr *= (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1  # this is the t in the formulas

    # Correction factor that turns the biased moment estimates into unbiased ones.
    # The computation shared by all parameters is hoisted out of the loop for speed.
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    shapes = [K.get_variable_shape(p) for p in params]  # get the weight shapes
    ms = [K.zeros(shape) for shape in shapes]  # initial first-moment estimates
    vs = [K.zeros(shape) for shape in shapes]  # initial second-moment estimates
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g  # first-moment estimate
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)  # second-moment estimate
        p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)  # weight update

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        # apply constraints to the weights
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def samplewise_normalize(img):
    N, H, W, D = K.get_variable_shape(img)
    tmp = K.flatten(img)
    # Note: flattening collapses the batch dimension as well, so the mean and std
    # are computed over the whole flattened batch rather than per sample.
    numerator = tmp - K.mean(tmp, 0)
    denominator = K.std(tmp, 0) + 1e-8
    normalized = numerator / denominator
    return K.reshape(normalized, (-1, H, W, D))
def on_train_begin(self, logs=None):
    logs = logs or {}
    # initialize proper learning rates
    # handle edge case of multiple learning rates
    lr_shape = K.get_variable_shape(self.model.optimizer.lr)
    if len(lr_shape) > 0:
        self.min_lr = np.full(lr_shape, self._get_lr())
    K.set_value(self.model.optimizer.lr, self.min_lr)
def add_weightnorm_param_updates(updates, new_V_param, new_g_param, W, V_scaler):
    ps = K.get_variable_shape(new_V_param)
    norm_axes = [i for i in range(len(ps) - 1)]

    # update W and V_scaler
    new_V_norm = tf.sqrt(tf.reduce_sum(tf.square(new_V_param), norm_axes))
    new_V_scaler = new_g_param / new_V_norm
    new_W = tf.reshape(new_V_scaler, [1] * len(norm_axes) + [-1]) * new_V_param
    updates.append(K.update(W, new_W))
    updates.append(K.update(V_scaler, new_V_scaler))
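# Hedged sketch of how the two weight-normalization helpers (get_weightnorm_params_and_grads
# further above and add_weightnorm_param_updates above) are typically combined inside an
# optimizer's parameter loop; a plain SGD step is assumed here, and the function name is
# illustrative only.
def weightnorm_sgd_step(updates, p, g, lr):
    # Reparameterize p = g/||V|| * V, take a gradient step on V and g separately,
    # then write the recombined weight and scaler back through the shared updates list.
    V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(p, g)
    new_V_param = V - lr * grad_V
    new_g_param = g_param - lr * grad_g
    add_weightnorm_param_updates(updates, new_V_param, new_g_param, p, V_scaler)
    return updates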
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))
    # lr_t = lr * (1. - K.pow(self.beta_1, t)) / (1. - K.pow(self.beta_2, t))
    # lr_t = lr

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    vhats = [K.zeros(shape) for shape in shapes]
    ls = [K.zeros(shape) for shape in shapes]
    # ws = [K.zeros(shape) for shape in shapes]
    # whats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs + vhats + ls

    for p, g, m, v, vhat, l in zip(params, grads, ms, vs, vhats, ls):
        m_t = g - v
        v_t = v + self.beta_1 * m_t
        vhat_t = self.beta_2 * vhat + (1 - self.beta_2) * K.square(m_t)
        # l_t = K.square(v_t + K.sqrt(vhat_t)) - w
        # w_t = w + self.beta_2 * l_t
        # l_t = (self.beta_3 * l) + (1. - self.beta_3) * K.square(K.square(v_t + K.pow((1 - self.beta_3), t) * K.sqrt(vhat_t)))
        l_t = (self.beta_3 * l) + (1. - self.beta_3) * K.square(g)
        # w_t = (self.beta_3 * w) + (1. - self.beta_3) * p
        # what_t = (1 - self.beta_2) * (what + self.beta_2 * K.square(l_t))
        # p_t = p - lr_t * (v_t) / (K.sqrt(w_t) + K.sqrt(K.sqrt(what_t)))
        p_t = p - lr_t * (v_t + K.pow((1 - self.beta_2), t) * K.sqrt(vhat_t)) / (K.sqrt(l_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update(vhat, vhat_t))
        self.updates.append(K.update(l, l_t))
        # self.updates.append(K.update(w, w_t))
        # self.updates.append(K.update(what, what_t))

        new_p = p_t
        # Apply constraint
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators
    self.updates = []

    for p, g, a in zip(params, grads, accumulators):
        new_a = a + K.square(g)  # update accumulator
        self.updates.append(K.update(a, new_a))
        new_p = p - get_learing_rate(p, self.lr) * g / (K.sqrt(new_a) + self.epsilon)

        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = self.iterations + 1

    # Due to the recommendations in [2], i.e. warming momentum schedule
    momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(0.96, t * self.schedule_decay)))
    momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(0.96, (t + 1) * self.schedule_decay)))
    m_schedule_new = self.m_schedule * momentum_cache_t
    m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
    self.updates.append((self.m_schedule, m_schedule_new))

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        # the following equations are given in [1]
        g_prime = g / (1. - m_schedule_new)
        m_t = self.beta_1 * m + (1. - self.beta_1) * g
        m_t_prime = m_t / (1. - m_schedule_next)
        v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
        v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
        m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))

        p_t = p - get_learing_rate(p, self.lr) * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
        new_p = p_t

        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    t = self.iterations + 1

    loss_prev = K.variable(0)
    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]

    ch_fact_lbound = K.switch(K.greater(loss, loss_prev), 1 + self.thl, 1 / (1 + self.thu))
    ch_fact_ubound = K.switch(K.greater(loss, loss_prev), 1 + self.thu, 1 / (1 + self.thl))
    loss_ch_fact = loss / loss_prev
    loss_ch_fact = K.switch(K.lesser(loss_ch_fact, ch_fact_lbound), ch_fact_lbound, loss_ch_fact)
    loss_ch_fact = K.switch(K.greater(loss_ch_fact, ch_fact_ubound), ch_fact_ubound, loss_ch_fact)
    loss_hat = K.switch(K.greater(t, 1), loss_prev * loss_ch_fact, loss)

    d_den = K.switch(K.greater(loss_hat, loss_prev), loss_prev, loss_hat)
    d_t = (self.beta_3 * self.d) + (1. - self.beta_3) * K.abs((loss_hat - loss_prev) / d_den)
    d_t = K.switch(K.greater(t, 1), d_t, 1.)
    self.updates.append(K.update(self.d, d_t))

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        mhat_t = m_t / (1. - K.pow(self.beta_1, t))
        self.updates.append(K.update(m, m_t))

        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        vhat_t = v_t / (1. - K.pow(self.beta_2, t))
        self.updates.append(K.update(v, v_t))

        p_t = p - (self.lr / (1. + (self.iterations * self.decay))) * mhat_t / ((K.sqrt(vhat_t) * d_t) + self.epsilon)
        self.updates.append(K.update(p, p_t))

    self.updates.append(K.update(loss_prev, loss_hat))
    return self.updates