def on_epoch_end(self, epoch, logs=None):
    # if epoch < 50: return  # evaluation is slow, so skip it for the first 50 epochs
    acc, f1, final = self.evaluate()
    print(f'acc={acc},f1={f1},final={final}')
    self.metrics.append((epoch, acc, f1, final))
    json.dump(self.metrics, open('train.log', 'w'), indent=4)
    if final > self.best:
        self.best = final
        self.model.save_weights('best_model.weights')
    print('learning rate: %s' % (tk.eval(self.model.optimizer.lr)))
    print('acc: %.4f, f1: %.4f, final: %.4f, best final: %.4f\n' %
          (acc, f1, final, self.best))
    if epoch + 1 == 30 or (self.stage == 0 and epoch > 15 and
                           (final < 0.5 or
                            np.argmax(self.metrics, 0)[3] < len(self.metrics) - 5)):
        """Lower the learning rate once epoch 30 is reached, once `final`
        drops below 0.5 (the model has started to diverge), or after 5
        consecutive epochs without improvement.
        """
        self.stage = 1
        self.model.load_weights('best_model.weights')
        tk.set_value(self.model.optimizer.lr, 1e-4)
        tk.set_value(self.model.optimizer.iterations, 0)
        # Reset the optimizer state (e.g. Adam moments) to zero.
        opt_weights = tk.batch_get_value(self.model.optimizer.weights)
        opt_weights = [w * 0. for w in opt_weights]
        tk.batch_set_value(zip(self.model.optimizer.weights, opt_weights))
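# Hedged usage note for the callback above (the `Evaluator` class name and
# `train_generator` are illustrative; only on_epoch_end is shown in the source):
#
#   evaluator = Evaluator()  # expected to define self.metrics = [], self.best = 0., self.stage = 0
#   model.fit(train_generator, epochs=40, callbacks=[evaluator])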
def load_weights(model, weights_path):
    """Load weights from Caffe models."""
    print("Loading weights...")
    if h5py is None:
        raise ImportError('`load_weights` requires h5py.')
    f = h5py.File(weights_path, mode='r')
    # New file format.
    layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]

    # Reverse index of layer name to list of layers with that name.
    index = {}
    for layer in model.layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # We batch weight value assignments in a single backend call
    # which provides a speedup in TensorFlow.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = f[name]
        weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
        weight_values = [g[weight_name] for weight_name in weight_names]
        for layer in index.get(name, []):
            symbolic_weights = layer.weights
            # Set values.
            for i in range(len(weight_values)):
                weight_value_tuples.append(
                    (symbolic_weights[i], weight_values[i]))
    K.batch_set_value(weight_value_tuples)
    return layer_names
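# A minimal, runnable sketch of the HDF5 layout that load_weights above parses,
# assuming TF 2.x (the model and file name are illustrative). Recent h5py/Keras
# versions may store the attrs as str rather than bytes, hence the isinstance check.
import h5py
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,), name='fc')])
model.save_weights('tmp_weights.h5')

def _decode(n):
    return n.decode('utf8') if isinstance(n, bytes) else n

with h5py.File('tmp_weights.h5', 'r') as f:
    for name in map(_decode, f.attrs['layer_names']):
        g = f[name]
        print(name, [_decode(w) for w in g.attrs['weight_names']])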
def load(kmdl, path):
    # Load the parts whose names and shapes match exactly:
    # std -> std; lhc-formable -> lhc-formable.
    kmdl.load_weights(path, True, True)

    file0 = file = h5py.File(path, 'r')
    if 'layer_names' not in file.attrs and 'model_weights' in file:
        file = file['model_weights']

    from tensorflow.python.keras.saving.hdf5_format import (
        _legacy_weights, load_attributes_from_hdf5_group,
        preprocess_weights_for_loading)

    if 'keras_version' in file.attrs:
        original_keras_version = file.attrs['keras_version']  # .decode('utf8')
    else:
        original_keras_version = '1'
    if 'backend' in file.attrs:
        original_backend = file.attrs['backend']  # .decode('utf8')
    else:
        original_backend = None

    layer_names = load_attributes_from_hdf5_group(file, 'layer_names')
    index = {}
    for layer in kmdl.layers:
        if layer.name:
            index.setdefault(layer.name, []).append(layer)

    # Load the remaining parts.
    weight_value_tuples = []
    for k, name in enumerate(layer_names):
        g = file[name]
        weight_names = load_attributes_from_hdf5_group(g, 'weight_names')
        weight_values = [
            np.asarray(g[weight_name]) for weight_name in weight_names
        ]
        layer = index.get(name, [])
        if len(layer) == 0:
            continue
        assert len(layer) == 1
        layer = layer[0]
        if type(layer) in (Conv2dLhcf, Conv2dLhcr):
            weight_values = preprocess_weights_for_loading(
                layer, weight_values, original_keras_version, original_backend)
            wdict = dict(zip(weight_names, weight_values))
            symbolic_weights = _legacy_weights(layer)
            symbol_names = [s.name for s in symbolic_weights]
            sdict = dict(zip(symbol_names, symbolic_weights))
            for pname in Conv2dLhcf.VAR_NAMES[:3]:
                # symb = [__ for _, __ in sdict.items() if _[:-2].endswith(pname)]
                symb = [__ for _, __ in sdict.items() if pname in _]
                # wght = [__ for _, __ in wdict.items() if _[:-2].endswith(pname)]
                wght = [__ for _, __ in wdict.items() if pname in _]
                assert len(symb) == 1 and len(wght) <= 1
                if len(wght) == 1:
                    weight_value_tuples.append((symb[0], wght[0]))
    KB.batch_set_value(weight_value_tuples)
    file0.close()
def load_weights_by_name(f, layers):
    """Load the weights by name from the h5py file to the model."""
    # If f is not an h5py thing, try to open it.
    if not isinstance(f, (h5py.File, h5py.Group)):
        with h5py.File(f, "r") as h5f:
            return load_weights_by_name(h5f, layers)

    # Extract all the weights from the layers/model.
    if not isinstance(layers, list):
        layers = layers.layers
    weights = dict(
        reduce(lambda a, x: a + [(w.name, w) for w in x.weights], layers, []))

    # Loop through all the possible layer weights in the file and make a
    # list of updates.
    updates = []
    updated = []
    for name, weight in weights_from_hdf5(f):
        for n in possible_weight_names(name):
            if n in weights:
                updates.append((weights[n], weight))
                updated.append(n)
                break

    K.batch_set_value(updates)
    return updated
def apply_mask(self, model, masks):
    tuples = []
    for weight, mask in zip(model.trainable_weights, masks):
        # Zero out the pruned entries; the masked tensor is evaluated
        # before being assigned back to the variable in one batched call.
        masked_weight = weight * tf.cast(mask, weight.dtype.base_dtype)
        tuples.append((weight, masked_weight))
    K.batch_set_value(tuples)
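# A self-contained sketch of the masking pattern above, assuming TF 2.x; the
# Dense model and the random 50% binary masks are illustrative. Here the product
# is computed in numpy before the single batched assignment.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
masks = [np.random.binomial(1, 0.5, K.int_shape(w)) for w in model.trainable_weights]

tuples = []
for weight, mask in zip(model.trainable_weights, masks):
    masked = K.get_value(weight) * mask   # zero out the pruned entries
    tuples.append((weight, masked))
K.batch_set_value(tuples)                 # one backend call for all assignments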
def load_gsc_weights_from_pytorch(model_tf, model_pt, weights_map=None):
    """
    Update tensorflow model weights using pre-trained GSC pytorch model

    :param model_tf: Untrained GSC model (tensorflow).
    :type model_tf: :class:`nupic.tensorflow.models.GSCSparseCNN`
    :param model_pt: Pre-trained GSC model (pytorch).
    :type model_pt: :class:`nupic.torch.models.GSCSparseCNN`
    :param weights_map: Dictionary mapping tensorflow variables to pytorch state
    :type weights_map: dict
    """
    if not isinstance(model_pt, nupic.torch.models.GSCSparseCNN):
        raise NotImplementedError()
    if weights_map is None:
        weights_map = _GSC_SPARSE_MAP
    state_dict = model_pt.state_dict()
    batch_values = []
    for var in model_tf.variables:
        name = var.name
        if name in weights_map:
            tensor, transform = weights_map[name]
            value = state_dict[tensor].data.numpy()
            if transform is not None:
                value = transform(value)
            batch_values.append((var, value))
        else:
            TF_LOGGER.warn("Unknown variable: %s", var.name)
    K.batch_set_value(batch_values)
def apply_ema_weights(self):
    """Store the original model weights, then apply the EMA weights to the model."""
    self.old_weights = K.batch_get_value(self.model.weights)
    ema_weights = K.batch_get_value(self.ema_weights)
    K.batch_set_value(zip(self.model.weights, ema_weights))
def reset_states(self):
    if isinstance(self.thresholds, list):
        num_thresholds = len(self.thresholds)
    else:
        num_thresholds = len(list(self.thresholds))
    K.batch_set_value([(v, np.zeros((num_thresholds,)))
                       for v in self.variables])
def set_model(self, model):
    """Bind the model and initialize the EMA parameters."""
    super(ExponentialMovingAverage, self).set_model(model)
    self.ema_weights = [K.zeros(K.shape(w)) for w in model.weights]
    self.old_weights = K.batch_get_value(model.weights)
    K.batch_set_value(zip(self.ema_weights, self.old_weights))
    self.updates = []
    for w1, w2 in zip(self.ema_weights, model.weights):
        op = K.moving_average_update(w1, w2, self.momentum)
        self.updates.append(op)
def set_kernal(self, kernal_values):
    params = self.kernel
    if not params:
        return
    weight_value_tuples = []
    param_values = K.batch_get_value(params)
    if param_values.shape != kernal_values.shape:
        raise ValueError('Layer kernel shape ' + str(param_values.shape) +
                         ' not compatible with '
                         'provided kernel shape ' + str(kernal_values.shape))
    weight_value_tuples.append((params, kernal_values))
    K.batch_set_value(weight_value_tuples)
def set_bias(self, kernal_bias):
    if not self.use_bias:
        return
    params = self.bias
    if not params:
        return
    weight_value_tuples = []
    param_values = K.batch_get_value(params)
    if param_values.shape != kernal_bias.shape:
        raise ValueError('Layer bias shape ' + str(param_values.shape) +
                         ' not compatible with '
                         'provided bias shape ' + str(kernal_bias.shape))
    weight_value_tuples.append((params, kernal_bias))
    K.batch_set_value(weight_value_tuples)
def update_grad(self, sample_generator):
    sample_generator = iter(sample_generator)
    x, y = next(sample_generator)
    N = len(y)
    # Accumulate the gradient sum in-place, weighted by batch size.
    gradient_sum = self._get_grad(_tolist(x) + [y, 1])
    for g_sum in gradient_sum:
        g_sum *= N
    for x, y in sample_generator:
        grads = self._get_grad(_tolist(x) + [y, 1])
        n = len(y)
        for g_sum, g in zip(gradient_sum, grads):
            g_sum += g * n
        N += len(y)
    # Normalize to the dataset-average gradient.
    for g_sum in gradient_sum:
        g_sum /= N
    K.batch_set_value(zip(self._batch_grad, gradient_sum))
    self._snapshot.set_weights(self.model.get_weights())
def save_weights_as_checkpoint(self, filename, mapping=None):
    """Save the weights in checkpoint format, according to `mapping`."""
    mapping = mapping or self.variable_mapping()
    mapping = {self.prefixed(k): v for k, v in mapping.items()}
    mapping = {k: v for k, v in mapping.items() if k in self.layers}

    with tf.Graph().as_default():
        all_variables, all_values = [], []
        for layer, variables in mapping.items():
            layer = self.layers[layer]
            values = K.batch_get_value(layer.trainable_weights)
            for name, value in zip(variables, values):
                variable, value = self.create_variable(name, value)
                all_variables.append(variable)
                all_values.append(value)
        with tf.Session() as sess:
            K.batch_set_value(zip(all_variables, all_values))
            saver = tf.train.Saver()
            saver.save(sess, filename)
def load_weights_from_checkpoint(self, checkpoint, mapping=None):
    """Load the weights from a checkpoint, according to `mapping`."""
    mapping = mapping or self.variable_mapping()
    mapping = {self.prefixed(k): v for k, v in mapping.items()}
    mapping = {k: v for k, v in mapping.items() if k in self.layers}

    weight_value_pairs = []
    for layer, variables in mapping.items():
        layer = self.layers[layer]
        weights = layer.trainable_weights
        values = [self.load_variable(checkpoint, v) for v in variables]
        if isinstance(layer, MultiHeadAttention):
            """If key_size differs from head_size, the corresponding weights
            can be projected to the right shape via an orthogonal matrix.
            """
            count = 2
            if layer.use_bias:
                count += 2
            heads = self.num_attention_heads
            head_size = self.attention_head_size
            key_size = self.attention_key_size
            W = np.linalg.qr(np.random.randn(key_size, head_size))[0].T
            if layer.attention_scale:
                W = W * key_size**0.25 / head_size**0.25
            for i in range(count):
                w, v = weights[i], values[i]
                w_shape, v_shape = K.int_shape(w), v.shape
                if w_shape[-1] != v_shape[-1]:
                    pre_shape = w_shape[:-1]
                    v = v.reshape(pre_shape + (heads, head_size))
                    v = np.dot(v, W)
                    v = v.reshape(pre_shape + (heads * key_size,))
                    values[i] = v
        weight_value_pairs.extend(zip(weights, values))
    K.batch_set_value(weight_value_pairs)
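# A hedged numeric check of the QR projection used above (numpy only; the sizes
# are illustrative). The Q factor of a Gaussian matrix has orthonormal columns,
# so W = Q.T maps head_size-dim vectors into key_size dims preserving inner products.
import numpy as np

head_size, key_size = 64, 128
W = np.linalg.qr(np.random.randn(key_size, head_size))[0].T   # (head_size, key_size)

v = np.random.randn(8, head_size)      # e.g. per-head weight slices
u = v @ W                              # projected to key_size dims
assert np.allclose(W @ W.T, np.eye(head_size))                # orthonormal rows
assert np.allclose(u @ u.T, v @ v.T)                          # inner products preserved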
def load_layer_weights(weight_values, symbolic_weights):
    """Load `weight_values`, a list of tuples from
    get_named_layer_weights_from_h5py(), into `symbolic_weights` obtained
    from get_symbolic_filtered_layer_weights_from_model().
    """
    if len(weight_values) != len(symbolic_weights):
        # They must cover the same number of layers.
        raise ValueError('number of weights aren\'t equal',
                         len(weight_values), len(symbolic_weights))
    else:
        # Similar to the keras source code: load_weights_from_hdf5_group.
        print("length of layers to load", len(weight_values))
        weight_value_tuples = []
        # Load weights layer by layer.
        for i in range(len(weight_values)):  # list(layers), i.e. list of lists (weights)
            assert len(symbolic_weights[i]) == len(weight_values[i][1])
            # symbolic_weights[i]: list of symbolic weights for layer i
            # weight_values[i][1]: list of weight ndarrays for layer i
            weight_value_tuples += zip(
                symbolic_weights[i],
                weight_values[i][1])  # equal-length lists, (name, value) mapping
        K.batch_set_value(weight_value_tuples)  # load in one batch to be efficient
def modify_set_weights(new_model, weights):
    if len(new_model.weights) != len(weights):
        raise ValueError('You called `set_weights(weights)` on layer "' +
                         new_model.name + '" with a weight list of length ' +
                         str(len(weights)) + ', but the layer was expecting ' +
                         str(len(new_model.weights)) +
                         ' weights. Provided weights: ' + str(weights)[:50] + '...')
    if not new_model.weights:
        return
    weight_value_tuples = []
    param_values = backend.batch_get_value(new_model.weights)
    r = 0
    l = len(new_model.weights)
    for pv, p, w in zip(param_values, new_model.weights, weights):
        if r == l - 2:
            # Manually swap the values assigned to the last two weights.
            new_18 = w
            old_18 = p
            # layer_18 = pv
            # weight_value_tuples.append((w, p))
        elif r == l - 1:
            new_19 = w
            old_19 = p
            # layer_19 = pv
            weight_value_tuples.append((old_18, new_19))
            weight_value_tuples.append((old_19, new_18))
            break
        elif pv.shape != w.shape:
            raise ValueError('Layer weight shape ' + str(pv.shape) +
                             ' not compatible with '
                             'provided weight shape ' + str(w.shape))
        else:
            weight_value_tuples.append((p, w))
        r += 1
    backend.batch_set_value(weight_value_tuples)
    return new_model
def reset_states(self) -> None:
    # The state of the metric will be reset at the start of each epoch.
    K.batch_set_value([(v, tf.zeros_like(v)) for v in self.variables])
def reset_states(self):
    reset_value = np.zeros(self.num_classes, dtype=np.int32)
    K.batch_set_value([(v, reset_value) for v in self.variables])
def reset_states(self):
    reset_value = tf.zeros(self.init_shape, dtype=self.dtype)
    K.batch_set_value([(v, reset_value) for v in self.variables])
def apply_ema_weights(self):
    """Back up the original model weights, then apply the averaged weights to the model."""
    self.old_weights = K.batch_get_value(self.model.weights)
    ema_weights = K.batch_get_value(self.ema_weights)
    K.batch_set_value(zip(self.model.weights, ema_weights))
def reset_states(self):
    tf.print(self.cmats, output_stream='file://cmat.out')
    K.batch_set_value([(v, np.zeros((self.num_classes, self.num_classes),
                                    dtype=np.float32))
                       for v in self.variables])
def reset_states(self) -> None:
    K.batch_set_value([(v, 0) for v in self.variables])
def reset_states(self):
    K.batch_set_value([(v, np.zeros(v.shape)) for v in self.variables])
def load_weights(model, filepath, lookup={}, ignore=[], transform=None, verbose=True):
    """Modified version of keras load_weights that loads as much as it can.
    Useful for transfer learning.

    Reads the weights of layers stored in file and copies them to a model
    layer. The name of each layer is used to match the file's layers with
    the model's. It is possible to have layers in the model that don't
    appear in the file. The loading stops if a problem is encountered, and
    the weights of the file layer that first caused the problem are returned.

    Args:
        model (keras.models.Model): The target.
        filepath (str): Source hdf5 file.
        lookup (dict): (optional) By default, the weights of each layer in
            the file are copied to the layer with the same name in the
            model. Using lookup you can replace the file name with a
            different model layer name, or with a list of model layer names,
            in which case the same weights will be copied to all of those
            layers.
        ignore (list): (optional) The list of model layer names to ignore.
        transform (function): (optional) Function that receives the list of
            weights read from a layer in the file and filters them to the
            weights that will be loaded in the target model.
        verbose (bool): Flag. Highly recommended to keep this true and to
            follow the print messages.

    Returns:
        Weights of the file layer which first caused the load to abort, or
        None on successful load.
    """
    if verbose:
        print('Loading', filepath, 'to', model.name)
    with h5py.File(filepath, mode='r') as f:
        # New file format.
        layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]

        # We batch weight value assignments in a single backend call
        # which provides a speedup in TensorFlow.
        weight_value_tuples = []
        for name in layer_names:
            if verbose:
                print(name)
            g = f[name]
            weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
            if len(weight_names):
                weight_values = [g[weight_name] for weight_name in weight_names]
                if verbose:
                    print('loading',
                          ' '.join(_str_shape(w) for w in weight_values))
                target_names = lookup.get(name, name)
                if isinstance(target_names, str):
                    target_names = [target_names]
                # Handle the case where lookup asks to send the same weights
                # to multiple layers.
                target_names = [
                    target_name for target_name in target_names
                    if target_name == name or target_name not in layer_names
                ]
                for target_name in target_names:
                    if verbose:
                        print(target_name)
                    try:
                        layer = model.get_layer(name=target_name)
                    except:
                        layer = None
                    if layer:
                        # The same weight_values are copied to each of the
                        # target layers.
                        symbolic_weights = (layer.trainable_weights +
                                            layer.non_trainable_weights)
                        if transform is not None:
                            transformed_weight_values = transform(
                                weight_values, layer)
                            if transformed_weight_values is not None:
                                if verbose:
                                    print('(%d->%d)' %
                                          (len(weight_values),
                                           len(transformed_weight_values)))
                                weight_values = transformed_weight_values
                        problem = len(symbolic_weights) != len(weight_values)
                        if problem and verbose:
                            print('(bad #wgts)')
                        if not problem:
                            weight_value_tuples += zip(symbolic_weights,
                                                       weight_values)
                    else:
                        problem = True
                    if problem:
                        if verbose:
                            if name in ignore or ignore == '*':
                                print('(skipping)')
                            else:
                                print('ABORT')
                        if not (name in ignore or ignore == '*'):
                            K.batch_set_value(weight_value_tuples)
                            return [np.array(w) for w in weight_values]
                if verbose:
                    print()
            else:
                if verbose:
                    print('skipping this empty file layer')
        K.batch_set_value(weight_value_tuples)
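# Hedged usage note for load_weights above (file and layer names are illustrative):
#
#   bad = load_weights(model, 'pretrained.h5',
#                      lookup={'conv1': 'block1_conv1'},  # file layer -> model layer
#                      ignore=['fc_out'])                 # skip the old classifier head
#   if bad is not None:
#       print('aborted; offending file weights:', [w.shape for w in bad])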
def reset_states(self):
    num_thresholds = len(to_list(self.thresholds))
    K.batch_set_value(
        [(v, np.zeros((num_thresholds,))) for v in self.variables])
def reset_old_weights(self):
    """Restore the model to the backed-up old weights."""
    K.batch_set_value(zip(self.model.weights, self.old_weights))
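# A minimal, runnable sketch of the swap/restore pattern that apply_ema_weights
# and reset_old_weights above implement (TF 2.x assumed; the model and the
# all-zeros stand-in for the EMA shadow values are illustrative).
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])

old_weights = K.batch_get_value(model.weights)          # back up the live weights
ema_values = [np.zeros_like(w) for w in old_weights]    # stand-in shadow values
K.batch_set_value(zip(model.weights, ema_values))       # evaluate with EMA weights here
K.batch_set_value(zip(model.weights, old_weights))      # then restore the originals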
def reset_state(self) -> None:
    # The state of the metric will be reset at the start of each epoch.
    K.batch_set_value([(v, np.zeros(v.shape)) for v in self.variables])
def reset_states(self):
    """Resets all of the metric state variables at the start of each epoch."""
    K.batch_set_value([(v, 0) for v in self.variables])
def reset_states(self):
    """Resets all of the metric state variables."""
    reset_value = tf.zeros(self.num_classes, dtype=self.dtype)
    K.batch_set_value([(v, reset_value) for v in self.variables])
def reset_states(self):
    # When eager execution is disabled, use np.zeros rather than tf.zeros.
    reset_value = np.zeros(self.init_shape, dtype=self.dtype)
    K.batch_set_value([(v, reset_value) for v in self.variables])
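# A self-contained sketch of the reset pattern shared by the reset_state(s)
# variants above (TF 2.x; the CountPositives metric is illustrative). Newer
# Keras versions name the hook reset_state, older ones reset_states.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

class CountPositives(tf.keras.metrics.Metric):
    def __init__(self, name='count_positives', **kwargs):
        super().__init__(name=name, **kwargs)
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        self.count.assign_add(tf.reduce_sum(tf.cast(y_true > 0, self.dtype)))

    def result(self):
        return self.count

    def reset_state(self):
        # Zero every state variable in one batched backend call.
        K.batch_set_value([(v, np.zeros(v.shape)) for v in self.variables])

m = CountPositives()
m.update_state(tf.constant([1.0, -1.0, 2.0]), tf.constant([0.0, 0.0, 0.0]))
print(float(m.result()))   # 2.0
m.reset_state()
print(float(m.result()))   # 0.0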