def track_metrics_on_train_start(self, model: tf.keras.Model, name, optimizer_name, loss_function, learning_rate, train_bg: bool, train_hp: bool, train_kps: bool, training_time, epochs, epoch_steps, batch_size, description):
    """Render the run's hyper-parameters as a markdown table, print it and log it as a TensorBoard text summary."""
    n_trainable = count_params(model.trainable_weights)
    n_non_trainable = count_params(model.non_trainable_weights)
    # Key/value rows of the summary table, in display order.
    rows = (
        ('optimizer_name', optimizer_name),
        ('epochs:epoch_steps:batch_size', f"{epochs}:{epoch_steps}:{batch_size}"),
        ('loss_function', loss_function),
        ('train_bg', train_bg),
        ('train_body_parts', train_hp),
        ('train_keypoints', train_kps),
        ('learning_rate', str(learning_rate)),
        ('all_params', f"{n_trainable + n_non_trainable:,}"),
        ('trainable_params', f"{n_trainable:,}"),
        ('non_trainable_params', f"{n_non_trainable:,}"),
        ('layers', len(model.layers)),
        ('training_time_1_epoch', training_time),
        ('description', description),
    )
    parts = [f'|name |{name} |\n|----|----|\n']
    # Keys are padded/clipped to 30 chars, values to 120, to keep columns aligned.
    for key, value in rows:
        parts.append(f"|**{key.ljust(30)[:30]}**|{str(value).ljust(120)[:120]}|\n")
    text_summary = ''.join(parts)
    print(text_summary)
    with self._file_writer.as_default():
        tf.summary.text(name, tf.constant(text_summary), description=text_summary, step=0)
def add_model(model):
    """Append the model's name, layer count and trainable-parameter count to the module list ``x``."""
    # Mirrors the counting logic from keras layer_utils.print_summary():
    # prefer the private collected-weights attribute when the model has one.
    if hasattr(model, '_collected_trainable_weights'):
        weights = model._collected_trainable_weights
    else:
        weights = model.trainable_weights
    x.append({
        'model': model.name,
        'layers': len(model.layers),
        'parameters': layer_utils.count_params(weights),
    })
def test_model_expansion(func, expected_names):
    """Expand a model built by ``func`` and check parameter count and layer names are as expected."""
    model, x_val = func(batch_size=32)
    expanded_model = ModelExpansion().process_model(CustomLayer, deepcopy(model))
    # Expansion must neither add nor drop trainable parameters.
    assert count_params(model.trainable_weights) == count_params(expanded_model.trainable_weights)
    # The expanded graph must expose exactly the expected layer names, in order.
    assert [layer.name for layer in expanded_model.layers] == expected_names
def count_params_keras(mod: tf.keras.models.Model):
    """Return ``(trainable_count, non_trainable_count)`` for a Keras model.

    Bridges private-attribute differences across TF/Keras releases:
    ``_collected_trainable_weights`` (TF 1.x graph networks),
    ``_unique_trainable_weights`` (TF r2.0) and plain ``trainable_weights``
    (TF r1.14 and modern releases).
    """
    # FIX: _check_trainable_weights_consistency was removed in later TF
    # releases; calling it unconditionally raises AttributeError there.
    # Guard it the same way the weight attributes below are guarded.
    if hasattr(mod, '_check_trainable_weights_consistency'):
        mod._check_trainable_weights_consistency()
    if hasattr(mod, '_collected_trainable_weights'):
        trainable_count = count_params(mod._collected_trainable_weights)
    elif hasattr(mod, '_unique_trainable_weights'):
        trainable_count = count_params(mod._unique_trainable_weights)  # TF r2.0
    else:
        trainable_count = count_params(mod.trainable_weights)  # TF r1.14
    non_trainable_count = count_params(mod.non_trainable_weights)
    return trainable_count, non_trainable_count
def unfreeze(self, from_layer, lr, epochs):
    """Freeze layers below ``from_layer``, unfreeze the rest, recompile and apply the new learning rate."""
    print(
        'GradualUnfreezing: Training from layer {} during {} epochs with lr {}'
        .format(from_layer, epochs, lr))
    # Layers before the cut-off stay frozen; everything from it on trains.
    for idx, layer in enumerate(self.model.layers):
        layer.trainable = idx >= from_layer
    # Recompile (reusing the existing loss/optimizer/metrics) so the new
    # trainable flags take effect, then rebuild the cached train function.
    self.model.compile(loss=self.model.loss,
                       optimizer=self.model.optimizer,
                       metrics=self.model.compiled_metrics._metrics)
    self.model.train_function = self.model.make_train_function()
    # Only an eager-tensor learning rate can be assigned in place here.
    if type(self.model.optimizer.lr).__name__ == 'EagerTensor':
        tf.keras.backend.set_value(self.model.optimizer.lr, lr)
    print('Trainable weights: {}, Non trainable weights: {}'.format(
        count_params(self.model.trainable_weights),
        count_params(self.model.non_trainable_weights)))
    self.current_lr = lr
def raw_statistics(self):
    """Collect raw sparsity statistics over all BinaryMask ops of the wrapped model.

    Returns a dict with overall sparsity rates, the configured threshold, and a
    per-mask breakdown under 'sparsity_statistic_by_layer'.
    """
    raw_sparsity_statistics = {}
    sparsity_levels = []
    mask_names = []
    weights_shapes = []
    weights_numbers = []
    total_weights_number = tf.constant(0)
    total_sparsified_weights_number = tf.constant(0)
    total_bkup_weights_number = tf.constant(0)
    wrapped_layers = collect_wrapped_layers(self._model)
    for wrapped_layer in wrapped_layers:
        for ops in wrapped_layer.weights_attr_ops.values():
            for op_name, op in ops.items():
                if op_name in self._op_names:
                    # Backup variables are tracked separately so they can be
                    # excluded from the whole-model weight count below.
                    if isinstance(op, BinaryMaskWithWeightsBackup):
                        total_bkup_weights_number += tf.size(op.bkup_var)
                    if isinstance(op, BinaryMask):
                        mask = wrapped_layer.ops_weights[op_name]['mask']
                        mask_names.append(mask.name)
                        weights_shapes.append(list(mask.shape))
                        weights_number = tf.size(mask)
                        weights_numbers.append(weights_number)
                        # Zeroed mask entries correspond to sparsified weights.
                        sparsified_weights_number = weights_number - tf.reduce_sum(tf.cast(mask, tf.int32))
                        sparsity_levels.append(sparsified_weights_number / weights_number)
                        total_weights_number += weights_number
                        total_sparsified_weights_number += sparsified_weights_number
    sparsity_rate_for_sparsified_modules = (total_sparsified_weights_number / total_weights_number).numpy()
    # Whole-model denominator excludes the masked weights themselves and any
    # backup copies, so only "real" model weights are counted.
    model_weights_number = count_params(self._model.weights) - total_weights_number - total_bkup_weights_number
    sparsity_rate_for_model = (total_sparsified_weights_number / model_weights_number).numpy()
    raw_sparsity_statistics.update({
        'sparsity_rate_for_sparsified_modules': sparsity_rate_for_sparsified_modules,
        'sparsity_rate_for_model': sparsity_rate_for_model,
        'sparsity_threshold': self._threshold
    })
    # Materialize the symbolic per-mask values in one batched fetch.
    sparsity_levels = tf.keras.backend.batch_get_value(sparsity_levels)
    weights_percentages = [weights_number / total_weights_number * 100 for weights_number in weights_numbers]
    weights_percentages = tf.keras.backend.batch_get_value(weights_percentages)
    mask_sparsity = list(zip(mask_names, weights_shapes, sparsity_levels, weights_percentages))
    raw_sparsity_statistics['sparsity_statistic_by_layer'] = []
    for mask_name, weights_shape, sparsity_level, weights_percentage in mask_sparsity:
        raw_sparsity_statistics['sparsity_statistic_by_layer'].append({
            'Name': mask_name,
            'Weight\'s Shape': weights_shape,
            'SR': sparsity_level,
            '% weights': weights_percentage
        })
    return raw_sparsity_statistics
def log_results(results: List[Dict]) -> None:
    """
    Logs the KT results formatted.

    :param results: the results list; parameter counts are read from the
        first entry's network (student) and the last entry's network (teacher).
    """
    kt_logging = logging.getLogger('KT')
    teacher_params = count_params(results[-1]['network'].trainable_weights)
    student_params = count_params(results[0]['network'].trainable_weights)
    # Assemble the whole report first, then emit it as a single log record.
    parts = ['Final results: \n']
    parts.append(
        'Parameters:\n Teacher params: {}\n Student params: {}\n Ratio: T/S={:.4} S/T={:.4}\n'
        .format(teacher_params, student_params,
                teacher_params / student_params,
                student_params / teacher_params))
    for result in results:
        parts.append(result['method'] + ': \n')
        parts.append(_get_model_results(result['evaluation'], result['network'].metrics_names))
    kt_logging.info(''.join(parts))
def print_all_layers(root_layer, print_fn=None):
    """Print a two-column tree summary of all sub-layers plus parameter totals."""
    emit = print if print_fn is None else print_fn
    width = 150  # 98
    # Fractional column positions get converted to absolute character columns.
    columns = [0.5, 1.0]
    if columns[-1] <= 1:
        columns = [int(width * frac) for frac in columns]
    emit("_" * width)
    # Header names for the different log elements.
    print_row(["Param # (output shape)", "Layer (type)"], columns, emit)
    emit("=" * width)
    tree = LayerTreeNode(root_layer, list())
    unbuilt = get_sub_layers(root_layer, tree, "", layers_with_params_only=True)
    print_layers(tree, "", [], columns, emit)
    emit("=" * width)
    n_trainable = count_params(root_layer.trainable_weights)
    n_non_trainable = count_params(root_layer.non_trainable_weights)
    emit("Total params: {:,}".format(n_trainable + n_non_trainable))
    emit("Trainable params: {:,}".format(n_trainable))
    emit("Non-trainable params: {:,}".format(n_non_trainable))
    if len(unbuilt) > 0:
        emit(
            "There are layers which are not yet built and accordingly the number of weights is unknown:"
        )
        for unbuilt_layer in unbuilt:
            emit(" " + unbuilt_layer)
    emit("_" * width)
def _plot_training_panel(fig, position, title, metric_names, history, num_epochs):
    """Plot the given history metrics on one of the figure's two panels with comma-formatted epoch ticks."""
    axs = fig.add_subplot(1, 2, position)
    axs.set_title(title)
    for metric in metric_names:
        axs.plot(np.arange(0, num_epochs, 1), history[metric], label=metric)
    # Re-fix the auto-chosen tick locations so the labels can be reformatted.
    axs.xaxis.set_major_locator(mticker.MaxNLocator(num_epochs))
    ticks_loc = axs.get_xticks().tolist()
    axs.xaxis.set_major_locator(mticker.FixedLocator(ticks_loc))
    label_format = '{:,.0f}'
    axs.set_xticklabels([label_format.format(x) for x in ticks_loc])
    axs.legend()


def evaluate_save_model(model, test_data, training_results, execution_time, learning_rate, epochs, save=True):
    """Plot training curves, evaluate on test data (loss/accuracy/F1) and optionally persist the model and metrics.

    :param model: trained Keras model (BERT classifiers are handled specially for F1).
    :param test_data: batched dataset of (x, y) pairs.
    :param training_results: Keras History object from model.fit.
    :param execution_time: wall-clock training time to record in the metrics file.
    :param learning_rate: learning rate to record in the metrics file.
    :param epochs: epoch count to record in the metrics file.
    :param save: when True, save the model, its history and a metrics JSON under "models/".
    """
    model_train_history = training_results.history
    num_epochs = len(model_train_history["loss"])
    # The two panels were previously duplicated inline; factored into a helper.
    fig = plt.figure(figsize=(20, 5))
    _plot_training_panel(fig, 1, 'Loss', ["loss", "val_loss"], model_train_history, num_epochs)
    _plot_training_panel(fig, 2, 'Accuracy', ["accuracy", "val_accuracy"], model_train_history, num_epochs)
    plt.show()
    # Evaluate on test data; F1 is appended as a third entry.
    evaluation_results = model.evaluate(test_data)
    ytrue = np.concatenate([y for x, y in test_data], axis=0)
    if isinstance(model, TFBertForSequenceClassification):
        # BERT predictions come back as logits; take the arg-max class.
        preds = model.predict(test_data)
        preds = np.argmax(preds['logits'], axis=1)
        evaluation_results.append(f1_score(ytrue, preds))
    else:
        # assumes a single sigmoid output — threshold at 0.5 for binary F1
        preds = model.predict(test_data).flatten()
        evaluation_results.append(f1_score(ytrue, preds > 0.5))
    print(evaluation_results)
    if save:
        save_model(model, model_name=model.name)
        model_size = get_model_size(model_name=model.name)
        # Save model history
        with open(os.path.join("models", model.name + "_train_history.json"), "w") as json_file:
            json_file.write(json.dumps(model_train_history, cls=JsonEncoder))
        trainable_parameters = count_params(model.trainable_weights)
        # NOTE(review): non-trainable count was computed but never written to
        # the metrics file in the original — kept for parity; confirm intent.
        non_trainable_parameters = count_params(model.non_trainable_weights)
        metrics = {
            "trainable_parameters": trainable_parameters,
            "execution_time": execution_time,
            "loss": evaluation_results[0],
            "accuracy": evaluation_results[1],
            "f1_score": evaluation_results[2],
            "model_size": model_size,
            "learning_rate": learning_rate,
            "epochs": epochs,
            "name": model.name,
            "id": int(time.time())
        }
        with open(os.path.join("models", model.name + "_metrics.json"), "w") as json_file:
            json_file.write(json.dumps(metrics, cls=JsonEncoder))
def print_layer_summary(layer, layer_name, full_layer_name, positions, print_fn, is_last, is_leaf, indent=2):
    """Print one tree row for ``layer`` (params + name/type), and, for leaves, one sub-row per weight.

    ``is_last`` is a list of booleans, one per ancestor level, used to draw the
    box-drawing connectors; ``positions`` are the column stops for print_row.
    """
    cls_name = layer.__class__.__name__
    if layer.built:
        num_params = layer.count_params()
        trainable_weights = layer.trainable_weights
        non_trainable_weights = layer.non_trainable_weights
        num_trainable_weights = count_params(trainable_weights)
        num_non_trainable_weights = count_params(non_trainable_weights)
        # Annotate how many of the params are trainable: all, none, or a count.
        if num_params > 0:
            if num_trainable_weights == num_params:
                numstring = ""
            elif num_trainable_weights == 0:
                numstring = "not "
            else:
                numstring = str(num_trainable_weights) + " "
            trainablestring = " [" + numstring + "trainable]"
        else:
            trainablestring = ""
    else:
        # Unbuilt layers have unknown weight counts.
        num_params = "?"
        trainablestring = " [? trainable]"
    # └ for the last sibling, ├ otherwise.
    if is_last[-1]:
        s = "\u2514"
    else:
        s = "\u251C"
    # Vertical guide lines for every ancestor that still has siblings below.
    prefix = ""
    for b in is_last[:-1]:
        if not b:
            prefix += "\u2502" + " " * indent
        else:
            prefix += " " * (indent + 1)
    fields = [
        prefix + s + "\u2500" * (indent - 1) + " " + str(num_params) + trainablestring,
        prefix + s + "\u2500" * (indent - 1) + " " + layer_name + " (" + cls_name + ")",
    ]
    print_row(fields, positions, print_fn)
    if is_leaf:
        if layer.built:
            # One extra indented row per individual weight tensor.
            weight_names = [
                weight.name.split("/")[-1] for weight in layer.weights
            ]
            num_trainable_as_list = [
                count_params([weight]) if weight.trainable else 0
                for weight in layer.weights
            ]
            weights = layer.get_weights()
            num_weights = len(weights)
            for weight_idx in range(num_weights):
                weight = weights[weight_idx]
                weight_name = weight_names[weight_idx]
                if not is_last[-1]:
                    sub_prefix = "\u2502" + " " * indent
                else:
                    sub_prefix = " " * (indent + 1)
                if weight_idx == (num_weights - 1):
                    ssub = "\u2514"
                else:
                    ssub = "\u251C"
                weight_size = np.size(weight)
                if weight_size > 0:
                    num_trainable_weights = num_trainable_as_list[weight_idx]
                    if num_trainable_weights == weight_size:
                        numstring = ""
                    elif num_trainable_weights == 0:
                        numstring = "not "
                    else:
                        numstring = str(num_trainable_weights) + " "
                    trainablestring = " [" + numstring + "trainable] " + str(
                        np.shape(weight))
                else:
                    trainablestring = ""
                fields = [
                    prefix + sub_prefix + ssub + "\u2500" * (indent - 1) + " " + str(weight_size) + trainablestring,
                    prefix + sub_prefix + ssub + "\u2500" * (indent - 1) + " " + weight_name,
                ]
                print_row(fields, positions, print_fn)
def summary(self):
    """Print trainable and non-trainable parameter counts inside this scope's logging block."""
    with logging_block(self.scope):
        for label, variables in (("Trainable", self.trainable_variables),
                                 ("Non-trainable", self.non_trainable_variables)):
            print(f"{label} params: {count_params(variables):>12,}")
def build(
    cls, model, model_paths: Model, volume_paths: Volume,
    train_generator: VolumeImgSegmSequence, val_generator: VolumeImgSegmSequence,
    nb_filters_0, input_shape, n_epochs
):
    """Assemble a Metadata record describing the model, its paths, datasets and architecture.

    Pulls one sample batch from each generator to record input/label shapes and
    dtypes, and reads optimizer/loss/parameter info off the compiled model.
    """
    train_x, train_y = train_generator[0]
    val_x, val_y = val_generator[0]
    batch_size = train_generator.batch_size
    n_batches_per_epoch = len(train_generator)
    n_examples_per_epoch = batch_size * n_batches_per_epoch
    # just syntactic sugar — short aliases for the nested Metadata types
    ds = Metadata.Dataset
    vol = Metadata.Dataset.Volume
    archi = Metadata.Architecture
    pth = Metadata.Paths
    optimizer_class = model.optimizer.__class__
    optimizer_name = f"{optimizer_class.__module__}.{optimizer_class.__name__}"
    learning_rate = float(model.optimizer.lr)
    # todo use keywords everywhere
    return cls(
        model_name=model.name,
        paths=pth(
            str(model_paths.model_path),
            str(model_paths.autosaved_model_path),
            str(model_paths.logger_path),
            str(model_paths.summary_path),
            str(model_paths.history_path),
            metadata_yml=str(model_paths.metadata_yml_path)
        ),
        train=ds(
            # todo make build classmethod for the sub classes as well
            # source volume: path, shape, dtype, raw and human-readable size
            vol(
                str(volume_paths.train_data_path),
                str(train_generator.source_volume.shape),
                train_generator.source_volume.dtype.name,
                train_generator.source_volume.nbytes,
                humanize.naturalsize(train_generator.source_volume.nbytes)
            ),
            # label volume, same fields as above
            vol(
                str(volume_paths.train_labels_path),
                str(train_generator.label_volume.shape),
                train_generator.label_volume.dtype.name,
                train_generator.label_volume.nbytes,
                humanize.naturalsize(train_generator.label_volume.nbytes)
            ),
            str(train_generator.axes),
            train_generator.crop_size,
            str(train_x.shape), train_x.dtype.name,
            str(train_y.shape), train_y.dtype.name
        ),
        val=ds(
            vol(
                str(volume_paths.val_data_path),
                str(val_generator.source_volume.shape),
                val_generator.source_volume.dtype.name,
                val_generator.source_volume.nbytes,
                humanize.naturalsize(val_generator.source_volume.nbytes)
            ),
            vol(
                str(volume_paths.val_labels_path),
                str(val_generator.label_volume.shape),
                val_generator.label_volume.dtype.name,
                val_generator.label_volume.nbytes,
                humanize.naturalsize(val_generator.label_volume.nbytes)
            ),
            str(val_generator.axes),
            val_generator.crop_size,
            str(val_x.shape), val_x.dtype.name,
            str(val_y.shape), val_y.dtype.name
        ),
        # total / trainable / non-trainable parameter counts, each with a
        # human-readable companion value
        architecture=archi(
            model.count_params(),
            humanize.intword(model.count_params()),
            count_params(model.trainable_weights),
            humanize.intword(count_params(model.trainable_weights)),
            count_params(model.non_trainable_weights),
            humanize.intword(count_params(model.non_trainable_weights)),
            model.factory_function,
            nb_filters_0,
            str(input_shape)
        ),
        batch_size=batch_size,
        n_batches_per_epoch=n_batches_per_epoch,
        n_examples_per_epoch=n_examples_per_epoch,
        n_examples_per_epoch_human=humanize.intcomma(n_examples_per_epoch),
        n_epochs=n_epochs,
        optimizer=optimizer_name,
        learning_rate=f"{learning_rate:.2e}",
        loss_func=f"{model.loss.__module__}.{model.loss.__name__}"
    )
def build(self):
    """Generate pruned candidate models, pick the best by adaptive-BN accuracy, fine-tune and save it.

    Pipeline: (1) sample ``num_candidates`` random channel configs and L1-prune
    the base net; (2) refresh BN statistics on a 1/30 training slice; (3) rank
    candidates by top-1 accuracy on that slice; (4) fine-tune the winner and
    report params/FLOPs/scores against the base model; (5) save it as *_pruned.h5.
    """
    pruned_model_list = list()
    val_acc_list = list()
    for i in range(self.num_candidates):
        # Pruning strategy
        channel_config = get_strartegy_generation(self.net, self.min_rate, self.max_rate)
        # Get pruned model using l1 pruning
        pruned_model = l1_pruning(self.net, channel_config)
        pruned_model_list.append(pruned_model)
        # Adaptive BN-statistics: forward passes in training mode refresh the
        # BN moving averages on a small slice of the training data.
        slice_idx = int(np.shape(self.x_train)[0]/30)
        sliced_x_train = self.x_train[:slice_idx, :, :, :]
        sliced_y_train = self.y_train[:slice_idx, :]
        sliced_train_dataset = tf.data.Dataset.from_tensor_slices((sliced_x_train, sliced_y_train)).batch(64)
        max_iters = 10
        for j in range(max_iters):
            for x_batch, y_batch in sliced_train_dataset:
                output = pruned_model(x_batch, training=True)
        # Evaluate top-1 accuracy for pruned model
        # NOTE(review): "validation" accuracy is measured on the same training
        # slice used for BN adaptation — confirm this is intended.
        small_val_datsaet = tf.data.Dataset.from_tensor_slices((sliced_x_train, sliced_y_train)).batch(64)
        small_val_acc = k.metrics.CategoricalAccuracy()
        for x_batch, y_batch in small_val_datsaet:
            output = pruned_model(x_batch)
            small_val_acc.update_state(y_batch, output)
        small_val_acc = small_val_acc.result().numpy()
        print(f'Adaptive-BN-based accuracy for {i}-th prunned model: {small_val_acc}')
        val_acc_list.append(small_val_acc)
    # Select the best candidate model
    val_acc_np = np.array(val_acc_list)
    best_candidate_idx = np.argmax(val_acc_np)
    best_model = pruned_model_list[best_candidate_idx]
    print(f'\n The best candidate is {best_candidate_idx}-th prunned model (Acc: {val_acc_np[best_candidate_idx]})')
    # Fine tuning
    metrics = 'accuracy'
    optimizer = Adam(learning_rate=self.lr)
    loss = tf.keras.losses.CategoricalCrossentropy()

    def get_callback_list(save_path, early_stop=True, lr_reducer=True):
        # NOTE(review): save_path is accepted but never used — no checkpoint
        # callback is created; `epsilon` is a deprecated ReduceLROnPlateau
        # argument (newer Keras expects `min_delta`). Confirm before upgrading.
        callback_list = list()
        if lr_reducer == True:
            callback_list.append(tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, cooldown=0, patience=10, mode='auto', epsilon=0.0001, min_lr=0))
        if early_stop == True:
            callback_list.append(tf.keras.callbacks.EarlyStopping(min_delta=0, patience=20, verbose=2, mode='auto'))
        return callback_list

    best_model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])
    self.net.compile(loss=loss, optimizer=optimizer, metrics=[metrics])
    callback_list = get_callback_list(self.result_dir)
    if self.data_augmentation == True:
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images
        datagen.fit(self.x_train)
        best_model.fit(datagen.flow(self.x_train, self.y_train, batch_size=self.bs),
                       steps_per_epoch=len(self.x_train) / self.bs, epochs=self.epochs,
                       validation_data=(self.x_test, self.y_test), callbacks=callback_list)
    else:
        best_model.fit(self.x_train, self.y_train, batch_size=self.bs, epochs=self.epochs,
                       validation_data=(self.x_test, self.y_test), callbacks=callback_list)
    # Get flops and parameters of the base model and pruned model
    params_prev = count_params(self.net.trainable_weights)
    flops_prev = count_flops(self.net)
    scores_prev = self.net.evaluate(self.x_test, self.y_test, batch_size=self.bs, verbose=0)
    print(f'\nTest loss (on base model): {scores_prev[0]}')
    print(f'Test accuracy (on base model): {scores_prev[1]}')
    print(f'The number of parameters (on base model): {params_prev}')
    print(f'The number of flops (on base model): {flops_prev}')
    params_after = count_params(best_model.trainable_weights)
    flops_after = count_flops(best_model)
    scores_after = best_model.evaluate(self.x_test, self.y_test, batch_size=self.bs, verbose=0)
    print(f'\nTest loss (on pruned model): {scores_after[0]}')
    print(f'Test accuracy (on pruned model): {scores_after[1]}')
    print(f'The number of parameters (on pruned model): {params_after}')
    print(f'The number of flops (on prund model): {flops_after}')
    # save the best candidate model
    slash_idx = self.model_path.rfind('/')
    ext_idx = self.model_path.rfind('.')
    save_name = self.model_path[slash_idx:ext_idx]
    tf.keras.models.save_model(
        model=best_model,
        filepath=self.result_dir+save_name+'_pruned.h5',
        include_optimizer=False
    )
def raw_statistics(self):
    """Collect raw RB-sparsity statistics over all masked ops of the wrapped model.

    Returns a dict with overall sparsity rates, the mean sigmoid mask
    probability, the target sparsity rate, and a per-mask breakdown under
    'sparsity_statistic_by_layer'.
    """
    raw_sparsity_statistics = {}
    sparsity_levels = []
    mask_names = []
    weights_shapes = []
    weights_numbers = []
    sparse_prob_sum = tf.constant(0.)
    total_weights_number = tf.constant(0)
    total_sparsified_weights_number = tf.constant(0)
    wrapped_layers = collect_wrapped_layers(self._model)
    for wrapped_layer in wrapped_layers:
        for ops in wrapped_layer.weights_attr_ops.values():
            for op_name in ops:
                if op_name in self._op_names:
                    mask = wrapped_layer.ops_weights[op_name]['mask']
                    # Number of surviving (non-zero) mask entries.
                    sw_loss = tf.reduce_sum(binary_mask(mask))
                    weights_number = tf.size(mask)
                    sparsified_weights_number = weights_number - tf.cast(
                        sw_loss, tf.int32)
                    mask_names.append(wrapped_layer.name + '_rb_mask')
                    weights_shapes.append(list(mask.shape))
                    weights_numbers.append(weights_number)
                    sparsity_levels.append(sparsified_weights_number / weights_number)
                    # Accumulate sigmoid(mask) to report the mean "keep" probability.
                    sparse_prob_sum += tf.math.reduce_sum(
                        tf.math.sigmoid(mask))
                    total_weights_number += weights_number
                    total_sparsified_weights_number += sparsified_weights_number
    sparsity_rate_for_sparsified_modules = (
        total_sparsified_weights_number / total_weights_number).numpy()
    # Whole-model denominator excludes the mask variables themselves.
    model_weights_number = count_params(
        self._model.weights) - total_weights_number
    sparsity_rate_for_model = (total_sparsified_weights_number /
                               model_weights_number).numpy()
    mean_sparse_prob = (sparse_prob_sum /
                        tf.cast(total_weights_number, tf.float32)).numpy()
    raw_sparsity_statistics.update({
        'sparsity_rate_for_sparsified_modules': sparsity_rate_for_sparsified_modules,
        'sparsity_rate_for_model': sparsity_rate_for_model,
        'mean_sparse_prob': mean_sparse_prob,
        'target_sparsity_rate': self.loss.target_sparsity_rate,
    })
    # Materialize the symbolic per-mask values in one batched fetch.
    sparsity_levels = tf.keras.backend.batch_get_value(sparsity_levels)
    weights_percentages = [
        weights_number / total_weights_number * 100
        for weights_number in weights_numbers
    ]
    weights_percentages = tf.keras.backend.batch_get_value(
        weights_percentages)
    mask_sparsity = list(
        zip(mask_names, weights_shapes, sparsity_levels, weights_percentages))
    raw_sparsity_statistics['sparsity_statistic_by_layer'] = []
    for mask_name, weights_shape, sparsity_level, weights_percentage in mask_sparsity:
        raw_sparsity_statistics['sparsity_statistic_by_layer'].append({
            'Name': mask_name,
            'Weight\'s Shape': weights_shape,
            'SR': sparsity_level,
            '% weights': weights_percentage
        })
    return raw_sparsity_statistics
def transfer(self, target_port: Port, evaluator: Evaluator, config_uids: List[int] = None) -> None:
    """
    Transfer models to target port
    :param target_port: port for which to train transfer-model
    :param evaluator: evaluator instance to store results
    :param config_uids: specify config_uids to transfer. If none, transfer all
    :return: None
    """
    if target_port.name not in self.transfer_defs:
        print(f"No transfer definition found for target port '{target_port.name}'")
        return
    # transfer definitions for specified target port
    tds = self.transfer_defs[target_port.name]
    output_dir = os.path.join(script_dir, os.pardir, "output")
    training_type = "transfer"
    print(f"TRANSFERRING MODELS TO TARGET PORT '{target_port.name}'")
    if config_uids is not None:
        print(f"Transferring configs -> {config_uids} <-")
    window_width = 50
    num_epochs = 25
    train_lr = 0.01
    fine_num_epochs = 20
    fine_tune_lr = 1e-5
    batch_size = 1024
    # skip port if fully transferred
    num_not_transferred = 0
    for td in tds:
        for config in self.transfer_configs:
            if not self._is_transferred(target_port.name, td.base_port_name, config.uid):
                num_not_transferred += 1
    num_transfers = len(tds) * len(self.transfer_configs)
    print(f"Transferred count {num_transfers - num_not_transferred}/{num_transfers}")
    if num_not_transferred == 0:
        print(f"All transfers done for target port '{target_port.name}': Skipping")
        return
    X_ts, y_ts = load_data(target_port, window_width)
    # Naive baseline: always predict the mean of the targets.
    baseline = mean_absolute_error(y_ts, np.full_like(y_ts, np.mean(y_ts)))
    evaluator.set_naive_baseline(target_port, baseline)
    print(f"Naive baseline: {baseline}")
    for td in tds:
        print(f".:'`!`':. TRANSFERRING PORT {td.base_port_name} TO {td.target_port_name} .:'`!`':.")
        print(f"- - Epochs {num_epochs} </> </> Learning rate {train_lr} - -")
        print(f"- - Window width {window_width} </> Batch size {batch_size} - -")
        base_port = self.pm.find_port(td.base_port_name)
        if base_port is None:
            raise ValueError(f"Unable to associate port with port name '{td.base_port_name}'")
        # apply transfer config
        for config in self.transfer_configs:
            if config_uids is not None and config.uid not in config_uids:
                continue
            if self._is_transferred(target_port.name, td.base_port_name, config.uid):
                print(f"Skipping config {config.uid}")
                continue
            print(f"\n.:'':. APPLYING CONFIG {config.uid} ::'':.")
            print(f"-> -> {config.desc} <- <-")
            print(f"-> -> nth_subset: {config.nth_subset} <- <-")
            print(f"-> -> trainable layers: {config.train_layers} <- <-")
            _, _, start_time, _, _ = decode_keras_model(
                os.path.split(td.base_model_path)[1])
            model_file_name = encode_keras_model(td.target_port_name, start_time,
                                                 td.base_port_name, config.uid)
            file_path = os.path.join(output_dir, "model", td.target_port_name, model_file_name)
            X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
                X_ts, y_ts, test_size=0.2, random_state=42, shuffle=False)
            train_optimizer = Adam(learning_rate=train_lr)
            fine_tune_optimizer = Adam(learning_rate=fine_tune_lr)
            checkpoint = ModelCheckpoint(file_path, monitor='val_mae', mode='min',
                                         verbose=2, save_best_only=True)
            early = EarlyStopping(monitor="val_mae", mode="min", patience=10, verbose=2)
            redonplat = ReduceLROnPlateau(monitor="val_mae", mode="min", patience=3, verbose=2)
            callbacks_list = [checkpoint, early, redonplat]
            # load base model
            model = load_model(td.base_model_path)
            X_train = X_train_orig
            X_test = X_test_orig
            y_train = y_train_orig
            y_test = y_test_orig
            # apply transfer configuration: keep every nth sample only
            if config.nth_subset > 1:
                # NOTE(review): original only warns and proceeds when there is
                # not enough data for the subset — preserved as-is; confirm.
                if X_train.shape[0] < config.nth_subset:
                    print(f"Unable to apply nth-subset. Not enough data")
                X_train = X_train_orig[0::config.nth_subset]
                X_test = X_test_orig[0::config.nth_subset]
                y_train = y_train_orig[0::config.nth_subset]
                y_test = y_test_orig[0::config.nth_subset]
                print(f"Orig shape: {X_train_orig.shape} {config.nth_subset} th-subset shape: {X_train.shape}")
                print(f"Orig shape: {X_test_orig.shape} {config.nth_subset} th-subset shape: {X_test.shape}")
                print(f"Orig shape: {y_train_orig.shape} {config.nth_subset} th-subset shape: {y_train.shape}")
                print(f"Orig shape: {y_test_orig.shape} {config.nth_subset} th-subset shape: {y_test.shape}")
            modified = False
            # freeze certain layers
            for layer in model.layers:
                if layer.name not in config.train_layers:
                    modified = True
                    print(f"setting layer {layer.name} to False")
                    layer.trainable = False
                else:
                    print(f"layer {layer.name} stays True")
            if modified:
                print(f"modified. compiling")
                # re-compile so the new trainable flags take effect
                model.compile(optimizer=train_optimizer, loss="mse", metrics=["mae"])
            trainable_count = count_params(model.trainable_weights)
            non_trainable_count = count_params(model.non_trainable_weights)
            print(f"Total params: {trainable_count + non_trainable_count}")
            print(f"Trainable params: {trainable_count}")
            print(f"Non trainable params: {non_trainable_count}")
            # transfer model
            result = model.fit(X_train, y_train, epochs=num_epochs, batch_size=batch_size,
                               verbose=2, validation_data=(X_test, y_test),
                               callbacks=callbacks_list)
            train_mae = result.history["mae"]
            val_mae = result.history["val_mae"]
            gc.collect()
            tune_result = None
            tune_train_mae = None
            tune_val_mae = None
            if config.tune:
                print(f"Fine-Tuning transferred model")
                # apply fine-tuning: unfreeze all but batch-normalization layers!
                for layer in model.layers:
                    if not layer.name.startswith("batch_normalization"):
                        layer.trainable = True
                model.compile(optimizer=fine_tune_optimizer, loss="mse", metrics=["mae"])
                tune_result = model.fit(X_train, y_train, epochs=fine_num_epochs,
                                        batch_size=batch_size, verbose=2,
                                        validation_data=(X_test, y_test),
                                        callbacks=callbacks_list)
                tune_train_mae = tune_result.history["mae"]
                tune_val_mae = tune_result.history["val_mae"]
            model.load_weights(file_path)

            # set evaluation
            def _compute_mae(_val_mae: List[float], _tune_val_mae: List[float]) -> float:
                if _tune_val_mae is not None:
                    _val_mae = _val_mae + _tune_val_mae
                # FIX: previously returned min(val_mae) — the enclosing-scope
                # list — so the fine-tuning MAEs concatenated above were never
                # considered. Use the local, possibly extended list.
                return min(_val_mae)

            evaluator.set_mae(target_port, start_time, _compute_mae(val_mae, tune_val_mae),
                              base_port, config.uid)
            y_pred = model.predict(X_test)
            grouped_mae = evaluator.group_mae(y_test, y_pred)
            # NOTE(review): set_mae is called twice (scalar then grouped) —
            # confirm the second call is not meant to be a set_grouped_mae.
            evaluator.set_mae(target_port, start_time, grouped_mae, base_port, config.uid)
            # save history
            history_file_name = encode_history_file(training_type, target_port.name,
                                                    start_time, td.base_port_name, config.uid)
            history_path = os.path.join(output_dir, "data", target_port.name, history_file_name)
            np.save(history_path, [
                result.history, tune_result.history if tune_result else None
            ])
            # plot history
            plot_dir = os.path.join(output_dir, "plot")
            plot_history(train_mae, val_mae, plot_dir, target_port.name, start_time,
                         training_type, td.base_port_name, config.uid,
                         tune_train_mae, tune_val_mae)
            plot_predictions(y_pred, y_test, plot_dir, target_port.name, start_time,
                             training_type, td.base_port_name, config.uid)
            self.set_transfer(target_port.name, td.base_port_name, config.uid)
            # release per-config objects before the next iteration
            del checkpoint, early, redonplat
            del X_train_orig, X_test_orig, y_train_orig, y_test_orig, model, X_train, y_train, X_test, y_test
            gc.collect()
            tf.keras.backend.clear_session()
            gc.collect()
    del X_ts, y_ts
def fit_and_netptune(
    build_model_fn,
    x_train,
    y_train,
    x_test,
    y_test,
    embeddings=None,
    batch_size=64,
    epochs=50,
    verbose=False,
    callbacks=None,
    use_neptune=False,
    use_wandb=True,
    run_meta_summary={},  # NOTE(review): mutable default args ({}) — read-only here, but fragile; confirm before refactoring
    run_meta_config={},
    space={},
):
    """Build a model from ``space``, train it with optional Neptune/W&B tracking, and evaluate it.

    :param build_model_fn: factory called with ``space['build_model']['args']``
        plus the selected embedding matrix.
    :param embeddings: mapping of embedding-matrix names to weights, indexed by
        ``space['data']['embedding_matrix_weights']``.
    :return: tuple of (model, (None, test_accuracy),
        (precision, recall, f1, support) — weighted averages).
    """
    if use_neptune:
        try:
            project = neptune.init()
        except:
            pass
    import copy
    # Deep-copy so mutating the kwargs does not alter the shared search space.
    build_model_fn_kwargs = copy.deepcopy(space["build_model"]["args"])
    build_model_fn_kwargs["embedding_matrix_weights"] = embeddings[
        space["data"]["embedding_matrix_weights"]]
    model = build_model_fn(**build_model_fn_kwargs)
    model.summary()
    total_count = model.count_params()
    non_trainable_count = count_params(model.non_trainable_weights)
    trainable_count = total_count - non_trainable_count
    if use_wandb:
        # Hashes identify the model configuration for de-duplication in W&B.
        wandb.run.summary["build_model_hash_kwargs"] = hashlib.md5(
            json.dumps(
                {
                    "build_model_fn": build_model_fn.__name__,
                    "args": space["build_model"]["args"],
                },
                sort_keys=True,
            ).encode()).hexdigest()
        wandb.run.summary["build_model_hash"] = hashlib.md5(
            json.dumps(space["build_model"],
                       sort_keys=True).encode()).hexdigest()
        wandb.run.summary["params_total_count"] = total_count
        wandb.run.summary["params_non_trainable_count"] = non_trainable_count
        wandb.run.summary["params_trainable_count"] = trainable_count
        for k in run_meta_summary.keys():
            wandb.run.summary[k] = run_meta_summary[k]
    if use_neptune:
        neptune_aborted = None

        # Neptune's abort callback stops the Keras fit loop.
        def stop_training():
            nonlocal neptune_aborted
            neptune_aborted = True
            model.stop_training = True

        tags = []
        try:
            # NOTE(review): `params` is not defined anywhere in this function
            # (the code that built it is commented out upstream) — this call
            # raises NameError, silently swallowed by the bare except. Confirm
            # whether experiment creation is expected to work at all.
            project.create_experiment(
                name="runner_qualified_name",
                params=params,
                upload_source_files=[],
                abort_callback=stop_training,
                tags=tags,
            )
        except:
            pass
    print(space)
    cb = []
    history = History()
    cb.append(history)
    if use_neptune:
        cb.append(NeptuneMonitor())
    if use_wandb:
        cb.append(
            WandbCallback(
                save_model=False,
                validation_data=(x_test, y_test),
                monitor="val_accuracy",
            ))
    if callbacks:
        cb += callbacks
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_test, y_test),
        callbacks=cb,
        verbose=verbose,
    )
    if use_neptune:
        print("neptune_aborted", neptune_aborted)
        if not neptune_aborted:
            try:
                neptune.stop()
            except:
                pass
    # predict_classes is the legacy Sequential API; returns class indices.
    y_pred = model.predict_classes(x_test, verbose=1)
    y_pred = np.squeeze(y_pred)
    accuracy_te = metrics.accuracy_score(y_test, y_pred)
    (
        precisions_weighted,
        recalls_weighted,
        f_measures_weighted,
        support_weighted,
    ) = metrics.precision_recall_fscore_support(y_test, y_pred, average="weighted")
    print("Testing Accuracy: {:.4f}".format(accuracy_te))
    return (
        model,
        (None, accuracy_te),
        (precisions_weighted, recalls_weighted, f_measures_weighted, support_weighted),
    )