def evaluate_model(self, model_filename, imgs_validation, msks_validation):
    """
    Evaluate the best model on the validation dataset
    """
    model = K.models.load_model(model_filename,
                                custom_objects=self.custom_objects)
    # Inference only: disable training-specific behavior such as dropout.
    K.backend.set_learning_phase(0)

    start_inference = time.time()
    print("Evaluating model on validation dataset. Please wait...")
    metrics = model.evaluate(imgs_validation,
                             msks_validation,
                             batch_size=self.batch_size,
                             verbose=1)
    elapsed_time = time.time() - start_inference
    print("{} images in {:.2f} seconds => {:.3f} images per "
          "second inference".format(
              imgs_validation.shape[0], elapsed_time,
              imgs_validation.shape[0] / elapsed_time))

    for idx, metric in enumerate(metrics):
        print("Validation dataset {} = {:.4f}".format(
            model.metrics_names[idx], metric))
        foundations.log_metric(model.metrics_names[idx], metric)
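# A minimal sketch of the `custom_objects` mapping that load_model above
# needs for a model compiled with custom metrics; `dice_coef` is a
# hypothetical example of such a metric, not taken from the source.
import keras as K

def dice_coef(y_true, y_pred, smooth=1.0):
    # Sorensen-Dice coefficient on the predicted and ground-truth masks.
    intersection = K.backend.sum(y_true * y_pred)
    union = K.backend.sum(y_true) + K.backend.sum(y_pred)
    return (2.0 * intersection + smooth) / (union + smooth)

custom_objects = {'dice_coef': dice_coef}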
def start(self):
    self.current_epoch = 0
    for epoch in range(self.num_epochs):
        self.current_epoch = epoch
        self.step()
        self.validate()

    # After training, log the best value recorded for each tracked metric.
    if settings.USE_FOUNDATIONS:
        for key, value in self.history_best.items():
            foundations.log_metric(key, float(value))
def on_epoch_end(self, epoch, logs=None):
    y_val = self.y_val
    datagen_val = DataGenerator(self.x_val, mode='test')
    y_pred = self.model.predict_generator(datagen_val,
                                          use_multiprocessing=False,
                                          max_queue_size=50)

    # Threshold the raw probabilities at 0.5 to get binary labels.
    y_pred_labels = np.zeros((len(y_pred)))
    y_pred_labels[y_pred.flatten() > 0.5] = 1

    self._val_f1 = f1_score(y_val, y_pred_labels.astype(int))
    print(f"val_f1: {self._val_f1:.4f}")
    self.f1_score_value.append(self._val_f1)

    if self.save_model:
        # The current score was just appended, so it equals the maximum
        # exactly when it is the best (or tied-best) score so far.
        if self._val_f1 >= max(self.f1_score_value):
            print("F1 score has improved. Saving model.")
            self.model.save(self.model_save_filename)

    try:
        foundations.log_metric('epoch_val_f1_score', self._val_f1)
        foundations.log_metric('best_f1_score', max(self.f1_score_value))
    except Exception as e:
        print(e)
    return
def validate(self):
    self.eval()
    for inputs, labels, data in self.val_dl:
        loss, output = self.forward(inputs, labels)
        output = output.detach().cpu()
        self.meter_val.update(labels, output, loss.item())

    dices, iou, loss = self.meter_val.log_metric()
    selection_metric = loss
    if selection_metric <= self.best_metric:
        # Remember the new best so later epochs compare against it.
        self.best_metric = selection_metric
        print(f'>>> Saving best model metric={selection_metric:.4f}')
        checkpoint = {'model': self.model}
        torch.save(checkpoint, 'checkpoints/best_model.pth')
        if settings.USE_FOUNDATIONS:
            foundations.save_artifact('checkpoints/best_model.pth',
                                      key='best_model_checkpoint')

    foundations.log_metric("train_loss", float(np.mean(self.meter_train.losses)))
    foundations.log_metric("val_loss", float(loss))
    foundations.log_metric("val_dice", float(dices[0]))
    foundations.log_metric("val_iou", float(iou))

    # Grab one batch for visualization, restarting the iterator once exhausted.
    try:
        inputs, labels, data = next(self.visual_iter)
    except StopIteration:
        self.visual_iter = iter(self.val_dl)
        inputs, labels, data = next(self.visual_iter)

    _, output = self.forward(inputs, labels)
    output = torch.sigmoid(output.detach().cpu())
    self.writer.add_images(f'validate/{self.current_epoch}_inputs.png',
                           self.unnorm(inputs), self.current_epoch)
    self.writer.add_images(f'validate/{self.current_epoch}_mask.png',
                           labels, self.current_epoch)
    self.writer.add_images(f'validate/{self.current_epoch}_predict.png',
                           output, self.current_epoch)
    print(f'Epoch {self.current_epoch}: val loss={loss:.4f} | val iou={iou:.4f}')
import foundations
from foundations_contrib.global_state import current_foundations_context, redis_connection

foundations.log_metric('ugh', 10)

with open('thomas_text.txt', 'w') as f:
    f.write('ugh_square')

foundations.save_artifact('thomas_text.txt', 'just_some_artifact')
foundations.log_param('blah', 20)

redis_connection.set('foundations_testing_job_id',
                     current_foundations_context().pipeline_context().job_id)
import numpy as np
import foundations

model_params = {
    'num_freq_bin': 240,
    'num_conv_blocks': 8,
    'num_conv_filters': 32,
    'spatial_dropout_fraction': 0.05,
    'num_dense_layers': 1,
    'num_dense_neurons': 50,
    'dense_dropout': 0,
    'learning_rate': 0.0001,
    'epochs': 100,
    'batch_size': 156,
    'residual_con': 2,
    'use_default': True,
    'model_save_dir': 'fitted_objects'
}

for k, v in model_params.items():
    foundations.log_param(k, v)

train_accuracy = np.random.rand()
foundations.log_metric("train_accuracy", train_accuracy)
foundations.log_metric("val_accuracy", train_accuracy * 0.85)
# foundations.save_artifact('visualize_inference_spectrogram.png', key='spectrogram')
def log_predictions_for_assertion(predictions):
    for prediction in predictions["Survived"]:
        foundations.log_metric("predictions", prediction)
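# A minimal usage sketch, assuming `predictions` is a pandas DataFrame with a
# binary "Survived" column (Titanic-style output); the values below are
# illustrative only.
import pandas as pd

predictions = pd.DataFrame({"Survived": [0, 1, 1, 0]})
log_predictions_for_assertion(predictions)  # logs one "predictions" metric per row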
import foundations
import sys

foundations.log_metric("Task", sys.argv[1])
import foundations
from time import sleep

foundations.log_metric("metric_int", 1)
foundations.log_metric("metric_large_int", 8888888888888888888888888)
foundations.log_metric("metric_list_of_ints", [1, 2])
foundations.log_metric("metric_long_list_of_ints", [1, 2] * 18)  # 36 elements
foundations.log_metric("metric_long_list_of_long_ints",
                       [8888888888888888888888888] * 23)
foundations.log_metric("metric_mixed_type", 1)

for i in range(20):
    foundations.log_metric("metric_repeat", i)
    sleep(.1)
from foundations import log_metric
from foundations.global_state import redis_connection, current_foundations_context

log_metric('hello', 1)
log_metric('hello', 2)
log_metric('world', 3)
def train(train_dl, val_base_dl, val_augment_dl, display_dl_iter, model,
          optimizer, n_epochs, max_lr, scheduler, criterion, train_source):
    records = Records()
    best_metric = 0.
    os.makedirs('checkpoints', exist_ok=True)

    for epoch in range(n_epochs):
        train_one_epoch(epoch, model, train_dl, max_lr, optimizer, criterion,
                        scheduler, records)
        validate(model, val_base_dl, criterion, records, data_name='base')
        validate(model, val_augment_dl, criterion, records, data_name='augment')

        if train_source == 'both':
            selection_metric = [
                getattr(records, 'base_val_accs')[-1],
                getattr(records, 'augment_val_accs')[-1]
            ]
            selection_metric = np.mean(selection_metric)
        else:
            selection_metric = getattr(records, f"{train_source}_val_accs")[-1]

        if selection_metric >= best_metric:
            print(
                f'>>> Saving best model metric={selection_metric:.4f} compared to previous best {best_metric:.4f}'
            )
            # Track the new best so later epochs compare against it.
            best_metric = selection_metric
            checkpoint = {
                'model': model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, 'checkpoints/best_model.pth')
            foundations.save_artifact('checkpoints/best_model.pth',
                                      key='pretrained_model_checkpoint')

        display_filename = f'{epoch}_display.png'
        display_predictions_on_image(model, val_base_dl.dataset.cached_path,
                                     display_dl_iter, name=display_filename)
        # Save eyeball plot to Atlas GUI
        foundations.save_artifact(display_filename, key=f'{epoch}_display')

    # Save metrics plot
    visualize_metrics(records, extra_metric=extra_metric, name='metrics.png')
    # Save metrics plot to Atlas GUI
    foundations.save_artifact('metrics.png', key='metrics_plot')

    # Log metrics to GUI at the epoch with the best validation accuracy
    if train_source == 'both':
        avg_metric = [
            getattr(records, 'base_val_accs'),
            getattr(records, 'augment_val_accs')
        ]
        avg_metric = np.mean(avg_metric, axis=0)
        max_index = np.argmax(avg_metric)
    else:
        max_index = np.argmax(getattr(records, f'{train_source}_val_accs'))

    useful_metrics = records.get_metrics()
    for metric in useful_metrics:
        foundations.log_metric(metric, float(getattr(records, metric)[max_index]))
def add(x, y):
    result = x + y
    foundations.log_metric('Score', result)
    return result
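# A minimal usage sketch: calling `add` returns the sum and, as a side effect,
# logs it under the 'Score' metric of the running job.
result = add(40, 2)  # logs Score = 42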
import foundations

foundations.log_metric('name', 'job1')
deployment = foundations.submit(command=["job2.py"])
deployment.wait_for_deployment_to_complete(wait_seconds=10)
import foundations
from time import sleep

foundations.log_metric("metric_str", str(1.))
foundations.log_metric(
    "metric_long_str",
    "asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdf"
)
foundations.log_metric("metric_long_list_of_str", [
    "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe",
    "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe", "qwe",
    "qwe", "qwe"
])
def train(train_dl, val_dl, test_dl, val_dl_iter, model, optimizer, scheduler,
          criterion, params):
    n_epochs = params['n_epochs']
    max_lr = params['max_lr']
    val_rate = params['val_rate']
    batch_repeat = params['batch_repeat']

    records = Records()
    best_metric = 1e9
    os.makedirs('checkpoints', exist_ok=True)

    for epoch in range(n_epochs):
        train_one_epoch(epoch, model, train_dl, max_lr, optimizer, criterion,
                        scheduler, records, batch_repeat)
        if epoch % val_rate == 0:
            validate(model, val_dl, criterion, records)
            # validate(model, test_dl, criterion, records)

            selection_metric = getattr(records, "val_losses")[-1]
            if selection_metric <= best_metric:
                print(
                    f'>>> Saving best model metric={selection_metric:.4f} compared to previous best {best_metric:.4f}'
                )
                # Update the best metric only after reporting the previous one.
                best_metric = selection_metric
                checkpoint = {'model': model}
                torch.save(checkpoint, 'checkpoints/best_model.pth')
                if settings.USE_FOUNDATIONS:
                    foundations.save_artifact('checkpoints/best_model.pth',
                                              key='best_model_checkpoint')

        # Save eyeball plot to Atlas GUI
        if settings.USE_FOUNDATIONS:
            display_filename = f'{epoch}_display.png'
            try:
                data = next(val_dl_iter)
            except StopIteration:
                val_dl_iter = iter(val_dl)
                data = next(val_dl_iter)
            # display_predictions_on_image(model, data, name=display_filename)
            # foundations.save_artifact(display_filename, key=f'{epoch}_display')

    # Save metrics plot
    visualize_metrics(records, extra_metric=extra_metric, name='metrics.png')
    # Save metrics plot to Atlas GUI
    if settings.USE_FOUNDATIONS:
        foundations.save_artifact('metrics.png', key='metrics_plot')

    # Log metrics to GUI at the epoch with the lowest validation loss
    max_index = np.argmin(getattr(records, 'val_losses'))
    useful_metrics = records.get_useful_metrics()
    for metric in useful_metrics:
        if settings.USE_FOUNDATIONS:
            foundations.log_metric(metric, float(getattr(records, metric)[max_index]))
        else:
            print(metric, float(getattr(records, metric)[max_index]))
def train(train_dl, val_dl, test_dl, val_dl_iter, model, optimizer, scheduler,
          criterion, params, train_sampler, val_sampler, rank):
    n_epochs = params['n_epochs']
    max_lr = params['max_lr']
    val_rate = params['val_rate']
    batch_repeat = params['batch_repeat']

    history_best = {}
    best_metric = 0
    if rank == 0:
        os.makedirs('checkpoints', exist_ok=True)
        os.makedirs('tensorboard', exist_ok=True)
        if settings.USE_FOUNDATIONS:
            foundations.set_tensorboard_logdir('tensorboard')
        writer = SummaryWriter("tensorboard")
    else:
        writer = None

    for epoch in range(n_epochs):
        train_records = DistributedClassificationMeter(writer=writer,
                                                       phase="train",
                                                       epoch=epoch,
                                                       workers=params["gpus"],
                                                       criterion=criterion)
        if train_sampler:
            train_sampler.set_epoch(epoch)
        train_one_epoch(epoch, model, train_dl, max_lr, optimizer, criterion,
                        scheduler, train_records, batch_repeat, rank, writer,
                        params)

        if epoch % val_rate == 0:
            val_records = DistributedClassificationMeter(
                writer=writer,
                phase="validation",
                epoch=epoch,
                workers=params["gpus"],
                criterion=criterion)
            if val_sampler:
                val_sampler.set_epoch(epoch)
            validate(model, val_dl, criterion, val_records, rank)

            # If you change the selection metric, remember to flip the
            # comparison below and to adjust the initial value of best_metric!
            info = val_records.log_metric(write_scalar=False)
            selection_metric = info["acc"]
            if selection_metric >= best_metric and rank == 0:
                print(
                    f'>>> Saving best model metric={selection_metric:.4f} compared to previous best {best_metric:.4f}'
                )
                best_metric = selection_metric
                checkpoint = {
                    'model': model.module.state_dict(),
                    'params': params
                }
                history_best = {
                    "train_" + key: value
                    for key, value in train_records.get_metric().items()
                }
                for key, value in val_records.get_metric().items():
                    history_best["val_" + key] = value
                torch.save(checkpoint, 'checkpoints/best_model.pth')
                if settings.USE_FOUNDATIONS:
                    foundations.save_artifact('checkpoints/best_model.pth',
                                              key='best_model_checkpoint')

    # Log metrics to GUI
    if rank == 0:
        for metric, value in history_best.items():
            if settings.USE_FOUNDATIONS:
                foundations.log_metric(metric, float(value))
            else:
                print(metric, float(value))
import foundations

foundations.log_metric('hello', 20)
foundations.set_tag('this_tag', value='this_value')
foundations.set_tag('that_tag', value='that_value')
import foundations

# Logs a 5000-character string value under a key named 'int_metric'.
foundations.log_metric('int_metric', '5' * 5000)
import foundations
from foundations import set_tag
from model import *

set_tag('model', 'cnn')

def print_words():
    print('Hello World!')

print_words()

addition_result = add(82, 2)
set_tag('Loss', addition_result)

subtraction_result = subtract(44, 2)
foundations.log_metric('Accuracy', subtraction_result)

cached_subtraction_result = subtract(44, 2)
foundations.log_metric('Cached_accuracy', cached_subtraction_result)
def evaluate(self, xtrain, ytrain, xval, yval, num_examples=1):
    ytrain_pred = self.predict_labels(xtrain, raw_prob=True)
    yval_pred = self.predict_labels(xval, raw_prob=True)

    # Load the tuned decision threshold if one was saved; otherwise use 0.5.
    try:
        self.optimum_threshold_filename = f"model_threshold_{'_'.join(str(v) for k, v in model_params.items())}.npy"
        self.opt_threshold = np.load(
            os.path.join(f"{model_params['model_save_dir']}",
                         self.optimum_threshold_filename)).item()
        print(f"loaded optimum threshold: {self.opt_threshold}")
    except Exception:
        self.opt_threshold = 0.5

    ytrain_pred_labels = self.get_labels_from_prob(ytrain_pred, threshold=self.opt_threshold)
    yval_pred_labels = self.get_labels_from_prob(yval_pred, threshold=self.opt_threshold)

    train_accuracy = accuracy_score(ytrain, ytrain_pred_labels)
    val_accuracy = accuracy_score(yval, yval_pred_labels)
    train_f1_score = f1_score(ytrain, ytrain_pred_labels)
    val_f1_score = f1_score(yval, yval_pred_labels)
    print(f"train accuracy: {train_accuracy}, train_f1_score: {train_f1_score}, "
          f"val accuracy: {val_accuracy}, val_f1_score: {val_f1_score}")

    try:
        foundations.log_metric('train_accuracy', np.round(train_accuracy, 2))
        foundations.log_metric('val_accuracy', np.round(val_accuracy, 2))
        foundations.log_metric('train_f1_score', np.round(train_f1_score, 2))
        foundations.log_metric('val_f1_score', np.round(val_f1_score, 2))
        foundations.log_metric('optimum_threshold', np.round(self.opt_threshold, 2))
    except Exception as e:
        print(e)

    # True positive examples
    ind_tp = np.argwhere(np.equal((yval_pred_labels + yval).astype(int), 2)).reshape(-1, )
    # True negative examples
    ind_tn = np.argwhere(np.equal((yval_pred_labels + yval).astype(int), 0)).reshape(-1, )
    # False positive examples
    ind_fp = np.argwhere(np.greater(yval_pred_labels, yval)).reshape(-1, )
    # False negative examples
    ind_fn = np.argwhere(np.greater(yval, yval_pred_labels)).reshape(-1, )

    path_to_save_spectrograms = './spectrograms'
    if not os.path.isdir(path_to_save_spectrograms):
        os.makedirs(path_to_save_spectrograms)

    # Clear out spectrograms left over from previous runs.
    specs_saved = os.listdir(path_to_save_spectrograms)
    if len(specs_saved) > 0:
        for file_ in specs_saved:
            os.remove(os.path.join(path_to_save_spectrograms, file_))

    # The indices above refer to the validation set, so sample from xval.
    ind_random_tp = np.random.choice(ind_tp, num_examples).reshape(-1, )
    tp_x = [xval[i] for i in ind_random_tp]
    ind_random_tn = np.random.choice(ind_tn, num_examples).reshape(-1, )
    tn_x = [xval[i] for i in ind_random_tn]
    ind_random_fp = np.random.choice(ind_fp, num_examples).reshape(-1, )
    fp_x = [xval[i] for i in ind_random_fp]
    ind_random_fn = np.random.choice(ind_fn, num_examples).reshape(-1, )
    fn_x = [xval[i] for i in ind_random_fn]

    print("Plotting spectrograms to show what the model has learned")
    for i in range(num_examples):
        plot_spectrogram(tp_x[i], path=os.path.join(path_to_save_spectrograms, f'true_positive_{i}.png'))
        plot_spectrogram(tn_x[i], path=os.path.join(path_to_save_spectrograms, f'true_negative_{i}.png'))
        plot_spectrogram(fp_x[i], path=os.path.join(path_to_save_spectrograms, f'false_positive_{i}.png'))
        plot_spectrogram(fn_x[i], path=os.path.join(path_to_save_spectrograms, f'false_negative_{i}.png'))
        try:
            foundations.save_artifact(os.path.join(path_to_save_spectrograms, f'true_positive_{i}.png'), key='true_positive_example')
            foundations.save_artifact(os.path.join(path_to_save_spectrograms, f'true_negative_{i}.png'), key='true_negative_example')
            foundations.save_artifact(os.path.join(path_to_save_spectrograms, f'false_positive_{i}.png'), key='false_positive_example')
            foundations.save_artifact(os.path.join(path_to_save_spectrograms, f'false_negative_{i}.png'), key='false_negative_example')
        except Exception as e:
            print(e)
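# A minimal sketch of the `plot_spectrogram` helper assumed above; the real
# helper is defined elsewhere, so this is a hypothetical stand-in that renders
# a 2-D spectrogram array to a PNG file.
import matplotlib
matplotlib.use('Agg')  # render without a display, e.g. on a worker
import matplotlib.pyplot as plt
import numpy as np

def plot_spectrogram(spec, path):
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.imshow(np.squeeze(spec), aspect='auto', origin='lower')
    ax.set_xlabel('time frames')
    ax.set_ylabel('frequency bins')
    fig.savefig(path, bbox_inches='tight')
    plt.close(fig)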
import foundations
from time import sleep

foundations.log_metric("metric_float", 1.)
foundations.log_metric("metric_large_float", 999999999.8888888888888888)
foundations.log_metric("metric_list_of_floats", [1., 2.])
foundations.log_metric("metric_long_list_of_floats", [1., 2.] * 10)  # 20 elements
foundations.log_metric("metric_long_list_of_long_floats",
                       [999999999.8888888888888888] * 18)
foundations.log_metric("metric_mixed_type", 2.222)

for i in range(20):
    foundations.log_metric("metric_repeat", i / 3.)
    sleep(.1)
def training_loop(iteration):
    """The main training loop encapsulated in a function."""
    step = 0
    epoch = 0
    print("Running training loop")
    while True:
        sess.run(dataset.train_initializer)
        epoch += 1

        # End training if we have passed the epoch limit.
        if epoch > NUM_EPOCHS:
            break

        start_time = time.time()

        # One training epoch.
        print("Epoch: {} out of {}".format(epoch, NUM_EPOCHS))
        while True:
            try:
                step += 1
                # End training if we have passed the step limit,
                # e.g. training_len = ('iterations', 50000).
                if training_len[0] == 'iterations' and step > training_len[1]:
                    return

                # Train.
                step_time = time.time()
                records = sess.run([
                    optimize, model.loss, model.targets, model.outputs,
                    model.inputs
                ] + model.train_summaries, {dataset.handle: train_handle})[1:]
                loss, targets, outputs, inputs = (records[0], records[1],
                                                  records[2], records[3])
                records = records[4:]
                record_summaries(step, records, train_file)

                if step % 10 == 0:
                    logger.info(
                        "Step {} - Loss: {} - Time per step: {}".format(
                            step, loss, time.time() - step_time))
                    collect_test_summaries(step)
            except tf.errors.OutOfRangeError:
                break

        logger.info("Time for epoch: {}".format(time.time() - start_time))

    # Save example inputs/targets/outputs from the last completed epoch,
    # along with the tensorboard summaries, as Atlas artifacts.
    outputs = output_to_rgb(outputs)
    targets = output_to_rgb(targets)
    inputs_artifact_path = save_image(inputs, 'inputs_{}'.format(iteration) + '.png')
    targets_artifact_path = save_image(targets, 'targets_{}'.format(iteration) + '.png')
    outputs_artifact_path = save_image(outputs, 'outputs_{}'.format(iteration) + '.png')

    tensorboard_path = 'lottery_ticket/{}/unet/summaries/'.format(iteration)
    tensorboard_file = os.path.join(tensorboard_path,
                                    os.listdir(tensorboard_path)[0])
    f9s.save_artifact(tensorboard_file, 'tensorboard_{}'.format(iteration))
    f9s.log_metric('loss_{}'.format(iteration), float(loss))
    f9s.save_artifact(inputs_artifact_path, 'inputs_{}'.format(iteration))
    f9s.save_artifact(targets_artifact_path, 'targets_{}'.format(iteration))
    f9s.save_artifact(outputs_artifact_path, 'outputs_{}'.format(iteration))
    return
import foundations

foundations.log_metric('key', 'value')
foundations.set_tag('key', value='value')
foundations.log_param('param', 'param_value')

print('Hello World!')
import foundations

print("I should not print out because the worker image is invalid")
foundations.log_metric("Worker", "invalid image")
params = load_parameters()
seed_everything(params['seed'])
log_params(params)
params = parse_params(params)
print(params)

model = CIFAR_Module(params).cuda()
lr_logger = LearningRateLogger()
logger = TensorBoardLogger("../logs", name=params["backbone"])

if USE_FOUNDATIONS:
    from foundations import set_tensorboard_logdir
    set_tensorboard_logdir(f'../logs/{params["backbone"]}')

checkpoint_callback = ModelCheckpoint(save_top_k=1,
                                      monitor='acc',
                                      prefix=str(params["seed"]))
t_params = get_trainer_params(params)
trainer = Trainer(callbacks=[lr_logger],
                  logger=logger,
                  checkpoint_callback=checkpoint_callback,
                  **t_params)
trainer.fit(model)

if USE_FOUNDATIONS and checkpoint_callback.best_model_path != "":
    from foundations import log_metric, save_artifact
    save_artifact(checkpoint_callback.best_model_path,
                  key='best_model_checkpoint')
    log_metric("val_acc", float(checkpoint_callback.best_model_score))

print("Training finished")
import foundations
import json

params = foundations.load_parameters()

foundations.log_metric('how_i_lern', params['learning_rate'])
foundations.log_metric('first_boi', params['layers'][0]['neurons'])
foundations.log_metric('second_boi', params['layers'][1]['neurons'])
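# A sketch of the parameter payload the script above expects. However the job
# is parameterized (a parameters file or params passed at submission time),
# foundations.load_parameters() must return a dict shaped like this; the
# values here are illustrative only:
#
# {
#     "learning_rate": 0.001,
#     "layers": [
#         {"neurons": 128},
#         {"neurons": 64}
#     ]
# }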
""" This sample main.py shows basic Atlas functionality. In this script, we will log some arbitrary values & artifacts that can be viewed in the Atlas GUI """ import foundations depth = 3 epochs = 5 batch_size = 256 lrate = 1e-3 # Log some hyper-parameters foundations.log_param('depth', depth) foundations.log_params({'epochs': epochs, 'batch_size': batch_size, 'learning_rate': lrate}) # Log some metrics accuracy = 0.9 loss = 0.1 foundations.log_metric('accuracy', accuracy) foundations.log_metric('loss', loss) # Log an artifact that is already saved to disk foundations.save_artifact('README.txt', 'Project_README')
def get_and_log_python_path_as_metric():
    import sys

    python_path = sys.executable
    foundations.log_metric("python_path", python_path)
import foundations
from foundations import set_tag
from foundations_contrib.global_state import current_foundations_job
from model import *

set_tag('model', 'cnn')

def print_words():
    print(f'Job \'{current_foundations_job().job_id}\' deployed')
    print('Hello World!')

print_words()

addition_result = add(82, 2)
set_tag('Loss', addition_result)

subtraction_result = subtract(44, 2)
foundations.log_metric('Accuracy', subtraction_result)
def experiment(make_dataset,
               make_model,
               train_model,
               prune_masks,
               iterations,
               presets=None):
    """Run the lottery ticket experiment for the specified number of iterations.

    Args:
      make_dataset: A function that, when called with no arguments, will create
        an object that descends from dataset_base.
      make_model: A function that, when called with four arguments
        (input_tensor, label_tensor, presets, masks), creates a model object
        that descends from model_base. Presets and masks are optional.
      train_model: A function that, when called with four arguments (session,
        pruning iteration number, dataset, model), trains the model using the
        dataset and returns the model's initial and final weights as
        dictionaries.
      prune_masks: A function that, when called with two arguments (dictionary
        of current masks, dictionary of final weights), returns a new
        dictionary of masks that have been pruned. Each dictionary key is the
        name of a tensor in the network; each value is a numpy array containing
        the values of the tensor (1/0 values for mask, weights for the
        dictionary of final weights).
      iterations: The number of pruning iterations to perform.
      presets: (optional) The presets to use for the first iteration of
        training. In the form of a dictionary where each key is the name of a
        tensor and each value is a numpy array of the values to which that
        tensor should be initialized.
    """
    # A helper function that trains the network once according to the behavior
    # determined internally by the train_model function.
    logger.info('Training once according to the base model behaviour')

    def train_once(iteration, presets=None, masks=None):
        tf.reset_default_graph()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        dataset = make_dataset()
        input_tensor, label_tensor = dataset.placeholders
        model = make_model(input_tensor, label_tensor, presets=presets, masks=masks)
        return train_model(sess, iteration, dataset, model)

    # Run once normally.
    initial, final = train_once(0, presets=presets)

    logger.info('Create the initial masks with no weights pruned.')
    masks = {}
    for k, v in initial.items():
        masks[k] = np.ones(v.shape)

    logger.info('Begin the training loop.')
    for iteration in range(1, iterations + 1):
        logger.info('Prune the network, iteration {}'.format(iteration))
        masks = prune_masks(masks, final)
        num_weights = int(sum([v.sum() for v in masks.values()]))
        f9s.log_metric('num_weights_{}'.format(iteration), num_weights)

        logger.info('Train the network again after pruning')
        _, final = train_once(iteration, presets=initial, masks=masks)
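# A minimal sketch of a `prune_masks` implementation matching the contract in
# the docstring above: layer-wise magnitude pruning. The 20% pruning fraction
# is an assumption for illustration, not taken from the source.
import numpy as np

def magnitude_prune_masks(masks, final_weights, fraction=0.2):
    new_masks = {}
    for name, mask in masks.items():
        weights = final_weights[name]
        # Magnitudes of the weights that are still alive under the current mask.
        alive = np.abs(weights[mask == 1])
        if alive.size == 0:
            new_masks[name] = mask
            continue
        # Threshold below which the smallest `fraction` of live weights fall.
        threshold = np.percentile(alive, fraction * 100)
        # Keep a weight only if it was alive and its magnitude clears the threshold.
        new_masks[name] = np.where(np.abs(weights) >= threshold, mask, 0.0)
    return new_masks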