def __init__(self, output_path, output_file_ext, reporter):
    """Store the output settings and prepare the output directory.

    Args:
        output_path: Directory path handed to ``util.ensure_dir``.
        output_file_ext: Stored on the instance as ``output_file_ext``.
        reporter: Object providing ``verbose(message)``; stored on the
            instance and used to announce the directory check.
    """
    self.output_path = output_path
    self.output_file_ext = output_file_ext
    self.reporter = reporter

    # The escape codes colour the path blue in the verbose output.
    notice = "Ensure directory exists: \033[34m%s\033[0m" % (self.output_path)
    self.reporter.verbose(notice)

    util.ensure_dir(self.output_path)
def save(self, data):
    """Write one rendered item to disk and report the outcome.

    Args:
        data: Indexable pair ``(html, item)``. ``html`` is the rendered text
            (or ``False`` when there is nothing to write); ``item`` must
            provide ``get_output_path()``, ``get_file_path()``,
            ``get_template()`` and ``on_save()``.

    Returns:
        ``True`` when the file was written and every hook ran, ``False`` when
        ``html`` is ``False`` or any step of the save raised.
    """
    html, item = data[0], data[1]
    filename = item.get_output_path()
    new_directory = os.path.dirname(filename)

    # Equality (not identity) is kept so the skip condition is unchanged.
    if html == False:  # noqa: E712
        return False

    try:
        signal('wranglerBeforeSaveItem').send('item', item=item, path=filename)

        util.ensure_dir(new_directory)
        # The context manager closes (and flushes) the file before the
        # post-save hooks run, even if the write itself fails.
        with open(filename, "w") as file_object:
            file_object.write(html.encode('utf8'))

        self.reporter.print_stdout(item.get_file_path(), filename, item.get_template())
        item.on_save()

        signal('wranglerOnSaveItem').send('item', item=item, path=filename)
    except Exception:
        # Best effort: report the failure and keep going with other items.
        print(messages.file_write_error % (filename))
        traceback.print_exc()
        self.reporter.log_item_saved(item.get_file_path(), item.get_template(), 0)
        return False

    # Success is logged here rather than in a ``finally`` clause: a
    # ``return`` inside ``finally`` would override the ``return False`` above
    # and log every failed item as saved as well.
    self.reporter.log_item_saved(item.get_file_path(), item.get_template(), 1)
    return True
def save(self, data):
    """Write one rendered item to disk and report the outcome.

    Args:
        data: Indexable pair ``(html, item)``. ``html`` is the rendered text
            (or ``False`` when there is nothing to write); ``item`` must
            provide ``get_output_path()``, ``get_file_path()``,
            ``get_template()`` and ``on_save()``.

    Returns:
        ``True`` when the file was written and every hook ran, ``False`` when
        ``html`` is ``False`` or any step of the save raised.
    """
    html, item = data[0], data[1]
    filename = item.get_output_path()
    new_directory = os.path.dirname(filename)

    # Equality (not identity) is kept so the skip condition is unchanged.
    if html == False:  # noqa: E712
        return False

    try:
        signal('wranglerBeforeSaveItem').send('item', item=item, path=filename)

        util.ensure_dir(new_directory)
        # The context manager closes (and flushes) the file before the
        # post-save hooks run, even if the write itself fails.
        with open(filename, "w") as file_object:
            file_object.write(html.encode('utf8'))

        self.reporter.print_stdout(item.get_file_path(), filename, item.get_template())
        item.on_save()

        signal('wranglerOnSaveItem').send('item', item=item, path=filename)
    except Exception:
        # Best effort: report the failure and keep going with other items.
        print(messages.file_write_error % (filename))
        traceback.print_exc()
        self.reporter.log_item_saved(item.get_file_path(), item.get_template(), 0)
        return False

    # Success is logged here rather than in a ``finally`` clause: a
    # ``return`` inside ``finally`` would override the ``return False`` above
    # and log every failed item as saved as well.
    self.reporter.log_item_saved(item.get_file_path(), item.get_template(), 1)
    return True
def __init__(self, checkpoint_dir, monitor, logger, arch,
             save_best_only=True, best_model_name=None,
             epoch_model_name=None, mode='min', epoch_freq=1, best=None):
    """Configure the checkpoint callback and prepare its directory.

    Args:
        checkpoint_dir: Format string with an ``{arch}`` placeholder; the
            formatted result is passed to ``ensure_dir``.
        monitor: Stored as ``self.monitor``.
        logger: Stored as ``self.logger``.
        arch: Value substituted for ``{arch}`` in ``checkpoint_dir``.
        save_best_only: Stored as ``self.save_best_only``.
        best_model_name: Stored as ``self.best_model_name``.
        epoch_model_name: Stored as ``self.epoch_model_name``.
        mode: ``'min'`` selects ``np.less`` with an initial best of ``+inf``;
            ``'max'`` selects ``np.greater`` with an initial best of ``-inf``.
        epoch_freq: Stored as ``self.epoch_freq``.
        best: Optional previously recorded best value; when given it replaces
            the initial best.

    Raises:
        ValueError: If ``mode`` is neither ``'min'`` nor ``'max'``.
    """
    self.monitor = monitor
    self.checkpoint_dir = checkpoint_dir
    self.save_best_only = save_best_only
    self.epoch_freq = epoch_freq
    self.arch = arch
    self.logger = logger
    self.best_model_name = best_model_name
    self.epoch_model_name = epoch_model_name
    self.use = 'on_epoch_end'

    # Comparison mode. ``np.inf`` is used because the ``np.Inf`` alias was
    # removed in NumPy 2.0.
    if mode == 'min':
        self.monitor_op = np.less
        self.best = np.inf
    elif mode == 'max':
        self.monitor_op = np.greater
        self.best = -np.inf
    else:
        # Fail here instead of leaving ``monitor_op``/``best`` undefined.
        raise ValueError(
            "mode must be 'min' or 'max', got {!r}".format(mode))

    # Mainly for reloading a model: restore the previous best value. The
    # check is against None so that a legitimate best of 0 is not discarded.
    if best is not None:
        self.best = best

    ensure_dir(self.checkpoint_dir.format(arch=self.arch))
def init(self):
    """Build the template environment and pre-compile all templates.

    Reads ``templates_dir`` and ``compiled_templates_file`` from
    ``self.config``, registers the built-in ``markdown``/``rst`` filters plus
    any filters returned by receivers of the ``template_filter`` signal, and
    compiles the templates into ``compiled_templates_file``.

    Raises:
        SystemExit: With status 1 when ``templates_dir`` does not exist.
    """
    # Local import so this block does not depend on a module-level import.
    import sys

    templates_dir = self.config['templates_dir']
    compiled_file = self.config['compiled_templates_file']

    # Setup the env
    self.env = Environment(
        undefined=SilentUndefined,
        loader=FileSystemLoader(templates_dir),
        extensions=[MarkdownExtension],
        trim_blocks=True,
        lstrip_blocks=True
    )

    self.env.filters["markdown"] = markdown_filter
    self.env.filters['rst'] = rst_filter

    if not os.path.exists(templates_dir):
        self.reporter.log("Couldn't find %s. Check your templates exist, and your config in wrangler.yaml" % (templates_dir), "red")
        # ``exit()`` is the interactive helper injected by ``site`` and exits
        # with status 0; a fatal configuration error must signal failure.
        sys.exit(1)

    # Load up some custom, project specific filters. Each result is indexed
    # at [1] to get the value a receiver returned.
    results = signal("template_filter").send("renderer", env=self.env, config=self.config)
    for result in results:
        custom_filter = result[1]
        self.env.filters[custom_filter.__name__] = custom_filter

    self.template_trees = {}
    self.template_modified_times = {}

    var_path = os.path.dirname(compiled_file)
    self.reporter.verbose("Ensure directory exists: \033[34m%s\033[0m" % (var_path))
    util.ensure_dir(var_path)

    self.reporter.verbose("Loading templates from: \033[34m%s\033[0m" % (templates_dir))
    self.env.compile_templates(
        compiled_file,
        ignore_errors=False,
        filter_func=self.filter_hidden_files
    )
    self.reporter.verbose("Compile templates to .zip: \033[32m%s\033[0m" % (compiled_file))
def init(self):
    """Build the template environment and pre-compile all templates.

    Reads ``templates_dir`` and ``compiled_templates_file`` from
    ``self.config``, registers the built-in ``markdown``/``rst`` filters plus
    any filters returned by receivers of the ``template_filter`` signal, and
    compiles the templates into ``compiled_templates_file``.

    Raises:
        SystemExit: With status 1 when ``templates_dir`` does not exist.
    """
    # Local import so this block does not depend on a module-level import.
    import sys

    templates_dir = self.config['templates_dir']
    compiled_file = self.config['compiled_templates_file']

    # Setup the env
    self.env = Environment(undefined=SilentUndefined,
                           loader=FileSystemLoader(templates_dir),
                           extensions=[MarkdownExtension],
                           trim_blocks=True,
                           lstrip_blocks=True)

    self.env.filters["markdown"] = markdown_filter
    self.env.filters['rst'] = rst_filter

    if not os.path.exists(templates_dir):
        self.reporter.log(
            "Couldn't find %s. Check your templates exist, and your config in wrangler.yaml"
            % (templates_dir), "red")
        # ``exit()`` is the interactive helper injected by ``site`` and exits
        # with status 0; a fatal configuration error must signal failure.
        sys.exit(1)

    # Load up some custom, project specific filters. Each result is indexed
    # at [1] to get the value a receiver returned.
    results = signal("template_filter").send("renderer", env=self.env, config=self.config)
    for result in results:
        custom_filter = result[1]
        self.env.filters[custom_filter.__name__] = custom_filter

    self.template_trees = {}
    self.template_modified_times = {}

    var_path = os.path.dirname(compiled_file)
    self.reporter.verbose("Ensure directory exists: \033[34m%s\033[0m" % (var_path))
    util.ensure_dir(var_path)

    self.reporter.verbose("Loading templates from: \033[34m%s\033[0m" % (templates_dir))
    self.env.compile_templates(compiled_file,
                               ignore_errors=False,
                               filter_func=self.filter_hidden_files)
    self.reporter.verbose("Compile templates to .zip: \033[32m%s\033[0m" % (compiled_file))
def __init__(self, writer_dir, logger, enable):
    """Create the optional tensorboardX ``SummaryWriter``.

    Args:
        writer_dir: Directory passed to ``ensure_dir`` and, when enabled, to
            ``SummaryWriter`` as its log path.
        logger: Logger used to report a missing or uninstallable
            ``tensorboardX`` package.
        enable: When falsy no writer is created and ``self.writer`` stays
            ``None``.

    If ``tensorboardX`` cannot be imported, one installation attempt is made
    with the running interpreter's pip. If that also fails, ``self.writer``
    stays ``None`` and a warning is logged instead of raising.
    """
    # Local imports so this block does not depend on module-level imports.
    import subprocess
    import sys

    self.writer = None
    ensure_dir(writer_dir)

    if enable:
        try:
            self.writer = importlib.import_module('tensorboardX').SummaryWriter(writer_dir)
        except ModuleNotFoundError:
            message = ("TensorboardX visualization is configured to use, but currently not installed on this machine. "
                       "Please install the package by 'pip install tensorboardx' command or turn off the option in the 'configs.json' file.")
            warnings.warn(message, UserWarning)
            logger.warning(message)
            try:
                # ``sys.executable -m pip`` installs into the interpreter that
                # is actually running; a bare ``pip`` on PATH may not.
                subprocess.check_call(
                    [sys.executable, '-m', 'pip', 'install', 'tensorboardX'])
                # Let the import system see the freshly installed package.
                importlib.invalidate_caches()
                self.writer = importlib.import_module('tensorboardX').SummaryWriter(writer_dir)
            except (subprocess.CalledProcessError, OSError, ImportError) as err:
                logger.warning(
                    "Automatic installation of tensorboardX failed (%s); "
                    "TensorBoard logging is disabled.", err)

    self.step = 0
    self.mode = ''
    self.tensorboard_writer_ftns = [
        'add_scalar', 'add_scalars', 'add_image', 'add_audio', 'add_text',
        'add_histogram', 'add_pr_curve', 'add_embedding',
    ]
def save_timeline(param_dict):
    """Append this run's Chrome-trace events to the timelines file.

    Args:
        param_dict: Mapping providing ``"run_metadata"`` (an object with a
            ``step_stats`` attribute) and ``"run_timelines_file"`` (path of
            the JSON file to update).

    Events already stored under ``"traceEvents"`` in the file are kept and
    the new events are added after them. A missing, unreadable or malformed
    file is treated as containing no events.
    """
    trace = timeline.Timeline(step_stats=param_dict["run_metadata"].step_stats)
    timelines_file = param_dict["run_timelines_file"]
    ut.ensure_dir(timelines_file)

    new_trace = json.loads(trace.generate_chrome_trace_format())

    try:
        with open(timelines_file, 'r') as f:
            # The key lookup is inside the ``try`` so that a readable file
            # without "traceEvents" also falls back to an empty list.
            old_events = json.load(f)["traceEvents"]
    except (IOError, OSError, ValueError, KeyError, TypeError):
        # Couldn't load usable events, so start from an empty list.
        old_events = []

    new_trace["traceEvents"] = old_events + new_trace["traceEvents"]

    with open(timelines_file, 'w') as f:
        f.write(json.dumps(new_trace, indent=4))
def saveCanvas(can, name):
    """Write a canvas to plots/<format>/<name>.<format> in four formats.

    Arguments:
        can: Canvas object providing ``SaveAs(path)``.
        name: Base file name, e.g. name='Trigger' --> 'plots/pdf/Trigger.pdf'
              (plus the matching C, png and root files).
    """
    # Every target directory is handed to ensure_dir before anything is saved.
    for fmt in ("C", "pdf", "png", "root"):
        ensure_dir("plots/" + fmt)

    # Files are written in this order: C, png, pdf, root.
    for fmt in ("C", "png", "pdf", "root"):
        can.SaveAs("plots/" + fmt + "/" + name + "." + fmt)
def run_fcnn_model(param_dict, generators, opts, opt_key, models, eta,
                   pandas_save=True, save_weights=True, epoch_batch_size=10):
    """Build, train and record the fully connected ("fcnn") model.

    Args:
        param_dict: Run configuration; keys read here include
            "run_verbosity", "hidden_size", "compile_args", "pass_epochs",
            "which_size", "spu", "steps_per_epoch", "validation_steps",
            "model_names" and "fma_results_name".
        generators: Mapping with "train" and "val" data generators.
        opts: Mapping from optimizer key to optimizer.
        opt_key: Key of the optimizer to use.
        models: Mapping that receives the built model under "fcnn".
        eta: Time estimate shown in the start-of-run message.
        pandas_save: When true, append a result row to the stored results
            frame and return that row.
        save_weights: When true, save the trained weights under
            "saved_weights".
        epoch_batch_size: Number of epochs per ``fit_generator`` call
            (must be >= 1).

    Returns:
        The new results ``pd.Series`` when ``pandas_save`` is true,
        otherwise ``None``.
    """
    assert (epoch_batch_size >= 1)

    model_key = "fcnn"
    verbose = param_dict["run_verbosity"] > 0

    if verbose:
        print("Using hidden size {} and optimizer {}...".format(
            param_dict["hidden_size"], opt_key))

    # Set up the input to accept FMA images:
    inp = keras.layers.Input(shape=(256, 256, 3))

    # Add a flatten layer to make the input play nicely with these
    # non-convolutional layers:
    x = keras.layers.Flatten()(inp)

    # Add a Flatten/Affine/BatchNorm/ReLU/Dropout/Affine-softmax-categorization
    # block:
    predict = stack_two_layer_block(param_dict, x)

    # Construct the model:
    models[model_key] = keras.models.Model(inp, predict)

    # Compile the model
    models[model_key].compile(optimizer=opts[opt_key],
                              **param_dict["compile_args"])

    # Because all the other networks train in multiple passes
    fcnn_pass_epochs = param_dict["pass_epochs"] * 6

    # Train the model:
    timer.tic()
    run_started = timer.datetimestamp()
    if verbose:
        print(("Fully connected network run begun at {}."
               "\n\t[{} epochs on {} FMA on {} takes"
               "\n\t{}.]\n").format(run_started, fcnn_pass_epochs,
                                    param_dict["which_size"],
                                    param_dict["spu"].upper(), eta))

    initial_epoch = 0
    results = None

    # This loop of multiple checkpoints helps with memory management, which is
    # probably not necessary for FCNN but is included just in case - see also
    # http://bit.ly/2hDHJay for more information.
    while initial_epoch < fcnn_pass_epochs:
        # Split into "epoch_batch_size"-epoch training batches
        final_epoch = min(initial_epoch + epoch_batch_size, fcnn_pass_epochs)
        if verbose:
            print("\nTraining for epochs {} to {}...".format(
                initial_epoch + 1, final_epoch))

        results_new = models[model_key].fit_generator(
            generators["train"],
            validation_data=generators["val"],
            verbose=param_dict["run_verbosity"],
            epochs=final_epoch,
            steps_per_epoch=param_dict["steps_per_epoch"],
            validation_steps=param_dict["validation_steps"],
            use_multiprocessing=True,
            initial_epoch=initial_epoch)

        # Merge these new results with existing results for previous batches:
        if results is not None:
            for key in results.history:
                results.history[key].extend(results_new.history[key])
        else:
            results = results_new

        # Now start from where we stopped on this round
        initial_epoch = final_epoch

    runsec = timer.toc()

    # Create a new row for these results:
    if pandas_save:
        new_res = pd.Series()
        assign_run_key(new_res, param_dict, run_started)
        assign_opt_key(new_res, opt_key)
        train_acc, val_acc = assign_results_history(new_res, model_key,
                                                    runsec, results)

        # Add this to the results dataframe. Any ordinary failure to load the
        # stored frame starts a fresh one.
        try:
            fma_results = ut.load_obj(param_dict["fma_results_name"])
        except Exception:
            fma_results = pd.DataFrame(dtype=float, columns=RESULTS_COLS)

        # NOTE(review): DataFrame.append was removed in pandas 2.0; this call
        # needs porting to pd.concat before upgrading pandas.
        fma_results = fma_results.append(new_res, ignore_index=True)

        # And save:
        ut.save_obj(fma_results, param_dict["fma_results_name"])
    else:
        train_acc = results.history["categorical_accuracy"][-1]
        val_acc = results.history["val_categorical_accuracy"][-1]

    if verbose:
        print(("\n{} for {} to yield {:0.1%} training accuracy "
               "and {:0.1%} validation accuracy in {:d} \nepochs "
               "(x3 training phases).").format(
                   timer.time_from_sec(runsec),
                   param_dict["model_names"][model_key], train_acc, val_acc,
                   param_dict["pass_epochs"]))

    # Save trained weights:
    if save_weights:
        weights_save_name = os.path.join(
            "saved_weights",
            "{}_{}_{}_{}.h5".format(
                model_key,
                formatted(opt_key),
                formatted(run_key(param_dict, run_started)),
                timer.datetimepath()))
        ut.ensure_dir(weights_save_name)
        models[model_key].save_weights(weights_save_name)

    if verbose:
        print("\nFully connected run complete at {}.".format(
            timer.datetimestamp()))

    # Tell keras to clear the tensorflow backend session (helps with memory
    # leaks; see: http://bit.ly/2xJZbAt ). Only done for verbose runs, i.e.
    # not a short-running/crossval run -- can't reset during crossval because
    # tensorflow will get cross about the optimizer being created on a
    # different graph...
    if verbose:
        print("Clearing keras's backend Tensorflow session...\n")
        K.clear_session()

    if pandas_save:
        return new_res
def _train_in_epoch_batches(model, generators, param_dict, results,
                            initial_epoch, target_epoch, epoch_batch_size):
    """Train ``model`` up to ``target_epoch`` in chunks of epochs.

    Returns the merged results object and the epoch that was reached.
    """
    while initial_epoch < target_epoch:
        # Split into "epoch_batch_size"-epoch training batches
        final_epoch = min(initial_epoch + epoch_batch_size, target_epoch)
        print("\nTraining for epochs {} to {}...".format(
            initial_epoch + 1, final_epoch))

        results_new = model.fit_generator(
            generators["train"],
            validation_data=generators["val"],
            verbose=param_dict["run_verbosity"],
            epochs=final_epoch,
            steps_per_epoch=param_dict["steps_per_epoch"],
            validation_steps=param_dict["validation_steps"],
            use_multiprocessing=True,
            initial_epoch=initial_epoch)

        # Merge these new results with existing results for previous batches:
        if results is not None:
            for key in results.history:
                results.history[key].extend(results_new.history[key])
        else:
            results = results_new

        # Now start from where we stopped on this round
        initial_epoch = final_epoch

    return results, initial_epoch


def run_pretrained_model(param_dict, generators, models, opts, model_class,
                         model_key, opt_key, print_layers, freeze_to, eta,
                         save_weights=True, epoch_batch_size=5):
    """Fine-tune an ImageNet-pretrained model and record the results.

    Args:
        param_dict: Run configuration; keys read here include "model_names",
            "pass_epochs", "which_size", "spu", "mean_img", "compile_args",
            "run_verbosity", "steps_per_epoch", "validation_steps" and
            "fma_results_name".
        generators: Mapping with "train" and "val" data generators.
        models: Mapping that receives the built model under ``model_key``.
        opts: Mapping from optimizer key to optimizer.
        model_class: Callable building the base model; called with
            ``include_top=False``, ``input_shape`` and ``weights='imagenet'``.
        model_key: Key naming the model in ``models`` and "model_names".
        opt_key: Key of the optimizer used for the first training round.
        print_layers: When true, print each layer's index and name after the
            first round.
        freeze_to: Layer index, or sequence of indices; one extra training
            pass is run per index with the layers before it frozen.
        eta: Time estimate shown in the start-of-run message.
        save_weights: When true, save the trained weights under
            "saved_weights".
        epoch_batch_size: Number of epochs per ``fit_generator`` call
            (must be >= 1).
    """
    assert (epoch_batch_size >= 1)

    # Turn single elements into a one-item list; ``len`` raises TypeError for
    # objects that have no length.
    try:
        len(freeze_to)
    except TypeError:
        freeze_to = [freeze_to]

    print("Using optimizer {}...".format(opt_key))

    timer.tic()
    run_started = timer.datetimestamp()
    print(("{} run begun at {}."
           "\n\t[{} epochs (x{} passes) on {} FMA on {} takes"
           "\n\t{}.]\n").format(param_dict["model_names"][model_key],
                                run_started, param_dict["pass_epochs"],
                                len(freeze_to) + 1, param_dict["which_size"],
                                param_dict["spu"].upper(), eta))

    # Adapted from https://keras.io/applications/
    # Get the pre-trained base model, without the top layer (because our input
    # is a different shape), using the trained weights for ImageNet, to use as
    # a starting point:
    basemodel = model_class(include_top=False,
                            input_shape=param_dict["mean_img"].shape,
                            weights='imagenet')
    x = basemodel.output

    # Add a global spatial average pooling layer at the output for
    # regularization and to reduce overfitting:
    x = keras.layers.GlobalAveragePooling2D()(x)

    # Add Affine/BatchNorm/ReLU/Dropout/Affine-softmax-categorization block:
    predict = stack_two_layer_block(param_dict, x)

    # Now make the model:
    models[model_key] = keras.models.Model(basemodel.input, predict)

    # Train only the top layers (which were randomly initialized) while
    # freezing all convolutional layers (which were pretrained on ImageNet):
    for layer in basemodel.layers:
        layer.trainable = False

    # Compile the model (must be done after setting layer trainability):
    models[model_key].compile(optimizer=opts[opt_key],
                              **param_dict["compile_args"])

    # Train just the classifier for the requested number of epochs:
    print("First-round training (training the classifier)...")

    # Training in several fit calls helps with memory management, esp. for
    # VGG16/VGG19, which have a huge number of parameters - see also
    # http://bit.ly/2hDHJay for more information.
    results, initial_epoch = _train_in_epoch_batches(
        models[model_key], generators, param_dict, None, 0,
        param_dict["pass_epochs"], epoch_batch_size)

    # At this point, the top layers are well trained and we can start
    # fine-tuning convolutional layers from Xception. We will freeze the
    # bottom N layers and train the remaining top layers.

    # Visualize layer names and layer indices to see how many layers we should
    # freeze:
    if print_layers:
        for i, layer in enumerate(models[model_key].layers):
            print(i, layer.name)

    pass_num = 1
    for freeze in freeze_to:
        pass_num += 1

        # Freeze all layers up to the specified value; unfreeze everything
        # after (and including):
        for layer in models[model_key].layers[:freeze]:
            layer.trainable = False
        for layer in models[model_key].layers[freeze:]:
            layer.trainable = True

        # We need to recompile the model for these modifications to take
        # effect. We use SGD with a low learning rate because SGD trains more
        # slowly than RMSprop (a good thing, in this case):
        models[model_key].compile(
            optimizer=keras.optimizers.SGD(lr=0.0001, momentum=0.9),
            **param_dict["compile_args"])

        # Train again for the requested number of epochs:
        print(("\n\nFurther training (refining convolutional blocks, starting with"
               "\n\tlayer {})...").format(freeze))
        results, initial_epoch = _train_in_epoch_batches(
            models[model_key], generators, param_dict, results, initial_epoch,
            pass_num * param_dict["pass_epochs"], epoch_batch_size)

    runsec = timer.toc()

    # Create a new row for these results:
    new_res = pd.Series()
    assign_run_key(new_res, param_dict, run_started)
    assign_opt_key(new_res, opt_key)
    train_acc, val_acc = assign_results_history(new_res, model_key, runsec,
                                                results)

    # Add this to the results dataframe. Any ordinary failure to load the
    # stored frame starts a fresh one.
    try:
        fma_results = ut.load_obj(param_dict["fma_results_name"])
    except Exception:
        fma_results = pd.DataFrame(dtype=float, columns=RESULTS_COLS)

    # NOTE(review): DataFrame.append was removed in pandas 2.0; this call
    # needs porting to pd.concat before upgrading pandas.
    fma_results = fma_results.append(new_res, ignore_index=True)

    # And save:
    ut.save_obj(fma_results, param_dict["fma_results_name"])

    print(("\n{} for {} to yield {:0.1%} training accuracy "
           "and {:0.1%} validation accuracy in {:d} \nepochs "
           "(x{} training phases).").format(
               timer.time_from_sec(runsec),
               param_dict["model_names"][model_key], train_acc, val_acc,
               param_dict["pass_epochs"], len(freeze_to) + 1))

    # Save trained weights:
    if save_weights:
        weights_save_name = os.path.join(
            "saved_weights",
            "{}_{}_{}_{}.h5".format(
                model_key,
                formatted(opt_key),
                formatted(run_key(param_dict, run_started)),
                timer.datetimepath()))
        ut.ensure_dir(weights_save_name)
        models[model_key].save_weights(weights_save_name)

    print("\n{} run complete at {}.".format(
        param_dict["model_names"][model_key], timer.datetimestamp()))

    # Tell keras to clear the tensorflow backend session (helps with memory
    # leaks; see: http://bit.ly/2xJZbAt )
    print("Clearing keras's backend Tensorflow session...\n")
    K.clear_session()
def set_up_generators(param_dict):
    """Create the train/validation/test image generators.

    Loads the training-set mean image and standard deviation from
    ``saved_objects/fma_<size>_<wavelet>_stats.npz`` when that file exists;
    otherwise computes them with ``calc_img_stats`` and saves them there.
    Both are stored in ``param_dict`` under "mean_img" and "std_dev".

    Args:
        param_dict: Run configuration; keys read here are "batch_size",
            "which_size", "which_wavelet", "img_dir", "augmentation" and
            "seed". "mean_img" and "std_dev" are written.

    Returns:
        Tuple ``(train_image_generator, val_image_generator,
        test_image_generator)``.
    """
    print("Creating generators with batch size {}...".format(
        param_dict["batch_size"]))

    # Calculate the mean and std. dev. for the training set (slow, but we only
    # have to do it once):
    fma_stats_file = "fma_{}_{}_stats.npz".format(param_dict["which_size"],
                                                  param_dict["which_wavelet"])
    fma_stats_file = os.path.join("saved_objects", fma_stats_file)

    if os.path.exists(fma_stats_file):
        print(("Loading mean and standard deviation for the training set from "
               "file '{}'.\n").format(fma_stats_file))
        with np.load(fma_stats_file) as data:
            param_dict["mean_img"] = data["mean_img"]
            param_dict["std_dev"] = data["std_dev"]
    else:
        ut.ensure_dir(fma_stats_file)
        # flush=True so the message is visible *before* the slow computation;
        # without a newline it would otherwise sit in the output buffer.
        print(("Calculating mean and standard deviation for the {} {} "
               "training set...").format(param_dict["which_size"],
                                         param_dict["which_wavelet"].upper()),
              end="", flush=True)
        (param_dict["mean_img"], param_dict["std_dev"]) = calc_img_stats(
            os.path.join(param_dict["img_dir"], "train"))

        # Drop the alpha channel
        param_dict["mean_img"] = param_dict["mean_img"][:, :, :3]
        param_dict["std_dev"] = param_dict["std_dev"][:, :, :3]

        np.savez(fma_stats_file,
                 mean_img=param_dict["mean_img"],
                 std_dev=param_dict["std_dev"])
        print("done!\n")

    # Then flow data from file system, using the mean and std. dev. calculated.
    # Unless param_dict["augmentation"] is > 0, we're not actually going to do
    # any real data augmentation, but we always want to subtract the mean and
    # normalize:
    data_gen_args = dict(featurewise_center=True,
                         featurewise_std_normalization=True)

    # If we are doing data augmentation, restrict the augmentation to
    # transforms that make sense for our *WT-of-audio files, i.e. time shift
    # only.
    # Only apply data augmentation (if it's being used) to training data:
    data_gen_args_aug = data_gen_args.copy()
    if param_dict["augmentation"] > 0:
        print("Using up to {:0.1%} horizontal shift to augment training data.".
              format(param_dict["augmentation"]))
        # Horizontal (time) shift--not vertical shift, which would be
        # equivalent to shifting the spectrum.
        data_gen_args_aug["width_shift_range"] = param_dict["augmentation"]
        data_gen_args_aug["fill_mode"] = "wrap"

    train_datagen = keras.preprocessing.image.ImageDataGenerator(
        **data_gen_args_aug)
    fit_from_directory(train_datagen,
                       mean=param_dict["mean_img"],
                       std=param_dict["std_dev"])

    # No augmentation for validation/testing data:
    val_datagen = keras.preprocessing.image.ImageDataGenerator(**data_gen_args)
    test_datagen = keras.preprocessing.image.ImageDataGenerator(
        **data_gen_args)

    # The fit_from_directory method gets around the fact that .fit only works
    # for numpy data, and even the small FMA data set is too large for local
    # memory--it sets the mean/std. dev. images manually:
    fit_from_directory(val_datagen,
                       mean=param_dict["mean_img"],
                       std=param_dict["std_dev"])
    fit_from_directory(test_datagen,
                       mean=param_dict["mean_img"],
                       std=param_dict["std_dev"])

    # Use the generators to flow data from the relevant directory
    train_image_generator = train_datagen.flow_from_directory(
        os.path.join(param_dict["img_dir"], 'train/'),
        class_mode="categorical",
        seed=param_dict["seed"],
        batch_size=param_dict["batch_size"])
    val_image_generator = val_datagen.flow_from_directory(
        os.path.join(param_dict["img_dir"], 'validation/'),
        class_mode="categorical",
        seed=param_dict["seed"],
        batch_size=param_dict["batch_size"])
    # shuffle=False and batch_size=1 are both necessary to make the test
    # generator see every image exactly once (it only can generate images of
    # the same class in a single batch, so for the extended dataset, where the
    # classes aren't balanced and have odd numbers of elements, it'll wrap on
    # some classes and skip elements on others if batch_size is > 1).
    test_image_generator = test_datagen.flow_from_directory(
        os.path.join(param_dict["img_dir"], 'test/'),
        class_mode="categorical",
        seed=param_dict["seed"],
        shuffle=False,
        batch_size=1)

    return (train_image_generator, val_image_generator, test_image_generator)
def save_state(self, file_name):
    """Pickle this object to disk.

    Args:
        file_name: Destination path. When falsy (``None`` or ``""``) the
            object is written to ``<id(self)>.dat`` in the current working
            directory instead.
    """
    if file_name:
        # Only a caller-supplied path is handed to ensure_dir; the fallback
        # name is a bare file name, and passing None/"" on is never useful.
        ensure_dir(file_name)
        target = file_name
    else:
        target = str(id(self)) + '.dat'

    # The context manager closes the handle even if pickling fails.
    with open(target, 'wb') as handle:
        cPickle.dump(self, handle)