def collect_results(config, destination, experiment_name=None,
                    use_integration=False):
    """Collect results for one or all experiments into ``destination``.

    Parameters
    ----------
    config : str
        Path to the config; replaced with INT_CONFIG_PATH when
        ``use_integration`` is True.
    destination : str
        Directory/path handed to each driver's ``collect_results``.
    experiment_name : str or None
        A single experiment to collect; when None, every experiment
        available in the config is considered.
    use_integration : bool
        When True, only "integrationtest" experiments are collected;
        when False, they are skipped.

    Returns
    -------
    bool
        True when every collected experiment reported success. (Note:
        also True when nothing matched, since ``all([])`` is True.)
    """
    print(utils.colored("Collecting Results"))
    if use_integration:
        config = INT_CONFIG_PATH

    if experiment_name is None:
        experiments = hcnn.driver.Driver.available_experiments(config)
    else:
        experiments = [experiment_name]

    results = []
    # BUGFIX: use a distinct loop variable instead of shadowing the
    # `experiment_name` parameter.
    for exp_name in experiments:
        # Integration-test experiments are collected only in integration
        # mode, and vice versa (equivalent to the original two-branch skip).
        if use_integration != ("integrationtest" in exp_name):
            continue

        print("Collecting experiment", utils.colored(exp_name, 'cyan'))
        driver = hcnn.driver.Driver(config, experiment_name=exp_name,
                                    load_features=False,
                                    skip_load_dataset=True)
        results.append(driver.collect_results(destination))

    return all(results)
def find_best_model(self): """Perform model selection on the validation set with a binary search for minimum validation loss. (Bayesean optimization might be another approach?) Parameters ---------- validation_df : pd.DataFrame Name of the held out dataset (used to specify the valid file) Returns ------- results_df : pd.DataFrame DataFrame containing the resulting losses. """ logger.info("Finding best model for {}".format( utils.colored(self.experiment_name, "magenta"))) # Commenting out skipping a previous model selection for exisitng file. # if not self.check_features_input(): # logger.error("find_best_model features missing invalid.") # return False validation_df = self.valid_set.to_df() # load all necessary config parameters from the ORIGINAL config original_config = C.Config.load(self._experiment_config_path) validation_error_file = os.path.join( self._cv_model_dir, original_config['experiment/validation_loss']) slicer = get_slicer_from_feature(self.feature_mode) t_len = original_config['training/t_len'] # if not os.path.exists(validation_error_file): model_files = glob.glob( os.path.join(self._params_dir, "params*.npz")) if len(model_files) > 0: result_df, best_model = MS.CompleteLinearWeightedF1Search( model_files, validation_df, slicer, t_len, show_progress=True)() result_df.to_pickle(validation_error_file) best_path = os.path.join(self._params_dir, original_config['experiment/best_params']) shutil.copyfile(best_model['model_file'], best_path) else: logger.warn(utils.colored( "No param files exist yet; did you skip training without " "running this model yet?", "red")) result_df = pd.DataFrame() # else: # logger.info("Model Search already done; printing previous results") # result_df = pd.read_pickle(validation_error_file) # # make sure model_iteration is an int so sorting makes sense. # result_df["model_iteration"].apply(int) # logger.info("\n{}".format( # result_df.sort_values("model_iteration"))) return result_df
def load_dataset(self, dataset=None, load_features=True): """Load the selected dataset in specified in the config file. Parameters ---------- load_features : bool If true, tries to load the features version of the dataset, else just loads the original specified version. """ # Always start by loading the dataset. if dataset: logger.info("load_dataset() - Using dataset passed as a parameter") # If it's a str, it's a path. if isinstance(dataset, str): self.dataset = hcnn.data.dataset.Dataset.load( dataset, data_root=self.data_root) elif isinstance(dataset, hcnn.data.dataset.Dataset): self.dataset = dataset else: logger.info( utils.colored("load_dataset() - loading from {}".format( self.dataset_index))) self.dataset = hcnn.data.dataset.Dataset.load( self.dataset_index, data_root=self.data_root) logger.info(utils.colored("load_dataset() ... complete")) assert len(self.dataset) > 0 # If we want the features, additionally add it to the dataset. if load_features: logger.info(utils.colored("load_dataset() - extracting features.")) self.dataset = self.extract_features()
def find_best_model(self): """Perform model selection on the validation set with a binary search for minimum validation loss. (Bayesean optimization might be another approach?) Parameters ---------- validation_df : pd.DataFrame Name of the held out dataset (used to specify the valid file) Returns ------- results_df : pd.DataFrame DataFrame containing the resulting losses. """ logger.info("Finding best model for {}".format( utils.colored(self.experiment_name, "magenta"))) # Commenting out skipping a previous model selection for exisitng file. # if not self.check_features_input(): # logger.error("find_best_model features missing invalid.") # return False validation_df = self.valid_set.to_df() # load all necessary config parameters from the ORIGINAL config original_config = C.Config.load(self._experiment_config_path) validation_error_file = os.path.join( self._cv_model_dir, original_config['experiment/validation_loss']) slicer = get_slicer_from_feature(self.feature_mode) t_len = original_config['training/t_len'] # if not os.path.exists(validation_error_file): model_files = glob.glob(os.path.join(self._params_dir, "params*.npz")) if len(model_files) > 0: result_df, best_model = MS.CompleteLinearWeightedF1Search( model_files, validation_df, slicer, t_len, show_progress=True)() result_df.to_pickle(validation_error_file) best_path = os.path.join(self._params_dir, original_config['experiment/best_params']) shutil.copyfile(best_model['model_file'], best_path) else: logger.warn( utils.colored( "No param files exist yet; did you skip training without " "running this model yet?", "red")) result_df = pd.DataFrame() # else: # logger.info("Model Search already done; printing previous results") # result_df = pd.read_pickle(validation_error_file) # # make sure model_iteration is an int so sorting makes sense. # result_df["model_iteration"].apply(int) # logger.info("\n{}".format( # result_df.sort_values("model_iteration"))) return result_df
def load_dataset(self, dataset=None, load_features=True): """Load the selected dataset in specified in the config file. Parameters ---------- load_features : bool If true, tries to load the features version of the dataset, else just loads the original specified version. """ # Always start by loading the dataset. if dataset: logger.info("load_dataset() - Using dataset passed as a parameter") # If it's a str, it's a path. if isinstance(dataset, str): self.dataset = hcnn.data.dataset.Dataset.load( dataset, data_root=self.data_root) elif isinstance(dataset, hcnn.data.dataset.Dataset): self.dataset = dataset else: logger.info(utils.colored( "load_dataset() - loading from {}".format(self.dataset_index))) self.dataset = hcnn.data.dataset.Dataset.load( self.dataset_index, data_root=self.data_root) logger.info(utils.colored("load_dataset() ... complete")) assert len(self.dataset) > 0 # If we want the features, additionally add it to the dataset. if load_features: logger.info(utils.colored("load_dataset() - extracting features.")) self.dataset = self.extract_features()
def analyze(self, predictions, model_iter): logger.info("Evaluating experient {} with params from {}".format( utils.colored(self.experiment_name, "magenta"), utils.colored(model_iter, "cyan"))) analyzer = hcnn.evaluate.analyze.PredictionAnalyzer(predictions) analysis_path = self._format_analysis_fn(model_iter) logger.info("Saving analysis to:".format(analysis_path)) analyzer.save(analysis_path) return os.path.exists(analysis_path)
def analyze(self, predictions, model_iter): logger.info("Evaluating experient {} with params from {}".format( utils.colored(self.experiment_name, "magenta"), utils.colored(model_iter, "cyan"))) analyzer = hcnn.evaluate.analyze.PredictionAnalyzer(predictions) analysis_path = self._format_analysis_fn(model_iter) logger.info("Saving analysis to:".format(analysis_path)) analyzer.save(analysis_path) return os.path.exists(analysis_path)
def analyze(master_config, experiment_name, select_epoch=None):
    """Predict results on all datasets and report results.

    Parameters
    ----------
    master_config : str
        Path to the master config file.
    experiment_name : str
        Name of the experiment. Files are saved in a folder of this name.
    select_epoch : str or None
        Which model params to select. Use the epoch number for this,
        for instance "1830" would use the model file "params1830.npz".
        If None, uses "final.npz"

    Returns
    -------
    int
        Always 0.
    """
    print(utils.colored("Analyzing"))
    cfg = C.Config.load(master_config)

    driver = hcnn.driver.Driver(
        cfg, experiment_name=experiment_name, load_features=True)
    driver.analyze(select_epoch, cfg["experiment/hold_out_set"])
    return 0
def extract_features(self): """Extract CQTs from all files collected in collect.""" if self.skip_features: logger.info(utils.colored("--skip_features selected; " "loading from the constructed dataframe instead.")) updated_ds = self.load_existing_features() else: logger.info(utils.colored("Extracting features.")) updated_ds = hcnn.data.cqt.cqt_from_dataset( self.dataset, self.feature_dir, **self.config["features/cqt"]) if updated_ds is not None and \ len(updated_ds) == len(self.dataset): updated_ds.save(self.feature_ds_path) return updated_ds
def run_experiment(model_name, config, experiment_root=None,
                   skip_features=False, skip_training=False):
    """Run an experiment using the specified input feature.

    Parameters
    ----------
    model_name : str
        Name of the NN model configuration [in models.py].
    config : str
        Path to the config file to load.
    experiment_root : str or None
        Optional prefix for the experiment name.
    skip_features : bool
        Forwarded to the driver; skip feature extraction.
    skip_training : bool
        Forwarded to the driver as both skip_training and skip_cleaning.
    """
    logger.info("run_experiment(model_name='{}')".format(model_name))
    config = C.Config.load(config)

    prefix = "{}_".format(experiment_root) if experiment_root else ""
    experiment_name = prefix + model_name
    logger.info("Running Experiment: {}".format(
        utils.colored(experiment_name, 'magenta')))

    driver = hcnn.driver.Driver(config,
                                model_name=model_name,
                                experiment_name=experiment_name,
                                load_features=True,
                                skip_features=skip_features,
                                skip_training=skip_training,
                                skip_cleaning=skip_training)
    return driver.fit_and_predict_cross_validation()
def print_stats(self): dataset_df = self.dataset.to_df() datasets = ["rwc", "uiowa", "philharmonia"] def print_datasetcount(dataset): print("{:<20} {:<30}".format( "{} count".format(dataset), len(dataset_df[dataset_df["dataset"] == dataset]))) for dataset in datasets: print_datasetcount(dataset) def print_dataset_instcount(df, instrument): inst_filter = df[df["instrument"] == instrument] print("{:<20} {:<30} {:<30} {:<30}".format( "{} count".format(instrument), len(inst_filter[inst_filter["dataset"] == "rwc"]), len(inst_filter[inst_filter["dataset"] == "uiowa"]), len(inst_filter[inst_filter["dataset"] == "philharmonia"]))) classmap = hcnn.common.labels.InstrumentClassMap() print("---------------------------") print("Datasets-Instrument count / dataset") print("---------------------------") print( utils.colored("{:<20} {:<30} {:<30} {:<30}".format( "item", "rwc", "uiowa", "philharmonia"))) for inst in sorted(dataset_df["instrument"].unique()): if inst in classmap.allnames: print_dataset_instcount(dataset_df, inst)
def print_stats(self): dataset_df = self.dataset.to_df() datasets = ["rwc", "uiowa", "philharmonia"] def print_datasetcount(dataset): print("{:<20} {:<30}".format( "{} count".format(dataset), len(dataset_df[dataset_df["dataset"] == dataset]))) for dataset in datasets: print_datasetcount(dataset) def print_dataset_instcount(df, instrument): inst_filter = df[df["instrument"] == instrument] print("{:<20} {:<30} {:<30} {:<30}".format( "{} count".format(instrument), len(inst_filter[inst_filter["dataset"] == "rwc"]), len(inst_filter[inst_filter["dataset"] == "uiowa"]), len(inst_filter[inst_filter["dataset"] == "philharmonia"]))) classmap = hcnn.common.labels.InstrumentClassMap() print("---------------------------") print("Datasets-Instrument count / dataset") print("---------------------------") print(utils.colored("{:<20} {:<30} {:<30} {:<30}".format( "item", "rwc", "uiowa", "philharmonia"))) for inst in sorted(dataset_df["instrument"].unique()): if inst in classmap.allnames: print_dataset_instcount(dataset_df, inst)
def predict(config, experiment_name, test_set, model_name, select_epoch=None):
    """Predict results on all datasets and report results.

    Parameters
    ----------
    config : str
        Path to the config file.
    experiment_name : str
        Name of the experiment. Files are saved in a folder of this name.
    test_set : str
        Unused in this function body; kept for interface compatibility
        with the sibling commands.  # presumably consumed elsewhere; verify
    model_name : str
        Name of the model to use for training. Must match the training
        configuration.
    select_epoch : str or None
        Which model params to select. Use the epoch number for this, for
        instance "1830" would use the model file "params1830.npz".
        If None, uses "final.npz"
    """
    print(utils.colored("Evaluating"))

    driver = hcnn.driver.Driver(C.Config.load(config),
                                model_name=model_name,
                                experiment_name=experiment_name,
                                load_features=True)
    results = driver.predict(select_epoch)
    logger.info("Generated results for {} files.".format(len(results)))
def train(config, experiment_name, test_set, model_name):
    """Run training loop.

    Parameters
    ----------
    config : str
        Full path
    experiment_name : str
        Name of the experiment. Files are saved in a folder of this name.
    test_set : str
        String in ["rwc", "uiowa", "philharmonia"] specifying which
        dataset to use as the test set.
    model_name : str
        Name of the model to use for training.
    """
    print(utils.colored("Training experiment: {}".format(experiment_name)))
    logger.info("Training model '{}' with test_set '{}'".format(
        model_name, test_set))

    driver = hcnn.driver.Driver(config, test_set,
                                model_name=model_name,
                                experiment_name=experiment_name,
                                load_features=True)
    return driver.train_model()
def fit_and_predict(config, experiment_name, test_set, model_name):
    """Runs:
    - train
    - model_selection_df
    - predict
    - analyze

    Parameters
    ----------
    config : str
        Path to the config file.
    experiment_name : str
        Name of the experiment. Files are saved in a folder of this name.
    test_set : str
        Which dataset to hold out as the test set.
    model_name : str
        Name of the model to use for training.
    """
    run_name = "fit_and_predict:{}:{}:{}".format(
        experiment_name, test_set, model_name)

    config = C.Config.load(config)
    print(utils.colored("Running {} end-to-end.".format(run_name)))

    timer = utils.TimerHolder()
    timer.start(run_name)
    logger.debug("Running model={} with experiment_name={} at {}"
                 .format(model_name, experiment_name,
                         timer.get_start(run_name)))
    driver = hcnn.driver.Driver(config,
                                model_name=model_name,
                                experiment_name=experiment_name,
                                load_features=True)
    result = driver.fit_and_predict_one(test_set)
    # BUGFIX: "complted" -> "completed" in the user-facing status message.
    print("{} - {} completed in duration {}".format(
        run_name, utils.result_colored(result), timer.end(run_name)))
    return result
def extract_features(self): """Extract CQTs from all files collected in collect.""" if self.skip_features: logger.info( utils.colored( "--skip_features selected; " "loading from the constructed dataframe instead.")) updated_ds = self.load_existing_features() else: logger.info(utils.colored("Extracting features.")) updated_ds = hcnn.data.cqt.cqt_from_dataset( self.dataset, self.feature_dir, **self.config["features/cqt"]) if updated_ds is not None and \ len(updated_ds) == len(self.dataset): updated_ds.save(self.feature_ds_path) return updated_ds
def extract_features(master_config):
    """Extract CQTs from all files collected in collect."""
    config = C.Config.load(master_config)
    print(utils.colored("Extracting CQTs from note audio."))

    driver = hcnn.driver.Driver(config, load_features=False)
    outcome = driver.extract_features()
    print("Extraction {}".format(utils.result_colored(outcome)))
    return outcome
def integration_test(config):
    """AKA "model" test.

    This is equivalent to running
    python manage.py -c data/integrationtest_config.yaml run_all_experiments
    """
    # Load integrationtest config
    experiment_name = "integrationtest"
    print(utils.colored("Extracting features from tinydata set."))
    print(utils.colored(
        "Running integration test on tinydata set : {}."
        .format(config)))

    # Clean the feature data first; only run the experiments when
    # cleaning succeeds (`and` short-circuits on a falsy clean result,
    # matching the original if/else flow).
    cleaned = clean(config, force=True)
    result = cleaned and run_all_experiments(
        config, experiment_root=experiment_name)

    print("IntegrationTest Result: {}".format(utils.result_colored(result)))
    return result
def predict(self, model_iter): """Generates a prediction for *all* files, and writes them to disk as a dataframe. If features_df_override, replace the features_df with this dataframe (for testing) """ if not self.check_features_input(): logger.error("predict - features missing.") return False logger.info("Evaluating experient {} with params from iter {}".format( utils.colored(self.experiment_name, "magenta"), utils.colored(model_iter, "cyan"))) selected_param_file = self._format_params_fn(model_iter) original_config = C.Config.load(self._experiment_config_path) params_file = os.path.join(self._params_dir, selected_param_file) slicer = get_slicer_from_feature(self.feature_mode) logger.info("Deserializing Network & Params...") model = models.NetworkManager.deserialize_npz(params_file) dataset_df = self.dataset.to_df() logger.debug("Predicting across {} files.".format( len(dataset_df['cqt'].nonzero()[0]))) predictions_df_path = self._format_predictions_fn(model_iter) t_len = original_config['training/t_len'] logger.info("Running evaluation on all files...") predictions_df = hcnn.evaluate.predict.predict_many(dataset_df, model, slicer, t_len, show_progress=True) predictions_df.to_pickle(predictions_df_path) return predictions_df
    def model_search(self):
        """Do a model search with binary search.

        NOTE(review): despite the name/docstring, the implementation below
        is a fixed-stride *linear* scan over param_list, not a binary
        search — confirm intent before renaming.

        Returns
        -------
        results : pandas.DataFrame
        selected_model : dict or pandas.Series
            Containing with keys:
                model_file
                model_iteration
                mean_loss
                mean_acc
                f1_weighted
        """
        results = {}
        # Don't allow the zero index; We should never select an
        # untrained model!
        index = 1 if len(self.param_list) > 0 else 0
        end_ind = len(self.param_list) - 1
        # NOTE(review): indexing below raises IndexError when param_list is
        # empty — presumably callers guarantee at least one param file.
        logger.info("Linear Model Search from:{} to:{} [total #: {}]".format(
            utils.filebase(self.param_list[index]),
            utils.filebase(self.param_list[end_ind]),
            len(self.param_list) - 1))
        # kinda hacky, but it'll do for now.
        # Stride scales with log10(#params), clamped to [1, 25].
        increment_amount = int(np.round(min(max(10**(np.log10(
            len(self.param_list)) - 1), 1), 25)))

        # Evaluate every `increment_amount`-th checkpoint, caching by index.
        while index < end_ind:
            logger.info("Evaluating {}".format(
                utils.filebase(self.param_list[index])))
            if index not in results:
                model = self.param_list[index]
                results[index] = self.evaluate_model(model)
            index += increment_amount

        results_df = pandas.DataFrame.from_dict(results, orient='index')
        # Pick the checkpoint with the highest weighted F1.
        selected_index = results_df['f1_weighted'].idxmax()

        # Now select the one with the lowest score
        # NOTE(review): the comment above looks stale — idxmax() selects the
        # HIGHEST f1_weighted, not the lowest score.
        logger.info(
            utils.colored("Selected model index:{} / params: {}".format(
                selected_index,
                utils.filebase(self.param_list[selected_index]))))
        logger.info("For reference, here's the model selection results:")
        logger.info("\n{}".format(results_df.to_string()))

        return results_df, results[selected_index]
def predict(self, model_iter): """Generates a prediction for *all* files, and writes them to disk as a dataframe. If features_df_override, replace the features_df with this dataframe (for testing) """ if not self.check_features_input(): logger.error("predict - features missing.") return False logger.info("Evaluating experient {} with params from iter {}".format( utils.colored(self.experiment_name, "magenta"), utils.colored(model_iter, "cyan"))) selected_param_file = self._format_params_fn(model_iter) original_config = C.Config.load(self._experiment_config_path) params_file = os.path.join(self._params_dir, selected_param_file) slicer = get_slicer_from_feature(self.feature_mode) logger.info("Deserializing Network & Params...") model = models.NetworkManager.deserialize_npz(params_file) dataset_df = self.dataset.to_df() logger.debug("Predicting across {} files.".format( len(dataset_df['cqt'].nonzero()[0]))) predictions_df_path = self._format_predictions_fn(model_iter) t_len = original_config['training/t_len'] logger.info("Running evaluation on all files...") predictions_df = hcnn.evaluate.predict.predict_many( dataset_df, model, slicer, t_len, show_progress=True) predictions_df.to_pickle(predictions_df_path) return predictions_df
    def train_model(self):
        """Train a model, writing intermediate params to disk.

        Trains for max_iterations or max_time, whichever is fewer.
        [Specified in the config.]

        Returns
        -------
        bool
            True when the training-loss pickle exists on disk afterwards
            (these files are needed for model selection). Also returns
            True immediately when --skip_training was requested.
        """
        if self.skip_training:
            logger.info(utils.colored("--skip_training specified - skipping"))
            return True

        assert hasattr(self, 'train_set') and hasattr(self, 'valid_set')

        logger.info("Starting training for experiment: {}".format(
            self.experiment_name))

        # Save the config we used in the model directory, just in case.
        self.config.save(self._experiment_config_path)

        # Duration parameters
        max_iterations = self.config['training/max_iterations']
        max_time = self.config['training/max_time']  # in seconds

        # Collect various necessary parameters
        t_len = self.config['training/t_len']
        batch_size = self.config['training/batch_size']
        n_targets = self.config['training/n_targets']
        logger.debug(
            "Hyperparams:\nt_len: {}\nbatch_size: {}\n"
            "n_targets: {}\nmax_iterations: {}\nmax_time: {}s or {}h".format(
                t_len, batch_size, n_targets, max_iterations, max_time,
                (max_time / 60. / 60.)))

        slicer = get_slicer_from_feature(self.feature_mode)

        # Set up our streamer
        logger.info("[{}] Setting up streamer".format(self.experiment_name))
        slice_logger = utils.SliceLogger()
        streamer = streams.InstrumentStreamer(
            self.train_set.to_df(), slicer,
            slicer_kwargs={'slice_logger': slice_logger},
            t_len=t_len, batch_size=batch_size)

        # create our model
        logger.info("[{}] Setting up model: {}".format(self.experiment_name,
                                                       self.model_definition))
        network_def = getattr(models, self.model_definition)(t_len, n_targets)
        model = models.NetworkManager(network_def)

        iter_print_freq = self.config.get('training/iteration_print_frequency',
                                          None)
        iter_write_freq = self.config.get('training/iteration_write_frequency',
                                          None)

        timers = utils.TimerHolder()
        iter_count = 0
        train_stats = pd.DataFrame(
            columns=['timestamp', 'batch_train_dur', 'iteration', 'loss'])
        # Tracks the best (lowest) mean training loss seen so far, for
        # conditional coloring of the status line.
        min_train_loss = np.inf

        timers.start("train")
        logger.info("[{}] Beginning training loop at {}".format(
            self.experiment_name, timers.get("train")))
        try:
            # Each iteration is timed in two phases: "stream" (fetching a
            # batch) and "batch_train" (the actual update step).
            timers.start(("stream", iter_count))
            for batch in streamer:
                timers.end(("stream", iter_count))
                timers.start(("batch_train", iter_count))
                loss = model.train(batch)
                timers.end(("batch_train", iter_count))
                row = dict(timestamp=timers.get_end(
                               ("batch_train", iter_count)),
                           batch_train_dur=timers.get(
                               ("batch_train", iter_count)),
                           iteration=iter_count,
                           loss=loss)
                train_stats.loc[len(train_stats)] = row

                # Time Logging
                logger.debug("[Iter timing] iter: {} | loss: {} | "
                             "stream: {} | train: {}".format(
                                 iter_count, loss,
                                 timers.get(("stream", iter_count)),
                                 timers.get(("batch_train", iter_count))))

                # Print status
                if iter_print_freq and (iter_count % iter_print_freq == 0):
                    mean_train_loss = \
                        train_stats["loss"][-iter_print_freq:].mean()
                    output_str = ("Iteration: {} | Mean_Train_loss: {}".format(
                        iter_count,
                        utils.conditional_colored(mean_train_loss,
                                                  min_train_loss)))

                    # On some small probability, do a randomly sampled
                    # validation so we can see approximately how we're doing
                    # on the validation set.
                    if np.random.random() < .3:
                        timers.start(("sampled_validation", iter_count))
                        valid_loss = self.sampled_validation_loss(
                            model, slicer, t_len)
                        output_str += " | Sampled_Valid_loss: {:0.4f}".format(
                            valid_loss)
                        timers.end(("sampled_validation", iter_count))
                        output_str += " | Val_time: {:0.2f}s".format(
                            timers.get(("sampled_validation",
                                        iter_count)).total_seconds())

                    logger.info(output_str)
                    min_train_loss = min(mean_train_loss, min_train_loss)
                    # Print the mean times for the last n frames
                    logger.debug(
                        "Mean stream time: {}, Mean train time: {}".format(
                            timers.mean("stream",
                                        iter_count - iter_print_freq,
                                        iter_count),
                            timers.mean("batch_train",
                                        iter_count - iter_print_freq,
                                        iter_count)))

                # save model, maybe
                if iter_write_freq and (iter_count % iter_write_freq == 0):
                    save_path = os.path.join(
                        self._params_dir,
                        self.param_format_str.format(iter_count))
                    logger.debug("Writing params to {}".format(save_path))
                    model.save(save_path)
                    slice_log = os.path.join(self._cv_model_dir,
                                             "slice_log.csv")
                    slice_logger.save(slice_log)

                # Wall-clock stopping condition.
                if datetime.datetime.now() > \
                        (timers.get("train") + datetime.timedelta(
                            seconds=max_time)):
                    raise EarlyStoppingException("Max Time reached")
                iter_count += 1
                timers.start(("stream", iter_count))
                # Stopping conditions
                if (iter_count >= max_iterations):
                    raise EarlyStoppingException("Max Iterations Reached")

        except KeyboardInterrupt:
            logger.warn(utils.colored("Training Cancelled", "red"))
            print("User cancelled training at epoch:", iter_count)
        except EarlyStoppingException as e:
            logger.warn(
                utils.colored("Training Stopped for {}".format(e), "red"))
            print("Training halted for: ", e)
        timers.end("train")

        # Print final training loss
        logger.info("Total iterations: {}".format(iter_count))
        logger.info("Trained for {}".format(timers.get("train")))
        logger.info("Final training loss: {}".format(
            train_stats["loss"].iloc[-1]))

        # Make sure to save the final iteration's model.
        save_path = os.path.join(self._params_dir,
                                 self.param_format_str.format(iter_count))
        model.save(save_path)

        logger.info("Completed training for experiment: {}".format(
            self.experiment_name))

        # Save training loss
        logger.info("Writing training stats to {}".format(
            self._training_loss_path))
        train_stats.to_pickle(self._training_loss_path)

        # We need these files for models election, so make sure they exist
        return os.path.exists(self._training_loss_path)
    def train_model(self):
        """Train a model, writing intermediate params to disk.

        Trains for max_iterations or max_time, whichever is fewer.
        [Specified in the config.]

        Returns
        -------
        bool
            True when the training-loss pickle exists on disk afterwards
            (required downstream for model selection), or immediately
            when --skip_training was requested.
        """
        if self.skip_training:
            logger.info(utils.colored("--skip_training specified - skipping"))
            return True

        assert hasattr(self, 'train_set') and hasattr(self, 'valid_set')

        logger.info("Starting training for experiment: {}".format(
            self.experiment_name))

        # Save the config we used in the model directory, just in case.
        self.config.save(self._experiment_config_path)

        # Duration parameters
        max_iterations = self.config['training/max_iterations']
        max_time = self.config['training/max_time']  # in seconds

        # Collect various necessary parameters
        t_len = self.config['training/t_len']
        batch_size = self.config['training/batch_size']
        n_targets = self.config['training/n_targets']
        logger.debug("Hyperparams:\nt_len: {}\nbatch_size: {}\n"
                     "n_targets: {}\nmax_iterations: {}\nmax_time: {}s or {}h"
                     .format(t_len, batch_size, n_targets, max_iterations,
                             max_time, (max_time / 60. / 60.)))

        slicer = get_slicer_from_feature(self.feature_mode)

        # Set up our streamer
        logger.info("[{}] Setting up streamer".format(self.experiment_name))
        slice_logger = utils.SliceLogger()
        streamer = streams.InstrumentStreamer(
            self.train_set.to_df(), slicer,
            slicer_kwargs={'slice_logger': slice_logger},
            t_len=t_len, batch_size=batch_size)

        # create our model
        logger.info("[{}] Setting up model: {}".format(self.experiment_name,
                                                       self.model_definition))
        network_def = getattr(models, self.model_definition)(t_len, n_targets)
        model = models.NetworkManager(network_def)

        iter_print_freq = self.config.get(
            'training/iteration_print_frequency', None)
        iter_write_freq = self.config.get(
            'training/iteration_write_frequency', None)

        timers = utils.TimerHolder()
        iter_count = 0
        train_stats = pd.DataFrame(columns=['timestamp', 'batch_train_dur',
                                            'iteration', 'loss'])
        # Best (lowest) mean training loss so far; drives the conditional
        # coloring of the status line.
        min_train_loss = np.inf

        timers.start("train")
        logger.info("[{}] Beginning training loop at {}".format(
            self.experiment_name, timers.get("train")))
        try:
            # Each iteration is timed in two phases: "stream" (batch fetch)
            # and "batch_train" (the update step).
            timers.start(("stream", iter_count))
            for batch in streamer:
                timers.end(("stream", iter_count))
                timers.start(("batch_train", iter_count))
                loss = model.train(batch)
                timers.end(("batch_train", iter_count))
                row = dict(timestamp=timers.get_end(
                               ("batch_train", iter_count)),
                           batch_train_dur=timers.get(
                               ("batch_train", iter_count)),
                           iteration=iter_count,
                           loss=loss)
                train_stats.loc[len(train_stats)] = row

                # Time Logging
                logger.debug("[Iter timing] iter: {} | loss: {} | "
                             "stream: {} | train: {}".format(
                                 iter_count, loss,
                                 timers.get(("stream", iter_count)),
                                 timers.get(("batch_train", iter_count))))

                # Print status
                if iter_print_freq and (iter_count % iter_print_freq == 0):
                    mean_train_loss = \
                        train_stats["loss"][-iter_print_freq:].mean()
                    output_str = ("Iteration: {} | Mean_Train_loss: {}"
                                  .format(iter_count,
                                          utils.conditional_colored(
                                              mean_train_loss,
                                              min_train_loss)))

                    # On some small probability, do a randomly sampled
                    # validation so we can see approximately how we're doing
                    # on the validation set.
                    if np.random.random() < .3:
                        timers.start(("sampled_validation", iter_count))
                        valid_loss = self.sampled_validation_loss(
                            model, slicer, t_len)
                        output_str += " | Sampled_Valid_loss: {:0.4f}".format(
                            valid_loss)
                        timers.end(("sampled_validation", iter_count))
                        output_str += " | Val_time: {:0.2f}s".format(
                            timers.get((
                                "sampled_validation",
                                iter_count)).total_seconds())

                    logger.info(output_str)
                    min_train_loss = min(mean_train_loss, min_train_loss)
                    # Print the mean times for the last n frames
                    logger.debug("Mean stream time: {}, Mean train time: {}"
                                 .format(
                                     timers.mean(
                                         "stream",
                                         iter_count - iter_print_freq,
                                         iter_count),
                                     timers.mean(
                                         "batch_train",
                                         iter_count - iter_print_freq,
                                         iter_count)))

                # save model, maybe
                if iter_write_freq and (iter_count % iter_write_freq == 0):
                    save_path = os.path.join(
                        self._params_dir,
                        self.param_format_str.format(iter_count))
                    logger.debug("Writing params to {}".format(save_path))
                    model.save(save_path)
                    slice_log = os.path.join(self._cv_model_dir,
                                             "slice_log.csv")
                    slice_logger.save(slice_log)

                # Wall-clock stopping condition.
                if datetime.datetime.now() > \
                        (timers.get("train") + datetime.timedelta(
                            seconds=max_time)):
                    raise EarlyStoppingException("Max Time reached")
                iter_count += 1
                timers.start(("stream", iter_count))
                # Stopping conditions
                if (iter_count >= max_iterations):
                    raise EarlyStoppingException("Max Iterations Reached")

        except KeyboardInterrupt:
            logger.warn(utils.colored("Training Cancelled", "red"))
            print("User cancelled training at epoch:", iter_count)
        except EarlyStoppingException as e:
            logger.warn(
                utils.colored("Training Stopped for {}".format(e), "red"))
            print("Training halted for: ", e)
        timers.end("train")

        # Print final training loss
        logger.info("Total iterations: {}".format(iter_count))
        logger.info("Trained for {}".format(timers.get("train")))
        logger.info("Final training loss: {}".format(
            train_stats["loss"].iloc[-1]))

        # Make sure to save the final iteration's model.
        save_path = os.path.join(
            self._params_dir,
            self.param_format_str.format(iter_count))
        model.save(save_path)

        logger.info("Completed training for experiment: {}".format(
            self.experiment_name))

        # Save training loss
        logger.info("Writing training stats to {}".format(
            self._training_loss_path))
        train_stats.to_pickle(
            self._training_loss_path)

        # We need these files for models election, so make sure they exist
        return os.path.exists(self._training_loss_path)