def __init__(self, experiment_id, output_path='.', update_period=1,
             max_num_lines=1000, mongo_db='neuralnilm', mongo_host=None):
    """Set up monitoring state and open a connection to MongoDB.

    Parameters
    ----------
    max_num_lines : int
        Number of pixels.
    """
    # Plain configuration attributes, stored as-is.
    self.experiment_id = experiment_id
    self.output_path = output_path
    self.update_period = update_period
    self.max_num_lines = max_num_lines

    # Nothing has been processed yet for either fold.
    self._last_iteration_processed = {'train': 0, 'validation': 0}

    # Resolve the MongoDB address: explicit argument wins, otherwise
    # fall back to the project config file.
    self.mongo_host = (
        config.get("MongoDB", "address") if mongo_host is None
        else mongo_host)
    self.mongo_client = pymongo.MongoClient(self.mongo_host)
    self.db = self.mongo_client[mongo_db]
    self.mongo_db = mongo_db

    # Populated lazily once validation metrics are first seen.
    self._validation_metric_names = None
def __init__(self, net, data_pipeline, experiment_id,
             mongo_host=None,
             mongo_db='neuralnilm',
             loss_func=squared_error,
             loss_aggregation_mode='mean',
             updates_func=nesterov_momentum,
             updates_func_kwards=None,
             learning_rates=None,
             callbacks=None,
             repeat_callbacks=None,
             epoch_callbacks=None,
             metrics=None,
             num_seqs_to_plot=8):
    """Set up a Trainer: connect to MongoDB, resolve a unique
    experiment ID (interactively, if it already exists), create the
    output directory, and prepare the loss, updates and callbacks.

    Parameters
    ----------
    experiment_id : list of strings
        concatenated together with an underscore. Defines output path
    mongo_host : Address of PyMongo database.
        See http://docs.mongodb.org/manual/reference/connection-string/
    callbacks : list of 2-tuples (<iteration>, <function>)
        Function must accept a single argument: this Trainer object.
    repeat_callbacks : list of 2-tuples (<iteration>, <function>)
        Function must accept a single argument: this Trainer object.
        For example, to run validation every 100 iterations, set
        `repeat_callbacks=[(100, Trainer.validate)]`.
    epoch_callbacks : list of functions
        Functions are called at the end of each training epoch.
    metrics : neuralnilm.Metrics object
        Run during `Trainer.validation()`
    """
    # Database: explicit mongo_host wins, otherwise read the address
    # from the project config file.
    if mongo_host is None:
        mongo_host = config.get("MongoDB", "address")
    mongo_client = MongoClient(mongo_host)
    self.db = mongo_client[mongo_db]

    # Training and validation state
    self.requested_learning_rates = ({
        0: 1E-2} if learning_rates is None else learning_rates)
    self.experiment_id = "_".join(experiment_id)
    self._train_func = None  # compiled lazily on first use
    self.metrics = metrics
    self.net = net
    self.data_pipeline = data_pipeline
    self.min_train_cost = float("inf")
    self.num_seqs_to_plot = num_seqs_to_plot

    # Check if this experiment already exists in database.  If it does,
    # ask the user (on stdin) whether to delete it, quit, or append a
    # '_try<i>' suffix to get a fresh ID.  Any answer other than 'd' or
    # 'q' (including the default, empty answer) takes the append path.
    delete_or_quit = None
    if self.db.experiments.find_one({'_id': self.experiment_id}):
        delete_or_quit = input(
            "Database already has an experiment with _id == {}."
            " Should the old experiment be deleted (d)"
            " (both from the database and from disk)? Or quit (q)?"
            " Or append (a) '_try<i>` string to _id? "
            "[A/q/d] ".format(self.experiment_id)).lower()
        if delete_or_quit == 'd':
            # Destructive: remove the experiment document and every
            # related document in the other collections.
            logger.info("Deleting documents for old experiment.")
            self.db.experiments.delete_one({'_id': self.experiment_id})
            for collection in COLLECTIONS:
                self.db[collection].delete_many(
                    {'experiment_id': self.experiment_id})
        elif delete_or_quit == 'q':
            raise KeyboardInterrupt()
        else:
            self.modification_since_last_try = input(
                "Enter a short description of what has changed since"
                " the last try: ")
            # Find the first free '_try<i>' suffix, starting at 2.
            try_i = 2
            while True:
                candidate_id = self.experiment_id + '_try' + str(try_i)
                if self.db.experiments.find_one({'_id': candidate_id}):
                    try_i += 1
                else:
                    self.experiment_id = candidate_id
                    logger.info("experiment_id set to {}".format(
                        self.experiment_id))
                    break

    # Output path: one directory level per underscore-separated
    # component of the experiment ID, under the configured output root.
    path_list = [config.get('Paths', 'output')]
    path_list += self.experiment_id.split('_')
    self.output_path = os.path.join(*path_list)
    try:
        os.makedirs(self.output_path)
    except OSError as os_error:
        # NOTE(review): 17 is errno.EEXIST on Linux/macOS — consider
        # using the symbolic constant.
        if os_error.errno == 17:  # file exists
            logger.info("Directory exists = '{}'".format(self.output_path))
            if delete_or_quit == 'd':
                logger.info("  Deleting directory.")
                shutil.rmtree(self.output_path)
                os.makedirs(self.output_path)
            else:
                logger.info("  Re-using directory.")
        else:
            raise

    # Loss and updates.  The raw loss_func is wrapped so that the
    # per-element loss is aggregated (e.g. mean) with optional weights.
    def aggregated_loss_func(prediction, target, weights=None):
        loss = loss_func(prediction, target)
        return aggregate(loss, mode=loss_aggregation_mode, weights=weights)
    self.loss_func_name = loss_func.__name__
    self.loss_func = aggregated_loss_func
    self.updates_func_name = updates_func.__name__
    self.updates_func_kwards = none_to_dict(updates_func_kwards)
    self.updates_func = partial(updates_func, **self.updates_func_kwards)
    self.loss_aggregation_mode = loss_aggregation_mode

    # Learning rate
    # Set _learning_rate to -1 so when we set self.learning_rate
    # during the training loop, it will be logged correctly.
    self._learning_rate = theano.shared(sfloatX(-1), name='learning_rate')

    # Callbacks: stored as DataFrames with (iteration, function) rows.
    def callbacks_dataframe(lst):
        return pd.DataFrame(lst, columns=['iteration', 'function'])
    self.callbacks = callbacks_dataframe(callbacks)
    self.repeat_callbacks = callbacks_dataframe(repeat_callbacks)
    self.epoch_callbacks = none_to_list(epoch_callbacks)
#!/usr/bin/env python from __future__ import print_function, division from os.path import join import sys from neuralnilm.config import config import logging logger = logging.getLogger('neuralnilm') experiment_definition_path = config.get('Paths', 'experiment_definitions') job_list_filename = join(experiment_definition_path, 'job_list.txt') def main(): if experiment_definition_path not in sys.path: sys.path.insert(0, experiment_definition_path) next_job = _get_next_job() while next_job: try: _run_job(next_job) except KeyboardInterrupt: delete_this_job = raw_input( "Delete this job from job list [Y/n]? ").lower() if delete_this_job != "n": _delete_completed_job() continue_running = raw_input( "Continue running other experiments [N/y]? ").lower() if continue_running != "y": break else:
def __init__(self, net, data_pipeline, experiment_id,
             mongo_host=None,
             mongo_db='neuralnilm',
             loss_func=squared_error,
             loss_aggregation_mode='mean',
             updates_func=nesterov_momentum,
             updates_func_kwards=None,
             learning_rates=None,
             callbacks=None,
             repeat_callbacks=None,
             epoch_callbacks=None,
             metrics=None,
             num_seqs_to_plot=8):
    """Set up a Trainer: connect to MongoDB, resolve a unique
    experiment ID (interactively, if it already exists), create the
    output directory, and prepare the loss, updates and callbacks.

    Parameters
    ----------
    experiment_id : list of strings
        concatenated together with an underscore. Defines output path
    mongo_host : Address of PyMongo database.
        See http://docs.mongodb.org/manual/reference/connection-string/
    callbacks : list of 2-tuples (<iteration>, <function>)
        Function must accept a single argument: this Trainer object.
    repeat_callbacks : list of 2-tuples (<iteration>, <function>)
        Function must accept a single argument: this Trainer object.
        For example, to run validation every 100 iterations, set
        `repeat_callbacks=[(100, Trainer.validate)]`.
    epoch_callbacks : list of functions
        Functions are called at the end of each training epoch.
    metrics : neuralnilm.Metrics object
        Run during `Trainer.validation()`
    """
    # Database: explicit mongo_host wins, otherwise read the address
    # from the project config file.
    if mongo_host is None:
        mongo_host = config.get("MongoDB", "address")
    mongo_client = MongoClient(mongo_host)
    self.db = mongo_client[mongo_db]

    # Training and validation state
    self.requested_learning_rates = (
        {0: 1E-2} if learning_rates is None else learning_rates)
    self.experiment_id = "_".join(experiment_id)
    self._train_func = None  # compiled lazily on first use
    self.metrics = metrics
    self.net = net
    self.data_pipeline = data_pipeline
    self.min_train_cost = float("inf")
    self.num_seqs_to_plot = num_seqs_to_plot

    # Check if this experiment already exists in database.  If it does,
    # ask the user (on stdin, Python-2 raw_input) whether to delete it,
    # quit, or append a '_try<i>' suffix to get a fresh ID.  Any answer
    # other than 'd' or 'q' (including the empty default) appends.
    delete_or_quit = None
    if self.db.experiments.find_one({'_id': self.experiment_id}):
        delete_or_quit = raw_input(
            "Database already has an experiment with _id == {}."
            " Should the old experiment be deleted (d)"
            " (both from the database and from disk)? Or quit (q)?"
            " Or append (a) '_try<i>` string to _id? "
            "[A/q/d] "
            .format(self.experiment_id)).lower()
        if delete_or_quit == 'd':
            # Destructive: remove the experiment document and every
            # related document in the other collections.
            logger.info("Deleting documents for old experiment.")
            self.db.experiments.delete_one({'_id': self.experiment_id})
            for collection in COLLECTIONS:
                self.db[collection].delete_many(
                    {'experiment_id': self.experiment_id})
        elif delete_or_quit == 'q':
            raise KeyboardInterrupt()
        else:
            self.modification_since_last_try = raw_input(
                "Enter a short description of what has changed since"
                " the last try: ")
            # Find the first free '_try<i>' suffix, starting at 2.
            try_i = 2
            while True:
                candidate_id = self.experiment_id + '_try' + str(try_i)
                if self.db.experiments.find_one({'_id': candidate_id}):
                    try_i += 1
                else:
                    self.experiment_id = candidate_id
                    logger.info("experiment_id set to {}"
                                .format(self.experiment_id))
                    break

    # Output path: one directory level per underscore-separated
    # component of the experiment ID, under the configured output root.
    path_list = [config.get('Paths', 'output')]
    path_list += self.experiment_id.split('_')
    self.output_path = os.path.join(*path_list)
    try:
        os.makedirs(self.output_path)
    except OSError as os_error:
        # NOTE(review): 17 is errno.EEXIST on Linux/macOS — consider
        # using the symbolic constant.
        if os_error.errno == 17:  # file exists
            logger.info("Directory exists = '{}'".format(self.output_path))
            if delete_or_quit == 'd':
                logger.info("  Deleting directory.")
                shutil.rmtree(self.output_path)
                os.makedirs(self.output_path)
            else:
                logger.info("  Re-using directory.")
        else:
            raise

    # Loss and updates.  The raw loss_func is wrapped so that the
    # per-element loss is aggregated (e.g. mean) with optional weights.
    def aggregated_loss_func(prediction, target, weights=None):
        loss = loss_func(prediction, target)
        return aggregate(loss, mode=loss_aggregation_mode, weights=weights)
    self.loss_func_name = loss_func.__name__
    self.loss_func = aggregated_loss_func
    self.updates_func_name = updates_func.__name__
    self.updates_func_kwards = none_to_dict(updates_func_kwards)
    self.updates_func = partial(updates_func, **self.updates_func_kwards)
    self.loss_aggregation_mode = loss_aggregation_mode

    # Learning rate
    # Set _learning_rate to -1 so when we set self.learning_rate
    # during the training loop, it will be logged correctly.
    self._learning_rate = theano.shared(sfloatX(-1), name='learning_rate')

    # Callbacks: stored as DataFrames with (iteration, function) rows.
    def callbacks_dataframe(lst):
        return pd.DataFrame(lst, columns=['iteration', 'function'])
    self.callbacks = callbacks_dataframe(callbacks)
    self.repeat_callbacks = callbacks_dataframe(repeat_callbacks)
    self.epoch_callbacks = none_to_list(epoch_callbacks)