Example #1
 def __init__(self,
              experiment_id,
              output_path='.',
              update_period=1,
              max_num_lines=1000,
              mongo_db='neuralnilm',
              mongo_host=None):
     """
     Parameters
     ----------
     max_num_lines : int
         Number of pixels.
     """
     self.experiment_id = experiment_id
     self.output_path = output_path
     self.update_period = update_period
     self.max_num_lines = max_num_lines
     self._last_iteration_processed = {'train': 0, 'validation': 0}
     if mongo_host is None:
         self.mongo_host = config.get("MongoDB", "address")
     else:
         self.mongo_host = mongo_host
     self.mongo_client = pymongo.MongoClient(self.mongo_host)
     self.db = self.mongo_client[mongo_db]
     self.mongo_db = mongo_db
     self._validation_metric_names = None
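This constructor resolves the MongoDB host from a config file when none is given, then opens a pymongo client and selects the database by name. Below is a minimal standalone sketch of that connection pattern; the connect() helper and the localhost address are illustrative stand-ins, not part of the library:

import pymongo


def connect(mongo_host=None, mongo_db='neuralnilm'):
    # Fall back to a default address when no host is given; the original
    # reads this from config.get("MongoDB", "address") instead.
    if mongo_host is None:
        mongo_host = 'mongodb://localhost:27017'
    client = pymongo.MongoClient(mongo_host)
    # Indexing the client by name returns a lazy Database handle, so this
    # succeeds even before a connection to the server is established.
    return client[mongo_db]


db = connect()
print(db.name)  # -> 'neuralnilm'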
Example #3
    def __init__(self,
                 net,
                 data_pipeline,
                 experiment_id,
                 mongo_host=None,
                 mongo_db='neuralnilm',
                 loss_func=squared_error,
                 loss_aggregation_mode='mean',
                 updates_func=nesterov_momentum,
                 updates_func_kwards=None,
                 learning_rates=None,
                 callbacks=None,
                 repeat_callbacks=None,
                 epoch_callbacks=None,
                 metrics=None,
                 num_seqs_to_plot=8):
        """
        Parameters
        ----------
        experiment_id : list of strings
            Concatenated together with underscores.
            Defines the output path.
        mongo_host : str
            Address of the MongoDB server.
            See http://docs.mongodb.org/manual/reference/connection-string/
        callbacks : list of 2-tuples (<iteration>, <function>)
            Function must accept a single argument: this Trainer object.
        repeat_callbacks : list of 2-tuples (<iteration>, <function>)
            Function must accept a single argument: this Trainer object.
            For example, to run validation every 100 iterations, set
            `repeat_callbacks=[(100, Trainer.validate)]`.
        epoch_callbacks : list of functions
            Functions are called at the end of each training epoch.
        metrics : neuralnilm.Metrics object
            Run during `Trainer.validation()`
        """
        # Database
        if mongo_host is None:
            mongo_host = config.get("MongoDB", "address")
        mongo_client = MongoClient(mongo_host)
        self.db = mongo_client[mongo_db]

        # Training and validation state
        self.requested_learning_rates = ({
            0: 1E-2
        } if learning_rates is None else learning_rates)
        self.experiment_id = "_".join(experiment_id)
        self._train_func = None
        self.metrics = metrics
        self.net = net
        self.data_pipeline = data_pipeline
        self.min_train_cost = float("inf")
        self.num_seqs_to_plot = num_seqs_to_plot

        # Check if this experiment already exists in database
        delete_or_quit = None
        if self.db.experiments.find_one({'_id': self.experiment_id}):
            delete_or_quit = input(
                "Database already has an experiment with _id == {}."
                " Should the old experiment be deleted (d)"
                " (both from the database and from disk)? Or quit (q)?"
                " Or append (a) '_try<i>` string to _id? [A/q/d] ".format(
                    self.experiment_id)).lower()
            if delete_or_quit == 'd':
                logger.info("Deleting documents for old experiment.")
                self.db.experiments.delete_one({'_id': self.experiment_id})
                for collection in COLLECTIONS:
                    self.db[collection].delete_many(
                        {'experiment_id': self.experiment_id})
            elif delete_or_quit == 'q':
                raise KeyboardInterrupt()
            else:
                self.modification_since_last_try = input(
                    "Enter a short description of what has changed since"
                    " the last try: ")
                try_i = 2
                while True:
                    candidate_id = self.experiment_id + '_try' + str(try_i)
                    if self.db.experiments.find_one({'_id': candidate_id}):
                        try_i += 1
                    else:
                        self.experiment_id = candidate_id
                        logger.info("experiment_id set to {}".format(
                            self.experiment_id))
                        break

        # Output path
        path_list = [config.get('Paths', 'output')]
        path_list += self.experiment_id.split('_')
        self.output_path = os.path.join(*path_list)
        try:
            os.makedirs(self.output_path)
        except OSError as os_error:
            if os_error.errno == 17:  # file exists
                logger.info("Directory exists = '{}'".format(self.output_path))
                if delete_or_quit == 'd':
                    logger.info("  Deleting directory.")
                    shutil.rmtree(self.output_path)
                    os.makedirs(self.output_path)
                else:
                    logger.info("  Re-using directory.")
            else:
                raise

        # Loss and updates
        def aggregated_loss_func(prediction, target, weights=None):
            loss = loss_func(prediction, target)
            return aggregate(loss, mode=loss_aggregation_mode, weights=weights)

        self.loss_func_name = loss_func.__name__
        self.loss_func = aggregated_loss_func
        self.updates_func_name = updates_func.__name__
        self.updates_func_kwards = none_to_dict(updates_func_kwards)
        self.updates_func = partial(updates_func, **self.updates_func_kwards)
        self.loss_aggregation_mode = loss_aggregation_mode

        # Learning rate
        # Set _learning_rate to -1 so when we set self.learning_rate
        # during the training loop, it will be logged correctly.
        self._learning_rate = theano.shared(sfloatX(-1), name='learning_rate')

        # Callbacks
        def callbacks_dataframe(lst):
            return pd.DataFrame(lst, columns=['iteration', 'function'])

        self.callbacks = callbacks_dataframe(callbacks)
        self.repeat_callbacks = callbacks_dataframe(repeat_callbacks)
        self.epoch_callbacks = none_to_list(epoch_callbacks)
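The docstring above describes repeat_callbacks as (iteration, function) pairs, e.g. `repeat_callbacks=[(100, Trainer.validate)]`, but the training loop itself is not part of this excerpt. The sketch below only illustrates how such a callbacks table could be dispatched; the modulo rule and the trainer_state dict are assumptions, not the library's actual loop:

import pandas as pd


def callbacks_dataframe(lst):
    # Same helper as in the constructor above.
    return pd.DataFrame(lst, columns=['iteration', 'function'])


def report(trainer):
    print("callback fired at iteration", trainer['iteration'])


repeat_callbacks = callbacks_dataframe([
    (100, report),  # e.g. run validation every 100 iterations
    (250, report),  # e.g. plot or checkpoint every 250 iterations
])

trainer_state = {'iteration': 0}  # stand-in for the real Trainer object
for i in range(1, 501):
    trainer_state['iteration'] = i
    # Fire every callback whose period divides the current iteration.
    due = repeat_callbacks[i % repeat_callbacks['iteration'] == 0]
    for func in due['function']:
        func(trainer_state)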
Example #4
#!/usr/bin/env python
from __future__ import print_function, division
from os.path import join
import sys
from neuralnilm.config import config

import logging
logger = logging.getLogger('neuralnilm')

experiment_definition_path = config.get('Paths', 'experiment_definitions')
job_list_filename = join(experiment_definition_path, 'job_list.txt')


def main():
    if experiment_definition_path not in sys.path:
        sys.path.insert(0, experiment_definition_path)

    next_job = _get_next_job()
    while next_job:
        try:
            _run_job(next_job)
        except KeyboardInterrupt:
            delete_this_job = raw_input(
                "Delete this job from job list [Y/n]? ").lower()
            if delete_this_job != "n":
                _delete_completed_job()
            continue_running = raw_input(
                "Continue running other experiments [N/y]? ").lower()
            if continue_running != "y":
                break
        else:
            # The snippet is truncated here in the source; a plausible
            # continuation (an assumption, not shown in the excerpt) is to
            # drop the finished job before fetching the next one.
            _delete_completed_job()
        next_job = _get_next_job()
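_get_next_job() and _delete_completed_job() are referenced above but not included in the excerpt. A purely hypothetical sketch of what such helpers could look like, assuming job_list.txt holds one experiment name per line:

def _get_next_job():
    # Hypothetical helper (not from the original source): return the first
    # non-empty line of job_list.txt, or '' when the list is empty.
    try:
        with open(job_list_filename) as fh:
            for line in fh:
                line = line.strip()
                if line:
                    return line
    except IOError:
        pass
    return ''


def _delete_completed_job():
    # Hypothetical helper (not from the original source): rewrite
    # job_list.txt without its first non-empty line.
    with open(job_list_filename) as fh:
        lines = [line.strip() for line in fh if line.strip()]
    with open(job_list_filename, 'w') as fh:
        fh.write('\n'.join(lines[1:]))
        if len(lines) > 1:
            fh.write('\n')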
Example #5
    def __init__(self, net, data_pipeline, experiment_id,
                 mongo_host=None,
                 mongo_db='neuralnilm',
                 loss_func=squared_error,
                 loss_aggregation_mode='mean',
                 updates_func=nesterov_momentum,
                 updates_func_kwards=None,
                 learning_rates=None,
                 callbacks=None,
                 repeat_callbacks=None,
                 epoch_callbacks=None,
                 metrics=None,
                 num_seqs_to_plot=8):
        """
        Parameters
        ----------
        experiment_id : list of strings
            Concatenated together with underscores.
            Defines the output path.
        mongo_host : str
            Address of the MongoDB server.
            See http://docs.mongodb.org/manual/reference/connection-string/
        callbacks : list of 2-tuples (<iteration>, <function>)
            Function must accept a single argument: this Trainer object.
        repeat_callbacks : list of 2-tuples (<iteration>, <function>)
            Function must accept a single argument: this Trainer object.
            For example, to run validation every 100 iterations, set
            `repeat_callbacks=[(100, Trainer.validate)]`.
        epoch_callbacks : list of functions
            Functions are called at the end of each training epoch.
        metrics : neuralnilm.Metrics object
            Run during `Trainer.validation()`
        """
        # Database
        if mongo_host is None:
            mongo_host = config.get("MongoDB", "address")
        mongo_client = MongoClient(mongo_host)
        self.db = mongo_client[mongo_db]

        # Training and validation state
        self.requested_learning_rates = (
            {0: 1E-2} if learning_rates is None else learning_rates)
        self.experiment_id = "_".join(experiment_id)
        self._train_func = None
        self.metrics = metrics
        self.net = net
        self.data_pipeline = data_pipeline
        self.min_train_cost = float("inf")
        self.num_seqs_to_plot = num_seqs_to_plot

        # Check if this experiment already exists in database
        delete_or_quit = None
        if self.db.experiments.find_one({'_id': self.experiment_id}):
            delete_or_quit = raw_input(
                "Database already has an experiment with _id == {}."
                " Should the old experiment be deleted (d)"
                " (both from the database and from disk)? Or quit (q)?"
                " Or append (a) '_try<i>` string to _id? [A/q/d] "
                .format(self.experiment_id)).lower()
            if delete_or_quit == 'd':
                logger.info("Deleting documents for old experiment.")
                self.db.experiments.delete_one({'_id': self.experiment_id})
                for collection in COLLECTIONS:
                    self.db[collection].delete_many(
                        {'experiment_id': self.experiment_id})
            elif delete_or_quit == 'q':
                raise KeyboardInterrupt()
            else:
                self.modification_since_last_try = raw_input(
                    "Enter a short description of what has changed since"
                    " the last try: ")
                try_i = 2
                while True:
                    candidate_id = self.experiment_id + '_try' + str(try_i)
                    if self.db.experiments.find_one({'_id': candidate_id}):
                        try_i += 1
                    else:
                        self.experiment_id = candidate_id
                        logger.info("experiment_id set to {}"
                                    .format(self.experiment_id))
                        break

        # Output path
        path_list = [config.get('Paths', 'output')]
        path_list += self.experiment_id.split('_')
        self.output_path = os.path.join(*path_list)
        try:
            os.makedirs(self.output_path)
        except OSError as os_error:
            if os_error.errno == 17:  # file exists
                logger.info("Directory exists = '{}'".format(self.output_path))
                if delete_or_quit == 'd':
                    logger.info("  Deleting directory.")
                    shutil.rmtree(self.output_path)
                    os.makedirs(self.output_path)
                else:
                    logger.info("  Re-using directory.")
            else:
                raise

        # Loss and updates
        def aggregated_loss_func(prediction, target, weights=None):
            loss = loss_func(prediction, target)
            return aggregate(loss, mode=loss_aggregation_mode, weights=weights)
        self.loss_func_name = loss_func.__name__
        self.loss_func = aggregated_loss_func
        self.updates_func_name = updates_func.__name__
        self.updates_func_kwards = none_to_dict(updates_func_kwards)
        self.updates_func = partial(updates_func, **self.updates_func_kwards)
        self.loss_aggregation_mode = loss_aggregation_mode

        # Learning rate
        # Set _learning_rate to -1 so when we set self.learning_rate
        # during the training loop, it will be logged correctly.
        self._learning_rate = theano.shared(sfloatX(-1), name='learning_rate')

        # Callbacks
        def callbacks_dataframe(lst):
            return pd.DataFrame(lst, columns=['iteration', 'function'])
        self.callbacks = callbacks_dataframe(callbacks)
        self.repeat_callbacks = callbacks_dataframe(repeat_callbacks)
        self.epoch_callbacks = none_to_list(epoch_callbacks)
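Both Trainer examples disambiguate a clashing experiment _id by appending a '_try<i>' suffix and derive the output directory by splitting the id on underscores. Here is a standalone sketch of that naming scheme, with the database lookup replaced by a plain set; the helper names and the experiment ids are illustrative only:

import os


def unique_experiment_id(experiment_id, existing_ids):
    # Append '_try<i>' until the id no longer clashes, mirroring the
    # while-loop in the constructors above.
    if experiment_id not in existing_ids:
        return experiment_id
    try_i = 2
    while experiment_id + '_try' + str(try_i) in existing_ids:
        try_i += 1
    return experiment_id + '_try' + str(try_i)


def output_path_for(experiment_id, output_root='output'):
    # 'e577_ae_try3' -> 'output/e577/ae/try3', like os.path.join(*path_list).
    return os.path.join(output_root, *experiment_id.split('_'))


existing = {'e577_ae', 'e577_ae_try2'}   # hypothetical ids
exp_id = unique_experiment_id('e577_ae', existing)
print(exp_id)                   # e577_ae_try3
print(output_path_for(exp_id))  # output/e577/ae/try3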