Example #1
    def _restore(self, path):
        """Restores this estimator from given path.

        Note: will rebuild the graph and initialize all parameters,
        and will ignore provided model.

        Args:
            path: Path to checkpoints and other information.
        """
        # Currently Saver requires absolute path to work correctly.
        path = os.path.abspath(path)

        self._graph = tf.Graph()
        with self._graph.as_default():
            endpoints_filename = os.path.join(path, 'endpoints')
            if not os.path.exists(endpoints_filename):
                raise ValueError("Restore folder doesn't contain endpoints.")
            with open(endpoints_filename) as foutputs:
                endpoints = foutputs.read().split('\n')
            graph_filename = os.path.join(path, 'graph.pbtxt')
            if not os.path.exists(graph_filename):
                raise ValueError("Restore folder doesn't contain graph definition.")
            with open(graph_filename) as fgraph:
                graph_def = tf.GraphDef()
                text_format.Merge(fgraph.read(), graph_def)
                (self._inp, self._out,
                 self._model_predictions, self._model_loss) = tf.import_graph_def(
                     graph_def, name='', return_elements=endpoints)
            saver_filename = os.path.join(path, 'saver.pbtxt')
            if not os.path.exists(saver_filename):
                raise ValueError("Restore folder doesn't contain saver defintion.")
            with open(saver_filename) as fsaver:
                from tensorflow.python.training import saver_pb2
                saver_def = saver_pb2.SaverDef()
                text_format.Merge(fsaver.read(), saver_def)
                self._saver = tf.train.Saver(saver_def=saver_def)

            # Restore trainer
            self._global_step = self._graph.get_tensor_by_name('global_step:0')
            trainer_op = self._graph.get_operation_by_name('train')
            self._trainer = RestoredTrainer(
                self._model_loss, self._global_step, trainer_op)

            # Restore summaries.
            self._summaries = self._graph.get_operation_by_name('MergeSummary/MergeSummary')

            # Restore session.
            self._session = tf.Session(
                self.tf_master,
                config=ConfigAddon(verbose=self.verbose).config)
            checkpoint_path = tf.train.latest_checkpoint(path)
            if checkpoint_path is None:
                raise ValueError("Missing checkpoint files in the %s. Please "
                                 "make sure you are you have checkpoint file that describes "
                                 "latest checkpoints and appropriate checkpoints are there. "
                                 "If you have moved the folder, you at this point need to "
                                 "update manually update the paths in the checkpoint file." % path)
            self._saver.restore(self._session, checkpoint_path)
        # Set to be initialized.
        self._initialized = True
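
A quick aside on the restore mechanics above: text_format.Merge() fills a
protocol buffer from its text serialization, which is how graph.pbtxt and
saver.pbtxt are read back, and importing with name='' keeps the original node
names so the saved endpoints resolve unchanged. A minimal standalone sketch,
assuming '/tmp/my_model' is a hypothetical folder previously produced by
save():

import os

import tensorflow as tf
from google.protobuf import text_format

path = os.path.abspath('/tmp/my_model')  # hypothetical save folder
graph_def = tf.GraphDef()
with open(os.path.join(path, 'graph.pbtxt')) as fgraph:
    # Parse the text-serialized GraphDef back into a proto.
    text_format.Merge(fgraph.read(), graph_def)
# name='' preserves node names, so endpoints saved by name (e.g. 'input:0')
# can be returned directly.
inp, = tf.import_graph_def(graph_def, name='', return_elements=['input:0'])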
Example #2
    def _setup_training(self):
        """Sets up graph, model and trainer."""
        self._graph = tf.Graph()
        self._graph.add_to_collection("IS_TRAINING", True)
        with self._graph.as_default():
            tf.set_random_seed(self.tf_random_seed)
            self._global_step = tf.Variable(
                0, name="global_step", trainable=False)

            # Setting up input and output placeholders.
            input_shape = [None] + self._data_feeder.input_shape[1:]
            output_shape = [None] + self._data_feeder.output_shape[1:]
            self._inp = tf.placeholder(
                tf.as_dtype(self._data_feeder.input_dtype), input_shape,
                name="input")
            self._out = tf.placeholder(
                tf.as_dtype(self._data_feeder.output_dtype), output_shape,
                name="output")

            # If class weights are provided, add them to the graph.
            # Different loss functions can use this tensor by name.
            if self.class_weight:
                self._class_weight_node = tf.constant(
                    self.class_weight, name='class_weight')

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                tf.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                tf.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Create summary to monitor loss
            tf.scalar_summary("loss", self._model_loss)

            # Set up a single operator to merge all the summaries
            self._summaries = tf.merge_all_summaries()

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            self._trainer = TensorFlowTrainer(
                loss=self._model_loss, global_step=self._global_step,
                optimizer=self.optimizer, learning_rate=self.learning_rate)

            # Create model's saver capturing all the nodes created up until now.
            self._saver = tf.train.Saver(
                max_to_keep=self.max_to_keep,
                keep_checkpoint_every_n_hours=self.keep_checkpoint_every_n_hours)

            # Enable monitor to create validation data dict with appropriate tf placeholders
            self._monitor.create_val_feed_dict(self._inp, self._out)

            # Create session to run model with.
            if self.config_addon is None:
                self.config_addon = ConfigAddon(verbose=self.verbose)
            self._session = tf.Session(self.tf_master, config=self.config_addon.config)
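
A sketch of how a loss function inside model_fn could consume the
'class_weight' constant created above by fetching it by name. The weighted
log-loss below is hypothetical; only the tensor lookup is taken from this
example (probs are assumed softmax outputs, targets one-hot):

import tensorflow as tf

def weighted_log_loss(probs, targets):
    # Fetch the constant that _setup_training added under the name
    # 'class_weight'.
    class_weight = tf.get_default_graph().get_tensor_by_name('class_weight:0')
    cross_entropy = -targets * tf.log(probs + 1e-8) * class_weight
    return tf.reduce_mean(tf.reduce_sum(cross_entropy, 1))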
Example #3
    def _setup_training(self):
        """Sets up graph, model and trainer."""
        self._graph = tf.Graph()
        self._graph.add_to_collection("IS_TRAINING", True)
        with self._graph.as_default():
            tf.set_random_seed(self.tf_random_seed)
            self._global_step = tf.Variable(
                0, name="global_step", trainable=False)

            # Setting up input and output placeholders.
            input_shape = [None] + self._data_feeder.input_shape[1:]
            output_shape = [None] + self._data_feeder.output_shape[1:]
            self._inp = tf.placeholder(
                tf.as_dtype(self._data_feeder.input_dtype), input_shape,
                name="input")
            self._out = tf.placeholder(
                tf.as_dtype(self._data_feeder.output_dtype), output_shape,
                name="output")

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                tf.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                tf.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Create summary to monitor loss
            tf.scalar_summary("loss", self._model_loss)

            # Set up a single operator to merge all the summaries
            self._summaries = tf.merge_all_summaries()

            with tf.name_scope("test") as scope:
                if self.n_classes > 0:
                    correct_prediction = tf.equal(tf.argmax(self._out,1), tf.argmax(self._model_predictions,1))
                    self._cv_accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                    self._cv_accuracy_summary = tf.scalar_summary("cv accuracy", self._cv_accuracy)

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            self._trainer = TensorFlowTrainer(
                loss=self._model_loss, global_step=self._global_step,
                optimizer=self.optimizer, learning_rate=self.learning_rate)

            # Create model's saver capturing all the nodes created up until now.
            self._saver = tf.train.Saver(
                max_to_keep=self.max_to_keep,
                keep_checkpoint_every_n_hours=self.keep_checkpoint_every_n_hours)

            # Create session to run model with.
            self._session = tf.Session(self.tf_master,
                                       config=tf.ConfigProto(
                                           log_device_placement=self.verbose > 1,
                                           inter_op_parallelism_threads=self.num_cores,
                                           intra_op_parallelism_threads=self.num_cores))
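
The "test" scope above scores accuracy as the fraction of rows where the
argmax of the one-hot targets matches the argmax of the predictions. A NumPy
equivalent of that computation, for intuition:

import numpy as np

y = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
preds = np.array([[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.3, 0.4, 0.3]])
# Row-wise argmax comparison: rows 0 and 1 match, row 2 does not.
accuracy = np.mean(np.argmax(y, 1) == np.argmax(preds, 1))  # -> 2/3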
Example #4
    def _setup_training(self):
        """Sets up graph, model and trainer."""
        self._graph = tf.Graph()
        self._graph.add_to_collection("IS_TRAINING", True)
        with self._graph.as_default():
            tf.set_random_seed(self.tf_random_seed)
            self._global_step = tf.Variable(0,
                                            name="global_step",
                                            trainable=False)

            # Setting up input and output placeholders.
            input_shape = [None] + self._data_feeder.input_shape[1:]
            output_shape = [None] + self._data_feeder.output_shape[1:]
            self._inp = tf.placeholder(
                tf.as_dtype(self._data_feeder.input_dtype), input_shape,
                name="input")
            self._out = tf.placeholder(
                tf.as_dtype(self._data_feeder.output_dtype), output_shape,
                name="output")

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                tf.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                tf.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Create summary to monitor loss
            tf.scalar_summary("loss", self._model_loss)

            # Set up a single operator to merge all the summaries
            self._summaries = tf.merge_all_summaries()

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            self._trainer = TensorFlowTrainer(loss=self._model_loss,
                                              global_step=self._global_step,
                                              optimizer=self.optimizer,
                                              learning_rate=self.learning_rate)

            # Create model's saver capturing all the nodes created up until now.
            self._saver = tf.train.Saver(
                max_to_keep=self.max_to_keep,
                keep_checkpoint_every_n_hours=self.keep_checkpoint_every_n_hours)

            # Create session to run model with.
            self._session = tf.Session(
                self.tf_master,
                config=tf.ConfigProto(
                    log_device_placement=self.verbose > 1,
                    inter_op_parallelism_threads=self.num_cores,
                    intra_op_parallelism_threads=self.num_cores))
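
For reference, the session configuration above in isolation: with verbose <= 1
device-placement logging stays off, and num_cores bounds both thread pools.
A minimal sketch using the constructor defaults (num_cores=4):

import tensorflow as tf

config = tf.ConfigProto(log_device_placement=False,
                        inter_op_parallelism_threads=4,
                        intra_op_parallelism_threads=4)
session = tf.Session('', config=config)  # '' targets the local master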
Example #5
class TensorFlowEstimator(BaseEstimator):
    """Base class for all TensorFlow estimators.

    Parameters:
        model_fn: Model function that takes input X, y tensors and outputs
                  prediction and loss tensors.
        n_classes: Number of classes in the target.
        tf_master: TensorFlow master. Empty string is default for local.
        batch_size: Mini batch size.
        steps: Number of steps to run over data.
        optimizer: Optimizer name (or class), for example "SGD", "Adam",
                   "Adagrad".
        learning_rate: If this is a constant float value, no decay function
            is used. Alternatively, a customized decay function can be passed
            that accepts global_step as a parameter and returns a Tensor,
            e.g. an exponential decay function:
            def exp_decay(global_step):
                return tf.train.exponential_decay(
                    learning_rate=0.1, global_step=global_step,
                    decay_steps=2, decay_rate=0.001)
        tf_random_seed: Random seed for TensorFlow initializers.
            Setting this value allows consistency between reruns.
        continue_training: If True, an already initialized model will be
            continually trained on every call of fit.
        num_cores: Number of cores to be used. (default: 4)
        verbose: Controls the verbosity, possible values:
                 0: the algorithm and debug information is muted.
                 1: trainer prints the progress.
                 2: log device placement is printed.
        early_stopping_rounds: Activates early stopping if this is not None.
            Loss needs to decrease at least every <early_stopping_rounds>
            round(s) to continue training. (default: None)
        max_to_keep: The maximum number of recent checkpoint files to keep.
            As new files are created, older files are deleted.
            If None or 0, all checkpoint files are kept.
            Defaults to 5 (that is, the 5 most recent checkpoint files are kept).
        keep_checkpoint_every_n_hours: Number of hours between each checkpoint
            to be saved. The default value of 10,000 hours effectively disables the feature.
    """

    def __init__(self, model_fn, n_classes, tf_master="", batch_size=32,
                 steps=200, optimizer="SGD",
                 learning_rate=0.1, tf_random_seed=42, continue_training=False,
                 num_cores=4, verbose=1, early_stopping_rounds=None,
                 max_to_keep=5, keep_checkpoint_every_n_hours=10000):
        self.n_classes = n_classes
        self.tf_master = tf_master
        self.batch_size = batch_size
        self.steps = steps
        self.verbose = verbose
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.tf_random_seed = tf_random_seed
        self.model_fn = model_fn
        self.continue_training = continue_training
        self.num_cores = num_cores
        self._initialized = False
        self._early_stopping_rounds = early_stopping_rounds
        self.max_to_keep = max_to_keep
        self.keep_checkpoint_every_n_hours = keep_checkpoint_every_n_hours

    def _setup_training(self):
        """Sets up graph, model and trainer."""
        self._graph = tf.Graph()
        with self._graph.as_default():
            tf.set_random_seed(self.tf_random_seed)
            self._global_step = tf.Variable(
                0, name="global_step", trainable=False)

            # Setting up input and output placeholders.
            input_shape = [None] + self._data_feeder.input_shape[1:]
            output_shape = [None] + self._data_feeder.output_shape[1:]
            self._inp = tf.placeholder(
                tf.as_dtype(self._data_feeder.input_dtype), input_shape,
                name="input")
            self._out = tf.placeholder(
                tf.as_dtype(self._data_feeder.output_dtype), output_shape,
                name="output")

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                tf.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                tf.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Create summary to monitor loss
            tf.scalar_summary("loss", self._model_loss)

            # Set up a single operator to merge all the summaries
            self._summaries = tf.merge_all_summaries()

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            self._trainer = TensorFlowTrainer(
                loss=self._model_loss, global_step=self._global_step,
                optimizer=self.optimizer, learning_rate=self.learning_rate)

            # Create model's saver capturing all the nodes created up until now.
            self._saver = tf.train.Saver(
                max_to_keep=self.max_to_keep,
                keep_checkpoint_every_n_hours=self.keep_checkpoint_every_n_hours)

            # Create session to run model with.
            self._session = tf.Session(self.tf_master,
                                       config=tf.ConfigProto(
                                           log_device_placement=self.verbose > 1,
                                           inter_op_parallelism_threads=self.num_cores,
                                           intra_op_parallelism_threads=self.num_cores))

    def _setup_summary_writer(self, logdir):
        """Sets up the summary writer to prepare for later optional visualization."""
        self._summary_writer = tf.train.SummaryWriter(
            os.path.join(logdir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')),
            graph_def=self._session.graph_def)

    def fit(self, X, y, logdir=None):
        """Builds a neural network model given provided `model_fn` and training
        data X and y.

        Note: the first call constructs the graph and initializes variables.
        Consecutive calls will continue training the same model. This logic
        follows the partial_fit() interface in scikit-learn.

        To restart learning, create new estimator.

        Args:
            X: matrix or tensor of shape [n_samples, n_features...]. Can be
            an iterator that returns arrays of features. The training input
            samples for fitting the model.
            y: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
            an iterator that returns arrays of targets. The training target
            values (class labels in classification, real numbers in regression).
            logdir: the directory to save the log file that can be used for
            optional visualization.

        Returns:
            Returns self.
        """
        # Sets up data feeder.
        self._data_feeder = setup_train_data_feeder(X, y,
                                                    self.n_classes,
                                                    self.batch_size)
        if not self.continue_training or not self._initialized:
            # Sets up model and trainer.
            self._setup_training()
            # Initialize model parameters.
            self._trainer.initialize(self._session)
            self._initialized = True

        # Sets up summary writer for later optional visualization.
        # Because _summary_writer can't be set up in __init__ (it's not a
        # parameter of the model), check here whether the attribute exists
        # and whether it is None (in case it was set up in a previous run).
        # It is initialized only if it wasn't before and a logdir is given.
        if logdir:
            if (not hasattr(self, "_summary_writer") or
                    self._summary_writer is None):
                self._setup_summary_writer(logdir)
        else:
            self._summary_writer = None

        # Train model for given number of steps.
        self._trainer.train(self._session,
                            self._data_feeder.get_feed_dict_fn(
                                self._inp, self._out),
                            self.steps,
                            self._summary_writer,
                            self._summaries,
                            verbose=self.verbose,
                            early_stopping_rounds=self._early_stopping_rounds,
                            feed_params_fn=self._data_feeder.get_feed_params)
        return self

    def partial_fit(self, X, y):
        """Incremental fit on a batch of samples.

        This method is expected to be called several times consecutively
        on different or the same chunks of the dataset. This can implement
        either iterative training or out-of-core/online training.

        This is especially useful when the whole dataset is too big to
        fit in memory at the same time, or when the model is taking a long
        time to converge and you want to split up training into subparts.

        Args:
            X: matrix or tensor of shape [n_samples, n_features...]. Can be
            an iterator that returns arrays of features. The training input
            samples for fitting the model.
            y: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
            an iterator that returns arrays of targets. The training target
            values (class labels in classification, real numbers in regression).

        Returns:
            Returns self.
        """
        return self.fit(X, y)

    def _predict(self, X):
        if not self._initialized:
            raise NotFittedError()
        predict_data_feeder = setup_predict_data_feeder(X)
        preds = []
        dropouts = self._graph.get_collection(DROPOUTS)
        feed_dict = {prob: 1.0 for prob in dropouts}
        for data in predict_data_feeder:
            feed_dict[self._inp] = data
            preds.append(self._session.run(
                self._model_predictions,
                feed_dict))
        return np.concatenate(preds, axis=0)

    def predict(self, X, axis=1):
        """Predict class or regression for X.

        For a classification model, the predicted class for each sample in X is
        returned. For a regression model, the predicted value based on X is
        returned.

        Args:
            X: array-like matrix, [n_samples, n_features...] or iterator.

        Returns:
            y: array of shape [n_samples]. The predicted classes or predicted
            value.
        """
        pred = self._predict(X)
        if self.n_classes < 2:
            return pred
        return pred.argmax(axis=axis)

    def predict_proba(self, X):
        """Predict class probability of the input samples X.

        Args:
            X: array-like matrix, [n_samples, n_features...] or iterator.

        Returns:
            y: array of shape [n_samples, n_classes]. The predicted
            probabilities for each class.
        """
        return self._predict(X)

    def get_tensor(self, name):
        """Returns tensor by name.

        Args:
            name: string, name of the tensor.

        Returns:
            Tensor.
        """
        return self._graph.get_tensor_by_name(name)

    def get_tensor_value(self, name):
        """Returns value of the tensor give by name.

        Args:
            name: string, name of the tensor.

        Returns:
            Numpy array - value of the tensor.
        """
        return self._session.run(self.get_tensor(name))

    def save(self, path):
        """Saves checkpoints and graph to given path.

        Args:
            path: Folder to save model to.
        """
        if not self._initialized:
            raise NotFittedError()

        # Currently Saver requires absolute path to work correctly.
        path = os.path.abspath(path)

        if not os.path.exists(path):
            os.makedirs(path)
        if not os.path.isdir(path):
            raise ValueError("Path %s should be a directory to save"
                             "checkpoints and graph." % path)
        with open(os.path.join(path, 'model.def'), 'w') as fmodel:
            all_params = self.get_params()
            params = {}
            for key, value in all_params.items():
                if not callable(value) and value is not None:
                    params[key] = value
            params['class_name'] = type(self).__name__
            fmodel.write(json.dumps(params))
        with open(os.path.join(path, 'endpoints'), 'w') as foutputs:
            foutputs.write('%s\n%s\n%s\n%s' % (
                self._inp.name,
                self._out.name,
                self._model_predictions.name,
                self._model_loss.name))
        with open(os.path.join(path, 'graph.pbtxt'), 'w') as fgraph:
            fgraph.write(str(self._graph.as_graph_def()))
        with open(os.path.join(path, 'saver.pbtxt'), 'w') as fsaver:
            fsaver.write(str(self._saver.as_saver_def()))
        self._saver.save(self._session, os.path.join(path, 'model'),
                         global_step=self._global_step)

    def _restore(self, path):
        """Restores this estimator from given path.

        Note: will rebuild the graph and initialize all parameters,
        and will ignore provided model.

        Args:
            path: Path to checkpoints and other information.
        """
        # Currently Saver requires absolute path to work correctly.
        path = os.path.abspath(path)

        self._graph = tf.Graph()
        with self._graph.as_default():
            endpoints_filename = os.path.join(path, 'endpoints')
            if not os.path.exists(endpoints_filename):
                raise ValueError("Restore folder doesn't contain endpoints.")
            with open(endpoints_filename) as foutputs:
                endpoints = foutputs.read().split('\n')
            graph_filename = os.path.join(path, 'graph.pbtxt')
            if not os.path.exists(graph_filename):
                raise ValueError("Restore folder doesn't contain graph definition.")
            with open(graph_filename) as fgraph:
                graph_def = tf.GraphDef()
                text_format.Merge(fgraph.read(), graph_def)
                (self._inp, self._out,
                 self._model_predictions, self._model_loss) = tf.import_graph_def(
                     graph_def, name='', return_elements=endpoints)
            saver_filename = os.path.join(path, 'saver.pbtxt')
            if not os.path.exists(saver_filename):
                raise ValueError("Restore folder doesn't contain saver defintion.")
            with open(saver_filename) as fsaver:
                from tensorflow.python.training import saver_pb2
                saver_def = saver_pb2.SaverDef()
                text_format.Merge(fsaver.read(), saver_def)
                self._saver = tf.train.Saver(saver_def=saver_def)

            # Restore trainer
            self._global_step = self._graph.get_tensor_by_name('global_step:0')
            trainer_op = self._graph.get_operation_by_name('train')
            self._trainer = RestoredTrainer(
                self._model_loss, self._global_step, trainer_op)

            # Restore summaries.
            self._summaries = self._graph.get_operation_by_name('MergeSummary/MergeSummary')

            # Restore session.
            self._session = tf.Session(self.tf_master,
                                       config=tf.ConfigProto(
                                           log_device_placement=self.verbose > 1,
                                           inter_op_parallelism_threads=self.num_cores,
                                           intra_op_parallelism_threads=self.num_cores))
            checkpoint_path = tf.train.latest_checkpoint(path)
            if checkpoint_path is None:
                raise ValueError("Missing checkpoint files in the %s. Please "
                                 "make sure you are you have checkpoint file that describes "
                                 "latest checkpoints and appropriate checkpoints are there. "
                                 "If you have moved the folder, you at this point need to "
                                 "update manually update the paths in the checkpoint file." % path)
            self._saver.restore(self._session, checkpoint_path)
        # Set to be initialized.
        self._initialized = True

    @classmethod
    def restore(cls, path):
        """Restores model from give path.

        Args:
            path: Path to the checkpoints and other model information.

        Returns:
            Estimator, object of the subclass of TensorFlowEstimator.
        """
        model_def_filename = os.path.join(path, 'model.def')
        if not os.path.exists(model_def_filename):
            raise ValueError("Restore folder doesn't contain model definition.")
        with open(model_def_filename) as fmodel:
            model_def = json.loads(fmodel.read())
            # TensorFlow bindings require parameters to be strings, not
            # unicode. This is only an issue in Python 2.
            for key, value in model_def.items():
                if (isinstance(value, string_types) and
                        not isinstance(value, str)):
                    model_def[key] = str(value)
        class_name = model_def.pop('class_name')
        if class_name == 'TensorFlowEstimator':
            custom_estimator = TensorFlowEstimator(model_fn=None, **model_def)
            custom_estimator._restore(path)
            return custom_estimator

        # To avoid cyclical dependencies, import inside the function instead
        # of at the beginning of the file.
        from skflow import estimators
        # Estimator must be one of the defined estimators in the __init__ file.
        estimator = getattr(estimators, class_name)(**model_def)
        estimator._restore(path)
        return estimator
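
A hedged end-to-end sketch of the fit/save/restore cycle this class
implements. It assumes the class ships in the skflow package and that
skflow.models.logistic_regression is available as a simple model_fn; the data
and paths are illustrative:

import numpy as np
import skflow

X = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(0, 3, 100)

def my_model(X, y):
    # Hypothetical model_fn: a single softmax layer bundled with skflow.
    return skflow.models.logistic_regression(X, y)

estimator = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3,
                                       steps=100)
estimator.fit(X, y)
estimator.save('/tmp/my_model')

# restore() reads model.def to pick the class, then _restore() rebuilds the
# graph from graph.pbtxt and reloads the latest checkpoint.
restored = skflow.TensorFlowEstimator.restore('/tmp/my_model')
predictions = restored.predict(X)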
Example #6
class TensorFlowEstimator(BaseEstimator):
    """Base class for all TensorFlow estimators.

    Parameters:
        model_fn: Model function that takes input X, y tensors and outputs
                  prediction and loss tensors.
        n_classes: Number of classes in the target.
        tf_master: TensorFlow master. Empty string is default for local.
        batch_size: Mini batch size.
        steps: Number of steps to run over data.
        optimizer: Optimizer name (or class), for example "SGD", "Adam",
                   "Adagrad".
        learning_rate: If this is a constant float value, no decay function
            is used. Alternatively, a customized decay function can be passed
            that accepts global_step as a parameter and returns a Tensor,
            e.g. an exponential decay function:
            def exp_decay(global_step):
                return tf.train.exponential_decay(
                    learning_rate=0.1, global_step=global_step,
                    decay_steps=2, decay_rate=0.001)
        class_weight: None or list of n_classes floats. Weight associated
            with classes for loss computation. If not given, all classes
            are supposed to have weight one.
        tf_random_seed: Random seed for TensorFlow initializers.
            Setting this value allows consistency between reruns.
        continue_training: If True, an already initialized model will be
            continually trained on every call of fit.
        config_addon: ConfigAddon object that controls the configurations of the session,
            e.g. num_cores, gpu_memory_fraction, etc.
        verbose: Controls the verbosity, possible values:
                 0: the algorithm and debug information is muted.
                 1: trainer prints the progress.
                 2: log device placement is printed.
        max_to_keep: The maximum number of recent checkpoint files to keep.
            As new files are created, older files are deleted.
            If None or 0, all checkpoint files are kept.
            Defaults to 5 (that is, the 5 most recent checkpoint files are kept).
        keep_checkpoint_every_n_hours: Number of hours between each checkpoint
            to be saved. The default value of 10,000 hours effectively disables the feature.
    """
    def __init__(self,
                 model_fn,
                 n_classes,
                 tf_master="",
                 batch_size=32,
                 steps=200,
                 optimizer="SGD",
                 learning_rate=0.1,
                 class_weight=None,
                 tf_random_seed=42,
                 continue_training=False,
                 config_addon=None,
                 verbose=1,
                 max_to_keep=5,
                 keep_checkpoint_every_n_hours=10000):

        self.n_classes = n_classes
        self.tf_master = tf_master
        self.batch_size = batch_size
        self.steps = steps
        self.verbose = verbose
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.tf_random_seed = tf_random_seed
        self.model_fn = model_fn
        self.continue_training = continue_training
        self._initialized = False
        self.max_to_keep = max_to_keep
        self.keep_checkpoint_every_n_hours = keep_checkpoint_every_n_hours
        self.class_weight = class_weight
        self.config_addon = config_addon

    def _setup_training(self):
        """Sets up graph, model and trainer."""
        self._graph = tf.Graph()
        self._graph.add_to_collection("IS_TRAINING", True)
        with self._graph.as_default():
            tf.set_random_seed(self.tf_random_seed)
            self._global_step = tf.Variable(0,
                                            name="global_step",
                                            trainable=False)

            # Setting up input and output placeholders.
            input_shape = [None] + self._data_feeder.input_shape[1:]
            output_shape = [None] + self._data_feeder.output_shape[1:]
            self._inp = tf.placeholder(
                tf.as_dtype(self._data_feeder.input_dtype), input_shape,
                name="input")
            self._out = tf.placeholder(
                tf.as_dtype(self._data_feeder.output_dtype), output_shape,
                name="output")

            # If class weights are provided, add them to the graph.
            # Different loss functions can use this tensor by name.
            if self.class_weight:
                self._class_weight_node = tf.constant(self.class_weight,
                                                      name='class_weight')

            # Add histograms for X and y if they are floats.
            if self._data_feeder.input_dtype in (np.float32, np.float64):
                tf.histogram_summary("X", self._inp)
            if self._data_feeder.output_dtype in (np.float32, np.float64):
                tf.histogram_summary("y", self._out)

            # Create model's graph.
            self._model_predictions, self._model_loss = self.model_fn(
                self._inp, self._out)

            # Create summary to monitor loss
            tf.scalar_summary("loss", self._model_loss)

            # Set up a single operator to merge all the summaries
            self._summaries = tf.merge_all_summaries()

            # Create trainer and augment graph with gradients and optimizer.
            # Additionally creates initialization ops.
            self._trainer = TensorFlowTrainer(loss=self._model_loss,
                                              global_step=self._global_step,
                                              optimizer=self.optimizer,
                                              learning_rate=self.learning_rate)

            # Create model's saver capturing all the nodes created up until now.
            self._saver = tf.train.Saver(
                max_to_keep=self.max_to_keep,
                keep_checkpoint_every_n_hours=self.keep_checkpoint_every_n_hours)

            # Enable monitor to create validation data dict with appropriate tf placeholders
            self._monitor.create_val_feed_dict(self._inp, self._out)

            # Create session to run model with.
            if self.config_addon is None:
                self.config_addon = ConfigAddon(verbose=self.verbose)
            self._session = tf.Session(self.tf_master,
                                       config=self.config_addon.config)

    def _setup_summary_writer(self, logdir):
        """Sets up the summary writer to prepare for later optional visualization."""
        self._summary_writer = tf.train.SummaryWriter(
            os.path.join(
                logdir,
                datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')),
            graph_def=self._session.graph_def)

    def fit(self, X, y, monitor=None, logdir=None):
        """Builds a neural network model given provided `model_fn` and training
        data X and y.

        Note: the first call constructs the graph and initializes variables.
        Consecutive calls will continue training the same model. This logic
        follows the partial_fit() interface in scikit-learn.

        To restart learning, create new estimator.

        Args:
            X: matrix or tensor of shape [n_samples, n_features...]. Can be
            an iterator that returns arrays of features. The training input
            samples for fitting the model.
            y: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
            an iterator that returns arrays of targets. The training target
            values (class labels in classification, real numbers in regression).
            monitor: Monitor object to print training progress and invoke
            early stopping.
            logdir: the directory to save the log file that can be used for
            optional visualization.

        Returns:
            Returns self.
        """
        # Sets up data feeder.
        self._data_feeder = setup_train_data_feeder(X, y, self.n_classes,
                                                    self.batch_size)

        if monitor is None:
            self._monitor = monitors.default_monitor()
        else:
            self._monitor = monitor

        if not self.continue_training or not self._initialized:
            # Sets up model and trainer.
            self._setup_training()
            # Initialize model parameters.
            self._trainer.initialize(self._session)
            self._initialized = True

        # Sets up summary writer for later optional visualization.
        # Because _summary_writer can't be set up in __init__ (it's not a
        # parameter of the model), check here whether the attribute exists
        # and whether it is None (in case it was set up in a previous run).
        # It is initialized only if it wasn't before and a logdir is given.
        if logdir:
            if (not hasattr(self, "_summary_writer") or
                    self._summary_writer is None):
                self._setup_summary_writer(logdir)
        else:
            self._summary_writer = None

        # Train model for given number of steps.
        self._trainer.train(self._session,
                            self._data_feeder.get_feed_dict_fn(
                                self._inp, self._out),
                            self.steps,
                            self._monitor,
                            self._summary_writer,
                            self._summaries,
                            feed_params_fn=self._data_feeder.get_feed_params)
        return self

    def partial_fit(self, X, y):
        """Incremental fit on a batch of samples.

        This method is expected to be called several times consecutively
        on different or the same chunks of the dataset. This can implement
        either iterative training or out-of-core/online training.

        This is especially useful when the whole dataset is too big to
        fit in memory at the same time, or when the model is taking a long
        time to converge and you want to split up training into subparts.

        Args:
            X: matrix or tensor of shape [n_samples, n_features...]. Can be
            an iterator that returns arrays of features. The training input
            samples for fitting the model.
            y: vector or matrix [n_samples] or [n_samples, n_outputs]. Can be
            an iterator that returns arrays of targets. The training target
            values (class labels in classification, real numbers in regression).

        Returns:
            Returns self.
        """
        return self.fit(X, y)

    def _predict(self, X, axis=-1, batch_size=-1):
        if not self._initialized:
            raise NotFittedError()
        self._graph.add_to_collection("IS_TRAINING", False)
        predict_data_feeder = setup_predict_data_feeder(X,
                                                        batch_size=batch_size)
        preds = []
        dropouts = self._graph.get_collection(DROPOUTS)
        feed_dict = {prob: 1.0 for prob in dropouts}
        for data in predict_data_feeder:
            feed_dict[self._inp] = data
            predictions_for_batch = self._session.run(self._model_predictions,
                                                      feed_dict)
            if self.n_classes > 1 and axis != -1:
                preds.append(predictions_for_batch.argmax(axis=axis))
            else:
                preds.append(predictions_for_batch)

        return np.concatenate(preds, axis=0)

    def predict(self, X, axis=1, batch_size=-1):
        """Predict class or regression for X.

        For a classification model, the predicted class for each sample in X is
        returned. For a regression model, the predicted value based on X is
        returned.

        Args:
            X: array-like matrix, [n_samples, n_features...] or iterator.
            axis: Which axis to argmax for classification.
                  By default axis 1 (next after batch) is used.
                  Use 2 for sequence predictions.
            batch_size: If test set is too big, use batch size to split
                        it into mini batches. By default full dataset is used.

        Returns:
            y: array of shape [n_samples]. The predicted classes or predicted
            value.
        """
        return self._predict(X, axis=axis, batch_size=batch_size)

    def predict_proba(self, X, batch_size=-1):
        """Predict class probability of the input samples X.

        Args:
            X: array-like matrix, [n_samples, n_features...] or iterator.
            batch_size: If test set is too big, use batch size to split
                        it into mini batches. By default full dataset is used.

        Returns:
            y: array of shape [n_samples, n_classes]. The predicted
            probabilities for each class.

        """
        return self._predict(X, batch_size=batch_size)

    def get_tensor(self, name):
        """Returns tensor by name.

        Args:
            name: string, name of the tensor.

        Returns:
            Tensor.
        """
        return self._graph.get_tensor_by_name(name)

    def get_tensor_value(self, name):
        """Returns value of the tensor give by name.

        Args:
            name: string, name of the tensor.

        Returns:
            Numpy array - value of the tensor.
        """
        return self._session.run(self.get_tensor(name))

    def save(self, path):
        """Saves checkpoints and graph to given path.

        Args:
            path: Folder to save model to.
        """
        if not self._initialized:
            raise NotFittedError()

        # Currently Saver requires absolute path to work correctly.
        path = os.path.abspath(path)

        if not os.path.exists(path):
            os.makedirs(path)
        if not os.path.isdir(path):
            raise ValueError("Path %s should be a directory to save"
                             "checkpoints and graph." % path)
        with open(os.path.join(path, 'model.def'), 'w') as fmodel:
            all_params = self.get_params()
            params = {}
            for key, value in all_params.items():
                if not callable(value) and value is not None:
                    params[key] = value
            params['class_name'] = type(self).__name__
            fmodel.write(
                json.dumps(params,
                           default=lambda o: o.__dict__
                           if hasattr(o, '__dict__') else None))
        with open(os.path.join(path, 'endpoints'), 'w') as foutputs:
            foutputs.write(
                '%s\n%s\n%s\n%s' %
                (self._inp.name, self._out.name, self._model_predictions.name,
                 self._model_loss.name))
        with open(os.path.join(path, 'graph.pbtxt'), 'w') as fgraph:
            fgraph.write(str(self._graph.as_graph_def()))
        with open(os.path.join(path, 'saver.pbtxt'), 'w') as fsaver:
            fsaver.write(str(self._saver.as_saver_def()))
        self._saver.save(self._session,
                         os.path.join(path, 'model'),
                         global_step=self._global_step)

    def _restore(self, path):
        """Restores this estimator from given path.

        Note: will rebuild the graph and initialize all parameters,
        and will ignore provided model.

        Args:
            path: Path to checkpoints and other information.
        """
        # Currently Saver requires absolute path to work correctly.
        path = os.path.abspath(path)

        self._graph = tf.Graph()
        with self._graph.as_default():
            endpoints_filename = os.path.join(path, 'endpoints')
            if not os.path.exists(endpoints_filename):
                raise ValueError("Restore folder doesn't contain endpoints.")
            with open(endpoints_filename) as foutputs:
                endpoints = foutputs.read().split('\n')
            graph_filename = os.path.join(path, 'graph.pbtxt')
            if not os.path.exists(graph_filename):
                raise ValueError(
                    "Restore folder doesn't contain graph definition.")
            with open(graph_filename) as fgraph:
                graph_def = tf.GraphDef()
                text_format.Merge(fgraph.read(), graph_def)
                (self._inp, self._out, self._model_predictions,
                 self._model_loss) = tf.import_graph_def(
                     graph_def, name='', return_elements=endpoints)
            saver_filename = os.path.join(path, 'saver.pbtxt')
            if not os.path.exists(saver_filename):
                raise ValueError(
                    "Restore folder doesn't contain saver defintion.")
            with open(saver_filename) as fsaver:
                from tensorflow.python.training import saver_pb2
                saver_def = saver_pb2.SaverDef()
                text_format.Merge(fsaver.read(), saver_def)
                self._saver = tf.train.Saver(saver_def=saver_def)

            # Restore trainer
            self._global_step = self._graph.get_tensor_by_name('global_step:0')
            trainer_op = self._graph.get_operation_by_name('train')
            self._trainer = RestoredTrainer(self._model_loss,
                                            self._global_step, trainer_op)

            # Restore summaries.
            self._summaries = self._graph.get_operation_by_name(
                'MergeSummary/MergeSummary')

            # Restore session.
            if not isinstance(self.config_addon, ConfigAddon):
                self.config_addon = ConfigAddon(verbose=self.verbose)
            self._session = tf.Session(self.tf_master,
                                       config=self.config_addon.config)
            checkpoint_path = tf.train.latest_checkpoint(path)
            if checkpoint_path is None:
                raise ValueError(
                    "Missing checkpoint files in %s. Please make sure the "
                    "'checkpoint' file that describes the latest checkpoints "
                    "and the appropriate checkpoint files are there. If you "
                    "have moved the folder, you need to manually update the "
                    "paths in the 'checkpoint' file." % path)
            self._saver.restore(self._session, checkpoint_path)
        # Set to be initialized.
        self._initialized = True

    # pylint: disable=unused-argument
    @classmethod
    def restore(cls, path, config_addon=None):
        """Restores model from give path.

        Args:
            path: Path to the checkpoints and other model information.
            config_addon: ConfigAddon object that controls the configurations of the session,
                e.g. num_cores, gpu_memory_fraction, etc. This is allowed to be reconfigured.

        Returns:
            Estimator, object of the subclass of TensorFlowEstimator.
        """
        model_def_filename = os.path.join(path, 'model.def')
        if not os.path.exists(model_def_filename):
            raise ValueError(
                "Restore folder doesn't contain model definition.")
        # list of parameters that are allowed to be reconfigured
        reconfigurable_params = ['config_addon']
        with open(model_def_filename) as fmodel:
            model_def = json.loads(fmodel.read())
            # TensorFlow bindings require parameters to be strings, not
            # unicode. This is only an issue in Python 2.
            for key, value in model_def.items():
                if (isinstance(value, string_types)
                        and not isinstance(value, str)):
                    model_def[key] = str(value)
                if key in reconfigurable_params:
                    new_value = locals()[key]
                    if new_value is not None:
                        model_def[key] = new_value
        class_name = model_def.pop('class_name')
        if class_name == 'TensorFlowEstimator':
            custom_estimator = TensorFlowEstimator(model_fn=None, **model_def)
            custom_estimator._restore(path)
            return custom_estimator

        # To avoid cyclical dependencies, import inside the function instead
        # of at the beginning of the file.
        from skflow import estimators
        # Estimator must be one of the defined estimators in the __init__ file.
        estimator = getattr(estimators, class_name)(**model_def)
        estimator._restore(path)
        return estimator
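
A sketch of the two extension points this version adds: an explicit monitor
passed to fit() (the code falls back to monitors.default_monitor()) and a
decay function passed as learning_rate. my_model is the same hypothetical
model_fn as in the sketch after Example #5:

import numpy as np
import tensorflow as tf
import skflow
from skflow import monitors

X = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(0, 3, 100)

def my_model(X, y):
    return skflow.models.logistic_regression(X, y)

def exp_decay(global_step):
    # Keyword form of the schedule shown in the class docstring.
    return tf.train.exponential_decay(learning_rate=0.1,
                                      global_step=global_step,
                                      decay_steps=2, decay_rate=0.001)

estimator = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3,
                                       learning_rate=exp_decay)
estimator.fit(X, y, monitor=monitors.default_monitor())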
Example #7
    def _restore(self, path):
        """Restores this estimator from given path.

        Note: will rebuild the graph and initialize all parameters,
        and will ignore provided model.

        Args:
            path: Path to checkpoints and other information.
        """
        # Currently Saver requires absolute path to work correctly.
        path = os.path.abspath(path)

        self._graph = tf.Graph()
        with self._graph.as_default():
            endpoints_filename = os.path.join(path, 'endpoints')
            if not os.path.exists(endpoints_filename):
                raise ValueError("Restore folder doesn't contain endpoints.")
            with open(endpoints_filename) as foutputs:
                endpoints = foutputs.read().split('\n')
            graph_filename = os.path.join(path, 'graph.pbtxt')
            if not os.path.exists(graph_filename):
                raise ValueError(
                    "Restore folder doesn't contain graph definition.")
            with open(graph_filename) as fgraph:
                graph_def = tf.GraphDef()
                text_format.Merge(fgraph.read(), graph_def)
                (self._inp, self._out, self._model_predictions,
                 self._model_loss) = tf.import_graph_def(
                     graph_def, name='', return_elements=endpoints)
            saver_filename = os.path.join(path, 'saver.pbtxt')
            if not os.path.exists(saver_filename):
                raise ValueError(
                    "Restore folder doesn't contain saver defintion.")
            with open(saver_filename) as fsaver:
                from tensorflow.python.training import saver_pb2
                saver_def = saver_pb2.SaverDef()
                text_format.Merge(fsaver.read(), saver_def)
                self._saver = tf.train.Saver(saver_def=saver_def)

            # Restore trainer
            self._global_step = self._graph.get_tensor_by_name('global_step:0')
            trainer_op = self._graph.get_operation_by_name('train')
            self._trainer = RestoredTrainer(self._model_loss,
                                            self._global_step, trainer_op)

            # Restore summaries.
            self._summaries = self._graph.get_operation_by_name(
                'MergeSummary/MergeSummary')

            # Restore session.
            if not isinstance(self.config_addon, ConfigAddon):
                self.config_addon = ConfigAddon(verbose=self.verbose)
            self._session = tf.Session(self.tf_master,
                                       config=self.config_addon.config)
            checkpoint_path = tf.train.latest_checkpoint(path)
            if checkpoint_path is None:
                raise ValueError(
                    "Missing checkpoint files in %s. Please make sure the "
                    "'checkpoint' file that describes the latest checkpoints "
                    "and the appropriate checkpoint files are there. If you "
                    "have moved the folder, you need to manually update the "
                    "paths in the 'checkpoint' file." % path)
            self._saver.restore(self._session, checkpoint_path)
        # Set to be initialized.
        self._initialized = True