def _training_stats(self):
        """
        Return a dictionary containing statistics collected during model
        training. These statistics are also available with the ``get`` method,
        and are described in more detail in the documentation for that method.
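
        Examples
        --------
        A minimal sketch of typical usage (assumes ``model`` is a trained
        supervised learning model that exposes this helper; the exact keys in
        the returned dictionary depend on the model type):

        >>> stats = model._training_stats()
        >>> sorted(stats.keys())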

        """
        opts = {'model': self.__proxy__, 'model_name': self.__name__}
        results = _graphlab.toolkits._main.run("supervised_learning_get_train_stats", opts)
        return _map_unity_proxy_to_object(results)
    def evaluate(self, dataset, metric="auto",
                 missing_value_action='auto', options={}, **kwargs):
        """
        Evaluate the model by making predictions of target values and comparing
        these to actual values.

        Parameters
        ----------
        dataset : SFrame
            Dataset in the same format used for training. The column names and
            types of the dataset must be the same as those used in training.

        metric : str, list[str]
            Evaluation metric(s) to be computed.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Choose a model dependent missing value policy.
            - 'impute': Proceed with evaluation by filling in the missing
                        values with the mean of the training data. Missing
                        values are also imputed if an entire column of data is
                        missing during evaluation.
            - 'none': Treat missing values as is. The model must be able to
                      handle missing values.
            - 'error': Do not proceed with evaluation and terminate with
                       an error message.

        options : dict
            Additional options to be passed to evaluation.

        kwargs : dict
            Additional keyword options to be passed to evaluation.
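
        Examples
        --------
        A minimal sketch of typical usage (assumes ``model`` is a trained
        supervised learning model and ``test_data`` is an SFrame with the same
        column names and types as the training data; the metric name shown is
        only illustrative and depends on the model type):

        >>> results = model.evaluate(test_data)
        >>> results = model.evaluate(test_data, metric='accuracy')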
        """
        if missing_value_action == 'auto':
            missing_value_action = select_default_missing_value_policy(
                                                             self, 'evaluate')

        _raise_error_if_not_sframe(dataset, "dataset")
        options = options.copy()
        options.update(kwargs)

        options.update({'model': self.__proxy__,
                        'dataset': dataset,
                        'model_name': self.__name__,
                        'missing_value_action': missing_value_action,
                        'metric': metric
                        })
        results = _graphlab.toolkits._main.run(
                'supervised_learning_evaluate', options)
        return _map_unity_proxy_to_object(results)
    def predict(self, dataset, missing_value_action='error',
                output_type='', options={}, **kwargs):
        """
        Return predictions for ``dataset``, using the trained supervised_learning
        model. Predictions are generated as class labels (0 or
        1).

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.
        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'impute': Proceed with prediction by filling in the missing
                        values with the mean of the training data. Missing
                        values are also imputed if an entire column of data is
                        missing during prediction.
            - 'error': Do not proceed with prediction and terminate with
                       an error message.

        output_type : str, optional
            Output type that may be needed by some of the toolkits.

        options : dict
            Additional options to be passed to prediction.

        kwargs : dict
            Additional keyword options to be passed to prediction.

        Returns
        -------
        out : SArray
            An SArray with model predictions.
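
        Examples
        --------
        A minimal sketch of typical usage (assumes ``model`` is a trained
        supervised learning model and ``test_data`` is an SFrame containing
        the training feature columns):

        >>> predictions = model.predict(test_data)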
        """

        _raise_error_if_not_sframe(dataset, "dataset")

        options = options.copy()
        options.update(kwargs)
        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset,
                        'missing_value_action' : missing_value_action,
                        'output_type' : output_type
                        })

        target = _graphlab.toolkits._main.run('supervised_learning_predict', options)
        return _map_unity_proxy_to_object(target['predicted'])
    def classify(self, dataset, missing_value_action='auto'):
        """
        Return predictions for ``dataset``, using the trained supervised_learning
        model. Predictions are generated as class labels (0 or
        1).

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Choose model dependent missing value action
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with prediction and terminate with
              an error message.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.
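
        Examples
        --------
        A minimal sketch of typical usage (assumes ``model`` is a trained
        classifier and ``test_data`` is an SFrame containing the training
        feature columns; the feature names in the single-row call are only
        illustrative):

        >>> classes = model.classify(test_data)
        >>> one_row = model.classify({'feature_1': 1.0, 'feature_2': 'a'})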
        """
        if missing_value_action == 'auto':
            missing_value_action = select_default_missing_value_policy(self, 'classify')

        # Low latency path
        if isinstance(dataset, list):
            return _graphlab.extensions._fast_classify(self.__proxy__, dataset,
                    missing_value_action)
        if isinstance(dataset, dict):
            return _graphlab.extensions._fast_classify(self.__proxy__, [dataset],
                    missing_value_action)

        _raise_error_if_not_sframe(dataset, "dataset")
        options = {}
        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset,
                        'missing_value_action': missing_value_action,
                        })
        target = _graphlab.toolkits._main.run('supervised_learning_classify', options)
        return _map_unity_proxy_to_object(target['classify'])
    def get(self, field):
        """
        Get the value of a given field.

        Parameters
        ----------
        field : string
            Name of the field to be retrieved.

        Returns
        -------
        out : [various]
            The current value of the requested field.

        See Also
        --------
        list_fields
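
        Examples
        --------
        A minimal sketch of typical usage (assumes ``model`` is a trained
        supervised learning model; the field name shown is only illustrative,
        use ``list_fields`` to see which fields a given model exposes):

        >>> model.list_fields()
        >>> value = model.get('num_features')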
        """
        opts = {'model': self.__proxy__,
                'model_name': self.__name__,
                'field': field}
        response = _graphlab.toolkits._main.run('supervised_learning_get_value', opts)
        return _map_unity_proxy_to_object(response['value'])
    def predict(self, dataset, missing_value_action='auto',
                output_type='', options={}, **kwargs):
        """
        Return predictions for ``dataset``, using the trained supervised_learning
        model. Predictions are generated as class labels (0 or
        1).

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Choose a model dependent missing value policy.
            - 'impute': Proceed with prediction by filling in the missing
                        values with the mean of the training data. Missing
                        values are also imputed if an entire column of data is
                        missing during prediction.
            - 'none': Treat missing values as is. The model must be able to
                      handle missing values.
            - 'error': Do not proceed with prediction and terminate with
                       an error message.

        output_type : str, optional
            Output type that may be needed by some of the toolkits.

        options : dict
            Additional options to be passed to prediction.

        kwargs : dict
            Additional keyword options to be passed to prediction.

        Returns
        -------
        out : SArray
            An SArray with model predictions.
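
        Examples
        --------
        A minimal sketch of typical usage (assumes ``model`` is a trained
        supervised learning model and ``test_data`` is an SFrame containing
        the training feature columns; the feature names in the single-row call
        are only illustrative):

        >>> predictions = model.predict(test_data)
        >>> one_row = model.predict([{'feature_1': 1.0, 'feature_2': 'a'}])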
        """
        if missing_value_action == 'auto':
            missing_value_action = select_default_missing_value_policy(self, 'predict')

        # Low latency path
        if isinstance(dataset, list):
            return _graphlab.extensions._fast_predict(self.__proxy__, dataset,
                    output_type, missing_value_action)
        if isinstance(dataset, dict):
            return _graphlab.extensions._fast_predict(self.__proxy__, [dataset],
                    output_type, missing_value_action)

        # Batch predictions path
        _raise_error_if_not_sframe(dataset, "dataset")

        options = options.copy()
        options.update(kwargs)

        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset,
                        'missing_value_action': missing_value_action,
                        'output_type': output_type
                        })

        target = _graphlab.toolkits._main.run(
            'supervised_learning_predict', options)
        return _map_unity_proxy_to_object(target['predicted'])
    def predict_topk(self, dataset, output_type="probability", k=3, missing_value_action='auto'):
        """
        Return top-k predictions for the ``dataset``, using the trained model.
        Predictions are returned as an SFrame with three columns: `row_id`,
        `class`, and `probability`, `margin`,  or `rank`, depending on the ``output_type``
        parameter. Input dataset size must be the same as for training of the model.

        Parameters
        ----------
        dataset : SFrame
            A dataset that has the same columns that were used during training.
            If the target column exists in ``dataset`` it will be ignored
            while making predictions.

        output_type : {'probability', 'rank', 'margin'}, optional
            Choose the return type of the prediction:

            - `probability`: Probability associated with each label in the prediction.
            - `rank`       : Rank associated with each label in the prediction.
            - `margin`     : Margin associated with each label in the prediction.

        k : int, optional
            Number of classes to return for each input example.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

            - 'auto': Default to 'impute'
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.

        See Also
        --------
        predict, classify, evaluate

        Examples
        --------
        >>> pred = m.predict_topk(validation_data, k=3)
        >>> pred
        +--------+-------+-------------------+
        | row_id | class |   probability     |
        +--------+-------+-------------------+
        |   0    |   4   |   0.995623886585  |
        |   0    |   9   |  0.0038311756216  |
        |   0    |   7   | 0.000301006948575 |
        |   1    |   1   |   0.928708016872  |
        |   1    |   3   |  0.0440889261663  |
        |   1    |   2   |  0.0176190119237  |
        |   2    |   3   |   0.996967732906  |
        |   2    |   2   |  0.00151345680933 |
        |   2    |   7   | 0.000637513934635 |
        |   3    |   1   |   0.998070061207  |
        |  ...   |  ...  |        ...        |
        +--------+-------+-------------------+
        [35688 rows x 3 columns]
        """
        _mt._get_metric_tracker().track('toolkit.classifier.logistic_classifier.predict_topk')
        _check_categorical_option_type('output_type', output_type,
                                       ['rank', 'margin', 'probability'])
        _check_categorical_option_type('missing_value_action', missing_value_action,
                                       ['auto', 'impute', 'error'])
        if missing_value_action == 'auto':
            missing_value_action = 'impute'

        # Low latency path
        if isinstance(dataset, list):
            return _graphlab.extensions._fast_predict_topk(self.__proxy__, dataset,
                    output_type, missing_value_action, k)
        if isinstance(dataset, dict):
            return _graphlab.extensions._fast_predict_topk(self.__proxy__, [dataset],
                    output_type, missing_value_action, k)
        # Batch prediction path
        _raise_error_if_not_sframe(dataset, "dataset")
        options = dict()
        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset,
                        'output_type': output_type,
                        'topk': k,
                        'missing_value_action': missing_value_action})
        target = _graphlab.toolkits._main.run(
                  'supervised_learning_predict_topk', options)
        return _map_unity_proxy_to_object(target['predicted'])
    def predict_topk(self, dataset, output_type="probability", k=3):
        """
        Return top-k predictions for the ``dataset``, using the trained model.
        Predictions are returned as an SFrame with three columns: `row_id`,
        `class`, and `probability`, `margin`,  or `rank`, depending on the ``output_type``
        parameter. Input dataset size must be the same as for training of the model.

        Parameters
        ----------
        dataset : SFrame
            A dataset that has the same columns that were used during training.
            If the target column exists in ``dataset`` it will be ignored
            while making predictions.

        output_type : {'probability', 'rank', 'margin'}, optional
            Choose the return type of the prediction:

            - `probability`: Probability associated with each label in the prediction.
            - `rank`       : Rank associated with each label in the prediction.
            - `margin`     : Margin associated with each label in the prediction.

        k : int, optional
            Number of classes to return for each input example.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.

        See Also
        --------
        predict, classify, evaluate

        Examples
        --------
        >>> pred = m.predict_topk(validation_data, k=3)
        >>> pred
        +--------+-------+-------------------+
        | row_id | class |   probability     |
        +--------+-------+-------------------+
        |   0    |   4   |   0.995623886585  |
        |   0    |   9   |  0.0038311756216  |
        |   0    |   7   | 0.000301006948575 |
        |   1    |   1   |   0.928708016872  |
        |   1    |   3   |  0.0440889261663  |
        |   1    |   2   |  0.0176190119237  |
        |   2    |   3   |   0.996967732906  |
        |   2    |   2   |  0.00151345680933 |
        |   2    |   7   | 0.000637513934635 |
        |   3    |   1   |   0.998070061207  |
        |  ...   |  ...  |        ...        |
        +--------+-------+-------------------+
        [35688 rows x 3 columns]
        """
        _mt._get_metric_tracker().track('toolkit.classifier.boosted_trees_classifier.predict_topk')
        _raise_error_if_not_sframe(dataset, "dataset")
        _check_categorical_option_type('output_type', output_type, ['rank', 'margin', 'probability'])
        options = dict()
        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset,
                        'output_type': output_type,
                        'topk': k,
                        'missing_value_action': 'error'})
        target = _graphlab.toolkits._main.run('supervised_learning_predict_topk', options)
        return _map_unity_proxy_to_object(target['predicted'])
    def get_topics(self, topic_ids=None, num_words=5, cdf_cutoff=1.0,
                   output_type='topic_probabilities'):

        """
        Get the words associated with a given topic. The score column is the
        probability of choosing that word given that you have chosen a
        particular topic.

        Parameters
        ----------
        topic_ids : list of int, optional
            The topics for which to retrieve words. Topic ids are zero-based.
            Throws an error if greater than or equal to m['num_topics'], or
            if the requested topic name is not present.

        num_words : int, optional
            The number of words to show.

        cdf_cutoff : float, optional
            Allows one to only show the most probable words whose cumulative
            probability is below this cutoff. For example if there exist
            three words where

            .. math::
               p(word_1 | topic_k) = .1

               p(word_2 | topic_k) = .2

               p(word_3 | topic_k) = .05

            then setting :math:`cdf_{cutoff}=.3` would return only
            :math:`word_1` and :math:`word_2` since
            :math:`p(word_1 | topic_k) + p(word_2 | topic_k) <= cdf_{cutoff}`

        output_type : {'topic_probabilities' | 'topic_words'}, optional
            Determine the type of desired output. See below.

        Returns
        -------
        out : SFrame
            If output_type is 'topic_probabilities', then the returned value is
            an SFrame with a column of words ranked by a column of scores for
            each topic. Otherwise, the returned value is an SArray where
            each element is a list of the most probable words for each topic.

        Examples
        --------
        Get the highest ranked words for all topics.

        >>> docs = graphlab.SArray('http://s3.amazonaws.com/GraphLab-Datasets/nips-text')
        >>> m = graphlab.topic_model.create(docs,
                                            num_iterations=50)
        >>> m.get_topics()
        +-------+----------+-----------------+
        | topic |   word   |      score      |
        +-------+----------+-----------------+
        |   0   |   cell   |  0.028974400831 |
        |   0   |  input   | 0.0259470208503 |
        |   0   |  image   | 0.0215721599763 |
        |   0   |  visual  | 0.0173635081992 |
        |   0   |  object  | 0.0172447874156 |
        |   1   | function | 0.0482834508265 |
        |   1   |  input   | 0.0456270024091 |
        |   1   |  point   | 0.0302662839454 |
        |   1   |  result  | 0.0239474934631 |
        |   1   | problem  | 0.0231750116011 |
        |  ...  |   ...    |       ...       |
        +-------+----------+-----------------+

        Get the highest ranked words for topics 0 and 1 and show 15 words per
        topic.

        >>> m.get_topics([0, 1], num_words=15)
        +-------+----------+------------------+
        | topic |   word   |      score       |
        +-------+----------+------------------+
        |   0   |   cell   |  0.028974400831  |
        |   0   |  input   | 0.0259470208503  |
        |   0   |  image   | 0.0215721599763  |
        |   0   |  visual  | 0.0173635081992  |
        |   0   |  object  | 0.0172447874156  |
        |   0   | response | 0.0139740298286  |
        |   0   |  layer   | 0.0122585145062  |
        |   0   | features | 0.0115343177265  |
        |   0   | feature  | 0.0103530459301  |
        |   0   | spatial  | 0.00823387994361 |
        |  ...  |   ...    |       ...        |
        +-------+----------+------------------+

        If one wants to instead just get the top words per topic, one may
        change the format of the output as follows.

        >>> topics = m.get_topics(output_type='topic_words')
        dtype: list
        Rows: 10
        [['cell', 'image', 'input', 'object', 'visual'],
         ['algorithm', 'data', 'learning', 'method', 'set'],
         ['function', 'input', 'point', 'problem', 'result'],
         ['model', 'output', 'pattern', 'set', 'unit'],
         ['action', 'learning', 'net', 'problem', 'system'],
         ['error', 'function', 'network', 'parameter', 'weight'],
         ['information', 'level', 'neural', 'threshold', 'weight'],
         ['control', 'field', 'model', 'network', 'neuron'],
         ['hidden', 'layer', 'system', 'training', 'vector'],
         ['component', 'distribution', 'local', 'model', 'optimal']]
        """
        _mt._get_metric_tracker().track('toolkit.text.topic_model.get_topics')

        _check_categorical_option_type('output_type', output_type,
            ['topic_probabilities', 'topic_words'])

        if topic_ids is None:
            topic_ids = list(range(self.get('num_topics')))

        assert isinstance(topic_ids, list), \
            "The provided topic_ids is not a list."

        if any([type(x) == str for x in topic_ids]):
            raise ValueError("Only integer topic_ids can be used at this point in time.")
        if not all([x >= 0 and x < self['num_topics'] for x in topic_ids]):
            raise ValueError("Topic id values must be non-negative and less than the " +
                             "number of topics used to fit the model.")

        opts = {'model': self.__proxy__,
                'topic_ids': topic_ids,
                'num_words': num_words,
                'cdf_cutoff': cdf_cutoff}
        response = _graphlab.toolkits._main.run('text_topicmodel_get_topic',
                                               opts)
        ret = _map_unity_proxy_to_object(response['top_words'])

        if output_type != 'topic_probabilities':
            sa = ret.unstack(['word','score'], 'word')['word'].dict_keys()
            ret = _SFrame({'words': sa})

        return ret
    def predict_topk(self, dataset, output_type="probability", k=3):
        """
        Return top-k predictions for the ``dataset``, using the trained model.
        Predictions are returned as an SFrame with three columns: `row_id`,
        `class`, and `probability`,`rank`, or `score`, depending on the ``output_type``
        parameter. Input dataset size must be the same as for training of the
        model, except for images which are automatically resized.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        output_type : {'probability', 'rank', 'score'}, optional
            Choose the return type of the prediction:

            - `rank`: outputs rank along with class label.
            - `probability`: outputs learned probability along with class label.
            - `score`: Same as probability

        k : int, optional
            Number of classes to return for each input example.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.

        See Also
        --------
        predict, classify, evaluate

        Examples
        --------
        >>> data = graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/mnist/sframe/train')
        >>> training_data, validation_data = data.random_split(0.8)
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(training_data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        ...
        >>> pred = m.predict_topk(validation_data, k=3)
        >>> pred
        +--------+-------+-------------------+
        | row_id | class |    probability    |
        +--------+-------+-------------------+
        |   0    |   4   |   0.995623886585  |
        |   0    |   9   |  0.0038311756216  |
        |   0    |   7   | 0.000301006948575 |
        |   1    |   1   |   0.928708016872  |
        |   1    |   3   |  0.0440889261663  |
        |   1    |   2   |  0.0176190119237  |
        |   2    |   3   |   0.996967732906  |
        |   2    |   2   |  0.00151345680933 |
        |   2    |   7   | 0.000637513934635 |
        |   3    |   1   |   0.998070061207  |
        |  ...   |  ...  |        ...        |
        +--------+-------+-------------------+
        [35688 rows x 3 columns]
        """
        _mt._get_metric_tracker().track('toolkit.classifier.neuralnet_classifier.predict_topk')
        _raise_error_if_not_sframe(dataset, "dataset")
        options = dict()
        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset,
                        'output_type': output_type,
                        'topk': k,
                        'missing_value_action': 'error'})
        target = _toolkits_main.run('supervised_learning_predict_topk', options)
        return _map_unity_proxy_to_object(target['predicted'])
    def extract_features(self, dataset, layer_id=None):
        """
        Takes an input dataset, propagates each example through the network,
        and returns an SArray of dense feature vectors, each of which is the concatenation
        of all the hidden unit values at layer[layer_id]. These feature vectors
        can be used as input to train another classifier such as a :py:class:`~graphlab.logistic_classifier.LogisticClassifier`,
        an :py:class:`~graphlab.svm_classifier.SVMClassifier`, another
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`, or a :py:class:`~graphlab.boosted_trees_classifier.BoostedTreesClassifier`. Input dataset size must be the same as for the training of the model,
        except for images which are automatically resized.


        We are also releasing a pre-trained model for ImageNet, as described by
        Alex Krizhevsky et al. It is located at
        http://s3.amazonaws.com/dato-datasets/deeplearning/imagenet_model_iter45 .
        Using it requires 256 x 256 x 3 images.
        Please see Examples and References for more.


        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        layer_id : int , optional
            The index of the layer in neuralnet at which the activations are
            taken to be a dense feature vector. Must be a fully-connected layer.
            Default is None, in which case the layer before the connection
            layer to the output is used.


        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        See Also
        ------------
        graphlab.deeplearning.layers

        References
        ----------
        - Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet
          classification with deep convolutional neural networks." Advances in
          neural information processing systems. 2012.

        Examples
        --------
        >>> data = graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/mnist/sframe/train6k')
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        >>> # Now, let's extract features from the last layer
        >>> data['features'] = m.extract_features(data)
        >>> # Now, let's build a new classifier on top of extracted features
        >>> m = graphlab.classifier.create(data,
        ...                                          features = ['features'],
        ...                                          target='label')

        Now, let's see how to load the ImageNet model, and use it for extracting
        features after resizing the data:

        >>> imagenet_model = graphlab.load_model('http://s3.amazonaws.com/dato-datasets/deeplearning/imagenet_model_iter45')
        >>> data['image'] = graphlab.image_analysis.resize(data['image'], 256, 256, 3)
        >>> data['imagenet_features'] = imagenet_model.extract_features(data)

        """
        _mt._get_metric_tracker().track('toolkit.classifier.neuralnet_classifier.extract_features')
        _raise_error_if_not_sframe(dataset, "dataset")
        options = dict()

        net = self.get('network').layers
        network_size = len(net) - 1
        if layer_id is None:
            if net[network_size]._type == "CONNECTION":
                layer_id = network_size - 1
            else:
                layer_id = network_size - 2
        _numeric_param_check_range("layer_id", layer_id, 0, network_size)

        conv2flat = False
        for i in range(0, layer_id + 1):
            if net[i]._type == "CONNECTION" or net[i]._type == "TRANSITION":
                conv2flat = True

        if conv2flat is not True:
            raise ValueError("Features must be extracted from either a network "
                    "with non-image input or a layer after a FlattenLayer. "
                    "Try extracting features from layer following a FlattenLayer.")

        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset,
                        'layer_id': layer_id})
        target = _toolkits_main.run('supervised_learning_feature_extraction', options)
        return _map_unity_proxy_to_object(target['extracted'])
    def predict_topk(self,
                     dataset,
                     output_type="probability",
                     k=3,
                     missing_value_action='auto'):
        """
        Return top-k predictions for the ``dataset``, using the trained model.
        Predictions are returned as an SFrame with three columns: `row_id`,
        `class`, and `probability`, `margin`,  or `rank`, depending on the ``output_type``
        parameter. Input dataset size must be the same as for training of the model.

        Parameters
        ----------
        dataset : SFrame
            A dataset that has the same columns that were used during training.
            If the target column exists in ``dataset`` it will be ignored
            while making predictions.

        output_type : {'probability', 'rank', 'margin'}, optional
            Choose the return type of the prediction:

            - `probability`: Probability associated with each label in the prediction.
            - `rank`       : Rank associated with each label in the prediction.
            - `margin`     : Margin associated with each label in the prediction.

        k : int, optional
            Number of classes to return for each input example.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. Can be
            one of:

            - 'auto': By default the model treats missing values as is.
            - 'impute': Proceed with evaluation by filling in the missing
              values with the mean of the training data. Missing
              values are also imputed if an entire column of data is
              missing during evaluation.
            - 'error': Do not proceed with evaluation and terminate with
              an error message.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.

        See Also
        --------
        predict, classify, evaluate

        Examples
        --------
        >>> pred = m.predict_topk(validation_data, k=3)
        >>> pred
        +--------+-------+-------------------+
        | row_id | class |   probability     |
        +--------+-------+-------------------+
        |   0    |   4   |   0.995623886585  |
        |   0    |   9   |  0.0038311756216  |
        |   0    |   7   | 0.000301006948575 |
        |   1    |   1   |   0.928708016872  |
        |   1    |   3   |  0.0440889261663  |
        |   1    |   2   |  0.0176190119237  |
        |   2    |   3   |   0.996967732906  |
        |   2    |   2   |  0.00151345680933 |
        |   2    |   7   | 0.000637513934635 |
        |   3    |   1   |   0.998070061207  |
        |  ...   |  ...  |        ...        |
        +--------+-------+-------------------+
        [35688 rows x 3 columns]
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.random_forest_classifier.predict_topk')
        _check_categorical_option_type('output_type', output_type,
                                       ['rank', 'margin', 'probability'])
        if missing_value_action == 'auto':
            missing_value_action = _sl.select_default_missing_value_policy(
                self, 'predict')

        # Low latency path
        if isinstance(dataset, list):
            return _graphlab.extensions._fast_predict_topk(
                self.__proxy__, dataset, output_type, missing_value_action, k)
        if isinstance(dataset, dict):
            return _graphlab.extensions._fast_predict_topk(
                self.__proxy__, [dataset], output_type, missing_value_action,
                k)

        options = dict()
        options.update({
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
            'output_type': output_type,
            'topk': k,
            'missing_value_action': missing_value_action
        })
        target = _graphlab.toolkits._main.run(
            'supervised_learning_predict_topk', options)
        return _map_unity_proxy_to_object(target['predicted'])
    def predict_topk(self, dataset, output_type="probability", k=3):
        """
        Return top-k predictions for the ``dataset``, using the trained model.
        Predictions are returned as an SFrame with three columns: `row_id`,
        `class`, and `probability`,`rank`, or `score`, depending on the ``output_type``
        parameter. Input dataset size must be the same as for training of the
        model, except for images which are automatically resized.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        output_type : {'probability', 'rank', 'score'}, optional
            Choose the return type of the prediction:

            - `rank`: outputs rank along with class label.
            - `probability`: outputs learned probability along with class label.
            - `score`: Same as probability

        k : int, optional
            Number of classes to return for each input example.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.

        See Also
        --------
        predict, classify, evaluate

        Examples
        --------
        >>> data = graphlab.SFrame('https://static.turi.com/datasets/mnist/sframe/train')
        >>> training_data, validation_data = data.random_split(0.8)
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(training_data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        ...
        >>> pred = m.predict_topk(validation_data, k=3)
        >>> pred
        +--------+-------+-------------------+
        | row_id | class |    probability    |
        +--------+-------+-------------------+
        |   0    |   4   |   0.995623886585  |
        |   0    |   9   |  0.0038311756216  |
        |   0    |   7   | 0.000301006948575 |
        |   1    |   1   |   0.928708016872  |
        |   1    |   3   |  0.0440889261663  |
        |   1    |   2   |  0.0176190119237  |
        |   2    |   3   |   0.996967732906  |
        |   2    |   2   |  0.00151345680933 |
        |   2    |   7   | 0.000637513934635 |
        |   3    |   1   |   0.998070061207  |
        |  ...   |  ...  |        ...        |
        +--------+-------+-------------------+
        [35688 rows x 3 columns]
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.neuralnet_classifier.predict_topk')
        _raise_error_if_not_sframe(dataset, "dataset")
        options = dict()
        options.update({
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
            'output_type': output_type,
            'topk': k,
            'missing_value_action': 'error'
        })
        target = _toolkits_main.run('supervised_learning_predict_topk',
                                    options)
        return _map_unity_proxy_to_object(target['predicted'])
    def extract_features(self, dataset, layer_id=None):
        """
        Takes an input dataset, propagates each example through the network,
        and returns an SArray of dense feature vectors, each of which is the concatenation
        of all the hidden unit values at layer[layer_id]. These feature vectors
        can be used as input to train another classifier such as a :py:class:`~graphlab.logistic_classifier.LogisticClassifier`,
        an :py:class:`~graphlab.svm_classifier.SVMClassifier`, another
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`, or a :py:class:`~graphlab.boosted_trees_classifier.BoostedTreesClassifier`. Input dataset size must be the same as for the training of the model,
        except for images which are automatically resized.


        We are also releasing a pre-trained model for ImageNet, as described by
        Alex Krizhevsky et al. It is located at
        https://static.turi.com/products/graphlab-create/resources/models/python2.7/imagenet_model_iter45 .
        Using it requires 256 x 256 x 3 images.
        Please see Examples and References for more.


        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        layer_id : int , optional
            The index of the layer in neuralnet at which the activations are
            taken to be a dense feature vector. Must be a fully-connected layer.
            Default is None, in which case the layer before the connection
            layer to the output is used.


        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        See Also
        ------------
        graphlab.deeplearning.layers

        References
        ----------
        - Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet
          classification with deep convolutional neural networks." Advances in
          neural information processing systems. 2012.

        Examples
        --------
        >>> data = graphlab.SFrame('https://static.turi.com/datasets/mnist/sframe/train6k')
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        >>> # Now, let's extract features from the last layer
        >>> data['features'] = m.extract_features(data)
        >>> # Now, let's build a new classifier on top of extracted features
        >>> m = graphlab.classifier.create(data,
        ...                                          features = ['features'],
        ...                                          target='label')

        Now, let's see how to load the ImageNet model, and use it for extracting
        features after resizing the data:

        >>> imagenet_model = graphlab.load_model('https://static.turi.com/products/graphlab-create/resources/models/python2.7/imagenet_model_iter45')
        >>> data['image'] = graphlab.image_analysis.resize(data['image'], 256, 256, 3, decode=True)
        >>> data['imagenet_features'] = imagenet_model.extract_features(data)

        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.neuralnet_classifier.extract_features')
        _raise_error_if_not_sframe(dataset, "dataset")
        options = dict()

        net = self.get('network').layers
        network_size = len(net) - 1
        if layer_id is None:
            if net[network_size]._type == "CONNECTION":
                layer_id = network_size - 1
            else:
                layer_id = network_size - 2
        _numeric_param_check_range("layer_id", layer_id, 0, network_size)

        conv2flat = False
        for i in range(0, layer_id + 1):
            if net[i]._type == "CONNECTION" or net[i]._type == "TRANSITION":
                conv2flat = True

        if conv2flat is not True:
            raise ValueError(
                "Features must be extracted from either a network "
                "with non-image input or a layer after a FlattenLayer. "
                "Try extracting features from layer following a FlattenLayer.")

        options.update({
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
            'missing_value_action': "error",
            'layer_id': layer_id
        })
        target = _toolkits_main.run('supervised_learning_feature_extraction',
                                    options)
        return _map_unity_proxy_to_object(target['extracted'])
    def extract_features(self, dataset):
        """
        For each example in the dataset, extract the leaf indices of
        each tree as features.

        For multiclass classification, each leaf index contains #num_class
        numbers.

        The returned feature vectors can be used as input to train another
        supervised learning model such as a
        :py:class:`~graphlab.logistic_classifier.LogisticClassifier`,
        an :py:class:`~graphlab.svm_classifier.SVMClassifier`, or a
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        Examples
        --------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')

        >>> # Regression Tree Models
        >>> model = graphlab.boosted_trees_regression.create(data,
        ...                           target='price',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['boosted_tree_features'] = model.extract_features(data)
        >>> model = graphlab.random_forest_regression.create(data,
        ...                           target='price',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['random_forest_features'] = model.extract_features(data)

        >>> # Classification Tree Models
        >>> data['is_expensive'] = data['price'] > 30000
        >>> model = graphlab.boosted_trees_classifier.create(data,
        ...                           target='is_expensive',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['boosted_tree_features'] = model.extract_features(data)

        >>> model = graphlab.random_forest_classifier.create(data,
        ...                           target='is_expensive',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['random_forest_features'] = model.extract_features(data)
        """
        metric_name = '.'.join([self.__module__, 'extract_features'])
        _mt._get_metric_tracker().track(metric_name)
        _raise_error_if_not_sframe(dataset, "dataset")
        options = dict()
        options.update({'model': self.__proxy__,
                        'model_name': self.__name__,
                        'dataset': dataset})
        target = _toolkits_main.run('supervised_learning_feature_extraction', options)
        return _map_unity_proxy_to_object(target['extracted'])
    def get_topics(self, topic_ids=None, num_words=5, cdf_cutoff=1.0,
                   output_type='topic_probabilities'):

        """
        Get the words associated with a given topic. The score column is the
        probability of choosing that word given that you have chosen a
        particular topic.

        Parameters
        ----------
        topic_ids : list of int, optional
            The topics for which to retrieve words. Topic ids are zero-based.
            Throws an error if greater than or equal to m['num_topics'], or
            if the requested topic name is not present.

        num_words : int, optional
            The number of words to show.

        cdf_cutoff : float, optional
            Allows one to only show the most probable words whose cumulative
            probability is below this cutoff. For example if there exist
            three words where

            .. math::
               p(word_1 | topic_k) = .1

               p(word_2 | topic_k) = .2

               p(word_3 | topic_k) = .05

            then setting :math:`cdf_{cutoff}=.3` would return only
            :math:`word_1` and :math:`word_2` since
            :math:`p(word_1 | topic_k) + p(word_2 | topic_k) <= cdf_{cutoff}`

        output_type : {'topic_probabilities' | 'topic_words'}, optional
            Determine the type of desired output. See below.

        Returns
        -------
        out : SFrame
            If output_type is 'topic_probabilities', then the returned value is
            an SFrame with a column of words ranked by a column of scores for
            each topic. Otherwise, the returned value is an SArray where
            each element is a list of the most probable words for each topic.

        Examples
        --------
        Get the highest ranked words for all topics.

        >>> docs = graphlab.SArray('https://static.turi.com/datasets/nips-text')
        >>> m = graphlab.topic_model.create(docs,
                                            num_iterations=50)
        >>> m.get_topics()
        +-------+----------+-----------------+
        | topic |   word   |      score      |
        +-------+----------+-----------------+
        |   0   |   cell   |  0.028974400831 |
        |   0   |  input   | 0.0259470208503 |
        |   0   |  image   | 0.0215721599763 |
        |   0   |  visual  | 0.0173635081992 |
        |   0   |  object  | 0.0172447874156 |
        |   1   | function | 0.0482834508265 |
        |   1   |  input   | 0.0456270024091 |
        |   1   |  point   | 0.0302662839454 |
        |   1   |  result  | 0.0239474934631 |
        |   1   | problem  | 0.0231750116011 |
        |  ...  |   ...    |       ...       |
        +-------+----------+-----------------+

        Get the highest ranked words for topics 0 and 1 and show 15 words per
        topic.

        >>> m.get_topics([0, 1], num_words=15)
        +-------+----------+------------------+
        | topic |   word   |      score       |
        +-------+----------+------------------+
        |   0   |   cell   |  0.028974400831  |
        |   0   |  input   | 0.0259470208503  |
        |   0   |  image   | 0.0215721599763  |
        |   0   |  visual  | 0.0173635081992  |
        |   0   |  object  | 0.0172447874156  |
        |   0   | response | 0.0139740298286  |
        |   0   |  layer   | 0.0122585145062  |
        |   0   | features | 0.0115343177265  |
        |   0   | feature  | 0.0103530459301  |
        |   0   | spatial  | 0.00823387994361 |
        |  ...  |   ...    |       ...        |
        +-------+----------+------------------+

        If one wants to instead just get the top words per topic, one may
        change the format of the output as follows.

        >>> topics = m.get_topics(output_type='topic_words')
        dtype: list
        Rows: 10
        [['cell', 'image', 'input', 'object', 'visual'],
         ['algorithm', 'data', 'learning', 'method', 'set'],
         ['function', 'input', 'point', 'problem', 'result'],
         ['model', 'output', 'pattern', 'set', 'unit'],
         ['action', 'learning', 'net', 'problem', 'system'],
         ['error', 'function', 'network', 'parameter', 'weight'],
         ['information', 'level', 'neural', 'threshold', 'weight'],
         ['control', 'field', 'model', 'network', 'neuron'],
         ['hidden', 'layer', 'system', 'training', 'vector'],
         ['component', 'distribution', 'local', 'model', 'optimal']]
        """
        _mt._get_metric_tracker().track('toolkit.text.topic_model.get_topics')

        _check_categorical_option_type('output_type', output_type,
            ['topic_probabilities', 'topic_words'])

        if topic_ids is None:
            topic_ids = list(range(self.get('num_topics')))

        assert isinstance(topic_ids, list), \
            "The provided topic_ids is not a list."

        if any([type(x) == str for x in topic_ids]):
            raise ValueError("Only integer topic_ids can be used at this point in time.")
        if not all([x >= 0 and x < self['num_topics'] for x in topic_ids]):
            raise ValueError("Topic id values must be non-negative and less than the " + \
                "number of topics used to fit the model.")

        opts = {'model': self.__proxy__,
                'topic_ids': topic_ids,
                'num_words': num_words,
                'cdf_cutoff': cdf_cutoff}
        response = _graphlab.toolkits._main.run('text_topicmodel_get_topic',
                                               opts)
        ret = _map_unity_proxy_to_object(response['top_words'])

        def sort_wordlist_by_prob(z):
            words = sorted(z.items(), key=_operator.itemgetter(1), reverse=True)
            return [word for (word, prob) in words]

        if output_type != 'topic_probabilities':
            ret = ret.groupby('topic',
                    {'word': _graphlab.aggregate.CONCAT('word', 'score')})
            words = ret.sort('topic')['word'].apply(sort_wordlist_by_prob)
            ret = _SFrame({'words': words})

        return ret
    def extract_features(self, dataset, missing_value_action='auto'):
        """
        For each example in the dataset, extract the leaf indices of
        each tree as features.

        For multiclass classification, each leaf index contains #num_class
        numbers.

        The returned feature vectors can be used as input to train another
        supervised learning model such as a
        :py:class:`~graphlab.logistic_classifier.LogisticClassifier`,
        an :py:class:`~graphlab.svm_classifier.SVMClassifier`, or a
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Choose a model dependent missing value policy.
            - 'impute': Proceed with extraction by filling in the missing
                        values with the mean of the training data. Missing
                        values are also imputed if an entire column of data is
                        missing during extraction.
            - 'none': Treat missing values as is. The model must be able to
                      handle missing values.
            - 'error': Do not proceed with extraction and terminate with
                       an error message.

        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        Examples
        --------
        >>> data = graphlab.SFrame(
        ...     'https://static.turi.com/datasets/regression/houses.csv')

        >>> # Regression Tree Models
        >>> model = graphlab.boosted_trees_regression.create(data,
        ...                           target='price',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['regression_tree_features'] = model.extract_features(data)

        >>> # Classification Tree Models
        >>> data['is_expensive'] = data['price'] > 30000
        >>> model = graphlab.boosted_trees_classifier.create(data,
        ...                           target='is_expensive',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['classification_tree_features'] = model.extract_features(data)
        """
        metric_name = '.'.join([self.__module__, 'extract_features'])
        _mt._get_metric_tracker().track(metric_name)
        _raise_error_if_not_sframe(dataset, "dataset")
        if missing_value_action == 'auto':
            missing_value_action = select_default_missing_value_policy(
                self, 'extract_features')

        options = dict()
        options.update({
            'model': self.__proxy__,
            'model_name': self.__name__,
            'missing_value_action': missing_value_action,
            'dataset': dataset
        })
        target = _toolkits_main.run('supervised_learning_feature_extraction',
                                    options)
        return _map_unity_proxy_to_object(target['extracted'])