Example #1
    def _save_impl(self, pickler):
        """
        Save the model as a directory, which can be loaded with the
        :py:func:`~graphlab.load_model` method.

        Parameters
        ----------
        pickler : GLPickler
            An opened GLPickle archive (Do not close the archive).

        See Also
        --------
        graphlab.load_model

        Examples
        --------
        >>> model.save('my_model_file')
        >>> loaded_model = graphlab.load_model('my_model_file')
        """
        _mt._get_metric_tracker().track(
            'toolkit.classifier.nearest_neighbor_classifier.save')

        state = self._state
        pickler.dump(state)

        knn_model = self._knn_model
        pickler.dump(knn_model)
Example #2
    def list_fields(self):
        """
        List the fields stored in the model, including data, model, and training
        options. Each field can be queried with the ``get`` method.

        Returns
        -------
        out : list
            List of fields queryable with the ``get`` method.

        See Also
        --------
        get

        Examples
        --------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')

        >>> data['is_expensive'] = data['price'] > 30000
        >>> model = graphlab.svm_classifier.create(data,
                                  target='is_expensive',
                                  features=['bath', 'bedroom', 'size'])

        >>> model.list_fields()
        """

        _mt._get_metric_tracker().track(
            'toolkit.classifier.svm_classifier.list_fields')
        return super(_Classifier, self).list_fields()
Example #3
    def get_current_options(self):
        """
        Return a dictionary with the options used to define and create this
        graph analytics model instance.

        Returns
        -------
        out : dict
            Dictionary of options used to train this model.

        See Also
        --------
        get_default_options, list_fields, get
        """
        _mt._get_metric_tracker().track('toolkit.graph_analytics.get_current_options')

        dispatch_table = {
            'ShortestPathModel': 'sssp_default_options',
            'GraphColoringModel': 'graph_coloring_default_options',
            'PagerankModel': 'pagerank_default_options',
            'ConnectedComponentsModel': 'connected_components_default_options',
            'TriangleCountingModel': 'triangle_counting_default_options',
            'KcoreModel': 'kcore_default_options'
        }

        try:
            model_options = _main.run(dispatch_table[self.name()], {})

            ## for each of the default options, update its current value by querying the model
            for key in model_options:
                current_value = self.get(key)
                model_options[key] = current_value
            return model_options
        except:
            raise RuntimeError('Model %s does not have options' % self.name())
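# Illustrative sketch (not part of the GraphLab source): the method above looks
# up a per-model table of default options and then overwrites each default with
# the value the model currently holds. The same pattern with plain dicts and a
# caller-supplied lookup function (the default values below are made up):
def current_options(model_name, get_value):
    dispatch_table = {
        'PagerankModel': {'threshold': 0.01, 'max_iterations': 20},
        'KcoreModel': {'kmin': 0, 'kmax': 10},
    }
    if model_name not in dispatch_table:
        raise RuntimeError('Model %s does not have options' % model_name)
    options = dict(dispatch_table[model_name])  # start from the defaults
    for key in options:                         # query the current value of each option
        options[key] = get_value(key)
    return options

# current_options('KcoreModel', {'kmin': 2, 'kmax': 5}.get)  -> {'kmin': 2, 'kmax': 5}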
    def frequency_count(self, element):
        """
        Returns a sketched estimate of the number of occurrences of a given
        element. This estimate is based on the count sketch. The element must
        be of the same type as the input SArray; an exception is thrown if it
        is of the incorrect type.

        Parameters
        ----------
        element : val
            An element of the same type as the SArray.

        Raises
        ------
        RuntimeError
            Throws an exception if element is of the incorrect type.

        Returns
        -------
        out : int
            An estimate of the number of occurrences of the element.
        """
        _mt._get_metric_tracker().track('sketch.frequency_count')
        with cython_context():
            return int(self.__proxy__.frequency_count(element))
Example #5
def show(obj, **kwargs):
    import graphlab.connect as _mt
    _mt._get_metric_tracker().track('sgraph.show')

    import graphlab.canvas
    import graphlab.canvas.inspect
    import graphlab.canvas.views.sgraph

    graphlab.canvas.inspect.find_vars(obj)
    if 'highlight' in kwargs:
        highlight = kwargs['highlight']
        if isinstance(highlight, SArray):
            # convert to list
            highlight = list(highlight)
            kwargs['highlight'] = highlight
        if isinstance(highlight, list):
            # convert to dict
            highlight_color = kwargs[
                'highlight_color'] if 'highlight_color' in kwargs else []
            highlight_color = [highlight_color] * len(highlight)
            highlight = dict(zip(highlight, highlight_color))
            kwargs['highlight'] = highlight
            kwargs['highlight_color'] = highlight_color
    return graphlab.canvas.show(
        graphlab.canvas.views.sgraph.SGraphView(obj, params=kwargs))
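# Minimal sketch of the highlight normalization above, using plain Python lists
# in place of an SArray (assumed behavior): a list of vertex ids becomes a dict
# mapping each id to a highlight color, with the color defaulting to [].
def normalize_highlight(kwargs):
    highlight = kwargs.get('highlight')
    if isinstance(highlight, list):
        highlight_color = kwargs.get('highlight_color', [])
        highlight_color = [highlight_color] * len(highlight)
        kwargs['highlight'] = dict(zip(highlight, highlight_color))
        kwargs['highlight_color'] = highlight_color
    return kwargs

# normalize_highlight({'highlight': [1, 2]})
# -> {'highlight': {1: [], 2: []}, 'highlight_color': [[], []]}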
    def _save_impl(self, pickler):
        """
        Save the model as a directory, which can be loaded with the
        :py:func:`~graphlab.load_model` method.

        Parameters
        ----------
        pickler : GLPickler
            An opened GLPickle archive (Do not close the archive).

        See Also
        --------
        graphlab.load_model

        Examples
        --------
        >>> model.save('my_model_file')
        >>> loaded_model = graphlab.load_model('my_model_file')
        """
        _mt._get_metric_tracker().track('toolkit.classifier.nearest_neighbor_classifier.save')

        state = self._state
        pickler.dump(state)

        knn_model = self._knn_model
        pickler.dump(knn_model)
Example #7
    def dict_key_summary(self):
        """
        Returns the sketch summary for all dictionary keys. This is only valid
        for a sketch object constructed from an SArray of dict type. Dictionary
        keys are converted to strings before the summary is computed.

        Examples
        --------
        >>> sa = graphlab.SArray([{'I':1, 'love': 2}, {'nature':3, 'beauty':4}])
        >>> sa.sketch_summary().dict_key_summary()
        +------------------+-------+----------+
        |       item       | value | is exact |
        +------------------+-------+----------+
        |      Length      |   4   |   Yes    |
        | # Missing Values |   0   |   Yes    |
        | # unique values  |   4   |    No    |
        +------------------+-------+----------+
        Most frequent items:
        +-------+---+------+--------+--------+
        | value | I | love | beauty | nature |
        +-------+---+------+--------+--------+
        | count | 1 |  1   |   1    |   1    |
        +-------+---+------+--------+--------+

        """
        _mt._get_metric_tracker().track('sketch.dict_key_summary')
        with cython_context():
            return Sketch(_proxy=self.__proxy__.dict_key_summary())
Example #8
    def predict_row(self, row):
        """
        Use the model to predict the sentiment of a single observation, given
        as a dictionary of feature values.

        Parameters
        ----------
        row : dict
            A dictionary representing a single row of new observations.
            Must include keys with the same names as the features used for
            model training, but does not require a target column. Additional
            columns are ignored.


        Returns
        -------
        out : float
            Predicted sentiment, where smaller values (near 0) indicate
            negative sentiment and large values (approaching 1) indicate
            positive sentiment.

        Examples
        --------

        >>> m = gl.product_sentiment.create(sf, features=['review'])
        >>> m.predict_row({'review': "I really like this burrito."})
        """
        _mt._get_metric_tracker().track('{}.predict_row'.format(__name__))

        m = self.__proxy__['classifier']
        f = self.__proxy__['feature_extractor']
        return m.predict(f(row), output_type='probability')[0]
Example #9
    def predict(self, data):
        """
        Use the model to predict sentiment of a document collection.

        Parameters
        ----------
        data : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        Returns
        -------
        out : SArray of float
            Predicted sentiment, where smaller values (near 0) indicate
            negative sentiment and large values (approaching 1) indicate
            positive sentiment.

        Examples
        --------
        >>> import graphlab as gl
        >>> data = gl.SFrame({'rating': [1, 5], 'text': ['hate it', 'love it']})
        >>> m = gl.sentiment_analysis.create(data, 'rating', features=['text'])
        >>> m.predict(data)

        """
        _mt._get_metric_tracker().track('{}.predict'.format(__name__))

        m = self.__proxy__['classifier']
        f = self.__proxy__['feature_extractor']
        return m.predict(f(data), output_type='probability')
    def add_exception(self, e):
        """
        Add a Python exception to show in the UI.

        Parameters
        ----------
        e : (type, value, traceback)
            A tuple of exception values as returned by sys.exc_info.
        """
        # don't print errors if the unity_server process is no longer running
        # this is normal on Python process shutdown
        try:
            if (graphlab.connect.main.get_server().proc):
                print('[ERROR] GraphLab Canvas: %s' % str(e))
        except:
            pass

        # truncate type, message, stack_trace to a reasonable length
        # (100 for type/message, 1000 for stack_trace)
        # so that we don't hit any size limits on librato or mixpanel
        properties = {
            'type': e[0].__name__[:100],
            'message': str(e[1])[:100],
            'stack_trace': ''.join(traceback.format_tb(e[2]))[:1000]
        }
        _mt._get_metric_tracker().track('canvas.unhandled_exception',
                                        properties=properties)
        with self.__var_lock:
            self.__exceptions.append(properties)
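# Standalone sketch of the truncation step above: build the same properties
# dict from sys.exc_info(), keeping type and message to 100 characters and the
# joined stack trace to 1000 characters.
import sys
import traceback

def exception_properties():
    exc_type, exc_value, exc_tb = sys.exc_info()
    return {
        'type': exc_type.__name__[:100],
        'message': str(exc_value)[:100],
        'stack_trace': ''.join(traceback.format_tb(exc_tb))[:1000],
    }

# try:
#     1 / 0
# except ZeroDivisionError:
#     props = exception_properties()  # {'type': 'ZeroDivisionError', ...}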
Example #11
    def start(self, num_tolerable_ping_failures=3):
        properties = dict(product_key=self.product_key)
        _get_metric_tracker().track('engine-started',
                                    value=1,
                                    properties=properties,
                                    send_sys_info=True)
        _get_metric_tracker().track('engine-started-remote', value=1)

        # try to establish a connection to the server.
        (client_public_key, client_secret_key) = ('', '')
        if self.public_key != '':
            (client_public_key,
             client_secret_key) = get_public_secret_key_pair()
        c = None
        try:
            c = Client([],
                       self.server_addr,
                       num_tolerable_ping_failures,
                       public_key=client_public_key,
                       secret_key=client_secret_key,
                       server_public_key=self.public_key)
            if self.auth_token:
                c.add_auth_method_token(self.auth_token)
            c.start()
        finally:
            # guard against Client() itself raising before c is bound
            if c is not None:
                c.stop()
    def set_selected_variable(self, var):
        """
        Marks the given variable (passed by name or by reference) as selected
        in the UI.

        Parameters
        ----------
        var : str | unicode | tuple | SFrame | SArray
                The variable to select (by name or reference).
        """
        name = None
        if isinstance(var, tuple):
            # look up by name
            name = var
            var = self.__lookup_var(name)
        else:
            ref = var
            if isinstance(var, graphlab.canvas.views.base.BaseView):
                # use underlying object, not view wrapper
                ref = var.obj

            name = self.__find_name(ref)
            if name is None:
                # if we can't find the name, add it as anonymous
                name = var.get_temporary_name()
            # make sure this variable exists.
            self.add_variable(name, var)

        # tracks type of variable added to Canvas
        _mt._get_metric_tracker().track('canvas.set_selected_variable.%s' %
                                        type(var).__name__)

        with self.__var_lock:
            self.__selected_var = (name, self.__lookup_var(name))
    def list_fields(self):
        """
        List of fields stored in the model. Each of these fields can be queried
        using the ``get`` function.

        Returns
        -------
        out : list
            A list of fields that can be queried using the ``get`` method.

        See Also
        --------
        get

        Examples
        --------
        >>> data =  graphlab.SFrame('https://static.turi.com/datasets/regression/houses.csv')

        >>> model = graphlab.linear_regression.create(data,
                                             target='price',
                                             features=['bath', 'bedroom', 'size'])
        >>> fields = model.list_fields()
        """
        _mt._get_metric_tracker().track(
            'toolkit.regression.linear_regression.list_fields')
        return super(LinearRegression, self).list_fields()
    def get_current_options(self):
        """
        A dictionary describing the options requested during training.

        Returns
        -------
        out : dict
             A dictionary with option (name, value) pairs requested during
             train time.

        See Also
        --------
        get_default_options, list_fields, get

        Examples
        --------
        >>> data =  graphlab.SFrame('https://static.turi.com/datasets/regression/houses.csv')

        >>> model = graphlab.linear_regression.create(data,
                                             target='price',
                                             features=['bath', 'bedroom', 'size'])
        >>> current_options = model.get_current_options()
        """

        _mt._get_metric_tracker().track(
            'toolkit.regression.linear_regression.get_options')
        return super(LinearRegression, self).get_current_options()
def confusion_matrix(targets, predictions):
    r"""
    Compute the confusion matrix for classifier predictions.

    Parameters
    ----------
    targets : SArray
        Ground truth class labels.

    predictions : SArray
        The prediction that corresponds to each target value.
        This vector must have the same length as ``targets``.

    Returns
    -------
    out : SFrame
        An SFrame containing counts for 'target_label', 'predicted_label' and
        'count' corresponding to each pair of true and predicted labels.

    See Also
    --------
    accuracy

    Examples
    --------
    >>> targets = graphlab.SArray([0, 1, 1, 0])
    >>> predictions = graphlab.SArray([0, 1, 1, 1])
    >>> graphlab.evaluation.confusion_matrix(targets, predictions)
    """

    _mt._get_metric_tracker().track('evaluation.confusion_matrix')
    _supervised_evaluation_error_checking(targets, predictions)
    _check_same_type_not_float(targets, predictions)
    return _graphlab.extensions._supervised_streaming_evaluator(targets,
                       predictions, "confusion_matrix_no_map", {})
Example #16
    def classify(self, data):
        """
        Use the model to classify sentiment of a text collection.

        Parameters
        ----------
        data : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        Returns
        -------
        out : SArray of int
            Predicted sentiment, where 0 indicates negative sentiment and 1
            indicates positive sentiment.

        Examples
        --------
        >>> import graphlab as gl
        >>> data = gl.SFrame({'rating': [1, 5], 'text': ['hate it', 'love it']})
        >>> m = gl.sentiment_analysis.create(data, 'rating', features=['text'])
        >>> m.classify(data)

        """
        _mt._get_metric_tracker().track('{}.classify'.format(__name__))

        m = self.__proxy__['classifier']
        f = self.__proxy__['feature_extractor']
        return m.classify(f(data))
Example #17
def get_product_key(file = __default_config_path):
    """
    Returns the product key found in the given file (by default ~/.graphlab/config)
    or in the environment variable GRAPHLAB_PRODUCT_KEY.

    Note: Environment variable takes precedence over config file.

    @param file optional parameter to specify which file to use for configuration (defaults to ~/.graphlab/config)
    @return Product key string, or None if not found.
    """
    PRODUCT_KEY_ENV = 'GRAPHLAB_PRODUCT_KEY'
    if not PRODUCT_KEY_ENV in os.environ:
        import graphlab.connect as _mt
        # see if in ~/.graphlab/config
        config_file = file
        if (os.path.isfile(config_file)):
            try:
                import ConfigParser
                config = ConfigParser.ConfigParser()
                config.read(config_file)
                product_key = config.get(__section, __key)
                if product_key == -1:
                    raise BaseException() # will fall into except block below
                else:
                    # set the product key as an environment variable in this session
                    os.environ[PRODUCT_KEY_ENV] = str(product_key).strip('"\'')
            except:
                msg = "Unable to parse product key out of %s. Make sure it is defined in the [%s] section, with key name: '%s'" % (config_file, __section, __key)
                _mt._get_metric_tracker().track('server_launch.config_parser_error')
                raise KeyError(msg)
        else:
            return None
    return os.environ[PRODUCT_KEY_ENV]
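# Sketch of the lookup order above with hypothetical section/key names (the
# real __section and __key values are module-private and not shown in this
# listing): the environment variable wins, otherwise the INI-style config file
# is consulted.
import os
try:
    import configparser                  # Python 3
except ImportError:
    import ConfigParser as configparser  # Python 2

def read_product_key(path, section='Product', key='product_key'):
    if 'GRAPHLAB_PRODUCT_KEY' in os.environ:
        return os.environ['GRAPHLAB_PRODUCT_KEY']
    if not os.path.isfile(path):
        return None
    config = configparser.ConfigParser()
    config.read(path)
    return config.get(section, key).strip('"\'')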
    def get(self, field):
        """
        Return the value of a given field. The list of all queryable fields is
        detailed below, and can be obtained programmatically with the
        :func:`~graphlab.frequent_pattern_mining.FrequentPatternMiner.list_fields`
        method.

        +------------------------+---------------------------------------------+
        |      Field             | Description                                 |
        +========================+=============================================+
        | features               | Feature column names                        |
        +------------------------+---------------------------------------------+
        | frequent_patterns      | Most frequent closed itemsets in the        |
        |                        | training data                               |
        +------------------------+---------------------------------------------+
        | item                   | Item column name                            |
        +------------------------+---------------------------------------------+
        | max_patterns           | Maximum number of itemsets to mine          |
        +------------------------+---------------------------------------------+
        | min_support            | Minimum number of transactions for an       |
        |                        | itemset to be frequent                      |
        +------------------------+---------------------------------------------+
        | num_examples           | Number of examples (transactions) in the    |
        |                        | dataset                                     |
        +------------------------+---------------------------------------------+
        | num_features           | Number of feature columns                   |
        +------------------------+---------------------------------------------+
        | num_frequent_patterns  | Number of frequent itemsets mined           |
        +------------------------+---------------------------------------------+
        | num_items              | Number of unique items in the training data |
        +------------------------+---------------------------------------------+
        | training_time          | Total time taken to mine the data           |
        +------------------------+---------------------------------------------+

        Parameters
        ----------
        field : string
            Name of the field to be retrieved.

        Returns
        -------
        out
            Value of the requested field.

        See Also
        --------
        list_fields

        Examples
        --------

        .. sourcecode:: python

            >>> model['num_frequent_patterns']
            500
        """

        _mt._get_metric_tracker().track(
            'toolkits.frequent_pattern_mining.get')
        return self.__proxy__.get(field)
Example #19
    def quantile(self, quantile_val):
        """
        Returns a sketched estimate of the value at a particular quantile
        between 0.0 and 1.0. The quantile is guaranteed to be accurate within
        1%: meaning that if you ask for the 0.55 quantile, the returned value is
        guaranteed to be between the true 0.54 quantile and the true 0.56
        quantile. The quantiles are only defined for numeric arrays and this
        function will throw an exception if called on a sketch constructed for a
        non-numeric column.

        Parameters
        ----------
        quantile_val : float
            A value between 0.0 and 1.0 inclusive. Values below 0.0 will be
            interpreted as 0.0. Values above 1.0 will be interpreted as 1.0.

        Raises
        ------
        RuntimeError
            If the sarray is a non-numeric type.

        Returns
        -------
        out : float | str
            An estimate of the value at a quantile.
        """
        _mt._get_metric_tracker().track('sketch.quantile.%g' % quantile_val)

        with cython_context():
            return self.__proxy__.get_quantile(quantile_val)
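# Sketch of the accuracy guarantee described above: the value returned for
# quantile q is only promised to lie between the exact (q - 0.01) and
# (q + 0.01) quantiles. An exact quantile of a plain list, for comparison:
def exact_quantile(values, q):
    ordered = sorted(values)
    idx = int(round(q * (len(ordered) - 1)))
    return ordered[idx]

# values = list(range(1000))
# a sketched 0.55 quantile is guaranteed to fall between
# exact_quantile(values, 0.54) and exact_quantile(values, 0.56)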
    def get_current_options(self):
        """
        Return a dictionary with the options used to define and create the
        current NearestNeighborsModel instance.

        Returns
        -------
        out : dict
            Dictionary of options used to train the current instance of the
            NearestNeighborsModel.

        See Also
        --------
        get_default_options, list_fields, get

        Examples
        --------
        >>> sf = graphlab.SFrame({'label': range(3),
        ...                       'feature1': [0.98, 0.62, 0.11],
        ...                       'feature2': [0.69, 0.58, 0.36]})
        >>> model = graphlab.nearest_neighbors.create(sf, 'label')
        >>> model.get_current_options()
        {'distance': 'euclidean', 'leaf_size': 1000}
        """

        _mt._get_metric_tracker().track(
            'toolkit.nearest_neighbors.get_current_options')

        opts = {'model': self.__proxy__, 'model_name': self.__name__}

        return _graphlab.toolkits._main.run(
            '_nearest_neighbors.get_current_options', opts)
    def __init__(self, array=None, background=False, sub_sketch_keys=[], _proxy=None):
        """__init__(array)
        Construct a new Sketch from an SArray.

        Parameters
        ----------
        array : SArray
            Array to sketch.

        background : boolean, optional
            If True, run the sketch in the background. The state of the sketch
            may be queried by calling :func:`~graphlab.Sketch.sketch_ready`.
            Defaults to False.

        sub_sketch_keys : list
            The list of sub-sketch keys to calculate. For an SArray of
            dictionary type, each key must be a string; for an SArray of
            vector (array) type, each key must be a positive integer.
        """
        _mt._get_metric_tracker().track('sketch.init')
        if (_proxy):
            self.__proxy__ = _proxy
        else:
            self.__proxy__ = UnitySketchProxy(glconnect.get_client())
            if not isinstance(array, SArray):
                raise TypeError("Sketch object can only be constructed from SArrays")

            self.__proxy__.construct_from_sarray(array.__proxy__, background, sub_sketch_keys)
def create(function, parameter_set):
    """
    Create a DML job.

    For now, we leverage the map_job infrastructure for DML jobs; the
    difference is indicated through the _job_type parameter.

    Parameters
    ----------
    function : function
        Function to be executed, with arguments to pass to this
        function specified by parameter_set.

    parameter_set : iterable of dict
        Each element of the list corresponds to an evaluation of the function
        with the dictionary argument.

    """
    environment = _gl._distributed_execution_environment.get_distributed_execution_environment()
    if environment is None:
        raise RuntimeError(
            "Please use graphlab.set_distributed_execution_environment() to set distributed execution environment first."
        )

    if len(parameter_set) > environment.get_num_workers():
        raise RuntimeError("Length of parameter_set cannot not exceed %d" % environment.get_num_workers())

    _get_metric_tracker().track("jobs.dml_job")

    job = _gl.deploy.map_job._create_map_job(
        function, parameter_set=parameter_set, environment=environment, _job_type="DML"
    )

    # submit to DMLExecutionEngine directly
    return environment.run_job(job)
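# Conceptual sketch (no GraphLab dependency): parameter_set is an iterable of
# keyword-argument dicts, and each dict produces one evaluation of `function`.
# A local, sequential stand-in for the distributed map job:
def run_locally(function, parameter_set):
    return [function(**params) for params in parameter_set]

# run_locally(lambda x, y: x + y, [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}])  -> [3, 7]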
def __set_license_info(product_key,
                       license_info,
                       config_path=__default_config_path):
    """
    Sets the license info in the given file, which by default is ~/.graphlab/config.
    Overwrites any existing product key in that file.

    Parameters
    ----------
    license_info : str
        The license info returned from the Turi server.

    config_path : str, optional
        Specifies which file to use for configuration (defaults to ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        config = _ConfigParser.ConfigParser()
        config.read(config_path)
        if not (config.has_section(__section)):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        config.set(__section, __license, license_info)
        with open(config_path, 'w') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_license_info.succeeded')
    except:
        _mt._get_metric_tracker().track('set_license_info.config_parser_error')
        __LOGGER__.warn(
            'Unable to write current GraphLab Create license to %s. Ensure that '
            'this user account has write permission to %s to save the license '
            'for offline use.' % (config_path, config_path))
        _os.environ['GRAPHLAB_LICENSE_INFO'] = license_info
def __set_license_info(product_key, license_info, config_path=(_os.path.join(_os.path.expanduser("~"), ".graphlab", "config"))):
    """
    Sets the license info in the given file, which by default is ~/.graphlab/config.
    Overwrites any existing product key in that file.

    Parameters
    ----------
    license_info : str
        The license info returned from the Dato server.

    config_path : str, optional
        Specifies which file to use for configuration (defaults to ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        import ConfigParser
        config = ConfigParser.ConfigParser()
        config.read(config_path)
        if not(config.has_section(__section)):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        config.set(__section, __license, license_info)
        with open(config_path, 'wb') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_license_info.succeeded')
    except:
        _mt._get_metric_tracker().track('set_license_info.config_parser_error')
        __LOGGER__.warn('Unable to write current GraphLab Create license to %s. Ensure that this user account has write permission to %s to save the license for offline use.' % (config_path, config_path))
        _os.environ['GRAPHLAB_LICENSE_INFO'] = license_info
def connect_odbc(conn_str):
    """
    Create a stateful connection with a database.

    An ODBC driver manager program (unixODBC) must be installed with one or
    more functional drivers in order to use this feature.  Please see the `User Guide 
    <http://www.graphlab.com/learn/userguide.html#ODBC_Integration>`_
    for more details.

    Parameters
    ----------
    conn_str : str
        A standard ODBC connection string.

    Returns
    -------
    out : graphlab.extensions._odbc_connection.unity_odbc_connection

    Examples
    --------
    >>> db = graphlab.connect_odbc("DSN=my_awesome_dsn;UID=user;PWD=mypassword")
    """
    db = gl.extensions._odbc_connection.unity_odbc_connection()
    db._construct_from_odbc_conn_str(conn_str)
    _mt._get_metric_tracker().track('connect_odbc',
                                    properties={'dbms_name': db.dbms_name,
                                                'dbms_version': db.dbms_version})
    return db
Example #26
    def summary(self, output=None):
        """
        Print a summary of the model.
        The summary includes a description of training
        data, options, hyper-parameters, and statistics measured during model
        creation.

        Examples
        --------
        >>> m.summary()

        Parameters
        ----------
        output : string, None
            The type of summary to return.
            None or 'stdout' : prints directly to stdout
            'str' : string of summary
            'dict' : a dict with 'sections' and 'section_titles' ordered lists.
            The entries in the 'sections' list are tuples of the form
            ('label', 'value').
        """
        if output is None or output == 'stdout':
            pass
        elif (output == 'str'):
            return self.__repr__()
        elif output == 'dict':
            return _toolkit_serialize_summary_struct(
                self, *self._get_summary_struct())
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.summary')
        try:
            print(self.__repr__())
        except:
            return self.__class__.__name__
    def list_fields(self):
        """
        List of fields stored in the model. Each of these fields can be queried
        using the ``get`` function. 

        Returns
        -------
        out : list
            A list of fields that can be queried using the ``get`` method.

        See Also
        --------
        get

        Examples
        --------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/regression/houses.csv')

        >>> model = graphlab.linear_regression.create(data,
                                             target='price',
                                             features=['bath', 'bedroom', 'size'])
        >>> fields = model.list_fields()
        """
        _mt._get_metric_tracker().track(
                            'toolkit.regression.linear_regression.list_fields')
        return super(LinearRegression, self).list_fields()
def set_target(target):
    """
    Set the target for GraphLab Canvas view output. By default, view output is set to
    'browser'. To change output target, use graphlab.canvas.set_target(target). Specifying
    'ipynb' will attempt to render to an output cell in the IPython Notebook. If any other 
    value is specified, the target will not be changed.

    To set the output target to browser and use GraphLab Canvas as an interactive web application (default):

    >>> graphlab.canvas.set_target('browser')

    To set the output target to cells within the IPython Notebook:

    >>> graphlab.canvas.set_target('ipynb')

    To disable Canvas output:

    >>> graphlab.canvas.set_target('none')

    Parameters
    ----------
    target : "browser" | "ipynb"
    """
    import target as __target
    global _active_target

    # track metrics on target
    _mt._get_metric_tracker().track('canvas.set_target.%s' % target)

    if target == 'browser' and not isinstance(_active_target, __target.InteractiveTarget):
        _active_target = __target.InteractiveTarget()
    elif target == 'ipynb' and not isinstance(_active_target, __target.IPythonTarget):
        _active_target = __target.IPythonTarget()
    elif target == 'none' and not isinstance(_active_target, __target.NoneTarget):
        _active_target = __target.NoneTarget()
    def get_current_options(self):
        """
        Return a dictionary with the options used to define and create this
        graph analytics model instance.

        Returns
        -------
        out : dict
            Dictionary of options used to train this model.

        See Also
        --------
        get_default_options, list_fields, get
        """
        _mt._get_metric_tracker().track('toolkit.graph_analytics.get_current_options')

        dispatch_table = {
            'ShortestPathModel': 'sssp_default_options',
            'GraphColoringModel': 'graph_coloring_default_options',
            'PagerankModel': 'pagerank_default_options',
            'ConnectedComponentsModel': 'connected_components_default_options',
            'TriangleCountingModel': 'triangle_counting_default_options',
            'KcoreModel': 'kcore_default_options'
        }

        try:
            model_options = _main.run(dispatch_table[self.name()], {})

            ## for each of the default options, update its current value by querying the model
            for key in model_options:
                current_value = self.get(key)
                model_options[key] = current_value
            return model_options
        except:
            raise RuntimeError('Model %s does not have options' % self.name())
    def __init__(self, num_hidden_layers, num_hidden_units, input_dimension=1, activation="sigmoid", **kwargs):
        super(self.__class__, self).__init__()
        _mt._get_metric_tracker().track("toolkit.deeplearning.MultiLayerPerceptrons")
        if input_dimension not in (1, 2):
            raise ValueError("input_dimension must be either 1 or 2")

        ActivationLayer = _activation_layer_from_string(activation)
        if num_hidden_layers != len(num_hidden_units):
            raise ValueError("Length of num_hidden_units must equal to num_hidden_layers")
        for i in range(0, num_hidden_layers - 1):
            self._layers.append(layers.FullConnectionLayer(num_hidden_units[i]))
            self._layers[-1]._set_params(**kwargs)
            self._layers.append(ActivationLayer())
            self._layers[-1]._set_params(**kwargs)

        # output layer
        if num_hidden_units[-1] > 0:
            self._layers.append(layers.FullConnectionLayer(num_hidden_units[-1]))
            self._layers[-1]._set_params(**kwargs)
            if num_hidden_units[-1] > 1:
                self._layers.append(layers.SoftmaxLayer())

        # input layer
        if input_dimension == 2:
            self._layers.insert(0, layers.FlattenLayer())
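# Illustrative sketch of how the layer list above is assembled, using strings in
# place of the real layer objects: the hidden layers alternate a full connection
# with an activation, the last entry of num_hidden_units becomes the output
# layer (plus a softmax when it has more than one unit), and a 2-D input gets a
# leading flatten layer.
def mlp_layout(num_hidden_units, input_dimension=1, activation='sigmoid'):
    layout = []
    for units in num_hidden_units[:-1]:
        layout += ['FullConnection(%d)' % units, activation]
    if num_hidden_units[-1] > 0:
        layout.append('FullConnection(%d)' % num_hidden_units[-1])
        if num_hidden_units[-1] > 1:
            layout.append('softmax')
    if input_dimension == 2:
        layout.insert(0, 'flatten')
    return layout

# mlp_layout([100, 50, 10])
# -> ['FullConnection(100)', 'sigmoid', 'FullConnection(50)', 'sigmoid',
#     'FullConnection(10)', 'softmax']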
    def predict(self, dataset):
        """
        Predict the target column of the given dataset.

        The target column is provided during
        :func:`~graphlab.boosted_trees_regression.create`. If the target column is in the
        `dataset` it will be ignored.

        Parameters
        ----------
        dataset : SFrame
          A dataset that has the same columns that were used during training.
          If the target column exists in ``dataset`` it will be ignored
          while making predictions.

        Returns
        -------
        out : SArray
           Predicted target value for each example (i.e. row) in the dataset.

        See Also
        ----------
        create, predict

        Examples
        --------
        >>> m.predict(testdata)
        """
        _mt._get_metric_tracker().track('toolkit.regression.boosted_trees_regression.predict')
        return super(BoostedTreesRegression, self).predict(dataset,
                                                           output_type='margin')
    def __init__(
        self,
        num_convolution_layers,
        kernel_size,
        num_channels,
        num_output_units,
        stride=layers._LayerDefaults.STRIDE_DEFAULT,
        pooling="max_pooling",
        **kwargs
    ):
        _mt._get_metric_tracker().track("toolkit.deeplearning.ConvolutionNet")
        super(self.__class__, self).__init__()
        PoolingLayer = _pooling_layer_from_string(pooling)
        for n in range(num_convolution_layers):
            self._layers.append(
                layers.ConvolutionLayer(kernel_size=kernel_size, num_channels=num_channels, stride=stride)
            )
            self._layers[-1]._set_params(**kwargs)
            self._layers.append(PoolingLayer(kernel_size=kernel_size, stride=stride))
            self._layers[-1]._set_params(**kwargs)
        self._layers.append(layers.FlattenLayer())
        self._layers[-1]._set_params(**kwargs)

        # output layer
        if num_output_units > 0:
            self._layers.append(layers.FullConnectionLayer(num_output_units))
            self._layers[-1]._set_params(**kwargs)
            if num_output_units > 1:
                self._layers.append(layers.SoftmaxLayer())
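# Same kind of sketch for the convolutional constructor above: each of the
# num_convolution_layers blocks is a convolution followed by a pooling layer,
# then a flatten layer, and optionally a fully connected output (with a softmax
# when num_output_units > 1).
def convnet_layout(num_convolution_layers, num_output_units, pooling='max_pooling'):
    layout = []
    for _ in range(num_convolution_layers):
        layout += ['convolution', pooling]
    layout.append('flatten')
    if num_output_units > 0:
        layout.append('FullConnection(%d)' % num_output_units)
        if num_output_units > 1:
            layout.append('softmax')
    return layout

# convnet_layout(2, 10)
# -> ['convolution', 'max_pooling', 'convolution', 'max_pooling', 'flatten',
#     'FullConnection(10)', 'softmax']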
Example #33
    def _load_version(cls, unpickler, version):
        """
        A function to load a previously saved SentenceSplitter instance.

        Parameters
        ----------
        unpickler : GLUnpickler
            A GLUnpickler file handler.

        version : int
            Version number maintained by the class writer.
        """

        _mt._get_metric_tracker().track(cls.__name__ + '.load_version')

        state, _exclude, _features = unpickler.load()

        features = state['features']
        excluded_features = state['excluded_features']

        model = cls.__new__(cls)
        model._setup()
        model.__proxy__.update(state)
        model._exclude = _exclude
        model._features = _features

        return model
Example #34
    def fit(self, data):
        """
        Fit a transformer using the SFrame `data`.

        Parameters
        ----------
        data : SFrame
            The data used to fit the transformer.

        Returns
        -------
        self (A fitted version of the object)

        See Also
        --------
        transform, fit_transform

        Examples
        --------
        .. sourcecode:: python

        {examples}
        """

        _raise_error_if_not_sframe(data, "data")
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.fit')
        self.__proxy__.fit(data)
        return self
Example #35
def set_product_key(product_key, file=(os.path.join(os.path.expanduser("~"), ".graphlab", "config"))):
    """
    Sets the product key in the given file, which by default is ~/.graphlab/config.
    Overwrites any existing product key in that file.

    Note: Environment variable GRAPHLAB_PRODUCT_KEY takes precedence over the
    config file and is not affected by this function.

    Parameters
    ----------
    product_key : str
        The product key, provided by registration on https://dato.com/register

    file : str, optional
        Specifies which file to use for configuration (defaults to ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        import ConfigParser
        config = ConfigParser.ConfigParser()
        config.read(file)
        if not(config.has_section(__section)):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        with open(file, 'wb') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_product_key.succeeded')
    except:
        _mt._get_metric_tracker().track('set_product_key.config_parser_error')
        raise
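# Sketch of the config write above with hypothetical section/key names (the
# real __section and __key values are module-private): read any existing file,
# make sure the section exists, set the key, and write the file back.
try:
    import configparser                  # Python 3
except ImportError:
    import ConfigParser as configparser  # Python 2

def write_product_key(path, product_key, section='Product', key='product_key'):
    config = configparser.ConfigParser()
    config.read(path)
    if not config.has_section(section):
        config.add_section(section)
    config.set(section, key, product_key)
    with open(path, 'w') as config_file:
        config.write(config_file)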
Example #36
    def transform(self, data):
        """
        Transform the SFrame `data` using a fitted model.

        Parameters
        ----------
        data : SFrame
            The data to be transformed.

        Returns
        -------
        A transformed SFrame.

        See Also
        --------
        fit, fit_transform

        Examples
        --------
        .. sourcecode:: python

        {examples}

        """
        _raise_error_if_not_sframe(data, "data")
        _mt._get_metric_tracker().track(self.__class__.__module__ +
                                        '.transform')
        return self.__proxy__.transform(data)
Example #37
    def __init__(self,
                 array=None,
                 background=False,
                 sub_sketch_keys=[],
                 _proxy=None):
        """__init__(array)
        Construct a new Sketch from an SArray.

        Parameters
        ----------
        array : SArray
            Array to sketch.

        background : boolean, optional
            If True, run the sketch in the background. The state of the sketch
            may be queried by calling :func:`~graphlab.Sketch.sketch_ready`.
            Defaults to False.

        sub_sketch_keys : list
            The list of sub-sketch keys to calculate. For an SArray of
            dictionary type, each key must be a string; for an SArray of
            vector (array) type, each key must be a positive integer.
        """
        _mt._get_metric_tracker().track('sketch.init')
        if (_proxy):
            self.__proxy__ = _proxy
        else:
            self.__proxy__ = UnitySketchProxy(glconnect.get_client())
            if not isinstance(array, SArray):
                raise TypeError(
                    "Sketch object can only be constructed from SArrays")

            self.__proxy__.construct_from_sarray(array.__proxy__, background,
                                                 sub_sketch_keys)
Example #38
    def fit_transform(self, data):
        """
        First fit a transformer using the SFrame `data` and then return a
        transformed version of `data`.

        Parameters
        ----------
        data : SFrame
            The data used to fit the transformer. The same data is then also
            transformed.

        Returns
        -------
        Transformed SFrame.

        See Also
        --------
        fit, transform

        Notes
        ------
        - Fit transform modifies self.

        Examples
        --------
        .. sourcecode:: python

        {examples}
        """
        _raise_error_if_not_sframe(data, "data")
        _mt._get_metric_tracker().track(self.__class__.__module__ +
                                        '.fit_transform')
        return self.__proxy__.fit_transform(data)
Example #39
    def frequency_count(self, element):
        """
        Returns a sketched estimate of the number of occurrences of a given
        element. This estimate is based on the count sketch. The element must
        be of the same type as the input SArray; an exception is thrown if it
        is of the incorrect type.

        Parameters
        ----------
        element : val
            An element of the same type as the SArray.

        Raises
        ------
        RuntimeError
            Throws an exception if element is of the incorrect type.

        Returns
        -------
        out : int
            An estimate of the number of occurrences of the element.
        """
        _mt._get_metric_tracker().track('sketch.frequency_count')
        with cython_context():
            return int(self.__proxy__.frequency_count(element))
Example #40
    def save(self, location):
        """
        Save the transformer into a GraphLab archive. The object is saved as a
        directory which can then be loaded using the
        :py:func:`~graphlab.load_model` method.

        Parameters
        ----------
        location : string
            Target destination for the model. Can be a local path or remote
            URL.

        See Also
        ----------
        graphlab.load_model

        Examples
        ----------
        .. sourcecode:: python

            >>> model.save('my_model_file')
            >>> loaded_model = gl.load_model('my_model_file')

        """
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.save')
        return glconnect.get_unity().save_model(self.__proxy__,
                                                _make_internal_url(location),
                                                self._get_wrapper())
Example #41
def connect_odbc(conn_str):
    """
    Create a stateful connection with a database.

    An ODBC driver manager program (unixODBC) must be installed with one or
    more functional drivers in order to use this feature.  Please see the `User Guide 
    <https://dato.com/learn/userguide/index.html#ODBC_Integration>`_
    for more details.

    Parameters
    ----------
    conn_str : str
        A standard ODBC connection string.

    Returns
    -------
    out : graphlab.extensions._odbc_connection.unity_odbc_connection

    Examples
    --------
    >>> db = graphlab.connect_odbc("DSN=my_awesome_dsn;UID=user;PWD=mypassword")
    """
    db = gl.extensions._odbc_connection.unity_odbc_connection()
    db._construct_from_odbc_conn_str(conn_str)
    _mt._get_metric_tracker().track('connect_odbc',
                                    properties={
                                        'dbms_name': db.dbms_name,
                                        'dbms_version': db.dbms_version
                                    })
    return db
Example #42
    def get(self, field):
        """Return the value for the queried field.

        Each of these fields can be queried in one of two ways:

        >>> out = m['field']
        >>> out = m.get('field')  # equivalent to previous line

        Parameters
        ----------
        field : string
            Name of the field to be retrieved.

        See Also
        ---------
        list_fields

        Returns
        -------
        out : value
            The current value of the requested field.
        """
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.get')
        if field in self.list_fields():
            return self.__proxy__.get(field)
        else:
            raise KeyError('Field \"%s\" not in model. Available fields are '
                           '%s.' % (field, ', '.join(self.list_fields())))
    def quantile(self, quantile_val):
        """
        Returns a sketched estimate of the value at a particular quantile
        between 0.0 and 1.0. The quantile is guaranteed to be accurate within
        1%: meaning that if you ask for the 0.55 quantile, the returned value is
        guaranteed to be between the true 0.54 quantile and the true 0.56
        quantile. The quantiles are only defined for numeric arrays and this
        function will throw an exception if called on a sketch constructed for a
        non-numeric column.

        Parameters
        ----------
        quantile_val : float
            A value between 0.0 and 1.0 inclusive. Values below 0.0 will be
            interpreted as 0.0. Values above 1.0 will be interpreted as 1.0.

        Raises
        ------
        RuntimeError
            If the sarray is a non-numeric type.

        Returns
        -------
        out : float | str
            An estimate of the value at a quantile.
        """
        _mt._get_metric_tracker().track('sketch.quantile.%g' % quantile_val)

        with cython_context():
            return self.__proxy__.get_quantile(quantile_val)
    def get(self, field):
        """
        Return the value contained in the model's ``field``.

        Parameters
        ----------
        field : string
            Name of the field to be retrieved.

        Returns
        -------
        out
            Value of the requested field.

        See Also
        --------
        list_fields
        """
        _mt._get_metric_tracker().track(
            'toolkits.anomaly_detection.bayesian_changepoints.get')
        if field == "scores" and self.__proxy__.get(
                'dataset_type') == 'TimeSeries':
            ts = self.__proxy__.get('scores')
            return _gl.TimeSeries(ts,
                                  index=self.__proxy__.get_index_col_name())
        else:
            return self.__proxy__.get(field)
    def dict_key_summary(self):
        """
        Returns the sketch summary for all dictionary keys. This is only valid
        for a sketch object constructed from an SArray of dict type. Dictionary
        keys are converted to strings before the summary is computed.

        Examples
        --------
        >>> sa = graphlab.SArray([{'I':1, 'love': 2}, {'nature':3, 'beauty':4}])
        >>> sa.sketch_summary().dict_key_summary()
        +------------------+-------+----------+
        |       item       | value | is exact |
        +------------------+-------+----------+
        |      Length      |   4   |   Yes    |
        | # Missing Values |   0   |   Yes    |
        | # unique values  |   4   |    No    |
        +------------------+-------+----------+
        Most frequent items:
        +-------+---+------+--------+--------+
        | value | I | love | beauty | nature |
        +-------+---+------+--------+--------+
        | count | 1 |  1   |   1    |   1    |
        +-------+---+------+--------+--------+

        """
        _mt._get_metric_tracker().track('sketch.dict_key_summary')
        with cython_context():
            return Sketch(_proxy=self.__proxy__.dict_key_summary())
Example #46
def accuracy(targets, predictions):
    r"""
    Compute the proportion of correct predictions.

    Parameters
    ----------
    targets : SArray
        Ground truth class labels.

    predictions : SArray
        The prediction that corresponds to each target value.
        This vector must have the same length as ``targets``.

    Returns
    -------
    out : float
        The ratio of the number of correct classifications and the total number
        of data points.

    See Also
    --------
    confusion_matrix

    Examples
    --------
    >>> targets = graphlab.SArray([0, 1, 1, 0])
    >>> predictions = graphlab.SArray([0, 1, 1, 1])
    >>> graphlab.evaluation.accuracy(targets, predictions)
    """
    _mt._get_metric_tracker().track('evaluation.accuracy')
    _supervised_evaluation_error_checking(targets, predictions)
    return _graphlab.extensions._supervised_streaming_evaluator(targets,
                                                    predictions, "accuracy")
def set_product_key(product_key, file=__default_config_path):
    """
    Sets the product key in the given file, which by default is ~/.graphlab/config.
    Overwrites any existing product key in that file.

    Note: Environment variable GRAPHLAB_PRODUCT_KEY takes precedence over the
    config file and is not affected by this function.

    Parameters
    ----------
    product_key : str
        The product key

    file : str, optional
        Specifies which file to use for configuration (defaults to ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        config = _ConfigParser.ConfigParser()
        config.read(file)
        if not (config.has_section(__section)):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        with open(file, 'w') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_product_key.succeeded')
    except:
        _mt._get_metric_tracker().track('set_product_key.config_parser_error')
        __LOGGER__.warn(
            'Unable to write GraphLab Create product key to %s. Ensure that '
            'this user account has write permission to %s to save the product '
            'key locally.' % (file, file))
        _os.environ['GRAPHLAB_PRODUCT_KEY'] = str(product_key)
    def fit(self, data):
        """
        Fit a transformer using the SFrame `data`.

        Parameters
        ----------
        data : SFrame
            The data used to fit the transformer.

        Returns
        -------
        self (A fitted version of the object)

        See Also
        --------
        transform, fit_transform

        Examples
        --------
        .. sourcecode:: python

        {examples}
        """

        _raise_error_if_not_sframe(data, "data")
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.fit')
        self.__proxy__.fit(data)
        return self
Example #49
def get_product_key(file=__default_config_path):
    """
    Returns the product key found in the given file (by default ~/.graphlab/config)
    or in the environment variable GRAPHLAB_PRODUCT_KEY.

    Note: Environment variable takes precedence over config file.

    @param file optional parameter to specify which file to use for configuration (defaults to ~/.graphlab/config)
    @return Product key string, or None if not found.
    """
    PRODUCT_KEY_ENV = 'GRAPHLAB_PRODUCT_KEY'
    if not PRODUCT_KEY_ENV in os.environ:
        import graphlab.connect as _mt
        # see if in ~/.graphlab/config
        config_file = file
        if (os.path.isfile(config_file)):
            try:
                import ConfigParser
                config = ConfigParser.ConfigParser()
                config.read(config_file)
                product_key = config.get(__section, __key)
                if product_key == -1:
                    raise BaseException()  # will fall into except block below
                else:
                    # set the product key as an environment variable in this session
                    os.environ[PRODUCT_KEY_ENV] = str(product_key).strip('"\'')
            except:
                msg = "Unable to parse product key out of %s. Make sure it is defined in the [%s] section, with key name: '%s'" % (
                    config_file, __section, __key)
                _mt._get_metric_tracker().track(
                    'server_launch.config_parser_error')
                raise KeyError(msg)
        else:
            return None
    return os.environ[PRODUCT_KEY_ENV]
    def transform(self, data):
        """
        Transform the SFrame `data` using a fitted model.

        Parameters
        ----------
        data : SFrame
            The data to be transformed.

        Returns
        -------
        A transformed SFrame.

        See Also
        --------
        fit, fit_transform

        Examples
        --------
        .. sourcecode:: python

        {examples}

        """
        _raise_error_if_not_sframe(data, "data")
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.transform')
        return self.__proxy__.transform(data)
Example #51
    def get_current_options(self):
        """
        Return a dictionary with the options used to define and train the model.

        Returns
        -------
        out : dict
            Dictionary with options used to define and train the model.

        See Also
        --------
        get_default_options, list_fields, get

        Examples
        --------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')

        >>> data['is_expensive'] = data['price'] > 30000
        >>> model = graphlab.svm_classifier.create(data,
                                  target='is_expensive',
                                  features=['bath', 'bedroom', 'size'])
        >>> current_options = model.get_current_options()
        """
        _mt._get_metric_tracker().track('toolkit.classifier.svm_classifier.get_current_options')
        return super(_Classifier, self).get_current_options()
    def fit_transform(self, data):
        """
        First fit a transformer using the SFrame `data` and then return a transformed
        version of `data`.

        Parameters
        ----------
        data : SFrame
            The data used to fit the transformer. The same data is then also
            transformed.

        Returns
        -------
        Transformed SFrame.

        See Also
        --------
        fit, transform

        Notes
        ------
        - Fit transform modifies self.

        Examples
        --------
        .. sourcecode:: python

        {examples}
        """
        _raise_error_if_not_sframe(data, "data")
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.fit_transform')
        return self.__proxy__.fit_transform(data)
    def get_current_options(self):
        """
        A dictionary describing the options requested during training.

        Returns
        -------
        out : dict
            A dictionary with option (name, value) pairs requested at training
            time.

        See Also
        --------
        get_default_options, list_fields, get

        Examples
        --------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/regression/houses.csv')

        >>> model = graphlab.linear_regression.create(data,
                                             target='price',
                                             features=['bath', 'bedroom', 'size'])
        >>> current_options = model.get_current_options()
        """

        _mt._get_metric_tracker().track('toolkit.regression.linear_regression.get_options')
        return super(LinearRegression, self).get_current_options()
    def save(self, location):
        """
        Save the transformer into a GraphLab archive. The object is saved as a
        directory which can then be loaded using the
        :py:func:`~graphlab.load_model` method.

        Parameters
        ----------
        location : string
            Target destination for the model. Can be a local path or remote URL.

        See Also
        --------
        graphlab.load_model

        Examples
        --------
        .. sourcecode:: python

            >>> model.save('my_model_file')
            >>> loaded_model = graphlab.load_model('my_model_file')

        """
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.save')
        return glconnect.get_unity().save_model(self.__proxy__,
                             _make_internal_url(location), self._get_wrapper())
Example #55
0
    def get_current_options(self):
        """
        Return a dictionary with the options used to define and create the
        current KmeansModel instance.

        Returns
        -------
        out : dict
            Dictionary of options used to train the current instance of the
            KmeansModel.

        See Also
        --------
        get_default_options, list_fields, get

        Examples
        --------
        >>> sf = graphlab.SFrame({'a' : [0.1, 8, 3.5], 'b':[-3, 7.6, 3]})
        >>> model = graphlab.kmeans.create(sf, 2)
        >>> model.get_current_options()
        {'num_clusters': 2, 'max_iterations': 10}
        """

        _mt._get_metric_tracker().track('toolkit.kmeans.get_current_options')

        opts = {'model': self.__proxy__, 'model_name': self.__name__}

        return _graphlab.toolkits._main.run(
            'kmeans_get_current_options', opts)
    def get(self, field):
        """Return the value for the queried field.

        Each field returned by ``list_fields`` can be queried in one of two ways:

        >>> out = m['field']
        >>> out = m.get('field')  # equivalent to previous line

        Parameters
        ----------
        field : string
            Name of the field to be retrieved.

        See Also
        ---------
        list_fields

        Returns
        -------
        out : value
            The current value of the requested field.

        """
        _mt._get_metric_tracker().track(self.__class__.__module__ + '.get')
        if field in self.list_fields():
            return self.__proxy__.get(field)
        else:
            raise KeyError('Field "%s" not in model. Available fields are '
                           '%s.' % (field, ', '.join(self.list_fields())))
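# A short sketch of driving get() from list_fields(), assuming `model` is any
# fitted GraphLab model; the missing field name at the end is hypothetical.
for field in model.list_fields():
    print field, model.get(field)   # model[field] is equivalent

try:
    model.get('no_such_field')      # hypothetical field not present in the model
except KeyError as e:
    print e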
Example #57
0
def set_product_key(product_key,
                    file=(os.path.join(os.path.expanduser("~"), ".graphlab",
                                       "config"))):
    """
    Sets the product key in the given config file, which by default is
    ~/.graphlab/config. Overwrites any existing product key in that file.

    Note: Environment variable GRAPHLAB_PRODUCT_KEY takes precedence over the
    config file and is not affected by this function.

    Parameters
    ----------
    product_key : str
        The product key, provided by registration on https://dato.com/register

    file : str, optional
        Specifies which file to use for configuration (defaults to ~/.graphlab/config)
    """
    import graphlab.connect as _mt
    try:
        import ConfigParser
        config = ConfigParser.ConfigParser()
        config.read(file)
        if not (config.has_section(__section)):
            config.add_section(__section)
        config.set(__section, __key, product_key)
        with open(file, 'wb') as config_file:
            config.write(config_file)
        _mt._get_metric_tracker().track('set_product_key.succeeded')
    except:
        _mt._get_metric_tracker().track('set_product_key.config_parser_error')
        raise
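# Usage sketch for set_product_key as defined above. The key string is a
# placeholder, and the GRAPHLAB_PRODUCT_KEY environment variable still takes
# precedence over whatever is written to the config file.
import os
import ConfigParser

set_product_key('XXXX-XXXX-XXXX-XXXX')  # placeholder key, not a real one

# Read the config back to confirm the key was stored; __section is the
# module-level section name used by set_product_key above.
config = ConfigParser.ConfigParser()
config.read(os.path.join(os.path.expanduser("~"), ".graphlab", "config"))
print config.items(__section)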
    def summary(self):
        """
        Display a summary of the TopicModel.

        Examples
        --------

        >>> docs = graphlab.SArray('http://s3.amazonaws.com/GraphLab-Datasets/nips-text')
        >>> m = graphlab.topic_model.create(docs)
        >>> m.summary()
        Topic Model
            Data:
                Vocabulary size:     12375
            Settings:
                Number of topics:    10
                alpha:               5.0
                beta:                0.1
                Iterations:          10
                Verbose:             False
            Accessible attributes:
                m['topics']          An SFrame containing the topics.
                m['vocabulary']      An SArray containing the vocabulary.
            Useful methods:
                m.get_topics()       Get the most probable words per topic.
                m.predict(new_docs)  Make predictions for new documents.

        """

        _mt._get_metric_tracker().track('toolkit.text.topic_model.summary')
        print self.__repr__()
    def _save_impl(self, pickler):
        """
        Save the model.

        The model is saved as a directory which can then be loaded using the
        :py:func:`~graphlab.load_model` method.

        Parameters
        ----------
        pickler : GLPickler
            An opened GLPickle archive (Do not close the archive.)

        See Also
        --------
        graphlab.load_model

        Examples
        --------
        >>> model.save('my_model_file')
        >>> loaded_model = graphlab.load_model('my_model_file')
        """
        _mt._get_metric_tracker().track(self.__module__ + '.save_impl')

        state = self.__proxy__
        pickler.dump(state)
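    # The save path above only dumps the proxy state into the pickler; a load
    # counterpart would read objects back in the same order. The method name and
    # unpickler interface below are assumptions (a load() call symmetric to
    # pickler.dump()), sketched only to show the pairing, not the library's API.
    def _load_impl_sketch(self, unpickler, version):
        state = unpickler.load()   # read back the state dumped by _save_impl
        self.__proxy__ = state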
    def __init__(self,
                 num_convolution_layers,
                 kernel_size,
                 num_channels,
                 num_output_units,
                 stride=layers._LayerDefaults.STRIDE_DEFAULT,
                 pooling='max_pooling',
                 **kwargs):
        _mt._get_metric_tracker().track('toolkit.deeplearning.ConvolutionNet')
        super(self.__class__, self).__init__()
        PoolingLayer = _pooling_layer_from_string(pooling)
        # alternating convolution + pooling blocks
        for n in range(num_convolution_layers):
            self._layers.append(
                layers.ConvolutionLayer(kernel_size=kernel_size,
                                        num_channels=num_channels,
                                        stride=stride))
            self._layers[-1]._set_params(**kwargs)
            self._layers.append(
                PoolingLayer(kernel_size=kernel_size, stride=stride))
            self._layers[-1]._set_params(**kwargs)
        # flatten the final feature maps before the fully connected output
        self._layers.append(layers.FlattenLayer())
        self._layers[-1]._set_params(**kwargs)

        # output layer
        if (num_output_units > 0):
            self._layers.append(layers.FullConnectionLayer(num_output_units))
            self._layers[-1]._set_params(**kwargs)
            if (num_output_units > 1):
                self._layers.append(layers.SoftmaxLayer())
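# Construction sketch for the network defined above. The parameter values are
# arbitrary, and the graphlab.deeplearning.ConvolutionNet path is an assumption
# based on the metric-tracker string; only the constructor signature comes from
# the __init__ above.
import graphlab

# Two convolution + max-pooling blocks, a flatten layer, then a 10-way output;
# SoftmaxLayer is appended automatically because num_output_units > 1.
net = graphlab.deeplearning.ConvolutionNet(num_convolution_layers=2,
                                           kernel_size=3,
                                           num_channels=16,
                                           num_output_units=10,
                                           pooling='max_pooling')
print net._layers  # internal layer list built in __init__ above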