Ejemplo n.º 1
0
    def get_current_options(self):
        """
        Return the options this graph analytics model instance is using.

        The option names come from the toolkit's default-options function for
        this model type; every default value is then replaced by the value
        that ``self.get`` reports for the same name.

        Returns
        -------
        out : dict
            Dictionary of options used to train this model.

        Raises
        ------
        RuntimeError
            If the model name has no entry in the dispatch table, or if
            querying the toolkit or the model fails.

        See Also
        --------
        get_default_options, list_fields, get
        """
        _mt._get_metric_tracker().track('toolkit.graph_analytics.get_current_options')

        # Model name (as reported by ``self.name()``) -> toolkit function that
        # returns the default options for that model.
        dispatch_table = {
            'ShortestPathModel': 'sssp_default_options',
            'GraphColoringModel': 'graph_coloring_default_options',
            'PagerankModel': 'pagerank_default_options',
            'ConnectedComponentsModel': 'connected_components_default_options',
            'TriangleCountingModel': 'triangle_counting_default_options',
            'KcoreModel': 'kcore_default_options'
        }

        try:
            model_options = _main.run(dispatch_table[self.name()], {})

            # Overwrite each default with the value currently stored on the
            # model. Only existing keys are reassigned, so iterating the dict
            # while updating it is safe.
            for key in model_options:
                model_options[key] = self.get(key)
            return model_options
        except Exception:
            # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
            # and SystemExit propagate instead of being reported as a
            # missing-options error.
            raise RuntimeError('Model %s does not have options' % self.name())
    def get_current_options(self):
        """
        Return a dictionary with the options used to define and create this
        graph analytics model instance.

        The keys are those returned by the model type's default-options
        toolkit function; the values are read back from the model via
        ``self.get``.

        Returns
        -------
        out : dict
            Dictionary of options used to train this model.

        Raises
        ------
        RuntimeError
            If ``self.name()`` is not a key of the dispatch table, or if the
            toolkit call or any ``self.get`` lookup fails.

        See Also
        --------
        get_default_options, list_fields, get
        """
        _mt._get_metric_tracker().track('toolkit.graph_analytics.get_current_options')

        # Maps the model's name to the toolkit function listing its defaults.
        dispatch_table = {
            'ShortestPathModel': 'sssp_default_options',
            'GraphColoringModel': 'graph_coloring_default_options',
            'PagerankModel': 'pagerank_default_options',
            'ConnectedComponentsModel': 'connected_components_default_options',
            'TriangleCountingModel': 'triangle_counting_default_options',
            'KcoreModel': 'kcore_default_options'
        }

        try:
            model_options = _main.run(dispatch_table[self.name()], {})

            # For each default option, substitute the model's current value.
            for key in model_options:
                model_options[key] = self.get(key)
            return model_options
        except Exception:
            # Deliberately not a bare ``except``: interpreter-exit signals
            # (KeyboardInterrupt, SystemExit) must not be masked as
            # RuntimeError.
            raise RuntimeError('Model %s does not have options' % self.name())
def create(graph, verbose=True):
    """
    Count, for every vertex, the triangles it belongs to.

    Edge directions are ignored. A triangle is a complete subgraph with only
    three vertices. The returned model object holds the total number of
    triangles as well as the triangle count of each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute triangle counts.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : TriangleCountingModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - T. Schank. (2007) `Algorithmic Aspects of Triangle-Based Network Analysis
      <http://digbib.ubka.uni-karlsruhe.de/volltexte/documents/4541>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create a
    :class:`~graphlab.triangle_counting.TriangleCountingModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz',
    ...                         format='snap')
    >>> tc = graphlab.triangle_counting.create(g)

    We can obtain the number of triangles that each vertex in the graph ``g``
    is present in:

    >>> tc_out = tc['triangle_count']  # SFrame

    We can add the new "triangle_count" field to the original graph g using:

    >>> g.vertices['triangle_count'] = tc['graph'].vertices['triangle_count']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    TriangleCountingModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.triangle_counting.create')

    if isinstance(graph, _SGraph):
        toolkit_args = {'graph': graph.__proxy__}
        result = _main.run('triangle_counting', toolkit_args, verbose)
        return TriangleCountingModel(result['model'])

    raise TypeError('graph input must be a SGraph object.')
Ejemplo n.º 4
0
def create(graph, kmin=0, kmax=10, verbose=True):
    """
    Run the K-core decomposition on a graph.

    The returned model object holds the total number of cores as well as the
    core id of each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the k-core decomposition.

    kmin : int, optional
        Minimum core id. Vertices having a smaller core id than `kmin` will
        be assigned core_id = `kmin`.

    kmax : int, optional
        Maximum core id. Vertices having a larger core id than `kmax` will
        be assigned core_id = `kmax`.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : KcoreModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - Alvarez-Hamelin, J.I., et al. (2005) `K-Core Decomposition: A Tool for the
      Visualization of Large Networks <http://arxiv.org/abs/cs/0504107>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.kcore.KcoreModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> kc = graphlab.kcore.create(g)

    We can obtain the ``core id`` corresponding to each vertex in the graph
    ``g`` using:

    >>> kcore_id = kc['core_id']     # SFrame

    See Also
    --------
    KcoreModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.kcore.create')

    if isinstance(graph, _SGraph):
        toolkit_args = {
            'graph': graph.__proxy__,
            'kmin': kmin,
            'kmax': kmax,
        }
        result = _main.run('kcore', toolkit_args, verbose)
        return KcoreModel(result['model'])

    raise TypeError('graph input must be a SGraph object.')
def create(graph, verbose=True):
    """
    Compute how many triangles each vertex of the graph is part of.

    Edge directions are ignored, and a triangle is a complete subgraph with
    only three vertices. The result is a model object carrying the total
    number of triangles and the triangle count for each vertex.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute triangle counts.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : TriangleCountingModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - T. Schank. (2007) `Algorithmic Aspects of Triangle-Based Network Analysis
      <http://digbib.ubka.uni-karlsruhe.de/volltexte/documents/4541>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create a
    :class:`~graphlab.triangle_counting.TriangleCountingModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz',
    ...                         format='snap')
    >>> tc = graphlab.triangle_counting.create(g)

    We can obtain the number of triangles that each vertex in the graph ``g``
    is present in:

    >>> tc_out = tc['triangle_count']  # SFrame

    We can add the new "triangle_count" field to the original graph g using:

    >>> g.vertices['triangle_count'] = tc['graph'].vertices['triangle_count']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    TriangleCountingModel
    """
    event_name = 'toolkit.graph_analytics.triangle_counting.create'
    _mt._get_metric_tracker().track(event_name)

    is_sgraph = isinstance(graph, _SGraph)
    if not is_sgraph:
        raise TypeError('graph input must be a SGraph object.')

    response = _main.run(
        'triangle_counting',
        {'graph': graph.__proxy__},
        verbose)
    model_proxy = response['model']
    return TriangleCountingModel(model_proxy)
Ejemplo n.º 6
0
def create(graph, verbose=True):
    """
    Color the vertices of a graph.

    Each vertex is assigned a color such that no adjacent vertices have the
    same color. The returned model object holds the total number of colors
    used as well as the color ID of each vertex. The algorithm is greedy and
    is not guaranteed to find the **minimum** graph coloring. It is also not
    deterministic, so successive runs may return different answers.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the coloring.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : GraphColoringModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - graph coloring <http://en.wikipedia.org/wiki/Graph_coloring>`_

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.graph_coloring.GraphColoringModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> gc = graphlab.graph_coloring.create(g)

    We can obtain the ``color id`` corresponding to each vertex in the graph ``g``
    as follows:

    >>> color_id = gc['color_id']  # SFrame

    We can obtain the total number of colors required to color the graph ``g``
    as follows:

    >>> num_colors = gc['num_colors']

    See Also
    --------
    GraphColoringModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.graph_coloring.create')

    if isinstance(graph, _SGraph):
        toolkit_args = {'graph': graph.__proxy__}
        result = _main.run('graph_coloring', toolkit_args, verbose)
        return GraphColoringModel(result['model'])

    raise TypeError('graph input must be a SGraph object.')
def create(graph, verbose=True):
    """
    Find the weakly connected components of a graph.

    The returned model object holds the total number of weakly connected
    components as well as the component ID of each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the connected components.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : ConnectedComponentsModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Mathworld Wolfram - Weakly Connected Component
      <http://mathworld.wolfram.com/WeaklyConnectedComponent.html>`_

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.connected_components.ConnectedComponentsModel` as
    follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> cc = graphlab.connected_components.create(g)
    >>> cc.summary()

    We can obtain the ``component id`` corresponding to each vertex in the
    graph ``g`` as follows:

    >>> cc_ids = cc['component_id']  # SFrame

    We can obtain a graph with additional information about the ``component
    id`` corresponding to each vertex as follows:

    >>> cc_graph = cc['graph']      # SGraph

    See Also
    --------
    ConnectedComponentsModel
    """
    event_name = 'toolkit.graph_analytics.connected_components.create'
    _mt._get_metric_tracker().track(event_name)

    if isinstance(graph, _SGraph):
        toolkit_args = {'graph': graph.__proxy__}
        result = _main.run('connected_components', toolkit_args, verbose)
        return ConnectedComponentsModel(result['model'])

    raise TypeError('graph input must be a SGraph object.')
def create(graph, verbose=True):
    """
    Compute a coloring of the graph's vertices.

    A color is assigned to each vertex such that no adjacent vertices have
    the same color. The result is a model object with the total number of
    colors used and the color ID for each vertex in the graph. This
    algorithm is greedy and is not guaranteed to find the **minimum** graph
    coloring. It is also not deterministic, so successive runs may return
    different answers.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the coloring.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : GraphColoringModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - graph coloring <http://en.wikipedia.org/wiki/Graph_coloring>`_

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.graph_coloring.GraphColoringModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> gc = graphlab.graph_coloring.create(g)

    We can obtain the ``color id`` corresponding to each vertex in the graph ``g``
    as follows:

    >>> color_id = gc['color_id']  # SFrame

    We can obtain the total number of colors required to color the graph ``g``
    as follows:

    >>> num_colors = gc['num_colors']

    See Also
    --------
    GraphColoringModel
    """
    event_name = 'toolkit.graph_analytics.graph_coloring.create'
    _mt._get_metric_tracker().track(event_name)

    is_sgraph = isinstance(graph, _SGraph)
    if not is_sgraph:
        raise TypeError('graph input must be a SGraph object.')

    response = _main.run(
        'graph_coloring',
        {'graph': graph.__proxy__},
        verbose)
    model_proxy = response['model']
    return GraphColoringModel(model_proxy)
Ejemplo n.º 9
0
def get_default_options():
    """
    Return the default options for :func:`graphlab.graph_coloring.create`.

    Returns
    -------
    out : dict
        Result of the ``graph_coloring_default_options`` toolkit function.

    Examples
    --------
    >>> graphlab.graph_coloring.get_default_options()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.graph_coloring.get_default_options')

    defaults = _main.run('graph_coloring_default_options', {})
    return defaults
def get_default_options():
    """
    Return the default options for
    :func:`graphlab.connected_components.create`.

    Returns
    -------
    out : dict
        Result of the ``connected_components_default_options`` toolkit
        function.

    Examples
    --------
    >>> graphlab.connected_components.get_default_options()
    """
    event_name = 'toolkit.graph_analytics.connected_components.get_default_options'
    _mt._get_metric_tracker().track(event_name)

    defaults = _main.run('connected_components_default_options', {})
    return defaults
def get_default_options():
    """
    Return the default options for :func:`graphlab.degree_counting.create`.

    Returns
    -------
    out : dict
        Result of the ``degree_count_default_options`` toolkit function.

    Examples
    --------
    >>> graphlab.degree_counting.get_default_options()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.degree_counting.get_default_options')

    defaults = _main.run('degree_count_default_options', {})
    return defaults
def get_default_options():
    """
    Return the default options for
    :func:`graphlab.label_propagation.create`.

    Returns
    -------
    out : dict
        Result of the ``label_propagation_default_options`` toolkit function.

    See Also
    --------
    LabelPropagationModel.get_current_options

    Examples
    --------
    >>> graphlab.label_propagation.get_default_options()
    """
    event_name = 'toolkit.graph_analytics.label_propagation.get_default_options'
    _mt._get_metric_tracker().track(event_name)

    defaults = _main.run('label_propagation_default_options', {})
    return defaults
def get_default_options():
    """
    Return the default options for :func:`graphlab.shortest_path.create`.

    Returns
    -------
    out : dict
        Result of the ``sssp_default_options`` toolkit function.

    See Also
    --------
    ShortestPathModel.get_current_options

    Examples
    --------
    >>> graphlab.shortest_path.get_default_options()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.sssp.get_default_options')

    defaults = _main.run('sssp_default_options', {})
    return defaults
Ejemplo n.º 14
0
def get_default_options():
    """
    Return the default options for :func:`graphlab.pagerank.create`.

    Returns
    -------
    out : dict
        Result of the ``pagerank_default_options`` toolkit function.

    See Also
    --------
    PagerankModel.get_current_options

    Examples
    --------
    >>> graphlab.pagerank.get_default_options()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.pagerank.get_default_options')

    defaults = _main.run('pagerank_default_options', {})
    return defaults
Ejemplo n.º 15
0
def get_default_options():
    """
    Return the default options for :func:`graphlab.kcore.create`.

    Returns
    -------
    out : dict
        Result of the ``kcore_default_options`` toolkit function.

    See Also
    --------
    KcoreModel.get_current_options

    Examples
    --------
    >>> graphlab.kcore.get_default_options()
    """
    event_name = 'toolkit.graph_analytics.kcore.get_default_options'
    _mt._get_metric_tracker().track(event_name)

    defaults = _main.run('kcore_default_options', {})
    return defaults
Ejemplo n.º 16
0
def get_default_options():
    """
    Fetch the default options of :func:`graphlab.label_propagation.create`.

    Returns
    -------
    out : dict
        Whatever the ``label_propagation_default_options`` toolkit function
        returns.

    See Also
    --------
    LabelPropagationModel.get_current_options

    Examples
    --------
    >>> graphlab.label_propagation.get_default_options()
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.label_propagation.get_default_options')

    toolkit_function = 'label_propagation_default_options'
    return _main.run(toolkit_function, {})
def get_default_options():
    """
    Fetch the default options of :func:`graphlab.shortest_path.create`.

    Returns
    -------
    out : dict
        Whatever the ``sssp_default_options`` toolkit function returns.

    See Also
    --------
    ShortestPathModel.get_current_options

    Examples
    --------
    >>> graphlab.shortest_path.get_default_options()
    """
    event_name = 'toolkit.graph_analytics.sssp.get_default_options'
    _mt._get_metric_tracker().track(event_name)

    toolkit_function = 'sssp_default_options'
    return _main.run(toolkit_function, {})
Ejemplo n.º 18
0
 def _describe_fields(cls):
     """
     Return a dictionary for the class fields description.
     Fields should NOT be wrapped by _precomputed_field, if necessary

     The description is obtained by running the toolkit function that the
     dispatch table associates with ``cls.__name__``.

     Raises
     ------
     RuntimeError
         If ``cls.__name__`` has no entry in the dispatch table, or if the
         toolkit call fails.
     """
     # Class name -> toolkit function returning that model's field
     # descriptions.
     dispatch_table = {
         'ShortestPathModel': 'sssp_model_fields',
         'GraphColoringModel': 'graph_coloring_model_fields',
         'PagerankModel': 'pagerank_model_fields',
         'ConnectedComponentsModel': 'connected_components_model_fields',
         'TriangleCountingModel': 'triangle_counting_model_fields',
         'KcoreModel': 'kcore_model_fields',
         'DegreeCountingModel': 'degree_count_model_fields',
         'LabelPropagationModel': 'label_propagation_model_fields'
     }
     try:
         return _main.run(dispatch_table[cls.__name__], {})
     except Exception:
         # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
         # and SystemExit are not reported as a missing description.
         raise RuntimeError('Model %s does not have fields description' % cls.__name__)
 def _describe_fields(cls):
     """
     Return a dictionary for the class fields description.
     Fields should NOT be wrapped by _precomputed_field, if necessary

     ``cls.__name__`` selects, through the dispatch table, the toolkit
     function whose result is returned.

     Raises
     ------
     RuntimeError
         If the class name is not in the dispatch table, or if running the
         toolkit function fails.
     """
     # Maps each supported model class name to its field-description
     # toolkit function.
     dispatch_table = {
         'ShortestPathModel': 'sssp_model_fields',
         'GraphColoringModel': 'graph_coloring_model_fields',
         'PagerankModel': 'pagerank_model_fields',
         'ConnectedComponentsModel': 'connected_components_model_fields',
         'TriangleCountingModel': 'triangle_counting_model_fields',
         'KcoreModel': 'kcore_model_fields',
         'DegreeCountingModel': 'degree_count_model_fields',
         'LabelPropagationModel': 'label_propagation_model_fields'
     }
     try:
         fields_description = _main.run(dispatch_table[cls.__name__], {})
         return fields_description
     except Exception:
         # Not a bare ``except``: interpreter-exit signals such as
         # KeyboardInterrupt must propagate unchanged.
         raise RuntimeError('Model %s does not have fields description' %
                            cls.__name__)
Ejemplo n.º 20
0
 def _describe_fields(cls):
     """
     Return a pretty table for the class fields description.

     The field descriptions come from the toolkit function that the dispatch
     table associates with ``cls.__name__``; each (field, description) pair
     becomes one left-aligned row of the table.

     Raises
     ------
     RuntimeError
         If ``cls.__name__`` has no entry in the dispatch table, or if the
         toolkit call or table construction fails.
     """
     # Class name -> toolkit function returning that model's field
     # descriptions.
     dispatch_table = {
         'ShortestPathModel': 'sssp_model_fields',
         'GraphColoringModel': 'graph_coloring_model_fields',
         'PagerankModel': 'pagerank_model_fields',
         'ConnectedComponentsModel': 'connected_components_model_fields',
         'TriangleCountingModel': 'triangle_counting_model_fields',
         'KcoreModel': 'kcore_model_fields'
     }
     try:
         fields_description = _main.run(dispatch_table[cls.__name__], {})
         tbl = _PrettyTable(['Field', 'Description'])
         # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()``
         # is Python 2 only and would always land in the handler below on 3.
         for k, v in fields_description.items():
             tbl.add_row([k, v])
         tbl.align['Field'] = 'l'
         tbl.align['Description'] = 'l'
         return tbl
     except Exception:
         # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
         # and SystemExit are not reported as a missing description.
         raise RuntimeError('Model %s does not have fields description' % cls.__name__)
 def _describe_fields(cls):
     """
     Return a pretty table for the class fields description.

     ``cls.__name__`` selects, through the dispatch table, the toolkit
     function that supplies the descriptions. The table has the columns
     'Field' and 'Description', both left-aligned.

     Raises
     ------
     RuntimeError
         If the class name is not in the dispatch table, or if fetching the
         descriptions or building the table fails.
     """
     # Maps each supported model class name to its field-description
     # toolkit function.
     dispatch_table = {
         'ShortestPathModel': 'sssp_model_fields',
         'GraphColoringModel': 'graph_coloring_model_fields',
         'PagerankModel': 'pagerank_model_fields',
         'ConnectedComponentsModel': 'connected_components_model_fields',
         'TriangleCountingModel': 'triangle_counting_model_fields',
         'KcoreModel': 'kcore_model_fields'
     }
     try:
         fields_description = _main.run(dispatch_table[cls.__name__], {})
         tbl = _PrettyTable(['Field', 'Description'])
         # Use ``items()`` (available on Python 2 and 3) instead of the
         # Python-2-only ``iteritems()``.
         for field, description in fields_description.items():
             tbl.add_row([field, description])
         tbl.align['Field'] = 'l'
         tbl.align['Description'] = 'l'
         return tbl
     except Exception:
         # Not a bare ``except``: interpreter-exit signals such as
         # KeyboardInterrupt must propagate unchanged.
         raise RuntimeError('Model %s does not have fields description' % cls.__name__)
Ejemplo n.º 22
0
def create(graph, reset_probability=0.15,
           threshold=1e-2,
           max_iterations=20,
           _single_precision=False,
           _distributed='auto',
           verbose=True):
    """
    Run PageRank on a graph.

    The returned model object holds the total PageRank as well as the
    PageRank value of each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the pagerank value.

    reset_probability : float, optional
        Probability that a random surfer jumps to an arbitrary page.

    threshold : float, optional
        Threshold for convergence, measured in the L1 norm
        (the sum of absolute value) of the delta of each vertex's
        pagerank value.

    max_iterations : int, optional
        The maximum number of iterations to run.

    _single_precision : bool, optional
        If true, run pagerank in single precision. The resulting
        pagerank values may not be accurate for large graphs, but
        the computation should run faster and use less memory.

    _distributed : distributed environment, internal
        Passed as ``env`` to the distributed runner; only used when a
        distributed execution environment is active.

    verbose : bool, optional
        If True, print progress updates.


    Returns
    -------
    out : PagerankModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - PageRank <http://en.wikipedia.org/wiki/PageRank>`_
    - Page, L., et al. (1998) `The PageRank Citation Ranking: Bringing Order to
      the Web <http://ilpubs.stanford.edu:8090/422/1/1999-66.pdf>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.pagerank.PagerankModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> pr = graphlab.pagerank.create(g)

    We can obtain the page rank corresponding to each vertex in the graph ``g``
    using:

    >>> pr_out = pr['pagerank']     # SFrame

    We can add the new pagerank field to the original graph g using:

    >>> g.vertices['pagerank'] = pr['graph'].vertices['pagerank']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    PagerankModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.pagerank.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    toolkit_args = {
        'threshold': threshold,
        'reset_probability': reset_probability,
        'max_iterations': max_iterations,
        'single_precision': _single_precision,
        'graph': graph.__proxy__,
    }

    # Run distributed only when a distributed execution environment is
    # active; otherwise run the toolkit locally.
    if _get_distributed_execution_environment() is not None:
        model = _distributed_run('distributed_pagerank', toolkit_args,
                                 env=_distributed, verbose=verbose)
    else:
        local_result = _main.run('pagerank', toolkit_args, verbose)
        model = local_result['model']

    return PagerankModel(model)
def create(graph,
           source_vid,
           weight_field="",
           max_distance=1e30,
           verbose=True):
    """
    Compute single-source shortest path distances.

    Distances are measured from the source vertex to all vertices in the
    graph. Note that because SGraph is directed, shortest paths are also
    directed. To find undirected shortest paths, add edges to the SGraph in
    both directions. The returned model object holds the distance of each
    vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute shortest paths.

    source_vid : vertex ID
        ID of the source vertex.

    weight_field : string, optional
        The edge field representing the edge weights. If empty, uses unit
        weights.

    max_distance : float, optional
        Forwarded unchanged to the toolkit as the ``max_distance`` option.
        Presumably an upper bound on the distances considered; confirm
        against the toolkit implementation.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : ShortestPathModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - ShortestPath <http://en.wikipedia.org/wiki/Shortest_path_problem>`_

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.shortest_path.ShortestPathModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> sp = graphlab.shortest_path.create(g, source_vid=1)

    We can obtain the shortest path distance from the source vertex to each
    vertex in the graph ``g`` as follows:

    >>> sp_sframe = sp['distance']   # SFrame

    We can add the new distance field to the original graph g using:

    >>> g.vertices['distance_to_1'] = sp['graph'].vertices['distance']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    To get the actual path from the source vertex to any destination vertex:

    >>> path = sp.get_path(vid=10)


    We can obtain an auxiliary graph with additional information corresponding
    to the shortest path from the source vertex to each vertex in the graph
    ``g`` as follows:

    >>> sp_graph = sp['graph']       # SGraph

    See Also
    --------
    ShortestPathModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.sssp.create')

    if isinstance(graph, _SGraph):
        toolkit_args = {
            'source_vid': source_vid,
            'weight_field': weight_field,
            'max_distance': max_distance,
            'graph': graph.__proxy__
        }
        result = _main.run('sssp', toolkit_args, verbose)
        return ShortestPathModel(result['model'])

    raise TypeError('graph input must be a SGraph object.')
    def extract_features(self, dataset, layer_id=None):
        """
        Propagate each example of ``dataset`` through the network and return
        an SArray of dense feature vectors.

        Each feature vector is the concatenation of all the hidden unit values
        at layer[layer_id]. These vectors can be used as input to train
        another classifier such as a
        :py:class:`~graphlab.logistic_classifier.LogisticClassifier`, an
        :py:class:`~graphlab.svm_classifier.SVMClassifier`, another
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`, or a
        :py:class:`~graphlab.boosted_trees_classifier.BoostedTreesClassifier`.
        Input dataset size must be the same as for the training of the model,
        except for images which are automatically resized.


        We also are releasing a pre-trained model for ImageNet, as described by
        Alex Krizhevsky et. al. It is located at
        https://static.turi.com/products/graphlab-create/resources/models/python2.7/imagenet_model_iter45 .
        Using it requires 256 x 256 x 3 images.
        Please see Examples and References for more.


        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        layer_id : int , optional
            The index of the layer in neuralnet at which the activations are
            taken to be a dense feature vector. Must be a fully-connected layer.
            Default is None, in which case the layer before the connection
            layer to the output is used.


        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        Raises
        ------
        ValueError
            If no layer at or before ``layer_id`` has type "CONNECTION" or
            "TRANSITION".

        See Also
        --------
        graphlab.deeplearning.layers

        References
        ----------
        - Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet
          classification with deep convolutional neural networks." Advances in
          neural information processing systems. 2012.

        Examples
        --------
        >>> data = graphlab.SFrame('https://static.turi.com/datasets/mnist/sframe/train6k')
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        >>> # Now, let's extract features from the last layer
        >>> data['features'] = m.extract_features(data)
        >>> # Now, let's build a new classifier on top of extracted features
        >>> m = graphlab.classifier.create(data,
        ...                                          features = ['features'],
        ...                                          target='label')

        Now, let's see how to load the ImageNet model, and use it for extracting
        features after resizing the data:

        >>> imagenet_model = graphlab.load_model('https://static.turi.com/products/graphlab-create/resources/models/python2.7/imagenet_model_iter45')
        >>> data['image'] = graphlab.image_analysis.resize(data['image'], 256, 256, 3, decode=True)
        >>> data['imagenet_features'] = imagenet_model.extract_features(data)

        """
        tracker = _mt._get_metric_tracker()
        tracker.track('toolkit.classifier.neuralnet_classifier.extract_features')
        _raise_error_if_not_sframe(dataset, "dataset")

        layers = self.get('network').layers
        last_index = len(layers) - 1

        # Default layer: one step back from the last layer when that layer is
        # a CONNECTION, two steps back otherwise.
        if layer_id is None:
            step_back = 1 if layers[last_index]._type == "CONNECTION" else 2
            layer_id = last_index - step_back
        _numeric_param_check_range("layer_id", layer_id, 0, last_index)

        # Every layer up to and including ``layer_id`` is inspected; at least
        # one of them must be a CONNECTION or TRANSITION layer.
        flattening_types = ("CONNECTION", "TRANSITION")
        flattened_flags = [layers[idx]._type in flattening_types
                           for idx in range(layer_id + 1)]
        if not any(flattened_flags):
            raise ValueError(
                "Features must be extracted from either a network "
                "with non-image input or a layer after a FlattenLayer. "
                "Try extracting features from layer following a FlattenLayer.")

        options = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
            'missing_value_action': "error",
            'layer_id': layer_id
        }
        result = _toolkits_main.run('supervised_learning_feature_extraction',
                                    options)
        return _map_unity_proxy_to_object(result['extracted'])
    def predict_topk(self, dataset, output_type="probability", k=3):
        """
        Return the top-k predictions of the trained model for ``dataset``.

        Predictions are returned as an SFrame with three columns: `row_id`,
        `class`, and one of `probability`, `rank`, or `score`, depending on
        the ``output_type`` parameter. Input dataset size must be the same as
        for training of the model, except for images which are automatically
        resized.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        output_type : {'probability', 'rank', 'score'}, optional
            Choose the return type of the prediction:

            - `rank`: outputs rank along with class label.
            - `probability`: outputs learned probability along with class label.
            - `score`: Same as probability

        k : int, optional
            Number of classes to return for each input example.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.

        See Also
        --------
        predict, classify, evaluate

        Examples
        --------
        >>> data = graphlab.SFrame('https://static.turi.com/datasets/mnist/sframe/train')
        >>> training_data, validation_data = data.random_split(0.8)
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(training_data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        ...
        >>> pred = m.predict_topk(validation_data, k=3)
        >>> pred
        +--------+-------+-------------------+
        | row_id | class |    probability    |
        +--------+-------+-------------------+
        |   0    |   4   |   0.995623886585  |
        |   0    |   9   |  0.0038311756216  |
        |   0    |   7   | 0.000301006948575 |
        |   1    |   1   |   0.928708016872  |
        |   1    |   3   |  0.0440889261663  |
        |   1    |   2   |  0.0176190119237  |
        |   2    |   3   |   0.996967732906  |
        |   2    |   2   |  0.00151345680933 |
        |   2    |   7   | 0.000637513934635 |
        |   3    |   1   |   0.998070061207  |
        |  ...   |  ...  |        ...        |
        +--------+-------+-------------------+
        [35688 rows x 3 columns]
        """
        tracker = _mt._get_metric_tracker()
        tracker.track('toolkit.classifier.neuralnet_classifier.predict_topk')
        _raise_error_if_not_sframe(dataset, "dataset")

        # Arguments for the toolkit call; ``k`` is sent under the key 'topk'.
        toolkit_args = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
            'output_type': output_type,
            'topk': k,
            'missing_value_action': 'error'
        }
        result = _toolkits_main.run('supervised_learning_predict_topk',
                                    toolkit_args)
        return _map_unity_proxy_to_object(result['predicted'])
def create(graph, label_field,
           threshold=1e-3,
           weight_field='',
           self_weight=1.0,
           undirected=False,
           max_iterations=None,
           _single_precision=False,
           _distributed='auto',
           verbose=True):
    """
    Run "label propagation" on a weighted graph in which only a subset of the
    vertices carry an observed class label, and infer the label probability of
    the remaining (unobserved) vertices.

    The algorithm iteratively updates the label probability of each vertex as
    a weighted sum of its own label probability and that of its neighboring
    vertices, until convergence. See
    :class:`graphlab.label_propagation.LabelPropagationModel` for the details
    of the algorithm.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the label propagation.

    label_field : str
        Vertex field storing the initial vertex labels. The column must be
        integer typed and its values must lie in [0, num_classes). None
        values indicate unobserved vertex labels.

    threshold : float, optional
        Threshold for convergence, measured in the average L2 norm
        (the sum of squared values) of the delta of each vertex's
        label probability vector.

    weight_field : str, optional
        Name of the field holding the edge weights. If empty, all edges are
        assumed to have unit weight.

    self_weight : float, optional
        The weight for the self edge.

    undirected : bool, optional
        If True, treat each edge as undirected and propagate labels in
        both directions.

    max_iterations : int, optional
        The maximum number of iterations to run. Default is unlimited.
        If set, the algorithm terminates when either ``max_iterations``
        or the convergence threshold is reached.

    _single_precision : bool, optional
        If True, run label propagation in single precision. The resulting
        probability values may be less accurate, but the computation should
        run faster and use less memory.

    _distributed : distributed environment, internal
        Forwarded as ``env`` to the distributed runner when a distributed
        execution environment is active.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : LabelPropagationModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph, or if the ``label_field`` vertex
        column is not integer typed.

    References
    ----------
    - Zhu, X., & Ghahramani, Z. (2002). `Learning from labeled and unlabeled data
      with label propagation <http://www.cs.cmu.edu/~zhuxj/pub/CMU-CALD-02-107.pdf>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.label_propagation.LabelPropagationModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz',
    ...                         format='snap')
    >>> # Initialize random classes for a subset of vertices.
    >>> # Leave the unobserved vertices with a None label.
    >>> import random
    >>> def init_label(vid):
    ...     x = random.random()
    ...     if x < 0.2:
    ...         return 0
    ...     elif x > 0.9:
    ...         return 1
    ...     else:
    ...         return None
    >>> g.vertices['label'] = g.vertices['__id'].apply(init_label, int)
    >>> m = graphlab.label_propagation.create(g, label_field='label')

    We can obtain for each vertex the predicted label and the probability of
    each label in the graph ``g`` using:

    >>> labels = m['labels']     # SFrame
    >>> labels
    +------+-------+-----------------+-------------------+----------------+
    | __id | label | predicted_label |         P0        |       P1       |
    +------+-------+-----------------+-------------------+----------------+
    |  5   |   1   |        1        |        0.0        |      1.0       |
    |  7   |  None |        0        |    0.8213214997   |  0.1786785003  |
    |  8   |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  10  |  None |        0        |   0.534984718273  | 0.465015281727 |
    |  27  |  None |        0        |   0.752801638549  | 0.247198361451 |
    |  29  |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  33  |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  47  |   0   |        0        |        1.0        |      0.0       |
    |  50  |  None |        0        |   0.788279032657  | 0.211720967343 |
    |  52  |  None |        0        |   0.666666666667  | 0.333333333333 |
    +------+-------+-----------------+-------------------+----------------+
    [36692 rows x 5 columns]

    See Also
    --------
    LabelPropagationModel
    """
    _mt._get_metric_tracker().track('toolkit.graph_analytics.label_propagation.create')

    # Validation order matters for which error surfaces first: field names,
    # then the graph type, then the dtype of the label column.
    for field_name in (label_field, weight_field):
        _raise_error_if_not_of_type(field_name, str)

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    label_dtype = graph.vertices[label_field].dtype()
    if label_dtype != int:
        raise TypeError('label_field %s must be integer typed.' % label_field)

    toolkit_options = {
        'label_field': label_field,
        'threshold': threshold,
        'weight_field': weight_field,
        'self_weight': self_weight,
        'undirected': undirected,
        'max_iterations': max_iterations,
        'single_precision': _single_precision,
        'graph': graph.__proxy__,
    }

    # Run locally unless a distributed execution environment is active.
    if _get_distributed_execution_environment() is None:
        result = _main.run('label_propagation', toolkit_options, verbose)
        trained = result['model']
    else:
        trained = _distributed_run('distributed_labelprop', toolkit_options,
                                   env=_distributed, verbose=verbose)
    return LabelPropagationModel(trained)
def create(graph, verbose=True):
    """
    Count the in degree, out degree and total degree of every vertex.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute degree counts.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : DegreeCountingModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.degree_counting.DegreeCountingModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/web-Google.txt.gz',
    ...                         format='snap')
    >>> m = graphlab.degree_counting.create(g)
    >>> g2 = m['graph']
    >>> g2
    SGraph({'num_edges': 5105039, 'num_vertices': 875713})
    Vertex Fields:['__id', 'in_degree', 'out_degree', 'total_degree']
    Edge Fields:['__src_id', '__dst_id']

    >>> g2.vertices.head(5)
    Columns:
        __id    int
        in_degree   int
        out_degree  int
        total_degree    int
    <BLANKLINE>
    Rows: 5
    <BLANKLINE>
    Data:
    +------+-----------+------------+--------------+
    | __id | in_degree | out_degree | total_degree |
    +------+-----------+------------+--------------+
    |  5   |     15    |     7      |      22      |
    |  7   |     3     |     16     |      19      |
    |  8   |     1     |     2      |      3       |
    |  10  |     13    |     11     |      24      |
    |  27  |     19    |     16     |      35      |
    +------+-----------+------------+--------------+

    See Also
    --------
    DegreeCountingModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.degree_counting.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    # The toolkit only needs the graph proxy; there are no tunable options.
    toolkit_options = {'graph': graph.__proxy__}
    result = _main.run('degree_count', toolkit_options, verbose)
    return DegreeCountingModel(result['model'])
Ejemplo n.º 28
0
def create(graph, reset_probability=0.15,
           threshold=1e-2,
           max_iterations=20,
           verbose=True):
    """
    Compute the PageRank of every vertex in the graph. The returned model
    object holds the total PageRank as well as the PageRank value of each
    vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the pagerank value.

    reset_probability : float, optional
        Probability that a random surfer jumps to an arbitrary page.

    threshold : float, optional
        Threshold for convergence, measured in the L1 norm
        (the sum of absolute values) of the delta of each vertex's
        pagerank value.

    max_iterations : int, optional
        The maximum number of iterations to run.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : PagerankModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - PageRank <http://en.wikipedia.org/wiki/PageRank>`_
    - Page, L., et al. (1998) `The PageRank Citation Ranking: Bringing Order to
      the Web <http://ilpubs.stanford.edu:8090/422/1/1999-66.pdf>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.pagerank.PagerankModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> pr = graphlab.pagerank.create(g)

    We can obtain the page rank corresponding to each vertex in the graph ``g``
    using:

    >>> pr_out = pr['pagerank']     # SFrame

    See Also
    --------
    PagerankModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.pagerank.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    toolkit_options = {
        'threshold': threshold,
        'reset_probability': reset_probability,
        'max_iterations': max_iterations,
        'graph': graph.__proxy__,
    }
    result = _main.run('pagerank', toolkit_options, verbose)
    return PagerankModel(result['model'])
Ejemplo n.º 29
0
def create(graph,
           label_field,
           threshold=1e-3,
           weight_field='',
           self_weight=1.0,
           undirected=False,
           max_iterations=None,
           _single_precision=False,
           _distributed='auto',
           verbose=True):
    """
    Run "label propagation" on a weighted graph in which only a subset of the
    vertices carry an observed class label, and infer the label probability of
    the remaining (unobserved) vertices.

    The algorithm iteratively updates the label probability of each vertex as
    a weighted sum of its own label probability and that of its neighboring
    vertices, until convergence. See
    :class:`graphlab.label_propagation.LabelPropagationModel` for the details
    of the algorithm.

    Notes: label propagation works well with a small number of labels, i.e.
    binary labels, or fewer than 1000 classes. The toolkit will throw an error
    if the number of classes exceeds the maximum value (1000).

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the label propagation.

    label_field : str
        Vertex field storing the initial vertex labels. The column must be
        integer typed and its values must lie in [0, num_classes). None
        values indicate unobserved vertex labels.

    threshold : float, optional
        Threshold for convergence, measured in the average L2 norm
        (the sum of squared values) of the delta of each vertex's
        label probability vector.

    weight_field : str, optional
        Name of the field holding the edge weights. If empty, all edges are
        assumed to have unit weight.

    self_weight : float, optional
        The weight for the self edge.

    undirected : bool, optional
        If True, treat each edge as undirected and propagate labels in
        both directions.

    max_iterations : int, optional
        The maximum number of iterations to run. Default is unlimited.
        If set, the algorithm terminates when either ``max_iterations``
        or the convergence threshold is reached.

    _single_precision : bool, optional
        If True, run label propagation in single precision. The resulting
        probability values may be less accurate, but the computation should
        run faster and use less memory.

    _distributed : distributed environment, internal
        Accepted for interface compatibility; this function body does not
        read it.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : LabelPropagationModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph, or if the ``label_field`` vertex
        column is not integer typed.

    References
    ----------
    - Zhu, X., & Ghahramani, Z. (2002). `Learning from labeled and unlabeled data
      with label propagation <http://www.cs.cmu.edu/~zhuxj/pub/CMU-CALD-02-107.pdf>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.label_propagation.LabelPropagationModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz',
    ...                         format='snap')
    >>> # Initialize random classes for a subset of vertices.
    >>> # Leave the unobserved vertices with a None label.
    >>> import random
    >>> def init_label(vid):
    ...     x = random.random()
    ...     if x < 0.2:
    ...         return 0
    ...     elif x > 0.9:
    ...         return 1
    ...     else:
    ...         return None
    >>> g.vertices['label'] = g.vertices['__id'].apply(init_label, int)
    >>> m = graphlab.label_propagation.create(g, label_field='label')

    We can obtain for each vertex the predicted label and the probability of
    each label in the graph ``g`` using:

    >>> labels = m['labels']     # SFrame
    >>> labels
    +------+-------+-----------------+-------------------+----------------+
    | __id | label | predicted_label |         P0        |       P1       |
    +------+-------+-----------------+-------------------+----------------+
    |  5   |   1   |        1        |        0.0        |      1.0       |
    |  7   |  None |        0        |    0.8213214997   |  0.1786785003  |
    |  8   |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  10  |  None |        0        |   0.534984718273  | 0.465015281727 |
    |  27  |  None |        0        |   0.752801638549  | 0.247198361451 |
    |  29  |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  33  |  None |        1        | 5.96046447754e-08 | 0.999999940395 |
    |  47  |   0   |        0        |        1.0        |      0.0       |
    |  50  |  None |        0        |   0.788279032657  | 0.211720967343 |
    |  52  |  None |        0        |   0.666666666667  | 0.333333333333 |
    +------+-------+-----------------+-------------------+----------------+
    [36692 rows x 5 columns]

    See Also
    --------
    LabelPropagationModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.label_propagation.create')

    # Validation order matters for which error surfaces first: field names,
    # then the graph type, then the dtype of the label column.
    for field_name in (label_field, weight_field):
        _raise_error_if_not_of_type(field_name, str)

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    label_dtype = graph.vertices[label_field].dtype()
    if label_dtype != int:
        raise TypeError('label_field %s must be integer typed.' % label_field)

    toolkit_options = {'label_field': label_field,
                       'threshold': threshold,
                       'weight_field': weight_field,
                       'self_weight': self_weight,
                       'undirected': undirected,
                       'max_iterations': max_iterations,
                       'single_precision': _single_precision,
                       'graph': graph.__proxy__}

    result = _main.run('label_propagation', toolkit_options, verbose)
    return LabelPropagationModel(result['model'])
def create(graph, source_vid, weight_field="", max_distance=1e30, verbose=True):
    """
    Compute the single source shortest path distance from the source vertex
    to all vertices in the graph. Because SGraph is directed, shortest paths
    are also directed; to find undirected shortest paths, add edges to the
    SGraph in both directions. The returned model object holds the distance
    of each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute shortest paths.

    source_vid : vertex ID
        ID of the source vertex.

    weight_field : string, optional
        The edge field representing the edge weights. If empty, uses unit
        weights.

    max_distance : float, optional
        Forwarded unchanged to the toolkit as its ``max_distance`` option.
        NOTE(review): presumably an upper bound on the distance between any
        two vertices -- confirm against the toolkit documentation.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : ShortestPathModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - ShortestPath <http://en.wikipedia.org/wiki/Shortest_path_problem>`_

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.shortest_path.ShortestPathModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> sp = graphlab.shortest_path.create(g, source_vid=1)

    We can obtain the shortest path distance from the source vertex to each
    vertex in the graph ``g`` as follows:

    >>> sp_sframe = sp['distance']   # SFrame

    We can add the new distance field to the original graph g using:

    >>> g.vertices['distance_to_1'] = sp['graph'].vertices['distance']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    To get the actual path from the source vertex to any destination vertex:

    >>> path = sp.get_path(vid=10)

    We can obtain an auxiliary graph with additional information corresponding
    to the shortest path from the source vertex to each vertex in the graph
    ``g`` as follows:

    >>> sp_graph = sp['graph']       # SGraph

    See Also
    --------
    ShortestPathModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.sssp.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    toolkit_options = {
        'source_vid': source_vid,
        'weight_field': weight_field,
        'max_distance': max_distance,
        'graph': graph.__proxy__,
    }
    result = _main.run('sssp', toolkit_options, verbose)
    return ShortestPathModel(result['model'])
def create(graph, verbose=True):
    """
    Count the in degree, out degree and total degree of every vertex.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute degree counts.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : DegreeCountingModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.degree_counting.DegreeCountingModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/web-Google.txt.gz',
    ...                         format='snap')
    >>> m = graphlab.degree_counting.create(g)
    >>> g2 = m['graph']
    >>> g2
    SGraph({'num_edges': 5105039, 'num_vertices': 875713})
    Vertex Fields:['__id', 'in_degree', 'out_degree', 'total_degree']
    Edge Fields:['__src_id', '__dst_id']

    >>> g2.vertices.head(5)
    Columns:
        __id    int
        in_degree   int
        out_degree  int
        total_degree    int
    <BLANKLINE>
    Rows: 5
    <BLANKLINE>
    Data:
    +------+-----------+------------+--------------+
    | __id | in_degree | out_degree | total_degree |
    +------+-----------+------------+--------------+
    |  5   |     15    |     7      |      22      |
    |  7   |     3     |     16     |      19      |
    |  8   |     1     |     2      |      3       |
    |  10  |     13    |     11     |      24      |
    |  27  |     19    |     16     |      35      |
    +------+-----------+------------+--------------+

    See Also
    --------
    DegreeCountingModel
    """
    _mt._get_metric_tracker().track('toolkit.graph_analytics.degree_counting.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    # The graph proxy is the only input the toolkit takes for this task.
    graph_proxy = graph.__proxy__
    toolkit_output = _main.run('degree_count', {'graph': graph_proxy}, verbose)
    trained = toolkit_output['model']
    return DegreeCountingModel(trained)
def create(graph, verbose=True):
    """
    Compute the number of weakly connected components in the graph. The
    returned model object holds the total number of weakly connected
    components as well as the component ID of each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the connected components.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : ConnectedComponentsModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Mathworld Wolfram - Weakly Connected Component
      <http://mathworld.wolfram.com/WeaklyConnectedComponent.html>`_

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.connected_components.ConnectedComponentsModel` as
    follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> cc = graphlab.connected_components.create(g)
    >>> cc.summary()

    We can obtain the ``component id`` corresponding to each vertex in the
    graph ``g`` as follows:

    >>> cc_ids = cc['component_id']  # SFrame

    We can obtain a graph with additional information about the ``component
    id`` corresponding to each vertex as follows:

    >>> cc_graph = cc['graph']      # SGraph

    We can add the new component_id field to the original graph g using:

    >>> g.vertices['component_id'] = cc['graph'].vertices['component_id']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    ConnectedComponentsModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.connected_components.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    toolkit_options = {'graph': graph.__proxy__}
    result = _main.run('connected_components', toolkit_options, verbose)
    return ConnectedComponentsModel(result['model'])
Ejemplo n.º 33
0
    def extract_features(self, dataset, layer_id=None):
        """
        Propagate each example of ``dataset`` through the network and return
        an SArray of dense feature vectors, each being the concatenation of
        all the hidden unit values at layer[layer_id]. These feature vectors
        can be used as input to train another classifier such as a
        :py:class:`~graphlab.logistic_classifier.LogisticClassifier`,
        an :py:class:`~graphlab.svm_classifier.SVMClassifier`, another
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`, or a
        :py:class:`~graphlab.boosted_trees_classifier.BoostedTreesClassifier`.
        Input dataset size must be the same as for the training of the model,
        except for images which are automatically resized.

        A pre-trained model for ImageNet, as described by Alex Krizhevsky
        et al., is also available at
        http://s3.amazonaws.com/dato-datasets/deeplearning/imagenet_model_iter45 .
        Using it requires 256 x 256 x 3 images.
        Please see Examples and References for more.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        layer_id : int, optional
            The index of the layer in the network at which the activations are
            taken to be a dense feature vector. Must be a fully-connected layer.
            Default is None, in which case the layer just before the last
            layer is used when the last layer is a connection layer, and the
            layer two before the last layer otherwise.

        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        Raises
        ------
        ValueError
            If no connection or transition layer occurs at or before
            ``layer_id``.

        See Also
        --------
        graphlab.deeplearning.layers

        References
        ----------
        - Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet
          classification with deep convolutional neural networks." Advances in
          neural information processing systems. 2012.

        Examples
        --------
        >>> data = graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/mnist/sframe/train6k')
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        >>> # Now, let's extract features from the last layer
        >>> data['features'] = m.extract_features(data)
        >>> # Now, let's build a new classifier on top of extracted features
        >>> m = graphlab.classifier.create(data,
        ...                                features=['features'],
        ...                                target='label')

        Now, let's see how to load the ImageNet model, and use it for extracting
        features after resizing the data:

        >>> imagenet_model = graphlab.load_model('http://s3.amazonaws.com/dato-datasets/deeplearning/imagenet_model_iter45')
        >>> data['image'] = graphlab.image_analysis.resize(data['image'], 256, 256, 3)
        >>> data['imagenet_features'] = imagenet_model.extract_features(data)

        """
        _mt._get_metric_tracker().track('toolkit.classifier.neuralnet_classifier.extract_features')
        _raise_error_if_not_sframe(dataset, "dataset")

        layers = self.get('network').layers
        last_index = len(layers) - 1
        if layer_id is None:
            # Step back one layer when the network ends in a connection
            # layer, two layers otherwise.
            step_back = 1 if layers[last_index]._type == "CONNECTION" else 2
            layer_id = last_index - step_back
        _numeric_param_check_range("layer_id", layer_id, 0, last_index)

        # Features can only be taken once a connection or transition layer
        # has been passed at or before the requested layer.
        flattening_types = ("CONNECTION", "TRANSITION")
        passed_flattening = any(layers[i]._type in flattening_types
                                for i in range(0, layer_id + 1))
        if not passed_flattening:
            raise ValueError("Features must be extracted from either a network "
                    "with non-image input or a layer after a FlattenLayer. "
                    "Try extracting features from layer following a FlattenLayer.")

        request = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
            'layer_id': layer_id,
        }
        response = _toolkits_main.run('supervised_learning_feature_extraction', request)
        return _map_unity_proxy_to_object(response['extracted'])
Ejemplo n.º 34
0
    def predict_topk(self, dataset, output_type="probability", k=3):
        """
        Return top-k predictions for the ``dataset``, using the trained model.
        Predictions are returned as an SFrame with three columns: `row_id`,
        `class`, and one of `probability`, `rank`, or `score`, depending on
        the ``output_type`` parameter. Input dataset size must be the same as
        for training of the model, except for images which are automatically
        resized.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        output_type : {'probability', 'rank', 'score'}, optional
            Choose the return type of the prediction:

            - `rank`: outputs rank along with class label.
            - `probability`: outputs learned probability along with class label.
            - `score`: same as probability.

        k : int, optional
            Number of classes to return for each input example.

        Returns
        -------
        out : SFrame
            An SFrame with model predictions.

        See Also
        --------
        predict, classify, evaluate

        Examples
        --------
        >>> data = graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/mnist/sframe/train')
        >>> training_data, validation_data = data.random_split(0.8)
        >>> net = graphlab.deeplearning.get_builtin_neuralnet('mnist')
        >>> m = graphlab.neuralnet_classifier.create(training_data,
        ...                                          target='label',
        ...                                          network=net,
        ...                                          max_iterations=3)
        ...
        >>> pred = m.predict_topk(validation_data, k=3)
        >>> pred
        +--------+-------+-------------------+
        | row_id | class |    probability    |
        +--------+-------+-------------------+
        |   0    |   4   |   0.995623886585  |
        |   0    |   9   |  0.0038311756216  |
        |   0    |   7   | 0.000301006948575 |
        |   1    |   1   |   0.928708016872  |
        |   1    |   3   |  0.0440889261663  |
        |   1    |   2   |  0.0176190119237  |
        |   2    |   3   |   0.996967732906  |
        |   2    |   2   |  0.00151345680933 |
        |   2    |   7   | 0.000637513934635 |
        |   3    |   1   |   0.998070061207  |
        |  ...   |  ...  |        ...        |
        +--------+-------+-------------------+
        [35688 rows x 3 columns]
        """
        tracker = _mt._get_metric_tracker()
        tracker.track('toolkit.classifier.neuralnet_classifier.predict_topk')
        _raise_error_if_not_sframe(dataset, "dataset")

        # Missing values are always reported as errors for this call.
        request = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
            'output_type': output_type,
            'topk': k,
            'missing_value_action': 'error',
        }
        response = _toolkits_main.run('supervised_learning_predict_topk', request)
        return _map_unity_proxy_to_object(response['predicted'])
def create(graph,
           reset_probability=0.15,
           threshold=1e-2,
           max_iterations=20,
           _single_precision=False,
           _distributed='auto',
           verbose=True):
    """
    Compute the PageRank of every vertex in the graph. The returned model
    object holds the total PageRank as well as the PageRank value of each
    vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph on which to compute the pagerank value.

    reset_probability : float, optional
        Probability that a random surfer jumps to an arbitrary page.

    threshold : float, optional
        Threshold for convergence, measured in the L1 norm
        (the sum of absolute values) of the delta of each vertex's
        pagerank value.

    max_iterations : int, optional
        The maximum number of iterations to run.

    _single_precision : bool, optional
        If True, run pagerank in single precision. The resulting
        pagerank values may not be accurate for large graphs, but the
        computation should run faster and use less memory.

    _distributed : distributed environment, internal
        Accepted for interface compatibility; this function body does not
        read it.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : PagerankModel

    Raises
    ------
    TypeError
        If ``graph`` is not an SGraph.

    References
    ----------
    - `Wikipedia - PageRank <http://en.wikipedia.org/wiki/PageRank>`_
    - Page, L., et al. (1998) `The PageRank Citation Ranking: Bringing Order to
      the Web <http://ilpubs.stanford.edu:8090/422/1/1999-66.pdf>`_.

    Examples
    --------
    If given an :class:`~graphlab.SGraph` ``g``, we can create
    a :class:`~graphlab.pagerank.PagerankModel` as follows:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> pr = graphlab.pagerank.create(g)

    We can obtain the page rank corresponding to each vertex in the graph ``g``
    using:

    >>> pr_out = pr['pagerank']     # SFrame

    We can add the new pagerank field to the original graph g using:

    >>> g.vertices['pagerank'] = pr['graph'].vertices['pagerank']

    Note that the task above does not require a join because the vertex
    ordering is preserved through ``create()``.

    See Also
    --------
    PagerankModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.pagerank.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    toolkit_options = {'threshold': threshold,
                       'reset_probability': reset_probability,
                       'max_iterations': max_iterations,
                       'single_precision': _single_precision,
                       'graph': graph.__proxy__}

    result = _main.run('pagerank', toolkit_options, verbose)
    return PagerankModel(result['model'])
Ejemplo n.º 36
0
def create(graph, kmin=0, kmax=10, verbose=True):
    """
    Run a K-core decomposition on a graph and wrap the result in a model.

    The returned model object carries the total number of cores as well as
    the core id assigned to each vertex in the graph.

    Parameters
    ----------
    graph : SGraph
        The graph to decompose.

    kmin : int, optional
        Minimum core id. Vertices whose core id is smaller than `kmin` are
        assigned core_id = `kmin`.

    kmax : int, optional
        Maximum core id. Vertices whose core id is larger than `kmax` are
        assigned core_id = `kmax`.

    verbose : bool, optional
        If True, print progress updates.

    Returns
    -------
    out : KcoreModel

    Raises
    ------
    TypeError
        If `graph` is not an SGraph instance.

    References
    ----------
    - Alvarez-Hamelin, J.I., et al. (2005) `K-Core Decomposition: A Tool for the
      Visualization of Large Networks <http://arxiv.org/abs/cs/0504107>`_.

    Examples
    --------
    Given an :class:`~graphlab.SGraph` ``g``, a
    :class:`~graphlab.kcore.KcoreModel` is created with:

    >>> g = graphlab.load_graph('http://snap.stanford.edu/data/email-Enron.txt.gz', format='snap')
    >>> kc = graphlab.kcore.create(g)

    The ``core id`` of each vertex in ``g`` is then available as:

    >>> kcore_id = kc['core_id']     # SFrame

    The new core id field can be attached to the original graph ``g``:

    >>> g.vertices['core_id'] = kc['graph'].vertices['core_id']

    No join is needed for the assignment above because the vertex ordering
    is preserved through ``create()``.

    See Also
    --------
    KcoreModel
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.graph_analytics.kcore.create')

    if not isinstance(graph, _SGraph):
        raise TypeError('graph input must be a SGraph object.')

    # The toolkit call receives the graph's proxy plus the core-id bounds.
    toolkit_args = dict(graph=graph.__proxy__, kmin=kmin, kmax=kmax)
    result = _main.run('kcore', toolkit_args, verbose)
    return KcoreModel(result['model'])
    def extract_features(self, dataset):
        """
        Extract the leaf indices of each tree as features, one feature vector
        per example in the dataset.

        For multiclass classification, each leaf index contains #num_class
        numbers.

        The resulting feature vectors can serve as input for training another
        supervised learning model such as a
        :py:class:`~graphlab.logistic_classifier.LogisticClassifier`,
        an :py:class:`~graphlab.svm_classifier.SVMClassifier`, or a
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        Examples
        --------
        >>> data =  graphlab.SFrame('http://s3.amazonaws.com/dato-datasets/regression/houses.csv')

        >>> # Regression Tree Models
        >>> model = graphlab.boosted_trees_regression.create(data,
        ...                           target='price',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['boosted_tree_features'] = model.extract_features(data)
        >>> model = graphlab.random_forest_regression.create(data,
        ...                           target='price',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['random_forest_features'] = model.extract_features(data)

        >>> # Classification Tree Models
        >>> data['is_expensive'] = data['price'] > 30000
        >>> model = graphlab.boosted_trees_classifier.create(data,
        ...                           target='is_expensive',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['boosted_tree_features'] = model.extract_features(data)

        >>> model = graphlab.random_forest_classifier.create(data,
        ...                           target='is_expensive',
        ...                           features=['bath', 'bedroom', 'size'])
        >>> data['random_forest_features'] = model.extract_features(data)
        """
        # The metric is named after the module that defines this model.
        event_name = '.'.join((self.__module__, 'extract_features'))
        _mt._get_metric_tracker().track(event_name)

        _raise_error_if_not_sframe(dataset, "dataset")

        request = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'dataset': dataset,
        }
        response = _toolkits_main.run(
            'supervised_learning_feature_extraction', request)
        return _map_unity_proxy_to_object(response['extracted'])
    def extract_features(self, dataset, missing_value_action='auto'):
        """
        Extract the leaf indices of each tree as features, one feature vector
        per example in the dataset.

        For multiclass classification, each leaf index contains #num_class
        numbers.

        The resulting feature vectors can serve as input for training another
        supervised learning model such as a
        :py:class:`~graphlab.logistic_classifier.LogisticClassifier`,
        an :py:class:`~graphlab.svm_classifier.SVMClassifier`, or a
        :py:class:`~graphlab.neuralnet_classifier.NeuralNetClassifier`.

        Parameters
        ----------
        dataset : SFrame
            Dataset of new observations. Must include columns with the same
            names as the features used for model training, but does not require
            a target column. Additional columns are ignored.

        missing_value_action : str, optional
            Action to perform when missing values are encountered. This can be
            one of:

            - 'auto': Choose a model dependent missing value policy.
            - 'impute': Proceed with evaluation by filling in the missing
                        values with the mean of the training data. Missing
                        values are also imputed if an entire column of data is
                        missing during evaluation.
            - 'none': Treat missing value as is. Model must be able to handle
                      missing value.
            - 'error' : Do not proceed with prediction and terminate with
                        an error message.

        Returns
        -------
        out : SArray
            An SArray of dtype array.array containing extracted features.

        Examples
        --------
        >>> data =  graphlab.SFrame(
        ...     'https://static.turi.com/datasets/regression/houses.csv')

        >>> # Regression Tree Models
        >>> data['regression_tree_features'] = model.extract_features(data)

        >>> # Classification Tree Models
        >>> data['classification_tree_features'] = model.extract_features(data)
        """
        event_name = '.'.join((self.__module__, 'extract_features'))
        _mt._get_metric_tracker().track(event_name)

        _raise_error_if_not_sframe(dataset, "dataset")

        # 'auto' is resolved through select_default_missing_value_policy;
        # any other value is forwarded unchanged.
        policy = missing_value_action
        if policy == 'auto':
            policy = select_default_missing_value_policy(
                self, 'extract_features')

        request = {
            'model': self.__proxy__,
            'model_name': self.__name__,
            'missing_value_action': policy,
            'dataset': dataset,
        }
        response = _toolkits_main.run(
            'supervised_learning_feature_extraction', request)
        return _map_unity_proxy_to_object(response['extracted'])