def _SGraphFromJsonTree(json_str):
    """
    Convert the Json Tree to SGraph.

    Parameters
    ----------
    json_str : str
        JSON text with a top-level ``vertices`` list and ``edges`` list.
        Every vertex object must contain an ``id`` key; every edge object
        must contain ``src`` and ``dst`` keys. All remaining keys of an
        object become attributes of that vertex/edge, with the attribute
        name coerced through ``str``.

    Returns
    -------
    out : SGraph
        Graph holding all listed vertices, plus the listed edges when the
        ``edges`` list is non-empty.

    Raises
    ------
    ValueError
        If `json_str` is not valid JSON (raised by ``json.loads``).
    KeyError
        If a required key (``vertices``, ``edges``, ``id``, ``src``,
        ``dst``) is missing.
    """
    g = json.loads(json_str)

    # dict.items() is used instead of the Python-2-only dict.iteritems() so
    # the function runs unchanged on both Python 2 and Python 3.
    vertices = [
        _Vertex(x['id'],
                {str(k): v for k, v in x.items() if k != 'id'})
        for x in g['vertices']
    ]
    edges = [
        _Edge(x['src'], x['dst'],
              {str(k): v for k, v in x.items()
               if k != 'src' and k != 'dst'})
        for x in g['edges']
    ]

    sg = _SGraph().add_vertices(vertices)
    # add_edges is only called when there is at least one edge to add.
    if edges:
        sg = sg.add_edges(edges)
    return sg
import json
from graphlab.data_structures.sframe import SArray as _SArray
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.data_structures.sgraph import SGraph as _SGraph
from graphlab.data_structures.sgraph import Vertex as _Vertex
from graphlab.data_structures.sgraph import Edge as _Edge
from graphlab.cython.cy_sarray import UnitySArrayProxy
from graphlab.cython.cy_sframe import UnitySFrameProxy
from graphlab.cython.cy_graph import UnityGraphProxy
from graphlab.toolkits._main import ToolkitError
import logging as _logging

# Maps each Unity proxy class to a callable that wraps a proxy instance in
# the matching user-facing type (SFrame, SArray or SGraph) via `_proxy=`.
_proxy_map = {UnitySFrameProxy: (lambda x: _SFrame(_proxy=x)),
              UnitySArrayProxy: (lambda x: _SArray(_proxy=x)),
              UnityGraphProxy: (lambda x: _SGraph(_proxy=x))}


def _add_docstring(format_dict):
    """
    Format a doc-string on the fly.

    @arg format_dict: A dictionary to format the doc-strings

    Example:

        @_add_docstring({'context': __doc_string_context})
        def predict(x):
            '''
            {context}
            >> model.predict(data)
            '''
            return x

    NOTE(review): only this docstring is visible in this part of the file;
    the statements that apply `format_dict` are not shown here -- confirm
    the implementation before relying on the example above.
    """
def get_path(self, vid, show=False, highlight=None, **kwargs):
    """
    Return one shortest path from the model's source vertex to `vid`.

    The source vertex is the one the shortest path result was computed
    for (``self['source_vid']``). The path is rebuilt by following the
    ``parent_row_id`` links of a lookup table that is generated on the
    first call and cached on the model. Optionally, the path is plotted.

    Parameters
    ----------
    vid : vertex ID
        ID of the destination vertex. The source vertex ID is specified
        when the shortest path result is first computed.

    show : boolean
        Indicates whether the path should be plotted. Plotting happens only
        when this is exactly ``True`` and the path has more than one
        vertex. Default is False.

    highlight : list
        If the path is plotted, identifies the vertices (by vertex ID) that
        should be highlighted by plotting in a different color. IDs that
        are not on the path are ignored.

    kwargs :
        Additional parameters passed into the :func:`graphlab.SGraph.show`
        when `show` is True.

    Returns
    -------
    path : list
        List of pairs of (vertex_id, distance) in the path, ordered from
        the source vertex to `vid`.

    Raises
    ------
    ValueError
        If `vid` is not in the lookup table, or its distance is larger
        than 1e5.
    TypeError
        If the path is plotted and `highlight` is neither None nor a list.

    Examples
    --------
    >>> m.get_path(vid=0, show=True)

    See Also
    --------
    SGraph.show
    """
    # Build the lookup table lazily and keep it for later queries.
    if self._path_query_table is None:
        self._path_query_table = self._generate_path_sframe()

    source_vid = self['source_vid']
    lookup = self._path_query_table

    if vid not in lookup['vid']:
        raise ValueError('Destination vertex id ' + str(vid) + ' not found')

    record = lookup[lookup['vid'] == vid][0]
    dist = record['distance']
    if dist > 1e5:
        raise ValueError(
            'The distance to {} is too large to show the path.'.format(vid))

    # Walk parent links from the destination back towards the source. The
    # hop counter bounds the walk by the table size so a malformed table
    # cannot loop forever.
    trail = [(vid, dist)]
    hop_limit = len(lookup)
    hops = 0
    while record['distance'] != 0 and hops < hop_limit:
        parent_row = record['parent_row_id']
        assert 0 <= parent_row < hop_limit
        record = lookup[parent_row]
        trail.append((record['vid'], record['distance']))
        hops += 1

    # The walk must have ended on the source vertex, within the bound.
    assert record['vid'] == source_vid
    assert hops < hop_limit

    # The trail was collected destination-first; callers get source-first.
    trail.reverse()

    if show is not True or len(trail) <= 1:
        return trail

    # Assemble a graph holding only the edges between consecutive vertices.
    sub_g = _SGraph()
    for (src, _), (dst, _) in zip(trail, trail[1:]):
        hop_edges = self['graph'].get_edges(src_ids=[src], dst_ids=[dst])
        sub_g = sub_g.add_edges(hop_edges,
                                src_field='__src_id',
                                dst_field='__dst_id')

    path_highlight = []
    if highlight is not None:
        if not isinstance(highlight, list):
            raise TypeError("Input 'highlight' must be a list.")
        # Only vertices that actually lie on the path can be highlighted.
        on_path = set(name for name, _ in trail)
        path_highlight = list(on_path.intersection(set(highlight)))

    plot = sub_g.show(vlabel='id', highlight=path_highlight, **kwargs)
    if _HAS_IPYTHON:
        _IPython.display(plot)
    return trail
def get_path(self, vid, show=False, highlight=None, **kwargs):
    """
    Get the shortest path to a destination vertex.

    Returns one of the shortest paths between the source vertex stored in
    the model (``self['source_vid']``) and the query vertex `vid`. The
    result is reconstructed from a cached table of ``parent_row_id`` links,
    which is generated the first time this method runs. The path can
    optionally be plotted.

    Parameters
    ----------
    vid : vertex ID
        ID of the destination vertex. The source vertex ID is specified
        when the shortest path result is first computed.

    show : boolean
        Indicates whether the path should be plotted. Only the value
        ``True`` itself triggers plotting, and only for paths with more
        than one vertex. Default is False.

    highlight : list
        If the path is plotted, identifies the vertices (by vertex ID) that
        should be highlighted by plotting in a different color. Entries
        that are not on the path have no effect.

    kwargs :
        Additional parameters passed into the :func:`graphlab.SGraph.show`
        when `show` is True.

    Returns
    -------
    path : list
        List of pairs of (vertex_id, distance), starting at the source
        vertex and ending at `vid`.

    Raises
    ------
    ValueError
        If `vid` does not appear in the lookup table, or if the distance
        to `vid` exceeds 1e5.
    TypeError
        If plotting is requested and `highlight` is not a list.

    Examples
    --------
    >>> m.get_path(vid=0, show=True)

    See Also
    --------
    SGraph.show
    """
    if self._path_query_table is None:
        # First query: generate the table once and cache it on the model.
        self._path_query_table = self._generate_path_sframe()

    source_vid = self['source_vid']
    table = self._path_query_table

    if vid not in table['vid']:
        raise ValueError('Destination vertex id ' + str(vid) + ' not found')

    matches = table[table['vid'] == vid]
    current = matches[0]
    dist = current['distance']
    if dist > 1e5:
        raise ValueError('The distance to {} is too large to show the path.'.format(vid))

    # Follow parent rows until a row with distance 0 is reached; the step
    # counter caps the walk at the number of rows in the table.
    steps_back = [(vid, dist)]
    row_count = len(table)
    n_steps = 0
    while current['distance'] != 0 and n_steps < row_count:
        parent_idx = current['parent_row_id']
        assert 0 <= parent_idx < row_count
        current = table[parent_idx]
        steps_back.append((current['vid'], current['distance']))
        n_steps += 1

    assert current['vid'] == source_vid
    assert n_steps < row_count

    # Flip destination-to-source order into source-to-destination order.
    steps_back.reverse()
    path = steps_back

    if show is True and len(path) > 1:
        # Collect the edge(s) between each pair of neighbouring path
        # vertices into a fresh graph.
        sub_g = _SGraph()
        for head, tail in zip(path, path[1:]):
            sub_g = sub_g.add_edges(
                self['graph'].get_edges(src_ids=[head[0]], dst_ids=[tail[0]]),
                src_field='__src_id',
                dst_field='__dst_id')

        path_highlight = []
        if highlight is not None:
            if not isinstance(highlight, list):
                raise TypeError("Input 'highlight' must be a list.")
            # Restrict the highlight list to vertices present on the path.
            path_names = set(entry[0] for entry in path)
            path_highlight = list(path_names & set(highlight))

        plot = sub_g.show(vlabel='id', highlight=path_highlight, **kwargs)
        if _HAS_IPYTHON:
            _IPython.display(plot)

    return path
def similarity_graph(self, k=5, radius=None, include_self_edges=False,
                     output_type='SGraph', verbose=True):
    """
    Construct the similarity graph on the reference dataset, which is
    already stored in the model. This is conceptually very similar to
    running `query` with the reference set, but this method is optimized
    for the purpose, syntactically simpler, and by default excludes
    self-edges.

    Parameters
    ----------
    k : int, optional
        Maximum number of neighbors to return for each point in the
        dataset. Setting this to ``None`` deactivates the constraint, so
        that all neighbors are returned within ``radius`` of a given point.

    radius : float, optional
        For a given point, only neighbors within this distance are
        returned. The default is ``None``, in which case the ``k`` nearest
        neighbors are returned for each query point, regardless of
        distance.

    include_self_edges : bool, optional
        For most distance functions, each point in the model's reference
        dataset is its own nearest neighbor. If this parameter is set to
        False, this result is ignored, and the nearest neighbors are
        returned *excluding* the point itself.

    output_type : {'SGraph', 'SFrame'}, optional
        By default, the results are returned in the form of an SGraph,
        where each point in the reference dataset is a vertex and an edge
        A -> B indicates that vertex B is a nearest neighbor of vertex A.
        If 'output_type' is set to 'SFrame', the output is in the same form
        as the results of the 'query' method: an SFrame with columns
        indicating the query label (in this case the query data is the same
        as the reference data), reference label, distance between the two
        points, and the rank of the neighbor. Any value other than
        'SFrame' produces an SGraph.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : SFrame or SGraph
        The type of the output object depends on the 'output_type'
        parameter. See the parameter description for more detail.

    Raises
    ------
    ValueError
        If `k` is not None and is not a positive integer, or if `radius`
        is not None and is not a non-negative int or float.

    Notes
    -----
    - If both ``k`` and ``radius`` are set to ``None``, each data point is
      matched to the entire dataset. If the reference dataset has
      :math:`n` rows, the output is an SFrame with :math:`n^2` rows (or an
      SGraph with :math:`n^2` edges).

    - For models created with the 'lsh' method, the output similarity graph
      may have fewer vertices than there are data points in the original
      reference set. Because LSH is an approximate method, a query point
      may have fewer than 'k' neighbors. If LSH returns no neighbors at all
      for a query and self-edges are excluded, the query point is omitted
      from the results.

    Examples
    --------
    First construct an SFrame and create a nearest neighbors model:

    >>> sf = graphlab.SFrame({'x1': [0.98, 0.62, 0.11],
    ...                       'x2': [0.69, 0.58, 0.36]})
    ...
    >>> model = graphlab.nearest_neighbors.create(sf, distance='euclidean')

    Unlike the ``query`` method, there is no need for a second dataset with
    ``similarity_graph``.

    >>> g = model.similarity_graph(k=1)  # an SGraph
    >>> g.show()
    >>> g.edges
    +----------+----------+----------------+------+
    | __src_id | __dst_id |    distance    | rank |
    +----------+----------+----------------+------+
    |    0     |    1     | 0.376430604494 |  1   |
    |    2     |    1     | 0.55542776308  |  1   |
    |    1     |    0     | 0.376430604494 |  1   |
    +----------+----------+----------------+------+
    """
    _mt._get_metric_tracker().track(
        'toolkit.nearest_neighbors.similarity_graph')

    ## Validate each input; an omitted (None) value is replaced by the
    ## negative sentinel that is passed on to the toolkit call.
    if k is None:
        k = -1
    elif not isinstance(k, int):
        raise ValueError("Input 'k' must be an integer.")
    elif k <= 0:
        raise ValueError("Input 'k' must be larger than 0.")

    if radius is None:
        radius = -1.0
    elif not isinstance(radius, (int, float)):
        raise ValueError("Input 'radius' must be an integer or float.")
    elif radius < 0:
        raise ValueError("Input 'radius' must be non-negative.")

    opts = {'model': self.__proxy__,
            'model_name': self.__name__,
            'k': k,
            'radius': radius,
            'include_self_edges': include_self_edges}

    result = _graphlab.toolkits._main.run(
        '_nearest_neighbors.similarity_graph', opts, verbose)
    knn = _SFrame(None, _proxy=result['neighbors'])

    if output_type == "SFrame":
        return knn

    # Anything other than "SFrame" yields the graph form of the result.
    return _SGraph(edges=knn, src_field='query_label',
                   dst_field='reference_label')
def similarity_graph(self, k=5, radius=None, include_self_edges=False,
                     output_type='SGraph', verbose=True):
    """
    Build the nearest-neighbor similarity graph of the reference dataset
    stored in the model.

    Conceptually this is very similar to running `query` with the
    reference set itself, but this method is optimized for the purpose,
    syntactically simpler, and leaves out self-edges unless they are
    requested.

    Parameters
    ----------
    k : int, optional
        Maximum number of neighbors to return for each point in the
        dataset. Setting this to ``None`` deactivates the constraint, so
        that all neighbors are returned within ``radius`` of a given point.

    radius : float, optional
        For a given point, only neighbors within this distance are
        returned. The default is ``None``, in which case the ``k`` nearest
        neighbors are returned for each query point, regardless of
        distance.

    include_self_edges : bool, optional
        For most distance functions, each point in the model's reference
        dataset is its own nearest neighbor. If this parameter is set to
        False, this result is ignored, and the nearest neighbors are
        returned *excluding* the point itself.

    output_type : {'SGraph', 'SFrame'}, optional
        By default, the results are returned in the form of an SGraph,
        where each point in the reference dataset is a vertex and an edge
        A -> B indicates that vertex B is a nearest neighbor of vertex A.
        If 'output_type' is set to 'SFrame', the output is in the same form
        as the results of the 'query' method: an SFrame with columns
        indicating the query label (in this case the query data is the same
        as the reference data), reference label, distance between the two
        points, and the rank of the neighbor. The SGraph form is returned
        for every value that is not equal to 'SFrame'.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : SFrame or SGraph
        The type of the output object depends on the 'output_type'
        parameter. See the parameter description for more detail.

    Raises
    ------
    ValueError
        If `k` is given but is not an integer larger than 0, or if
        `radius` is given but is not a non-negative integer or float.

    Notes
    -----
    - If both ``k`` and ``radius`` are set to ``None``, each data point is
      matched to the entire dataset. If the reference dataset has
      :math:`n` rows, the output is an SFrame with :math:`n^2` rows (or an
      SGraph with :math:`n^2` edges).

    - For models created with the 'lsh' method, the output similarity graph
      may have fewer vertices than there are data points in the original
      reference set. Because LSH is an approximate method, a query point
      may have fewer than 'k' neighbors. If LSH returns no neighbors at all
      for a query and self-edges are excluded, the query point is omitted
      from the results.

    Examples
    --------
    First construct an SFrame and create a nearest neighbors model:

    >>> sf = graphlab.SFrame({'x1': [0.98, 0.62, 0.11],
    ...                       'x2': [0.69, 0.58, 0.36]})
    ...
    >>> model = graphlab.nearest_neighbors.create(sf, distance='euclidean')

    Unlike the ``query`` method, there is no need for a second dataset with
    ``similarity_graph``.

    >>> g = model.similarity_graph(k=1)  # an SGraph
    >>> g.show()
    >>> g.edges
    +----------+----------+----------------+------+
    | __src_id | __dst_id |    distance    | rank |
    +----------+----------+----------------+------+
    |    0     |    1     | 0.376430604494 |  1   |
    |    2     |    1     | 0.55542776308  |  1   |
    |    1     |    0     | 0.376430604494 |  1   |
    +----------+----------+----------------+------+
    """
    tracker = _mt._get_metric_tracker()
    tracker.track('toolkit.nearest_neighbors.similarity_graph')

    ## Validate inputs. Checks only apply to values the caller supplied.
    k_given = k is not None
    if k_given and not isinstance(k, int):
        raise ValueError("Input 'k' must be an integer.")
    if k_given and k <= 0:
        raise ValueError("Input 'k' must be larger than 0.")

    radius_given = radius is not None
    if radius_given and not isinstance(radius, (int, float)):
        raise ValueError("Input 'radius' must be an integer or float.")
    if radius_given and radius < 0:
        raise ValueError("Input 'radius' must be non-negative.")

    ## Negative sentinels stand in for 'None' in the options passed on.
    opts = {
        'model': self.__proxy__,
        'model_name': self.__name__,
        'k': k if k_given else -1,
        'radius': radius if radius_given else -1.0,
        'include_self_edges': include_self_edges,
    }

    result = _graphlab.toolkits._main.run(
        '_nearest_neighbors.similarity_graph', opts, verbose)
    neighbors = _SFrame(None, _proxy=result['neighbors'])

    if output_type == "SFrame":
        out = neighbors
    else:
        # Edges run from the query label to the reference label.
        out = _SGraph(edges=neighbors,
                      src_field='query_label',
                      dst_field='reference_label')
    return out