def test_missing_value_vids(self):
    """
    A None in the vertex id column, or in the column used as either edge
    endpoint, must make graph construction + summary() raise RuntimeError.
    """
    vertex_sf = SFrame()
    vertex_sf['vid'] = [1, 2, 3, None]

    edge_sf = SFrame()
    edge_sf['src'] = [1, 2, 3, None]
    edge_sf['dst'] = [4, 4, 4, 4]

    def summarize_vertices():
        return SGraph().add_vertices(vertex_sf, 'vid').summary()

    def summarize_edges():
        return SGraph().add_edges(edge_sf, 'src', 'dst').summary()

    def summarize_flipped_edges():
        # Same edge frame, but the column holding None is now the destination.
        return SGraph().add_edges(edge_sf, 'dst', 'src').summary()

    attempts = (summarize_vertices, summarize_edges, summarize_flipped_edges)
    for attempt in attempts:
        self.assertRaises(RuntimeError, attempt)
def test_select_query_with_same_vertex_edge_field(self):
    """
    A field name ('weight') present on both vertices and edges is reported
    twice by get_fields(), and select_fields('weight') keeps both copies
    alongside the id fields.
    """
    id_fields = ['__id', '__src_id', '__dst_id']

    vertex_sf = SFrame({'__id': range(10)})
    edge_sf = SFrame({'__src_id': range(10), '__dst_id': range(1, 11)})
    graph = SGraph(vertex_sf, edge_sf)

    # 'weight' is shared by both sides; 'v' and 'e' are side-specific.
    for column in ('weight', 'v'):
        graph.vertices[column] = 0
    for column in ('weight', 'e'):
        graph.edges[column] = 0

    expected_all = ['v', 'e', 'weight', 'weight'] + id_fields
    self.assertItemsEqual(graph.get_fields(), expected_all)

    selected = graph.select_fields('weight')
    expected_selected = ['weight', 'weight'] + id_fields
    self.assertItemsEqual(selected.get_fields(), expected_selected)
def test_sframe_le_append_skip_row_bug_is_fixed(self):
    """
    Regression test for an SFrame lazy-evaluation bug.

    It lives with the SGraph tests because the only known repro goes
    through SGraph. The bug shows up when an SFrame carries a lazy_append
    and is then passed through the logical filter: skip_rows is applied
    incorrectly, leaving the unmaterialized edge SFrame in a bad state.
    Keep this test until a cleaner, SFrame-only repro is found.
    """
    n = 12  # smallest n to repro the le_append bug

    # A graph with edge i -> i + 1
    graph = SGraph().add_edges(
        SFrame({'src': range(n), 'dst': range(1, n + 1)}),
        'src',
        'dst')

    # Fetch the edges twice; only the second copy is forced to materialize.
    lazy_edges = graph.get_edges()
    eager_edges = graph.get_edges()
    eager_edges.__materialize__()

    # The same filter must produce the same rows on both copies.
    lazy_rows = lazy_edges[lazy_edges['__dst_id'] == n].to_dataframe()
    eager_rows = eager_edges[eager_edges['__dst_id'] == n].to_dataframe()
    assert_frame_equal(lazy_rows, eager_rows)
def test_graph_constructor(self):
    """
    SGraph(vertices, edges) built from another graph's frames reproduces
    that graph, and several constructor calls on the raw fixture frames
    raise ValueError.
    """
    def ordered(frame, keys):
        # Normalize row order and index so frames compare positionally.
        return frame.to_dataframe().sort(keys).reset_index(drop=True)

    base = SGraph().add_vertices(self.vertices, 'vid').add_edges(
        self.edges, 'src_id', 'dst_id')
    rebuilt = SGraph(base.vertices, base.edges)
    # flip around src and dst; the result is not inspected further, so this
    # only checks that construction succeeds.
    flipped = SGraph(base.vertices, base.edges,
                     src_field="__dst_id", dst_field="__src_id")

    assert_frame_equal(
        ordered(base.vertices, '__id'),
        ordered(rebuilt.vertices, '__id'))

    edge_keys = ['__src_id', '__dst_id']
    assert_frame_equal(
        ordered(base.edges, edge_keys),
        ordered(rebuilt.edges, edge_keys))

    rejected_calls = (
        # No id field names given for the raw fixture frames.
        lambda: SGraph(SFrame(self.vertices), SFrame(self.edges)),
        # Positional field names 'vid', '__src_id', '__dst_id'.
        lambda: SGraph(SFrame(self.vertices), SFrame(self.edges),
                       'vid', '__src_id', '__dst_id'),
        # vid_field explicitly None.
        lambda: SGraph(SFrame(self.vertices), SFrame(self.edges),
                       vid_field=None, src_field='src_id',
                       dst_field='dst_id'),
    )
    for construct in rejected_calls:
        self.assertRaises(ValueError, construct)
def get(self, field):
    """
    Look up and return the value stored under a queryable field.

    The queryable fields are listed in the documentation at the top of the
    model class. A field can be read either by indexing or by calling this
    method:

    >>> out = m['graph']      # m is a trained graph analytics model
    >>> out = m.get('graph')  # equivalent to previous line

    Parameters
    ----------
    field : string
        Name of the field to be retrieved.

    Returns
    -------
    out : value
        The current value of the requested field. Graph and SFrame proxy
        objects are wrapped in SGraph / SFrame before being returned.

    Raises
    ------
    KeyError
        If ``field`` is not one of ``self.list_fields()``.

    See Also
    --------
    list_fields

    Examples
    --------
    >>> g = m.get('graph')
    """
    _mt._get_metric_tracker().track('toolkit.graph_analytics.get')

    # Reject unknown fields up front, listing what is available.
    if field not in self.list_fields():
        raise KeyError(
            'Key \"%s\" not in model. Available fields are %s.' %
            (field, ', '.join(self.list_fields())))

    value = self.__proxy__.get(field)
    value_type = type(value)
    if value_type == UnityGraphProxy:
        return SGraph(_proxy=value)
    if value_type == UnitySFrameProxy:
        return SFrame(_proxy=value)
    return value
def set_edges_empty(g):
    """Assign a newly constructed, empty SFrame to ``g.edges``."""
    empty_frame = SFrame()
    g.edges = empty_frame
def set_vertices_empty(g):
    """Assign a newly constructed, empty SFrame to ``g.vertices``."""
    empty_frame = SFrame()
    g.vertices = empty_frame
def __repr__(self):
    """
    Emits a brief summary of all the statistics as a string.

    The text is assembled from up to five sections, in this order:

    1. A table of scalar statistics (length, min, max, mean, sum,
       variance, standard deviation, missing-value count, unique-value
       count), each tagged with whether it is exact. A statistic whose
       accessor raises is left out of the table.
    2. The most frequent items (at most 10), ordered by descending count.
    3. Quantiles at 0, 1, 5, 25, 50, 75, 95, 99 and 100 percent.
    4. Dictionary key / value summaries.
    5. An element summary.

    Sections 3-5 are best-effort: if the underlying call raises, the rest
    of that section is skipped and formatting continues.

    Returns
    -------
    out : str
        The formatted summary with tabs expanded to 8 columns.
    """
    # (accessor name, display label, value for the 'is exact' column)
    fields = [
        ['size', 'Length', 'Yes'],
        ['min', 'Min', 'Yes'],
        ['max', 'Max', 'Yes'],
        ['mean', 'Mean', 'Yes'],
        ['sum', 'Sum', 'Yes'],
        ['var', 'Variance', 'Yes'],
        ['std', 'Standard Deviation', 'Yes'],
        ['num_undefined', '# Missing Values', 'Yes'],
        ['num_unique', '# unique values', 'No'],
    ]

    s = '\n'
    result = []
    for attr_name, label, is_exact in fields:
        try:
            method_to_call = getattr(self, attr_name)
            result.append([label, str(method_to_call()), is_exact])
        except Exception:
            # Best-effort: drop statistics that cannot be computed, but let
            # KeyboardInterrupt / SystemExit propagate.
            pass

    sf = SArray(result).unpack(column_name_prefix="")
    sf.rename({'0': 'item', '1': 'value', '2': 'is exact'})
    s += sf.__str__(footer=False)
    s += "\n"

    s += "\nMost frequent items:\n"
    frequent = self.frequent_items()
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    sorted_freq = sorted(frequent.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    if not sorted_freq:
        s += " -- All elements appear with less than 0.01% frequency -- \n"
    else:
        sorted_freq = sorted_freq[:10]
        sf = SFrame()
        sf.add_column(SArray(['count']), 'value')
        for item, count in sorted_freq:
            # One column per item: header is the item, cell is its count.
            sf.add_column(SArray([count]), str(item))
        s += sf.__str__(footer=False) + "\n"
    s += "\n"

    try:
        # Probe call: if quantiles cannot be computed this raises before
        # the section header is written, so the whole section is skipped.
        self.quantile(0)
        s += "Quantiles: \n"
        sf = SFrame()
        for q in [0.0, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 1.00]:
            sf.add_column(SArray([self.quantile(q)]),
                          str(int(q * 100)) + '%')
        s += sf.__str__(footer=False) + "\n"
    except Exception:
        pass

    try:
        # Fetch both summaries first so the headers are only written when
        # both calls succeed.
        t_k = self.dict_key_summary()
        t_v = self.dict_value_summary()
        s += "\n******** Dictionary Element Key Summary ********\n"
        s += t_k.__repr__()
        s += "\n******** Dictionary Element Value Summary ********\n"
        s += t_v.__repr__() + '\n'
    except Exception:
        pass

    try:
        t_k = self.element_summary()
        s += "\n******** Element Summary ********\n"
        s += t_k.__repr__() + '\n'
    except Exception:
        pass

    return s.expandtabs(8)