def test_gframe(self):
    """Exercise the ``vertices`` / ``edges`` frame views exposed by SGraph.

    Checks, in order: the column names of the views on an empty graph; that
    the two attributes can be neither deleted nor reassigned; that adding
    columns through the views gives the same data as rebuilding a graph
    from modified copies of the frames; that a failed column delete leaves
    ``edges`` usable; and that slicing the views matches slicing the
    frames returned by ``get_edges`` / ``get_vertices``.
    """

    def sorted_frame(sf, by):
        """Convert ``sf`` to a DataFrame ordered by ``by`` with a fresh index."""
        df = sf.to_dataframe()
        # DataFrame.sort was removed in pandas 0.20; prefer sort_values and
        # only fall back to the legacy method on versions that predate it.
        if hasattr(df, 'sort_values'):
            df = df.sort_values(by)
        else:
            df = df.sort(by)
        return df.reset_index(drop=True)

    g = SGraph()
    v = g.vertices
    self.assertSequenceEqual(v.column_names(), ['__id'])
    e = g.edges
    self.assertSequenceEqual(e.column_names(), ['__src_id', '__dst_id'])

    # The vertices and edges attributes cannot be deleted or reassigned.
    with self.assertRaises(AttributeError):
        del g.vertices
    with self.assertRaises(AttributeError):
        del g.edges
    with self.assertRaises(AttributeError):
        g.vertices = SFrame()
    with self.assertRaises(AttributeError):
        g.edges = SFrame()

    # Operations on the views must have the same effect as doing the
    # equivalent work on plain frames and building a new graph from them.
    g = SGraph().add_vertices(self.vertices, 'vid').add_edges(
        self.edges, 'src_id', 'dst_id')
    v = g.vertices
    v['id_col'] = v['__id']
    e = g.edges
    e['src_id_col'] = e['__src_id']
    e['dst_id_col'] = e['__dst_id']

    g2 = SGraph().add_vertices(self.vertices, 'vid').add_edges(
        self.edges, 'src_id', 'dst_id')
    new_vdata = g2.get_vertices()
    new_vdata['id_col'] = new_vdata['__id']
    new_edata = g2.get_edges()
    new_edata['src_id_col'] = new_edata['__src_id']
    new_edata['dst_id_col'] = new_edata['__dst_id']
    g2 = SGraph().add_vertices(new_vdata, '__id').add_edges(
        new_edata, '__src_id', '__dst_id')

    assert_frame_equal(sorted_frame(g.get_vertices(), '__id'),
                       sorted_frame(g2.get_vertices(), '__id'))
    assert_frame_equal(
        sorted_frame(g.get_edges(), ['__src_id', '__dst_id']),
        sorted_frame(g2.get_edges(), ['__src_id', '__dst_id']))

    # Deleting a missing column raises, and edges is still usable afterwards.
    with self.assertRaises(KeyError):
        del g.edges['badcolumn']
    g.edges.head()

    # test slicing
    assert_frame_equal(g.edges[:3].to_dataframe(),
                       g.get_edges()[:3].to_dataframe())
    assert_frame_equal(g.vertices[:3].to_dataframe(),
                       g.get_vertices()[:3].to_dataframe())

    # test add row number
    # NOTE(review): the expected frames below are built but never compared
    # against anything in this block -- the check looks unfinished.
    e_expected = g.get_edges().to_dataframe()
    v_expected = g.get_vertices().to_dataframe()
    e_expected['id'] = range(len(e_expected))
    v_expected['id'] = range(len(v_expected))
def test_sframe_le_append_skip_row_bug_is_fixed(self):
    """
    Regression test for an SFrame lazy-evaluation bug.

    It lives with the SGraph tests because the failure can currently only
    be reproduced through SGraph. The bug shows up when the SFrame has a
    lazy_append and is passed through the logical filter: skip_rows is not
    applied correctly, so the edge SFrame is in a bad state while it is
    not materialized. Keep this test here until a cleaner repro exists.
    """
    n = 12  # smallest n to repro the le_append bug

    # A graph with edge i -> i + 1
    edge_data = SFrame({'src': range(n), 'dst': range(1, n + 1)})
    g = SGraph().add_edges(edge_data, 'src', 'dst')

    lazy_sf = g.get_edges()
    materialized_sf = g.get_edges()
    materialized_sf.__materialize__()

    # The same filter must select the same rows from both frames.
    lazy_rows = lazy_sf[lazy_sf['__dst_id'] == n].to_dataframe()
    materialized_rows = materialized_sf[
        materialized_sf['__dst_id'] == n].to_dataframe()
    assert_frame_equal(lazy_rows, materialized_rows)
def test_empty_graph(self):
    """A freshly constructed SGraph reports zero vertices, zero edges and
    three fields, through every accessor exercised here."""
    g = SGraph()
    self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
    self.assertEqual(len(g.get_fields()), 3)

    # assertTrue(x, y) treats y as the failure message and passes for any
    # non-empty tuple, so shapes have to be compared with assertEqual.
    self.assertEqual(g.get_vertices(format='sframe').shape, (0, 1))
    self.assertEqual(g.get_edges(format='sframe').shape, (0, 2))
    self.assertEqual(g.vertices.shape, (0, 1))
    self.assertEqual(g.edges.shape, (0, 2))

    # assertEqual reports the actual length on failure.
    self.assertEqual(len(g.get_vertices(format='list')), 0)
    self.assertEqual(len(g.get_edges(format='list')), 0)
def test_simple_graph(self):
    """Build the same graph from a DataFrame, an SFrame and Vertex/Edge
    lists, then check its summary, field names and accessor outputs."""
    for input_type in [pd.DataFrame, SFrame, list]:
        g = SGraph()
        if input_type is not list:
            g = g.add_vertices(input_type(self.vertices), vid_field='vid')
            g = g.add_edges(input_type(self.edges),
                            src_field='src_id', dst_field='dst_id')
        else:
            # Each iterrows() item is an (index, row) pair; only the row
            # is needed to build the Vertex / Edge objects.
            vertices = [
                Vertex(row['vid'],
                       {'color': row['color'], 'vec': row['vec']})
                for _, row in self.vertices.iterrows()
            ]
            edges = [
                Edge(row['src_id'], row['dst_id'],
                     {'weight': row['weight']})
                for _, row in self.edges.iterrows()
            ]
            g = g.add_vertices(vertices)
            g = g.add_edges(edges)

        self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
        self.assertItemsEqual(
            g.get_fields(),
            ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

        # NOTE(review): assertTrue(x, y) uses y as the failure message, so
        # the shape tuples in this test are never actually compared; those
        # lines only confirm that each call succeeds. Kept unchanged here
        # -- confirm the intended shapes before switching to assertEqual.

        # dataframe output
        self.assertItemsEqual(
            g.get_vertices(format='dataframe').columns.values,
            ['color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='dataframe').columns.values,
            ['__src_id', '__dst_id', 'weight'])
        self.assertTrue(g.get_edges(format='dataframe').shape, (3, 3))
        self.assertTrue(g.get_vertices(format='dataframe').shape, (4, 3))
        self.assertTrue(
            g.get_vertices(format='dataframe',
                           fields={'color': 'g'}).shape,
            (1, 2))
        self.assertTrue(
            g.get_edges(format='dataframe', fields={'weight': 0.}).shape,
            (1, 3))

        # sframe output
        self.assertItemsEqual(
            g.get_vertices(format='sframe').column_names(),
            ['__id', 'color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='sframe').column_names(),
            ['__src_id', '__dst_id', 'weight'])
        self.assertTrue(g.get_edges(format='sframe').shape, (3, 3))
        self.assertTrue(g.get_vertices(format='sframe').shape, (4, 3))
        self.assertTrue(
            g.get_vertices(format='sframe', fields={'color': 'g'}).shape,
            (1, 2))
        self.assertTrue(
            g.get_edges(format='sframe', fields={'weight': 0.}).shape,
            (1, 3))

        # list output
        vertices = g.get_vertices(format='list')
        edges = g.get_edges(format='list')
        self.assertEqual(len(vertices), 4)
        self.assertEqual(len(edges), 3)

        # get edges is lazy
        edges = g.get_edges()
        self.assertFalse(edges.__is_materialized__())
def test_sframe_le_append_skip_row_bug_is_fixed(self):
    """
    Regression test for an SFrame lazy-evaluation bug.

    It lives with the SGraph tests because the failure can currently only
    be reproduced through SGraph. The bug shows up when the SFrame has a
    lazy_append and is passed through the logical filter: skip_rows is not
    applied correctly, so the edge SFrame is in a bad state while it is
    not materialized. Keep this test here until a cleaner repro exists.
    """
    n = 12  # smallest n to repro the le_append bug

    # A graph with edge i -> i + 1
    edge_data = SFrame({'src': range(n), 'dst': range(1, n + 1)})
    g = SGraph().add_edges(edge_data, 'src', 'dst')

    lazy_sf = g.get_edges()
    materialized_sf = g.get_edges()
    materialized_sf.__materialize__()

    # The same filter must select the same rows from both frames.
    lazy_rows = lazy_sf[lazy_sf['__dst_id'] == n].to_dataframe()
    materialized_rows = materialized_sf[
        materialized_sf['__dst_id'] == n].to_dataframe()
    assert_frame_equal(lazy_rows, materialized_rows)
def test_simple_graph(self):
    """Build the same graph from a DataFrame, an SFrame and Vertex/Edge
    lists, then check its summary, field names and accessor outputs."""
    for input_type in [pd.DataFrame, SFrame, list]:
        g = SGraph()
        if input_type is not list:
            g = g.add_vertices(input_type(self.vertices), vid_field='vid')
            g = g.add_edges(input_type(self.edges),
                            src_field='src_id', dst_field='dst_id')
        else:
            # Each iterrows() item is an (index, row) pair; only the row
            # is needed to build the Vertex / Edge objects.
            vertices = [
                Vertex(row['vid'],
                       {'color': row['color'], 'vec': row['vec']})
                for _, row in self.vertices.iterrows()
            ]
            edges = [
                Edge(row['src_id'], row['dst_id'],
                     {'weight': row['weight']})
                for _, row in self.edges.iterrows()
            ]
            g = g.add_vertices(vertices)
            g = g.add_edges(edges)

        self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
        self.assertItemsEqual(
            g.get_fields(),
            ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

        # NOTE(review): assertTrue(x, y) uses y as the failure message, so
        # the shape tuples in this test are never actually compared; those
        # lines only confirm that each call succeeds. Kept unchanged here
        # -- confirm the intended shapes before switching to assertEqual.

        # dataframe output
        self.assertItemsEqual(
            g.get_vertices(format='dataframe').columns.values,
            ['color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='dataframe').columns.values,
            ['__src_id', '__dst_id', 'weight'])
        self.assertTrue(g.get_edges(format='dataframe').shape, (3, 3))
        self.assertTrue(g.get_vertices(format='dataframe').shape, (4, 3))
        self.assertTrue(
            g.get_vertices(format='dataframe',
                           fields={'color': 'g'}).shape,
            (1, 2))
        self.assertTrue(
            g.get_edges(format='dataframe', fields={'weight': 0.}).shape,
            (1, 3))

        # sframe output
        self.assertItemsEqual(
            g.get_vertices(format='sframe').column_names(),
            ['__id', 'color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='sframe').column_names(),
            ['__src_id', '__dst_id', 'weight'])
        self.assertTrue(g.get_edges(format='sframe').shape, (3, 3))
        self.assertTrue(g.get_vertices(format='sframe').shape, (4, 3))
        self.assertTrue(
            g.get_vertices(format='sframe', fields={'color': 'g'}).shape,
            (1, 2))
        self.assertTrue(
            g.get_edges(format='sframe', fields={'weight': 0.}).shape,
            (1, 3))

        # list output
        vertices = g.get_vertices(format='list')
        edges = g.get_edges(format='list')
        self.assertEqual(len(vertices), 4)
        self.assertEqual(len(edges), 3)

        # get edges is lazy
        edges = g.get_edges()
        self.assertFalse(edges.__is_materialized__())
def test_gframe(self):
    """Exercise the ``vertices`` / ``edges`` frame views exposed by SGraph.

    Checks, in order: the column names of the views on an empty graph; that
    the two attributes can be neither deleted nor reassigned; that adding
    columns through the views gives the same data as rebuilding a graph
    from modified copies of the frames; that a failed column delete leaves
    ``edges`` usable; and that slicing the views matches slicing the
    frames returned by ``get_edges`` / ``get_vertices``.
    """

    def sorted_frame(sf, by):
        """Convert ``sf`` to a DataFrame ordered by ``by`` with a fresh index."""
        df = sf.to_dataframe()
        # DataFrame.sort was removed in pandas 0.20; prefer sort_values and
        # only fall back to the legacy method on versions that predate it.
        if hasattr(df, 'sort_values'):
            df = df.sort_values(by)
        else:
            df = df.sort(by)
        return df.reset_index(drop=True)

    g = SGraph()
    v = g.vertices
    self.assertSequenceEqual(v.column_names(), ['__id'])
    e = g.edges
    self.assertSequenceEqual(e.column_names(), ['__src_id', '__dst_id'])

    # The vertices and edges attributes cannot be deleted or reassigned.
    with self.assertRaises(AttributeError):
        del g.vertices
    with self.assertRaises(AttributeError):
        del g.edges
    with self.assertRaises(AttributeError):
        g.vertices = SFrame()
    with self.assertRaises(AttributeError):
        g.edges = SFrame()

    # Operations on the views must have the same effect as doing the
    # equivalent work on plain frames and building a new graph from them.
    g = SGraph().add_vertices(self.vertices, 'vid').add_edges(
        self.edges, 'src_id', 'dst_id')
    v = g.vertices
    v['id_col'] = v['__id']
    e = g.edges
    e['src_id_col'] = e['__src_id']
    e['dst_id_col'] = e['__dst_id']

    g2 = SGraph().add_vertices(self.vertices, 'vid').add_edges(
        self.edges, 'src_id', 'dst_id')
    new_vdata = g2.get_vertices()
    new_vdata['id_col'] = new_vdata['__id']
    new_edata = g2.get_edges()
    new_edata['src_id_col'] = new_edata['__src_id']
    new_edata['dst_id_col'] = new_edata['__dst_id']
    g2 = SGraph().add_vertices(new_vdata, '__id').add_edges(
        new_edata, '__src_id', '__dst_id')

    assert_frame_equal(sorted_frame(g.get_vertices(), '__id'),
                       sorted_frame(g2.get_vertices(), '__id'))
    assert_frame_equal(
        sorted_frame(g.get_edges(), ['__src_id', '__dst_id']),
        sorted_frame(g2.get_edges(), ['__src_id', '__dst_id']))

    # Deleting a missing column raises, and edges is still usable afterwards.
    with self.assertRaises(KeyError):
        del g.edges['badcolumn']
    g.edges.head()

    # test slicing
    assert_frame_equal(g.edges[:3].to_dataframe(),
                       g.get_edges()[:3].to_dataframe())
    assert_frame_equal(g.vertices[:3].to_dataframe(),
                       g.get_vertices()[:3].to_dataframe())

    # test add row number
    # NOTE(review): the expected frames below are built but never compared
    # against anything in this block -- the check looks unfinished.
    e_expected = g.get_edges().to_dataframe()
    v_expected = g.get_vertices().to_dataframe()
    e_expected['id'] = range(len(e_expected))
    v_expected['id'] = range(len(v_expected))