def test_simple_graph(self):
    """Build the same graph from each supported input type and verify it.

    The graph is constructed from ``self.vertices`` / ``self.edges`` passed
    as a pandas DataFrame, as an SFrame, and as lists of Vertex / Edge
    objects. For every variant the summary, the field names, and the
    dataframe / sframe / list views of vertices and edges are checked.
    """
    for input_type in [pd.DataFrame, SFrame, list]:
        g = SGraph()
        if input_type is list:
            # iterrows() yields (index, row) pairs; only the row is needed.
            vertices = [
                Vertex(row['vid'], {'color': row['color'], 'vec': row['vec']})
                for _, row in self.vertices.iterrows()
            ]
            edges = [
                Edge(row['src_id'], row['dst_id'], {'weight': row['weight']})
                for _, row in self.edges.iterrows()
            ]
            g = g.add_vertices(vertices)
            g = g.add_edges(edges)
        else:
            g = g.add_vertices(input_type(self.vertices), vid_field='vid')
            g = g.add_edges(input_type(self.edges),
                            src_field='src_id', dst_field='dst_id')

        self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
        self.assertItemsEqual(
            g.get_fields(),
            ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

        # Shape checks use assertEqual: assertTrue(shape, expected) would
        # treat `expected` as the failure message and pass for any non-empty
        # shape tuple. Column counts below follow from the column-name
        # assertions in this test; the single-row counts of the filtered
        # queries are the values the original assertions stated.
        # NOTE(review): row counts of filtered queries depend on the fixture
        # data in self.vertices / self.edges -- confirm against setUp.

        # --- dataframe views ---
        self.assertItemsEqual(
            g.get_vertices(format='dataframe').columns.values,
            ['color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='dataframe').columns.values,
            ['__src_id', '__dst_id', 'weight'])
        self.assertEqual(g.get_edges(format='dataframe').shape, (3, 3))
        # Two columns ('color', 'vec'), as asserted above.
        self.assertEqual(g.get_vertices(format='dataframe').shape, (4, 2))
        self.assertEqual(
            g.get_vertices(format='dataframe', fields={'color': 'g'}).shape,
            (1, 2))
        self.assertEqual(
            g.get_edges(format='dataframe', fields={'weight': 0.}).shape,
            (1, 3))

        # --- sframe views ---
        self.assertItemsEqual(
            g.get_vertices(format='sframe').column_names(),
            ['__id', 'color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='sframe').column_names(),
            ['__src_id', '__dst_id', 'weight'])
        self.assertEqual(g.get_edges(format='sframe').shape, (3, 3))
        self.assertEqual(g.get_vertices(format='sframe').shape, (4, 3))
        # Three columns ('__id', 'color', 'vec'), as asserted above.
        self.assertEqual(
            g.get_vertices(format='sframe', fields={'color': 'g'}).shape,
            (1, 3))
        self.assertEqual(
            g.get_edges(format='sframe', fields={'weight': 0.}).shape,
            (1, 3))

        # --- list views ---
        vertices = g.get_vertices(format='list')
        edges = g.get_edges(format='list')
        self.assertEqual(len(vertices), 4)
        self.assertEqual(len(edges), 3)

        # get edges is lazy
        edges = g.get_edges()
        self.assertFalse(edges.__is_materialized__())
def test_save_load(self):
    """Save a graph in binary, CSV and JSON form and check what comes back."""
    expected_summary = {'num_vertices': 4, 'num_edges': 3}
    expected_fields = {'__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'}

    graph = SGraph().add_vertices(self.vertices, 'vid')
    graph = graph.add_edges(self.edges, 'src_id', 'dst_id')

    # Binary: save into a temporary directory and load it back.
    with util.TempDirectory() as binary_dir:
        graph.save(binary_dir)
        reloaded = load_graph(binary_dir, 'binary')
        self.assertEqual(reloaded.summary(), expected_summary)
        self.assertItemsEqual(reloaded.get_fields(), expected_fields)

    # CSV: rebuild a graph from the written vertex and edge files.
    with util.TempDirectory() as csv_dir:
        graph.save(csv_dir, format='csv')
        vertex_sf = SFrame.read_csv(csv_dir + "/vertices.csv")
        edge_sf = SFrame.read_csv(csv_dir + "/edges.csv")
        rebuilt = SGraph().add_edges(edge_sf, '__src_id', '__dst_id')
        rebuilt = rebuilt.add_vertices(vertex_sf, '__id')
        self.assertEqual(rebuilt.summary(), expected_summary)
        self.assertItemsEqual(rebuilt.get_fields(), expected_fields)

    # JSON: saving to a '.json' path must produce a parseable document
    # containing both top-level keys.
    with tempfile.NamedTemporaryFile(suffix='.json') as json_file:
        graph.save(json_file.name)
        with open(json_file.name, 'r') as handle:
            parsed = json.loads(handle.read())
        self.assertIn("vertices", parsed)
        self.assertIn("edges", parsed)
def test_empty_graph(self):
    """A freshly constructed SGraph reports no vertices and no edges.

    Checks the summary counts, that exactly three fields are reported, the
    shapes of the vertex and edge frames (via both the getters and the
    ``vertices`` / ``edges`` attributes), and the list-format accessors.
    """
    g = SGraph()
    self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
    self.assertEqual(len(g.get_fields()), 3)

    # assertEqual, not assertTrue: assertTrue(shape, expected) treats
    # `expected` as the failure message and passes for any non-empty tuple,
    # so the shape would never actually be compared.
    self.assertEqual(g.get_vertices(format='sframe').shape, (0, 1))
    self.assertEqual(g.get_edges(format='sframe').shape, (0, 2))
    self.assertEqual(g.vertices.shape, (0, 1))
    self.assertEqual(g.edges.shape, (0, 2))

    self.assertEqual(len(g.get_vertices(format='list')), 0)
    self.assertEqual(len(g.get_edges(format='list')), 0)
def test_empty_graph(self):
    """A freshly constructed SGraph reports no vertices and no edges.

    Checks the summary counts, that exactly three fields are reported, the
    shapes of the vertex and edge frames (via both the getters and the
    ``vertices`` / ``edges`` attributes), and the list-format accessors.
    """
    g = SGraph()
    self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
    self.assertEqual(len(g.get_fields()), 3)

    # assertEqual, not assertTrue: assertTrue(shape, expected) treats
    # `expected` as the failure message and passes for any non-empty tuple,
    # so the shape would never actually be compared.
    self.assertEqual(g.get_vertices(format='sframe').shape, (0, 1))
    self.assertEqual(g.get_edges(format='sframe').shape, (0, 2))
    self.assertEqual(g.vertices.shape, (0, 1))
    self.assertEqual(g.edges.shape, (0, 2))

    self.assertEqual(len(g.get_vertices(format='list')), 0)
    self.assertEqual(len(g.get_edges(format='list')), 0)
def test_select_query_with_same_vertex_edge_field(self):
    """select_fields on a name that is both a vertex field and an edge field.

    'weight' is added to the vertices and to the edges; it is expected to
    appear twice in get_fields(), before and after selecting it.
    """
    id_fields = ['__id', '__src_id', '__dst_id']

    vertex_sf = SFrame({'__id': range(10)})
    edge_sf = SFrame({'__src_id': range(10), '__dst_id': range(1, 11)})
    graph = SGraph(vertex_sf, edge_sf)

    graph.vertices['weight'] = 0
    graph.vertices['v'] = 0
    graph.edges['weight'] = 0
    graph.edges['e'] = 0
    self.assertItemsEqual(graph.get_fields(),
                          ['v', 'e', 'weight', 'weight'] + id_fields)

    selected = graph.select_fields('weight')
    self.assertItemsEqual(selected.get_fields(),
                          ['weight', 'weight'] + id_fields)
def test_select_query(self):
    """select_fields keeps the id fields plus only the requested fields.

    Also checks get_fields() after deleting an edge field, deleting a
    vertex field and adding a new vertex field.
    """
    graph = SGraph().add_vertices(self.vertices, 'vid')
    graph = graph.add_edges(self.edges, 'src_id', 'dst_id')

    # One vertex field and one edge field.
    subset = graph.select_fields(["color", "weight"])
    self.assertSequenceEqual(
        subset.get_fields(),
        ['__id', 'color', '__src_id', '__dst_id', 'weight'])

    # A vertex field only.
    subset = graph.select_fields(["color"])
    self.assertSequenceEqual(
        subset.get_fields(),
        ['__id', 'color', '__src_id', '__dst_id'])

    # Mutate the field set in place, then re-check the reported fields.
    del graph.edges['weight']
    del graph.vertices['vec']
    graph.vertices['color2'] = graph.vertices['color']
    self.assertSequenceEqual(
        graph.get_fields(),
        ['__id', 'color', 'color2', '__src_id', '__dst_id'])

    # An empty selection leaves only the id fields.
    subset = graph.select_fields([])
    self.assertSequenceEqual(
        subset.get_fields(),
        ['__id', '__src_id', '__dst_id'])
def test_select_query_with_same_vertex_edge_field(self):
    """select_fields on a name that is both a vertex field and an edge field.

    'weight' is added to the vertices and to the edges; it is expected to
    appear twice in get_fields(), before and after selecting it.
    """
    id_fields = ['__id', '__src_id', '__dst_id']

    vertex_sf = SFrame({'__id': range(10)})
    edge_sf = SFrame({'__src_id': range(10), '__dst_id': range(1, 11)})
    graph = SGraph(vertex_sf, edge_sf)

    graph.vertices['weight'] = 0
    graph.vertices['v'] = 0
    graph.edges['weight'] = 0
    graph.edges['e'] = 0
    self.assertItemsEqual(graph.get_fields(),
                          ['v', 'e', 'weight', 'weight'] + id_fields)

    selected = graph.select_fields('weight')
    self.assertItemsEqual(selected.get_fields(),
                          ['weight', 'weight'] + id_fields)
def test_robust_parse(self):
    """Vertices built from a DataFrame with NaN/None cells keep their values.

    Each column's non-missing values are compared, after sorting, between
    the source DataFrame and the dataframe / sframe vertex views.
    """
    source = pd.DataFrame({
        'int': [1, 2, 3],
        'float': [1., 2., 3.],
        'str': ['one', 'two', 'three'],
        'nan': [np.nan, np.nan, np.nan],
        'sparse_int': [1, 2, np.nan],
        'sparse_float': [np.nan, 2., 3.],
        'sparse_str': [None, 'two', None]
    })
    graph = SGraph().add_vertices(source)

    expected_fields = source.columns.tolist() + ['__id', '__src_id', '__dst_id']
    self.assertItemsEqual(graph.get_fields(), expected_fields)

    as_dataframe = graph.get_vertices(format='dataframe')
    as_sframe = graph.get_vertices(format='sframe')
    for name in source.columns:
        # potential bug: the dataframe view is missing the 'nan' column.
        if name == 'nan':
            continue
        expected_values = sorted(source[name].dropna())
        self.assertItemsEqual(sorted(as_dataframe[name].dropna()),
                              expected_values)
        self.assertItemsEqual(sorted(as_sframe[name].dropna()),
                              expected_values)
def test_select_query(self):
    """select_fields keeps the id fields plus only the requested fields.

    Also checks get_fields() after deleting an edge field, deleting a
    vertex field and adding a new vertex field.
    """
    graph = SGraph().add_vertices(self.vertices, 'vid')
    graph = graph.add_edges(self.edges, 'src_id', 'dst_id')

    # One vertex field and one edge field.
    subset = graph.select_fields(["color", "weight"])
    self.assertSequenceEqual(
        subset.get_fields(),
        ['__id', 'color', '__src_id', '__dst_id', 'weight'])

    # A vertex field only.
    subset = graph.select_fields(["color"])
    self.assertSequenceEqual(
        subset.get_fields(),
        ['__id', 'color', '__src_id', '__dst_id'])

    # Mutate the field set in place, then re-check the reported fields.
    del graph.edges['weight']
    del graph.vertices['vec']
    graph.vertices['color2'] = graph.vertices['color']
    self.assertSequenceEqual(
        graph.get_fields(),
        ['__id', 'color', 'color2', '__src_id', '__dst_id'])

    # An empty selection leaves only the id fields.
    subset = graph.select_fields([])
    self.assertSequenceEqual(
        subset.get_fields(),
        ['__id', '__src_id', '__dst_id'])
def test_save_load(self):
    """Save a graph in binary, CSV and JSON form and check what comes back."""
    expected_summary = {'num_vertices': 4, 'num_edges': 3}
    expected_fields = {'__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'}

    graph = SGraph().add_vertices(self.vertices, 'vid')
    graph = graph.add_edges(self.edges, 'src_id', 'dst_id')

    # Binary: save into a temporary directory and load it back.
    with util.TempDirectory() as binary_dir:
        graph.save(binary_dir)
        reloaded = load_graph(binary_dir, 'binary')
        self.assertEqual(reloaded.summary(), expected_summary)
        self.assertItemsEqual(reloaded.get_fields(), expected_fields)

    # CSV: rebuild a graph from the written vertex and edge files.
    with util.TempDirectory() as csv_dir:
        graph.save(csv_dir, format='csv')
        vertex_sf = SFrame.read_csv(csv_dir + "/vertices.csv")
        edge_sf = SFrame.read_csv(csv_dir + "/edges.csv")
        rebuilt = SGraph().add_edges(edge_sf, '__src_id', '__dst_id')
        rebuilt = rebuilt.add_vertices(vertex_sf, '__id')
        self.assertEqual(rebuilt.summary(), expected_summary)
        self.assertItemsEqual(rebuilt.get_fields(), expected_fields)

    # JSON: saving to a '.json' path must produce a parseable document
    # containing both top-level keys.
    with tempfile.NamedTemporaryFile(suffix='.json') as json_file:
        graph.save(json_file.name)
        with open(json_file.name, 'r') as handle:
            parsed = json.loads(handle.read())
        self.assertIn("vertices", parsed)
        self.assertIn("edges", parsed)
def test_robust_parse(self):
    """Vertices built from a DataFrame with NaN/None cells keep their values.

    Each column's non-missing values are compared, after sorting, between
    the source DataFrame and the dataframe / sframe vertex views.
    """
    source = pd.DataFrame({
        'int': [1, 2, 3],
        'float': [1., 2., 3.],
        'str': ['one', 'two', 'three'],
        'nan': [np.nan, np.nan, np.nan],
        'sparse_int': [1, 2, np.nan],
        'sparse_float': [np.nan, 2., 3.],
        'sparse_str': [None, 'two', None]
    })
    graph = SGraph().add_vertices(source)

    expected_fields = source.columns.tolist() + ['__id', '__src_id', '__dst_id']
    self.assertItemsEqual(graph.get_fields(), expected_fields)

    as_dataframe = graph.get_vertices(format='dataframe')
    as_sframe = graph.get_vertices(format='sframe')
    for name in source.columns:
        # potential bug: the dataframe view is missing the 'nan' column.
        if name == 'nan':
            continue
        expected_values = sorted(source[name].dropna())
        self.assertItemsEqual(sorted(as_dataframe[name].dropna()),
                              expected_values)
        self.assertItemsEqual(sorted(as_sframe[name].dropna()),
                              expected_values)
def test_simple_graph(self):
    """Build the same graph from each supported input type and verify it.

    The graph is constructed from ``self.vertices`` / ``self.edges`` passed
    as a pandas DataFrame, as an SFrame, and as lists of Vertex / Edge
    objects. For every variant the summary, the field names, and the
    dataframe / sframe / list views of vertices and edges are checked.
    """
    for input_type in [pd.DataFrame, SFrame, list]:
        g = SGraph()
        if input_type is list:
            # iterrows() yields (index, row) pairs; only the row is needed.
            vertices = [
                Vertex(row['vid'], {'color': row['color'], 'vec': row['vec']})
                for _, row in self.vertices.iterrows()
            ]
            edges = [
                Edge(row['src_id'], row['dst_id'], {'weight': row['weight']})
                for _, row in self.edges.iterrows()
            ]
            g = g.add_vertices(vertices)
            g = g.add_edges(edges)
        else:
            g = g.add_vertices(input_type(self.vertices), vid_field='vid')
            g = g.add_edges(input_type(self.edges),
                            src_field='src_id', dst_field='dst_id')

        self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
        self.assertItemsEqual(
            g.get_fields(),
            ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

        # Shape checks use assertEqual: assertTrue(shape, expected) would
        # treat `expected` as the failure message and pass for any non-empty
        # shape tuple. Column counts below follow from the column-name
        # assertions in this test; the single-row counts of the filtered
        # queries are the values the original assertions stated.
        # NOTE(review): row counts of filtered queries depend on the fixture
        # data in self.vertices / self.edges -- confirm against setUp.

        # --- dataframe views ---
        self.assertItemsEqual(
            g.get_vertices(format='dataframe').columns.values,
            ['color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='dataframe').columns.values,
            ['__src_id', '__dst_id', 'weight'])
        self.assertEqual(g.get_edges(format='dataframe').shape, (3, 3))
        # Two columns ('color', 'vec'), as asserted above.
        self.assertEqual(g.get_vertices(format='dataframe').shape, (4, 2))
        self.assertEqual(
            g.get_vertices(format='dataframe', fields={'color': 'g'}).shape,
            (1, 2))
        self.assertEqual(
            g.get_edges(format='dataframe', fields={'weight': 0.}).shape,
            (1, 3))

        # --- sframe views ---
        self.assertItemsEqual(
            g.get_vertices(format='sframe').column_names(),
            ['__id', 'color', 'vec'])
        self.assertItemsEqual(
            g.get_edges(format='sframe').column_names(),
            ['__src_id', '__dst_id', 'weight'])
        self.assertEqual(g.get_edges(format='sframe').shape, (3, 3))
        self.assertEqual(g.get_vertices(format='sframe').shape, (4, 3))
        # Three columns ('__id', 'color', 'vec'), as asserted above.
        self.assertEqual(
            g.get_vertices(format='sframe', fields={'color': 'g'}).shape,
            (1, 3))
        self.assertEqual(
            g.get_edges(format='sframe', fields={'weight': 0.}).shape,
            (1, 3))

        # --- list views ---
        vertices = g.get_vertices(format='list')
        edges = g.get_edges(format='list')
        self.assertEqual(len(vertices), 4)
        self.assertEqual(len(edges), 3)

        # get edges is lazy
        edges = g.get_edges()
        self.assertFalse(edges.__is_materialized__())