Exemple #1
0
    def test_simple_graph(self):
        """Build one graph from DataFrame, SFrame and list inputs and check it.

        For every input container the test verifies the graph summary, the
        field names, and the column names and shapes returned by
        ``get_vertices`` / ``get_edges`` in each output format.
        """
        expected_summary = {'num_vertices': 4, 'num_edges': 3}
        for input_type in [pd.DataFrame, SFrame, list]:
            g = SGraph()
            if input_type is list:
                # Convert every fixture row into an explicit Vertex / Edge object.
                vertices = [Vertex(row['vid'], {'color': row['color'], 'vec': row['vec']})
                            for _, row in self.vertices.iterrows()]
                edges = [Edge(row['src_id'], row['dst_id'], {'weight': row['weight']})
                         for _, row in self.edges.iterrows()]
                g = g.add_vertices(vertices)
                g = g.add_edges(edges)
            else:
                g = g.add_vertices(input_type(self.vertices), vid_field='vid')
                g = g.add_edges(input_type(self.edges), src_field='src_id', dst_field='dst_id')
            self.assertEqual(g.summary(), expected_summary)
            self.assertItemsEqual(g.get_fields(), ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

            # The shape checks below used to be assertTrue(shape, expected).
            # assertTrue treats its second argument as the failure message, so
            # those checks could never fail; they are real comparisons now.
            # Column counts are kept consistent with the column-name assertions
            # in this test. The row counts of the filtered queries are the ones
            # the original checks stated.
            # NOTE(review): confirm the filtered row counts against the fixture.
            self.assertItemsEqual(g.get_vertices(format='dataframe').columns.values, ['color', 'vec'])
            self.assertItemsEqual(g.get_edges(format='dataframe').columns.values, ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='dataframe').shape, (3, 3))
            # Two columns, not three: '__id' is not among the dataframe columns.
            self.assertEqual(g.get_vertices(format='dataframe').shape, (4, 2))
            self.assertEqual(g.get_vertices(format='dataframe', fields={'color': 'g'}).shape, (1, 2))
            self.assertEqual(g.get_edges(format='dataframe', fields={'weight': 0.}).shape, (1, 3))

            self.assertItemsEqual(g.get_vertices(format='sframe').column_names(), ['__id', 'color', 'vec'])
            self.assertItemsEqual(g.get_edges(format='sframe').column_names(), ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='sframe').shape, (3, 3))
            self.assertEqual(g.get_vertices(format='sframe').shape, (4, 3))
            # Three columns here: the sframe output includes '__id'.
            self.assertEqual(g.get_vertices(format='sframe', fields={'color': 'g'}).shape, (1, 3))
            self.assertEqual(g.get_edges(format='sframe', fields={'weight': 0.}).shape, (1, 3))

            vertices = g.get_vertices(format='list')
            edges = g.get_edges(format='list')
            self.assertEqual(len(vertices), 4)
            self.assertEqual(len(edges), 3)

            # get edges is lazy
            edges = g.get_edges()
            self.assertFalse(edges.__is_materialized__())
Exemple #2
0
    def test_save_load(self):
        """Save a graph three ways and check what comes back.

        Covers a directory save reloaded with ``load_graph(..., 'binary')``,
        a CSV save rebuilt from ``vertices.csv`` / ``edges.csv``, and a save
        to a ``.json`` path whose content is parsed with the ``json`` module.
        """
        expected_summary = {'num_vertices': 4, 'num_edges': 3}
        expected_fields = {'__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'}
        g = SGraph().add_vertices(self.vertices, 'vid').add_edges(self.edges, 'src_id', 'dst_id')

        # Directory save, reloaded as 'binary'.
        with util.TempDirectory() as f:
            g.save(f)
            g2 = load_graph(f, 'binary')
            self.assertEqual(g2.summary(), expected_summary)
            self.assertItemsEqual(g2.get_fields(), expected_fields)

        # CSV save: rebuild the graph from the two CSV files that were written.
        with util.TempDirectory() as f:
            g.save(f, format='csv')
            vertices = SFrame.read_csv(f + "/vertices.csv")
            edges = SFrame.read_csv(f + "/edges.csv")
            g2 = SGraph().add_edges(edges, '__src_id', '__dst_id').add_vertices(vertices, '__id')
            self.assertEqual(g2.summary(), expected_summary)
            self.assertItemsEqual(g2.get_fields(), expected_fields)

        # Save to a '.json' path (presumably the format is inferred from the
        # suffix; confirm against SGraph.save) and parse the file as JSON.
        with tempfile.NamedTemporaryFile(suffix='.json') as f:
            g.save(f.name)
            with open(f.name, 'r') as f2:
                g2 = json.load(f2)
            # assertIn reports the missing key and the container on failure.
            self.assertIn("vertices", g2)
            self.assertIn("edges", g2)
Exemple #3
0
 def test_empty_graph(self):
     """A freshly constructed SGraph has no vertices or edges in any view."""
     g = SGraph()
     self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
     self.assertEqual(len(g.get_fields()), 3)
     # These were assertTrue(shape, expected). assertTrue treats its second
     # argument as the failure message, so the shapes were never compared.
     self.assertEqual(g.get_vertices(format='sframe').shape, (0, 1))
     self.assertEqual(g.get_edges(format='sframe').shape, (0, 2))
     self.assertEqual(g.vertices.shape, (0, 1))
     self.assertEqual(g.edges.shape, (0, 2))
     # assertEqual reports the actual length on failure.
     self.assertEqual(len(g.get_vertices(format='list')), 0)
     self.assertEqual(len(g.get_edges(format='list')), 0)
Exemple #4
0
 def test_empty_graph(self):
     """Check that a new, empty SGraph reports zero rows everywhere."""
     g = SGraph()
     self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
     self.assertEqual(len(g.get_fields()), 3)
     # The original assertTrue(shape, expected) calls used `expected` only as
     # the failure message and could not fail; compare the shapes for real.
     empty_vertex_shape = (0, 1)
     empty_edge_shape = (0, 2)
     self.assertEqual(g.get_vertices(format='sframe').shape, empty_vertex_shape)
     self.assertEqual(g.get_edges(format='sframe').shape, empty_edge_shape)
     self.assertEqual(g.vertices.shape, empty_vertex_shape)
     self.assertEqual(g.edges.shape, empty_edge_shape)
     # The list views must be empty as well.
     self.assertEqual(len(g.get_vertices(format='list')), 0)
     self.assertEqual(len(g.get_edges(format='list')), 0)
Exemple #5
0
 def test_select_query_with_same_vertex_edge_field(self):
     """select_fields on a name that is both a vertex and an edge field."""
     vertex_frame = SFrame({'__id': range(10)})
     edge_frame = SFrame({'__src_id': range(10), '__dst_id': range(1, 11)})
     g = SGraph(vertex_frame, edge_frame)

     # 'weight' is added to both sides; 'v' and 'e' exist on one side only.
     g.vertices['weight'] = 0
     g.vertices['v'] = 0
     g.edges['weight'] = 0
     g.edges['e'] = 0

     id_fields = ['__id', '__src_id', '__dst_id']
     # 'weight' is expected twice: once per side of the graph.
     all_fields = ['v', 'e', 'weight', 'weight'] + id_fields
     self.assertItemsEqual(g.get_fields(), all_fields)

     selected = g.select_fields('weight')
     kept_fields = ['weight', 'weight'] + id_fields
     self.assertItemsEqual(selected.get_fields(), kept_fields)
Exemple #6
0
 def test_select_query(self):
     """Check the ordered field lists produced by select_fields."""
     g = SGraph().add_vertices(self.vertices, 'vid')
     g = g.add_edges(self.edges, 'src_id', 'dst_id')

     # One vertex field plus one edge field.
     selected = g.select_fields(["color", "weight"])
     expected = ['__id', 'color', '__src_id', '__dst_id', 'weight']
     self.assertSequenceEqual(selected.get_fields(), expected)

     # A vertex field only.
     selected = g.select_fields(["color"])
     expected = ['__id', 'color', '__src_id', '__dst_id']
     self.assertSequenceEqual(selected.get_fields(), expected)

     # Drop one column from each side, then add a copy of 'color'.
     del g.edges['weight']
     del g.vertices['vec']
     g.vertices['color2'] = g.vertices['color']
     expected = ['__id', 'color', 'color2', '__src_id', '__dst_id']
     self.assertSequenceEqual(g.get_fields(), expected)

     # An empty selection leaves only the id fields.
     selected = g.select_fields([])
     expected = ['__id', '__src_id', '__dst_id']
     self.assertSequenceEqual(selected.get_fields(), expected)
Exemple #7
0
 def test_select_query_with_same_vertex_edge_field(self):
     """Select a field name shared by the vertex and edge frames."""
     id_columns = ['__id', '__src_id', '__dst_id']

     g = SGraph(
         SFrame({'__id': range(10)}),
         SFrame({'__src_id': range(10), '__dst_id': range(1, 11)}))
     # Same statement order as before: vertex columns first, then edge columns.
     g.vertices['weight'] = 0
     g.vertices['v'] = 0
     g.edges['weight'] = 0
     g.edges['e'] = 0

     # 'weight' appears once for the vertices and once for the edges.
     fields_before = g.get_fields()
     self.assertItemsEqual(fields_before, ['v', 'e', 'weight', 'weight'] + id_columns)

     fields_after = g.select_fields('weight').get_fields()
     self.assertItemsEqual(fields_after, ['weight', 'weight'] + id_columns)
Exemple #8
0
    def test_robust_parse(self):
        """Vertices from a DataFrame with missing values keep their non-null data."""
        df = pd.DataFrame({
            'int': [1, 2, 3],
            'float': [1., 2., 3.],
            'str': ['one', 'two', 'three'],
            'nan': [np.nan, np.nan, np.nan],
            'sparse_int': [1, 2, np.nan],
            'sparse_float': [np.nan, 2., 3.],
            'sparse_str': [None, 'two', None],
        })
        g = SGraph().add_vertices(df)
        id_fields = ['__id', '__src_id', '__dst_id']
        self.assertItemsEqual(g.get_fields(), df.columns.tolist() + id_fields)

        vertex_df = g.get_vertices(format='dataframe')
        vertex_sf = g.get_vertices(format='sframe')
        for col in df.columns:
            # potential bug: the dataframe output is missing the 'nan' column.
            if col == 'nan':
                continue
            # Compare only the non-null values, in sorted order.
            expected = sorted(df[col].dropna())
            self.assertItemsEqual(sorted(vertex_df[col].dropna()), expected)
            self.assertItemsEqual(sorted(vertex_sf[col].dropna()), expected)
Exemple #9
0
 def test_select_query(self):
     """select_fields returns the id fields plus the requested ones, in order."""
     id_vertex = ['__id']
     id_edge = ['__src_id', '__dst_id']

     base = SGraph()
     g = base.add_vertices(self.vertices, 'vid')
     g = g.add_edges(self.edges, 'src_id', 'dst_id')

     color_and_weight = g.select_fields(["color", "weight"])
     self.assertSequenceEqual(
         color_and_weight.get_fields(),
         id_vertex + ['color'] + id_edge + ['weight'])

     color_only = g.select_fields(["color"])
     self.assertSequenceEqual(
         color_only.get_fields(),
         id_vertex + ['color'] + id_edge)

     # Mutate the graph in place: drop two columns, then duplicate 'color'.
     del g.edges['weight']
     del g.vertices['vec']
     g.vertices['color2'] = g.vertices['color']
     self.assertSequenceEqual(
         g.get_fields(),
         id_vertex + ['color', 'color2'] + id_edge)

     nothing_selected = g.select_fields([])
     self.assertSequenceEqual(nothing_selected.get_fields(), id_vertex + id_edge)
Exemple #10
0
    def test_save_load(self):
        """Check graph persistence via a directory save, CSV and a .json path.

        The directory save is reloaded with ``load_graph(..., 'binary')``, the
        CSV save is rebuilt from the files it writes, and the ``.json`` save
        is parsed with the ``json`` module.
        """
        summary = {'num_vertices': 4, 'num_edges': 3}
        fields = {'__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'}
        g = SGraph().add_vertices(self.vertices, 'vid').add_edges(self.edges, 'src_id', 'dst_id')

        # Save into a temporary directory and reload as 'binary'.
        with util.TempDirectory() as f:
            g.save(f)
            g2 = load_graph(f, 'binary')
            self.assertEqual(g2.summary(), summary)
            self.assertItemsEqual(g2.get_fields(), fields)

        # Save as CSV, then rebuild the graph from the two files.
        with util.TempDirectory() as f:
            g.save(f, format='csv')
            vertices = SFrame.read_csv(f + "/vertices.csv")
            edges = SFrame.read_csv(f + "/edges.csv")
            g2 = SGraph().add_edges(edges, '__src_id', '__dst_id').add_vertices(vertices, '__id')
            self.assertEqual(g2.summary(), summary)
            self.assertItemsEqual(g2.get_fields(), fields)

        # Save to a '.json' file name and parse the result.
        # NOTE(review): presumably the format is inferred from the suffix; confirm.
        with tempfile.NamedTemporaryFile(suffix='.json') as f:
            g.save(f.name)
            with open(f.name, 'r') as f2:
                g2 = json.load(f2)
            # assertIn gives a readable failure message, unlike assertTrue(k in d).
            self.assertIn("vertices", g2)
            self.assertIn("edges", g2)
Exemple #11
0
    def test_robust_parse(self):
        """Load vertices from a DataFrame with NaN/None cells and compare columns."""
        columns = {
            'int': [1, 2, 3],
            'float': [1., 2., 3.],
            'str': ['one', 'two', 'three'],
            'nan': [np.nan, np.nan, np.nan],
            'sparse_int': [1, 2, np.nan],
            'sparse_float': [np.nan, 2., 3.],
            'sparse_str': [None, 'two', None]
        }
        df = pd.DataFrame(columns)
        g = SGraph().add_vertices(df)
        expected_fields = df.columns.tolist() + ['__id', '__src_id', '__dst_id']
        self.assertItemsEqual(g.get_fields(), expected_fields)

        as_dataframe = g.get_vertices(format='dataframe')
        as_sframe = g.get_vertices(format='sframe')
        # potential bug: the dataframe output is missing the 'nan' column,
        # so that column is left out of the comparison.
        for col in (name for name in df.columns if name != 'nan'):
            # Values are compared after dropping nulls and sorting.
            source_values = sorted(list(df[col].dropna()))
            for result in (as_dataframe, as_sframe):
                self.assertItemsEqual(sorted(list(result[col].dropna())), source_values)
Exemple #12
0
    def test_simple_graph(self):
        """Check a small graph built from DataFrame, SFrame and list inputs.

        For each input container the graph summary, the field names, and the
        column names and shapes of the vertex and edge views are verified in
        the 'dataframe', 'sframe' and 'list' formats.
        """
        for input_type in [pd.DataFrame, SFrame, list]:
            g = SGraph()
            if input_type is list:
                # Turn each fixture row into an explicit Vertex / Edge object.
                vertices = [
                    Vertex(row['vid'], {
                        'color': row['color'],
                        'vec': row['vec']
                    }) for _, row in self.vertices.iterrows()
                ]
                edges = [
                    Edge(row['src_id'], row['dst_id'],
                         {'weight': row['weight']})
                    for _, row in self.edges.iterrows()
                ]
                g = g.add_vertices(vertices)
                g = g.add_edges(edges)
            else:
                g = g.add_vertices(input_type(self.vertices), vid_field='vid')
                g = g.add_edges(input_type(self.edges),
                                src_field='src_id',
                                dst_field='dst_id')
            self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
            self.assertItemsEqual(
                g.get_fields(),
                ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

            # The shape checks below were assertTrue(shape, expected), which
            # uses `expected` only as the failure message and so always
            # passed. They are equality checks now. Column counts follow the
            # column-name assertions in this test; the row counts of the
            # filtered queries are the ones the original checks stated.
            # NOTE(review): confirm the filtered row counts against the fixture.
            self.assertItemsEqual(
                g.get_vertices(format='dataframe').columns.values,
                ['color', 'vec'])
            self.assertItemsEqual(
                g.get_edges(format='dataframe').columns.values,
                ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='dataframe').shape, (3, 3))
            # Two columns: '__id' is not among the dataframe columns.
            self.assertEqual(g.get_vertices(format='dataframe').shape, (4, 2))
            self.assertEqual(
                g.get_vertices(format='dataframe', fields={
                    'color': 'g'
                }).shape, (1, 2))
            self.assertEqual(
                g.get_edges(format='dataframe', fields={
                    'weight': 0.
                }).shape, (1, 3))

            self.assertItemsEqual(
                g.get_vertices(format='sframe').column_names(),
                ['__id', 'color', 'vec'])
            self.assertItemsEqual(
                g.get_edges(format='sframe').column_names(),
                ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='sframe').shape, (3, 3))
            self.assertEqual(g.get_vertices(format='sframe').shape, (4, 3))
            # Three columns: the sframe output includes '__id'.
            self.assertEqual(
                g.get_vertices(format='sframe', fields={
                    'color': 'g'
                }).shape, (1, 3))
            self.assertEqual(
                g.get_edges(format='sframe', fields={
                    'weight': 0.
                }).shape, (1, 3))

            vertices = g.get_vertices(format='list')
            edges = g.get_edges(format='list')
            self.assertEqual(len(vertices), 4)
            self.assertEqual(len(edges), 3)

            # get edges is lazy
            edges = g.get_edges()
            self.assertFalse(edges.__is_materialized__())