Esempio n. 1
0
    def test_gframe(self):
        """Exercise the ``vertices`` / ``edges`` frame views exposed by an SGraph.

        Covers: default columns of an empty graph, that the two attributes can
        be neither deleted nor reassigned, that column edits made through the
        views match rebuilding a graph from edited copies, that a failed column
        delete leaves ``edges`` usable, and slicing.
        """
        # An empty graph only carries the reserved id columns.
        empty = SGraph()
        self.assertSequenceEqual(empty.vertices.column_names(), ['__id'])
        self.assertSequenceEqual(empty.edges.column_names(),
                                 ['__src_id', '__dst_id'])

        # Test vertices and edge attributes cannot be modified
        with self.assertRaises(AttributeError):
            del empty.vertices
        with self.assertRaises(AttributeError):
            del empty.edges
        with self.assertRaises(AttributeError):
            empty.vertices = SFrame()
        with self.assertRaises(AttributeError):
            empty.edges = SFrame()

        def build_graph():
            # Fresh graph from the fixture frames held on the test case.
            with_vertices = SGraph().add_vertices(self.vertices, 'vid')
            return with_vertices.add_edges(self.edges, 'src_id', 'dst_id')

        def normalized(sf, keys):
            # Sort and re-index so the comparison ignores row order.
            # NOTE(review): DataFrame.sort is the legacy pandas spelling
            # (sort_values in newer releases) -- confirm the pinned version.
            return sf.to_dataframe().sort(keys).reset_index(drop=True)

        # Test gframe operations has the same effect as its sframe+graph equivalent
        g = build_graph()
        vertex_view = g.vertices
        vertex_view['id_col'] = vertex_view['__id']
        edge_view = g.edges
        edge_view['src_id_col'] = edge_view['__src_id']
        edge_view['dst_id_col'] = edge_view['__dst_id']

        baseline = build_graph()
        vertex_copy = baseline.get_vertices()
        vertex_copy['id_col'] = vertex_copy['__id']
        edge_copy = baseline.get_edges()
        edge_copy['src_id_col'] = edge_copy['__src_id']
        edge_copy['dst_id_col'] = edge_copy['__dst_id']
        g2 = SGraph().add_vertices(vertex_copy, '__id')
        g2 = g2.add_edges(edge_copy, '__src_id', '__dst_id')

        assert_frame_equal(normalized(g.get_vertices(), '__id'),
                           normalized(g2.get_vertices(), '__id'))
        edge_keys = ['__src_id', '__dst_id']
        assert_frame_equal(normalized(g.get_edges(), edge_keys),
                           normalized(g2.get_edges(), edge_keys))

        # check delete a column with exception, and edges is still in a valid state
        with self.assertRaises(KeyError):
            del g.edges['badcolumn']
        g.edges.head()

        # test slicing
        first_rows = slice(None, 3)
        assert_frame_equal(g.edges[first_rows].to_dataframe(),
                           g.get_edges()[first_rows].to_dataframe())
        assert_frame_equal(g.vertices[first_rows].to_dataframe(),
                           g.get_vertices()[first_rows].to_dataframe())

        # test add row number
        # Expected frames with a running 'id' column; the visible code makes
        # no assertion on them after this point.
        e_expected = g.get_edges().to_dataframe()
        v_expected = g.get_vertices().to_dataframe()
        e_expected['id'] = range(len(e_expected))
        v_expected['id'] = range(len(v_expected))
Esempio n. 2
0
 def test_empty_graph(self):
     """Check that a freshly constructed SGraph is empty.

     Verifies the summary counts, the number of fields, the shapes of the
     vertex/edge frames and that the list-format accessors return nothing.
     """
     g = SGraph()
     self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
     self.assertEqual(len(g.get_fields()), 3)
     # These were assertTrue(shape, expected): unittest treats the second
     # argument as the failure message, so the check passed for any truthy
     # shape. assertEqual actually compares against the expected shape.
     self.assertEqual(tuple(g.get_vertices(format='sframe').shape), (0, 1))
     self.assertEqual(tuple(g.get_edges(format='sframe').shape), (0, 2))
     self.assertEqual(tuple(g.vertices.shape), (0, 1))
     self.assertEqual(tuple(g.edges.shape), (0, 2))
     # assertEqual reports the actual length on failure.
     self.assertEqual(len(g.get_vertices(format='list')), 0)
     self.assertEqual(len(g.get_edges(format='list')), 0)
Esempio n. 3
0
 def test_empty_graph(self):
     """Check that a freshly constructed SGraph is empty.

     Verifies the summary counts, the number of fields, the shapes of the
     vertex/edge frames and that the list-format accessors return nothing.
     """
     g = SGraph()
     self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
     self.assertEqual(len(g.get_fields()), 3)
     # These were assertTrue(shape, expected): unittest treats the second
     # argument as the failure message, so the check passed for any truthy
     # shape. assertEqual actually compares against the expected shape.
     self.assertEqual(tuple(g.get_vertices(format='sframe').shape), (0, 1))
     self.assertEqual(tuple(g.get_edges(format='sframe').shape), (0, 2))
     self.assertEqual(tuple(g.vertices.shape), (0, 1))
     self.assertEqual(tuple(g.edges.shape), (0, 2))
     # assertEqual reports the actual length on failure.
     self.assertEqual(len(g.get_vertices(format='list')), 0)
     self.assertEqual(len(g.get_edges(format='list')), 0)
Esempio n. 4
0
    def test_simple_graph(self):
        """Build the same small graph from DataFrame, SFrame and list inputs.

        For each input container the graph is populated from the fixture
        frames on the test case, then its summary, field names, per-format
        column names and list-format lengths are checked, and finally that
        ``get_edges()`` returns an unmaterialized result.
        """
        all_fields = ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight']
        edge_columns = ['__src_id', '__dst_id', 'weight']

        for source_cls in (pd.DataFrame, SFrame, list):
            graph = SGraph()
            if source_cls is not list:
                graph = graph.add_vertices(source_cls(self.vertices), vid_field='vid')
                graph = graph.add_edges(source_cls(self.edges),
                                        src_field='src_id', dst_field='dst_id')
            else:
                # List input: wrap every fixture row in a Vertex / Edge object.
                vertex_objs = [
                    Vertex(row['vid'], {'color': row['color'], 'vec': row['vec']})
                    for _, row in self.vertices.iterrows()
                ]
                edge_objs = [
                    Edge(row['src_id'], row['dst_id'], {'weight': row['weight']})
                    for _, row in self.edges.iterrows()
                ]
                graph = graph.add_vertices(vertex_objs)
                graph = graph.add_edges(edge_objs)

            self.assertEqual(graph.summary(), {'num_vertices': 4, 'num_edges': 3})
            self.assertItemsEqual(graph.get_fields(), all_fields)

            # NOTE(review): in every assertTrue(shape, tuple) call below the
            # tuple is taken as the failure message, so only the truthiness of
            # the shape is checked -- the expected shapes are never compared.
            self.assertItemsEqual(
                graph.get_vertices(format='dataframe').columns.values,
                ['color', 'vec'])
            self.assertItemsEqual(
                graph.get_edges(format='dataframe').columns.values,
                edge_columns)
            self.assertTrue(graph.get_edges(format='dataframe').shape, (3, 3))
            self.assertTrue(graph.get_vertices(format='dataframe').shape, (4, 3))
            self.assertTrue(
                graph.get_vertices(format='dataframe', fields={'color': 'g'}).shape,
                (1, 2))
            self.assertTrue(
                graph.get_edges(format='dataframe', fields={'weight': 0.}).shape,
                (1, 3))

            self.assertItemsEqual(
                graph.get_vertices(format='sframe').column_names(),
                ['__id', 'color', 'vec'])
            self.assertItemsEqual(
                graph.get_edges(format='sframe').column_names(),
                edge_columns)
            self.assertTrue(graph.get_edges(format='sframe').shape, (3, 3))
            self.assertTrue(graph.get_vertices(format='sframe').shape, (4, 3))
            self.assertTrue(
                graph.get_vertices(format='sframe', fields={'color': 'g'}).shape,
                (1, 2))
            self.assertTrue(
                graph.get_edges(format='sframe', fields={'weight': 0.}).shape,
                (1, 3))

            vertex_list = graph.get_vertices(format='list')
            edge_list = graph.get_edges(format='list')
            self.assertEqual(len(vertex_list), 4)
            self.assertEqual(len(edge_list), 3)

            # get edges is lazy
            lazy_edges = graph.get_edges()
            self.assertFalse(lazy_edges.__is_materialized__())
Esempio n. 5
0
    def test_robust_parse(self):
        """Round-trip a DataFrame with missing values through SGraph vertices.

        The frame mixes dense, all-NaN and partially missing columns. After
        adding it as vertices, the non-missing values of every column except
        'nan' must come back unchanged in both dataframe and sframe output.
        """
        source = pd.DataFrame({'int': [1, 2, 3],
                               'float': [1., 2., 3.],
                               'str': ['one', 'two', 'three'],
                               'nan': [np.nan, np.nan, np.nan],
                               'sparse_int': [1, 2, np.nan],
                               'sparse_float': [np.nan, 2., 3.],
                               'sparse_str': [None, 'two', None]
                               })
        graph = SGraph().add_vertices(source)
        reserved = ['__id', '__src_id', '__dst_id']
        self.assertItemsEqual(graph.get_fields(), source.columns.tolist() + reserved)

        as_dataframe = graph.get_vertices(format='dataframe')
        as_sframe = graph.get_vertices(format='sframe')
        for name in source.columns:
            # potential bug: the dataframe output is missing the 'nan' column.
            if name == 'nan':
                continue
            expected = sorted(source[name].dropna())
            self.assertItemsEqual(sorted(as_dataframe[name].dropna()), expected)
            self.assertItemsEqual(sorted(as_sframe[name].dropna()), expected)
Esempio n. 6
0
    def test_robust_parse(self):
        """Check that vertex data with NaN/None entries survives a round trip.

        A DataFrame with dense, all-NaN and sparse columns is loaded as
        vertices; for every column other than 'nan' the non-missing values
        read back (as dataframe and as sframe) must equal the input's.
        """
        nan = np.nan
        columns = {
            'int': [1, 2, 3],
            'float': [1., 2., 3.],
            'str': ['one', 'two', 'three'],
            'nan': [nan, nan, nan],
            'sparse_int': [1, 2, nan],
            'sparse_float': [nan, 2., 3.],
            'sparse_str': [None, 'two', None]
        }
        df = pd.DataFrame(columns)
        g = SGraph().add_vertices(df)
        expected_fields = df.columns.tolist() + ['__id', '__src_id', '__dst_id']
        self.assertItemsEqual(g.get_fields(), expected_fields)

        outputs = (g.get_vertices(format='dataframe'),
                   g.get_vertices(format='sframe'))
        # potential bug: the dataframe output is missing the 'nan' column.
        checked = [col for col in df.columns if col != 'nan']
        for col in checked:
            for frame in outputs:
                self.assertItemsEqual(sorted(frame[col].dropna()),
                                      sorted(df[col].dropna()))
Esempio n. 7
0
    def test_simple_graph(self):
        """Construct one graph per supported input container and inspect it.

        The fixture vertices/edges are fed in as a pandas DataFrame, an SFrame
        and a list of Vertex/Edge objects; each resulting graph goes through
        the same summary, field-name, column-name and length checks, plus a
        check that ``get_edges()`` is not materialized.
        """
        for input_type in (pd.DataFrame, SFrame, list):
            g = SGraph()
            if input_type is list:
                # Convert every fixture row into an explicit Vertex / Edge.
                vertices = []
                for _, row in self.vertices.iterrows():
                    attrs = {'color': row['color'], 'vec': row['vec']}
                    vertices.append(Vertex(row['vid'], attrs))
                edges = []
                for _, row in self.edges.iterrows():
                    attrs = {'weight': row['weight']}
                    edges.append(Edge(row['src_id'], row['dst_id'], attrs))
                g = g.add_vertices(vertices)
                g = g.add_edges(edges)
            else:
                g = g.add_vertices(input_type(self.vertices), vid_field='vid')
                g = g.add_edges(input_type(self.edges),
                                src_field='src_id', dst_field='dst_id')

            self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
            self.assertItemsEqual(
                g.get_fields(),
                ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

            # NOTE(review): assertTrue(shape, tuple) passes the tuple as the
            # failure message; these calls only verify the shape is truthy and
            # never compare it with the tuple.

            # --- dataframe output ---
            self.assertItemsEqual(
                g.get_vertices(format='dataframe').columns.values,
                ['color', 'vec'])
            self.assertItemsEqual(
                g.get_edges(format='dataframe').columns.values,
                ['__src_id', '__dst_id', 'weight'])
            self.assertTrue(g.get_edges(format='dataframe').shape, (3, 3))
            self.assertTrue(g.get_vertices(format='dataframe').shape, (4, 3))
            green_df = g.get_vertices(format='dataframe', fields={'color': 'g'})
            self.assertTrue(green_df.shape, (1, 2))
            zero_weight_df = g.get_edges(format='dataframe', fields={'weight': 0.})
            self.assertTrue(zero_weight_df.shape, (1, 3))

            # --- sframe output ---
            self.assertItemsEqual(
                g.get_vertices(format='sframe').column_names(),
                ['__id', 'color', 'vec'])
            self.assertItemsEqual(
                g.get_edges(format='sframe').column_names(),
                ['__src_id', '__dst_id', 'weight'])
            self.assertTrue(g.get_edges(format='sframe').shape, (3, 3))
            self.assertTrue(g.get_vertices(format='sframe').shape, (4, 3))
            green_sf = g.get_vertices(format='sframe', fields={'color': 'g'})
            self.assertTrue(green_sf.shape, (1, 2))
            zero_weight_sf = g.get_edges(format='sframe', fields={'weight': 0.})
            self.assertTrue(zero_weight_sf.shape, (1, 3))

            # --- list output ---
            self.assertEqual(len(g.get_vertices(format='list')), 4)
            self.assertEqual(len(g.get_edges(format='list')), 3)

            # get edges is lazy
            self.assertFalse(g.get_edges().__is_materialized__())
Esempio n. 8
0
    def test_gframe(self):
        """Exercise the ``vertices`` / ``edges`` frame views exposed by an SGraph.

        Covers: default columns of an empty graph, that the two attributes can
        be neither deleted nor reassigned, that column edits made through the
        views match rebuilding a graph from edited copies, that a failed column
        delete leaves ``edges`` usable, and slicing.
        """
        # An empty graph only carries the reserved id columns.
        g = SGraph()
        v = g.vertices
        self.assertSequenceEqual(v.column_names(), ['__id'])
        e = g.edges
        self.assertSequenceEqual(e.column_names(), ['__src_id', '__dst_id'])

        # Test vertices and edge attributes cannot be modified
        # (assignment and deletion are statements, so each is wrapped in a
        # small function that assertRaises can call).
        def set_vertices_empty(g):
            g.vertices = SFrame()

        def set_edges_empty(g):
            g.edges = SFrame()

        def remove_vertices(g):
            del g.vertices

        def remove_edges(g):
            del g.edges

        def remove_edge_column(gf, name):
            del gf[name]

        self.assertRaises(AttributeError, lambda: remove_vertices(g))
        self.assertRaises(AttributeError, lambda: remove_edges(g))
        self.assertRaises(AttributeError, lambda: set_vertices_empty(g))
        self.assertRaises(AttributeError, lambda: set_edges_empty(g))

        # Test gframe operations has the same effect as its sframe+graph equivalent
        # `g` is edited in place through its vertices/edges views ...
        g = SGraph().add_vertices(self.vertices,
                                  'vid').add_edges(self.edges, 'src_id',
                                                   'dst_id')
        v = g.vertices
        v['id_col'] = v['__id']
        e = g.edges
        e['src_id_col'] = e['__src_id']
        e['dst_id_col'] = e['__dst_id']
        # ... while `g2` is rebuilt from frames returned by get_vertices() /
        # get_edges() that received the same extra columns.
        g2 = SGraph().add_vertices(self.vertices,
                                   'vid').add_edges(self.edges, 'src_id',
                                                    'dst_id')
        new_vdata = g2.get_vertices()
        new_vdata['id_col'] = new_vdata['__id']
        new_edata = g2.get_edges()
        new_edata['src_id_col'] = new_edata['__src_id']
        new_edata['dst_id_col'] = new_edata['__dst_id']
        g2 = SGraph().add_vertices(new_vdata,
                                   '__id').add_edges(new_edata, '__src_id',
                                                     '__dst_id')
        # Sort by id and reset the index so the comparison ignores row order.
        # NOTE(review): DataFrame.sort is the legacy pandas spelling
        # (sort_values in newer releases) -- confirm the pinned pandas version.
        assert_frame_equal(
            g.get_vertices().to_dataframe().sort('__id').reset_index(
                drop=True),
            g2.get_vertices().to_dataframe().sort('__id').reset_index(
                drop=True))
        assert_frame_equal(
            g.get_edges().to_dataframe().sort(['__src_id', '__dst_id'
                                               ]).reset_index(drop=True),
            g2.get_edges().to_dataframe().sort(['__src_id', '__dst_id'
                                                ]).reset_index(drop=True))

        # check delete a column with exception, and edges is still in a valid state
        self.assertRaises(KeyError,
                          lambda: remove_edge_column(g.edges, 'badcolumn'))
        g.edges.head()

        # test slicing
        assert_frame_equal(g.edges[:3].to_dataframe(),
                           g.get_edges()[:3].to_dataframe())
        assert_frame_equal(g.vertices[:3].to_dataframe(),
                           g.get_vertices()[:3].to_dataframe())

        # test add row number
        # Expected frames with a running 'id' column; the visible code makes
        # no assertion on them after this point.
        e_expected = g.get_edges().to_dataframe()
        v_expected = g.get_vertices().to_dataframe()
        e_expected['id'] = range(len(e_expected))
        v_expected['id'] = range(len(v_expected))