Esempio n. 1
0
    def test_gframe(self):
        """Exercise the ``vertices`` / ``edges`` views exposed by an SGraph.

        Checks, in order: the default column names on an empty graph; that
        the two attributes can be neither deleted nor rebound; that writing
        columns through the views gives the same data as rebuilding a graph
        from edited SFrames; that a failed column delete leaves ``edges``
        usable; and that slicing a view matches slicing the plain SFrame.
        """
        graph = SGraph()
        self.assertSequenceEqual(graph.vertices.column_names(), ['__id'])
        self.assertSequenceEqual(graph.edges.column_names(), ['__src_id', '__dst_id'])

        # The vertices and edges attributes can be neither removed nor replaced.
        with self.assertRaises(AttributeError):
            del graph.vertices
        with self.assertRaises(AttributeError):
            del graph.edges
        with self.assertRaises(AttributeError):
            graph.vertices = SFrame()
        with self.assertRaises(AttributeError):
            graph.edges = SFrame()

        # Path 1: add the extra columns directly through the graph's views.
        graph = SGraph().add_vertices(self.vertices, 'vid')
        graph = graph.add_edges(self.edges, 'src_id', 'dst_id')
        vertex_view = graph.vertices
        vertex_view['id_col'] = vertex_view['__id']
        edge_view = graph.edges
        edge_view['src_id_col'] = edge_view['__src_id']
        edge_view['dst_id_col'] = edge_view['__dst_id']

        # Path 2: pull plain SFrames out, edit them, and build a new graph.
        baseline = SGraph().add_vertices(self.vertices, 'vid')
        baseline = baseline.add_edges(self.edges, 'src_id', 'dst_id')
        vertex_sf = baseline.get_vertices()
        vertex_sf['id_col'] = vertex_sf['__id']
        edge_sf = baseline.get_edges()
        edge_sf['src_id_col'] = edge_sf['__src_id']
        edge_sf['dst_id_col'] = edge_sf['__dst_id']
        rebuilt = SGraph().add_vertices(vertex_sf, '__id')
        rebuilt = rebuilt.add_edges(edge_sf, '__src_id', '__dst_id')

        def as_sorted_frame(sframe, keys):
            # Sort and re-index so the comparison ignores row order.
            return sframe.to_dataframe().sort(keys).reset_index(drop=True)

        assert_frame_equal(as_sorted_frame(graph.get_vertices(), '__id'),
                           as_sorted_frame(rebuilt.get_vertices(), '__id'))
        assert_frame_equal(as_sorted_frame(graph.get_edges(), ['__src_id', '__dst_id']),
                           as_sorted_frame(rebuilt.get_edges(), ['__src_id', '__dst_id']))

        # Deleting an unknown column must raise, and edges must remain readable.
        with self.assertRaises(KeyError):
            del graph.edges['badcolumn']
        graph.edges.head()

        # Slicing a view matches slicing the corresponding SFrame.
        assert_frame_equal(graph.edges[:3].to_dataframe(), graph.get_edges()[:3].to_dataframe())
        assert_frame_equal(graph.vertices[:3].to_dataframe(), graph.get_vertices()[:3].to_dataframe())

        # Expected frames for the row-number check: each gets an 'id' column
        # holding 0..len-1.
        # NOTE(review): the comparison that would consume these is not visible here.
        e_expected = graph.get_edges().to_dataframe()
        v_expected = graph.get_vertices().to_dataframe()
        e_expected['id'] = range(len(e_expected))
        v_expected['id'] = range(len(v_expected))
Esempio n. 2
0
    def test_sframe_le_append_skip_row_bug_is_fixed(self):
        """
        Regression test for an SFrame lazy-evaluation bug.

        It lives with the SGraph tests because the only known repro goes
        through SGraph: when the SFrame has a lazy_append and is passed
        through a logical filter, skip_rows was not applied correctly, which
        left the un-materialized edge SFrame in a bad state.

        Keep this test until a cleaner repro is found.
        """
        edge_count = 12  # smallest size that reproduces the le_append bug

        # Chain graph: one edge i -> i + 1 for every i in [0, edge_count).
        edge_table = SFrame({
            'src': range(edge_count),
            'dst': range(1, edge_count + 1),
        })
        chain = SGraph().add_edges(edge_table, 'src', 'dst')

        lazy_edges = chain.get_edges()
        eager_edges = chain.get_edges()
        eager_edges.__materialize__()

        # Filter and convert the lazy frame first, then the materialized one;
        # both must yield the same rows.
        lazy_result = lazy_edges[lazy_edges['__dst_id'] == edge_count].to_dataframe()
        eager_result = eager_edges[eager_edges['__dst_id'] == edge_count].to_dataframe()
        assert_frame_equal(lazy_result, eager_result)
Esempio n. 3
0
 def test_empty_graph(self):
     """A freshly constructed SGraph has no vertices or edges, only id columns."""
     g = SGraph()
     self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
     self.assertEqual(len(g.get_fields()), 3)
     # assertEqual rather than assertTrue: assertTrue(x, y) treats y as the
     # failure message, so the expected shape was never actually compared.
     self.assertEqual(g.get_vertices(format='sframe').shape, (0, 1))
     self.assertEqual(g.get_edges(format='sframe').shape, (0, 2))
     self.assertEqual(g.vertices.shape, (0, 1))
     self.assertEqual(g.edges.shape, (0, 2))
     self.assertEqual(len(g.get_vertices(format='list')), 0)
     self.assertEqual(len(g.get_edges(format='list')), 0)
Esempio n. 4
0
 def test_empty_graph(self):
     """An SGraph built with no arguments is empty and exposes only id columns."""
     g = SGraph()
     self.assertEqual(g.summary(), {'num_vertices': 0, 'num_edges': 0})
     self.assertEqual(len(g.get_fields()), 3)
     # The second argument of assertTrue is the failure message, not an
     # expected value, so these shape checks must use assertEqual.
     self.assertEqual(g.get_vertices(format='sframe').shape, (0, 1))
     self.assertEqual(g.get_edges(format='sframe').shape, (0, 2))
     self.assertEqual(g.vertices.shape, (0, 1))
     self.assertEqual(g.edges.shape, (0, 2))
     self.assertEqual(len(g.get_vertices(format='list')), 0)
     self.assertEqual(len(g.get_edges(format='list')), 0)
Esempio n. 5
0
    def test_simple_graph(self):
        """Build the fixture graph from each supported input type and inspect it.

        For pandas DataFrame, SFrame and list-of-Vertex/Edge inputs, the
        resulting graph must report the same summary, fields, column names
        and shapes, and ``get_edges()`` must return an un-materialized frame.
        """
        for input_type in [pd.DataFrame, SFrame, list]:
            g = SGraph()
            if input_type is list:
                vertices = [Vertex(row['vid'], {'color': row['color'], 'vec': row['vec']})
                            for _, row in self.vertices.iterrows()]
                edges = [Edge(row['src_id'], row['dst_id'], {'weight': row['weight']})
                         for _, row in self.edges.iterrows()]
                g = g.add_vertices(vertices)
                g = g.add_edges(edges)
            else:
                g = g.add_vertices(input_type(self.vertices), vid_field='vid')
                g = g.add_edges(input_type(self.edges), src_field='src_id', dst_field='dst_id')
            self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
            self.assertItemsEqual(g.get_fields(), ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

            # Shape checks use assertEqual: assertTrue(x, y) treats y as the
            # failure message, so the original assertions compared nothing.
            # NOTE(review): two column counts were changed to agree with the
            # column-name assertions in this test (dataframe vertices (4, 3) ->
            # (4, 2); filtered sframe vertices (1, 2) -> (1, 3)). The row counts
            # of the filtered queries depend on the fixture -- confirm by running.
            self.assertItemsEqual(g.get_vertices(format='dataframe').columns.values, ['color', 'vec'])
            self.assertItemsEqual(g.get_edges(format='dataframe').columns.values, ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='dataframe').shape, (3, 3))
            self.assertEqual(g.get_vertices(format='dataframe').shape, (4, 2))
            self.assertEqual(g.get_vertices(format='dataframe', fields={'color': 'g'}).shape, (1, 2))
            self.assertEqual(g.get_edges(format='dataframe', fields={'weight': 0.}).shape, (1, 3))

            self.assertItemsEqual(g.get_vertices(format='sframe').column_names(), ['__id', 'color', 'vec'])
            self.assertItemsEqual(g.get_edges(format='sframe').column_names(), ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='sframe').shape, (3, 3))
            self.assertEqual(g.get_vertices(format='sframe').shape, (4, 3))
            self.assertEqual(g.get_vertices(format='sframe', fields={'color': 'g'}).shape, (1, 3))
            self.assertEqual(g.get_edges(format='sframe', fields={'weight': 0.}).shape, (1, 3))

            vertices = g.get_vertices(format='list')
            edges = g.get_edges(format='list')
            self.assertEqual(len(vertices), 4)
            self.assertEqual(len(edges), 3)

            # get_edges() must hand back a lazy (not yet materialized) frame.
            edges = g.get_edges()
            self.assertFalse(edges.__is_materialized__())
Esempio n. 6
0
    def test_sframe_le_append_skip_row_bug_is_fixed(self):
        """
        Guard against a regression in SFrame lazy evaluation.

        The test sits here because the bug can only be reproduced through
        SGraph. It shows up when the SFrame has a lazy_append and goes through
        a logical filter: skip_rows is then handled incorrectly, leaving the
        edge SFrame in a bad state while it is not materialized.

        The test stays until a cleaner repro is available.
        """
        size = 12  # smallest size that reproduces the le_append bug

        # Graph whose edges are i -> i + 1.
        endpoints = SFrame({'src': range(size), 'dst': range(1, size + 1)})
        g = SGraph().add_edges(endpoints, 'src', 'dst')

        lazy_sf = g.get_edges()
        materialized_sf = g.get_edges()
        materialized_sf.__materialize__()

        # Evaluate the lazy side fully before touching the materialized side.
        from_lazy = lazy_sf[lazy_sf['__dst_id'] == size].to_dataframe()
        from_materialized = materialized_sf[materialized_sf['__dst_id'] == size].to_dataframe()
        assert_frame_equal(from_lazy, from_materialized)
Esempio n. 7
0
    def test_simple_graph(self):
        """Construct the fixture graph from every accepted input type.

        Runs the same checks for pandas DataFrame, SFrame and
        list-of-Vertex/Edge inputs: summary counts, field names, column
        names and shapes in both output formats, list output lengths, and
        that ``get_edges()`` returns a frame that is not yet materialized.
        """
        for input_type in [pd.DataFrame, SFrame, list]:
            g = SGraph()
            if input_type is list:
                vertices = [
                    Vertex(row['vid'], {
                        'color': row['color'],
                        'vec': row['vec']
                    }) for _, row in self.vertices.iterrows()
                ]
                edges = [
                    Edge(row['src_id'], row['dst_id'],
                         {'weight': row['weight']})
                    for _, row in self.edges.iterrows()
                ]
                g = g.add_vertices(vertices)
                g = g.add_edges(edges)
            else:
                g = g.add_vertices(input_type(self.vertices), vid_field='vid')
                g = g.add_edges(input_type(self.edges),
                                src_field='src_id',
                                dst_field='dst_id')
            self.assertEqual(g.summary(), {'num_vertices': 4, 'num_edges': 3})
            self.assertItemsEqual(
                g.get_fields(),
                ['__id', '__src_id', '__dst_id', 'color', 'vec', 'weight'])

            # Shapes are compared with assertEqual; assertTrue(x, y) would use
            # y as the failure message and never compare anything.
            # NOTE(review): dataframe vertices (4, 3) -> (4, 2) and filtered
            # sframe vertices (1, 2) -> (1, 3) were corrected to match the
            # column-name assertions here. Filtered row counts depend on the
            # fixture -- confirm by running.
            self.assertItemsEqual(
                g.get_vertices(format='dataframe').columns.values,
                ['color', 'vec'])
            self.assertItemsEqual(
                g.get_edges(format='dataframe').columns.values,
                ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='dataframe').shape, (3, 3))
            self.assertEqual(g.get_vertices(format='dataframe').shape, (4, 2))
            self.assertEqual(
                g.get_vertices(format='dataframe', fields={
                    'color': 'g'
                }).shape, (1, 2))
            self.assertEqual(
                g.get_edges(format='dataframe', fields={
                    'weight': 0.
                }).shape, (1, 3))

            self.assertItemsEqual(
                g.get_vertices(format='sframe').column_names(),
                ['__id', 'color', 'vec'])
            self.assertItemsEqual(
                g.get_edges(format='sframe').column_names(),
                ['__src_id', '__dst_id', 'weight'])
            self.assertEqual(g.get_edges(format='sframe').shape, (3, 3))
            self.assertEqual(g.get_vertices(format='sframe').shape, (4, 3))
            self.assertEqual(
                g.get_vertices(format='sframe', fields={
                    'color': 'g'
                }).shape, (1, 3))
            self.assertEqual(
                g.get_edges(format='sframe', fields={
                    'weight': 0.
                }).shape, (1, 3))

            vertices = g.get_vertices(format='list')
            edges = g.get_edges(format='list')
            self.assertEqual(len(vertices), 4)
            self.assertEqual(len(edges), 3)

            # get_edges() must return a lazy (un-materialized) frame.
            edges = g.get_edges()
            self.assertFalse(edges.__is_materialized__())
Esempio n. 8
0
    def test_gframe(self):
        """Check the behaviour of the ``vertices`` and ``edges`` graph views.

        Verifies the default columns of an empty graph, that both attributes
        reject deletion and assignment, that column edits made through the
        views equal a graph rebuilt from edited SFrames, that ``edges``
        survives a failed column delete, and that slicing agrees with the
        plain SFrame accessors.
        """
        graph = SGraph()
        self.assertSequenceEqual(graph.vertices.column_names(), ['__id'])
        self.assertSequenceEqual(graph.edges.column_names(),
                                 ['__src_id', '__dst_id'])

        # Neither attribute may be deleted or rebound.
        with self.assertRaises(AttributeError):
            del graph.vertices
        with self.assertRaises(AttributeError):
            del graph.edges
        with self.assertRaises(AttributeError):
            graph.vertices = SFrame()
        with self.assertRaises(AttributeError):
            graph.edges = SFrame()

        # First graph: extra columns written through the views.
        graph = SGraph().add_vertices(self.vertices, 'vid')
        graph = graph.add_edges(self.edges, 'src_id', 'dst_id')
        vertex_view = graph.vertices
        vertex_view['id_col'] = vertex_view['__id']
        edge_view = graph.edges
        edge_view['src_id_col'] = edge_view['__src_id']
        edge_view['dst_id_col'] = edge_view['__dst_id']

        # Second graph: same columns added to extracted SFrames, then the
        # graph is rebuilt from those frames.
        baseline = SGraph().add_vertices(self.vertices, 'vid')
        baseline = baseline.add_edges(self.edges, 'src_id', 'dst_id')
        vertex_sf = baseline.get_vertices()
        vertex_sf['id_col'] = vertex_sf['__id']
        edge_sf = baseline.get_edges()
        edge_sf['src_id_col'] = edge_sf['__src_id']
        edge_sf['dst_id_col'] = edge_sf['__dst_id']
        rebuilt = SGraph().add_vertices(vertex_sf, '__id')
        rebuilt = rebuilt.add_edges(edge_sf, '__src_id', '__dst_id')

        def as_sorted_frame(sframe, keys):
            # Sort and re-index so row order does not affect the comparison.
            return sframe.to_dataframe().sort(keys).reset_index(drop=True)

        assert_frame_equal(as_sorted_frame(graph.get_vertices(), '__id'),
                           as_sorted_frame(rebuilt.get_vertices(), '__id'))
        assert_frame_equal(
            as_sorted_frame(graph.get_edges(), ['__src_id', '__dst_id']),
            as_sorted_frame(rebuilt.get_edges(), ['__src_id', '__dst_id']))

        # A failed column delete must raise and leave edges in a valid state.
        with self.assertRaises(KeyError):
            del graph.edges['badcolumn']
        graph.edges.head()

        # Slicing the views matches slicing the SFrame accessors.
        assert_frame_equal(graph.edges[:3].to_dataframe(),
                           graph.get_edges()[:3].to_dataframe())
        assert_frame_equal(graph.vertices[:3].to_dataframe(),
                           graph.get_vertices()[:3].to_dataframe())

        # Expected frames for the row-number check, each with an 'id' column
        # of 0..len-1.
        # NOTE(review): nothing visible here consumes these frames.
        e_expected = graph.get_edges().to_dataframe()
        v_expected = graph.get_vertices().to_dataframe()
        e_expected['id'] = range(len(e_expected))
        v_expected['id'] = range(len(v_expected))