def test_duplicate_graph_rename(self):
    """Verify that renaming a graph to a name already used by another
    graph, a frame, or a model raises and leaves every entity in place."""
    def unique_name():
        # Server entity names may not contain dashes.
        return str(uuid.uuid1()).replace('-', '_')

    graph_name1 = unique_name()
    graph_name2 = unique_name()
    model_name = unique_name()
    frame_name = unique_name()

    # Create the graphs, model, and frame under test.
    graph1 = ta.Graph(name=graph_name1)
    graph2 = ta.Graph(name=graph_name2)
    ta.KMeansModel(name=model_name)
    ta.Frame(name=frame_name)

    # Both freshly created graphs must be visible on the server.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should exist in list of graphs")
    self.assertTrue(graph_name2 in ta.get_graph_names(),
                    graph_name2 + " should exist in list of graphs")

    # Renaming a graph to another graph's name must fail.
    with self.assertRaises(Exception):
        graph2.name = graph_name1

    # The failed rename must not have removed either graph.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should still exist in list of graphs")
    self.assertTrue(graph_name2 in ta.get_graph_names(),
                    graph_name2 + " should still exist in list of graphs")

    # Renaming a graph to a frame's name must also fail.
    with self.assertRaises(Exception):
        graph1.name = frame_name

    # graph1 and the frame must both survive the failed rename.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should still exist in the list of graphs")
    self.assertTrue(frame_name in ta.get_frame_names(),
                    frame_name + " should still exist in the list of frames")

    # Renaming a graph to a model's name must also fail.
    with self.assertRaises(Exception):
        graph1.name = model_name

    # graph1 and the model must both survive the failed rename.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should still exist in the list of graphs")
    self.assertTrue(model_name in ta.get_model_names(),
                    model_name + " should still exist in the list of models")
def setUp(self):
    """Build a small city graph from the Oregon cities dataset before each test."""
    # Pipe-delimited CSV with a single header row.
    source = ta.CsvFile("/datasets/oregon-cities.csv",
                        schema=[('rank', ta.int32),
                                ('city', str),
                                ('population_2013', str),
                                ('pop_2010', str),
                                ('change', str),
                                ('county', str)],
                        delimiter='|',
                        skip_header_lines=1)
    self.frame = ta.Frame(source)

    # Two vertex types joined by undirected 'rank' edges.
    self.graph = ta.Graph()
    self.graph.define_vertex_type('city')
    self.graph.define_vertex_type('population_2013')
    self.graph.define_edge_type('rank', 'city', 'population_2013',
                                directed=False)
    self.graph.vertices['city'].add_vertices(self.frame, 'city')
    self.graph.vertices['population_2013'].add_vertices(
        self.frame, 'population_2013')
    self.graph.edges['rank'].add_edges(self.frame, 'city',
                                       'population_2013', ['rank'],
                                       create_missing_vertices=False)
    # Convenience handle used by the tests.
    self.vertex_frame = self.graph.vertices['city']
def test_page_rank(self):
    """tests page_rank, +piggyback last_read_date testing

    Builds a follower graph, runs graphx_pagerank, and checks that
    graph.last_read_date advances on data access but not on metadata access.
    """
    graph_data = "/datasets/page_rank_test_data.csv"
    schema = [("followed", ta.int32), ("follows", ta.int32)]
    frame = ta.Frame(ta.CsvFile(graph_data, schema))
    graph = ta.Graph()
    t0 = graph.last_read_date
    graph.define_vertex_type("node")
    graph.vertices["node"].add_vertices(frame, "follows")
    t1 = graph.last_read_date
    self.assertLess(t0, t1)  # make sure the last_read_date is updating
    graph.vertices["node"].add_vertices(frame, "followed")
    graph.define_edge_type("e1", "node", "node", directed=True)
    graph.edges["e1"].add_edges(frame, "follows", "followed")
    t2 = graph.last_read_date
    self.assertLess(t1, t2)  # make sure the last_read_date is updating
    result = graph.graphx_pagerank(output_property="PageRank",
                                   max_iterations=2,
                                   convergence_tolerance=0.001)
    t3 = graph.last_read_date
    self.assertLess(t2, t3)  # make sure the last_read_date is updating
    vertex_dict = result['vertex_dictionary']
    edge_dict = result['edge_dictionary']
    # assertIn replaces deprecated dict.has_key (removed in Python 3)
    # and yields a clearer failure message.
    self.assertIn('PageRank', dict(vertex_dict['node'].schema))
    self.assertIn('PageRank', dict(edge_dict['e1'].schema))
    t4 = graph.last_read_date
    self.assertEqual(
        t3, t4)  # metadata access should not have updated the date
def test_graph(self): print "define csv file" csv = ta.CsvFile("/datasets/movie.csv", schema= [('user', ta.int32), ('vertex_type', str), ('movie', ta.int32), ('rating', ta.int32), ('splits', str)]) print "creating frame" frame = ta.Frame(csv) # TODO: add asserts verifying inspect is working print print frame.inspect(20) print self.assertEquals(frame.row_count, 20, "frame should have 20 rows") #self.assertEqual(frame.column_names, ['', '', '', '', '']) self.assertEquals(len(frame.column_names), 5, "frame should have 5 columns") print "create graph" graph = ta.Graph() self.assertIsNotNone(graph.uri) print "define vertices and edges" graph.define_vertex_type('movies') graph.define_vertex_type('users') graph.define_edge_type('ratings', 'users', 'movies', directed=True) self.assertEquals(graph.vertices['users'].row_count, 0, "making sure newly defined vertex frame does not have rows") self.assertEquals(graph.vertices['movies'].row_count, 0, "making sure newly defined vertex frame does not have rows") self.assertEquals(graph.edges['ratings'].row_count, 0, "making sure newly defined edge frame does not have rows") #self.assertEquals(graph.vertex_count, 0, "no vertices expected yet") #self.assertEquals(graph.edge_count, 0, "no edges expected yet") print "add_vertices() users" graph.vertices['users'].add_vertices( frame, 'user', []) # TODO: add asserts verifying inspect is working print print graph.vertices['users'].inspect(20) print self.assertEquals(graph.vertices['users'].row_count, 13) self.assertEquals(len(graph.vertices['users'].column_names), 3) #self.assertEquals(graph.vertices['users'].row_count, graph.vertex_count, "row count of user vertices should be same as vertex count on graph") print "add_vertices() movies" graph.vertices['movies'].add_vertices( frame, 'movie', []) self.assertEquals(graph.vertices['users'].row_count, 13) self.assertEquals(graph.vertices['movies'].row_count, 11) self.assertEquals(len(graph.vertices['users'].column_names), 3) 
self.assertEquals(len(graph.vertices['movies'].column_names), 3) #self.assertEquals(graph.vertex_count, 24, "vertex_count should be the total number of users and movies") print "add_edges()" graph.edges['ratings'].add_edges(frame, 'user', 'movie', ['rating'], create_missing_vertices=False) self.assertEquals(len(graph.edges['ratings'].column_names), 5) self.assertEquals(graph.edges['ratings'].row_count, 20, "expected 20 rating edges")
def test_create_kmeans_model_with_duplicte_graph_name(self):
    """Creating a KMeans model with a name already used by a graph must raise.

    NOTE(review): 'duplicte' in the method name looks like a typo for
    'duplicate' — left unchanged to avoid altering the discovered test name.
    """
    taken_name = str(uuid.uuid1()).replace('-', '_')

    # Claim the name with a graph first.
    ta.Graph(name=taken_name)
    self.assertTrue(taken_name in ta.get_graph_names(),
                    taken_name + " should be in the list of graphs")

    # A model may not reuse a name already held by a graph.
    with self.assertRaises(Exception):
        ta.KMeansModel(name=taken_name)
def test_graph_rename(self):
    """A graph rename takes effect on the server and frees the old name."""
    graph_name = str(uuid.uuid1()).replace('-', '_')
    new_graph_name = str(uuid.uuid1()).replace('-', '_')

    # Create under the first name, then rename via attribute assignment.
    graph = ta.Graph(name=graph_name)
    graph.name = new_graph_name

    self.assertTrue(new_graph_name in ta.get_graph_names(),
                    new_graph_name + " should be in list of graphs")
    self.assertFalse(graph_name in ta.get_graph_names(),
                     graph_name + " should not be in list of graphs")
def test_drop_graph_by_object(self):
    """drop_graphs() accepts a graph object and removes exactly that graph."""
    graph_name = str(uuid.uuid1()).replace('-', '_')

    # Create graph and verify it shows up on the server.
    graph = ta.Graph(name=graph_name)
    self.assertTrue(graph_name in ta.get_graph_names(),
                    graph_name + " should exist in the list of graphs")

    # Drop by object (not by name) and confirm exactly one deletion.
    self.assertEqual(1, ta.drop_graphs(graph),
                     "drop_graphs() should have deleted one graph.")
    self.assertFalse(graph_name in ta.get_graph_names(),
                     graph_name + " should not exist in the list of graphs")
def test_generic_drop_by_object(self):
    """The generic drop() accepts a graph object and deletes that one item."""
    graph_name = str(uuid.uuid1()).replace('-', '_')
    graph = ta.Graph(name=graph_name)

    # The freshly created graph must be visible.
    self.assertTrue(graph_name in ta.get_graph_names(),
                    graph_name + " should exist in the list of graph names")

    # Generic drop by object reports one item removed.
    self.assertEqual(1, ta.drop(graph),
                     "drop() should have deleted one item")

    # And the graph must be gone afterwards.
    self.assertFalse(graph_name in ta.get_graph_names(),
                     graph_name + " should not exist in the list of graph")
def test_triangle_count(self):
    """graphx_triangle_count adds the requested output property to the
    resulting vertex frame's schema."""
    graph_data = "/datasets/triangle_count_small.csv"
    schema = [('from_node', str), ('to_node', str), ('max_k', ta.int64), ('cc', ta.int64)]
    frame = ta.Frame(ta.CsvFile(graph_data, schema))
    graph = ta.Graph()
    graph.define_vertex_type("node")
    # Both endpoint columns feed the single "node" vertex type.
    graph.vertices["node"].add_vertices(frame, "from_node", ["max_k", "cc"])
    graph.vertices["node"].add_vertices(frame, "to_node", ["max_k", "cc"])
    graph.define_edge_type("edge", "node", "node", directed=True)
    graph.edges["edge"].add_edges(frame, "from_node", "to_node")
    result = graph.graphx_triangle_count(output_property="triangle")
    frame_result = result['node']
    # assertIn replaces deprecated dict.has_key (removed in Python 3).
    self.assertIn('triangle', dict(frame_result.schema))
def test_kclique(self): print "define csv file" noun_graph_data ="datasets/noun_graph_small.csv" schema = [("source",str),("target",str)] noun_words_frame = ta.Frame(ta.CsvFile(noun_graph_data,schema)) graph = ta.Graph() graph.define_vertex_type("source") graph.vertices["source"].add_vertices(noun_words_frame,"source") graph.vertices["source"].add_vertices(noun_words_frame,"target") graph.define_edge_type("edge", "source", "source", False) graph.edges["edge"].add_edges(noun_words_frame,"source","target") output = graph.kclique_percolation(clique_size = 3, community_property_label = "community") output_dictionary = output['vertex_dictionary'] self.assertTrue('source' in output_dictionary)
def test_page_rank(self):
    """graphx_pagerank writes the output property into both the vertex
    and the edge frame schemas.

    NOTE(review): a test with this same name also appears earlier in this
    source; if both live in the same class the later definition silently
    shadows the earlier one — confirm they belong to different classes.
    """
    graph_data = "/datasets/page_rank_test_data.csv"
    schema = [("followed", ta.int32), ("follows", ta.int32)]
    frame = ta.Frame(ta.CsvFile(graph_data, schema))
    graph = ta.Graph()
    graph.define_vertex_type("node")
    graph.vertices["node"].add_vertices(frame, "follows")
    graph.vertices["node"].add_vertices(frame, "followed")
    graph.define_edge_type("e1", "node", "node", directed=True)
    graph.edges["e1"].add_edges(frame, "follows", "followed")
    result = graph.graphx_pagerank(output_property="PageRank",
                                   max_iterations=2,
                                   convergence_tolerance=0.001)
    vertex_dict = result['vertex_dictionary']
    edge_dict = result['edge_dictionary']
    # assertIn replaces deprecated dict.has_key (removed in Python 3).
    self.assertIn('PageRank', dict(vertex_dict['node'].schema))
    self.assertIn('PageRank', dict(edge_dict['e1'].schema))
def test_annotate_weighted_degrees(self): print "define csv file" schema_node = [("nodename", str), ("in", ta.int64), ("out", ta.int64), ("undirectedcount", ta.int64), ("isundirected", ta.int64), ("outlabel", ta.int64), ("insum", ta.float64), ("outsum", ta.float64), ("undirectedsum", ta.float64), ("labelsum", ta.float64), ("nolabelsum", ta.float64), ("defaultsum", ta.float64), ("integersum", ta.int64)] schema_directed = [("nodefrom", str), ("nodeto", str), ("value", ta.float64), ("badvalue", str), ("intvalue", ta.int32), ("int64value", ta.int64)] schema_undirected = [("node1", str), ("node2", str), ("value", ta.float64)] schema_directed_label = [("nodefrom", str), ("nodeto", str), ("labeltest", ta.float64)] node_frame = ta.Frame( ta.CsvFile("/datasets/annotate_node_list.csv", schema_node)) directed_frame = ta.Frame( ta.CsvFile("/datasets/annotate_directed_list.csv", schema_directed)) undirected_frame = ta.Frame( ta.CsvFile("/datasets/annotate_undirected_list.csv", schema_undirected)) directed_label_frame = ta.Frame( ta.CsvFile("/datasets/annotate_directed_label_list.csv", schema_directed_label)) graph = ta.Graph() graph.define_vertex_type("primary") graph.vertices['primary'].add_vertices(node_frame, "nodename", [ "out", "undirectedcount", "isundirected", "outlabel", "in", "insum", "outsum", "undirectedsum", "labelsum", "nolabelsum", "defaultsum", "integersum" ]) graph.define_edge_type("directed", "primary", "primary", directed=True) graph.define_edge_type("labeldirected", "primary", "primary", directed=True) graph.define_edge_type("undirected", "primary", "primary", directed=False) graph.edges['directed'].add_edges( directed_frame, "nodefrom", "nodeto", ["value", "badvalue", "intvalue", "int64value"]) graph.edges['labeldirected'].add_edges(directed_label_frame, "nodefrom", "nodeto", ["labeltest"]) graph.edges['undirected'].add_edges(undirected_frame, "node1", "node2", ["value"]) output = graph.annotate_weighted_degrees("sumName", degree_option="in", 
edge_weight_property="value") self.assertTrue(type(output) is dict) self.assertTrue(output.has_key('primary')) frame_parquet = output['primary'] self.assertTrue(dict(frame_parquet.schema).has_key('sumName'))
# Example script: load an employee/manager CSV, build a "works under"
# graph, inspect it, and drop it.
employees_frame = ta.Frame(
    ta.CsvFile("employees.csv",
               schema=[('Employee', str), ('Manager', str),
                       ('Title', str), ('Years', ta.int64)],
               skip_header_lines=1), 'employees_frame')
employees_frame.inspect()
# A bipartite graph
# Notice that this is a funny example since managers are also employees!
# Presumably Steve the manager and Steve the employee are the same person
# Option 1
graph = ta.Graph()
graph.define_vertex_type('Employee')
graph.define_edge_type('worksunder', 'Employee', 'Employee', directed=False)
# Managers first (no extra properties), then employees carrying their Title.
graph.vertices['Employee'].add_vertices(employees_frame, 'Manager', [])
graph.vertices['Employee'].add_vertices(employees_frame, 'Employee',
                                        ['Title'])
graph.edges['worksunder'].add_edges(employees_frame, 'Employee', 'Manager',
                                    ['Years'])
# Bare expressions — REPL/example style; the values are computed server-side
# and discarded when run as a script.
graph.vertex_count
graph.edge_count
graph.vertices['Employee'].inspect(9)
graph.edges['worksunder'].inspect(20)
# Option 2
ta.drop_graphs(graph)