def test_duplicate_graph_rename(self):
    """Verify that renaming a graph to a name already used by another
    graph, a frame, or a model raises and leaves every entity in place."""
    def unique_name():
        # Server entity names may not contain dashes.
        return str(uuid.uuid1()).replace('-', '_')

    graph_name1 = unique_name()
    graph_name2 = unique_name()
    model_name = unique_name()
    frame_name = unique_name()

    # Create the graphs, model, and frame under test.
    graph1 = ta.Graph(name=graph_name1)
    graph2 = ta.Graph(name=graph_name2)
    ta.KMeansModel(name=model_name)
    ta.Frame(name=frame_name)

    # Both freshly created graphs must be visible on the server.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should exist in list of graphs")
    self.assertTrue(graph_name2 in ta.get_graph_names(),
                    graph_name2 + " should exist in list of graphs")

    # Renaming a graph to another graph's name must fail.
    with self.assertRaises(Exception):
        graph2.name = graph_name1

    # The failed rename must not have removed either graph.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should still exist in list of graphs")
    self.assertTrue(graph_name2 in ta.get_graph_names(),
                    graph_name2 + " should still exist in list of graphs")

    # Renaming a graph to a frame's name must also fail.
    with self.assertRaises(Exception):
        graph1.name = frame_name

    # graph1 and the frame must both survive the failed rename.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should still exist in the list of graphs")
    self.assertTrue(frame_name in ta.get_frame_names(),
                    frame_name + " should still exist in the list of frames")

    # Renaming a graph to a model's name must also fail.
    with self.assertRaises(Exception):
        graph1.name = model_name

    # graph1 and the model must both survive the failed rename.
    self.assertTrue(graph_name1 in ta.get_graph_names(),
                    graph_name1 + " should still exist in the list of graphs")
    self.assertTrue(model_name in ta.get_model_names(),
                    model_name + " should still exist in the list of models")
def setUp(self):
    """Build a small city graph from the Oregon cities dataset before each test."""
    # Pipe-delimited CSV with a single header row.
    source = ta.CsvFile("/datasets/oregon-cities.csv",
                        schema=[('rank', ta.int32),
                                ('city', str),
                                ('population_2013', str),
                                ('pop_2010', str),
                                ('change', str),
                                ('county', str)],
                        delimiter='|',
                        skip_header_lines=1)
    self.frame = ta.Frame(source)

    # Two vertex types joined by undirected 'rank' edges.
    self.graph = ta.Graph()
    self.graph.define_vertex_type('city')
    self.graph.define_vertex_type('population_2013')
    self.graph.define_edge_type('rank', 'city', 'population_2013',
                                directed=False)
    self.graph.vertices['city'].add_vertices(self.frame, 'city')
    self.graph.vertices['population_2013'].add_vertices(
        self.frame, 'population_2013')
    self.graph.edges['rank'].add_edges(self.frame, 'city',
                                       'population_2013', ['rank'],
                                       create_missing_vertices=False)
    # Convenience handle used by the tests.
    self.vertex_frame = self.graph.vertices['city']
def test_page_rank(self):
    """tests page_rank, +piggyback last_read_date testing

    Builds a follower graph, runs graphx_pagerank, and checks that
    graph.last_read_date advances on data access but not on metadata access.
    """
    graph_data = "/datasets/page_rank_test_data.csv"
    schema = [("followed", ta.int32), ("follows", ta.int32)]
    frame = ta.Frame(ta.CsvFile(graph_data, schema))
    graph = ta.Graph()
    t0 = graph.last_read_date
    graph.define_vertex_type("node")
    graph.vertices["node"].add_vertices(frame, "follows")
    t1 = graph.last_read_date
    self.assertLess(t0, t1)  # make sure the last_read_date is updating
    graph.vertices["node"].add_vertices(frame, "followed")
    graph.define_edge_type("e1", "node", "node", directed=True)
    graph.edges["e1"].add_edges(frame, "follows", "followed")
    t2 = graph.last_read_date
    self.assertLess(t1, t2)  # make sure the last_read_date is updating
    result = graph.graphx_pagerank(output_property="PageRank",
                                   max_iterations=2,
                                   convergence_tolerance=0.001)
    t3 = graph.last_read_date
    self.assertLess(t2, t3)  # make sure the last_read_date is updating
    vertex_dict = result['vertex_dictionary']
    edge_dict = result['edge_dictionary']
    # assertIn replaces deprecated dict.has_key (removed in Python 3)
    # and yields a clearer failure message.
    self.assertIn('PageRank', dict(vertex_dict['node'].schema))
    self.assertIn('PageRank', dict(edge_dict['e1'].schema))
    t4 = graph.last_read_date
    self.assertEqual(
        t3, t4)  # metadata access should not have updated the date
def test_graph(self): print "define csv file" csv = ta.CsvFile("/datasets/movie.csv", schema= [('user', ta.int32), ('vertex_type', str), ('movie', ta.int32), ('rating', ta.int32), ('splits', str)]) print "creating frame" frame = ta.Frame(csv) # TODO: add asserts verifying inspect is working print print frame.inspect(20) print self.assertEquals(frame.row_count, 20, "frame should have 20 rows") #self.assertEqual(frame.column_names, ['', '', '', '', '']) self.assertEquals(len(frame.column_names), 5, "frame should have 5 columns") print "create graph" graph = ta.Graph() self.assertIsNotNone(graph.uri) print "define vertices and edges" graph.define_vertex_type('movies') graph.define_vertex_type('users') graph.define_edge_type('ratings', 'users', 'movies', directed=True) self.assertEquals(graph.vertices['users'].row_count, 0, "making sure newly defined vertex frame does not have rows") self.assertEquals(graph.vertices['movies'].row_count, 0, "making sure newly defined vertex frame does not have rows") self.assertEquals(graph.edges['ratings'].row_count, 0, "making sure newly defined edge frame does not have rows") #self.assertEquals(graph.vertex_count, 0, "no vertices expected yet") #self.assertEquals(graph.edge_count, 0, "no edges expected yet") print "add_vertices() users" graph.vertices['users'].add_vertices( frame, 'user', []) # TODO: add asserts verifying inspect is working print print graph.vertices['users'].inspect(20) print self.assertEquals(graph.vertices['users'].row_count, 13) self.assertEquals(len(graph.vertices['users'].column_names), 3) #self.assertEquals(graph.vertices['users'].row_count, graph.vertex_count, "row count of user vertices should be same as vertex count on graph") print "add_vertices() movies" graph.vertices['movies'].add_vertices( frame, 'movie', []) self.assertEquals(graph.vertices['users'].row_count, 13) self.assertEquals(graph.vertices['movies'].row_count, 11) self.assertEquals(len(graph.vertices['users'].column_names), 3) 
self.assertEquals(len(graph.vertices['movies'].column_names), 3) #self.assertEquals(graph.vertex_count, 24, "vertex_count should be the total number of users and movies") print "add_edges()" graph.edges['ratings'].add_edges(frame, 'user', 'movie', ['rating'], create_missing_vertices=False) self.assertEquals(len(graph.edges['ratings'].column_names), 5) self.assertEquals(graph.edges['ratings'].row_count, 20, "expected 20 rating edges")
def test_create_kmeans_model_with_duplicte_graph_name(self):
    """Creating a KMeans model with a name already used by a graph must raise.

    NOTE(review): 'duplicte' in the method name looks like a typo for
    'duplicate' — left unchanged to avoid altering the discovered test name.
    """
    taken_name = str(uuid.uuid1()).replace('-', '_')

    # Claim the name with a graph first.
    ta.Graph(name=taken_name)
    self.assertTrue(taken_name in ta.get_graph_names(),
                    taken_name + " should be in the list of graphs")

    # A model may not reuse a name already held by a graph.
    with self.assertRaises(Exception):
        ta.KMeansModel(name=taken_name)
def test_graph_rename(self):
    """A graph rename takes effect on the server and frees the old name."""
    graph_name = str(uuid.uuid1()).replace('-', '_')
    new_graph_name = str(uuid.uuid1()).replace('-', '_')

    # Create under the first name, then rename via attribute assignment.
    graph = ta.Graph(name=graph_name)
    graph.name = new_graph_name

    self.assertTrue(new_graph_name in ta.get_graph_names(),
                    new_graph_name + " should be in list of graphs")
    self.assertFalse(graph_name in ta.get_graph_names(),
                     graph_name + " should not be in list of graphs")
def test_drop_graph_by_object(self):
    """drop_graphs() accepts a graph object and removes exactly that graph."""
    graph_name = str(uuid.uuid1()).replace('-', '_')

    # Create graph and verify it shows up on the server.
    graph = ta.Graph(name=graph_name)
    self.assertTrue(graph_name in ta.get_graph_names(),
                    graph_name + " should exist in the list of graphs")

    # Drop by object (not by name) and confirm exactly one deletion.
    self.assertEqual(1, ta.drop_graphs(graph),
                     "drop_graphs() should have deleted one graph.")
    self.assertFalse(graph_name in ta.get_graph_names(),
                     graph_name + " should not exist in the list of graphs")
def test_generic_drop_by_object(self):
    """The generic drop() accepts a graph object and deletes that one item."""
    graph_name = str(uuid.uuid1()).replace('-', '_')
    graph = ta.Graph(name=graph_name)

    # The freshly created graph must be visible.
    self.assertTrue(graph_name in ta.get_graph_names(),
                    graph_name + " should exist in the list of graph names")

    # Generic drop by object reports one item removed.
    self.assertEqual(1, ta.drop(graph),
                     "drop() should have deleted one item")

    # And the graph must be gone afterwards.
    self.assertFalse(graph_name in ta.get_graph_names(),
                     graph_name + " should not exist in the list of graph")
def test_triangle_count(self):
    """graphx_triangle_count adds the requested output property to the
    resulting vertex frame's schema."""
    graph_data = "/datasets/triangle_count_small.csv"
    schema = [('from_node', str), ('to_node', str), ('max_k', ta.int64), ('cc', ta.int64)]
    frame = ta.Frame(ta.CsvFile(graph_data, schema))
    graph = ta.Graph()
    graph.define_vertex_type("node")
    # Both endpoint columns feed the single "node" vertex type.
    graph.vertices["node"].add_vertices(frame, "from_node", ["max_k", "cc"])
    graph.vertices["node"].add_vertices(frame, "to_node", ["max_k", "cc"])
    graph.define_edge_type("edge", "node", "node", directed=True)
    graph.edges["edge"].add_edges(frame, "from_node", "to_node")
    result = graph.graphx_triangle_count(output_property="triangle")
    frame_result = result['node']
    # assertIn replaces deprecated dict.has_key (removed in Python 3).
    self.assertIn('triangle', dict(frame_result.schema))
def test_kclique(self): print "define csv file" noun_graph_data ="datasets/noun_graph_small.csv" schema = [("source",str),("target",str)] noun_words_frame = ta.Frame(ta.CsvFile(noun_graph_data,schema)) graph = ta.Graph() graph.define_vertex_type("source") graph.vertices["source"].add_vertices(noun_words_frame,"source") graph.vertices["source"].add_vertices(noun_words_frame,"target") graph.define_edge_type("edge", "source", "source", False) graph.edges["edge"].add_edges(noun_words_frame,"source","target") output = graph.kclique_percolation(clique_size = 3, community_property_label = "community") output_dictionary = output['vertex_dictionary'] self.assertTrue('source' in output_dictionary)
def test_page_rank(self):
    """graphx_pagerank writes the output property into both the vertex
    and the edge frame schemas.

    NOTE(review): a test with this same name also appears earlier in this
    source; if both live in the same class the later definition silently
    shadows the earlier one — confirm they belong to different classes.
    """
    graph_data = "/datasets/page_rank_test_data.csv"
    schema = [("followed", ta.int32), ("follows", ta.int32)]
    frame = ta.Frame(ta.CsvFile(graph_data, schema))
    graph = ta.Graph()
    graph.define_vertex_type("node")
    graph.vertices["node"].add_vertices(frame, "follows")
    graph.vertices["node"].add_vertices(frame, "followed")
    graph.define_edge_type("e1", "node", "node", directed=True)
    graph.edges["e1"].add_edges(frame, "follows", "followed")
    result = graph.graphx_pagerank(output_property="PageRank",
                                   max_iterations=2,
                                   convergence_tolerance=0.001)
    vertex_dict = result['vertex_dictionary']
    edge_dict = result['edge_dictionary']
    # assertIn replaces deprecated dict.has_key (removed in Python 3).
    self.assertIn('PageRank', dict(vertex_dict['node'].schema))
    self.assertIn('PageRank', dict(edge_dict['e1'].schema))
def test_annotate_weighted_degrees(self): print "define csv file" schema_node = [("nodename", str), ("in", ta.int64), ("out", ta.int64), ("undirectedcount", ta.int64), ("isundirected", ta.int64), ("outlabel", ta.int64), ("insum", ta.float64), ("outsum", ta.float64), ("undirectedsum", ta.float64), ("labelsum", ta.float64), ("nolabelsum", ta.float64), ("defaultsum", ta.float64), ("integersum", ta.int64)] schema_directed = [("nodefrom", str), ("nodeto", str), ("value", ta.float64), ("badvalue", str), ("intvalue", ta.int32), ("int64value", ta.int64)] schema_undirected = [("node1", str), ("node2", str), ("value", ta.float64)] schema_directed_label = [("nodefrom", str), ("nodeto", str), ("labeltest", ta.float64)] node_frame = ta.Frame( ta.CsvFile("/datasets/annotate_node_list.csv", schema_node)) directed_frame = ta.Frame( ta.CsvFile("/datasets/annotate_directed_list.csv", schema_directed)) undirected_frame = ta.Frame( ta.CsvFile("/datasets/annotate_undirected_list.csv", schema_undirected)) directed_label_frame = ta.Frame( ta.CsvFile("/datasets/annotate_directed_label_list.csv", schema_directed_label)) graph = ta.Graph() graph.define_vertex_type("primary") graph.vertices['primary'].add_vertices(node_frame, "nodename", [ "out", "undirectedcount", "isundirected", "outlabel", "in", "insum", "outsum", "undirectedsum", "labelsum", "nolabelsum", "defaultsum", "integersum" ]) graph.define_edge_type("directed", "primary", "primary", directed=True) graph.define_edge_type("labeldirected", "primary", "primary", directed=True) graph.define_edge_type("undirected", "primary", "primary", directed=False) graph.edges['directed'].add_edges( directed_frame, "nodefrom", "nodeto", ["value", "badvalue", "intvalue", "int64value"]) graph.edges['labeldirected'].add_edges(directed_label_frame, "nodefrom", "nodeto", ["labeltest"]) graph.edges['undirected'].add_edges(undirected_frame, "node1", "node2", ["value"]) output = graph.annotate_weighted_degrees("sumName", degree_option="in", 
edge_weight_property="value") self.assertTrue(type(output) is dict) self.assertTrue(output.has_key('primary')) frame_parquet = output['primary'] self.assertTrue(dict(frame_parquet.schema).has_key('sumName'))
# Example script: load an employee/manager CSV, build a "works under"
# graph, inspect it, and drop it.
employees_frame = ta.Frame(
    ta.CsvFile("employees.csv",
               schema=[('Employee', str), ('Manager', str),
                       ('Title', str), ('Years', ta.int64)],
               skip_header_lines=1), 'employees_frame')
employees_frame.inspect()
# A bipartite graph
# Notice that this is a funny example since managers are also employees!
# Presumably Steve the manager and Steve the employee are the same person
# Option 1
graph = ta.Graph()
graph.define_vertex_type('Employee')
graph.define_edge_type('worksunder', 'Employee', 'Employee', directed=False)
# Managers first (no extra properties), then employees carrying their Title.
graph.vertices['Employee'].add_vertices(employees_frame, 'Manager', [])
graph.vertices['Employee'].add_vertices(employees_frame, 'Employee',
                                        ['Title'])
graph.edges['worksunder'].add_edges(employees_frame, 'Employee', 'Manager',
                                    ['Years'])
# Bare expressions — REPL/example style; the values are computed server-side
# and discarded when run as a script.
graph.vertex_count
graph.edge_count
graph.vertices['Employee'].inspect(9)
graph.edges['worksunder'].inspect(20)
# Option 2
ta.drop_graphs(graph)