def testDataFrame(self):
    """Check that Graph.DataFrame builds graphs from pandas edge/vertex frames.

    Covers weighted edges, an explicit vertex frame carrying attributes,
    the regression from issue #347, and the ``use_vids`` switch.
    """
    edges = pd.DataFrame(
        [["C", "A", 0.4], ["A", "B", 0.1]], columns=[0, 1, "weight"]
    )

    # Edge attributes come from the columns after the first two.
    g = Graph.DataFrame(edges, directed=False)
    self.assertEqual(g.es["weight"], [0.4, 0.1])

    vertices = pd.DataFrame(
        [["A", "blue"], ["B", "yellow"], ["C", "blue"]], columns=[0, "color"]
    )

    # With a vertex frame, vertex order and attributes follow that frame.
    g = Graph.DataFrame(edges, directed=True, vertices=vertices)
    self.assertEqual(g.vs["name"], ["A", "B", "C"])
    self.assertEqual(g.vs["color"], ["blue", "yellow", "blue"])
    self.assertEqual(g.es["weight"], [0.4, 0.1])

    # Issue #347
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame(
        {"node": [1, 2, 3, 4, 5, 6], "label": ["1", "2", "3", "4", "5", "6"]}
    )[["node", "label"]]
    g = Graph.DataFrame(edges, directed=True, vertices=vertices)
    self.assertEqual(g.vs["name"], [1, 2, 3, 4, 5, 6])
    self.assertEqual(g.vs["label"], ["1", "2", "3", "4", "5", "6"])

    # Vertex ids
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    g = Graph.DataFrame(edges)
    self.assertEqual(g.vcount(), 6)

    # With use_vids=True the same values yield 7 vertices
    # (presumably ids 0..6, since the largest id used is 6).
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    g = Graph.DataFrame(edges, use_vids=True)
    self.assertEqual(g.vcount(), 7)
def clustering(df_fingerprint_comparison, df_single_taxonomy):
    """
    Cluster the fingerprint-comparison network and pass the result on.

    Both arguments are handed to ``pd.read_pickle``, so they are pickle
    locations rather than in-memory frames. ``df_single_taxonomy`` should
    hold the aglycons with a single taxonomy entry; ``df_fingerprint_comparison``
    should hold the aglycon pairs together with their Tanimoto index
    (per the original description; the schemas are not visible here).

    The comparison frame is turned into an undirected igraph graph, its
    components are computed, and the resulting cluster object is pickled to
    ``output_data/clustered_similarity_network.txt``. Finally the clusters,
    the vertex data frame and the single-taxonomy frame are forwarded to
    ``cluster_in_lists``. Nothing is returned.
    """
    single_taxonomy_frame = pd.read_pickle(df_single_taxonomy)
    comparison_frame = pd.read_pickle(df_fingerprint_comparison)

    similarity_graph = Graph.DataFrame(comparison_frame, directed=False)
    components = similarity_graph.components()
    vertex_frame = similarity_graph.get_vertex_dataframe()

    # The cluster object is stored as a binary pickle despite the .txt suffix.
    with open(
        "output_data/clustered_similarity_network.txt", "wb"
    ) as pickle_target:
        pickle.dump(components, pickle_target)

    cluster_in_lists(components, vertex_frame, single_taxonomy_frame)
def testDataFrame(self):
    """Exercise Graph.DataFrame: construction, cloning and input validation.

    Covers weighted edges, an explicit vertex frame carrying attributes,
    the regression from issue #347, the ``use_vids`` switch, a round trip
    through the edge/vertex data frames, and the errors raised for
    malformed input.
    """
    edges = pd.DataFrame(
        [["C", "A", 0.4], ["A", "B", 0.1]], columns=[0, 1, "weight"]
    )

    # Edge attributes come from the columns after the first two.
    g = Graph.DataFrame(edges, directed=False)
    self.assertEqual(g.es["weight"], [0.4, 0.1])

    vertices = pd.DataFrame(
        [["A", "blue"], ["B", "yellow"], ["C", "blue"]], columns=[0, "color"]
    )

    # With a vertex frame, vertex order and attributes follow that frame.
    g = Graph.DataFrame(edges, directed=True, vertices=vertices)
    self.assertEqual(g.vs["name"], ["A", "B", "C"])
    self.assertEqual(g.vs["color"], ["blue", "yellow", "blue"])
    self.assertEqual(g.es["weight"], [0.4, 0.1])

    # Issue #347
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame(
        {"node": [1, 2, 3, 4, 5, 6], "label": ["1", "2", "3", "4", "5", "6"]}
    )[["node", "label"]]
    g = Graph.DataFrame(edges, directed=True, vertices=vertices)
    self.assertEqual(g.vs["name"], [1, 2, 3, 4, 5, 6])
    self.assertEqual(g.vs["label"], ["1", "2", "3", "4", "5", "6"])

    # Vertex ids
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    g = Graph.DataFrame(edges)
    self.assertEqual(g.vcount(), 6)

    # With use_vids=True the same values yield 7 vertices
    # (presumably ids 0..6, since the largest id used is 6).
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    g = Graph.DataFrame(edges, use_vids=True)
    self.assertEqual(g.vcount(), 7)

    # Graph clone
    g = Graph.Full(n=100, directed=True, loops=True)
    g.vs["name"] = [f"v{i}" for i in range(g.vcount())]
    g.vs["x"] = [float(i) for i in range(g.vcount())]
    g.es["w"] = [1.0] * g.ecount()
    df_edges = g.get_edge_dataframe()
    df_vertices = g.get_vertex_dataframe()
    g_clone = Graph.DataFrame(df_edges, g.is_directed(), df_vertices, True)
    self.assertTrue(df_edges.equals(g_clone.get_edge_dataframe()))
    self.assertTrue(df_vertices.equals(g_clone.get_vertex_dataframe()))

    # Invalid input
    #
    # Fixtures are built outside each ``assertRaisesRegex`` block so that
    # only the Graph.DataFrame call itself can satisfy the expectation.

    # Edge frame with a single column.
    edges = pd.DataFrame({"source": [1, 2, 3]})
    with self.assertRaisesRegex(ValueError, "two columns"):
        Graph.DataFrame(edges)

    # Vertex frame without any column.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame()
    with self.assertRaisesRegex(ValueError, "one column"):
        Graph.DataFrame(edges, vertices=vertices)

    # String endpoints combined with use_vids=True.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]}).astype(str)
    with self.assertRaisesRegex(TypeError, "integers"):
        Graph.DataFrame(edges, use_vids=True)

    # Negative endpoints combined with use_vids=True.
    edges = -pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    with self.assertRaisesRegex(ValueError, "negative"):
        Graph.DataFrame(edges, use_vids=True)

    # Vertex frame indexed by strings.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame({0: [1, 2, 3]}, index=["1", "2", "3"])
    with self.assertRaisesRegex(TypeError, "integers"):
        Graph.DataFrame(edges, vertices=vertices, use_vids=True)

    # Vertex frame index containing a negative value.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame({0: [1, 2, 3]}, index=[-1, 2, 3])
    with self.assertRaisesRegex(ValueError, "negative"):
        Graph.DataFrame(edges, vertices=vertices, use_vids=True)

    # Vertex frame index with a gap (1, 2, 4).
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame({0: [1, 2, 3]}, index=[1, 2, 4])
    with self.assertRaisesRegex(ValueError, "sequence"):
        Graph.DataFrame(edges, vertices=vertices, use_vids=True)

    # Vertex frame with a MultiIndex.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame(
        {0: [1, 2, 3]},
        index=pd.MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)]),
    )
    with self.assertRaisesRegex(TypeError, "integers"):
        Graph.DataFrame(edges, vertices=vertices, use_vids=True)

    # Duplicate values in the first vertex column.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame({0: [1, 2, 2]})
    with self.assertRaisesRegex(ValueError, "unique"):
        Graph.DataFrame(edges, vertices=vertices)

    # Vertex frame that also carries its own "name" column.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame({0: [1, 2, 3], "name": [1, 2, 2]})
    with self.assertRaisesRegex(ValueError, "already contains"):
        Graph.DataFrame(edges, vertices=vertices)

    # Edges referring to ids that the vertex frame index (0..2) lacks.
    edges = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
    vertices = pd.DataFrame({0: [1, 2, 3]}, index=[0, 1, 2])
    with self.assertRaisesRegex(ValueError, "missing from"):
        Graph.DataFrame(edges, vertices=vertices, use_vids=True)