class _ReactionTreeLoader(abc.ABC):
    """
    Abstract base for the classes that build a reaction tree object.

    Subclasses implement ``_load`` to populate ``self.tree``. The constructor
    then sets the solved flag from the leaves of the tree and runs the
    repeating-pattern identification on it.
    """

    def __init__(self, *args, **kwargs):
        # Imported locally to avoid circular imports
        from aizynthfinder.analysis import ReactionTree  # noqa

        self.tree = ReactionTree()
        self._load(*args, **kwargs)

        # The tree counts as solved only when every leaf is in stock
        leaves_in_stock = (self.tree.in_stock(leaf) for leaf in self.tree.leafs())
        self.tree.is_solved = all(leaves_in_stock)

        _RepeatingPatternIdentifier.find(self.tree)

    def _add_node(self, node, depth=0, transform=0, in_stock=False, hide=False):
        """
        Add a node to the graph of the tree.

        All nodes get the ``hide`` and ``depth`` attributes; ``Molecule``
        nodes additionally get ``transform`` and ``in_stock``. The node
        becomes the root of the tree if the tree has no root yet.
        """
        node_attributes = {"hide": hide, "depth": depth}
        if isinstance(node, Molecule):
            node_attributes["transform"] = transform
            node_attributes["in_stock"] = in_stock

        self.tree.graph.add_node(node, **node_attributes)

        if not self.tree.root:
            self.tree.root = node

    @abc.abstractmethod
    def _load(self, *args, **kwargs):
        """Populate ``self.tree``; receives the constructor arguments."""
def test_template_occurence_scorer_tree_one_node():
    """A tree holding only a root molecule gets an average template occurrence of 0.0."""
    tree = ReactionTree()
    tree.root = Molecule(smiles="CCCCOc1ccc(CC(=O)N(C)O)cc1")
    tree.graph.add_node(tree.root)

    score = AverageTemplateOccurenceScorer()(tree)

    assert score == 0.0
def find(reaction_tree: ReactionTree) -> None:
    """
    Find the repeating patterns and mark the nodes

    Reactions are listed from each starting reaction and hashed pairwise.
    As long as consecutive pairs have identical hashes, both reactions of
    the leading pair are hidden and the tree is flagged as having
    repeating patterns.

    :param reaction_tree: the reaction tree to process
    """
    for reaction in reaction_tree.reactions():
        # We are only interested in starting at the very first reaction,
        # so skip reactions where any reactant has a non-empty graph entry
        if any(reaction_tree.graph[mol] for mol in reaction.reactants[0]):
            continue

        actions = _RepeatingPatternIdentifier._list_reactions(
            reaction_tree, reaction
        )
        if len(actions) < 5:
            continue

        # One hash per consecutive pair of reactions: (0, 1), (2, 3), ...
        pair_hashes = [
            hash_reactions([first, second], sort=False)
            for first, second in zip(actions[:-1:2], actions[1::2])
        ]

        neighbouring_hashes = zip(pair_hashes[:-1], pair_hashes[1:])
        for pair_idx, (current, following) in enumerate(neighbouring_hashes):
            # Stopping at the first mismatch prevents removing repeating
            # patterns in the middle of a route
            if current != following:
                break

            first_action = 2 * pair_idx
            _RepeatingPatternIdentifier._hide_reaction(
                reaction_tree, actions[first_action]
            )
            _RepeatingPatternIdentifier._hide_reaction(
                reaction_tree, actions[first_action + 1]
            )
            reaction_tree.has_repeating_patterns = True
def test_find_repetetive_patterns_created_tree(default_config, mock_stock, shared_datadir):
    """Trees built from a search tree with repetitive units get the expected number of hidden nodes."""
    mock_stock(default_config, Molecule(smiles="CC"), Molecule(smiles="C"))

    # (search tree file, expected number of hidden nodes)
    cases = [
        # One with 2 repetitive units
        ("tree_with_repetition.json", 5),
        # One with 3 repetitive units
        ("tree_with_3_repetitions.json", 10),
    ]
    for filename, expected_hidden in cases:
        search_tree = SearchTree.from_json(shared_datadir / filename, default_config)
        rt = ReactionTree.from_analysis(TreeAnalysis(search_tree))

        assert rt.has_repeating_patterns
        hidden_nodes = [
            node for node in rt.graph if rt.graph.nodes[node].get("hide", False)
        ]
        assert len(hidden_nodes) == expected_hidden
def test_find_repetetive_patterns_created_tree_no_patterns(
    default_config, mock_stock, shared_datadir
):
    """Trees built from search trees without repetition are not flagged and hide nothing."""
    mock_stock(default_config, Molecule(smiles="CC"), Molecule(smiles="CCCO"))

    def _reaction_tree_from(filename):
        # Load the search tree from the data directory and extract a reaction tree
        search_tree = SearchTree.from_json(shared_datadir / filename, default_config)
        return ReactionTree.from_analysis(TreeAnalysis(search_tree))

    # Try with a short tree (3 nodes, 1 reaction)
    short_route = _reaction_tree_from("tree_without_repetition.json")
    assert not short_route.has_repeating_patterns
    hidden_nodes = [
        node
        for node in short_route.graph
        if short_route.graph.nodes[node].get("hide", False)
    ]
    assert len(hidden_nodes) == 0

    # Try with something longer
    longer_route = _reaction_tree_from("tree_without_repetition_longer.json")
    assert not longer_route.has_repeating_patterns
def _score_reaction_tree(self, tree: ReactionTree) -> float:
    """
    Score a reaction tree with the score of a state built from its leaves.

    Each leaf is wrapped in a parent-less ``TreeMolecule`` whose transform
    is half of the leaf depth (integer division).

    :param tree: the reaction tree to score
    :return: the score of the state made up of the leaf molecules
    """
    leaf_molecules = []
    for leaf in tree.leafs():
        n_transforms = tree.depth(leaf) // 2
        leaf_molecules.append(
            TreeMolecule(parent=None, transform=n_transforms, smiles=leaf.smiles)
        )
    return State(leaf_molecules, self._config).score
def test_route_distance_other(load_reaction_tree):
    """The molecule-content distance between routes 0 and 1 is approximately 2.6522."""
    first_route = ReactionTree.from_dict(
        load_reaction_tree("routes_for_clustering.json", 0)
    )
    second_route = ReactionTree.from_dict(
        load_reaction_tree("routes_for_clustering.json", 1)
    )

    distance = first_route.distance_to(second_route, content="molecules")

    assert pytest.approx(distance, abs=1e-2) == 2.6522
def __init__(self, *args, **kwargs):
    """
    Create an empty reaction tree, load it and post-process it.

    All arguments are forwarded to ``_load``. Afterwards the tree is marked
    as solved if all of its leaves are in stock, and the repeating-pattern
    identification is run on it.
    """
    # Imported locally to avoid circular imports
    from aizynthfinder.analysis import ReactionTree  # noqa

    self.tree = ReactionTree()
    self._load(*args, **kwargs)

    leaves_in_stock = (self.tree.in_stock(leaf) for leaf in self.tree.leafs())
    self.tree.is_solved = all(leaves_in_stock)

    _RepeatingPatternIdentifier.find(self.tree)
def test_route_distances_random(load_reaction_tree):
    """With an exhaustive limit of 1, the distance iterator yields two distances, the first being ~2.6522."""
    wrappers = []
    for route_index in (0, 1):
        tree = ReactionTree.from_dict(
            load_reaction_tree("routes_for_clustering.json", route_index)
        )
        wrappers.append(ReactionTreeWrapper(tree, exhaustive_limit=1))
    first_wrapper, second_wrapper = wrappers

    distances = list(first_wrapper.distance_iter(second_wrapper, exhaustive_limit=1))

    assert len(distances) == 2
    assert pytest.approx(distances[0], abs=1e-2) == 2.6522
def test_scorers_tree_one_node_route(default_config):
    """Check every scorer on a tree that consists of a single root molecule."""
    tree = ReactionTree()
    tree.root = Molecule(smiles="CCCCOc1ccc(CC(=O)N(C)O)cc1")
    tree.graph.add_node(tree.root)

    state_score = StateScorer(default_config)(tree)
    assert pytest.approx(state_score, abs=1e-3) == 0.0497

    # (scorer class, expected exact score)
    expected_scores = [
        (NumberOfReactionsScorer, 0),
        (NumberOfPrecursorsScorer, 1),
        (NumberOfPrecursorsInStockScorer, 0),
        (PriceSumScorer, 10),
        (RouteCostScorer, 10),
    ]
    for scorer_class, expected in expected_scores:
        assert scorer_class(default_config)(tree) == expected
def test_create_two_trees_of_everything(load_reaction_tree):
    """
    Wrapping route 0 with both molecules and reactions gives two trees
    that differ only in the order of the children of the reaction.
    """
    tree = ReactionTree.from_dict(
        load_reaction_tree("routes_for_clustering.json", 0)
    )
    wrapper = ReactionTreeWrapper(tree, content=TreeContent.BOTH)

    assert wrapper.info["tree count"] == 2
    assert len(wrapper.trees) == 2

    mol_nodes = list(tree.molecules())
    rxn_nodes = list(tree.reactions())
    leaf_smiles = [node.smiles for node in mol_nodes[1:]]

    # The first tree keeps the order of the leaves, the second reverses it
    cases = (
        (wrapper.first_tree, leaf_smiles),
        (wrapper.trees[1], leaf_smiles[::-1]),
    )
    for tree_dict, expected_smiles in cases:
        assert tree_dict["smiles"] == mol_nodes[0].smiles
        assert len(tree_dict["children"]) == 1

        reaction_dict = tree_dict["children"][0]
        assert reaction_dict["smiles"] == rxn_nodes[0].smiles
        assert len(reaction_dict["children"]) == 2

        child_smiles = [child["smiles"] for child in reaction_dict["children"]]
        assert child_smiles == expected_smiles
def test_create_all_trees_of_molecules(load_reaction_tree):
    """
    Wrapping route 0 with the default content gives two molecule trees
    that differ only in the order of the children of the root.
    """
    tree = ReactionTree.from_dict(
        load_reaction_tree("routes_for_clustering.json", 0)
    )
    wrapper = ReactionTreeWrapper(tree)

    assert wrapper.info["tree count"] == 2
    assert len(wrapper.trees) == 2

    mol_nodes = list(tree.molecules())
    leaf_smiles = [node.smiles for node in mol_nodes[1:]]

    # The first tree keeps the order of the leaves, the second reverses it
    cases = (
        (wrapper.first_tree, leaf_smiles),
        (wrapper.trees[1], leaf_smiles[::-1]),
    )
    for tree_dict, expected_smiles in cases:
        assert tree_dict["smiles"] == mol_nodes[0].smiles
        assert len(tree_dict["children"]) == 2

        child_smiles = [child["smiles"] for child in tree_dict["children"]]
        assert child_smiles == expected_smiles
def test_reactiontree_to_image_hiding(load_reaction_tree, mocker):
    """
    Drawing with ``show_all=False`` adds 3 fewer molecules, 2 fewer
    reactions and 5 fewer edges than drawing with ``show_all=True``.
    """
    patched_add_mol = mocker.patch(
        "aizynthfinder.utils.image.GraphvizReactionGraph.add_molecule"
    )
    patched_add_reaction = mocker.patch(
        "aizynthfinder.utils.image.GraphvizReactionGraph.add_reaction"
    )
    patched_add_edge = mocker.patch(
        "aizynthfinder.utils.image.GraphvizReactionGraph.add_edge"
    )
    mocker.patch("aizynthfinder.utils.image.GraphvizReactionGraph.to_image")

    rt = ReactionTree.from_dict(
        load_reaction_tree("sample_reaction_with_hidden.json", 1)
    )
    assert rt.has_repeating_patterns

    def _graph_sizes():
        # Number of molecules, reactions and edges currently in the tree
        return (
            len(list(rt.molecules())),
            len(list(rt.reactions())),
            len(rt.graph.edges),
        )

    # Everything is drawn when hidden nodes are shown
    rt.to_image(show_all=True)
    n_mols, n_rxns, n_edges = _graph_sizes()
    assert patched_add_mol.call_count == n_mols
    assert patched_add_reaction.call_count == n_rxns
    assert patched_add_edge.call_count == n_edges

    for patched in (patched_add_mol, patched_add_reaction, patched_add_edge):
        patched.reset_mock()

    # Hidden nodes and their edges are left out otherwise
    rt.to_image(show_all=False)
    n_mols, n_rxns, n_edges = _graph_sizes()
    assert patched_add_mol.call_count == n_mols - 3
    assert patched_add_reaction.call_count == n_rxns - 2
    assert patched_add_edge.call_count == n_edges - 5
def test_create_wrapper(load_reaction_tree, route_index):
    """
    Check the info and the number of trees of a wrapper for each tree content.

    ``route_index`` is supplied from outside this function (presumably by a
    parametrization that is not visible here).
    """
    tree = ReactionTree.from_dict(
        load_reaction_tree("routes_for_clustering.json", route_index)
    )

    # Default content: molecules only
    molecule_wrapper = ReactionTreeWrapper(tree)
    assert molecule_wrapper.info["content"] == TreeContent.MOLECULES
    assert molecule_wrapper.info["tree count"] == 4
    assert molecule_wrapper.info["root"] is tree.root
    assert len(molecule_wrapper.trees) == 4

    # Reactions only: the root is the first entry of the graph adjacency of the tree root
    reaction_wrapper = ReactionTreeWrapper(tree, TreeContent.REACTIONS)
    assert reaction_wrapper.info["content"] == TreeContent.REACTIONS
    assert reaction_wrapper.info["tree count"] == 1
    assert reaction_wrapper.info["root"] is list(tree.graph[tree.root])[0]
    assert len(reaction_wrapper.trees) == 1

    # Molecules and reactions
    both_wrapper = ReactionTreeWrapper(tree, TreeContent.BOTH)
    assert both_wrapper.info["content"] == TreeContent.BOTH
    assert both_wrapper.info["tree count"] == 4
    assert both_wrapper.info["root"] is tree.root
    assert len(both_wrapper.trees) == 4
def test_route_node_depth_from_json(load_reaction_tree):
    """
    Check the depth of molecule and reaction nodes of a tree loaded from a dict,
    and that each molecule depth is twice its ``transform`` attribute.
    """
    rt = ReactionTree.from_dict(
        load_reaction_tree("sample_reaction_with_hidden.json", 0)
    )

    mols = list(rt.molecules())
    expected_mol_depths = [0, 2, 4, 6, 6, 8, 8, 10, 12, 12, 2]
    for idx, expected_depth in enumerate(expected_mol_depths):
        assert rt.depth(mols[idx]) == expected_depth

    rxns = list(rt.reactions())
    expected_rxn_depths = [1, 3]
    for idx, expected_depth in enumerate(expected_rxn_depths):
        assert rt.depth(rxns[idx]) == expected_depth

    for mol in rt.molecules():
        n_transforms = rt.graph.nodes[mol]["transform"]
        assert rt.depth(mol) == 2 * n_transforms
def test_state_scorer_tree(load_reaction_tree, default_config, mock_stock):
    """The state score of the sample reaction tree rounds to 0.994 with the mocked stock."""
    stock_smiles = (
        "N#Cc1cccc(N)c1F",
        "O=C(Cl)c1ccc(F)cc1",
        "CN1CCC(Cl)CC1",
        "O",
    )
    mock_stock(default_config, *stock_smiles)
    tree = ReactionTree.from_dict(load_reaction_tree("sample_reaction.json"))

    score = StateScorer(default_config)(tree)

    assert round(score, 4) == 0.994
def test_scoring_branched_route(load_reaction_tree, default_config):
    """Check the state score and the counting scorers on the branched route."""
    tree = ReactionTree.from_dict(load_reaction_tree("branched_route.json"))

    state_score = StateScorer(default_config)(tree)
    assert pytest.approx(state_score, abs=1e-6) == 0.00012363

    # (scorer class, expected exact score)
    expected_counts = [
        (NumberOfReactionsScorer, 14),
        (NumberOfPrecursorsScorer, 8),
        (NumberOfPrecursorsInStockScorer, 0),
    ]
    for scorer_class, expected in expected_counts:
        assert scorer_class(default_config)(tree) == expected
def test_reactiontree_from_dict(load_reaction_tree):
    """Round-trip check: ``from_dict`` followed by ``to_dict`` reproduces the input dict."""
    expected = load_reaction_tree("sample_reaction.json")

    round_tripped = ReactionTree.from_dict(expected).to_dict()

    assert round_tripped == expected
def _score_reaction_tree(self, tree: ReactionTree) -> float:
    """
    Compute the mean ``library_occurence`` metadata value over the reactions of a tree.

    Reactions that lack the metadata key count as 0.

    :param tree: the reaction tree to score
    :return: the mean occurrence, or 0.0 for a tree without reactions
    """
    reactions = list(tree.reactions())
    n_reactions = len(reactions)
    if n_reactions == 0:
        return 0.0

    total_occurence = sum(
        [rxn.metadata.get("library_occurence", 0) for rxn in reactions]
    )
    return total_occurence / n_reactions
def test_reactiontree_to_json(setup_complete_tree, shared_datadir):
    """The JSON of a reaction tree built from an analysis matches the sample reaction file."""
    expected_path = str(shared_datadir / "sample_reaction.json")
    with open(expected_path, "r") as fileobj:
        expected = json.load(fileobj)

    search_tree, _ = setup_complete_tree
    reaction_tree = ReactionTree.from_analysis(TreeAnalysis(search_tree))

    assert json.loads(reaction_tree.to_json()) == expected
def test_find_repetetive_patterns_no_patterns(load_reaction_tree):
    """A tree without repetitive patterns is not flagged and has no node with a true ``hide`` attribute."""
    tree_dict = load_reaction_tree("sample_reaction_with_hidden.json", 0)
    rt = ReactionTree.from_dict(tree_dict)

    assert not rt.has_repeating_patterns

    hidden_nodes = [node for node in rt.graph if rt.graph.nodes[node]["hide"]]
    assert len(hidden_nodes) == 0
def test_create_combine_tree_to_visjs(load_reaction_tree, tmpdir):
    """
    Exporting the combined tree of three routes to a vis.js page produces
    a tar file holding ``./route.html`` and 8 PNG files.
    """
    routes = [
        ReactionTree.from_dict(
            load_reaction_tree("routes_for_clustering.json", route_index)
        )
        for route_index in (0, 1, 2)
    ]
    collection = RouteCollection(reaction_trees=routes)
    tar_filename = str(tmpdir / "routes.tar")

    combined = collection.combined_reaction_trees()
    combined.to_visjs_page(tar_filename)

    assert os.path.exists(tar_filename)
    with TarFile(tar_filename) as tarobj:
        assert "./route.html" in tarobj.getnames()
        png_names = [name for name in tarobj.getnames() if name.endswith(".png")]
        assert len(png_names) == 8
def test_reactiontree_from_dict(shared_datadir):
    """Round-trip check: ``from_dict`` followed by ``to_dict`` reproduces the dict read from file."""
    sample_path = str(shared_datadir / "sample_reaction.json")
    with open(sample_path, "r") as fileobj:
        expected = json.load(fileobj)

    round_tripped = ReactionTree.from_dict(expected).to_dict()

    assert round_tripped == expected
def test_clustering_collection_timeout(load_reaction_tree):
    """Clustering with a timeout of 0 returns no labels and leaves ``clusters`` as None."""
    routes = []
    for route_index in range(3):
        route_dict = load_reaction_tree("routes_for_clustering.json", route_index)
        routes.append(ReactionTree.from_dict(route_dict))
    collection = RouteCollection(reaction_trees=routes)

    cluster_labels = collection.cluster(n_clusters=1, timeout=0)

    assert len(cluster_labels) == 0
    assert collection.clusters is None
def test_create_wrapper_no_reaction():
    """
    Wrap a tree holding a single molecule: one tree for molecule-containing
    content, no tree and no root for reaction-only content.
    """
    tree = ReactionTree()
    mol = Molecule(smiles="CCC")
    tree.graph.add_node(mol)
    tree.root = mol

    # Default content
    molecule_wrapper = ReactionTreeWrapper(tree)
    assert molecule_wrapper.info["tree count"] == 1
    assert molecule_wrapper.info["root"] is mol
    assert len(molecule_wrapper.trees) == 1

    # Reactions only: there is nothing to wrap
    reaction_wrapper = ReactionTreeWrapper(tree, TreeContent.REACTIONS)
    assert reaction_wrapper.info["tree count"] == 0
    assert reaction_wrapper.info["root"] is None
    assert len(reaction_wrapper.trees) == 0

    # Molecules and reactions
    both_wrapper = ReactionTreeWrapper(tree, TreeContent.BOTH)
    assert both_wrapper.info["tree count"] == 1
    assert both_wrapper.info["root"] is mol
    assert len(both_wrapper.trees) == 1
def test_reaction_hash(load_reaction_tree):
    """Hash the first four reactions of the branched route, with and without sorting."""
    rt = ReactionTree.from_dict(load_reaction_tree("branched_route.json"))
    first_four = list(rt.reactions())[:4]

    sorted_hash = hash_reactions(first_four)
    assert sorted_hash == "359045e74d757c7895304337c855817748b9eefe0e1e680258d4574e"

    unsorted_hash = hash_reactions(first_four, sort=False)
    assert unsorted_hash == "d0cf86e9a5e3a8539964ae62dab51952f64db8c84d750a3cc5b381a6"
def test_reactiontree_to_image(load_reaction_tree, mocker):
    """
    ``to_image`` calls the patched image maker once; its first two positional
    arguments are as long as the molecules and reactions of the tree.
    """
    patched_make_image = mocker.patch(
        "aizynthfinder.analysis.make_graphviz_image"
    )
    rt = ReactionTree.from_dict(load_reaction_tree("sample_reaction.json"))

    rt.to_image()

    patched_make_image.assert_called_once()
    positional_args = patched_make_image.call_args[0]
    assert len(positional_args[0]) == len(list(rt.molecules()))
    assert len(positional_args[1]) == len(list(rt.reactions()))
def test_create_combine_tree_dict_from_json(load_reaction_tree):
    """The dict of the combined tree of three routes has the expected shape and equals the stored example."""
    routes = [
        ReactionTree.from_dict(
            load_reaction_tree("routes_for_clustering.json", route_index)
        )
        for route_index in (0, 1, 2)
    ]
    collection = RouteCollection(reaction_trees=routes)
    expected = load_reaction_tree("combined_example_tree.json")

    combined_dict = collection.combined_reaction_trees().to_dict()

    # Spot-check the structure before comparing the full dict
    top_children = combined_dict["children"]
    assert len(top_children) == 2
    assert top_children[0]["is_reaction"]
    assert len(top_children[0]["children"]) == 2
    assert len(top_children[1]["children"]) == 2

    nested_node = top_children[1]["children"][0]
    assert len(nested_node["children"]) == 2
    assert nested_node["children"][0]["is_reaction"]

    assert combined_dict == expected
def test_rescore_collection_for_trees(default_config, mock_stock, load_reaction_tree):
    """
    Rescoring a collection replaces the current scores and keeps the
    previously computed score in ``all_scores``.
    """
    stock_smiles = (
        "N#Cc1cccc(N)c1F",
        "O=C(Cl)c1ccc(F)cc1",
        "CN1CCC(Cl)CC1",
        "O",
    )
    mock_stock(default_config, *stock_smiles)
    rt = ReactionTree.from_dict(load_reaction_tree("sample_reaction.json"))
    routes = RouteCollection(reaction_trees=[rt])

    routes.compute_scores(StateScorer(default_config))
    routes.rescore(NumberOfReactionsScorer())

    assert routes.scores[0] == 2
    first_route_scores = routes.all_scores[0]
    assert np.round(first_route_scores["state score"], 3) == 0.994
    assert first_route_scores["number of reactions"] == 2
def test_create_clustering_gui(mocker, load_reaction_tree):
    """Creating the clustering GUI for a collection of three routes calls the patched ``display``."""
    routes = []
    for route_index in range(3):
        route_dict = load_reaction_tree("routes_for_clustering.json", route_index)
        routes.append(ReactionTree.from_dict(route_dict))
    collection = RouteCollection(reaction_trees=routes)
    display_patch = mocker.patch("aizynthfinder.interfaces.gui.clustering.display")

    ClusteringGui(collection)

    display_patch.assert_called()