def pt_parser(file):
    """
    Parse a reaction-path file into a forward and a reverse reaction.

    Lines are ignored until a line starting with ``#`` opens a structure
    record. Inside a record, lines containing ``Item`` are skipped, a line
    containing ``ENERGY`` closes the record (its second whitespace-separated
    token is read as the energy), and every other line is read as
    ``atom x y z``. The first and the last parsed structures are marked as
    equilibrium states ("EQ"); the structure with the highest energy is taken
    as the transition state ("TS").

    :param file: iterable of text lines
    :return: tuple of 2 reaction containers. Each container consists of
        reactant (initial state - mol container), product (final state - mol
        container) and reagent (transition state - mol container). Each mol
        container has "energy" and "type" keys in its meta dictionary.
    :raises ValueError: fewer than three structures were parsed, the
        highest-energy structure is one of the end points, or a coordinate
        line is malformed
    """
    pts = []
    atoms = []
    in_record = False
    for line in file:
        # startswith() is also safe for an empty string, unlike line[0]
        if line.startswith("#"):
            in_record = True
            continue
        if not in_record:
            continue
        if "Item" in line:
            continue
        if "ENERGY" in line:
            pts.append({"mol": atoms,
                        "energy": float(line.split()[1]),
                        "type": "TMP"})
            atoms = []
            in_record = False
            continue
        at, x, y, z = line.split()
        atoms.append((at, float(x), float(y), float(z)))

    # validate before touching pts[0] / pts[-1]: an empty input must produce
    # the documented ValueError, not an IndexError
    if len(pts) < 3:
        raise ValueError(
            "at least three structures are required, got {}".format(len(pts)))
    pts[0]["type"] = "EQ"
    pts[-1]["type"] = "EQ"

    # max() returns the first maximal element, exactly like taking the head
    # of a stable descending sort
    structure = max(pts, key=lambda point: point["energy"])
    if structure["type"] == "EQ":
        raise ValueError(
            "the highest-energy structure is an end point, "
            "no transition state found")
    structure["type"] = "TS"

    def to_molecule(point):
        # wrap the parsed coordinates and copy energy/type into meta
        mol = xyz(point['mol'])
        mol.meta['energy'] = point['energy']
        mol.meta['type'] = point['type']
        return mol

    mol1 = to_molecule(pts[0])
    mol2 = to_molecule(pts[-1])
    ts = to_molecule(structure)
    a = ReactionContainer(reagents=[ts], reactants=[mol1], products=[mol2])
    b = ReactionContainer(reagents=[ts], reactants=[mol2], products=[mol1])
    return a, b
def load_structures_combinations(cls, reactions):
    """
    Preload all combinations of reaction structures.

    Fills ``_cached_structure`` / ``_cached_structures_all`` on the molecules
    taking part in the given reactions and on the reactions themselves.

    :param reactions: reactions entities
    """
    def fill(container, structures, links):
        # put every structure on its side, remapped when a mapping is stored
        for structure, (is_product, atom_map) in zip(structures, links):
            side = 'products' if is_product else 'reagents'
            container[side].append(
                structure.remap(atom_map, copy=True) if atom_map
                else structure)
        return container

    # preload all molecules and structures
    by_molecule = defaultdict(list)
    structures_query = select(
        ms for ms in cls._database_.MoleculeStructure
        for mr in cls._database_.MoleculeReaction
        if ms.molecule == mr.molecule and mr.reaction in reactions)
    for entity in structures_query.prefetch(cls._database_.Molecule):
        if entity.last:
            entity.molecule._cached_structure = entity
        by_molecule[entity.molecule].append(entity)
    for molecule, entities in by_molecule.items():
        molecule._cached_structures_all = tuple(entities)

    # per reaction: all structures, (is_product, mapping) and last structure
    # of every participating molecule, in MoleculeReaction id order
    combos = defaultdict(list)
    mapping = defaultdict(list)
    last = defaultdict(list)
    for link in cls._database_.MoleculeReaction.select(
            lambda x: x.reaction in reactions).order_by(lambda x: x.id):
        owner = link.reaction
        combos[owner].append(link.molecule.structures_all)
        mapping[owner].append((link.is_product, link.mapping))
        last[owner].append(link.molecule.structure)

    for reaction in reactions:
        # load last structure
        reaction._cached_structure = latest = ReactionContainer()
        fill(latest, last[reaction], mapping[reaction])

        # load all structures
        variants = list(product(*combos[reaction]))
        if len(variants) == 1:
            # a single combination is the last structure itself
            reaction._cached_structures_all = (latest, )
            continue
        built = []
        for variant in variants:
            container = ReactionContainer()
            built.append(container)
            fill(container, variant, mapping[reaction])
        reaction._cached_structures_all = tuple(built)
def _remove_unchanged_parts(
        self, reaction: ReactionContainer) -> ReactionContainer:
    """
    Move molecules found on both sides of the reaction to the reagents.

    When ``self._reagents_to_reactants`` is set, the reagents are first
    appended to the reactants. Every reactant that also occurs among the
    products is then removed once from both sides and recorded as a reagent.
    When ``self._keep_reagents`` is not set, the reagents are dropped.

    :param reaction: current reaction
    :return: ReactionContainer
    """
    products = list(reaction.products)
    if self._reagents_to_reactants:
        reactants = list(reaction.reactants) + list(reaction.reagents)
        reagents = []
    else:
        reactants = list(reaction.reactants)
        reagents = list(reaction.reagents)

    # iterate over a snapshot because the reactant list is edited in the loop
    # NOTE: only plain equality is used here; a mapping-aware equivalence
    # check existed in this place as disabled code.
    for candidate in tuple(reactants):
        if candidate not in products:
            continue
        reagents.append(candidate)
        reactants.remove(candidate)
        products.remove(candidate)

    if not self._keep_reagents:
        reagents = []
    return ReactionContainer(reactants=tuple(reactants),
                             reagents=tuple(reagents),
                             products=tuple(products),
                             meta=reaction.meta)
def transform(self, x):
    """
    Transform the molecules of the given reactions and rebuild the reactions.

    All reagents of all reactions, followed by all products, are gathered
    into one flat list and passed to the parent ``transform``. The result is
    then cut back into per-reaction slices.

    :param x: re-iterable collection of ReactionContainer (it is traversed
        several times)
    :return: result of ``iter2array(..., allow_none=True)`` holding one
        ReactionContainer per input reaction, or None where at least one
        transformed molecule of that reaction is None
    :raises TypeError: an element of x is not a ReactionContainer
    :raises ValueError: the parent transform returned a different number of
        molecules than it was given
    """
    # explicit raise instead of assert: asserts vanish under ``python -O``
    if not all(isinstance(s, ReactionContainer) for s in x):
        raise TypeError('invalid dtype, only ReactionContainers acceptable')

    # shifts[side] holds the cumulative slice borders of every reaction's
    # molecules inside the flat ``mols`` list
    shifts = {}
    mols = []
    for i in ('reagents', 'products'):
        sh = shifts[i] = [len(mols)]
        for s in x:
            si = s[i]
            sh.append(len(si) + sh[-1])
            mols.extend(si)

    transformed = super().transform(mols)
    if len(transformed) != len(mols):
        raise ValueError('unexpected transformed molecules amount')

    out = []
    for s, r, p in zip(
            x,
            (transformed[y:z] for y, z in self.__pairwise(shifts['reagents'])),
            (transformed[y:z] for y, z in self.__pairwise(shifts['products']))):
        if any(i is None for i in chain(r, p)):
            # a single failed molecule invalidates the whole reaction
            out.append(None)
        else:
            out.append(ReactionContainer(r, p, meta=s.meta))
    return iter2array(out, allow_none=True)
def _split_ions(self, reaction: ReactionContainer):
    """
    Split every molecule of the reaction into its '.'-separated fragments.

    Each reaction part (reactants, reagents, products) gets a code:
    0 - no charged fragment, 1 - charged fragments with zero total charge,
    2 - charged fragments with non-zero total charge. The highest code of
    the three parts is returned.

    :param reaction: current reaction
    :return: tuple[ReactionContainer, int]
    """
    split_parts = []
    codes = []
    for part in (reaction.reactants, reaction.reagents, reaction.products):
        fragments = []
        for molecule in part:
            fragments.extend(molecule.split('.'))

        charges = [self._calc_charge(fragment) for fragment in fragments]
        if not any(charge != 0 for charge in charges):
            codes.append(0)
        elif sum(charges):
            codes.append(2)
        else:
            codes.append(1)
        split_parts.append(tuple(fragments))

    reactants, reagents, products = split_parts
    return ReactionContainer(reactants=reactants, reagents=reagents,
                             products=products,
                             meta=reaction.meta), max(codes)
def _tautomerize(self, reaction: ReactionContainer) -> ReactionContainer:
    """
    Perform ChemAxon tautomerization.

    Every molecule is written to SDF text, cleaned and standardized through
    ``self._MolHandler`` / ``self._standardizer``, and read back from the
    resulting SDF text.

    :param reaction: reaction that needs to be tautomerized
    :return: ReactionContainer
    """
    def convert(mol):
        # serialize; the value is taken while the buffer is still open
        with io.StringIO() as f, SDFWrite(f) as i:
            i.write(mol)
            sdf = f.getvalue()

        handler = self._MolHandler(sdf)
        handler.clean(True, '2')
        molecule = handler.getMolecule()
        self._standardizer.standardize(molecule)
        new_sdf = self._MolHandler(molecule).toFormat('SDF')

        # parse the standardized record back
        with io.StringIO('\n ' + new_sdf.strip()) as f, \
                SDFRead(f, remap=False) as i:
            return next(i)

    reactants, reagents, products = (
        [convert(mol) for mol in part]
        for part in (reaction.reactants, reaction.reagents,
                     reaction.products))
    return ReactionContainer(reactants=tuple(reactants),
                             reagents=tuple(reagents),
                             products=tuple(products),
                             meta=reaction.meta)
def transform(self, x):
    """
    Transform the molecules of the given reactions and rebuild the reactions.

    Reactants of all reactions, followed by all products, are flattened into
    one list, passed to the parent ``transform`` and sliced back per
    reaction.

    :param x: re-iterable collection of ReactionContainer
    :return: result of ``iter2array`` over the rebuilt ReactionContainers
    :raises TypeError: an element of x is not a ReactionContainer
    :raises ValueError: the parent transform changed the molecule count
    """
    for reaction in x:
        if not isinstance(reaction, ReactionContainer):
            raise TypeError(
                'invalid dtype, only ReactionContainers acceptable')

    molecules = []
    bounds = {}
    for side in ('reactants', 'products'):
        # cumulative slice borders of this side inside ``molecules``
        edges = [len(molecules)]
        bounds[side] = edges
        for reaction in x:
            chunk = reaction[side]
            edges.append(edges[-1] + len(chunk))
            molecules.extend(chunk)

    transformed = super().transform(molecules)
    if len(transformed) != len(molecules):
        raise ValueError('unexpected transformed molecules amount')

    reactant_slices = (transformed[y:z]
                       for y, z in self.__pairwise(bounds['reactants']))
    product_slices = (transformed[y:z]
                      for y, z in self.__pairwise(bounds['products']))
    return iter2array(
        ReactionContainer(r, p, meta=s.meta)
        for s, r, p in zip(x, reactant_slices, product_slices))
def structures(self):
    """
    Build every reaction structure combination.

    Also stores ``structure_entity`` / ``structures_entities`` in the
    ``__dict__`` of the participating molecules and, when absent, the
    reaction's own ``structure`` in ``self.__dict__``.

    :return: tuple of ReactionContainer
    """
    def assemble(pairs):
        # pairs: (molecule structure, MoleculeReaction link)
        sides = ([], [])  # index 0 - reactants, index 1 - products
        for structure, link in pairs:
            if link.mapping:
                structure = structure.remap(link.mapping, copy=True)
            sides[bool(link.is_product)].append(structure)
        return ReactionContainer(*sides)

    # mapping and molecules preload
    mrs = self._molecules.order_by(lambda x: x.id).prefetch(
        self._database_.Molecule)[:]

    # structures preload
    ms = {x.molecule: [] for x in mrs}
    for entity in self._database_.MoleculeStructure.select(
            lambda x: x.molecule in ms.keys()):
        if entity.is_canonic:
            entity.molecule.__dict__['structure_entity'] = entity
        ms[entity.molecule].append(entity)
    for molecule, entities in ms.items():
        molecule.__dict__['structures_entities'] = tuple(entities)

    # all possible reaction structure combinations
    combinations = tuple(product(*(x.molecule.structures for x in mrs)))
    variants = [assemble(zip(combo, mrs)) for combo in combinations]

    if 'structure' not in self.__dict__:
        if len(variants) == 1:  # optimize
            self.__dict__['structure'] = variants[0]
        else:
            self.__dict__['structure'] = assemble(
                (link.molecule.structure, link) for link in mrs)
    return tuple(variants)
def structures_all(self):
    """
    Return all reaction structure combinations, computing them on first use.

    The computation also fills the structure caches of the participating
    molecules and, when empty, this reaction's ``_cached_structure``.

    :return: tuple of ReactionContainer
    """
    if self._cached_structures_all is not None:
        return self._cached_structures_all

    def place(container, structure, link):
        side = 'products' if link.is_product else 'reagents'
        container[side].append(
            structure.remap(link.mapping, copy=True) if link.mapping
            else structure)

    # mapping and molecules preload
    mrs = self.molecules.order_by(lambda x: x.id).prefetch(
        self._database_.Molecule)[:]

    # structures preload
    ms = {x.molecule: [] for x in mrs}
    for entity in self._database_.MoleculeStructure.select(
            lambda x: x.molecule in ms.keys()):
        if entity.last:
            entity.molecule._cached_structure = entity
        ms[entity.molecule].append(entity)
    for molecule, entities in ms.items():
        molecule._cached_structures_all = tuple(entities)

    # all possible reaction structure combinations
    combinations = tuple(
        product(*(x.molecule.structures_all for x in mrs)))
    variants = []
    for combo in combinations:
        container = ReactionContainer()
        variants.append(container)
        for structure, link in zip(combo, mrs):
            place(container, structure, link)
    self._cached_structures_all = tuple(variants)

    if self._cached_structure is None:
        if len(variants) == 1:  # optimize
            self._cached_structure = variants[0]
        else:
            self._cached_structure = latest = ReactionContainer()
            for link in mrs:
                place(latest, link.molecule.structure, link)
    return self._cached_structures_all
def _check_valence(self, reaction: ReactionContainer) -> bool:
    """
    Check the valences of every molecule in the reaction.

    When any molecule reports valence problems, the set of all reported
    items is written to ``reaction.meta['mistake']``.

    :param reaction: ReactionContainer
    :return: True if at least one valence problem was reported, else False
    """
    every_molecule = (reaction.reactants + reaction.products +
                      reaction.reagents)
    problems = [problem
                for molecule in every_molecule
                for problem in molecule.check_valence()]
    if not problems:
        return False
    reaction.meta['mistake'] = f'Valence mistake in {set(problems)}'
    return True
def create_mol_from_pattern(pattern_nums_in_file, molecule):
    """
    Apply the selected CGR patterns to a molecule and collect the results.

    Patterns are taken from the module-level ``CGR_env_hyb`` when their
    ``meta["id"]`` is a key of ``pattern_nums_in_file``. Each pattern is
    decomposed into a reaction, written to and re-read from a template file,
    and the resulting templates are searched in ``molecule``.

    NOTE(review): the nesting of the statements was reconstructed from a
    flattened source - confirm against the original file.

    :param pattern_nums_in_file: mapping from pattern id to the value stored
        as its probability (presumably a probability - verify with caller)
    :param molecule: structure the templates are searched in and patched
    :return: list of 3-item lists ``[[reagents], reactions, probabilities]``;
        ``[[], [], []]`` is appended for every match whose processing raised
    """
    cgr_patterns = []
    cgr_prob = []
    # NOTE: the three accumulators below are shared by all patterns and all
    # matches; they are never reset inside the loops.
    prob_end = []
    destroy_all = []
    created_reactions = []
    # keep only the patterns requested by id, with their stored values
    for cgr in CGR_env_hyb:
        if cgr.meta["id"] in pattern_nums_in_file.keys():
            cgr_patterns.append(cgr)
            cgr_prob.append(pattern_nums_in_file[cgr.meta["id"]])
    reagents_reactions_probs = []
    for cgr_nums in range(len(cgr_patterns)):
        cgr_pattern = cgr_patterns[cgr_nums]
        cgr_to_reaction = CGRpreparer.decompose(cgr_pattern)
        prod_from_cgr = cgr_to_reaction.products
        reag_from_cgr = cgr_to_reaction.reagents
        # only patterns with fewer than three reagents are used
        if len(reag_from_cgr) < 3:
            react = CGRreactor()
            # round-trip through a template file: the reversed reaction
            # (first product -> reagents) is written, then read back with
            # is_template=True; the file name depends on the current size of
            # the module-level ``lemon_tree``
            with RDFwrite("/home/aigul/Retro/templates/test" + str(len(lemon_tree.nodes)) + ".template") as f:
                f.write(ReactionContainer(reagents=[prod_from_cgr[0]], products=reag_from_cgr))
            with RDFread("/home/aigul/Retro/templates/test" + str(len(lemon_tree.nodes)) + ".template", is_template=True) as f:
                template = react.prepare_templates(f.read())
            searcher = react.get_template_searcher(templates=template)
            for i in searcher(molecule):
                destroy = react.patcher(structure=molecule, patch=i.patch)
                try:
                    destroy_all.append(CGRpreparer.split(destroy))
                    prob_end.append(cgr_prob[cgr_nums])
                    # rebuilds a reaction for every split collected so far
                    for j in range(len(destroy_all)):
                        created_reactions.append(ReactionContainer(reagents=destroy_all[j], products=[molecule]))
                    # created_reactions is replaced by the standardizer output
                    created_reactions = standardizer.transform(created_reactions).as_matrix()
                    reagents_reactions_probs.append([[created_reactions[0].reagents], created_reactions, prob_end])
                # NOTE(review): bare except hides every failure, including
                # KeyboardInterrupt, behind an empty placeholder entry
                except:
                    reagents_reactions_probs.append([[], [], []])
    return reagents_reactions_probs
def _check_valence(self, reaction: ReactionContainer) -> bool:
    """
    Check the valences of every molecule in the reaction.

    For each molecule with valence problems a fragment
    ``<numbers> at <atoms> in <molecule>`` is composed. The comma-joined
    fragments are written to ``reaction.meta['mistake']``.

    :param reaction: ReactionContainer
    :return: True if at least one molecule reported problems, else False
    """
    reports = []
    for molecule in (reaction.reactants + reaction.products +
                     reaction.reagents):
        bad_atoms = molecule.check_valence()
        if not bad_atoms:
            continue
        numbers = "|".join(str(number) for number in bad_atoms)
        atoms = "|".join(str(molecule.atom(number)) for number in bad_atoms)
        # str() is applied explicitly so that the molecule's own __format__
        # is never involved
        reports.append(f'{numbers} at {atoms} in {molecule!s}')

    if not reports:
        return False
    reaction.meta['mistake'] = 'Valence mistake: ' + ",".join(reports)
    return True
def structure(self):
    """
    ReactionContainer object

    Built on first access from the last structure of every participating
    molecule and kept in ``self._cached_structure``.
    """
    if self._cached_structure is not None:
        return self._cached_structure

    # mapping and molecules preload
    mrs = self.molecules.order_by(lambda x: x.id).prefetch(
        self._database_.Molecule)[:]

    # last molecule structures preload
    ms = {x.molecule for x in mrs}
    for entity in self._database_.MoleculeStructure.select(
            lambda x: x.last and x.molecule in ms):
        entity.molecule._cached_structure = entity

    container = ReactionContainer()
    self._cached_structure = container
    for link in mrs:
        current = link.molecule.structure
        if link.is_product:
            target = container['products']
        else:
            target = container['reagents']
        if link.mapping:
            current = current.remap(link.mapping, copy=True)
        target.append(current)
    return self._cached_structure
def group_search_and_pickles():
    """
    Number the reactant groups of two rule files and store the results.

    Only rules whose ``meta['id']`` is longer than 50 are processed. Every
    distinct group gets the next free number (starting at 1). The groups are
    written to an SDF file; the number -> group mapping and the per-rule
    group numbers of each file are pickled.
    """
    group_dict = {}
    fg_in_react_dict_1 = {}
    fg_in_react_dict_2 = {}

    def register(groups, rule_key, target):
        for group in groups:
            # a known group keeps its number, a new one gets the next number
            number = group_dict.setdefault(group, len(group_dict) + 1)
            target.setdefault(rule_key, []).append(number)

    with RDFread('/home/tansu/Documents/new_rules_1.rdf') as rule_file_1, \
            RDFread('/home/tansu/Documents/new_rules_2.rdf') as rule_file_2:
        for n, rule in enumerate(rule_file_1, start=1):
            if len(rule.meta['id']) <= 50:
                continue
            reactants = rule.reactants[0].split()
            print('I am reaction - ', n)
            register(reactants, rule, fg_in_react_dict_1)

        for n, rule in enumerate(rule_file_2, start=1):
            if len(rule.meta['id']) <= 50:
                continue
            reactants = rule.reactants[0].split()
            print('I am reaction - ', n)
            # rules of the second file are keyed by a rebuilt reaction
            rebuilt = ReactionContainer(reactants, rule.products[0].split())
            register(reactants, rebuilt, fg_in_react_dict_2)

        with SDFwrite('/home/tansu/Documents/groups_12.sdf') as group_file:
            for group in group_dict:
                group_file.write(group)

        with open('group_dict_12.pickle', 'wb') as f:
            pickle.dump({i: group for group, i in group_dict.items()}, f)
        with open('fg_in_react_dict_1.pickle', 'wb') as e:
            pickle.dump(fg_in_react_dict_1, e)
        with open('fg_in_react_dict_2.pickle', 'wb') as e:
            pickle.dump(fg_in_react_dict_2, e)
def load_structures(cls, reactions):
    """
    Preload the last structures of the given reactions.

    Every reaction gets a fresh ReactionContainer as ``_cached_structure``,
    filled from the last structure of each participating molecule.

    :param reactions: Reaction entities
    """
    # preload all molecules and last structures
    last_structures = select(
        ms for ms in cls._database_.MoleculeStructure
        for mr in cls._database_.MoleculeReaction
        if ms.molecule == mr.molecule and ms.last
        and mr.reaction in reactions)
    for entity in last_structures.prefetch(cls._database_.Molecule):
        entity.molecule._cached_structure = entity

    for reaction in reactions:
        reaction._cached_structure = ReactionContainer()

    # load mapping and fill reaction
    for link in cls._database_.MoleculeReaction.select(
            lambda x: x.reaction in reactions).order_by(lambda x: x.id):
        current = link.molecule.structure
        container = link.reaction.structure
        target = container['products' if link.is_product else 'reagents']
        if link.mapping:
            current = current.remap(link.mapping, copy=True)
        target.append(current)
def prefetch_structure(cls, reactions):
    """
    Preload the canonical structures of the given reactions.

    The assembled ReactionContainer is stored under the ``'structure'`` key
    of every reaction's ``__dict__``.

    :param reactions: Reaction entities list
    """
    # preload all molecules and canonic structures
    canonic_structures = select(
        ms for ms in cls._database_.MoleculeStructure
        for mr in cls._database_.MoleculeReaction
        if ms.molecule == mr.molecule and ms.is_canonic
        and mr.reaction in reactions)
    for entity in canonic_structures.prefetch(cls._database_.Molecule):
        entity.molecule.__dict__['structure_entity'] = entity

    # load mapping and fill reaction
    # per reaction: ([reactants], [products]), indexed by is_product
    sides = defaultdict(lambda: ([], []))
    for link in cls._database_.MoleculeReaction.select(
            lambda x: x.reaction in reactions).order_by(lambda x: x.id):
        current = link.molecule.structure
        current = (current.remap(link.mapping, copy=True) if link.mapping
                   else current)
        sides[link.reaction][link.is_product].append(current)

    for reaction, (reactants, products) in sides.items():
        reaction.__dict__['structure'] = ReactionContainer(reactants,
                                                           products)
def structure(self):
    """
    canonical structure of reaction

    Assembled from the canonical structure of every participating molecule,
    remapped where the link to the reaction stores a mapping.

    :return: ReactionContainer
    """
    # mapping and molecules preload
    mrs = self._molecules.order_by(lambda x: x.id).prefetch(
        self._database_.Molecule)[:]

    # canonic molecule structures preload
    ms = {x.molecule for x in mrs}
    for entity in self._database_.MoleculeStructure.select(
            lambda x: x.is_canonic and x.molecule in ms):
        entity.molecule.__dict__['structure_entity'] = entity

    reactants = []
    products = []
    for link in mrs:
        current = link.molecule.structure
        current = (current.remap(link.mapping, copy=True) if link.mapping
                   else current)
        (products if link.is_product else reactants).append(current)
    return ReactionContainer(reactants, products)
def _remove_unchanged_parts(self, reaction: ReactionContainer) -> ReactionContainer:
    """
    Move molecules found on both sides of the reaction to the reagents.

    Reagents are first merged into the reactants when
    ``self._reagents_to_reactants`` is set. A reactant that also occurs among
    the products is removed once from both sides and becomes a reagent.
    Reagents are discarded unless ``self._keep_reagents`` is set.

    :param reaction: current reaction
    :return: ReactionContainer
    """
    reactants = list(reaction.reactants)
    reagents = list(reaction.reagents)
    products = list(reaction.products)

    if self._reagents_to_reactants:
        reactants += reagents
        reagents = []

    # loop over a copy: the reactant list shrinks while it is examined
    for molecule in reactants[:]:
        if molecule in products:
            reagents.append(molecule)
            reactants.remove(molecule)
            products.remove(molecule)

    kept_reagents = tuple(reagents) if self._keep_reagents else ()
    return ReactionContainer(reactants=tuple(reactants), reagents=kept_reagents,
                             products=tuple(products), meta=reaction.meta)
def enumeration_cgr(reaction):
    """
    Enumerate reaction variants built from the separate reaction centres.

    The reaction centres reported by ``cgrs.centers_list`` are grouped with
    the atom groups found only in the reactants or only in the products.
    When more than one group remains, a new reaction is built for every
    group and every 1/0 state assignment of the remaining groups.

    NOTE(review): the nesting of the statements was reconstructed from a
    flattened source - confirm against the original file.

    :param reaction: reaction to enumerate; ``~reaction`` is expected to give
        an object with ``substructure``, ``centers_list``, ``center_atoms``
        and ``bonds()`` (presumably a CGR - TODO confirm)
    :return: list of ReactionContainer variants, or ``[reaction]`` when at
        most one group was found
    """
    cgrs = ~reaction

    # atoms present on one side only (semantics of diff_atoms assumed from
    # its arguments - verify)
    all_prot = diff_atoms(reaction.reactants, reaction.products)
    all_coming = diff_atoms(reaction.products, reaction.reactants)
    prot_gr = cgrs.substructure(all_prot).split()
    coming_gr = cgrs.substructure(all_coming).split()
    united_prot = defaultdict(list)
    united_come = defaultdict(list)
    united = []
    other_list = []

    # attach every centre to the leaving / incoming groups it touches;
    # centres touching none go straight to other_list
    for x in cgrs.centers_list:
        flafg = 1
        for y, z in zip(prot_gr, coming_gr):
            if set(x).intersection(y):
                flafg = 0
                [united_prot[y].append(x) for x in x]
            if set(x).intersection(z):
                flafg = 0
                [united_come[z].append(x) for x in x]
        if flafg:
            other_list.append(x)

    # merge leaving-side and incoming-side atom lists that share atoms
    for y in united_prot.values():
        flafg = 1
        for x in united_come.values():
            if set(x).intersection(y):
                flafg = 0
                x.extend(y)
                united.append(set(x))
        if flafg:
            other_list.append(y)
    other_list.extend(united)

    if 1 < len(other_list):
        variants_reaction = []
        reactants_to_work = big_mol(reaction.reactants)
        product_to_work = big_mol(reaction.products)
        list_rc = []
        # atoms outside every centre and outside the leaving groups, and the
        # bonds between them, are copied into every variant
        const_at = atom_re_pr(
            set(reactants_to_work).difference(all_prot).difference(
                cgrs.center_atoms), reactants_to_work, product_to_work)
        const_bond = bonds_re_pr(cgrs.bonds(), const_at.keys())

        # one entry per group, laid out as:
        # [0] atom_t, [1] prot_atoms, [2] comming_atoms,
        # [3] t_bond, [4] prot_bonds, [5] comming_bonds
        for big_rc in other_list:
            atom_t = atom_re_pr(
                set(big_rc).difference(all_prot).difference(all_coming),
                reactants_to_work, product_to_work)
            prot_atoms = atom_re_pr(prot_come(big_rc, all_prot, prot_gr),
                                    reactants_to_work, product_to_work)
            comming_atoms = atom_re_pr(
                prot_come(big_rc, all_coming, coming_gr),
                reactants_to_work, product_to_work)
            prot_bonds = bonds_re_pr(cgrs.bonds(), prot_atoms.keys())
            comming_bonds = bonds_re_pr(cgrs.bonds(), comming_atoms.keys())
            t_bond = bonds_re_pr(cgrs.bonds(), atom_t.keys())
            list_rc.append([
                atom_t, prot_atoms, comming_atoms, t_bond, prot_bonds,
                comming_bonds
            ])

        # group ``e`` gets its 're' data on the reactant side and its 'pr'
        # data on the product side; every other group is frozen on both
        # sides in its 're' form (state 0) or its 'pr' form (state 1)
        for e, t_rc in enumerate(list_rc):
            for state in list(product([1, 0], repeat=len(list_rc) - 1)):
                new_reactant = MoleculeContainer()
                new_product = MoleculeContainer()
                new_all_bonds_reactants = []
                new_all_bonds_products = []
                new_reactant = add_at(const_at, reactants_to_work, new_reactant)
                new_product = add_at(const_at, product_to_work, new_product)
                for x, y in t_rc[0].items():
                    new_reactant.add_atom(y['re'], x)
                    new_product.add_atom(y['pr'], x)
                for x, y in t_rc[1].items():
                    new_reactant.add_atom(y['re'], x)  # leaving-group atoms
                for x, y in t_rc[2].items():
                    new_product.add_atom(y['pr'], x)  # incoming atoms
                for x in const_bond.values():
                    new_all_bonds_reactants.append(x['re'])
                    new_all_bonds_products.append(x['pr'])
                for x in t_rc[5].values():
                    new_all_bonds_products.append(x['pr'])
                for x in t_rc[3].values():
                    # a bond is kept only when its third item is truthy
                    if x['re'][2]:
                        new_all_bonds_reactants.append(x['re'])
                    if x['pr'][2]:
                        new_all_bonds_products.append(x['pr'])
                for x in t_rc[4].values():
                    new_all_bonds_reactants.append(x['re'])

                # all groups except ``e``, paired with their state bit
                for s, big_rc in zip(state, list_rc[:e] + list_rc[e + 1:]):
                    if s == 0:
                        for x, y in big_rc[0].items():
                            new_reactant.add_atom(y['re'], x)
                            new_product.add_atom(y['re'], x)
                        for x, y in big_rc[1].items():
                            new_reactant.add_atom(y['re'], x)
                            new_product.add_atom(y['re'], x)
                        for x in big_rc[3].values():
                            if x['re'][2]:
                                new_all_bonds_reactants.append(x['re'])
                                new_all_bonds_products.append(x['re'])
                        for x in big_rc[4].values():
                            new_all_bonds_reactants.append(x['re'])
                            new_all_bonds_products.append(x['re'])
                    if s == 1:
                        for x, y in big_rc[0].items():
                            new_reactant.add_atom(y['pr'], x)
                            new_product.add_atom(y['pr'], x)
                        for x, y in big_rc[2].items():
                            new_reactant.add_atom(y['pr'], x)
                            new_product.add_atom(y['pr'], x)
                        for x in big_rc[3].values():
                            if x['pr'][2]:
                                new_all_bonds_reactants.append(x['pr'])
                                new_all_bonds_products.append(x['pr'])
                        for x in big_rc[5].values():
                            new_all_bonds_reactants.append(x['pr'])
                            new_all_bonds_products.append(x['pr'])

                # add the collected bonds, skipping pairs already connected
                [
                    new_reactant.add_bond(*x) for x in new_all_bonds_reactants
                    if not new_reactant.has_edge(x[0], x[1])
                ]
                [
                    new_product.add_bond(*x) for x in new_all_bonds_products
                    if not new_product.has_edge(x[0], x[1])
                ]
                variants_reaction.append(
                    ReactionContainer(reactants=(new_reactant, ),
                                      products=(new_product, )))
        return variants_reaction
    return [reaction]
def enumeration_cgr(reaction):
    """
    Yield reaction variants built from the separate reaction centres.

    The reaction centres reported by ``cgrs.centers_list`` are merged when
    they share a leaving / incoming atom group or (under the ring conditions
    below) a ring. When more than one merged centre remains, a new reaction
    is yielded for every centre and every 1/0 state assignment of the
    remaining centres; otherwise the input reaction is yielded unchanged.

    NOTE(review): the nesting of the statements was reconstructed from a
    flattened source - confirm against the original file.

    :param reaction: reaction to enumerate; ``~reaction`` is expected to give
        an object with ``substructure``, ``centers_list``, ``center_atoms``
        and ``bonds()`` (presumably a CGR - TODO confirm)
    :return: generator of ReactionContainer; generated variants carry a copy
        of the input reaction's meta
    """
    cgrs = ~reaction

    # atoms present on one side only (semantics of diff_atoms assumed from
    # its arguments - verify)
    all_prot = diff_atoms(reaction.reactants, reaction.products)
    all_coming = diff_atoms(reaction.products, reaction.reactants)
    prot_list = []
    coming_list = []
    if all_prot:
        prot_gr = cgrs.substructure(all_prot).split()
        prot_list = [set(x) for x in prot_gr]
    if all_coming:
        coming_gr = cgrs.substructure(all_coming).split()
        coming_list = [set(x) for x in coming_gr]
    all_prot_come = prot_list + coming_list
    other_list = list(cgrs.centers_list)

    # merge the centres that touch the same leaving / incoming group
    for x in all_prot_come:
        unio = []
        for i, y in enumerate(other_list):
            if set(x).intersection(y):
                unio.append(i)
        if len(unio) > 1:
            qq = set()
            # pop from the end so the remaining indices stay valid
            for i in reversed(unio):
                qq.update(other_list[i])
                other_list.pop(i)
            other_list.append(list(qq))
        del (unio)
    del (prot_list, coming_list)

    # rings of the reactants plus the product rings not seen before
    cycles = []
    for x in reaction.reactants:
        cycles.extend(x.sssr)
    for x in reaction.products:
        for y in x.sssr:
            if y not in cycles:
                cycles.append(y)

    # merge reaction centres taking part in a cyclisation into the ring
    # shared by those centres
    for y in cycles:
        kk = cgrs.substructure(y)
        ept = []
        new_ind = []
        unite = []
        # ring bonds all of order 4 on one side but not on the other
        # (order 4 is presumably the aromatic bond code - TODO confirm)
        if all(z[2].order == 4 for z in list(kk.bonds())) and any(z[2].p_order != 4 for z in list(kk.bonds()))\
                or all(z[2].p_order == 4 for z in list(kk.bonds())) and any(z[2].order != 4 for z in list(kk.bonds())):
            unite.extend(y)
        else:
            for x in other_list:
                #if set(x).intersection(kk) and len(set(x).intersection(kk))>1:
                if len(set(x).intersection(kk)) > 1:
                    ept.append(set(x).intersection(kk))
            if len(ept) >= 2:
                for x in ept:
                    for i, p in enumerate(x):
                        for i2, m in enumerate(x):
                            if i != i2:
                                # bonded ring atoms whose bond has no order
                                # on one of the two sides
                                if any((m == mp[0] and p == mp[1]) or (p == mp[0] and m == mp[1]) for mp in kk.bonds()) and (
                                        kk.bond(m, p).order == None or kk.bond(m, p).p_order == None):
                                    unite.extend([m, p])
        if unite:
            for i3, zop in enumerate(other_list):
                if set(zop).intersection(unite):
                    new_ind.append(i3)
        if len(new_ind) > 1:
            y = []
            # pop from the end so the remaining indices stay valid
            new_ind.reverse()
            for x in new_ind:
                y.extend(other_list[x])
                other_list.pop(x)
            other_list.append(y)
        del (ept, new_ind, unite, kk)
    # end of merging
    del (cycles, all_prot_come)  # crude optimisation

    if 1 < len(other_list):
        #variants_reaction = []
        reactants_to_work = reduce(or_, reaction.reactants)
        product_to_work = reduce(or_, reaction.products)
        list_rc = []
        # atoms outside every centre and outside the leaving groups, and the
        # bonds between them, are copied into every variant
        const_at = atom_re_pr(
            set(reactants_to_work).difference(all_prot).difference(
                cgrs.center_atoms), reactants_to_work, product_to_work)
        const_bond = bonds_re_pr(cgrs.bonds(), const_at.keys())
        prot_atoms = {}
        prot_bonds = {}
        comming_atoms = {}
        comming_bonds = {}

        # one entry per centre, laid out as:
        # [0] atom_t, [1] prot_atoms, [2] comming_atoms,
        # [3] t_bond, [4] prot_bonds, [5] comming_bonds
        for big_rc in other_list:
            if all_prot:
                prot_atoms = atom_re_pr(prot_come(big_rc, all_prot, prot_gr),
                                        reactants_to_work, product_to_work)
                prot_bonds = bonds_re_pr(cgrs.bonds(), prot_atoms.keys())
            if all_coming:
                comming_atoms = atom_re_pr(
                    prot_come(big_rc, all_coming, coming_gr),
                    reactants_to_work, product_to_work)
                comming_bonds = bonds_re_pr(cgrs.bonds(),
                                            comming_atoms.keys())
            atom_t = atom_re_pr(set(big_rc), reactants_to_work,
                                product_to_work)
            t_bond = bonds_re_pr_t(cgrs.bonds(), atom_t.keys(), all_prot,
                                   all_coming)
            list_rc.append([
                atom_t, prot_atoms, comming_atoms, t_bond, prot_bonds,
                comming_bonds
            ])

        # centre ``e`` gets its 're' data on the reactant side and its 'pr'
        # data on the product side; every other centre is frozen on both
        # sides in its 're' form (state 0) or its 'pr' form (state 1)
        for e, t_rc in enumerate(list_rc):
            for state in list(product([1, 0], repeat=len(list_rc) - 1)):
                new_reactant = MoleculeContainer()
                new_product = MoleculeContainer()
                new_all_bonds_reactants = []
                new_all_bonds_products = []
                add_at(const_at, reactants_to_work, new_reactant)
                add_at(const_at, product_to_work, new_product)
                for x in const_bond.values():
                    new_all_bonds_reactants.append(x['re'])
                    new_all_bonds_products.append(x['pr'])
                for x, y in t_rc[0].items():
                    if y['re']:
                        new_reactant.add_atom(
                            y['re'].copy(),
                            x,
                            charge=reactants_to_work.atom(x).charge)
                    if y['pr']:
                        new_product.add_atom(
                            y['pr'].copy(),
                            x,
                            charge=product_to_work.atom(x).charge)
                for x, y in t_rc[1].items():
                    new_reactant.add_atom(
                        y['re'].copy(),
                        x,
                        charge=reactants_to_work.atom(
                            x).charge)  # leaving-group atoms
                for x, y in t_rc[2].items():
                    new_product.add_atom(y['pr'].copy(),
                                         x,
                                         charge=product_to_work.atom(
                                             x).charge)  # incoming atoms
                for x in t_rc[5].values():
                    new_all_bonds_products.append(x['pr'])
                for x in t_rc[3].values():
                    # a bond is kept only when its third item is truthy
                    if x['re'][2]:
                        new_all_bonds_reactants.append(x['re'])
                    if x['pr'][2]:
                        new_all_bonds_products.append(x['pr'])
                for x in t_rc[4].values():
                    new_all_bonds_reactants.append(x['re'])

                # all centres except ``e``, paired with their state bit
                for s, big_rc in zip(state, list_rc[:e] + list_rc[e + 1:]):
                    if s == 0:
                        for x, y in big_rc[0].items():
                            if y['re']:
                                new_reactant.add_atom(
                                    y['re'].copy(),
                                    x,
                                    charge=reactants_to_work.atom(x).charge)
                                new_product.add_atom(
                                    y['re'].copy(),
                                    x,
                                    charge=reactants_to_work.atom(x).charge)
                        for x, y in big_rc[1].items():
                            new_reactant.add_atom(
                                y['re'].copy(),
                                x,
                                charge=reactants_to_work.atom(x).charge)
                            new_product.add_atom(
                                y['re'].copy(),
                                x,
                                charge=reactants_to_work.atom(x).charge)
                        for x in big_rc[3].values():
                            if x['re'][2]:
                                new_all_bonds_reactants.append(x['re'])
                                new_all_bonds_products.append(x['re'])
                        for x in big_rc[4].values():
                            new_all_bonds_reactants.append(x['re'])
                            new_all_bonds_products.append(x['re'])
                    if s == 1:
                        for x, y in big_rc[0].items():
                            if y['pr']:
                                new_reactant.add_atom(
                                    y['pr'].copy(),
                                    x,
                                    charge=product_to_work.atom(x).charge)
                                new_product.add_atom(
                                    y['pr'].copy(),
                                    x,
                                    charge=product_to_work.atom(x).charge)
                        for x, y in big_rc[2].items():
                            new_reactant.add_atom(
                                y['pr'].copy(),
                                x,
                                charge=product_to_work.atom(x).charge)
                            new_product.add_atom(
                                y['pr'].copy(),
                                x,
                                charge=product_to_work.atom(x).charge)
                        for x in big_rc[3].values():
                            if x['pr'][2]:
                                new_all_bonds_reactants.append(x['pr'])
                                new_all_bonds_products.append(x['pr'])
                        for x in big_rc[5].values():
                            new_all_bonds_reactants.append(x['pr'])
                            new_all_bonds_products.append(x['pr'])

                # bonds are added without a duplicate check here
                [new_reactant.add_bond(*x) for x in new_all_bonds_reactants]
                [new_product.add_bond(*x) for x in new_all_bonds_products]
                # [new_product.add_bond(*x) for x in new_all_bonds_products if not new_product.has_edge(x[0], x[1])]
                new_reaction = ReactionContainer(reactants=(new_reactant, ),
                                                 products=(new_product, ))
                new_reaction.meta.update(reaction.meta)
                # variants_reaction.append(new_reaction)
                yield new_reaction
    else:
        yield reaction
def _group_ions(self, reaction: ReactionContainer):
    """
    Ungroup molecules recorded as ions, regroup ions.

    Every reaction part (reactants, reagents, products) is split into its
    '.'-separated fragments. Neutral fragments are kept as they are; charged
    fragments are united, in their order of appearance, into salts that are
    closed every time the running charge returns to zero.

    Return code per part (the highest of the three parts is returned):
    0 - nothing was changed, 1 - ions were regrouped, 2 - ions are
    unbalanced. An empty part contributes no code; a reaction whose three
    parts are all empty returns 0.

    :param reaction: current reaction
    :return: tuple[ReactionContainer, int]
    """
    meta = reaction.meta
    reaction_parts = []
    return_codes = []
    for molecules in (reaction.reactants, reaction.reagents, reaction.products):
        divided_molecules = [x for m in molecules for x in m.split('.')]

        if not divided_molecules:
            reaction_parts.append(())
            continue
        if len(divided_molecules) == 1:
            # a single fragment cannot be regrouped: the part is kept as it
            # came in and only flagged when the fragment is charged
            charged = self._calc_charge(divided_molecules[0]) != 0
            return_codes.append(2 if charged else 0)
            reaction_parts.append(molecules)
            continue

        new_molecules = []
        ions = []
        total_charge = 0
        for molecule in divided_molecules:
            mol_charge = self._calc_charge(molecule)
            total_charge += mol_charge
            if mol_charge == 0:
                new_molecules.append(molecule)
            else:
                ions.append((mol_charge, molecule))

        if not ions:
            return_codes.append(0)
            reaction_parts.append(tuple(new_molecules))
            continue
        if total_charge != 0:
            # unbalanced part: hand back the plain fragments
            return_codes.append(2)
            reaction_parts.append(tuple(divided_molecules))
            continue

        # total_charge is zero here, so it doubles as the running charge of
        # the salt being assembled
        salt = MoleculeContainer()
        for ion_charge, ion in ions:
            salt = salt.union(ion)
            total_charge += ion_charge
            if total_charge == 0:
                new_molecules.append(salt)
                salt = MoleculeContainer()
        if total_charge != 0:
            # defensive: not reachable while the charges are exact integers
            new_molecules.append(salt)
            return_codes.append(2)
        else:
            return_codes.append(1)
        reaction_parts.append(tuple(new_molecules))

    # default=0: with three empty parts there is no code to take the max of
    return ReactionContainer(reactants=reaction_parts[0], reagents=reaction_parts[1],
                             products=reaction_parts[2],
                             meta=meta), max(return_codes, default=0)
def standardize(self, reaction: ReactionContainer) -> ReactionContainer:
    """
    Standardization protocol, in the order the steps are executed:
    transform functional groups, kekulize, check valences (the reaction is
    removed if something is wrong), tautomerize (unless '_skip_tautomerize'
    is set), remove explicit hydrogens, check for radicals (the reaction is
    removed if something was found), check for isotopes (removed or cleaned
    depending on '_action_on_isotopes'), split ions (if a reaction part is
    not neutral and 'keep_unbalanced_ions' is False, the reaction is
    removed), aromatize (thiele method), fix mapping, remove unchanged parts
    (if enabled; reactions left without reactants and/or products are
    removed).

    A step that raises is logged with its traceback. Then, if '_skip_errors'
    is set, the reaction is removed; otherwise an Exception carrying the step
    message is raised, chained to the original error.

    :param reaction: ReactionContainer
    :return: the standardized ReactionContainer, or None when the reaction
        was removed
    :raises Exception: a step failed and '_skip_errors' is not set
    """
    reaction_id = reaction.meta[self._id_tag]
    self.logger.info('Reaction {0}..'.format(reaction_id))

    def step_failed(problem, error, critical=False):
        # must be called from inside an ``except`` block so that
        # logger.exception can attach the active traceback
        message = 'Reaction {0}: {1}'.format(reaction_id, problem)
        self.logger.exception(message)
        if not self._skip_errors:
            if critical:
                self.logger.critical('Stop the algorithm!')
            raise Exception(message) from error

    # ``except Exception`` (not a bare except) everywhere below:
    # KeyboardInterrupt / SystemExit must never be swallowed by _skip_errors
    try:
        reaction.standardize()
    except Exception as error:
        step_failed('Cannot standardize functional groups..', error)
        return None

    try:
        reaction.kekule()
    except Exception as error:
        step_failed('Cannot kekulize..', error)
        return None

    try:
        if self._check_valence(reaction):
            self.logger.info(
                'Reaction {0}: Bad valence: {1}'.format(reaction_id, reaction.meta['mistake']))
            return None
    except Exception as error:
        step_failed('Cannot check valence..', error, critical=True)
        return None

    try:
        if not self._skip_tautomerize:
            reaction = self._tautomerize(reaction)
    except Exception as error:
        step_failed('Cannot tautomerize..', error)
        return None

    try:
        reaction.implicify_hydrogens()
    except Exception as error:
        step_failed('Cannot remove explicit hydrogens..', error)
        return None

    try:
        if self._check_radicals(reaction):
            self.logger.info('Reaction {0}: Radicals were found..'.format(reaction_id))
            return None
    except Exception as error:
        step_failed('Cannot check radicals..', error)
        return None

    try:
        if self._action_on_isotopes == 1 and self._check_isotopes(reaction):
            self.logger.info('Reaction {0}: Isotopes were found..'.format(reaction_id))
            return None
        elif self._action_on_isotopes == 2 and self._check_isotopes(reaction):
            reaction.clean_isotopes()
            self.logger.info('Reaction {0}: Isotopes were removed but the reaction was kept..'.format(
                reaction_id))
    except Exception as error:
        step_failed('Cannot check for isotopes..', error)
        return None

    try:
        reaction, return_code = self._split_ions(reaction)
        if return_code == 1:
            self.logger.info('Reaction {0}: Ions were split..'.format(reaction_id))
        elif return_code == 2:
            self.logger.info('Reaction {0}: Ions were split but the reaction is imbalanced..'.format(
                reaction_id))
            if not self._keep_unbalanced_ions:
                return None
    except Exception as error:
        # message text kept as it was ("group"), although the step splits ions
        step_failed('Cannot group ions..', error)
        return None

    try:
        reaction.thiele()
    except Exception as error:
        step_failed('Cannot aromatize..', error)
        return None

    try:
        reaction.fix_mapping()
    except Exception as error:
        step_failed('Cannot fix mapping..', error)
        return None

    try:
        if self._remove_unchanged_parts_flag:
            reaction = self._remove_unchanged_parts(reaction)
            if not reaction.reactants and reaction.products:
                self.logger.info('Reaction {0}: Reactants are empty..'.format(reaction_id))
                return None
            if not reaction.products and reaction.reactants:
                self.logger.info('Reaction {0}: Products are empty..'.format(reaction_id))
                return None
            if not reaction.reactants and not reaction.products:
                # no exception is active here, so this is a plain error record
                self.logger.error(
                    'Reaction {0}: Cannot remove unchanged parts or the reaction is empty..'.format(
                        reaction_id))
                return None
    except Exception as error:
        step_failed('Cannot remove unchanged parts or the reaction is empty..', error)
        return None

    self.logger.debug('Reaction {0} is done..'.format(reaction_id))
    return reaction
def add_reagent(self, action):
    """
    perform one step: browse the stored candidate reactions or apply a new reagent.

    the action is first translated through self.map and then handled as follows:

    * 'next' - take the next reaction from self.reactions_list and put it in place of the
      last reaction of self.path (or append it when the path is empty). After the last
      candidate has been returned, the list is restored from self.saved_reactions.
    * 'none' - single-reagent reaction: rules returned by reactions_by_fg() are applied
      to the current molecule (self.state) alone.
    * any other value - used as a key of self.db.Molecule. When there is no current
      molecule its structure becomes self.state, otherwise the rules are applied to
      the pair (self.state, reagent).

    every generated reaction is reduced to its largest product (by number of atoms), the
    product is scored with evaluation() against self.target, and the result of
    best_n_molecules(..., 10) is stored as the new candidate list.

    :param action: key of self.map
    :return: tuple of (state, reward, info dict)
    """
    print('self STATE', self.state)
    action = self.map[action]

    if action == 'next':
        if not self.reactions_list:
            if self.state is None:
                return None, -1, {'info': 'no reaction products found'}
            reward = evaluation(self.state, self.target)
            return self.state, reward, {'info': 'no another reaction products at the list'}

        exhausted = len(self.reactions_list) == 1
        reaction = self.reactions_list[0] if exhausted else self.reactions_list.pop(0)
        state = reaction.products[0]
        reward = reaction.meta['tanimoto']
        if self.path:
            self.path[-1] = reaction  # replace the last reaction in the path
        else:
            self.path.append(reaction)
        if exhausted:
            # restore from a copy: the working list is consumed with pop(0), so handing
            # out the saved list itself would destroy it on the following 'next' calls
            self.reactions_list = list(self.saved_reactions or ())
            return state, reward, {'info': 'the last molecule at the list'}
        return state, reward, {}

    if action == 'none':  # single-reagent reaction
        if self.state is None:
            return None, -1, {'info': 'no current molecule'}
        reactants = (self.state,)
        rules = reactions_by_fg(group_list(self.state, self.db))
    else:
        with db_session:
            reagent = self.db.Molecule[action].structure
        if not self.state:
            # nothing to react with yet: the reagent starts the path
            self.state = reagent
            reward = evaluation(self.state, self.target)
            return self.state, reward, {'info': 'the first molecule in the path'}
        reactants = (self.state, reagent)
        rules = reactions_by_fg(group_list(self.state, self.db), single=False)

    reactions_list = []
    for rule in rules:
        reactor = Reactor(rule, delete_atoms=True)
        # only the first reaction produced by each rule is used
        reaction = next(reactor(list(reactants)), None)
        if reaction:
            reactions_list.append((reaction, rule))

    if not reactions_list:
        reward = evaluation(self.state, self.target)
        return self.state, reward, {'info': 'no new reaction products at the list'}

    react_list = []
    for reaction, rule in set(reactions_list):  # drop duplicated (reaction, rule) pairs
        main_product = max(reaction.products, key=lambda x: len(list(x.atoms())))
        meta = {'tanimoto': evaluation(main_product, self.target), 'rule': rule}
        react_list.append(ReactionContainer(reactants=reaction.reactants, products=[main_product], meta=meta))

    reactions_list = best_n_molecules(react_list, 10)
    print('REACT LIST 10 best', (len(reactions_list)), reactions_list)
    # keep an independent snapshot: reactions_list is consumed with pop(0) below and
    # by the following 'next' actions
    self.saved_reactions = list(reactions_list)
    if len(reactions_list) > 1:
        reaction = reactions_list.pop(0)
        info = {}
    else:
        reaction = reactions_list[0]
        info = {'info': 'the last molecule at the list'}
    state = reaction.products[0]
    reward = reaction.meta['tanimoto']
    self.path.append(reaction)
    self.reactions_list = reactions_list
    return state, reward, info
def __init__(self, structure, user, special=None):
    """
    storing reaction in DB.

    every reagent and product of the reaction is linked to a Molecule entity through a
    MoleculeReaction record; molecules whose signature is not found in the DB are created.
    ReactionIndex records are then created for the combinations of the stored structures
    of the participating molecules.

    :param structure: CGRtools ReactionContainer
    :param user: user entity
    :param special: Json serializable Data (expected dict)
    """
    super().__init__(user=user)

    # preload all molecules and structures
    # signatures (bytes) of every reagent and product of the reaction
    signatures = {bytes(m) for m in structure.reagents} | {bytes(m) for m in structure.products}
    # ms: molecule -> list of its loaded structures; s2ms: signature -> matching MoleculeStructure
    ms, s2ms = defaultdict(list), {}
    # loads ALL structures of every molecule that has at least one structure with a matching signature
    for x in select(x for x in self._database_.MoleculeStructure if x.molecule in select(
            y.molecule for y in self._database_.MoleculeStructure if y.signature in signatures)).prefetch(
            self._database_.Molecule):
        # NEED PR
        # select(y for x in db.MoleculeStructure if x.signature in signatures_set
        #        for y in db.MoleculeStructure if y.molecule == x.molecule)
        if x.signature in signatures:
            s2ms[x.signature] = x
        if x.last:
            x.molecule._cached_structure = x
        ms[x.molecule].append(x)

    # fill the per-molecule cache of all loaded structures
    for m, s in ms.items():
        m._cached_structures_all = tuple(s)

    # combinations: for every reaction molecule (reagents first, then products) the list of its
    # alternative structures; duplicates: signature -> Molecule created during this call
    combinations, duplicates = [], {}
    for sl, is_p in ((structure.reagents, False), (structure.products, True)):
        for s in sl:
            sig = bytes(s)
            # NOTE: `ms` is rebound here from the preload dict to a single MoleculeStructure (or None)
            ms = s2ms.get(sig)
            if ms:
                # molecule already stored: link it to the reaction with the mapping returned by get_mapping
                mapping = ms.structure.get_mapping(s)
                self._database_.MoleculeReaction(reaction=self, molecule=ms.molecule, is_product=is_p,
                                                 mapping=mapping)
                # first MoleculeStructure always last
                if ms.last:  # last structure equal to reaction structure
                    c = [s]
                    c.extend(x.structure.remap(mapping, copy=True) for x in ms.molecule.all_editions if not x.last)
                else:  # last structure remapping
                    c = [ms.molecule.structure.remap(mapping, copy=True)]
                    # the matched (non-last) edition is represented by the given structure itself
                    c.extend(x.structure.remap(mapping, copy=True) if x != ms else s
                             for x in ms.molecule.all_editions if not x.last)
                combinations.append(c)
            else:  # New molecule
                if sig not in duplicates:
                    # first occurrence of this signature: create the molecule, no remapping needed
                    m = duplicates[sig] = self._database_.Molecule(s, user)
                    mapping = None
                else:
                    # the same new molecule occurs again in the reaction: reuse the created entity
                    m = duplicates[sig]
                    mapping = m.structure.get_mapping(s)
                self._database_.MoleculeReaction(reaction=self, molecule=m, is_product=is_p, mapping=mapping)
                combinations.append([s])

    # position at which every combination tuple is split into reagents and products
    reagents_len = len(structure.reagents)
    combinations = tuple(product(*combinations))
    if len(combinations) == 1:  # optimize
        # a single combination: built only from the given structures, so `structure` is reused as is
        self._cached_structures_all = (structure,)
        self._cached_structure = structure
        self._database_.ReactionIndex(self, structure, True)
    else:
        # the first combination is built from the first structure of every molecule
        x = combinations[0]
        self._cached_structure = ReactionContainer(reagents=x[:reagents_len], products=x[reagents_len:])
        self._database_.ReactionIndex(self, self._cached_structure, True)

        # remaining combinations keyed by `~x`, so combinations with equal keys collapse into one entry
        # NOTE(review): `~x` is presumably the CGR of the reaction (CGRtools operator) - confirm
        cgr = {}
        for x in combinations[1:]:
            x = ReactionContainer(reagents=x[:reagents_len], products=x[reagents_len:])
            cgr[~x] = x

        self._cached_structures_all = (self._cached_structure, *cgr.values())
        # NOTE(review): the third ReactionIndex argument looks like an "is last structure" flag - confirm
        for x in cgr:
            self._database_.ReactionIndex(self, x, False)

    # only truthy `special` values are stored
    if special:
        self.special = special