def test_example_wang_4(self) -> None:
    """Cap the ``example_wang_4`` fragment and check the resulting formula.

    The fragment has six heavy atoms: bonds 2-3, 3-4, 4-5, 5-6 and 6-2 close
    a five-membered ring, and atom 1 (O) is attached to atom 2. Atoms 3, 4
    and 5 are flagged as uncapped. The capped molecule is expected to have
    the formula ``C4H3NO``.

    Graphs are written for the uncapped molecule and for each capped result.
    """
    # One row per atom: (index, element, valence, capped).
    atom_table = (
        (1, 'O', 1, True),
        (2, 'C', 3, True),
        (3, 'C', 3, False),
        (4, 'C', 3, False),
        (5, 'C', 3, False),
        (6, 'N', 2, True),
    )
    atoms = {
        index: Atom(
            index=index,
            element=element,
            valence=valence,
            capped=capped,
            coordinates=None,
        )
        for (index, element, valence, capped) in atom_table
    }
    bonds = [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 2)]

    uncapped_molecule = Molecule(atoms, bonds, name='example_wang_4')
    uncapped_molecule.write_graph('uncapped_molecule')

    # NOTE(review): use_ILP is not forwarded to the capping call; both
    # iterations invoke the same method and differ only in the file name.
    for use_ILP in (True, False):
        capping_method = getattr(uncapped_molecule, CAPPING_FUNCTION_NAME)
        capped_molecule = capping_method(debug=None)

        method_label = 'ILP' if use_ILP else 'bruteforce'
        capped_molecule.write_graph(
            'capped_molecule_with_{0}'.format(method_label))

        assert capped_molecule.formula(charge=True) == 'C4H3NO', \
            capped_molecule.formula(charge=True)
def test_example_2(self) -> None:
    """Cap the two-atom ``example_2`` fragment and check the formula.

    The fragment is a single C-O bond with both atoms flagged as uncapped
    (C with valence 3, O with valence 1). The capped molecule is expected
    to have the formula ``CH2O``.
    """
    # One row per atom: (index, element, valence); every atom is uncapped.
    atom_table = (
        (1, 'C', 3),
        (2, 'O', 1),
    )
    atoms = {
        index: Atom(
            index=index,
            element=element,
            valence=valence,
            capped=False,
            coordinates=None,
        )
        for (index, element, valence) in atom_table
    }

    uncapped_molecule = Molecule(atoms, [(1, 2)], name='example_2')
    uncapped_molecule.write_graph('uncapped_molecule')

    # Only the ILP label is exercised here (single-element tuple).
    for use_ILP in (True, ):
        capping_method = getattr(uncapped_molecule, CAPPING_FUNCTION_NAME)
        capped_molecule = capping_method(debug=None)

        method_label = 'ILP' if use_ILP else 'bruteforce'
        capped_molecule.write_graph(
            'capped_molecule_with_{0}'.format(method_label))

        assert capped_molecule.formula(charge=True) == 'CH2O', \
            capped_molecule.formula(charge=True)
def test_example_0(self) -> None:
    """Assign bond orders and charges on a fully capped five-atom molecule.

    The atoms are handed to ``Molecule`` as a list. The test makes no
    assertion: it writes a graph before and after running
    ``assign_bond_orders_and_charges_with_ILP`` and passes if nothing raises.
    """
    # One row per atom: (index, element, valence); every atom is capped.
    atom_table = (
        (1, 'C', 3),
        (2, 'O', 1),
        (3, 'O', 2),
        (4, 'H', 1),
        (5, 'H', 1),
    )
    atoms = [
        Atom(
            index=index,
            element=element,
            valence=valence,
            capped=True,
            coordinates=None,
        )
        for (index, element, valence) in atom_table
    ]
    bonds = [(1, 2), (1, 3), (3, 4), (1, 5)]

    molecule = Molecule(atoms, bonds, name='methoanoic acid')

    molecule.write_graph('raw_molecule')
    molecule.assign_bond_orders_and_charges_with_ILP()
    molecule.write_graph('molecule_with_electrons')
molecule.get_all_tautomers( **OPTIONS[molecule_name] if molecule_name in OPTIONS else {})) else: print( molecule.assign_bond_orders_and_charges_with_ILP( enforce_octet_rule=True)) print( molecule.write_graph(molecule_name, output_size=(int(2100 / 1.5), int(2970 / 1.5)))) if molecule_name == 'warfarin': print(molecule) print() molecule = Molecule([ Atom(index=1, element='C', valence=3, capped=True, coordinates=None), Atom(index=2, element='C', valence=3, capped=True, coordinates=None) ], [(1, 2)]) print(molecule.get_all_tautomers()) print(molecule.write_graph('ethene', output_size=(200, 200))) molecule = Molecule([ Atom(index=1, element='H', valence=1, capped=True, coordinates=None), Atom(index=2, element='O', valence=1, capped=True, coordinates=None) ], [(1, 2)], netcharge=0, name='hydroxyl_radical') print(molecule.assign_bond_orders_and_charges_with_ILP()) print(molecule.write_graph('', output_size=(200, 200)))
def test_example_wang_1(self) -> None:
    """Run the capping method on ``example_wang_1`` and check the formula.

    The molecule is a six-membered carbon ring (atoms 1-6) in which carbons
    1-5 each carry a hydrogen (atoms 7-11) and carbon 6 carries an oxygen
    (atom 12). Every atom is already flagged as capped. The expected
    formula, including charge, is ``C6H5O 1-``.
    """
    # (index, element, valence) rows, in index order 1..12.
    atom_table = (
        [(index, 'C', 3) for index in range(1, 7)]
        + [(index, 'H', 1) for index in range(7, 12)]
        + [(12, 'O', 1)]
    )
    atoms = {
        index: Atom(
            index=index,
            element=element,
            valence=valence,
            capped=True,
            coordinates=None,
        )
        for (index, element, valence) in atom_table
    }

    # Ring bonds (1, 2), (2, 3), ..., (6, 1), then one substituent per ring
    # carbon: (7, 1), (8, 2), ..., (12, 6).
    ring_bonds = [(index, index % 6 + 1) for index in range(1, 7)]
    substituent_bonds = [(index + 6, index) for index in range(1, 7)]

    uncapped_molecule = Molecule(
        atoms,
        ring_bonds + substituent_bonds,
        name='example_wang_1',
    )
    uncapped_molecule.write_graph('uncapped_molecule')

    # NOTE(review): use_ILP is not forwarded to the capping call; both
    # iterations invoke the same method and differ only in the file name.
    for use_ILP in (True, False):
        capping_method = getattr(uncapped_molecule, CAPPING_FUNCTION_NAME)
        capped_molecule = capping_method(debug=None)

        method_label = 'ILP' if use_ILP else 'bruteforce'
        capped_molecule.write_graph(
            'capped_molecule_with_{0}'.format(method_label))

        assert capped_molecule.formula(charge=True) == 'C6H5O 1-', \
            capped_molecule.formula(charge=True)
def test_example_taxol_core(self) -> None:
    """Cap the ``example_taxol_core`` fragment and check the formula.

    The fragment has 27 atoms with explicit 3D coordinates and 28 bonds
    given as a set of two-element frozensets. The capped molecule is
    expected to have the formula ``C16H26O5``. Graphs are rendered at
    1200x1200.
    """
    # One row per atom: (index, element, valence, capped, coordinates).
    # Row order is the insertion order of the atoms dict.
    atom_table = (
        (5, "C", 3, False, (0.746, 3.138, 0.794)),
        (7, "O", 2, True, (1.175, 1.853, 0.61)),
        (8, "C", 4, True, (1.672, 1.0, 1.641)),
        (9, "C", 4, True, (2.696, 1.617, 2.644)),
        (10, "H", 1, True, (2.871, 2.705, 2.545)),
        (11, "H", 1, True, (3.667, 1.113, 2.766)),
        (12, "O", 2, True, (1.805, 1.347, 3.756)),
        (13, "C", 4, True, (0.741, 0.845, 2.897)),
        (14, "H", 1, True, (-0.111, 1.567, 2.942)),
        (15, "C", 4, False, (0.262, -0.558, 3.171)),
        (18, "C", 4, False, (1.353, -1.632, 3.131)),
        (20, "O", 2, True, (1.998, -1.605, 4.383)),
        (21, "H", 1, True, (2.798, -2.116, 4.304)),
        (22, "C", 4, False, (2.315, -1.541, 1.879)),
        (29, "C", 4, True, (1.951, -0.352, 0.936)),
        (30, "H", 1, True, (0.931, -0.608, 0.548)),
        (31, "C", 4, True, (2.911, -0.285, -0.293)),
        (32, "H", 1, True, (3.702, -1.082, -0.163)),
        (33, "O", 2, False, (3.52, 1.006, -0.367)),
        (101, "C", 4, False, (0.908, -3.278, 0.308)),
        (47, "C", 4, True, (2.191, -0.548, -1.701)),
        (48, "O", 2, True, (3.132, -0.275, -2.729)),
        (49, "H", 1, True, (3.241, 0.668, -2.768)),
        (50, "C", 4, False, (0.937, 0.345, -1.879)),
        (53, "C", 4, False, (1.845, -2.08, -1.847)),
        (54, "C", 3, True, (0.733, -2.32, -0.843)),
        (59, "C", 3, False, (-0.388, -1.56, -0.899)),
    )
    atoms = {
        index: Atom(
            index=index,
            element=element,
            valence=valence,
            capped=capped,
            coordinates=coordinates,
        )
        for (index, element, valence, capped, coordinates) in atom_table
    }

    # Bonded atom pairs; each pair becomes a frozenset built from a
    # two-element set, in the order listed.
    bond_pairs = (
        (5, 7), (13, 15), (18, 20), (18, 15), (50, 47), (18, 22), (8, 29),
        (8, 9), (32, 31), (48, 47), (9, 10), (29, 30), (29, 31), (9, 11),
        (53, 54), (9, 12), (20, 21), (48, 49), (29, 22), (33, 31), (31, 47),
        (12, 13), (13, 14), (59, 54), (8, 7), (53, 47), (101, 54), (8, 13),
    )
    bonds = {frozenset({first, second}) for (first, second) in bond_pairs}

    uncapped_molecule = Molecule(atoms, bonds, name='example_taxol_core')
    uncapped_molecule.write_graph('uncapped_molecule',
                                  output_size=(1200, 1200))

    # Only the ILP label is exercised here (single-element tuple).
    for use_ILP in (True, ):
        capping_method = getattr(uncapped_molecule, CAPPING_FUNCTION_NAME)
        capped_molecule = capping_method(debug=None)

        method_label = 'ILP' if use_ILP else 'bruteforce'
        capped_molecule.write_graph(
            'capped_molecule_with_{0}'.format(method_label),
            output_size=(1200, 1200),
        )

        assert capped_molecule.formula(charge=True) == 'C16H26O5', \
            capped_molecule.formula(charge=True)
CAN_FAIL, CAN_NOT_FAIL = True, False USE_OCTET_RULE, DO_NOT_USE_OCTET_RULE = True, False ALL_EXAMPLES = { Molecule( [ Atom(index=1, element='C', valence=4, capped=True, coordinates=None), Atom(index=2, element='C', valence=3, capped=True, coordinates=None), Atom(index=3, element='H', valence=None, capped=False, coordinates=None), Atom(index=4, element='H', valence=None, capped=False, coordinates=None), Atom(index=5, element='H', valence=None, capped=False, coordinates=None), Atom(index=6, element='N', valence=None, capped=False, coordinates=None), Atom(index=7, element='C', valence=None, capped=False, coordinates=None), ], [ (1, 2), (1, 3), (1, 4), (1, 5), (2, 6), (2, 7), ], name='H,H,H_C_C_N,C', ): (0, 0, 'C3H7N', USE_OCTET_RULE, CAN_NOT_FAIL), Molecule( [ Atom(index=1, element='C', valence=4, capped=True, coordinates=None), Atom(index=2, element='C', valence=4, capped=True, coordinates=None), Atom(index=3, element='H', valence=None, capped=False, coordinates=None), Atom(index=4, element='H', valence=None, capped=False, coordinates=None),
def uncapped_molecule_for_dihedral_fragment(dihedral_fragment: Fragment,
                                            debug: bool = False
                                            ) -> Uncapped_Molecule:
    """Build the uncapped molecule described by a dihedral fragment string.

    The fragment is ``N1|A2|A3|N4`` or ``N1|A2|A3|N4|CYCLES``:

    * ``N1`` / ``N4``: comma-separated neighbour descriptors of the first and
      second central atom; each is resolved to an (element, valence) pair by
      ``element_valence_for_atom``.
    * ``A2`` / ``A3``: elements of the two central atoms. Their valence is
      the number of their listed neighbours plus one (for the central bond).
    * ``CYCLES``: comma-separated entries whose three characters are read as
      integers ``(i, n, j)``. ``i`` and ``j`` are 0-based positions in ``N1``
      and ``N4``. ``n == 0`` means both positions denote the same atom;
      otherwise the two neighbours are linked by a chain of ``n`` bonds going
      through ``n - 1`` newly added carbon atoms.

    Atom ids are assigned from 1 in the order ``N1``, ``A2``, ``A3``, ``N4``.
    Only the two central atoms are flagged as capped.

    Args:
        dihedral_fragment: The fragment string described above.
        debug: When true, print the molecule before and after cycle handling.

    Returns:
        The ``Molecule`` built from the fragment.

    Raises:
        Exception: If the fragment does not contain exactly 3 or 4 ``|``.
        ValueError: If a cycle entry does not yield exactly three integers.
    """
    # Local import: the rest of the file is only known to provide
    # sqrt / pi / cos / sin from math.
    from math import isclose

    fields = dihedral_fragment.split('|')
    if len(fields) == 4:
        neighbours_1, atom_2, atom_3, neighbours_4 = fields
        cycles = []
    elif len(fields) == 5:
        neighbours_1, atom_2, atom_3, neighbours_4, cycles = fields
        cycles = cycles.split(',')
    else:
        raise Exception(
            'Invalid dihedral_fragment: "{0}"'.format(dihedral_fragment))
    neighbours_1, neighbours_4 = neighbours_1.split(','), neighbours_4.split(',')

    # Ids run 1..N over: left neighbours, the two central atoms, right neighbours.
    first_count = len(neighbours_1)
    ids = list(range(1, first_count + len(neighbours_4) + 3))
    neighbours_id_1 = ids[:first_count]
    atom_id_2 = ids[first_count]
    atom_id_3 = ids[first_count + 1]
    neighbours_id_4 = ids[first_count + 2:]

    CENTRAL_BOND = (atom_id_2, atom_id_3)

    # Resolve each neighbour once and reuse the result for element and valence.
    resolved_1 = [element_valence_for_atom(neighbour) for neighbour in neighbours_1]
    resolved_4 = [element_valence_for_atom(neighbour) for neighbour in neighbours_4]

    elements = dict(zip(
        ids,
        [resolved[0] for resolved in resolved_1]
        + [atom_2, atom_3]
        + [resolved[0] for resolved in resolved_4],
    ))
    valences = dict(zip(
        ids,
        [resolved[1] for resolved in resolved_1]
        + [len(neighbours_1) + 1, len(neighbours_4) + 1]
        + [resolved[1] for resolved in resolved_4],
    ))

    bonds = (
        [(neighbour_id, atom_id_2) for neighbour_id in neighbours_id_1]
        + [CENTRAL_BOND]
        + [(atom_id_3, neighbour_id) for neighbour_id in neighbours_id_4]
    )

    def coordinates_for_atom_id(atom_id: int, d: float = 1.5) -> Tuple[float, float, float]:
        """Return placeholder 3D coordinates for ``atom_id``.

        The central atoms sit on the x axis at ``-d / 2`` and ``d / 2``.
        Neighbours are spread evenly on a circle of radius ``f`` around the
        x axis, offset by ``e`` beyond their central atom, so that each
        neighbour is at distance ``d`` from it.
        """
        if atom_id == atom_id_2:
            return (-d / 2, 0, 0)
        if atom_id == atom_id_3:
            return (d / 2, 0, 0)

        e = 0.5 * d
        f = sqrt(d**2 - e**2)
        # Tolerance-based comparison: sqrt followed by squaring is not exact
        # in floating point, so strict equality may fail for some d.
        assert isclose(e**2 + f**2, d**2), (e**2 + f**2, d**2)

        if atom_id in neighbours_id_1:
            left_theta = 2 * pi / len(neighbours_id_1) * neighbours_id_1.index(atom_id)
            return (-d / 2 - e, -f * cos(left_theta), f * sin(left_theta))
        if atom_id in neighbours_id_4:
            right_theta = 2 * pi / len(neighbours_id_4) * neighbours_id_4.index(atom_id)
            return (d / 2 + e, f * cos(right_theta), f * sin(right_theta))
        raise Exception('Impossible id: {0}'.format(atom_id))

    # Neighbour atoms are the ones left uncapped.
    uncapped_ids = set(neighbours_id_1 + neighbours_id_4)

    molecule = Molecule(
        {
            atom_id: Atom(
                index=atom_id,
                element=elements[atom_id],
                valence=valences[atom_id],
                capped=atom_id not in uncapped_ids,
                coordinates=coordinates_for_atom_id(atom_id),
            )
            for atom_id in ids
        },
        bonds,
        name=dihedral_fragment.replace('|', '_'),
    )

    if debug:
        print(molecule)

    for cycle in cycles:
        i, n, j = map(int, cycle)
        i_id, j_id = neighbours_id_1[i], neighbours_id_4[j]
        if n == 0:
            # i and j are actually the same atoms
            del molecule.atoms[j_id]
            molecule.bonds = {
                frozenset(i_id if atom_id == j_id else atom_id for atom_id in bond)
                for bond in molecule.bonds
            }
        else:
            NEW_ATOM_ID = -1
            NEW_ATOM = Atom(
                index=NEW_ATOM_ID,  # This will get overwritten by Molecule.add_atom
                element='C',
                valence=NO_VALENCE,
                capped=False,
                coordinates=None,
            )
            # Chain of ids from i to j through the n - 1 freshly added atoms.
            atom_chain_id = (
                [i_id]
                + [molecule.add_atom(NEW_ATOM) for _ in range(n - 1)]
                + [j_id]
            )
            new_bonds = zip(atom_chain_id[:-1], atom_chain_id[1:])
            molecule.add_bonds(new_bonds)

    if debug:
        print(molecule)

    return molecule