def test_enhance_atoms(): p_table = Get_periodic_table() ##### mols = dmy.get_rndethane_mols(distance=True) atoms = GNR.make_atom_df(mols) structure_dict = GNR.make_struc_dict(atoms) BCAI.enhance_structure_dict(structure_dict) ########### BCAI.enhance_atoms(atoms, structure_dict) for i, idx in enumerate(atoms['atom_index'].values): molid = atoms['molecule_name'][i] mol = 0 for ml_fnd in mols: if ml_fnd.molid == molid: mol = ml_fnd atid = atoms['atom_index'][i] assert p_table.index(atoms['typestr'][i]) == mol.types[atid] assert np.array_equal(atoms['conn'][i], mol.conn[atid]) assert np.array_equal(atoms['distance'][i], mol.dist[atid])
def flag_to_target(flag): p_table = Get_periodic_table() if len(flag) == 4: if str(flag[0]) == 'J': length = int(flag[1]) else: length = int(flag[0]) atype1 = int(p_table.index(str(flag[2]))) atype2 = int(p_table.index(str(flag[3]))) if atype1 >= atype2: return [length, atype1, atype2] else: return [length, atype2, atype1] elif len(flag) == 3: atype = int(p_table.index(str(flag[0]))) return [atype] else: print('flag, ', flag, ' not recognised, coupling flag format is <nJxy> . . .') print('flag, ', flag, ' not recognised, chemical shift flag format is <XCS> . . .') return 0
def test_make_bonds_df(): p_table = Get_periodic_table() ##### mols = dmy.get_rndethane_mols() ##### bonds = GNR.make_bonds_df(mols) assert len(bonds["molecule_name"].unique()) == len(mols) for idx, bond in enumerate(bonds): molid = bonds['molecule_name'][idx] at1 = bonds['atom_index_0'][idx] at2 = bonds['atom_index_1'][idx] mol = 0 for ml_fnd in mols: if ml_fnd.molid == molid: mol = ml_fnd assert mol.coupling_len[at1][at2] == int(bonds['type'][idx][0]) assert mol.coupling[at1][at2] == bonds['scalar_coupling_constant'][idx]
def make_optin(prefs, molname, xyz, types, path=''): # Input: # prefs: preferences dictionary # molname: name of molecule # xyz: xyz coordinates of conformer # types: type list of conformer (numeric) # path: path to molecule folder # Returns: filename for input file # Get preferences from prefs charge = prefs['mol']['charge'] multiplicity = prefs['mol']['multiplicity'] functional = prefs['optimisation']['functional'] basis_set = prefs['optimisation']['basisset'] solvent = prefs['optimisation']['solvent'] direct_cmd_line_opt = prefs['optimisation']['custom_cmd_line'] processors = prefs['optimisation']['processors'] # Get periodic table Periodic_table = Get_periodic_table() # Define instruction line for ORCA instr = '! ' + str(functional) + ' ' + str(basis_set) + ' TightSCF OPT miniprint' # Add parallel option if multiple processors requested if processors != 1: instr += ' PAL{0:<d}'.format(processors) # Add solvent model/solvent if requested if solvent != 'none': instr += ' CPCM(' + solvent + ')' # If direct line input specified then overwrite all of this if direct_cmd_line_opt: instr = direct_cmd_line_opt # Define input file path/name infile = path.strip() + molname.strip() + '_OPT.in' # Construct file strings strings = [] strings.append(instr) strings.append('') strings.append("* xyz {0:<1d} {1:<1d}".format(charge, multiplicity)) for i in range(len(xyz)): str_type = Periodic_table[types[i]] string = " {0:<2s} {1:>10.5f} {2:>10.5f} {3:>10.5f}".format(str_type, xyz[i][0], xyz[i][1], xyz[i][2]) strings.append(string) strings.append('*') strings.append('') strings.append('%geom') strings.append(' AddExtraBonds true # switch on/off assigning bonds to atom pairs that are') strings.append(' # connected by more than <Max_Length> bonds and are less') strings.append(' # than <MaxDist> Ang. apart (default true)') strings.append(' AddExtraBonds_MaxLength 10 # cutoff for number of bonds connecting the two') strings.append(' # atoms (default 10)') strings.append(' AddExtraBonds_MaxDist 5 # cutoff for distance between two atoms (default 5 Ang.)') strings.append('end') # Write file with open(infile, 'w') as f_handle: for string in strings: print(string, file=f_handle) return infile
def make_nmrin(prefs, molname, xyz, types, path=''): # Input: # prefs: preferences dictionary # molname: name of molecule # xyz: xyz coordinates of conformer # types: type list of conformer (numeric) # path: path to molecule folder # Returns: input file path/name # Get values from preferences charge = prefs['mol']['charge'] multiplicity = prefs['mol']['multiplicity'] functional = prefs['NMR']['functional'] basis_set = prefs['NMR']['basisset'] aux_basis_set = prefs['NMR']['aux_basis_set'] solvent = prefs['NMR']['solvent'] direct_cmd_line_nmr = prefs['NMR']['custom_cmd_line'] processors = prefs['NMR']['processors'] # Get periodic table Periodic_table = Get_periodic_table() # Construct instruction line for ORCE instr = '! ' + str(functional) + ' ' + str(basis_set) + ' ' + str(aux_basis_set) + ' TightSCF miniprint' + ' NMR ' # Add parallel option if multiple processors requested if processors != 1: instr += ' PAL{0:<d}'.format(processors) # Add solvent model/solvent if requested if solvent != 'none': instr += ' CPCM(' + solvent + ')' # If direct line input specified then overwrite all of this if direct_cmd_line_nmr: instr = direct_cmd_line_nmr # Define input file path/name infile = path.strip() + molname.strip() + '_NMR.in' # Construct file strings strings = [] strings.append(instr) strings.append("") strings.append("* xyz {0:<1d} {1:<1d}".format(charge, multiplicity)) for i in range(len(xyz)): str_type = Periodic_table[types[i]] string = " {0:<2s} {1:>10.6f} {2:>10.6f} {3:>10.6f}".format(str_type, xyz[i][0], xyz[i][1], xyz[i][2]) strings.append(string) strings.append('*') strings.append('%eprnmr') # Needed for the functional we commonly use, ORCA shouted at me strings.append(" GIAO_2el = GIAO_2el_RIJCOSX") for type in prefs['NMR']['shift_nuclei']: strings.append(" Nuclei = all {type:<2s}".format(type=type) + ' { shift }') for type in prefs['NMR']['spin_nuclei']: strings.append(" Nuclei = all {type:<2s}".format(type=type) + ' { ssall }') strings.append('SpinSpinRThresh {0:<f}'.format(prefs['NMR']['spin_thresh'])) strings.append('end') # Write file with open(infile, 'w') as f_handle: for string in strings: print(string, file=f_handle) return infile
def print_mol_csv(outname, refs, typerefs, values, labels): # Input: # outname: name of output file # refs: molecular property references (m x k) m = number of properties, # k = number of atoms in reference + 1, # ref[y, z] = (molid, atomid1, atomid2, atomid3, . . . ) # typerefs: atom types corresponding to the atom ids in refs (m x k) # ref[y, z] = (molid, atomtype1, atomtype2, atomtype3, . . . ) # values: molecular properties (n, m, k) # labels: Labels for molecule sets # Returns: None # start empty line array (to print at the end) lines = [] # get periodic table array p_table = Get_periodic_table() # Get number of molecule sets sets = len(refs[0]) # Get first part of header string (molecule set labels for mol IDs) idstring = "" for x in range(len(refs[0])): idstring = idstring + "{label:<s}MOLID,".format(label=labels[x]) # Get second part of header string (atom references) refstring = "" for y in range(1, len(refs[0][0])): refstring = refstring + "{atom:<s},{type:<s},".format( atom='Atom', type='Type', ) # Get third part of header string (molecule set labels for values) valstring = "" for z in range(len(values[0])): valstring = valstring + "{label:<s}VALUE,".format(label=labels[z]) # Add header string lines.append(idstring + refstring + valstring) # Loop through property references for i in range(len(refs)): # Get molid string idstring = "" for x in range(len(refs[i])): idstring = idstring + "{id:<s},".format(id=refs[i][x][0]) # Get atomic ref string (atomid, atomtype) x number of atom references refstring = "" for y in range(1, len(refs[0][0])): refstring = refstring + "{atom:<s},{type:<s},".format( atom=str(refs[i][0][y]), type=p_table[typerefs[i][0][y - 1]]) # Get value string (value1, value2, . . .) valstring = "" for z in range(len(values[i])): valstring = valstring + "{value:<.4f},".format(value=values[i][z]) # Construct line and add to list lines.append(idstring + refstring + valstring) # Print all lines to file with open(outname, 'w') as f: for line in lines: print(line, file=f)
def scale_shifts(self, scaling_factors={}): periodic_table = Get_periodic_table() for nucleus, factor in scaling_factors.items(): if nucleus in ['basis_set', 'functional']: continue for i in range(len(self.shift)): if periodic_table[self.types[i]] == nucleus: self.shift[i] = (self.shift[i] - factor[1]) / float( factor[0])
def labelmaker(i, j, mol): Periodic_table = Get_periodic_table() lent = mol.coupling_len[i][j] label = str(lent) + str('J') if mol.types[int(i)] >= mol.types[int(j)]: label = label + str(Periodic_table[mol.types[int(i)]]) + str(Periodic_table[mol.types[int(j)]]) else: label = label + str(Periodic_table[mol.types[int(j)]]) + str(Periodic_table[mol.types[int(i)]]) return label
def mol_read_type(mol): type_list = [] type_array = np.zeros(len(mol.atoms), dtype=np.int32) Periodic_table = Get_periodic_table() for i in range(len(mol.atoms)): type = int(mol.atoms[i].atomicnum) type_array[i] = type type_list.append(Periodic_table[type]) return type_list, type_array
def test_make_triplets(): p_table = Get_periodic_table() ##### mols = dmy.get_rndethane_mols(distance=True) atoms = GNR.make_atom_df(mols) structure_dict = GNR.make_struc_dict(atoms) BCAI.enhance_structure_dict(structure_dict) BCAI.enhance_atoms(atoms, structure_dict) bonds = GNR.make_bonds_df(mols) BCAI.enhance_bonds(bonds, structure_dict, flag='3JHH') ############ triplets = BCAI.make_triplets(bonds["molecule_name"].unique(), structure_dict) assert len(triplets["molecule_name"].unique()) == len(mols) count = 0 for mol in mols: for atom1, type1 in enumerate(mol.types): for atom2, type2 in enumerate(mol.types): if atom1 == atom2: continue for atom3, type3 in enumerate(mol.types): if atom3 in [atom1, atom2] or atom3 < atom2: continue if mol.conn[atom1][atom2] != 1 or mol.conn[atom1][ atom3] != 1: continue row = triplets.loc[(triplets.molecule_name == mol.molid) & (triplets.atom_index_0 == atom1) & (triplets.atom_index_1 == atom2) & (triplets.atom_index_2 == atom3)] assert len(row.index) == 1 ba = mol.xyz[atom2] - mol.xyz[atom1] bc = mol.xyz[atom3] - mol.xyz[atom1] angle = np.sum( ba * bc) / (np.linalg.norm(ba) * np.linalg.norm(bc)) angle = np.arccos(np.clip(angle, -1.0, 1.0)) assert angle == row.angle.values count += 1 assert count == len(triplets.index)
def target_to_flag(target): p_table = Get_periodic_table() if len(target) == 3: flag = str(target[0]) + 'J' + str(p_table[target[1]]) + str( p_table[target[2]]) elif len(target) == 1: flag = str(target[0]) + 'CS' else: print('Error, target ', target, ' not recognised') return flag
def test_make_struc_dict(): p_table = Get_periodic_table() mols = dmy.get_rndethane_mols() atoms = GNR.make_atom_df(mols) structure_dict = GNR.make_struc_dict(atoms) assert len(structure_dict.keys()) == len(mols) for mol in mols: assert structure_dict[mol.molid]['typesstr'] == [p_table[type] for type in mol.types] assert np.array_equal(structure_dict[mol.molid]['positions'], mol.xyz) assert np.array_equal(structure_dict[mol.molid]['conn'], mol.conn)
def test_enhance_bonds(): p_table = Get_periodic_table() ##### mols = dmy.get_rndethane_mols(distance=True) atoms = GNR.make_atom_df(mols) structure_dict = GNR.make_struc_dict(atoms) BCAI.enhance_structure_dict(structure_dict) BCAI.enhance_atoms(atoms, structure_dict) bonds = GNR.make_bonds_df(mols) ############ BCAI.enhance_bonds(bonds, structure_dict, flag='3JHH') for idx, bond in enumerate(bonds): molid = bonds['molecule_name'][idx] at1 = bonds['atom_index_0'][idx] at2 = bonds['atom_index_1'][idx] mol = 0 for ml_fnd in mols: if ml_fnd.molid == molid: mol = ml_fnd assert mol.coupling_len[at1][at2] == int(bonds['type'][idx][0]) assert mol.coupling[at1][at2] == bonds['scalar_coupling_constant'][idx] if bonds['labeled_type'][idx] == '3JHH': assert bonds['predict'][idx] == 1 else: assert bonds['predict'][idx] == 0 for mol in mols: for atom1, type1 in enumerate(mol.types): for atom2, type2 in enumerate(mol.types): if atom1 == atom2: continue row = bonds.loc[(bonds['molecule_name'] == mol.molid) & (bonds['atom_index_0'] == atom1) & (bonds['atom_index_1'] == atom2)] cpl = row['scalar_coupling_constant'].values if type1 == 1 and type2 == 1 and mol.coupling_len[atom1][ atom2] == 3: assert row.predict.values == 1 assert mol.coupling[atom1][atom2] == cpl[0]
def make_atom_df(mols): p_table = Get_periodic_table() # construct dataframe as BCAI requires from mols # atoms has: molecule_name, atom, labeled atom, molecule_name = [] # molecule name atom_index = [] # atom index atom = [] # atom type (letter) x = [] # x coordinate y = [] # y coordinate z = [] # z coordinate conns = [] mol_order = [] m = -1 for molrf in tqdm(mols, desc='Constructing atom dictionary'): m += 1 if len(mols) > 2000: mol = nmrmol(molid=molrf[1]) if molrf[2] == '': ftype = get_type(molrf[2]) else: ftype = molrf[2] mol.read_nmr(molrf[0], ftype) else: mol = molrf mol_order.append(mol.molid) for t, type in enumerate(mol.types): molecule_name.append(mol.molid) atom_index.append(t) atom.append(p_table[type]) x.append(mol.xyz[t][0]) y.append(mol.xyz[t][1]) z.append(mol.xyz[t][2]) conns.append(mol.conn[t]) atoms = { 'molecule_name': molecule_name, 'atom_index': atom_index, 'atom': atom, 'x': x, 'y': y, 'z': z, 'conn': conns, } atoms = pd.DataFrame(atoms) return atoms
def nmrmol_to_xyz(mol, outname, num=-404): periodic_table = Get_periodic_table() with open(outname, 'w') as f: print(len(mol.types), file=f) if num == -404: print(mol.molid, file=f) else: string = "{0:<10d}\t{1:<20s}".format(num, mol.molid) print(string, file=f) for i in range(len(mol.types)): string = "{i:<10s}\t{x:<10.6f}\t{y:<10.6f}\t{z:<10.6f}".format( i=periodic_table[mol.types[i]], x=mol.xyz[i][0], y=mol.xyz[i][1], z=mol.xyz[i][2]) print(string, file=f)
def test_enhance_structure_dict(): p_table = Get_periodic_table() ##### mols = dmy.get_rndethane_mols(distance=True) atoms = GNR.make_atom_df(mols) structure_dict = GNR.make_struc_dict(atoms) ##### BCAI.enhance_structure_dict(structure_dict) for mol in mols: assert structure_dict[mol.molid]['typesstr'] == [ p_table[type] for type in mol.types ] assert np.array_equal(structure_dict[mol.molid]['positions'], mol.xyz) assert np.array_equal(structure_dict[mol.molid]['conn'], mol.conn) assert np.array_equal(structure_dict[mol.molid]['distances'], mol.dist)
def test_add_embedding(): p_table = Get_periodic_table() ##### mols = dmy.get_rndethane_mols(distance=True) atoms = GNR.make_atom_df(mols) structure_dict = GNR.make_struc_dict(atoms) BCAI.enhance_structure_dict(structure_dict) BCAI.enhance_atoms(atoms, structure_dict) bonds = GNR.make_bonds_df(mols) BCAI.enhance_bonds(bonds, structure_dict, flag='3JHH') triplets = BCAI.make_triplets(bonds["molecule_name"].unique(), structure_dict) ##### embeddings, atoms, bonds, triplets = BCAI.add_embedding( atoms, bonds, triplets)
def make_atom_df(mols, progress=False): p_table = Get_periodic_table() # construct dataframe as BCAI requires from mols # atoms has: molecule_name, atom, labeled atom, molecule_name = [] # molecule name atom_index = [] # atom index typestr = [] # atom type (string) typeint = [] # atom type (integer) x = [] # x coordinate y = [] # y coordinate z = [] # z coordinate conns = [] shifts = [] mol_order = [] m = -1 if progress: pbar = tqdm(mols, desc='Constructing atom dictionary') else: pbar = mols for molrf in pbar: m += 1 if len(mols) > 2000: mol = nmrmol(molid=molrf[1]) if molrf[2] == '': ftype = get_type(molrf[2]) else: ftype = molrf[2] mol.read_nmr(molrf[0], ftype) else: mol = molrf mol_order.append(mol.molid) for t, type in enumerate(mol.types): molecule_name.append(mol.molid) atom_index.append(t) typestr.append(p_table[type]) typeint.append(type) x.append(mol.xyz[t][0]) y.append(mol.xyz[t][1]) z.append(mol.xyz[t][2]) conns.append(mol.conn[t]) shifts.append(mol.shift[t]) atoms = { 'molecule_name': molecule_name, 'atom_index': atom_index, 'typestr': typestr, 'typeint': typeint, 'x': x, 'y': y, 'z': z, 'conn': conns, 'shift': shifts } atoms = pd.DataFrame(atoms) return atoms
def nmrmol_to_nmredata(mol, outfile): # Get periodic table periodic_table = Get_periodic_table() # Assume molecule is not chiral chiral = 0 # Determine number of bonds bonds = 0 for at1 in range(len(mol.types)): for at2 in range(at1, len(mol.types)): if mol.conn[at1][at2] >= 1: bonds += 1 atoms = len(mol.types) # Start putting together file lines lines = [] # print molecule name if '/' in outfile: lines.append(outfile.split('/')[-1].split('.')[0]) else: lines.append(outfile.split('.')[0]) # print file and author lines.append('auto-ENRICH - 2020 - Will Gerrard') lines.append('') # Structure section string = '{atoms:>3d}{bonds:>3d} 0 0{chiral:>3d} 0 0 0 0 0 1 V2000'.format( atoms=atoms, bonds=bonds, chiral=chiral) lines.append(string) # Print xyz coordinates and types for i, xyz in enumerate(mol.xyz): string = '{x:>10.4f}{y:>10.4f}{z:>10.4f} {typechar:>3s} 0 0 0 0 0 0 0 0 0 0 0 0'.format( x=xyz[0], y=xyz[1], z=xyz[2], typechar=periodic_table[mol.types[i]]) lines.append(string) # Print bonds and bond types for at1 in range(len(mol.types)): for at2 in range(at1, len(mol.types)): if mol.conn[at1][at2] >= 1: string = '{at1:>3d}{at2:>3d}{bond:>3d} 0 0 0 0'.format( at1=at1 + 1, at2=at2 + 1, bond=mol.conn[at1][at2]) lines.append(string) # Terminate structure section lines.append('M\tEND'.format()) # assignment section lines.append('') lines.append('> <NMREDATA_ASSIGNMENT>') # Print chemical shifts with variance for i, shift, type, var in zip(range(len(mol.types)), mol.shift, mol.types, mol.shift_var): string = " {atom:<5d}, {shift:<15.8f}, {type:<5d}, {variance:<15.8f}\\".format( atom=i, shift=shift, type=type, variance=var) lines.append(string) lines.append('') lines.append('> <NMREDATA_J>') # Print couplings with variance and label for i in range(len(mol.types)): for j in range(len(mol.types)): if i >= j: continue if mol.coupling_len[i][j] == 0: continue label = labelmaker(i, j, mol) string = " {a1:<10d}, {a2:<10d}, {coupling:<15.8f}, {label:<10s}, {var:<15.8f}".format( a1=i, a2=j, coupling=mol.coupling[i][j], label=label, var=mol.coupling_var[i][j]) lines.append(string) # Print assembled lines to output file with open(outfile, 'w') as f: for line in lines: print(line, file=f)
def make_bonds_df(mols): p_table = Get_periodic_table() # construct dataframe as BCAI requires from mols # atoms has: molecule_name, atom, labeled atom, id = [] # number molecule_name = [] # molecule name atom_index_0 = [] # atom index for atom 1 atom_index_1 = [] # atom index for atom 2 cpltype = [] # coupling type coupling = [] # coupling value r = [] y = [] i = -1 m = -1 for molrf in tqdm(mols, desc='Constructing bond dictionary'): m += 1 if len(mols) > 2000: mol = nmrmol(molid=molrf[1]) if molrf[2] == '': ftype = get_type(molrf[2]) else: ftype = molrf[2] mol.read_nmr(molrf[0], ftype) else: mol = molrf moly = [] molr = [] for t, type in enumerate(mol.types): for t2, type2 in enumerate(mol.types): if t == t2: continue TFM_flag = str(mol.coupling_len[t] [t2]) + 'J' + p_table[type] + p_table[type2] #if TFM_flag != flag and flag != 'all': # continue i += 1 id.append(i) molecule_name.append(mol.molid) atom_index_0.append(t) atom_index_1.append(t2) cpltype.append(TFM_flag) coupling.append(mol.coupling[t][t2]) moly.append(mol.coupling[t][t2]) molr.append([mol.molid, t, t2]) y.append(moly) r.append(molr) bonds = { 'id': id, 'molecule_name': molecule_name, 'atom_index_0': atom_index_0, 'atom_index_1': atom_index_1, 'type': cpltype, 'scalar_coupling_constant': coupling } bonds = pd.DataFrame(bonds) return bonds