def gen_rl_attach(mol='Oc1nc(O)c2nc3c{0}c{1}c{2}c{3}c3nc2n1',
                  r_l=['', '(S(O)(=O)=O)'],
                  disp=False, graph=False):
    """
    Fill the numbered placeholders of a SMILES template with R-group
    combinations.

    The combinations come from get_multi_r_list (rather than get_r_list),
    so more than one kind of R group can be attached.

    mol   -- SMILES template containing '{0}', '{1}', ... placeholders
    r_l   -- candidate R-group fragments, forwarded to get_multi_r_list
    disp  -- if true, print each combination next to its SMILES string
    graph -- if true, draw each generated molecule with jchem.show_mol

    Returns the table produced by get_multi_r_list with three columns
    added: 'SMILES' (filled-in template), 'BaseMol' (the first generated
    SMILES repeated on every row) and 'BaseStr' (the template itself).
    """
    # Every '{k}' placeholder counts as one R-group position.
    n_positions = len(re.findall('{[0-9]*}', mol))
    pdr = get_multi_r_list(n_positions, r_l, disp=disp, pdForm=True)

    smiles_l = []
    for r_combo in pdr['Rgroups'].tolist():
        smiles = mol.format(*r_combo)
        smiles_l.append(smiles)
        if disp:
            print(r_combo, smiles)
        if graph:
            jchem.show_mol(smiles)

    n_rows = len(smiles_l)
    pdr['SMILES'] = smiles_l
    # NOTE(review): presumably the first combination is the unsubstituted
    # molecule -- confirm against get_multi_r_list.
    pdr['BaseMol'] = [smiles_l[0]] * n_rows
    pdr['BaseStr'] = [mol] * n_rows
    return pdr
def gen_rl_2attach(mol, mol_nH, r_l=['', '(S(O)(=O)=O)'], disp=False, graph=False):
    """
    Fill two parallel SMILES templates with the same R-group combinations.

    The combinations come from get_multi_r_list, so more than one kind of
    R group can be attached.  Each combination is substituted into both
    `mol` and `mol_nH` (the reduced / hydrated counterpart).

    mol    -- SMILES template with '{0}', '{1}', ... placeholders
    mol_nH -- second template, formatted with the same combinations
    r_l    -- candidate R-group fragments, forwarded to get_multi_r_list
    disp   -- if true, print each combination and both SMILES strings
    graph  -- if true, draw both molecules with jchem.show_mol

    Returns the table from get_multi_r_list with the columns 'SMILES' and
    'R-SMILES' (both passed through jchem.csmiles_l), 'BaseMol' (the first
    raw SMILES built from `mol`) and 'BaseStr' (the `mol` template).
    """
    # The number of placeholders is taken from `mol` only.
    n_positions = len(re.findall('{[0-9]*}', mol))
    pdr = get_multi_r_list(n_positions, r_l, disp=disp, pdForm=True)

    ox_smiles_l = []
    red_smiles_l = []
    for r_combo in pdr['Rgroups'].tolist():
        ox_smiles = mol.format(*r_combo)
        ox_smiles_l.append(ox_smiles)
        red_smiles = mol_nH.format(*r_combo)
        red_smiles_l.append(red_smiles)

        if disp:
            print(r_combo, ox_smiles, red_smiles)
        if graph:
            print("Oxidated molecule:")
            jchem.show_mol(ox_smiles)
            print("Hydrated molecule:")
            jchem.show_mol(red_smiles)

    n_rows = len(ox_smiles_l)
    # The two SMILES columns hold the output of jchem.csmiles_l; 'BaseMol'
    # keeps the raw (unconverted) first string.
    pdr['SMILES'] = jchem.csmiles_l(ox_smiles_l)
    pdr['R-SMILES'] = jchem.csmiles_l(red_smiles_l)
    pdr['BaseMol'] = [ox_smiles_l[0]] * n_rows
    pdr['BaseStr'] = [mol] * n_rows
    return pdr
def get_sa(self, s):
    """
    Return the score computed by sascorer.calculateScore for a SMILES string.

    s -- SMILES string, parsed with Chem.MolFromSmiles

    If self.graph is true the molecule is drawn with jchem.show_mol; if
    self.disp is true the score is printed.

    NOTE(review): Chem.MolFromSmiles may yield None for an unparsable
    string -- confirm how sascorer.calculateScore reacts to that.
    """
    m = Chem.MolFromSmiles(s)
    sa = sascorer.calculateScore(m)

    if self.graph:
        jchem.show_mol(s)
    if self.disp:
        # Label matches the value printed (it used to read 'NP Score is').
        print('SA Score is', sa)

    return sa
def get_np(self, s):
    """
    Return the score computed by npscorer.scoreMol for a SMILES string.

    s -- SMILES string, parsed with Chem.MolFromSmiles

    The scoring model is read from self.fscore.  If self.graph is true the
    molecule is drawn with jchem.show_mol; if self.disp is true the score
    is printed.
    """
    model = self.fscore
    parsed = Chem.MolFromSmiles(s)
    # Named np_score so the local does not hide a module-level `np`.
    np_score = npscorer.scoreMol(parsed, model)

    if self.graph:
        jchem.show_mol(s)
    if self.disp:
        print('NP Score is', np_score)

    return np_score
def show_Alloxazine():
    """
    Draw four molecules with jchem.show_mol, in two titled groups.

    Each group shows a fixed SMILES string followed by a six-placeholder
    template in which every placeholder is filled with '(O)'.
    """
    hydroxyls = ('(O)',) * 6

    # (title, plain SMILES, template with six R positions)
    views = (
        ('Original Alloxazine',
         'O=C1NC(=O)C2=NC3=CC=CC=C3N=C2N1',
         'O=C1N{0}C(=O)C2=NC3=C{1}C{2}=C{3}C{4}=C3N=C2N1{5}'),
        ('Hydro Alloxazine',
         'O=C1NC2NC3=CC=CC=C3NC2C(=O)N1',
         'O=C1N{5}C2NC3=C{4}C{3}=C{2}C{1}=C3NC2C(=O)N1{0}'),
    )

    for title, plain, template in views:
        print(title)
        jchem.show_mol(plain)
        jchem.show_mol(template.format(*hydroxyls))
def _gen_27aqds_with_oh_r1(self, Frag6_D, show=True):
    """
    Build SMILES strings for 2,7-AQDS with OH attachments by SMILES
    interpolation.

    Frag6_D -- iterable of replacement objects; each one is handed to
               Chem.MolFromSmiles as `replacements` to fill the
               '{B0}' ... '{B5}' placeholders of the template
               (presumably a dict of placeholder -> fragment; confirm
               against the caller)
    show    -- if true, print a 1-based counter with each SMILES string
               and draw it with jchem.show_mol

    Returns the list of strings produced by Chem.MolToSmiles, in input
    order.
    """
    template = 'C1(O)c2c{B3}c{B4}c(S(=O)(=O)O)c{B5}c2C(O)c2c{B0}c(S(=O)(=O)O)c{B1}c{B2}c21'

    smiles_out = []
    for count, replacements in enumerate(Frag6_D, start=1):
        parsed = Chem.MolFromSmiles(template, replacements=replacements)
        smiles = Chem.MolToSmiles(parsed)
        smiles_out.append(smiles)

        if show:
            print(count, smiles)
            jchem.show_mol(smiles)

    return smiles_out
def show_Alloxazine():
    """
    Draw four molecules with jchem.show_mol under two printed headings.

    Under each heading a fixed SMILES string is drawn first, then a
    template whose six placeholders are all filled with '(O)'.
    """
    r_fill = ['(O)'] * 6

    print('Original Alloxazine')
    plain = 'O=C1NC(=O)C2=NC3=CC=CC=C3N=C2N1'
    template = 'O=C1N{0}C(=O)C2=NC3=C{1}C{2}=C{3}C{4}=C3N=C2N1{5}'
    jchem.show_mol(plain)
    jchem.show_mol(template.format(*r_fill))

    print('Hydro Alloxazine')
    plain_h = 'O=C1NC2NC3=CC=CC=C3NC2C(=O)N1'
    # Placeholder numbering runs in the opposite direction here.
    template_h = 'O=C1N{5}C2NC3=C{4}C{3}=C{2}C{1}=C3NC2C(=O)N1{0}'
    jchem.show_mol(plain_h)
    jchem.show_mol(template_h.format(*r_fill))
def _gen_27aqds_with_oh_r1(self, Frag6_D, show=True):
    """
    Generate 2,7-AQDS SMILES strings with OH attachments via SMILES
    interpolation.

    Frag6_D -- iterable whose items are passed one at a time to
               Chem.MolFromSmiles as the `replacements` argument, filling
               the '{B0}' ... '{B5}' slots of the template (presumably
               dicts of slot -> fragment; verify against the caller)
    show    -- if true, print "<1-based index> <SMILES>" and draw the
               molecule with jchem.show_mol

    Returns a list with one Chem.MolToSmiles string per item of Frag6_D.
    """
    scaffold = 'C1(O)c2c{B3}c{B4}c(S(=O)(=O)O)c{B5}c2C(O)c2c{B0}c(S(=O)(=O)O)c{B1}c{B2}c21'
    results = []

    for position, frag_map in enumerate(Frag6_D, 1):
        smiles = Chem.MolToSmiles(
            Chem.MolFromSmiles(scaffold, replacements=frag_map))
        results.append(smiles)
        if not show:
            continue
        print(position, smiles)
        jchem.show_mol(smiles)

    return results
def gen_27aqds_with_R(self, Frag6_D, r_gr, show=True):
    """
    Build SMILES strings from a scaffold carrying the group `r_gr` twice
    plus six replaceable positions, using SMILES interpolation.

    Frag6_D -- iterable of replacement objects; each is given to
               Chem.MolFromSmiles as `replacements` to fill the
               '{B0}' ... '{B5}' placeholders (presumably a dict of
               placeholder -> fragment; confirm against the caller)
    r_gr    -- SMILES fragment inserted at both '%s' positions of the
               scaffold, e.g. 'S(=O)(=O)O' or '[N+]([O-])=O'
    show    -- if true, print a 1-based counter with each SMILES string
               and draw it with jchem.show_mol

    Returns the list of strings produced by Chem.MolToSmiles.
    """
    scaffold = 'C1(O)c2c{B3}c{B4}c(%s)c{B5}c2C(O)c2c{B0}c(%s)c{B1}c{B2}c21'

    smiles_out = []
    for count, replacements in enumerate(Frag6_D, start=1):
        # Insert r_gr first; the '{Bk}' placeholders are left for RDKit.
        base_smiles = scaffold % (r_gr, r_gr)
        parsed = Chem.MolFromSmiles(base_smiles, replacements=replacements)
        smiles = Chem.MolToSmiles(parsed)
        smiles_out.append(smiles)

        if show:
            print(count, smiles)
            jchem.show_mol(smiles)

    return smiles_out
def gen_27aqds_with_R(self, Frag6_D, r_gr, show=True):
    """
    Generate SMILES strings by SMILES interpolation on a scaffold that
    holds `r_gr` at two fixed positions and six replaceable slots.

    Frag6_D -- iterable whose items are passed to Chem.MolFromSmiles as
               `replacements` for the '{B0}' ... '{B5}' slots (presumably
               dicts of slot -> fragment; verify against the caller)
    r_gr    -- SMILES fragment substituted for both '%s' markers,
               e.g. 'S(=O)(=O)O' or '[N+]([O-])=O'
    show    -- if true, print "<1-based index> <SMILES>" and draw the
               molecule with jchem.show_mol

    Returns a list with one Chem.MolToSmiles string per item of Frag6_D.
    """
    results = []

    for position, frag_map in enumerate(Frag6_D, 1):
        # Only the two '%s' markers are filled here; '{Bk}' stays literal.
        base_smiles = (
            'C1(O)c2c{B3}c{B4}c(%s)c{B5}c2C(O)c2c{B0}c(%s)c{B1}c{B2}c21'
            % (r_gr, r_gr)
        )
        smiles = Chem.MolToSmiles(
            Chem.MolFromSmiles(base_smiles, replacements=frag_map))
        results.append(smiles)
        if not show:
            continue
        print(position, smiles)
        jchem.show_mol(smiles)

    return results
def gen_r_attach_Alloxazine_R123457(so3h='(S(O)(=O)=O)', disp=False, graph=False):
    """
    Generate SMILES strings with an R-group fragment at six positions.

    Full template:
        n1{R5}c2nc3c{R1}c{R2}c{R3}c{R4}c3nc2c(=O)n{R7}c1=O
    Positions R1, R2, R3, R4, R5, R7 map to indices 0, 1, 2, 3, 4, 5 of
    each combination.  When the R5 and/or R7 entry is the empty string,
    that ring nitrogen is written as '[nH]' instead of 'n{..}'.

    so3h  -- R-group fragment, forwarded to get_r_list
    disp  -- if true, print each combination with its SMILES string
    graph -- if true, draw each molecule with jchem.show_mol

    Returns the table from get_r_list with the columns 'SMILES',
    'BaseMol' (first generated SMILES on every row) and 'BaseStr' (the
    template variant used for that row) added.
    """
    N_group = 6
    pdr = get_r_list(N_group, so3h, disp=disp, pdForm=True)

    smiles_l = []
    template_l = []
    for so in pdr['Rgroups'].tolist():
        r5_empty = so[4] == ''
        r7_empty = so[5] == ''

        # Pick the template variant and the values that fill it.
        if not r5_empty and not r7_empty:
            template = 'n1{4}c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)n{5}c1=O'
            fill = so
        elif r5_empty and r7_empty:
            template = '[nH]1c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)[nH]c1=O'
            fill = so[:4]
        elif r5_empty:
            # Only R7 is present; it takes placeholder {4} in this variant.
            template = '[nH]1c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)n{4}c1=O'
            fill = (so[0], so[1], so[2], so[3], so[5])
        else:
            # Only R5 is present.
            template = 'n1{4}c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)[nH]c1=O'
            fill = so[:5]

        smiles = template.format(*fill)
        template_l.append(template)
        smiles_l.append(smiles)

        if disp:
            print(so, smiles)
        if graph:
            jchem.show_mol(smiles)

    pdr['SMILES'] = smiles_l
    pdr['BaseMol'] = [smiles_l[0]] * len(smiles_l)
    pdr['BaseStr'] = template_l
    return pdr
def gen_r_attach(mol='Oc1nc(O)c2nc3c{0}c{1}c{2}c{3}c3nc2n1', so3h='(S(O)(=O)=O)',
                 disp=False, graph=False):
    """
    Fill the numbered placeholders of a SMILES template with R-group
    combinations obtained from get_r_list.

    mol   -- SMILES template containing '{0}', '{1}', ... placeholders
    so3h  -- R-group fragment, forwarded to get_r_list
    disp  -- if true, print each combination next to its SMILES string
    graph -- if true, draw each generated molecule with jchem.show_mol

    Returns the table from get_r_list with three columns added: 'SMILES'
    (filled-in template), 'BaseMol' (the first generated SMILES repeated
    on every row) and 'BaseStr' (the template itself).
    """
    # Every '{k}' placeholder counts as one R-group position.
    n_positions = len(re.findall('{[0-9]*}', mol))
    pdr = get_r_list(n_positions, so3h, disp=disp, pdForm=True)

    smiles_l = []
    for r_combo in pdr['Rgroups'].tolist():
        smiles = mol.format(*r_combo)
        smiles_l.append(smiles)
        if disp:
            print(r_combo, smiles)
        if graph:
            jchem.show_mol(smiles)

    n_rows = len(smiles_l)
    pdr['SMILES'] = smiles_l
    pdr['BaseMol'] = [smiles_l[0]] * n_rows
    pdr['BaseStr'] = [mol] * n_rows
    return pdr
def bq1x(x='(S(=O)(=O)O)', disp=False):
    """
    Generate quinone molecules with a single functional group attached.

    The template 'C1=CC(=O)C{0}=CC1=O' has one attachment slot, so exactly
    one molecule is produced.

    x    -- SMILES fragment of the functional group to attach
    disp -- if true, print the slot index and draw the molecule with
            jchem.show_mol

    Returns a list with the jchem.csmiles result for each generated
    molecule.
    """
    MaxAttach = 1
    template = 'C1=CC(=O)C{0}=CC1=O'

    cs_l = []
    for slot in range(MaxAttach):
        # Put x in the current slot only; all other slots stay empty.
        fill = [x if k == slot else '' for k in range(MaxAttach)]
        s = template.format(*fill)
        cs_l.append(jchem.csmiles(s))

        if disp:
            print(slot)
            jchem.show_mol(s)

    return cs_l
def mol1x(mol='C1=CC(=O)C{0}=CC1=O', MaxAttach=1, x='(S(=O)(=O)O)', disp=False):
    """
    Generate molecules with a single functional group attached, one per
    attachment slot of a SMILES template.

    mol       -- SMILES template with '{0}', '{1}', ... placeholders
    MaxAttach -- number of slots to visit; the template is formatted with
                 this many values
    x         -- SMILES fragment of the functional group to attach
    disp      -- if true, print the slot index and draw the molecule with
                 jchem.show_mol

    Returns a list of MaxAttach entries, each the jchem.csmiles result for
    the template with x in one slot and '' in every other slot.
    """
    cs_l = []
    for slot in range(MaxAttach):
        # Put x in the current slot only; all other slots stay empty.
        fill = [x if k == slot else '' for k in range(MaxAttach)]
        s = mol.format(*fill)
        cs_l.append(jchem.csmiles(s))

        if disp:
            print(slot)
            jchem.show_mol(s)

    return cs_l
def gen_rl_2attach(mol, mol_nH, r_l=['', '(S(O)(=O)=O)'],
                   disp=False, graph=False):
    """
    Generate paired SMILES strings from two templates sharing the same
    R-group combinations.

    get_multi_r_list supplies the combinations, so several kinds of R
    group may be attached.  Every combination is formatted into `mol` and
    into `mol_nH` (the reduced / hydrated form).

    mol    -- SMILES template with '{0}', '{1}', ... placeholders
    mol_nH -- companion template filled with the same values
    r_l    -- candidate R-group fragments, forwarded to get_multi_r_list
    disp   -- if true, print the combination and both SMILES strings
    graph  -- if true, draw both molecules with jchem.show_mol

    Returns the get_multi_r_list table extended with 'SMILES' and
    'R-SMILES' (both from jchem.csmiles_l), 'BaseMol' (first raw SMILES
    from `mol`) and 'BaseStr' (the `mol` template).
    """
    # Placeholder count is derived from `mol` alone.
    slot_count = len(re.findall('{[0-9]*}', mol))
    pdr = get_multi_r_list(slot_count, r_l, disp=disp, pdForm=True)

    oxidized = []
    reduced = []
    for groups in pdr['Rgroups'].tolist():
        smi_ox = mol.format(*groups)
        oxidized.append(smi_ox)
        smi_red = mol_nH.format(*groups)
        reduced.append(smi_red)

        if disp:
            print(groups, smi_ox, smi_red)

        if graph:
            print("Oxidated molecule:")
            jchem.show_mol(smi_ox)
            print("Hydrated molecule:")
            jchem.show_mol(smi_red)

    total = len(oxidized)
    # Store the jchem.csmiles_l output; 'BaseMol' keeps the raw string.
    pdr['SMILES'] = jchem.csmiles_l(oxidized)
    pdr['R-SMILES'] = jchem.csmiles_l(reduced)
    pdr['BaseMol'] = [oxidized[0]] * total
    pdr['BaseStr'] = [mol] * total
    return pdr
def aq1x(x='(S(=O)(=O)O)', disp=False):
    """
    Generate quinone molecules with a single functional group attached.

    For anthraquinone (AQ) only two attachment positions are unique; every
    other position yields a duplicate, so the template
    'O=C1c2c{0}c{1}ccc2C(=O)c2ccccc21' exposes just two slots.

    x    -- SMILES fragment of the functional group to attach
    disp -- if true, print the slot index and draw the molecule with
            jchem.show_mol

    Returns a two-element list of jchem.csmiles results, one per slot.
    """
    MaxAttach = 2
    template = 'O=C1c2c{0}c{1}ccc2C(=O)c2ccccc21'

    cs_l = []
    for slot in range(MaxAttach):
        # Put x in the current slot only; the other slot stays empty.
        fill = [x if k == slot else '' for k in range(MaxAttach)]
        s = template.format(*fill)
        cs_l.append(jchem.csmiles(s))

        if disp:
            print(slot)
            jchem.show_mol(s)

    return cs_l
def gen_r_attach_Alloxazine_R123457(so3h='(S(O)(=O)=O)',
                                    disp=False, graph=False):
    """
    Generate SMILES strings with an R-group fragment at six positions.

    Full template:
        n1{R5}c2nc3c{R1}c{R2}c{R3}c{R4}c3nc2c(=O)n{R7}c1=O
    R1, R2, R3, R4, R5, R7 correspond to indices 0, 1, 2, 3, 4, 5 of a
    combination.  An empty R5 or R7 entry turns that ring nitrogen into
    '[nH]' rather than 'n{..}'.

    so3h  -- R-group fragment, forwarded to get_r_list
    disp  -- if true, print each combination with its SMILES string
    graph -- if true, draw each molecule with jchem.show_mol

    Returns the get_r_list table extended with 'SMILES', 'BaseMol' (first
    generated SMILES on every row) and 'BaseStr' (template variant used
    for each row).
    """
    both_t = 'n1{4}c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)n{5}c1=O'
    none_t = '[nH]1c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)[nH]c1=O'
    r7_only_t = '[nH]1c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)n{4}c1=O'
    r5_only_t = 'n1{4}c2nc3c{0}c{1}c{2}c{3}c3nc2c(=O)[nH]c1=O'

    N_group = 6
    pdr = get_r_list(N_group, so3h, disp=disp, pdForm=True)

    generated = []
    used_templates = []
    for so in pdr['Rgroups'].tolist():
        has_r5 = so[4] != ''
        has_r7 = so[5] != ''

        if has_r5 and has_r7:
            chosen, values = both_t, so
        elif not has_r5 and not has_r7:
            chosen, values = none_t, so[:4]
        elif not has_r5:
            # R7 moves into placeholder {4} because R5 is absent.
            chosen, values = r7_only_t, (so[0], so[1], so[2], so[3], so[5])
        else:
            chosen, values = r5_only_t, so[:5]

        aso = chosen.format(*values)
        used_templates.append(chosen)
        generated.append(aso)

        if disp:
            print(so, aso)
        if graph:
            jchem.show_mol(aso)

    pdr['SMILES'] = generated
    pdr['BaseMol'] = [generated[0]] * len(generated)
    pdr['BaseStr'] = used_templates
    return pdr
def gen_r_attach_lowpot_Flavins(disp=False, graph=False):
    """
    Build a table of 14 hard-coded R-group patterns and their SMILES strings.

    Every pattern is a list of six fragments ('' for none, '(O)' or
    '(OC)') paired with a number that ends up in the 'RedoxPotential'
    column.  The fragments fill the '{0}' ... '{5}' placeholders of the
    template held in BaseStr.

    disp  -- if true, print the per-pattern classification trace and the
             numbered list of generated SMILES strings
    graph -- if true, draw every generated molecule with jchem.show_mol

    Returns a pandas DataFrame with the columns 'ID' (1-based), 'Rgroup',
    'NoOfR', 'Index', 'Rgroups', 'SMILES', 'BaseMol', 'BaseStr' and
    'RedoxPotential'.
    """
    oh = '(O)'
    h = ''
    oc = '(OC)'

    # (fragments for placeholders {0}..{5}, value for 'RedoxPotential')
    rl = [
        ([h, oh, oh, oh, h, h], -0.47),
        ([oh, oh, h, h, h, h], -0.47),
        ([oh, oh, oh, oh, oh, h], -0.47),
        ([oh, oh, oh, oh, h, h], -0.51),
        ([h, oh, h, oh, h, h], -0.50),
        ([h, oh, h, h, h, h], -0.45),
        ([oh, oh, h, oh, oh, h], -0.50),
        ([h, oh, h, oh, oh, h], -0.46),
        ([oh, oh, h, oh, h, h], -0.53),
        ([h, oc, oc, oc, h, h], -0.48),
        ([oc, oc, oc, oc, h, h], -0.48),
        ([oc, oc, h, oc, h, h], -0.47),
        ([h, oc, h, oc, oc, h], -0.46),
        ([oc, oc, h, oc, oc, h], -0.50),
    ]

    BaseStr = 'n1c2[nH]{5}c3c{4}c{3}c{2}c{1}c3nc2c(=O)[nH]{0}c1=O'
    N_group = len(re.findall('{[0-9]*}', BaseStr))
    # The base molecule is the template with every placeholder left empty.
    BaseMol = BaseStr.format(*([''] * N_group))
    smiles_l = [BaseStr.format(*groups) for groups, _ in rl]

    R_group_l = []
    Index_l = []
    NoOfR_l = []
    for groups, _ in rl:
        # A pattern belongs to the '(O)' family if any slot holds '(O)';
        # everything else is labelled '(OC)'.
        r_oh_test = [g == oh for g in groups]
        has_oh = any(r_oh_test)
        if disp:
            # Trace output follows `disp` like the rest of the printing.
            print(groups, '-->', r_oh_test, '-->', has_oh)
        R_group_l.append(oh if has_oh else oc)

        occupied = [0 if g == '' else 1 for g in groups]
        Index_l.append(occupied)
        NoOfR_l.append(np.sum(occupied))

    pdr = pd.DataFrame()
    pdr['ID'] = list(range(1, len(smiles_l) + 1))
    pdr['Rgroup'] = R_group_l
    pdr['NoOfR'] = NoOfR_l
    pdr['Index'] = Index_l
    pdr['Rgroups'] = [groups for groups, _ in rl]
    pdr['SMILES'] = smiles_l
    pdr['BaseMol'] = [BaseMol] * len(rl)
    pdr['BaseStr'] = [BaseStr] * len(rl)
    pdr['RedoxPotential'] = [value for _, value in rl]

    for ix, s in enumerate(smiles_l):
        if disp:
            print(ix + 1, s)
        if graph:
            jchem.show_mol(s)

    return pdr
def gen_r_attach_lowpot_Flavins(disp=False, graph=False):
    """
    Return a table of 14 fixed R-group patterns with their SMILES strings.

    Each entry of the internal list pairs six fragments ('' for an empty
    position, '(O)' or '(OC)') with the number stored in the
    'RedoxPotential' column.  The fragments are substituted for the
    '{0}' ... '{5}' placeholders of BaseStr.

    disp  -- if true, print the classification trace for every pattern
             and then the numbered SMILES strings
    graph -- if true, draw every generated molecule with jchem.show_mol

    Returns a pandas DataFrame with the columns 'ID' (starting at 1),
    'Rgroup', 'NoOfR', 'Index', 'Rgroups', 'SMILES', 'BaseMol', 'BaseStr'
    and 'RedoxPotential'.
    """
    oh = '(O)'
    h = ''
    oc = '(OC)'

    rl = []
    rl.append(([h, oh, oh, oh, h, h], -0.47))
    rl.append(([oh, oh, h, h, h, h], -0.47))
    rl.append(([oh, oh, oh, oh, oh, h], -0.47))
    rl.append(([oh, oh, oh, oh, h, h], -0.51))
    rl.append(([h, oh, h, oh, h, h], -0.50))
    rl.append(([h, oh, h, h, h, h], -0.45))
    rl.append(([oh, oh, h, oh, oh, h], -0.50))
    rl.append(([h, oh, h, oh, oh, h], -0.46))
    rl.append(([oh, oh, h, oh, h, h], -0.53))
    rl.append(([h, oc, oc, oc, h, h], -0.48))
    rl.append(([oc, oc, oc, oc, h, h], -0.48))
    rl.append(([oc, oc, h, oc, h, h], -0.47))
    rl.append(([h, oc, h, oc, oc, h], -0.46))
    rl.append(([oc, oc, h, oc, oc, h], -0.50))

    BaseStr = 'n1c2[nH]{5}c3c{4}c{3}c{2}c{1}c3nc2c(=O)[nH]{0}c1=O'
    N_group = len(re.findall('{[0-9]*}', BaseStr))
    emptyR = [''] * N_group
    # Filling every placeholder with '' gives the unsubstituted molecule.
    BaseMol = BaseStr.format(*emptyR)

    smiles_l = [BaseStr.format(*r[0]) for r in rl]

    pdr = pd.DataFrame()
    pdr['ID'] = list(range(1, len(smiles_l) + 1))

    R_group_l = []
    Index_l = []
    NoOfR_l = []
    for r in rl:
        # Decide the family: '(O)' if any position holds '(O)', otherwise
        # the pattern is labelled '(OC)'.
        r_oh_test = [x == oh for x in r[0]]
        if disp:
            # This trace used to print even when disp was false.
            print(r[0], '-->', r_oh_test, '-->', any(r_oh_test))
        r_type = oh if any(r_oh_test) else oc
        R_group_l.append(r_type)

        # 1 marks an occupied position, 0 an empty one.
        r_groups = [0 if x == '' else 1 for x in r[0]]
        Index_l.append(r_groups)
        NoOfR_l.append(np.sum(r_groups))

    pdr['Rgroup'] = R_group_l
    pdr['NoOfR'] = NoOfR_l
    pdr['Index'] = Index_l
    pdr['Rgroups'] = [r[0] for r in rl]
    pdr['SMILES'] = smiles_l
    pdr['BaseMol'] = [BaseMol] * len(rl)
    pdr['BaseStr'] = [BaseStr] * len(rl)
    pdr['RedoxPotential'] = [r[1] for r in rl]

    for ix, s in enumerate(smiles_l):
        if disp:
            print(ix + 1, s)
        if graph:
            jchem.show_mol(s)

    return pdr