def _ETKDG(ifile) -> (list, str):
    """
    Assigns 3D structures to the molecular structures provided as input.

    Every molecule read from the SDFile `ifile` is hydrogenated and embedded
    with the RDKit ETKDG method; the resulting mol blocks are written to a
    new file named like the input with a '_3d' suffix before the extension.

    Returns a tuple (success_list, ofile):
        success_list  list of booleans, one slot per molecule as counted by
                      sdfu.count_mols(ifile); False marks molecules for which
                      no 3D structure could be generated (these are not
                      written to ofile)
        ofile         name of the output file

    Raises whatever exception Chem.SDMolSupplier raises if the supplier
    cannot be created.
    """
    success_list = [True for i in range(sdfu.count_mols(ifile))]

    LOG.info('Converting to ETKDG 3D structures')

    try:
        suppl = Chem.SDMolSupplier(ifile)
    except Exception:
        LOG.critical('Unable to create supplier')
        raise

    filename, fileext = os.path.splitext(ifile)
    ofile = filename + '_3d' + fileext
    LOG.debug(f'3D stucture ouput file is: {ofile}')

    with open(ofile, 'w') as fo:
        mcount = 0
        for mol in suppl:
            if mol is None:
                # NOTE(review): unreadable records neither advance mcount nor
                # flag success_list; whether that keeps the indexes aligned
                # depends on how sdfu.count_mols counts them -- confirm
                LOG.debug('Supplier failed to read'
                          f' molecule #{mcount+1} in {ifile}')
                continue

            try:
                mol3 = Chem.AddHs(mol)
                # EmbedMolecule returns the new conformer ID, or -1 when the
                # embedding fails; it does not raise in that case
                conf_id = AllChem.EmbedMolecule(mol3, AllChem.ETKDG())
            except Exception:
                conf_id = -1

            if conf_id < 0:
                LOG.error('Failed to generate 3D structures using'
                          f' ETKDG method for molecule #{mcount+1} in {ifile}')
                success_list[mcount] = False
                mcount += 1
                continue

            fo.write(Chem.MolToMolBlock(mol3))
            fo.write('\n$$$$\n')  # end of mol
            mcount += 1

    return success_list, ofile
def convert3D(self, ifile, method):
    '''
    Assigns 3D structures to the molecular structures provided as input.

    ifile  : name of the input SDFile
    method : conversion method selector; when it contains 'ETKDG' the
             conversion is delegated to convert3D._ETKDG

    Returns a tuple (success_list, ofile). When no method is given, or the
    method is not one of the supported ones, the input file is returned
    unchanged together with an all-True list sized by sdfu.count_mols.
    '''
    success_list = [True for i in range(sdfu.count_mols(ifile))]

    # pass-through default, so an empty or unrecognised method still returns
    # a valid (list, file) pair instead of hitting an unassigned 'ofile'
    ofile = ifile

    if method and 'ETKDG' in method:
        success_list, ofile = convert3D._ETKDG(ifile)

    return success_list, ofile
def ionize(self, ifile, method):
    '''
    Adjust the ionization status of the molecular structure, using a
    given pH.

    No ionization method is implemented so far: the input file is always
    returned unchanged, together with an all-True list sized by
    sdfu.count_mols(ifile). A debug message is logged when a method was
    requested.
    '''
    # use the same 'sdfu' alias as the rest of the methods in this file
    success_list = [True for i in range(sdfu.count_mols(ifile))]

    if method:
        LOG.debug('ionize called, but no method implemented so far')
        # methods here

    return success_list, ifile
def workflow_series(self, input_file):
    '''
    Executes in sequence methods required to generate MD, starting from
    a single molecular file

    input : ifile, a molecular file in SDFile format

    output: a tuple (success, results). On failure, results is an error
            message (or whatever computeMD returned). Otherwise results
            contains
            results[0] the MD returned by computeMD (a numpy bidimensional
                       array)
            results[1] a list of strings containing the names of the MD vars
            results[2] a list of booleans indicating for which objects
                       the MD computations succeeded
    '''
    mol_index = [True for i in range(sdfu.count_mols(input_file))]

    # (stage method, key of its parameter, label used in the error message)
    # each stage consumes the file produced by the previous one
    stages = (
        (self.normalize, 'normalize_method', 'normalize'),
        (self.ionize, 'ionize_method', 'ionize'),
        (self.convert3D, 'convert3D_method', 'convert 3D'),
    )

    current_file = input_file
    for stage, param_key, label in stages:
        success_list, current_file = stage(
            current_file, self.parameters[param_key])

        # normalize reports an unreadable input as (False, message) instead
        # of (list, file); propagate the message rather than handing the
        # boolean to updateMolIndex as if it were a per-molecule list
        if success_list is False:
            return False, current_file

        success, mol_index = self.updateMolIndex(mol_index, success_list)
        if not success:
            return False, f'failed to {label} ' + input_file

    # compute MD on the file produced by the last stage
    success, results = self.computeMD(
        current_file, self.parameters['computeMD_method'])
    if not success:
        return False, results

    x = results[0]
    xnames = results[1]
    success_list = results[2]

    success, mol_index = self.updateMolIndex(mol_index, success_list)

    return success, (x, xnames, mol_index)
def workflow_objects(self, input_file):
    '''
    Executes in sequence methods required to generate MD, starting from
    a single molecular file.

    The input SDFile is split in single molecule pieces and workflow_series
    is run on each of them; the MD of the molecules that succeed are stacked
    with np.vstack.

    input : ifile, a molecular file in SDFile format

    output: a tuple (success, results). If the file cannot be split, the
            values returned by sdfu.split_SDFile are returned as they are.
            Otherwise success is True and results contains
            results[0] the stacked MD (an empty list if no molecule
                       succeeded)
            results[1] the names of the MD vars, as returned for the first
                       successful molecule
            results[2] a list of booleans, one per piece, indicating which
                       ones were processed successfully
    '''
    def md_width(block):
        # number of MD variables: size of the last axis, which works both
        # for a 1D vector and for a single-row 2D array
        dims = np.shape(block)
        return dims[-1] if dims else 0

    md_results = []
    va_results = []

    # split in single molecule pieces
    num_mol = sdfu.count_mols(input_file)
    success, results = sdfu.split_SDFile(input_file, num_mol)
    if not success:
        return success, results

    file_list = results[0]
    file_size = results[1]

    # a piece is valid only when it contains exactly one molecule
    success_list = [fsize == 1 for fsize in file_size]

    num_var = None  # set from the first molecule processed successfully
    for i, ifile in enumerate(file_list):

        if not success_list[i]:
            # molecule was empty, do not process
            LOG.error(f'Molecule {i+1} in {ifile} is empty, skiping...')
            continue

        # the workflow runs on a single molecule, so results[2] is ignored:
        # it must match the value in success
        success, results = self.workflow_series(ifile)
        success_list[i] = success

        if not success:
            # failed in the workflow
            LOG.error(f'Workflow failed for molecule #{i+1}'
                      f' in file {input_file}')
            continue

        if num_var is None:
            # first molecule
            md_results = results[0]
            va_results = results[1]
            num_var = md_width(md_results)
            continue

        # np.vstack needs every block to have the same number of variables
        if md_width(results[0]) != num_var:
            LOG.warning(f'MD length for molecule #{i+1} in file'
                        f' {input_file} does not match the MD length'
                        ' of the first molecule')
            success_list[i] = False
            continue

        md_results = np.vstack((md_results, results[0]))

    return True, (md_results, va_results, success_list)
def normalize(self, ifile, method):
    '''
    Generates a simplified SDFile with MolBlock for further processing.

    Every molecule readable by RDKit is written to a new file named like
    the input with a '_std' suffix before the extension; records that RDKit
    cannot read are skipped.

    When `method` contains 'standardize', the chemical standardization
    protocol provided by Francis Atkinson (EBI) is applied, accessible from:

    https://github.com/flatkinson/standardiser

    If the standardiser raises a StandardiseException the original mol block
    is written instead; any other exception discards the molecule.

    Returns a tuple (success_list, ofile):
        success_list  list of booleans sized by sdfu.count_mols(ifile), with
                      False for the molecules discarded by the standardiser
        ofile         name of the output file

    If the mol supplier cannot be created, returns (False, error message)
    instead.
    '''
    success_list = [True for i in range(sdfu.count_mols(ifile))]

    if not method:
        method = ''

    LOG.info('Starting normalization...')
    try:
        suppl = Chem.SDMolSupplier(ifile)
        LOG.debug(f'mol supplier created from {ifile}')
    except Exception as e:
        LOG.error('Unable to create mol supplier with the exception: '
                  f'{e}')
        return False, 'Error at processing input file for standardizing structures'

    filename, fileext = os.path.splitext(ifile)
    ofile = filename + '_std' + fileext
    LOG.debug(f'writing standarized molecules to {ofile}')

    standardize = 'standardize' in method

    with open(ofile, 'w') as fo:
        mcount = 0  # molecules written so far (used for names and messages)
        mpos = 0    # position of the molecule among the readable ones

        for m in suppl:

            # molecule not recognised by RDKit
            if m is None:
                # NOTE(review): unreadable records do not advance mpos nor
                # flag success_list; whether that keeps the indexes aligned
                # depends on how sdfu.count_mols counts them -- confirm
                LOG.error('Unable to process molecule'
                          f' #{mcount+1} in {ifile}')
                continue

            name = sdfu.getName(m, count=mcount,
                                field=self.parameters['SDFile_name'],
                                suppl=suppl)

            if standardize:
                try:
                    parent = standardise.run(Chem.MolToMolBlock(m))
                except standardise.StandardiseException as e:
                    if e.name == "no_non_salt":
                        # very common warning, use parent mol and proceed
                        LOG.debug(
                            f'"No non salt error" found. Skiped standardize for mol'
                            f' #{mcount} {name}')
                    else:
                        # serious issue, no parent was generated
                        LOG.error(
                            f'Critical standardize exception: {e}'
                            f' when processing mol #{mcount} {name}. Skipping normalization'
                        )
                    # run() raised before returning anything, so fall back
                    # to the original mol block in both cases
                    parent = Chem.MolToMolBlock(m)
                except Exception as e:
                    # this error means an execution error running standardizer
                    # the molecule is discarded and flagged at its own
                    # position, so later failures are not recorded on the
                    # slot of a previous one
                    LOG.error(
                        f'Critical standardize execution exception {e}'
                        f' when processing mol #{mcount} {name}. Discarding molecule'
                    )
                    success_list[mpos] = False
                    mpos += 1
                    continue
            else:
                LOG.info('Skipping normalization.')
                parent = Chem.MolToMolBlock(m)

            # in any case, write parent followed by the record terminator
            fo.write(parent)
            fo.write('$$$$\n')

            mcount += 1
            mpos += 1

    return success_list, ofile