Beispiel #1
0
def _ETKDG(ifile) -> tuple:
    """Assign 3D structures to the molecules of an SDFile with RDKit ETKDG.

    Every molecule read from ``ifile`` gets explicit hydrogens added and is
    embedded with ``AllChem.EmbedMolecule`` using ``AllChem.ETKDG()``
    parameters. Molecules embedded successfully are written, in input order,
    to a new file named like the input with a ``_3d`` suffix inserted before
    the extension.

    Args:
        ifile: path of the input SDFile.

    Returns:
        A ``(success_list, ofile)`` tuple. ``success_list`` holds one boolean
        per molecule counted by ``sdfu.count_mols(ifile)``; an entry is
        ``False`` when the molecule could not be read or embedded and was
        therefore not written. ``ofile`` is the path of the output SDFile.

    Raises:
        Exception: whatever ``Chem.SDMolSupplier`` raises when the supplier
            cannot be created; it is logged and re-raised unchanged.
    """

    success_list = [True] * sdfu.count_mols(ifile)

    LOG.info('Converting to ETKDG 3D structures')
    try:
        suppl = Chem.SDMolSupplier(ifile)
    except Exception:
        LOG.critical('Unable to create supplier')
        raise  # bare raise keeps the original traceback

    filename, fileext = os.path.splitext(ifile)
    ofile = filename + '_3d' + fileext
    LOG.debug(f'3D stucture ouput file is: {ofile}')

    with open(ofile, 'w') as fo:

        # enumerate() keeps mcount equal to the position of the molecule in
        # the input file on every path, so success_list never drifts out of
        # alignment after a skipped molecule
        for mcount, mol in enumerate(suppl):

            if mol is None:
                LOG.debug('Supplier failed to read'
                          f' molecule #{mcount+1} in {ifile}')
                # the molecule is not written, so it must be flagged; the
                # bound check tolerates a supplier yielding more entries
                # than count_mols reported
                if mcount < len(success_list):
                    success_list[mcount] = False
                continue

            try:
                mol3 = Chem.AddHs(mol)
                # EmbedMolecule reports a failed embedding by returning -1
                # instead of raising, so the return value must be checked
                embedded = AllChem.EmbedMolecule(mol3, AllChem.ETKDG()) != -1
            except Exception:
                embedded = False

            if not embedded:
                LOG.error('Failed to generate 3D structures using'
                          f' ETKDG method for molecule #{mcount+1} in {ifile}')
                success_list[mcount] = False
                continue

            fo.write(Chem.MolToMolBlock(mol3))
            fo.write('\n$$$$\n')  # end of mol

    return success_list, ofile
Beispiel #2
0
    def convert3D(self, ifile, method):
        '''
        Assigns 3D structures to the molecular structures provided as input.

        ifile : path of the input molecular file
        method: name of the 3D conversion method. A false value (None, '')
                skips the conversion; a value containing 'ETKDG' delegates
                to convert3D._ETKDG

        Returns a tuple (success_list, ofile). When no method is given,
        success_list contains one True per molecule counted in ifile and
        ofile is ifile itself; otherwise both values are the ones returned
        by convert3D._ETKDG

        Raises ValueError when method is set but does not name a supported
        conversion method
        '''

        success_list = [True for i in range(sdfu.count_mols(ifile))]

        # no conversion requested: the input file is passed through untouched
        if not method:
            return success_list, ifile

        if 'ETKDG' in method:
            success_list, ofile = convert3D._ETKDG(ifile)
            return success_list, ofile

        # previously this path fell through to the final return with 'ofile'
        # unbound and died with an UnboundLocalError; fail with a message
        # that points at the misconfigured parameter instead
        raise ValueError(f'unsupported convert3D method: {method!r}')
Beispiel #3
0
    def ionize(self, ifile, method):
        '''
        Placeholder for adjusting the ionization status of the molecular
        structures at a given pH.

        No ionization method is implemented yet: whatever the value of
        method, the input file is handed back unchanged together with a
        list holding one True per molecule counted in ifile. A non-empty
        method only triggers a debug message.
        '''

        n_mols = sdfutils.count_mols(ifile)
        all_ok = [True] * n_mols

        if method:
            # methods here
            LOG.debug('ionize called, but no method implemented so far')

        return all_ok, ifile
Beispiel #4
0
    def workflow_series(self, input_file):
        '''
        Runs, one after the other, the steps needed to obtain MD from a
        single molecular file: normalize, ionize, convert3D and computeMD.

        input : input_file, a molecular file in SDFile format
        output: a tuple (success, results)
                - when normalize, ionize or convert3D leave updateMolIndex
                  reporting a failure: (False, message naming the step and
                  input_file)
                - when computeMD fails: (False, whatever computeMD returned
                  as its second item)
                - otherwise: (flag from the last updateMolIndex call,
                  (x, xnames, mol_index)), where x and xnames are items 0
                  and 1 of the computeMD results and mol_index is the index
                  updated after every step
        '''

        tracker = [True] * sdfu.count_mols(input_file)

        # (method name, key of the method parameter, failure message prefix)
        preparation = (
            ('normalize', 'normalize_method', 'failed to normalize '),
            ('ionize', 'ionize_method', 'failed to ionize '),
            ('convert3D', 'convert3D_method', 'failed to convert 3D '),
        )

        # each step consumes the file produced by the previous one
        current_file = input_file
        for step_name, param_key, failure_prefix in preparation:
            step = getattr(self, step_name)
            step_flags, current_file = step(
                current_file, self.parameters[param_key])
            step_ok, tracker = self.updateMolIndex(tracker, step_flags)

            if not step_ok:
                return False, failure_prefix + input_file

        # final step: compute MD on the fully prepared file
        md_ok, md_output = self.computeMD(
            current_file, self.parameters['computeMD_method'])

        if not md_ok:
            return False, md_output

        matrix = md_output[0]
        var_names = md_output[1]
        md_flags = md_output[2]

        final_ok, tracker = self.updateMolIndex(tracker, md_flags)

        return final_ok, (matrix, var_names, tracker)
Beispiel #5
0
    def workflow_objects(self, input_file):
        '''
        Executes in sequence methods required to generate MD, starting from
        a single molecular file which is split in single-molecule pieces;
        workflow_series is run on every piece.

        input : input_file, a molecular file in SDFile format
        output: a tuple (success, results)
                - when the file cannot be split, the (success, results)
                  pair returned by sdfu.split_SDFile
                - otherwise (True, (md_results, va_results, success_list))
                    md_results   MD of the molecules processed successfully,
                                 stacked with np.vstack (the MD of the only
                                 molecule as returned by workflow_series when
                                 just one succeeded, [] when none did)
                    va_results   item 1 of the workflow_series results for
                                 the first successful molecule ([] if none)
                    success_list one boolean per piece; False for pieces
                                 that were empty, failed in the workflow or
                                 produced a number of MD variables different
                                 from the first successful molecule
        '''

        # split in single molecule pieces
        num_mol = sdfu.count_mols(input_file)
        success, results = sdfu.split_SDFile(input_file, num_mol)

        if not success:
            return success, results

        file_list = results[0]
        file_size = results[1]

        # a piece is usable only if it contains exactly one molecule
        success_list = [fsize == 1 for fsize in file_size]

        md_rows = []       # MD of every accepted molecule, in input order
        va_results = []
        num_var = None     # set by the first successful molecule

        for i, ifile in enumerate(file_list):

            if not success_list[i]:  # molecule was empty, do not process
                LOG.error(f'Molecule {i+1} in {ifile} is empty, skiping...')
                continue

            success, results = self.workflow_series(ifile)

            # since the workflow was run for a single molecule, results[2]
            # is ignored, because it must match the value in success
            success_list[i] = success

            if not success:  # failed in the workflow
                LOG.error(f'Workflow failed for molecule #{str(i+1)}'
                          f' in file {input_file}')
                continue

            # number of MD variables = length of the last axis. Unlike len(),
            # this is right both for 1D vectors and for 2D single-row arrays,
            # where len() would always be 1 and let a mismatch reach vstack
            mol_num_var = np.shape(results[0])[-1]

            if num_var is None:  # first molecule
                va_results = results[1]
                num_var = mol_num_var
            elif mol_num_var != num_var:
                LOG.warning(f'MD length for molecule #{str(i+1)} in file'
                            f' {input_file} does not match the MD length'
                            ' of the first molecule')
                success_list[i] = False
                continue

            md_rows.append(results[0])

        # stack once at the end instead of re-copying the whole matrix for
        # every molecule; a single result is returned as it was received
        if not md_rows:
            md_results = []
        elif len(md_rows) == 1:
            md_results = md_rows[0]
        else:
            md_results = np.vstack(md_rows)

        return True, (md_results, va_results, success_list)
Beispiel #6
0
    def normalize(self, ifile, method):
        '''
        Generates a simplified SDFile containing only the MolBlock of every
        molecule, for further processing

        Note that this method is applied to every molecule and that it removes
        mol blocks in the input SDFile not able to generate a valid mol

        Also, when method contains 'standardize', applies the chemical
        standardization protocol provided by Francis Atkinson (EBI),
        accessible from:

            https://github.com/flatkinson/standardiser

        ifile : path of the input SDFile
        method: normalization method; a false value is treated as ''

        Returns a tuple (success_list, ofile), where success_list has one
        boolean per molecule counted in ifile (False for molecules which
        were not written to the output) and ofile is the path of the output
        SDFile, named like the input with a '_std' suffix before the
        extension. If the mol supplier cannot be created the tuple is
        (False, error message) instead
        '''

        success_list = [True] * sdfu.count_mols(ifile)

        if not method:
            method = ''

        LOG.info('Starting normalization...')
        try:
            suppl = Chem.SDMolSupplier(ifile)
            LOG.debug(f'mol supplier created from {ifile}')
        except Exception as e:
            LOG.error('Unable to create mol supplier with the exception: '
                      f'{e}')
            return False, 'Error at processing input file for standardizing structures'

        filename, fileext = os.path.splitext(ifile)
        ofile = filename + '_std' + fileext
        LOG.debug(f'writing standarized molecules to {ofile}')

        use_standardiser = 'standardize' in method

        with open(ofile, 'w') as fo:

            # enumerate() makes mcount the position of the molecule in the
            # input file on every path; a counter advanced only after a
            # successful write left success_list (and the count handed to
            # getName) misaligned after any discarded molecule
            for mcount, m in enumerate(suppl):

                # molecule not recognised by RDKit
                if m is None:
                    LOG.error('Unable to process molecule'
                              f' #{mcount+1} in {ifile}')
                    # it is dropped from the output, so flag it; the bound
                    # check tolerates a supplier yielding more entries than
                    # count_mols reported
                    if mcount < len(success_list):
                        success_list[mcount] = False
                    continue

                name = sdfu.getName(m,
                                    count=mcount,
                                    field=self.parameters['SDFile_name'],
                                    suppl=suppl)

                if use_standardiser:
                    try:
                        parent = standardise.run(Chem.MolToMolBlock(m))

                    except standardise.StandardiseException as e:
                        # run() raised before producing a parent, so in both
                        # cases the original molecule is written unchanged
                        if e.name == "no_non_salt":
                            # very common warning, use parent mol and proceed
                            LOG.debug(
                                f'"No non salt error" found. Skiped standardize for mol'
                                f' #{mcount} {name}')
                        else:
                            # serious issue, no parent was generated
                            LOG.error(
                                f'Critical standardize exception: {e}'
                                f' when processing mol #{mcount} {name}. Skipping normalization'
                            )
                        parent = Chem.MolToMolBlock(m)

                    except Exception as e:
                        # this error means an execution error running
                        # standardizer; the molecule is discarded and
                        # therefore the list of molecules must be updated
                        LOG.error(
                            f'Critical standardize execution exception {e}'
                            f' when processing mol #{mcount} {name}. Discarding molecule'
                        )
                        success_list[mcount] = False
                        continue

                else:
                    LOG.info('Skipping normalization.')
                    parent = Chem.MolToMolBlock(m)

                # in any case, write parent followed by the terminator
                fo.write(parent)
                fo.write('$$$$\n')

        return success_list, ofile