コード例 #1
0
def rdchiralRun(rxn, reactants, keep_isotopes=False, combine_enantiomers=True):
    '''
    Apply a reaction template to reactants and return the outcome SMILES
    with tetrahedral stereochemistry corrected.

    The template is first run by RDKit on the achiral reactants. Each raw
    outcome is then:
      1. discarded if the matched template atoms are inconsistent with the
         chirality of the corresponding reactant atoms,
      2. merged into a single molecule (stitching fragments that share
         isotope labels),
      3. repaired by re-adding reactant bonds that were lost even though the
         reactant template did not ask for them to be broken,
      4. given chiral tags copied from the reactants or the product template.

    rxn = rdchiralReaction (rdkit reaction + auxiliary information)
    reactants = rdchiralReactants (rdkit mol + auxiliary information)
    keep_isotopes = if True, the isotope labels (used here as atom map
        numbers) are left on the outcome atoms; otherwise they are reset to 0
        before canonicalization
    combine_enantiomers = if True, the set of outcomes is passed through
        combine_enantiomers_into_racemic before being returned

    returns: list of the de-duplicated values produced by
        canonicalize_outcome_smiles (None results are dropped); an empty list
        if RDKit finds no outcomes

    note: there is a fair amount of initialization (assigning stereochem), most
    importantly assigning isotope numbers to the reactant atoms. It is
    HIGHLY recommended to use the custom classes for initialization.

    note: the isotopes of the template atoms held by rxn are overwritten in
    place for every outcome and are not restored by this function.
    '''

    final_outcomes = set()

    # We need to keep track of what map numbers
    # (i.e., isotopes) correspond to which atoms
    # note: all reactant atoms must be mapped, so this is safe
    atoms_r = reactants.atoms_r

    # Reference (not a copy!) to the reactant template; it is only used below
    # to look up which bonds the template explicitly specifies
    template_r = rxn.template_r

    # Get molAtomMapNum->atom dictionary for template reactants and products
    atoms_rt_map = rxn.atoms_rt_map
    atoms_pt_map = rxn.atoms_pt_map

    ###############################################################################
    # Run naive RDKit on ACHIRAL version of molecules

    outcomes = rxn.rxn.RunReactants((reactants.reactants_achiral,))
    vprint(2, 'Using naive RunReactants, {} outcomes', len(outcomes))
    if not outcomes:
        return []

    ###############################################################################

    for outcome in outcomes:
        ###############################################################################
        # Look for new atoms in products that were not in
        # reactants (e.g., LGs for a retro reaction)
        vprint(2, 'Processing {}', str([Chem.MolToSmiles(x, True) for x in outcome]))
        # Atoms without an isotope label get fresh labels counting up from 900
        unmapped = 900
        for m in outcome:
            for a in m.GetAtoms():
                # Assign "map" number via isotope
                if not a.GetIsotope():
                    a.SetIsotope(unmapped)
                    unmapped += 1
        vprint(2, 'Added {} map numbers to product', unmapped-900)
        ###############################################################################


        ###############################################################################
        # Check to see if reactants should not have been matched (based on chirality)

        # Define isotope -> reactant template atom map
        atoms_rt = {a.GetIsotope(): atoms_rt_map[a.GetIntProp('old_mapno')]
                    for m in outcome for a in m.GetAtoms() if a.HasProp('old_mapno')}

        # Set isotopes of reactant template
        # note: this is okay to do within the loop, because ALL atoms must be matched
        # in the templates, so the isotopes will get overwritten every time
        for (i, a) in atoms_rt.items():
            a.SetIsotope(i)

        # Make sure each atom matches
        if not all(atom_chirality_matches(atoms_rt[i], atoms_r[i]) for i in atoms_rt):
            vprint(2, 'Chirality violated! Should not have gotten this match')
            continue
        vprint(2, 'Chirality matches! Just checked with atom_chirality_matches')

        # Check bond chirality
        #TODO: add bond chirality considerations to exclude improper matches

        ###############################################################################



        ###############################################################################
        # Convert product(s) to single product so that all
        # reactions can be treated as pseudo-intramolecular
        # But! check for ring openings mistakenly split into multiple
        # This can be diagnosed by duplicate map numbers (i.e., SMILES)

        isotopes = [a.GetIsotope() for m in outcome for a in m.GetAtoms() if a.GetIsotope()]
        if len(isotopes) != len(set(isotopes)): # duplicate?
            vprint(1, 'Found duplicate isotopes in product - need to stitch')
            # need to do a fancy merge: start from the first fragment and add
            # only the atoms/bonds of later fragments that are not present yet
            merged_mol = Chem.RWMol(outcome[0])
            merged_iso_to_id = {a.GetIsotope(): a.GetIdx() for a in outcome[0].GetAtoms() if a.GetIsotope()}
            for j in range(1, len(outcome)):
                new_mol = outcome[j]
                for a in new_mol.GetAtoms():
                    if a.GetIsotope() not in merged_iso_to_id:
                        merged_iso_to_id[a.GetIsotope()] = merged_mol.AddAtom(a)
                for b in new_mol.GetBonds():
                    bi = b.GetBeginAtom().GetIsotope()
                    bj = b.GetEndAtom().GetIsotope()
                    vprint(10, 'stitching bond between {} and {} in stich has chirality {}, {}'.format(
                        bi, bj, b.GetStereo(), b.GetBondDir()
                    ))
                    if not merged_mol.GetBondBetweenAtoms(
                            merged_iso_to_id[bi], merged_iso_to_id[bj]):
                        merged_mol.AddBond(merged_iso_to_id[bi],
                            merged_iso_to_id[bj], b.GetBondType())
                        # Carry the stereo annotations over to the new bond
                        new_b = merged_mol.GetBondBetweenAtoms(
                            merged_iso_to_id[bi], merged_iso_to_id[bj])
                        new_b.SetStereo(b.GetStereo())
                        new_b.SetBondDir(b.GetBondDir())
            outcome = merged_mol.GetMol()
            vprint(1, 'Merged editable mol, converted back to real mol, {}', Chem.MolToSmiles(outcome, True))
        else:
            new_outcome = outcome[0]
            for j in range(1, len(outcome)):
                new_outcome = AllChem.CombineMols(new_outcome, outcome[j])
            outcome = new_outcome
        vprint(2, 'Converted all outcomes to single molecules')
        ###############################################################################




        ###############################################################################
        # Figure out which atoms were matched in the templates
        # atoms_rt and atoms_p will be outcome-specific.
        atoms_pt = {a.GetIsotope(): atoms_pt_map[a.GetIntProp('old_mapno')]
                    for a in outcome.GetAtoms() if a.HasProp('old_mapno')}
        atoms_p = {a.GetIsotope(): a for a in outcome.GetAtoms() if a.GetIsotope()}

        # Set isotopes of product template
        # note: this is okay to do within the loop, because ALL atoms must be matched
        # in the templates, so the isotopes will get overwritten every time
        # This makes it easier to check parity changes
        for (i, a) in atoms_pt.items():
            a.SetIsotope(i)
        ###############################################################################



        ###############################################################################
        # Check for missing bonds. These are bonds that are present in the reactants,
        # not specified in the reactant template, and not in the product. Accidental
        # fragmentation can occur for intramolecular ring openings
        missing_bonds = []
        for (i, j, b) in reactants.bonds_by_isotope:
            if i in atoms_p and j in atoms_p:
                # atoms from reactant bond show up in product
                if not outcome.GetBondBetweenAtoms(atoms_p[i].GetIdx(), atoms_p[j].GetIdx()):
                    #...but there is not a bond in the product between those atoms
                    if i not in atoms_rt or j not in atoms_rt or not template_r.GetBondBetweenAtoms(atoms_rt[i].GetIdx(), atoms_rt[j].GetIdx()):
                        # the reactant template did not specify a bond between those atoms (e.g., intentionally destroy)
                        missing_bonds.append((i, j, b))
        if missing_bonds:
            vprint(1, 'Product is missing non-reacted bonds that were present in reactants!')
            outcome = Chem.RWMol(outcome)
            rwmol_iso_to_id = {a.GetIsotope(): a.GetIdx() for a in outcome.GetAtoms() if a.GetIsotope()}
            for (i, j, b) in missing_bonds:
                outcome.AddBond(rwmol_iso_to_id[i], rwmol_iso_to_id[j])
                # Copy type, direction and aromaticity from the reactant bond
                new_b = outcome.GetBondBetweenAtoms(rwmol_iso_to_id[i], rwmol_iso_to_id[j])
                new_b.SetBondType(b.GetBondType())
                new_b.SetBondDir(b.GetBondDir())
                new_b.SetIsAromatic(b.GetIsAromatic())
            outcome = outcome.GetMol()
        else:
            vprint(3, 'No missing bonds')
        ###############################################################################


        # Now that we've fixed any bonds, connectivity is set. This is a good time
        # to update the property cache, since all that is left is fixing atom/bond
        # stereochemistry.
        try:
            outcome.UpdatePropertyCache()
        except ValueError as e:
            # Log the offending outcome and drop it
            vprint(1, '{}, {}'.format(Chem.MolToSmiles(outcome, True), e))
            continue


        ###############################################################################
        # Correct tetra chirality in the outcome

        for a in outcome.GetAtoms():
            # Participants in reaction core (from reactants) will have old_mapno
            # Spectators present in reactants will have react_atom_idx
            # ...so new atoms will have neither!
            if not a.HasProp('old_mapno'):
                # Not part of the reactants template

                if not a.HasProp('react_atom_idx'):
                    # Atoms only appear in product template - their chirality
                    # should be properly instantiated by RDKit...hopefully...
                    vprint(4, 'Atom {} created by product template, should have right chirality', a.GetIsotope())

                else:
                    vprint(4, 'Atom {} outside of template, copy chirality from reactants', a.GetIsotope())
                    copy_chirality(atoms_r[a.GetIsotope()], a)
            else:
                # Part of reactants and reaction core

                if template_atom_could_have_been_tetra(atoms_rt[a.GetIsotope()]):
                    vprint(3, 'Atom {} was in rct template (could have been tetra)', a.GetIsotope())

                    if template_atom_could_have_been_tetra(atoms_pt[a.GetIsotope()]):
                        vprint(3, 'Atom {} in product template could have been tetra, too', a.GetIsotope())

                        # Was the product template specified?

                        if atoms_pt[a.GetIsotope()].GetChiralTag() == ChiralType.CHI_UNSPECIFIED:
                            # No, leave unspecified in product
                            vprint(3, '...but it is not specified in product, so destroy chirality')
                            a.SetChiralTag(ChiralType.CHI_UNSPECIFIED)

                        else:
                            # Yes
                            vprint(3, '...and product is specified')

                            # Was the reactant template specified?

                            if atoms_rt[a.GetIsotope()].GetChiralTag() == ChiralType.CHI_UNSPECIFIED:
                                # No, so the reaction introduced chirality
                                vprint(3, '...but reactant template was not, so copy from product template')
                                copy_chirality(atoms_pt[a.GetIsotope()], a)

                            else:
                                # Yes, so we need to check if chirality should be preserved or inverted
                                vprint(3, '...and reactant template was, too! copy from reactants')
                                copy_chirality(atoms_r[a.GetIsotope()], a)
                                if not atom_chirality_matches(atoms_pt[a.GetIsotope()], atoms_rt[a.GetIsotope()]):
                                    vprint(3, 'but! reactant template and product template have opposite stereochem, so invert')
                                    a.InvertChirality()

                    else:
                        # Reactant template chiral, product template not - the
                        # reaction is supposed to destroy chirality, so leave
                        # unspecified
                        vprint(3, 'If reactant template could have been ' +
                            'chiral, but the product template could not, then we dont need ' +
                            'to worry about specifying product atom chirality')

                else:
                    vprint(3, 'Atom {} could not have been chiral in reactant template', a.GetIsotope())

                    if not template_atom_could_have_been_tetra(atoms_pt[a.GetIsotope()]):
                        vprint(3, 'Atom {} also could not have been chiral in product template', a.GetIsotope())
                        vprint(3, '...so, copy chirality from reactant instead')
                        copy_chirality(atoms_r[a.GetIsotope()], a)

                    else:
                        # Fixed: the message previously had no placeholder, so
                        # the atom number passed to vprint was never shown
                        vprint(3, 'Atom {} could/does have product template chirality!', a.GetIsotope())
                        vprint(3, '...so, copy chirality from product template')
                        copy_chirality(atoms_pt[a.GetIsotope()], a)

            vprint(3, 'New chiral tag {}', a.GetChiralTag())
        vprint(2, 'After attempting to re-introduce chirality, outcome = {}',
            Chem.MolToSmiles(outcome, True))
        ###############################################################################


        ###############################################################################
        # Correct bond directionality in the outcome
        # TODO


        # Clear isotope
        if not keep_isotopes:
            for a in outcome.GetAtoms():
                a.SetIsotope(0)

        # Canonicalize; outcomes that cannot be canonicalized (None) are dropped
        smiles = canonicalize_outcome_smiles(outcome)
        if smiles is not None:
            final_outcomes.add(smiles)

    ###############################################################################
    # One last fix for consolidating multiple stereospecified products...
    if combine_enantiomers:
        final_outcomes = combine_enantiomers_into_racemic(final_outcomes)
    ###############################################################################

    return list(final_outcomes)
コード例 #2
0
def rdchiralRun(rxn,
                reactants,
                keep_mapnums=False,
                combine_enantiomers=True,
                return_mapped=False):
    '''
    rxn = rdchiralReaction (rdkit reaction + auxilliary information)
    reactants = rdchiralReactants (rdkit mol + auxilliary information)

    note: there is a fair amount of initialization (assigning stereochem), most
    importantly assigning atom map numbers to the reactant atoms. It is 
    HIGHLY recommended to use the custom classes for initialization.
    '''

    # New: reset atom map numbers for templates in case they have been overwritten
    # by previous uses of this template!
    rxn.reset()

    ###############################################################################
    # Run naive RDKit on ACHIRAL version of molecules
    outcomes = rxn.rxn.RunReactants((reactants.reactants_achiral, ))
    if PLEVEL >= (1):
        print('Using naive RunReactants, {} outcomes'.format(len(outcomes)))
    if not outcomes:
        return []
    ###############################################################################

    ###############################################################################
    # Initialize, now that there is at least one outcome

    final_outcomes = set()
    mapped_outcomes = {}
    # We need to keep track of what map numbers correspond to which atoms
    # note: all reactant atoms must be mapped, so this is safe
    atoms_r = reactants.atoms_r

    # Copy reaction template so we can play around with map numbers
    template_r, template_p = rxn.template_r, rxn.template_p

    # Get molAtomMapNum->atom dictionary for tempalte reactants and products
    atoms_rt_map = rxn.atoms_rt_map
    # TODO: cannot change atom map numbers in atoms_rt permanently?
    atoms_pt_map = rxn.atoms_pt_map
    ###############################################################################

    for outcome in outcomes:

        ###############################################################################
        # Look for new atoms in products that were not in
        # reactants (e.g., LGs for a retro reaction)
        if PLEVEL >= (2):
            print('Processing {}'.format(
                str([Chem.MolToSmiles(x, True) for x in outcome])))
        unmapped = 900
        for m in outcome:
            for a in m.GetAtoms():
                # Assign map number to outcome based on react_atom_idx
                if a.HasProp('react_atom_idx'):
                    a.SetAtomMapNum(
                        reactants.idx_to_mapnum(
                            int(a.GetProp('react_atom_idx'))))
                if not a.GetAtomMapNum():
                    a.SetAtomMapNum(unmapped)
                    unmapped += 1
        if PLEVEL >= 2:
            print('Added {} map numbers to product'.format(unmapped - 900))
        ###############################################################################

        ###############################################################################
        # Check to see if reactants should not have been matched (based on chirality)

        # Define map num -> reactant template atom map
        atoms_rt =  {a.GetAtomMapNum(): atoms_rt_map[a.GetIntProp('old_mapno')] \
            for m in outcome for a in m.GetAtoms() if a.HasProp('old_mapno')}

        # Set map numbers of reactant template to be consistent with reactant/product molecules
        # note: this is okay to do within the loop, because ALL atoms must be matched
        # in the templates, so the atommapnum will get overwritten every time
        [a.SetAtomMapNum(i) for (i, a) in atoms_rt.items()]

        # Make sure each atom matches
        # note: this is a little weird because atom_chirality_matches takes three values,
        #       -1 (both tetra but opposite), 0 (not a match), and +1 (both tetra and match)
        #       and we only want to continue if they all equal -1 or all equal +1
        prev = None
        skip_outcome = False
        for match in (atom_chirality_matches(atoms_rt[i], atoms_r[i])
                      for i in atoms_rt):
            if match == 0:
                if PLEVEL >= 2:
                    print(
                        'Chirality violated! Should not have gotten this match'
                    )
                skip_outcome = True
                break
            elif match == 2:  # ambiguous case
                continue
            elif prev is None:
                prev = match
            elif match != prev:
                if PLEVEL >= 2:
                    print(
                        'Part of the template matched reactant chirality, part is inverted! Should not match'
                    )
                skip_outcome = True
                break
        if skip_outcome:
            continue
        if PLEVEL >= 2:
            print(
                'Chirality matches! Just checked with atom_chirality_matches')

        # Check bond chirality - iterate through reactant double bonds where
        # chirality is specified (or not). atoms defined by map number
        skip_outcome = False
        for atoms, dirs, is_implicit in reactants.atoms_across_double_bonds:
            if all(i in atoms_rt for i in atoms):
                # All atoms definining chirality were matched to the reactant template
                # So, check if it is consistent with how the template is defined
                #...but /=/ should match \=\ since they are both trans...
                matched_atom_map_nums = tuple(atoms_rt[i].GetAtomMapNum()
                                              for i in atoms)

                # Convert atoms_rt to original template's atom map numbers:
                matched_atom_map_nums = tuple(
                    rxn.atoms_rt_idx_to_map[atoms_rt[i].GetIdx()]
                    for i in atoms)

                if matched_atom_map_nums not in rxn.required_rt_bond_defs:
                    continue  # this can happen in ring openings, for example
                dirs_template = rxn.required_rt_bond_defs[
                    matched_atom_map_nums]
                if dirs != dirs_template and \
                        (BondDirOpposite[dirs[0]], BondDirOpposite[dirs[1]]) != dirs_template and \
                        not (dirs_template == (BondDir.NONE, BondDir.NONE) and is_implicit):
                    if PLEVEL >= 5:
                        print(
                            'Reactant bond chirality does not match template!')
                    if PLEVEL >= 5: print('Based on map numbers...')
                    if PLEVEL >= 5:
                        print('  rct: {} -> {}'.format(matched_atom_map_nums,
                                                       dirs))
                    if PLEVEL >= 5:
                        print('  tmp: {} -> {}'.format(matched_atom_map_nums,
                                                       dirs_template))
                    if PLEVEL >= 5:
                        print(
                            'skipping this outcome, should not have matched...'
                        )
                    skip_outcome = True
                    break
        if skip_outcome:
            continue

        ###############################################################################

        ###############################################################################
        # Convert product(s) to single product so that all
        # reactions can be treated as pseudo-intramolecular
        # But! check for ring openings mistakenly split into multiple
        # This can be diagnosed by duplicate map numbers (i.e., SMILES)

        mapnums = [
            a.GetAtomMapNum() for m in outcome for a in m.GetAtoms()
            if a.GetAtomMapNum()
        ]
        if len(mapnums) != len(set(mapnums)):  # duplicate?
            if PLEVEL >= 1:
                print('Found duplicate mapnums in product - need to stitch')
            # need to do a fancy merge
            merged_mol = Chem.RWMol(outcome[0])
            merged_map_to_id = {
                a.GetAtomMapNum(): a.GetIdx()
                for a in outcome[0].GetAtoms() if a.GetAtomMapNum()
            }
            for j in range(1, len(outcome)):
                new_mol = outcome[j]
                for a in new_mol.GetAtoms():
                    if a.GetAtomMapNum() not in merged_map_to_id:
                        merged_map_to_id[
                            a.GetAtomMapNum()] = merged_mol.AddAtom(a)
                for b in new_mol.GetBonds():
                    bi = b.GetBeginAtom().GetAtomMapNum()
                    bj = b.GetEndAtom().GetAtomMapNum()
                    if PLEVEL >= 10:
                        print(
                            'stitching bond between {} and {} in stich has chirality {}, {}'
                            .format(bi, bj, b.GetStereo(), b.GetBondDir()))
                    if not merged_mol.GetBondBetweenAtoms(
                            merged_map_to_id[bi], merged_map_to_id[bj]):
                        merged_mol.AddBond(merged_map_to_id[bi],
                                           merged_map_to_id[bj],
                                           b.GetBondType())
                        merged_mol.GetBondBetweenAtoms(
                            merged_map_to_id[bi],
                            merged_map_to_id[bj]).SetStereo(b.GetStereo())
                        merged_mol.GetBondBetweenAtoms(
                            merged_map_to_id[bi],
                            merged_map_to_id[bj]).SetBondDir(b.GetBondDir())
            outcome = merged_mol.GetMol()
            if PLEVEL >= 1:
                print('Merged editable mol, converted back to real mol, {}'.
                      format(Chem.MolToSmiles(outcome, True)))
        else:
            new_outcome = outcome[0]
            for j in range(1, len(outcome)):
                new_outcome = AllChem.CombineMols(new_outcome, outcome[j])
            outcome = new_outcome
        if PLEVEL >= 2: print('Converted all outcomes to single molecules')
        ###############################################################################

        ###############################################################################
        # Figure out which atoms were matched in the templates
        # atoms_rt and atoms_p will be outcome-specific.
        atoms_pt = {a.GetAtomMapNum(): atoms_pt_map[a.GetIntProp('old_mapno')] \
            for a in outcome.GetAtoms() if a.HasProp('old_mapno')}
        atoms_p = {
            a.GetAtomMapNum(): a
            for a in outcome.GetAtoms() if a.GetAtomMapNum()
        }

        # Set map numbers of product template
        # note: this is okay to do within the loop, because ALL atoms must be matched
        # in the templates, so the map numbers will get overwritten every time
        # This makes it easier to check parity changes
        [a.SetAtomMapNum(i) for (i, a) in atoms_pt.items()]
        ###############################################################################

        ###############################################################################
        # Check for missing bonds. These are bonds that are present in the reactants,
        # not specified in the reactant template, and not in the product. Accidental
        # fragmentation can occur for intramolecular ring openings
        missing_bonds = []
        for (i, j, b) in reactants.bonds_by_mapnum:
            if i in atoms_p and j in atoms_p:
                # atoms from reactant bond show up in product
                if not outcome.GetBondBetweenAtoms(atoms_p[i].GetIdx(),
                                                   atoms_p[j].GetIdx()):
                    #...but there is not a bond in the product between those atoms
                    if i not in atoms_rt or j not in atoms_rt or not template_r.GetBondBetweenAtoms(
                            atoms_rt[i].GetIdx(), atoms_rt[j].GetIdx()):
                        # the reactant template did not specify a bond between those atoms (e.g., intentionally destroy)
                        missing_bonds.append((i, j, b))
        if missing_bonds:
            if PLEVEL >= 1:
                print(
                    'Product is missing non-reacted bonds that were present in reactants!'
                )
            outcome = Chem.RWMol(outcome)
            rwmol_map_to_id = {
                a.GetAtomMapNum(): a.GetIdx()
                for a in outcome.GetAtoms() if a.GetAtomMapNum()
            }
            for (i, j, b) in missing_bonds:
                outcome.AddBond(rwmol_map_to_id[i], rwmol_map_to_id[j])
                new_b = outcome.GetBondBetweenAtoms(rwmol_map_to_id[i],
                                                    rwmol_map_to_id[j])
                new_b.SetBondType(b.GetBondType())
                new_b.SetBondDir(b.GetBondDir())
                new_b.SetIsAromatic(b.GetIsAromatic())
            outcome = outcome.GetMol()
            atoms_p = {
                a.GetAtomMapNum(): a
                for a in outcome.GetAtoms() if a.GetAtomMapNum()
            }
        else:
            if PLEVEL >= 3: print('No missing bonds')
        ###############################################################################

        # Now that we've fixed any bonds, connectivity is set. This is a good time
        # to udpate the property cache, since all that is left is fixing atom/bond
        # stereochemistry.
        try:
            Chem.SanitizeMol(outcome)
            outcome.UpdatePropertyCache()
        except ValueError as e:
            if PLEVEL >= 1:
                print('{}, {}'.format(Chem.MolToSmiles(outcome, True), e))
            continue

        ###############################################################################
        # Correct tetra chirality in the outcome
        tetra_copied_from_reactants = []
        for a in outcome.GetAtoms():
            # Participants in reaction core (from reactants) will have old_mapno
            # Spectators present in reactants will have react_atom_idx
            # ...so new atoms will have neither!
            if not a.HasProp('old_mapno'):
                # Not part of the reactants template

                if not a.HasProp('react_atom_idx'):
                    # Atoms only appear in product template - their chirality
                    # should be properly instantiated by RDKit...hopefully...
                    if PLEVEL >= 4:
                        print(
                            'Atom {} created by product template, should have right chirality'
                            .format(a.GetAtomMapNum()))

                else:
                    if PLEVEL >= 4:
                        print(
                            'Atom {} outside of template, copy chirality from reactants'
                            .format(a.GetAtomMapNum()))
                    copy_chirality(atoms_r[a.GetAtomMapNum()], a)
                    if a.GetChiralTag() != ChiralType.CHI_UNSPECIFIED:
                        tetra_copied_from_reactants.append(a)

            else:
                # Part of reactants and reaction core

                if template_atom_could_have_been_tetra(
                        atoms_rt[a.GetAtomMapNum()]):
                    if PLEVEL >= 3:
                        print(
                            'Atom {} was in rct template (could have been tetra)'
                            .format(a.GetAtomMapNum()))

                    if template_atom_could_have_been_tetra(
                            atoms_pt[a.GetAtomMapNum()]):
                        if PLEVEL >= 3:
                            print(
                                'Atom {} in product template could have been tetra, too'
                                .format(a.GetAtomMapNum()))

                        # Was the product template specified?

                        if atoms_pt[a.GetAtomMapNum()].GetChiralTag(
                        ) == ChiralType.CHI_UNSPECIFIED:
                            # No, leave unspecified in product
                            if PLEVEL >= 3:
                                print(
                                    '...but it is not specified in product, so destroy chirality'
                                )
                            a.SetChiralTag(ChiralType.CHI_UNSPECIFIED)

                        else:
                            # Yes
                            if PLEVEL >= 3:
                                print('...and product is specified')

                            # Was the reactant template specified?

                            if atoms_rt[a.GetAtomMapNum()].GetChiralTag(
                            ) == ChiralType.CHI_UNSPECIFIED:
                                # No, so the reaction introduced chirality
                                if PLEVEL >= 3:
                                    print(
                                        '...but reactant template was not, so copy from product template'
                                    )
                                copy_chirality(atoms_pt[a.GetAtomMapNum()], a)

                            else:
                                # Yes, so we need to check if chirality should be preserved or inverted
                                if PLEVEL >= 3:
                                    print(
                                        '...and reactant template was, too! copy from reactants'
                                    )
                                copy_chirality(atoms_r[a.GetAtomMapNum()], a)
                                if atom_chirality_matches(
                                        atoms_pt[a.GetAtomMapNum()],
                                        atoms_rt[a.GetAtomMapNum()]) == -1:
                                    if PLEVEL >= 3:
                                        print(
                                            'but! reactant template and product template have opposite stereochem, so invert'
                                        )
                                    a.InvertChirality()

                    else:
                        # Reactant template chiral, product template not - the
                        # reaction is supposed to destroy chirality, so leave
                        # unspecified
                        if PLEVEL >= 3:
                            print(
                                'If reactant template could have been ' +
                                'chiral, but the product template could not, then we dont need '
                                +
                                'to worry about specifying product atom chirality'
                            )

                else:
                    if PLEVEL >= 3:
                        print(
                            'Atom {} could not have been chiral in reactant template'
                            .format(a.GetAtomMapNum()))

                    if not template_atom_could_have_been_tetra(
                            atoms_pt[a.GetAtomMapNum()]):
                        if PLEVEL >= 3:
                            print(
                                'Atom {} also could not have been chiral in product template',
                                a.GetAtomMapNum())
                        if PLEVEL >= 3:
                            print(
                                '...so, copy chirality from reactant instead')
                        copy_chirality(atoms_r[a.GetAtomMapNum()], a)
                        if a.GetChiralTag() != ChiralType.CHI_UNSPECIFIED:
                            tetra_copied_from_reactants.append(a)

                    else:
                        if PLEVEL >= 3:
                            print(
                                'Atom could/does have product template chirality!'
                                .format(a.GetAtomMapNum()))
                        if PLEVEL >= 3:
                            print(
                                '...so, copy chirality from product template')
                        copy_chirality(atoms_pt[a.GetAtomMapNum()], a)

            if PLEVEL >= 3: print('New chiral tag {}'.format(a.GetChiralTag()))
        if skip_outcome:
            if PLEVEL >= 2: print('Skipping this outcome - chirality broken?')
            continue
        if PLEVEL >= 2:
            print('After attempting to re-introduce chirality, outcome = {}'.
                  format(Chem.MolToSmiles(outcome, True)))
        ###############################################################################

        ###############################################################################
        # Correct bond directionality in the outcome
        for b in outcome.GetBonds():
            if b.GetBondType() != BondType.DOUBLE:
                continue

            # Ring double bonds do not need to be touched(?)
            if b.IsInRing():
                continue

            ba = b.GetBeginAtom()
            bb = b.GetEndAtom()

            # Is it possible at all to specify this bond?
            if ba.GetDegree() == 1 or bb.GetDegree() == 1:
                continue

            if PLEVEL >= 5:
                print('Looking at outcome bond {}={}'.format(
                    ba.GetAtomMapNum(), bb.GetAtomMapNum()))

            if ba.HasProp('old_mapno') and bb.HasProp('old_mapno'):
                # Need to rely on templates for bond chirality, both atoms were
                # in the reactant template
                if PLEVEL >= 5:
                    print(
                        'Both atoms in this double bond were in the reactant template'
                    )
                if (ba.GetIntProp('old_mapno'), bb.GetIntProp('old_mapno')) in \
                        rxn.required_bond_defs_coreatoms:
                    if PLEVEL >= 5:
                        print(
                            'and reactant template *could* have specified the chirality!'
                        )
                    if PLEVEL >= 5:
                        print('..product should be property instantiated')
                    continue
                if PLEVEL >= 5:
                    print(
                        'But it was impossible to have specified chirality (e.g., aux C=C for context)'
                    )

            elif not ba.HasProp('react_atom_idx') and not bb.HasProp(
                    'react_atom_idx'):
                # The atoms were both created by the product template, so any bond
                # stereochemistry should have been instantiated by the product template
                # already...hopefully...otherwise it isn't specific enough?
                continue

            # Need to copy from reactants, this double bond was simply carried over,
            # *although* one of the atoms could have reacted and been an auxilliary
            # atom in the reaction, e.g., C/C=C(/CO)>>C/C=C(/C[Br])
            if PLEVEL >= 5:
                print(
                    'Restoring cis/trans character of bond {}={} from reactants'
                    .format(ba.GetAtomMapNum(), bb.GetAtomMapNum()))

            # Start with setting the BeginAtom
            begin_atom_specified = restore_bond_stereo_to_sp2_atom(
                ba, reactants.bond_dirs_by_mapnum)

            if not begin_atom_specified:
                # don't bother setting other side of bond, since we won't be able to
                # fully specify this bond as cis/trans
                continue

            # Look at other side of the bond now, the EndAtom
            end_atom_specified = restore_bond_stereo_to_sp2_atom(
                bb, reactants.bond_dirs_by_mapnum)
            if not end_atom_specified:
                # note: this can happen if C=C/C-N turns into C=C/C=N
                if PLEVEL >= 1:
                    print(reactants.bond_dirs_by_mapnum)
                    print(ba.GetAtomMapNum())
                    print(bb.GetAtomMapNum())
                    print(Chem.MolToSmiles(reactants.reactants, True))
                    print(Chem.MolToSmiles(outcome, True))
                    print(
                        'Uh oh, looks like bond direction is only specified for half of this bond?'
                    )

        ###############################################################################

        #Keep track of the reacting atoms for later use in grouping
        atoms_diff = {
            x: atoms_are_different(atoms_r[x], atoms_p[x])
            for x in atoms_rt
        }
        #make tuple of changed atoms
        atoms_changed = tuple(
            [x for x in atoms_diff.keys() if atoms_diff[x] == True])
        mapped_outcome = Chem.MolToSmiles(outcome, True)

        if not keep_mapnums:
            for a in outcome.GetAtoms():
                a.SetAtomMapNum(0)

        # Now, check to see if we have destroyed chirality
        # this occurs when chirality was not actually possible (e.g., due to
        # symmetry) but we had assigned a tetrahedral center originating
        # from the reactants.
        #    ex: SMILES C(=O)1C[C@H](Cl)CCC1
        #        SMARTS [C:1]-[C;H0;D3;+0:2](-[C:3])=[O;H0;D1;+0]>>[C:1]-[CH2;D2;+0:2]-[C:3]
        skip_outcome = False
        if len(tetra_copied_from_reactants) > 0:
            Chem.AssignStereochemistry(outcome, cleanIt=True, force=True)
            for a in tetra_copied_from_reactants:
                if a.GetChiralTag() == ChiralType.CHI_UNSPECIFIED:
                    if PLEVEL >= 2:
                        print(
                            'Auxiliary reactant atom was chiral, now is broken -> skip outcome'
                        )
                    skip_outcome = True
                    break
        if skip_outcome:
            continue

        smiles = Chem.MolToSmiles(outcome, True)
        smiles_new = canonicalize_outcome_smiles(smiles)
        if smiles_new is None:
            continue

        final_outcomes.add(smiles_new)
        mapped_outcomes[smiles_new] = (mapped_outcome, atoms_changed)
    ###############################################################################
    # One last fix for consolidating multiple stereospecified products...
    if combine_enantiomers:
        final_outcomes = combine_enantiomers_into_racemic(final_outcomes)
    ###############################################################################
    if return_mapped:
        return list(final_outcomes), mapped_outcomes
    else:
        return list(final_outcomes)
コード例 #3
0
ファイル: main.py プロジェクト: Furuidemu/retrosim
def rdchiralRun(rxn, reactants, keep_isotopes=False, combine_enantiomers=True):
    '''
    Apply a reaction template to reactants and repair the stereochemistry of
    the naive RDKit outcomes.

    rxn = rdchiralReaction (rdkit reaction + auxiliary information)
    reactants = rdchiralReactants (rdkit mol + auxiliary information)
    keep_isotopes = if False (default), the isotope labels that are used as
        temporary map numbers are reset to 0 on every outcome before it is
        canonicalized
    combine_enantiomers = if True (default), the collected outcomes are passed
        through combine_enantiomers_into_racemic before being returned

    Returns a list of the distinct values produced by
    canonicalize_outcome_smiles (presumably SMILES strings - confirm against
    that helper); outcomes for which it returns None are dropped. An empty
    list is returned when the template does not match at all.

    note: there is a fair amount of initialization (assigning stereochem), most
    importantly assigning isotope numbers to the reactant atoms. It is
    HIGHLY recommended to use the custom classes for initialization.

    note: the isotopes of the template atoms held by rxn are overwritten in
    place for every outcome that is processed.
    '''

    final_outcomes = set()

    # We need to keep track of what map numbers
    # (i.e., isotopes) correspond to which atoms
    # note: all reactant atoms must be mapped, so this is safe
    atoms_r = reactants.atoms_r

    # Reactant template; only needed to look up template bonds when checking
    # for bonds that went missing in the product
    template_r = rxn.template_r

    # Get molAtomMapNum->atom dictionary for template reactants and products
    atoms_rt_map = rxn.atoms_rt_map
    atoms_pt_map = rxn.atoms_pt_map

    ###############################################################################
    # Run naive RDKit on ACHIRAL version of molecules

    outcomes = rxn.rxn.RunReactants((reactants.reactants_achiral, ))
    vprint(2, 'Using naive RunReactants, {} outcomes', len(outcomes))
    if not outcomes:
        return []

    ###############################################################################

    for outcome in outcomes:
        ###############################################################################
        # Look for new atoms in products that were not in
        # reactants (e.g., LGs for a retro reaction)
        vprint(2, 'Processing {}',
               str([Chem.MolToSmiles(x, True) for x in outcome]))
        unmapped = 900
        for m in outcome:
            for a in m.GetAtoms():
                # Assign "map" number via isotope
                if not a.GetIsotope():
                    a.SetIsotope(unmapped)
                    unmapped += 1
        vprint(2, 'Added {} map numbers to product', unmapped - 900)
        ###############################################################################

        ###############################################################################
        # Check to see if reactants should not have been matched (based on chirality)

        # Define isotope -> reactant template atom map
        atoms_rt = {
            a.GetIsotope(): atoms_rt_map[a.GetIntProp('old_mapno')]
            for m in outcome for a in m.GetAtoms() if a.HasProp('old_mapno')
        }

        # Set isotopes of reactant template
        # note: this is okay to do within the loop, because ALL atoms must be matched
        # in the templates, so the isotopes will get overwritten every time
        for iso, template_atom in atoms_rt.items():
            template_atom.SetIsotope(iso)

        # Make sure each atom matches
        if not all(
                atom_chirality_matches(atoms_rt[i], atoms_r[i])
                for i in atoms_rt):
            vprint(2, 'Chirality violated! Should not have gotten this match')
            continue
        vprint(2,
               'Chirality matches! Just checked with atom_chirality_matches')

        # Check bond chirality
        #TODO: add bond chirality considerations to exclude improper matches

        ###############################################################################

        ###############################################################################
        # Convert product(s) to single product so that all
        # reactions can be treated as pseudo-intramolecular
        # But! check for ring openings mistakenly split into multiple
        # This can be diagnosed by duplicate map numbers (i.e., SMILES)

        isotopes = [
            a.GetIsotope() for m in outcome for a in m.GetAtoms()
            if a.GetIsotope()
        ]
        if len(isotopes) != len(set(isotopes)):  # duplicate?
            vprint(1, 'Found duplicate isotopes in product - need to stitch')
            # need to do a fancy merge: start from the first fragment and add
            # only the atoms/bonds of later fragments that are not present yet
            merged_mol = Chem.RWMol(outcome[0])
            merged_iso_to_id = {
                a.GetIsotope(): a.GetIdx()
                for a in outcome[0].GetAtoms() if a.GetIsotope()
            }
            for j in range(1, len(outcome)):
                new_mol = outcome[j]
                for a in new_mol.GetAtoms():
                    if a.GetIsotope() not in merged_iso_to_id:
                        merged_iso_to_id[a.GetIsotope()] = merged_mol.AddAtom(
                            a)
                for b in new_mol.GetBonds():
                    bi = b.GetBeginAtom().GetIsotope()
                    bj = b.GetEndAtom().GetIsotope()
                    vprint(
                        10,
                        'stitching bond between {} and {} in stich has chirality {}, {}',
                        bi, bj, b.GetStereo(), b.GetBondDir())
                    idx_i = merged_iso_to_id[bi]
                    idx_j = merged_iso_to_id[bj]
                    if not merged_mol.GetBondBetweenAtoms(idx_i, idx_j):
                        merged_mol.AddBond(idx_i, idx_j, b.GetBondType())
                        # Carry the stereo annotations over to the new bond
                        stitched = merged_mol.GetBondBetweenAtoms(idx_i, idx_j)
                        stitched.SetStereo(b.GetStereo())
                        stitched.SetBondDir(b.GetBondDir())
            outcome = merged_mol.GetMol()
            vprint(1, 'Merged editable mol, converted back to real mol, {}',
                   Chem.MolToSmiles(outcome, True))
        else:
            new_outcome = outcome[0]
            for j in range(1, len(outcome)):
                new_outcome = AllChem.CombineMols(new_outcome, outcome[j])
            outcome = new_outcome
        vprint(2, 'Converted all outcomes to single molecules')
        ###############################################################################

        ###############################################################################
        # Figure out which atoms were matched in the templates
        # atoms_rt and atoms_p will be outcome-specific.
        atoms_pt = {
            a.GetIsotope(): atoms_pt_map[a.GetIntProp('old_mapno')]
            for a in outcome.GetAtoms() if a.HasProp('old_mapno')
        }
        atoms_p = {
            a.GetIsotope(): a
            for a in outcome.GetAtoms() if a.GetIsotope()
        }

        # Set isotopes of product template
        # note: this is okay to do within the loop, because ALL atoms must be matched
        # in the templates, so the isotopes will get overwritten every time
        # This makes it easier to check parity changes
        for iso, template_atom in atoms_pt.items():
            template_atom.SetIsotope(iso)
        ###############################################################################

        ###############################################################################
        # Check for missing bonds. These are bonds that are present in the reactants,
        # not specified in the reactant template, and not in the product. Accidental
        # fragmentation can occur for intramolecular ring openings
        missing_bonds = []
        for (i, j, b) in reactants.bonds_by_isotope:
            if i in atoms_p and j in atoms_p:
                # atoms from reactant bond show up in product
                if not outcome.GetBondBetweenAtoms(atoms_p[i].GetIdx(),
                                                   atoms_p[j].GetIdx()):
                    #...but there is not a bond in the product between those atoms
                    if (i not in atoms_rt or j not in atoms_rt
                            or not template_r.GetBondBetweenAtoms(
                                atoms_rt[i].GetIdx(), atoms_rt[j].GetIdx())):
                        # the reactant template did not specify a bond between those atoms (e.g., intentionally destroy)
                        missing_bonds.append((i, j, b))
        if missing_bonds:
            vprint(
                1,
                'Product is missing non-reacted bonds that were present in reactants!'
            )
            outcome = Chem.RWMol(outcome)
            rwmol_iso_to_id = {
                a.GetIsotope(): a.GetIdx()
                for a in outcome.GetAtoms() if a.GetIsotope()
            }
            for (i, j, b) in missing_bonds:
                outcome.AddBond(rwmol_iso_to_id[i], rwmol_iso_to_id[j])
                new_b = outcome.GetBondBetweenAtoms(rwmol_iso_to_id[i],
                                                    rwmol_iso_to_id[j])
                new_b.SetBondType(b.GetBondType())
                new_b.SetBondDir(b.GetBondDir())
                new_b.SetIsAromatic(b.GetIsAromatic())
            outcome = outcome.GetMol()
        else:
            vprint(3, 'No missing bonds')
        ###############################################################################

        # Now that we've fixed any bonds, connectivity is set. This is a good time
        # to update the property cache, since all that is left is fixing atom/bond
        # stereochemistry.
        try:
            outcome.UpdatePropertyCache()
        except ValueError as e:
            vprint(1, '{}, {}', Chem.MolToSmiles(outcome, True), e)
            continue

        ###############################################################################
        # Correct tetra chirality in the outcome

        for a in outcome.GetAtoms():
            # The isotope label is not modified anywhere in this loop, so it
            # can be read once per atom
            iso = a.GetIsotope()

            # Participants in reaction core (from reactants) will have old_mapno
            # Spectators present in reactants will have react_atom_idx
            # ...so new atoms will have neither!
            if not a.HasProp('old_mapno'):
                # Not part of the reactants template

                if not a.HasProp('react_atom_idx'):
                    # Atoms only appear in product template - their chirality
                    # should be properly instantiated by RDKit...hopefully...
                    vprint(
                        4,
                        'Atom {} created by product template, should have right chirality',
                        iso)

                else:
                    vprint(
                        4,
                        'Atom {} outside of template, copy chirality from reactants',
                        iso)
                    copy_chirality(atoms_r[iso], a)
            else:
                # Part of reactants and reaction core
                rt_atom = atoms_rt[iso]
                pt_atom = atoms_pt[iso]

                if template_atom_could_have_been_tetra(rt_atom):
                    vprint(
                        3,
                        'Atom {} was in rct template (could have been tetra)',
                        iso)

                    if template_atom_could_have_been_tetra(pt_atom):
                        vprint(
                            3,
                            'Atom {} in product template could have been tetra, too',
                            iso)

                        # Was the product template specified?

                        if pt_atom.GetChiralTag(
                        ) == ChiralType.CHI_UNSPECIFIED:
                            # No, leave unspecified in product
                            vprint(
                                3,
                                '...but it is not specified in product, so destroy chirality'
                            )
                            a.SetChiralTag(ChiralType.CHI_UNSPECIFIED)

                        else:
                            # Yes
                            vprint(3, '...and product is specified')

                            # Was the reactant template specified?

                            if rt_atom.GetChiralTag(
                            ) == ChiralType.CHI_UNSPECIFIED:
                                # No, so the reaction introduced chirality
                                vprint(
                                    3,
                                    '...but reactant template was not, so copy from product template'
                                )
                                copy_chirality(pt_atom, a)

                            else:
                                # Yes, so we need to check if chirality should be preserved or inverted
                                vprint(
                                    3,
                                    '...and reactant template was, too! copy from reactants'
                                )
                                copy_chirality(atoms_r[iso], a)
                                if not atom_chirality_matches(
                                        pt_atom, rt_atom):
                                    vprint(
                                        3,
                                        'but! reactant template and product template have opposite stereochem, so invert'
                                    )
                                    a.InvertChirality()

                    else:
                        # Reactant template chiral, product template not - the
                        # reaction is supposed to destroy chirality, so leave
                        # unspecified
                        vprint(
                            3, 'If reactant template could have been ' +
                            'chiral, but the product template could not, then we dont need '
                            +
                            'to worry about specifying product atom chirality')

                else:
                    vprint(
                        3,
                        'Atom {} could not have been chiral in reactant template',
                        iso)

                    if not template_atom_could_have_been_tetra(pt_atom):
                        vprint(
                            3,
                            'Atom {} also could not have been chiral in product template',
                            iso)
                        vprint(3,
                               '...so, copy chirality from reactant instead')
                        copy_chirality(atoms_r[iso], a)

                    else:
                        # The atom number was passed before but the message had
                        # no placeholder for it, so it never got printed
                        vprint(
                            3,
                            'Atom {} could/does have product template chirality!',
                            iso)
                        vprint(3,
                               '...so, copy chirality from product template')
                        copy_chirality(pt_atom, a)

            vprint(3, 'New chiral tag {}', a.GetChiralTag())
        vprint(2, 'After attempting to re-introduce chirality, outcome = {}',
               Chem.MolToSmiles(outcome, True))
        ###############################################################################

        ###############################################################################
        # Correct bond directionality in the outcome
        # TODO

        # Clear isotope
        if not keep_isotopes:
            for a in outcome.GetAtoms():
                a.SetIsotope(0)

        # Canonicalize
        smiles = canonicalize_outcome_smiles(outcome)
        if smiles is not None:
            final_outcomes.add(smiles)

    ###############################################################################
    # One last fix for consolidating multiple stereospecified products...
    if combine_enantiomers:
        final_outcomes = combine_enantiomers_into_racemic(final_outcomes)
    ###############################################################################

    return list(final_outcomes)