Ejemplo n.º 1
0
def stabilize_charges_by_proximity(mol_list):
    """
    Only keep structures that obey the charge proximity rule.

    Opposite charges should be as close as possible to one another, and
    similar charges as far apart as possible. For every structure, the length
    of the path returned by ``find_shortest_path`` is summed over all pairs of
    formally charged atoms, separately for opposite-sign and same-sign pairs.
    The structures are then filtered in two steps:

    1. keep only those with the minimal cumulative opposite-charge distance;
    2. among these, keep only those with the maximal cumulative
       similar-charge distance.

    :param mol_list: list of structures; each must expose ``vertices`` whose
                     items have ``charge`` and ``sortingLabel`` attributes.
    :return: the same ``mol_list`` object, filtered in place.
    """
    if not mol_list:
        return mol_list

    # One (opposite, similar) tuple per structure; indices match mol_list
    charge_distance_list = []
    for mol in mol_list:
        charged_atoms = [atom for atom in mol.vertices if atom.charge]
        opposite_distance = similar_distance = 0
        for atom1 in charged_atoms:
            for atom2 in charged_atoms:
                # The sortingLabel ordering makes sure every pair is counted only once
                if atom2.sortingLabel > atom1.sortingLabel:
                    path_length = len(find_shortest_path(atom1, atom2))
                    if (atom1.charge > 0) ^ (atom2.charge > 0):  # xor
                        # they have opposing signs when ONLY one is positive
                        opposite_distance += path_length
                    else:
                        # they have similar signs
                        similar_distance += path_length
        charge_distance_list.append((opposite_distance, similar_distance))

    # Step 1: opposite charges as close as possible
    min_opposite_distance = min(distances[0] for distances in charge_distance_list)
    candidates = [i for i, distances in enumerate(charge_distance_list)
                  if distances[0] == min_opposite_distance]

    # Step 2: similar charges as far apart as possible. The comparison has to use the
    # similar-charge distance (index 1); comparing the opposite-charge distance
    # (index 0) against this maximum mixes up two unrelated quantities.
    max_similar_distance = max(charge_distance_list[i][1] for i in candidates)
    indices_to_keep = set(i for i in candidates
                          if charge_distance_list[i][1] == max_similar_distance)

    # Filter in place so that callers holding a reference to mol_list see the result
    mol_list[:] = [mol for i, mol in enumerate(mol_list) if i in indices_to_keep]
    return mol_list
Ejemplo n.º 2
0
    def test_bicyclo420octane(self):
        """
        Check the shortest path between atoms 0 and 4 of the molecule parsed
        from the SMILES 'C12CCC1CCCC2'; the returned path must have length 3.
        """
        smi = 'C12CCC1CCCC2'
        mol = Molecule().from_smiles(smi)
        start = mol.atoms[0]
        end = mol.atoms[4]

        path = find_shortest_path(start, end)
        # assertEqual instead of the deprecated alias assertEquals (removed in Python 3.12)
        self.assertEqual(len(path), 3)
Ejemplo n.º 3
0
    def test_cyclohexane(self):
        """
        Check the shortest path between atoms 0 and 2 of the molecule parsed
        from the SMILES 'C1CCCCC1'; the returned path must have length 3.
        """
        smi = 'C1CCCCC1'
        mol = Molecule().from_smiles(smi)
        start = mol.atoms[0]
        end = mol.atoms[2]

        path = find_shortest_path(start, end)
        # assertEqual instead of the deprecated alias assertEquals (removed in Python 3.12)
        self.assertEqual(len(path), 3)
def stabilize_charges_by_proximity(mol_list):
    """
    Only keep structures that obey the charge proximity rule.

    Opposite charges should be as close as possible to one another, and
    similar charges as far apart as possible. For every structure, the length
    of the path returned by ``find_shortest_path`` is summed over all pairs of
    formally charged atoms, separately for opposite-sign and same-sign pairs.
    The structures are then filtered in two steps:

    1. keep only those with the minimal cumulative opposite-charge distance;
    2. among these, keep only those with the maximal cumulative
       similar-charge distance.

    :param mol_list: list of structures; each must expose ``vertices`` whose
                     items have ``charge`` and ``sortingLabel`` attributes.
    :return: the same ``mol_list`` object, filtered in place.
    """
    if not mol_list:
        return mol_list

    # One (opposite, similar) tuple per structure; indices match mol_list
    charge_distance_list = []
    for mol in mol_list:
        charged_atoms = [atom for atom in mol.vertices if atom.charge]
        opposite_distance = similar_distance = 0
        for atom1 in charged_atoms:
            for atom2 in charged_atoms:
                # The sortingLabel ordering makes sure every pair is counted only once
                if atom2.sortingLabel > atom1.sortingLabel:
                    path_length = len(find_shortest_path(atom1, atom2))
                    if (atom1.charge > 0) ^ (atom2.charge > 0):  # xor
                        # they have opposing signs when ONLY one is positive
                        opposite_distance += path_length
                    else:
                        # they have similar signs
                        similar_distance += path_length
        charge_distance_list.append((opposite_distance, similar_distance))

    # Step 1: opposite charges as close as possible
    min_opposite_distance = min(distances[0] for distances in charge_distance_list)
    candidates = [i for i, distances in enumerate(charge_distance_list)
                  if distances[0] == min_opposite_distance]

    # Step 2: similar charges as far apart as possible. The comparison has to use the
    # similar-charge distance (index 1); comparing the opposite-charge distance
    # (index 0) against this maximum mixes up two unrelated quantities.
    max_similar_distance = max(charge_distance_list[i][1] for i in candidates)
    indices_to_keep = set(i for i in candidates
                          if charge_distance_list[i][1] == max_similar_distance)

    # Filter in place so that callers holding a reference to mol_list see the result
    mol_list[:] = [mol for i, mol in enumerate(mol_list) if i in indices_to_keep]
    return mol_list
Ejemplo n.º 5
0
    def kinetics_checkUnimolecularGroups(self, family_name):
        """
        This test goes through all unimolecular groups that have more than one top level. Top level
        groups that overlap with family.reactant are assumed to be backbones (containing the whole
        reactant molecule) and the other top levels are assumed to be end groups.

        The following format requirements are checked:
        1) end group entries have exactly the same labels as their top level entry
        2) backbone groups have all labels that the end groups have
        3) backbone groups have labels tracing between the end groups that follow the shortest path
        4) the end subgraph inside each backbone is exactly the same as the top level of the
           corresponding end tree

        :param family_name: key of the family in ``self.database.kinetics.families``
        :raises Exception: if a backbone group cannot be split into a fragment matching an end root
        """

        def getEndFromBackbone(backbone, endLabels):
            """
            :param backbone: :class: Entry for a backbone of molecule
            :param endLabels: Labels in the end groups
            :return: A subgraph representing the end group of the molecule
            """
            # make copy for manipulation (presumably a deep copy -- confirm against Group.copy)
            copyGroup = backbone.item.copy(True)

            # Find the first atom carrying one of the end group labels
            for atom in copyGroup.atoms:
                if atom.label in endLabels:
                    midAtom = atom
                    break
            else:
                # Without this guard the code below would fail with an obscure UnboundLocalError
                raise Exception("Group {0} has no atom labelled with any of {1}".format(
                    backbone.label, sorted(endLabels)))

            # find the bonds to break: those leading from that atom to atoms outside the end group
            bondsToBreak = [bond for atom2, bond in midAtom.bonds.items()
                            if atom2.label is None or atom2.label not in endLabels]
            for bond in bondsToBreak:
                copyGroup.removeBond(bond)

            # split group into end and backbone fragment
            fragments = copyGroup.split()

            # verify group was split correctly and identify the correct end group
            endLabels = set(endLabels)
            for fragment in fragments:
                fragmentLabels = set(atom.label for atom in fragment.atoms)
                fragmentLabels.discard('')
                if endLabels == fragmentLabels:
                    return fragment
            raise Exception("Group {0} not split correctly".format(backbone.label))

        def failIfAny(errors, header):
            """Fail the test with a message listing every collected error, if there is any."""
            if errors:
                s = header + "\n [root group, error group]"
                s += ''.join('\n' + str(x) for x in errors)
                nose.tools.assert_true(False, s)

        #################################################################################
        family = self.database.kinetics.families[family_name]

        backbone = family.getBackboneRoots()[0]

        endGroups = family.getEndRoots()

        # Map each end root to the set of its (non-empty) atom labels
        endLabels = {}
        for endGroup in endGroups:
            endLabels[endGroup] = set(atom.label for atom in endGroup.item.atoms if atom.label)

        # get boundary atoms to test that backbones have labels between end groups
        nose.tools.assert_is_not_none(family.boundaryAtoms)

        # define types of errors
        A = []  # end groups have too many labels
        B = []  # end group lacks necessary label
        C = []  # backbone missing end group labels
        D = []  # backbone missing labels in between groups
        E = []  # backbone tries to define atoms inside end groups
        for entry in family.groups.entries.values():
            if not isinstance(entry.item, Group):
                continue
            group = entry.item
            # Labels of this very group. They must be rebuilt for every entry: otherwise the
            # first backbone entry hits an unbound variable and later ones see stale labels.
            presentLabels = set(atom.label for atom in group.atoms if atom.label)

            if backbone in family.ancestors(entry):
                # Check C
                for endGroup, labels in endLabels.items():
                    if not labels.issubset(presentLabels):
                        C.append([endGroup, entry])
                # check D
                midAtoms = [group.getLabeledAtom(x) for x in family.boundaryAtoms]
                pathAtoms = find_shortest_path(midAtoms[0], midAtoms[1])
                for atom in pathAtoms:
                    if not atom.label:
                        D.append([backbone, entry])
                        break
                # check E
                for endGroup, labels in endLabels.items():
                    endFromBackbone = getEndFromBackbone(entry, labels)
                    # Separate name so that the labels of the whole group are not overwritten
                    fragmentLabels = set(endFromBackbone.getLabeledAtoms().keys())
                    if labels == fragmentLabels:
                        if not endGroup.item.isIdentical(endFromBackbone):
                            E.append([endGroup, entry])
                    else:
                        raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
            else:
                for endNode, labelledAtoms in endLabels.items():
                    if endNode in family.ancestors(entry):
                        # Check A
                        if not presentLabels.issubset(labelledAtoms):
                            A.append([endNode, entry])
                        # Check B
                        if not labelledAtoms.issubset(presentLabels):
                            B.append([endNode, entry])

        # print outputs; as before, the first non-empty error category fails the test
        failIfAny(A, "These end groups have extra labels that their top level end group do not have:")
        failIfAny(B, "These end groups are missing labels that their top level end group have:")
        failIfAny(C, "These backbone groups are missing labels that are in the end groups:")
        failIfAny(D, "These backbone groups are missing labels along the path atoms:")
        failIfAny(E, "These backbone have end subgraphs that don't match a root:")
Ejemplo n.º 6
0
    def kinetics_checkUnimolecularGroups(self, family_name):
        """
        This test goes through all unimolecular groups that have more than one top level. Top level
        groups that overlap with family.reactant are assumed to be backbones (containing the whole
        reactant molecule) and the other top levels are assumed to be end groups.

        The following format requirements are checked:
        1) end group entries have exactly the same labels as their top level entry
        2) backbone groups have all labels that the end groups have
        3) backbone groups have labels tracing between the end groups that follow the shortest path
        4) the end subgraph inside each backbone is exactly the same as the top level of the
           corresponding end tree

        :param family_name: key of the family in ``self.database.kinetics.families``
        :raises Exception: if a backbone group cannot be split into a fragment matching an end root
        """

        def getEndFromBackbone(backbone, endLabels):
            """
            :param backbone: :class: Entry for a backbone of molecule
            :param endLabels: Labels in the end groups
            :return: A subgraph representing the end group of the molecule
            """
            # make copy for manipulation (presumably a deep copy -- confirm against Group.copy)
            copyGroup = backbone.item.copy(True)

            # Find the first atom carrying one of the end group labels
            for atom in copyGroup.atoms:
                if atom.label in endLabels:
                    midAtom = atom
                    break
            else:
                # Without this guard the code below would fail with an obscure UnboundLocalError
                raise Exception("Group {0} has no atom labelled with any of {1}".format(
                    backbone.label, sorted(endLabels)))

            # find the bonds to break: those leading from that atom to atoms outside the end group
            bondsToBreak = [bond for atom2, bond in midAtom.bonds.items()
                            if atom2.label is None or atom2.label not in endLabels]
            for bond in bondsToBreak:
                copyGroup.removeBond(bond)

            # split group into end and backbone fragment
            fragments = copyGroup.split()

            # verify group was split correctly and identify the correct end group
            endLabels = set(endLabels)
            for fragment in fragments:
                fragmentLabels = set(atom.label for atom in fragment.atoms)
                fragmentLabels.discard('')
                if endLabels == fragmentLabels:
                    return fragment
            raise Exception("Group {0} not split correctly".format(backbone.label))

        def failIfAny(errors, header):
            """Fail the test with a message listing every collected error, if there is any."""
            if errors:
                s = header + "\n [root group, error group]"
                s += ''.join('\n' + str(x) for x in errors)
                nose.tools.assert_true(False, s)

        #################################################################################
        family = self.database.kinetics.families[family_name]

        backbone = family.getBackboneRoots()[0]

        endGroups = family.getEndRoots()

        # Map each end root to the set of its (non-empty) atom labels
        endLabels = {}
        for endGroup in endGroups:
            endLabels[endGroup] = set(atom.label for atom in endGroup.item.atoms if atom.label)

        # get boundary atoms to test that backbones have labels between end groups
        nose.tools.assert_is_not_none(family.boundaryAtoms)

        # define types of errors
        A = []  # end groups have too many labels
        B = []  # end group lacks necessary label
        C = []  # backbone missing end group labels
        D = []  # backbone missing labels in between groups
        E = []  # backbone tries to define atoms inside end groups
        for entry in family.groups.entries.values():
            if not isinstance(entry.item, Group):
                continue
            group = entry.item
            # Labels of this very group. They must be rebuilt for every entry: otherwise the
            # first backbone entry hits an unbound variable and later ones see stale labels.
            presentLabels = set(atom.label for atom in group.atoms if atom.label)

            if backbone in family.ancestors(entry):
                # Check C
                for endGroup, labels in endLabels.items():
                    if not labels.issubset(presentLabels):
                        C.append([endGroup, entry])
                # check D
                midAtoms = [group.getLabeledAtom(x) for x in family.boundaryAtoms]
                pathAtoms = find_shortest_path(midAtoms[0], midAtoms[1])
                for atom in pathAtoms:
                    if not atom.label:
                        D.append([backbone, entry])
                        break
                # check E
                for endGroup, labels in endLabels.items():
                    endFromBackbone = getEndFromBackbone(entry, labels)
                    # Separate name so that the labels of the whole group are not overwritten
                    fragmentLabels = set(endFromBackbone.getLabeledAtoms().keys())
                    if labels == fragmentLabels:
                        if not endGroup.item.isIdentical(endFromBackbone):
                            E.append([endGroup, entry])
                    else:
                        raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
            else:
                for endNode, labelledAtoms in endLabels.items():
                    if endNode in family.ancestors(entry):
                        # Check A
                        if not presentLabels.issubset(labelledAtoms):
                            A.append([endNode, entry])
                        # Check B
                        if not labelledAtoms.issubset(presentLabels):
                            B.append([endNode, entry])

        # print outputs; as before, the first non-empty error category fails the test
        failIfAny(A, "These end groups have extra labels that their top level end group do not have:")
        failIfAny(B, "These end groups are missing labels that their top level end group have:")
        failIfAny(C, "These backbone groups are missing labels that are in the end groups:")
        failIfAny(D, "These backbone groups are missing labels along the path atoms:")
        failIfAny(E, "These backbone have end subgraphs that don't match a root:")