def stabilize_charges_by_proximity(mol_list):
    """
    Only keep structures that obey the charge proximity rule.

    Opposite charges will be as close as possible to one another, and similar
    charges as far apart as possible.

    For every structure, two cumulative distances are accumulated over the pairs
    of formally charged atoms: one for pairs of opposite sign and one for pairs
    of similar sign. Each distance is ``len(find_shortest_path(atom1, atom2))``.
    The filtering is then done in two passes:

    1. structures whose opposite-charge distance is larger than the minimum
       over all structures are discarded;
    2. among the survivors of pass 1 the largest similar-charge distance is
       determined, and structures whose similar-charge distance is smaller
       than it are discarded.

    :param mol_list: list of structures; each must expose ``vertices`` whose
                     items have ``charge`` and ``sortingLabel`` attributes
    :return: the same ``mol_list`` object, filtered in place
    """
    charge_distance_list = []  # indices match mol_list
    for mol in mol_list:
        # Try finding well-defined pairs of formally-charged atoms to apply the proximity principle
        cumulative_opposite_charge_distance = cumulative_similar_charge_distance = 0
        charged_atoms = [atom for atom in mol.vertices if atom.charge]
        for atom1 in charged_atoms:
            for atom2 in charged_atoms:
                # The strict ordering on sortingLabel skips self-pairs and visits each pair once.
                # NOTE(review): assumes sortingLabel values are distinct within a structure.
                if atom2.sortingLabel > atom1.sortingLabel:
                    distance = len(find_shortest_path(atom1, atom2))
                    if (atom1.charge > 0) ^ (atom2.charge > 0):  # xor
                        # they have opposing signs when ONLY one is positive
                        cumulative_opposite_charge_distance += distance
                    else:
                        # they have similar signs
                        cumulative_similar_charge_distance += distance
        charge_distance_list.append([cumulative_opposite_charge_distance,
                                     cumulative_similar_charge_distance])

    # A set gives constant-time membership tests and tolerates an index being marked twice.
    indices_to_pop = set()

    # `or [0]` guards against an empty list; in Python 3 `min(list, default=0)` is equivalent
    min_cumulative_opposite_charge_distance = min(
        [distances[0] for distances in charge_distance_list] or [0])
    for i, distances in enumerate(charge_distance_list):
        if distances[0] > min_cumulative_opposite_charge_distance:
            indices_to_pop.add(i)

    max_cumulative_similar_charge_distance = max(
        [distances[1] for i, distances in enumerate(charge_distance_list)
         if i not in indices_to_pop] or [0])
    for i, distances in enumerate(charge_distance_list):
        # Compare the similar-charge distance (index 1). Comparing index 0 here would discard
        # every survivor of the first pass whenever the minimum opposite-charge distance is
        # below the similar-charge maximum.
        if distances[1] < max_cumulative_similar_charge_distance:
            indices_to_pop.add(i)

    # Slice assignment filters the caller's list in place (works on Python 2 and 3).
    mol_list[:] = [mol for i, mol in enumerate(mol_list) if i not in indices_to_pop]
    return mol_list
def test_bicyclo420octane(self):
    """
    Check the shortest path found between atoms 0 and 4 of the molecule
    built from the SMILES 'C12CCC1CCCC2'.

    The returned path is expected to contain 3 entries.
    """
    smi = 'C12CCC1CCCC2'
    mol = Molecule().from_smiles(smi)
    start = mol.atoms[0]
    end = mol.atoms[4]
    path = find_shortest_path(start, end)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
    self.assertEqual(len(path), 3)
def test_cyclohexane(self):
    """
    Check the shortest path found between atoms 0 and 2 of the molecule
    built from the SMILES 'C1CCCCC1'.

    The returned path is expected to contain 3 entries.
    """
    smi = 'C1CCCCC1'
    mol = Molecule().from_smiles(smi)
    start = mol.atoms[0]
    end = mol.atoms[2]
    path = find_shortest_path(start, end)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
    self.assertEqual(len(path), 3)
def stabilize_charges_by_proximity(mol_list):
    """
    Only keep structures that obey the charge proximity rule.

    Opposite charges will be as close as possible to one another, and similar
    charges as far apart as possible.

    For every structure, two cumulative distances are accumulated over the pairs
    of formally charged atoms: one for pairs of opposite sign and one for pairs
    of similar sign. Each distance is ``len(find_shortest_path(atom1, atom2))``.
    The filtering is then done in two passes:

    1. structures whose opposite-charge distance is larger than the minimum
       over all structures are discarded;
    2. among the survivors of pass 1 the largest similar-charge distance is
       determined, and structures whose similar-charge distance is smaller
       than it are discarded.

    :param mol_list: list of structures; each must expose ``vertices`` whose
                     items have ``charge`` and ``sortingLabel`` attributes
    :return: the same ``mol_list`` object, filtered in place
    """
    charge_distance_list = []  # indices match mol_list
    for mol in mol_list:
        # Try finding well-defined pairs of formally-charged atoms to apply the proximity principle
        cumulative_opposite_charge_distance = cumulative_similar_charge_distance = 0
        charged_atoms = [atom for atom in mol.vertices if atom.charge]
        for atom1 in charged_atoms:
            for atom2 in charged_atoms:
                # The strict ordering on sortingLabel skips self-pairs and visits each pair once.
                # NOTE(review): assumes sortingLabel values are distinct within a structure.
                if atom2.sortingLabel > atom1.sortingLabel:
                    distance = len(find_shortest_path(atom1, atom2))
                    if (atom1.charge > 0) ^ (atom2.charge > 0):  # xor
                        # they have opposing signs when ONLY one is positive
                        cumulative_opposite_charge_distance += distance
                    else:
                        # they have similar signs
                        cumulative_similar_charge_distance += distance
        charge_distance_list.append([cumulative_opposite_charge_distance,
                                     cumulative_similar_charge_distance])

    # A set gives constant-time membership tests and tolerates an index being marked twice.
    indices_to_pop = set()

    # `or [0]` guards against an empty list; in Python 3 `min(list, default=0)` is equivalent
    min_cumulative_opposite_charge_distance = min(
        [distances[0] for distances in charge_distance_list] or [0])
    for i, distances in enumerate(charge_distance_list):
        if distances[0] > min_cumulative_opposite_charge_distance:
            indices_to_pop.add(i)

    max_cumulative_similar_charge_distance = max(
        [distances[1] for i, distances in enumerate(charge_distance_list)
         if i not in indices_to_pop] or [0])
    for i, distances in enumerate(charge_distance_list):
        # Compare the similar-charge distance (index 1). Comparing index 0 here would discard
        # every survivor of the first pass whenever the minimum opposite-charge distance is
        # below the similar-charge maximum.
        if distances[1] < max_cumulative_similar_charge_distance:
            indices_to_pop.add(i)

    # Slice assignment filters the caller's list in place (works on Python 2 and 3).
    mol_list[:] = [mol for i, mol in enumerate(mol_list) if i not in indices_to_pop]
    return mol_list
def kinetics_checkUnimolecularGroups(self, family_name):
    """
    This test goes through all unimolecular groups that have more than one top level.
    Top level groups that overlap with family.reactant are assumed to be backbones
    (they contain the whole reactant molecule) and the other top levels are assumed
    to be end groups.

    The following format requirements are checked:
    1) end group entries have exactly the same labels as their top level entry
    2) backbone groups have all labels that end groups have
    3) backbone groups have labels tracing between the end groups that follow the shortest path
    4) the end subgraph inside each backbone is exactly the same as the top level of the
       corresponding end tree

    Violations are collected for every entry first; the first non-empty category
    (in the order A, B, C, D, E defined below) is then reported as a failure.
    """
    def getEndFromBackbone(backbone, endLabels):
        """
        :param backbone: :class: Entry for a backbone of molecule
        :param endLabels: Labels in the end groups
        :return: A subgraph representing the end group of the molecule
        """
        # make copy for manipulation
        copyGroup = backbone.item.copy(True)

        # Find the first atom carrying one of the end labels
        for atom in copyGroup.atoms:
            if atom.label in endLabels:
                midAtom = atom
                break
        else:
            # Without this guard the loop below would fail on an unbound `midAtom`
            raise Exception("Group {0} has no atom carrying an end label".format(backbone.label))

        # find the bonds to break: those leading from midAtom to atoms outside the end group
        bondsToBreak = []
        for atom2, bond in midAtom.bonds.items():
            if atom2.label is None or atom2.label not in endLabels:
                bondsToBreak.append(bond)

        for bond in bondsToBreak:
            copyGroup.removeBond(bond)

        # split group into end and backbone fragment
        groups = copyGroup.split()

        # verify group was split correctly and identify the correct end group
        endLabels = set(endLabels)
        for group in groups:
            groupLabels = set(atom.label for atom in group.atoms)
            groupLabels.discard('')
            if endLabels == groupLabels:
                break
        else:
            # no fragment carries exactly the end labels
            raise Exception("Group {0} not split correctly".format(backbone.label))

        return group

    #################################################################################
    family = self.database.kinetics.families[family_name]

    backbone = family.getBackboneRoots()[0]
    endGroups = family.getEndRoots()

    # map each end root entry to the set of (non-empty) labels on its atoms
    endLabels = {}
    for endGroup in endGroups:
        endLabels[endGroup] = set(atom.label for atom in endGroup.item.atoms if atom.label)

    # get boundary atoms to test that backbones have labels between end groups
    nose.tools.assert_is_not_none(family.boundaryAtoms)

    # define types of errors
    A = []  # end groups have too many labels
    B = []  # end group lacks necessary label
    C = []  # backbone missing end group labels
    D = []  # backbone missing labels in between groups
    E = []  # backbone tries to define atoms inside end groups

    for entry in family.groups.entries.values():
        if not isinstance(entry.item, Group):
            continue
        group = entry.item

        # Labels present on THIS entry only. Rebuilding the set for every entry keeps
        # labels from previously visited entries out of checks A-C.
        presentLabels = set(atom.label for atom in group.atoms if atom.label)

        if backbone in family.ancestors(entry):
            # Check C
            for endGroup, labels in endLabels.items():
                if not labels.issubset(presentLabels):
                    C.append([endGroup, entry])

            # check D
            midAtoms = [group.getLabeledAtom(x) for x in family.boundaryAtoms]
            pathAtoms = find_shortest_path(midAtoms[0], midAtoms[1])
            if any(not atom.label for atom in pathAtoms):
                D.append([backbone, entry])

            # check E
            for endGroup, labels in endLabels.items():
                endFromBackbone = getEndFromBackbone(entry, labels)
                # separate name so the entry-level presentLabels is not clobbered
                fragmentLabels = set(endFromBackbone.getLabeledAtoms().keys())
                if labels == fragmentLabels:
                    if not endGroup.item.isIdentical(endFromBackbone):
                        E.append([endGroup, entry])
                else:
                    raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
        else:
            for endNode, labelledAtoms in endLabels.items():
                if endNode in family.ancestors(entry):
                    # Check A
                    if not presentLabels.issubset(labelledAtoms):
                        A.append([endNode, entry])
                    # Check B
                    if not labelledAtoms.issubset(presentLabels):
                        B.append([endNode, entry])

    # print outputs: categories are reported in order, the first non-empty one fails the test
    reports = [
        (A, "These end groups have extra labels that their top level end group do not have:"),
        (B, "These end groups are missing labels that their top level end group have:"),
        (C, "These backbone groups are missing labels that are in the end groups:"),
        (D, "These backbone groups are missing labels along the path atoms:"),
        (E, "These backbone have end subgraphs that don't match a root:"),
    ]
    for errors, header in reports:
        if errors:
            s = header + "\n [root group, error group]"
            s += ''.join('\n' + str(x) for x in errors)
            nose.tools.assert_true(False, s)
def kinetics_checkUnimolecularGroups(self, family_name):
    """
    This test goes through all unimolecular groups that have more than one top level.
    Top level groups that overlap with family.reactant are assumed to be backbones
    (they contain the whole reactant molecule) and the other top levels are assumed
    to be end groups.

    The following format requirements are checked:
    1) end group entries have exactly the same labels as their top level entry
    2) backbone groups have all labels that end groups have
    3) backbone groups have labels tracing between the end groups that follow the shortest path
    4) the end subgraph inside each backbone is exactly the same as the top level of the
       corresponding end tree

    Violations are collected for every entry first; the first non-empty category
    (in the order A, B, C, D, E defined below) is then reported as a failure.
    """
    def getEndFromBackbone(backbone, endLabels):
        """
        :param backbone: :class: Entry for a backbone of molecule
        :param endLabels: Labels in the end groups
        :return: A subgraph representing the end group of the molecule
        """
        # make copy for manipulation
        copyGroup = backbone.item.copy(True)

        # Find the first atom carrying one of the end labels
        for atom in copyGroup.atoms:
            if atom.label in endLabels:
                midAtom = atom
                break
        else:
            # Without this guard the loop below would fail on an unbound `midAtom`
            raise Exception("Group {0} has no atom carrying an end label".format(backbone.label))

        # find the bonds to break: those leading from midAtom to atoms outside the end group
        bondsToBreak = []
        for atom2, bond in midAtom.bonds.items():
            if atom2.label is None or atom2.label not in endLabels:
                bondsToBreak.append(bond)

        for bond in bondsToBreak:
            copyGroup.removeBond(bond)

        # split group into end and backbone fragment
        groups = copyGroup.split()

        # verify group was split correctly and identify the correct end group
        endLabels = set(endLabels)
        for group in groups:
            groupLabels = set(atom.label for atom in group.atoms)
            groupLabels.discard('')
            if endLabels == groupLabels:
                break
        else:
            # no fragment carries exactly the end labels
            raise Exception("Group {0} not split correctly".format(backbone.label))

        return group

    #################################################################################
    family = self.database.kinetics.families[family_name]

    backbone = family.getBackboneRoots()[0]
    endGroups = family.getEndRoots()

    # map each end root entry to the set of (non-empty) labels on its atoms
    endLabels = {}
    for endGroup in endGroups:
        endLabels[endGroup] = set(atom.label for atom in endGroup.item.atoms if atom.label)

    # get boundary atoms to test that backbones have labels between end groups
    nose.tools.assert_is_not_none(family.boundaryAtoms)

    # define types of errors
    A = []  # end groups have too many labels
    B = []  # end group lacks necessary label
    C = []  # backbone missing end group labels
    D = []  # backbone missing labels in between groups
    E = []  # backbone tries to define atoms inside end groups

    for entry in family.groups.entries.values():
        if not isinstance(entry.item, Group):
            continue
        group = entry.item

        # Labels present on THIS entry only. Rebuilding the set for every entry keeps
        # labels from previously visited entries out of checks A-C.
        presentLabels = set(atom.label for atom in group.atoms if atom.label)

        if backbone in family.ancestors(entry):
            # Check C
            for endGroup, labels in endLabels.items():
                if not labels.issubset(presentLabels):
                    C.append([endGroup, entry])

            # check D
            midAtoms = [group.getLabeledAtom(x) for x in family.boundaryAtoms]
            pathAtoms = find_shortest_path(midAtoms[0], midAtoms[1])
            if any(not atom.label for atom in pathAtoms):
                D.append([backbone, entry])

            # check E
            for endGroup, labels in endLabels.items():
                endFromBackbone = getEndFromBackbone(entry, labels)
                # separate name so the entry-level presentLabels is not clobbered
                fragmentLabels = set(endFromBackbone.getLabeledAtoms().keys())
                if labels == fragmentLabels:
                    if not endGroup.item.isIdentical(endFromBackbone):
                        E.append([endGroup, entry])
                else:
                    raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
        else:
            for endNode, labelledAtoms in endLabels.items():
                if endNode in family.ancestors(entry):
                    # Check A
                    if not presentLabels.issubset(labelledAtoms):
                        A.append([endNode, entry])
                    # Check B
                    if not labelledAtoms.issubset(presentLabels):
                        B.append([endNode, entry])

    # print outputs: categories are reported in order, the first non-empty one fails the test
    reports = [
        (A, "These end groups have extra labels that their top level end group do not have:"),
        (B, "These end groups are missing labels that their top level end group have:"),
        (C, "These backbone groups are missing labels that are in the end groups:"),
        (D, "These backbone groups are missing labels along the path atoms:"),
        (E, "These backbone have end subgraphs that don't match a root:"),
    ]
    for errors, header in reports:
        if errors:
            s = header + "\n [root group, error group]"
            s += ''.join('\n' + str(x) for x in errors)
            nose.tools.assert_true(False, s)