def GenInteractions_int(G_system,
                        g_bond_pattern,
                        typepattern_to_coefftypes,
                        canonical_order,  # function to sort atoms and bonds
                        atomtypes_int2str,
                        bondtypes_int2str,
                        report_progress=False,  # print messages to sys.stderr?
                        check_undefined_atomids_str=None):
    """
    Find the bonded interactions present in a system of atoms.

    GenInteractions_int() automatically determines a list of interactions
    present in a system of bonded atoms (argument "G_system"), which satisfy
    the bond topology present in "g_bond_pattern", and satisfy the atom and
    bond type requirements in "typepattern_to_coefftypes".

    Whenever a set of atoms in "G_system" are bonded together in a way which
    matches "g_bond_pattern", and when the atom and bond types are consistent
    with one of the entries in "typepattern_to_coefftypes", the corresponding
    list of atoms from G_system is appended to the list of results.

    These results (the list of lists of atoms participating in an
    interaction) are organized according to their corresponding "coefftype",
    a string which identifies the type of interaction they obey.
    Results are returned as a dictionary using "coefftype" as the lookup key.

    Arguments:

     -- typepattern_to_coefftypes is a list of 2-tuples --
    The first element of the 2-tuple is the "typepattern".
    It contains a string describing a list of atom types and bond types.
    The typepattern is associated with a "coefftype",
    which is the second element of the 2-tuple.  This is a string
    which identifies the type of interaction between the atoms.
    Later on, this string can be used to lookup the force field
    parameters for this interaction elsewhere.

     -- Arguments: G_system, g_bond_pattern, atomtypes_int2str,
                   bondtypes_int2str --
    G_system stores a list of atoms and bonds, and their attributes in
    "Ugraph" format.  In this format:
    Atom ID numbers are represented by indices into the G_system.verts[] list.
    Bond ID numbers are represented by indices into the G_system.edges[] list.
    Atom types are represented as integers in the G_system.verts[i].attr list.
    Bond types are represented as integers in the G_system.edges[i].attr list.
    They are converted into strings using
    atomtypes_int2str, and bondtypes_int2str.

    g_bond_pattern is a graph which specifies the type of bonding between
    the atoms required for a match.  It is in Ugraph format (however the
    atom and bond types are left blank.)

    Atom and bond types are supplied by the user in string format.  (These
    strings typically encode integers, but could be any string in principle.)
    The string-version of the ith atom type is stored in
       atomtypes_int2str[ G_system.verts[i].attr ]
    The string-version of the ith bond type is stored in
       bondtypes_int2str[ G_system.edges[i].attr ]

     -- The "canonical_order" argument: --
    The search for atoms with a given bond pattern often yields redundant
    matches.  There is no difference for example between the angle formed
    between three consecutively bonded atoms (named, 1, 2, 3, for example),
    and the angle between the same atoms in reverse order (3, 2, 1).
    However both triplets of atoms will be returned by the subgraph-
    matching algorithm when searching for ALL 3-body interactions.
    To eliminate this redundancy, the caller must supply a "canonical_order"
    argument.  This is a function which sorts the atoms and bonds in a way
    which is consistent with the type of N-body interaction being considered.
    The atoms (and bonds) in a candidate match are rearranged by
    canonical_order().  Then the re-ordered list of atom and bond ids is
    tested against the list of atom/bond ids in the matches-found-so-far,
    before it is added.  (The value it returns is used as a dictionary key,
    so it must be hashable.)

     -- report_progress --
    If true, progress messages are written to sys.stderr.

     -- check_undefined_atomids_str --
    Optional container indexed by atom ID (an index into G_system.verts[])
    yielding the name of that atom as a string.  If supplied (and not empty),
    every group of atoms matching "g_bond_pattern" must also match at least
    one entry in "typepattern_to_coefftypes"; otherwise InputError is raised,
    and these strings are used to name the offending atoms.

    Returns:
    An OrderedDict mapping each coefftype to the list of atom-ID sequences
    (in canonical order) which were assigned that coefftype.

    Raises:
    InputError if "check_undefined_atomids_str" was supplied and some group
    of atoms bonded according to "g_bond_pattern" matched no typepattern.

    """

    # NOTE(review): a second definition of GenInteractions_int (without the
    # "check_undefined_atomids_str" argument) appears later in this file.
    # If both are at module level, the later one replaces this one.  Confirm.

    if report_progress:
        startatomid = 0
        sys.stderr.write(' searching for matching bond patterns:\n')
        sys.stderr.write(' 0%')

    # Figure out which atoms from "G_system" bond together in a way which
    # matches the "g_bond_pattern" argument.  Organize these matches by
    # atom and bond types and store them in "interactions_by_type".

    gm = GraphMatcher(G_system, g_bond_pattern)

    interactions_by_type = defaultdict(list)

    for atombondids in gm.Matches():
        # "atombondids" is a tuple.
        #  atombondids[0] has atomIDs from G_system matching g_bond_pattern
        #     (These atomID numbers are indices into G_system.verts[].)
        #  atombondids[1] has bondIDs from G_system matching g_bond_pattern
        #     (These bondID numbers are indices into G_system.edges[].)

        # It's convenient to organize the list of interactions-between-
        # atoms in a dictionary indexed by atomtypes and bondtypes.
        # (Because many atoms and bonds typically share the same type,
        #  organizing the results this way makes it faster to check
        #  whether a given interaction matches a "typepattern" defined
        #  by the user.  We only have to check once for the whole group.)
        atombondtypes = (
            tuple(G_system.GetVert(Iv).attr for Iv in atombondids[0]),
            tuple(G_system.GetEdge(Ie).attr for Ie in atombondids[1]))

        interactions_by_type[atombondtypes].append(atombondids)

        if report_progress:
            # GraphMatcher.Matches() searches for matches in an order
            # that selects a different atomid number from G_system,
            # starting at 0, and continuing up to the number of atoms (-1)
            # in the system, and using this as the first atom in the match
            # (ie match[0][0]).  This number can be used to guess how much
            # progress has been made so far.
            oldatomid = startatomid
            startatomid = atombondids[0][0]
            num_verts = G_system.GetNumVerts()
            percent_complete = (100 * startatomid) // num_verts

            # Report less often as more progress is made: print whenever
            # the percentage crosses a multiple of "step".
            if percent_complete <= 4:
                step = 1
            elif percent_complete <= 8:
                step = 2
            elif percent_complete <= 20:
                step = 4
            else:
                step = 10
            if ((100 * startatomid) // (step * num_verts) >
                    (100 * oldatomid) // (step * num_verts)):
                sys.stderr.write(' ' + str(percent_complete) + '%')

    if report_progress:
        sys.stderr.write(' 100%\n')

    coefftype_to_atomids = OrderedDict()
    # Maps each canonically-ordered match to the coefftypes already
    # assigned to it.  (Local bookkeeping only; never returned.)
    abids_to_coefftypes = {}

    # -------------------- reporting progress -----------------------
    # The next interval of code is not technically necessary, but it makes
    # the printed output easier to read by excluding irrelevant interactions.
    # Collect (once) the string names of every atom type and bond type
    # which appears in at least one match.  These sets do not depend on the
    # type pattern, so they are not recomputed for each type pattern.
    types_atoms_all_str = set()
    types_bonds_all_str = set()
    if report_progress and typepattern_to_coefftypes:
        for atomtypes_int, bondtypes_int in interactions_by_type:
            types_atoms_all_str.update(atomtypes_int2str[Iv]
                                       for Iv in atomtypes_int)
            types_bonds_all_str.update(bondtypes_int2str[Ie]
                                       for Ie in bondtypes_int)
    # ------------------ reporting progress (end) -------------------

    # ------------------ check to make sure all interactions are defined ---
    if check_undefined_atomids_str:
        # Loop through all the interactions (tuples of atoms) found by
        # GraphMatcher, sort the atoms and store them in a dictionary
        # (atomids_matched) which keeps track of which interactions have
        # been defined (ie have force-field parameters assigned to them).
        # Initialize them to False, and update as interactions are found.
        #
        # NOTE TO SELF:
        # If in the future, different interactions (type_patterns) have
        # different symmetries, and canonical_order() varies from
        # interaction to interaction, then DONT loop over type_pattern:
        #     for type_pattern, coefftype in typepattern_to_coefftypes)
        #         abids = canonical_order(abids, type_pattern)
        # Why:  When checking for undefined interactions,
        # we just want to make sure that SOME kind of interaction
        # involving these atoms exists.  The gruesome details of
        # force-field symmetry should not enter into this.
        # (We certainly don't want to require that different
        # interactions are simultaneously present for the same set of
        # atoms for ALL the possible different atom orderings for the
        # different possible symmetries in the force field you are using.
        # Perhaps, in the future I should just use something like this:
        #     atomids_int = abids[0]
        #     atomids_int.sort()
        #     atomids_int = tuple(atomids_int)
        # This would work for most molecules.
        # I suppose that in some bizarre molecules containing
        # triangular or square cycles, for example, this would not
        # distinguish all 3 angles in the triangle, for example,
        # mistakenly thinking there was only one interaction there.
        # But these cases are rare.)
        atomids_matched = OrderedDict()
        for abidslist in interactions_by_type.values():
            for abids in abidslist:
                abids = canonical_order(abids)
                # (Later on, we'll set some of these to True)
                atomids_matched.setdefault(tuple(abids[0]), False)
    # ------------------ check to make sure all interactions are defined (end)

    count = 0

    for typepattern, coefftype in typepattern_to_coefftypes:

        # ------------------ reporting progress -----------------------
        if report_progress:
            # Check to see if the atoms or bonds referred to in typepattern
            # are (potentially) satisfied by any of the atoms present in the
            # system.  Only if ALL of the atom and bond requirements for
            # this type pattern are satisfied by at least SOME of the atoms
            # present in this system, ...THEN print a status message.
            # (Because for complex all-atom force-fields, the number of
            # possible atom types, and typepatterns far exceeds the number
            # of atom types typically present in the system.  Otherwise
            # hundreds of kB of irrelevant information can be printed.)
            num_pat_verts = g_bond_pattern.GetNumVerts()
            num_pat_edges = g_bond_pattern.GetNumEdges()
            atoms_available = all(
                any(MatchesPattern(type_atom_str, typepattern[Iv])
                    for type_atom_str in types_atoms_all_str)
                for Iv in range(num_pat_verts))
            # The bond entries of a typepattern follow its atom entries.
            bonds_available = atoms_available and all(
                any(MatchesPattern(type_bond_str,
                                   typepattern[num_pat_verts + Ie])
                    for type_bond_str in types_bonds_all_str)
                for Ie in range(num_pat_edges))

            if atoms_available and bonds_available:
                sys.stderr.write(' checking ' + coefftype +
                                 ' type requirements:'
                                 '\n ' + str(typepattern) + '\n')
        # ------------------ reporting progress (end) -------------------

        for atombondtypes, abidslist in interactions_by_type.items():
            # express atom & bond types in the original string format
            types_atoms = [atomtypes_int2str[Iv] for Iv in atombondtypes[0]]
            types_bonds = [bondtypes_int2str[Ie] for Ie in atombondtypes[1]]
            type_strings = types_atoms + types_bonds

            # use string comparisons to check for a match with typepattern
            if not MatchesAll(type_strings, typepattern):  # see "ttree_lex.py"
                continue

            for abids in abidslist:
                # Re-order the atoms (and bonds) in a "canonical" way.
                # Only add new interactions to the list after re-ordering
                # them and checking that they have not been added earlier.
                # (...well not when using the same coefftype at least.
                #  This prevents the same triplet of atoms from
                #  being used to calculate the bond-angle twice:
                #  once for 1-2-3 and 3-2-1, for example.)
                abids = canonical_order(abids)

                if check_undefined_atomids_str:
                    # Some interaction is defined for these atoms.
                    atomids_matched[tuple(abids[0])] = True

                coefftypes = abids_to_coefftypes.setdefault(abids, [])
                if coefftype not in coefftypes:
                    coefftype_to_atomids.setdefault(
                        coefftype, []).append(abids[0])
                    coefftypes.append(coefftype)
                    count += 1

    if report_progress:
        sys.stderr.write(' (found ' + str(count) +
                         ' non-redundant matches)\n')

    if check_undefined_atomids_str:
        for atomids_int, found_match in atomids_matched.items():
            if not found_match:
                atomids_str = [check_undefined_atomids_str[Iv]
                               for Iv in atomids_int]
                raise InputError(
                    'Error: A bonded interaction should exist between atoms:\n' +
                    ' ' + (',\n '.join(atomids_str)) + '\n' +
                    ' ...however no interaction between these types of atoms has been defined\n' +
                    ' This usually means that at least one of your atom TYPES is incorrect.\n' +
                    ' If this is not the case, then you can override this error message by\n' +
                    ' invoking moltemplate.sh without the \"-checkff\" argument.\n'
                )

    return coefftype_to_atomids
def GenInteractions_int(G_system,
                        g_bond_pattern,
                        typepattern_to_coefftype,
                        canonical_order,  # function to sort atoms and bonds
                        atomtypes_int2str,
                        bondtypes_int2str,
                        report_progress = False):  # print messages to sys.stderr?
    """
    Group the bonded interactions found in "G_system" by coefficient type.

    Every set of atoms in "G_system" whose bonds match the topology of
    "g_bond_pattern" is a candidate interaction.  A candidate is kept when
    the names of its atom types and bond types satisfy one of the type
    patterns in "typepattern_to_coefftype"; it is then filed under the
    "coefftype" string paired with that pattern.

    Arguments:

     -- G_system, g_bond_pattern --
    Graphs in "Ugraph" format.  In G_system, atom IDs are indices into the
    vertex list and bond IDs are indices into the edge list; the integer
    type of an atom or bond is read from the ".attr" member of the
    corresponding vertex or edge.  g_bond_pattern only describes which
    atoms must be bonded to which (its atom and bond types are left blank).

     -- atomtypes_int2str, bondtypes_int2str --
    Lookup tables which convert the integer atom types and bond types
    stored in G_system back into the strings supplied by the user.

     -- typepattern_to_coefftype --
    A list of 2-tuples: (typepattern, coefftype).  The typepattern
    describes the required atom types followed by the required bond types.
    The coefftype is a string identifying the kind of interaction, which
    can later be used to look up force field parameters.

     -- canonical_order --
    A function which rearranges the atoms and bonds of a match into a
    standard order.  The subgraph search reports the same interaction more
    than once (atoms 1,2,3 and atoms 3,2,1 describe the same angle, for
    example).  Matches are compared after being put in canonical order,
    so each interaction is recorded only once per coefftype.  (The value
    it returns is used as a dictionary key, so it must be hashable.)

     -- report_progress --
    If true, progress messages are written to sys.stderr.

    Returns:
    An OrderedDict mapping each coefftype to the list of atom-ID sequences
    (in canonical order) which were assigned that coefftype.

    """

    # NOTE(review): another definition of GenInteractions_int (one which
    # also accepts "check_undefined_atomids_str") appears earlier in this
    # file.  If both are at module level, this one replaces it.  Confirm
    # which of the two is intended to be in effect.

    if report_progress:
        newest_first_atom = 0
        sys.stderr.write(' Searching for matching bond patterns:\n')
        sys.stderr.write(' 0%')

    # Stage 1: collect every match of the bond pattern, filed under the
    # (atom types, bond types) of the atoms and bonds taking part in it.
    # Many matches share the same types, so the (slower) string comparison
    # against the type patterns only has to be done once per group.
    matcher = GraphMatcher(G_system, g_bond_pattern)
    matches_by_types = defaultdict(list)

    for match in matcher.Matches():
        # match[0] holds atom IDs, match[1] holds bond IDs (both from
        # G_system, listed in the order used by g_bond_pattern).
        vert_types = tuple([G_system.GetVert(ia).attr for ia in match[0]])
        edge_types = tuple([G_system.GetEdge(ib).attr for ib in match[1]])
        matches_by_types[(vert_types, edge_types)].append(match)

        if not report_progress:
            continue

        # The matcher walks through G_system using each atom in turn
        # (starting from atom 0) as the first atom of the match, so the
        # first atom ID of the newest match estimates the progress made.
        previous_first_atom = newest_first_atom
        newest_first_atom = match[0][0]
        n_atoms = G_system.GetNumVerts()
        percent_complete = (100 * newest_first_atom) // n_atoms

        # Print less frequently as the search advances: a message appears
        # each time the percentage passes a multiple of "coarseness".
        if percent_complete <= 4:
            coarseness = 1
        elif percent_complete <= 8:
            coarseness = 2
        elif percent_complete <= 20:
            coarseness = 4
        else:
            coarseness = 10
        ticks_now = (100 * newest_first_atom) // (coarseness * n_atoms)
        ticks_before = (100 * previous_first_atom) // (coarseness * n_atoms)
        if ticks_now > ticks_before:
            sys.stderr.write(' '+str(percent_complete)+'%')

    if report_progress:
        sys.stderr.write(' 100%\n')
        sys.stderr.write(' Looking up atom and bond types...')

    # Stage 2: compare each group of matches with each type pattern, and
    # record the matches which satisfy it under the pattern's coefftype.
    coefftype_to_atomids = OrderedDict()
    coefftypes_assigned = OrderedDict()  # canonical match -> coefftypes
    count = 0

    for typepattern, coefftype in typepattern_to_coefftype:
        if report_progress:
            sys.stderr.write(' Checking (atom-types,bond-types) against \n '+str(typepattern)+'-->'+coefftype+'\n')

        for type_key, members in matches_by_types.items():
            # Convert the integer types back to the user's strings
            # (atom types first, followed by bond types).
            atom_names = [atomtypes_int2str[t] for t in type_key[0]]
            bond_names = [bondtypes_int2str[t] for t in type_key[1]]

            # String comparison against the pattern (see "ttree_lex.py")
            if not MatchesAll(atom_names + bond_names, typepattern):
                continue

            for candidate in members:
                # Put the atoms (and bonds) in canonical order first, so
                # the same atoms visited in a different order (1-2-3 and
                # 3-2-1, for example) are recognized as one interaction
                # and are not recorded twice under the same coefftype.
                ordered = canonical_order(candidate)
                already = coefftypes_assigned.setdefault(ordered, [])
                if coefftype in already:
                    continue
                coefftype_to_atomids.setdefault(coefftype,
                                                []).append(ordered[0])
                already.append(coefftype)
                count += 1

    if report_progress:
        sys.stderr.write(' done\n (found '+ str(count)+' non-redundant matches)\n')

    return coefftype_to_atomids