def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n, ndxfn, Nchains):
    """Write per-string-point topologies and position-restraint include files.

    For each string point k in range(n) this writes:

    * ``topol_<k>.top``: the contents of `top` with the file name of every
      chain's include replaced by ``res_<k>_chain_<mol>.itp``.
    * ``res_<k>_chain_<mol>.itp`` for each chain: for multi-chain systems a
      copy of that chain's include file, followed by a
      ``[ position_restraints ]`` section with one row per selected atom.

    Force constants are written as the literal placeholder ``KFAC`` so that
    they can be substituted in a later stage.

    Args:
        inp, initial_confs, start, end, tpr: not used by this
            position-restraint variant; kept so the signature stays the same.
        top: path of the topology file to read.
        includes: indexable collection of per-chain include (.itp) paths. May
            be empty (or None); then ``topol_<k>.top`` instead gets an
            ``#include`` line inserted at the position-restraint marker.
        n: number of points in the string, including start and end point
            (converted with int()).
        ndxfn: index file handed to res_selection.read_ndx().
        Nchains: number of chains to write restraint files for.

    Raises:
        IndexError: if `includes` is empty and `top` does not contain the
            '; Include Position restraint file' marker.
    """
    n = int(n)  # number of points in the string, including start and end point
    ndx_atoms = res_selection.read_ndx(ndxfn)
    has_includes = includes is not None and len(includes) > 0

    # TODO: interpolating the restraint positions between the start and end
    # configurations is not implemented here, which is why initial_confs,
    # start and end are currently ignored.

    # GLIC HACK: only write one chain, and do it relative atom 1 since the
    # .itp maps to the topology molecule.
    glic_chain_atom_limit = 5566

    # Rewrite the topology to include the res itp files instead of the
    # original per-chain itps (if any). One topol_<k>.top per string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if has_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so a '.'
                # in it must not match arbitrary characters.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Write the restraint atom and force spec for each string point.
    # TODO: the restraint positions are not stored in these files, so the
    # content is the same for all points and chains.
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of this chain's own topology.
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                if has_includes:
                    # The parsed molecule is not used below; the call is kept
                    # in case it validates the file - TODO confirm and drop.
                    molecule(includes[mol])
                else:
                    molecule(top)  # result unused, see note above
                    # Read and split the source topology *before* truncating
                    # the output, so nothing is lost if the two paths alias.
                    with open(top, 'r') as in_itp_f:
                        in_itp = in_itp_f.read().split('; Include Position restraint file')
                    head, tail = in_itp[0], in_itp[1]
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(head)
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(tail)

                # One row per selected atom with the KFAC force constant
                # placeholder.
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom type fx fy fz\n")
                for a in ndx_atoms:
                    if a < glic_chain_atom_limit:
                        restraint_itp.write("%6d 1 KFAC KFAC KFAC\n" % int(a))
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write per-string-point topologies and dihedral-restraint include files.

    For each string point k in range(n) this writes ``topol_<k>.top`` and, per
    chain, ``res_<k>_chain_<mol>.itp`` containing a ``[ dihedral_restraints ]``
    section (Gromacs 4.6+ format) with a phi and a psi row for every selected
    residue. The force constant is written as the literal placeholder ``KFAC``
    and is substituted in a later stage.

    The target angles are either interpolated linearly between the values in
    `start_xvg` and `end_xvg` (when `initial_confs` is None or empty), or
    taken from `g_rama` output computed for each entry of `initial_confs`
    (in which case the start/end inputs are ignored for the angle values).

    Args:
        inp: object providing getOutputDir(), where the rama .xvg files are
            read back from.
        initial_confs: optional indexable collection with one structure file
            per string point.
        start: structure passed to res_selection.res_select() to map the
            index atoms to residues.
        end: unused.
        start_xvg, end_xvg: dihedral .xvg files for the two end points.
        tpr: run input file passed to g_rama via -s.
        top: path of the topology file to read.
        includes: indexable collection of per-chain include (.itp) paths; may
            be empty (or None).
        n: number of points in the string, including start and end point
            (converted with int()).
        ndx_file: index file handed to res_selection.read_ndx().
        Nchains: number of chains.

    Raises:
        IndexError: if `includes` is empty and `top` does not contain the
            '; Include Position restraint file' marker.
        KeyError: if a selected residue or one of its neighbours lacks the
            backbone atoms needed for phi/psi.
    """
    # Get the atoms involved with the residues to use for dihedrals (might be
    # more than one atom per residue) and map them to residue numbers.
    ndx_atoms = res_selection.read_ndx(ndx_file)
    selection = res_selection.res_select(start, ndx_atoms)
    n = int(n)  # number of points in the string, including start and end point
    has_includes = includes is not None and len(includes) > 0

    use_interpolation = initial_confs is None or len(initial_confs) == 0
    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation.
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Generate the dihedrals ourselves from the given initial structures.
        # TODO: assert that len(initial_confs) == n otherwise?
        ramaprocs = {}
        stringpts = {}  # 4 levels: stringpoint, residue, chain, phi/psi value
        # The devnull handle is closed once all sub-processes are reaped.
        with open(os.devnull, 'w') as FNULL:  # don't generate spam from g_rama
            # Run g_rama (in parallel) on each structure.
            # TODO: check for and use g_rama_mpi.. like everywhere else
            # NOTE(review): '%3d' pads with spaces, not zeros; kept as-is
            # because the same name is used when reading the file back.
            for i in range(n):
                ramaprocs[i] = Popen(
                    ['g_rama', '-f', initial_confs[i], '-s', tpr, '-o', '0%3d.xvg' % i],
                    stdout=FNULL, stderr=FNULL)

            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended.
                ramaprocs[i].communicate()
                if ramaprocs[i].returncode != 0:
                    sys.stderr.write('Warning: g_rama for string point %d exited with code %s\n'
                                     % (i, ramaprocs[i].returncode))
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the
    # original per-chain itps (if any). One topol_<k>.top per string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if has_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so a '.'
                # in it must not match arbitrary characters.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # k runs from 0 to n-1; dividing by n-1 maps it to a factor from 0 to 1.
    # For a single-point string (n == 1) fall back to the start values
    # instead of dividing by zero.
    interp_steps = max(n - 1, 1)

    # Generate/copy and write out the dihedrals for each point.
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of this chain's own topology.
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type phi dphi kfac\n")

                if has_includes:
                    protein = molecule(includes[mol])
                else:
                    protein = molecule(top)
                    # Read and split the source topology *before* truncating
                    # the output, so nothing is lost if the two paths alias.
                    with open(top, 'r') as in_itp_f:
                        in_itp = in_itp_f.read().split('; Include Position restraint file')
                    head, tail = in_itp[0], in_itp[1]
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(head)
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(tail)

                # Lookup table: residue number -> {atom name: atom number} for
                # the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                for r in selection:
                    here = dih_atoms[r]
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], here['N'], here['CA'], here['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [here['N'], here['CA'], here['C'], dih_atoms[r + 1]['N']]

                    if use_interpolation:
                        phi_start = startpts[r][mol][0]
                        psi_start = startpts[r][mol][1]
                        phi_val = phi_start + k * (endpts[r][mol][0] - phi_start) / interp_steps
                        psi_val = psi_start + k * (endpts[r][mol][1] - psi_start) / interp_steps
                    else:
                        # Use the values extracted from initial_confs above.
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Different stages need different force constants, so a
                    # searchable KFAC placeholder is written and replaced later.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def reparametrize(use_posres, fix_endpoints, cvs, ndx_file, Nchains, start_conf, start_xvg, end_conf, end_xvg, last_resconfs, top, includes):
    """Average the swarm drift per string point, reparametrize, and write targets.

    Steps:

    1. For every string point in `cvs`, read the CV vector of each swarm
       result, sum them with mapadd and scale by 1/len(cvs[0]).
    2. If `fix_endpoints` is 1, read the fixed start/end CV vectors and add
       them at both ends of the point list.
    3. Call ext_rep_pts repeatedly (at most 150 further iterations) until the
       reported maximum spread is no longer above 0.012.
    4. Write the restraint targets for the next iteration:
       ``rep_resconf_<k>.gro`` per string point when `use_posres` is 1,
       otherwise ``res_<k>_chain_<chain>.itp`` per string point and chain.
       With fixed end points, nothing is written for the first and last point.

    Args:
        use_posres: 1 to use atom positions as CVs, 0 to use phi/psi dihedrals.
        fix_endpoints: 1 if the start and end points are held fixed.
        cvs: cvs[point][swarm] is a file with the CVs of one swarm result.
        ndx_file: index file handed to res_selection.read_ndx().
        Nchains: number of chains (dihedral case).
        start_conf, end_conf: end-point structures (position case; start_conf
            is also used for the residue selection in the dihedral case).
        start_xvg, end_xvg: end-point dihedral files (dihedral case).
        last_resconfs: last_resconfs[k] is the previous restraint .gro of
            string point k (position case).
        top: topology parsed for the atom lookup when Nchains == 1.
        includes: includes[k][chain] is the previous per-chain .itp of string
            point k (dihedral case).
    """
    Nswarms = len(cvs[0])
    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals the atoms are mapped to residues of a single chain; the
    # xvg readers then select the same residues in each chain.
    # TODO: for the posres case the index covers atoms inside one chain while
    # the .gro files use global numbering; this is not handled generally yet.
    if use_posres == 0:
        rsel = res_selection.res_select('%s' % start_conf, ndx_atoms)

    # Average drift in CV space: one CV vector per swarm-processed point.
    # Index-based access is kept on purpose, since only integer indexing of
    # cvs is known to be supported by the callers.
    newpts = []
    for pathpt in range(len(cvs)):
        swarmpts = []
        for i in range(len(cvs[pathpt])):
            if use_posres == 1:
                zpt = rwgro.readgro_flat(cvs[pathpt][i], ndx_atoms)
            else:
                zpt = readxvg.readxvg_flat(cvs[pathpt][i], rsel)
            swarmpts.append(zpt)
        zptsum = reduce(mapadd, swarmpts)
        newpts.append(scale((1 / float(Nswarms)), zptsum))

    # With fixed end points the swarms only cover the interior points, so the
    # start/end CV values are read separately and added around them.
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: start/end_conf are full systems, so the atom numbering
            # aliases for ndx_atoms; currently worked around in readgro_flat.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)
        sys.stderr.write('Length of initpt %d, targetpt %d\n' % (len(initpt), len(targetpt)))
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # Something with 1-indexing makes this padding point necessary.
    # TODO: check if this is needed anymore
    newpts.append([0] * len(newpts[0]))

    # Reparametrization. ext_rep_pts returns the maximum spread in [0] and the
    # adjusted points in [1]. Only the latest point set is kept: with long CV
    # vectors, storing every iteration costs a lot of memory for no use.
    adjusted = ext_rep_pts(newpts)[1]  # initial iteration, spread ignored
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal.
    while i < 150 and maxspread > 0.012:
        sys.stderr.write('Rep iter %d: \n' % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write(' maxspread was %f\n' % maxspread)
        adjusted = rep_it[1]
        i += 1

    sys.stderr.write('Final maximum spread %f after %d iterations.\n' % (maxspread, i))

    # Delete the padding point again.
    adjusted = adjusted[:-1]
    sys.stderr.write('Length of the adjusted vector: %d\n' % len(adjusted))

    if use_posres == 1:
        # Set for O(1) membership tests on every atom row.
        # Assumes the index entries are hashable atom numbers - TODO confirm.
        selected_atoms = set(ndx_atoms)

    # Write the CV control data for the next iteration.
    npoints = len(adjusted)
    for k in range(npoints):
        # The end points are bypassed by the caller when they are fixed.
        if fix_endpoints == 1 and (k == 0 or k == npoints - 1):
            continue

        pathpoint = adjusted[k]

        if use_posres == 1:
            # Read the previous resconf *before* opening the output, so
            # nothing is lost should the two paths ever alias.
            with open(last_resconfs[k], 'r') as in_resconf_f:
                in_resconf = in_resconf_f.readlines()

            # pathpoint is the 1-D list of positions: x,y,z * atoms in index
            if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
                sys.stderr.write('adjusted[] entry of wrong length %d\n' % len(pathpoint))

            # TODO: maybe this chunk of code could be done by the rwgro module.
            with open('rep_resconf_%d.gro' % k, 'w') as rep_resconf:
                # Copy the first 2 rows (title and number of atoms) as-is.
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])

                # Only positions are copied; velocities are not needed since
                # these files only serve as position-restraint references.
                cvpos = 0
                for line in in_resconf[2:-1]:
                    resname = line[0:8]
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    x = float(line[20:28])
                    y = float(line[28:36])
                    z = float(line[36:44])
                    if atomnr in selected_atoms:
                        # Update to the reparametrized coordinates.
                        x, y, z = pathpoint[cvpos], pathpoint[cvpos + 1], pathpoint[cvpos + 2]
                        cvpos += 3
                    rep_resconf.write('%s%s%5d%8.3f%8.3f%8.3f\n' % (resname, atname, atomnr, x, y, z))

                # Copy the last row, which holds the cell dimensions.
                rep_resconf.write(in_resconf[-1])
        else:
            for chain in range(Nchains):
                # Read and parse all inputs *before* opening the output for
                # writing, in case the previous .itp has the same path.
                with open(includes[k][chain], 'r') as in_itpf:
                    in_itp = in_itpf.read()
                moltop = in_itp.split('[ dihedral_restraints ]')[0]

                if Nchains == 1:
                    protein = molecule(top)
                else:
                    protein = molecule('%s' % includes[k][chain])

                # Lookup table: residue number -> {atom name: atom number} for
                # the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))
                with open('res_%d_chain_%d.itp' % (k, chain), 'w') as restraint_itp:
                    restraint_itp.write('%s' % moltop)
                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write("; ai aj ak al type phi dphi kfac phiB dphiB kfacB\n")

                    pos = 0
                    for r in rsel:
                        here = dih_atoms[r]
                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [dih_atoms[r - 1]['C'], here['N'], here['CA'], here['C']]
                        # psi is N, CA and C on this residue and N on the next
                        psi = [here['N'], here['CA'], here['C'], dih_atoms[r + 1]['N']]

                        # NOTE(review): with Nchains > 1 the psi index of one
                        # chain equals the phi index of the next; if the vector
                        # holds (phi, psi) pairs per chain this should probably
                        # be pos + 2 * chain. Left unchanged - verify against
                        # readxvg_flat.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]
                        # Go to the next residue (2 values * number of chains).
                        pos += 2 * Nchains

                        # KFAC is a searchable placeholder for the force
                        # constant, replaced later for each stage.
                        restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                            % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                        restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                            % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write per-string-point topologies and dihedral-restraint include files.

    For each string point k in range(n) this writes ``topol_<k>.top`` and, per
    chain, ``res_<k>_chain_<mol>.itp`` containing a ``[ dihedral_restraints ]``
    section (Gromacs 4.6+ format) with a phi and a psi row for every selected
    residue. The force constant is written as the literal placeholder ``KFAC``
    and is substituted in a later stage.

    The target angles are either interpolated linearly between the values in
    `start_xvg` and `end_xvg` (when `initial_confs` is None or empty), or
    taken from the output of the rama command (as named by
    cmds.GromacsCommands().rama) run on each entry of `initial_confs`, in
    which case the start/end inputs are ignored for the angle values.

    Args:
        inp: object providing getOutputDir(), where the rama .xvg files are
            read back from.
        initial_confs: optional indexable collection with one structure file
            per string point.
        start: structure passed to res_selection.res_select() to map the
            index atoms to residues.
        end: unused.
        start_xvg, end_xvg: dihedral .xvg files for the two end points.
        tpr: run input file passed to the rama command via -s.
        top: path of the topology file to read.
        includes: indexable collection of per-chain include (.itp) paths; may
            be empty (or None).
        n: number of points in the string, including start and end point
            (converted with int()).
        ndx_file: index file handed to res_selection.read_ndx().
        Nchains: number of chains.

    Raises:
        IndexError: if `includes` is empty and `top` does not contain the
            '; Include Position restraint file' marker.
        KeyError: if a selected residue or one of its neighbours lacks the
            backbone atoms needed for phi/psi.
    """
    cmdnames = cmds.GromacsCommands()

    # Get the atoms involved with the residues to use for dihedrals (might be
    # more than one atom per residue) and map them to residue numbers.
    ndx_atoms = res_selection.read_ndx(ndx_file)
    selection = res_selection.res_select(start, ndx_atoms)
    n = int(n)  # number of points in the string, including start and end point
    has_includes = includes is not None and len(includes) > 0

    use_interpolation = initial_confs is None or len(initial_confs) == 0
    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation.
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Generate the dihedrals ourselves from the given initial structures.
        # TODO: assert that len(initial_confs) == n otherwise?
        ramaprocs = {}
        stringpts = {}  # 4 levels: stringpoint, residue, chain, phi/psi value
        rama_cmd = cmdnames.rama.split()
        # The devnull handle is closed once all sub-processes are reaped.
        with open(os.devnull, 'w') as FNULL:  # don't generate spam from rama
            # Run the rama command (in parallel) on each structure.
            # NOTE(review): '%3d' pads with spaces, not zeros; kept as-is
            # because the same name is used when reading the file back.
            for i in range(n):
                cmd = rama_cmd + ['-f', initial_confs[i], '-s', tpr, '-o', '0%3d.xvg' % i]
                ramaprocs[i] = Popen(cmd, stdout=FNULL, stderr=FNULL)

            for i in range(n):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding rama task has ended.
                ramaprocs[i].communicate()
                if ramaprocs[i].returncode != 0:
                    sys.stderr.write('Warning: rama for string point %d exited with code %s\n'
                                     % (i, ramaprocs[i].returncode))
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    # Rewrite the topology to include the res itp files instead of the
    # original per-chain itps (if any). One topol_<k>.top per string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if has_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so a '.'
                # in it must not match arbitrary characters.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # k runs from 0 to n-1; dividing by n-1 maps it to a factor from 0 to 1.
    # For a single-point string (n == 1) fall back to the start values
    # instead of dividing by zero.
    interp_steps = max(n - 1, 1)

    # Generate/copy and write out the dihedrals for each point.
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of this chain's own topology.
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai aj ak al type phi dphi kfac\n")

                if has_includes:
                    protein = molecule(includes[mol])
                else:
                    protein = molecule(top)
                    # Read and split the source topology *before* truncating
                    # the output, so nothing is lost if the two paths alias.
                    with open(top, 'r') as in_itp_f:
                        in_itp = in_itp_f.read().split('; Include Position restraint file')
                    head, tail = in_itp[0], in_itp[1]
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(head)
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(tail)

                # Lookup table: residue number -> {atom name: atom number} for
                # the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                for r in selection:
                    here = dih_atoms[r]
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], here['N'], here['CA'], here['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [here['N'], here['CA'], here['C'], dih_atoms[r + 1]['N']]

                    if use_interpolation:
                        phi_start = startpts[r][mol][0]
                        psi_start = startpts[r][mol][1]
                        phi_val = phi_start + k * (endpts[r][mol][0] - phi_start) / interp_steps
                        psi_val = psi_start + k * (endpts[r][mol][1] - psi_start) / interp_steps
                    else:
                        # Use the values extracted from initial_confs above.
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Different stages need different force constants, so a
                    # searchable KFAC placeholder is written and replaced later.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n, ndxfn, Nchains):
    """Write per-string-point topologies and position-restraint include files.

    For each string point k in range(n) this writes:

    * ``topol_<k>.top``: the contents of `top` with the file name of every
      chain's include replaced by ``res_<k>_chain_<mol>.itp``.
    * ``res_<k>_chain_<mol>.itp`` for each chain: for multi-chain systems a
      copy of that chain's include file, followed by a
      ``[ position_restraints ]`` section with one row per selected atom.

    Force constants are written as the literal placeholder ``KFAC`` so that
    they can be substituted in a later stage.

    Args:
        inp, initial_confs, start, end, tpr: not used by this
            position-restraint variant; kept so the signature stays the same.
        top: path of the topology file to read.
        includes: indexable collection of per-chain include (.itp) paths. May
            be empty (or None); then ``topol_<k>.top`` instead gets an
            ``#include`` line inserted at the position-restraint marker.
        n: number of points in the string, including start and end point
            (converted with int()).
        ndxfn: index file handed to res_selection.read_ndx().
        Nchains: number of chains to write restraint files for.

    Raises:
        IndexError: if `includes` is empty and `top` does not contain the
            '; Include Position restraint file' marker.
    """
    n = int(n)  # number of points in the string, including start and end point
    ndx_atoms = res_selection.read_ndx(ndxfn)
    has_includes = includes is not None and len(includes) > 0

    # TODO: interpolating the restraint positions between the start and end
    # configurations is not implemented here, which is why initial_confs,
    # start and end are currently ignored.

    # GLIC HACK: only write one chain, and do it relative atom 1 since the
    # .itp maps to the topology molecule.
    glic_chain_atom_limit = 5566

    # Rewrite the topology to include the res itp files instead of the
    # original per-chain itps (if any). One topol_<k>.top per string point.
    sys.stderr.write('%s' % includes)
    for k in range(n):
        with open(top) as in_topf:
            in_top = in_topf.read()
        if has_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so a '.'
                # in it must not match arbitrary characters.
                in_top = in_top.replace(includename, 'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    # Write the restraint atom and force spec for each string point.
    # TODO: the restraint positions are not stored in these files, so the
    # content is the same for all points and chains.
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of this chain's own topology.
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                if has_includes:
                    # The parsed molecule is not used below; the call is kept
                    # in case it validates the file - TODO confirm and drop.
                    molecule(includes[mol])
                else:
                    molecule(top)  # result unused, see note above
                    # Read and split the source topology *before* truncating
                    # the output, so nothing is lost if the two paths alias.
                    with open(top, 'r') as in_itp_f:
                        in_itp = in_itp_f.read().split('; Include Position restraint file')
                    head, tail = in_itp[0], in_itp[1]
                    with open('topol_%d.top' % k, 'w') as out_top:
                        out_top.write(head)
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(tail)

                # One row per selected atom with the KFAC force constant
                # placeholder.
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom type fx fy fz\n")
                for a in ndx_atoms:
                    if a < glic_chain_atom_limit:
                        restraint_itp.write("%6d 1 KFAC KFAC KFAC\n" % int(a))
def reparametrize(
    use_posres,
    fix_endpoints,
    cvs,
    ndx_file,
    Nchains,
    start_conf,
    start_xvg,
    end_conf,
    end_xvg,
    last_resconfs,
    top,
    includes,
):
    """Average the swarm drift per string point, reparametrize, and write targets.

    Steps:

    1. For every string point in `cvs`, read the CV vector of each swarm
       result, sum them with mapadd and scale by 1/len(cvs[0]).
    2. If `fix_endpoints` is 1, read the fixed start/end CV vectors and add
       them at both ends of the point list.
    3. Call ext_rep_pts repeatedly (at most 150 further iterations) until the
       reported maximum spread is no longer above 0.012.
    4. Write the restraint targets for the next iteration:
       ``rep_resconf_<k>.gro`` per string point when `use_posres` is 1,
       otherwise ``res_<k>_chain_<chain>.itp`` per string point and chain.
       With fixed end points, nothing is written for the first and last point.

    Args:
        use_posres: 1 to use atom positions as CVs, 0 to use phi/psi dihedrals.
        fix_endpoints: 1 if the start and end points are held fixed.
        cvs: cvs[point][swarm] is a file with the CVs of one swarm result.
        ndx_file: index file handed to res_selection.read_ndx().
        Nchains: number of chains (dihedral case).
        start_conf, end_conf: end-point structures (position case; start_conf
            is also used for the residue selection in the dihedral case).
        start_xvg, end_xvg: end-point dihedral files (dihedral case).
        last_resconfs: last_resconfs[k] is the previous restraint .gro of
            string point k (position case).
        top: topology parsed for the atom lookup when Nchains == 1.
        includes: includes[k][chain] is the previous per-chain .itp of string
            point k (dihedral case).
    """
    Nswarms = len(cvs[0])
    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals the atoms are mapped to residues of a single chain; the
    # xvg readers then select the same residues in each chain.
    # TODO: for the posres case the index covers atoms inside one chain while
    # the .gro files use global numbering; this is not handled generally yet.
    if use_posres == 0:
        rsel = res_selection.res_select("%s" % start_conf, ndx_atoms)

    # Average drift in CV space: one CV vector per swarm-processed point.
    # Index-based access is kept on purpose, since only integer indexing of
    # cvs is known to be supported by the callers.
    newpts = []
    for pathpt in range(len(cvs)):
        swarmpts = []
        for i in range(len(cvs[pathpt])):
            if use_posres == 1:
                zpt = rwgro.readgro_flat(cvs[pathpt][i], ndx_atoms)
            else:
                zpt = readxvg.readxvg_flat(cvs[pathpt][i], rsel)
            swarmpts.append(zpt)
        zptsum = reduce(mapadd, swarmpts)
        newpts.append(scale((1 / float(Nswarms)), zptsum))

    # With fixed end points the swarms only cover the interior points, so the
    # start/end CV values are read separately and added around them.
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: start/end_conf are full systems, so the atom numbering
            # aliases for ndx_atoms; currently worked around in readgro_flat.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)
        sys.stderr.write("Length of initpt %d, targetpt %d\n" % (len(initpt), len(targetpt)))
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # Something with 1-indexing makes this padding point necessary.
    # TODO: check if this is needed anymore
    newpts.append([0] * len(newpts[0]))

    # Reparametrization. ext_rep_pts returns the maximum spread in [0] and the
    # adjusted points in [1]. Only the latest point set is kept: with long CV
    # vectors, storing every iteration costs a lot of memory for no use.
    adjusted = ext_rep_pts(newpts)[1]  # initial iteration, spread ignored
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal.
    while i < 150 and maxspread > 0.012:
        sys.stderr.write("Rep iter %d: \n" % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write(" maxspread was %f\n" % maxspread)
        adjusted = rep_it[1]
        i += 1

    sys.stderr.write("Final maximum spread %f after %d iterations.\n" % (maxspread, i))

    # Delete the padding point again.
    adjusted = adjusted[:-1]
    sys.stderr.write("Length of the adjusted vector: %d\n" % len(adjusted))

    if use_posres == 1:
        # Set for O(1) membership tests on every atom row.
        # Assumes the index entries are hashable atom numbers - TODO confirm.
        selected_atoms = set(ndx_atoms)

    # Write the CV control data for the next iteration.
    npoints = len(adjusted)
    for k in range(npoints):
        # The end points are bypassed by the caller when they are fixed.
        if fix_endpoints == 1 and (k == 0 or k == npoints - 1):
            continue

        pathpoint = adjusted[k]

        if use_posres == 1:
            # Read the previous resconf *before* opening the output, so
            # nothing is lost should the two paths ever alias.
            with open(last_resconfs[k], "r") as in_resconf_f:
                in_resconf = in_resconf_f.readlines()

            # pathpoint is the 1-D list of positions: x,y,z * atoms in index
            if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
                sys.stderr.write("adjusted[] entry of wrong length %d\n" % len(pathpoint))

            # TODO: maybe this chunk of code could be done by the rwgro module.
            with open("rep_resconf_%d.gro" % k, "w") as rep_resconf:
                # Copy the first 2 rows (title and number of atoms) as-is.
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])

                # Only positions are copied; velocities are not needed since
                # these files only serve as position-restraint references.
                cvpos = 0
                for line in in_resconf[2:-1]:
                    resname = line[0:8]
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    x = float(line[20:28])
                    y = float(line[28:36])
                    z = float(line[36:44])
                    if atomnr in selected_atoms:
                        # Update to the reparametrized coordinates.
                        x, y, z = pathpoint[cvpos], pathpoint[cvpos + 1], pathpoint[cvpos + 2]
                        cvpos += 3
                    rep_resconf.write("%s%s%5d%8.3f%8.3f%8.3f\n" % (resname, atname, atomnr, x, y, z))

                # Copy the last row, which holds the cell dimensions.
                rep_resconf.write(in_resconf[-1])
        else:
            for chain in range(Nchains):
                # Read and parse all inputs *before* opening the output for
                # writing, in case the previous .itp has the same path.
                with open(includes[k][chain], "r") as in_itpf:
                    in_itp = in_itpf.read()
                moltop = in_itp.split("[ dihedral_restraints ]")[0]

                if Nchains == 1:
                    protein = molecule(top)
                else:
                    protein = molecule("%s" % includes[k][chain])

                # Lookup table: residue number -> {atom name: atom number} for
                # the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ("CA", "N", "C"):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))
                with open("res_%d_chain_%d.itp" % (k, chain), "w") as restraint_itp:
                    restraint_itp.write("%s" % moltop)
                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write("; ai aj ak al type phi dphi kfac phiB dphiB kfacB\n")

                    pos = 0
                    for r in rsel:
                        here = dih_atoms[r]
                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [dih_atoms[r - 1]["C"], here["N"], here["CA"], here["C"]]
                        # psi is N, CA and C on this residue and N on the next
                        psi = [here["N"], here["CA"], here["C"], dih_atoms[r + 1]["N"]]

                        # NOTE(review): with Nchains > 1 the psi index of one
                        # chain equals the phi index of the next; if the vector
                        # holds (phi, psi) pairs per chain this should probably
                        # be pos + 2 * chain. Left unchanged - verify against
                        # readxvg_flat.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]
                        # Go to the next residue (2 values * number of chains).
                        pos += 2 * Nchains

                        # KFAC is a searchable placeholder for the force
                        # constant, replaced later for each stage.
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0)
                        )
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d KFAC\n" % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0)
                        )