예제 #1
0
def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n, ndxfn, Nchains):
    """Write per-string-point topologies and position-restraint .itp files.

    For every string point ``k`` in ``range(n)`` this writes ``topol_<k>.top``
    and, for every chain ``mol`` in ``range(Nchains)``,
    ``res_<k>_chain_<mol>.itp`` relative to the current working directory.
    Each restraint file ends with a ``[ position_restraints ]`` section that
    has one row per selected atom, with the literal placeholder ``KFAC`` in
    the force-constant columns.

    Args:
        inp: Not used by this variant.
        initial_confs: Not used by this variant (position interpolation is
            still a TODO in the original code).
        start: Not used by this variant.
        end: Not used by this variant.
        tpr: Not used by this variant.
        top: Path of the system topology file to rewrite.
        includes: Sequence of per-chain .itp paths, indexed by chain; may be
            empty, in which case the restraint include is spliced into the
            topology at the '; Include Position restraint file' marker.
        n: Number of string points; converted with ``int()``.
        ndxfn: Index file handed to ``res_selection.read_ndx``.
        Nchains: Number of chains.

    Raises:
        IndexError: If ``includes`` is empty and ``top`` does not contain the
            '; Include Position restraint file' marker, or if ``includes`` has
            fewer than ``Nchains`` entries when it is indexed.
    """
    n = int(n)  # number of points in the string, including start and end point

    ndx_atoms = res_selection.read_ndx(ndxfn)
    have_includes = len(includes) > 0

    sys.stderr.write('%s' % includes)

    # Read the source topology once: it is the same for every string point, and
    # reading it up front keeps it intact even if `top` happens to be one of the
    # topol_<k>.top files written below.
    with open(top) as in_topf:
        top_text = in_topf.read()

    # Rewrite the topology to include the res itp files instead of the original
    # per-chain itps (if any). There will be one topol_<k>.top per string point.
    for k in range(n):
        in_top = top_text
        if have_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so '.' and
                # other metacharacters in it must not be interpreted as such.
                in_top = in_top.replace(includename,
                                        'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    if not have_includes:
        # Pieces of the topology around the marker where the include is spliced.
        top_parts = top_text.split('; Include Position restraint file')

    # Write out the restraint atom and force spec for each point and chain.
    # The restraint positions are not stored in these files, so the rows are the
    # same for all points and chains.
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of the chain's own molecule topology
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                if have_includes:
                    # NOTE(review): the parsed molecule is not used below; the
                    # call is kept in case parsing validates the file - confirm
                    # and drop if it has no such purpose.
                    molecule(includes[mol])
                else:
                    with open('topol_%d.top' % k, 'w') as out_top:
                        molecule(top)  # see NOTE(review) above
                        out_top.write(top_parts[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(top_parts[1])

                # One row per atom in the selection index, with the KFAC force
                # constant placeholder.
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom  type      fx      fy      fz\n")

                for a in ndx_atoms:
                    if a < 5566:  # GLIC HACK: only write one chain, and do it relative atom 1 since the .itp maps to the topology molecule.
                        restraint_itp.write("%6d     1  KFAC  KFAC  KFAC\n" % int(a))
예제 #2
0
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write per-string-point topologies and dihedral-restraint .itp files.

    For every string point ``k`` in ``range(n)`` this writes ``topol_<k>.top``
    and, for every chain ``mol`` in ``range(Nchains)``,
    ``res_<k>_chain_<mol>.itp`` relative to the current working directory.
    Each restraint file gets a ``[ dihedral_restraints ]`` section with one phi
    and one psi row per selected residue; the force constant is written as the
    literal placeholder ``KFAC``.

    The angle values are either interpolated linearly between ``start_xvg`` and
    ``end_xvg`` (when ``initial_confs`` is None or empty), or read back from a
    ``g_rama`` run on each ``initial_confs[i]``.

    Args:
        inp: Object providing ``getOutputDir()``; only used when
            ``initial_confs`` is given.
        initial_confs: Optional sequence of structure files, one per point.
        start: Structure handed to ``res_selection.res_select``.
        end: Not used by this function.
        start_xvg: Dihedral file for the first point (interpolation mode).
        end_xvg: Dihedral file for the last point (interpolation mode).
        tpr: Run-input file passed to ``g_rama -s``.
        top: Path of the system topology file to rewrite.
        includes: Sequence of per-chain .itp paths, indexed by chain; may be
            empty.
        n: Number of string points; converted with ``int()``.
        ndx_file: Index file handed to ``res_selection.read_ndx``.
        Nchains: Number of chains.

    Raises:
        KeyError: If a selected residue, or the residue before/after it, has no
            matching backbone atom in the parsed topology.
        IndexError: If ``includes`` is empty and ``top`` lacks the
            '; Include Position restraint file' marker.
    """
    # Get the atoms involved with the residues to use for dihedrals (might be
    # more than one atom in the index per residue)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = initial_confs is None or len(initial_confs) == 0
    have_includes = len(includes) > 0

    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Generate the dihedrals from the given initial structures. In this mode
        # the start/end xvg inputs are ignored.
        # TODO: assert that len(initial_confs) == n otherwise?
        stringpts = {}  # 4 levels: stringpoint, residue, chain, phi/psi value

        # NOTE(review): '0%3d.xvg' pads with spaces ('0  1.xvg'); '0%03d' may
        # have been intended. Kept because writer and reader agree on the name.
        # The devnull handle is closed once every child has been waited for.
        with open(os.devnull, 'w') as FNULL:  # dont generate spam from g_rama
            # Run g_rama (in parallel) on each structure
            # TODO: check for and use g_rama_mpi.. like everywhere else
            ramaprocs = []
            for i in range(n):
                ramaprocs.append(
                    Popen(['g_rama', '-f', initial_confs[i], '-s', tpr,
                           '-o', '0%3d.xvg' % i],
                          stdout=FNULL, stderr=FNULL))

            for i, proc in enumerate(ramaprocs):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding g_rama task has ended
                proc.communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    sys.stderr.write('%s' % includes)

    # Read the source topology once: it is the same for every string point, and
    # reading it up front keeps it intact even if `top` happens to be one of the
    # topol_<k>.top files written below.
    with open(top) as in_topf:
        top_text = in_topf.read()

    # Rewrite the topology to include the res itp files instead of the original
    # per-chain itps (if any). There will be one topol_<k>.top per string point.
    for k in range(n):
        in_top = top_text
        if have_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so '.' and
                # other metacharacters in it must not be interpreted as such.
                in_top = in_top.replace(includename,
                                        'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    if not have_includes:
        # Pieces of the topology around the marker where the include is spliced.
        top_parts = top_text.split('; Include Position restraint file')

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of the chain's own molecule topology
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  type phi  dphi  kfac\n")

                if have_includes:
                    protein = molecule(includes[mol])
                else:
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        out_top.write(top_parts[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(top_parts[1])

                # Lookup table: residue number -> {atom name: atom number} for
                # the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                for r in selection:
                    here = dih_atoms[r]
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], here['N'], here['CA'], here['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [here['N'], here['CA'], here['C'], dih_atoms[r + 1]['N']]

                    if use_interpolation:
                        phi_val = startpts[r][mol][0]
                        psi_val = startpts[r][mol][1]
                        # k runs from 0 to n-1; with a single point there is
                        # nothing to interpolate (and n - 1 would be zero).
                        if n > 1:
                            phi_val = phi_val + k * (endpts[r][mol][0] - phi_val) / (n - 1)
                            psi_val = psi_val + k * (endpts[r][mol][1] - psi_val) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[]
                        # structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Different stages need different force constants, so a
                    # searchable placeholder (KFAC) is written here and replaced
                    # later. In the Gromacs 4.6+ format the k-factor lives in
                    # this file rather than in the .mdp.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
예제 #3
0
def reparametrize(use_posres, fix_endpoints, cvs, ndx_file, Nchains,
                  start_conf, start_xvg, end_conf, end_xvg, last_resconfs, top,
                  includes):
    """Average the swarm drift, reparametrize the string and write new restraints.

    For each string point the CV vectors of its swarm runs are summed and scaled
    by ``1 / len(cvs[0])``. The resulting points are passed repeatedly through
    ``ext_rep_pts`` (at most 150 further iterations, stopping once the reported
    maximum spread is no longer above 0.012). The adjusted points are then
    written out relative to the current working directory:

    * ``use_posres == 1``: ``rep_resconf_<k>.gro``, a copy of
      ``last_resconfs[k]`` with the coordinates of the indexed atoms replaced.
    * otherwise: ``res_<k>_chain_<chain>.itp``, the part of
      ``includes[k][chain]`` before '[ dihedral_restraints ]' followed by a new
      dihedral restraint section with ``KFAC`` as force-constant placeholder.

    Args:
        use_posres: 1 selects position CVs (.gro files), 0 selects dihedral
            CVs (.xvg files). NOTE(review): any other value leaves the residue
            selection undefined and fails in the dihedral branches.
        fix_endpoints: When 1, the first and last points are read from the
            start/end inputs instead of ``cvs`` and no output is written for
            them.
        cvs: Per string point, a sequence of swarm result files.
        ndx_file: Index file handed to ``res_selection.read_ndx``.
        Nchains: Number of chains.
        start_conf: Start structure (also used for the residue selection).
        start_xvg: Start dihedral file.
        end_conf: End structure.
        end_xvg: End dihedral file.
        last_resconfs: Per string point, the previous restraint .gro file.
        top: System topology, parsed when ``Nchains == 1``.
        includes: Per string point and chain, the .itp file to copy the
            molecule topology from (indexed ``includes[k][chain]``).

    Returns:
        None. Progress information is written to stderr.
    """
    Nswarms = len(cvs[0])

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals the atoms are mapped to residues; the position restraints
    # use the atom indices directly.
    # TODO: have to figure out or input atoms per chain in the .gro's so the
    # atom selection can be repeated Nchains times for the posres case.
    if use_posres == 0:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select('%s' % start_conf, ndx_atoms)

    # Calculate the average drift in CV space.
    # newpts holds one flat CV vector per processed string point. Depending on
    # fix_endpoints, cvs may lack the two end points; they are added below.
    newpts = []
    for swarm_files in cvs:
        if use_posres == 1:
            swarmpts = [rwgro.readgro_flat(fn, ndx_atoms) for fn in swarm_files]
        else:
            swarmpts = [readxvg.readxvg_flat(fn, rsel) for fn in swarm_files]
        zptsum = reduce(mapadd, swarmpts)
        newpts.append(scale((1 / float(Nswarms)), zptsum))

    # Read in the fixed start and end CV values for the fix_endpoints case
    # (otherwise the end points drift like the others and are already in newpts)
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: the start/end_conf are full systems, so the atom numbering
            # aliases for the ndx_atoms array.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write('Length of initpt %d, targetpt %d\n' %
                         (len(initpt), len(targetpt)))

        # Insert the start/end in the beginning and last of newpts
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # something with 1 indexing makes this padding necessary.
    # TODO: check if this is needed anymore
    newpts.append([0] * len(newpts[0]))

    # Do the actual reparameterization. ext_rep_pts is used as returning the
    # maximum spread in [0] and the adjusted points in [1].

    # Initial iteration: keep the points only, ignore the spread result
    adjusted = ext_rep_pts(newpts)[1]

    # Keep iterating, feeding the previous result back in. Only the latest
    # result is retained; earlier iterates are never read again and the vectors
    # can be large. Abort early when the maximum spread drops to the threshold.
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal
    while i < 150 and maxspread > 0.012:
        sys.stderr.write('Rep iter %d: \n' % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write('  maxspread was %f\n' % maxspread)
        adjusted = rep_it[1]
        i = i + 1

    sys.stderr.write('Final maximum spread %f after %d iterations.\n' %
                     (maxspread, i))

    # delete the padding point
    adjusted = adjusted[:-1]
    newpts = newpts[:-1]

    # Possibility to test skipping reparametrize by uncommenting the next row.
    #adjusted = newpts

    sys.stderr.write('Length of the adjusted vector: %d\n' % len(adjusted))

    if use_posres == 1:
        # Set membership instead of scanning the index list for every atom row
        restrained = set(ndx_atoms)

    # Write the CV control data for the next iteration.
    last_k = len(adjusted) - 1
    for k in range(len(adjusted)):
        # No output needed for the start/end points in the fix_endpoints case
        if fix_endpoints == 1 and (k == 0 or k == last_k):
            continue

        pathpoint = adjusted[k]  # flat list of CVs for this string point

        if use_posres == 1:
            # Read the previous (input) resconf before opening the output, so
            # the input survives even if both names refer to the same file.
            with open(last_resconfs[k], 'r') as in_resconf_f:
                in_resconf = in_resconf_f.readlines()

            if len(pathpoint) != (1555 * 3):  # assert on GLIC length (TODO)
                sys.stderr.write('adjusted[] entry of wrong length %d\n' %
                                 len(pathpoint))

            # The output resconf goes into the next iteration as minimization
            # target
            with open('rep_resconf_%d.gro' % k, 'w') as rep_resconf:
                # Copy the first 2 rows (title and number of atoms) straight over
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])
                # Update the xyz coordinates of the atoms the reparametrize step
                # moved. Only positions are copied; velocities are dropped.
                cvpos = 0
                for line in in_resconf[2:-1]:
                    resname = line[0:8]
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    if atomnr in restrained:
                        # Update to new coords: x, y, z are consecutive entries
                        x, y, z = (pathpoint[cvpos], pathpoint[cvpos + 1],
                                   pathpoint[cvpos + 2])
                        cvpos += 3
                    else:
                        x = float(line[20:28])
                        y = float(line[28:36])
                        z = float(line[36:44])
                    # Write out the row, updated or not
                    rep_resconf.write('%s%s%5d%8.3f%8.3f%8.3f\n' %
                                      (resname, atname, atomnr, x, y, z))
                # Copy the last row which was the cell dimensions
                rep_resconf.write(in_resconf[-1])
        else:
            for chain in range(Nchains):
                # Read and parse every input before the output file is opened
                # for writing, so an input that shares its name with the output
                # is not truncated before it has been read.
                with open(includes[k][chain], 'r') as in_itpf:
                    moltop = in_itpf.read().split('[ dihedral_restraints ]')[0]

                if Nchains == 1:
                    protein = molecule(top)
                else:
                    protein = molecule('%s' % includes[k][chain])

                # Lookup table: residue number -> {atom name: atom number} for
                # the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                with open('res_%d_chain_%d.itp' % (k, chain),
                          'w') as restraint_itp:
                    restraint_itp.write('%s' % moltop)

                    sys.stderr.write(
                        "Writing restraints for stringpoint %d chain %d\n" %
                        (k, chain))
                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write(
                        "; ai   aj   ak   al  type     phi    dphi    kfac   phiB    dphiB    kfacB\n"
                    )

                    pos = 0
                    for r in rsel:
                        here = dih_atoms[r]
                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [dih_atoms[r - 1]['C'], here['N'], here['CA'],
                               here['C']]
                        # psi is N, CA and C on this residue and N on the next
                        psi = [here['N'], here['CA'], here['C'],
                               dih_atoms[r + 1]['N']]

                        # get phi and psi values from the reparametrization
                        # vector.
                        # NOTE(review): with Nchains > 1 the offsets pos + chain
                        # and pos + chain + 1 overlap between neighbouring
                        # chains; pos + 2 * chain may be intended. Left as is
                        # because the layout produced by readxvg_flat is not
                        # visible here - confirm against that function.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]

                        # Go to the next residue (2 values * number of chains)
                        pos += 2 * Nchains

                        # Different stages need different force constants, so a
                        # searchable placeholder (KFAC) is written here and
                        # replaced later.
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" %
                            (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" %
                            (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
예제 #4
0
def write_restraints(inp, initial_confs, start, end, start_xvg, end_xvg, tpr, top, includes, n, ndx_file, Nchains):
    """Write per-string-point topologies and dihedral-restraint .itp files.

    For every string point ``k`` in ``range(n)`` this writes ``topol_<k>.top``
    and, for every chain ``mol`` in ``range(Nchains)``,
    ``res_<k>_chain_<mol>.itp`` relative to the current working directory.
    Each restraint file gets a ``[ dihedral_restraints ]`` section with one phi
    and one psi row per selected residue; the force constant is written as the
    literal placeholder ``KFAC``.

    The angle values are either interpolated linearly between ``start_xvg`` and
    ``end_xvg`` (when ``initial_confs`` is None or empty), or read back from a
    run of the command named by ``cmds.GromacsCommands().rama`` on each
    ``initial_confs[i]``.

    Args:
        inp: Object providing ``getOutputDir()``; only used when
            ``initial_confs`` is given.
        initial_confs: Optional sequence of structure files, one per point.
        start: Structure handed to ``res_selection.res_select``.
        end: Not used by this function.
        start_xvg: Dihedral file for the first point (interpolation mode).
        end_xvg: Dihedral file for the last point (interpolation mode).
        tpr: Run-input file passed to the rama command with ``-s``.
        top: Path of the system topology file to rewrite.
        includes: Sequence of per-chain .itp paths, indexed by chain; may be
            empty.
        n: Number of string points; converted with ``int()``.
        ndx_file: Index file handed to ``res_selection.read_ndx``.
        Nchains: Number of chains.

    Raises:
        KeyError: If a selected residue, or the residue before/after it, has no
            matching backbone atom in the parsed topology.
        IndexError: If ``includes`` is empty and ``top`` lacks the
            '; Include Position restraint file' marker.
    """
    cmdnames = cmds.GromacsCommands()
    # Get the atoms involved with the residues to use for dihedrals (might be
    # more than one atom in the index per residue)
    ndx_atoms = res_selection.read_ndx(ndx_file)
    # Map them to each affected residue so we just get the residue numbers back
    selection = res_selection.res_select(start, ndx_atoms)

    n = int(n)  # number of points in the string, including start and end point

    use_interpolation = initial_confs is None or len(initial_confs) == 0
    have_includes = len(includes) > 0

    if use_interpolation:
        # Read the starting and ending dihedrals for later interpolation
        startpts = readxvg.readxvg(start_xvg, selection)
        endpts = readxvg.readxvg(end_xvg, selection)
    else:
        # Generate the dihedrals from the given initial structures. In this mode
        # the start/end xvg inputs are ignored.
        # TODO: assert that len(initial_confs) == n otherwise?
        stringpts = {}  # 4 levels: stringpoint, residue, chain, phi/psi value

        # NOTE(review): '0%3d.xvg' pads with spaces ('0  1.xvg'); '0%03d' may
        # have been intended. Kept because writer and reader agree on the name.
        # The devnull handle is closed once every child has been waited for.
        with open(os.devnull, 'w') as FNULL:  # dont generate spam from rama
            # Run the rama command (in parallel) on each structure
            rama_base = cmdnames.rama.split()
            ramaprocs = []
            for i in range(n):
                cmd = rama_base + ['-f', initial_confs[i], '-s', tpr,
                                   '-o', '0%3d.xvg' % i]
                ramaprocs.append(Popen(cmd, stdout=FNULL, stderr=FNULL))

            for i, proc in enumerate(ramaprocs):
                xvg_i = os.path.join(inp.getOutputDir(), '0%3d.xvg' % i)
                # Make sure the corresponding rama task has ended
                proc.communicate()
                # Read back and parse like for the start/end_xvg above
                stringpts[i] = readxvg.readxvg(xvg_i, selection)

    sys.stderr.write('%s' % includes)

    # Read the source topology once: it is the same for every string point, and
    # reading it up front keeps it intact even if `top` happens to be one of the
    # topol_<k>.top files written below.
    with open(top) as in_topf:
        top_text = in_topf.read()

    # Rewrite the topology to include the res itp files instead of the original
    # per-chain itps (if any). There will be one topol_<k>.top per string point.
    for k in range(n):
        in_top = top_text
        if have_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so '.' and
                # other metacharacters in it must not be interpreted as such.
                in_top = in_top.replace(includename,
                                        'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    if not have_includes:
        # Pieces of the topology around the marker where the include is spliced.
        top_parts = top_text.split('; Include Position restraint file')

    # Generate/copy and write-out the dihedrals for each point
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of the chain's own molecule topology
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())
                # Note: gromacs 4.6+ required
                restraint_itp.write("[ dihedral_restraints ]\n")
                restraint_itp.write("; ai   aj   ak   al  type phi  dphi  kfac\n")

                if have_includes:
                    protein = molecule(includes[mol])
                else:
                    with open('topol_%d.top' % k, 'w') as out_top:
                        protein = molecule(top)
                        out_top.write(top_parts[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' % (k, mol))
                        out_top.write(top_parts[1])

                # Lookup table: residue number -> {atom name: atom number} for
                # the backbone atoms N, CA and C.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ('CA', 'N', 'C'):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                for r in selection:
                    here = dih_atoms[r]
                    # phi is C on the previous residue, and N, CA, C on this
                    phi = [dih_atoms[r - 1]['C'], here['N'], here['CA'], here['C']]
                    # psi is N, CA and C on this residue and N on the next
                    psi = [here['N'], here['CA'], here['C'], dih_atoms[r + 1]['N']]

                    if use_interpolation:
                        phi_val = startpts[r][mol][0]
                        psi_val = startpts[r][mol][1]
                        # k runs from 0 to n-1; with a single point there is
                        # nothing to interpolate (and n - 1 would be zero).
                        if n > 1:
                            phi_val = phi_val + k * (endpts[r][mol][0] - phi_val) / (n - 1)
                            psi_val = psi_val + k * (endpts[r][mol][1] - psi_val) / (n - 1)
                    else:
                        # Use the values extracted from the initial_confs[]
                        # structures above
                        phi_val = stringpts[k][r][mol][0]
                        psi_val = stringpts[k][r][mol][1]

                    # Different stages need different force constants, so a
                    # searchable placeholder (KFAC) is written here and replaced
                    # later. In the Gromacs 4.6+ format the k-factor lives in
                    # this file rather than in the .mdp.
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0))
                    restraint_itp.write("%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n"
                                        % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0))
예제 #5
0
def write_restraints(inp, initial_confs, start, end, tpr, top, includes, n,
                     ndxfn, Nchains):
    """Write per-string-point topologies and position-restraint .itp files.

    For every string point ``k`` in ``range(n)`` this writes ``topol_<k>.top``
    and, for every chain ``mol`` in ``range(Nchains)``,
    ``res_<k>_chain_<mol>.itp`` relative to the current working directory.
    Each restraint file ends with a ``[ position_restraints ]`` section that
    has one row per selected atom, with the literal placeholder ``KFAC`` in
    the force-constant columns.

    Args:
        inp: Not used by this variant.
        initial_confs: Not used by this variant (position interpolation is
            still a TODO in the original code).
        start: Not used by this variant.
        end: Not used by this variant.
        tpr: Not used by this variant.
        top: Path of the system topology file to rewrite.
        includes: Sequence of per-chain .itp paths, indexed by chain; may be
            empty, in which case the restraint include is spliced into the
            topology at the '; Include Position restraint file' marker.
        n: Number of string points; converted with ``int()``.
        ndxfn: Index file handed to ``res_selection.read_ndx``.
        Nchains: Number of chains.

    Raises:
        IndexError: If ``includes`` is empty and ``top`` does not contain the
            '; Include Position restraint file' marker, or if ``includes`` has
            fewer than ``Nchains`` entries when it is indexed.
    """
    n = int(n)  # number of points in the string, including start and end point

    ndx_atoms = res_selection.read_ndx(ndxfn)
    have_includes = len(includes) > 0

    sys.stderr.write('%s' % includes)

    # Read the source topology once: it is the same for every string point, and
    # reading it up front keeps it intact even if `top` happens to be one of the
    # topol_<k>.top files written below.
    with open(top) as in_topf:
        top_text = in_topf.read()

    # Rewrite the topology to include the res itp files instead of the original
    # per-chain itps (if any). There will be one topol_<k>.top per string point.
    for k in range(n):
        in_top = top_text
        if have_includes:
            for mol in range(Nchains):
                includename = includes[mol].split('/')[-1]
                # Literal replacement: the file name is not a regex, so '.' and
                # other metacharacters in it must not be interpreted as such.
                in_top = in_top.replace(includename,
                                        'res_%d_chain_%d.itp' % (k, mol))
        with open('topol_%d.top' % k, 'w') as out_top:
            out_top.write(in_top)

    if not have_includes:
        # Pieces of the topology around the marker where the include is spliced.
        top_parts = top_text.split('; Include Position restraint file')

    # Write out the restraint atom and force spec for each point and chain.
    # The restraint positions are not stored in these files, so the rows are the
    # same for all points and chains.
    for k in range(n):
        for mol in range(Nchains):
            with open('res_%d_chain_%d.itp' % (k, mol), 'w') as restraint_itp:
                if Nchains > 1:
                    # Start from a copy of the chain's own molecule topology
                    with open(includes[mol]) as moltop_f:
                        restraint_itp.write(moltop_f.read())

                if have_includes:
                    # NOTE(review): the parsed molecule is not used below; the
                    # call is kept in case parsing validates the file - confirm
                    # and drop if it has no such purpose.
                    molecule(includes[mol])
                else:
                    with open('topol_%d.top' % k, 'w') as out_top:
                        molecule(top)  # see NOTE(review) above
                        out_top.write(top_parts[0])
                        out_top.write('#include "res_%d_chain_%d.itp"\n' %
                                      (k, mol))
                        out_top.write(top_parts[1])

                # One row per atom in the selection index, with the KFAC force
                # constant placeholder.
                restraint_itp.write("\n[ position_restraints ]\n")
                restraint_itp.write("; atom  type      fx      fy      fz\n")

                for a in ndx_atoms:
                    if a < 5566:  # GLIC HACK: only write one chain, and do it relative atom 1 since the .itp maps to the topology molecule.
                        restraint_itp.write("%6d     1  KFAC  KFAC  KFAC\n" %
                                            int(a))
예제 #6
0
def reparametrize(
    use_posres,
    fix_endpoints,
    cvs,
    ndx_file,
    Nchains,
    start_conf,
    start_xvg,
    end_conf,
    end_xvg,
    last_resconfs,
    top,
    includes,
):
    """Average the swarm drift per string point, reparametrize, and write restraints.

    For every string point the CV vectors of its swarm trajectories are read
    and averaged. The resulting string (optionally with fixed start/end points
    added) is passed repeatedly through ``ext_rep_pts`` until the maximum
    spread it reports drops to 0.012 or below, or 150 iterations have run.
    The adjusted points are then written out as restraint input for the next
    iteration:

    * position-restraint mode: ``rep_resconf_<k>.gro`` per string point;
    * dihedral mode: ``res_<k>_chain_<chain>.itp`` per string point and chain.

    Args:
        use_posres: 1 selects position-restraint mode (CVs are read with
            ``rwgro.readgro_flat``); 0 selects dihedral mode (CVs are read
            with ``readxvg.readxvg_flat`` using a residue selection). Other
            values are not supported: the residue selection is only built
            for 0.
        fix_endpoints: 1 to read the start/end points from ``start_*`` /
            ``end_*`` and add them first/last in the string; no output is
            written for those two points.
        cvs: per string point, a sequence of per-swarm CV inputs that are
            handed to the readers above (presumably file paths).
        ndx_file: index file passed to ``res_selection.read_ndx``.
        Nchains: number of chains restraints are written for (dihedral mode).
        start_conf: start configuration; used for the residue selection in
            dihedral mode and as fixed start point in position-restraint mode.
        start_xvg: fixed start point source in dihedral mode.
        end_conf: fixed end point source in position-restraint mode.
        end_xvg: fixed end point source in dihedral mode.
        last_resconfs: per string point, path of the previous restraint
            ``.gro`` file that is used as template (position-restraint mode).
        top: topology passed to ``molecule`` when ``Nchains == 1``.
        includes: ``includes[k][chain]`` is the path of the per-chain ``.itp``
            of string point ``k``; it is copied up to its
            ``[ dihedral_restraints ]`` section and, for ``Nchains != 1``,
            also passed to ``molecule``.

    Returns:
        None. The results are the files written to the current directory and
        progress messages on stderr.
    """
    # reduce is not a builtin on Python 3; functools has it on 2.6+ and 3.x.
    from functools import reduce

    ndx_atoms = res_selection.read_ndx(ndx_file)

    # For dihedrals, the atoms are mapped to residues for a single chain, and readxvg etc. will read the
    # entire file and select the same residues in each chain. The position restraints use the atom
    # indices directly instead.
    #
    # TODO: have to figure out or input atoms per chain in the .gro's so the atom selection can be
    # repeated Nchains times for the posres case. The ndx file is for atoms inside the chain, but the
    # .gro will contain global numbering. The chain repeat could be detected in rwgro by looking for a
    # repeating first residue name.
    if use_posres == 0:
        # Map atoms to residues for the dihedral selection
        rsel = res_selection.res_select("%s" % start_conf, ndx_atoms)

    # Calculate the average drift in CV space.
    # newpts is a per-string-point list of CV points (each a flat list of the CV dimension length).
    #
    # Note: cvs is indexed after the number of string points that actually were swarm-processed, so
    # depending on fix_endpoints it may or may not include the start/end points. If it does not, they
    # are added to newpts further below.
    newpts = []
    for swarm_inputs in cvs:
        if use_posres == 1:
            swarmpts = [rwgro.readgro_flat(src, ndx_atoms) for src in swarm_inputs]
        else:
            swarmpts = [readxvg.readxvg_flat(src, rsel) for src in swarm_inputs]
        zptsum = reduce(mapadd, swarmpts)
        # Divide by the number of vectors actually summed for this point, so the mean stays correct
        # even if the string points do not all have the same number of swarm members.
        newpts.append(scale(1.0 / len(swarmpts), zptsum))

    # Read in the fixed start and end CV values for the fix_endpoints case (otherwise the start/end
    # drift just like the other points and are already part of newpts).
    if fix_endpoints == 1:
        if use_posres == 1:
            # TODO: the start/end_conf are full systems so the atom numbering aliases for the
            # ndx_atoms array. Currently handled inside readgro_flat, hardcoded for GLIC.
            initpt = rwgro.readgro_flat(start_conf, ndx_atoms)
            targetpt = rwgro.readgro_flat(end_conf, ndx_atoms)
        else:
            initpt = readxvg.readxvg_flat(start_xvg, rsel)
            targetpt = readxvg.readxvg_flat(end_xvg, rsel)

        sys.stderr.write("Length of initpt %d, targetpt %d\n" % (len(initpt), len(targetpt)))

        # Insert the start/end in the beginning and last of newpts
        newpts.insert(0, initpt)
        newpts.append(targetpt)

    # Something with 1-indexing makes this padding point necessary. TODO: check if it is still needed.
    newpts.append([0] * len(newpts[0]))

    # Do the actual reparametrization.
    # ext_rep_pts takes the 2D list (one flat CV list per string point) and returns the maximum spread
    # of the CV distances between points in [0] and the adjusted points in [1].

    # Initial iteration: keep the points only, ignore the spread result
    adjusted = ext_rep_pts(newpts)[1]

    # Keep iterating, feeding the previous result into ext_rep_pts again. Only the latest iterate is
    # kept; with long CV vectors every stored copy of the string is large.
    # Note that with long CV vectors (> 4000 dimensions) the iterations take a long time, so stop early
    # once the maximum spread between points goes below the threshold.
    i = 0
    maxspread = 100.0
    # Do max 150 iterations even if we don't reach our goal
    while i < 150 and maxspread > 0.012:
        sys.stderr.write("Rep iter %d: \n" % i)
        sys.stderr.flush()
        rep_it = ext_rep_pts(adjusted)
        maxspread = rep_it[0]
        sys.stderr.write("  maxspread was %f\n" % maxspread)
        adjusted = rep_it[1]
        i = i + 1

    sys.stderr.write("Final maximum spread %f after %d iterations.\n" % (maxspread, i))

    # Delete the padding point
    adjusted = adjusted[:-1]

    # To test skipping the reparametrization, set adjusted to the averaged points (without the padding
    # point) here. The string points will then drift along the string and probably end up in the
    # endpoints or a minimum along the string.

    sys.stderr.write("Length of the adjusted vector: %d\n" % len(adjusted))

    # Write the CV control data for the next iteration.
    # The output expected for the posres case is rep_resconf_%d.gro for each string point.
    # For dihedrals it is res_%d_chain_%d.itp for each string point and chain.

    last_index = len(adjusted) - 1
    # Set lookup instead of scanning the index list once per .gro row.
    selected_atoms = set(ndx_atoms) if use_posres == 1 else None

    for k in range(len(adjusted)):
        # Not necessary to do this output for the start/end points in the fix_endpoints case, the data
        # is just bypassed in the caller script.
        if fix_endpoints == 1 and k in (0, last_index):
            continue

        pathpoint = adjusted[k]  # flat list of CVs for this string point

        if use_posres == 1:
            # Read the previous (input) resconf first, so the output is not created/truncated when
            # the input is missing or happens to be the same file.
            with open(last_resconfs[k], "r") as in_resconf_f:
                in_resconf = in_resconf_f.readlines()

            # pathpoint holds x,y,z for each atom in the index
            if len(pathpoint) != (1555 * 3):  # sanity check on the GLIC length (TODO)
                sys.stderr.write("adjusted[] entry of wrong length %d\n" % len(pathpoint))

            # TODO: maybe this chunk of code could be done by the rwgro module for us.
            # The output resconf goes into the next iteration as minimization target.
            with open("rep_resconf_%d.gro" % k, "w") as rep_resconf:
                # Copy the first 2 rows (title and number of atoms) straight over
                rep_resconf.write(in_resconf[0])
                rep_resconf.write(in_resconf[1])
                # Go through the atom rows and update the xyz coordinates of the atoms that the
                # reparametrization moved. Only positions are copied; velocities are not needed since
                # these files only serve as position restraint coordinates.
                cvpos = 0
                for line in in_resconf[2:-1]:
                    resname = line[0:8]
                    atname = line[8:15]
                    atomnr = int(line[15:20])
                    if atomnr in selected_atoms:
                        # Update to new coords
                        x, y, z = pathpoint[cvpos], pathpoint[cvpos + 1], pathpoint[cvpos + 2]
                        cvpos += 3
                    else:
                        x = float(line[20:28])
                        y = float(line[28:36])
                        z = float(line[36:44])
                    # Write out the row, updated or not
                    rep_resconf.write("%s%s%5d%8.3f%8.3f%8.3f\n" % (resname, atname, atomnr, x, y, z))
                # Copy the last row which was the cell dimensions
                rep_resconf.write(in_resconf[-1])
        else:
            # pathpoint is a flat list of phi/psi angles here
            for chain in range(Nchains):
                chain_itp = includes[k][chain]

                # Read everything before the old restraint section before the output is opened, so
                # the output is not truncated when the input is missing or is the same file.
                with open(chain_itp, "r") as in_itpf:
                    moltop = in_itpf.read().split("[ dihedral_restraints ]")[0]

                sys.stderr.write("Writing restraints for stringpoint %d chain %d\n" % (k, chain))

                if Nchains == 1:
                    protein = molecule(top)
                else:
                    protein = molecule("%s" % chain_itp)

                # Lookup table for the protein topology that maps residue number to the atom indices
                # of the backbone atoms N, CA and C used by the dihedrals.
                dih_atoms = {}
                for a in protein:
                    if a.atomname in ("CA", "N", "C"):
                        dih_atoms.setdefault(a.resnr, {})[a.atomname] = a.atomnr

                with open("res_%d_chain_%d.itp" % (k, chain), "w") as restraint_itp:
                    restraint_itp.write("%s" % moltop)

                    # Note: this format is for Gromacs 4.6+
                    restraint_itp.write("[ dihedral_restraints ]\n")
                    restraint_itp.write("; ai   aj   ak   al  type     phi    dphi    kfac   phiB    dphiB    kfacB\n")

                    pos = 0
                    for r in rsel:
                        # Atom numbers for the phi and psi dihedrals (4 atoms each).
                        # phi is C on the previous residue, and N, CA, C on this
                        phi = [dih_atoms[r - 1]["C"], dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"]]
                        # psi is N, CA and C on this residue and N on the next
                        psi = [dih_atoms[r]["N"], dih_atoms[r]["CA"], dih_atoms[r]["C"], dih_atoms[r + 1]["N"]]

                        # Get phi and psi values from the reparametrization vector.
                        # NOTE(review): with this indexing the psi slot of chain c is the phi slot of
                        # chain c + 1 when Nchains > 1 -- confirm against the readxvg_flat layout.
                        phi_val = pathpoint[pos + chain]
                        psi_val = pathpoint[pos + chain + 1]

                        # Go to the next residue (phi,psi vals * number of chains apart)
                        pos += 2 * Nchains

                        # Write phi, psi angles and k-factor. In the Gromacs 4.6+ format the k-factor
                        # is in this file (before, it was in the .mdp as dihre_fc). Different stages
                        # need different force constants, so a searchable KFAC placeholder is written
                        # and replaced later.
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" % (phi[0], phi[1], phi[2], phi[3], 1, phi_val, 0)
                        )
                        restraint_itp.write(
                            "%5d%5d%5d%5d%5d %8.4f%5d  KFAC\n" % (psi[0], psi[1], psi[2], psi[3], 1, psi_val, 0)
                        )