Ejemplo n.º 1
0
def test_unwrap(wrap_universes):
    """After unwrapping, the second fixture universe must match the first.

    ``wrap_universes`` yields a ``(reference, to_transform)`` pair; the
    ``unwrap`` transformation is applied to the current timestep of the
    second universe and the resulting positions are compared with the
    reference positions to six decimals.
    """
    reference_universe, moved_universe = wrap_universes
    # Build the transformation for all atoms, then apply it to the live frame.
    make_whole = unwrap(moved_universe.atoms)
    make_whole(moved_universe.trajectory.ts)
    actual = moved_universe.trajectory.ts.positions
    expected = reference_universe.trajectory.ts.positions
    assert_array_almost_equal(actual, expected, decimal=6)
Ejemplo n.º 2
0
def centre_protein_gh(dict_of_systs, wrap=False, cent="geometry"):
    """Queue centring and fitting transformations on every system in a dict.

    For each entry the protein and ligand are unwrapped, the three protein
    chains and the ligand are moved to the periodic image nearest to chain A,
    the protein/ligand complex is centred in the box, the ions are wrapped
    back into the box and the protein is fitted onto a copy of itself taken
    before the transformations were attached.

    Parameters
    ----------
    dict_of_systs : dict
        Maps a ligand name to a universe-like object exposing
        ``select_atoms``, ``copy`` and ``trajectory.add_transformations``.
        The first three characters of the key are used as the ligand residue
        name.
    wrap : bool
        Forwarded to ``trans.center_in_box`` as its ``wrap`` argument.
    cent : str
        Forwarded to ``trans.center_in_box`` as its ``center`` argument.
        The default ("geometry") matches the value that used to be
        hard-coded, so existing callers are unaffected.

    Returns
    -------
    dict
        The same ``dict_of_systs`` object, with transformations attached to
        each trajectory.
    """

    # The GroupHug class was created by Richard Gowers (https://github.com/richardjgowers)
    # in response to this question on the MDAnalysis forum:
    # https://groups.google.com/forum/#!topic/mdnalysis-discussion/umDpvbCmQiE
    class GroupHug:
        """Transformation shifting groups to the image nearest a centre group."""

        def __init__(self, center, *others):
            self.c = center
            self.o = others

        @staticmethod
        def calc_restoring_vec(ag1, ag2):
            """Return the whole-box translation that brings ``ag2`` next to ``ag1``."""
            # Only the three box lengths are read; box angles are not used.
            box = ag1.dimensions[:3]
            dist = ag1.center_of_mass() - ag2.center_of_mass()

            # Round the separation to an integer number of box lengths.
            return box * np.rint(dist / box)

        def __call__(self, ts):
            # loop over other atomgroups and shunt them into nearest image to center
            for group in self.o:
                group.translate(self.calc_restoring_vec(self.c, group))

            return ts

    # Centre the protein in the box using MDAnalysis
    for ligand_name, u in dict_of_systs.items():
        ligand_resname = ligand_name[:3]
        print(ligand_resname)

        if ligand_resname == "lar":
            print("WARNING: This script should only be used for the NTRK3 6KZD system!")

        # hard code the protein chains for now -> only for 6KZD model
        chainA = u.select_atoms("resid 527-627")
        chainB = u.select_atoms("resid 648-713")
        chainC = u.select_atoms("resid 728-838")
        lig = u.select_atoms("resname " + ligand_resname)
        ions = u.select_atoms("resname NA CL")

        protein = u.select_atoms("protein or resname ACE NME")
        # The fit reference comes from a copy so it is not moved by the
        # transformations attached below.
        reference = u.copy().select_atoms("protein or resname ACE NME")
        protein_and_lig = u.select_atoms("protein or resname ACE NME " + ligand_resname)

        transforms = [
            trans.unwrap(protein),
            trans.unwrap(lig),
            GroupHug(chainA, chainB, chainC, lig),
            # Honour the caller's choice of centre instead of a fixed "geometry".
            trans.center_in_box(protein_and_lig, wrap=wrap, center=cent),
            trans.wrap(ions),
            trans.fit_rot_trans(protein, reference),
        ]

        u.trajectory.add_transformations(*transforms)

    return dict_of_systs
Ejemplo n.º 3
0
def test_unwrap_bad_ag(wrap_universes, ag):
    """``unwrap`` must raise ``AttributeError`` for a non-AtomGroup argument.

    ``ag`` is the invalid object under test; the timestep comes from the
    first universe of the ``wrap_universes`` fixture.
    """
    # this universe has a box size zero
    timestep = wrap_universes[0].trajectory.ts
    with pytest.raises(AttributeError):
        # Either constructing or applying the transformation may raise.
        transformation = unwrap(ag)
        transformation(timestep)
Ejemplo n.º 4
0
def calc_water_angle(trj_file, gro_file, cutoff, dim=2, filepath=''):
    r""" Calculate angle distribution between a water molecule vector and normal of
    a surface

    Water vector:

        ^
        |
        |

        O
       / \
      H   H

    The vector points from the midpoint of the two hydrogens to the oxygen.
    A histogram of the angles (1-degree bins, 0-180) is plotted and saved as
    ``water_angles.pdf`` inside ``filepath``.

    Parameters
    ----------
    trj_file : trajectory file
        MD trajectory to load
    gro_file : Coordinate file
        MD coordinates to load.  MOL2 file is preferred as it contains bond information.
    cutoff : float
        Molecules whose first atom has a coordinate along ``dim`` greater
        than this value are skipped (angstroms)
    dim : int
        Dimension of surface vector (0 -> x, 1 -> y, anything else -> z)
    filepath : str
        Directory containing the input files and receiving the PDF.  An empty
        string (the default) means the current working directory.

    Notes
    -----
    Atoms of the ``resname SOL`` selection are consumed three at a time and
    the first atom of each triple is treated as the oxygen.
    """
    # Local import keeps the block self-contained without touching file imports.
    import os

    if dim == 0:
        normal_vector = [1, 0, 0]
    elif dim == 1:
        normal_vector = [0, 1, 0]
    else:
        normal_vector = [0, 0, 1]
    normal = np.array(normal_vector)

    # os.path.join avoids turning an empty ``filepath`` into the root "/".
    trj_str = os.path.join(filepath, trj_file)
    gro_str = os.path.join(filepath, gro_file)

    universe = mda.Universe(gro_str, trj_str)

    water_groups = universe.select_atoms('resname SOL')
    print("Unwrapping water molecules")
    transform = transformations.unwrap(water_groups)
    universe.trajectory.add_transformations(transform)
    print("Finished unwrapping water molecules")

    angles = []
    print("Starting to analyze vectors ... ")
    # Process one frame at a time rather than storing every frame first.
    for _ in universe.trajectory:
        frame = water_groups.positions
        for start in range(0, len(frame), 3):
            xyz = frame[start:start + 3]

            if xyz[0][dim] > cutoff:
                continue
            # Get midpoint of hydrogens
            midpoint = (xyz[1] + xyz[2]) / 2
            # Draw vector of oxygen going through hydrogen midpoint
            vector = xyz[0] - midpoint

            # angle_between returns radians; store degrees.
            angles.append(angle_between(vector, normal) * (180 / np.pi))

    y, _ = np.histogram(angles, bins=180, density=True, range=(0.0, 180.0))
    # Centres of the 1-degree bins.
    new_x = [idx + 0.5 for idx in range(180)]

    # First figure: line plot (created but not written to disk).
    plt.subplots()
    plt.plot(new_x, y)
    plt.xlim((0, 181))
    plt.ylabel('Count')
    plt.xlabel('Angle (Deg)')

    # Second figure: bar plot, which is the one saved to the PDF.
    plt.subplots()
    plt.bar(new_x, y)
    plt.xlim((0, 181))
    plt.ylabel('Count')
    plt.xlabel('Angle (Deg)')
    plt.savefig(os.path.join(filepath, 'water_angles.pdf'))
Ejemplo n.º 5
0
import MDAnalysis as mda
from MDAnalysis import transformations
import sys

# Command-line validation: expects <topology> <trajectory> <output>.
if len(sys.argv) != 4:
    raise Exception('wrong number of arguments. Need 3')

u = mda.Universe(sys.argv[1], sys.argv[2])
prot = u.select_atoms("segid A")

# A second universe is created to define the reference selection.
# It is copied from the first one here, but it could come from other files.
ref_u = u.copy()
reference = ref_u.select_atoms("segid A")

# On-the-fly pipeline: unwrap everything, centre segid A by mass, then wrap
# all atoms back into the box fragment by fragment.
ag = u.atoms
workflow = (
    transformations.unwrap(ag),
    transformations.center_in_box(prot, center='mass'),
    transformations.wrap(ag, compound='fragments'),
)
u.trajectory.add_transformations(*workflow)

# Write every transformed frame to the output trajectory.
all_as = u.select_atoms('all')
with mda.Writer(sys.argv[3], all_as.n_atoms) as W:
    for ts in u.trajectory:
        W.write(all_as)
Ejemplo n.º 6
0
def main():
    """Command-line tool computing a per-frame minimum distance.

    In periodic mode (``-p``) the distance is taken between the ``-sel``
    selection and copies of itself shifted by whole box lengths; otherwise it
    is taken between ``-sel`` and ``-sel2``.  Results are written to
    ``<output>.csv`` and plotted to ``<output>.png``; ``--print`` also prints
    the overall minimum.
    """
    parser = argparse.ArgumentParser(
        description='''Computes the minimum distance between\n
    2 selections (sel and sel2) or
    the same selection (sel) and its images in neighbouring periodic images.'''
    )
    parser.add_argument('-t', dest='top', help='topology file')
    parser.add_argument('traj', help='trajectory file')
    parser.add_argument('-sel', help='atom selection')
    parser.add_argument('-sel2',
                        help='atom selection2, ignored in periodic mode.')
    parser.add_argument(
        '-p',
        '--periodic',
        action='store_true',
        help='compute distance of "sel" between contiguous periodic cells')
    parser.add_argument('-u',
                        '--unwrap',
                        action='store_true',
                        help='unwrap PBC')
    parser.add_argument('-o', '--output', help='output file basename')
    parser.add_argument(
        '-c',
        '--cutoff',
        type=float,
        default=np.inf,
        help='compute distances only for atoms closer than cutoff')
    parser.add_argument('-s',
                        '--slice',
                        default="::",
                        help='slice trajectory, START:END:STEP')
    parser.add_argument(
        '-f',
        '--fast',
        action='store_true',
        help=
        'in periodic mode consider only 6 neighboring images, otherwise ignored.'
    )
    parser.add_argument('--print',
                        action='store_true',
                        help='print mindist on stdout.')
    args = parser.parse_args()

    # Empty fields of START:END:STEP become None.
    slice_fields = [
        int(field) if field else None for field in args.slice.split(':')
    ]
    frame_slice = slice(*slice_fields)

    # topology (a pdb is fine too) and trajectory (.dcd, .xtc, .nc, ...)
    u = Universe(args.top, args.traj)
    if args.unwrap:
        u.trajectory.add_transformations(transformations.unwrap(u.atoms))

    # Without -sel the whole system is used.
    p = u.select_atoms(args.sel) if args.sel else u.atoms

    min_dist = []

    if args.periodic:
        if args.fast:
            # Only the six face-sharing neighbour cells.
            cells = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1),
                     (0, 0, -1)]
        else:
            offsets = (0, 1, -1)  # --> (0, 0, 0), (0, 0, 1) ...
            # Drop the first combination, (0, 0, 0), i.e. the home cell.
            cells = np.array(list(product(offsets, offsets, offsets)))[1:]
        for frame in tqdm(u.trajectory[frame_slice]):
            pos = p.positions
            box = frame.dimensions[0:3]
            # get_shortest_distance -> (dist, anum1, anum2)
            distances = [(get_shortest_distance(pos,
                                                pos + shift,
                                                cutoff=args.cutoff),
                          u.trajectory.time) for shift in cells * box]
            min_dist.append(min(distances))
    else:
        p2 = u.select_atoms(args.sel2)
        for _ in tqdm(u.trajectory[frame_slice]):
            shortest = get_shortest_distance(p.positions,
                                             p2.positions,
                                             cutoff=args.cutoff)
            min_dist.append((shortest, u.trajectory.time))

    min_dist = np.array(min_dist)

    with open(args.output + '.csv', 'w') as fh:
        fh.write('time,distance\n')
        fh.writelines(f'{row[1]},{row[0]}\n' for row in min_dist)

    times = min_dist[:, 1]
    values = min_dist[:, 0]
    # Long trajectories are plotted in ns instead of ps.
    if max(times) > 10000:
        plt.plot(times / 1000.0, values)
        plt.xlabel('time (ns)')
    else:
        plt.plot(times, values)
        plt.xlabel('time (ps)')
    plt.ylabel('min distance (Å)')
    plt.savefig(args.output + '.png')

    if args.print:
        print(f'The minimum distance is {np.min(min_dist[:, 0])} A')
Ejemplo n.º 7
0
    atom2 = i.atoms.ids[1]

    distance = np.linalg.norm(u.trajectory[random_frame][atom1] -
                              u.trajectory[random_frame][atom2])

    if distance > allowed_max_distance:

        print("unusually long bond between " + str(atom1) + " " + str(atom2) +
              " with length " + str(distance / 10) + " nm")
        center_trigger = True

# the below selections must be adjusted when integrating into the AddData.py
# Selection strings for the membrane lipids and for everything that is not
# part of the membrane; only POPC is handled at the moment.
membrane_string = 'resname POPC'  #( resname POPC or resname DPPC .... )
not_membrane_string = 'not resname POPC'

# center_trigger is set to True above when an unusually long bond is found.
if center_trigger:

    # Reload the system (topol and traj are defined outside this section).
    u = mda.Universe(topol, traj)

    membrane = u.select_atoms(membrane_string)
    not_membrane = u.select_atoms(not_membrane_string)
    # NOTE(review): everything_string is not defined in this section and
    # `everything` is not used below -- confirm both against the full script.
    everything = u.select_atoms(everything_string)

    # Applied in order on every frame: unwrap the membrane, centre it in the
    # box (with wrap=True), then wrap the remaining atoms.
    transforms = [
        trans.unwrap(membrane),
        trans.center_in_box(membrane, wrap=True),
        trans.wrap(not_membrane)
    ]

    u.trajectory.add_transformations(*transforms)
Ejemplo n.º 8
0
# Process every command-line argument whose name ends in "prod-dump.dcd".
for dcdfile in sys.argv[1:]:
    if dcdfile[-13:] == "prod-dump.dcd":
        # Directory part of the path and file name up to its first dot.
        filepath = "/".join(dcdfile.split("/")[:-1])
        pdbname = dcdfile.split("/")[-1].split(".")[0]
        if filepath == "":
            # No directory component: use the current directory.
            filepath = "."
        # All input/output file names live next to the trajectory.
        psffile = filepath + "/topol.psf"
        pdbfile = filepath + "/{}.pdb".format(pdbname)
        pymolfile = filepath + "/{}.pml".format(pdbname)
        corepdbfile = filepath + "/{}-core.pdb".format(pdbname)
        ilfile = filepath + "/{}-ils.xyz".format(pdbname)
        corefile = filepath + "/{}-core.xyz".format(pdbname)
        # aroundcorepdbfile = filepath + "/{}-aroundcore.pdb".format(pdbname)

        # First pass: unwrap all atoms and dump them (minus type DP_) to a
        # multi-frame PDB.
        u = mda.Universe(psffile, dcdfile, in_memory=True)
        workflow = transformations.unwrap(u.atoms)
        u.trajectory.add_transformations(workflow)

        # "type DP_" atoms are excluded (presumably Drude particles, going by
        # the variable name -- TODO confirm).
        nodrudes = u.select_atoms("not type DP_")
        core = nodrudes.select_atoms("resname na1*")
        # NOTE(review): `ils` is not used in the visible part of this loop.
        ils = nodrudes.select_atoms("not group core", core=core)

        with mda.Writer(pdbfile, multiframe=True, bonds=None) as f:
            for ts in u.trajectory:
                f.write(nodrudes)

        # Second pass: reload the trajectory without the unwrap transformation.
        u = mda.Universe(psffile, dcdfile, in_memory=True)

        core = u.select_atoms("resname na1* and not type DP_")
        notcore = u.select_atoms("not group core and not type DP_", core=core)
Ejemplo n.º 9
0
def _renumber_residues_consecutively(res_ids):
    """Return residue numbers 1, 2, 3, ... that advance wherever ``res_ids`` changes."""
    res_ids = np.asarray(res_ids)
    if res_ids.size == 0:
        return res_ids.copy()
    # True at every position whose residue id differs from its predecessor.
    starts_new_residue = res_ids[1:] != res_ids[:-1]
    renumbered = np.concatenate(([1], 1 + np.cumsum(starts_new_residue)))
    return renumbered.astype(res_ids.dtype)


def create_full_trajectory(*, nCycles, path, filetype, output):
    """Concatenate the trajectories of all cycles into one output file.

    Cycle 0 defines atom names, residue names and residue numbers (sorted by
    residue name and renumbered consecutively).  Its first frame, unwrapped,
    is also written to ``initial.<extension of output>``.  For every later
    cycle whose trajectory file exists, the reactant -> product index changes
    are applied to the running atom order so that positions are always written
    in the order established for cycle 0.

    Parameters
    ----------
    nCycles : int
        Index of the last cycle to process (cycles ``0 .. nCycles``).
    path, filetype
        Forwarded to ``helper.make_get_filename`` and
        ``helper.make_get_reactive_atoms`` as ``PATH`` and ``FILETYPE``.
    output : str
        Name of the combined trajectory file; its extension is reused for the
        initial-frame file.

    Notes
    -----
    The last atom is now renumbered together with the rest (previously it
    kept its original residue number), and both output files are closed even
    when an exception is raised.
    """
    ## some setup
    getFilenameTop, getFilenameTrj = helper.make_get_filename(FILETYPE=filetype, PATH=path)
    getReactiveAtomIndices = helper.make_get_reactive_atoms(FILETYPE=filetype, PATH=path)

    frameCounter = 0

    write_frame = helper.make_get_write(filename=output)

    with open(output, 'w') as FILE:
        ## get initial data from cycle 0
        universe = mda.Universe(getFilenameTop(0), getFilenameTrj(0))
        atomNames = universe.atoms.names
        resNames = universe.atoms.resnames
        resIDs = universe.atoms.resnums
        atomOrder = universe.atoms.ix
        dt = universe.trajectory.dt
        ## important: sort topology such that resnames are in alphabetical order (because rs@md does this, too...)
        # first: sort resnames alphabetically and adapt resIDs, atomNames
        sortedIndices = resNames.argsort(kind='stable')
        atomNames = atomNames[sortedIndices]
        resNames = resNames[sortedIndices]
        resIDs = resIDs[sortedIndices]
        atomOrderRearrangedAlphabetically = atomOrder[sortedIndices]
        # second: renumber residues
        resIDs = _renumber_residues_consecutively(resIDs)

        ## create initial coordination file where all residues have been unwrapped and save it to file
        if not hasattr(universe, 'bonds'):
            # Unwrapping needs bonds; guess them when the topology has none.
            guessed_bonds = mdaguess.guess_bonds(universe.atoms, universe.atoms.positions, universe.trajectory[0].dimensions)
            universe.add_TopologyAttr('bonds', guessed_bonds)
        transform = mdatrans.unwrap(universe.atoms)
        universe.trajectory.add_transformations(transform)
        outputinitial = 'initial' + '.' + output.split('.')[-1]
        with open(outputinitial, 'w') as initialFile:
            box = universe.trajectory[0].dimensions
            positions = universe.atoms.positions
            sortedPositions = positions[sortedIndices]
            write_frame(filestream=initialFile, title=f'rs@md t={0:9.2f} step= {0}', resnames=resNames, resnums=resIDs, names=atomNames, positions=sortedPositions, box=box)
        print(f'-> initial frame has been written to {outputinitial}')

        ## write trajectory to file frame by frame
        for ts in universe.trajectory:
            positions = universe.atoms.positions
            box = ts.dimensions
            ## sort positions before writing them
            sortedPositions = positions[sortedIndices]
            write_frame(filestream=FILE, title=f'rs@md t={frameCounter*dt:9.2f} step=  {frameCounter}', resnames=resNames, resnums=resIDs, names=atomNames, positions=sortedPositions, box=box)
            frameCounter += 1

        ## loop through all files and record remaining data
        firstReactiveCycle = True
        for cycle in np.arange(1, nCycles+1):
            topfile = getFilenameTop(cycle)
            trjfile = getFilenameTrj(cycle)

            # Cycles without a trajectory file are skipped.
            if not os.path.isfile(trjfile):
                continue

            ## get reaction infos
            reactantsIx, productsIx = getReactiveAtomIndices(cycle)

            ## attention if firstReactiveCycle: there might have been a change in atomOrder between cycles 0 -> cycle
            ## due to reordering molecules in alphabetical order
            ## need to account for that by translating reactantsIx accordingly
            if firstReactiveCycle:
                reactantsIx = np.array([np.argwhere(atomOrderRearrangedAlphabetically == rix)[0, 0] for rix in reactantsIx])
                firstReactiveCycle = False

            ## important: you need to go through all transitions in an ordered fashion with respect to the product indices
            ## (from small to larger iy)
            productOrder = productsIx.argsort()
            reactantsIx = reactantsIx[productOrder]
            productsIx = productsIx[productOrder]

            ## apply reactions to atomOrder
            ## i.e. change positions of reactants to products

            ## ... first: get entries at reactant positions and remove them from list
            reactantEntries = [atomOrder[x] for x in reactantsIx]
            for entry in reactantEntries:
                atomOrder = np.delete(atomOrder, np.argwhere(atomOrder == entry), axis=0)

            ## ... second: put entries back at new positions given by product positions
            for iy, entry in zip(productsIx, reactantEntries):
                atomOrder = np.insert(atomOrder, iy, entry, axis=0)

            ## get sorted indices for new atomOrder
            sortedIndices = atomOrder.argsort()

            ## import trajectory: .tpr, .gro/.xtc/...
            universe = mda.Universe(topfile, trjfile)

            ## write trajectory to file frame by frame
            # The first frame of each later cycle is skipped.
            for ts in universe.trajectory[1:]:
                positions = universe.atoms.positions
                box = ts.dimensions

                ## sort positions before writing them
                sortedPositions = positions[sortedIndices]
                write_frame(filestream=FILE, title=f'rs@md t={frameCounter*dt:9.2f} step=  {frameCounter}', resnames=resNames, resnums=resIDs, names=atomNames, positions=sortedPositions, box=box)
                frameCounter += 1

    print(f'-> a total of {frameCounter} frames have been written to {output}')
Ejemplo n.º 10
0
        if filepath == "":
            filepath ="."
        psffile     = filepath + "/topol.psf"
        pdbfile     = filepath + "/{}.pdb".format(pdbname)
        pymolfile   = filepath + "/{}.pml".format(pdbname)
        corepdbfile = filepath + "/{}-core.pdb".format(pdbname)
        ilfile = filepath + "/{}-ils.xyz".format(pdbname)
        corefile = filepath + "/{}-core.xyz".format(pdbname)
        # aroundcorepdbfile = filepath + "/{}-aroundcore.pdb".format(pdbname)
        
        u = mda.Universe(psffile, dcdfile, in_memory=True)

        core             = u.select_atoms("resname na1* and not type DP_")
        notcore          = u.select_atoms("not group core and not type DP_", core=core)
    
        workflow = [transformations.unwrap(core),
                    transformations.center_in_box(core, center='mass'),
                    transformations.wrap(notcore)]

        u.trajectory.add_transformations(*workflow)


        with mda.Writer(ilfile, multiframe=True) as f:
            with mda.Writer(corefile, multiframe=True) as g:
                counter = 0
                for ts in u.trajectory:
                    if counter == 100:
                        cations, anions = order_ions(ionpairs)
                        resids = ""
                        for i in range(0, ionpairs):
                            if resids == "":
Ejemplo n.º 11
0
def main():
    """Command-line tool that joins trajectories and keeps a selection.

    All trajectory arguments are glob-expanded, sorted and loaded together
    with the topology.  The selected atoms are written to ``<output>.pdb``
    and the (optionally sliced, unwrapped and time-shifted) frames to
    ``<output>.xtc`` or, with ``--dcd``, ``<output>.dcd``.
    """
    parser = argparse.ArgumentParser(
        description=
        'saves new topology and joined trajectory with only sel(ected) atoms.')
    parser.add_argument('-t',
                        '--topology',
                        help='the topology file (pdb, gro, psf...)')
    parser.add_argument('-sel',
                        '--selection',
                        help='the selection in MDA language')
    parser.add_argument('-o', '--output', help='output basename')
    parser.add_argument('-s',
                        '--slice',
                        help='slicing output trajectory START:END:SKIP')
    parser.add_argument('--reset_time',
                        help='make trajectory start from time 0',
                        action='store_true')
    parser.add_argument('trajectory',
                        help='the trajectory(ies) file(s). Accepts globbing',
                        nargs=argparse.REMAINDER)
    parser.add_argument('--dcd',
                        help='writes traj in dcd format',
                        action='store_true')
    parser.add_argument('-u',
                        '--unwrap',
                        action='store_true',
                        help='unwrap PBC')
    args = parser.parse_args()

    # expand input trajectory names
    traj_files = []
    for pattern in args.trajectory:
        traj_files.extend(glob.glob(pattern))
    traj_files.sort()

    # Empty fields of START:END:SKIP become None; no -s means every frame.
    if args.slice:
        bounds = [int(tok) if tok else None for tok in args.slice.split(':')]
        slicer = slice(*bounds)
    else:
        slicer = slice(None)

    u = Universe(args.topology, *traj_files)

    if args.unwrap:
        u.trajectory.add_transformations(transformations.unwrap(u.atoms))

    # With --reset_time the time of the first frame is subtracted everywhere.
    time_offset = u.trajectory[0].time if args.reset_time else 0.0

    selection = u.select_atoms(args.selection) if args.selection else u.atoms

    selection.write(f'{args.output}.pdb')

    extension = 'dcd' if args.dcd else 'xtc'
    traj_file = f'{args.output}.{extension}'
    with Writer(traj_file, selection.n_atoms) as write_handle:
        for time_frame in u.trajectory[slicer]:
            time_frame.time -= time_offset
            write_handle.write(selection)
Ejemplo n.º 12
0
    )

    print("Running production simulation ...")
    simulation.step(production_steps)

# Post-process only when at least one trajectory frame was written
# (production_steps and production_trajectory_frequency are set outside this
# section).
if production_steps / production_trajectory_frequency >= 1:

    print("Transforming trajectory ...")
    # Load the equilibrated structure together with the production trajectory.
    u = mda.Universe(
        str(output_directory / "equilibration/out_state.pdb"),
        str(output_directory / "trajectory.xtc"),
    )
    backbone = u.select_atoms("backbone")
    not_protein = u.select_atoms("not protein")
    # Applied in order on every frame: unwrap the backbone, centre it in the
    # box, wrap the non-protein atoms by fragment, then fit the backbone.
    # NOTE(review): fit_rot_trans receives the same AtomGroup as both mobile
    # group and reference -- confirm that fitting onto itself is intended.
    workflow = (
        transformations.unwrap(backbone),
        transformations.center_in_box(backbone),
        transformations.wrap(not_protein, compound="fragments"),
        transformations.fit_rot_trans(backbone, backbone),
    )
    u.trajectory.add_transformations(*workflow)

    print("Saving transformed topology and trajectory ...")
    # Topology from the current frame, then every transformed frame.
    u.atoms.write(str(output_directory / "topology_wrapped.pdb"))
    with mda.Writer(
        str(output_directory / "trajectory_wrapped.xtc"), u.atoms.n_atoms
    ) as W:
        for ts in u.trajectory:
            W.write(u.atoms)

print("Finished")
Ejemplo n.º 13
0
def runextract(ionpairs, dcdfile):
    """Write XYZ files of the core and of growing sets of nearby ion pairs.

    The trajectory is loaded with ``topol.psf`` from the same directory,
    the core (``resname na1*``) is unwrapped and centred, and the remaining
    atoms are wrapped.  For ``j = 1 .. ionpairs`` a file
    ``<name>-il-<j>.xyz`` containing the first ``j`` cation/anion residues is
    written next to ``<name>-core.xyz``; every written file is then re-read
    with ``readXYZ`` and rewritten line by line.

    Parameters
    ----------
    ionpairs : int
        Number of cation/anion pairs to extract.
    dcdfile : str
        Path of the DCD trajectory ("/"-separated).
    """
    def order_ions(ionpairs, u):
        """Collect cation and anion residue ids in shells of growing radius."""
        anion_tags = ("otf,", "tcm,", "mso4,")
        cations = []
        anions = []
        # Shells of 0.2 width from 1.0 to 15.0 around the core.
        for i in range(10, 150, 2):
            query = "sphlayer {} {} group core and not type DP_".format(
                i / 10, (i + 2) / 10)
            shell = u.select_atoms(query, core=core)
            for atom in shell:
                # The residue name and id are parsed from the atom's string
                # representation (whitespace-separated fields 8 and 10).
                fields = str(atom).split()
                if fields[8] == "c4c1pyrr,":
                    if fields[10] not in cations:
                        cations.append(fields[10])
                elif fields[8] in anion_tags:
                    if fields[10] not in anions:
                        anions.append(fields[10])
        return (cations[0:ionpairs], anions[0:ionpairs])

    # Split the path into directory and file name up to its first dot.
    filepath, _, filename = dcdfile.rpartition("/")
    pdbname = filename.split(".")[0]
    if filepath == "":
        filepath = "."
    psffile = filepath + "/topol.psf"

    u = mda.Universe(psffile, dcdfile, in_memory=True)

    core = u.select_atoms("resname na1* and not type DP_")
    notcore = u.select_atoms("not group core and not type DP_", core=core)

    u.trajectory.add_transformations(
        transformations.unwrap(core),
        transformations.center_in_box(core, center='mass'),
        transformations.wrap(notcore),
    )

    cations, anions = order_ions(ionpairs, u)
    resids = ""
    filelist = []

    for j in range(1, ionpairs + 1):
        # Extend the selection by the j-th cation/anion pair.
        pair = "resid {} or resid {}".format(cations[j - 1], anions[j - 1])
        resids += (" " if resids == "" else " or ") + pair

        il_name = f"{pdbname}-il-{j}.xyz"
        with mda.Writer(il_name) as f:
            filelist.append(il_name)
            aroundcore = u.select_atoms("not type DP_ and ({})".format(resids))
            f.write(aroundcore)

        core_name = f"{pdbname}-core.xyz"
        with mda.Writer(core_name) as g:
            filelist.append(core_name)
            g.write(core)

    # Re-read each file and write it back one entry per line.
    for xyzfile in filelist:
        geoms = readXYZ(xyzfile)

        with open(xyzfile, "w") as f:
            f.writelines(str(line) + "\n" for geom in geoms for line in geom)
Ejemplo n.º 14
0
def populate_dict(chunk, dictionary, pdb_file, mutant_sel, project_code,
                  frames_to_stride):
    """Fill ``dictionary`` with minimum interface distances for each frame.

    Every trajectory in ``chunk`` is loaded on top of ``pdb_file``, centred
    and fitted onto the reference structure, and for every
    ``frames_to_stride``-th frame the minimum distances between ten residue
    pairs across the RBD/ACE2 interface are stored.

    Parameters
    ----------
    chunk : iterable of str
        Trajectory paths.  Components 6, 7 and 8 of the "/"-split path are
        used to build the dictionary key, so each path needs at least nine
        components.
    dictionary : dict
        Updated in place; one entry per analysed frame.
    pdb_file : str
        Structure used both as topology and as reference.
    mutant_sel : str
        Key into the per-project selection table (only "WT" is defined).
    project_code : str
        Key into the selection table (only "17311" is defined).
    frames_to_stride : int
        Step used when iterating over trajectory frames.

    Raises
    ------
    KeyError
        If ``project_code`` / ``mutant_sel`` is not in the selection table.

    Notes
    -----
    The selection table used to be indexed with an undefined name
    ``mutant``; it is now indexed with the ``mutant_sel`` parameter.
    """

    interface_selection_strings = {
        "rbd":
        "segid A and (backbone and (resid 403 or resid 417 or resid 439 or resid 445-447 or resid 449 or resid 453 or resid 455 or resid 456 or resid 473-477 or resid 484-487 or resid 489 or resid 490 or resid 493-503 or resid 505 or resid 506))",
        "ace2":
        "segid C and (backbone and (resid 18 or resid 21 or resid 23-32 or resid 33-39 or resid 41 or resid 42 or resid 45 or resid 75 or resid 76 or resid 78-84))",
        "rbd_and_ace2":
        "(segid A and (backbone and (resid 403 or resid 417 or resid 439 or resid 445-447 or resid 449 or resid 453 or resid 455 or resid 456 or resid 473-477 or resid 484-487 or resid 489 or resid 490 or resid 493-503 or resid 505 or resid 506))) or (segid C and (backbone and (resid 18 or resid 21 or resid 23-32 or resid 33-39 or resid 41 or resid 42 or resid 45 or resid 75 or resid 76 or resid 78-84)))",
    }

    # Create a dictionary containing selection strings for MDAnalysis
    # residues 417 and 439 are not named since these are mutated across systems
    # segid C = ACE2, segid A = RBD

    # TODO remove project keys, not sure they are needed
    proj_mutant_dict = {
        "17311": {
            "WT": {
                "D30": "segid C and (resid 30 and name OD1 OD2)",
                "res417": "segid A and (resid 417 and name NZ)",
                "E329": "segid C and (resid 329 and name OE1 OE2)",
                "res439": "segid A and (resid 439 and name ND2)",
                "K31": "segid C and (resid 31 and name NZ)",
                "E484": "segid A and (resid 484 and name OE1 OE2)",
                "E35": "segid C and (resid 35 and name OE1 OE2)",
                "Q493": "segid A and (resid 493 and name NE2 OE1)",
                "K353": "segid C and (resid 353 and name NZ)",
                "G496bb": "segid A and (resid 496 and name O C CA N)",
                "D38": "segid C and (resid 38 and name OD1 OD2)",
                "Y449":
                "segid A and (resid 449 and name CG CD1 CE1 CZ CE2 CD2 OH)",
                "Q42": "segid C and (resid 42 and name NE2 OE1)",
                "K353bb": "segid C and (resid 353 and name O C CA N)",
                "G502bb": "segid A and (resid 502 and name O C CA N)",
            },
        },
    }

    # Resolve the selection table once; an unknown project or mutant fails
    # here, before any trajectory is loaded.
    selections = proj_mutant_dict[project_code][mutant_sel]

    # set the reference to be the equilibrated structure
    ref = mda.Universe(pdb_file)
    ref.trajectory[0]  # there is only one frame anyway, but just to be sure
    ref_bb = ref.select_atoms("backbone")  # the ref for RMSD calcs later

    # set reference interfaces
    ref_rbd_interface_bb = ref.select_atoms(interface_selection_strings["rbd"])

    ref_ace2_interface_bb = ref.select_atoms(
        interface_selection_strings["ace2"])

    ref_whole_interface_bb = ref.select_atoms(
        interface_selection_strings["rbd_and_ace2"])

    reference = ref.select_atoms(
        "not resname Na+ Cl- HOH")  # the ref for transforms later

    for traj in chunk:
        print("--> Analysing trajectory: ", traj)

        mobile = mda.Universe(pdb_file, traj)
        # centre the two protein chains in the box
        # this stops chains jumping across PBC
        chainA = mobile.select_atoms("segid A or segid B")  # RBD + glycans
        chainB = mobile.select_atoms("segid C or segid D")  # ACE2 + glycans
        ions = mobile.select_atoms("resname Na+ Cl- HOH")
        protein = mobile.select_atoms("not resname Na+ Cl- HOH")

        transforms = [
            trans.unwrap(protein),
            GroupHug(chainA, chainB),
            trans.center_in_box(protein, wrap=False, center="geometry"),
            trans.wrap(ions),
            trans.fit_rot_trans(protein, reference),
        ]

        print("--> Centring protein chains in the box")
        mobile.trajectory.add_transformations(*transforms)

        # loop over each frame in the current trajectory, with a defined stride
        for ts in mobile.trajectory[::frames_to_stride]:

            print(f"--> Current frame: {ts.frame}")

            # calculate the key interactions
            # RBD --- ACE2

            # D30 --- K417
            D30 = mobile.select_atoms(selections["D30"])
            res417 = mobile.select_atoms(selections["res417"])

            D30_res417_dist_mindist = np.min(
                distances.distance_array(D30.positions, res417.positions))

            # E329 --- N439
            E329 = mobile.select_atoms(selections["E329"])
            res439 = mobile.select_atoms(selections["res439"])

            E329_res439_dist_mindist = np.min(
                distances.distance_array(E329.positions, res439.positions))

            # E484 --- K31
            K31 = mobile.select_atoms(selections["K31"])
            E484 = mobile.select_atoms(selections["E484"])

            E484_K31_dist_mindist = np.min(
                distances.distance_array(E484.positions, K31.positions))

            # E35 --- K31
            E35 = mobile.select_atoms(selections["E35"])

            E35_K31_dist_mindist = np.min(
                distances.distance_array(E35.positions, K31.positions))

            # E35 --- Q493
            Q493 = mobile.select_atoms(selections["Q493"])

            E35_Q493_dist_mindist = np.min(
                distances.distance_array(E35.positions, Q493.positions))

            # Additional interactions
            K31_Q493_dist_mindist = np.min(
                distances.distance_array(K31.positions, Q493.positions))

            # K353 --- G496 (K353 to G496 backbone)
            K353 = mobile.select_atoms(selections["K353"])
            G496bb = mobile.select_atoms(selections["G496bb"])

            K353_G496bb_dist_mindist = np.min(
                distances.distance_array(K353.positions, G496bb.positions))

            # D38 --- Y449
            D38 = mobile.select_atoms(selections["D38"])
            Y449 = mobile.select_atoms(selections["Y449"])

            D38_Y449_dist_mindist = np.min(
                distances.distance_array(D38.positions, Y449.positions))

            # Q42 --- Y449
            Q42 = mobile.select_atoms(selections["Q42"])

            Q42_Y449_dist_mindist = np.min(
                distances.distance_array(Q42.positions, Y449.positions))

            # K353bb --- G502bb (Backbone to backbone)
            K353bb = mobile.select_atoms(selections["K353bb"])
            G502bb = mobile.select_atoms(selections["G502bb"])

            K353bb_G502bb_dist_mindist = np.min(
                distances.distance_array(K353bb.positions, G502bb.positions))

            # sort out names for the dict
            traj_split = traj.split("/")
            key_name = f"{traj_split[6]}/{traj_split[7]}/{traj_split[8]}_{ts.frame}"

            # populate the dict - with placeholder keys for now
            dictionary[key_name] = {
                "d30_res417_mindist": D30_res417_dist_mindist,
                "e329_res439_mindist": E329_res439_dist_mindist,
                "e484_k31_mindist": E484_K31_dist_mindist,
                "e35_k31_mindist": E35_K31_dist_mindist,
                "e35_q493_mindist": E35_Q493_dist_mindist,
                "q493_k31_mindist": K31_Q493_dist_mindist,
                "k353_g496bb_mindist": K353_G496bb_dist_mindist,
                "d38_y449_dist_mindist": D38_Y449_dist_mindist,
                "q42_y449_dist_mindist": Q42_Y449_dist_mindist,
                "k353bb_g502bb_dist_mindist": K353bb_G502bb_dist_mindist,
            }