Esempio n. 1
0
    def _init(self):
        from natsort import natsorted
        folders = natsorted(glob(path.join(self.generatorspath, '*', ''))) # I need the extra ''  to add a finishing /
        if len(folders) == 0:
            logger.info('Generators folder has no subdirectories, using folder itself')
            folders.append(self.generatorspath)

        numF = len(folders)
        numCopies = np.ones(numF, dtype=int) * int(np.floor(self.nmax / numF))
        numExtra = np.mod(self.nmax, numF)
        extraChoices = np.random.choice(numF, numExtra, replace=False) # draw the extra
        numCopies[extraChoices] += 1
        # numCopies = numCopies + np.random.multinomial(numExtra, [1/numF]*numF)  # draw the extra equally from a flat distribution
        if not path.exists(self.inputpath):
            makedirs(self.inputpath)

        # Check if epoch 1 directories already exist in the input folder
        existing = glob(path.join(self.inputpath, 'e1s*'))
        if len(existing) != 0:
            raise NameError('Epoch 1 directories already exist.')

        k = 1
        for i in range(numF):
            for j in range(numCopies[i]):
                name = _simName(folders[i])
                inputdir = path.join(self.inputpath, 'e1s' + str(k) + '_' + name)
                #src = path.join(self.generatorspath, name, '*')
                src = folders[i]
                copytree(src, inputdir, symlinks=True, ignore=ignore_patterns(*_IGNORE_EXTENSIONS))
                k += 1
Esempio n. 2
0
def _writeInputsFunction(i, f, epoch, inputpath, coorname):
    regex = re.compile('(e\d+s\d+)_')
    frameNum = f.frame
    piece = f.piece
    if f.sim.parent is None:
        currSim = f.sim
    else:
        currSim = f.sim.parent

    traj = currSim.trajectory[piece]
    if currSim.input is None:
        raise NameError('Could not find input folder in simulation lists. Cannot create new simulations.')

    wuName = _simName(traj)
    res = regex.search(wuName)
    if res:  # If we are running on top of adaptive, use the first name part for the next sim name
        wuName = res.group(1)

    # create new job directory
    newName = 'e' + str(epoch) + 's' + str(i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
    newDir = path.join(inputpath, newName, '')

    # copy previous input directory including input files
    copytree(currSim.input, newDir, symlinks=False, ignore=ignore_patterns('*.coor', '*.rst', '*.out', *_IGNORE_EXTENSIONS))

    # overwrite input file with new one. frameNum + 1 as catdcd does 1 based indexing

    mol = Molecule(currSim.molfile)  # Always read the mol file, otherwise it does not work if we need to save a PDB as coorname
    mol.read(traj)
    mol.dropFrames(keep=frameNum)  # Making sure only specific frame to write is kept
    mol.write(path.join(newDir, coorname))
Esempio n. 3
0
def _writeInputsFunction(i, f, epoch, inputpath, coorname):
    regex = re.compile('(e\d+s\d+)_')
    frameNum = f.frame
    piece = f.piece
    if f.sim.parent is None:
        currSim = f.sim
    else:
        currSim = f.sim.parent

    traj = currSim.trajectory[piece]
    if currSim.input is None:
        raise NameError('Could not find input folder in simulation lists. Cannot create new simulations.')

    wuName = _simName(traj)
    res = regex.search(wuName)
    if res:  # If we are running on top of adaptive, use the first name part for the next sim name
        wuName = res.group(1)

    # create new job directory
    newName = 'e' + str(epoch) + 's' + str(i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
    newDir = path.join(inputpath, newName, '')

    # copy previous input directory including input files
    copytree(currSim.input, newDir, symlinks=False, ignore=ignore_patterns('*.coor', '*.rst', '*.out', *_IGNORE_EXTENSIONS))

    # overwrite input file with new one. frameNum + 1 as catdcd does 1 based indexing

    mol = Molecule(currSim.molfile)  # Always read the mol file, otherwise it does not work if we need to save a PDB as coorname
    mol.read(traj)
    mol.dropFrames(keep=frameNum)  # Making sure only specific frame to write is kept
    mol.write(path.join(newDir, coorname))
Esempio n. 4
0
    def _init(self):
        from natsort import natsorted
        folders = natsorted(glob(path.join(self.generatorspath, '*', ''))) # I need the extra ''  to add a finishing /
        if len(folders) == 0:
            logger.info('Generators folder has no subdirectories, using folder itself')
            folders.append(self.generatorspath)

        numF = len(folders)
        numCopies = np.ones(numF, dtype=int) * int(np.floor(self.nmax / numF))
        numExtra = np.mod(self.nmax, numF)
        extraChoices = np.random.choice(numF, numExtra, replace=False) # draw the extra
        numCopies[extraChoices] += 1
        # numCopies = numCopies + np.random.multinomial(numExtra, [1/numF]*numF)  # draw the extra equally from a flat distribution
        if not path.exists(self.inputpath):
            makedirs(self.inputpath)

        # Check if epoch 1 directories already exist in the input folder
        existing = glob(path.join(self.inputpath, 'e1s*'))
        if len(existing) != 0:
            raise NameError('Epoch 1 directories already exist.')

        k = 1
        for i in range(numF):
            for j in range(numCopies[i]):
                name = _simName(folders[i])
                inputdir = path.join(self.inputpath, 'e1s' + str(k) + '_' + name)
                #src = path.join(self.generatorspath, name, '*')
                src = folders[i]
                copytree(src, inputdir, symlinks=True, ignore=ignore_patterns(*_IGNORE_EXTENSIONS))
                k += 1
Esempio n. 5
0
def _writeInputsFunction(i, f, epoch, inputpath):
    regex = re.compile('(e\d+s\d+)_')
    frameNum = f.frame
    piece = f.piece
    if f.sim.parent is None:
        currSim = f.sim
    else:
        currSim = f.sim.parent

    traj = currSim.trajectory[piece]
    if currSim.input is None:
        raise NameError(
            'Could not find input folder in simulation lists. Cannot create new simulations.'
        )

    wuName = _simName(traj)
    res = regex.search(wuName)
    if res:  # If we are running on top of adaptive, use the first name part for the next sim name
        wuName = res.group(1)

    # create new job directory
    newName = 'e' + str(epoch) + 's' + str(i + 1) + '_' + wuName + 'p' + str(
        piece) + 'f' + str(frameNum)
    newDir = path.join(inputpath, newName, '')
    # copy previous input directory including input files
    copytree(currSim.input,
             newDir,
             symlinks=False,
             ignore=ignore_patterns(_TRAJ_EXTENSIONS_COOR))
    # overwrite input file with new one. frameNum + 1 as catdcd does 1 based indexing
    mol = Molecule()
    mol.read(traj)
    mol.frame = frameNum
    mol.write(path.join(newDir, 'input.coor'))
Esempio n. 6
0
    def _writeInputs(self, simsframes, epoch=None):
        if epoch is None:
            epoch = self._getEpoch() + 1

        test = glob(path.join(self.inputpath, 'e' + str(epoch) + '*'))
        if len(test) != 0:
            raise NameError('Input dirs of epoch ' + str(epoch) +
                            ' already exists.')

        if path.exists(
                path.join(self.inputpath,
                          'e' + str(epoch) + '_writeinputs.log')):
            raise NameError('Epoch logfile already exists. Cant overwrite it.')

        fid = open(
            path.join(self.inputpath, 'e' + str(epoch) + '_writeinputs.log'),
            'w')

        regex = re.compile('(e\d+s\d+)_')
        for i, f in enumerate(simsframes):
            frameNum = f.frame
            piece = f.piece
            #print(frameNum)
            if f.sim.parent is None:
                currSim = f.sim
            else:
                currSim = f.sim.parent

            traj = currSim.trajectory[piece]
            if currSim.input is None:
                raise NameError(
                    'Could not find input folder in simulation lists. Cannot create new simulations.'
                )

            wuName = _simName(traj)
            res = regex.search(wuName)
            if res:  # If we are running on top of adaptive, use the first name part for the next sim name
                wuName = res.group(1)

            # create new job directory
            newName = 'e' + str(epoch) + 's' + str(
                i + 1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
            newDir = path.join(self.inputpath, newName, '')
            # copy previous input directory including input files
            copytree(currSim.input,
                     newDir,
                     symlinks=False,
                     ignore=ignore_patterns('*.dcd', '*.xtc', '*.coor'))
            # overwrite input file with new one. frameNum + 1 as catdcd does 1 based indexing
            mol = Molecule()
            mol.read(traj)
            mol.frame = frameNum
            mol.write(path.join(newDir, 'input.coor'))

            # write nextInput file
            fid.write('# {0} \n{1} {2}\n'.format(newName, traj, frameNum))

        fid.close()
Esempio n. 7
0
    def _writeInputs(self, simsframes, epoch=None):
        if epoch is None:
            epoch = self._getEpoch() + 1

        test = glob(path.join(self.inputpath, 'e' + str(epoch) + '*'))
        if len(test) != 0:
            raise NameError('Input dirs of epoch ' + str(epoch) + ' already exists.')

        if path.exists(path.join(self.inputpath, 'e' + str(epoch) + '_writeinputs.log')):
            raise NameError('Epoch logfile already exists. Cant overwrite it.')

        fid = open(path.join(self.inputpath, 'e' + str(epoch) + '_writeinputs.log'), 'w')

        regex = re.compile('(e\d+s\d+)_')
        for i, f in enumerate(simsframes):
            frameNum = f.frame
            piece = f.piece
            #print(frameNum)
            if f.sim.parent is None:
                currSim = f.sim
            else:
                currSim = f.sim.parent

            traj = currSim.trajectory[piece]
            if currSim.input is None:
                raise NameError('Could not find input folder in simulation lists. Cannot create new simulations.')

            wuName = _simName(traj)
            res = regex.search(wuName)
            if res:  # If we are running on top of adaptive, use the first name part for the next sim name
                wuName = res.group(1)

            # create new job directory
            newName = 'e' + str(epoch) + 's' + str(i+1) + '_' + wuName + 'p' + str(piece) + 'f' + str(frameNum)
            newDir = path.join(self.inputpath, newName, '')
            # copy previous input directory including input files
            copytree(currSim.input, newDir, symlinks=False, ignore=ignore_patterns('*.dcd', '*.xtc', '*.coor'))
            # overwrite input file with new one. frameNum + 1 as catdcd does 1 based indexing
            mol = Molecule()
            mol.read(traj)
            mol.frame = frameNum
            mol.write(path.join(newDir, 'input.coor'))

            # write nextInput file
            fid.write('# {0} \n{1} {2}\n'.format(newName, traj, frameNum))

        fid.close()
Esempio n. 8
0
def reconstructAdaptiveTraj(simlist, trajID):
    """ Reconstructs a long trajectory out of short adaptive runs.

    Parameters
    ----------
    simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A simulation list generated by the :func:`simlist <htmd.simlist.simlist>` function
    trajID : int
        The id of the trajectory from which to start going back.

    Returns
    -------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        A Molecule object containing the reconstructed trajectory
    chain : np.ndarray
        The simulation IDs of all simulations involved
    pathlist : np.ndarray of str
        The names of all simulations involved.

    Examples
    --------
    >>> mol, chain, pathlist = reconstructAdaptiveTraj(data.simlist, 52)
    """

    sim = None
    for s in simlist:
        if s.simid == trajID:
            sim = s
            break
    if sim is None:
        raise NameError(
            'Could not find sim with ID {} in the simlist.'.format(trajID))

    pathlist = []
    pathlist.append(sim.trajectory[0])
    chain = []
    chain.append((sim, -1, -1))

    epo = None
    while epo != 1:
        [sim, piece, frame,
         epo] = _findprevioustraj(simlist, _simName(sim.trajectory[0]))
        pathlist.append(sim.trajectory[piece])
        chain.append((sim, piece, frame))
    pathlist = pathlist[::-1]
    chain = chain[::-1]

    mol = Molecule(sim.molfile)
    mol.coords = np.zeros((mol.numAtoms, 3, 0), dtype=np.float32)
    mol.fileloc = []
    mol.box = np.zeros((3, 0))
    for i, c in enumerate(chain):
        tmpmol = Molecule(sim.molfile)
        tmpmol.read(c[0].trajectory)
        endpiece = c[1]
        fileloc = np.vstack(tmpmol.fileloc)
        filenames = fileloc[:, 0]
        pieces = np.unique(filenames)
        firstpieceframe = np.where(filenames == pieces[endpiece])[0][0]
        endFrame = firstpieceframe + c[2]
        if endFrame != -1:
            tmpmol.coords = tmpmol.coords[:, :, 0:endFrame +
                                          1]  # Adding the actual respawned frame (+1) since the respawned sim doesn't include it in the xtc
            tmpmol.fileloc = tmpmol.fileloc[0:endFrame + 1]
            tmpmol.box = tmpmol.box[:, 0:endFrame + 1]
        mol.coords = np.concatenate((mol.coords, tmpmol.coords), axis=2)
        mol.box = np.concatenate((mol.box, tmpmol.box), axis=1)
        mol.fileloc += tmpmol.fileloc
    #mol.fileloc[:, 1] = range(np.size(mol.fileloc, 0))

    return mol, chain, pathlist
Esempio n. 9
0
def reconstructAdaptiveTraj(simlist, trajID):
    """ Reconstructs a long trajectory out of short adaptive runs.

    Parameters
    ----------
    simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A simulation list generated by the :func:`simlist <htmd.simlist.simlist>` function
    trajID : int
        The id of the trajectory from which to start going back.

    Returns
    -------
    mol : :class:`Molecule <htmd.molecule.molecule.Molecule>` object
        A Molecule object containing the reconstructed trajectory
    chain : np.ndarray
        The simulation IDs of all simulations involved
    pathlist : np.ndarray of str
        The names of all simulations involved.

    Examples
    --------
    >>> mol, chain, pathlist = reconstructAdaptiveTraj(data.simlist, 52)
    """

    sim = None
    for s in simlist:
        if s.simid == trajID:
            sim = s
            break
    if sim is None:
        raise NameError('Could not find sim with ID {} in the simlist.'.format(trajID))

    pathlist = []
    pathlist.append(sim.trajectory[0])
    chain = []
    chain.append((sim, -1, -1))

    epo = None
    while epo != 1:
        [sim, piece, frame, epo] = _findprevioustraj(simlist, _simName(sim.trajectory[0]))
        pathlist.append(sim.trajectory[piece])
        chain.append((sim, piece, frame))
    pathlist = pathlist[::-1]
    chain = chain[::-1]

    mol = Molecule(sim.molfile)
    mol.coords = np.zeros((mol.numAtoms, 3, 0), dtype=np.float32)
    mol.fileloc = []
    mol.box = np.zeros((3, 0))
    for i, c in enumerate(chain):
        tmpmol = Molecule(sim.molfile)
        tmpmol.read(c[0].trajectory)
        endpiece = c[1]
        fileloc = np.vstack(tmpmol.fileloc)
        filenames = fileloc[:, 0]
        pieces = np.unique(filenames)
        firstpieceframe = np.where(filenames == pieces[endpiece])[0][0]
        endFrame = firstpieceframe + c[2]
        if endFrame != -1:
            tmpmol.coords = tmpmol.coords[:, :, 0:endFrame + 1]  # Adding the actual respawned frame (+1) since the respawned sim doesn't include it in the xtc
            tmpmol.fileloc = tmpmol.fileloc[0:endFrame + 1]
            tmpmol.box = tmpmol.box[:, 0:endFrame + 1]
        mol.coords = np.concatenate((mol.coords, tmpmol.coords), axis=2)
        mol.box = np.concatenate((mol.box, tmpmol.box), axis=1)
        mol.fileloc += tmpmol.fileloc
    #mol.fileloc[:, 1] = range(np.size(mol.fileloc, 0))

    return mol, chain, pathlist