Example #1
0
 def interpret_sel(s, sel):
     """Turn a selection specifier into an AtomSelection on structure s.

     | Args:
     |   s: structure the selection applies to.
     |   sel (None, str or callable): None selects everything; a string is
     |                                interpreted as an element symbol; a
     |                                callable is invoked with s and must
     |                                return the selection itself.
     """
     # No specifier means "everything"
     if sel is None:
         return AtomSelection.all(s)
     # A plain string is treated as an element symbol
     if is_string(sel):
         return AtomSelection.from_element(s, sel)
     # A callable builds the selection itself
     if callable(sel):
         return sel(s)
Example #2
0
    def put_files(self, localpaths, remotedir):
        """
        Upload file(s) to the remote machine via SFTP.

        | Args:
        |   localpaths (str or [str]): path of file(s) to copy. Can include
        |                              wildcards.
        |   remotedir (str): remote directory to copy the file(s) into.

        """

        # Accept a single path as well as a list of paths
        patterns = [localpaths] if is_string(localpaths) else localpaths

        for pattern in patterns:
            # Expand any wildcards locally
            for src in glob.glob(pattern):
                fname = os.path.basename(src)
                self._sftp.put(src, os.path.join(remotedir, fname),
                               confirm=True)
Example #3
0
    def get_files(self, remotepaths, localdir):
        """
        Fetch file(s) from the remote machine via SFTP.

        | Args:
        |   remotepaths (str or [str]): path of file(s) to copy. Can include
        |                               wildcards.
        |   localdir (str): local directory to copy the file(s) into.

        """

        # Accept a single path as well as a list of paths
        patterns = [remotepaths] if is_string(remotepaths) else remotepaths

        for pattern in patterns:
            rdir, rpattern = os.path.split(pattern)

            # Wildcards can't be expanded remotely, so list the remote
            # directory and filter the names ourselves
            matches = fnmatch.filter(self._sftp.listdir(rdir), rpattern)

            for rfile in matches:
                fname = os.path.basename(rfile)
                self._sftp.get(os.path.join(rdir, rfile),
                               os.path.join(localdir, fname))
Example #4
0
    def load_tree(path, load_format, opt_args=None, safety_check=3):
        """Load a collection's structures from a series of folders, named like
        the structures, inside a given parent folder, as created by save_tree.
        The files can be loaded from a format of choice, or a
        function can be passed that will load them in a custom way.

        | Args:
        |   path (str): folder path in which the collection should be saved.
        |   load_format (str or function): format from which the structures
        |                                  should be loaded.
        |                                  If a string, it will be used as a
        |                                  file extension. If a function, it
        |                                  must take as arguments the load
        |                                  path (a string) and any additional
        |                                  arguments passed as opt_args, and
        |                                  return the loaded structure as an
        |                                  ase.Atoms object.
        |   opt_args(dict): dictionary of additional arguments to pass to
        |                   either ase.io.read (if load_format is a string)
        |                   or to the load_format function. Default is None
        |                   (no extra arguments).
        |   safety_check (int): how much care should be taken to verify the
        |                       folder that is being loaded. Can be a number
        |                       from 0 to 3.
        |                       Here's the meaning of the codes:
        |
        |                       3 (default): only load a folder if it passes
        |                         fully the check_tree control;
        |                       2: load any folder that has a valid
        |                          .collection file, but only the listed
        |                          subfolders;
        |                       1: load any folder that has a valid
        |                          .collection file, all subfolders. Array
        |                          data will be discarded;
        |                       0: no checks, try to load from all subfolders.

        | Returns:
        |   coll (AtomsCollection): loaded collection

        | Raises:
        |   IOError: if the folder does not exist, or fails the checks
        |            required by the given safety_check level.
        |   ValueError: if load_format is neither a string nor a callable.

        """

        # Avoid a shared mutable default argument; use a fresh dict per call
        if opt_args is None:
            opt_args = {}

        check = AtomsCollection.check_tree(path)

        if check == -1:
            raise IOError('Folder {0} does not exist'.format(path))

        dirlist = []
        if check < 2:
            # NOTE(review): .collection is a pickle file — only load trees
            # from trusted sources, as pickle can execute arbitrary code
            with open(os.path.join(path, '.collection'), 'rb') as f:
                coll = pickle.load(f)
            if check == 1 and safety_check == 3:
                raise IOError(('Folder {0} is not a valid collection '
                               'tree').format(path))
            if safety_check >= 2:
                # Trust only the subfolders listed in the metadata
                dirlist = coll['dirlist']
            else:
                # Scan all subfolders of path
                dirlist = [os.path.relpath(d, path)
                           for d in glob.glob(os.path.join(path, '*')) if
                           os.path.isdir(d)]
        else:
            if safety_check > 0:
                raise IOError(('Folder {0} is not a valid collection '
                               'tree').format(path))
            dirlist = [os.path.relpath(d, path)
                       for d in glob.glob(os.path.join(path, '*')) if
                       os.path.isdir(d)]

        # Format type?
        is_ext = utils.is_string(load_format)
        is_func = hasattr(load_format, '__call__')
        if not (is_ext or is_func):
            raise ValueError('Invalid load_format passed to load_tree')

        structures = []
        for d in dirlist:
            if is_ext:
                # File named after its folder, with the given extension
                s = ase_io.read(os.path.join(path, d, d + '.' + load_format),
                                **opt_args)
            elif is_func:
                s = load_format(os.path.join(path, d), **opt_args)

            structures.append(s)

        # Collection-level info is only available if metadata was loaded
        if check < 2:
            info = coll['info']
        else:
            info = {}

        loaded_coll = AtomsCollection(structures, info=info)

        # Arrays are only restored when the subfolder list came from the
        # metadata, so their order matches the structures
        if safety_check >= 2:
            arrays = coll['arrays']
            for k, a in arrays.items():
                loaded_coll.set_array(k, a)

        return loaded_coll
Example #5
0
    def save_tree(self, path, save_format, name_root='structure',
                  opt_args=None, safety_check=3):
        """Save the collection's structures as a series of folders, named like
        the structures, inside a given parent folder (that will be created if
        not present). Arrays and info are stored in a pickled .collection file
        which works as metadata for the whole directory tree.
        The files can be saved in a format of choice, or a function can be
        passed that will save them in a custom way. Only one collection can be
        saved per folder.

        | Args:
        |   path (str): folder path in which the collection should be saved.
        |   save_format (str or function): format in which the structures
        |                                  should be saved.
        |                                  If a string, it will be used as a
        |                                  file extension. If a function, it
        |                                  must take as arguments the
        |                                  structure (an ase.Atoms object)
        |                                  the save path (a string), and any
        |                                  additional arguments passed as
        |                                  opt_args, and take care of saving
        |                                  the required files.
        |   name_root (str): name prefix to be used for structures when a name
        |                    is not available in their info dictionary
        |   opt_args (dict): dictionary of additional arguments to pass to
        |                    either ase.io.write (if save_format is a string)
        |                    or to the save_format function. Default is None
        |                    (no extra arguments).
        |   safety_check (int): how much care should be taken not to overwrite
        |                       potentially important data in path. Can be a
        |                       number from 0 to 3.
        |                       Here's the meaning of the codes:
        |
        |                       3 (default): always ask before overwriting an
        |                         existing folder that passes the check_tree
        |                         control, raise an exception otherwise;
        |                       2: overwrite any folder that passes fully the
        |                          check_tree control, raise an exception
        |                          otherwise;
        |                       1: overwrite any folder that passes fully the
        |                          check_tree control, ask for user input
        |                          otherwise;
        |                       0 (DANGER - use at your own risk!): no checks,
        |                         always overwrite path.

        """

        # Avoid a shared mutable default argument; use a fresh dict per call
        if opt_args is None:
            opt_args = {}

        check = AtomsCollection.check_tree(path)

        def ow_ask(path):
            # Interactive yes/no prompt for overwrite permission
            return utils.safe_input(('Folder {0} exists, '
                                     'overwrite (y/n)?').format(path)
                                    ).lower() == 'y'

        if check > -1:
            # The folder exists
            if check == 0:
                # Valid collection tree: safe-ish to overwrite
                if safety_check >= 3:
                    # Ask for permission
                    perm = ow_ask(path)
                else:
                    perm = True
            else:
                # Not a valid collection tree
                if safety_check >= 2:
                    raise IOError(('Trying to overwrite folder {0} which did'
                                   ' not pass check_tree control (result {1})'
                                   ' with safety_check level '
                                   '{2}').format(path,
                                                 check,
                                                 safety_check))
                elif safety_check == 1:
                    perm = ow_ask(path)
                else:
                    perm = True

            if not perm:
                print('Can not overwrite folder {0}, skipping...'.format(path))
                # BUGFIX: previously execution fell through and deleted the
                # folder anyway despite the user denying permission
                return

            shutil.rmtree(path)

        # Re-create folder
        os.mkdir(path)

        # Format type?
        is_ext = utils.is_string(save_format)
        is_func = hasattr(save_format, '__call__')
        if not (is_ext or is_func):
            raise ValueError('Invalid save_format passed to save_tree')

        dirlist = []
        for i, s in enumerate(self.structures):
            # Fall back to a generated name when none is stored
            sname = s.info.get('name', '{0}_{1}'.format(name_root, i+1))
            fold = os.path.join(path, sname)
            try:
                os.mkdir(fold)
            except OSError:
                # Folder already exists: wipe it and start clean
                shutil.rmtree(fold)
                os.mkdir(fold)
            if is_ext:
                ase_io.write(os.path.join(fold, sname + '.' + save_format), s,
                             **opt_args)
            elif is_func:
                save_format(s, fold, **opt_args)

            dirlist.append(sname)

        # Pickled metadata for the whole tree; protocol 2 keeps it loadable
        # by older Python versions
        with open(os.path.join(path, '.collection'), 'wb') as f:
            pickle.dump({'dirlist': dirlist,
                         'arrays': self._arrays,
                         'info': self.info}, f,
                        protocol=2)
Example #6
0
    def __init__(self, structures=None,
                 info=None,
                 cell_reduce=False,
                 progress=False, suppress_ase_warnings=True):
        """
        Initialize the AtomsCollection

        | Args:
        |    structures (list[str] or list[ase.Atoms]): list of file names or
        |                                               Atoms that will form
        |                                               the collection
        |                                               (default: empty)
        |    info (dict): dictionary of general information to attach
        |                 to this collection (default: empty)
        |    cell_reduce (bool): if True, perform a Niggli cell reduction on
        |                        all loaded structures
        |    progress (bool): visualize a progress bar for the loading process
        |    suppress_ase_warnings (bool): suppress annoying ASE warnings when
        |                                  loading files (default is True)

        | Raises:
        |    TypeError: if a structure is neither an Atoms object nor a
        |               string, or if info is not a dict.
        """

        # Avoid shared mutable default arguments
        if structures is None:
            structures = []
        if info is None:
            info = {}

        # Start by parsing out the structures
        self.structures = []

        if isinstance(structures, ase.Atoms):
            # Well, it's just one...
            structures = [structures]
        elif inspect.isgenerator(structures):
            # Let's unravel it
            structures = list(structures)

        if progress:
            sys.stdout.write("Loading collection...\n")
        s_n = len(structures)
        for s_i, struct in enumerate(structures):
            if progress:
                # Progress bar
                sys.stdout.write("\rLoading: {0}".format(utils.progbar(s_i+1,
                                                                       s_n)))
            # Is it an Atoms object?
            if type(struct) is ase.Atoms:
                self.structures.append(ase.Atoms(struct))
                # Copy all arrays
                for k in struct.arrays.keys():
                    if not self.structures[-1].has(k):
                        self.structures[-1].new_array(k, struct.get_array(k))
                if struct.calc is not None:
                    # Prevents pointless attempts at re-calculating
                    self.structures[-1].calc._old_atoms = self.structures[-1]
            # Or is it a string?
            elif utils.is_string(struct):
                with utils.silence_stdio(suppress_ase_warnings,
                                         suppress_ase_warnings):
                    self.structures.append(ase_io.read(str(struct)))
                # If there's no name, give it the filename
                if 'name' not in self.structures[-1].info:
                    self.structures[-1].info['name'] = utils.seedname(struct)
            else:
                raise TypeError('Structures must be Atoms objects or valid '
                                'file names,'
                                ' not {0}'.format(type(struct).__name__))
            if cell_reduce:
                # Here we must keep the energy if it was present
                # We do this by hand because ASE has its good reasons
                # for severing the atoms-calculator connection when changing
                # the unit cell.
                try:
                    _E = self.structures[-1].calc.results['energy']
                except (KeyError, AttributeError):
                    _E = None
                niggli_reduce(self.structures[-1])
                if _E is not None:
                    _calc = SinglePointCalculator(self.structures[-1],
                                                  energy=_E)
                    self.structures[-1].set_calculator(_calc)

        if progress:
            sys.stdout.write('\nLoaded {0} structures\n'.format(s_n))

        self._all = _AllCaller(self.structures, ase.Atoms)

        self._arrays = {}

        # Now assign the info
        if type(info) is not dict:
            raise TypeError('Info must be dict,'
                            ' not {0}'.format(type(info).__name__))
        else:
            # Copy so later caller-side mutations don't leak in
            self.info = info.copy()
Example #7
0
    def __init__(self,
                 name,
                 queue,
                 submit_script,
                 max_jobs=4,
                 check_time=10,
                 max_time=3600,
                 temp_folder=None,
                 remote_workdir=None,
                 remote_getfiles=None,
                 ssh_timeout=1.0,
                 continuation=False):
        """Initialize the Submitter object

        | Args:
        |   name (str): name to be used for this Submitter (two Submitters
        |               with the same name can't be launched in the same
        |               working directory)
        |   queue (QueueInterface): object describing the properties of the
        |                           interface to the queue system in use
        |   submit_script (str): text of the script to use when submitting a
        |                        job to the queue. All tags of the form <name>
        |                        will be replaced with the job's name, and all
        |                        similar tags of the form <[arg]> will be
        |                        replaced if the argument name is present in
        |                        the job's args dictionary
        |   max_jobs (Optional[int]): maximum number of jobs to submit at a
        |                             given time. Default is 4
        |   check_time (Optional[float]): time in seconds between consecutive
        |                                 checks for the queue status and
        |                                 attempts to submit new jobs. Default
        |                                 is 10
        |   max_time (Optional[float]): time in seconds the Submitter will run
        |                               for before shutting down. If set to
        |                               zero the thread won't stop until
        |                               killed with Submitter.stop.
        |   temp_folder (Optional[str]): where to store the temporary folders
        |                                for the calculations. By default it's
        |                                the current folder.
        |   remote_workdir (Optional[str]): if present, uses a directory on a
        |                                   remote machine by logging in via
        |                                   SSH. Must be in the format
        |                                   <host>:<path/to/directory>.
        |                                   Host must be defined in the user's
        |                                   ~/.ssh/config file - check the
        |                                   docs for RemoteTarget for more
        |                                   information. It is possible to
        |                                   omit the colon and directory, that
        |                                   will use the home directory of the
        |                                   given folder; that is HEAVILY
        |                                   DISCOURAGED though. Best practice
        |                                   would be to create an empty
        |                                   directory on the remote machine
        |                                   and use that, to avoid accidental
        |                                   overwriting/deleting of important
        |                                   files.
        |   remote_getfiles (Optional[list(str)]): list of files to be
        |                                          downloaded from the remote
        |                                          copy of the job's temporary
        |                                          directory. By default, all
        |                                          of them ('*.*'). Can be a
        |                                          list using specific names,
        |                                          wildcards etc. Filenames
        |                                          can also use the
        |                                          placeholder {name} to
        |                                          signify the job name, as
        |                                          well as any other element
        |                                          from the arguments.
        |   ssh_timeout (Optional[float]): connection timeout in seconds
        |                                  (default is 1 second)
        |   continuation (Optional[bool]): if True, when the Submitter is
        |                                  stopped it will not terminate the
        |                                  current jobs; rather, it will store
        |                                  the list in a pickle file.
        |                                  If the submitter is ran from the
        |                                  same folder then it will "pick up
        |                                  from where it left" and try
        |                                  recovering those jobs, then
        |                                  restart. If one wishes for
        |                                  additional values to be saved and
        |                                  restored, the save_state and
        |                                  load_state methods need to be
        |                                  defined.

        | Raises:
        |   TypeError: if queue is not a QueueInterface, or submit_script is
        |              not a string.

        """

        # Avoid a shared mutable default argument for remote_getfiles
        if remote_getfiles is None:
            remote_getfiles = ['*.*']

        # Check type
        if not isinstance(queue, QueueInterface):
            raise TypeError('A QueueInterface must be passed to the '
                            'Submitter')

        if not is_string(submit_script):
            raise TypeError('submit_script must be a string')

        self.name = name
        self.queue = queue
        self.submit_script = submit_script
        self.max_jobs = max_jobs
        self.check_time = check_time
        # max_time == 0 means "run until explicitly stopped"
        self.max_time = max_time if max_time > 0 else np.inf
        self.tmp_dir = (os.path.abspath(temp_folder)
                        if temp_folder is not None else '')

        # User defined signals (not all platforms define SIGUSR1/2)
        self._free_signals = [
            signal.__dict__[s] for s in ('SIGUSR1', 'SIGUSR2')
            if s in signal.__dict__
        ]
        self._user_signals = {}

        self._log = None  # Will keep track of failed jobs etc.

        # Remote directory?
        if remote_workdir is None:
            self.host = None
        else:
            if ':' in remote_workdir:
                # <host>:<path/to/directory>
                self.host, self.hostdir = remote_workdir.split(':', 1)
            else:
                # Host only: remote home directory will be used (discouraged)
                self.host = remote_workdir
                self.hostdir = ''

        self.remote_getfiles = remote_getfiles

        self.queue.set_remote_host(self.host, ssh_timeout)

        self.continuation = continuation
Example #8
0
    def extract(s, vdw_set, vdw_scale, default_vdw, species_1, species_2,
                max_coord):
        """Build a coordination number histogram for structure s.

        Counts, for every species pair (s1, s2), how many atoms of species
        s1 have a given number of bonds to atoms of species s2.
        Coordinations above max_coord are folded into the last bin.

        | Args:
        |   s (ase.Atoms): structure to analyse.
        |   vdw_set, vdw_scale, default_vdw: Van der Waals parameters
        |                                    forwarded to the Bonds property.
        |   species_1 (str or list or None): first species selection; None
        |                                    means all elements present.
        |   species_2 (str or list or None): second species selection; None
        |                                    means all elements present.
        |   max_coord (int): highest coordination number to bin separately.

        | Returns:
        |   hist (dict): hist[s1][s2] is a numpy array of length
        |                max_coord + 1 with the coordination counts.
        """

        elems = np.array(s.get_chemical_symbols())

        # Normalise species selections to arrays of element symbols
        if species_1 is None:
            species_1 = np.unique(elems)
        elif is_string(species_1):
            species_1 = np.array([species_1])

        if species_2 is None:
            species_2 = np.unique(elems)
        elif is_string(species_2):
            species_2 = np.array([species_2])

        # Initialise the histogram up front: the no-bonds early return below
        # previously referenced hist before it was defined (NameError)
        hist = {
            s1: {s2: np.zeros(max_coord + 1)
                 for s2 in species_2}
            for s1 in species_1
        }

        # Get the bonds
        bond_calc = Bonds({
            'vdw_set': vdw_set,
            'vdw_scale': vdw_scale,
            'default_vdw': default_vdw
        })
        bonds = bond_calc(s)
        # What if there are none?
        if len(bonds) == 0:
            # Just return the (all-zeros) histogram
            print('WARNING: no bonds detected for CoordinationHistogram')
            return hist
        # Both atom indices of every bond, concatenated: entry i and entry
        # i - bN refer to the two partners of the same bond
        bond_inds = np.concatenate(list(zip(*bonds))[:2])
        bond_elems = elems[bond_inds]
        bN = len(bonds)

        for s1 in species_1:
            # Which atoms are of species 1, and what are they bonded to?
            i1 = np.where(bond_elems == s1)[0]
            b1 = bond_inds[i1]
            be1 = bond_elems[(i1 - bN).astype(int)]
            for s2 in species_2:
                # Which ones are bonded to species 2?
                i2 = np.where(be1 == s2)
                b2 = b1[i2]
                b2, counts = np.unique(b2, return_counts=True)
                hist_i, hist_n = np.unique(counts, return_counts=True)
                # Fix for numbers that are too high...
                hist_big = np.where(hist_i > max_coord)[0]
                if (len(hist_big) > 0):
                    # In this case find the max_coord index, if absent add it
                    hist_maxc = np.where(hist_i == max_coord)[0]
                    if len(hist_maxc) == 0:
                        hist_i = np.concatenate([hist_i, [max_coord]])
                        hist_n = np.concatenate([hist_n, [0]])
                        hist_maxc = [-1]
                    hist_n[hist_maxc] += np.sum(hist_n[hist_big])
                    # Then slice away, keep only the admissible indices
                    hist_small = np.where(hist_i <= max_coord)[0]
                    hist_i = hist_i[hist_small]
                    hist_n = hist_n[hist_small]
                hist[s1][s2][hist_i] += hist_n

        return hist