def interpret_sel(s, sel):
    # Interpret a selection argument: None selects all atoms, a string is
    # treated as an element symbol, and a callable is applied directly to
    # the structure (unrecognised selectors fall through and return None)
    if sel is None:
        return AtomSelection.all(s)
    elif is_string(sel):
        return AtomSelection.from_element(s, sel)
    elif hasattr(sel, '__call__'):
        return sel(s)
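# A minimal usage sketch for interpret_sel, assuming 's' is any ase.Atoms
# object; it shows the three selector forms the dispatcher accepts
# (my_selector is a hypothetical user-defined callable):
#
#   interpret_sel(s, None)          # every atom in the structure
#   interpret_sel(s, 'C')           # all atoms of element C
#   interpret_sel(s, my_selector)   # any callable taking s and returning
#                                   # an AtomSelection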
def put_files(self, localpaths, remotedir):
    """ Copy files to the remote machine via SFTP.

    | Args:
    |   localpaths (str or [str]): path of file(s) to copy. Can include
    |                              wildcards.
    |   remotedir (str): remote directory to copy the file(s) into.

    """

    if is_string(localpaths):
        localpaths = [localpaths]

    for lpath in localpaths:
        files = glob.glob(lpath)
        for f in files:
            _, fname = os.path.split(f)
            self._sftp.put(f, os.path.join(remotedir, fname), confirm=True)
def get_files(self, remotepaths, localdir):
    """ Download files from the remote machine via SFTP.

    | Args:
    |   remotepaths (str or [str]): path of file(s) to copy. Can include
    |                               wildcards.
    |   localdir (str): local directory to copy the file(s) into.

    """

    if is_string(remotepaths):
        remotepaths = [remotepaths]

    for rpath in remotepaths:
        remotedir, remotefiles = os.path.split(rpath)
        # SFTP has no server-side globbing, so list the remote directory
        # and filter the names locally with fnmatch
        all_files = self._sftp.listdir(remotedir)
        files = fnmatch.filter(all_files, remotefiles)
        for f in files:
            _, fname = os.path.split(f)
            self._sftp.get(os.path.join(remotedir, f),
                           os.path.join(localdir, fname))
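# A minimal usage sketch for the two SFTP helpers above, assuming 'target'
# is an instance of the enclosing class with an open SFTP session in
# self._sftp; the remote paths and file patterns are hypothetical:
#
#   target.put_files('inputs/*.in', '/scratch/user/job01')
#   # ... run the remote job ...
#   target.get_files('/scratch/user/job01/*.out', 'results')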
def load_tree(path, load_format, opt_args={}, safety_check=3):
    """Load a collection's structures from a series of folders, named like
    the structures, inside a given parent folder, as created by save_tree.
    The files can be loaded from a format of choice, or a function can be
    passed that will load them in a custom way.

    | Args:
    |   path (str): folder path from which the collection should be
    |               loaded.
    |   load_format (str or function): format from which the structures
    |                                  should be loaded.
    |                                  If a string, it will be used as a
    |                                  file extension. If a function, it
    |                                  must take as arguments the load
    |                                  path (a string) and any additional
    |                                  arguments passed as opt_args, and
    |                                  return the loaded structure as an
    |                                  ase.Atoms object.
    |   opt_args (dict): dictionary of additional arguments to pass to
    |                    either ase.io.read (if load_format is a string)
    |                    or to the load_format function.
    |   safety_check (int): how much care should be taken to verify the
    |                       folder that is being loaded. Can be a number
    |                       from 0 to 3.
    |                       Here's the meaning of the codes:
    |
    |                       3 (default): only load a folder if it passes
    |                       fully the check_tree control;
    |                       2: load any folder that has a valid
    |                       .collection file, but only the listed
    |                       subfolders;
    |                       1: load any folder that has a valid
    |                       .collection file, all subfolders. Array
    |                       data will be discarded;
    |                       0: no checks, try to load from all subfolders.

    | Returns:
    |   coll (AtomsCollection): loaded collection

    """

    check = AtomsCollection.check_tree(path)

    if check == -1:
        raise IOError('Folder {0} does not exist'.format(path))

    dirlist = []
    if check < 2:
        # A valid .collection file is present
        with open(os.path.join(path, '.collection'), 'rb') as f:
            coll = pickle.load(f)
        if check == 1 and safety_check == 3:
            raise IOError(('Folder {0} is not a valid collection '
                           'tree').format(path))
        if safety_check >= 2:
            dirlist = coll['dirlist']
        else:
            dirlist = [os.path.relpath(d, path)
                       for d in glob.glob(os.path.join(path, '*'))
                       if os.path.isdir(d)]
    else:
        # No valid .collection file found
        if safety_check > 0:
            raise IOError(('Folder {0} is not a valid collection '
                           'tree').format(path))
        dirlist = [os.path.relpath(d, path)
                   for d in glob.glob(os.path.join(path, '*'))
                   if os.path.isdir(d)]

    # Format type?
    is_ext = utils.is_string(load_format)
    is_func = hasattr(load_format, '__call__')

    if not (is_ext or is_func):
        raise ValueError('Invalid load_format passed to load_tree')

    structures = []
    for d in dirlist:
        if is_ext:
            s = ase_io.read(os.path.join(path, d, d + '.' + load_format),
                            **opt_args)
        elif is_func:
            s = load_format(os.path.join(path, d), **opt_args)
        structures.append(s)

    if check < 2:
        info = coll['info']
    else:
        info = {}

    loaded_coll = AtomsCollection(structures, info=info)

    if safety_check >= 2:
        arrays = coll['arrays']
        for k, a in arrays.items():
            loaded_coll.set_array(k, a)

    return loaded_coll
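# A usage sketch for load_tree, assuming it is exposed as a static method
# of AtomsCollection (as its signature suggests); 'my_tree' and
# 'geometry.cif' are hypothetical names:
#
#   coll = AtomsCollection.load_tree('my_tree', 'cif')
#
#   def my_loader(fold):
#       # Custom loader: receives the subfolder path, returns ase.Atoms
#       return ase_io.read(os.path.join(fold, 'geometry.cif'))
#
#   coll = AtomsCollection.load_tree('my_tree', my_loader, safety_check=2)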
def save_tree(self, path, save_format, name_root='structure',
              opt_args={}, safety_check=3):
    """Save the collection's structures as a series of folders, named like
    the structures, inside a given parent folder (that will be created if
    not present). Arrays and info are stored in a pickled .collection file
    which works as metadata for the whole directory tree.
    The files can be saved in a format of choice, or a function can be
    passed that will save them in a custom way. Only one collection can be
    saved per folder.

    | Args:
    |   path (str): folder path in which the collection should be saved.
    |   save_format (str or function): format in which the structures
    |                                  should be saved.
    |                                  If a string, it will be used as a
    |                                  file extension. If a function, it
    |                                  must take as arguments the
    |                                  structure (an ase.Atoms object),
    |                                  the save path (a string), and any
    |                                  additional arguments passed as
    |                                  opt_args, and take care of saving
    |                                  the required files.
    |   name_root (str): name prefix to be used for structures when a name
    |                    is not available in their info dictionary
    |   opt_args (dict): dictionary of additional arguments to pass to
    |                    either ase.io.write (if save_format is a string)
    |                    or to the save_format function.
    |   safety_check (int): how much care should be taken not to overwrite
    |                       potentially important data in path. Can be a
    |                       number from 0 to 3.
    |                       Here's the meaning of the codes:
    |
    |                       3 (default): always ask before overwriting an
    |                       existing folder that passes the check_tree
    |                       control, raise an exception otherwise;
    |                       2: overwrite any folder that passes fully the
    |                       check_tree control, raise an exception
    |                       otherwise;
    |                       1: overwrite any folder that passes fully the
    |                       check_tree control, ask for user input
    |                       otherwise;
    |                       0 (DANGER - use at your own risk!): no checks,
    |                       always overwrite path.

    """

    check = AtomsCollection.check_tree(path)

    def ow_ask(path):
        return utils.safe_input(('Folder {0} exists, '
                                 'overwrite (y/n)?').format(path)
                                ).lower() == 'y'

    if check > -1:
        # The folder exists
        if check == 0:
            # It passes the check_tree control
            if safety_check >= 3:
                # Ask for permission
                perm = ow_ask(path)
            else:
                perm = True
        else:
            if safety_check >= 2:
                raise IOError(('Trying to overwrite folder {0} which did'
                               ' not pass check_tree control (result {1})'
                               ' with safety_check level '
                               '{2}').format(path, check, safety_check))
            elif safety_check == 1:
                perm = ow_ask(path)
            else:
                perm = True

        if not perm:
            print('Can not overwrite folder {0}, skipping...'.format(path))
            return
        shutil.rmtree(path)

    # Re-create folder
    os.mkdir(path)

    # Format type?
    is_ext = utils.is_string(save_format)
    is_func = hasattr(save_format, '__call__')

    if not (is_ext or is_func):
        raise ValueError('Invalid save_format passed to save_tree')

    dirlist = []
    for i, s in enumerate(self.structures):
        sname = s.info.get('name', '{0}_{1}'.format(name_root, i+1))
        fold = os.path.join(path, sname)
        try:
            os.mkdir(fold)
        except OSError:
            shutil.rmtree(fold)
            os.mkdir(fold)
        if is_ext:
            ase_io.write(os.path.join(fold, sname + '.' + save_format),
                         s, **opt_args)
        elif is_func:
            save_format(s, fold, **opt_args)
        dirlist.append(sname)

    with open(os.path.join(path, '.collection'), 'wb') as f:
        pickle.dump({'dirlist': dirlist,
                     'arrays': self._arrays,
                     'info': self.info}, f, protocol=2)
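# A usage sketch for save_tree, with both the extension and the custom
# function forms of save_format; 'my_tree' is a hypothetical folder name:
#
#   coll.save_tree('my_tree', 'cif')
#
#   def my_writer(s, fold):
#       # Custom writer: receives the structure and its destination folder
#       ase_io.write(os.path.join(fold, 'geometry.cif'), s)
#
#   coll.save_tree('my_tree', my_writer, safety_check=1)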
def __init__(self, structures=[], info={}, cell_reduce=False,
             progress=False, suppress_ase_warnings=True):
    """
    Initialize the AtomsCollection

    | Args:
    |   structures (list[str] or list[ase.Atoms]): list of file names or
    |                                              Atoms that will form
    |                                              the collection
    |   info (dict): dictionary of general information to attach
    |                to this collection
    |   cell_reduce (bool): if True, perform a Niggli cell reduction on
    |                       all loaded structures
    |   progress (bool): visualize a progress bar for the loading process
    |   suppress_ase_warnings (bool): suppress annoying ASE warnings when
    |                                 loading files (default is True)
    """

    # Start by parsing out the structures
    self.structures = []

    if isinstance(structures, ase.Atoms):
        # Well, it's just one...
        structures = [structures]
    elif inspect.isgenerator(structures):
        # Let's unravel it
        structures = list(structures)

    if progress:
        sys.stdout.write("Loading collection...\n")
    s_n = len(structures)
    for s_i, struct in enumerate(structures):
        if progress:
            # Progress bar
            sys.stdout.write("\rLoading: {0}".format(utils.progbar(s_i+1,
                                                                   s_n)))
        # Is it an Atoms object?
        if type(struct) is ase.Atoms:
            self.structures.append(ase.Atoms(struct))
            # Copy all arrays
            for k in struct.arrays.keys():
                if not self.structures[-1].has(k):
                    self.structures[-1].new_array(k, struct.get_array(k))
            if struct.calc is not None:
                # Prevents pointless attempts at re-calculating
                self.structures[-1].calc._old_atoms = self.structures[-1]
        # Or is it a string?
        elif utils.is_string(struct):
            with utils.silence_stdio(suppress_ase_warnings,
                                     suppress_ase_warnings):
                self.structures.append(ase_io.read(str(struct)))
            # If there's no name, give it the filename
            if 'name' not in self.structures[-1].info:
                self.structures[-1].info['name'] = utils.seedname(struct)
        else:
            raise TypeError('Structures must be Atoms objects or valid '
                            'file names,'
                            ' not {0}'.format(type(struct).__name__))
        # Applied per structure, so that all of them get reduced
        if cell_reduce:
            # Here we must keep the energy if it was present.
            # We do this by hand because ASE has its good reasons
            # for severing the atoms-calculator connection when changing
            # the unit cell.
            try:
                _E = self.structures[-1].calc.results['energy']
            except (KeyError, AttributeError):
                _E = None
            niggli_reduce(self.structures[-1])
            if _E is not None:
                _calc = SinglePointCalculator(self.structures[-1],
                                              energy=_E)
                self.structures[-1].set_calculator(_calc)

    if progress:
        sys.stdout.write('\nLoaded {0} structures\n'.format(s_n))

    self._all = _AllCaller(self.structures, ase.Atoms)
    self._arrays = {}

    # Now assign the info
    if type(info) is not dict:
        raise TypeError('Info must be dict,'
                        ' not {0}'.format(type(info).__name__))
    else:
        self.info = info.copy()
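# A minimal construction sketch; the file names are hypothetical
# placeholders, and ase.build.bulk stands in for any ase.Atoms source:
#
#   from ase.build import bulk
#
#   coll = AtomsCollection(['struct_1.cif', 'struct_2.cif'],
#                          info={'origin': 'test set'}, progress=True)
#   coll = AtomsCollection([bulk('Si'), bulk('Al')], cell_reduce=True)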
def __init__(self, name, queue, submit_script, max_jobs=4, check_time=10,
             max_time=3600, temp_folder=None, remote_workdir=None,
             remote_getfiles=['*.*'], ssh_timeout=1.0, continuation=False):
    """Initialize the Submitter object

    | Args:
    |   name (str): name to be used for this Submitter (two Submitters
    |               with the same name can't be launched in the same
    |               working directory)
    |   queue (QueueInterface): object describing the properties of the
    |                           interface to the queue system in use
    |   submit_script (str): text of the script to use when submitting a
    |                        job to the queue. All tags of the form <name>
    |                        will be replaced with the job's name, and all
    |                        similar tags of the form <[arg]> will be
    |                        replaced if the argument name is present in
    |                        the job's args dictionary
    |   max_jobs (Optional[int]): maximum number of jobs to submit at a
    |                             given time. Default is 4
    |   check_time (Optional[float]): time in seconds between consecutive
    |                                 checks for the queue status and
    |                                 attempts to submit new jobs. Default
    |                                 is 10
    |   max_time (Optional[float]): time in seconds the Submitter will run
    |                               for before shutting down. If set to
    |                               zero the thread won't stop until
    |                               killed with Submitter.stop.
    |   temp_folder (Optional[str]): where to store the temporary folders
    |                                for the calculations. By default it's
    |                                the current folder.
    |   remote_workdir (Optional[str]): if present, uses a directory on a
    |                                   remote machine by logging in via
    |                                   SSH. Must be in the format
    |                                   <host>:<path/to/directory>.
    |                                   Host must be defined in the user's
    |                                   ~/.ssh/config file - check the
    |                                   docs for RemoteTarget for more
    |                                   information. It is possible to
    |                                   omit the colon and directory; in
    |                                   that case the home directory on
    |                                   the given host will be used. That
    |                                   is HEAVILY DISCOURAGED though.
    |                                   Best practice would be to create
    |                                   an empty directory on the remote
    |                                   machine and use that, to avoid
    |                                   accidental overwriting/deleting
    |                                   of important files.
    |   remote_getfiles (Optional[list(str)]): list of files to be
    |                                          downloaded from the remote
    |                                          copy of the job's temporary
    |                                          directory. By default, all
    |                                          of them. Can be a list
    |                                          using specific names,
    |                                          wildcards etc. Filenames
    |                                          can also use the
    |                                          placeholder {name} to
    |                                          signify the job name, as
    |                                          well as any other element
    |                                          from the arguments.
    |   ssh_timeout (Optional[float]): connection timeout in seconds
    |                                  (default is 1 second)
    |   continuation (Optional[bool]): if True, when the Submitter is
    |                                  stopped it will not terminate the
    |                                  current jobs; rather, it will store
    |                                  the list in a pickle file.
    |                                  If the Submitter is run from the
    |                                  same folder then it will "pick up
    |                                  from where it left off" and try
    |                                  recovering those jobs, then
    |                                  restart. If one wishes for
    |                                  additional values to be saved and
    |                                  restored, the save_state and
    |                                  load_state methods need to be
    |                                  defined.

    """

    # Check type
    if not isinstance(queue, QueueInterface):
        raise TypeError('A QueueInterface must be passed to the '
                        'Submitter')
    if not is_string(submit_script):
        raise TypeError('submit_script must be a string')

    self.name = name
    self.queue = queue
    self.submit_script = submit_script
    self.max_jobs = max_jobs
    self.check_time = check_time
    self.max_time = max_time if max_time > 0 else np.inf
    self.tmp_dir = (os.path.abspath(temp_folder)
                    if temp_folder is not None else '')

    # User defined signals
    self._free_signals = [signal.__dict__[s]
                          for s in ('SIGUSR1', 'SIGUSR2')
                          if s in signal.__dict__]
    self._user_signals = {}

    self._log = None  # Will keep track of failed jobs etc.

    # Remote directory?
    if remote_workdir is None:
        self.host = None
        self.hostdir = ''  # Keep the attribute defined even with no host
    else:
        if ':' in remote_workdir:
            self.host, self.hostdir = remote_workdir.split(':', 1)
        else:
            self.host = remote_workdir
            self.hostdir = ''
    self.remote_getfiles = remote_getfiles

    self.queue.set_remote_host(self.host, ssh_timeout)

    self.continuation = continuation
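# A construction sketch for the Submitter, assuming 'qi' is an already
# configured QueueInterface; the script text, the 'procs' argument and the
# host alias 'myhpc' are hypothetical. The <name> and <[procs]> tags are
# replaced per job, as described in the docstring:
#
#   script = 'mpirun -n <[procs]> my_code <name>.in'
#   sub = Submitter('my_sub', qi, script, max_jobs=10, check_time=30,
#                   remote_workdir='myhpc:/scratch/user/submitter_runs')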
def extract(s, vdw_set, vdw_scale, default_vdw, species_1, species_2,
            max_coord):

    elems = np.array(s.get_chemical_symbols())
    # Get the bonds
    bond_calc = Bonds({'vdw_set': vdw_set, 'vdw_scale': vdw_scale,
                       'default_vdw': default_vdw})
    bonds = bond_calc(s)

    if species_1 is None:
        species_1 = np.unique(elems)
    elif is_string(species_1):
        species_1 = np.array([species_1])
    if species_2 is None:
        species_2 = np.unique(elems)
    elif is_string(species_2):
        species_2 = np.array([species_2])

    # Initialise the histogram (this must happen before the early return
    # below, or the 'no bonds' branch would reference an undefined name)
    hist = {s1: {s2: np.zeros(max_coord + 1) for s2 in species_2}
            for s1 in species_1}

    # What if there are none?
    if len(bonds) == 0:
        # Just return the empty histogram
        print('WARNING: no bonds detected for CoordinationHistogram')
        return hist

    bond_inds = np.concatenate(list(zip(*bonds))[:2])
    bond_elems = elems[bond_inds]
    bN = len(bonds)

    for s1 in species_1:
        # Which atoms are of species 1, and what are they bonded to?
        i1 = np.where(bond_elems == s1)[0]
        b1 = bond_inds[i1]
        # bond_inds holds the first atoms of all bonds followed by the
        # second ones; shifting the index by -bN (negative indexing)
        # picks out each atom's bond partner
        be1 = bond_elems[(i1 - bN).astype(int)]
        for s2 in species_2:
            # Which ones are bonded to species 2?
            i2 = np.where(be1 == s2)
            b2 = b1[i2]
            b2, counts = np.unique(b2, return_counts=True)
            hist_i, hist_n = np.unique(counts, return_counts=True)
            # Fix for numbers that are too high...
            hist_big = np.where(hist_i > max_coord)[0]
            if len(hist_big) > 0:
                # In this case find the max_coord index, if absent add it
                hist_maxc = np.where(hist_i == max_coord)[0]
                if len(hist_maxc) == 0:
                    hist_i = np.concatenate([hist_i, [max_coord]])
                    hist_n = np.concatenate([hist_n, [0]])
                    hist_maxc = [-1]
                hist_n[hist_maxc] += np.sum(hist_n[hist_big])
                # Then slice away, keep only the admissible indices
                hist_small = np.where(hist_i <= max_coord)[0]
                hist_i = hist_i[hist_small]
                hist_n = hist_n[hist_small]
            hist[s1][s2][hist_i] += hist_n

    return hist
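# A usage sketch for extract: counting how many H atoms each C atom is
# bonded to. The vdW settings are hypothetical placeholder values:
#
#   hist = extract(s, vdw_set='csd', vdw_scale=1.0, default_vdw=2.0,
#                  species_1='C', species_2='H', max_coord=6)
#   # hist['C']['H'][n] = number of C atoms bonded to exactly n H atoms,
#   # with n capped at max_coord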