def _reset_error_record(record):
    """
    Reset a single calculation record file whose status is 'error' back to
    'not calculated', deleting the stored error message.

    Parameters
    ----------
    record : str
        Path to the JSON record file to check and rewrite in place.
    """
    with open(record) as f:
        model = DM(f)
    # The record's single root key identifies the record style
    # (py3 fix: dict.keys() is a view, so index through list())
    key = list(model.keys())[0]
    try:
        if model[key]['status'] == 'error':
            model[key]['status'] = 'not calculated'
            del model[key]['error']
            with open(record, 'w') as f:
                model.json(fp=f, indent=4)
    except Exception:
        # Records without a status field are deliberately left untouched
        pass

def main(*args):
    """
    Reset a run/library directory pair to a re-runnable state: un-archive
    calculations back into the run directory, clear stale bid/result files,
    and flip 'error' records back to 'not calculated'.
    """
    # Read in input script terms
    run_directory, lib_directory = __initial_setup(*args)

    # Extract archived calculations back into the run directory
    for simmy in glob.iglob(os.path.join(lib_directory, '*', '*', '*', '*', '*.tar.gz')):
        tarry = None
        try:
            tarry = tarfile.open(simmy, 'r:gz')
            tarry.extractall(run_directory)
            tarry.close()
            os.remove(simmy)
        except Exception:
            print('Failed to extract', simmy)
            # Only close if open() succeeded (original closed unconditionally,
            # raising NameError when tarfile.open itself failed)
            if tarry is not None:
                tarry.close()

    # Remove any leftover bid files
    for biddy in glob.iglob(os.path.join(run_directory, '*', '*.bid')):
        os.remove(biddy)

    # Remove any leftover results files
    for result in glob.iglob(os.path.join(run_directory, '*', 'results.json')):
        os.remove(result)

    # Reset error records in the run directory
    for record in glob.iglob(os.path.join(run_directory, '*', '*.json')):
        _reset_error_record(record)

    # Reset error records in the library directory
    for record in glob.iglob(os.path.join(lib_directory, '*', '*', '*', '*', '*.json')):
        _reset_error_record(record)
def runner(dbase, run_directory, orphan_directory=None, hold_directory=None):
    """
    High-throughput calculation runner.

    Parameters
    ----------
    dbase : iprPy.Database
        The database to interact with.
    run_directory : str
        The path to the directory where the calculation instances to run are
        located.
    orphan_directory : str, optional
        The path for the orphan directory where incomplete calculations are
        moved.  If None (default) then will use 'orphan' at the same level as
        the run_directory.
    hold_directory : str, optional
        The path for the hold directory where tar archives that failed to be
        uploaded are moved to.  If None (default) then will use 'hold' at the
        same level as the run_directory.
    """
    # Get path to Python executable running this script
    py_exe = sys.executable
    if py_exe is None:
        py_exe = 'python'

    # Get absolute path to run_directory
    run_directory = os.path.abspath(run_directory)

    # Get original working directory
    original_dir = os.getcwd()

    # Define runner log file
    d = datetime.datetime.now()
    pid = os.getpid()
    runner_log_dir = os.path.join(os.path.dirname(rootdir), 'runner-logs')
    if not os.path.isdir(runner_log_dir):
        os.makedirs(runner_log_dir)
    # BUG FIX: the hour field was missing from the log file name, so runs in
    # different hours of the same day could collide and did not sort in time
    # order.  Format is now year-month-day-hour-minute-second-microsecond-pid.
    log_file = os.path.join(runner_log_dir,
                            '%04i-%02i-%02i-%02i-%02i-%02i-%06i-%i.log'
                            % (d.year, d.month, d.day, d.hour, d.minute,
                               d.second, d.microsecond, pid))

    # Set default orphan_directory
    if orphan_directory is None:
        orphan_directory = os.path.join(os.path.dirname(run_directory), 'orphan')

    # Set default hold_directory
    if hold_directory is None:
        hold_directory = os.path.join(os.path.dirname(run_directory), 'hold')

    # Start runner log file
    with open(log_file, 'a') as log:

        # Change to the run directory
        os.chdir(run_directory)

        # Initialize bidfailcount counter
        bidfailcount = 0

        # Announce the runner's pid
        print(f'Runner started with pid {pid}', flush=True)

        # flist is the running list of calculations
        flist = os.listdir(run_directory)
        while len(flist) > 0:

            # Pick a random calculation from the list
            index = random.randint(0, len(flist)-1)
            sim = flist[index]

            # Submit a bid and check if it succeeded
            if bid(sim):
                # Reset bidfailcount
                bidfailcount = 0

                # Move to simulation directory
                os.chdir(sim)
                log.write('%s\n' % sim)

                # Check that the calculation has calc_*.py, calc_*.in and
                # record in the database
                try:
                    record = dbase.get_record(name=sim)
                    calc_py = get_file('calc_*.py')
                    calc_in = get_file('calc_*.in')

                # Pass ConnectionErrors forward killing runner
                except requests.ConnectionError as e:
                    raise requests.ConnectionError(e)

                # If not complete, zip and move to the orphan directory
                except:
                    log.write('Incomplete simulation: moved to orphan directory\n\n')
                    os.chdir(run_directory)
                    if not os.path.isdir(orphan_directory):
                        os.makedirs(orphan_directory)
                    shutil.make_archive(os.path.join(orphan_directory, sim), 'gztar',
                                        root_dir=run_directory, base_dir=sim)
                    removecalc(os.path.join(run_directory, sim))
                    flist = os.listdir(run_directory)
                    continue

                # Check if any files in the calculation folder are incomplete
                # records
                error_flag = False
                ready_flag = True
                for fname in glob.iglob('*'):
                    parent_sim, ext = os.path.splitext(os.path.basename(fname))
                    if ext in ('.json', '.xml'):
                        parent = DM(fname)
                        try:
                            status = parent.find('status')

                            # Check parent record in database to see if it
                            # has completed
                            if status == 'not calculated':
                                parent_record = dbase.get_record(name=parent_sim)
                                try:
                                    status = parent_record.content.find('status')

                                    # Mark flag if still incomplete
                                    if status == 'not calculated':
                                        ready_flag = False
                                        break

                                    # Skip if parent calculation failed
                                    elif status == 'error':
                                        with open(os.path.basename(fname), 'w') as f:
                                            parent_record.content.json(fp=f, indent=4)
                                        error_flag = True
                                        error_message = 'parent calculation issued an error'
                                        break

                                    # Ignore if unknown status
                                    else:
                                        raise ValueError('unknown status')

                                # Copy parent record to calculation folder if
                                # it is now complete (i.e. no status field)
                                except:
                                    with open(os.path.basename(fname), 'w') as f:
                                        parent_record.content.json(fp=f, indent=4)
                                    log.write('parent %s copied to sim folder\n'
                                              % parent_sim)

                            # skip if parent calculation failed
                            elif status == 'error':
                                error_flag = True
                                error_message = 'parent calculation issued an error'
                                break
                        except:
                            continue

                # Handle calculations that have unfinished parents
                if not ready_flag:
                    bid_files = glob.glob('*.bid')
                    os.chdir(run_directory)
                    for bid_file in bid_files:
                        os.remove(os.path.join(sim, bid_file))
                    flist = [parent_sim]
                    log.write('parent %s not ready\n\n' % parent_sim)
                    continue

                # Run the calculation
                try:
                    assert not error_flag, error_message
                    run = subprocess.Popen([py_exe, calc_py, calc_in, sim],
                                           stderr=subprocess.PIPE)
                    # NOTE(review): reading the pipe blocks until the child
                    # closes stderr; the returned value is bytes under py3 —
                    # it is only ever used as an assertion message below.
                    error_message = run.stderr.read()

                    # Load results.json
                    try:
                        model = DM('results.json')

                    # Throw errors if no results.json
                    except:
                        error_flag = True
                    assert not error_flag, error_message
                    log.write('sim calculated successfully\n')

                # Catch any errors and build results.json
                except:
                    model = record.content
                    keys = list(model.keys())
                    record_type = keys[0]
                    model[record_type]['status'] = 'error'
                    model[record_type]['error'] = str(sys.exc_info()[1])
                    with open('results.json', 'w') as f:
                        model.json(fp=f, indent=4)
                    log.write('error: %s\n' % model[record_type]['error'])

                # Update record (retry up to 10 times before giving up)
                tries = 0
                while tries < 10:
                    try:
                        dbase.update_record(content=model, name=sim)
                        break
                    except:
                        tries += 1
                if tries == 10:
                    os.chdir(run_directory)
                    log.write('failed to update record\n')
                else:
                    # Archive calculation and add to database or hold_directory
                    try:
                        dbase.add_tar(root_dir=run_directory, name=sim)
                    except:
                        log.write('failed to upload archive\n')
                        if not os.path.isdir(hold_directory):
                            os.makedirs(hold_directory)
                        shutil.move(sim+'.tar.gz', hold_directory)
                    os.chdir(run_directory)
                    removecalc(os.path.join(run_directory, sim))
                log.write('\n')

            # Else if bid(sim) failed
            else:
                bidfailcount += 1

                # Stop unproductive worker after 10 consecutive bid fails
                if bidfailcount > 10:
                    print("Didn't find an open simulation", flush=True)
                    break

                # Pause for 10 seconds before trying again
                time.sleep(10)

            # Regenerate flist and flush log file
            flist = os.listdir(run_directory)
            log.flush()
            os.fsync(log.fileno())

        print('No simulations left to run', flush=True)

    os.chdir(original_dir)
def dump(system, **kwargs):
    """
    Return a DataModelDict 'cell' representation of the system.

    Parameters
    ----------
    system : atomman.System
        The system to generate the data model for.
    f : str or file-like object, optional
        File path or file-like object to write the content to.  If not given,
        then the content is returned as a DataModelDict.
    format : str, optional
        File format 'xml' or 'json' to save if f is given.  If not given,
        will be inferred from f if f is a filename, or taken as 'json'.
    indent : int or None, optional
        Indentation option to use for XML/JSON content.  A value of None
        (default) will add no line separations or indentations.
    box_unit : str, optional
        Length unit to use for the box.  Default value is 'angstrom'.
    symbols : list, optional
        list of atom-model symbols corresponding to the atom types.  If not
        given, will use system.symbols.
    elements : list, optional
        list of element tags corresponding to the atom types.
    prop_units : dict, optional
        dictionary where the keys are the property keys to include, and
        the values are units to use.  If not given, only the positions in
        scaled units are included.
    a_std : float, optional
        Standard deviation of a lattice constant to include if available.
    b_std : float, optional
        Standard deviation of b lattice constant to include if available.
    c_std : float, optional
        Standard deviation of c lattice constant to include if available.

    Returns
    -------
    DataModelDict
        A 'cell' data model of the system.
    """
    # Set default values
    box_unit = kwargs.get('box_unit', 'angstrom')
    # BUG FIX: indent was never defined but used in every formatted return
    # path below, raising NameError; default it like the sibling dump().
    indent = kwargs.get('indent', None)

    symbols = kwargs.get('symbols', system.symbols)
    if isinstance(symbols, stringtype):
        symbols = [symbols]
    assert len(symbols) == system.natypes, 'Number of symbols does not match number of atom types'

    elements = kwargs.get('elements', [None for i in range(system.natypes)])
    if not isinstance(elements, list):
        elements = [elements]
    assert len(elements) == system.natypes, 'Number of elements does not match number of atom types'

    prop_units = kwargs.get('prop_units', {})
    if 'pos' not in prop_units:
        prop_units['pos'] = 'scaled'

    # Extract system values
    a = system.box.a
    b = system.box.b
    c = system.box.c
    alpha = system.box.alpha
    beta = system.box.beta
    gamma = system.box.gamma

    # Check for box standard deviations
    if 'a_std' in kwargs and 'b_std' in kwargs and 'c_std' in kwargs:
        errors = True
        a_std = kwargs['a_std']
        b_std = kwargs['b_std']
        c_std = kwargs['c_std']
    else:
        errors = False
        a_std = None
        b_std = None
        c_std = None

    # Initialize DataModelDict
    model = DM()
    model['cell'] = cell = DM()

    # Test crystal family
    c_family = identifyfamily(system.box)
    if c_family is None:
        c_family = 'triclinic'
    cell[c_family] = DM()

    if c_family == 'cubic':
        # One averaged lattice constant
        a_ave = (a + b + c) / 3
        if errors is True:
            a_std_ave = (a_std + b_std + c_std) / 3
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)

    elif c_family == 'tetragonal':
        a_ave = (a + b) / 2
        if errors is True:
            a_std_ave = (a_std + b_std) / 2
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)

    elif c_family == 'orthorhombic':
        cell[c_family]['a'] = uc.model(a, box_unit, error=a_std)
        cell[c_family]['b'] = uc.model(b, box_unit, error=b_std)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)

    elif c_family == 'hexagonal':
        a_ave = (a + b) / 2
        if errors is True:
            a_std_ave = (a_std + b_std) / 2
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)

    elif c_family == 'rhombohedral':
        a_ave = (a + b + c) / 3
        alpha_ave = (alpha + beta + gamma) / 3
        if errors is True:
            a_std_ave = (a_std + b_std + c_std) / 3
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)
        cell[c_family]['alpha'] = alpha_ave

    elif c_family == 'monoclinic':
        cell[c_family]['a'] = uc.model(a, box_unit, error=a_std)
        cell[c_family]['b'] = uc.model(b, box_unit, error=b_std)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)
        cell[c_family]['beta'] = beta

    elif c_family == 'triclinic':
        cell[c_family]['a'] = uc.model(a, box_unit, error=a_std)
        cell[c_family]['b'] = uc.model(b, box_unit, error=b_std)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)
        cell[c_family]['alpha'] = alpha
        cell[c_family]['beta'] = beta
        cell[c_family]['gamma'] = gamma
    else:
        raise ValueError('Unknown crystal family')

    atype = system.atoms.atype
    aindex = atype - 1

    # Build list of atoms and per-atom properties
    for i in range(system.natoms):
        atom = DM()
        atom['component'] = int(atype[i])

        symbol = symbols[aindex[i]]
        if symbol is not None:
            atom['symbol'] = symbol

        element = elements[aindex[i]]
        if element is not None:
            atom['element'] = element

        atom['position'] = DM()
        if prop_units['pos'] == 'scaled':
            atom['position']['value'] = list(system.atoms_prop(a_id=i, key='pos', scale=True))
        else:
            atom['position']['value'] = list(uc.get_in_units(system.atoms.pos[i], prop_units['pos']))
        atom['position']['unit'] = prop_units['pos']

        for key, unit in iteritems(prop_units):
            if key != 'pos' and key != 'atype':
                value = system.atoms.view[key][i]
                prop = DM()
                prop['name'] = key
                prop.update(uc.model(value, unit))
                atom.append('property', prop)

        model.append('atom', atom)

    model = DM([('atomic-system', model)])

    # Return DataModelDict or str
    if 'f' not in kwargs:
        if 'format' not in kwargs:
            return model
        # BUG FIX: the bare name 'format' was unbound in this branch;
        # look the requested format up in kwargs as the sibling dump() does.
        elif kwargs['format'].lower() == 'xml':
            return model.xml(indent=indent)
        elif kwargs['format'].lower() == 'json':
            return model.json(indent=indent)

    # Write to file
    else:
        f = kwargs['f']
        if 'format' not in kwargs:
            try:
                format = os.path.splitext(f)[1][1:]
            except:
                format = 'json'
        else:
            format = kwargs['format']

        if hasattr(f, 'write'):
            if format.lower() == 'xml':
                return model.xml(fp=f, indent=indent)
            elif format.lower() == 'json':
                return model.json(fp=f, indent=indent)
        else:
            with open(f, 'w') as fp:
                if format.lower() == 'xml':
                    return model.xml(fp=fp, indent=indent)
                elif format.lower() == 'json':
                    return model.json(fp=fp, indent=indent)
    return
def main():
    """
    Stand-alone runner: repeatedly bids on calculation folders in to_run_dir,
    executes them, and archives the results into the xml library.
    """
    to_run_dir = 'C:/users/lmh1/Documents/iprPy_run/to_run'
    xml_dir = 'C:/users/lmh1/Documents/iprPy_run/xml_library'
    orphan_dir = os.path.join(xml_dir, 'orphan')
    os.chdir(to_run_dir)

    flist = os.listdir(to_run_dir)
    while len(flist) > 0:
        # Pick a random calculation folder and try to claim it
        index = random.randint(0, len(flist)-1)
        sim = flist[index]
        if bid(sim):
            os.chdir(sim)
            try:
                calc_py = None
                calc_in = None
                calc_name = None
                pot_name = None

                # find calc_*.py and calc_*.in files
                for fname in os.listdir(os.getcwd()):
                    if fname[:5] == 'calc_':
                        if fname[-3:] == '.py':
                            if calc_py is None:
                                calc_py = fname
                                calc_name = fname[5:-3]
                            else:
                                raise ValueError('folder has multiple calc_*.py scripts')
                        elif fname[-3:] == '.in':
                            if calc_in is None:
                                calc_in = fname
                            else:
                                raise ValueError('folder has multiple calc_*.in scripts')
                    # find the potential name from any LAMMPS-potential record
                    elif fname[-5:] == '.json' or fname[-4:] == '.xml':
                        try:
                            with open(fname) as f:
                                test = DataModelDict(f)
                            pot_name = test.find('LAMMPS-potential')['potential']['id']
                        except:
                            pass

                assert pot_name is not None, 'LAMMPS-potential data model not found'
                assert calc_py is not None, 'calc_*.py script not found'
                # BUG FIX: this assert previously re-tested calc_py, so a
                # missing calc_*.in file was never detected here.
                assert calc_in is not None, 'calc_*.in script not found'

            # Incomplete folders get archived to the orphan directory
            except:
                print(sim, sys.exc_info()[1])
                os.chdir(to_run_dir)
                if not os.path.isdir(orphan_dir):
                    os.makedirs(orphan_dir)
                shutil.make_archive(os.path.join(orphan_dir, sim), 'gztar',
                                    root_dir=to_run_dir, base_dir=sim)
                shutil.rmtree(os.path.join(to_run_dir, sim))
                flist = os.listdir(to_run_dir)
                continue

            pot_xml_dir = os.path.join(xml_dir, pot_name, calc_name, 'standard')

            # Run the calculation script
            try:
                run = subprocess.Popen(['python', calc_py, calc_in, sim],
                                       stderr=subprocess.PIPE)
                # BUG FIX: decode the stderr bytes; under py3 the old
                # bytes != '' comparison was always True, flagging every
                # run as an error.
                err_mess = run.stderr.read().decode()
                if err_mess != '':
                    raise RuntimeError(err_mess)

            # On failure, record the error into results.json
            except:
                with open(os.path.join(pot_xml_dir, sim + '.xml')) as f:
                    model = DataModelDict(f)
                # py3 fix: dict.keys() is a view, index through list()
                key = list(model.keys())[0]
                model[key]['error'] = str(sys.exc_info()[1])
                with open('results.json', 'w') as f:
                    model.json(fp=f, indent=4)

            # Copy results.json (written by the calc script or above) to the
            # library as xml
            with open('results.json') as f:
                model = DataModelDict(f)
            with open(os.path.join(pot_xml_dir, sim + '.xml'), 'w') as f:
                model.xml(fp=f, indent=4)

            # Archive the finished folder and remove it from to_run_dir
            os.chdir(to_run_dir)
            shutil.make_archive(os.path.join(pot_xml_dir, sim), 'gztar',
                                root_dir=to_run_dir, base_dir=sim)
            shutil.rmtree(os.path.join(to_run_dir, sim))

        flist = os.listdir(to_run_dir)
def dump(system, **kwargs):
    """
    Return a DataModelDict 'cell' representation of the system.

    Parameters
    ----------
    system : atomman.System
        The system to generate the data model for.
    f : str or file-like object, optional
        File path or file-like object to write the content to.  If not given,
        then the content is returned as a DataModelDict.
    format : str, optional
        File format 'xml' or 'json' to save the content as if f is given.  If
        f is a filename, then the format will be automatically inferred from
        f's extension.  If format is not given and cannot be inferred, then it
        will be set to 'json'.
    indent : int or None, optional
        Indentation option to use for XML/JSON content if f is given.  A value
        of None (default) will add no line separatations or indentations.
    box_unit : str, optional
        Length unit to use for the box.  Default value is 'angstrom'.
    symbols : list, optional
        list of atom-model symbols corresponding to the atom types.  If not
        given, will use system.symbols.
    elements : list, optional
        list of element tags corresponding to the atom types.
    prop_units : dict, optional
        dictionary where the keys are the property keys to include, and
        the values are units to use.  If not given, only the positions in
        scaled units are included.
    a_std : float, optional
        Standard deviation of a lattice constant to include if available.
    b_std : float, optional
        Standard deviation of b lattice constant to include if available.
    c_std : float, optional
        Standard deviation of c lattice constant to include if available.

    Returns
    -------
    DataModelDict
        A 'cell' data model of the system.
    """
    # Set default values
    box_unit = kwargs.get('box_unit', 'angstrom')
    indent = kwargs.get('indent', None)

    symbols = kwargs.get('symbols', system.symbols)
    if isinstance(symbols, stringtype):
        symbols = [symbols]
    assert len(symbols) == system.natypes, 'Number of symbols does not match number of atom types'

    elements = kwargs.get('elements', [None for i in range(system.natypes)])
    if not isinstance(elements, list):
        elements = [elements]
    assert len(elements) == system.natypes, 'Number of elements does not match number of atom types'

    # Positions default to scaled (box-relative) coordinates
    prop_units = kwargs.get('prop_units', {})
    if 'pos' not in prop_units:
        prop_units['pos'] = 'scaled'

    # Extract system values
    a = system.box.a
    b = system.box.b
    c = system.box.c
    alpha = system.box.alpha
    beta = system.box.beta
    gamma = system.box.gamma

    # Check for box standard deviations (all three must be given together)
    if 'a_std' in kwargs and 'b_std' in kwargs and 'c_std' in kwargs:
        errors = True
        a_std = kwargs['a_std']
        b_std = kwargs['b_std']
        c_std = kwargs['c_std']
    else:
        errors = False
        a_std = None
        b_std = None
        c_std = None

    # Initialize DataModelDict
    model = DM()
    model['cell'] = cell = DM()

    # Test crystal family
    c_family = identifyfamily(system.box)
    if c_family is None:
        c_family = 'triclinic'
    cell[c_family] = DM()

    # Only the family's independent lattice parameters are recorded; equal
    # parameters are averaged (e.g. a, b, c -> a for cubic)
    if c_family == 'cubic':
        a_ave = (a + b + c) / 3
        if errors is True:
            a_std_ave = (a_std + b_std + c_std) / 3
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)

    elif c_family == 'tetragonal':
        a_ave = (a + b) / 2
        if errors is True:
            a_std_ave = (a_std + b_std) / 2
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)

    elif c_family == 'orthorhombic':
        cell[c_family]['a'] = uc.model(a, box_unit, error=a_std)
        cell[c_family]['b'] = uc.model(b, box_unit, error=b_std)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)

    elif c_family == 'hexagonal':
        a_ave = (a + b) / 2
        if errors is True:
            a_std_ave = (a_std + b_std) / 2
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)

    elif c_family == 'rhombohedral':
        a_ave = (a + b + c) / 3
        alpha_ave = (alpha + beta + gamma) / 3
        if errors is True:
            a_std_ave = (a_std + b_std + c_std) / 3
        else:
            a_std_ave = None
        cell[c_family]['a'] = uc.model(a_ave, box_unit, error=a_std_ave)
        cell[c_family]['alpha'] = alpha_ave

    elif c_family == 'monoclinic':
        cell[c_family]['a'] = uc.model(a, box_unit, error=a_std)
        cell[c_family]['b'] = uc.model(b, box_unit, error=b_std)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)
        cell[c_family]['beta'] = beta

    elif c_family == 'triclinic':
        cell[c_family]['a'] = uc.model(a, box_unit, error=a_std)
        cell[c_family]['b'] = uc.model(b, box_unit, error=b_std)
        cell[c_family]['c'] = uc.model(c, box_unit, error=c_std)
        cell[c_family]['alpha'] = alpha
        cell[c_family]['beta'] = beta
        cell[c_family]['gamma'] = gamma
    else:
        raise ValueError('Unknown crystal family')

    # atype values are 1-based; aindex shifts them to 0-based list indices
    atype = system.atoms.atype
    aindex = atype - 1

    # Build list of atoms and per-atom properties
    for i in range(system.natoms):
        atom = DM()
        atom['component'] = int(atype[i])

        symbol = symbols[aindex[i]]
        if symbol is not None:
            atom['symbol'] = symbol

        element = elements[aindex[i]]
        if element is not None:
            atom['element'] = element

        atom['position'] = DM()
        if prop_units['pos'] == 'scaled':
            atom['position']['value'] = list(system.atoms_prop(a_id=i, key='pos', scale=True))
        else:
            atom['position']['value'] = list(uc.get_in_units(system.atoms.pos[i], prop_units['pos']))
        atom['position']['unit'] = prop_units['pos']

        # Any other requested per-atom properties
        for key, unit in iteritems(prop_units):
            if key != 'pos' and key != 'atype':
                value = system.atoms.view[key][i]
                prop = DM()
                prop['name'] = key
                prop.update(uc.model(value, unit))
                atom.append('property', prop)

        model.append('atom', atom)

    model = DM([('atomic-system', model)])

    # Return DataModelDict or str
    if 'f' not in kwargs:
        if 'format' not in kwargs:
            return model
        elif kwargs['format'].lower() == 'xml':
            return model.xml(indent=indent)
        elif kwargs['format'].lower() == 'json':
            return model.json(indent=indent)

    # Write to file
    else:
        f = kwargs['f']
        if 'format' not in kwargs:
            # Infer format from the file extension when f is a path
            try:
                format = os.path.splitext(f)[1][1:]
            except:
                format = 'json'
        else:
            format = kwargs['format']

        if hasattr(f, 'write'):
            if format.lower() == 'xml':
                return model.xml(fp=f, indent=indent)
            elif format.lower() == 'json':
                return model.json(fp=f, indent=indent)
        else:
            with open(f, 'w') as fp:
                if format.lower() == 'xml':
                    return model.xml(fp=fp, indent=indent)
                elif format.lower() == 'json':
                    return model.json(fp=fp, indent=indent)
    return
def __parentcheck(self, calc_directory):
    """
    Check the status of parent calculation records copied into a
    calculation folder, refreshing stale local copies from the database.

    Parameters
    ----------
    calc_directory : path-like object
        The calculation directory to check.

    Returns
    -------
    status : str
        'ready' if all parents have finished, 'error' if a parent issued
        an error, or 'not' if a parent is still not calculated.
    message : str
        The error description (status 'error'), the name of the unfinished
        parent (status 'not'), or an empty string (status 'ready').
    """
    message = ''
    status = 'ready'

    # Loop over all json and xml files
    for path in calc_directory.iterdir():
        if path.suffix in ('.json', '.xml'):
            parent_name = path.stem

            # Delete pre-existing results file
            if parent_name == 'results':
                path.unlink()
                # BUG FIX: skip to the next file — the original fell
                # through and tried to parse the file it just deleted.
                continue

            # Get status of local record copy; records with no status
            # field are treated as finished
            parent = DM(path)
            try:
                parentstatus = parent.find('status')
            except:
                parentstatus = 'finished'

            if parentstatus == 'not calculated':
                # Get status of remote copy
                parent = self.database.get_record(name=parent_name).content
                try:
                    parentstatus = parent.find('status')
                except:
                    parentstatus = 'finished'

                # Update local record copy if needed
                if parentstatus in ['finished', 'error']:
                    with open(path, 'w', encoding='utf-8') as f:
                        if path.suffix == '.json':
                            parent.json(fp=f, indent=4, ensure_ascii=False)
                        elif path.suffix == '.xml':
                            parent.xml(fp=f)
                    self.__logwrite(f'parent {parent_name} copied to sim folder\n')

            # Identify errors and not calculated parents
            if parentstatus == 'error':
                status = 'error'
                message = f'parent {parent_name} issued an error'
                break
            elif parentstatus == 'not calculated':
                status = 'not'
                message = parent_name
                self.__logwrite(f'parent {parent_name} not calculated yet\n\n')

    return status, message
def load(data, prop_info=None):
    """
    Read a LAMMPS-style dump file and return a System.

    Argument:
    data = file name, file-like object or string to read data from.

    Keyword Argument:
    prop_info -- DataModelDict for relating the per-atom properties to/from
                 the dump file and the System. Will create a default json
                 instance <data>.json if prop_info is not given and
                 <data>.json doesn't already exist.
    """
    # read in prop_info if supplied
    if prop_info is not None:
        if isinstance(prop_info, (str, unicode)) and os.path.isfile(prop_info):
            with open(prop_info) as f:
                prop_info = f.read()
        prop_info = DataModelDict(prop_info)

    # check for default prop_info file
    else:
        try:
            with open(data + '.json') as fj:
                prop_info = DataModelDict(fj)
        except:
            # deferred: a default model is generated once the dump's
            # column names are known (at the ITEM: ATOMS header)
            prop_info = None

    box_unit = None
    # read box_unit if specified in prop_info
    if prop_info is not None:
        prop_info = prop_info.find('LAMMPS-dump-atoms_prop-relate')
        box_unit = prop_info['box_prop'].get('unit', None)

    with uber_open_rmode(data) as f:
        # parser state: bcount counts box-bound lines still expected
        # (3 means "not currently reading box bounds")
        pbc = None
        box = None
        natoms = None
        system = None
        readnatoms = False
        readatoms = False
        readtimestep = False
        acount = 0
        bcount = 3

        # loop over all lines in file
        for line in f:
            terms = line.split()
            if len(terms) > 0:

                # read atomic values if time to do so
                if readatoms:
                    # sort values by a_id and save to prop_vals
                    a_id = long(terms[id_index]) - 1
                    prop_vals[a_id] = terms
                    acount += 1

                    # save values to sys once all atoms read in
                    if acount == natoms:
                        readatoms = False

                        # cycle over the defined atoms_prop in prop_info
                        for prop, p_keys in prop_info['atoms_prop'].iteritems():
                            # set default keys
                            dtype = p_keys.get('dtype', None)
                            shape = p_keys.get('shape', None)
                            shape = (natoms, ) + np.empty(shape).shape
                            value = np.empty(shape)

                            # cycle over the defined LAMMPS-attributes in prop_info
                            for attr, a_keys in prop_info['LAMMPS-attribute'].iteritems():

                                # cycle over list of relations for each LAMMPS-attribute
                                for relation in a_keys.iteraslist('relation'):

                                    # if atoms prop and relation prop match
                                    if relation['prop'] == prop:

                                        # get unit and scale info
                                        unit = relation.get('unit', None)
                                        if unit == 'scaled':
                                            unit = None
                                            scale = True
                                        else:
                                            scale = False

                                        # find index of attribute in name_list
                                        a_index = name_list.index(attr)

                                        # check if relation has index listed
                                        try:
                                            index = relation['index']
                                            if isinstance(index, list):
                                                index = (Ellipsis, ) + tuple(index)
                                            else:
                                                index = (Ellipsis, ) + (index, )
                                            value[index] = prop_vals[:, a_index]

                                        # scalar if no index
                                        except:
                                            value[:] = prop_vals[:, a_index]

                            # test if values are ints if dtype not specified
                            if dtype is None and np.allclose(np.asarray(value, dtype=int), value):
                                value = np.asarray(value, dtype=int)
                            else:
                                value = np.asarray(value, dtype=dtype)

                            # save prop values to system
                            # NOTE(review): unit/scale carry the values from
                            # the last matching relation — confirm intended
                            system.atoms_prop(key=prop,
                                              value=uc.set_in_units(value, unit),
                                              scale=scale)

                # read number of atoms if time to do so
                elif readnatoms:
                    natoms = int(terms[0])
                    readnatoms = False

                elif readtimestep:
                    timestep = int(terms[0])
                    readtimestep = False

                # read x boundary condition values if time to do so
                elif bcount == 0:
                    xlo = uc.set_in_units(float(terms[0]), box_unit)
                    xhi = uc.set_in_units(float(terms[1]), box_unit)
                    if len(terms) == 3:
                        # triclinic box: third value is the xy tilt
                        xy = uc.set_in_units(float(terms[2]), box_unit)
                    bcount += 1

                # read y boundary condition values if time to do so
                elif bcount == 1:
                    ylo = uc.set_in_units(float(terms[0]), box_unit)
                    yhi = uc.set_in_units(float(terms[1]), box_unit)
                    if len(terms) == 3:
                        xz = uc.set_in_units(float(terms[2]), box_unit)
                    bcount += 1

                # read z boundary condition values if time to do so
                elif bcount == 2:
                    zlo = uc.set_in_units(float(terms[0]), box_unit)
                    zhi = uc.set_in_units(float(terms[1]), box_unit)
                    if len(terms) == 3:
                        yz = uc.set_in_units(float(terms[2]), box_unit)

                        # undo LAMMPS' bounding-box adjustment of the tilted
                        # box limits before constructing the Box
                        xlo = xlo - min((0.0, xy, xz, xy + xz))
                        xhi = xhi - max((0.0, xy, xz, xy + xz))
                        ylo = ylo - min((0.0, yz))
                        yhi = yhi - max((0.0, yz))
                        box = am.Box(xlo=xlo, xhi=xhi,
                                     ylo=ylo, yhi=yhi,
                                     zlo=zlo, zhi=zhi,
                                     xy=xy, xz=xz, yz=yz)
                    else:
                        box = am.Box(xlo=xlo, xhi=xhi,
                                     ylo=ylo, yhi=yhi,
                                     zlo=zlo, zhi=zhi)
                    bcount += 1

                # if not time to read value, check the ITEM: header information
                else:
                    # only consider ITEM: lines
                    if terms[0] == 'ITEM:':

                        # ITEM: TIMESTEP indicates it is time to read the timestep
                        if terms[1] == 'TIMESTEP':
                            readtimestep = True

                        # ITEM: NUMBER indicates it is time to read natoms
                        elif terms[1] == 'NUMBER':
                            readnatoms = True

                        # ITEM: BOX gives pbc and indicates it is time to read
                        # box parameters
                        elif terms[1] == 'BOX':
                            # last three terms are the boundary flags;
                            # 'pp' means periodic
                            pbc = [True, True, True]
                            for i in xrange(3):
                                if terms[i + len(terms) - 3] != 'pp':
                                    pbc[i] = False
                            bcount = 0

                        # ITEM: ATOMS gives list of per-Atom property names and
                        # indicates it is time to read atomic values
                        elif terms[1] == 'ATOMS':
                            assert box is not None, 'Box information not found'
                            assert natoms is not None, 'Number of atoms not found'

                            # read list of property names
                            name_list = terms[2:]
                            id_index = name_list.index('id')

                            # create empty array for reading property values
                            prop_vals = np.empty((natoms, len(name_list)))

                            # create and save default prop_info Data Model if needed
                            if prop_info is None:
                                prop_info = __prop_info_default_load(name_list)
                                # only write <data>.json when data is a short
                                # filename (not raw dump content)
                                if isinstance(data, (str, unicode)) and len(data) < 80:
                                    with open(data + '.json', 'w') as fj:
                                        prop_info.json(fp=fj, indent=4)
                                prop_info = prop_info.find('LAMMPS-dump-atoms_prop-relate')

                            # create system and flag that it is time to read data
                            system = am.System(atoms=am.Atoms(natoms=natoms),
                                               box=box, pbc=pbc)
                            system.prop['timestep'] = timestep
                            readatoms = True

    if system is None:
        raise ValueError('Failed to properly load dump file ' + str(data)[:50])

    return system
def prepare(terms, variable):
    """
    This is the prepare method for the calculation.

    Builds a calculation run folder for every new combination of potential,
    load system, point-defect model, symbols set and size multiplier, and
    saves an incomplete record for each to the library directory.
    """
    working_dir = os.getcwd()

    # Identify the necessary run files in the calculation directory
    calc_template = os.path.join(__calc_dir__, __calc_name__ + '.template')
    calc_py = os.path.join(__calc_dir__, __calc_name__ + '.py')
    min_template = os.path.join(__calc_dir__, 'min.template')

    # Read in the calc_template
    with open(calc_template) as f:
        template = f.read()

    # Interpret and check terms and variables
    run_directory, lib_directory, v_dict = __initial_setup(terms, variable)

    # Loop over all potentials
    for potential_file, potential_dir in zip(variable.aslist('potential_file'),
                                             variable.aslist('potential_dir')):

        # Load potential
        with open(potential_file) as f:
            potential = lmp.Potential(f)

        # Pass potential's file and directory info to v_dict
        v_dict['potential_file'] = os.path.basename(potential_file)
        v_dict['potential_dir'] = os.path.basename(potential_dir)

        # Loop over all systems
        for load, load_options, load_elements, box_parameters in zip(
                variable.aslist('load'),
                variable.aslist('load_options'),
                variable.aslist('load_elements'),
                variable.aslist('box_parameters')):

            # Divy up the load information
            # (load is "<style> <file path...>"; the path may contain spaces)
            load_terms = load.split()
            load_style = load_terms[0]
            load_file = ' '.join(load_terms[1:])
            load_base = os.path.basename(load_file)

            # Check for system_model fields from previous simulations
            if load_style == 'system_model':
                with open(load_file) as f:
                    model = DM(f)

                # Skip if load relaxed with a different potential
                try:
                    pot_key = model.find('potential')['key']
                    if pot_key != potential.uuid:
                        continue
                except:
                    pass

                # Get or make the load artifact family name
                try:
                    system_family = model.find('system-info')['artifact']['family']
                except:
                    system_family = os.path.splitext(load_base)[0]
            else:
                system_family = os.path.splitext(load_base)[0]

            # Loop over all point defect data models
            for ptd_model in variable.aslist('ptd_model'):

                # Check if ptd_model's system_family matches the load_file's
                # system_family
                with open(ptd_model) as f:
                    ptd = DM(f)
                if system_family != ptd['point-defect']['system-family']:
                    continue

                # Pass system's file, options and box parameters to v_dict
                v_dict['load'] = ' '.join([load_terms[0], load_base])
                v_dict['load_options'] = load_options
                v_dict['box_parameters'] = box_parameters

                # Pass defect model to v_dict
                ptd_file = os.path.basename(ptd_model)
                v_dict['ptd_model'] = ptd_file
                v_dict['ptd_name'] = ptd['point-defect']['identifier']['name']

                # Loop over all symbols combinations
                for symbols in atomman_input.yield_symbols(load, load_options,
                                                           load_elements,
                                                           variable, potential):

                    # Pass symbols to v_dict
                    v_dict['symbols'] = ' '.join(symbols)

                    # Define directory path for the record
                    record_dir = os.path.join(lib_directory, str(potential),
                                              '-'.join(symbols), system_family,
                                              __calc_type__)

                    # Loop over all size_mults
                    for size_mults in variable.aslist('size_mults'):
                        v_dict['size_mults'] = size_mults

                        # Check if record already exists
                        if __is_new_record(record_dir, v_dict):
                            # New records are keyed by a fresh UUID that also
                            # names the run folder
                            UUID = str(uuid.uuid4())

                            # Create calculation run folder
                            sim_dir = os.path.join(run_directory, UUID)
                            os.makedirs(sim_dir)

                            # Copy files to run folder
                            shutil.copy(calc_py, sim_dir)
                            shutil.copy(min_template, sim_dir)
                            shutil.copy(potential_file, sim_dir)
                            shutil.copy(load_file, sim_dir)
                            shutil.copy(ptd_model, sim_dir)

                            # Copy potential_dir and contents to run folder
                            os.mkdir(os.path.join(sim_dir,
                                                  os.path.basename(potential_dir)))
                            for fname in glob.iglob(os.path.join(potential_dir, '*')):
                                shutil.copy(fname,
                                            os.path.join(sim_dir,
                                                         os.path.basename(potential_dir)))

                            # Create calculation input file by filling in
                            # template with v_dict terms
                            os.chdir(sim_dir)
                            calc_in = fill_template(template, v_dict, '<', '>')
                            input_dict = calc.input(calc_in, UUID)
                            with open(__calc_name__ + '.in', 'w') as f:
                                f.write('\n'.join(calc_in))
                            os.chdir(working_dir)

                            # Save the incomplete record
                            model = calc.data_model(input_dict)
                            with open(os.path.join(record_dir, UUID + '.json'), 'w') as f:
                                model.json(fp=f, indent=2)
def download_records(self, template, localpath=None, keyword=None,
                     mongoquery=None, format='xml', indent=None,
                     verbose=False):
    """
    Download records associated with a given template from the remote
    and save to localpath.

    Parameters
    ----------
    template : str
        The template (schema/style) of records to download.  If given,
        all records with this template will be downloaded.
    localpath : path-like object, optional
        Path to a local directory where the files will be saved to.  If not
        given, will use the localpath value set during object
        initialization.
    keyword : str, optional
        A keyword content pattern to search for to limit which records
        are downloaded.
    mongoquery : dict, optional
        A MongoDB-style filter query to limit which records are
        downloaded.
    format : str, optional
        The file format to save the file as.  Allowed values are 'xml'
        (default) and 'json'.
    indent : int, optional
        The indentation spacing size to use for the locally saved file.
        If not given, the JSON/XML content will be compact.
    verbose : bool, optional
        If True, info messages will be printed during operations.
        Default value is False.

    Raises
    ------
    ValueError
        If template is one of the styles with a dedicated download method,
        if localpath cannot be resolved, if format is unsupported, or if
        records of the other format already exist locally.
    """
    # Redirect template styles that have dedicated download methods
    if template == 'potential_LAMMPS':
        raise ValueError('use download_lammps_potentials instead')
    elif template == 'Potential':
        raise ValueError('use download_lammps_potentials instead')
    elif template == 'Citation':
        raise ValueError('use download_citations instead')

    # Check localpath values
    if localpath is None:
        localpath = self.localpath
    if localpath is None:
        raise ValueError('localpath must be set to download files')

    # Check format value
    format = format.lower()
    allowed_formats = ['xml', 'json']
    if format not in allowed_formats:
        raise ValueError("Format must be 'xml' or 'json'")

    # Create save directory if needed
    save_directory = Path(localpath, template)
    if not save_directory.is_dir():
        save_directory.mkdir(parents=True)

    # Refuse to mix xml and json records in the same directory
    for fmt in allowed_formats:
        if fmt != format:
            numexisting = len(
                [fname for fname in save_directory.glob(f'*.{fmt}')])
            if numexisting > 0:
                raise ValueError(
                    f'{numexisting} records of format {fmt} already saved locally'
                )

    # Download and save
    records = self.cdcs.query(template=template, keyword=keyword,
                              mongoquery=mongoquery)
    for i in range(len(records)):
        record = records.iloc[i]
        fname = Path(save_directory, f'{record.title}.{format}')
        content = DM(record.xml_content)
        # BUGFIX: write with explicit UTF-8 encoding — record content can be
        # non-ASCII and this matches download_lammps_potentials' behavior.
        with open(fname, 'w', encoding='UTF-8') as f:
            if format == 'xml':
                content.xml(fp=f, indent=indent)
            else:
                content.json(fp=f, indent=indent)

    if verbose:
        print(f'Downloaded {len(records)} of {template}')
def download_lammps_potentials(self, localpath=None, lammps_potentials=None,
                               format='xml', indent=None, verbose=False,
                               status='active', get_files=True):
    """
    Download potential_LAMMPS records from the remote and save to
    localpath.

    Parameters
    ----------
    localpath : path-like object, optional
        Path to a local directory where the files will be saved to.  If not
        given, will use the localpath value set during object
        initialization.
    lammps_potentials : list, optional
        A list of LAMMPS potentials to download.  If not given, all LAMMPS
        potentials will be downloaded.
    format : str, optional
        The file format to save the record files as.  Allowed values are
        'xml' (default) and 'json'.
    indent : int, optional
        The indentation spacing size to use for the locally saved record
        files.  If not given, the JSON/XML content will be compact.
    verbose : bool, optional
        If True, info messages will be printed during operations.  Default
        value is False.
    status : str, list or None, optional
        Only potential_LAMMPS records with the given status(es) will be
        downloaded.  Allowed values are 'active' (default), 'superseded',
        and 'retracted'.  If None is given, then all potentials will be
        downloaded.
    get_files : bool, optional
        If True, the parameter files associated with the potential_LAMMPS
        record will also be downloaded.

    Raises
    ------
    ValueError
        If localpath cannot be resolved, if format is unsupported, or if
        records of the other format already exist locally.
    """
    # Load potentials to speed up file downloads
    if get_files is True and self.potentials is None:
        self.load_potentials()

    template = 'potential_LAMMPS'

    # Check localpath values
    if localpath is None:
        localpath = self.localpath
    if localpath is None:
        raise ValueError('localpath must be set to download files')

    # Check format value
    format = format.lower()
    allowed_formats = ['xml', 'json']
    if format not in allowed_formats:
        raise ValueError("Format must be 'xml' or 'json'")

    # Create save directory if needed
    save_directory = Path(localpath, template)
    if not save_directory.is_dir():
        save_directory.mkdir(parents=True)

    # Refuse to mix xml and json records in the same directory
    for fmt in allowed_formats:
        if fmt != format:
            numexisting = len(
                [fname for fname in save_directory.glob(f'*.{fmt}')])
            if numexisting > 0:
                raise ValueError(
                    f'{numexisting} records of format {fmt} already saved locally'
                )

    # Download and save
    if lammps_potentials is None:
        mquery = {}

        # Add status query
        if status is not None:
            status = aslist(status)
            # Records with no status field count as 'active'
            if 'active' in status:
                status.append(None)
            mquery['potential-LAMMPS.status'] = {'$in': status}

        records = self.cdcs.query(template=template, mongoquery=mquery)
        for i in range(len(records)):
            record = records.iloc[i]
            fname = Path(save_directory, f'{record.title}.{format}')
            content = DM(record.xml_content)
            with open(fname, 'w', encoding='UTF-8') as f:
                if format == 'xml':
                    content.xml(fp=f, indent=indent)
                else:
                    content.json(fp=f, indent=indent)
        if verbose:
            print(f'Downloaded {len(records)} of {template}')

        # BUGFIX: convert the downloaded records to PotentialLAMMPS objects
        # here, inside the download branch.  Previously this conversion sat
        # in the shared get_files section and referenced `records`, which is
        # undefined when lammps_potentials is supplied (NameError).
        if get_files is True:
            def makepotentials(series):
                return PotentialLAMMPS(model=series.xml_content)

            lammps_potentials = records.apply(makepotentials, axis=1)

    # Save loaded content
    else:
        if status is None:
            status = ['active', 'superseded', 'retracted']
        else:
            status = aslist(status)

        for lammps_potential in aslist(lammps_potentials):
            if lammps_potential.status not in status:
                continue
            potname = lammps_potential.id

            fname = Path(save_directory, f'{potname}.{format}')
            if format == 'xml':
                with open(fname, 'w', encoding='UTF-8') as f:
                    lammps_potential.asmodel().xml(fp=f, indent=indent)
            elif format == 'json':
                with open(fname, 'w', encoding='UTF-8') as f:
                    lammps_potential.asmodel().json(fp=f, indent=indent)
        if verbose:
            print(f'Downloaded {len(lammps_potentials)} of {template}')

    # Download parameter files for whichever set of potentials we have
    if get_files is True:
        self.get_lammps_potentials_files(lammps_potentials, local=False,
                                         remote=True,
                                         targetdir=save_directory,
                                         verbose=verbose)
def load(data, prop_info=None):
    """
    Read a LAMMPS-style dump file and return a System.

    Argument:
    data = file name, file-like object or string to read data from.

    Keyword Argument:
    prop_info -- DataModelDict for relating the per-atom properties to/from
                 the dump file and the System. Will create a default json
                 instance <data>.json if prop_info is not given and
                 <data>.json doesn't already exist.

    Returns an atomman System; raises ValueError if no complete snapshot
    was found in the data.

    NOTE(review): Python 2 code (long, unicode, iteritems, xrange).  Only
    the first snapshot's atoms appear to be used; subsequent ITEM: ATOMS
    sections overwrite the system — confirm intended behavior for
    multi-snapshot dumps.
    """
    #read in prop_info if supplied (may be a path, raw text, or a dict)
    if prop_info is not None:
        if isinstance(prop_info, (str, unicode)) and os.path.isfile(prop_info):
            with open(prop_info) as f:
                prop_info = f.read()
        prop_info = DataModelDict(prop_info)

    #check for default prop_info file
    else:
        try:
            with open(data+'.json') as fj:
                prop_info = DataModelDict(fj)
        except:
            # No usable default file: build one later from the dump header
            prop_info = None

    box_unit = None
    #read box_unit if specified in prop_info
    if prop_info is not None:
        prop_info = prop_info.find('LAMMPS-dump-atoms_prop-relate')
        box_unit = prop_info['box_prop'].get('unit', None)

    with uber_open_rmode(data) as f:
        # Parser state: what has been read so far
        pbc = None
        box = None
        natoms = None
        system = None

        # State flags for the line-by-line state machine:
        # readnatoms/readatoms/readtimestep mark that the NEXT data line(s)
        # hold that value; bcount counts box-bounds lines (3 = not reading).
        readnatoms = False
        readatoms = False
        readtimestep = False
        acount = 0
        bcount = 3

        #loop over all lines in file
        for line in f:
            terms = line.split()
            if len(terms) > 0:

                #read atomic values if time to do so
                if readatoms:
                    #sort values by a_id and save to prop_vals
                    # (dump files need not list atoms in id order)
                    a_id = long(terms[id_index]) - 1
                    prop_vals[a_id] = terms
                    acount += 1

                    #save values to sys once all atoms read in
                    if acount == natoms:
                        readatoms = False

                        #cycle over the defined atoms_prop in prop_info
                        for prop, p_keys in prop_info['atoms_prop'].iteritems():
                            #set default keys
                            dtype = p_keys.get('dtype', None)
                            shape = p_keys.get('shape', None)
                            # np.empty(shape).shape normalizes scalar/tuple
                            # shape specs into a concrete tuple
                            shape = (natoms,) + np.empty(shape).shape
                            value = np.empty(shape)

                            #cycle over the defined LAMMPS-attribute in prop_info
                            for attr, a_keys in prop_info['LAMMPS-attribute'].iteritems():

                                #cycle over list of relations for each LAMMPS-attribute
                                for relation in a_keys.iteraslist('relation'):

                                    #if atoms prop and relation prop match
                                    if relation['prop'] == prop:
                                        #get unit and scale info
                                        unit = relation.get('unit', None)

                                        # 'scaled' means box-relative coords,
                                        # not a physical unit
                                        if unit == 'scaled':
                                            unit = None
                                            scale = True
                                        else:
                                            scale = False

                                        #find index of attribute in name_list
                                        a_index = name_list.index(attr)

                                        #check if relation has index listed
                                        try:
                                            index = relation['index']
                                            if isinstance(index, list):
                                                index = (Ellipsis,) + tuple(index)
                                            else:
                                                index = (Ellipsis,) + (index,)

                                            value[index] = prop_vals[:, a_index]

                                        #scalar if no index
                                        except:
                                            value[:] = prop_vals[:, a_index]

                            #test if values are ints if dtype not specified
                            if dtype is None and np.allclose(np.asarray(value, dtype=int), value):
                                value = np.asarray(value, dtype=int)
                            else:
                                value = np.asarray(value, dtype=dtype)

                            #save prop values to system
                            system.atoms_prop(key=prop,
                                              value=uc.set_in_units(value, unit),
                                              scale=scale)

                #read number of atoms if time to do so
                elif readnatoms:
                    natoms = int(terms[0])
                    readnatoms = False

                elif readtimestep:
                    timestep = int(terms[0])
                    readtimestep = False

                #read x boundary condition values if time to do so
                elif bcount == 0:
                    xlo = uc.set_in_units(float(terms[0]), box_unit)
                    xhi = uc.set_in_units(float(terms[1]), box_unit)
                    # Third term (triclinic tilt xy) only present for
                    # BOX BOUNDS xy xz yz headers
                    if len(terms) == 3:
                        xy = uc.set_in_units(float(terms[2]), box_unit)
                    bcount += 1

                #read y boundary condition values if time to do so
                elif bcount == 1:
                    ylo = uc.set_in_units(float(terms[0]), box_unit)
                    yhi = uc.set_in_units(float(terms[1]), box_unit)
                    if len(terms) == 3:
                        xz = uc.set_in_units(float(terms[2]), box_unit)
                    bcount += 1

                #read z boundary condition values if time to do so
                elif bcount == 2:
                    zlo = uc.set_in_units(float(terms[0]), box_unit)
                    zhi = uc.set_in_units(float(terms[1]), box_unit)
                    if len(terms) == 3:
                        yz = uc.set_in_units(float(terms[2]), box_unit)

                        # Undo LAMMPS' bounding-box padding to recover the
                        # true triclinic cell edges
                        xlo = xlo - min((0.0, xy, xz, xy + xz))
                        xhi = xhi - max((0.0, xy, xz, xy + xz))
                        ylo = ylo - min((0.0, yz))
                        yhi = yhi - max((0.0, yz))
                        box = am.Box(xlo=xlo, xhi=xhi,
                                     ylo=ylo, yhi=yhi,
                                     zlo=zlo, zhi=zhi,
                                     xy=xy, xz=xz, yz=yz)
                    else:
                        box = am.Box(xlo=xlo, xhi=xhi,
                                     ylo=ylo, yhi=yhi,
                                     zlo=zlo, zhi=zhi)
                    bcount += 1

                #if not time to read value, check the ITEM: header information
                else:
                    #only consider ITEM: lines
                    if terms[0] == 'ITEM:':

                        #ITEM: TIMESTEP indicates it is time to read the timestep
                        if terms[1] == 'TIMESTEP':
                            readtimestep = True

                        #ITEM: NUMBER indicates it is time to read natoms
                        elif terms[1] == 'NUMBER':
                            readnatoms = True

                        #ITEM: BOX gives pbc and indicates it is time to read box parameters
                        elif terms[1] == 'BOX':
                            pbc = [True, True, True]
                            # The last three terms are the x/y/z boundary
                            # styles; anything but 'pp' is non-periodic
                            for i in xrange(3):
                                if terms[i + len(terms) - 3] != 'pp':
                                    pbc[i] = False
                            bcount = 0

                        #ITEM: ATOMS gives list of per-Atom property names and indicates it is time to read atomic values
                        elif terms[1] == 'ATOMS':
                            assert box is not None, 'Box information not found'
                            assert natoms is not None, 'Number of atoms not found'

                            #read list of property names
                            name_list = terms[2:]
                            id_index = name_list.index('id')

                            #create empty array for reading property values
                            prop_vals = np.empty((natoms, len(name_list)))

                            #create and save default prop_info Data Model if needed
                            if prop_info is None:
                                prop_info = __prop_info_default_load(name_list)
                                # len(data) < 80 heuristically distinguishes
                                # a file path from raw dump text
                                if isinstance(data, (str, unicode)) and len(data) < 80:
                                    with open(data+'.json', 'w') as fj:
                                        prop_info.json(fp=fj, indent=4)
                                prop_info = prop_info.find('LAMMPS-dump-atoms_prop-relate')

                            #create system and flag that it is time to read data
                            system = am.System(atoms=am.Atoms(natoms=natoms),
                                               box=box, pbc=pbc)
                            system.prop['timestep'] = timestep
                            readatoms = True

    if system is None:
        raise ValueError('Failed to properly load dump file '+str(data)[:50])

    return system
def dump(system, fname, prop_info=None, xf='%.13e'):
    """
    Write a LAMMPS-style dump file from a System.

    Arguments:
    system -- System to write to the dump file.
    fname -- name (and location) of file to save data to.

    Keyword Arguments:
    prop_info -- DataModelDict for relating the per-atom properties to/from
                 the dump file and the System. Will create a default json
                 instance <fname>.json if prop_info is not given and
                 <fname>.json doesn't already exist.
    xf -- c-style format for printing the floating point numbers. Default
          is '%.13e'.

    NOTE(review): Python 2 code (xrange, iteritems).  Writes exactly one
    snapshot: TIMESTEP, NUMBER OF ATOMS, BOX BOUNDS, then per-atom rows.
    """
    #create or read prop_info Data Model
    if prop_info is None:
        try:
            # Reuse a previously saved mapping if one exists beside fname
            with open(fname+'.json') as fj:
                prop_info = DataModelDict(fj)
        except:
            # Otherwise build a default mapping and persist it for reuse
            prop_info = __prop_info_default_dump(system)
            with open(fname+'.json', 'w') as fj:
                prop_info.json(fp=fj, indent=4)
    else:
        if os.path.isfile(prop_info):
            with open(prop_info) as f:
                prop_info = f.read()
        prop_info = DataModelDict(prop_info)

    #read box_unit if specified in prop_info
    prop_info = prop_info.find('LAMMPS-dump-atoms_prop-relate')
    box_unit = prop_info['box_prop'].get('unit', None)

    #open fname
    with open(fname, 'w') as f:

        #write timestep info (0 if the system carries no timestep)
        f.write('ITEM: TIMESTEP\n')
        try:
            f.write('%i\n'%system.prop['timestep'])
        except:
            f.write('0\n')

        #write number of atoms
        f.write('ITEM: NUMBER OF ATOMS\n')
        f.write('%i\n' % ( system.natoms ))

        #write system boundary info for an orthogonal box
        if system.box.xy == 0.0 and system.box.xz == 0.0 and system.box.yz == 0.0:
            f.write('ITEM: BOX BOUNDS')
            # 'pp' marks a periodic dimension, 'fm' a non-periodic one
            for i in xrange(3):
                if system.pbc[i]:
                    f.write(' pp')
                else:
                    f.write(' fm')
            f.write('\n')

            f.write('%f %f\n' % ( uc.get_in_units(system.box.xlo, box_unit),
                                  uc.get_in_units(system.box.xhi, box_unit) ))
            f.write('%f %f\n' % ( uc.get_in_units(system.box.ylo, box_unit),
                                  uc.get_in_units(system.box.yhi, box_unit) ))
            f.write('%f %f\n' % ( uc.get_in_units(system.box.zlo, box_unit),
                                  uc.get_in_units(system.box.zhi, box_unit) ))

        #write system boundary info for a triclinic box
        else:
            f.write('ITEM: BOX BOUNDS xy xz yz')
            for i in xrange(3):
                if system.pbc[i]:
                    f.write(' pp')
                else:
                    f.write(' fm')
            f.write('\n')

            # LAMMPS dump files store the bounding box of the tilted cell,
            # so pad the lo/hi values by the tilt factors
            xlo_bound = uc.get_in_units(system.box.xlo, box_unit) + uc.get_in_units(min(( 0.0, system.box.xy, system.box.xz, system.box.xy + system.box.xz)), box_unit)
            xhi_bound = uc.get_in_units(system.box.xhi, box_unit) + uc.get_in_units(max(( 0.0, system.box.xy, system.box.xz, system.box.xy + system.box.xz)), box_unit)
            ylo_bound = uc.get_in_units(system.box.ylo, box_unit) + uc.get_in_units(min(( 0.0, system.box.yz )), box_unit)
            yhi_bound = uc.get_in_units(system.box.yhi, box_unit) + uc.get_in_units(max(( 0.0, system.box.yz )), box_unit)
            zlo_bound = uc.get_in_units(system.box.zlo, box_unit)
            zhi_bound = uc.get_in_units(system.box.zhi, box_unit)

            f.write('%f %f %f\n' % ( xlo_bound, xhi_bound,
                                     uc.get_in_units(system.box.xy, box_unit) ))
            f.write('%f %f %f\n' % ( ylo_bound, yhi_bound,
                                     uc.get_in_units(system.box.xz, box_unit) ))
            f.write('%f %f %f\n' % ( zlo_bound, zhi_bound,
                                     uc.get_in_units(system.box.yz, box_unit) ))

        #write atomic header info and prepare outarray for writing
        header = 'ITEM: ATOMS id'
        print_string = '%i'
        outarray = np.empty((system.natoms, len(prop_info['LAMMPS-attribute'])))
        start = 0
        for attr, a_keys in prop_info['LAMMPS-attribute'].iteritems():

            #get first prop relation for attr
            relation = a_keys.aslist('relation')[0]
            prop = relation.get('prop')
            index = (Ellipsis, ) + tuple(relation.aslist('index'))
            unit = relation.get('unit', None)
            # 'scaled' means box-relative coordinates, not a physical unit
            if unit == 'scaled':
                unit = None
                scale = True
            else:
                scale = False

            #pass values to outarray
            outarray[:,start] = uc.get_in_units(system.atoms_prop(key=prop, scale=scale), unit)[index].reshape((system.natoms))
            start += 1

            #prepare header and print_string
            header += ' %s' % attr
            if am.tools.is_dtype_int(system.atoms.dtype[prop]):
                print_string += ' %i'
            else:
                print_string += ' ' + xf

        f.write(header + '\n')
        print_string += '\n'

        #iterate over all atoms (ids are written 1-based)
        for i in xrange(system.natoms):
            vals = (i+1, ) + tuple(outarray[i])
            f.write(print_string % vals)
def runner(dbase, run_directory, orphan_directory=None, hold_directory=None):
    """
    High-throughput calculation runner.

    Parameters
    ----------
    dbase : iprPy.Database
        The database to interact with.
    run_directory : str
        The path to the directory where the calculation instances to run
        are located.
    orphan_directory : str, optional
        The path for the orphan directory where incomplete calculations
        are moved.  If None (default) then will use 'orphan' at the same
        level as the run_directory.
    hold_directory : str, optional
        The path for the hold directory where tar archives that failed to
        be uploaded are moved to.  If None (default) then will use 'hold'
        at the same level as the run_directory.
    """
    # Get path to Python executable running this script
    py_exe = sys.executable
    if py_exe is None:
        py_exe = 'python'

    # Get absolute path to run_directory
    run_directory = os.path.abspath(run_directory)

    # Get original working directory (restored when the runner finishes)
    original_dir = os.getcwd()

    # Define runner log file, named from timestamp + pid for uniqueness
    d = datetime.datetime.now()
    pid = os.getpid()
    runner_log_dir = os.path.join(os.path.dirname(rootdir), 'runner-logs')
    if not os.path.isdir(runner_log_dir):
        os.makedirs(runner_log_dir)
    # NOTE(review): the format fields skip d.hour — the name uses
    # year-month-day-minute-second-microsecond-pid; confirm intended.
    log_file = os.path.join(runner_log_dir,
                            '%04i-%02i-%02i-%02i-%02i-%06i-%i.log'
                            % (d.year, d.month, d.day, d.minute, d.second,
                               d.microsecond, pid))

    # Set default orphan_directory
    if orphan_directory is None:
        orphan_directory = os.path.join(os.path.dirname(run_directory),
                                        'orphan')

    # Set default hold_directory
    if hold_directory is None:
        hold_directory = os.path.join(os.path.dirname(run_directory), 'hold')

    # Start runner log file
    with open(log_file, 'a') as log:

        # Change to the run directory
        os.chdir(run_directory)

        # Initialize bidfailcount counter (consecutive failed bids)
        bidfailcount = 0

        # Announce the runner's pid
        print('Runner started with pid', pid)
        sys.stdout.flush()

        # flist is the running list of calculations
        flist = os.listdir(run_directory)
        while len(flist) > 0:

            # Pick a random calculation from the list (randomness reduces
            # bid collisions between concurrent runners)
            index = random.randint(0, len(flist)-1)
            sim = flist[index]

            # Submit a bid and check if it succeeded
            if bid(sim):

                # Reset bidfailcount
                bidfailcount = 0

                # Move to simulation directory
                os.chdir(sim)
                log.write('%s\n' % sim)

                # Check that the calculation has calc_*.py, calc_*.in and
                # record in the database
                try:
                    record = dbase.get_record(name=sim)
                    calc_py = get_file('calc_*.py')
                    calc_in = get_file('calc_*.in')

                # Pass ConnectionErrors forward killing runner
                except requests.ConnectionError as e:
                    raise requests.ConnectionError(e)

                # If not complete, zip and move to the orphan directory
                except:
                    log.write('Incomplete simulation: moved to orphan directory\n\n')
                    os.chdir(run_directory)
                    if not os.path.isdir(orphan_directory):
                        os.makedirs(orphan_directory)
                    shutil.make_archive(os.path.join(orphan_directory, sim),
                                        'gztar', root_dir=run_directory,
                                        base_dir=sim)
                    removecalc(os.path.join(run_directory, sim))
                    flist = os.listdir(run_directory)
                    continue

                # Check if any files in the calculation folder are incomplete
                # records (i.e. parent calculations that are not done yet)
                error_flag = False
                ready_flag = True

                for fname in glob.iglob('*'):
                    parent_sim, ext = os.path.splitext(os.path.basename(fname))
                    if ext in ('.json', '.xml'):
                        parent = DM(fname)
                        try:
                            status = parent.find('status')

                            # Check parent record in database to see if it
                            # has completed
                            if status == 'not calculated':
                                parent_record = dbase.get_record(name=parent_sim)
                                try:
                                    status = parent_record.content.find('status')

                                    # Mark flag if still incomplete
                                    if status == 'not calculated':
                                        ready_flag = False
                                        break

                                    # Skip if parent calculation failed
                                    elif status == 'error':
                                        with open(os.path.basename(fname), 'w') as f:
                                            parent_record.content.json(fp=f, indent=4)
                                        error_flag = True
                                        error_message = 'parent calculation issued an error'
                                        break

                                    # Ignore if unknown status
                                    else:
                                        raise ValueError('unknown status')

                                # Copy parent record to calculation folder
                                # if it is now complete
                                except:
                                    with open(os.path.basename(fname), 'w') as f:
                                        parent_record.content.json(fp=f, indent=4)
                                    log.write('parent %s copied to sim folder\n'
                                              % parent_sim)

                            # skip if parent calculation failed
                            elif status == 'error':
                                error_flag = True
                                error_message = 'parent calculation issued an error'
                                break
                        except:
                            # Record has no status field: not a parent record
                            continue

                # Handle calculations that have unfinished parents
                if not ready_flag:
                    bid_files = glob.glob('*.bid')
                    os.chdir(run_directory)
                    # Release the bid so another runner can claim this sim
                    for bid_file in bid_files:
                        os.remove(os.path.join(sim, bid_file))
                    # Focus on the blocking parent next instead of rescanning
                    flist = [parent_sim]
                    log.write('parent %s not ready\n\n' % parent_sim)
                    continue

                # Run the calculation
                try:
                    assert not error_flag, error_message
                    run = subprocess.Popen([py_exe, calc_py, calc_in, sim],
                                           stderr=subprocess.PIPE)
                    # NOTE(review): stderr.read() blocks until the child
                    # closes stderr, i.e. effectively until it finishes.
                    error_message = run.stderr.read()

                    # Load results.json
                    try:
                        model = DM('results.json')

                    # Throw errors if no results.json
                    except:
                        error_flag = True
                    assert not error_flag, error_message
                    log.write('sim calculated successfully\n')

                # Catch any errors and build results.json
                except:
                    model = record.content
                    keys = list(model.keys())
                    record_type = keys[0]
                    model[record_type]['status'] = 'error'
                    model[record_type]['error'] = str(sys.exc_info()[1])
                    with open('results.json', 'w') as f:
                        model.json(fp=f, indent=4)
                    log.write('error: %s\n' % model[record_type]['error'])

                # Read in results.json
                #model = DM('results.json')

                # Update record (retry up to 10 times)
                tries = 0
                while tries < 10:
                    try:
                        dbase.update_record(content=model, name=sim)
                        break
                    except:
                        tries += 1
                if tries == 10:
                    os.chdir(run_directory)
                    log.write('failed to update record\n')
                else:
                    # Archive calculation and add to database or hold_directory
                    try:
                        dbase.add_tar(root_dir=run_directory, name=sim)
                    except:
                        log.write('failed to upload archive\n')
                        if not os.path.isdir(hold_directory):
                            os.makedirs(hold_directory)
                        shutil.move(sim+'.tar.gz', hold_directory)
                    os.chdir(run_directory)
                    removecalc(os.path.join(run_directory, sim))
                log.write('\n')

            # Else if bid(sim) failed
            else:
                bidfailcount += 1

                # Stop unproductive worker after 10 consecutive bid fails
                if bidfailcount > 10:
                    print("Didn't find an open simulation")
                    break

                # Pause for 10 seconds before trying again
                time.sleep(10)

            # Regenerate flist and flush log file
            flist = os.listdir(run_directory)
            log.flush()
            os.fsync(log.fileno())

    print('No simulations left to run')
    os.chdir(original_dir)
def save_potential_record(content, files=None, lib_directory=None,
                          record_style='potential_users_LAMMPS',
                          replace=False):
    """
    Saves a new potential_*LAMMPS record to the library directory.  The
    record's title is automatically taken as the record's id.

    Parameters
    ----------
    content : str or DataModelDict.DataModelDict
        The record content to save to the library directory.  Can be
        xml/json content, path to an xml/json file, or a DataModelDict.
    files : str or list, optional
        The directory path(s) to the parameter file(s) that the potential
        uses.
    lib_directory : str, optional
        The directory path for the library.  If not given, then it will use
        the iprPy/library directory.
    record_style : str, optional
        The record_style to save the record as.  Default value is
        'potential_users_LAMMPS', which should be used for user-defined
        potentials.
    replace : bool, optional
        If False (Default), will raise an error if a record with the same
        title already exists in the library.  If True, any matching records
        will be overwritten.

    Raises
    ------
    ValueError
        If replace=False and a record with the same title (i.e. id) already
        exists in the library.
    """
    # Load as DataModelDict and extract id as title
    content = DM(content)
    title = content['potential-LAMMPS']['id']

    # Set default lib_directory
    if lib_directory is None:
        lib_directory = libdir

    # Define record paths
    stylepath = Path(lib_directory, record_style)
    if not stylepath.is_dir():
        stylepath.mkdir()
    fname = Path(stylepath, title + '.json')
    potdir = Path(stylepath, title)

    # Save record
    if replace is False and fname.is_file():
        raise ValueError(f'Record {title} already exists')
    with open(fname, 'w', encoding='UTF-8') as recordfile:
        content.json(fp=recordfile, indent=4)

    # Copy parameter files
    if files is not None:
        if not potdir.is_dir():
            # BUGFIX: original read `potdir.mkdir` (no call parentheses),
            # which never created the directory and made shutil.copy fail.
            potdir.mkdir()
        # Use a distinct loop variable: the original reused `fname`, which
        # shadowed the record path defined above.
        for paramfile in iaslist(files):
            shutil.copy(paramfile, potdir)
def main(args):
    """
    Main function for running calc_struct_static.py

    args[0] is the input-file path (required); args[1] is an optional UUID
    for the calculation (one is generated if absent).  Runs an E_coh vs r
    scan, refines a0 and elastic constants, saves an Ecoh_vs_r.png plot and
    a results.json data model in the current directory.

    NOTE(review): Python 2 code (`.keys()[0]` on a dict view would fail on
    Python 3).
    """
    try:
        infile = args[0]
        try:
            UUID = args[1]
        except:
            UUID = str(uuid.uuid4())
    except:
        raise ValueError('Input file not given')

    #Read in parameters from input file
    input_dict = read_input(infile)

    #Initial parameter setup
    lammps_exe = input_dict.get('lammps_exe')
    pot_dir = input_dict.get('potential_dir', '')
    symbols = input_dict.get('symbols')
    u_length = input_dict.get('length_display_units', 'angstrom')
    u_press = input_dict.get('pressure_display_units', 'GPa')
    u_energy = input_dict.get('energy_display_units', 'eV')
    r_min = input_dict.get('r_min', None)
    r_max = input_dict.get('r_max', None)

    # Defaults are in angstrom; user-supplied values are read in the
    # display length unit
    if r_min is None:
        r_min = uc.get_in_units(2.0, 'angstrom')
    else:
        r_min = uc.get_in_units(float(r_min), u_length)
    if r_max is None:
        r_max = uc.get_in_units(5.0, 'angstrom')
    else:
        r_max = uc.get_in_units(float(r_max), u_length)
    steps = int(input_dict.get('steps', 200))

    #read in potential_file
    with open(input_dict['potential_file']) as f:
        potential = lmp.Potential(f, pot_dir)

    #read in prototype_file (try crystal model first, fall back to CIF)
    with open(input_dict['crystal_file']) as f:
        try:
            ucell = am.models.crystal(f)[0]
        except:
            f.seek(0)
            ucell = am.models.cif_cell(f)[0]

    #Run ecoh_vs_r
    rvals, avals, evals = ecoh_vs_r(lammps_exe, deepcopy(ucell), potential,
                                    symbols, rmin=r_min, rmax=r_max,
                                    rsteps=steps)

    #Use plot to get rough lattice parameter guess, a0, and build ucell
    a0 = avals[np.argmin(evals)]
    cell_0 = ucell.model(symbols=symbols, box_unit='scaled')
    # Rescale the cell isotropically, preserving the b/a and c/a ratios
    ucell.box_set(a = a0,
                  b = a0 * ucell.box.b / ucell.box.a,
                  c = a0 * ucell.box.c / ucell.box.a,
                  scale=True)

    #Run quick_aCij to refine values
    results = quick_a_Cij(lammps_exe, ucell, potential, symbols)

    #Plot Ecoh vs. r
    plt.title('Cohesive Energy vs. Interatomic Spacing')
    plt.xlabel('r (' + u_length + ')')
    plt.ylabel('Cohesive Energy (' + u_energy + '/atom)')
    plt.plot(uc.get_in_units(rvals, u_length),
             uc.get_in_units(evals, u_energy))
    plt.savefig('Ecoh_vs_r.png')
    plt.close()

    ucell_new = results['ucell_new']
    cell_1 = ucell_new.model(symbols=symbols, box_unit=u_length)
    ecoh = uc.get_in_units(results['ecoh'], u_energy)
    C = results['C']

    #Build the output data model
    output = DataModelDict()
    output['calculation-crystal-phase'] = calc = DataModelDict()
    calc['calculation-id'] = UUID

    with open(input_dict['potential_file']) as f:
        potdict = DataModelDict(f)
    calc['potential'] = potdict['LAMMPS-potential']['potential']

    calc['crystal-info'] = DataModelDict()
    calc['crystal-info']['artifact'] = input_dict['crystal_file']
    calc['crystal-info']['symbols'] = symbols

    # Static 0 K, 0 pressure phase state
    calc['phase-state'] = DataModelDict()
    calc['phase-state']['temperature'] = DataModelDict([('value', 0.0),
                                                        ('unit', 'K')])
    calc['phase-state']['pressure'] = DataModelDict([('value', 0.0),
                                                     ('unit', u_press)])

    calc['as-constructed-atomic-system'] = cell_0['atomic-system']
    calc['relaxed-atomic-system'] = cell_1['atomic-system']
    # First key of the cell element names the crystal family
    c_family = cell_1['atomic-system']['cell'].keys()[0]

    calc['cohesive-energy'] = DataModelDict([('value', ecoh),
                                             ('unit', u_energy)])
    calc['elastic-constants'] = C.model(unit=u_press,
                                        crystal_system=c_family)['elastic-constants']

    calc['cohesive-energy-relation'] = DataModelDict()
    calc['cohesive-energy-relation']['r'] = DataModelDict([('value', list(uc.get_in_units(rvals, u_length))), ('unit', u_length)])
    calc['cohesive-energy-relation']['a'] = DataModelDict([('value', list(uc.get_in_units(avals, u_length))), ('unit', u_length)])
    calc['cohesive-energy-relation']['cohesive-energy'] = DataModelDict([('value', list(uc.get_in_units(evals, u_length))), ('unit', u_energy)])

    with open('results.json', 'w') as f:
        output.json(fp=f, indent=4)
def main(input_file):
    """
    Generic method for interpreting a prepare input script.

    Reads input_file line by line.  Recognized first terms: print_check,
    calculation, list_calculations, function, list_functions, end.  Any
    other first term is treated as a variable name followed by a command
    (clear / set / add / adds).  Raises ValueError on malformed lines.

    NOTE(review): Python 2 code (print statements).
    """
    # model wraps the variable store so print_check can dump it as JSON
    model = DM()
    model['variable'] = variable = DM()

    #Process the lines in order
    for line in input_file:
        terms = line.split()

        #ignore blank and comment lines
        if len(terms) > 0 and terms[0][0] != '#':

            # Dump current variable state for debugging
            if terms[0] == 'print_check':
                print model.json(indent=2)

            # Dispatch a calculation's prepare with the remaining terms
            elif terms[0] == 'calculation':
                calc_name = terms[1]
                iprPy.calculation_prepare(calc_name, terms[2:], variable)

            elif terms[0] == 'list_calculations':
                for name in iprPy.calculation_names():
                    print name

            # Dispatch a prepare function with the remaining terms
            elif terms[0] == 'function':
                calc_name = terms[1]
                iprPy.prepare_function(calc_name, terms[2:], variable)

            elif terms[0] == 'list_functions':
                for name in iprPy.prepare_function_names():
                    print name

            # Stop processing the script
            elif terms[0] == 'end':
                break

            # Anything else is a variable assignment command
            else:
                name = terms[0]
                if len(terms) == 1:
                    raise ValueError('invalid argument "' + name + '"')

                # Remove the variable entirely
                elif terms[1] == 'clear':
                    del (variable[name])

                # Replace the variable's value; if the value names an
                # existing variable, copy that variable's value
                elif terms[1] == 'set':
                    value = ' '.join(terms[2:])
                    if value in variable:
                        value = deepcopy(variable[value])
                    variable[name] = value

                # Append the value (joined as one term) to the variable
                elif terms[1] == 'add':
                    value = ' '.join(terms[2:])
                    if value in variable:
                        value = deepcopy(variable[value])
                    if not isinstance(value, list):
                        value = [value]
                    for v in value:
                        variable.append(name, v)

                # Append each remaining term individually
                elif terms[1] == 'adds':
                    for value in terms[2:]:
                        if value in variable:
                            value = deepcopy(variable[value])
                        if not isinstance(value, list):
                            value = [value]
                        for v in value:
                            variable.append(name, v)

                else:
                    raise ValueError('invalid command: ' + line)
def ipr_meta_to_potentials(refresh_files=False):
    """
    Convert legacy IPR metadata records into separate potential and
    implementation data models.

    Reads every *.json under data/'IPR metadata records', writes one
    potential model per file to data/potential/<key>.json, and one
    meta.json per implementation under data/implementation/<impkey>/.

    Parameters
    ----------
    refresh_files : bool, optional
        Passed through to ipr_fetch_files as refresh; if True,
        implementation files are re-downloaded.

    Raises
    ------
    ValueError
        If the source 'IPR metadata records' directory does not exist.
    """
    oldpath = Path(rootdir, '..', 'data', 'IPR metadata records')
    potpath = Path(rootdir, '..', 'data', 'potential')
    imppath = Path(rootdir, '..', 'data', 'implementation')
    if not oldpath.is_dir():
        raise ValueError(f'{oldpath} not found')
    if not potpath.is_dir():
        potpath.mkdir()
    if not imppath.is_dir():
        imppath.mkdir()

    for fname in oldpath.glob('*.json'):
        # Load old model
        oldpotmodel = DM(fname)
        oldpot = oldpotmodel['interatomic-potential']
        potkey = oldpot['key']

        # Create new potential model
        newpotmodel = DM()
        newpotmodel['interatomic-potential'] = newpot = DM()

        # Copy fields in their original order, transforming as needed
        for key1 in oldpot:

            # Handle description
            if key1 == 'description':
                newpot['description'] = DM()
                for key2 in oldpot['description']:

                    # Strip citation of everything except DOI
                    if key2 == 'citation':
                        for oldcite in oldpot['description'].iteraslist('citation'):
                            try:
                                newcite = DM([('DOI', oldcite['DOI'])])
                            except:
                                # Citation without a DOI is dropped
                                pass
                            else:
                                newpot['description'].append('citation', newcite)
                    else:
                        newpot['description'][key2] = oldpot['description'][key2]

            # Handle implementation: split each out into its own model
            elif key1 == 'implementation':

                # Iterate over implementations in old model
                for oldimp in oldpot.iteraslist('implementation'):
                    newimpmodel = DM()
                    newimpmodel['interatomic-potential-implementation'] = newimp = DM()
                    for key2 in oldimp:
                        newimp[key2] = oldimp[key2]

                        # Add interatomic-potential-key after date
                        # (relies on DM preserving insertion order)
                        if key2 == 'date':
                            newimp['interatomic-potential-key'] = potkey

                    # Save new implementation model
                    impdir = Path(imppath, newimp['key'])
                    if not impdir.is_dir():
                        impdir.mkdir()
                    with open(Path(impdir, 'meta.json'), 'w') as f:
                        newimpmodel.json(fp=f, indent=4)

                    # Download any missing files
                    ipr_fetch_files(newimpmodel, impdir, refresh=refresh_files)

            # Drop the XML namespace attribute
            elif key1 == '@xmlns:xsi':
                pass

            else:
                newpot[key1] = oldpot[key1]

        # Save new potential model
        potkey = newpot['key']
        with open(Path(potpath, f'{potkey}.json'), 'w') as f:
            newpotmodel.json(fp=f, indent=4)
def main(*args):
    """
    Legacy high-throughput runner entry point.  args[0] is the path to a
    runner input script naming the run directory, orphan directory, and
    database to use.  Repeatedly bids on calculation folders in the run
    directory, executes them, and uploads the results.
    """
    # Read in input script terms
    run_directory, orphan_directory, dbase = __read_input_file(args[0])

    # Start runner log file (one per runner process, named by pid)
    log_file = str(os.getpid()) + '-runner.log'
    with open(log_file, 'a') as log:

        # Change to the run directory
        os.chdir(run_directory)

        # flist is the running list of calculations
        flist = os.listdir(run_directory)
        while len(flist) > 0:

            # Pick a random calculation from the list (randomness lets
            # multiple runners share one run directory with few collisions)
            index = random.randint(0, len(flist) - 1)
            sim = flist[index]

            # Submit a bid; only proceed if this runner wins the folder
            if bid(sim):
                os.chdir(sim)
                log.write('%s\n' % sim)

                # Check that the calculation has calc_*.py, calc_*.in and a
                # record in the database
                try:
                    record = dbase.get_record(name=sim)
                    calc_py = get_file('calc_*.py')
                    calc_in = get_file('calc_*.in')

                # Pass ConnectionErrors forward, killing the runner
                except requests.ConnectionError as e:
                    raise requests.ConnectionError(e)

                # If not complete, zip and move to the orphan directory
                except:
                    log.write('Incomplete simulation: moved to orphan directory\n\n')
                    os.chdir(run_directory)
                    if not os.path.isdir(orphan_directory):
                        os.makedirs(orphan_directory)
                    shutil.make_archive(os.path.join(orphan_directory, sim), 'gztar',
                                        root_dir=run_directory, base_dir=sim)
                    shutil.rmtree(os.path.join(run_directory, sim))
                    flist = os.listdir(run_directory)
                    continue

                # Check if any files in the calculation folder are incomplete
                # records, i.e. parent calculations that have not finished
                error_flag = False
                ready_flag = True

                for fname in glob.iglob('*'):
                    parent_sim, ext = os.path.splitext(os.path.basename(fname))
                    if ext in ('.json', '.xml'):
                        with open(fname) as f:
                            parent = DM(f)
                        try:
                            status = parent.find('status')

                            # Check parent record in database to see if it
                            # has completed
                            if status == 'not calculated':
                                parent_record = dbase.get_record(name=parent_sim)
                                parent = DM(parent_record.content)
                                try:
                                    status = parent.find('status')

                                    # Mark flag if still incomplete
                                    if status == 'not calculated':
                                        ready_flag = False
                                        break

                                    # Skip if parent calculation failed
                                    elif status == 'error':
                                        with open(os.path.basename(fname), 'w') as f:
                                            f.write(parent_record.content)
                                        error_flag = True
                                        error_message = 'parent calculation issued an error'
                                        break

                                    # Ignore if unknown status
                                    else:
                                        raise ValueError('unknown status')

                                # Copy parent record to calculation folder if
                                # it is now complete.  NOTE(review): this
                                # relies on find('status') raising once the
                                # record no longer has a status element —
                                # confirm against DM.find semantics.
                                except:
                                    with open(os.path.basename(fname), 'w') as f:
                                        f.write(parent_record.content)
                                    log.write('parent %s copied to sim folder\n' % parent_sim)

                            # Skip if parent calculation failed
                            elif status == 'error':
                                error_flag = True
                                error_message = 'parent calculation issued an error'
                                break

                        # Records without a status field are not treated as
                        # parent calculation records
                        except:
                            continue

                # Handle calculations that have unfinished parents: release
                # this folder's bids and work on the parent next
                if not ready_flag:
                    bid_files = glob.glob('*.bid')
                    os.chdir(run_directory)
                    for bid_file in bid_files:
                        os.remove(os.path.join(sim, bid_file))
                    flist = [parent_sim]
                    log.write('parent %s not ready\n\n' % parent_sim)
                    continue

                # Run the calculation
                try:
                    assert not error_flag, error_message
                    run = subprocess.Popen(['python', calc_py, calc_in, sim],
                                           stderr=subprocess.PIPE)
                    # Blocks until the subprocess closes stderr (i.e. exits)
                    error_message = run.stderr.read()

                    # Check for results.json file
                    try:
                        with open('results.json') as f:
                            model = DM(f)
                    except:
                        error_flag = True
                    assert not error_flag, error_message
                    log.write('sim calculated successfully\n\n')

                # Catch any errors and add them to results.json
                except:
                    model = DM(record.content)
                    record_type = model.keys()[0]
                    model[record_type]['status'] = 'error'
                    model[record_type]['error'] = str(sys.exc_info()[1])
                    with open('results.json', 'w') as f:
                        model.json(fp=f, indent=4)
                    log.write('error: %s\n\n' % model[record_type]['error'])

                # Update record in the database
                with open('results.json') as f:
                    model = DM(f)
                dbase.update_record(content=model.xml(), name=sim)

                # Archive calculation and add to database
                dbase.add_tar(root_dir=run_directory, name=sim)

                # Remove simulation directory (best effort)
                os.chdir(run_directory)
                try:
                    shutil.rmtree(os.path.join(run_directory, sim))
                except:
                    pass

            # Refresh the list of remaining calculations and flush the log
            flist = os.listdir(run_directory)
            log.flush()
            os.fsync(log.fileno())
def download_potentials(self, localpath=None, potentials=None, format='xml',
                        indent=None, verbose=False):
    """
    Download all potentials from the remote to the localpath directory.

    Parameters
    ----------
    localpath : str, optional
        Local directory the records are copied into.  Falls back to the
        localpath set during object initialization.
    potentials : list of Potential, optional
        Specific potentials to save.  When omitted, every potential record
        is queried from the remote database and saved.
    format : str, optional
        File format to save as: 'xml' (default) or 'json'.
    indent : int, optional
        Indentation spacing for the saved files; compact output if omitted.
    verbose : bool, optional
        If True, prints a summary message.  Default is False.

    Raises
    ------
    ValueError
        If no localpath is available, the format is invalid, or records of
        the other format already exist in localpath.
    """
    template = 'Potential'

    # Resolve the destination path, preferring the explicit argument
    if localpath is None:
        localpath = self.localpath
    if localpath is None:
        raise ValueError('No local path set to save files to')

    # Normalize and validate the requested format
    format = format.lower()
    allowed_formats = ['xml', 'json']
    if format not in allowed_formats:
        raise ValueError("Format must be 'xml' or 'json'")

    # Make sure the save directory exists
    save_directory = Path(localpath, template)
    save_directory.mkdir(parents=True, exist_ok=True)

    # Refuse to mix formats within the same local directory
    for fmt in allowed_formats:
        if fmt == format:
            continue
        numexisting = sum(1 for _ in save_directory.glob(f'*.{fmt}'))
        if numexisting > 0:
            raise ValueError(
                f'{numexisting} records of format {fmt} already saved locally')

    if potentials is None:
        # Query every record from the remote database and save its content
        records = self.cdcs.query(template=template)
        for idx in range(len(records)):
            record = records.iloc[idx]
            content = DM(record.xml_content)
            fname = Path(save_directory, f'{record.title}.{format}')
            # Pick the matching serializer once, then write
            writer = content.xml if format == 'xml' else content.json
            with open(fname, 'w', encoding='UTF-8') as f:
                writer(fp=f, indent=indent)
        if verbose:
            print(f'Downloaded {len(records)} of {template}')
    else:
        # Save the already-loaded Potential objects
        for potential in aslist(potentials):
            potname = potential.id
            fname = Path(save_directory, f'potential.{potname}.{format}')
            model = potential.asmodel()
            with open(fname, 'w', encoding='UTF-8') as f:
                if format == 'xml':
                    model.xml(fp=f, indent=indent)
                else:
                    model.json(fp=f, indent=indent)
        if verbose:
            print(f'Downloaded {len(potentials)} of {template}')
def dump(system, fname, prop_info=None, xf='%.13e'):
    """
    Write a LAMMPS-style dump file from a System.

    Parameters
    ----------
    system : System
        System to write to the dump file.
    fname : str
        Name (and location) of the file to save data to.
    prop_info : DataModelDict or str, optional
        Data model (or path to one) relating the per-atom properties to/from
        the dump file and the System.  If not given, a default model is
        loaded from <fname>.json, or created and saved there if that file
        does not exist.
    xf : str, optional
        C-style format for printing floating point numbers.
        Default is '%.13e'.
    """
    # Create or read the prop_info data model
    if prop_info is None:
        try:
            with open(fname + '.json') as fj:
                prop_info = DataModelDict(fj)
        # FIX: was a bare except; only a missing/unreadable file should
        # trigger default creation.  A malformed existing file now raises
        # instead of being silently overwritten.
        except IOError:
            prop_info = __prop_info_default_dump(system)
            with open(fname + '.json', 'w') as fj:
                prop_info.json(fp=fj, indent=4)
    else:
        # A string argument is treated as a path to a prop_info file
        if os.path.isfile(prop_info):
            with open(prop_info) as f:
                prop_info = f.read()
        prop_info = DataModelDict(prop_info)

    # Read box_unit if specified in prop_info
    prop_info = prop_info.find('LAMMPS-dump-atoms_prop-relate')
    box_unit = prop_info['box_prop'].get('unit', None)

    with open(fname, 'w') as f:

        # Write timestep info (default 0 when the System carries none)
        f.write('ITEM: TIMESTEP\n')
        try:
            f.write('%i\n' % system.prop['timestep'])
        # FIX: was a bare except; only a missing/non-numeric timestep is
        # meant to fall back to 0
        except (KeyError, TypeError, AttributeError):
            f.write('0\n')

        # Write number of atoms
        f.write('ITEM: NUMBER OF ATOMS\n')
        f.write('%i\n' % (system.natoms))

        # Write system boundary info for an orthogonal box
        if system.box.xy == 0.0 and system.box.xz == 0.0 and system.box.yz == 0.0:
            f.write('ITEM: BOX BOUNDS')
            # FIX: xrange -> range for Python 3 compatibility
            for i in range(3):
                if system.pbc[i]:
                    f.write(' pp')
                else:
                    f.write(' fm')
            f.write('\n')
            f.write('%f %f\n' % (uc.get_in_units(system.box.xlo, box_unit),
                                 uc.get_in_units(system.box.xhi, box_unit)))
            f.write('%f %f\n' % (uc.get_in_units(system.box.ylo, box_unit),
                                 uc.get_in_units(system.box.yhi, box_unit)))
            f.write('%f %f\n' % (uc.get_in_units(system.box.zlo, box_unit),
                                 uc.get_in_units(system.box.zhi, box_unit)))

        # Write system boundary info for a triclinic box
        else:
            f.write('ITEM: BOX BOUNDS xy xz yz')
            for i in range(3):
                if system.pbc[i]:
                    f.write(' pp')
                else:
                    f.write(' fm')
            f.write('\n')

            # LAMMPS bounding-box values for a tilted cell
            xlo_bound = uc.get_in_units(system.box.xlo, box_unit) + uc.get_in_units(
                min((0.0, system.box.xy, system.box.xz,
                     system.box.xy + system.box.xz)), box_unit)
            xhi_bound = uc.get_in_units(system.box.xhi, box_unit) + uc.get_in_units(
                max((0.0, system.box.xy, system.box.xz,
                     system.box.xy + system.box.xz)), box_unit)
            ylo_bound = uc.get_in_units(system.box.ylo, box_unit) + uc.get_in_units(
                min((0.0, system.box.yz)), box_unit)
            yhi_bound = uc.get_in_units(system.box.yhi, box_unit) + uc.get_in_units(
                max((0.0, system.box.yz)), box_unit)
            zlo_bound = uc.get_in_units(system.box.zlo, box_unit)
            zhi_bound = uc.get_in_units(system.box.zhi, box_unit)

            f.write('%f %f %f\n' % (xlo_bound, xhi_bound,
                                    uc.get_in_units(system.box.xy, box_unit)))
            f.write('%f %f %f\n' % (ylo_bound, yhi_bound,
                                    uc.get_in_units(system.box.xz, box_unit)))
            f.write('%f %f %f\n' % (zlo_bound, zhi_bound,
                                    uc.get_in_units(system.box.yz, box_unit)))

        # Write atomic header info and prepare outarray for writing
        header = 'ITEM: ATOMS id'
        print_string = '%i'
        outarray = np.empty((system.natoms, len(prop_info['LAMMPS-attribute'])))
        start = 0
        # FIX: iteritems -> items for Python 3 compatibility
        for attr, a_keys in prop_info['LAMMPS-attribute'].items():

            # Get first prop relation for attr
            relation = a_keys.aslist('relation')[0]
            prop = relation.get('prop')
            index = (Ellipsis, ) + tuple(relation.aslist('index'))
            unit = relation.get('unit', None)
            # 'scaled' means box-relative coordinates rather than a unit
            if unit == 'scaled':
                unit = None
                scale = True
            else:
                scale = False

            # Pass values to outarray
            outarray[:, start] = uc.get_in_units(
                system.atoms_prop(key=prop, scale=scale),
                unit)[index].reshape((system.natoms))
            start += 1

            # Prepare header and print_string
            header += ' %s' % attr
            if am.tools.is_dtype_int(system.atoms.dtype[prop]):
                print_string += ' %i'
            else:
                print_string += ' ' + xf

        f.write(header + '\n')
        print_string += '\n'

        # Iterate over all atoms (LAMMPS atom ids are 1-based)
        for i in range(system.natoms):
            vals = (i + 1, ) + tuple(outarray[i])
            f.write(print_string % vals)