def assimilate(self, old_task):
    """
    Parses VASP runs, then inserts the result into the db and returns the
    task_id or doc of the insertion.

    Returns:
        If in simulate_mode, the entire doc is returned for debugging
        purposes. Else, only the task_id of the inserted doc is returned.
    """
    path = old_task["dir_name"]  # AJ: get dir name from task
    d = self.get_task_doc(path, self.parse_dos, self.additional_fields)
    d["dir_name_full"] = d["dir_name"].split(":")[1]
    d["dir_name"] = get_block_part(d["dir_name_full"])

    if not self.simulate:
        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db.
        conn = MongoClient(self.host, self.port)
        db = conn[self.database]
        if self.user:
            db.authenticate(self.user, self.password)
        coll = db[self.collection]

        # Insert dos data into gridfs and then remove it from the dict.
        # DOS data tends to be above the 4Mb limit for mongo docs. A ref
        # to the dos file is in the dos_fs_id.
        result = coll.find_one({"dir_name": d["dir_name"]})
        if result is None or self.update_duplicates:
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"], cls=MontyEncoder)
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]
            d["last_updated"] = datetime.datetime.today()
            if result is None:
                d["task_id"] = "mp-{}".format(old_task["task_id"])  # AJ: old task_id is new
                logger.info("Inserting {} with taskid = {}".format(d["dir_name"], d["task_id"]))
            elif self.update_duplicates:
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}".format(d["dir_name"], d["task_id"]))

            # Fireworks processing
            self.process_fw(old_task, d)

            coll.update({"dir_name": d["dir_name"]}, {"$set": d}, upsert=True)
            return d["task_id"], d
        else:
            logger.info("Skipping duplicate {}".format(d["dir_name"]))
            return result["task_id"], result
    else:
        d["task_id"] = 0
        logger.info("Simulated insert into database for {} with task_id {}"
                    .format(d["dir_name"], d["task_id"]))
        return 0, d
def run_task(self, fw_spec):
    # write a file containing the formula and task_type for somewhat
    # easier file system browsing
    self._write_formula_file(fw_spec)

    # TODO: make this better - is there a way to load an environment
    # variable as the VASP_EXE?
    if 'nid' in socket.gethostname():  # hopper compute nodes
        # TODO: can base ncores on FW_submit.script
        v_exe = shlex.split('aprun -n 48 vasp')
        gv_exe = shlex.split('aprun -n 48 gvasp')
        print 'running on HOPPER'
    elif 'c' in socket.gethostname():  # mendel compute nodes
        # TODO: can base ncores on FW_submit.script
        v_exe = shlex.split('mpirun -n 32 vasp')
        gv_exe = shlex.split('aprun -n 32 gvasp')
        print 'running on MENDEL'
    else:
        raise ValueError('Unrecognized host!')

    for job in self.jobs:
        job.vasp_cmd = v_exe
        job.gamma_vasp_cmd = gv_exe

    logging.basicConfig(level=logging.DEBUG)
    c = Custodian(self.handlers, self.jobs, self.max_errors)
    custodian_out = c.run()

    all_errors = set()
    for run in custodian_out:
        for correction in run['corrections']:
            all_errors.update(correction['errors'])

    stored_data = {'error_list': list(all_errors)}
    update_spec = {'prev_vasp_dir': get_block_part(os.getcwd()),
                   'prev_task_type': fw_spec['task_type'],
                   'mpsnl': fw_spec['mpsnl'],
                   'snlgroup_id': fw_spec['snlgroup_id'],
                   'run_tags': fw_spec['run_tags']}

    return FWAction(stored_data=stored_data, update_spec=update_spec)
def run_task(self, fw_spec):
    # get the band structure and nelect from files
    """
    prev_dir = get_loc(fw_spec['prev_vasp_dir'])
    vasprun_loc = zpath(os.path.join(prev_dir, 'vasprun.xml'))
    kpoints_loc = zpath(os.path.join(prev_dir, 'KPOINTS'))

    vr = Vasprun(vasprun_loc)
    bs = vr.get_band_structure(kpoints_filename=kpoints_loc)
    """

    # get the band structure and nelect from DB
    block_part = get_block_part(fw_spec['prev_vasp_dir'])

    db_dir = os.environ['DB_LOC']
    assert isinstance(db_dir, object)
    db_path = os.path.join(db_dir, 'tasks_db.json')
    with open(db_path) as f:
        creds = json.load(f)

    connection = MongoClient(creds['host'], creds['port'])
    tdb = connection[creds['database']]
    tdb.authenticate(creds['admin_user'], creds['admin_password'])

    m_task = tdb.tasks.find_one({"dir_name": block_part},
                                {"calculations": 1, "task_id": 1})
    nelect = m_task['calculations'][0]['input']['parameters']['NELECT']
    bs_id = m_task['calculations'][0]['band_structure_fs_id']
    print bs_id, type(bs_id)
    fs = gridfs.GridFS(tdb, 'band_structure_fs')
    bs_dict = json.loads(fs.get(bs_id).read())
    bs_dict['structure'] = m_task['calculations'][0]['output']['crystal']
    bs = BandStructure.from_dict(bs_dict)
    print 'Band Structure found:', bool(bs)
    print nelect

    # run Boltztrap
    runner = BoltztrapRunner(bs, nelect)
    dir = runner.run(path_dir=os.getcwd())

    # put the data in the database
    bta = BoltztrapAnalyzer.from_files(dir)
    data = bta.to_dict
    data.update(get_meta_from_structure(bs._structure))
    data['snlgroup_id'] = fw_spec['snlgroup_id']
    data['run_tags'] = fw_spec['run_tags']
    data['snl'] = fw_spec['mpsnl']
    data['dir_name_full'] = dir
    data['dir_name'] = get_block_part(dir)
    data['task_id'] = m_task['task_id']
    data['hall'] = {}  # remove because it is too large and not useful
    data['hall_doping'] = {}  # remove because it is too large and not useful
    tdb.boltztrap.insert(clean_json(data))

    update_spec = {'prev_vasp_dir': fw_spec['prev_vasp_dir'],
                   'boltztrap_dir': os.getcwd(),
                   'prev_task_type': fw_spec['task_type'],
                   'mpsnl': fw_spec['mpsnl'],
                   'snlgroup_id': fw_spec['snlgroup_id'],
                   'run_tags': fw_spec['run_tags'],
                   'parameters': fw_spec.get('parameters')}

    return FWAction(update_spec=update_spec)
def run_task(self, fw_spec):
    # import here to prevent import errors in bigger MPCollab
    # get the band structure and nelect from files
    """
    prev_dir = get_loc(fw_spec['prev_vasp_dir'])
    vasprun_loc = zpath(os.path.join(prev_dir, 'vasprun.xml'))
    kpoints_loc = zpath(os.path.join(prev_dir, 'KPOINTS'))

    vr = Vasprun(vasprun_loc)
    bs = vr.get_band_structure(kpoints_filename=kpoints_loc)
    """
    filename = get_slug('JOB--' + fw_spec['mpsnl'].structure.composition.reduced_formula +
                        '--' + fw_spec['task_type'])
    with open(filename, 'w+') as f:
        f.write('')

    # get the band structure and nelect from DB
    block_part = get_block_part(fw_spec['prev_vasp_dir'])

    db_dir = os.environ['DB_LOC']
    assert isinstance(db_dir, object)
    db_path = os.path.join(db_dir, 'tasks_db.json')
    with open(db_path) as f:
        creds = json.load(f)

    connection = MongoClient(creds['host'], creds['port'])
    tdb = connection[creds['database']]
    tdb.authenticate(creds['admin_user'], creds['admin_password'])

    props = {"calculations": 1, "task_id": 1, "state": 1, "pseudo_potential": 1,
             "run_type": 1, "is_hubbard": 1, "hubbards": 1, "unit_cell_formula": 1}
    m_task = tdb.tasks.find_one({"dir_name": block_part}, props)
    if not m_task:
        time.sleep(60)  # only thing to think of is wait for DB insertion(?)
        m_task = tdb.tasks.find_one({"dir_name": block_part}, props)

    if not m_task:
        raise ValueError("Could not find task with dir_name: {}".format(block_part))

    if m_task['state'] != 'successful':
        raise ValueError("Cannot run Boltztrap; parent job unsuccessful")

    nelect = m_task['calculations'][0]['input']['parameters']['NELECT']
    bs_id = m_task['calculations'][0]['band_structure_fs_id']
    print bs_id, type(bs_id)
    fs = gridfs.GridFS(tdb, 'band_structure_fs')
    bs_dict = json.loads(fs.get(bs_id).read())
    bs_dict['structure'] = m_task['calculations'][0]['output']['crystal']
    bs = BandStructure.from_dict(bs_dict)
    print 'Band Structure found:', bool(bs)
    print nelect

    # run Boltztrap
    runner = BoltztrapRunner(bs, nelect)
    dir = runner.run(path_dir=os.getcwd())

    # put the data in the database
    bta = BoltztrapAnalyzer.from_files(dir)

    # 8/21/15 - Anubhav removed fs_id (also see line further below,
    # ted['boltztrap_full_fs_id'] ...)
    # 8/21/15 - this is to save space in MongoDB, as well as non-use of full
    # Boltztrap output (vs rerun)
    """
    data = bta.as_dict()
    data.update(get_meta_from_structure(bs._structure))
    data['snlgroup_id'] = fw_spec['snlgroup_id']
    data['run_tags'] = fw_spec['run_tags']
    data['snl'] = fw_spec['mpsnl']
    data['dir_name_full'] = dir
    data['dir_name'] = get_block_part(dir)
    data['task_id'] = m_task['task_id']
    del data['hall']  # remove because it is too large and not useful
    fs = gridfs.GridFS(tdb, "boltztrap_full_fs")
    btid = fs.put(json.dumps(jsanitize(data)))
    """

    # now for the "sanitized" data
    ted = bta.as_dict()
    del ted['seebeck']
    del ted['hall']
    del ted['kappa']
    del ted['cond']

    # ted['boltztrap_full_fs_id'] = btid
    ted['snlgroup_id'] = fw_spec['snlgroup_id']
    ted['run_tags'] = fw_spec['run_tags']
    ted['snl'] = fw_spec['mpsnl'].as_dict()
    ted['dir_name_full'] = dir
    ted['dir_name'] = get_block_part(dir)
    ted['task_id'] = m_task['task_id']

    ted['pf_doping'] = bta.get_power_factor(output='tensor', relaxation_time=self.TAU)
    ted['zt_doping'] = bta.get_zt(output='tensor', relaxation_time=self.TAU, kl=self.KAPPAL)

    ted['pf_eigs'] = self.get_eigs(ted, 'pf_doping')
    ted['pf_best'] = self.get_extreme(ted, 'pf_eigs')
    ted['pf_best_dope18'] = self.get_extreme(ted, 'pf_eigs', max_didx=3)
    ted['pf_best_dope19'] = self.get_extreme(ted, 'pf_eigs', max_didx=4)
    ted['zt_eigs'] = self.get_eigs(ted, 'zt_doping')
    ted['zt_best'] = self.get_extreme(ted, 'zt_eigs')
    ted['zt_best_dope18'] = self.get_extreme(ted, 'zt_eigs', max_didx=3)
    ted['zt_best_dope19'] = self.get_extreme(ted, 'zt_eigs', max_didx=4)
    ted['seebeck_eigs'] = self.get_eigs(ted, 'seebeck_doping')
    ted['seebeck_best'] = self.get_extreme(ted, 'seebeck_eigs')
    ted['seebeck_best_dope18'] = self.get_extreme(ted, 'seebeck_eigs', max_didx=3)
    ted['seebeck_best_dope19'] = self.get_extreme(ted, 'seebeck_eigs', max_didx=4)
    ted['cond_eigs'] = self.get_eigs(ted, 'cond_doping')
    ted['cond_best'] = self.get_extreme(ted, 'cond_eigs')
    ted['cond_best_dope18'] = self.get_extreme(ted, 'cond_eigs', max_didx=3)
    ted['cond_best_dope19'] = self.get_extreme(ted, 'cond_eigs', max_didx=4)
    ted['kappa_eigs'] = self.get_eigs(ted, 'kappa_doping')
    ted['kappa_best'] = self.get_extreme(ted, 'kappa_eigs', maximize=False)
    ted['kappa_best_dope18'] = self.get_extreme(ted, 'kappa_eigs', maximize=False, max_didx=3)
    ted['kappa_best_dope19'] = self.get_extreme(ted, 'kappa_eigs', maximize=False, max_didx=4)

    try:
        from mpcollab.thermoelectrics.boltztrap_TE import BoltzSPB
        bzspb = BoltzSPB(ted)
        maxpf_p = bzspb.get_maximum_power_factor(
            'p', temperature=0, tau=1E-14, ZT=False, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))
        maxpf_n = bzspb.get_maximum_power_factor(
            'n', temperature=0, tau=1E-14, ZT=False, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))
        maxzt_p = bzspb.get_maximum_power_factor(
            'p', temperature=0, tau=1E-14, ZT=True, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))
        maxzt_n = bzspb.get_maximum_power_factor(
            'n', temperature=0, tau=1E-14, ZT=True, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))

        ted['zt_best_finemesh'] = {'p': maxzt_p, 'n': maxzt_n}
        ted['pf_best_finemesh'] = {'p': maxpf_p, 'n': maxpf_n}
    except:
        import traceback
        traceback.print_exc()
        print 'COULD NOT GET FINE MESH DATA'

    # add is_compatible
    mpc = MaterialsProjectCompatibility("Advanced")
    try:
        func = m_task["pseudo_potential"]["functional"]
        labels = m_task["pseudo_potential"]["labels"]
        symbols = ["{} {}".format(func, label) for label in labels]
        parameters = {"run_type": m_task["run_type"],
                      "is_hubbard": m_task["is_hubbard"],
                      "hubbards": m_task["hubbards"],
                      "potcar_symbols": symbols}
        entry = ComputedEntry(Composition(m_task["unit_cell_formula"]), 0.0, 0.0,
                              parameters=parameters, entry_id=m_task["task_id"])
        ted["is_compatible"] = bool(mpc.process_entry(entry))
    except:
        traceback.print_exc()
        print 'ERROR in getting compatibility, task_id: {}'.format(m_task["task_id"])
        ted["is_compatible"] = None

    tdb.boltztrap.insert(jsanitize(ted))

    update_spec = {'prev_vasp_dir': fw_spec['prev_vasp_dir'],
                   'boltztrap_dir': os.getcwd(),
                   'prev_task_type': fw_spec['task_type'],
                   'mpsnl': fw_spec['mpsnl'].as_dict(),
                   'snlgroup_id': fw_spec['snlgroup_id'],
                   'run_tags': fw_spec['run_tags'],
                   'parameters': fw_spec.get('parameters')}

    return FWAction(update_spec=update_spec)
def assimilate(self, old_task):
    """
    Parses VASP runs, then inserts the result into the db and returns the
    task_id or doc of the insertion.

    Returns:
        If in simulate_mode, the entire doc is returned for debugging
        purposes. Else, only the task_id of the inserted doc is returned.
    """
    path = old_task['dir_name']  # AJ: get dir name from task
    d = self.get_task_doc(path, self.parse_dos, self.additional_fields)
    d["dir_name_full"] = d["dir_name"].split(":")[1]
    d["dir_name"] = get_block_part(d["dir_name_full"])

    if not self.simulate:
        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db.
        conn = MongoClient(self.host, self.port)
        db = conn[self.database]
        if self.user:
            db.authenticate(self.user, self.password)
        coll = db[self.collection]

        # Insert dos data into gridfs and then remove it from the dict.
        # DOS data tends to be above the 4Mb limit for mongo docs. A ref
        # to the dos file is in the dos_fs_id.
        result = coll.find_one({"dir_name": d["dir_name"]})
        if result is None or self.update_duplicates:
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"])
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]
            d["last_updated"] = datetime.datetime.today()
            if result is None:
                d["task_id"] = "mp-{}".format(old_task['task_id'])  # AJ: old task_id is new
                logger.info("Inserting {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))
            elif self.update_duplicates:
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))

            # Fireworks processing
            self.process_fw(old_task, d)

            coll.update({"dir_name": d["dir_name"]}, {"$set": d}, upsert=True)
            return d["task_id"], d
        else:
            logger.info("Skipping duplicate {}".format(d["dir_name"]))
            return result["task_id"], result
    else:
        d["task_id"] = 0
        logger.info("Simulated insert into database for {} with task_id {}"
                    .format(d["dir_name"], d["task_id"]))
        return 0, d
def run_task(self, fw_spec):
    # import here to prevent import errors in bigger MPCollab
    # get the band structure and nelect from files
    """
    prev_dir = get_loc(fw_spec['prev_vasp_dir'])
    vasprun_loc = zpath(os.path.join(prev_dir, 'vasprun.xml'))
    kpoints_loc = zpath(os.path.join(prev_dir, 'KPOINTS'))

    vr = Vasprun(vasprun_loc)
    bs = vr.get_band_structure(kpoints_filename=kpoints_loc)
    """
    filename = get_slug('JOB--' + fw_spec['mpsnl'].structure.composition.reduced_formula +
                        '--' + fw_spec['task_type'])
    with open(filename, 'w+') as f:
        f.write('')

    # get the band structure and nelect from DB
    block_part = get_block_part(fw_spec['prev_vasp_dir'])

    db_dir = os.environ['DB_LOC']
    assert isinstance(db_dir, object)
    db_path = os.path.join(db_dir, 'tasks_db.json')
    with open(db_path) as f:
        creds = json.load(f)

    connection = MongoClient(creds['host'], creds['port'])
    tdb = connection[creds['database']]
    tdb.authenticate(creds['admin_user'], creds['admin_password'])

    props = {"calculations": 1, "task_id": 1, "state": 1, "pseudo_potential": 1,
             "run_type": 1, "is_hubbard": 1, "hubbards": 1, "unit_cell_formula": 1}
    m_task = tdb.tasks.find_one({"dir_name": block_part}, props)
    if not m_task:
        time.sleep(60)  # only thing to think of is wait for DB insertion(?)
        m_task = tdb.tasks.find_one({"dir_name": block_part}, props)

    if not m_task:
        raise ValueError("Could not find task with dir_name: {}".format(block_part))

    if m_task['state'] != 'successful':
        raise ValueError("Cannot run Boltztrap; parent job unsuccessful")

    nelect = m_task['calculations'][0]['input']['parameters']['NELECT']
    bs_id = m_task['calculations'][0]['band_structure_fs_id']
    print bs_id, type(bs_id)
    fs = gridfs.GridFS(tdb, 'band_structure_fs')
    bs_dict = json.loads(fs.get(bs_id).read())
    bs_dict['structure'] = m_task['calculations'][0]['output']['crystal']
    bs = BandStructure.from_dict(bs_dict)
    print("find previous run with block_part {}".format(block_part))
    print 'Band Structure found:', bool(bs)
    print(bs.as_dict())
    print("nelect: {}".format(nelect))

    # run Boltztrap
    doping = []
    for d in [1e16, 1e17, 1e18, 1e19, 1e20]:
        doping.extend([1 * d, 2.5 * d, 5 * d, 7.5 * d])
    doping.append(1e21)
    runner = BoltztrapRunner(bs, nelect, doping=doping)
    dir = runner.run(path_dir=os.getcwd())

    # put the data in the database
    bta = BoltztrapAnalyzer.from_files(dir)

    # 8/21/15 - Anubhav removed fs_id (also see line further below,
    # ted['boltztrap_full_fs_id'] ...)
    # 8/21/15 - this is to save space in MongoDB, as well as non-use of full
    # Boltztrap output (vs rerun)
    """
    data = bta.as_dict()
    data.update(get_meta_from_structure(bs._structure))
    data['snlgroup_id'] = fw_spec['snlgroup_id']
    data['run_tags'] = fw_spec['run_tags']
    data['snl'] = fw_spec['mpsnl']
    data['dir_name_full'] = dir
    data['dir_name'] = get_block_part(dir)
    data['task_id'] = m_task['task_id']
    del data['hall']  # remove because it is too large and not useful
    fs = gridfs.GridFS(tdb, "boltztrap_full_fs")
    btid = fs.put(json.dumps(jsanitize(data)))
    """

    # now for the "sanitized" data
    ted = bta.as_dict()
    del ted['seebeck']
    del ted['hall']
    del ted['kappa']
    del ted['cond']

    # ted['boltztrap_full_fs_id'] = btid
    ted['snlgroup_id'] = fw_spec['snlgroup_id']
    ted['run_tags'] = fw_spec['run_tags']
    ted['snl'] = fw_spec['mpsnl'].as_dict()
    ted['dir_name_full'] = dir
    ted['dir_name'] = get_block_part(dir)
    ted['task_id'] = m_task['task_id']

    ted['pf_doping'] = bta.get_power_factor(output='tensor', relaxation_time=self.TAU)
    ted['zt_doping'] = bta.get_zt(output='tensor', relaxation_time=self.TAU, kl=self.KAPPAL)

    ted['pf_eigs'] = self.get_eigs(ted, 'pf_doping')
    ted['pf_best'] = self.get_extreme(ted, 'pf_eigs')
    ted['pf_best_dope18'] = self.get_extreme(ted, 'pf_eigs', max_didx=3)
    ted['pf_best_dope19'] = self.get_extreme(ted, 'pf_eigs', max_didx=4)
    ted['zt_eigs'] = self.get_eigs(ted, 'zt_doping')
    ted['zt_best'] = self.get_extreme(ted, 'zt_eigs')
    ted['zt_best_dope18'] = self.get_extreme(ted, 'zt_eigs', max_didx=3)
    ted['zt_best_dope19'] = self.get_extreme(ted, 'zt_eigs', max_didx=4)
    ted['seebeck_eigs'] = self.get_eigs(ted, 'seebeck_doping')
    ted['seebeck_best'] = self.get_extreme(ted, 'seebeck_eigs')
    ted['seebeck_best_dope18'] = self.get_extreme(ted, 'seebeck_eigs', max_didx=3)
    ted['seebeck_best_dope19'] = self.get_extreme(ted, 'seebeck_eigs', max_didx=4)
    ted['cond_eigs'] = self.get_eigs(ted, 'cond_doping')
    ted['cond_best'] = self.get_extreme(ted, 'cond_eigs')
    ted['cond_best_dope18'] = self.get_extreme(ted, 'cond_eigs', max_didx=3)
    ted['cond_best_dope19'] = self.get_extreme(ted, 'cond_eigs', max_didx=4)
    ted['kappa_eigs'] = self.get_eigs(ted, 'kappa_doping')
    ted['kappa_best'] = self.get_extreme(ted, 'kappa_eigs', maximize=False)
    ted['kappa_best_dope18'] = self.get_extreme(ted, 'kappa_eigs', maximize=False, max_didx=3)
    ted['kappa_best_dope19'] = self.get_extreme(ted, 'kappa_eigs', maximize=False, max_didx=4)

    try:
        from mpcollab.thermoelectrics.boltztrap_TE import BoltzSPB
        bzspb = BoltzSPB(ted)
        maxpf_p = bzspb.get_maximum_power_factor(
            'p', temperature=0, tau=1E-14, ZT=False, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))
        maxpf_n = bzspb.get_maximum_power_factor(
            'n', temperature=0, tau=1E-14, ZT=False, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))
        maxzt_p = bzspb.get_maximum_power_factor(
            'p', temperature=0, tau=1E-14, ZT=True, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))
        maxzt_n = bzspb.get_maximum_power_factor(
            'n', temperature=0, tau=1E-14, ZT=True, kappal=0.5,
            otherprops=('get_seebeck_mu_eig', 'get_conductivity_mu_eig',
                        'get_thermal_conductivity_mu_eig', 'get_average_eff_mass_tensor_mu'))

        ted['zt_best_finemesh'] = {'p': maxzt_p, 'n': maxzt_n}
        ted['pf_best_finemesh'] = {'p': maxpf_p, 'n': maxpf_n}
    except:
        import traceback
        traceback.print_exc()
        print 'COULD NOT GET FINE MESH DATA'

    # add is_compatible
    mpc = MaterialsProjectCompatibility("Advanced")
    try:
        func = m_task["pseudo_potential"]["functional"]
        labels = m_task["pseudo_potential"]["labels"]
        symbols = ["{} {}".format(func, label) for label in labels]
        parameters = {"run_type": m_task["run_type"],
                      "is_hubbard": m_task["is_hubbard"],
                      "hubbards": m_task["hubbards"],
                      "potcar_symbols": symbols}
        entry = ComputedEntry(Composition(m_task["unit_cell_formula"]), 0.0, 0.0,
                              parameters=parameters, entry_id=m_task["task_id"])
        ted["is_compatible"] = bool(mpc.process_entry(entry))
    except:
        traceback.print_exc()
        print 'ERROR in getting compatibility, task_id: {}'.format(m_task["task_id"])
        ted["is_compatible"] = None

    tdb.boltztrap.insert(jsanitize(ted))

    update_spec = {'prev_vasp_dir': fw_spec['prev_vasp_dir'],
                   'boltztrap_dir': os.getcwd(),
                   'prev_task_type': fw_spec['task_type'],
                   'mpsnl': fw_spec['mpsnl'].as_dict(),
                   'snlgroup_id': fw_spec['snlgroup_id'],
                   'run_tags': fw_spec['run_tags'],
                   'parameters': fw_spec.get('parameters')}

    return FWAction(update_spec=update_spec)
def assimilate(self, path, launches_coll=None):
    """
    Parses VASP runs, then inserts the result into the db and returns the
    task_id or doc of the insertion.

    Returns:
        If in simulate_mode, the entire doc is returned for debugging
        purposes. Else, only the task_id of the inserted doc is returned.
    """
    d = self.get_task_doc(path)
    if self.additional_fields:
        d.update(self.additional_fields)  # always add additional fields, even for failed jobs

    try:
        d["dir_name_full"] = d["dir_name"].split(":")[1]
        d["dir_name"] = get_block_part(d["dir_name_full"])
        d["stored_data"] = {}
    except:
        print 'COULD NOT GET DIR NAME'
        pprint.pprint(d)
        print traceback.format_exc()
        raise ValueError('IMPROPER PARSING OF {}'.format(path))

    if not self.simulate:
        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db.
        conn = MongoClient(self.host, self.port)
        db = conn[self.database]
        if self.user:
            db.authenticate(self.user, self.password)
        coll = db[self.collection]

        # Insert dos data into gridfs and then remove it from the dict.
        # DOS data tends to be above the 4Mb limit for mongo docs. A ref
        # to the dos file is in the dos_fs_id.
        result = coll.find_one({"dir_name": d["dir_name"]})
        if result is None or self.update_duplicates:
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"], cls=MontyEncoder)
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]
            d["last_updated"] = datetime.datetime.today()
            if result is None:
                if ("task_id" not in d) or (not d["task_id"]):
                    d["task_id"] = "mp-{}".format(
                        db.counter.find_one_and_update(
                            {"_id": "taskid"}, {"$inc": {"c": 1}})["c"])
                logger.info("Inserting {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))
            elif self.update_duplicates:
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))

            # Fireworks processing
            self.process_fw(path, d)

            try:
                # Add oxide_type
                struct = Structure.from_dict(d["output"]["crystal"])
                d["oxide_type"] = oxide_type(struct)
            except:
                logger.error("can't get oxide_type for {}".format(d["task_id"]))
                d["oxide_type"] = None

            # Override incorrect outcar subdocs for two step relaxations
            if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                try:
                    run_stats = {}
                    for i in [1, 2]:
                        o_path = os.path.join(path, "relax" + str(i), "OUTCAR")
                        o_path = o_path if os.path.exists(o_path) else o_path + ".gz"
                        outcar = Outcar(o_path)
                        d["calculations"][i - 1]["output"]["outcar"] = outcar.as_dict()
                        run_stats["relax" + str(i)] = outcar.run_stats
                except:
                    logger.error("Bad OUTCAR for {}.".format(path))

                try:
                    overall_run_stats = {}
                    for key in ["Total CPU time used (sec)", "User time (sec)",
                                "System time (sec)", "Elapsed time (sec)"]:
                        overall_run_stats[key] = sum([v[key] for v in run_stats.values()])
                    run_stats["overall"] = overall_run_stats
                except:
                    logger.error("Bad run stats for {}.".format(path))

                d["run_stats"] = run_stats

            # add is_compatible
            mpc = MaterialsProjectCompatibility("Advanced")
            try:
                func = d["pseudo_potential"]["functional"]
                labels = d["pseudo_potential"]["labels"]
                symbols = ["{} {}".format(func, label) for label in labels]
                parameters = {"run_type": d["run_type"],
                              "is_hubbard": d["is_hubbard"],
                              "hubbards": d["hubbards"],
                              "potcar_symbols": symbols}
                entry = ComputedEntry(Composition(d["unit_cell_formula"]), 0.0, 0.0,
                                      parameters=parameters, entry_id=d["task_id"])
                d['is_compatible'] = bool(mpc.process_entry(entry))
            except:
                traceback.print_exc()
                print 'ERROR in getting compatibility'
                d['is_compatible'] = None

            # task_type dependent processing
            if 'static' in d['task_type']:
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                for i in ["conventional_standard_structure", "symmetry_operations",
                          "symmetry_dataset", "refined_structure"]:
                    try:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    except:
                        pass

            # parse band structure if necessary
            if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")),
                                   parse_projected_eigen=False)

                if 'band structure' in d['task_type']:
                    def string_to_numlist(stringlist):
                        g = re.search('([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                                      stringlist)
                        return [float(g.group(i)) for i in range(1, 4)]

                    for i in ["kpath_name", "kpath"]:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    kpoints_doc = d['stored_data']['kpath']['kpoints']
                    for i in kpoints_doc:
                        kpoints_doc[i] = string_to_numlist(kpoints_doc[i])
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=True)
                else:
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=False)

                bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                fs = gridfs.GridFS(db, "band_structure_fs")
                bs_id = fs.put(bs_json)
                d['calculations'][0]["band_structure_fs_id"] = bs_id

                # also override band gap in task doc
                gap = bs.get_band_gap()
                vbm = bs.get_vbm()
                cbm = bs.get_cbm()
                update_doc = {'bandgap': gap['energy'], 'vbm': vbm['energy'],
                              'cbm': cbm['energy'], 'is_gap_direct': gap['direct']}
                d['analysis'].update(update_doc)
                d['calculations'][0]['output'].update(update_doc)

            coll.update_one({"dir_name": d["dir_name"]}, {'$set': d}, upsert=True)
            return d["task_id"], d
        else:
            logger.info("Skipping duplicate {}".format(d["dir_name"]))
            return result["task_id"], result
    else:
        d["task_id"] = 0
        logger.info("Simulated insert into database for {} with task_id {}"
                    .format(d["dir_name"], d["task_id"]))
        return 0, d
def assimilate(self, path, launches_coll=None):
    """
    Parses VASP runs, then inserts the result into the db and returns the
    task_id or doc of the insertion.

    Returns:
        If in simulate_mode, the entire doc is returned for debugging
        purposes. Else, only the task_id of the inserted doc is returned.
    """
    d = self.get_task_doc(path)
    if self.additional_fields:
        d.update(self.additional_fields)  # always add additional fields, even for failed jobs

    try:
        d["dir_name_full"] = d["dir_name"].split(":")[1]
        d["dir_name"] = get_block_part(d["dir_name_full"])
        d["stored_data"] = {}
    except:
        print 'COULD NOT GET DIR NAME'
        pprint.pprint(d)
        print traceback.format_exc()
        raise ValueError('IMPROPER PARSING OF {}'.format(path))

    if not self.simulate:
        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db.
        conn = MongoClient(self.host, self.port)
        db = conn[self.database]
        if self.user:
            db.authenticate(self.user, self.password)
        coll = db[self.collection]

        # Insert dos data into gridfs and then remove it from the dict.
        # DOS data tends to be above the 4Mb limit for mongo docs. A ref
        # to the dos file is in the dos_fs_id.
        result = coll.find_one({"dir_name": d["dir_name"]})
        if result is None or self.update_duplicates:
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"], cls=MontyEncoder)
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]
            d["last_updated"] = datetime.datetime.today()
            if result is None:
                if ("task_id" not in d) or (not d["task_id"]):
                    d["task_id"] = "mp-{}".format(
                        db.counter.find_one_and_update(
                            {"_id": "taskid"}, {"$inc": {"c": 1}})["c"])
                logger.info("Inserting {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))
            elif self.update_duplicates:
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))

            # Fireworks processing
            self.process_fw(path, d)

            try:
                # Add oxide_type
                struct = Structure.from_dict(d["output"]["crystal"])
                d["oxide_type"] = oxide_type(struct)
            except:
                logger.error("can't get oxide_type for {}".format(d["task_id"]))
                d["oxide_type"] = None

            # Override incorrect outcar subdocs for two step relaxations
            if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                try:
                    run_stats = {}
                    for i in [1, 2]:
                        o_path = os.path.join(path, "relax" + str(i), "OUTCAR")
                        o_path = o_path if os.path.exists(o_path) else o_path + ".gz"
                        outcar = Outcar(o_path)
                        d["calculations"][i - 1]["output"]["outcar"] = outcar.as_dict()
                        run_stats["relax" + str(i)] = outcar.run_stats
                except:
                    logger.error("Bad OUTCAR for {}.".format(path))

                try:
                    overall_run_stats = {}
                    for key in ["Total CPU time used (sec)", "User time (sec)",
                                "System time (sec)", "Elapsed time (sec)"]:
                        overall_run_stats[key] = sum([v[key] for v in run_stats.values()])
                    run_stats["overall"] = overall_run_stats
                except:
                    logger.error("Bad run stats for {}.".format(path))

                d["run_stats"] = run_stats

            # add is_compatible
            mpc = MaterialsProjectCompatibility("Advanced")
            try:
                func = d["pseudo_potential"]["functional"]
                labels = d["pseudo_potential"]["labels"]
                symbols = ["{} {}".format(func, label) for label in labels]
                parameters = {"run_type": d["run_type"],
                              "is_hubbard": d["is_hubbard"],
                              "hubbards": d["hubbards"],
                              "potcar_symbols": symbols}
                entry = ComputedEntry(Composition(d["unit_cell_formula"]), 0.0, 0.0,
                                      parameters=parameters, entry_id=d["task_id"])
                d['is_compatible'] = bool(mpc.process_entry(entry))
            except:
                traceback.print_exc()
                print 'ERROR in getting compatibility'
                d['is_compatible'] = None

            # task_type dependent processing
            if 'static' in d['task_type']:
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                for i in ["conventional_standard_structure", "symmetry_operations",
                          "symmetry_dataset", "refined_structure"]:
                    try:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    except:
                        pass

            # parse band structure if necessary
            if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")),
                                   parse_projected_eigen=True)

                if 'band structure' in d['task_type']:
                    def string_to_numlist(stringlist):
                        g = re.search('([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                                      stringlist)
                        return [float(g.group(i)) for i in range(1, 4)]

                    for i in ["kpath_name", "kpath"]:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    kpoints_doc = d['stored_data']['kpath']['kpoints']
                    for i in kpoints_doc:
                        if isinstance(kpoints_doc[i], six.string_types):
                            kpoints_doc[i] = string_to_numlist(kpoints_doc[i])
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=True)
                else:
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=False)

                bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                fs = gridfs.GridFS(db, "band_structure_fs")
                bs_id = fs.put(bs_json)
                d['calculations'][0]["band_structure_fs_id"] = bs_id

                # also override band gap in task doc
                gap = bs.get_band_gap()
                vbm = bs.get_vbm()
                cbm = bs.get_cbm()
                update_doc = {'bandgap': gap['energy'], 'vbm': vbm['energy'],
                              'cbm': cbm['energy'], 'is_gap_direct': gap['direct']}
                d['analysis'].update(update_doc)
                d['calculations'][0]['output'].update(update_doc)

            coll.update_one({"dir_name": d["dir_name"]}, {'$set': d}, upsert=True)
            return d["task_id"], d
        else:
            logger.info("Skipping duplicate {}".format(d["dir_name"]))
            return result["task_id"], result
    else:
        d["task_id"] = 0
        logger.info("Simulated insert into database for {} with task_id {}"
                    .format(d["dir_name"], d["task_id"]))
        return 0, d
def assimilate(self, path, launches_coll=None):
    """
    Parses VASP runs, then inserts the result into the db and returns the
    task_id or doc of the insertion.

    Returns:
        If in simulate_mode, the entire doc is returned for debugging
        purposes. Else, only the task_id of the inserted doc is returned.
    """
    d = self.get_task_doc(path, self.parse_dos, self.additional_fields)

    try:
        d["dir_name_full"] = d["dir_name"].split(":")[1]
        d["dir_name"] = get_block_part(d["dir_name_full"])
        d["stored_data"] = {}
    except:
        print 'COULD NOT GET DIR NAME'
        pprint.pprint(d)
        print traceback.format_exc()
        raise ValueError('IMPROPER PARSING OF {}'.format(path))

    if not self.simulate:
        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db.
        conn = MongoClient(self.host, self.port)
        db = conn[self.database]
        if self.user:
            db.authenticate(self.user, self.password)
        coll = db[self.collection]

        # Insert dos data into gridfs and then remove it from the dict.
        # DOS data tends to be above the 4Mb limit for mongo docs. A ref
        # to the dos file is in the dos_fs_id.
        result = coll.find_one({"dir_name": d["dir_name"]})
        if result is None or self.update_duplicates:
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"])
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]
            d["last_updated"] = datetime.datetime.today()
            if result is None:
                if ("task_id" not in d) or (not d["task_id"]):
                    d["task_id"] = "mp-{}".format(
                        db.counter.find_and_modify(
                            query={"_id": "taskid"},
                            update={"$inc": {"c": 1}})["c"])
                logger.info("Inserting {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))
            elif self.update_duplicates:
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))

            # Fireworks processing
            self.process_fw(path, d)

            # Override incorrect outcar subdocs for two step relaxations
            if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                try:
                    run_stats = {}
                    for i in [1, 2]:
                        outcar = Outcar(os.path.join(path, "relax" + str(i), "OUTCAR"))
                        d["calculations"][i - 1]["output"]["outcar"] = outcar.to_dict
                        run_stats["relax" + str(i)] = outcar.run_stats
                except:
                    logger.error("Bad OUTCAR for {}.".format(path))

                try:
                    overall_run_stats = {}
                    for key in ["Total CPU time used (sec)", "User time (sec)",
                                "System time (sec)", "Elapsed time (sec)"]:
                        overall_run_stats[key] = sum([v[key] for v in run_stats.values()])
                    run_stats["overall"] = overall_run_stats
                except:
                    logger.error("Bad run stats for {}.".format(path))

                d["run_stats"] = run_stats

            # task_type dependent processing
            if 'static' in d['task_type']:
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                for i in ["conventional_standard_structure", "symmetry_operations",
                          "symmetry_dataset", "refined_structure"]:
                    try:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    except:
                        pass

            # parse band structure if necessary
            if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                vasp_run = Vasprun(os.path.join(path, "vasprun.xml"),
                                   parse_projected_eigen=False)

                if 'band structure' in d['task_type']:
                    def string_to_numlist(stringlist):
                        g = re.search('([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                                      stringlist)
                        return [float(g.group(i)) for i in range(1, 4)]

                    for i in ["kpath_name", "kpath"]:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    kpoints_doc = d['stored_data']['kpath']['kpoints']
                    for i in kpoints_doc:
                        kpoints_doc[i] = string_to_numlist(kpoints_doc[i])
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=True)
                else:
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=False)

                bs_json = json.dumps(bs.to_dict)
                fs = gridfs.GridFS(db, "band_structure_fs")
                bs_id = fs.put(bs_json)
                d['calculations'][0]["band_structure_fs_id"] = bs_id

            coll.update({"dir_name": d["dir_name"]}, d, upsert=True)
            return d["task_id"], d
        else:
            logger.info("Skipping duplicate {}".format(d["dir_name"]))
            return result["task_id"], result
    else:
        d["task_id"] = 0
        logger.info("Simulated insert into database for {} with task_id {}"
                    .format(d["dir_name"], d["task_id"]))
        return 0, d
def run_task(self, fw_spec):
    if '_fizzled_parents' in fw_spec and not 'prev_vasp_dir' in fw_spec:
        prev_dir = get_loc(fw_spec['_fizzled_parents'][0]['launches'][0]['launch_dir'])
        update_spec = {}
        fizzled_parent = True
        parse_dos = False
    else:
        prev_dir = get_loc(fw_spec['prev_vasp_dir'])
        update_spec = {'prev_vasp_dir': get_block_part(prev_dir),
                       'prev_task_type': fw_spec['prev_task_type'],
                       'run_tags': fw_spec['run_tags']}
        self.additional_fields['run_tags'] = fw_spec['run_tags']
        fizzled_parent = False
        parse_dos = 'Uniform' in fw_spec['prev_task_type']

    if MOVE_TO_GARDEN_DEV:
        prev_dir = move_to_garden(prev_dir, prod=False)
    elif MOVE_TO_GARDEN_PROD:
        prev_dir = move_to_garden(prev_dir, prod=True)

    # get the directory containing the db file
    db_dir = os.environ['DB_LOC']
    db_path = os.path.join(db_dir, 'tasks_db.json')

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('MPVaspDrone')
    logger.setLevel(logging.INFO)
    sh = logging.StreamHandler(stream=sys.stdout)
    sh.setLevel(getattr(logging, 'INFO'))
    logger.addHandler(sh)

    with open(db_path) as f:
        db_creds = json.load(f)

    drone = MPVaspDrone(host=db_creds['host'], port=db_creds['port'],
                        database=db_creds['database'], user=db_creds['admin_user'],
                        password=db_creds['admin_password'],
                        collection=db_creds['collection'], parse_dos=parse_dos,
                        additional_fields=self.additional_fields,
                        update_duplicates=self.update_duplicates)
    t_id, d = drone.assimilate(prev_dir, launches_coll=LaunchPad.auto_load().launches)

    mpsnl = d['snl_final'] if 'snl_final' in d else d['snl']
    snlgroup_id = d['snlgroup_id_final'] if 'snlgroup_id_final' in d else d['snlgroup_id']
    update_spec.update({'mpsnl': mpsnl, 'snlgroup_id': snlgroup_id})

    print 'ENTERED task id:', t_id
    stored_data = {'task_id': t_id}
    if d['state'] == 'successful':
        update_spec['analysis'] = d['analysis']
        update_spec['output'] = d['output']
        return FWAction(stored_data=stored_data, update_spec=update_spec)

    # not successful - first test to see if UnconvergedHandler is needed
    if not fizzled_parent:
        unconverged_tag = 'unconverged_handler--{}'.format(fw_spec['prev_task_type'])
        output_dir = last_relax(os.path.join(prev_dir, 'vasprun.xml'))
        ueh = UnconvergedErrorHandler(output_filename=output_dir)
        if ueh.check() and unconverged_tag not in fw_spec['run_tags']:
            print 'Unconverged run! Creating dynamic FW...'

            spec = {'prev_vasp_dir': get_block_part(prev_dir),
                    'prev_task_type': fw_spec['task_type'],
                    'mpsnl': mpsnl,
                    'snlgroup_id': snlgroup_id,
                    'task_type': fw_spec['prev_task_type'],
                    'run_tags': list(fw_spec['run_tags']),
                    '_dupefinder': DupeFinderVasp().to_dict(),
                    '_priority': fw_spec['_priority']}
            snl = StructureNL.from_dict(spec['mpsnl'])
            spec['run_tags'].append(unconverged_tag)
            spec['_queueadapter'] = QA_VASP

            fws = []
            connections = {}

            f = Composition.from_formula(
                snl.structure.composition.reduced_formula).alphabetical_formula

            fws.append(FireWork(
                [VaspCopyTask({'files': ['INCAR', 'KPOINTS', 'POSCAR', 'POTCAR', 'CONTCAR'],
                               'use_CONTCAR': False}),
                 SetupUnconvergedHandlerTask(),
                 get_custodian_task(spec)],
                spec, name=get_slug(f + '--' + spec['task_type']), fw_id=-2))

            spec = {'task_type': 'VASP db insertion',
                    '_allow_fizzled_parents': True,
                    '_priority': fw_spec['_priority'],
                    '_queueadapter': QA_DB,
                    'run_tags': list(fw_spec['run_tags'])}
            spec['run_tags'].append(unconverged_tag)
            fws.append(FireWork([VaspToDBTask()], spec,
                                name=get_slug(f + '--' + spec['task_type']), fw_id=-1))

            connections[-2] = -1
            wf = Workflow(fws, connections)
            return FWAction(detours=wf)

    # not successful and not due to convergence problem - FIZZLE
    raise ValueError("DB insertion successful, but don't know how to fix this FireWork! Can't continue with workflow...")
def detect():
    for d in glob.glob(os.path.join(SCRATCH_PATH, 'block*/launch*')):
        block_part = get_block_part(d)
        garden_dir = os.path.join(GARDEN_PATH, block_part)
        if os.path.exists(garden_dir):
            print garden_dir