def _runem_all(self):
    """
    Check the status of all tasks, try to fix the problematic ones and submit
    all the tasks that can be submitted.

    The steps are executed in this order:

    1. If ``self.use_dynamic_manager`` is set, a new manager is built from the
       user configuration and assigned to every work of the flow.
    2. The number of jobs in the queue is obtained from the resource manager
       (``self.contact_resource_manager``) or estimated by counting the tasks
       of the flow whose status is ``S_RUN`` or ``S_SUB``. If it is not below
       ``self.max_njobs_inqueue`` the status is refreshed and the method returns.
    3. The flow status is refreshed; the method returns if
       ``flow.ncores_allocated`` exceeds ``self.max_ncores_used``.
    4. Unconverged tasks are restarted until the launch budget is exhausted.
    5. QCritical (only if ``self.fix_qcritical``) and AbiCritical errors are fixed.
    6. The flow is pickled and the ready tasks are launched with ``PyLauncher``.

    Tracebacks collected along the way are logged at critical level and appended
    to ``self.exceptions``. This also happens when the method returns early
    because the launch budget was exhausted while restarting tasks, so that
    restart failures are never silently lost.

    Returns:
        None
    """
    excs = []
    flow = self.flow

    def report_exceptions():
        # Log the collected tracebacks and hand them over to the scheduler.
        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s", "\n".join(excs))
            self.exceptions.extend(excs)

    # Allow to change the manager at run-time
    if self.use_dynamic_manager:
        from pymatgen.io.abinit.tasks import TaskManager
        new_manager = TaskManager.from_user_config()
        for work in flow:
            work.set_manager(new_manager)

    nqjobs = 0
    if self.contact_resource_manager:
        # This call is expensive and therefore it's optional (must be activated in manager.yml)
        nqjobs = flow.get_njobs_in_queue()
        if nqjobs is None:
            nqjobs = 0
            if flow.manager.has_queue:
                logger.warning('Cannot get njobs_inqueue')
    else:
        # Here we just count the number of tasks in the flow that are running or submitted.
        # This logic breaks down if there are multiple schedulers running
        # but it's easy to implement without having to contact the resource manager.
        nqjobs = (sum(1 for _ in flow.iflat_tasks(status=flow.S_RUN)) +
                  sum(1 for _ in flow.iflat_tasks(status=flow.S_SUB)))

    if nqjobs >= self.max_njobs_inqueue:
        print("Too many jobs in the queue: %s. No job will be submitted." % nqjobs)
        flow.check_status(show=False)
        return

    # Launch budget for this call: free slots in the queue, optionally capped
    # by max_nlaunches (-1 means no additional cap).
    max_nlaunch = self.max_njobs_inqueue - nqjobs
    if self.max_nlaunches != -1:
        max_nlaunch = min(max_nlaunch, self.max_nlaunches)

    # check status.
    flow.check_status(show=False)

    # This check is not perfect, we should make a list of tasks to submit
    # and select only the subset so that we don't exceed max_ncores_used.
    # Many sections of this code should be rewritten.
    if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used:
        print("Cannot exceed max_ncores_used %s" % self.max_ncores_used)
        return

    # Try to restart the unconverged tasks
    # TODO: do not fire here but prepare for firing in rapidfire
    for task in flow.unconverged_tasks:
        try:
            logger.info("Flow will try restart task %s", task)
            fired = task.restart()
        except task.RestartError:
            excs.append(straceback())
            continue

        if not fired:
            continue

        self.nlaunch += 1
        max_nlaunch -= 1
        if max_nlaunch == 0:
            logger.info("Restart: too many jobs in the queue, returning")
            flow.pickle_dump()
            # Do not lose the tracebacks gathered from earlier failed restarts.
            report_exceptions()
            return

    # fix only prepares for restarting, and sets to ready
    if self.fix_qcritical:
        nfixed = flow.fix_queue_critical()
        if nfixed:
            print("Fixed %d QCritical error(s)" % nfixed)

    nfixed = flow.fix_abicritical()
    if nfixed:
        print("Fixed %d AbiCritical error(s)" % nfixed)

    # update database
    flow.pickle_dump()

    # Submit the tasks that are ready. Any failure is recorded, not raised,
    # so that the status can still be shown and the error reported below.
    try:
        nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
        self.nlaunch += nlaunch
        if nlaunch:
            cprint("[%s] Number of launches: %d" % (time.asctime(), nlaunch), "yellow")
    except Exception:
        excs.append(straceback())

    # check status.
    flow.show_status()

    report_exceptions()
def create(self):
    """
    Create a single abinit G0W0 flow.

    The structure is converted in place to an ``abilab.Structure`` sorted
    according to Z, the calculation parameters are collected from the defaults,
    the user supplied extra abivars, ``self.option`` and the convergence/test
    definitions, and a ``G0W0Work`` built from ``g0w0_convergence_inputs`` is
    registered in a new ``Flow`` located in ``self.work_dir``.

    The file ``<s_name(structure)>.full_res`` is consulted for the entries
    ``'grid'`` and ``'all_done'``; if it cannot be read (IOError/OSError)
    grid 0 and ``all_done = False`` are assumed.

    Returns:
        The result of ``flow.allocate()``, or None if the ``.full_res`` file
        reports that all is done for this material.

    Raises:
        RuntimeError: if ``ecutsigx`` is larger than ``ecut`` or smaller than
            the largest default ``ecuteps``.
    """
    # an AbiStructure object has an overwritten version of get_sorted_structure that sorts according to Z
    # this could also be pulled into the constructor of Abistructure
    from abipy import abilab
    item = copy.copy(self.structure.item)
    self.structure.__class__ = abilab.Structure
    self.structure = self.structure.get_sorted_structure_z()
    # re-attach the item attribute saved above to the new, sorted structure
    self.structure.item = item
    abi_structure = self.structure
    manager = TaskManager.from_user_config()

    # Initialize the flow.
    flow = Flow(self.work_dir, manager, pickle_protocol=0)

    # kpoint grid defined over density 40 > ~ 3 3 3
    if self.spec['converge'] and not self.all_converged:
        # (2x2x2) gamma centered mesh for the convergence test on nbands and ecuteps
        # if kp_in is present in the specs a kp_in X kp_in x kp_in mesh is used for the convergence study
        if 'kp_in' in self.spec.data:
            # NOTE(review): the threshold tested here (9) does not match the one
            # quoted in the message (13) - confirm which one is intended.
            if self.spec['kp_in'] > 9:
                print('WARNING:\nkp_in should be < 13 to generate an n x n x n mesh\nfor larger values a grid with '
                      'density kp_in will be generated')
            kppa = self.spec['kp_in']
        else:
            kppa = 2
    else:
        # use the specified density for the final calculation with the converged nbands and ecuteps of other
        # stand alone calculations
        kppa = self.spec['kp_grid_dens']
    gamma = True

    # 'standard' parameters for stand alone calculation
    scf_nband = self.get_bands(self.structure) + 20
    # additional bands to accommodate for nbdbuf and a bit extra
    nscf_nband = [10 * self.get_bands(self.structure)]

    nksmall = None
    ecuteps = [8]

    # read user defined extra abivars from file 'extra_abivars' should be dictionary
    extra_abivars = {}
    extra_abivars.update(read_extra_abivars())

    ecut = extra_abivars.pop('ecut', 44)
    ecutsigx = extra_abivars.pop('ecutsigx', 44)

    # NOTE(review): these checks run against the values known at this point,
    # i.e. before self.option may override ecut and before ecuteps is replaced
    # by the test/convergence ranges below - confirm this is intended.
    if ecutsigx > ecut:
        raise RuntimeError('ecutsigx can not be largen than ecut')
    if ecutsigx < max(ecuteps):
        raise RuntimeError('ecutsigx < ecuteps this is not realistic')

    response_models = ['godby']
    if 'ppmodel' in extra_abivars:
        response_models = [extra_abivars.pop('ppmodel')]

    if self.option is not None:
        for key, value in self.option.items():
            if key == 'ecut':
                ecut = value
            # ecuteps and nscf_nbands are handled separately, everything else
            # (ecut included) is passed on as an extra abinit variable.
            if key not in ('ecuteps', 'nscf_nbands'):
                extra_abivars[key] = value

    try:
        # Read the results file only once, both entries come from the same dict.
        structure_name = s_name(self.structure)
        full_res = read_grid_from_file(structure_name + ".full_res")
        grid = full_res['grid']
        all_done = full_res['all_done']
        workdir = os.path.join(structure_name, 'w' + str(grid))
    except (IOError, OSError):
        grid = 0
        all_done = False
        workdir = None

    if all_done:
        print('| all is done for this material')
        return None

    if (self.spec['test'] or self.spec['converge']) and not self.all_converged:
        if self.spec['test']:
            print('| setting test calculation')
            tests = SingleAbinitGWWork(self.structure, self.spec).tests
            response_models = []
        elif grid == 0:
            print('| setting convergence calculations for grid 0')
            tests = self.convs
        else:
            print('| extending grid')
            tests = expand(self.convs, grid)

        ecuteps = []
        nscf_nband = []
        for test in tests:
            test_range = tests[test]['test_range']
            if tests[test]['level'] == 'scf':
                # A value given explicitly in self.option wins over the test range.
                if self.option is not None and test in self.option:
                    extra_abivars[test] = self.option[test]
                else:
                    extra_abivars[test + '_s'] = test_range
            else:
                for value in test_range:
                    if test == 'nscf_nbands':
                        # scr_nband takes nscf_nbands if not specified
                        # sigma_nband takes scr_nbands if not specified
                        nscf_nband.append(value * self.get_bands(self.structure))
                    if test == 'ecuteps':
                        ecuteps.append(value)
                    if test == 'response_model':
                        response_models.append(value)
    elif self.all_converged:
        print('| setting up for testing the converged values at the high kp grid ')
        # add a bandstructure and dos calculation
        if os.path.isfile('bands'):
            # negative value > only bandstructure
            nksmall = -30
        else:
            nksmall = 30
        # in this case a convergence study has already been performed.
        # The resulting parameters are passed as option; each list holds the
        # converged value and that value plus one step of the convergence range.
        ecuteps_range = self.convs['ecuteps']['test_range']
        ecuteps = [self.option['ecuteps'],
                   self.option['ecuteps'] + ecuteps_range[1] - ecuteps_range[0]]
        nbands_range = self.convs['nscf_nbands']['test_range']
        nscf_nband = [self.option['nscf_nbands'],
                      self.option['nscf_nbands'] + nbands_range[1] - nbands_range[0]]

    logger.info('ecuteps : %s ', ecuteps)
    logger.info('extra   : %s ', extra_abivars)
    logger.info('nscf_nb : %s ', nscf_nband)

    inputs = g0w0_convergence_inputs(abi_structure, self.pseudo_table, kppa, nscf_nband, ecuteps, ecutsigx,
                                     scf_nband, ecut, accuracy="normal", spin_mode="unpolarized", smearing=None,
                                     response_models=response_models, charge=0.0, sigma_nband=None, scr_nband=None,
                                     gamma=gamma, nksmall=nksmall, extra_abivars=extra_abivars)

    work = G0W0Work(scf_inputs=inputs[0], nscf_inputs=inputs[1], scr_inputs=inputs[2], sigma_inputs=inputs[3])

    print(workdir)
    flow.register_work(work, workdir=workdir)

    return flow.allocate()
def _runem_all(self):
    """
    Check the status of all tasks, try to fix the problematic ones and submit
    all the tasks that can be submitted.

    The steps are executed in this order:

    1. If ``self.use_dynamic_manager`` is set, a new manager is built from the
       user configuration and assigned to every work of the flow.
    2. If ``self.contact_resource_manager`` is set, the number of jobs in the
       queue is requested from the flow (otherwise it is taken to be 0). The
       method returns if it is not below ``self.max_njobs_inqueue``.
    3. The flow status is refreshed; the method returns if
       ``flow.ncores_allocated`` exceeds ``self.max_ncores_used``.
    4. Unconverged tasks are restarted until the launch budget is exhausted.
    5. QCritical (only if ``self.fix_qcritical``) and AbiCritical errors are fixed.
    6. The flow is pickled and the ready tasks are launched with ``PyLauncher``.

    Tracebacks collected along the way are logged at critical level and appended
    to ``self.exceptions``. This also happens when the method returns early
    because the launch budget was exhausted while restarting tasks, so that
    restart failures are never silently lost.

    Returns:
        None
    """
    excs = []
    flow = self.flow

    def report_exceptions():
        # Log the collected tracebacks and hand them over to the scheduler.
        if excs:
            logger.critical("*** Scheduler exceptions:\n *** %s", "\n".join(excs))
            self.exceptions.extend(excs)

    # Allow to change the manager at run-time
    if self.use_dynamic_manager:
        from pymatgen.io.abinit.tasks import TaskManager
        new_manager = TaskManager.from_user_config()
        for work in flow:
            work.set_manager(new_manager)

    nqjobs = 0
    if self.contact_resource_manager:
        # This call is expensive and therefore it's optional
        nqjobs = flow.get_njobs_in_queue()
        if nqjobs is None:
            nqjobs = 0
            if flow.manager.has_queue:
                logger.warning('Cannot get njobs_inqueue')

    if nqjobs >= self.max_njobs_inqueue:
        print("Too many jobs in the queue: %s, returning" % nqjobs)
        return

    # Launch budget for this call: free slots in the queue, optionally capped
    # by max_nlaunches (-1 means no additional cap).
    max_nlaunch = self.max_njobs_inqueue - nqjobs
    if self.max_nlaunches != -1:
        max_nlaunch = min(max_nlaunch, self.max_nlaunches)

    # check status.
    flow.check_status(show=False)

    # This check is not perfect, we should make a list of tasks to submit
    # and select only the subset so that we don't exceed max_ncores_used.
    # Many sections of this code should be rewritten.
    if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used:
        print("Cannot exceed max_ncores_used: %s" % self.max_ncores_used)
        return

    # Try to restart the unconverged tasks
    # TODO: do not fire here but prepare for firing in rapidfire
    for task in flow.unconverged_tasks:
        try:
            logger.info("Flow will try restart task %s", task)
            fired = task.restart()
        except task.RestartError:
            excs.append(straceback())
            continue

        if not fired:
            continue

        self.nlaunch += 1
        max_nlaunch -= 1
        if max_nlaunch == 0:
            logger.info("Restart: too many jobs in the queue, returning")
            flow.pickle_dump()
            # Do not lose the tracebacks gathered from earlier failed restarts.
            report_exceptions()
            return

    # fix only prepares for restarting, and sets to ready
    if self.fix_qcritical:
        nfixed = flow.fix_queue_critical()
        if nfixed:
            print("Fixed %d QCritical error(s)" % nfixed)

    nfixed = flow.fix_abicritical()
    if nfixed:
        print("Fixed %d AbiCritical error(s)" % nfixed)

    # update database
    flow.pickle_dump()

    # Submit the tasks that are ready. Any failure is recorded, not raised,
    # so that the status can still be shown and the error reported below.
    try:
        nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
        self.nlaunch += nlaunch
        if nlaunch:
            print("[%s] Number of launches: %d" % (time.asctime(), nlaunch))
    except Exception:
        excs.append(straceback())

    # check status.
    flow.show_status()

    report_exceptions()
def create(self):
    """
    Create a single abinit G0W0 flow.

    The structure is converted in place to an ``abilab.Structure`` sorted
    according to Z, the calculation parameters are collected from the defaults,
    the user supplied extra abivars, ``self.option`` and the convergence/test
    definitions, and a ``G0W0Work`` built from ``g0w0_convergence_inputs`` is
    registered in a new ``Flow`` located in ``self.work_dir``.

    The file ``<s_name(structure)>.full_res`` is consulted for the entries
    ``'grid'`` and ``'all_done'``; if it cannot be read (IOError/OSError)
    grid 0 and ``all_done = False`` are assumed.

    Returns:
        The result of ``flow.allocate()``, or None if the ``.full_res`` file
        reports that all is done for this material.

    Raises:
        RuntimeError: if ``ecutsigx`` is larger than ``ecut`` or smaller than
            the largest default ``ecuteps``.
    """
    # an AbiStructure object has an overwritten version of get_sorted_structure that sorts according to Z
    # this could also be pulled into the constructor of Abistructure
    from abipy import abilab
    item = copy.copy(self.structure.item)
    self.structure.__class__ = abilab.Structure
    self.structure = self.structure.get_sorted_structure_z()
    # re-attach the item attribute saved above to the new, sorted structure
    self.structure.item = item
    abi_structure = self.structure
    manager = TaskManager.from_user_config()

    # Initialize the flow.
    flow = Flow(self.work_dir, manager, pickle_protocol=0)

    # kpoint grid defined over density 40 > ~ 3 3 3
    if self.spec['converge'] and not self.all_converged:
        # (2x2x2) gamma centered mesh for the convergence test on nbands and ecuteps
        # if kp_in is present in the specs a kp_in X kp_in x kp_in mesh is used for the convergence study
        if 'kp_in' in self.spec.data:
            # NOTE(review): the threshold tested here (9) does not match the one
            # quoted in the message (13) - confirm which one is intended.
            if self.spec['kp_in'] > 9:
                print(
                    'WARNING:\nkp_in should be < 13 to generate an n x n x n mesh\nfor larger values a grid with '
                    'density kp_in will be generated')
            kppa = self.spec['kp_in']
        else:
            kppa = 2
    else:
        # use the specified density for the final calculation with the converged nbands and ecuteps of other
        # stand alone calculations
        kppa = self.spec['kp_grid_dens']
    gamma = True

    # 'standard' parameters for stand alone calculation
    scf_nband = self.get_bands(self.structure) + 20
    # additional bands to accommodate for nbdbuf and a bit extra
    nscf_nband = [10 * self.get_bands(self.structure)]

    nksmall = None
    ecuteps = [8]

    # read user defined extra abivars from file 'extra_abivars' should be dictionary
    extra_abivars = {}
    extra_abivars.update(read_extra_abivars())

    ecut = extra_abivars.pop('ecut', 44)
    ecutsigx = extra_abivars.pop('ecutsigx', 44)

    # NOTE(review): these checks run against the values known at this point,
    # i.e. before self.option may override ecut and before ecuteps is replaced
    # by the test/convergence ranges below - confirm this is intended.
    if ecutsigx > ecut:
        raise RuntimeError('ecutsigx can not be largen than ecut')
    if ecutsigx < max(ecuteps):
        raise RuntimeError('ecutsigx < ecuteps this is not realistic')

    response_models = ['godby']
    if 'ppmodel' in extra_abivars:
        response_models = [extra_abivars.pop('ppmodel')]

    if self.option is not None:
        for key, value in self.option.items():
            if key == 'ecut':
                ecut = value
            # ecuteps and nscf_nbands are handled separately, everything else
            # (ecut included) is passed on as an extra abinit variable.
            if key not in ('ecuteps', 'nscf_nbands'):
                extra_abivars[key] = value

    try:
        # Read the results file only once, both entries come from the same dict.
        structure_name = s_name(self.structure)
        full_res = read_grid_from_file(structure_name + ".full_res")
        grid = full_res['grid']
        all_done = full_res['all_done']
        workdir = os.path.join(structure_name, 'w' + str(grid))
    except (IOError, OSError):
        grid = 0
        all_done = False
        workdir = None

    if all_done:
        print('| all is done for this material')
        return None

    if (self.spec['test'] or self.spec['converge']) and not self.all_converged:
        if self.spec['test']:
            print('| setting test calculation')
            tests = SingleAbinitGWWork(self.structure, self.spec).tests
            response_models = []
        elif grid == 0:
            print('| setting convergence calculations for grid 0')
            tests = self.convs
        else:
            print('| extending grid')
            tests = expand(self.convs, grid)

        ecuteps = []
        nscf_nband = []
        for test in tests:
            test_range = tests[test]['test_range']
            if tests[test]['level'] == 'scf':
                # A value given explicitly in self.option wins over the test range.
                if self.option is not None and test in self.option:
                    extra_abivars[test] = self.option[test]
                else:
                    extra_abivars[test + '_s'] = test_range
            else:
                for value in test_range:
                    if test == 'nscf_nbands':
                        # scr_nband takes nscf_nbands if not specified
                        # sigma_nband takes scr_nbands if not specified
                        nscf_nband.append(value * self.get_bands(self.structure))
                    if test == 'ecuteps':
                        ecuteps.append(value)
                    if test == 'response_model':
                        response_models.append(value)
    elif self.all_converged:
        print(
            '| setting up for testing the converged values at the high kp grid '
        )
        # add a bandstructure and dos calculation
        if os.path.isfile('bands'):
            # negative value > only bandstructure
            nksmall = -30
        else:
            nksmall = 30
        # in this case a convergence study has already been performed.
        # The resulting parameters are passed as option; each list holds the
        # converged value and that value plus one step of the convergence range.
        ecuteps_range = self.convs['ecuteps']['test_range']
        ecuteps = [
            self.option['ecuteps'],
            self.option['ecuteps'] + ecuteps_range[1] - ecuteps_range[0],
        ]
        nbands_range = self.convs['nscf_nbands']['test_range']
        nscf_nband = [
            self.option['nscf_nbands'],
            self.option['nscf_nbands'] + nbands_range[1] - nbands_range[0],
        ]

    logger.info('ecuteps : %s ', ecuteps)
    logger.info('extra   : %s ', extra_abivars)
    logger.info('nscf_nb : %s ', nscf_nband)

    inputs = g0w0_convergence_inputs(abi_structure,
                                     self.pseudo_table,
                                     kppa,
                                     nscf_nband,
                                     ecuteps,
                                     ecutsigx,
                                     scf_nband,
                                     ecut,
                                     accuracy="normal",
                                     spin_mode="unpolarized",
                                     smearing=None,
                                     response_models=response_models,
                                     charge=0.0,
                                     sigma_nband=None,
                                     scr_nband=None,
                                     gamma=gamma,
                                     nksmall=nksmall,
                                     extra_abivars=extra_abivars)

    work = G0W0Work(scf_inputs=inputs[0],
                    nscf_inputs=inputs[1],
                    scr_inputs=inputs[2],
                    sigma_inputs=inputs[3])

    print(workdir)
    flow.register_work(work, workdir=workdir)

    return flow.allocate()