def test_flow(self):
    """Testing flow creation and task registering."""
    flow = Flow(workdir=test_dir,
                manager=TaskManager.from_file(os.path.join(test_dir, "taskmanager.yml")))
    inp = {}
    flow.register_task(input=inp)
    flow.allocate()

    self.assertTrue(flow.allocated)
    self.assertIsInstance(flow[0], Work)
    self.assertIsInstance(flow[0][0], Task)
    self.assertEqual(flow.check_status(), None)
def __init__(self, workdir, name=None, flows=None, manager=None, timelimit=None):
    """
    Args:
        workdir: Working directory.
        name: Name assigned to the `BatchLauncher`.
        flows: List of `Flow` objects.
        manager: :class:`TaskManager` object responsible for the submission of the jobs.
            If manager is None, the object is initialized from the yaml file
            located either in the working directory or in the user configuration dir.
        timelimit: Time limit (int with seconds or string with time given with the slurm
            convention "days-hours:minutes:seconds"). If timelimit is None, the default
            value specified in the `batch_adapter` is taken.
    """
    self.workdir = os.path.abspath(workdir)

    if not os.path.exists(self.workdir):
        os.makedirs(self.workdir)
    else:
        pass
        #raise RuntimeError("Directory %s already exists. Use BatchLauncher.pickle_load()" % self.workdir)

    self.name = os.path.basename(self.workdir) if name is None else name
    self.script_file = File(os.path.join(self.workdir, "run.sh"))
    self.qerr_file = File(os.path.join(self.workdir, "queue.qerr"))
    self.qout_file = File(os.path.join(self.workdir, "queue.qout"))
    self.log_file = File(os.path.join(self.workdir, "run.log"))
    self.batch_pidfile = File(os.path.join(self.workdir, "batch.pid"))

    from .tasks import TaskManager
    manager = TaskManager.as_manager(manager)

    # Extract the qadapter to be used for the batch script.
    try:
        self.qadapter = qad = manager.batch_adapter
    except AttributeError:
        raise RuntimeError("Your manager.yml file does not define an entry for the batch_adapter")

    if qad is None:
        raise RuntimeError("Your manager.yml file does not define an entry for the batch_adapter")

    # Set mpi_procs to 1 just to be on the safe side,
    # then allow the user to change the timelimit via __init__.
    qad.set_mpi_procs(1)
    if timelimit is not None:
        self.set_timelimit(timelimit)
        # FIXME: Remove me!
        self.set_timelimit(36000)

    # Initialize list of flows.
    if flows is None: flows = []
    if not isinstance(flows, (list, tuple)): flows = [flows]
    self.flows = flows
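# A minimal usage sketch (an illustration added here, not part of the original
# module). It relies only on the constructor arguments documented above and on
# set_timelimit(), which __init__ itself calls; `my_flows` is a hypothetical
# list of already-built Flow objects.
def _example_batch_launcher_usage(my_flows):
    launcher = BatchLauncher(workdir="batch_runs", name="gw_batch",
                             flows=my_flows, manager=None, timelimit="0-12:0:0")
    launcher.set_timelimit(3600)  # a plain number of seconds is also accepted
    return launcher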
def _runem_all(self):
    """
    This function checks the status of all tasks,
    tries to fix tasks that went unconverged, abicritical, or queuecritical
    and tries to run all the tasks that can be submitted.
    """
    excs = []
    flow = self.flow

    # Allow to change the manager at run-time.
    if self.use_dynamic_manager:
        from pymatgen.io.abinit.tasks import TaskManager
        new_manager = TaskManager.from_user_config()
        for work in flow:
            work.set_manager(new_manager)

    nqjobs = 0
    if self.contact_resource_manager:  # and flow.TaskManager.qadapter.QTYPE == "shell":
        # This call is expensive and therefore it's optional (must be activated in manager.yml).
        nqjobs = flow.get_njobs_in_queue()
        if nqjobs is None:
            nqjobs = 0
            if flow.manager.has_queue:
                logger.warning('Cannot get njobs_inqueue')
    else:
        # Here we just count the number of tasks in the flow that are running.
        # This logic breaks down if there are multiple schedulers running,
        # but it's easy to implement without having to contact the resource manager.
        nqjobs = (len(list(flow.iflat_tasks(status=flow.S_RUN)))
                  + len(list(flow.iflat_tasks(status=flow.S_SUB))))

    if nqjobs >= self.max_njobs_inqueue:
        print("Too many jobs in the queue: %s. No job will be submitted." % nqjobs)
        flow.check_status(show=False)
        return

    if self.max_nlaunches == -1:
        max_nlaunch = self.max_njobs_inqueue - nqjobs
    else:
        max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches)

    # check status.
    flow.check_status(show=False)

    # This check is not perfect: we should make a list of tasks to submit
    # and select only the subset so that we don't exceed max_ncores_used.
    # Many sections of this code should be rewritten.
    #if self.max_ncores_used is not None and flow.ncores_used > self.max_ncores_used:
    if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used:
        print("Cannot exceed max_ncores_used %s" % self.max_ncores_used)
        return

    # Try to restart the unconverged tasks.
    # TODO: do not fire here but prepare for firing in rapidfire.
    for task in self.flow.unconverged_tasks:
        try:
            logger.info("Flow will try restart task %s" % task)
            fired = task.restart()
            if fired:
                self.nlaunch += 1
                max_nlaunch -= 1
                if max_nlaunch == 0:
                    logger.info("Restart: too many jobs in the queue, returning")
                    flow.pickle_dump()
                    return
        except task.RestartError:
            excs.append(straceback())

    # Temporarily disabled by MG because I don't know if fix_critical works after the
    # introduction of the new qadapters.
    # Re-enabled by MsS: disable things that do not work at low level;
    # fix only prepares for restarting, and sets to ready.
    if self.fix_qcritical:
        nfixed = flow.fix_queue_critical()
        if nfixed: print("Fixed %d QCritical error(s)" % nfixed)

    nfixed = flow.fix_abicritical()
    if nfixed: print("Fixed %d AbiCritical error(s)" % nfixed)

    # update database
    flow.pickle_dump()

    # Submit the tasks that are ready.
    try:
        nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
        self.nlaunch += nlaunch
        if nlaunch:
            cprint("[%s] Number of launches: %d" % (time.asctime(), nlaunch), "yellow")
    except Exception:
        excs.append(straceback())

    # check status.
    flow.show_status()

    if excs:
        logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
        self.exceptions.extend(excs)
def _runem_all(self):
    """
    This function checks the status of all tasks,
    tries to fix tasks that went unconverged, abicritical, or queuecritical
    and tries to run all the tasks that can be submitted.
    """
    excs = []
    flow = self.flow

    # Allow to change the manager at run-time.
    if self.use_dynamic_manager:
        from pymatgen.io.abinit.tasks import TaskManager
        new_manager = TaskManager.from_user_config()
        for work in flow:
            work.set_manager(new_manager)

    nqjobs = 0
    if self.contact_resource_manager:
        # This call is expensive and therefore it's optional.
        nqjobs = flow.get_njobs_in_queue()
        if nqjobs is None:
            nqjobs = 0
            if flow.manager.has_queue:
                logger.warning('Cannot get njobs_inqueue')

    if nqjobs >= self.max_njobs_inqueue:
        print("Too many jobs in the queue: %s, returning" % nqjobs)
        return

    if self.max_nlaunches == -1:
        max_nlaunch = self.max_njobs_inqueue - nqjobs
    else:
        max_nlaunch = min(self.max_njobs_inqueue - nqjobs, self.max_nlaunches)

    # check status.
    flow.check_status(show=False)

    # This check is not perfect: we should make a list of tasks to submit
    # and select only the subset so that we don't exceed max_ncores_used.
    # Many sections of this code should be rewritten.
    #if self.max_ncores_used is not None and flow.ncores_used > self.max_ncores_used:
    if self.max_ncores_used is not None and flow.ncores_allocated > self.max_ncores_used:
        print("Cannot exceed max_ncores_used %s" % self.max_ncores_used)
        return

    # Try to restart the unconverged tasks.
    # TODO: do not fire here but prepare for firing in rapidfire.
    for task in self.flow.unconverged_tasks:
        try:
            logger.info("Flow will try restart task %s" % task)
            fired = task.restart()
            if fired:
                self.nlaunch += 1
                max_nlaunch -= 1
                if max_nlaunch == 0:
                    logger.info("Restart: too many jobs in the queue, returning")
                    flow.pickle_dump()
                    return
        except task.RestartError:
            excs.append(straceback())

    # Temporarily disabled by MG because I don't know if fix_critical works after the
    # introduction of the new qadapters.
    # Re-enabled by MsS: disable things that do not work at low level;
    # fix only prepares for restarting, and sets to ready.
    if self.fix_qcritical:
        nfixed = flow.fix_queue_critical()
        if nfixed: print("Fixed %d QCritical error(s)" % nfixed)

    nfixed = flow.fix_abicritical()
    if nfixed: print("Fixed %d AbiCritical error(s)" % nfixed)

    # update database
    flow.pickle_dump()

    # Submit the tasks that are ready.
    try:
        nlaunch = PyLauncher(flow).rapidfire(max_nlaunch=max_nlaunch, sleep_time=10)
        self.nlaunch += nlaunch
        if nlaunch:
            print("[%s] Number of launches: %d" % (time.asctime(), nlaunch))
    except Exception:
        excs.append(straceback())

    # check status.
    flow.show_status()

    if excs:
        logger.critical("*** Scheduler exceptions:\n *** %s" % "\n".join(excs))
        self.exceptions.extend(excs)
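# Illustrative only (an assumption, not the actual PyFlowScheduler implementation):
# a scheduler built around _runem_all would typically call it periodically.
# `scheduler`, `num_cycles` and `sleep_seconds` are hypothetical names.
def _example_scheduler_loop(scheduler, num_cycles=10, sleep_seconds=30):
    import time
    for _ in range(num_cycles):
        scheduler._runem_all()     # check, fix and (re)submit tasks
        time.sleep(sleep_seconds)  # wait before polling again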
def create(self):
    """
    create single abinit G0W0 flow
    """
    # manager = 'slurm' if 'ceci' in self.spec['mode'] else 'shell'
    # an AbiStructure object has an overwritten version of get_sorted_structure that sorts according to Z
    # this could also be pulled into the constructor of Abistructure
    # abi_structure = self.structure.get_sorted_structure()
    from abipy import abilab
    item = copy.copy(self.structure.item)
    self.structure.__class__ = abilab.Structure
    self.structure = self.structure.get_sorted_structure_z()
    self.structure.item = item
    abi_structure = self.structure
    manager = TaskManager.from_user_config()

    # Initialize the flow.
    flow = Flow(self.work_dir, manager, pickle_protocol=0)
    # flow = Flow(self.work_dir, manager)

    # kpoint grid defined over density 40 > ~ 3 3 3
    if self.spec['converge'] and not self.all_converged:
        # (2x2x2) gamma centered mesh for the convergence test on nbands and ecuteps
        # if kp_in is present in the specs a kp_in x kp_in x kp_in mesh is used for the convergence study
        if 'kp_in' in self.spec.data.keys():
            if self.spec['kp_in'] > 9:
                print('WARNING:\nkp_in should be < 13 to generate an n x n x n mesh\n'
                      'for larger values a grid with density kp_in will be generated')
            kppa = self.spec['kp_in']
        else:
            kppa = 2
    else:
        # use the specified density for the final calculation with the converged nbands
        # and ecuteps of other stand alone calculations
        kppa = self.spec['kp_grid_dens']
    gamma = True

    # 'standard' parameters for stand alone calculation
    scf_nband = self.get_bands(self.structure) + 20  # additional bands to accommodate for nbdbuf and a bit extra
    nscf_nband = [10 * self.get_bands(self.structure)]

    nksmall = None
    ecuteps = [8]

    extra_abivars = dict()

    # read user defined extra abivars from file; 'extra_abivars' should be a dictionary
    extra_abivars.update(read_extra_abivars())
    # self.bands_fac = 0.5 if 'gwcomp' in extra_abivars.keys() else 1
    # self.convs['nscf_nbands']['test_range'] =
    #     tuple([self.bands_fac*x for x in self.convs['nscf_nbands']['test_range']])

    ecut = extra_abivars.pop('ecut', 44)
    ecutsigx = extra_abivars.pop('ecutsigx', 44)

    if ecutsigx > ecut:
        raise RuntimeError('ecutsigx cannot be larger than ecut')
    if ecutsigx < max(ecuteps):
        raise RuntimeError('ecutsigx < ecuteps, this is not realistic')

    response_models = ['godby']
    if 'ppmodel' in extra_abivars.keys():
        response_models = [extra_abivars.pop('ppmodel')]

    if self.option is not None:
        for k in self.option.keys():
            if k == 'ecut':
                ecut = self.option[k]
            if k in ['ecuteps', 'nscf_nbands']:
                pass
            else:
                extra_abivars.update({k: self.option[k]})

    try:
        grid = read_grid_from_file(s_name(self.structure) + ".full_res")['grid']
        all_done = read_grid_from_file(s_name(self.structure) + ".full_res")['all_done']
        workdir = os.path.join(s_name(self.structure), 'w' + str(grid))
    except (IOError, OSError):
        grid = 0
        all_done = False
        workdir = None

    if not all_done:
        if (self.spec['test'] or self.spec['converge']) and not self.all_converged:
            if self.spec['test']:
                print('| setting test calculation')
                tests = SingleAbinitGWWork(self.structure, self.spec).tests
                response_models = []
            else:
                if grid == 0:
                    print('| setting convergence calculations for grid 0')
                    # tests = SingleAbinitGWWorkFlow(self.structure, self.spec).convs
                    tests = self.convs
                else:
                    print('| extending grid')
                    # tests = expand(SingleAbinitGWWorkFlow(self.structure, self.spec).convs, grid)
                    tests = expand(self.convs, grid)
            ecuteps = []
            nscf_nband = []
            for test in tests:
                if tests[test]['level'] == 'scf':
                    if self.option is None:
                        extra_abivars.update({test + '_s': tests[test]['test_range']})
                    elif test in self.option:
                        extra_abivars.update({test: self.option[test]})
                    else:
                        extra_abivars.update({test + '_s': tests[test]['test_range']})
                else:
                    for value in tests[test]['test_range']:
                        if test == 'nscf_nbands':
                            nscf_nband.append(value * self.get_bands(self.structure))
                            # scr_nband takes nscf_nbands if not specified
                            # sigma_nband takes scr_nbands if not specified
                        if test == 'ecuteps':
                            ecuteps.append(value)
                        if test == 'response_model':
                            response_models.append(value)
        elif self.all_converged:
            print('| setting up for testing the converged values at the high kp grid')
            # add a bandstructure and dos calculation
            if os.path.isfile('bands'):
                nksmall = -30  # negative value > only bandstructure
            else:
                nksmall = 30
            # in this case a convergence study has already been performed;
            # the resulting parameters are passed as option
            ecuteps = [self.option['ecuteps'],
                       self.option['ecuteps'] + self.convs['ecuteps']['test_range'][1] -
                       self.convs['ecuteps']['test_range'][0]]
            nscf_nband = [self.option['nscf_nbands'],
                          self.option['nscf_nbands'] + self.convs['nscf_nbands']['test_range'][1] -
                          self.convs['nscf_nbands']['test_range'][0]]
            # for option in self.option:
            #     if option not in ['ecuteps', 'nscf_nband']:
            #         extra_abivars.update({option + '_s': self.option[option]})
    else:
        print('| all is done for this material')
        return

    logger.info('ecuteps : %s ' % str(ecuteps))
    logger.info('extra   : %s ' % str(extra_abivars))
    logger.info('nscf_nb : %s ' % str(nscf_nband))

    inputs = g0w0_convergence_inputs(abi_structure, self.pseudo_table, kppa, nscf_nband, ecuteps, ecutsigx,
                                     scf_nband, ecut, accuracy="normal", spin_mode="unpolarized", smearing=None,
                                     response_models=response_models, charge=0.0, sigma_nband=None, scr_nband=None,
                                     gamma=gamma, nksmall=nksmall, extra_abivars=extra_abivars)

    work = G0W0Work(scf_inputs=inputs[0], nscf_inputs=inputs[1], scr_inputs=inputs[2], sigma_inputs=inputs[3])

    # work = g0w0_extended_work(abi_structure, self.pseudo_table, kppa, nscf_nband, ecuteps, ecutsigx, scf_nband,
    #     accuracy="normal", spin_mode="unpolarized", smearing=None, response_models=response_models,
    #     charge=0.0, sigma_nband=None, scr_nband=None, gamma=gamma, nksmall=nksmall, **extra_abivars)

    print(workdir)
    flow.register_work(work, workdir=workdir)
    return flow.allocate()
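# Illustrative follow-up (an assumption about intended usage, not taken from the
# original source): the allocated flow returned by create() can be driven with
# the same PyLauncher.rapidfire() call used in _runem_all above. `gw_work` is a
# hypothetical instance of the class that defines create().
def _example_run_gw_flow(gw_work):
    flow = gw_work.create()
    if flow is not None:
        PyLauncher(flow).rapidfire(max_nlaunch=10, sleep_time=10)
        flow.show_status()
    return flow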
def test_fixes(self):
    flow = Flow(workdir=test_dir,
                manager=TaskManager.from_file(os.path.join(test_dir, "taskmanager.yml")))
    inp = {}
    flow.register_task(input=inp)
    flow.allocate()