def __init__(self, path=None, verbose=10):
    """Initialize the queue with a storage directory and a logger.

    :param path: directory backing the queue; when None, falls back to
        ``fs_tracker.get_queue_directory()``.
    :param verbose: logging level handed to ``logger.setLevel``.
    """
    self.path = fs_tracker.get_queue_directory() if path is None else path
    self.logger = logs.getLogger(self.__class__.__name__)
    self.logger.setLevel(verbose)
def __init__(self, path=None):
    """Remember the queue directory, defaulting to the fs_tracker location.

    :param path: directory backing the queue; when None, uses
        ``fs_tracker.get_queue_directory()``.
    """
    if path is not None:
        self.path = path
    else:
        self.path = fs_tracker.get_queue_directory()
def worker_loop(queue, parsed_args,
                single_experiment=False,
                timeout=0,
                verbose=None):
    """Consume experiments from *queue* and run them with a LocalExecutor.

    Dequeues messages until the queue is empty (or one experiment has run
    when ``single_experiment`` is True), sets up python packages and
    artifacts for each experiment, executes it, and acknowledges the
    message on completion.

    :param queue: queue object providing dequeue/hold/acknowledge.
    :param parsed_args: parsed CLI args; ``parsed_args.config`` is
        overwritten with each message's config.
    :param single_experiment: if True, return after the first experiment
        that actually runs.
    :param timeout: dequeue timeout, forwarded to ``queue.dequeue``.
    :param verbose: explicit log level; when falsy, the level is taken
        from each message's config instead.
    :return: the last nonzero executor return code, or 0.
    """
    fetch_artifacts = True  # always re-fetch artifacts in this variant
    logger = logs.getLogger('worker_loop')
    hold_period = 4  # minutes the message lease is extended for
    retval = 0
    while True:
        msg = queue.dequeue(acknowledge=False, timeout=timeout)
        if not msg:
            break  # queue drained (or timed out) — leave the loop
        # first_exp, ack_key = queue.dequeue(acknowledge=False)
        first_exp, ack_key = msg

        data_dict = json.loads(sixdecode(first_exp))
        experiment_key = data_dict['experiment']['key']
        config = data_dict['config']
        parsed_args.config = config
        # Explicit verbose wins and is pushed into the config;
        # otherwise the message's own config decides the level.
        if verbose:
            config['verbose'] = verbose
        else:
            verbose = model.parse_verbosity(config.get('verbose'))

        logger.setLevel(verbose)

        logger.debug('Received message: \n{}'.format(data_dict))

        executor = LocalExecutor(parsed_args)
        with model.get_db_provider(config) as db:
            # experiment = experiment_from_dict(data_dict['experiment'])
            def try_get_experiment():
                # A missing experiment here means the DB record should
                # exist but could not be read — treat as storage failure
                # and let retry() try again.
                experiment = db.get_experiment(experiment_key)
                if experiment is None:
                    raise ValueError(
                        'experiment is not found - indicates storage failure')
                return experiment

            experiment = retry(try_get_experiment,
                               sleep_time=10, logger=logger)

            # Drop (and acknowledge) experiments whose configured maximum
            # lifetime has already elapsed since they were added.
            if config.get('experimentLifetime') and \
                int(str2duration(config['experimentLifetime'])
                    .total_seconds()) + experiment.time_added < time.time():
                logger.info(
                    'Experiment expired (max lifetime of {} was exceeded)'.
                    format(config.get('experimentLifetime')))
                queue.acknowledge(ack_key)
                continue

            if allocate_resources(experiment, config, verbose=verbose):
                # Keep the message leased while the experiment runs:
                # hold immediately, then re-hold on a background schedule
                # at half the hold period.
                def hold_job():
                    queue.hold(ack_key, hold_period)

                hold_job()
                sched = BackgroundScheduler()
                sched.add_job(hold_job, 'interval', minutes=hold_period / 2)
                sched.start()

                try:
                    python = 'python'
                    if experiment.pythonver == 3:
                        python = 'python3'
                    # Skip pip setup when running inside singularity —
                    # presumably the image carries its own environment.
                    if '_singularity' not in experiment.artifacts.keys():
                        pip_diff = pip_needed_packages(
                            experiment.pythonenv, python)
                        if any(pip_diff):
                            logger.info(
                                'Setting up python packages for experiment')
                            if pip_install_packages(
                                    pip_diff, python, logger) != 0:
                                # Bulk install failed; fall back to
                                # installing packages one at a time so a
                                # single bad package doesn't block the rest.
                                logger.info(
                                    "Installation of all packages together " +
                                    " failed, "
                                    "trying one package at a time")
                                for pkg in pip_diff:
                                    pip_install_packages(
                                        [pkg], python, logger)

                    for tag, art in six.iteritems(experiment.artifacts):
                        if fetch_artifacts or 'local' not in art.keys():
                            logger.info('Fetching artifact ' + tag)
                            if tag == 'workspace':
                                # Workspace is fetched even if the local
                                # copy is newer (only_newer=False).
                                art['local'] = retry(
                                    lambda: db.get_artifact(
                                        art, only_newer=False),
                                    sleep_time=10, logger=logger)
                            else:
                                art['local'] = retry(
                                    lambda: db.get_artifact(art),
                                    sleep_time=10, logger=logger)

                    returncode = executor.run(experiment)
                    # Remember the last failing return code as the
                    # loop's overall result.
                    if returncode != 0:
                        retval = returncode
                finally:
                    # Always stop the lease-renewal scheduler and ack the
                    # message, even if setup or execution raised.
                    sched.shutdown()
                    queue.acknowledge(ack_key)

                if single_experiment:
                    logger.info('single_experiment is True, quitting')
                    return retval
            else:
                # Not enough resources right now; the unacknowledged
                # message stays on the queue and will be retried.
                logger.info('Cannot run experiment ' + experiment.key +
                            ' due lack of resources. Will retry')
                # NOTE(review): assumes config always has 'sleep_time' —
                # a KeyError here would kill the worker; verify upstream.
                time.sleep(config['sleep_time'])
                # wait_for_messages(queue, timeout, logger)

    # queue = glob.glob(fs_tracker.get_queue_directory() + "/*")
    logger.info("Queue in {} is empty, quitting".format(
        fs_tracker.get_queue_directory()))
    return retval
def worker_loop(queue, parsed_args,
                setup_pyenv=False,
                single_experiment=False,
                fetch_artifacts=False,
                timeout=0):
    """Consume experiments from *queue* and run them with a LocalExecutor.

    Dequeues messages while the queue reports more work, optionally sets
    up the python environment and fetches artifacts for each experiment,
    executes it, and acknowledges the message on completion.

    :param queue: queue object providing has_next/dequeue/hold/acknowledge.
    :param parsed_args: parsed CLI args; ``parsed_args.config`` is
        overwritten with each message's config.
    :param setup_pyenv: if True, pip-install the experiment's python
        environment before running.
    :param single_experiment: if True, return after the first experiment
        that actually runs.
    :param fetch_artifacts: if True, fetch all artifacts even when a
        local copy is already recorded.
    :param timeout: forwarded to ``wait_for_messages`` between retries.
    """
    logger = logging.getLogger('worker_loop')
    hold_period = 4  # minutes the message lease is extended for
    while queue.has_next():
        first_exp, ack_key = queue.dequeue(acknowledge=False)

        # Parse the message once instead of twice (was json.loads'd
        # separately for the key and the config).
        data_dict = json.loads(first_exp)
        experiment_key = data_dict['experiment']['key']
        config = data_dict['config']
        parsed_args.config = config
        verbose = model.parse_verbosity(config.get('verbose'))
        logger.setLevel(verbose)

        logger.debug(
            'Received experiment {} with config {} from the queue'.format(
                experiment_key, config))

        executor = LocalExecutor(parsed_args)
        experiment = executor.db.get_experiment(experiment_key)

        if allocate_resources(experiment, config, verbose=verbose):
            # Keep the message leased while the experiment runs:
            # hold immediately, then re-hold on a background schedule
            # at half the hold period.
            def hold_job():
                queue.hold(ack_key, hold_period)

            hold_job()
            sched = BackgroundScheduler()
            sched.add_job(hold_job, 'interval', minutes=hold_period / 2)
            sched.start()

            try:
                if setup_pyenv:
                    logger.info('Setting up python packages for experiment')
                    pipp = subprocess.Popen(
                        ['pip', 'install'] + experiment.pythonenv,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT)
                    pipout, _ = pipp.communicate()
                    # communicate() returns bytes on Python 3; decode
                    # before concatenating with the str message (the
                    # original code raised TypeError here on py3).
                    logger.info("pip output: \n" +
                                pipout.decode('utf-8', errors='replace'))

                    # pip.main(['install'] + experiment.pythonenv)

                # dict.items() is portable across Python 2 and 3;
                # .iteritems() (used originally) does not exist on py3.
                for tag, art in experiment.artifacts.items():
                    if fetch_artifacts or 'local' not in art.keys():
                        logger.info('Fetching artifact ' + tag)
                        if tag == 'workspace':
                            # art['local'] = executor.db.store.get_artifact(
                            #     art, '.', only_newer=False)
                            # Workspace is fetched even if the local copy
                            # is newer (only_newer=False).
                            art['local'] = executor.db.store.get_artifact(
                                art, only_newer=False)
                        else:
                            art['local'] = executor.db.store.get_artifact(art)

                executor.run(experiment)
            finally:
                # Always stop the lease-renewal scheduler and ack the
                # message, even if setup or execution raised.
                sched.shutdown()
                queue.acknowledge(ack_key)

            if single_experiment:
                logger.info('single_experiment is True, quitting')
                return
        else:
            # Not enough resources right now; the unacknowledged message
            # stays on the queue and will be retried.
            logger.info('Cannot run experiment ' + experiment.key +
                        ' due lack of resources. Will retry')
            # NOTE(review): assumes config always has 'sleep_time' —
            # a KeyError here would kill the worker; verify upstream.
            time.sleep(config['sleep_time'])
            wait_for_messages(queue, timeout, logger)

    # queue = glob.glob(fs_tracker.get_queue_directory() + "/*")
    logger.info("Queue in {} is empty, quitting".format(
        fs_tracker.get_queue_directory()))