logger.info('Running worker for %s - PID = %d' % (service, pid))

# Register this worker under its own admin key: store the launch and
# heartbeat timestamps, then put an expiry of 600 on the key.
keyw = 'admin:worker:%s:%d' % (service, pid)
redis.hset(keyw, "launch_time", time.time())
redis.hset(keyw, "beat_time", time.time())
redis.expire(keyw, 600)

# Publish the service entry: the current configuration, all configurations
# (JSON-encoded) and the pickled service definition.
keys = 'admin:service:%s' % service
redis.hset(keys, "current_configuration", current_configuration)
redis.hset(keys, "configurations", json.dumps(configurations))
redis.hset(keys, "def", pickle.dumps(services[service]))

# Purge leftover state for this service, in order: first the reserved state
# of resources, then the queued tasks on the service.
for pattern in ('reserved:%s:*' % service, 'queued:%s' % service):
    for key in redis.keys(pattern):
        redis.delete(key)

# On startup, add all active tasks in the work queue or service queue
for task_id in task.list_active(redis, service):
    with redis.acquire_lock(task_id):
        task_key = 'task:' + task_id
        status = redis.hget(task_key, 'status')
        if status in ('queued', 'allocating', 'allocated'):
            # Not yet running: hand the task back to its service queue and
            # reset its status to 'queued'.
            task.service_queue(redis, task_id, redis.hget(task_key, 'service'))
            task.set_status(redis, task_key, 'queued')
        else:
            # Any other status goes to the work queue of this service.
            task.work_queue(redis, task_id, service)
    # check integrity of tasks
redis.config_set('notify-keyspace-events', 'Klgx') break except ConnectionError as e: retry += 1 logger.warn("cannot connect to redis DB - retrying (%d)" % retry) time.sleep(1) assert retry < 10, "Cannot connect to redis DB - aborting" services, base_config = config.load_services(cfg.get('default', 'config_dir')) for service in services: # remove busy state from resources for key in redis.keys('busy:%s:*' % service): redis.delete(key) # remove reserved state from resources for key in redis.keys('reserved:%s:*' % service): redis.delete(key) # remove queued tasks on service for key in redis.keys('queued:%s' % service): redis.delete(key) # On startup, add all active tasks in the work queue or service queue for task_id in task.list_active(redis, service): with redis.acquire_lock(task_id): status = redis.hget('task:'+task_id, 'status') if status == 'queued' or status == 'allocating' or status == 'allocated': task.service_queue(redis, task_id, redis.hget('task:'+task_id, 'service')) task.set_status(redis, 'task:'+task_id, 'queued') else:
break time.sleep(5) if time.time() - start > 30: # check if worker still there w = redis.exists("admin:worker:%s:%d" % (service, p1.pid)) if not w: print("[%s-%d] ** No heartbeat" % (service, p1.pid)) p1.terminate() except Exception as e: log_fh.write("-" * 80) log_fh.write("\n") log_fh.write("INTERRUPTED: "+str(e)) log_fh.write("\n") # whatever happened, we remove trace of the worker redis.delete("admin:worker:%s:%d" % (service, p1.pid)) stop = time.time() print("[%s] ** process stopped: %d" % (service, p1.pid)) sys.stdout.flush() log_fh.flush() log_fh.write("-" * 80) log_fh.write("\n") log_fh.write("RUNNING TIME: %f\n" % (stop-start)) if p1.returncode == 55: break if stop - start < args.fast_restart_delay: