def __init__(self, engine, db_config):
    """Store the connection settings and build the ORM engine.

    :param engine: Backend name; only ``'mysql'`` is handled here.
    :param db_config: Mapping providing the ``HOST``, ``PORT``, ``USER``,
        ``PASSWORD`` and ``NAME`` keys.
    :raises ValueError: If ``engine`` is not ``'mysql'``.
    :raises KeyError: If ``db_config`` lacks one of the keys above.

    A ``SQLAlchemyError`` raised while building the engine is logged and
    swallowed; in that case ``connected`` stays ``False`` and
    ``orm_engine`` stays ``None``.
    """
    super(OrmProxy, self).__init__()
    self.logger = LogManager.get_logger("orm." + self.__class__.__name__)
    self.engine = engine
    self.db_config = db_config
    self.host = self.db_config["HOST"]
    self.port = self.db_config["PORT"]
    self.user = self.db_config["USER"]
    self.passwd = self.db_config["PASSWORD"]
    self.db = self.db_config["NAME"]
    self.connected = False
    self.orm_engine = None

    if self.engine != 'mysql':
        self.logger.error('init: err=%s', 'Database engine not find.')
        # Raising a bare string is itself a TypeError; raise a real
        # exception so the message actually reaches the caller.
        raise ValueError("Database engine not find.")

    try:
        params = (self.user, self.passwd, self.host, self.port, self.db)
        connect = 'mysql+mysqldb://%s:%s@%s:%s/%s?charset=utf8' % params
        self.orm_engine = create_engine(
            connect, max_overflow=5, encoding='utf-8', echo=True)
        self.connected = True
        self.logger.info('init: %s', 'Database engine MySQLdb.')
    except SQLAlchemyError:
        self.logger.error('connect: err=%s', 'Connect orm failed.')
        self.logger.log_last_except()
def __init__(self, project, options):
    """Create the managers described by ``project``.

    Log and report output go under ``options.outputdir``. A database
    connection is only created when ``project.db`` is not ``None``; in
    that case it is stored on ``self.dbConnector``.
    """
    # The collaborators are imported at call time, not at module import.
    from log import LogManager
    from report import ReportManager
    from dev import DeviceManager
    from cli import CliManager
    from pressure import PressureManager
    from db import DBConnector

    output_dir = options.outputdir

    log_manager = LogManager(output_dir)
    log_manager.setCmdLogLevel(project.stdLogLevel)
    log_manager.setFileLogLevel(project.fileLogLevel)
    self.logManager = log_manager

    self.reportManager = ReportManager(output_dir)
    self.deviceManager = DeviceManager(project.devices)
    self.cliManager = CliManager(project.clis)

    db_settings = project.db
    if db_settings is not None:
        connector = DBConnector(db_settings)
        self.dbConnector = connector.createConnect()

    self.pressureManager = PressureManager(
        project.isPressure, project.repetition)
class DockerWrapper:
    """Starts and removes services through a Docker client."""

    # Restart policy passed to every service created by start_task.
    DOCKER_RESTART_POLICY = RestartPolicy(condition='none')

    def __init__(self, client: docker.DockerClient, config: RunnerConfig,
                 authenticator: AuthenticationFactory):
        """
        :param client: The Docker client used for all service calls
        :param config: Supplies ``host``, ``port`` and ``network``
        :param authenticator: Consulted before service creation; may be falsy
        """
        self._client = client
        self._config = config
        self._authenticator = authenticator
        self._logger = LogManager(__name__)

    def start_task(self, job_id: str, image: str, task_name: str,
                   task_args: Iterable[str]) -> int:
        """
        Create a service that runs one task of a job.

        :param job_id: The identifier of the owning job
        :param image: The image to create the service from
        :param task_name: The name of the task
        :param task_args: Arguments for the task; when at least one is
            given they are passed comma-joined in ``RUN_ARGS``
        :return: The ``id`` attribute of the created service
        """
        self._logger.info('Starting task {tn} for job {ji}'.format(
            tn=task_name, ji=job_id))
        run_env = [
            'SWARMER_ADDRESS=http://{addr}:{port}/result/{ident}'.format(
                addr=self._config.host, port=self._config.port, ident=job_id),
            'TASK_NAME={task}'.format(task=task_name),
            'SWARMER_JOB_ID={ident}'.format(ident=job_id)
        ]

        # Materialise the arguments once. Testing any(task_args) would skip
        # arguments that are all falsy (e.g. 0 or '') and would consume part
        # of a one-shot iterable before the join below.
        args = [str(a) for a in task_args]
        if args:
            run_env.append('RUN_ARGS={args}'.format(args=','.join(args)))

        svc = self._get_client().services.create(
            image,
            env=run_env,
            restart_policy=self.DOCKER_RESTART_POLICY,
            networks=[self._config.network],
            name='{id}-{name}'.format(id=job_id, name=task_name))
        return svc.id

    def remove_service(self, service_ids: Iterable[int]):
        """
        Remove every service in ``service_ids`` that the client returns.

        :param service_ids: The ids of the services to remove
        """
        # NOTE(review): this uses the raw client, so no login is attempted
        # here (unlike start_task) - confirm that is intended.
        for sid in service_ids:
            svc = self._client.services.get(sid)
            if svc:
                svc.remove()

    def _get_client(self):
        """Return the client, running pending logins first if required."""
        if self._authenticator and self._authenticator.any_require_login:
            self._authenticator.perform_logins(self._client)
        return self._client
def __init__(self, port, reset=False):
    """Load persisted state for ``port``, or start from a blank state.

    The state file is named ``persist_<port>``. When ``reset`` is true or
    that file does not exist, a fresh state dict is created; otherwise the
    state comes from ``self.deserialized()``.
    """
    self.log_file = "persist_" + str(port)

    # Reuse what is on disk only when the caller did not ask for a reset.
    if not reset and os.path.isfile(self.log_file):
        self.data = self.deserialized()
        return

    fresh_state = dict()
    fresh_state["current_term"] = 0
    fresh_state["vote_for"] = None
    fresh_state["log_manager"] = LogManager()
    self.data = fresh_state
def __init__(self, job_db: JobDb, queue_len=12, thread_builder=Thread):
    """
    Set up the in-memory queue state and start the two background scanners.

    :param job_db: The job tracking database
    :param queue_len: Stored as the queue length limit
    :param thread_builder: Callable used to build each background thread
    """
    self._job_db = job_db
    self._queue_len = queue_len
    self._logger = LogManager(__name__)

    # In-memory bookkeeping
    self._tasks = deque()
    self._running_tasks = list()
    self._jobs = set()
    self._lock = Lock()
    self._overdue_tasks = set()

    # Background scan for dead jobs
    cleanup_thread = thread_builder(target=self._scan_for_dead_jobs, args=())
    self._bg_cleanup_thread = cleanup_thread
    cleanup_thread.daemon = True
    cleanup_thread.start()

    # Background scan for completed jobs
    completed_thread = thread_builder(
        target=self._scan_for_completed_jobs, args=())
    self._bg_completed_thread = completed_thread
    completed_thread.daemon = True
    completed_thread.start()
def __init__(self, db_config):
    """Record the connection settings; no connection is opened here.

    :param db_config: Mapping providing the ``HOST``, ``PORT``, ``USER``,
        ``PASSWORD`` and ``NAME`` keys.
    """
    super(MysqlDatabase, self).__init__()
    logger_name = "db." + self.__class__.__name__
    self.logger = LogManager.get_logger(logger_name)

    self.db_config = db_config
    settings = self.db_config
    self.host = settings["HOST"]
    self.port = settings["PORT"]
    self.user = settings["USER"]
    self.passwd = settings["PASSWORD"]
    self.db = settings["NAME"]

    # Starts disconnected and without a pool.
    self.connected = False
    self.pool = None
def on_peer_append_entries(self, peer, msg):
    """Handle an append-entries message from ``peer`` and send a reply.

    The message counts as successful when its term is not older than
    ``persist['currentTerm']`` and the term stored at ``prevLogIndex``
    equals ``prevLogTerm`` (or ``prevLogTerm`` is ``None``). A message
    carrying ``compact_data`` replaces the log regardless of that check.

    :param peer: The sender; the response is sent back to it.
    :param msg: Dict with ``term``, ``prevLogTerm``, ``prevLogIndex`` and
        ``leaderId``, plus either ``entries``/``leaderCommit`` or the
        ``compact_*`` keys.
    """
    term_is_current = msg['term'] >= self.persist['currentTerm']
    prev_log_term_match = (
        msg['prevLogTerm'] is None
        or self.log.term(msg['prevLogIndex']) == msg['prevLogTerm'])
    success = term_is_current and prev_log_term_match

    if term_is_current:
        self.restart_election_timer()

    if 'compact_data' in msg:
        self.log = LogManager(compact_count=msg['compact_count'],
                              compact_term=msg['compact_term'],
                              compact_data=msg['compact_data'])
        self.volatile['leaderId'] = msg['leaderId']
        # Use the logging module like every other call in this method.
        logging.info('Initialized Log with compact data from leader')
    elif success:
        self.log.append_entries(msg['entries'], msg['prevLogIndex'])
        self.log.commit(msg['leaderCommit'])
        self.volatile['leaderId'] = msg['leaderId']
        logging.info(f'Log index is now {self.log.index}')
    else:
        cause = ('wrong term' if not term_is_current
                 else 'prev log term mismatch')
        logging.warning("Could not append entries. cause: %s", cause)

    self._update_cluster()

    resp = {
        'type': 'response_append',
        'success': success,
        'term': self.persist['currentTerm'],
        'matchIndex': self.log.index
    }
    self.raft.send_peer(peer, resp)
class AuthenticationFactory:
    """Loads authentication providers from entry points and runs their logins."""

    # Entry point group scanned for providers
    EXTRAS_KEY = 'swarmer.credentials'
    # Keys of the per-provider bookkeeping dict
    PROVIDER_KEY = 'provider'
    LAST_LOGIN_KEY = 'last_login'

    def __init__(self):
        self._providers = dict()
        self._logger = LogManager(__name__)
        self._setup_providers()

    @property
    def has_providers(self) -> bool:
        """Whether at least one provider was loaded."""
        # bool() tests emptiness; any() would test the truthiness of the keys.
        return bool(self._providers)

    @property
    def any_require_login(self) -> bool:
        """Whether any loaded provider reports that it should authenticate."""
        return any(
            entry[self.PROVIDER_KEY].should_authenticate(
                entry[self.LAST_LOGIN_KEY])
            for entry in self._providers.values())

    def perform_logins(self, client: DockerClient):
        """
        Log ``client`` in with every provider that should authenticate.

        :param client: The docker client to call ``login`` on
        """
        self._logger.info('Running logins for docker client')
        if not self.has_providers:
            self._logger.info('No providers present, skipping...')
            return
        for entry in self._providers.values():
            provider = entry[self.PROVIDER_KEY]
            if provider.should_authenticate(entry[self.LAST_LOGIN_KEY]):
                (user, password, registry) = provider.obtain_auth()
                client.login(username=user,
                             password=password,
                             registry=registry)
                # Same key the check above reads, via the shared constant
                entry[self.LAST_LOGIN_KEY] = datetime.now()

    def _setup_providers(self):
        """Instantiate one provider per entry point in ``EXTRAS_KEY``."""
        for entry_point in iter_entry_points(self.EXTRAS_KEY):
            self._logger.info('Loading authentication providers')
            try:
                provider = entry_point.load()
                provider_instance = provider()
                self._providers[entry_point.name] = {
                    self.PROVIDER_KEY: provider_instance,
                    self.LAST_LOGIN_KEY: None
                }
            except DistributionNotFound:
                # It may be the case that we were not asked to enable/include
                # a particular provider. This is ok and will simply default
                # to either:
                # 1) Only use the enabled ones, or
                # 2) Only have the ability to fetch from public registries
                self._logger.info(
                    "It appears that the feature {feat} was not enabled, skipping"
                    .format(feat=entry_point.name))
def __init__(self, old_state=None, raft=None):
    """Create a state, either inheriting from ``old_state`` or from scratch.

    :param old_state: Previous state; when truthy its ``raft``, ``persist``,
        ``volatile`` and ``log`` are reused as-is.
    :param raft: Used only when ``old_state`` is not given.
    """
    if old_state:
        self.raft = old_state.raft
        self.persist = old_state.persist
        self.volatile = old_state.volatile
        self.log = old_state.log
    else:
        self.raft = raft
        self.persist = PersistentDict(
            os.path.join(config.storage, 'state'),
            {'voteFor': None, 'currentTerm': 0})
        # The key must be spelled 'leaderId': that is the key the
        # append-entries handlers write.
        self.volatile = {
            'leaderId': None,
            'cluster': config.cluster,
            'address': config.address
        }
        self.log = LogManager()
        self._update_cluster()
def __init__(self, engine, db_config):
    """Connect the backend client and start the request machinery.

    :param engine: Backend name; only ``'mysql'`` is handled here.
    :param db_config: Connection settings handed to ``MysqlDatabase``.
    :raises ValueError: If ``engine`` is not ``'mysql'``.
    """
    super(DatabaseProxy, self).__init__()
    self.logger = LogManager.get_logger("db." + self.__class__.__name__)
    self.engine = engine

    if self.engine != 'mysql':
        self.logger.error('init: err=%s', 'Database engine not find.')
        # Raising a bare string is itself a TypeError; raise a real
        # exception so the message actually reaches the caller.
        raise ValueError("Database engine not find.")

    self.db_client = MysqlDatabase(db_config)
    self.db_client.connect()
    self.logger.info('init: %s', 'Database engine MySQLdb.')

    # Mirror the client's state and lookup tables on the proxy.
    self.connected = self.db_client.connected
    self.data_types = self.db_client.data_types
    self.operators = self.db_client.operators
    self.format_string = self.db_client.format_string

    self.thread_num = 10
    self.request_pool = ThreadPool(self.thread_num)
    self.request_time = 0.01
    # request_result is registered on a repeating timer with that interval.
    self.timer = Timer.add_repeat_timer(self.request_time,
                                        self.request_result)
self.certfile = certfile self.body = body def __str__(self): return self.method + " " + self.host + " " + self.url class HttpReply(object): def __init__(self, header, body): super(HttpReply, self).__init__() self.header = header self.body = body def __str__(self): return str(self.header) _logger = LogManager.get_logger("http_client") class AsyncHTTPClient(object): def __init__(self, max_clients=10, max_buffer_size=10240): """ max_buffer_size is the number of bytes that can be read by IOStream. It defaults to 10kB. """ super(AsyncHTTPClient, self).__init__() self.max_clients = max_clients self.active = {} self.max_buffer_size = max_buffer_size _logger.info('__init__: max_clients %d, max_buffer_size %d ', max_clients, max_buffer_size)
def __init__(self):
    """Start with a product manager, an empty chosen list and a log manager."""
    products = ProductManager()
    self.product_manager = products
    self.chosen_list = list()
    logs = LogManager()
    self.log_manager = logs
class Follower(State):
    """Follower state: answers vote and append-entries requests.

    An election timer is (re)armed on creation and on each accepted
    message; when it fires, the node is asked to change to ``Candidate``.
    """

    def __init__(self, old_state=None, raft=None):
        """Initialise the base state, clear the vote and arm the timer."""
        super().__init__(old_state, raft)
        self.persist['voteFor'] = None
        self.restart_election_timer()

    def teardown(self):
        """Cancel the pending election timer."""
        self.election_timer.cancel()

    def restart_election_timer(self):
        """Cancel any pending election timer and schedule a new one."""
        if hasattr(self, 'election_timer'):
            self.election_timer.cancel()

        # randrange(1, 4) yields 1, 2 or 3; the 10 ** 0 factor is 1.
        timeout = randrange(1, 4) * 10 ** 0
        loop = asyncio.get_event_loop()
        self.election_timer = loop.call_later(
            timeout, self.raft.change_state, Candidate)
        logging.info(f"Election timer restarted: {timeout}")

    def on_peer_request_vote(self, peer, msg):
        """Decide whether to grant ``peer`` a vote and send the response.

        The vote is granted when the message term is not older than ours,
        we have not voted for a different candidate, and the candidate's
        log is at least as recent as ours.
        """
        candidate = tuple(msg['candidateId'])
        term_is_current = msg['term'] >= self.persist['currentTerm']
        can_vote = self.persist['voteFor'] in [candidate, None]
        index_is_current = (msg['lastLogTerm'] > self.log.term() or
                            (msg['lastLogTerm'] == self.log.term() and
                             msg['lastLogIndex'] >= self.log.index))
        granted = term_is_current and can_vote and index_is_current

        if granted:
            # Store the tuple form that can_vote compares against; a raw
            # list would never equal it, so a repeated request from the
            # same candidate would be refused.
            self.persist['voteFor'] = candidate
            self.restart_election_timer()

        logging.info(f"Voting for {peer}. Term: {term_is_current}, vote: {can_vote}, index: {index_is_current}")

        response = {
            'type': 'response_vote',
            'voteGranted': granted,
            'term': self.persist['currentTerm']
        }
        self.raft.send_peer(peer, response)

    def on_peer_append_entries(self, peer, msg):
        """Handle an append-entries message from ``peer`` and send a reply.

        The message counts as successful when its term is not older than
        ours and the term stored at ``prevLogIndex`` equals ``prevLogTerm``
        (or ``prevLogTerm`` is ``None``). A message carrying
        ``compact_data`` replaces the log regardless of that check.
        """
        term_is_current = msg['term'] >= self.persist['currentTerm']
        prev_log_term_match = (
            msg['prevLogTerm'] is None
            or self.log.term(msg['prevLogIndex']) == msg['prevLogTerm'])
        success = term_is_current and prev_log_term_match

        if term_is_current:
            self.restart_election_timer()

        if 'compact_data' in msg:
            self.log = LogManager(compact_count=msg['compact_count'],
                                  compact_term=msg['compact_term'],
                                  compact_data=msg['compact_data'])
            self.volatile['leaderId'] = msg['leaderId']
            # Use the logging module like every other call in this class.
            logging.info('Initialized Log with compact data from leader')
        elif success:
            self.log.append_entries(msg['entries'], msg['prevLogIndex'])
            self.log.commit(msg['leaderCommit'])
            self.volatile['leaderId'] = msg['leaderId']
            logging.info(f'Log index is now {self.log.index}')
        else:
            cause = ('wrong term' if not term_is_current
                     else 'prev log term mismatch')
            logging.warning("Could not append entries. cause: %s", cause)

        self._update_cluster()

        resp = {
            'type': 'response_append',
            'success': success,
            'term': self.persist['currentTerm'],
            'matchIndex': self.log.index
        }
        self.raft.send_peer(peer, resp)
class JobQueue:
    """
    The JobQueue is responsible for interacting with the database and
    telling the task manager what task(s) to run next. Currently all
    background jobs that check for dead and completed jobs are in here,
    but they should be moved out in the future
    """

    # We scan for bad tasks every 10 minutes
    DEAD_SCAN_INTERVAL = 600

    # We scan for completed jobs every minute
    COMPLETED_SCAN_INTERVAL = 60

    # For now, anything above 30 minutes is stalled
    DEAD_JOB_INTERVAL = datetime.timedelta(minutes=30)

    # If set, we use this to signal that more tasks should be run
    _run_signal = None

    def __init__(self, job_db: JobDb, queue_len=12, thread_builder=Thread):
        """
        :param job_db: The job tracking database
        :param queue_len: The maximum number of tasks running at once
        :param thread_builder: Callable used to build the background threads
        """
        self._job_db = job_db
        self._queue_len = queue_len
        self._logger = LogManager(__name__)
        self._tasks = deque()
        self._running_tasks = []
        self._jobs = set()
        self._lock = Lock()
        self._overdue_tasks = set()

        # Set up the cleanup process
        self._bg_cleanup_thread = thread_builder(
            target=self._scan_for_dead_jobs, args=())
        self._bg_cleanup_thread.daemon = True
        self._bg_cleanup_thread.start()

        # Set up the completed job process
        self._bg_completed_thread = thread_builder(
            target=self._scan_for_completed_jobs, args=())
        self._bg_completed_thread.daemon = True
        self._bg_completed_thread.start()

    @property
    def run_signal(self):
        return self._run_signal

    @run_signal.setter
    def run_signal(self, value):
        self._run_signal = value

    def add_new_job(self, identifier, image_name, callback, tasks):
        """
        Add a new job to the job queue

        :param identifier: The identifier for the job
        :param image_name: The name of the image to run each task
        :param callback: The callback URL to report results
        :param tasks: The individual tasks to run
        :raises ValueError: If no tasks are provided
        """
        if not tasks:
            self._logger.error(
                'No tasks provided when submitting job {i}'.format(
                    i=identifier))
            raise ValueError('Tasks must be provided with the job')

        self._logger.info('Adding job {i} to the queue'.format(i=identifier))
        self._job_db.add_job(identifier, image_name, callback)
        self._job_db.add_tasks(identifier, tasks)

        # Register the job and its tasks in one step under the lock, so the
        # completed-job scan never sees a job that has no tasks queued yet.
        with self._lock:
            self._jobs.add(identifier)
            for t in tasks:
                self._tasks.appendleft(
                    TaskEntry(identifier, t['task_name'], t['task_args'],
                              image_name, None, None))

    def complete_task(self, identifier, name, status,
                      result) -> (List[int], bool):
        """
        Record the outcome of a running task and drop it from the running set

        :param identifier: The identifier of the job the task belongs to
        :param name: The name of the task
        :param status: The exit status of the task
        :param result: The result of the task
        :return: The ids to clean up (this task's id plus any overdue ones)
            and whether more tasks should be started; ``None`` when the
            task is not currently running
        """
        def is_target(entry: TaskEntry):
            # Match on the job as well as the name, so that same-named
            # tasks from other jobs are left alone.
            return entry.identifier == identifier and entry.name == name

        with self._lock:
            finished = next(
                (t for t in self._running_tasks if is_target(t)), None)
            if finished is None:
                self._logger.error(
                    'Was expected to find task "{tn}" for job "{jn}" but it was not present'
                    .format(tn=name, jn=identifier))
                return None

            self._job_db.update_result(identifier, name, result)
            self._job_db.update_status(identifier, name, status)

            # Remove this task from the running tasks
            self._running_tasks = [
                t for t in self._running_tasks if not is_target(t)
            ]

            # Return the task id we had recorded and whether to start any
            # more tasks, which is based on whether we are already running
            # at capacity and whether we have any more to run. We also use
            # this time to empty out all 'dead' processes
            task_list = [finished.task_id]
            while self._overdue_tasks:
                task_list.append(self._overdue_tasks.pop())

            has_capacity = len(self._running_tasks) < self._queue_len
            return task_list, has_capacity and bool(self._tasks)

    def get_next_tasks(self) -> List[RunnableTask]:
        """
        Query the queue for the next tasks to run

        :return: A list of the next tasks to run
        """
        tasks = []
        with self._lock:
            free_slots = self._queue_len - len(self._running_tasks)
            while free_slots > 0 and self._tasks:
                next_task = self._tasks.pop()
                tasks.append(
                    RunnableTask(next_task.identifier, next_task.name,
                                 next_task.args, next_task.image))
                self._running_tasks.append(next_task)
                free_slots -= 1
        return tasks

    def mark_task_started(self, identifier, name, task_id):
        """
        Attach a task id and the current time to a running task

        :param identifier: The identifier of the job the task belongs to
        :param name: The name of the task
        :param task_id: The id to record for the task
        """
        def set_details(entry: TaskEntry):
            if entry.identifier != identifier or entry.name != name:
                return entry
            return TaskEntry(entry.identifier, entry.name, entry.args,
                             entry.image, task_id, datetime.datetime.now())

        with self._lock:
            self._running_tasks = list(map(set_details, self._running_tasks))

    def get_started_tasks(self):
        """
        :return: The id and start time of every running task that has both
        """
        with self._lock:
            return [{
                'id': it.task_id,
                'started': it.started
            } for it in self._running_tasks
                    if it.task_id is not None and it.started is not None]

    def get_job_details(self, identifier):
        """
        :param identifier: The identifier of the job
        :return: The job record with its ``tasks`` field decoded from JSON
        """
        job = self._job_db.get_job(identifier)
        job['tasks'] = json.loads(job['tasks'])
        return job

    def _scan_for_dead_jobs(self):
        """Requeue running tasks that have exceeded DEAD_JOB_INTERVAL."""
        def item_filter(entry: TaskEntry):
            return entry.task_id not in self._overdue_tasks

        while True:
            time.sleep(self.DEAD_SCAN_INTERVAL)
            with self._lock:
                now = datetime.datetime.now()
                # A task handed out but not yet marked as started has no
                # start time; it cannot be overdue, and subtracting None
                # would kill this thread.
                overdue = [
                    t for t in self._running_tasks
                    if t.started is not None
                    and now - t.started > self.DEAD_JOB_INTERVAL
                ]
                for task in overdue:
                    self._overdue_tasks.add(task.task_id)
                    self._tasks.appendleft(
                        TaskEntry(task.identifier, task.name, task.args,
                                  task.image, None, None))
                self._running_tasks = list(
                    filter(item_filter, self._running_tasks))
            self._signal_should_run()

    def _scan_for_completed_jobs(self):
        """Report and clear jobs with no queued or running tasks left."""
        while True:
            time.sleep(self.COMPLETED_SCAN_INTERVAL)
            with self._lock:
                # Collect the ids of jobs that still have work, once
                active = {it.identifier for it in self._running_tasks}
                active.update(it.identifier for it in self._tasks)
                completed = [jid for jid in self._jobs if jid not in active]

                job_details = []
                for item in completed:
                    self._jobs.remove(item)
                    job_details.append(self._job_db.get_job(item))
                    self._job_db.clear_job(item)
            _send_job_results(job_details)
            self._signal_should_run()

    def _signal_should_run(self):
        """Invoke the run signal when there is spare capacity and queued work."""
        if self._run_signal and len(
                self._running_tasks) < self._queue_len and self._tasks:
            self._run_signal()
def __init__(self, client: docker.DockerClient, config: RunnerConfig,
             authenticator: AuthenticationFactory):
    """
    Keep references to the collaborators and create a logger.

    :param client: The Docker client
    :param config: The runner configuration
    :param authenticator: The authentication factory
    """
    self._client, self._config = client, config
    self._authenticator = authenticator
    self._logger = LogManager(__name__)
import logging

import falcon
from falcon.media.validators import jsonschema

from jobs import JobRunner
from log import LogManager
from .schema import get_schema_for

logger = LogManager(__name__)


class SubmitJobResource(object):
    """Falcon resource that accepts job submissions."""

    def __init__(self, runner: JobRunner):
        """
        :param runner: The job runner new jobs are handed to
        """
        logger.info('Spinning up the SubmitJobResource')
        self._runner = runner

    @jsonschema.validate(get_schema_for('job_submit'))
    def on_post(self, req: falcon.Request, resp: falcon.Response):
        """
        Create a job from the request body and answer 201 with its id.

        :param req: The request; its media supplies ``image_name``,
            ``callback_url`` and ``tasks``
        :param resp: The response to fill in
        """
        logger.info('Received request to create new job')

        payload = req.media
        identifier = self._runner.create_new_job(
            payload.get('image_name'),
            payload.get('callback_url'),
            payload.get('tasks'))
        logger.info('Job created with identifier {i}'.format(i=identifier))

        resp.status = falcon.HTTP_201
        resp.media = {'id': identifier}
        resp.location = '/status/{i}'.format(i=identifier)
def __init__(self, client: DockerWrapper, job_queue: JobQueue):
    """
    Keep the collaborators and register this object's ``_run_tasks`` as
    the queue's run signal.

    :param client: The docker wrapper
    :param job_queue: The job queue
    """
    self._docker, self._job_queue = client, job_queue

    # Not sure I'm a fan of this, probably need another refactor in the future
    setattr(job_queue, 'run_signal', self._run_tasks)

    self._logger = LogManager(__name__)
class JobDb:
    """
    The JobDb is responsible for handling the redis job tracking.

    Each job is a redis hash keyed by the job identifier; its ``tasks``
    field holds the JSON-encoded list of task records.
    """

    def __init__(self, rd: redis.StrictRedis):
        """
        :param rd: The redis client used for all operations
        """
        self._redis = rd
        self._logger = LogManager(__name__)

    def add_job(self, identifier: str, image_name: str, callback: str):
        """
        Add a new job to the tracking database

        :param identifier: The unique job identifier
        :param image_name: The name of the image that is used to run each job
        :param callback: The URL to POST back all results
        """
        self._log_operation('Adding new job {i}'.format(i=identifier))
        initial_state = {
            '__image': image_name,
            '__callback': callback,
            # Stored JSON-encoded like every later write to this field; a
            # raw list is not a valid hash value.
            'tasks': json.dumps([])
        }
        self._redis.hmset(identifier, initial_state)

    def add_job_with_tasks(self, identifier: str, image_name: str,
                           callback: str, tasks: list):
        """
        Add a new job and its tasks in one call

        :param identifier: The unique job identifier
        :param image_name: The name of the image that is used to run each job
        :param callback: The URL to POST back all results
        :param tasks: A list of task objects
        """
        self.add_job(identifier, image_name, callback)
        self.add_tasks(identifier, tasks)

    def add_tasks(self, identifier: str, tasks: list):
        """
        Add a list of tasks to the given job, when all tasks are
        complete, the job is considered finished.

        :param identifier: The unique job identifier
        :param tasks: A list of task objects
        :raises ValueError: If the job does not exist
        """
        self._log_operation('Adding tasks to job {i}:\n{t}'.format(
            i=identifier, t=json.dumps(tasks)))
        if not self._redis.exists(identifier):
            raise ValueError('Can not find item with identifier: {id}'.format(
                id=identifier))

        task_dict = {
            'tasks':
            json.dumps([{
                'args': t['task_args'],
                'status': 500,
                'result': {
                    'stdout': None,
                    'stderr': None
                },
                'name': t['task_name']
            } for t in tasks])
        }
        self._redis.hmset(identifier, task_dict)

    def update_status(self, identifier: str, task_name: str, status: int):
        """
        Update the status of a run

        :param identifier: The unique job identifier
        :param task_name: The individual task name to update the status of
        :param status: The exit status of the task
        """
        self._log_operation(
            'Updating status of task {tn} for job {i} with {s}'.format(
                tn=task_name, i=identifier, s=status))
        task = self._get_task(identifier, task_name)
        task['status'] = status
        self._store_task(identifier, task)

    def update_result(self, identifier: str, task_name: str, result: dict):
        """
        Update the result of a task run

        :param identifier: The unique job identifier
        :param task_name: The individual task name
        :param result: A dict with the stdout and stderr output, if any was present
        """
        self._log_operation(
            'Updating result of task {tn} for job {i} with {res}'.format(
                tn=task_name, i=identifier, res=json.dumps(result)))
        task = self._get_task(identifier, task_name)
        task['result'] = result
        self._store_task(identifier, task)

    def get_job(self, identifier: str):
        """
        Retrieve the tracking dict for the given job

        :param identifier: The unique job identifier
        :returns: The job hash with keys and values decoded as UTF-8
            where possible
        :raises ValueError: If the job does not exist
        """
        self._log_operation('Getting job {i}'.format(i=identifier))
        if not self._redis.exists(identifier):
            raise ValueError(
                'Can not find job with id: {id}'.format(id=identifier))

        job = self._redis.hgetall(identifier)

        def check_and_decode(value):
            # Values that cannot be decoded are passed through untouched
            try:
                return value.decode('utf-8')
            except (ValueError, AttributeError):
                return value

        return {
            check_and_decode(k): check_and_decode(v)
            for k, v in job.items()
        }

    def get_task(self, identifier: str, task_name: str):
        """
        Retrieve the status for an individual run in a job

        :param identifier: The unique job identifier
        :param task_name: The name of the individual job
        """
        self._log_operation('Getting task {tn} from job {i}'.format(
            tn=task_name, i=identifier))
        return self._get_task(identifier, task_name)

    def get_tasks(self, identifier: str):
        """
        Get the list of tasks for the specified job

        :param identifier: The unique job identifier
        :returns: The list of all tasks related to the specified job
        """
        self._log_operation('Getting tasks for {i}'.format(i=identifier))
        return self._get_task_list(identifier)

    def set_task_id(self, identifier: str, task_name: str, task_id: str):
        """
        Set the docker service identifier for the task

        :param identifier: The unique job identifier
        :param task_name: The name of the individual task
        :param task_id: The id of the task service
        """
        self._log_operation(
            'Setting task id {ti} for task {tn} for job {i}'.format(
                ti=task_id, tn=task_name, i=identifier))
        task = self._get_task(identifier, task_name)
        task['__task_id'] = task_id
        self._store_task(identifier, task)

    def clear_job(self, identifier: str):
        """
        Remove an entire job from the tracking DB

        :param identifier: The unique job identifier
        :raises ValueError: If the job does not exist
        """
        self._log_operation('Clearing job {i}'.format(i=identifier))
        if not self._redis.exists(identifier):
            raise ValueError(
                'Can not find job with id: {id}'.format(id=identifier))
        self._redis.delete(identifier)

    def _store_task(self, identifier, task):
        """Write ``task`` back over the stored task with the same name."""
        task_list = self._get_task_list(identifier)
        update = [task if t['name'] == task['name'] else t for t in task_list]
        self._redis.hset(identifier, 'tasks', json.dumps(update))

    def _get_task(self, identifier, name):
        """Return the first stored task called ``name``, or raise ValueError."""
        self._log_operation('Retrieving task {t} for {i}'.format(t=name,
                                                                 i=identifier))
        if not self._redis.hexists(identifier, 'tasks'):
            raise ValueError(
                'Unable to find job with identifier {id} that has any tasks'.
                format(id=identifier))

        tasks = json.loads(self._redis.hget(identifier, 'tasks'))
        found = next((t for t in tasks if t['name'] == name), None)
        if found is None:
            raise ValueError('Unable to locate task {name} in job {id}'.format(
                name=name, id=identifier))
        return found

    def _get_task_list(self, identifier):
        """Return the decoded task list of the job, or raise ValueError."""
        self._log_operation(
            'Retrieving task list for {ident}'.format(ident=identifier))
        if not self._redis.hexists(identifier, 'tasks'):
            raise ValueError(
                'Unable to find job with identifier {id} that has any tasks'.
                format(id=identifier))
        return json.loads(self._redis.hget(identifier, 'tasks'))

    def _log_operation(self, message: str):
        """Log ``message`` at info level with the ``JobDb:`` prefix."""
        self._logger.info('JobDb: {msg}'.format(msg=message))
def __init__(self, rd: redis.StrictRedis):
    """
    Keep the redis client and create a logger.

    :param rd: The redis client
    """
    logger = LogManager(__name__)
    self._redis, self._logger = rd, logger
def __init__(self):
    """Start with no providers, create a logger, then load the providers."""
    self._providers = {}
    logger = LogManager(__name__)
    self._logger = logger
    self._setup_providers()