def test_add_same_job_twice_adds_cron_once():
    """
    Queueing the very same job twice must leave exactly one entry in both
    the cluster storage and the crontab.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        command = "echo 'hello world'"
        cron_job = CronItem(command=command)
        cron_job.assigned_to = get_ip()

        storage = Storage()
        tab = CronTab(tab="""* * * * * command""")
        processor = Processor(12345, storage, cron=tab)

        # serialize and enqueue the identical job two times
        for _ in range(2):
            for packet in UdpSerializer.dump(cron_job):
                processor.queue.put_nowait(packet)

        loop.run_until_complete(processor.process())

        assert 1 == len(storage.cluster_jobs)
        assert command == storage.cluster_jobs[0].command
        assert next(tab.find_command(command), None) is not None
        assert 1 == len(list(tab.find_command(command)))
    finally:
        # close the loop even when an assertion fails, so it does not leak
        loop.close()
def __init__(self, loc, tabs=None):
    """
    Collect system crontabs found at `loc`.

    :param loc: a directory (every non-hidden entry becomes a tab) or a
                single crontab file
    :param tabs: accepted for signature compatibility; not used here
    """
    super().__init__()
    if os.path.isfile(loc):
        self.append(CronTab(user=False, tabfile=loc))
        return
    if not os.path.isdir(loc):
        return
    for entry in os.listdir(loc):
        # entries starting with a dot are skipped
        if entry[0] != '.':
            self.append(CronTab(user=False, tabfile=os.path.join(loc, entry)))
def __init__(self, udp_port, storage, cron=None, user=None, hash_key=None):
    """
    Set up the processor state.

    :param udp_port: UDP port stored for later use
    :param storage: storage object stored for later use
    :param cron: crontab to operate on; when omitted (None) the tab is
                 loaded from /etc/crontab
    :param user: optional user name stored on the processor
    :param hash_key: optional key stored on the processor
    """
    self.queue = asyncio.Queue()
    self._buffer = []
    self.udp_port = udp_port
    self.storage = storage
    # Compare against None explicitly: a crontab object that evaluates as
    # falsy (e.g. an empty container) is still a deliberate choice by the
    # caller and must not be silently replaced by /etc/crontab.
    if cron is None:
        self.cron = CronTab(tabfile='/etc/crontab', user=False)
    else:
        self.cron = cron
    self.user = user
    self.hash_key = hash_key
def all(self):
    """
    Return a CronTab object with all jobs (read-only)

    The merged tab is built on first access and cached in `self._all`;
    jobs without a user get their tab's user, or 'unknown'.
    """
    if self._all is not None:
        return self._all

    merged = CronTab(user=False)
    self._all = merged
    for source_tab in self:
        for entry in source_tab:
            if entry.user is None:
                entry.user = source_tab.user or 'unknown'
            merged.append(entry)
    return merged
def object_hook(obj):
    """
    JSON object hook: turn dicts tagged with a known `_type` back into
    CronItem, CronTab or Status objects.

    :param obj: decoded JSON object (dict)
    :return: the rebuilt object, or `obj` itself when it carries no
             `_type` or an unrecognised one
    """
    try:
        kind = obj['_type']
    except KeyError:
        return obj

    if kind == 'CronItem':
        tab = json.loads(obj['cron'], cls=CronDecoder)
        owner = json.loads(obj['user'])
        item = CronItem(command=obj['command'], user=owner, cron=tab)
        item.enable(obj['enabled'])
        item.comment = obj['comment']
        item.assigned_to = obj['assigned_to']
        item.pid = obj['pid']
        item._log = obj['log']
        last_run = obj['last_run']
        # an empty string means there is no last run to parse
        if last_run != '':
            item.last_run = parser.parse(last_run)
        item.set_all(obj['parts'])
        return item

    if kind == 'CronTab':
        return CronTab(user=obj['user'],
                       tab=obj['tab'],
                       tabfile=obj['tabfile'],
                       log=obj['log'])

    if kind == 'status':
        status = Status()
        # (attribute on Status, key in the serialized dict)
        for attribute, key in (('system_load', 'load'),
                               ('state', 'state'),
                               ('ip', 'ip'),
                               ('time', 'time')):
            setattr(status, attribute, obj[key])
        return status

    return obj
def test_store_retrieve_sorts_correctly():
    """
    After processing several status messages for one node, the stored node
    state must carry the time of the last message that was sent.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        storage = Storage()
        processor = Processor(12345, storage, cron=CronTab(tab="""* * * * * command"""))
        ip = '127.0.0.1'

        messages = [Status(ip, 10) for _ in range(10)]
        for message in messages:
            for packet in UdpSerializer.dump(message):
                processor.queue.put_nowait(packet)

        while not processor.queue.empty():
            loop.run_until_complete(asyncio.gather(processor.process()))

        assert messages[-1].time == storage.node_state(ip).time
    finally:
        # close the loop even when an assertion fails, so it does not leak
        loop.close()
def test_store_cron_job_message_to_disk():
    """
    A processed cron job must end up in storage and, after `save()`, in
    `cluster_jobs.json` below the storage path prefix.
    """
    tmp_dir = mkdtemp()
    ser = path.join(tmp_dir, 'cluster_jobs.json')

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        storage = Storage(path_prefix=tmp_dir)
        processor = Processor(12345, storage, cron=CronTab(tab="""* * * * * command"""))

        message = CronItem(command="echo 'hello world'")
        message.append_log("test log message")

        for packet in UdpSerializer.dump(message):
            processor.queue.put_nowait(packet)

        loop.run_until_complete(asyncio.gather(processor.process()))
        loop.run_until_complete(asyncio.gather(storage.save()))

        assert processor.queue.qsize() == 0
        assert len(storage.cluster_jobs) == 1
        assert message == storage.cluster_jobs[0]
        assert exists(ser)
    finally:
        # always release the loop and the temp directory, also on failure
        loop.close()
        shutil.rmtree(tmp_dir)
def test_manual_run_is_executed_exactly_once():
    """
    Queueing a job followed by a Run request for it must execute the
    command once and leave a single log line with exit code and output.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        command = "echo 'hello world'"
        cron_job = CronItem(command=command)
        cron_job.assigned_to = get_ip()

        storage = Storage()
        tab = CronTab(tab="""* * * * * command""")
        processor = Processor(12345, storage, cron=tab)

        # first the job definition, then the manual run request
        for message in (cron_job, Run(cron_job)):
            for packet in UdpSerializer.dump(message):
                processor.queue.put_nowait(packet)

        loop.run_until_complete(processor.process())

        assert 1 == len(storage.cluster_jobs)
        assert command == storage.cluster_jobs[0].command
        assert 1 == len(storage.cluster_jobs[0].log)
        # two asserts instead of one `and` so a failure names the culprit
        assert 'exit code: 0' in storage.cluster_jobs[0].log[0]
        assert 'hello world' in storage.cluster_jobs[0].log[0]
        assert processor.queue.empty()
    finally:
        # close the loop even when an assertion fails, so it does not leak
        loop.close()
def __init__(self, loc, tabs=None):
    """
    Build tabs for the entries of directory `loc`, based on the first job
    in `tabs` whose command references `loc`.

    :param loc: directory to inspect
    :param tabs: collection exposing `.all.find_command`; nothing happens
                 when it is missing/empty or `loc` is not a directory
    """
    super().__init__()
    if not (tabs and os.path.isdir(loc)):
        return

    self.append(CronTab(user=False))
    matches = list(tabs.all.find_command(loc))
    if not matches:
        return

    trigger = matches[0]
    for entry in os.listdir(loc):
        self.add(loc, entry, trigger)
    # the matched job is deleted once every entry has been added
    trigger.delete()
def __init__(self, loc, tabs=None):
    """
    Fill the list with one tab per name returned by `self.listdir(loc)`;
    when that yields nothing, fall back to `CronTab(user=True)`.

    :param loc: location handed to `self.listdir` / `self.generate`
    :param tabs: accepted for signature compatibility; not used here
    """
    super().__init__()
    for username in self.listdir(loc):
        spool_tab = self.generate(loc, username)
        if not spool_tab:
            continue
        self.append(spool_tab)

    if self:
        return

    # nothing was collected: try the fallback tab instead
    fallback = CronTab(user=True)
    if fallback:
        self.append(fallback)
class CronTabs(list):
    """
    Singleton list of all detectable crontabs
    """
    _all = None   # cached merged tab, reset by add()
    _self = None  # the one shared instance

    def __new__(cls, *args, **kw):
        # Test for None rather than truthiness: the instance is a list, so
        # an existing but empty singleton is falsy and would otherwise be
        # replaced by a brand-new object on every construction.
        if cls._self is None:
            cls._self = super().__new__(cls, *args, **kw)
        return cls._self

    def __init__(self):
        # NOTE(review): list.__init__() without arguments empties the list,
        # so every CronTabs() call re-populates from KNOWN_LOCATIONS.
        super().__init__()
        if not self:
            for loc in KNOWN_LOCATIONS:
                self.add(*loc)

    def add(self, cls, *args):
        """
        Append every tab produced by `cls(*args, tabs=self)` and drop the
        cached merged view.
        """
        for tab in cls(*args, tabs=self):
            self.append(tab)
        self._all = None

    @property
    def all(self):
        """
        Return a CronTab object with all jobs (read-only)
        """
        if self._all is None:
            self._all = CronTab(user=False)
            for tab in self:
                for job in tab:
                    # jobs without a user get their tab's user, or 'unknown'
                    if job.user is None:
                        job.user = tab.user or 'unknown'
                    self._all.append(job)
        return self._all
def test_store_status_message():
    """
    A single status message pushed through the processor must drain the
    queue and be retrievable as the node state for its ip.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        storage = Storage()
        processor = Processor(12345, storage, cron=CronTab(tab="""* * * * * command"""))
        ip = '127.0.0.1'
        message = Status(ip, 10)

        for packet in UdpSerializer.dump(message):
            processor.queue.put_nowait(packet)

        loop.run_until_complete(asyncio.gather(processor.process()))

        assert processor.queue.qsize() == 0
        assert message == storage.node_state(ip)
    finally:
        # close the loop even when an assertion fails, so it does not leak
        loop.close()
class Processor(object):
    """
    Message processor for the system

    Packets are taken from `self.queue`, collected in `self._buffer` and,
    once a group decodes into an object, dispatched by object type.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, udp_port, storage, cron=None, user=None, hash_key=None):
        """
        :param udp_port: UDP port used when broadcasting run results
        :param storage: storage holding cluster jobs and cluster status
        :param cron: crontab to operate on; when omitted (None) the tab is
                     loaded from /etc/crontab
        :param user: user assigned to jobs that do not carry one
        :param hash_key: key handed to UdpSerializer.load / dump
        """
        self.queue = asyncio.Queue()
        self._buffer = []
        self.udp_port = udp_port
        self.storage = storage
        # Compare against None explicitly: a crontab object that evaluates
        # as falsy (e.g. an empty one) must not be replaced by /etc/crontab.
        if cron is None:
            self.cron = CronTab(tabfile='/etc/crontab', user=False)
        else:
            self.cron = cron
        self.user = user
        self.hash_key = hash_key

    def _find_cluster_job(self, wanted):
        """ first stored cluster job equal to `wanted`, or None """
        return next((j for j in self.storage.cluster_jobs if j == wanted), None)

    def update_status(self, status_message):
        """
        store a received status message
        :param status_message: decoded status object
        """
        self.logger.debug("got full status message in buffer ({0}".format(status_message))
        self.storage.cluster_status.append(status_message)

    def remove_job(self, job):
        """
        drop a job from the cluster storage and, when it is assigned to this
        node, kill it if it is running and remove it from the crontab
        :param job: job flagged for removal
        """
        self.logger.debug("got full remove in buffer {0}".format(job))
        if job in self.storage.cluster_jobs:
            self.logger.debug("removing existing job {0}".format(job))
            self.storage.cluster_jobs.remove(job)
            if job.assigned_to == get_ip():
                if job.pid:
                    self.logger.warning("job {0} is running, going to kill it".format(job))
                    if check_process(job.command, job.pid):
                        kill_proc_tree(job.pid)
                self.logger.info("removing existing, assigned job {0}".format(job))
                cmd = next(self.cron.find_command(job.command), None)
                if cmd:
                    self.logger.info("removing {0} from cron".format(job))
                    self.cron.remove(cmd)
                    self.cron.write()
                else:
                    self.logger.warning("defined job {0} not found in cron, but assigned to me!".format(job))

    def add_job(self, new_job):
        """
        register a job in the cluster storage; a job that is new and
        assigned to this node is also written to the crontab
        :param new_job: received job
        """
        self.logger.debug("got full job in buffer {0}".format(new_job))
        job = self._find_cluster_job(new_job)
        if not job:
            if new_job.assigned_to == get_ip():
                existing_job = next(self.cron.find_command(new_job.command), None)
                if existing_job and existing_job == new_job:
                    self.logger.info("job already defined in tab, skipping it")
                else:
                    if self.user and not new_job.user:
                        new_job.user = self.user
                    if self.cron and not new_job.cron:
                        new_job.cron = self.cron
                    self.logger.info("adding job {0} to cron {1} for user {2}".format(new_job, self.cron.filename, new_job.user))
                    self.cron.append(new_job)
                    self.cron.write()
        else:
            # known job: drop the stored copy, the received one replaces it
            idx = self.storage.cluster_jobs.index(job)
            del (self.storage.cluster_jobs[idx])
        self.storage.cluster_jobs.append(new_job)

    def toggle_job(self, toggle):
        """
        flip the enabled state of a known job when this node owns it
        :param toggle: toggle request carrying the job
        """
        self.logger.debug("got full toggle in buffer {0}".format(toggle.job))
        job = self._find_cluster_job(toggle.job)
        if job:
            if job.assigned_to == get_ip():
                self.logger.info("am owner for job {0}, toggling it".format(job))
                job.enable(not job.is_enabled())
                if self.user and not job.user:
                    job.user = self.user
                if self.cron and not job.cron:
                    job.cron = self.cron
                self.cron.write()
            idx = self.storage.cluster_jobs.index(job)
            del (self.storage.cluster_jobs[idx])
            self.storage.cluster_jobs.append(job)

    async def run(self, run, uuid):
        """
        execute a job on request when this node owns it, log the result on
        the job and broadcast the job afterwards
        :param run: run request carrying the job
        :param uuid: identifier of the packet group, cleaned from the buffer
        """
        self.logger.debug("got full run in buffer {0}".format(run.job))
        job = self._find_cluster_job(run.job)
        if job and job.assigned_to == get_ip():
            self.logger.info("am owner for job {0}".format(job))
            run.timestamp = datetime.now()
            # the job command is handed to the shell as a command line
            process = subprocess.Popen(run.job.command,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       shell=True)
            self.logger.info("{0} has been defined, going to execute".format(job.command))
            # NOTE(review): communicate() blocks until the command exits,
            # which also holds up this coroutine's event loop.
            std_out, std_err = process.communicate()
            exit_code = process.wait()
            if std_err:
                self.logger.warning("error during execution of {0}: {1}".format(run.job.command, std_err))
            self.logger.info("output of {0} with code {1}: {2}".format(job.command, exit_code, std_out))
            job.append_log("{0:%b %d %H:%M:%S} localhost CRON[{1}] exit code: {2}, out: {3}, err: {4}".format(datetime.now(), process.pid, exit_code, std_out, std_err))
            broadcast(self.udp_port, UdpSerializer.dump(job, self.hash_key))
        self.clean_buffer(uuid)

    def kill(self, kill):
        """
        kill a running job when this node owns it
        :param kill: kill request carrying job and pid
        """
        if not kill.pid:
            self.logger.warning("got kill command for {0} but PID not set".format(kill.job))
        else:
            self.logger.debug("got full kill in buffer ({0}".format(kill.job))
            if kill.job.assigned_to == get_ip() and check_process(kill.command, pid=kill.pid):
                self.logger.info("I'm owner, going to try and kill the running job {0}".format(kill.job))
                try:
                    kill_proc_tree(kill.pid)
                except ValueError:
                    self.logger.warning("got signal to kill self, that's not happening")

    def clean_buffer(self, uuid):
        """
        remove packet groups from buffer
        :param uuid: identifier for the group
        """
        self.logger.debug("removing message {0} from buffer".format(uuid))
        g = group(self._buffer)
        if uuid in g:
            for p in g[uuid]:
                if p in self._buffer:
                    self._buffer.remove(p)
        else:
            self.logger.warning("message {0} not in buffer, skipping clean".format(uuid))

    async def process(self):
        """
        processor for our queue

        Waits for one packet, then keeps going until the queue is empty.
        Implemented as a loop instead of recursing once per packet, so a
        large backlog cannot exhaust the recursion limit.
        """
        while True:
            data = await self.queue.get()
            # NOTE(review): this one message goes through the root logger,
            # not self.logger - confirm whether that is intentional.
            logging.debug("got {0} on processor queue".format(data))
            packet = Packet.decode(data)
            if packet:
                self._buffer.append(packet)
                packet_groups = group(self._buffer)
                for uuid in packet_groups.keys():
                    self.logger.debug("identifying packet group for {0}".format(uuid))
                    obj = UdpSerializer.load(packet_groups[uuid], self.hash_key)
                    if obj:
                        self.logger.debug("got object {0} from {1}".format(obj, uuid))
                        await self._dispatch(obj, uuid)
            self.storage.prune()
            self.queue.task_done()
            if self.queue.empty():
                return

    async def _dispatch(self, obj, uuid):
        """ route one decoded object to its handler and clean its packets """
        if isinstance(obj, Status):
            self.update_status(obj)
            self.clean_buffer(uuid)
        elif isinstance(obj, ReBalance):
            self.logger.info("re-balance received")
            self.storage.cluster_jobs.clear()
            self.cron.remove_all()
            self.cron.write()
            self._buffer.clear()
        elif isinstance(obj, CronItem):
            if obj.remove:
                self.remove_job(obj)
            else:
                self.add_job(obj)
            self.clean_buffer(uuid)
        elif isinstance(obj, Run):
            # run() cleans the buffer itself
            await self.run(obj, uuid)
        elif isinstance(obj, Kill):
            self.kill(obj)
            self.clean_buffer(uuid)
        elif isinstance(obj, Toggle):
            self.toggle_job(obj)
            self.clean_buffer(uuid)

    def put_nowait(self, packet):
        """
        put UDP packets on our queue for processing
        :param packet: UDP packet
        """
        self.queue.put_nowait(packet)
        asyncio.create_task(self.process())
def generate(self, loc, username):
    """
    Create the tab for one spool entry.

    :param loc: spool directory
    :param username: entry name inside `loc`
    :return: a user tab when `self.get_owner` reports `username` as the
             owner of the entry, otherwise a tab read from the file itself
    """
    spool_file = os.path.join(loc, username)
    if username == self.get_owner(spool_file):
        return CronTab(user=username)
    # Abandoned crontab pool entry!
    return CronTab(tabfile=spool_file)
def main():
    """
    entry point

    Parses the command line, configures logging, builds storage, processor
    and scheduler, starts the periodic background work plus the web
    application, and runs the event loop until it is interrupted.
    """
    parser = argparse.ArgumentParser(
        description='Distributed Cronlike Scheduler')

    parser.add_argument('-l', '--log-file', default=None,
                        help='path to store logfile')
    parser.add_argument('-p', '--storage-path', default=None,
                        help='directory where to store cache')
    parser.add_argument('-u', '--udp-communication-port', type=int,
                        default=12345,
                        help='communication port (default: 12345)')
    parser.add_argument('-i', '--broadcast-interval', type=int, default=5,
                        help='interval for broadcasting data over UDP')
    parser.add_argument(
        '-c', '--cron', default=None,
        help='crontab to use (default: /etc/crontab, use `memory` to not save to file)')
    parser.add_argument('-d', '--cron-user', default=None,
                        help='user for storing cron entries')
    parser.add_argument('-w', '--web-port', type=int, default=8080,
                        help='web hosting port (default: 8080)')
    parser.add_argument(
        '-n', '--ntp-server', default='pool.ntp.org',
        help='NTP server to detect clock skew (default: pool.ntp.org)')
    parser.add_argument(
        '-s', '--node-staleness', type=int, default=180,
        help='Time in seconds of non-communication for a node to be marked as stale (default: 180s)')
    parser.add_argument(
        '-x', '--hash-key', default='abracadabra',
        help="String to use for verifying UDP traffic (to disable use '')")
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='verbose logging')

    args = parser.parse_args()

    if get_ntp_offset(args.ntp_server) > 60:
        # raised directly: the `exit` builtin only exists when `site` is loaded
        raise SystemExit("your clock is not in sync (check system NTP settings)")

    root_logger = logging.getLogger()
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setFormatter(logging.Formatter(log_format))
        root_logger.addHandler(file_handler)
    if args.verbose:
        root_logger.setLevel(logging.DEBUG)
    else:
        root_logger.setLevel(logging.INFO)
        logging.getLogger('aiohttp').setLevel(logging.WARNING)

    port = args.udp_communication_port
    # an empty --hash-key disables the key
    hash_key = args.hash_key or None

    pool = ThreadPoolExecutor(4)

    storage = Storage(args.storage_path)

    if not args.cron:
        # no tab given: the processor falls back to its own default tab
        cron, cron_owner = None, '******'
    elif args.cron == 'memory':
        cron, cron_owner = CronTab(tab="""* * * * * command"""), None
    else:
        cron_owner = args.cron_user or '******'
        cron = CronTab(tabfile=args.cron, user=cron_owner)

    # hash_key is handed over so the processor loads received packets and
    # dumps run results with the same key that is used for sending below
    processor = Processor(port, storage, cron=cron, user=cron_owner,
                          hash_key=hash_key)

    with StatusProtocolServer(processor, port) as loop:

        running = True

        scheduler = Scheduler(storage, args.node_staleness)

        def timed_broadcast():
            """
            periodically broadcast system status and known jobs
            """
            while running:
                broadcast(
                    port,
                    UdpSerializer.dump(Status(get_ip(), get_load()), hash_key))
                for job in storage.cluster_jobs:
                    if job.assigned_to == get_ip():
                        job.pid = check_process(job.command)
                    for packet in UdpSerializer.dump(job, hash_key):
                        client(port, packet)
                time.sleep(args.broadcast_interval)

        def timed_schedule():
            """
            periodically check if cluster needs re-balancing
            """
            while running:
                time.sleep(23)
                if not scheduler.check_cluster_state():
                    logger.info("re-balancing cluster")
                    jobs = storage.cluster_jobs.copy()
                    for packet in UdpSerializer.dump(
                            ReBalance(timestamp=datetime.now()), hash_key):
                        client(port, packet)
                    time.sleep(5)
                    # re-send the jobs captured before the re-balance message
                    for job in jobs:
                        for packet in UdpSerializer.dump(job, hash_key):
                            client(port, packet)

        async def scheduled_broadcast():
            await loop.run_in_executor(pool, timed_broadcast)

        async def scheduled_rebalance():
            await loop.run_in_executor(pool, timed_schedule)

        async def save_schedule():
            """
            auto save every 100 seconds
            """
            while running:
                await asyncio.sleep(100)
                await storage.save()

        logger.info("setting broadcast interval to {0} seconds".format(
            args.broadcast_interval))
        loop.create_task(scheduled_broadcast())
        loop.create_task(scheduled_rebalance())
        if args.storage_path:
            loop.create_task(save_schedule())

        logger.info(
            "starting web application server on http://{0}:{1}/".format(
                get_ip(), args.web_port))

        if args.cron_user:
            s = Site(scheduler, storage, port, cron=processor.cron,
                     user=args.cron_user, hash_key=hash_key)
        else:
            s = Site(scheduler, storage, port, cron=processor.cron,
                     hash_key=hash_key)
        runner = AppRunner(s.app)
        loop.run_until_complete(runner.setup())
        site_instance = TCPSite(runner, port=args.web_port)
        loop.run_until_complete(site_instance.start())

        try:
            loop.run_forever()
        except (KeyboardInterrupt, SystemExit):
            logger.info("interrupt received")
        except Exception:
            # still shut down cleanly below, but do not hide what happened
            logger.exception("event loop terminated unexpectedly")

        logger.info("stopping web application")
        loop.run_until_complete(site_instance.stop())

        # the background workers check this flag and end their loops
        running = False

        if args.storage_path:
            loop.create_task(storage.save())

        logger.debug("waiting for background tasks to finish")
        # asyncio.Task.all_tasks() was removed in Python 3.9; the loop is
        # passed explicitly because no loop is running at this point
        pending_tasks = [
            task for task in asyncio.all_tasks(loop) if not task.done()
        ]
        if pending_tasks:
            loop.run_until_complete(asyncio.gather(*pending_tasks))

        pool.shutdown()

    logger.info("elvis has left the building")