def main():
    """Command-line entry point for the copytool monitor.

    Parses the copytool id from the command line, sets up logging, builds
    the manager URL from the agent configuration, then runs a
    CopytoolMonitor until it is asked to stop.

    Signals:
        SIGTERM / SIGINT -- ask the monitor to stop.
        SIGUSR1 / SIGUSR2 -- decrease / increase the log level.

    Exits with status 1 if the server configuration is missing or if an
    unhandled exception escapes the monitor.
    """
    parser = ArgumentParser(
        description="Intel® Manager for Lustre* software Copytool Monitor")
    parser.add_argument("copytool_id", action=GetCopytoolAction)
    args = parser.parse_args()

    copytool_log_setup()

    try:
        manager_url = config.get('settings', 'server')['url'] + "copytool_event/"
    except KeyError:
        copytool_log.error(
            "No configuration found (must be configured before starting a copytool monitor)"
        )
        sys.exit(1)

    client = CryptoClient(manager_url, Crypto(config.path))
    # NOTE(review): the positional is declared as "copytool_id" but read back
    # as "copytool"; presumably GetCopytoolAction stores the resolved object
    # under that name -- confirm against the action's definition.
    monitor = CopytoolMonitor(client, args.copytool)

    def teardown_callback(*_signal_args, **_signal_kwargs):
        # Signal handlers receive (signum, frame); neither is needed here.
        monitor.stop()

    signal.signal(signal.SIGTERM, teardown_callback)
    signal.signal(signal.SIGINT, teardown_callback)
    signal.signal(signal.SIGUSR1, decrease_loglevel)
    signal.signal(signal.SIGUSR2, increase_loglevel)

    try:
        monitor.start()
        # Wait in bounded slices instead of one indefinite wait.
        # NOTE(review): presumably so the main thread stays responsive to
        # signals -- confirm.
        while not monitor.stopping.is_set():
            monitor.stopping.wait(timeout=10)

        monitor.join()
    except Exception as e:
        # Logger.exception() requires a message; calling it with no
        # arguments raises TypeError and hides the original failure.
        copytool_log.exception("Unhandled exception")
        sys.stderr.write("Unhandled exception: %s\n" % e)
        sys.exit(1)

    copytool_log.info("Terminating")
def send(self):
    """Collect queued copytool events and POST them as one batch.

    Events are pulled from the retry queue first and, once that is empty,
    from the send queue (whose entries are JSON strings). Each event has
    its ``event_time`` re-expressed at a zero offset and formatted as
    ``YYYY-MM-DD HH:MM:SS+00:00``, and is matched against
    ``self.active_operations`` before joining the batch.

    Collection stops when both queues are empty, when an entry has invalid
    JSON or an invalid date, when a single event is larger than
    MAX_BYTES_PER_POST (it is dropped), or when the next event no longer
    fits next to the ones already gathered (it goes to the retry queue).

    If the POST raises HttpError, every event in the batch is put on the
    retry queue and ``self.backoff()`` is called; otherwise
    ``self.reset_backoff()`` is called.
    """
    batch = []
    payload = {
        'fqdn': self.client.fqdn,
        'copytool': self.copytool.id,
        'events': batch,
    }
    payload_bytes = len(json.dumps(payload))

    while True:
        try:
            event = self.retry_queue.get_nowait()
        except Queue.Empty:
            # Nothing waiting for a retry: take a fresh, still-encoded event.
            try:
                raw = self.send_queue.get_nowait()
                event = json.loads(raw)
                copytool_log.debug("Got event from send queue: %s" % event)
            except Queue.Empty:
                break
            except ValueError:
                copytool_log.error("Invalid JSON: %s" % raw)
                break
        else:
            copytool_log.debug("Got event from retry queue: %s" % event)

        try:
            stamp = IMLDateTime.parse(event['event_time'])
            zero_offset_stamp = stamp.astimezone(tz=FixedOffset(0))
            event['event_time'] = zero_offset_stamp.strftime(
                "%Y-%m-%d %H:%M:%S+00:00")
        except ValueError as err:
            copytool_log.error("Invalid event date in event '%s': %s"
                               % (event, err))
            break

        # A restore's data_fid is unknown until the operation is RUNNING,
        # and on completion source_fid is set to data_fid. Re-key the
        # tracked operation from source_fid to data_fid here so it is not
        # lost.
        if ('RUNNING' in event['event_type']
                and event['source_fid'] in self.active_operations):
            tracked = self.active_operations.pop(event['source_fid'])
            self.active_operations[event['data_fid']] = tracked

        if self.active_operations.get(event.get('data_fid')):
            event['active_operation'] = self.active_operations[
                event['data_fid']]

        if 'FINISH' in event['event_type']:
            try:
                self.active_operations.pop(event['data_fid'])
            except KeyError:
                pass

        encoded = json.dumps(event)
        copytool_log.debug("event: %s" % encoded)

        encoded_len = len(encoded)
        if encoded_len > MAX_BYTES_PER_POST:
            copytool_log.error("Oversized event dropped: %s" % event)
            break

        room_left = MAX_BYTES_PER_POST - payload_bytes
        if batch and encoded_len > room_left:
            copytool_log.info("Requeueing oversized message "
                              "(%d + %d > %d, %d messages)"
                              % (encoded_len, payload_bytes,
                                 MAX_BYTES_PER_POST, len(batch)))
            self.retry_queue.put(event)
            break

        batch.append(event)
        payload_bytes += encoded_len

    if not batch:
        return

    copytool_log.debug("EventRelay sending %d events" % len(batch))
    try:
        response = self.client.post(payload)
        copytool_log.debug("Got data back from POST: %s" % response)
        try:
            self.active_operations.update(response['active_operations'])
        except (KeyError, TypeError):
            # The response carried no usable active_operations mapping.
            pass
        # Clear any backoff delay accumulated by earlier failures.
        self.reset_backoff()
    except HttpError:
        copytool_log.error("Failed to relay events, requeueing")
        for unsent in payload['events']:
            self.retry_queue.put(unsent)
        self.backoff()