Exemple #1
0
def main():
    """Run the copytool monitor process until it is asked to stop.

    Parses the ``copytool_id`` command-line argument, sets up copytool
    logging, builds the manager's ``copytool_event/`` URL from the
    ``settings``/``server`` configuration entry, then starts a
    ``CopytoolMonitor`` and waits for it to finish.

    Signal handling installed here:
        SIGTERM, SIGINT -- call ``monitor.stop()``.
        SIGUSR1 -- ``decrease_loglevel``.
        SIGUSR2 -- ``increase_loglevel``.

    Exits with status 1 if the server URL cannot be read from the
    configuration, or if an exception escapes while the monitor is running.
    """
    parser = ArgumentParser(
        description="Intel® Manager for Lustre* software Copytool Monitor")
    # NOTE(review): the monitor below reads ``args.copytool``, so the custom
    # GetCopytoolAction presumably stores its result under that attribute
    # rather than ``copytool_id`` -- confirm against the action's definition.
    parser.add_argument("copytool_id", action=GetCopytoolAction)
    args = parser.parse_args()

    copytool_log_setup()

    try:
        manager_url = config.get('settings',
                                 'server')['url'] + "copytool_event/"
    except KeyError:
        copytool_log.error(
            "No configuration found (must be configured before starting a copytool monitor)"
        )
        sys.exit(1)

    client = CryptoClient(manager_url, Crypto(config.path))
    monitor = CopytoolMonitor(client, args.copytool)

    # Signal handlers are invoked as handler(signum, frame); neither value is
    # needed here. The parameters are named so they do not shadow the parsed
    # command-line ``args`` above.
    def teardown_callback(*_args, **_kwargs):
        monitor.stop()

    signal.signal(signal.SIGTERM, teardown_callback)
    signal.signal(signal.SIGINT, teardown_callback)
    signal.signal(signal.SIGUSR1, decrease_loglevel)
    signal.signal(signal.SIGUSR2, increase_loglevel)

    try:
        monitor.start()
        # Wake up every 10 seconds instead of blocking indefinitely on the
        # stop flag.
        while not monitor.stopping.is_set():
            monitor.stopping.wait(timeout=10)

        monitor.join()
    except Exception as e:
        # Logger.exception() requires a message argument; calling it with
        # none raises TypeError inside this handler and hides the real error.
        copytool_log.exception("Unhandled exception in copytool monitor")
        sys.stderr.write("Unhandled exception: %s\n" % e)
        sys.exit(1)

    copytool_log.info("Terminating")
Exemple #2
0
    def send(self):
        """Collect queued events into one envelope and POST it.

        Events are taken from ``retry_queue`` first and then from
        ``send_queue`` (whose entries are JSON strings). Each accepted event
        has its ``event_time`` rewritten as a ``+00:00`` timestamp string and
        is tagged with ``active_operation`` when one is tracked for its
        ``data_fid``.

        Collection stops when both queues are empty, when an event is
        rejected (invalid JSON, invalid date, or larger than
        ``MAX_BYTES_PER_POST`` on its own), or when the next event does not
        fit in the remaining space (that event goes back on ``retry_queue``).

        Nothing is posted when no events were collected. If the POST raises
        ``HttpError``, every collected event is put on ``retry_queue`` and
        ``backoff()`` is called; otherwise ``reset_backoff()`` is called.
        """
        batch = []
        envelope = {
            'fqdn': self.client.fqdn,
            'copytool': self.copytool.id,
            'events': batch,
        }

        # Running size estimate: the empty envelope plus every accepted event.
        payload_size = len(json.dumps(envelope))

        while True:
            # Previously failed events are retried before new ones are read.
            try:
                evt = self.retry_queue.get_nowait()
                copytool_log.debug("Got event from retry queue: %s" % evt)
            except Queue.Empty:
                try:
                    raw = self.send_queue.get_nowait()
                    evt = json.loads(raw)
                    copytool_log.debug("Got event from send queue: %s" % evt)
                except Queue.Empty:
                    break
                except ValueError:
                    copytool_log.error("Invalid JSON: %s" % raw)
                    break

            # Normalise the timestamp to a zero-offset string.
            try:
                parsed = IMLDateTime.parse(evt['event_time'])
                zero_offset = parsed.astimezone(tz=FixedOffset(0))
                evt['event_time'] = zero_offset.strftime(
                    "%Y-%m-%d %H:%M:%S+00:00")
            except ValueError as err:
                copytool_log.error("Invalid event date in event '%s': %s" %
                                   (evt, err))
                break

            event_type = evt['event_type']

            # A restore's data_fid is only known once the operation is
            # RUNNING, and on completion the source_fid is reported as the
            # data_fid. Re-key the tracked operation now so it is still found
            # when the operation finishes.
            if ('RUNNING' in event_type
                    and evt['source_fid'] in self.active_operations):
                operation = self.active_operations.pop(evt['source_fid'])
                self.active_operations[evt['data_fid']] = operation

            operation = self.active_operations.get(evt.get('data_fid'))
            if operation:
                evt['active_operation'] = operation

            if 'FINISH' in event_type:
                # Stop tracking the operation; a missing entry is not an
                # error.
                try:
                    self.active_operations.pop(evt['data_fid'])
                except KeyError:
                    pass

            serialized = json.dumps(evt)
            copytool_log.debug("event: %s" % serialized)
            evt_size = len(serialized)

            # An event that can never fit in a single POST is dropped.
            if evt_size > MAX_BYTES_PER_POST:
                copytool_log.error("Oversized event dropped: %s" % evt)
                break

            # An event that does not fit in this POST is deferred.
            if batch and evt_size > MAX_BYTES_PER_POST - payload_size:
                copytool_log.info("Requeueing oversized message "
                                  "(%d + %d > %d, %d messages)" %
                                  (evt_size, payload_size,
                                   MAX_BYTES_PER_POST, len(batch)))
                self.retry_queue.put(evt)
                break

            batch.append(evt)
            payload_size += evt_size

        if not batch:
            return

        copytool_log.debug("EventRelay sending %d events" % len(batch))
        try:
            response = self.client.post(envelope)
            copytool_log.debug("Got data back from POST: %s" % response)
            # The response may carry active operations to track; ignore a
            # response that has none or is not subscriptable.
            try:
                self.active_operations.update(response['active_operations'])
            except (KeyError, TypeError):
                pass
            # A successful POST clears any backoff delay added earlier.
            self.reset_backoff()
        except HttpError:
            copytool_log.error("Failed to relay events, requeueing")
            for unsent in batch:
                self.retry_queue.put(unsent)
            self.backoff()