Example 1
    def __init__(self, data):
        do_update = False
        success = None
        agent_data = {
            'devicename': None,
            'id_agent': None,
            'sources': [],
            'timeseries': [],
            'timefixed': [],
            'max_timestamp': 0
        }

        ingest = drain.Drain(filename=None, data=data)

        if ingest.valid() is False:
            log_message = (
                'Cache data at timestamp %s is invalid. Ignoring.'
                '') % (ingest.timestamp())
            log.log2warning(1054, log_message)
        else:
            agent_data['timeseries'] = ingest.timeseries()
            agent_data['timefixed'] = ingest.timefixed()
            agent_data['sources'] = ingest.sources()
            agent_data['devicename'] = ingest.devicename()
            agent_data['id_agent'] = ingest.id_agent()
            agent_data['agent_name'] = ingest.agent()
            agent_data['max_timestamp'] = ingest.timestamp()

            # Update and note success
            (success, datapoints_processed) = self._do_update(
                agent_data)
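Example 1 builds the Drain object from an in-memory dict (filename=None, data=data) rather than from a cache file, validates it, and only then copies the parsed fields into agent_data. Below is a minimal, self-contained sketch of this validate-then-extract pattern; the _Drain stand-in and its required keys are illustrative assumptions, not the project's actual drain.Drain implementation.

class _Drain(object):
    """Hypothetical stand-in for drain.Drain fed in-memory data."""

    def __init__(self, data):
        self._data = data if isinstance(data, dict) else {}

    def valid(self):
        # Assume a usable cache record must carry at least these keys
        required = ('timestamp', 'id_agent', 'devicename', 'agent')
        return all(key in self._data for key in required)

    def timestamp(self):
        return self._data.get('timestamp', 0)


record = {'timestamp': 1500000000, 'id_agent': 'abc123',
          'devicename': 'router-01', 'agent': 'snmp'}
ingest = _Drain(record)
if ingest.valid() is False:
    print('Cache data at timestamp %s is invalid.' % ingest.timestamp())
else:
    print('Cache data at timestamp %s is valid.' % ingest.timestamp())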
Example 2
    def test_purge(self):
        """Testing function purge."""
        directory = tempfile.mkdtemp()
        filepath = ('%s/%s_%s_%s.json') % (
            directory, self.data['timestamp'], self.data['id_agent'],
            general.hashstring(self.data['devicename']))
        with open(filepath, 'w') as f_handle:
            json.dump(self.data, f_handle)

        # Create a valid Drain object
        ingest = drain.Drain(filepath)

        # Test
        self.assertEqual(os.path.exists(filepath), True)
        self.assertEqual(os.path.isfile(filepath), True)
        ingest.purge()
        self.assertEqual(os.path.exists(filepath), False)
        self.assertEqual(os.path.isfile(filepath), False)
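The test only pins down purge()'s observable effect: the cache file exists before the call and is gone afterwards. A plausible implementation is a guarded os.remove(); the sketch below is an assumption consistent with the test, not the project's actual method.

import os

def purge(filepath):
    """Hypothetical purge: delete the cache file if it still exists."""
    # The guard makes a second purge (or an already-missing file) harmless
    if os.path.isfile(filepath):
        os.remove(filepath)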
Example 3
    def process(self):
        """Update the database using threads."""
        # Initialize key variables
        do_update = False
        success = None
        ingests = []
        agent_data = {
            'devicename': None,
            'id_agent': None,
            'sources': [],
            'timeseries': [],
            'timefixed': []
        }

        # Get the directory to which failed files will be moved
        failure_directory = self.config.ingest_failures_directory()

        # Initialize other values
        max_timestamp = 0

        # Get start time for activity
        start_ts = time.time()

        # Process the file for each timestamp, starting from the oldest file
        for data_dict in self.metadata:
            # Initialize key variables
            timestamp = data_dict['timestamp']
            filepath = data_dict['filepath']

            # Read in data
            ingest = drain.Drain(filepath)

            # Make sure file is OK
            # Move it to a directory for further analysis
            # by administrators
            if ingest.valid() is False:
                log_message = ('Cache ingest file %s is invalid. Moving.'
                               '') % (filepath)
                log.log2warning(1054, log_message)
                shutil.copy(filepath, failure_directory)
                os.remove(filepath)
                continue

            # Append data
            agent_data['timeseries'].extend(ingest.timeseries())
            agent_data['timefixed'].extend(ingest.timefixed())
            agent_data['sources'].extend(ingest.sources())

            # Append ingest object to a list for later processing
            ingests.append(ingest)

            # Get the max timestamp
            max_timestamp = max(timestamp, max_timestamp)

            # Update information that doesn't change
            if do_update is False:
                agent_data['devicename'] = ingest.devicename()
                agent_data['id_agent'] = ingest.id_agent()
                agent_data['agent_name'] = ingest.agent()

                # Get the PID file for the agent
                pid_file = daemon.pid_file(self.ingester_agent_name)

            # Update the PID file for the agent so that agentd.py doesn't
            # kill the ingest while it processes a long stream of files.
            # If this is run directly via __main__ calling process(), the
            # PID file won't exist, hence the check below.
            if os.path.isfile(pid_file) is True:
                daemon.update_pid(self.ingester_agent_name)

            # Set the update flag
            do_update = True

        # Process the rest
        if do_update is True:
            # Update remaining agent data
            agent_data['max_timestamp'] = max_timestamp

            # Add datapoints to the database
            db_prepare = _PrepareDatabase(agent_data)
            db_prepare.add_datapoints()

            # Get the latest datapoints
            datapoints = db_prepare.get_datapoints()

            # Get the assigned index values for the device and agent
            idx_device = db_prepare.idx_device()
            idx_agent = db_prepare.idx_agent()

            # Update database with data
            db_update = _UpdateDB(agent_data, datapoints)
            success = db_update.update()

            # Update database table timestamps
            update_timestamps = _UpdateLastTimestamp(idx_device, idx_agent,
                                                     max_timestamp)
            update_timestamps.agent()
            update_timestamps.deviceagent()
            update_timestamps.datapoint()

            # Purge source files. This is done only after the database
            # updates complete successfully; otherwise an ingester crash
            # could lose data. On restart the ingester would re-read the
            # files, processing the non-duplicates and deleting duplicates.
            for ingest in ingests:
                ingest.purge()

            # Log duration of activity
            duration = time.time() - start_ts
            if success is True:
                log_message = (
                    'Agent %s was processed from %s cache files in %s '
                    'seconds (%s seconds/file, %s seconds/datapoint)') % (
                        agent_data['id_agent'], len(ingests),
                        round(duration, 4),
                        round(duration / len(ingests), 4),
                        round(duration / len(datapoints), 6))
                log.log2info(1007, log_message)
            else:
                log_message = (
                    'Failed to process all cache files for agent %s. '
                    'Investigate.') % (agent_data['id_agent'])
                log.log2info(1008, log_message)
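The loop above touches the ingester's PID file on every file so that a supervising agentd.py does not conclude the ingester has hung during a long run. Here is a minimal sketch of such a heartbeat, assuming the watchdog judges liveness by the PID file's modification time (the real daemon.update_pid may work differently):

import os
import time

def update_pid(pid_file):
    """Hypothetical heartbeat: refresh the PID file's mtime."""
    if os.path.isfile(pid_file):
        # Touching the file shows recent activity to a watchdog
        os.utime(pid_file, None)

def is_stale(pid_file, max_age=300):
    """Return True if the heartbeat is older than max_age seconds."""
    return (time.time() - os.path.getmtime(pid_file)) > max_age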
Example 4
class TestDrain(unittest.TestCase):
    """Checks all functions and methods."""

    # Initialize key variables
    setup = unittest_setup.TestVariables()
    data = setup.cache_data()

    # Create valid file filled with valid data
    directory = tempfile.mkdtemp()
    filepath = ('%s/%s_%s_%s.json') % (
        directory,
        data['timestamp'],
        data['id_agent'],
        general.hashstring(data['devicename']))
    with open(filepath, 'w') as f_handle:
        json.dump(data, f_handle)

    # Create a valid Drain object
    ingest = drain.Drain(filepath)

    @classmethod
    def tearDownClass(cls):
        """Clean up when all over."""
        # Delete unnecessary files
        os.remove(cls.filepath)

    def test___init__(self):
        """Testing function __init__."""
        pass

    def test_valid(self):
        """Testing function valid."""
        # Test
        result = self.ingest.valid()
        self.assertEqual(result, True)

    def test_id_agent(self):
        """Testing function id_agent."""
        # Test
        result = self.ingest.id_agent()
        self.assertEqual(result, self.data['id_agent'])

    def test_timestamp(self):
        """Testing function timestamp."""
        # Test
        result = self.ingest.timestamp()
        self.assertEqual(result, self.data['timestamp'])

    def test_agent(self):
        """Testing function agent."""
        # Test
        result = self.ingest.agent()
        self.assertEqual(result, self.data['agent'])

    def test_devicename(self):
        """Testing function devicename."""
        # Test
        result = self.ingest.devicename()
        self.assertEqual(result, self.data['devicename'])

    def test_counter32(self):
        """Testing function counter32."""
        # Initialize key variables
        datapoints = _expected(self.data, 32)
        found = 0

        # Test
        results = self.ingest.counter32()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_counter64(self):
        """Testing function counter64."""
        # Initialize key variables
        datapoints = _expected(self.data, 64)
        found = 0

        # Test
        results = self.ingest.counter64()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_floating(self):
        """Testing function floating."""
        # Initialize key variables
        datapoints = _expected(self.data, 1)
        found = 0

        # Test
        results = self.ingest.floating()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_timeseries(self):
        """Testing function timeseries."""
        # Initialize key variables
        datapoints = []
        found = 0

        # Populate datapoints list
        datapoints.extend(_expected(self.data, 1))
        datapoints.extend(_expected(self.data, 32))
        datapoints.extend(_expected(self.data, 64))

        # Test
        results = self.ingest.timeseries()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_timefixed(self):
        """Testing function timefixed."""
        # Initialize key variables
        datapoints = _expected(self.data, None)
        found = 0

        # Test
        results = self.ingest.timefixed()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_sources(self):
        """Testing function sources."""
        # Initialize key variables
        sources = _sources(self.data)
        found = 0

        # Test
        results = self.ingest.sources()
        for source in sources:
            for result in results:
                if result['id_datapoint'] == source['id_datapoint']:
                    self.assertEqual(
                        result['id_agent'], source['id_agent'])
                    self.assertEqual(
                        result['agent_label'], source['agent_label'])
                    self.assertEqual(
                        result['agent_source'], source['agent_source'])
                    self.assertEqual(
                        result['description'], source['description'])
                    self.assertEqual(
                        result['base_type'], source['base_type'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(sources))
        self.assertEqual(len(results), found)

    def test_purge(self):
        """Testing function purge."""
        directory = tempfile.mkdtemp()
        filepath = ('%s/%s_%s_%s.json') % (
            directory,
            self.data['timestamp'],
            self.data['id_agent'],
            general.hashstring(self.data['devicename']))
        with open(filepath, 'w') as f_handle:
            json.dump(self.data, f_handle)

        # Create a valid Drain object
        ingest = drain.Drain(filepath)

        # Test
        self.assertEqual(os.path.exists(filepath), True)
        self.assertEqual(os.path.isfile(filepath), True)
        ingest.purge()
        self.assertEqual(os.path.exists(filepath), False)
        self.assertEqual(os.path.isfile(filepath), False)
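TestDrain relies on module-level helpers _expected() and _sources() that are not shown, and each test matches results to expectations with a quadratic nested loop keyed on id_datapoint. The same comparison can be done with a single dict lookup; the sketch below is a hedged equivalent of that matching step, assuming the datapoint dicts carry the keys the assertions use.

def match_count(results, expected):
    """Count expected datapoints found in results.

    Mirrors the nested loops in Example 4 via a dict keyed on
    'id_datapoint'; the field names are taken from the assertions.
    """
    by_id = {item['id_datapoint']: item for item in results}
    found = 0
    for datapoint in expected:
        result = by_id.get(datapoint['id_datapoint'])
        if result is not None:
            assert result['timestamp'] == datapoint['timestamp']
            assert result['value'] == datapoint['value']
            assert result['id_agent'] == datapoint['id_agent']
            found += 1
    return found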
Example 5
    def process(self):
        """Update the database using threads."""
        # Initialize key variables
        do_update = False
        success = None
        ingests = []
        agent_data = {
            'devicename': None,
            'id_agent': None,
            'sources': [],
            'timeseries': [],
            'timefixed': [],
            'max_timestamp': 0
        }

        # Get the directory to which failed files will be moved
        failure_directory = self.config.ingest_failures_directory()

        # Get start time for activity
        start_ts = time.time()

        # Process the file for each timestamp, starting from the oldest file
        for data_dict in self.metadata:
            # Initialize key variables
            timestamp = data_dict['timestamp']
            filepath = data_dict['filepath']

            # Read in data
            ingest = drain.Drain(filepath)

            # Make sure file is OK
            # Move it to a directory for further analysis
            # by administrators
            if ingest.valid() is False:
                log_message = ('Cache ingest file %s is invalid. Moving.'
                               '') % (filepath)
                log.log2warning(1054, log_message)
                shutil.copy(filepath, failure_directory)
                os.remove(filepath)
                continue

            # Append data
            agent_data['timeseries'].extend(ingest.timeseries())
            agent_data['timefixed'].extend(ingest.timefixed())
            agent_data['sources'].extend(ingest.sources())

            # Append ingest object to a list for later processing
            ingests.append(ingest)

            # Track the max timestamp across all files (done
            # unconditionally so a single-file run is also counted)
            agent_data['max_timestamp'] = max(
                timestamp, agent_data['max_timestamp'])

            # Update information that doesn't change
            if do_update is False:
                agent_data['devicename'] = ingest.devicename()
                agent_data['id_agent'] = ingest.id_agent()
                agent_data['agent_name'] = ingest.agent()

                # Get the PID file for the agent
                pid_file = daemon.pid_file(self.ingester_agent_name)

            # Update the PID file for the agent so that agentd.py doesn't
            # kill the ingest while it processes a long stream of files.
            # If this is run directly via __main__ calling process(), the
            # PID file won't exist, hence the check below.
            if os.path.isfile(pid_file) is True:
                daemon.update_pid(self.ingester_agent_name)

            # Set the update flag
            do_update = True

        # Process the rest
        if do_update is True:
            # Update and note success
            (success,
             datapoints_processed) = self._do_update(agent_data, ingests)

            # Log duration of activity
            duration = time.time() - start_ts
            if success is True:
                log_message = (
                    'Agent %s was processed from %s cache files in %s '
                    'seconds (%s seconds/file, %s seconds/datapoint)') % (
                        agent_data['id_agent'], len(ingests),
                        round(duration, 4),
                        round(duration / len(ingests), 4),
                        round(duration / datapoints_processed, 6))
                log.log2info(1007, log_message)
            else:
                log_message = (
                    'Failed to process all cache files for agent %s. '
                    'Investigate.') % (agent_data['id_agent'])
                log.log2info(1008, log_message)
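Example 5 is Example 3 refactored: database preparation, the update itself, timestamp bookkeeping and the purge now live behind a single self._do_update(agent_data, ingests) call returning (success, datapoints_processed). Based on Example 3's inline version, this sketch shows what the method plausibly consolidates; gating the purge on success follows the intent of Example 3's comment, and the actual method may differ.

    def _do_update(self, agent_data, ingests):
        """Hypothetical consolidation of Example 3's inline steps."""
        # Add datapoints to the database and fetch the latest view
        db_prepare = _PrepareDatabase(agent_data)
        db_prepare.add_datapoints()
        datapoints = db_prepare.get_datapoints()

        # Update the database with the agent's data
        db_update = _UpdateDB(agent_data, datapoints)
        success = db_update.update()

        # Update the last-seen timestamps for device, agent and datapoint
        update_timestamps = _UpdateLastTimestamp(
            db_prepare.idx_device(), db_prepare.idx_agent(),
            agent_data['max_timestamp'])
        update_timestamps.agent()
        update_timestamps.deviceagent()
        update_timestamps.datapoint()

        # Purge source files only after the database work succeeded
        if success is True:
            for ingest in ingests:
                ingest.purge()

        return (success, len(datapoints))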