Ejemplo n.º 1
0
def _generate_network_statistics(kwargs):
    """Generate network statistics for a path and write them out again.

    Reads the ``path`` and ``output_path`` entries of *kwargs* (both fall
    back to ``None``), iterates the statistics produced by
    ``FileDataSource.network_statistics`` with ``stateful=True`` and hands
    every item to ``FileDataSource.write_network_statistics``.

    :param kwargs: mapping that may contain ``path`` and ``output_path``
    """
    # everything runs inside the exception frame, including the setup
    with ExceptionFrame():
        source = FileDataSource()
        input_path = kwargs.get("path")
        destination = kwargs.get("output_path")
        statistics = source.network_statistics(path=input_path, stateful=True)
        for item in statistics:
            source.write_network_statistics(data=item, path=destination)
Ejemplo n.º 2
0
    def test_prepare_traffic(self):
        """Check the failure modes and the outcome of ``Job.prepare_traffic``."""
        # a job without any data source must refuse to prepare traffic
        job = Job()
        self.assertRaises(NoDataSourceException, job.prepare_traffic)

        # a data source alone is not enough, the job also needs a file path
        job = Job(data_source=FileDataSource())
        self.assertRaises(FilePathException, job.prepare_traffic)

        source = FileDataSource()
        job_parser = JobParser(data_source=source)
        csv_reader = CSVReader()
        csv_reader.parser = job_parser
        process_file = os.path.join(
            os.path.dirname(gnmutils_tests.__file__),
            "data/c00-001-001/1/1-process.csv")
        for job in job_parser.parse(path=process_file):
            job.prepare_traffic()

        # the checks below run against the last job yielded by the parser
        traffic_total = sum(len(process.traffic) for process in job.processes())
        self.assertEqual(traffic_total, 3155)
        self.assertEqual(job.db_id, "1")
        self.assertEqual(job.job_id, 4165419)
        self.assertEqual(job.gpid, 30726)
        self.assertEqual(job.uid, 14808)
        self.assertEqual(job.tme, 1405011331)
        self.assertEqual(job.exit_tme, 1405065581)
        self.assertEqual(job.exit_code, 0)
        self.assertEqual(len(job.faulty_nodes), 1)
        job.regenerate_tree()
Ejemplo n.º 3
0
class TestFileDataSource(unittest.TestCase):
    """Tests for ``FileDataSource`` using the ``data/c00-001-001`` directory."""

    def setUp(self):
        """Create a data source and resolve the test data directory."""
        self.dataSource = FileDataSource()
        tests_root = os.path.dirname(gnmutils_tests.__file__)
        self.path = os.path.join(tests_root, "data/c00-001-001")

    def test_isAvailable(self):
        """The data source has to report itself as available."""
        available = self.dataSource.is_available()
        self.assertTrue(available)

    def test_jobs(self):
        """Exactly one job, and never ``None``, is expected below the path."""
        seen = 0
        for job in self.dataSource.jobs(path=self.path):
            self.assertIsNotNone(job)
            seen += 1
        # index of the last job; -1 when nothing was yielded at all
        self.assertEqual(seen - 1, 0)
Ejemplo n.º 4
0
    def jobs(self, **kwargs):
        """
        Yield jobs either straight from the files or selected via the database.

        If the ``source`` keyword contains ``"raw"``, the lookup is delegated
        to :py:meth:`FileDataSource.jobs`. Otherwise the database is queried
        for valid and completed jobs that match the directory level of
        ``path`` and every hit is read via :py:meth:`FileDataSource.read_job`.

        :param path: directory or file to look for jobs, defaults to
            ``self.default_path``
        :param data_path: not read here; only forwarded with the remaining
            keyword arguments when delegating to :py:meth:`FileDataSource.jobs`
        :param source: ``"processed"`` (default) or a value containing ``"raw"``
        :return: generator of jobs
        """
        if "raw" in kwargs.get("source", "processed"):
            for job in FileDataSource.jobs(self, **kwargs):
                yield job
        else:
            with SQLCommand(dataSource=self._db_data_source) as sql_command:
                path = kwargs.get("path", self.default_path)
                level = directory_level(path)
                # template object used to select matching jobs from the database
                job_object = DBJobObject(valid=True, completed=True)
                if level == RUN_LEVEL:
                    # The fallback needs four entries to match the unpacking;
                    # a shorter tuple raises ValueError when nothing is found.
                    # NOTE(review): with the fallback, workernode and run are
                    # None - confirm load_or_create_workernode accepts that.
                    _, workernode, run, _ = next(
                        relevant_directories(path=path),
                        (None, None, None, None))
                    job_object.run = run
                    workernode_object = self._db_operator.load_or_create_workernode(data=workernode)
                    job_object.workernode_id = workernode_object.id_value
                elif level == WORKERNODE_LEVEL:
                    # the last path component is taken as the workernode
                    workernode = os.path.split(path)[1]
                    workernode_object = self._db_operator.load_or_create_workernode(data=workernode)
                    job_object.workernode_id = workernode_object.id_value
                elif level == FILE_LEVEL:
                    # the part of the file name before the first "-" is the id
                    job_object = DBJobObject(
                        id=os.path.basename(path).split("-")[0], valid=True, completed=True)

                for job_result in sql_command.find(job_object):
                    current_path = path
                    if level == BASE_LEVEL:
                        # join different workernodes and runs
                        workernode_object = self._db_operator.load_one(
                            data=DBWorkernodeObject(id=job_result.workernode_id)
                        )
                        current_path = os.path.join(
                            path, workernode_object.name, job_result.run)
                    elif level == WORKERNODE_LEVEL:
                        # join different runs
                        current_path = os.path.join(path, job_result.run)
                    elif level == FILE_LEVEL:
                        # read from the directory that contains the given file
                        current_path = os.path.dirname(path)

                    for job in FileDataSource.read_job(
                            self,
                            path=current_path,
                            name=job_result.id_value):
                        yield job
Ejemplo n.º 5
0
    def test_from_job(self):
        """Build a prototype from a job file and check its size and node order."""
        file_path = os.path.join(
            os.path.dirname(assess_tests.__file__),
            "data/c01-007-102/1/1-process.csv"
        )
        data_source = FileDataSource()
        # start from None so that a data source yielding no job fails the
        # assertion below instead of raising a NameError
        prototype = None
        for job in data_source.jobs(path=file_path):
            prototype = Prototype.from_job(job)
        self.assertIsNotNone(prototype)
        self.assertEqual(prototype.node_count(), 9109)

        # with order_first=True the nodes must not go back in time
        last_tme = 0
        for node in prototype.nodes(order_first=True):
            self.assertLessEqual(last_tme, node.tme)
            last_tme = node.tme
Ejemplo n.º 6
0
 def read_job(self, **kwargs):
     """
     Look up a job in the database and read it from file.

     :param data: job whose ``workernode``, ``configuration``, ``run``,
         ``gpid`` and ``tme`` are used to find the database entry
     :param path: path handed to :py:meth:`FileDataSource.read_job`,
         defaults to ``self.default_path``
     :return: result of :py:meth:`FileDataSource.read_job` for the id of the
         database entry, or ``None`` if the database has no matching entry
     :raises RethrowException: if loading the job from the database fails
     """
     job = kwargs.get("data", None)
     workernode_object = self._db_operator.load_or_create_workernode(data=job.workernode)
     configuration_object = self._db_operator.load_or_create_configuration(
         data=job.configuration
     )
     job_object = DBJobObject(
         run=job.run, gpid=job.gpid, tme=job.tme, workernode_id=workernode_object.id_value,
         configuration_id=configuration_object.id_value)
     try:
         job_object = self._db_operator.load_job(data=job_object)
     except Exception:
         # Only wrap real errors: a bare except would also turn
         # KeyboardInterrupt and SystemExit into a RethrowException.
         raise RethrowException("The job has not been found")

     logger = logging.getLogger(self.__class__.__name__)
     if job_object is None:
         logger.warning(
             "did not find job (run=%s, gpid=%s, tme=%s, workernode_id=%s) in database",
             job.run, job.gpid, job.tme, workernode_object.id_value
         )
         return None

     logger.debug("loaded job %d from database", job_object.id_value)
     return FileDataSource.read_job(
         self,
         path=kwargs.get("path", self.default_path),
         name=job_object.id_value)
Ejemplo n.º 7
0
 def test_processes_in_order(self):
     """Check that ``processes_in_order`` yields processes sorted by ``tme``.

     For consecutive processes sharing a ``tme`` the pid has to grow, or the
     sum of both pids modulo 32768 has to stay below 500.
     """
     source = FileDataSource()
     for job in source.jobs(path=self._file_path()):
         previous_tme = 0
         previous_pid = 0
         for current in job.processes_in_order():
             self.assertTrue(current.tme >= previous_tme)
             if previous_tme != current.tme:
                 # NOTE(review): the pid is reset to 0 here, so the first
                 # process of a new tme is not remembered for the pid check
                 previous_pid = 0
             else:
                 # same tme as before: also check for pid
                 pid_grows = current.pid > previous_pid
                 self.assertTrue(
                     pid_grows
                     or ((previous_pid + current.pid) % 32768 < 500),
                     "%d vs %d" % (previous_pid, current.pid))
                 previous_pid = current.pid
             previous_tme = current.tme
Ejemplo n.º 8
0
 def test_processes(self):
     """Check the order of the children of every node in the job tree.

     A child may only have a smaller pid than its predecessor if its ``tme``
     is larger than the predecessor's ``tme``.
     """
     source = FileDataSource()
     for job in source.jobs(path=self._file_path()):
         visited = 0
         for node, _depth in job.tree.walkDFS():
             visited += 1
             # check pid order of children
             previous_pid = 0
             previous_tme = 0
             for child in node.children:
                 pid = child.value.pid
                 tme = child.value.tme
                 in_pid_order = pid >= previous_pid
                 smaller_pid_but_later = pid < previous_pid and previous_tme < tme
                 failure_message = "%d: initial %d differs %d (%s)" % (
                     visited, previous_pid, pid,
                     [(sibling.value.pid, sibling.value.tme)
                      for sibling in node.children])
                 self.assertTrue(
                     in_pid_order or smaller_pid_but_later, failure_message)
                 previous_pid = pid
                 previous_tme = tme
Ejemplo n.º 9
0
 def write_job(self, **kwargs):
     """
     Store a job in the database and write it to file.

     After the database object has been saved, its ``id_value`` is copied to
     ``job.db_id`` and the job is written via
     :py:meth:`FileDataSource.write_job` with the unchanged keyword
     arguments.

     :param data: job to save (required)
     :return: the job that was passed in
     :raises KeyError: if ``data`` is missing
     :raises RethrowException: if saving to the database or writing the
         file fails
     """
     job = kwargs["data"]
     workernode_object = self._db_operator.load_or_create_workernode(data=job.workernode)
     configuration_object = self._db_operator.load_or_create_configuration(
         data=job.configuration
     )
     job_object = DBJobObject(
         job_id=job.job_id, run=job.run, uid=job.uid, gpid=job.gpid, tme=job.tme,
         exit_tme=job.exit_tme, workernode_id=workernode_object.id_value,
         configuration_id=configuration_object.id_value, valid=job.is_valid(),
         last_tme=job.last_tme, completed=job.is_complete())
     try:
         self._db_operator.save_or_update(data=job_object)
         job.db_id = job_object.id_value
         FileDataSource.write_job(self, **kwargs)
     except Exception:
         # Only wrap real errors: a bare except would also turn
         # KeyboardInterrupt and SystemExit into a RethrowException.
         raise RethrowException("The job could not be created")

     logging.getLogger(self.__class__.__name__).debug(
         "saved job for index %s", job_object.id_value
     )
     return job
Ejemplo n.º 10
0
 def __init__(self, path, data_source=None):
     """Set up the cache location, its data source and environment flags.

     ``$DISS_PROTOTYPE_CACHE_REFRESH`` and
     ``$DISS_PROTOTYPE_CACHE_PRELOADED_ONLY`` are converted with ``bool``,
     so any non-empty value (including ``"0"``) switches the flag on.

     :param path: stored as ``self.path``
     :param data_source: data source to use; a new ``FileDataSource`` is
         created when omitted or ``None``
     """
     self.path = path
     if data_source is None:
         data_source = FileDataSource()
     self.data_source = data_source
     self._logger = logging.getLogger('cache.prototypes')

     refresh_setting = os.environ.get('DISS_PROTOTYPE_CACHE_REFRESH', False)
     self.force_refresh = bool(refresh_setting)
     if self.force_refresh:
         self._logger.warning(
             'Forcefully refreshing caches '
             '(enabled via $DISS_PROTOTYPE_CACHE_REFRESH)')

     preloaded_setting = os.environ.get(
         'DISS_PROTOTYPE_CACHE_PRELOADED_ONLY', False)
     self.preloaded_only = bool(preloaded_setting)
     if self.preloaded_only:
         self._logger.warning(
             'Only working with preloaded caches '
             '(enabled via $DISS_PROTOTYPE_CACHE_PRELOADED_ONLY)')
 def test_parsing(self):
     """Parse the traffic and process files and check the event count."""
     source = FileDataSource()
     reader = CSVReader()
     stats_parser = NetworkStatisticsParser(data_source=source)
     reader.parser = stats_parser

     # nothing is returned by networkstatisticsparser, the loops only
     # drive the parsing
     traffic_path = self.traffic_file_path()
     for _ in stats_parser.parse(path=traffic_path):
         pass
     process_path = self.process_file_path()
     for _ in stats_parser.parse(path=process_path):
         pass

     event_total = sum(
         networkstats.event_count
         for data in stats_parser.pop_data()
         for networkstats in data.values())
     self.assertEqual(event_total, 19998)

     stats_parser.check_caches()
     stats_parser.clear_caches()
     self.assertEqual(stats_parser.data, {})
Ejemplo n.º 12
0
 def setUp(self):
     """Create a data source and resolve the test data directory."""
     self.dataSource = FileDataSource()
     tests_root = os.path.dirname(gnmutils_tests.__file__)
     self.path = os.path.join(tests_root, "data/c00-001-001")
Ejemplo n.º 13
0
 def __init__(self):
     """Initialise the file data source part and attach the database access.

     A ``DBDataSource`` for the connection named ``"default"`` is created
     and wrapped by a ``DBOperator``.
     """
     FileDataSource.__init__(self)
     db_source = DBDataSource(connectionName="default")
     self._db_data_source = db_source
     self._db_operator = DBOperator(data_source=db_source)