def _generate_network_statistics(kwargs):
    with ExceptionFrame():
        data_source = FileDataSource()
        path = kwargs.get("path", None)
        output_path = kwargs.get("output_path", None)
        for stats in data_source.network_statistics(path=path, stateful=True):
            data_source.write_network_statistics(data=stats, path=output_path)
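# Usage sketch for the worker above (hypothetical paths; note that it takes a
# plain dict rather than **kwargs, which matches dispatching it as a
# standalone task):
_generate_network_statistics({
    "path": "data/c00-001-001/1/1-traffic.csv",  # hypothetical input file
    "output_path": "/tmp/network-statistics",    # hypothetical output location
})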
def test_prepare_traffic(self):
    job = Job()
    self.assertRaises(NoDataSourceException, job.prepare_traffic)
    job = Job(data_source=FileDataSource())
    self.assertRaises(FilePathException, job.prepare_traffic)

    data_source = FileDataSource()
    parser = JobParser(data_source=data_source)
    reader = CSVReader()
    reader.parser = parser
    for job in parser.parse(path=os.path.join(
            os.path.dirname(gnmutils_tests.__file__),
            "data/c00-001-001/1/1-process.csv")):
        job.prepare_traffic()
        count = 0
        for process in job.processes():
            count += len(process.traffic)
        self.assertEqual(count, 3155)
        self.assertEqual(job.db_id, "1")
        self.assertEqual(job.job_id, 4165419)
        self.assertEqual(job.gpid, 30726)
        self.assertEqual(job.uid, 14808)
        self.assertEqual(job.tme, 1405011331)
        self.assertEqual(job.exit_tme, 1405065581)
        self.assertEqual(job.exit_code, 0)
        self.assertEqual(len(job.faulty_nodes), 1)
        job.regenerate_tree()
class TestFileDataSource(unittest.TestCase):
    def setUp(self):
        self.dataSource = FileDataSource()
        self.path = os.path.join(os.path.dirname(gnmutils_tests.__file__),
                                 "data/c00-001-001")

    def test_isAvailable(self):
        self.assertTrue(self.dataSource.is_available())

    def test_jobs(self):
        index = -1
        for index, job in enumerate(self.dataSource.jobs(path=self.path)):
            self.assertIsNotNone(job)
        self.assertEqual(index, 0)
def jobs(self, **kwargs):
    """
    Yield jobs either directly from raw files or, for processed data, by
    first resolving the matching job records from the database.

    :param path: base path to read jobs from
    :param data_path: path to write data to
    :param source: "raw" to read from files only, "processed" (default)
        to resolve jobs via the database first
    :return: generator over jobs
    """
    if "raw" in kwargs.get("source", "processed"):
        for job in FileDataSource.jobs(self, **kwargs):
            yield job
    else:
        with SQLCommand(dataSource=self._db_data_source) as sql_command:
            path = kwargs.get("path", self.default_path)
            level = directory_level(path)
            job_object = DBJobObject(valid=True, completed=True)
            if level == RUN_LEVEL:
                _, workernode, run, _ = next(
                    relevant_directories(path=path), (None, None, None, None))
                job_object.run = run
                workernode_object = self._db_operator.load_or_create_workernode(
                    data=workernode)
                job_object.workernode_id = workernode_object.id_value
            elif level == WORKERNODE_LEVEL:
                workernode = os.path.split(path)[1]
                workernode_object = self._db_operator.load_or_create_workernode(
                    data=workernode)
                job_object.workernode_id = workernode_object.id_value
            elif level == FILE_LEVEL:
                job_object = DBJobObject(
                    id=os.path.basename(path).split("-")[0],
                    valid=True, completed=True)
            for job_result in sql_command.find(job_object):
                current_path = path
                if level == BASE_LEVEL:
                    # join different workernodes and runs
                    workernode_object = self._db_operator.load_one(
                        data=DBWorkernodeObject(id=job_result.workernode_id))
                    current_path = os.path.join(
                        path, workernode_object.name, job_result.run)
                elif level == WORKERNODE_LEVEL:
                    # join different runs
                    current_path = os.path.join(path, job_result.run)
                elif level == FILE_LEVEL:
                    current_path = os.path.dirname(path)
                for job in FileDataSource.read_job(
                        self, path=current_path, name=job_result.id_value):
                    yield job
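# Usage sketch for jobs() (hypothetical class name and path; the enclosing
# class is assumed to be the DB-backed file data source whose __init__ appears
# at the end of this section): depending on the directory level of `path`,
# matching job records are first resolved from the database, then read back
# from disk.
data_source = DBBackedFileDataSource()  # hypothetical name
for job in data_source.jobs(path="data/c00-001-001", source="processed"):
    print(job.job_id)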
def test_from_job(self):
    file_path = os.path.join(
        os.path.dirname(assess_tests.__file__),
        "data/c01-007-102/1/1-process.csv")
    data_source = FileDataSource()
    for job in data_source.jobs(path=file_path):
        prototype = Prototype.from_job(job)
        self.assertIsNotNone(prototype)
        self.assertEqual(prototype.node_count(), 9109)
        last_tme = 0
        for node in prototype.nodes(order_first=True):
            self.assertTrue(last_tme <= node.tme)
            last_tme = node.tme
def read_job(self, **kwargs):
    """
    Look up the given job in the database and, if found, read it back
    from file via its database id.

    :param data: the job to look up
    :param path: base path to read the job from
    :return: the job read from file, or None if it is not in the database
    """
    job = kwargs.get("data", None)
    workernode_object = self._db_operator.load_or_create_workernode(data=job.workernode)
    configuration_object = self._db_operator.load_or_create_configuration(
        data=job.configuration)
    job_object = DBJobObject(
        run=job.run, gpid=job.gpid, tme=job.tme,
        workernode_id=workernode_object.id_value,
        configuration_id=configuration_object.id_value)
    try:
        job_object = self._db_operator.load_job(data=job_object)
    except Exception:
        raise RethrowException("The job has not been found")
    else:
        if job_object is not None:
            logging.getLogger(self.__class__.__name__).debug(
                "loaded job %d from database", job_object.id_value)
            return FileDataSource.read_job(
                self,
                path=kwargs.get("path", self.default_path),
                name=job_object.id_value)
        logging.getLogger(self.__class__.__name__).warning(
            "did not find job (run=%s, gpid=%s, tme=%s, workernode_id=%s) in database",
            job.run, job.gpid, job.tme, workernode_object.id_value)
        return None
def test_processes_in_order(self):
    data_source = FileDataSource()
    for job in data_source.jobs(path=self._file_path()):
        last_tme = 0
        last_pid = 0
        for process in job.processes_in_order():
            self.assertTrue(process.tme >= last_tme)
            if last_tme == process.tme:
                # equal timestamps: pids must increase, unless the pid
                # counter wrapped around at 32768
                self.assertTrue(
                    process.pid > last_pid or ((last_pid + process.pid) % 32768 < 500),
                    "%d vs %d" % (last_pid, process.pid))
                last_pid = process.pid
            else:
                last_pid = 0
            last_tme = process.tme
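# Worked example for the wrap-around tolerance above: with pid_max = 32768 a
# late parent can be followed by a freshly wrapped, therefore smaller, pid.
# E.g. last_pid = 32700 and process.pid = 200 at the same tme:
#     (32700 + 200) % 32768 == 132 < 500   -> accepted despite 200 < 32700
# whereas an out-of-order pid far from the wrap boundary fails the check:
#     (20000 + 100) % 32768 == 20100       -> rejected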
def test_processes(self):
    data_source = FileDataSource()
    for job in data_source.jobs(path=self._file_path()):
        tree = job.tree
        count = 0
        for node, depth in tree.walkDFS():
            count += 1
            # check pid order of children
            initial = 0
            last_tme = 0
            for process in node.children:
                self.assertTrue(
                    process.value.pid >= initial or
                    (process.value.pid < initial and last_tme < process.value.tme),
                    "%d: initial %d differs %d (%s)" % (
                        count, initial, process.value.pid,
                        [(child.value.pid, child.value.tme)
                         for child in node.children]))
                initial = process.value.pid
                last_tme = process.value.tme
def write_job(self, **kwargs):
    job = kwargs["data"]
    workernode_object = self._db_operator.load_or_create_workernode(data=job.workernode)
    configuration_object = self._db_operator.load_or_create_configuration(
        data=job.configuration)
    job_object = DBJobObject(
        job_id=job.job_id, run=job.run, uid=job.uid, gpid=job.gpid,
        tme=job.tme, exit_tme=job.exit_tme,
        workernode_id=workernode_object.id_value,
        configuration_id=configuration_object.id_value,
        valid=job.is_valid(), last_tme=job.last_tme,
        completed=job.is_complete())
    try:
        self._db_operator.save_or_update(data=job_object)
        job.db_id = job_object.id_value
        FileDataSource.write_job(self, **kwargs)
    except Exception:
        raise RethrowException("The job could not be created")
    else:
        logging.getLogger(self.__class__.__name__).debug(
            "saved job for index %s", job_object.id_value)
        return job
def __init__(self, path, data_source=None):
    self.path = path
    self.data_source = data_source if data_source is not None else FileDataSource()
    self._logger = logging.getLogger('cache.prototypes')
    # note: any non-empty value (even "0") enables these flags
    self.force_refresh = bool(
        os.environ.get('DISS_PROTOTYPE_CACHE_REFRESH', False))
    if self.force_refresh:
        self._logger.warning('Forcefully refreshing caches '
                             '(enabled via $DISS_PROTOTYPE_CACHE_REFRESH)')
    self.preloaded_only = bool(
        os.environ.get('DISS_PROTOTYPE_CACHE_PRELOADED_ONLY', False))
    if self.preloaded_only:
        self._logger.warning('Only working with preloaded caches '
                             '(enabled via $DISS_PROTOTYPE_CACHE_PRELOADED_ONLY)')
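# Usage sketch (hypothetical class name PrototypeCache; the environment flags
# are read once at construction time, so they must be set beforehand):
import os
os.environ['DISS_PROTOTYPE_CACHE_REFRESH'] = '1'
cache = PrototypeCache("data/c00-001-001/1/1-process.csv")  # hypothetical path
assert cache.force_refresh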
def test_parsing(self):
    data_source = FileDataSource()
    data_reader = CSVReader()
    parser = NetworkStatisticsParser(data_source=data_source)
    data_reader.parser = parser
    # nothing is returned by NetworkStatisticsParser
    for _ in parser.parse(path=self.traffic_file_path()):
        pass
    for _ in parser.parse(path=self.process_file_path()):
        pass
    count = 0
    for data in parser.pop_data():
        for networkstats in data.values():
            count += networkstats.event_count
    self.assertEqual(count, 19998)
    parser.check_caches()
    parser.clear_caches()
    self.assertEqual(parser.data, {})
def __init__(self):
    FileDataSource.__init__(self)
    self._db_data_source = DBDataSource(connectionName="default")
    self._db_operator = DBOperator(data_source=self._db_data_source)