Example #1
def _prototypes_from_dir(self, dir_path):
    # For each directory of CSVs, we store an index pickle listing the job
    # files plus individual, per-file pickles.
    # Since the data source walks the directory on its own, the cached and
    # uncached code paths behave differently here.
    cache_path = self._cache_path(dir_path)
    try:
        if self.force_refresh:
            # force the cache-miss branch to rebuild everything
            raise OSError
        # load the cached index of job files
        with open(cache_path, 'rb') as cache_pkl:
            job_csv_paths = pickle.load(cache_pkl)
        # load job files individually so each one can be refreshed on its own
        for job_csv_path in job_csv_paths:
            for prototype in self._prototypes_from_csv(job_csv_path):
                yield prototype
    except (OSError, IOError, EOFError):
        dir_prototype_lock = filelock.FileLock(
            os.path.splitext(cache_path)[0] + '.lock')
        try:
            with dir_prototype_lock.acquire(timeout=0):
                # clean up broken pickles
                if os.path.exists(cache_path):
                    os.unlink(cache_path)
                    self._logger.warning('Refreshing existing cache %r',
                                         cache_path)
        except filelock.Timeout:
            pass
        data_source = self.data_source
        job_files = []
        for job in data_source.jobs(path=dir_path):
            job.prepare_traffic()
            prototype = Prototype.from_job(job)
            yield prototype
            assert job.path not in job_files, \
                "Job file must not contain multiple jobs (%r)" % job.path
            job_cache_path = self._cache_path(job.path)
            cache_prototype_lock = filelock.FileLock(
                os.path.splitext(job_cache_path)[0] + '.lock')
            try:
                with cache_prototype_lock.acquire(timeout=0):
                    # store the job individually, just remember its file
                    with open(job_cache_path, 'wb') as job_cache_pkl:
                        pickle.dump([prototype], job_cache_pkl,
                                    pickle.HIGHEST_PROTOCOL)
            except filelock.Timeout:
                pass
            job_files.append(job.path)
        try:
            with dir_prototype_lock.acquire(timeout=0):
                with open(cache_path, 'wb') as cache_pkl:
                    pickle.dump(job_files, cache_pkl,
                                pickle.HIGHEST_PROTOCOL)
        except filelock.Timeout:
            pass
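
The per-file writes above all follow the same non-blocking pattern: try to take a file lock with timeout=0 and, if another process already holds it, skip the write instead of waiting. A minimal sketch of that pattern, assuming only the filelock package (write_cache_once is an illustrative name, not part of the project):

import os
import pickle

import filelock


def write_cache_once(cache_path, payload):
    # Write payload to cache_path unless another process already holds
    # the corresponding lock; timeout=0 makes the attempt non-blocking.
    lock = filelock.FileLock(os.path.splitext(cache_path)[0] + '.lock')
    try:
        with lock.acquire(timeout=0):
            with open(cache_path, 'wb') as cache_pkl:
                pickle.dump(payload, cache_pkl, pickle.HIGHEST_PROTOCOL)
            return True
    except filelock.Timeout:
        # a concurrent writer is already producing the same cache
        return False

Returning False on Timeout mirrors the `pass` branches above: if someone else is writing the same cache right now, there is nothing left to do.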
Example #2
    def test_from_job(self):
        file_path = os.path.join(
            os.path.dirname(assess_tests.__file__),
            "data/c01-007-102/1/1-process.csv"
        )
        data_source = FileDataSource()
        for job in data_source.jobs(path=file_path):
            prototype = Prototype.from_job(job)
        self.assertIsNotNone(prototype)
        self.assertEqual(prototype.node_count(), 9109)

        last_tme = 0
        for node in prototype.nodes(order_first=True):
            self.assertTrue(last_tme <= node.tme)
            last_tme = node.tme
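
The final loop asserts that nodes(order_first=True) yields nodes in non-decreasing tme order. The same check can be phrased as a small standalone helper; is_sorted_by is purely illustrative and not part of the project:

def is_sorted_by(items, key):
    # True if the items are non-decreasing with respect to key(item)
    values = [key(item) for item in items]
    return all(a <= b for a, b in zip(values, values[1:]))

# hypothetical replacement for the explicit loop above:
# self.assertTrue(is_sorted_by(prototype.nodes(order_first=True),
#                              lambda node: node.tme))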
Example #3
def _prototypes_from_csv(self, csv_path):
    # For each individual CSV, we store *all* of its content in a single
    # pickle. That content may comprise multiple prototypes, so we yield
    # them one by one.
    cache_path = self._cache_path(csv_path)
    try:
        if self.force_refresh:
            # force the cache-miss branch to rebuild the pickle
            raise OSError
        with open(cache_path, 'rb') as cache_pkl:
            prototypes = pickle.load(cache_pkl)
    except (OSError, IOError, EOFError):
        if self.preloaded_only:
            yield None
        # serialize pickle creation in case multiple processes use the
        # same prototype
        cache_prototype_lock = filelock.FileLock(
            os.path.splitext(cache_path)[0] + '.lock')
        try:
            # try to become the writer and create the pickle
            with cache_prototype_lock.acquire(timeout=0):
                # clean up broken pickles
                if os.path.exists(cache_path):
                    os.unlink(cache_path)
                    self._logger.warning('Refreshing existing cache %r',
                                         cache_path)
                data_source = self.data_source
                prototypes = []
                for job in data_source.jobs(path=csv_path):
                    job.prepare_traffic()
                    prototype = Prototype.from_job(job)
                    prototypes.append(prototype)
                if prototypes:
                    with open(cache_path, 'wb') as cache_pkl:
                        pickle.dump(prototypes, cache_pkl,
                                    pickle.HIGHEST_PROTOCOL)
        except filelock.Timeout:
            # we are NOT the writer: block on the lock until the writer
            # is done, then read its pickle
            with cache_prototype_lock:
                with open(cache_path, 'rb') as cache_pkl:
                    prototypes = pickle.load(cache_pkl)
    for prototype in prototypes:
        yield prototype
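
Both cache methods reduce to the same writer/reader split: whichever process gets the lock with timeout=0 builds and pickles the result, while everyone else blocks on the lock and then reads the finished pickle. A generic sketch of that split, assuming only pickle and filelock (load_or_build and the build callable are illustrative, not project API):

import os
import pickle

import filelock


def load_or_build(cache_path, build):
    # Load a pickled result, or build and cache it exactly once across
    # processes; build is any zero-argument callable returning a picklable
    # object.
    try:
        with open(cache_path, 'rb') as cache_pkl:
            return pickle.load(cache_pkl)
    except (OSError, IOError, EOFError):
        lock = filelock.FileLock(os.path.splitext(cache_path)[0] + '.lock')
        try:
            # try to become the writer without waiting
            with lock.acquire(timeout=0):
                result = build()
                with open(cache_path, 'wb') as cache_pkl:
                    pickle.dump(result, cache_pkl, pickle.HIGHEST_PROTOCOL)
                return result
        except filelock.Timeout:
            # another process is writing; wait for it, then read its result
            with lock:
                with open(cache_path, 'rb') as cache_pkl:
                    return pickle.load(cache_pkl)

Acquiring the lock without a timeout in the Timeout branch simply waits until the writer releases it, at which point the pickle is expected to exist.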