Exemple #1
0
 def check(self):
     """Compare the current idents hash with the one stored for the dataset.

     The current file path and hash come from ``self.get_filepath_hash()``;
     the stored hash is the ``idents_hash`` attribute of the single
     ``DatasetsAlchemy`` row whose ``id`` equals the dataset id.

     Raises:
         UpdatedFile: if the stored hash differs from the current one.
     """
     idents_path, current_hash = self.get_filepath_hash()
     dataset_id = self.dataset_conf.dataset_id
     # ``one()`` is used on purpose: exactly one row is expected per dataset.
     row = (self.session.query(DatasetsAlchemy)
            .filter(DatasetsAlchemy.id == dataset_id)
            .one())
     stored_hash = row.idents_hash
     if stored_hash != current_hash:
         raise UpdatedFile(idents_path, self.dataset_conf.dataset)
Exemple #2
0
 def _check(self, already_loaded):
     """Locate ``idents.csv``, hash it, and optionally compare with the DB.

     Sets ``self.filepath`` to the ``idents.csv`` path inside the dataset
     input directory and ``self.idents_hash`` to ``compute_hash`` of that
     file.

     Args:
         already_loaded: when truthy, the freshly computed hash is compared
             with the ``idents_hash`` of the ``DatasetsHashesAlchemy`` row
             whose ``id`` equals the dataset id.

     Raises:
         IdentsFileNotFound: if ``idents.csv`` is not an existing file.
         UpdatedFile: if ``already_loaded`` is truthy and the stored hash
             differs from the computed one.
     """
     # Resolve the idents file and make sure it is really there.
     dataset_input_dir = self.dataset_conf.input_dir(self.secuml_conf)
     self.filepath = path.join(dataset_input_dir, 'idents.csv')
     if not path.isfile(self.filepath):
         raise IdentsFileNotFound(self.filepath)

     # Hash the file as it currently is on disk.
     self.idents_hash = compute_hash(self.filepath)
     dataset_id = self.dataset_conf.dataset_id
     if not already_loaded:
         return

     # The dataset was loaded before: its recorded hash must still match.
     stored_row = (self.session.query(DatasetsHashesAlchemy)
                   .filter(DatasetsHashesAlchemy.id == dataset_id)
                   .one())
     recorded_hash = stored_row.idents_hash
     if recorded_hash != self.idents_hash:
         raise UpdatedFile(self.filepath, self.dataset_conf.dataset)
Exemple #3
0
 def check(self):
     """Compare the current ground-truth hash with the stored one.

     ``self.exists`` is set to whether ``self.get_filepath_hash()`` returned
     a file path. When it did not, a warning is logged through
     ``self.secuml_conf.logger`` and the method returns without querying.

     Raises:
         UpdatedFile: if the ``ground_truth_hash`` of the dataset's
             ``DatasetsAlchemy`` row differs from the current hash.
     """
     gt_path, current_hash = self.get_filepath_hash()
     self.exists = gt_path is not None
     if not self.exists:
         # Nothing to verify: report it and stop here.
         dataset_names = (self.dataset_conf.project,
                          self.dataset_conf.dataset)
         self.secuml_conf.logger.warning(
             'No ground-truth available for the dataset %s/%s.'
             % dataset_names)
         return
     dataset_id = self.dataset_conf.dataset_id
     row = (self.session.query(DatasetsAlchemy)
            .filter(DatasetsAlchemy.id == dataset_id)
            .one())
     stored_hash = row.ground_truth_hash
     if stored_hash != current_hash:
         raise UpdatedFile(gt_path, self.dataset_conf.dataset)
Exemple #4
0
 def _check_hashes(self):
     """Check the input features files against the hashes stored in the DB.

     The recorded ``filename -> hash`` mapping is read from the
     ``FeaturesFilesAlchemy`` rows whose ``features_set_id`` equals
     ``self.features_set_id``. Depending on ``self.input_type``:

     * ``InputFeaturesTypes.file``: the single file
       ``self.features_conf.input_features`` located in the ``features``
       sub-directory of the dataset input directory is checked.
     * ``InputFeaturesTypes.dir``: every entry of ``self.input_path`` is
       checked, after verifying that the directory holds exactly the
       recorded file names.

     Raises:
         UpdatedDirectory: in directory mode, if the names listed in
             ``self.input_path`` differ from the recorded ones.
         UpdatedFile: if the current hash of a file differs from the
             recorded one.
     """
     query = self.session.query(FeaturesFilesAlchemy)
     query = query.filter(
         FeaturesFilesAlchemy.features_set_id == self.features_set_id)
     db_files = {r.filename: r.hash for r in query.all()}
     if self.input_type == InputFeaturesTypes.file:
         files = [self.features_conf.input_features]
         dataset_dir = self.exp_conf.dataset_conf.input_dir(
             self.secuml_conf)
         features_path = os.path.join(dataset_dir, 'features')
         # NOTE(review): a file with no recorded hash still ends in a
         # KeyError below, as before -- confirm whether that can happen.
     elif self.input_type == InputFeaturesTypes.dir:
         files = os.listdir(self.input_path)
         # Compare the names, not only the number of files: a renamed or
         # replaced file keeps the count unchanged and used to surface as
         # a bare KeyError in the loop below instead of UpdatedDirectory.
         if set(files) != set(db_files):
             raise UpdatedDirectory(self.input_path, db_files.keys(), files)
         features_path = self.input_path
     # NOTE(review): any other input type leaves `files` unbound, exactly
     # as in the previous version -- presumably only the two types exist.
     for filename in files:
         file_path = os.path.join(features_path, filename)
         file_hash = compute_hash(file_path)
         if file_hash != db_files[filename]:
             raise UpdatedFile(file_path,
                               self.exp_conf.dataset_conf.dataset)