def check(self):
    filepath, curr_idents_hash = self.get_filepath_hash()
    dataset_id = self.dataset_conf.dataset_id
    query = self.session.query(DatasetsAlchemy)
    query = query.filter(DatasetsAlchemy.id == dataset_id)
    idents_hash = query.one().idents_hash
    # Raise an error if the idents file has changed since the dataset was loaded.
    if idents_hash != curr_idents_hash:
        raise UpdatedFile(filepath, self.dataset_conf.dataset)
def _check(self, already_loaded):
    # Check whether the file exists
    input_dir = self.dataset_conf.input_dir(self.secuml_conf)
    self.filepath = path.join(input_dir, 'idents.csv')
    if not path.isfile(self.filepath):
        raise IdentsFileNotFound(self.filepath)
    # Check the hash
    self.idents_hash = compute_hash(self.filepath)
    dataset_id = self.dataset_conf.dataset_id
    if already_loaded:
        query = self.session.query(DatasetsHashesAlchemy)
        query = query.filter(DatasetsHashesAlchemy.id == dataset_id)
        idents_hash = query.one().idents_hash
        if idents_hash != self.idents_hash:
            raise UpdatedFile(self.filepath, self.dataset_conf.dataset)
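
# A minimal sketch of what a compute_hash helper could look like, assuming it
# returns a hex digest of the file contents read in chunks. This is an
# illustration only, not the project's actual implementation: the real
# compute_hash may use a different algorithm or chunk size.
import hashlib

def compute_hash_sketch(filepath, chunk_size=8192):
    """Return the hex digest of a file, reading it in fixed-size chunks."""
    digest = hashlib.md5()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()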
def check(self):
    filepath, curr_hash = self.get_filepath_hash()
    self.exists = filepath is not None
    if not self.exists:
        self.secuml_conf.logger.warning(
                'No ground-truth available for the dataset %s/%s.'
                % (self.dataset_conf.project, self.dataset_conf.dataset))
        return
    dataset_id = self.dataset_conf.dataset_id
    query = self.session.query(DatasetsAlchemy)
    query = query.filter(DatasetsAlchemy.id == dataset_id)
    res = query.one()
    ground_truth_hash = res.ground_truth_hash
    # Raise an error if the ground-truth file has changed since it was loaded.
    if ground_truth_hash != curr_hash:
        raise UpdatedFile(filepath, self.dataset_conf.dataset)
def _check_hashes(self):
    query = self.session.query(FeaturesFilesAlchemy)
    query = query.filter(
            FeaturesFilesAlchemy.features_set_id == self.features_set_id)
    db_files = {r.filename: r.hash for r in query.all()}
    if self.input_type == InputFeaturesTypes.file:
        files = [self.features_conf.input_features]
        dataset_dir = self.exp_conf.dataset_conf.input_dir(self.secuml_conf)
        features_path = os.path.join(dataset_dir, 'features')
    elif self.input_type == InputFeaturesTypes.dir:
        files = os.listdir(self.input_path)
        # A different number of files means the directory itself has changed.
        if len(files) != len(db_files):
            raise UpdatedDirectory(self.input_path, db_files.keys(), files)
        features_path = self.input_path
    # Compare the hash of each features file with the hash stored in the DB.
    for filename in files:
        file_path = os.path.join(features_path, filename)
        file_hash = compute_hash(file_path)
        if file_hash != db_files[filename]:
            raise UpdatedFile(file_path, self.exp_conf.dataset_conf.dataset)
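
# Standalone sketch of the hash-checking pattern used above, with the database
# query replaced by a plain dict and the project's UpdatedDirectory/UpdatedFile
# exceptions replaced by ValueError. The names (check_directory_hashes,
# stored_hashes, features_dir) are illustrative assumptions, not project API.
import hashlib
import os

def _file_digest(file_path):
    with open(file_path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

def check_directory_hashes(features_dir, stored_hashes):
    """Raise if the directory content differs from the recorded hashes."""
    files = os.listdir(features_dir)
    # A different number of files means the directory itself was modified.
    if len(files) != len(stored_hashes):
        raise ValueError('Directory %s has changed.' % features_dir)
    for filename in files:
        file_path = os.path.join(features_dir, filename)
        if _file_digest(file_path) != stored_hashes.get(filename):
            raise ValueError('File %s has changed.' % file_path)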