def convert_samples_to_samplesets(
    samples, group_by_reference_id=True, references=None
):
    if group_by_reference_id:
        # Grouping sample sets
        sample_sets = dict()
        for s in samples:
            if s.reference_id not in sample_sets:
                sample_sets[s.reference_id] = (
                    SampleSet([s], parent=s)
                    if references is None
                    else SampleSet([s], parent=s, references=references)
                )
            else:
                sample_sets[s.reference_id].append(s)
        return list(sample_sets.values())
    else:
        return (
            [SampleSet([s], parent=s) for s in samples]
            if references is None
            else [
                SampleSet([s], parent=s, references=references)
                for s in samples
            ]
        )
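# A minimal usage sketch for ``convert_samples_to_samplesets``. The data and
# reference ids below are made up for illustration; only ``Sample`` from
# bob.pipelines and the function above are assumed.
def _demo_convert_samples_to_samplesets():
    import numpy as np

    from bob.pipelines import Sample

    samples = [
        Sample(np.zeros(2), reference_id="a", key="0"),
        Sample(np.ones(2), reference_id="a", key="1"),
        Sample(np.ones(2), reference_id="b", key="2"),
    ]
    # Grouping: one SampleSet per reference_id -> two sets ("a" and "b")
    grouped = convert_samples_to_samplesets(samples)
    assert len(grouped) == 2
    # No grouping: one single-sample SampleSet per input sample
    ungrouped = convert_samples_to_samplesets(
        samples, group_by_reference_id=False
    )
    assert len(ungrouped) == 3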
def _create_sample_sets(raw_data, offset, references=None):
    # Optionally attach the list of references to each SampleSet
    kwargs = {} if references is None else {"references": references}
    return [
        SampleSet(
            [
                Sample(
                    s,
                    reference_id=str(i + offset),
                    key=str(uuid.uuid4()),
                )
            ],
            key=str(i + offset),
            reference_id=str(i + offset),
            subject_id=str(i + offset),
            **kwargs,
        )
        for i, s in enumerate(raw_data)
    ]
def transform(self, X):
    if len(X) <= 0:
        # Nothing to be transformed
        return []

    def _transform_samples(X, stats):
        scores = []
        for no_normed_score in X:
            score = (no_normed_score.data - stats.mu) / stats.std
            t_score = Sample(score, parent=no_normed_score)
            scores.append(t_score)
        return scores

    if isinstance(X[0], SampleSet):
        t_normed_scores = []
        # Transforming SampleSets: the T-norm statistics are selected
        # per probe set via its reference_id
        for probe_scores in X:
            stats = self.t_stats[probe_scores.reference_id]
            t_normed_scores.append(
                SampleSet(
                    _transform_samples(probe_scores, stats),
                    parent=probe_scores,
                )
            )
    else:
        # Plain Samples: select the statistics per sample via its
        # reference_id
        t_normed_scores = []
        for s in X:
            t_normed_scores.extend(
                _transform_samples([s], self.t_stats[s.reference_id])
            )
    return t_normed_scores
def transform(self, X):
    if len(X) <= 0:
        # Nothing to be transformed
        return []

    def _transform_samples(X):
        scores = []
        for no_normed_score in X:
            # The Z-norm statistics are selected per sample via its
            # reference_id
            stats = self.z_stats[no_normed_score.reference_id]
            score = (no_normed_score.data - stats.mu) / stats.std
            z_score = Sample(score, parent=no_normed_score)
            scores.append(z_score)
        return scores

    if isinstance(X[0], SampleSet):
        z_normed_scores = []
        # Transforming SampleSets
        for probe_scores in X:
            z_normed_scores.append(
                SampleSet(
                    _transform_samples(probe_scores), parent=probe_scores
                )
            )
    else:
        # If it is Samples
        z_normed_scores = _transform_samples(X)
    return z_normed_scores
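# A minimal sketch of the normalization arithmetic shared by the T-norm and
# Z-norm transforms above: each raw score is shifted by the cohort mean and
# scaled by the cohort standard deviation. The ``SimpleNamespace`` stands in
# for whatever statistics objects ``t_stats``/``z_stats`` hold; only ``mu``
# and ``std`` attributes are assumed.
def _demo_score_normalization():
    from types import SimpleNamespace

    import numpy as np

    stats = SimpleNamespace(mu=2.0, std=0.5)
    raw_score = np.array([2.5])
    normed = (raw_score - stats.mu) / stats.std
    assert np.allclose(normed, [1.0])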
def compare_samples(samples, pipeline, dask_client, verbose):
    """Compare several samples in an All vs All fashion."""
    if len(samples) == 1:
        raise ValueError(
            "It's necessary to have at least two samples for the comparison"
        )

    sample_sets = [
        SampleSet(
            [
                DelayedSample(
                    functools.partial(bob.io.base.load, s), key=str(s)
                )
            ],
            key=str(s),
            biometric_id=str(i),
        )
        for i, s in enumerate(samples)
    ]
    if dask_client is not None:
        pipeline = dask_bio_pipeline(pipeline)

    table = [[s for s in samples]]
    biometric_references = pipeline.create_biometric_reference(sample_sets)
    scores = pipeline.compute_scores(sample_sets, biometric_references)

    if dask_client is not None:
        scores = scores.compute(scheduler=dask_client)

    for sset in scores:
        table.append([str(s.data) for s in sset])

    print("All vs All comparison")
    print(tabulate(table))

    if dask_client is not None:
        dask_client.shutdown()
def _make_sampleset_from_filedict(self, file_dict, reference_ids=None):
    samplesets = []
    for key in file_dict:
        f = file_dict[key]
        annotations_key = os.path.basename(f.path)
        # Optionally attach the list of references to the probe sampleset
        kwargs = (
            {"references": reference_ids} if reference_ids is not None else {}
        )
        samplesets.append(
            SampleSet(
                key=f.path,
                reference_id=f.reference_id,
                subject_id=f.subject_id,
                **kwargs,
                samples=[
                    DelayedSample(
                        key=f.path,
                        annotations=self.annotations[annotations_key],
                        load=partial(
                            bob.io.base.load,
                            os.path.join(
                                self.original_directory,
                                f.path + self.extension,
                            ),
                        ),
                    )
                ],
            )
        )
    return samplesets
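# A minimal sketch of the lazy loading used by the samplesets above: a
# ``DelayedSample`` stores a zero-argument ``load`` callable and only invokes
# it when ``.data`` is first accessed. The loader below is a stand-in for the
# ``partial(bob.io.base.load, ...)`` calls used in this module.
def _demo_delayed_sample():
    import numpy as np

    from bob.pipelines import DelayedSample

    def _fake_load():
        return np.zeros((2, 2))

    sample = DelayedSample(_fake_load, key="demo")
    assert sample.data.shape == (2, 2)  # the loader runs here, not earlier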
def background_model_samples(self):
    """This function returns the training set for the open-set protocols o1,
    o2 and o3.

    It returns the :py:meth:`references` and the training samples with known
    unknowns, which get the subject id "unknown".

    Returns
    -------

    [bob.pipelines.SampleSet]
        The training samples, where each sampleset contains all images of one
        subject. Only the samples of the "unknown" subject are collected from
        several subjects.
    """
    if self.protocol[0] != "o":
        return []

    # return a list of samplesets for each enrollment image and each known
    # unknown training sample
    enrollmentset = self.references()

    data = {}
    for image in self.pairs["training-unknown"]:
        # get the image path
        image_path = os.path.join(
            self.original_directory,
            self.image_relative_path,
            self.make_path_from_filename(image) + self.extension,
        )
        # load annotations
        if self.annotation_directory is not None:
            annotation_path = os.path.join(
                self.annotation_directory,
                self.make_path_from_filename(image)
                + self.annotation_extension,
            )
            annotations = self._extract(annotation_path)
        else:
            annotations = None
        data[image] = (image_path, annotations)

    # generate one sampleset from the images of the known unknowns
    sset = SampleSet(
        key="unknown",
        reference_id="unknown",
        subject_id="unknown",
        samples=[
            DelayedSample(
                key=image,
                load=partial(bob.io.base.load, data[image][0]),
                annotations=data[image][1],
            )
            for image in data
        ],
    )
    return enrollmentset + [sset]
def test_sampleset_collection():
    n_samples = 10
    X = np.ones(shape=(n_samples, 2), dtype=int)
    sampleset = SampleSet(
        [Sample(data, key=str(i)) for i, data in enumerate(X)], key="1"
    )
    assert len(sampleset) == n_samples

    # Testing insert
    sample = Sample(X, key=100)
    sampleset.insert(1, sample)
    assert len(sampleset) == n_samples + 1

    # Testing delete
    del sampleset[0]
    assert len(sampleset) == n_samples

    # Testing set
    sampleset[0] = copy.deepcopy(sample)

    # Testing iterator
    for i in sampleset:
        assert isinstance(i, Sample)

    def _load(path):
        # Close the file handle instead of leaking it
        with open(path, "rb") as f:
            return pickle.loads(f.read())

    # Testing delayed sampleset
    with tempfile.TemporaryDirectory() as dir_name:
        samples = [Sample(data, key=str(i)) for i, data in enumerate(X)]
        filename = os.path.join(dir_name, "samples.pkl")
        with open(filename, "wb") as f:
            f.write(pickle.dumps(samples))

        sampleset = DelayedSampleSet(functools.partial(_load, filename), key=1)
        assert len(sampleset) == n_samples
        assert sampleset.samples == samples

    # Testing delayed sampleset cached
    with tempfile.TemporaryDirectory() as dir_name:
        samples = [Sample(data, key=str(i)) for i, data in enumerate(X)]
        filename = os.path.join(dir_name, "samples.pkl")
        with open(filename, "wb") as f:
            f.write(pickle.dumps(samples))

        sampleset = DelayedSampleSetCached(
            functools.partial(_load, filename), key=1
        )
        assert len(sampleset) == n_samples
        assert sampleset.samples == samples
def get_fake_sample_set(face_size=(160, 160), purpose="bioref"):
    data = images[purpose][0]
    annotations = images[purpose][1]
    key = "1" if purpose == "bioref" else "2"

    return [
        SampleSet(
            [
                DelayedSample(
                    load=functools.partial(bob.io.base.load, data),
                    key=key,
                    annotations=annotations,
                )
            ],
            key=key,
            reference_id=key,
            references=["1"],
        )
    ]
def _make_sample_set(
    self, reference_id, subject_id, sample_path, references=None
):
    path = os.path.join(self.original_directory, sample_path)
    kwargs = {} if references is None else {"references": references}

    # Delaying the annotation loading
    delayed_annotations = partial(self._annotations, path)
    return SampleSet(
        key=str(reference_id),
        reference_id=str(reference_id),
        subject_id=str(subject_id),
        **kwargs,
        samples=[
            DelayedSample(
                key=str(sample_path),
                load=partial(self._load_video_from_path, path),
                delayed_attributes={"annotations": delayed_annotations},
            )
        ],
    )
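# A minimal sketch of the ``delayed_attributes`` mechanism used above: each
# entry maps an attribute name to a zero-argument callable that is invoked
# the first time that attribute is read, so annotations are only loaded on
# demand. The loader and annotation dict below are made up for illustration.
def _demo_delayed_attributes():
    from bob.pipelines import DelayedSample

    def _fake_annotations():
        return {"leye": (10, 20), "reye": (10, 40)}

    sample = DelayedSample(
        lambda: b"video-bytes",  # stand-in for the video loader
        key="demo",
        delayed_attributes={"annotations": _fake_annotations},
    )
    assert sample.annotations["leye"] == (10, 20)  # loaded on first access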
def _create_random_sample_set(self, n_sample_set=10, n_samples=2, seed=10):
    # Just generate random samples
    np.random.seed(seed)
    sample_set = [
        SampleSet(
            samples=[],
            key=str(i),
            reference_id=str(i),
            subject_id=str(i),
            gender=np.random.choice(self.gender_choices),
            metadata_1=np.random.choice(self.metadata_1_choices),
        )
        for i in range(n_sample_set)
    ]

    offset = 0
    for i, s in enumerate(sample_set):
        if self.one_d:
            s.samples = self._create_random_1dsamples(
                n_samples, offset, self.dim
            )
        else:
            s.samples = self._create_random_2dsamples(
                n_samples, offset, self.dim
            )

        # Mark every other sample of odd sample sets as failure-to-acquire
        if self.contains_fta and i % 2:
            for sample in s.samples[::2]:
                sample.data = None
        if self.all_samples_fta:
            for sample in s.samples:
                sample.data = None

        offset += n_samples

    return sample_set
def references(self, group="dev"):
    if self.protocol not in self.references_dict:
        self.references_dict[self.protocol] = []

        if self.protocol == "view2":
            for key in self.pairs:
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    key + self.extension,
                )
                if self.annotation_directory is not None:
                    annotation_path = os.path.join(
                        self.annotation_directory,
                        key + self.annotation_extension,
                    )
                    annotations = self._extract(annotation_path)
                else:
                    annotations = None

                sset = SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=self.subject_id_from_filename(key),
                    samples=[
                        DelayedSample(
                            key=key,
                            reference_id=key,
                            load=partial(bob.io.base.load, image_path),
                            subject_id=self.subject_id_from_filename(key),
                            annotations=annotations,
                        )
                    ],
                )
                self.references_dict[self.protocol].append(sset)
        elif self.protocol[0] == "o":
            for key in self.pairs["enroll"]:
                data = {}
                for image in self.pairs["enroll"][key]:
                    # get the image path
                    image_path = os.path.join(
                        self.original_directory,
                        self.image_relative_path,
                        self.make_path_from_filename(image) + self.extension,
                    )
                    # load annotations
                    if self.annotation_directory is not None:
                        annotation_path = os.path.join(
                            self.annotation_directory,
                            self.make_path_from_filename(image)
                            + self.annotation_extension,
                        )
                        annotations = self._extract(annotation_path)
                    else:
                        annotations = None
                    data[image] = (image_path, annotations)

                # generate one sampleset from several (should be 3) images of
                # the same person
                sset = SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=key,
                    samples=[
                        DelayedSample(
                            key=image,
                            reference_id=key,
                            load=partial(bob.io.base.load, data[image][0]),
                            annotations=data[image][1],
                        )
                        for image in data
                    ],
                )
                self.references_dict[self.protocol].append(sset)

    return self.references_dict[self.protocol]
def probes(self, group="dev"):
    if self.protocol not in self.probes_dict:
        self.probes_dict[self.protocol] = []

        if self.protocol == "view2":
            for key in self.probe_reference_keys:
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    key + self.extension,
                )
                if self.annotation_directory is not None:
                    annotation_path = os.path.join(
                        self.annotation_directory,
                        key + self.annotation_extension,
                    )
                    annotations = self._extract(annotation_path)
                else:
                    annotations = None

                sset = SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=self.subject_id_from_filename(key),
                    # deep copying to avoid bizarre issues with dask
                    references=copy.deepcopy(self.probe_reference_keys[key]),
                    samples=[
                        DelayedSample(
                            key=key,
                            reference_id=key,
                            subject_id=self.subject_id_from_filename(key),
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
                self.probes_dict[self.protocol].append(sset)
        elif self.protocol[0] == "o":
            # collect known probe samples, then add the unknown ones
            probes = [
                (image, key)
                for key in self.pairs["probe"]
                for image in self.pairs["probe"][key]
            ]
            if self.protocol in ("o1", "o3"):
                probes += [(image, "unknown") for image in self.pairs["o1"]]
            if self.protocol in ("o2", "o3"):
                probes += [(image, "unknown") for image in self.pairs["o2"]]

            for image, key in probes:
                # get the image path
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    self.make_path_from_filename(image) + self.extension,
                )
                # load annotations
                if self.annotation_directory is not None:
                    annotation_path = os.path.join(
                        self.annotation_directory,
                        self.make_path_from_filename(image)
                        + self.annotation_extension,
                    )
                    annotations = self._extract(annotation_path)
                else:
                    annotations = None

                # one probe sample per image
                sset = SampleSet(
                    key=image,
                    reference_id=image,
                    subject_id=key,
                    samples=[
                        DelayedSample(
                            key=image,
                            reference_id=image,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
                self.probes_dict[self.protocol].append(sset)

    return self.probes_dict[self.protocol]
def score_sample_templates(
    self, probe_samples, enroll_samples, score_all_vs_all
):
    """Computes the similarity score between all probe and enroll templates.

    Parameters
    ----------
    probe_samples : list
        A list (length N) of Samples containing probe templates.
    enroll_samples : list
        A list (length M) of Samples containing enroll templates.
    score_all_vs_all : bool
        If True, the similarity scores between all probe and enroll
        templates are computed. If False, the similarity scores between
        the probes and their associated enroll templates are computed.

    Returns
    -------
    score_samplesets : list
        A list of N SampleSets, each containing a list of M score Samples
        if score_all_vs_all is True. Otherwise, a list of N SampleSets,
        each containing a list of <=M score Samples, depending on the
        database.
    """
    logger.debug(
        f"{_frmt(self)}.score_sample_templates(... score_all_vs_all={score_all_vs_all})"
    )
    # Returns a list of SampleSets, one SampleSet for each probe
    # SampleSet, where each Sample inside the SampleSet contains the
    # score for one enroll SampleSet
    score_samplesets = []
    if score_all_vs_all:
        probe_data = [s.data for s in probe_samples]
        # skip probe samples whose data is invalid (e.g. failed to acquire)
        valid_probe_indices = [
            i for i, d in enumerate(probe_data) if _data_valid(d)
        ]
        valid_probe_data = [probe_data[i] for i in valid_probe_indices]
        scores = self.compare(SampleBatch(enroll_samples), valid_probe_data)
        scores = np.asarray(scores, dtype=float)
        if len(valid_probe_indices) != len(probe_data):
            # inject None scores for invalid probe samples
            scores: list = scores.T.tolist()
            for i in range(len(probe_data)):
                if i not in valid_probe_indices:
                    scores.insert(i, [None] * len(enroll_samples))
            # transpose back to the original (enroll x probe) shape
            scores = np.array(scores, dtype=float).T
        expected_shape = (len(enroll_samples), len(probe_samples))
        assert scores.shape == expected_shape, (
            "The shape of the similarity scores (%s) does not match the "
            "expected shape (%s)" % (scores.shape, expected_shape)
        )
        for j, probe in enumerate(probe_samples):
            samples = []
            for i, enroll in enumerate(enroll_samples):
                samples.append(Sample(scores[i, j], parent=enroll))
            score_samplesets.append(SampleSet(samples, parent=probe))
    else:
        for probe in probe_samples:
            references = [str(ref) for ref in probe.references]
            # get the indices of the enroll samplesets matching the
            # probe's references
            indices = [
                i
                for i, enroll in enumerate(enroll_samples)
                if str(enroll.reference_id) in references
            ]
            if not indices:
                raise ValueError(
                    f"No enroll sampleset found for probe {probe} and its "
                    f"required references {references}. "
                    "Did you mean to set score_all_vs_all=True?"
                )
            if not _data_valid(probe.data):
                scores = [[None]] * len(indices)
            else:
                scores = self.compare(
                    SampleBatch([enroll_samples[i] for i in indices]),
                    SampleBatch([probe]),
                )
            scores = np.asarray(scores, dtype=float)
            expected_shape = (len(indices), 1)
            assert scores.shape == expected_shape, (
                "The shape of the similarity scores (%s) does not match "
                "the expected shape (%s)" % (scores.shape, expected_shape)
            )
            samples = []
            for i, j in enumerate(indices):
                samples.append(
                    Sample(scores[i, 0], parent=enroll_samples[j])
                )
            score_samplesets.append(SampleSet(samples, parent=probe))

    return score_samplesets
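# A minimal sketch of the score-matrix convention asserted in
# ``score_sample_templates`` above: ``compare`` is expected to return one row
# per enroll template and one column per probe template, i.e. a matrix of
# shape (len(enroll), len(probe)). The dot-product scores below are made up
# for illustration.
def _demo_score_matrix_convention():
    import numpy as np

    rng = np.random.default_rng(0)
    enroll = rng.random((3, 8))  # M = 3 enroll templates of dimension 8
    probe = rng.random((5, 8))  # N = 5 probe templates of dimension 8
    scores = enroll @ probe.T  # shape (M, N) == (3, 5)
    assert scores.shape == (len(enroll), len(probe))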