コード例 #1
0
def convert_samples_to_samplesets(
    samples, group_by_reference_id=True, references=None
):
    """Wrap samples into ``SampleSet`` objects.

    Parameters
    ----------
    samples
        Iterable of samples.  Each one is used as ``parent`` of the set it
        ends up in; ``reference_id`` is read when grouping is enabled.
    group_by_reference_id : bool
        If True, samples sharing a ``reference_id`` are collected in the same
        set (the first sample seen for an id is the set's parent).  If False,
        every sample gets its own single-element set.
    references
        Forwarded to every created ``SampleSet`` as ``references`` unless it
        is None, in which case the argument is not passed at all.

    Returns
    -------
    list
        The created sample sets, in order of first appearance.
    """
    # Only forward ``references`` when the caller actually provided it.
    extra = {} if references is None else {"references": references}

    if not group_by_reference_id:
        return [
            SampleSet([sample], parent=sample, **extra) for sample in samples
        ]

    grouped = {}
    for sample in samples:
        ref_id = sample.reference_id
        if ref_id in grouped:
            grouped[ref_id].append(sample)
            continue
        grouped[ref_id] = SampleSet([sample], parent=sample, **extra)

    return list(grouped.values())
コード例 #2
0
def _create_sample_sets(raw_data, offset, references=None):
    if references is None:
        return [
            SampleSet(
                [
                    Sample(
                        s,
                        reference_id=str(i + offset),
                        key=str(uuid.uuid4()),
                    )
                ],
                key=str(i + offset),
                reference_id=str(i + offset),
                subject_id=str(i + offset),
            ) for i, s in enumerate(raw_data)
        ]
    else:
        return [
            SampleSet(
                [
                    Sample(
                        s,
                        reference_id=str(i + offset),
                        key=str(uuid.uuid4()),
                    )
                ],
                key=str(i + offset),
                reference_id=str(i + offset),
                subject_id=str(i + offset),
                references=references,
            ) for i, s in enumerate(raw_data)
        ]
コード例 #3
0
    def transform(self, X):
        """Normalize raw scores with the stored T-norm statistics.

        For every ``SampleSet`` in ``X`` the statistics are fetched from
        ``self.t_stats`` using the set's ``reference_id``; each contained
        score is turned into ``(score.data - stats.mu) / stats.std``.

        Parameters
        ----------
        X : list
            SampleSets of raw score samples.  An empty input is accepted.

        Returns
        -------
        list
            One ``SampleSet`` per input set (``parent`` is the input set),
            holding the normalized score ``Sample`` objects (``parent`` is
            the corresponding raw score).  ``[]`` for empty input.

        Raises
        ------
        TypeError
            If ``X`` holds something other than SampleSets.
        KeyError
            If ``self.t_stats`` has no entry for a set's ``reference_id``
            (assuming ``t_stats`` is a plain mapping).
        """
        if len(X) <= 0:
            # Nothing to be transformed
            return []

        if not isinstance(X[0], SampleSet):
            # This branch used to call the normalization helper without the
            # statistics argument, which could only fail with a confusing
            # "missing positional argument" TypeError.  The statistics are
            # selected through the enclosing SampleSet, so a flat list of
            # Samples carries no information to pick them; fail clearly.
            raise TypeError(
                "T-normalization requires a list of SampleSets: the "
                "statistics are selected with the reference_id of each "
                "SampleSet, so a flat list of Samples cannot be normalized"
            )

        t_normed_scores = []
        for probe_scores in X:
            stats = self.t_stats[probe_scores.reference_id]
            normed = [
                Sample((raw.data - stats.mu) / stats.std, parent=raw)
                for raw in probe_scores
            ]
            t_normed_scores.append(SampleSet(normed, parent=probe_scores))

        return t_normed_scores
コード例 #4
0
    def transform(self, X):
        """Normalize raw scores with the stored Z-norm statistics.

        Every score sample is normalized with the entry of ``self.z_stats``
        found under the sample's own ``reference_id``:
        ``(sample.data - stats.mu) / stats.std``.

        Parameters
        ----------
        X : list
            Either SampleSets of score samples or score samples directly;
            the type of the first element decides which.

        Returns
        -------
        list
            Normalized scores with the same nesting as the input.  Each new
            ``Sample``/``SampleSet`` has its input counterpart as ``parent``.
            ``[]`` for empty input.
        """
        if len(X) <= 0:
            # Nothing to be transformed
            return []

        def _normalize(raw_scores):
            normed = []
            for raw in raw_scores:
                stats = self.z_stats[raw.reference_id]
                normed.append(
                    Sample((raw.data - stats.mu) / stats.std, parent=raw)
                )
            return normed

        if not isinstance(X[0], SampleSet):
            # A flat list of Samples
            return _normalize(X)

        return [
            SampleSet(_normalize(probe_scores), parent=probe_scores)
            for probe_scores in X
        ]
コード例 #5
0
def compare_samples(samples, pipeline, dask_client, verbose):
    """Compare several samples in an all-vs-all fashion and print the scores.

    Every sample is wrapped in its own single-element ``SampleSet`` and the
    same sets are used both to create the biometric references and as
    probes.  The resulting score table is printed with ``tabulate``.

    Parameters
    ----------
    samples : list
        Items passed (lazily, through ``DelayedSample``) to
        ``bob.io.base.load``; ``str(item)`` is used as key.  Presumably file
        paths -- confirm with callers.
    pipeline
        Object providing ``create_biometric_reference`` and
        ``compute_scores``.
    dask_client
        If not None, the pipeline is wrapped with ``dask_bio_pipeline``, the
        scores are computed with this client as scheduler, and the client is
        shut down before returning.
    verbose
        Not used by this function.

    Raises
    ------
    ValueError
        If fewer than two samples are given.
    """
    # The previous check (``== 1``) let an empty list through, although the
    # message states that at least two samples are required.
    if len(samples) < 2:
        raise ValueError(
            "It's necessary to have at least two samples for the comparison")

    sample_sets = [
        SampleSet(
            [
                DelayedSample(functools.partial(bob.io.base.load, s),
                              key=str(s))
            ],
            key=str(s),
            biometric_id=str(i),
        ) for i, s in enumerate(samples)
    ]
    if dask_client is not None:
        pipeline = dask_bio_pipeline(pipeline)

    # First row of the table is the header listing the compared samples.
    table = [list(samples)]
    biometric_references = pipeline.create_biometric_reference(sample_sets)
    scores = pipeline.compute_scores(sample_sets, biometric_references)
    if dask_client is not None:
        scores = scores.compute(scheduler=dask_client)
    for sset in scores:
        table.append([str(s.data) for s in sset])

    print("All vs All comparison")
    print(tabulate(table))

    if dask_client is not None:
        dask_client.shutdown()
コード例 #6
0
ファイル: gbu.py プロジェクト: bioidiap/bob.bio.face
    def _make_sampleset_from_filedict(self, file_dict, reference_ids=None):
        samplesets = []
        for key in file_dict:
            f = file_dict[key]

            annotations_key = os.path.basename(f.path)

            kwargs = ({
                "references": reference_ids
            } if reference_ids is not None else {})

            samplesets.append(
                SampleSet(
                    key=f.path,
                    reference_id=f.reference_id,
                    subject_id=f.subject_id,
                    **kwargs,
                    samples=[
                        DelayedSample(
                            key=f.path,
                            annotations=self.annotations[annotations_key],
                            load=partial(
                                bob.io.base.load,
                                os.path.join(
                                    self.original_directory,
                                    f.path + self.extension,
                                ),
                            ),
                        )
                    ],
                ))
        return samplesets
コード例 #7
0
ファイル: lfw.py プロジェクト: bioidiap/bob.bio.face
    def background_model_samples(self):
        """Return the training set for the open-set protocols (names starting with "o").

        The result is made of the enrollment sets returned by
        :py:meth:`references` followed by one extra sampleset gathering the
        "training-unknown" images, whose key, reference id and subject id
        are all "unknown".

        Returns
        -------

        [bob.pipelines.SampleSet]
            The enrollment samplesets plus the "unknown" sampleset.  An empty
            list when the protocol name does not start with "o".

        """
        if self.protocol[0] != "o":
            return []

        enrollmentset = self.references()

        # image name -> (image path, annotations or None)
        unknown_images = {}
        for image in self.pairs["training-unknown"]:
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                self.make_path_from_filename(image) + self.extension,
            )

            annotations = None
            if self.annotation_directory is not None:
                annotations = self._extract(
                    os.path.join(
                        self.annotation_directory,
                        self.make_path_from_filename(image) +
                        self.annotation_extension,
                    )
                )

            unknown_images[image] = (image_path, annotations)

        # a single sampleset holds every "unknown" training image
        unknown_samples = [
            DelayedSample(
                key=image,
                load=partial(bob.io.base.load, path),
                annotations=image_annotations,
            )
            for image, (path, image_annotations) in unknown_images.items()
        ]
        unknown_set = SampleSet(
            key="unknown",
            reference_id="unknown",
            subject_id="unknown",
            samples=unknown_samples,
        )
        return enrollmentset + [unknown_set]
コード例 #8
0
def test_sampleset_collection():
    """Check the list-like API of SampleSet and the delayed SampleSet variants."""

    n_samples = 10
    X = np.ones(shape=(n_samples, 2), dtype=int)
    sampleset = SampleSet(
        [Sample(data, key=str(i)) for i, data in enumerate(X)], key="1")
    assert len(sampleset) == n_samples

    # Testing insert
    sample = Sample(X, key=100)
    sampleset.insert(1, sample)
    assert len(sampleset) == n_samples + 1

    # Testing delete
    del sampleset[0]
    assert len(sampleset) == n_samples

    # Testing set
    sampleset[0] = copy.deepcopy(sample)

    # Testing iterator
    for i in sampleset:
        assert isinstance(i, Sample)

    def _load(path):
        # Close the handle explicitly: the file lives in a temporary
        # directory that is removed right after the assertions, and a
        # dangling handle can make that cleanup fail on some platforms.
        with open(path, "rb") as f:
            return pickle.loads(f.read())

    # Testing delayed sampleset, plain and cached variants
    for delayed_cls in (DelayedSampleSet, DelayedSampleSetCached):
        with tempfile.TemporaryDirectory() as dir_name:

            samples = [Sample(data, key=str(i)) for i, data in enumerate(X)]
            filename = os.path.join(dir_name, "samples.pkl")
            with open(filename, "wb") as f:
                f.write(pickle.dumps(samples))

            sampleset = delayed_cls(functools.partial(_load, filename), key=1)

            assert len(sampleset) == n_samples
            assert sampleset.samples == samples
0
def get_fake_sample_set(face_size=(160, 160), purpose="bioref"):
    """Return a one-element list with a single-sample ``SampleSet``.

    The image location and the annotations come from the module-level
    ``images`` mapping, indexed by ``purpose``.

    Parameters
    ----------
    face_size
        Not used by this function.
    purpose
        Entry of ``images`` to use.  "bioref" yields key "1"; any other
        value yields key "2".

    Returns
    -------
    list
        ``[SampleSet]`` whose key and reference id are the key above and
        whose ``references`` is ``["1"]``.
    """
    entry = images[purpose]
    image_source = entry[0]
    landmarks = entry[1]

    if purpose == "bioref":
        identity = "1"
    else:
        identity = "2"

    delayed = DelayedSample(
        load=functools.partial(bob.io.base.load, image_source),
        key=identity,
        annotations=landmarks,
    )
    fake_set = SampleSet(
        [delayed],
        key=identity,
        reference_id=identity,
        references=["1"],
    )
    return [fake_set]
コード例 #10
0
ファイル: youtube.py プロジェクト: bioidiap/bob.bio.video
    def _make_sample_set(
        self, reference_id, subject_id, sample_path, references=None
    ):
        """Create a ``SampleSet`` holding one lazily loaded video sample.

        Parameters
        ----------
        reference_id
            Stringified and used as both ``key`` and ``reference_id`` of the
            set.
        subject_id
            Stringified and used as ``subject_id`` of the set.
        sample_path
            Path relative to ``self.original_directory``; its string form is
            the key of the sample.
        references
            Forwarded to the set as ``references`` unless None.

        Returns
        -------
        SampleSet
            A set with a single ``DelayedSample``.  Both the data
            (``self._load_video_from_path``) and the annotations
            (``self._annotations``) are bound to the full path and only
            evaluated on demand.
        """
        full_path = os.path.join(self.original_directory, sample_path)

        optional = {"references": references} if references is not None else {}

        return SampleSet(
            key=str(reference_id),
            reference_id=str(reference_id),
            subject_id=str(subject_id),
            **optional,
            samples=[
                DelayedSample(
                    key=str(sample_path),
                    load=partial(self._load_video_from_path, full_path),
                    # annotations are delayed as well
                    delayed_attributes={
                        "annotations": partial(self._annotations, full_path)
                    },
                )
            ],
        )
コード例 #11
0
    def _create_random_sample_set(self, n_sample_set=10, n_samples=2, seed=10):

        # Just generate random samples
        np.random.seed(seed)
        sample_set = [
            SampleSet(
                samples=[],
                key=str(i),
                reference_id=str(i),
                subject_id=str(i),
                gender=np.random.choice(self.gender_choices),
                metadata_1=np.random.choice(self.metadata_1_choices),
            )
            for i in range(n_sample_set)
        ]

        offset = 0
        for i, s in enumerate(sample_set):
            if self.one_d:
                s.samples = self._create_random_1dsamples(
                    n_samples, offset, self.dim
                )
            else:
                s.samples = self._create_random_2dsamples(
                    n_samples, offset, self.dim
                )
            if self.contains_fta and i % 2:
                for sample in s.samples[::2]:
                    sample.data = None
            if self.all_samples_fta:
                for sample in s.samples:
                    sample.data = None

            offset += n_samples
            pass

        return sample_set
コード例 #12
0
ファイル: lfw.py プロジェクト: bioidiap/bob.bio.face
    def references(self, group="dev"):
        """Return the enrollment SampleSets of the current protocol.

        The list is built on first use and cached in
        ``self.references_dict`` under the protocol name; later calls return
        the cached list.

        * "view2": one single-image SampleSet per key of ``self.pairs``.
        * protocols starting with "o": one SampleSet per key of
          ``self.pairs["enroll"]`` holding all images listed for that key.
        * any other protocol: an empty list.

        Annotations are read with ``self._extract`` only when
        ``self.annotation_directory`` is set; otherwise they are None.

        Parameters
        ----------
        group
            Not used by this method.

        Returns
        -------
        list
            The cached list of SampleSets.
        """
        if self.protocol in self.references_dict:
            return self.references_dict[self.protocol]

        enrolled = []
        self.references_dict[self.protocol] = enrolled

        if self.protocol == "view2":
            for key in self.pairs:
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    key + self.extension,
                )

                annotations = None
                if self.annotation_directory is not None:
                    annotations = self._extract(
                        os.path.join(
                            self.annotation_directory,
                            key + self.annotation_extension,
                        )
                    )

                enrolled.append(
                    SampleSet(
                        key=key,
                        reference_id=key,
                        subject_id=self.subject_id_from_filename(key),
                        samples=[
                            DelayedSample(
                                key=key,
                                reference_id=key,
                                load=partial(bob.io.base.load, image_path),
                                subject_id=self.subject_id_from_filename(key),
                                annotations=annotations,
                            )
                        ],
                    )
                )

        elif self.protocol[0] == "o":
            for key in self.pairs["enroll"]:
                # image name -> (image path, annotations or None)
                enroll_images = {}
                for image in self.pairs["enroll"][key]:
                    image_path = os.path.join(
                        self.original_directory,
                        self.image_relative_path,
                        self.make_path_from_filename(image) +
                        self.extension,
                    )

                    annotations = None
                    if self.annotation_directory is not None:
                        annotations = self._extract(
                            os.path.join(
                                self.annotation_directory,
                                self.make_path_from_filename(image) +
                                self.annotation_extension,
                            )
                        )

                    enroll_images[image] = (image_path, annotations)

                # one sampleset gathers the images of the same person
                person_samples = [
                    DelayedSample(
                        key=image,
                        reference_id=key,
                        load=partial(bob.io.base.load, path),
                        annotations=image_annotations,
                    )
                    for image, (path, image_annotations)
                    in enroll_images.items()
                ]
                enrolled.append(
                    SampleSet(
                        key=key,
                        reference_id=key,
                        subject_id=key,
                        samples=person_samples,
                    )
                )

        return self.references_dict[self.protocol]
コード例 #13
0
ファイル: lfw.py プロジェクト: bioidiap/bob.bio.face
    def probes(self, group="dev"):
        """Return the probe SampleSets of the current protocol.

        The list is built on first use and cached in ``self.probes_dict``
        under the protocol name; later calls return the cached list.

        * "view2": one single-image SampleSet per key of
          ``self.probe_reference_keys``; its ``references`` is a deep copy of
          the value stored under that key.
        * protocols starting with "o": one single-image SampleSet per probe
          image.  Images from ``self.pairs["probe"]`` use their key as
          subject id; images from ``self.pairs["o1"]`` (protocols "o1" and
          "o3") and ``self.pairs["o2"]`` (protocols "o2" and "o3") use
          "unknown".
        * any other protocol: an empty list.

        Annotations are read with ``self._extract`` only when
        ``self.annotation_directory`` is set; otherwise they are None.

        Parameters
        ----------
        group
            Not used by this method.

        Returns
        -------
        list
            The cached list of SampleSets.
        """
        if self.protocol in self.probes_dict:
            return self.probes_dict[self.protocol]

        collected = []
        self.probes_dict[self.protocol] = collected

        if self.protocol == "view2":
            for key in self.probe_reference_keys:
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    key + self.extension,
                )

                annotations = None
                if self.annotation_directory is not None:
                    annotations = self._extract(
                        os.path.join(
                            self.annotation_directory,
                            key + self.annotation_extension,
                        )
                    )

                collected.append(
                    SampleSet(
                        key=key,
                        reference_id=key,
                        subject_id=self.subject_id_from_filename(key),
                        # deep copying to avoid bizarre issues with dask
                        references=copy.deepcopy(
                            self.probe_reference_keys[key]
                        ),
                        samples=[
                            DelayedSample(
                                key=key,
                                reference_id=key,
                                subject_id=self.subject_id_from_filename(key),
                                load=partial(bob.io.base.load, image_path),
                                annotations=annotations,
                            )
                        ],
                    )
                )

        elif self.protocol[0] == "o":
            # known probes first, labelled with their own key
            labelled = []
            for subject in self.pairs["probe"]:
                for image in self.pairs["probe"][subject]:
                    labelled.append((image, subject))

            # then the unknown probes the protocol asks for ("o3" takes both)
            for unknown_group in ("o1", "o2"):
                if self.protocol in (unknown_group, "o3"):
                    labelled += [
                        (image, "unknown")
                        for image in self.pairs[unknown_group]
                    ]

            for image, subject in labelled:
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    self.make_path_from_filename(image) + self.extension,
                )

                annotations = None
                if self.annotation_directory is not None:
                    annotations = self._extract(
                        os.path.join(
                            self.annotation_directory,
                            self.make_path_from_filename(image) +
                            self.annotation_extension,
                        )
                    )

                # one probe sampleset per image
                collected.append(
                    SampleSet(
                        key=image,
                        reference_id=image,
                        subject_id=subject,
                        samples=[
                            DelayedSample(
                                key=image,
                                reference_id=image,
                                load=partial(bob.io.base.load, image_path),
                                annotations=annotations,
                            )
                        ],
                    )
                )

        return self.probes_dict[self.protocol]
コード例 #14
0
    def score_sample_templates(self, probe_samples, enroll_samples,
                               score_all_vs_all):
        """Computes the similarity score between all probe and enroll templates.

        Scores are obtained from ``self.compare``. Probes whose data is
        rejected by ``_data_valid`` are not compared; their scores are NaN.

        Parameters
        ----------
        probe_samples : list
            A list (length N) of Samples containing probe templates.

        enroll_samples : list
            A list (length M) of Samples containing enroll templates.

        score_all_vs_all : bool
            If True, the similarity scores between all probe and enroll templates
            are computed. If False, the similarity scores between the probes and
            their associated enroll templates (those whose ``reference_id`` is
            listed in ``probe.references``) are computed.

        Returns
        -------
        score_samplesets : list
            A list of N SampleSets each containing a list of M score Samples if score_all_vs_all
            is True. Otherwise, a list of N SampleSets each containing a list of <=M score Samples
            depending on the database. Each SampleSet has the probe as parent and each
            score Sample has the corresponding enroll sample as parent.

        Raises
        ------
        ValueError
            If score_all_vs_all is False and none of the enroll samples matches
            the references of a probe.
        AssertionError
            If the scores do not have the expected shape.
        """
        logger.debug(
            f"{_frmt(self)}.score_sample_templates(... score_all_vs_all={score_all_vs_all})"
        )
        # The result holds one SampleSet per probe; each Sample inside that
        # SampleSet carries the score of the probe against one enroll sample.
        score_samplesets = []
        if score_all_vs_all:
            # Only the probes with valid data are sent to self.compare
            probe_data = [s.data for s in probe_samples]
            valid_probe_indices = [
                i for i, d in enumerate(probe_data) if _data_valid(d)
            ]
            valid_probe_data = [probe_data[i] for i in valid_probe_indices]
            scores = self.compare(SampleBatch(enroll_samples),
                                  valid_probe_data)
            scores = np.asarray(scores, dtype=float)

            if len(valid_probe_indices) != len(probe_data):
                # inject None scores for invalid probe samples
                # (work on the transposed list so that one row is one probe)
                scores: list = scores.T.tolist()
                for i in range(len(probe_data)):
                    if i not in valid_probe_indices:
                        scores.insert(i, [None] * len(enroll_samples))
                # transpose back to original shape
                # (with dtype=float the injected None values become NaN)
                scores = np.array(scores, dtype=float).T

            # rows are enroll samples, columns are probe samples
            expected_shape = (len(enroll_samples), len(probe_samples))
            assert scores.shape == expected_shape, (
                "The shape of the similarity scores (%s) does not match the expected shape (%s)"
                % (scores.shape, expected_shape))
            for j, probe in enumerate(probe_samples):
                samples = []
                for i, enroll in enumerate(enroll_samples):
                    samples.append(Sample(scores[i, j], parent=enroll))
                score_samplesets.append(SampleSet(samples, parent=probe))
        else:
            for probe in probe_samples:
                # ids are compared as strings on both sides
                references = [str(ref) for ref in probe.references]
                # get the indices of references for enroll samplesets
                indices = [
                    i for i, enroll in enumerate(enroll_samples)
                    if str(enroll.reference_id) in references
                ]
                if not indices:
                    raise ValueError(
                        f"No enroll sampleset found for probe {probe} and its required references {references}. "
                        "Did you mean to set score_all_vs_all=True?")
                if not _data_valid(probe.data):
                    # invalid probe: one None score (NaN after the float
                    # conversion below) per selected enroll sample
                    scores = [[None]] * len(indices)
                else:
                    scores = self.compare(
                        SampleBatch([enroll_samples[i] for i in indices]),
                        SampleBatch([probe]),
                    )
                scores = np.asarray(scores, dtype=float)
                # one row per selected enroll sample, a single probe column
                expected_shape = (len(indices), 1)
                assert scores.shape == expected_shape, (
                    "The shape of the similarity scores (%s) does not match the expected shape (%s)"
                    % (scores.shape, expected_shape))
                samples = []
                for i, j in enumerate(indices):
                    samples.append(
                        Sample(scores[i, 0], parent=enroll_samples[j]))
                score_samplesets.append(SampleSet(samples, parent=probe))

        return score_samplesets