Ejemplo n.º 1
0
    def _equalize(self, equality_sets):
        """Equalize the word vectors of every equality set.

        For each set, the normalized vectors of its words are averaged and
        the average is split by ``project_reject_vector`` against
        ``self.direction``. Every word of the set is then overwritten (via
        ``update_word_vector``) with the rejected part of the average plus
        a scaled, normalized offset of the word's own projection from the
        projected average.

        When ``self._verbose`` is truthy, a per-word table is printed after
        all sets were processed. The method finishes by calling
        ``self.model.init_sims(replace=True)``.

        Args:
            equality_sets: iterable of word collections; each word is
                looked up with ``self[word]``.
        """
        # pylint: disable=R0914

        # Return value is ignored; presumably this raises when no direction
        # has been identified yet -- confirm against its definition.
        self._is_direction_identified()

        if self._verbose:
            report_rows = []

        for set_idx, set_words in enumerate(equality_sets):
            # All vectors of the set are read before any word is rewritten.
            unit_vectors = [normalize(self[w]) for w in set_words]
            mean_vector = np.mean(unit_vectors, axis=0)

            center_parts = project_reject_vector(mean_vector, self.direction)
            center_projection, center_rejection = center_parts

            # Presumably keeps the equalized vector at unit length, given a
            # unit-length offset orthogonal to the rejected center.
            rejection_norm = np.linalg.norm(center_rejection)
            scale = np.sqrt(1 - rejection_norm**2)

            for current_word, unit_vector in zip(set_words, unit_vectors):
                on_direction = project_vector(unit_vector, self.direction)
                offset = normalize(on_direction - center_projection)

                # NOTE (from the original author, who was not fully sure):
                # the reference implementation by Bolukbasi et al. uses the
                # direction itself,
                #   rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # whereas this follows the article. The two are expected to
                # agree only for sets of size 2, where the offsets of the
                # two words are opposite to each other.
                new_vector = center_rejection + scale * offset

                update_word_vector(self.model, current_word, new_vector)

                if self._verbose:
                    row = {
                        'equality_set_index': set_idx,
                        'word': current_word,
                        'scaling': scale,
                        'projected_scalar': unit_vector @ self.direction,
                        'equalized_projected_scalar':
                        (new_vector @ self.direction),
                    }
                    report_rows.append(row)

        if self._verbose:
            title = ('Equalize Words Data '
                     '(all equal for 1-dim bias space (direction):')
            print(title)
            report_df = pd.DataFrame(report_rows)
            report_df = report_df.set_index(['equality_set_index', 'word'])
            print(tabulate(report_df, headers='keys'))

        self.model.init_sims(replace=True)
Ejemplo n.º 2
0
def test_project_params():
    """Check that project_params and project_reject_vector agree.

    The last two values returned by ``project_params(v, u)`` must match
    the pair returned by ``project_reject_vector(u, v)``.
    """
    v = np.array([1, 2, 3])
    u = np.array([-4, 5, -6])

    # The first returned value is not checked by this test.
    _, params_projection, params_rejection = project_params(v, u)

    # NOTE: the arguments are passed in the opposite order on purpose;
    # presumably the two helpers take vector/direction in swapped
    # positions -- confirm against their definitions.
    direct_projection, direct_rejection = project_reject_vector(u, v)

    comparisons = (
        (params_projection, direct_projection),
        (params_rejection, direct_rejection),
    )
    for actual, desired in comparisons:
        np.testing.assert_allclose(actual, desired)
Ejemplo n.º 3
0
def test_project_params():
    """Check that project_params and project_reject_vector agree.

    The last two values returned by ``project_params(v, u)`` must match
    the pair returned by ``project_reject_vector(u, v)``.
    """
    # The swapped argument order below is intentional, hence the directive.
    # pylint: disable=arguments-out-of-order
    v = np.array([1, 2, 3])
    u = np.array([-4, 5, -6])

    # The first returned value is not checked by this test.
    _, params_projection, params_rejection = project_params(v, u)
    direct_projection, direct_rejection = project_reject_vector(u, v)

    comparisons = (
        (params_projection, direct_projection),
        (params_rejection, direct_rejection),
    )
    for actual, desired in comparisons:
        np.testing.assert_allclose(actual, desired)
Ejemplo n.º 4
0
def test_equalize(gender_biased_w2v_small, is_preforming=True):
    """Verify the outcome of the _equalize method in GenderBiasWE.

    The equality sets are built from the 'equalize_pairs' and
    'definitional_pairs' entries of the fixture's data and expanded with
    ``_generate_pair_candidates``. For every resulting set the test checks
    that each word vector has unit norm, that the two words have opposite
    scalar projections on the direction, and that their rejections are
    equal.

    Args:
        gender_biased_w2v_small: the embedding object under test.
        is_preforming: when true, ``_equalize`` is run before checking;
            otherwise only the checks are executed.
    """
    # pylint: disable=line-too-long
    model = gender_biased_w2v_small

    # Build the two sets separately and merge them in place.
    pairs = {tuple(words) for words in model._data['equalize_pairs']}
    pairs.update(
        {tuple(words) for words in model._data['definitional_pairs']})
    equality_sets = model._generate_pair_candidates(pairs)

    if is_preforming:
        model._equalize(equality_sets)

    for word_set in equality_sets:
        projections = []
        rejections = []

        for word in word_set:
            embedding = model[word]

            # every word vector must have unit length
            np.testing.assert_allclose(np.linalg.norm(embedding),
                                       1,
                                       atol=ATOL)

            projected, rejected = project_reject_vector(
                embedding, model.direction)
            projections.append(projected)
            rejections.append(rejected)

        # The check below is written for equality sets of exactly two
        # words: <e1, d> == -<e2, d>
        assert len(projections) == 2
        first_scalar = projections[0] @ model.direction
        negated_second_scalar = -projections[1] @ model.direction
        np.testing.assert_allclose(first_scalar,
                                   negated_second_scalar,
                                   atol=ATOL)

        # the rejection part must be shared by all vectors of the set
        for other_rejection in rejections[1:]:
            np.testing.assert_allclose(rejections[0],
                                       other_rejection,
                                       atol=ATOL)

    check_all_vectors_unit_length(model)