def project_profession_words(E, professions):
    """
    Get gender axis and project profession words onto this axis.

    :param object E: WordEmbedding object.
    :param list professions: List of professions
    :param list unique_words: List of words present in all
        embeddings to compare.
    :returns: projection, profession words, gender axis
    """
    # Extract definitional word embeddings and determine gender direction.
    defs = load_definitional_pairs(E.words)

    v_gender = doPCA(defs, E).components_[0]

    # Projection on the gender direction.
    sp = E.profession_stereotypes(professions, v_gender, print_firstn=0)

    occupations = [s[1] for s in sp]
    return sp, occupations, v_gender
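
# The profession_stereotypes method used above is not shown in this
# listing; at its core it reduces to a signed dot product with the
# gender axis. A minimal sketch of that projection (hypothetical
# helper, assuming the embedding exposes v(word) and an index dict
# as used elsewhere in this file):
import numpy as np

def profession_projections(E, professions, v_gender):
    # Signed projection of each profession word onto the gender axis;
    # sorting puts the most strongly gendered words at the extremes.
    return sorted(
        (float(np.dot(E.v(w), v_gender)), w)
        for w in professions
        if w in E.index
    )
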
def main():
    # Print basic experiment information
    print_details()

    # For each embedding, do the experiments
    for embed in FLAGS.embeddings:
        print("\n" + "#" * 56)
        print("# " + f"Doing the {embed} embedding".center(53) + "#")
        print("#" * 56)

        # Load the embedding
        E = WordEmbedding(embed)
        # Load professions and gender-related word lists from
        # Bolukbasi et al. for word2vec
        gender_words, defs, equalize_pairs, profession_words = load_data(E.words)
        # Define gender direction with PCA
        v_gender = we.doPCA(defs, E).components_[0]

        # Bias without debiasing
        if not FLAGS.no_show:
            show_bias(E, v_gender, profession_words, info="with bias")

        # Hard debiasing
        E_hard = hard(E, gender_words, defs, equalize_pairs)
        if not FLAGS.no_show:
            show_bias(E_hard, v_gender, profession_words, info="hard debiased")

        E_soft = None
        # Only do soft debiasing for small embeddings
        if embed.split("_")[-1] != "large":
            # Soft debiasing
            E_soft = soft(E, embed, gender_words, defs)
            if not FLAGS.no_show:
                show_bias(E_soft, v_gender, profession_words, info="soft debiased")

        # Run the benchmarks unless disabled
        if not FLAGS.no_bench:
            run_benchmark(E, E_hard, E_soft, embed)
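
# Both main() above and debias() below take components_[0] of doPCA as
# the gender axis. For reference, a sketch of that PCA step in the
# style of Bolukbasi et al.'s debiaswe; the actual we.doPCA may differ
# in details such as the default number of components:
import numpy as np
from sklearn.decomposition import PCA

def doPCA(pairs, embedding, num_components=10):
    # Center each definitional pair (e.g. ("she", "he")) on its mean
    # and collect the residuals; their first principal component is
    # the gender direction used by the callers above.
    matrix = []
    for a, b in pairs:
        center = (embedding.v(a) + embedding.v(b)) / 2
        matrix.append(embedding.v(a) - center)
        matrix.append(embedding.v(b) - center)
    pca = PCA(n_components=num_components)
    pca.fit(np.array(matrix))
    return pca
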
def debias(E, gender_specific_words, definitional, equalize):
    # Gender direction: first principal component of the definitional pairs.
    gender_direction = we.doPCA(definitional, E).components_[0]

    # Neutralize: remove the gender component from every word that is
    # not explicitly gender-specific.
    specific_set = set(gender_specific_words)
    for i, w in enumerate(E.words):
        if w not in specific_set:
            E.vecs[i] = we.drop(E.vecs[i], gender_direction)
    E.normalize()

    # Equalize: consider each pair in lower, title and upper case.
    candidates = {
        x
        for e1, e2 in equalize
        for x in [(e1.lower(), e2.lower()),
                  (e1.title(), e2.title()),
                  (e1.upper(), e2.upper())]
    }
    print(candidates)
    for a, b in candidates:
        if a in E.index and b in E.index:
            # Gender-neutral part of the pair: the debiased midpoint.
            y = we.drop((E.v(a) + E.v(b)) / 2, gender_direction)
            # Offset along the gender axis that restores unit norm.
            z = np.sqrt(1 - np.linalg.norm(y)**2)
            # Keep the pair's original orientation along the axis.
            if (E.v(a) - E.v(b)).dot(gender_direction) < 0:
                z = -z
            E.vecs[E.index[a]] = z * gender_direction + y
            E.vecs[E.index[b]] = -z * gender_direction + y
    E.normalize()
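
# debias() relies on we.drop to remove a vector's component along the
# gender direction (vector rejection). A minimal sketch of that helper:
import numpy as np

def drop(u, v):
    # Subtract the projection of u onto v, leaving u orthogonal to v.
    return u - v * u.dot(v) / v.dot(v)

# In the equalize step above, y is the gender-neutral midpoint of a
# pair and z = sqrt(1 - |y|^2) places the two words symmetrically
# around y along the gender direction while keeping them unit-norm;
# the sign test preserves the pair's original orientation on the axis.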