def project_profession_words(E, professions):
    """
    Derive the gender axis of an embedding and project professions onto it.

    The axis is the first principal component returned by ``doPCA`` for the
    definitional pairs loaded from the embedding's vocabulary.

    :param object E: WordEmbedding object.
    :param list professions: List of professions.
    :returns: projection (as returned by ``E.profession_stereotypes``),
        profession words (element 1 of every projection entry), gender axis
    """
    # Gender direction: first principal component of the definitional pairs.
    definitional_pairs = load_definitional_pairs(E.words)
    gender_axis = doPCA(definitional_pairs, E).components_[0]

    # Project the professions onto that direction; print_firstn=0 is passed
    # through unchanged (presumably it silences printing -- confirm in
    # profession_stereotypes).
    projection = E.profession_stereotypes(
        professions, gender_axis, print_firstn=0
    )

    # Element 1 of each entry is presumably the profession word itself.
    profession_words = [entry[1] for entry in projection]
    return projection, profession_words, gender_axis
def main():
    """
    Run the debiasing experiments for every embedding in ``FLAGS.embeddings``.

    For each embedding: load it, compute the gender direction, apply hard
    debiasing and (unless the embedding name ends in ``_large`` or is exactly
    ``large``) soft debiasing, optionally show the bias after each stage, and
    optionally run the benchmarks.
    """

    def report(embedding, direction, professions, label):
        # Show the bias for one embedding unless display is disabled.
        if not FLAGS.no_show:
            show_bias(embedding, direction, professions, info=label)

    # Print basic experiment information.
    print_details()

    rule = "#" * 56
    for embed in FLAGS.embeddings:
        # Banner announcing which embedding is being processed.
        print("\n" + rule)
        print("# " + f"Doing the {embed} embedding".center(53) + "#")
        print(rule)

        # Load the embedding.
        E = WordEmbedding(embed)

        # Professions and gender related lists from Bolukbasi et al.
        # for word2vec.
        gender_words, defs, equalize_pairs, profession_words = load_data(
            E.words
        )

        # Gender direction: first principal component of the definitional
        # pairs.
        v_gender = we.doPCA(defs, E).components_[0]

        # Bias before any debiasing.
        report(E, v_gender, profession_words, "with bias")

        # Hard debiasing.
        E_hard = hard(E, gender_words, defs, equalize_pairs)
        report(E_hard, v_gender, profession_words, "hard debiased")

        # Soft debiasing is only done for small embeddings; "large" ones
        # keep E_soft as None.
        size_tag = embed.split("_")[-1]
        if size_tag == "large":
            E_soft = None
        else:
            E_soft = soft(E, embed, gender_words, defs)
            report(E_soft, v_gender, profession_words, "soft debiased")

        # Run the benchmarks if necessary.
        if not FLAGS.no_bench:
            run_benchmark(E, E_hard, E_soft, embed)
def debias(E, gender_specific_words, definitional, equalize):
    """
    Neutralize and equalize an embedding along its gender direction, in place.

    Steps, in order:

    1. The gender direction is the first component returned by ``we.doPCA``
       for the definitional pairs.
    2. Every word not listed in ``gender_specific_words`` has its vector
       replaced by ``we.drop(vector, gender_direction)``.
    3. The embedding is normalized.
    4. Each equalize pair is expanded to lower, title and upper case. For
       every variant with both words in ``E.index``, the two vectors are
       set to ``y + z * g`` and ``y - z * g``, where ``y`` is the dropped
       midpoint of the pair and ``g`` the gender direction.
    5. The embedding is normalized again.

    :param object E: Embedding exposing ``words``, ``vecs``, ``index``,
        ``v()`` and ``normalize()``.
    :param list gender_specific_words: Words whose vectors are left
        untouched in step 2.
    :param list definitional: Definitional pairs handed to ``we.doPCA``.
    :param list equalize: Pairs of words (strings) to equalize.
    :returns: None. ``E.vecs`` is modified in place.
    """
    gender_direction = we.doPCA(definitional, E).components_[0]

    # Step 2: process all words outside the gender-specific set.
    # presumably we.drop removes the component along gender_direction --
    # confirm against its definition.
    specific_set = set(gender_specific_words)
    for i, w in enumerate(E.words):
        if w not in specific_set:
            E.vecs[i] = we.drop(E.vecs[i], gender_direction)
    E.normalize()

    # Step 4: try every pair in its lower-, title- and upper-case spelling.
    candidates = {
        variant
        for e1, e2 in equalize
        for variant in (
            (e1.lower(), e2.lower()),
            (e1.title(), e2.title()),
            (e1.upper(), e2.upper()),
        )
    }
    print(candidates)

    for a, b in candidates:
        if a not in E.index or b not in E.index:
            continue

        y = we.drop((E.v(a) + E.v(b)) / 2, gender_direction)

        # Clamp the radicand at zero: rounding can leave norm(y) marginally
        # above 1 (assuming unit-length vectors after E.normalize() -- TODO
        # confirm), and np.sqrt of a negative number yields NaN, which would
        # then be written into both vectors.
        z = np.sqrt(np.maximum(1 - np.linalg.norm(y) ** 2, 0))

        # Keep each word on the side of the gender direction it was on.
        if (E.v(a) - E.v(b)).dot(gender_direction) < 0:
            z = -z

        E.vecs[E.index[a]] = z * gender_direction + y
        E.vecs[E.index[b]] = -z * gender_direction + y

    E.normalize()