def soft(E, embed, gender_words, defs):
    """
    Soft debiasing of word embedding E.

    :param WordEmbedding E: Biased word embedding E.
    :param string embed: Name of the embedding.
    :param list gender_words: List of gender specific words.
    :param list defs: List of tuples with definitional pairs.
    :returns: Soft debiased WordEmbedding object.
    """
    print("\nSoft debiasing...")

    if not FLAGS.do_soft:
        # Skip the expensive optimization and load a precomputed
        # soft-debiased embedding instead.
        return WordEmbedding(embed + "_soft_debiased")

    # Soft-debias from scratch: work on a copy so the original embedding
    # E stays untouched, using hyperparameters keyed by embedding family.
    params = SOFT_PARAMS[embed.split("_")[0]]
    debiased = deepcopy(E)
    soft_debias(
        debiased,
        gender_words,
        defs,
        epochs=params["epochs"],
        lr=params["lr"],
        gamma=params["gamma"],
        decrease_times=params["decrease_times"],
    )
    return debiased
def debiasEmbedding(filename, bootstrapped=False):
    """
    Hard-debias the embedding ./embeddings/<filename> and save the result
    as ./embeddings/debiased_<filename>.

    The work is skipped when the output file already exists, unless
    ``bootstrapped`` is True, in which case it is always recomputed.

    :param str filename: Name of the embedding file under ./embeddings/.
    :param bool bootstrapped: Force recomputation even if output exists.
    """
    outfile_path = './embeddings/' + 'debiased_' + filename
    if not bootstrapped and os.path.exists(outfile_path):
        # Already debiased on a previous run; nothing to do.
        return

    E = WordEmbedding('./embeddings/' + filename)
    with open('./data/definitional_pairs.json', "r") as f:
        defs = json.load(f)
    with open('./data/equalize_pairs.json', "r") as f:
        equalize_pairs = json.load(f)
    with open('./data/gender_specific_seed.json', "r") as f:
        gender_specific_words = json.load(f)

    # Debias in place, then persist the result.
    debias(E, gender_specific_words, defs, equalize_pairs)
    E.save(outfile_path)
def main():
    """Run the bias/debias experiments for every embedding in FLAGS.embeddings."""
    # Print basic experiment information.
    print_details()

    for embed in FLAGS.embeddings:
        banner = "#" * 56
        print("\n" + banner)
        print("# " + f"Doing the {embed} embedding".center(53) + "#")
        print(banner)

        # Load the embedding and the word lists from Bolukbasi et al.
        # (professions, gender-specific words, definitional/equalize pairs).
        E = WordEmbedding(embed)
        gender_words, defs, equalize_pairs, profession_words = load_data(E.words)

        # Gender direction = first principal component of the definitional pairs.
        v_gender = we.doPCA(defs, E).components_[0]

        # Bias of the original (non-debiased) embedding.
        if not FLAGS.no_show:
            show_bias(E, v_gender, profession_words, info="with bias")

        # Hard debiasing.
        E_hard = hard(E, gender_words, defs, equalize_pairs)
        if not FLAGS.no_show:
            show_bias(E_hard, v_gender, profession_words, info="hard debiased")

        # Soft debiasing is only done for the smaller embeddings.
        E_soft = None
        if embed.split("_")[-1] != "large":
            E_soft = soft(E, embed, gender_words, defs)
            if not FLAGS.no_show:
                show_bias(E_soft, v_gender, profession_words, info="soft debiased")

        # Run the benchmarks if necessary.
        if not FLAGS.no_bench:
            run_benchmark(E, E_hard, E_soft, embed)
# Script: load the small google-news word2vec embedding, hard-debias it.
from matplotlib import pyplot as plt
import json
import logging  # FIX: logging was used below but never imported (NameError)
import random

import numpy as np

logging.basicConfig(
    format='%(asctime)s %(levelname)s:%(message)s',
    level=logging.DEBUG,
    datefmt='%I:%M:%S',
)

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professions
from debiaswe.debias import debias

# Load the (small) google-news word2vec embedding.
E = WordEmbedding('./embeddings/w2v_gnews_small.txt')

# Load the word lists driving the debiasing procedure.
with open('./data/definitional_pairs.json', "r") as f:
    defs = json.load(f)
with open('./data/equalize_pairs.json', "r") as f:
    equalize_pairs = json.load(f)
with open('./data/gender_specific_seed.json', "r") as f:
    gender_specific_words = json.load(f)

# Hard-debias the embedding in place.
debias(E, gender_specific_words, defs, equalize_pairs)

# NOTE(review): `Vocabulary` is not imported anywhere in this script, so this
# line raises NameError as written — import it from its defining module.
v = Vocabulary(E.words)
from __future__ import print_function, division from matplotlib import pyplot as plt import json import random import numpy as np import debiaswe as dwe import debiaswe.we as we from debiaswe.we import WordEmbedding from debiaswe.data import load_professions from debiaswe.debias import debias # Step 0: load google news wordvec E = WordEmbedding('./embeddings/w2v_gnews_small.txt') # Step 1: load professions professions = load_professions() profession_words = [p[0] for p in professions] # Step 2: define racial direction names = [ "Emily", "Aisha", "Anne", "Keisha", "Jill", "Tamika", "Allison", "Lakisha", "Laurie", "Tanisha", "Sarah", "Latoya", "Meredith", "Kenya", "Carrie", "Latonya", "Kristen", "Ebony", "Todd", "Rasheed", "Neil", "Tremayne", "Geoffrey", "Kareem", "Brett", "Darnell", "Brendan", "Tyrone", "Greg", "Hakim", "Matthew", "Jamal", "Jay", "Leroy", "Brad", "Jermaine" ] names_group1 = [names[2 * i] for i in range(len(names) // 2)] names_group2 = [names[2 * i + 1] for i in range(len(names) // 2)] vs = [sum(E.v(w) for w in names) for names in (names_group2, names_group1)]
from matplotlib import pyplot as plt
import sys
import json
import random
import numpy as np
import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professionals

'''
Part1: Find Bias
'''

# Step 1: load the data (word embedding and the professionals list).
E = WordEmbedding('./data/file_small.txt')
professionals = load_professionals()

# Step 2: define the regional (Northern vs. Southern China) and gender
# directions directly as word-vector differences.
v_region = E.diff('北京人', '上海人')  # Beijing person - Shanghai person
v_gender = E.diff('男', '女')          # male - female

# Step 3: generate analogies "Northern people : x :: Southern people : y".
# NOTE(review): this handle is opened in append mode and never closed;
# it is only used by the disabled code below — confirm before removing.
# a_region = E.best_analogies_dist_thresh(v_region)
# before_map = {}
output = open("output.txt", "a")
# for (a, b, c) in a_region:
#     before_map[a] = b
#     print(a + "-" + b + "-" + str(c), file=output)
from __future__ import print_function, division from matplotlib import pyplot as plt import json import random import numpy as np import debiaswe as dwe import debiaswe.we as we from debiaswe.we import WordEmbedding from debiaswe.data import load_professions from debiaswe.data import load_sports from debiaswe.debias import debias # Step 0: load google news wordvec E = WordEmbedding('./embeddings/w2v_gnews_small.txt') # Step 1: load professions professions = load_professions() profession_words = [p[0] for p in professions] # Step 1: load sports sports = load_sports() sport_words = [s[0] for s in sports] print("\n") # Step 2: define gender direction v_gender = E.diff('she', 'he') print("\n") # Step 3: generate analogies based on gender = 'man:x :: woman:y'