def calc_indirect_bias(self, word1, word2):
    """Calculate the indirect bias between two words.

    Based on the amount of shared projection of the words on the
    direction. Also called PairBias.

    The value is ``(s - s_perp) / s`` where ``s`` is the inner product
    of the two normalized word vectors and ``s_perp`` is the cosine
    similarity of the same vectors after ``reject_vector`` was applied
    to each of them with the direction.

    :param str word1: First word
    :param str word2: Second word
    :return: The indirect bias between the two words
    """
    self._is_direction_identified()

    unit1, unit2 = (normalize(self[word]) for word in (word1, word2))

    # Similarity of the full (normalized) vectors.
    full_similarity = unit1 @ unit2

    # Similarity of what `reject_vector` leaves of each vector.
    rejected_similarity = cosine_similarity(
        reject_vector(unit1, self.direction),
        reject_vector(unit2, self.direction))

    return (full_similarity - rejected_similarity) / full_similarity
def _equalize(self, equality_sets):  # pylint: disable=R0914
    """Equalize, in place, the word vectors of every equality set.

    For each set, the mean of the normalized word vectors is split by
    ``project_reject_vector`` into a projected and a rejected part with
    respect to ``self.direction``. Every word of the set is then
    re-assigned (through ``update_word_vector``) the vector
    ``rejected_center + scaling * normalize(projected - projected_center)``
    where ``scaling = sqrt(1 - ||rejected_center||^2)``.

    When ``self._verbose`` is set, a table with the per-word scalars is
    printed. ``self.model.init_sims(replace=True)`` is called at the end.

    :param equality_sets: Iterable of word collections; each collection
                          is equalized independently.
    """
    self._is_direction_identified()

    if self._verbose:
        report_rows = []

    for set_index, set_words in enumerate(equality_sets):
        unit_vectors = [normalize(self[word]) for word in set_words]
        mean_vector = np.mean(unit_vectors, axis=0)

        center_on_direction, center_off_direction = project_reject_vector(
            mean_vector, self.direction)

        off_direction_norm = np.linalg.norm(center_off_direction)
        scaling = np.sqrt(1 - off_direction_norm ** 2)

        for word, unit_vector in zip(set_words, unit_vectors):
            on_direction = project_vector(unit_vector, self.direction)
            offset_unit = normalize(on_direction - center_on_direction)

            # This differs from the reference code of Bolukbasi, which
            # effectively uses
            #   equalized_vector = rejected_center + scaling * self.direction
            # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
            # The two presumably agree only for equality sets of size 2
            # (pairs) - not sure! - because for a pair the two offsets
            # are opposite to each other and lie along the direction.
            # The form used here follows the article.
            equalized_vector = center_off_direction + scaling * offset_unit

            update_word_vector(self.model, word, equalized_vector)

            if self._verbose:
                report_rows.append({
                    'equality_set_index': set_index,
                    'word': word,
                    'scaling': scaling,
                    'projected_scalar': unit_vector @ self.direction,
                    'equalized_projected_scalar': (equalized_vector
                                                   @ self.direction),
                })

    if self._verbose:
        print('Equalize Words Data (all equal for 1-dim bias space (direction):')
        report_df = pd.DataFrame(report_rows)
        report_df = report_df.set_index(['equality_set_index', 'word'])
        print(tabulate(report_df, headers='keys'))

    self.model.init_sims(replace=True)
def calc_projection_data(self, words):
    """Calculate projection, projected and rejected vectors of a words list.

    Each word vector is normalized and handed to ``project_params``
    together with ``self.direction``.

    :param list words: List of words
    :return: :class:`pandas.DataFrame` with one row per word and the
             columns ``word``, ``vector`` (the vector as stored, not
             normalized), ``projection``, ``projected_vector`` and
             ``rejected_vector``
    """
    # Same guard the sibling methods use before touching
    # `self.direction`. It replaces a per-word call to
    # `self.project_on_direction(word)` whose result was immediately
    # overwritten by the `project_params` output below.
    self._is_direction_identified()

    projection_data = []
    for word in words:
        vector = self[word]

        (projection,
         projected_vector,
         rejected_vector) = project_params(normalize(vector),
                                           self.direction)

        projection_data.append({'word': word,
                                'vector': vector,
                                'projection': projection,
                                'projected_vector': projected_vector,
                                'rejected_vector': rejected_vector})

    return pd.DataFrame(projection_data)
def generate_closest_words_indirect_bias(self,
                                         neutral_positive_end,
                                         neutral_negative_end,
                                         words=None, n_extreme=5):
    """
    Generate closest words to a neutral direction and their indirect bias.

    The direction of the neutral words is used to find
    the most extreme words.
    The indirect bias is calculated between the most extreme words
    and the closest end.

    :param str neutral_positive_end: A word that define the positive side
                                     of the neutral direction.
    :param str neutral_negative_end: A word that define the negative side
                                     of the neutral direction.
    :param list words: List of words to project on the neutral direction.
                       Required in practice: the ``None`` default is
                       kept only for signature compatibility.
    :param int n_extreme: The number for the most extreme words
                          (positive and negative) to show.
    :return: Data Frame of the most extreme words
             with their projection scores and indirect biases,
             indexed by ``(end, word)``.
    :raises TypeError: if ``words`` is ``None``.
    """
    # Iterating the `None` default used to fail with an opaque
    # "'NoneType' object is not iterable"; fail early with a clear
    # message of the same exception type instead.
    if words is None:
        raise TypeError('words should be an iterable of words, '
                        'but None was given')

    # `words` is walked more than once below; materialize it so that
    # one-shot iterables (e.g. generators) work as well.
    words = list(words)

    neutral_direction = normalize(self[neutral_positive_end]
                                  - self[neutral_negative_end])

    vectors = [normalize(self[word]) for word in words]

    df = (pd.DataFrame([{'word': word,
                         'projection': vector @ neutral_direction}
                        for word, vector in zip(words, vectors)])
          .sort_values('projection', ascending=False))

    df = take_two_sides_extreme_sorted(df, n_extreme,
                                       'end',
                                       neutral_positive_end,
                                       neutral_negative_end)

    # Indirect bias of every selected word against the end it was
    # assigned to (the 'end' column).
    df['indirect_bias'] = df.apply(
        lambda r: self.calc_indirect_bias(r['word'], r['end']),
        axis=1)

    df = df.set_index(['end', 'word'])
    df = df[['projection', 'indirect_bias']]

    return df
def learn_full_specific_words(self, seed_specific_words,
                              max_non_specific_examples=None, debug=None):
    """Learn specific words given a list of seed specific words.

    Using Linear SVM.

    The training set is made of every vocabulary word that appears in
    the seed list (label 1) and of the first
    ``max_non_specific_examples`` vocabulary words, in vocabulary
    iteration order, that do not (label 0). Vectors are scaled to unit
    norm before fitting. The fitted classifier is then applied to every
    word of the vocabulary.

    :param list seed_specific_words: List of seed specific words
    :param int max_non_specific_examples: The number of non-specific words
                                          to sample for training;
                                          ``MAX_NON_SPECIFIC_EXAMPLES``
                                          when ``None``
    :param bool debug: When true, the training matrix and labels are
                       returned as well; ``None`` is treated as false
    :return: List of learned specific words and the classifier object;
             with ``debug`` also the training ``X`` and ``y``
    """
    if debug is None:
        debug = False

    if max_non_specific_examples is None:
        max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

    # Built once: membership is tested for every vocabulary word, and
    # a list lookup there would be linear in the seed list size.
    seed_specific_set = set(seed_specific_words)

    data = []
    non_specific_example_count = 0
    for word in self.model.vocab:
        is_specific = word in seed_specific_set

        if not is_specific:
            non_specific_example_count += 1
            # Only the first `max_non_specific_examples` negatives
            # are kept; positives are always kept.
            if non_specific_example_count > max_non_specific_examples:
                continue

        data.append((self[word], is_specific))

    # Seeds NumPy's global random state so the shuffle is reproducible.
    np.random.seed(RANDOM_STATE)
    np.random.shuffle(data)

    X, y = zip(*data)

    X = np.array(X)
    X /= np.linalg.norm(X, axis=1)[:, None]

    y = np.array(y).astype('int')

    clf = LinearSVC(C=1, class_weight='balanced',
                    random_state=RANDOM_STATE)

    clf.fit(X, y)

    full_specific_words = []
    for word in self.model.vocab:
        # `predict` expects a 2D input, hence the single-row list.
        vector = [normalize(self[word])]
        if clf.predict(vector):
            full_specific_words.append(word)

    if not debug:
        return full_specific_words, clf

    return full_specific_words, clf, X, y
def _identify_subspace_by_pca(self, definitional_pairs, n_components):
    """Fit a PCA on the centered vectors of the definitional pairs.

    For every pair, both word vectors are normalized and the midpoint
    of the two is subtracted from each of them; the resulting vectors
    (two per pair) are the samples the PCA is fitted on.

    When ``self._verbose`` is set, the explained variance ratio of
    every principal component is printed as a table.

    :param definitional_pairs: Iterable of ``(word1, word2)`` pairs
    :param int n_components: Passed to ``PCA`` as ``n_components``
    :return: The fitted ``PCA`` object
    """
    centered_vectors = []
    for first_word, second_word in definitional_pairs:
        first = normalize(self[first_word])
        second = normalize(self[second_word])
        midpoint = (first + second) / 2

        centered_vectors.extend([first - midpoint, second - midpoint])

    pca = PCA(n_components=n_components)
    pca.fit(centered_vectors)

    if self._verbose:
        # Components are numbered from 1 in the printed table.
        numbered_ratios = enumerate(pca.explained_variance_ratio_, start=1)
        print(tabulate(numbered_ratios,
                       headers=['Principal Component',
                                'Explained Variance Ratio']))

    return pca
def _identify_direction(self, positive_end, negative_end,
                        definitional, method='pca'):
    """Identify the bias direction and store it, with its ends, on self.

    Sets ``self.direction``, ``self.positive_end`` and
    ``self.negative_end``.

    :param str positive_end: Word that names the positive end
    :param str negative_end: Word that names the negative end
    :param definitional: Words that define the direction; how it is
                         read depends on ``method``:
                         ``'single'`` uses the words at index 0 and 1,
                         ``'sum'`` uses the two word groups at index 0
                         and 1, and ``'pca'`` passes it on as an
                         iterable of word pairs.
    :param str method: One of ``DIRECTION_METHODS``
    :raises ValueError: if ``method`` is unknown or if both ends
                        are the same word
    :raises RuntimeError: with the ``'pca'`` method, if the first
                          principal component explains less than
                          ``FIRST_PC_THRESHOLD`` of the variance
    """
    if method not in DIRECTION_METHODS:
        raise ValueError('method should be one of {}, {} was given'.format(
            DIRECTION_METHODS, method))

    if positive_end == negative_end:
        raise ValueError('positive_end and negative_end '
                         'should be different, and not the same "{}"'
                         .format(positive_end))
    if self._verbose:
        print('Identify direction using {} method...'.format(method))

    direction = None

    if method == 'single':
        direction = normalize(normalize(self[definitional[0]])
                              - normalize(self[definitional[1]]))

    elif method == 'sum':
        group1_sum_vector = np.sum([self[word]
                                    for word in definitional[0]], axis=0)
        group2_sum_vector = np.sum([self[word]
                                    for word in definitional[1]], axis=0)

        diff_vector = (normalize(group1_sum_vector)
                       - normalize(group2_sum_vector))

        direction = normalize(diff_vector)

    elif method == 'pca':
        pca = self._identify_subspace_by_pca(definitional, 10)
        first_pc_ratio = pca.explained_variance_ratio_[0]
        if first_pc_ratio < FIRST_PC_THRESHOLD:
            raise RuntimeError('The Explained variance '
                               'of the first principal component should be '
                               'at least {}, but it is {}'
                               .format(FIRST_PC_THRESHOLD,
                                       first_pc_ratio))
        direction = pca.components_[0]

        # if direction is opposite (e.g. we cannot control
        # what the PCA will return), flip it so that it points
        # from the negative end towards the positive end
        ends_diff_projection = cosine_similarity((self[positive_end]
                                                  - self[negative_end]),
                                                 direction)
        if ends_diff_projection < 0:
            direction = -direction  # pylint: disable=invalid-unary-operand-type

    self.direction = direction
    self.positive_end = positive_end
    self.negative_end = negative_end
def plot_most_biased_clustering(biased, debiased,
                                seed='ends', n_extreme=500,
                                random_state=1):
    """Plot clustering as classification of biased neutral words.

    :param biased: Biased word embedding of
                   :class:`~ethically.we.bias.BiasWordEmbedding`.
    :param debiased: Debiased word embedding of
                     :class:`~ethically.we.bias.BiasWordEmbedding`.
    :param seed: The definition of the seed vector.
                 Either by a tuple of two word ends,
                 or by `'ends'` for the pre-defined ends
                 or by `'direction'` for the pre-defined direction vector.
    :param n_extreme: The number of extreme biased neutral words to use
                      on each side. If there are fewer neutral words
                      than that, all of them are used.
    :param random_state: Passed on to the clustering plot of both
                         embeddings.
    :return: Tuple of list of ax objects of the plot,
             and a dictionary with the most positive
             and negative words.

    Based on:

    - Gonen, H., & Goldberg, Y. (2019).
      `Lipstick on a Pig:
      Debiasing Methods Cover up Systematic Gender Biases
      in Word Embeddings But do not Remove Them
      <https://arxiv.org/abs/1903.03862>`_.
      arXiv preprint arXiv:1903.03862.

    - https://github.com/gonenhila/gender_bias_lipstick
    """
    # pylint: disable=protected-access,too-many-locals,line-too-long

    # NOTE: kept as assertions so callers still see AssertionError;
    # be aware that they are skipped when Python runs with -O.
    assert biased.positive_end == debiased.positive_end, \
        'Positive ends should be the same.'
    assert biased.negative_end == debiased.negative_end, \
        'Negative ends should be the same.'

    seed_vector, _, _ = get_seed_vector(seed, biased)

    neutral_words = biased._data['neutral_words']

    # Ascending by projection on the seed vector (ties by word).
    neutral_word_projections = sorted(
        (normalize(biased[word]) @ seed_vector, word)
        for word in neutral_words)

    # An explicit start index is used for the tail because
    # `[-n_extreme:]` would return the whole list for n_extreme == 0.
    tail_start = max(len(neutral_word_projections) - n_extreme, 0)

    most_negative_words = tuple(
        word for _, word in neutral_word_projections[:n_extreme])
    most_positive_words = tuple(
        word for _, word in neutral_word_projections[tail_start:])

    most_biased_neutral_words = most_negative_words + most_positive_words

    # Labels are sized from the words actually selected, so they stay
    # aligned with the words even when fewer than `n_extreme` neutral
    # words are available.
    y_bias = ([False] * len(most_negative_words)
              + [True] * len(most_positive_words))

    _, axes = plt.subplots(1, 2, figsize=(20, 5))

    acc_biased = biased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                      y_bias,
                                                      random_state=random_state,
                                                      ax=axes[0])
    axes[0].set_title('Biased - Accuracy={}'.format(acc_biased))

    acc_debiased = debiased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                          y_bias,
                                                          random_state=random_state,
                                                          ax=axes[1])
    axes[1].set_title('Debiased - Accuracy={}'.format(acc_debiased))

    return axes, {biased.positive_end: most_positive_words,
                  biased.negative_end: most_negative_words}