def main():
    # generate training data
    nb_obs = 100000
    nb_seats = 20000
    R, S, A = simulate_exogenous_vars(nb_obs, R_pct=0.75, S_pct=0.6)  # simulate exogenous variables
    G, L, F = simulate_endogenous_vars(A, R, S)  # simulate endogenous variables

    # set up simple policy (no training needed)
    simplePolicy = SimplePolicy()

    # set up and train naive policy
    naivePolicy = NaivePolicy()
    naivePolicy.train(R, S, G, L, F)

    # set up and train unaware policy
    unawarePolicy = UnawarePolicy()
    unawarePolicy.train(G, L, F)

    # set up and train fair policy
    fairPolicy = FairPolicy()
    fairPolicy.train(R, S, G, L)

    # simulate a fresh dataset for evaluating the trained policies
    R, S, A = simulate_exogenous_vars(nb_obs, R_pct=0.75, S_pct=0.6)  # simulate exogenous variables
    G, L, F = simulate_endogenous_vars(A, R, S)  # simulate endogenous variables

    # form policy dictionary
    P = {'simple': simplePolicy.evaluate(G, L, nb_seats),
         'naive': naivePolicy.evaluate(R, S, G, L, nb_seats),
         'unaware': unawarePolicy.evaluate(G, L, nb_seats),
         'fair': fairPolicy.evaluate(R, S, G, L, nb_seats)}

    # visualize how each policy selects with respect to ability and the protected attributes
    hist_ability_by_policy(P['simple'], A, R, S, G, L, F)
    hist_ability_by_policy(P['naive'], A, R, S, G, L, F)
    hist_ability_by_policy(P['unaware'], A, R, S, G, L, F)
    hist_ability_by_policy(P['fair'], A, R, S, G, L, F)
    kde_ability_by_protected(P['simple'], A, R, S, G, L, F)
    kde_ability_by_protected(P['naive'], A, R, S, G, L, F)
    kde_ability_by_protected(P['unaware'], A, R, S, G, L, F)
    kde_ability_by_protected(P['fair'], A, R, S, G, L, F)

    # compare policy utilities against a ranking derived from the observed outcome F
    unfair_ranking = np.argsort(-F.squeeze())
    print('Utility comparison on ranking from unfair policy:')
    for policy_name in ['simple', 'naive', 'unaware', 'fair']:
        print(_.upper_first(policy_name), 'policy:',
              calc_utility(P[policy_name], unfair_ranking, nb_seats))

    # compare policy utilities against a ranking based only on true ability A
    fair_ranking = np.argsort(-A.squeeze())
    print('Utility comparison on ranking from fair policy (based only on true ability):')
    for policy_name in ['simple', 'naive', 'unaware', 'fair']:
        print(_.upper_first(policy_name), 'policy:',
              calc_utility(P[policy_name], fair_ranking, nb_seats))
def _apply_match_heuristic(page, link_contexts, to_match, entity):
    '''helper for defining heuristics for finding mentions of an entity'''
    matches = u.match_all(to_match, page['plaintext'])
    mentions = sum(link_contexts.values(), [])
    link_context = {entity: [{'text': to_match,
                              'offset': match_index,
                              'page_title': page['title'],
                              'preredirect': _.upper_first(entity)} for match_index in matches]}
    # drop candidate mentions that overlap a mention we already have
    filtered_link_context = {entity: [mention for mention in link_context[entity]
                                      if not _mention_overlaps(mentions, mention)]}
    # when merging, deduplicate an entity's mentions by offset
    concat = lambda dest, src: _.uniq_by(dest + src, 'offset') if dest else src
    if not _.is_empty(filtered_link_context[entity]):
        return _.merge_with(link_contexts, filtered_link_context, iteratee=concat)
    else:
        return link_contexts
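# The `u.match_all` helper used above isn't shown on this page. A minimal,
# hypothetical sketch of it, assuming it returns the start offset of every
# literal occurrence of the string in the page text (inferred from how the
# offsets are consumed above):
import re

def match_all(needle, haystack):
    # return the start offset of each literal occurrence of `needle` in `haystack`
    return [m.start() for m in re.finditer(re.escape(needle), haystack)]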
def _sentence_to_link_contexts(redirects_lookup, page, sentence):
    page_title = page['title']
    contexts = {}
    if 'links' in sentence:
        for link in sentence['links']:
            if is_valid_link(link):
                link_text = link.get('text') or link['page']
                try:
                    mention_offset = get_mention_offset(page['plaintext'],
                                                        sentence['text'],
                                                        link_text)
                    entity = _get_entity(redirects_lookup, link)
                    context = {'text': link_text,
                               'sentence': sentence['text'],
                               'offset': mention_offset,
                               'page_title': page_title,
                               'preredirect': _.upper_first(link['page'])}
                    if entity in contexts:
                        contexts[entity].append(context)
                    else:
                        contexts[entity] = [context]
                except ValueError:
                    # skip links whose text cannot be located in the page
                    continue
    return contexts
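# For reference, the shapes this function works with, inferred from the code
# above (the field names come from the snippet; the values are made up):
page = {'title': 'Example page',
        'plaintext': 'The Big Apple is a nickname for New York City.'}
sentence = {'text': 'The Big Apple is a nickname for New York City.',
            'links': [{'text': 'Big Apple', 'page': 'Big Apple'},
                      {'text': 'New York City', 'page': 'New York City'}]}
# The result maps each resolved entity to a list of mention contexts, roughly:
# {'Big Apple': [{'text': 'Big Apple', 'sentence': '...', 'offset': ...,
#                 'page_title': 'Example page', 'preredirect': 'Big Apple'}],
#  'New York City': [{...}]}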
def _build_redirects_lookup(redirects_rows):
    lookup = {}
    for row in redirects_rows:
        from_page = row['redirect_from'].replace('_', ' ')
        to_page = row['redirect_to'].replace('_', ' ')
        # index the source title under both capitalizations of its first letter
        lookup[_.upper_first(from_page)] = to_page
        lookup[_.lower_first(from_page)] = to_page
    return lookup
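# A small usage sketch with made-up redirect rows (assuming pydash is imported
# as `_`, as these snippets suggest):
import pydash as _

rows = [{'redirect_from': 'the_big_apple', 'redirect_to': 'New_York_City'}]
lookup = _build_redirects_lookup(rows)
# Both capitalizations of the source title resolve to the same target:
# lookup['The big apple'] == 'New York City'
# lookup['the big apple'] == 'New York City'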
def _get_entity(redirects_lookup, link):
    link_destination = link['page']
    followed_redirect = redirects_lookup.get(link_destination)
    return _.upper_first(followed_redirect or link_destination)
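# Continuing the made-up lookup from above: a redirect is followed when one
# exists, otherwise the link target is returned with its first letter upper-cased.
_get_entity(lookup, {'page': 'the big apple'})  # -> 'New York City' (redirect followed)
_get_entity(lookup, {'page': 'London'})         # -> 'London' (no redirect entry)
_get_entity(lookup, {'page': 'london'})         # -> 'London'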
def test_upper_first(case, expected):
    assert _.upper_first(case) == expected
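# Only the test body is shown above; a pytest parametrization along these lines
# would drive it (the cases are illustrative, matching pydash's upper_first,
# which upper-cases only the first character of the string):
import pydash as _
import pytest

@pytest.mark.parametrize('case,expected', [
    ('foo bar', 'Foo bar'),
    ('Foo', 'Foo'),
    ('', ''),
])
def test_upper_first(case, expected):
    assert _.upper_first(case) == expected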
def get_classname(self, component_name):
    return _.upper_first(_.camel_case(component_name or self.name))
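# Chaining camel_case with upper_first yields a PascalCase class name,
# e.g. (assuming `_` is pydash):
import pydash as _

_.upper_first(_.camel_case('nav-bar'))       # -> 'NavBar'
_.upper_first(_.camel_case('my_component'))  # -> 'MyComponent'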