def entrypoint(incoming, old, new, retirement):
    """
    Script to assist with merging legislator files.

    Can be used in two modes: incoming or file merge.

    Incoming mode analyzes incoming/ directory files (generated with to_yaml.py)
    and discovers identical & similar files to assist with merging.

    File merge mode merges two legislator files.
    """
    # NOTE: the docstring above is kept word-for-word; it doubles as CLI help.

    # --- incoming mode: compare everything on disk with the incoming files
    if incoming:
        abbr = incoming
        data_dir = get_data_dir(abbr)

        # active and retired legislators together form the matching pool
        existing_paths = glob.glob(os.path.join(data_dir, "legislature/*.yml"))
        existing_paths = existing_paths + glob.glob(
            os.path.join(data_dir, "retired/*.yml")
        )
        existing_people = []
        for path in existing_paths:
            with open(path) as fh:
                existing_people.append(load_yaml(fh))

        # the incoming tree mirrors the data tree under a different root
        incoming_dir = data_dir.replace("data", "incoming")
        new_people = []
        for path in glob.glob(os.path.join(incoming_dir, "legislature/*.yml")):
            with open(path) as fh:
                new_people.append(load_yaml(fh))

        click.secho(
            f"analyzing {len(existing_people)} existing people and {len(new_people)} incoming"
        )
        unmatched = incoming_merge(abbr, existing_people, new_people, retirement)
        click.secho(f"{len(unmatched)} people were unmatched")

    # --- file merge mode: fold NEW into OLD and delete NEW
    if old and new:
        with open(old) as fh:
            old_obj = load_yaml(fh)
        with open(new) as fh:
            new_obj = load_yaml(fh)

        # ids are only kept from both sides when NEW's path does not
        # contain "incoming"
        keep_both_ids = "incoming" not in new
        merged = merge_people(old_obj, new_obj, keep_both_ids=keep_both_ids)
        dump_obj(merged, filename=old)
        os.remove(new)
        click.secho(
            f"merged files into {old}\ndeleted {new}\ncheck git diff before committing"
        )
def test_out_threshold(self, activations):
    """Run the kernel's RLE prediction at several thresholds and dump the results.

    For each threshold the kernel returns a ``(preds, stat)`` pair; both are
    collected per threshold and written with ``utils.dump_obj`` to
    ``stat_for_threshold.pkl`` and ``pred_for_threshold.pkl``.
    """
    stats_by_cutoff = {}
    preds_by_cutoff = {}
    for cutoff in (0.5, 0.52, 0.55, 0.6):
        outcome = self.kernel.predict_rle_from_acts_with_threshold(
            activations, cutoff)
        cutoff_preds, cutoff_stat = outcome
        stats_by_cutoff[cutoff] = cutoff_stat
        preds_by_cutoff[cutoff] = cutoff_preds

    # force=True is passed through unchanged to the project's dump helper
    utils.dump_obj(stats_by_cutoff, "stat_for_threshold.pkl", force=True)
    utils.dump_obj(preds_by_cutoff, "pred_for_threshold.pkl", force=True)
def merge_scraped_coms(abbr, old, new):
    """Reconcile freshly scraped committees (*new*) with the ones on disk (*old*).

    Committees are paired on ``(parent, name)``.  A paired committee takes the
    scraped sources and memberships, is re-saved, and its incoming file is
    deleted.  Scraped committees with no counterpart are handed to
    ``copy_new_incoming``; on-disk committees with no counterpart are removed.
    """
    remaining = {}
    for com in old:
        remaining[(com["parent"], com["name"])] = com

    for scraped in new:
        match = remaining.pop((scraped["parent"], scraped["name"]), None)
        if not match:
            copy_new_incoming(abbr, scraped, "organizations")
            continue

        match["sources"] = scraped["sources"]
        match["memberships"] = scraped["memberships"]
        fname = os.path.join(
            get_data_dir(abbr), "organizations", get_filename(match)
        )
        dump_obj(match, filename=fname)
        click.secho(f"updated {fname}")
        os.remove(f"incoming/{abbr}/organizations/{get_filename(scraped)}")

    # anything still in the lookup was not seen in the new scrape
    for stale in remaining.values():
        fn = get_filename(stale)
        click.secho(f"removing {fn}", fg="yellow")
        os.remove(os.path.join(get_data_dir(abbr), "organizations", fn))
def update_from_csv(filename, fields, other_identifiers):
    """Apply per-person updates from a CSV file to the matching YAML files.

    Each CSV row must have an ``id`` column, which is resolved to a YAML file
    with ``find_by_id``.  For that person:

    * every column named in *fields* overwrites the same key on the person;
    * every column named in *other_identifiers* is split on ``;`` and each
      non-empty piece is appended to ``other_identifiers`` as
      ``{'scheme': <column>, 'identifier': <piece>}``.

    An identifier is skipped if the same (scheme, identifier) pair is already
    present, so re-running the import does not pile up duplicates.
    """
    # newline="" is what the csv module asks for when it is given a file
    # object; without it, newlines inside quoted fields are mis-handled.
    with open(filename, newline='') as f:
        for line in csv.DictReader(f):
            yaml_filename = find_by_id(line['id'])
            with open(yaml_filename) as yf:
                person = load_yaml(yf)

            for field in fields:
                person[field] = line[field]

            if other_identifiers:
                known = person.setdefault('other_identifiers', [])
                # assumes existing entries are mappings with 'scheme' and
                # 'identifier' keys, the same shape appended below
                seen = {
                    (entry.get('scheme'), entry.get('identifier'))
                    for entry in known
                }
                for scheme in other_identifiers:
                    for identifier in line[scheme].split(';'):
                        if not identifier or (scheme, identifier) in seen:
                            continue
                        known.append({
                            'scheme': scheme,
                            'identifier': identifier
                        })
                        seen.add((scheme, identifier))

            dump_obj(person, filename=yaml_filename)
def update_from_csv(filename, fields, other_identifiers):
    """Apply per-person updates from a CSV file to the matching YAML files.

    Each CSV row must have an ``id`` column, which is resolved to a YAML file
    with ``find_file``.  For that person:

    * every column named in *fields* overwrites the same key on the person;
    * every column named in *other_identifiers* is split on ``;`` and each
      non-empty piece is appended to ``other_identifiers`` as
      ``{"scheme": <column>, "identifier": <piece>}``.

    An identifier is skipped if the same (scheme, identifier) pair is already
    present, so re-running the import does not pile up duplicates.
    """
    # newline="" is what the csv module asks for when it is given a file
    # object; without it, newlines inside quoted fields are mis-handled.
    with open(filename, newline="") as f:
        for line in csv.DictReader(f):
            yaml_filename = find_file(line["id"])
            with open(yaml_filename) as yf:
                person = load_yaml(yf)

            for field in fields:
                person[field] = line[field]

            if other_identifiers:
                known = person.setdefault("other_identifiers", [])
                # assumes existing entries are mappings with "scheme" and
                # "identifier" keys, the same shape appended below
                seen = {
                    (entry.get("scheme"), entry.get("identifier"))
                    for entry in known
                }
                for scheme in other_identifiers:
                    for identifier in line[scheme].split(";"):
                        if not identifier or (scheme, identifier) in seen:
                            continue
                        known.append({
                            "scheme": scheme,
                            "identifier": identifier
                        })
                        seen.add((scheme, identifier))

            dump_obj(person, filename=yaml_filename)
def create_person(fname, lname, name, state, district, party, rtype, url,
                  image, email, start_date):
    """Build a new person record with a single role and write it out.

    The role type decides which subdirectory of the state's data directory
    the record is dumped into, and whether the role carries a district:
    legislative roles keep it, mayor and executive roles drop it.

    Raises:
        ValueError: if *rtype* is not one of the known role types.
    """
    role = {
        "type": rtype,
        "district": district,
        "jurisdiction": get_jurisdiction_id(state),
        "start_date": start_date,
    }

    # role type -> (output subdirectory, role keeps its district?)
    layout = {
        "upper": ("legislature", True),
        "lower": ("legislature", True),
        "legislature": ("legislature", True),
        "mayor": ("municipalities", False),
        "governor": ("executive", False),
        "lt_governor": ("executive", False),
    }
    placement = layout.get(rtype)
    if placement is None:
        raise ValueError(f"unknown role type {rtype}")
    directory, keeps_district = placement
    if not keeps_district:
        del role["district"]

    person = OrderedDict()
    person["id"] = ocd_uuid("person")
    person["name"] = name or f"{fname} {lname}"
    person["given_name"] = fname
    person["family_name"] = lname
    person["image"] = image
    person["email"] = email
    person["party"] = [{"name": party}]
    person["roles"] = [role]
    person["links"] = [{"url": url}]
    person["sources"] = [{"url": url}]

    target_dir = os.path.join(get_data_dir(state), directory)
    dump_obj(person, output_dir=target_dir)
def process_dir(input_dir, output_dir, jurisdiction_id):
    """Convert scraped ``person_*.json`` files into dumped legislator records.

    Memberships are read first from ``membership_*.json`` and grouped by
    ``person_id``; each person then gets its memberships attached, is passed
    through ``process_person`` and dumped under ``<output_dir>/legislature``.

    Raises:
        ValueError: if a membership's ``person_id`` starts with ``"~"``.
    """
    memberships_for = defaultdict(list)

    # pass 1: group memberships by the person they belong to
    for path in glob.glob(os.path.join(input_dir, "membership_*.json")):
        with open(path) as fh:
            record = json.load(fh)
        owner = record["person_id"]
        if owner.startswith("~"):
            raise ValueError(record)
        memberships_for[owner].append(record)

    # pass 2: attach memberships, transform and write each person
    legislature_dir = os.path.join(output_dir, "legislature")
    for path in glob.glob(os.path.join(input_dir, "person_*.json")):
        with open(path) as fh:
            scraped = json.load(fh)
        scraped["memberships"] = memberships_for[scraped["_id"]]
        dump_obj(process_person(scraped, jurisdiction_id),
                 output_dir=legislature_dir)
def retire(end_date, filename, reason, death):
    """
    Retire a legislator, given END_DATE and FILENAME.

    Will set end_date on active roles.
    """
    # NOTE: the docstring above is kept word-for-word; it doubles as CLI help.
    with open(filename) as fh:
        person = load_yaml(fh)

    # a death always overrides whatever reason was supplied
    effective_reason = "Deceased" if death else reason
    person, num = retire_person(person, end_date, effective_reason, death)
    dump_obj(person, filename=filename)

    # report how many roles were ended
    if num == 0:
        message, style = "no active roles to retire", {"fg": "red"}
    elif num == 1:
        message, style = "retired person", {}
    else:
        message, style = f"retired person from {num} roles", {}
    click.secho(message, **style)

    move_file(filename)
def create_person(fname, lname, name, state, district, party, rtype, url,
                  image, start_date):
    """Build a new person record with one role and dump it under ``people``.

    *name* falls back to ``"<fname> <lname>"`` when empty, and *url* is used
    for both the ``links`` and the ``sources`` entry.
    """
    person = OrderedDict()
    person['id'] = ocd_uuid('person')
    person['name'] = name or f'{fname} {lname}'
    person['given_name'] = fname
    person['family_name'] = lname
    person['image'] = image
    person['party'] = [{'name': party}]

    role = {
        'type': rtype,
        'district': district,
        'jurisdiction': get_jurisdiction_id(state),
        'start_date': start_date,
    }
    person['roles'] = [role]
    person['links'] = [{'url': url}]
    person['sources'] = [{'url': url}]

    people_dir = os.path.join(get_data_dir(state), 'people')
    dump_obj(person, output_dir=people_dir)
def create_committee(*, name, state, parent, url):
    """Interactively build a committee record and dump it under ``organizations``.

    Member names are prompted for one at a time.  Entry stops on a blank
    answer or on the literal answer ``done``.  *url* is used for both the
    ``sources`` and the ``links`` entry.
    """
    members = []
    click.echo("Enter members, enter a blank member to stop.")
    while True:
        # default="" lets an empty answer through; without a default,
        # click.prompt keeps re-asking and the "blank member" promised by
        # the banner above could never be entered.
        mname = click.prompt(
            "Member name ('done' to stop)", default="", show_default=False
        )
        if not mname or mname == "done":
            break
        members.append({"name": mname})

    com = OrderedDict(
        {
            "id": ocd_uuid("organization"),
            "name": name,
            "classification": "committee",
            "jurisdiction": get_jurisdiction_id(state),
            "parent": parent,
            "sources": [{"url": url}],
            "links": [{"url": url}],
            "memberships": members,
        }
    )

    output_dir = get_data_dir(state)
    dump_obj(com, output_dir=os.path.join(output_dir, "organizations"))
def directory_merge(abbr, existing_people, new_people, remove_identical,
                    copy_new, interactive):
    """Match incoming people against existing ones and help merge them.

    Every new person is compared with every existing person using
    ``calculate_similarity``:

    * similarity above 0.999 counts as a perfect match; with
      *remove_identical* the incoming file is deleted;
    * otherwise the most similar existing person is remembered as a
      candidate.  Candidates are listed best-first, and with *interactive*
      the user chooses per pair whether to keep old values, keep new
      values, skip, or abort;
    * new people with no candidate at all (similarity below 0.001) are
      reported as unmatched; with *copy_new* their files are moved from
      ``incoming/`` into ``data/``.

    Raises:
        SystemExit: when the user picks (a)bort in interactive mode.
    """
    perfect_matched = set()
    matches = []
    id_to_new_filename = {}

    for new in new_people:
        id_to_new_filename[new["id"]] = get_filename(new)
        best_similarity = 0
        best_match = None
        for existing in existing_people:
            similarity = calculate_similarity(existing, new)
            if similarity > 0.999:
                perfect_matched.add(new["id"])
                break
            if similarity > best_similarity:
                best_similarity = similarity
                best_match = existing
        else:
            # Reached only when no perfect match was found.  A perfectly
            # matched person must not also be queued as a partial match:
            # it is not in `unmatched` below, and its incoming file may
            # already have been removed.
            matches.append((best_similarity, new, best_match))

    click.secho(f"{len(perfect_matched)} were perfect matches", fg="green")

    if remove_identical:
        for person_id in perfect_matched:
            fname = f"incoming/{abbr}/people/{id_to_new_filename[person_id]}"
            click.secho("removing " + fname, fg="red")
            os.remove(fname)

    unmatched = set(p["id"] for p in new_people) - perfect_matched

    for sim, new, old in sorted(matches, reverse=True, key=lambda x: x[0]):
        if sim < 0.001:
            # sorted best-first, so everything after this is unmatched too
            break
        unmatched.discard(new["id"])
        oldfname = "data/{}/people/{}".format(abbr, get_filename(old))
        newfname = "incoming/{}/people/{}".format(abbr, get_filename(new))
        click.secho(" {:.2f} {} {}".format(sim, oldfname, newfname), fg="yellow")

        if interactive:
            differences = compare_objects(old, new)
            for difference in differences:
                click.echo(" " + str(difference))

            ch = "~"
            while ch not in ("o", "n", "s", "a"):
                click.secho("Keep (o)ld? Keep (n)ew? (s)kip? (a)bort?", bold=True)
                ch = click.getchar()
            if ch == "a":
                raise SystemExit(-1)
            elif ch == "o":
                keep_on_conflict = "old"
            elif ch == "n":
                keep_on_conflict = "new"
            elif ch == "s":
                continue

            merged = merge_people(
                old, new, keep_both_ids=False, keep_on_conflict=keep_on_conflict
            )
            dump_obj(merged, filename=oldfname)
            os.remove(newfname)

    click.secho(f"{len(unmatched)} were unmatched")
    if copy_new:
        for person_id in unmatched:
            fname = id_to_new_filename[person_id]
            oldfname = f"incoming/{abbr}/people/{fname}"
            newfname = f"data/{abbr}/people/{fname}"
            click.secho(f"moving {oldfname} to {newfname}", fg="yellow")
            os.rename(oldfname, newfname)
def train():
    """Train one ranking model variant, chosen by ``sys.argv[1]``, and save it.

    Loads the train/valid/test splits with ``load_data``, then depending on
    the chosen algo either trains plain LightGBM (``lgbm base``,
    ``lgbm goss``, ``lambdarank``) or fits ``LGBMSelGB`` with one of the
    strategies listed in ``strategies``, looping over a small fixed grid of
    hyper-parameters where the branch defines one.  Evaluation results are
    written with ``dump_obj`` and the model with ``model.save_model``.

    Raises:
        ValueError: if the algo name is not recognised.
    """
    algo = sys.argv[1]
    print('[INFO] Chosen algo is:', algo)
    print('[INFO] Loading data')
    train_data, train_labels, train_query_lens = load_data(train_file)
    print('[INFO] Training set loaded')
    valid_data, valid_labels, valid_query_lens = load_data(valid_file)
    print('[INFO] Validation set loaded')
    test_data, test_labels, test_query_lens = load_data(test_file)
    print('[INFO] Testing set loaded')
    # the three lists below are parallel: index 0 = train, 1 = valid, 2 = test
    eval_set = [(train_data, train_labels), (valid_data, valid_labels), (test_data, test_labels)]
    eval_group = [train_query_lens, valid_query_lens, test_query_lens]
    eval_names = ['train', 'valid', 'test']
    # only the plain-LightGBM branch below passes `params` on;
    # the LGBMSelGB branches do not receive it
    params = {
        'objective': 'rank_xendcg',
        'learning_rate': 0.05,
        'num_leaves': 64,
        'metric': ['ndcg'],
        'ndcg_eval_at': 10,
        'force_row_wise': True,
        'max_bin': 127
    }
    strategies = ('fixed', 'random_iter', 'random_query', 'decay', 'false_positives', 'equal_size', 'delta', 'limit_resample')
    n_estimators = 1000
    early_stopping_rounds = 100
    n_iter_sample = 1
    verbose = 5
    print('[INFO] Starting training')
    if algo in ('lgbm base', 'lgbm goss', 'lambdarank'):
        # plain LightGBM; the two non-base names tweak one param each
        if algo == 'lambdarank':
            params['objective'] = 'lambdarank'
        elif algo == 'lgbm goss':
            params['boosting'] = 'goss'
        train_set = lgb.Dataset(train_data, label=train_labels, group=train_query_lens)
        eval_results = {}
        # the training set itself is the first validation set; valid and test
        # are added as Datasets that reference it
        valid_sets = [train_set]
        for i, data in enumerate(eval_set[1:]):
            ds = lgb.Dataset(data[0], data[1], group=eval_group[1:][i], reference=train_set)
            valid_sets.append(ds)
        model = lgb.train(params, train_set, num_boost_round=n_estimators, valid_sets=valid_sets, valid_names=eval_names, verbose_eval=verbose, evals_result=eval_results, early_stopping_rounds=early_stopping_rounds)
        dump_obj(eval_results, results_path, algo)
    elif algo in strategies:
        if algo == 'fixed':
            print('[INFO] Starting fitting, tuning')
            # eval_results is keyed by the p value tried
            eval_results = {}
            for p in (0.5, 0.25, 0.1, 0.05, 0.01):
                print('[INFO] p value:', p)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, p=p, method=algo)
                model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[p] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
        elif algo in ('equal_size', 'false_positives'):
            print('[INFO] Starting fitting, no tuning')
            model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, method=algo)
            model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
            eval_results = model.get_eval_result()
            dump_obj(eval_results, results_path, algo)
        elif algo.startswith('random'):
            # covers both 'random_iter' and 'random_query'
            print('[INFO] Starting fitting, with tuning')
            # eval_results is keyed by the max_p value tried
            eval_results = {}
            for max_p in (0.5, 0.25, 0.1, 0.05, 0.02):
                print('[INFO] max_p value:', max_p)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, max_p=max_p, method=algo)
                model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[max_p] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
        elif algo == 'delta':
            print('[INFO] Starting fitting, with tuning')
            # eval_results is keyed by (delta_pos, delta)
            eval_results = {}
            # first sweep: vary delta_pos with delta held at 0.25
            for delta_pos in (3, 5, 10):
                delta = 0.25
                print('[INFO] delta_pos:', delta_pos)
                print('[INFO] delta:', delta)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, delta_pos=delta_pos, delta=delta, method=algo)
                model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(delta_pos, delta)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
            # second sweep: vary delta with delta_pos held at 5
            for delta in (1, 0.5, 0.1):
                delta_pos = 5
                print('[INFO] delta_pos:', delta_pos)
                print('[INFO] delta:', delta)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, delta_pos=delta_pos, delta=delta, method=algo)
                model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(delta_pos, delta)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
        elif algo == 'decay':
            print('[INFO] Starting fitting, with tuning')
            # eval_results is keyed by (p, k)
            eval_results = {}
            # first sweep: vary p with k held at 0.985
            for p in (0.5, 0.75):
                k = 0.985
                print('[INFO] p:', p)
                print('[INFO] k:', k)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, p=p, k_factor=k, method=algo)
                model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(p, k)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
            # second sweep: vary k with p held at 0.5
            for k in (0.98, 0.99):
                p = 0.5
                print('[INFO] p:', p)
                print('[INFO] k:', k)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, p=p, k_factor=k, method=algo)
                model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[(p, k)] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
        elif algo == 'limit_resample':
            print('[INFO] Starting fitting, with tuning')
            # eval_results is keyed by the max_resample value tried
            eval_results = {}
            # algo is renamed here, so the result and model files written from
            # this point on use 'limit_resample_rndq' as their name
            algo = 'limit_resample_rndq'
            for max_resample in [250, 500, 0.5, 0.75]:
                print('[INFO] max_resample', max_resample)
                model = LGBMSelGB(n_estimators=n_estimators, n_iter_sample=n_iter_sample, max_resample=max_resample, method='random_query', max_p=0.05)
                model.fit(train_data, train_labels, train_query_lens, eval_set=eval_set, eval_group=eval_group, eval_names=eval_names, verbose=verbose, early_stopping_rounds=early_stopping_rounds)
                eval_results[max_resample] = model.get_eval_result()
                dump_obj(eval_results, results_path, algo)
    else:
        raise ValueError('algo parameter is wrong')
    print('FITTING OVER!')
    dump_obj(eval_results, results_path, algo)
    print('dumped results')
    # NOTE(review): in the tuning branches `model` is whichever model was
    # fitted last in the loop, so only that one is saved — confirm intended
    model.save_model(os.path.join(models_path, algo + '.txt'))
    print('saved model')
def save_object(self, obj, output_dir):
    """Convert *obj* with its ``to_dict()`` and dump the result into *output_dir*."""
    payload = obj.to_dict()
    dump_obj(payload, output_dir=output_dir)
def save_eval_result(self, path, filename=None):
    """Dump this model's evaluation result under *path*.

    Args:
        path: location passed to ``dump_obj`` as its second argument.
        filename: name to save under; defaults to ``'selgb-' + self.method``.
    """
    if filename is None:
        filename = 'selgb-' + self.method
    # The default-name branch used to call dump_obj(path, name), leaving out
    # the result object, so `path` itself was what got dumped.
    dump_obj(self.get_eval_result(), path, filename)
def process_old_file(filename, metadata):
    """Convert one legacy legislator JSON file into a retired-person record.

    Active legislators are printed and skipped.  For everyone else the
    unused legacy fields are dropped, name parts, parties, roles (built from
    ``old_roles`` via ``terms_to_roles``), image, links, sources and legacy
    ids are carried over, and the result is dumped under the state's
    ``retired`` directory.

    Raises:
        Exception: if ``leg_id`` and ``_id`` disagree, if an inactive record
            still has ``roles``, or if any field is left over that this
            function does not know how to handle.
    """
    with open(filename) as f:
        data = json.load(f)

    if data["leg_id"] != data["_id"]:
        raise Exception(f"leg_id does not match _id in {filename}")
    if data.get("active"):
        print(data)
        return
    if data.get("roles", []):
        raise Exception(f"inactive legislator still has roles in {filename}")

    # remove unused fields
    for k in (
        "_yearly_contributions",
        "nimsp_candidate_id",
        "votesmart_id",
        "_contributions_start_year",
        "_scraped_name",
        "_total_contributions",
        "transparencydata_id",
        "_locked_fields",
        "level",
        "nimsp_id",
        "_type",
        "country",
        "updated_at",
        "_id",
        "active",
        "roles",
        "offices",
        "notice",
        "nickname",
        "district",
        "party",
        "chamber",
        "csrfmiddlewaretoken",
        "email",
        "created_at",
        "office_address",
        "office_phone",
        "occupation",
        "_guid",
        "_code",
        "all_ids",
        "2008-2011",
    ):
        data.pop(k, None)

    # remove plus fields
    for k in [k for k in data if k.startswith("+")]:
        data.pop(k)

    leg_obj = OrderedDict({"id": ocd_uuid("person")})

    leg_obj["name"] = data.pop("full_name")
    first_name = data.pop("first_name")
    middle_name = data.pop("middle_name")
    last_name = data.pop("last_name")
    suffixes = data.pop("suffixes", "")
    suffix = data.pop("suffix", "")
    if first_name:
        leg_obj["given_name"] = first_name
    if last_name:
        leg_obj["family_name"] = last_name
    if middle_name:
        leg_obj["middle_name"] = middle_name
    # either field may carry the suffix; checking only `suffix` used to drop
    # the value whenever it was present in `suffixes` alone
    if suffixes or suffix:
        leg_obj["suffix"] = suffixes or suffix

    state = data.pop("state")
    jurisdiction_id = get_jurisdiction_id(state)

    # pull useful fields
    old_roles = data.pop("old_roles", {})
    parties = set()
    new_roles = []
    for roles in old_roles.values():
        for role in roles:
            # leadership and committee roles are not carried over
            if role["type"] in (
                "committee member",
                "Minority Floor Leader",
                "Majority Floor Leader",
                "Majority Caucus Chair",
                "Minority Caucus Chair",
                "Speaker Pro Tem",
                "President Pro Tem",
                "Senate President",
                "Speaker of the House",
                "Minority Whip",
                "Majority Whip",
                "Lt. Governor",
            ) or role.get("committee"):
                continue
            parties.add(role["party"])
            new_roles.append({
                "term": role["term"],
                "chamber": role["chamber"],
                "district": role["district"]
            })

    # sorted so the output does not depend on set iteration order
    leg_obj["party"] = [{"name": party} for party in sorted(parties)]

    # add these to leg_obj
    roles = terms_to_roles(new_roles, metadata["terms"])
    formatted_roles = []
    for chamber, district, start, end in roles:
        formatted_roles.append(
            OrderedDict({
                "district": district,
                "jurisdiction": jurisdiction_id,
                "type": chamber,
                "start_date": f"{start}-01-01",
                "end_date": f"{end}-12-31",
            }))
    leg_obj["roles"] = formatted_roles

    all_ids = data.pop("_all_ids")
    leg_id = data.pop("leg_id")
    if leg_id not in all_ids:
        all_ids.append(leg_id)

    image = data.pop("photo_url", "")
    if image:
        leg_obj["image"] = image
    url = data.pop("url", "")
    if url:
        leg_obj["links"] = [{"url": url}]
    leg_obj["sources"] = data.pop("sources")
    leg_obj["other_identifiers"] = [{
        "identifier": id_,
        "scheme": "legacy_openstates"
    } for id_ in all_ids]

    # anything still left is a field this converter has never seen
    if data:
        print(data)
        raise Exception(f"unhandled fields in {filename}: {sorted(data)}")

    output_dir = get_data_dir(state)
    dump_obj(leg_obj, output_dir=os.path.join(output_dir, "retired"))
def save(self):
    """Write ``self.data`` to ``self.filename`` through ``dump_obj``."""
    payload, target = self.data, self.filename
    dump_obj(payload, filename=target)
def prepare_train_dev_data(self):
    """Populate ``self.data0`` with fold 0, reusing ``data0.bin`` when available.

    ``utils.get_obj_or_dump`` is asked for ``data0.bin`` first; only when it
    returns ``None`` is the fold computed and dumped under that name.
    """
    cache_name = "data0.bin"
    cached = utils.get_obj_or_dump(cache_name)
    if cached is not None:
        self.data0 = cached
        return

    fold = self._get_fold_data(0)
    utils.dump_obj(fold, cache_name)
    self.data0 = fold
import sys

from utils import load_yaml, dump_obj

# Remove the "contact_details" key from every YAML file named on the command
# line and write each file back in place.
for fn in sys.argv[1:]:
    # `with` closes the handle; the bare open() used before never did
    with open(fn) as f:
        data = load_yaml(f)
    # default of None: a file that has no contact_details is left as it is
    # instead of aborting the whole run with a KeyError
    data.pop("contact_details", None)
    dump_obj(data, filename=fn)
import csv

from utils import find_file, load_yaml, dump_obj

# contact_details "note" -> CSV column that holds that office's phone number
PHONE_COLUMNS = {
    "Capitol Office": "Capitol Phone",
    "District Office": "District Phone",
}

# Update phone numbers, email and twitter id of each legislator listed in
# nyleg.csv; rows are matched to YAML files through the "osid" column.
# newline="" is what the csv module asks for when it is given a file object.
with open("nyleg.csv", newline="") as f:
    for row in csv.DictReader(f):
        fname = find_file(row["osid"])
        with open(fname) as lf:
            obj = load_yaml(lf)

        for cd in obj["contact_details"]:
            column = PHONE_COLUMNS.get(cd["note"])
            if column:
                # "(518) 455-1234" -> "518-455-1234"
                cd["voice"] = row[column].replace("(", "").replace(") ", "-")

        obj["email"] = row["email"]
        # twitter is only written when the person has no "ids" mapping yet
        if row["twitter"] and "ids" not in obj:
            obj["ids"] = {"twitter": row["twitter"].replace("@", "")}

        dump_obj(obj, filename=fname)
# Keep only the entries whose action passes is_valid() and write them, with a
# matching .meta file, to the output directory.
# NOTE(review): input_meta, input_episode_file, valid_actions_indices,
# output_data_files_dir, inter_dir and file_name are set up before this
# fragment — confirm against the enclosing code.

# integer reward -> positions (in the filtered output) of entries with it
new_reward_indices = defaultdict(list)
for data_i in range(input_meta['size']):
    if is_valid(input_episode_file.actions[data_i]):
        # len(valid_actions_indices) is the index this entry is about to
        # occupy in valid_actions_indices
        new_reward_indices[int(input_episode_file.rewards[data_i])].append(
            len(valid_actions_indices))
        valid_actions_indices.append(data_i)
# nothing is written when no entry passed the filter
if len(valid_actions_indices) > 0:
    output_file_dir = f'{output_data_files_dir}/{inter_dir}'
    Path(output_file_dir).mkdir(parents=True, exist_ok=True)
    output_meta_file = f'{output_file_dir}/{file_name}.meta'
    dump_obj(
        {
            'max_size': len(valid_actions_indices),
            'size': len(valid_actions_indices),
            'example': input_meta['example'],
            'reward_indices': new_reward_indices
        }, output_meta_file)
    output_episode_file = EpisodeFile(f'{output_file_dir}/{file_name}',
                                      len(valid_actions_indices),
                                      input_meta['example'], 'w+')
    # copy each kept entry from its input index to its new output index
    for data_i in range(len(valid_actions_indices)):
        output_episode_file.set(
            input_episode_file.get(valid_actions_indices[data_i]), data_i)
    output_episode_file.flush()
    output_episode_file.close()
input_episode_file.close()
def render(self):
    """Debug inspector: print and draw diagnostic information.

    Each section is guarded by one entry of the module-level ``keys``
    mapping (presumably the current state of a hotkey — confirm where
    ``keys`` is filled).  Python 2 code: ``print`` is a statement here.
    """
    read_game = self.env.read_game
    frame = self.env.frame
    #if not read_game.is_in_game or not keys["KEY_INSPECTOR"]: return

    if keys["KEY_INSPECTOR"]:
        # list every player slot by name
        for i in range(PLAYERMAX):
            print "Player #%i: %s" % (i, read_game.player[i].name)
        # label and dump every entity that projects close to the screen center
        for idx in range(ENTITIESMAX):
            e = read_game.mw2_entity.arr[idx]
            spot = read_game.world_to_screen(e.pos)
            if spot:
                # presumably a squared distance, since it is compared with 50 * 50
                cur_angle_dist = self.sq(spot.x - read_game.screen_center_x, spot.y - read_game.screen_center_y)
                if cur_angle_dist < 50 * 50:        # not too far from center
                    s = "[idx=%i(%x), typ=%i, weap=%i]" % (idx, idx, e.type, e.WeaponNum)
                    draw_string_center(frame.font, spot.x, spot.y, 0xFFFFFFFF, s)
                    print s
                    print dump_obj(e)
                    # disabled: also dump the entities referenced by owner_scr1 / owner_scr2
                    # if e.owner_scr1 >= 0 and e.owner_scr1 < 2047:
                    #     ee = read_game.mw2_entity.arr[e.owner_scr1]
                    #     print "[idx=%i(%x), typ=%i, weap=%i]" % (e.owner_scr1, e.owner_scr1, ee.type, ee.WeaponNum)
                    #     print dump_obj(ee)
                    # if e.owner_scr2 >= 0 and e.owner_scr2 < 2047:
                    #     ee = read_game.mw2_entity.arr[e.owner_scr2]
                    #     print "[idx=%i(%x), typ=%i, weap=%i]" % (e.owner_scr2, e.owner_scr2, ee.type, ee.WeaponNum)
                    #     print dump_obj(ee)
            #=======================================================
            # if e.type == ET_EXPLOSIVE:
            #     print "dump explo"
            #     print dump_obj(e)
            #=======================================================

    if keys["KEY_INSPECT_POS"]:         # print my player's position
        pos = read_game.mw2_mypos
        print "pos= (%.2f, %.2f, %.2f)" % (pos.x, pos.y, pos.z)
        ang = read_game.view_angles
        print "angles= (%.2f, %.2f, %.2f)" % (ang.x, ang.y, ang.z)

    if keys["KEY_INSPECT_DUMP"]:        # dump some memory structures
        #mem = dumped()
        #read_game._RPM(0x6727F13, mem)
        #read_game._RPM(0x6727F10, mem)
        #print dump_obj(mem)
        #read_game._RPM(0x64DA350, mem)
        # entity and client-info structures of the first 8 slots
        for i in range(8):
            print "player #%i" % i
            print dump_obj(read_game.mw2_entity.arr[i])
            print "client info"
            print dump_obj(read_game.mw2_clientinfo.arr[i])
        #del mem

    if keys["KEY_INSPECT_DUMP_PLAYERS"]:
        for i in range(PLAYERMAX):
            print "Player #%i: %s" % (i, read_game.player[i].name)
        #===================================================================
        # print "refdef"
        # print dump_obj(read_game.mw2_refdef)
        # print "viewy"
        # print dump_obj(read_game.mw2_viewy)
        #===================================================================

    # The `if False` blocks below never run; they are traces kept switched off.
    if False and read_game.is_in_game:
        print "time=%8i, pos2=%.1f %.1f %.1f, pos3=%.1f %.1f %.1f" % (read_game.game_time,
                                                   read_game.my_player.pos2.x, read_game.my_player.pos2.y, read_game.my_player.pos2.z,
                                                   read_game.my_player.pos3.x, read_game.my_player.pos3.y, read_game.my_player.pos3.z,
                                                   )
    if False and read_game.is_in_game:
        print "time=%8i, motion=%.1f %.1f %.1f, abs=%.1f" % (read_game.game_time,
                                                   read_game.my_player.motion.x, read_game.my_player.motion.y, read_game.my_player.motion.z,
                                                   read_game.my_player.motion.length()
                                                   )
    if False:
        for e in read_game.mw2_entity.arr:
            if e.type == ET_EXPLOSIVE and e.alive & ALIVE_FLAG:
                print "time=%8i, pos=%.1f %.1f %.1f" % (read_game.game_time,
                                                   e.pos.x, e.pos.y, e.pos.z,
                                                   )
    if False and read_game.is_in_game:
        print "kills=%i, deaths=%i" % (read_game.kills, read_game.deaths)
    if False and read_game.is_in_game:
        print "weapon=%i" % read_game.my_player.weapon_num

    if keys["KEY_INSPECT_MOVE_MOUSE"]:
        # hand the move sequence to the environment's scheduler
        self.env.sched.new(self.move_sequence())
def fix_offices(filename):
    """Collapse a person's ``contact_details`` to one entry per office type.

    All values are grouped per office type (the ``note``) and per key.  Email
    addresses are collected separately and become a top-level ``email``.  A
    group with exactly one distinct value is kept; a group with several
    distinct values is reported in red and the file is left untouched.

    When two email addresses are found, one containing ``leg.state.vt.us``
    wins; two addresses differing only in case count as the same address.

    Only the office types "Capitol Office", "District Office" and
    "Primary Office" are written back, in that order.
    """
    with open(filename) as file:
        data = load_yaml(file)

    # office_type -> key -> set of values seen
    all_details = defaultdict(lambda: defaultdict(set))
    email = set()
    for office in data.get("contact_details", []):
        for key, value in office.items():
            if key == "note":
                continue
            if key == "email":
                email.add(value)
            else:
                all_details[office["note"]][key].add(value)

    reformatted = defaultdict(dict)
    error = False
    for office_type, office_details in all_details.items():
        for ctype, values in office_details.items():
            if len(values) == 1:
                reformatted[office_type][ctype] = values.pop()
            else:
                click.secho(
                    f"multiple values for {office_type} {ctype}: {values}",
                    fg="red")
                error = True

    if len(email) == 1:
        email = email.pop()
    elif len(email) > 1:
        # sorted so the choice does not depend on set iteration order
        emails = sorted(email)
        if "leg.state.vt.us" in emails[0]:
            email = emails[0]
        elif "leg.state.vt.us" in emails[1]:
            # this branch used to pick emails[0], i.e. the address that had
            # just been found NOT to be the leg.state.vt.us one
            email = emails[1]
        elif emails[0].lower() == emails[1].lower():
            email = emails[0]
        else:
            click.secho(f"multiple values for email: {email}", fg="red")
            error = True

    if not error:
        # an empty set (no email seen at all) is falsy and writes nothing
        if email:
            data["email"] = email
        data["contact_details"] = []
        for otype in ("Capitol Office", "District Office", "Primary Office"):
            if otype in reformatted:
                data["contact_details"].append(
                    OrderedDict(note=otype, **reformatted[otype]))
        dump_obj(data, filename=filename)
def make_mayors(state_to_import):
    """Create mayor records for one state from ``mayors.csv``.

    Rows whose lower-cased "Postal Code" equals *state_to_import* each become
    a person file under ``data/<state>/municipalities/``.  The list of
    municipalities seen is written to ``data/<state>/municipalities.yml``.

    Raises:
        FileExistsError: from ``os.makedirs`` if the municipalities directory
            already exists.
    """
    all_municipalities = []
    # no exist_ok here, so running the import again for the same state stops
    # right away rather than writing a second set of files
    os.makedirs(f"data/{state_to_import}/municipalities")

    # newline="" is what the csv module asks for when it is given a file object
    with open("mayors.csv", newline="") as f:
        for line in csv.DictReader(f):
            state = line["Postal Code"].lower()
            if state != state_to_import:
                continue

            city = line["City"].strip()
            given_name = line["First"].strip()
            family_name = line["Last"].strip()
            name = f"{given_name} {family_name}"
            email = line["Email"].strip()
            webform = line["Web Form"].strip()
            phone = reformat_phone_number(line["Phone"])
            fax = reformat_phone_number(line["Fax"])
            address1 = line["Address 1"].strip()
            address2 = line["Address 2"].strip()
            zipcode = line["Zip Code"].strip()
            if line["Zip Plus 4"].strip():
                zipcode += "-" + line["Zip Plus 4"].strip()

            if not line["Term End"]:
                term_end = "2021-01-01"  # temporary term end date for the unknowns
            else:
                term_end = datetime.datetime.strptime(
                    line["Term End"], "%m/%d/%Y").strftime("%Y-%m-%d")

            if address2:
                full_address = f"{address1};{address2};{city}, {state.upper()} {zipcode}"
            else:
                full_address = f"{address1};{city}, {state.upper()} {zipcode}"

            # the address always contains at least ";<city>, <ST> <zip>", so
            # it is set unconditionally; the other fields only when present
            contact = {"note": "Primary Office", "address": full_address}
            if fax:
                contact["fax"] = fax
            if phone:
                contact["voice"] = phone
            if email:
                contact["email"] = email

            jid = city_to_jurisdiction(city, state)
            all_municipalities.append(OrderedDict({"name": city, "id": jid}))

            web_links = [{"url": webform}] if webform else []
            obj = OrderedDict({
                "id": ocd_uuid("person"),
                "name": name,
                "given_name": given_name,
                "family_name": family_name,
                "roles": [{
                    "jurisdiction": jid,
                    "type": "mayor",
                    "end_date": term_end
                }],
                "contact_details": [contact],
                "sources": list(web_links),
                "links": list(web_links),
            })
            dump_obj(obj, output_dir=f"data/{state}/municipalities/")

    dump_obj(all_municipalities,
             filename=f"data/{state_to_import}/municipalities.yml")
import glob
from utils import load_yaml, dump_obj, role_is_active

# Add a "Contact Form" link to every California legislator file, one link per
# active role, and write the file back in place.
CONTACT_URL = (
    "https://lcmspubcontact.lc.ca.gov/PublicLCMS/ContactPopup.php"
    "?district={letter}D{district:02d}&inframe=N"
)

for path in glob.glob("data/ca/legislature/*.yml"):
    with open(path) as handle:
        person = load_yaml(handle)

    for role in person["roles"]:
        if not role_is_active(role):
            continue
        # lower-chamber roles use the "A" prefix, every other role type "S"
        if role["type"] == "lower":
            prefix = "A"
        else:
            prefix = "S"
        contact_url = CONTACT_URL.format(
            letter=prefix, district=int(role["district"])
        )
        person["links"].append({"url": contact_url, "note": "Contact Form"})

    dump_obj(person, filename=path)
def make_governors():
    """Create one governor record per row of ``governors.csv``.

    Each record is dumped into ``data/<abbr>/executive/``, which is created
    if it does not exist yet.  The state is resolved by name through
    ``metadata.lookup``.
    """
    # newline="" matters here: the address column holds multi-line values
    # (see the splitlines() below), and the csv module asks for newline=""
    # so that newlines inside quoted fields are read correctly.
    with open("governors.csv", newline="") as f:
        for line in csv.DictReader(f):
            state = line["state"]
            name = line["name"]
            given_name = line["first_name"]
            family_name = line["last_name"]
            party = line["party"]
            birth_date = line["birth_date"]
            start_date = line["start_date"]
            end_date = line["end_date"]
            website = line["website"]
            twitter = line["twitter"]
            webform = line["webform"]

            full_address = "; ".join(
                [n.strip() for n in line["address"].splitlines()])
            phone = line["phone"]
            email = line["email"]
            fax = line["fax"]

            contact = {"note": "Capitol Office"}
            if full_address:
                contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            state_meta = metadata.lookup(name=state)
            jid = state_meta.jurisdiction_id
            abbr = state_meta.abbr.lower()

            # the webform link is only added when a URL is given, so rows
            # without one no longer produce a link with an empty URL
            links = [{"url": website}]
            if webform:
                links.append({"url": webform, "note": "webform"})

            obj = OrderedDict({
                "id": ocd_uuid("person"),
                "name": name,
                "given_name": given_name,
                "family_name": family_name,
                "birth_date": birth_date,
                "party": [{
                    "name": party
                }],
                "roles": [{
                    "jurisdiction": jid,
                    "type": "governor",
                    "start_date": start_date,
                    "end_date": end_date,
                }],
                "contact_details": [contact],
                "ids": ids,
                "sources": [{
                    "url": website
                }],
                "links": links,
            })

            outdir = f"data/{abbr}/executive/"
            # exist_ok: the directory may already be there, e.g. from an
            # earlier row or an earlier import of other executives
            os.makedirs(outdir, exist_ok=True)
            dump_obj(obj, output_dir=outdir)
def make_ceos():
    """Create chief-election-officer records from ``ceo.csv``.

    One record per row is dumped into ``data/<abbr>/executive/``.  Rows whose
    state name makes ``metadata.lookup`` raise ``KeyError`` are skipped.  Any
    role other than "secretary of state" is stored as
    "chief election officer"; any party code other than "R" or "D" is stored
    as "Independent".
    """
    # newline="" is what the csv module asks for when it is given a file object
    with open("ceo.csv", newline="") as f:
        for line in csv.DictReader(f):
            state = line["State"].strip()
            given_name = line["First"]
            family_name = line["Last"]
            name = f"{given_name} {family_name}"
            role = line["Role"].strip().lower()
            addr1 = line["Address 1"]
            addr2 = line["Address 2"]
            city = line["City"]
            state_abbr = line["Postal Code"]
            zip5 = line["Zip Code"]
            zip4 = line["Zip Plus 4"]
            phone = line["Phone"]
            email = line["Email"]
            fax = line["Fax"]
            contact_form = line["Contact Form"]
            source = line["Source"]
            twitter = line["Twitter"]
            party = line["Party"]

            if party == "R":
                party = "Republican"
            elif party == "D":
                party = "Democratic"
            else:
                party = "Independent"

            if role != "secretary of state":
                role = "chief election officer"

            # A blank "Address 2" used to leave an empty "; ;" segment and a
            # blank "Zip Plus 4" a dangling "-"; both are now left out.
            zipcode = f"{zip5}-{zip4}" if zip4.strip() else zip5
            address_parts = [addr1, addr2, f"{city}, {state_abbr} {zipcode}"]
            full_address = "; ".join(p for p in address_parts if p.strip())

            contact = {"note": "Capitol Office"}
            contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            try:
                state_meta = metadata.lookup(name=state)
            except KeyError:
                # state name not known to metadata: skip the row
                continue
            jid = state_meta.jurisdiction_id
            abbr = state_meta.abbr.lower()

            links = [{"url": source}]
            if contact_form:
                links.append({"url": contact_form, "note": "webform"})

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "roles": [
                        {
                            "jurisdiction": jid,
                            # already stripped and lower-cased above
                            "type": role,
                            "end_date": "2021-12-31",
                        },
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": source}],
                    "links": links,
                    "party": [{"name": party}],
                }
            )

            # NOTE: the directory is not created here (the makedirs call was
            # disabled in the original), so it is expected to exist already
            outdir = f"data/{abbr}/executive/"
            dump_obj(obj, output_dir=outdir)
def render(self):
    """Debug inspector: draw and print game state for the active inspector keys.

    Returns immediately unless the module-level ``DEBUG`` flag is truthy.
    Each ``keys[...]`` entry below enables one independent dump. Output is
    written with single-argument ``print(...)``, which prints the same text
    whether ``print`` is a statement or a function.
    """
    if not DEBUG:
        return

    game = self.env.read_game
    frame = self.env.frame

    if keys["KEY_INSPECTOR"]:
        # Label every entity that projects close to the screen centre.
        for idx in range(ENTITIESMAX):
            entity = game.cod7_entity.arr[idx]
            spot = game.world_to_screen(entity.pos)
            if not spot:
                continue
            center_dist = self.sq(spot.x - game.screen_center_x,
                                  spot.y - game.screen_center_y)
            if not (center_dist < 50 * 50):
                # too far from center
                continue
            label = "[idx=%i(%x), typ=%i, weap=%i]" % (idx, idx, entity.type, entity.weapon)
            draw_string_center(frame.font, spot.x, spot.y, 0xFFFFFFFF, label)
            print(label)

    if keys["KEY_INSPECT_POS"]:
        # Print my player's position (NUMPAD1).
        pos = game.my_pos
        ang = game.view_angles
        print("time=%i ticks=%i perk=%i pos=(%.2f, %.2f, %.2f) angles=(%.2f, %.2f, %.2f)"
              % (game.game_time, self.env.ticks, game.my_player.perk,
                 pos.x, pos.y, pos.z, ang.x, ang.y, ang.z))

    if keys["KEY_INSPECT_DUMP"]:
        # Dump some memory structures (NUMPAD9); always inspects slot 0.
        player_idx = 0
        print("player #%i" % player_idx)
        print(dump_obj(game.cod7_entity.arr[player_idx]))
        print("client info")
        print(dump_obj(game.cod7_clientinfo.arr[player_idx]))

    if keys["KEY_INSPECT_DUMP_CG"]:
        # NUMPAD 8: read the structure at offsets.RXCD_T into a scratch
        # object and dump it.
        mem = dumped()
        game._RPM(self.env.offsets.RXCD_T, mem)
        print(dump_obj(mem))

    if keys["KEY_INSPECT_DUMP_PLAYERS"]:
        # NUMPAD 7: list every player slot with its team.
        print("local_client=%i" % game.local_client_num)
        for slot in range(PLAYERMAX):
            print("Player #%i: %s, Team:%i"
                  % (slot, game.player[slot].name, game.player[slot].team))

    if keys["KEY_INSPECT_AMMO"]:
        for slot in range(AMMOMAX):
            ammo = game.cg.ammos[slot]
            print("Ammo[%i] (%i)%s = %i"
                  % (slot, ammo.weapon_id,
                     self.env.weapon_names.get_weapon_model(ammo.weapon_id),
                     ammo.ammo))
def dump_state(self, exec_flag=False):
    """Log the current stage and, when *exec_flag* is truthy, pickle ``self``.

    The dump goes through ``utils.dump_obj`` to ``run_state_<stage>.pkl``
    with ``force=True``.
    """
    stage = self._stage
    self.logger.debug("state %s" % stage)
    if not exec_flag:
        return

    self.logger.debug("dumping state to file for %s" % stage)
    # A single shared 'run_state.pkl' was judged too large; one file per stage.
    utils.dump_obj(self, "run_state_%s.pkl" % stage, force=True)
def directory_merge(abbr, existing_people, new_people, remove_identical, copy_new, interactive):
    """Match incoming people against existing ones and help merge them.

    Parameters:
        abbr: state abbreviation used to build ``data/<abbr>/people`` and
            ``incoming/<abbr>/people`` paths.
        existing_people: already-stored person dicts.
        new_people: incoming person dicts; each must have an ``'id'``.
        remove_identical: if truthy, delete incoming files that perfectly
            match an existing person.
        copy_new: if truthy, move unmatched incoming files into ``data/``.
        interactive: if truthy, show differences for each candidate pair and
            ask whether to keep the old data, keep the new data, skip or abort.

    Raises:
        SystemExit: when the user chooses (a)bort at the interactive prompt.
    """
    perfect_matched = set()
    matches = []
    id_to_new_filename = {}

    for new in new_people:
        best_similarity = 0
        best_match = None
        id_to_new_filename[new['id']] = get_filename(new)

        for existing in existing_people:
            similarity = calculate_similarity(existing, new)
            if similarity > 0.999:
                # Identical to an existing person: nothing left to merge, so
                # stop looking and keep it out of the candidate list.
                perfect_matched.add(new['id'])
                break
            if similarity > best_similarity:
                best_similarity = similarity
                best_match = existing
        else:
            # Only reached when no perfect match was found. Previously a
            # perfect match was also queued with its runner-up candidate,
            # which later failed in unmatched.remove() or offered an
            # already-identical file for merging.
            matches.append((best_similarity, new, best_match))

    click.secho(f'{len(perfect_matched)} were perfect matches', fg='green')

    if remove_identical:
        for person_id in perfect_matched:
            fname = f'incoming/{abbr}/people/{id_to_new_filename[person_id]}'
            click.secho('removing ' + fname, fg='red')
            os.remove(fname)

    unmatched = set(p['id'] for p in new_people) - perfect_matched

    # Walk candidate pairs from most to least similar.
    for sim, new, old in sorted(matches, reverse=True, key=lambda x: x[0]):
        if sim < 0.001:
            # Everything from here on has effectively no similarity.
            break
        unmatched.discard(new['id'])
        oldfname = f'data/{abbr}/people/{get_filename(old)}'
        newfname = f'incoming/{abbr}/people/{get_filename(new)}'
        click.secho(f' {sim:.2f} {oldfname} {newfname}', fg='yellow')

        if interactive:
            for difference in compare_objects(old, new):
                click.echo(' ' + str(difference))

            # Compare against whole answers: a substring test on 'onsa' would
            # also accept '' and leave keep_on_conflict unset or stale.
            ch = '~'
            while ch not in ('o', 'n', 's', 'a'):
                click.secho('Keep (o)ld? Keep (n)ew? (s)kip? (a)bort?', bold=True)
                ch = click.getchar()

            if ch == 'a':
                raise SystemExit(-1)
            if ch == 's':
                continue
            keep_on_conflict = 'old' if ch == 'o' else 'new'

            merged = merge_people(old, new, keep_both_ids=False,
                                  keep_on_conflict=keep_on_conflict)
            dump_obj(merged, filename=oldfname)
            os.remove(newfname)

    click.secho(f'{len(unmatched)} were unmatched')
    if copy_new:
        for person_id in unmatched:
            fname = id_to_new_filename[person_id]
            oldfname = f'incoming/{abbr}/people/{fname}'
            newfname = f'data/{abbr}/people/{fname}'
            click.secho(f'moving {oldfname} to {newfname}', fg='yellow')
            os.rename(oldfname, newfname)
def dir_to_mongo(abbr, create, clear_old_roles, verbose):
    """Sync the people files for *abbr* into the legacy ``legislators`` collection.

    Parameters:
        abbr: state abbreviation; selects the people files and the metadata
            document (``_id == abbr``).
        create: if truthy, assign a new legacy ID to people that lack one and
            write it back to their file; otherwise such a file aborts the run.
        clear_old_roles: if truthy, do not carry ``old_roles`` over from the
            existing database record.
        verbose: if truthy, also report people that needed no update.

    Exits with status 1 when a person has no legacy ID (and *create* is
    falsy) or has no active role. Finally, every legislator of this state in
    the database that was not seen in the files is passed to
    ``retire_person``.
    """
    db = pymongo.MongoClient(os.environ.get('BILLY_MONGO_HOST', 'localhost'))['fiftystates']

    metadata = db.metadata.find({'_id': abbr})[0]
    latest_term = metadata['terms'][-1]['name']

    active_ids = []

    for person, filename in iter_objects(abbr, 'people'):
        legacy_ids = [
            oid['identifier']
            for oid in person.get('other_identifiers', [])
            if oid['scheme'] == 'legacy_openstates'
        ]
        if not legacy_ids:
            if create:
                # get next ID
                new_id = get_next_id(db, abbr)
                legacy_ids = [new_id]
                if 'other_identifiers' not in person:
                    person['other_identifiers'] = []
                person['other_identifiers'].append({
                    'scheme': 'legacy_openstates',
                    'identifier': new_id,
                })
                dump_obj(person, filename=filename)
            else:
                click.secho(
                    f'{filename} does not have legacy ID, run with --create',
                    fg='red')
                sys.exit(1)

        active_ids.append(legacy_ids[0])

        # handle name
        _prefix, first_name, last_name, suffixes = name_tools.split(person['name'])

        # get chamber, district, party
        # These are resolved fresh for every person. Previously they were not
        # reset, so a person without an active role silently inherited the
        # previous person's chamber/district/party (or hit NameError first).
        active_role = next(
            (role for role in person['roles'] if role_is_active(role)), None)
        if active_role is None:
            click.secho(f'{filename} does not have an active role', fg='red')
            sys.exit(1)
        chamber = active_role['type']
        district = active_role['district']

        # The last active party entry wins; None when there is none.
        party = None
        for party_entry in person['party']:
            if role_is_active(party_entry):
                party = party_entry['name']

        url = person['links'][0]['url']
        email = ''

        offices = []
        for cd in person.get('contact_details', []):
            office = {
                'fax': cd.get('fax'),
                'phone': cd.get('voice'),
                'address': cd.get('address'),
                'email': cd.get('email'),
                'name': cd['note'],
                'type': 'capitol' if 'capitol' in cd['note'].lower() else 'district',
            }
            offices.append(office)
            # The first office that has an email supplies the top-level email.
            if office['email'] and not email:
                email = office['email']

        # NE & DC
        if chamber == 'legislature':
            chamber = 'upper'

        # get some old data to keep around
        created_at = datetime.datetime.utcnow()
        old_roles = {}
        old_person = None
        try:
            old_person = db.legislators.find({'_id': legacy_ids[0]})[0]
            created_at = old_person['created_at']
            if not clear_old_roles:
                old_roles = old_person.get('old_roles', {})
        except IndexError:
            # No existing record: this person is new to the database.
            pass

        mongo_person = {
            '_id': legacy_ids[0],
            'leg_id': legacy_ids[0],
            '_all_ids': legacy_ids,
            '_type': 'person',
            'active': True,
            'full_name': person['name'],
            '_scraped_name': person['name'],
            'photo_url': person.get('image'),
            'state': abbr,
            'district': district,
            'chamber': chamber,
            'party': party,
            'email': email,
            'url': url,
            'offices': offices,
            'created_at': created_at,
            'first_name': first_name,
            'middle_name': '',
            'last_name': last_name,
            'suffixes': suffixes,
            'sources': person['sources'],
            'old_roles': old_roles,
            'roles': [
                {
                    'term': latest_term,
                    'district': district,
                    'chamber': chamber,
                    'state': abbr,
                    'party': party,
                    'type': 'member',
                    'start_date': None,
                    'end_date': None,
                },
            ],
        }
        # TODO: committee info
        # { "term" : "2017-2018", "committee_id" : "NCC000233", "chamber" : "lower",
        # "state" : "nc", "subcommittee" : null, "committee" : "State and Local Government II",
        # "position" : "member", "type" : "committee member" },

        # compare
        if old_person:
            # updated_at always differs, so leave it out of the comparison.
            old_person.pop('updated_at', None)
        if old_person == mongo_person:
            if verbose:
                click.secho(f'no updates to {mongo_person["_id"]}')
        else:
            click.secho(f'updating {mongo_person["_id"]}', fg='green')
            mongo_person['updated_at'] = datetime.datetime.utcnow()
            # NOTE(review): Collection.save() and Cursor.count() are gone in
            # newer PyMongo releases - confirm the pinned version before
            # upgrading.
            try:
                db.legislators.save(mongo_person)
            except Exception as e:
                # Best effort: report the failure and move on to the next person.
                print(e)

    to_retire = db.legislators.find({
        '_id': {'$nin': active_ids},
        'state': abbr,
    })
    click.secho(f'going to try to retire {to_retire.count()}')
    for leg in to_retire:
        retire_person(db, leg)
def save(self, directory):
    """Write this object's ``to_dict()`` form into *directory* via ``dump_obj``."""
    payload = self.to_dict()
    dump_obj(payload, output_dir=directory)
def update_municipalities(municipalities, state):
    """Append *municipalities* to ``data/<state>/municipalities.yml``.

    The file's current contents are loaded with ``load_yaml``, concatenated
    with *municipalities*, and written back to the same path with ``dump_obj``.
    """
    path = f"data/{state}/municipalities.yml"
    with open(path) as handle:
        existing = load_yaml(handle)
    combined = existing + municipalities
    dump_obj(combined, filename=path)