def main():
    """Fetch election results from the SCB API and store them in MongoDB.

    Builds a nested mapping party_abbr -> year -> vote share (fraction,
    not percent) and persists it under the key "election_results".
    """
    response = scb.get_request(URL, QUERY)
    results = scb.NestedDefaultdict()
    for record in response["data"]:
        raw = record["values"][0]
        if raw == scb.na_val:
            # Missing observation — leave it out entirely.
            continue
        key = record["key"]
        party, year = key[1].lower(), int(key[2])
        # SCB reports percentages; store as a fraction in [0, 1].
        results[party][year] = float(raw) / 100
    MongoDBDatastore().store_object(results.to_dict(), "election_results")
def main():
    """Fetch SCB party-sympathy poll data and store it in MongoDB.

    Builds a nested mapping party_abbr -> poll date -> share (fraction,
    not percent) and persists it under the key "scb_polls".
    """
    response = scb.get_request(URL, QUERY)
    polls = scb.NestedDefaultdict()
    for record in response["data"]:
        raw = record["values"][0]
        if raw == scb.na_val:
            # Missing observation — skip it.
            continue
        party = record["key"][0].lower()
        # The period key looks like "YYYY'M'MM"; anchor each poll on the
        # first day of that month.
        year, month = (int(part) for part in record["key"][1].split('M'))
        # SCB reports percentages; store as a fraction in [0, 1].
        polls[party][dt.datetime(year, month, 1)] = float(raw) / 100
    MongoDBDatastore().store_object(polls.to_dict(), "scb_polls")
"code": "ContentsCode", "selection": { "filter": "item", "values": ["ME0201AV"] } }], "response": { "format": "json" } } url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/START/ME/ME0201/ME0201B/Partisympati17" d = scb.get_request(url, query) data = scb.NestedDefaultdict() key_order = scb.best_party_gender_key_order for entry in d["data"]: val = entry["values"][0] val = float('NaN') if val == scb.na_val else float(val) / 100 keys = scb.translate_keys(entry["key"]) keys_d = dict(zip(key_order, keys)) t = keys_d["Tid"] p = keys_d["Partisympati"] data[p][t] = val ds = MongoDBDatastore() ds.store_object(data.to_dict(), "best_party_time")
def cosigning_timeseries():
    """Build statistics over member proposals co-signed across parties and
    store them in MongoDB.

    Produces two stores:
      * "party_cosigning_timeseries": per committee and riksmöte (session),
        counts of proposal points per co-signing party constellation.
      * "party_cosigning_matrix": per party A and committee, how many points
        party A co-signed together with each other party B.
    """
    engine = create_engine(pg_utils.engine_url())
    session = sessionmaker(bind=engine)
    s = session()
    # Lookup tables keyed by database id, used when rendering the output.
    party_metadata = dict()
    for party in s.query(Party):
        party_metadata[party.id] = dict(abbr=party.abbr,
                                        name=party.name,
                                        ordering=party_ordering[party.abbr])
    committee_metadata = dict()
    for c in s.query(Committee):
        committee_metadata[c.id] = dict(abbr=c.abbr, name=c.name)
    # Pseudo-committee id 0 aggregates over all committees.
    committee_metadata[0] = dict(abbr="Alla", name="Alla utskott")
    # Eager-load point -> committee_report -> committee to avoid per-row
    # queries in the loops below; single-member proposals ("Enskild motion")
    # are excluded up front.
    basequery = s.query(MemberProposal) \
        .options(joinedload(MemberProposal.points).joinedload(ProposalPoint.committee_report).joinedload(CommitteeReport.committee)) \
        .filter(MemberProposal.subtype != 'Enskild motion')
    # Parliamentary sessions ("riksmöten") 2002/03 .. 2013/14.
    riksmoten = [
        '{:04d}/{:02d}'.format(y, y - 2000 + 1) for y in range(2002, 2014)
    ]
    output = dict()  # committee_id -> parties_key -> timeseries dict
    num_missing_committee_report = 0
    num_missing_committee_report_committee = 0
    matrix = dict()  # party_id -> committee_id -> other party_id -> counts
    for (rm_idx, rm) in enumerate(riksmoten):
        print("RM: {}".format(rm))
        for doc in basequery.filter(MemberProposal.session == rm):
            # Only multi-signatory proposals are of interest.
            if not doc.signatories or len(doc.signatories) == 1:
                continue
            # Distinct, sorted party ids of the signatories; independents
            # (party abbreviation "-") are ignored.
            signing_parties = sorted(
                list(
                    set(m.party.id for m in doc.signatories
                        if not m.party.abbr == "-")))
            if len(signing_parties) <= 1:
                continue
            # repr() of the sorted id list serves as a stable dict key for
            # the party constellation.
            parties_key = repr(signing_parties)
            for point in doc.points:
                # Some points lack a committee report, or the report lacks a
                # committee; count and skip those.
                if not point.committee_report:
                    num_missing_committee_report += 1
                    continue
                if not point.committee_report.committee:
                    num_missing_committee_report_committee += 1
                    continue
                committee = point.committee_report.committee
                # compute party matrix data
                for (p1, p2) in permutations(signing_parties, 2):
                    if not p1 in matrix:
                        matrix[p1] = dict()
                    # Count under both the actual committee and pseudo-id 0
                    # ("all committees").
                    for committee_key in [0, committee.id]:
                        if not committee_key in matrix[p1]:
                            matrix[p1][committee_key] = dict()
                        if not p2 in matrix[p1][committee_key]:
                            matrix[p1][committee_key][p2] = dict(
                                values=[0 for rm in riksmoten],
                                abbr=party_metadata[p2]["abbr"],
                                name=party_metadata[p2]["name"],
                                id=p2)
                        matrix[p1][committee_key][p2]["values"][rm_idx] += 1
                # compute timeseries data
                for committee_key in [0, committee.id]:
                    if not committee_key in output:
                        output[committee_key] = dict()
                    if not parties_key in output[committee_key]:
                        output[committee_key][parties_key] = \
                            dict(
                                values=[0 for rm in riksmoten],
                                abbr=' + '.join(party_metadata[p_id]["abbr"]
                                                for p_id in signing_parties),
                                name=' + '.join(party_metadata[p_id]["name"]
                                                for p_id in signing_parties),
                                num_parties=len(signing_parties)
                            )
                    output[committee_key][parties_key]["values"][rm_idx] += 1
    ds = MongoDBDatastore()
    # Timeseries document: for each committee and riksmöte, the party
    # constellations active that session (zero counts filtered out).
    output_timeseries_top = dict(
        t=riksmoten,
        committees=[
            dict(abbr=committee_metadata[c_id]["abbr"],
                 name=committee_metadata[c_id]["name"],
                 id=c_id,
                 series=[[
                     dict(abbr=p_dict["abbr"],
                          name=p_dict["name"],
                          num_parties=p_dict["num_parties"],
                          value=p_dict["values"][rm_idx])
                     for (p_key, p_dict) in series.items()
                     if p_dict["values"][rm_idx] > 0
                 ] for (rm_idx, rm) in enumerate(riksmoten)])
            for (c_id, series) in output.items()
        ])
    ds.store_object(output_timeseries_top, "party_cosigning_timeseries")
    # The matrix is stored one document per party, upserted on "partyA".
    mongodb = ds.get_mongodb_database()
    mongo_collection = mongodb.party_cosigning_matrix
    # NOTE(review): ensure_index/update are deprecated PyMongo APIs
    # (create_index / replace_one in current versions) — confirm driver
    # version before modernizing.
    mongo_collection.ensure_index([("partyA", ASCENDING)], unique=True)
    for (p1, matrix_p1) in matrix.items():
        mongo_partyA = party_metadata[p1]["abbr"]
        output_party_matrix_top = dict(
            partyA=mongo_partyA,
            t=riksmoten,
            committees=[
                dict(abbr=committee_metadata[c_id]["abbr"],
                     name=committee_metadata[c_id]["name"],
                     id=c_id,
                     parties=list(committee_dict.values()))
                for (c_id, committee_dict) in matrix_p1.items()
            ])
        mongo_collection.update(dict(partyA=mongo_partyA),
                                output_party_matrix_top,
                                upsert=True)
    print(
        "missing committee_report: {}, committee_reports missing committee's: {}"
        .format(num_missing_committee_report,
                num_missing_committee_report_committee))
def main():
    """Build the site's search index and store it in MongoDB under "search".

    The stored document consists of:
      * groups: displayable objects (members, parties) arranged in groups,
      * keywords: normalized strings coupled to (group, object) index pairs.
    A reverse trigram table is also computed but only its size is printed;
    it is not persisted here.
    """
    engine = create_engine(pg_utils.engine_url())
    session = sessionmaker(bind=engine)
    s = session()
    # Enumerate all searchable objects and arrange in groups
    members = list()
    member_indices = dict()  # Member.id -> index into `members`
    for (idx, member) in enumerate(s.query(Member)):
        members.append(
            dict(title=member.first_name + " " + member.last_name + " (" +
                 member.party.abbr + ")",
                 img_url=member.image_url_sm,
                 url='/' + member.url_name))
        member_indices[member.id] = idx
    parties = list()
    party_indices = dict()  # Party.id -> index into `parties`
    for (idx, party) in enumerate(s.query(Party)):
        parties.append(
            dict(title=party.name,
                 img_url='/static/img/parties/' + party.abbr.lower() + '.png',
                 url='/' + party.abbr.lower()))
        party_indices[party.id] = idx
    output_groups = [
        dict(title="Ledamöter", objects=members),
        dict(title="Partier", objects=parties)
    ]
    # Positions must match the order of output_groups above.
    group_indices = dict(member=0, party=1)
    # Enumerate all strings to match and couple them to objects (primary and secondary hits)
    keywords = list()
    for member in s.query(Member):
        normalized_string = (member.first_name + " " +
                             member.last_name).lower()
        keywords.append(
            dict(string=normalized_string,
                 primary=(group_indices["member"], member_indices[member.id]),
                 secondaries=[(group_indices["party"],
                               party_indices[member.party.id])]))
    for party in s.query(Party):
        normalized_string = (party.name).lower()
        # Secondary hits are the party's first two members.
        # NOTE(review): assumes party.members has a meaningful ordering —
        # confirm against the relationship definition.
        keywords.append(
            dict(string=normalized_string,
                 primary=(group_indices["party"], party_indices[party.id]),
                 secondaries=[(group_indices["member"], member_indices[m.id])
                              for m in party.members[0:2]]))
    # Generate reverse table of all trigrams to match, such that we can ask:
    # which keywords contain this trigram (and how many)
    trigrams_dict = dict()
    for (keyword_idx, keyword) in enumerate(keywords):
        # All overlapping 3-grams of the keyword string, with multiplicities.
        grams = Counter(
            map(lambda t: ''.join(t),
                zip(*[keyword["string"][k:] for k in range(3)])))
        for (gram, count) in grams.items():
            if not gram in trigrams_dict:
                trigrams_dict[gram] = list()
            trigrams_dict[gram].append((keyword_idx, count))
    print('# unique trigrams: {}'.format(len(trigrams_dict)))
    # Only groups and keywords are persisted.
    output_top = dict(groups=output_groups, keywords=keywords)
    ds = MongoDBDatastore()
    ds.store_object(output_top, "search")
# NOTE(review): fragment — the enclosing loops and the `if` matching the
# `else:` below lie outside this chunk; indentation of the first statements
# is reconstructed and should be verified against the full file.
            municipalities[m][y][sort_special[p]] = dp[m]
        else:
            if not math.isnan(dp[m]):
                municipalities[m][y]['total_votes'] += dp[m]

# Sum each party's votes over all years into per-year grand totals.
totals = {}
for p, dp in party_sums.items():
    for y, val in dp.items():
        if y not in totals:
            totals[y] = 0
        totals[y] += val

# Set municipalities relative to number of votes (leave NaNs)
for y, dy in elec_dict.items():
    for p, dp in dy.items():
        for m in dp.keys():
            try:
                if not math.isnan(dp[m]):
                    dp[m] = dp[m] / municipalities[m][y]['total_votes']
            except ZeroDivisionError as e:
                # Municipality with zero recorded votes: report and keep
                # the raw count unscaled.
                print("Division by zero: municipality {}, year {}.".format(
                    m, y))

print("Storing in MongoDB.")
ds = MongoDBDatastore()
ds.store_object(elec_dict, "election_municipalities")
ds.store_object(municipalities, "election_municipality_sums")
ds.store_object(party_sums, "election_party_sums")
ds.store_object(totals, "election_totals")
], "response": { "format": "json" } } url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/START/ME/ME0201/ME0201B/Partisympati17" d = scb.get_request(url,query) data = scb.NestedDefaultdict() key_order = scb.best_party_gender_key_order for entry in d["data"]: val = entry["values"][0] val = float('NaN') if val==scb.na_val else float(val)/100 keys = scb.translate_keys(entry["key"]) keys_d = dict(zip(key_order,keys)) t = keys_d["Tid"] p = keys_d["Partisympati"] g = keys_d["Kon"] e = keys_d["UtbNivaSUN2000"] data[t][p][g] = val ds = MongoDBDatastore() ds.store_object(data.to_dict(),"best_party_gender")
"filter": "item", "values": ["ME0201AV"] } }], "response": { "format": "json" } } url = "http://api.scb.se/OV0104/v1/doris/sv/ssd/START/ME/ME0201/ME0201B/Partisympati17" d = scb.get_request(url, query) data = scb.NestedDefaultdict() key_order = scb.best_party_gender_key_order for entry in d["data"]: val = entry["values"][0] val = float('NaN') if val == scb.na_val else float(val) / 100 keys = scb.translate_keys(entry["key"]) keys_d = dict(zip(key_order, keys)) t = keys_d["Tid"] p = keys_d["Partisympati"] g = keys_d["Kon"] e = keys_d["UtbNivaSUN2000"] data[t][p][e] = val ds = MongoDBDatastore() ds.store_object(data.to_dict(), "best_party_education")