def handle_noargs(self, **options): api_key = hmac.new(settings.IEBC_API_SECRET, "appid=%s" % (settings.IEBC_API_ID, ), hashlib.sha256).hexdigest() token_data = get_data(make_api_token_url(settings.IEBC_API_ID, api_key)) token = token_data['token'] def url(path, query_filter=None): """A closure to avoid repeating parameters""" return make_api_url(path, settings.IEBC_API_SECRET, token, query_filter) # To get all the candidates, we iterate over each county, # constituency and ward, and request the candidates for each. cache_directory = os.path.join(data_directory, 'api-cache-2013-02-08') mkdir_p(cache_directory) # ------------------------------------------------------------------------ print "Getting (incorrect) party names mapping for matching..." parties_cache_filename = os.path.join(cache_directory, 'parties') party_data = get_data_with_cache(parties_cache_filename, url('/party/')) party_names_api = set(d['name'].strip().encode('utf-8') for d in party_data['parties']) party_names_db = set( o.name.strip().encode('utf-8') for o in Organisation.objects.filter(kind__slug='party')) with open(os.path.join(data_directory, 'party-names.csv'), 'w') as fp: writer = csv.writer(fp) for t in itertools.izip_longest(party_names_api, party_names_db): writer.writerow(t) # ------------------------------------------------------------------------ print "Getting (incorrect) ward names mapping for matching..." ward_data = get_data(url('/ward/')) wards_from_api = sorted(ward['name'].encode('utf-8') for ward in ward_data['region']['locations']) wards_from_db = sorted( p.name.encode('utf-8') for p in Place.objects.filter(kind__slug='ward')) with open(os.path.join(data_directory, 'wards-names.csv'), 'w') as fp: writer = csv.writer(fp) for t in itertools.izip_longest(wards_from_api, wards_from_db): writer.writerow(t) # ------------------------------------------------------------------------ headings = [ 'Same/Different', 'API Name', 'API Party', 'API Place', 'API Candidate Code', 'Mz Legal Name', 'Mz Other Names', 'Mz URL', 'Mz Parties Ever', 'Mz Aspirant Ever', 'Mz Politician Ever', 'Mz ID' ] with open(os.path.join(sys.path[0], 'names-to-check.csv'), 'w') as fp: writer = csv.DictWriter(fp, headings) writer.writerow(dict((h, h) for h in headings)) for area_type in 'county', 'constituency', 'ward': cache_filename = os.path.join(cache_directory, area_type) area_type_data = get_data_with_cache(cache_filename, url('/%s/' % (area_type))) areas = area_type_data['region']['locations'] for i, area in enumerate(areas): # Get the candidates for that area: code = area['code'] candidates_cache_filename = os.path.join( cache_directory, 'candidates-for-' + area_type + '-' + code) candidate_data = get_data_with_cache( candidates_cache_filename, url('/candidate/', query_filter='%s=%s' % (area_type, code))) for race in candidate_data['candidates']: full_race_name = race['race'] race_type, place_name = parse_race_name(full_race_name) place_kind, session, title = known_race_type_mapping[ race_type] candidates = race['candidates'] for candidate in candidates: first_names = candidate['other_name'] or '' surname = candidate['surname'] or '' person = get_person_from_names( first_names, surname) if person: print "Got person match to:", person row = {} row['Same/Different'] = '' row['API Name'] = first_names + ' ' + surname party_data = candidate['party'] row['API Party'] = party_data[ 'name'] if 'name' in party_data else '' row['API Place'] = '%s (%s)' % (place_name, area_type) row['API Candidate Code'] = candidate['code'] row['Mz Legal Name'] = person.legal_name row['Mz Other Names'] = person.other_names row['Mz URL'] = 'http://info.mzalendo.com' + person.get_absolute_url( ) row['Mz Parties Ever'] = ', '.join( o.name for o in person.parties_ever()) for heading, positions in ( ('Mz Aspirant Ever', person.aspirant_positions_ever()), ('Mz Politician Ever', person.politician_positions_ever())): row[heading] = ', '.join( '%s at %s' % (p.title.name, p.place) for p in positions) row['Mz ID'] = person.id for key, value in row.items(): row[key] = unicode(value).encode('utf-8') writer.writerow(row)
def handle_noargs(self, **options): api_key = hmac.new(settings.IEBC_API_SECRET, "appid=%s" % (settings.IEBC_API_ID,), hashlib.sha256).hexdigest() token_data = get_data(make_api_token_url(settings.IEBC_API_ID, api_key)) token = token_data['token'] def url(path, query_filter=None): """A closure to avoid repeating parameters""" return make_api_url(path, settings.IEBC_API_SECRET, token, query_filter) # To get all the candidates, we iterate over each county, # constituency and ward, and request the candidates for each. cache_directory = os.path.join(data_directory, 'api-cache-2013-02-08') mkdir_p(cache_directory) # ------------------------------------------------------------------------ print "Getting (incorrect) party names mapping for matching..." parties_cache_filename = os.path.join(cache_directory, 'parties') party_data = get_data_with_cache(parties_cache_filename, url('/party/')) party_names_api = set(d['name'].strip().encode('utf-8') for d in party_data['parties']) party_names_db = set(o.name.strip().encode('utf-8') for o in Organisation.objects.filter(kind__slug='party')) with open(os.path.join(data_directory, 'party-names.csv'), 'w') as fp: writer = csv.writer(fp) for t in itertools.izip_longest(party_names_api, party_names_db): writer.writerow(t) # ------------------------------------------------------------------------ print "Getting (incorrect) ward names mapping for matching..." ward_data = get_data(url('/ward/')) wards_from_api = sorted(ward['name'].encode('utf-8') for ward in ward_data['region']['locations']) wards_from_db = sorted(p.name.encode('utf-8') for p in Place.objects.filter(kind__slug='ward')) with open(os.path.join(data_directory, 'wards-names.csv'), 'w') as fp: writer = csv.writer(fp) for t in itertools.izip_longest(wards_from_api, wards_from_db): writer.writerow(t) # ------------------------------------------------------------------------ headings = ['Same/Different', 'API Name', 'API Party', 'API Place', 'API Candidate Code', 'Mz Legal Name', 'Mz Other Names', 'Mz URL', 'Mz Parties Ever', 'Mz Aspirant Ever', 'Mz Politician Ever', 'Mz ID'] with open(os.path.join(sys.path[0], 'names-to-check.csv'), 'w') as fp: writer = csv.DictWriter(fp, headings) writer.writerow(dict((h, h) for h in headings)) for area_type in 'county', 'constituency', 'ward': cache_filename = os.path.join(cache_directory, area_type) area_type_data = get_data_with_cache(cache_filename, url('/%s/' % (area_type))) areas = area_type_data['region']['locations'] for i, area in enumerate(areas): # Get the candidates for that area: code = area['code'] candidates_cache_filename = os.path.join(cache_directory, 'candidates-for-' + area_type + '-' + code) candidate_data = get_data_with_cache(candidates_cache_filename, url('/candidate/', query_filter='%s=%s' % (area_type, code))) for race in candidate_data['candidates']: full_race_name = race['race'] race_type, place_name = parse_race_name(full_race_name) place_kind, session, title = known_race_type_mapping[race_type] candidates = race['candidates'] for candidate in candidates: first_names = candidate['other_name'] or '' surname = candidate['surname'] or '' person = get_person_from_names(first_names, surname) if person: print "Got person match to:", person row = {} row['Same/Different'] = '' row['API Name'] = first_names + ' ' + surname party_data = candidate['party'] row['API Party'] = party_data['name'] if 'name' in party_data else '' row['API Place'] = '%s (%s)' % (place_name, area_type) row['API Candidate Code'] = candidate['code'] row['Mz Legal Name'] = person.legal_name row['Mz Other Names'] = person.other_names row['Mz URL'] = 'http://info.mzalendo.com' + person.get_absolute_url() row['Mz Parties Ever'] = ', '.join(o.name for o in person.parties_ever()) for heading, positions in (('Mz Aspirant Ever', person.aspirant_positions_ever()), ('Mz Politician Ever', person.politician_positions_ever())): row[heading] = ', '.join('%s at %s' % (p.title.name, p.place) for p in positions) row['Mz ID'] = person.id for key, value in row.items(): row[key] = unicode(value).encode('utf-8') writer.writerow(row)
def update_candidates_for_place(place_name, place_kind, parliamentary_session, title, race_type, candidates, same_person_checker, **options): all_updates_succeeded = True place = get_matching_place(place_name, place_kind, parliamentary_session) current_aspirants = Position.objects.filter(place=place, title=title).currently_active() print "%s %s %s %s %s" % (place_name, place_kind, parliamentary_session, title, race_type) def full_name(c): return normalize_name(c['other_name']) + " " + normalize_name(c['surname']) code_to_existing_aspirant = dict((a.external_id, a) for a in current_aspirants if a.external_id) code_to_current_candidates = dict((c['code'], c) for c in candidates) existing_aspirant_codes = set(code_to_existing_aspirant.keys()) current_candidate_codes = set(code_to_current_candidates.keys()) existing_aspirants_to_remove = existing_aspirant_codes - current_candidate_codes new_candidates_to_add = current_candidate_codes - existing_aspirant_codes matched_candidate_codes = current_candidate_codes & existing_aspirant_codes # Add each candidate that wasn't already present: for code in new_candidates_to_add: candidate = code_to_current_candidates[code] first_names = normalize_name(candidate['other_name'] or '') surname = normalize_name(candidate['surname'] or '') person = get_person_from_names(first_names, surname) # If that person was an existing current aspirant, they'll # just need to have the IEBC candidate code set: iebc_code_just_needed_setting = False for matching_existing_aspirant_position in current_aspirants.filter(person=person): if matching_existing_aspirant_position.external_id: if matching_existing_aspirant_position.external_id != code: print " original: '%s'" % (matching_existing_aspirant_position.external_id,) print " candidate: '%s'" % (code,) format_tuple = (matching_existing_aspirant_position.person, matching_existing_aspirant_position.external_id, person, code) message = "There was an existing candidate (%s - %s) with a name match for (%s - %s) but different codes" % format_tuple # raise Exception, message print message else: matching_existing_aspirant_position.external_id = code print >> sys.stderr, "* The IEBC code was missing for %s" % (matching_existing_aspirant_position,) maybe_save(matching_existing_aspirant_position, **options) update_parties(person, candidate['party'], **options) iebc_code_just_needed_setting = True if iebc_code_just_needed_setting: continue # If we have a person match, but they weren't a current # aspirant in this race, then be careful that we're not # mismatching using the SamePersonChecker: if person: same_person = same_person_checker.check_same_and_update(candidate, place, race_type, person) if same_person is None: all_updates_succeeded = False continue if not person: print >> sys.stderr, "* Creating a person because none matched %s %s" % (first_names, surname) person = make_new_person(candidate, **options) assert(person) update_parties(person, candidate['party'], **options) aspirant_position_properties = { 'organisation': Organisation.objects.get(name='REPUBLIC OF KENYA'), 'place': place, 'person': person, 'title': title, 'category': 'political'} # First see if that aspirant position already exists: existing_matching_aspirant_positions = Position.objects.filter(**aspirant_position_properties).currently_active() if existing_matching_aspirant_positions: # If it does, make sure that the IEBC code is set in # external_id, and that the end_date is 'future': for existing_matching_aspirant_position in existing_matching_aspirant_positions: if existing_matching_aspirant_position.external_id != code or existing_matching_aspirant_position.end_date != future_approximate_date: print >> sys.stderr, "* Need to set either the IEBC code or the end date (future) for %s" % (existing_matching_aspirant_position,) existing_matching_aspirant_position.external_id = code existing_matching_aspirant_position.end_date = future_approximate_date maybe_save(existing_matching_aspirant_position, **options) else: # Then we have to create a new position: new_position = Position(start_date=today_approximate_date, end_date=future_approximate_date, external_id=code, **aspirant_position_properties) print >> sys.stderr, "* Creating a missing position: %s" % (new_position,) maybe_save(new_position, **options) # For those aspirants that are no longer current, end their aspirant position: for code in existing_aspirants_to_remove: existing_aspirant_to_remove = code_to_existing_aspirant[code] print >> sys.stderr, "* Removing a no longer current aspirant: %s (position id: %s)" % (existing_aspirant_to_remove, existing_aspirant_to_remove.id,) existing_aspirant_to_remove.end_date = yesterday_approximate_date maybe_save(existing_aspirant_to_remove, **options) # For those aspirants that were already present, just make sure # their party assignments are correct: for code in matched_candidate_codes: # Make sure that the party assignments are correct: person = code_to_existing_aspirant[code].person candidate = code_to_current_candidates[code] update_parties(person, candidate['party'], **options) return all_updates_succeeded
class Command(NoArgsCommand): help = 'Update the database with aspirants from the IEBC website' option_list = NoArgsCommand.option_list + (make_option( '--commit', action='store_true', dest='commit', help='Actually update the database'), ) def handle_noargs(self, **options): api_key = hmac.new(settings.IEBC_API_SECRET, "appid=%s" % (settings.IEBC_API_ID, ), hashlib.sha256).hexdigest() token_data = get_data(make_api_token_url(settings.IEBC_API_ID, api_key)) token = token_data['token'] def url(path, query_filter=None): """A closure to avoid repeating parameters""" return make_api_url(path, settings.IEBC_API_SECRET, token, query_filter) # To get all the candidates, we iterate over each county, # constituency and ward, and request the candidates for each. cache_directory = os.path.join(data_directory, 'api-cache-2013-02-08') mkdir_p(cache_directory) parties_cache_filename = os.path.join(cache_directory, 'parties') party_data = get_data_with_cache(parties_cache_filename, url('/party/'), only_from_cache=True) for party in party_data['parties']: party_api_code = party['code'] try: party_organisation = get_matching_party(party['name'], create=False, **options) except CommandError, ce: print >> sys.stderr, "Not setting the API code: %s" % (ce, ) party_organisation.external_id = party_api_code maybe_save(party_organisation, **options) print "########################################################################" for area_type in 'county', 'constituency', 'ward': cache_filename = os.path.join(cache_directory, area_type) area_type_data = get_data_with_cache(cache_filename, url('/%s/' % (area_type)), only_from_cache=True) areas = area_type_data['region']['locations'] for i, area in enumerate(areas): # Get the candidates for that area: code = area['code'] candidates_cache_filename = os.path.join( cache_directory, 'candidates-for-' + area_type + '-' + code) candidate_data = get_data_with_cache(candidates_cache_filename, url('/candidate/', query_filter='%s=%s' % (area_type, code)), only_from_cache=True) # print "got candidate_data:", candidate_data for race in candidate_data['candidates']: full_race_name = race['race'] race_type, place_name = parse_race_name(full_race_name) place_kind, session, title = known_race_type_mapping[ race_type] place = get_matching_place(place_name, place_kind, session) place.external_id = code maybe_save(place, **options) candidates = race['candidates'] for candidate in candidates: first_names = candidate['other_name'] or '' surname = candidate['surname'] or '' person = get_person_from_names(first_names, surname) if not person: raise Exception, "Failed to find the person from '%s' '%s'" % ( first_names, surname) aspirant_position_properties = { 'organisation': Organisation.objects.get(name='REPUBLIC OF KENYA'), 'place': place, 'person': person, 'title': title, 'category': 'political' } positions = Position.objects.filter( **aspirant_position_properties).currently_active() if positions: if len(positions) > 1: print >> sys.stderr, "There were multiple (%d) matches for: %s" % ( len(positions), aspirant_position_properties, ) for position in positions: position.external_id = candidate['code'] maybe_save(position, **options) else: message = "There were no matches for %s" % ( aspirant_position_properties, ) # raise Exception, message print >> sys.stderr, message
def update_candidates_for_place(place_name, place_kind, parliamentary_session, title, race_type, candidates, same_person_checker, **options): all_updates_succeeded = True place = get_matching_place(place_name, place_kind, parliamentary_session) current_aspirants = Position.objects.filter( place=place, title=title).currently_active() print "%s %s %s %s %s" % (place_name, place_kind, parliamentary_session, title, race_type) def full_name(c): return normalize_name(c['other_name']) + " " + normalize_name( c['surname']) code_to_existing_aspirant = dict( (a.external_id, a) for a in current_aspirants if a.external_id) code_to_current_candidates = dict((c['code'], c) for c in candidates) existing_aspirant_codes = set(code_to_existing_aspirant.keys()) current_candidate_codes = set(code_to_current_candidates.keys()) existing_aspirants_to_remove = existing_aspirant_codes - current_candidate_codes new_candidates_to_add = current_candidate_codes - existing_aspirant_codes matched_candidate_codes = current_candidate_codes & existing_aspirant_codes # Add each candidate that wasn't already present: for code in new_candidates_to_add: candidate = code_to_current_candidates[code] first_names = normalize_name(candidate['other_name'] or '') surname = normalize_name(candidate['surname'] or '') person = get_person_from_names(first_names, surname) # If that person was an existing current aspirant, they'll # just need to have the IEBC candidate code set: iebc_code_just_needed_setting = False for matching_existing_aspirant_position in current_aspirants.filter( person=person): if matching_existing_aspirant_position.external_id: if matching_existing_aspirant_position.external_id != code: print " original: '%s'" % ( matching_existing_aspirant_position.external_id, ) print " candidate: '%s'" % (code, ) format_tuple = ( matching_existing_aspirant_position.person, matching_existing_aspirant_position.external_id, person, code) message = "There was an existing candidate (%s - %s) with a name match for (%s - %s) but different codes" % format_tuple # raise Exception, message print message else: matching_existing_aspirant_position.external_id = code print >> sys.stderr, "* The IEBC code was missing for %s" % ( matching_existing_aspirant_position, ) maybe_save(matching_existing_aspirant_position, **options) update_parties(person, candidate['party'], **options) iebc_code_just_needed_setting = True if iebc_code_just_needed_setting: continue # If we have a person match, but they weren't a current # aspirant in this race, then be careful that we're not # mismatching using the SamePersonChecker: if person: same_person = same_person_checker.check_same_and_update( candidate, place, race_type, person) if same_person is None: all_updates_succeeded = False continue if not person: print >> sys.stderr, "* Creating a person because none matched %s %s" % ( first_names, surname) person = make_new_person(candidate, **options) assert (person) update_parties(person, candidate['party'], **options) aspirant_position_properties = { 'organisation': Organisation.objects.get(name='REPUBLIC OF KENYA'), 'place': place, 'person': person, 'title': title, 'category': 'political' } # First see if that aspirant position already exists: existing_matching_aspirant_positions = Position.objects.filter( **aspirant_position_properties).currently_active() if existing_matching_aspirant_positions: # If it does, make sure that the IEBC code is set in # external_id, and that the end_date is 'future': for existing_matching_aspirant_position in existing_matching_aspirant_positions: if existing_matching_aspirant_position.external_id != code or existing_matching_aspirant_position.end_date != future_approximate_date: print >> sys.stderr, "* Need to set either the IEBC code or the end date (future) for %s" % ( existing_matching_aspirant_position, ) existing_matching_aspirant_position.external_id = code existing_matching_aspirant_position.end_date = future_approximate_date maybe_save(existing_matching_aspirant_position, **options) else: # Then we have to create a new position: new_position = Position(start_date=today_approximate_date, end_date=future_approximate_date, external_id=code, **aspirant_position_properties) print >> sys.stderr, "* Creating a missing position: %s" % ( new_position, ) maybe_save(new_position, **options) # For those aspirants that are no longer current, end their aspirant position: for code in existing_aspirants_to_remove: existing_aspirant_to_remove = code_to_existing_aspirant[code] print >> sys.stderr, "* Removing a no longer current aspirant: %s (position id: %s)" % ( existing_aspirant_to_remove, existing_aspirant_to_remove.id, ) existing_aspirant_to_remove.end_date = yesterday_approximate_date maybe_save(existing_aspirant_to_remove, **options) # For those aspirants that were already present, just make sure # their party assignments are correct: for code in matched_candidate_codes: # Make sure that the party assignments are correct: person = code_to_existing_aspirant[code].person candidate = code_to_current_candidates[code] update_parties(person, candidate['party'], **options) return all_updates_succeeded