def handle_noargs(self, **options): csv_filename = os.path.join(data_directory, 'positions-to-end-delete-and-alternative-names.csv') with open(csv_filename) as fp: reader = csv.DictReader(fp) for row in reader: alternative_names_to_add = row['Alternative Names To Add'] if not alternative_names_to_add: continue try: position = Position.objects.get(pk=row["Existing Aspirant Position ID"]) except Position.DoesNotExist: # Probably it's already been removed by the remove duplicates script continue if alternative_names_to_add == '[endpos]': position.end_date = yesterday_approximate_date maybe_save(position, **options) elif alternative_names_to_add == '[delpos]': if options['commit']: position.delete() else: print "------------------------------------------------------------------------" print alternative_names_to_add names_to_add = [an.title().strip() for an in alternative_names_to_add.split(', ')] for n in names_to_add: person = Person.objects.get(pk=row['Existing Aspirant Person ID']) person.add_alternative_name(n) maybe_save(person, **options)
def handle_noargs(self, **options):
    """Normalize the legal_name and other_names of every Person.

    Each person whose names differ from their normalize_name() form is
    reported on stderr, updated in memory and passed to maybe_save();
    people whose names are already normalized are left untouched.
    """
    for person in Person.objects.all():
        old_legal, old_other = person.legal_name, person.other_names
        new_legal = normalize_name(old_legal)
        new_other = normalize_name(old_other)

        legal_differs = old_legal != new_legal
        other_differs = old_other != new_other
        if not (legal_differs or other_differs):
            continue

        print >> sys.stderr, "Found difference(s):"
        if legal_differs:
            print >> sys.stderr, " ", old_legal, "should be", new_legal
        if other_differs:
            print >> sys.stderr, " ", old_other, "should be", new_other

        person.legal_name = new_legal
        person.other_names = new_other
        maybe_save(person, **options)
def handle_noargs(self, **options):
    """Rewrite every Person's names in their normalized form.

    For each person, legal_name and other_names are run through
    normalize_name(); if either changes, the differences are written to
    stderr and the person is handed to maybe_save() with both
    normalized values set.
    """
    for person in Person.objects.all():
        # (current value, normalized value) for legal_name and then
        # other_names, in that order:
        name_pairs = []
        for current in (person.legal_name, person.other_names):
            name_pairs.append((current, normalize_name(current)))

        changed_pairs = [pair for pair in name_pairs if pair[0] != pair[1]]
        if changed_pairs:
            print >> sys.stderr, "Found difference(s):"
            for current, normalized in changed_pairs:
                print >> sys.stderr, " ", current, "should be", normalized
            person.legal_name = name_pairs[0][1]
            person.other_names = name_pairs[1][1]
            maybe_save(person, **options)
def make_new_person(candidate, **options):
    """Create a Person for a candidate, with a slug no other Person uses.

    candidate is a dict; its 'other_name' and 'surname' values (either
    of which may be missing or None) are title-cased, passed through
    normalize_name() and joined with a space to form the legal name.

    The slug is slugify(legal_name); if that's taken, any trailing
    '-<digits>' is replaced with '-2', '-3', ... until it is free.

    The new Person is passed to maybe_save() with **options and
    returned.
    """
    name_parts = [
        normalize_name((candidate.get(key, None) or '').title())
        for key in ('other_name', 'surname')
    ]
    # Only join the non-empty parts, so that a candidate with no surname
    # doesn't end up with a trailing space on the legal name:
    legal_name = ' '.join(part for part in name_parts if part)

    slug_to_use = slugify(legal_name)
    suffix = 2
    # exists() only asks whether the slug is taken, so it can't fail
    # with MultipleObjectsReturned the way get() would:
    while Person.objects.filter(slug=slug_to_use).exists():
        slug_to_use = re.sub(r'-?\d*$', '', slug_to_use) + '-' + str(suffix)
        suffix += 1

    new_person = Person(legal_name=legal_name, slug=slug_to_use)
    maybe_save(new_person, **options)
    return new_person
def make_new_person(candidate, **options):
    """Build (and maybe save) a new Person from an API candidate dict.

    The legal name is the normalized, title-cased 'other_name' followed
    by the normalized, title-cased 'surname'; a missing or None value
    is treated as empty and left out.

    The slug starts as slugify(legal_name).  While a Person with that
    slug already exists, trailing digits (and a preceding '-') are
    stripped and '-2', '-3', ... is appended in turn.

    Returns the new Person after passing it to maybe_save().
    """
    first_names = normalize_name((candidate.get('other_name', None) or '').title())
    surname = normalize_name((candidate.get('surname', None) or '').title())
    # Skip empty components rather than unconditionally appending a
    # space, which left "Firstname " when there was no surname:
    legal_name = ' '.join(name for name in (first_names, surname) if name)

    slug_to_use = slugify(legal_name)
    suffix = 2
    # Use exists() rather than get(): we only need to know whether the
    # slug is in use, and get() would raise MultipleObjectsReturned if
    # the slug were ever duplicated.
    while Person.objects.filter(slug=slug_to_use).exists():
        slug_to_use = re.sub(r'-?\d*$', '', slug_to_use) + '-' + str(suffix)
        suffix += 1

    new_person = Person(legal_name=legal_name, slug=slug_to_use)
    maybe_save(new_person, **options)
    return new_person
def get_matching_party(party_name, **options):
    """Return the party Organisation for party_name, creating one if needed.

    The name is first mapped through party_name_corrections.  Parties
    are matched case-insensitively on the whole name, falling back to a
    case-insensitive prefix match.  If nothing matches, a new party
    (started this year) is created, passed to maybe_save() and
    returned.

    Raises Exception if more than one party matches.
    """
    corrected_name = party_name_corrections.get(party_name, party_name)

    matches = Organisation.objects.filter(kind__slug='party',
                                          name__iexact=corrected_name)
    if not matches:
        matches = Organisation.objects.filter(kind__slug='party',
                                              name__istartswith=corrected_name)

    match_count = len(matches)
    if match_count == 1:
        return matches[0]
    if match_count != 0:
        raise Exception("Multiple parties matched %s" % (corrected_name,))

    # Nothing matched, so make a new party:
    title_cased_name = corrected_name.title()
    new_party = Organisation(name=title_cased_name,
                             slug=slugify(title_cased_name),
                             started=ApproximateDate(datetime.date.today().year),
                             ended=None,
                             kind=OrganisationKind.objects.get(slug='party'))
    print >> sys.stderr, "* Creating a new party because none matched '%s'" % (corrected_name,)
    maybe_save(new_party, **options)
    return new_party
def get_matching_party(party_name, **options):
    """Find the single party matching party_name, or create it.

    party_name is replaced by its entry in party_name_corrections if
    there is one.  An exact (case-insensitive) name match is tried
    first, then a case-insensitive "starts with" match.

    Returns the one matching Organisation; if there are none, a new
    party Organisation is built, handed to maybe_save() and returned.
    Raises Exception when several parties match.
    """
    name_to_find = party_name_corrections.get(party_name, party_name)

    # Try the strictest lookup first, only falling back to the looser
    # one if it found nothing:
    for lookup in ('name__iexact', 'name__istartswith'):
        found_parties = Organisation.objects.filter(
            kind__slug='party', **{lookup: name_to_find})
        if found_parties:
            break

    number_found = len(found_parties)
    if number_found > 1:
        raise Exception("Multiple parties matched %s" % (name_to_find,))
    elif number_found == 1:
        return found_parties[0]
    else:
        name_for_new_party = name_to_find.title()
        created_party = Organisation(
            name=name_for_new_party,
            slug=slugify(name_for_new_party),
            started=ApproximateDate(datetime.date.today().year),
            ended=None,
            kind=OrganisationKind.objects.get(slug='party'))
        print >> sys.stderr, "* Creating a new party because none matched '%s'" % (
            name_to_find,)
        maybe_save(created_party, **options)
        return created_party
def update_parties(person, api_party, **options): current_party_positions = person.position_set.all().currently_active( ).filter(title__slug='member').filter(organisation__kind__slug='party') if 'name' in api_party: api_party_name = api_party['name'] else: api_party_name = 'Independent Aspirant' # If there's a current assignment to another party, issue a warning: current_party_assignments = [ p for p in current_party_positions if p.organisation.slug != 'independent-aspirant' ] if current_party_assignments: print "Not resetting to Independent Aspirant someone who's in a party %s: %s" % ( person, current_party_assignments) return # Then we should be checking that a valid party membership # exists, or create a new one otherwise: mz_party = get_matching_party(api_party_name, **options) need_to_create_party_position = True for party_position in (p for p in current_party_positions if p.organisation == mz_party): # If there's a current position in this party, that's fine # - just make sure that the end_date is 'future': if party_position.end_date != future_approximate_date: party_position.end_date = future_approximate_date print >> sys.stderr, "* Setting a party position end_date to 'future' for %s in %s" % ( person, api_party) maybe_save(party_position, **options) need_to_create_party_position = False for party_position in (p for p in current_party_positions if p.organisation != mz_party): # These shouldn't be current any more - end them when we # got the new data: party_position.end_date = yesterday_approximate_date print >> sys.stderr, "* Ending a party position yesterday for %s: no longer in %s" % ( person, party_position.organisation) maybe_save(party_position, **options) if need_to_create_party_position: new_position = Position(title=PositionTitle.objects.get(name='Member'), organisation=mz_party, category='political', person=person, start_date=today_approximate_date, end_date=future_approximate_date) print >> sys.stderr, "* Creating a party position that needs to 
exist: %s should be in %s" % ( person, mz_party) maybe_save(new_position, **options)
def update_parties(person, api_party, **options): current_party_positions = person.position_set.all().currently_active().filter(title__slug='member').filter(organisation__kind__slug='party') if 'name' in api_party: api_party_name = api_party['name'] else: api_party_name = 'Independent Aspirant' # If there's a current assignment to another party, issue a warning: current_party_assignments = [p for p in current_party_positions if p.organisation.slug != 'independent-aspirant'] if current_party_assignments: print "Not resetting to Independent Aspirant someone who's in a party %s: %s" % (person, current_party_assignments) return # Then we should be checking that a valid party membership # exists, or create a new one otherwise: mz_party = get_matching_party(api_party_name, **options) need_to_create_party_position = True for party_position in (p for p in current_party_positions if p.organisation == mz_party): # If there's a current position in this party, that's fine # - just make sure that the end_date is 'future': if party_position.end_date != future_approximate_date: party_position.end_date = future_approximate_date print >> sys.stderr, "* Setting a party position end_date to 'future' for %s in %s" % (person, api_party) maybe_save(party_position, **options) need_to_create_party_position = False for party_position in (p for p in current_party_positions if p.organisation != mz_party): # These shouldn't be current any more - end them when we # got the new data: party_position.end_date = yesterday_approximate_date print >> sys.stderr, "* Ending a party position yesterday for %s: no longer in %s" % (person, party_position.organisation) maybe_save(party_position, **options) if need_to_create_party_position: new_position = Position(title=PositionTitle.objects.get(name='Member'), organisation=mz_party, category='political', person=person, start_date=today_approximate_date, end_date=future_approximate_date) print >> sys.stderr, "* Creating a party position that needs to exist: 
%s should be in %s" % (person, mz_party) maybe_save(new_position, **options)
def update_candidates_for_place(place_name, place_kind, parliamentary_session, title, race_type, candidates, same_person_checker, **options): all_updates_succeeded = True place = get_matching_place(place_name, place_kind, parliamentary_session) current_aspirants = Position.objects.filter(place=place, title=title).currently_active() print "%s %s %s %s %s" % (place_name, place_kind, parliamentary_session, title, race_type) def full_name(c): return normalize_name(c['other_name']) + " " + normalize_name(c['surname']) code_to_existing_aspirant = dict((a.external_id, a) for a in current_aspirants if a.external_id) code_to_current_candidates = dict((c['code'], c) for c in candidates) existing_aspirant_codes = set(code_to_existing_aspirant.keys()) current_candidate_codes = set(code_to_current_candidates.keys()) existing_aspirants_to_remove = existing_aspirant_codes - current_candidate_codes new_candidates_to_add = current_candidate_codes - existing_aspirant_codes matched_candidate_codes = current_candidate_codes & existing_aspirant_codes # Add each candidate that wasn't already present: for code in new_candidates_to_add: candidate = code_to_current_candidates[code] first_names = normalize_name(candidate['other_name'] or '') surname = normalize_name(candidate['surname'] or '') person = get_person_from_names(first_names, surname) # If that person was an existing current aspirant, they'll # just need to have the IEBC candidate code set: iebc_code_just_needed_setting = False for matching_existing_aspirant_position in current_aspirants.filter(person=person): if matching_existing_aspirant_position.external_id: if matching_existing_aspirant_position.external_id != code: print " original: '%s'" % (matching_existing_aspirant_position.external_id,) print " candidate: '%s'" % (code,) format_tuple = (matching_existing_aspirant_position.person, matching_existing_aspirant_position.external_id, person, code) message = "There was an existing candidate (%s - %s) with a name match for (%s - 
%s) but different codes" % format_tuple # raise Exception, message print message else: matching_existing_aspirant_position.external_id = code print >> sys.stderr, "* The IEBC code was missing for %s" % (matching_existing_aspirant_position,) maybe_save(matching_existing_aspirant_position, **options) update_parties(person, candidate['party'], **options) iebc_code_just_needed_setting = True if iebc_code_just_needed_setting: continue # If we have a person match, but they weren't a current # aspirant in this race, then be careful that we're not # mismatching using the SamePersonChecker: if person: same_person = same_person_checker.check_same_and_update(candidate, place, race_type, person) if same_person is None: all_updates_succeeded = False continue if not person: print >> sys.stderr, "* Creating a person because none matched %s %s" % (first_names, surname) person = make_new_person(candidate, **options) assert(person) update_parties(person, candidate['party'], **options) aspirant_position_properties = { 'organisation': Organisation.objects.get(name='REPUBLIC OF KENYA'), 'place': place, 'person': person, 'title': title, 'category': 'political'} # First see if that aspirant position already exists: existing_matching_aspirant_positions = Position.objects.filter(**aspirant_position_properties).currently_active() if existing_matching_aspirant_positions: # If it does, make sure that the IEBC code is set in # external_id, and that the end_date is 'future': for existing_matching_aspirant_position in existing_matching_aspirant_positions: if existing_matching_aspirant_position.external_id != code or existing_matching_aspirant_position.end_date != future_approximate_date: print >> sys.stderr, "* Need to set either the IEBC code or the end date (future) for %s" % (existing_matching_aspirant_position,) existing_matching_aspirant_position.external_id = code existing_matching_aspirant_position.end_date = future_approximate_date maybe_save(existing_matching_aspirant_position, 
**options) else: # Then we have to create a new position: new_position = Position(start_date=today_approximate_date, end_date=future_approximate_date, external_id=code, **aspirant_position_properties) print >> sys.stderr, "* Creating a missing position: %s" % (new_position,) maybe_save(new_position, **options) # For those aspirants that are no longer current, end their aspirant position: for code in existing_aspirants_to_remove: existing_aspirant_to_remove = code_to_existing_aspirant[code] print >> sys.stderr, "* Removing a no longer current aspirant: %s (position id: %s)" % (existing_aspirant_to_remove, existing_aspirant_to_remove.id,) existing_aspirant_to_remove.end_date = yesterday_approximate_date maybe_save(existing_aspirant_to_remove, **options) # For those aspirants that were already present, just make sure # their party assignments are correct: for code in matched_candidate_codes: # Make sure that the party assignments are correct: person = code_to_existing_aspirant[code].person candidate = code_to_current_candidates[code] update_parties(person, candidate['party'], **options) return all_updates_succeeded
def remove_duplicate_candidates_for_place(place_name, place_kind, parliamentary_session, title, race_type, candidates, **options): place = get_matching_place(place_name, place_kind, parliamentary_session) current_aspirant_positions = Position.objects.filter(place=place, title=title).currently_active() # Create a person to position mapping to check for duplicate # positions for people: person_to_position = defaultdict(list) for current_aspirant_position in current_aspirant_positions.all(): person = current_aspirant_position.person person_to_position[person].append(current_aspirant_position) for person, positions in person_to_position.items(): if len(positions) > 1: # Then there are multiple positions for this person - # use a method for picking the best one that happens # to work for the 8 cases we have here; at the moment it's not positions.sort(key=lambda x: (x.external_id, x.start_date)) best_position = positions[-1] for position in positions: if position != best_position: if options['commit']: position.delete() # Now look for current aspirant positions where the # alternative_name for its person is the legal_name of the person # associated with another current aspirant position - remove the # latter. 
real_person_to_others = defaultdict(set) for current_aspirant_position in current_aspirant_positions: other_aspirant_positions = set(current_aspirant_positions) other_aspirant_positions.discard(current_aspirant_position) for other_aspirant_position in other_aspirant_positions: current_person = current_aspirant_position.person other_person = other_aspirant_position.person if person_replaces_other_person(current_person, other_person): real_person_to_others[current_aspirant_position].add(other_aspirant_position) if real_person_to_others: print "------------------------------------------------------------------------" print "Removing duplicates for %s (%s)" % (place_name, race_type) removed_duplicate_position_for_same_person = False for real_person_position, other_position_set in real_person_to_others.items(): if removed_duplicate_position_for_same_person: # We can't risk deleting both positions referring to # the same person, so if we've done one just break: break print real_person_position iebc_codes = set(op.external_id for op in other_position_set if op.external_id) for other_position in other_position_set: print " <=", other_position if iebc_codes: if len(iebc_codes) > 1: raise Exception, "Non-unique IEBC codes %s" % (iebc_codes,) else: print " IEBC codes were:", iebc_codes unique_iebc_code = iter(iebc_codes).next() if real_person_position.external_id and (real_person_position.external_id != unique_iebc_code): raise Exception, "Would change the IEBC code, but it conflicted" real_person_position.external_id = unique_iebc_code maybe_save(real_person_position, **options) for other_position in other_position_set: other_person = other_position.person same_person = (real_person_position.person == other_person) if same_person: removed_duplicate_position_for_same_person = True # Then don't delete the other person, just delete the position: if options['commit']: other_position.delete() else: # Otherwise delete both the position and the person: if other_position.created < 
before_import_date: print "!! Would be deleting a position %s (%d) created before our first import: %s" % (other_position, other_position.id, other_position.created) if options['commit']: other_position.delete() if other_person.created < before_import_date: print "!! Would be deleting a person %s (%d) created before our first import: %s" % (other_person, other_person.id, other_person.created) # Create a redirect from the old slug: sr = SlugRedirect(content_type=ContentType.objects.get_for_model(Person), old_object_slug=other_person.slug, new_object_id=real_person_position.person.id, new_object=real_person_position.person) maybe_save(sr, **options) # Then finally delete the person: if options['commit']: other_person.delete()
def update_candidates_for_place(place_name, place_kind, parliamentary_session, title, race_type, candidates, same_person_checker, **options): all_updates_succeeded = True place = get_matching_place(place_name, place_kind, parliamentary_session) current_aspirants = Position.objects.filter( place=place, title=title).currently_active() print "%s %s %s %s %s" % (place_name, place_kind, parliamentary_session, title, race_type) def full_name(c): return normalize_name(c['other_name']) + " " + normalize_name( c['surname']) code_to_existing_aspirant = dict( (a.external_id, a) for a in current_aspirants if a.external_id) code_to_current_candidates = dict((c['code'], c) for c in candidates) existing_aspirant_codes = set(code_to_existing_aspirant.keys()) current_candidate_codes = set(code_to_current_candidates.keys()) existing_aspirants_to_remove = existing_aspirant_codes - current_candidate_codes new_candidates_to_add = current_candidate_codes - existing_aspirant_codes matched_candidate_codes = current_candidate_codes & existing_aspirant_codes # Add each candidate that wasn't already present: for code in new_candidates_to_add: candidate = code_to_current_candidates[code] first_names = normalize_name(candidate['other_name'] or '') surname = normalize_name(candidate['surname'] or '') person = get_person_from_names(first_names, surname) # If that person was an existing current aspirant, they'll # just need to have the IEBC candidate code set: iebc_code_just_needed_setting = False for matching_existing_aspirant_position in current_aspirants.filter( person=person): if matching_existing_aspirant_position.external_id: if matching_existing_aspirant_position.external_id != code: print " original: '%s'" % ( matching_existing_aspirant_position.external_id, ) print " candidate: '%s'" % (code, ) format_tuple = ( matching_existing_aspirant_position.person, matching_existing_aspirant_position.external_id, person, code) message = "There was an existing candidate (%s - %s) with a name match 
for (%s - %s) but different codes" % format_tuple # raise Exception, message print message else: matching_existing_aspirant_position.external_id = code print >> sys.stderr, "* The IEBC code was missing for %s" % ( matching_existing_aspirant_position, ) maybe_save(matching_existing_aspirant_position, **options) update_parties(person, candidate['party'], **options) iebc_code_just_needed_setting = True if iebc_code_just_needed_setting: continue # If we have a person match, but they weren't a current # aspirant in this race, then be careful that we're not # mismatching using the SamePersonChecker: if person: same_person = same_person_checker.check_same_and_update( candidate, place, race_type, person) if same_person is None: all_updates_succeeded = False continue if not person: print >> sys.stderr, "* Creating a person because none matched %s %s" % ( first_names, surname) person = make_new_person(candidate, **options) assert (person) update_parties(person, candidate['party'], **options) aspirant_position_properties = { 'organisation': Organisation.objects.get(name='REPUBLIC OF KENYA'), 'place': place, 'person': person, 'title': title, 'category': 'political' } # First see if that aspirant position already exists: existing_matching_aspirant_positions = Position.objects.filter( **aspirant_position_properties).currently_active() if existing_matching_aspirant_positions: # If it does, make sure that the IEBC code is set in # external_id, and that the end_date is 'future': for existing_matching_aspirant_position in existing_matching_aspirant_positions: if existing_matching_aspirant_position.external_id != code or existing_matching_aspirant_position.end_date != future_approximate_date: print >> sys.stderr, "* Need to set either the IEBC code or the end date (future) for %s" % ( existing_matching_aspirant_position, ) existing_matching_aspirant_position.external_id = code existing_matching_aspirant_position.end_date = future_approximate_date 
maybe_save(existing_matching_aspirant_position, **options) else: # Then we have to create a new position: new_position = Position(start_date=today_approximate_date, end_date=future_approximate_date, external_id=code, **aspirant_position_properties) print >> sys.stderr, "* Creating a missing position: %s" % ( new_position, ) maybe_save(new_position, **options) # For those aspirants that are no longer current, end their aspirant position: for code in existing_aspirants_to_remove: existing_aspirant_to_remove = code_to_existing_aspirant[code] print >> sys.stderr, "* Removing a no longer current aspirant: %s (position id: %s)" % ( existing_aspirant_to_remove, existing_aspirant_to_remove.id, ) existing_aspirant_to_remove.end_date = yesterday_approximate_date maybe_save(existing_aspirant_to_remove, **options) # For those aspirants that were already present, just make sure # their party assignments are correct: for code in matched_candidate_codes: # Make sure that the party assignments are correct: person = code_to_existing_aspirant[code].person candidate = code_to_current_candidates[code] update_parties(person, candidate['party'], **options) return all_updates_succeeded