Beispiel #1
0
    def handle_noargs(self, **options):

        api_key = hmac.new(settings.IEBC_API_SECRET,
                           "appid=%s" % (settings.IEBC_API_ID, ),
                           hashlib.sha256).hexdigest()

        token_data = get_data(make_api_token_url(settings.IEBC_API_ID,
                                                 api_key))
        token = token_data['token']

        def url(path, query_filter=None):
            """A closure to avoid repeating parameters"""
            return make_api_url(path, settings.IEBC_API_SECRET, token,
                                query_filter)

        # To get all the candidates, we iterate over each county,
        # constituency and ward, and request the candidates for each.

        cache_directory = os.path.join(data_directory, 'api-cache-2013-02-08')

        mkdir_p(cache_directory)

        # ------------------------------------------------------------------------

        print "Getting (incorrect) party names mapping for matching..."

        parties_cache_filename = os.path.join(cache_directory, 'parties')
        party_data = get_data_with_cache(parties_cache_filename,
                                         url('/party/'))

        party_names_api = set(d['name'].strip().encode('utf-8')
                              for d in party_data['parties'])
        party_names_db = set(
            o.name.strip().encode('utf-8')
            for o in Organisation.objects.filter(kind__slug='party'))

        with open(os.path.join(data_directory, 'party-names.csv'), 'w') as fp:
            writer = csv.writer(fp)
            for t in itertools.izip_longest(party_names_api, party_names_db):
                writer.writerow(t)

        # ------------------------------------------------------------------------

        print "Getting (incorrect) ward names mapping for matching..."

        ward_data = get_data(url('/ward/'))

        wards_from_api = sorted(ward['name'].encode('utf-8')
                                for ward in ward_data['region']['locations'])
        wards_from_db = sorted(
            p.name.encode('utf-8')
            for p in Place.objects.filter(kind__slug='ward'))

        with open(os.path.join(data_directory, 'wards-names.csv'), 'w') as fp:
            writer = csv.writer(fp)
            for t in itertools.izip_longest(wards_from_api, wards_from_db):
                writer.writerow(t)

        # ------------------------------------------------------------------------

        headings = [
            'Same/Different', 'API Name', 'API Party', 'API Place',
            'API Candidate Code', 'Mz Legal Name', 'Mz Other Names', 'Mz URL',
            'Mz Parties Ever', 'Mz Aspirant Ever', 'Mz Politician Ever',
            'Mz ID'
        ]

        with open(os.path.join(sys.path[0], 'names-to-check.csv'), 'w') as fp:

            writer = csv.DictWriter(fp, headings)

            writer.writerow(dict((h, h) for h in headings))

            for area_type in 'county', 'constituency', 'ward':
                cache_filename = os.path.join(cache_directory, area_type)
                area_type_data = get_data_with_cache(cache_filename,
                                                     url('/%s/' % (area_type)))
                areas = area_type_data['region']['locations']
                for i, area in enumerate(areas):
                    # Get the candidates for that area:
                    code = area['code']
                    candidates_cache_filename = os.path.join(
                        cache_directory,
                        'candidates-for-' + area_type + '-' + code)
                    candidate_data = get_data_with_cache(
                        candidates_cache_filename,
                        url('/candidate/',
                            query_filter='%s=%s' % (area_type, code)))
                    for race in candidate_data['candidates']:
                        full_race_name = race['race']
                        race_type, place_name = parse_race_name(full_race_name)
                        place_kind, session, title = known_race_type_mapping[
                            race_type]
                        candidates = race['candidates']
                        for candidate in candidates:
                            first_names = candidate['other_name'] or ''
                            surname = candidate['surname'] or ''
                            person = get_person_from_names(
                                first_names, surname)
                            if person:
                                print "Got person match to:", person
                                row = {}
                                row['Same/Different'] = ''
                                row['API Name'] = first_names + ' ' + surname
                                party_data = candidate['party']
                                row['API Party'] = party_data[
                                    'name'] if 'name' in party_data else ''
                                row['API Place'] = '%s (%s)' % (place_name,
                                                                area_type)
                                row['API Candidate Code'] = candidate['code']
                                row['Mz Legal Name'] = person.legal_name
                                row['Mz Other Names'] = person.other_names
                                row['Mz URL'] = 'http://info.mzalendo.com' + person.get_absolute_url(
                                )
                                row['Mz Parties Ever'] = ', '.join(
                                    o.name for o in person.parties_ever())
                                for heading, positions in (
                                    ('Mz Aspirant Ever',
                                     person.aspirant_positions_ever()),
                                    ('Mz Politician Ever',
                                     person.politician_positions_ever())):
                                    row[heading] = ', '.join(
                                        '%s at %s' % (p.title.name, p.place)
                                        for p in positions)
                                row['Mz ID'] = person.id
                                for key, value in row.items():
                                    row[key] = unicode(value).encode('utf-8')
                                writer.writerow(row)
    def handle_noargs(self, **options):

        api_key = hmac.new(settings.IEBC_API_SECRET,
                           "appid=%s" % (settings.IEBC_API_ID,),
                           hashlib.sha256).hexdigest()

        token_data = get_data(make_api_token_url(settings.IEBC_API_ID, api_key))
        token = token_data['token']

        def url(path, query_filter=None):
            """A closure to avoid repeating parameters"""
            return make_api_url(path, settings.IEBC_API_SECRET, token, query_filter)

        # To get all the candidates, we iterate over each county,
        # constituency and ward, and request the candidates for each.

        cache_directory = os.path.join(data_directory, 'api-cache-2013-02-08')

        mkdir_p(cache_directory)

        # ------------------------------------------------------------------------

        print "Getting (incorrect) party names mapping for matching..."

        parties_cache_filename = os.path.join(cache_directory, 'parties')
        party_data = get_data_with_cache(parties_cache_filename, url('/party/'))

        party_names_api = set(d['name'].strip().encode('utf-8') for d in party_data['parties'])
        party_names_db = set(o.name.strip().encode('utf-8') for o in Organisation.objects.filter(kind__slug='party'))

        with open(os.path.join(data_directory, 'party-names.csv'), 'w') as fp:
            writer = csv.writer(fp)
            for t in itertools.izip_longest(party_names_api, party_names_db):
                writer.writerow(t)

        # ------------------------------------------------------------------------

        print "Getting (incorrect) ward names mapping for matching..."

        ward_data = get_data(url('/ward/'))

        wards_from_api = sorted(ward['name'].encode('utf-8') for ward in ward_data['region']['locations'])
        wards_from_db = sorted(p.name.encode('utf-8') for p in Place.objects.filter(kind__slug='ward'))

        with open(os.path.join(data_directory, 'wards-names.csv'), 'w') as fp:
            writer = csv.writer(fp)
            for t in itertools.izip_longest(wards_from_api, wards_from_db):
                writer.writerow(t)

        # ------------------------------------------------------------------------

        headings = ['Same/Different',
                    'API Name',
                    'API Party',
                    'API Place',
                    'API Candidate Code',
                    'Mz Legal Name',
                    'Mz Other Names',
                    'Mz URL',
                    'Mz Parties Ever',
                    'Mz Aspirant Ever',
                    'Mz Politician Ever',
                    'Mz ID']

        with open(os.path.join(sys.path[0], 'names-to-check.csv'), 'w') as fp:

            writer = csv.DictWriter(fp, headings)

            writer.writerow(dict((h, h) for h in headings))

            for area_type in 'county', 'constituency', 'ward':
                cache_filename = os.path.join(cache_directory, area_type)
                area_type_data = get_data_with_cache(cache_filename, url('/%s/' % (area_type)))
                areas = area_type_data['region']['locations']
                for i, area in enumerate(areas):
                    # Get the candidates for that area:
                    code = area['code']
                    candidates_cache_filename = os.path.join(cache_directory, 'candidates-for-' + area_type + '-' + code)
                    candidate_data = get_data_with_cache(candidates_cache_filename, url('/candidate/', query_filter='%s=%s' % (area_type, code)))
                    for race in candidate_data['candidates']:
                        full_race_name = race['race']
                        race_type, place_name = parse_race_name(full_race_name)
                        place_kind, session, title = known_race_type_mapping[race_type]
                        candidates = race['candidates']
                        for candidate in candidates:
                            first_names = candidate['other_name'] or ''
                            surname = candidate['surname'] or ''
                            person = get_person_from_names(first_names, surname)
                            if person:
                                print "Got person match to:", person
                                row = {}
                                row['Same/Different'] = ''
                                row['API Name'] = first_names + ' ' + surname
                                party_data = candidate['party']
                                row['API Party'] = party_data['name'] if 'name' in party_data else ''
                                row['API Place'] = '%s (%s)' % (place_name, area_type)
                                row['API Candidate Code'] = candidate['code']
                                row['Mz Legal Name'] = person.legal_name
                                row['Mz Other Names'] = person.other_names
                                row['Mz URL'] = 'http://info.mzalendo.com' + person.get_absolute_url()
                                row['Mz Parties Ever'] = ', '.join(o.name for o in person.parties_ever())
                                for heading, positions in (('Mz Aspirant Ever', person.aspirant_positions_ever()),
                                                           ('Mz Politician Ever', person.politician_positions_ever())):
                                    row[heading] = ', '.join('%s at %s' % (p.title.name, p.place) for p in positions)
                                row['Mz ID'] = person.id
                                for key, value in row.items():
                                    row[key] = unicode(value).encode('utf-8')
                                writer.writerow(row)
def update_candidates_for_place(place_name,
                                place_kind,
                                parliamentary_session,
                                title,
                                race_type,
                                candidates,
                                same_person_checker,
                                **options):

    all_updates_succeeded = True

    place = get_matching_place(place_name, place_kind, parliamentary_session)

    current_aspirants = Position.objects.filter(place=place, title=title).currently_active()

    print "%s %s %s %s %s" % (place_name,
                              place_kind,
                              parliamentary_session,
                              title,
                              race_type)

    def full_name(c):
        return normalize_name(c['other_name']) + " " + normalize_name(c['surname'])

    code_to_existing_aspirant = dict((a.external_id, a) for a in current_aspirants if a.external_id)
    code_to_current_candidates = dict((c['code'], c) for c in candidates)

    existing_aspirant_codes = set(code_to_existing_aspirant.keys())
    current_candidate_codes = set(code_to_current_candidates.keys())

    existing_aspirants_to_remove = existing_aspirant_codes - current_candidate_codes
    new_candidates_to_add = current_candidate_codes - existing_aspirant_codes

    matched_candidate_codes = current_candidate_codes & existing_aspirant_codes

    # Add each candidate that wasn't already present:

    for code in new_candidates_to_add:
        candidate = code_to_current_candidates[code]
        first_names = normalize_name(candidate['other_name'] or '')
        surname = normalize_name(candidate['surname'] or '')
        person = get_person_from_names(first_names, surname)
        # If that person was an existing current aspirant, they'll
        # just need to have the IEBC candidate code set:
        iebc_code_just_needed_setting = False
        for matching_existing_aspirant_position in current_aspirants.filter(person=person):
            if matching_existing_aspirant_position.external_id:
                if matching_existing_aspirant_position.external_id != code:
                    print "     original: '%s'" % (matching_existing_aspirant_position.external_id,)
                    print "    candidate: '%s'" % (code,)
                    format_tuple = (matching_existing_aspirant_position.person,
                                    matching_existing_aspirant_position.external_id,
                                    person,
                                    code)
                    message = "There was an existing candidate (%s - %s) with a name match for (%s - %s) but different codes" % format_tuple
                    # raise Exception, message
                    print message
            else:
                matching_existing_aspirant_position.external_id = code
                print >> sys.stderr, "* The IEBC code was missing for %s" % (matching_existing_aspirant_position,)
                maybe_save(matching_existing_aspirant_position, **options)
                update_parties(person, candidate['party'], **options)
                iebc_code_just_needed_setting = True
        if iebc_code_just_needed_setting:
            continue
        # If we have a person match, but they weren't a current
        # aspirant in this race, then be careful that we're not
        # mismatching using the SamePersonChecker:
        if person:
            same_person = same_person_checker.check_same_and_update(candidate,
                                                                    place,
                                                                    race_type,
                                                                    person)
            if same_person is None:
                all_updates_succeeded = False
                continue
        if not person:
            print >> sys.stderr, "* Creating a person because none matched %s %s" % (first_names, surname)
            person = make_new_person(candidate, **options)

        assert(person)

        update_parties(person, candidate['party'], **options)

        aspirant_position_properties = {
            'organisation': Organisation.objects.get(name='REPUBLIC OF KENYA'),
            'place': place,
            'person': person,
            'title': title,
            'category': 'political'}

        # First see if that aspirant position already exists:
        existing_matching_aspirant_positions = Position.objects.filter(**aspirant_position_properties).currently_active()

        if existing_matching_aspirant_positions:
            # If it does, make sure that the IEBC code is set in
            # external_id, and that the end_date is 'future':
            for existing_matching_aspirant_position in existing_matching_aspirant_positions:
                if existing_matching_aspirant_position.external_id != code or existing_matching_aspirant_position.end_date != future_approximate_date:
                    print >> sys.stderr, "* Need to set either the IEBC code or the end date (future) for %s" % (existing_matching_aspirant_position,)
                    existing_matching_aspirant_position.external_id = code
                    existing_matching_aspirant_position.end_date = future_approximate_date
                    maybe_save(existing_matching_aspirant_position, **options)
        else:
            # Then we have to create a new position:
            new_position = Position(start_date=today_approximate_date,
                                    end_date=future_approximate_date,
                                    external_id=code,
                                    **aspirant_position_properties)
            print >> sys.stderr, "* Creating a missing position: %s" % (new_position,)
            maybe_save(new_position, **options)

    # For those aspirants that are no longer current, end their aspirant position:

    for code in existing_aspirants_to_remove:
        existing_aspirant_to_remove = code_to_existing_aspirant[code]
        print >> sys.stderr, "* Removing a no longer current aspirant: %s (position id: %s)" % (existing_aspirant_to_remove, existing_aspirant_to_remove.id,)
        existing_aspirant_to_remove.end_date = yesterday_approximate_date
        maybe_save(existing_aspirant_to_remove, **options)

    # For those aspirants that were already present, just make sure
    # their party assignments are correct:

    for code in matched_candidate_codes:
        # Make sure that the party assignments are correct:
        person = code_to_existing_aspirant[code].person
        candidate = code_to_current_candidates[code]
        update_parties(person, candidate['party'], **options)

    return all_updates_succeeded
Beispiel #4
0
class Command(NoArgsCommand):
    help = 'Update the database with aspirants from the IEBC website'

    option_list = NoArgsCommand.option_list + (make_option(
        '--commit',
        action='store_true',
        dest='commit',
        help='Actually update the database'), )

    def handle_noargs(self, **options):

        api_key = hmac.new(settings.IEBC_API_SECRET,
                           "appid=%s" % (settings.IEBC_API_ID, ),
                           hashlib.sha256).hexdigest()

        token_data = get_data(make_api_token_url(settings.IEBC_API_ID,
                                                 api_key))
        token = token_data['token']

        def url(path, query_filter=None):
            """A closure to avoid repeating parameters"""
            return make_api_url(path, settings.IEBC_API_SECRET, token,
                                query_filter)

        # To get all the candidates, we iterate over each county,
        # constituency and ward, and request the candidates for each.

        cache_directory = os.path.join(data_directory, 'api-cache-2013-02-08')

        mkdir_p(cache_directory)

        parties_cache_filename = os.path.join(cache_directory, 'parties')
        party_data = get_data_with_cache(parties_cache_filename,
                                         url('/party/'),
                                         only_from_cache=True)
        for party in party_data['parties']:
            party_api_code = party['code']
            try:
                party_organisation = get_matching_party(party['name'],
                                                        create=False,
                                                        **options)
            except CommandError, ce:
                print >> sys.stderr, "Not setting the API code: %s" % (ce, )
            party_organisation.external_id = party_api_code
            maybe_save(party_organisation, **options)

        print "########################################################################"

        for area_type in 'county', 'constituency', 'ward':
            cache_filename = os.path.join(cache_directory, area_type)
            area_type_data = get_data_with_cache(cache_filename,
                                                 url('/%s/' % (area_type)),
                                                 only_from_cache=True)
            areas = area_type_data['region']['locations']
            for i, area in enumerate(areas):
                # Get the candidates for that area:
                code = area['code']
                candidates_cache_filename = os.path.join(
                    cache_directory,
                    'candidates-for-' + area_type + '-' + code)
                candidate_data = get_data_with_cache(candidates_cache_filename,
                                                     url('/candidate/',
                                                         query_filter='%s=%s' %
                                                         (area_type, code)),
                                                     only_from_cache=True)
                # print "got candidate_data:", candidate_data
                for race in candidate_data['candidates']:
                    full_race_name = race['race']
                    race_type, place_name = parse_race_name(full_race_name)
                    place_kind, session, title = known_race_type_mapping[
                        race_type]
                    place = get_matching_place(place_name, place_kind, session)
                    place.external_id = code
                    maybe_save(place, **options)
                    candidates = race['candidates']
                    for candidate in candidates:
                        first_names = candidate['other_name'] or ''
                        surname = candidate['surname'] or ''
                        person = get_person_from_names(first_names, surname)
                        if not person:
                            raise Exception, "Failed to find the person from '%s' '%s'" % (
                                first_names, surname)
                        aspirant_position_properties = {
                            'organisation':
                            Organisation.objects.get(name='REPUBLIC OF KENYA'),
                            'place':
                            place,
                            'person':
                            person,
                            'title':
                            title,
                            'category':
                            'political'
                        }
                        positions = Position.objects.filter(
                            **aspirant_position_properties).currently_active()
                        if positions:
                            if len(positions) > 1:
                                print >> sys.stderr, "There were multiple (%d) matches for: %s" % (
                                    len(positions),
                                    aspirant_position_properties,
                                )
                            for position in positions:
                                position.external_id = candidate['code']
                                maybe_save(position, **options)
                        else:
                            message = "There were no matches for %s" % (
                                aspirant_position_properties, )
                            # raise Exception, message
                            print >> sys.stderr, message
def update_candidates_for_place(place_name, place_kind, parliamentary_session,
                                title, race_type, candidates,
                                same_person_checker, **options):

    all_updates_succeeded = True

    place = get_matching_place(place_name, place_kind, parliamentary_session)

    current_aspirants = Position.objects.filter(
        place=place, title=title).currently_active()

    print "%s %s %s %s %s" % (place_name, place_kind, parliamentary_session,
                              title, race_type)

    def full_name(c):
        return normalize_name(c['other_name']) + " " + normalize_name(
            c['surname'])

    code_to_existing_aspirant = dict(
        (a.external_id, a) for a in current_aspirants if a.external_id)
    code_to_current_candidates = dict((c['code'], c) for c in candidates)

    existing_aspirant_codes = set(code_to_existing_aspirant.keys())
    current_candidate_codes = set(code_to_current_candidates.keys())

    existing_aspirants_to_remove = existing_aspirant_codes - current_candidate_codes
    new_candidates_to_add = current_candidate_codes - existing_aspirant_codes

    matched_candidate_codes = current_candidate_codes & existing_aspirant_codes

    # Add each candidate that wasn't already present:

    for code in new_candidates_to_add:
        candidate = code_to_current_candidates[code]
        first_names = normalize_name(candidate['other_name'] or '')
        surname = normalize_name(candidate['surname'] or '')
        person = get_person_from_names(first_names, surname)
        # If that person was an existing current aspirant, they'll
        # just need to have the IEBC candidate code set:
        iebc_code_just_needed_setting = False
        for matching_existing_aspirant_position in current_aspirants.filter(
                person=person):
            if matching_existing_aspirant_position.external_id:
                if matching_existing_aspirant_position.external_id != code:
                    print "     original: '%s'" % (
                        matching_existing_aspirant_position.external_id, )
                    print "    candidate: '%s'" % (code, )
                    format_tuple = (
                        matching_existing_aspirant_position.person,
                        matching_existing_aspirant_position.external_id,
                        person, code)
                    message = "There was an existing candidate (%s - %s) with a name match for (%s - %s) but different codes" % format_tuple
                    # raise Exception, message
                    print message
            else:
                matching_existing_aspirant_position.external_id = code
                print >> sys.stderr, "* The IEBC code was missing for %s" % (
                    matching_existing_aspirant_position, )
                maybe_save(matching_existing_aspirant_position, **options)
                update_parties(person, candidate['party'], **options)
                iebc_code_just_needed_setting = True
        if iebc_code_just_needed_setting:
            continue
        # If we have a person match, but they weren't a current
        # aspirant in this race, then be careful that we're not
        # mismatching using the SamePersonChecker:
        if person:
            same_person = same_person_checker.check_same_and_update(
                candidate, place, race_type, person)
            if same_person is None:
                all_updates_succeeded = False
                continue
        if not person:
            print >> sys.stderr, "* Creating a person because none matched %s %s" % (
                first_names, surname)
            person = make_new_person(candidate, **options)

        assert (person)

        update_parties(person, candidate['party'], **options)

        aspirant_position_properties = {
            'organisation': Organisation.objects.get(name='REPUBLIC OF KENYA'),
            'place': place,
            'person': person,
            'title': title,
            'category': 'political'
        }

        # First see if that aspirant position already exists:
        existing_matching_aspirant_positions = Position.objects.filter(
            **aspirant_position_properties).currently_active()

        if existing_matching_aspirant_positions:
            # If it does, make sure that the IEBC code is set in
            # external_id, and that the end_date is 'future':
            for existing_matching_aspirant_position in existing_matching_aspirant_positions:
                if existing_matching_aspirant_position.external_id != code or existing_matching_aspirant_position.end_date != future_approximate_date:
                    print >> sys.stderr, "* Need to set either the IEBC code or the end date (future) for %s" % (
                        existing_matching_aspirant_position, )
                    existing_matching_aspirant_position.external_id = code
                    existing_matching_aspirant_position.end_date = future_approximate_date
                    maybe_save(existing_matching_aspirant_position, **options)
        else:
            # Then we have to create a new position:
            new_position = Position(start_date=today_approximate_date,
                                    end_date=future_approximate_date,
                                    external_id=code,
                                    **aspirant_position_properties)
            print >> sys.stderr, "* Creating a missing position: %s" % (
                new_position, )
            maybe_save(new_position, **options)

    # For those aspirants that are no longer current, end their aspirant position:

    for code in existing_aspirants_to_remove:
        existing_aspirant_to_remove = code_to_existing_aspirant[code]
        print >> sys.stderr, "* Removing a no longer current aspirant: %s (position id: %s)" % (
            existing_aspirant_to_remove,
            existing_aspirant_to_remove.id,
        )
        existing_aspirant_to_remove.end_date = yesterday_approximate_date
        maybe_save(existing_aspirant_to_remove, **options)

    # For those aspirants that were already present, just make sure
    # their party assignments are correct:

    for code in matched_candidate_codes:
        # Make sure that the party assignments are correct:
        person = code_to_existing_aspirant[code].person
        candidate = code_to_current_candidates[code]
        update_parties(person, candidate['party'], **options)

    return all_updates_succeeded