def handle_label(self, output_filename, **options):
    """Dump every 'Party' organization from PopIt to output_filename as JSON.

    Server-specific fields (page URLs, image URLs, generated identifier
    ids) are stripped so that dumps from different servers diff cleanly.
    The file is written atomically: output goes to a temporary file in
    the same directory and is then renamed over output_filename.
    """
    api = create_popit_api_object()
    # Temp file lives in the target directory so rename() is atomic
    # (same filesystem).
    ntf = NamedTemporaryFile(
        delete=False,
        dir=dirname(output_filename)
    )
    all_parties = []
    for party in popit_unwrap_pagination(
            api.organizations,
            embed='',
            per_page=200
    ):
        if party['classification'] != 'Party':
            continue
        # These URLs are specific to the server this data is
        # extracted from:
        del party['url']
        del party['html_url']
        # Similarly the URLs of images:
        party.pop('image', None)
        party.pop('proxy_image', None)
        party.pop('images', None)
        # The generated id in each identifier will be different
        # between systems (or between runs on the same system) so
        # just produces noisy diffs if we include it.
        for identifier in party.get('identifiers', []):
            identifier.pop('id', None)
        all_parties.append(party)
    all_parties.sort(key=lambda p: p['id'])
    # Output to a string so that we can strip any trailing whitespace.
    output = json.dumps(all_parties, sort_keys=True, indent=4)
    output = re.sub(r'(?ms)\s*$', '', output)
    ntf.write(output)
    rename(ntf.name, output_filename)
def handle(self, **options):
    """Create or update joint parties from joint-party descriptions in PopIt.

    Groups non-dissolved 'Party' organizations by the joint name
    extracted from their descriptions; any group of two or more
    sub-parties becomes (or updates) a joint party.
    """
    joint_party_to_sub_parties = defaultdict(list)
    api = create_popit_api_object()
    for party in popit_unwrap_pagination(
            api.organizations,
            embed='',
            per_page=200
    ):
        if party['classification'] != 'Party':
            continue
        # Skip parties already dissolved (ISO date strings compare
        # correctly lexicographically).
        if 'dissolution_date' in party:
            dissolution_date = party['dissolution_date']
            if dissolution_date < str(date.today()):
                continue
        for d in party.get('descriptions', []):
            m = joint_description_re.search(d['description'])
            if not m:
                continue
            joint_name = fix_joint_party_name(m.group('joint_name'))
            joint_party_to_sub_parties[joint_name].append(party)
    for joint_name, sub_parties in joint_party_to_sub_parties.items():
        # A "joint" party of one isn't a joint party; just report it.
        if len(sub_parties) < 2:
            message = 'Ignoring "{joint_name}" (only made up of one party: "{sub_party}")'
            print message.format(
                joint_name=joint_name,
                sub_party=sub_parties[0]['name']
            )
            continue
        create_or_update_party(api, joint_name, sub_parties)
def handle(self, **options):
    """Strip bogus date fields from every person (and their images) in PopIt.

    Each cleaned person is saved back through PopItPerson, which also
    invalidates the relevant cache entries.
    """
    for person_data in popit_unwrap_pagination(
            self.api.persons,
            embed='',
            per_page=100
    ):
        msg = "Person {0}persons/{1}"
        print msg.format(get_base_url(), person_data['id'])
        # These date fields don't belong on a person record:
        strip_bogus_fields(
            person_data,
            [
                'founding_date',
                'dissolution_date',
                'start_date',
                'end_date'
            ])
        # Nor do any of these belong on an image:
        for image in person_data.get('images', []):
            strip_bogus_fields(
                image,
                [
                    'birth_date',
                    'death_date',
                    'founding_date',
                    'dissolution_date',
                    'start_date',
                    'end_date'
                ])
        person = PopItPerson.create_from_dict(person_data)
        person.save_to_popit(self.api)
        person.invalidate_cache_entries()
def handle_label(self, output_filename, **options):
    """Dump all non-dissolved 'Party' organizations from PopIt as JSON.

    Like the plain party dump, but skips parties whose dissolution_date
    is in the past.  Server-specific fields are stripped and the file
    is written atomically via a temporary file plus rename().
    """
    api = create_popit_api_object()
    ntf = NamedTemporaryFile(delete=False, dir=dirname(output_filename))
    all_parties = []
    for party in popit_unwrap_pagination(api.organizations, embed='', per_page=200):
        if party['classification'] != 'Party':
            continue
        # Skip dissolved parties (ISO date strings compare correctly
        # lexicographically).
        if 'dissolution_date' in party:
            dissolution_date = party['dissolution_date']
            if dissolution_date < str(date.today()):
                continue
        # These URLs are specific to the server this data is
        # extracted from:
        del party['url']
        del party['html_url']
        # Similarly the URLs of images:
        party.pop('image', None)
        party.pop('proxy_image', None)
        party.pop('images', None)
        # The generated id in each identifier will be different
        # between systems (or between runs on the same system) so
        # just produces noisy diffs if we include it.
        for identifier in party.get('identifiers', []):
            identifier.pop('id', None)
        all_parties.append(party)
    all_parties.sort(key=lambda p: p['id'])
    # Output to a string so that we can strip any trailing whitespace.
    output = json.dumps(all_parties, sort_keys=True, indent=4)
    output = re.sub(r'(?ms)\s*$', '', output)
    ntf.write(output)
    rename(ntf.name, output_filename)
def handle(self, **options):
    """Set the 'election' attribute on candidate memberships in PopIt.

    For each configured election, finds memberships with the candidate
    role covering the election date and PUTs them back with the
    election slug attached.
    """
    for election, election_data in settings.ELECTIONS.items():
        for membership in popit_unwrap_pagination(
                self.api.memberships,
                embed='',
                per_page=100
        ):
            if membership.get('role', '') != election_data['candidate_membership_role']:
                continue
            if not membership_covers_date(
                    membership,
                    election_data['election_date']
            ):
                continue
            if membership.get('election', '') == election:
                # Then the attribute is already correct, don't
                # bother setting it.
                continue
            membership['election'] = election
            # Some of these memberships have a spurious 'area'
            # attribute, set to: {'area': {'name': ''}} which will
            # cause the PUT to fail, so remove that.
            existing_area = membership.get('area')
            if existing_area is not None:
                if existing_area == {'name': ''}:
                    del membership['area']
            try:
                self.api.memberships(membership['id']).put(membership)
            except (HttpServerError, HttpClientError) as e:
                # Best-effort: report the failure and carry on with
                # the remaining memberships.
                message = "There was an error when putting to membership: {0}"
                print message.format(membership['id'])
                print json.dumps(membership, indent=True, sort_keys=True)
                print "The error was:"
                print e.content
def handle(self, **options):
    """Strip bogus date fields from every person and PUT them back directly.

    Exits the process on the first HTTP client error.
    """
    for person in popit_unwrap_pagination(
            self.api.persons,
            embed='',
            per_page=100
    ):
        msg = "Person {0}persons/{1}"
        print msg.format(self.get_base_url(), person['id'])
        # These date fields don't belong on a person record:
        strip_bogus_fields(
            person,
            [
                'founding_date',
                'dissolution_date',
                'start_date',
                'end_date'
            ])
        # Nor do any of these belong on an image:
        for image in person.get('images', []):
            strip_bogus_fields(
                image,
                [
                    'birth_date',
                    'death_date',
                    'founding_date',
                    'dissolution_date',
                    'start_date',
                    'end_date'
                ])
        try:
            self.api.persons(person['id']).put(person)
        except HttpClientError as e:
            print "HttpClientError", e.content
            sys.exit(1)
        invalidate_cache_entries_from_person_data(person)
def handle(self, **options):
    """Remove the legacy 'last_party' key from every version of each person.

    Only people with at least one affected version are cleaned up and
    PUT back; image metadata and dates are fixed at the same time.
    Exits the process on the first HTTP client error.
    """
    for person in popit_unwrap_pagination(self.api.persons, embed='', per_page=100):
        needs_update = False
        for version in person.get('versions', []):
            data = version['data']
            if data.get('last_party'):
                needs_update = True
                msg = "Fixing person {0}persons/{1}"
                print msg.format(self.get_base_url(), person['id'])
                del data['last_party']
        if not needs_update:
            continue
        for image in person.get('images', []):
            # '_id' is PopIt-internal; drop it before the PUT.
            image.pop('_id', None)
            # Some images have an empty 'created' field, which
            # causes an Elasticsearch indexing error, so remove
            # that if it's the case:
            if not image.get('created'):
                image.pop('created', None)
        fix_dates(person)
        try:
            self.api.persons(person['id']).put(person)
        except HttpClientError as e:
            print "HttpClientError", e.content
            sys.exit(1)
def handle(self, **options):
    """Create or update joint parties from joint-party descriptions in PopIt.

    Groups non-dissolved 'Party' organizations by the joint name found
    in their descriptions; groups of two or more sub-parties are
    created or updated as joint parties.
    """
    joint_party_to_sub_parties = defaultdict(list)
    api = create_popit_api_object()
    for party in popit_unwrap_pagination(api.organizations, embed='', per_page=200):
        if party['classification'] != 'Party':
            continue
        # Skip parties already dissolved (ISO date strings compare
        # correctly lexicographically).
        if 'dissolution_date' in party:
            dissolution_date = party['dissolution_date']
            if dissolution_date < str(date.today()):
                continue
        for d in party.get('descriptions', []):
            m = joint_description_re.search(d['description'])
            if not m:
                continue
            joint_name = fix_joint_party_name(m.group('joint_name'))
            joint_party_to_sub_parties[joint_name].append(party)
    for joint_name, sub_parties in joint_party_to_sub_parties.items():
        # A "joint" party of one isn't a joint party; just report it.
        if len(sub_parties) < 2:
            message = 'Ignoring "{joint_name}" (only made up of one party: "{sub_party}")'
            print message.format(joint_name=joint_name, sub_party=sub_parties[0]['name'])
            continue
        create_or_update_party(api, joint_name, sub_parties)
def handle(self, **options):
    """Export all candidates standing in options['year'] as CSV.

    Writes to stdout when options['output_filename'] is None or '-';
    otherwise writes to a temporary file and atomically renames it
    into place (mode 0644).
    """
    all_people = []
    for person_dict in popit_unwrap_pagination(
            self.api.persons,
            embed="membership.organization",
            per_page=100,
    ):
        if person_dict.get('standing_in') \
           and person_dict['standing_in'].get(options['year']):
            person = PopItPerson.create_from_dict(person_dict)
            all_people.append(person.as_dict(year=options['year']))
    csv = list_to_csv(all_people)
    # Write to stdout if no output filename is specified, or if it
    # is '-'
    if options['output_filename'] in (None, '-'):
        # FIX: this previously used "with sys.stdout as f", which
        # closes sys.stdout when the block exits, silently breaking
        # any subsequent output from this process.  Write directly
        # instead.
        sys.stdout.write(csv)
    else:
        # Otherwise write to a temporary file and atomically
        # rename into place:
        ntf = NamedTemporaryFile(
            delete=False,
            dir=dirname(options['output_filename'])
        )
        ntf.write(csv)
        chmod(ntf.name, 0o644)
        rename(ntf.name, options['output_filename'])
def handle(self, **options):
    """Set the 'election' attribute on candidate memberships in PopIt.

    For each configured election, finds memberships with the candidate
    role covering the election date and PUTs them back with the
    election slug attached.
    """
    for election, election_data in settings.ELECTIONS.items():
        for membership in popit_unwrap_pagination(self.api.memberships, embed='', per_page=100):
            if membership.get(
                    'role', '') != election_data['candidate_membership_role']:
                continue
            if not membership_covers_date(membership, election_data['election_date']):
                continue
            if membership.get('election', '') == election:
                # Then the attribute is already correct, don't
                # bother setting it.
                continue
            membership['election'] = election
            # Some of these memberships have a spurious 'area'
            # attribute, set to: {'area': {'name': ''}} which will
            # cause the PUT to fail, so remove that.
            existing_area = membership.get('area')
            if existing_area is not None:
                if existing_area == {'name': ''}:
                    del membership['area']
            try:
                self.api.memberships(membership['id']).put(membership)
            except (HttpServerError, HttpClientError) as e:
                # Best-effort: report the failure and carry on with
                # the remaining memberships.
                message = "There was an error when putting to membership: {0}"
                print message.format(membership['id'])
                print json.dumps(membership, indent=True, sort_keys=True)
                print "The error was:"
                print e.content
def handle(self, **options):
    """Fix image metadata and date fields on all organizations and people.

    Exits the process on the first HTTP client error.  Person cache
    entries are invalidated after a successful PUT.
    """
    for collection in ('organization', 'person'):
        api_collection = getattr(self.api, collection + 's')
        message = "{titled} {base_url}{plural}/{id}"
        for item in popit_unwrap_pagination(api_collection, embed='', per_page=100):
            print message.format(titled=collection.title(),
                                 base_url=get_base_url(),
                                 plural=(collection + "s"),
                                 id=item['id'])
            for image in item.get('images', []):
                print " Image with URL:", image['url']
                fix_image(image)
                # Some images have an empty 'created' field, which
                # causes an Elasticsearch indexing error, so change it
                # to null if that's the case:
                if not image.get('created'):
                    image['created'] = None
            fix_dates(item)
            try:
                api_collection(item['id']).put(item)
            except HttpClientError as e:
                print "HttpClientError", e.content
                sys.exit(1)
            # If this is a person, make sure that the
            # corresponding cache entries are invalidated:
            if collection == 'person':
                person = PopItPerson.create_from_dict(item)
                person.invalidate_cache_entries()
def handle(self, **options): for person_popit_data in popit_unwrap_pagination( self.api.persons, per_page=100 ): # Exclude anyone who doesn't have a parlparse ID; this is # more or less the incumbents (by-elections causing the # "more or less" bit.) if not get_parlparse_id(person_popit_data): continue if '2015' in person_popit_data['standing_in']: print "We already have 2015 information for", person_popit_data['name'] continue print "Considering person:", person_popit_data['name'] person_data, _ = self.get_person(person_popit_data['id']) cons_id = person_data['standing_in']['2010']['post_id'] party_id = person_data['party_memberships']['2010']['id'] party_name = person_data['party_memberships']['2010']['name'] cons_url = 'https://yournextmp.com/constituency/{0}'.format(cons_id) # We're now considering marking the candidate as standing # again in the same consituency as in 2010. Check that # there's not another candidate from their party already # marked as standing in that consituency. if self.existing_candidate_same_party(cons_id, party_id): msg = u"There was already a candidate for {0} in {1} - skipping" print msg.format(party_name, cons_url).format('utf-8') continue # Now it should be safe to update the candidate and set # them as standing in 2015. previous_versions = person_data.pop('versions') person_data['standing_in']['2015'] = person_data['standing_in']['2010'] person_data['party_memberships']['2015'] = person_data['party_memberships']['2010'] change_metadata = self.get_change_metadata( None, "Assuming that incumbents we don't have definite information on yet are standing again", ) self.update_person( person_data, change_metadata, previous_versions, ) invalidate_cache_entries_from_person_data(person_data) message = u"Marked an incumbent ({name} - {party}) as standing again in {cons_url}" print message.format( name=person_data['name'], party=party_name, cons_url=cons_url, ).encode('utf-8')
def handle(self, **options):
    """Export all candidates standing in options['year'] as CSV.

    Writes to stdout when options['output_filename'] is None or '-';
    otherwise writes to a temporary file and atomically renames it
    into place (mode 0644).
    """
    all_people = []
    for person_dict in popit_unwrap_pagination(
            self.api.persons,
            embed="membership.organization",
            per_page=100,
    ):
        if person_dict.get('standing_in') \
           and person_dict['standing_in'].get(options['year']):
            person = PopItPerson.create_from_dict(person_dict)
            all_people.append(person.as_dict(year=options['year']))
    csv = list_to_csv(all_people)
    # Write to stdout if no output filename is specified, or if it
    # is '-'
    if options['output_filename'] in (None, '-'):
        # FIX: this previously used "with sys.stdout as f", which
        # closes sys.stdout when the block exits, silently breaking
        # any subsequent output from this process.  Write directly
        # instead.
        sys.stdout.write(csv)
    else:
        # Otherwise write to a temporary file and atomically
        # rename into place:
        ntf = NamedTemporaryFile(delete=False, dir=dirname(options['output_filename']))
        ntf.write(csv)
        chmod(ntf.name, 0o644)
        rename(ntf.name, options['output_filename'])
def handle(self, **options):
    """Strip bogus date fields and broken image metadata from every person.

    Each person is PUT straight back to PopIt; exits the process on
    the first HTTP client error.
    """
    for person in popit_unwrap_pagination(self.api.persons, embed='', per_page=100):
        msg = "Person {0}persons/{1}"
        print msg.format(self.get_base_url(), person['id'])
        # These date fields don't belong on a person record:
        strip_bogus_fields(person, [
            'founding_date',
            'dissolution_date',
            'start_date',
            'end_date'
        ])
        for image in person.get('images', []):
            # '_id' is PopIt-internal; drop it before the PUT.
            image.pop('_id', None)
            # Some images have an empty 'created' field, which
            # causes an Elasticsearch indexing error, so remove
            # that if it's the case:
            if not image.get('created'):
                image.pop('created', None)
            # Nor do any of these date fields belong on an image:
            strip_bogus_fields(image, [
                'birth_date',
                'death_date',
                'founding_date',
                'dissolution_date',
                'start_date',
                'end_date'
            ])
        fix_dates(person)
        try:
            self.api.persons(person['id']).put(person)
        except HttpClientError as e:
            print "HttpClientError", e.content
            sys.exit(1)
def handle(self, **options):
    """Fix image metadata and date fields on all organizations and people.

    Exits the process on the first HTTP client error.  Person cache
    entries are invalidated after a successful PUT.
    """
    for collection in ('organization', 'person'):
        api_collection = getattr(self.api, collection + 's')
        message = "{titled} {base_url}{plural}/{id}"
        for item in popit_unwrap_pagination(
                api_collection,
                embed='',
                per_page=100
        ):
            print message.format(
                titled=collection.title(),
                base_url=self.get_base_url(),
                plural=(collection + "s"),
                id=item['id']
            )
            for image in item.get('images', []):
                print " Image with URL:", image['url']
                fix_image(image)
                # Some images have an empty 'created' field, which
                # causes an Elasticsearch indexing error, so change it
                # to null if that's the case:
                if not image.get('created'):
                    image['created'] = None
            fix_dates(item)
            try:
                api_collection(item['id']).put(item)
            except HttpClientError as e:
                print "HttpClientError", e.content
                sys.exit(1)
            # Person pages are cached, so invalidate their entries:
            if collection == 'person':
                invalidate_cache_entries_from_person_data(item)
def handle(self, **options): for person in popit_unwrap_pagination(self.api.persons, per_page=100): print u"Stripping IP addresses from {name} ({id})".format( **person).encode('utf-8') for version in person.get('versions', []): version.pop('ip', None) self.api.persons(person['id']).put(person)
def handle(self, **options): print "Finding the maximum person ID from PopIt" max_person_id = -1 for person in popit_unwrap_pagination(self.api.persons, per_page=100): person_id = int(person['id']) max_person_id = max(person_id, max_person_id) print "Setting maximum PopIt person ID to:", max_person_id MaxPopItIds.update_max_persons_id(max_person_id)
def handle(self, **options): for o in popit_unwrap_pagination(self.api.organizations, per_page=100): if o['classification'] != 'Party': continue print o['name'] for image in o.get('images', []): print " DELETE", image['_id'] self.api.organizations(o['id']).image(image['_id']).delete()
def handle(self, **options): max_person_id = -1 for person in popit_unwrap_pagination( self.api.persons, per_page=100 ): person_id = int(person['id']) max_person_id = max(person_id, max_person_id) print "Maximum person ID is:", max_person_id
def handle(self, **options): for person_popit_data in popit_unwrap_pagination(self.api.persons, per_page=100): # Exclude anyone who doesn't have a parlparse ID; this is # more or less the incumbents (by-elections causing the # "more or less" bit.) if not get_parlparse_id(person_popit_data): continue if '2015' in person_popit_data['standing_in']: print "We already have 2015 information for", person_popit_data[ 'name'] continue person = PopItPerson.create_from_dict(person_popit_data) print "Considering person:", person.name cons_id = person.standing_in['2010']['post_id'] party_id = person.party_memberships['2010']['id'] party_name = person.party_memberships['2010']['name'] cons_url = 'https://yournextmp.com/constituency/{0}'.format( cons_id) # We're now considering marking the candidate as standing # again in the same consituency as in 2010. Check that # there's not another candidate from their party already # marked as standing in that consituency. if self.existing_candidate_same_party(cons_id, party_id): msg = u"There was already a candidate for {0} in {1} - skipping" print msg.format(party_name, cons_url).format('utf-8') continue # Now it should be safe to update the candidate and set # them as standing in 2015. person.standing_in['2015'] = person.standing_in['2010'] person.party_memberships['2015'] = person.party_memberships['2010'] person.record_version( get_change_metadata( None, "Assuming that incumbents we don't have definite information on yet are standing again", )) person.save_to_popit(self.api) person.invalidate_cache_entries() message = u"Marked an incumbent ({name} - {party}) as standing again in {cons_url}" print message.format( name=person.name, party=party_name, cons_url=cons_url, ).encode('utf-8')
def handle(self, **options): print "Finding the maximum person ID from PopIt" max_person_id = -1 for person in popit_unwrap_pagination( self.api.persons, per_page=100 ): person_id = int(person['id']) max_person_id = max(person_id, max_person_id) print "Setting maximum PopIt person ID to:", max_person_id MaxPopItIds.update_max_persons_id(max_person_id)
def handle(self, **options): for person in popit_unwrap_pagination( self.api.persons, per_page=100 ): print u"Stripping IP addresses from {name} ({id})".format( **person ).encode('utf-8') for version in person.get('versions', []): version.pop('ip', None) self.api.persons(person['id']).put(person)
def handle(self, **options): for o in popit_unwrap_pagination( self.api.organizations, per_page=100 ): if o['classification'] != 'Party': continue print o['name'] for image in o.get('images', []): print " DELETE", image['_id'] self.api.organizations(o['id']).image(image['_id']).delete()
def handle(self, **options):
    """Delete all 'Party' organization images and invalidate member caches."""
    for o in popit_unwrap_pagination(self.api.organizations, per_page=100, embed='membership.person'):
        if o['classification'] != 'Party':
            continue
        print o['name']
        for image in o.get('images', []):
            print " DELETE", image['_id']
            self.api.organizations(o['id']).image(image['_id']).delete()
        # The person pages get party images via the
        # membership.organization embed, so invalidate the cache
        # entries for any person who's a member of this party:
        for membership in o.get('memberships', []):
            # NOTE(review): with embed='membership.person',
            # 'person_id' presumably holds the embedded person dict
            # (not a bare ID string) — confirm, since
            # create_from_dict expects a dict.
            person = PopItPerson.create_from_dict(membership['person_id'])
            person.invalidate_cache_entries()
def handle(self, **options):
    """Report contact-detail coverage of 2015 candidates and write a CSV
    of new candidates per day to 'candidates-over-time'.
    """
    date_count = defaultdict(int)
    total_candidates = 0
    twitter_usernames = 0
    email_addresses = 0
    both_twitter_and_email = 0
    neither_twitter_nor_email = 0
    for person in popit_unwrap_pagination(
            self.api.persons, per_page=100
    ):
        if not person.get('standing_in'):
            continue
        if not person['standing_in'].get('2015'):
            continue
        total_candidates += 1
        have_twitter = False
        have_email = False
        # The latest version (index 0) holds the current data.
        if person['versions'][0]['data']['twitter_username']:
            have_twitter = True
            twitter_usernames += 1
        if person['versions'][0]['data']['email']:
            have_email = True
            email_addresses += 1
        if have_email and have_twitter:
            both_twitter_and_email += 1
        elif not (have_email or have_twitter):
            neither_twitter_nor_email += 1
        # Find the date of the earliest version where the person was
        # marked as standing in 2015 (versions are newest-first):
        versions = person.get('versions')
        for version in reversed(versions):
            standing_in = version['data'].get('standing_in', {})
            if standing_in.get('2015'):
                date_str = re.sub(r'T.*', '', version['timestamp'])
                date_count[date_str] += 1
                break
    # NOTE(review): Python 2 integer division — these are floor
    # percentages; also raises ZeroDivisionError if there are no 2015
    # candidates at all.
    print "Candidates with Twitter usernames: {0}%".format((100 * twitter_usernames) / total_candidates)
    print "Candidates with email addresses: {0}%".format((100 * email_addresses) / total_candidates)
    print "Candidates with neither Twitter nor email: {0}%".format((100 * neither_twitter_nor_email) / total_candidates)
    print "Candidates with both Twitter and email: {0}%".format((100 * both_twitter_and_email) / total_candidates)
    cumulative = 0
    with open('candidates-over-time', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Date', 'New Candidates', 'Cumulative Candidates'])
        for row in sorted(date_count.items()):
            cumulative += row[1]
            row = list(row) + [cumulative]
            writer.writerow(row)
def handle(self, *args, **options): api = create_popit_api_object() for org in popit_unwrap_pagination( api.organizations, per_page=100 ): org.pop('versions', None) org.pop('memberships', None) images = org.get('images', []) if len(images) < 2: continue print "=====================================================" print len(images), org['id'], org['name'].encode('utf-8') for image in images: print ' --' print ' ' + image['notes'].encode('utf-8') print ' ' + image['url']
def handle(self, **options):
    """Delete all 'Party' organization images and invalidate member caches."""
    for o in popit_unwrap_pagination(
            self.api.organizations,
            per_page=100,
            embed='membership.person'
    ):
        if o['classification'] != 'Party':
            continue
        print o['name']
        for image in o.get('images', []):
            print " DELETE", image['_id']
            self.api.organizations(o['id']).image(image['_id']).delete()
        # The person pages get party images via the
        # membership.organization embed, so invalidate the cache
        # entries for any person who's a member of this party:
        for membership in o.get('memberships', []):
            # NOTE(review): with embed='membership.person',
            # 'person_id' presumably holds the embedded person dict
            # (not a bare ID string) — confirm, since
            # create_from_dict expects a dict.
            person = PopItPerson.create_from_dict(membership['person_id'])
            person.invalidate_cache_entries()
def handle(self, **options):
    """Remove the legacy 'last_party' key from each person's versions.

    People with no affected versions are skipped; the rest are saved
    back via PopItPerson, which also invalidates cache entries.
    """
    for person_data in popit_unwrap_pagination(
            self.api.persons, embed='', per_page=100
    ):
        needs_update = False
        for version in person_data.get('versions', []):
            data = version['data']
            if data.get('last_party'):
                needs_update = True
                msg = "Fixing person {0}persons/{1}"
                print msg.format(get_base_url(), person_data['id'])
                del data['last_party']
        if not needs_update:
            continue
        person = PopItPerson.create_from_dict(person_data)
        person.save_to_popit(self.api)
        person.invalidate_cache_entries()
def handle(self, **options):
    """Set 'election' and correct 'role' on memberships from their post's role.

    For each membership with a post, finds the first Election whose
    for_post_role matches the post's role, attaches the election slug,
    normalizes the role, and PUTs the membership back.  Warns when no
    election matches.
    """
    from slumber.exceptions import HttpServerError, HttpClientError
    from candidates.popit import popit_unwrap_pagination
    for membership in popit_unwrap_pagination(self.api.memberships, embed='', per_page=100):
        post_id = membership.get('post_id')
        if not post_id:
            continue
        post = self.api.posts(post_id).get(embed='')['result']
        post_role = post['role']
        # Now find the election that matches this membership and role:
        election_found = False
        for edata in Election.objects.all():
            if edata.for_post_role != post_role:
                continue
            election_found = True
            membership['election'] = edata.slug
            # Now correct the membership role:
            membership['role'] = edata.candidate_membership_role
            # Some of these memberships have a spurious 'area'
            # attribute, set to: {'area': {'name': ''}} which will
            # cause the PUT to fail, so remove that.
            existing_area = membership.get('area')
            if existing_area is not None:
                if existing_area == {'name': ''}:
                    del membership['area']
            try:
                self.api.memberships(membership['id']).put(membership)
            except (HttpServerError, HttpClientError) as e:
                message = "There was an error when putting to membership: {0}"
                print message.format(membership['id'])
                print json.dumps(membership, indent=True, sort_keys=True)
                print "The error was:"
                print e.content
            # Only the first matching election is applied:
            break
        if not election_found:
            message = u"Warning: no election found for membership ID {0}"
            print message.format(membership['id'])
def handle(self, *args, **options):
    """Export CSVs of candidates: one per election plus a combined '-all' file.

    args must contain exactly one element: the output filename prefix.
    Each CSV is written atomically via a temporary file and rename().
    """
    if len(args) != 1:
        msg = "You must supply the prefix for output filenames"
        raise CommandError(msg)
    output_prefix = args[0]
    all_people = []
    election_to_people = defaultdict(list)
    for person_dict in popit_unwrap_pagination(
            self.api.persons,
            embed="membership.organization",
            per_page=100,
    ):
        standing_in = person_dict.get('standing_in')
        if not standing_in:
            continue
        for election in standing_in.keys():
            if not standing_in[election]:
                continue
            person = PopItPerson.create_from_dict(person_dict)
            person_as_csv_dict = person.as_dict(election=election)
            all_people.append(person_as_csv_dict)
            election_to_people[election].append(person_as_csv_dict)
    # None stands for the combined "-all" output:
    elections = election_to_people.keys() + [None]
    for election in elections:
        if election is None:
            output_filename = output_prefix + '-all.csv'
            people_data = all_people
        else:
            output_filename = output_prefix + '-' + election + '.csv'
            people_data = election_to_people[election]
        csv = list_to_csv(people_data)
        # Write to a temporary file and atomically rename into place:
        ntf = NamedTemporaryFile(
            delete=False,
            dir=dirname(output_filename)
        )
        ntf.write(csv)
        chmod(ntf.name, 0o644)
        rename(ntf.name, output_filename)
def handle(self, **options):
    """Set 'election' and correct 'role' on memberships from their post's role.

    For each membership with a post, finds the first Election whose
    for_post_role matches the post's role, attaches the election slug,
    normalizes the role, and PUTs the membership back.  Warns when no
    election matches.
    """
    from slumber.exceptions import HttpServerError, HttpClientError
    from candidates.popit import popit_unwrap_pagination
    for membership in popit_unwrap_pagination(
            self.api.memberships,
            embed='',
            per_page=100
    ):
        post_id = membership.get('post_id')
        if not post_id:
            continue
        post = self.api.posts(post_id).get(embed='')['result']
        post_role = post['role']
        # Now find the election that matches this membership and role:
        election_found = False
        for edata in Election.objects.all():
            if edata.for_post_role != post_role:
                continue
            election_found = True
            membership['election'] = edata.slug
            # Now correct the membership role:
            membership['role'] = edata.candidate_membership_role
            # Some of these memberships have a spurious 'area'
            # attribute, set to: {'area': {'name': ''}} which will
            # cause the PUT to fail, so remove that.
            existing_area = membership.get('area')
            if existing_area is not None:
                if existing_area == {'name': ''}:
                    del membership['area']
            try:
                self.api.memberships(membership['id']).put(membership)
            except (HttpServerError, HttpClientError) as e:
                message = "There was an error when putting to membership: {0}"
                print message.format(membership['id'])
                print json.dumps(membership, indent=True, sort_keys=True)
                print "The error was:"
                print e.content
            # Only the first matching election is applied:
            break
        if not election_found:
            message = u"Warning: no election found for membership ID {0}"
            print message.format(membership['id'])
def handle(self, **options):
    """Remove the legacy 'last_party' key from each person's versions.

    Affected people are PUT straight back to PopIt and their cache
    entries invalidated; exits the process on the first HTTP client
    error.
    """
    for person in popit_unwrap_pagination(
            self.api.persons, embed='', per_page=100
    ):
        needs_update = False
        for version in person.get('versions', []):
            data = version['data']
            if data.get('last_party'):
                needs_update = True
                msg = "Fixing person {0}persons/{1}"
                print msg.format(self.get_base_url(), person['id'])
                del data['last_party']
        if not needs_update:
            continue
        try:
            self.api.persons(person['id']).put(person)
        except HttpClientError as e:
            print "HttpClientError", e.content
            sys.exit(1)
        invalidate_cache_entries_from_person_data(person)
def get_all_parties(api, when_date):
    '''Fetch all political parties from PopIt divided into active / inactive

    Returns a dict keyed by EC register, each value being a dict with
    'active' and 'inactive' sub-dicts that map normalized party names
    to the organization records.  Activity is decided by
    organization_active(org, when_date).  Organizations with an
    unrecognized register are reported and skipped.
    '''
    result = {}
    for register in ec_registers:
        result[register] = {
            'active': {},
            'inactive': {},
        }
    for org in popit_unwrap_pagination(api.organizations, embed=''):
        if org['classification'] != 'Party':
            continue
        org_register = org.get('register')
        if org_register not in ec_registers:
            # Deliberately a warning rather than a hard failure, so
            # one bad record doesn't abort the whole fetch.
            message = u"Unknown register {0} in organization: {1}"
            print (u"Warning:" + message.format(org_register, org)).encode('utf-8')
            continue
        register_dict = result[org_register]
        party_dict = register_dict[
            'active' if organization_active(org, when_date) else 'inactive'
        ]
        party_dict[normalize_party_name(org['name'])] = org
    return result
def handle(self, **options):
    """Fix image metadata and date fields for every person, PUT each back.

    Exits the process on the first HTTP client error.
    """
    for person in popit_unwrap_pagination(
            self.api.persons, embed='', per_page=100
    ):
        msg = "Person {0}persons/{1}"
        print msg.format(self.get_base_url(), person['id'])
        for image in person.get('images', []):
            print " Image with URL:", image['url']
            fix_image(image)
            # '_id' is PopIt-internal; drop it before the PUT.
            image.pop('_id', None)
            # Some images have an empty 'created' field, which
            # causes an Elasticsearch indexing error, so remove
            # that if it's the case:
            if not image.get('created'):
                image.pop('created', None)
        fix_dates(person)
        try:
            self.api.persons(person['id']).put(person)
        except HttpClientError as e:
            print "HttpClientError", e.content
            sys.exit(1)
def handle(self, **options):
    """Set 'election' and correct 'role' on memberships from their post's role.

    Like the Election-model variant, but reads election configuration
    from settings.ELECTIONS.  Warns when no election matches.
    """
    for membership in popit_unwrap_pagination(self.api.memberships, embed="", per_page=100):
        post_id = membership.get("post_id")
        if not post_id:
            continue
        post = self.api.posts(post_id).get(embed="")["result"]
        post_role = post["role"]
        # Now find the election that matches this membership and role:
        election_found = False
        for election, edata in settings.ELECTIONS.items():
            if edata["for_post_role"] != post_role:
                continue
            election_found = True
            membership["election"] = election
            # Now correct the membership role:
            membership["role"] = edata["candidate_membership_role"]
            # Some of these memberships have a spurious 'area'
            # attribute, set to: {'area': {'name': ''}} which will
            # cause the PUT to fail, so remove that.
            existing_area = membership.get("area")
            if existing_area is not None:
                if existing_area == {"name": ""}:
                    del membership["area"]
            try:
                self.api.memberships(membership["id"]).put(membership)
            except (HttpServerError, HttpClientError) as e:
                message = "There was an error when putting to membership: {0}"
                print message.format(membership["id"])
                print json.dumps(membership, indent=True, sort_keys=True)
                print "The error was:"
                print e.content
            # Only the first matching election is applied:
            break
        if not election_found:
            message = u"Warning: no election found for membership ID {0}"
            print message.format(membership["id"])
def handle(self, **options):
    """Recompute cached candidate counts (by party, constituency, totals).

    Counts are keyed by party/constituency *name* and stored via
    self.add_or_update.
    """
    all_constituencies = MapItData.constituencies_2010_name_sorted
    all_parties = PartyData.party_id_to_name
    counts = {
        'candidates_2015': 0,
        'parties': {
            name: {
                'count': 0,
                'party_id': party_id
            } for party_id, name in all_parties.items()
        },
        'constituencies': {
            n[1]['name']: {
                'count': 0,
                'con_id': n[1]['id']
            } for n in all_constituencies
        },
    }
    count_2010 = 0
    standing_again = 0
    all_people = 0
    new_candidates = 0
    # Loop over everything, counting things we're interested in
    for person in popit_unwrap_pagination(self.api.persons, embed="membership.organization", per_page=100):
        all_people += 1
        if person.get('standing_in') and person['standing_in'].get('2015'):
            counts['candidates_2015'] += 1
            party_name = person['party_memberships']['2015']['name']
            counts['parties'][party_name]['count'] += 1
            constituency_name = person['standing_in']['2015']['name']
            counts['constituencies'][constituency_name]['count'] += 1
            if person['standing_in'].get('2010'):
                standing_again += 1
        # NOTE(review): this counts anyone without 2010 standing data
        # as a "new candidate", even people not standing in 2015 —
        # confirm that's intended.
        if person.get('standing_in') and person['standing_in'].get('2010'):
            count_2010 += 1
        else:
            new_candidates += 1
    # Add or create objects in the database
    # Parties
    for party_name, data in counts['parties'].items():
        obj = {
            'count_type': 'party',
            'name': party_name,
            'count': data['count'],
            'object_id': data['party_id']
        }
        self.add_or_update(obj)
    # Constituencies
    for constituency_name, data in counts['constituencies'].items():
        obj = {
            'count_type': 'constituency',
            'name': constituency_name,
            'count': data['count'],
            'object_id': data['con_id']
        }
        self.add_or_update(obj)
    # Total candidates in 2015
    obj = {
        'count_type': 'total',
        'name': 'total_2015',
        'count': counts['candidates_2015'],
        'object_id': 'candidates_2015',
    }
    self.add_or_update(obj)
    # New candidates
    obj = {
        'count_type': 'total',
        'name': 'new_candidates',
        'count': new_candidates,
        'object_id': 'new_candidates',
    }
    self.add_or_update(obj)
    # Standing again
    obj = {
        'count_type': 'total',
        'name': 'standing_again',
        'count': standing_again,
        'object_id': 'standing_again',
    }
    self.add_or_update(obj)
def handle(self, **options):
    """Rebuild cached candidacy counts from the live PopIt data.

    Unlike the name-keyed variant of this command, counts here are keyed
    by party id / constituency id, candidates standing again are split by
    whether they kept or changed party, and stale CachedCount rows for
    dissolved parties are deleted.
    """
    all_constituencies = MapItData.constituencies_2010_name_sorted
    all_parties = PartyData.party_id_to_name
    counts = {
        'candidates_2010': 0,
        'candidates_2015': 0,
        # Pre-seed every currently-known party and constituency with a
        # zero count so entities with no candidates still get a row.
        'parties': {
            party_id: {'count': 0, 'party_name': name}
            for party_id, name in all_parties.items()
        },
        'constituencies': {
            str(n[1]['id']): {'count': 0, 'con_name': n[1]['name']}
            for n in all_constituencies
        },
    }
    standing_again_same_party = 0
    standing_again_different_party = 0
    all_people = 0
    new_candidates = 0
    # Loop over everything, counting things we're interested in
    for person in popit_unwrap_pagination(
            self.api.persons,
            embed="membership.organization",
            per_page=100
    ):
        all_people += 1
        if not person.get('standing_in'):
            continue
        if person['standing_in'].get('2010'):
            counts['candidates_2010'] += 1
        if person['standing_in'].get('2015'):
            counts['candidates_2015'] += 1
            party_id = person['party_memberships']['2015']['id']
            # Guard against candidacies that reference a party no longer
            # present in PartyData (dissolved or deleted parties).
            if party_id in counts['parties']:
                counts['parties'][party_id]['count'] += 1
                constituency_id = person['standing_in']['2015']['post_id']
                counts['constituencies'][constituency_id]['count'] += 1
                if person['standing_in'].get('2010'):
                    # Standing again: same party or a different one?
                    if party_id == person['party_memberships']['2010']['id']:
                        standing_again_same_party += 1
                    else:
                        standing_again_different_party += 1
                else:
                    new_candidates += 1
            else:
                message = u"WARNING: candidate {0} is standing in 2015 " + \
                    "for a dissolved or deleted party {1}"
                print message.format(person['id'], party_id).encode('utf-8')
    # Add or create objects in the database
    # Parties
    for party_id, data in counts['parties'].items():
        obj = {
            'count_type': 'party',
            'name': data['party_name'],
            'count': data['count'],
            'object_id': party_id
        }
        self.add_or_update(obj)
    # Remove any parties that have dissolved but still might have
    # entries in the database:
    parties_in_database = set(
        CachedCount.objects.filter(count_type='party').
        values_list('object_id', flat=True)
    )
    current_parties = set(
        PartyData.party_id_to_name.keys()
    )
    parties_to_remove = parties_in_database - current_parties
    CachedCount.objects.filter(
        count_type='party',
        object_id__in=parties_to_remove
    ).delete()
    # Constituencies
    for constituency_id, data in counts['constituencies'].items():
        obj = {
            'count_type': 'constituency',
            'name': data['con_name'],
            'count': data['count'],
            'object_id': constituency_id
        }
        self.add_or_update(obj)
    # Write every overall total as a (name, count, object_id) row.
    for name, count, object_id in (
            ('new_candidates', new_candidates, 'new_candidates'),
            ('total_2010', counts['candidates_2010'], 'candidates_2010'),
            ('total_2015', counts['candidates_2015'], 'candidates_2015'),
            ('standing_again_same_party',
             standing_again_same_party,
             'standing_again_same_party'),
            ('standing_again_different_party',
             standing_again_different_party,
             'standing_again_different_party'),
            ('standing_again',
             standing_again_same_party + standing_again_different_party,
             'standing_again'),
    ):
        obj = {
            'count_type': 'total',
            'name': name,
            'count': count,
            'object_id': object_id,
        }
        self.add_or_update(obj)
def handle(self, **options): max_person_id = -1 for person in popit_unwrap_pagination(self.api.persons, per_page=100): person_id = int(person['id']) max_person_id = max(person_id, max_person_id) print "Maximum person ID is:", max_person_id
def handle(self, **options): all_constituencies = MapItData.constituencies_2010_name_sorted all_parties = PartyData.party_id_to_name counts = { 'candidates_2010': 0, 'candidates_2015': 0, 'parties': { party_id: {'count': 0, 'party_name': name} for party_id, name in all_parties.items() }, 'constituencies': { str(n[1]['id']): {'count': 0, 'con_name': n[1]['name']} for n in all_constituencies }, } standing_again_same_party = 0 standing_again_different_party = 0 all_people = 0 new_candidates = 0 # Loop over everything, counting things we're interseted in for person in popit_unwrap_pagination( self.api.persons, embed="membership.organization", per_page=100 ): all_people += 1 if not person.get('standing_in'): continue if person['standing_in'].get('2010'): counts['candidates_2010'] += 1 if person['standing_in'].get('2015'): counts['candidates_2015'] += 1 party_id = person['party_memberships']['2015']['id'] counts['parties'][party_id]['count'] += 1 constituency_id = person['standing_in']['2015']['post_id'] counts['constituencies'][constituency_id]['count'] += 1 if person['standing_in'].get('2010'): if party_id == person['party_memberships']['2010']['id']: standing_again_same_party += 1 else: standing_again_different_party += 1 else: new_candidates += 1 # Add or create objects in the database # Parties for party_id, data in counts['parties'].items(): obj = { 'count_type': 'party', 'name': data['party_name'], 'count': data['count'], 'object_id': party_id } self.add_or_update(obj) # Constituencies for constituency_id, data in counts['constituencies'].items(): obj = { 'count_type': 'constituency', 'name': data['con_name'], 'count': data['count'], 'object_id': constituency_id } self.add_or_update(obj) for name, count, object_id in ( ('new_candidates', new_candidates, 'new_candidates'), ('total_2010', counts['candidates_2010'], 'candidates_2010'), ('total_2015', counts['candidates_2015'], 'candidates_2015'), ('standing_again_same_party', standing_again_same_party, 
'standing_again_same_party'), ('standing_again_different_party', standing_again_different_party, 'standing_again_different_party'), ('standing_again', standing_again_same_party + standing_again_different_party, 'standing_again'), ): obj = { 'count_type': 'total', 'name': name, 'count': count, 'object_id': object_id, } self.add_or_update(obj)