def handle_label(self, output_filename, **options):
     """Write all PopIt organizations classified as 'Party' to a JSON file.

     Server-specific URLs, image fields and the generated ids inside
     each identifier are removed, the parties are sorted by id, and
     the JSON is written to a temporary file in the same directory as
     output_filename, which is then renamed into place.
     """
     api = create_popit_api_object()
     all_parties = []
     for party in popit_unwrap_pagination(
             api.organizations,
             embed='',
             per_page=200
     ):
         if party['classification'] != 'Party':
             continue
         # These URLs are specific to the server this data is
         # extracted from:
         del party['url']
         del party['html_url']
         # Similarly the URLs of images:
         party.pop('image', None)
         party.pop('proxy_image', None)
         party.pop('images', None)
         # The generated id in each identifier will be different
         # between systems (or between runs on the same system) so
         # just produces noisy diffs if we include it.
         for identifier in party.get('identifiers', []):
             identifier.pop('id', None)
         all_parties.append(party)
     all_parties.sort(key=lambda p: p['id'])
     # Output to a string so that we can strip any trailing whitespace.
     output = json.dumps(all_parties, sort_keys=True, indent=4)
     output = re.sub(r'(?ms)\s*$', '', output)
     # Create the temporary file only once the output is ready, so that
     # a failure while fetching parties doesn't leave a stray file.
     ntf = NamedTemporaryFile(
         delete=False,
         dir=dirname(output_filename)
     )
     try:
         ntf.write(output)
     finally:
         # Close (and therefore flush) before renaming; otherwise
         # buffered data may not have been written out when the file
         # is moved into place.
         ntf.close()
     rename(ntf.name, output_filename)
 def handle(self, **options):
     joint_party_to_sub_parties = defaultdict(list)
     api = create_popit_api_object()
     for party in popit_unwrap_pagination(
             api.organizations,
             embed='',
             per_page=200
     ):
         if party['classification'] != 'Party':
             continue
         if 'dissolution_date' in party:
             dissolution_date = party['dissolution_date']
             if dissolution_date < str(date.today()):
                 continue
         for d in party.get('descriptions', []):
             m = joint_description_re.search(d['description'])
             if not m:
                 continue
             joint_name = fix_joint_party_name(m.group('joint_name'))
             joint_party_to_sub_parties[joint_name].append(party)
     for joint_name, sub_parties in joint_party_to_sub_parties.items():
         if len(sub_parties) < 2:
             message = 'Ignoring "{joint_name}" (only made up of one party: "{sub_party}")'
             print message.format(
                 joint_name=joint_name,
                 sub_party=sub_parties[0]['name']
             )
             continue
         create_or_update_party(api, joint_name, sub_parties)
Example #3
0
 def handle(self, **options):
     for person_data in popit_unwrap_pagination(
             self.api.persons,
             embed='',
             per_page=100
     ):
         msg = "Person {0}persons/{1}"
         print msg.format(get_base_url(), person_data['id'])
         strip_bogus_fields(
             person_data,
             [
                 'founding_date',
                 'dissolution_date',
                 'start_date',
                 'end_date'
             ]
         )
         for image in person_data.get('images', []):
             strip_bogus_fields(
                 image,
                 [
                     'birth_date',
                     'death_date',
                     'founding_date',
                     'dissolution_date',
                     'start_date',
                     'end_date'
                 ]
             )
         person = PopItPerson.create_from_dict(person_data)
         person.save_to_popit(self.api)
         person.invalidate_cache_entries()
 def handle_label(self, output_filename, **options):
     """Write the current PopIt parties to a JSON file.

     Organizations that aren't classified as 'Party', or whose
     dissolution date is before today, are skipped. Server-specific
     URLs, image fields and generated identifier ids are removed, the
     parties are sorted by id, and the JSON is written to a temporary
     file in the same directory as output_filename, which is then
     renamed into place.
     """
     api = create_popit_api_object()
     all_parties = []
     for party in popit_unwrap_pagination(api.organizations,
                                          embed='',
                                          per_page=200):
         if party['classification'] != 'Party':
             continue
         if 'dissolution_date' in party:
             dissolution_date = party['dissolution_date']
             if dissolution_date < str(date.today()):
                 continue
         # These URLs are specific to the server this data is
         # extracted from:
         del party['url']
         del party['html_url']
         # Similarly the URLs of images:
         party.pop('image', None)
         party.pop('proxy_image', None)
         party.pop('images', None)
         # The generated id in each identifier will be different
         # between systems (or between runs on the same system) so
         # just produces noisy diffs if we include it.
         for identifier in party.get('identifiers', []):
             identifier.pop('id', None)
         all_parties.append(party)
     all_parties.sort(key=lambda p: p['id'])
     # Output to a string so that we can strip any trailing whitespace.
     output = json.dumps(all_parties, sort_keys=True, indent=4)
     output = re.sub(r'(?ms)\s*$', '', output)
     # Create the temporary file only once the output is ready, so that
     # a failure while fetching parties doesn't leave a stray file.
     ntf = NamedTemporaryFile(delete=False, dir=dirname(output_filename))
     try:
         ntf.write(output)
     finally:
         # Close (and therefore flush) before renaming; otherwise
         # buffered data may not have been written out when the file
         # is moved into place.
         ntf.close()
     rename(ntf.name, output_filename)
    def handle(self, **options):

        for election, election_data in settings.ELECTIONS.items():
            for membership in popit_unwrap_pagination(
                    self.api.memberships, embed='', per_page=100
            ):
                if membership.get('role', '') != election_data['candidate_membership_role']:
                    continue
                if not membership_covers_date(
                        membership, election_data['election_date']
                ):
                    continue
                if membership.get('election', '') == election:
                    # Then the attribute is already correct, don't
                    # bother setting it.
                    continue
                membership['election'] = election
                # Some of these memberships have a spurious 'area'
                # attribute, set to: {'area': {'name': ''}} which will
                # cause the PUT to fail, so remove that.
                existing_area = membership.get('area')
                if existing_area is not None:
                    if existing_area == {'name': ''}:
                        del membership['area']
                try:
                    self.api.memberships(membership['id']).put(membership)
                except (HttpServerError, HttpClientError) as e:
                    message = "There was an error when putting to membership: {0}"
                    print message.format(membership['id'])
                    print json.dumps(membership, indent=True, sort_keys=True)
                    print "The error was:"
                    print e.content
 def handle(self, **options):
     for person in popit_unwrap_pagination(
             self.api.persons,
             embed='',
             per_page=100
     ):
         msg = "Person {0}persons/{1}"
         print msg.format(self.get_base_url(), person['id'])
         strip_bogus_fields(
             person,
             [
                 'founding_date',
                 'dissolution_date',
                 'start_date',
                 'end_date'
             ]
         )
         for image in person.get('images', []):
             strip_bogus_fields(
                 image,
                 [
                     'birth_date',
                     'death_date',
                     'founding_date',
                     'dissolution_date',
                     'start_date',
                     'end_date'
                 ]
             )
         try:
             self.api.persons(person['id']).put(person)
         except HttpClientError as e:
             print "HttpClientError", e.content
             sys.exit(1)
         invalidate_cache_entries_from_person_data(person)
 def handle(self, **options):
     for person in popit_unwrap_pagination(self.api.persons,
                                           embed='',
                                           per_page=100):
         needs_update = False
         for version in person.get('versions', []):
             data = version['data']
             if data.get('last_party'):
                 needs_update = True
                 msg = "Fixing person {0}persons/{1}"
                 print msg.format(self.get_base_url(), person['id'])
                 del data['last_party']
         if not needs_update:
             continue
         for image in person.get('images', []):
             image.pop('_id', None)
             # Some images have an empty 'created' field, which
             # causes an Elasticsearch indexing error, so remove
             # that if it's the case:
             if not image.get('created'):
                 image.pop('created', None)
         fix_dates(person)
         try:
             self.api.persons(person['id']).put(person)
         except HttpClientError as e:
             print "HttpClientError", e.content
             sys.exit(1)
Example #8
0
 def handle(self, **options):
     joint_party_to_sub_parties = defaultdict(list)
     api = create_popit_api_object()
     for party in popit_unwrap_pagination(api.organizations,
                                          embed='',
                                          per_page=200):
         if party['classification'] != 'Party':
             continue
         if 'dissolution_date' in party:
             dissolution_date = party['dissolution_date']
             if dissolution_date < str(date.today()):
                 continue
         for d in party.get('descriptions', []):
             m = joint_description_re.search(d['description'])
             if not m:
                 continue
             joint_name = fix_joint_party_name(m.group('joint_name'))
             joint_party_to_sub_parties[joint_name].append(party)
     for joint_name, sub_parties in joint_party_to_sub_parties.items():
         if len(sub_parties) < 2:
             message = 'Ignoring "{joint_name}" (only made up of one party: "{sub_party}")'
             print message.format(joint_name=joint_name,
                                  sub_party=sub_parties[0]['name'])
             continue
         create_or_update_party(api, joint_name, sub_parties)
 def handle(self, **options):
     """Export everyone standing in options['year'] as CSV.

     The CSV goes to standard output when options['output_filename']
     is None or '-'; otherwise it is written to a temporary file in
     the destination directory, made world-readable and renamed into
     place.
     """
     all_people = []
     for person_dict in popit_unwrap_pagination(
             self.api.persons,
             embed="membership.organization",
             per_page=100,
     ):
         if person_dict.get('standing_in') \
             and person_dict['standing_in'].get(options['year']):
             person = PopItPerson.create_from_dict(person_dict)
             all_people.append(person.as_dict(year=options['year']))
     # Named csv_data rather than csv so that it can't shadow the
     # standard library module of that name.
     csv_data = list_to_csv(all_people)
     output_filename = options['output_filename']
     # Write to stdout if no output filename is specified, or if it
     # is '-'
     if output_filename in (None, '-'):
         # Write directly rather than using sys.stdout as a context
         # manager, which would close stdout for the rest of the
         # process.
         sys.stdout.write(csv_data)
         sys.stdout.flush()
         return
     # Otherwise write to a temporary file and atomically
     # rename into place:
     ntf = NamedTemporaryFile(
         delete=False,
         dir=dirname(output_filename)
     )
     try:
         ntf.write(csv_data)
     finally:
         # Close (and therefore flush) before renaming so that the
         # file is complete when it appears at its final name.
         ntf.close()
     chmod(ntf.name, 0o644)
     rename(ntf.name, output_filename)
    def handle(self, **options):

        for election, election_data in settings.ELECTIONS.items():
            for membership in popit_unwrap_pagination(self.api.memberships,
                                                      embed='',
                                                      per_page=100):
                if membership.get(
                        'role',
                        '') != election_data['candidate_membership_role']:
                    continue
                if not membership_covers_date(membership,
                                              election_data['election_date']):
                    continue
                if membership.get('election', '') == election:
                    # Then the attribute is already correct, don't
                    # bother setting it.
                    continue
                membership['election'] = election
                # Some of these memberships have a spurious 'area'
                # attribute, set to: {'area': {'name': ''}} which will
                # cause the PUT to fail, so remove that.
                existing_area = membership.get('area')
                if existing_area is not None:
                    if existing_area == {'name': ''}:
                        del membership['area']
                try:
                    self.api.memberships(membership['id']).put(membership)
                except (HttpServerError, HttpClientError) as e:
                    message = "There was an error when putting to membership: {0}"
                    print message.format(membership['id'])
                    print json.dumps(membership, indent=True, sort_keys=True)
                    print "The error was:"
                    print e.content
 def handle(self, **options):
     for collection in ('organization', 'person'):
         api_collection = getattr(self.api, collection + 's')
         message = "{titled} {base_url}{plural}/{id}"
         for item in popit_unwrap_pagination(api_collection,
                                             embed='',
                                             per_page=100):
             print message.format(titled=collection.title(),
                                  base_url=get_base_url(),
                                  plural=(collection + "s"),
                                  id=item['id'])
             for image in item.get('images', []):
                 print "  Image with URL:", image['url']
                 fix_image(image)
                 # Some images have an empty 'created' field, which
                 # causes an Elasticsearch indexing error, so change it
                 # to null if that's the case:
                 if not image.get('created'):
                     image['created'] = None
             fix_dates(item)
             try:
                 api_collection(item['id']).put(item)
             except HttpClientError as e:
                 print "HttpClientError", e.content
                 sys.exit(1)
             # If this is a person, make sure that the
             # corresponding cache entries are invalidated:
             if collection == 'person':
                 person = PopItPerson.create_from_dict(item)
                 person.invalidate_cache_entries()
    def handle(self, **options):

        for person_popit_data in popit_unwrap_pagination(
                self.api.persons,
                per_page=100
        ):
            # Exclude anyone who doesn't have a parlparse ID; this is
            # more or less the incumbents (by-elections causing the
            # "more or less" bit.)
            if not get_parlparse_id(person_popit_data):
                continue

            if '2015' in person_popit_data['standing_in']:
                print "We already have 2015 information for", person_popit_data['name']
                continue

            print "Considering person:", person_popit_data['name']

            person_data, _ = self.get_person(person_popit_data['id'])

            cons_id = person_data['standing_in']['2010']['post_id']
            party_id = person_data['party_memberships']['2010']['id']
            party_name = person_data['party_memberships']['2010']['name']
            cons_url = 'https://yournextmp.com/constituency/{0}'.format(cons_id)

            # We're now considering marking the candidate as standing
            # again in the same consituency as in 2010. Check that
            # there's not another candidate from their party already
            # marked as standing in that consituency.

            if self.existing_candidate_same_party(cons_id, party_id):
                msg = u"There was already a candidate for {0} in {1} - skipping"
                print msg.format(party_name, cons_url).format('utf-8')
                continue

            # Now it should be safe to update the candidate and set
            # them as standing in 2015.

            previous_versions = person_data.pop('versions')

            person_data['standing_in']['2015'] = person_data['standing_in']['2010']
            person_data['party_memberships']['2015'] = person_data['party_memberships']['2010']

            change_metadata = self.get_change_metadata(
                None,
                "Assuming that incumbents we don't have definite information on yet are standing again",
            )
            self.update_person(
                person_data,
                change_metadata,
                previous_versions,
            )
            invalidate_cache_entries_from_person_data(person_data)

            message = u"Marked an incumbent ({name} - {party}) as standing again in {cons_url}"
            print message.format(
                name=person_data['name'],
                party=party_name,
                cons_url=cons_url,
            ).encode('utf-8')
 def handle(self, **options):
     """Write a CSV of the people standing in options['year'].

     Output goes to standard output when options['output_filename']
     is None or '-'. Otherwise it goes to a temporary file in the
     destination directory which is given mode 0644 and renamed to the
     requested filename.
     """
     all_people = []
     for person_dict in popit_unwrap_pagination(
             self.api.persons,
             embed="membership.organization",
             per_page=100,
     ):
         if person_dict.get('standing_in') \
             and person_dict['standing_in'].get(options['year']):
             person = PopItPerson.create_from_dict(person_dict)
             all_people.append(person.as_dict(year=options['year']))
     # Named csv_data rather than csv so that it can't shadow the
     # standard library module of that name.
     csv_data = list_to_csv(all_people)
     output_filename = options['output_filename']
     # Write to stdout if no output filename is specified, or if it
     # is '-'
     if output_filename in (None, '-'):
         # Write directly rather than using sys.stdout as a context
         # manager, which would close stdout for the rest of the
         # process.
         sys.stdout.write(csv_data)
         sys.stdout.flush()
         return
     # Otherwise write to a temporary file and atomically
     # rename into place:
     ntf = NamedTemporaryFile(delete=False,
                              dir=dirname(output_filename))
     try:
         ntf.write(csv_data)
     finally:
         # Close (and therefore flush) before renaming so that the
         # file is complete when it appears at its final name.
         ntf.close()
     chmod(ntf.name, 0o644)
     rename(ntf.name, output_filename)
 def handle(self, **options):
     for person in popit_unwrap_pagination(self.api.persons,
                                           embed='',
                                           per_page=100):
         msg = "Person {0}persons/{1}"
         print msg.format(self.get_base_url(), person['id'])
         strip_bogus_fields(person, [
             'founding_date', 'dissolution_date', 'start_date', 'end_date'
         ])
         for image in person.get('images', []):
             image.pop('_id', None)
             # Some images have an empty 'created' field, which
             # causes an Elasticsearch indexing error, so remove
             # that if it's the case:
             if not image.get('created'):
                 image.pop('created', None)
             strip_bogus_fields(image, [
                 'birth_date', 'death_date', 'founding_date',
                 'dissolution_date', 'start_date', 'end_date'
             ])
         fix_dates(person)
         try:
             self.api.persons(person['id']).put(person)
         except HttpClientError as e:
             print "HttpClientError", e.content
             sys.exit(1)
 def handle(self, **options):
     for collection in ('organization', 'person'):
         api_collection = getattr(self.api, collection + 's')
         message = "{titled} {base_url}{plural}/{id}"
         for item in popit_unwrap_pagination(
                 api_collection,
                 embed='',
                 per_page=100
         ):
             print message.format(
                 titled=collection.title(),
                 base_url=self.get_base_url(),
                 plural=(collection + "s"),
                 id=item['id']
             )
             for image in item.get('images', []):
                 print "  Image with URL:", image['url']
                 fix_image(image)
                 # Some images have an empty 'created' field, which
                 # causes an Elasticsearch indexing error, so change it
                 # to null if that's the case:
                 if not image.get('created'):
                     image['created'] = None
             fix_dates(item)
             try:
                 api_collection(item['id']).put(item)
             except HttpClientError as e:
                 print "HttpClientError", e.content
                 sys.exit(1)
             if collection == 'person':
                 invalidate_cache_entries_from_person_data(item)
 def handle(self, **options):
     for person in popit_unwrap_pagination(self.api.persons, per_page=100):
         print u"Stripping IP addresses from {name} ({id})".format(
             **person).encode('utf-8')
         for version in person.get('versions', []):
             version.pop('ip', None)
         self.api.persons(person['id']).put(person)
Example #17
0
 def handle(self, **options):
     print "Finding the maximum person ID from PopIt"
     max_person_id = -1
     for person in popit_unwrap_pagination(self.api.persons, per_page=100):
         person_id = int(person['id'])
         max_person_id = max(person_id, max_person_id)
     print "Setting maximum PopIt person ID to:", max_person_id
     MaxPopItIds.update_max_persons_id(max_person_id)
 def handle(self, **options):
     for o in popit_unwrap_pagination(self.api.organizations, per_page=100):
         if o['classification'] != 'Party':
             continue
         print o['name']
         for image in o.get('images', []):
             print "  DELETE", image['_id']
             self.api.organizations(o['id']).image(image['_id']).delete()
 def handle(self, **options):
     max_person_id = -1
     for person in popit_unwrap_pagination(
             self.api.persons,
             per_page=100
     ):
         person_id = int(person['id'])
         max_person_id = max(person_id, max_person_id)
     print "Maximum person ID is:", max_person_id
    def handle(self, **options):

        for person_popit_data in popit_unwrap_pagination(self.api.persons,
                                                         per_page=100):
            # Exclude anyone who doesn't have a parlparse ID; this is
            # more or less the incumbents (by-elections causing the
            # "more or less" bit.)
            if not get_parlparse_id(person_popit_data):
                continue

            if '2015' in person_popit_data['standing_in']:
                print "We already have 2015 information for", person_popit_data[
                    'name']
                continue

            person = PopItPerson.create_from_dict(person_popit_data)

            print "Considering person:", person.name

            cons_id = person.standing_in['2010']['post_id']
            party_id = person.party_memberships['2010']['id']
            party_name = person.party_memberships['2010']['name']
            cons_url = 'https://yournextmp.com/constituency/{0}'.format(
                cons_id)

            # We're now considering marking the candidate as standing
            # again in the same consituency as in 2010. Check that
            # there's not another candidate from their party already
            # marked as standing in that consituency.

            if self.existing_candidate_same_party(cons_id, party_id):
                msg = u"There was already a candidate for {0} in {1} - skipping"
                print msg.format(party_name, cons_url).format('utf-8')
                continue

            # Now it should be safe to update the candidate and set
            # them as standing in 2015.

            person.standing_in['2015'] = person.standing_in['2010']
            person.party_memberships['2015'] = person.party_memberships['2010']

            person.record_version(
                get_change_metadata(
                    None,
                    "Assuming that incumbents we don't have definite information on yet are standing again",
                ))
            person.save_to_popit(self.api)
            person.invalidate_cache_entries()

            message = u"Marked an incumbent ({name} - {party}) as standing again in {cons_url}"
            print message.format(
                name=person.name,
                party=party_name,
                cons_url=cons_url,
            ).encode('utf-8')
 def handle(self, **options):
     print "Finding the maximum person ID from PopIt"
     max_person_id = -1
     for person in popit_unwrap_pagination(
             self.api.persons,
             per_page=100
     ):
         person_id = int(person['id'])
         max_person_id = max(person_id, max_person_id)
     print "Setting maximum PopIt person ID to:", max_person_id
     MaxPopItIds.update_max_persons_id(max_person_id)
 def handle(self, **options):
     for person in popit_unwrap_pagination(
             self.api.persons,
             per_page=100
     ):
         print u"Stripping IP addresses from {name} ({id})".format(
             **person
         ).encode('utf-8')
         for version in person.get('versions', []):
             version.pop('ip', None)
         self.api.persons(person['id']).put(person)
 def handle(self, **options):
     for o in popit_unwrap_pagination(
             self.api.organizations,
             per_page=100
     ):
         if o['classification'] != 'Party':
             continue
         print o['name']
         for image in o.get('images', []):
             print "  DELETE", image['_id']
             self.api.organizations(o['id']).image(image['_id']).delete()
Example #24
0
 def handle(self, **options):
     for o in popit_unwrap_pagination(self.api.organizations,
                                      per_page=100,
                                      embed='membership.person'):
         if o['classification'] != 'Party':
             continue
         print o['name']
         for image in o.get('images', []):
             print "  DELETE", image['_id']
             self.api.organizations(o['id']).image(image['_id']).delete()
         # The person pages get party images via the
         # membership.organization embed, so invalidate the cache
         # entries for any person who's a member of this party:
         for membership in o.get('memberships', []):
             person = PopItPerson.create_from_dict(membership['person_id'])
             person.invalidate_cache_entries()
 def handle(self, **options):
     date_count = defaultdict(int)
     total_candidates = 0
     twitter_usernames = 0
     email_addresses = 0
     both_twitter_and_email = 0
     neither_twitter_nor_email = 0
     for person in popit_unwrap_pagination(
             self.api.persons,
             per_page=100
     ):
         if not person.get('standing_in'):
             continue
         if not person['standing_in'].get('2015'):
             continue
         total_candidates += 1
         have_twitter = False
         have_email = False
         if person['versions'][0]['data']['twitter_username']:
             have_twitter = True
             twitter_usernames += 1
         if person['versions'][0]['data']['email']:
             have_email = True
             email_addresses += 1
         if have_email and have_twitter:
             both_twitter_and_email += 1
         elif not (have_email or have_twitter):
             neither_twitter_nor_email += 1
         versions = person.get('versions')
         for version in reversed(versions):
             standing_in = version['data'].get('standing_in', {})
             if standing_in.get('2015'):
                 date_str = re.sub(r'T.*', '', version['timestamp'])
                 date_count[date_str] += 1
                 break
     print "Candidates with Twitter usernames: {0}%".format((100 * twitter_usernames) / total_candidates)
     print "Candidates with email addresses: {0}%".format((100 * email_addresses) / total_candidates)
     print "Candidates with neither Twitter nor email: {0}%".format((100 * neither_twitter_nor_email) / total_candidates)
     print "Candidates with both Twitter and email: {0}%".format((100 * both_twitter_and_email) / total_candidates)
     cumulative = 0
     with open('candidates-over-time', 'wb') as csvfile:
         writer = csv.writer(csvfile)
         writer.writerow(['Date', 'New Candidates', 'Cumulative Candidates'])
         for row in sorted(date_count.items()):
             cumulative += row[1]
             row = list(row) + [cumulative]
             writer.writerow(row)
Example #26
0
    def handle(self, *args, **options):
        api = create_popit_api_object()

        for org in popit_unwrap_pagination(
            api.organizations,
            per_page=100
        ):
            org.pop('versions', None)
            org.pop('memberships', None)
            images = org.get('images', [])
            if len(images) < 2:
                continue
            print "====================================================="
            print len(images), org['id'], org['name'].encode('utf-8')
            for image in images:
                print '  --'
                print '   ' + image['notes'].encode('utf-8')
                print '   ' + image['url']
 def handle(self, **options):
     for o in popit_unwrap_pagination(
             self.api.organizations,
             per_page=100,
             embed='membership.person'
     ):
         if o['classification'] != 'Party':
             continue
         print o['name']
         for image in o.get('images', []):
             print "  DELETE", image['_id']
             self.api.organizations(o['id']).image(image['_id']).delete()
         # The person pages get party images via the
         # membership.organization embed, so invalidate the cache
         # entries for any person who's a member of this party:
         for membership in o.get('memberships', []):
             person = PopItPerson.create_from_dict(membership['person_id'])
             person.invalidate_cache_entries()
Example #28
0
 def handle(self, **options):
     for person_data in popit_unwrap_pagination(
             self.api.persons,
             embed='',
             per_page=100
     ):
         needs_update = False
         for version in person_data.get('versions', []):
             data = version['data']
             if data.get('last_party'):
                 needs_update = True
                 msg = "Fixing person {0}persons/{1}"
                 print msg.format(get_base_url(), person_data['id'])
                 del data['last_party']
         if not needs_update:
             continue
         person = PopItPerson.create_from_dict(person_data)
         person.save_to_popit(self.api)
         person.invalidate_cache_entries()
Example #29
0
    def handle(self, **options):
        from slumber.exceptions import HttpServerError, HttpClientError
        from candidates.popit import popit_unwrap_pagination

        for membership in popit_unwrap_pagination(self.api.memberships,
                                                  embed='',
                                                  per_page=100):
            post_id = membership.get('post_id')
            if not post_id:
                continue
            post = self.api.posts(post_id).get(embed='')['result']
            post_role = post['role']
            # Now find the election that matches this membership and role:
            election_found = False
            for edata in Election.objects.all():
                if edata.for_post_role != post_role:
                    continue
                election_found = True
                membership['election'] = edata.slug
                # Now correct the membership role:
                membership['role'] = edata.candidate_membership_role
                # Some of these memberships have a spurious 'area'
                # attribute, set to: {'area': {'name': ''}} which will
                # cause the PUT to fail, so remove that.
                existing_area = membership.get('area')
                if existing_area is not None:
                    if existing_area == {'name': ''}:
                        del membership['area']
                try:
                    self.api.memberships(membership['id']).put(membership)
                except (HttpServerError, HttpClientError) as e:
                    message = "There was an error when putting to membership: {0}"
                    print message.format(membership['id'])
                    print json.dumps(membership, indent=True, sort_keys=True)
                    print "The error was:"
                    print e.content
                break
            if not election_found:
                message = u"Warning: no election found for membership ID {0}"
                print message.format(membership['id'])
 def handle(self, *args, **options):
     """Write CSV files of candidates: one per election plus a combined one.

     The single positional argument is a prefix for the output filenames.
     For every election key with a truthy value in a person's
     'standing_in' data, that person contributes one row; rows are written
     to <prefix>-<election>.csv and also to <prefix>-all.csv.

     Raises CommandError unless exactly one positional argument is given.
     """
     if len(args) != 1:
         msg = "You must supply the prefix for output filenames"
         raise CommandError(msg)
     output_prefix = args[0]
     all_people = []
     election_to_people = defaultdict(list)
     for person_dict in popit_unwrap_pagination(
             self.api.persons,
             embed="membership.organization",
             per_page=100,
     ):
         standing_in = person_dict.get('standing_in')
         if not standing_in:
             continue
         for election, candidacy in standing_in.items():
             if not candidacy:
                 continue
             person = PopItPerson.create_from_dict(person_dict)
             person_as_csv_dict = person.as_dict(election=election)
             all_people.append(person_as_csv_dict)
             election_to_people[election].append(person_as_csv_dict)
     # None stands for the combined file, which is written last.
     elections = list(election_to_people) + [None]
     for election in elections:
         if election is None:
             output_filename = output_prefix + '-all.csv'
             people_data = all_people
         else:
             output_filename = output_prefix + '-' + election + '.csv'
             people_data = election_to_people[election]
         csv_data = list_to_csv(people_data)
         # Write to a temporary file in the destination directory and
         # atomically rename it into place.  The file must be closed
         # (and so flushed) *before* the rename, otherwise a partially
         # written file could become visible under the final name.
         with NamedTemporaryFile(
             delete=False,
             dir=dirname(output_filename)
         ) as ntf:
             ntf.write(csv_data)
         chmod(ntf.name, 0o644)
         rename(ntf.name, output_filename)
    def handle(self, **options):
        from slumber.exceptions import HttpServerError, HttpClientError
        from candidates.popit import popit_unwrap_pagination

        for membership in popit_unwrap_pagination(
                self.api.memberships, embed='', per_page=100
        ):
            post_id = membership.get('post_id')
            if not post_id:
                continue
            post = self.api.posts(post_id).get(embed='')['result']
            post_role = post['role']
            # Now find the election that matches this membership and role:
            election_found = False
            for edata in Election.objects.all():
                if edata.for_post_role != post_role:
                    continue
                election_found = True
                membership['election'] = edata.slug
                # Now correct the membership role:
                membership['role'] = edata.candidate_membership_role
                # Some of these memberships have a spurious 'area'
                # attribute, set to: {'area': {'name': ''}} which will
                # cause the PUT to fail, so remove that.
                existing_area = membership.get('area')
                if existing_area is not None:
                    if existing_area == {'name': ''}:
                        del membership['area']
                try:
                    self.api.memberships(membership['id']).put(membership)
                except (HttpServerError, HttpClientError) as e:
                    message = "There was an error when putting to membership: {0}"
                    print message.format(membership['id'])
                    print json.dumps(membership, indent=True, sort_keys=True)
                    print "The error was:"
                    print e.content
                break
            if not election_found:
                message = u"Warning: no election found for membership ID {0}"
                print message.format(membership['id'])
 def handle(self, **options):
     for person in popit_unwrap_pagination(
             self.api.persons,
             embed='',
             per_page=100
     ):
         needs_update = False
         for version in person.get('versions', []):
             data = version['data']
             if data.get('last_party'):
                 needs_update = True
                 msg = "Fixing person {0}persons/{1}"
                 print msg.format(self.get_base_url(), person['id'])
                 del data['last_party']
         if not needs_update:
             continue
         try:
             self.api.persons(person['id']).put(person)
         except HttpClientError as e:
             print "HttpClientError", e.content
             sys.exit(1)
         invalidate_cache_entries_from_person_data(person)
def get_all_parties(api, when_date):
    '''Fetch all political parties from PopIt divided into active / inactive'''
    result = {}
    for register in ec_registers:
        result[register] = {
            'active': {},
            'inactive': {},
        }
    for org in popit_unwrap_pagination(api.organizations, embed=''):
        if org['classification'] != 'Party':
            continue
        org_register = org.get('register')
        if org_register not in ec_registers:
            message = u"Unknown register {0} in organization: {1}"
            # raise Exception, message.format(org_register, org)
            print (u"Warning:" + message.format(org_register, org)).encode('utf-8')
            continue
        register_dict = result[org_register]
        party_dict = register_dict[
            'active' if organization_active(org, when_date) else 'inactive'
        ]
        party_dict[normalize_party_name(org['name'])] = org
    return result
Example #34
0
 def handle(self, **options):
     for person in popit_unwrap_pagination(
             self.api.persons,
             embed='',
             per_page=100
     ):
         msg = "Person {0}persons/{1}"
         print msg.format(self.get_base_url(), person['id'])
         for image in person.get('images', []):
             print "  Image with URL:", image['url']
             fix_image(image)
             image.pop('_id', None)
             # Some images have an empty 'created' field, which
             # causes an Elasticsearch indexing error, so remove
             # that if it's the case:
             if not image.get('created'):
                 image.pop('created', None)
         fix_dates(person)
         try:
             self.api.persons(person['id']).put(person)
         except HttpClientError as e:
             print "HttpClientError", e.content
             sys.exit(1)
    def handle(self, **options):

        for membership in popit_unwrap_pagination(self.api.memberships, embed="", per_page=100):
            post_id = membership.get("post_id")
            if not post_id:
                continue
            post = self.api.posts(post_id).get(embed="")["result"]
            post_role = post["role"]
            # Now find the election that matches this membership and role:
            election_found = False
            for election, edata in settings.ELECTIONS.items():
                if edata["for_post_role"] != post_role:
                    continue
                election_found = True
                membership["election"] = election
                # Now correct the membership role:
                membership["role"] = edata["candidate_membership_role"]
                # Some of these memberships have a spurious 'area'
                # attribute, set to: {'area': {'name': ''}} which will
                # cause the PUT to fail, so remove that.
                existing_area = membership.get("area")
                if existing_area is not None:
                    if existing_area == {"name": ""}:
                        del membership["area"]
                try:
                    self.api.memberships(membership["id"]).put(membership)
                except (HttpServerError, HttpClientError) as e:
                    message = "There was an error when putting to membership: {0}"
                    print message.format(membership["id"])
                    print json.dumps(membership, indent=True, sort_keys=True)
                    print "The error was:"
                    print e.content
                break
            if not election_found:
                message = u"Warning: no election found for membership ID {0}"
                print message.format(membership["id"])
Example #36
0
    def handle(self, **options):
        """Count the 2015 candidates and store the results.

        People standing in 2015 are counted overall, per party name and
        per constituency name.  Each of them is additionally counted as
        either 'standing_again' (they also have a 2010 candidacy) or
        'new_candidates' (they do not).  People who are not standing in
        2015 are ignored, so they no longer inflate 'new_candidates'.

        Every count is passed to self.add_or_update as a dict with the
        keys 'count_type', 'name', 'count' and 'object_id'.
        """
        all_constituencies = MapItData.constituencies_2010_name_sorted
        all_parties = PartyData.party_id_to_name
        counts = {
            'candidates_2015': 0,
            'parties': {
                name: {
                    'count': 0,
                    'party_id': party_id
                }
                for party_id, name in all_parties.items()
            },
            'constituencies': {
                n[1]['name']: {
                    'count': 0,
                    'con_id': n[1]['id']
                }
                for n in all_constituencies
            },
        }
        standing_again = 0
        new_candidates = 0
        # Loop over everything, counting things we're interested in
        for person in popit_unwrap_pagination(self.api.persons,
                                              embed="membership.organization",
                                              per_page=100):
            standing_in = person.get('standing_in') or {}
            if not standing_in.get('2015'):
                # Not a 2015 candidate, so neither "new" nor "again".
                continue
            counts['candidates_2015'] += 1

            party_name = person['party_memberships']['2015']['name']
            counts['parties'][party_name]['count'] += 1

            constituency_name = standing_in['2015']['name']
            counts['constituencies'][constituency_name]['count'] += 1

            if standing_in.get('2010'):
                standing_again += 1
            else:
                new_candidates += 1

        # Add or create objects in the database
        # Parties
        for party_name, data in counts['parties'].items():
            obj = {
                'count_type': 'party',
                'name': party_name,
                'count': data['count'],
                'object_id': data['party_id']
            }

            self.add_or_update(obj)

        # Constituencies
        for constituency_name, data in counts['constituencies'].items():
            obj = {
                'count_type': 'constituency',
                'name': constituency_name,
                'count': data['count'],
                'object_id': data['con_id']
            }

            self.add_or_update(obj)

        # Overall totals: candidates in 2015, new candidates and those
        # standing again.
        for name, count, object_id in (
            ('total_2015', counts['candidates_2015'], 'candidates_2015'),
            ('new_candidates', new_candidates, 'new_candidates'),
            ('standing_again', standing_again, 'standing_again'),
        ):
            obj = {
                'count_type': 'total',
                'name': name,
                'count': count,
                'object_id': object_id,
            }
            self.add_or_update(obj)
    def handle(self, **options):
        all_constituencies = MapItData.constituencies_2010_name_sorted
        all_parties = PartyData.party_id_to_name
        counts = {
            'candidates_2010': 0,
            'candidates_2015': 0,
            'parties': {
                party_id: {'count': 0, 'party_name': name}
                for party_id, name in all_parties.items()
            },
            'constituencies': {
                str(n[1]['id']): {'count': 0, 'con_name': n[1]['name']}
                for n in all_constituencies
            },
        }
        standing_again_same_party = 0
        standing_again_different_party = 0
        all_people = 0
        new_candidates = 0
        # Loop over everything, counting things we're interseted in
        for person in popit_unwrap_pagination(
                self.api.persons,
                embed="membership.organization",
                per_page=100
        ):
            all_people += 1
            if not person.get('standing_in'):
                continue
            if person['standing_in'].get('2010'):
                counts['candidates_2010'] += 1
            if person['standing_in'].get('2015'):
                counts['candidates_2015'] += 1
                party_id = person['party_memberships']['2015']['id']
                if party_id in counts['parties']:
                    counts['parties'][party_id]['count'] += 1
                    constituency_id = person['standing_in']['2015']['post_id']
                    counts['constituencies'][constituency_id]['count'] += 1
                    if person['standing_in'].get('2010'):
                        if party_id == person['party_memberships']['2010']['id']:
                            standing_again_same_party += 1
                        else:
                            standing_again_different_party += 1
                    else:
                        new_candidates += 1
                else:
                    message = u"WARNING: candidate {0} is standing in 2015 " + \
                        "for a dissolved or deleted party {1}"
                    print message.format(person['id'], party_id).encode('utf-8')

        # Add or create objects in the database
        # Parties
        for party_id, data in counts['parties'].items():
            obj = {
                'count_type': 'party',
                'name': data['party_name'],
                'count': data['count'],
                'object_id': party_id

            }

            self.add_or_update(obj)

        # Remove any parties that have dissolved but still might have
        # entries in the database:
        parties_in_database = set(
            CachedCount.objects.filter(count_type='party'). \
                values_list('object_id', flat=True)
        )
        current_parties = set(
            PartyData.party_id_to_name.keys()
        )
        parties_to_remove = parties_in_database - current_parties
        CachedCount.objects.filter(
            count_type='party',
            object_id__in=parties_to_remove
        ).delete()

        # Constituencies
        for constituency_id, data in counts['constituencies'].items():
            obj = {
                'count_type': 'constituency',
                'name': data['con_name'],
                'count': data['count'],
                'object_id': constituency_id

            }

            self.add_or_update(obj)

        for name, count, object_id in (
            ('new_candidates', new_candidates, 'new_candidates'),
            ('total_2010', counts['candidates_2010'], 'candidates_2010'),
            ('total_2015', counts['candidates_2015'], 'candidates_2015'),
            ('standing_again_same_party',
             standing_again_same_party,
             'standing_again_same_party'),
            ('standing_again_different_party',
             standing_again_different_party,
             'standing_again_different_party'),
            ('standing_again',
             standing_again_same_party + standing_again_different_party,
             'standing_again'),
        ):
            obj = {
                'count_type': 'total',
                'name': name,
                'count': count,
                'object_id': object_id,
            }
            self.add_or_update(obj)
 def handle(self, **options):
     max_person_id = -1
     for person in popit_unwrap_pagination(self.api.persons, per_page=100):
         person_id = int(person['id'])
         max_person_id = max(person_id, max_person_id)
     print "Maximum person ID is:", max_person_id
Example #39
0
    def handle(self, **options):
        """Recompute the cached candidate counts and store them.

        Counts people standing in 2010 and in 2015; the 2015 candidates
        are also counted per party and per constituency, and as either
        new candidates or as standing again for the same or a different
        party.  Each count is passed to self.add_or_update.
        """
        def store(count_type, name, count, object_id):
            # Hand one count to add_or_update in the dict form it takes.
            self.add_or_update({
                'count_type': count_type,
                'name': name,
                'count': count,
                'object_id': object_id,
            })

        all_constituencies = MapItData.constituencies_2010_name_sorted
        all_parties = PartyData.party_id_to_name
        counts = {
            'candidates_2010': 0,
            'candidates_2015': 0,
            'parties': {
                party_id: {'count': 0, 'party_name': name}
                for party_id, name in all_parties.items()
            },
            'constituencies': {
                str(n[1]['id']): {'count': 0, 'con_name': n[1]['name']}
                for n in all_constituencies
            },
        }
        standing_again_same_party = 0
        standing_again_different_party = 0
        new_candidates = 0
        # Loop over everything, counting things we're interested in
        for person in popit_unwrap_pagination(
                self.api.persons,
                embed="membership.organization",
                per_page=100
        ):
            standing_in = person.get('standing_in')
            if not standing_in:
                continue
            was_2010_candidate = bool(standing_in.get('2010'))
            if was_2010_candidate:
                counts['candidates_2010'] += 1
            if not standing_in.get('2015'):
                continue
            counts['candidates_2015'] += 1
            party_id = person['party_memberships']['2015']['id']
            counts['parties'][party_id]['count'] += 1
            constituency_id = standing_in['2015']['post_id']
            counts['constituencies'][constituency_id]['count'] += 1
            if not was_2010_candidate:
                new_candidates += 1
            elif party_id == person['party_memberships']['2010']['id']:
                standing_again_same_party += 1
            else:
                standing_again_different_party += 1

        # Add or create objects in the database
        # Parties
        for party_id, data in counts['parties'].items():
            store('party', data['party_name'], data['count'], party_id)

        # Constituencies
        for constituency_id, data in counts['constituencies'].items():
            store(
                'constituency', data['con_name'], data['count'],
                constituency_id
            )

        # Overall totals, as (name, count, object_id):
        standing_again = (
            standing_again_same_party + standing_again_different_party
        )
        for name, count, object_id in (
            ('new_candidates', new_candidates, 'new_candidates'),
            ('total_2010', counts['candidates_2010'], 'candidates_2010'),
            ('total_2015', counts['candidates_2015'], 'candidates_2015'),
            ('standing_again_same_party',
             standing_again_same_party,
             'standing_again_same_party'),
            ('standing_again_different_party',
             standing_again_different_party,
             'standing_again_different_party'),
            ('standing_again', standing_again, 'standing_again'),
        ):
            store('total', name, count, object_id)