def _fetch_companies_house(self, identifiers):
    """Fetch Companies House records and update the matching Organizations.

    For each identifier (whose ``identifier`` attribute is a Companies
    House company number) this downloads the company JSON, then stores the
    incorporation date, dissolution date and company category on the
    Organization linked to that identifier.
    """
    # Keys of the RegAddress sub-document, in display order.
    address_parts = ('CareofName', 'PoBox', 'AddressLine1', 'AddressLine2',
                     'PostTown', 'Postcode', 'County', 'Country')
    helpers.create_data_folder('companieshouse')
    for identifier in identifiers:
        url = "http://data.companieshouse.gov.uk/doc/company/{}.json".format(identifier.identifier)
        filename = "ch_{}.json".format(identifier.identifier)
        try:
            j = helpers.fetch_json(url, filename, path='companieshouse',
                                   refresh=self.refresh)
        except ValueError:
            # Response was not parseable JSON (e.g. company not found) — skip.
            continue
        org = models.Organization.objects.get(identifiers=identifier)
        org.founding_date = self._parse_date(j['primaryTopic'].get('IncorporationDate'))
        org.dissolution_date = self._parse_date(j['primaryTopic'].get('DissolutionDate'))
        # Fix: the original indexed CompanyCategory directly, raising KeyError
        # for records without it even though it then handled None. Use .get()
        # and normalise missing/null to '' as before.
        org.classification = j['primaryTopic'].get('CompanyCategory') or ''
        org.save()
        # # TODO: Other names
        # name = j['primaryTopic']['CompanyName']
        # other_names = ...
        address = j['primaryTopic'].get('RegAddress')
        if address:
            address = ', '.join(address[k] for k in address_parts if k in address)
            # NOTE(review): `address` is built but never stored anywhere —
            # presumably a contact detail should be created here; confirm intent.
def _bulk_download_lords_interests(self):
    """Download the full register of Lords' interests from MNIS in one request.

    Historical data could be fetched with something like:
    http://data.parliament.uk/membersdataplatform/services/mnis/members/query/joinedbetween=%sand%s|lordsmemberbetween=%sand%s/Interests%7CPreferredNames/
    but we don't use that currently.
    """
    url = ("http://data.parliament.uk/membersdataplatform/services/mnis/"
           "members/query/House=Lords/Interests%7CPreferredNames/")
    return helpers.fetch_json(
        url,
        "lords_interests.json",
        path="lordsinterests",
        headers={"content-type": "application/json"},
        encoding="utf-8-sig",
        refresh=self.refresh,
    )
def handle(self, *args, **options):
    """Download parlparse's people.json and import people, organizations,
    posts and memberships, optionally restricted to activity since a date."""
    self.refresh = options.get('refresh')
    j = helpers.fetch_json(
        "https://cdn.rawgit.com/mysociety/parlparse/master/members/people.json",
        "people.json",
        refresh=self.refresh,
    )
    since = options.get('since')
    if since:
        print("Importing since {} ...".format(since))
        cutoff = str(since)
        # Memberships still current at the cutoff, skipping redirect stubs.
        recent = [
            m for m in j['memberships']
            if m.get('end_date', '9999-12-31') >= cutoff and not m.get('redirect')
        ]
        persons_by_id = {p['id']: p for p in j['persons']}
        posts_by_id = {p['id']: p for p in j['posts']}
        # Restrict persons to those referenced by the surviving memberships.
        keep_persons = {
            m['person_id']: persons_by_id[m['person_id']]
            for m in recent if 'person_id' in m
        }
        j['persons'] = keep_persons.values()
        # Keep *all* memberships (not just recent ones) for the kept persons.
        j['memberships'] = [
            m for m in j['memberships'] if m.get('person_id') in keep_persons
        ]
        j['posts'] = {
            m['post_id']: posts_by_id[m['post_id']]
            for m in j['memberships'] if 'post_id' in m
        }.values()
    print("Processing people ...")
    j['persons'] = self._process_people(j['persons'])
    print("Processing organizations ...")
    j['organizations'] = self._process_organizations(j['organizations'])
    print("Processing posts ...")
    j['posts'] = self._process_posts(j['posts'], j)
    print("Processing memberships ...")
    self._process_memberships(j['memberships'], j)
def _get_overview_data(self, date):
    """Return the TheyWorkForYou person_ids of all MPs sitting on *date*."""
    url = "{}/api/getMPs?key={}&date={}".format(
        self.base_url, self.api_key, date.strftime("%d/%m/%Y"))
    mps = helpers.fetch_json(
        url,
        "mps_overview_{}.json".format(str(date)),
        path='twfy',
        refresh=self.refresh,
    )
    return [mp["person_id"] for mp in mps]
def _get_mp_info(self, mp_id):
    """Fetch extended data for one MP from TheyWorkForYou.

    Combines getMPInfo (requesting several extra fields) with getMP,
    whose result is attached under the 'details' key of the returned dict.
    """
    extra_fields = ", ".join([
        "wikipedia_url", "bbc_profile_url", "date_of_birth",
        "mp_website", "guardian_mp_summary", "journa_list_link",
    ])
    info_url = "{}/api/getMPInfo?key={}&id={}&fields={}".format(
        self.base_url, self.api_key, mp_id, extra_fields)
    info = helpers.fetch_json(
        info_url, "twfy_{}_info.json".format(mp_id),
        path='twfy', refresh=self.refresh)
    details_url = "{}/api/getMP?key={}&id={}".format(
        self.base_url, self.api_key, mp_id)
    info['details'] = helpers.fetch_json(
        details_url, "twfy_{}.json".format(mp_id),
        path='twfy', refresh=self.refresh)
    return info
def handle(self, *args, **options):
    """Download the EveryPolitician Commons Popolo file and import its people."""
    self.refresh = options.get('refresh')
    url = ("https://cdn.rawgit.com/everypolitician/everypolitician-data/"
           "master/data/UK/Commons/ep-popolo-v1.0.json")
    j = helpers.fetch_json(url, "ep-popolo-v1.0.json", refresh=self.refresh)
    print("Processing people ...")
    self._process_people(j['persons'])
def _download_lords_interests(self):
    """Page through the LDA lordsregisteredinterests feed and return all items.

    Follows the feed's own 'next' links until exhausted; each page is cached
    to its own numbered file under the lordsinterests data folder.
    """
    helpers.create_data_folder("lordsinterests")
    next_url = ("http://lda.data.parliament.uk/lordsregisteredinterests.json"
                "?_view=Registered+Interest&_pageSize=50&_page=0")
    page = 0
    items = []
    while next_url:
        result = helpers.fetch_json(
            next_url,
            "lords_interests_{:02d}.json".format(page),
            path="lordsinterests",
            refresh=self.refresh,
        )['result']
        items.extend(result['items'])
        next_url = result.get('next')
        page += 1
    return items
def handle(self, *args, **options):
    """Download and import both parlparse ministers files, optionally
    restricted to memberships current since a given date."""
    self.refresh = options.get('refresh')
    for filename in ("ministers.json", "ministers-2010.json"):
        url = "https://cdn.rawgit.com/mysociety/parlparse/master/members/{}".format(filename)
        j = helpers.fetch_json(url, filename, refresh=self.refresh)
        since = options.get('since')
        if since:
            print("Importing since {} ...".format(since))
            cutoff = str(since)
            # Memberships still current at the cutoff, skipping redirect stubs.
            j['memberships'] = [
                m for m in j['memberships']
                if m.get('end_date', '9999-12-31') >= cutoff and not m.get('redirect')
            ]
            orgs_by_id = {o['id']: o for o in j['organizations']}
            # Restrict organizations to those referenced by kept memberships.
            j['organizations'] = {
                m['organization_id']: orgs_by_id[m['organization_id']]
                for m in j['memberships'] if 'organization_id' in m
            }.values()
        print("Processing organizations ...")
        j['organizations'] = self._process_organizations(j['organizations'])
        print("Processing ministerial posts ...")
        for membership in j['memberships']:
            self._process_minister(membership, j)
def _fetch_opencorporates(self, identifiers):
    """Fetch the OpenCorporates record for each GB company identifier.

    The parsed JSON is not used here; presumably fetch_json caches the
    response to disk for later use — confirm against helpers.fetch_json.
    """
    for identifier in identifiers:
        company_number = identifier.identifier
        helpers.fetch_json(
            "https://api.opencorporates.com/companies/gb/{}".format(company_number),
            "oc_{}.json".format(company_number),
            refresh=self.refresh,
        )