def _fetch_companies_house(self, identifiers):
    """Refresh organizations from their Companies House JSON documents.

    For every identifier the company document is requested through
    ``helpers.fetch_json`` (file ``ch_<number>.json`` under the
    ``companieshouse`` path). The ``models.Organization`` linked to that
    identifier then has its founding date, dissolution date and
    classification updated and saved.

    Args:
        identifiers: iterable of identifier objects. Each must expose the
            company number as ``.identifier`` and be usable in the
            ``Organization.objects.get(identifiers=...)`` lookup.

    Identifiers whose fetch raises ``ValueError`` are skipped.
    """
    address_parts = (
        'CareofName', 'PoBox', 'AddressLine1', 'AddressLine2',
        'PostTown', 'Postcode', 'County', 'Country',
    )
    helpers.create_data_folder('companieshouse')

    for identifier in identifiers:
        company_number = identifier.identifier
        url = "http://data.companieshouse.gov.uk/doc/company/{}.json".format(company_number)
        filename = "ch_{}.json".format(company_number)
        try:
            j = helpers.fetch_json(url, filename, path='companieshouse', refresh=self.refresh)
        except ValueError:
            # Nothing usable came back for this company; move on to the next.
            continue

        org = models.Organization.objects.get(identifiers=identifier)

        topic = j['primaryTopic']
        org.founding_date = self._parse_date(topic.get('IncorporationDate'))
        org.dissolution_date = self._parse_date(topic.get('DissolutionDate'))
        # Use .get like the date fields above: a document without
        # 'CompanyCategory' should yield an empty classification rather
        # than abort the whole import with a KeyError.
        classification = topic.get('CompanyCategory')
        if classification is None:
            classification = ''
        org.classification = classification
        org.save()

        # # TODO: Other names
        # name = j['primaryTopic']['CompanyName']
        # other_names = ...

        address = topic.get('RegAddress')
        if address:
            # Join the known address components in a fixed order.
            # NOTE(review): the joined string is not used by the code
            # visible here - confirm whether it should be stored on org.
            address = ', '.join([address[k] for k in address_parts if k in address])
 def _bulk_download_lords_interests(self):
     """Fetch the MNIS members query for the House of Lords in one request.

     The query asks for the ``Interests`` and ``PreferredNames`` sections
     and the response is read through ``helpers.fetch_json`` into
     ``lords_interests.json`` under the ``lordsinterests`` path.

     Returns:
         Whatever ``helpers.fetch_json`` returns for that request.
     """
     # Historical data can also be fetched from MNIS, with a query like:
     # http://data.parliament.uk/membersdataplatform/services/mnis/members/query/joinedbetween=%sand%s|lordsmemberbetween=%sand%s/Interests%7CPreferredNames/
     # That is not used at the moment.
     endpoint = (
         "http://data.parliament.uk/membersdataplatform/services/mnis/members/query/"
         "House=Lords/Interests%7CPreferredNames/"
     )
     return helpers.fetch_json(
         endpoint,
         "lords_interests.json",
         path="lordsinterests",
         headers={"content-type": "application/json"},
         encoding="utf-8-sig",
         refresh=self.refresh,
     )
Ejemplo n.º 3
0
    def handle(self, *args, **options):
        """Import people, organizations, posts and memberships from parlparse.

        Options read:
            refresh: stored on ``self.refresh`` and forwarded to
                ``helpers.fetch_json``.
            since: when truthy, the dataset is narrowed before processing
                to people holding a non-redirect membership whose
                ``end_date`` (open-ended when absent) is on or after it.
        """
        self.refresh = options.get('refresh')

        j = helpers.fetch_json(
            "https://cdn.rawgit.com/mysociety/parlparse/master/members/people.json",
            "people.json",
            refresh=self.refresh,
        )

        since = options.get('since')
        if since:
            print("Importing since {} ...".format(since))

            # Memberships still running at the cut-off, ignoring redirects.
            recent_memberships = [
                m for m in j['memberships']
                if m.get('end_date', '9999-12-31') >= str(since) and not m.get('redirect')
            ]

            # Index the full person and post lists by their id.
            persons_by_id = {person['id']: person for person in j['persons']}
            posts_by_id = {post['id']: post for post in j['posts']}

            # Keep only the people referenced by a recent membership.
            kept_persons = {}
            for m in recent_memberships:
                if 'person_id' in m:
                    kept_persons[m['person_id']] = persons_by_id[m['person_id']]
            j['persons'] = kept_persons.values()

            # Every membership (recent or not) of the people we kept.
            j['memberships'] = [
                m for m in j['memberships'] if m.get('person_id') in kept_persons
            ]

            # The posts those memberships refer to.
            kept_posts = {}
            for m in j['memberships']:
                if 'post_id' in m:
                    kept_posts[m['post_id']] = posts_by_id[m['post_id']]
            j['posts'] = kept_posts.values()

        print("Processing people ...")
        processed_people = self._process_people(j['persons'])
        j['persons'] = processed_people

        print("Processing organizations ...")
        processed_orgs = self._process_organizations(j['organizations'])
        j['organizations'] = processed_orgs

        print("Processing posts ...")
        processed_posts = self._process_posts(j['posts'], j)
        j['posts'] = processed_posts

        print("Processing memberships ...")
        self._process_memberships(j['memberships'], j)
Ejemplo n.º 4
0
    def _get_overview_data(self, date):
        """Return the ``person_id`` of every entry in the getMPs listing.

        Args:
            date: object with ``strftime``; sent to the API as
                ``dd/mm/YYYY`` and, via ``str(date)``, used in the name of
                the file passed to ``helpers.fetch_json``.

        Returns:
            list of ``person_id`` values, in the order the API returned them.
        """
        filename = "mps_overview_{}.json".format(str(date))
        url = "{}/api/getMPs?key={}&date={}".format(
            self.base_url,
            self.api_key,
            date.strftime("%d/%m/%Y"),
        )
        listing = helpers.fetch_json(url, filename, path='twfy', refresh=self.refresh)

        person_ids = []
        for entry in listing:
            person_ids.append(entry["person_id"])
        return person_ids
Ejemplo n.º 5
0
    def _get_mp_info(self, mp_id):
        """Fetch the getMPInfo and getMP records for one MP and merge them.

        Args:
            mp_id: identifier interpolated into both API URLs and both
                file names.

        Returns:
            The getMPInfo response, with the getMP response stored under
            its ``'details'`` key.
        """
        requested_fields = [
            "wikipedia_url",
            "bbc_profile_url",
            "date_of_birth",
            "mp_website",
            "guardian_mp_summary",
            "journa_list_link",
        ]
        info_url = "{}/api/getMPInfo?key={}&id={}&fields={}".format(
            self.base_url, self.api_key, mp_id, ", ".join(requested_fields))
        info = helpers.fetch_json(
            info_url, "twfy_{}_info.json".format(mp_id), path='twfy', refresh=self.refresh)

        # Second request: the MP record itself, attached to the info dict.
        details_url = "{}/api/getMP?key={}&id={}".format(self.base_url, self.api_key, mp_id)
        details = helpers.fetch_json(
            details_url, "twfy_{}.json".format(mp_id), path='twfy', refresh=self.refresh)
        info['details'] = details

        return info
    def handle(self, *args, **options):
        """Fetch the EveryPolitician UK Commons Popolo file and process its people.

        The ``refresh`` option is stored on ``self.refresh`` and forwarded
        to ``helpers.fetch_json``.
        """
        self.refresh = options.get('refresh')

        popolo = helpers.fetch_json(
            "https://cdn.rawgit.com/everypolitician/everypolitician-data/master/data/UK/Commons/ep-popolo-v1.0.json",
            "ep-popolo-v1.0.json",
            refresh=self.refresh,
        )

        print("Processing people ...")
        self._process_people(popolo['persons'])
 def _download_lords_interests(self):
     """Collect every item of the paginated Lords registered-interests feed.

     Starting at page 0, each page is read with ``helpers.fetch_json`` into
     ``lords_interests_<NN>.json`` under the ``lordsinterests`` path, and the
     loop follows the ``next`` link in each result until there is none.

     Returns:
         list of all ``result['items']`` entries, in page order.
     """
     helpers.create_data_folder("lordsinterests")

     collected = []
     page_number = 0
     next_url = "http://lda.data.parliament.uk/lordsregisteredinterests.json?_view=Registered+Interest&_pageSize=50&_page=0"
     while next_url:
         page_file = "lords_interests_{:02d}.json".format(page_number)
         payload = helpers.fetch_json(next_url, page_file, path="lordsinterests", refresh=self.refresh)
         result = payload['result']
         collected.extend(result['items'])
         # A missing 'next' link yields None and ends the loop.
         next_url = result.get('next')
         page_number += 1
     return collected
Ejemplo n.º 8
0
    def handle(self, *args, **options):
        """Import ministerial organizations and posts from the parlparse files.

        Options read:
            refresh: stored on ``self.refresh`` and forwarded to
                ``helpers.fetch_json``.
            since: when truthy, each file is narrowed to non-redirect
                memberships whose ``end_date`` (open-ended when absent) is
                on or after it, plus the organizations they reference.
        """
        self.refresh = options.get('refresh')
        url_template = "https://cdn.rawgit.com/mysociety/parlparse/master/members/{}"

        for filename in ("ministers.json", "ministers-2010.json"):
            j = helpers.fetch_json(url_template.format(filename), filename, refresh=self.refresh)

            since = options.get('since')
            if since:
                print("Importing since {} ...".format(since))

                # Memberships still running at the cut-off, ignoring redirects.
                j['memberships'] = [
                    m for m in j['memberships']
                    if m.get('end_date', '9999-12-31') >= str(since) and not m.get('redirect')
                ]

                # Only the organizations those memberships point at.
                orgs_by_id = {org['id']: org for org in j['organizations']}
                referenced_orgs = {}
                for m in j['memberships']:
                    if 'organization_id' in m:
                        referenced_orgs[m['organization_id']] = orgs_by_id[m['organization_id']]
                j['organizations'] = referenced_orgs.values()

            print("Processing organizations ...")
            processed_orgs = self._process_organizations(j['organizations'])
            j['organizations'] = processed_orgs

            print("Processing ministerial posts ...")
            for membership in j['memberships']:
                self._process_minister(membership, j)
 def _fetch_opencorporates(self, identifiers):
     for identifier in identifiers:
         url = "https://api.opencorporates.com/companies/gb/{}".format(identifier.identifier)
         filename = "oc_{}.json".format(identifier.identifier)
         j = helpers.fetch_json(url, filename, refresh=self.refresh)