def scrape(self):
        """Reload ``self.table`` from the JSON payload returned by ``get_data``.

        Every entry of ``data['features']`` becomes one record made of the
        council id, the geometry produced by ``make_geometry`` and one column
        per entry of ``data['fields']`` (string values are stripped of
        surrounding whitespace).  Each record is saved keyed on ``self.key``;
        afterwards the table is passed to ``sync_db_to_github`` and
        ``summarise``, and the raw payload is handed to ``store_history``.
        """
        # fetch raw text plus parsed json
        raw_payload, payload = self.get_data()
        features = payload['features']
        print("found %i %s" % (len(features), self.table))

        # wipe whatever the table held before
        truncate(self.table)

        # column definitions shared by every feature
        columns = payload['fields']

        for feature in features:

            # fixed columns first
            row = {
                'council_id': self.council_id,
                'geometry': self.make_geometry(feature),
            }

            # then one column per declared field
            for column in columns:
                name = column['name']
                cell = feature['attributes'][name]
                row[name] = cell.strip() if isinstance(cell, str) else cell

            # write the row
            save([self.key], row, self.table)

        sync_db_to_github(self.council_id, self.table, self.key)

        # report what was stored
        summarise(self.table)

        self.store_history(raw_payload, self.council_id)
예제 #2
0
    def scrape(self):
        """Reload ``self.table`` from the XML document returned by ``get_data``.

        ``self.pk`` is wrapped in a list when it is not one already.  The
        document is parsed, every element matching ``self.feature_tag`` is
        turned into a record by ``process_feature`` and saved keyed on
        ``self.pk``.  Afterwards the table is passed to ``sync_db_to_github``
        and ``summarise``, and the raw document is handed to
        ``store_history``.
        """
        # save() and the sync below are always given a list of key columns
        if not isinstance(self.pk, list):
            self.pk = [self.pk]

        # fetch and parse the xml
        raw_xml = self.get_data()
        root = etree.fromstring(raw_xml)
        elements = root.findall(self.feature_tag)
        print("found %i %s" % (len(elements), self.table))

        # wipe whatever the table held before
        truncate(self.table)

        for element in elements:
            row = self.process_feature(element, root)

            # write the row
            save(self.pk, row, self.table)

        sync_db_to_github(self.council_id, self.table, self.pk)

        # report what was stored
        summarise(self.table)

        self.store_history(raw_xml, self.council_id)
    def scrape(self):
        """Record the resources of ``self.dataset`` and return one resource URL.

        Sets ``self.url`` to ``base_url`` + ``dataset``, loads the JSON
        payload via ``get_data`` and saves one row per entry of
        ``data['result']['resources']`` into the ``resources`` table.  The
        decoded payload is passed through ``format_json`` and on to
        ``sync_file_to_github``.

        Returns the ``url`` of the last resource whose format equals
        ``self.return_format`` (case-insensitive), or ``None`` when no
        resource matches.
        """
        self.url = "%s%s" % (self.base_url, self.dataset)

        # fetch raw bytes plus parsed json
        raw_payload, payload = self.get_data()
        resources = payload['result']['resources']
        print("found %i %s resources" %
              (len(resources), self.dataset))

        matching_url = None

        for resource in resources:

            # columns copied straight from the resource, then the dataset name
            row = {
                column: resource[column]
                for column in ('format', 'revision_id', 'created', 'url')
            }
            row['dataset'] = self.dataset

            # scraper-specific extra columns
            row.update({name: resource[name] for name in self.extra_fields})

            # write the row
            save(['dataset', 'revision_id', 'format'], row, 'resources')

            # a later match overrides an earlier one
            if resource['format'].lower() == self.return_format.lower():
                matching_url = resource['url']

        sync_file_to_github(
            self.council_id,
            self.dataset,
            format_json(raw_payload.decode(self.encoding)),
        )

        return matching_url
    def scrape(self):
        """Reload ``self.table`` from the JSON features returned by ``get_data``.

        Every entry of ``data['features']`` becomes one record made of the
        council id, the geometry produced by ``make_geometry``, a ``pk``
        column and one column per property.  ``pk`` is the feature ``id``
        when ``self.key`` is ``None``, otherwise the property named by
        ``self.key``.  Only properties whose value is ``None``, a number or
        a string are copied; strings are stripped of surrounding whitespace.
        Afterwards the table is passed to ``sync_db_to_github`` and
        ``summarise``, and the raw payload is handed to ``store_history``.
        """
        # fetch raw text plus parsed json
        raw_payload, payload = self.get_data()
        features = payload['features']
        print("found %i %s" % (len(features), self.table))

        # wipe whatever the table held before
        truncate(self.table)

        for feature in features:

            # fixed columns first
            row = {
                'council_id': self.council_id,
                'geometry': self.make_geometry(feature),
            }
            row['pk'] = (
                feature['id'] if self.key is None
                else feature['properties'][self.key]
            )

            # copy scalar properties only; anything else is skipped
            properties = feature['properties']
            for name in properties:
                cell = properties[name]
                if cell is not None and not isinstance(cell, (Number, str)):
                    continue
                row[name] = cell.strip() if isinstance(cell, str) else cell

            # write the row
            save(['pk'], row, self.table)

        # sync on the generated 'pk' column when no key property is configured
        sync_key = 'pk' if self.key is None else self.key
        sync_db_to_github(self.council_id, self.table, sync_key)

        # report what was stored
        summarise(self.table)

        self.store_history(raw_payload, self.council_id)