def scrape(self):
    """Rebuild ``self.table`` from the JSON feature collection.

    Reads the payload via ``self.get_data()``, empties the table, then
    stores one record per entry in ``data['features']``. Each record
    carries the council id, the geometry from ``self.make_geometry()``
    and one column per entry in ``data['fields']``, whose value is read
    from the feature's ``attributes``. String values are stripped of
    surrounding whitespace; everything else is stored unchanged.

    Afterwards the table is synced to GitHub, summarised, and the raw
    payload is handed to ``self.store_history()``.
    """
    # load json
    raw_payload, payload = self.get_data()
    features = payload['features']
    print("found %i %s" % (len(features), self.table))

    # clear any existing data
    truncate(self.table)

    # grab field names
    columns = payload['fields']

    for feature in features:
        # assemble record
        row = {
            'council_id': self.council_id,
            'geometry': self.make_geometry(feature),
        }
        for column in columns:
            cell = feature['attributes'][column['name']]
            # only strings are tidied; other values pass through untouched
            row[column['name']] = cell.strip() if isinstance(cell, str) else cell

        # save to db
        save([self.key], row, self.table)

    sync_db_to_github(self.council_id, self.table, self.key)

    # print summary
    summarise(self.table)

    self.store_history(raw_payload, self.council_id)
def scrape(self):
    """Rebuild ``self.table`` from the XML document.

    ``self.pk`` is first normalised to a list (a non-list value is
    wrapped in a one-element list and written back to the instance).
    The document from ``self.get_data()`` is parsed, every element
    matching ``self.feature_tag`` is converted to a record by
    ``self.process_feature()`` and saved under the ``self.pk`` key
    columns.

    Afterwards the table is synced to GitHub, summarised, and the raw
    document is handed to ``self.store_history()``.
    """
    # save() below is always given the key columns as a list
    pk_is_list = isinstance(self.pk, list)
    if not pk_is_list:
        self.pk = [self.pk]

    # load xml
    raw_xml = self.get_data()
    root = etree.fromstring(raw_xml)
    nodes = root.findall(self.feature_tag)
    print("found %i %s" % (len(nodes), self.table))

    # clear any existing data
    truncate(self.table)

    for node in nodes:
        # the whole tree is passed along with the individual element
        row = self.process_feature(node, root)

        # save to db
        save(self.pk, row, self.table)

    sync_db_to_github(self.council_id, self.table, self.pk)

    # print summary
    summarise(self.table)

    self.store_history(raw_xml, self.council_id)
def scrape(self):
    """Record the dataset's resources and return the matching URL.

    Sets ``self.url`` to ``self.base_url`` followed by ``self.dataset``
    and loads the JSON payload via ``self.get_data()``. Every entry in
    ``data['result']['resources']`` is saved to the ``resources`` table,
    keyed on dataset, revision id and format, with any
    ``self.extra_fields`` copied across as additional columns.

    The decoded, formatted payload is then synced to GitHub.

    Returns:
        The ``url`` of the last resource whose format equals
        ``self.return_format`` (compared case-insensitively), or
        ``None`` when no resource matches.
    """
    self.url = "%s%s" % (self.base_url, self.dataset)
    matched_url = None

    # load json
    raw_payload, payload = self.get_data()
    resources = payload['result']['resources']
    print("found %i %s resources" % (len(resources), self.dataset))

    for resource in resources:
        # assemble record: fixed columns first, then the configured extras
        row = {
            column: resource[column]
            for column in ('format', 'revision_id', 'created', 'url')
        }
        row['dataset'] = self.dataset
        for extra in self.extra_fields:
            row[extra] = resource[extra]

        # save to db
        save(['dataset', 'revision_id', 'format'], row, 'resources')

        # a later match overrides an earlier one
        wanted = resource['format'].lower() == self.return_format.lower()
        if wanted:
            matched_url = resource['url']

    pretty_payload = format_json(raw_payload.decode(self.encoding))
    sync_file_to_github(self.council_id, self.dataset, pretty_payload)

    return matched_url
def scrape(self):
    """Rebuild ``self.table`` from the JSON feature collection.

    Reads the payload via ``self.get_data()``, empties the table, then
    stores one record per entry in ``data['features']``. Each record
    carries the council id, the geometry from ``self.make_geometry()``
    and a ``pk`` column. The ``pk`` is the feature's ``id`` when
    ``self.key`` is ``None``, otherwise the property named by
    ``self.key``.

    Only properties whose value is ``None``, a ``Number`` or a string
    are copied into the record; strings are stripped of surrounding
    whitespace and any other value type is skipped.

    Afterwards the table is synced to GitHub, summarised, and the raw
    payload is handed to ``self.store_history()``.
    """
    # load json
    raw_payload, payload = self.get_data()
    features = payload['features']
    print("found %i %s" % (len(features), self.table))

    # clear any existing data
    truncate(self.table)

    for feature in features:
        # assemble record
        row = {
            'council_id': self.council_id,
            'geometry': self.make_geometry(feature),
        }
        row['pk'] = (
            feature['id'] if self.key is None
            else feature['properties'][self.key]
        )

        for name in feature['properties']:
            cell = feature['properties'][name]
            if isinstance(cell, str):
                row[name] = cell.strip()
            elif cell is None or isinstance(cell, Number):
                row[name] = cell
            # any other value type is deliberately left out of the record

        # save to db
        save(['pk'], row, self.table)

    # sync on the configured key, or on 'pk' when none is configured
    sync_key = 'pk' if self.key is None else self.key
    sync_db_to_github(self.council_id, self.table, sync_key)

    # print summary
    summarise(self.table)

    self.store_history(raw_payload, self.council_id)