def complete(self, initiative: InitiativeImport):
    """Enrich *initiative* by scraping its detail page.

    Fetches the detail page for the initiative's ``source_id``, reads the
    ``<dl>`` details table through ``self.config.field_map``, and fills in
    description, group and source. For DEMAND-group initiatives the page
    title is used as the organiser.

    Raises:
        ScrapeException: when fetching the page fails; the initiative is
            marked ``"processing_error"`` before the exception propagates.
    """
    initiative_url = self.config.get_initiative_url(initiative.source_id)
    try:
        detail = PlatformSource.get(initiative_url)
        soup = BeautifulSoup(detail.content, 'html.parser')
        table = soup.find("dl")
        records = table.findAll(["dd", "dt"])
        initiative.description = soup.find("p").text.strip('\t\n\r')
        initiative.group = self.config.group
        initiative.source = initiative_url
        # The <dl> alternates <dt> label / <dd> value, so step by two.
        set_count = 0
        for i in range(0, len(records), 2):
            # TODO: Error prevention — contents[1] assumes a fixed node layout.
            # Labels look like '"Locatie":'; normalize before the map lookup.
            label = records[i].contents[1].strip("\":").lower()
            if label in self.config.field_map:
                setattr(initiative, self.config.field_map[label],
                        records[i + 1].contents[0])
                set_count += 1
        if set_count == 0:
            # NOTE(review): no field-map labels matched — page layout may have
            # changed; worth logging once a logger is available here.
            pass
        if self.config.group == InitiativeGroup.DEMAND:
            title = soup.find("h2", "result__title")
            initiative.organiser = title.contents[0]
    except ScrapeException:
        # Fix: previously this handler swallowed the exception (the original
        # comment itself said "should not catch"), hiding failures from the
        # caller. Mark the record, then propagate.
        if initiative is not None:
            initiative.state = "processing_error"
        raise
def map_initiative(response, item):
    """Build an InitiativeImport from one API item (attribute-style access).

    *response* supplies the raw payload stored in ``extra_fields``; *item*
    is the parsed object the fields are read from.
    """
    # dateutil rather than datetime, because:
    # https://stackoverflow.com/a/3908349/167131
    initiative = InitiativeImport(
        source_id=item.id,
        source_uri=f"https://wijamsterdam.nl/initiatief/{item.id}",
        created_at=parser.parse(item.createdAt),
        name=item.title,
        description=f"{item.summary}\n--------\n{item.description}",
        group=InitiativeGroup.SUPPLY,
        extra_fields=response.content.decode("utf-8"),
        # Probably better to leave email / phone empty;
        # name is already tricky maybe albeit open data.
    )
    # Optional extraData attributes, mapped onto initiative attributes.
    optional_fields = (
        ("area", "location"),
        ("isOrganiserName", "organiser"),
        ("theme", "category"),
        ("isOrganiserWebsite", "url"),
    )
    extra = item.extraData
    for source_attr, target_attr in optional_fields:
        if hasattr(extra, source_attr):
            setattr(initiative, target_attr, getattr(extra, source_attr))
    if hasattr(item, "position"):
        initiative.latitude = item.position.lat
        initiative.longitude = item.position.lng
    return initiative
def complete(self, initiative: InitiativeImport):
    """Scrape the detail page for *initiative* and fill in its fields.

    Parses the page's ``<dl>`` details table via ``extract_details_table``;
    for DEMAND-group initiatives the result title becomes the organiser.

    Raises:
        ScrapeException: when the page contents cannot be parsed, or when
            no field-map entries could be extracted at all.
    """
    initiative_url = self.config.get_initiative_url(initiative.source_id)
    # PlatformSource.get already raises ScrapeExceptions on its own.
    detail = PlatformSource.get(initiative_url)
    try:
        page = BeautifulSoup(detail.content, 'html.parser')
        details_list = page.find("dl")
        entries = details_list.findAll(["dd", "dt"])
        initiative.description = page.find("p").text.strip('\t\n\r ')
        initiative.group = self.config.group
        initiative.source = initiative_url
        mapped_count = self.extract_details_table(initiative, entries)
        if self.config.group == InitiativeGroup.DEMAND:
            heading = page.find("h2", "result__title")
            initiative.organiser = heading.contents[0]
        if not initiative.location:
            self.try_alternative_place(page, initiative)
    except Exception as ex:
        raise ScrapeException(
            f"Error reading contents from {initiative_url}") from ex
    if mapped_count == 0:
        raise ScrapeException("Failed to load field map details table")
def map_initiative(item):
    """Map one raw API item (a JSON dict) onto an InitiativeImport.

    The full item is serialized into ``extra_fields`` so nothing from the
    source payload is lost.
    """
    raw_item = json.dumps(item)
    # dateutil rather than datetime, because:
    # https://stackoverflow.com/a/3908349/167131
    initiative = InitiativeImport(
        source_id=item["id"],
        source_uri=f"https://wijamsterdam.nl/initiatief/{item['id']}",
        created_at=parser.parse(item["createdAt"]),
        name=item["title"],
        description=f"{item['summary']}\n--------\n{item['description']}",
        group=InitiativeGroup.SUPPLY,
        extra_fields=raw_item,
        # Probably better to leave email / phone empty;
        # name is already tricky maybe albeit open data.
    )
    # Optional extraData keys, mapped onto initiative attributes.
    extra = item["extraData"]
    for key, attr in (
        ("area", "location"),
        ("isOrganiserName", "organiser"),
        ("theme", "category"),
        ("isOrganiserWebsite", "url"),
    ):
        if key in extra:
            setattr(initiative, attr, extra[key])
    if "position" in item:
        initiative.latitude = item["position"]["lat"]
        initiative.longitude = item["position"]["lng"]
    return initiative
def complete(self, initiative: InitiativeImport):
    """Scrape an initiative's detail page and complete its fields.

    Fills description, group and source from the page, extracts the
    ``<dl>`` details table, and — for DEMAND-group initiatives — reads
    the name plus the organiser (organisation first, otherwise the
    person who posted it).

    Raises:
        ScrapeException: when the page contents cannot be parsed, or when
            no field-map entries could be extracted at all.
    """
    initiative_url = self.config.get_initiative_url(initiative.source_id)
    # self.get already raises ScrapeExceptions on its own.
    detail = self.get(initiative_url)
    try:
        page = BeautifulSoup(detail.content, 'html.parser')
        details_list = page.find("dl")
        entries = details_list.findAll(["dd", "dt"])
        initiative.description = page.find("p").text.strip('\t\n\r ')
        initiative.group = self.config.group
        initiative.source = initiative_url
        mapped_count = self.extract_details_table(initiative, entries)
        if self.config.group == InitiativeGroup.DEMAND:
            heading = page.find("h2", "result__title")
            initiative.name = heading.contents[0]
            # Prefer the organisation ("Aangesloten bij:"); fall back to
            # the posting person ("Geplaatst door:").
            for label in ("Aangesloten bij:", "Geplaatst door:"):
                marker = page.find("h5", text=label)
                if marker:
                    initiative.organiser = marker.find_next_sibling(
                        ).get_text(strip=True)
                    break
        if not initiative.location:
            self.try_alternative_place(page, initiative)
    except Exception as ex:
        raise ScrapeException(
            f"Error reading contents from {initiative_url}") from ex
    if mapped_count == 0:
        raise ScrapeException("Failed to load field map details table")