def complete(self, initiative: InitiativeImport):
    """Fill the initiative from the platform's JSON API post detail."""
    post_url = self.config.get_api_post_url(initiative.source_id)
    detail = self.get(post_url)
    try:
        # GUID of the custom field that holds the initiative's own URL.
        initiative_url_guid = '75aa5e4d-fe98-4a7a-94ec-adab2f7f9b88'
        result = detail.json()
        initiative.created_at = parser.parse(result['created'])
        initiative.scraped_at = datetime.datetime.now()
        initiative.name = result['title']
        initiative.description = result['content']
        if initiative_url_guid in result['values']:
            initiative.url = result['values'][initiative_url_guid][0]
        initiative.extra_fields = self.parse_extra_fields(result)
        # Resolve tag ids to category names and store them comma-separated.
        category_list = [self.category_dict[tag['id']] for tag in result['tags']]
        initiative.category = ', '.join(category_list)
    except Exception as ex:
        msg = f"Error in complete function for initiative {initiative.source_id}"
        raise ScrapeException(msg) from ex
def complete(self, initiative: InitiativeImport):
    initiative_url = self.config.get_initiative_url(initiative.source_id)
    # This already raises ScrapeExceptions
    detail = PlatformSource.get(initiative_url)
    try:
        soup = BeautifulSoup(detail.content, 'html.parser')
        table = soup.find("dl")
        records = table.findAll(["dd", "dt"])
        initiative.description = soup.find("p").text.strip('\t\n\r ')
        initiative.group = self.config.group
        initiative.source = initiative_url
        set_count = self.extract_details_table(initiative, records)
        if self.config.group == InitiativeGroup.DEMAND:
            title = soup.find("h2", "result__title")
            initiative.organiser = title.contents[0]
        if not initiative.location:
            self.try_alternative_place(soup, initiative)
    except Exception as ex:
        msg = f"Error reading contents from {initiative_url}"
        raise ScrapeException(msg) from ex

    if set_count == 0:
        raise ScrapeException("Failed to load field map details table")
def complete(self, initiative: InitiativeImport):
    initiative_url = self.config.get_initiative_url(initiative.source_id)
    try:
        detail = PlatformSource.get(initiative_url)
        soup = BeautifulSoup(detail.content, 'html.parser')
        table = soup.find("dl")
        records = table.findAll(["dd", "dt"])
        initiative.description = soup.find("p").text.strip('\t\n\r')
        initiative.group = self.config.group
        initiative.source = initiative_url
        set_count = 0
        # Labels (<dt>) and values (<dd>) alternate; walk them pairwise.
        for i in range(0, len(records), 2):
            # TODO: Error prevention
            label = records[i].contents[1].strip("\":").lower()
            if label in self.config.field_map:
                setattr(initiative, self.config.field_map[label],
                        records[i + 1].contents[0])
                set_count += 1
        if self.config.group == InitiativeGroup.DEMAND:
            title = soup.find("h2", "result__title")
            initiative.organiser = title.contents[0]
        # TODO: Logging if no values are assigned
    except ScrapeException:  # should not catch
        if initiative is not None:
            initiative.state = "processing_error"
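# Hedged sketch: the second and fourth variants call self.extract_details_table,
# which is not shown in this section. This is a reconstruction based on the
# inline pairwise loop in the variant above — an assumption, not the confirmed
# helper from the source.
def extract_details_table(self, initiative: InitiativeImport, records) -> int:
    """Map alternating <dt>/<dd> records onto initiative attributes.

    Labels are looked up in config.field_map; returns how many fields were
    set so callers can detect an empty or unrecognized details table.
    """
    set_count = 0
    for i in range(0, len(records), 2):
        label = records[i].contents[1].strip("\":").lower()
        if label in self.config.field_map:
            setattr(initiative, self.config.field_map[label],
                    records[i + 1].contents[0])
            set_count += 1
    return set_count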
def complete(self, initiative: InitiativeImport):
    initiative_url = self.config.get_initiative_url(initiative.source_id)
    # This already raises ScrapeExceptions
    detail = self.get(initiative_url)
    try:
        soup = BeautifulSoup(detail.content, 'html.parser')
        table = soup.find("dl")
        records = table.findAll(["dd", "dt"])
        initiative.description = soup.find("p").text.strip('\t\n\r ')
        initiative.group = self.config.group
        initiative.source = initiative_url
        set_count = self.extract_details_table(initiative, records)
        if self.config.group == InitiativeGroup.DEMAND:
            title = soup.find("h2", "result__title")
            initiative.name = title.contents[0]
            # Prefer the affiliated organization ("Aangesloten bij:"); fall
            # back to the person who posted it ("Geplaatst door:").
            h5node_organization = soup.find("h5", text="Aangesloten bij:")
            if h5node_organization:
                initiative.organiser = h5node_organization.find_next_sibling(
                    ).get_text(strip=True)
            else:
                h5node_person = soup.find("h5", text="Geplaatst door:")
                if h5node_person:
                    initiative.organiser = h5node_person.find_next_sibling(
                        ).get_text(strip=True)
        if not initiative.location:
            self.try_alternative_place(soup, initiative)
    except Exception as ex:
        msg = f"Error reading contents from {initiative_url}"
        raise ScrapeException(msg) from ex

    if set_count == 0:
        raise ScrapeException("Failed to load field map details table")
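# Hedged usage sketch: how a scraper run might drive complete(). The names
# MyPlatformSource and initiatives() are assumptions for illustration only;
# the error handling mirrors the variants above.
#
#     source = MyPlatformSource(config)        # hypothetical subclass
#     for stub in source.initiatives():        # hypothetical listing call
#         try:
#             source.complete(stub)
#         except ScrapeException:
#             stub.state = "processing_error"  # as in the third variant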