コード例 #1
0
ファイル: coronapioniers.py プロジェクト: ictmotion/helpradar
    def complete(self, initiative: InitiativeImport):
        """Fill in the details of ``initiative`` from its API post record.

        The post URL is built from ``initiative.source_id`` and fetched with
        ``self.get``. That call is made outside the ``try`` block, so whatever
        it raises propagates to the caller unchanged.

        The JSON response supplies the creation date, title, content, an
        optional URL, the extra fields and the comma-separated category names
        (tag ids translated through ``self.category_dict``).

        Raises:
            ScrapeException: if decoding or mapping the response fails; the
                underlying exception is chained as the cause.
        """
        # Key inside result['values'] whose first entry is used as the URL.
        url_field_key = '75aa5e4d-fe98-4a7a-94ec-adab2f7f9b88'

        response = self.get(self.config.get_api_post_url(initiative.source_id))

        try:
            payload = response.json()

            initiative.created_at = parser.parse(payload['created'])
            initiative.scraped_at = datetime.datetime.now()
            initiative.name = payload['title']
            initiative.description = payload['content']

            # The URL is optional: only set it when the field is present.
            field_values = payload['values']
            if url_field_key in field_values:
                initiative.url = field_values[url_field_key][0]

            initiative.extra_fields = self.parse_extra_fields(payload)

            initiative.category = ', '.join(
                [self.category_dict[tag['id']] for tag in payload['tags']]
            )
        except Exception as err:
            raise ScrapeException(
                f"Error in complete function for initiative {initiative.source_id}"
            ) from err
コード例 #2
0
ファイル: nlvoorelkaar.py プロジェクト: ictmotion/helpradar
    def complete(self, initiative: InitiativeImport):
        """Populate ``initiative`` from its HTML detail page.

        The page URL is built from ``initiative.source_id`` and fetched with
        ``self.get``. The description, group, source, details-table fields,
        organiser and (for the DEMAND group) the name are then read from the
        parsed HTML. When no location has been set afterwards,
        ``self.try_alternative_place`` is called.

        Raises:
            ScrapeException: if reading the page contents fails (the
                underlying exception is chained as the cause), or if
                ``self.extract_details_table`` returns 0.
        """
        initiative_url = self.config.get_initiative_url(initiative.source_id)
        # self.get already raises ScrapeExceptions, so it stays outside the try.
        page = self.get(initiative_url)

        try:
            soup = BeautifulSoup(page.content, 'html.parser')

            detail_rows = soup.find("dl").findAll(["dd", "dt"])
            initiative.description = soup.find("p").text.strip('\t\n\r ')
            initiative.group = self.config.group
            initiative.source = initiative_url

            set_count = self.extract_details_table(initiative, detail_rows)

            if self.config.group == InitiativeGroup.DEMAND:
                initiative.name = soup.find("h2", "result__title").contents[0]

            # The "Aangesloten bij:" heading wins; "Geplaatst door:" is only
            # consulted when the first heading is absent.
            for heading_text in ("Aangesloten bij:", "Geplaatst door:"):
                heading = soup.find("h5", text=heading_text)
                if heading:
                    sibling = heading.find_next_sibling()
                    initiative.organiser = sibling.get_text(strip=True)
                    break

            if not initiative.location:
                self.try_alternative_place(soup, initiative)
        except Exception as err:
            raise ScrapeException(
                f"Error reading contents from {initiative_url}"
            ) from err

        # Checked outside the try so this error is not re-wrapped above.
        if set_count == 0:
            raise ScrapeException("Failed to load field map details table")