Code Example #1
    def complete(self, initiative: InitiativeImport):
        initiative_url = self.config.get_initiative_url(initiative.source_id)

        try:
            detail = PlatformSource.get(initiative_url)

            soup = BeautifulSoup(detail.content, 'html.parser')

            table = soup.find("dl")
            records = table.find_all(["dd", "dt"])
            initiative.description = soup.find("p").text.strip('\t\n\r')
            initiative.group = self.config.group
            initiative.source = initiative_url

            setcount = 0
            for i in range(0, len(records), 2):
                # TODO: Error prevention (see the defensive sketch after this example)
                label = records[i].contents[1].strip("\":").lower()
                if label in self.config.field_map:
                    setattr(initiative, self.config.field_map[label], records[i + 1].contents[0])
                    setcount += 1

            if self.config.group == InitiativeGroup.DEMAND:
                title = soup.find("h2", "result__title")
                initiative.organiser = title.contents[0]

            # TODO: Logging if no values are assigned
        except ScrapeException as e:
            # Should not catch here; log instead, e.g.:
            # 'error scraping ' + initiative_url + ': ' + e.args[0]
            if initiative is not None:
                initiative.state = "processing_error"
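The "TODO: Error prevention" above points at the fragile indexing: `records[i].contents[1]` raises an IndexError on a malformed `dt`/`dd` pair. A minimal defensive sketch of that loop body, assuming the same alternating `dt`/`dd` structure (the helper name `_map_record_pair` is hypothetical, not part of the project):

    def _map_record_pair(self, initiative, label_tag, value_tag):
        # Hypothetical helper: skips malformed pairs instead of raising.
        if len(label_tag.contents) < 2 or not value_tag.contents:
            return 0
        label = str(label_tag.contents[1]).strip("\":").lower()
        if label not in self.config.field_map:
            return 0
        setattr(initiative, self.config.field_map[label], value_tag.contents[0])
        return 1

The loop body then becomes `setcount += self._map_record_pair(initiative, records[i], records[i + 1])`.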
Code Example #2
    def map_initiative(response, item):
        initiative = InitiativeImport(
            source_id=item.id,
            source_uri=f"https://wijamsterdam.nl/initiatief/{item.id}",
            # using dateutil and not datetime because: https://stackoverflow.com/a/3908349/167131
            created_at=parser.parse(item.createdAt),
            name=item.title,
            description=f"{item.summary}"
            f"\n--------\n"
            f"{item.description}",
            group=InitiativeGroup.SUPPLY,
            extra_fields=response.content.decode("utf-8")
            # Probably better to leave email / phone empty;
            # even the name is tricky, albeit open data.
        )

        if hasattr(item.extraData, "area"):
            initiative.location = item.extraData.area
        if hasattr(item.extraData, "isOrganiserName"):
            initiative.organiser = item.extraData.isOrganiserName
        if hasattr(item.extraData, "theme"):
            initiative.category = item.extraData.theme
        if hasattr(item.extraData, "isOrganiserWebsite"):
            initiative.url = item.extraData.isOrganiserWebsite
        if hasattr(item, "position"):
            initiative.latitude = item.position.lat
            initiative.longitude = item.position.lng

        return initiative
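Note that this variant reads `item` with attribute access (`item.id`, `item.extraData.area`), so `item` must be an object rather than a plain dict (contrast with Example #4 below). One way to produce such an object from the API JSON, purely as an assumption about the deserialization step:

    import json
    from types import SimpleNamespace

    def to_namespace(raw: bytes):
        # Parse JSON into nested SimpleNamespace objects so that
        # attribute access and hasattr() work as map_initiative expects.
        return json.loads(raw, object_hook=lambda d: SimpleNamespace(**d))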
Code Example #3
    def complete(self, initiative: InitiativeImport):
        initiative_url = self.config.get_initiative_url(initiative.source_id)
        # This already raises ScrapeExceptions
        detail = PlatformSource.get(initiative_url)

        try:
            soup = BeautifulSoup(detail.content, 'html.parser')

            table = soup.find("dl")
            records = table.find_all(["dd", "dt"])
            initiative.description = soup.find("p").text.strip('\t\n\r ')
            initiative.group = self.config.group
            initiative.source = initiative_url

            set_count = self.extract_details_table(initiative, records)

            if self.config.group == InitiativeGroup.DEMAND:
                title = soup.find("h2", "result__title")
                initiative.organiser = title.contents[0]

            if not initiative.location:
                self.try_alternative_place(soup, initiative)
        except Exception as ex:
            msg = f"Error reading contents from {initiative_url}"
            raise ScrapeException(msg) from ex

        if set_count == 0:
            raise ScrapeException("Failed to load field map details table")
Code Example #4
File: wijamsterdam.py  Project: ictmotion/helpradar
    def map_initiative(item):
        org = json.dumps(item)
        initiative = InitiativeImport(
            source_id=item["id"],
            source_uri=f"https://wijamsterdam.nl/initiatief/{item['id']}",
            # using dateutil and not datetime because: https://stackoverflow.com/a/3908349/167131
            created_at=parser.parse(item["createdAt"]),
            name=item["title"],
            description=f"{item['summary']}"
            f"\n--------\n"
            f"{item['description']}",
            group=InitiativeGroup.SUPPLY,
            extra_fields=org
            # Probably better to leave email / phone empty;
            # even the name is tricky, albeit open data.
        )

        extra_data = item["extraData"]
        if "area" in extra_data:
            initiative.location = extra_data["area"]
        if "isOrganiserName" in extra_data:
            initiative.organiser = extra_data["isOrganiserName"]
        if "theme" in extra_data:
            initiative.category = extra_data["theme"]
        if "isOrganiserWebsite" in extra_data:
            initiative.url = extra_data["isOrganiserWebsite"]
        if "position" in item:
            initiative.latitude = item["position"]["lat"]
            initiative.longitude = item["position"]["lng"]

        return initiative
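A hedged usage sketch for this mapper (the feed URL, the response shape, and calling `map_initiative` as a plain function are all assumptions, not taken from the project):

    import requests

    def scrape_all(feed_url):
        # Fetch the JSON feed and map every entry; raises on HTTP errors.
        response = requests.get(feed_url)
        response.raise_for_status()
        return [map_initiative(item) for item in response.json()]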
Code Example #5
File: nlvoorelkaar.py  Project: ictmotion/helpradar
    def complete(self, initiative: InitiativeImport):
        initiative_url = self.config.get_initiative_url(initiative.source_id)
        # This already raises ScrapeExceptions
        detail = self.get(initiative_url)

        try:
            soup = BeautifulSoup(detail.content, 'html.parser')

            table = soup.find("dl")
            records = table.find_all(["dd", "dt"])
            initiative.description = soup.find("p").text.strip('\t\n\r ')
            initiative.group = self.config.group
            initiative.source = initiative_url

            set_count = self.extract_details_table(initiative, records)

            if self.config.group == InitiativeGroup.DEMAND:
                title = soup.find("h2", "result__title")
                initiative.name = title.contents[0]

            # "Aangesloten bij:" is Dutch for "Affiliated with:".
            h5node_organization = soup.find("h5", text="Aangesloten bij:")
            if h5node_organization:
                sibling = h5node_organization.find_next_sibling()
                initiative.organiser = sibling.get_text(strip=True)
            else:
                # "Geplaatst door:" is Dutch for "Posted by:".
                h5node_person = soup.find("h5", text="Geplaatst door:")
                if h5node_person:
                    sibling = h5node_person.find_next_sibling()
                    initiative.organiser = sibling.get_text(strip=True)

            if not initiative.location:
                self.try_alternative_place(soup, initiative)
        except Exception as ex:
            msg = f"Error reading contents from {initiative_url}"
            raise ScrapeException(msg) from ex

        if set_count == 0:
            raise ScrapeException("Failed to load field map details table")