def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the page's HTML head.

     Starts from the loader returned by ``LomBase.getLOMGeneral`` and adds
     the first text node found under ``//title`` as ``title`` and the
     content of the ``og:locale`` meta tag as ``language``.

     Returns the loader with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response)
     page_title = response.xpath('//title//text()').get()
     locale = response.xpath('//meta[@property="og:locale"]/@content').get()
     loader.add_value('title', page_title)
     loader.add_value('language', locale)
     return loader
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from a ``/data`` XML document.

        Adds the text of ``/data/titel`` as ``title`` and the text of
        ``/data/beschreibung`` as ``description`` to the loader obtained
        from ``LomBase.getLOMGeneral`` and returns it.
        """
        loader = LomBase.getLOMGeneral(self, response)
        # (LOM field, XPath of its source text), in the order they are added.
        sources = (
            ("title", "/data/titel/text()"),
            ("description", "/data/beschreibung/text()"),
        )
        for field, path in sources:
            loader.add_value(field, response.xpath(path).get())
        return loader
Exemple #3
0
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader with title, keywords and description.

     The title and description come from ``self.get``; the keywords come
     from ``self.getKeywords``. Returns the loader obtained from
     ``LomBase.getLOMGeneral`` with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response=response)

     title = self.get("title", response=response)
     loader.add_value("title", title)

     keywords = self.getKeywords(response)
     loader.add_value("keyword", keywords)

     description = self.get("description", response=response)
     loader.add_value("description", description)
     return loader
Exemple #4
0
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the item and the rendered page.

     * ``title``: the stripped ``Name`` entry of ``response.meta["item"]``.
     * ``keyword``: every non-empty, stripped text node found under
       ``topic-name`` elements inside ``#ContentModuleApp``.
     * ``description``: the non-empty, stripped paragraph texts of the
       ``inlinetext`` content modules, joined with newlines.

     Returns the loader with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response)
     loader.add_value("title", response.meta["item"].get("Name").strip())

     topic_texts = response.xpath(
         '//*[@id="ContentModuleApp"]//*[@class="topic-name"]//text()'
     ).getall()
     # Strip first, then drop whatever became empty.
     topics = [text for text in (raw.strip() for raw in topic_texts) if text]
     loader.add_value("keyword", topics)

     paragraph_texts = response.xpath(
         '//*[@id="ContentModuleApp"]//*[@content-module-type="inlinetext"]//p//text()'
     ).getall()
     paragraphs = [
         text for text in (raw.strip() for raw in paragraph_texts) if text
     ]
     loader.add_value("description", "\n".join(paragraphs).strip())
     return loader
 def getLOMGeneral(self, response):
   """Fill the LOM "general" loader with values looked up via ``self.get``.

   Adds ``identifier``, ``title``, ``keyword`` and ``language`` (in that
   order) to the loader obtained from ``LomBase.getLOMGeneral`` and
   returns it.
   """
   loader = LomBase.getLOMGeneral(self, response)
   # (LOM field, key handed to self.get), in the order they are added.
   lookups = (
     ('identifier', 'id'),
     ('title', 'title'),
     ('keyword', 'keywords'),
     ('language', 'language'),
   )
   for field, key in lookups:
     loader.add_value(field, self.get(key, response=response))
   return loader
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the item in ``response.meta``.

     ``identifier`` and ``title`` are read from the item's ``guid`` and
     ``title`` elements; ``language`` is taken from
     ``self.commonProperties``. Returns the loader with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response)
     item = response.meta['item']

     guid = item.xpath('guid//text()').get()
     loader.add_value('identifier', guid)

     title = item.xpath('title//text()').get()
     loader.add_value('title', title)

     loader.add_value('language', self.commonProperties['language'])
     return loader
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader with values looked up via ``self.get``.

     Adds ``identifier``, ``title``, ``keyword``, ``language`` and
     ``description`` (in that order) to the loader obtained from
     ``LomBase.getLOMGeneral`` and returns it.
     """
     loader = LomBase.getLOMGeneral(self, response)
     # Maps each LOM field to the key handed to self.get; insertion order
     # is the order in which the values are added.
     key_by_field = {
         "identifier": "id",
         "title": "title",
         "keyword": "keywords",
         "language": "language",
         "description": "description",
     }
     for field, key in key_by_field.items():
         loader.add_value(field, self.get(key, response=response))
     return loader
Exemple #8
0
 def getLOMGeneral(self, response: Response) -> items.LomGeneralItemloader:
     """Fill the LOM "general" loader from the row in ``response.meta``.

     ``title`` and ``description`` are copied from the row, ``keyword`` is
     the row's ``keywords`` entry passed through ``self.parse_csv_field``,
     and ``language`` comes from ``self.static_values``.

     Returns the loader with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response)
     row = response.meta["row"]

     loader.add_value("title", row["title"])
     loader.add_value("description", row["description"])

     keywords = self.parse_csv_field(row["keywords"])
     loader.add_value("keyword", keywords)

     loader.add_value("language", self.static_values["language"])
     return loader
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the item and its properties.

     The title is *replaced* by the item's ``title`` entry; ``keyword``
     and ``description`` are added from the ``cclom:general_keyword`` and
     ``cclom:general_description`` properties read via ``self.getProperty``.

     Returns the loader with those values applied.
     """
     loader = LomBase.getLOMGeneral(self, response)
     loader.replace_value("title", response.meta["item"]["title"])

     keywords = self.getProperty("cclom:general_keyword", response)
     loader.add_value("keyword", keywords)

     description = self.getProperty("cclom:general_description", response)
     loader.add_value("description", description)
     return loader
Exemple #10
0
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from a ``/data`` XML document.

        Adds the text of ``/data/titel`` as ``title``, the text of
        ``/data/beschreibung`` as ``description`` and a fixed
        ``aggregationLevel`` of ``"1"``. Returns the loader with those
        values added.
        """
        loader = LomBase.getLOMGeneral(self, response)

        title = response.xpath("/data/titel/text()").get()
        loader.add_value("title", title)

        description = response.xpath("/data/beschreibung/text()").get()
        loader.add_value("description", description)

        # Default aggregation level, so filtering queries can rely on it.
        loader.add_value("aggregationLevel", "1")
        return loader
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader with values looked up via ``self.getLRMI``.

     Adds ``identifier``, ``title``, ``keyword``, ``language`` and
     ``description`` (in that order). Title and description each pass two
     property names to ``getLRMI``; the others pass one.

     Returns the loader with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response)
     # (LOM field, property names handed to getLRMI), in insertion order.
     lookups = (
         ("identifier", ("identifier",)),
         ("title", ("name", "headline")),
         ("keyword", ("keywords",)),
         ("language", ("inLanguage",)),
         ("description", ("description", "about")),
     )
     for field, names in lookups:
         loader.add_value(field, self.getLRMI(*names, response=response))
     return loader
Exemple #12
0
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the item in ``response.meta``.

     The title is *replaced* by the HTML-unescaped ``title.rendered`` value
     of the item. When the item's ``tags`` value is non-empty, every tag is
     mapped through ``self.keywords`` and the results are added as
     ``keyword``.

     Returns the loader with those values applied.
     """
     # ``HTMLParser.unescape`` was removed in Python 3.9; ``html.unescape``
     # is the supported equivalent.
     from html import unescape

     general = LomBase.getLOMGeneral(self, response)
     item = response.meta['item']
     general.replace_value(
         'title', unescape(self.get('title.rendered', json=item)))

     tags = self.get('tags', json=item)
     if tags:
         general.add_value('keyword', [self.keywords[tag] for tag in tags])
     return general
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the row in ``response.meta``.

     Each value is read from the row cell addressed by a ``CSVBase.COLUMN_*``
     constant: the ``"text"`` entry for title, language and description,
     the ``"list"`` entry for keywords. Language is *replaced*; the other
     fields are added.

     Returns the loader with those values applied.
     """
     loader = LomBase.getLOMGeneral(self, response)
     row = response.meta["row"]

     loader.add_value("title", row[CSVBase.COLUMN_TITLE]["text"])
     loader.replace_value("language", row[CSVBase.COLUMN_LANGUAGE]["text"])
     loader.add_value("keyword", row[CSVBase.COLUMN_KEYWORD]["list"])
     loader.add_value("description", row[CSVBase.COLUMN_DESCRIPTION]["text"])
     return loader
Exemple #14
0
 def getLOMGeneral(self, response: Response) -> items.LomGeneralItemloader:
     """Fill the LOM "general" loader from the item snippet and the row.

     * ``title``: the ``title`` entry of the item's ``snippet``.
     * ``description``: the result of ``self.getDescription``.
     * ``keyword``: the row's ``keyword`` entry passed through
       ``self.parse_csv_field``, plus the snippet's ``tags`` when present.
     * ``language``: the row's ``language`` entry passed through
       ``self.parse_csv_field``.

     Returns the loader with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response)
     snippet = response.meta["item"]["snippet"]

     loader.add_value("title", snippet["title"])
     loader.add_value("description", self.getDescription(response))

     row = response.meta["row"]
     loader.add_value("keyword", self.parse_csv_field(row["keyword"]))
     if "tags" in snippet:
         loader.add_value("keyword", snippet["tags"])

     loader.add_value("language", self.parse_csv_field(row["language"]))
     return loader
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader with the item name and page topics.

     ``title`` is the stripped ``Name`` entry of ``response.meta['item']``;
     ``keyword`` is every non-empty, stripped text node found under
     ``topic-name`` elements inside ``#ContentModuleApp``.

     Returns the loader with those values added.
     """
     loader = LomBase.getLOMGeneral(self, response)
     loader.add_value('title', response.meta['item'].get('Name').strip())

     topic_texts = response.xpath(
         '//*[@id="ContentModuleApp"]//*[@class="topic-name"]//text()'
     ).getall()
     # Strip first, then drop whatever became empty.
     topics = [text for text in (raw.strip() for raw in topic_texts) if text]
     loader.add_value('keyword', topics)
     return loader
Exemple #16
0
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from an OAI-PMH ``GetRecord`` response.

        Namespaces are removed from the response selector first, so the
        record can be addressed with plain element names. The record's
        header identifier becomes ``identifier``, the first LOM title
        string becomes ``title`` and all LOM keyword strings become
        ``keyword``.

        Returns the loader with those values added.
        """
        response.selector.remove_namespaces()
        record = response.xpath('//OAI-PMH/GetRecord/record')

        # This is an unbound call on the base class, so ``self`` must be
        # passed explicitly; otherwise ``response`` is bound to ``self``.
        general = LomBase.getLOMGeneral(self, response)
        general.add_value(
            'identifier',
            record.xpath('header/identifier//text()').extract_first())
        general.add_value(
            'title',
            record.xpath(
                'metadata/lom/general/title/string//text()').extract_first())
        keywords = record.xpath(
            'metadata/lom/general/keyword/string//text()').getall()
        general.add_value('keyword', keywords)
        return general
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the feed item in ``response.meta``.

     ``identifier`` and ``title`` are read from the item's ``guid`` and
     ``title`` elements (the title is stripped), ``language`` comes from
     ``self.commonProperties``. ``description`` is the item's
     ``description`` text, falling back to the text of a ``summary``
     element inside the item when the description is missing or empty.

     Returns the loader with those values added.
     """
     general = LomBase.getLOMGeneral(self, response)
     item = response.meta["item"]

     general.add_value("identifier", item.xpath("guid//text()").get())
     general.add_value("title", item.xpath("title//text()").get().strip())
     general.add_value("language", self.commonProperties["language"])

     description = item.xpath("description//text()").get()
     if not description:
         # The leading "." keeps the search inside this item. A bare "//"
         # is evaluated from the document root and would return the first
         # summary of the whole feed for every item.
         description = item.xpath('.//*[name()="summary"]//text()').get()
     general.add_value("description", description)
     return general
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the item in ``response.meta``.

     The title is *replaced* by the item's ``parse.title`` value. When
     ``parse.links`` is non-empty, the ``"*"`` entry of every link is added
     as ``keyword``. When ``parse.properties`` is non-empty, the ``"*"``
     entry of every property named ``description`` is added as
     ``description``.

     Returns the loader with those values applied.
     """
     loader = LomBase.getLOMGeneral(self, response)
     item = response.meta["item"]
     loader.replace_value("title", self.get("parse.title", json=item))

     links = self.get("parse.links", json=response.meta["item"])
     if links:
         loader.add_value("keyword", [link["*"] for link in links])

     # NOTE(review): unlike the two lookups above, this one is made without
     # ``json=``; confirm whether the item should be passed here as well.
     properties = self.get("parse.properties")
     if properties:
         descriptions = [
             prop["*"] for prop in properties if prop["name"] == "description"
         ]
         loader.add_value("description", descriptions)
     return loader
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the item in ``response.meta``.

     The title is *replaced* by the HTML-unescaped ``title.rendered`` value.
     When the item's ``tags`` value is non-empty, every tag is mapped
     through ``self.keywords`` and added as ``keyword``. The HTML-unescaped
     ``acf.short_text`` value is added as ``description``.

     Returns the loader with those values applied.
     """
     # ``HTMLParser.unescape`` was removed in Python 3.9; ``html.unescape``
     # is the supported equivalent.
     from html import unescape

     general = LomBase.getLOMGeneral(self, response)
     item = response.meta["item"]
     general.replace_value(
         "title", unescape(self.get("title.rendered", json=item)))

     tags = self.get("tags", json=item)
     if tags:
         general.add_value("keyword", [self.keywords[tag] for tag in tags])

     general.add_value(
         "description", unescape(self.get("acf.short_text", json=item)))
     return general
Exemple #20
0
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from the element in ``response.meta``.

        ``title`` is the element's ``einzeltitel`` entry. ``description`` is
        added from ``kurzinhalt`` when that key exists, and ``keyword`` from
        ``listeStichwort`` when that entry is present and non-empty.

        Returns the loader with those values added.
        """
        loader = LomBase.getLOMGeneral(self, response)

        # The element is handled as a Python dict.
        element = response.meta["item"]

        # TODO: Decide which title to use. Should it be built by concatenating
        # several of the provided ones (einzeltitel, einzeluntertitel,
        # serientitel, serienuntertitel)?
        loader.add_value("title", element["einzeltitel"])

        if "kurzinhalt" in element:
            loader.add_value("description", element["kurzinhalt"])

        keywords = element.get("listeStichwort")
        if keywords is not None and len(keywords) > 0:
            loader.add_value("keyword", keywords)

        return loader
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from the item in ``response.meta``.

        The title is *replaced* by ``title.rendered`` converted with
        ``self.html2Text``. The description is ``excerpt.rendered`` converted
        the same way, with the "… weiterlesen …" marker removed. When the
        item's ``categories`` value is non-empty, every entry is mapped
        through ``self.categories`` and added as ``keyword``.

        Returns the loader with those values applied.
        """
        loader = LomBase.getLOMGeneral(self, response)
        item = response.meta["item"]

        title_text = self.html2Text(self.get("title.rendered", json=item))
        loader.replace_value("title", title_text)

        excerpt_text = self.html2Text(self.get("excerpt.rendered", json=item))
        loader.add_value(
            "description", excerpt_text.replace("… weiterlesen …", ""))

        category_ids = self.get("categories", json=item)
        if category_ids:
            loader.add_value(
                "keyword", [self.categories[cid] for cid in category_ids])
        return loader
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from the item in ``response.meta``.

     * ``title``: HTML-unescaped text of the item's ``titel`` element.
     * ``language``: text of the ``sprache`` element.
     * ``keyword``: HTML-unescaped text of ``schlagwort``, split on ``"; "``.
     * ``description``: HTML-unescaped, stripped text of ``beschreibung``,
       skipped when it starts with ``swiffyobject_``.

     Returns the loader with those values added.
     """
     # ``HTMLParser.unescape`` was removed in Python 3.9; ``html.unescape``
     # is the supported equivalent.
     from html import unescape

     general = LomBase.getLOMGeneral(self, response)
     item = response.meta["item"]

     general.add_value("title", unescape(item.xpath("titel//text()").get()))
     general.add_value("language", item.xpath("sprache//text()").get())
     general.add_value(
         "keyword",
         unescape(item.xpath("schlagwort//text()").get()).split("; "),
     )

     desc = item.xpath("beschreibung//text()").get().strip()
     # Crude filter for invalid descriptions. Not complete: these objects
     # also appear inside the content.
     if not desc.startswith("swiffyobject_"):
         general.add_value("description", unescape(desc))
     return general
Exemple #23
0
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from the item, or from fetched HTML.

        ``title`` is the item's ``name`` entry. If the item has a
        ``description`` key it is used as-is. Otherwise the ``html`` entry of
        ``self.getUrlData(response.url)`` is searched for text nodes under
        ``worksheet-pages`` lists; when more than four are found the first
        four are dropped, and the rest is joined with spaces and cut to
        1000 characters.

        Returns the loader with those values added.
        """
        loader = LomBase.getLOMGeneral(self, response)
        loader.add_value("title", response.meta["item"]["name"])

        if 'description' in response.meta["item"]:
            loader.add_value("description",
                             response.meta["item"]["description"])
            return loader

        html = self.getUrlData(response.url)["html"]
        if not html:
            return loader

        texts = Selector(text=html).xpath(
            '//ul[contains(@class,"worksheet-pages")]//text()').getall()
        # Drop the leading text nodes, but only when more than that exist.
        cutoff = 4
        if len(texts) > cutoff:
            del texts[:cutoff]

        loader.add_value("description", " ".join(texts)[:1000])
        return loader
Exemple #24
0
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from the element in ``response.meta``.

        ``title`` and ``aggregationLevel`` are copied from the element's
        ``title`` and ``aggregation_level`` entries. ``description`` is
        added from ``kurzinhalt`` when that key exists, and ``keyword`` from
        ``listeStichwort`` when that entry is present and non-empty.

        Returns the loader with those values added.
        """
        loader = LomBase.getLOMGeneral(self, response)

        # The element is handled as a Python dict.
        element = response.meta["item"]

        loader.add_value("title", element["title"])
        loader.add_value("aggregationLevel", element["aggregation_level"])

        if "kurzinhalt" in element:
            loader.add_value("description", element["kurzinhalt"])

        keywords = element.get("listeStichwort")
        if keywords is not None and len(keywords) > 0:
            loader.add_value("keyword", keywords)

        return loader
Exemple #25
0
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader from an OAI-PMH ``GetRecord`` response.

        Namespaces are removed from the response selector first, so the
        record can be addressed with plain element names. The record's
        header identifier becomes ``identifier``, the first LOM title and
        description strings become ``title`` and ``description``, and all
        LOM keyword strings become ``keyword``.

        Returns the loader with those values added.
        """
        response.selector.remove_namespaces()
        record = response.xpath("//OAI-PMH/GetRecord/record")

        # This is an unbound call on the base class, so ``self`` must be
        # passed explicitly; otherwise ``response`` is bound to ``self``.
        general = LomBase.getLOMGeneral(self, response)
        general.add_value(
            "identifier",
            record.xpath("header/identifier//text()").extract_first())
        general.add_value(
            "title",
            record.xpath(
                "metadata/lom/general/title/string//text()").extract_first(),
        )
        general.add_value(
            "description",
            record.xpath("metadata/lom/general/description/string//text()").
            extract_first(),
        )
        keywords = record.xpath(
            "metadata/lom/general/keyword/string//text()").getall()
        general.add_value("keyword", keywords)
        return general
Exemple #26
0
 def getLOMGeneral(self, response):
     """Fill the LOM "general" loader from attributes of the item.

     * ``title``: HTML-unescaped ``@title`` attribute.
     * ``description``: ``@task`` converted with ``self.html2Text``.
     * ``language``: ``@language`` attribute.
     * ``keyword``: ``@tags`` attribute, plus every ``@subcategory`` value
       mapped through ``self.subcategories`` when that mapping succeeds.

     Returns the loader with those values added.
     """
     # ``HTMLParser.unescape`` was removed in Python 3.9; ``html.unescape``
     # is the supported equivalent.
     from html import unescape

     general = LomBase.getLOMGeneral(self, response)
     item = response.meta["item"]

     general.add_value("title", unescape(item.xpath("@title").get()))
     general.add_value(
         "description", self.html2Text(item.xpath("@task").get()))
     general.add_value("language", item.xpath("@language").get())
     general.add_value("keyword", item.xpath("@tags").get())

     # TODO: Maybe move this into a vocabulary later.
     # Subcategory keywords are best effort: an unknown subcategory or an
     # unavailable mapping skips them. Only those lookup failures are
     # caught, so unrelated errors are no longer hidden.
     try:
         subcategory_keywords = [
             self.subcategories[code]
             for code in item.xpath("@subcategory").getall()
         ]
     except (AttributeError, KeyError, TypeError):
         pass
     else:
         general.add_value('keyword', subcategory_keywords)
     return general
 def getLOMGeneral(self, response):
   """Fill the LOM "general" loader with the item's name as title.

   Returns the loader obtained from ``LomBase.getLOMGeneral`` with the
   ``name`` entry of ``response.meta['item']`` added as ``title``.
   """
   loader = LomBase.getLOMGeneral(self, response)
   item_name = response.meta['item']['name']
   loader.add_value('title', item_name)
   return loader
Exemple #28
0
 def getLOMGeneral(self, response):
   """Fill the LOM "general" loader with title and keywords.

   The title is looked up via ``self.get`` and the keywords via
   ``self.getKeywords``. Returns the loader with those values added.
   """
   loader = LomBase.getLOMGeneral(self, response=response)

   title = self.get('title', response=response)
   loader.add_value('title', title)

   keywords = self.getKeywords(response)
   loader.add_value('keyword', keywords)
   return loader
Exemple #29
0
 def getLOMGeneral(self, response):
   """Fill the LOM "general" loader from the item in ``response.meta``.

   * ``title``: HTML-unescaped text of the item's ``titel`` element.
   * ``language``: text of the ``sprache`` element.
   * ``keyword``: HTML-unescaped text of ``schlagwort``, split on ``'; '``.

   Returns the loader with those values added.
   """
   # ``HTMLParser.unescape`` was removed in Python 3.9; ``html.unescape``
   # is the supported equivalent.
   from html import unescape

   general = LomBase.getLOMGeneral(self, response)
   item = response.meta['item']

   general.add_value('title', unescape(item.xpath('titel//text()').get()))
   general.add_value('language', item.xpath('sprache//text()').get())
   general.add_value(
     'keyword',
     unescape(item.xpath('schlagwort//text()').get()).split('; '))
   return general
Exemple #30
0
    def getLOMGeneral(self, response):
        """Fill the LOM "general" loader with the title of a ``/data`` document.

        Returns the loader obtained from ``LomBase.getLOMGeneral`` with the
        text of ``/data/titel`` added as ``title``.
        """
        loader = LomBase.getLOMGeneral(self, response)
        title = response.xpath('/data/titel/text()').get()
        loader.add_value('title', title)
        return loader