Example #1
0
 def getValuespaces(self, response):
     """Extend the base valuespaces with values read from the page sidebar.

     Adds ``educationalContext`` (derived from the grade range shown next to
     the ``icon-level`` entry), ``discipline`` (text next to the
     ``icon-subject`` entry) and ``learningResourceType`` (the ``type`` of
     the item stored in ``response.meta`` plus the text next to the
     ``icon-settings`` entry).

     Every sidebar lookup is best-effort: a missing entry is skipped and an
     unexpected error is logged instead of aborting the whole item.

     :param response: response of the detail page; ``response.meta['item']``
         must hold the item this page belongs to.
     :return: the object returned by ``LrmiBase.getValuespaces`` with the
         additional values applied.
     """
     import logging

     log = logging.getLogger(__name__)
     valuespaces = LrmiBase.getValuespaces(self, response)
     # All three lookups share one XPath; only the icon class differs.
     sidebar_text = (
         '//ul[@class="sidebar__information"]'
         '/li[@class="sidebar__information-item"]'
         '/*[contains(@class,"%s")]/parent::*//text()')

     try:
         grade_text = response.xpath(sidebar_text % 'icon-level').get()
         # .get() yields None when the sidebar has no grade entry.
         if grade_text is not None:
             grades = [
                 part for part in
                 grade_text.replace('Stufe', '').strip().split(' - ')
                 if part
             ]
             if grades:
                 valuespaces.add_value(
                     'educationalContext',
                     ValuespaceHelper.educationalContextByGrade(grades))
     except Exception:
         log.warning('could not map grade range for %s', response.url,
                     exc_info=True)

     try:
         discipline = response.xpath(sidebar_text % 'icon-subject').getall()
         valuespaces.add_value('discipline', discipline)
     except Exception:
         log.warning('could not read discipline for %s', response.url,
                     exc_info=True)

     lrt = response.meta['item'].get('type')
     valuespaces.add_value('learningResourceType', lrt)

     try:
         tool_types = [
             text.strip()
             for text in response.xpath(
                 sidebar_text % 'icon-settings').getall()
         ]
         # @TODO: proper mapping, maybe specialised tool field?
         valuespaces.add_value('learningResourceType', tool_types)
     except Exception:
         log.warning('could not read tool type for %s', response.url,
                     exc_info=True)
     return valuespaces
Example #2
0
 def getValuespaces(self, response):
     """Add a ``discipline`` value for every known subject found in the URL.

     A subject counts as present when ``"/" + subject`` occurs anywhere in
     ``response.url``.

     :param response: response whose URL is inspected.
     :return: the object returned by ``LrmiBase.getValuespaces`` with the
         matching disciplines added.
     """
     loader = LrmiBase.getValuespaces(self, response)
     for subject in ("politik", "geschichte"):
         marker = "/" + subject
         if marker in response.url:
             loader.add_value("discipline", subject)
     return loader
Example #3
0
 def getLOMGeneral(self, response):
     """Replace the title with the HTML-unescaped name of the listed item.

     ``html.unescape`` is used instead of ``HTMLParser().unescape``, which
     was removed in Python 3.9. If the item has no (non-blank) name the
     title produced by ``LrmiBase.getLOMGeneral`` is left untouched.

     :param response: response whose ``meta['item']`` holds the item data.
     :return: the object returned by ``LrmiBase.getLOMGeneral``, with the
         title replaced when a name is available.
     """
     from html import unescape

     general = LrmiBase.getLOMGeneral(self, response)
     name = (response.meta['item'].get('name') or '').strip()
     if name:
         general.replace_value('title', unescape(name))
     # NOTE: keyword extraction from the topic names is currently disabled:
     #general.add_value('keyword', list(filter(lambda x: x,map(lambda x: x.strip(), response.xpath('//*[@id="ContentModuleApp"]//*[@class="topic-name"]//text()').getall()))))
     return general
Example #4
0
 def getBase(self, response):
     """Set the thumbnail and type on top of the base values.

     The thumbnail is ``self.url`` followed by the ``src`` of the
     ``content-info__image`` image. Pages without such an image keep the
     thumbnail produced by ``LrmiBase.getBase`` instead of failing on a
     ``None`` concatenation.

     :param response: response of the detail page.
     :return: the object returned by ``LrmiBase.getBase`` with ``thumbnail``
         (when an image is present) and ``type`` replaced.
     """
     base = LrmiBase.getBase(self, response)
     # Alternative source kept for reference:
     #base.replace_value('thumbnail', self.url + '/media/' + response.meta['item'].get('image'))
     image_src = response.xpath(
         '//img[@class="content-info__image"]/@src').get()
     if image_src:
         base.replace_value('thumbnail', self.url + image_src)
     base.replace_value('type', self.getType(response))
     return base
Example #5
0
 def getLOMTechnical(self, response):
     """Mark the resource as ``text/html`` and set its location.

     The location is the LRMI ``mainEntityOfPage`` value; when that is
     empty or missing, ``response.url`` is used instead.

     :param response: response of the page being mapped.
     :return: the object returned by ``LrmiBase.getLOMTechnical`` with
         ``format`` and ``location`` replaced.
     """
     loader = LrmiBase.getLOMTechnical(self, response)
     loader.replace_value("format", "text/html")
     # The "size" field (LRMI "ContentSize") is intentionally not filled.
     location = (self.getLRMI("mainEntityOfPage", response=response)
                 or response.url)
     loader.replace_value("location", location)
     return loader
Example #6
0
 def getLicense(self, response):
     """Set the license URL from the ``cc-license`` link on the page.

     A trailing ``deed.de`` or, failing that, a trailing ``de/`` is cut off
     the link so that the stored URL carries no language part. Pages
     without such a link keep the base license untouched.

     :param response: response of the page being mapped.
     :return: the object returned by ``LrmiBase.getLicense``, with ``url``
         replaced when a license link was found.
     """
     loader = LrmiBase.getLicense(self, response)
     href = response.xpath("//div[@class='cc-license']/a/@href").get()
     if not href:
         return loader
     # Only the first matching language suffix is removed.
     for language_suffix in ("deed.de", "de/"):
         if href.endswith(language_suffix):
             href = href[:-len(language_suffix)]
             break
     loader.replace_value("url", href)
     return loader
Example #7
0
    def getLOMGeneral(self, response):
        """Fill identifier, keywords, language and description from LRMI.

        ``keywords`` is read as one comma-separated string. Each entry is
        stripped and empty entries are dropped. When the page carries no
        keywords at all, the value from ``LrmiBase.getLOMGeneral`` is kept
        instead of failing on a missing string.

        :param response: response of the page being mapped.
        :return: the object returned by ``LrmiBase.getLOMGeneral`` with the
            additional values applied.
        """
        general = LrmiBase.getLOMGeneral(self, response)
        general.replace_value(
            "identifier", self.getLRMI("mainEntityOfPage", response=response))

        # Keywords
        raw_keywords = self.getLRMI("keywords", response=response)
        if raw_keywords:
            keywords: List[str] = [
                keyword.strip() for keyword in raw_keywords.split(",")
                if keyword.strip()
            ]
            general.replace_value("keyword", keywords)

        # Language TODO fill in value by hand or leave empty?
        general.add_value("language",
                          self.getLRMI("inLanguage", response=response))

        # Description
        general.add_value("description",
                          self.getLRMI("description", response=response))
        return general
Example #8
0
    def getLOMLifecycle(self, response):
        """Turn the LRMI ``author`` string into lifecycle author entries.

        * No author: the base lifecycle is returned unchanged.
        * ``"Bundeszentrale für politische Bildung"``, ``"Redaktion"`` or any
          string containing ``"Redaktion werkstatt.bpb.de"``: stored as an
          author organization.
        * Anything else: treated as one or more persons separated by commas
          or the word ``und``. For each person the last word becomes
          ``lastName`` and everything before it ``firstName``.

        Previously the ``und`` handling could never run, because the two
        comma checks before it already covered every possible string.

        :param response: response of the page being mapped.
        :return: the object returned by ``LrmiBase.getLOMLifecycle`` with the
            author values added.
        """
        name = self.getLRMI("author", response=response)
        lifecycle = LrmiBase.getLOMLifecycle(self, response)

        if not name:
            return lifecycle

        is_organization = (
            name in ("Bundeszentrale für politische Bildung", "Redaktion")
            or "Redaktion werkstatt.bpb.de" in name
        )
        if is_organization:
            lifecycle.add_value("role", "author")
            lifecycle.add_value("organization", name)
            return lifecycle

        # " und " (with spaces) is matched so that names merely containing
        # the letters "und" are not split apart.
        for person in name.replace(" und ", ",").split(","):
            person = person.strip()
            if not person:
                continue
            first_name, _, last_name = person.rpartition(" ")
            lifecycle.add_value("role", "author")
            lifecycle.add_value("firstName", first_name.strip())
            lifecycle.add_value("lastName", last_name)

        return lifecycle
Example #9
0
 def __init__(self, **kwargs):
     """Initialise the instance by forwarding all keyword arguments to ``LrmiBase.__init__``."""
     LrmiBase.__init__(self, **kwargs)
Example #10
0
 def getLicense(self, response):
     """Return the license exactly as produced by ``LrmiBase.getLicense``.

     :param response: response of the page being mapped.
     """
     return LrmiBase.getLicense(self, response)
Example #11
0
 def getLOMTechnical(self, response):
     """Mark the resource as ``text/html`` located at ``response.url``.

     :param response: response of the page being mapped.
     :return: the object returned by ``LrmiBase.getLOMTechnical`` with
         ``format`` and ``location`` replaced.
     """
     loader = LrmiBase.getLOMTechnical(self, response)
     overrides = (
         ('format', 'text/html'),
         ('location', response.url),
     )
     for field_name, field_value in overrides:
         loader.replace_value(field_name, field_value)
     return loader
Example #12
0
 def getLOMEducational(self, response):
     """Add the HTML-unescaped teaser of the listed item as description.

     ``html.unescape`` is used instead of ``HTMLParser().unescape``, which
     was removed in Python 3.9. Items without a teaser add nothing instead
     of failing while unescaping ``None``.

     :param response: response whose ``meta['item']`` holds the item data.
     :return: the object returned by ``LrmiBase.getLOMEducational``, with
         the description added when a teaser is available.
     """
     from html import unescape

     educational = LrmiBase.getLOMEducational(self, response)
     teaser = response.meta['item'].get('teaser')
     if teaser:
         educational.add_value('description', unescape(teaser))
     return educational
Example #13
0
 def getBase(self, response):
     """Return the base values with ``thumbnail`` replaced by ``None``.

     :param response: response of the page being mapped.
     :return: the object returned by ``LrmiBase.getBase`` after the
         thumbnail replacement.
     """
     loader = LrmiBase.getBase(self, response)
     # NOTE(review): presumably this discards any thumbnail set by the base
     # implementation - confirm against the loader's handling of None.
     loader.replace_value("thumbnail", None)
     return loader
Example #14
0
 def __init__(self, **kwargs):
     """Run ``LrmiBase.__init__`` and then ``CrawlSpider.__init__``, passing the same keyword arguments to both."""
     LrmiBase.__init__(self, **kwargs)
     CrawlSpider.__init__(self, **kwargs)
Example #15
0
 def mapResponse(self, response):
     """Delegate unchanged to ``LrmiBase.mapResponse`` and return its result."""
     return LrmiBase.mapResponse(self, response)
Example #16
0
 def handleEntry(self, response):
     """Handle one entry by handing the response to ``LrmiBase.parse`` and returning its result."""
     return LrmiBase.parse(self, response)
Example #17
0
 def __init__(self, **kwargs):
     """Run ``SitemapSpider.__init__`` and then ``LrmiBase.__init__``.

     Only ``LrmiBase.__init__`` receives the keyword arguments.
     """
     # NOTE(review): kwargs are not forwarded to SitemapSpider.__init__ -
     # confirm this is intentional.
     SitemapSpider.__init__(self)
     LrmiBase.__init__(self, **kwargs)
Example #18
0
 def parse(self, response):
     """Delegate unchanged to ``LrmiBase.parse`` and return its result."""
     return LrmiBase.parse(self, response)
 def getLOMTechnical(self, response):
     """Describe the resource as an HTML page found at ``response.url``.

     :param response: response of the page being mapped.
     :return: the object returned by ``LrmiBase.getLOMTechnical`` with
         ``format`` and ``location`` replaced.
     """
     loader = LrmiBase.getLOMTechnical(self, response)
     loader.replace_value("format", "text/html")
     page_url = response.url
     loader.replace_value("location", page_url)
     return loader