Python LrmiBase Examples, converter.spiders.lrmi_base.LrmiBase Python Examples

Example #1

0

Show file

 def getValuespaces(self, response):
     """Extend the base valuespaces with values scraped from the sidebar.

     Three sidebar entries are read, each identified by the icon class of
     the element inside its ``sidebar__information-item``:

     * ``icon-level``    -> ``educationalContext`` (via
       ``ValuespaceHelper.educationalContextByGrade``)
     * ``icon-subject``  -> ``discipline``
     * ``icon-settings`` -> ``learningResourceType`` (tool type)

     Additionally ``response.meta['item']['type']`` is added as
     ``learningResourceType``.

     Every sidebar lookup is best effort: a failure is logged at debug
     level and the remaining fields are still processed.

     :param response: the response of the detail page; must carry the
         listing entry in ``response.meta['item']``.
     :return: the valuespaces loader returned by ``LrmiBase.getValuespaces``
         with the additional values applied.
     """
     import logging

     log = logging.getLogger(__name__)
     valuespaces = LrmiBase.getValuespaces(self, response)
     # All three lookups differ only in the icon class they match on.
     sidebar_text = (
         '//ul[@class="sidebar__information"]'
         '/li[@class="sidebar__information-item"]'
         '/*[contains(@class,"%s")]/parent::*//text()')

     try:
         grade_text = response.xpath(sidebar_text % 'icon-level').get()
         # .get() yields None when the page has no level entry.
         if grade_text is not None:
             grade_range = grade_text.replace('Stufe', '').strip().split(' - ')
             valuespaces.add_value(
                 'educationalContext',
                 ValuespaceHelper.educationalContextByGrade(grade_range))
     except Exception:
         # Best effort: an unparsable grade range must not abort the item.
         log.debug('could not map educationalContext', exc_info=True)

     try:
         discipline = response.xpath(sidebar_text % 'icon-subject').getall()
         valuespaces.add_value('discipline', discipline)
     except Exception:
         log.debug('could not extract discipline', exc_info=True)

     lrt = response.meta['item'].get('type')
     valuespaces.add_value('learningResourceType', lrt)

     try:
         tool_type = [
             text.strip()
             for text in response.xpath(sidebar_text % 'icon-settings').getall()
         ]
         # @TODO: proper mapping, maybe specialised tool field?
         valuespaces.add_value('learningResourceType', tool_type)
     except Exception:
         log.debug('could not extract tool type', exc_info=True)

     return valuespaces

Example #2

0

Show file

 def getValuespaces(self, response):
     """Add a ``discipline`` value for each known subject found in the URL.

     A subject counts as present when ``"/" + subject`` occurs anywhere in
     ``response.url``.

     :param response: the crawled page response.
     :return: the valuespaces loader from ``LrmiBase.getValuespaces`` with
         the matching disciplines added.
     """
     loader = LrmiBase.getValuespaces(self, response)
     for subject in ("politik", "geschichte"):
         url_fragment = "/" + subject
         if url_fragment not in response.url:
             continue
         loader.add_value("discipline", subject)
     return loader

Example #3

0

Show file

 def getLOMGeneral(self, response):
     """Replace the LOM general title with the unescaped listing name.

     The title is taken from ``response.meta['item']['name']``, stripped of
     surrounding whitespace and with HTML character references decoded.

     :param response: the page response; must carry the listing entry in
         ``response.meta['item']``.
     :return: the general loader from ``LrmiBase.getLOMGeneral`` with the
         ``title`` replaced.
     """
     # HTMLParser().unescape() was removed in Python 3.9; html.unescape()
     # is the supported equivalent.
     import html

     general = LrmiBase.getLOMGeneral(self, response)
     general.replace_value(
         'title',
         html.unescape(response.meta['item'].get('name').strip()))
     #general.add_value('keyword', list(filter(lambda x: x,map(lambda x: x.strip(), response.xpath('//*[@id="ContentModuleApp"]//*[@class="topic-name"]//text()').getall()))))
     return general

Example #4

0

Show file

 def getBase(self, response):
     """Set thumbnail and type on the base item.

     The thumbnail is built by appending the ``src`` of the
     ``content-info__image`` element to ``self.url``. When the page has no
     such image the thumbnail provided by ``LrmiBase.getBase`` is left
     untouched instead of raising ``TypeError`` on ``str + None``.

     :param response: the detail page response.
     :return: the base loader from ``LrmiBase.getBase`` with ``thumbnail``
         (if available) and ``type`` replaced.
     """
     base = LrmiBase.getBase(self, response)
     #base.replace_value('thumbnail', self.url + '/media/' + response.meta['item'].get('image'))
     image_src = response.xpath(
         '//img[@class="content-info__image"]/@src').get()
     if image_src is not None:
         base.replace_value('thumbnail', self.url + image_src)
     base.replace_value('type', self.getType(response))
     return base

Example #5

0

Show file

 def getLOMTechnical(self, response):
     """Fill in format and location of the LOM technical section.

     The format is always ``text/html``. The location is the LRMI
     ``mainEntityOfPage`` value, falling back to ``response.url`` when that
     value is empty. The content size is currently not populated.

     :param response: the crawled page response.
     :return: the technical loader from ``LrmiBase.getLOMTechnical``.
     """
     tech_loader = LrmiBase.getLOMTechnical(self, response)
     tech_loader.replace_value("format", "text/html")
     location = (self.getLRMI("mainEntityOfPage", response=response)
                 or response.url)
     tech_loader.replace_value("location", location)
     return tech_loader

Example #6

0

Show file

 def getLicense(self, response):
     """Set the license URL from the page's ``cc-license`` link.

     A trailing language part (``deed.de`` or, failing that, ``de/``) is
     cut off the link before it is stored. Without a link the loader from
     ``LrmiBase.getLicense`` is returned unchanged.

     :param response: the crawled page response.
     :return: the license loader.
     """
     license_loader = LrmiBase.getLicense(self, response)
     href: str = response.xpath(
         "//div[@class='cc-license']/a/@href").get()
     if not href:
         return license_loader
     # Only the first matching language suffix is removed.
     for language_suffix in ("deed.de", "de/"):
         if href.endswith(language_suffix):
             href = href[:-len(language_suffix)]
             break
     license_loader.replace_value("url", href)
     return license_loader

Example #7

0

Show file

    def getLOMGeneral(self, response):
        """Populate identifier, keywords, language and description.

        All values are read from the page's LRMI data via ``self.getLRMI``.
        The ``keywords`` value is treated as a comma-separated string;
        when it is missing or empty the keywords supplied by
        ``LrmiBase.getLOMGeneral`` are kept instead of failing on
        ``None.split``. Blank fragments (e.g. from a trailing comma) are
        dropped.

        :param response: the crawled page response.
        :return: the general loader from ``LrmiBase.getLOMGeneral``.
        """
        general = LrmiBase.getLOMGeneral(self, response)
        general.replace_value(
            "identifier", self.getLRMI("mainEntityOfPage", response=response))

        # Keywords
        # NOTE(review): assumes getLRMI returns a str here - confirm it
        # never yields a list for "keywords".
        raw_keywords = self.getLRMI("keywords", response=response)
        if raw_keywords:
            keywords: List[str] = [
                keyword.strip() for keyword in raw_keywords.split(",")
                if keyword.strip()
            ]
            general.replace_value("keyword", keywords)

        # Language TODO fill in value by hand or leave empty?
        general.add_value("language",
                          self.getLRMI("inLanguage", response=response))

        # Description
        general.add_value("description",
                          self.getLRMI("description", response=response))
        return general

Example #8

0

Show file

    def getLOMLifecycle(self, response):
        """Derive author lifecycle entries from the LRMI ``author`` value.

        * No author value: the loader from ``LrmiBase.getLOMLifecycle`` is
          returned unchanged.
        * Known editorial/organization names are stored as a single author
          with ``organization`` set.
        * Anything else is treated as a list of persons separated by
          commas and/or the word ``und``; for each person everything but
          the last word becomes ``firstName`` and the last word becomes
          ``lastName``.

        :param response: the crawled page response.
        :return: the lifecycle loader with the author information added.
        """
        import re

        name = self.getLRMI("author", response=response)
        lifecycle = LrmiBase.getLOMLifecycle(self, response)

        if not name:
            # Nothing to map; avoids a TypeError on ``"..." in None``.
            return lifecycle

        is_organization = (
            name in ("Bundeszentrale für politische Bildung", "Redaktion")
            or "Redaktion werkstatt.bpb.de" in name)
        if is_organization:
            lifecycle.add_value("role", "author")
            lifecycle.add_value("organization", name)
            return lifecycle

        # Split on commas and on "und" as a separate word only, so names
        # that merely contain the letters "und" stay intact.
        for author_name in re.split(r",|\s+und\s+", name):
            author = author_name.split()
            if not author:
                # Skip empty fragments, e.g. from a trailing comma.
                continue
            lifecycle.add_value("role", "author")
            lifecycle.add_value("firstName", " ".join(author[:-1]))
            lifecycle.add_value("lastName", author[-1])

        return lifecycle

Example #9

0

Show file

 def __init__(self, **kwargs):
     """Initialise the spider by forwarding all keyword arguments to ``LrmiBase``."""
     LrmiBase.__init__(self, **kwargs)

Example #10

0

Show file

 def getLicense(self, response):
     """Return the license loader exactly as produced by ``LrmiBase.getLicense``.

     :param response: the crawled page response.
     """
     return LrmiBase.getLicense(self, response)

Example #11

0

Show file

 def getLOMTechnical(self, response):
     """Mark the resource as an HTML page located at the response URL.

     :param response: the crawled page response.
     :return: the technical loader from ``LrmiBase.getLOMTechnical`` with
         ``format`` and ``location`` replaced.
     """
     tech_loader = LrmiBase.getLOMTechnical(self, response)
     tech_loader.replace_value("format", "text/html")
     tech_loader.replace_value("location", response.url)
     return tech_loader

Example #12

0

Show file

 def getLOMEducational(self, response):
     """Add the listing teaser as the educational description.

     The teaser is read from ``response.meta['item']['teaser']`` and HTML
     character references in it are decoded. A missing teaser is skipped
     rather than passed to the unescape routine, which would raise.

     :param response: the page response; must carry the listing entry in
         ``response.meta['item']``.
     :return: the educational loader from ``LrmiBase.getLOMEducational``.
     """
     # HTMLParser().unescape() was removed in Python 3.9; html.unescape()
     # is the supported equivalent.
     import html

     educational = LrmiBase.getLOMEducational(self, response)
     teaser = response.meta['item'].get('teaser')
     if teaser is not None:
         educational.add_value('description', html.unescape(teaser))
     return educational

Example #13

0

Show file

 def getBase(self, response):
     """Return the base loader with its ``thumbnail`` replaced by ``None``.

     :param response: the crawled page response.
     :return: the base loader from ``LrmiBase.getBase``.
     """
     base_loader = LrmiBase.getBase(self, response)
     thumbnail = None
     base_loader.replace_value("thumbnail", thumbnail)
     return base_loader

Example #14

0

Show file

 def __init__(self, **kwargs):
     """Initialise both bases explicitly, passing the same keyword arguments to each."""
     # LrmiBase is initialised first, then CrawlSpider.
     LrmiBase.__init__(self, **kwargs)
     CrawlSpider.__init__(self, **kwargs)

Example #15

0

Show file

 def mapResponse(self, response):
     """Delegate response mapping to ``LrmiBase.mapResponse`` unchanged.

     :param response: the crawled page response.
     :return: whatever ``LrmiBase.mapResponse`` returns.
     """
     mapped = LrmiBase.mapResponse(self, response)
     return mapped

Example #16

0

Show file

 def handleEntry(self, response):
     """Process a single entry page by handing it to ``LrmiBase.parse``.

     :param response: the crawled page response.
     :return: whatever ``LrmiBase.parse`` returns.
     """
     parsed = LrmiBase.parse(self, response)
     return parsed

Example #17

0

Show file

 def __init__(self, **kwargs):
     """Initialise ``SitemapSpider`` first, then ``LrmiBase`` with the keyword arguments."""
     # NOTE(review): kwargs are not forwarded to SitemapSpider.__init__ -
     # confirm this is intentional.
     SitemapSpider.__init__(self)
     LrmiBase.__init__(self, **kwargs)

Example #18

0

Show file

 def parse(self, response):
     """Parse a response by delegating to ``LrmiBase.parse``.

     :param response: the crawled page response.
     :return: whatever ``LrmiBase.parse`` returns.
     """
     parsed = LrmiBase.parse(self, response)
     return parsed

Example #19

0

Show file

File: digitallearninglab_spider.py Project: torsten-simon/oeh-search-etl

 def getLOMTechnical(self, response):
     """Mark the resource as an HTML page located at the response URL.

     :param response: the crawled page response.
     :return: the technical loader from ``LrmiBase.getLOMTechnical`` with
         ``format`` and ``location`` replaced.
     """
     tech_loader = LrmiBase.getLOMTechnical(self, response)
     tech_loader.replace_value('format', 'text/html')
     tech_loader.replace_value('location', response.url)
     return tech_loader