Esempio n. 1
0
    def getBase(self, response):
        """Extend the LomBase loader with thumbnail, default thumbnail and searchable flag.

        The element is read from ``response.meta["item"]``. ``thumbnail`` is the
        element's own ``thumbnail`` entry (empty string when the key is absent).
        ``defaultThumbnail`` is built from the first usable logo path, in order
        of preference: ``srcLogoUrl``, ``logo``, then a hardcoded logo path.

        Returns the loader obtained from ``LomBase.getBase``.
        """
        base = LomBase.getBase(self, response)

        # Element response as a Python dict.
        element_dict = dict(response.meta["item"])

        base.add_value("thumbnail", element_dict.get("thumbnail", ""))

        # Candidate logo paths by order of preference; the hardcoded path is
        # the backup if no other logo is available.
        logo_candidates = (
            element_dict.get("srcLogoUrl"),
            element_dict.get("logo"),
            "/logos/bs_logos/merlin.png",
        )
        # Skip entries that are present but None/empty: concatenating None
        # raises TypeError, and an empty path would yield a bare host URL.
        # The hardcoded last candidate is always truthy, so next() cannot fail.
        logo_path = next(path for path in logo_candidates if path)
        base.add_value("defaultThumbnail", "https://merlin.nibis.de" + logo_path)

        # Adding a default searchable value to constitute this element (node)
        # as a valid-to-be-returned object.
        base.add_value("searchable", "1")

        return base
    def getBase(self, response):
        """Extend the LomBase loader with thumbnail, origin and fulltext.

        ``thumbnail`` comes from the item's ``preview.url`` and ``origin`` from
        the ``ccm:replicationsource`` property. When that property is truthy,
        the fulltext is downloaded from the item's ``downloadUrl`` (only used
        on HTTP 200, failures are logged and ignored). Otherwise the fulltext
        is requested from the ``textContent`` endpoint below ``self.apiUrl``.

        Returns the loader obtained from ``LomBase.getBase``.
        """
        base = LomBase.getBase(self, response)
        item = response.meta["item"]
        base.replace_value("thumbnail", item["preview"]["url"])

        # Look the property up once; it feeds both "origin" and the branch.
        replication_source = self.getProperty("ccm:replicationsource", response)
        base.replace_value("origin", replication_source)

        if replication_source:
            # imported objects usually have the content as binary text
            # TODO: Sometimes, edu-sharing redirects if no local content is found, and this should be html-parsed
            download_url = item["downloadUrl"]
            try:
                r = requests.get(download_url)
                if r.status_code == 200:
                    base.replace_value("fulltext", r.text)
            except Exception as error:
                # Best effort: a failed download must not abort the item.
                # Lazy %-style arguments keep the logging call well-formed
                # (an extra argument without a placeholder breaks formatting).
                logging.warning(
                    "error fetching data from %s: %s", download_url, error
                )
        else:
            # try to transform using alfresco
            ref = item["ref"]
            text_content_url = (
                self.apiUrl + "/node/v1/nodes/" + ref["repo"] + "/" +
                ref["id"] + "/textContent"
            )
            r = requests.get(
                text_content_url,
                headers={"Accept": "application/json"},
            ).json()
            if "text" in r:
                base.replace_value("fulltext", r["text"])

        return base
 def getBase(self, response):
     """Return the LomBase loader with ``lastModified`` read from the item.

     The value is the result of querying ``letzte_aenderung//text()`` on the
     object stored in ``response.meta["item"]``.
     """
     loader = LomBase.getBase(self, response)
     last_change = response.meta["item"].xpath("letzte_aenderung//text()").get()
     loader.add_value("lastModified", last_change)
     return loader
 def getBase(self, response):
     """Return the LomBase loader with ``lastModified`` and ``thumbnail`` added.

     ``lastModified`` is the item's ``updatedAt`` entry; the thumbnail URL is
     ``self.objectUrl`` followed by the item id and ``.jpg?width=1000``.
     """
     loader = LomBase.getBase(self, response)
     item = response.meta["item"]
     loader.add_value("lastModified", item["updatedAt"])
     thumbnail_url = self.objectUrl + item["id"] + ".jpg?width=1000"
     loader.add_value("thumbnail", thumbnail_url)
     return loader
Esempio n. 5
0
 def getBase(self, response: Response) -> items.BaseItemLoader:
     """Return the LomBase loader filled with origin, date, thumbnail and fulltext.

     ``origin`` is the stripped ``sourceTitle`` of ``response.meta["row"]``,
     ``lastModified`` is the item's ``snippet.publishedAt``; thumbnail and
     fulltext are delegated to ``getThumbnailUrl`` and ``getFulltext``.
     """
     loader = LomBase.getBase(self, response)

     source_title = response.meta["row"]["sourceTitle"].strip()
     loader.add_value("origin", source_title)

     published_at = response.meta["item"]["snippet"]["publishedAt"]
     loader.add_value("lastModified", published_at)

     thumbnail_url = self.getThumbnailUrl(response)
     loader.add_value("thumbnail", thumbnail_url)

     fulltext = self.getFulltext(response)
     loader.add_value("fulltext", fulltext)
     return loader
Esempio n. 6
0
 def getBase(self, response):
     """Return the LomBase loader with LRMI thumbnail and modification date.

     ``thumbnail`` is ``getLRMI('thumbnailUrl')``; ``lastModified`` is
     ``getLRMI`` called with ``'dateModified'`` and ``'datePublished'``
     (presumably a fallback order - confirm against ``getLRMI``).
     """
     loader = LomBase.getBase(self, response)
     thumbnail_url = self.getLRMI('thumbnailUrl', response=response)
     loader.add_value('thumbnail', thumbnail_url)
     modified = self.getLRMI('dateModified', 'datePublished', response=response)
     loader.add_value('lastModified', modified)
     return loader
 def getBase(self, response):
     """Return the LomBase loader with thumbnail and ``lastModified`` added.

     The thumbnail is the ``thumbUrl`` value with the ``$1`` placeholder
     replaced by ``@l``. ``lastModified`` is the ``date_modified`` value.
     """
     base = LomBase.getBase(self, response)
     thumb_url = self.get('thumbUrl', response = response)
     # Only add a thumbnail when one exists: str(None) would otherwise store
     # the literal text "None" as the thumbnail URL.
     if thumb_url:
         base.add_value('thumbnail', str(thumb_url).replace('$1', '@l'))
     base.add_value('lastModified', self.get('date_modified', response = response))
     return base
 def getBase(self, response):
     """Return the LomBase loader with LRMI thumbnail and modification date.

     ``thumbnail`` is ``getLRMI("thumbnailUrl")``; ``lastModified`` is
     ``getLRMI`` called with ``"dateModified"`` and ``"datePublished"``
     (presumably a fallback order - confirm against ``getLRMI``).
     """
     loader = LomBase.getBase(self, response)
     thumbnail_url = self.getLRMI("thumbnailUrl", response=response)
     loader.add_value("thumbnail", thumbnail_url)
     modified = self.getLRMI("dateModified", "datePublished", response=response)
     loader.add_value("lastModified", modified)
     return loader
Esempio n. 9
0
 def getBase(self, response):
     """Return the LomBase loader, adding a thumbnail when og:image is present.

     The ``og:image`` meta content is appended to ``self.url`` with ``_350``
     replaced by ``_1000``. Without an og:image value no thumbnail is added.
     """
     loader = LomBase.getBase(self, response)
     og_image = response.xpath('//meta[@property="og:image"]//@content').get()
     if not og_image:
         return loader
     thumbnail_url = self.url + og_image.replace("_350", "_1000")
     loader.add_value("thumbnail", thumbnail_url)
     return loader
Esempio n. 10
0
 def getBase(self, response):
     """Return the LomBase loader filled from the CSV row in ``response.meta``.

     Adds the row's thumbnail text and collection list, and replaces ``type``
     with the row's type text.
     """
     loader = LomBase.getBase(self, response)
     row = response.meta["row"]

     thumbnail = row[CSVBase.COLUMN_THUMBNAIL]["text"]
     loader.add_value("thumbnail", thumbnail)

     collection = row[CSVBase.COLUMN_COLLECTION]["list"]
     loader.add_value("collection", collection)

     item_type = row[CSVBase.COLUMN_TYPE]["text"]
     loader.replace_value("type", item_type)
     return loader
Esempio n. 11
0
 def getBase(self, response):
     """Return the LomBase loader with thumbnail, type and unescaped fulltext.

     ``thumbnail`` is read from ``acf.thumbnail.url`` and the fulltext from
     ``acf.long_text`` of ``response.meta['item']``; HTML entities in the
     fulltext are unescaped. ``type`` is taken from ``self.getType``.
     """
     # html.unescape replaces HTMLParser().unescape, which was removed in
     # Python 3.9; imported locally so the block stays self-contained.
     from html import unescape

     base = LomBase.getBase(self, response)
     item = response.meta['item']
     base.replace_value('thumbnail', self.get('acf.thumbnail.url', json=item))
     base.replace_value('type', self.getType(response))
     fulltext = self.get('acf.long_text', json=item)
     base.replace_value('fulltext', unescape(fulltext))
     return base
 def getBase(self, response):
     """Return the LomBase loader with thumbnail, type and unescaped fulltext.

     ``thumbnail`` is read from ``acf.thumbnail.url`` and the fulltext from
     ``acf.long_text`` of ``response.meta["item"]``; HTML entities in the
     fulltext are unescaped. ``type`` is taken from ``self.getType``.
     """
     # html.unescape replaces HTMLParser().unescape, which was removed in
     # Python 3.9; imported locally so the block stays self-contained.
     from html import unescape

     base = LomBase.getBase(self, response)
     item = response.meta["item"]
     base.replace_value("thumbnail", self.get("acf.thumbnail.url", json=item))
     base.replace_value("type", self.getType(response))
     fulltext = self.get("acf.long_text", json=item)
     base.replace_value("fulltext", unescape(fulltext))
     return base
Esempio n. 13
0
 def getBase(self, response):
     """Return the LomBase loader with ``lastModified`` and a computed ranking.

     The ranking is ``0.9 + (revisionsCount / 2 + authorsCount) / 50`` with
     both counts converted to float.
     """
     loader = LomBase.getBase(self, response)
     modified = self.get("lastModified.date", response=response)
     loader.add_value("lastModified", modified)

     revisions = float(self.get("revisionsCount", response=response))
     authors = float(self.get("authorsCount", response=response))
     ranking = 0.9 + (revisions / 2 + authors) / 50
     loader.add_value("ranking", ranking)
     return loader
 def getBase(self, response):
     """Return the LomBase loader with thumbnail and plain-text fulltext.

     ``thumbnail`` is read from ``acf.thumbnail.url``; the fulltext is the
     item's ``content.rendered`` value passed through ``self.html2Text``.
     """
     loader = LomBase.getBase(self, response)

     thumbnail_url = self.get("acf.thumbnail.url", json=response.meta["item"])
     loader.replace_value("thumbnail", thumbnail_url)

     rendered = self.get("content.rendered", json=response.meta["item"])
     plain_text = self.html2Text(rendered)
     loader.replace_value("fulltext", plain_text)
     return loader
    def getBase(self, response):
        """Return the LomBase loader with the item's preview image as thumbnail.

        The thumbnail is the ``previewImageUrl`` entry of the element stored in
        ``response.meta["item"]``.
        """
        loader = LomBase.getBase(self, response)

        # TODO: "For licensing reasons, this content is only available to users registered in the Thuringian school
        #  portal."
        preview_url = response.meta["item"]["previewImageUrl"]
        loader.add_value("thumbnail", preview_url)

        return loader
 def getBase(self, response):
     """Return the LomBase loader with thumbnail and ``lastModified`` added.

     The thumbnail is the ``thumbUrl`` value with the ``$1`` placeholder
     replaced by ``@l``. ``lastModified`` is the ``date_modified`` value.
     """
     base = LomBase.getBase(self, response)
     thumb_url = self.get("thumbUrl", response=response)
     # Only add a thumbnail when one exists: str(None) would otherwise store
     # the literal text "None" as the thumbnail URL.
     if thumb_url:
         base.add_value("thumbnail", str(thumb_url).replace("$1", "@l"))
     base.add_value("lastModified", self.get("date_modified", response=response))
     return base
 def getBase(self, response):
     """Return the LomBase loader with thumbnail, type, fulltext and notes.

     ``thumbnail`` is read from ``acf.thumbnail.url`` and the fulltext from
     ``acf.long_text`` of ``response.meta["item"]`` (HTML entities unescaped).
     ``type`` is taken from ``self.getType``. ``notes`` is the newline-joined
     ``notes`` entry of every element in ``acf.notizen``; if the notes cannot
     be built they are skipped.
     """
     # html.unescape replaces HTMLParser().unescape, which was removed in
     # Python 3.9; imported locally so the block stays self-contained.
     from html import unescape

     base = LomBase.getBase(self, response)
     item = response.meta["item"]
     base.replace_value("thumbnail", self.get("acf.thumbnail.url", json=item))
     base.replace_value("type", self.getType(response))
     fulltext = self.get("acf.long_text", json=item)
     base.replace_value("fulltext", unescape(fulltext))
     try:
         note_entries = self.get('acf.notizen', json=item)
         notes = '\n'.join(entry['notes'] for entry in note_entries)
         base.replace_value('notes', notes)
     except Exception:
         # Notes are best effort: a missing or malformed 'acf.notizen' must
         # not fail the item. Unlike a bare except, this no longer swallows
         # KeyboardInterrupt/SystemExit.
         pass
     return base
Esempio n. 18
0
    def getBase(self, response):
        """Return the LomBase loader with fulltext and publisher from an OAI-PMH record.

        Namespaces are removed from the response selector first. The fulltext
        is the LOM general description string of the record. For every
        ``contribute`` entry whose role is ``publisher`` the vCard entity is
        parsed and its formatted name (``fn``) is added as ``publisher``.
        """
        base = LomBase.getBase(self, response)
        response.selector.remove_namespaces()
        record = response.xpath('//OAI-PMH/GetRecord/record')
        base.add_value(
            'fulltext',
            record.xpath('metadata/lom/general/description/string//text()').
            extract_first())

        # publisher
        contributers = record.xpath('metadata/lom/lifeCycle/contribute')
        for contributer in contributers:
            role = contributer.xpath('role/value//text()').extract_first()
            if role != 'publisher':
                continue
            vcardStr = contributer.xpath('entity//text()').extract_first()
            vcard = vobject.readOne(vcardStr)
            # A vCard without an FN property has no ``fn`` attribute; skip it
            # instead of raising AttributeError.
            if hasattr(vcard, 'fn'):
                base.add_value('publisher', vcard.fn.value)
        return base
Esempio n. 19
0
 def getBase(self, response):
     """Return the LomBase loader with fulltext, thumbnail and publisher.

     Namespaces are removed from the response selector first. The fulltext is
     the LOM general description string of the OAI-PMH record; the thumbnail
     is the resource description of the relation whose kind is
     ``hasthumbnail`` (added only when found). For every ``contribute`` entry
     with role ``publisher`` the vCard entity is parsed and, if it has an
     ``fn`` attribute, its value is added as ``publisher``.
     """
     loader = LomBase.getBase(self, response)
     response.selector.remove_namespaces()
     oai_record = response.xpath("//OAI-PMH/GetRecord/record")

     description = oai_record.xpath(
         "metadata/lom/general/description/string//text()"
     ).extract_first()
     loader.add_value("fulltext", description)

     thumbnail_url = oai_record.xpath(
         'metadata/lom/relation/kind/value[text()="hasthumbnail"]/parent::*/parent::*/resource/description/string//text()'
     ).get()
     if thumbnail_url:
         loader.add_value("thumbnail", thumbnail_url)

     # publisher
     for contribution in oai_record.xpath("metadata/lom/lifeCycle/contribute"):
         if contribution.xpath("role/value//text()").extract_first() != "publisher":
             continue
         entity_text = contribution.xpath("entity//text()").extract_first()
         card = vobject.readOne(entity_text)
         if hasattr(card, "fn"):
             loader.add_value("publisher", card.fn.value)
     return loader
Esempio n. 20
0
 def getBase(self, response):
     """Return the loader from ``LomBase.getBase`` without adding any values."""
     loader = LomBase.getBase(self, response)
     # Optionally provide a thumbnail here. If left empty, one is reportedly
     # generated from the getLOMTechnical 'location' (if format is 'text/html').
     # Example: loader.add_value('thumbnail', 'https://url/to/thumbnail')
     return loader
Esempio n. 21
0
 def getBase(self, response):
     """Return the LomBase loader with ``lastModified`` read from the item.

     The value is the result of querying ``letzte_aenderung//text()`` on the
     object stored in ``response.meta['item']``.
     """
     loader = LomBase.getBase(self, response)
     last_change = response.meta['item'].xpath('letzte_aenderung//text()').get()
     loader.add_value('lastModified', last_change)
     return loader
Esempio n. 22
0
    def getBase(self, response):
        """Return the LomBase loader with the thumbnail taken from the response.

        The thumbnail is the result of the ``/data/thumbnail/text()`` query.
        """
        loader = LomBase.getBase(self, response)
        thumbnail_url = response.xpath("/data/thumbnail/text()").get()
        loader.add_value("thumbnail", thumbnail_url)
        return loader
Esempio n. 23
0
    def getBase(self, response):
        """Return the LomBase loader with the thumbnail taken from the response.

        The thumbnail is the result of the ``/data/thumbnail/text()`` query.
        """
        loader = LomBase.getBase(self, response)
        thumbnail_url = response.xpath('/data/thumbnail/text()').get()
        loader.add_value('thumbnail', thumbnail_url)
        return loader
Esempio n. 24
0
 def getBase(self, response):
     """Return the LomBase loader with ``lastModified`` and ``thumbnail`` added.

     ``lastModified`` is the item's ``updatedAt`` entry; the thumbnail URL is
     ``self.url`` + ``worksheet/`` + item id + ``.jpg?width=1000``.
     """
     loader = LomBase.getBase(self, response)
     item = response.meta['item']
     loader.add_value('lastModified', item['updatedAt'])
     thumbnail_url = self.url + 'worksheet/' + item['id'] + '.jpg?width=1000'
     loader.add_value('thumbnail', thumbnail_url)
     return loader
Esempio n. 25
0
 def getBase(self, response):
     """Return the LomBase loader with ``lastModified`` and a computed ranking.

     The ranking is ``0.9 + (revisionsCount / 2 + authorsCount) / 50`` with
     both counts converted to float.
     """
     loader = LomBase.getBase(self, response)
     modified = self.get('lastModified.date', response = response)
     loader.add_value('lastModified', modified)

     revisions = float(self.get('revisionsCount', response = response))
     authors = float(self.get('authorsCount', response = response))
     ranking = 0.9 + (revisions / 2 + authors) / 50
     loader.add_value('ranking', ranking)
     return loader
 def getBase(self, response):
     """Return the loader from ``LomBase.getBase`` unchanged."""
     loader = LomBase.getBase(self, response)
     return loader
Esempio n. 27
0
 def getBase(self, response):
     """Return the LomBase loader, adding the shared thumbnail when one is set.

     The thumbnail is ``self.commonProperties['thumbnail']``; a falsy value
     adds nothing.
     """
     loader = LomBase.getBase(self, response)
     shared_thumbnail = self.commonProperties['thumbnail']
     if not shared_thumbnail:
         return loader
     loader.add_value('thumbnail', shared_thumbnail)
     return loader
Esempio n. 28
0
 def getBase(self, response):
     """Return the LomBase loader with the thumbnail replaced by the item image.

     The value is the result of querying ``@image`` on the object stored in
     ``response.meta["item"]``.
     """
     loader = LomBase.getBase(self, response)
     image_url = response.meta["item"].xpath("@image").get()
     loader.replace_value("thumbnail", image_url)
     return loader
Esempio n. 29
0
 def getBase(self, response: Response) -> items.BaseItemLoader:
     """Return the LomBase loader with the static type and an optional thumbnail.

     ``type`` is replaced by ``self.static_values["type"]``. The thumbnail of
     ``response.meta["row"]`` is added unless it is ``None``.
     """
     loader = LomBase.getBase(self, response)
     loader.replace_value("type", self.static_values["type"])
     row_thumbnail = response.meta["row"]["thumbnail"]
     if row_thumbnail is None:
         return loader
     loader.add_value("thumbnail", row_thumbnail)
     return loader
Esempio n. 30
0
 def getBase(self, response):
     """Return the LomBase loader with the fulltext replaced by plain text.

     The ``parse.text.*`` value of ``response.meta["item"]`` is passed through
     ``self.html2Text`` and stored as ``fulltext``.
     """
     loader = LomBase.getBase(self, response)
     raw_html = self.get("parse.text.*", json=response.meta["item"])
     # NOTE(review): the original author flagged this conversion as crashing;
     # the cause is not visible from here - confirm before relying on it.
     plain_text = self.html2Text(raw_html)
     loader.replace_value("fulltext", plain_text)
     return loader