def getBase(self, response):
    """Add thumbnail, default thumbnail and the searchable flag to the base loader.

    The element is read from ``response.meta["item"]`` and copied into a plain
    dict, so the fallback logo key inserted below only lives in that copy.

    :param response: response whose ``meta["item"]`` holds the element data.
    :return: the loader created by ``LomBase.getBase`` with the extra values added.
    """
    loader = LomBase.getBase(self, response)

    item = dict(response.meta["item"])
    # Falls back to an empty string when the element has no thumbnail.
    loader.add_value("thumbnail", item.get("thumbnail", ""))

    # Last-resort logo, used only if none of the preferred keys is present.
    item["hardcodedDefaultLogoUrl"] = "/logos/bs_logos/merlin.png"

    # Keys in order of preference; the first one present in the element wins.
    preferred_keys = ("srcLogoUrl", "logo", "hardcodedDefaultLogoUrl")
    chosen_key = next((key for key in preferred_keys if key in item), None)
    if chosen_key is not None:
        loader.add_value(
            "defaultThumbnail", "https://merlin.nibis.de" + item[chosen_key]
        )

    # A default searchable value marks this element (node) as valid to be returned.
    loader.add_value("searchable", "1")
    return loader
def getBase(self, response):
    """Set thumbnail, origin and fulltext on the base loader for a repository node.

    If the node has a ``ccm:replicationsource`` property, the fulltext is
    downloaded from the item's ``downloadUrl``. Otherwise the ``textContent``
    endpoint below ``self.apiUrl`` is queried and its ``text`` field is used.

    :param response: response whose ``meta["item"]`` holds the node data
        (``preview.url``, ``downloadUrl``, ``ref.repo``, ``ref.id`` are read).
    :return: the loader created by ``LomBase.getBase`` with the values replaced.
    """
    base = LomBase.getBase(self, response)
    item = response.meta["item"]
    base.replace_value("thumbnail", item["preview"]["url"])

    # Look the property up once; it drives both the origin and the branch below.
    replication_source = self.getProperty("ccm:replicationsource", response)
    base.replace_value("origin", replication_source)

    if replication_source:
        # imported objects usually have the content as binary text
        # TODO: Sometimes, edu-sharing redirects if no local content is found,
        # and this should be html-parsed
        download_url = item["downloadUrl"]
        try:
            r = requests.get(download_url)
        except requests.RequestException as error:
            # Best effort: a failed download only costs the fulltext.
            # Lazy %-style arguments so the record can actually be formatted.
            logging.warning("error fetching data from %s: %s", download_url, error)
        else:
            if r.status_code == 200:
                base.replace_value("fulltext", r.text)
    else:
        # try to transform using alfresco
        r = requests.get(
            self.apiUrl
            + "/node/v1/nodes/"
            + item["ref"]["repo"]
            + "/"
            + item["ref"]["id"]
            + "/textContent",
            headers={"Accept": "application/json"},
        ).json()
        if "text" in r:
            base.replace_value("fulltext", r["text"])
    return base
def getBase(self, response):
    """Add the item's last-modified value to the base loader.

    :param response: response whose ``meta["item"]`` is a selector for the item.
    :return: the loader created by ``LomBase.getBase`` with ``lastModified`` added.
    """
    loader = LomBase.getBase(self, response)
    # First text node found below the item's ``letzte_aenderung`` element.
    last_change = response.meta["item"].xpath("letzte_aenderung//text()").get()
    loader.add_value("lastModified", last_change)
    return loader
def getBase(self, response):
    """Add ``lastModified`` and a thumbnail URL to the base loader.

    The thumbnail URL is ``self.objectUrl`` followed by the item id and the
    fixed suffix ``.jpg?width=1000``.

    :param response: response whose ``meta["item"]`` provides ``updatedAt`` and ``id``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)
    loader.add_value("lastModified", response.meta["item"]["updatedAt"])
    thumbnail_url = self.objectUrl + response.meta["item"]["id"] + ".jpg?width=1000"
    loader.add_value("thumbnail", thumbnail_url)
    return loader
def getBase(self, response: Response) -> items.BaseItemLoader:
    """Add origin, lastModified, thumbnail and fulltext to the base loader.

    :param response: response whose ``meta["row"]`` provides ``sourceTitle`` and
        whose ``meta["item"]`` provides ``snippet.publishedAt``.
    :return: the loader created by ``LomBase.getBase`` with the four values added.
    """
    loader = LomBase.getBase(self, response)

    # The source title is used as origin, with surrounding whitespace stripped.
    source_title = response.meta["row"]["sourceTitle"].strip()
    loader.add_value("origin", source_title)

    published_at = response.meta["item"]["snippet"]["publishedAt"]
    loader.add_value("lastModified", published_at)

    loader.add_value("thumbnail", self.getThumbnailUrl(response))
    loader.add_value("fulltext", self.getFulltext(response))
    return loader
def getBase(self, response):
    """Add thumbnail and lastModified, both read through ``self.getLRMI``.

    :param response: response handed on to ``LomBase.getBase`` and ``getLRMI``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)

    thumbnail_url = self.getLRMI('thumbnailUrl', response=response)
    loader.add_value('thumbnail', thumbnail_url)

    # Both property names are handed to getLRMI; presumably the first one
    # available is returned - confirm against getLRMI.
    modified = self.getLRMI('dateModified', 'datePublished', response=response)
    loader.add_value('lastModified', modified)
    return loader
def getBase(self, response):
    """Add a thumbnail URL and the lastModified value to the base loader.

    The thumbnail is the ``thumbUrl`` value converted to ``str`` with every
    ``$1`` replaced by ``@l``.

    :param response: response handed on to ``LomBase.getBase`` and ``self.get``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)

    # NOTE(review): str() would turn a missing value (None) into the literal
    # text 'None' - confirm that self.get always yields a value here.
    thumb_template = str(self.get('thumbUrl', response=response))
    loader.add_value('thumbnail', thumb_template.replace('$1', '@l'))

    modified = self.get('date_modified', response=response)
    loader.add_value('lastModified', modified)
    return loader
def getBase(self, response):
    """Add thumbnail and lastModified, both read through ``self.getLRMI``.

    :param response: response handed on to ``LomBase.getBase`` and ``getLRMI``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)

    thumbnail_url = self.getLRMI("thumbnailUrl", response=response)
    loader.add_value("thumbnail", thumbnail_url)

    # Both property names are handed to getLRMI; presumably the first one
    # available is returned - confirm against getLRMI.
    modified = self.getLRMI("dateModified", "datePublished", response=response)
    loader.add_value("lastModified", modified)
    return loader
def getBase(self, response):
    """Add a thumbnail derived from the page's ``og:image`` meta tag, if present.

    The tag's content has ``_350`` replaced by ``_1000`` and is appended to
    ``self.url``. Nothing is added when the tag is missing or empty.

    :param response: page response queried with XPath.
    :return: the loader created by ``LomBase.getBase``.
    """
    loader = LomBase.getBase(self, response)
    og_image = response.xpath('//meta[@property="og:image"]//@content').get()
    if not og_image:
        return loader
    loader.add_value("thumbnail", self.url + og_image.replace("_350", "_1000"))
    # Previously considered alternative, kept for reference:
    # loader.add_value('thumbnail', self.url + '/Images/Categories/' + str(self.getId(response)) + '_1000.jpg')
    return loader
def getBase(self, response):
    """Fill thumbnail, collection and type from the CSV row on the response.

    :param response: response whose ``meta["row"]`` maps the ``CSVBase`` column
        constants to cells; ``"text"`` and ``"list"`` entries of the cells are read.
    :return: the loader created by ``LomBase.getBase``.
    """
    loader = LomBase.getBase(self, response)

    thumbnail_cell = response.meta["row"][CSVBase.COLUMN_THUMBNAIL]
    loader.add_value("thumbnail", thumbnail_cell["text"])

    collection_cell = response.meta["row"][CSVBase.COLUMN_COLLECTION]
    loader.add_value("collection", collection_cell["list"])

    # The type is replaced rather than appended to what LomBase already set.
    type_cell = response.meta["row"][CSVBase.COLUMN_TYPE]
    loader.replace_value("type", type_cell["text"])
    return loader
def getBase(self, response):
    """Replace thumbnail, type and fulltext on the base loader.

    The fulltext is the item's ``acf.long_text`` value with HTML character
    references unescaped.

    :param response: response whose ``meta['item']`` holds the JSON item.
    :return: the loader created by ``LomBase.getBase`` with the values replaced.
    """
    # Function-scope import: ``HTMLParser().unescape`` was removed in
    # Python 3.9; ``html.unescape`` is the documented replacement.
    import html

    base = LomBase.getBase(self, response)
    item = response.meta['item']
    base.replace_value('thumbnail', self.get('acf.thumbnail.url', json=item))
    base.replace_value('type', self.getType(response))
    fulltext = self.get('acf.long_text', json=item)
    base.replace_value('fulltext', html.unescape(fulltext))
    return base
def getBase(self, response):
    """Replace thumbnail, type and fulltext on the base loader.

    The fulltext is the item's ``acf.long_text`` value with HTML character
    references unescaped.

    :param response: response whose ``meta["item"]`` holds the JSON item.
    :return: the loader created by ``LomBase.getBase`` with the values replaced.
    """
    # Function-scope import: ``HTMLParser().unescape`` was removed in
    # Python 3.9; ``html.unescape`` is the documented replacement.
    import html

    base = LomBase.getBase(self, response)
    item = response.meta["item"]
    base.replace_value("thumbnail", self.get("acf.thumbnail.url", json=item))
    base.replace_value("type", self.getType(response))
    fulltext = self.get("acf.long_text", json=item)
    base.replace_value("fulltext", html.unescape(fulltext))
    return base
def getBase(self, response):
    """Add lastModified and a computed ranking to the base loader.

    The ranking is ``0.9 + (revisionsCount / 2 + authorsCount) / 50``, with
    both counts converted to ``float`` first.

    :param response: response handed on to ``LomBase.getBase`` and ``self.get``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)
    loader.add_value("lastModified", self.get("lastModified.date", response=response))

    revisions = float(self.get("revisionsCount", response=response))
    authors = float(self.get("authorsCount", response=response))
    ranking = 0.9 + (revisions / 2 + authors) / 50
    loader.add_value("ranking", ranking)
    return loader
def getBase(self, response):
    """Replace thumbnail and fulltext on the base loader.

    The fulltext is the item's ``content.rendered`` value passed through
    ``self.html2Text``.

    :param response: response whose ``meta["item"]`` holds the JSON item.
    :return: the loader created by ``LomBase.getBase`` with the values replaced.
    """
    loader = LomBase.getBase(self, response)

    thumbnail_url = self.get("acf.thumbnail.url", json=response.meta["item"])
    loader.replace_value("thumbnail", thumbnail_url)

    rendered_html = self.get("content.rendered", json=response.meta["item"])
    loader.replace_value("fulltext", self.html2Text(rendered_html))
    return loader
def getBase(self, response):
    """Add the element's preview image as thumbnail to the base loader.

    :param response: response whose ``meta["item"]`` is the element as a dict
        containing ``previewImageUrl``.
    :return: the loader created by ``LomBase.getBase`` with the thumbnail added.
    """
    loader = LomBase.getBase(self, response)
    element = response.meta["item"]
    # TODO: "For licensing reasons, this content is only available to users
    # registered in the Thuringian school portal."
    preview_url = element["previewImageUrl"]
    loader.add_value("thumbnail", preview_url)
    return loader
def getBase(self, response):
    """Add a thumbnail URL and the lastModified value to the base loader.

    The thumbnail is the ``thumbUrl`` value converted to ``str`` with every
    ``$1`` replaced by ``@l``.

    :param response: response handed on to ``LomBase.getBase`` and ``self.get``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)

    # NOTE(review): str() would turn a missing value (None) into the literal
    # text "None" - confirm that self.get always yields a value here.
    thumb_template = str(self.get("thumbUrl", response=response))
    loader.add_value("thumbnail", thumb_template.replace("$1", "@l"))

    modified = self.get("date_modified", response=response)
    loader.add_value("lastModified", modified)
    return loader
def getBase(self, response):
    """Replace thumbnail, type, fulltext and (when available) notes.

    The fulltext is the item's ``acf.long_text`` value with HTML character
    references unescaped. Notes are the ``notes`` entries of ``acf.notizen``
    joined with newlines; they are skipped when that data is missing or not
    shaped as expected.

    :param response: response whose ``meta["item"]`` holds the JSON item.
    :return: the loader created by ``LomBase.getBase`` with the values replaced.
    """
    # Function-scope import: ``HTMLParser().unescape`` was removed in
    # Python 3.9; ``html.unescape`` is the documented replacement.
    import html

    base = LomBase.getBase(self, response)
    item = response.meta["item"]
    base.replace_value("thumbnail", self.get("acf.thumbnail.url", json=item))
    base.replace_value("type", self.getType(response))
    fulltext = self.get("acf.long_text", json=item)
    base.replace_value("fulltext", html.unescape(fulltext))

    try:
        notes = "\n".join(
            entry["notes"] for entry in self.get("acf.notizen", json=item)
        )
    except (KeyError, TypeError):
        # Notes are optional. A missing list (not iterable), an entry without
        # a 'notes' key or a non-string note is ignored on purpose.
        pass
    else:
        base.replace_value("notes", notes)
    return base
def getBase(self, response):
    """Add fulltext and publisher values read from the OAI-PMH record.

    Namespaces are removed from the response selector first so that the plain
    XPath expressions below match. The fulltext is the first text found under
    the record's general description. For every ``contribute`` entry whose
    role is ``publisher``, the entity is parsed as a vCard and its formatted
    name is added as publisher.

    :param response: OAI-PMH ``GetRecord`` response.
    :return: the loader created by ``LomBase.getBase`` with the values added.
    """
    base = LomBase.getBase(self, response)
    response.selector.remove_namespaces()
    record = response.xpath('//OAI-PMH/GetRecord/record')
    base.add_value(
        'fulltext',
        record.xpath(
            'metadata/lom/general/description/string//text()'
        ).extract_first(),
    )

    # publisher
    for contributer in record.xpath('metadata/lom/lifeCycle/contribute'):
        role = contributer.xpath('role/value//text()').extract_first()
        if role != 'publisher':
            continue
        vcardStr = contributer.xpath('entity//text()').extract_first()
        vcard = vobject.readOne(vcardStr)
        # A vCard without an FN entry has no ``fn`` attribute; skip it
        # instead of failing the whole item.
        if hasattr(vcard, 'fn'):
            base.add_value('publisher', vcard.fn.value)
    return base
def getBase(self, response):
    """Add fulltext, thumbnail and publisher values read from the OAI-PMH record.

    Namespaces are removed from the response selector first so that the plain
    XPath expressions below match.

    :param response: OAI-PMH ``GetRecord`` response.
    :return: the loader created by ``LomBase.getBase`` with the values added.
    """
    loader = LomBase.getBase(self, response)
    response.selector.remove_namespaces()
    record = response.xpath("//OAI-PMH/GetRecord/record")

    description = record.xpath(
        "metadata/lom/general/description/string//text()"
    ).extract_first()
    loader.add_value("fulltext", description)

    # The thumbnail is the description string of the relation whose kind
    # value is "hasthumbnail"; it is only added when one is found.
    thumbnail_xpath = 'metadata/lom/relation/kind/value[text()="hasthumbnail"]/parent::*/parent::*/resource/description/string//text()'
    thumbnail_url = record.xpath(thumbnail_xpath).get()
    if thumbnail_url:
        loader.add_value("thumbnail", thumbnail_url)

    # publisher
    for entry in record.xpath("metadata/lom/lifeCycle/contribute"):
        if entry.xpath("role/value//text()").extract_first() != "publisher":
            continue
        entity_text = entry.xpath("entity//text()").extract_first()
        card = vobject.readOne(entity_text)
        # Only vCards that carry a formatted name contribute a publisher.
        if not hasattr(card, "fn"):
            continue
        loader.add_value("publisher", card.fn.value)
    return loader
def getBase(self, response):
    """Return the base loader from ``LomBase.getBase`` without extra values.

    :param response: response handed on to ``LomBase.getBase``.
    :return: the unmodified loader.
    """
    loader = LomBase.getBase(self, response)
    # Optionally provide a thumbnail here. If it is left empty, one will be
    # generated from the getLOMTechnical 'location' (if the format is 'text/html').
    # loader.add_value('thumbnail', 'https://url/to/thumbnail')
    return loader
def getBase(self, response):
    """Add the item's last-modified value to the base loader.

    :param response: response whose ``meta['item']`` is a selector for the item.
    :return: the loader created by ``LomBase.getBase`` with ``lastModified`` added.
    """
    loader = LomBase.getBase(self, response)
    # First text node found below the item's ``letzte_aenderung`` element.
    last_change = response.meta['item'].xpath('letzte_aenderung//text()').get()
    loader.add_value('lastModified', last_change)
    return loader
def getBase(self, response):
    """Add the text of ``/data/thumbnail`` as thumbnail to the base loader.

    :param response: response queried with XPath.
    :return: the loader created by ``LomBase.getBase`` with the thumbnail added.
    """
    loader = LomBase.getBase(self, response)
    thumbnail_url = response.xpath("/data/thumbnail/text()").get()
    loader.add_value("thumbnail", thumbnail_url)
    return loader
def getBase(self, response):
    """Add the text of ``/data/thumbnail`` as thumbnail to the base loader.

    :param response: response queried with XPath.
    :return: the loader created by ``LomBase.getBase`` with the thumbnail added.
    """
    loader = LomBase.getBase(self, response)
    thumbnail_url = response.xpath('/data/thumbnail/text()').get()
    loader.add_value('thumbnail', thumbnail_url)
    return loader
def getBase(self, response):
    """Add ``lastModified`` and a worksheet thumbnail URL to the base loader.

    The thumbnail URL is ``self.url`` followed by ``worksheet/``, the item id
    and the fixed suffix ``.jpg?width=1000``.

    :param response: response whose ``meta['item']`` provides ``updatedAt`` and ``id``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)
    loader.add_value('lastModified', response.meta['item']['updatedAt'])
    thumbnail_url = (
        self.url + 'worksheet/' + response.meta['item']['id'] + '.jpg?width=1000'
    )
    loader.add_value('thumbnail', thumbnail_url)
    return loader
def getBase(self, response):
    """Add lastModified and a computed ranking to the base loader.

    The ranking is ``0.9 + (revisionsCount / 2 + authorsCount) / 50``, with
    both counts converted to ``float`` first.

    :param response: response handed on to ``LomBase.getBase`` and ``self.get``.
    :return: the loader created by ``LomBase.getBase`` with both values added.
    """
    loader = LomBase.getBase(self, response)
    loader.add_value('lastModified', self.get('lastModified.date', response=response))

    revisions = float(self.get('revisionsCount', response=response))
    authors = float(self.get('authorsCount', response=response))
    ranking = 0.9 + (revisions / 2 + authors) / 50
    loader.add_value('ranking', ranking)
    return loader
def getBase(self, response):
    """Return the base loader exactly as ``LomBase.getBase`` produces it.

    :param response: response handed on to ``LomBase.getBase``.
    :return: the unmodified loader.
    """
    loader = LomBase.getBase(self, response)
    return loader
def getBase(self, response):
    """Add the thumbnail from ``self.commonProperties`` when it is set.

    :param response: response handed on to ``LomBase.getBase``.
    :return: the loader created by ``LomBase.getBase``; the thumbnail is only
        added when the configured value is truthy.
    """
    loader = LomBase.getBase(self, response)
    common_thumbnail = self.commonProperties['thumbnail']
    if not common_thumbnail:
        return loader
    loader.add_value('thumbnail', common_thumbnail)
    return loader
def getBase(self, response):
    """Replace the thumbnail with the item's ``image`` attribute.

    :param response: response whose ``meta["item"]`` is a selector for the item.
    :return: the loader created by ``LomBase.getBase`` with the thumbnail replaced.
    """
    loader = LomBase.getBase(self, response)
    image_attribute = response.meta["item"].xpath("@image").get()
    loader.replace_value("thumbnail", image_attribute)
    return loader
def getBase(self, response: Response) -> items.BaseItemLoader:
    """Set the static type and, when the row has one, the thumbnail.

    :param response: response whose ``meta["row"]`` provides ``thumbnail``.
    :return: the loader created by ``LomBase.getBase``; the thumbnail is only
        added when the row value is not ``None``.
    """
    loader = LomBase.getBase(self, response)
    loader.replace_value("type", self.static_values["type"])

    row_thumbnail = response.meta["row"]["thumbnail"]
    if row_thumbnail is None:
        return loader
    loader.add_value("thumbnail", row_thumbnail)
    return loader
def getBase(self, response):
    """Replace the fulltext with the item's ``parse.text.*`` value as plain text.

    :param response: response whose ``meta["item"]`` holds the JSON item.
    :return: the loader created by ``LomBase.getBase`` with the fulltext replaced.
    """
    loader = LomBase.getBase(self, response)
    raw_text = self.get("parse.text.*", json=response.meta["item"])
    # NOTE(review): the original author marked this conversion as "crashes!".
    # The cause is not visible here (possibly a missing value) - investigate
    # before relying on it.
    plain_text = self.html2Text(raw_text)
    loader.replace_value("fulltext", plain_text)
    return loader