class Web:
    '''Reads websites through a handler chosen from the url's hostname.

    ``handlers`` maps a hostname to the handler class used for it. The
    ``'general'`` entry is the fallback for any hostname without an entry.
    '''

    handlers = {
        'snl.no': SNL,
        'ndla.no': NDLA,
        'general': General
    }

    # Prefixes that mark a url as already carrying an explicit scheme.
    _SCHEME_PREFIXES = ('http://', 'https://')

    def __init__(self, http_client=None):
        '''Creates the cache used by read().

        ``http_client`` is accepted so the signature stays compatible with
        existing callers, but this class does not use it.
        '''
        self.cache = Cache()

    def get_handler(self, url):
        '''Returns an appropriate handler for the url.'''
        hostname = urlparse(url).hostname
        return self.handlers.get(hostname, self.handlers['general'])

    def retrieve_and_parse(self, url):
        '''Requests and parses response.'''
        handler_class = self.get_handler(url)
        handler = handler_class(url)
        data = handler.request()
        website = handler.parse(data)
        return website

    def read(self, url):
        '''Returns a Website with metadata.

        A url without an explicit ``http://`` or ``https://`` prefix gets
        ``http://`` prepended before it is used as cache key and fetched.
        '''
        # Match the full scheme prefix (case-insensitively) instead of the
        # bare text 'http'; otherwise a scheme-less host such as
        # 'httpbin.org' would wrongly be treated as an absolute url.
        if not url.lower().startswith(self._SCHEME_PREFIXES):
            url = 'http://%s' % url
        website = self.cache.get_or_set(
            url, lambda: self.retrieve_and_parse(url)
        )
        return website
class Nasjonalbiblioteket:
    '''Searches and reads metadata from the endpoints below HOST.'''

    HOST = 'http://www.nb.no'
    READ = HOST + '/nbsok/content/reference'
    SEARCH = HOST + '/services/search/v2/search'

    def __init__(self, http_client=None):
        '''Creates the cache, the HTTP client and a class-named logger.

        A default HTTPClient is created when no (truthy) client is given.
        '''
        self.cache = Cache()
        if not http_client:
            http_client = HTTPClient()
        self.http_client = http_client
        self.logger = getLogger(type(self).__name__)

    def extract_identifiers(self, xml):
        '''Returns the text of every sesam identifier found in the xml.'''
        return [node.text for node in AtomParser(xml).sesam_identifiers]

    def extract_fields(self, endnote):
        '''Parses EndNote text and returns the parsed fields.'''
        return EndNoteParser().parse(endnote)

    def read(self, identifier):
        '''Returns the metadata fields for an identifier.

        The raw EndNote response is cached per identifier. A falsy
        response gives an empty dict.
        '''
        query_parameters = (('id', identifier), ('format', 'enw'))

        def fetch():
            return self.http_client.get(self.READ, query_parameters)

        raw = self.cache.get_or_set(identifier, fetch)
        if not raw:
            return {}
        return self.extract_fields(raw)

    def read_multiple(self, identifiers):
        '''Reads several identifiers on a thread pool, keeping their order.'''
        with ThreadPoolExecutor() as pool:
            results = pool.map(self.read, identifiers)
        return list(results)

    def search(self, query):
        '''Returns the identifiers matching query.

        The query is logged, and the raw XML response is cached per query.
        A falsy response gives an empty list.
        '''
        self.logger.info(query)
        query_parameters = (('itemsPerPage', 10), ('q', query))

        def fetch():
            return self.http_client.get(self.SEARCH, query_parameters)

        response = self.cache.get_or_set(query, fetch)
        if not response:
            return []
        return self.extract_identifiers(response)
class Oria:
    '''Searches and reads metadata from the endpoints below HOST.'''

    HOST = 'https://bibsys-almaprimo.hosted.exlibrisgroup.com'
    READ = HOST + '/primo_library/libweb/action/PushToAction.do'
    SEARCH = HOST + '/primo_library/libweb/action/search.do'

    def __init__(self, http_client=None):
        '''Creates the cache, the HTTP client and a class-named logger.

        A default HTTPClient is created when no (truthy) client is given.
        '''
        self.cache = Cache()
        if not http_client:
            http_client = HTTPClient()
        self.http_client = http_client
        self.logger = getLogger(type(self).__name__)

    def extract_identifiers(self, html):
        '''Returns the parsed identifiers that start with 'BIBSYS'.'''
        result_parser = HTMLResultParser()
        # A falsy response is fed to the parser as an empty document.
        result_parser.feed(html if html else '')
        return [
            found for found in result_parser.identifiers
            if found.startswith('BIBSYS')
        ]

    def search(self, query):
        '''Returns the identifiers matching query.

        The query is logged, and the extracted identifiers are cached per
        query. A falsy cached value gives an empty list.
        '''
        self.logger.info(query)
        search_parameters = (('fn', 'search'), ('vl(freeText0)', query))

        def find_identifiers():
            response = self.http_client.get(self.SEARCH, search_parameters)
            return self.extract_identifiers(response)

        found = self.cache.get_or_set(query, find_identifiers)
        return found if found else []

    def read(self, identifier):
        '''Returns the converted metadata for an identifier.

        The parsed EndNote response is cached per identifier and passed
        through OriaConverter.convert on every call.
        '''
        parser = EndNoteParser()
        form_data = {'encode': 'UTF-8'}
        push_parameters = (('pushToType', 'EndNote'), ('docs', identifier))

        def fetch_and_parse():
            response = self.http_client.post(
                self.READ, push_parameters, form_data
            )
            return parser.parse(response)

        parsed = self.cache.get_or_set(identifier, fetch_and_parse)
        return OriaConverter.convert(parsed)

    def read_multiple(self, identifiers):
        '''Reads several identifiers on a thread pool, keeping their order.'''
        with ThreadPoolExecutor() as pool:
            results = pool.map(self.read, identifiers)
        return list(results)
def test_returns_existing_value_if_key_exist(self):
    # An already stored key must win over the value function.
    cache = Cache()
    # Seed the key. The return value of set() is not part of this test,
    # so it is deliberately not bound to a name.
    cache.set('foo', 'rab')
    value = cache.get_or_set('foo', lambda: 'bar')
    self.assertEqual(value, 'rab')
def test_returns_value_from_value_func_if_no_key_exist(self):
    # With nothing stored under the key, the result of the value
    # function is what get_or_set is expected to hand back.
    empty_cache = Cache()
    produced = empty_cache.get_or_set('foo', lambda: 'bar')
    self.assertEqual(produced, 'bar')