Example #1
0
class Web:
    '''Reads websites through hostname-specific handlers, with caching.'''

    # Maps a hostname to the handler class used for it. The 'general' entry
    # is the fallback for every hostname without a dedicated handler.
    handlers = {
        'snl.no': SNL,
        'ndla.no': NDLA,
        'general': General
    }

    # Prefixes that mark a url as already carrying an HTTP(S) scheme.
    _SCHEMES = ('http://', 'https://')

    def __init__(self, http_client=None):
        '''Creates the cache used by read().

        http_client is accepted but is not used by this class.
        '''
        self.cache = Cache()

    def get_handler(self, url):
        '''Returns the handler class registered for the hostname of url.

        Falls back to the 'general' handler when the hostname has no entry
        of its own in handlers.
        '''
        hostname = urlparse(url).hostname
        return self.handlers.get(hostname, self.handlers['general'])

    def retrieve_and_parse(self, url):
        '''Requests url through its handler and returns the parsed result.'''
        handler_class = self.get_handler(url)
        handler = handler_class(url)
        data = handler.request()
        return handler.parse(data)

    def read(self, url):
        '''Returns a Website with metadata, using the cache when possible.

        A url that does not start with http:// or https:// (compared
        case-insensitively) gets 'http://' prepended. The resulting url is
        also the cache key.
        '''
        # Match the full scheme prefix instead of a bare 'http', so that a
        # scheme-less host such as 'httpbin.org' still gets a scheme and an
        # upper-case 'HTTP://...' is not prefixed a second time.
        if not url.lower().startswith(self._SCHEMES):
            url = 'http://%s' % url
        return self.cache.get_or_set(url, lambda: self.retrieve_and_parse(url))
class Nasjonalbiblioteket:
    '''Searches for and reads reference metadata from the nb.no services.'''

    HOST = 'http://www.nb.no'
    READ = HOST + '/nbsok/content/reference'
    SEARCH = HOST + '/services/search/v2/search'

    def __init__(self, http_client=None):
        '''Sets up the cache, the HTTP client and a logger.

        http_client is used for all requests; an HTTPClient is created when
        none (or a falsy value) is passed in.
        '''
        self.cache = Cache()
        self.http_client = http_client or HTTPClient()
        self.logger = getLogger(self.__class__.__name__)

    def extract_identifiers(self, xml):
        '''Extracts identifiers from the sesamids in the entry tags.

        Returns a list with the text of every sesam identifier that
        AtomParser finds in xml.
        '''
        sesam_identifiers = AtomParser(xml).sesam_identifiers
        return [sesam_id.text for sesam_id in sesam_identifiers]

    def extract_fields(self, endnote):
        '''Extracts fields from EndNote text using EndNoteParser.'''
        return EndNoteParser().parse(endnote)

    def read(self, identifier):
        '''Reads metadata for an identifier.

        Returns the parsed EndNote fields, or an empty dict when the
        (possibly cached) response is empty or None.
        '''
        parameters = ('id', identifier), ('format', 'enw')
        # read() and search() share one cache, so the key is prefixed per
        # operation. Otherwise a query that equals an identifier would make
        # read() pick up cached search XML (and the other way around).
        raw_endnote = self.cache.get_or_set(
            'read:{}'.format(identifier),
            lambda: self.http_client.get(self.READ, parameters)
        )
        return self.extract_fields(raw_endnote) if raw_endnote else {}

    def read_multiple(self, identifiers):
        '''Concurrently retrieves metadata for identifiers.

        Returns a list with one read() result per identifier, in the same
        order as identifiers.
        '''
        with ThreadPoolExecutor() as concurrent:
            return list(concurrent.map(self.read, identifiers))

    def search(self, query):
        '''Finds identifiers that match query.

        Returns a list of identifiers, or an empty list when the (possibly
        cached) response is empty or None.
        '''
        self.logger.info(query)
        parameters = ('itemsPerPage', 10), ('q', query)
        # Prefixed for the same reason as in read(): keep search results and
        # read results apart inside the shared cache.
        xml = self.cache.get_or_set(
            'search:{}'.format(query),
            lambda: self.http_client.get(self.SEARCH, parameters)
        )
        return self.extract_identifiers(xml) if xml else []
Example #3
0
class Oria:
    '''Searches for and reads reference metadata through the service at HOST.'''

    HOST = 'https://bibsys-almaprimo.hosted.exlibrisgroup.com'
    READ = HOST + '/primo_library/libweb/action/PushToAction.do'
    SEARCH = HOST + '/primo_library/libweb/action/search.do'

    def __init__(self, http_client=None):
        '''Sets up the cache, the HTTP client and a logger.

        http_client is used for all requests; an HTTPClient is created when
        none (or a falsy value) is passed in.
        '''
        self.cache = Cache()
        self.http_client = http_client or HTTPClient()
        self.logger = getLogger(self.__class__.__name__)

    def extract_identifiers(self, html):
        '''Extracts BIBSYS identifiers from HTML.

        A falsy html (e.g. None) is treated as an empty document. Only the
        identifiers that start with 'BIBSYS' are returned.
        '''
        parser = HTMLResultParser()
        parser.feed(html or '')
        return [
            identifier for identifier in parser.identifiers
            if identifier.startswith('BIBSYS')
        ]

    def search(self, query):
        '''Finds identifiers that match query.

        Returns the (possibly cached) list of identifiers, or an empty list
        when the cached value is falsy.
        '''
        self.logger.info(query)
        parameters = ('fn', 'search'), ('vl(freeText0)', query)
        # search() and read() share one cache, so the key is prefixed per
        # operation. Otherwise a query that equals an identifier would make
        # read() receive the cached identifier list (and the other way
        # around).
        identifiers = self.cache.get_or_set(
            'search:{}'.format(query),
            lambda: self.extract_identifiers(
                self.http_client.get(self.SEARCH, parameters)
            )
        )
        return identifiers or []

    def read(self, identifier):
        '''Reads metadata for an identifier.

        The EndNote export is parsed and cached, then passed through
        OriaConverter.convert on every call.
        '''
        endnote_parser = EndNoteParser()
        data = {'encode': 'UTF-8'}
        parameters = ('pushToType', 'EndNote'), ('docs', identifier)
        # NOTE(review): unlike search(), the response is not guarded against
        # None before parsing - confirm EndNoteParser.parse copes with that.
        parsed_endnote = self.cache.get_or_set(
            'read:{}'.format(identifier),
            lambda: endnote_parser.parse(
                self.http_client.post(self.READ, parameters, data)
            )
        )
        return OriaConverter.convert(parsed_endnote)

    def read_multiple(self, identifiers):
        '''Concurrently retrieves metadata for identifiers.

        Returns a list with one read() result per identifier, in the same
        order as identifiers.
        '''
        with ThreadPoolExecutor() as concurrent:
            return list(concurrent.map(self.read, identifiers))
Example #4
0
 def test_returns_existing_value_if_key_exist(self):
     '''get_or_set returns the stored value, not the value func's result.'''
     cache = Cache()
     # Seed the key first; whatever set() returns is not needed here.
     cache.set('foo', 'rab')
     value = cache.get_or_set('foo', lambda: 'bar')
     self.assertEqual(value, 'rab')
Example #5
0
 def test_returns_value_from_value_func_if_no_key_exist(self):
     '''get_or_set on a fresh cache returns what the value func produces.'''
     expected = 'bar'
     result = Cache().get_or_set('foo', lambda: expected)
     self.assertEqual(result, expected)