Beispiel #1
0
def test_get_metaxml(monkeypatch, mock_memcache):
    """Check the dict that ``ia.get_meta_xml`` builds from a metadata response.

    ``urllib2.urlopen`` is swapped for a stub that serves whatever text
    ``metadata_json`` currently holds, so each scenario only has to rebind
    that variable before calling ``ia.get_meta_xml``.
    """
    import StringIO
    import urllib2

    metadata_json = None

    def urlopen(url):
        # Looks up ``metadata_json`` at call time, not at definition time.
        return StringIO.StringIO(metadata_json)

    monkeypatch.setattr(urllib2, "urlopen", urlopen)

    # test with a well-formed JSON metadata response
    metadata_json = """{
        "metadata": {
            "title": "Foo", 
            "identifier": "foo00bar",
            "collection": ["printdisabled", "inlibrary"]
        }
    }
    """

    # Parenthesised so the line is valid both as a Python 2 print statement
    # and as a Python 3 print() call.
    print(ia.get_meta_xml("foo00bar"))
    assert ia.get_meta_xml("foo00bar") == {
        "title": "Foo",
        "identifier": "foo00bar",
        "collection": ["printdisabled", "inlibrary"],
        "access-restricted": False,
        "_filenames": []
    }

    # test with a response that has no "metadata" section
    metadata_json = "{}"
    assert ia.get_meta_xml("foo02bar") == {}
Beispiel #2
0
def test_get_metaxml(monkeypatch, mock_memcache):
    """Exercise ``ia.get_meta_xml`` against canned JSON metadata responses.

    A stub replaces ``urllib2.urlopen`` and returns the text currently stored
    in ``response["body"]`` wrapped in a file-like object.
    """
    import StringIO
    import urllib2

    # Mutable holder so the stub always serves the latest canned body.
    response = {"body": None}

    def fake_urlopen(url):
        return StringIO.StringIO(response["body"])

    monkeypatch.setattr(urllib2, "urlopen", fake_urlopen)

    # Scenario 1: a complete metadata document.
    response["body"] = """{
        "metadata": {
            "title": "Foo",
            "identifier": "foo00bar",
            "collection": ["printdisabled", "inlibrary"]
        }
    }
    """
    expected = {
        "title": "Foo",
        "identifier": "foo00bar",
        "collection": ["printdisabled", "inlibrary"],
        "access-restricted": False,
        "_filenames": [],
    }

    print(ia.get_meta_xml("foo00bar"))
    assert ia.get_meta_xml("foo00bar") == expected

    # Scenario 2: an empty JSON object yields an empty result.
    response["body"] = "{}"
    assert ia.get_meta_xml("foo02bar") == {}
Beispiel #3
0
def format_book_data(book):
    """Summarise ``book`` as a ``web.storage`` record.

    The record always carries ``key``, ``url``, ``title``, ``ocaid`` and
    ``authors``. ``cover_url`` is set from the work's cover, else the book's
    cover, else the archive.org image service when an ocaid is present.
    When an ocaid is present, ``borrow_url`` is set for items in the
    'lendinglibrary' or 'inlibrary' collections and ``read_url`` otherwise;
    both point at the book's "/borrow" URL.
    """
    d = web.storage()
    d.key = book.get('key')
    d.url = book.url()
    d.title = book.title or None
    d.ocaid = book.get("ocaid")

    def get_authors(doc):
        return [web.storage(key=a.key, name=a.name or None) for a in doc.get_authors()]

    # Authors come from the first work when there is one, else from the book.
    work = book.works and book.works[0]
    d.authors = get_authors(work or book)

    # Ask the work for its cover only once; fall back to the book's cover
    # when there is no work or the work has no cover.
    cover = (work and work.get_cover()) or book.get_cover()

    if cover:
        d.cover_url = cover.url("M")
    elif d.ocaid:
        d.cover_url = 'https://archive.org/services/img/%s' % d.ocaid

    if d.ocaid:
        collections = ia.get_meta_xml(d.ocaid).get("collection", [])

        if 'lendinglibrary' in collections or 'inlibrary' in collections:
            d.borrow_url = book.url("/borrow")
        else:
            d.read_url = book.url("/borrow")
    return d
Beispiel #4
0
def format_book_data(book):
    """Build a ``web.storage`` summary of ``book``.

    Sets ``key``, ``url``, ``title``, ``ocaid`` and ``authors`` on every
    record. ``cover_url`` prefers the work's cover, then the book's cover,
    then the archive.org image service (only when an ocaid exists). For a
    book with an ocaid, ``borrow_url`` is set when its collections include
    'lendinglibrary' or 'inlibrary'; otherwise ``read_url`` is set. Both use
    the book's "/borrow" URL.
    """
    d = web.storage()
    d.key = book.get('key')
    d.url = book.url()
    d.title = book.title or None
    d.ocaid = book.get("ocaid")

    def get_authors(doc):
        return [
            web.storage(key=a.key, name=a.name or None)
            for a in doc.get_authors()
        ]

    work = book.works and book.works[0]
    d.authors = get_authors(work if work else book)

    # Fetch the work's cover a single time, then fall back to the book's
    # cover if there is no work or the work has none.
    cover = work.get_cover() if work else None
    if not cover:
        cover = book.get_cover()

    if cover:
        d.cover_url = cover.url("M")
    elif d.ocaid:
        d.cover_url = 'https://archive.org/services/img/%s' % d.ocaid

    if d.ocaid:
        collections = ia.get_meta_xml(d.ocaid).get("collection", [])

        if 'lendinglibrary' in collections or 'inlibrary' in collections:
            d.borrow_url = book.url("/borrow")
        else:
            d.read_url = book.url("/borrow")
    return d
Beispiel #5
0
def format_book_data(book):
    """Return a ``web.storage`` describing ``book`` for display.

    Always sets ``key``, ``url``, ``title`` and ``authors``; sets
    ``cover_url`` when the book has a cover. When the book has an ocaid, its
    collections decide whether ``daisy_url`` is added and which one of
    ``borrow_url``, ``inlibrary_borrow_url`` or ``read_url`` is set.
    """
    def get_authors(doc):
        authors = []
        for author in doc.get_authors():
            authors.append(web.storage(key=author.key, name=author.name or None))
        return authors

    info = web.storage()
    info.key = book.key
    info.url = book.url()
    info.title = book.title or None

    # Prefer the first work's authors; fall back to the book's own.
    work = book.works and book.works[0]
    info.authors = get_authors(work or book)

    cover = book.get_cover()
    if cover:
        info.cover_url = cover.url("M")

    ia_id = book.get("ocaid")
    if not ia_id:
        return info

    collections = ia.get_meta_xml(ia_id).get("collection", [])
    if 'printdisabled' in collections or 'lendinglibrary' in collections:
        info.daisy_url = book.url("/daisy")

    borrow_url = book.url("/borrow")
    if 'lendinglibrary' in collections:
        info.borrow_url = borrow_url
    elif 'inlibrary' in collections:
        info.inlibrary_borrow_url = borrow_url
    else:
        info.read_url = borrow_url
    return info
Beispiel #6
0
def get_ia_availability(itemid):
    """Return 'borrow', 'restricted' or 'full' for the item ``itemid``.

    The answer is taken from the item's "collection" metadata:
    'lendinglibrary' wins over 'printdisabled', and anything else is 'full'.
    """
    collections = ia.get_meta_xml(itemid).get("collection", [])

    # Ordered by priority: the first collection found decides the result.
    priorities = (
        ('lendinglibrary', 'borrow'),
        ('printdisabled', 'restricted'),
    )
    for collection, availability in priorities:
        if collection in collections:
            return availability
    return 'full'
Beispiel #7
0
def get_ia_availability(itemid):
    """Classify how the item ``itemid`` may be accessed.

    Returns 'borrow' if its collections include 'lendinglibrary',
    'restricted' if they include 'printdisabled', and 'full' otherwise.
    """
    meta = ia.get_meta_xml(itemid)
    collections = meta.get("collection", [])

    if 'lendinglibrary' in collections:
        return 'borrow'
    if 'printdisabled' in collections:
        return 'restricted'
    return 'full'
Beispiel #8
0
def test_get_metaxml(monkeypatch, mock_memcache):
    """Exercise ``ia.get_meta_xml`` against canned XML, HTML and broken XML.

    A stub replaces ``urllib2.urlopen`` and returns the text currently stored
    in ``response["body"]`` wrapped in a file-like object.
    """
    import StringIO
    import urllib2

    # Mutable holder so the stub always serves the latest canned body.
    response = {"body": None}

    def fake_urlopen(url):
        return StringIO.StringIO(response["body"])

    monkeypatch.setattr(urllib2, "urlopen", fake_urlopen)

    # Scenario 1: well-formed metadata XML.
    response["body"] = """<?xml version="1.0" encoding="UTF-8"?>
    <metadata>
        <title>Foo</title>
        <identifier>foo00bar</identifier>
        <collection>printdisabled</collection>
        <collection>inlibrary</collection>
    </metadata>
    """
    expected = {
        "title": "Foo",
        "identifier": "foo00bar",
        "collection": ["printdisabled", "inlibrary"],
        'external-identifier': [],
    }
    assert ia.get_meta_xml("foo00bar") == expected

    # Scenario 2: an HTML error page instead of XML.
    response["body"] = """<html>\n<head>\n <title>Internet Archive: Error</title>..."""
    assert ia.get_meta_xml("foo01bar") == {}

    # Scenario 3: truncated, malformed XML.
    response["body"] = """<?xml version="1.0" encoding="UTF-8"?>
    <metadata>
        <title>Foo</title>
        <identifier>foo00bar
    """
    assert ia.get_meta_xml("foo02bar") == {}
Beispiel #9
0
    def get_ia_meta_fields(self):
        """Return this record's archive.org metadata, cached on the instance.

        A truthy ``self._ia_meta_fields`` is returned as-is. Otherwise the
        metadata is an empty dict when there is no ocaid, or the result of
        ``ia.get_meta_xml(self.ocaid)`` with "external-identifier" and
        "collection" defaulted to empty lists. The value is stored on
        ``self._ia_meta_fields`` before being returned.
        """
        # $$$ _ia_meta_fields is read here before it has ever been assigned;
        #     apparently some magic makes that work (and makes hasattr
        #     unusable as a "defined yet?" check).
        if self._ia_meta_fields:
            return self._ia_meta_fields

        if self.get('ocaid', None):
            meta = ia.get_meta_xml(self.ocaid)
            meta.setdefault("external-identifier", [])
            meta.setdefault("collection", [])
        else:
            meta = {}

        self._ia_meta_fields = meta
        return self._ia_meta_fields
Beispiel #10
0
    def get_ia_meta_fields(self):
        """Look up (and remember) the archive.org metadata for this record.

        Returns ``self._ia_meta_fields`` when it is already truthy. Otherwise
        builds the metadata: ``{}`` without an ocaid, else
        ``ia.get_meta_xml(self.ocaid)`` with the "external-identifier" and
        "collection" keys guaranteed to exist. The result is assigned to
        ``self._ia_meta_fields`` and returned.
        """
        # $$$ the attribute has not been assigned on the first call, yet
        #     reading it works through some magic (which also breaks using
        #     hasattr to check whether it is defined).
        if self._ia_meta_fields:
            return self._ia_meta_fields

        meta = {}
        if self.get('ocaid', None):
            meta = ia.get_meta_xml(self.ocaid)
            for field in ("external-identifier", "collection"):
                meta.setdefault(field, [])

        self._ia_meta_fields = meta
        return self._ia_meta_fields
Beispiel #11
0
def format_book_data(book):
    """Return a ``web.storage`` summary of ``book``.

    Always sets ``key``, ``url``, ``title`` and ``authors``. Optional fields:
    ``cover_url`` (book has a cover), ``overdrive_url`` (book has an
    'overdrive' identifier), and, when the book has an ocaid, ``daisy_url``
    plus exactly one of ``borrow_url``, ``inlibrary_borrow_url`` or
    ``read_url`` chosen from the item's collections.
    """
    summary = web.storage()
    summary.key = book.key
    summary.url = book.url()
    summary.title = book.title or None

    def get_authors(doc):
        return [web.storage(key=author.key, name=author.name or None)
                for author in doc.get_authors()]

    # Authors come from the first work when there is one.
    work = book.works and book.works[0]
    summary.authors = get_authors(work) if work else get_authors(book)

    cover = book.get_cover()
    if cover:
        summary.cover_url = cover.url("M")

    identifiers = book.get("identifiers", {})
    overdrive = identifiers.get('overdrive')
    if overdrive:
        summary.overdrive_url = "http://search.overdrive.com/SearchResults.aspx?ReserveID={%s}" % overdrive

    ia_id = book.get("ocaid")
    if not ia_id:
        return summary

    collections = ia.get_meta_xml(ia_id).get("collection", [])
    if 'printdisabled' in collections or 'lendinglibrary' in collections:
        summary.daisy_url = book.url("/daisy")

    borrow_url = book.url("/borrow")
    if 'lendinglibrary' in collections:
        summary.borrow_url = borrow_url
    elif 'inlibrary' in collections:
        summary.inlibrary_borrow_url = borrow_url
    else:
        summary.read_url = borrow_url
    return summary
Beispiel #12
0
def format_edition(edition):
    """This should be moved to a books or carousel model

    Build a plain dict describing ``edition``: ``ocaid``, ``title``, ``key``,
    ``url``, ``authors``, ``collections`` and ``protected`` (True when the
    item is in a 'lendinglibrary', 'browserlending' or 'inlibrary'
    collection). ``cover_url``, ``daisy_url`` and one of ``borrow_url``,
    ``inlibrary_borrow_url`` or ``read_url`` are added when they apply.

    An edition without an ocaid gets empty ``collections`` and no
    ``read_url``; previously that path raised TypeError while concatenating
    the archive.org stream URL with ``None``.
    """
    ocaid = edition.get('ocaid')
    # Only look up archive.org metadata when there is an item to look up.
    if ocaid:
        collections = ia.get_meta_xml(ocaid).get("collection", [])
    else:
        collections = []

    book = {
        'ocaid': ocaid,
        'title': edition.title or None,
        'key': edition.key,
        'url': edition.url(),
        'authors': [
            web.storage(key=a.key, name=a.name or None)
            for a in edition.get_authors()
        ],
        'collections': collections,
        'protected': any(
            c in collections
            for c in ('lendinglibrary', 'browserlending', 'inlibrary')
        ),
    }

    cover = edition.get_cover()
    if cover:
        book['cover_url'] = cover.url(u'M')

    if 'printdisabled' in collections or 'lendinglibrary' in collections:
        book['daisy_url'] = edition.url("/daisy")
    if 'lendinglibrary' in collections:
        book['borrow_url'] = edition.url("/borrow")
    elif 'inlibrary' in collections:
        book['inlibrary_borrow_url'] = edition.url("/borrow")
    elif ocaid:
        # A stream URL is only meaningful when there is an item id.
        book['read_url'] = "//archive.org/stream/" + ocaid
    return book
Beispiel #13
0
def format_book_data(book):
    """Collect display data for ``book`` into a ``web.storage``.

    ``key``, ``url``, ``title`` and ``authors`` are always present.
    ``cover_url`` and ``overdrive_url`` are added when the book has a cover
    or an 'overdrive' identifier. With an ocaid, the item's collections
    decide whether ``daisy_url`` is added and which of ``borrow_url``,
    ``inlibrary_borrow_url`` or ``read_url`` is set.
    """
    def get_authors(doc):
        found = []
        for a in doc.get_authors():
            found.append(web.storage(key=a.key, name=a.name or None))
        return found

    data = web.storage()
    data.key = book.key
    data.url = book.url()
    data.title = book.title or None

    work = book.works and book.works[0]
    author_source = work if work else book
    data.authors = get_authors(author_source)

    cover = book.get_cover()
    if cover:
        data.cover_url = cover.url("M")

    overdrive = book.get("identifiers", {}).get('overdrive')
    if overdrive:
        data.overdrive_url = "http://search.overdrive.com/SearchResults.aspx?ReserveID={%s}" % overdrive

    ia_id = book.get("ocaid")
    if ia_id:
        collections = ia.get_meta_xml(ia_id).get("collection", [])
        in_lending = 'lendinglibrary' in collections

        if 'printdisabled' in collections or in_lending:
            data.daisy_url = book.url("/daisy")

        # Exactly one of the three access links is set.
        if in_lending:
            data.borrow_url = book.url("/borrow")
        elif 'inlibrary' in collections:
            data.inlibrary_borrow_url = book.url("/borrow")
        else:
            data.read_url = book.url("/borrow")
    return data
Beispiel #14
0
def format_book_data(book):
    """Produce a ``web.storage`` record describing ``book``.

    The record always has ``key``, ``url``, ``title`` and ``authors``, plus
    ``cover_url`` when the book has a cover. When the book has an ocaid the
    record also gets ``ocaid``, possibly ``daisy_url``, and exactly one of
    ``borrow_url``, ``inlibrary_borrow_url`` or ``read_url`` depending on the
    item's collections.
    """
    record = web.storage()
    record.key = book.key
    record.url = book.url()
    record.title = book.title or None

    def get_authors(doc):
        return [web.storage(key=person.key, name=person.name or None)
                for person in doc.get_authors()]

    # Use the first work's authors if the book belongs to a work.
    work = book.works and book.works[0]
    record.authors = get_authors(work or book)

    cover = book.get_cover()
    if cover:
        record.cover_url = cover.url("M")

    ia_id = book.get("ocaid")
    if not ia_id:
        return record

    record.ocaid = ia_id
    collections = ia.get_meta_xml(ia_id).get("collection", [])
    if 'printdisabled' in collections or 'lendinglibrary' in collections:
        record.daisy_url = book.url("/daisy")

    borrow_url = book.url("/borrow")
    if 'lendinglibrary' in collections:
        record.borrow_url = borrow_url
    elif 'inlibrary' in collections:
        record.inlibrary_borrow_url = borrow_url
    else:
        record.read_url = borrow_url
    return record
Beispiel #15
0
    def process(self, req):
        """Resolve a bibkey request string into a dict of records.

        ``req`` is split on '|' into individual requests, and each request is
        split on ';' into bib keys. A request whose text starts with 'id:'
        (case-insensitive) uses the remainder of its first key as the result
        key; any other request is keyed by its full text.

        As a side effect this populates ``self.docs``, ``self.detailss``,
        ``self.datas``, ``self.works``, ``self.wkey_to_iaids``,
        ``self.iaid_to_meta`` and ``self.iaid_to_ed`` before
        ``self.make_record`` is called for each request.

        Returns a dict mapping each result key to the record returned by
        ``self.make_record``; requests with a falsy record are left out. When
        the 'debug_items' option is set, the keys 'ekeys', 'eds' and 'iaids'
        are added to the result as well.
        """
        requests = req.split('|')
        # Flatten the per-request key lists into one list of bib keys.
        bib_keys = sum([r.split(';') for r in requests], [])

        # filter out 'id:foo' before passing to dynlinks
        bib_keys = [k for k in bib_keys if k[:3].lower() != 'id:']

        self.docs = dynlinks.query_docs(bib_keys)
        if not self.options.get('no_details'):
            self.detailss = dynlinks.process_result_for_details(self.docs)
        else:
            self.detailss = {}
        dp = dynlinks.DataProcessor()
        self.datas = dp.process(self.docs)
        self.works = dp.works

        # XXX control costs below with [:iaid_limit] - note that this may result
        # in no 'exact' item match, even if one exists
        # Note that it's available thru above works/docs
        iaid_limit = 500
        self.wkey_to_iaids = dict((wkey, get_work_iaids(wkey)[:iaid_limit])
                                  for wkey in self.works)
        # Flatten the per-work id lists, then fetch metadata for every id.
        iaids = sum(self.wkey_to_iaids.values(), [])
        self.iaid_to_meta = dict((iaid, ia.get_meta_xml(iaid)) for iaid in iaids)

        def lookup_iaids(iaids):
            # Query edition keys by ocaid. Lists longer than ``step`` are
            # queried in slices of ``step`` ids (via recursion) unless the
            # 'debug_things' option is set, in which case one query is made.
            step = 10
            if len(iaids) > step and not self.options.get('debug_things'):
                result = []
                while iaids:
                    result += lookup_iaids(iaids[:step])
                    iaids = iaids[step:]
                return result
            query = {
                'type': '/type/edition',
                'ocaid': iaids,
            }
            result = web.ctx.site.things(query)
            return result

        ekeys = lookup_iaids(iaids)

        # If returned order were reliable, I could skip the below.
        eds = dynlinks.ol_get_many_as_dict(ekeys)
        self.iaid_to_ed = dict((ed['ocaid'], ed) for ed in eds.values())
        # self.iaid_to_ekey = dict((iaid, ed['key'])
        #                            for iaid, ed in self.iaid_to_ed.items())

        # Work towards building a dict of iaid loanability,
        # def has_lending_collection(meta):
        #     collections = meta.get("collection", [])
        #     return 'lendinglibrary' in collections or 'inlibrary' in collections
        # in case site.store supports get_many (unclear)
        # maybe_loanable_iaids = [iaid for iaid in iaids
        #                         if has_lending_collection(self.iaid_to_meta.get(iaid, {}))]
        # loanable_ekeys = [self.iaid_to_ekey.get(iaid) for iaid in maybe_loanable_iaids]
        # loanstatus =  web.ctx.site.store.get('ebooks' + ekey, {'borrowed': 'false'})

        result = {}
        for r in requests:
            bib_keys = r.split(';')
            if r.lower().startswith('id:'):
                # Drop the leading 'id:foo' key and use 'foo' as the result key.
                result_key = bib_keys.pop(0)[3:]
            else:
                result_key = r
            sub_result = self.make_record(bib_keys)
            if sub_result:
                result[result_key] = sub_result

        if self.options.get('debug_items'):
            result['ekeys'] = ekeys
            result['eds'] = eds
            result['iaids'] = iaids

        return result
Beispiel #16
0
def get_readable_edition_item(edition, work, user_inlibrary, initial_edition):
    """Describe how ``edition`` can be read online, or return None.

    Returns None when the edition has no ocaid, when the work has no
    subjects, when the item is in 'lendinglibrary' / 'inlibrary' but the work
    lacks the matching 'Lending library' / 'In library' subject, when it is
    an 'inlibrary' item and ``user_inlibrary`` is falsy, or when it is only
    in 'printdisabled'.

    Otherwise returns a dict with 'match' ('exact' when ``edition`` and
    ``initial_edition`` share a key, else 'similar'), 'status' ('full
    access', 'lendable' or 'checked out'), 'itemURL', the edition and work
    ids, and a 'cover' entry when the edition lists covers.
    """
    ocaid = edition.get('ocaid', False)
    if not ocaid:
        return None
    subjects = work.get_subjects()
    if not subjects:
        return None

    metaxml = ia.get_meta_xml(ocaid)

    collections = metaxml.get("collection", [])

    if 'lendinglibrary' in collections:
        if 'Lending library' not in subjects:
            return None
        status = 'lendable'
    elif 'inlibrary' in collections:
        if 'In library' not in subjects:
            return None
        if not user_inlibrary:
            return None
        status = 'lendable'
    elif 'printdisabled' in collections:
        # Print-disabled-only items are not offered as readable.
        return None
    else:
        status = 'full access'

    if status == 'full access':
        itemURL = "http://www.archive.org/stream/%s" % (ocaid)
    else:
        itemURL = u"http://openlibrary.org%s/%s/borrow" % (
            edition['key'], helpers.urlsafe(edition.get("title", "untitled")))

    if status == 'lendable':
        # The store records whether a lendable edition is currently on loan.
        loanstatus = web.ctx.site.store.get('ebooks' + edition['key'],
                                            {'borrowed': 'false'})
        if loanstatus['borrowed'] == 'true':
            status = 'checked out'

    if edition['key'] == initial_edition['key']:
        match = 'exact'
    else:
        match = 'similar'

    result = {
        'enumcron': False,
        # 'orig': 'University of California'
        # 'htid': ''
        # 'lastUpdate: "" # XXX from edition.last_modified (datetime)
        'match': match,
        'status': status,
        'fromRecord': initial_edition['key'],
        'ol-edition-id': key_to_olid(edition['key']),
        'ol-work-id': key_to_olid(work['key']),
        'contributor': 'contributor',
        'itemURL': itemURL,
    }

    if edition.get('covers'):
        cover_id = edition['covers'][0]
        # XXX covers url from yaml?
        result['cover'] = {
            "small": "http://covers.openlibrary.org/b/id/%s-S.jpg" % cover_id,
            "medium": "http://covers.openlibrary.org/b/id/%s-M.jpg" % cover_id,
            "large": "http://covers.openlibrary.org/b/id/%s-L.jpg" % cover_id,
        }

    return result
Beispiel #17
0
def get_readable_edition_item(edition, work, user_inlibrary, initial_edition):
    """Build the readable-item record for ``edition``, or return None.

    None is returned when the edition has no ocaid, the work has no subjects,
    the item's lending collection is not mirrored by the matching work
    subject, an 'inlibrary' item is requested while ``user_inlibrary`` is
    falsy, or the item is only in 'printdisabled'.

    The returned dict reports 'match' ('exact' or 'similar' relative to
    ``initial_edition``), 'status' ('full access', 'lendable' or 'checked
    out'), 'itemURL', the edition/work ids and, if the edition lists covers,
    a 'cover' entry with small, medium and large URLs.
    """
    ocaid = edition.get('ocaid', False)
    if not ocaid:
        return None
    subjects = work.get_subjects()
    if not subjects:
        return None

    collections = ia.get_meta_xml(ocaid).get("collection", [])

    # Decide whether the item is lendable, freely readable, or not offered.
    if 'lendinglibrary' in collections:
        if 'Lending library' not in subjects:
            return None
        lendable = True
    elif 'inlibrary' in collections:
        if 'In library' not in subjects or not user_inlibrary:
            return None
        lendable = True
    elif 'printdisabled' in collections:
        return None
    else:
        lendable = False

    if lendable:
        itemURL = u"http://openlibrary.org%s/%s/borrow" % (
            edition['key'],
            helpers.urlsafe(edition.get("title", "untitled")),
        )
        # A lendable item that is currently borrowed is reported as such.
        loanstatus = web.ctx.site.store.get('ebooks' + edition['key'], {'borrowed': 'false'})
        status = 'checked out' if loanstatus['borrowed'] == 'true' else 'lendable'
    else:
        itemURL = "http://www.archive.org/stream/%s" % (ocaid)
        status = 'full access'

    match = 'exact' if edition['key'] == initial_edition['key'] else 'similar'

    result = {
        'enumcron': False,
        # 'orig': 'University of California'
        # 'htid': ''
        # 'lastUpdate: "" # XXX from edition.last_modified (datetime)
        'match': match,
        'status': status,
        'fromRecord': initial_edition['key'],
        'ol-edition-id': key_to_olid(edition['key']),
        'ol-work-id': key_to_olid(work['key']),
        'contributor': 'contributor',
        'itemURL': itemURL,
    }

    if not edition.get('covers'):
        return result

    cover_id = edition['covers'][0]
    # XXX covers url from yaml?
    result['cover'] = {
        "small": "http://covers.openlibrary.org/b/id/%s-S.jpg" % cover_id,
        "medium": "http://covers.openlibrary.org/b/id/%s-M.jpg" % cover_id,
        "large": "http://covers.openlibrary.org/b/id/%s-L.jpg" % cover_id,
    }
    return result