Example #1
def test_load_with_new_author(mock_site, ia_writeback):
    rec = {
        'ocaid': 'test_item',
        'title': 'Test item',
        'authors': [{'name': 'John Doe'}],
        'source_records': 'ia:test_item',
    }
    reply = load(rec)
    assert reply['success'] is True
    w = mock_site.get(reply['work']['key'])
    assert reply['authors'][0]['status'] == 'created'
    assert reply['authors'][0]['name'] == 'John Doe'
    akey1 = reply['authors'][0]['key']
    assert akey1 == '/authors/OL1A'
    a = mock_site.get(akey1)
    assert w.authors
    assert a.type.key == '/type/author'

    # Tests that an existing author is modified if an Author match is found and more data is provided
    # This represents an edition of another work by the above author.
    rec = {
        'ocaid': 'test_item1b',
        'title': 'Test item1b',
        'authors': [{'name': 'Doe, John', 'entity_type': 'person'}],
        'source_records': 'ia:test_item1b',
    }
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'created'
    assert reply['work']['status'] == 'created'
    akey2 = reply['authors'][0]['key']

    # TODO: There is no code that modifies an author if more data is provided.
    # previously the status implied the record was always 'modified', when a match was found.
    #assert reply['authors'][0]['status'] == 'modified'
    #a = mock_site.get(akey2)
    #assert 'entity_type' in a
    #assert a.entity_type == 'person'

    assert reply['authors'][0]['status'] == 'matched'
    assert akey1 == akey2 == '/authors/OL1A'

    # Tests that an item with the same title but a different ocaid and author is not overwritten
    rec = {
        'ocaid': 'test_item2',
        'title': 'Test item',
        'authors': [{'name': 'James Smith'}],
        'source_records': 'ia:test_item2',
    }
    reply = load(rec)
    akey3 = reply['authors'][0]['key']
    assert akey3 == '/authors/OL2A'
    assert reply['authors'][0]['status'] == 'created'
    assert reply['work']['status'] == 'created'
    assert reply['edition']['status'] == 'created'
    w = mock_site.get(reply['work']['key'])
    e = mock_site.get(reply['edition']['key'])
    assert e.ocaid == 'test_item2'
    assert len(w.authors) == 1
    assert len(e.authors) == 1
def test_load_multiple(mock_site):
    rec = {
        'title': 'Test item',
        'lccn': ['123'],
        'source_records': ['ia:test_item'],
        'authors': [{
            'name': 'Smith, John',
            'birth_date': '1980'
        }],
    }
    reply = load(rec)
    assert reply['success'] is True
    ekey1 = reply['edition']['key']

    reply = load(rec)
    assert reply['success'] is True
    ekey2 = reply['edition']['key']
    assert ekey1 == ekey2

    reply = load({
        'title': 'Test item',
        'source_records': ['ia:test_item2'],
        'lccn': ['456']
    })
    assert reply['success'] is True
    ekey3 = reply['edition']['key']
    assert ekey3 != ekey1

    reply = load(rec)
    assert reply['success'] is True
    ekey4 = reply['edition']['key']

    assert ekey1 == ekey2 == ekey4
def test_same_twice(mock_site, add_languages):
    rec = {
        'source_records': ['ia:test_item'],
        "publishers": ["Ten Speed Press"],
        "pagination":
        "20 p.",
        "description":
        "A macabre mash-up of the children's classic Pat the Bunny and the present-day zombie phenomenon, with the tactile features of the original book revoltingly re-imagined for an adult audience.",
        "title":
        "Pat The Zombie",
        "isbn_13": ["9781607740360"],
        "languages": ["eng"],
        "isbn_10": ["1607740362"],
        "authors": [{
            "entity_type": "person",
            "name": "Aaron Ximm",
            "personal_name": "Aaron Ximm"
        }],
        "contributions": ["Kaveh Soofi (Illustrator)"]
    }
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'created'
    assert reply['work']['status'] == 'created'

    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'matched'
    assert reply['work']['status'] == 'matched'
    def test_from_marc(self, ia, mock_site, add_languages):
        data = open_test_data(ia + '_meta.mrc').read()
        assert len(data) == int(data[:5])
        rec = read_edition(MarcBinary(data))
        rec['source_records'] = ['ia:' + ia]
        reply = load(rec)
        assert reply['success'] is True
        assert reply['edition']['status'] == 'created'
        e = mock_site.get(reply['edition']['key'])
        assert e.type.key == '/type/edition'
        reply = load(rec)
        assert reply['success'] is True
        assert reply['edition']['status'] == 'matched'
Example #5
def test_editions_matched(mock_site, add_languages, ia_writeback):
    rec = {
        'title': 'test',
        'isbn_13': ['9780190906764'],
        'isbn_10': ['0190906766'],
        'source_records': ['test:001'],
    }
    load(rec)
    isbns = isbns_from_record(rec)

    result_10 = editions_matched(rec, 'isbn_10', '0190906766')
    assert result_10 == ['/books/OL1M']

    result_13 = editions_matched(rec, 'isbn_13', '9780190906764')
    assert result_13 == ['/books/OL1M']

    # Searching on the key 'isbn_' returns a match on either the isbn_10 or isbn_13 metadata field
    result = editions_matched(rec, 'isbn_', isbns)
    assert result == ['/books/OL1M']
    def test_missing_ocaid(self, mock_site, add_languages, ia_writeback):
        ia = 'descendantsofhug00cham'
        src = ia + '_meta.mrc'
        marc = MarcBinary(open_test_data(src).read())
        rec = read_edition(marc)
        rec['source_records'] = ['marc:testdata.mrc']
        reply = load(rec)
        assert reply['success'] is True
        rec['source_records'] = ['ia:' + ia]
        rec['ocaid'] = ia
        reply = load(rec)
        assert reply['success'] is True
        e = mock_site.get(reply['edition']['key'])
        assert e.ocaid == ia
        assert 'ia:' + ia in e.source_records
Example #7
def test_existing_work_with_subtitle(mock_site, add_languages):
    author = {
        'type': {'key': '/type/author'},
        'name': 'John Smith',
        'key': '/authors/OL20A'}
    existing_work = {
        'authors': [{'author': '/authors/OL20A', 'type': {'key': '/type/author_role'}}],
        'key': '/works/OL16W',
        'title': 'Finding existing works',
        'type': {'key': '/type/work'},
    }
    mock_site.save(author)
    mock_site.save(existing_work)
    rec = {
        'source_records': 'non-marc:test',
        'title': 'Finding Existing Works',
        'subtitle': 'the ongoing saga!',
        'authors': [{'name': 'John Smith'}],
        'publishers': ['Black Spot'],
        'publish_date': 'Jan 09, 2011',
        'isbn_10': ['1250144051'],
    }

    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'created'
    assert reply['work']['status'] == 'matched'
    assert reply['work']['key'] == '/works/OL16W'
    assert reply['authors'][0]['status'] == 'matched'
    e = mock_site.get(reply['edition']['key'])
    assert e.works[0]['key'] == '/works/OL16W'
Example #8
def test_from_marc_fields(mock_site, add_languages):
    ia = 'isbn_9781419594069'
    data = open_test_data(ia + '_meta.mrc').read()
    rec = read_edition(MarcBinary(data))
    rec['source_records'] = ['ia:' + ia]
    reply = load(rec)
    assert reply['success'] is True
    # author from 100
    assert reply['authors'][0]['name'] == 'Adam Weiner'

    edition = mock_site.get(reply['edition']['key'])
    # Publish place, publisher, & publish date - 260$a, $b, $c
    assert edition['publishers'][0] == 'Kaplan Publishing'
    assert edition['publish_date'] == '2007'
    assert edition['publish_places'][0] == 'New York'
    # Pagination 300
    assert edition['number_of_pages'] == 264
    assert edition['pagination'] == 'viii, 264 p.'
    # 8 subjects, 650
    assert len(edition['subjects']) == 8
    assert edition['subjects'] == [
        u'Action and adventure films', u'Miscellanea', u'Physics',
        u'Cinematography', u'Special effects', u'Physics in motion pictures',
        u'Science fiction films', u'Popular works'
    ]
    # Edition description from 520
    desc = 'Explains the basic laws of physics, covering such topics as mechanics, forces, and energy, while deconstructing famous scenes and stunts from motion pictures, including "Apollo 13" and "Titanic," to determine if they are possible.'
    assert isinstance(edition['description'], Text)
    assert edition['description'] == desc
    # Work description from 520
    work = mock_site.get(reply['work']['key'])
    assert isinstance(work['description'], Text)
    assert work['description'] == desc
Example #9
    def GET(self, isbn):
        isbn = normalize_isbn(isbn)
        isbn_type = 'isbn_' + ('13' if len(isbn) == 13 else '10')
        metadata = {
            'amazon': get_amazon_metadata(isbn) or {},
            'betterworldbooks': get_betterworldbooks_metadata(isbn) or {}
        }
        # if bwb fails and isbn10, try again with isbn13
        if len(isbn) == 10 and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(isbn)
            metadata['betterworldbooks'] = get_betterworldbooks_metadata(
                isbn_13) or {}

        # fetch book by isbn if it exists
        book = web.ctx.site.things({
            'type': '/type/edition',
            isbn_type: isbn,
        })

        # if no OL edition for isbn, attempt to create
        if (not book) and metadata.get('amazon'):
            book = load(clean_amazon_metadata_for_load(
                metadata.get('amazon')))

        # include ol edition metadata in response, if available
        if book:
            ed = web.ctx.site.get(book[0])
            if ed:
                metadata['key'] = ed.key
                if getattr(ed, 'ocaid', None):
                    metadata['ocaid'] = ed.ocaid

        return simplejson.dumps(metadata)
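
For orientation, here is a rough, hypothetical sketch of the JSON shape the handler above returns. The keys inside 'amazon' and 'betterworldbooks' come from get_amazon_metadata() and get_betterworldbooks_metadata() and are assumptions; only 'key' and 'ocaid' are added by this handler itself.

# Hypothetical response sketch only; the vendor-dict fields are illustrative,
# not guaranteed by the affiliate APIs.
example_response = {
    'amazon': {'title': 'Example Title'},      # hypothetical Amazon fields
    'betterworldbooks': {'price': '$5.00'},    # hypothetical BWB fields
    'key': '/books/OL1M',       # set only when an OL edition was found or created
    'ocaid': 'test_item',       # set only when that edition has an ocaid
}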
Example #10
class importapi:
    def POST(self):
        web.header('Content-Type', 'application/json')

        if not can_write():
            raise web.HTTPError('403 Forbidden')

        data = web.data()
        error_code = "unknown_error"

        try:
            edition, format = parse_data(data)
        except DataError as e:
            edition = None
            error_code = str(e)

        #call Edward's code here with the edition dict
        if edition:
            source_url = None

            ## Anand - July 2014
            ## This is adding source_records as [null] as queue_s3_upload is disabled.
            ## Disabling this as well to fix the issue.

            # if 'source_records' not in edition:
            #     source_url = queue_s3_upload(data, format)
            #     edition['source_records'] = [source_url]

            reply = add_book.load(edition)
            if source_url:
                reply['source_record'] = source_url
            return json.dumps(reply)
        else:
            return json.dumps({
                'success': False,
                'error_code': error_code,
                'error': 'Failed to parse Edition data'
            })
Example #11
def create_edition_from_amazon_metadata(id_, id_type='isbn'):
    """Fetches amazon metadata by isbn from affiliates API, attempts to
    create OL edition from metadata, and returns the resulting edition key
    `/key/OL..M` if successful or None otherwise
    """
    md = get_amazon_metadata(id_, id_type=id_type)
    if md and md.get('product_group') == 'Book':
        # Save token of currently logged in user (or no-user)
        account = accounts.get_current_user()
        auth_token = account.generate_login_code() if account else ''

        try:
            # Temporarily behave (act) as ImportBot for import
            tmp_account = accounts.find(username='******')
            web.ctx.conn.set_auth_token(tmp_account.generate_login_code())
            reply = load(clean_amazon_metadata_for_load(md),
                         account=tmp_account)
        except Exception as e:
            web.ctx.conn.set_auth_token(auth_token)
            raise e

        # Return auth token to original user or no-user
        web.ctx.conn.set_auth_token(auth_token)

        if reply and reply.get('success'):
            return reply['edition'].get('key')
Example #12
    def POST(self):
        web.header('Content-Type', 'application/json')
        if not can_write():
            raise web.HTTPError('403 Forbidden')

        data = web.data()

        try:
            edition, format = parse_data(data)
        except DataError as e:
            return self.error(str(e), 'Failed to parse import data')
        except ValidationError as e:
            return self.error('invalid-value', str(e).replace('\n', ': '))

        if not edition:
            return self.error('unknown-error', 'Failed to parse import data')

        try:
            reply = add_book.load(edition)
            # TODO: If any records have been created, return a 201, otherwise 200
            return json.dumps(reply)
        except add_book.RequiredField as e:
            return self.error('missing-required-field', str(e))
        except ClientException as e:
            return self.error('bad-request', **json.loads(e.json))
Example #13
    def POST(self):
        web.header('Content-Type', 'application/json')
        if not can_write():
            raise web.HTTPError('403 Forbidden')

        data = web.data()

        try:
            edition, format = parse_data(data)
        except DataError as e:
            return self.error(str(e), 'Failed to parse import data')

        if not edition:
            return self.error('unknown_error', 'Failed to parse import data')

        ## Anand - July 2014
        ## This is adding source_records as [null] as queue_s3_upload is disabled.
        ## Disabling this as well to fix the issue.
        #source_url = None
        # if 'source_records' not in edition:
        #     source_url = queue_s3_upload(data, format)
        #     edition['source_records'] = [source_url]

        try:
            reply = add_book.load(edition)
        except add_book.RequiredField as e:
            return self.error('missing-required-field', str(e))
        #if source_url:
        #    reply['source_record'] = source_url
        return json.dumps(reply)
Example #14
    def POST(self):
        web.header('Content-Type', 'application/json')

        if not can_write():
            return json.dumps({'success': False, 'error': 'Permission Denied'})

        data = web.data()

        edition, format = parse_data(data)
        #print edition

        source_url = None
        if 'source_records' not in edition:
            source_url = queue_s3_upload(data, format)
            edition['source_records'] = [source_url]

        #call Edward's code here with the edition dict
        if edition:
            reply = add_book.load(edition)
            if source_url:
                reply['source_record'] = source_url
            return json.dumps(reply)
        else:
            return json.dumps({
                'success': False,
                'error': 'Failed to parse Edition data'
            })
Example #15
class importapi:
    """/api/import endpoint for general data formats.
    """
    def POST(self):
        web.header('Content-Type', 'application/json')
        if not can_write():
            raise web.HTTPError('403 Forbidden')

        data = web.data()

        try:
            edition, format = parse_data(data)
        except DataError as e:
            return self.error(str(e), 'Failed to parse Edition data')

        if not edition:
            return self.error('unknown_error', 'Failed to parse Edition data')

        ## Anand - July 2014
        ## This is adding source_records as [null] as queue_s3_upload is disabled.
        ## Disabling this as well to fix the issue.
        #source_url = None
        # if 'source_records' not in edition:
        #     source_url = queue_s3_upload(data, format)
        #     edition['source_records'] = [source_url]

        reply = add_book.load(edition)
        #if source_url:
        #    reply['source_record'] = source_url
        return json.dumps(reply)
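
A rough client-side sketch of exercising the /api/import endpoint shown above. The host, the use of the requests library, and the session-cookie handling are assumptions; the record mirrors the test data used in these examples, and a login with write permission is required or the handler responds 403 Forbidden.

import json
import requests  # assumption: requests is available in the client environment

record = {
    'title': 'Test item',
    'source_records': ['ia:test_item'],
    'authors': [{'name': 'John Doe'}],
}
# Hypothetical call; cookie name and value are placeholders.
resp = requests.post('https://openlibrary.org/api/import',
                     data=json.dumps(record),
                     cookies={'session': 'REPLACE_ME'})
print(resp.json())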
Example #16
def create_edition_from_amazon_metadata(isbn):
    """Fetches amazon metadata by isbn from affiliates API, attempts to
    create OL edition from metadata, and returns the resulting edition key
    `/key/OL..M` if successful or None otherwise
    """
    md = get_amazon_metadata(isbn)
    if md:
        # Save token of currently logged in user (or no-user)
        account = accounts.get_current_user()
        auth_token = account.generate_login_code() if account else ''

        try:
            # Temporarily behave (act) as ImportBot for import
            tmp_account = accounts.find(username='******')
            web.ctx.conn.set_auth_token(tmp_account.generate_login_code())
            reply = load(clean_amazon_metadata_for_load(md),
                         account=tmp_account)
        except Exception as e:
            web.ctx.conn.set_auth_token(auth_token)
            raise e

        # Return auth token to original user or no-user
        web.ctx.conn.set_auth_token(auth_token)

        if reply and reply.get('success'):
            return reply['edition']['key']
    def test_from_marc_author(self, mock_site, add_languages):
        ia = 'flatlandromanceo00abbouoft'
        marc = MarcBinary(open_test_data(ia + '_meta.mrc').read())

        rec = read_edition(marc)
        rec['source_records'] = ['ia:' + ia]
        reply = load(rec)
        assert reply['success'] is True
        assert reply['edition']['status'] == 'created'
        a = mock_site.get(reply['authors'][0]['key'])
        assert a.type.key == '/type/author'
        assert a.name == 'Edwin Abbott Abbott'
        assert a.birth_date == '1838'
        assert a.death_date == '1926'
        reply = load(rec)
        assert reply['success'] is True
        assert reply['edition']['status'] == 'matched'
    def test_from_marc_reimport_modifications(self, mock_site, add_languages):
        src = 'v38.i37.records.utf8--16478504-1254'
        marc = MarcBinary(open_test_data(src).read())
        rec = read_edition(marc)
        rec['source_records'] = ['marc:' + src]
        reply = load(rec)
        assert reply['success'] is True
        reply = load(rec)
        assert reply['success'] is True
        assert reply['edition']['status'] == 'matched'

        src = 'v39.i28.records.utf8--5362776-1764'
        marc = MarcBinary(open_test_data(src).read())
        rec = read_edition(marc)
        rec['source_records'] = ['marc:' + src]
        reply = load(rec)
        assert reply['success'] is True
        assert reply['edition']['status'] == 'modified'
Example #19
def test_missing_source_records(mock_site, add_languages):
    mock_site.save({
        'key': '/authors/OL592898A',
        'name': 'Michael Robert Marrus',
        'personal_name': 'Michael Robert Marrus',
        'type': { 'key': '/type/author' }
    })

    mock_site.save({
        'authors': [{'author': '/authors/OL592898A', 'type': { 'key': '/type/author_role' }}],
        'key': '/works/OL16029710W',
        'subjects': ['Nuremberg Trial of Major German War Criminals, Nuremberg, Germany, 1945-1946', 'Protected DAISY', 'Lending library'],
        'title': 'The Nuremberg war crimes trial, 1945-46',
        'type': { 'key': '/type/work' },
    })

    mock_site.save({
        "number_of_pages": 276,
        "subtitle": "a documentary history",
        "series": ["The Bedford series in history and culture"],
        "covers": [6649715, 3865334, 173632],
        "lc_classifications": ["D804.G42 N87 1997"],
        "ocaid": "nurembergwarcrim00marr",
        "contributions": ["Marrus, Michael Robert."],
        "uri_descriptions": ["Book review (H-Net)"],
        "title": "The Nuremberg war crimes trial, 1945-46",
        "languages": [{"key": "/languages/eng"}],
        "subjects": ["Nuremberg Trial of Major German War Criminals, Nuremberg, Germany, 1945-1946"],
        "publish_country": "mau", "by_statement": "[compiled by] Michael R. Marrus.",
        "type": {"key": "/type/edition"},
        "uris": ["http://www.h-net.org/review/hrev-a0a6c9-aa"],
        "publishers": ["Bedford Books"],
        "ia_box_id": ["IA127618"],
        "key": "/books/OL1023483M",
        "authors": [{"key": "/authors/OL592898A"}],
        "publish_places": ["Boston"],
        "pagination": "xi, 276 p. :",
        "lccn": ["96086777"],
        "notes": {"type": "/type/text", "value": "Includes bibliographical references (p. 262-268) and index."},
        "identifiers": {"goodreads": ["326638"], "librarything": ["1114474"]},
        "url": ["http://www.h-net.org/review/hrev-a0a6c9-aa"],
        "isbn_10": ["031216386X", "0312136919"],
        "publish_date": "1997",
        "works": [{"key": "/works/OL16029710W"}]
    })

    ia = 'nurembergwarcrim1997marr'
    src = ia + '_meta.mrc'
    marc = MarcBinary(open_test_data(src).read())
    rec = read_edition(marc)
    rec['source_records'] = ['ia:' + ia]

    reply = load(rec)
    assert reply['success'] is True
    e = mock_site.get(reply['edition']['key'])
    assert 'source_records' in e
Example #20
def create_edition_from_amazon_metadata(isbn):
    """Fetches amazon metadata by isbn from affiliates API, attempts to
    create OL edition from metadata, and returns the resulting edition key
    `/key/OL..M` if successful or None otherwise
    """
    md = get_amazon_metadata(isbn)
    if md:
        reply = load(clean_amazon_metadata_for_load(md))
        if reply and reply.get('success'):
            return reply['edition']['key']
Example #21
def create_edition_from_amazon_metadata(isbn):
    """Fetches amazon metadata by isbn from affiliates API, attempts to
    create OL edition from metadata, and returns the resulting edition key
    `/key/OL..M` if successful or None otherwise
    """
    md = get_amazon_metadata(isbn)
    if md:
        reply = load(clean_amazon_metadata_for_load(md))
        if reply and reply.get('success'):
            return reply['edition']['key']
Example #22
    def load_book(self, edition_data):
        """
        Takes a well-constructed, full Edition record and sends it to add_book,
        which checks whether the Edition is already in the system and adds it,
        along with a Work, if they do not already exist.

        :param dict edition_data: Edition record
        :rtype: dict
        """
        result = add_book.load(edition_data)
        return json.dumps(result)
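
The load_book() method above is a thin JSON wrapper around add_book.load(). As a minimal sketch of driving the underlying call directly (the import path assumes Open Library's catalog package is importable and an infogami site connection is configured; the record mirrors the test data used elsewhere in these examples):

# Minimal sketch, not a definitive invocation.
from openlibrary.catalog import add_book

edition_data = {
    'title': 'Test item',
    'ocaid': 'test_item',
    'source_records': ['ia:test_item'],
    'authors': [{'name': 'John Doe'}],
}
result = add_book.load(edition_data)  # dict with 'success', 'edition', 'work', 'authors'
print(result.get('success'))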
def test_load_with_subjects(mock_site, ia_writeback):
    rec = {
        'ocaid': 'test_item',
        'title': 'Test item',
        'subjects': ['Protected DAISY', 'In library'],
        'source_records': 'ia:test_item',
    }
    reply = load(rec)
    assert reply['success'] is True
    w = mock_site.get(reply['work']['key'])
    assert w.title == 'Test item'
    assert w.subjects == ['Protected DAISY', 'In library']
def test_load_with_redirected_author(mock_site, add_languages):
    """Test importing existing editions without works
       which have author redirects. A work should be created with
       the final author.
    """
    redirect_author = {
        'type': {
            'key': '/type/redirect'
        },
        'name': 'John Smith',
        'key': '/authors/OL55A',
        'location': '/authors/OL10A'
    }
    final_author = {
        'type': {
            'key': '/type/author'
        },
        'name': 'John Smith',
        'key': '/authors/OL10A'
    }
    orphaned_edition = {
        'title': 'Test item HATS',
        'key': '/books/OL10M',
        'publishers': ['TestPub'],
        'publish_date': '1994',
        'authors': [{
            'key': '/authors/OL55A'
        }],
        'type': {
            'key': '/type/edition'
        }
    }
    mock_site.save(orphaned_edition)
    mock_site.save(redirect_author)
    mock_site.save(final_author)

    rec = {
        'title': 'Test item HATS',
        'authors': [{
            'name': 'John Smith'
        }],
        'publishers': ['TestPub'],
        'publish_date': '1994',
        'source_records': 'ia:test_redir_author'
    }
    reply = load(rec)
    assert reply['edition']['status'] == 'modified'
    assert reply['edition']['key'] == '/books/OL10M'
    assert reply['work']['status'] == 'created'
    e = mock_site.get(reply['edition']['key'])
    assert e.authors[0].key == '/authors/OL10A'
    w = mock_site.get(reply['work']['key'])
    assert w.authors[0].author.key == '/authors/OL10A'
Example #25
def test_extra_author(mock_site, add_languages):
    mock_site.save({
        "name": "Hubert Howe Bancroft",
        "death_date": "1918.",
        "alternate_names": ["HUBERT HOWE BANCROFT", "Hubert Howe Bandcroft"],
        "key": "/authors/OL563100A",
        "birth_date": "1832",
        "personal_name": "Hubert Howe Bancroft",
        "type": {"key": "/type/author"},
    })

    mock_site.save({
        "title": "The works of Hubert Howe Bancroft",
        "covers": [6060295, 5551343],
        "first_sentence": {"type": "/type/text", "value": "When it first became known to Europe that a new continent had been discovered, the wise men, philosophers, and especially the learned ecclesiastics, were sorely perplexed to account for such a discovery."},
        "subject_places": ["Alaska", "America", "Arizona", "British Columbia", "California", "Canadian Northwest", "Central America", "Colorado", "Idaho", "Mexico", "Montana", "Nevada", "New Mexico", "Northwest Coast of North America", "Northwest boundary of the United States", "Oregon", "Pacific States", "Texas", "United States", "Utah", "Washington (State)", "West (U.S.)", "Wyoming"],
        "excerpts": [{"excerpt": "When it first became known to Europe that a new continent had been discovered, the wise men, philosophers, and especially the learned ecclesiastics, were sorely perplexed to account for such a discovery."}],
        "first_publish_date": "1882",
        "key": "/works/OL3421434W",
        "authors": [{"type": {"key": "/type/author_role"}, "author": {"key": "/authors/OL563100A"}}],
        "subject_times": ["1540-1810", "1810-1821", "1821-1861", "1821-1951", "1846-1850", "1850-1950", "1859-", "1859-1950", "1867-1910", "1867-1959", "1871-1903", "Civil War, 1861-1865", "Conquest, 1519-1540", "European intervention, 1861-1867", "Spanish colony, 1540-1810", "To 1519", "To 1821", "To 1846", "To 1859", "To 1867", "To 1871", "To 1889", "To 1912", "Wars of Independence, 1810-1821"],
        "type": {"key": "/type/work"},
        "subjects": ["Antiquities", "Archaeology", "Autobiography", "Bibliography", "California Civil War, 1861-1865", "Comparative Literature", "Comparative civilization", "Courts", "Description and travel", "Discovery and exploration", "Early accounts to 1600", "English essays", "Ethnology", "Foreign relations", "Gold discoveries", "Historians", "History", "Indians", "Indians of Central America", "Indians of Mexico", "Indians of North America", "Languages", "Law", "Mayas", "Mexican War, 1846-1848", "Nahuas", "Nahuatl language", "Oregon question", "Political aspects of Law", "Politics and government", "Religion and mythology", "Religions", "Social life and customs", "Spanish", "Vigilance committees", "Writing", "Zamorano 80", "Accessible book", "Protected DAISY"]
    })

    ia = 'workshuberthowe00racegoog'
    src = ia + '_meta.mrc'
    marc = MarcBinary(open_test_data(src).read())
    rec = read_edition(marc)
    rec['source_records'] = ['ia:' + ia]

    reply = load(rec)
    assert reply['success'] is True

    w = mock_site.get(reply['work']['key'])

    reply = load(rec)
    assert reply['success'] is True
    w = mock_site.get(reply['work']['key'])
    assert len(w['authors']) == 1
Example #26
def test_from_marc(mock_site, add_languages):
    ia = 'flatlandromanceo00abbouoft'
    data = open_test_data(ia + '_meta.mrc').read()
    assert len(data) == int(data[:5])
    rec = read_edition(MarcBinary(data))
    reply = load(rec)
    assert reply['success'] is True
    akey1 = reply['authors'][0]['key']
    a = mock_site.get(akey1)
    assert a.type.key == '/type/author'
    assert a.name == 'Edwin Abbott Abbott'
    assert a.birth_date == '1838'
    assert a.death_date == '1926'
    def test_author_from_700(self, mock_site, add_languages):
        ia = 'sexuallytransmit00egen'
        data = open_test_data(ia + '_meta.mrc').read()
        rec = read_edition(MarcBinary(data))
        rec['source_records'] = ['ia:' + ia]
        reply = load(rec)
        assert reply['success'] is True
        # author from 700
        akey = reply['authors'][0]['key']
        a = mock_site.get(akey)
        assert a.type.key == '/type/author'
        assert a.name == 'Laura K. Egendorf'
        assert a.birth_date == '1973'
Example #28
    def GET(self):
        # @hornc, add: title='', asin='', authors=''
        i = web.input(isbn='', asin='')

        if not (i.isbn or i.asin):
            return simplejson.dumps({'error': 'isbn or asin required'})

        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        id_type = 'asin' if i.asin else 'isbn_' + (
            '13' if len(id_) == 13 else '10')

        metadata = {
            'amazon': get_amazon_metadata(id_) or {},
            'betterworldbooks': (get_betterworldbooks_metadata(id_)
                                 if id_type.startswith('isbn_') else {}),
        }
        # if isbn_13 fails for amazon, we may want to check isbn_10 also
        # xxx

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            metadata['betterworldbooks'] = (
                isbn_13 and get_betterworldbooks_metadata(isbn_13) or {})

        # fetch book by isbn if it exists
        # if asin... for now, it will fail (which is fine)
        matches = web.ctx.site.things({
            'type': '/type/edition',
            id_type: id_,
        })

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata.get('amazon'):
            resp = load(clean_amazon_metadata_for_load(metadata.get('amazon')))
            if resp and 'edition' in resp:
                book_key = resp.get('edition').get('key')

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                if getattr(ed, 'ocaid', None):
                    metadata['ocaid'] = ed.ocaid

        return simplejson.dumps(metadata)
Example #29
    def GET(self):
        # @hornc, add: title='', asin='', authors=''
        i = web.input(isbn='', asin='')

        if not (i.isbn or i.asin):
            return simplejson.dumps({
                'error': 'isbn or asin required'
            })

        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        id_type = 'asin' if i.asin else 'isbn_' + ('13' if len(id_) == 13 else '10')

        metadata = {
            'amazon': get_amazon_metadata(id_) or {},
            'betterworldbooks': get_betterworldbooks_metadata(id_) if id_type.startswith('isbn_') else {}
        }
        # if isbn_13 fails for amazon, we may want to check isbn_10 also
        # xxx

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            metadata['betterworldbooks'] = isbn_13 and get_betterworldbooks_metadata(
                isbn_13) or {}

        # fetch book by isbn if it exists
        # if asin... for now, it will fail (which is fine)
        matches = web.ctx.site.things({
            'type': '/type/edition',
            id_type: id_,
        })

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata.get('amazon'):
            resp = load(clean_amazon_metadata_for_load(
                metadata.get('amazon')))
            if resp and 'edition' in resp:
                book_key = resp.get('edition').get('key')

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                if getattr(ed, 'ocaid', None):
                    metadata['ocaid'] = ed.ocaid

        return simplejson.dumps(metadata)
Example #30
def test_from_marc(mock_site, add_languages):
    ia = 'coursepuremath00hardrich'
    marc = MarcBinary(open_test_data(ia + '_meta.mrc').read())
    rec = read_edition(marc)
    rec['source_records'] = ['ia:' + ia]
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'created'
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'matched'

    ia = 'flatlandromanceo00abbouoft'
    marc = MarcBinary(open_test_data(ia + '_meta.mrc').read())

    rec = read_edition(marc)
    rec['source_records'] = ['ia:' + ia]
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'created'
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'matched'
def test_duplicate_ia_book(mock_site, add_languages, ia_writeback):
    rec = {
        'ocaid': 'test_item',
        'source_records': ['ia:test_item'],
        'title': 'Test item',
        'languages': ['eng'],
    }
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'created'
    e = mock_site.get(reply['edition']['key'])
    assert e.type.key == '/type/edition'
    assert e.source_records == ['ia:test_item']

    rec = {
        'ocaid': 'test_item',
        'source_records': ['ia:test_item'],
        # Titles MUST match to be considered the same
        'title': 'Test item',
        'languages': ['fre'],
    }
    reply = load(rec)
    assert reply['success'] is True
    assert reply['edition']['status'] == 'matched'
Example #32
def test_try_merge(mock_site):
    rec = {
        'title': 'Test item',
        'lccn': ['123'],
        'authors': [{'name': 'Smith, John', 'birth_date': '1980'}],
        'source_records': ['ia:test_item'],
    }
    reply = load(rec)
    ekey = reply['edition']['key']
    e = mock_site.get(ekey)

    rec['full_title'] = rec['title']
    e1 = build_marc(rec)
    add_db_name(e1)
    result = try_merge(e1, ekey, e)
    assert result is True
Example #33
    def POST(self):
        web.header('Content-Type', 'application/json')

        if not can_write():
            return json.dumps({'success': False, 'error': 'Permission Denied'})

        data = web.data()

        edition = parse_data(data)
        #print edition

        #call Edward's code here with the edition dict
        if edition:
            reply = load(edition)
            return json.dumps(reply)
        else:
            return json.dumps({'success': False, 'error': 'Failed to parse Edition data'})
Example #34
class importapi:
    def GET(self):
        web.header('Content-Type', 'text/plain')
        tasks.add.delay(777, 777)
        return 'Import API only supports POST requests.'

    def POST(self):
        web.header('Content-Type', 'application/json')

        if not can_write():
            return json.dumps({'success': False, 'error': 'Permission Denied'})

        data = web.data()
        error_code = "unknown_error"

        try:
            edition, format = parse_data(data)
        except DataError as e:
            edition = None
            error_code = str(e)

        #print edition

        #call Edward's code here with the edition dict
        if edition:
            source_url = None

            ## Anand - July 2014
            ## This is adding source_records as [null] as queue_s3_upload is disabled.
            ## Disabling this as well to fix the issue.

            # if 'source_records' not in edition:
            #     source_url = queue_s3_upload(data, format)
            #     edition['source_records'] = [source_url]

            reply = add_book.load(edition)
            if source_url:
                reply['source_record'] = source_url
            return json.dumps(reply)
        else:
            return json.dumps({
                'success': False,
                'error_code': error_code,
                'error': 'Failed to parse Edition data'
            })
Example #35
def create_edition_from_amazon_metadata(id_, id_type='isbn'):
    """Fetches Amazon metadata by id from Amazon Product Advertising API, attempts to
    create OL edition from metadata, and returns the resulting edition
    key `/key/OL..M` if successful or None otherwise.

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :return: Edition key '/key/OL..M' or None
    :rtype: str or None
    """

    md = get_amazon_metadata(id_, id_type=id_type)

    if md and md.get('product_group') == 'Book':
        with accounts.RunAs('ImportBot') as account:
            reply = load(clean_amazon_metadata_for_load(md), account=account)
            if reply and reply.get('success'):
                return reply['edition'].get('key')
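
A hedged usage sketch of the function above: the ISBN is taken from an earlier example in this listing, while the ASIN is made up; the return value is an edition key such as '/books/OL1M' (as seen in the tests above) or None when Amazon has no Book-type metadata for the id.

# Hypothetical calls against the function defined above.
key_from_isbn = create_edition_from_amazon_metadata('9781419594069')
key_from_asin = create_edition_from_amazon_metadata('B00EXAMPLE', id_type='asin')  # made-up ASIN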
Example #36
    def POST(self):
        web.header('Content-Type', 'application/json')

        if not can_write():
            return json.dumps({'success': False, 'error': 'Permission Denied'})

        data = web.data()

        edition, format = parse_data(data)
        #print edition

        #call Edward's code here with the edition dict
        if edition:
            source_url = None
            if 'source_records' not in edition:
                source_url = queue_s3_upload(data, format)
                edition['source_records'] = [source_url]

            reply = add_book.load(edition)
            if source_url:
                reply['source_record'] = source_url
            return json.dumps(reply)
        else:
            return json.dumps({'success': False, 'error': 'Failed to parse Edition data'})
Example #37
    def POST(self):
        web.header('Content-Type', 'application/json')

        if not can_write():
            raise web.HTTPError('403 Forbidden')

        i = web.input()

        require_marc = not (i.get('require_marc') == 'false')
        bulk_marc = i.get('bulk_marc') == 'true'

        if 'identifier' not in i:
            return self.error('bad-input', 'identifier not provided')
        identifier = i.identifier

        # First check whether this is a non-book, bulk-marc item
        if bulk_marc:
            # Get binary MARC by identifier = ocaid/filename:offset:length
            re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
            try:
                ocaid, filename, offset, length = re_bulk_identifier.match(identifier).groups()
                data, next_offset, next_length = get_from_archive_bulk(identifier)
                next_data = {'next_record_offset': next_offset, 'next_record_length': next_length}
                rec = MarcBinary(data)
                edition = read_edition(rec)
            except MarcException as e:
                details = "%s: %s" % (identifier, str(e))
                logger.error("failed to read from bulk MARC record %s", details)
                return self.error('invalid-marc-record', details, **next_data)

            actual_length = int(rec.leader()[:MARC_LENGTH_POS])
            edition['source_records'] = 'marc:%s/%s:%s:%d' % (ocaid, filename, offset, actual_length)

            #TODO: Look up URN prefixes to support more sources, extend openlibrary/catalog/marc/sources?
            if ocaid == 'OpenLibraries-Trent-MARCs':
                prefix = 'trent'
                edition['local_id'] = ['urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')]

            result = add_book.load(edition)

            # Add next_data to the response as location of next record:
            result.update(next_data)
            return json.dumps(result)

        # Case 1 - Is this a valid Archive.org item?
        try:
            item_json = ia.get_item_json(identifier)
            item_server = item_json['server']
            item_path = item_json['dir']
        except KeyError:
            return self.error("invalid-ia-identifier", "%s not found" % identifier)
        metadata = ia.extract_item_metadata(item_json)
        if not metadata:
            return self.error("invalid-ia-identifier")

        # Case 2 - Does the item have an openlibrary field specified?
        # The scan operators search OL before loading the book and add the
        # OL key if a match is found. We can trust them and attach the item
        # to that edition.
        if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
            edition_data = self.get_ia_record(metadata)
            edition_data["openlibrary"] = metadata["openlibrary"]
            edition_data = self.populate_edition_data(edition_data, identifier)
            return self.load_book(edition_data)

        # Case 3 - Can the item be loaded into Open Library?
        status = ia.get_item_status(identifier, metadata,
                                    item_server=item_server, item_path=item_path)
        if status != 'ok':
            return self.error(status, "Prohibited Item")

        # Case 4 - Does this item have a marc record?
        marc_record = self.get_marc_record(identifier)
        if marc_record:
            self.reject_non_book_marc(marc_record)

            try:
                edition_data = read_edition(marc_record)
            except MarcException as e:
                logger.error("failed to read from MARC record %s: %s", identifier, str(e))
                return self.error("invalid-marc-record")

        elif require_marc:
            return self.error("no-marc-record")

        else:
            try:
                edition_data = self.get_ia_record(metadata)
            except KeyError:
                return self.error("invalid-ia-metadata")

        # Add IA specific fields: ocaid, source_records, and cover
        edition_data = self.populate_edition_data(edition_data, identifier)

        return self.load_book(edition_data)
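
The bulk-MARC branch above expects identifier in the form ocaid/filename:offset:length. A minimal sketch of how such a string is decomposed, using the same regular expression written as a raw string; the filename, offset, and length values are hypothetical.

import re

re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
identifier = 'OpenLibraries-Trent-MARCs/part01.mrc:0:541'  # hypothetical filename/offset/length
ocaid, filename, offset, length = re_bulk_identifier.match(identifier).groups()
# groups are strings: ('OpenLibraries-Trent-MARCs', 'part01.mrc', '0', '541')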
Example #38
    def POST(self):
        web.header('Content-Type', 'application/json')

        if not can_write():
            raise web.HTTPError('403 Forbidden')

        i = web.input()

        require_marc = not (i.get('require_marc') == 'false')
        bulk_marc = i.get('bulk_marc') == 'true'

        if 'identifier' not in i:
            return self.error('bad-input', 'identifier not provided')
        identifier = i.identifier

        # First check whether this is a non-book, bulk-marc item
        if bulk_marc:
            # Get binary MARC by identifier = ocaid/filename:offset:length
            re_bulk_identifier = re.compile(r"([^/]*)/([^:]*):(\d*):(\d*)")
            try:
                ocaid, filename, offset, length = re_bulk_identifier.match(identifier).groups()
                data, next_offset, next_length = get_from_archive_bulk(identifier)
                next_data = {'next_record_offset': next_offset, 'next_record_length': next_length}
                rec = MarcBinary(data)
                edition = read_edition(rec)
            except MarcException as e:
                details = "%s: %s" % (identifier, str(e))
                logger.error("failed to read from bulk MARC record %s", details)
                return self.error('invalid-marc-record', details, **next_data)

            actual_length = int(rec.leader()[:5])
            edition['source_records'] = 'marc:%s/%s:%s:%d' % (ocaid, filename, offset, actual_length)

            #TODO: Look up URN prefixes to support more sources
            prefix = 'trent'
            edition['local_id'] = ['urn:%s:%s' % (prefix, _id) for _id in rec.get_fields('001')]
            result = add_book.load(edition)

            # Add next_data to the response as location of next record:
            result.update(next_data)

            return json.dumps(result)

        # Case 0 - Is the item already loaded
        key = self.find_edition(identifier)
        if key:
            return self.status_matched(key)

        # Case 1 - Is this a valid Archive.org item?
        try:
            item_json = ia.get_item_json(identifier)
            item_server = item_json['server']
            item_path = item_json['dir']
        except KeyError:
            return self.error("invalid-ia-identifier", "%s not found" % identifier)
        metadata = ia.extract_item_metadata(item_json)
        if not metadata:
            return self.error("invalid-ia-identifier")

        # Case 2 - Does the item have an openlibrary field specified?
        # The scan operators search OL before loading the book and add the
        # OL key if a match is found. We can trust them and attach the item
        # to that edition.
        if metadata.get("mediatype") == "texts" and metadata.get("openlibrary"):
            d = {
                "title": metadata['title'],
                "openlibrary": "/books/" + metadata["openlibrary"]
            }
            d = self.populate_edition_data(d, identifier)
            return self.load_book(d)

        # Case 3 - Can the item be loaded into Open Library?
        status = ia.get_item_status(identifier, metadata,
                                    item_server=item_server, item_path=item_path)
        if status != 'ok':
            return self.error(status, "Prohibited Item")

        # Gio - April 2016
        # items with metadata no_ol_import=true will not be imported
        if metadata.get("no_ol_import", '').lower() == 'true':
            return self.error("no-ol-import")

        # Case 4 - Does this item have a marc record?
        marc_record = self.get_marc_record(identifier)
        if marc_record:
            # Is the item a serial instead of a book?
            marc_leaders = marc_record.leader()
            if marc_leaders[7] == 's':
                return self.error("item-is-serial")

            # insider note: follows Archive.org's approach of
            # Item::isMARCXMLforMonograph() which excludes non-books
            if not (marc_leaders[7] == 'm' and marc_leaders[6] == 'a'):
                return self.error("item-not-book")

            try:
                edition_data = read_edition(marc_record)
            except MarcException as e:
                logger.error("failed to read from MARC record %s: %s", identifier, str(e))
                return self.error("invalid-marc-record")

        elif require_marc:
            return self.error("no-marc-record")

        else:
            try:
                edition_data = self.get_ia_record(metadata)
            except KeyError:
                return self.error("invalid-ia-metadata")

        # Add IA specific fields: ocaid, source_records, and cover
        edition_data = self.populate_edition_data(edition_data, identifier)

        return self.load_book(edition_data)
Example #39
    def load_book(self, edition_data):
        result = add_book.load(edition_data)
        return json.dumps(result)