Exemple #1
0
def load(rec, account_key=None):
    """Given a record, tries to add/match that edition in the system.

    Record is a dictionary containing all the metadata of the edition.
    The following fields are mandatory:

        * title: str
        * source_records: list

    If no candidate pool or no matching edition is found, the record is
    handed to load_data() to be added as a new edition. Otherwise the matched
    edition and its work are topped up with data from ``rec`` that they are
    missing (subjects, cover, description, authors, ocaid and a few list
    fields) and saved only if something changed.

    Note that ``rec`` is modified in place: ``source_records`` is coerced to
    a list, ``title`` may be split into ``title``/``subtitle`` and
    ``full_title`` may be added.

    :param dict rec: Edition record to add
    :param account_key: passed through unchanged to load_data() and add_cover()
    :rtype: dict
    :return: a dict to be converted into a JSON HTTP response, same as load_data()
    :raises RequiredField: if a mandatory field is missing or empty
    """
    required_fields = ['title', 'source_records'
                       ]  # ['authors', 'publishers', 'publish_date']
    for field in required_fields:
        if not rec.get(field):
            raise RequiredField(field)
    if not isinstance(rec['source_records'], list):
        rec['source_records'] = [rec['source_records']]

    # Split subtitle if required and not already present
    if ':' in rec.get('title') and not rec.get('subtitle'):
        title, subtitle = split_subtitle(rec.get('title'))
        if subtitle:
            rec['title'] = title
            rec['subtitle'] = subtitle

    rec = normalize_record_isbns(rec)

    edition_pool = build_pool(rec)
    if not edition_pool:
        # No match candidates found, add edition
        return load_data(rec, account_key=account_key)

    # Try the matchers in turn, stopping at the first one that yields a match.
    match = early_exit(rec)
    if not match:
        match = find_exact_match(rec, edition_pool)

    if not match:
        rec['full_title'] = rec['title']
        if rec.get('subtitle'):
            rec['full_title'] += ' ' + rec['subtitle']
        e1 = build_marc(rec)
        add_db_name(e1)
        match = find_match(e1, edition_pool)

    if not match:
        # No match found, add edition
        return load_data(rec, account_key=account_key)

    # We have an edition match at this point
    need_work_save = need_edition_save = False
    w = None
    e = web.ctx.site.get(match)
    # check for, and resolve, author redirects.
    # Iterate over a snapshot: the loop body removes from and appends to
    # e.authors, and mutating a list while iterating over it directly makes
    # the iterator skip elements.
    for a in list(e.authors):
        while is_redirect(a):
            if a in e.authors:
                e.authors.remove(a)
            a = web.ctx.site.get(a.location)
            if not is_redirect(a):
                e.authors.append(a)

    if e.get('works'):
        w = e.works[0].dict()
        work_created = False
    else:
        # Found an edition without a work
        work_created = need_work_save = need_edition_save = True
        w = new_work(e.dict(), rec)
        e.works = [{'key': w['key']}]

    # Add subjects to work, if not already present
    if 'subjects' in rec:
        work_subjects = list(w.get('subjects', []))
        for s in rec['subjects']:
            if s not in work_subjects:
                work_subjects.append(s)
                need_work_save = True
        if need_work_save and work_subjects:
            w['subjects'] = work_subjects

    # Add cover to edition
    if 'cover' in rec and not e.get_covers():
        cover_url = rec['cover']
        cover_id = add_cover(cover_url, e.key, account_key=account_key)
        if cover_id:
            e['covers'] = [cover_id]
            need_edition_save = True

    # Add cover to work, if needed
    if not w.get('covers') and e.get_covers():
        w['covers'] = [e['covers'][0]]
        need_work_save = True

    # Add description to work, if needed
    if not w.get('description') and e.get('description'):
        w['description'] = e['description']
        need_work_save = True

    # Add authors to work, if needed
    if not w.get('authors'):
        authors = [import_author(a) for a in rec.get('authors', [])]
        # Only authors that already carry a key are linked to the work.
        w['authors'] = [{
            'type': {
                'key': '/type/author_role'
            },
            'author': a.key
        } for a in authors if a.get('key')]
        if w.get('authors'):
            need_work_save = True

    # Add ocaid to edition (str), if needed
    if 'ocaid' in rec and not e.ocaid:
        e['ocaid'] = rec['ocaid']
        need_edition_save = True

    # Add list fields to edition as needed
    edition_fields = [
        'local_id',
        'lccn',
        'lc_classifications',
        'source_records',
    ]
    for f in edition_fields:
        if f not in rec:
            continue
        # ensure values is a list
        values = rec[f] if isinstance(rec[f], list) else [rec[f]]
        if f in e:
            # get values from rec that are not currently on the edition
            to_add = [v for v in values if v not in e[f]]
            e[f] += to_add
        else:
            e[f] = to_add = values
        if to_add:
            need_edition_save = True

    # Build the reply and collect only the documents that actually changed.
    edits = []
    reply = {
        'success': True,
        'edition': {
            'key': match,
            'status': 'matched'
        },
        'work': {
            'key': w['key'],
            'status': 'matched'
        },
    }
    if need_edition_save:
        reply['edition']['status'] = 'modified'
        edits.append(e.dict())
    if need_work_save:
        reply['work']['status'] = 'created' if work_created else 'modified'
        edits.append(w)
    if edits:
        web.ctx.site.save_many(edits,
                               comment='import existing book',
                               action='edit-book')
    if 'ocaid' in rec:
        # Called with the last path segment of the matched edition key.
        update_ia_metadata_for_ol_edition(match.split('/')[-1])
    return reply
def test_import_author_name_unchanged(author, new_import):
    """import_author() must return the author's name exactly as supplied.

    The name is captured before the call so the check still holds if
    import_author() modifies the ``author`` argument in place.
    ``new_import`` is requested but not used directly here (presumably a
    fixture that prepares the import environment -- confirm in conftest).
    """
    original_name = author['name']
    imported = import_author(author)
    assert imported['name'] == original_name
Exemple #3
0
def load_data(rec, account_key=None):
    """
    Adds a new Edition to Open Library. Checks for existing Works.
    Creates a new Work, and Author, if required,
    otherwise associates the new Edition with the existing Work.

    Note that ``rec`` is modified in place: its ``cover`` entry, if any, is
    removed before the edition dict is built.

    :param dict rec: Edition record to add (no further checks at this point)
    :param account_key: passed through unchanged to add_cover()
    :rtype: dict
    :return:
        {
            "success": False,
            "error": <error msg>
        }
      OR
        {
            "success": True,
            "work": {"key": <key>, "status": "created" | "modified" | "matched"},
            "edition": {"key": <key>, "status": "created"},
            "authors": [{"status": "matched", "name": "John Smith", "key": <key>}, ...]
        }
    """

    # Take the cover URL out of rec so that build_query() does not see it.
    cover_url = rec.pop('cover', None)
    try:
        # get an OL style edition dict
        edition = build_query(rec)
    except InvalidLanguage as e:
        return {
            'success': False,
            'error': str(e),
        }

    ekey = web.ctx.site.new_key('/type/edition')
    cover_id = None
    if cover_url:
        cover_id = add_cover(cover_url, ekey, account_key=account_key)
    if cover_id:
        edition['covers'] = [cover_id]

    edits = []  # Things (Edition, Work, Authors) to be saved
    reply = {}
    # TOFIX: edition.authors has already been processed by import_authors() in build_query(), following line is a NOP?
    author_in = [
        import_author(a, eastern=east_in_by_statement(rec, a))
        for a in edition.get('authors', [])
    ]
    # build_author_reply() adds authors to edits
    (authors, author_reply) = build_author_reply(author_in, edits,
                                                 rec['source_records'][0])

    if authors:
        edition['authors'] = authors
        reply['authors'] = author_reply

    wkey = None
    work_state = 'created'
    # Look for an existing work
    if 'authors' in edition:
        wkey = find_matching_work(edition)
    if wkey:
        w = web.ctx.site.get(wkey)
        work_state = 'matched'
        need_update = False
        for k in subject_fields:
            if k not in rec:
                continue
            # Normalise the work's current values once per field instead of
            # once per incoming subject, and keep the list in step with what
            # gets appended so duplicates within rec[k] are still detected.
            known = [normalize(existing) for existing in w.get(k, [])]
            for s in rec[k]:
                normalized = normalize(s)
                if normalized not in known:
                    w.setdefault(k, []).append(s)
                    known.append(normalized)
                    need_update = True
        if cover_id:
            w.setdefault('covers', []).append(cover_id)
            need_update = True
        if need_update:
            work_state = 'modified'
            edits.append(w.dict())
    else:
        # Create new work
        w = new_work(edition, rec, cover_id)
        wkey = w['key']
        edits.append(w)

    # Invariant: either an existing work matched or a new one was created.
    assert wkey
    edition['works'] = [{'key': wkey}]
    edition['key'] = ekey
    edits.append(edition)

    web.ctx.site.save_many(edits, comment='import new book', action='add-book')

    # Writes back `openlibrary_edition` and `openlibrary_work` to
    # archive.org item after successful import:
    if 'ocaid' in rec:
        update_ia_metadata_for_ol_edition(ekey.split('/')[-1])

    reply['success'] = True
    reply['edition'] = {'key': ekey, 'status': 'created'}
    reply['work'] = {'key': wkey, 'status': work_state}
    return reply
def test_import_author_name_natural_order(author, new_import):
    """import_author() must return the name as 'Forename Surname'.

    ``author`` and ``new_import`` are supplied by the test harness; their
    contents are not visible here (presumably pytest fixtures -- confirm in
    conftest).
    """
    imported = import_author(author)
    assert imported['name'] == 'Forename Surname'