Example #1
def get_src(key):
    # Prefer the edition's own source_records field; otherwise fall
    # back on the MARC location that get_mc finds for this key.
    e = withKey(key)
    if 'source_records' in e:
        return e['source_records']
    src = get_mc(key)
    if src:
        return [src]
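Every example on this page leans on withKey, which none of the snippets define; in the Open Library import code it fetches the record stored under a key and returns it as a plain dict (the final example reaches it as db_read.withKey). A minimal stand-in for reading these snippets, with an invented sample record, might look like:

# Hypothetical stand-in: the real withKey queries the Open Library
# database; this record and its key are invented for illustration.
SAMPLE_DB = {
    '/b/OL1M': {
        'key': '/b/OL1M',
        'type': {'key': '/type/edition'},
        'source_records': ['marc:marc_records/part01.dat:0:1234'],
    },
}

def withKey(key):
    return SAMPLE_DB.get(key)

print(get_src('/b/OL1M'))  # -> ['marc:marc_records/part01.dat:0:1234']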
Example #2
def load_part(archive_id, part, start_pos=0):
    print('load_part:', archive_id, part)
    global rec_no, t_prev, load_count
    full_part = archive_id + "/" + part
    f = open(rc['marc_path'] + "/" + full_part, 'rb')  # MARC records are binary
    if start_pos:
        f.seek(start_pos)
    for pos, loc, data in read_marc_file(full_part, f, pos=start_pos):
        rec_no += 1
        if rec_no % chunk == 0:
            progress(archive_id, rec_no, start_pos, pos)

        if is_loaded(loc):
            continue
        want = ['001', '003', '010', '020', '035', '245']
        try:
            index_fields = fast_parse.index_fields(data, want)
        except KeyError:
            print(loc)
            print(fast_parse.get_tag_lines(data, ['245']))
            raise
        except AssertionError:
            print(loc)
            raise
        if not index_fields or 'title' not in index_fields:
            continue

        edition_pool = pool.build(index_fields)

        if not edition_pool:
            yield loc, data
            continue

        rec = fast_parse.read_edition(data)
        e1 = build_marc(rec)

        match = False
        seen = set()
        for k, v in edition_pool.items():
            for edition_key in v:
                if edition_key in seen:
                    continue
                thing = None
                while not thing or thing['type']['key'] == '/type/redirect':
                    seen.add(edition_key)
                    thing = withKey(edition_key)
                    assert thing
                    if thing['type']['key'] == '/type/redirect':
                        print('following redirect %s => %s' % (edition_key, thing['location']))
                        edition_key = thing['location']
                if try_merge(e1, edition_key, thing):
                    add_source_records(edition_key, loc, thing, data)
                    match = True
                    break
            if match:
                break

        if not match:
            yield loc, data
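The redirect-chasing while loop in the middle of load_part is the same logic that the next example pulls out into a standalone follow_redirects helper.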
Example #3
def follow_redirects(key):
    keys = []
    thing = None
    while not thing or thing['type']['key'] == '/type/redirect':
        keys.append(key)
        thing = withKey(key)
        assert thing
        if thing['type']['key'] == '/type/redirect':
            print('following redirect %s => %s' % (key, thing['location']))
            key = thing['location']
    return (keys, thing)
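With the invented SAMPLE_DB stand-in from the note under Example #1, the return value is easy to see; the redirect chain here is made up:

# '/a/OL2A' redirects to '/a/OL1A' (both records invented).
SAMPLE_DB['/a/OL2A'] = {'key': '/a/OL2A',
                        'type': {'key': '/type/redirect'},
                        'location': '/a/OL1A'}
SAMPLE_DB['/a/OL1A'] = {'key': '/a/OL1A',
                        'type': {'key': '/type/author'},
                        'name': 'Mark Twain'}

keys, thing = follow_redirects('/a/OL2A')
# keys  == ['/a/OL2A', '/a/OL1A']
# thing == the final, non-redirect record for '/a/OL1A'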
Example #4
def try_amazon(thing):
    # Amazon lookups need an ISBN; author names are resolved through
    # withKey so build_amazon gets plain strings.
    if 'isbn_10' not in thing:
        return None
    authors = []
    if 'authors' in thing:
        for a in thing['authors']:
            author_thing = withKey(a['key'])
            if 'name' in author_thing:
                authors.append(author_thing['name'])
    return amazon.build_amazon(thing, authors)
Example #5
def load_part(archive_id, part, start_pos=0):
    print('load_part:', archive_id, part)
    global rec_no, t_prev, load_count
    full_part = archive_id + "/" + part
    f = open(rc['marc_path'] + "/" + full_part, 'rb')  # MARC records are binary
    if start_pos:
        f.seek(start_pos)
    for pos, loc, data in read_marc_file(full_part, f, pos=start_pos):
        rec_no += 1
        if rec_no % chunk == 0:
            progress(archive_id, rec_no, start_pos, pos)

        if is_loaded(loc):
            continue
        want = ['001', '003', '010', '020', '035', '245']
        try:
            index_fields = fast_parse.index_fields(data, want)
        except KeyError:
            print(loc)
            print(fast_parse.get_tag_lines(data, ['245']))
            raise
        except AssertionError:
            print(loc)
            raise
        if not index_fields or 'title' not in index_fields:
            continue

        edition_pool = pool.build(index_fields)

        if not edition_pool:
            continue

        rec = fast_parse.read_edition(data)
        e1 = build_marc(rec)

        match = False
        seen = set()
        for k, v in edition_pool.items():
            for edition_key in v:
                if edition_key in seen:
                    continue
                seen.add(edition_key)
                thing = withKey(edition_key)
                assert thing
                if try_merge(e1, edition_key, thing):
                    add_source_records(edition_key, loc, thing)
                    match = True

        if not match:
            yield loc, data
Example #6
def switch_author(old, new):
    q = { 'authors': old['key'], 'type': '/type/edition', }
    for key in get_things(q):
        edition = withKey(key)
        authors = []
        for author in edition['authors']:
            if author['key'] == old['key']:
                author_key = new['key']
            else:
                author_key = author['key']
            authors.append({ 'key': author_key })

        q = {
            'key': key,
            'authors': { 'connect': 'update_list', 'value': authors }
        }
        print(ol.write(q, comment='merge authors'))
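Both scripts on this page update list-valued fields the same way: an Infogami write with 'connect': 'update_list' replaces the whole list, as with the authors list here and the works list in the final example.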
Example #7
def try_merge(e1, edition_key, thing):
    thing_type = thing['type']['key']
    if thing_type == '/type/delete':
        return False
    assert thing_type == '/type/edition'

    if 'source_records' in thing:
        if fix_source_records(edition_key, thing):
            thing = withKey(edition_key) # reload
        return source_records_match(e1, thing)

    ia = thing.get('ocaid', None)
    print(edition_key)
    mc = get_mc(edition_key)
    print(mc)
    if mc:
        if mc.startswith('ia:'):
            ia = mc[3:]
        elif mc.endswith('.xml') or mc.endswith('.mrc'):
            ia = mc[:mc.find('/')]
        if '_meta.mrc:' in mc:
            assert 'ocaid' in thing
            ia = thing['ocaid']
    rec2 = None
    if ia:
        if is_dark_or_bad(ia):
            return False
        try:
            loc2, rec2 = get_ia(ia)
        except xml.parsers.expat.ExpatError:
            return False
        except urllib.error.HTTPError as error:
            print(error.code)
            assert error.code in (404, 403)
        if not rec2:
            return True
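For editions with no source_records, try_merge falls back on get_mc, whose result is either an 'ia:<identifier>' string or a MARC file location; the Internet Archive identifier it extracts is then fetched with get_ia and compared against the incoming record.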
Example #8
        do_normalize(author, new_name)
        switch_author(merge_with, author)
#        print "delete merge_with"
        make_redirect(merge_with, author)
    else:
        new_key = merge_with['key']
        print "copy fields from author to", new_key
#        new = copy_fields(merge_with, author, new_name)
#        update_author(new_key, new)
        do_normalize(merge_with, new_name)
        switch_author(author, merge_with)
#        print "delete author"
        make_redirect(author, merge_with)
    print()

author = withKey(sys.argv[1])
merge_with = withKey(sys.argv[2])

print(author)
print(merge_with)

from unicodedata import normalize

def norm(s):
    return normalize('NFC', s)

name1 = author['name']
name2 = merge_with['name']

print(sys.argv)
if len(sys.argv) > 3:
    name = norm(sys.argv[3])
else:
    pass  # (else branch truncated in the source listing)

rc = read_rc()
infogami = Infogami()
infogami.login('edward', rc['edward'])

for line in open('works_for_staging'):
    work_key, title, authors, editions = eval(line)
    q = {
        'create': 'unless_exists',
        'type': { 'key': '/type/work' },
        'key': work_key,
        'title': title,
        'authors': [{'key': '/a/' + a} for a in authors],
    }
    print(q)
    ret = infogami.write(q, comment='create work')
    print(ret)
    for edition_key in editions:
        edition = db_read.withKey(edition_key)
        if not edition: continue
        if 'works' in edition: continue
        q = {
            'key': edition_key,
            'works': { 'connect': 'update_list', 'value': [{'key': work_key}]}
        }
        ret = infogami.write(q, comment='add work to edition')
        print(edition)
        print(q)
        print(edition_key, ret)
        assert ret['result']['updated']

import catalog.importer.db_read as db_read
import re
import sys
import codecs

db_read.set_staging(True)

sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)

rc = read_rc()
infogami = Infogami()
infogami.login('edward', rc['edward'])

for line in open('works_for_staging'):
    work_key, title, authors, editions = eval(line)
    if not all(db_read.withKey('/a/' + a) for a in authors):
        continue
    work = db_read.withKey(work_key)
    print(work_key)
    if work:
        continue
    q = {
        'create': 'unless_exists',
        'type': { 'key': '/type/work' },
        'key': work_key,
        'title': title,
        'authors': [{'key': '/a/' + a} for a in authors],
    }
    ret = infogami.write(q, comment='create work')
    print(ret)