Exemple #1
0
def get_work_title(e, mc):
    # use first work title we find in source MARC records
    wt = None
    for src_type, src in get_marc_src(e, mc):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                wt = wt.strip('. ')
            if wt:
                break
            continue
        assert src_type == 'marc'
        data = None
        try:
            data = get_data(src)
        except ValueError:
            print 'bad record source:', src
            print 'http://openlibrary.org' + e['key']
            continue
        except urllib2.HTTPError, error:
            print 'HTTP error:', error.code, error.msg
            print e['key']
        if not data:
            continue
        is_marc8 = data[9] != 'a'
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print 'bad dictionary:', src
            print 'http://openlibrary.org' + e['key']
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'],
                                              is_marc8)).strip('. ')
            break
Exemple #2
0
def get_work_title(e, mc):
    # use first work title we find in source MARC records
    wt = None
    for src_type, src in get_marc_src(e, mc):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                wt = wt.strip('. ')
            if wt:
                break
            continue
        assert src_type == 'marc'
        data = None
        try:
            data = get_data(src)
        except ValueError:
            print 'bad record source:', src
            print 'http://openlibrary.org' + e['key']
            continue
        except urllib2.HTTPError, error:
            print 'HTTP error:', error.code, error.msg
            print e['key']
        if not data:
            continue
        is_marc8 = data[9] != 'a'
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print 'bad dictionary:', src
            print 'http://openlibrary.org' + e['key']
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'], is_marc8)).strip('. ')
            break
Exemple #3
0
def get_work_title(e, mc):
    # use first work title we find in source MARC records
    wt = None
    for src_type, src in get_marc_src(e, mc):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                wt = wt.strip('. ')
            if wt:
                break
            continue
        assert src_type == 'marc'
        data = None
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib.error.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
        if not data:
            continue
        is_marc8 = data[9] != 'a'
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'],
                                              is_marc8)).strip('. ')
            break
    if wt:
        return wt
    for f in 'work_titles', 'work_title':
        e_wt = e.get(f, [])
        if e_wt:
            assert isinstance(e_wt, list)
            return e_wt[0].strip('. ')
Exemple #4
0
def get_work_title(e, mc):
    # use first work title we find in source MARC records
    wt = None
    for src_type, src in get_marc_src(e, mc):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                wt = wt.strip('. ')
            if wt:
                break
            continue
        assert src_type == 'marc'
        data = None
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
        if not data:
            continue
        is_marc8 = data[9] != 'a'
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'], is_marc8)).strip('. ')
            break
    if wt:
        return wt
    for f in 'work_titles', 'work_title':
        e_wt = e.get(f, [])
        if e_wt:
            assert isinstance(e_wt, list)
            return e_wt[0].strip('. ')
Exemple #5
0
def get_work_title(e):
    # use first work title we find in source MARC records
    wt = None
    for src_type, src in get_marc_src(e):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                break
            continue
        assert src_type == 'marc'
        data = None
        #print 'get from archive:', src
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
        if not data:
            continue
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'])).strip('. ')
            break
    if wt:
        return wt
    if not e.get('work_titles', []):
        return
    print('work title in MARC, but not in OL')
    print('http://openlibrary.org' + e['key'])
    return e['work_titles'][0]
Exemple #6
0
def get_work_title(e):
    # use first work title we find in source MARC records
    wt = None
    for src_type, src in get_marc_src(e):
        if src_type == 'ia':
            wt = get_ia_work_title(src)
            if wt:
                break
            continue
        assert src_type == 'marc'
        data = None
        #print 'get from archive:', src
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print(e['key'])
        if not data:
            continue
        try:
            line = get_first_tag(data, set(['240']))
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + e['key'])
            continue
        if line:
            wt = ' '.join(get_subfield_values(line, ['a'])).strip('. ')
            break
    if wt:
        return wt
    if not e.get('work_titles', []):
        return
    print('work title in MARC, but not in OL')
    print('http://openlibrary.org' + e['key'])
    return e['work_titles'][0]
Exemple #7
0
def get_marc_subjects(w):
    for src in get_marc_source(w):
        data = None
        try:
            data = get_data(src)
        except ValueError:
            print 'bad record source:', src
            print 'http://openlibrary.org' + w['key']
            continue
        except urllib2.HTTPError, error:
            print 'HTTP error:', error.code, error.msg
            print 'http://openlibrary.org' + w['key']
        if not data:
            continue
        try:
            lines = list(get_tag_lines(data, subject_fields))
        except BadDictionary:
            print 'bad dictionary:', src
            print 'http://openlibrary.org' + w['key']
            continue
        if lines:
            yield lines
Exemple #8
0
def get_marc_subjects(w):
    for src in get_marc_source(w):
        data = None
        from openlibrary.catalog.get_ia import get_data
        try:
            data = get_data(src)
        except ValueError:
            print 'bad record source:', src
            print 'http://openlibrary.org' + w['key']
            continue
        except urllib2.HTTPError, error:
            print 'HTTP error:', error.code, error.msg
            print 'http://openlibrary.org' + w['key']
        if not data:
            continue
        try:
            lines = list(get_tag_lines(data, subject_fields))
        except BadDictionary:
            print 'bad dictionary:', src
            print 'http://openlibrary.org' + w['key']
            continue
        if lines:
            yield lines
Exemple #9
0
def get_marc_subjects(w):
    for src in get_marc_source(w):
        data = None
        from openlibrary.catalog.get_ia import get_data
        try:
            data = get_data(src)
        except ValueError:
            print('bad record source:', src)
            print('http://openlibrary.org' + w['key'])
            continue
        except urllib2.HTTPError as error:
            print('HTTP error:', error.code, error.msg)
            print('http://openlibrary.org' + w['key'])
        if not data:
            continue
        try:
            lines = list(get_tag_lines(data, subject_fields))
        except BadDictionary:
            print('bad dictionary:', src)
            print('http://openlibrary.org' + w['key'])
            continue
        if lines:
            yield lines
Exemple #10
0
def read_works():
    i = 0
    pages = {}
    page_marc = {}

    for work, marc in work_and_marc():
        lines = []
        for loc in marc:
            data = get_data(loc)
            if not data:
                continue
            found = [v for k, v in get_tag_lines(data, set(['600']))]
            if found:
                lines.append((loc, found))
        if not lines:
            continue
        work['lines'] = lines
        i += 1
        print(i, work['key'], work['title'])

        try:
            people, marc_alt = read_people(j[1] for j in lines)
        except AssertionError:
            print(work['lines'])
            continue
        except KeyError:
            print(work['lines'])
            continue

        marc_alt_reverse = defaultdict(set)
        for k, v in marc_alt.items():
            marc_alt_reverse[v].add(k)

        w = ol.get(work['key'])
        w['subject_people'] = []
        for p, num in people.iteritems():
            print('  %2d %s' % (num, ' '.join("%s: %s" % (k, v) for k, v in p)))
            print('     ', p)
            if p in page_marc:
                w['subject_people'].append({'key': '/subjects/people/' + page_marc[p]})
                continue
            obj = build_person_object(p, marc_alt_reverse.get(p, []))
            key = obj['name'].replace(' ', '_')
            full_key = '/subjects/people/' + key
            w['subject_people'].append({'key': full_key})

            if key in pages:
                print(key)
                pages[key]['marc'].append(p)
                continue

            for m in obj['marc']:
                page_marc[m] = key

            pages[key] = obj
            obj_for_db = obj.copy()
            del obj_for_db['marc']
            obj_for_db['key'] = full_key
            obj_for_db['type'] = '/type/person'
            print(ol.save(full_key.encode('utf-8'), obj_for_db, 'create a new person page'))

        print(w)
        print(ol.save(w['key'], w, 'add links to people that this work is about'))
Exemple #11
0
def read_works():
    i = 0
    pages = {}
    page_marc = {}

    for work, marc in work_and_marc():
        lines = []
        for loc in marc:
            data = get_data(loc)
            if not data:
                continue
            found = [v for k, v in get_tag_lines(data, set(['600']))]
            if found:
                lines.append((loc, found))
        if not lines:
            continue
        work['lines'] = lines
        i += 1
        print i, work['key'], work['title']

        try:
            people, marc_alt = read_people(j[1] for j in lines)
        except AssertionError:
            print work['lines']
            continue
        except KeyError:
            print work['lines']
            continue

        marc_alt_reverse = defaultdict(set)
        for k, v in marc_alt.items():
            marc_alt_reverse[v].add(k)

        w = ol.get(work['key'])
        w['subject_people'] = []
        for p, num in people.iteritems():
            print '  %2d %s' % (num, ' '.join("%s: %s" % (k, v) for k, v in p))
            print '     ', p
            if p in page_marc:
                w['subject_people'].append({'key': '/subjects/people/' + page_marc[p]})
                continue
            obj = build_person_object(p, marc_alt_reverse.get(p, []))
            key = obj['name'].replace(' ', '_')
            full_key = '/subjects/people/' + key
            w['subject_people'].append({'key': full_key})

            if key in pages:
                print key
                pages[key]['marc'].append(p)
                continue

            for m in obj['marc']:
                page_marc[m] = key

            pages[key] = obj
            obj_for_db = obj.copy()
            del obj_for_db['marc']
            obj_for_db['key'] = full_key
            obj_for_db['type'] = '/type/person'
            print ol.save(full_key.encode('utf-8'), obj_for_db, 'create a new person page')

        print w
        print ol.save(w['key'], w, 'add links to people that this work is about')