Esempio n. 1
0
def load(loc, ia):
    """Fetch, parse and store a single edition record.

    The record is requested from ``archive_url + loc`` through
    ``urlopen_keep_trying`` and handed to ``parse_xml.parse``.  Records
    that fail to parse, or that parse without a ``'title'`` entry, are
    skipped without raising.

    Args:
        loc: Location string appended to ``archive_url`` to build the URL.
        ia: Identifier stored in ``edition['ocaid']`` and, prefixed with
            ``"ia:"``, passed as the first argument to ``write_edition``.

    Returns:
        None, whether the edition was written or skipped.
    """
    # Function-call form of print: the Python 2 statement form is a
    # SyntaxError on Python 3.
    print("load", loc, ia)
    url = archive_url + loc
    f = urlopen_keep_trying(url)
    try:
        edition = parse_xml.parse(f)
    except (AssertionError, parse_xml.BadSubtag, KeyError):
        # Deliberate best-effort: a record the parser rejects is skipped.
        return
    if 'title' not in edition:
        # Nothing is written for an edition without a title.
        return
    edition['ocaid'] = ia
    write_edition("ia:" + ia, edition)
Esempio n. 2
0
        if not match:
            yield loc, ia


# Module-level state. None of these names is read in the visible part of
# the script; presumably progress/timing bookkeeping -- TODO confirm.
t0 = time()
t_prev = time()
chunk = 50
last_key = None
load_count = 0
rec_no = 0
total = 100000

# Walk every (loc, ia) pair produced by load(): fetch the record, parse
# it, and store the resulting edition.
for loc, ia in load():
    print("load", loc, ia)
    url = archive_url + loc
    f = urlopen_keep_trying(url)
    try:
        edition = parse_xml.parse(f)
    except (AssertionError, parse_xml.BadSubtag, KeyError):
        # Records the parser rejects are skipped, not fatal.
        continue
    # Only editions that carry a title are written out.
    if 'title' in edition:
        edition['ocaid'] = ia
        write_edition("ia:" + ia, edition)

print("finished")
Esempio n. 3
0
        if not match:
            yield loc, ia

# Module-level state set up before the import loop. Nothing below reads
# these names; presumably used for progress reporting -- TODO confirm.
t0 = time()
t_prev = time()
chunk = 50
last_key = None
load_count = 0
rec_no = 0
total = 100000

# For each (loc, ia) pair yielded by load(): download the record, parse
# it and, when it has a title, tag it with its identifier and store it.
for loc, ia in load():
    print("load", loc, ia)
    url = archive_url + loc
    f = urlopen_keep_trying(url)
    try:
        edition = parse_xml.parse(f)
    except (AssertionError, parse_xml.BadSubtag, KeyError):
        # A parse failure only skips this record.
        continue
    else:
        if 'title' not in edition:
            # Untitled editions are not written.
            continue
        edition['ocaid'] = ia
        write_edition("ia:" + ia, edition)


print("finished")