Exemple #1
0
def get_marc_ia_data(ia, host=None, path=None):
    """Fetch the raw ``<ia>_meta.mrc`` file for an archive.org item.

    ia -- item identifier; surrounding whitespace is stripped before use.
    host, path -- when BOTH are truthy the file is requested from
        ``http://<host><path>/``; otherwise (including when only one of
        them is given) the ``http://www.archive.org/download/<ia>/`` URL
        is used.

    Returns the result of ``read()`` on the object handed back by
    ``urlopen_keep_trying``, or None when that helper returns a falsy value.
    """
    # identifiers can arrive with stray whitespace, e.g. 'cyclopdiaofedu00kidd '
    identifier = ia.strip()
    suffix = 'meta.mrc'

    if not (host and path):
        marc_url = 'http://www.archive.org/download/' + identifier + '/' + identifier + '_' + suffix
    else:
        marc_url = 'http://%s%s/%s_%s' % (host, path, identifier, suffix)

    response = urlopen_keep_trying(marc_url)
    if not response:
        return None
    return response.read()
Exemple #2
0
def get_marc_ia_data(ia, host=None, path=None):
    """Return the contents of an item's ``_meta.mrc`` file, or None.

    The identifier ``ia`` is stripped of surrounding whitespace. If both
    ``host`` and ``path`` are truthy, the URL is built from them;
    otherwise the file is looked up under
    ``http://www.archive.org/download/``.

    None is returned when ``urlopen_keep_trying`` gives back a falsy
    value instead of a readable object.
    """
    item_id = ia.strip()  # e.g. 'cyclopdiaofedu00kidd ' -> 'cyclopdiaofedu00kidd'
    file_suffix = 'meta.mrc'
    explicit_location = host and path

    if explicit_location:
        location = 'http://%s%s/%s_%s' % (host, path, item_id, file_suffix)
    else:
        location = 'http://www.archive.org/download/' + item_id + '/' + item_id + '_' + file_suffix

    handle = urlopen_keep_trying(location)
    if handle:
        return handle.read()
    return None
Exemple #3
0
def get_marc_ia(ia):
    ia = ia.strip()  # 'cyclopdiaofedu00kidd '
    url = base + ia + "/" + ia + "_meta.mrc"
    data = urlopen_keep_trying(url).read()
    length = int(data[0:5])
    if len(data) != length:
        data = data.decode('utf-8').encode('raw_unicode_escape')
    assert len(data) == length

    assert 'Internet Archive: Error' not in data
    print 'leader:', data[:24]
    return data
    return fast_parse.read_edition(data, accept_electronic=True)
Exemple #4
0
def get_marc_ia(ia):
    ia = ia.strip() # 'cyclopdiaofedu00kidd '
    url = base + ia + "/" + ia + "_meta.mrc"
    data = urlopen_keep_trying(url).read()
    length = int(data[0:5])
    if len(data) != length:
        data = data.decode('utf-8').encode('raw_unicode_escape')
    assert len(data) == length

    assert 'Internet Archive: Error' not in data
    print 'leader:', data[:24]
    return data
    return fast_parse.read_edition(data, accept_electronic = True)
Exemple #5
0
def get_ia(ia):
    ia = ia.strip()  # 'cyclopdiaofedu00kidd '
    # read MARC record of scanned book from archive.org
    # try the XML first because it has better character encoding
    # if there is a problem with the XML switch to the binary MARC
    xml_file = ia + "_marc.xml"
    loc = ia + "/" + xml_file
    try:
        print base + loc
        f = urlopen_keep_trying(base + loc)
    except urllib2.HTTPError, error:
        if error.code == 404:
            raise NoMARCXML
        else:
            print 'error:', error.code, error.msg
            raise
Exemple #6
0
def get_ia(ia):
    """Open the MARC XML file (``<ia>_marc.xml``) of an archive.org item.

    ia -- item identifier; surrounding whitespace is stripped before use.

    Prints the URL being requested, then opens it via
    ``urlopen_keep_trying``.

    Raises NoMARCXML on an HTTP 404; other ``urllib2.HTTPError``s are
    printed (code and message) and re-raised.

    NOTE(review): nothing visible here uses or returns the opened handle
    ``f``; the function may continue beyond this excerpt -- confirm.
    """
    ia = ia.strip() # identifiers may carry stray whitespace, e.g. 'cyclopdiaofedu00kidd '
    # read MARC record of scanned book from archive.org
    # try the XML first because it has better character encoding
    # if there is a problem with the XML switch to the binary MARC
    # (NOTE(review): that fallback is not visible in this excerpt)
    xml_file = ia + "_marc.xml"
    loc = ia + "/" + xml_file  # path relative to the module-level ``base`` prefix
    try:
        print base + loc
        f = urlopen_keep_trying(base + loc)
    except urllib2.HTTPError, error:
        if error.code == 404:
            # missing XML file: signal it with the dedicated exception
            raise NoMARCXML
        else:
            # any other HTTP failure: report it and let it propagate
            print 'error:', error.code, error.msg
            raise