예제 #1
0
    def GET(self, ia):

        ret = '''
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Marc lookup: %s</title>
</head>
<body>
<form action="/z39.50">Internet archive identifier: <input name="ia" value="%s"><input value="go" type="submit"></form>
<h1>%s</h1>
<ul>
<li><a href="http://www.archive.org/details/%s">Internet archive detail page</a>
<li><a href="http://openlibrary.org/show-records/ia:%s">View current IA MARC record</a>
</ul>
''' % (ia, ia, ia, ia, ia)
        marc_source = 'http://www.archive.org/download/' + ia + '/' + ia + '_metasource.xml'
        marc_xml = 'http://www.archive.org/download/' + ia + '/' + ia + '_marc.xml'
        marc_bin = 'http://www.archive.org/download/' + ia + '/' + ia + '_meta.mrc'

        try:
            from_marc_xml = xml_to_html.html_record(urlopen(marc_xml).read())
        except:
            from_marc_xml = None

        try:
            meta_mrc = urlopen(marc_bin).read()
            from_marc_bin = html_record(meta_mrc)
        except:
            from_marc_bin = None

        root = etree.parse(urlopen(marc_source)).getroot()
        cclquery = root.find('cclquery').text
        target_name = root.find('target').text
        result_offset = root.find('resultOffset').text

        marc = get_marc(target_name, cclquery, result_offset)
        rec = html_record(marc)

        ret += '<h2>From Z39.50</h2>'

        ret += 'leader: <code>' + rec.leader.replace(' ', '&nbsp;') + '</code><br>'
        ret += rec.html() + '<br>\n'

        if from_marc_xml:
            ret += '<h2>From MARC XML on archive.org</h2>'

            ret += 'leader: <code>' + from_marc_xml.leader.replace(' ', '&nbsp;') + '</code><br>'
            ret += from_marc_xml.html() + '<br>\n'

        if from_marc_xml:
            ret += '<h2>From MARC binary on archive.org</h2>'

            ret += 'record length: ' + repr(len(meta_mrc)) + ' bytes<br>'
            ret += 'leader: <code>' + from_marc_bin.leader.replace(' ', '&nbsp;') + '</code><br>'
            ret += from_marc_bin.html() + '<br>\n'

        ret += '</body></html>'

        return ret
예제 #2
0
    def GET(self, ia):

        ret = '''
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Marc lookup: %s</title>
</head>
<body>
<form action="/z39.50">Internet archive identifier: <input name="ia" value="%s"><input value="go" type="submit"></form>
<h1>%s</h1>
<ul>
<li><a href="http://www.archive.org/details/%s">Internet archive detail page</a>
<li><a href="http://openlibrary.org/show-records/ia:%s">View current IA MARC record</a>
</ul>
''' % (ia, ia, ia, ia, ia)
        marc_source = 'http://www.archive.org/download/' + ia + '/' + ia + '_metasource.xml'
        root = etree.parse(marc_source).getroot()
        cclquery = root.find('cclquery').text
        target_id = root.find('target').attrib['id']

        marc = get_marc(target_id, cclquery)
        rec = html_record(marc)

        ret += 'leader: ' + rec.leader + '<br>'
        ret += rec.html()
        ret += '</body></html>'

        return ret
예제 #3
0
class show_marc(app.view):
    """Show a single MARC record read from a file on archive.org.

    URLs have the form ``/show-records/<filename>:<offset>:<length>``.
    """

    # Raw string so the ``\d`` sequences are kept literally in the pattern.
    path = r"/show-records/(.*):(\d+):(\d+)"

    def GET(self, filename, offset, length):
        """Fetch the record at ``offset`` in ``filename`` and render it.

        :param filename: file path captured from the URL (string).
        :param offset: byte offset of the record, as a digit string.
        :param length: expected record length, as a digit string.
        :return: the rendered ``showmarc`` template, or an ``"ERROR:..."``
            string when the download raises ``urllib2.HTTPError``.
        :raises web.seeother: to redirect known mis-cased or ``_meta.mrc``
            file names, or when the length stored in the record differs
            from ``length``.
        """
        m = re_bad_meta_mrc.match(filename)
        if m:
            raise web.seeother('/show-records/ia:' + m.group(1))
        m = re_lc_sanfranpl.match(filename)
        if m:  # archive.org is case-sensitive
            mixed_case = 'SanFranPL%s/SanFranPL%s.out:%s:%s' % (
                m.group(1), m.group(2), offset, length)
            raise web.seeother('/show-records/' + mixed_case)
        if filename == 'collingswoodlibrarymarcdump10-27-2008/collingswood.out':
            loc = 'CollingswoodLibraryMarcDump10-27-2008/Collingswood.out:%s:%s' % (
                offset, length)
            raise web.seeother('/show-records/' + loc)

        # Look up editions whose source_records entry names this record.
        loc = ':'.join(['marc', filename, offset, length])

        books = web.ctx.site.things({
            'type': '/type/edition',
            'source_records': loc,
        })

        offset = int(offset)
        length = int(length)

        # Request a fixed 100000-byte window starting at the record offset.
        r0, r1 = offset, offset + 100000
        url = 'http://www.archive.org/download/%s' % filename

        ureq = urllib2.Request(
            url,
            None,
            {'Range': 'bytes=%d-%d' % (r0, r1)},
        )

        try:
            result = urllib2.urlopen(ureq).read(100000)
        except urllib2.HTTPError as e:
            return "ERROR:" + str(e)

        # The first five bytes of the data hold the record length.
        # NOTE(review): int() raises ValueError when they are not digits and
        # nothing here catches it.
        len_in_rec = int(result[:5])
        if len_in_rec != length:
            # Redirect to the URL carrying the length the record reports.
            raise web.seeother('/show-records/%s:%d:%d' %
                               (filename, offset, len_in_rec))

        from openlibrary.catalog.marc import html

        try:
            record = html.html_record(result[0:length])
        except ValueError:
            record = None

        return app.render_template("showmarc", record, filename, offset,
                                   length, books)
예제 #4
0
def test_html_subfields():
    """html_subfields bolds subfield codes and escapes angle brackets."""
    record = html_record("00053This is the leader.Now we are beyond the leader.")
    cases = (
        (b'  \x1fa0123456789\x1e', '<b>$a</b>0123456789'),
        (b'  end of wrapped\x1e', 'end of wrapped'),
        (b'  \x1fa<whatever>\x1e', '<b>$a</b>&lt;whatever&gt;'),
    )
    for raw_field, expected in cases:
        rendered = record.html_subfields(raw_field)
        assert rendered == expected
예제 #5
0
def test_html_line_marc8():
    """html_line wraps the tag and the rendered field in the expected markup."""
    line_format = '<large>%s</large> <code>%s</code>'
    record = html_record(b'00053This is the leader.Now we are beyond the leader.')
    cases = (
        ('020', b'  \x1fa0123456789\x1e', '&nbsp;&nbsp; <b>$a</b>0123456789'),
        ('520', b'  end of wrapped\x1e', '&nbsp;&nbsp; end of wrapped'),
    )
    for tag, raw_field, rendered_field in cases:
        expected_line = line_format % (tag, rendered_field)
        actual_line = record.html_line(tag, raw_field)
        assert actual_line == expected_line
예제 #6
0
    def GET(self, filename, offset, length):
        """Render the MARC record found at ``offset`` inside ``filename``.

        Redirects (via ``web.seeother``) for file names matched by the
        module's redirect patterns and when the length stored in the record
        differs from ``length``.  Returns an ``"ERROR:..."`` string when the
        download raises ``urllib2.HTTPError``.
        """
        bad_meta = re_bad_meta_mrc.match(filename)
        if bad_meta:
            raise web.seeother('/show-records/ia:' + bad_meta.group(1))

        sanfran = re_lc_sanfranpl.match(filename)
        if sanfran:
            # archive.org is case-sensitive
            first, second = sanfran.group(1), sanfran.group(2)
            target = 'SanFranPL%s/SanFranPL%s.out:%s:%s' % (first, second, offset, length)
            raise web.seeother('/show-records/' + target)

        if filename == 'collingswoodlibrarymarcdump10-27-2008/collingswood.out':
            target = 'CollingswoodLibraryMarcDump10-27-2008/Collingswood.out:%s:%s' % (offset, length)
            raise web.seeother('/show-records/' + target)

        # Editions whose source_records entry names this exact record.
        source_record = ':'.join(['marc', filename, offset, length])
        query = {
            'type': '/type/edition',
            'source_records': source_record,
        }
        books = web.ctx.site.things(query)

        offset = int(offset)
        length = int(length)

        # Ask for a fixed-size window starting at the record offset.
        window = 100000
        url = 'http://www.archive.org/download/%s'% filename
        range_header = {'Range':'bytes=%d-%d'% (offset, offset + window)}
        request = urllib2.Request(url, None, range_header)

        try:
            result = urllib2.urlopen(request).read(window)
        except urllib2.HTTPError as e:
            return "ERROR:" + str(e)

        # The first five bytes of the data hold the record length.
        len_in_rec = int(result[:5])
        if len_in_rec != length:
            corrected = '/show-records/%s:%d:%d' % (filename, offset, len_in_rec)
            raise web.seeother(corrected)

        from openlibrary.catalog.marc import html

        try:
            record = html.html_record(result[:length])
        except ValueError:
            record = None

        page = app.render_template("showmarc", record, filename, offset, length, books)
        page.v2 = True
        return page
예제 #7
0
    def GET(self, filename, offset, length):
        """Render the MARC record found at ``offset`` inside ``filename``.

        Redirects (via ``web.seeother``) for file names matched by the
        module's redirect patterns and when the length stored in the record
        differs from ``length``.  Returns an ``"ERROR:..."`` string when the
        download raises ``requests.HTTPError``.
        """
        m = re_bad_meta_mrc.match(filename)
        if m:
            raise web.seeother('/show-records/ia:' + m.group(1))

        m = re_lc_sanfranpl.match(filename)
        if m:
            # archive.org is case-sensitive, so redirect to the mixed-case name.
            mixed_case = (
                f'SanFranPL{m.group(1)}/SanFranPL{m.group(2)}.out:{offset}:{length}'
            )
            raise web.seeother('/show-records/' + mixed_case)

        if filename == 'collingswoodlibrarymarcdump10-27-2008/collingswood.out':
            redirect_to = f'CollingswoodLibraryMarcDump10-27-2008/Collingswood.out:{offset}:{length}'
            raise web.seeother('/show-records/' + redirect_to)

        # Editions whose source_records entry names this exact record.
        edition_query = {
            'type': '/type/edition',
            'source_records': ':'.join(['marc', filename, offset, length]),
        }
        books = web.ctx.site.things(edition_query)

        offset = int(offset)
        length = int(length)

        # Ask for a fixed-size window starting at the record offset.
        max_bytes = 100000
        range_start, range_end = offset, offset + max_bytes
        url = 'https://archive.org/download/%s' % filename
        headers = {'Range': 'bytes=%d-%d' % (range_start, range_end)}

        try:
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            result = response.content[:max_bytes]
        except requests.HTTPError as e:
            return "ERROR:" + str(e)

        # The first five bytes of the data hold the record length.
        len_in_rec = int(result[:5])
        if len_in_rec != length:
            corrected = '/show-records/%s:%d:%d' % (filename, offset, len_in_rec)
            raise web.seeother(corrected)

        from openlibrary.catalog.marc import html

        try:
            record = html.html_record(result[:length])
        except ValueError:
            record = None

        page = app.render_template("showmarc", record, filename, offset,
                                   length, books)
        return page
예제 #8
0
def test_html_line_utf8():
    """html_line renders a UTF-8 encoded 245 field as the expected text."""
    raw_245 = (
        b'10\x1faDbu ma la \xca\xbejug pa\xca\xbei kar t\xcc\xa3i\xcc\x84k '
        b':\x1fbDwags-brgyud grub pa\xca\xbei s\xcc\x81in\xcc\x87 rta /\x1f'
        b'cKarma-pa Mi-bskyod-rdo-rje.\x1e'
    )
    rendered_245 = (
        u'10 <b>$a</b>Dbu ma la \u02bejug pa\u02bei kar \u1e6d\u012bk :<b>'
        u'$b</b>Dwags-brgyud grub pa\u02bei \u015bi\u1e45 rta /<b>$c</b>Ka'
        u'rma-pa Mi-bskyod-rdo-rje.'
    )
    cases = [('245', raw_245, rendered_245)]

    record = html_record(b'00053Thisais the leader.Now we are beyond the leader.')
    # This leader must not be flagged as MARC-8.
    assert record.is_marc8 == False

    line_format = '<large>%s</large> <code>%s</code>'
    for tag, raw_field, rendered_field in cases:
        expected_line = line_format % (tag, rendered_field)
        actual_line = record.html_line(tag, raw_field)
        assert actual_line == expected_line
예제 #9
0
    def GET(self, ia):
        """Render the ``showia`` page for the MARC record of item ``ia``.

        Returns an ``"ERROR..."`` string when a download fails with an HTTP
        error other than 404 or the leader length cannot be parsed.  When the
        ``_meta.mrc`` file is missing (404), the ``_meta.xml`` file is fetched
        and the request is redirected to the archive.org details page.
        """
        marc_missing = False
        url = f'https://archive.org/download/{ia}/{ia}_meta.mrc'
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.content
        except requests.HTTPError as e:
            if e.response.status_code != 404:
                return "ERROR:" + str(e)
            marc_missing = True

        if marc_missing:
            # No MARC record: check the metadata XML, then send the user to
            # the item's details page.
            url = f'https://archive.org/download/{ia}/{ia}_meta.xml'
            try:
                response = requests.get(url)
                response.raise_for_status()
                data = response.content
            except requests.HTTPError as e:
                return "ERROR:" + str(e)
            raise web.seeother('https://archive.org/details/' + ia)

        # Prefer editions linked through source_records; fall back to ocaid.
        books = web.ctx.site.things({
            'type': '/type/edition',
            'source_records': 'ia:' + ia,
        })
        if not books:
            books = web.ctx.site.things({
                'type': '/type/edition',
                'ocaid': ia,
            })

        from openlibrary.catalog.marc import html

        try:
            leader_len = int(data[:5])
        except ValueError:
            return "ERROR reading MARC for " + ia

        # When the byte count disagrees with the leader, re-encode the data
        # and require that the lengths then agree.
        if leader_len != len(data):
            text = data.decode('utf-8')
            data = text.encode('raw_unicode_escape')
        assert len(data) == int(data[:5])

        try:
            record = html.html_record(data)
        except ValueError:
            record = None

        page = app.render_template("showia", ia, record, books)
        return page
예제 #10
0
def test_html_line():
    """html_line wraps the tag and the rendered field in the expected markup."""
    record = html_record("00053This is the leader.Now we are beyond the leader.")
    line_format = '<large>%s</large> <code>%s</code>'
    cases = (
        ('020', '  \x1fa0123456789\x1e', '&nbsp;&nbsp; <b>$a</b>0123456789'),
        ('520', '  end of wrapped\x1e', '&nbsp;&nbsp; end of wrapped'),
        ('245',
         ('10\x1faDbu ma la \xca\xbejug pa\xca\xbei kar t\xcc\xa3i\xcc\x84k '
          ':\x1fbDwags-brgyud grub pa\xca\xbei s\xcc\x81in\xcc\x87 rta /\x1f'
          'cKarma-pa Mi-bskyod-rdo-rje.\x1e'),
         (u'10 <b>$a</b>Dbu ma la \u02bejug pa\u02bei kar \u1e6d\u012bk :<b>'
          u'$b</b>Dwags-brgyud grub pa\u02bei \u015bi\u1e45 rta /<b>$c</b>Ka'
          u'rma-pa Mi-bskyod-rdo-rje.')),
    )
    for tag, raw_field, rendered_field in cases:
        expected_line = line_format % (tag, rendered_field)
        actual_line = record.html_line(tag, raw_field)
        assert actual_line == expected_line
예제 #11
0
    def GET(self, ia):
        """Render the ``showia`` page for the MARC record of item ``ia``.

        Returns an ``"ERROR..."`` string when a download fails with an HTTP
        error other than 404 or the leader length cannot be parsed.  When the
        ``_meta.mrc`` file is missing (404), the ``_meta.xml`` file is fetched
        and the request is redirected to the archive.org details page.
        """
        marc_url = 'http://www.archive.org/download/%s/%s_meta.mrc' % (ia, ia)
        marc_missing = False
        try:
            data = urllib.request.urlopen(marc_url).read()
        except urllib.error.HTTPError as e:
            if e.code != 404:
                return "ERROR:" + str(e)
            marc_missing = True

        if marc_missing:
            # No MARC record: check the metadata XML, then send the user to
            # the item's details page.
            xml_url = 'http://www.archive.org/download/%s/%s_meta.xml' % (ia, ia)
            try:
                data = urllib.request.urlopen(xml_url).read()
            except urllib.error.HTTPError as e:
                return "ERROR:" + str(e)
            raise web.seeother('http://www.archive.org/details/' + ia)

        # Prefer editions linked through source_records; fall back to ocaid.
        books = web.ctx.site.things({
            'type': '/type/edition',
            'source_records': 'ia:' + ia,
        })
        if not books:
            books = web.ctx.site.things({
                'type': '/type/edition',
                'ocaid': ia,
            })

        from openlibrary.catalog.marc import html

        try:
            leader_len = int(data[:5])
        except ValueError:
            return "ERROR reading MARC for " + ia

        # When the byte count disagrees with the leader, re-encode the data
        # and require that the lengths then agree.
        if leader_len != len(data):
            text = data.decode('utf-8')
            data = text.encode('raw_unicode_escape')
        assert len(data) == int(data[:5])

        try:
            record = html.html_record(data)
        except ValueError:
            record = None

        page = app.render_template("showia", ia, record, books)
        page.v2 = True
        return page
예제 #12
0
    def GET(self, ia):
        """Render the ``showia`` page for the MARC record of item ``ia``.

        Returns an ``"ERROR..."`` string when a download fails with an HTTP
        error other than 404 or the leader length cannot be parsed.  When the
        ``_meta.mrc`` file is missing (404), the ``_meta.xml`` file is fetched
        and the request is redirected to the archive.org details page.
        """
        found_marc = True
        mrc_url = 'http://www.archive.org/download/%s/%s_meta.mrc' % (ia, ia)
        try:
            data = urllib2.urlopen(mrc_url).read()
        except urllib2.HTTPError as e:
            if e.code != 404:
                return "ERROR:" + str(e)
            found_marc = False

        if not found_marc:
            # No MARC record: check the metadata XML, then send the user to
            # the item's details page.
            meta_url = 'http://www.archive.org/download/%s/%s_meta.xml' % (ia, ia)
            try:
                data = urllib2.urlopen(meta_url).read()
            except urllib2.HTTPError as e:
                return "ERROR:" + str(e)
            raise web.seeother('http://www.archive.org/details/' + ia)

        # Prefer editions linked through source_records; fall back to ocaid.
        by_source_record = {
            'type': '/type/edition',
            'source_records': 'ia:' + ia,
        }
        by_ocaid = {
            'type': '/type/edition',
            'ocaid': ia,
        }
        books = web.ctx.site.things(by_source_record) or web.ctx.site.things(by_ocaid)

        from openlibrary.catalog.marc import html

        try:
            leader_len = int(data[:5])
        except ValueError:
            return "ERROR reading MARC for " + ia

        # When the byte count disagrees with the leader, re-encode the data
        # and require that the lengths then agree.
        if leader_len != len(data):
            decoded = data.decode('utf-8')
            data = decoded.encode('raw_unicode_escape')
        assert len(data) == int(data[:5])

        try:
            record = html.html_record(data)
        except ValueError:
            record = None

        page = app.render_template("showia", ia, record, books)
        page.v2 = True
        return page
예제 #13
0
            'ocaid': ia,
        })

        from openlibrary.catalog.marc import html

        try:
            leader_len = int(data[:5])
        except ValueError:
            return "ERROR reading MARC for " + ia

        if len(data) != leader_len:
            data = data.decode('utf-8').encode('raw_unicode_escape')
        assert len(data) == int(data[:5])

        try:
            record = html.html_record(data)
        except ValueError:
            record = None

        return app.render_template("showia", ia, record, books)


class show_amazon(app.view):
    """View rendering the ``showamazon`` template for an ASIN in the URL."""

    path = "/show-records/amazon:(.*)"

    def GET(self, asin):
        """Return the ``showamazon`` template rendered for ``asin``."""
        page = app.render_template("showamazon", asin)
        return page


# Raw strings keep the regex escapes (``\.``, ``\d``) out of Python's own
# string-escape processing; the compiled patterns are unchanged.

# A slash-free name ending in "_meta.mrc"; group 1 is the part before it.
re_bad_meta_mrc = re.compile(r'^([^/]+)_meta\.mrc$')
# Lower-cased "sanfranpl<digits>/sanfranpl<digits>.out" file names.
re_lc_sanfranpl = re.compile(r'^sanfranpl(\d+)/sanfranpl(\d+)\.out')
예제 #14
0
        books = web.ctx.site.things({
            'type': '/type/edition',
            'source_records': 'ia:' + ia,
        }) or web.ctx.site.things({
            'type': '/type/edition',
            'ocaid': ia,
        })

        from openlibrary.catalog.marc import html

        if len(data) != int(data[:5]):
            data = data.decode('utf-8').encode('raw_unicode_escape')
        assert len(data) == int(data[:5])

        try:
            record = html.html_record(data)
        except ValueError:
            record = None

        return render.showia(ia, record, books)
        
class show_amazon(delegate.page):
    """Page rendering the ``showamazon`` template for an ASIN in the URL."""

    path = "/show-records/amazon:(.*)"

    def GET(self, asin):
        """Return the ``showamazon`` template rendered for ``asin``."""
        page = render.showamazon(asin)
        return page

# Raw strings keep the regex escapes (``\.``, ``\d``) out of Python's own
# string-escape processing; the compiled patterns are unchanged.

# A slash-free name ending in "_meta.mrc"; group 1 is the part before it.
re_bad_meta_mrc = re.compile(r'^([^/]+)_meta\.mrc$')
# Lower-cased "sanfranpl<digits>/sanfranpl<digits>.out" file names.
re_lc_sanfranpl = re.compile(r'^sanfranpl(\d+)/sanfranpl(\d+)\.out')

class show_marc(delegate.page):