Example #1
0
    def test_binary(self):
        for i in bin_samples:
            try:
                expect_filename = "%s/bin_expect/%s" % (test_data, i)
                data = open("%s/bin_input/%s" % (test_data, i)).read()
                if len(data) != int(data[:5]):
                    data = data.decode('utf-8').encode('raw_unicode_escape')
                assert len(data) == int(data[:5])
                rec = MarcBinary(data)
                edition_marc_bin = read_edition(rec)
                assert edition_marc_bin
                j = {}
                if os.path.exists(expect_filename):
                    j = simplejson.load(open(expect_filename))
                    assert j, "Unable to open test data: %s" % expect_filename
                else:
                    print "WARNING: test data %s not found, recreating it!" % expect_filename
                    simplejson.dump(edition_marc_bin, open(expect_filename, 'w'), indent=2)
                    continue
                self.assertEqual(sorted(edition_marc_bin.keys()), sorted(j.keys()))
                for k in edition_marc_bin.keys():
                    if isinstance(j[k], list):
                        for item1, item2 in zip(edition_marc_bin[k], j[k]):
                            self.assertEqual(item1, item2)

                    self.assertEqual(edition_marc_bin[k], j[k])
                self.assertEqual(edition_marc_bin, j)
            except:
                print 'Bad MARC:', i
                raise
Example #2
0
 def test_xml(self):
     for i in xml_samples:
         try:
             expect_filename = 'test_data/xml_expect/' + i + '_marc.xml'
             path = 'test_data/xml_input/' + i + '_marc.xml'
             element = etree.parse(open(path)).getroot()
             if element.tag != record_tag and element[0].tag == record_tag:
                 element = element[0]
             rec = MarcXml(element)
             edition_marc_xml = read_edition(rec)
             assert edition_marc_xml
             #            if i.startswith('engin'):
             #                pprint(edition_marc_xml)
             #                assert False
             j = {}
             if os.path.exists(expect_filename):
                 j = simplejson.load(open(expect_filename))
                 if not j:
                     print expect_filename
                 assert j
             if not j:
                 simplejson.dump(edition_marc_xml,
                                 open(expect_filename, 'w'),
                                 indent=2)
                 continue
             self.assertEqual(sorted(edition_marc_xml.keys()),
                              sorted(j.keys()))
             for k in edition_marc_xml.keys():
                 print ` i, k, edition_marc_xml[k] `
                 self.assertEqual(edition_marc_xml[k], j[k])
             self.assertEqual(edition_marc_xml, j)
         except:
             print 'bad marc:', i
             raise
Example #3
0
 def test_xml(self):
     for i in xml_samples:
         try:
             expect_filename = "%s/xml_expect/%s_marc.xml" % (test_data, i)
             path            = "%s/xml_input/%s_marc.xml"  % (test_data, i)
             element = etree.parse(open(path)).getroot()
             # Handle MARC XML collection elements in our test_data expectations:
             if element.tag == collection_tag and element[0].tag == record_tag:
                 element = element[0]
             rec = MarcXml(element)
             edition_marc_xml = read_edition(rec)
             assert edition_marc_xml
             j = {}
             if os.path.exists(expect_filename):
                 j = simplejson.load(open(expect_filename))
                 assert j, "Unable to open test data: %s" % expect_filename
             else:
                 print "WARNING: test data %s not found, recreating it!" % expect_filename
                 simplejson.dump(edition_marc_xml, open(expect_filename, 'w'), indent=2)
                 continue
             self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
             for k in edition_marc_xml.keys():
                 self.assertEqual(edition_marc_xml[k], j[k])
             self.assertEqual(edition_marc_xml, j)
         except:
             print 'Bad MARC:', i
             raise
Example #4
0
    def test_xml(self):
        for i in xml_samples:
            expect_filename = 'test_data/xml_expect/' + i + '_marc.xml'
            path = 'test_data/xml_input/' + i + '_marc.xml'
            element = etree.parse(open(path)).getroot()
            if element.tag != record_tag and element[0].tag == record_tag:
                element = element[0]
            rec = MarcXml(element)
            edition_marc_xml = read_edition(rec)
            assert edition_marc_xml
#            if i.startswith('engin'):
#                pprint(edition_marc_xml)
#                assert False
            j = {}
            if os.path.exists(expect_filename):
                j = simplejson.load(open(expect_filename))
                if not j:
                    print expect_filename
                assert j
            if not j:
                simplejson.dump(edition_marc_xml, open(expect_filename, 'w'), indent=2)
                continue
            self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
            for k in edition_marc_xml.keys():
                self.assertEqual(edition_marc_xml[k], j[k])
            self.assertEqual(edition_marc_xml, j)
Example #5
0
    def test_binary(self):
        for i in bin_samples:
            try:
                expect_filename = "%s/bin_expect/%s" % (test_data, i)
                data = open("%s/bin_input/%s" % (test_data, i)).read()
                if len(data) != int(data[:5]):
                    data = data.decode('utf-8').encode('raw_unicode_escape')
                assert len(data) == int(data[:5])
                rec = MarcBinary(data)
                edition_marc_bin = read_edition(rec)
                assert edition_marc_bin
                j = {}
                if os.path.exists(expect_filename):
                    j = simplejson.load(open(expect_filename))
                    assert j, "Unable to open test data: %s" % expect_filename
                else:
                    print "WARNING: test data %s not found, recreating it!" % expect_filename
                    simplejson.dump(edition_marc_bin,
                                    open(expect_filename, 'w'),
                                    indent=2)
                    continue
                self.assertEqual(sorted(edition_marc_bin.keys()),
                                 sorted(j.keys()))
                for k in edition_marc_bin.keys():
                    if isinstance(j[k], list):
                        for item1, item2 in zip(edition_marc_bin[k], j[k]):
                            self.assertEqual(item1, item2)

                    self.assertEqual(edition_marc_bin[k], j[k])
                self.assertEqual(edition_marc_bin, j)
            except:
                print 'Bad MARC:', i
                raise
Example #6
0
 def test_xml(self):
     for i in xml_samples:
         try:
             expect_filename = "%s/xml_expect/%s_marc.xml" % (test_data, i)
             path = "%s/xml_input/%s_marc.xml" % (test_data, i)
             element = etree.parse(open(path)).getroot()
             # Handle MARC XML collection elements in our test_data expectations:
             if element.tag == collection_tag and element[
                     0].tag == record_tag:
                 element = element[0]
             rec = MarcXml(element)
             edition_marc_xml = read_edition(rec)
             assert edition_marc_xml
             j = {}
             if os.path.exists(expect_filename):
                 j = simplejson.load(open(expect_filename))
                 assert j, "Unable to open test data: %s" % expect_filename
             else:
                 print "WARNING: test data %s not found, recreating it!" % expect_filename
                 simplejson.dump(edition_marc_xml,
                                 open(expect_filename, 'w'),
                                 indent=2)
                 continue
             self.assertEqual(sorted(edition_marc_xml.keys()),
                              sorted(j.keys()))
             for k in edition_marc_xml.keys():
                 self.assertEqual(edition_marc_xml[k], j[k])
             self.assertEqual(edition_marc_xml, j)
         except:
             print 'Bad MARC:', i
             raise
Example #7
0
def parse(f):
    """Build an edition dict from *f*, or return ``{}`` if it is unusable.

    The record is created with ``xml_rec``.  A record whose
    ``has_blank_tag`` is set is reported on stdout and rejected without
    calling ``read_edition``.  Otherwise ``read_edition`` fills a fresh
    dict, which is returned only when ``read_edition`` reports success.
    """
    record = xml_rec(f)
    if record.has_blank_tag:
        print('has blank tag')
        return {}
    result = {}
    if read_edition(record, result):
        return result
    return {}
Example #8
0
def parse(f):
    """Return the edition parsed from *f*, or an empty dict on rejection.

    A record (from ``xml_rec``) is rejected when its ``has_blank_tag`` is
    set -- in which case a note is printed and ``read_edition`` is not
    called -- or when ``read_edition`` returns a falsy value.
    """
    rec = xml_rec(f)
    blank = rec.has_blank_tag
    if blank:
        print('has blank tag')
    edition = {}
    # Short-circuit keeps read_edition from running on a blank-tag record.
    accepted = (not blank) and read_edition(rec, edition)
    return edition if accepted else {}
Example #9
0
    def test_binary(self):
        for i in bin_samples:
            try:
                expect_filename = 'test_data/bin_expect/' + i
                data = open('test_data/bin_input/' + i).read()
                if len(data) != int(data[:5]):
                    data = data.decode('utf-8').encode('raw_unicode_escape')
                assert len(data) == int(data[:5])
                rec = MarcBinary(data)
                edition_marc_bin = read_edition(rec)
                assert edition_marc_bin
                #            if i.startswith('engin'):
                #                pprint(edition_marc_bin)
                #                assert False
                j = {}
                if os.path.exists(expect_filename):
                    j = simplejson.load(open(expect_filename))
                    if not j:
                        print expect_filename
                    assert j
                if not j:
                    simplejson.dump(edition_marc_bin,
                                    open(expect_filename, 'w'),
                                    indent=2)
                    continue
                self.assertEqual(sorted(edition_marc_bin.keys()),
                                 sorted(j.keys()))
                for k in edition_marc_bin.keys():
                    if isinstance(j[k], list):
                        for item1, item2 in zip(edition_marc_bin[k], j[k]):
                            #print (i, k, item1)
                            self.assertEqual(item1, item2)

                    self.assertEqual(edition_marc_bin[k], j[k])
                self.assertEqual(edition_marc_bin, j)
            except:
                print 'bad marc:', i
                raise

        i = 'talis_see_also.mrc'
        f = open('test_data/bin_input/' + i)
        rec = MarcBinary(f.read())
        self.assertRaises(SeeAlsoAsTitle, read_edition, rec)

        i = 'talis_no_title2.mrc'
        f = open('test_data/bin_input/' + i)
        rec = MarcBinary(f.read())
        self.assertRaises(NoTitle, read_edition, rec)
Example #10
0
def read_amazon_file(f):
    while True:
        buf = f.read(1024)
        if not buf:
            break
        m = re_amazon.match(buf)
        (asin, page_len, page) = m.groups()
        page += f.read(int(page_len) - len(page))
        try:
            edition = read_edition(fromstring(page))
        except:
            print 'bad record:', asin
            raise
        if not edition:
            continue
        yield asin, edition
Example #11
0
def read_amazon_file(f):
    """Yield ``(asin, edition)`` for each usable record in the open file *f*.

    Each record is located by reading a 1024-sized chunk and matching it
    with ``re_amazon``, whose three groups are taken as the ASIN, the page
    length and the part of the page already contained in the chunk.  The
    remainder of the page is then read from *f*, parsed with ``fromstring``
    and handed to ``read_edition``.  Records for which ``read_edition``
    returns a falsy value are skipped.

    Raises:
        ValueError: if a chunk does not match ``re_amazon``, or if the
            declared page length is smaller than the page data already read.
        Exception: whatever ``fromstring`` / ``read_edition`` raise is
            re-raised after the offending ASIN has been printed.
    """
    while True:
        buf = f.read(1024)
        if not buf:
            break
        m = re_amazon.match(buf)
        if m is None:
            # Fail with context instead of an AttributeError on None.
            raise ValueError('unrecognised record header: %r' % (buf[:80],))
        asin, page_len, page = m.groups()
        remaining = int(page_len) - len(page)
        if remaining < 0:
            # A negative size would make read() swallow the rest of the file.
            raise ValueError(
                'record %r is shorter than the data already read' % (asin,))
        page += f.read(remaining)
        try:
            edition = read_edition(fromstring(page))
        except:
            # Bare except is deliberate: label the failure, then re-raise.
            print('bad record:', asin)
            raise
        if edition:
            yield asin, edition
Example #12
0
    def test_binary(self):
        for i in bin_samples:
            try:
                expect_filename = 'test_data/bin_expect/' + i
                data = open('test_data/bin_input/' + i).read()
                if len(data) != int(data[:5]):
                    data = data.decode('utf-8').encode('raw_unicode_escape')
                assert len(data) == int(data[:5])
                rec = MarcBinary(data)
                edition_marc_bin = read_edition(rec)
                assert edition_marc_bin
    #            if i.startswith('engin'):
    #                pprint(edition_marc_bin)
    #                assert False
                j = {}
                if os.path.exists(expect_filename):
                    j = simplejson.load(open(expect_filename))
                    if not j:
                        print expect_filename
                    assert j
                if not j:
                    simplejson.dump(edition_marc_bin, open(expect_filename, 'w'), indent=2)
                    continue
                self.assertEqual(sorted(edition_marc_bin.keys()), sorted(j.keys()))
                for k in edition_marc_bin.keys():
                    if isinstance(j[k], list):
                        for item1, item2 in zip(edition_marc_bin[k], j[k]):
                            #print (i, k, item1)
                            self.assertEqual(item1, item2)

                    self.assertEqual(edition_marc_bin[k], j[k])
                self.assertEqual(edition_marc_bin, j)
            except:
                print 'bad marc:', i
                raise

        i = 'talis_see_also.mrc'
        f = open('test_data/bin_input/' + i)
        rec = MarcBinary(f.read())
        self.assertRaises(SeeAlsoAsTitle, read_edition, rec)

        i = 'talis_no_title2.mrc'
        f = open('test_data/bin_input/' + i)
        rec = MarcBinary(f.read())
        self.assertRaises(NoTitle, read_edition, rec)
Example #13
0
# Walk every (data, length) pair produced by read_file(f), optionally print
# debugging views of the record, and count the records read_edition rejects.
for data, length in read_file(f):
    # `next` is a running total of the lengths seen so far (it shadows the
    # builtin of the same name); `pos` is its value before this record.
    # NOTE(review): presumably the record's offset in the file -- confirm.
    pos = next
    next += length
    total += 1
    if show_field:
        # Return value is discarded; presumably get_first_tag prints -- TODO confirm.
        get_first_tag(data, set([show_field]))
    if show_leader:
        # First 24 characters of the raw record.
        print data[:24]
    if show_pos:
        print pos
    if verbose:
        show_book(data)
        print
    if build_rec:
        # Uses parse.read_edition on a MarcBinary wrapper, unlike the bare
        # read_edition(data) call below.
        marc_rec = MarcBinary(data)
        edition_marc_bin = parse.read_edition(marc_rec)
        pprint(edition_marc_bin)
        print
    try:
        rec = read_edition(data)
    except SoundRecording:
        sound_rec += 1
        continue
    except BadDictionary:
        bad_dict += 1
        continue
    except NotBook:
        if show_non_books:
            show_book(data)
            print
        # Unlike the two handlers above, the visible code has no `continue`
        # after this counter.
        not_book += 1
Example #14
0
# Walk every (data, length) pair produced by read_file(f), optionally print
# debugging views of the record, and count the records read_edition rejects.
for data, length in read_file(f):
    # `next` is a running total of the lengths seen so far (it shadows the
    # builtin of the same name); `pos` is its value before this record.
    # NOTE(review): presumably the record's offset in the file -- confirm.
    pos = next
    next += length
    total += 1
    if show_field:
        # Return value is discarded; presumably get_first_tag prints -- TODO confirm.
        get_first_tag(data, set([show_field]))
    if show_leader:
        # First 24 characters of the raw record.
        print data[:24]
    if show_pos:
        print pos
    if verbose:
        show_book(data)
        print
    if build_rec:
        # Uses parse.read_edition on a MarcBinary wrapper, unlike the bare
        # read_edition(data) call below.
        marc_rec = MarcBinary(data)
        edition_marc_bin = parse.read_edition(marc_rec)
        pprint(edition_marc_bin)
        print
    try:
        rec = read_edition(data)
    except SoundRecording:
        sound_rec += 1
        continue
    except BadDictionary:
        bad_dict += 1
        continue
    except NotBook:
        if show_non_books:
            show_book(data)
            print
        # Unlike the two handlers above, the visible code has no `continue`
        # after this counter.
        not_book += 1