def test_binary(self):
    """Check every binary MARC sample against its stored JSON expectation.

    For each name in ``bin_samples`` the record is read from
    ``<test_data>/bin_input/<name>``, parsed with ``read_edition`` and
    compared with the JSON stored in ``<test_data>/bin_expect/<name>``.
    If the expectation file does not exist it is regenerated from the
    parsed result (a warning is printed) and the sample is skipped.

    Any failure prints the offending sample name before re-raising.
    """
    for i in bin_samples:
        try:
            expect_filename = "%s/bin_expect/%s" % (test_data, i)
            # MARC input is binary data: read it in binary mode and close
            # the handle as soon as the bytes are in memory.
            with open("%s/bin_input/%s" % (test_data, i), 'rb') as marc_file:
                data = marc_file.read()
            # The first five characters are read as the declared record
            # length.  On a mismatch the data is round-tripped through
            # utf-8 / raw_unicode_escape and must match afterwards.
            if len(data) != int(data[:5]):
                data = data.decode('utf-8').encode('raw_unicode_escape')
            assert len(data) == int(data[:5])
            rec = MarcBinary(data)
            edition_marc_bin = read_edition(rec)
            assert edition_marc_bin
            j = {}
            if os.path.exists(expect_filename):
                with open(expect_filename) as expect_file:
                    j = simplejson.load(expect_file)
                assert j, "Unable to open test data: %s" % expect_filename
            else:
                print("WARNING: test data %s not found, recreating it!" % expect_filename)
                # Closing the handle explicitly guarantees the regenerated
                # expectation is flushed to disk.
                with open(expect_filename, 'w') as expect_file:
                    simplejson.dump(edition_marc_bin, expect_file, indent=2)
                continue
            self.assertEqual(sorted(edition_marc_bin.keys()), sorted(j.keys()))
            for k in edition_marc_bin:
                if isinstance(j[k], list):
                    # Item-by-item comparison first, so a failure points at
                    # the first differing element rather than the whole list.
                    for item1, item2 in zip(edition_marc_bin[k], j[k]):
                        self.assertEqual(item1, item2)
                self.assertEqual(edition_marc_bin[k], j[k])
            self.assertEqual(edition_marc_bin, j)
        except Exception:
            # Identify the failing sample, then let the test runner report
            # the original error.
            print('Bad MARC: %s' % (i,))
            raise
def test_xml(self):
    """Check every MARC XML sample against its stored JSON expectation.

    For each name in ``xml_samples`` the document at
    ``test_data/xml_input/<name>_marc.xml`` is parsed, converted with
    ``read_edition`` and compared with the JSON stored in
    ``test_data/xml_expect/<name>_marc.xml``.  When no (non-empty)
    expectation can be loaded because the file is missing, it is written
    from the parsed result and the sample is skipped.

    Any failure prints the offending sample name before re-raising.
    """
    for i in xml_samples:
        try:
            expect_filename = 'test_data/xml_expect/' + i + '_marc.xml'
            path = 'test_data/xml_input/' + i + '_marc.xml'
            with open(path) as xml_file:
                element = etree.parse(xml_file).getroot()
            # If the root is a wrapper whose first child is the record
            # element, descend to that child.
            if element.tag != record_tag and element[0].tag == record_tag:
                element = element[0]
            rec = MarcXml(element)
            edition_marc_xml = read_edition(rec)
            assert edition_marc_xml
            j = {}
            if os.path.exists(expect_filename):
                with open(expect_filename) as expect_file:
                    j = simplejson.load(expect_file)
                if not j:
                    # Name the empty expectation file before failing.
                    print(expect_filename)
                assert j
            if not j:
                # Only reached when the expectation file does not exist.
                with open(expect_filename, 'w') as expect_file:
                    simplejson.dump(edition_marc_xml, expect_file, indent=2)
                continue
            self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
            for k in edition_marc_xml:
                # repr() of the tuple prints exactly what the former
                # backtick expression printed.
                print(repr((i, k, edition_marc_xml[k])))
                self.assertEqual(edition_marc_xml[k], j[k])
            self.assertEqual(edition_marc_xml, j)
        except Exception:
            print('bad marc: %s' % (i,))
            raise
def test_xml(self):
    """Check every MARC XML sample against its stored JSON expectation.

    For each name in ``xml_samples`` the document at
    ``<test_data>/xml_input/<name>_marc.xml`` is parsed, converted with
    ``read_edition`` and compared with the JSON stored in
    ``<test_data>/xml_expect/<name>_marc.xml``.  If the expectation file
    does not exist it is regenerated from the parsed result (a warning is
    printed) and the sample is skipped.

    Any failure prints the offending sample name before re-raising.
    """
    for i in xml_samples:
        try:
            expect_filename = "%s/xml_expect/%s_marc.xml" % (test_data, i)
            path = "%s/xml_input/%s_marc.xml" % (test_data, i)
            with open(path) as xml_file:
                element = etree.parse(xml_file).getroot()
            # Handle MARC XML collection elements in our test_data
            # expectations: use the first record inside the collection.
            if element.tag == collection_tag and element[0].tag == record_tag:
                element = element[0]
            rec = MarcXml(element)
            edition_marc_xml = read_edition(rec)
            assert edition_marc_xml
            j = {}
            if os.path.exists(expect_filename):
                with open(expect_filename) as expect_file:
                    j = simplejson.load(expect_file)
                assert j, "Unable to open test data: %s" % expect_filename
            else:
                print("WARNING: test data %s not found, recreating it!" % expect_filename)
                # Closing the handle explicitly guarantees the regenerated
                # expectation is flushed to disk.
                with open(expect_filename, 'w') as expect_file:
                    simplejson.dump(edition_marc_xml, expect_file, indent=2)
                continue
            self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
            for k in edition_marc_xml:
                # Per-key comparison first, so a failure names one field.
                self.assertEqual(edition_marc_xml[k], j[k])
            self.assertEqual(edition_marc_xml, j)
        except Exception:
            # Identify the failing sample, then let the test runner report
            # the original error.
            print('Bad MARC: %s' % (i,))
            raise
def test_xml(self):
    """Check every MARC XML sample against its stored JSON expectation.

    For each name in ``xml_samples`` the document at
    ``test_data/xml_input/<name>_marc.xml`` is parsed, converted with
    ``read_edition`` and compared with the JSON stored in
    ``test_data/xml_expect/<name>_marc.xml``.  When the expectation file
    is missing it is written from the parsed result and the sample is
    skipped.
    """
    for i in xml_samples:
        expect_filename = 'test_data/xml_expect/' + i + '_marc.xml'
        path = 'test_data/xml_input/' + i + '_marc.xml'
        with open(path) as xml_file:
            element = etree.parse(xml_file).getroot()
        # If the root is a wrapper whose first child is the record element,
        # descend to that child.
        if element.tag != record_tag and element[0].tag == record_tag:
            element = element[0]
        rec = MarcXml(element)
        edition_marc_xml = read_edition(rec)
        assert edition_marc_xml
        j = {}
        if os.path.exists(expect_filename):
            with open(expect_filename) as expect_file:
                j = simplejson.load(expect_file)
            if not j:
                # Name the empty expectation file before failing.
                print(expect_filename)
            assert j
        if not j:
            # Only reached when the expectation file does not exist.
            with open(expect_filename, 'w') as expect_file:
                simplejson.dump(edition_marc_xml, expect_file, indent=2)
            continue
        self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
        for k in edition_marc_xml:
            # Per-key comparison first, so a failure names one field.
            self.assertEqual(edition_marc_xml[k], j[k])
        self.assertEqual(edition_marc_xml, j)
def test_xml(self):
    """Compare each parsed MARC XML sample with its JSON expectation file.

    Input documents live at ``<test_data>/xml_input/<name>_marc.xml`` and
    expectations at ``<test_data>/xml_expect/<name>_marc.xml`` for every
    name in ``xml_samples``.  A missing expectation is recreated from the
    ``read_edition`` output (with a printed warning) and that sample is
    then skipped rather than compared.

    On any failure the sample name is printed and the error re-raised.
    """
    for i in xml_samples:
        try:
            expect_filename = "%s/xml_expect/%s_marc.xml" % (test_data, i)
            path = "%s/xml_input/%s_marc.xml" % (test_data, i)
            with open(path) as source:
                element = etree.parse(source).getroot()
            # Handle MARC XML collection elements in our test_data
            # expectations: use the first record inside the collection.
            if element.tag == collection_tag and element[0].tag == record_tag:
                element = element[0]
            rec = MarcXml(element)
            edition_marc_xml = read_edition(rec)
            assert edition_marc_xml
            j = {}
            if os.path.exists(expect_filename):
                with open(expect_filename) as expected:
                    j = simplejson.load(expected)
                assert j, "Unable to open test data: %s" % expect_filename
            else:
                print("WARNING: test data %s not found, recreating it!" % expect_filename)
                # The context manager closes (and therefore flushes) the
                # freshly written expectation file.
                with open(expect_filename, 'w') as expected:
                    simplejson.dump(edition_marc_xml, expected, indent=2)
                continue
            self.assertEqual(sorted(edition_marc_xml.keys()), sorted(j.keys()))
            for k in edition_marc_xml:
                # Field-level assertion gives a narrower failure message
                # than the whole-dict comparison below.
                self.assertEqual(edition_marc_xml[k], j[k])
            self.assertEqual(edition_marc_xml, j)
        except Exception:
            print('Bad MARC: %s' % (i,))
            raise
def parse(f):
    """Build an edition dict from the record obtained via ``xml_rec(f)``.

    Returns an empty dict when the record reports ``has_blank_tag`` (after
    printing a notice) or when ``read_edition(rec, edition)`` returns a
    falsy value; otherwise returns the ``edition`` dict that was passed to
    ``read_edition``.
    """
    record = xml_rec(f)
    if record.has_blank_tag:
        print('has blank tag')
        return {}

    edition = {}
    # presumably read_edition fills `edition` in place -- verify against
    # its definition.
    if read_edition(record, edition):
        return edition
    return {}
def test_binary(self):
    """Check binary MARC samples against expectations and error cases.

    For each name in ``bin_samples`` the record is read from
    ``test_data/bin_input/<name>``, parsed with ``read_edition`` and
    compared with the JSON stored in ``test_data/bin_expect/<name>``.
    When the expectation file is missing it is written from the parsed
    result and the sample is skipped.  Any failure prints the offending
    sample name before re-raising.

    Afterwards two specific records are checked to make ``read_edition``
    raise ``SeeAlsoAsTitle`` and ``NoTitle`` respectively.
    """
    for i in bin_samples:
        try:
            expect_filename = 'test_data/bin_expect/' + i
            # MARC input is binary data: read it in binary mode and close
            # the handle as soon as the bytes are in memory.
            with open('test_data/bin_input/' + i, 'rb') as marc_file:
                data = marc_file.read()
            # The first five characters are read as the declared record
            # length.  On a mismatch the data is round-tripped through
            # utf-8 / raw_unicode_escape and must match afterwards.
            if len(data) != int(data[:5]):
                data = data.decode('utf-8').encode('raw_unicode_escape')
            assert len(data) == int(data[:5])
            rec = MarcBinary(data)
            edition_marc_bin = read_edition(rec)
            assert edition_marc_bin
            j = {}
            if os.path.exists(expect_filename):
                with open(expect_filename) as expect_file:
                    j = simplejson.load(expect_file)
                if not j:
                    # Name the empty expectation file before failing.
                    print(expect_filename)
                assert j
            if not j:
                # Only reached when the expectation file does not exist.
                with open(expect_filename, 'w') as expect_file:
                    simplejson.dump(edition_marc_bin, expect_file, indent=2)
                continue
            self.assertEqual(sorted(edition_marc_bin.keys()), sorted(j.keys()))
            for k in edition_marc_bin:
                if isinstance(j[k], list):
                    # Item-by-item comparison first, so a failure points at
                    # the first differing element rather than the whole list.
                    for item1, item2 in zip(edition_marc_bin[k], j[k]):
                        self.assertEqual(item1, item2)
                self.assertEqual(edition_marc_bin[k], j[k])
            self.assertEqual(edition_marc_bin, j)
        except Exception:
            print('bad marc: %s' % (i,))
            raise

    # Records that read_edition must reject with a specific exception.
    i = 'talis_see_also.mrc'
    with open('test_data/bin_input/' + i, 'rb') as f:
        rec = MarcBinary(f.read())
    self.assertRaises(SeeAlsoAsTitle, read_edition, rec)

    i = 'talis_no_title2.mrc'
    with open('test_data/bin_input/' + i, 'rb') as f:
        rec = MarcBinary(f.read())
    self.assertRaises(NoTitle, read_edition, rec)
def read_amazon_file(f): while True: buf = f.read(1024) if not buf: break m = re_amazon.match(buf) (asin, page_len, page) = m.groups() page += f.read(int(page_len) - len(page)) try: edition = read_edition(fromstring(page)) except: print 'bad record:', asin raise if not edition: continue yield asin, edition
def read_amazon_file(f):
    """Generate ``(asin, edition)`` pairs from the records in file ``f``.

    Each iteration reads a 1024-character chunk, matches it against
    ``re_amazon`` to obtain ``(asin, page_len, page)``, then reads
    ``int(page_len) - len(page)`` further characters to extend the page.
    The page is parsed with ``fromstring`` and handed to ``read_edition``;
    records whose edition is falsy are skipped.

    Raises:
        ValueError: if a chunk does not match ``re_amazon`` (previously
            this surfaced as an ``AttributeError`` on ``None``).
        Exception: whatever ``fromstring`` / ``read_edition`` raise; the
            ASIN is printed first and the error is re-raised unchanged.
    """
    while True:
        buf = f.read(1024)
        if not buf:
            break
        m = re_amazon.match(buf)
        if m is None:
            # Fail with a message that shows what was actually read rather
            # than an opaque attribute error on None.
            raise ValueError('unrecognised record header: %r' % (buf[:80],))
        asin, page_len, page = m.groups()
        # NOTE(review): if page_len is smaller than len(page) the size
        # passed to read() is negative, which reads to end of file --
        # confirm the format rules this out.
        page += f.read(int(page_len) - len(page))
        try:
            edition = read_edition(fromstring(page))
        except Exception:
            print('bad record:', asin)
            raise
        if edition:
            yield asin, edition
# Walk every (data, length) pair produced by read_file(f), optionally dumping
# diagnostics for each record and tallying the records read_edition rejects.
# NOTE(review): the counters and flags used here (next, total, sound_rec,
# bad_dict, not_book, show_*, verbose, build_rec) are set up outside this
# fragment, and the nesting below was reconstructed from collapsed source --
# confirm against the original layout.
for data, length in read_file(f):
    # `next` is a running offset: `pos` is its value before this record's
    # length is added.
    pos = next
    next += length
    total += 1
    if show_field:
        get_first_tag(data, set([show_field]))
    if show_leader:
        # Print the first 24 characters of the record.
        print data[:24]
    if show_pos:
        print pos
    if verbose:
        show_book(data)
        print
    if build_rec:
        # Parse through the MarcBinary wrapper and pretty-print the result.
        marc_rec = MarcBinary(data)
        edition_marc_bin = parse.read_edition(marc_rec)
        pprint(edition_marc_bin)
        print
    try:
        rec = read_edition(data)
    except SoundRecording:
        sound_rec += 1
        continue
    except BadDictionary:
        bad_dict += 1
        continue
    except NotBook:
        if show_non_books:
            show_book(data)
            print
        not_book += 1