def run_one(snippet, expected, desc, entbase=None, config=None, loop=None, canonical=True):
    '''
    Convert a MARC snippet with bfconvert and compare the hash-neutral
    result model against an expected Versa serialization.

    snippet - MARC source text to convert (str)
    expected - expected Versa output document (str)
    desc - human-readable label included in assertion messages
    entbase, config, loop, canonical - passed through to bfconvert
    '''
    m = memory.connection()
    m_expected = memory.connection()
    # FIX: use a context manager so the temp file is closed (and removed)
    # even when bfconvert raises; the original leaked it on failure
    with tempfile.NamedTemporaryFile() as infile:
        infile.write(snippet.encode('utf-8'))
        infile.seek(0)
        outstream = StringIO()
        bfconvert([infile], model=m, out=outstream, config=config,
                  canonical=canonical, loop=loop)
    outstream.seek(0)

    hashmap, m = hash_neutral_model(outstream)
    hashmap = '\n'.join(sorted([repr((i[1], i[0])) for i in hashmap.items()]))

    expected_stream = StringIO(expected)
    hashmap_expected, m_expected = hash_neutral_model(expected_stream)
    hashmap_expected = '\n'.join(
        sorted([repr((i[1], i[0])) for i in hashmap_expected.items()]))

    assert hashmap == hashmap_expected, "Changes to hashes found ({0}):\n{1}\n\nActual model structure diff:\n{2}".format(
        desc, file_diff(hashmap_expected, hashmap), file_diff(repr(m_expected), repr(m)))
    assert m == m_expected, "Discrepancies found ({0}):\n{1}".format(
        desc, file_diff(repr(m_expected), repr(m)))
def test_work_fallback_author_in_marc_with_plusbib():
    '''
    Convert the regular MARC example using the fallback-author plus-bib
    config and verify the hash-neutral model against the expected output.
    '''
    m = memory.connection()
    m_expected = memory.connection()
    s = StringIO()
    bfconvert([BytesIO(REGULAR_MARC_EXAMPLE)], model=m, out=s,
              config=WORK_FALLBACK_AUTHOR_IN_MARC_CONFIG_PLUS_BIB, canonical=True)
    s.seek(0)

    hashmap, m = hash_neutral_model(s)
    hashmap = '\n'.join(sorted([ repr((i[1], i[0])) for i in hashmap.items() ]))

    #Strip out tag-XXX / sf-XXX relationships (raw MARC extension data)
    #before comparing against the expected model
    removals = []
    for ix, (o, r, t, a) in m:
        if r.startswith('http://bibfra.me/vocab/marcext/tag-') or r.startswith('http://bibfra.me/vocab/marcext/sf-'):
            removals.append(ix)
    m.remove(removals)

    hashmap_expected, m_expected = hash_neutral_model(StringIO(WORK_FALLBACK_AUTHOR_IN_MARC_EXPECTED_PLUS_BIB))
    hashmap_expected = '\n'.join(sorted([ repr((i[1], i[0])) for i in hashmap_expected.items() ]))

    #BUG FIX: the original message used {0} twice, so the second format
    #argument (the actual model structure diff) was never shown
    assert hashmap == hashmap_expected, "Changes to hashes found:\n{0}\n\nActual model structure diff:\n{1}".format(file_diff(hashmap_expected, hashmap), file_diff(repr(m_expected), repr(m)))
    assert m == m_expected, "Discrepancies found:\n{0}".format(file_diff(repr(m_expected), repr(m)))
def test_model_consumed():
    '''
    Verify that bfconvert fully consumes (drains) the model when run over
    the multiple-authlinks .mrx resource with no output options requested.
    '''
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(None)
    m = memory.connection()
    fname = os.path.join(RESOURCEPATH, 'multiple-authlinks.mrx')
    # FIX: close the input file deterministically; the original left the
    # handle open (relying on GC) and leaked it if bfconvert raised
    with open(fname, 'rb') as inf:
        bfconvert([inf], entbase='http://example.org/', model=m, config=None,
                  verbose=False, loop=loop)
    assert m.size() == 0, 'Model not consumed:\n'+repr(m)
def test_model_consumed():
    '''
    Verify that bfconvert fully consumes (drains) the model when run over
    the multiple-authlinks .xml resource with no output options requested.
    '''
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(None)
    m = memory.connection()
    fname = os.path.join(RESOURCEPATH, 'multiple-authlinks.xml')
    # FIX: close the input file deterministically; the original left the
    # handle open (relying on GC) and leaked it if bfconvert raised
    with open(fname, 'rb') as inf:
        bfconvert([inf], entbase='http://example.org/', model=m, config=None,
                  verbose=False, loop=loop)
    assert m.size() == 0, 'Model not consumed:\n'+repr(m)
def test_author_in_marc():
    '''
    Convert the AUTHOR_IN_MARC example and verify the hash-neutral model
    (with marcext tag-/sf- noise stripped) against the expected output.
    '''
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(None)
    m = memory.connection()
    m_expected = memory.connection()
    s = StringIO()
    bfconvert([BytesIO(AUTHOR_IN_MARC)], model=m, out=s,
              config=AUTHOR_IN_MARC_CONFIG, canonical=True, loop=loop)
    s.seek(0)

    hashmap, m = hash_neutral_model(s)
    hashmap = '\n'.join(sorted([repr((i[1], i[0])) for i in hashmap.items()]))

    #Strip out tag-XXX / sf-XXX relationships (raw MARC extension data)
    #before comparing against the expected model
    removals = []
    for ix, (o, r, t, a) in m:
        if r.startswith('http://bibfra.me/vocab/marcext/tag-') or r.startswith(
                'http://bibfra.me/vocab/marcext/sf-'):
            removals.append(ix)
    m.remove(removals)

    hashmap_expected, m_expected = hash_neutral_model(
        StringIO(AUTHOR_IN_MARC_EXPECTED))
    hashmap_expected = '\n'.join(
        sorted([repr((i[1], i[0])) for i in hashmap_expected.items()]))

    #BUG FIX: the original message used {0} twice, so the second format
    #argument (the actual model structure diff) was never shown
    assert hashmap == hashmap_expected, "Changes to hashes found:\n{0}\n\nActual model structure diff:\n{1}".format(
        file_diff(hashmap_expected, hashmap), file_diff(repr(m_expected), repr(m)))
    assert m == m_expected, "Discrepancies found:\n{0}".format(
        file_diff(repr(m_expected), repr(m)))
def run_one(name, entbase=None, config=None, loop=None, canonical=True):
    '''
    Convert the named .mrx resource file with bfconvert and compare its
    hash-neutral model against the stored <name>.versa expected document.

    name - base name (no extension) of the resource pair to check
    entbase, config, loop, canonical - passed through to bfconvert
    '''
    actual_model = memory.connection()
    expected_model = memory.connection()
    converted = StringIO()
    marc_path = os.path.join(RESOURCEPATH, name+'.mrx')
    bfconvert([inputsource(open(marc_path, 'rb'))], model=actual_model,
              out=converted, config=config, canonical=canonical, loop=loop)
    converted.seek(0)

    def render(hmap):
        #One sorted line per (hash-value, key) pair, for stable text diffs
        return '\n'.join(sorted(repr((v, k)) for (k, v) in hmap.items()))

    hashmap, actual_model = hash_neutral_model(converted)
    hashmap = render(hashmap)

    with open(os.path.join(RESOURCEPATH, name+'.versa')) as indoc:
        hashmap_expected, expected_model = hash_neutral_model(indoc)
        hashmap_expected = render(hashmap_expected)

    assert hashmap == hashmap_expected, "Changes to hashes found for {0}:\n{1}\n\nActual model structure diff:\n{2}".format(name, file_diff(hashmap_expected, hashmap), file_diff(repr(expected_model), repr(actual_model)))
    assert actual_model == expected_model, "Discrepancies found for {0}:\n{1}".format(name, file_diff(repr(expected_model), repr(actual_model)))
def run_one(snippet, expected, desc, entbase=None, config=None, loop=None, canonical=True):
    '''
    Convert a MARC snippet with bfconvert and compare the hash-neutral
    result model against an expected Versa serialization.

    snippet - MARC source text to convert (str)
    expected - expected Versa output document (str)
    desc - human-readable label included in assertion messages
    entbase, config, loop, canonical - passed through to bfconvert
    '''
    m = memory.connection()
    m_expected = memory.connection()
    # FIX: use a context manager so the temp file is closed (and removed)
    # even when bfconvert raises; the original leaked it on failure
    with tempfile.NamedTemporaryFile() as infile:
        infile.write(snippet.encode('utf-8'))
        infile.seek(0)
        outstream = StringIO()
        bfconvert([infile], model=m, out=outstream, config=config,
                  canonical=canonical, loop=loop)
    outstream.seek(0)

    hashmap, m = hash_neutral_model(outstream)
    hashmap = '\n'.join(sorted([ repr((i[1], i[0])) for i in hashmap.items() ]))

    expected_stream = StringIO(expected)
    hashmap_expected, m_expected = hash_neutral_model(expected_stream)
    hashmap_expected = '\n'.join(sorted([ repr((i[1], i[0])) for i in hashmap_expected.items() ]))

    assert hashmap == hashmap_expected, "Changes to hashes found ({0}):\n{1}\n\nActual model structure diff:\n{2}".format(desc, file_diff(hashmap_expected, hashmap), file_diff(repr(m_expected), repr(m)))
    assert m == m_expected, "Discrepancies found ({0}):\n{1}".format(desc, file_diff(repr(m_expected), repr(m)))
# NOTE(review): the span down to end_element('record') is the tail of a
# record-serialization function whose definition starts before this chunk;
# the nesting shown here is reconstructed — confirm against the full file.
        handler.end_element('datafield')
    else: # fixed
        handler.start_element('controlfield', attrs)
        handler.char_data(value)
        handler.end_element('controlfield')
    handler.end_element('record')


class FauxParser:
    # Minimal stand-in for an expat parser object: the expat-style
    # callbacks read only these position attributes, so dummy values suffice
    CurrentLineNumber = -1
    CurrentColumnNumber = -1


def handle_marcjson_source(infname, sink, args, attr_cls, attr_list_cls):
    '''
    Load one MARC/JSON record from infname and drive it through the
    marcxml expat-style callbacks into the coroutine sink.

    infname - path to a MARC/JSON input file
    sink - coroutine consuming parse events (primed here via next())
    args - extra arguments (unused here; kept for the bfconvert handler signature)
    attr_cls, attr_list_cls - attribute container classes for the callbacks
    '''
    parser = FauxParser()
    with open(infname) as fp:
        record = json.load(fp)
    next(sink) # prime the coroutine sink before sending events
    handler = marcxml.expat_callbacks(sink, parser, attr_cls, attr_list_cls, lax=True)
    result = parse_record(record, handler)


if __name__ == '__main__':
    # Usage: script INFILE OUTFILE — converts MARC/JSON to RDF Turtle
    import sys
    args = sys.argv[1:]
    infile = args.pop(0)
    outfile = args.pop(0)
    with open(outfile, 'wb') as fp:
        bfconvert([infile], handle_marc_source=handle_marcjson_source, rdfttl=fp, verbose=True)
class FauxParser:
    '''Dummy parser exposing only the position attributes the callbacks read.'''
    CurrentLineNumber = -1
    CurrentColumnNumber = -1


def handle_marcjson_source(infname, sink, args, attr_cls, attr_list_cls):
    '''
    Load one MARC/JSON record from infname and drive it through the
    marcxml expat-style callbacks into the coroutine sink.

    infname - path to a MARC/JSON input file
    sink - coroutine consuming parse events (primed here via next())
    args - extra arguments (unused here; kept for the handler signature)
    attr_cls, attr_list_cls - attribute container classes for the callbacks
    '''
    faux_parser = FauxParser()
    with open(infname) as source:
        record = json.load(source)
    next(sink)  # prime the coroutine before any events are sent
    callbacks = marcxml.expat_callbacks(sink, faux_parser, attr_cls, attr_list_cls, lax=True)
    _ = parse_record(record, callbacks)


if __name__ == '__main__':
    # Usage: script INFILE OUTFILE — converts MARC/JSON to RDF Turtle
    import sys
    infile, outfile = sys.argv[1], sys.argv[2]
    with open(outfile, 'wb') as ttl_out:
        bfconvert([infile], handle_marc_source=handle_marcjson_source, rdfttl=ttl_out, verbose=True)
def main():
    '''
    One benchmark iteration: build a fresh event loop, rewind the shared
    input stream, and run a single bfconvert pass into the shared model.
    '''
    # A new event loop is needed per timeit iteration; detach the default
    # loop so bfconvert uses only the one passed in
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(None)
    instream.seek(io.SEEK_SET)  # rewind so each iteration reads from the start
    bfconvert(instream, model=m, out=s, config=config, loop=loop)