class MarcReader(JsonReader):
    """
    Reader class that understands MARC21 as base format
    """

    @staticmethod
    def split_blob(blob):
        """
        Splits the blob using <record.*?>.*?</record> as pattern.

        Note 1: Taken from invenio.bibrecord:create_records
        Note 2: Use the DOTALL flag to include newlines.

        @param blob: text that may contain several <record> elements
        @return: list with the text of every non-overlapping match, in the
            order they appear in the blob (empty list if nothing matches)
        """
        import re

        regex = re.compile("<record.*?>.*?</record>", re.DOTALL)
        return regex.findall(blob)

    def _prepare_blob(self):
        """
        Transforms the blob into rec_tree structure to use it in the standard
        translation phase inside C{JsonReader}

        The lower-cased schema of the blob wrapper selects what is done:
          - a schema starting with "file:" first loads the content of
            C{blob_wrapper.blob_file_name} into C{blob_wrapper.blob};
          - "recstruct" builds the tree straight from the blob;
          - "xml" and "file:xml" convert the blob with
            C{invenio.bibrecord.create_record} and then build the tree;
          - any other schema leaves C{rec_tree} empty.

        If an AttributeError is raised at any point of the above, the blob
        is assumed to be marcxml and is converted with C{create_record}.
        """
        self.rec_tree = CoolDict()
        try:
            schema = self.blob_wrapper.schema.lower()
            if schema.startswith("file:"):
                # Use a context manager so the file handle is always closed,
                # even when reading fails.
                with open(self.blob_wrapper.blob_file_name, "r") as blob_file:
                    self.blob_wrapper.blob = blob_file.read()
            if schema in ["recstruct"]:
                self.__create_rectree_from_recstruct()
            elif schema in ["xml", "file:xml"]:
                # TODO: Implement translation directly from xml
                from invenio.bibrecord import create_record

                self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
                self.__create_rectree_from_recstruct()
        except AttributeError:
            # Assume marcxml
            from invenio.bibrecord import create_record

            self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
            self.__create_rectree_from_recstruct()

    def __create_rectree_from_recstruct(self):
        """
        Using recstruct as base format it creates the intermediate structure
        that _translate will use.

        Every key of C{blob_wrapper.blob} is handled in one of two ways:
          - all-digit keys lower than "010" are stored in C{rec_tree} under
            the key itself, as a C{CoolList} with element 3 of each value;
          - for any other key, each value becomes a C{CoolDict} filled from
            the pairs found in element 0 of the value, stored under the key
            followed by elements 1 and 2 of the value, with spaces replaced
            by underscores.
        """
        for key, values in self.blob_wrapper.blob.iteritems():
            if key < "010" and key.isdigit():
                # Control field, it assumes controlfields are numeric only
                self.rec_tree[key] = CoolList([value[3] for value in values])
            else:
                for value in values:
                    field = CoolDict()
                    for subfield in value[0]:
                        field.extend(subfield[0], subfield[1])
                    self.rec_tree.extend(
                        (key + value[1] + value[2]).replace(" ", "_"), field
                    )
class MarcReader(JsonReader):
    """
    Reader class that understands MARC21 as base format
    """

    @staticmethod
    def split_blob(blob, schema):
        """
        Splits the blob using <record.*?>.*?</record> as pattern.

        Note 1: Taken from invenio.bibrecord:create_records
        Note 2: Use the DOTALL flag to include newlines.

        @param blob: text that may contain several <record> elements
        @param schema: not used by this implementation
        @return: list with the text of every non-overlapping match, in the
            order they appear in the blob (empty list if nothing matches)
        """
        import re

        regex = re.compile('<record.*?>.*?</record>', re.DOTALL)
        return regex.findall(blob)

    def _prepare_blob(self):
        """
        Transforms the blob into rec_tree structure to use it in the standard
        translation phase inside C{JsonReader}

        The lower-cased schema of the blob wrapper selects what is done:
          - a schema starting with 'file:' first loads the content of
            C{blob_wrapper.blob_file_name} into C{blob_wrapper.blob};
          - 'recstruct' builds the tree straight from the blob;
          - 'xml' and 'file:xml' convert the blob with
            C{invenio.bibrecord.create_record} and then build the tree;
          - any other schema leaves C{rec_tree} empty.

        If an AttributeError is raised at any point of the above, the blob
        is assumed to be marcxml and is converted with C{create_record}.
        """
        self.rec_tree = CoolDict()
        try:
            schema = self.blob_wrapper.schema.lower()
            if schema.startswith('file:'):
                # Use a context manager so the file handle is always closed,
                # even when reading fails.
                with open(self.blob_wrapper.blob_file_name, 'r') as blob_file:
                    self.blob_wrapper.blob = blob_file.read()
            if schema in ['recstruct']:
                self.__create_rectree_from_recstruct()
            elif schema in ['xml', 'file:xml']:
                #TODO: Implement translation directly from xml
                from invenio.bibrecord import create_record
                self.blob_wrapper.blob = create_record(
                    self.blob_wrapper.blob)[0]
                self.__create_rectree_from_recstruct()
        except AttributeError:
            #Assume marcxml
            from invenio.bibrecord import create_record
            self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
            self.__create_rectree_from_recstruct()

    def __create_rectree_from_recstruct(self):
        """
        Using recstruct as base format it creates the intermediate structure
        that _translate will use.

        Every key of C{blob_wrapper.blob} is handled in one of two ways:
          - all-digit keys lower than '010' are stored in C{rec_tree} under
            the key itself, as a C{CoolList} with element 3 of each value;
          - for any other key, each value becomes a C{CoolDict} filled from
            the pairs found in element 0 of the value, stored under the key
            followed by elements 1 and 2 of the value, with spaces replaced
            by underscores.
        """
        for key, values in self.blob_wrapper.blob.iteritems():
            if key < '010' and key.isdigit():
                #Control field, it assumes controlfields are numeric only
                self.rec_tree[key] = CoolList([value[3] for value in values])
            else:
                for value in values:
                    field = CoolDict()
                    for subfield in value[0]:
                        field.extend(subfield[0], subfield[1])
                    self.rec_tree.extend(
                        (key + value[1] + value[2]).replace(' ', '_'), field)
def __create_rectree_from_recstruct(self):
    """
    Builds the intermediate structure consumed by _translate, taking the
    recstruct held in C{blob_wrapper.blob} as input.

    All-digit tags lower than '010' are stored under the tag itself as a
    C{CoolList} made of element 3 of every occurrence. Every other
    occurrence becomes a C{CoolDict} filled from the pairs in its element 0
    and is stored under the tag followed by its elements 1 and 2, with
    spaces replaced by underscores.
    """
    recstruct = self.blob_wrapper.blob
    for tag, occurrences in recstruct.iteritems():
        if tag < '010' and tag.isdigit():
            # Control field: the code assumes control fields are numeric only
            contents = [occurrence[3] for occurrence in occurrences]
            self.rec_tree[tag] = CoolList(contents)
            continue

        for occurrence in occurrences:
            subfields = CoolDict()
            for pair in occurrence[0]:
                subfields.extend(pair[0], pair[1])
            full_tag = tag + occurrence[1] + occurrence[2]
            self.rec_tree.extend(full_tag.replace(' ', '_'), subfields)
def __create_rectree_from_recstruct(self):
    """
    Fills C{rec_tree}, the intermediate structure that _translate works on,
    from the recstruct stored in C{blob_wrapper.blob}.

    Two kinds of entries are produced:
      - for an all-digit tag lower than '010', a C{CoolList} with element 3
        of each instance, stored under the tag as is;
      - otherwise, one C{CoolDict} per instance, filled from the pairs in
        element 0 of the instance and stored under the tag plus elements 1
        and 2 of the instance, with spaces turned into underscores.
    """
    for tag, instances in self.blob_wrapper.blob.iteritems():
        is_control = tag < '010' and tag.isdigit()
        if not is_control:
            for instance in instances:
                content = CoolDict()
                for code_and_value in instance[0]:
                    content.extend(code_and_value[0], code_and_value[1])
                rec_key = (tag + instance[1] + instance[2]).replace(' ', '_')
                self.rec_tree.extend(rec_key, content)
        else:
            # Control field, control fields are assumed to be numeric only
            self.rec_tree[tag] = CoolList(
                [instance[3] for instance in instances])
def test_cool_list_and_dict(self):
    """Bibfield Utils, CoolList and CoolDict - Unit tests"""
    cool_dict = CoolDict()
    cool_list = CoolList()

    # Storing a still-empty list is expected to leave the dict consumed
    cool_dict['a'] = cool_list
    self.assertTrue(cool_dict.consumed)

    # Once the list has content, storing it again must reset the flag
    cool_list.append(1)
    cool_list.append(2)
    cool_dict['a'] = cool_list
    self.assertFalse(cool_dict.consumed)

    cool_dict['b'] = CoolList([{'a':1}, {'a':2}])
    self.assertFalse(cool_dict.consumed)

    # Walking only the top level is expected not to be enough
    for dummy_k, dummy_v in cool_dict.iteritems():
        pass
    self.assertFalse(cool_dict.consumed)

    # Walk the nested containers as well
    for dummy_i in cool_dict['a']:
        pass
    for element in cool_dict['b']:
        for dummy_k, dummy_v in element.iteritems():
            pass
    self.assertTrue(cool_dict.consumed)
def test_cool_list_and_dict(self):
    """Bibfield Utils, CoolList and CoolDict - Unit tests"""
    outer = CoolDict()
    inner = CoolList()

    # Step 1: an empty list as value, the dict should report consumed
    outer['a'] = inner
    self.assertTrue(outer.consumed)

    # Step 2: the same list with two items, the dict should not be consumed
    inner.append(1)
    inner.append(2)
    outer['a'] = inner
    self.assertFalse(outer.consumed)

    # Step 3: add a list of plain dicts under a second key
    outer['b'] = CoolList([{'a': 1}, {'a': 2}])
    self.assertFalse(outer.consumed)

    # Step 4: iterate the dict itself, the flag should still be False
    for dummy_k, dummy_v in outer.iteritems():
        pass
    self.assertFalse(outer.consumed)

    # Step 5: iterate both lists and the dicts inside the second one
    for dummy_item in outer['a']:
        pass
    for entry in outer['b']:
        for dummy_k, dummy_v in entry.iteritems():
            pass
    self.assertTrue(outer.consumed)
def test_cool_dict(self):
    """Bibfield Utils, CoolDict - Unit tests"""
    cool_dict = CoolDict()
    cool_dict['a'] = 1
    cool_dict['b'] = 2
    cool_dict['c'] = 3
    self.assertFalse(cool_dict.consumed)

    # Reading a single key is expected not to consume the whole dict
    dummy_value = cool_dict['a']
    self.assertFalse(cool_dict.consumed)

    for dummy_k, dummy_v in cool_dict.iteritems():
        pass
    self.assertTrue(cool_dict.consumed)

    # Replacing a value with a nested dict must reset the flag
    cool_dict['b'] = {'d': 1}
    self.assertFalse(cool_dict.consumed)
    dummy_value = cool_dict['b']
    self.assertFalse(cool_dict.consumed)

    for dummy_k, dummy_v in cool_dict['b'].iteritems():
        pass
    self.assertTrue(cool_dict.consumed)

    # Extending an existing key is expected to turn its value into a CoolList
    cool_dict.extend('a', 11)
    self.assertFalse(cool_dict.consumed)
    extended = cool_dict['a']
    self.assertTrue(isinstance(extended, CoolList))

    for dummy_i in cool_dict['a']:
        pass
    self.assertTrue(cool_dict.consumed)
def test_cool_dict(self):
    """Bibfield Utils, CoolDict - Unit tests"""
    tracked = CoolDict()

    # Step 1: three plain values, nothing read yet
    tracked["a"] = 1
    tracked["b"] = 2
    tracked["c"] = 3
    self.assertFalse(tracked.consumed)

    # Step 2: one key read, the dict should still not be consumed
    dummy_read = tracked["a"]
    self.assertFalse(tracked.consumed)

    # Step 3: a full iteration should mark it consumed
    for dummy_k, dummy_v in tracked.iteritems():
        pass
    self.assertTrue(tracked.consumed)

    # Step 4: a nested dict as new value, read but not iterated
    tracked["b"] = {"d": 1}
    self.assertFalse(tracked.consumed)
    dummy_read = tracked["b"]
    self.assertFalse(tracked.consumed)

    # Step 5: iterating the nested dict should mark it consumed again
    nested = tracked["b"]
    for dummy_k, dummy_v in nested.iteritems():
        pass
    self.assertTrue(tracked.consumed)

    # Step 6: extend an existing key and check the resulting value type
    tracked.extend("a", 11)
    self.assertFalse(tracked.consumed)
    is_cool_list = isinstance(tracked["a"], CoolList)
    self.assertTrue(is_cool_list)

    # Step 7: iterating the extended value should mark it consumed
    for dummy_item in tracked["a"]:
        pass
    self.assertTrue(tracked.consumed)
def _prepare_blob(self):
    """
    Transforms the blob into rec_tree structure to use it in the standard
    translation phase inside C{JsonReader}

    The lower-cased schema of the blob wrapper selects what is done:
      - a schema starting with 'file:' first loads the content of
        C{blob_wrapper.blob_file_name} into C{blob_wrapper.blob};
      - 'recstruct' builds the tree straight from the blob;
      - 'xml' and 'file:xml' convert the blob with
        C{invenio.bibrecord.create_record} and then build the tree;
      - any other schema leaves C{rec_tree} empty.

    If an AttributeError is raised at any point of the above, the blob is
    assumed to be marcxml and is converted with C{create_record}.
    """
    self.rec_tree = CoolDict()
    try:
        schema = self.blob_wrapper.schema.lower()
        if schema.startswith('file:'):
            # Use a context manager so the file handle is always closed,
            # even when reading fails.
            with open(self.blob_wrapper.blob_file_name, 'r') as blob_file:
                self.blob_wrapper.blob = blob_file.read()
        if schema in ['recstruct']:
            self.__create_rectree_from_recstruct()
        elif schema in ['xml', 'file:xml']:
            #TODO: Implement translation directly from xml
            from invenio.bibrecord import create_record
            self.blob_wrapper.blob = create_record(
                self.blob_wrapper.blob)[0]
            self.__create_rectree_from_recstruct()
    except AttributeError:
        #Assume marcxml
        from invenio.bibrecord import create_record
        self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
        self.__create_rectree_from_recstruct()
def _prepare_blob(self):
    """
    Transforms the blob into rec_tree structure to use it in the standard
    translation phase inside C{JsonReader}

    Steps, driven by the lower-cased schema of the blob wrapper:
      1. When the schema starts with 'file:', C{blob_wrapper.blob} is
         replaced by the content of C{blob_wrapper.blob_file_name}.
      2. For 'recstruct' the tree is built from the blob as it is; for
         'xml' and 'file:xml' the blob is first converted with
         C{invenio.bibrecord.create_record}. Any other schema leaves
         C{rec_tree} empty.

    An AttributeError raised during these steps makes the method assume the
    blob is marcxml: it is converted with C{create_record} and the tree is
    built from the result.
    """
    self.rec_tree = CoolDict()
    try:
        schema = self.blob_wrapper.schema.lower()
        if schema.startswith('file:'):
            # The with block guarantees the file handle is released, which
            # a bare open(...).read() does not.
            with open(self.blob_wrapper.blob_file_name, 'r') as blob_file:
                self.blob_wrapper.blob = blob_file.read()
        if schema in ['recstruct']:
            self.__create_rectree_from_recstruct()
        elif schema in ['xml', 'file:xml']:
            #TODO: Implement translation directly from xml
            from invenio.bibrecord import create_record
            self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
            self.__create_rectree_from_recstruct()
    except AttributeError:
        #Assume marcxml
        from invenio.bibrecord import create_record
        self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
        self.__create_rectree_from_recstruct()