コード例 #1
0
ファイル: bibfield_marcreader.py プロジェクト: adsabs/invenio
class MarcReader(JsonReader):
    """
    Reader class that understands MARC21 as base format.

    The blob held by C{self.blob_wrapper} (a recstruct, MARCXML, or a file
    containing MARCXML) is converted into the intermediate C{rec_tree}
    structure.
    """

    @staticmethod
    def split_blob(blob):
        """
        Splits the blob using <record.*?>.*?</record> as pattern.

        Note 1: Taken from invenio.bibrecord:create_records
        Note 2: Use the DOTALL flag to include newlines.

        @param blob: text containing zero or more <record> elements
        @return: list with one string per matched record, in order of
            appearance (empty list when nothing matches)
        """
        import re

        regex = re.compile("<record.*?>.*?</record>", re.DOTALL)
        return regex.findall(blob)

    def _prepare_blob(self):
        """
        Transforms the blob into rec_tree structure to use it in the standard
        translation phase inside C{JsonReader}

        The lower-cased schema of the blob wrapper selects the conversion:
          - a schema starting with "file:" first replaces the blob with the
            content of C{blob_wrapper.blob_file_name};
          - "recstruct" blobs are processed as they are;
          - "xml" and "file:xml" blobs go through
            C{invenio.bibrecord.create_record} first.
        Any other schema leaves C{rec_tree} empty. When an AttributeError is
        raised anywhere in that process (for instance because the wrapper has
        no usable schema) the blob is assumed to be MARCXML.
        """
        self.rec_tree = CoolDict()
        try:
            schema = self.blob_wrapper.schema.lower()
            if schema.startswith("file:"):
                # Close the file deterministically instead of leaving the
                # handle open until it is garbage collected.
                with open(self.blob_wrapper.blob_file_name, "r") as blob_file:
                    self.blob_wrapper.blob = blob_file.read()
            if schema == "recstruct":
                self.__create_rectree_from_recstruct()
            elif schema in ("xml", "file:xml"):
                # TODO: Implement translation directly from xml
                self.__create_rectree_from_marcxml()
        except AttributeError:
            # Assume marcxml
            self.__create_rectree_from_marcxml()

    def __create_rectree_from_marcxml(self):
        """
        Replaces the blob with the first element returned by
        C{create_record} and then builds rec_tree from it.
        """
        # Imported at call time, exactly where the original code imported it.
        from invenio.bibrecord import create_record

        self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
        self.__create_rectree_from_recstruct()

    def __create_rectree_from_recstruct(self):
        """
        Using recstruct as base format it creates the intermediate structure
        that _translate will use.

        NOTE(review): each field instance is presumably a bibrecord tuple
        (subfields, ind1, ind2, controlfield value, ...) -- confirm against
        invenio.bibrecord.
        """
        for key, values in self.blob_wrapper.blob.iteritems():
            if key < "010" and key.isdigit():
                # Control field, it assumes controlfields are numeric only.
                # Only element 3 of every instance is kept.
                self.rec_tree[key] = CoolList([value[3] for value in values])
            else:
                for value in values:
                    field = CoolDict()
                    for subfield in value[0]:
                        field.extend(subfield[0], subfield[1])
                    # Key is the tag followed by elements 1 and 2 of the
                    # instance, with blanks turned into underscores.
                    self.rec_tree.extend((key + value[1] + value[2]).replace(" ", "_"), field)
コード例 #2
0
class MarcReader(JsonReader):
    """
    Reader class that understands MARC21 as base format.

    The blob held by C{self.blob_wrapper} (a recstruct, MARCXML, or a file
    containing MARCXML) is converted into the intermediate C{rec_tree}
    structure.
    """
    @staticmethod
    def split_blob(blob, schema):
        """
        Splits the blob using <record.*?>.*?</record> as pattern.

        Note 1: Taken from invenio.bibrecord:create_records
        Note 2: Use the DOTALL flag to include newlines.

        @param blob: text containing zero or more <record> elements
        @param schema: accepted but not used by this implementation
        @return: list with one string per matched record, in order of
            appearance (empty list when nothing matches)
        """
        import re
        regex = re.compile('<record.*?>.*?</record>', re.DOTALL)
        return regex.findall(blob)

    def _prepare_blob(self):
        """
        Transforms the blob into rec_tree structure to use it in the standard
        translation phase inside C{JsonReader}

        The lower-cased schema of the blob wrapper selects the conversion:
          - a schema starting with 'file:' first replaces the blob with the
            content of C{blob_wrapper.blob_file_name};
          - 'recstruct' blobs are processed as they are;
          - 'xml' and 'file:xml' blobs go through
            C{invenio.bibrecord.create_record} first.
        Any other schema leaves C{rec_tree} empty. When an AttributeError is
        raised anywhere in that process (for instance because the wrapper has
        no usable schema) the blob is assumed to be MARCXML.
        """
        self.rec_tree = CoolDict()
        try:
            schema = self.blob_wrapper.schema.lower()
            if schema.startswith('file:'):
                # Close the file deterministically instead of leaving the
                # handle open until it is garbage collected.
                with open(self.blob_wrapper.blob_file_name, 'r') as blob_file:
                    self.blob_wrapper.blob = blob_file.read()
            if schema == 'recstruct':
                self.__create_rectree_from_recstruct()
            elif schema in ('xml', 'file:xml'):
                #TODO: Implement translation directly from xml
                self.__create_rectree_from_marcxml()
        except AttributeError:
            #Assume marcxml
            self.__create_rectree_from_marcxml()

    def __create_rectree_from_marcxml(self):
        """
        Replaces the blob with the first element returned by
        C{create_record} and then builds rec_tree from it.
        """
        # Imported at call time, exactly where the original code imported it.
        from invenio.bibrecord import create_record
        self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
        self.__create_rectree_from_recstruct()

    def __create_rectree_from_recstruct(self):
        """
        Using recstruct as base format it creates the intermediate structure
        that _translate will use.

        NOTE(review): each field instance is presumably a bibrecord tuple
        (subfields, ind1, ind2, controlfield value, ...) -- confirm against
        invenio.bibrecord.
        """
        for key, values in self.blob_wrapper.blob.iteritems():
            if key < '010' and key.isdigit():
                #Control field, it assumes controlfields are numeric only.
                #Only element 3 of every instance is kept.
                self.rec_tree[key] = CoolList([value[3] for value in values])
            else:
                for value in values:
                    field = CoolDict()
                    for subfield in value[0]:
                        field.extend(subfield[0], subfield[1])
                    #Key is the tag followed by elements 1 and 2 of the
                    #instance, with blanks turned into underscores.
                    self.rec_tree.extend(
                        (key + value[1] + value[2]).replace(' ', '_'), field)
コード例 #3
0
 def __create_rectree_from_recstruct(self):
     """
     Fill rec_tree, the intermediate structure that _translate works on,
     from the recstruct stored in the blob wrapper.
     """
     for tag, instances in self.blob_wrapper.blob.iteritems():
         if not (tag < '010' and tag.isdigit()):
             # Regular field: every instance becomes its own CoolDict of
             # subfields, stored under tag + elements 1 and 2 of the
             # instance, with blanks replaced by underscores.
             for instance in instances:
                 subfields = CoolDict()
                 for pair in instance[0]:
                     subfields.extend(pair[0], pair[1])
                 full_tag = tag + instance[1] + instance[2]
                 self.rec_tree.extend(full_tag.replace(' ', '_'), subfields)
             continue
         # Control field (assumed to be numeric only): keep element 3 of
         # every instance under the bare tag.
         self.rec_tree[tag] = CoolList(
             [instance[3] for instance in instances])
コード例 #4
0
 def __create_rectree_from_recstruct(self):
     """
     Walk the recstruct held by the blob wrapper and populate rec_tree,
     the intermediate structure later read by _translate.
     """
     recstruct = self.blob_wrapper.blob
     for tag, occurrences in recstruct.iteritems():
         # Control fields are assumed to be numeric only and below '010'.
         is_controlfield = tag < '010' and tag.isdigit()
         if is_controlfield:
             contents = []
             for occurrence in occurrences:
                 contents.append(occurrence[3])
             self.rec_tree[tag] = CoolList(contents)
         else:
             for occurrence in occurrences:
                 node = CoolDict()
                 for subfield in occurrence[0]:
                     node.extend(subfield[0], subfield[1])
                 # Tag plus elements 1 and 2 of the occurrence; blanks are
                 # written as underscores in the resulting key.
                 name = tag + occurrence[1] + occurrence[2]
                 self.rec_tree.extend(name.replace(' ', '_'), node)
コード例 #5
0
 def test_cool_list_and_dict(self):
     """Bibfield Utils, CoolList and CoolDict - Unit tests"""
     # Checks the ``consumed`` attribute of a CoolDict whose values are
     # CoolLists.
     d = CoolDict()
     l = CoolList()
     # With an empty CoolList stored, the dict is expected to report
     # consumed.
     d['a'] = l
     self.assertTrue(d.consumed)
     # Once the list has elements and is stored again, the dict is expected
     # to report not consumed.
     l.append(1)
     l.append(2)
     d['a'] = l
     self.assertFalse(d.consumed)
     # A second CoolList, holding dicts, keeps the dict not consumed.
     d['b'] = CoolList([{'a':1}, {'a':2}])
     self.assertFalse(d.consumed)
     # Iterating over the top-level items only (the list is built just to
     # drive the iteration) is expected to leave it not consumed.
     [v for dummy_k, v in d.iteritems()]
     self.assertFalse(d.consumed)
     # After also iterating the list under 'a' and the items of every dict
     # in the list under 'b', the dict is expected to report consumed.
     [i for i in d['a']]
     [v for i in d['b'] for dummy_k, v in i.iteritems()]
     self.assertTrue(d.consumed)
コード例 #6
0
 def test_cool_list_and_dict(self):
     """Bibfield Utils, CoolList and CoolDict - Unit tests"""
     # Checks the ``consumed`` attribute of a CoolDict whose values are
     # CoolLists.
     d = CoolDict()
     l = CoolList()
     # With an empty CoolList stored, the dict is expected to report
     # consumed.
     d['a'] = l
     self.assertTrue(d.consumed)
     # Once the list has elements and is stored again, the dict is expected
     # to report not consumed.
     l.append(1)
     l.append(2)
     d['a'] = l
     self.assertFalse(d.consumed)
     # A second CoolList, holding dicts, keeps the dict not consumed.
     d['b'] = CoolList([{'a': 1}, {'a': 2}])
     self.assertFalse(d.consumed)
     # Iterating over the top-level items only (the list is built just to
     # drive the iteration) is expected to leave it not consumed.
     [v for dummy_k, v in d.iteritems()]
     self.assertFalse(d.consumed)
     # After also iterating the list under 'a' and the items of every dict
     # in the list under 'b', the dict is expected to report consumed.
     [i for i in d['a']]
     [v for i in d['b'] for dummy_k, v in i.iteritems()]
     self.assertTrue(d.consumed)
コード例 #7
0
 def test_cool_dict(self):
     """Bibfield Utils, CoolDict - Unit tests"""
     # Checks the ``consumed`` attribute of a CoolDict holding plain values,
     # a nested dict, and a value added through extend().
     d = CoolDict()
     d['a'] = 1
     d['b'] = 2
     d['c'] = 3
     self.assertFalse(d.consumed)
     # Reading a single key is expected to leave the dict not consumed.
     d['a']
     self.assertFalse(d.consumed)
     # Iterating over all the items (the list is built only to drive the
     # iteration) is expected to make it report consumed.
     [v for dummy_k, v in d.iteritems()]
     self.assertTrue(d.consumed)
     # Replacing a value with a plain dict is expected to make it report
     # not consumed again ...
     d['b'] = {'d': 1}
     self.assertFalse(d.consumed)
     # ... and merely fetching the nested dict is expected not to change
     # that.
     d['b']
     self.assertFalse(d.consumed)
     # Iterating over the nested dict's items is expected to make the outer
     # dict report consumed.
     [v for dummy_k, v in d['b'].iteritems()]
     self.assertTrue(d.consumed)
     # extend() on the already present key 'a' is expected to leave a
     # CoolList under that key and the dict not consumed until that list is
     # iterated.
     d.extend('a', 11)
     self.assertFalse(d.consumed)
     self.assertTrue(isinstance(d['a'], CoolList))
     [i for i in d['a']]
     self.assertTrue(d.consumed)
コード例 #8
0
 def test_cool_dict(self):
     """Bibfield Utils, CoolDict - Unit tests"""
     # Checks the ``consumed`` attribute of a CoolDict holding plain values,
     # a nested dict, and a value added through extend().
     d = CoolDict()
     d["a"] = 1
     d["b"] = 2
     d["c"] = 3
     self.assertFalse(d.consumed)
     # Reading a single key is expected to leave the dict not consumed.
     d["a"]
     self.assertFalse(d.consumed)
     # Iterating over all the items (the list is built only to drive the
     # iteration) is expected to make it report consumed.
     [v for dummy_k, v in d.iteritems()]
     self.assertTrue(d.consumed)
     # Replacing a value with a plain dict is expected to make it report
     # not consumed again ...
     d["b"] = {"d": 1}
     self.assertFalse(d.consumed)
     # ... and merely fetching the nested dict is expected not to change
     # that.
     d["b"]
     self.assertFalse(d.consumed)
     # Iterating over the nested dict's items is expected to make the outer
     # dict report consumed.
     [v for dummy_k, v in d["b"].iteritems()]
     self.assertTrue(d.consumed)
     # extend() on the already present key "a" is expected to leave a
     # CoolList under that key and the dict not consumed until that list is
     # iterated.
     d.extend("a", 11)
     self.assertFalse(d.consumed)
     self.assertTrue(isinstance(d["a"], CoolList))
     [i for i in d["a"]]
     self.assertTrue(d.consumed)
コード例 #9
0
 def test_cool_dict(self):
     """Bibfield Utils, CoolDict - Unit tests"""
     # Checks the ``consumed`` attribute of a CoolDict holding plain values,
     # a nested dict, and a value added through extend().
     d = CoolDict()
     d['a'] = 1
     d['b'] = 2
     d['c'] = 3
     self.assertFalse(d.consumed)
     # Reading a single key is expected to leave the dict not consumed.
     d['a']
     self.assertFalse(d.consumed)
     # Iterating over all the items (the list is built only to drive the
     # iteration) is expected to make it report consumed.
     [v for dummy_k, v in d.iteritems()]
     self.assertTrue(d.consumed)
     # Replacing a value with a plain dict is expected to make it report
     # not consumed again ...
     d['b'] = {'d': 1}
     self.assertFalse(d.consumed)
     # ... and merely fetching the nested dict is expected not to change
     # that.
     d['b']
     self.assertFalse(d.consumed)
     # Iterating over the nested dict's items is expected to make the outer
     # dict report consumed.
     [v for dummy_k, v in d['b'].iteritems()]
     self.assertTrue(d.consumed)
     # extend() on the already present key 'a' is expected to leave a
     # CoolList under that key and the dict not consumed until that list is
     # iterated.
     d.extend('a', 11)
     self.assertFalse(d.consumed)
     self.assertTrue(isinstance(d['a'], CoolList))
     [i for i in d['a']]
     self.assertTrue(d.consumed)
コード例 #10
0
 def _prepare_blob(self):
     """
     Transforms the blob into rec_tree structure to use it in the standard
     translation phase inside C{JsonReader}

     The lower-cased schema of the blob wrapper selects the conversion:
       - a schema starting with 'file:' first replaces the blob with the
         content of C{blob_wrapper.blob_file_name};
       - 'recstruct' blobs are processed as they are;
       - 'xml' and 'file:xml' blobs go through
         C{invenio.bibrecord.create_record} first.
     Any other schema leaves C{rec_tree} empty. When an AttributeError is
     raised anywhere in that process (for instance because the wrapper has
     no usable schema) the blob is assumed to be MARCXML.
     """
     self.rec_tree = CoolDict()
     try:
         schema = self.blob_wrapper.schema.lower()
         if schema.startswith('file:'):
             # Close the file deterministically instead of leaving the
             # handle open until it is garbage collected.
             with open(self.blob_wrapper.blob_file_name, 'r') as blob_file:
                 self.blob_wrapper.blob = blob_file.read()
         if schema == 'recstruct':
             self.__create_rectree_from_recstruct()
         elif schema in ('xml', 'file:xml'):
             #TODO: Implement translation directly from xml
             from invenio.bibrecord import create_record
             self.blob_wrapper.blob = create_record(
                 self.blob_wrapper.blob)[0]
             self.__create_rectree_from_recstruct()
     except AttributeError:
         #Assume marcxml
         from invenio.bibrecord import create_record
         self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
         self.__create_rectree_from_recstruct()
コード例 #11
0
 def _prepare_blob(self):
     """
     Transforms the blob into rec_tree structure to use it in the standard
     translation phase inside C{JsonReader}

     The lower-cased schema of the blob wrapper selects the conversion:
       - a schema starting with 'file:' first replaces the blob with the
         content of C{blob_wrapper.blob_file_name};
       - 'recstruct' blobs are processed as they are;
       - 'xml' and 'file:xml' blobs go through
         C{invenio.bibrecord.create_record} first.
     Any other schema leaves C{rec_tree} empty. When an AttributeError is
     raised anywhere in that process (for instance because the wrapper has
     no usable schema) the blob is assumed to be MARCXML.
     """
     self.rec_tree = CoolDict()
     try:
         schema = self.blob_wrapper.schema.lower()
         if schema.startswith('file:'):
             # Close the file deterministically instead of leaving the
             # handle open until it is garbage collected.
             with open(self.blob_wrapper.blob_file_name, 'r') as blob_file:
                 self.blob_wrapper.blob = blob_file.read()
         if schema == 'recstruct':
             self.__create_rectree_from_recstruct()
         elif schema in ('xml', 'file:xml'):
             #TODO: Implement translation directly from xml
             from invenio.bibrecord import create_record
             self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
             self.__create_rectree_from_recstruct()
     except AttributeError:
         #Assume marcxml
         from invenio.bibrecord import create_record
         self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
         self.__create_rectree_from_recstruct()