def __init__(self, file_stream, url):
        """Read a METS document from ``file_stream`` and parse it as XML.

        Args:
            file_stream: file-like object handed to ``DocumentParser.__init__``;
                its whole content is read here through ``self._file_stream``
                (presumably set by the base initialiser -- confirm there).
            url: stored as-is in ``self._url``.

        Raises:
            ParserError.InvalidDocument: if the content cannot be parsed by
                ``parseString`` or if ``self._check_xml()`` does not return
                ``True``.
        """
        DocumentParser.__init__(self, file_stream)
        self._url = url
        self._namespace_URI = 'http://www.loc.gov/METS/'
        self._mods_namespace_URI = 'http://www.loc.gov/mods/v3'

        # Read the whole content of the file; it is parsed further down.
        self._content_str = self._file_stream.read()

        # Start out empty (presumably filled later by other methods of the
        # class -- not visible from this constructor).
        self._logical_structure = None
        self._physical_structure = None
        self._meta_data = None
        self._relation = None
        self._file_list = None

        # NOTE: some METS files contain uppercase METS/MODS directives. A
        # workaround that lower-cased them in the raw content used to live
        # here but was disabled; the content is parsed unmodified.

        # Single message shared by both failure paths. The original split the
        # literal across two lines without a separating space, which produced
        # "is itcorrupted?".
        invalid_msg = "The file is invalid. (is it corrupted?)"

        try:
            self._doc = parseString(self._content_str)
        except Exception:
            # Any parsing failure is reported to callers as an invalid document.
            raise ParserError.InvalidDocument(invalid_msg)

        # Only an explicit True from the validation hook counts as success.
        if self._check_xml() is not True:
            raise ParserError.InvalidDocument(invalid_msg)
Exemple #2
0
    def _get_record(self):
        """Return the single 'mods' record element of the XML file.

        The file stream is rewound and parsed again on every call. Elements
        are looked up by the local name 'mods' in the namespace given by
        ``self._namespace_URI``.

        Returns:
            The only matching 'mods' element of the document.

        Raises:
            ParserError.InvalidDocument: if the document contains no 'mods'
                element, or more than one.

        Note:
            Errors raised by ``parseString`` itself are not caught here and
            propagate to the caller unchanged.
        """
        # Rewind first so the whole document is read, whatever was consumed
        # from the stream before this call.
        self._file_stream.seek(0)
        content_str = self._file_stream.read()
        doc = parseString(content_str)

        records = doc.getElementsByTagNameNS(self._namespace_URI, 'mods')
        record_count = len(records)

        # Exactly one record is expected; anything else is invalid.
        if record_count == 0:
            raise ParserError.InvalidDocument(
                "XML/Mods Core document should contain at least one record!")
        if record_count > 1:
            raise ParserError.InvalidDocument(
                "XML/Mods Core document should not contain more than "
                "one record!")
        return records[0]