def __init__(self, file_stream, url):
        """Read the whole document from ``file_stream`` and parse it as XML.

        Args:
            file_stream: passed to ``DocumentParser.__init__``; the content
                is then read through ``self._file_stream`` (presumably set
                by that base initialiser -- confirm in ``DocumentParser``).
            url: stored unchanged in ``self._url``.

        Raises:
            ParserError.InvalidDocument: if ``parseString`` raises on the
                content, or if ``self._check_xml()`` does not return
                ``True``.
        """
        DocumentParser.__init__(self, file_stream)
        self._url = url
        self._namespace_URI = 'http://www.loc.gov/METS/'
        self._mods_namespace_URI = 'http://www.loc.gov/mods/v3'

        # Read the content of the file.
        self._content_str = self._file_stream.read()

        # Start empty; nothing in this initialiser fills these in.
        self._logical_structure = None
        self._physical_structure = None
        self._meta_data = None
        self._relation = None
        self._file_list = None

        # NOTE: some METS files contain an uppercase 'METS=' / 'MODS='
        # directive.  A normalisation step (replacing them with 'mets=' /
        # 'mods=') used to live here but was disabled.

        # The original message was built from two adjacent literals with no
        # separating space and therefore read "(is itcorrupted?)".
        invalid_message = "The file is invalid. (is it corrupted?)"
        try:
            self._doc = parseString(self._content_str)
        except Exception:
            raise ParserError.InvalidDocument(invalid_message)
        # Only an explicit ``True`` from the check counts as valid.
        if self._check_xml() is not True:
            raise ParserError.InvalidDocument(invalid_message)
Exemple #2
0
def parse_verb(word_list):
    """Return the result of matching a 'verb' at the front of ``word_list``.

    ``skip`` is first called with 'stop'.  If ``peek`` then does not report
    'verb', a ``ParserError`` is raised.
    """
    skip(word_list, 'stop')

    # Guard clause: anything other than a verb is an error.
    if peek(word_list) != 'verb':
        raise ParserError("Expected a verb next.")
    return match(word_list, 'verb')
Exemple #3
0
    def _parse_assignment(self):
        """Parse an assignment whose left side was already pushed on the stack.

        The left side is popped from ``self.stack`` and must be an
        ``Identifier`` or an ``ArrayDeclaration``.  After skipping whitespace
        (newlines included), the right side is parsed according to the type
        of the current token: a word, a quoted string, a number or a ``{``
        that starts an array.

        Returns:
            Assignment: built from the left side, the ``=`` token, the right
            side and the ``;`` token.

        Raises:
            ParserError: if the left side has an unexpected type or the
                right side starts with an unsupported token.  Whatever
                ``self.expect`` raises on a mismatch is not handled here.
        """
        equals_token = self.expect(TokenType.EQUALS)
        self.next()

        left_side = self.stack.pop()
        if not isinstance(left_side, (Identifier, ArrayDeclaration)):
            raise ParserError(
                'unexpected left side of assignment, expected Identifier or ArrayDeclaration, but got {}'.format(
                    repr(left_side)))

        self.skip_whitespaces(include_newlines=True)
        token = self.token()

        if token.token_type == TokenType.WORD:
            right_side = self._parse_identifier()
        elif token.token_type in [TokenType.QUOTE, TokenType.DOUBLE_QUOTES]:
            right_side = self._parse_string_literal()
        elif token.token_type == TokenType.NUMBER:
            right_side = self._parse_constant()
        elif token.token_type == TokenType.L_CURLY:
            right_side = self._parse_array()
        else:
            # Raise ParserError, the exception type used everywhere else in
            # this parser (the previous name here was ``ParsingError``).
            raise ParserError('unexpected right side of assignment: {}'.format(repr(token)))

        semicolon_token = self.expect(TokenType.SEMICOLON)
        # Presumably steps past the semicolon -- confirm against ``next()``.
        self.index += 1
        return Assignment(left_side, equals_token, right_side, semicolon_token)
Exemple #4
0
    def _parse_array(self):
        """Parse a ``{ ... }`` array and return it as an ``Array`` node.

        Elements may be numbers, quoted strings, words or nested arrays.
        After each element either a comma or the closing ``}`` is expected;
        a comma continues with the next element, anything else ends the
        loop.

        Raises:
            ParserError: if an element starts with an unsupported token.
        """
        opening = self.expect(TokenType.L_CURLY)
        current = self.next()

        elements = []
        while current.token_type != TokenType.R_CURLY and self.index < len(self.tokens):
            # Leading blanks before an element are skipped, then the current
            # token is re-read in every case.
            if current.token_type in (TokenType.WHITESPACE, TokenType.TAB, TokenType.NEWLINE):
                self.skip_whitespaces(include_newlines=True)
            current = self.token()

            if current.token_type == TokenType.NUMBER:
                element = self._parse_constant()
            elif current.token_type in (TokenType.QUOTE, TokenType.DOUBLE_QUOTES):
                element = self._parse_string_literal()
            elif current.token_type == TokenType.WORD:
                element = self._parse_identifier()
            elif current.token_type == TokenType.L_CURLY:
                element = self._parse_array()
            else:
                raise ParserError('encountered unexpected token while parsing array: {}'.format(repr(current)))
            elements.append(element)

            self.skip_whitespaces(include_newlines=True)
            self.expect([TokenType.COMMA, TokenType.R_CURLY])
            # No comma after the element: the element list is finished.
            if self.token().token_type != TokenType.COMMA:
                break
            current = self.next()

        closing = self.expect(TokenType.R_CURLY)
        self.index += 1
        return Array(opening, elements, closing)
Exemple #5
0
    def _get_record(self):
        """Return the single ``mods`` element of the XML file.

        The stream is rewound and read in full, parsed with ``parseString``,
        and searched for ``mods`` elements in ``self._namespace_URI``.

        Returns:
            The only matching element.

        Raises:
            ParserError.InvalidDocument: if the document holds no ``mods``
                element, or more than one.
        """
        # Rewind first so the whole file is read regardless of earlier reads.
        self._file_stream.seek(0)
        content_str = self._file_stream.read()
        doc = parseString(content_str)

        records = doc.getElementsByTagNameNS(self._namespace_URI, 'mods')

        # Exactly one record is allowed.
        if len(records) == 0:
            raise ParserError.InvalidDocument(
                "XML/Mods Core document should contain at least one record!")
        if len(records) > 1:
            raise ParserError.InvalidDocument(
                "XML/Mods Core document should not contain more than "
                "one record!")
        return records[0]
Exemple #6
0
def parse_object(word_list):
    """Return the result of matching a 'noun' or 'direction' in ``word_list``.

    ``skip`` is first called with 'stop'.  If ``peek`` then reports neither
    'noun' nor 'direction', a ``ParserError`` is raised.
    """
    skip(word_list, 'stop')
    upcoming = peek(word_list)

    if upcoming == 'noun':
        return match(word_list, 'noun')
    if upcoming == 'direction':
        return match(word_list, 'direction')
    raise ParserError("Expected a noun or direction")
Exemple #7
0
def parse_subject(word_list):
    """Return the subject found at the front of ``word_list``.

    ``skip`` is first called with 'stop'.  A 'noun' is matched and returned;
    when a 'verb' comes next instead, nothing is consumed and the fixed
    subject ``('noun', 'player')`` is returned.  Anything else raises a
    ``ParserError``.
    """
    skip(word_list, 'stop')
    upcoming = peek(word_list)

    if upcoming == 'noun':
        return match(word_list, 'noun')
    if upcoming == 'verb':
        # The verb is left in place; the subject defaults to the player.
        return ('noun', 'player')
    raise ParserError("Expected a verb next.")
Exemple #8
0
    def _parse_array_declaration(self):
        """Parse ``[]`` following an identifier into an ``ArrayDeclaration``.

        The identifier is popped from ``self.stack``.

        Raises:
            ParserError: if the popped item is not an ``Identifier``.
        """
        opening = self.expect(TokenType.L_SQUARE)
        closing = self.expect_next(TokenType.R_SQUARE)
        self.index += 1

        declared = self.stack.pop()
        if isinstance(declared, Identifier):
            return ArrayDeclaration(declared, opening, closing)
        raise ParserError('expected identifier for array declaration, but got {}'.format(repr(declared)))
    def parse(version_str):
        """Build a ``JavaVersion`` from a string such as ``JDK7u40``.

        Args:
            version_str: text of the form ``JDK<family>u<update>``, where
                the family number does not start with 0 and the update
                number is one or more digits.

        Returns:
            JavaVersion: constructed from the family and update numbers,
            both converted to ``int``.

        Raises:
            ParserError: if ``version_str`` does not have exactly that form.
        """
        # Pattern, line by line: literal "JDK" at the start; the family
        # number (first digit 1-9, then any digits); the literal "u"; the
        # update number (one or more digits, 0 allowed).
        # The end is anchored with \Z rather than $, because $ also matches
        # just before a trailing newline and would accept "JDK7u40\n".
        m = re.match(
            r"""
                ^JDK                # 先頭JDK
                ([1-9][0-9]*)       # familyNumber1桁目は0位外の数字で始まりn桁の数字
                u                   # 固定文字 u
                ([0-9]+)\Z          # 1桁以上の数字(0可)
                """, version_str, re.VERBOSE)
        if m is None:
            raise ParserError("invalid version string.")

        family_number = int(m.group(1))
        update_number = int(m.group(2))
        return JavaVersion(family_number, update_number)
Exemple #10
0
 def __init__(self, node, argument):
     '''Initialise the error with ``argument`` prefixed by the node position.

     The prefix is built from ``node.lineno`` and ``node.col_offset``.
     '''
     location = "(Line %i, Col %i) " % (node.lineno, node.col_offset)
     ParserError.__init__(self, location + argument)