def tokenize(self):
    """
    Run the lexer over the remaining input and store the resulting tokens
    in self.token_stream, terminated by an EOF token. Also resets
    self.token_pos to 0.
    """
    self.token_pos = 0
    stream = []
    # Bind the attribute before lexing so the list is visible on the
    # instance while tokens are still being produced.
    self.token_stream = stream

    while self.current_char is not None:
        stream.append(self.next_token())

    # NOTE(review): next_token() returns an EOF token of its own when it
    # runs out of input, so the stream may end with two EOF tokens in that
    # case - confirm the parser tolerates this.
    end_marker = Token(base.EOF, None, self.current_line, self.line_pos)
    stream.append(end_marker)
def _string(self, quote):
    """
    Recognize a literal string, which can be enclosed by single or double
    quotes in Dynare/Matlab.

    quote is the quote character that opened the literal; the string ends
    at the next occurrence of that same character. Returns a STRING token
    whose value is the text between the quotes (quotes excluded).

    If the input ends before the closing quote is found, the problem is
    reported through self.error, passing the opening quote character.
    """
    pieces = []
    self.advance()  # step over the opening quote

    # Compare by value (!=), not identity: "is not" on strings only works
    # by accident of interpreter string caching. The None guard stops the
    # scan at end of input instead of failing on string + None.
    while self.current_char is not None and self.current_char != quote:
        pieces.append(self.current_char)
        self.advance()

    if self.current_char is None:
        # Unterminated literal: there is no closing quote to consume.
        self.error(quote)
    else:
        self.advance()  # step over the closing quote

    return Token(base.STRING, ''.join(pieces),
                 self.current_line, self.line_pos)
def _number(self):
    """
    Recognize a numeric literal. Every number is converted to a float,
    although Dynare (and Matlab underneath it) treats ints and floats as
    distinct types.

    Consumes characters for as long as self.valid_numchar accepts them and
    returns a NUMBER token holding the float value.
    """
    digits = []
    while True:
        ch = self.current_char
        if ch is None or not self.valid_numchar(ch):
            break
        digits.append(ch)
        self.advance()

    value = float(''.join(digits))
    return Token(base.NUMBER, value, self.current_line, self.line_pos)
def _id(self):
    """
    Recognize a character or sequence of characters that may or may not be
    a reserved keyword. The text is looked up first in self.reserved_kw and
    then in base.reserved_funcs; if neither knows it, it is tagged as a
    Token of type ID and assumed to be a variable.

    Returns a token carrying the current line and line position.
    """
    import copy

    chars = []
    # Guard against end of input, as _number does, so an identifier that is
    # the last thing in the source does not feed None to valid_varchar.
    while (self.current_char is not None
           and self.valid_varchar(self.current_char)):
        chars.append(self.current_char)
        self.advance()
    name = ''.join(chars)

    try:
        shared = self.reserved_kw[name]
    except KeyError:
        shared = base.reserved_funcs.get(name)

    if shared is None:
        token = Token(base.ID, name)
    else:
        # The lookup tables hand back one stored Token per keyword. Calling
        # assign_loc on that object directly would overwrite the location of
        # every earlier occurrence of the same keyword, so work on a copy.
        token = copy.copy(shared)

    token.assign_loc(self.current_line, self.line_pos)
    return token
def model_expression(self) -> ModelExpression:
    """
    model_expression : mexpr
                     | mexpr EQUALS mexpr

    Parses one model equation. When no equals sign follows the first
    expression, the equation is taken to be homogeneous and the right-hand
    side is the number 0.
    """
    lhs = self.mexpr()

    if self.peek_type() == base.EQUALS:
        self.eat(base.EQUALS)
        rhs = self.mexpr()
        return ast.ModelExpression(left=lhs, right=rhs)

    # No equals sign: implicit "= 0".
    implicit_zero = ast.Num(Token(base.NUMBER, 0))
    return ast.ModelExpression(left=lhs, right=implicit_zero)
SBVAR = 'sbvar' SVAR_IDENTIFICATION = 'svar_identification' MS_ESTIMATION = 'ms_estimation' MS_SIMULATION = 'ms_simulation' MS_COMPUTE_MDD = 'ms_compute_mdd' MS_COMPUTE_PROBABILITIES = 'ms_compute_probabilities' MS_IRF = 'ms_irf' MS_FORECAST = 'ms_forecast' MS_VARIANCE_DECOMPOSITION = 'ms_variance_decomposition' # dynare_reserved_kw = { # the different variable types that can be declared in dynare - Section 4.2 in doc. VAR: Token(VAR, VAR), VAREXO: Token(VAREXO, VAREXO), VAREXO_DET: Token(VAREXO_DET, VAREXO_DET), PARAMETERS: Token(PARAMETERS, PARAMETERS), PREDETERMINED: Token(PREDETERMINED, PREDETERMINED), TREND_VAR: Token(TREND_VAR, TREND_VAR), LOG_TREND_VAR: Token(LOG_TREND_VAR, LOG_TREND_VAR), CHANGE_TYPE: Token(CHANGE_TYPE, CHANGE_TYPE), END:
def _exponent(self):
    """
    Consume one character and return a POWER token.

    is_exponent decides based on Matlab's exponentiation symbol, but the
    token value uses Python's exponentiation symbol, '**'.
    """
    self.advance()
    power = Token(base.POWER, '**', self.current_line, self.line_pos)
    return power
def next_token(self):
    """
    Return the next token from the input.

    Whitespace and single-line / multi-line comments are skipped, in any
    interleaving. Identifiers, numbers and strings are delegated to _id,
    _number and _string. Operators and punctuation are produced here, with
    two-character operators ('<=', '>=', '==') tested before their
    one-character prefixes. A character that starts no token is reported
    through self.error. When the input is exhausted an EOF token is
    returned.

    Token positions are taken after the token's characters are consumed.
    """
    # Operators formed by a first character followed by '='.
    followed_by_equals = {
        '<': base.LTOE,
        '>': base.GTOE,
        '=': base.EQUALITY,
    }
    # Grammar, logical and mathematical symbols made of one character.
    single_char = {
        ',': base.COMMA,
        '.': base.PERIOD,
        ';': base.SEMI,
        ':': base.COLON,
        '#': base.POUND,
        '&': base.AMPERSAND,
        '|': base.PIPE,
        '@': base.ATSIGN,
        '!': base.EXCLAMATION,
        '~': base.TILDE,
        '%': base.PERCENT,
        '<': base.LT,
        '>': base.GT,
        '\\': base.BACKSLASH,
        '+': base.PLUS,
        '-': base.MINUS,
        '*': base.MUL,
        '/': base.DIV,
        '(': base.LPARE,
        ')': base.RPARE,
        '[': base.LBRACKET,
        ']': base.RBRACKET,
        '{': base.LBRACE,
        '}': base.RBRACE,
        '=': base.EQUALS,
    }

    while self.current_char is not None:
        # Skip whitespace and comments until neither is next. Each pass
        # re-checks for end of input, so trailing whitespace or a trailing
        # comment ends the scan instead of calling .isspace() on None.
        while self.current_char is not None:
            if self.current_char.isspace():
                self.skip_whitespace()
            elif self.is_singleline_comment():
                self.skip_singleline_comment()
            elif self.is_multiline_comment():
                self.skip_multiline_comment()
            else:
                break
        if self.current_char is None:
            break

        ch = self.current_char

        if self.valid_begchar(ch):
            return self._id()

        if self.valid_numchar(ch):
            return self._number()

        # Handling strings - not entirely sure what to do here. Matlab's
        # transpose character ' can also be used to make a character
        # array. Any single quote is tokenized as a string, so taking the
        # transpose of a matrix is not supported for now.
        if ch == '\'':
            return self._string(quote='\'')
        if ch == '"':
            return self._string(quote='"')

        # Two-character operators must be tried before the one-character
        # table, otherwise '<=' and '>=' would be split into '<' / '>'
        # followed by '='.
        if ch in followed_by_equals and self.peek() == '=':
            self.advance(2)
            return Token(followed_by_equals[ch], ch + '=',
                         self.current_line, self.line_pos)

        if ch in single_char:
            self.advance()
            return Token(single_char[ch], ch,
                         self.current_line, self.line_pos)

        if self.is_exponent():
            return self._exponent()

        self.error(ch)

    return Token(base.EOF, None, self.current_line, self.line_pos)