# Determine where the program will come from, and announce it in
# verbose mode, in case the user mistakenly omitted a filename and
# doesn't realize that ncptl expects input from stdin.
if entirefile == None:
    if filelist == [] or filelist[0] == "-":
        infilename = "<stdin>"

        # As a special case, if --help appears on the command
        # line, and we would normally read from standard input,
        # specify a dummy, empty program so the backend will
        # output a help message and exit.  Note that --help *must*
        # be a backend option at this point because we've already
        # processed the frontend's command line and therefore
        # would have already seen a frontend --help.
        if "--help" in sys.argv or "--help-backend" in sys.argv or "-H" in sys.argv:
            if backend == None:
                errmsg.warning('backend help cannot be provided unless a backend is specified')
                sys.stderr.write("\n")
                usage(1)
            entirefile = ""
    else:
        infilename = filelist[0]

# Read the entire input file unless a complete program was
# provided on the command line.
if entirefile == None:
    try:
        if be_verbose:
            if infilename == "<stdin>":
                input_program_source = "the standard input device"
            else:
                input_program_source = 'file "%s"' % infilename
            # Placeholder wording; the original announces on stderr
            # which source the program is being read from.
            sys.stderr.write("# Reading a coNCePTuaL program from %s ...\n" % input_program_source)

        # Reconstructed continuation: slurp the whole program at once.
        if infilename == "<stdin>":
            entirefile = sys.stdin.read()
        else:
            infile = open(infilename)
            entirefile = infile.read()
            infile.close()
    except IOError:
        # Placeholder message; the original reports the underlying
        # OS error before aborting.
        errmsg.error_fatal('unable to read from %s' % infilename)
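
# The stdin-defaulting convention above can be hard to see amid the
# --help special case, so here is a minimal, self-contained sketch of
# just that rule.  read_program() is a hypothetical helper, not part
# of ncptl; it assumes only that sys is imported, as elsewhere in
# this file.
def read_program(filelist):
    "Return a program's text; an empty list or a leading '-' selects stdin."
    if filelist == [] or filelist[0] == "-":
        # No filename (or an explicit "-") means read everything
        # from standard input.
        return sys.stdin.read()
    # Otherwise read the named file in its entirety.
    infile = open(filelist[0])
    try:
        return infile.read()
    finally:
        infile.close()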
# Modules needed by the lexer below.  (The "ncptl_..." module paths
# are assumptions; PLY is used via its old top-level "lex" module.)
import re
import string
import lex
from ncptl_error import NCPTL_Error
from ncptl_keywords import Keywords

class NCPTL_Lexer:
    def __init__(self):
        "Initialize the lexer."

        # Define a mapping from each uppercase keyword to its
        # canonicalized form.
        self.canonicalize_kw = {
            "A"           : "AN",
            "AWAIT"       : "AWAITS",
            "BIT"         : "BITS",
            "BUFFER"      : "BUFFERS",
            "BYTE"        : "BYTES",
            "COMPLETION"  : "COMPLETIONS",
            "COMPUTE"     : "COMPUTES",
            "DAY"         : "DAYS",
            "DOUBLEWORD"  : "DOUBLEWORDS",
            "EXECUTE"     : "EXECUTES",
            "HALFWORD"    : "HALFWORDS",
            "HOUR"        : "HOURS",
            "INTEGER"     : "INTEGERS",
            "IS"          : "ARE",
            "IT"          : "THEM",
            "ITS"         : "THEIR",
            "LOG"         : "LOGS",
            "MESSAGE"     : "MESSAGES",
            "MICROSECOND" : "MICROSECONDS",
            "MILLISECOND" : "MILLISECONDS",
            "MINUTE"      : "MINUTES",
            "MULTICAST"   : "MULTICASTS",
            "OUTPUT"      : "OUTPUTS",
            "PAGE"        : "PAGES",
            "PROCESSOR"   : "PROCESSORS",
            "QUADWORD"    : "QUADWORDS",
            "RECEIVE"     : "RECEIVES",
            "REPETITION"  : "REPETITIONS",
            "REDUCE"      : "REDUCES",
            "RESET"       : "RESETS",
            "RESTORE"     : "RESTORES",
            "RESULT"      : "RESULTS",
            "SECOND"      : "SECONDS",
            "SEND"        : "SENDS",
            "SLEEP"       : "SLEEPS",
            "STORE"       : "STORES",
            "SYNCHRONIZE" : "SYNCHRONIZES",
            "TASK"        : "TASKS",
            "TIME"        : "TIMES",
            "TOUCH"       : "TOUCHES",
            "WORD"        : "WORDS"}
        for kw in map(string.upper, Keywords.keywords):
            self.canonicalize_kw[kw] = self.canonicalize_kw.get(kw, kw)

        # Define a list of token names.
        tokens = {}
        for ckw in self.canonicalize_kw.values():
            tokens[ckw] = 1
        tokens = tokens.keys()
        tokens.extend(["comma", "ellipsis", "ident_token", "integer",
                       "lbrace", "lbracket", "logic_and", "logic_or",
                       "lparen", "op_and", "op_div", "op_eq", "op_geq",
                       "op_gt", "op_leq", "op_lshift", "op_lt",
                       "op_minus", "op_mult", "op_neq", "op_or",
                       "op_plus", "op_power", "op_rshift", "period",
                       "rbrace", "rbracket", "rparen", "star",
                       "string_token"])
        self.tokens = tokens

    def tokenize(self, sourcecode, filesource='<stdin>'):
        "Tokenize the given string of source code."
        self.errmsg = NCPTL_Error(filesource)

        # Keep track of all the comments we've encountered by storing
        # a mapping from line number to comment (including the
        # initial hash character).
        self.line2comment = {}

        # Initialize the lexer.
        lex.lex(module=self)

        # Repeatedly invoke the lexer and return all of the tokens it
        # produces.
        self.lineno = 1
        lex.input(sourcecode)
        self.toklist = []
        while 1:
            # Acquire the next token and assign it a line number if
            # necessary.
            token = lex.token()
            if not token:
                break
            if token.lineno < self.lineno:
                token.lineno = self.lineno

            # Hack: Disambiguate op_mult and star on the parser's
            # behalf.
            if token.type in ["comma", "rparen"]:
                try:
                    if self.toklist[-1].type == "op_mult":
                        self.toklist[-1].type = "star"
                except IndexError:
                    pass

            # We now have one more valid token.
            self.toklist.append(token)
        return self.toklist

    # Define a bunch of simple token types.  (PLY compiles these
    # patterns with re.VERBOSE, so the surrounding spaces are
    # ignored.)
    t_comma     = r' , '
    t_ellipsis  = r' \.\.\. '
    t_lbrace    = r' \{ '
    t_lbracket  = r' \[ '
    t_logic_and = r' /\\ '
    t_logic_or  = r' \\/ '
    t_lparen    = r' \( '
    t_op_and    = r' & '
    t_op_div    = r' / '
    t_op_eq     = r' = '
    t_op_geq    = r' >= '
    t_op_gt     = r' > '
    t_op_leq    = r' <= '
    t_op_lshift = r' << '
    t_op_lt     = r' < '
    t_op_minus  = r' - '
    t_op_mult   = r' \* '
    t_op_neq    = r' <> '
    t_op_or     = r' \| '
    t_op_plus   = r' \+ '
    t_op_power  = r' \*\* '
    t_op_rshift = r' >> '
    t_period    = r' \. '
    t_rbrace    = r' \} '
    t_rbracket  = r' \] '
    t_rparen    = r' \) '

    # Keep track of line numbers.
    def t_newline(self, token):
        r' \r?\n '
        self.lineno = self.lineno + 1
        return None

    # Ignore whitespace.
    def t_whitespace(self, token):
        r' [ \t]+ '
        return None

    # Remove comments.
    def t_comment(self, token):
        r' \#.* '
        self.line2comment[self.lineno] = token.value
        return None

    # Sanitize and store string literals.
    def t_string_token(self, token):
        r' \"([^\\]|(\\[\000-\177]))*?\" '
        sanitized = []
        c = 1
        while c < len(token.value)-1:
            onechar = token.value[c]
            if onechar == "\\":
                # Process an escape sequence.
                c = c + 1
                onechar = token.value[c]
                if onechar == "n":
                    sanitized.append("\n")
                elif onechar == "t":
                    sanitized.append("\t")
                elif onechar == "r":
                    sanitized.append("\r")
                elif onechar == "\n":
                    self.lineno = self.lineno + 1
                elif onechar in ["\\", '"']:
                    sanitized.append(onechar)
                else:
                    self.errmsg.warning('Discarding unrecognized escape sequence "\\%s"' % onechar,
                                        lineno0=self.lineno, lineno1=self.lineno)
            else:
                sanitized.append(onechar)
                if onechar == "\n":
                    self.lineno = self.lineno + 1
            c = c + 1
        token.value = '"%s"' % string.join(sanitized, "")
        token.lineno = self.lineno
        return token

    # Store identifiers as "ident_token" tokens and keywords as
    # themselves (uppercased).
    def t_ident_or_keyword(self, token):
        r' [A-Za-z]\w* '
        try:
            # Store a keyword with its canonicalized, uppercase value
            # as its type.
            token.type = self.canonicalize_kw[string.upper(token.value)]
            if len(self.toklist) > 0 and self.toklist[-1].value == "-":
                # A "-" before a keyword is treated as whitespace.
                self.toklist.pop()
        except KeyError:
            # Store an identifier with a (lowercase, original) tuple
            # as its value.
            token.type = "ident_token"
            token.value = (string.lower(token.value), token.value)
        token.lineno = self.lineno
        return token

    # Store an integer as a (long-expanded, original) tuple.  Note
    # that coNCePTuaL integers can contain a trailing multiplier, a
    # trailing exponent, and a trailing "st", "nd", "rd", or "th".
    def t_integer(self, token):
        r' \d+([KMGkmg]|([Ee]\d+))?([Ss][Tt]|[NnRr][Dd]|[Tt][Hh]?)? '
        canon_token = re.sub(r'(st|nd|rd|th)$', "", string.lower(token.value))
        parts = re.split(r'([kmgte])', canon_token, 1)
        if not parts[-1]:
            parts = parts[:-1]
        number = long(parts[0])
        if len(parts) == 2:
            # A trailing letter is a binary multiplier (e.g., "4K" is
            # 4096).
            if parts[1] == "k":
                number = number * 1024L
            elif parts[1] == "m":
                number = number * 1024L**2
            elif parts[1] == "g":
                number = number * 1024L**3
            elif parts[1] == "t":
                number = number * 1024L**4
            else:
                self.errmsg.error_syntax(token.value,
                                         lineno0=token.lineno,
                                         lineno1=token.lineno)
        elif len(parts) == 3:
            # A trailing "E<digits>" is a decimal exponent (e.g.,
            # "4E3" is 4000).
            number = number * 10**long(parts[2])
        token.value = (number, token.value)
        token.lineno = self.lineno
        return token

    # Everything else we encounter should produce a syntax error.
    def t_error(self, token):
        self.errmsg.error_syntax(token.value[0],
                                 lineno0=token.lineno,
                                 lineno1=token.lineno)
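
# Usage sketch (not part of the module): assuming PLY's lex module
# and the NCPTL_Error and Keywords classes are importable as above,
# the lexer is driven as follows.  The sample program line and the
# "<example>" source name are illustrative only.
if __name__ == "__main__":
    lexer = NCPTL_Lexer()
    toklist = lexer.tokenize('TASK 0 SENDS A 4K BYTE MESSAGE TO TASK 1.',
                             filesource="<example>")
    for tok in toklist:
        # Each token carries a type, a value (a (number, original)
        # tuple for integers, e.g. (4096L, "4K")), and a line number.
        print tok.type, tok.value, tok.lineno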