Example #1
File: tokens.py Project: naro/sdscp
	def tokenize(self):
		""" Convert to tokens, get token list

		Returns:
			A list of obtained tokens, recursively tokenized.

		"""

		if self.tokens is not None:
			return self.tokens

		self.tokens = []

		rd = CodeReader(self.source, self.filename)

		while not rd.has_end():

			# discard whitespace and comments
			rd.sweep()

			# End of string.
			if rd.has_end():
				break

			# <identifier>
			elif rd.has_identifier():

				self._tokenize_identifier(rd)

			# {...stuff...}
			elif rd.has_code_block():

				s = rd.consume_block()
				self._add( T_CodeBlock(s) )

			# ;
			elif rd.starts(';'):

				self._collect_semicolon(rd)

			else:
				rd.error('Unexpected syntax here.')

		# tokenize all composite tokens
		for t in self.tokens:
			if t.is_composite():
				t.tokenize()

		return self.tokens
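
The loop above is a classic reader-dispatch tokenizer: sweep past ignorable input, probe with a has_*() predicate, consume with the matching consume_*() method, and wrap the result in a token object. A minimal self-contained sketch of the same pattern (MiniReader is a hypothetical stand-in, not the sdscp CodeReader API):

	import re

	class MiniReader:
		""" Stripped-down stand-in for CodeReader (illustrative only). """

		def __init__(self, source):
			self.src = source
			self.pos = 0

		def has_end(self):
			return self.pos >= len(self.src)

		def sweep(self):
			# discard whitespace; the real sweep() also handles comments
			while not self.has_end() and self.src[self.pos].isspace():
				self.pos += 1

		def has_identifier(self):
			return not self.has_end() and (self.src[self.pos].isalpha() or self.src[self.pos] == '_')

		def consume_identifier(self):
			m = re.match(r'[A-Za-z_]\w*', self.src[self.pos:])
			self.pos += m.end()
			return m.group(0)

	tokens = []
	rd = MiniReader('foo bar baz')
	while not rd.has_end():
		rd.sweep()
		if rd.has_end():
			break
		elif rd.has_identifier():
			tokens.append(rd.consume_identifier())
		else:
			raise Exception('Unexpected syntax here.')

	print(tokens)  # ['foo', 'bar', 'baz']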
Example #2
File: tokens.py Project: naro/sdscp
	def _tokenize(self):
		""" Parse the bracket contents as a single index expression. """

		# strip the enclosing brackets
		rd = CodeReader(self.value[1:-1].strip())

		rd.sweep()

		s = rd.consume_code(end=',', eof=True, keep_end=False)
		t = T_Expression(s)
		self.tokens.append(t)
		self.index = t

		rd.sweep()

		if not rd.has_end():
			raise Exception(
				'Invalid array index (must be single expression).'
			)
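
The guard at the end enforces that the bracket holds exactly one expression: the enclosing brackets are stripped, one expression is consumed up to a comma or end of input, and any leftover input is an error. A hypothetical standalone version of the same rule:

	def parse_index(bracket_text):
		""" Illustrative re-creation of the single-expression check above. """
		inner = bracket_text[1:-1].strip()  # strip the enclosing brackets
		expr, comma, rest = inner.partition(',')
		if comma and rest.strip():
			raise Exception('Invalid array index (must be single expression).')
		return expr.strip()

	print(parse_index('[ i + 1 ]'))  # i + 1

	try:
		parse_index('[ a, b ]')
	except Exception as e:
		print(e)  # Invalid array index (must be single expression).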
Example #3
File: tokens.py Project: naro/sdscp
	def _tokenize(self):
		""" Parse expression sub-tokens """

		rd = CodeReader(self.value)

		while not rd.has_end():
			rd.sweep()

			if rd.has_identifier():
				# an identifier: either a variable or a function call

				s = rd.consume_identifier()
				t = T_Name(s)
				self.tokens.append(t)

				rd.sweep()

				if rd.has_bracket():
					# array index
					s = rd.consume_block()
					t = T_Bracket(s)
					self.tokens.append(t)

				elif rd.has_paren():
					# paren with arguments for the function
					s = rd.consume_block()
					t = T_Paren(s)

					t.set_type(ParenType.ARGVALS)

					self.tokens.append(t)

			elif rd.has_paren():
				# Parenthesised sub-expression
				s = rd.consume_block()
				t = T_Paren(s)
				t.set_type(ParenType.EXPR)
				self.tokens.append(t)

			elif rd.has_number():
				# Number literal
				s = rd.consume_number()
				t = T_Number(s)
				self.tokens.append(t)

			elif (len(self.tokens) > 0 and
				type(self.tokens[-1]) is T_Operator
				and rd.matches(r'[-+]\s*[0-9a-z_]+')):

				# Signed value after an operator:
				# fold the sign into a (-1 *) prefix.
				sign = rd.consume()
				if sign == '+':
					sign = ''

				rd.sweep()

				if sign == '-':
					self.tokens.append(T_Number('-1'))
					self.tokens.append(T_Operator('*'))

			elif rd.has_operator():
				# Operator
				s = rd.consume_operator()
				t = T_Operator(s)
				self.tokens.append(t)

			elif rd.has_char():
				# Char literal
				s = rd.consume_char()
				t = T_Char(s)
				self.tokens.append(t)

			elif rd.has_string():
				# String literal
				s = rd.consume_string()
				t = T_String(s)
				self.tokens.append(t)

			else:
				raise Exception('Unexpected expression token near ' + rd.peek(10))

		for t in self.tokens:
			if t.is_composite():
				t.tokenize()
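
The signed-value branch is a desugaring trick: right after an operator, -x is emitted as -1 * x, so later stages never see a unary minus. Roughly, with illustrative (kind, value) tuples instead of the sdscp token classes:

	def fold_sign(tokens, sign):
		""" Emit the (-1 *) prefix used by the signed-value branch above. """
		if sign == '-':
			tokens.append(('Number', '-1'))
			tokens.append(('Operator', '*'))
		return tokens

	# "a * -b" therefore tokenizes as: Name(a), Op(*), Number(-1), Op(*), Name(b)
	print(fold_sign([('Name', 'a'), ('Operator', '*')], '-'))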
Example #4
	def apply_macros(self):
		""" Recursively apply macros to the output of `process()`

		To be called after `process()`.
		The `output` variable is overwritten by this.

		Returns:
			The final source code after applying all
			macro replacements.

		"""

		if len(self.output) == 0:
			print('There is no source code.')
			return self.output

		rd = CodeReader(self.output)

		applied_count = 0
		out = ''
		while not rd.has_end():

			out += self._handle_whitespace(rd)
			if rd.has_end():
				break

			if rd.has_identifier():

				ident = rd.consume_identifier()
				ident_whitesp = rd.consume_inline_whitespace()

				if ident in self.defines:

					macros = self.defines[ident]

					replacement = None

					if rd.has_bracket():
						# array macro

						bracket = rd.consume_block()[1:-1]

						for mm in macros:
							if mm.is_arraylike():
								if mm.can_use_args([bracket]):
									replacement = mm.generate([bracket])
									break

						if replacement is None:
							out += ident + ident_whitesp
							out += '[%s]' % bracket
						else:
							out += replacement
							applied_count += 1

					elif rd.has_paren():
						# func macro

						paren = rd.consume_block()

						t = T_Paren(paren)
						t.set_type(ParenType.ARGVALS)
						t.tokenize()

						args = []
						for a in t.tokens:
							args.append(a.value)

						# print(args)

						for mm in macros:
							if mm.is_functionlike():
								if mm.can_use_args(args):
									replacement = mm.generate(args)
									break

						if replacement is None:
							out += ident + ident_whitesp + paren
							print(
								'[W] Macro "%s" defined, but can\'t use arguments (%s)'
								% (ident, ', '.join(args) ))
						else:
							out += replacement
							applied_count += 1

					else:
						# const macro

						for mm in macros:
							if mm.can_use_args(None):
								replacement = mm.generate(None)
								break

						if replacement is None:
							out += ident + ident_whitesp
						else:
							out += replacement + ident_whitesp
							applied_count += 1

				else:
					out += ident + ident_whitesp  # give it back

			# "...", and "sdgfsd""JOINED"  "This too"
			elif rd.has_string():
				# handle string concatenation
				s = ''
				while rd.has_string():
					s += rd.consume_string()[1:-1]  # drop quotes
					rd.sweep()

				out += '"%s"' % s

			# //...
			elif rd.has_inline_comment():
				rd.consume_line()

			# /* ... */
			elif rd.has_block_comment():
				rd.consume_block_comment()

			# any char...
			else:
				out += rd.consume()

		self.output = out

		# expand macros produced by macro replacement (repeat until fixpoint)
		if applied_count > 0:
			return self.apply_macros()
		else:
			return out
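
Because a replacement may itself contain macros, apply_macros() calls itself whenever a pass performed at least one substitution, stopping only when a pass changes nothing. A minimal sketch of that fixpoint strategy, using naive textual substitution (the real code matches whole identifiers and supports array- and function-like arguments):

	def expand(text, defines, max_passes=32):
		""" Repeat substitution passes until a pass changes nothing. """
		for _ in range(max_passes):
			new = text
			for name, replacement in defines.items():
				new = new.replace(name, replacement)  # naive; no word boundaries
			if new == text:
				return text
			text = new
		raise Exception('Macro expansion too deep (recursive macro?).')

	print(expand('AREA', {'AREA': 'W * H', 'W': '4', 'H': '3'}))  # 4 * 3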
Example #5
	def _tokenize(self):
		""" Parse expression sub-tokens """

		rd = CodeReader(self.value)

		while not rd.has_end():
			rd.sweep()

			if rd.has_identifier():
				# an identifier: either a variable or a function call

				s = rd.consume_identifier()
				t = T_Name(s)
				self.tokens.append(t)

				rd.sweep()

				if rd.has_bracket():
					# array index
					s = rd.consume_block()
					t = T_Bracket(s)
					self.tokens.append(t)

				elif rd.has_paren():
					# paren with arguments for the function
					s = rd.consume_block()
					t = T_Paren(s)

					t.set_type(ParenType.ARGVALS)

					self.tokens.append(t)

			elif rd.has_paren():
				# Parenthesised sub-expression
				s = rd.consume_block()
				t = T_Paren(s)
				t.set_type(ParenType.EXPR)
				self.tokens.append(t)

			elif rd.has_number():
				# Number literal
				s = rd.consume_number()
				t = T_Number(s)
				self.tokens.append(t)

			elif ((len(self.tokens) == 0 or
				type(self.tokens[-1]) is T_Operator)
				and rd.matches(r'[-+]\s*[0-9a-z_]+')):

				# Unary operator
				sign = rd.consume()
				if sign == '+':
					sign = ''

				rd.sweep()

				if sign == '-':
					self.tokens.append(T_Operator('@-'))

			elif rd.has_operator():
				# Operator
				s = rd.consume_operator()
				t = T_Operator(s)
				self.tokens.append(t)

			elif rd.has_char():
				# Char literal
				s = rd.consume_char()
				t = T_Char(s)
				self.tokens.append(t)

			elif rd.has_string():
				# String literal
				s = rd.consume_string()
				t = T_String(s)
				self.tokens.append(t)

			else:
				raise Exception('Unexpected expression token near ' + rd.peek(10))

		for t in self.tokens:
			if t.is_composite():
				t.tokenize()
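
This revision replaces the earlier -1 * desugaring with a dedicated unary-minus operator, @-, which keeps one token per source construct. Deciding whether a minus is unary only requires looking at what came before it, as a hypothetical post-pass over plain string tokens shows:

	def mark_unary(tokens):
		""" Rewrite '-' in prefix position into the '@-' marker (illustrative). """
		out = []
		for i, t in enumerate(tokens):
			if t == '-' and (i == 0 or tokens[i - 1] in ('+', '-', '*', '/', '(')):
				out.append('@-')  # start of expression, or right after an operator
			else:
				out.append(t)
		return out

	print(mark_unary(['-', 'x', '*', '-', 'y']))  # ['@-', 'x', '*', '@-', 'y']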