def raw_doc(self):
    """
    Return the cleaned docstring text that belongs to this node.

    The candidate node is looked up depending on what ``self`` is:

    - ``Module``: its first child.
    - ``ClassOrFunc``: the child following the ``':'``; if that child is a
      ``suite`` node, the suite's third child is used instead.
    - anything else (an ExprStmt): the sibling that directly precedes the
      statement's parent. If there is no preceding sibling, ``''`` is
      returned.

    An empty string is returned whenever the candidate is not a string.
    """
    if isinstance(self, Module):
        candidate = self.children[0]
    elif isinstance(self, ClassOrFunc):
        colon_at = self.children.index(':')
        candidate = self.children[colon_at + 1]
        if is_node(candidate, 'suite'):
            # A regular suite looks like: NEWLINE INDENT stmt ...
            candidate = candidate.children[2]
    else:
        # ExprStmt: inspect the sibling right before the enclosing statement.
        enclosing = self.parent
        siblings = enclosing.parent.children
        position = siblings.index(enclosing)
        if position == 0:
            return ''
        candidate = siblings[position - 1]

    if is_node(candidate, 'simple_stmt'):
        candidate = candidate.children[0]

    if candidate.type != 'string':
        return ''

    # TODO Following leaves may belong to the docstring as well, because a
    # docstring can be written as adjacent literals: ``'foo' 'bar'``. Only
    # the first leaf is evaluated here.
    text = cleandoc(literal_eval(candidate.value))
    # The docstring output is always supposed to be unicode, so force it.
    if is_py3 or isinstance(text, unicode):
        return text
    return unicode(text, 'UTF-8', 'replace')
def run_related_name_test(script, correct, line_nr):
    """
    Runs a test for related names (usages / renaming).

    :param script: Object whose ``related_names()`` returns items that have
        a ``module_name`` and a ``start_pos`` (line, column).
    :param correct: Source text of a Python literal holding the expected
        positions, e.g. ``"(0, 4), ('other_module', 3, 0)"``. A blank string
        means that no related names are expected. Each entry is either

        - ``(line_offset, column)``: the line is relative to ``line_nr`` and
          the module name is assumed to be ``'renaming'``, or
        - ``(module_name, line, column)``: used exactly as given.
    :param line_nr: Line number the relative entries are based on.

    Return if the test was a fail or not, with 1 for fail and 0 for success.
    """
    received = sorted((r.module_name, r.start_pos[0], r.start_pos[1])
                      for r in script.related_names())

    correct = correct.strip()
    positions = literal_eval(correct) if correct else []

    # An entry that starts with a string carries its own module name and
    # absolute position; everything else is relative to ``line_nr``.
    wanted = sorted(
        pos if isinstance(pos[0], str)
        else ('renaming', line_nr + pos[0], pos[1])
        for pos in positions
    )

    if received != wanted:
        print('Solution @%s not right, received %s, wanted %s'
              % (line_nr - 1, received, wanted))
        return 1
    return 0
def raw_doc(self):
    """
    Return the cleaned docstring text of a module, class or function.

    For a ``Module`` the first child is inspected. Otherwise the child that
    follows the ``':'`` is taken; if that is a ``suite`` node, its third
    child is used. A ``simple_stmt`` wrapper is unpacked, and the first
    child of the resulting statement is evaluated if it is a string.
    In every other case ``''`` is returned.
    """
    if isinstance(self, Module):
        body = self.children[0]
    else:
        colon_at = self.children.index(':')
        body = self.children[colon_at + 1]
        if is_node(body, 'suite'):
            # A regular suite looks like: NEWLINE INDENT stmt ...
            body = body.children[2]
    if is_node(body, 'simple_stmt'):
        body = body.children[0]

    try:
        leading = body.children[0]
    except AttributeError:
        # No children at all - probably a ``pass`` keyword (a Leaf).
        return ''

    if leading.type != 'string':
        return ''

    # TODO Following leaves may belong to the docstring as well, because a
    # docstring can be written as adjacent literals: ``'foo' 'bar'``. Only
    # the first leaf is evaluated here.
    text = cleandoc(literal_eval(leading.value))
    # The docstring output is always supposed to be unicode, so force it.
    if is_py3 or isinstance(text, unicode):
        return text
    return unicode(text, 'UTF-8', 'replace')
def raw_doc(self):
    """
    Return the cleaned, evaluated text of ``self._doc_token``.

    If an ``AttributeError`` occurs while doing so (e.g. there is no usable
    doc token), an empty unicode string is returned instead.
    """
    try:
        token_source = self._doc_token.string
        # Turn the literal's source text into its value, then clean it.
        evaluated = literal_eval(token_source)
        return unicode(cleandoc(evaluated))
    except AttributeError:
        return u('')
def run_completion_test(script, correct, line_nr):
    """
    Runs a test for completions.

    :param script: Object whose ``complete()`` returns items that have a
        ``word`` attribute.
    :param correct: Source text of a Python literal with the expected
        completion words, e.g. ``"['foo', 'bar']"``. Surrounding whitespace
        is ignored and a blank string means that no completions are
        expected (the same convention ``run_related_name_test`` uses).
    :param line_nr: Line number of the test; only used for the message.

    Return if the test was a fail or not, with 1 for fail and 0 for success.
    """
    received = set(c.word for c in script.complete())

    # ``literal_eval`` fails on an empty string and, before Python 3.10, on
    # leading whitespace as well, so normalize the expectation first.
    expected_source = correct.strip()
    wanted = set(literal_eval(expected_source)) if expected_source else set()

    if received != wanted:
        print('Solution @%s not right, received %s, wanted %s'
              % (line_nr - 1, received, correct))
        return 1
    return 0
def safe_literal_eval(value):
    """
    Evaluate the source text of a literal without raising on bad input.

    :param value: Source text of a literal, e.g. ``"'foo'"`` or ``"rb'x'"``.
    :return: The evaluated value. An empty string is returned for f-string
        literals (any ``f``/``fr``/``rf`` prefix, case insensitive), for an
        empty ``value`` and for text that ``literal_eval`` rejects with a
        ``SyntaxError``.
    """
    prefix = value[:2].lower()
    # ``startswith`` instead of indexing, so that an empty ``value`` does not
    # raise an IndexError. 'f' also covers the 'fr' prefix.
    if prefix.startswith(('f', 'rf')):
        # literal_eval is not able to resolve f literals. We have to do that
        # manually, but that's right now not implemented.
        return ''

    try:
        return literal_eval(value)
    except SyntaxError:
        # It's possible to create syntax errors with literals like rb'' in
        # Python 2. This should not be possible and in that case just return
        # an empty string.
        # Before Python 3.3 there was a more strict definition in which order
        # you could define literals.
        return ''
def detect_encoding():
    """
    Determine which encoding should be used for ``source``.

    Checked in this order: a UTF-8 byte-order mark at the start of
    ``source``, a ``coding[=:]`` declaration within the first two lines, the
    ``encoding`` taken from the enclosing scope and finally ``iso-8859-1``.

    For the implementation of encoding definitions in Python, look at:
    http://www.python.org/dev/peps/pep-0263/
    http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations
    """
    if is_py25:
        bom = '\xef\xbb\xbf'
    else:
        # Evaluated at runtime; the bytes literal is never written directly.
        bom = literal_eval(r"b'\xef\xbb\xbf'")

    if source.startswith(bom):
        # UTF-8 byte-order mark
        return 'utf-8'

    header = re.match(r'(?:[^\n]*\n){0,2}', str(source)).group(0)
    declared = re.search(r"coding[=:]\s*([-\w.]+)", header)
    if declared:
        return declared.group(1)

    # Nothing was declared in the source itself.
    if encoding is not None:
        return encoding
    # the default if nothing else has been set -> PEP 263
    return 'iso-8859-1'
def eval(self):
    """Return the value that the literal source text ``self.value`` denotes."""
    source_text = self.value
    return literal_eval(source_text)
def _parse_statement(self):
    """
    This is not done in the main parser, because it might be slow and
    most of the statements won't need this data anyway. This is something
    'like' a lazy execution.

    This is not really nice written, sorry for that. If you plan to replace
    it and make it nicer, that would be cool :-)

    Walks ``self.token_list`` and builds a list of commands out of it:
    ``Call`` objects for names and string/number literals, ``Array`` objects
    for brackets and comma separated tuples, ``Lambda`` objects for lambdas.
    Any other token except ``'\\n'`` is appended unchanged.

    Side effect: for every assignment operator that is found,
    ``(commands_collected_so_far, operator)`` is appended to
    ``self._assignment_details`` and the command list starts over.

    :return: The list of commands collected after the last assignment.
    """
    def is_assignment(tok):
        """
        True for string tokens that end with ``=`` and are not one of the
        comparison operators ``>=``, ``<=``, ``==`` and ``!=``.
        """
        return isinstance(tok, (str, unicode)) and tok.endswith('=') \
            and not tok in ['>=', '<=', '==', '!=']

    def parse_array(token_iterator, array_type, start_pos, add_el=None,
                    added_breaks=()):
        """
        Consume statements from ``token_iterator`` into a new ``Array``.

        ``add_el``, if given, is added as the first statement; it also makes
        an assignment operator end a contained statement
        (``break_on_assignment=bool(add_el)``). Parsing ends as soon as
        ``parse_stmt`` yields no statement, or the breaking token is a
        closing bracket, one of ``added_breaks`` or an assignment operator.

        :return: Tuple of the array and the token that ended it.
        """
        arr = Array(self._sub_module, start_pos, array_type, self)
        if add_el is not None:
            arr.add_statement(add_el)

        # Arrays of type SET may still turn out to be dictionaries.
        maybe_dict = array_type == Array.SET
        break_tok = None
        is_array = None
        while True:
            stmt, break_tok = parse_stmt(token_iterator, maybe_dict,
                                         break_on_assignment=bool(add_el),
                                         added_breaks=added_breaks)
            if stmt is None:
                break
            else:
                if break_tok == ',':
                    # A comma was seen, so this really is an array.
                    is_array = True
                # A statement that was ended by ':' is added as a key.
                is_key = maybe_dict and break_tok == ':'
                arr.add_statement(stmt, is_key)
                if break_tok in closing_brackets \
                        or break_tok in added_breaks \
                        or is_assignment(break_tok):
                    break
        # One element and no comma seen: downgrade TUPLE to NOARRAY.
        if arr.type == Array.TUPLE and len(arr) == 1 and not is_array:
            arr.type = Array.NOARRAY
        if not arr.values and maybe_dict:
            # this is a really special case - empty brackets {} are
            # always dictionaries and not sets.
            arr.type = Array.DICT

        # The array ends where its last value ends, else where its last key
        # ends, else one column after its start. The length of the breaking
        # token (if any) is added to the column.
        k, v = arr.keys, arr.values
        latest = (v[-1] if v else k[-1] if k else None)
        end_pos = latest.end_pos if latest is not None \
            else (start_pos[0], start_pos[1] + 1)
        arr.end_pos = end_pos[0], end_pos[1] + (len(break_tok) if break_tok
                                                else 0)
        return arr, break_tok

    def parse_stmt(token_iterator, maybe_dict=False, added_breaks=(),
                   break_on_assignment=False, stmt_class=Statement):
        """
        Collect tokens from ``token_iterator`` until a breaking token is
        found and wrap them in a new ``stmt_class`` instance.

        ``level`` starts at 1 and is changed by opening/closing brackets. A
        token breaks the statement if it is a closing bracket that brings
        ``level`` to 0, if it is in ``added_breaks``, or - on level 1 - if it
        is ``','``, ``':'`` (only with ``maybe_dict``) or an assignment
        operator (only with ``break_on_assignment``). Only raw tokens are
        checked for this, already parsed ``Base`` objects never break.

        :return: Tuple of the statement (``None`` if no token was collected)
            and the last token that was looked at.
        """
        token_list = []
        used_vars = []
        level = 1
        tok = None
        first = True
        end_pos = None
        for i, tok_temp in token_iterator:
            if isinstance(tok_temp, Base):
                # the token is a Name, which has already been parsed
                tok = tok_temp
                if first:
                    start_pos = tok.start_pos
                    first = False
                end_pos = tok.end_pos
                if isinstance(tok, ListComprehension):
                    # it's not possible to set it earlier
                    tok.parent = self
                if isinstance(tok, Name):
                    used_vars.append(tok)
            else:
                # Raw token: a (type, string, start position) tuple.
                token_type, tok, start_tok_pos = tok_temp
                # Remember where the previous token ended; a list
                # comprehension uses it as the end of its leading statement.
                last_end_pos = end_pos
                end_pos = start_tok_pos[0], start_tok_pos[1] + len(tok)
                if first:
                    first = False
                    start_pos = start_tok_pos

                # Both sub parsers return the token they stopped at, which
                # then replaces ``tok`` for the break checks below.
                if tok == 'lambda':
                    lambd, tok = parse_lambda(token_iterator)
                    if lambd is not None:
                        token_list.append(lambd)
                elif tok == 'for':
                    list_comp, tok = parse_list_comp(token_iterator,
                                     token_list, start_pos, last_end_pos)
                    if list_comp is not None:
                        # The comprehension replaces everything collected
                        # so far.
                        token_list = [list_comp]

                if tok in closing_brackets:
                    level -= 1
                elif tok in brackets.keys():
                    level += 1

                if level == 0 and tok in closing_brackets \
                        or tok in added_breaks \
                        or level == 1 and (tok == ','
                           or maybe_dict and tok == ':'
                           or is_assignment(tok) and break_on_assignment):
                    # NOTE(review): moves the end column back by one for the
                    # breaking token - presumably assumes a one character
                    # token; confirm.
                    end_pos = end_pos[0], end_pos[1] - 1
                    break
            token_list.append(tok_temp)

        if not token_list:
            return None, tok

        statement = stmt_class(self._sub_module, [], [], token_list,
                               start_pos, end_pos, self.parent)
        statement.used_vars = used_vars
        return statement, tok

    def parse_lambda(token_iterator):
        """
        Parse the parameters (as ``Param`` statements) and the return
        expression of a lambda.

        :return: Tuple of the ``Lambda`` and the token parsing stopped at,
            or ``(None, tok)`` if the parameters were not ended by ``':'``.
        """
        params = []
        # The positions of the lambda are taken from the enclosing statement
        # (see ``lambd.end_pos`` below as well).
        start_pos = self.start_pos
        while True:
            param, tok = parse_stmt(token_iterator, added_breaks=[':'],
                                    stmt_class=Param)
            if param is None:
                break
            params.append(param)
            if tok == ':':
                break
        if tok != ':':
            return None, tok

        # since lambda is a Function scope, it needs Scope parents
        parent = self.get_parent_until(IsScope)
        lambd = Lambda(self._sub_module, params, start_pos, parent)

        # Whatever follows the ':' is the returned expression.
        ret, tok = parse_stmt(token_iterator)
        if ret is not None:
            ret.parent = lambd
            lambd.returns.append(ret)
        lambd.end_pos = self.end_pos
        return lambd, tok

    def parse_list_comp(token_iterator, token_list, start_pos, end_pos):
        """
        Parse the rest of a list comprehension, after its ``for`` keyword.

        ``token_list`` contains the tokens collected before the ``for``; a
        ``Statement`` spanning ``start_pos`` to ``end_pos`` is built from
        them.

        :return: Tuple of the ``ListComprehension`` and the token parsing
            stopped at, or ``(None, tok)`` (after a debug warning) if the
            part before ``in`` or the part after it could not be parsed.
        """
        def parse_stmt_or_arr(token_iterator, added_breaks=()):
            """
            Parse one statement. If a comma ended it, keep parsing it as a
            tuple and wrap the resulting array in a new ``Statement``.
            """
            stmt, tok = parse_stmt(token_iterator,
                                   added_breaks=added_breaks)
            if not stmt:
                return None, tok
            if tok == ',':
                arr, tok = parse_array(token_iterator, Array.TUPLE,
                                       stmt.start_pos, stmt,
                                       added_breaks=added_breaks)
                # Gather the used variables of all elements of the array.
                used_vars = []
                for stmt in arr:
                    used_vars += stmt.used_vars
                start_pos = arr.start_pos[0], arr.start_pos[1] - 1
                stmt = Statement(self._sub_module, [], used_vars, [],
                                 start_pos, arr.end_pos)
                arr.parent = stmt
                stmt.token_list = stmt._commands = [arr]
            else:
                for v in stmt.used_vars:
                    v.parent = stmt
            return stmt, tok

        st = Statement(self._sub_module, [], [], token_list, start_pos,
                       end_pos)

        # The part between ``for`` and ``in``.
        middle, tok = parse_stmt_or_arr(token_iterator, added_breaks=['in'])
        if tok != 'in' or middle is None:
            debug.warning('list comprehension middle @%s' % str(start_pos))
            return None, tok

        # The part after ``in``.
        in_clause, tok = parse_stmt_or_arr(token_iterator)
        if in_clause is None:
            debug.warning('list comprehension in @%s' % str(start_pos))
            return None, tok

        return ListComprehension(st, middle, in_clause, self), tok

    # initializations
    result = []
    # True right after a '.' that follows a Call: the next call is chained
    # to the previous one instead of being appended.
    is_chain = False
    # ``brackets`` and ``closing_brackets`` are also read by the nested
    # functions above.
    brackets = {'(': Array.TUPLE, '[': Array.LIST, '{': Array.SET}
    closing_brackets = ')', '}', ']'

    token_iterator = common.PushBackIterator(enumerate(self.token_list))
    for i, tok_temp in token_iterator:
        if isinstance(tok_temp, Base):
            # the token is a Name, which has already been parsed
            tok = tok_temp
            token_type = None
            start_pos = tok.start_pos
        else:
            token_type, tok, start_pos = tok_temp
            if is_assignment(tok):
                # This means, there is an assignment here.
                # Add assignments, which can be more than one
                self._assignment_details.append((result, tok))
                result = []
                is_chain = False
                continue
            elif tok == 'as':  # just ignore as, because it sets values
                # The token after ``as`` is skipped, too.
                next(token_iterator, None)
                continue

        if tok == 'lambda':
            # ``tok`` becomes the token the lambda parser stopped at and is
            # processed by the branches below.
            lambd, tok = parse_lambda(token_iterator)
            if lambd is not None:
                result.append(lambd)

        is_literal = token_type in [tokenize.STRING, tokenize.NUMBER]
        if isinstance(tok, Name) or is_literal:
            c_type = Call.NAME
            if is_literal:
                # Replace the source text of the literal by its value.
                tok = literal_eval(tok)
                if token_type == tokenize.STRING:
                    c_type = Call.STRING
                elif token_type == tokenize.NUMBER:
                    c_type = Call.NUMBER

            call = Call(self._sub_module, tok, c_type, start_pos, self)
            if is_chain:
                result[-1].set_next(call)
            else:
                result.append(call)
            is_chain = False
        elif tok in brackets.keys():
            arr, is_ass = parse_array(token_iterator, brackets[tok],
                                      start_pos)
            if result and isinstance(result[-1], Call):
                # Brackets directly after a call belong to that call.
                result[-1].set_execution(arr)
            else:
                arr.parent = self
                result.append(arr)
        elif tok == '.':
            if result and isinstance(result[-1], Call):
                is_chain = True
        elif tok == ',':  # implies a tuple
            # commands is now an array not a statement anymore
            t = result[0]
            start_pos = t[2] if isinstance(t, tuple) else t.start_pos

            # get the correct index
            # (peek at the next token and push it back, so that ``i - 1`` is
            # the index of the current token; at the end of the token list
            # the length of the list is used for ``i``)
            i, tok = next(token_iterator, (len(self.token_list), None))
            if tok is not None:
                token_iterator.push_back((i, tok))
            t = self.token_list[i - 1]
            try:
                end_pos = t.end_pos
            except AttributeError:
                # No ``end_pos``: calculate it for raw token tuples, use the
                # start position for everything else.
                end_pos = (t[2][0], t[2][1] + len(t[1])) \
                    if isinstance(t, tuple) else t.start_pos

            # Everything collected so far becomes the first element of the
            # tuple.
            stmt = Statement(self._sub_module, [], [], result,
                             start_pos, end_pos, self.parent)
            stmt._commands = result
            arr, break_tok = parse_array(token_iterator, Array.TUPLE,
                                         stmt.start_pos, stmt)
            result = [arr]
            if is_assignment(break_tok):
                self._assignment_details.append((result, break_tok))
                result = []
                is_chain = False
        else:
            if tok != '\n':
                result.append(tok)
    return result
def add_docstr(self, string): """ Clean up a docstring """ self.docstr = cleandoc(literal_eval(string))
def __init__(self, module, literal, start_pos, end_pos, parent=None):
    """
    Initialize the base class and keep the literal in two forms.

    ``self.literal`` is the source text as passed in, ``self.value`` is the
    value that ``literal_eval`` produces from it.
    """
    base = super(Literal, self)
    base.__init__(module, start_pos, end_pos, parent)
    # Keep the raw text first, so it is available even if evaluating fails.
    self.literal = literal
    evaluated = literal_eval(literal)
    self.value = evaluated