def get_cores(line_code):
    line_code = round_trip(line_code)
    try:
        tokens = tokenize(line_code)
        # raises ParseChunkError if the line is not a function header
        parse_func_header(tokens)
        # function headers/prototypes carry no statement cores
        return []
    except ParseChunkError:
        result = decompose_line(line_code)
    cores = []
    for key in result:
        if 'stmt' in key:
            stmt, depth = result[key]
            stmt = tokenize(stmt)
            segment_info = parse_chunk(stmt)
            cores += segment_info['nodes']
    return cores

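# Illustrative usage sketch (not part of the original module): function
# headers yield no cores, while ordinary statement lines yield the parse
# nodes of each embedded statement. Assumes parse_func_header accepts a
# standard C header; the node shape depends on decompose_line/parse_chunk.
def _demo_get_cores():
    assert get_cores('int main(int argc, char **argv) {') == []
    nodes = get_cores('x = y + 1;')
    assert isinstance(nodes, list)
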
def extract_extra_braces(c):
    tokens = filter_space(tokenize(c, new_line=False))
    extra_close_curly, extra_open_curly, goto_stmt = False, False, False
    # a line ending in ':' (optionally followed by ';') is a statement
    # label, i.e. a goto target; braces are ignored in that case
    if len(tokens) >= 1 and tokens[-1].kind == 'operator' and tokens[-1].value == ':':
        goto_stmt = True
    elif (len(tokens) >= 2 and tokens[-2].kind == 'operator' and tokens[-2].value == ':'
            and tokens[-1].kind == 'semicolon' and tokens[-1].value == ';'):
        goto_stmt = True
    if goto_stmt:
        return extra_close_curly, extra_open_curly, goto_stmt
    # match braces with a stack; whatever survives is unmatched
    stack = []
    num_tokens = len(tokens)
    for idx, t in enumerate(tokens):
        kind = t.kind
        if kind == 'open_curly':
            stack.append((idx, kind))
        elif kind == 'close_curly':
            if len(stack) != 0 and stack[-1][1] == 'open_curly':
                del stack[-1]
            else:
                stack.append((idx, kind))
    for idx, kind in stack:
        # an unmatched brace is only acceptable as a leading '}' or a
        # trailing '{'; anywhere else the line is rejected
        if (kind == 'close_curly' and idx != 0) or (kind == 'open_curly' and idx != num_tokens - 1):
            return None
        if kind == 'close_curly':
            extra_close_curly = True
        if kind == 'open_curly':
            extra_open_curly = True
    return extra_close_curly, extra_open_curly, goto_stmt

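# Illustrative outcomes (not part of the original module); assumes the
# tokenizer tags ':' as an 'operator' token and braces as
# 'open_curly'/'close_curly', as the checks above expect.
def _demo_extract_extra_braces():
    assert extract_extra_braces('} else {') == (True, True, False)   # one dangling brace at each end
    assert extract_extra_braces('cleanup:') == (False, False, True)  # statement label
    assert extract_extra_braces('} x--; }') is None                  # unmatched brace mid-line
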
def parse_gen(code, grammar):
    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result = OrderedDict()
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    cur_idx = 0
    # drop a trailing semicolon before matching against the grammar
    if tokens[-1].value == ';':
        tokens = tokens[:-1]
    for atom in grammar:
        if cur_idx >= len(tokens):
            next_idx, content = None, None
        else:
            next_idx, content = atom.f(cur_idx, tokens)
        if next_idx is None and not atom.optional:
            raise parse_single_line_exception(
                'Error parsing atom %s, which is required' % atom.name)
        result[atom.name] = content
        if next_idx is not None:
            cur_idx = next_idx
    # a header with no inline statement (e.g. 'if (x) {') opens a new scope
    if 'stmt' in result:
        result['new_scope'] = result['stmt'] is None
    else:
        result['new_scope'] = False
    return result

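# Illustrative sketch (not part of the original module): parse_gen only
# relies on each grammar atom exposing .name, .optional, and
# .f(cur_idx, tokens) -> (next_idx, content), with (None, None) on failure.
# The Atom namedtuple and match_keyword helper below are hypothetical.
from collections import namedtuple

Atom = namedtuple('Atom', ['name', 'f', 'optional'])

def match_keyword(kword):
    def f(cur_idx, tokens):
        if tokens[cur_idx].value == kword:
            return cur_idx + 1, kword
        return None, None
    return f

# e.g. a toy grammar for 'break;' lines:
#   parse_gen('break;', [Atom('kw_break', match_keyword('break'), False)])
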
def to_onmt(s):
    values = [
        plain2onmt_tok(t.value, t.kind)
        for t in tokenize(s, new_line=False) if t.kind != 'whitespace'
    ]
    return ' '.join([v for v in values if v is not None])

def return_result_for_line_marker(code):
    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result = OrderedDict()
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    result['new_scope'] = result['end_w_open_curly']
    result['line_type'] = 'marker'
    return result

def annotate_type(psu, table):
    values = [plain2onmt_tok(t.value, t.kind)
              for t in tokenize(psu, new_line=False) if t.kind != 'whitespace']

    def get_type_from_table(v):
        if v not in table:
            return 'None'
        # normalize the type string into a single whitespace-free token
        return str(table[v][0]).replace(' ', '_').replace('\'', '').replace('\"', '')

    return ' '.join([(v + '│' + get_type_from_table(v)) for v in values])

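# Illustrative usage sketch (not part of the original module): `table` is
# assumed to map a token string to a tuple whose first element is its type;
# tokens without an entry are annotated with the literal string 'None'.
# The exact token text depends on plain2onmt_tok, but the output shape is
# one 'token│type' pair per non-whitespace token.
def _demo_annotate_type():
    toy_table = {'x': ('unsigned int',)}  # hypothetical symbol table
    print(annotate_type('x + 1', toy_table))
    # expected shape (roughly): x│unsigned_int +│None 1│None
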
def type_line(line_code):
    line_code = round_trip(line_code)
    atoms_declared, atoms_used, prototype = {}, {}, None
    forest = []
    try:
        try:
            # first try to read the line as a function header / prototype
            tokens = tokenize(line_code, new_line=False)
            result = parse_func_header(tokens)
            line_type = 'prototype' if result['is_prototype'] else 'function'
            prototype = adddepth2func(result, atoms_declared)
            sw_close_curly, ew_open_curly = (start_w_close_curly(tokens),
                                             end_w_open_curly(tokens))
        except ParseChunkError:
            # otherwise decompose it into its component statements
            result = decompose_line(line_code)
            if result is None:
                return None
            line_type = result['line_type']
            sw_close_curly, ew_open_curly = (result['start_w_close_curly'],
                                             result['end_w_open_curly'])
        for key in result:
            if 'stmt' in key:
                stmt, depth = result[key]
                stmt = tokenize(stmt, new_line=False)
                segment_info = parse_chunk(stmt)
                addvar_decl2line(segment_info, atoms_declared, depth)
                parse_var_used(segment_info['nodes'], atoms_used, depth)
                forest += segment_info['nodes']
        return {
            # used by the scope-tracking pass
            'line_type': line_type,
            'start_w_close_curly': sw_close_curly,
            'end_w_open_curly': ew_open_curly,
            'line_complete': len(line_code) > 0 and line_code[-1] in ('}', ';'),
            'atoms_declared': atoms_declared,
            'atoms_used': atoms_used,
            'prototype': prototype,
            'forest': forest,
            'code': line_code,
        }
    except ParseChunkError:
        return None

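# Illustrative sketch (not part of the original module): on a parseable
# line, type_line returns a dict summarizing scope effects and the
# variables declared/used; on a parse failure it returns None.
def _demo_type_line():
    info = type_line('int x = 0;')
    if info is not None:
        assert info['line_complete']            # ends with ';'
        assert not info['start_w_close_curly']
        # 'atoms_declared' / 'atoms_used' map names to per-depth info
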
def braces_acceptable(program_str):
    tokens = filter_space(tokenize(program_str, new_line=False))
    counter = 0
    for t in tokens:
        if t.kind == 'open_curly':
            counter += 1
        elif t.kind == 'close_curly':
            counter -= 1
        # a close brace must never precede its matching open brace
        if counter < 0:
            return False
    return counter == 0

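# Illustrative check (not part of the original module); assumes the
# tokenizer labels '{' / '}' as 'open_curly' / 'close_curly' tokens.
def _demo_braces_acceptable():
    assert braces_acceptable('if (x) { y(); }')   # balanced
    assert not braces_acceptable('} f(); {')      # counter dips below zero
    assert not braces_acceptable('while (1) {')   # never closed
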
def split_by_semicolon(code):
    tokens = filter_space(tokenize(code, new_line=False))
    semi_idxes = [idx for idx, token in enumerate(tokens)
                  if token.kind == 'semicolon']
    # sentinel indices so every segment is the token run between two semicolons
    semi_idxes = [-1] + semi_idxes + [len(tokens)]
    return [join_tokens(tokens[semi_idxes[i] + 1:semi_idxes[i + 1]])
            for i in range(len(semi_idxes) - 1)]

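# Illustrative sketch (not part of the original module): semicolons act as
# separators, so a trailing ';' yields an empty final segment. The exact
# segment text depends on join_tokens; on 'int a = 1; a++;' the split is
# expected to produce roughly ['int a = 1', 'a++', ''].
def _demo_split_by_semicolon():
    segments = split_by_semicolon('int a = 1; a++;')
    assert len(segments) == 3  # two statements plus an empty tail
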
def return_result_for_trivial_code(code):
    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result = OrderedDict()
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    result['new_scope'] = result['end_w_open_curly']
    if code == '{':
        result['line_type'] = 'open_curly_only'
    elif code == '}':
        result['line_type'] = 'close_curly_only'
    elif code == '':
        result['line_type'] = 'empty'
    elif code == ';':
        result['line_type'] = 'line'
    return result

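# Illustrative mapping (not part of the original module): the four trivial
# inputs and the 'line_type' each branch above assigns.
def _demo_trivial_line_types():
    assert return_result_for_trivial_code('{')['line_type'] == 'open_curly_only'
    assert return_result_for_trivial_code('}')['line_type'] == 'close_curly_only'
    assert return_result_for_trivial_code('')['line_type'] == 'empty'
    assert return_result_for_trivial_code(';')['line_type'] == 'line'
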
def parse_dowhile(code):
    # split at 'while' and parse the two halves independently
    while_idx = code.index('while')
    do_part, while_part = code[:while_idx], code[while_idx:]
    do_result = parse_doline(do_part)
    while_result = parse_whileline(while_part)
    result = OrderedDict()
    result.update(do_result)
    result.update(while_result)
    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    result['new_scope'] = False
    return result

def line_well_formed_brace(code):
    tokens = filter_space(tokenize(code, new_line=False))
    if len(tokens) == 0:
        return True
    # tolerate one leading '}' and one trailing '{'
    start, end = 0, len(tokens)
    if tokens[0].kind == 'close_curly':
        start += 1
    if tokens[-1].kind == 'open_curly':
        end -= 1
    count = 0
    for token in tokens[start:end]:
        if token.kind == 'open_curly':
            count += 1
        elif token.kind == 'close_curly':
            count -= 1
        if count < 0:
            return False
    return count == 0

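# Illustrative check (not part of the original module): one leading '}'
# and one trailing '{' are tolerated; anything unbalanced in between is not.
def _demo_line_well_formed_brace():
    assert line_well_formed_brace('} else {')
    assert line_well_formed_brace('if (x) { y(); }')
    assert not line_well_formed_brace('}}')
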
def __init__(self, program):
    self.raw_lines = program.split('\n')
    self.tokens_by_line = [tokenize(raw_line) for raw_line in self.raw_lines]
    # flat token stream plus, for each token, the line it came from
    self.line_ids = list(chain(*[[line_num] * len(tokens)
                                 for line_num, tokens in enumerate(self.tokens_by_line)]))
    self.all_tokens = list(chain(*self.tokens_by_line))
    # (line, column) -> flat token index, both 1-indexed
    self.lc2token_idx = dict(
        ((line_id + 1, token.offset + 1), token_idx)
        for token_idx, (line_id, token) in enumerate(zip(self.line_ids, self.all_tokens)))
    for idx, token in enumerate(self.all_tokens):
        token.set_pid(idx)
        token.set_program(self)
        token.set_lid(self.line_ids[idx])
    self.start_idx = self.lc2token_idx[(1, 1)]
    self.covered = [False for _ in range(len(self.all_tokens))]

def __init__(self, code: str):
    self._toks = list(tokenize(code))

def has_key_word(code, kword):
    tokens = tokenize(code)
    for t in tokens:
        # skip matches that occur inside string literals
        if t.value == kword and t.kind != 'in-quote':
            return True
    return False

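# Illustrative check (not part of the original module); assumes the
# tokenizer tags tokens inside string literals with kind 'in-quote',
# so keywords inside quotes do not count as matches.
def _demo_has_key_word():
    assert has_key_word('while (x) { }', 'while')
    assert not has_key_word('printf("while");', 'while')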