def __init__(self):
    """Start with empty I/O specs and tests, then read tester settings from config."""
    read = config.get
    # Both specs are lists of (formatter, var) pairs.
    self.input_spec, self.output_spec = [], []
    # Test inputs, stored as strings.
    self.tests = []
    self.test_number = int(read("test_number"))
    self.output_folder = read("output_folder")
    self.analyzer = read("analyzer")
def get_context(doc, segment, linear_value):
    """Collect the context nodes for ``segment`` at a given search value.

    Three cases, decided by ``domutil``:

    * the search stays within the parent of ``segment[0]``: the context is
      the previous AST elements followed by the segment itself;
    * the search leaves the parent but not the method: recurse on the parent
      AST element with a reduced ``linear_value``;
    * the search leaves the method: take the first ``call`` element in
      ``doc`` whose name equals the enclosing function's name, recurse from
      that call site and record the function node under that name.

    :param doc: DOM document searched for ``call`` elements
    :param segment: non-empty list of nodes; only ``segment[0]`` drives the search
    :param linear_value: integer search value handed to the ``domutil`` checks
    :return: ``(context, functions)``; ``context`` is a list of nodes and
        ``functions`` maps a called name to its function node. ``([], {})``
        when the search leaves the method and no matching call exists.
    """
    node = segment[0]
    context = []
    functions = {}
    debug = config.get('show_context_search_debug') == 'true'

    if not domutil.beyond_parent(node, linear_value):
        context = domutil.get_previous_AST_elements(node, linear_value)
        context.extend(segment)
        return context, functions

    if not domutil.beyond_method(node, linear_value):
        linear_value -= domutil.get_previous_AST_element_number(node) + 1
        node = domutil.get_parent_AST_element(node)
        return get_context(doc, [node], linear_value)

    function_node = domutil.get_function_element(node)
    name_node = domutil.get_first_child_by_tagname(function_node, 'name')
    name = domutil.get_text_content(name_node)
    for call_node in doc.getElementsByTagName('call'):
        call_name_node = domutil.get_first_child_by_tagname(call_node, 'name')
        call_name = domutil.get_text_content(call_name_node)
        if call_name != name:
            continue
        # Config flags are compared against the string 'true' everywhere
        # else; a bare truthiness test would also fire for 'false'.
        if debug:
            blue('beyound method '+name + ' and found call node: ')
        linear_value -= domutil.get_previous_AST_element_number_until_function(node) + 1
        context, functions = get_context(doc, [call_node], linear_value)
        functions[call_name] = function_node
        return context, functions

    if debug:
        yellow('beyound method '+name+' but did not found call node: ')
    return context, functions
def instrument_output(self):
    """Insert output instrumentation at the annotations enabled in config.

    With ``instrument_loop`` enabled, ``@Inner`` is instrumented, and
    ``@Post`` is additionally instrumented with type ``'variable'`` when
    ``instrument_loop_parameter`` is enabled. ``instrument_post`` and
    ``instrument_pre`` enable ``@Post`` and ``@Pre`` with the default
    instrument type.
    """
    logger.info('instrumenting output')
    if config.get('instrument_loop') == 'true':
        # Compare against 'true' like every other flag: a bare truthiness
        # test would treat the string 'false' as enabled.
        if config.get('instrument_loop_parameter') == 'true':
            self.do_instrument('@Post', 'variable')
        self.do_instrument('@Inner')
    if config.get('instrument_post') == 'true':
        self.do_instrument('@Post')
    if config.get('instrument_pre') == 'true':
        self.do_instrument('@Pre')
def __init__(self):
    """Load the generated sources from the output folder and reset state.

    Parses ``generate.c`` and ``support.h`` from the configured
    ``output_folder`` via ``domutil.get_doc_from_c_file``.
    """
    # Read once; a second identical assignment further down was redundant.
    self.output_folder = config.get('output_folder')
    self.doc = domutil.get_doc_from_c_file(self.output_folder+'/generate.c')
    self.support_doc = domutil.get_doc_from_c_file(self.output_folder+'/support.h')
    # the fields used in the generated program.
    # this is a big heuristic
    self.needed_fields = set()
    # dict to hold {alias: name}
    self.alias_to_name = {}
    self.code = ''
    self.struct_limit = 0
    self.helium_size_defined = False
def generate_input(self, type_name, var_name, recursive_count=1):
    """Build the input/initialisation code string for one variable.

    ``type_name`` is split by ``typeutil.parse_type`` into ``base``,
    ``pointer`` and ``array`` parts, which select the generator used.

    :param type_name: type of the variable, as text
    :param var_name: name of the variable
    :param recursive_count: forwarded unchanged to
        ``generate_input_for_struct`` and to the typedef recursion
    :return: the generated code as a string; ``''`` when the type is an array
        and ``handle_array`` is not ``'true'``, or when the type is a
        struct/typedef and ``handle_struct`` is not ``'true'``
    """
    result = ''
    type_component = typeutil.parse_type(type_name)
    base = type_component['base']
    pointer = type_component['pointer']
    array = type_component['array']
    # Arrays are delegated entirely (or skipped); nothing below runs for them.
    if array:
        if config.get('handle_array') == 'true':
            return self.generate_input_for_array(type_name, var_name)
        else:
            return ''
    # alloc
    if pointer:
        # char array should be treated separately:
        # fill a helper "<var>_array" and point the variable at it.
        if config.get('handle_array') == 'true' and base == 'char':
            result += self.generate_input_for_array('char[]', var_name+'_array')
            result += var_name +' = '+var_name + '_array;\n'
            return result;
        result += generate_alloc_string(type_name, var_name)
    # init
    if typeutil.is_primitive_type(base):
        result += generate_primitive_input(type_name, var_name)
    elif local.check_struct_or_typedef(base.replace('struct','').strip()):
        if config.get('handle_struct') != 'true':
            return ''
        code = local.resolve_single_simple(base.replace('struct','').strip(), t='st')
        if '{' in code.strip() and code.strip().startswith('typedef struct'):
            # "typedef struct {...} name;" -> drop the typedef keyword and the
            # trailing alias so only "struct {...};" remains.
            code = code.replace('typedef','').strip()
            code = code[:code.rfind('}')+1] + ';'
            result += self.generate_input_for_struct(type_name, var_name, code, recursive_count)
        elif code.strip().startswith('struct'):
            result += self.generate_input_for_struct(type_name, var_name, code, recursive_count)
        elif code.startswith('typedef'):
            if code.startswith('typedef struct'):
                # typedef of a struct without an inline body: look the struct
                # itself up (t='s'); nothing is emitted when it is not found.
                code = local.resolve_single_simple(base.replace('struct', '').strip(), t='s')
                if code:
                    result += self.generate_input_for_struct(type_name, var_name, code, recursive_count)
            else:
                # Plain typedef: recurse on the aliased type.
                # `array` is always falsy here because arrays returned above.
                _,original = syntaxutil.parse_typedef_code(code)
                result += self.generate_input(original+pointer+array, var_name, recursive_count)
        else:
            logger.warning('Not recognized code for generate input')
    else:
        sys_type = systype.resolve(base)
        if sys_type:
            # NOTE(review): recursive_count is not forwarded here, so the
            # recursion restarts at the default of 1 - confirm this is intended.
            result += self.generate_input(sys_type+pointer+array, var_name)
        else:
            logger.warning('the type '+type_name+' is not solved for init')
    return result
def analyze(self):
    """Run the configured analyzer over ``<output_folder>/output.csv``.

    * ``invariant``: ``compare.parse`` result, a newline, then the stdout of
      the ``advancedInvariant.R`` script (empty when the script fails);
    * ``recursive``: result of ``recursive_compare.compare``;
    * ``store``: calls ``store`` with the number of inputs and returns ``""``.

    :return: the analysis result for the configured analyzer
    :raises SystemExit: with status 1 when the analyzer is not supported
    """
    analyzer = config.get("analyzer")
    # str() keeps the log call from raising when the setting is missing;
    # a missing analyzer is then reported as unsupported below.
    logger.info("doing analyzing, analyzer: " + str(analyzer))
    output_csv = self.output_folder + "/output.csv"

    if analyzer == "invariant":
        template_result = compare.parse(output_csv)
        regression_result = ""
        try:
            regression_result = check_output(
                "Rscript ~/github/PyHelium/helium/analyzer/advancedInvariant.R " + output_csv,
                shell=True,
                stderr=DEVNULL,
            ).decode("utf8")
        except CalledProcessError as err:
            # Best effort: keep the template result, but leave a trace
            # instead of hiding the failure completely.
            logger.warning(
                "advancedInvariant.R failed with exit status " + str(err.returncode)
                + "; regression result left empty"
            )
        return template_result + "\n" + regression_result

    if analyzer == "recursive":
        return recursive_compare.compare("output.csv", output_csv)

    if analyzer == "store":
        store(len(self.input_spec))
        return ""

    red("unsupported analyzer")
    # Same effect as exit(1), without depending on the `site` module helper.
    raise SystemExit(1)
def get_segments(doc):
    """Select the code segments of ``doc`` per the ``code_selection`` setting.

    * ``annotation``: one segment per comment containing ``@HeliumStart``,
      built by ``get_annotation_segment``;
    * ``assertion``: for each comment containing ``@HeliumAssert`` that has a
      next AST element, a single-node segment holding that element;
    * ``loop``: one single-node segment per ``while`` element, followed by
      one per ``for`` element.

    :param doc: DOM document to search
    :return: list of segments, each a list of nodes
    :raises SystemExit: with status 1 for any other ``code_selection`` value
    """
    results = []
    code_selection = config.get('code_selection')
    # The first test used to match 'assertion' as well, which made the
    # dedicated 'assertion' branch below unreachable.
    if code_selection == 'annotation':
        for comment in doc.getElementsByTagName('comment'):
            if '@HeliumStart' in domutil.get_text_content(comment):
                results.append(get_annotation_segment(comment))
    elif code_selection == 'assertion':
        for comment in doc.getElementsByTagName('comment'):
            if '@HeliumAssert' not in domutil.get_text_content(comment):
                continue
            if domutil.has_next_AST_element(comment):
                results.append([domutil.get_next_AST_element(comment)])
    elif code_selection == 'loop':
        for tag in ('while', 'for'):
            results.extend([loop_node] for loop_node in doc.getElementsByTagName(tag))
    else:
        logger.error('unsupported code selection method: ' + repr(code_selection))
        raise SystemExit(1)
    return results
def __init__(self):
    """Create an empty builder; ``build`` fills in the per-context state."""
    # Not known until build() is called.
    self.doc = self.context = self.functions = self.inputs = None
    self.resolved = {}
    self.unresolved = None
    self.output_folder = config.get("output_folder")
def test(self):
    """Read the I/O specification, generate tests, run them, optionally analyze.

    :return: the result of ``self.analyze()`` when ``run_analyze`` is
        ``"true"``, otherwise ``None``
    """
    logger.info("testing")
    spec_source = self.output_folder + "/generate.c"
    self.input_spec, self.output_spec = get_io_specification(spec_source)
    self.generate_input()
    self.run()
    if config.get("run_analyze") != "true":
        return None
    return self.analyze()
def handle_segment(self, doc, segment):
    """Instrument one segment, then build and test it for growing context sizes.

    For search values ``0 .. max_linear_value`` the context is computed with
    ``get_context``, built with ``self.builder`` and, when the build succeeds
    and ``run_test`` is ``'true'``, tested with ``self.tester``. The search
    stops early as soon as no context is returned. The instrumentation added
    to ``doc`` is removed again at the end.

    :param doc: document that contains the segment
    :param segment: list of nodes; an empty segment is ignored
    :return: None
    """
    if not segment:
        return
    segment_text = ''
    for node in segment:
        segment_text += domutil.get_text_content(node) + '\n'
    # Optional size limit: skip segments with more newlines than allowed.
    if config.get('max_segment_size'):
        if segment_text.count('\n') > int(config.get('max_segment_size')):
            return
    if config.get('show_segment_size') == 'true':
        print('segment size: '+str(segment_text.count('\n')))
    if config.get('show_parent_function_size') == 'true':
        function_node = domutil.get_parent_by_tagname(segment[0], 'function')
        # Stays 0 when the segment has no enclosing 'function' element.
        function_size = 0
        if function_node:
            text = domutil.get_text_content(function_node)
            function_size = text.count('\n')
        print('parent function size: '+str(function_size))
    if config.get('show_segment') == 'true':
        print(segment_text)
    # NOTE: instrument_segment also mutates `segment` (adds the pre/post nodes).
    instrument_segment(doc, segment)
    max_linear_value = int(config.get('max_linear_value'))
    stop_criteria = StopCriteria()
    for i in range(max_linear_value+1):
        logger.info('context search: ' + str(i))
        if config.get('show_context_search_value') == 'true':
            print('context search value: '+str(i))
        context, functions = get_context(doc, segment, i)
        # An empty context ends the search.
        if not context:
            break
        if self.builder.build(doc, context, functions):
            if config.get('run_test') == 'true':
                result = self.tester.test()
                if self.analyzer == 'recursive':
                    if result == True:
                        green('found equivalent loops for recursive call')
                        print_nodes(segment)
                elif self.analyzer == 'invariant':
                    if config.get('show_analyze_result') == 'true':
                        print(result)
                    # Results are accumulated across search values; a stable
                    # result is only reported, the search still continues.
                    stop_criteria.add(result)
                    stable_result = stop_criteria.get_stable()
                    if stable_result:
                        green(stable_result)
                if config.get('interact_after_test') == 'true':
                    input('Enter to continue ...')
    # NOTE(review): not reached if anything above raises, so `doc` would stay
    # instrumented in that case - confirm whether that matters to callers.
    remove_instrument(doc)
def start(self):
    """Walk ``self.folder`` and handle every segment of each selected C file.

    Files must end in ``.c`` and must not be named ``generate.c``. When the
    ``file_include`` / ``file_exclude`` settings are set, the file name is
    additionally checked against ``file_include`` / ``file_exclude``.
    """
    for root, _, names in os.walk(self.folder):
        for name in names:
            if not name.endswith('.c') or name == 'generate.c':
                continue
            # NOTE(review): file_include / file_exclude are not defined in
            # this function; presumably module-level collections - confirm.
            if config.get('file_include') and name.split('/')[-1] not in file_include:
                continue
            if config.get('file_exclude') and name.split('/')[-1] in file_exclude:
                continue
            filename = os.path.join(root, name)
            doc = domutil.get_doc_from_c_file(filename)
            segments = get_segments(doc)
            if not segments:
                continue
            if config.get('show_filename') == 'true':
                print(filename)
            if config.get('interact_before_segment') == 'true':
                # Any non-empty answer skips this file.
                if input('Enter to start the file, anything to break the file ...'):
                    continue
            for segment in segments:
                self.handle_segment(doc, segment)
def build(path):
    """Load the ctags data for ``path`` into the ``this`` store.

    When ``build_ctags`` is ``'true'`` the tag file is produced by
    ``ctags.build_ctags``; otherwise ``<path>/tags`` is read. The parsed tag
    lines are stored under ``this['cache']`` and ``path`` under
    ``this['path']``.

    :param path: root folder of the project to index
    """
    logger.info('building tag file')
    this['path'] = path
    if config.get('build_ctags') == 'true':
        tag_file = ctags.build_ctags(path=path, recursive=True, opts='--languages=c,c++ -n --c-kinds=+x --exclude=helium_out')
    else:
        tag_file = os.path.join(path, 'tags')
    # The with-block closes the file; no explicit close() is needed.
    with codecs.open(tag_file, encoding='utf-8') as output:
        content = output.readlines()
    this['cache'] = ctags.parse_tag_lines(content)
def generate(self):
    """Write ``<output_folder>/generate.c`` from the current build state.

    The file contains, in order: the ``support.h`` include, a ``main``
    function that declares every input variable between the
    ``//@HeliumInput`` / ``//@HeliumInputEnd`` markers and then holds the
    context text, and finally the text of every function in
    ``self.functions``. Every ``return ...;`` statement in the context text
    is rewritten to ``return 0;``.
    """
    os.makedirs(self.output_folder, exist_ok=True)
    with open(self.output_folder + "/generate.c", "w") as f:
        f.write('#include "support.h"\n')
        # main
        f.write("int main() {\n")
        f.write("//@HeliumInput\n")
        for var_name, type_name in self.inputs.items():
            parts = typeutil.parse_type(type_name)
            f.write(parts["base"] + parts["pointer"] + " " + var_name + parts["array"] + ";\n")
        f.write("//@HeliumInputEnd\n")
        f.write("/**********Context********/\n")
        context_text = "".join(domutil.get_text_content(node) for node in self.context)
        # The context now lives inside main(), so every return statement is
        # replaced by "return 0;".
        context_text = re.sub(r"\breturn\b[^;\n]*;", "return 0;", context_text)
        if config.get("show_context") == "true":
            print(context_text)
        context_size = context_text.count("\n") + 1
        f.write(context_text)
        f.write("\n}\n")
        # functions
        function_size = 0
        for function in self.functions.values():
            text = domutil.get_text_content(function)
            function_size += text.count("\n")
            f.write(text)
            f.write("\n")
    # The with-block has already closed the file at this point.
    if config.get("show_context_size") == "true":
        print("context size: " + str(context_size))
        if function_size != 0:
            print("context function size: " + str(function_size))
def compile(self):
    """Run ``make clean`` and ``make`` in the output folder.

    Optionally prints the combined line count of ``support.h`` and
    ``generate.c`` first, and optionally pauses for Enter after the build.

    :return: ``True`` when ``make`` exits with status 0, else ``False``
    """
    logger.info("compiling")
    folder = self.output_folder
    # print out the generated and support.h loc
    if config.get("show_generate_size") == "true":
        loc = 0
        for count_cmd_tail in ("/support.h | wc -l", "/generate.c | wc -l"):
            loc += int(check_output("cat " + folder + count_cmd_tail, shell=True))
        print("total line of code generated: " + str(loc))

    call("make clean -C " + folder, shell=True, stdout=DEVNULL)

    # stderr=None lets make's messages through; DEVNULL hides them.
    stderr_value = None if config.get("show_compile_error_message") == "true" else DEVNULL
    status = call("make -C " + folder, shell=True, stdout=DEVNULL, stderr=stderr_value)
    succeeded = status == 0

    pause_setting = "interact_compile_success" if succeeded else "interact_compile_error"
    if config.get(pause_setting) == "true":
        input("Enter to continue ...")
    return succeeded
def generate_input(self):
    """Generate tests with the strategy named by the ``test_generation`` setting.

    ``pair`` falls back to random generation when fewer than two inputs are
    specified; an unknown strategy only logs a warning.
    """
    strategy = config.get("test_generation")
    if strategy == "random":
        self.generate_random_tests()
        return
    if strategy == "pair":
        if len(self.input_spec) < 2:
            logger.info("only one parameter, use random generation")
            self.generate_random_tests()
        else:
            self.generate_pairwise_tests()
        return
    if strategy == "predefined":
        self.generate_predefined_tests()
        return
    logger.warning("unsupported generation type")
def instrument_input(self):
    """Replace the ``//@HeliumInputEnd`` marker in ``self.code`` with input code.

    The code is generated by ``self.generate_input`` for every input
    declaration found by ``annotation.get_input_nodes``. Nothing happens when
    there are no input nodes.
    """
    logger.info('instrumenting input')
    input_nodes = annotation.get_input_nodes(self.doc)
    if not input_nodes:
        return
    pieces = []
    for decl in input_nodes:
        # input_nodes should be 'decl_stmt'
        type_name, var_name, _ = syntaxutil.parse_decl_stmt(decl)
        if config.get('handle_array') == 'true' and '[' in type_name:
            # Array declarations are removed from the code text.
            decl_text = domutil.get_text_content(decl)
            self.code = self.code.replace(decl_text, '')
        # We need to do the simplify
        self.struct_limit = 30
        pieces.append(self.generate_input(type_name, var_name))
    self.code = self.code.replace('//@HeliumInputEnd', ''.join(pieces), 1)
def do_instrument(self, annotate, instrument_type=''):
    """Replace the first ``//<annotate>`` marker in ``self.code`` with output code.

    :param annotate: annotation text, e.g. ``'@Post'``
    :param instrument_type: ``'variable'`` or ``'count'``; when empty, the
        ``instrument_type`` setting is used
    """
    kind = instrument_type or config.get('instrument_type')
    anchor = annotation.get_comment_node_by_annotation(self.doc, annotate)
    if not anchor:
        return

    output_code = ''
    if kind == 'variable':
        alive_vars = io.resolve_alive_vars(anchor, set())
        for var_name in alive_vars:
            # Reset before every variable, ahead of generate_output.
            self.struct_limit = 30
            output_code += self.generate_output(alive_vars[var_name], var_name)
    elif kind == 'count':
        output_code = 'printf("%d ", 5678);\n'
    # so that it can print out something(such as precondition) even if the program crash
    output_code += 'printf("\\n");'
    self.code = self.code.replace('//'+annotate, output_code, 1)
def instrument_segment(doc, segment):
    """Wrap ``segment`` in ``//@Pre`` / ``//@Post`` comment nodes, in place.

    A ``comment`` element containing ``//@Pre`` is inserted before the first
    node and one containing ``//@Post`` after the last node, both in the DOM
    and in the ``segment`` list. When ``instrument_loop`` is ``'true'``, an
    ``//@Inner`` comment is inserted before the last child of the ``block``
    child of every ``for`` / ``while`` element in the segment.

    :param doc: document used to create the new nodes
    :param segment: non-empty list of sibling nodes; modified in place
    :return: the same ``segment`` list
    """
    first_node = segment[0]
    last_node = segment[-1]
    parent = first_node.parentNode

    pre = doc.createElement('comment')
    pre.appendChild(doc.createTextNode('\n//@Pre\n'))
    parent.insertBefore(pre, first_node)

    post = doc.createElement('comment')
    post.appendChild(doc.createTextNode('\n//@Post\n'))
    parent.insertBefore(post, last_node.nextSibling)

    segment.insert(0, pre)
    segment.append(post)

    # loop invariant
    if config.get('instrument_loop') == 'true':
        for node in segment:
            # `and` binds tighter than `or`, so an unparenthesised
            # "is_element and tag == 'for' or tag == 'while'" reads
            # node.tagName on non-element nodes too. Test the tag only once
            # the node is known to be an element.
            if not (domutil.is_element(node) and node.tagName in ('for', 'while')):
                continue
            block_node = domutil.get_first_child_by_tagname(node, 'block')
            if not block_node:
                continue
            inner = doc.createElement('comment')
            inner.appendChild(doc.createTextNode('\n//@Inner\n'))
            block_node.insertBefore(inner, block_node.lastChild)
    return segment
def resolve(node, exception=None):
    """Resolve the identifiers used in node.

    Identifiers are extracted from the text of ``node`` (comments excluded)
    and resolved first locally, then against ``sys``. The code text of every
    local result is scanned again for new identifiers until none are left.

    :param node: node whose identifiers are resolved
    :param exception: set of identifiers to leave out; defaults to a fresh
        empty set on every call
    :return: dict accumulated from the ``local.resolve`` results
    """
    # A fresh set per call: a `set()` default is created once and shared by
    # every call that omits the argument.
    if exception is None:
        exception = set()
    to_resolve = extract_to_resolve_from_code(domutil.get_text_content_except(node, 'comment'), exception)
    result = {}
    resolved = set()
    unresolved = set()
    while to_resolve:
        local_result, local_resolved = local.resolve(to_resolve)
        result.update(local_result)
        sys_resolved = sys.resolve(to_resolve - local_resolved)
        unresolved.update(to_resolve - local_resolved - sys_resolved)
        resolved.update(local_resolved | sys_resolved)
        # Only locally resolved code can introduce further identifiers.
        new_code = ''.join(text for text, _, _ in local_result.values())
        # Names that were already seen are excluded, so the loop ends.
        to_resolve = extract_to_resolve_from_code(new_code, exception | resolved | unresolved)
    if unresolved and config.get('show_unresolved') == 'true':
        print('unresolved: '+repr(list(unresolved)))
    return result
def build(self, doc, context, functions):
    """
    Generate, resolve, instrument and compile a program for one context.

    :param doc: document of the context
    :param context: context nodes
    :param functions: functions collected because the linear search went above
        the enclosing function. functions = {call_name: node, ...}
    :return: the result of ``self.compile()``
    """
    self.doc, self.context, self.functions = doc, context, functions
    self.inputs = io.resolve_undefined_vars(context, set())
    self.resolved, self.unresolved = {}, set()

    self.generate()
    self.resolve()
    self.generate_support()
    Instrumenter().instrument()

    compiled = self.compile()
    if config.get("show_compile_result") != "true":
        return compiled
    if compiled:
        green("compile success")
    else:
        red("compile error")
    return compiled
def __init__(self, folder):
    """Create the builder and tester, then immediately process ``folder``.

    :param folder: root folder handed to ``self.start()``
    """
    self.builder, self.tester = Builder(), Tester()
    self.folder = folder
    self.analyzer = config.get('analyzer')
    # Construction kicks off the whole run.
    self.start()
def run(self):
    """Run ``a.out`` once per test and record inputs and outputs as CSV.

    ``input.csv`` gets a header of input variable names and one row per test
    (the whitespace-separated test values). ``output.csv`` gets a header of
    output variable names and one row per non-blank test:

    * no output at all: ``NA`` for every output column;
    * ``instrument_type == "count"``: the number of ``5678`` markers in the
      output, followed (when ``instrument_loop_parameter`` is ``"true"``) by
      the remaining values padded with ``NA``;
    * otherwise: the whitespace-separated output padded with ``NA``.

    A test that exceeds the ``timeout`` setting (milliseconds) is killed and
    treated as having produced no output.
    """
    executable = self.output_folder + "/a.out"
    # `with` closes both files even when a test run raises.
    with open(self.output_folder + "/output.csv", "w") as fout, \
            open(self.output_folder + "/input.csv", "w") as fin:
        var_list = [var for _, var in self.output_spec]
        output_number = len(var_list)
        fout.write(",".join(var_list))
        fout.write("\n")
        input_var_list = [var for _, var in self.input_spec]
        fin.write(",".join(input_var_list))
        fin.write("\n")

        logger.info("Running " + str(len(self.tests)) + " tests")
        run_success = False
        for test in self.tests:
            if config.get("show_test_data") == "true":
                logger.debug("input:" + repr(test))
            fin.write(",".join(test.split()))
            fin.write("\n")
            # Skip blank tests before spawning anything, so no process is
            # started that nobody waits for.
            if not test.strip():
                continue
            timeout = int(config.get("timeout"))
            # The Popen context manager closes the pipes and waits for the
            # process on exit, which also reaps it after a kill().
            with Popen(executable, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
                try:
                    stdout, _ = p.communicate(input=test.encode("utf8"), timeout=timeout / 1000)
                    stdout = stdout.decode("utf8")
                except TimeoutExpired:
                    logger.warning("testing timeout")
                    p.kill()
                    stdout = ""
                except UnicodeDecodeError:
                    logger.warning("UnicodeDecodeError")
                    stdout = ""
            # -6: 134: assertion fail
            # -11: 139: segment fault
            if p.returncode == 0:
                run_success = True
            if config.get("show_returncode") == "true":
                print("return code: " + str(p.returncode))
            if config.get("show_test_data") == "true":
                logger.debug("output:" + stdout)

            output = stdout.split()
            if not output:
                fout.write(",".join(["NA"] * output_number))
            elif config.get("instrument_type") == "count":
                fout.write(str(stdout.count("5678")))
                if config.get("instrument_loop_parameter") == "true":
                    output = stdout.replace("5678", "").split()
                    # One column is already taken by the count itself.
                    output.extend(["NA"] * (output_number - len(output) - 1))
                    fout.write(",")
                    fout.write(",".join(output))
            else:
                output.extend(["NA"] * (output_number - len(output)))
                fout.write(",".join(output))
            fout.write("\n")

        logger.info("done running testing")
        if run_success and config.get("show_run_success") == "true":
            green("run success")