def extract_tstat_data(pcap_filepath):
    """ Given the pcap filepath, return a dictionary of as many elements as there are tcp flows """
    connections = {}
    conn_id = 0
    with co.cd(os.path.basename(pcap_filepath[:-5])):
        with co.cd(os.listdir('.')[0]):
            # Complete TCP connections
            connections, conn_id = extract_tstat_data_tcp_complete(
                'log_tcp_complete', connections, conn_id)
            # Non complete TCP connections (less info, but still interesting data)
            connections, conn_id = extract_tstat_data_tcp_nocomplete(
                'log_tcp_nocomplete', connections, conn_id)
    return connections
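# The snippets in this file lean on a small "cd" helper exposed variously as
# co.cd, cm.cd, common.cd, or plain cd. None of those modules are shown here,
# so the following is only a minimal sketch of such a helper, assuming it does
# nothing more than switch the working directory and restore it afterwards.
import contextlib
import os


@contextlib.contextmanager
def cd(path):
    """Temporarily change the working directory, restoring it on exit."""
    previous = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(previous)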
def read_graphs(filename, path=""):
    graphs = []
    # Read graphs file
    with cm.cd(path):
        with open(filename, "r") as input_graphs:
            str_graphs = input_graphs.read().split("#")[:-1]
    # Generate graphs
    graphs = [nx.parse_gml(i) for i in str_graphs]
    # Generate node and edge label sets
    node_labels = set()
    edge_labels = set()
    for g in graphs:
        for label in list(nx.get_node_attributes(g, 'type').values()):
            node_labels.add(label)
        for label in list(nx.get_edge_attributes(g, 'type').values()):
            edge_labels.add(label)
    node_labels = sorted(list(node_labels))
    edge_labels = sorted(list(edge_labels))
    edge_labels = cm.fill_label_set(edge_labels)
    node_labels = cm.fill_label_set(node_labels)
    for g in graphs:
        g.graph['node_map'] = {k: v for v, k in enumerate(sorted(g.nodes()))}
    return graphs, node_labels, edge_labels
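# read_graphs() stores a node->index map on each graph. A hypothetical use of
# that map is to build a dense adjacency matrix in the fixed node order; this
# is a sketch only, the real downstream pipeline (cm.fill_label_set etc.) is
# not shown in this file.
import numpy as np


def adjacency_matrix(g):
    """Return a dense adjacency matrix ordered by the graph's node_map."""
    node_map = g.graph['node_map']
    n = len(node_map)
    adj = np.zeros((n, n))
    for u, v in g.edges():
        adj[node_map[u], node_map[v]] = 1.0
    return adj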
def run_inference(project):
    common.setup_checker_framework_env()

    classpath = os.path.join(os.environ['JSR308'], 'generic-type-inference-solver', 'bin')
    if os.environ.get('CLASSPATH'):
        os.environ['CLASSPATH'] += ':' + classpath
    else:
        os.environ['CLASSPATH'] = classpath

    project_dir = common.get_project_dir(project)
    annotation_dir = os.path.join(project_dir, common.DLJC_OUTPUT_DIR, 'annotations')

    if os.path.isdir(annotation_dir):
        shutil.rmtree(annotation_dir)

    with common.cd(project_dir):
        common.clean_project(project)
        common.run_dljc(project, ['inference'],
                        ['--solverArgs=backEndType=maxsatbackend.MaxSat',
                         '--checker', 'ontology.OntologyChecker',
                         '--solver', 'constraintsolver.ConstraintSolver',
                         '-m', 'ROUNDTRIP',
                         '-afud', annotation_dir])
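# run_inference() assumes common.setup_checker_framework_env() leaves JSR308
# (and related variables) pointing at a Checker Framework checkout. common.py
# is not included here, so this is only a guess at its shape; the directory
# layout below is an assumption, not the project's actual code.
import os


def setup_checker_framework_env(tools_dir):
    """Hypothetical sketch: export the environment the inference tooling expects."""
    jsr308 = os.path.join(tools_dir, 'checker-framework')      # assumed layout
    os.environ['JSR308'] = jsr308
    os.environ['CHECKERFRAMEWORK'] = os.path.join(jsr308, 'checker-framework')
    os.environ['PATH'] = os.pathsep.join(
        [os.path.join(os.environ['CHECKERFRAMEWORK'], 'checker', 'bin'),
         os.environ.get('PATH', '')])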
def sweep_work(hard_sweep=False):
    # Sweep workdir
    with cd(ctrldir):
        payu_sweep(model_type=None,
                   config_path=None,
                   hard_sweep=hard_sweep,
                   lab_path=str(labdir))
def run_petablox(project):
    with common.cd(common.get_project_dir(project)):
        petablox_cmd = ['java', '-cp', common.get_jar('petablox.jar'),
                        '-Dpetablox.reflect.kind=none',
                        '-Dpetablox.run.analyses=cipa-0cfa-dlog',
                        'petablox.project.Boot']
        common.run_cmd(petablox_cmd)
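# common.run_cmd() is used throughout these snippets but its definition is not
# included. A minimal sketch, assuming it simply runs the command, optionally
# echoes the output, and hands back the captured output and return code:
import subprocess


def run_cmd(cmd, print_output=False):
    """Hypothetical stand-in for common.run_cmd(): run cmd and capture its output."""
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output, _ = proc.communicate()
    output = output.decode(errors='replace')
    if print_output:
        print(output)
    return {'return_code': proc.returncode, 'output': output}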
def recompile_checker_framework():
    if not os.environ.get('JAVA_HOME'):
        print("ERROR in pa2checker.recompile_checker_framework(): Gradle will fail if "
              "your JAVA_HOME environment variable is unset. Please set it and try again.")
        sys.exit(0)
    type_infer_tool_dir = os.path.join(common.TOOLS_DIR, "checker-framework-inference")
    with common.cd(type_infer_tool_dir):
        common.setup_checker_framework_env()
        common.run_cmd(["gradle", "dist", "-i"], print_output=True)
def test_init():
    # Initialise a payu laboratory
    with cd(ctrldir):
        payu_init(None, None, str(labdir))

    # Check all the correct directories have been created
    for subdir in ['bin', 'input', 'archive', 'codebase']:
        assert ((labdir / subdir).is_dir())
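# test_init(), sweep_work(), and payu_setup() all assume module-level
# pathlib.Path objects (ctrldir, labdir) plus thin wrappers around the payu
# entry points. The real payu test harness is not shown; the scratch layout
# below is only a sketch of what that setup might look like.
from pathlib import Path

tmpdir = Path('testdir').resolve()   # assumed scratch area for the tests
ctrldir = tmpdir / 'ctrl'            # control (experiment) directory
labdir = tmpdir / 'lab'              # laboratory directory


def make_dirs():
    """Create the scratch directories the payu tests run inside."""
    for directory in (tmpdir, ctrldir, labdir):
        directory.mkdir(parents=True, exist_ok=True)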
def insert_anno_to_project(project, jaif_file):
    """ Insert the annotation info in ${jaif_file} into ${project}. """
    project_dir = common.get_project_dir(project)
    with common.cd(project_dir):
        common.setup_checker_framework_env()
        insert_cmd = ['insert-annotations-to-source', '-i', jaif_file]
        # use glob2.glob to recursively collect the java files under the project dir
        java_files = glob.glob('{}/**/*.java'.format(project_dir))
        insert_cmd.extend(java_files)
        common.run_cmd(insert_cmd, print_output=True)
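# insert_anno_to_project() relies on the glob2 package (imported as glob) so
# that '**' matches nested directories by default. With the standard-library
# glob the equivalent call needs recursive=True (Python 3.5+); a small sketch
# of that fallback:
import glob as stdlib_glob
import os


def find_java_files(project_dir):
    """Recursively collect .java files under project_dir using stdlib glob."""
    pattern = os.path.join(project_dir, '**', '*.java')
    return stdlib_glob.glob(pattern, recursive=True)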
def update():
    for cmd in ['pip', 'git']:
        if not spawn.find_executable(cmd):
            print("{0} not found. are you sure it's installed?".format(cmd))
            exit(1)

    # decode() so the version string is text on Python 3 as well
    pip_version = subprocess.Popen(
        'pip --version', shell=True,
        stdout=subprocess.PIPE).stdout.readline().decode().split(' ')[1]
    pip_flags = '--process-dependency-links' if pip_version.startswith('1.5') else ''

    repo = tempfile.mkdtemp(suffix='lobo')
    subprocess.Popen('git clone {0} {1}'.format(WOLF_REPO_URL, repo), shell=True).wait()
    with cd(repo):
        subprocess.Popen('sudo pip install -U {0} .'.format(pip_flags), shell=True).wait()

    print(BOLD('update complete'))
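# update() checks for pip and git with distutils' spawn.find_executable. On
# Python 3, shutil.which does the same job without the (now deprecated)
# distutils dependency; a minimal sketch of that check:
import shutil
import sys


def require_commands(*commands):
    """Exit with an error if any of the given executables is missing from PATH."""
    for cmd in commands:
        if shutil.which(cmd) is None:
            print("{0} not found. are you sure it's installed?".format(cmd))
            sys.exit(1)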
def payu_setup(model_type=None,
               config_path=None,
               lab_path=None,
               force_archive=None,
               reproduce=None):
    """
    Wrapper around original setup command to provide default arguments
    and run in ctrldir
    """
    with cd(ctrldir):
        payu_sweep(model_type=None,
                   config_path=None,
                   hard_sweep=False,
                   lab_path=str(labdir))
        payu_setup_orignal(model_type,
                           config_path,
                           lab_path,
                           force_archive,
                           reproduce)
def __call__(self, cmd, post_traversal=False, concurrent=False):
    for submodule in self.all_submodules(root_first=not post_traversal):
        with cd(submodule):
            if modules.is_module_excluded(submodule):
                continue
            pool = ThreadPool(processes=4) if concurrent else None
            if callable(cmd):
                if concurrent:
                    o = pool.apply_async(cmd).get()
                else:
                    o = cmd()
            else:
                if concurrent:
                    o = pool.apply_async(gitcmd, (cmd, False)).get()
                else:
                    o = gitcmd(cmd, git=False)
            yield submodule, o
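# __call__ above yields (submodule, output) pairs, so a caller can fan a git
# command out over every submodule. The owning class is not shown here, so the
# "repo" object below is purely illustrative usage.
def print_status(repo):
    """Run 'status --short' in every submodule and echo the results."""
    for submodule, output in repo('status --short'):
        print('== {} =='.format(submodule))
        print(output)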
def main():
    with common.cd(WORKING_DIR):
        test_dtrace = "test.dtrace.gz"
        test_inv_name = "TestInvariant"
        ontology_to_daikon.create_daikon_invariant("README.md", test_inv_name)
        cmd = ["javac", "-classpath", daikon_jar + ":.", test_inv_name + ".java"]
        common.run_cmd(cmd, print_output=True)
        print("Finding program points")
        ppts = find_ppts_that_establish_inv(test_dtrace, WORKING_DIR, test_inv_name)
        print("deleting temp files")
        os.remove(test_inv_name + ".class")
        os.remove(test_inv_name + ".java")
        os.remove("test.inv.gz")

        # output = run_daikon_on_dtrace_file(test_dtrace, checked_invariant="daikon.inv.unary.sequence.EltwiseIntLessThan")
        # print output
        # ppts = find_ppts_that_establish_inv_in_daikon_output(output, " sorted by ")

        print("Methods that establish FirstMuseInvariant:")
        for ppt in ppts:
            print(ppt)
def build_jar(project_name):
    project = common.project_info(project_name)
    project_dir = common.get_project_dir(project_name)
    if 'jar' not in project:
        print('No jar command available, skipping {}.'.format(project_name))
        return
    jar_cmd = project['jar'].strip().split()
    build_system = jar_cmd[0]
    if build_system == "mvn":
        add_mvn_deps(project_dir)
    elif build_system == "gradle":
        add_gradle_deps(project_dir)
    else:
        print("Don't know how to build jar file for {} projects".format(build_system))
        return
    with common.cd(project_dir):
        common.run_cmd(jar_cmd)
def main():
    with common.cd(common.WORKING_DIR):
        test_dtrace = "test.dtrace.gz"
        test_inv_name = "TestInvariant"
        ontology_to_daikon.create_daikon_invariant("README.md", test_inv_name)
        cmd = ["javac", "-classpath", daikon_jar + ":.", test_inv_name + ".java"]
        common.run_cmd(cmd, print_output=True)
        print("Finding program points")
        ppts = find_ppts_that_establish_inv(test_dtrace, WORKING_DIR, test_inv_name)
        print("deleting temp files")
        os.remove(test_inv_name + ".class")
        os.remove(test_inv_name + ".java")
        os.remove("test.inv.gz")

        # output = run_daikon_on_dtrace_file(test_dtrace, checked_invariant="daikon.inv.unary.sequence.EltwiseIntLessThan")
        # print output
        # ppts = find_ppts_that_establish_inv_in_daikon_output(output, " sorted by ")

        print("Methods that establish FirstMuseInvariant:")
        for ppt in ppts:
            print(ppt)
def revert_checker_source():
    with common.cd(SOLVER_SRC_DIR):
        common.run_cmd(['git', 'clean', '-f', '.'])
        common.run_cmd(['git', 'checkout', '.'])
# print(n_neighbors)
n_cluster, connected = eg.calculate_gap(data_matrix,
                                        n_neighbors=n_neighbors,
                                        path=dir_path)
analytics.append([int(n_cluster), connected, n_neighbors])
if connected:
    break

max_gap.append(tuple(max(analytics)))
connected_gaps = list(filter(lambda x: x[1], analytics))
if len(connected_gaps):
    max_gap_connected.append(max(connected_gaps))
else:
    max_gap_connected.append((0, True))

analytics = {'analytics': analytics,
             'n_neighbors': neighbors,
             'n_components': n_components}
with (dir_path / 'analytics.json').open(mode='w') as out_analytics:
    json.dump(analytics, out_analytics, indent=4)

with (Path(path) / 'analytics.json').open(mode='w') as out_analytics:
    json.dump({'max_gap': max_gap, 'max_gap_connected': max_gap_connected},
              out_analytics, indent=4)

with cm.cd(path):
    join_labels = True
    fancy = ("Joined" if join_labels else "")
    dataset = pdbids_file.split(".")[0] + "_" + fancy[0] + ("_T" if join_labels else "_F")
    plot_max_gap(cl_range, max_gap, max_gap_connected,
                 save=True, show=False, fname="eigen_summary.png")
    plot_eigen_gap(max_gap_connected, var_comp,
                   fancy=fancy, dataset=dataset, adjust=False)
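# eg.calculate_gap() is not defined in this file. It appears to apply the usual
# eigen-gap heuristic on a k-nearest-neighbour graph: build the graph, take the
# normalized Laplacian, and pick the number of clusters at the largest gap
# between consecutive eigenvalues. A sketch of that idea only; the module name
# "eg" and its exact return contract are assumptions.
import numpy as np
from scipy.sparse.csgraph import connected_components, laplacian
from sklearn.neighbors import kneighbors_graph


def calculate_gap(data_matrix, n_neighbors=10):
    """Return (estimated cluster count, whether the kNN graph is connected)."""
    graph = kneighbors_graph(data_matrix, n_neighbors=n_neighbors, mode='connectivity')
    graph = 0.5 * (graph + graph.T)        # symmetrize the kNN graph
    n_components, _ = connected_components(graph, directed=False)
    lap = laplacian(graph, normed=True)
    eigenvalues = np.sort(np.linalg.eigvalsh(lap.toarray()))
    gaps = np.diff(eigenvalues)
    n_cluster = int(np.argmax(gaps)) + 1   # choose k at the largest consecutive gap
    return n_cluster, n_components == 1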
def process_trace(pcap_filepath, graph_dir_exp, stat_dir_exp, aggl_dir_exp, rtt_dir_exp, rtt_subflow_dir_exp,
                  failed_conns_dir_exp, acksize_dir_exp, acksize_tcp_dir_exp, plot_cwin, tcpcsm, min_bytes=0,
                  light=False, return_dict=False):
    """ Process a mptcp pcap file and generate graphs of its subflows
        Notice that we can't change dir per thread, we should use processes
    """
    # if not check_mptcp_joins(pcap_filepath):
    #     print("WARNING: no mptcp joins on " + pcap_filepath, file=sys.stderr)
    csv_tmp_dir = tempfile.mkdtemp(dir=os.getcwd())
    connections = None
    do_tcp_processing = False

    try:
        with co.cd(csv_tmp_dir):
            # If segmentation faults, remove the -S option
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-S', '-t', '5000', '-w', '0']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3', '-a']
            # connections = process_mptcptrace_cmd(cmd, pcap_filepath)
            #
            # # Useful to count the number of reinjected bytes
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-a', '-t', '5000', '-w', '2']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3']
            # devnull = open(os.devnull, 'w')
            # if subprocess.call(cmd, stdout=devnull) != 0:
            #     raise MPTCPTraceError("Error of mptcptrace with " + pcap_filepath)
            # devnull.close()
            #
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-r', '2', '-t', '5000', '-w', '2']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3']
            # devnull = open(os.devnull, 'w')
            # if subprocess.call(cmd, stdout=devnull) != 0:
            #     raise MPTCPTraceError("Error of mptcptrace with " + pcap_filepath)
            # devnull.close()

            cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-S', '-a', '-A', '-R',
                   '-r', '2', '-t', '5000', '-w', '2']
            connections = process_mptcptrace_cmd(cmd, pcap_filepath)

            # The mptcptrace call will generate .xpl files to cope with
            # First see all xpl files, to detect the relative 0 of all connections
            # Also, compute the duration and number of bytes of the MPTCP connection
            first_pass_on_files(connections)
            rtt_all = {co.C2S: {}, co.S2C: {}}
            acksize_all = {co.C2S: {}, co.S2C: {}}

            # Then really process xpl files
            if return_dict:
                for xpl_fname in glob.glob(os.path.join('*.xpl')):
                    try:
                        os.remove(xpl_fname)
                    except IOError as e:
                        print(str(e), file=sys.stderr)
            else:
                for xpl_fname in glob.glob(os.path.join('*.xpl')):
                    try:
                        directory = co.DEF_RTT_DIR if MPTCP_RTT_FNAME in xpl_fname else co.TSG_THGPT_DIR
                        shutil.move(xpl_fname, os.path.join(
                            graph_dir_exp, directory,
                            os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(xpl_fname)))
                    except IOError as e:
                        print(str(e), file=sys.stderr)

            # And by default, save only seq csv files
            for csv_fname in glob.glob(os.path.join('*.csv')):
                if not light:
                    if MPTCP_GPUT_FNAME in os.path.basename(csv_fname):
                        process_gput_csv(csv_fname, connections)
                try:
                    if os.path.basename(csv_fname).startswith(MPTCP_ADDADDR_FNAME):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        process_add_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)
                    elif os.path.basename(csv_fname).startswith(MPTCP_RMADDR_FNAME):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        process_rm_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)
                    elif MPTCP_RTT_FNAME in os.path.basename(csv_fname):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        is_reversed = is_reverse_connection(os.path.basename(csv_fname))
                        process_rtt_csv(csv_fname, rtt_all, connections, conn_id, is_reversed)
                        os.remove(csv_fname)
                        # co.move_file(csv_fname, os.path.join(
                        #     graph_dir_exp, co.DEF_RTT_DIR,
                        #     os.path.basename(pcap_filepath[:-5]) + "_" + csv_fname))
                    elif MPTCP_SEQ_FNAME in os.path.basename(csv_fname):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue
                        is_reversed = is_reverse_connection(os.path.basename(csv_fname))
                        process_csv(csv_fname, connections, conn_id, is_reversed)
                        if return_dict:
                            try:
                                os.remove(csv_fname)
                            except Exception:
                                pass
                        else:
                            co.move_file(csv_fname, os.path.join(
                                graph_dir_exp, co.TSG_THGPT_DIR,
                                os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(csv_fname)))
                    elif MPTCP_ACKSIZE_FNAME in os.path.basename(csv_fname):
                        collect_acksize_csv(csv_fname, connections, acksize_all)
                        os.remove(csv_fname)
                    else:
                        if not light and not return_dict:
                            co.move_file(csv_fname, os.path.join(
                                graph_dir_exp, co.TSG_THGPT_DIR,
                                os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(csv_fname)))
                        else:
                            os.remove(csv_fname)
                except IOError as e:
                    print(str(e), file=sys.stderr)

            do_tcp_processing = True

    except MPTCPTraceError as e:
        print(str(e) + "; skip mptcp process", file=sys.stderr)

    shutil.rmtree(csv_tmp_dir)

    # This will save the mptcp connections
    if connections and do_tcp_processing:
        dicts = tcp.process_trace(pcap_filepath, graph_dir_exp, stat_dir_exp, failed_conns_dir_exp,
                                  acksize_tcp_dir_exp, tcpcsm, mptcp_connections=connections,
                                  light=light, return_dict=return_dict)
        if return_dict:
            tcp_connections, acksize_all_tcp = dicts
            return connections, tcp_connections, rtt_all, acksize_all, acksize_all_tcp
        else:
            co.save_data(pcap_filepath, acksize_dir_exp, acksize_all)
            co.save_data(pcap_filepath, rtt_dir_exp, rtt_all)
            co.save_data(pcap_filepath, stat_dir_exp, connections)
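# process_mptcptrace_cmd() and MPTCPTraceError are referenced above but defined
# elsewhere. Following the commented-out calls inside process_trace(), the
# wrapper presumably runs mptcptrace and fails loudly on a non-zero exit before
# parsing the files it generated. A minimal sketch of just that launch step
# (run_mptcptrace is a hypothetical name, not the project's function):
import os
import subprocess


class MPTCPTraceError(Exception):
    """Raised when the mptcptrace command exits with a non-zero status."""


def run_mptcptrace(cmd, pcap_filepath):
    """Run mptcptrace quietly, raising MPTCPTraceError on failure."""
    with open(os.devnull, 'w') as devnull:
        if subprocess.call(cmd, stdout=devnull) != 0:
            raise MPTCPTraceError("Error of mptcptrace with " + pcap_filepath)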
def main(corpus, annotations): """ SUMMARY: use case of the user-driven functionality of PASCALI. Scenario: User provides the concept of Sequence and the equivalent Java types, and the concept of sorted sequence and the relevant type invariant. Goal: learn how to get from Sequence -> Sorted Sequence. """ """ INPUT: annotations, dictionary mapping string -> list of strings OUTPUT: recompiles generic-inference-solver with new annotations""" run_pa2checker(annotations) """ Look for new mapping from 'ontology concepts'->'java type' and run checker framework. Should be implemented in type_inference Mapping example: Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc. INPUT: corpus, file containing set of concept->java_type mapping OUTPUT: Set of jaif files that are merged into the classes. The jaif files are stored as default.jaif in each project's directory. BODY: This also triggers back-end labeled graph generation. """ for project in corpus: run_inference(project) """ Missing step: interact with PA to add a definition of Sorted Sequence which is a specialization of Sequence that has a sortedness invariants. The sortedness invariant gets turned into a Daikon template INPUT: user interaction OUTPUT: type_annotation and type_invariant (for sorted sequence) """ ordering_operator = "<=" ontology_invariant_file = "TODO_from_Howie.txt" with open(ontology_invariant_file, 'w') as f: f.write(ordering_operator) invariant_name = "TODO_sorted_sequence" daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(ontology_invariant_file, invariant_name) """ Find all methods that have one input parameter annotated as Sequence and return a variable also annotated as Sequence. INPUT: The corpus and the desired annotations on the method signature OUTPUT: List of methods that have the desired signature. NOTE: This is a stub and will be implemented as LB query in the future. """ sig_methods = find_methods_with_signature(corpus, "@ontology.qual.Sequence", ["@ontology.qual.Sequence"]) print ("\n ************") print ("The following corpus methods have the signature Sequence->Sequence {}:") for (project, package, clazz, method) in sig_methods: print("{}:\t{}.{}.{}".format(project, package, clazz, method)) print ("\n ************") """ Search for methods that have a return type annotated with Sequence and for which we can establish a sortedness invariant (may done by LB). INPUT: dtrace file of project daikon_pattern_java_file that we want to check on the dtrace file. OUTPUT: list of ppt names that establish the invariant. Here a ppt is a Daikon program point, s.a. test01.TestClass01.sort(int[]):::EXIT Note: this step translate the type_invariant into a Daikon template (which is a Java file). 
""" pattern_class_name = invariant_name pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass") if os.path.isdir(pattern_class_dir): shutil.rmtree(pattern_class_dir) os.mkdir(pattern_class_dir) cmd = ["javac", "-g", "-classpath", common.get_jar('daikon.jar'), daikon_pattern_java_file, "-d", pattern_class_dir] common.run_cmd(cmd) list_of_methods = [] for project in corpus: dtrace_file = backend.get_dtrace_file_for_project(project) if not dtrace_file: print ("Ignoring folder {} because it does not contain dtrace file".format(project)) continue ppt_names = inv_check.find_ppts_that_establish_inv(dtrace_file, pattern_class_dir, pattern_class_name) methods = set() for ppt in ppt_names: method_name = ppt[:ppt.find(':::EXIT')] methods.add(method_name) list_of_methods +=[(project, methods)] print ("\n ************") print ("The following corpus methods return a sequence sorted by {}:".format(ordering_operator)) for project, methods in list_of_methods: if len(methods)>0: print (project) for m in methods: print("\t{}".format(m)) print ("\n ************") shutil.rmtree(pattern_class_dir) """ Expansion of dynamic analysis results .... Find a list of similar methods that are similar to the ones found above (list_of_methods). INPUT: list_of_methods, corpus with labeled graphs generated, threshold value for similarity, OUTPUT: superset_list_of_methods """ # WENCHAO print("Expanding the dynamic analysis results using graph-based similarity:") union_set = set() for project, methods in list_of_methods: # map Daikon output on sort method to method signature in methods.txt in generated graphs for m in methods: method_name = common.get_method_from_daikon_out(m) #kernel_file = common.get_kernel_path(project) method_file = common.get_method_path(project) dot_name = common.find_dot_name(method_name, method_file) if dot_name: # find the right dot file for each method dot_file = common.get_dot_path(project, dot_name) # find all graphs that are similar to it using WL based on some threshold sys.path.insert(0, 'simprog') from similarity import Similarity sim = Similarity() sim.read_graph_kernels("corpus_kernel.txt") top_k = 3 iter_num = 3 result_program_list_with_score = sim.find_top_k_similar_graphs(dot_file, 'g', top_k, iter_num) print(project+":") print(result_program_list_with_score) result_set = set([x[0] for x in result_program_list_with_score]) # take the union of all these graphs union_set = union_set | result_set print("Expanded set:") print([x.split('/')[-4] for x in union_set]) # return this set as a list of (project, method) fo = open("methods.txt", "w") expanded_list = [] for dot_path in union_set: method_summary = common.get_method_summary_from_dot_path(dot_path) fo.write(method_summary) fo.write("\n") fo.close() """ Update the type annotations for the expanded dynamic analysis results. INPUT: superset_list_of_methods, annotation to be added OUTPUT: nothing EFFECT: updates the type annotations of the methods in superset_list_of_methods. This requires some additional checks to make sure that the methods actually perform some kind of sorting. Note that we do it on the superset because the original list_of_methods might miss many implementations because fuzz testing could not reach them. """ for class_file in []: # MARTIN generated_jaif_file = "TODO" insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file) """ Ordering of expanded dynamic analysis results .... 
Find the k 'best' implementations in superset of list_of_methods INPUT: superset_list_of_methods, corpus, k OUTPUT: k_list_of_methods Note: similarity score is used. may consider using other scores; e.g., TODO:??? """ #TODO: create input file for huascar where each line is formatted like: # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[] ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/") methods_file = os.path.join(common.WORKING_DIR, 'methods.txt') with common.cd(ordering_dir): #TODO generate a proper relevant methods file. cmd = ["./run.sh", "-k", "3", "-t", "typicality", "-f", methods_file] common.run_cmd(cmd, print_output=True) """ Close the loop and add the best implementation found in the previous step back to the ontology. INPUT: k_list_of_methods OUTPUT: patch file for the ontology. Worst case: just add the 'best' implementation found in the corpus as a blob to the ontology. Best case: generate an equivalent flow-graph in the ontology. """ print "TODO" # ALL
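# main() expects a corpus of project names and an annotation map (string ->
# list of strings, per its docstring). The concrete values below are purely
# illustrative: 'Sort05' comes from the TODO comment above, 'Sort07' and the
# type list are hypothetical placeholders for a real corpus layout.
if __name__ == '__main__':
    example_corpus = ['Sort05', 'Sort07']
    example_annotations = {
        'Sequence': ['java.util.List', 'java.util.LinkedHashSet'],
    }
    main(example_corpus, example_annotations)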
def main(corpus, annotations, limit=3):
    """ SUMMARY: use case of the user-driven functionality of PASCALI.
    Scenario: User provides the concept of Sequence and the equivalent Java types,
    and the concept of sorted sequence and the relevant type invariant.
    Goal: learn how to get from Sequence -> Sorted Sequence.
    """

    """ INPUT: annotations, dictionary mapping string -> list of strings
        OUTPUT: recompiles generic-inference-solver with new annotations"""
    run_pa2checker(annotations)

    """ Look for new mapping from 'ontology concepts'->'java type' and run checker framework.
        Should be implemented in type_inference
        Mapping example: Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc.
        INPUT: corpus, file containing set of concept->java_type mapping
        OUTPUT: Set of jaif files that are merged into the classes. The jaif files are
                stored as default.jaif in each project's directory.
        BODY: This also triggers back-end labeled graph generation.
    """
    for project in corpus:
        run_inference(project)

    """ Missing step: interact with PA to add a definition of Sorted Sequence
        which is a specialization of Sequence that has a sortedness invariants.
        The sortedness invariant gets turned into a Daikon template
        INPUT: user interaction
        OUTPUT: type_annotation and type_invariant (for sorted sequence)
    """
    ordering_operator = "<="
    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:
        f.write(ordering_operator)
    invariant_name = "TODO_sorted_sequence"
    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)

    """ Find all methods that have one input parameter annotated as Sequence and
        return a variable also annotated as Sequence.
        INPUT: The corpus and the desired annotations on the method signature
        OUTPUT: List of methods that have the desired signature.
        NOTE: This is a stub and will be implemented as LB query in the future.
    """
    sig_methods = find_methods_with_signature(corpus, "@ontology.qual.Sequence",
                                              ["@ontology.qual.Sequence"])
    print("\n ************")
    print("The following corpus methods have the signature Sequence->Sequence {}:")
    for (project, package, clazz, method) in sig_methods:
        print("{}:\t{}.{}.{}".format(project, package, clazz, method))
    print("\n ************")

    """ Search for methods that have a return type annotated with Sequence and for
        which we can establish a sortedness invariant (may done by LB).
        INPUT: dtrace file of project
               daikon_pattern_java_file that we want to check on the dtrace file.
        OUTPUT: list of ppt names that establish the invariant. Here a ppt is a
                Daikon program point, s.a. test01.TestClass01.sort(int[]):::EXIT
        Note: this step translate the type_invariant into a Daikon template (which is a Java file).
    """
    pattern_class_name = invariant_name
    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    if os.path.isdir(pattern_class_dir):
        shutil.rmtree(pattern_class_dir)
    os.mkdir(pattern_class_dir)
    cmd = ["javac", "-g", "-classpath", common.get_jar('daikon.jar'),
           daikon_pattern_java_file, "-d", pattern_class_dir]
    common.run_cmd(cmd)

    list_of_methods = []
    for project in corpus:
        dtrace_file = backend.get_dtrace_file_for_project(project)
        if not dtrace_file:
            print("Ignoring folder {} because it does not contain dtrace file".format(project))
            continue
        ppt_names = inv_check.find_ppts_that_establish_inv(
            dtrace_file, pattern_class_dir, pattern_class_name)
        methods = set()
        for ppt in ppt_names:
            method_name = ppt[:ppt.find(':::EXIT')]
            methods.add(method_name)
        list_of_methods += [(project, methods)]

    print("\n ************")
    print("The following corpus methods return a sequence sorted by {}:".format(ordering_operator))
    for project, methods in list_of_methods:
        if len(methods) > 0:
            print(project)
            for m in methods:
                print("\t{}".format(m))
    print("\n ************")
    shutil.rmtree(pattern_class_dir)

    """ Expansion of dynamic analysis results ....
        Find a list of similar methods that are similar to the ones found above (list_of_methods).
        INPUT: list_of_methods, corpus with labeled graphs generated, threshold value for similarity,
        OUTPUT: superset_list_of_methods
    """
    # WENCHAO
    print("Expanding the dynamic analysis results using graph-based similarity:")
    union_set = set()
    for project, methods in list_of_methods:
        # map Daikon output on sort method to method signature in methods.txt in generated graphs
        for m in methods:
            method_name = common.get_method_from_daikon_out(m)
            # kernel_file = common.get_kernel_path(project)
            method_file = common.get_method_path(project)
            dot_name = common.find_dot_name(method_name, method_file)
            if dot_name:
                # find the right dot file for each method
                dot_file = common.get_dot_path(project, dot_name)
                # find all graphs that are similar to it using WL based on some threshold
                sys.path.append(os.path.join(common.WORKING_DIR, 'simprog'))
                from similarity import Similarity
                sim = Similarity()
                sim.read_graph_kernels(os.path.join(common.WORKING_DIR, "corpus_kernel.txt"))
                top_k = 3
                iter_num = 3
                result_program_list_with_score = sim.find_top_k_similar_graphs(dot_file, 'g', top_k, iter_num)
                print(project + ":")
                print(result_program_list_with_score)
                result_set = set([x[0] for x in result_program_list_with_score])
                # take the union of all these graphs
                union_set = union_set | result_set
    print("Expanded set:")
    print([x.split('/')[-4] for x in union_set])

    # return this set as a list of (project, method)
    fo = open("methods.txt", "w")
    expanded_list = []
    for dot_path in union_set:
        method_summary = common.get_method_summary_from_dot_path(dot_path)
        fo.write(method_summary)
        fo.write("\n")
    fo.close()

    """ Update the type annotations for the expanded dynamic analysis results.
        INPUT: superset_list_of_methods, annotation to be added
        OUTPUT: nothing
        EFFECT: updates the type annotations of the methods in superset_list_of_methods.
        This requires some additional checks to make sure that the methods actually perform
        some kind of sorting. Note that we do it on the superset because the original
        list_of_methods might miss many implementations because fuzz testing could not reach them.
    """
    for class_file in []:  # MARTIN
        generated_jaif_file = "TODO"
        insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)

    """ Ordering of expanded dynamic analysis results ....
        Find the k 'best' implementations in superset of list_of_methods
        INPUT: superset_list_of_methods, corpus, k
        OUTPUT: k_list_of_methods
        Note: similarity score is used. may consider using other scores; e.g., TODO:???
    """
    # TODO: create input file for huascar where each line is formatted like:
    # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]
    ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")
    methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
    with common.cd(ordering_dir):
        # TODO generate a proper relevant methods file.
        cmd = ["./run.sh", "-k", "{}".format(limit), "-t", "typicality", "-f", methods_file]
        common.run_cmd(cmd, print_output=True)

    """ Close the loop and add the best implementation found in the previous step back to the ontology.
        INPUT: k_list_of_methods
        OUTPUT: patch file for the ontology.
        Worst case: just add the 'best' implementation found in the corpus as a blob to the ontology.
        Best case: generate an equivalent flow-graph in the ontology.
    """
    print("TODO")  # ALL