Example #1
0
def extract_tstat_data(pcap_filepath):
    """Gather the tstat logs generated for ``pcap_filepath`` into one dictionary.

    The logs are looked up relative to the current working directory: first
    in a directory named after the pcap file (basename with its last five
    characters removed), then in the first entry that directory lists.

    Returns the dictionary filled by the two ``extract_tstat_data_tcp_*``
    helpers (complete connections first, then non-complete ones).
    """
    flows = {}
    next_id = 0
    trace_dir = os.path.basename(pcap_filepath[:-5])
    with co.cd(trace_dir):
        # Must be listed only once we are inside trace_dir
        first_entry = os.listdir('.')[0]
        with co.cd(first_entry):
            # Fully observed TCP connections
            flows, next_id = extract_tstat_data_tcp_complete('log_tcp_complete', flows, next_id)
            # Partially observed TCP connections (less info, still worth keeping)
            flows, next_id = extract_tstat_data_tcp_nocomplete('log_tcp_nocomplete', flows, next_id)

    return flows
Example #2
0
def read_graphs(filename,path=""):
	"""Load '#'-terminated GML graphs from a file and collect their label sets.

	The file is opened from inside ``path``; its text is split on '#' and the
	piece after the last '#' is discarded. Each graph gets a ``node_map``
	graph attribute mapping every node to its rank in sorted node order.

	Returns ``(graphs, node_labels, edge_labels)`` where both label
	collections are the sorted 'type' attribute values passed through
	``cm.fill_label_set``.
	"""
	# Read the raw text while inside `path`
	with cm.cd(path):
		with open(filename,"r") as handle:
			raw_chunks = handle.read().split("#")[:-1]

	# Parse every chunk into a graph
	parsed = [nx.parse_gml(chunk) for chunk in raw_chunks]

	# Accumulate the distinct 'type' values of nodes and edges
	node_types = set()
	edge_types = set()
	for graph in parsed:
		node_types.update(list(nx.get_node_attributes(graph,'type').values()))
		edge_types.update(list(nx.get_edge_attributes(graph,'type').values()))

	ordered_nodes = sorted(node_types)
	ordered_edges = sorted(edge_types)

	# Edge labels are filled before node labels, as in the original call order
	filled_edges = cm.fill_label_set(ordered_edges)
	filled_nodes = cm.fill_label_set(ordered_nodes)

	for graph in parsed:
		graph.graph['node_map'] = {node:rank for rank,node in enumerate(sorted(graph.nodes()))}

	return parsed,filled_nodes,filled_edges
Example #3
0
def run_inference(project):
    """Run the dljc 'inference' tool with the ontology checker on ``project``.

    Appends the generic-type-inference-solver ``bin`` directory (under
    ``$JSR308``) to ``CLASSPATH``, removes any previous annotations
    directory, cleans the project and then invokes dljc from inside the
    project directory.
    """
    common.setup_checker_framework_env()

    solver_bin = os.path.join(os.environ['JSR308'],
                              'generic-type-inference-solver', 'bin')
    current_classpath = os.environ.get('CLASSPATH')
    # Only prepend the existing value (and a separator) when there is one
    os.environ['CLASSPATH'] = (current_classpath + ':' + solver_bin
                               if current_classpath else solver_bin)

    project_dir = common.get_project_dir(project)
    annotation_dir = os.path.join(project_dir, common.DLJC_OUTPUT_DIR,
                                  'annotations')

    # Start from a clean annotations directory
    if os.path.isdir(annotation_dir):
        shutil.rmtree(annotation_dir)

    dljc_options = [
        '--solverArgs=backEndType=maxsatbackend.MaxSat',
        '--checker', 'ontology.OntologyChecker',
        '--solver', 'constraintsolver.ConstraintSolver',
        '-m', 'ROUNDTRIP',
        '-afud', annotation_dir,
    ]
    with common.cd(project_dir):
        common.clean_project(project)
        common.run_dljc(project, ['inference'], dljc_options)
Example #4
0
def sweep_work(hard_sweep=False):
    """Call ``payu_sweep`` from inside ``ctrldir`` for the lab at ``labdir``.

    ``hard_sweep`` is forwarded unchanged; model type and config path are
    left as ``None``.
    """
    with cd(ctrldir):
        sweep_options = dict(
            model_type=None,
            config_path=None,
            hard_sweep=hard_sweep,
            lab_path=str(labdir),
        )
        payu_sweep(**sweep_options)
def run_petablox(project):
  """Launch petablox (analysis 'cipa-0cfa-dlog') from the project's directory."""
  jvm_flags = ['-Dpetablox.reflect.kind=none',
               '-Dpetablox.run.analyses=cipa-0cfa-dlog']
  project_dir = common.get_project_dir(project)
  with common.cd(project_dir):
    # The jar is resolved after changing directory, as before
    jar_path = common.get_jar('petablox.jar')
    command = ['java', '-cp', jar_path] + jvm_flags + ['petablox.project.Boot']
    common.run_cmd(command)
def run_petablox(project):
    """Run the petablox 'cipa-0cfa-dlog' analysis inside the project directory."""
    target_dir = common.get_project_dir(project)
    with common.cd(target_dir):
        # Assemble the java command line piece by piece
        command = ['java', '-cp', common.get_jar('petablox.jar')]
        command.append('-Dpetablox.reflect.kind=none')
        command.append('-Dpetablox.run.analyses=cipa-0cfa-dlog')
        command.append('petablox.project.Boot')
        common.run_cmd(command)
def recompile_checker_framework():
  """Rebuild checker-framework-inference by running ``gradle dist -i``.

  The build is run from ``<common.TOOLS_DIR>/checker-framework-inference``
  after setting up the checker framework environment.

  Exits the process with status 1 (after printing an error) when the
  JAVA_HOME environment variable is unset or empty.
  """
  if not os.environ.get('JAVA_HOME'):
    # Single-argument print() behaves the same on Python 2 and Python 3
    print("ERROR in pa2checker.recompile_checker_framework(): Gradle will fail if your JAVA_HOME environment variable is unset. Please set it and try again.")
    # This is a failure path, so report a non-zero status to the caller/shell
    sys.exit(1)
  type_infer_tool_dir = os.path.join(common.TOOLS_DIR, "checker-framework-inference")
  with common.cd(type_infer_tool_dir):
    common.setup_checker_framework_env()
    common.run_cmd(["gradle", "dist", "-i"], print_output=True)
Example #8
0
def recompile_checker_framework():
    """Rebuild checker-framework-inference by running ``gradle dist -i``.

    The build is run from ``<common.TOOLS_DIR>/checker-framework-inference``
    after setting up the checker framework environment.

    Exits the process with status 1 (after printing an error) when the
    JAVA_HOME environment variable is unset or empty.
    """
    if not os.environ.get('JAVA_HOME'):
        # Single-argument print() behaves the same on Python 2 and Python 3
        print("ERROR in pa2checker.recompile_checker_framework(): Gradle will fail if your JAVA_HOME environment variable is unset. Please set it and try again.")
        # This is a failure path, so report a non-zero status to the caller/shell
        sys.exit(1)
    type_infer_tool_dir = os.path.join(common.TOOLS_DIR,
                                       "checker-framework-inference")
    with common.cd(type_infer_tool_dir):
        common.setup_checker_framework_env()
        common.run_cmd(["gradle", "dist", "-i"], print_output=True)
Example #9
0
def test_init():
    """Check that ``payu_init`` creates the expected laboratory sub-directories."""
    # Set up the laboratory from inside the control directory
    with cd(ctrldir):
        payu_init(None, None, str(labdir))

    # Every one of these must now exist as a directory under labdir
    expected_subdirs = ('bin', 'input', 'archive', 'codebase')
    for name in expected_subdirs:
        created = labdir / name
        assert created.is_dir()
Example #10
0
def insert_anno_to_project(project, jaif_file):
    """Run ``insert-annotations-to-source`` with ``jaif_file`` on the project's Java files.

    The command is executed from inside the project directory after the
    checker framework environment has been set up.
    """
    project_dir = common.get_project_dir(project)
    with common.cd(project_dir):
        common.setup_checker_framework_env()
        # NOTE(review): the earlier comment said glob2 is used for a recursive
        # search; if `glob` here is the stdlib module, '**' is not recursive
        # without recursive=True -- confirm which module is imported.
        java_pattern = '{}/**/*.java'.format(project_dir)
        java_sources = glob.glob(java_pattern)
        command = ['insert-annotations-to-source', '-i', jaif_file] + java_sources
        common.run_cmd(command, print_output=True)
Example #11
0
def update():
    """Self-update by cloning ``WOLF_REPO_URL`` and pip-installing the clone.

    Steps: verify that ``pip`` and ``git`` are on the PATH (exit status 1 if
    either is missing), detect the pip version to decide whether
    ``--process-dependency-links`` is passed, clone the repository into a
    fresh temporary directory and run ``sudo pip install -U`` from there.
    """
    for cmd in ['pip', 'git']:
        if not spawn.find_executable(cmd):
            print('{0} not found. are you sure it\'s installed?'.format(cmd))
            exit(1)

    # universal_newlines=True makes the pipe yield text on Python 3 too, so the
    # str-based parsing below works on both major versions; communicate()
    # also reaps the child instead of leaving it un-waited.
    pip_proc = subprocess.Popen('pip --version', shell=True, stdout=subprocess.PIPE,
                                universal_newlines=True)
    pip_output, _ = pip_proc.communicate()
    # Only the first output line is inspected; its second space-separated
    # token is taken as the version.
    first_line = pip_output.split('\n', 1)[0]
    pip_version = first_line.split(' ')[1]
    pip_flags = '--process-dependency-links' if pip_version.startswith('1.5') else ''

    repo = tempfile.mkdtemp(suffix='lobo')

    subprocess.Popen('git clone {0} {1}'.format(WOLF_REPO_URL, repo), shell=True).wait()
    with cd(repo):
        subprocess.Popen('sudo pip install -U {0} .'.format(pip_flags), shell=True).wait()
        print(BOLD('update complete'))
Example #12
0
def payu_setup(model_type=None,
               config_path=None,
               lab_path=None,
               force_archive=None,
               reproduce=None):
    """
    Run the original setup command from inside ctrldir, with defaults for
    every argument, after first sweeping (non-hard) the lab at labdir.
    """
    setup_args = (model_type, config_path, lab_path, force_archive, reproduce)
    with cd(ctrldir):
        # Sweep always targets labdir, independent of the lab_path argument
        sweep_options = {
            'model_type': None,
            'config_path': None,
            'hard_sweep': False,
            'lab_path': str(labdir),
        }
        payu_sweep(**sweep_options)
        payu_setup_orignal(*setup_args)
Example #13
0
    def __call__(self, cmd, post_traversal=False, concurrent=False):
        """Run ``cmd`` in each non-excluded submodule, yielding ``(submodule, output)``.

        Args:
            cmd: either a callable (invoked with no arguments) or a value that
                is handed to ``gitcmd``.
            post_traversal: when true, submodules are requested with
                ``root_first=False``.
            concurrent: when true, the command is dispatched through a
                4-worker ``ThreadPool`` and its result is awaited right away.

        Yields:
            ``(submodule, o)`` where ``o`` is what the command returned.
            Submodules reported as excluded are skipped entirely.
        """
        for submodule in self.all_submodules(root_first=not post_traversal):
            with cd(submodule):
                if modules.is_module_excluded(submodule):
                    continue

                if concurrent:
                    # The pool is created only when needed and always shut
                    # down, so worker threads do not pile up per submodule.
                    pool = ThreadPool(processes=4)
                    try:
                        if callable(cmd):
                            o = pool.apply_async(cmd).get()
                        else:
                            o = pool.apply_async(gitcmd, (cmd, False)).get()
                    finally:
                        pool.close()
                        pool.join()
                elif callable(cmd):
                    o = cmd()
                else:
                    o = gitcmd(cmd, git=False)
            yield submodule, o
Example #14
0
    def __call__(self, cmd, post_traversal=False, concurrent=False):
        """Run ``cmd`` in each non-excluded submodule, yielding ``(submodule, output)``.

        Args:
            cmd: either a callable (invoked with no arguments) or a value that
                is handed to ``gitcmd``.
            post_traversal: when true, submodules are requested with
                ``root_first=False``.
            concurrent: when true, the command is dispatched through a
                4-worker ``ThreadPool`` and its result is awaited right away.

        Yields:
            ``(submodule, o)`` where ``o`` is what the command returned.
            Submodules reported as excluded are skipped entirely.
        """
        for submodule in self.all_submodules(root_first=not post_traversal):
            with cd(submodule):
                if modules.is_module_excluded(submodule):
                    continue

                if concurrent:
                    # The pool is created only when needed and always shut
                    # down, so worker threads do not pile up per submodule.
                    pool = ThreadPool(processes=4)
                    try:
                        if callable(cmd):
                            o = pool.apply_async(cmd).get()
                        else:
                            o = pool.apply_async(gitcmd, (cmd, False)).get()
                    finally:
                        pool.close()
                        pool.join()
                elif callable(cmd):
                    o = cmd()
                else:
                    o = gitcmd(cmd, git=False)
            yield submodule, o
def main():
    """Smoke-test invariant checking on ``test.dtrace.gz`` inside WORKING_DIR.

    Generates and compiles a ``TestInvariant`` Daikon invariant from
    README.md, looks up the program points that establish it, removes the
    generated files and prints the program points found.
    """
    with common.cd(WORKING_DIR):
        test_dtrace = "test.dtrace.gz"
        test_inv_name = "TestInvariant"
        ontology_to_daikon.create_daikon_invariant("README.md", test_inv_name)
        cmd = ["javac", "-classpath", daikon_jar + ":.", test_inv_name + ".java"]
        common.run_cmd(cmd, print_output=True)
        print("Finding program points")
        ppts = find_ppts_that_establish_inv(test_dtrace, WORKING_DIR, test_inv_name)
        print("deleting temp files")
        os.remove(test_inv_name + ".class")
        os.remove(test_inv_name + ".java")
        os.remove("test.inv.gz")
        # output = run_daikon_on_dtrace_file(test_dtrace, checked_invariant="daikon.inv.unary.sequence.EltwiseIntLessThan")
        # print output
        # ppts = find_ppts_that_establish_inv_in_daikon_output(output, " sorted by ")
        print("Methods that establish FirstMuseInvariant:")
        for ppt in ppts:
            # print() with one argument works on both Python 2 and Python 3
            print(ppt)
Example #16
0
def build_jar(project_name):
    """Build the jar of ``project_name`` using the project's configured 'jar' command.

    The first word of the command selects the build system: for ``mvn`` and
    ``gradle`` the matching ``add_*_deps`` helper is run on the project
    directory first. Projects without a 'jar' entry, or with an unknown
    build system, are skipped with a message.
    """
    project = common.project_info(project_name)
    project_dir = common.get_project_dir(project_name)
    if 'jar' not in project:
        # The placeholder was previously printed literally; fill in the name
        print('No jar command available, skipping {}.'.format(project_name))
        return

    jar_cmd = project['jar'].strip().split()
    build_system = jar_cmd[0]

    if build_system == "mvn":
        add_mvn_deps(project_dir)
    elif build_system == "gradle":
        add_gradle_deps(project_dir)
    else:
        print("Don't know how to build jar file for {} projects".format(
            build_system))
        return

    with common.cd(project_dir):
        common.run_cmd(jar_cmd)
Example #17
0
def main():
    """Smoke-test invariant checking on ``test.dtrace.gz`` inside the working dir.

    Generates and compiles a ``TestInvariant`` Daikon invariant from
    README.md, looks up the program points that establish it, removes the
    generated files and prints the program points found.
    """
    with common.cd(common.WORKING_DIR):
        test_dtrace = "test.dtrace.gz"
        test_inv_name = "TestInvariant"
        ontology_to_daikon.create_daikon_invariant("README.md", test_inv_name)
        cmd = [
            "javac", "-classpath", daikon_jar + ":.", test_inv_name + ".java"
        ]
        common.run_cmd(cmd, print_output=True)
        print("Finding program points")
        # NOTE(review): bare WORKING_DIR here vs common.WORKING_DIR above --
        # confirm both names refer to the same directory.
        ppts = find_ppts_that_establish_inv(test_dtrace, WORKING_DIR,
                                            test_inv_name)
        print("deleting temp files")
        os.remove(test_inv_name + ".class")
        os.remove(test_inv_name + ".java")
        os.remove("test.inv.gz")
        #output = run_daikon_on_dtrace_file(test_dtrace, checked_invariant="daikon.inv.unary.sequence.EltwiseIntLessThan")
        #print output
        #ppts = find_ppts_that_establish_inv_in_daikon_output(output, " sorted by ")
        print("Methods that establish FirstMuseInvariant:")
        for ppt in ppts:
            # print() with one argument works on both Python 2 and Python 3
            print(ppt)
def run_inference(project):
  """Run dljc 'inference' with the ontology checker on ``project``.

  Extends ``CLASSPATH`` with the generic-type-inference-solver ``bin``
  directory under ``$JSR308``, wipes any previous annotations directory and
  then cleans and processes the project from inside its directory.
  """
  common.setup_checker_framework_env()

  solver_bin = os.path.join(os.environ['JSR308'], 'generic-type-inference-solver', 'bin')
  previous = os.environ.get('CLASSPATH')
  if not previous:
    os.environ['CLASSPATH'] = solver_bin
  else:
    os.environ['CLASSPATH'] = previous + ':' + solver_bin

  project_dir = common.get_project_dir(project)
  annotation_dir = os.path.join(project_dir, common.DLJC_OUTPUT_DIR, 'annotations')

  # Remove annotations left over from an earlier run
  if os.path.isdir(annotation_dir):
    shutil.rmtree(annotation_dir)

  tools = ['inference']
  tool_options = ['--solverArgs=backEndType=maxsatbackend.MaxSat']
  tool_options += ['--checker', 'ontology.OntologyChecker']
  tool_options += ['--solver', 'constraintsolver.ConstraintSolver']
  tool_options += ['-m', 'ROUNDTRIP']
  tool_options += ['-afud', annotation_dir]

  with common.cd(project_dir):
    common.clean_project(project)
    common.run_dljc(project, tools, tool_options)
def revert_checker_source():
  """Discard local changes under SOLVER_SRC_DIR: ``git clean -f .`` then ``git checkout .``."""
  git_steps = (['clean', '-f', '.'], ['checkout', '.'])
  with common.cd(SOLVER_SRC_DIR):
    for step in git_steps:
      common.run_cmd(['git'] + step)
Example #20
0
			#print(n_neighbors)
			n_cluster, connected = eg.calculate_gap(data_matrix,
											n_neighbors=n_neighbors,path=dir_path)
			analytics.append([int(n_cluster),connected,n_neighbors])
			if connected:
				break

		max_gap.append(tuple(max(analytics)))

		connected_gaps = list(filter(lambda x: x[1],analytics))
		if len(connected_gaps):
			max_gap_connected.append(max(connected_gaps))
		else:
			max_gap_connected.append((0,True))

		
		analytics = {'analytics':analytics, 'n_neighbors':neighbors, 'n_components': n_components}

		with (dir_path / 'analytics.json' ).open(mode='w') as out_analytics:
			json.dump(analytics,out_analytics,indent=4)
		
	with (Path(path) / 'analytics.json' ).open(mode='w') as out_analytics:
			json.dump({'max_gap':max_gap, 'max_gap_connected':max_gap_connected},out_analytics,indent=4)

	with cm.cd(path):
		join_labels = True
		fancy = ("Joined" if join_labels else "")
		dataset = pdbids_file.split(".")[0] + "_" + (fancy[0]) + ("_T" if join_labels else "_F")

		plot_max_gap(cl_range,max_gap,max_gap_connected,save=True,show=False,fname="eigen_summary.png")
		plot_eigen_gap(max_gap_connected,var_comp,fancy=fancy,dataset=dataset,adjust=False)
def process_trace(
    pcap_filepath,
    graph_dir_exp,
    stat_dir_exp,
    aggl_dir_exp,
    rtt_dir_exp,
    rtt_subflow_dir_exp,
    failed_conns_dir_exp,
    acksize_dir_exp,
    acksize_tcp_dir_exp,
    plot_cwin,
    tcpcsm,
    min_bytes=0,
    light=False,
    return_dict=False,
):
    """ Process a mptcp pcap file with mptcptrace, then hand over to the TCP processing.

        mptcptrace is run inside a temporary directory created under the current
        working directory. The .xpl and .csv files it produces are parsed, then
        moved under graph_dir_exp or deleted, and the temporary directory is
        removed. If MPTCP connections were found, tcp.process_trace is called.

        Notice that we can't change dir per thread, we should use processes

        Parameters used in this body:
          pcap_filepath: path of the pcap file; its basename without the last
              five characters prefixes the names of the moved files.
          graph_dir_exp: destination root for kept .xpl/.csv files.
          stat_dir_exp, rtt_dir_exp, acksize_dir_exp: where co.save_data stores
              connections, RTT data and ack-size data when return_dict is false.
          failed_conns_dir_exp, acksize_tcp_dir_exp, tcpcsm: forwarded to
              tcp.process_trace.
          light: when true, throughput csv files are not processed and
              unrecognised csv files are deleted rather than kept.
          return_dict: when true, nothing is kept on disk and the data is
              returned instead of saved.
        aggl_dir_exp, rtt_subflow_dir_exp, plot_cwin and min_bytes are accepted
        but not referenced in this function.

        Returns (connections, tcp_connections, rtt_all, acksize_all,
        acksize_all_tcp) when return_dict is true and connections were
        processed; otherwise None.
    """
    # if not check_mptcp_joins(pcap_filepath):
    #     print("WARNING: no mptcp joins on " + pcap_filepath, file=sys.stderr)
    # Scratch directory for the mptcptrace output, created under the current working directory
    csv_tmp_dir = tempfile.mkdtemp(dir=os.getcwd())
    connections = None
    # Only set once the whole mptcptrace post-processing finished without MPTCPTraceError
    do_tcp_processing = False
    try:
        with co.cd(csv_tmp_dir):
            # If segmentation faults, remove the -S option
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-S', '-t', '5000', '-w', '0']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3', '-a']
            # connections = process_mptcptrace_cmd(cmd, pcap_filepath)
            #
            # # Useful to count the number of reinjected bytes
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-s', '-a', '-t', '5000', '-w', '2']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3']
            # devnull = open(os.devnull, 'w')
            # if subprocess.call(cmd, stdout=devnull) != 0:
            #     raise MPTCPTraceError("Error of mptcptrace with " + pcap_filepath)
            # devnull.close()
            #
            # cmd = ['mptcptrace', '-f', pcap_filepath, '-r', '2', '-t', '5000', '-w', '2']
            # if not light:
            #     cmd += ['-G', '250', '-r', '2', '-F', '3']
            # devnull = open(os.devnull, 'w')
            # if subprocess.call(cmd, stdout=devnull) != 0:
            #     raise MPTCPTraceError("Error of mptcptrace with " + pcap_filepath)
            # devnull.close()

            cmd = ["mptcptrace", "-f", pcap_filepath, "-s", "-S", "-a", "-A", "-R", "-r", "2", "-t", "5000", "-w", "2"]
            connections = process_mptcptrace_cmd(cmd, pcap_filepath)

            # The mptcptrace call will generate .xpl files to cope with
            # First see all xpl files, to detect the relative 0 of all connections
            # Also, compute the duration and number of bytes of the MPTCP connection
            first_pass_on_files(connections)
            # Per-direction accumulators filled by process_rtt_csv / collect_acksize_csv below
            rtt_all = {co.C2S: {}, co.S2C: {}}
            acksize_all = {co.C2S: {}, co.S2C: {}}

            # Then really process xpl files
            if return_dict:
                # Nothing is kept on disk in return_dict mode: just delete the .xpl files
                for xpl_fname in glob.glob(os.path.join("*.xpl")):
                    try:
                        os.remove(xpl_fname)
                    except IOError as e:
                        print(str(e), file=sys.stderr)
            else:
                # Keep the .xpl files, sorted into the RTT or throughput sub-directory
                for xpl_fname in glob.glob(os.path.join("*.xpl")):
                    try:
                        directory = co.DEF_RTT_DIR if MPTCP_RTT_FNAME in xpl_fname else co.TSG_THGPT_DIR
                        shutil.move(
                            xpl_fname,
                            os.path.join(
                                graph_dir_exp,
                                directory,
                                os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(xpl_fname),
                            ),
                        )
                    except IOError as e:
                        print(str(e), file=sys.stderr)

            # And by default, save only seq csv files
            for csv_fname in glob.glob(os.path.join("*.csv")):
                if not light:
                    if MPTCP_GPUT_FNAME in os.path.basename(csv_fname):
                        process_gput_csv(csv_fname, connections)
                # Dispatch on the csv file name; IOError from any branch is reported and the loop goes on
                try:
                    if os.path.basename(csv_fname).startswith(MPTCP_ADDADDR_FNAME):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        process_add_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)

                    elif os.path.basename(csv_fname).startswith(MPTCP_RMADDR_FNAME):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        process_rm_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)

                    elif MPTCP_RTT_FNAME in os.path.basename(csv_fname):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        is_reversed = is_reverse_connection(os.path.basename(csv_fname))
                        process_rtt_csv(csv_fname, rtt_all, connections, conn_id, is_reversed)
                        os.remove(csv_fname)
                        # co.move_file(csv_fname, os.path.join(
                        #    graph_dir_exp, co.DEF_RTT_DIR, os.path.basename(pcap_filepath[:-5]) + "_" + csv_fname))
                    elif MPTCP_SEQ_FNAME in os.path.basename(csv_fname):
                        conn_id = get_connection_id(os.path.basename(csv_fname))
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        is_reversed = is_reverse_connection(os.path.basename(csv_fname))
                        process_csv(csv_fname, connections, conn_id, is_reversed)
                        if return_dict:
                            # Best-effort delete: any failure is deliberately ignored here
                            try:
                                os.remove(csv_fname)
                            except Exception:
                                pass
                        else:
                            co.move_file(
                                csv_fname,
                                os.path.join(
                                    graph_dir_exp,
                                    co.TSG_THGPT_DIR,
                                    os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(csv_fname),
                                ),
                            )
                    elif MPTCP_ACKSIZE_FNAME in os.path.basename(csv_fname):
                        collect_acksize_csv(csv_fname, connections, acksize_all)
                        os.remove(csv_fname)
                    else:
                        # Unrecognised csv: kept only in full (non-light) mode when saving to disk
                        if not light and not return_dict:
                            co.move_file(
                                csv_fname,
                                os.path.join(
                                    graph_dir_exp,
                                    co.TSG_THGPT_DIR,
                                    os.path.basename(pcap_filepath[:-5]) + "_" + os.path.basename(csv_fname),
                                ),
                            )
                        else:
                            os.remove(csv_fname)
                except IOError as e:
                    print(str(e), file=sys.stderr)

            do_tcp_processing = True

    except MPTCPTraceError as e:
        # presumably raised by process_mptcptrace_cmd when mptcptrace fails -- verify against its definition
        print(str(e) + "; skip mptcp process", file=sys.stderr)

    # NOTE(review): not inside a finally block, so an exception other than
    # MPTCPTraceError propagates without removing the temporary directory.
    shutil.rmtree(csv_tmp_dir)

    # This will save the mptcp connections
    if connections and do_tcp_processing:
        dicts = tcp.process_trace(
            pcap_filepath,
            graph_dir_exp,
            stat_dir_exp,
            failed_conns_dir_exp,
            acksize_tcp_dir_exp,
            tcpcsm,
            mptcp_connections=connections,
            light=light,
            return_dict=return_dict,
        )
        if return_dict:
            tcp_connections, acksize_all_tcp = dicts
            return connections, tcp_connections, rtt_all, acksize_all, acksize_all_tcp
        else:
            co.save_data(pcap_filepath, acksize_dir_exp, acksize_all)
            co.save_data(pcap_filepath, rtt_dir_exp, rtt_all)
            co.save_data(pcap_filepath, stat_dir_exp, connections)
Example #22
0
def revert_checker_source():
    """Reset SOLVER_SRC_DIR: remove untracked files, then check out tracked ones."""
    clean_cmd = ['git', 'clean', '-f', '.']
    checkout_cmd = ['git', 'checkout', '.']
    with common.cd(SOLVER_SRC_DIR):
        for git_cmd in (clean_cmd, checkout_cmd):
            common.run_cmd(git_cmd)
def main(corpus, annotations):
  """ SUMMARY: use case of the user-driven functionality of PASCALI.
  Scenario: User provides the concept of Sequence and the equivalent Java
  types, and the concept of sorted sequence and the relevant type invariant.
  Goal: learn how to get from Sequence -> Sorted Sequence.

  Args:
    corpus: collection of project names; it is iterated more than once, so it
      must be re-iterable.
    annotations: dictionary mapping string -> list of strings, handed to
      run_pa2checker.

  Side effects visible in this body: writes "TODO_from_Howie.txt" and
  "methods.txt" in the current directory, creates and later removes
  <common.WORKING_DIR>/invClass, runs external commands through
  common.run_cmd and prints progress to stdout. Returns None.
  """

  # INPUT: annotations, dictionary mapping string -> list of strings
  # OUTPUT: recompiles generic-inference-solver with new annotations
  run_pa2checker(annotations)

  # Look for new mapping from 'ontology concepts'->'java type' and run
  # checker framework. Should be implemented in type_inference
  # Mapping example:
  #   Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc.
  #
  # INPUT: corpus, file containing set of concept->java_type mapping
  # OUTPUT: Set of jaif files that are merged into the classes. The jaif files are
  #         stored as default.jaif in each project's directory.
  # BODY: This also triggers back-end labeled graph generation.
  for project in corpus:
    run_inference(project)

  # Missing step: interact with PA to add a definition of Sorted Sequence
  # which is a specialization of Sequence that has a sortedness invariants.
  # The sortedness invariant gets turned into a Daikon template
  # INPUT: user interaction
  # OUTPUT: type_annotation and type_invariant (for sorted sequence)
  ordering_operator = "<="

  ontology_invariant_file = "TODO_from_Howie.txt"
  with open(ontology_invariant_file, 'w') as f:
    f.write(ordering_operator)

  invariant_name = "TODO_sorted_sequence"

  daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(ontology_invariant_file, invariant_name)

  # Find all methods that have one input parameter annotated as Sequence and return a variable also
  # annotated as Sequence.
  # INPUT: The corpus and the desired annotations on the method signature
  # OUTPUT: List of methods that have the desired signature.
  # NOTE: This is a stub and will be implemented as LB query in the future.
  sig_methods = find_methods_with_signature(corpus, "@ontology.qual.Sequence", ["@ontology.qual.Sequence"])
  print("\n   ************")
  # The stray, never-formatted "{}" placeholder was dropped from this message
  print("The following corpus methods have the signature Sequence->Sequence:")
  for (project, package, clazz, method) in sig_methods:
    print("{}:\t{}.{}.{}".format(project, package, clazz, method))
  print("\n   ************")

  # Search for methods that have a return type annotated with Sequence
  # and for which we can establish a sortedness invariant (may done by LB).
  #
  # INPUT: dtrace file of project
  #        daikon_pattern_java_file that we want to check on the dtrace file.
  #
  # OUTPUT: list of ppt names that establish the invariant. Here a ppt
  # is a Daikon program point, s.a. test01.TestClass01.sort(int[]):::EXIT
  #
  # Note: this step translate the type_invariant into a Daikon
  # template (which is a Java file).
  pattern_class_name = invariant_name
  pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
  if os.path.isdir(pattern_class_dir):
    shutil.rmtree(pattern_class_dir)
  os.mkdir(pattern_class_dir)

  cmd = ["javac", "-g", "-classpath", common.get_jar('daikon.jar'),
         daikon_pattern_java_file, "-d", pattern_class_dir]
  common.run_cmd(cmd)

  list_of_methods = []
  for project in corpus:
    dtrace_file = backend.get_dtrace_file_for_project(project)
    if not dtrace_file:
      print("Ignoring folder {} because it does not contain dtrace file".format(project))
      continue
    ppt_names = inv_check.find_ppts_that_establish_inv(dtrace_file, pattern_class_dir, pattern_class_name)
    methods = set()
    for ppt in ppt_names:
      # partition() keeps the whole name when ':::EXIT' is absent, whereas
      # slicing with find() == -1 silently dropped the last character.
      method_name = ppt.partition(':::EXIT')[0]
      methods.add(method_name)
    list_of_methods.append((project, methods))

  print("\n   ************")
  print("The following corpus methods return a sequence sorted by {}:".format(ordering_operator))
  for project, methods in list_of_methods:
    if len(methods)>0:
      print(project)
      for m in methods:
        print("\t{}".format(m))
  print("\n   ************")

  shutil.rmtree(pattern_class_dir)

  # Expansion of dynamic analysis results ....
  # Find a list of similar methods that are similar to the ones found above (list_of_methods).
  # INPUT: list_of_methods, corpus with labeled graphs generated, threshold value for similarity,
  # OUTPUT: superset_list_of_methods

  # WENCHAO
  print("Expanding the dynamic analysis results using graph-based similarity:")
  union_set = set()
  for project, methods in list_of_methods:
    # map Daikon output on sort method to method signature in methods.txt in generated graphs
    for m in methods:
      method_name = common.get_method_from_daikon_out(m)
      #kernel_file = common.get_kernel_path(project)
      method_file = common.get_method_path(project)
      dot_name = common.find_dot_name(method_name, method_file)
      if dot_name:
        # find the right dot file for each method
        dot_file = common.get_dot_path(project, dot_name)
        # find all graphs that are similar to it using WL based on some threshold
        sys.path.insert(0, 'simprog')
        from similarity import Similarity
        sim = Similarity()
        sim.read_graph_kernels("corpus_kernel.txt")
        top_k = 3
        iter_num = 3
        result_program_list_with_score = sim.find_top_k_similar_graphs(dot_file, 'g', top_k, iter_num)
        print(project+":")
        print(result_program_list_with_score)
        result_set = set([x[0] for x in result_program_list_with_score])
        # take the union of all these graphs
        union_set = union_set | result_set
  print("Expanded set:")
  print([x.split('/')[-4] for x in union_set])

  # return this set as a list of (project, method)
  # NOTE(review): this writes "methods.txt" relative to the current directory,
  # while the ordering step below reads <common.WORKING_DIR>/methods.txt --
  # confirm the two are the same location.
  with open("methods.txt", "w") as fo:
    for dot_path in union_set:
      method_summary = common.get_method_summary_from_dot_path(dot_path)
      fo.write(method_summary)
      fo.write("\n")

  # Update the type annotations for the expanded dynamic analysis results.
  # INPUT: superset_list_of_methods, annotation to be added
  # OUTPUT: nothing
  # EFFECT: updates the type annotations of the methods in superset_list_of_methods.
  # This requires some additional checks to make sure that the methods actually
  # perform some kind of sorting. Note that we do it on the superset because the original
  # list_of_methods might miss many implementations because fuzz testing could not
  # reach them.
  for class_file in []: # MARTIN
    generated_jaif_file = "TODO"
    insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)

  # Ordering of expanded dynamic analysis results ....
  # Find the k 'best' implementations in superset of list_of_methods
  # INPUT: superset_list_of_methods, corpus, k
  # OUTPUT: k_list_of_methods
  # Note: similarity score is used. may consider using other scores; e.g., TODO:???

  #TODO: create input file for huascar where each line is formatted like:
  # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]

  ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")

  methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
  with common.cd(ordering_dir):
    #TODO generate a proper relevant methods file.
    cmd = ["./run.sh",
           "-k", "3",
           "-t", "typicality",
           "-f", methods_file]
    common.run_cmd(cmd, print_output=True)

  # Close the loop and add the best implementation found in the previous
  # step back to the ontology.
  # INPUT: k_list_of_methods
  # OUTPUT: patch file for the ontology. Worst case: just add the 'best' implementation
  # found in the corpus as a blob to the ontology. Best case: generate an equivalent
  # flow-graph in the ontology.
  print("TODO") # ALL
Example #24
0
def main(corpus, annotations, limit=3):
    """ SUMMARY: use case of the user-driven functionality of PASCALI.

    Scenario: User provides the concept of Sequence and the equivalent Java
    types, and the concept of sorted sequence and the relevant type invariant.
    Goal: learn how to get from Sequence -> Sorted Sequence.

    INPUT:
      corpus: collection of projects. It is iterated more than once, and each
              project is concatenated with ':' when printed, so entries are
              expected to be strings.
      annotations: dictionary mapping string -> list of strings, handed to
              run_pa2checker.
      limit: value passed as the "-k" argument to ordering_results/run.sh.
    OUTPUT: nothing is returned; progress is printed to stdout.
    EFFECT:
      - writes "TODO_from_Howie.txt" in the current working directory,
      - (re)creates and finally removes <WORKING_DIR>/invClass,
      - writes <WORKING_DIR>/methods.txt,
      - runs ./run.sh inside <WORKING_DIR>/ordering_results/.
    """
    # INPUT: annotations, dictionary mapping string -> list of strings
    # OUTPUT: recompiles generic-inference-solver with new annotations
    run_pa2checker(annotations)

    # Look for new mapping from 'ontology concepts'->'java type' and run
    # checker framework. Should be implemented in type_inference
    # Mapping example:
    #   Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc.
    #
    # INPUT: corpus, file containing set of concept->java_type mapping
    # OUTPUT: Set of jaif files that are merged into the classes. The jaif
    #         files are stored as default.jaif in each project's directory.
    # BODY: This also triggers back-end labeled graph generation.
    for project in corpus:
        run_inference(project)

    # Missing step: interact with PA to add a definition of Sorted Sequence
    # which is a specialization of Sequence that has a sortedness invariant.
    # The sortedness invariant gets turned into a Daikon template.
    # INPUT: user interaction
    # OUTPUT: type_annotation and type_invariant (for sorted sequence)
    ordering_operator = "<="

    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:
        f.write(ordering_operator)

    invariant_name = "TODO_sorted_sequence"

    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)

    # Find all methods that have one input parameter annotated as Sequence
    # and return a variable also annotated as Sequence.
    # INPUT: The corpus and the desired annotations on the method signature
    # OUTPUT: List of methods that have the desired signature.
    # NOTE: This is a stub and will be implemented as LB query in the future.
    sig_methods = find_methods_with_signature(corpus,
                                              "@ontology.qual.Sequence",
                                              ["@ontology.qual.Sequence"])
    print("\n   ************")
    # The original message carried a stray, never-formatted "{}" placeholder.
    print(
        "The following corpus methods have the signature Sequence->Sequence:")
    for (project, package, clazz, method) in sig_methods:
        print("{}:\t{}.{}.{}".format(project, package, clazz, method))
    print("\n   ************")

    # Search for methods that have a return type annotated with Sequence
    # and for which we can establish a sortedness invariant (may be done by
    # LB).
    #
    # INPUT: dtrace file of project
    #        daikon_pattern_java_file that we want to check on the dtrace file.
    #
    # OUTPUT: list of ppt names that establish the invariant. Here a ppt
    # is a Daikon program point, s.a. test01.TestClass01.sort(int[]):::EXIT
    #
    # Note: this step translates the type_invariant into a Daikon
    # template (which is a Java file).
    pattern_class_name = invariant_name
    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    # Start from an empty output directory for the compiled pattern class.
    if os.path.isdir(pattern_class_dir):
        shutil.rmtree(pattern_class_dir)
    os.mkdir(pattern_class_dir)

    cmd = [
        "javac", "-g", "-classpath",
        common.get_jar('daikon.jar'), daikon_pattern_java_file, "-d",
        pattern_class_dir
    ]
    common.run_cmd(cmd)

    list_of_methods = []
    for project in corpus:
        dtrace_file = backend.get_dtrace_file_for_project(project)
        if not dtrace_file:
            print("Ignoring folder {} because it does not contain dtrace file".
                  format(project))
            continue
        ppt_names = inv_check.find_ppts_that_establish_inv(
            dtrace_file, pattern_class_dir, pattern_class_name)
        # Keep everything before the first ':::EXIT'. partition() returns the
        # whole name when the marker is absent, whereas slicing with find()
        # would silently drop the last character in that case.
        methods = {ppt.partition(':::EXIT')[0] for ppt in ppt_names}
        list_of_methods.append((project, methods))

    print("\n   ************")
    print(
        "The following corpus methods return a sequence sorted by {}:".format(
            ordering_operator))
    for project, methods in list_of_methods:
        if methods:
            print(project)
            for m in methods:
                print("\t{}".format(m))
    print("\n   ************")

    shutil.rmtree(pattern_class_dir)

    # Expansion of dynamic analysis results ....
    # Find a list of methods that are similar to the ones found above
    # (list_of_methods).
    # INPUT: list_of_methods, corpus with labeled graphs generated,
    #        threshold value for similarity,
    # OUTPUT: superset_list_of_methods

    # WENCHAO
    print(
        "Expanding the dynamic analysis results using graph-based similarity:")
    simprog_dir = os.path.join(common.WORKING_DIR, 'simprog')
    kernel_file = os.path.join(common.WORKING_DIR, "corpus_kernel.txt")
    top_k = 3
    iter_num = 3
    union_set = set()
    for project, methods in list_of_methods:
        # map Daikon output on sort method to method signature in methods.txt
        # in generated graphs
        for m in methods:
            method_name = common.get_method_from_daikon_out(m)
            method_file = common.get_method_path(project)
            dot_name = common.find_dot_name(method_name, method_file)
            if not dot_name:
                continue
            # find the right dot file for each method
            dot_file = common.get_dot_path(project, dot_name)
            # find all graphs that are similar to it using WL based on some
            # threshold. The import stays lazy so that 'simprog' is only
            # required when at least one method has a dot file; the path is
            # added once instead of once per method.
            if simprog_dir not in sys.path:
                sys.path.append(simprog_dir)
            from similarity import Similarity
            # A fresh Similarity is built per method, as before.
            # NOTE(review): could probably be hoisted if
            # find_top_k_similar_graphs does not mutate it - confirm.
            sim = Similarity()
            sim.read_graph_kernels(kernel_file)
            result_program_list_with_score = sim.find_top_k_similar_graphs(
                dot_file, 'g', top_k, iter_num)
            print(project + ":")
            print(result_program_list_with_score)
            # take the union of all these graphs
            union_set.update(x[0] for x in result_program_list_with_score)
    print("Expanded set:")
    print([x.split('/')[-4] for x in union_set])

    # Write one method summary per line. The file is created at the exact
    # path later handed to run.sh; previously it was written to the current
    # directory, which only matched when cwd happened to be WORKING_DIR.
    methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
    with open(methods_file, "w") as fo:
        for dot_path in union_set:
            method_summary = common.get_method_summary_from_dot_path(dot_path)
            fo.write(method_summary)
            fo.write("\n")

    # Update the type annotations for the expanded dynamic analysis results.
    # INPUT: superset_list_of_methods, annotation to be added
    # OUTPUT: nothing
    # EFFECT: updates the type annotations of the methods in
    # superset_list_of_methods. This requires some additional checks to make
    # sure that the methods actually perform some kind of sorting. Note that
    # we do it on the superset because the original list_of_methods might miss
    # many implementations because fuzz testing could not reach them.
    # Placeholder: the list is empty, so this loop never runs yet.
    for class_file in []:  # MARTIN
        generated_jaif_file = "TODO"
        insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)

    # Ordering of expanded dynamic analysis results ....
    # Find the k 'best' implementations in superset of list_of_methods
    # INPUT: superset_list_of_methods, corpus, k
    # OUTPUT: k_list_of_methods
    # Note: similarity score is used. may consider using other scores;
    # e.g., TODO:???

    #TODO: create input file for huascar where each line is formatted like:
    # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]

    ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")

    with common.cd(ordering_dir):
        #TODO generate a proper relevant methods file.
        cmd = [
            "./run.sh", "-k", "{}".format(limit), "-t", "typicality", "-f",
            methods_file
        ]
        common.run_cmd(cmd, print_output=True)
    """
Example #25
0
def process_trace(pcap_filepath,
                  graph_dir_exp,
                  stat_dir_exp,
                  aggl_dir_exp,
                  rtt_dir_exp,
                  rtt_subflow_dir_exp,
                  failed_conns_dir_exp,
                  acksize_dir_exp,
                  acksize_tcp_dir_exp,
                  plot_cwin,
                  tcpcsm,
                  min_bytes=0,
                  light=False,
                  return_dict=False):
    """ Process a mptcp pcap file and generate graphs of its subflows
        Notice that we can't change dir per thread, we should use processes

        mptcptrace is run inside a temporary directory created in the current
        working directory. The .xpl and .csv files found there are then
        processed, and finally tcp.process_trace is called with the collected
        MPTCP connections. The temporary directory is always removed, even
        when an unexpected exception propagates.

        Parameters:
          pcap_filepath: path of the trace. Its last five characters are
              dropped to build the prefix of exported file names (presumably
              a ".pcap" extension - TODO confirm).
          graph_dir_exp: root directory receiving the exported xpl/csv files,
              under the co.DEF_RTT_DIR and co.TSG_THGPT_DIR sub-directories.
          stat_dir_exp: forwarded to tcp.process_trace; also the target of
              co.save_data for the connections.
          rtt_dir_exp: target of co.save_data for the RTT data.
          acksize_dir_exp: target of co.save_data for the ack size data.
          failed_conns_dir_exp, acksize_tcp_dir_exp, tcpcsm: forwarded to
              tcp.process_trace.
          aggl_dir_exp, rtt_subflow_dir_exp, plot_cwin, min_bytes: not used
              by this function.
          light: if true, process_gput_csv is skipped and csv files that match
              no known kind are deleted instead of exported.
          return_dict: if true, nothing is exported or saved by this function;
              generated files are deleted and the data is returned instead.

        Returns:
          When return_dict is true and the tcp processing step is reached, the
          tuple (connections, tcp_connections, rtt_all, acksize_all,
          acksize_all_tcp). None in every other case.
    """

    def export_path(subdir, fname):
        # Destination of a generated file: <graph_dir_exp>/<subdir>/<trace>_<file>
        return os.path.join(
            graph_dir_exp, subdir,
            os.path.basename(pcap_filepath[:-5]) + "_" +
            os.path.basename(fname))

    csv_tmp_dir = tempfile.mkdtemp(dir=os.getcwd())
    connections = None
    do_tcp_processing = False
    try:
        with co.cd(csv_tmp_dir):
            # If mptcptrace segmentation faults, remove the -S option
            cmd = [
                'mptcptrace', '-f', pcap_filepath, '-s', '-S', '-a', '-A',
                '-R', '-r', '2', '-t', '5000', '-w', '2'
            ]
            connections = process_mptcptrace_cmd(cmd, pcap_filepath)

            # The mptcptrace call will generate .xpl files to cope with
            # First see all xpl files, to detect the relative 0 of all connections
            # Also, compute the duration and number of bytes of the MPTCP connection
            first_pass_on_files(connections)
            rtt_all = {co.C2S: {}, co.S2C: {}}
            acksize_all = {co.C2S: {}, co.S2C: {}}

            # Then really process xpl files: drop them when the caller only
            # wants the dictionaries, export them otherwise.
            for xpl_fname in glob.glob('*.xpl'):
                try:
                    if return_dict:
                        os.remove(xpl_fname)
                    else:
                        directory = co.DEF_RTT_DIR if MPTCP_RTT_FNAME in xpl_fname else co.TSG_THGPT_DIR
                        shutil.move(xpl_fname,
                                    export_path(directory, xpl_fname))
                # OSError is listed explicitly: on Python 2 os.remove raises
                # OSError, which is not an IOError there.
                except (IOError, OSError) as e:
                    print(str(e), file=sys.stderr)

            # And by default, save only seq csv files
            for csv_fname in glob.glob('*.csv'):
                csv_base = os.path.basename(csv_fname)
                # Done outside the try below on purpose: errors raised here
                # are not swallowed.
                if not light and MPTCP_GPUT_FNAME in csv_base:
                    process_gput_csv(csv_fname, connections)
                try:
                    if csv_base.startswith(MPTCP_ADDADDR_FNAME):
                        conn_id = get_connection_id(csv_base)
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        process_add_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)

                    elif csv_base.startswith(MPTCP_RMADDR_FNAME):
                        conn_id = get_connection_id(csv_base)
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        process_rm_addr_csv(csv_fname, connections, conn_id)
                        os.remove(csv_fname)

                    elif MPTCP_RTT_FNAME in csv_base:
                        conn_id = get_connection_id(csv_base)
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        is_reversed = is_reverse_connection(csv_base)
                        process_rtt_csv(csv_fname, rtt_all, connections,
                                        conn_id, is_reversed)
                        os.remove(csv_fname)

                    elif MPTCP_SEQ_FNAME in csv_base:
                        conn_id = get_connection_id(csv_base)
                        if conn_id not in connections:
                            # Not a real connection; skip it
                            continue

                        is_reversed = is_reverse_connection(csv_base)
                        process_csv(csv_fname, connections, conn_id,
                                    is_reversed)
                        if return_dict:
                            # Best-effort removal: the whole temporary
                            # directory is deleted afterwards anyway, so a
                            # failure is reported but never fatal.
                            try:
                                os.remove(csv_fname)
                            except Exception as e:
                                print(str(e), file=sys.stderr)
                        else:
                            co.move_file(
                                csv_fname,
                                export_path(co.TSG_THGPT_DIR, csv_fname))

                    elif MPTCP_ACKSIZE_FNAME in csv_base:
                        collect_acksize_csv(csv_fname, connections,
                                            acksize_all)
                        os.remove(csv_fname)

                    elif not light and not return_dict:
                        # Any other csv file is exported as-is
                        co.move_file(
                            csv_fname,
                            export_path(co.TSG_THGPT_DIR, csv_fname))
                    else:
                        os.remove(csv_fname)
                except (IOError, OSError) as e:
                    print(str(e), file=sys.stderr)

            do_tcp_processing = True

    except MPTCPTraceError as e:
        print(str(e) + "; skip mptcp process", file=sys.stderr)
    finally:
        # Always drop the scratch directory, including when an unexpected
        # exception is propagating (it used to be leaked in that case).
        shutil.rmtree(csv_tmp_dir)

    # This will save the mptcp connections
    if connections and do_tcp_processing:
        dicts = tcp.process_trace(pcap_filepath,
                                  graph_dir_exp,
                                  stat_dir_exp,
                                  failed_conns_dir_exp,
                                  acksize_tcp_dir_exp,
                                  tcpcsm,
                                  mptcp_connections=connections,
                                  light=light,
                                  return_dict=return_dict)
        if return_dict:
            tcp_connections, acksize_all_tcp = dicts
            return connections, tcp_connections, rtt_all, acksize_all, acksize_all_tcp
        else:
            co.save_data(pcap_filepath, acksize_dir_exp, acksize_all)
            co.save_data(pcap_filepath, rtt_dir_exp, rtt_all)
            co.save_data(pcap_filepath, stat_dir_exp, connections)