コード例 #1
def add_project_to_corpus(project):
  """ Assumes that the project_dir contains a
  text file named build_command.txt that contains the build command(s) for the
  project in this directory, and a clean_command.txt that will clean the project.

  """Run dljc
  Run Randoop to generate test sources
  Compile test sources
  Run daikon.Chicory on tests to create dtrace file
  Precompute graph kernels that are independent of ontology stuff
                  ['dyntrace', 'graphtool'],
                  ['--graph-jar', common.get_jar('prog2dfg.jar'),
                   '--dyntrace-libs', common.LIBS_DIR])

  """ run petablox """

  """ run graph kernel computation """
  project_dir = common.get_project_dir(project)
  kernel_file_path = common.get_kernel_path(project)
  graph_kernel_cmd = ['python',
  print 'Generated kernel file for {0}.'.format(project)
  return kernel_file_path
コード例 #2
def compute_clusters_for_classes(project_list, out_file_name, cf_map_file_name="./class_field_map.json", wf_map_file_name="./word_based_field_clusters.json"):
  class_dirs = list()
  for project in project_list:
    print common.get_class_dirs(project)
  if len(class_dirs)<1:
    print("No class dirs found to cluster. Make sure you run dljc first.")


  clusterer_cmd = ['java', '-jar', common.get_jar('clusterer.jar'),
                   '-cs', '3',
                   '-out', out_file_name,
                   '-cfm', cf_map_file_name,
                   '-wfm', wf_map_file_name,

  common.run_cmd(clusterer_cmd, True) 

  # Check if the file exists and is not empty.
  if os.path.exists(wf_map_file_name) and os.path.getsize(wf_map_file_name) > 0:
    print ("Generate jaif file")
    map2annotation.field_mappings_to_annotation(project_list, wf_map_file_name)
    for project in project_list:
    print("Warning: Missing or empty {0} file.".format(wf_map_file_name))
    print("Warning: map2annotation won't be executed.")
コード例 #3
def get_daikon_patterns():
    ordering_operator = "<="

    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:

    invariant_name = "TODO_sorted_sequence"

    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)

    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    if os.path.isdir(pattern_class_dir):

    cmd = [
        "javac", "-g", "-classpath",
        common.get_jar('daikon.jar'), daikon_pattern_java_file, "-d",

    return pattern_class_dir
コード例 #4
def add_project_to_corpus(project):
    """ Assumes that the project_dir contains a
  text file named build_command.txt that contains the build command(s) for the
  project in this directory, and a clean_command.txt that will clean the project.
    """Run dljc
  Run Randoop to generate test sources
  Compile test sources
  Run daikon.Chicory on tests to create dtrace file
  Precompute graph kernels that are independent of ontology stuff
    common.run_dljc(project, ['dyntrace', 'graphtool'], [
        common.get_jar('prog2dfg.jar'), '--dyntrace-libs', common.LIBS_DIR
    """ run petablox """
    """ run graph kernel computation """
    project_dir = common.get_project_dir(project)
    kernel_file_path = common.get_kernel_path(project)
    graph_kernel_cmd = [
        common.get_simprog('precompute_kernel.py'), project_dir,
    print 'Generated kernel file for {0}.'.format(project)
    return kernel_file_path
コード例 #5
def run_petablox(project):
  with common.cd(common.get_project_dir(project)):
    petablox_cmd = ['java',
                    '-cp', common.get_jar('petablox.jar'),
コード例 #6
ファイル: backend.py プロジェクト: mernst/integration-test2
def generate_graphs(project):
    """Run dljc
  Generate program graphs using prog2dfg
  Precompute graph kernels that are independent of ontology stuff
    common.run_dljc(project, ['graphtool'],
                     common.get_jar('prog2dfg.jar'), '--cache'])
コード例 #7
def run_petablox(project):
    with common.cd(common.get_project_dir(project)):
        petablox_cmd = [
            'java', '-cp',
            common.get_jar('petablox.jar'), '-Dpetablox.reflect.kind=none',
            '-Dpetablox.run.analyses=cipa-0cfa-dlog', 'petablox.project.Boot'
コード例 #8
def generate_graphs(project):
  """Run dljc
  Compile test sources
  Generate program graphs using prog2dfg
  Precompute graph kernels that are independent of ontology stuff
  print("Generating graphs for {0}...".format(project))
                  ['--graph-jar', common.get_jar('prog2dfg.jar'),
コード例 #9
def main(corpus, annotations, limit=3):
    """ SUMMARY: use case of the user-driven functionality of PASCALI.
  Scenario: User provides the concept of Sequence and the equivalent Java
  types, and the concept of sorted sequence and the relevant type invariant.
  Goal: learn how to get from Sequence -> Sorted Sequence.
  INPUT: annotations, dictionary mapping string -> list of strings
  OUTPUT: recompiles generic-inference-solver with new annotations"""

    """ Look for new mapping from 'ontology concepts'->'java type' and run
  checker framework. Should be implemented in type_inference
  Mapping example:
    Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc.

  INPUT: corpus, file containing set of concept->java_type mapping
  OUTPUT: Set of jaif files that are merged into the classes. The jaif files are
          stored as default.jaif in each project's directory.
  BODY: This also triggers back-end labeled graph generation.

    for project in corpus:
    """ Missing step: interact with PA to add a definition of Sorted Sequence,
  which is a specialization of Sequence that has a sortedness invariant.
  The sortedness invariant gets turned into a Daikon template
  INPUT: user interaction
  OUTPUT: type_annotation and type_invariant (for sorted sequence)


    ordering_operator = "<="

    ontology_invariant_file = "TODO_from_Howie.txt"
    with open(ontology_invariant_file, 'w') as f:

    invariant_name = "TODO_sorted_sequence"

    daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(
        ontology_invariant_file, invariant_name)
    """ Find all methods that have one input parameter annotated as Sequence and return a variable also
  annotated as Sequence.
  INPUT: The corpus and the desired annotations on the method signature
  OUTPUT: List of methods that have the desired signature.
  NOTE: This is a stub and will be implemented as LB query in the future.
    sig_methods = find_methods_with_signature(corpus,
    print("\n   ************")
        "The following corpus methods have the signature Sequence->Sequence {}:"
    for (project, package, clazz, method) in sig_methods:
        print("{}:\t{}.{}.{}".format(project, package, clazz, method))
    print("\n   ************")
    """ Search for methods that have a return type annotated with Sequence
  and for which we can establish a sortedness invariant (may be done by LB).

  INPUT: dtrace file of project
         daikon_pattern_java_file that we want to check on the dtrace file.

  OUTPUT: list of ppt names that establish the invariant. Here a ppt
  is a Daikon program point, such as test01.TestClass01.sort(int[]):::EXIT

  Note: this step translates the type_invariant into a Daikon
  template (which is a Java file).

    pattern_class_name = invariant_name
    pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
    if os.path.isdir(pattern_class_dir):

    cmd = [
        "javac", "-g", "-classpath",
        common.get_jar('daikon.jar'), daikon_pattern_java_file, "-d",

    list_of_methods = []
    for project in corpus:
        dtrace_file = backend.get_dtrace_file_for_project(project)
        if not dtrace_file:
            print("Ignoring folder {} because it does not contain dtrace file".
        ppt_names = inv_check.find_ppts_that_establish_inv(
            dtrace_file, pattern_class_dir, pattern_class_name)
        methods = set()
        for ppt in ppt_names:
            method_name = ppt[:ppt.find(':::EXIT')]
        list_of_methods += [(project, methods)]

    print("\n   ************")
        "The following corpus methods return a sequence sorted by {}:".format(
    for project, methods in list_of_methods:
        if len(methods) > 0:
            for m in methods:
    print("\n   ************")

    """ Expansion of dynamic analysis results ....
  Find a list of methods that are similar to the ones found above (list_of_methods).
  INPUT: list_of_methods, corpus with labeled graphs generated, threshold value for similarity,
  OUTPUT: superset_list_of_methods

        "Expanding the dynamic analysis results using graph-based similarity:")
    union_set = set()
    for project, methods in list_of_methods:
        # map Daikon output on sort method to method signature in methods.txt in generated graphs
        for m in methods:
            method_name = common.get_method_from_daikon_out(m)
            #kernel_file = common.get_kernel_path(project)
            method_file = common.get_method_path(project)
            dot_name = common.find_dot_name(method_name, method_file)
            if dot_name:
                # find the right dot file for each method
                dot_file = common.get_dot_path(project, dot_name)
                # find all graphs that are similar to it using WL based on some threshold
                sys.path.append(os.path.join(common.WORKING_DIR, 'simprog'))
                from similarity import Similarity
                sim = Similarity()
                    os.path.join(common.WORKING_DIR, "corpus_kernel.txt"))
                top_k = 3
                iter_num = 3
                result_program_list_with_score = sim.find_top_k_similar_graphs(
                    dot_file, 'g', top_k, iter_num)
                print(project + ":")
                result_set = set(
                    [x[0] for x in result_program_list_with_score])
                # take the union of all these graphs
                union_set = union_set | result_set
    print("Expanded set:")
    print([x.split('/')[-4] for x in union_set])

    # return this set as a list of (project, method)
    fo = open("methods.txt", "w")
    expanded_list = []
    for dot_path in union_set:
        method_summary = common.get_method_summary_from_dot_path(dot_path)
    """ Update the type annotations for the expanded dynamic analysis results.
  INPUT: superset_list_of_methods, annotation to be added
  OUTPUT: nothing
  EFFECT: updates the type annotations of the methods in superset_list_of_methods.
  This requires some additional checks to make sure that the methods actually
  perform some kind of sorting. Note that we do it on the superset because the original
  list_of_methods might miss many implementations because fuzz testing could not
  reach them.
    for class_file in []:  # MARTIN
        generated_jaif_file = "TODO"
        insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)
    """ Ordering of expanded dynamic analysis results ....
  Find the k 'best' implementations in superset of list_of_methods
  INPUT: superset_list_of_methods, corpus, k
  OUTPUT: k_list_of_methods
  Note: the similarity score is used; we may consider using other scores, e.g., TODO:???

    #TODO: create input file for huascar where each line is formatted like:
    # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]

    ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")

    methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
    with common.cd(ordering_dir):
        #TODO generate a proper relevant methods file.
        cmd = [
            "./run.sh", "-k", "{}".format(limit), "-t", "typicality", "-f",
        common.run_cmd(cmd, print_output=True)
コード例 #10
ファイル: inv_check.py プロジェクト: mernst/integration-test2
import sys, os
import subprocess
import traceback
import urllib
import zipfile
import ontology_to_daikon
import common

daikon_jar = common.get_jar("daikon.jar")
DAIKON_SPLITTER = "====================="

def run_daikon_on_dtrace_file(dtrace_file,
    cmd = ["java", "-classpath", classpath, "daikon.DaikonSimple", dtrace_file]
    if checked_invariant:
        cmd += [
            "--disable-all-invariants", "--user-defined-invariant",
        cmd += ["--config_option", "daikon.Daikon.undo_opts=true"]
    return common.run_cmd(cmd, print_output=True)['output']

def find_ppts_that_establish_inv_in_daikon_output(daikon_output,
    ppts_with_inv = []
    start_of_new_block = False
    current_method = None
    lines = daikon_output.splitlines(True)
コード例 #11
def main(corpus, annotations):
  """ SUMMARY: use case of the user-driven functionality of PASCALI.
  Scenario: User provides the concept of Sequence and the equivalent Java
  types, and the concept of sorted sequence and the relevant type invariant.
  Goal: learn how to get from Sequence -> Sorted Sequence.

  INPUT: annotations, dictionary mapping string -> list of strings
  OUTPUT: recompiles generic-inference-solver with new annotations"""


  """ Look for new mapping from 'ontology concepts'->'java type' and run
  checker framework. Should be implemented in type_inference
  Mapping example:
    Sequence -> java.lang.Array, java.util.List, LinkedHashSet, etc.

  INPUT: corpus, file containing set of concept->java_type mapping
  OUTPUT: Set of jaif files that are merged into the classes. The jaif files are
          stored as default.jaif in each project's directory.
  BODY: This also triggers back-end labeled graph generation.

  for project in corpus:

  """ Missing step: interact with PA to add a definition of Sorted Sequence,
  which is a specialization of Sequence that has a sortedness invariant.
  The sortedness invariant gets turned into a Daikon template
  INPUT: user interaction
  OUTPUT: type_annotation and type_invariant (for sorted sequence)


  ordering_operator = "<="

  ontology_invariant_file = "TODO_from_Howie.txt"
  with open(ontology_invariant_file, 'w') as f:

  invariant_name = "TODO_sorted_sequence"

  daikon_pattern_java_file = ontology_to_daikon.create_daikon_invariant(ontology_invariant_file, invariant_name)

  """ Find all methods that have one input parameter annotated as Sequence and return a variable also
  annotated as Sequence.
  INPUT: The corpus and the desired annotations on the method signature
  OUTPUT: List of methods that have the desired signature.
  NOTE: This is a stub and will be implemented as LB query in the future.
  sig_methods = find_methods_with_signature(corpus, "@ontology.qual.Sequence", ["@ontology.qual.Sequence"])
  print ("\n   ************")
  print ("The following corpus methods have the signature Sequence->Sequence {}:")
  for (project, package, clazz, method) in sig_methods:
    print("{}:\t{}.{}.{}".format(project, package, clazz, method))
  print ("\n   ************")

  """ Search for methods that have a return type annotated with Sequence
  and for which we can establish a sortedness invariant (may be done by LB).

  INPUT: dtrace file of project
         daikon_pattern_java_file that we want to check on the dtrace file.

  OUTPUT: list of ppt names that establish the invariant. Here a ppt
  is a Daikon program point, such as test01.TestClass01.sort(int[]):::EXIT

  Note: this step translates the type_invariant into a Daikon
  template (which is a Java file).

  pattern_class_name = invariant_name
  pattern_class_dir = os.path.join(common.WORKING_DIR, "invClass")
  if os.path.isdir(pattern_class_dir):

  cmd = ["javac", "-g", "-classpath", common.get_jar('daikon.jar'),
         daikon_pattern_java_file, "-d", pattern_class_dir]

  list_of_methods = []
  for project in corpus:
    dtrace_file = backend.get_dtrace_file_for_project(project)
    if not dtrace_file:
      print ("Ignoring folder {} because it does not contain dtrace file".format(project))
    ppt_names = inv_check.find_ppts_that_establish_inv(dtrace_file, pattern_class_dir, pattern_class_name)
    methods = set()
    for ppt in ppt_names:
      method_name = ppt[:ppt.find(':::EXIT')]
    list_of_methods +=[(project, methods)]

  print ("\n   ************")
  print ("The following corpus methods return a sequence sorted by {}:".format(ordering_operator))
  for project, methods in list_of_methods:
    if len(methods)>0:
      print (project)
      for m in methods:
  print ("\n   ************")


  """ Expansion of dynamic analysis results ....
  Find a list of methods that are similar to the ones found above (list_of_methods).
  INPUT: list_of_methods, corpus with labeled graphs generated, threshold value for similarity,
  OUTPUT: superset_list_of_methods

  print("Expanding the dynamic analysis results using graph-based similarity:")
  union_set = set()
  for project, methods in list_of_methods:
    # map Daikon output on sort method to method signature in methods.txt in generated graphs
    for m in methods:
      method_name = common.get_method_from_daikon_out(m)
      #kernel_file = common.get_kernel_path(project)
      method_file = common.get_method_path(project)
      dot_name = common.find_dot_name(method_name, method_file)
      if dot_name:
        # find the right dot file for each method
        dot_file = common.get_dot_path(project, dot_name)
        # find all graphs that are similar to it using WL based on some threshold
        sys.path.insert(0, 'simprog')
        from similarity import Similarity
        sim = Similarity()
        top_k = 3
        iter_num = 3
        result_program_list_with_score = sim.find_top_k_similar_graphs(dot_file, 'g', top_k, iter_num)
        result_set = set([x[0] for x in result_program_list_with_score])
        # take the union of all these graphs
        union_set = union_set | result_set
  print("Expanded set:")
  print([x.split('/')[-4] for x in union_set])

  # return this set as a list of (project, method)
  fo = open("methods.txt", "w")
  expanded_list = []
  for dot_path in union_set:
    method_summary = common.get_method_summary_from_dot_path(dot_path)

  """ Update the type annotations for the expanded dynamic analysis results.
  INPUT: superset_list_of_methods, annotation to be added
  OUTPUT: nothing
  EFFECT: updates the type annotations of the methods in superset_list_of_methods.
  This requires some additional checks to make sure that the methods actually
  perform some kind of sorting. Note that we do it on the superset because the original
  list_of_methods might miss many implementations because fuzz testing could not
  reach them.
  for class_file in []: # MARTIN
    generated_jaif_file = "TODO"
    insert_jaif.merge_jaif_into_class(class_file, generated_jaif_file)

  """ Ordering of expanded dynamic analysis results ....
  Find the k 'best' implementations in superset of list_of_methods
  INPUT: superset_list_of_methods, corpus, k
  OUTPUT: k_list_of_methods
  Note: the similarity score is used; we may consider using other scores, e.g., TODO:???

  #TODO: create input file for huascar where each line is formatted like:
  # ../corpus/Sort05/src/Sort05.java::sort(int[]):int[]

  ordering_dir = os.path.join(common.WORKING_DIR, "ordering_results/")

  methods_file = os.path.join(common.WORKING_DIR, 'methods.txt')
  with common.cd(ordering_dir):
    #TODO generate a proper relevant methods file.
    cmd = ["./run.sh",
           "-k", "3",
           "-t", "typicality",
           "-f", methods_file]
    common.run_cmd(cmd, print_output=True)

  Close the loop and add the best implementation found in the previous
  step back to the ontology.
  INPUT: k_list_of_methods
  OUTPUT: patch file for the ontology. Worst case: just add the 'best' implementation
  found in the corpus as a blob to the ontology. Best case: generate an equivalent
  flow-graph in the ontology.
  print "TODO" # ALL