Example #1
def local_first(_list=[], n=10, m=3):

    import utility.Util as u

    # Calculating initial SPT
    import algorithms.SPT as SPT
    matrix = SPT.SPT(_list, n, m)

    # Reversing each row of matrix
    for i in range(m):
        matrix[i].reverse()

    # Length of the shortest matrix row (the last row is always the shortest for SPT)
    _length = len(matrix[-1])

    # Operate on ordering levels (Ukrainian: rivni vporiadkovanosti):
    # swap each min-max pair on the current ordering level while there are still unswapped elements
    for i in range(_length):
        start_iterator = 0
        end_iterator = m - 1

        while start_iterator < end_iterator:
            if u.radial_swap_calculate(matrix, start_iterator, i, end_iterator,
                                       i)[1] < 0:
                u.matrix_swap(matrix, start_iterator, i, end_iterator, i)
            start_iterator += 1
            end_iterator -= 1

    # Reverse matrix rows to their previous order
    for i in range(m):
        matrix[i].reverse()

    return matrix
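
A minimal usage sketch (the values are hypothetical; it assumes the utility.Util and algorithms.SPT modules are importable, e.g. from the project root):

durations = [5, 2, 8, 1, 9, 3, 7, 4, 6, 2]  # processing times for n=10 jobs
schedule = local_first(durations, n=10, m=3)  # distribute over m=3 machines
for machine_row in schedule:
    print(machine_row)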
Example #2
    def load_data(self, fname):
        import pickle  # cPickle in the original Python 2 source

        obj = Util()
        lnum = obj.file_len(fname)
        with open(fname, 'rb') as f:  # pickle requires binary mode
            for i in range(lnum + 1):
                data = pickle.load(f)
                print(data)
Example #3
def local_second(_list=[], n=10, m=3):

    import utility.Util as u
    import algorithms.SPT as SPT

    # initial SPT
    matrix = SPT.SPT(_list, n, m)

    _length = len(matrix[-1])

    # Try swapping values n times, recalculating TF each time; accept the swaps with the best possible TF value
    for _ in range(n):
        for i in range(_length):
            u.try_all_in_radius(matrix, i, m)

    return matrix
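
local_second takes the same arguments as local_first, so the two heuristics can be compared directly on one input (reusing the hypothetical durations list from the sketch above):

schedule_first = local_first(durations, n=10, m=3)
schedule_second = local_second(durations, n=10, m=3)  # may differ after the TF-driven swaps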
Example #4
    def get_similarity_relevance_dict(self, trace_links):
        """
        Returns a dict with the shape: req_dict["req_name"] = [(sim_to_code_1: float, relevant: bool), (sim_to_code_2, relevant), ...]
        This is used for the average precision calculation
        """
        req_dict = {}
        sol_matrix_copy = Util.deep_copy(
            self._solution_trace_matrix
        )  # Use copy to track false negatives and avoid duplicate trace links
        for trace_link in trace_links:
            req_name = trace_link.req_key
            code_name = trace_link.code_key
            sim_rel_tuple_to_add = (trace_link.similarity, False)
            if sol_matrix_copy.contains_req_code_pair(req_name, code_name):
                sim_rel_tuple_to_add = (trace_link.similarity, True)
                sol_matrix_copy.remove_trace_pair(req_name, code_name)
            if req_name in req_dict:
                req_dict[req_name].append(sim_rel_tuple_to_add)
            else:
                req_dict[req_name] = [sim_rel_tuple_to_add]

        if self._print_false_negatives:  # presumably falsy (e.g. None) when printing is disabled
            self._print_false_negatives(sol_matrix_copy)

        return req_dict
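
The returned dict feeds an average-precision computation. A minimal sketch of such a helper (hypothetical; not part of the class shown) ranks one requirement's (similarity, relevant) pairs by similarity and applies the standard AP formula:

def average_precision(sim_rel_pairs):
    ranked = sorted(sim_rel_pairs, key=lambda pair: pair[0], reverse=True)
    hits, precision_sum = 0, 0.0
    for rank, (_, relevant) in enumerate(ranked, start=1):
        if relevant:
            hits += 1
            precision_sum += hits / rank  # precision at this rank
    return precision_sum / hits if hits else 0.0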
Example #5
def getPackagename(filename: str) -> str:
    retval = ""
    with open(filename) as f:
        in_comment = False
        for line in f.readlines():
            if retval: break
            tokenized = Util.splitToken(line)
            if tokenized:
                for i, token in enumerate(tokenized):
                    if token == "//" and not in_comment: break
                    if token == "/*" and not in_comment:
                        in_comment = True
                        continue
                    elif token == "*/" and in_comment:
                        in_comment = False
                        continue
                    if token == "package" and not in_comment:
                        try:
                            buf = ""
                            token_ = tokenized[i + 1]
                            j = i + 1
                            while token_ != ";":
                                buf += token_
                                j += 1
                                token_ = tokenized[j]
                            retval = buf
                            break
                        except IndexError as e:
                            print("Error line: " + line.rstrip(os.linesep))
    return retval
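
A usage sketch (the path and output are hypothetical): for a file declaring "package com.example.app;", Util.splitToken presumably yields the dotted parts as separate tokens, which the loop concatenates back together:

print(getPackagename("src/main/java/com/example/app/Main.java"))  # "com.example.app"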
Example #6
    def get_true_positives(self, trace_link_candidates):
        if not trace_link_candidates:
            log.debug("No Trace Link candidates!")
            return []
        valid_trace_links = []
        sol_matrix_copy = Util.deep_copy(self._solution_trace_matrix)
        false_positives_matrix = SolutionMatrix()
        for trace_link in trace_link_candidates:
            if sol_matrix_copy.contains_req_code_pair(trace_link.req_key,
                                                      trace_link.code_key):
                # Remove correct trace links on copy to avoid duplicate true positive count
                sol_matrix_copy.remove_trace_pair(trace_link.req_key,
                                                  trace_link.code_key)
                valid_trace_links.append(trace_link)
            elif self._print_false_positives:
                false_positives_matrix.add_trace_pair(trace_link.req_key,
                                                      trace_link.code_key)
        if self._print_false_negatives:
            self._print_false_negatives(sol_matrix_copy)
        if self._print_false_positives:
            log.info(
                "\n\nFalse Positives: {} Links, {} unique Reqs, {} unique Code"
                .format(false_positives_matrix._number_of_trace_links,
                        false_positives_matrix.num_unique_reqs(),
                        false_positives_matrix.num_unique_code()))
            log.info("\n" + false_positives_matrix.print_str())

        return valid_trace_links
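
Given the returned list, precision over the candidate set follows directly (a sketch; evaluator and candidates are hypothetical names):

true_positives = evaluator.get_true_positives(candidates)
precision = len(true_positives) / len(candidates) if candidates else 0.0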
Example #7
    def __init__(self,
                 return_type: IdentifierString,
                 name: IdentifierString,
                 comment: IdentifierString,
                 body: IdentifierString,
                 left_side_identifiers: IdentifierString,
                 parameters: [Parameter] = [],
                 line=None):
        self.return_type = return_type
        self.name = name
        self.original_name = Util.deep_copy(name)
        self.parameters = parameters
        self.original_parameters = Util.deep_copy(parameters)
        self.comment = comment
        self.body = body
        self.left_side_identifiers = left_side_identifiers
        self.line = line  # The line number in the code file where the method signature is written
        self.token_list = self._create_token_list()
Example #8
    def _create_embeddings(self, file_representation):
        chosen_word_groups = self._requirements_word_chooser.choose_words_from(
            file_representation)
        chosen_word_groups_embeddings = []
        requirement_element_vectors = []
        for word_group in chosen_word_groups:
            word_embeddings = self._create_word_embeddings_from_word_list(
                word_group)
            chosen_word_groups_embeddings.append(word_embeddings)
            requirement_element_vectors.append(
                Util.create_averaged_vector(word_embeddings))

        file_vector = Util.create_averaged_vector([
            embedding for word_embeddings in chosen_word_groups_embeddings
            for embedding in word_embeddings
        ])  # flat average over all (nested) word embeddings of chosen_word_groups

        return RequirementEmbeddingContainer(file_representation.file_path,
                                             file_vector,
                                             requirement_element_vectors)
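
Util.create_averaged_vector is not shown on this page; under the assumption that it takes a list of embedding vectors, a plausible minimal sketch is a component-wise mean:

import numpy as np

def create_averaged_vector(embeddings):
    # mean over the list axis -> one vector with the embedding dimensionality
    return np.mean(np.asarray(embeddings), axis=0)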
Example #9
    def process(self, majority_drop_thresh) -> [TraceLink]:

        # Step 1: Calculate code element to (whole) req trace links
        trace_link_data_structure = self._element_level_trace_link_aggregator.process(
            Util.deep_copy(self._trace_link_data_structure))

        # Step 2: (optional) Update code element to (whole) req trace links according to call graph neighbors
        if self._callgraph_aggregator:
            trace_link_data_structure = self._callgraph_aggregator.process(
                trace_link_data_structure)
        # Step 3: Do majority decision to obtain (whole) code file to (whole) req similarities

        return self._majority_decision.process(trace_link_data_structure,
                                               majority_drop_thresh)
Example #10
    def _do_majority_decision(self, code_file_name, votes, sims_per_req):
        voted_trace_links = []
        if votes:
            majority_ranked_dict, max_vote_count = Util.majority_count(votes)
            for req_file_name in majority_ranked_dict:
                if majority_ranked_dict[req_file_name] == max_vote_count:
                    code_file_to_req_file_similarity = self._code_reduce_function(
                        sims_per_req[req_file_name])
                    voted_trace_links.append(
                        TraceLink(req_file_name, code_file_name,
                                  code_file_to_req_file_similarity))

        else:
            log.debug(f"No votes for {code_file_name}")
        return voted_trace_links
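
Util.majority_count is likewise not shown; a plausible minimal sketch (an assumption, not the project's actual code) tallies the votes and reports the highest tally, matching how the result is unpacked above:

from collections import Counter

def majority_count(votes):
    counts = Counter(votes)  # req_file_name -> number of votes
    return counts, max(counts.values())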
Example #11
    def __init__(self,
                 name: IdentifierString,
                 comment: IdentifierString,
                 attributes: [Attribute] = [],
                 methods: [Method] = [],
                 inner_classifiers=[],
                 extended_classifiers=[],
                 implemented_classifiers=[],
                 line=None):
        self.name = name
        self.original_name = Util.deep_copy(name)
        self.extended_classifiers = extended_classifiers
        self.implemented_classifiers = implemented_classifiers
        self.attributes = attributes
        self.methods = methods
        self.comment = comment
        self.inner_classifiers = inner_classifiers
        self.line = line
        self.token_list = self._create_token_list()
Example #12
def getClassname(filename: str) -> list:
    retval = []
    with open(filename) as f:
        in_comment = False
        for line in f.readlines():
            tokenized = Util.splitToken(line)
            if tokenized:
                if tokenized[0] == "//": continue
                for i, token in enumerate(tokenized):
                    if token == "//" and not in_comment: break
                    if token == "/*" and not in_comment:
                        in_comment = True
                        continue
                    elif token == "*/" and in_comment:
                        in_comment = False
                        continue
                    if token == "class" and not in_comment:
                        try:
                            if not tokenized[i + 1] in Util.symbol:
                                retval.append(tokenized[i + 1])
                        except IndexError as e:
                            print("Error line: " + line.rstrip(os.linesep))
    return retval
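
A usage sketch (the path and output are hypothetical); Example #15 below relies on the first returned name via getClassname(self.javasource_path)[0]:

classnames = getClassname("src/main/java/com/example/app/Main.java")
print(classnames)  # e.g. ['Main'] (one entry per class keyword found outside comments)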
Example #13
    def _embedd_and_average(self, word_list):
        word_embd = self._create_word_embeddings_from_word_list(word_list)
        # Return as a (possibly empty) list to avoid an is-None check later on
        return [Util.create_averaged_vector(word_embd)] if word_embd else []
Example #14
    def get_copy_of_similarity_matrix(self):
        return Util.deep_copy(self._similarity_matrix)
Example #15
    def _addTestNumber(self, ESTest_path):
        print("_addTestNumber start")
        try:
            source_lines = self._fileCopyAndMakeLineList(
                ESTest_path,
                path.join(self.getThisProjectPath(), "temp", "temp2.java"))
            # source_lines = self._removeComment(source_lines)
            # print(source_lines)
            with open(ESTest_path, mode='w') as f:
                func_dive = 0
                in_func = False
                objectname = []
                imported = False
                for line in source_lines:
                    tokenized_line = Util.splitToken(line)
                    if len(tokenized_line) >= 3:
                        if (tokenized_line[0] == "public"
                                and tokenized_line[1] == "void"
                                and "test" in tokenized_line[2]):
                            test_number = tokenized_line[2].replace("test", "")
                            print(line, file=f)
                            print("System.out.println(\"ESTest_test["
                                  + test_number + "]\");", file=f)
                            func_dive = 1
                            in_func = True
                            continue
                    if in_func:
                        func_dive += tokenized_line.count('{')
                        func_dive -= tokenized_line.count('}')
                        if len(tokenized_line) >= 2:
                            if (getClassname(self.javasource_path)[0] == tokenized_line[0]
                                    and Util.isIdentifier(tokenized_line[1])):
                                objectname.append(tokenized_line[1])
                        if func_dive == 0:
                            in_func = False
                            for o in objectname:
                                print("System.out.println(\"FilallyObjectAttributes_start["
                                      + o + "]\");", file=f)
                                print("try{System.out.println(new XStream().toXML("
                                      + o + "));}catch(Exception e){e.printStackTrace();}",
                                      file=f)
                                print("System.out.println(\"FilallyObjectAttributes_end["
                                      + o + "]\");", file=f)
                            objectname = []
                            print(line, file=f)
                            continue
                    if not imported and "import" in tokenized_line:
                        print("import com.thoughtworks.xstream.XStream;", file=f)
                        imported = True
                    print(line.replace("mockJVMNonDeterminism = true",
                                       "mockJVMNonDeterminism = false"),
                          file=f)
        except:
            raise
Example #16
import tensorflow as tf
import numpy as np
import sys
sys.path.append("..")
import utility.Util as Util

credit_data = Util.open_file("../data/credit_data.csv")

# create lists of types of features
numerical = ["Duration", 'InstallmentRatePecnt', 'PresentResidenceTime', 'Age']
target = ['CreditStatus']

credit_data = credit_data.sample(frac=1).reset_index(drop=True)
train_x, train_y = Util.pre_process_data(credit_data, numerical, target)

# dividing the dataset into training and test sets
x_train, y_train, x_test, y_test = Util.split_data(0.8, train_x, train_y)

n_hidden_1 = 8
n_input = train_x.shape[1]
n_classes = train_y.shape[1]

weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
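
A hypothetical continuation showing the forward pass these variables imply (the original script's model-building code is not shown; the TF1-style graph API is assumed, matching tf.random_normal above):

x = tf.placeholder(tf.float32, [None, n_input])
hidden = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
logits = tf.add(tf.matmul(hidden, weights['out']), biases['out'])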
Example #17
import pandas as pd
import tensorflow as tf
import numpy as np
import sys
sys.path.append("..")
import utility.Util as Util

credit_data = Util.open_file("../data/credit_data.csv")

# create lists of types of features
numerical = ["Duration", 'InstallmentRatePecnt', 'PresentResidenceTime', 'Age']
# categorical = ["CheckingAcctStat", "CreditHistory", "Purpose", 'Savings', 'Employment', 'Property', 'Telephone']
target = ['CreditStatus']

positive_class, negative_class = Util.decompose_classes(
    credit_data, 'CreditStatus')

# get numerical, categorical and labels for each class
positive_numerical, positive_target = Util.pre_process_data(
    positive_class, numerical, target)
negative_numerical, negative_target = Util.pre_process_data(
    negative_class, numerical, target)

# cluster data and get cluster labels
positive_cluster = Util.cluster_data(
    positive_numerical)  # .join(positive_categorical)
negative_cluster = Util.cluster_data(negative_numerical)
negative_cluster = np.array([x + 3 for x in negative_cluster])

# give the new cluster label column a name
positive_cluster_labels = pd.DataFrame(positive_cluster,
                                       columns=['cluster'])  # column name assumed; the snippet is truncated here in the source
Example #18
    def set_params(self, param_list):
        self.parameters = param_list
        self.original_parameters = Util.deep_copy(param_list)