Example no. 1
def load_files(file1, file2):
    """
    Loads the results from two tests and generates an HTML page with the differences.
    """

    f = open("results/" + file1, "r")
    results = json.loads(f.read())
    f.close()

    f = open("results/" + file2, "r")
    previous_results = json.loads(f.read())
    f.close()

    compare(results, previous_results)
Example no. 2
def load_files(file1, file2):
    """
    Loads the results from two tests and generates an HTML page with the differences.
    """

    f = open("results/" + file1, "r")
    results = json.loads(f.read())
    f.close()

    f = open("results/" + file2, "r")
    previous_results = json.loads(f.read())
    f.close()

    compare(results, previous_results)
Example no. 3
    def bestCode(self, Count, response, PrecedentTry):
        self.reponse = response
        self.TryNumber = Count
        self.PrecedentTry = PrecedentTry

        h = 1
        eligible = []
        # Let the population evolve until we have enough eligible candidates
        while (h <= self.MaxGen and len(eligible) <= self.MaxSize):
            self.DevGenetic()
            for i in range(self.TaillePopu):
                difBP = 0
                difMP = 0
                for j in range(len(self.PrecedentTry)):
                    BP, MP = util.compare(self.PrecedentTry[j], self.popu[i])
                    difBP += self.B * abs(BP - self.reponse[j][0])
                    difMP += abs(MP - self.reponse[j][1])

                if difBP == 0 and difMP == 0:
                    if self.popu[i] not in eligible:
                        eligible.append(self.popu[i])
            h += 1
        if len(eligible) == 0:
            bestfit = 10000
            bestcode = util.gen(self.positions, self.CouleurPossible)
            for i in range(self.TaillePopu):
                if self.fitness(self.popu[i]) < bestfit:  # keep the lowest-fitness (smallest-error) code
                    bestfit = self.fitness(self.popu[i])
                    bestcode = self.popu[i]
            return bestcode

        bestguess = eligible[0]
        mostSimilarity = 0
        # Pick the eligible code most similar to the other eligible codes
        for elements in eligible:
            similarity = 0
            for elements2 in eligible:
                if elements != elements2:
                    BP, MP = util.compare(elements2, elements)
                    similarity += BP + MP
            if similarity >= mostSimilarity:
                mostSimilarity = similarity
                bestguess = elements
        return bestguess
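Several of these snippets (examples nos. 3, 8, 9 and 11) call util.compare(guess, code) and use the returned pair (BP, MP) as Mastermind-style feedback. The project's own implementation is not shown in this listing; the following is only a minimal sketch, under that assumption, of what such a helper might look like (the name compare and the list-based codes are assumptions):

# Hypothetical sketch only, not the project's util.compare: BP counts pegs with the
# right colour in the right position, MP counts colours present in both codes but
# in the wrong position (assumes guess and code are equal-length lists of colours).
def compare(guess, code):
    BP = sum(1 for g, c in zip(guess, code) if g == c)
    common = sum(min(guess.count(colour), code.count(colour)) for colour in set(guess))
    MP = common - BP
    return BP, MP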
Example no. 4
	def calc_plot_error(self):
		perc_errors = {"gamma_func": [], "params": []}
		abs_errors = {"gamma_func": [], "params": []}
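		# For each article, compare the observed views against the fitted parameters
		# (and against the fitted gamma function when the pattern has one)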
		for art in self.articles:
			try:
				perc_error, abs_error = compare(art.views, art.est_params)
				perc_errors["params"].append(perc_error)
				abs_errors["params"].append(abs_error)
				if self.pattern.has_gamma:
					perc_error, abs_error = compare(art.views, art.est_gamma_func)
					perc_errors["gamma_func"].append(perc_error)
					abs_errors["gamma_func"].append(abs_error)
			except IndexError:
				self.catch_error(art,"calc_error")

		plt.plot_errors(perc_errors,abs_errors,self.plotfolder,self.pattern)
		self.errors = {"normalized": perc_errors, "absolute": abs_errors}
Example no. 5
def save_compare_results():
    """
    Save the current results and, if previous results are available, generate an HTML page with the differences.
    """
    global results, str_date

    commit = subprocess.Popen(
        ['git', 'log', '-n', '1', '--pretty=format:%H'],
        stdout=subprocess.PIPE).communicate()[0].decode().strip()
    results["hash"] = commit

    # Save results
    if not os.path.exists("results"):
        os.makedirs("results")

    str_date = time.strftime("%y.%m.%d-%H:%M:%S")
    f = open("results/result_" + str_date + ".txt", "w")

    str_res = json.dumps(results, indent=2)
    f.write(str_res)
    f.close()

    # Read all the previous results stored in results/
    file_paths = []
    for root, directories, files in os.walk("results/"):
        for filename in files:
            # Join the two strings in order to form the full filepath.
            filepath = os.path.join(root, filename)
            file_paths.append(filepath)  # Add it to the list.

    # Sort the files chronologically; we only need the most recent previous run
    file_paths.sort()
    if len(file_paths) > 1:
        last_file = file_paths[-2]  #The last file is the one we just saved

        f = open(last_file, "r")
        previous_results = json.loads(f.read())
        f.close()
    else:
        previous_results = {}

    compare(results, previous_results)
Example no. 6
def save_compare_results():
    """
    Save the current results and, if previous results are available, generate an HTML page with the differences.
    """
    global results, str_date

    commit = subprocess.Popen(['git', 'log', '-n', '1', '--pretty=format:%H'], stdout=subprocess.PIPE).communicate()[0].decode().strip()
    results["hash"] = commit

    # Save results
    if not os.path.exists("results"):
        os.makedirs("results")

    str_date = time.strftime("%y.%m.%d-%H:%M:%S")
    f = open("results/result_" + str_date + ".txt", "w")

    str_res = json.dumps(results, indent=2)
    f.write(str_res)
    f.close()

    # Read all the previous results stored in results/
    file_paths = []
    for root, directories, files in os.walk("results/"):
        for filename in files:
            # Join the two strings in order to form the full filepath.
            filepath = os.path.join(root, filename) 
            file_paths.append(filepath)  # Add it to the list.

    # Sort the files chronologically; we only need the most recent previous run
    file_paths.sort()
    if len(file_paths) > 1:
        last_file = file_paths[-2] #The last file is the one we just saved

        f = open(last_file, "r")
        previous_results = json.loads(f.read())
        f.close()
    else:
        previous_results = {}

    compare(results, previous_results)
Example no. 7
def defects_data_ccn(stream_info, error_lines, blame_lines):
    defects = []
    try:
        for idx, line in enumerate(error_lines):
            ccn_cc = {}
            line_array = line.strip().split("->")
            if len(line_array) == 7:
                ccn_cc["function_name"] = line_array[1]
                ccn_cc["long_name"] = line_array[2]
                ccn_cc["total_lines"] = line_array[4]
                ccn_cc["ccn"] = line_array[5]
                ccn_cc["condition_lines"] = line_array[6].replace(
                    ' ', '').replace('{', '').replace('}', '')
                function_lines = line_array[3].split('-')
                ccn_cc["start_lines"] = function_lines[0]
                ccn_cc["end_lines"] = function_lines[1]
                author_info = ""
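                # Walk the function's blame lines and keep the most recent author/date
                # (util.compare presumably reports whether the second timestamp is newer)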
                for i in range(int(function_lines[0]), int(function_lines[1])):
                    line_blame_data = blame_lines[i].split('->')
                    if author_info != "":
                        info_array = author_info.split('->')
                        if util.compare(
                                info_array[1].strip().split('.')[0],
                                line_blame_data[2].strip().split('.')[0]):
                            author_info = line_blame_data[
                                1] + "->" + line_blame_data[2].strip()
                    else:
                        author_info = line_blame_data[
                            1] + "->" + line_blame_data[2].strip()
                ccn_cc["author"] = author_info.split('->')[0]
                temp_datetime = author_info.split('->')[1]
                st = time.strptime(temp_datetime, '%Y-%m-%dT%H:%M:%S.%f')
                final_datetime = time.mktime(st)
                ccn_cc["latest_datetime"] = int(round(final_datetime * 1000))
                defects.append(ccn_cc)
    except Exception:
        raise

    return defects
Example no. 8
NombreCouleur = int(sys.argv[3])
positions = int(sys.argv[2])
Couleur = Couleur[:NombreCouleur]
code = gen()
#print("code to find:" + str(code))
if sys.argv[1] == 'GA':
    ponderation = 2
    depart = 0
    pourcentage = 2
    TailleEligible = 15 * positions
    TaillePopu = 35 * positions
    NombreGen = 40 * positions
    GA = UseGA.UseGA(Couleur, positions, ponderation, depart, pourcentage, TailleEligible, TaillePopu, NombreGen)
    while BP != positions:
        #print("Proposition n° : " + str(GA.compte) + " -> " + str(GA.nextMove()))
        BP, MP = util.compare(GA.actual_prop, code)
        GA.reponse(MP, BP)
        i += 1

if sys.argv[1] == 'PSO':
    NbreCluster = 10
    NbreAgent = 5
    Distmin = 2
    distmax = 4
    distpull = 3
    MaxGen = 10
    PSO = UsePSO.UsePSO(Couleur, positions, NbreCluster, NbreAgent, Distmin, distmax, distpull, MaxGen)
    while BP != positions:
        #print("Proposition n° : " + str(len(PSO.propositions)) + " -> " + str(PSO.actual_prop))
        BP, MP = util.compare(PSO.actual_prop, code)
        if BP != positions:
Example no. 9
    def evaluation(self, test, i):
        BP, MP = util.compare(test, self.PrecedentTry[i])
        e = abs(
            self.score(self.reponse[i][0], self.reponse[i][1]) -
            self.score(BP, MP))
        return e
Example no. 10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_data_dir', type=str, default='data/test.txt')
    parser.add_argument('--test_data_dir', type=str, default='data/test.txt')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--word_dim', type=int, default=128)
    parser.add_argument('--tag_sum', type=int, default=7)
    parser.add_argument('--num_epochs', type=int, default=1)
    parser.add_argument('--min_freq', type=int, default=0)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--test_batch_size', type=int, default=32)
    parser.add_argument('--max_len', type=int, default=100)
    parser.add_argument('--dropout_keep', type=float, default=0.8)

    args = parser.parse_args()
    train_data_loader = Dataloader(args.train_data_dir, args.batch_size,
                                   args.min_freq, True, args.max_len)
    args.vocab_size = train_data_loader.vocab_size

    input_data = tf.placeholder(tf.int32, [args.batch_size, args.max_len])
    target = tf.placeholder(tf.int32, [args.batch_size, args.max_len])
    sequence_length = tf.placeholder(tf.int32, [args.batch_size])

    embeddings = tf.Variable(
        tf.random_uniform([args.vocab_size, args.word_dim], -1.0, 1.0))
    embeddings = tf.nn.l2_normalize(embeddings, 1)

    input_data_embedding = tf.nn.embedding_lookup(embeddings, input_data)
    input_data_embedding = tf.nn.dropout(input_data_embedding,
                                         args.dropout_keep)

    lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(args.word_dim,
                                           forget_bias=1.0,
                                           state_is_tuple=True)
    lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(args.word_dim,
                                           forget_bias=1.0,
                                           state_is_tuple=True)

    (output_fw, output_bw), state = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell,
        lstm_bw_cell,
        input_data_embedding,
        dtype=tf.float32,
        sequence_length=sequence_length)
    bilstm_output = tf.concat([output_fw, output_bw], axis=2)

    W = tf.get_variable(
        name='W',
        shape=[args.batch_size, 2 * args.word_dim, args.tag_sum],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    b = tf.get_variable(name='b',
                        shape=[args.batch_size, args.max_len, args.tag_sum],
                        dtype=tf.float32,
                        initializer=tf.zeros_initializer())

    bilstm_output = tf.tanh(tf.matmul(bilstm_output, W) + b)
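    # The batched matmul above projects the 2 * word_dim BiLSTM outputs to per-tag
    # scores, using a separate W slice for each example in the batch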

    log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
        bilstm_output, target,
        tf.tile(np.array([args.max_len]), np.array([args.batch_size])))
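    # crf_log_likelihood above learns the tag transition matrix during training; at
    # test time the learned transitions are fed back in through the transition_params_
    # placeholder below for Viterbi decoding. Every sequence is treated as having
    # length max_len here rather than its true length.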
    transition_params_ = tf.placeholder(shape=transition_params.shape,
                                        dtype=tf.float32)
    viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(
        bilstm_output, transition_params_,
        tf.tile(np.array([args.max_len], dtype=np.int32),
                np.array([args.batch_size], dtype=np.int32)))

    loss = tf.reduce_mean(-log_likelihood)
    optimizer = tf.train.AdamOptimizer(args.learning_rate)
    train_op = optimizer.minimize(loss)

    # train model
    sess = tf.Session()
    print('---------- train ----------')
    sess.run(tf.global_variables_initializer())
    for e in range(args.num_epochs):
        print('epoch:', e)
        train_data_loader.reset_batch_pointer()
        for b in range(train_data_loader.num_batches):
            x, y, sl = train_data_loader.next_batch()
            # print(sl)
            feed = {input_data: x, target: y, sequence_length: sl}
            tf_transition_params, _ = sess.run([transition_params, train_op],
                                               feed)
            # sess.run(train_op, feed)
            if b % 100 == 0:
                train_loss = sess.run(loss, feed)
                print('iter', b, ' loss:', train_loss)

    # test model
    print('---------- test ----------')
    test_data_loader = Dataloader(args.test_data_dir, args.test_batch_size,
                                  args.min_freq, False, args.max_len)
    test_data_loader.reset_batch_pointer()
    total_y_num = 0
    total_pre_num = 0
    total_true_num = 0
    for b in range(test_data_loader.num_batches):
        x, y, sl = test_data_loader.next_batch()

        feed = {
            input_data: x,
            target: y,
            transition_params_: tf_transition_params,
            sequence_length: sl
        }
        tf_viterbi_sequence, tf_viterbi_score = sess.run(
            [viterbi_sequence, viterbi_score], feed)
        for pre, y_ in zip(tf_viterbi_sequence, y):
            true_num, y_num, pre_num = compare(y_, pre)
            total_true_num += true_num
            total_y_num += y_num
            total_pre_num += pre_num

    # precision (stored in `accuracy`) = intersection / entities extracted by the model
    # recall = intersection / all entities in the dataset
    # F1 = 2 * (precision * recall) / (precision + recall)
    if total_pre_num == 0:
        # print('total pre num is 0')
        accuracy = 0
    else:
        accuracy = total_true_num / total_pre_num
    if total_y_num == 0:
        # print('total y num is 0')
        recall = 0
    else:
        recall = total_true_num / total_y_num
    if total_pre_num == 0 or total_y_num == 0:
        # print('f1 is 0')
        f1 = 0
    else:
        f1 = 2 * accuracy * recall / (accuracy + recall)

    with open('result.txt', 'w') as fout:
        fout.write('------------test result-------------\n')
        fout.write('total true num:' + str(total_true_num) + '\n')
        fout.write('total y num:' + str(total_y_num) + '\n')
        fout.write('total predict num:' + str(total_pre_num) + '\n')
        fout.write('accuracy:' + str(accuracy) + '\n')
        fout.write('recall:' + str(recall) + '\n')
        fout.write('f1-score:' + str(f1) + '\n')

    print('total true num:' + str(total_true_num))
    print('total y num:' + str(total_y_num))
    print('total predict num:' + str(total_pre_num))
    print('accuracy:' + str(accuracy))
    print('recall:' + str(recall))
    print('f1-score:' + str(f1))
Example no. 11
    def evaluation(self, test, i):
        BP, MP = util.compare(test, self.PrecedentTry[i])
        #e = abs(self.score(self.reponse[i][0], self.reponse[i][1]) - self.score(BP, MP)) + Genetic.B * i
        e = abs(self.reponse[i][0] - BP) + abs(self.reponse[i][1] - MP)
        return e
Example no. 12
def dupc_generate_data_json(stream_info):
    try:
        all_result_json = {}
        with open(stream_info['PROJECT_FILE_DUPC_JSON'], "r",
                  encoding='utf-8') as jsonfile:
            all_result_json = json.load(jsonfile)

        for idx, file_info in enumerate(all_result_json['files_info']):
            filedata = {}
            filename_result = result.get_result_file_path(
                stream_info["STREAM_RESULT_PATH"], file_info['file_path'])
            txt_file_path_blame = filename_result + ".scm_blame.txt"
            xml_file_path_info = filename_result + ".scm_info.xml"
            xml_file_path_url = filename_result + ".scm_url.xml"
            file_info["file_change_time"] = scm.parse_info_xml(
                xml_file_path_info, stream_info)
            filedata = scm.parse_log_xml(xml_file_path_url,
                                         file_info['file_path'], stream_info)
            blameline = ""
            file_author_set = set([])
            if os.path.isfile(txt_file_path_blame):
                with open(txt_file_path_blame, "r",
                          encoding='utf-8') as blame_line:
                    blameline = blame_line.readlines()
            for block_idx, block in enumerate(file_info['block_list']):
                author_name_set = set([])
                author_name_list = []
                author_list = {}
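                # Work out which blame lines fall inside this block, clamped to what
                # the blame file actually contains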
                lines_list = range(0, len(blameline))

                if len(blameline) > int(
                        block['start_lines']) and len(blameline) < int(
                            block['end_lines']):
                    lines_list = range(int(block['start_lines']),
                                       len(blameline))

                elif len(blameline) > int(block['end_lines']):
                    lines_list = range(int(block['start_lines']),
                                       int(block['end_lines']))

                for i in lines_list:
                    line_blame_data = blameline[i].split('->')
                    author_name_list.append(line_blame_data[1])
                    author_name_set.add(line_blame_data[1])
                    if line_blame_data[1] in author_list:
                        change_time = author_list[
                            line_blame_data[1]].strip().split('.')[0]
                        if util.compare(
                                change_time,
                                line_blame_data[2].strip().split('.')[0]):
                            author_list[line_blame_data[1]] = line_blame_data[
                                2].strip()
                    else:
                        author_list[
                            line_blame_data[1]] = line_blame_data[2].strip()
                author_info = ""
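                # Keep the author who touched the most lines in this block, together
                # with their latest change time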
                for author_name in author_name_set:
                    if author_info != "":
                        info_array = author_info.split('->')
                        if int(info_array[1]) < author_name_list.count(
                                author_name):
                            author_info = author_name + "->" + str(
                                author_name_list.count(author_name)
                            ) + "->" + author_list[author_name]
                    else:
                        author_info = author_name + "->" + str(
                            author_name_list.count(
                                author_name)) + "->" + author_list[author_name]
                if author_info != "":
                    file_author_set.add(author_info.split('->')[0])
                    block['author'] = author_info.split('->')[0]
                    temp_datetime = str(author_info.split('->')[2])
                    st = time.strptime(temp_datetime, '%Y-%m-%dT%H:%M:%S.%f')
                    final_datetime = time.mktime(st)
                    block['latest_datetime'] = final_datetime
                file_info['block_list'][block_idx] = block
            file_info['author_list'] = ';'.join(file_author_set)
            filedata['tool_name'] = 'dupc'
            filedata["stream_name"] = stream_info['STREAM_NAME']
            filedata["task_id"] = stream_info['TASK_ID']
            # Compress the string
            zip_bytes = zlib.compress(
                bytes(json.dumps(file_info), encoding='utf-8'))
            # Base64-encode
            zip_str = base64.b64encode(zip_bytes).decode('utf-8')
            filedata["defectsCompress"] = zip_str
            filedata_data = json.dumps(filedata)
            filedata_data = filedata_data.replace(": ", ":")
            #print(util.get_datetime()+" start upload file "+file_info['file_path'])
            #print(filedata_data)
            codecc_web.codecc_upload_file_json(filedata_data)

        project_summary = {}
        project_summary['stream_name'] = stream_info['STREAM_NAME']
        project_summary['task_id'] = stream_info['TASK_ID']
        project_summary['scan_summary'] = all_result_json['scan_summary']
        summary_data = json.dumps(project_summary)
        summary_data = summary_data.replace(": ", ":")
        #print(util.get_datetime()+" start submit summary_data")
        codecc_web.upload_project_dupc_summary(summary_data)
    except Exception:
        raise