def load_files(file1, file2):
    """ Load the results from two test runs and generate an HTML page with the differences. """
    f = open("results/" + file1, "r")
    results = json.loads(f.read())
    f.close()
    f = open("results/" + file2, "r")
    previous_results = json.loads(f.read())
    f.close()
    compare(results, previous_results)
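# load_files above and save_compare_results further down both finish by calling
# compare(results, previous_results), which is not part of this listing. A minimal
# sketch of what such a helper could look like, assuming the results are flat dicts
# of test name -> numeric value; the real report layout is not shown here:
def compare(results, previous_results):
    """Hypothetical sketch: write an HTML table of metric deltas to diff.html."""
    rows = []
    for key, value in results.items():
        old = previous_results.get(key)
        if isinstance(value, (int, float)) and isinstance(old, (int, float)):
            rows.append("<tr><td>%s</td><td>%s</td><td>%s</td><td>%+g</td></tr>"
                        % (key, old, value, value - old))
    with open("diff.html", "w") as f:
        f.write("<table><tr><th>test</th><th>previous</th><th>current</th>"
                "<th>delta</th></tr>" + "".join(rows) + "</table>")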
def bestCode(self, Count, response, PrecedentTry):
    self.reponse = response
    self.TryNumber = Count
    self.PrecedentTry = PrecedentTry
    h = 1
    eligible = []
    # Grow the population until enough eligible candidates have been found
    # (or the generation limit is reached).
    while h <= self.MaxGen and len(eligible) <= self.MaxSize:
        self.DevGenetic()
        for i in range(self.TaillePopu):
            difBP = 0
            difMP = 0
            for j in range(len(self.PrecedentTry)):
                BP, MP = util.compare(self.PrecedentTry[j], self.popu[i])
                difBP += self.B * abs(BP - self.reponse[j][0])
                difMP += abs(MP - self.reponse[j][1])
            # A candidate is eligible if it is consistent with every previous answer.
            if difBP == 0 and difMP == 0 and self.popu[i] not in eligible:
                eligible.append(self.popu[i])
        h += 1
    if len(eligible) == 0:
        # No consistent candidate: fall back to the fittest individual of the population.
        bestfit = 10000
        bestcode = util.gen(self.positions, self.CouleurPossible)
        for i in range(self.TaillePopu):
            if self.fitness(self.popu[i]) < bestfit:
                bestfit = self.fitness(self.popu[i])
                bestcode = self.popu[i]
        return bestcode
    bestguess = eligible[0]
    mostSimilarity = 0
    # Choose the eligible code most similar to the other eligible codes.
    for elements in eligible:
        similarity = 0
        for elements2 in eligible:
            if elements != elements2:
                BP, MP = util.compare(elements2, elements)
                similarity += BP + MP
        if similarity >= mostSimilarity:
            mostSimilarity = similarity
            bestguess = elements
    return bestguess
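# bestCode above and the solver snippets below all score guesses with
# util.compare(guess, code), which returns the Mastermind-style counts BP (right
# colour, right position) and MP (right colour, wrong position). The util module is
# not included in this listing; the sketch below is an assumed implementation:
def compare(guess, code):
    """Hypothetical sketch of util.compare: return (well_placed, misplaced)."""
    well_placed = sum(1 for g, c in zip(guess, code) if g == c)
    # Colour matches regardless of position, capped per colour, minus exact matches.
    colour_matches = sum(min(guess.count(colour), code.count(colour))
                         for colour in set(guess))
    return well_placed, colour_matches - well_placed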
def calc_plot_error(self):
    perc_errors = {"gamma_func": [], "params": []}
    abs_errors = {"gamma_func": [], "params": []}
    for art in self.articles:
        try:
            perc_error, abs_error = compare(art.views, art.est_params)
            perc_errors["params"].append(perc_error)
            abs_errors["params"].append(abs_error)
            if self.pattern.has_gamma:
                perc_error, abs_error = compare(art.views, art.est_gamma_func)
                perc_errors["gamma_func"].append(perc_error)
                abs_errors["gamma_func"].append(abs_error)
        except IndexError:
            self.catch_error(art, "calc_error")
    plt.plot_errors(perc_errors, abs_errors, self.plotfolder, self.pattern)
    self.errors = {"normalized": perc_errors, "absolute": abs_errors}
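# calc_plot_error expects compare(art.views, estimate) to return a percentage error
# and an absolute error for one article. That helper lives elsewhere; a plausible
# sketch, assuming views and the estimate are equal-length numeric sequences:
def compare(views, estimate):
    """Hypothetical sketch: mean percentage error and mean absolute error."""
    abs_errors = [abs(v - e) for v, e in zip(views, estimate)]
    perc_errors = [err / v for err, v in zip(abs_errors, views) if v != 0]
    perc = sum(perc_errors) / len(perc_errors) if perc_errors else 0.0
    mae = sum(abs_errors) / len(abs_errors) if abs_errors else 0.0
    return perc, mae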
def save_compare_results():
    """ Save the current results and, if previous results are available, generate an HTML page with the differences """
    global results, str_date
    commit = subprocess.Popen(
        ['git', 'log', '-n', '1', '--pretty=format:%H'],
        stdout=subprocess.PIPE).communicate()[0].decode()
    results["hash"] = commit
    # Save results
    if not os.path.exists("results"):
        os.makedirs("results")
    str_date = time.strftime("%y.%m.%d-%H:%M:%S")
    f = open("results/result_" + str_date + ".txt", "w")
    str_res = json.dumps(results, indent=2)
    f.write(str_res)
    f.close()
    # Read all the previous results stored in results/
    file_paths = []
    for root, directories, files in os.walk("results/"):
        for filename in files:
            # Join the two strings in order to form the full filepath.
            filepath = os.path.join(root, filename)
            file_paths.append(filepath)  # Add it to the list.
    # Sort the files; we only need the most recent previous run.
    file_paths.sort()
    if len(file_paths) > 1:
        last_file = file_paths[-2]  # The last file is the one we just saved
        f = open(last_file, "r")
        previous_results = json.loads(f.read())
        f.close()
    else:
        previous_results = {}
    compare(results, previous_results)
def defects_data_ccn(stream_info, error_lines, blame_lines):
    defects = []
    try:
        for idx, line in enumerate(error_lines):
            ccn_cc = {}
            line_array = line.strip().split("->")
            if len(line_array) == 7:
                ccn_cc["function_name"] = line_array[1]
                ccn_cc["long_name"] = line_array[2]
                ccn_cc["total_lines"] = line_array[4]
                ccn_cc["ccn"] = line_array[5]
                ccn_cc["condition_lines"] = line_array[6].replace(
                    ' ', '').replace('{', '').replace('}', '')
                function_lines = line_array[3].split('-')
                ccn_cc["start_lines"] = function_lines[0]
                ccn_cc["end_lines"] = function_lines[1]
                # Walk the blame data of the function body and keep the most recent author.
                author_info = ""
                for i in range(int(function_lines[0]), int(function_lines[1])):
                    line_blame_data = blame_lines[i].split('->')
                    if author_info != "":
                        info_array = author_info.split('->')
                        if util.compare(info_array[1].strip().split('.')[0],
                                        line_blame_data[2].strip().split('.')[0]):
                            author_info = line_blame_data[1] + "->" + line_blame_data[2].strip()
                    else:
                        author_info = line_blame_data[1] + "->" + line_blame_data[2].strip()
                ccn_cc["author"] = author_info.split('->')[0]
                temp_datetime = author_info.split('->')[1]
                st = time.strptime(temp_datetime, '%Y-%m-%dT%H:%M:%S.%f')
                final_datetime = time.mktime(st)
                ccn_cc["latest_datetime"] = int(round(final_datetime * 1000))
                defects.append(ccn_cc)
    except Exception as e:
        raise Exception(e)
    return defects
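# defects_data_ccn above and dupc_generate_data_json below call
# util.compare(current_time, candidate_time) on blame timestamps with the fractional
# seconds stripped, and replace the stored author when it returns True. That util
# module is not shown; the sketch below assumes it answers "is the candidate
# timestamp more recent than the current one?":
import time

def compare(current_time, candidate_time):
    """Hypothetical sketch of util.compare for '%Y-%m-%dT%H:%M:%S' blame timestamps."""
    fmt = '%Y-%m-%dT%H:%M:%S'
    return time.mktime(time.strptime(candidate_time, fmt)) > time.mktime(
        time.strptime(current_time, fmt))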
NombreCouleur = int(sys.argv[3])
positions = int(sys.argv[2])
Couleur = Couleur[:NombreCouleur]
code = gen()
#print("code to find:" + str(code))
if sys.argv[1] == 'GA':
    ponderation = 2
    depart = 0
    pourcentage = 2
    TailleEligible = 15 * positions
    TaillePopu = 35 * positions
    NombreGen = 40 * positions
    GA = UseGA.UseGA(Couleur, positions, ponderation, depart, pourcentage,
                     TailleEligible, TaillePopu, NombreGen)
    while BP != positions:
        #print ("Proposition n° : "+ str(GA.compte) + " -> " + str(GA.nextMove()))
        BP, MP = util.compare(GA.actual_prop, code)
        GA.reponse(MP, BP)
        i += 1
if sys.argv[1] == 'PSO':
    NbreCluster = 10
    NbreAgent = 5
    Distmin = 2
    distmax = 4
    distpull = 3
    MaxGen = 10
    PSO = UsePSO.UsePSO(Couleur, positions, NbreCluster, NbreAgent, Distmin,
                        distmax, distpull, MaxGen)
    while BP != positions:
        #print ("Proposition n° : "+ str(len(PSO.propositions)) + " -> " + str(PSO.actual_prop))
        BP, MP = util.compare(PSO.actual_prop, code)
        if BP != positions:
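# The driver above draws a random secret code with gen(), and bestCode in the
# genetic solver uses util.gen(self.positions, self.CouleurPossible) for the same
# purpose. Neither generator is shown in this listing; a plausible sketch follows,
# assuming a code is a list of colour symbols (the driver's no-argument gen()
# presumably closes over the module-level positions and Couleur):
import random

def gen(positions, couleurs):
    """Hypothetical sketch of util.gen: draw a random code of `positions` colours."""
    return [random.choice(couleurs) for _ in range(positions)]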
def evaluation(self, test, i):
    BP, MP = util.compare(test, self.PrecedentTry[i])
    e = abs(self.score(self.reponse[i][0], self.reponse[i][1]) - self.score(BP, MP))
    return e
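# evaluation relies on self.score(BP, MP), which is defined elsewhere in the class.
# Judging from the self.B weighting applied to well-placed pegs in bestCode, one
# plausible form (an assumption, not the original implementation) is a weighted
# peg count:
def score(self, BP, MP):
    """Hypothetical sketch: weight well-placed pegs by the ponderation factor B."""
    return self.B * BP + MP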
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_data_dir', type=str, default='data/test.txt')
    parser.add_argument('--test_data_dir', type=str, default='data/test.txt')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--word_dim', type=int, default=128)
    parser.add_argument('--tag_sum', type=int, default=7)
    parser.add_argument('--num_epochs', type=int, default=1)
    parser.add_argument('--min_freq', type=int, default=0)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--test_batch_size', type=int, default=32)
    parser.add_argument('--max_len', type=int, default=100)
    parser.add_argument('--dropout_keep', type=float, default=0.8)
    args = parser.parse_args()

    train_data_loader = Dataloader(args.train_data_dir, args.batch_size,
                                   args.min_freq, True, args.max_len)
    args.vocab_size = train_data_loader.vocab_size

    input_data = tf.placeholder(tf.int32, [args.batch_size, args.max_len])
    target = tf.placeholder(tf.int32, [args.batch_size, args.max_len])
    sequence_length = tf.placeholder(tf.int32, [args.batch_size])

    embeddings = tf.Variable(
        tf.random_uniform([args.vocab_size, args.word_dim], -1.0, 1.0))
    embeddings = tf.nn.l2_normalize(embeddings, 1)
    input_data_embedding = tf.nn.embedding_lookup(embeddings, input_data)
    input_data_embedding = tf.nn.dropout(input_data_embedding, args.dropout_keep)

    lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(args.word_dim, forget_bias=1.0,
                                           state_is_tuple=True)
    lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(args.word_dim, forget_bias=1.0,
                                           state_is_tuple=True)
    (output_fw, output_bw), state = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell, lstm_bw_cell, input_data_embedding, dtype=tf.float32,
        sequence_length=sequence_length)
    bilstm_output = tf.concat([output_fw, output_bw], axis=2)

    W = tf.get_variable(name='W',
                        shape=[args.batch_size, 2 * args.word_dim, args.tag_sum],
                        dtype=tf.float32,
                        initializer=tf.zeros_initializer())
    b = tf.get_variable(name='b',
                        shape=[args.batch_size, args.max_len, args.tag_sum],
                        dtype=tf.float32,
                        initializer=tf.zeros_initializer())
    bilstm_output = tf.tanh(tf.matmul(bilstm_output, W) + b)

    log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
        bilstm_output, target,
        tf.tile(np.array([args.max_len]), np.array([args.batch_size])))
    transition_params_ = tf.placeholder(shape=transition_params.shape,
                                        dtype=tf.float32)
    viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(
        bilstm_output, transition_params_,
        tf.tile(np.array([args.max_len], dtype=np.int32),
                np.array([args.batch_size], dtype=np.int32)))

    loss = tf.reduce_mean(-log_likelihood)
    optimizer = tf.train.AdamOptimizer(args.learning_rate)
    train_op = optimizer.minimize(loss)

    # train model
    sess = tf.Session()
    print('---------- train ----------')
    sess.run(tf.global_variables_initializer())
    for e in range(args.num_epochs):
        print('epoch:', e)
        train_data_loader.reset_batch_pointer()
        for b in range(train_data_loader.num_batches):
            x, y, sl = train_data_loader.next_batch()
            # print(sl)
            feed = {input_data: x, target: y, sequence_length: sl}
            tf_transition_params, _ = sess.run([transition_params, train_op], feed)
            # sess.run(train_op, feed)
            if b % 100 == 0:
                train_loss = sess.run(loss, feed)
                print('iter', b, ' loss:', train_loss)

    # test model
    print('---------- test ----------')
    test_data_loader = Dataloader(args.test_data_dir, args.test_batch_size,
                                  args.min_freq, False, args.max_len)
    test_data_loader.reset_batch_pointer()
    total_y_num = 0
    total_pre_num = 0
    total_true_num = 0
    for b in range(test_data_loader.num_batches):
        x, y, sl = test_data_loader.next_batch()
        feed = {
            input_data: x,
            target: y,
            transition_params_: tf_transition_params,
            sequence_length: sl
        }
        tf_viterbi_sequence, tf_viterbi_score = sess.run(
            [viterbi_sequence, viterbi_score], feed)
        for pre, y_ in zip(tf_viterbi_sequence, y):
            true_num, y_num, pre_num = compare(y_, pre)
            total_true_num += true_num
            total_y_num += y_num
            total_pre_num += pre_num

    # precision = intersection / entities extracted by the model
    # recall = intersection / all entities in the dataset
    # F1 = 2 * (precision * recall) / (precision + recall)
    if total_pre_num == 0:
        # print('total pre num is 0')
        accuracy = 0
    else:
        accuracy = total_true_num / total_pre_num
    if total_y_num == 0:
        # print('total y num is 0')
        recall = 0
    else:
        recall = total_true_num / total_y_num
    if total_pre_num == 0 or total_y_num == 0:
        # print('f1 is 0')
        f1 = 0
    else:
        f1 = 2 * accuracy * recall / (accuracy + recall)

    with open('result.txt', 'w') as fin:
        fin.write('------------test result-------------\n')
        fin.write('total true num:' + str(total_true_num) + '\n')
        fin.write('total y num:' + str(total_y_num) + '\n')
        fin.write('total predict num:' + str(total_pre_num) + '\n')
        fin.write('accuracy:' + str(accuracy) + '\n')
        fin.write('recall:' + str(recall) + '\n')
        fin.write('f1-score:' + str(f1) + '\n')
    print('total true num:' + str(total_true_num))
    print('total y num:' + str(total_y_num))
    print('total predict num:' + str(total_pre_num))
    print('accuracy:' + str(accuracy))
    print('recall:' + str(recall))
    print('f1-score:' + str(f1))
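# The test loop above calls compare(y_, pre) on a gold tag sequence and a decoded
# tag sequence and expects (true_num, y_num, pre_num): correctly predicted entities,
# gold entities, and predicted entities. The helper is not in this listing; the
# sketch below assumes an integer BIO scheme where 0 is O, odd ids are B- tags and
# the following even id is the matching I- tag:
def compare(gold, pred):
    """Hypothetical sketch: entity-level counts (correct, gold, predicted)."""
    def entities(tags):
        tags = [int(t) for t in tags]
        spans, start = set(), None
        for i, t in enumerate(tags + [0]):          # sentinel closes a trailing entity
            if start is not None and (t == 0 or t % 2 == 1 or t != tags[start] + 1):
                spans.add((start, i, tags[start]))  # close the currently open entity
                start = None
            if t != 0 and t % 2 == 1:               # an assumed B- tag opens an entity
                start = i
        return spans
    gold_spans, pred_spans = entities(gold), entities(pred)
    return len(gold_spans & pred_spans), len(gold_spans), len(pred_spans)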
def evaluation(self, test, i):
    BP, MP = util.compare(test, self.PrecedentTry[i])
    #e = abs(self.score(self.reponse[i][0], self.reponse[i][1]) - self.score(BP, MP)) + Genetic.B * i
    e = abs(self.reponse[i][0] - BP) + abs(self.reponse[i][1] - MP)
    return e
def dupc_generate_data_json(stream_info):
    try:
        all_result_json = {}
        with open(stream_info['PROJECT_FILE_DUPC_JSON'], "r", encoding='utf-8') as jsonfile:
            all_result_json = json.load(jsonfile)
        for idx, file_info in enumerate(all_result_json['files_info']):
            filedata = {}
            filename_result = result.get_result_file_path(
                stream_info["STREAM_RESULT_PATH"], file_info['file_path'])
            txt_file_path_blame = filename_result + ".scm_blame.txt"
            xml_file_path_info = filename_result + ".scm_info.xml"
            xml_file_path_url = filename_result + ".scm_url.xml"
            file_info["file_change_time"] = scm.parse_info_xml(xml_file_path_info, stream_info)
            filedata = scm.parse_log_xml(xml_file_path_url, file_info['file_path'], stream_info)
            blameline = ""
            file_author_set = set([])
            if os.path.isfile(txt_file_path_blame):
                with open(txt_file_path_blame, "r", encoding='utf-8') as blame_line:
                    blameline = blame_line.readlines()
            for block_idx, block in enumerate(file_info['block_list']):
                author_name_set = set([])
                author_name_list = []
                author_list = {}
                # Clamp the blame line range to the duplicated block boundaries.
                lines_list = range(0, len(blameline))
                if len(blameline) > int(block['start_lines']) and len(blameline) < int(block['end_lines']):
                    lines_list = range(int(block['start_lines']), len(blameline))
                elif len(blameline) > int(block['end_lines']):
                    lines_list = range(int(block['start_lines']), int(block['end_lines']))
                for i in lines_list:
                    line_blame_data = blameline[i].split('->')
                    author_name_list.append(line_blame_data[1])
                    author_name_set.add(line_blame_data[1])
                    # Keep the latest change time per author.
                    if line_blame_data[1] in author_list:
                        change_time = author_list[line_blame_data[1]].strip().split('.')[0]
                        if util.compare(change_time, line_blame_data[2].strip().split('.')[0]):
                            author_list[line_blame_data[1]] = line_blame_data[2].strip()
                    else:
                        author_list[line_blame_data[1]] = line_blame_data[2].strip()
                # Pick the author who touched the most lines in the block.
                author_info = ""
                for author_name in author_name_set:
                    if author_info != "":
                        info_array = author_info.split('->')
                        if int(info_array[1]) < author_name_list.count(author_name):
                            author_info = author_name + "->" + str(
                                author_name_list.count(author_name)) + "->" + author_list[author_name]
                    else:
                        author_info = author_name + "->" + str(
                            author_name_list.count(author_name)) + "->" + author_list[author_name]
                if author_info != "":
                    file_author_set.add(author_info.split('->')[0])
                    block['author'] = author_info.split('->')[0]
                    temp_datetime = str(author_info.split('->')[2])
                    st = time.strptime(temp_datetime, '%Y-%m-%dT%H:%M:%S.%f')
                    final_datetime = time.mktime(st)
                    block['latest_datetime'] = final_datetime
                file_info['block_list'][block_idx] = block
            file_info['author_list'] = ';'.join(file_author_set)
            filedata['tool_name'] = 'dupc'
            filedata["stream_name"] = stream_info['STREAM_NAME']
            filedata["task_id"] = stream_info['TASK_ID']
            # Compress the defect payload
            zip_bytes = zlib.compress(bytes(json.dumps(file_info), encoding='utf-8'))
            # base64-encode it for transport
            zip_str = base64.b64encode(zip_bytes).decode('utf-8')
            filedata["defectsCompress"] = zip_str
            filedata_data = json.dumps(filedata)
            filedata_data = filedata_data.replace(": ", ":")
            #print(util.get_datetime()+" start upload file "+file_info['file_path'])
            #print(filedata_data)
            codecc_web.codecc_upload_file_json(filedata_data)
        project_summary = {}
        project_summary['stream_name'] = stream_info['STREAM_NAME']
        project_summary['task_id'] = stream_info['TASK_ID']
        project_summary['scan_summary'] = all_result_json['scan_summary']
        summary_data = json.dumps(project_summary)
        summary_data = summary_data.replace(": ", ":")
        #print(util.get_datetime()+" start submit summary_data")
        codecc_web.upload_project_dupc_summary(summary_data)
    except Exception as e:
        raise Exception(e)