def find_new_invalid_svc(self, invalid_history_file_path):
    # Use the Dell support URL to check tags that are still unknown after applying the invalid history
    valid_set = set([])
    max_retry = 3  # retry 3 times at most
    new_invalid_count = 0
    for svc in self.target_svc_set:
        for i in xrange(max_retry):
            try:
                if i == max_retry - 1:
                    time.sleep(1)  # last retry, sleep for 1 second first
                if SVCGenerator.check_svc_valid(svc):
                    valid_set.add(svc)
                else:
                    new_invalid_count += 1
                    self.logger.info("Invalid: %s" % svc)
                    # Append the new invalid SVC to the history file if provided
                    FileUtil.save_object_to_path(svc, invalid_history_file_path, append=True)
                break
            except requests.exceptions.ConnectionError:
                # On ConnectionError, retry
                continue
        else:
            self.logger.warn("Checking SVC %s timed out, skipping" % svc)
    self.target_svc_set = valid_set
    self.logger.info("Found %s new invalid SVCs" % new_invalid_count)
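# --- Illustrative sketch (not part of the original module) ---
# Minimal, self-contained example of the for/else retry pattern used in
# find_new_invalid_svc above: the else branch of a for loop runs only when the
# loop finished without hitting break, i.e. when every attempt failed.
# check() is a hypothetical stand-in for SVCGenerator.check_svc_valid plus the
# network call; it is not part of the original code base.
import random


def check():
    # Simulated flaky operation: fails roughly half of the time.
    if random.random() < 0.5:
        raise ConnectionError("simulated network failure")
    return True


def check_with_retry(max_retry=3):
    result = None
    for i in range(max_retry):
        try:
            result = check()
            break  # success: skip the else branch below
        except ConnectionError:
            continue  # retry on connection errors
    else:
        print("all %s attempts failed" % max_retry)
    return result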
def _run(self, final_thresholds, maj_thresholds, matrix_file_path=None, artifact_map_file_path=None):
    if not matrix_file_path:
        matrix_file_path = self.default_matrix_path()
    if not artifact_map_file_path:
        artifact_map_file_path = self._default_a2eMap_path()
    if not FileUtil.file_exists(matrix_file_path):
        log.error(f"File does not exist: {matrix_file_path}\n"
                  f"Please pass a valid file path or call {self.__class__.__name__}().precalculate() first")
    if not FileUtil.file_exists(artifact_map_file_path):
        log.error(f"File does not exist: {artifact_map_file_path}\n"
                  f"Please pass a valid file path or call {self.__class__.__name__}().precalculate() first")

    trace_link_data_structure = ElementLevelTraceLinkDataStructure.load_data_from(matrix_file_path, artifact_map_file_path)
    trace_link_processor = MajProcessor(trace_link_data_structure, self.similarity_filter, self.req_reduce_func,
                                        self.code_reduce_function, final_thresholds, maj_thresholds,
                                        self.callgraph_aggregator)
    return trace_link_processor.run()
def process_trace_link_2D_dict(self, trace_link_2D_dict: Dict[float, Dict[float, List[TraceLink]]]):
    print_str_dict, best_eval_result, best_final_threshold, best_maj_thresh = self._process_trace_link_2D_dict(trace_link_2D_dict)

    header_row = [""]  # First header cell is empty -> needed for the header column
    header_row += [self.FILE_LEVEL_DROP_THRESH_PATTERN.format(final_threshold)
                   for final_threshold in print_str_dict[best_maj_thresh].keys()]
    excel_array = [header_row]
    for maj_thresh in sorted(print_str_dict):
        # First cell is the maj thresh, followed by the evaluated f1 metrics for this maj thresh
        next_row = [self.MAJ_DROP_THRESH_PATTERN.format(maj_thresh)]
        for final_threshold in sorted(print_str_dict[maj_thresh]):
            next_row.append(print_str_dict[maj_thresh][final_threshold])
            if self._also_print_eval:
                log.info(f"\nm{maj_thresh} f{final_threshold}\n"
                         f"{next_row[-1]}")
        excel_array.append(next_row)

    excel_array.append([""])  # Add empty row as divider
    if isinstance(best_eval_result, F1ResultObject):
        excel_array = self._add_best_f1_2D_excel_rows(excel_array, print_str_dict, best_eval_result,
                                                      best_final_threshold, best_maj_thresh)
    else:
        excel_array.append([self.NO_BEST_F1_MESSAGE])

    FileUtil.write_eval_to_excel(excel_array, self._excel_output_file_path)
def __init__(self, lemmatizer_type=LemmatizerType.english_nltk):
    self._lemmatizer_type = lemmatizer_type
    self._lemmatizer = None
    if lemmatizer_type == self.LemmatizerType.english_nltk:
        self._lemmatizer = WordNetLemmatizer()
    elif lemmatizer_type == self.LemmatizerType.english_spacy:
        # Use precalculated files for spacy since free google colab can't handle fasttext model and spacy lemmatizer at once
        if not FileUtil.file_exists(PRECALCULATED_SPACY_ENGLISH_LEMMA_CSV):
            log.error(f"{PRECALCULATED_SPACY_ENGLISH_LEMMA_CSV} does not exist. "
                      f"The spacy lemmatizer needs a precalculated lemma file.")
        self._lemmatizer = PandasUtil.read_csv_to_dataframe(PRECALCULATED_SPACY_ENGLISH_LEMMA_CSV)
    elif lemmatizer_type == self.LemmatizerType.italian_nltk:
        self._lemmatizer = SnowballStemmer("italian")
    elif lemmatizer_type == self.LemmatizerType.italian_spacy:
        # Use precalculated files for spacy since free google colab can't handle fasttext model and spacy lemmatizer at once
        if not FileUtil.file_exists(PRECALCULATED_SPACY_ITALIAN_LEMMA_CSV):
            log.error(f"{PRECALCULATED_SPACY_ITALIAN_LEMMA_CSV} does not exist. "
                      f"The spacy lemmatizer needs a precalculated lemma file.")
        self._lemmatizer = PandasUtil.read_csv_to_dataframe(PRECALCULATED_SPACY_ITALIAN_LEMMA_CSV)
    else:
        log.error(f"Unknown case for LemmatizerType: {lemmatizer_type}")
def tokenize_all_sentences_in_directory(self, directory) -> [str]:
    sentences = []
    for file in FileUtil.get_files_in_directory(directory):
        if self._italian:
            sentences += sent_tokenize(FileUtil.read_textfile_into_string(file, self._dataset.encoding()),
                                       language="italian")
        else:
            sentences += sent_tokenize(FileUtil.read_textfile_into_string(file, self._dataset.encoding()))
    return sentences
def __init__(self, ital=False):
    if ital:
        stopwords_as_string = FileUtil.read_textfile_into_string(ITAL_CODE_STOPWORD_FILEPATH)
    else:
        stopwords_as_string = FileUtil.read_textfile_into_string(CODE_STOPWORD_FILEPATH)
    self._stop_words = stopwords_as_string.split("\n")
def serialize_txt_batch(dell_asset_list, output_dir):
    # Serialize each Dell asset as one text file under the output directory.
    # Return all the output file names under the dir in a list for reference.
    output_path_list = list([])
    for da in dell_asset_list:
        file_name = "%s.txt" % da.svc_tag
        output_path_list.append(os.path.join(output_dir, file_name))
        FileUtil.save_object_to_path(da.serialize_txt(), output_path_list[-1])
    return output_path_list
def _tokenize_and_preprocess(self, file_path):
    log.debug(f"Tokenizing {file_path}")
    file_representation = self._tokenizer.tokenize(file_path)
    log.debug(f"Preprocessing {file_path}")
    file_representation.preprocess(self._preprocessor)
    if self._preprocessed_token_output_directory:
        FileUtil.write_file(self._preprocessed_token_output_directory
                            / (PREPROCESSED_TOKEN_FILENAME_PREFIX + FileUtil.get_filename_from_path(file_path)),
                            file_representation.get_printable_string())
    return file_representation
def write_to(self, file_path):
    json_to_write = {
        self.REQ_FILE_TO_REQ_ELEMENT_ID_MAP: self._req_file_to_req_element_id_map,
        self.CODE_FILE_TO_METHOD_MAP: self._code_file_to_method_map,
        self.CODE_FILE_TO_NON_CG_ELEMENT_MAP: self._code_file_to_non_cg_element_map
    }
    FileUtil.write_to_json(file_path, json_to_write)
def create_all_embeddings(self, input_directory, output_emb_filepath=None) -> [EmbeddingContainer]:
    """
    Creates embeddings for all files in the input directory.
    Writes all embeddings to a file at output_emb_filepath if it is not None.
    Returns the embeddings as a list.
    """
    log.info("Read directory: " + str(input_directory))
    embedding_list = self.embedd_all_files_in_directory(input_directory)
    if output_emb_filepath is not None:
        FileUtil.write_file(output_emb_filepath, "\n".join(map(str, embedding_list)))
    return embedding_list
def main(svc_input, configs):
    logger = Logger("Query log", verbose=True)
    log_file_name = "log%s_%s.txt" % (svc_input.replace("?", "#"), DateTimeUtil.get_current_datetime(is_date=True))
    log_file_path = WindowsUtil.convert_win_path(os.path.join(temp_dir, log_file_name))
    logger.info("[Query started] %s" % svc_input)
    try:
        # Find the matching local warranty history records
        history_zip = ZipFileSVC(zip_file_path=history_zipfile, mode='a')
        start_time = DateTimeUtil.get_current_datetime()
        # Generate all possible service tags
        svc_generator = SVCGenerator(svc_input, logger)
        logger.info("Generated all possible service tags: %s" % len(svc_generator.target_svc_set))
        # Based on the local invalid-tag history, split into target tags and invalid tags
        existed_svc = history_zip.find_file_regex(svc_generator.regex)
        svc_generator.generate_target_svc_batch(existed_svc, invalid_history_file_path)
        # Call the Dell query API and convert the API data into entity objects
        output_dell_asset_list = list([])
        if svc_generator.target_svc_set:
            batch = Batch(logger, configs)
            api_dell_asset_list = batch.begin(svc_generator.target_svc_set)
            output_dell_asset_list = api_dell_asset_list
            logger.info("Got %s results in total from the API" % (len(api_dell_asset_list)))
            logger.info("Serializing the entity objects to local temporary TXT files")
            temp_text_files_path = DellAsset.serialize_txt_batch(api_dell_asset_list, temp_dir)
            logger.info("Saving the serialized temporary files into the local zip history, total: %s" % len(temp_text_files_path))
            history_zip.add_new_file_batch(temp_text_files_path)
            logger.info("Deleting %s temporary TXT files" % len(temp_text_files_path))
            for file_path in temp_text_files_path:
                FileUtil.delete_file(file_path)
            logger.info("Merging the entity objects from the API with those from the history records")
        else:
            logger.warn("The target tag set is empty; exporting results from the history records only")
        for svc in svc_generator.existed_svc_set:
            dell_asset_content = history_zip.get_member_content(file_name="%s.txt" % svc)
            output_dell_asset_list.append(DellAsset.deserialize_txt(dell_asset_content))
        logger.info("Added history records, %s results in total" % (len(output_dell_asset_list)))
        excel_output_path = WindowsUtil.convert_win_path(os.path.join(excel_dir, "%s.xlsx" % svc_generator.get_file_name()))
        DellAsset.save_as_excel_batch(output_dell_asset_list, excel_output_path)
        if FileUtil.is_path_existed(excel_output_path):
            logger.info("Saved as an Excel file successfully")
            end_time = DateTimeUtil.get_current_datetime()
            logger.info("Total time %s" % DateTimeUtil.datetime_diff(start_time, end_time))
            logger.info("[Query finished] %s results in total, saved at: %s" % (len(output_dell_asset_list), excel_output_path))
        else:
            logger.error("[Failed to save results] %s" % excel_output_path)
    except Exception as e:
        # If the program fails with an error, send an email report
        logger.error("[Query failed] A report has been sent, please wait for a fix")
        logger.error("%s\n%s" % (e, traceback.format_exc()))
        logger.save(log_file_path)
        email_api_key = configs["email_api_key"]
        email = Email(email_api_key, subject="[Query failed] %s %s" % (DateTimeUtil.get_current_datetime(is_date=True), svc_input))
        email.add_attachment(log_file_path)
        email.send(cc_mode=logger.has_error)
def tokenize(self, file_path):
    text_as_string = FileUtil.read_textfile_into_string(file_path, self._dataset.encoding())
    if self._italian:
        tokenized_text = " ".join(text_as_string.split("'"))
        return TextFileRepresentation(word_tokenize(tokenized_text), file_path)
    else:
        return TextFileRepresentation(word_tokenize(text_as_string), file_path)
def add_new_file_batch(self, file_path_list):
    # Duplicated files are allowed, so be careful
    for file_path in file_path_list:
        if not FileUtil.is_path_existed(file_path):
            continue
        file_name = os.path.split(file_path)[-1]
        if len(file_name) >= 7:
            self.file.write(filename=file_path, arcname=file_name)
def convert_comet_to_recall_prec_csv(file_path, dataset, drop_threshs, output_file_name):
    """
    Creates a csv file with recall/precision pairs that are generated by applying the thresholds,
    e.g. drop_threshs = [0, 0.01, 0.02, ..., 1].
    The csv file can be used to illustrate a recall/precision graph in LaTeX.
    """
    trace_links = _extract_comet_trace_links(file_path)
    eval_result_list = _eval_comet_data_multiple_thresh(trace_links, dataset, drop_threshs)
    recall_prec_dict = {}
    for eval_result_object, _ in eval_result_list:
        if isinstance(eval_result_object, F1ResultObject):
            recall_prec_dict[eval_result_object.recall] = eval_result_object.precision
    FileUtil.write_recall_precision_csv(recall_prec_dict, output_file_name)
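# --- Illustrative sketch (not part of the original module) ---
# Rough idea of the recall/precision csv that convert_comet_to_recall_prec_csv
# produces for a LaTeX plot. FileUtil.write_recall_precision_csv is part of the
# project's own utilities; the helper below is an assumed stand-in using the
# standard csv module, not the actual implementation.
import csv


def write_recall_precision_csv_sketch(recall_prec_dict, output_file_name):
    with open(output_file_name, "w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["recall", "precision"])  # header row
        for recall in sorted(recall_prec_dict):
            writer.writerow([recall, recall_prec_dict[recall]])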
def tokenize(self, file_path) -> FileRepresentation:
    text_as_string = FileUtil.read_textfile_into_string(file_path, self._dataset.encoding())
    if self._italian:
        word_tokenized_sentences = [word_tokenize(" ".join(sent.split("'")), language="italian")
                                    for sent in sent_tokenize(text_as_string, language="italian")]
    else:
        word_tokenized_sentences = [word_tokenize(sent) for sent in sent_tokenize(text_as_string)]
    return TextFileGroupedRepresentation(word_tokenized_sentences, file_path)
def tokenize(self, file_path) -> FileRepresentation:
    text_as_string = FileUtil.read_textfile_into_string(file_path, self._dataset.encoding())
    grp = re.search(self.JAVADOC_TAGS, text_as_string, re.RegexFlag.IGNORECASE)
    if grp:
        matched_tag = grp[0]
        substring_index = text_as_string.find(matched_tag)
        text_as_string = text_as_string[:substring_index]
    text_as_string = super(JavaDocDescriptionOnlyTokenizer, self).tokenize_to_string_list(text_as_string)
    return TextFileRepresentation(text_as_string, file_path)
def process_trace_link_dict(self, trace_link_dict: Dict[float, List[TraceLink]]):
    print_str_dict, best_eval_result, best_thresh = self._process_trace_link_dict(trace_link_dict)

    header_row = []  # Contains thresholds
    value_row = []  # Contains evaluated f1 metrics
    for final_threshold in sorted(print_str_dict.keys()):
        header_row.append(self.FILE_LEVEL_DROP_THRESH_PATTERN.format(final_threshold))
        value_row.append(print_str_dict[final_threshold])
        if self._also_print_eval:
            log.info(f"\nf{final_threshold}\n"
                     f"{value_row[-1]}")

    excel_array = [header_row] + [value_row]
    excel_array.append([""])  # Add empty row as divider
    if isinstance(best_eval_result, F1ResultObject):
        excel_array = self._add_best_f1_excel_rows(excel_array, print_str_dict, best_eval_result, best_thresh)
    else:
        excel_array.append([self.NO_BEST_F1_MESSAGE])

    FileUtil.write_eval_to_excel(excel_array, self._excel_output_file_path)
def _extract_comet_trace_links(file_path):
    lines = FileUtil.read_textfile_into_lines_list(file_path)
    lines = lines[6:]  # first 6 lines contain no similarity data
    trace_links = []
    for line in lines:
        req, code, sim = line.split(" ")
        code = _remove_package_prefix(code)
        if code.endswith(".jsp") or code.endswith(".txt"):
            continue
        sim = float(sim)
        trace_links.append(TraceLink(req, code, sim))
    return trace_links
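# --- Illustrative sketch (not part of the original module) ---
# _extract_comet_trace_links assumes whitespace-separated lines of the form
# "<requirement file> <code file> <similarity>" after the 6-line header.
# The sample lines below are made up purely to show that format.
sample_comet_lines = [
    "UC1.txt PatientController.java 0.42",
    "UC2.txt Appointment.java 0.17",
]
for sample_line in sample_comet_lines:
    req, code, sim = sample_line.split(" ")
    print(req, code, float(sim))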
def filter_invalid_history(self, invalid_history_file_path):
    # If an invalid-tag history is provided, remove those invalid tags from the target svc set
    if FileUtil.is_path_existed(invalid_history_file_path):
        with open(invalid_history_file_path, mode='r') as invalid_history_file:
            # Read the history file line by line, in case the file is too large
            for svc in invalid_history_file:
                svc = svc.replace("\n", "")
                if len(svc) == 7 and svc in self.target_svc_set:
                    self.invalid_history_count += 1
                    self.target_svc_set.remove(svc)
                    if not self.target_svc_set:
                        break
        self.logger.info("Known invalid service tags from local history: %s" % self.invalid_history_count)
def iterate_files(tokenizer, preprocessor, folder):
    for file in FileUtil.get_files_in_directory(folder, True):
        file_representation = tokenizer.tokenize(file)
        file_representation.preprocess(preprocessor)
        for word in file_representation.token_list:
            lemma = [token.lemma_ for token in spacy_lemmatizer(word)]
            if len(lemma) > 1:
                log.info(f"More than one lemma {lemma} for \"{word}\". Using \"{''.join(lemma)}\" as lemma")
            lemma = "".join(lemma)
            if word in word_to_lemma_map:
                if not word_to_lemma_map[word] == lemma:
                    log.info(f"Different duplicate lemma for {word}: {word_to_lemma_map[word]} <-> {lemma}")
            else:
                word_to_lemma_map[word] = lemma
class Warranty(object):

    translation = FileUtil.read_file(file_path=translation_url, isYML=True, isURL=True)

    def __init__(self, service_en, start_date, end_date, provider, service_ch=None):
        self.start_date = DateTimeUtil.parse_str_date(start_date)
        self.end_date = DateTimeUtil.parse_str_date(end_date)
        self.service_en = str(service_en)
        self.service_en = self.service_en.replace(",", " ")
        self.provider = provider
        self.service_ch = service_ch
        if not service_ch or service_ch == "?":
            self.service_ch = Warranty.translation.get(service_en, "?")

    def to_excel_data(self):
        return [self.service_ch, self.service_en, self.start_date, self.end_date, self.provider]

    def __repr__(self):
        return "%s,%s,%s,%s,%s" % (self.service_ch, self.service_en, self.start_date, self.end_date, self.provider)

    @staticmethod
    def deserialize_txt(warranty_line):
        if warranty_line:
            items = warranty_line.split(",")
            if len(items) >= 5:
                if items[1] or items[0]:
                    return Warranty(service_ch=items[0], service_en=items[1], start_date=items[2],
                                    end_date=items[3], provider=items[4])
        return None
def embedd_all_files_in_directory(self, directory):
    all_filenames = FileUtil.get_files_in_directory(directory)
    all_embeddings = []
    for filename in all_filenames:
        try:
            file_representation = self._tokenize_and_preprocess(filename)
        except (FileNotFoundError, IsADirectoryError, PermissionError, UnicodeDecodeError) as e:
            log.info(f"SKIPPED: Error on reading or tokenizing {filename}: {e}")
            continue
        except JavaSyntaxError as j:
            log.info(f"SKIPPED: JavaSyntaxError on tokenizing {filename} (Note: code files need to be compilable): {j.at}")
            continue
        except (JavaParserError, LexerError) as j:
            log.info(f"SKIPPED: Error on tokenizing {filename} (Note: code files need to be compilable): {j}")
            continue

        file_embedding = self._create_embeddings(file_representation)
        if file_embedding:
            all_embeddings.append(file_embedding)
        else:
            log.info(f"No embedding for {filename}")
    return all_embeddings
@classmethod
def load_from(cls, file_path):
    loaded_json = FileUtil.read_from_json(file_path)
    return cls(loaded_json[cls.REQ_FILE_TO_REQ_ELEMENT_ID_MAP],
               loaded_json[cls.CODE_FILE_TO_METHOD_MAP],
               loaded_json[cls.CODE_FILE_TO_NON_CG_ELEMENT_MAP])
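# --- Illustrative sketch (not part of the original module) ---
# Minimal round trip of the three maps persisted by write_to/load_from above,
# using the standard json module as a stand-in for FileUtil.write_to_json and
# FileUtil.read_from_json. The key names and file contents are made up.
import json

maps_to_persist = {
    "req_file_to_req_element_id_map": {"UC1.txt": ["UC1.name", "UC1.description"]},
    "code_file_to_method_map": {"Patient.java": ["Patient.getName()"]},
    "code_file_to_non_cg_element_map": {"Patient.java": ["Patient.classname"]},
}

with open("artifact_map_example.json", "w", encoding="utf8") as json_file:
    json.dump(maps_to_persist, json_file)

with open("artifact_map_example.json", "r", encoding="utf8") as json_file:
    loaded_maps = json.load(json_file)

assert loaded_maps == maps_to_persist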
def create_callgraph_from_raw_file(dataset: Dataset, create_class_callgraph=False):
    """
    Extracts the class and method call graph from a raw call graph file generated by the java call graph tool.
    The input raw call graph file is automatically retrieved from dataset.raw_call_graph_path().
    Saves the call graphs as json files at dataset.method_callgraph_path() and dataset.class_callgraph_path().

    resulting class call graph:
    dict["classname"] = dict{
        called_by=[str]
        calls=[str]
    }

    resulting method call graph:
    dict["classname.methodname(paramtyp1,paramtyp2)"] = dict{
        called_by=[classname.methodname(paramtyp1,paramtyp2),...]
        calls=[classname.methodname(paramtyp1,paramtyp2),...]
        class_name=str
        method_name=str
        params=[str]
    }
    """
    raw_txt_path = dataset.raw_call_graph_path()
    output_class_callgraph = dataset.class_callgraph_path()
    output_method_callgraph = dataset.method_callgraph_path()
    text_rows = []
    try:
        file = open(raw_txt_path, 'r', encoding='utf8')
        text_rows = file.readlines()
    except IOError:
        log.error("Unable to read " + str(raw_txt_path))

    class_call_graph = dict()
    method_call_graph = dict()

    def insert_class(class_name, calls=set(), called_by=set()):
        if class_name in class_call_graph:
            class_call_graph[class_name][CALLS] |= calls
            class_call_graph[class_name][CALLED_BY] |= called_by
        else:
            class_ref = dict()
            class_ref[CALLED_BY] = called_by
            class_ref[CALLS] = calls
            class_call_graph[class_name] = class_ref

    def insert_entry(dict_key, class_name, method_name, param_list, called_by=set(), calls=set()):
        if dict_key in method_call_graph:
            method_call_graph[dict_key][CALLS] |= calls
            method_call_graph[dict_key][CALLED_BY] |= called_by
        else:
            method_dict = dict()
            method_dict[CALLS] = calls
            method_dict[CALLED_BY] = called_by
            method_dict[CLASS_NAME] = class_name
            method_dict[METHOD_NAME] = method_name
            method_dict[PARAMS] = param_list
            method_call_graph[dict_key] = method_dict

    def remove_external_calls():
        for dict_key in method_call_graph:
            method_call_graph[dict_key][CALLS] = [callee for callee in method_call_graph[dict_key][CALLS]
                                                  if callee in method_call_graph]
            method_call_graph[dict_key][CALLED_BY] = [caller for caller in method_call_graph[dict_key][CALLED_BY]
                                                      if caller in method_call_graph]

    for row in text_rows:
        row_split = row.split(":")
        if row_split[0] == "C":  # Class level call
            classes = row_split[1].split(" ")
            class_1 = _clean(classes[0])
            class_2 = _clean(classes[1])
            if _is_external_class(dataset, class_1) or _is_external_class(dataset, class_2):
                continue
            caller_class_name = _extract_name(classes[0])
            callee_class_name = _extract_name(classes[1].replace('\r', '').replace('\n', ''))
            if caller_class_name == callee_class_name:
                continue
            if "$" in caller_class_name or "$" in callee_class_name:
                continue  # Leave out inner classes
            if create_class_callgraph:
                insert_class(caller_class_name, set([callee_class_name]), set())
                insert_class(callee_class_name, set(), set([caller_class_name]))
        elif row_split[0] == "M":  # method level call
            # row_split[1] = Class of caller method
            # row_split[2] = caller method<whitespace>calltype and class of callee method
            # row_split[3] = callee method
            split_2 = row_split[2].split(" ")
            split_3 = split_2[1].split(")")
            if _is_external_class(dataset, row_split[1]) or _is_external_class(dataset, split_3[1]):
                continue
            caller_method = split_2[0]
            callee_method = row_split[3]
            if _is_constructor(caller_method) or _is_constructor(callee_method):
                continue
            if _is_access(caller_method) or _is_access(callee_method):
                continue
            caller_class = _extract_name(row_split[1])
            callee_class = _extract_name(split_3[1])
            if "$" in caller_class or "$" in callee_class:
                continue  # Leave out references to inner classes
            # call_type = split_3[0][1]
            split_4 = caller_method.split("(")
            caller_name = split_4[0]
            caller_param = []
            if not split_4[1].startswith(")"):  # params existing
                caller_param = _split_param(split_4[1][:-1])  # Leave out last character, which is a )
            split_5 = callee_method.split("(")
            callee_name = split_5[0]
            callee_param = []
            if not split_5[1].startswith(")"):  # params existing
                callee_param = _split_param(split_5[1].replace('\r', '').replace('\n', '')[:-1])  # Leave out last character, which is a )

            caller_dict_key = build_class_method_param_dict_key(caller_class, caller_name, caller_param)
            callee_dict_key = build_class_method_param_dict_key(callee_class, callee_name, callee_param)
            # called_by = caller_dict_key
            # calls = callee_dict_key
            insert_entry(caller_dict_key, caller_class, caller_name, caller_param, set(), set([callee_dict_key]))
            insert_entry(callee_dict_key, callee_class, callee_name, callee_param, set([caller_dict_key]), set())
        else:
            log.error("Unknown start character: " + row_split[0])

    remove_external_calls()

    # convert all sets to lists since set is not json serializable
    if create_class_callgraph:
        for entry in class_call_graph:
            class_call_graph[entry][CALLS] = list(class_call_graph[entry][CALLS])
            class_call_graph[entry][CALLED_BY] = list(class_call_graph[entry][CALLED_BY])
        FileUtil.write_to_json(output_class_callgraph, class_call_graph)

    for entry in method_call_graph:
        method_call_graph[entry][CALLS] = list(method_call_graph[entry][CALLS])
        method_call_graph[entry][CALLED_BY] = list(method_call_graph[entry][CALLED_BY])
    FileUtil.write_to_json(output_method_callgraph, method_call_graph)
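# --- Illustrative sketch (not part of the original module) ---
# Example of the method call graph shape described in the docstring of
# create_callgraph_from_raw_file. Class, method and key names are made up;
# the real key strings come from the CALLS/CALLED_BY/CLASS_NAME/METHOD_NAME/
# PARAMS constants of the module.
example_method_call_graph = {
    "PatientController.addPatient(String,int)": {
        "calls": ["PatientRepository.save(Patient)"],
        "called_by": [],
        "class_name": "PatientController",
        "method_name": "addPatient",
        "params": ["String", "int"],
    },
    "PatientRepository.save(Patient)": {
        "calls": [],
        "called_by": ["PatientController.addPatient(String,int)"],
        "class_name": "PatientRepository",
        "method_name": "save",
        "params": ["Patient"],
    },
}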
def __init__(self, file_path, file_vector=None):
    self.file_path = file_path
    self.file_vector = file_vector
    self.file_name = FileUtil.get_filename_from_path(self.file_path)
if __name__ == '__main__':
    while True:
        print "Please enter the 7-character service tag; use ? for unknown characters, e.g. ABCEF?? (ASCII symbols only)"
        required_file_path = [history_zipfile, config_yml_path, invalid_history_file_path]
        start = True
        for f in required_file_path:
            if not FileUtil.is_path_existed(f):
                print "Please put the program's runtime files into the program folder"
                start = False
                break
        if start:
            line = sys.stdin.readline()
            svc_input = line.split()[0]
            configs = FileUtil.read_file(config_yml_path, isYML=True)
            if len(svc_input) != 7:
                print "A 7-character service tag is required"
            elif configs is None:
                print "Please put the correct configuration file into the program folder"
            else:
                wild_card_count = 0
                for w in svc_input:
                    if w not in letters:
def __init__(self, file_path):
    self.file_path = file_path
    self.file_name = FileUtil.get_filename_from_path(file_path)
def tokenize(self, file_path):
    text_lines = FileUtil.read_textfile_into_lines_list(file_path, self._dataset.encoding())

    uc_name_words = []
    uc_actor_words = []
    uc_precond_words = []
    uc_postcond_words = []
    uc_description_words = []
    uc_quality_req_words = []
    uc_flow_of_events_words = []
    last_word_category = uc_description_words  # Default

    for line in text_lines:
        line = line.lstrip()  # Remove leading white spaces/tabs
        if self._dataset.UC_NAME_TEMPLATE_REGEX.match(line):
            matched_string = self._dataset.UC_NAME_TEMPLATE_REGEX.match(line).group(0)
            uc_name_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_name_words
        elif self._dataset.UC_DESCRIPTION_TEMPLATE_REGEX.match(line):
            matched_string = self._dataset.UC_DESCRIPTION_TEMPLATE_REGEX.match(line).group(0)
            uc_description_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_description_words
        elif self._dataset.UC_ACTOR_TEMPLATE_REGEX.match(line):
            matched_string = self._dataset.UC_ACTOR_TEMPLATE_REGEX.match(line).group(0)
            uc_actor_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_actor_words
        elif self._dataset.UC_PRECONDITION_TEMPLATE_REGEX.match(line):
            matched_string = self._dataset.UC_PRECONDITION_TEMPLATE_REGEX.match(line).group(0)
            uc_precond_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_precond_words
        elif self._dataset.UC_POSTCONDITION_TEMPLATE_REGEX.match(line):
            matched_string = self._dataset.UC_POSTCONDITION_TEMPLATE_REGEX.match(line).group(0)
            uc_postcond_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_postcond_words
        elif self._dataset.UC_FLOW_OF_EVENTS_TEMPLATE_REGEX.match(line):
            matched_string = self._dataset.UC_FLOW_OF_EVENTS_TEMPLATE_REGEX.match(line).group(0)
            uc_flow_of_events_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_flow_of_events_words
        elif self._dataset.UC_QUALI_REQ_TEMPLATE_REGEX.match(line):
            matched_string = self._dataset.UC_QUALI_REQ_TEMPLATE_REGEX.match(line).group(0)
            uc_quality_req_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_quality_req_words
        elif self._dataset.UC_USER_TEMPLATE_REGEX.match(line):  # part of flow of events
            matched_string = self._dataset.UC_USER_TEMPLATE_REGEX.match(line).group(0)
            uc_flow_of_events_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_flow_of_events_words
        elif self._dataset.UC_SYSTEM_TEMPLATE_REGEX.match(line):  # part of flow of events
            matched_string = self._dataset.UC_SYSTEM_TEMPLATE_REGEX.match(line).group(0)
            uc_flow_of_events_words += self.tokenize_to_string_list(line[len(matched_string):])
            last_word_category = uc_flow_of_events_words
        else:
            last_word_category += self.tokenize_to_string_list(line)

    complete_uc_flow_of_events_words_string = " ".join(uc_flow_of_events_words)
    if self._italian:
        uc_flow_of_events_words = [word_tokenize(" ".join(sent.split("'")), language="italian")
                                   for sent in sent_tokenize(complete_uc_flow_of_events_words_string, language="italian")]
    else:
        uc_flow_of_events_words = [word_tokenize(sent) for sent in sent_tokenize(complete_uc_flow_of_events_words_string)]

    return UseCaseFileRepresentation(file_path, uc_name_words, uc_description_words, uc_actor_words,
                                     uc_precond_words, uc_postcond_words, uc_flow_of_events_words,
                                     uc_quality_req_words)
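# --- Illustrative sketch (not part of the original module) ---
# The use case tokenizer above routes each line to a word category by matching
# it against the dataset's section-template regexes and tokenizing only the
# remainder after the matched prefix. A tiny standalone version of that
# routing step, with a made-up template regex:
import re

UC_NAME_TEMPLATE_REGEX_EXAMPLE = re.compile(r"name\s*:", re.IGNORECASE)

line = "Name: Register new patient"
match = UC_NAME_TEMPLATE_REGEX_EXAMPLE.match(line)
if match:
    remainder = line[len(match.group(0)):].strip()
    print(remainder)  # -> Register new patient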
def tokenize(self, file_path) -> FileRepresentation:
    text_as_string = FileUtil.read_textfile_into_string(file_path, self._dataset.encoding())
    if self._italian:
        return TextFileRepresentation(sent_tokenize(text_as_string, language="italian"), file_path)
    return TextFileRepresentation(sent_tokenize(text_as_string), file_path)
def class_callgraph(self):
    return FileUtil.read_from_json(self.EANCI_CLASS_CALLGRAPH_PATH)