def __init__(self, lemmatizer_type=LemmatizerType.english_nltk):
    """Set up the lemmatizer backend selected by ``lemmatizer_type``.

    The chosen type is stored in ``self._lemmatizer_type`` and the backend
    object in ``self._lemmatizer``:

    * ``english_nltk``  -> ``WordNetLemmatizer()``
    * ``italian_nltk``  -> ``SnowballStemmer("italian")``
    * ``english_spacy`` / ``italian_spacy`` -> the result of
      ``PandasUtil.read_csv_to_dataframe`` on the matching precalculated
      lemma CSV.

    For any other value an error is logged and ``self._lemmatizer`` stays
    ``None``.

    :param lemmatizer_type: member of ``self.LemmatizerType`` naming the
        backend to use.
    """

    def read_precalculated_lemmas(csv_path):
        # Use precalculated files for spacy since free google colab can't
        # handle fasttext model and spacy lemmatizer at once.
        if not FileUtil.file_exists(csv_path):
            log.error(
                f"{csv_path} does not exists. The spacy lemmatizer needs an precalculated lemma file."
            )
        # NOTE(review): the read is attempted even when the file was just
        # reported missing; only an error is logged above.
        return PandasUtil.read_csv_to_dataframe(csv_path)

    self._lemmatizer_type = lemmatizer_type
    self._lemmatizer = None
    kinds = self.LemmatizerType

    if lemmatizer_type == kinds.english_nltk:
        self._lemmatizer = WordNetLemmatizer()
        return

    if lemmatizer_type == kinds.english_spacy:
        self._lemmatizer = read_precalculated_lemmas(
            PRECALCULATED_SPACY_ENGLISH_LEMMA_CSV)
        return

    if lemmatizer_type == kinds.italian_nltk:
        self._lemmatizer = SnowballStemmer("italian")
        return

    if lemmatizer_type == kinds.italian_spacy:
        self._lemmatizer = read_precalculated_lemmas(
            PRECALCULATED_SPACY_ITALIAN_LEMMA_CSV)
        return

    # No branch matched: report it and leave ``_lemmatizer`` as None.
    log.error(f"Unknown case for LemmatizerType: {lemmatizer_type}")
def _run(self, final_thresholds, maj_thresholds, matrix_file_path=None, artifact_map_file_path=None):
    """Load the trace link data from disk and run a ``MajProcessor`` on it.

    :param final_thresholds: passed through unchanged to ``MajProcessor``.
    :param maj_thresholds: passed through unchanged to ``MajProcessor``.
    :param matrix_file_path: path handed to
        ``ElementLevelTraceLinkDataStructure.load_data_from`` as first
        argument; when falsy, ``self.default_matrix_path()`` is used.
    :param artifact_map_file_path: path handed to ``load_data_from`` as
        second argument; when falsy, ``self._default_a2eMap_path()`` is used.
    :return: whatever ``MajProcessor.run()`` returns.
    """
    # Fall back to the instance defaults for any path that was not supplied.
    matrix_file_path = matrix_file_path or self.default_matrix_path()
    artifact_map_file_path = artifact_map_file_path or self._default_a2eMap_path()

    # Report each missing input file, matrix first, then the artifact map.
    # NOTE(review): a missing file is only logged; loading is still attempted.
    for required_path in (matrix_file_path, artifact_map_file_path):
        if FileUtil.file_exists(required_path):
            continue
        log.error(
            f"File does not exists: {required_path}\n"
            f"Please pass a valid file path or call {self.__class__.__name__}().precalculate() first"
        )

    loaded_links = ElementLevelTraceLinkDataStructure.load_data_from(
        matrix_file_path, artifact_map_file_path)

    return MajProcessor(
        loaded_links,
        self.similarity_filter,
        self.req_reduce_func,
        self.code_reduce_function,
        final_thresholds,
        maj_thresholds,
        self.callgraph_aggregator,
    ).run()