def __init__(self, pro_name, version):
    """Resolve project paths, build the "svm" model and load the documents.

    Args:
        pro_name: Project name, forwarded to every ``PathUtil`` lookup.
        version: Project version, forwarded to every ``PathUtil`` lookup.
    """
    # Keyword arguments shared by the keyword-style PathUtil lookups below.
    project = {"pro_name": pro_name, "version": version}

    # Model directory for the "svm" model type and the model built on it.
    svm_dir = PathUtil.sim_model(**project, model_type="svm")
    self.model_dir_path = svm_dir
    self.model = FilterSemanticTFIDFNode2VectorModel(
        name="svm",
        model_dir_path=svm_dir,
    )

    # Load the stored document collection, then wrap it with a preprocessor.
    doc_path = PathUtil.doc(pro_name, version)
    self.document_collection_path = doc_path
    raw_collection = MultiFieldDocumentCollection.load(str(doc_path))
    self.collection = raw_collection
    preprocessor = Preprocessor()
    self.processor = preprocessor
    self.doc_collection = (
        PreprocessMultiFieldDocumentCollection.create_from_doc_collection(
            preprocessor,
            raw_collection,
        )
    )

    # Paths that are only recorded here, not opened.
    self.pretrain_node2vec_path = PathUtil.node2vec(**project, weight="unweight")
    self.kg_name_searcher_path = PathUtil.name_searcher(pro_name, version)
    self.doc_sim_model_path = PathUtil.sim_model(**project, model_type="avg_w2v")
def train_model(pro_name, version, weight, embedding_size=100):
    """Train an ``AVGNode2VectorModel`` for one project version.

    Loads the project's document collection, preprocesses it with
    ``CodeDocPreprocessor``, and passes it together with the graph data,
    pretrained node2vec and name-searcher paths to
    ``AVGNode2VectorModel.train``.

    Args:
        pro_name: Project name, forwarded to every ``PathUtil`` lookup.
        version: Project version, forwarded to every ``PathUtil`` lookup.
        weight: Weight variant used to select the pretrained node2vec path.
        embedding_size: Embedding size handed to the trainer. Defaults to
            100, the value that was previously hard-coded.
            NOTE(review): presumably this must match the dimension of the
            pretrained node2vec vectors -- confirm before overriding.

    Returns:
        The "avg_n2v" model directory path obtained from
        ``PathUtil.sim_model``, i.e. the ``model_dir_path`` given to the
        trainer.
    """
    # Load and preprocess the documents the model is trained on.
    document_collection_path = PathUtil.doc(pro_name, version)
    collection = MultiFieldDocumentCollection.load(str(document_collection_path))
    processor = CodeDocPreprocessor()
    doc_collection = PreprocessMultiFieldDocumentCollection.create_from_doc_collection(
        processor, collection
    )

    # Resolve every input/output location needed by the trainer.
    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version=version)
    pretrain_node2vec_path = PathUtil.node2vec(
        pro_name=pro_name, version=version, weight=weight
    )
    kg_name_searcher_path = PathUtil.name_searcher(pro_name=pro_name, version=version)
    model_dir_path = PathUtil.sim_model(
        pro_name=pro_name, version=version, model_type="avg_n2v"
    )

    # The trained model object is not needed here; callers only receive the
    # directory path, so the return value of train() is intentionally dropped.
    AVGNode2VectorModel.train(
        model_dir_path=model_dir_path,
        doc_collection=doc_collection,
        embedding_size=embedding_size,
        pretrain_node2vec_path=pretrain_node2vec_path,
        graph_data_path=graph_data_path,
        kg_name_searcher_path=kg_name_searcher_path,
    )
    return model_dir_path