def text_tag():
    """Train the text-tag model and, optionally, the MLP-only continuation model.

    A ``gx_text_tag_continue_model`` is prepared and trained with
    ``train_model``; ``show_slt_apis_tag_features`` is then called on the
    training data.  When ``if_whole`` is truthy, a
    ``gx_text_tag_continue_only_MLP_model`` is prepared from the trained
    recommend model and trained as well.

    NOTE(review): ``new_old``, ``train_data``, ``test_data``, ``a_dataset``
    and ``if_whole`` are not defined inside this function; presumably they
    are module-level names -- confirm.
    """
    # Obtain the text_tag model.
    tag_recommender = gx_text_tag_continue_model(new_old)
    tag_recommender.prepare()
    tag_net = tag_recommender.get_model()

    # Train and test ('monitor loss&acc').
    tag_net = train_model(tag_recommender, tag_net, train_data, test_data,
                          *new_Para.param.train_paras)
    tag_recommender.show_slt_apis_tag_features(a_dataset.train_data)
    print('show_slt_apis_tag_features, done!')

    if not if_whole:
        return

    # The whole model.
    mlp_recommender = gx_text_tag_continue_only_MLP_model(new_old)
    # Prepare for building the model.
    mlp_recommender.prepare(tag_recommender)
    mlp_net = mlp_recommender.get_model(tag_net)
    train_model(mlp_recommender, mlp_net, train_data, test_data,
                *new_Para.param.train_paras)


# NOTE(review): the original source line ended with a bare triple-quote
# delimiter, so this function may border (or sit inside) a string-quoted,
# disabled region of the file -- confirm before relying on it.
def CI_NI_fineTuning():
    """Build a ``CI_Model`` from the repository arguments and train it.

    Arguments and the train/test splits are read from ``data_repository``;
    the training mode and the "train new" flag passed to ``train_model``
    come from ``args.train_mode`` and ``args.train_new``.
    """
    args = data_repository.get_args()
    train_data = data_repository.get_ds().train_data
    test_data = data_repository.get_ds().test_data

    ci_recommender = CI_Model(args)
    ci_net = ci_recommender.get_model()
    # The trained model returned by train_model is not used any further here.
    train_model(ci_recommender, ci_net, train_data, test_data,
                args.train_mode, args.train_new)
def text_only_oldScene(a_dataset):
    """Train the text-only model, then the MLP model that continues from it.

    Per the original comment, this tests the text_only model and the
    only_MLP whole model.

    Args:
        a_dataset: object exposing ``train_data`` and ``test_data``.
    """
    train_split = a_dataset.train_data
    test_split = a_dataset.test_data

    text_recommender = gx_text_only_model()
    text_net = text_recommender.get_model()
    # 'monitor loss&acc'
    text_net = train_model(text_recommender, text_net, train_split, test_split,
                           *new_Para.param.train_paras)
    print('train text_only_model, done!')

    # Continue training a model on top of the text features of the
    # text_only model; the argument is the similarity computation method.
    mlp_recommender = gx_text_only_MLP_model('feature_cosine')
    mlp_recommender.prepare(text_recommender)
    mlp_net = mlp_recommender.get_model(text_net)
    train_model(mlp_recommender, mlp_net, a_dataset.train_data,
                a_dataset.test_data, *new_Para.param.train_paras)
    print('train gx_text_only_MLP_model, done!')


# NOTE(review): the original source line ended with a bare triple-quote
# delimiter, so this function may border (or sit inside) a string-quoted,
# disabled region of the file -- confirm before relying on it.
def bl_PNCF(a_dataset):
    """Train a ``PNCF_model`` for every configured (mlp_mode, concate_mode) pair.

    Args:
        a_dataset: object exposing ``train_data`` and ``test_data``.
    """
    mlp_modes = ['MLP']  # 'MLP',
    concate_modes = ['concate']  # the original comment also lists 'multiply'
    text_mode = 'HDP'

    # Same iteration order as nesting the concate loop inside the mlp loop.
    mode_pairs = [(mlp, concate) for mlp in mlp_modes for concate in concate_modes]
    for mlp_mode, concate_mode in mode_pairs:
        recommender = PNCF_model(mlp_mode=mlp_mode,
                                 concate_mode=concate_mode,
                                 text_mode=text_mode)
        net = recommender.get_model()
        train_model(recommender, net, a_dataset.train_data,
                    a_dataset.test_data, *new_Para.param.train_paras)
def DINRec(a_dataset, new_old='new'):
    """Train a CI model, then a ``DIN_Rec`` model built on it, and evaluate the latter.

    Args:
        a_dataset: object exposing ``train_data`` and ``test_data``.
        new_old: value forwarded to ``CI_Model``; the original comment
            mentions 'old' as the alternative to the default 'new'.
    """
    train_data = a_dataset.train_data
    test_data = a_dataset.test_data

    ci_recommender = CI_Model(new_old)  # 'old'
    ci_recommender.prepare()
    ci_net = ci_recommender.get_model()
    # Disabled extra argument in the original:
    #   true_candidates_dict=HINRec_model.get_true_candi_apis()   ('monitor loss&acc')
    train_model(ci_recommender, ci_net, train_data, test_data,
                *new_Para.param.train_paras)

    din_recommender = DIN_Rec(ci_recommender,
                              new_Para.param.predict_fc_unit_nums)
    din_recommender.prepare()
    din_net = din_recommender.get_model()
    # Same disabled true_candidates_dict argument as above.
    din_net = train_model(din_recommender, din_net, train_data, test_data,
                          *new_Para.param.train_paras)

    evalute_by_epoch(din_recommender,
                     din_net,
                     din_recommender.simple_name,
                     test_data,
                     if_save_recommend_result=True,
                     evaluate_by_slt_apiNum=True)
def bl_DHSR(a_dataset):
    """Train the DHSR baseline, save its artefacts and evaluate it.

    Args:
        a_dataset: object exposing ``train_data`` and ``test_data``; both
            are passed through ``get_train_test_data`` before training.
    """
    recommender = DHSR_model()
    net = recommender.get_model()

    # Disabled in the original: a_dataset.transfer()
    # (remove duplicate samples? for 'newScene' with need_slt_apis=False)
    splits = get_train_test_data(a_dataset.train_data, a_dataset.test_data)
    train_data, test_data = splits

    # 'monitor loss&acc'
    net = train_model(recommender, net, train_data, test_data,
                      *new_Para.param.train_paras)
    recommender.save_sth()

    # Disabled extra arguments in the original:
    #   if_save_recommend_result=True, evaluate_by_slt_apiNum=True
    evalute_by_epoch(recommender, net, recommender.model_name, test_data)
def NI_online():
    """Construct the online NI model, prepare it with a similarity model and train it.

    Per the original comment, this flow can be used for CI, NI, topMLP, ft, etc.
    The similarity model is the CI recommend model when
    ``new_Para.param.NI_OL_mode == 'tagSim'`` and the ``HINRec`` model
    otherwise.

    NOTE(review): ``new_old``, ``train_data`` and ``test_data`` are not
    defined inside this function; presumably module-level names -- confirm.
    NOTE(review): the source was whitespace-collapsed; the ``prepare`` call
    below is read as live code (not commented out) -- confirm.
    """
    # Disabled alternative similarity model:
    # HINRec_model = HINRec(model_name='IsRec_best', semantic_mode='TF_IDF', epoch_num=40, neighbor_size=15,
    #                       topTopicNum=3, cluster_mode='LDA', cluster_mode_topic_num=50)
    HINRec_model = HINRec(model_name='PasRec',
                          epoch_num=40,
                          neighbor_size=15,
                          topTopicNum=3)
    CI_recommend_model = CI_Model(new_old)  # 'old'

    # --- Disabled CI training / evaluation / inspection ---------------------
    # CI_recommend_model.prepare()
    # CI_model_obj = CI_recommend_model.get_model()
    # CI_model_obj = train_model(CI_recommend_model, CI_model_obj, train_data, test_data,
    #                            *new_Para.param.train_paras)
    #     # ,true_candidates_dict=HINRec_model.get_true_candi_apis()  'monitor loss&acc'
    # evalute_by_epoch(CI_recommend_model, CI_model_obj, CI_recommend_model.model_name, test_data,
    #                  if_save_recommend_result=True, evaluate_by_slt_apiNum=True)
    # analyze_result(CI_recommend_model, new_Para.param.topKs)
    # CI_recommend_model.show_slt_apis_tag_features(a_dataset.train_data)  # check intermediate feature results
    # CI_recommend_model.get_slt_apis_mid_features(train_data, test_data)
    #     # store the attention intermediate results of all samples, in preparation for deepFM
    # CI_recommend_model.save_for_deepFM()

    # --- Disabled tuning of the NI score -------------------------------------
    # for pruned_neighbor_baseScore in [0, 0.2, 0.3]:
    #     NI_OL_recommend_model = NI_Model(new_old, if_implict=True, if_explict=False,
    #                                      if_correlation=False,
    #                                      pruned_neighbor_baseScore=pruned_neighbor_baseScore)
    #     sim_model = HINRec_model if new_Para.param.NI_OL_mode == 'IsRec_best_Sim' else None  # CI_recommend_model
    #     NI_OL_recommend_model.prepare(sim_model, train_data, test_data)
    #     NI_OL_model_obj = NI_OL_recommend_model.get_model()
    #     NI_OL_model_obj = train_model(NI_OL_recommend_model, NI_OL_model_obj, train_data, test_data,
    #                                   *new_Para.param.train_paras,
    #                                   true_candidates_dict=HINRec_model.get_true_candi_apis())

    # 'new', pruned_neighbor_baseScore = 0
    # Construction is enough here: previously trained similarity data is read back.
    NI_OL_recommend_model = NI_Model_online('new',
                                            if_implict=True,
                                            if_explict=False,
                                            if_correlation=False)

    # Disabled alternative similarity model ('IsRec_best_Sim'):
    # HINRec_model = HINRec(model_name='IsRec_best', semantic_mode='TF_IDF', epoch_num=40,
    #                       neighbor_size=15, topTopicNum=3, cluster_mode='LDA', cluster_mode_topic_num=50)
    if new_Para.param.NI_OL_mode == 'tagSim':
        sim_model = CI_recommend_model
    else:
        sim_model = HINRec_model

    NI_OL_recommend_model.prepare(sim_model, train_data, test_data)
    NI_OL_model_obj = NI_OL_recommend_model.get_model()
    # Disabled extra argument: true_candidates_dict=HINRec_model.get_true_candi_apis()
    NI_OL_model_obj = train_model(NI_OL_recommend_model, NI_OL_model_obj,
                                  train_data, test_data,
                                  *new_Para.param.train_paras)
def test_PNCF_doubleTower_OR_DIN():
    """Train a ``DINRec_model`` on top of the pre-trained CI / NI recommend models.

    The CI and NI recommend models come from ``get_preTrain_CINI_model``.
    A PNCF double-tower variant is kept below as disabled code.

    NOTE(review): ``train_data`` and ``test_data`` are not defined inside
    this function; presumably module-level names -- confirm.
    """
    pretrained = get_preTrain_CINI_model()
    CI_recommend_model, NI_OL_recommend_model = pretrained

    # --- Disabled PNCF double-tower variant -----------------------------------
    # PNCF_recommend_model = PNCF_doubleTower(CI_recommend_model=CI_recommend_model, CI_model=CI_model_obj,
    #                                         NI_recommend_model=NI_OL_recommend_model, NI_model1=NI_OL_model_obj)
    # PNCF_model_obj = PNCF_recommend_model.get_model()
    # PNCF_model_obj = train_model(PNCF_recommend_model, PNCF_model_obj, train_data, test_data,
    #                              *new_Para.param.train_paras)
    # evalute_by_epoch(PNCF_recommend_model, PNCF_model_obj, PNCF_recommend_model.model_name, test_data,
    #                  if_save_recommend_result=True, evaluate_by_slt_apiNum=True)
    # analyze_result(PNCF_recommend_model, new_Para.param.topKs)

    # model_name = 'MLP_embedding'!!!  'DINRec'
    # Disabled extra argument (flagged '!!!' in the original): model_name='MLP_embedding'
    DINRec_recommend_model = DINRec_model(
        CI_recommend_model=CI_recommend_model,
        NI_recommend_model=NI_OL_recommend_model,
    )
    DINRec_model_obj = DINRec_recommend_model.get_model()
    DINRec_model_obj = train_model(DINRec_recommend_model, DINRec_model_obj,
                                   train_data, test_data,
                                   *new_Para.param.train_paras)
def bl_DHSR_new(a_dataset):
    """Train and evaluate one DHSR model per number of selected services.

    For every ``slt_num`` from 1 to ``new_Para.param.slt_item_num`` the
    matching train/test split is taken from the lists returned by
    ``a_dataset.transfer_false_test_DHSR``; a ``DHSR_model`` is trained on
    it, evaluated and saved.

    Args:
        a_dataset: object providing ``transfer_false_test_DHSR``.
    """
    # if_reduct_train: whether to reduce the training set.
    train_datas, test_datas = a_dataset.transfer_false_test_DHSR(if_reduct_train=True)

    # A different number of selected services each round: train the
    # corresponding model and evaluate it.
    for slt_num in range(1, new_Para.param.slt_item_num + 1):
        slot = slt_num - 1
        train_data = train_datas[slot]
        test_data = test_datas[slot]

        # old_new options: 'new', 'new_sigmoid', 'new_reduct'; the original
        # comment notes that 'new_reduct' works best.
        recommender = DHSR_model(old_new='new_reduct', slt_num=slt_num)
        net = recommender.get_model()
        # 'monitor loss&acc'
        net = train_model(recommender, net, train_data, test_data,
                          *new_Para.param.train_paras)
        evalute_by_epoch(recommender, net, recommender.model_name, test_data,
                         evaluate_by_slt_apiNum=True)
        recommender.save_sth()
        print('DHSR, slt_num:{}, train_predict,done!'.format(slt_num))
def newDeepFM():
    """Collect CI and NI features and hand them to ``run_new_deepFM``.

    NI features are the pair ``(mid_sltAids_2NI_feas, i_factors_matrix)`` of
    the prepared online NI model.  CI features come from
    ``get_mashup_api_features``.  When the file at
    ``ma_text_tag_feas_path`` does not exist yet, the CI model is trained
    first, the features are extracted and the user is told to re-run.

    NOTE(review): ``new_old``, ``train_data`` and ``test_data`` are not
    defined inside this function; presumably module-level names -- confirm.
    NOTE(review): the source was whitespace-collapsed; ``run_new_deepFM`` is
    read as belonging to the "feature file exists" branch only, following
    the original comment ("exit and re-run") -- confirm.
    """
    ci_recommender = CI_Model(new_old)  # 'old'
    ci_recommender.prepare()
    hin_recommender = HINRec(model_name='PasRec',
                             epoch_num=40,
                             neighbor_size=15,
                             topTopicNum=3)
    if new_Para.param.NI_OL_mode == 'tagSim':
        sim_model = ci_recommender
    else:
        sim_model = hin_recommender

    # 'new', pruned_neighbor_baseScore = 0
    ni_recommender = NI_Model_online(new_old,
                                     if_implict=True,
                                     if_explict=False,
                                     if_correlation=False)
    ni_recommender.prepare(sim_model, train_data, test_data)
    mashup_ni_feas = ni_recommender.mid_sltAids_2NI_feas
    api_ni_feas = ni_recommender.i_factors_matrix
    ni_feas = (mashup_ni_feas, api_ni_feas)

    if os.path.exists(ci_recommender.ma_text_tag_feas_path):
        ci_feas = ci_recommender.get_mashup_api_features(
            ci_recommender.all_mashup_num, ci_recommender.all_api_num + 1)
        run_new_deepFM(ci_feas, ni_feas, train_data, test_data,
                       ci_recommender.all_api_num, epoch_num=10)
        return

    # The feature file does not exist yet: load and train the model, then
    # exit and re-run.
    ci_net = ci_recommender.get_model()
    # Disabled extra argument: true_candidates_dict=HINRec_model.get_true_candi_apis()
    # ('monitor loss&acc')
    train_model(ci_recommender, ci_net, train_data, test_data,
                *new_Para.param.train_paras)
    # The "+ 1" row is the feature of the padding (virtual) API.
    ci_recommender.get_mashup_api_features(ci_recommender.all_mashup_num,
                                           ci_recommender.all_api_num + 1)
    print('re-run the program!')
def co_trainCINI():
    """Jointly train the CI and NI models through a ``fine_Tune`` wrapper.

    Per the original comment, parameters are fully randomly initialised and
    CI and NI are co-trained (``model_mode='co_train'``).

    NOTE(review): ``new_old``, ``train_data`` and ``test_data`` are not
    defined inside this function; presumably module-level names -- confirm.
    """
    ci_recommender = CI_Model(new_old)
    ci_recommender.prepare()
    ci_net = ci_recommender.get_model()

    ni_recommender = NI_Model_online(new_old,
                                     if_implict=True,
                                     if_explict=False,
                                     if_correlation=False)
    # Disabled in the original (when only NI is trained, OL_GE does not need CI):
    # CI_recommend_model = CI_recommend_model if 'Sim' in new_Para.param.NI_OL_mode else None
    ni_recommender.prepare(ci_recommender)
    ni_net = ni_recommender.get_model()

    # lr: the original comment notes 0.0003 as another value.
    joint_recommender = fine_Tune(ci_recommender,
                                  ci_net,
                                  ni_recommender,
                                  ni_net,
                                  model_mode='co_train',
                                  lr=0.001)
    joint_net = joint_recommender.get_model()
    train_model(joint_recommender, joint_net, train_data, test_data,
                *new_Para.param.train_paras)
def get_preTrain_CINI_model():
    """Train the CI model, prepare the online NI model and return both wrappers.

    Returns:
        A ``(CI_recommend_model, NI_OL_recommend_model)`` tuple.

    NOTE(review): ``new_old``, ``train_data`` and ``test_data`` are not
    defined inside this function; presumably module-level names -- confirm.
    NOTE(review): the source was whitespace-collapsed; the NI ``prepare``
    call below is read as live code (not commented out) -- confirm.
    """
    HINRec_model = HINRec(model_name='PasRec',
                          epoch_num=40,
                          neighbor_size=15,
                          topTopicNum=3)

    CI_recommend_model = CI_Model(new_old)  # 'old'
    CI_recommend_model.prepare()
    CI_model_obj = CI_recommend_model.get_model()
    # Disabled extra argument: true_candidates_dict=HINRec_model.get_true_candi_apis()
    # ('monitor loss&acc')
    CI_model_obj = train_model(CI_recommend_model, CI_model_obj, train_data,
                               test_data, *new_Para.param.train_paras)

    # 'new', pruned_neighbor_baseScore = 0
    # Construction is enough here: previously trained similarity data is read back.
    NI_OL_recommend_model = NI_Model_online(new_old,
                                            if_implict=True,
                                            if_explict=False,
                                            if_correlation=False)

    # Disabled alternative similarity model ('IsRec_best_Sim'):
    # HINRec_model = HINRec(model_name='IsRec_best', semantic_mode='TF_IDF', epoch_num=40, neighbor_size=15,
    #                       topTopicNum=3, cluster_mode='LDA', cluster_mode_topic_num=50)
    if new_Para.param.NI_OL_mode == 'tagSim':
        sim_model = CI_recommend_model
    else:
        sim_model = HINRec_model
    NI_OL_recommend_model.prepare(sim_model, train_data, test_data)

    return CI_recommend_model, NI_OL_recommend_model
def test_simModes(a_dataset, new_old='new', if_few=False):
    """Train CI, the online NI model and the top-MLP model that combines them.

    Args:
        a_dataset: object exposing ``train_data`` / ``test_data`` and
            ``get_few_samples``.
        new_old: value forwarded to ``CI_Model`` and ``NI_Model_online``.
        if_few: when truthy, train on ``a_dataset.get_few_samples(128)``
            instead of the full train/test data.
    """
    if not if_few:
        train_data = a_dataset.train_data
        test_data = a_dataset.test_data
    else:
        train_data, test_data = a_dataset.get_few_samples(128)
        print(type(train_data))
        print(type(test_data))

    # 'IsRec_best' -- this is the pre-trained similarity model.
    HINRec_model = HINRec(model_name=new_Para.param.NI_OL_mode,
                          epoch_num=20,
                          neighbor_size=15,
                          topTopicNum=3)

    CI_recommend_model = CI_Model(new_old)  # 'old'
    CI_recommend_model.prepare()
    CI_model_obj = CI_recommend_model.get_model()
    # Disabled extra argument: true_candidates_dict=HINRec_model.get_true_candi_apis()
    # ('monitor loss&acc')
    CI_model_obj = train_model(CI_recommend_model, CI_model_obj, train_data,
                               test_data, *new_Para.param.train_paras)

    # --- Disabled CI evaluation variants --------------------------------------
    # evalute_by_epoch(CI_recommend_model, CI_model_obj, CI_recommend_model.simple_name, test_data,
    #                  if_save_recommend_result=True, evaluate_by_slt_apiNum=True)
    # Designed for no_slt CI:
    # CI_model_obj = train_model(CI_recommend_model, CI_model_obj, train_data, a_dataset.test_data_no_reduct,
    #                            *new_Para.param.train_paras)
    # evalute_by_epoch(CI_recommend_model, CI_model_obj, CI_recommend_model.simple_name,
    #                  a_dataset.test_data_no_reduct,
    #                  if_save_recommend_result=True, evaluate_by_slt_apiNum=True)

    if new_Para.param.NI_OL_mode == 'tagSim':
        sim_model = CI_recommend_model
    else:
        sim_model = HINRec_model

    # 'new', pruned_neighbor_baseScore = 0
    # Construction is enough here: previously trained similarity data is read back.
    NI_OL_recommend_model = NI_Model_online(new_old,
                                            if_implict=True,
                                            if_explict=False,
                                            if_correlation=False,
                                            eachPath_topK=True)
    NI_OL_recommend_model.prepare_sims(sim_model, train_data, test_data)
    NI_OL_model_obj = NI_OL_recommend_model.get_model()
    NI_OL_model_obj = train_model(NI_OL_recommend_model, NI_OL_model_obj,
                                  train_data, test_data,
                                  *new_Para.param.train_paras)

    # --- Disabled: explicit NI --------------------------------------------------
    # explicit_NI_recommend_model = NI_Model(new_old, if_implict=False, if_explict=True, if_correlation=False)
    # explicit_NI_recommend_model.prepare(sim_model, train_data, test_data)
    #     # building the NI model needs the CI model to produce the features of all mashups/APIs
    # explict_NI_model_obj = explicit_NI_recommend_model.get_model()
    # explict_NI_model_obj = train_model(explicit_NI_recommend_model, explict_NI_model_obj, train_data, test_data,
    #                                    *new_Para.param.train_paras)

    # --- Disabled NI evaluation variants --------------------------------------
    # evalute_by_epoch(NI_OL_recommend_model, NI_OL_model_obj, NI_OL_recommend_model.simple_name,
    #                  a_dataset.test_data,
    #                  if_save_recommend_result=True, evaluate_by_slt_apiNum=True)
    # Specifically for no_slt NI:
    # NI_OL_model_obj = train_model(NI_OL_recommend_model, NI_OL_model_obj, train_data,
    #                               a_dataset.test_data_no_reduct, *new_Para.param.train_paras)
    # evalute_by_epoch(NI_OL_recommend_model, NI_OL_model_obj, NI_OL_recommend_model.simple_name,
    #                  a_dataset.test_data_no_reduct,
    #                  if_save_recommend_result=True, evaluate_by_slt_apiNum=True)

    # CI + implicit NI
    top_MLP_recommend_model = top_MLP(CI_recommend_model,
                                      CI_model_obj,
                                      NI_recommend_model1=NI_OL_recommend_model,
                                      NI_model1=NI_OL_model_obj)
    top_MLP_model = top_MLP_recommend_model.get_model()
    top_MLP_model = train_model(top_MLP_recommend_model, top_MLP_model,
                                train_data, test_data,
                                *new_Para.param.train_paras)
    # Store the intermediate results of all instances used during training/testing.
    top_MLP_recommend_model.save_sth()
def __run_training(self, event):
    """Run ``train_model`` when ``self.model_name`` is "Train", then pause 3 seconds.

    Args:
        event: not used in the body; presumably the callback event that
            triggered this handler -- confirm against the caller.

    NOTE(review): the source was whitespace-collapsed, so it is not visible
    whether ``time.sleep(3)`` belongs to the "Train" branch or runs
    unconditionally; it is read here as part of the branch -- confirm.
    """
    if self.model_name != "Train":
        return
    train_model(hyper_params=self.hyper_params)
    time.sleep(3)