예제 #1
0
def text_tag():
    """Train the text+tag model and, when requested, the MLP-only follow-up.

    The names ``new_old``, ``train_data``, ``test_data``, ``a_dataset``,
    ``if_whole`` and ``new_Para`` are not defined in this function; they
    are read from the surrounding scope.
    """
    # Obtain the text_tag model from its recommender wrapper.
    tag_recommender = gx_text_tag_continue_model(new_old)
    tag_recommender.prepare()
    tag_net = tag_recommender.get_model()

    # Train and test ('monitor loss&acc').
    tag_net = train_model(tag_recommender, tag_net, train_data, test_data,
                          *new_Para.param.train_paras)

    tag_recommender.show_slt_apis_tag_features(a_dataset.train_data)
    print('show_slt_apis_tag_features, done!')

    if if_whole:
        # Whole model: the MLP-only recommender is prepared from the
        # text+tag recommender and built from the trained text+tag model.
        mlp_recommender = gx_text_tag_continue_only_MLP_model(new_old)
        mlp_recommender.prepare(tag_recommender)
        mlp_net = mlp_recommender.get_model(tag_net)

        train_model(mlp_recommender, mlp_net, train_data, test_data,
                    *new_Para.param.train_paras)
        """
예제 #2
0
def CI_NI_fineTuning():
    """Build a ``CI_Model`` from the repository arguments and train it.

    The arguments and both data splits are fetched from
    ``data_repository``; nothing is returned.
    """
    args = data_repository.get_args()
    # The dataset accessor is invoked separately for each split.
    train_data = data_repository.get_ds().train_data
    test_data = data_repository.get_ds().test_data

    ci_recommender = CI_Model(args)
    ci_net = ci_recommender.get_model()
    ci_net = train_model(ci_recommender, ci_net, train_data, test_data,
                         args.train_mode, args.train_new)
예제 #3
0
def text_only_oldScene(a_dataset):
    """Train the text-only model, then the MLP-only model built on it.

    :param a_dataset: object exposing ``train_data`` and ``test_data``.
    """
    # Stage 1: the text_only model ('monitor loss&acc').
    text_recommender = gx_text_only_model()
    text_net = text_recommender.get_model()
    text_net = train_model(text_recommender, text_net,
                           a_dataset.train_data, a_dataset.test_data,
                           *new_Para.param.train_paras)
    print('train text_only_model, done!')

    # Stage 2: continue training on the text features of the text_only
    # model; the constructor argument names the similarity method.
    mlp_recommender = gx_text_only_MLP_model('feature_cosine')
    mlp_recommender.prepare(text_recommender)
    mlp_net = mlp_recommender.get_model(text_net)
    train_model(mlp_recommender, mlp_net,
                a_dataset.train_data, a_dataset.test_data,
                *new_Para.param.train_paras)
    print('train gx_text_only_MLP_model, done!')
    """
예제 #4
0
def bl_PNCF(a_dataset):
    """Train a PNCF model for every enabled (mlp_mode, concate_mode) pair.

    :param a_dataset: object exposing ``train_data`` and ``test_data``.
    """
    text_mode = 'HDP'
    mlp_modes = ['MLP']
    concate_modes = ['concate']  # 'multiply' is the other option noted here

    # Enumerate combinations with mlp_mode as the outer axis.
    mode_pairs = [(mlp_mode, concate_mode)
                  for mlp_mode in mlp_modes
                  for concate_mode in concate_modes]

    for mlp_mode, concate_mode in mode_pairs:
        recommender = PNCF_model(mlp_mode=mlp_mode,
                                 concate_mode=concate_mode,
                                 text_mode=text_mode)
        net = recommender.get_model()
        net = train_model(recommender, net,
                          a_dataset.train_data, a_dataset.test_data,
                          *new_Para.param.train_paras)
예제 #5
0
def DINRec(a_dataset, new_old='new'):
    """Train a CI model, then train and evaluate a DIN_Rec model built on it.

    :param a_dataset: object exposing ``train_data`` and ``test_data``.
    :param new_old: value forwarded to ``CI_Model`` (``'old'`` is the
        alternative noted in the source).
    """
    train_data = a_dataset.train_data
    test_data = a_dataset.test_data

    # Stage 1: content-interaction model ('monitor loss&acc').
    ci_recommender = CI_Model(new_old)
    ci_recommender.prepare()
    ci_net = ci_recommender.get_model()
    ci_net = train_model(ci_recommender, ci_net, train_data, test_data,
                         *new_Para.param.train_paras)

    # Stage 2: DIN_Rec is constructed from the CI recommender and the
    # configured predict_fc_unit_nums.
    din_recommender = DIN_Rec(ci_recommender,
                              new_Para.param.predict_fc_unit_nums)
    din_recommender.prepare()
    din_net = din_recommender.get_model()
    din_net = train_model(din_recommender, din_net, train_data, test_data,
                          *new_Para.param.train_paras)

    evalute_by_epoch(din_recommender, din_net, din_recommender.simple_name,
                     test_data,
                     if_save_recommend_result=True,
                     evaluate_by_slt_apiNum=True)
예제 #6
0
def bl_DHSR(a_dataset):
    """Train, persist and evaluate the DHSR baseline.

    :param a_dataset: object exposing ``train_data`` and ``test_data``.
    """
    recommender = DHSR_model()
    net = recommender.get_model()

    # Disabled in the source: a_dataset.transfer(), with a note asking
    # whether it removes duplicate samples for 'newScene' with
    # need_slt_apis=False.

    train_data, test_data = get_train_test_data(a_dataset.train_data,
                                                a_dataset.test_data)

    # 'monitor loss&acc'
    net = train_model(recommender, net, train_data, test_data,
                      *new_Para.param.train_paras)
    recommender.save_sth()

    # if_save_recommend_result / evaluate_by_slt_apiNum are left at their
    # defaults here (they were commented out in the source).
    evalute_by_epoch(recommender, net, recommender.model_name, test_data)
예제 #7
0
def NI_online():
    """Train the online NI model against a similarity model.

    Per the note on the original signature, this routine can be used for
    CI, NI, topMLP, fine-tuning, etc.  ``new_old``, ``train_data``,
    ``test_data`` and ``new_Para`` are read from the surrounding scope.
    """
    # Alternative configuration kept for reference:
    #   HINRec(model_name='IsRec_best', semantic_mode='TF_IDF', epoch_num=40,
    #          neighbor_size=15, topTopicNum=3, cluster_mode='LDA',
    #          cluster_mode_topic_num=50)
    hin_recommender = HINRec(model_name='PasRec',
                             epoch_num=40,
                             neighbor_size=15,
                             topTopicNum=3)

    # Only constructed here: prepare/get_model/train_model, evalute_by_epoch,
    # analyze_result and the feature-inspection calls for the CI model are
    # all commented out in the source.
    ci_recommender = CI_Model(new_old)  # 'old'

    # Also disabled in the source: an NI score tuning loop that swept
    # pruned_neighbor_baseScore over [0, 0.2, 0.3] using NI_Model.

    # NOTE(review): 'new' is passed literally here rather than new_old --
    # confirm this is intended.
    ni_recommender = NI_Model_online('new',
                                     if_implict=True,
                                     if_explict=False,
                                     if_correlation=False)

    # Original note: building the model is enough; previously trained
    # similarity data is read back ('IsRec_best_Sim').
    if new_Para.param.NI_OL_mode == 'tagSim':
        similarity_source = ci_recommender
    else:
        similarity_source = hin_recommender

    ni_recommender.prepare(similarity_source, train_data, test_data)
    ni_net = ni_recommender.get_model()
    # true_candidates_dict=HINRec_model.get_true_candi_apis() is noted in the
    # source as an optional extra argument and is not passed.
    ni_net = train_model(ni_recommender, ni_net, train_data, test_data,
                         *new_Para.param.train_paras)
예제 #8
0
def test_PNCF_doubleTower_OR_DIN():
    """Train a ``DINRec_model`` on top of the pre-trained CI/NI recommenders.

    ``train_data``, ``test_data`` and ``new_Para`` are read from the
    surrounding scope.
    """
    ci_recommender, ni_recommender = get_preTrain_CINI_model()

    # Disabled in the source: the PNCF_doubleTower variant (build, train,
    # evalute_by_epoch, analyze_result).

    # model_name='MLP_embedding' is noted in the source as an alternative to
    # 'DINRec'; no model_name is passed here.
    din_recommender = DINRec_model(CI_recommend_model=ci_recommender,
                                   NI_recommend_model=ni_recommender)
    din_net = din_recommender.get_model()
    din_net = train_model(din_recommender, din_net, train_data, test_data,
                          *new_Para.param.train_paras)
예제 #9
0
def bl_DHSR_new(a_dataset):
    """Train and evaluate one DHSR model per number of selected services.

    :param a_dataset: object providing ``transfer_false_test_DHSR``, which
        is unpacked into two indexable collections (train and test).
    """
    # if_reduct_train: whether to reduce the training set (original note).
    train_datas, test_datas = a_dataset.transfer_false_test_DHSR(
        if_reduct_train=True)

    # For each number of selected services, train the matching model and
    # evaluate it.
    for slt_num in range(1, new_Para.param.slt_item_num + 1):
        split_idx = slt_num - 1
        train_data = train_datas[split_idx]
        test_data = test_datas[split_idx]

        # Options noted in the source: 'new', 'new_sigmoid', 'new_reduct';
        # the note says 'new_reduct' works best.
        recommender = DHSR_model(old_new='new_reduct', slt_num=slt_num)
        net = recommender.get_model()
        # 'monitor loss&acc'
        net = train_model(recommender, net, train_data, test_data,
                          *new_Para.param.train_paras)
        evalute_by_epoch(recommender, net, recommender.model_name, test_data,
                         evaluate_by_slt_apiNum=True)
        recommender.save_sth()
        print('DHSR, slt_num:{}, train_predict,done!'.format(slt_num))
예제 #10
0
def newDeepFM():
    """Gather CI and NI features and run the new DeepFM on them.

    When the CI feature file does not exist yet, the CI model is trained,
    the features are extracted, a "re-run" message is printed and DeepFM is
    not started.  ``new_old``, ``train_data``, ``test_data`` and
    ``new_Para`` are read from the surrounding scope.
    """
    ci_recommender = CI_Model(new_old)  # 'old'
    ci_recommender.prepare()

    hin_recommender = HINRec(model_name='PasRec',
                             epoch_num=40,
                             neighbor_size=15,
                             topTopicNum=3)
    if new_Para.param.NI_OL_mode == 'tagSim':
        similarity_source = ci_recommender
    else:
        similarity_source = hin_recommender

    # 'new', pruned_neighbor_baseScore = 0 (original note)
    ni_recommender = NI_Model_online(new_old,
                                     if_implict=True,
                                     if_explict=False,
                                     if_correlation=False)
    ni_recommender.prepare(similarity_source, train_data, test_data)
    NI_feas = (ni_recommender.mid_sltAids_2NI_feas,
               ni_recommender.i_factors_matrix)

    # Original note: if the feature file is missing, load the model, then
    # exit and run again.
    features_missing = not os.path.exists(
        ci_recommender.ma_text_tag_feas_path)
    if features_missing:
        ci_net = ci_recommender.get_model()
        # 'monitor loss&acc'
        ci_net = train_model(ci_recommender, ci_net, train_data, test_data,
                             *new_Para.param.train_paras)

    # The "+ 1" accounts for the padding virtual api (original note).
    CI_feas = ci_recommender.get_mashup_api_features(
        ci_recommender.all_mashup_num,
        ci_recommender.all_api_num + 1)

    if features_missing:
        print('re-run the program!')
        return

    run_new_deepFM(CI_feas,
                   NI_feas,
                   train_data,
                   test_data,
                   ci_recommender.all_api_num,
                   epoch_num=10)
예제 #11
0
def co_trainCINI():
    """Jointly train the CI and NI models through ``fine_Tune``.

    Original note: parameters are fully randomized and CI and NI are trained
    together.  ``new_old``, ``train_data``, ``test_data`` and ``new_Para``
    are read from the surrounding scope.
    """
    ci_recommender = CI_Model(new_old)
    ci_recommender.prepare()
    ci_net = ci_recommender.get_model()

    ni_recommender = NI_Model_online(new_old,
                                     if_implict=True,
                                     if_explict=False,
                                     if_correlation=False)
    # Original note: when only NI is trained, OL_GE does not need CI; the
    # conditional that dropped the CI recommender is disabled, so it is
    # always passed.
    ni_recommender.prepare(ci_recommender)
    ni_net = ni_recommender.get_model()

    joint_recommender = fine_Tune(ci_recommender,
                                  ci_net,
                                  ni_recommender,
                                  ni_net,
                                  model_mode='co_train',
                                  lr=0.001)  # 0.0003 noted as alternative
    joint_net = joint_recommender.get_model()
    joint_net = train_model(joint_recommender, joint_net,
                            train_data, test_data,
                            *new_Para.param.train_paras)
예제 #12
0
def get_preTrain_CINI_model():
    """Return a trained CI recommender and a prepared online NI recommender.

    ``new_old``, ``train_data``, ``test_data`` and ``new_Para`` are read
    from the surrounding scope.

    :return: tuple ``(CI recommender, NI online recommender)``.
    """
    hin_recommender = HINRec(model_name='PasRec',
                             epoch_num=40,
                             neighbor_size=15,
                             topTopicNum=3)

    ci_recommender = CI_Model(new_old)  # 'old'
    ci_recommender.prepare()
    ci_net = ci_recommender.get_model()
    # 'monitor loss&acc'
    ci_net = train_model(ci_recommender, ci_net, train_data, test_data,
                         *new_Para.param.train_paras)

    # 'new', pruned_neighbor_baseScore = 0 (original note)
    ni_recommender = NI_Model_online(new_old,
                                     if_implict=True,
                                     if_explict=False,
                                     if_correlation=False)

    # Original note: building is enough; previously trained similarity data
    # is read back ('IsRec_best_Sim').
    if new_Para.param.NI_OL_mode == 'tagSim':
        similarity_source = ci_recommender
    else:
        similarity_source = hin_recommender
    ni_recommender.prepare(similarity_source, train_data, test_data)

    return ci_recommender, ni_recommender
예제 #13
0
def test_simModes(a_dataset, new_old='new', if_few=False):
    """Train CI, online NI and the top MLP that combines them.

    :param a_dataset: object exposing ``train_data``, ``test_data`` and
        ``get_few_samples``.
    :param new_old: value forwarded to ``CI_Model`` and ``NI_Model_online``.
    :param if_few: when true, use ``a_dataset.get_few_samples(128)`` instead
        of the full splits and print the types of the two results.
    """
    if not if_few:
        train_data = a_dataset.train_data
        test_data = a_dataset.test_data
    else:
        train_data, test_data = a_dataset.get_few_samples(128)
        print(type(train_data))
        print(type(test_data))

    # Original note: 'IsRec_best' is the pre-trained similarity model.
    hin_recommender = HINRec(model_name=new_Para.param.NI_OL_mode,
                             epoch_num=20,
                             neighbor_size=15,
                             topTopicNum=3)

    ci_recommender = CI_Model(new_old)  # 'old'
    ci_recommender.prepare()
    ci_net = ci_recommender.get_model()
    # 'monitor loss&acc'
    ci_net = train_model(ci_recommender, ci_net, train_data, test_data,
                         *new_Para.param.train_paras)
    # Disabled in the source: evalute_by_epoch on the CI model, and a
    # "no_slt CI" variant that trained/evaluated against
    # a_dataset.test_data_no_reduct.

    if new_Para.param.NI_OL_mode == 'tagSim':
        similarity_source = ci_recommender
    else:
        similarity_source = hin_recommender

    # 'new', pruned_neighbor_baseScore = 0 (original note)
    ni_recommender = NI_Model_online(new_old,
                                     if_implict=True,
                                     if_explict=False,
                                     if_correlation=False,
                                     eachPath_topK=True)
    # Original note: building is enough; previously trained similarity data
    # is read back.
    ni_recommender.prepare_sims(similarity_source, train_data, test_data)
    ni_net = ni_recommender.get_model()
    ni_net = train_model(ni_recommender, ni_net, train_data, test_data,
                         *new_Para.param.train_paras)
    # Disabled in the source: an explicit NI_Model variant, evalute_by_epoch
    # on the NI model, and a "no_slt NI" variant using
    # a_dataset.test_data_no_reduct.

    # CI + implicit NI combined by the top MLP.
    top_recommender = top_MLP(ci_recommender,
                              ci_net,
                              NI_recommend_model1=ni_recommender,
                              NI_model1=ni_net)
    top_net = top_recommender.get_model()
    top_net = train_model(top_recommender, top_net, train_data, test_data,
                          *new_Para.param.train_paras)
    # Original note: stores intermediate results of all instances used
    # during training and testing.
    top_recommender.save_sth()
예제 #14
0
 def __run_training(self, event):
     """Start training when ``self.model_name`` equals ``"Train"``.

     ``event`` is accepted but not used in this method.
     """
     if self.model_name != "Train":
         return
     train_model(hyper_params=self.hyper_params)
     # Wait three seconds after train_model returns.
     time.sleep(3)