def transfer_false_test_MF(self):
    """Build one MF train/test split per selected-service count.

    The positive (mashup, api) pairs of ``self.train_data`` are collected
    once, without duplicates.  Then, for every ``slt_num`` in
    ``1..self.args.slt_item_num``, the already-selected services of the test
    samples whose selection has exactly ``slt_num`` entries are appended to a
    copy of those pairs, and the matching test samples are gathered in their
    original format.

    Side effects:
        * ``self.train_data`` is replaced by a list with one entry per
          ``slt_num``; each entry is a list of ``(mashup_id, api_id)`` pairs.
        * ``self.test_data`` is replaced by a list with one entry per
          ``slt_num``; each entry is the tuple
          ``(test_mashup_id_list, test_api_id_list, grounds)``.
        * Each training list is passed to ``save_2D_list`` together with the
          path ``<self.data_root>/train_set_MF_<slt_num>.data``.

    Returns:
        The tuple ``(self.train_data, self.test_data)``.
    """
    # Positive training pairs, each (mashup_id, api_id) kept only once.
    labels = self.train_data[-1]
    base_mashup_ids, base_api_ids = [], []
    seen_pairs = set()
    for index, pair in enumerate(zip(self.train_data[0], self.train_data[1])):
        if not labels[index]:
            continue
        pair = tuple(pair)
        if pair in seen_pairs:
            continue
        # The original never filled this set, so duplicates slipped through
        # even though the check (and its comment) asked for unseen pairs only.
        seen_pairs.add(pair)
        base_mashup_ids.append(pair[0])
        base_api_ids.append(pair[1])

    def certain_slt_num_split(train_mashup_id_list, train_api_id_list, slt_num):
        """Append the split for one ``slt_num`` to self.train_data/test_data.

        The two list arguments are extended in place, so callers pass copies.
        """
        # NOTE(review): this set is checked below but never populated, so the
        # (mashup, selection-length) filter is always true.  Populating it
        # would also change which test samples are kept, so it is left as-is
        # until the intended behavior is confirmed.
        set_ = set()
        test_mashup_id_list, test_api_id_list, grounds = [], [], []
        for index, test_mashup_ids in enumerate(self.test_mashup_id_list):
            m_id = test_mashup_ids[0]
            slt_api_ids = self.test_slt_ids[index]
            if len(slt_api_ids) != slt_num:
                continue
            if (m_id, len(slt_api_ids)) in set_:
                continue
            # Services already selected in the test sample become training
            # pairs for this mashup.
            train_mashup_id_list.extend([m_id] * slt_num)
            train_api_id_list.extend(slt_api_ids)
            # The sample is still tested, in the same format as before.
            test_mashup_id_list.append(test_mashup_ids)
            test_api_id_list.append(self.test_api_id_list[index])
            grounds.append(self.grounds[index])
        # Pair list; the original comment says it is consumed by get_U_V.
        self.train_data.append(
            list(zip(train_mashup_id_list, train_api_id_list)))
        self.test_data.append(
            (test_mashup_id_list, test_api_id_list, grounds))

    # From here on both attributes hold one entry per selected-service count.
    self.train_data, self.test_data = [], []
    for i in range(1, self.args.slt_item_num + 1):
        print('slt_num:', i)
        # Report the size of the shared base list; the original read
        # self.train_mashup_id_list, which this method never assigns.
        print('before, train samples:{}'.format(len(base_mashup_ids)))
        certain_slt_num_split(list(base_mashup_ids), list(base_api_ids), i)
        print('after, train samples:{}'.format(len(self.train_data[-1])))
        true_train_set_path = os.path.join(
            self.data_root, 'train_set_MF_{}.data'.format(i))
        # Persist the augmented training set (original note: processed by
        # the Java side afterwards).
        save_2D_list(true_train_set_path, self.train_data[i - 1])
    print('transfer for MF,done!')
    return self.train_data, self.test_data
def save_true_train_data(self):
    """Return the positive training pairs, saving them on first use.

    Every entry of ``self.train_mashup_api_list`` whose label in
    ``self.train_labels`` is truthy is collected.  If
    ``<self.root_path>/train_set.data`` does not exist yet, the collected
    list is passed to ``save_2D_list`` with that path; an existing file is
    left untouched.

    Original author's notes: the file is meant for librec; because the
    selected services may differ, the same (m_id, a_id) pair may occur more
    than once, so MF in the new scenario may not use this method.

    Returns:
        The list of positive entries, in their original order.
    """
    true_train_set_path = os.path.join(self.root_path, 'train_set.data')
    # Build the list unconditionally: the original only bound it when the
    # file was missing, so the return value was unavailable otherwise.
    true_train_mashup_api_pairs = [
        self.train_mashup_api_list[index]
        for index, label in enumerate(self.train_labels)
        if label
    ]
    if not os.path.exists(true_train_set_path):
        save_2D_list(true_train_set_path, true_train_mashup_api_pairs)
    return true_train_mashup_api_pairs
def show_text_tag_features(self, train_data, show_num=10):
    """Dump text/tag features of the first samples for a sanity check.

    Used to check whether the generated mashup and api text and tag features
    look normal.

    Args:
        train_data: sequence whose last element is dropped; the remaining
            elements are ``(m_ids, a_ids)`` when ``self.old_new == 'old'``
            and ``(m_ids, a_ids, slt_a_ids)`` when ``self.old_new == 'new'``.
        show_num: number of leading samples passed to ``get_instances``.

    Side effects:
        Builds an intermediate ``Model`` from ``self.model`` whose outputs are
        the first four inputs of the ``'all_content_concatenate'`` layer,
        runs ``predict`` on the selected samples and hands each of the four
        results to ``save_2D_list`` with mode ``'a+'`` and a ``*.dat`` path
        under ``self.model_dir``.

    Raises:
        ValueError: if ``self.old_new`` is neither ``'old'`` nor ``'new'``.
    """
    if self.old_new == 'old':
        m_ids, a_ids = train_data[:-1]
        id_columns = (m_ids, a_ids)
    elif self.old_new == 'new':
        m_ids, a_ids, slt_a_ids = train_data[:-1]
        id_columns = (m_ids, a_ids, slt_a_ids)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # local further down.
        raise ValueError(
            "old_new must be 'old' or 'new', got {!r}".format(self.old_new))
    instances_tuple = self.get_instances(
        *[column[:show_num] for column in id_columns])

    text_tag_middle_model = Model(
        inputs=[*self.model.inputs],
        outputs=[
            *self.model.get_layer('all_content_concatenate').input[:4]
        ])
    # Unpack explicitly so an unexpected number of outputs still raises.
    (mashup_text_features, apis_text_features,
     mashup_tag_features, apis_tag_features) = text_tag_middle_model.predict(
        [*instances_tuple], verbose=0)

    dumps = (
        ('mashup_text_features.dat', mashup_text_features),
        ('apis_text_features.dat', apis_text_features),
        ('mashup_tag_features.dat', mashup_tag_features),
        ('apis_tag_features.dat', apis_tag_features),
    )
    for file_name, features in dumps:
        save_2D_list(os.path.join(self.model_dir, file_name), features, 'a+')
def save(self):
    """Persist the train/test sample sets through the project save helpers.

    Always stores the training instances with their labels, the test
    instances, and the ground-truth api ids.  When
    ``new_Para.param.data_mode`` equals ``'newScene'`` the selected-api ids
    of the train and test samples are stored as well.
    """
    save_split_train(
        self.train_instances_path,
        self.train_mashup_api_list,
        self.train_labels,
    )
    save_test_instance(
        self.test_instances_path,
        self.test_mashup_id_list,
        self.test_api_id_list,
    )
    save_2D_list(self.all_ground_api_ids_path, self.grounds)

    # Only the new scenario carries the extra slt_ids files.
    if new_Para.param.data_mode != 'newScene':
        return
    save_2D_list(self.train_slt_ids_path, self.slt_api_ids_instances)
    save_2D_list(self.test_slt_ids_path, self.test_slt_ids)