def train(self,
          training_data_t: pd.DataFrame,
          training_data_f: pd.DataFrame,
          user_id=None):
    """Fit per-axis marginals and one pairwise copula per non-main axis.

    For every score type in ``util.DEFAULT_SCORE_TYPE_LIST`` a
    ``marginal.Norm`` is built from the matching column of
    ``training_data_t`` and passed to
    ``util.kl_divergence_between_population_and_users``. The score type
    with the largest returned value becomes ``self.main_axis``. Every other
    score type gets a ``copula.Copula`` built from the two-column matrix
    of (main-axis CDF, own CDF) values of the training rows.

    Sets ``self.copula_dict``, ``self.marg_dict``, ``self.kl_dict`` and
    ``self.main_axis``. ``self.kl_dict['sum']`` ends up as the sum of the
    KL values of all score types except the main axis.

    Args:
        training_data_t: Frame indexed by score type column; every entry
            of ``util.DEFAULT_SCORE_TYPE_LIST`` is read from it.
        training_data_f: Not used by this method.
        user_id: Not used by this method.
    """
    self.copula_dict = {}
    self.marg_dict = {}
    self.kl_dict = {'sum': 0}

    # -inf guarantees the first comparable KL value is selected.
    max_kl_value = float('-inf')
    for score_type in util.DEFAULT_SCORE_TYPE_LIST:
        marg = marginal.Norm(training_data_t[score_type])
        self.marg_dict[score_type] = marg
        kl = util.kl_divergence_between_population_and_users(
            marg, score_type)
        self.kl_dict['sum'] += kl
        self.kl_dict[score_type] = kl
        if kl > max_kl_value:
            max_kl_value = kl
            self.main_axis = score_type
    # 'sum' covers only the non-main axes.
    self.kl_dict['sum'] -= max_kl_value

    # Reuse the marginal fitted above instead of fitting the same column
    # a second time.
    main_marg = self.marg_dict[self.main_axis]
    main_cdf_list = [
        main_marg.cdf(x) for x in training_data_t[self.main_axis]
    ]

    for score_type in util.DEFAULT_SCORE_TYPE_LIST:
        if score_type == self.main_axis:
            continue
        # Use this axis's own marginal. The loop variable left over from
        # the fitting loop above would be the marginal of the *last*
        # score type, whatever axis is being processed here.
        sub_marg = self.marg_dict[score_type]
        sub_cdf_list = [sub_marg.cdf(x) for x in training_data_t[score_type]]
        cdf_matrix = np.matrix([main_cdf_list, sub_cdf_list]).T
        self.copula_dict[score_type] = copula.Copula(cdf_matrix, self.cop)
def adhoc_task(role=4):
    """Print the mean ``log1p`` KL value per score type for one role.

    For each user number belonging to ``role``, the file
    ``measure.InputDir + "/user<N>_kfolded.json"`` is read, a
    ``marginal.Norm`` is built per score type, and
    ``np.log1p(kl_divergence_between_population_and_users(...))`` is
    accumulated per score type. Finally each score type is printed with
    its accumulated value divided by the number of users in the role.

    Only the files of the selected role's users are read.

    Args:
        role: Role number (1-4) selecting the group of user numbers to
            average over. Defaults to 4.

    Raises:
        KeyError: If ``role`` is not one of the known role numbers.
    """
    # User numbers (1-based, matching the file names) per role.
    role_members = {
        1: [7, 12],
        2: [1, 2, 5, 6],
        3: [8, 9, 10],
        4: [3, 4, 11],
    }
    members = role_members[role]

    totals = dict.fromkeys(DEFAULT_SCORE_TYPE_LIST, 0)
    for member in members:
        user = pd.read_json(
            measure.InputDir + "/user{}_kfolded.json".format(member))
        for score_type in DEFAULT_SCORE_TYPE_LIST:
            norm = marginal.Norm(user[score_type])
            totals[score_type] += np.log1p(
                kl_divergence_between_population_and_users(norm, score_type))

    for score_type, total in totals.items():
        print(score_type, total / len(members))
def train(self,
          training_data_t: pd.DataFrame,
          training_data_f: pd.DataFrame,
          user_id):
    """Fit per-axis marginals and build the list of nested Gumbel copulas.

    For every score type in ``util.DEFAULT_SCORE_TYPE_LIST`` a
    ``marginal.Norm`` is built from the matching column of
    ``training_data_t`` and passed to
    ``util.kl_divergence_between_population_and_users``. Score types whose
    value exceeds 0.05 are kept, ordered by ascending value, as
    ``self.score_type_list``. When fewer than two score types pass the
    threshold, ``util.DEFAULT_SCORE_TYPE_LIST`` is used instead (in its
    own order).

    One two-dimensional ``'gumbel'`` copula is created for every entry of
    ``self.score_type_list`` after the first, with parameter
    ``1 + self.param_a * log1p(kl)`` of that entry.

    Sets ``self.main_axis`` (score type with the largest KL value),
    ``self.marg_dict``, ``self.score_type_list`` and
    ``self.nested_copula_list``.

    Args:
        training_data_t: Frame indexed by score type column; every entry
            of ``util.DEFAULT_SCORE_TYPE_LIST`` is read from it.
        training_data_f: Not used by this method.
        user_id: Not used by this method.
    """
    marg_dict = {}
    kl_dict = {}
    # -inf (rather than 0) so main_axis is still chosen when no KL value
    # is strictly positive, instead of being left unset or stale.
    max_kl_value = float('-inf')
    for score_type in util.DEFAULT_SCORE_TYPE_LIST:
        marg = marginal.Norm(training_data_t[score_type])
        marg_dict[score_type] = marg
        kl = util.kl_divergence_between_population_and_users(
            marg, score_type)
        kl_dict[score_type] = kl
        if kl > max_kl_value:
            max_kl_value = kl
            self.main_axis = score_type
    self.marg_dict = marg_dict
    print(kl_dict.values())

    # Keep only axes above the threshold, weakest first.
    informative = {k: v for k, v in kl_dict.items() if v > 0.05}
    self.score_type_list = sorted(informative, key=informative.get)
    # A nested copula needs at least two axes; with zero or one axis
    # passing the threshold, fall back to the full default list.
    if len(self.score_type_list) < 2:
        self.score_type_list = util.DEFAULT_SCORE_TYPE_LIST

    # One copula per adjacent pair; its strength is driven by the KL
    # value of the later axis of the pair.
    self.nested_copula_list = [
        copula.Copula(np.matrix([]),
                      'gumbel',
                      param=1 + self.param_a * np.log1p(kl_dict[score_type]),
                      dim=2)
        for score_type in self.score_type_list[1:]
    ]
def train(self,
          training_data_t: pd.DataFrame,
          training_data_f: pd.DataFrame,
          user_id=None):
    """Fit clustered pairwise models per sub-axis and three top copulas.

    Steps:

    1. For every score type in ``util.DEFAULT_SCORE_TYPE_LIST`` build a
       ``marginal.Norm`` and obtain its value from
       ``util.kl_divergence_between_population_and_users``; the score type
       with the largest value becomes ``self.main_axis``.
    2. For every entry of ``self.score_type_list`` other than the main
       axis, cluster ``training_data_t`` on ``[main_axis, score_type]``
       with ``models.create_cluster`` and store the result of
       ``models.create_weight_and_scoring_model_list`` in
       ``self.copula_dict[score_type]``.
    3. For every training row and sub-axis, accumulate the weighted
       main-axis CDF, sub-axis CDF and copula CDF over that sub-axis's
       (weight, scoring model) entries.
    4. Unless ``self.indep_copulaed`` is truthy, build
       ``self.top_copula1/2/3`` from the per-row values ``cop``,
       ``cop * sub`` and ``cop * main * sub`` respectively.

    Also sets ``self.marg_dict`` and ``self.kl_dict``
    (``self.kl_dict['sum']`` excludes the main axis).
    ``self.score_type_list`` is read but not assigned here.

    Args:
        training_data_t: Frame with one column per score type.
        training_data_f: Not used by this method.
        user_id: Not used by this method.
    """
    self.copula_dict = {}
    self.marg_dict = {}
    self.kl_dict = {'sum': 0}

    # -inf guarantees the first comparable KL value is selected.
    max_kl_value = float('-inf')
    for score_type in util.DEFAULT_SCORE_TYPE_LIST:
        marg = marginal.Norm(training_data_t[score_type])
        self.marg_dict[score_type] = marg
        kl = util.kl_divergence_between_population_and_users(
            marg, score_type)
        print(score_type, kl)
        self.kl_dict['sum'] += kl
        self.kl_dict[score_type] = kl
        if kl > max_kl_value:
            max_kl_value = kl
            self.main_axis = score_type
    # NOTE(review): placed after the loop (prints the chosen axis once);
    # the collapsed source does not show its nesting -- confirm.
    print(self.main_axis)
    # 'sum' covers only the non-main axes.
    self.kl_dict['sum'] -= max_kl_value

    # Every axis that is paired with the main axis, in configured order.
    sub_axes = [
        axis for axis in self.score_type_list if axis != self.main_axis
    ]

    for score_type in sub_axes:
        target = [self.main_axis, score_type]
        clust = models.create_cluster(training_data_t, self.n_clusters,
                                      target)
        self.copula_dict[score_type] = (
            models.create_weight_and_scoring_model_list(
                clust, self.marg, self.cop, target, [score_type], []))

    # One list of per-row values for each sub-axis.
    cop_values = [[] for _ in sub_axes]
    cop_sub_values = [[] for _ in sub_axes]
    cop_main_sub_values = [[] for _ in sub_axes]

    for _, row in training_data_t.iterrows():
        for position, score_type in enumerate(sub_axes):
            main_cdf = 0
            sub_cdf = 0
            cop_cdf = 0
            for weight_and_scoring_model in self.copula_dict[score_type]:
                weight = weight_and_scoring_model[0]
                score_model = weight_and_scoring_model[1]
                main_marg_cdf = score_model[self.main_axis].cdf(
                    row[self.main_axis])
                sub_marg_cdf = score_model[score_type].cdf(row[score_type])
                main_cdf += weight * main_marg_cdf
                sub_cdf += weight * sub_marg_cdf
                cop_cdf += score_model['copula'].cdf(
                    np.matrix([main_marg_cdf, sub_marg_cdf])) * weight
            # NOTE(review): recorded once per (row, sub-axis), after the
            # mixture over all entries is complete -- confirm against the
            # original indentation.
            cop_values[position].append(cop_cdf)
            cop_sub_values[position].append(cop_cdf * sub_cdf)
            cop_main_sub_values[position].append(
                cop_cdf * main_cdf * sub_cdf)

    if not self.indep_copulaed:
        # Rows = training samples, columns = sub-axes.
        self.top_copula1 = copula.Copula(np.matrix(cop_values).T, self.cop)
        self.top_copula2 = copula.Copula(
            np.matrix(cop_sub_values).T, self.cop)
        self.top_copula3 = copula.Copula(
            np.matrix(cop_main_sub_values).T, self.cop)