def __init__(self, k):
    """Create the empty rating containers, then load and summarise the data.

    Args:
        k: Stored as ``self.k_current`` (presumably the index of the
            cross-validation fold to use -- confirm against the caller).
    """
    super(RatingGetter, self).__init__()
    self.config = ConfigX()
    self.k_current = k

    # Plain dict containers, all starting empty.
    self.user, self.item = {}, {}
    self.all_User, self.all_Item = {}, {}
    self.id2user, self.id2item = {}, {}

    # Nested dict containers (outer key -> inner dict).
    self.dataSet_u = defaultdict(dict)
    self.trainSet_u, self.trainSet_i = defaultdict(dict), defaultdict(dict)
    # Test set stored hierarchically as user:[item, rating].
    self.testSet_u = defaultdict(dict)
    # Test set stored hierarchically as item:[user, rating].
    self.testSet_i = defaultdict(dict)
    # Cold-start users in the test set.
    self.testColdUserSet_u = defaultdict(dict)
    # Hot users in the train set.
    self.trainHotUserSet = []

    self.trainSetLength = self.testSetLength = 0

    # Mean rating values per user, per item, and overall.
    self.userMeans, self.itemMeans = {}, {}
    self.globalMean = 0

    # Populate the containers; the call order is kept as-is.
    self.generate_data_set()  # generate train and test set
    self.getDataSet()
    self.get_data_statistics()
    self.get_cold_start_users()
def __init__(self):
    """Create the empty rating containers, then load and summarise the data."""
    super(RatingGetter, self).__init__()
    self.config = ConfigX()

    # Plain dict containers, all starting empty.
    self.user, self.item = {}, {}
    self.all_User, self.all_Item = {}, {}
    self.id2user, self.id2item = {}, {}

    self.trainSet_u, self.trainSet_i = defaultdict(dict), defaultdict(dict)
    # Test set stored hierarchically as user:[item, rating].
    self.testSet_u = defaultdict(dict)
    # Test set stored hierarchically as item:[user, rating].
    self.testSet_i = defaultdict(dict)
    # Test-set rating information of cold-start users.
    self.testColdUserSet_u = defaultdict(dict)
    # Users with sufficient training data in the train set.
    self.trainHotUserSet = []

    self.trainSetLength = self.testSetLength = 0

    # Mean rating values per user, per item, and overall.
    self.userMeans, self.itemMeans = {}, {}
    self.globalMean = 0

    # Populate the containers; the call order is kept as-is.
    self.generate_data_set()  # generate the train and test sets
    self.get_data_statistics()  # user/item rating means and the global mean
    self.get_cold_start_users()  # collect the cold-start users
    self.get_full_users()  # collect the sufficiently trained users
def __init__(self):
    """Create empty data-set and network containers and build the net file paths.

    ``CU_file`` and ``CI_file`` are derived from ``config.dataset_name``.
    """
    self.config = ConfigX()
    dataset = self.config.dataset_name

    self.dataSet_u, self.dataSet_i = defaultdict(dict), defaultdict(dict)
    self.CUNet, self.CINet = defaultdict(list), defaultdict(list)

    self.CU_file = "../data/net/%s_CUnet.txt" % dataset
    self.CI_file = "../data/net/%s_CInet.txt" % dataset
def __init__(self):
    """Create the configuration, two similarity matrices and two neighbour maps."""
    super(SimBase, self).__init__()
    self.config = ConfigX()
    self.user_sim, self.item_sim = SimMatrix(), SimMatrix()
    self.user_k_neibor, self.item_k_neibor = defaultdict(dict), defaultdict(dict)
def __init__(self):
    """Create the configuration, load the rating data and reset the metric logs."""
    super(MF, self).__init__()
    self.config = ConfigX()
    self.rg = RatingGetter()  # load the rating data
    # NOTE: init_model() is deliberately not called here (it was disabled).
    self.iter_rmse, self.iter_mae = [], []
def __init__(self):
    """Create and print the configuration, then reset the metric logs."""
    super(MF, self).__init__()
    self.config = ConfigX()
    cpprint(self.config.__dict__)  # print the configuration
    # NOTE: neither the RatingGetter load (self.rg) nor init_model() runs
    # here; both calls were disabled in the original.
    self.iter_rmse, self.iter_mae = [], []
def __init__(self):
    """Load the rating and trust data and zero all six counters."""
    super(DataStatis, self).__init__()
    self.config = ConfigX()

    # Getters are constructed in the original order: ratings, then trust.
    self.rg = RatingGetter()  # load the rating data
    self.tg = TrustGetter()

    # Every counter starts at zero.
    self.cold_rating = self.cold_social = self.cold_rating_social = 0
    self.cold_rating_warm_social = 0
    self.warm_rating_cold_social = 0
    self.warm_rating_warm_social = 0
def __init__(self):
    """Create the trust containers, read the relations and build the data set.

    ``get_relations()`` is called after ``self.user`` exists and before the
    remaining containers are created; that ordering is intentional here
    because the helper is not visible and may depend on it.
    """
    super(TrustGetter, self).__init__()
    self.config = ConfigX()
    self.user = {}  # used to store the order of users
    self.relations = self.get_relations()

    self.followees = defaultdict(dict)
    self.followers = {}
    self.matrix_User, self.matrix_Item = {}, {}

    self.generate_data_set()
def __init__(self):
    """Create a configuration with the walk settings and empty similarity state."""
    super(SimGe, self).__init__()

    cfg = ConfigX()
    self.config = cfg
    # Settings written onto the configuration object for this class.
    cfg.walkCount = 30
    cfg.walkLength = 20
    cfg.walkDim = 20
    cfg.winSize = 5
    cfg.topK = 50

    self.user_sim, self.item_sim = SimMatrix(), SimMatrix()
    self.user_k_neibor, self.item_k_neibor = defaultdict(dict), defaultdict(dict)
def __init__(self, fixseed=True):
    """Create and print the configurations, optionally seed NumPy, reset logs.

    Args:
        fixseed: When true, ``np.random.seed`` is called with
            ``config.random_state``.
    """
    super(MF, self).__init__()
    self.config = ConfigX()
    self.configc = ConfigCUC()
    cpprint(self.config.__dict__)  # print the configuration

    # Print statistics for the rating file, then the trust file.
    print_data_file_stats(self.config.rating_path)
    print_data_file_stats(self.config.trust_path)

    if fixseed:
        # Fix the random seed.
        np.random.seed(seed=self.config.random_state)

    # NOTE: neither the RatingGetter load (self.rg) nor init_model() runs
    # here; both calls were disabled in the original.
    self.iter_rmse, self.iter_mae = [], []
def __init__(self, k):
    """Create the meta/item containers for one fold, load them and log sizes.

    Args:
        k: Fold number, i.e. which fold to run; stored as ``self.k_current``.
    """
    super(MetaGetter, self).__init__()
    self.log = logging.getLogger('reader.MetaGetter')
    self.config = ConfigX()
    self.configc = ConfigCUC()
    self.k_current = k

    self.meta = {}  # key: id, value: serial number
    self.item = {}  # key: id, value: serial number
    self.all_Meta = {}  # equivalent to meta
    self.all_Item = {}  # equivalent to item
    self.id2meta = {}  # key: serial number, value: id
    self.id2item = {}  # key: serial number, value: id

    self.dataSet_m = defaultdict(dict)
    self.trainSet_m = defaultdict(dict)
    self.trainSet_i = defaultdict(dict)
    # Test set stored hierarchically as meta:[item, rating].
    self.testSet_m = defaultdict(dict)
    # Test set stored hierarchically as item:[meta, rating].
    self.testSet_i = defaultdict(dict)
    # Cold-start metas in the test set.
    self.testColdMetaSet_m = defaultdict(dict)
    # Hot metas in the train set.
    self.trainHotMetaSet = []

    self.trainSetLength = 0
    self.testSetLength = 0

    self.metaMeans = {}  # mean values per meta
    self.itemMeans = {}  # mean values per item
    self.globalMean = 0

    # Populate the containers; the call order is kept as-is.
    self.generate_data_set()  # generate train and test set
    self.getDataSet()
    self.get_data_statistics()
    self.get_cold_start_metas()

    # Values are passed as logger arguments so the message is only
    # formatted when DEBUG is enabled; the emitted text is unchanged.
    self.log.debug(" 准备好各种结构,便于使用 ")
    self.log.debug("all_Meta len: %s ", len(self.all_Meta))
    self.log.debug("all_Item len: %s ", len(self.all_Item))
    self.log.debug("meta len: %s ", len(self.meta))
    self.log.debug("item len: %s ", len(self.item))
    self.log.debug("id2meta len: %s ", len(self.id2meta))
    self.log.debug("id2item len: %s ", len(self.id2item))
    self.log.debug("嵌套 dict trainSet_m len: %s ", len(self.trainSet_m))
    self.log.debug("嵌套 dict trainSet_i len: %s ", len(self.trainSet_i))
self.loss += 0.5 * self.config.lambdaP * (self.P * self.P).sum( ) + 0.5 * self.config.lambdaQ * (self.Q * self.Q).sum() iteration += 1 if self.isConverged(iteration): break if __name__ == '__main__': # srg = SocialReg() # srg.train_model(0) # coldrmse = srg.predict_model_cold_users() # print('cold start user rmse is :' + str(coldrmse)) # srg.show_rmse() configx = ConfigX() configx.k_fold_num = 5 configx.rating_path = "../data/ft_ratings.txt" configx.rating_cv_path = "../data/cv/" split_5_folds(configx) rmses = [] maes = [] tcsr = SocialReg() # print(bmf.rg.trainSet_u[1]) for i in range(tcsr.config.k_fold_num): print('the %dth cross validation training' % i) tcsr.train_model(i) rmse, mae = tcsr.predict_model() rmses.append(rmse)
# encoding:utf-8
import sys

sys.path.append("..")

import numpy as np
from numpy.linalg import norm

from configx.configx import ConfigX

# Module-level configuration; its min_val / max_val become the default
# bounds of normalize() at import time.
config = ConfigX()


def l1(x):
    """Return ``numpy.linalg.norm(x, ord=1)``."""
    return norm(x, ord=1)


def l2(x):
    """Return ``numpy.linalg.norm(x)`` with the default order."""
    return norm(x)


def normalize(rating, minVal=config.min_val, maxVal=config.max_val):
    """Min-max normalise ``rating`` against the bounds ``minVal``/``maxVal``.

    Args:
        rating: Value to normalise.
        minVal: Lower bound; defaults to ``config.min_val``.
        maxVal: Upper bound; defaults to ``config.max_val``.

    Returns:
        ``(rating - minVal) / (maxVal - minVal) + 0.01`` when
        ``maxVal > minVal``; ``rating / maxVal`` when the bounds are equal.

    Raises:
        ArithmeticError: If neither of the above conditions holds (an error
            line is printed first).
    """
    if maxVal > minVal:
        span = maxVal - minVal
        return float(rating - minVal) / span + 0.01
    if maxVal == minVal:
        return rating / maxVal
    print('error... maximum value is less than minimum value.')
    raise ArithmeticError
def roulette(_datas, _ps):
    """Draw one element of ``_datas`` using the probabilities ``_ps``.

    Thin wrapper around ``np.random.choice(_datas, p=_ps)``.
    """
    return np.random.choice(_datas, p=_ps)


def save_walks(walks, result_path):
    """Write each walk to ``result_path`` as one space-separated line.

    Args:
        walks: Iterable of walks; each walk is an iterable of strings.
        result_path: File to (over)write.
    """
    with open(result_path, "w") as f:
        for walk in walks:
            # One write per line; writelines() on a single string would
            # iterate it character by character.
            f.write(' '.join(walk) + '\n')


if __name__ == '__main__':
    g = Graph()
    c = ConfigX()
    number_walks = 5
    path_length = 5
    G = load_edgelist(c.trust_path, undirected=True)
    walks = build_deepwalk_corpus(G, number_walks, path_length)
    save_walks(walks, "../data/social_corpus.txt")
    print("Number of nodes: {}".format(len(G.nodes())))
    num_walks = len(G.nodes()) * number_walks
    print("Number of walks: {}".format(num_walks))
    data_size = num_walks * path_length