def getUsers():
    """Load the Edinburgh restaurant users and return them as User objects.

    The input file ``data/edinburgh_restaurant_users.json`` holds one JSON
    object per line.  Each record is expected to provide the keys
    ``votes`` (a mapping whose values are int-convertible), ``elite`` and
    ``friends`` (sized collections), ``user_id``, ``fans``,
    ``review_count`` and ``yelping_since``.

    Returns:
        list: one populated ``User`` per record, in file order.  An input
        without records yields an empty list.
    """
    users_path = "data/edinburgh_restaurant_users.json"

    # Let pandas parse the JSON-lines file directly.  This replaces the
    # hand-built "[" + ",".join(lines) + "]" string: passing literal JSON
    # text to read_json is deprecated in recent pandas, and a blank line in
    # the file used to produce an invalid ",," sequence.  lines=True skips
    # empty lines.
    data_df = pd.read_json(users_path, lines=True)

    ret = []
    # Positional row access is kept (rather than iterrows) so the values
    # handed to the User setters are exactly what they were before.
    for i in range(len(data_df)):
        row = data_df.iloc[i]
        friends = row["friends"]

        u = User()
        # Total votes is the sum over all vote categories.
        u.setVoteCount(sum(map(int, row["votes"].values())))
        # Number of entries in the record's "elite" collection.
        u.setEliteNum(len(row["elite"]))
        for friend in friends:
            u.addFriend(friend)
        u.setID(row["user_id"])
        u.setFanCount(row["fans"])
        u.setFriendCount(len(friends))
        u.setReviewCount(row["review_count"])
        u.setYelpingSince(row["yelping_since"])
        ret.append(u)
    return ret
if fav_id != 0: break u.addFavorite(fav_id) #a long-tailed number of friends num_friends = int(round(random.lognormvariate(d, .5))) for i in range(num_friends): #long-tailed distribution on friends friend_id = 0 while True: friend_id = (1 + int(round(random.lognormvariate(4, 5)))) % 10000 if friend_id != 0: break u.addFriend(friend_id) #long-tailed number of purchases num_purchases = int(round(random.lognormvariate(d / 2, .1))) - 2 for i in range(num_purchases): pruch_id = 0 while True: purch_id = (1 + int(round(random.lognormvariate(4, 4.5)))) % 50000 if purch_id != 0: break u.addPurchase(purch_id) #print "%d\t%d\t%d\t%d" %(u.id, len(u.favorites), len(u.friends), len(u.purchases)) print u.toJson()
if fav_id != 0: break u.addFavorite(fav_id) #a long-tailed number of friends num_friends = int(round(random.lognormvariate(d, .5))) for i in range(num_friends): #long-tailed distribution on friends friend_id = 0 while True: friend_id = (1+int(round(random.lognormvariate(4, 5))))%10000 if friend_id != 0: break u.addFriend(friend_id) #long-tailed number of purchases num_purchases = int(round(random.lognormvariate(d/2, .1))) - 2 for i in range(num_purchases): pruch_id = 0 while True: purch_id = (1+int(round(random.lognormvariate(4, 4.5))))%50000 if purch_id != 0: break u.addPurchase(purch_id) #print "%d\t%d\t%d\t%d" %(u.id, len(u.favorites), len(u.friends), len(u.purchases))