def load_business_data(self, business_file):
    """Read a JSON-lines business file and publish the result on ``self.data``.

    Each line of ``business_file`` is decoded with ``json.loads`` and wrapped
    in a ``Business`` object.  Objects are indexed by ``business_id`` (a
    repeated id replaces the earlier record) and the mean of ``stars`` over
    all lines read is computed.

    business_file -- path of the file to read, one JSON document per line.

    Sets ``self.data`` to ``{'data': {'business_item_hash': ...,
    'all_bus_avg_rating': ...}}`` and returns nothing.  Progress is reported
    through ``print_debug``.
    """
    business_item_hash = {}
    print_debug("Starting to read business file from:" + business_file, INFO_INT)
    count = 0
    rating = 0
    # The context manager releases the handle even if a line fails to parse.
    with open(business_file) as f_bus:
        for line in f_bus:
            business_item = Business(json.loads(line))
            business_item_hash[business_item.business_id] = business_item
            count += 1
            rating += business_item.stars
            print_debug("Reading record business_id: " + str(business_item.business_id),
                        DEBUG_INT)
    # An empty file has nothing to average: report 0.0 instead of raising
    # ZeroDivisionError.
    all_bus_avg_rating = float(rating) / count if count else 0.0
    print_debug("Total business line read: " + str(count), INFO_INT)
    print_debug("Total unique business item read: " + str(len(business_item_hash)),
                INFO_INT)
    print_debug("All business avg rating: " + str(all_bus_avg_rating), INFO_INT)
    data = {
        'business_item_hash': business_item_hash,
        'all_bus_avg_rating': all_bus_avg_rating,
    }
    self.data = {'data': data}
def load_user_data(self, user_file):
    """Read a JSON-lines user file and store the users plus running totals.

    Each line of ``user_file`` is decoded with ``json.loads`` and wrapped in a
    ``User`` object, indexed by ``user_id`` (a repeated id replaces the
    earlier record).

    user_file -- path of the file to read, one JSON document per line.

    After the call:
      * ``self.user_item_hash``     -- ``user_id`` -> ``User``
      * ``self.total_review_count`` -- sum of ``review_count`` over all lines
      * ``self.total_rating_count`` -- sum of ``average_stars`` over all lines
      * ``self.count``              -- number of lines read
      * ``self.total_funny`` / ``total_cool`` / ``total_useful`` are
        incremented in place for every user whose ``votes`` is truthy.

    NOTE(review): the three vote totals are updated with ``+=``, so they must
    already exist on ``self`` and they keep growing across repeated calls,
    unlike the other totals which are overwritten -- confirm this is intended.
    """
    user_item_hash = {}
    print_debug("Starting to read user file from:" + user_file, INFO_INT)
    count = 0
    total_stars = 0
    total_review = 0
    # ``with`` closes the file even when a record raises part-way through;
    # the original only closed it on the success path.
    with open(user_file) as f_bus:
        for line in f_bus:
            user_item = User(json.loads(line))
            user_item_hash[user_item.user_id] = user_item
            total_stars += user_item.average_stars
            total_review += user_item.review_count
            if user_item.votes:
                self.total_funny += user_item.funny
                self.total_cool += user_item.cool
                self.total_useful += user_item.useful
            count += 1
            print_debug("Reading record user_id: " + str(user_item.user_id), DEBUG_INT)
    self.user_item_hash = user_item_hash
    self.total_review_count = total_review
    self.count = count
    self.total_rating_count = total_stars
def load_business_data(self, business_file):
    """Load all businesses from ``business_file`` into ``self.data``.

    The file is read line by line; every line is parsed with ``json.loads``
    and turned into a ``Business``.  The businesses are stored in a dict keyed
    by ``business_id`` (later duplicates win) together with the average of
    their ``stars`` attribute, taken over the number of lines read.

    business_file -- path to a file holding one JSON object per line.

    Result is stored as ``self.data = {'data': {'business_item_hash': ...,
    'all_bus_avg_rating': ...}}``; nothing is returned.
    """
    print_debug("Starting to read business file from:" + business_file, INFO_INT)
    business_item_hash = {}
    count = 0
    rating = 0
    # Opening via ``with`` fixes the leaked handle: the original never
    # closed the file at all.
    with open(business_file) as f_bus:
        for line in f_bus:
            business_line = json.loads(line)
            business_item = Business(business_line)
            business_item_hash[business_item.business_id] = business_item
            count += 1
            rating += business_item.stars
            print_debug(
                "Reading record business_id: " + str(business_item.business_id),
                DEBUG_INT)
    if count:
        all_bus_avg_rating = float(rating) / count
    else:
        # No records read: avoid ZeroDivisionError and report a neutral 0.0.
        all_bus_avg_rating = 0.0
    print_debug("Total business line read: " + str(count), INFO_INT)
    print_debug(
        "Total unique business item read: " + str(len(business_item_hash)),
        INFO_INT)
    print_debug("All business avg rating: " + str(all_bus_avg_rating), INFO_INT)
    self.data = {
        'data': {
            'business_item_hash': business_item_hash,
            'all_bus_avg_rating': all_bus_avg_rating,
        }
    }
def load_user_data(self, user_file):
    """Read a JSON-lines user file and publish the result on ``self.data``.

    Each line of ``user_file`` is decoded with ``json.loads`` and wrapped in a
    ``User`` object.  Objects are indexed by ``user_id`` (a repeated id
    replaces the earlier record) and the mean of ``average_stars`` over all
    lines read is computed.

    user_file -- path of the file to read, one JSON document per line.

    Sets ``self.data`` to ``{'data': {'user_item_hash': ...,
    'all_user_avg_rating': ...}}`` and returns nothing.  Progress is reported
    through ``print_debug``.
    """
    user_item_hash = {}
    print_debug("Starting to read user file from:" + user_file, INFO_INT)
    count = 0
    total_rating = 0
    # The context manager releases the handle even if a line fails to parse;
    # the original never closed the file.
    with open(user_file) as f_bus:
        for line in f_bus:
            user_item = User(json.loads(line))
            user_item_hash[user_item.user_id] = user_item
            count += 1
            total_rating += user_item.average_stars
            print_debug("Reading record user_id: " + str(user_item.user_id), DEBUG_INT)
    # An empty file has nothing to average: report 0.0 instead of raising
    # ZeroDivisionError.
    all_user_avg_rating = float(total_rating) / count if count else 0.0
    print_debug("Total users line read: " + str(count), INFO_INT)
    print_debug("Total unique user item read: " + str(len(user_item_hash)), INFO_INT)
    print_debug("All user avg rating: " + str(all_user_avg_rating), INFO_INT)
    data = {
        'user_item_hash': user_item_hash,
        'all_user_avg_rating': all_user_avg_rating,
    }
    self.data = {'data': data}
def load_user_data(self, user_file):
    """Load users from ``user_file`` and record aggregate counters on ``self``.

    Every line is parsed with ``json.loads`` and wrapped in a ``User``; users
    are kept in a dict keyed by ``user_id`` (later duplicates win).

    user_file -- path to a file holding one JSON object per line.

    Attributes written:
      * ``user_item_hash``     -- the ``user_id`` -> ``User`` dict
      * ``total_review_count`` -- summed ``review_count``
      * ``total_rating_count`` -- summed ``average_stars``
      * ``count``              -- number of lines processed
    Attributes incremented (must already exist on ``self``):
      * ``total_funny``, ``total_cool``, ``total_useful`` -- only for users
        whose ``votes`` attribute is truthy.
    """
    print_debug("Starting to read user file from:" + user_file, INFO_INT)
    user_item_hash = {}
    count = 0
    total_stars = 0
    total_review = 0
    # Exception-safe replacement for the trailing ``f_bus.close()``, which was
    # skipped whenever parsing a record raised.
    with open(user_file) as f_bus:
        for line in f_bus:
            user_line = json.loads(line)
            user_item = User(user_line)
            user_item_hash[user_item.user_id] = user_item
            total_stars += user_item.average_stars
            total_review += user_item.review_count
            if user_item.votes:
                self.total_funny += user_item.funny
                self.total_cool += user_item.cool
                self.total_useful += user_item.useful
            count += 1
            print_debug("Reading record user_id: " + str(user_item.user_id), DEBUG_INT)
    self.user_item_hash = user_item_hash
    self.total_review_count = total_review
    self.count = count
    self.total_rating_count = total_stars
def load_user_data(self, user_file):
    """Load all users from ``user_file`` into ``self.data``.

    The file is read line by line; every line is parsed with ``json.loads``
    and turned into a ``User``.  The users are stored in a dict keyed by
    ``user_id`` (later duplicates win) together with the average of their
    ``average_stars`` attribute, taken over the number of lines read.

    user_file -- path to a file holding one JSON object per line.

    Result is stored as ``self.data = {'data': {'user_item_hash': ...,
    'all_user_avg_rating': ...}}``; nothing is returned.
    """
    print_debug("Starting to read user file from:" + user_file, INFO_INT)
    user_item_hash = {}
    count = 0
    total_rating = 0
    # Opening via ``with`` fixes the leaked handle: the original never
    # closed the file at all.
    with open(user_file) as f_bus:
        for line in f_bus:
            user_line = json.loads(line)
            user_item = User(user_line)
            user_item_hash[user_item.user_id] = user_item
            count += 1
            total_rating += user_item.average_stars
            print_debug("Reading record user_id: " + str(user_item.user_id),
                        DEBUG_INT)
    if count:
        all_user_avg_rating = float(total_rating) / count
    else:
        # No records read: avoid ZeroDivisionError and report a neutral 0.0.
        all_user_avg_rating = 0.0
    print_debug("Total users line read: " + str(count), INFO_INT)
    print_debug(
        "Total unique user item read: " + str(len(user_item_hash)),
        INFO_INT)
    print_debug("All user avg rating: " + str(all_user_avg_rating), INFO_INT)
    self.data = {
        'data': {
            'user_item_hash': user_item_hash,
            'all_user_avg_rating': all_user_avg_rating,
        }
    }
def load_review_data(self, review_file):
    """Read a JSON-lines review file into ``self.review_item_hash``.

    Each line of ``review_file`` is decoded with ``json.loads`` and wrapped in
    a ``Review`` object.  Reviews are stored under the key
    ``"<user_id>-<business_id>"``, so when the same user reviewed the same
    business more than once only the last review read is kept.

    review_file -- path of the file to read, one JSON document per line.

    Returns nothing; progress is reported through ``print_debug``.
    """
    review_item_hash = {}
    print_debug("Starting to read review file from:" + review_file, INFO_INT)
    # ``with`` closes the file even when a record raises part-way through;
    # the original only closed it on the success path.
    with open(review_file) as f_bus:
        for line in f_bus:
            review_item = Review(json.loads(line))
            pair_key = str(review_item.user_id) + "-" + str(review_item.business_id)
            review_item_hash[pair_key] = review_item
            print_debug("Reading record review_id: " + str(review_item.review_id),
                        DEBUG_INT)
    self.review_item_hash = review_item_hash
def get_global_avg_rating_matrix(self):
    """Compute a very basic rating for every (user, business) pair.

    For each pair, the known rating is fetched from the review manager when
    one exists.  Otherwise an estimate is assembled from the business average
    (as reported by the review manager), the global user average minus this
    user's average, and the global business average minus this business's
    average (as reported by the business manager).

    The ratings are accumulated, in iteration order, into a string in which
    every value is followed by a comma.

    NOTE(review): the accumulated string is neither returned nor written
    anywhere, and no matrix is built -- the function currently returns
    ``None``.  The original carried commented-out scaffolding for a numpy
    matrix and an output file; finish or remove as appropriate.

    Progress is printed to stdout every 100 users.
    """
    # The key collections do not change during the computation, so fetch them
    # once instead of once per user.
    user_ids = self.user_manager.get_all_item_keys()
    business_ids = self.business_manager.get_all_item_keys()
    dim_user = len(user_ids)
    dim_bus = len(business_ids)
    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement and is also valid Python 3.
    print("user dimension: " + str(dim_user))
    print("business dimension: " + str(dim_bus))
    print_debug("Generating the user business avg rating matrix", INFO_INT)
    all_user_avg_rating = self.user_manager.get_all_user_avg_rating()
    all_bus_avg_rating = self.business_manager.get_all_business_avg_rating()
    output_line = ""
    for index_i, user_id in enumerate(user_ids):
        row_fields = []
        for business_id in business_ids:
            key = "-".join([user_id, business_id])
            if self.review_manager.is_exist_user_business_rating(user_id, business_id):
                rating = self.review_manager.get_user_business_rating(key)
            else:
                rating = (
                    self.review_manager.get_business_avg_rating(business_id)
                    + all_user_avg_rating
                    - self.user_manager.get_user_avg_rating(user_id)
                    + all_bus_avg_rating
                    - self.business_manager.get_business_avg_rating(business_id)
                )
            row_fields.append(str(rating) + ",")
        # One concatenation per user instead of one per cell.
        output_line += "".join(row_fields)
        if index_i % 100 == 0:
            print(str(index_i))
def get_global_avg_rating_matrix(self):
    """Walk every (user, business) pair and derive a basic rating for it.

    A rating that exists in the review manager is used as is.  A missing
    rating is replaced by:

        review-manager business average
        + (global user average - this user's average)
        + (global business average - business-manager business average)

    Each rating is appended, followed by a comma, to one output string.

    NOTE(review): nothing is returned or persisted -- the output string is
    built and then dropped, and the matrix the name refers to is never
    created.  The original only had commented-out code for both; confirm
    what the intended output is.

    Prints the two dimensions up front and the user index every 100 users.
    """
    # Hoisted: the original re-fetched the business keys for every user.
    all_user_ids = self.user_manager.get_all_item_keys()
    all_business_ids = self.business_manager.get_all_item_keys()
    # Parenthesised single-argument print: identical output under the Python 2
    # print statement, and valid syntax under Python 3.
    print("user dimension: " + str(len(all_user_ids)))
    print("business dimension: " + str(len(all_business_ids)))
    print_debug("Generating the user business avg rating matrix", INFO_INT)
    all_user_avg_rating = self.user_manager.get_all_user_avg_rating()
    all_bus_avg_rating = self.business_manager.get_all_business_avg_rating()
    review_manager = self.review_manager
    output_line = ""
    for index_i, user_id in enumerate(all_user_ids):
        row_cells = []
        for business_id in all_business_ids:
            if review_manager.is_exist_user_business_rating(user_id, business_id):
                key = "-".join([user_id, business_id])
                rating = review_manager.get_user_business_rating(key)
            else:
                rating = (
                    review_manager.get_business_avg_rating(business_id)
                    + all_user_avg_rating
                    - self.user_manager.get_user_avg_rating(user_id)
                    + all_bus_avg_rating
                    - self.business_manager.get_business_avg_rating(business_id)
                )
            row_cells.append(str(rating) + ",")
        # Join once per row rather than growing the string once per cell.
        output_line += "".join(row_cells)
        if index_i % 100 == 0:
            print(str(index_i))
def load_business_data(self, business_file):
    """Read a JSON-lines business file and store businesses plus totals.

    Each line of ``business_file`` is decoded with ``json.loads`` and wrapped
    in a ``Business`` object, indexed by ``business_id`` (a repeated id
    replaces the earlier record).  Every business is also passed to
    ``self.update_categories`` together with ``self.category_hash`` and a
    running counter that the helper returns updated.

    business_file -- path of the file to read, one JSON document per line.

    After the call:
      * ``self.business_item_hash`` -- ``business_id`` -> ``Business``
      * ``self.total_review_count`` -- sum of ``review_count`` over all lines
      * ``self.count``              -- number of lines read

    NOTE(review): the final category counter is not stored anywhere;
    presumably ``update_categories`` records what matters in
    ``self.category_hash`` -- verify against its definition.
    """
    business_item_hash = {}
    print_debug("Starting to read business file from:" + business_file, INFO_INT)
    count = 0
    cat_count = 0
    total_review_count = 0
    # The original never closed this file; ``with`` releases it on every path.
    with open(business_file) as f_bus:
        for line in f_bus:
            business_item = Business(json.loads(line))
            business_item_hash[business_item.business_id] = business_item
            cat_count = self.update_categories(
                business_item, self.category_hash, cat_count)
            total_review_count += business_item.review_count
            count += 1
            print_debug("Reading record business_id: " + str(business_item.business_id),
                        DEBUG_INT)
    self.total_review_count = total_review_count
    self.count = count
    self.business_item_hash = business_item_hash
def load_review_data(self, review_file):
    """Read a JSON-lines review file and publish indexes and totals on ``self.data``.

    Each line of ``review_file`` is decoded with ``json.loads`` and wrapped in
    a ``Review`` object.

    review_file -- path of the file to read, one JSON document per line.

    ``self.data`` is set to ``{'data': {...}}`` where the inner dict holds:
      * ``review_item_hash``     -- ``review_id`` -> ``Review``
      * ``user_business_rating`` -- ``self.user_business_key(review)`` ->
        ``Review`` (a later review with the same key replaces the earlier one)
      * ``total_user_rating_in_business`` / ``..._count`` -- updated through
        ``self.increment_hash_count`` with the review's ``user_id`` and,
        respectively, its ``stars`` and ``1``
      * ``total_business_rating_by_user`` / ``..._count`` -- the same, keyed
        by the review's ``business_id``
      * ``total_stars``   -- sum of ``stars`` over all lines
      * ``total_reviews`` -- number of lines read

    ``increment_hash_count`` presumably adds the given amount to the entry
    for the key -- verify against its definition.
    """
    total_stars = 0
    total_reviews = 0
    review_item_hash = {}
    user_business_rating = {}
    total_business_rating_by_user = {}
    total_business_rating_by_user_count = {}
    total_user_rating_in_business = {}
    total_user_rating_in_business_count = {}
    print_debug("Starting to read review file from:" + review_file, INFO_INT)
    # ``with`` closes the file even when a record raises part-way through;
    # the original only closed it on the success path.
    with open(review_file) as f_bus:
        for line in f_bus:
            review_item = Review(json.loads(line))
            review_item_hash[review_item.review_id] = review_item
            key = self.user_business_key(review_item)
            user_business_rating[key] = review_item
            # Star total and review count keyed by user.
            self.increment_hash_count(
                total_user_rating_in_business,
                review_item.user_id,
                review_item.stars,
            )
            self.increment_hash_count(
                total_user_rating_in_business_count,
                review_item.user_id,
                1,
            )
            # Star total and review count keyed by business.
            self.increment_hash_count(
                total_business_rating_by_user,
                review_item.business_id,
                review_item.stars,
            )
            self.increment_hash_count(
                total_business_rating_by_user_count,
                review_item.business_id,
                1,
            )
            total_stars += review_item.stars
            total_reviews += 1
            print_debug(
                "Reading record review_id: " + str(review_item.review_id),
                DEBUG_INT)
    data = {
        'review_item_hash': review_item_hash,
        'user_business_rating': user_business_rating,
        'total_business_rating_by_user': total_business_rating_by_user,
        'total_business_rating_by_user_count': total_business_rating_by_user_count,
        'total_user_rating_in_business': total_user_rating_in_business,
        'total_user_rating_in_business_count': total_user_rating_in_business_count,
        'total_reviews': total_reviews,
        'total_stars': total_stars,
    }
    self.data = {'data': data}
def load_review_data(self, review_file):
    """Load reviews from ``review_file`` and aggregate them into ``self.data``.

    The file is read line by line; every line is parsed with ``json.loads``
    and turned into a ``Review``.

    review_file -- path to a file holding one JSON object per line.

    Stored as ``self.data = {'data': {...}}`` with these entries:
      * ``review_item_hash``     -- reviews keyed by ``review_id``
      * ``user_business_rating`` -- reviews keyed by the value of
        ``self.user_business_key(review)``; later duplicates win
      * ``total_user_rating_in_business`` and
        ``total_user_rating_in_business_count`` -- fed through
        ``self.increment_hash_count`` under the review's ``user_id`` with
        ``stars`` and ``1`` respectively
      * ``total_business_rating_by_user`` and
        ``total_business_rating_by_user_count`` -- likewise under the
        review's ``business_id``
      * ``total_stars`` / ``total_reviews`` -- summed ``stars`` and the
        number of lines processed

    NOTE(review): ``increment_hash_count`` is assumed to accumulate the amount
    into the dict entry for the key -- confirm against the helper.
    """
    print_debug("Starting to read review file from:" + review_file, INFO_INT)
    review_item_hash = {}
    user_business_rating = {}
    total_business_rating_by_user = {}
    total_business_rating_by_user_count = {}
    total_user_rating_in_business = {}
    total_user_rating_in_business_count = {}
    total_stars = 0
    total_reviews = 0
    add_to = self.increment_hash_count
    # Exception-safe replacement for the trailing ``f_bus.close()``, which was
    # skipped whenever processing a record raised.
    with open(review_file) as f_bus:
        for line in f_bus:
            review_line = json.loads(line)
            review_item = Review(review_line)
            review_item_hash[review_item.review_id] = review_item
            user_business_rating[self.user_business_key(review_item)] = review_item
            # Per-user aggregates: stars given and number of reviews.
            add_to(total_user_rating_in_business,
                   review_item.user_id, review_item.stars)
            add_to(total_user_rating_in_business_count,
                   review_item.user_id, 1)
            # Per-business aggregates: stars received and number of reviews.
            add_to(total_business_rating_by_user,
                   review_item.business_id, review_item.stars)
            add_to(total_business_rating_by_user_count,
                   review_item.business_id, 1)
            total_stars += review_item.stars
            total_reviews += 1
            print_debug("Reading record review_id: " + str(review_item.review_id),
                        DEBUG_INT)
    self.data = {
        'data': {
            'review_item_hash': review_item_hash,
            'user_business_rating': user_business_rating,
            'total_business_rating_by_user': total_business_rating_by_user,
            'total_business_rating_by_user_count': total_business_rating_by_user_count,
            'total_user_rating_in_business': total_user_rating_in_business,
            'total_user_rating_in_business_count': total_user_rating_in_business_count,
            'total_reviews': total_reviews,
            'total_stars': total_stars,
        }
    }