Esempio n. 1
0
	def load_business_data(self, business_file):
		"""Read business records from ``business_file`` into ``self.data``.

		Every line of the file is parsed with ``json.loads`` and wrapped in a
		``Business`` object. Records are indexed by ``business_id``, so a later
		line with the same id replaces the earlier one.

		On return ``self.data`` is ``{'data': {...}}`` and the inner dict holds:
			'business_item_hash': dict mapping business_id -> Business
			'all_bus_avg_rating': mean of ``stars`` over every line read
				(0.0 when the file has no lines)

		Args:
			business_file: path of the file to read, one JSON document per line.
		"""
		business_item_hash = {}
		print_debug("Starting to read business file from:" + business_file, INFO_INT)

		count = 0
		rating = 0
		# The with-block closes the file even when a line fails to parse.
		with open(business_file) as f_bus:
			for line in f_bus:
				business_item = Business(json.loads(line))
				business_item_hash[business_item.business_id] = business_item
				count += 1
				rating += business_item.stars
				print_debug("Reading record business_id: " + str(business_item.business_id), DEBUG_INT)

		# The average is per line read, not per unique id; guard the empty file.
		all_bus_avg_rating = float(rating) / count if count else 0.0
		print_debug("Total business line  read: " + str(count), INFO_INT)
		print_debug("Total unique business item read: " + str(len(business_item_hash)), INFO_INT)
		print_debug("All business avg rating: " + str(all_bus_avg_rating), INFO_INT)

		data = {
			'business_item_hash': business_item_hash,
			'all_bus_avg_rating': all_bus_avg_rating,
		}
		self.data = {'data': data}
Esempio n. 2
0
    def load_user_data(self, user_file):
        """Read user records from ``user_file`` and store the totals on self.

        Every line of the file is parsed with ``json.loads`` and wrapped in a
        ``User`` object. Records are indexed by ``user_id``, so a later line
        with the same id replaces the earlier one.

        Attributes written:
            user_item_hash: dict mapping user_id -> User
            total_review_count: sum of ``review_count`` over all lines
            count: number of lines read
            total_rating_count: sum of ``average_stars`` over all lines

        ``self.total_funny``, ``self.total_cool`` and ``self.total_useful``
        are incremented in place for users whose ``votes`` is truthy, so they
        must already exist on the instance before this method is called.

        Args:
            user_file: path of the file to read, one JSON document per line.
        """
        user_item_hash = {}
        print_debug("Starting to read user file from:" + user_file, INFO_INT)

        count = 0
        total_stars = 0
        total_review = 0
        # The with-block closes the file even when a line fails to parse.
        with open(user_file) as f_bus:
            for line in f_bus:
                user_item = User(json.loads(line))
                user_item_hash[user_item.user_id] = user_item
                total_stars += user_item.average_stars
                total_review += user_item.review_count
                if user_item.votes:
                    self.total_funny += user_item.funny
                    self.total_cool += user_item.cool
                    self.total_useful += user_item.useful

                count += 1
                print_debug("Reading record user_id: " + str(user_item.user_id),
                            DEBUG_INT)

        self.user_item_hash = user_item_hash
        self.total_review_count = total_review
        self.count = count
        self.total_rating_count = total_stars
Esempio n. 3
0
    def load_business_data(self, business_file):
        """Read business records from ``business_file`` into ``self.data``.

        Every line of the file is parsed with ``json.loads`` and wrapped in a
        ``Business`` object. Records are indexed by ``business_id``, so a later
        line with the same id replaces the earlier one.

        On return ``self.data`` is ``{'data': {...}}`` and the inner dict holds:
            'business_item_hash': dict mapping business_id -> Business
            'all_bus_avg_rating': mean of ``stars`` over every line read
                (0.0 when the file has no lines)

        Args:
            business_file: path of the file to read, one JSON document per line.
        """
        business_item_hash = {}
        print_debug("Starting to read business file from:" + business_file,
                    INFO_INT)

        count = 0
        rating = 0
        # The with-block closes the file even when a line fails to parse.
        with open(business_file) as f_bus:
            for line in f_bus:
                business_item = Business(json.loads(line))
                business_item_hash[business_item.business_id] = business_item
                count += 1
                rating += business_item.stars
                print_debug(
                    "Reading record business_id: " +
                    str(business_item.business_id), DEBUG_INT)

        # The average is per line read, not per unique id; guard the empty file.
        all_bus_avg_rating = float(rating) / count if count else 0.0
        print_debug("Total business line  read: " + str(count), INFO_INT)
        print_debug(
            "Total unique business item read: " + str(len(business_item_hash)),
            INFO_INT)
        print_debug("All business avg rating: " + str(all_bus_avg_rating),
                    INFO_INT)

        data = {
            'business_item_hash': business_item_hash,
            'all_bus_avg_rating': all_bus_avg_rating,
        }
        self.data = {'data': data}
Esempio n. 4
0
	def load_user_data(self, user_file):
		"""Read user records from ``user_file`` into ``self.data``.

		Every line of the file is parsed with ``json.loads`` and wrapped in a
		``User`` object. Records are indexed by ``user_id``, so a later line
		with the same id replaces the earlier one.

		On return ``self.data`` is ``{'data': {...}}`` and the inner dict holds:
			'user_item_hash': dict mapping user_id -> User
			'all_user_avg_rating': mean of ``average_stars`` over every line
				read (0.0 when the file has no lines)

		Args:
			user_file: path of the file to read, one JSON document per line.
		"""
		user_item_hash = {}
		print_debug("Starting to read user file from:" + user_file, INFO_INT)

		count = 0
		total_rating = 0
		# The with-block closes the file even when a line fails to parse.
		with open(user_file) as f_bus:
			for line in f_bus:
				user_item = User(json.loads(line))
				user_item_hash[user_item.user_id] = user_item
				count += 1
				total_rating += user_item.average_stars
				print_debug("Reading record user_id: " + str(user_item.user_id), DEBUG_INT)

		# The average is per line read, not per unique id; guard the empty file.
		all_user_avg_rating = float(total_rating) / count if count else 0.0
		print_debug("Total users line read: " + str(count), INFO_INT)
		print_debug("Total unique user item read: " + str(len(user_item_hash)), INFO_INT)
		print_debug("All user avg rating: " + str(all_user_avg_rating), INFO_INT)

		data = {
			'user_item_hash': user_item_hash,
			'all_user_avg_rating': all_user_avg_rating,
		}
		self.data = {'data': data}
Esempio n. 5
0
	def load_user_data(self, user_file):
		"""Read user records from ``user_file`` and store the totals on self.

		Every line of the file is parsed with ``json.loads`` and wrapped in a
		``User`` object. Records are indexed by ``user_id``, so a later line
		with the same id replaces the earlier one.

		Attributes written:
			user_item_hash: dict mapping user_id -> User
			total_review_count: sum of ``review_count`` over all lines
			count: number of lines read
			total_rating_count: sum of ``average_stars`` over all lines

		``self.total_funny``, ``self.total_cool`` and ``self.total_useful``
		are incremented in place for users whose ``votes`` is truthy, so they
		must already exist on the instance before this method is called.

		Args:
			user_file: path of the file to read, one JSON document per line.
		"""
		user_item_hash = {}
		print_debug("Starting to read user file from:" + user_file, INFO_INT)

		count = 0
		total_stars = 0
		total_review = 0
		# The with-block closes the file even when a line fails to parse.
		with open(user_file) as f_bus:
			for line in f_bus:
				user_item = User(json.loads(line))
				user_item_hash[user_item.user_id] = user_item
				total_stars += user_item.average_stars
				total_review += user_item.review_count
				if user_item.votes:
					self.total_funny += user_item.funny
					self.total_cool += user_item.cool
					self.total_useful += user_item.useful

				count += 1
				print_debug("Reading record user_id: " + str(user_item.user_id), DEBUG_INT)

		self.user_item_hash = user_item_hash
		self.total_review_count = total_review
		self.count = count
		self.total_rating_count = total_stars
Esempio n. 6
0
    def load_user_data(self, user_file):
        """Read user records from ``user_file`` into ``self.data``.

        Every line of the file is parsed with ``json.loads`` and wrapped in a
        ``User`` object. Records are indexed by ``user_id``, so a later line
        with the same id replaces the earlier one.

        On return ``self.data`` is ``{'data': {...}}`` and the inner dict holds:
            'user_item_hash': dict mapping user_id -> User
            'all_user_avg_rating': mean of ``average_stars`` over every line
                read (0.0 when the file has no lines)

        Args:
            user_file: path of the file to read, one JSON document per line.
        """
        user_item_hash = {}
        print_debug("Starting to read user file from:" + user_file, INFO_INT)

        count = 0
        total_rating = 0
        # The with-block closes the file even when a line fails to parse.
        with open(user_file) as f_bus:
            for line in f_bus:
                user_item = User(json.loads(line))
                user_item_hash[user_item.user_id] = user_item
                count += 1
                total_rating += user_item.average_stars
                print_debug(
                    "Reading record user_id: " + str(user_item.user_id),
                    DEBUG_INT)

        # The average is per line read, not per unique id; guard the empty file.
        all_user_avg_rating = float(total_rating) / count if count else 0.0
        print_debug("Total users line read: " + str(count), INFO_INT)
        print_debug(
            "Total unique user item read: " + str(len(user_item_hash)),
            INFO_INT)
        print_debug("All user avg rating: " + str(all_user_avg_rating),
                    INFO_INT)

        data = {
            'user_item_hash': user_item_hash,
            'all_user_avg_rating': all_user_avg_rating,
        }
        self.data = {'data': data}
Esempio n. 7
0
	def load_review_data(self, review_file):
		"""Read review records from ``review_file`` into ``self.review_item_hash``.

		Every line of the file is parsed with ``json.loads`` and wrapped in a
		``Review`` object. Reviews are indexed by the string
		``"<user_id>-<business_id>"``, so a later review by the same user for
		the same business replaces the earlier one.

		Args:
			review_file: path of the file to read, one JSON document per line.
		"""
		review_item_hash = {}
		print_debug("Starting to read review file from:" + review_file, INFO_INT)

		# The with-block closes the file even when a line fails to parse.
		with open(review_file) as f_bus:
			for line in f_bus:
				review_item = Review(json.loads(line))
				key = str(review_item.user_id) + "-" + str(review_item.business_id)
				review_item_hash[key] = review_item
				print_debug("Reading record review_id: " + str(review_item.review_id), DEBUG_INT)

		self.review_item_hash = review_item_hash
Esempio n. 8
0
    def get_global_avg_rating_matrix(self):
        """Compute a rating for every (user, business) pair.

        When ``review_manager.is_exist_user_business_rating`` reports a rating
        for the pair, that rating is looked up under the key
        ``"<user_id>-<business_id>"``. Otherwise the value is estimated as::

            review_manager.get_business_avg_rating(business_id)
            + all_user_avg_rating - user_manager.get_user_avg_rating(user_id)
            + all_bus_avg_rating
            - business_manager.get_business_avg_rating(business_id)

        Each rating is appended to a text buffer followed by a comma. The
        user and business counts are printed up front, and the user index is
        printed every 100 users as a progress indicator.

        NOTE(review): the text buffer is neither returned nor written out by
        this method -- confirm whether a caller expects it to be.
        """
        # Fetch the key collections once instead of once per loop iteration.
        user_ids = self.user_manager.get_all_item_keys()
        business_ids = self.business_manager.get_all_item_keys()

        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print("user dimension: " + str(len(user_ids)))
        print("business dimension: " + str(len(business_ids)))

        print_debug("Generating the user business avg rating matrix", INFO_INT)
        all_user_avg_rating = self.user_manager.get_all_user_avg_rating()
        all_bus_avg_rating = self.business_manager.get_all_business_avg_rating()

        output_line = ""
        for index_i, user_id in enumerate(user_ids):
            for business_id in business_ids:
                if self.review_manager.is_exist_user_business_rating(
                        user_id, business_id):
                    key = "-".join([user_id, business_id])
                    rating = self.review_manager.get_user_business_rating(key)
                else:
                    rating = (
                        self.review_manager.get_business_avg_rating(business_id)
                        + all_user_avg_rating
                        - self.user_manager.get_user_avg_rating(user_id)
                        + all_bus_avg_rating
                        - self.business_manager.get_business_avg_rating(business_id)
                    )
                output_line += str(rating) + ","
            if index_i % 100 == 0:
                print(str(index_i))
Esempio n. 9
0
	def get_global_avg_rating_matrix(self):
		"""Compute a rating for every (user, business) pair.

		When ``review_manager.is_exist_user_business_rating`` reports a rating
		for the pair, that rating is looked up under the key
		``"<user_id>-<business_id>"``. Otherwise the value is estimated as::

			review_manager.get_business_avg_rating(business_id)
			+ all_user_avg_rating - user_manager.get_user_avg_rating(user_id)
			+ all_bus_avg_rating
			- business_manager.get_business_avg_rating(business_id)

		Each rating is appended to a text buffer followed by a comma. The
		user and business counts are printed up front, and the user index is
		printed every 100 users as a progress indicator.

		NOTE(review): the text buffer is neither returned nor written out by
		this method -- confirm whether a caller expects it to be.
		"""
		# Fetch the key collections once instead of once per loop iteration.
		user_ids = self.user_manager.get_all_item_keys()
		business_ids = self.business_manager.get_all_item_keys()

		# Single-argument parenthesised print behaves the same on Python 2 and 3.
		print("user dimension: " + str(len(user_ids)))
		print("business dimension: " + str(len(business_ids)))

		print_debug("Generating the user business avg rating matrix", INFO_INT)
		all_user_avg_rating = self.user_manager.get_all_user_avg_rating()
		all_bus_avg_rating = self.business_manager.get_all_business_avg_rating()

		output_line = ""
		for index_i, user_id in enumerate(user_ids):
			for business_id in business_ids:
				if self.review_manager.is_exist_user_business_rating(user_id, business_id):
					key = "-".join([user_id, business_id])
					rating = self.review_manager.get_user_business_rating(key)
				else:
					rating = (
						self.review_manager.get_business_avg_rating(business_id)
						+ all_user_avg_rating
						- self.user_manager.get_user_avg_rating(user_id)
						+ all_bus_avg_rating
						- self.business_manager.get_business_avg_rating(business_id)
					)
				output_line += str(rating) + ","
			if index_i % 100 == 0:
				print(str(index_i))
Esempio n. 10
0
	def load_business_data(self, business_file):
		"""Read business records from ``business_file`` and store totals on self.

		Every line of the file is parsed with ``json.loads`` and wrapped in a
		``Business`` object. Records are indexed by ``business_id``, so a later
		line with the same id replaces the earlier one. Each record is also
		passed to ``self.update_categories`` together with
		``self.category_hash`` and a running counter; the value that call
		returns becomes the counter for the next record.

		Attributes written:
			total_review_count: sum of ``review_count`` over all lines
			count: number of lines read
			business_item_hash: dict mapping business_id -> Business

		Args:
			business_file: path of the file to read, one JSON document per line.
		"""
		business_item_hash = {}
		print_debug("Starting to read business file from:" + business_file, INFO_INT)

		count = 0
		cat_count = 0
		total_review_count = 0
		# The with-block closes the file even when a line fails to parse.
		with open(business_file) as f_bus:
			for line in f_bus:
				business_item = Business(json.loads(line))
				business_item_hash[business_item.business_id] = business_item
				cat_count = self.update_categories(business_item, self.category_hash, cat_count)
				total_review_count += business_item.review_count
				count += 1
				print_debug("Reading record business_id: " + str(business_item.business_id), DEBUG_INT)

		self.total_review_count = total_review_count
		self.count = count
		self.business_item_hash = business_item_hash
Esempio n. 11
0
    def load_review_data(self, review_file):
        """Read review records from ``review_file`` into ``self.data``.

        Every line of the file is parsed with ``json.loads`` and wrapped in a
        ``Review`` object. For each review, ``self.increment_hash_count`` is
        called four times to accumulate star totals and review counts, once
        per user and once per business (presumably it adds the given amount
        to the entry for the given key -- confirm against its definition).

        On return ``self.data`` is ``{'data': {...}}`` and the inner dict holds:
            'review_item_hash': dict mapping review_id -> Review
            'user_business_rating': dict mapping
                ``self.user_business_key(review)`` -> Review
            'total_business_rating_by_user': stars accumulated per business_id
            'total_business_rating_by_user_count': reviews per business_id
            'total_user_rating_in_business': stars accumulated per user_id
            'total_user_rating_in_business_count': reviews per user_id
            'total_reviews': number of lines read
            'total_stars': sum of ``stars`` over all lines

        Args:
            review_file: path of the file to read, one JSON document per line.
        """
        total_stars = 0
        total_reviews = 0
        review_item_hash = {}
        user_business_rating = {}
        total_business_rating_by_user = {}
        total_business_rating_by_user_count = {}
        total_user_rating_in_business = {}
        total_user_rating_in_business_count = {}
        print_debug("Starting to read review file from:" + review_file,
                    INFO_INT)

        # The with-block closes the file even when a line fails to parse.
        with open(review_file) as f_bus:
            for line in f_bus:
                review_item = Review(json.loads(line))
                review_item_hash[review_item.review_id] = review_item
                key = self.user_business_key(review_item)
                user_business_rating[key] = review_item

                # Per-user star total and review count.
                self.increment_hash_count(
                    total_user_rating_in_business,
                    review_item.user_id,
                    review_item.stars,
                )
                self.increment_hash_count(
                    total_user_rating_in_business_count,
                    review_item.user_id,
                    1,
                )

                # Per-business star total and review count.
                self.increment_hash_count(
                    total_business_rating_by_user,
                    review_item.business_id,
                    review_item.stars,
                )
                self.increment_hash_count(
                    total_business_rating_by_user_count,
                    review_item.business_id,
                    1,
                )

                total_stars += review_item.stars
                total_reviews += 1
                print_debug(
                    "Reading record review_id: " + str(review_item.review_id),
                    DEBUG_INT)

        data = {
            'review_item_hash': review_item_hash,
            'user_business_rating': user_business_rating,
            'total_business_rating_by_user': total_business_rating_by_user,
            'total_business_rating_by_user_count':
            total_business_rating_by_user_count,
            'total_user_rating_in_business': total_user_rating_in_business,
            'total_user_rating_in_business_count':
            total_user_rating_in_business_count,
            'total_reviews': total_reviews,
            'total_stars': total_stars,
        }
        self.data = {'data': data}
Esempio n. 12
0
	def load_review_data(self, review_file):
		"""Read review records from ``review_file`` into ``self.data``.

		Every line of the file is parsed with ``json.loads`` and wrapped in a
		``Review`` object. For each review, ``self.increment_hash_count`` is
		called four times to accumulate star totals and review counts, once
		per user and once per business (presumably it adds the given amount
		to the entry for the given key -- confirm against its definition).

		On return ``self.data`` is ``{'data': {...}}`` and the inner dict holds:
			'review_item_hash': dict mapping review_id -> Review
			'user_business_rating': dict mapping
				``self.user_business_key(review)`` -> Review
			'total_business_rating_by_user': stars accumulated per business_id
			'total_business_rating_by_user_count': reviews per business_id
			'total_user_rating_in_business': stars accumulated per user_id
			'total_user_rating_in_business_count': reviews per user_id
			'total_reviews': number of lines read
			'total_stars': sum of ``stars`` over all lines

		Args:
			review_file: path of the file to read, one JSON document per line.
		"""
		total_stars = 0
		total_reviews = 0
		review_item_hash = {}
		user_business_rating = {}
		total_business_rating_by_user = {}
		total_business_rating_by_user_count = {}
		total_user_rating_in_business = {}
		total_user_rating_in_business_count = {}
		print_debug("Starting to read review file from:" + review_file, INFO_INT)

		# The with-block closes the file even when a line fails to parse.
		with open(review_file) as f_bus:
			for line in f_bus:
				review_item = Review(json.loads(line))
				review_item_hash[review_item.review_id] = review_item
				key = self.user_business_key(review_item)
				user_business_rating[key] = review_item

				# Per-user star total and review count.
				self.increment_hash_count(
					total_user_rating_in_business,
					review_item.user_id,
					review_item.stars,
				)
				self.increment_hash_count(
					total_user_rating_in_business_count,
					review_item.user_id,
					1,
				)

				# Per-business star total and review count.
				self.increment_hash_count(
					total_business_rating_by_user,
					review_item.business_id,
					review_item.stars,
				)
				self.increment_hash_count(
					total_business_rating_by_user_count,
					review_item.business_id,
					1,
				)

				total_stars += review_item.stars
				total_reviews += 1
				print_debug("Reading record review_id: " + str(review_item.review_id), DEBUG_INT)

		data = {
			'review_item_hash': review_item_hash,
			'user_business_rating': user_business_rating,
			'total_business_rating_by_user': total_business_rating_by_user,
			'total_business_rating_by_user_count': total_business_rating_by_user_count,
			'total_user_rating_in_business': total_user_rating_in_business,
			'total_user_rating_in_business_count': total_user_rating_in_business_count,
			'total_reviews': total_reviews,
			'total_stars': total_stars,
		}
		self.data = {'data': data}