def get_categories_and_subcategories():
    """Flag the five most used "<Main_Category> - <subcategory>" labels.

    Every document of the page index is read. Each subcategory listed in a
    page's 'Sub_Categories' field adds one point to the label built from the
    page's 'Main_Category' and that subcategory. Pages without a
    'Sub_Categories' field contribute nothing.

    Returns:
        dict: label -> bool. The value is True for the five labels with the
        highest point totals and False for every other label.
    """
    label_points = defaultdict(int)
    for page in Mongo.getPageIndex().find({}):
        if 'Sub_Categories' not in page:
            continue
        for subcategory in page['Sub_Categories']:
            label_points[page['Main_Category'] + " - " + subcategory] += 1

    # Highest totals first; sorted() is stable, so labels with equal totals
    # keep the order in which the dict yields them.
    ranked = sorted(label_points.items(), key=lambda item: item[1], reverse=True)
    return {label: position < 5 for position, (label, _) in enumerate(ranked)}
def index_page_logic(self):
    """Extract keywords from the page text and insert the page into the index.

    The text is the concatenation of the instance's 'description' and
    'about' attributes (each used only when present in the instance
    __dict__). It is UTF-8 encoded, two substitutions are applied, and the
    result is handed to a RAKE extractor limited to two-word phrases.

    The stored document contains '_id', 'name', whatever
    self.get_categories() returns, and a 'keywords' sub-document built from
    the extractor output (first element of each entry as key, second as
    value - presumably phrase and score; confirm against the rake module).

    Returns:
        bool: True when the insert call returns a truthy value, else False.
    """
    extractor = rake.Rake(config.Config.stoppath, max_words_length=2)

    own_attributes = self.__dict__
    text = ""
    for field in ("description", "about"):
        if field in own_attributes:
            text += getattr(self, field)
    print(text)

    # NOTE(review): "$" is presumably replaced because the extracted phrases
    # end up as MongoDB field names - confirm.
    cleaned = text.encode('utf-8').replace("ë", 'e').replace("$", 'dollar')
    keywords = extractor.run(cleaned)
    print(keywords)

    document = {"_id": self.id, "name": self.name}
    document.update(self.get_categories())
    document["keywords"] = {entry[0]: entry[1] for entry in keywords}

    return bool(Mongo.getPageIndex().insert(document))
def index_page_logic(self):
    """Extract keywords from the page text and insert the page into the index.

    The text is the concatenation of the instance's 'description' and
    'about' attributes (each used only when present in the instance
    __dict__). It is UTF-8 encoded, two substitutions are applied, and the
    result is handed to a RAKE extractor limited to two-word phrases.

    The stored document contains '_id', 'name', whatever
    self.get_categories() returns, and a 'keywords' sub-document built from
    the extractor output (first element of each entry as key, second as
    value - presumably phrase and score; confirm against the rake module).

    Returns:
        bool: True when the insert call returns a truthy value, else False.
    """
    extractor = rake.Rake(config.Config.stoppath, max_words_length=2)

    own_attributes = self.__dict__
    text = ""
    for field in ("description", "about"):
        if field in own_attributes:
            text += getattr(self, field)
    print(text)

    # NOTE(review): "$" is presumably replaced because the extracted phrases
    # end up as MongoDB field names - confirm.
    cleaned = text.encode('utf-8').replace("ë", 'e').replace("$", 'dollar')
    keywords = extractor.run(cleaned)
    print(keywords)

    document = {"_id": self.id, "name": self.name}
    document.update(self.get_categories())
    document["keywords"] = {entry[0]: entry[1] for entry in keywords}

    return bool(Mongo.getPageIndex().insert(document))
def get_filtered_evaluations(category=None, subcategory=None, keywords=None, location=None):
    """Collect users and their summed evaluation points for the matching pages.

    The page filter is chosen by the most specific argument supplied:
    keywords (combined with category and subcategory), then subcategory
    (combined with category), then category alone, otherwise every page.

    Args:
        category: Value matched against the page's 'Main_Category'.
        subcategory: Value matched against the page's 'Sub_Categories'.
        keywords: Iterable of keyword strings; a page matches when at least
            one of them is a key of its 'keywords' sub-document. None or an
            empty iterable means "no keyword filter".
        location: If given, only users whose location['name'] equals this
            value are returned.

    Returns:
        None when no page matches the filter; otherwise a list of
        (user, points) tuples, one for every user that could be loaded
        (and, when location is given, whose location matches).
    """
    # An empty '$or' array is rejected by MongoDB, so an empty keyword
    # collection is handled like "no keywords" instead of failing the query.
    keyword_clauses = [
        {"keywords." + keyword: {'$exists': True}} for keyword in keywords or ()
    ]

    if keyword_clauses:
        # NOTE(review): the category and subcategory conditions are sent even
        # when those arguments are None (unchanged behaviour) - confirm that
        # callers always pass all three together.
        query = {
            '$and': [
                {'Main_Category': category},
                {'Sub_Categories': subcategory},
                {'$or': keyword_clauses},
            ]
        }
    elif subcategory is not None:
        query = {
            '$and': [
                {'Main_Category': category},
                {'Sub_Categories': subcategory},
            ]
        }
    elif category is not None:
        query = {'Main_Category': category}
    else:
        query = {}

    pages = Mongo.getPageIndex().find(query)

    # Count once and reuse the value: every count() call is a server round trip.
    page_count = pages.count()
    if page_count == 0:
        return None
    print("Pages count: " + str(page_count))

    # Sum the points each user received over all matching pages.
    users_points = defaultdict(int)
    for page in pages:
        page_evaluations = PageEval.get_page_evaluations(page['_id'])
        if page_evaluations is None:
            continue
        for user_id, points in page_evaluations.points.iteritems():
            users_points[user_id] += points
    print(len(users_points))

    users = []
    start_time = time.time()
    for user_id, points in users_points.iteritems():
        user = User.User.load_from_db(user_id)
        # Users missing from the database are skipped.
        if user is None:
            continue
        # Without a location filter every loaded user is kept; otherwise only
        # those whose location name matches.
        if location is None or user.location['name'] == location:
            users.append((user, points))
    print('Time: ' + str(time.time() - start_time))
    return users
def get_filtered_evaluations(category=None, subcategory=None, keywords=None, location=None):
    """Collect users and their summed evaluation points for the matching pages.

    The page filter is chosen by the most specific argument supplied:
    keywords (combined with category and subcategory), then subcategory
    (combined with category), then category alone, otherwise every page.

    Args:
        category: Value matched against the page's 'Main_Category'.
        subcategory: Value matched against the page's 'Sub_Categories'.
        keywords: Iterable of keyword strings; a page matches when at least
            one of them is a key of its 'keywords' sub-document. None or an
            empty iterable means "no keyword filter".
        location: If given, only users whose location['name'] equals this
            value are returned.

    Returns:
        None when no page matches the filter; otherwise a list of
        (user, points) tuples, one for every user that could be loaded
        (and, when location is given, whose location matches).
    """
    # An empty '$or' array is rejected by MongoDB, so an empty keyword
    # collection is handled like "no keywords" instead of failing the query.
    keyword_clauses = [
        {"keywords." + keyword: {'$exists': True}} for keyword in keywords or ()
    ]

    if keyword_clauses:
        # NOTE(review): the category and subcategory conditions are sent even
        # when those arguments are None (unchanged behaviour) - confirm that
        # callers always pass all three together.
        query = {
            '$and': [
                {'Main_Category': category},
                {'Sub_Categories': subcategory},
                {'$or': keyword_clauses},
            ]
        }
    elif subcategory is not None:
        query = {
            '$and': [
                {'Main_Category': category},
                {'Sub_Categories': subcategory},
            ]
        }
    elif category is not None:
        query = {'Main_Category': category}
    else:
        query = {}

    pages = Mongo.getPageIndex().find(query)

    # Count once and reuse the value: every count() call is a server round trip.
    page_count = pages.count()
    if page_count == 0:
        return None
    print("Pages count: " + str(page_count))

    # Sum the points each user received over all matching pages.
    users_points = defaultdict(int)
    for page in pages:
        page_evaluations = PageEval.get_page_evaluations(page['_id'])
        if page_evaluations is None:
            continue
        for user_id, points in page_evaluations.points.iteritems():
            users_points[user_id] += points
    print(len(users_points))

    users = []
    start_time = time.time()
    for user_id, points in users_points.iteritems():
        user = User.User.load_from_db(user_id)
        # Users missing from the database are skipped.
        if user is None:
            continue
        # Without a location filter every loaded user is kept; otherwise only
        # those whose location name matches.
        if location is None or user.location['name'] == location:
            users.append((user, points))
    print('Time: ' + str(time.time() - start_time))
    return users