def _recently_added(self, bl, user):
    """
    Filtering key point: recently added.

    Asks `bl` for the isbns added after "30 days ago". When that yields at
    least self.NEXT_LIMIT entries, a random sample of exactly
    self.NEXT_LIMIT of them is returned. Otherwise the result falls back to
    the first self.NEXT_LIMIT isbns of the user's interested list, ordered
    by SortHelper.by_updated_time().

    @param bl: the booklist object; only its isbns_after() is used here.
    @param user: the user handed to SortHelper for the fallback ordering.
    @returns: a list of isbns
    """
    limit = self.NEXT_LIMIT
    cutoff = datetime.datetime.now() - datetime.timedelta(days=30)
    recent_isbns = bl.isbns_after(cutoff)

    if len(recent_isbns) < limit:
        # Not enough recent books for a random pick: take them by updated
        # time instead.
        by_update = SortHelper(user).by_updated_time(booklist.LIST_INTERESTED)
        return by_update[:limit]

    # Enough recent books: pick randomly among them.
    return random.sample(recent_isbns, limit)
def _fetch_parse(self, user, list_type=None):
    """
    Fetch one Douban book list of `user` and merge it into the datastore.

    It may cause problems to fetch and then parse, so both are done
    together here.

    @param user: the user whose list is imported.
    @param list_type: which booklist to import. When None, it is read from
        the 'list_type' request parameter; if that is empty as well, the
        method returns without doing anything.
    @returns: None. Failures are logged (logging + self._log), not raised.
    """
    if list_type is None:
        list_type = self.request.get('list_type')
    if not list_type:
        return

    try:
        imported = douban.get_book_list(user, list_type)
    except utils.errors.ParseJsonError as err:
        # Lazy %-style arguments: user.key() need not be a str, and string
        # concatenation would raise TypeError inside this handler, hiding
        # the original error.
        logging.error(
            "ERROR while importing from Douban, user_key: %s list_type: %s",
            user.key(), list_type)
        logging.error(err)
        self._log(err)
        return

    bl = BookList.get_or_create(user, list_type)
    bl.start_importing(len(imported))
    # also clear those in memcache
    SortHelper(user).clear(list_type)

    for related in imported:
        # also added into memcache in merge_into_datastore()
        book = related.merge_into_datastore(user, update_book=False)
        if not book:
            # when already such book there, the merge result is None
            continue
        try:
            # A distinct name keeps the list being iterated from being
            # shadowed by the Tongji payload.
            url, tj_datas = tongji.get_by_isbn(book.isbn)
            book.set_tongji_info(url, tj_datas)
        except Exception as err:
            # Deliberately best-effort: a Tongji failure for one book must
            # not abort the whole import.
            logging.error("ERROR while saving TJ info, isbn: %s", book.isbn)
            logging.error(err)
            self._log(err)

    # after all, finish importing
    # NOTE(review): the list is looked up again instead of reusing the
    # object from above -- presumably so a fresh copy is saved; confirm.
    bl = BookList.get_or_create(user, list_type)
    bl.douban_amount = None
    bl.put()
def _being_praised(self, user):
    """
    Filtering by calculating scores of each book.

    Every book in the user's interested list gets a score built from its
    public rating, the number of votes behind that rating, optionally its
    page count, and a random bonus when it is tagged 'thisweek' or
    'thismonth'. Each weight carries random jitter, so repeated calls may
    rank the same books differently.

    @param user: the user whose lists and tags are consulted.
    @returns: a list of at most self.NEXT_LIMIT isbns, best score first.
    """
    # (exclusive upper bound, base weight) pairs, checked in order.
    rating_steps = (
        (6.0, -2),
        (7.0, -1),
        (7.5, 0),
        (7.8, 0.3),
        (8.0, 0.6),
        (8.3, 0.9),
        (8.5, 1.2),
        (8.8, 1.5),
        (9.0, 1.8),
    )

    def _rating_weight(rating_score):
        """
        Return the relative weight of a rating score.
        @param rating_score: In 10 points scale.
        """
        base = 2  # 9.0 and above
        for upper_bound, step_base in rating_steps:
            if rating_score < upper_bound:
                base = step_base
                break
        return base + random.random()

    def _voted_weight(amount):
        """
        @returns: the relative weight of rated_amount.
        Generally, the larger it is, the more its rating can present.
        """
        if amount <= 0:
            base = -2
        elif amount < 64:
            base = -1
        else:
            # Too many votes is also useless: cap the amount so the weight
            # stops growing past 10000 votes. (min, not max -- max would
            # raise every count to at least 10000 and leave huge counts
            # uncapped.)
            amount = min(amount, 10000)
            base = log10(amount)
        return base + random.random()

    def _pages_weight(pages):
        """
        @returns: the relative weight of pages of a book.
        Generally, if pages is larger than a threshold, it becomes bad.
        """
        if pages > self.PAGE_THRESHHOLD:
            return -1
        return 1

    helper = SortHelper(user)
    one_month_ago = datetime.datetime.now() - datetime.timedelta(days=30)

    # Pages only matter when one of the books in the done list updated
    # within the last 30 days was longer than the threshold. Walk newest
    # first and stop at the first entry older than a month.
    consider_pages = False
    done_books = helper.all(booklist.LIST_DONE)
    for done in sorted(done_books, key=lambda sd: sd.updated_time,
                       reverse=True):
        if done.updated_time < one_month_ago:
            break
        if done.pages > self.PAGE_THRESHHOLD:
            consider_pages = True
            break

    tag_helper = TagHelper(user)
    week_goals = tag_helper.isbns('thisweek')
    month_goals = tag_helper.isbns('thismonth')

    def _calculate(data):
        """
        Calculate the score of that data.

        Reads `consider_pages`, `week_goals` and `month_goals` from the
        enclosing scope; pages are only taken into account when
        `consider_pages` is True.
        """
        r = _rating_weight(data.public_rating)
        v = _voted_weight(data.rated_amount)
        # NOTE(review): when both weights are negative (rating below 6.0
        # and no votes) the product is positive -- confirm this is intended.
        score = r * v
        if consider_pages:
            score += _pages_weight(data.pages)
        if data.isbn in week_goals or data.isbn in month_goals:
            # Books tagged as a goal get a random boost.
            score += 16 * random.random()
        return score

    candidates = helper.all(booklist.LIST_INTERESTED)
    ranked = sorted(candidates, key=_calculate, reverse=True)
    return [d.isbn for d in ranked[:self.NEXT_LIMIT]]