def get_duplicates(self, feed_id):
    """
    Find pairs of duplicate articles within one feed.

    Two articles are treated as duplicates when their titles are equal
    after ``clear_string`` and their ``date`` values are less than one day
    apart, in either direction.

    Args:
        feed_id: identifier passed to ``self.get`` to load the feed.

    Returns:
        A tuple ``(feed, duplicates)`` where ``duplicates`` is a list of
        2-tuples of articles. Within each pair the article with the smaller
        ``retrieved_date`` comes first.
    """
    feed = self.get(id=feed_id)
    # Normalise every title once instead of once per compared pair.
    keyed_articles = [
        (clear_string(article.title), article) for article in feed.articles
    ]
    one_day = timedelta(days=1)
    duplicates = []
    for (title1, art1), (title2, art2) in itertools.combinations(
        keyed_articles, 2
    ):
        if title1 != title2:
            continue
        # The gap must be measured as an absolute value: a signed difference
        # is negative (hence always "< one day") whenever the first article
        # is the older one, no matter how far apart the two dates are.
        if abs(art1.date - art2.date) >= one_day:
            continue
        if art1.retrieved_date < art2.retrieved_date:
            duplicates.append((art1, art2))
        else:
            duplicates.append((art2, art1))
    return feed, duplicates
def get_duplicates(self, feed_id):
    """
    Find pairs of duplicate articles within one feed.

    Only the first 1000 entries of ``feed.articles`` are compared, which
    bounds the number of pairs examined. Two articles are treated as
    duplicates when their titles are equal after ``clear_string`` and their
    ``date`` values are less than one day apart, in either direction.

    Args:
        feed_id: identifier passed to ``self.get`` to load the feed.

    Returns:
        A tuple ``(feed, duplicates)`` where ``duplicates`` is a list of
        2-tuples of articles. Within each pair the article with the smaller
        ``retrieved_date`` comes first.
    """
    feed = self.get(id=feed_id)
    # Normalise every title once instead of once per compared pair.
    keyed_articles = [
        (clear_string(article.title), article)
        for article in feed.articles[:1000]
    ]
    one_day = timedelta(days=1)
    duplicates = []
    for (title1, art1), (title2, art2) in itertools.combinations(
        keyed_articles, 2
    ):
        if title1 != title2:
            continue
        # The gap must be measured as an absolute value: a signed difference
        # is negative (hence always "< one day") whenever the first article
        # is the older one, no matter how far apart the two dates are.
        if abs(art1.date - art2.date) >= one_day:
            continue
        if art1.retrieved_date < art2.retrieved_date:
            duplicates.append((art1, art2))
        else:
            duplicates.append((art2, art1))
    return feed, duplicates
def feed_view(feed_id=None, user_id=None):
    """
    Render the ``feed.html`` page for one feed.

    Loads the feed, its category (when ``feed.category_id`` is set) and its
    articles ordered by descending ``date``, builds a tag cloud from the
    most frequent words, and computes a few publication statistics.

    Args:
        feed_id: identifier of the feed to display.
        user_id: identifier handed to the controllers.

    Returns:
        The result of ``render_template('feed.html', ...)``.

    Statistics passed to the template:
        * ``end_post_date`` / ``first_post_date``: ``date`` of the first and
          last article of the ordered list.
        * ``delta``: difference between those two dates.
        * ``average``: number of articles divided by ``abs(delta.days)``,
          rounded to 2 decimals; 0 when the span is shorter than one day.
        * ``elapsed``: time between now and ``end_post_date``.
        When there are no articles, or the two dates cannot be subtracted,
        both dates fall back to ``datetime.fromtimestamp(0)``, ``delta`` is
        zero and ``average`` is 0.
    """
    feed = FeedController(user_id).get(id=feed_id)
    word_size = 6
    category = None
    if feed.category_id:
        category = CategoryController(user_id).get(id=feed.category_id)
    articles = (
        ArticleController(user_id)
        .read(feed_id=feed_id)
        .order_by(desc("date"))
        .all()
    )
    top_words = misc_utils.top_words(articles, n=50, size=int(word_size))
    tag_cloud = misc_utils.tag_cloud(top_words)
    today = datetime.now()

    # Placeholder statistics, used when there is nothing to measure.
    epoch = datetime.fromtimestamp(0)
    last_article = first_article = epoch
    delta = last_article - first_article
    average = 0

    if articles:
        newest, oldest = articles[0].date, articles[-1].date
        try:
            span = newest - oldest
        except TypeError:
            # Dates that cannot be subtracted (e.g. a missing date): keep the
            # placeholders so the page still renders.
            pass
        else:
            last_article, first_article, delta = newest, oldest, span
            # A span shorter than one day has ``days == 0``. Dividing by it
            # used to raise inside a catch-all handler, which replaced the
            # real dates with the epoch; keep the real dates and leave the
            # per-day average at 0 instead.
            if delta.days:
                average = round(float(len(articles)) / abs(delta.days), 2)

    elapsed = today - last_article
    return render_template(
        'feed.html',
        head_titles=[utils.clear_string(feed.title)],
        feed=feed,
        articles=articles,
        tag_cloud=tag_cloud,
        first_post_date=first_article,
        end_post_date=last_article,
        category=category,
        average=average,
        delta=delta,
        elapsed=elapsed,
    )
def article(article_id=None):
    """
    Render the 'article.html' page for the requested article.

    The article is fetched by id through an ``ArticleController`` created
    for the current user; its cleaned title is used as the page title.
    """
    controller = ArticleController(current_user.id)
    entry = controller.get(id=article_id)
    page_titles = [clear_string(entry.title)]
    return render_template(
        'article.html',
        head_titles=page_titles,
        article=entry,
    )
def article_pub(article_id=None):
    """
    Render an article publicly.

    The article is shown only when its source is not private and the user
    owning that source has a public profile; otherwise the 404 error page
    is returned together with a 404 status code.
    """
    entry = ArticleController().get(id=article_id)
    source = entry.source
    # Short-circuits exactly like the original check: the owner's profile is
    # only inspected when the source itself is not private.
    publicly_visible = not source.private and source.user.is_public_profile
    if not publicly_visible:
        return render_template('errors/404.html'), 404
    page_titles = [clear_string(entry.title)]
    return render_template(
        'article_pub.html',
        head_titles=page_titles,
        article=entry,
    )
def article(article_id=None):
    """
    Render the "article.html" page for the requested article.

    The article is fetched through an ``ArticleController`` created for the
    current user. If its ``readed`` flag is not set yet, the controller is
    asked to set it to True before the page is rendered.
    """
    controller = ArticleController(current_user.id)
    entry = controller.get(id=article_id)
    already_read = entry.readed
    if not already_read:
        controller.update({"id": entry.id}, {"readed": True})
    page_titles = [clear_string(entry.title)]
    return render_template(
        "article.html",
        head_titles=page_titles,
        article=entry,
    )
def top_words(articles, n=10, size=5):
    """
    Count the words found in the articles' content and return the ``n``
    most frequent ones.

    Only words of at least ``size`` word characters are considered. Words
    are lower-cased before counting, and those present in the collection
    returned by ``load_stop_words`` are skipped.

    Returns a list of ``(word, count)`` tuples as produced by
    ``Counter.most_common``.
    """
    stop_words = load_stop_words()
    frequencies = Counter()
    pattern = re.compile(r'\b\w{%s,}\b' % size, re.I)
    for article in articles:
        text = clear_string(article.content)
        lowered = (token.lower() for token in pattern.findall(text))
        frequencies.update(
            token for token in lowered if token not in stop_words
        )
    return frequencies.most_common(n)
def feed_view(feed_id=None, user_id=None):
    """
    Render the paginated ``feed.html`` page for one feed.

    Loads the feed, its category (when ``feed.category_id`` is set) and the
    feed's articles via ``ArticleController.read_light``, then computes a few
    publication statistics and hands one page of articles to the template.

    Args:
        feed_id: identifier of the feed to display.
        user_id: identifier handed to the controllers.

    Returns:
        The result of ``render_template("feed.html", ...)``.

    Statistics passed to the template:
        * ``end_post_date`` / ``first_post_date``: ``date`` of
          ``articles[0]`` and ``articles[-1]``.
        * ``delta``: difference between those two dates.
        * ``average``: article count divided by ``abs(delta.days)``, rounded
          to 2 decimals; 0 when the span is shorter than one day.
        * ``elapsed``: time between now and ``end_post_date``.
        When the first/last article cannot be fetched or their dates cannot
        be subtracted, both dates fall back to ``datetime.fromtimestamp(0)``,
        ``delta`` is zero and ``average`` is 0.
    """
    feed = FeedController(user_id).get(id=feed_id)
    category = None
    if feed.category_id:
        category = CategoryController(user_id).get(id=feed.category_id)
    articles = ArticleController(user_id).read_light(feed_id=feed_id)

    # Server-side pagination
    page, per_page, offset = get_page_args(per_page_parameter="per_page")
    # Count once and reuse the value for the pager and for the average.
    total = articles.count()
    pagination = Pagination(
        page=page,
        total=total,
        css_framework="bootstrap3",
        search=False,
        record_name="articles",
        per_page=per_page,
    )

    today = datetime.now()

    # Placeholder statistics, used when there is nothing to measure.
    epoch = datetime.fromtimestamp(0)
    last_article = first_article = epoch
    delta = last_article - first_article
    average = 0

    try:
        newest = articles[0].date
        # NOTE(review): this relies on negative indexing being supported by
        # whatever read_light() returns, and on that result being ordered by
        # date — neither is visible here; confirm.
        oldest = articles[-1].date
        span = newest - oldest
    except (IndexError, TypeError):
        # No article to index, or dates that cannot be subtracted: keep the
        # placeholders so the page still renders.
        pass
    else:
        last_article, first_article, delta = newest, oldest, span
        # A span shorter than one day has ``days == 0``. Dividing by it used
        # to raise inside a catch-all handler, which replaced the real dates
        # with the epoch; keep the real dates and leave the average at 0.
        if delta.days:
            average = round(float(total) / abs(delta.days), 2)

    elapsed = today - last_article
    return render_template(
        "feed.html",
        head_titles=[utils.clear_string(feed.title)],
        feed=feed,
        category=category,
        articles=articles.offset(offset).limit(per_page),
        pagination=pagination,
        first_post_date=first_article,
        end_post_date=last_article,
        average=average,
        delta=delta,
        elapsed=elapsed,
    )