Ejemplo n.º 1
0
Archivo: utils.py Proyecto: bzero/JARR
def compare_documents(feed):
    """
    Compare the articles of a feed pairwise and collect likely duplicates.

    Two articles are treated as duplicates when their titles are equal
    after being passed through clear_string() and their "date" values are
    less than one day apart, regardless of which one is the more recent.

    :param feed: object exposing an iterable ``articles`` attribute whose
                 items have ``title``, ``date`` and ``retrieved_date``.
    :return: list of 2-tuples of duplicate articles; inside each tuple the
             article with the earlier "retrieved date" comes first.
    """
    max_gap = timedelta(days=1)
    duplicates = []
    for first, second in itertools.combinations(feed.articles, 2):
        if clear_string(first.title) != clear_string(second.title):
            continue
        # The absolute difference is required here: "first.date - second.date"
        # is negative whenever the first article is the older one, and a
        # negative timedelta always compares below the threshold, which would
        # flag same-titled articles published arbitrarily far apart.
        if abs(first.date - second.date) >= max_gap:
            continue
        if first.retrieved_date < second.retrieved_date:
            duplicates.append((first, second))
        else:
            duplicates.append((second, first))
    return duplicates
Ejemplo n.º 2
0
def compare_documents(feed):
    """
    Compare the articles of a feed pairwise and collect likely duplicates.

    Two articles are treated as duplicates when their titles are equal
    after being passed through clear_string() and their "date" values are
    less than one day apart, regardless of which one is the more recent.

    :param feed: object exposing an iterable ``articles`` attribute whose
                 items have ``title``, ``date`` and ``retrieved_date``.
    :return: list of 2-tuples of duplicate articles; inside each tuple the
             article with the earlier "retrieved date" comes first.
    """
    max_gap = timedelta(days=1)
    duplicates = []
    for first, second in itertools.combinations(feed.articles, 2):
        if clear_string(first.title) != clear_string(second.title):
            continue
        # The absolute difference is required here: "first.date - second.date"
        # is negative whenever the first article is the older one, and a
        # negative timedelta always compares below the threshold, which would
        # flag same-titled articles published arbitrarily far apart.
        if abs(first.date - second.date) >= max_gap:
            continue
        if first.retrieved_date < second.retrieved_date:
            duplicates.append((first, second))
        else:
            duplicates.append((second, first))
    return duplicates
Ejemplo n.º 3
0
Archivo: utils.py Proyecto: bzero/JARR
def top_words(articles, n=10, size=5):
    """
    Count the words found in the content of the given articles and
    return the n most common ones.

    Only words made of at least `size` word characters are counted. They
    are lower-cased first, and those present in the collection returned
    by load_stop_words() are skipped.
    """
    ignored = load_stop_words()
    frequencies = Counter()
    pattern = re.compile(r'\b\w{%s,}\b' % size, re.I)
    for entry in articles:
        for token in pattern.findall(clear_string(entry.content)):
            lowered = token.lower()
            if lowered in ignored:
                continue
            frequencies[lowered] += 1
    return frequencies.most_common(n)
Ejemplo n.º 4
0
def top_words(articles, n=10, size=5):
    """
    Count the words found in the content of the given articles and
    return the n most common ones.

    Only words made of at least `size` word characters are counted. They
    are lower-cased first, and those present in the collection returned
    by load_stop_words() are skipped.
    """
    ignored = load_stop_words()
    frequencies = Counter()
    pattern = re.compile(r'\b\w{%s,}\b' % size, re.I)
    for entry in articles:
        for token in pattern.findall(clear_string(entry.content)):
            lowered = token.lower()
            if lowered in ignored:
                continue
            frequencies[lowered] += 1
    return frequencies.most_common(n)
Ejemplo n.º 5
0
def article(article_id=None):
    """
    Render the 'article.html' template for the requested article.

    The article is looked up through an ArticleController built for the
    current user (g.user.id). The template also receives a previous and a
    next article; when the article reports none, the first (respectively
    the last) entry of its source's articles is used instead.
    """
    current = ArticleController(g.user.id).get(id=article_id)

    before = current.previous_article()
    if before is None:
        # No previous article reported: fall back to the source's first one.
        before = current.source.articles[0]

    after = current.next_article()
    if after is None:
        # No next article reported: fall back to the source's last one.
        after = current.source.articles[-1]

    context = {
        'head_titles': [clear_string(current.title)],
        'article': current,
        'previous_article': before,
        'next_article': after,
    }
    return render_template('article.html', **context)