Exemple #1
0
def extract_topics_from_url(db):
    """Collect the topics (and their subtopics) found in article URLs.

    Every document returned by ``db.articles.find()`` is inspected; its
    ``'url'`` entry is searched for a ``/<digits>/<topic>/<subtopic>/``
    segment, where topic and subtopic consist of lowercase ASCII letters.
    Articles whose URL does not contain such a segment are skipped.

    Args:
        db: Object exposing an ``articles`` collection whose ``find()``
            yields mappings with a ``'url'`` key (presumably a MongoDB
            database handle -- confirm against the caller).

    Returns:
        The ``TopicsSet`` holding one ``Topic`` per distinct topic seen,
        each with the subtopics found for it registered via ``add_topic``.
    """
    # Raw string: '\d' (and the former '\/') are not valid *string* escapes,
    # so a plain literal triggers an invalid-escape warning on modern Python.
    # '/' needs no escaping in a regex, so the pattern is otherwise unchanged.
    # NOTE(review): the subtopic group is optional but the '/' after it is
    # not, so a URL without a subtopic only matches if it contains '//'
    # right after the topic -- confirm this is intended.
    topic_re = re.compile(r'/\d+/(?P<topic>[a-z]+)/(?P<subtopic>[a-z]+)?/')
    topics = TopicsSet()
    for article in db.articles.find():
        match = topic_re.search(article['url'])
        if not match:
            continue

        t = Topic(match.group('topic'))
        if t not in topics:
            topics.add(t)
        else:
            # Reuse the instance already stored in the set so subtopics
            # accumulate on a single Topic object.
            t = topics.getelement(t)

        subtopic = match.group('subtopic')
        if subtopic:
            t.add_topic(subtopic)

    return topics