Esempio n. 1
0
class Topic:
    """A named topic that owns a collection of child topics.

    Equality and hashing are both derived from ``name`` alone, so two
    topics with the same (stripped) name are treated as the same element
    by hash-based containers.
    """

    def __init__(self, name):
        """Create a topic.

        Args:
            name: Topic name; leading and trailing whitespace is stripped.
        """
        self.name = name.strip()
        # TopicsSet is defined elsewhere in the project.
        self.children = TopicsSet()

    def __str__(self):
        """Return the name followed by one tab-indented line per direct child."""
        # Collect the pieces and join once; also avoids shadowing builtin `str`.
        pieces = [self.name + '\n']
        pieces.extend('\t%s\n' % (child.name,) for child in self.children)
        return ''.join(pieces)

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()

    def __eq__(self, other):
        """Compare by name; objects without a ``name`` are not comparable."""
        try:
            return self.name == other.name
        except AttributeError:
            # Let Python fall back to its default handling instead of crashing.
            return NotImplemented

    def __hash__(self):
        """Return an integer derived from the MD5 digest of the name."""
        raw = self.name
        # hashlib only accepts bytes; text names must be encoded first.
        # Names that are already bytes are hashed unchanged.
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        return int(hashlib.md5(raw).hexdigest(), 16)

    def add_topic(self, name):
        """Add a child topic built from ``name`` to ``children``."""
        self.children.add(Topic(name))

    def get_children_count(self):
        """Return the number of descendants (children, their children, ...)."""
        return len(self.children) + sum(
            item.get_children_count() for item in self.children
        )
Esempio n. 2
0
def extract_topics_from_url(db):
    """Collect topics and subtopics found in the URLs of stored articles.

    Each article URL is searched for the pattern ``/<digits>/<topic>/<subtopic>/``
    where topic and subtopic consist of lowercase ASCII letters and the
    subtopic part may be empty. Matching topics are gathered into a
    ``TopicsSet``; a subtopic, when present, is added as a child of its topic.

    Args:
        db: Object exposing ``articles.find()``, which yields mappings
            that have a ``'url'`` key (presumably a MongoDB database
            handle -- confirm against the caller).

    Returns:
        A ``TopicsSet`` of ``Topic`` objects built from the matching URLs.
    """
    articles = db.articles.find()
    # Raw string: the pattern text is unchanged, but '\/' and '\d' are no
    # longer invalid string escapes (a SyntaxWarning on recent Pythons).
    # NOTE(review): the trailing '/' is mandatory even without a subtopic,
    # so a URL without a subtopic only matches as '/<digits>/<topic>//' --
    # confirm this is intended.
    topic_re = re.compile(r'\/\d+\/(?P<topic>[a-z]+)\/(?P<subtopic>[a-z]+)?\/')
    topics = TopicsSet()
    for article in articles:
        match = topic_re.search(article['url'])
        if not match:
            continue

        t = Topic(match.group('topic'))
        if t not in topics:
            topics.add(t)
        else:
            # Reuse the stored instance so subtopics accumulate on it.
            t = topics.getelement(t)

        subtopic = match.group('subtopic')
        if subtopic:
            t.add_topic(subtopic)

    return topics
Esempio n. 3
0
 def __init__(self, name):
     """Store the whitespace-stripped name and a freshly constructed TopicsSet."""
     trimmed = name.strip()
     self.name = trimmed
     self.children = TopicsSet()