Beispiel #1
0
 def add_links(self, links, in_url):
     """Record the outgoing links found on the page ``in_url``.

     A link is ignored when it is already present in ``self.__visited`` or
     ``self.__current`` under either its ``http://`` or ``https://`` form.

     For every remaining link:

     * if it is already in ``self.__urls`` its score is increased by 1;
     * otherwise it is inserted with score ``self.__age`` plus a bonus of 20
       when 'harvard' occurs in the link and another 20 when 'harvard'
       occurs in ``in_url``; ``self.__age`` is then lowered by ``AGING`` so
       that no two newly inserted links start from the same age.

     In both cases ``Page.canonical(in_url)`` is added to the link's entry
     in ``self.__inlinks``.

     links  -- iterable of URL strings found on the page
     in_url -- URL string of the page the links were found on
     """
     for url in links:
         # Build both scheme variants of the link.  The pattern has to be
         # anchored on the full 'http://' / 'https://' prefix: a bare '^http'
         # also matches the start of 'https://...' and would rewrite it to
         # 'httpss://...', a string that is never stored anywhere, which
         # silently disabled this check for every https link.
         secure_url = re.sub(r'^https?://', 'https://', url)
         plain_url = re.sub(r'^https?://', 'http://', url)

         # Skip the link as soon as EITHER variant is already known.  (The
         # previous 'or' between the two scheme checks only skipped a link
         # when both variants had been seen, so visited pages were re-queued.)
         if secure_url in self.__visited or secure_url in self.__current:
             continue
         if plain_url in self.__visited or plain_url in self.__current:
             continue

         referrer = Page.canonical(in_url)
         if url in self.__urls:
             # Already queued: one more page links to it.
             self.__urls[url] += 1
         else:
             score = 0
             if 'harvard' in url:
                 score += 20
             if 'harvard' in in_url:
                 score += 20
             self.__urls[url] = self.__age + score
             self.__inlinks[url] = Set()
             self.__age -= AGING  # no two new links share the same age
         self.__inlinks[url].add(referrer)