예제 #1
0
파일: CommonCrawl.py 프로젝트: ADS-E/Final
    def download_found(self):
        """Download every URL previously collected into ``urls.txt``.

        Fills ``self.queue`` with the URLs read from disk, spawns the
        worker threads via ``create_threads``, and blocks until every
        thread in ``self.threads`` has finished.
        """
        # Put all the found urls into a queue for the threads to read from.
        self.queue = Queue()
        # Plain loop, not a list comprehension: we only want the put()
        # side effect, not a throwaway list of None values.
        for url in FileHelper.read_file('urls.txt'):
            self.queue.put(url)

        # Create the threads and wait for them to finish.
        self.create_threads()

        for t in self.threads:
            t.join()
예제 #2
0
파일: Crawler.py 프로젝트: ADS-E/Final
    def __init__(self, sitename, file):
        """Prepare the crawler.

        Args:
            sitename: Site label used to tag the ``UrlResult``.
            file: Path of a text file containing the URLs to crawl.
        """
        self.queue = Queue()
        # Plain loop, not a list comprehension: we only want the put()
        # side effect, not a throwaway list of None values.
        for url in FileHelper.read_file(file):
            self.queue.put(url)

        self.result = UrlResult(sitename)
예제 #3
0
파일: Spider.py 프로젝트: ADS-E/Final
 def __init__(self, url, content, path):
     """Store the page URL and content, and load the word list from *path*."""
     # Result object labelled with the page URL; words come from disk.
     self.result = UrlResult(url)
     self.words = FileHelper.read_file(path)
     self.url = url
     self.content = content