예제 #1
0
 def __init__(self, siteName, baseURL, threads):
     """Set up the per-site state held by this object.

     Args:
         siteName: Name of the site; used to build every file path under
             ``domains/<siteName>/`` and passed to
             ``FileIO.createSiteIndexFile``.
         baseURL: Stored unchanged as ``self.baseURL``.
         threads: Stored unchanged as ``self.MAX_THREADS``.
     """
     self.siteName, self.baseURL = siteName, baseURL

     # Every per-site file shares the prefix "domains/<site>/<site>";
     # only the suffix differs.
     sitePrefix = 'domains/' + siteName + '/' + siteName
     self.crawledFile = sitePrefix + '_crawled.txt'
     self.indexFile = FileIO.createSiteIndexFile(self.siteName)

     # Link bookkeeping starts empty; linksList is left unset (None) here.
     self.links, self.linksList = set(), None

     # Plain boolean flags, both initially True.
     # NOTE(review): presumably ad-hoc read/write guards -- confirm against
     # the code that toggles them.
     self.readSemaphore = self.writeSemaphore = True
     self.MAX_THREADS = threads

     # One graph per link direction, each with its own JSON file path.
     self.inlinkGraph, self.outlinkGraph = Graph(), Graph()
     self.inlinkGraphFile = sitePrefix + '_inlinks.json'
     self.outlinkGraphFile = sitePrefix + '_outlinks.json'
예제 #2
0
 def __init__(self, siteName):
     """Set up the per-site state held by this object.

     Args:
         siteName: Name of the site; used to build the crawled-file path
             under ``domains/<siteName>/`` and passed to
             ``FileIO.createSiteIndexFile``.
     """
     self.siteName = siteName

     # The crawled-URL file sits inside the site's own directory.
     siteDir = 'domains/' + siteName + '/'
     self.crawledFile = siteDir + siteName + '_crawled.txt'

     self.indexFile = FileIO.createSiteIndexFile(self.siteName)

     # Link collection starts out empty.
     self.links = set()