def __init__(self, siteName, baseURL, threads):
    """Initialise the per-site state held by this object.

    Args:
        siteName: Site identifier; used as the directory name under
            ``domains/`` and as the prefix of the per-site file names
            built here.
        baseURL: Stored unchanged on ``self.baseURL``.
        threads: Stored unchanged on ``self.MAX_THREADS``.
    """
    # Identity of the site.
    self.siteName = siteName
    self.baseURL = baseURL

    # The paths built below all share domains/<siteName>/<siteName>.
    sitePrefix = 'domains/' + siteName + '/' + siteName
    self.crawledFile = sitePrefix + '_crawled.txt'
    # Whatever FileIO.createSiteIndexFile returns is kept as the index file.
    self.indexFile = FileIO.createSiteIndexFile(self.siteName)

    # Link bookkeeping: starts as an empty set, with no list form yet.
    self.links = set()
    self.linksList = None

    # Plain boolean flags, both initially True.
    # NOTE(review): presumably used to gate reads/writes -- confirm at call sites.
    self.readSemaphore = True
    self.writeSemaphore = True
    self.MAX_THREADS = threads

    # One fresh Graph per link direction, plus the JSON path paired with each.
    self.inlinkGraph = Graph()
    self.outlinkGraph = Graph()
    self.inlinkGraphFile = sitePrefix + '_inlinks.json'
    self.outlinkGraphFile = sitePrefix + '_outlinks.json'
def __init__(self, siteName):
    """Initialise the per-site state held by this object.

    Args:
        siteName: Site identifier; used as the directory name under
            ``domains/`` and as the prefix of the crawled-file name.
    """
    self.siteName = siteName

    # Path has the form domains/<siteName>/<siteName>_crawled.txt.
    siteDir = 'domains/' + siteName + '/'
    self.crawledFile = siteDir + siteName + '_crawled.txt'

    # Whatever FileIO.createSiteIndexFile returns is kept as the index file.
    self.indexFile = FileIO.createSiteIndexFile(self.siteName)

    # Starts with an empty set of links.
    self.links = set()