Esempio n. 1
0
 # Record one domain: count it and, if it is not already stored, persist and queue it.
 # NOTE(review): `f**k` is not a valid Python identifier — the real method
 # name appears to have been masked; restore it before running this code.
 def f**k(self, domain):
     # Both counters are module-level globals rebound by this method.
     global DomainsProcessed, DomainsStored
     # Every call counts as processed, whether or not the domain is new.
     DomainsProcessed  = DomainsProcessed + 1
     # 'hdm' is the key passed to the store for both the lookup and the insert.
     if not self._ssdb.isDomainInDB('hdm', domain):
         # Store the domain under MD5(domain), then hand it to DOMAINQUEUE01.
         self._ssdb.setHItem('hdm', MD5(domain), domain)
         HTTPSQSQueue.put(DOMAINQUEUE01, domain)
         # Only domains that were not already in the store are counted here.
         DomainsStored = DomainsStored    + 1
Esempio n. 2
0
 def checkDomainsQueue(self):
     '''
     Inspect both domain queues and relieve whichever one is over the limit.

     The unread counts of DOMAINQUEUE01 and DOMAINQUEUE02 are read first and
     kept on the instance. If queue 01 holds more than 10000 unread items it
     is cached via cacheHTTPSQSQueue(). If queue 02 holds more than 10000
     unread items, 100 items are moved from it to queue 01 and queue 02 is
     then reset.
     '''
     limit = 10000

     # Both counts are sampled up front, before any queue is modified.
     self._QueueUnRead01 = self.getQueueUnRead(DOMAINQUEUE01)
     self._QueueUnRead02 = self.getQueueUnRead(DOMAINQUEUE02)

     if self._QueueUnRead01 > limit:
         warning = 'HTTPSQSQueue %s will be full, waiting for cache!!!!!!!!!!!!!' % DOMAINQUEUE01
         C.Info(warning, C.ALERT)
         self.cacheHTTPSQSQueue(DOMAINQUEUE01)
         C.Info('HTTPSQSQueue cached', C.ALERT)

     if self._QueueUnRead02 > limit:
         warning = 'HTTPSQSQueue %s will be full, waiting for reset!!!!!!!!!!!!!' % DOMAINQUEUE02
         C.Info(warning, C.ALERT)
         # Move 100 items over to queue 01 before queue 02 is reset.
         for _ in range(100):
             HTTPSQSQueue.put(DOMAINQUEUE01, HTTPSQSQueue.get(DOMAINQUEUE02))
         HTTPSQSQueue.reset(DOMAINQUEUE02)
Esempio n. 3
0
 def fuckDomain(self, originalDomain):
     '''
     Crawl one domain's front page and queue the new domains it links to.

     Fetches 'http://' + originalDomain, extracts the URLs from the returned
     HTML and puts every distinct linked domain on DOMAINQUEUE02. Empty
     domains and originalDomain itself are skipped. A debug line with the
     number of domains found and the elapsed time is logged at the end.
     '''
     startedAt = time.time()
     # A set gives constant-time duplicate checks; only its size is reported,
     # so the insertion order the old list kept is not needed.
     seenDomains = set()
     html = self.getHTMLContentFromUrl('http://' + originalDomain)
     for url in self.parseUrlsFromHTMLContent(html):
         domain = self.parseDomainFromUrl(url)
         if domain == '' or domain == originalDomain or domain in seenDomains:
             continue
         HTTPSQSQueue.put(DOMAINQUEUE02, domain)
         seenDomains.add(domain)
     C.Info('(%2d) get %3d new domains from %s in %.fs' % (self._tid, len(seenDomains), originalDomain, time.time()-startedAt), C.DEBUG)
Esempio n. 4
0
 def feed(self, qName, cFile):
     '''
     Feed HTTPSQSQueue with domains from cached file.

     Each line of cFile is stripped of surrounding whitespace and put on the
     queue qName; the queue status is printed once the file has been read.
     If cFile does not exist, a message is printed and nothing is queued.
     '''
     if not os.path.exists(cFile):
         # A parenthesised single argument prints the same text under the
         # Python 2 print statement.
         print('File does not exists!')
         return
     # `with` closes the file even if a put() raises, and iterating the
     # handle streams the lines instead of loading the whole file at once.
     with open(cFile) as cached:
         for line in cached:
             HTTPSQSQueue.put(qName, line.strip())
     print(HTTPSQSQueue.status(qName))
Esempio n. 5
0
class Monitor(threading.Thread):
    '''
    Thread that logs aggregate spider statistics every 15 seconds.
    '''
    # Evaluated once, when this class body is executed; ShowStatus() uses it
    # as the origin for the elapsed time.
    ThreadStartTime  = time.time()
    # Sum of TotalProcessed over all spiders, refreshed by ShowStatus().
    TotalProcessed   = 0

    def __init__(self):
        super(Monitor, self).__init__()

    def run(self):
        '''Log a status line, sleep 15 seconds, and repeat forever.'''
        while True:
            self.ShowStatus()
            time.sleep(15)

    def ShowStatus(self):
        '''
        Recompute the total from every spider in DomainSpidders and log the
        total, the minutes elapsed, and the per-minute rate via C.Info().
        '''
        self.TotalProcessed = sum(spider.TotalProcessed for spider in DomainSpidders)
        elapsed = time.time() - self.ThreadStartTime
        perMinute = float(self.TotalProcessed * 60 / elapsed)
        report = 'Totoal Domains:%d, Time used:%.fm, Speed:%.f/m' % (self.TotalProcessed, elapsed / 60, perMinute)
        C.Info(report, C.INFO)

if __name__ == '__main__':
    # Usage: <script> <spider-count> [seed-domain]
    # Python 2 only: reload(sys) makes setdefaultencoding() callable again.
    reload(sys)
    sys.setdefaultencoding("UTF-8")

    argCount = len(sys.argv)
    if argCount < 2:
        # No spider count given: exit without doing anything.
        sys.exit()
    if argCount > 2:
        # The optional second argument is put on the first domain queue.
        HTTPSQSQueue.put(DOMAINQUEUE01, sys.argv[2])

    spiderCount = int(sys.argv[1])
    for spiderId in range(spiderCount):
        DomainSpidders.append(DomainSpidder(spiderId))
    # Spiders are started only after all of them have been created.
    for spider in DomainSpidders:
        spider.start()

    monitor = Monitor()
    monitor.start()