def reset(self):
    """Return the crawler to a clean, not-running state so it can be reused.

    Stops the crawl flags, waits for the handler thread (if one was ever
    started) to finish, and clears all per-crawl bookkeeping.
    """
    self.running = False
    self.stopped = True
    # __init__ leaves self.Handler as None until a crawl starts; calling
    # join() on it then would raise AttributeError, so guard the join.
    if self.Handler is not None:
        self.Handler.join()
    self.totalUrls = 0
    self.doneUrls = 0
    self.POOL = []
    self.Done = {}
    self.Requests = []
    # Fresh form analyzer for the next crawl run.
    self.FormAnalysis = WebAnalyzer(forms=True)
def __init__(self, threads=2, reject=None, store=False, proxy=None, cookie=None):
    """Set up crawler configuration and empty per-crawl state.

    Args:
        threads: Number of worker threads (bounds the semaphore).
        reject: Extra file extensions to skip, merged with a built-in
            binary/media blacklist. ``None`` means no extras.
        store: Whether fetched content should be stored.
        proxy: Optional proxy to route requests through.
        cookie: Optional cookie string to send with requests; falsy
            values are normalized to ``None``.
    """
    self.FormAnalysis = WebAnalyzer(forms=True)
    self.running = False
    self.stopped = False
    self.CFG_threads = threads
    self.runningThreads = 0
    self.CFG_store = store
    # `reject=None` sentinel instead of a mutable `[]` default; the
    # merged list is always a fresh object per instance.
    self.CFG_reject = (reject or []) + [
        "jpg", "gif", "png", "zip", "exe", "doc", "swf", "rar", "pdf"]
    self.totalUrls = 0
    self.doneUrls = 0
    self.POOL = []
    self.Done = {}
    self.CFG_proxy = proxy
    self.urlOKS = []
    # Normalize falsy cookies (e.g. "") to None, as the original did.
    self.CFG_cookie = cookie if cookie else None
    self.threads_list = []
    # Worker-count limiter plus a binary semaphore used as a mutex.
    self.Semaphore = threading.BoundedSemaphore(value=self.CFG_threads)
    self.Semaphore_Mutex = threading.BoundedSemaphore(value=1)
    self.reReject = []
    self.reNeeded = None
    consoleNg.Console.__init__(self, "dcrawl> ")
    # No handler thread until a crawl is started (see reset()).
    self.Handler = None
    self.Requests = []
def run(self):
    """Main polling loop: repeatedly check each configured service.

    For every service whose check interval has elapsed (now minus the
    configured day/hour interval is past its ``last_checked`` timestamp),
    fetch new findings, persist them via ``self.spotted`` and broadcast
    them via ``self.publisher``. Runs forever; per-iteration errors are
    logged and the loop continues.
    """
    while True:
        try:
            services = self.sounder.get()
            if not services:
                self._save_service_config()
            else:
                for service in services:
                    # Point in time `interval` ago; if it is at or past
                    # last_checked, the service is due for a run.
                    time_interval = pendulum.now().subtract(
                        days=service.day_interval,
                        hours=service.hour_interval)
                    if time_interval >= pendulum.parse(service.last_checked):
                        # `service` compares equal to at most one name, so
                        # an elif chain is equivalent to the original
                        # repeated ifs.
                        if service == 'openphish':
                            self._run_openphish(service)
                        elif service == 'phishingdatabase':
                            self._run_phishingdatabase(service)
                        elif service == 'phishtank':
                            self._run_phishtank(service)
                        elif service == 'twitter':
                            self._run_twitter(service)
                        elif service == 'urlscan':
                            self._run_urlscan(service)
                        elif service == 'webanalyzer':
                            self._run_webanalyzer(service)
                        elif service == 'whoisds':
                            self._run_whoisds(service)
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # can still terminate the loop.
        except Exception:
            print('ERROR: Error when calling spotter.run: {}'.format(
                sys.exc_info()[0]))

def _run_openphish(self, service):
    """Save and publish each OpenPhish URL finding."""
    from openphish import OpenPhish
    for finding in OpenPhish().get():
        self.spotted.save(url=finding, source=service)
        self.publisher.post(finding)

def _run_phishingdatabase(self, service):
    """Save and publish today's PhishingDatabase URL findings."""
    from phishingdatabase import PhishingDatabase
    for finding in PhishingDatabase().get(today=True):
        self.spotted.save(url=finding, source=service)
        self.publisher.post(finding)

def _run_phishtank(self, service):
    """Save and publish PhishTank findings with their network metadata."""
    from phishtank import PhishTank
    for finding in PhishTank().get():
        self.spotted.save(
            url=finding['url'],
            source=service,
            ipv4_address=finding['ip'],
            country=finding['country'],
            registrar=finding['registrar'])
        self.publisher.post(finding['url'])

def _run_twitter(self, service):
    """Scrape tweets newer than the stored id; save and publish URLs."""
    count = 0
    last_id = self.sounder.get(service=service)['last_id']
    if not last_id:
        last_id = None
    from twitterscraper import TwitterScraper
    for finding in TwitterScraper().get(since_id=last_id):
        # Only when no last_id was stored: remember the newest tweet id
        # (first finding) as the resume point for the next run.
        if not last_id and count == 0:
            self.sounder.save(service=service, last_id=finding['id'])
        count += 1
        self.spotted.save(
            tweet_extracted_urls=finding['extracted_urls'],
            tweet_urls=finding['urls'],
            tweet_hash_tags=finding['tags'],
            tweet_text=finding['text'],
            tweet_id=finding['id'],
            source=service)
        self.publisher.post(finding['extracted_urls'])

def _run_urlscan(self, service):
    """Save and publish UrlScan findings (source comes from the finding)."""
    from urlscan import UrlScan
    for finding in UrlScan().get():
        # NOTE(review): original passed `parsed_ur=`, an apparent typo for
        # `parsed_url=` — fixed here; confirm against spotted.save()'s
        # signature.
        self.spotted.save(
            url=finding['url'],
            parsed_url=finding['parsed_url'],
            ipv4_address=finding['ip'],
            country=finding['country'],
            domain=finding['domain'],
            source=finding['source'])
        self.publisher.post(finding['url'])

def _run_webanalyzer(self, service):
    """Save and publish each WebAnalyzer URL finding."""
    from webanalyzer import WebAnalyzer
    for finding in WebAnalyzer().get():
        self.spotted.save(url=finding, source=service)
        self.publisher.post(finding)

def _run_whoisds(self, service):
    """Save and publish each WhoisDs URL finding."""
    from whoisds import WhoisDs
    for finding in WhoisDs().get():
        self.spotted.save(url=finding, source=service)
        self.publisher.post(finding)