def extracturl():
    emailurlextract = open('crawler.txt', 'w')
    emailfile = open('emails.txt', 'r')
    soup = BeautifulSoup(emailfile.read())
    # URLs wrapped in anchor tags
    for tag in soup.findAll('a', href=True):
        link = tag['href']
        emailurlextract.write(normalize.normalizeurl(link.strip()) + '\n')
    # Plain-text URLs that are not wrapped in anchor tags
    sp1 = re.findall(r'http[s]?://[^\s<>"]+|www\.[^\s<>"]+', str(soup))
    for i in sp1:
        emailurlextract.write(normalize.normalizeurl(i) + "\n")
    emailfile.close()
    emailurlextract.close()
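# Illustrative note (not part of the original module): the regex above picks up
# plain-text URLs that BeautifulSoup's anchor-tag pass misses. A small example
# of what it returns (the sample string is made up):
#   re.findall(r'http[s]?://[^\s<>"]+|www\.[^\s<>"]+',
#              'see http://example.com/a?b=1 or www.example.org for details')
#   => ['http://example.com/a?b=1', 'www.example.org']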
def main(url):
    # Create the worker thread
    thread = threading.Thread(target=threadmaker)
    thread.setDaemon(True)
    thread.start()

    script_path = os.path.dirname(os.path.abspath(__file__))
    path = script_path + "/tmp"
    # print path
    # Create the tmp folder that holds the downloaded files
    if not os.path.isdir(path):
        os.makedirs(path)

    # Crawler
    executemechanize.crawler = True

    # Logging: create the debug folder and initialize the logger
    command = "mkdir -p debug/"  # create a temporary folder in your working space folder
    os.system(command)
    # open a timestamped log file; the handle is kept in sys.stdin and fed to a second stream handler
    sys.stdin = open("debug/" + time.asctime(time.localtime(time.time())) + ".log", "a")
    logger = logging.getLogger()
    sh = logging.StreamHandler()
    sh.setFormatter(SpecialFormatter())
    sh2 = logging.StreamHandler(sys.stdin)
    sh2.setFormatter(SpecialFormatter())
    logger.addHandler(sh)
    logger.addHandler(sh2)
    logger.setLevel(logging.INFO)

    # URL (this is the core part): normalize the url and queue it for crawling
    url = normalize.normalizeurl(url)
    dict = {}
    counter = 1
    if not url.startswith("http://") and not url.startswith("https://"):
        url = "http://" + url
    dict["url"] = url
    dict["counter"] = counter
    queue.put(dict)
    queue.join()
    scan.scanning(path)
def duplicateremover():
    mylist = list()
    fopen = open("crawler.txt", "r")
    for line in fopen:
        line = line.strip()
        line = normalize.normalizeurl(line)
        if line in mylist:
            continue
        if line == "invalid":
            continue
        if not line:
            continue
        mylist.append(line)
    fopen.close()
    mylist.sort()
    # Rewrite crawler.txt with the sorted, de-duplicated urls
    fopen = open("crawler.txt", "w")
    for line in mylist:
        fopen.write(line + "\n")
    fopen.close()
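# Sketch only: the main() entry points below assume a threadmaker() worker
# (defined elsewhere in this project) that drains the shared queue of
# {"url": ..., "counter": ...} dicts they enqueue. A hypothetical worker
# following that pattern could look like the function below; the real
# threadmaker and the exact executemechanize call signature may differ.
def _example_threadmaker_sketch():
    while True:
        item = queue.get()  # blocks until main() enqueues a url dict
        try:
            # hand the url to the crawler (call and signature are assumed)
            executemechanize.executemechanize(item["url"])
        finally:
            queue.task_done()  # lets queue.join() in main() return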
def main():
    # Create the worker thread
    thread = threading.Thread(target=threadmaker)
    thread.setDaemon(True)
    thread.start()

    script_path = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(
        description="Examples:\n./honeypot.py --url www.yahoo.com\n./honeypot.py --file <file path>\n./honeypot.py --blacklist\n./honeypot.py --email\n./honeypot.py --update\n./honeypot.py --search warez\n./honeypot.py --local <file/directory path>",
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--email", help="Retrieves your spam emails from your mail server and crawls the extracted URLs. Enter your email credentials in the honeypotconfig.py file!", action="store_true")
    parser.add_argument("--update", help="Updates the anti-virus signatures", action="store_true")
    parser.add_argument("--blacklist", help="Downloads lists of suspected malicious websites from three databases and retrieves/scans them accordingly", action="store_true")
    parser.add_argument("--file", nargs=1, help="Provide an input file", action="store")
    parser.add_argument("--url", nargs=1, help="Provide a URL", action="store")
    parser.add_argument("--search", nargs=1, help="Searches the Bing search engine for a keyword (a single keyword at the moment) and returns 100 results starting from the 20th result.", action="store")
    parser.add_argument("--local", nargs=1, help="Scans a local file or directory for malicious signatures.", action="store")
    parser.add_argument("--debug", help="Include the HTTP header", action="store_true")
    parser.add_argument("--crawler", help="Crawl the sites and save any executables found", action="store_true")
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    path = script_path + "/tmp"
    print path
    # Create the tmp folder that holds the downloaded files
    if not os.path.isdir(path):
        os.makedirs(path)

    # Crawler
    if args.crawler:
        executemechanize.crawler = True

    # Logging: create the debug folder and initialize the logger
    command = "mkdir -p debug/"  # create a temporary folder in your working space folder
    os.system(command)
    # open a timestamped log file; the handle is kept in sys.stdin and fed to a second stream handler
    sys.stdin = open("debug/" + time.asctime(time.localtime(time.time())) + ".log", "a")
    logger = logging.getLogger()
    sh = logging.StreamHandler()
    sh.setFormatter(SpecialFormatter())
    sh2 = logging.StreamHandler(sys.stdin)
    sh2.setFormatter(SpecialFormatter())
    logger.addHandler(sh)
    logger.addHandler(sh2)
    logger.setLevel(logging.INFO)
    if args.debug:
        logger.setLevel(logging.DEBUG)
        executemechanize.set_logging_level(logging.DEBUG)

    # Update antivirus signatures
    if args.update:
        updateantivirus.updateantivirus()

    # Blacklist databases
    if args.blacklist:
        try:
            if not os.path.exists("list"):
                os.mkdir("list")
        except OSError as e:
            logger.error(e)
        malwebsites.domaindownload()
        malwebsites.duplicateremover()
        urls = open("list/malwebsites.txt", "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.strip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Email
    if args.email:
        imapfile.imap()
        extraction.extracturl()        # extract urls from the emails.txt file
        extraction.duplicateremover()  # remove duplicate urls from crawler.txt (which now contains the urls extracted from emails.txt)
        os.remove("emails.txt")
        urls = open('crawler.txt', "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.rstrip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # File
    if args.file:
        mylist = list()
        mylist2 = list()
        counter = 0
        fopen3 = open(sys.argv[2], "r")
        for line in fopen3:
            dict = {}
            line = line.strip()
            counter += 1
            if not line.startswith("http://") and not line.startswith("https://"):
                line = "http://" + line
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        fopen3.close()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # URL
    if args.url:
        url = readurl()
        url = normalize.normalizeurl(url)
        dict = {}
        counter = 1
        if not url.startswith("http://") and not url.startswith("https://"):
            url = "http://" + url
        dict["url"] = url
        dict["counter"] = counter
        queue.put(dict)
        queue.join()
        # executemechanize.executemechanize(url)
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Search
    if args.search:
        keyword = sys.argv[2]
        bing.searchBing(keyword)
        mylist = list()
        fopen = open("list/searchresult.txt", "r")
        for line in fopen:
            line = line.strip()
            if not line:
                continue
            mylist.append(line)
        fopen.close()
        counter = 0
        for line in mylist:
            dict = {}
            counter += 1
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Local scan
    if args.local:
        path = sys.argv[2]
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)
def main():
    # Create the worker thread
    thread = threading.Thread(target=threadmaker)
    thread.setDaemon(True)
    thread.start()

    parser = argparse.ArgumentParser(
        description="Examples:\n./honeypot.py --url www.yahoo.com\n./honeypot.py --file <file path>\n./honeypot.py --blacklist\n./honeypot.py --email\n./honeypot.py --update\n./honeypot.py --search warez\n./honeypot.py --local <file/directory path>",
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--email", help="Retrieves your spam emails from your mail server and crawls the extracted URLs. Enter your email credentials in the honeypotconfig.py file!", action="store_true")
    parser.add_argument("--update", help="Updates the anti-virus signatures", action="store_true")
    parser.add_argument("--blacklist", help="Downloads lists of suspected malicious websites from three databases and retrieves/scans them accordingly", action="store_true")
    parser.add_argument("--file", nargs=1, help="Provide an input file", action="store")
    parser.add_argument("--url", nargs=1, help="Provide a URL", action="store")
    parser.add_argument("--search", nargs=1, help="Searches the Bing search engine for a keyword (a single keyword at the moment) and returns 100 results starting from the 20th result.", action="store")
    parser.add_argument("--local", nargs=1, help="Scans a local file or directory for malicious signatures.", action="store")
    parser.add_argument("--debug", help="Include the HTTP header", action="store_true")
    parser.add_argument("--crawler", help="Crawl the sites and save any executables found", action="store_true")
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    path = honeypotconfig.wdir + honeypotconfig.tmpfolder
    # Create the tmp folder that holds the downloaded files
    if not os.path.isdir(os.path.join(honeypotconfig.wdir, honeypotconfig.tmpfolder)):
        os.makedirs(os.path.join(honeypotconfig.wdir, honeypotconfig.tmpfolder))

    # Crawler
    if args.crawler:
        executemechanize.exe_crawler = True

    # Logging: create the debug folder and initialize the logger
    command = "mkdir -p " + honeypotconfig.wdir + "debug/"  # create a temporary folder in your working space folder
    os.system(command)
    # open a timestamped log file; the handle is kept in sys.stdin and fed to a second stream handler
    sys.stdin = open(honeypotconfig.wdir + "debug/" + time.asctime(time.localtime(time.time())) + ".log", "a")
    logger = logging.getLogger()
    sh = logging.StreamHandler()
    sh.setFormatter(SpecialFormatter())
    sh2 = logging.StreamHandler(sys.stdin)
    sh2.setFormatter(SpecialFormatter())
    logger.addHandler(sh)
    logger.addHandler(sh2)
    logger.setLevel(logging.INFO)
    if args.debug:
        logger.setLevel(logging.DEBUG)
        executemechanize.set_logging_level(logging.DEBUG)

    # Update antivirus signatures
    if args.update:
        updateantivirus.updateantivirus()

    # Blacklist databases
    if args.blacklist:
        try:
            if not os.path.exists(os.path.join(honeypotconfig.wdir, "list")):
                os.mkdir(os.path.join(honeypotconfig.wdir, "list"))
        except OSError as e:
            logger.error(e)
        malwebsites.domaindownload()
        malwebsites.duplicateremover()
        urls = open(honeypotconfig.wdir + "list/malwebsites.txt", "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.strip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Email
    if args.email:
        imapfile.imap()
        extractlink.extracturl()        # extract urls from the emails.txt file
        extractlink.duplicateremover()  # remove duplicate urls from crawler.txt (which now contains the urls extracted from emails.txt)
        urls = open('crawler.txt', "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.strip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # File
    if args.file:
        mylist = list()
        mylist2 = list()
        counter = 0
        fopen3 = open(sys.argv[2], "r")
        for line in fopen3:
            dict = {}
            line = line.strip()
            counter += 1
            if not line.startswith("http://") and not line.startswith("https://"):
                line = "http://" + line
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        fopen3.close()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # URL
    if args.url:
        url = readurl()
        url = normalize.normalizeurl(url)
        dict = {}
        counter = 1
        if not url.startswith("http://") and not url.startswith("https://"):
            url = "http://" + url
        dict["url"] = url
        dict["counter"] = counter
        queue.put(dict)
        queue.join()
        # executemechanize.executemechanize(url)
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Search
    if args.search:
        keyword = sys.argv[2]
        bing.searchBing(keyword)
        mylist = list()
        fopen = open("list/searchresult.txt", "r")
        for line in fopen:
            line = line.strip()
            if not line:
                continue
            mylist.append(line)
        fopen.close()
        counter = 0
        for line in mylist:
            dict = {}
            counter += 1
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Local scan
    if args.local:
        path = sys.argv[2]
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)
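# Illustrative honeypotconfig.py sketch (assumed, not part of this file): the
# second main() above reads honeypotconfig.wdir and honeypotconfig.tmpfolder,
# and the --email option expects mail credentials in the same file. Only wdir
# and tmpfolder are referenced by the code here; the other names are hypothetical.
#   wdir = "/home/user/honeypot/"   # working directory; the trailing slash matters,
#                                   # since the code concatenates e.g. wdir + "debug/"
#   tmpfolder = "tmp"               # download folder created under wdir
#   user = "you@example.com"        # IMAP credentials used by imapfile.imap() (names assumed)
#   password = "changeme"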