def extracturl():
    emailurlextract = open('crawler.txt', 'w')
    emailfile = open('emails.txt', 'r')
    soup = BeautifulSoup(emailfile.read())
    # URLs wrapped in anchor tags
    for tag in soup.findAll('a', href=True):
        link = tag['href']
        emailurlextract.write(normalize.normalizeurl(link.strip()) + '\n')
    # Plain-text URLs that are not wrapped in anchor tags
    sp1 = re.findall(r'http[s]?://[^\s<>"]+|www\.[^\s<>"]+', str(soup))
    for i in sp1:
        emailurlextract.write(normalize.normalizeurl(i) + "\n")
    emailfile.close()
    emailurlextract.close()
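# Illustrative note (not part of the original module): the regex above picks up
# plain-text URLs that BeautifulSoup's anchor-tag pass misses. A small example
# of what it returns (the sample string is made up):
#   re.findall(r'http[s]?://[^\s<>"]+|www\.[^\s<>"]+',
#              'see http://example.com/a?b=1 or www.example.org for details')
#   => ['http://example.com/a?b=1', 'www.example.org']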
def main(url):
    # Create the worker thread
    thread = threading.Thread(target=threadmaker)
    thread.setDaemon(True)
    thread.start()

    script_path = os.path.dirname(os.path.abspath(__file__))
    path = script_path + "/tmp"
    # print path
    # Create the tmp folder that holds the downloaded files
    if not os.path.isdir(path):
        os.makedirs(path)

    # Crawler
    executemechanize.crawler = True

    # Logging: create the debug folder and initialize the logger
    command = "mkdir -p debug/"  # create a temporary folder in your working space folder
    os.system(command)
    # open a timestamped log file; the handle is kept in sys.stdin and fed to a second stream handler
    sys.stdin = open("debug/" + time.asctime(time.localtime(time.time())) + ".log", "a")
    logger = logging.getLogger()
    sh = logging.StreamHandler()
    sh.setFormatter(SpecialFormatter())
    sh2 = logging.StreamHandler(sys.stdin)
    sh2.setFormatter(SpecialFormatter())
    logger.addHandler(sh)
    logger.addHandler(sh2)
    logger.setLevel(logging.INFO)

    # URL (this is the core part): normalize the url and queue it for crawling
    url = normalize.normalizeurl(url)
    dict = {}
    counter = 1
    if not url.startswith("http://") and not url.startswith("https://"):
        url = "http://" + url
    dict["url"] = url
    dict["counter"] = counter
    queue.put(dict)
    queue.join()
    scan.scanning(path)
def duplicateremover():
    mylist = list()
    fopen = open("crawler.txt", "r")
    for line in fopen:
        line = line.strip()
        line = normalize.normalizeurl(line)
        if line in mylist:
            continue
        if line == "invalid":
            continue
        if not line:
            continue
        mylist.append(line)
    fopen.close()
    mylist.sort()
    # Rewrite crawler.txt with the sorted, de-duplicated urls
    fopen = open("crawler.txt", "w")
    for line in mylist:
        fopen.write(line + "\n")
    fopen.close()
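# Sketch only: the main() entry points below assume a threadmaker() worker
# (defined elsewhere in this project) that drains the shared queue of
# {"url": ..., "counter": ...} dicts they enqueue. A hypothetical worker
# following that pattern could look like the function below; the real
# threadmaker and the exact executemechanize call signature may differ.
def _example_threadmaker_sketch():
    while True:
        item = queue.get()  # blocks until main() enqueues a url dict
        try:
            # hand the url to the crawler (call and signature are assumed)
            executemechanize.executemechanize(item["url"])
        finally:
            queue.task_done()  # lets queue.join() in main() return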
def main():
    # Create the worker thread
    thread = threading.Thread(target=threadmaker)
    thread.setDaemon(True)
    thread.start()

    script_path = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(
        description="Examples:\n./honeypot.py --url www.yahoo.com\n./honeypot.py --file <file path>\n./honeypot.py --blacklist\n./honeypot.py --email\n./honeypot.py --update\n./honeypot.py --search warez\n./honeypot.py --local <file/directory path>",
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--email", help="Retrieves your spam emails from your mail server and crawls the extracted URLs. Enter your email credentials in the honeypotconfig.py file!", action="store_true")
    parser.add_argument("--update", help="Updates the anti-virus signatures", action="store_true")
    parser.add_argument("--blacklist", help="Downloads lists of suspected malicious websites from three databases and retrieves/scans them accordingly", action="store_true")
    parser.add_argument("--file", nargs=1, help="Provide an input file", action="store")
    parser.add_argument("--url", nargs=1, help="Provide a URL", action="store")
    parser.add_argument("--search", nargs=1, help="Searches the Bing search engine for a keyword (a single keyword at the moment) and returns 100 results starting from the 20th result.", action="store")
    parser.add_argument("--local", nargs=1, help="Scans a local file or directory for malicious signatures.", action="store")
    parser.add_argument("--debug", help="Include the HTTP header", action="store_true")
    parser.add_argument("--crawler", help="Crawl the sites and save any executables found", action="store_true")
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    path = script_path + "/tmp"
    print path
    # Create the tmp folder that holds the downloaded files
    if not os.path.isdir(path):
        os.makedirs(path)

    # Crawler
    if args.crawler:
        executemechanize.crawler = True

    # Logging: create the debug folder and initialize the logger
    command = "mkdir -p debug/"  # create a temporary folder in your working space folder
    os.system(command)
    # open a timestamped log file; the handle is kept in sys.stdin and fed to a second stream handler
    sys.stdin = open("debug/" + time.asctime(time.localtime(time.time())) + ".log", "a")
    logger = logging.getLogger()
    sh = logging.StreamHandler()
    sh.setFormatter(SpecialFormatter())
    sh2 = logging.StreamHandler(sys.stdin)
    sh2.setFormatter(SpecialFormatter())
    logger.addHandler(sh)
    logger.addHandler(sh2)
    logger.setLevel(logging.INFO)
    if args.debug:
        logger.setLevel(logging.DEBUG)
        executemechanize.set_logging_level(logging.DEBUG)

    # Update antivirus signatures
    if args.update:
        updateantivirus.updateantivirus()

    # Blacklist databases
    if args.blacklist:
        try:
            if not os.path.exists("list"):
                os.mkdir("list")
        except OSError as e:
            logger.error(e)
        malwebsites.domaindownload()
        malwebsites.duplicateremover()
        urls = open("list/malwebsites.txt", "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.strip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Email
    if args.email:
        imapfile.imap()
        extraction.extracturl()        # extract urls from the emails.txt file
        extraction.duplicateremover()  # remove duplicate urls from crawler.txt (which now contains the urls extracted from emails.txt)
        os.remove("emails.txt")
        urls = open('crawler.txt', "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.rstrip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # File
    if args.file:
        mylist = list()
        mylist2 = list()
        counter = 0
        fopen3 = open(sys.argv[2], "r")
        for line in fopen3:
            dict = {}
            line = line.strip()
            counter += 1
            if not line.startswith("http://") and not line.startswith("https://"):
                line = "http://" + line
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        fopen3.close()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # URL
    if args.url:
        url = readurl()
        url = normalize.normalizeurl(url)
        dict = {}
        counter = 1
        if not url.startswith("http://") and not url.startswith("https://"):
            url = "http://" + url
        dict["url"] = url
        dict["counter"] = counter
        queue.put(dict)
        queue.join()
        # executemechanize.executemechanize(url)
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Search
    if args.search:
        keyword = sys.argv[2]
        bing.searchBing(keyword)
        mylist = list()
        fopen = open("list/searchresult.txt", "r")
        for line in fopen:
            line = line.strip()
            if not line:
                continue
            mylist.append(line)
        fopen.close()
        counter = 0
        for line in mylist:
            dict = {}
            counter += 1
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Local scan
    if args.local:
        path = sys.argv[2]
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)
def main():
    # Create the worker thread
    thread = threading.Thread(target=threadmaker)
    thread.setDaemon(True)
    thread.start()

    parser = argparse.ArgumentParser(
        description="Examples:\n./honeypot.py --url www.yahoo.com\n./honeypot.py --file <file path>\n./honeypot.py --blacklist\n./honeypot.py --email\n./honeypot.py --update\n./honeypot.py --search warez\n./honeypot.py --local <file/directory path>",
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--email", help="Retrieves your spam emails from your mail server and crawls the extracted URLs. Enter your email credentials in the honeypotconfig.py file!", action="store_true")
    parser.add_argument("--update", help="Updates the anti-virus signatures", action="store_true")
    parser.add_argument("--blacklist", help="Downloads lists of suspected malicious websites from three databases and retrieves/scans them accordingly", action="store_true")
    parser.add_argument("--file", nargs=1, help="Provide an input file", action="store")
    parser.add_argument("--url", nargs=1, help="Provide a URL", action="store")
    parser.add_argument("--search", nargs=1, help="Searches the Bing search engine for a keyword (a single keyword at the moment) and returns 100 results starting from the 20th result.", action="store")
    parser.add_argument("--local", nargs=1, help="Scans a local file or directory for malicious signatures.", action="store")
    parser.add_argument("--debug", help="Include the HTTP header", action="store_true")
    parser.add_argument("--crawler", help="Crawl the sites and save any executables found", action="store_true")
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    path = honeypotconfig.wdir + honeypotconfig.tmpfolder
    # Create the tmp folder that holds the downloaded files
    if not os.path.isdir(os.path.join(honeypotconfig.wdir, honeypotconfig.tmpfolder)):
        os.makedirs(os.path.join(honeypotconfig.wdir, honeypotconfig.tmpfolder))

    # Crawler
    if args.crawler:
        executemechanize.exe_crawler = True

    # Logging: create the debug folder and initialize the logger
    command = "mkdir -p " + honeypotconfig.wdir + "debug/"  # create a temporary folder in your working space folder
    os.system(command)
    # open a timestamped log file; the handle is kept in sys.stdin and fed to a second stream handler
    sys.stdin = open(honeypotconfig.wdir + "debug/" + time.asctime(time.localtime(time.time())) + ".log", "a")
    logger = logging.getLogger()
    sh = logging.StreamHandler()
    sh.setFormatter(SpecialFormatter())
    sh2 = logging.StreamHandler(sys.stdin)
    sh2.setFormatter(SpecialFormatter())
    logger.addHandler(sh)
    logger.addHandler(sh2)
    logger.setLevel(logging.INFO)
    if args.debug:
        logger.setLevel(logging.DEBUG)
        executemechanize.set_logging_level(logging.DEBUG)

    # Update antivirus signatures
    if args.update:
        updateantivirus.updateantivirus()

    # Blacklist databases
    if args.blacklist:
        try:
            if not os.path.exists(os.path.join(honeypotconfig.wdir, "list")):
                os.mkdir(os.path.join(honeypotconfig.wdir, "list"))
        except OSError as e:
            logger.error(e)
        malwebsites.domaindownload()
        malwebsites.duplicateremover()
        urls = open(honeypotconfig.wdir + "list/malwebsites.txt", "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.strip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Email
    if args.email:
        imapfile.imap()
        extractlink.extracturl()        # extract urls from the emails.txt file
        extractlink.duplicateremover()  # remove duplicate urls from crawler.txt (which now contains the urls extracted from emails.txt)
        urls = open('crawler.txt', "r")
        counter = 0
        for line in urls:
            dict = {}
            counter += 1
            dict["url"] = line.strip()
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # File
    if args.file:
        mylist = list()
        mylist2 = list()
        counter = 0
        fopen3 = open(sys.argv[2], "r")
        for line in fopen3:
            dict = {}
            line = line.strip()
            counter += 1
            if not line.startswith("http://") and not line.startswith("https://"):
                line = "http://" + line
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        fopen3.close()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # URL
    if args.url:
        url = readurl()
        url = normalize.normalizeurl(url)
        dict = {}
        counter = 1
        if not url.startswith("http://") and not url.startswith("https://"):
            url = "http://" + url
        dict["url"] = url
        dict["counter"] = counter
        queue.put(dict)
        queue.join()
        # executemechanize.executemechanize(url)
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Search
    if args.search:
        keyword = sys.argv[2]
        bing.searchBing(keyword)
        mylist = list()
        fopen = open("list/searchresult.txt", "r")
        for line in fopen:
            line = line.strip()
            if not line:
                continue
            mylist.append(line)
        fopen.close()
        counter = 0
        for line in mylist:
            dict = {}
            counter += 1
            dict["url"] = line
            dict["counter"] = counter
            queue.put(dict)
        queue.join()
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)

    # Local scan
    if args.local:
        path = sys.argv[2]
        scan.scanning(path)
        yaradetection.listandscan(path)
        unquote.unquoteDirectory(path)
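# Illustrative honeypotconfig.py sketch (assumed, not part of this file): the
# second main() above reads honeypotconfig.wdir and honeypotconfig.tmpfolder,
# and the --email option expects mail credentials in the same file. Only wdir
# and tmpfolder are referenced by the code here; the other names are hypothetical.
#   wdir = "/home/user/honeypot/"   # working directory; the trailing slash matters,
#                                   # since the code concatenates e.g. wdir + "debug/"
#   tmpfolder = "tmp"               # download folder created under wdir
#   user = "you@example.com"        # IMAP credentials used by imapfile.imap() (names assumed)
#   password = "changeme"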