Example #1
0
def result():
    """Return ``phishing_detection.getResult`` for the ``url`` entry of ``request.args``."""
    return phishing_detection.getResult(request.args['url'])
Example #2
0
def result():
    """Return ``phishing_detection.getResult`` for the ``name`` entry of ``request.args``."""
    return phishing_detection.getResult(request.args['name'])
def getURL():
    """Classify the URL submitted in the ``url`` form field and render the verdict.

    Two checks are combined:

    * a content-based model: the page is downloaded, tokenized with the
      byte-level BPE tokenizer, turned into a tf-idf weighted vector and
      scored with ``model.predict_proba``;
    * ``phishing_detection.getResult`` applied to the submitted URL string.

    The site is reported as phishing when either check flags it.

    Returns:
        The rendered ``home.html`` template with ``error`` set to
        ``"Phishing"`` or ``"Legitimate"``, or to a short failure message
        when the page cannot be downloaded. ``None`` for non-POST requests.
    """
    if request.method != 'POST':
        # NOTE(review): nothing is returned for other methods, exactly as
        # before; presumably the route is registered for POST only - confirm.
        return None

    url = request.form['url']
    print(url)
    tokenizerFolder = "tokenizer"
    savedModelDirectory = "saved_models"
    threshold = 0.5

    # NOTE: tokenizer, model and dictionary are re-read from disk on every
    # request; consider loading them once at start-up if latency matters.
    tokenizer = ByteLevelBPETokenizer(
        tokenizerFolder + "/tokenizer.tok-vocab.json",
        tokenizerFolder + "/tokenizer.tok-merges.txt",
    )
    tokenizerVocabSize = tokenizer.get_vocab_size()
    print("Tokenizer files have been loaded and the vocab size is %d..." % tokenizerVocabSize)
    model = load(savedModelDirectory + "/phishytics-model.joblib")
    print("Model loaded...")

    # Load document frequency dictionary.
    # SECURITY: allow_pickle=True unpickles arbitrary objects, so this file
    # must only ever come from a trusted source.
    docDict = np.load(savedModelDirectory + "/phishytics-model-tfidf-dictionary.npy", allow_pickle=True).item()
    print("Document frequency dictionary loaded...")

    # Testing
    print("Loading webpage...")
    try:
        # The URL is user-supplied: bound the wait so an unresponsive host
        # cannot block this handler indefinitely.
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException as e:
        print('\n', e)
        print("\nAn error occurred while loading the webpage.")
        # Report the failure to the user instead of terminating the server
        # process from inside a request handler.
        return render_template("home.html", error="Unable to load URL")
    webpageHtml = response.text.replace("\n", " ")

    # Convert text into feature vector
    output = tokenizer.encode(webpageHtml)
    outputDict = collections.Counter(output.ids)

    # Apply tfidf weighting: term count * log10(total documents / document frequency)
    totalFilesUnderConsideration = docDict["totalFilesUnderConsideration"]
    array = [0] * tokenizerVocabSize
    for item, count in outputDict.items():
        documents = docDict[item]
        # Tokens with no recorded documents keep a weight of 0 (and would
        # otherwise cause a division by zero).
        if len(documents) > 0:
            array[item] = count * math.log10(totalFilesUnderConsideration / len(documents))
    predictionProbability = model.predict_proba([array])[0][1]
    print("\n****************************\n--> Probability that the website is phishing: %.2f" % (predictionProbability * 100))

    modelSaysPhishing = predictionProbability > threshold
    prediction = "PHISHING" if modelSaysPhishing else "NOT PHISHING"
    print("--> Based on your threshold of %.2f, this website is +++'%s'+++" % (threshold, prediction))
    print("****************************")

    # Second opinion based on the URL string itself.
    result = phishing_detection.getResult(url)
    print(result)

    # Either detector flagging the site is enough to call it phishing.
    isPhishing = modelSaysPhishing or result == "Phishing Url"
    value = "Phishing" if isPhishing else "Legitimate"
    return render_template("home.html", error=value)