def get_matrix(mail, lexicon=None):
    """Build a bag-of-words count vector for ``mail`` over a lexicon.

    Args:
        mail: Iterable of word strings. Each word is lower-cased before it is
            looked up, so lexicon entries only match in their exact stored
            form (a lexicon entry containing upper-case letters never matches).
        lexicon: Optional sequence of lexicon words. When ``None`` (the
            default, and the only form existing callers use) the lexicon is
            unpickled from ``data/psocols.txt``. Passing it explicitly lets a
            caller load the file once and reuse it across calls.

    Returns:
        A 1-D ``float32`` NumPy array of length ``len(lexicon)``. Entry ``i``
        is the number of words in ``mail`` whose lower-cased form equals
        ``lexicon[i]``. If the lexicon contains duplicates, only the first
        occurrence of a word accumulates counts.
    """
    if lexicon is None:
        lexicon_path = get_full_path('data/psocols.txt')
        # NOTE: pickle.load can execute arbitrary code; this file must only
        # ever come from a trusted source.
        with open(lexicon_path, 'rb') as f:
            lexicon = pickle.load(f)

    # Map each word to its position once, instead of scanning the lexicon
    # twice per mail word. setdefault keeps the FIRST position of a repeated
    # word, which is what list.index() returned in the previous version.
    positions = {}
    for position, term in enumerate(lexicon):
        positions.setdefault(term, position)

    counts = np.zeros(len(lexicon))
    for word in mail:
        position = positions.get(word.lower())
        if position is not None:
            counts[position] += 1

    return counts.astype(np.float32)
def predict(body):
    """Classify one mail body and return the label for the predicted class.

    The body is run through ``utils.Process(body).process()``, turned into a
    single-row feature matrix with ``encoder.get_matrix``, and scored by the
    Keras model stored at ``data/model.h5``.

    Args:
        body: Raw mail content, passed unchanged to ``utils.Process``.

    Returns:
        ``predictions.get(<class index>)`` for the single input row.
        NOTE(review): ``predictions`` is presumably a dict mapping class
        index to label, in which case an unknown index yields ``None`` —
        confirm against its definition.
    """
    file_content = utils.Process(body)
    mail = file_content.process()
    # The model is fed a batch of exactly one sample.
    matrix = np.array([encoder.get_matrix(mail)])

    try:
        model = load_model(get_full_path('data/model.h5'))
        # compile() is not needed for inference; it is kept so the loaded
        # model is configured exactly as it was before this change.
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        # Sequential.predict_classes() was removed in TensorFlow 2.6. This is
        # the same computation it performed: argmax over a multi-unit output,
        # or a 0.5 threshold for a single-unit output.
        probabilities = model.predict(matrix)
        if probabilities.shape[-1] > 1:
            result = probabilities.argmax(axis=-1)
        else:
            result = (probabilities > 0.5).astype('int32')
    finally:
        # Release the Keras session even when loading or predicting fails, so
        # repeated calls do not accumulate graph state.
        backend.clear_session()

    return predictions.get(result[0])
def upload():
    """Render the upload page and, on POST, classify the uploaded mail.

    GET: renders ``base.html`` with the current blacklist text.

    POST: reads the ``file`` form field and clears the session. If the file
    is accepted by ``allowed_file``:

    * when ``black_checker`` flags the derived file name, the page is rendered
      with a "blacklisted" message;
    * otherwise the content is classified with ``predict``:
      ``'fraud'`` passes the name to ``update_blacklist`` and renders with a
      freshly read blacklist, ``'ham'`` renders the mail content, and
      ``'spam'`` renders a spam notice.

    Any other case (file rejected, or an unrecognised label) renders the page
    with whatever is still unread in the upload stream.

    Returns:
        The rendered ``base.html`` response.
    """
    blacklist = _read_text(get_full_path(blacklist_dir))

    if request.method != 'POST':
        return render_template('base.html', blacklist=blacklist)

    file = request.files['file']
    session.clear()

    if file and allowed_file(file.filename):
        filename = get_file_name(file.filename)

        if black_checker(filename):
            return render_template(
                'base.html',
                blacklist=_read_text(blacklist_dir),
                message='This sender has been blacklisted!!')

        content = file.read()
        detect = predict(content)

        if detect == 'fraud':
            update_blacklist(filename)
            # Re-read the blacklist after update_blacklist() so the page
            # shows the file's current contents.
            return render_template(
                'base.html',
                content=content,
                blacklist=_read_text(blacklist_dir),
                message=' This is a fraudulent mail.'
                'Therefore,the mail has been updated on the blacklist')

        if detect == 'ham':
            # The result used to be discarded here (missing ``return``), so a
            # ham mail fell through and was rendered with empty content.
            return render_template(
                'base.html', blacklist=blacklist, content=content)

        if detect == 'spam':
            return render_template(
                'base.html',
                blacklist=blacklist,
                message=
                'This mail is spam, therefore it will be sent to spam folder'
            )

        # A redirect(url_for('upload', ...)) call used to sit here, but its
        # return value was never used, so it had no effect and was removed.

    return render_template('base.html', blacklist=blacklist,
                           content=file.read())


def _read_text(path):
    """Return the full text of ``path``, closing the file handle afterwards."""
    with open(path) as handle:
        return handle.read()
from flask import Flask, request, session from core import predict from flask_bootstrap import Bootstrap from flask import url_for, redirect, render_template from dir import get_full_path import nltk app = Flask(__name__) bootstrap = Bootstrap(app) app.config['SECRET_KEY'] = 'hard to guess string' ALLOWED_EXTENSIONS = ['txt', 'csv'] blacklist_dir = get_full_path('data/blacklist.txt') @app.route('/', methods=['GET', 'POST']) def upload(): blacklist = open(get_full_path(blacklist_dir)).read() if request.method == 'POST': file = request.files['file'] session.clear() if file and allowed_file(file.filename): filename = get_file_name(file.filename) if black_checker(filename): return render_template( 'base.html', blacklist=open(blacklist_dir).read(), message='This sender has been blacklisted!!') else:
from dir import get_full_path from nltk.corpus import words def check(word): if word in words.words(): return True else: return False def worded(text): return re.findall(r'\w+', text.lower()) WORDS = Counter(worded(open(get_full_path('data/big.txt')).read())) def P(word, N=sum(WORDS.values())): """Probability of `word`.""" return WORDS[word] / N def correction(word): """Most probable spelling correction for word.""" return max(candidates(word), key=P) def candidates(word): """Generate possible spelling corrections for word.""" return (known([word]) or known(edits1(word)) or known(edits2(word))