/
resume.py
139 lines (107 loc) · 3.96 KB
/
resume.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# pip install spacy==2.3.5
# pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
from pyresparser import ResumeParser
import spacy
from pdfminer.high_level import extract_text
import docx2txt
import nltk
import re
from flask import Flask, jsonify, request
import uuid
from werkzeug.utils import secure_filename
import os
from flask_cors import CORS
import pymongo
from fuzzywuzzy import fuzz
# Flask application object; CORS(app) applies flask_cors to it so the API can
# be called from another origin.
app = Flask(__name__)
CORS(app)
# NLTK data packages are fetched at import time. extract_names() below relies
# on NLTK's sentence/word tokenizers, POS tagger and named-entity chunker.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
# The loaded model is discarded.
# NOTE(review): presumably a fail-fast check that the spaCy model needed by
# pyresparser is installed -- confirm.
spacy.load("en_core_web_sm")
# Hosted alternative (credentials redacted; keep secrets out of source control):
# client = pymongo.MongoClient("mongodb+srv://<user>:<password>@cluster0.elo9f.mongodb.net/hr-supp?retryWrites=true&w=majority")
client = pymongo.MongoClient("mongodb://localhost:27017/hr-supp")
db = client[ "hr-supp" ]
# Parsed resume profiles: written by the /upload handler, read by /recommendation.
col = db[ "profiles" ]
# Favorited profiles: written by /add_favorite, read by /recommendation.
fav = db[ "favorite" ]
# Uploaded resumes are saved here; the path is relative to the working directory.
UPLOAD_FOLDER = './uploads'
# Lower-case file extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'pdf'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
def allowed_file(filename):
    """Tell whether *filename* carries an extension listed in ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot and is compared
    case-insensitively. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
# Pattern used to pull e-mail addresses out of resume text.
# re.IGNORECASE is required: the character classes only list a-z, so without
# the flag an address such as "John.Doe@Example.com" is not matched at all.
EMAIL_REG = re.compile(
    r'[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+',
    re.IGNORECASE,
)
def extract_text_from_docx(docx_path):
    """Return the text of the .docx file at *docx_path* with tabs turned into spaces.

    Returns None when docx2txt yields nothing (empty or falsy result).
    """
    raw_text = docx2txt.process(docx_path)
    return raw_text.replace('\t', ' ') if raw_text else None
def extract_names(txt):
    """Collect every PERSON entity that NLTK's chunker finds in *txt*.

    The text is processed sentence by sentence: tokenize, POS-tag, then
    NE-chunk. Each PERSON subtree is flattened into a single
    space-separated string. Names are returned in order of appearance.
    """
    found = []
    for sentence in nltk.sent_tokenize(txt):
        tagged_tokens = nltk.pos_tag(nltk.word_tokenize(sentence))
        tree = nltk.ne_chunk(tagged_tokens)
        # Plain (word, tag) tuples have no label(); only subtrees do.
        found.extend(
            ' '.join(leaf[0] for leaf in node.leaves())
            for node in tree
            if hasattr(node, 'label') and node.label() == 'PERSON'
        )
    return found
def extract_text_from_pdf(pdf_path):
    """Return the text pdfminer's extract_text() produces for the PDF at *pdf_path*."""
    pdf_text = extract_text(pdf_path)
    return pdf_text
def extract_emails(resume_text):
    """Return all substrings of *resume_text* matched by the module-level EMAIL_REG."""
    matches = EMAIL_REG.findall(resume_text)
    return matches
@app.route('/add_favorite', methods=["POST"])
def add_favorite():
    """Store the posted JSON profile in the favorites collection and echo it back.

    Returns:
        The stored profile as JSON, with its ``_id`` rendered as a string.
        When the body is missing or is not a JSON object, a JSON error
        payload in the same ``{"status", "message"}`` shape the upload
        handler uses.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    profile = request.get_json(silent=True)
    if not isinstance(profile, dict):
        return jsonify({"status": 403, "message": "Expected a JSON object"})

    # insert_one replaces Collection.insert, which was deprecated in
    # PyMongo 3 and removed in PyMongo 4.
    result = fav.insert_one(profile)
    # The generated ObjectId is not JSON serialisable; stringify it the same
    # way the upload handler does before responding.
    profile["_id"] = str(result.inserted_id)
    return jsonify(profile)
@app.route('/recommendation', methods=["GET"])
def recommendation():
    """Return stored profiles that resemble at least one favorited profile.

    A profile is considered similar to a favorite when the token-sort
    similarity score of their space-joined skill lists is above 50 and their
    ``years_exp`` values differ by at most 2.

    Documents lacking ``skills`` or ``years_exp`` are skipped rather than
    raising ``KeyError`` (profiles stored by the upload handler in this file
    have no ``years_exp``). Each matching profile is returned once, even if
    it resembles several favorites.

    Returns:
        A JSON array of profile documents with ``_id`` rendered as a string.
    """
    # Read the candidate profiles once instead of re-querying per favorite,
    # and pre-join their skills so the inner loop only does the comparison.
    candidates = []
    for candidate in db.profiles.find({}):
        candidate_skills = candidate.get("skills")
        candidate_years = candidate.get("years_exp")
        if not candidate_skills or candidate_years is None:
            continue
        candidates.append((" ".join(candidate_skills), candidate_years, candidate))

    similar_profile = []
    seen_ids = set()
    for favorite in db.favorite.find({}):
        favorite_skills = favorite.get("skills")
        favorite_years = favorite.get("years_exp")
        if not favorite_skills or favorite_years is None:
            continue
        skills_string = ' '.join(favorite_skills)

        for profile_skills, years_exp, candidate in candidates:
            profile_id = str(candidate["_id"])
            if profile_id in seen_ids:
                continue
            score = fuzz.token_sort_ratio(profile_skills, skills_string)
            # "<= 2" also covers fractional differences, which the previous
            # membership test against [0, 1, 2] silently excluded.
            if score > 50 and abs(favorite_years - years_exp) <= 2:
                seen_ids.add(profile_id)
                # ObjectId is not JSON serialisable.
                candidate["_id"] = profile_id
                similar_profile.append(candidate)
    return jsonify(similar_profile)
@app.route('/upload', methods=["POST"])
def index():
    """Accept a PDF resume upload, parse it and store the resulting profile.

    The file is expected in the multipart field ``file``. It is saved under
    ``app.config['UPLOAD_FOLDER']`` with a random UUID name, parsed with
    pyresparser for skills, and scanned for e-mail addresses. The profile
    name is the local part of the first address found.

    Returns:
        On success, the stored profile as JSON with ``_id`` as a string and
        ``status`` 200. On a missing, unnamed or non-PDF file, a JSON payload
        ``{"status": 403, "message": ...}``.
    """
    if 'file' not in request.files:
        return jsonify({"status": 403, "message": "No file part"})
    upload = request.files['file']
    if upload.filename == '':
        return jsonify({"status": 403, "message": "No selected file"})
    if not allowed_file(upload.filename):
        return jsonify({"status": 403, "message": "Please upload pdf file"})

    # The client-supplied name is never used on disk; a UUID avoids both
    # collisions and path tricks. Use the configured folder for saving AND
    # parsing so the two cannot drift apart, and create it on first use.
    upload_dir = app.config['UPLOAD_FOLDER']
    os.makedirs(upload_dir, exist_ok=True)
    saved_path = os.path.join(upload_dir, str(uuid.uuid4()) + ".pdf")
    upload.save(saved_path)

    data = ResumeParser(saved_path).get_extracted_data() or {}
    text = extract_text_from_pdf(saved_path)

    user = {}
    emails = extract_emails(text)
    if emails:
        user["emails"] = emails
        user["name"] = emails[0].split("@")[0]
    # Always store a list so consumers can join the skills without a None check.
    user["skills"] = data.get("skills") or []
    user["source"] = "upload"

    # insert_one replaces Collection.insert, which was deprecated in
    # PyMongo 3 and removed in PyMongo 4.
    result = col.insert_one(user)
    # ObjectId is not JSON serialisable.
    user["_id"] = str(result.inserted_id)
    user["status"] = 200
    return jsonify(user)
if __name__ == '__main__':
    # Development entry point. Debug mode stays on by default (as before) but
    # can be switched off with FLASK_DEBUG=0/false/no, because the interactive
    # debugger must not be reachable outside a developer machine.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1").strip().lower() not in ("0", "false", "no")
    app.run(debug=debug_enabled, port=8000)