def home(request):
    """Render the home page; on POST, parse an uploaded resume and show its skills.

    Accepts PDF and DOCX uploads, saves the file via FileSystemStorage,
    extracts data with pyresparser, then always deletes the temporary file.
    Renders ``home.html`` with either an error message or the skills list.
    """
    msg = None
    skills = None
    # BUG FIX: the list only allowed PDF while the error message below
    # promises "pdf and docx" — accept the DOCX MIME type as well.
    allowed_file_type = [
        'application/pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    ]
    if request.method == 'POST':
        resume_file = request.FILES.get('resume')
        file_path = None
        if resume_file:
            if resume_file.content_type in allowed_file_type:
                try:
                    fs = FileSystemStorage()
                    filename = fs.save(resume_file.name, resume_file)
                    file_path = os.path.join(fs.location, filename)
                    parsed_data = ResumeParser(file_path).get_extracted_data()
                except Exception as e:
                    msg = f"Error occurred while parsing the CV. Detail error msg: {str(e)}"
                else:
                    skills = parsed_data.get('skills')
                finally:
                    # Always clean up the temporary upload, parse or fail.
                    if file_path and os.path.isfile(file_path):
                        os.remove(file_path)
            else:
                msg = "Please provide pdf and docx document"
        else:
            msg = "Please upload your resume"
    return render(request, 'home.html', context={'msg': msg, 'skills': skills})
def handleResume(request):
    """Handle a resume upload: save it, parse it, and persist a Candidate row.

    BUG FIX: pyresparser can return None for individual fields; the original
    crashed on ``float(None)`` / ``len(None)`` for sparse resumes, so missing
    numeric/list fields are now defaulted.
    """
    if request.method == 'POST':
        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        resume = request.FILES.get('resume', None)
        if resume:
            saving = Resume(resume=resume)
            saving.save()
            media_path = os.path.join(BASE_DIR, 'resumes')
            # NOTE(review): assumes the stored name always contains a '/'
            # separator ('<subdir>/<file>') — verify on non-default storages.
            lpart = str(saving.resume).split('/')
            full_path = os.path.join(media_path, lpart[1])
            data = ResumeParser(str(full_path)).get_extracted_data()
            candidate = Candidate(
                name=data.get('name'),
                email=data.get('email'),
                phone=data.get('mobile_number'),
                # Default missing fields instead of crashing on None.
                experience=float(data.get('total_experience') or 0),
                total_skills=len(data.get('skills') or []),
                designation=data.get('designation'),
                company="N/A" if data.get('company_names') is None else data.get('company_names'),
            )
            candidate.save()
            return render(request, "app/home.html", {})
    return render(request, "app/cvform.html", {})
def __extract_from_file(self, file):
    """Parse a local resume file.

    Returns a single-element list holding the extracted data dict, or an
    error string when the path does not exist.
    """
    if not os.path.exists(file):
        return 'File not found. Please provide a valid file name.'
    print_cyan('Extracting data from: {}'.format(file))
    return [ResumeParser(file).get_extracted_data()]
def handle_uploaded_file(cv):
    """Extract the skills mentioned in an uploaded CV.

    The CV is written chunk-by-chunk to a temporary path under ``uploads``,
    parsed with pyresparser, and the temporary file is removed afterwards.

    Parameters:
        cv (file): CV uploaded from the UI.

    Returns:
        list of str: Skills found in the CV; empty when extraction fails.
    """
    temp_path = os.path.join(settings.BASE_DIR, "uploads", cv.name)
    with open(temp_path, "wb+") as out:
        for chunk in cv.chunks():
            out.write(chunk)
    try:
        parsed = ResumeParser(temp_path).get_extracted_data()
    except Exception:
        # Best-effort: a parse failure yields an empty skill list.
        parsed = {"skills": []}
    finally:
        os.remove(temp_path)
    return parsed.get("skills", [])
def __extract_from_file(self, file, skills_file=None, custom_regex=None):
    """Parse a local resume file, optionally with a custom skills file/regex.

    Exits the process with status 1 when the file does not exist.
    """
    if not os.path.exists(file):
        print('File not found. Please provide a valid file name')
        sys.exit(1)
    print_cyan('Extracting data from: {}'.format(file))
    parser = ResumeParser(file, skills_file, custom_regex)
    return [parser.get_extracted_data()]
def __extract_from_remote_file(self, remote_file):
    """Download a resume from a URL and parse it in memory.

    Returns a single-element list holding the extracted data dict.
    """
    print_cyan('Extracting data from: {}'.format(remote_file))
    req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
    # BUG FIX: close the HTTP response deterministically instead of leaking it.
    with urlopen(req) as response:
        webpage = response.read()
    _file = io.BytesIO(webpage)
    # pyresparser only uses the name to detect the file extension.
    _file.name = remote_file.split('/')[-1]
    resume_parser = ResumeParser(_file)
    return [resume_parser.get_extracted_data()]
def resume_result_wrapper(args):
    """Worker helper: parse one resume.

    *args* is either a (path, skills_file) pair or a bare path.
    """
    if len(args) == 2:
        source, skills_file = args
        print_cyan('Extracting data from: {}'.format(source))
        parser = ResumeParser(source, skills_file)
    else:
        print_cyan('Extracting data from: {}'.format(args))
        parser = ResumeParser(args)
    return parser.get_extracted_data()
def __extract_from_remote_file(self, remote_file):
    """Download a resume from a URL and parse it in memory.

    Returns a single-element list with the extracted data, or an error
    string when the URL yields an HTTP error.
    """
    try:
        print_cyan('Extracting data from: {}'.format(remote_file))
        req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
        # BUG FIX: close the HTTP response deterministically instead of leaking it.
        with urlopen(req) as response:
            webpage = response.read()
        _file = io.BytesIO(webpage)
        # pyresparser only uses the name to detect the file extension.
        _file.name = remote_file.split('/')[-1]
        resume_parser = ResumeParser(_file)
        return [resume_parser.get_extracted_data()]
    except urllib.error.HTTPError:
        return 'File not found. Please provide correct URL for resume file.'
def get_remote_data():
    """Download a sample resume PDF from a fixed URL and parse it.

    Returns a single-element list with the extracted data, or an error
    string on HTTP failure.
    """
    try:
        remote_file = 'https://www.omkarpathak.in/downloads/OmkarResume.pdf'
        print('Extracting data from: {}'.format(remote_file))
        req = Request(remote_file, headers={'User-Agent': 'Mozilla/5.0'})
        # BUG FIX: close the HTTP response deterministically instead of leaking it.
        with urlopen(req) as response:
            webpage = response.read()
        _file = io.BytesIO(webpage)
        # pyresparser only uses the name to detect the file extension.
        _file.name = remote_file.split('/')[-1]
        resume_parser = ResumeParser(_file)
        return [resume_parser.get_extracted_data()]
    except urllib.error.HTTPError:
        return 'File not found. Please provide correct URL for resume file.'
def extract_resume(url, skills_file=None, custom_regex=None):
    """Download a resume PDF from *url* and parse it with pyresparser.

    Returns a single-element list with the extracted data dict.

    Raises:
        NameError: when the URL returns an HTTP error (message is also printed).
    """
    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # BUG FIX: close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(req) as response:
            webpage = response.read()
        _file = io.BytesIO(webpage)
        _file.name = 'test.pdf'  # the name is only really needed to identify the extension
        resume_parser = ResumeParser(_file, skills_file, custom_regex)
        return [resume_parser.get_extracted_data()]
    except urllib.error.HTTPError:
        s = 'File not found. Please provide correct URL for resume file.'
        print(s)
        raise NameError(s)
def homepage(request):
    """Upload one or more resumes, parse each, and render the results table.

    On POST: clears all stored resumes, saves each uploaded file, extracts
    fields with pyresparser, and persists them on the Resume model.
    On GET: renders the empty upload form.
    """
    if request.method == 'POST':
        Resume.objects.all().delete()
        file_form = UploadResumeModelForm(request.POST, request.FILES)
        files = request.FILES.getlist('resume')
        resumes_data = []
        if file_form.is_valid():
            for file in files:
                try:
                    # Save the upload so the parser can read it from MEDIA_ROOT.
                    resume = Resume(resume=file)
                    resume.save()
                    # extracting resume entities
                    parser = ResumeParser(
                        os.path.join(settings.MEDIA_ROOT, resume.resume.name))
                    data = parser.get_extracted_data()
                    resumes_data.append(data)
                    resume.name = data.get('name')
                    resume.email = data.get('email')
                    resume.mobile_number = data.get('mobile_number')
                    # List-valued fields are stored as comma-separated strings.
                    degree = data.get('degree')
                    resume.education = ', '.join(degree) if degree is not None else None
                    resume.company_names = data.get('company_names')
                    resume.college_name = data.get('college_name')
                    resume.designation = data.get('designation')
                    resume.total_experience = data.get('total_experience')
                    skills = data.get('skills')
                    resume.skills = ', '.join(skills) if skills is not None else None
                    experience = data.get('experience')
                    resume.experience = ', '.join(experience) if experience is not None else None
                    resume.save()
                except IntegrityError:
                    # BUG FIX: the third positional argument of messages.warning
                    # is extra_tags, not part of the message — format the
                    # filename into the message instead.
                    messages.warning(request, f'Duplicate resume found: {file.name}')
                    return redirect('homepage')
            resumes = Resume.objects.all()
            messages.success(request, 'Resumes uploaded!')
            context = {
                'resumes': resumes,
            }
            return render(request, 'base.html', context)
    else:
        form = UploadResumeModelForm()
        return render(request, 'base.html', {'form': form})
def gather_data(type, direcotry):
    """Parse a test resume and return the extracted data.

    Parameters:
        type (str): 'pdf' or 'img' selects the input kind.
        direcotry: unused (name kept as-is; typo of 'directory').

    Returns:
        dict: extracted resume data, or 0 for an unsupported type.
    """
    if (type == 'pdf'):
        data = ResumeParser("./test_data/cv.pdf").get_extracted_data()
    elif (type == 'img'):
        # NOTE(review): this branch parses the same PDF as the 'pdf' branch —
        # looks like a copy-paste leftover; confirm the intended image path.
        data = ResumeParser("./test_data/cv.pdf").get_extracted_data()
    else:
        return 0
    # text_data = text_retriever.retrieve_text()
    # print(data)
    return data
def main():
    """Scan *mypath* for one JD file plus resumes, score each resume against
    the JD, and return a dict of selected profile fields keyed by index.

    Returns a JSON error payload (string) when the JD metadata is not a dict.
    """
    jd_file_path = ""
    # All regular files directly inside *mypath* (module-level directory).
    total_files = [
        os.path.join(mypath, f) for f in os.listdir(mypath)
        if os.path.isfile(os.path.join(mypath, f))
    ]
    # The JD is identified by a 'jd_' marker in its lowercased path.
    # NOTE(review): raises IndexError if no JD file is present — confirm
    # that callers guarantee one.
    jd_file_path = [
        path for path in total_files if str('jd_') in path.lower()
    ][0]
    jd_metadata = get_meta_JD(jd_file_path)
    if type(jd_metadata) != dict:
        #print("\n\t** JD format Error **")
        temp = {"415": "JD is in not specific format"}
        return json.dumps(temp)
        # return ("\n\t**### Error ###**\n \tJD is not in specific format.\n\t Kindly check JD format.\n ")
    # print(jd_metadata)
    # print("files:",total_files)
    # Every other file is treated as a candidate resume.
    resume_files = total_files.copy()
    resume_files.remove(jd_file_path)
    # print("resume_files",resume_files)
    profiles = {}
    i = 0
    while i < len(resume_files):
        file = resume_files[i]
        #print(file)
        try:
            text = file_reader(file)
            profile_dic = ResumeParser(file).get_extracted_data()
            profile_dic['total_experience'] = get_experience(text)
            # Score the profile text against the JD metadata.
            prof_scr_val, comments_list = profile_score(
                text, profile_dic, jd_metadata)
            profile_dic['profile_score'] = prof_scr_val
            profile_dic['comments'] = comments_list
            job_id, candidate_id, candidate_name = get_job_id_name(file)
            profile_dic.update({
                'job_id': job_id,
                'candidate_id': candidate_id,
                'candidate_name': candidate_name
            })
            # print("\n\n",profile_dic)
            profiles[i] = select_field_dic(profile_dic)
        except Exception as e:
            # NOTE(review): failures are silently skipped, so an unparsable
            # resume simply leaves a gap in *profiles* — consider logging `e`.
            pass
        i += 1
    print(profiles)
    return profiles
def success():
    """Flask endpoint: parse an uploaded resume and return the extracted data.

    The upload is written to the working directory, parsed with pyresparser,
    and the temporary file is always removed.
    """
    if request.method == 'POST':
        f = request.files['file']
        # SECURITY FIX: strip directory components from the client-supplied
        # name to prevent path traversal when saving.
        filename = os.path.basename(f.filename)
        f.save(filename)
        try:
            data = ResumeParser(filename).get_extracted_data()
        finally:
            # BUG FIX: the original leaked the temp file when parsing raised.
            os.remove(filename)
        return data
def get_section_wise(section="PROJECTS", file_path=None): """ Get the text of the defined section along with the Analysed result. """ # Read file doc = docx.Document(file_path) fullText = [] for para in doc.paragraphs: fullText.append(para.text) read_data_docx = fullText # Filter Project related lines projects = list() flag = 0 for i, line in enumerate(read_data_docx): if section in line: projects = read_data_docx[i:] break # Get projects related text project_content = projects[:find_seperation(projects)] # Save the projects related text name = file_path.split('.')[0] ext = file_path.split('.')[1] document = docx.Document() p = document.add_paragraph('\n'.join(project_content)) document.save(unique_id + '_' + str(section).lower() + '.' + ext) # Parse again on the projects to find skills related to Projects data_projects = ResumeParser(unique_id + '_' + str(section).lower() + '.' + ext).get_extracted_data() return project_content, data_projects
def applicant_file(request):
    """Save an uploaded resume, parse the newest file in media/, and dump the
    parsed fields to a JSON file consumed by the applicant screen template.
    """
    print("Second Page ")
    if request.method == 'POST':
        #Get the uploaded file
        fileuploaded = request.FILES['file1']
        fs = FileSystemStorage()
        fs.save(fileuploaded.name, fileuploaded)
        # TODO(review): hard-coded absolute Windows path — derive from
        # settings.MEDIA_ROOT instead.
        list_of_files = glob.glob(
            'C:\\Users\\prade\\PycharmProjects\\ResumePortal\\media\\*'
        )  # * means all if need specific format then *.csv
        # The most recently created file is assumed to be the one just saved.
        latest_file = max(list_of_files, key=os.path.getctime)
        print(latest_file)
        # Parse the uploaded resume
        parsed_details = ResumeParser(latest_file).get_extracted_data()
        print(parsed_details)
        resume_dict = parsed_details
        # Converting to JSON (round-trip also verifies JSON-serializability)
        loaded_json = json.loads(json.dumps(resume_dict))
        print(loaded_json)
        # converting into .JSON file for .HTML
        # TODO(review): hard-coded template path — make it relative.
        with open(
                'C:\\Users\\prade\\PycharmProjects\\ResumePortal\\resumeapp\\templates\\resumeapp\\Resume_details.json',
                'w') as fp:
            json.dump(loaded_json, fp)
    return render(request, 'resumeapp/Applicant_Screen.html')
def uploadFile():
    """Save the uploaded resume into UPLOAD_FOLDER, parse it, render the form."""
    upload = request.files['file']
    safe_name = secure_filename(upload.filename)
    # Compute the destination path once; reuse it for saving and parsing.
    target = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    upload.save(target)
    parsed = ResumeParser(target).get_extracted_data()
    return render_template("create_new.html", parsedData=parsed)
def index():
    """Flask endpoint: accept a PDF resume upload, parse it, store the user
    document in Mongo, and return it as JSON.
    """
    if 'file' not in request.files:
        return jsonify({"status": 403, "message": "No file part"})
    file = request.files['file']
    if file.filename == '':
        return jsonify({"status": 403, "message": "No selected file"})
    # IDIOM FIX: 'not allowed_file(...)' instead of '== False'.
    if not allowed_file(file.filename):
        return jsonify({"status": 403, "message": "Please upload pdf file"})
    # The guards above guarantee a valid upload from here on; the original
    # re-checked 'file and allowed_file(...)' redundantly.
    filename = str(uuid.uuid4()) + ".pdf"
    # Store under a random name so client-supplied names never hit the disk.
    file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    user = {}
    data = ResumeParser('./uploads/' + filename).get_extracted_data()
    text = extract_text_from_pdf('./uploads/' + filename)
    names = extract_names(text)
    emails = extract_emails(text)
    if emails:
        user["emails"] = emails
        # Use the local part of the first e-mail as the display name.
        user["name"] = emails[0].split("@")[0]
    user["skills"] = data["skills"]
    user["source"] = "upload"
    col.insert(user)
    print(user)
    # ObjectId is not JSON-serializable; stringify before returning.
    user["_id"] = str(user["_id"])
    user["status"] = 200
    return jsonify(user)
def index(request):
    """Parse an uploaded resume (.pdf/.doc/.docx) and render its email and skills."""
    if request.method == 'POST' and request.FILES['file']:
        start_time = time.time()
        upload_file = request.FILES['file']
        extension = os.path.splitext(upload_file.name)[1]
        # BUG FIX: the original "extension == '.pdf' or '.doc'" was always
        # truthy ('.doc' is a non-empty string); test membership instead.
        # '.docx' is included since pyresparser handles it too.
        if extension in ('.pdf', '.doc', '.docx'):
            # Rename to a timestamp so concurrent uploads never collide.
            rename = datetime.datetime.now().strftime(
                "%Y_%m_%d %H_%M_%S") + extension
            fss = FileSystemStorage()
            filename = fss.save(rename, upload_file)
            upload_file_path = fss.path(filename)
            print("upload file path:", upload_file_path)
            data = ResumeParser(upload_file_path).get_extracted_data()
            print("resume data:", data)
            # os.remove(upload_file_path)
            stop_time = time.time()
            response = {
                #'upload_file_path': upload_file_path,
                # 'Name': data['name'],
                'Email': data['email'],
                'Skills': data['skills'],
                #'time': format(stop_time - start_time, '.2f'),
            }
            return render(request, 'skillreader/index.html', context=response)
    # GET requests and unsupported extensions fall through to the bare page.
    return render(request, 'skillreader/index.html')
def extract_skills(resume_pdf):
    """Return (skills_list, skills_joined_string) extracted from a resume PDF."""
    from pyresparser import ResumeParser
    parsed = ResumeParser(resume_pdf).get_extracted_data()
    skill_list = parsed['skills']
    # Second element is the same skills flattened into one space-separated string.
    return skill_list, " ".join(skill_list)
def upload_doc():
    """Eve/Flask endpoint: accept a single resume upload, parse it, store the
    result in MongoDB ('hackit.resume'), and flash an HTML skills summary.
    """
    client = app.data.driver.db.client
    db = client['hackit']
    if request.method == 'POST':
        # check if the post request has the file part
        file = request.files['resume']
        # if user does not select file, browser also
        # submit a empty part without filename
        print('Entered')
        print(file.filename)
        if file:
            print('andar hu')
            # filename = secure_filename(file.filename)
            # SECURITY(review): saving under the raw client-supplied filename
            # allows path traversal — prefer secure_filename (commented above).
            file.save(file.filename)
            # file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            data = ResumeParser(file.filename).get_extracted_data()
            data["f_loc"] = file.filename
            # # data["skills"]
            flash('File uploaded successfully', 'success')
            db.resume.insert_one(data)
            # Build an HTML fragment listing the candidate's skills.
            a = flask.Markup('<h2>') + data["name"] + flask.Markup(
                '</h2><br/><div class="row">')
            for i in data['skills']:
                a += flask.Markup('<div class="col-md-1">'
                                  ) + i + ', ' + flask.Markup('</div>')
            flash(a, 'warning')
            # flash(data["name"]+flask.Markup('<br /><div class="row">')),'warning')
    return redirect('/index')
def main():
    """Parse a resume PDF, normalize its words, and write matched skills to disk."""
    # Calling ResumeParser library to parse PDF resume.
    # TODO(review): hard-coded absolute paths only work on the original
    # author's machine; consider deriving them from __file__.
    data = ResumeParser(
        "C:/Users/sydne/OneDrive/Documents/Github/final-project-nougat/resume_parser/Engineering_Resume_19-20.pdf"
    ).get_extracted_data()
    # Added encoding utf-8 to prevent unicode error
    with open(
            "C:/Users/sydne/OneDrive/Documents/Github/final-project-nougat/resume_parser/resume.txt",
            "w", encoding='utf-8') as rf:
        rf.truncate()
        rf.write(str(data))
    resume_list = []
    # Translation table that deletes newlines, spaces and punctuation.
    remove = dict.fromkeys(map(ord, '\n ' + string.punctuation))
    resume_file = os.path.dirname(__file__) + "/resume.txt"
    # Comparing resume.txt with skills.txt file.
    # BUG FIX: the original passed the invalid keyword 'encodings='
    # (TypeError) and iterated an undefined name 'fin' instead of 'f'.
    with open(resume_file, 'r', encoding='utf-8') as f:
        for line in f:
            for word in line.split():
                word = word.translate(remove)
                resume_word = word.lower()
                resume_list.append(resume_word)
    query_list = extractSkills(resume_list)
    # BUG FIX: use a context manager so the output file is flushed and closed.
    with open(os.path.dirname(__file__) + "/query_skills.txt", "w") as file_out:
        file_out.truncate()
        file_out.write(str(query_list))
def calculate_score(link, user_name, skills, cgpa):
    """Download a resume from a Google Drive share link, parse it, and score it.

    The score combines: matched required *skills* (capped at 4 points),
    extra skills (up to 1 point), experience entries (up to 2 points, see
    NOTE below), and *cgpa* weighted by 0.3.

    Returns:
        list: [score_without_cgpa, final_score].
    """
    # Strip the fixed share-URL prefix; the first path segment is the file id.
    link = link[32:]
    link = link.split('/')
    file_loc = 'resume/' + user_name + '.pdf'
    gdd.download_file_from_google_drive(file_id=link[0], dest_path=file_loc)
    data = ResumeParser(file_loc).get_extracted_data()
    score = 0
    match = 0
    # Count required skills present in the resume, capped at 4 points.
    for i in skills:
        if i in data['skills']:
            match = match + 1
    if match > 4:
        match = 4
    score += match
    # Remaining (unmatched) skills add up to 1 point, 0.1 each.
    left = len(data['skills']) - match
    if left > 0:
        if left > 10:
            score += 1
        else:
            score += left * 0.1
    # Experience entries add points.
    # NOTE(review): exactly 20 entries scores 4.0 (20 * 0.2) while 21+ scores
    # only 2 — the boundary looks unintended; confirm the intended cap.
    if data['experience']:
        if len(data['experience']) > 20:
            score += 2
        else:
            score += len(data['experience']) * 0.2
    # NOTE(review): despite its name, cgpa_score is the score *before* the
    # CGPA contribution is added.
    cgpa_score = score
    score += float(cgpa) * 0.3
    ls = []
    ls.append(cgpa_score)
    ls.append(score)
    return ls
def upload_zip():
    """Accept a ZIP of resumes, parse every PDF inside, and bulk-insert the results."""
    client = app.data.driver.db.client
    db = client['hackit']
    if request.method == 'POST':
        # Grab the uploaded archive from the multipart form.
        file = request.files['resume']
        print('Entered')
        print(file.filename)
        if file:
            print('andar hu')
            file.save(file.filename)
            data = []
            with zipfile.ZipFile(file.filename, 'r') as archive:
                for member in archive.namelist():
                    # Only PDF members are parsed; everything else is skipped.
                    if not member.endswith('.pdf'):
                        continue
                    # Extract the single PDF into the working directory.
                    archive.extract(member, '')
                    parsed = ResumeParser(member).get_extracted_data()
                    parsed["f_loc"] = member
                    data.append(parsed)
            db.resume.insert_many(data)
            return redirect('/index')
def hello_world(id):
    """Fetch a Salesforce Attachment (resume) by *id*, parse it, and write the
    extracted skills back onto the parent Contact's Skills__c field.

    Returns the comma-separated skills string that was written.
    """
    # SECURITY(review): credentials belong in configuration/environment,
    # never in source code.
    sf = Salesforce(
        username="",
        password='',
        security_token='')  # Add your information here
    sessionId = sf.session_id
    instance = sf.sf_instance
    print('sessionId: ' + sessionId)
    # SECURITY(review): *id* is concatenated straight into the SOQL query —
    # validate/escape it to avoid SOQL injection.
    attachment = sf.query(
        "SELECT Id, Name,ParentId FROM Attachment where Id='" + id + "' LIMIT 1")
    filename = attachment['records'][0]['Name']
    fileid = attachment['records'][0]['Id']
    fileparentid = attachment['records'][0]['ParentId']
    print('filename: ' + filename)
    print('fileid: ' + fileid)
    response = requests.get(
        'https://' + instance + '/services/data/v39.0/sobjects/Attachment/' +
        fileid + '/body',
        headers={
            'Content-Type': 'application/text',
            'Authorization': 'Bearer ' + sessionId
        })
    # BUG FIX: manage the attachment file with a context manager so it is
    # closed even if the write fails.
    with open(filename, "wb") as f1:
        f1.write(response.content)
    print('output file: ' + os.path.realpath(f1.name))
    response.close()
    data = ResumeParser(os.path.realpath(f1.name)).get_extracted_data()
    # Round-trip through JSON (kept from the original to preserve its
    # normalization of the parsed data).
    cand_dict = json.dumps(data)
    print(cand_dict)
    x = json.loads(cand_dict)

    class candidate:
        def __init__(self, name, email, skills):
            self.name = name
            self.email = email
            self.skills = skills

    c1 = candidate(x["name"], x["email"], x["skills"])
    print(c1.name)
    print(c1.email)
    print(c1.skills)
    a = json.dumps(c1.name)
    a = a.replace('"', '')
    c = json.dumps(c1.email)
    c = c.replace('"', '')
    # BUG FIX: lstrip() was called twice in a row; the second call was a no-op.
    c = c.lstrip()
    print(c)
    b = json.dumps(c1.skills)
    b = b.replace('"', '')
    b = b.replace('[', '')
    b = b.replace(']', '')
    #sf.Contact.create({'LastName':a,'Email':c, 'Skills__c':b,'Record_Typess__c':'Candidat'})
    sf.Contact.update(fileparentid, {'Skills__c': b})
    return b
def conv(data):
    """Render *data* into a one-page PDF ('a.pdf'), parse it back with
    pyresparser, and return the extracted skills list.
    """
    document = FPDF()
    document.add_page()
    document.set_font('Courier', 'B', 16)
    document.cell(40, 10, data)
    document.output('a.pdf', 'F')
    parsed = ResumeParser('a.pdf').get_extracted_data()
    return parsed['skills']
def get_contents():
    """API endpoint: accept a base64-encoded PDF in the 'enc' form field,
    save it, parse it, score it against the DevOps JD, and return the data.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('enc', type=str, location='form')
    args = parser.parse_args()
    content = args['enc']
    # IDIOM FIX: identity comparison for None instead of '== None'.
    if content is None:
        return json.dumps({'response': "Empty parameter value"}), 403, {
            'Content-Type': 'application/json'
        }
    try:
        content_bytes = content.encode()
        content = base64.decodebytes(content_bytes)
    except binascii.Error:
        # NOTE(review): json.dumps of this literal yields a JSON *string*,
        # not an object — kept as-is to preserve the API's observable output.
        error = "{'Response':'Not a Valid Input string '}"
        return json.dumps(error), 406, {'Content-Type': 'application/json'}
    # A real PDF always starts with the magic bytes '%PDF'.
    if content[0:4] != b'%PDF':
        error = "{'Response':'Not a Valid PDF file'}"
        return json.dumps(error), 406, {'Content-Type': 'application/json'}
    rand = get_random_string()
    filename = 'resume_' + rand
    outfile = "./Assets/Resume/Saved_Resumes/" + filename + '.pdf'
    # FIX: the 'with' block already closes the file; the original also
    # called close() redundantly inside it.
    with open(outfile, 'wb') as resume:
        resume.write(content)
    resume_text = ResumeParser(outfile).get_extracted_data()
    jd = ('./Assets/Resume/Devops JD.pdf')
    jd_text = ResumeParser(jd).get_extracted_data()
    score = findsimilarity(listtostr(resume_text['skills']),
                           listtostr(jd_text['skills']))
    resume_text["Score"] = str(score)
    return resume_text, 200, {'Content-Type': 'application/json'}
def applicant_file(request):
    """Handle a resume upload: parse the newest .docx in media/ and pre-fill
    the candidate forms with the extracted fields; otherwise show the homepage.
    """
    if request.method == 'POST':
        form = UploadForm(request.POST, request.FILES)
        if form.is_valid():
            fileuploaded = request.FILES['filename']
            fs = FileSystemStorage()
            fs.save(fileuploaded.name, fileuploaded)
            BASE_DIR = os.path.dirname(
                os.path.dirname(os.path.abspath(__file__)))
            dir_location = os.path.join(BASE_DIR, 'media')
            file_location = dir_location + "/*.docx"
            list_of_files = glob.glob(file_location)
            # Newest file by creation time is assumed to be the one just saved.
            latest_file = max(list_of_files, key=os.path.getctime)
            # Parse the uploaded resume
            parsed_details = ResumeParser(latest_file).get_extracted_data()
            resume_dict = parsed_details
            candidateForm = CandidateForm()
            # ContactForm(initial={'subject': 'Hi there!'})
            # NOTE(review): both first and last name are seeded from the same
            # 'name' field — confirm this is intentional.
            candidateForm = CandidateForm(
                initial={
                    'first_name': resume_dict.get("name"),
                    'last_name': resume_dict.get("name")
                })
            candidate_DetailsForm = Candidate_DetailsForm()
            print(resume_dict)
            candidate_DetailsForm = Candidate_DetailsForm(
                initial={
                    'phone_number': resume_dict.get("mobile_number"),
                    'email_address': resume_dict.get("email"),
                    'work_experience': resume_dict.get("experience"),
                    'technical_skillset': resume_dict.get("skills"),
                    'education': resume_dict.get("degree")
                })
            # print(candidate_DetailsForm)
            # resume.save() // jira 81 error
            context = {
                'candidate': candidateForm,
                'candidate_Details': candidate_DetailsForm
            }
            return render(request, 'resumeapp/Applicants_Detail.html', context)
        else:
            print("form is not valid")
    # GET requests and invalid forms fall through to the homepage view.
    form = UploadForm(request.POST, request.FILES)
    totaljobs = Job_Details.objects.all().count()
    return render(request, 'resumeapp/Homepage.html', {
        'totaljobs': totaljobs,
        'form': form
    })
def getResults(resumePath, jobPath):
    """Parse a resume and a job description, normalize both skill lists,
    attach the skill comparison and meta results, and return both dicts.
    """
    resumeData = ResumeParser(resumePath).get_extracted_data()
    resumeData["skills"] = parseSkills(resumeData["skills"])
    jobData = ResumeParser(jobPath).get_extracted_data()
    jobData["skills"] = parseSkills(jobData["skills"])
    # Side-by-side skills display and miscellaneous metadata comparison.
    resumeData["skillsData"] = finalizeSkillsDisplay(resumeData["skills"],
                                                    jobData["skills"])
    resumeData["meta"] = finalizeMetaResults(resumePath, jobPath, resumeData,
                                             jobData)
    return resumeData, jobData
def resume_data(filename):
    """Return pyresparser's extracted data for *filename*.

    PDF files are parsed directly; .doc/.docx files are first converted to
    PDF via doc_to_pdf. Any other format aborts the process with status 1.
    """
    if not filename.endswith('.pdf'):
        # BUG FIX: match real extensions ('.doc'/'.docx') — the old suffix
        # test 'doc' also accepted names like 'mydoc' with no extension.
        if filename.endswith(('.doc', '.docx')):
            filename = doc_to_pdf(filename)
        else:
            print("Only 'pdf', 'doc' and 'docx' file format supported")
            sys.exit(1)
    Resume_Data = ResumeParser(filename).get_extracted_data()
    return Resume_Data