def delete_catalog_file():
    try:
        req_data = request.get_json()
        user_id = ObjectId(str(req_data['user_id']))
        catalog_id = ObjectId(str(req_data['catalog_id']))
        file_id = ObjectId(str(req_data['file_id']))
        user = validate_user(user_id)
        if user is None:
            response = {
                "status": "error",
                "message": "Invalid user_id '{}'".format(user_id)
            }
            return json.dumps(response)
        user_cat_col = USER_CATALOGS_COL.find_one({
            'user': user["_id"],
            "_id": catalog_id
        })
        if user_cat_col:
            file_doc = CATALOG_FILES_COL.find_one({
                "_id": file_id,
                "catalog": user_cat_col["_id"]
            })
            if file_doc:
                file_path = os.path.join(UPLOAD_FILE_PATH,
                                         str(user_cat_col["_id"]),
                                         file_doc["file_name"])
                if os.path.exists(file_path):
                    # Remove the file from disk and its record from the catalog.
                    os.remove(file_path)
                    CATALOG_FILES_COL.delete_one({"_id": file_doc["_id"]})
                    response = {
                        "status": "success",
                        "message": "File '{}' deleted successfully".format(
                            file_doc["file_name"])
                    }
                else:
                    response = {
                        "status": "error",
                        "message": "File doesn't exist."
                    }
            else:
                response = {"status": "error", "message": "Invalid file id"}
        else:
            response = {
                "status": "error",
                "message": "Catalog '{}' does not exist.".format(catalog_id)
            }
        return json.dumps(response)
    except Exception as err:
        response = {"status": "error", "message": str(err)}
        return json.dumps(response, default=str)
def update_file_data():
    try:
        req_data = request.get_json()
        user_id = ObjectId(str(req_data['user_id']))
        file_id = ObjectId(str(req_data['file_id']))
        name = req_data['name']
        email = req_data['email']
        mobile = req_data['mobile']
        skills = req_data['skills']
        qualifications = req_data['qualifications']
        is_active = req_data['is_active']
        user = validate_user(user_id)
        if user is None:
            response = {"status": "error", "message": "User doesn't exist"}
            return json.dumps(response)
        file_doc = CATALOG_FILES_COL.find_one({"_id": file_id})
        if not file_doc:
            response = {"status": "error", "message": "Record doesn't exist"}
            return json.dumps(response)
        file_catalog = USER_CATALOGS_COL.find_one({"_id": file_doc["catalog"]})
        if not file_catalog:
            response = {
                "status": "error",
                "message": "File catalog doesn't exist"
            }
            return json.dumps(response)
        if file_catalog["user"] != user["_id"]:
            response = {
                "status": "error",
                "message": "You are not authorized to update this record."
            }
            return json.dumps(response)
        CATALOG_FILES_COL.update_one({"_id": file_doc["_id"]}, {
            "$set": {
                "Name": name,
                "Email_Address": email,
                "Mobile_No": mobile,
                "Skills": skills,
                "Degree": qualifications,
                "is_active": is_active,
                "is_manually_updated": True
            }
        })
        response = {
            "status": "success",
            "message": "Record updated successfully."
        }
    except Exception as err:
        response = {
            "status": "error",
            "message": "Unable to process request.",
            'error': str(err)
        }
    return json.dumps(response)
def remove_columns_from_files_docs():
    # One-off cleanup: drop fields that are no longer part of the file schema.
    for file in CATALOG_FILES_COL.find():
        print(file)
        # Collection.update() is deprecated/removed in newer PyMongo;
        # update_one() performs the same single-document update here.
        CATALOG_FILES_COL.update_one({"_id": file["_id"]}, {
            "$unset": {
                "College_Name": 1,
                "Companies_worked_at": 1,
                "Designation": 1,
                "Tools": 1
            }
        })
def validate_catalog_file(catalog, file_name):
    catalog_resume_doc = CATALOG_FILES_COL.find_one({
        'catalog': catalog['_id'],
        'file_name': file_name
    })
    if not catalog_resume_doc:
        new_catalog_resume_doc = {
            'catalog': catalog['_id'],
            'file_name': file_name,
            'is_active': True,
            'is_entity_extracted': True,
            'created_date': datetime.now(),
            "is_manually_updated": False
        }
        catalog_resumes = CATALOG_FILES_COL.insert_one(new_catalog_resume_doc)
        new_catalog_resume_doc['_id'] = catalog_resumes.inserted_id
        catalog_resume_doc = new_catalog_resume_doc
    return catalog_resume_doc
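# Usage sketch (illustrative only, not part of the original module): assuming
# `catalog_doc` is a document from USER_CATALOGS_COL and the upload has already
# been written to disk, validate_catalog_file() returns the existing file
# record or creates a new one. The file name below is hypothetical.
def _example_validate_catalog_file(catalog_doc):
    file_doc = validate_catalog_file(catalog_doc, "jane_doe_resume.pdf")
    print(file_doc["_id"], file_doc["file_name"], file_doc["is_active"])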
def download_file():
    req_data = request.args
    user_id = ObjectId(str(req_data['user_id']))
    catalog_id = ObjectId(req_data['catalog_id'])
    file_id = ObjectId(req_data['file_id'])
    user = validate_user(user_id)
    if user is None:
        response = {
            "status": "error",
            "message": "Invalid user_id '{}'".format(user_id)
        }
        return json.dumps(response)
    user_cat_col = USER_CATALOGS_COL.find_one({
        'user': user["_id"],
        "_id": catalog_id
    })
    if not user_cat_col:
        response = {
            "status": "error",
            "message": "Invalid catalog_id '{}'".format(catalog_id)
        }
        return json.dumps(response)
    resume_file = CATALOG_FILES_COL.find_one({
        "_id": file_id,
        'catalog': user_cat_col["_id"]
    })
    if resume_file:
        file_path = os.path.join(UPLOAD_FILE_PATH, str(resume_file["catalog"]),
                                 resume_file["file_name"])
        return send_file(file_path, as_attachment=True)
    else:
        response = {"status": "error", "message": "File does not exist."}
        return json.dumps(response)
def get_catalog_degrees():
    try:
        req_data = request.get_json()
        user_id = ObjectId(str(req_data['user_id']))
        catalog_id = ObjectId(str(req_data['catalog_id']))
        user = validate_user(user_id)
        if user is None:
            response = {
                "status": "error",
                "message": "Invalid user_id '{}'".format(user_id)
            }
            return json.dumps(response)
        user_cat_col = USER_CATALOGS_COL.find_one({
            'user': user["_id"],
            "_id": catalog_id
        })
        if not user_cat_col:
            response = {
                "status": "error",
                "message": "Invalid catalog_id '{}'".format(catalog_id)
            }
            return json.dumps(response)
        catalog_resume = CATALOG_FILES_COL.find(
            {'catalog': user_cat_col['_id']})
        degree = catalog_resume.distinct("Degree")
        degree.sort()
        response = {"status": "success", "qualifications": degree}
    except Exception as err:
        response = {"status": "error", "message": str(err)}
    return json.dumps(response)
def process_all_files():
    catalog_resumes = CATALOG_FILES_COL.find()
    nlp_obj = TrainModel(model='Resume_Keyword_Extraction')
    for file_doc in catalog_resumes:
        file_full_path = os.path.join(UPLOAD_FILE_PATH,
                                      str(file_doc["catalog"]),
                                      file_doc["file_name"])
        # Pick a text extractor based on the file extension.
        file_type = file_doc['file_name'].rsplit('.', 1)[1].lower()
        if file_type == "txt":
            file_data = get_text_from_text_file(file_full_path)
        elif file_type == "pdf":
            file_data = extract_text_from_pdf_file(file_full_path)
        elif file_type == "docx":
            file_data = get_text_from_docx_file(file_full_path)
        else:
            # Skip unsupported file types instead of failing on an
            # undefined `file_data`.
            continue
        entity_data = nlp_obj.get_entities(text=file_data)
        entity_data = json.loads(entity_data)
        entity_data['is_entity_extracted'] = True
        CATALOG_FILES_COL.update_one({"_id": file_doc['_id']},
                                     {"$set": entity_data})
def delete_user_catalog():
    try:
        req_data = request.get_json()
        user_id = ObjectId(str(req_data['user_id']))
        catalog_id = ObjectId(str(req_data['catalog_id']))
        user = validate_user(user_id)
        if user is None:
            response = {
                "status": "error",
                "message": "Invalid user_id '{}'".format(user_id)
            }
            return json.dumps(response)
        user_cat_col = USER_CATALOGS_COL.find_one({
            'user': user["_id"],
            "_id": catalog_id
        })
        if user_cat_col:
            # Remove the catalog, its file records, and the uploaded files on disk.
            USER_CATALOGS_COL.delete_one({"_id": user_cat_col["_id"]})
            CATALOG_FILES_COL.delete_many({"catalog": user_cat_col["_id"]})
            dir_path = os.path.join(UPLOAD_FILE_PATH, str(user_cat_col["_id"]))
            if os.path.exists(dir_path) and os.path.isdir(dir_path):
                shutil.rmtree(dir_path)
            response = {
                "status": "success",
                "message": "Catalog '{}' deleted successfully".format(
                    user_cat_col["name"])
            }
        else:
            response = {
                "status": "error",
                "message": "Catalog '{}' does not exist.".format(catalog_id)
            }
        return json.dumps(response)
    except Exception as err:
        response = {"status": "error", "message": str(err)}
        return json.dumps(response, default=str)
def shortlist_catalog_profiles(user_id, catalog_id, min_exp, max_exp,
                               req_skills, qualification=None, opt_skills=None):
    # Required skills weigh more than optional skills in the final score.
    points_per_req_skill = 10
    points_per_opt_skill = 5
    opt_skills_score = 0
    if not req_skills:
        raise Exception("'req_skills' should not be empty.")
    else:
        req_skills_score = len(req_skills) * points_per_req_skill
    if opt_skills:
        opt_skills_score = len(opt_skills) * points_per_opt_skill
    try:
        catalog_id = ObjectId(catalog_id)
        catalog_files = CATALOG_FILES_COL.find({
            "catalog": catalog_id,
            "is_active": True,
            "is_entity_extracted": True
        })
        matched_files = []
        for file in catalog_files:
            # Skip records without a usable experience value.
            if "Years_of_Experience" not in file.keys():
                continue
            if isinstance(file["Years_of_Experience"], list):
                continue
            if min_exp and max_exp:
                if not min_exp <= float(file["Years_of_Experience"]) <= max_exp:
                    continue
            file['matched_req_skills'] = list(
                set(req_skills).intersection(set(file["Skills"])))
            if not file['matched_req_skills']:
                continue
            file['req_skill_match_score'] = len(
                file['matched_req_skills']) * points_per_req_skill
            if qualification is not None and "Degree" in file.keys():
                file['match_qualification'] = list(
                    set(qualification).intersection(set(file["Degree"])))
            if opt_skills is not None:
                file['matched_opt_skills'] = list(
                    set(opt_skills).intersection(set(file["Skills"])))
                file['opt_skill_match_score'] = len(
                    file['matched_opt_skills']) * points_per_opt_skill
            # Total score = matched points as a percentage of the maximum
            # possible points for the requested skills.
            file['Total_Match_Score'] = 0
            if req_skills and opt_skills:
                file['Total_Match_Score'] = (
                    (file['req_skill_match_score'] +
                     file['opt_skill_match_score']) /
                    (req_skills_score + opt_skills_score)) * 100.0
            elif req_skills and not opt_skills:
                file['Total_Match_Score'] = (file['req_skill_match_score'] /
                                             req_skills_score) * 100.0
            file['Total_Match_Score'] = round(file['Total_Match_Score'], 2)
            file['download_url'] = (
                "http://localhost:5000/download-resume"
                "?user_id={}&catalog_id={}&file_id={}".format(
                    str(user_id), str(file["catalog"]), str(file["_id"])))
            if 'created_date' in file.keys():
                file["created_date"] = file["created_date"].strftime("%d-%m-%Y")
            matched_files.append(file)
        matched_files = sorted(matched_files,
                               key=lambda i: i['Total_Match_Score'],
                               reverse=True)
        return matched_files
    except Exception as err:
        raise Exception(err)
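# Usage sketch (illustrative only, not part of the original module). It assumes
# an existing user/catalog pair; `demo_user_id`, `demo_catalog_id`, and the
# skill and qualification lists below are hypothetical values, not values taken
# from this codebase.
def _example_shortlist_usage(demo_user_id, demo_catalog_id):
    # Rank profiles with 2-8 years of experience that match at least one
    # required skill; optional skills only add to the score.
    matches = shortlist_catalog_profiles(
        user_id=demo_user_id,
        catalog_id=demo_catalog_id,
        min_exp=2,
        max_exp=8,
        req_skills=["Python", "MongoDB"],
        opt_skills=["Flask"],
        qualification=["B.Tech"])
    for profile in matches:
        print(profile.get("Name"), profile["Total_Match_Score"],
              profile["download_url"])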
def get_catalog_details():
    try:
        req_data = request.get_json()
        user_id = ObjectId(str(req_data['user_id']))
        user = validate_user(user_id)
        if user is None:
            response = {
                "status": "error",
                "message": "Invalid user_id '{}'".format(user_id)
            }
            return json.dumps(response)
        user_catalogs = list(
            USER_CATALOGS_COL.find({'user': user['_id']}, {"user": 0}))
        for catalog in user_catalogs:
            # Count the files in each catalog without loading the documents.
            catalog["total_files"] = CATALOG_FILES_COL.count_documents(
                {'catalog': catalog['_id']})
            if 'created_date' in catalog.keys():
                catalog["created_date"] = catalog["created_date"].strftime(
                    "%d-%m-%Y")
        response = {"status": "success", "catalogs": user_catalogs}
    except Exception as err:
        response = {"status": "error", "message": str(err)}
    return json.dumps(response, default=str)
def get_catalog_files():
    try:
        req_data = request.get_json()
        user_id = ObjectId(str(req_data['user_id']))
        catalog_id = ObjectId(str(req_data['catalog_id']))
        user = validate_user(user_id)
        if user is None:
            response = {
                "status": "error",
                "message": "Invalid user_id '{}'".format(user_id)
            }
            return json.dumps(response)
        user_cat_col = USER_CATALOGS_COL.find_one({
            'user': user["_id"],
            "_id": catalog_id
        })
        if not user_cat_col:
            response = {
                "status": "error",
                "message": "Invalid catalog id '{}'".format(catalog_id)
            }
            return json.dumps(response)
        catalog_resumes = list(
            CATALOG_FILES_COL.find({
                'catalog': user_cat_col['_id']
            }).sort([("Name", pymongo.ASCENDING)]))
        for file in catalog_resumes:
            if 'created_date' in file.keys():
                file["created_date"] = file["created_date"].strftime(
                    "%d-%m-%Y")
            file['download_url'] = (
                "http://localhost:5000/download-resume"
                "?user_id={}&catalog_id={}&file_id={}".format(
                    str(user_id), str(file["catalog"]), str(file["_id"])))
        response = {"status": "success", "files": catalog_resumes}
    except Exception as err:
        response = {"status": "error", "message": str(err)}
    return json.dumps(response, default=str)
def process_catalog_files():
    try:
        req_data = request.get_json()
        user_id = ObjectId(str(req_data['user_id']))
        catalog_id = ObjectId(str(req_data['catalog_id']))
        user = validate_user(user_id)
        if user is None:
            response = {
                "status": "error",
                "message": "Invalid user_id '{}'".format(user_id)
            }
            return json.dumps(response)
        user_cat_col = USER_CATALOGS_COL.find_one({
            "user": user["_id"],
            "_id": catalog_id
        })
        if not user_cat_col:
            response = {
                "status": "error",
                "message": "Invalid catalog_id '{}'".format(catalog_id)
            }
            return json.dumps(response)
        # Only process files that have not been extracted or manually edited.
        catalog_resumes = list(
            CATALOG_FILES_COL.find({
                "catalog": user_cat_col['_id'],
                "is_entity_extracted": False,
                "is_manually_updated": False
            }))
        nlp_obj = TrainModel(model='Resume_Keyword_Extraction')
        for file_doc in catalog_resumes:
            file_full_path = os.path.join(UPLOAD_FILE_PATH,
                                          str(file_doc["catalog"]),
                                          file_doc["file_name"])
            file_type = file_doc['file_name'].rsplit('.', 1)[1].lower()
            if file_type == "txt":
                file_data = get_text_from_text_file(file_full_path)
            elif file_type == "pdf":
                file_data = extract_text_from_pdf_file(file_full_path)
            elif file_type == "docx":
                file_data = get_text_from_docx_file(file_full_path)
            else:
                # Skip unsupported file types instead of failing on an
                # undefined `file_data`.
                continue
            entity_data = nlp_obj.get_entities(text=file_data)
            entity_data = json.loads(entity_data)
            entity_data['is_entity_extracted'] = True
            CATALOG_FILES_COL.update_one({"_id": file_doc['_id']},
                                         {"$set": entity_data})
        if "data_un_extracted_files" in user_cat_col:
            data_un_extracted_files_count = user_cat_col[
                "data_un_extracted_files"] - len(catalog_resumes)
        else:
            data_un_extracted_files_count = 0
        USER_CATALOGS_COL.update_one({"_id": user_cat_col["_id"]}, {
            "$set": {
                "prev_data_extracted_date": datetime.now(),
                "data_un_extracted_files": data_un_extracted_files_count
            }
        })
        return json.dumps({
            'status': 'success',
            'message': 'Data extracted from "{}" catalog files'.format(
                user_cat_col["name"])
        })
    except Exception as err:
        return json.dumps({'status': 'error', 'message': str(err)})
def update_file(up_file_doc):
    # Flag the file so its entities are re-extracted on the next processing run.
    CATALOG_FILES_COL.update_one({"_id": up_file_doc["_id"]},
                                 {'$set': {'is_entity_extracted': False}})
    return up_file_doc
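# Usage sketch (illustrative only, not part of the original module): when a
# file that already has a record is uploaded again, the caller might reset its
# extraction flag so process_catalog_files() re-extracts it on the next run.
# `catalog_doc` and the file name below are hypothetical.
def _example_reupload_flow(catalog_doc):
    existing_doc = validate_catalog_file(catalog_doc, "jane_doe_resume.pdf")
    update_file(existing_doc)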