print(str(candidatetotal)) #print(candidateIdList) log_file = open("resumeParser.log", "a") etl_job_log = {} temp = [] charcteristics_list = getCharacteristicMap() for i in range(0,candidatetotal): candidateResumeSkills = list(db.candidate_resume_text.find({"candidate_id":candidateIdList[i]})) print(str(candidateIdList[i])) #print(str(candidateResumeSkills)) print("Parsing") temp = candResumeParser(candidateResumeSkills) print("Classifier") candidate_classifier_incre(candidateIdList[i],charcteristics_list) print("Scoring") rzindex_candidate(candidateIdList[i]) #print("---Total Time: %s seconds ---" % (time.time() - start_time)) etl_job_log["job_name"] = JOB_NAME etl_job_log["start_datetime"] = beginTime etl_job_log["end_datetime"] = datetime.utcnow() etl_job_log["elapsed_time_in_seconds"] = time.time() - start_time etl_job_log["total_records_processed"] = candidatetotal db.etl_job_log.insert_one(etl_job_log) log_file.write(str(etl_job_log)) except Exception as e: DebugException(e) log_file.write("Exception during resume parsing: [" + str(e) + "]") log_file.close()
def main():
    """Run the candidate classifier job for recently loaded/updated candidates.

    Steps, in order:

    1. Fetch every document in the ``candidate`` collection whose
       ``loaded_date`` or ``update_date`` is later than ``lastRunDate`` and
       which has at least one ``job_skill_names`` entry.
    2. For each candidate, lower-case its skill names and test them with
       ``matchSkills`` (threshold 0.40) against the ``"Skills"`` entry of
       every item returned by ``getCharacteristicMap()``.
    3. Push the matching candidate ids onto the ``candidates`` array of the
       ``category_candidate_map`` document for each matched
       ``global_job_category_id``.
    4. Call ``rzindex_candidate`` for every fetched candidate (matched or not).
    5. Insert a run record into ``db.etl_job_log``.

    Any exception raised inside the ``try`` block is handed to
    ``DebugException`` and reported on stdout; it is not re-raised.
    """
    print("GO")
    start_time = time.time()
    document_map = {}  # job category id -> ordered list of unique candidate ids
    category_map = db["category_candidate_map"]
    try:
        cand_table = db["candidate"]
        characteristic_list = getCharacteristicMap()
        print("[candidateClassifierJob] ---query Time: %s seconds ---" % (time.time() - start_time))

        changed_since_last_run = {
            "$or": [
                {"loaded_date": {"$gt": lastRunDate}},
                {"update_date": {"$gt": lastRunDate}},
            ]
        }
        query_dict = {
            "$and": [
                # Only candidates that have at least one skill entry.
                {"job_skill_names.0": {"$exists": True}},
                changed_since_last_run,
            ]
        }
        proj_dict = {"candidate_id": 1, "job_skill_names": 1}
        match_threshold = 0.40  # third argument handed to matchSkills

        cand_count = 0
        candlist = []
        for candidate in cand_table.find(query_dict, proj_dict):
            candidate_id = candidate["candidate_id"]
            # Every fetched candidate is re-indexed later, even if it ends up
            # matching no category.
            candlist.append(candidate_id)

            job_skills = candidate["job_skill_names"]
            if not job_skills:
                continue
            cand_count += 1

            skill_names = [entry["job_skill_name"].lower() for entry in job_skills]
            for job_cat_id, skill in characteristic_list.items():
                if not matchSkills(skill_names, skill["Skills"], match_threshold):
                    continue
                matched_ids = document_map.setdefault(job_cat_id, [])
                if candidate_id not in matched_ids:
                    matched_ids.append(candidate_id)

        # Execute the update.
        # update_one replaces the legacy Collection.update (removed in
        # PyMongo 4); the legacy call's defaults were single-document and
        # non-upsert, which is exactly what update_one does.
        # NOTE(review): "$push" appends ids again on every run that re-selects
        # a candidate; "$addToSet" may be what is wanted - confirm with the
        # readers of category_candidate_map before changing.
        for cat_id, candidate_ids in document_map.items():
            category_map.update_one(
                {"global_job_category_id": cat_id},
                {"$push": {"candidates": {"$each": candidate_ids}}},
            )

        print(candlist)
        for cand in candlist:
            print(cand)
            rzindex_candidate(cand)

        # NOTE(review): etl_job_log, JOB_NAME and beginTime are not defined in
        # this function; presumably they are module-level names - verify.
        etl_job_log["job_name"] = JOB_NAME
        etl_job_log["start_datetime"] = beginTime
        # UTC, consistent with the other timestamps this job family records.
        etl_job_log["end_datetime"] = datetime.utcnow()
        etl_job_log["elapsed_time_in_seconds"] = time.time() - start_time
        etl_job_log["total_records_processed"] = cand_count
        db.etl_job_log.insert_one(etl_job_log)

        timeNow = datetime.utcnow().isoformat()
        print("[" + timeNow + "] [candidateClassifierJob] ---Run Time: [" + str(
            time.time() - start_time) + "] seconds ---")
    except Exception as e:
        # Top-level job boundary: report the failure instead of crashing.
        DebugException(e)
        print("[candidateClassifierJob] ---Run Time: %s seconds ---" % (time.time() - start_time))
        print("Job : [candidate_classifier] failed due to error [" + str(e) + "]")