Beispiel #1
0
    print(str(candidatetotal))
    #print(candidateIdList)
    log_file = open("resumeParser.log", "a")
    etl_job_log = {}
    temp = []
    charcteristics_list = getCharacteristicMap()
    for i in range(0,candidatetotal):
        candidateResumeSkills = list(db.candidate_resume_text.find({"candidate_id":candidateIdList[i]}))
        print(str(candidateIdList[i]))
        #print(str(candidateResumeSkills))
        print("Parsing")
        temp = candResumeParser(candidateResumeSkills)
        print("Classifier")
        candidate_classifier_incre(candidateIdList[i],charcteristics_list)
        print("Scoring")
        rzindex_candidate(candidateIdList[i])

    #print("---Total Time: %s seconds ---" % (time.time() - start_time))
    etl_job_log["job_name"] = JOB_NAME
    etl_job_log["start_datetime"] = beginTime
    etl_job_log["end_datetime"] = datetime.utcnow()
    etl_job_log["elapsed_time_in_seconds"] = time.time() - start_time
    etl_job_log["total_records_processed"] = candidatetotal
    db.etl_job_log.insert_one(etl_job_log)
    log_file.write(str(etl_job_log))

except Exception as e:
    DebugException(e)
    log_file.write("Exception during resume parsing: [" + str(e) + "]")

log_file.close()
def main():
    """Classify recently loaded/updated candidates into job categories.

    Steps, all wrapped in one top-level ``try`` so a failure is reported
    instead of propagating:

    1. Load the characteristic map via ``getCharacteristicMap()``.
    2. Query ``db["candidate"]`` for documents that have at least one
       ``job_skill_names`` entry and whose ``loaded_date`` or
       ``update_date`` is greater than ``lastRunDate``.
    3. For every candidate, lower-case its skill names and test them
       against each category's ``"Skills"`` with ``matchSkills``.
    4. Push the matched candidate ids onto the ``candidates`` array of the
       corresponding ``category_candidate_map`` document.
    5. Call ``rzindex_candidate`` for every candidate returned by the query.
    6. Insert a run record into ``db.etl_job_log``.

    Relies on module-level names defined outside this block: ``db``,
    ``lastRunDate``, ``JOB_NAME``, ``beginTime``, ``getCharacteristicMap``,
    ``matchSkills``, ``rzindex_candidate`` and ``DebugException``.

    Returns:
        None. Any exception is handed to ``DebugException`` and printed.
    """
    print("GO")
    start_time = time.time()
    # Build a fresh log record for this run. insert_one() adds an "_id" to
    # the dict it is given, so re-using a shared dict would make a second
    # insert collide on that "_id".
    etl_job_log = {}
    category_map = db["category_candidate_map"]
    try:
        cand_table = db["candidate"]
        characteristic_list = getCharacteristicMap()

        print("[candidateClassifierJob] ---query Time: %s seconds ---" % (time.time() - start_time))

        query_dict = {
            "$and": [
                {"job_skill_names.0": {"$exists": True}},
                {"$or": [
                    {"loaded_date": {"$gt": lastRunDate}},
                    {"update_date": {"$gt": lastRunDate}},
                ]},
            ]
        }
        proj_dict = {"candidate_id": 1, "job_skill_names": 1}

        # NOTE(review): 0.40 is passed straight to matchSkills; presumably a
        # minimum match ratio -- confirm against matchSkills.
        match_threshold = .40

        candlist = []        # every candidate id returned, in cursor order
        cand_count = 0       # candidates that had a non-empty skill list
        document_map = {}    # job_cat_id -> ordered list of candidate ids
        seen_by_category = {}  # job_cat_id -> set of ids, for O(1) de-dup

        for candidate in cand_table.find(query_dict, proj_dict):
            candidate_id = candidate["candidate_id"]
            candlist.append(candidate_id)

            job_skills = candidate["job_skill_names"]
            if not job_skills:
                continue
            cand_count += 1

            skill_names = [entry["job_skill_name"].lower() for entry in job_skills]
            for job_cat_id, skill in characteristic_list.items():
                if not matchSkills(skill_names, skill["Skills"], match_threshold):
                    continue
                seen = seen_by_category.setdefault(job_cat_id, set())
                if candidate_id not in seen:
                    seen.add(candidate_id)
                    document_map.setdefault(job_cat_id, []).append(candidate_id)

        # Execute the update: one document per category, same semantics as
        # the legacy single-document Collection.update().
        for cat_id, candidate_ids in document_map.items():
            category_map.update_one(
                {"global_job_category_id": cat_id},
                {"$push": {"candidates": {"$each": candidate_ids}}},
            )

        print(candlist)
        for cand in candlist:
            print(cand)
            rzindex_candidate(cand)

        etl_job_log["job_name"] = JOB_NAME
        etl_job_log["start_datetime"] = beginTime
        # Naive datetimes are stored by PyMongo as if they were UTC, so use
        # utcnow() here, as the run-time log line below does.
        etl_job_log["end_datetime"] = datetime.utcnow()
        etl_job_log["elapsed_time_in_seconds"] = time.time() - start_time
        etl_job_log["total_records_processed"] = cand_count
        db.etl_job_log.insert_one(etl_job_log)

        timeNow = datetime.utcnow().isoformat()
        print("[" + timeNow + "] [candidateClassifierJob]  ---Run Time: [" + str(
            time.time() - start_time) + "] seconds ---")

    except Exception as e:
        # Top-level job boundary: report the failure rather than crash.
        DebugException(e)
        print("[candidateClassifierJob]  ---Run Time: %s seconds ---" % (time.time() - start_time))
        print("Job : [candidate_classifier] failed due to error [" + str(e) + "]")