def computeAlertsChunk(chunkID):
    """Attach each candidate's applied-job list to their record for one chunk.

    For every document of ``candidates_processed_4`` whose ``p`` field equals
    ``chunkID`` the function loads the matching ``candidate_applications``
    rows (``fcu`` == candidate ``_id``), stores the sorted ``fjj`` values under
    ``application_list`` and their number under ``application_count``, and
    saves the enriched record to ``candidates_processed_5`` when the candidate
    has no applications.

    Args:
        chunkID: value of the ``p`` field selecting the candidates to process.
    """
    # All three collections live in the JobAlerts database on this host.
    mongo_host = '172.22.66.198'
    monconn_users_static = MongoConnect('candidates_processed_4',
                                        host=mongo_host,
                                        database='JobAlerts')
    monconn_applications = MongoConnect('candidate_applications',
                                        host=mongo_host,
                                        database='JobAlerts')
    monconn_recommendations = MongoConnect('candidates_processed_5',
                                           host=mongo_host,
                                           database='JobAlerts')
    try:
        # The returned cursors are not used here; the calls are kept in case
        # getCursor() initialises connection state -- TODO confirm.  The
        # second call previously went to monconn_users_static by mistake.
        monconn_users_static.getCursor()
        monconn_applications.getCursor()

        print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))

        for row in monconn_users_static.loadFromTable({'p': chunkID}):
            applications = monconn_applications.loadFromTable(
                {'fcu': row['_id']})
            # Iterating directly works for both a list and a cursor result
            # (len() on the raw result is not needed).
            application_list = sorted(element['fjj']
                                      for element in applications)
            row['application_list'] = application_list
            row['application_count'] = len(application_list)

            # NOTE(review): only candidates WITHOUT applications are written
            # out, exactly as in the original code -- confirm this is intended.
            if not application_list:
                monconn_recommendations.saveToTable(row)
    finally:
        # Release the connections even when a chunk fails half-way.
        for connection in (monconn_users_static, monconn_applications,
                           monconn_recommendations):
            connection.close()
def _monthly_split_cities(raw_location):
    """Return the lower-cased, stripped names of a comma-separated city string.

    The substring 'other' is removed before splitting.  A value without
    ``.lower()`` (i.e. not a string) yields ``['']``.
    """
    try:
        cleaned = raw_location.lower().replace('other', '').strip()
    except AttributeError:
        return ['']
    return [city.strip() for city in cleaned.split(',')]


def _monthly_subject_line(first_name, companies, functional_area):
    """Build the mail subject, falling back to the generic one on failure."""
    generic = first_name + ", don't miss out on these new jobs"
    try:
        if companies:
            return (first_name + ": " + ', '.join(companies) +
                    " and other top company jobs matching your profile")
        if functional_area == "Fresher (No Experience)":
            return generic
        return (first_name + ", new " +
                functional_area.replace(' /', ',') + " jobs for you")
    except (AttributeError, TypeError, UnicodeError):
        # e.g. functional_area is None, or byte/unicode strings that cannot
        # be concatenated.  Previously a failure in the companies branch left
        # the subject unset (or carrying the previous user's subject).
        return generic


def computeAlertsChunk(chunkID):
    """Generate the monthly job recommendations for one chunk of candidates.

    Every document of ``mailer_monthly.candidates_processed`` whose ``p``
    field equals ``chunkID`` is scored against the module-level job index
    (``index[lsiModel[tfIdfModel[bow]]]``).  Jobs with a cosine below 0.18 or
    whose CTC / experience match score is not 1 are dropped; the remaining
    ones are ranked by ``getOverallMatchScore``.  From the 30 best, at most 10
    jobs are kept with no more than 3 per company, and a document describing
    the candidate and the chosen jobs is saved to ``MonthlyMsgQueue``.
    Candidates without any selected job are not saved.

    Args:
        chunkID: value of the ``p`` field selecting the candidates to process.
    """
    min_lsi_cosine = 0.18      # jobs below this similarity are ignored
    candidate_pool_size = 30   # best-scored jobs considered per candidate
    max_jobs_per_company = 3   # cap so one company cannot fill the mail
    max_jobs_per_mail = 10

    monconn_recommendations = MongoConnect('MonthlyMsgQueue',
                                           host='localhost',
                                           database='mailer_monthly')
    monconn_users = None
    try:
        print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))

        # company id -> company name, keyed by int once so the per-user
        # lookup below needs no conversion.
        company_names = {}
        with open('CompanyNames.csv', 'r') as ifile:
            for row in csv.reader(ifile):
                try:
                    company_names[int(row[0])] = row[1]
                except (ValueError, IndexError):
                    # header line or malformed row
                    continue

        monconn_users = MongoConnect("candidates_processed",
                                     host='localhost',
                                     database='mailer_monthly')
        # Cursor itself is unused; call kept in case it initialises
        # connection state -- TODO confirm.
        monconn_users.getCursor()

        for user in monconn_users.loadFromTable({'p': chunkID}):
            user_current_time = datetime.datetime.now()
            user_ctc = user['user_ctc']
            user_exp = user['user_experience']
            user_fullname = user['user_fullname']
            user_functionalarea = user['user_functionalarea']
            user_firstname = user_fullname.split(" ")[0]
            # Missing location defaults to 'Delhi', as before.
            user_city_list = _monthly_split_cities(
                user.get('user_location', 'Delhi'))

            # ---- score every sufficiently similar job --------------------
            lsi_user = lsiModel[tfIdfModel[user['user_bow']['bow']]]
            scored_jobs = []
            for jobIntIndex, lsiCosine in index[lsi_user]:
                if lsiCosine < min_lsi_cosine:
                    continue
                job = jobIntIdToJobDict[jobIntIndex]
                ctc_match_score = CTCMatchScore(
                    job['job_minsal'], job['job_maxsal'],
                    user_ctc).CTCMatchScore()
                exp_match_score = ExpMatchScore(
                    job['job_minexp'], job['job_maxexp'],
                    user_exp).ExpMatchScore()
                if not (ctc_match_score == 1 and exp_match_score == 1):
                    continue

                paid_boost = PaidBoostScore(
                    job['job_flag'], job['job_accounttype']).PaidBoostScore()
                job_city_list = _monthly_split_cities(
                    job.get('job_location', 'Delhi'))
                try:
                    cityScore = cm.getCityScore(user_city_list, job_city_list)
                except Exception:
                    # best effort: an unscorable city pair counts as 0
                    cityScore = 0

                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paid_boost)
                scored_jobs.append((overallMatchScore, job['job_id'],
                                    job['job_company_id']))

            # ---- pick the jobs to send -----------------------------------
            top_matches = heapq.nlargest(candidate_pool_size, scored_jobs,
                                         key=lambda match: match[0])
            jobs2bsent = []
            company_ids = []
            cosine_score = []
            for overallMatchScore, jobid, job_companyid in top_matches:
                if company_ids.count(job_companyid) < max_jobs_per_company:
                    company_ids.append(job_companyid)
                    jobs2bsent.append(int(jobid))
                    cosine_score.append(round(overallMatchScore, 2))
                if len(jobs2bsent) >= max_jobs_per_mail:
                    break

            if not jobs2bsent:
                # nothing to recommend -> nothing is queued for this user
                continue

            # Names of the selected companies, best match first, no repeats.
            companies = []
            seen_company_ids = set()
            for job_companyid in company_ids:
                if job_companyid in seen_company_ids:
                    continue
                seen_company_ids.add(job_companyid)
                if job_companyid in company_names:
                    companies.append(company_names[job_companyid])

            user_subject = _monthly_subject_line(
                user_firstname, companies, user_functionalarea)

            document = {
                "c": user['user_id'],
                "_id": user['user_email'],
                "m": user['user_phone'],
                "te": user_exp,
                "cr": user['user_jobtitle'],
                "g": user['user_gender'],
                "cc": user['user_current_company'],
                "fa": user_functionalarea,
                "faid": user['user_functionalarea_id'],
                "pd": user['user_lastmodified'],
                "fn": user_fullname,
                "cpv": user['user_phone_verified'],
                "sCLID": user['user_location_id'],
                "sASID": user['user_ctc_id'],
                "eq": user['user_highest_qual'],
                "es": user['user_edu_special'],
                "ev": user['user_email_verified'],
                "ll": user['user_lastlogin'],
                "sal": user_ctc,
                "cosine": cosine_score,
                "edom": user['user_edom'],
                "t": user_current_time,
                "mj": jobs2bsent,
                "bj": [],
                "oj": [],
                "pid": user['p'],
                "s": False,
                "sub": user_subject,
            }
            monconn_recommendations.saveToTable(document)
    finally:
        # Close both connections even if the chunk fails half-way.
        if monconn_users is not None:
            monconn_users.close()
        monconn_recommendations.close()
stateCapitalMappingFileName = '../Features/CityScore/stateCapitalMapping.csv'
# City matcher used for the candidate/job city score.
cm = CityMatch(cityScoreMatrixFilename, stateCapitalMappingFileName)

#########################################################################################################
############----------------- Fetch the jobs data from the Mongo
#########################################################################################################
print("Fetching the jobs data from Mongodb")
tablename = "jobs_processed"
monconn_jobs = MongoConnect(tablename,
                            host='localhost',
                            database='mailer_monthly')
mongo_jobs_cur = monconn_jobs.getCursor()
myCondition = {}  # empty condition: load every job
jobs = monconn_jobs.loadFromTable(myCondition)
# Reported only after loadFromTable() has returned (it used to be printed
# before the load was even started).
print("Fetching the jobs data from Mongodb....completed")

#########################################################################################################
############----------------- Creating Index on Jobs
#########################################################################################################
jobs_bow = []            # bag-of-words of each job, in load order
i = 0                    # running position of the job in jobs_bow
jobIntIdToJobDict = {}   # position in jobs_bow -> full job document
for job in jobs:
    job_bow = job['job_bow']['bow']
    jobs_bow.append(job_bow)
    jobIntIdToJobDict[i] = job
    i += 1
def _midout_split_cities(raw_location):
    """Return the lower-cased, stripped names of a comma-separated city string.

    The substring 'other' is removed before splitting.  A value without
    ``.lower()`` (i.e. not a string) yields ``['']``.
    """
    try:
        cleaned = raw_location.lower().replace('other', '').strip()
    except AttributeError:
        return ['']
    return [city.strip() for city in cleaned.split(',')]


def computeAlertsChunk(chunkID):
    """Generate the daily mid-out job recommendations for one chunk.

    Every document of ``Midout_Mailers.candidates_processed_midout`` whose
    ``pid`` equals ``chunkID`` is scored against the module-level job index
    (``index[lsiModel[tfIdfModel[bow]]]``).  Jobs with a cosine below 0.18 or
    whose CTC / experience match score is not 1 are dropped; the 10 jobs with
    the highest ``getOverallMatchScore`` are written, together with the
    candidate's contact details, to ``mailer_daily_midout.DailyMsgQueue``.
    Candidates without any matching job are not saved.

    Args:
        chunkID: value of the ``pid`` field selecting the candidates.
    """
    min_lsi_cosine = 0.18   # jobs below this similarity are ignored
    max_jobs_per_mail = 10
    paidboost = 0           # no paid boost is applied in this mailer

    monconn_recommendations = MongoConnect('DailyMsgQueue',
                                           host='localhost',
                                           database='mailer_daily_midout')
    monconn_users = None
    try:
        print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))

        monconn_users = MongoConnect("candidates_processed_midout",
                                     host='localhost',
                                     database='Midout_Mailers')
        # Cursor itself is unused; call kept in case it initialises
        # connection state -- TODO confirm.
        monconn_users.getCursor()

        for user in monconn_users.loadFromTable({'pid': chunkID}):
            # ---- candidate details ---------------------------------------
            user_current_time = datetime.datetime.now()
            user_ctc = user['user_ctc']
            user_exp = user['user_experience']
            user_id = user['user_id']
            user_email = user['user_email']
            user_pid = user['pid']
            user_phone_number = ("+" + str(user.get('user_countrycode', '')) +
                                 "-" + str(user.get('user_phone_number', '')))

            # ``or ''`` also covers keys that are present but hold None,
            # which would otherwise break the concatenation.
            user_firstname = user.get('user_firstname', '') or ''
            user_lastname = user.get('user_lastname', '') or ''
            user_fullname = user_firstname + " " + user_lastname
            if len(user_fullname) < 3:
                user_fullname = "Candidate"

            try:
                user_registration_start_date = rfc3339(
                    user.get('user_registration_start_date', ''), utc=True)
            except Exception:
                # best effort: missing/unparseable date is stored as ""
                user_registration_start_date = ""

            # Missing location defaults to 'Delhi', as before.
            user_city_list = _midout_split_cities(
                user.get('user_location', 'Delhi'))

            # ---- score every sufficiently similar job --------------------
            lsi_user = lsiModel[tfIdfModel[user['user_bow']['bow']]]
            scored_jobs = []
            for jobIntIndex, lsiCosine in index[lsi_user]:
                if lsiCosine < min_lsi_cosine:
                    continue
                job = jobIntIdToJobDict[jobIntIndex]
                ctc_match_score = CTCMatchScore(
                    job['job_minsal'], job['job_maxsal'],
                    user_ctc).CTCMatchScore()
                exp_match_score = ExpMatchScore(
                    job['job_minexp'], job['job_maxexp'],
                    user_exp).ExpMatchScore()
                if not (ctc_match_score == 1 and exp_match_score == 1):
                    continue

                job_city_list = _midout_split_cities(
                    job.get('job_location', 'Delhi'))
                try:
                    cityScore = cm.getCityScore(user_city_list, job_city_list)
                except Exception:
                    # best effort: an unscorable city pair counts as 0
                    cityScore = 0

                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paidboost)
                scored_jobs.append((overallMatchScore, job['job_id']))

            # ---- keep the best jobs --------------------------------------
            top_matches = heapq.nlargest(max_jobs_per_mail, scored_jobs,
                                         key=lambda match: match[0])
            jobs2bsent = [int(jobid) for _score, jobid in top_matches]
            if not jobs2bsent:
                # nothing to recommend -> nothing is queued for this user
                continue

            document = {
                "_id": user_email,
                "fn": user_fullname,
                "c": user_id,
                "mj": jobs2bsent,
                "oj": [],
                "s": False,
                "t": user_current_time,
                "pid": user_pid,
                "m": user_phone_number,
                "pd": user_registration_start_date,
            }
            monconn_recommendations.saveToTable(document)
    finally:
        # Close both connections even if the chunk fails half-way.
        if monconn_users is not None:
            monconn_users.close()
        monconn_recommendations.close()
ifile = open('UserData.csv', 'rb') reader = csv.reader(ifile) reader.next() ofile = open("User_Applications.csv", "w") writer = csv.writer(ofile) writer.writerow(['User_Id', 'Job_Applied', 'Application_Date']) candidate_id = [] '''for records in reader: candidate_id.append(str(records[0]).strip())''' #print len(candidate_id) for records in reader: data_user = monconn_users_static.loadFromTable({"fcu": str(records[0])}) for records in data_user: try: User_Id = records.get('fcu', 'N/A') Job_Applied = records.get('fjj', 'N/A') Application_Date = records.get('ad', 'N/A') writer.writerow([User_Id, Job_Applied, Application_Date]) except Exception as e: print e pass '''try: details = list(data_user) print details sys.exit(0) User_Id = details.get('fcu',"N/A") Job_Applied = details.get('fjj','N/A')
def computeAlertsChunk(chunkID):
    """Compute the similar-job suggestions for one chunk of jobs.

    Every document of ``similar_jobs_onsite.active_jobs_dump`` whose ``pid``
    equals ``chunkID`` is compared with the jobs of the module-level index
    (``index[lsiModel[tfIdfModel[bow]]]``).  A candidate job is kept when its
    CTC and experience match scores are both 1 and it is not the source job
    itself; kept jobs are ranked by ``getOverallMatchScore``.  From the 30
    best, at most 10 are selected with no more than 2 per company, and one
    document per source job (possibly with an empty list) is saved to
    ``JobSuggestions``.

    Args:
        chunkID: value of the ``pid`` field selecting the source jobs.
    """
    candidate_pool_size = 30   # best-scored jobs considered per source job
    max_jobs_per_company = 2   # cap so one company cannot dominate the list
    max_similar_jobs = 10
    paid_boost = 0             # no paid boost is applied for similar jobs

    monconn_recommendations = MongoConnect('JobSuggestions',
                                           host='localhost',
                                           database='similar_jobs_onsite')
    monconn_jobs_1 = None
    try:
        print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))

        monconn_jobs_1 = MongoConnect("active_jobs_dump",
                                      host='localhost',
                                      database='similar_jobs_onsite')
        # Cursor itself is unused; call kept in case it initialises
        # connection state -- TODO confirm.
        monconn_jobs_1.getCursor()

        for job_1 in monconn_jobs_1.loadFromTable({'pid': chunkID}):
            jobid_1 = job_1['job_id']
            job_minsal_1 = job_1['job_minsal']
            job_maxsal_1 = job_1['job_maxsal']
            job_minexp_1 = job_1['job_minexp']
            job_maxexp_1 = job_1['job_maxexp']
            # Missing location defaults to ["Delhi"], as before; it does not
            # depend on the compared job, so it is looked up once.
            job_city_1 = job_1.get('job_location', ["Delhi"])

            # ---- score the indexed jobs against the source job -----------
            lsi_job_1 = lsiModel[tfIdfModel[job_1['job_bow']['bow']]]
            scored_jobs = []
            for jobIntIndex, lsiCosine in index[lsi_job_1]:
                job = jobIntIdToJobDict[jobIntIndex]
                jobid = job['job_id']

                ctc_match_score = CTCMatchScore(
                    job_minsal_1, job_maxsal_1,
                    job['job_minsal'], job['job_maxsal']).CTCMatchScore()
                exp_match_score = ExpMatchScore(
                    job_minexp_1, job_maxexp_1,
                    job['job_minexp'], job['job_maxexp']).ExpMatchScore()
                if not (ctc_match_score == 1 and exp_match_score == 1):
                    continue
                if not (jobid != jobid_1):
                    # a job is never suggested as similar to itself
                    continue

                job_city = job.get('job_location', ["Delhi"])
                try:
                    cityScore = cm.getCityScore(job_city_1, job_city)
                except Exception:
                    # best effort: an unscorable city pair counts as 0
                    cityScore = 0

                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paid_boost)
                scored_jobs.append((overallMatchScore, jobid,
                                    job['job_company_id']))

            # ---- pick the suggestions ------------------------------------
            top_matches = heapq.nlargest(candidate_pool_size, scored_jobs,
                                         key=lambda match: match[0])
            jobs2bsent = []
            company_ids = []
            for _score, jobid, job_company_id in top_matches:
                if company_ids.count(job_company_id) < max_jobs_per_company:
                    company_ids.append(job_company_id)
                    jobs2bsent.append(int(jobid))
                if len(jobs2bsent) >= max_similar_jobs:
                    break

            # Saved unconditionally, so a job without suggestions gets an
            # empty 'sj' list.
            document = {
                '_id': jobid_1,
                'sj': jobs2bsent,
                'sjlen': len(jobs2bsent),
                'lud': datetime.datetime.now(),
            }
            monconn_recommendations.saveToTable(document)
    finally:
        # Close both connections even if the chunk fails half-way.
        if monconn_jobs_1 is not None:
            monconn_jobs_1.close()
        monconn_recommendations.close()
############-----------------Try Except to provide alert in case of code failure ######################################################################################################### try: ######################################################################################################### ############-----------------Creating a mongo connection to miscellaneous DB ######################################################################################################### monconn_users = MongoConnect(tableName, host='172.22.65.88', port=27018, database='miscellaneous', username=username, password=password, authenticate=True) monconn_users_cur = monconn_users.getCursor() myCondition = {"cd": {'$gt': date1}} users = monconn_users.loadFromTable(myCondition) print "Number of recoreds : " + str(len(users)) ######################################################################################################### ############-----------------Creating a mongo connection to resume dump DB Mongo(172.22.66.233) ######################################################################################################### tableName = 'candidate_data' monconn_resume = MongoConnect(tableName, host='172.22.66.198', database='ResumeDump') monconn_resume_cur = monconn_resume.getCursor() ######################################################################################################### ############-----------------Extracting the resume data and dumping in local Mongo ######################################################################################################### i = 0
try: datadict["user_edom"] = (doc['sEm']).split("@")[1] except: datadict["user_edom"] = None try: datadict["user_skills"] = ', '.join(doc['sS']) except: datadict["user_skills"] = None ######################################################################################################### ############-----------------Getting the resume data for a candidate ######################################################################################################### Condition = {"_id": doc['id']} resume_data = monconn_resume.loadFromTable(Condition) if len(resume_data) != 0: datadict['resume_jobtitle'] = resume_data[0][ "user_jobtitle_resume"] datadict['resume_skills'] = resume_data[0][ "user_skills_resume"] else: datadict['resume_jobtitle'] = None datadict['resume_skills'] = None ######################################################################################################### ############-----------------Creating the list of applied jobs for a candidate ######################################################################################################### Condition = {'fcu': doc['id']} apply_data = monconn_applies.loadFromTable(Condition) apply_data_list = list(apply_data)
#ifile = open('boxx_leads.csv','r') #ifile = open("email_id_cp.csv",'r') ifile = open('customer.csv', 'r') reader = csv.reader(ifile) reader.next() i = 0 user_id = [] for records in ifile: user_id.append(str(records).strip()) print len(user_id) print user_id[1:10] data_user = monconn_users_static.loadFromTable({"_id": {'$in': user_id}}) for records in data_user: try: user_id = records.get('_id') user_email = records.get('user_email') user_gender = records.get('user_gender') user_industry = records.get("user_industry", 'N/A') user_jobtitle = records.get("user_jobtitle", "N/A") user_exp = records.get("user_experience", 'N/A') user_salary = records.get("user_ctc", 'N/A') user_functional_area = records.get("user_functionalarea", "N/A") user_subfa = records.get("user_functionalarea", 'N/A') user_cellphone = records.get("user_phone", 'N/A') writer.writerow([ user_email, user_id, user_gender, user_industry, user_jobtitle, user_exp, user_salary, user_functional_area, user_subfa,
def _cityNameList(raw_city):
    """Split a comma-separated city string into lower-cased, trimmed names.

    Every occurrence of the substring 'other' is removed before splitting.
    Returns [''] when raw_city is not string-like (for example None).
    """
    try:
        cleaned = raw_city.lower().replace('other', '').strip()
    except (AttributeError, TypeError):
        return ['']
    return [name.strip() for name in cleaned.split(',')]


def _cityOrDefault(record, key):
    """Return record[key], or 'Delhi' when the record has no such key."""
    try:
        return record[key]
    except KeyError:
        return 'Delhi'


def _topJobIdsForUser(user, minCosine, topN):
    """Return the integer ids of the topN best-scoring jobs for one user.

    A job is a candidate when its LSI cosine against the user's bag of words
    is at least minCosine and both the CTC and the experience match scores
    equal 1. Candidates are ranked by getOverallMatchScore(); ties keep the
    order in which the similarity index produced them.
    """
    user_ctc = user['user_ctc']
    user_exp = user['user_experience']

    # Paid boost is switched off: the previous revision computed
    # PaidBoostScore(job_flag, job_accounttype) and then unconditionally
    # overwrote the result with 0, so only the constant ever reached the score.
    paid_boost = 0

    # The user's city list does not depend on the job, so build it once.
    user_city_list = _cityNameList(_cityOrDefault(user, 'user_location'))

    lsi_user = lsiModel[tfIdfModel[user['user_bow']['bow']]]

    scored_jobs = []
    for (jobIntIndex, lsiCosine) in index[lsi_user]:
        if lsiCosine < minCosine:
            continue

        job = jobIntIdToJobDict[jobIntIndex]
        jobid = job['job_id']

        ctc_match_score = CTCMatchScore(
            job['job_minsal'], job['job_maxsal'], user_ctc).CTCMatchScore()
        exp_match_score = ExpMatchScore(
            job['job_minexp'], job['job_maxexp'], user_exp).ExpMatchScore()
        if ctc_match_score != 1 or exp_match_score != 1:
            continue

        job_city_list = _cityNameList(_cityOrDefault(job, 'job_location'))
        try:
            # A copy is passed because getCityScore is opaque from here and
            # must not be able to alter the list shared across jobs.
            cityScore = cm.getCityScore(list(user_city_list), job_city_list)
        except Exception:
            # Best effort, as before: a failing city comparison counts as
            # "no city match" instead of aborting the whole chunk.
            cityScore = 0

        overallMatchScore = getOverallMatchScore(
            lsiCosine, cityScore, paid_boost)
        scored_jobs.append((jobid, overallMatchScore))

    best_jobs = heapq.nlargest(topN, scored_jobs, key=lambda pair: pair[1])
    return [int(jobid) for (jobid, _score) in best_jobs]


def _buildQueueDocument(user, jobs2bsent, queued_at):
    """Build the queue document saved for one user.

    Raises KeyError when the user record lacks one of the copied fields.
    """
    return {
        "c": user['user_id'],
        "_id": user['user_email'],
        "m": user['user_phone'],
        "te": user['user_experience'],
        "cr": user['user_jobtitle'],
        "g": user['user_gender'],
        "cc": user['user_current_company'],
        "fa": user['user_functionalarea'],
        "faid": user['user_functionalarea_id'],
        "pd": user['user_lastmodified'],
        "fn": user['user_fullname'],
        "cpv": user['user_phone_verified'],
        "sCLID": user['user_location_id'],
        "sASID": user['user_ctc_id'],
        "eq": user['user_highest_qual'],
        "es": user['user_edu_special'],
        "ev": user['user_email_verified'],
        "ll": user['user_lastlogin'],
        "sal": user['user_ctc'],
        "edom": user['user_edom'],
        "t": queued_at,
        "mj": jobs2bsent,
        "bj": [],
        "oj": [],
        "pid": user['p'],
        "s": False,
    }


def computeAlertsChunk(chunkID, minCosine=0.18, topN=10):
    """Queue the best-matching jobs for every user of one chunk.

    Loads the users whose 'p' field equals chunkID from the
    'candidates_processed' collection, ranks jobs for each of them and saves
    one document per user into 'WeeklyMsgQueue' (both in the 'mailer_weekly'
    database on localhost). Users without any matching job are skipped, and
    no document is built for them.

    Parameters:
        chunkID   -- value of the 'p' field selecting the users to process.
        minCosine -- minimum LSI cosine a job needs to be considered
                     (default 0.18, the previously hard-coded threshold).
        topN      -- maximum number of job ids queued per user (default 10).

    Both Mongo connections are closed when the function exits, including
    when processing raises. User and job fields that were read but never
    used by earlier revisions are no longer required to be present.
    """
    monconn_recommendations = MongoConnect(
        'WeeklyMsgQueue', host='localhost', database='mailer_weekly')
    try:
        # Single formatted argument: prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))

        monconn_users = MongoConnect(
            'candidates_processed', host='localhost',
            database='mailer_weekly')
        try:
            users = monconn_users.loadFromTable({'p': chunkID})
            for user in users:
                # Timestamp is taken before scoring, as it always was.
                queued_at = datetime.datetime.now()
                jobs2bsent = _topJobIdsForUser(user, minCosine, topN)
                if not jobs2bsent:
                    continue
                monconn_recommendations.saveToTable(
                    _buildQueueDocument(user, jobs2bsent, queued_at))
        finally:
            monconn_users.close()
    finally:
        monconn_recommendations.close()
writer = csv.writer(ofile,lineterminator = '\n') writer.writerow(['JobId','JobTitle','JobSkills','HiringFor','JT_url','JT_email','JT_phone','JT_len','JD_url','JD_email','JD_phone','JD_len','Skills Count','HF_url','HF_email','HF_phone','JobTitleScore','JobDescriptionScore','HiringForScore','SkillScore','TotalScore','PublishedDate','DateDifference','Applications']) for row in reader: jobid = row[0] jt = row[1] jd = row[2] jd_clean = row[3] skills = row[4] hf = row[5] pub_date = row[6] date_diff = row[7] applications = len(monconn_users.loadFromTable({'fjj':int(jobid)})) jt_score = 0 jd_score = 0 hf_score = 0 skill_score = 0 #JT Score jt_phone = 0 jt_url = len(re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', jt)) jt_email = len(re.findall(r'[\w\.-]+@[\w\.-]+', jt)) jt_numbers = re.findall('\d+', jt) for number in jt_numbers: