def computeAlertsChunk(chunkID): ######################################################################################################### ############-----------------Creating the connection to Mongo (172.22.66.233) ######################################################################################################### #monconn_users_static = MongoConnect('candidates_processed_4', host = 'localhost', database = 'JobAlerts') monconn_users_static = MongoConnect('candidates_processed_4', host = '172.22.66.198', database = 'JobAlerts') monconn_users_static_cur = monconn_users_static.getCursor() #monconn_applications = MongoConnect('candidate_applications', host = 'localhost', database = 'JobAlerts') monconn_applications = MongoConnect('candidate_applications', host = '172.22.66.198', database = 'JobAlerts') monconn_applications_cur = monconn_users_static.getCursor() tablename = 'candidates_processed_5' #monconn_recommendations = MongoConnect(tablename, host='localhost', database='JobAlerts') monconn_recommendations = MongoConnect(tablename, host='172.22.66.198', database='JobAlerts') print 'Chunk:', chunkID, 'initiated at:', time.ctime() myCondition = {'p':chunkID} users = monconn_users_static.loadFromTable(myCondition) for row in users : user_profiletitle = row['user_profiletitle'] user_industry = row['user_industry'] user_functionalarea = row['user_functionalarea'] user_jobtitle = row['user_jobtitle'] user_skills = row['user_skills'] preferred_subfa = row["preferred_sub_fa"] subject_status = row["subject_status"] user_experience = row["user_experience"] apply_data = monconn_applications.loadFromTable({'fcu':row['_id']}) apply_data_list = list(apply_data) application_list = [] if len(apply_data) == 0: pass else: for element in apply_data_list: application_list.append(element['fjj']) application_list.sort() row['application_list'] = application_list application_count = len(application_list) row['application_count'] = application_count if application_count == 0: monconn_recommendations.saveToTable(row)
def preProcessChunk(chunkID): print 'Connecting to Mongodb..' tableName = 'jobs_status_check' monconn_status_check = MongoConnect(tableName, host='localhost', database='jam_status') monconn_status_check_cur = monconn_status_check.getCursor() ###################################### '''Fetching the Jobs from SQL''' ###################################### #Connect to SQL table and get the jobs data host = "172.22.65.157" user = "******" password = "******" database = "SumoPlus" unix_socket = "/tmp/mysql.sock" port = 3308 print "Loading Jobs From MySql...." mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4) and rj.jobid%''' + str(numChunks) + '=' + str(chunkID) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)''' #print cmd cmd1 = '''drop table if exists SumoPlus.XY''' cmd2 = '''create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id) group by 1 ''' cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)''' cmd4 = '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, rj.isbocreated as back_office_job, rj.publisheddate as publisheddate, rj.republisheddate as republisheddate, rj.companyid_id as Company_id, rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction, case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) ) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id WHERE c.AttType in (3,12,13) group by rj.jobid ''' cmd5 = '''drop table if exists SumoPlus.XY ''' print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime() mysql_conn.query(cmd1) mysql_conn.query(cmd2) mysql_conn.query(cmd3) jobs = mysql_conn.query(cmd4) mysql_conn.query(cmd5) print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime( ) print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs) ###################################### '''Connecting to Mongo 233 Server''' ###################################### print 'Connecting to Mongodb..' tableName = 'jobs_processed' monconn_jobs_local = MongoConnect(tableName, host='localhost', database='JobAlerts') monconn_jobs_local_cur = monconn_jobs_local.getCursor() print 'Connecting to Mongodb...finished' ###################################### '''Processing the Jobs''' ###################################### i = 0 for job in jobs: #pprint(job) #print i if i % 1000 == 0: print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time( ) - start_time, ' seconds' job_id = job['Jobid'] job_title = cleanToken(job['JobTitle']) job_maxexp = cleanToken(job['ExpMax']) job_minexp = cleanToken(job['ExpMin']) job_maxsal = cleanToken(job['SalaryMax']) job_minsal = cleanToken(job['SalaryMin']) job_jd = cleanHTML(cleanToken(job['JD'])) job_industry = cleanToken(job['Industry']) job_location = removeDup(job['location']) job_subfunction = removeDup(cleanToken(job['subfunction'])) job_function = removeDup(cleanToken(job['function'])) job_skills = removeDup(cleanToken(job['keySkills'])) job_flag = job['flag'] job_accounttype = job['account_type'] job_company_id = job['Company_id'] job_company_name = cleanToken(job['Company_name']) job_published_date = job['publisheddate'] job_republished_date = job['republisheddate'] job_back_office = int(job['back_office_job']) if job_company_id == 421880: ################## Altimetrik Jobs removed ########################## continue job_location = job_location.replace(', ', ',').lower().split(',') ##Extract additional fields like bow text = 5 * (" " + job_title) + ' ' + 5 * ( " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * ( " " + job_industry) + ' ' + 2 * ( " " + job_function) + ' ' + 2 * (" " + job_subfunction) text = text.replace('candidates', ' ') ''' try: text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction) text = text.replace('candidates', ' ') except: text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd) text = text.replace('candidates', ' ') ''' job_bow = mb.getBow(text, getbowdict=0) #job_keySkills = ','.join([x for x in jobKeySkills.split(',') if x.strip() != '']) #pprint(job_bow) document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \ 'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\ 'job_location':job_location, 'job_subfunction':job_subfunction,\ 'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \ 'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \ 'job_flag':job_flag,'job_accounttype':job_accounttype, \ 'job_company_id':job_company_id,'job_company_name':job_company_name, 'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office } monconn_jobs_local.saveToTable(document) i += 1 print "Processing finished....." print 'chunkID:', chunkID, ' Total time taken is: ', time.time( ) - start_time, ' seconds.' end_time = time.time() time_taken = end_time - start_time os.system( ' echo "Jobs Processed ' + str(i) + ' in :' + str(end_time - start_time) + ' seconds' + ' " | mutt -s "Job Alert Mailer " [email protected] ,[email protected]' ) del (monconn_jobs_local) del (mysql_conn) monconn_status_check.saveToTable({'_id': 1, 'status': 1}) del (monconn_status_check)
print 'Loading the mappings for bow' synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv' keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' #This file is created mb = MyBOW(synMappingFileName, keywordIdMappingFileName) print 'Loading the mappings for bow...finished' print 'Connecting to Mongodb..' tableName = 'jobs_status_check' monconn_status_check = MongoConnect(tableName, host='localhost', database='jam_status') monconn_status_check_cur = monconn_status_check.getCursor() #monconn_status_check.dropTable() #del(monconn_status_check) monconn_status_check.saveToTable({'_id': 1, 'status': 0}) ############################################# '''Dropping the existing collection of jobs''' ############################################# print 'Connecting to Mongodb..' tableName = 'jobs_processed' monconn_jobs_local = MongoConnect(tableName, host='localhost', database='JobAlerts') monconn_jobs_local_cur = monconn_jobs_local.getCursor() monconn_jobs_local.dropTable() print 'Connecting to Mongodb...finished' del (monconn_jobs_local)
def computeAlertsChunk(chunkID): ######################################################################################################### ############-----------------Creating a connection to output mongodb ######################################################################################################### tablename = 'MonthlyMsgQueue' monconn_recommendations = MongoConnect(tablename, host='localhost', database='mailer_monthly') print 'Chunk:', chunkID, 'initiated at:', time.ctime() ifile = open('CompanyNames.csv', 'r') reader = csv.reader(ifile) company_dict = {} for row in reader: company_dict[row[0]] = row[1] ######################################################################################################### ############-----------------Fetch the user data from the database ######################################################################################################### tablename = "candidates_processed" monconn_users = MongoConnect(tablename, host='localhost', database='mailer_monthly') mongo_users_cur = monconn_users.getCursor() myCondition = {'p': chunkID} #myCondition = {} users = monconn_users.loadFromTable(myCondition) #print "Fetching the users data from Mongodb....completed for ChunkID:",chunkID ######################################################################################################### ############-----------------Loop to generate recommendations and save in Mongo ######################################################################################################### count = 0 for user in users: ######################################################################################################### ############-----------------Extracting the user details ######################################################################################################### count += 1 user_ctc = user['user_ctc'] user_exp = user['user_experience'] user_id = user['user_id'] user_email = user['user_email'] user_bow = user['user_bow']['bow'] user_current_time = datetime.datetime.now() user_jobtitle = user['user_jobtitle'] user_lastlogin = user['user_lastlogin'] user_phone = user['user_phone'] user_gender = user['user_gender'] user_current_company = user['user_current_company'] user_functionalarea_id = user['user_functionalarea_id'] user_lastmodified = user['user_lastmodified'] user_fullname = user['user_fullname'] user_phone_verified = user['user_phone_verified'] user_location_id = user['user_location_id'] user_ctc_id = user['user_ctc_id'] user_highest_qual = user['user_highest_qual'] user_edu_special = user['user_edu_special'] user_email_verified = user['user_email_verified'] user_spam_status = user['user_spam_status'] user_bounce_status = user['user_bounce_status'] user_email_alert_status = user['user_email_alert_status'] user_functionalarea = user['user_functionalarea'] user_industry = user['user_industry'] user_jobtitle = user['user_jobtitle'] user_profiletitle = user['user_profiletitle'] user_edom = user['user_edom'] user_industry = user['user_industry'] user_skills = user['user_skills'] user_profiletitle = user['user_profiletitle'] user_pid = user['p'] user_firstname = user_fullname.split(" ")[0] lsi_user = lsiModel[tfIdfModel[user_bow]] simScrChunk = index[lsi_user] sortingExcelSheetList = [] for (jobIntIndex, lsiCosine) in simScrChunk: if lsiCosine < 0.18: continue ######################################################################################################### ############-----------------Loading the Jobs Data ######################################################################################################### job = jobIntIdToJobDict[jobIntIndex] jobid = job['job_id'] job_title = job['job_title'] job_skills = job['job_skills'] job_minsal = job['job_minsal'] job_maxsal = job['job_maxsal'] job_minexp = job['job_minexp'] job_maxexp = job['job_maxexp'] job_bow = job['job_bow']['bow'] job_accounttype = job['job_accounttype'] job_flag = job['job_flag'] job_companyname = job['job_company_name'] job_companyid = job['job_company_id'] ######################################################################################################### ############-----------------Calculating the CTC and Experience Match Scores ######################################################################################################### ctc_match_score = CTCMatchScore(job_minsal, job_maxsal, user_ctc).CTCMatchScore() exp_match_score = ExpMatchScore(job_minexp, job_maxexp, user_exp).ExpMatchScore() paid_boost = PaidBoostScore(job_flag, job_accounttype).PaidBoostScore() ######################################################################################################### ############-----------------Calculating the City Score between a candidate and a job ######################################################################################################### if ctc_match_score == 1 and exp_match_score == 1: jobid = job['job_id'] try: job_city = job['job_location'] except: job_city = 'Delhi' try: user_city = user['user_location'] except: user_city = 'Delhi' #print user_city, job_city try: user_city_list = user_city.lower().replace( 'other', '').strip().split(',') user_city_list = [x.strip() for x in user_city_list] except: user_city_list = [''] try: job_city_list = job_city.lower().replace( 'other', '').strip().split(',') job_city_list = [x.strip() for x in job_city_list] except: job_city_list = [''] #print user_city_list, job_city_list try: cityScore = cm.getCityScore(user_city_list, job_city_list) except: cityScore = 0 ######################################################################################################### ############-----------------Calculating the overall match score and appending the details to the list ############-----------------based on job's published date ######################################################################################################### overallMatchScore = getOverallMatchScore( lsiCosine, cityScore, paid_boost) s = (user_id, user_email, jobid, overallMatchScore, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp, job_companyid) sortingExcelSheetList.append(s) else: continue ############################################################################################################## ############-----------------Finding the top 10 Jobs based on Overall Score ############################################################################################################## topN = 30 sortingExcelSheetListTopNJobs = heapq.nlargest(topN, sortingExcelSheetList, key=lambda x: x[3]) #pprint(sortingExcelSheetListTopNJobs) jobs2bsent = [] company_ids = [] cosine_score = [] for (user_id, user_email, jobid, overallMatchScore, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp, job_companyid) in sortingExcelSheetListTopNJobs: #print (userid, jobid, lsiCosine, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp) if job_companyid not in company_ids: company_ids.append(job_companyid) jobs2bsent.append(int(jobid)) cosine_score.append(round(overallMatchScore, 2)) else: if company_ids.count(job_companyid) < 3: company_ids.append(job_companyid) jobs2bsent.append(int(jobid)) cosine_score.append(round(overallMatchScore, 2)) else: pass if len(jobs2bsent) >= 10: break else: pass #print user_id #print company_ids #print jobs2bsent companies = [] #print company_ids for comp_id in company_dict.keys(): if int(comp_id) in company_ids: companies.append(company_dict[comp_id]) #print companies #print "Hello" else: pass ############################################################################################################## ############-----------------Creating Subject Line for a candidate ############################################################################################################## \ if len(companies) != 0: try: user_subject = user_firstname + ": " + ', '.join( companies ) + " and other top company jobs matching your profile" #print user_subject except Exception as e: pass else: try: if user_functionalarea == "Fresher (No Experience)": user_subject = user_firstname + ", don't miss out on these new jobs" else: user_subject = user_firstname + ", new " + user_functionalarea.replace( ' /', ',') + " jobs for you" #print user_subject except Exception as e: user_subject = user_firstname + ", don't miss out on these new jobs" ############################################################################################################## ############-----------------Creating a document to be saved in mongo collection ############################################################################################################## document = { "c": user_id, "_id": user_email, "m": user_phone, "te": user_exp, "cr": user_jobtitle, "g": user_gender, "cc": user_current_company, "fa": user_functionalarea, "faid": user_functionalarea_id, "pd": user_lastmodified, "fn": user_fullname, "cpv": user_phone_verified, "sCLID": user_location_id, "sASID": user_ctc_id, "eq": user_highest_qual, "es": user_edu_special, "ev": user_email_verified, "ll": user_lastlogin, "sal": user_ctc, "cosine": cosine_score, "edom": user_edom, "t": user_current_time, "mj": jobs2bsent, "bj": [], "oj": [], "pid": user_pid, "s": False, "sub": user_subject } ############################################################################################################## ############-----------------Dumping the document in mongo collection if recommendations were generated ############################################################################################################## if len(jobs2bsent) > 0: monconn_recommendations.saveToTable(document) #print 'Chunk:', chunkID, 'processed in:', time.ctime() monconn_recommendations.close()
def computeAlertsChunk(chunkID): ######################################################################################################### ############-----------------Creating a connection to output mongodb ######################################################################################################### tablename = 'DailyMsgQueue' monconn_recommendations = MongoConnect(tablename, host='localhost', database='mailer_daily_midout') print 'Chunk:', chunkID, 'initiated at:', time.ctime() ######################################################################################################### ############-----------------Fetch the user data from the database ######################################################################################################### tablename = "candidates_processed_midout" monconn_users = MongoConnect(tablename, host='localhost', database='Midout_Mailers') mongo_users_cur = monconn_users.getCursor() myCondition = {'pid': chunkID} users = monconn_users.loadFromTable(myCondition) ######################################################################################################### ############-----------------Loop to generate recommendations and save in Mongo ######################################################################################################### count = 0 for user in users: count += 1 ######################################################################################################### ############-----------------Extracting the user details ######################################################################################################### _id = user['user_email'] user_ctc = user['user_ctc'] user_exp = user['user_experience'] user_id = user['user_id'] user_email = user['user_email'] user_bow = user['user_bow']['bow'] user_current_time = datetime.datetime.now() user_countrycode = user.get('user_countrycode', '') user_pid = user['pid'] user_phone_number = "+" + str(user_countrycode) + "-" + str( user.get('user_phone_number', '')) user_firstname = user.get('user_firstname', '') user_lastname = user.get('user_lastname', '') user_fullname = user_firstname + " " + user_lastname if len(user_fullname) < 3: user_fullname = "Candidate" try: user_registration_start_date = rfc3339(user.get( 'user_registration_start_date', ''), utc=True) except: user_registration_start_date = "" lsi_user = lsiModel[tfIdfModel[user_bow]] simScrChunk = index[lsi_user] sortingExcelSheetList = [] for (jobIntIndex, lsiCosine) in simScrChunk: if lsiCosine < 0.18: continue ######################################################################################################### ############-----------------Loading the Jobs Data ######################################################################################################### job = jobIntIdToJobDict[jobIntIndex] jobid = job['job_id'] job_title = job['job_title'] job_skills = job['job_skills'] job_minsal = job['job_minsal'] job_maxsal = job['job_maxsal'] job_minexp = job['job_minexp'] job_maxexp = job['job_maxexp'] job_bow = job['job_bow']['bow'] ######################################################################################################### ############-----------------Calculating the CTC and Experience Match Scores ######################################################################################################### ctc_match_score = CTCMatchScore(job_minsal, job_maxsal, user_ctc).CTCMatchScore() exp_match_score = ExpMatchScore(job_minexp, job_maxexp, user_exp).ExpMatchScore() ######################################################################################################### ############-----------------Calculating the City Score between a candidate and a job ######################################################################################################### if ctc_match_score == 1 and exp_match_score == 1: jobid = job['job_id'] #lsiCosine = getLSICosine(user_bow, job_bow).getLSICosine() #City Score try: job_city = job['job_location'] except: job_city = 'Delhi' try: user_city = user['user_location'] except: user_city = 'Delhi' #print user_city, job_city try: user_city_list = user_city.lower().replace( 'other', '').strip().split(',') user_city_list = [x.strip() for x in user_city_list] except: user_city_list = [''] try: job_city_list = job_city.lower().replace( 'other', '').strip().split(',') job_city_list = [x.strip() for x in job_city_list] except: job_city_list = [''] #print user_city_list, job_city_list try: cityScore = cm.getCityScore(user_city_list, job_city_list) except: cityScore = 0 #if cityScore == 0: #count = count +1 #print user_city_list, job_city_list, cityScore paidboost = 0 ######################################################################################################### ############-----------------Calculating the overall match score ######################################################################################################### overallMatchScore = getOverallMatchScore( lsiCosine, cityScore, paidboost) s = (user_id, user_email, jobid, overallMatchScore, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp) sortingExcelSheetList.append(s) else: continue ############################################################################################################## ############-----------------Finding the top 10 Jobs based on Overall Score ############################################################################################################## topN = 10 sortingExcelSheetListTopNJobs = heapq.nlargest(topN, sortingExcelSheetList, key=lambda x: x[3]) jobs2bsent = [] for (user_id, user_email, jobid, overallMatchScore, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp) in sortingExcelSheetListTopNJobs: jobs2bsent.append(int(jobid)) ############################################################################################################## ############-----------------Creating a document to be saved in mongo collection ############################################################################################################## document = {"_id":user_email, \ "fn" : user_fullname,\ "c": user_id,\ "mj":jobs2bsent, \ "oj":[], \ "s": False ,\ "t": user_current_time ,\ "pid": user_pid,\ "m": user_phone_number, \ "pd":user_registration_start_date } ############################################################################################################## ############-----------------Dumping the document in mongo collection if recommendations were generated ############################################################################################################## if len(jobs2bsent) > 0: monconn_recommendations.saveToTable(document) monconn_recommendations.close()
def preProcessChunk(chunkID): ######################################################################################################### ############----------------- SQL Credentials ######################################################################################################### ''' host="172.22.65.157" user="******" password="******" database="SumoPlus" unix_socket="/tmp/mysql.sock" port = 3308 ''' host = "172.22.66.204" user = "******" password = "******" database = "SumoPlus" unix_socket = "/tmp/mysql.sock" port = 3306 ######################################################################################################### ############----------------- Creating the SQL Query ######################################################################################################### print "Loading Jobs From MySql...." mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) cmd1 = '''drop table if exists SumoPlus.XY''' cmd2 = '''create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id) group by 1 ''' cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)''' cmd4 = '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, rj.isbocreated as back_office_job, rj.publisheddate as publisheddate, rj.republisheddate as republisheddate, rj.companyid_id as Company_id, rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction, case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 16 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 16) ) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id WHERE c.AttType in (3,12,13) group by rj.jobid ''' cmd5 = '''drop table if exists SumoPlus.XY ''' ######################################################################################################### ############----------------- Executing the SQL Query ######################################################################################################### print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime() mysql_conn.query(cmd1) mysql_conn.query(cmd2) mysql_conn.query(cmd3) jobs = mysql_conn.query(cmd4) mysql_conn.query(cmd5) print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime( ) print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs) ######################################################################################################### ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233) ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'jobs_processed' monconn_jobs_local = MongoConnect(tableName, host='localhost', database='mailer_weekly') monconn_jobs_local_cur = monconn_jobs_local.getCursor() print 'Connecting to Mongodb...finished' ######################################################################################################### ############-----------------Processing the Jobs data extracted from SQL ######################################################################################################### i = 0 for job in jobs: if i % 1000 == 0: print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time( ) - start_time, ' seconds' job_id = job['Jobid'] job_title = cleanToken(job['JobTitle']) job_maxexp = cleanToken(job['ExpMax']) job_minexp = cleanToken(job['ExpMin']) job_maxsal = cleanToken(job['SalaryMax']) job_minsal = cleanToken(job['SalaryMin']) job_jd = cleanHTML(cleanToken(job['JD'])) job_industry = cleanToken(job['Industry']) job_location = removeDup(job['location']) job_subfunction = removeDup(cleanToken(job['subfunction'])) job_function = removeDup(cleanToken(job['function'])) job_skills = removeDup(cleanToken(job['keySkills'])) job_flag = job['flag'] job_accounttype = job['account_type'] job_company_id = job['Company_id'] job_company_name = cleanToken(job['Company_name']) job_published_date = job['publisheddate'] job_republished_date = job['republisheddate'] job_back_office = int(job['back_office_job']) job_location = job_location.replace(', ', ',').lower().split(',') if job_company_id == 421880: #######---------- Altimetrik Jobs removed continue ######################################################################################################### ############-----------------Creating Bag of Words for Text ######################################################################################################### text = 5 * (" " + job_title) + ' ' + 5 * ( " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * ( " " + job_industry) + ' ' + 2 * ( " " + job_function) + ' ' + 2 * (" " + job_subfunction) text = text.replace('candidates', ' ') job_bow = mb.getBow(text, getbowdict=0) ######################################################################################################### ############-----------------Creating Job document to be saved in Mongo ######################################################################################################### document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \ 'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\ 'job_location':job_location, 'job_subfunction':job_subfunction,\ 'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \ 'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \ 'job_flag':job_flag,'job_accounttype':job_accounttype, \ 'job_company_id':job_company_id,'job_company_name':job_company_name, 'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office } ######################################################################################################### ############-----------------Saving the document in Job collection Mongo (172.22.66.233) ######################################################################################################### monconn_jobs_local.saveToTable(document) i += 1 print "Processing finished....." print 'chunkID:', chunkID, ' Total time taken is: ', time.time( ) - start_time, ' seconds.' end_time = time.time() time_taken = end_time - start_time send_email([ '*****@*****.**', '*****@*****.**' ], "Revival Mailer Weekly", 'Jobs Processed ' + str(i) + ' in :' + str(end_time - start_time) + ' seconds') ######################################################################################################### ############-----------------Changing the status of completion and deleting the mongo connections ######################################################################################################### del (monconn_jobs_local) del (mysql_conn)
def computeAlertsChunk(chunkID): ######################################################################################################### ############-----------------Creating a connection to output mongodb ######################################################################################################### tablename = 'JobSuggestions' monconn_recommendations = MongoConnect(tablename, host='localhost', database='similar_jobs_onsite') print 'Chunk:', chunkID, 'initiated at:', time.ctime() ######################################################################################################### ############-----------------Fetch the 3 month jobs data from mongo ######################################################################################################### tablename = "active_jobs_dump" monconn_jobs_1 = MongoConnect(tablename, host='localhost', database='similar_jobs_onsite') mongo_jobs_1_cur = monconn_jobs_1.getCursor() myCondition = {'pid': chunkID} jobs_1 = monconn_jobs_1.loadFromTable(myCondition) ######################################################################################################### ############-----------------Calculating the overall score of a 3month jobs based on cosine,ctc, ############-----------------experience,city scores for each 1month Job ######################################################################################################### count = 0 for job_1 in jobs_1: count += 1 jobid_1 = job_1['job_id'] job_title_1 = job_1['job_title'] job_skills_1 = job_1['job_skills'] job_minsal_1 = job_1['job_minsal'] job_maxsal_1 = job_1['job_maxsal'] job_minexp_1 = job_1['job_minexp'] job_maxexp_1 = job_1['job_maxexp'] job_bow_1 = job_1['job_bow']['bow'] job_index_1 = job_1['job_index'] lsi_job_1 = lsiModel[tfIdfModel[job_bow_1]] simScrChunk = index[lsi_job_1] sortingExcelSheetList = [] for (jobIntIndex, lsiCosine) in simScrChunk: job = jobIntIdToJobDict[jobIntIndex] jobid = job['job_id'] job_title = job['job_title'] job_skills = job['job_skills'] job_minsal = job['job_minsal'] job_maxsal = job['job_maxsal'] job_minexp = job['job_minexp'] job_maxexp = job['job_maxexp'] job_bow = job['job_bow']['bow'] job_index = job['job_index'] job_company_id = job['job_company_id'] ######################################################################################################### ############-----------------Calculating the CTC and Experience and City Match Scores ######################################################################################################### ctc_match = CTCMatchScore(job_minsal_1, job_maxsal_1, job_minsal, job_maxsal) ctc_match_score = ctc_match.CTCMatchScore() exp_match_score = ExpMatchScore(job_minexp_1, job_maxexp_1, job_minexp, job_maxexp).ExpMatchScore() paid_boost = 0 if ctc_match_score == 1 and exp_match_score == 1: if jobid != jobid_1: try: job_city_1 = job_1['job_location'] except: job_city_1 = ["Delhi"] try: job_city = job['job_location'] except: job_city = ["Delhi"] #lsiCosine = getLSICosine(user_bow, job_bow).getLSICosine() try: cityScore = cm.getCityScore(job_city_1, job_city) except: cityScore = 0 overallMatchScore = getOverallMatchScore( lsiCosine, cityScore, paid_boost) s = (jobid_1, job_index_1, jobid, job_index, overallMatchScore, job_company_id) sortingExcelSheetList.append(s) else: continue else: continue ######################################################################################################### ############-----------------Finding the top 10 Jobs based on overall sccore ######################################################################################################### topN = 30 sortingExcelSheetListTopNJobs = heapq.nlargest(topN, sortingExcelSheetList, key=lambda x: x[4]) jobs2bsent = [] company_ids = [] for (jobid_1, job_index_1, jobid, job_index, overallMatchScore, job_company_id) in sortingExcelSheetListTopNJobs: if job_company_id not in company_ids: company_ids.append(job_company_id) jobs2bsent.append(int(jobid)) else: if company_ids.count(job_company_id) < 2: company_ids.append(job_company_id) jobs2bsent.append(int(jobid)) else: pass if len(jobs2bsent) >= 10: break else: pass ############################################################################################################## ############-----------------Creating a document to be saved in mongo collection ############################################################################################################## \ document = { '_id': jobid_1, 'sj': jobs2bsent, 'sjlen': len(jobs2bsent), 'lud': datetime.datetime.now() } ############################################################################################################## ############-----------------Dumping the document in mongo collection if recommendations were generated ############################################################################################################## monconn_recommendations.saveToTable(document) monconn_recommendations.close()
def preProcessChunk(chunkID): ######################################################################################################### ############----------------- SQL Credentials ######################################################################################################### ''' host="172.22.65.157" user="******" password="******" database="SumoPlus" unix_socket="/tmp/mysql.sock" port = 3308 ''' host = "172.22.66.204" user = "******" password = "******" database = "SumoPlus" unix_socket = "/tmp/mysql.sock" port = 3306 ######################################################################################################### ############----------------- Creating the SQL Query ######################################################################################################### print "Loading Jobs From MySql...." mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) cmd1 = '''drop table if exists SumoPlus.XY''' cmd2 = '''create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id) group by 1 ''' cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)''' cmd4 = '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, rj.companyid_id as Company_id, rj.publisheddate as publisheddate, rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction, case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 20 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 20) ) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id WHERE c.AttType in (3,12,13) group by rj.jobid ''' cmd5 = '''drop table if exists SumoPlus.XY ''' ######################################################################################################### ############----------------- Executing the SQL Query ######################################################################################################### print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime() mysql_conn.query(cmd1) mysql_conn.query(cmd2) mysql_conn.query(cmd3) jobs = mysql_conn.query(cmd4) mysql_conn.query(cmd5) print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime( ) print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs) ######################################################################################################### ############----------------- Connecting to Jobs Tech Dump Collections Mongo (172.22.66.233) ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'JobDesc_weekly' monconn_jobs_local = MongoConnect(tableName, host='localhost', database='JobDescDB') monconn_jobs_local_cur = monconn_jobs_local.getCursor() print 'Connecting to Mongodb...finished' ######################################################################################################### ############-----------------Processing the Jobs data extracted from SQL ######################################################################################################### i = 0 for job in jobs: if i % 1000 == 0: print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time( ) - start_time, ' seconds' _id = job['Jobid'] comp_name = cleanToken_1(job.get('Company_name', None)) loc = (removeDup(job.get('location', None))).replace(', ', ',').split(',') min_exp = job.get('ExpMin', None) title = cleanToken_1(job.get('JobTitle', None)) max_exp = job.get('ExpMax', None) pub_date = job.get('publisheddate', None) id = job['Jobid'] job_flag = job.get('flag') p = 0 if job_flag == "Paid": p = 1 else: p = 0 desc = None ######################################################################################################### ############-----------------Creating Job document to be saved in Mongo ######################################################################################################### document = { '_id': _id, 'comp_name': comp_name, 'loc': loc, 'min_exp': min_exp, 'title': title, 'max_exp': max_exp, 'pub_date': pub_date, 'id': id, 'p': p, 'desc': desc } ######################################################################################################### ############-----------------Saving the document in Job collection Mongo (172.22.66.233) ######################################################################################################### monconn_jobs_local.saveToTable(document) i += 1 print "Processing finished....." print 'chunkID:', chunkID, ' Total time taken is: ', time.time( ) - start_time, ' seconds.' end_time = time.time() time_taken = end_time - start_time send_email([ '*****@*****.**', '*****@*****.**' ], "Revival Mailer Weekly", 'TEch Dump Jobs Processed ' + str(i) + ' in :' + str(end_time - start_time) + ' seconds') ######################################################################################################### ############-----------------Deleting the mongo connections ######################################################################################################### del (monconn_jobs_local) del (mysql_conn)
" " + user_industry) + ' ' + 1 * (" " + user_functionalarea) user_bow = mb.getBow(text, getbowdict=0) document = {'_id':_id,'user_firstname':user_firstname,'user_lastname':user_lastname , \ 'user_phone_number':user_phone_number, 'user_id':user_id , \ 'user_email':user_email , 'user_lastlogin':user_lastlogin ,\ 'user_lastmodified' :user_lastmodified, 'user_profiletitle' :user_profiletitle ,\ 'user_registration_start_date':user_registration_start_date , 'user_experience':user_experience ,\ 'user_location':user_location , 'user_industry':user_industry, 'user_ctc':user_ctc ,\ 'user_functionalarea':user_functionalarea , 'user_skills':user_skills , 'user_bow': user_bow, \ 'user_countrycode':user_countrycode , 'pid':pid } monconn_jobs_local.saveToTable(document) ######################################################################################################### ############----------------- Creating a method to cater all typer of User ID's ######################################################################################################### if len(user_dict) < 1000 and len(str(user_id)) > 20: break if len(user_dict) < 1000 and len(str(user_id)) < 20: user_id = ObjectId("11c7dc9de839202efae641ef") if len(str(user_id)) < 20: id = user_id else: id = ObjectId(user_id)
skills = ','.join(skills_extracted) jobs_list = user.get('_jobs', []) if len(jobs_list) != 0: user_job = jobs_list[0].get('jt', '') else: user_job = '' document = { '_id': user['cid'], 'user_creation_date': user['cd'], 'user_jobtitle_resume': cleanToken(user_job), 'user_skills_resume': cleanToken(skills) } monconn_resume.saveToTable(document) send_email( [ '*****@*****.**', '*****@*****.**' ], "Resume Dump Incremental - JAM", 'Resume Details Dump Incremental Completed!! \n Collection Name : candidate_data \n DB Name : ResumeDump' ) del (monconn_users) del (monconn_resume) except: send_email( [ '*****@*****.**', '*****@*****.**', '*****@*****.**'
def preProcessChunk(chunkId1, chunkId2): ###################################### '''Fetching the Jobs from SQL''' ###################################### #host="172.22.65.157" host = "172.22.66.204" user = "******" password = "******" database = "SumoPlus" unix_socket = "/tmp/mysql.sock" port = 3306 print "Loading Jobs From MySql...." mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4) and rj.jobid%''' + str(numChunks) + '=' + str(chunkID) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)''' #print cmd cmd1 = '''drop table if exists SumoPlus.XY''' cmd2 = '''create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id) group by 1 ''' cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)''' cmd4 = '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, rj.companyid_id as Company_id, rj.displayname as Company_name, rj.publisheddate as Published_Date, rj.republisheddate as RePublished_Date, rj.expirydate as Expiry_Date, la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction, lj.Applications as Application_Number, case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' from (select * from recruiter_job where ( (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) <= %s) OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) <= %s))) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id left join ShineReport.LiveJobsApplications AS lj on rj.jobid = lj.JobId WHERE c.AttType in (3,12,13) group by rj.jobid ''' % (chunkId1, chunkId2, chunkId1, chunkId2) cmd5 = '''drop table if exists SumoPlus.XY ''' print 'chnukID:', chunkId1, ': Loading jobs from SQL....', time.ctime() mysql_conn.query(cmd1) print 'cmd1' mysql_conn.query(cmd2) print 'cmd2' mysql_conn.query(cmd3) print 'cmd3' jobs = mysql_conn.query(cmd4) print 'jobs' mysql_conn.query(cmd5) print 'chunkID:', chunkId1, ': Loading jobs from SQL....completed..', time.ctime( ) print 'chunkid:', chunkId1, ' : Number of jobs loaded: ', len(jobs) ###################################### '''Connecting to Mongo 233 Server''' ###################################### print 'Connecting to Mongodb..' tableName = 'jobs_processed_9months' monconn_jobs_local = MongoConnect(tableName, host='172.22.66.198', database='SimilarJobs') monconn_jobs_local_cur = monconn_jobs_local.getCursor() print 'Connecting to Mongodb...finished' ###################################### '''Processing the Jobs''' ###################################### global i #i = 0 for job in jobs: #pprint(job) #print i if i % 1000 == 0: print '\tchunkID:', chunkId1, ' numRecords:', i, ' completed in ', time.time( ) - start_time, ' seconds' job_id = job['Jobid'] job_title = cleanToken(job['JobTitle']) job_maxexp = cleanToken(job['ExpMax']) job_minexp = cleanToken(job['ExpMin']) job_maxsal = cleanToken(job['SalaryMax']) job_minsal = cleanToken(job['SalaryMin']) job_jd = cleanHTML(cleanToken(job['JD'])) job_industry = cleanToken(job['Industry']) job_location = removeDup(job['location']) job_subfunction = removeDup(cleanToken(job['subfunction'])) job_function = removeDup(cleanToken(job['function'])) job_skills = removeDup(cleanToken(job['keySkills'])) job_flag = job['flag'] job_accounttype = job['account_type'] job_company_id = job['Company_id'] job_company_name = cleanToken(job['Company_name']) job_index = i job_publishedate = job['Published_Date'] job_repubslisheddate = job['RePublished_Date'] job_expirydate = job['Expiry_Date'] pid = i % 5000 job_applications = job['Application_Number'] job_location = job_location.replace(', ', ',').lower().split(',') ################################################# '''Creating Bag of Words from the text fields''' ################################################# text = 5 * (" " + job_title) + ' ' + 3 * ( " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * ( " " + job_industry) + ' ' + 2 * ( " " + job_function) + ' ' + 2 * (" " + job_subfunction) text = text.replace('candidates', ' ') job_bow = mb.getBow(text, getbowdict=0) ################################################## '''Dumping Job Details in Mongo (172.22.66.253)''' ################################################## document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \ 'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\ 'job_location':job_location, 'job_subfunction':job_subfunction,\ 'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \ 'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \ 'job_flag':job_flag,'job_accounttype':job_accounttype, \ 'job_company_id':job_company_id,'job_company_name':job_company_name,'job_index':job_index, \ 'application_number': job_applications,'pid':pid,'job_publishedate':job_publishedate , \ 'job_repubslisheddate':job_repubslisheddate,'job_expirydate':job_expirydate } monconn_jobs_local.saveToTable(document) i += 1 print "Processing finished....." print 'chunkID:', chunkId1, ' Total time taken is: ', time.time( ) - start_time, ' seconds.' end_time = time.time() time_taken = end_time - start_time monconn_jobs_local.doIndexing('pid') #send_email(['*****@*****.**', '*****@*****.**','*****@*****.**'],"Similar Jobs Mailer 9 Month Jobs",'Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds') #os.system(' echo "Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Similar Jobs Mailer" [email protected], [email protected], [email protected]') del (monconn_jobs_local) del (mysql_conn)
def preProcessChunk(chunkID): ######################################################################################################### ############----------------- SQL Credentials ######################################################################################################### #Connect to SQL table and get the jobs data #host="172.16.66.64" #user="******" #password="******" ''' host="172.22.65.157" user="******" password="******" database="SumoPlus" unix_socket="/tmp/mysql.sock" port = 3308 ''' host="172.22.66.204" user="******" password="******" database="SumoPlus" unix_socket="/tmp/mysql.sock" port = 3306 ######################################################################################################### ############----------------- Creating the SQL Query ######################################################################################################### print "Loading Jobs From MySql...." mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4) and rj.jobid%''' + str(numChunks) + '=' + str(chunkID) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)''' #print cmd cmd='''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) ) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE c.AttType in (3,12,13) group by rj.jobid ''' ######################################################################################################### ############----------------- Executing the SQL Query ######################################################################################################### print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime() jobs = mysql_conn.query(cmd) print 'chunkID:', chunkID,': Loading jobs from SQL....completed..', time.ctime() print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs) ######################################################################################################### ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233) ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'jobs_processed_midout' monconn_jobs_local = MongoConnect(tableName , host = 'localhost', database = 'Midout_Mailers') monconn_jobs_local_cur = monconn_jobs_local.getCursor() print 'Connecting to Mongodb...finished' ######################################################################################################### ############-----------------Processing the Jobs data extracted from SQL ######################################################################################################### i = 0 for job in jobs: #pprint(job) #print i if i%1000 == 0: print '\tchunkID:', chunkID, ' numRecords:' , i, ' completed in ', time.time() - start_time, ' seconds' job_id = job['Jobid'] job_title = cleanToken(job['JobTitle']) job_maxexp = cleanToken(job['ExpMax']) job_minexp = cleanToken(job['ExpMin']) job_maxsal = cleanToken(job['SalaryMax']) job_minsal = cleanToken(job['SalaryMin']) job_jd = cleanHTML(cleanToken(job['JD']) ) job_industry = cleanToken(job['Industry']) job_location=removeDup(job['location']) job_subfunction=removeDup(job['subfunction']) job_function=removeDup(job['function']) job_skills=removeDup(cleanToken(job['keySkills'])) ######################################################################################################### ############-----------------Creating Bag of Words for Text ######################################################################################################### text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction) text = text.replace('candidates', ' ') job_bow = mb.getBow(text, getbowdict = 0) ######################################################################################################### ############-----------------Creating Job document to be saved in Mongo ######################################################################################################### document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \ 'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\ 'job_location':job_location, 'job_subfunction':job_subfunction,\ 'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \ 'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd } ######################################################################################################### ############-----------------Saving the document in Job collection Mongo (172.22.66.233) ######################################################################################################### monconn_jobs_local.saveToTable(document) i += 1 print "Processing finished....." print 'chunkID:', chunkID, ' Total time taken is: ', time.time() - start_time, ' seconds.' end_time = time.time() time_taken = end_time - start_time send_email(['*****@*****.**', '*****@*****.**'],"Midout Mailers",'Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds') #os.system(' echo "Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Midout Mailers" [email protected] ,[email protected]') del(monconn_jobs_local) del(mysql_conn)
def computeAlertsChunk(chunkID): ######################################### '''Creating a connection to output mongodb''' ######################################### tablename = 'WeeklyMsgQueue' monconn_recommendations = MongoConnect(tablename, host='localhost', database='mailer_weekly') print 'Chunk:', chunkID, 'initiated at:', time.ctime() ################################################# '''Fetch the user data from the database''' ################################################# #print "Fetching the users data from Mongodb for ChunkID:",chunkID #tablename="candidates_processed" tablename = "candidates_processed" monconn_users = MongoConnect(tablename, host='localhost', database='mailer_weekly') mongo_users_cur = monconn_users.getCursor() myCondition = {'p': chunkID} #myCondition = {} users = monconn_users.loadFromTable(myCondition) #print "Fetching the users data from Mongodb....completed for ChunkID:",chunkID ################################################################## '''Get the top 10 matching jobs based on cosine for each candidate''' ################################################################## count = 0 for user in users: count += 1 user_ctc = user['user_ctc'] user_exp = user['user_experience'] user_id = user['user_id'] user_email = user['user_email'] user_bow = user['user_bow']['bow'] user_current_time = datetime.datetime.now() user_jobtitle = user['user_jobtitle'] user_lastlogin = user['user_lastlogin'] user_phone = user['user_phone'] user_gender = user['user_gender'] user_current_company = user['user_current_company'] user_functionalarea_id = user['user_functionalarea_id'] user_lastmodified = user['user_lastmodified'] user_fullname = user['user_fullname'] user_phone_verified = user['user_phone_verified'] user_location_id = user['user_location_id'] user_ctc_id = user['user_ctc_id'] user_highest_qual = user['user_highest_qual'] user_edu_special = user['user_edu_special'] user_email_verified = user['user_email_verified'] user_spam_status = user['user_spam_status'] user_bounce_status = user['user_bounce_status'] user_email_alert_status = user['user_email_alert_status'] user_functionalarea = user['user_functionalarea'] user_industry = user['user_industry'] user_jobtitle = user['user_jobtitle'] user_profiletitle = user['user_profiletitle'] user_edom = user['user_edom'] user_industry = user['user_industry'] user_skills = user['user_skills'] user_profiletitle = user['user_profiletitle'] user_pid = user['p'] lsi_user = lsiModel[tfIdfModel[user_bow]] simScrChunk = index[lsi_user] sortingExcelSheetList = [] for (jobIntIndex, lsiCosine) in simScrChunk: if lsiCosine < 0.18: continue job = jobIntIdToJobDict[jobIntIndex] jobid = job['job_id'] job_title = job['job_title'] job_skills = job['job_skills'] job_minsal = job['job_minsal'] job_maxsal = job['job_maxsal'] job_minexp = job['job_minexp'] job_maxexp = job['job_maxexp'] job_bow = job['job_bow']['bow'] job_accounttype = job['job_accounttype'] job_flag = job['job_flag'] ####################################################### ''' Calculating the CTC and Experience Match Scores''' ####################################################### ctc_match_score = CTCMatchScore(job_minsal, job_maxsal, user_ctc).CTCMatchScore() exp_match_score = ExpMatchScore(job_minexp, job_maxexp, user_exp).ExpMatchScore() paid_boost = PaidBoostScore(job_flag, job_accounttype).PaidBoostScore() #ctc_match_score = 1 #exp_match_score = 1 paid_boost = 0 ####################################################### ''' For Low earning desperate guy uncomment this ''' ####################################################### ''' if (1 + user_ctc)/(1 + user_exp) < 0.3: ctc_match_score = 1 exp_match_score = 1 ''' if ctc_match_score == 1 and exp_match_score == 1: jobid = job['job_id'] try: job_city = job['job_location'] except: job_city = 'Delhi' try: user_city = user['user_location'] except: user_city = 'Delhi' #print user_city, job_city try: user_city_list = user_city.lower().replace( 'other', '').strip().split(',') user_city_list = [x.strip() for x in user_city_list] except: user_city_list = [''] try: job_city_list = job_city.lower().replace( 'other', '').strip().split(',') job_city_list = [x.strip() for x in job_city_list] except: job_city_list = [''] #print user_city_list, job_city_list try: cityScore = cm.getCityScore(user_city_list, job_city_list) except: cityScore = 0 #if cityScore == 0: #count = count +1 #print user_city_list, job_city_list, cityScore overallMatchScore = getOverallMatchScore( lsiCosine, cityScore, paid_boost) s = (user_id, user_email, jobid, overallMatchScore, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp) sortingExcelSheetList.append(s) else: continue ################################# '''Finding the top 10 Jobs''' ################################# topN = 10 sortingExcelSheetListTopNJobs = heapq.nlargest(topN, sortingExcelSheetList, key=lambda x: x[3]) #pprint(sortingExcelSheetListTopNJobs) jobs2bsent = [] for (user_id, user_email, jobid, overallMatchScore, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp) in sortingExcelSheetListTopNJobs: #print (userid, jobid, lsiCosine, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp) jobs2bsent.append(int(jobid)) document = { "c": user_id, "_id": user_email, "m": user_phone, "te": user_exp, "cr": user_jobtitle, "g": user_gender, "cc": user_current_company, "fa": user_functionalarea, "faid": user_functionalarea_id, "pd": user_lastmodified, "fn": user_fullname, "cpv": user_phone_verified, "sCLID": user_location_id, "sASID": user_ctc_id, "eq": user_highest_qual, "es": user_edu_special, "ev": user_email_verified, "ll": user_lastlogin, "sal": user_ctc, "edom": user_edom, "t": user_current_time, "mj": jobs2bsent, "bj": [], "oj": [], "pid": user_pid, "s": False } if len(jobs2bsent) > 0: monconn_recommendations.saveToTable(document) #print 'Chunk:', chunkID, 'processed in:', time.ctime() monconn_recommendations.close()