def preProcessChunk(chunkID):
    """Load recently published jobs from MySQL and save them, cleaned, to Mongo.

    Runs one query against the ``SumoPlus`` MySQL database, builds a weighted
    bag-of-words for every returned job and saves one document per job to the
    ``jobs_processed_midout`` collection of the local ``Midout_Mailers``
    database. A summary e-mail is sent after all rows have been saved.

    Args:
        chunkID: Identifier of the chunk being processed. It only labels the
            progress output; the query itself is not filtered by it.

    Relies on names defined elsewhere in the file: ``MySQLConnect``,
    ``MongoConnect``, ``cleanToken``, ``cleanHTML``, ``removeDup``, ``mb``,
    ``send_email``, ``time`` and the module-level ``start_time``.
    """
    # MySQL connection settings (credentials are masked in this copy).
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print("Loading Jobs From MySql....")
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)

    # One row per job (status 3 or 9) published or republished less than
    # 8 days ago, with its skill / function / city attributes concatenated.
    cmd = (
        "SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, "
        "la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, "
        "le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, "
        "group_concat(c.AttValueCustom,'') as keySkills, "
        "group_concat(fn.field_enu,'') as function, "
        "group_concat(l.city_desc,'') as location, "
        "group_concat(fn.sub_field_enu,'') as subfunction "
        "from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) "
        "and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 "
        "OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) ) AS rj "
        "left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id "
        "left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id "
        "left join lookup_experience AS le1 on rj.minexperience = le1.value "
        "left join lookup_experience AS le2 on rj.maxexperience = le2.value "
        "left join recruiter_jobattribute as c on rj.jobid = c.jobid_id "
        "left join lookup_industry AS li on rj.industry=li.industry_id "
        "left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 "
        "left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 "
        "WHERE c.AttType in (3,12,13) group by rj.jobid "
    )

    print('chnukID: %s : Loading jobs from SQL.... %s' % (chunkID, time.ctime()))
    jobs = mysql_conn.query(cmd)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s'
          % (chunkID, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkID, len(jobs)))

    print('Connecting to Mongodb..')
    tableName = 'jobs_processed_midout'
    monconn_jobs_local = MongoConnect(tableName, host='localhost', database='Midout_Mailers')
    # The returned cursor is not used in this function.
    # NOTE(review): call kept in case getCursor() has a side effect - confirm and drop.
    monconn_jobs_local.getCursor()
    print('Connecting to Mongodb...finished')

    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds'
                  % (chunkID, i, time.time() - start_time))

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(job['subfunction'])
        job_function = removeDup(job['function'])
        job_skills = removeDup(cleanToken(job['keySkills']))

        # Repeating a field weights it in the bag of words: title and skills
        # count five times, industry / function / sub-function twice, JD once.
        text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills) + ' ' + 1*(" "+job_jd) + ' ' + 2*(" "+job_industry) + ' ' + 2*(" "+job_function) + ' ' + 2*(" "+job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        document = {
            'job_id': job_id,
            'job_title': job_title,
            'job_function': job_function,
            'job_maxexp': job_maxexp,
            'job_minexp': job_minexp,
            'job_location': job_location,
            'job_subfunction': job_subfunction,
            'job_maxsal': job_maxsal,
            'job_minsal': job_minsal,
            'job_skills': job_skills,
            'job_bow': job_bow,
            'job_industry': job_industry,
            'job_jd': job_jd,
        }
        monconn_jobs_local.saveToTable(document)
        i += 1

    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.'
          % (chunkID, time.time() - start_time))

    end_time = time.time()
    time_taken = end_time - start_time
    send_email(['*****@*****.**', '*****@*****.**'], "Midout Mailers",
               'Jobs Processed ' + str(i) + ' in :' + str(time_taken) + ' seconds')

    # Drop the references to both connections, as the original code did.
    del monconn_jobs_local
    del mysql_conn
def _selectTopSimilarJobs(scoredJobs, topN=30, maxJobs=10, maxPerCompany=2):
    """Pick the job ids to suggest from scored candidates.

    Args:
        scoredJobs: Iterable of ``(jobid, overallMatchScore, job_company_id)``.
        topN: Number of best-scoring candidates that are looked at.
        maxJobs: Maximum number of job ids returned.
        maxPerCompany: Maximum number of returned jobs sharing a company id.

    Returns:
        List of ``int(jobid)`` values, best score first.
    """
    best = heapq.nlargest(topN, scoredJobs, key=lambda scored: scored[1])
    perCompany = {}
    chosen = []
    for jobid, _score, companyId in best:
        taken = perCompany.get(companyId, 0)
        if taken < maxPerCompany:
            perCompany[companyId] = taken + 1
            chosen.append(int(jobid))
        if len(chosen) >= maxJobs:
            break
    return chosen


def _scoreSimilarJobs(job_1):
    """Score every job the similarity index returns for *job_1*.

    A candidate is kept only when its CTC and experience match scores are
    both 1 and it is not *job_1* itself. Uses the module-level ``lsiModel``,
    ``tfIdfModel``, ``index``, ``jobIntIdToJobDict`` and ``cm`` objects.

    Returns:
        List of ``(jobid, overallMatchScore, job_company_id)`` tuples.
    """
    jobid_1 = job_1['job_id']
    job_minsal_1 = job_1['job_minsal']
    job_maxsal_1 = job_1['job_maxsal']
    job_minexp_1 = job_1['job_minexp']
    job_maxexp_1 = job_1['job_maxexp']
    # The source job's city does not change per candidate, so read it once.
    try:
        job_city_1 = job_1['job_location']
    except KeyError:
        job_city_1 = ["Delhi"]

    paid_boost = 0
    lsi_job_1 = lsiModel[tfIdfModel[job_1['job_bow']['bow']]]

    scored = []
    for (jobIntIndex, lsiCosine) in index[lsi_job_1]:
        job = jobIntIdToJobDict[jobIntIndex]
        jobid = job['job_id']
        job_company_id = job['job_company_id']

        ctc_match_score = CTCMatchScore(
            job_minsal_1, job_maxsal_1, job['job_minsal'], job['job_maxsal']
        ).CTCMatchScore()
        exp_match_score = ExpMatchScore(
            job_minexp_1, job_maxexp_1, job['job_minexp'], job['job_maxexp']
        ).ExpMatchScore()
        if ctc_match_score != 1 or exp_match_score != 1 or jobid == jobid_1:
            continue

        try:
            job_city = job['job_location']
        except KeyError:
            job_city = ["Delhi"]
        # Best effort: a failing city comparison scores 0 instead of aborting.
        try:
            cityScore = cm.getCityScore(job_city_1, job_city)
        except Exception:
            cityScore = 0

        overallMatchScore = getOverallMatchScore(lsiCosine, cityScore, paid_boost)
        scored.append((jobid, overallMatchScore, job_company_id))
    return scored


def computeAlertsChunk(chunkID):
    """Compute and store similar-job suggestions for one partition of jobs.

    Loads every document of ``active_jobs_dump`` whose ``pid`` equals
    *chunkID*, scores the jobs returned by the similarity index for it and
    saves one document per source job to ``JobSuggestions`` (both collections
    live in the local ``similar_jobs_onsite`` database). The saved document
    holds the source job id as ``_id``, the suggested job ids (``sj``), their
    count (``sjlen``) and the update time (``lud``). A document is saved even
    when no suggestion survived the filters.

    Both Mongo connections are closed when the function exits, including when
    an error interrupts the processing.

    Args:
        chunkID: Value matched against the ``pid`` field of the input jobs.
    """
    monconn_recommendations = MongoConnect('JobSuggestions', host='localhost', database='similar_jobs_onsite')
    try:
        print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))

        monconn_jobs_1 = MongoConnect("active_jobs_dump", host='localhost', database='similar_jobs_onsite')
        try:
            # The returned cursor is not used in this function.
            # NOTE(review): call kept in case getCursor() has a side effect - confirm and drop.
            monconn_jobs_1.getCursor()
            jobs_1 = monconn_jobs_1.loadFromTable({'pid': chunkID})

            for job_1 in jobs_1:
                jobs2bsent = _selectTopSimilarJobs(_scoreSimilarJobs(job_1))
                document = {
                    '_id': job_1['job_id'],
                    'sj': jobs2bsent,
                    'sjlen': len(jobs2bsent),
                    'lud': datetime.datetime.now(),
                }
                monconn_recommendations.saveToTable(document)
        finally:
            monconn_jobs_1.close()
    finally:
        monconn_recommendations.close()
############----------------- Loading the mappings for bow ######################################################################################################### print "Loading Mapping for BOW" synMappingFileName = '/data/Projects/JobAlerts/Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv' keywordIdMappingFileName = '/data/Projects/JobAlerts/Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' mb = MyBOW(synMappingFileName, keywordIdMappingFileName) ######################################################################################################### ############-----------------Creating the connection to candidates_processed_5 and dropping if already exist ######################################################################################################### tablename = 'candidates_processed_5' monconn_recommendations = MongoConnect(tablename, host='localhost', database='JobAlerts') monconn_recommendations.dropTable() monconn_recommendations.close() ######################################################################################################### ############----------------- Initiating Multiprocessing and computing Recommendations ######################################################################################################### print "Starting the Process" pprocessing = 1 if pprocessing == 0: numChunks = 80 computeAlertsChunk(0)
def preProcessChunk(chunkID):
    """Load recent jobs from MySQL and save a compact document per job to Mongo.

    Builds the scratch table ``SumoPlus.XY`` (sales summary per company
    account), uses it to flag every job as 'Paid' or 'Free', and saves one
    document per job to the ``JobDesc_weekly`` collection of the local
    ``JobDescDB`` database. A summary e-mail is sent after all rows are saved.

    The scratch table is dropped even when one of the queries fails.

    Args:
        chunkID: Identifier of the chunk being processed. It only labels the
            progress output; the queries are not filtered by it.

    Relies on names defined elsewhere in the file: ``MySQLConnect``,
    ``MongoConnect``, ``cleanToken_1``, ``removeDup``, ``send_email``,
    ``time`` and the module-level ``start_time``.
    """
    # MySQL connection settings (credentials are masked in this copy).
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print("Loading Jobs From MySql....")
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)

    # NOTE(review): every call uses the same scratch table name, so calls that
    # overlap in time would interfere with each other - confirm this never runs
    # in parallel.
    cmd1 = "drop table if exists SumoPlus.XY"
    cmd2 = (
        "create table SumoPlus.XY as SELECT company_account_id,"
        "SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date "
        "from SumoPlus.backoffice_accountsales a1 "
        "where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales "
        "where a1.company_account_id=company_account_id) group by 1 "
    )
    cmd3 = "ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)"
    # One row per job (status 3 or 9) published or republished less than
    # 20 days ago, plus the account type and the Paid/Free flag.
    cmd4 = (
        "SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, "
        "rj.companyid_id as Company_id, rj.publisheddate as publisheddate, "
        "rj.displayname as Company_name, "
        "la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, "
        "le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, "
        "group_concat(c.AttValueCustom,'') as keySkills, "
        "group_concat(fn.field_enu,'') as function, "
        "group_concat(l.city_desc,'') as location, "
        "group_concat(fn.sub_field_enu,'') as subfunction, "
        'case account_type when 0 THEN "Company" when 1 THEN "Consultant" '
        'when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" '
        'END AS account_type, '
        "IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' "
        "from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) "
        "and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 20 "
        "OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 20) ) AS rj "
        "left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id "
        "left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id "
        "left join lookup_experience AS le1 on rj.minexperience = le1.value "
        "left join lookup_experience AS le2 on rj.maxexperience = le2.value "
        "left join recruiter_jobattribute as c on rj.jobid = c.jobid_id "
        "left join lookup_industry AS li on rj.industry=li.industry_id "
        "left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 "
        "left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 "
        "left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id "
        "left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id "
        "WHERE c.AttType in (3,12,13) group by rj.jobid "
    )
    cmd5 = "drop table if exists SumoPlus.XY "

    print('chnukID: %s : Loading jobs from SQL.... %s' % (chunkID, time.ctime()))
    mysql_conn.query(cmd1)
    try:
        mysql_conn.query(cmd2)
        mysql_conn.query(cmd3)
        jobs = mysql_conn.query(cmd4)
    finally:
        # Never leave the scratch table behind, even if a query above failed.
        mysql_conn.query(cmd5)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s'
          % (chunkID, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkID, len(jobs)))

    print('Connecting to Mongodb..')
    tableName = 'JobDesc_weekly'
    monconn_jobs_local = MongoConnect(tableName, host='localhost', database='JobDescDB')
    # The returned cursor is not used in this function.
    # NOTE(review): call kept in case getCursor() has a side effect - confirm and drop.
    monconn_jobs_local.getCursor()
    print('Connecting to Mongodb...finished')

    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds'
                  % (chunkID, i, time.time() - start_time))

        job_pk = job['Jobid']
        cities = removeDup(job.get('location', None)).replace(', ', ',').split(',')

        document = {
            '_id': job_pk,
            'comp_name': cleanToken_1(job.get('Company_name', None)),
            'loc': cities,
            'min_exp': job.get('ExpMin', None),
            'title': cleanToken_1(job.get('JobTitle', None)),
            'max_exp': job.get('ExpMax', None),
            'pub_date': job.get('publisheddate', None),
            'id': job_pk,
            # 1 for jobs the query flagged as 'Paid', 0 otherwise.
            'p': 1 if job.get('flag') == "Paid" else 0,
            'desc': None,
        }
        monconn_jobs_local.saveToTable(document)
        i += 1

    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.'
          % (chunkID, time.time() - start_time))

    end_time = time.time()
    time_taken = end_time - start_time
    send_email(['*****@*****.**', '*****@*****.**'], "Revival Mailer Weekly",
               'TEch Dump Jobs Processed ' + str(i) + ' in :' + str(time_taken) + ' seconds')

    # Drop the references to both connections, as the original code did.
    del monconn_jobs_local
    del mysql_conn
def _experienceMonths(rawExperience):
    """Convert an experience text such as '2 Yrs 6 Months' to a month count.

    The text is split on 'Yrs', 'Yr', 'Months' and 'Month'; the first piece is
    read as years and the second as months. A piece that is missing or is not
    an integer counts as 0.
    """
    try:
        pieces = re.split('Yrs|Yr|Months|Month', str(rawExperience))
    except Exception:
        # Nothing usable for this record; never fall back to another record's value.
        pieces = []
    try:
        exp_yrs = int(str(pieces[0]).strip())
    except (ValueError, IndexError):
        exp_yrs = 0
    try:
        exp_months = int(str(pieces[1]).strip())
    except (ValueError, IndexError):
        exp_months = 0
    return exp_yrs * 12 + exp_months


def _candidateRow(data, industry_dict, specialization_dict, company_dict):
    """Build one CSV row (list of 12 values) from a candidate document.

    A field that cannot be derived is written as '' (0 for the experience),
    so one bad field does not lose the whole record.
    """
    user_id = data.get('_id', '')

    try:
        specialization = str(data.get('user_edu_special', ''))
        print(specialization)
    except Exception:
        specialization = ''
    try:
        specialization_id = specialization_dict[str(data.get('user_edu_special', ''))]
        print(specialization_id)
    except Exception:
        specialization_id = ''

    total_exp_months = _experienceMonths(data.get('user_experience', ''))

    try:
        # First element of the stored location value.
        city = str(data.get('user_location', '')[0])
    except Exception:
        city = ''
    city_id = data.get('user_location_id', '')

    industry = data.get('user_industry')
    try:
        industry_id = industry_dict[industry]
    except Exception:
        industry_id = ''

    try:
        company = str(data.get('user_current_company', '')).title()
    except Exception:
        company = ''
    try:
        company_id = company_dict[str(data.get('user_current_company', '')).title()]
    except Exception:
        company_id = ''

    try:
        # Second piece after splitting the CTC text on '-' or 'Lakh'.
        salary = str(re.split('-|Lakh', str(data.get('user_ctc', '')))[1]).strip()
    except Exception:
        salary = ''

    try:
        job_title = str(data.get('user_jobtitle', '')).title()
    except Exception:
        job_title = ''

    return [
        user_id, specialization, specialization_id, total_exp_months, city,
        city_id, industry, industry_id, company, company_id, salary, job_title
    ]


def salary_data():
    """Export recently active candidates to ``Cand_Data.csv``.

    Reads up to 100000 documents of ``candidates_processed_4`` whose
    ``user_lastlogin`` is greater than the timestamp of 183 days ago (compared
    as a string) and writes one CSV row per candidate. Industry and company
    ids come from the ``LookupIndustry`` / ``LookupCompanyName`` collections;
    specialization ids come from ``Specilization.csv``.

    The export stops at the first record that cannot be written; that record
    is printed. Both files are closed when the function exits.

    Relies on names defined elsewhere in the file: ``getMongoMaster``,
    ``MongoConnect``, ``username`` and ``password``.
    """
    date1 = datetime.now() - timedelta(days=183)
    print(datetime.now())
    print(date1)

    with open('/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv', 'w') as ofile:
        writer = csv.writer(ofile)
        writer.writerow([
            'user_id', 'specialization', 'specialization_id', 'total_exp_months',
            'city', 'city_id', 'industry', 'industry_id', 'company', 'company_id',
            'salary_lacs', 'job_title'
        ])

        ###### Loading Mongo Cursors #############
        mongo_conn = getMongoMaster()
        collection = getattr(mongo_conn, "candidates_processed_4")
        lookup_industry = MongoConnect('LookupIndustry',
                                       host='172.22.65.88',
                                       port=27018,
                                       database='sumoplus',
                                       username=username,
                                       password=password,
                                       authenticate=True).getCursor()
        lookup_company = MongoConnect('LookupCompanyName',
                                      host='172.22.65.88',
                                      port=27018,
                                      database='sumoplus',
                                      username=username,
                                      password=password,
                                      authenticate=True).getCursor()

        ###### Creating Industry Dict #############
        industry_dict = {}
        for records in lookup_industry.find({}, {'ii': 1, 'idesc': 1}):
            industry_dict[records['idesc']] = records['ii']

        ###### Creating Specialization Dict #######
        specialization_dict = {}
        with open('/data/Projects/Salary_Tool_HT_Campus/Output/Specilization.csv', 'rb') as ifile:
            for records in csv.reader(ifile):
                specialization_dict[records[0].strip()] = records[1]

        ###### Creating Company Dict ##############
        company_dict = {}
        for records in lookup_company.find({}, {'v': 1, 'd': 1}):
            company_dict[records['d']] = records['v']

        ###### Fetching Last Six Months Active Cands ######
        required_data = collection.find({
            'user_lastlogin': {
                '$gt': str(date1)
            }
        }).limit(100000)

        row = None
        try:
            for data in required_data:
                row = _candidateRow(data, industry_dict, specialization_dict, company_dict)
                writer.writerow(row)
        except Exception:
            if row is None:
                # Failed before any record was built: nothing to show, so do
                # not hide the error.
                raise
            # Show the last record that was built when the export stopped.
            print(' '.join(['%s'] * len(row)) % tuple(row))
######################################################################################################### ############----------------- Loading the mapping for Bag of Words ######################################################################################################### print 'Loading the mappings for bow' synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv' keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' #This file is created mb = MyBOW(synMappingFileName, keywordIdMappingFileName) print 'Loading the mappings for bow...finished' ######################################################################################################### ############----------------- Dropping the existing collection of Jobs ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'JobDesc_analytics' monconn_jobs_local = MongoConnect(tableName, host='172.22.66.198', database='JobDescDB_analytics') monconn_jobs_local_cur = monconn_jobs_local.getCursor() monconn_jobs_local.dropTable() print 'Connecting to Mongodb...finished' del (monconn_jobs_local) ######################################################################################################### ############----------------- Initiating Multiprocessing and extracting Jobs ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid) ######################################################################################################### numChunks = 100 chunkIDs = range(0, numChunks) print chunkIDs pprocessing = 0 if pprocessing == 0:
from Features.LSI_common.MyBOW import MyBOW  #Custom Module - /data/Projects/JobAlerts/Features/LSI_common/MyBOW.py
from Model.getOverallMatchScore import getOverallMatchScore  #Custom Module - /data/Projects/JobAlerts/Model/getOverallMatchScore.py
from Main.getLSICosine import getLSICosine  #Custom Module - /data/Projects/JobAlerts/Main/getLSICosine.py
from DataConnections.MySQLConnect.MySQLConnect import MySQLConnect  #Custom Module - /data/Projects/JobAlerts/DataConnections/MySQLConnect/MySQLConnect.py
from DataConnections.MongoConnect.MongoConnect import MongoConnect  #Custom Module - /data/Projects/JobAlerts/DataConnections/MongoConnect/MongoConnect.py
from Utils.Utils_1 import cleanToken  #Custom Module - /data/Projects/JobAlerts/Utils/Utils_1.py
from Utils.HtmlCleaner import HTMLStripper  #Custom Module - /data/Projects/JobAlerts/Utils/HtmlCleaner.py
from Utils.Cleaning import *  #Custom Module - /data/Projects/JobAlerts/Utils/Cleaning.py
from Notifier.Notifier import send_email  #Custom Module - /data/Projects/JobAlerts/Notifier/Notifier.py

#########################################################################################################
############----------------- Creating a Mongo Connection to Jobs Database
#########################################################################################################
# Counts the entries behind the `jobs_processed` cursor (database `JobAlerts`),
# then drops the reference to the connection object.
tableName = 'jobs_processed'
monconn_jobs_local = MongoConnect(tableName, host='172.22.66.198', database='JobAlerts')
monconn_jobs_local_cur = monconn_jobs_local.getCursor()
jobs_processed_count = monconn_jobs_local_cur.count()
del (monconn_jobs_local)

#########################################################################################################
############----------------- Creating a Mongo Connection to Tech dump of Jobs
#########################################################################################################
# Same count for `JobDesc_analytics` (database `JobDescDB_analytics`). This
# connection object is not deleted within the visible part of the file.
tableName = 'JobDesc_analytics'
monconn_jobs_local_1 = MongoConnect(tableName, host='172.22.66.198', database='JobDescDB_analytics')
#monconn_jobs_local_1 = MongoConnect(tableName, host = 'localhost', database = 'JobDescDB_analytics')
monconn_jobs_local_cur_1 = monconn_jobs_local_1.getCursor()
jobs_processed_tech_dump_count = monconn_jobs_local_cur_1.count()
def _load_company_names(csv_path):
    """Read ``company_id,company_name`` rows into a dict keyed by integer id.

    Rows with fewer than two columns or a non-numeric id are skipped; such
    rows could never match a job's company id anyway.
    """
    names = {}
    with open(csv_path, 'r') as handle:
        for row in csv.reader(handle):
            if len(row) < 2:
                continue
            try:
                names[int(row[0])] = row[1]
            except ValueError:
                continue
    return names


def _split_cities(raw_cities):
    """Lower-case a comma separated city string and split it into a list.

    The token 'other' is removed before splitting.  Anything that is not
    string-like yields ``['']``, which is what the previous bare ``except``
    produced for such values.
    """
    try:
        parts = raw_cities.lower().replace('other', '').strip().split(',')
    except (AttributeError, TypeError):
        return ['']
    return [part.strip() for part in parts]


def _select_top_jobs(scored_jobs, top_n, max_per_company):
    """Pick the best ``top_n`` jobs, keeping at most ``max_per_company`` each.

    ``scored_jobs`` holds ``(job_id, score, company_id)`` tuples.  Returns
    three parallel lists: job ids (as int), scores rounded to two decimals
    and the company id of every kept job.
    """
    job_ids, scores, company_ids = [], [], []
    best = heapq.nlargest(top_n, scored_jobs, key=lambda item: item[1])
    for job_id, score, company_id in best:
        if company_ids.count(company_id) >= max_per_company:
            continue
        company_ids.append(company_id)
        job_ids.append(int(job_id))
        scores.append(round(score, 2))
    return job_ids, scores, company_ids


def _build_subject(first_name, companies, functional_area):
    """Return the mail subject line for one candidate.

    Falls back to the generic subject whenever the personalised text cannot
    be assembled (for instance mixed unicode / byte strings under Python 2),
    so a candidate never inherits the subject computed for someone else.
    """
    fallback = first_name + ", don't miss out on these new jobs"
    try:
        if companies:
            return (first_name + ": " + ', '.join(companies) +
                    " and other top company jobs matching your profile")
        if functional_area == "Fresher (No Experience)":
            return fallback
        return (first_name + ", new " +
                functional_area.replace(' /', ',') + " jobs for you")
    except Exception:
        # Deliberately broad: the subject is cosmetic and must not abort the
        # whole chunk.
        return fallback


def computeAlertsChunk(chunkID):
    """Generate job recommendations for every candidate of one partition.

    Candidates are read from ``mailer_weekly.candidates_processed`` where
    ``p == chunkID``.  Every job whose LSI cosine against the candidate is at
    least 0.18 and whose CTC and experience match scores both equal 1 is
    scored with ``getOverallMatchScore``.  The ten best jobs (at most three
    per company) are written, together with the candidate details and a
    subject line, to ``mailer_weekly.WeeklyMsgQueue``.  Candidates without
    any qualifying job are not written.

    Relies on names defined elsewhere in the module: ``lsiModel``,
    ``tfIdfModel``, ``index``, ``jobIntIdToJobDict``, ``cm`` and the score
    classes/functions.

    :param chunkID: partition id stored in the candidate field ``p``.
    """
    min_lsi_cosine = 0.18
    top_n = 10
    max_jobs_per_company = 3

    monconn_recommendations = MongoConnect('WeeklyMsgQueue',
                                           host='localhost',
                                           database='mailer_weekly')
    try:
        print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))
        company_names = _load_company_names('CompanyNames.csv')

        monconn_users = MongoConnect("candidates_processed",
                                     host='localhost',
                                     database='mailer_weekly')
        # NOTE(review): the returned cursor was never used; the call is kept
        # only in case getCursor() has a side effect - confirm and drop.
        monconn_users.getCursor()
        users = monconn_users.loadFromTable({'p': chunkID})

        for user in users:
            user_current_time = datetime.datetime.now()
            user_ctc = user['user_ctc']
            user_exp = user['user_experience']
            user_fullname = user['user_fullname']
            user_functionalarea = user['user_functionalarea']
            user_firstname = user_fullname.split(" ")[0]
            # A missing location defaults to 'Delhi', as before.
            user_city_list = _split_cities(user.get('user_location', 'Delhi'))

            lsi_user = lsiModel[tfIdfModel[user['user_bow']['bow']]]
            scored_jobs = []
            for jobIntIndex, lsiCosine in index[lsi_user]:
                if lsiCosine < min_lsi_cosine:
                    continue
                job = jobIntIdToJobDict[jobIntIndex]
                ctc_match_score = CTCMatchScore(
                    job['job_minsal'], job['job_maxsal'],
                    user_ctc).CTCMatchScore()
                exp_match_score = ExpMatchScore(
                    job['job_minexp'], job['job_maxexp'],
                    user_exp).ExpMatchScore()
                paid_boost = PaidBoostScore(
                    job['job_flag'], job['job_accounttype']).PaidBoostScore()
                if ctc_match_score != 1 or exp_match_score != 1:
                    continue

                job_city_list = _split_cities(job.get('job_location', 'Delhi'))
                try:
                    # A copy is passed so the per-user list stays untouched
                    # whatever getCityScore does with its arguments.
                    cityScore = cm.getCityScore(list(user_city_list),
                                                job_city_list)
                except Exception:
                    cityScore = 0
                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paid_boost)
                scored_jobs.append(
                    (job['job_id'], overallMatchScore, job['job_company_id']))

            jobs2bsent, cosine_score, company_ids = _select_top_jobs(
                scored_jobs, top_n, max_jobs_per_company)
            if not jobs2bsent:
                continue

            # Company names in ranking order, each company mentioned once.
            companies = []
            seen_company_ids = set()
            for company_id in company_ids:
                if company_id in seen_company_ids:
                    continue
                seen_company_ids.add(company_id)
                if company_id in company_names:
                    companies.append(company_names[company_id])

            user_subject = _build_subject(user_firstname, companies,
                                          user_functionalarea)

            document = {
                "c": user['user_id'],
                "_id": user['user_email'],
                "m": user['user_phone'],
                "te": user_exp,
                "cr": user['user_jobtitle'],
                "g": user['user_gender'],
                "cc": user['user_current_company'],
                "fa": user_functionalarea,
                "faid": user['user_functionalarea_id'],
                "pd": user['user_lastmodified'],
                "fn": user_fullname,
                "cpv": user['user_phone_verified'],
                "sCLID": user['user_location_id'],
                "sASID": user['user_ctc_id'],
                "eq": user['user_highest_qual'],
                "es": user['user_edu_special'],
                "ev": user['user_email_verified'],
                "ll": user['user_lastlogin'],
                "sal": user_ctc,
                "edom": user['user_edom'],
                "cosine": cosine_score,
                "t": user_current_time,
                "mj": jobs2bsent,
                "bj": [],
                "oj": [],
                "pid": user['p'],
                "s": False,
                "sub": user_subject,
            }
            monconn_recommendations.saveToTable(document)
    finally:
        # Release the output connection even when a candidate fails.
        monconn_recommendations.close()
#########################################################################################################
############-----------------Purging candidates whose last login is older than the cut-off
#########################################################################################################
# `date1` and `getMongoMaster` are defined elsewhere in this script.
mongo_conn = getMongoMaster()
collection = getattr(mongo_conn, "candidates_processed_4")
collection.remove({'user_lastlogin': {'$lt': str(date1)}})
print("Candidates with last login less than 183 days removed")

#########################################################################################################
############-----------------Connecting to Mongo CandidatePreferences
#########################################################################################################
username = '******'
password = '******'
monconn_users_preferences = MongoConnect('CandidatePreferences',
                                         host='172.22.65.88',
                                         port=27018,
                                         database='sumoplus',
                                         username=username,
                                         password=password,
                                         authenticate=True)
monconn_users_preferences_cur = monconn_users_preferences.getCursor()

#########################################################################################################
############-----------------Creating a Dictionary of Subfa to FA
#########################################################################################################
# Maps int(column 3) -> [column 4, int(column 1)] for every data row of
# subfa_fa.csv.  The file is closed as soon as the mapping is built.
sub_fa_dict = {}
with open('subfa_fa.csv', 'r') as ifile:
    reader = csv.reader(ifile)
    next(reader)  # skip the header row
    for row in reader:
        sub_fa_dict[int(row[3])] = [row[4], int(row[1])]
######################################################################################################### ############----------------- Loading the mapping for Bag of Words ######################################################################################################### print 'Loading the mappings for bow' synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv' keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' #This file is created mb = MyBOW(synMappingFileName, keywordIdMappingFileName) print 'Loading the mappings for bow...finished' ######################################################################################################### ############----------------- Dropping the existing collection of Jobs ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'jobs_processed' monconn_jobs_local = MongoConnect(tableName, host='localhost', database='mailer_monthly') monconn_jobs_local_cur = monconn_jobs_local.getCursor() monconn_jobs_local.dropTable() print 'Connecting to Mongodb...finished' del (monconn_jobs_local) ######################################################################################################### ############----------------- Initiating Multiprocessing and extracting Jobs ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid) ######################################################################################################### numChunks = 100 chunkIDs = range(0, numChunks) print chunkIDs pprocessing = 0 if pprocessing == 0:
from _sqlite3 import Row
# Make the project root importable before the project-local imports below.
sys.path.append('./../')
from pprint import pprint
from DataConnections.MySQLConnect.MySQLConnect import MySQLConnect
from DataConnections.MongoConnect.MongoConnect import MongoConnect
import pdb
import csv
import time
from multiprocessing import Pool
import os
import datetime
from datetime import timedelta
from Features.JobAlert_Functions import *

# Connection wrapper for the processed-candidates collection (JobAlerts db).
monconn_users_static = MongoConnect('candidates_processed_4',
                                    database='JobAlerts',
                                    host='localhost')

# Disabled CSV walk-through, kept as an inert string exactly as before.
'''
ifile = open('JAM_27_JAN_opens.csv','r')
reader = csv.reader(ifile)
reader.next()
count = 0
i = 0
for row in reader:
    i+=1
    if i%5000 == 0:
        print i
    #print row
'''

count = 0
def _job_row_to_document(job, job_index):
    """Turn one SQL result row into the Mongo document stored for that job.

    Text fields are cleaned with the module's ``cleanToken`` / ``cleanHTML`` /
    ``removeDup`` helpers and a bag of words is built with ``mb.getBow`` from
    the title (x5), skills (x3), description (x1) and industry, function and
    sub-function (x2 each).
    """
    job_title = cleanToken(job['JobTitle'])
    job_jd = cleanHTML(cleanToken(job['JD']))
    job_industry = cleanToken(job['Industry'])
    job_subfunction = removeDup(cleanToken(job['subfunction']))
    job_function = removeDup(cleanToken(job['function']))
    job_skills = removeDup(cleanToken(job['keySkills']))
    job_location = removeDup(job['location']).replace(
        ', ', ',').lower().split(',')

    weighted_fields = (
        (5, job_title),
        (3, job_skills),
        (1, job_jd),
        (2, job_industry),
        (2, job_function),
        (2, job_subfunction),
    )
    text = ' '.join(weight * (" " + field)
                    for weight, field in weighted_fields)
    text = text.replace('candidates', ' ')
    job_bow = mb.getBow(text, getbowdict=0)

    return {
        'job_id': job['Jobid'],
        'job_title': job_title,
        'job_function': job_function,
        'job_maxexp': cleanToken(job['ExpMax']),
        'job_minexp': cleanToken(job['ExpMin']),
        'job_location': job_location,
        'job_subfunction': job_subfunction,
        'job_maxsal': cleanToken(job['SalaryMax']),
        'job_minsal': cleanToken(job['SalaryMin']),
        'job_skills': job_skills,
        'job_bow': job_bow,
        'job_industry': job_industry,
        'job_jd': job_jd,
        'job_flag': job['flag'],
        'job_accounttype': job['account_type'],
        'job_company_id': job['Company_id'],
        'job_company_name': cleanToken(job['Company_name']),
        'job_index': job_index,
        'application_number': job['Application_Number'],
        'pid': job_index % 5000,
        'job_publishedate': job['Published_Date'],
        'job_repubslisheddate': job['RePublished_Date'],
        'job_expirydate': job['Expiry_Date'],
    }


def preProcessChunk(chunkId1, chunkId2):
    """Load one age window of jobs from MySQL and store them in Mongo.

    Selects the jobs whose published or republished date lies more than
    ``chunkId1`` and at most ``chunkId2`` days in the past, converts every
    row with ``_job_row_to_document`` and saves it to
    ``SimilarJobs.jobs_processed_9months`` on 172.22.66.198.

    The module-level counter ``i`` supplies ``job_index`` / ``pid`` and is
    advanced for every stored job; ``start_time`` (also module level) is only
    used for progress messages.

    :param chunkId1: exclusive lower bound of the job age in days.
    :param chunkId2: inclusive upper bound of the job age in days.
    """
    global i

    # The bounds are interpolated into SQL text, so force them to integers.
    min_age_days = int(chunkId1)
    max_age_days = int(chunkId2)

    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print("Loading Jobs From MySql....")
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)

    # SumoPlus.XY is a helper table holding, per company account, the summed
    # sale price and latest expiry date; it drives the Paid/Free flag below.
    # NOTE(review): the table name is fixed, so two overlapping runs would
    # presumably clobber each other's helper table - confirm before running
    # this in parallel.
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as
        SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date
        from SumoPlus.backoffice_accountsales a1
        where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
        group by 1 '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
        rj.jobid as Jobid,
        rj.jobtitle as JobTitle,
        rj.description as JD,
        rj.companyid_id as Company_id,
        rj.displayname as Company_name,
        rj.publisheddate as Published_Date,
        rj.republisheddate as RePublished_Date,
        rj.expirydate as Expiry_Date,
        la1.text_value_MAX as SalaryMax,
        la2.text_value_MIN as SalaryMin,
        le1.display as ExpMin,
        le2.display as ExpMax,
        li.industry_desc as Industry,
        group_concat(c.AttValueCustom,'') as keySkills,
        group_concat(fn.field_enu,'') as function,
        group_concat(l.city_desc,'') as location,
        group_concat(fn.sub_field_enu,'') as subfunction,
        lj.Applications as Application_Number,
        case account_type
            when 0 THEN "Company"
            when 1 THEN "Consultant"
            when 2 THEN "Others"
            when 3 THEN "Enterprise"
            ELSE "Not Specified"
        END AS account_type,
        IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'
        from (select * from recruiter_job where (
            (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) <= %s)
            OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) <= %s))) AS rj
        left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id
        left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id
        left join lookup_experience AS le1 on rj.minexperience = le1.value
        left join lookup_experience AS le2 on rj.maxexperience = le2.value
        left join recruiter_jobattribute as c on rj.jobid = c.jobid_id
        left join lookup_industry AS li on rj.industry=li.industry_id
        left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12
        left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13
        left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
        left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id
        left join ShineReport.LiveJobsApplications AS lj on rj.jobid = lj.JobId
        WHERE c.AttType in (3,12,13)
        group by rj.jobid ''' % (min_age_days, max_age_days,
                                 min_age_days, max_age_days)
    cmd5 = '''drop table if exists SumoPlus.XY '''

    print('chnukID: %s : Loading jobs from SQL.... %s' %
          (chunkId1, time.ctime()))
    mysql_conn.query(cmd1)
    print('cmd1')
    try:
        mysql_conn.query(cmd2)
        print('cmd2')
        mysql_conn.query(cmd3)
        print('cmd3')
        jobs = mysql_conn.query(cmd4)
        print('jobs')
    finally:
        # Always remove the helper table, also when one of the queries fails.
        mysql_conn.query(cmd5)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s' %
          (chunkId1, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkId1, len(jobs)))

    # Target collection for the processed jobs.
    print('Connecting to Mongodb..')
    monconn_jobs_local = MongoConnect('jobs_processed_9months',
                                      host='172.22.66.198',
                                      database='SimilarJobs')
    # NOTE(review): the returned cursor was never used; the call is kept only
    # in case getCursor() has a side effect - confirm and drop.
    monconn_jobs_local.getCursor()
    print('Connecting to Mongodb...finished')

    for job in jobs:
        if i % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds' %
                  (chunkId1, i, time.time() - start_time))
        monconn_jobs_local.saveToTable(_job_row_to_document(job, i))
        i += 1

    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.' %
          (chunkId1, time.time() - start_time))
    monconn_jobs_local.doIndexing('pid')

    del monconn_jobs_local
    del mysql_conn
# ---------------------------------------------------------------------------
# Bag-of-words mappings
# ---------------------------------------------------------------------------
print('Loading the mappings for bow')
synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
# The numbered keyword list is a generated file.
keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'
mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
print('Loading the mappings for bow...finished')

# ---------------------------------------------------------------------------
# Target collection.  The dropTable() / del steps that used to follow the
# connection are disabled here, so the existing collection is kept.
# ---------------------------------------------------------------------------
'''Dropping the existing collection of jobs'''
print('Connecting to Mongodb..')
tableName = 'jobs_processed_9months'
monconn_jobs_local = MongoConnect(tableName,
                                  database='SimilarJobs',
                                  host='172.22.66.198')
monconn_jobs_local_cur = monconn_jobs_local.getCursor()
print('Connecting to Mongodb...finished')

# ---------------------------------------------------------------------------
# Chunking parameters for the job pre-processing that follows.
# ---------------------------------------------------------------------------
'''Preprocessing of Jobs'''
numChunks = 100
chunkIDs = range(numChunks)
pprocessing = 0
chunkId1 = 0
import datetime from datetime import timedelta from datetime import * #from datetime import date, datetime, time import calendar import pandas as pd import numpy as np from random import sample username = '******' password = '******' mongo_conn = MongoConnect('CandidateStatic', host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True).getCursor() monconn_users_static = MongoConnect('candidates_processed_4', host='172.22.66.198', database='JobAlerts').getCursor() mon_conn_sub_fa = MongoConnect('LookupSubFunctionalArea', host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True).getCursor() ifile = open('/data/Projects/Cold_Calling/Pycode/concentrix_leads_v1.csv',
if __name__ == '__main__': try: #os.system(' echo "Application Indexing Started.... '' " | mutt -s "Similar Jobs Mailer" [email protected],[email protected], [email protected]') send_email([ '*****@*****.**', '*****@*****.**' ], "Similar Jobs Mailer applies preprocessing", 'Application Indexing Started.... !!') #send_email(['*****@*****.**'],"Similar Jobs Mailer applies preprocessing",'Application Indexing Started.... !!') ############################# 'Dropping the old collection' ############################# tablename = "apply_data" monconn_user = MongoConnect(tablename, host='172.22.66.198', database='SimilarJobs') monconn_user.dropTable() monconn_user.close() ############################# 'Starting Index Creation' ############################# ApplicationIndexing() ############################# 'Creating Index on Collection' ############################# tablename = "apply_data" monconn_user = MongoConnect(tablename, host='172.22.66.198',
# ---------------------------------------------------------------------------
# Bag-of-words mappings
# ---------------------------------------------------------------------------
print('Loading the mappings for bow')
synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
# The numbered keyword list is a generated file.
keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'
mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
print('Loading the mappings for bow...finished')

# ---------------------------------------------------------------------------
# Start from an empty jobs collection: drop the existing one, then release
# the connection wrapper.
# ---------------------------------------------------------------------------
print('Connecting to Mongodb..')
tableName = 'new_jobs_dump'
monconn_jobs_local = MongoConnect(tableName,
                                  database='similar_jobs_onsite',
                                  host='localhost')
monconn_jobs_local_cur = monconn_jobs_local.getCursor()
monconn_jobs_local.dropTable()
print('Connecting to Mongodb...finished')
del monconn_jobs_local

# ---------------------------------------------------------------------------
# Chunking parameters.  pprocessing = 1 would select multiprocessing, which
# the original banner advises to avoid.
# ---------------------------------------------------------------------------
numChunks = 100
chunkIDs = range(numChunks)
print(chunkIDs)
pprocessing = 0
def _recency_score(age_in_days):
    """Map the age of an application, in days, to its recency weight."""
    if age_in_days <= 10:
        return 1
    if age_in_days <= 20:
        return 0.9
    if age_in_days <= 30:
        return 0.8
    return 0.6


def ApplicationIndexing():
    """Copy candidate applications into ``SimilarJobs.apply_data``.

    Reads ``JobAlerts.candidate_applications`` in batches of 100000 ordered
    by user id (``fcu``).  For every application it stores the user id, a
    running per-user index (incremented whenever the user id changes), the
    job id (``fjj``), the application date (``ad``), a recency score derived
    from the age of the application, the original ``_id`` and a partition id
    ``pid`` (record counter modulo 5000).

    Batches are paged on the pair ``(fcu, _id)``.  Paging on ``fcu`` alone
    skipped the remaining applications of a user whose records were split by
    a batch boundary.  The loop ends when a batch comes back empty.

    Any error is printed and ends the run without being re-raised, as before.

    NOTE(review): the previous version computed a 183-day window here but
    never applied it to the query; that dead code was removed - confirm no
    date filter is intended.
    """
    batch_size = 100000
    user_index = 0
    records_processed = 0

    # Source: raw applications.
    mongo_conn = MongoConnect('candidate_applications',
                              host='172.22.66.198',
                              database='JobAlerts')
    mongo_conn_cur = mongo_conn.getCursor()

    # Target: indexed applications.
    monconn_user = MongoConnect("apply_data",
                                host='172.22.66.198',
                                database='SimilarJobs')

    previous_user = "0"
    last_user = "0"
    last_object_id = None
    # One reference instant so every record of the run is aged consistently.
    current_time = datetime.now()

    try:
        while True:
            if last_object_id is None:
                condition = {"fcu": {'$gt': last_user}}
            else:
                # Resume right after the last copied record: either a later
                # user, or the same user with a later _id.
                condition = {'$or': [
                    {"fcu": {'$gt': last_user}},
                    {"fcu": last_user, '_id': {'$gt': last_object_id}},
                ]}
            batch = mongo_conn_cur.find(condition).sort(
                [('fcu', 1), ('_id', 1)]).limit(batch_size)

            documents = []
            for row in batch:
                userid = row['fcu']
                if userid != previous_user:
                    previous_user = userid
                    user_index += 1

                application_date = row['ad']
                age_in_days = abs((current_time - application_date).days)
                documents.append({
                    "userid": userid,
                    "user_index": user_index,
                    "jobid": row['fjj'],
                    'score': _recency_score(age_in_days),
                    'application_date': application_date,
                    '_id': row['_id'],
                    'pid': records_processed % 5000,
                })

                last_user = userid
                last_object_id = row['_id']
                records_processed += 1
                if records_processed % 100000 == 0:
                    print("Records Processed : %s" % records_processed)

            if not documents:
                break  # nothing left to copy
            monconn_user.insert(documents)
    except Exception as E:
        print(E)
def preProcessChunk(chunkID): ######################################################################################################### ############----------------- SQL Credentials ######################################################################################################### #Connect to SQL table and get the jobs data #host="172.16.66.64" #user="******" #password="******" host1 = "172.22.65.157" user1 = "analytics" password1 = "Anal^tics@11" database1 = "SumoPlus" unix_socket1 = "/tmp/mysql.sock" port1 = 3308 host = "172.22.66.204" user = "******" password = "******" database = "SumoPlus" unix_socket = "/tmp/mysql.sock" port = 3306 ######################################################################################################### ############----------------- Creating the SQL Query ######################################################################################################### print "Loading Jobs From MySql...." try: mysql_conn = MySQLConnect(database1, host1, user1, password1, unix_socket1, port1) except: mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join 
lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4) and rj.jobid%''' + str(numChunks) + '=' + str(chunkID) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)''' #print cmd cmd1 = '''drop table if exists SumoPlus.XY''' cmd2 = '''create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id) group by 1 ''' cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)''' cmd4 = '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, rj.companyid_id as Company_id, rj.displayname as Company_name, 
la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction, case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 30 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 30) ) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id WHERE c.AttType in (3,12,13) group by rj.jobid ''' cmd5 = '''drop table if exists SumoPlus.XY ''' ######################################################################################################### ############----------------- Executing the SQL Query ######################################################################################################### print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime() mysql_conn.query(cmd1) mysql_conn.query(cmd2) 
mysql_conn.query(cmd3) jobs = mysql_conn.query(cmd4) mysql_conn.query(cmd5) print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime( ) print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs) ######################################################################################################### ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233) ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'new_jobs_dump' monconn_jobs_local = MongoConnect(tableName, host='localhost', database='similar_jobs_onsite') monconn_jobs_local_cur = monconn_jobs_local.getCursor() print 'Connecting to Mongodb...finished' ######################################################################################################### ############-----------------Processing the Jobs data extracted from SQL ######################################################################################################### i = 0 for job in jobs: #pprint(job) #print i if i % 1000 == 0: print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time( ) - start_time, ' seconds' job_id = job['Jobid'] job_title = cleanToken(job['JobTitle']) job_maxexp = cleanToken(job['ExpMax']) job_minexp = cleanToken(job['ExpMin']) job_maxsal = cleanToken(job['SalaryMax']) job_minsal = cleanToken(job['SalaryMin']) job_jd = cleanHTML(cleanToken(job['JD'])) job_industry = cleanToken(job['Industry']) job_location = removeDup(job['location']) job_subfunction = removeDup(cleanToken(job['subfunction'])) job_function = removeDup(cleanToken(job['function'])) job_skills = removeDup(cleanToken(job['keySkills'])) job_flag = job['flag'] job_accounttype = job['account_type'] job_company_id = int(job['Company_id']) job_company_name = cleanToken(job['Company_name']) job_index = i job_location = job_location.replace(', ', ',').lower().split(',') 
######################################################################################################### ############-----------------Creating Bag of Words for Text ######################################################################################################### text = 5 * (" " + job_title) + ' ' + 3 * ( " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * ( " " + job_industry) + ' ' + 2 * ( " " + job_function) + ' ' + 2 * (" " + job_subfunction) text = text.replace('candidates', ' ') ''' try: text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction) text = text.replace('candidates', ' ') except: text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd) text = text.replace('candidates', ' ') ''' job_bow = mb.getBow(text, getbowdict=0) ######################################################################################################### ############-----------------Creating Job document to be saved in Mongo ######################################################################################################### document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \ 'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\ 'job_location':job_location, 'job_subfunction':job_subfunction,\ 'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \ 'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \ 'job_flag':job_flag,'job_accounttype':job_accounttype, \ 'job_company_id':job_company_id,'job_company_name':job_company_name,'job_index':job_index } ######################################################################################################### ############-----------------Saving the document in Job collection Mongo (172.22.66.233) ######################################################################################################### monconn_jobs_local.saveToTable(document) i += 
1 print "Processing finished....." print 'chunkID:', chunkID, ' Total time taken is: ', time.time( ) - start_time, ' seconds.' end_time = time.time() time_taken = end_time - start_time send_email([ '*****@*****.**', '*****@*****.**' ], "SJ Onsite", '1 Month Jobs Processed ' + str(i) + ' in :' + str(end_time - start_time) + ' seconds') #os.system(' echo "1 Month Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Similar Jobs OnSite" [email protected] ,[email protected]') del (monconn_jobs_local) del (mysql_conn)
# Setup for the cold-calling export script: opens the output CSV, selects
# candidates by last login, loads the sub-functional-area lookup and writes
# the CSV header row.
username = '******'
password = '******'
print 'Mongo connect module:'

# Output file; rows are written through ``writer``.
output = open('cold_calling_file.csv',"w")
writer = csv.writer(output, lineterminator='\n')
i=0
user_email_list = []

# Cut-off: two days before now, converted to an ISO-8601 string because it
# is compared against the stored ``user_lastlogin`` value in the queries.
date1 = datetime.now() - timedelta(days= 2)
date1 = date1.isoformat()
print date1

# Collection handles: processed candidates (local) and the
# sub-functional-area lookup (remote, authenticated).
monconn_users_static = MongoConnect('candidates_processed_4', host = 'localhost', database = 'JobAlerts').getCursor()
mon_conn_sub_fa = MongoConnect('LookupSubFunctionalArea', host = '172.22.65.88', port = 27018,database = 'sumoplus',username= username,password = password,authenticate = True).getCursor()
print 'Mongo_Connected',monconn_users_static

# Candidates whose last login is after the cut-off, plus their count
# (the same filter is issued twice: once for the rows, once for the count).
data_user = monconn_users_static.find({'user_lastlogin':{'$gt':date1}})
data_user_1 = monconn_users_static.find({'user_lastlogin':{'$gt':date1}}).count()

# Map each lookup record's ``sfe`` value to its ``fe`` value.
sub_fa_lookup = mon_conn_sub_fa.find()
sub_fa = {}
for records in sub_fa_lookup:
    sub_fa[records['sfe']] = records['fe']
print 'Candidates_picked:',str(data_user_1)

# Header row of the export.
writer.writerow(["Email",'Candidate_Name','Phone','City','cpv','applications','edu_qual','loc_id','Total_Experience','Industry','Salary','Functional_Area','last_login','Sub_FA'])
password = '******' tableName = 'ResumeParserDump' date1 = datetime.now() - timedelta(days=2) print "Date : ", datetime.now() ######################################################################################################### ############-----------------Try Except to provide alert in case of code failure ######################################################################################################### try: ######################################################################################################### ############-----------------Creating a mongo connection to miscellaneous DB ######################################################################################################### monconn_users = MongoConnect(tableName, host='172.22.65.88', port=27018, database='miscellaneous', username=username, password=password, authenticate=True) monconn_users_cur = monconn_users.getCursor() myCondition = {"cd": {'$gt': date1}} users = monconn_users.loadFromTable(myCondition) print "Number of recoreds : " + str(len(users)) ######################################################################################################### ############-----------------Creating a mongo connection to resume dump DB Mongo(172.22.66.233) ######################################################################################################### tableName = 'candidate_data' monconn_resume = MongoConnect(tableName, host='172.22.66.198', database='ResumeDump')
######################################################################################################### ############----------------- Loading the mapping for Bag of Words ######################################################################################################### print 'Loading the mappings for bow' synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv' keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' #This file is created mb = MyBOW(synMappingFileName, keywordIdMappingFileName) print 'Loading the mappings for bow...finished' ######################################################################################################### ############----------------- Dropping the existing collection of Jobs ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'JobDesc_weekly' monconn_jobs_local = MongoConnect(tableName, host='localhost', database='JobDescDB') monconn_jobs_local_cur = monconn_jobs_local.getCursor() monconn_jobs_local.dropTable() print 'Connecting to Mongodb...finished' del (monconn_jobs_local) ######################################################################################################### ############----------------- Initiating Multiprocessing and extracting Jobs ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid) ######################################################################################################### numChunks = 100 chunkIDs = range(0, numChunks) print chunkIDs pprocessing = 0 if pprocessing == 0:
], "Midout Mailers", "Midouts Candidates Processing Started!!!!") print 'Mongo connect module:' username = '******' password = '******' ######################################################################################################### ############----------------- Dictionary for LookupExperience ######################################################################################################### print "Loading Dictionary for Experience" tableName = 'LookupExperience' monconn_users = MongoConnect(tableName, host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True) monconn_users_cur = monconn_users.getCursor() user_experience_dict = {} for user in monconn_users_cur.find(): user_experience_dict[user['v']] = user['d'] ######################################################################################################### ############----------------- Dictionary for LookupJobTitle ######################################################################################################### print "Loading Dictionary for JobTitle" tableName = 'LookupJobTitle' monconn_users = MongoConnect(tableName, host='172.22.65.88',
def getedu_details(): ######### Creating Mongo Cursors######### ######################################### monconn_users_edu = MongoConnect('CandidateEducation', host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True).getCursor() lookup_educationstudy = MongoConnect('LookupEducationStream', host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True).getCursor() lookup_institute = MongoConnect('LookupEducationInstitute', host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True).getCursor() ###### Creating Study Field Dict ######## ######################################### Study_Field = lookup_educationstudy.find({}, {'si': 1, 'sd': 1}) study_field_dict = {} for records in Study_Field: study_field_dict[records['si']] = records['sd'] ###### Creating Institute Dict ########### ########################################## institute_dict = {} Institute_Name = lookup_institute.find({}, {'asi': 1, 'asd': 1}) for records in Institute_Name: institute_dict[records['asi']] = records['asd'] ifile = open('/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv', 'rb') #### Loading Candidate Level csv File ###### reader = csv.reader(ifile) reader.next() ofile = open( '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Edu_Data.csv', 'wb') writer = csv.writer(ofile) writer.writerow([ 'user_id', 'institute', 'institute_id', 'stream', 'stream_id', 'course_type', 'course_type_id', 'most_recent' ]) try: for records in reader: try: required_data = monconn_users_edu.find( {'fcu': str(records[0])}) for data in required_data: user_id = data.get('fcu', '') if data.has_key('ins') == True and data.get( 'ins', '') is not None: institute = institute_dict[data['ins']].encode( 'utf8', 'ignore').encode('utf-8') else: institute = data.get('inc').encode( 'utf8', 'ignore').encode('utf-8') ins_id = data.get('ins', '') stream = 
study_field_dict[data.get('el')] stream_id = data.get('el', '') course_type_id = data.get('ct', '') if course_type_id == 1: course_type = 'Full Time' if course_type_id == 2: course_type = 'Part Time' if course_type_id == 3: course_type = 'Correspondence' mr = data.get('mr', '') writer.writerow([ user_id, institute, ins_id, stream, stream_id, course_type, course_type_id, mr ]) except: user_id = records[0] institute = '' ins_id = '' stream = '' stream_id = '' course_type = '' course_type_id = '' mr = '' writer.writerow([ user_id, institute, ins_id, stream, stream_id, course_type, course_type_id, mr ]) except: print records[0] ofile.close() df = pd.read_csv( '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Edu_Data.csv') ########Imputing Missing Value of "mr" field with -100 ####################### ############################################################################## df[['most_recent']] = df[['most_recent']].fillna(value=-100) ##### Sorting Dataframe ascending on user id and descending on mr field ###### ############################################################################## df_1 = df.sort(['user_id', 'most_recent'], ascending=[1, 0]) ##### Grouping on User_Id Level to Fetch Latest Institute of Candidate ####### ############################################################################## df_2 = df_1.groupby( 'user_id', group_keys=False).apply(lambda x: x.ix[x.most_recent.idxmax()]) df_3 = df_2[[ 'user_id', 'institute', 'institute_id', 'stream', 'stream_id', 'course_type', 'course_type_id', 'most_recent' ]] df_3.to_csv( '/data/Projects/Salary_Tool_HT_Campus/Output/Institute_Level_Data.csv')
user_email_list.append(row[0]) i += 1 print len(user_email_list) ''' #if i>1000: # break #print user_email_list ''' date1 = datetime.datetime.now() - datetime.timedelta(days=58) monconn_users_static = MongoConnect('CandidateStatic', host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True).getCursor() j = 0 while True: emails_list = user_email_list[j:j + 5000] j = j + 5000 print j data_user = monconn_users_static.find({'e': {'$in': emails_list}}) #data_user = monconn_users_static.find({'ut':1,'rsd':{'$gt':date1}},{'_id':1,'ut':1,'red':1,'rsd':1}) count = 0
'*****@*****.**', '*****@*****.**' ], "SJ Onsite", "Similar Jobs Onsite Creation started !! ") ######################################################################################################### ############----------------- Start the timer ######################################################################################################### start_time = time.time() print "Started at time", start_time, "seconds" ######################################################################################################### ############----------------- Remove the previous Mongo dump of Similar Jobs ######################################################################################################### tablename = 'JobSuggestions' monconn_recommendations = MongoConnect(tablename, host='localhost', database='similar_jobs_onsite') monconn_recommendations.dropTable() monconn_recommendations.close() ######################################################################################################### ############----------------- Load the LSI and tfidf models ######################################################################################################### tfIdfModelFilename_unifiedtke = '/data/Projects/JobAlerts/Model/tfidf_model.tfidf' lsiModelFilename_unifiedtke = '/data/Projects/JobAlerts/Model/lsi_model.lsi' tfIdfModel = gensim.models.tfidfmodel.TfidfModel.load( tfIdfModelFilename_unifiedtke) lsiModel = models.lsimodel.LsiModel.load(lsiModelFilename_unifiedtke) #########################################################################################################
#send_email(['*****@*****.**', '*****@*****.**'],"Job Alert Mailer","Jobs Processing from SQL Started!!!") ######################################################################################################### ############----------------- Start the timer ######################################################################################################### print 'preProcessing Jobs...', time.ctime() start_time = time.time() htmls = HTMLStripper() ######################################################################################################### ############----------------- Remove the completion status if already exist ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'jobs_status_check' monconn_status_check = MongoConnect(tableName, host='172.22.66.198', database='jam_status') monconn_status_check_cur = monconn_status_check.getCursor() monconn_status_check.dropTable() del (monconn_status_check) #monconn_status_check.saveToTable({'_id':1,'status':0}) ######################################################################################################### ############----------------- Loading the mapping for Bag of Words ######################################################################################################### print 'Loading the mappings for bow' synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv' keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' #This file is created mb = MyBOW(synMappingFileName, keywordIdMappingFileName) print 'Loading the mappings for bow...finished'
import sys sys.path.append('./../') from DataConnections.MongoConnect.MongoConnect import MongoConnect import csv import os import datetime from datetime import timedelta monconn_users_static = MongoConnect('candidate_applications', host='localhost', database='JobAlerts') ifile = open('UserData.csv', 'rb') reader = csv.reader(ifile) reader.next() ofile = open("User_Applications.csv", "w") writer = csv.writer(ofile) writer.writerow(['User_Id', 'Job_Applied', 'Application_Date']) candidate_id = [] '''for records in reader: candidate_id.append(str(records[0]).strip())''' #print len(candidate_id) for records in reader: data_user = monconn_users_static.loadFromTable({"fcu": str(records[0])}) for records in data_user: try: User_Id = records.get('fcu', 'N/A') Job_Applied = records.get('fjj', 'N/A')
def preProcessChunk(chunkID): ######################################################################################################### ############----------------- Creating a Mongo Connection to status collection ######################################################################################################### print 'Connecting to Mongodb..' tableName = 'jobs_status_check' monconn_status_check = MongoConnect(tableName, host='172.22.66.198', database='jam_status') monconn_status_check_cur = monconn_status_check.getCursor() ######################################################################################################### ############----------------- SQL Credentials ######################################################################################################### host1 = "172.22.65.157" user1 = "analytics" password1 = "Anal^tics@11" database1 = "SumoPlus" unix_socket1 = "/tmp/mysql.sock" port1 = 3308 host = "172.22.66.204" user = "******" password = "******" database = "SumoPlus" unix_socket = "/tmp/mysql.sock" port = 3306 ######################################################################################################### ############----------------- Creating the SQL Query ######################################################################################################### print "Loading Jobs From MySql...." 
try: mysql_conn = MySQLConnect(database1, host1, user1, password1, unix_socket1, port1) except: mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) cmd1 = '''drop table if exists SumoPlus.XY''' cmd2 = '''create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id) group by 1 ''' cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)''' cmd4 = '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, rj.isbocreated as back_office_job, rj.publisheddate as publisheddate, rj.republisheddate as republisheddate, rj.companyid_id as Company_id, rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(fn.field_id,'') as faid, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction, case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) ) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as 
c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id WHERE c.AttType in (3,12,13) group by rj.jobid ''' cmd5 = '''drop table if exists SumoPlus.XY ''' ######################################################################################################### ############----------------- Executing the SQL Query ######################################################################################################### print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime() mysql_conn.query(cmd1) mysql_conn.query(cmd2) mysql_conn.query(cmd3) jobs = mysql_conn.query(cmd4) mysql_conn.query(cmd5) print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime( ) print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs) ######################################################################################################### ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233) ######################################################################################################### print 'Connecting to Mongodb..' 
tableName = 'jobs_processed' monconn_jobs_local = MongoConnect(tableName, host='172.22.66.198', database='JobAlerts') monconn_jobs_local_cur = monconn_jobs_local.getCursor() print 'Connecting to Mongodb...finished' ######################################################################################################### ############-----------------Processing the Jobs data extracted from SQL ######################################################################################################### i = 0 for job in jobs: if i % 1000 == 0: print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time( ) - start_time, ' seconds' job_id = job['Jobid'] #job_title = cleanToken(job['JobTitle']) job_maxexp = cleanToken(job['ExpMax']) job_minexp = cleanToken(job['ExpMin']) job_maxsal = cleanToken(job['SalaryMax']) job_minsal = cleanToken(job['SalaryMin']) #job_jd = cleanHTML(cleanToken(job['JD']) ) #job_industry = cleanToken(job['Industry']) job_location = removeDup(job['location']) #job_subfunction=removeDup(cleanToken(job['subfunction'])) #job_function=removeDup(cleanToken(job['function'])) #job_skills=removeDup(cleanToken(job['keySkills'])) job_flag = job['flag'] job_accounttype = job['account_type'] job_company_id = job['Company_id'] job_company_name = cleanToken(job['Company_name']) job_published_date = job['publisheddate'] job_republished_date = job['republisheddate'] #job_faid = job['faid'] job_back_office = int(job['back_office_job']) job_location = job_location.replace(', ', ',').lower().split(',') if job_company_id == 421880: #######---------- Altimetrik Jobs removed continue job_faid = job['faid'] job_title = cleanText( job['JobTitle'] ) #######---------- cleanText Function is present in Cleaning.py in Utils folder job_jd = cleanText(cleanHTML(job['JD'])) job_industry = cleanText(job['Industry']) job_function = removeDup(cleanText(job['function'])) job_subfunction = removeDup(cleanText(job['subfunction'])) job_skills = 
removeDup(cleanText(job['keySkills'])) ######################################################################################################### ############-----------------Creating Bag of Words for Text ######################################################################################################### text = 5 * (" " + job_title) + ' ' + 5 * ( " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * ( " " + job_function) + ' ' + 2 * (" " + job_subfunction) text = re.sub(' +', ' ', text).strip() ''' try: text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills.replace(',', ' ')) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction) except: text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction) ''' #text = text.replace('candidates', ' ') job_bow = mb.getBow(text, getbowdict=0) ######################################################################################################### ############-----------------Creating Job document to be saved in Mongo ######################################################################################################### document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \ 'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\ 'job_location':job_location, 'job_subfunction':job_subfunction,\ 'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \ 'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \ 'job_flag':job_flag,'job_accounttype':job_accounttype, \ 'job_company_id':job_company_id,'job_company_name':job_company_name, 'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office,'job_faid':job_faid } ######################################################################################################### ############-----------------Saving the document in Job collection 
Mongo (172.22.66.233) ######################################################################################################### monconn_jobs_local.saveToTable(document) i += 1 print "Processing finished....." print 'chunkID:', chunkID, ' Total time taken is: ', time.time( ) - start_time, ' seconds.' end_time = time.time() time_taken = end_time - start_time #send_email(['*****@*****.**', '*****@*****.**'],"Job Alert Mailer",'Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds') ######################################################################################################### ############-----------------Changing the status of completion and deleting the mongo connections ######################################################################################################### del (monconn_jobs_local) del (mysql_conn) monconn_status_check.saveToTable({'_id': 1, 'status': 1}) del (monconn_status_check)
def computeAlertsChunk(chunkID):
    """Attach application history to one chunk of candidates.

    Loads every document of ``candidates_processed_4`` whose ``p`` field
    equals ``chunkID``.  For each candidate it fetches the documents of
    ``candidate_applications`` whose ``fcu`` field equals the candidate's
    ``_id`` and adds two fields to the candidate document:

    * ``application_list``  -- sorted list of the ``fjj`` values of those
      application documents (presumably job ids -- TODO confirm),
    * ``application_count`` -- length of that list.

    Only candidates with no applications are written to
    ``candidates_processed_5``; the others are not stored by this function.

    Args:
        chunkID: value matched against the ``p`` field of the candidate
            documents to select the chunk to process.

    Returns:
        None.
    """
    #########################################################################################################
    ############-----------------Creating the connections to Mongo
    #########################################################################################################
    # NOTE(review): the previous header named 172.22.66.233, but every
    # connection here targets localhost -- confirm which one is intended.
    monconn_users_static = MongoConnect('candidates_processed_4', host='localhost', database='JobAlerts')
    monconn_users_static_cur = monconn_users_static.getCursor()
    monconn_applications = MongoConnect('candidate_applications', host='localhost', database='JobAlerts')
    # Take the cursor from the applications connection; it used to be taken
    # from monconn_users_static a second time (copy-paste slip).
    # NOTE(review): neither cursor handle is used below; they are kept only
    # because getCursor() is an opaque helper -- confirm they can be dropped.
    monconn_applications_cur = monconn_applications.getCursor()
    monconn_recommendations = MongoConnect('candidates_processed_5', host='localhost', database='JobAlerts')

    # Single-argument form: prints the same text under Python 2 and Python 3.
    print('Chunk: %s initiated at: %s' % (chunkID, time.ctime()))

    users = monconn_users_static.loadFromTable({'p': chunkID})
    for row in users:
        applications = monconn_applications.loadFromTable({'fcu': row['_id']})
        # Consume the result exactly once and never call len() on it: if
        # loadFromTable returns a cursor, len() is unsupported (TODO confirm
        # its return type).  An empty result simply yields an empty list.
        application_list = sorted(element['fjj'] for element in applications)
        row['application_list'] = application_list
        row['application_count'] = len(application_list)
        if not application_list:
            monconn_recommendations.saveToTable(row)
#########################################################################################################
############----------------- Loading the mapping for Bag of Words
#########################################################################################################
# `mb` is the module-level bag-of-words helper that preProcessChunk calls
# through mb.getBow(...).
print('Loading the mappings for bow')
synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
# This file is created
keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'
mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
print('Loading the mappings for bow...finished')

#########################################################################################################
############----------------- Dropping the existing collection of Jobs
#########################################################################################################
# Connect only long enough to drop the previous run's collection, then
# release the connection object.
print('Connecting to Mongodb..')
tableName = 'jobs_processed_midout'
monconn_jobs_local = MongoConnect(tableName, host='localhost', database='Midout_Mailers')
monconn_jobs_local_cur = monconn_jobs_local.getCursor()
monconn_jobs_local.dropTable()
print('Connecting to Mongodb...finished')
del monconn_jobs_local

#########################################################################################################
############----------------- Initiating Multiprocessing and extracting Jobs
############----------------- Set flag pprocessing = 1 for multiprocessing (avoid)
#########################################################################################################
# One chunk id per chunk: 0 .. numChunks - 1.
numChunks = 100
chunkIDs = range(numChunks)
print(chunkIDs)