def __init__(self):
    """Fetch the posts and prepare the Google Translate endpoint URLs.

    Opens a MySQL connection to the ``tap_sg`` database (kept on
    ``self.mysql_conn``), runs ``select * from wp_posts limit 2`` and stores
    the result on ``self.articles``.  Also builds ``self.TRANSLATE_URL`` and
    ``self.DETECT_URL``, the translate/detect URLs with the API key already
    appended as the ``key`` query parameter.
    """
    # 1. Connect to the MySQL database holding the wp_posts table.
    db_name = "tap_sg"
    db_host = "127.0.0.1"
    db_user = "******"
    db_password = "******"
    self.mysql_conn = MySQLConnect(db_name, db_host, db_user, db_password)

    # Only two rows are requested by the query.
    self.articles = self.mysql_conn.query('''select * from wp_posts limit 2''')

    # Google API used for Thai to English translation.
    # NOTE(review): the API key lives in source control -- consider moving
    # it to configuration and rotating it.
    api_key = "AIzaSyBGGfOdtKFhlJ1w2bitjsj194jUKIxoPT0"
    endpoint = "https://www.googleapis.com/language/translate/v2"
    self.TRANSLATE_URL = endpoint + "?key=" + api_key
    # Callers append the text as e.g. &q=google+translate+is+fast
    self.DETECT_URL = endpoint + "/detect?key=" + api_key
def preProcessChunk(chunkID):
    """Load recent jobs from MySQL, clean them and save them to Mongo.

    Steps, in order:

    1. Open the ``jobs_status_check`` collection of the ``jam_status``
       Mongo database.
    2. Build the helper table ``SumoPlus.XY`` in MySQL, select the jobs with
       status 3 or 9 whose published/republished ``DATEDIFF`` to today is
       below 8, then drop the helper table again.
    3. Clean every returned row, build its bag of words with ``mb.getBow``
       and save one document per job into the ``jobs_processed`` collection
       of the ``JobAlerts`` Mongo database.  Jobs of company 421880 are
       skipped and not counted.
    4. Pipe a summary line to ``mutt`` and save ``{'_id': 1, 'status': 1}``
       into the status collection.

    ``chunkID`` is only used to label the progress messages; the active
    query is not partitioned by it.

    Besides the helper functions it calls, this function reads the
    module-level names ``start_time`` and ``mb``.
    """
    print('Connecting to Mongodb..')
    status_store = MongoConnect('jobs_status_check', host='localhost',
                                database='jam_status')
    status_cursor = status_store.getCursor()

    # ------------------------------------------------------------------
    # Fetching the jobs from SQL
    # ------------------------------------------------------------------
    host = "172.22.65.157"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3308

    print("Loading Jobs From MySql....")
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)

    drop_helper_sql = "drop table if exists SumoPlus.XY"
    create_helper_sql = (
        "create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,"
        "enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 "
        "where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales "
        "where a1.company_account_id=company_account_id) group by 1 "
    )
    index_helper_sql = "ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)"
    select_jobs_sql = (
        "SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, "
        "rj.isbocreated as back_office_job, rj.publisheddate as publisheddate, "
        "rj.republisheddate as republisheddate, rj.companyid_id as Company_id, "
        "rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, "
        "la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, "
        "li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, "
        "group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, "
        "group_concat(fn.sub_field_enu,'') as subfunction, "
        'case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" '
        'when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, '
        "IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' "
        "from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) "
        "and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 "
        "OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) ) AS rj "
        "left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id "
        "left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id "
        "left join lookup_experience AS le1 on rj.minexperience = le1.value "
        "left join lookup_experience AS le2 on rj.maxexperience = le2.value "
        "left join recruiter_jobattribute as c on rj.jobid = c.jobid_id "
        "left join lookup_industry AS li on rj.industry=li.industry_id "
        "left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 "
        "left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 "
        "left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id "
        "left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id "
        "WHERE c.AttType in (3,12,13) group by rj.jobid "
    )
    cleanup_helper_sql = "drop table if exists SumoPlus.XY "

    print('chnukID: %s : Loading jobs from SQL.... %s' % (chunkID, time.ctime()))
    mysql_conn.query(drop_helper_sql)
    mysql_conn.query(create_helper_sql)
    mysql_conn.query(index_helper_sql)
    jobs = mysql_conn.query(select_jobs_sql)
    mysql_conn.query(cleanup_helper_sql)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s'
          % (chunkID, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkID, len(jobs)))

    # ------------------------------------------------------------------
    # Connecting to the Mongo collection that receives the jobs
    # ------------------------------------------------------------------
    print('Connecting to Mongodb..')
    jobs_store = MongoConnect('jobs_processed', host='localhost',
                              database='JobAlerts')
    jobs_cursor = jobs_store.getCursor()
    print('Connecting to Mongodb...finished')

    # ------------------------------------------------------------------
    # Processing the jobs
    # ------------------------------------------------------------------
    saved = 0
    for row in jobs:
        if saved % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds'
                  % (chunkID, saved, time.time() - start_time))

        job_id = row['Jobid']
        title = cleanToken(row['JobTitle'])
        max_exp = cleanToken(row['ExpMax'])
        min_exp = cleanToken(row['ExpMin'])
        max_sal = cleanToken(row['SalaryMax'])
        min_sal = cleanToken(row['SalaryMin'])
        description = cleanHTML(cleanToken(row['JD']))
        industry = cleanToken(row['Industry'])
        location = removeDup(row['location'])
        subfunction = removeDup(cleanToken(row['subfunction']))
        function = removeDup(cleanToken(row['function']))
        skills = removeDup(cleanToken(row['keySkills']))
        flag = row['flag']
        account_type = row['account_type']
        company_id = row['Company_id']
        company_name = cleanToken(row['Company_name'])
        published = row['publisheddate']
        republished = row['republisheddate']
        back_office = int(row['back_office_job'])

        # Altimetrik jobs are removed (and do not count as processed).
        if company_id == 421880:
            continue

        location = location.replace(', ', ',').lower().split(',')

        # Weighted text for the bag of words: title x5, skills x5, JD x1,
        # industry / function / subfunction x2 each.
        weighted_parts = [
            5 * (" " + title),
            5 * (" " + skills),
            1 * (" " + description),
            2 * (" " + industry),
            2 * (" " + function),
            2 * (" " + subfunction),
        ]
        text = ' '.join(weighted_parts).replace('candidates', ' ')
        bow = mb.getBow(text, getbowdict=0)

        document = {
            'job_id': job_id, 'job_title': title, 'job_function': function,
            'job_maxexp': max_exp, 'job_minexp': min_exp,
            'job_location': location, 'job_subfunction': subfunction,
            'job_maxsal': max_sal, 'job_minsal': min_sal, 'job_skills': skills,
            'job_bow': bow, 'job_industry': industry, 'job_jd': description,
            'job_flag': flag, 'job_accounttype': account_type,
            'job_company_id': company_id, 'job_company_name': company_name,
            'job_published': published, 'job_republished': republished,
            'job_back_office': back_office,
        }
        jobs_store.saveToTable(document)
        saved += 1

    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.'
          % (chunkID, time.time() - start_time))

    end_time = time.time()
    elapsed = end_time - start_time
    # The summary mail is sent by piping an echo into mutt.
    os.system(
        ' echo "Jobs Processed %s in :%s seconds " | mutt -s "Job Alert Mailer " [email protected] ,[email protected]'
        % (saved, elapsed))

    del jobs_store
    del mysql_conn
    # Record the completion status before releasing the status connection.
    status_store.saveToTable({'_id': 1, 'status': 1})
    del status_store
def preProcessChunk(chunkID):
    """Load recent jobs from MySQL, clean them and save them to Mongo.

    Builds the helper table ``SumoPlus.XY``, selects the jobs with status 3
    or 9 whose published/republished ``DATEDIFF`` to today is below 16,
    drops the helper table, then saves one cleaned document per job
    (including its ``mb.getBow`` bag of words) into the ``jobs_processed``
    collection of the ``mailer_weekly`` Mongo database.  Jobs of company
    421880 are skipped and not counted.  A summary is mailed through
    ``send_email`` at the end.

    ``chunkID`` is only used to label the progress messages; the query is
    not partitioned by it.

    Besides the helper functions it calls, this function reads the
    module-level names ``start_time`` and ``mb``.
    """
    # ------------------------------------------------------------------
    # SQL credentials (an earlier setup pointed at 172.22.65.157:3308)
    # ------------------------------------------------------------------
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    # ------------------------------------------------------------------
    # Creating the SQL queries
    # ------------------------------------------------------------------
    print("Loading Jobs From MySql....")
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)

    drop_helper_sql = "drop table if exists SumoPlus.XY"
    create_helper_sql = (
        "create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,"
        "enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 "
        "where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales "
        "where a1.company_account_id=company_account_id) group by 1 "
    )
    index_helper_sql = "ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)"
    select_jobs_sql = (
        "SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, "
        "rj.isbocreated as back_office_job, rj.publisheddate as publisheddate, "
        "rj.republisheddate as republisheddate, rj.companyid_id as Company_id, "
        "rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, "
        "la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, "
        "li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, "
        "group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, "
        "group_concat(fn.sub_field_enu,'') as subfunction, "
        'case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" '
        'when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, '
        "IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' "
        "from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) "
        "and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 16 "
        "OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 16) ) AS rj "
        "left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id "
        "left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id "
        "left join lookup_experience AS le1 on rj.minexperience = le1.value "
        "left join lookup_experience AS le2 on rj.maxexperience = le2.value "
        "left join recruiter_jobattribute as c on rj.jobid = c.jobid_id "
        "left join lookup_industry AS li on rj.industry=li.industry_id "
        "left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 "
        "left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 "
        "left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id "
        "left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id "
        "WHERE c.AttType in (3,12,13) group by rj.jobid "
    )
    cleanup_helper_sql = "drop table if exists SumoPlus.XY "

    # ------------------------------------------------------------------
    # Executing the SQL queries
    # ------------------------------------------------------------------
    print('chnukID: %s : Loading jobs from SQL.... %s' % (chunkID, time.ctime()))
    mysql_conn.query(drop_helper_sql)
    mysql_conn.query(create_helper_sql)
    mysql_conn.query(index_helper_sql)
    jobs = mysql_conn.query(select_jobs_sql)
    mysql_conn.query(cleanup_helper_sql)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s'
          % (chunkID, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkID, len(jobs)))

    # ------------------------------------------------------------------
    # Connecting to the jobs collection in Mongo
    # ------------------------------------------------------------------
    print('Connecting to Mongodb..')
    jobs_store = MongoConnect('jobs_processed', host='localhost',
                              database='mailer_weekly')
    jobs_cursor = jobs_store.getCursor()
    print('Connecting to Mongodb...finished')

    # ------------------------------------------------------------------
    # Processing the jobs data extracted from SQL
    # ------------------------------------------------------------------
    saved = 0
    for row in jobs:
        if saved % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds'
                  % (chunkID, saved, time.time() - start_time))

        job_id = row['Jobid']
        title = cleanToken(row['JobTitle'])
        max_exp = cleanToken(row['ExpMax'])
        min_exp = cleanToken(row['ExpMin'])
        max_sal = cleanToken(row['SalaryMax'])
        min_sal = cleanToken(row['SalaryMin'])
        description = cleanHTML(cleanToken(row['JD']))
        industry = cleanToken(row['Industry'])
        location = removeDup(row['location'])
        subfunction = removeDup(cleanToken(row['subfunction']))
        function = removeDup(cleanToken(row['function']))
        skills = removeDup(cleanToken(row['keySkills']))
        flag = row['flag']
        account_type = row['account_type']
        company_id = row['Company_id']
        company_name = cleanToken(row['Company_name'])
        published = row['publisheddate']
        republished = row['republisheddate']
        back_office = int(row['back_office_job'])
        location = location.replace(', ', ',').lower().split(',')

        # Altimetrik jobs are removed (and do not count as processed).
        if company_id == 421880:
            continue

        # Weighted text for the bag of words: title x5, skills x5, JD x1,
        # industry / function / subfunction x2 each.
        weighted_parts = [
            5 * (" " + title),
            5 * (" " + skills),
            1 * (" " + description),
            2 * (" " + industry),
            2 * (" " + function),
            2 * (" " + subfunction),
        ]
        text = ' '.join(weighted_parts).replace('candidates', ' ')
        bow = mb.getBow(text, getbowdict=0)

        # Job document to be saved in Mongo.
        document = {
            'job_id': job_id, 'job_title': title, 'job_function': function,
            'job_maxexp': max_exp, 'job_minexp': min_exp,
            'job_location': location, 'job_subfunction': subfunction,
            'job_maxsal': max_sal, 'job_minsal': min_sal, 'job_skills': skills,
            'job_bow': bow, 'job_industry': industry, 'job_jd': description,
            'job_flag': flag, 'job_accounttype': account_type,
            'job_company_id': company_id, 'job_company_name': company_name,
            'job_published': published, 'job_republished': republished,
            'job_back_office': back_office,
        }
        jobs_store.saveToTable(document)
        saved += 1

    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.'
          % (chunkID, time.time() - start_time))

    end_time = time.time()
    elapsed = end_time - start_time
    send_email(['*****@*****.**', '*****@*****.**'],
               "Revival Mailer Weekly",
               'Jobs Processed %s in :%s seconds' % (saved, elapsed))

    # ------------------------------------------------------------------
    # Releasing the Mongo and MySQL connections
    # ------------------------------------------------------------------
    del jobs_store
    del mysql_conn
def preProcessChunk(chunkID):
    """Dump a slim version of the recent jobs into ``JobDescDB.JobDesc_weekly``.

    Builds the helper table ``SumoPlus.XY``, selects the jobs with status 3
    or 9 whose published/republished ``DATEDIFF`` to today is below 20,
    drops the helper table, then saves one document per job into the
    ``JobDesc_weekly`` collection of the ``JobDescDB`` Mongo database.
    Each document carries the job id (as both ``_id`` and ``id``), company
    name, location list, experience bounds, title, publish date, a paid flag
    ``p`` (1 when the SQL ``flag`` column is ``"Paid"``, else 0) and
    ``desc`` set to ``None``.  A summary is mailed through ``send_email``.

    ``chunkID`` is only used to label the progress messages; the query is
    not partitioned by it.

    Besides the helper functions it calls, this function reads the
    module-level name ``start_time``.
    """
    # ------------------------------------------------------------------
    # SQL credentials (an earlier setup pointed at 172.22.65.157:3308)
    # ------------------------------------------------------------------
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    # ------------------------------------------------------------------
    # Creating the SQL queries
    # ------------------------------------------------------------------
    print("Loading Jobs From MySql....")
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)

    cmd1 = "drop table if exists SumoPlus.XY"
    cmd2 = (
        "create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,"
        "enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 "
        "where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales "
        "where a1.company_account_id=company_account_id) group by 1 "
    )
    cmd3 = "ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)"
    cmd4 = (
        "SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, "
        "rj.companyid_id as Company_id, rj.publisheddate as publisheddate, "
        "rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, "
        "la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, "
        "li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, "
        "group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, "
        "group_concat(fn.sub_field_enu,'') as subfunction, "
        'case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" '
        'when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, '
        "IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' "
        "from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) "
        "and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 20 "
        "OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 20) ) AS rj "
        "left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id "
        "left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id "
        "left join lookup_experience AS le1 on rj.minexperience = le1.value "
        "left join lookup_experience AS le2 on rj.maxexperience = le2.value "
        "left join recruiter_jobattribute as c on rj.jobid = c.jobid_id "
        "left join lookup_industry AS li on rj.industry=li.industry_id "
        "left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 "
        "left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 "
        "left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id "
        "left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id "
        "WHERE c.AttType in (3,12,13) group by rj.jobid "
    )
    cmd5 = "drop table if exists SumoPlus.XY "

    # ------------------------------------------------------------------
    # Executing the SQL queries
    # ------------------------------------------------------------------
    print('chnukID: %s : Loading jobs from SQL.... %s' % (chunkID, time.ctime()))
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s'
          % (chunkID, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkID, len(jobs)))

    # ------------------------------------------------------------------
    # Connecting to the jobs tech-dump collection in Mongo
    # ------------------------------------------------------------------
    print('Connecting to Mongodb..')
    tableName = 'JobDesc_weekly'
    monconn_jobs_local = MongoConnect(tableName, host='localhost',
                                      database='JobDescDB')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print('Connecting to Mongodb...finished')

    # ------------------------------------------------------------------
    # Processing the jobs data extracted from SQL
    # ------------------------------------------------------------------
    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds'
                  % (chunkID, i, time.time() - start_time))

        # The job id is stored twice: as Mongo's ``_id`` and as ``id``.
        job_id = job['Jobid']
        comp_name = cleanToken_1(job.get('Company_name', None))
        loc = removeDup(job.get('location', None)).replace(', ', ',').split(',')
        min_exp = job.get('ExpMin', None)
        title = cleanToken_1(job.get('JobTitle', None))
        max_exp = job.get('ExpMax', None)
        pub_date = job.get('publisheddate', None)
        paid = 1 if job.get('flag') == "Paid" else 0

        # Job document to be saved in Mongo; the description is left empty.
        document = {
            '_id': job_id,
            'comp_name': comp_name,
            'loc': loc,
            'min_exp': min_exp,
            'title': title,
            'max_exp': max_exp,
            'pub_date': pub_date,
            'id': job_id,
            'p': paid,
            'desc': None,
        }
        monconn_jobs_local.saveToTable(document)
        i += 1

    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.'
          % (chunkID, time.time() - start_time))

    time_taken = time.time() - start_time
    send_email(['*****@*****.**', '*****@*****.**'],
               "Revival Mailer Weekly",
               'TEch Dump Jobs Processed ' + str(i) + ' in :' + str(time_taken) + ' seconds')

    # ------------------------------------------------------------------
    # Releasing the Mongo and MySQL connections
    # ------------------------------------------------------------------
    del monconn_jobs_local
    del mysql_conn
class getArticlesData():
    """Load WordPress posts from MySQL and expose them as lookup structures.

    Post titles detected as Thai are translated to English (title and
    content) through the Google Translate v2 REST endpoints before the
    article text is assembled.

    The translation helpers are regular instance methods
    (``quick_translate``, ``translate``, ``quick_detect``, ``detect``) or
    static methods (``unicode_urlencode``, ``make_request``) so that the
    ``self.quick_detect(...)`` / ``self.quick_translate(...)`` calls made by
    ``getArticlesList`` bind their arguments correctly.
    """

    def __init__(self):
        """Fetch the posts and prepare the Google Translate endpoint URLs."""
        # 1. Connect to MySQL database table wp_posts
        host = "127.0.0.1"
        user = "******"
        password = "******"
        database = "tap_sg"
        self.mysql_conn = MySQLConnect(database, host, user, password)
        cmd = '''select * from wp_posts limit 2'''
        self.articles = self.mysql_conn.query(cmd)

        # Google API used for Thai to English translation.
        # NOTE(review): the API key lives in source control -- consider
        # moving it to configuration and rotating it.
        API_KEY = "AIzaSyBGGfOdtKFhlJ1w2bitjsj194jUKIxoPT0"
        self.TRANSLATE_URL = "https://www.googleapis.com/language/translate/v2?key=" + API_KEY
        # Callers append the text as e.g. &q=google+translate+is+fast
        self.DETECT_URL = "https://www.googleapis.com/language/translate/v2/detect?key=" + API_KEY

    @staticmethod
    def unicode_urlencode(params):
        """URL-encode ``params`` (dict or pair sequence), UTF-8 encoding unicode values."""
        if isinstance(params, dict):
            params = params.items()
        return urllib.urlencode(
            [(k, v.encode('utf-8') if isinstance(v, unicode) else v)
             for k, v in params])

    @staticmethod
    def make_request(url):
        """Return the body of a GET on ``url``; the handle is always closed."""
        response = urllib.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def quick_translate(self, text, target, source):
        """Return ``text`` translated from ``source`` to ``target``, or ``""`` on any failure."""
        try:
            translated = self.translate(text, target, source)["data"]["translations"][0]["translatedText"]
            # The response is presumed to carry HTML-escaped apostrophes.
            return translated.replace("&#39;", "'")
        except Exception:
            # Best effort by design: callers treat "" as "no translation".
            return ""

    def translate(self, text, target, source):
        """Call the translate endpoint; return the decoded JSON, or ``{}`` on failure."""
        query_params = {"q": text, "source": source, "target": target}
        url = self.TRANSLATE_URL + "&" + self.unicode_urlencode(query_params)
        try:
            return demjson.decode(self.make_request(url))
        except Exception:
            return {}

    def quick_detect(self, text):
        """Return the detected language code of ``text``, or ``""`` on any failure."""
        try:
            return self.detect(text)["data"]["detections"][0][0]["language"]
        except Exception:
            # Best effort by design: callers treat "" as "unknown language".
            return ""

    def detect(self, text):
        """Call the detect endpoint; return the decoded JSON, or ``{}`` on failure."""
        query_params = {"q": text}
        url = self.DETECT_URL + "&" + self.unicode_urlencode(query_params)
        try:
            return demjson.decode(self.make_request(url))
        except Exception:
            return {}

    def getArticlesList(self):
        """Build the article text list and lookup dictionaries, then close MySQL.

        Returns a four-element list:
        ``[articleText, articlesDict, snoToArticleDict,
        articlesTitlesToDetailsDict]`` where ``articleText`` holds the
        processed text of every kept article and the three dictionaries map
        article id, running serial number and title to the same per-article
        details dictionary.
        """
        # 2. Create a list of articles
        articlesDict = {}  # article id -> article details
        snoToArticleDict = {}  # sno -> article details
        articlesTitlesToDetailsDict = {}  # article title -> article details
        articleText = []  # To store articles' text
        i = 0

        for article in self.articles:
            article_title = article['post_title']
            article_content = article['post_content']

            if self.quick_detect(article_title) == "th":
                # Keep the original text when a translation attempt fails
                # (quick_translate returns "" in that case).
                article_title = self.quick_translate(article_title, "en", "th") or article_title
                article_content = self.quick_translate(article_content, "en", "th") or article_content

            article_title = callRemoveHtml(article_title)
            articleid = article['ID']
            article_url = article['guid']
            article_type = article['post_type']
            article_status = article['post_status']
            article_sno = i

            # Filtering only articles from database which are live.
            # NOTE(review): "or" keeps rows matching either condition;
            # confirm whether "and" was intended.
            if (article_type == 'post') or (article_status == 'publish'):
                article_title_with_space = ' ' + article_title
                # The title is repeated five times after the content to give
                # it extra weight in the text.
                article_text = article_content + article_title_with_space * 5
                article_text = article_text.lower()
                article_text = getASCIIString(strip_tags(article_text))
                articleText.append(article_text)

                articleDetailsDict = {
                    'article_text': article_text,
                    'articleid': articleid,
                    'article_title': article_title,
                    'article_url': article_url,
                    'article_sno': article_sno,
                }
                articlesDict[articleid] = articleDetailsDict
                articlesTitlesToDetailsDict[article_title] = articleDetailsDict
                snoToArticleDict[i] = articleDetailsDict
                i += 1

        # 5. Close the MySQL connection
        self.mysql_conn.close()
        return [articleText, articlesDict, snoToArticleDict, articlesTitlesToDetailsDict]
def preProcessChunk(chunkId1, chunkId2):
    """Load jobs from a publication-age window in MySQL and save them to Mongo.

    Selects jobs whose published or republished date is more than
    ``chunkId1`` days and at most ``chunkId2`` days before the current date
    (the DATEDIFF bounds in the query), cleans the text fields, builds a bag
    of words with ``mb.getBow`` and saves one document per job to the
    ``jobs_processed_9months`` collection of the ``SimilarJobs`` Mongo
    database. After the loop the collection is indexed on ``pid``.

    Relies on module-level names that are not defined in this function:
    ``i`` (a running job counter that is NOT reset here, so it keeps growing
    across calls), ``start_time``, ``mb``, ``MySQLConnect``, ``MongoConnect``,
    ``cleanToken``, ``cleanHTML`` and ``removeDup``.

    Args:
        chunkId1: exclusive lower bound of the job age, in days.
        chunkId2: inclusive upper bound of the job age, in days.
    """
    global i

    # SQL connection settings.
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print("Loading Jobs From MySql....")
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)

    # SumoPlus.XY is a scratch table with one row per company account; the
    # main query joins it to derive the 'Paid' / 'Free' flag.
    # NOTE(review): the table name is fixed, so two concurrent runs would
    # presumably clash on it - confirm how this function is scheduled.
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = (
        '''create table SumoPlus.XY as SELECT company_account_id,'''
        '''SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date '''
        '''from SumoPlus.backoffice_accountsales a1 where enabled in '''
        '''(select min(enabled) from SumoPlus.backoffice_accountsales '''
        '''where a1.company_account_id=company_account_id) group by 1 '''
    )
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    # The four %s placeholders are filled with the day bounds by plain string
    # formatting (lower, upper for publisheddate, then again for
    # republisheddate).
    cmd4 = (
        '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, '''
        '''rj.companyid_id as Company_id, rj.displayname as Company_name, '''
        '''rj.publisheddate as Published_Date, rj.republisheddate as RePublished_Date, '''
        '''rj.expirydate as Expiry_Date, '''
        '''la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, '''
        '''le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, '''
        '''group_concat(c.AttValueCustom,'') as keySkills, '''
        '''group_concat(fn.field_enu,'') as function, '''
        '''group_concat(l.city_desc,'') as location, '''
        '''group_concat(fn.sub_field_enu,'') as subfunction, '''
        '''lj.Applications as Application_Number, '''
        '''case account_type when 0 THEN "Company" when 1 THEN "Consultant" '''
        '''when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, '''
        '''IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' '''
        '''from (select * from recruiter_job where ( '''
        '''(DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > %s '''
        '''AND DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) <= %s) '''
        '''OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > %s '''
        '''AND DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) <= %s))) AS rj '''
        '''left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id '''
        '''left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id '''
        '''left join lookup_experience AS le1 on rj.minexperience = le1.value '''
        '''left join lookup_experience AS le2 on rj.maxexperience = le2.value '''
        '''left join recruiter_jobattribute as c on rj.jobid = c.jobid_id '''
        '''left join lookup_industry AS li on rj.industry=li.industry_id '''
        '''left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 '''
        '''left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 '''
        '''left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id '''
        '''left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id '''
        '''left join ShineReport.LiveJobsApplications AS lj on rj.jobid = lj.JobId '''
        '''WHERE c.AttType in (3,12,13) group by rj.jobid '''
    ) % (chunkId1, chunkId2, chunkId1, chunkId2)
    cmd5 = '''drop table if exists SumoPlus.XY '''

    # Each print below passes ONE pre-formatted string so the output is the
    # same as the original Python 2 "print a, b, c" statements (items joined
    # by a single space) while also being valid print-function syntax.
    print('chnukID: %s : Loading jobs from SQL.... %s' % (chunkId1, time.ctime()))
    mysql_conn.query(cmd1)
    print('cmd1')
    try:
        mysql_conn.query(cmd2)
        print('cmd2')
        mysql_conn.query(cmd3)
        print('cmd3')
        jobs = mysql_conn.query(cmd4)
        print('jobs')
    finally:
        # Drop the scratch table even when one of the queries above fails, so
        # a failed run does not leave SumoPlus.XY behind.
        mysql_conn.query(cmd5)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s' % (chunkId1, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkId1, len(jobs)))

    # Mongo collection that receives the processed job documents.
    print('Connecting to Mongodb..')
    tableName = 'jobs_processed_9months'
    monconn_jobs_local = MongoConnect(tableName, host='172.22.66.198', database='SimilarJobs')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print('Connecting to Mongodb...finished')

    for job in jobs:
        if i % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds'
                  % (chunkId1, i, time.time() - start_time))

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_publishedate = job['Published_Date']
        job_repubslisheddate = job['RePublished_Date']
        job_expirydate = job['Expiry_Date']
        job_applications = job['Application_Number']
        # Stored as a list of lower-cased names split on commas.
        job_location = job_location.replace(', ', ',').lower().split(',')

        # Bag of words: each field is repeated to weight it
        # (title x5, skills x3, description x1, industry/function/subfunction x2).
        text = 5 * (" " + job_title) + ' ' + 3 * (" " + job_skills) + ' ' \
            + 1 * (" " + job_jd) + ' ' + 2 * (" " + job_industry) + ' ' \
            + 2 * (" " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        document = {
            'job_id': job_id,
            'job_title': job_title,
            'job_function': job_function,
            'job_maxexp': job_maxexp,
            'job_minexp': job_minexp,
            'job_location': job_location,
            'job_subfunction': job_subfunction,
            'job_maxsal': job_maxsal,
            'job_minsal': job_minsal,
            'job_skills': job_skills,
            'job_bow': job_bow,
            'job_industry': job_industry,
            'job_jd': job_jd,
            'job_flag': job_flag,
            'job_accounttype': job_accounttype,
            'job_company_id': job_company_id,
            'job_company_name': job_company_name,
            'job_index': i,
            'application_number': job_applications,
            # pid buckets the running index into 5000 values; the collection
            # is indexed on it below.
            'pid': i % 5000,
            'job_publishedate': job_publishedate,
            'job_repubslisheddate': job_repubslisheddate,
            'job_expirydate': job_expirydate,
        }
        monconn_jobs_local.saveToTable(document)
        i += 1

    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.'
          % (chunkId1, time.time() - start_time))
    monconn_jobs_local.doIndexing('pid')
    del monconn_jobs_local
    del mysql_conn
def preProcessChunk(chunkID):
    """Copy recently published jobs from MySQL into the Midout Mongo collection.

    Reads jobs with jobstatus 3 or 9 whose published or republished date is
    fewer than 8 days before the current date, cleans the text fields,
    builds a bag of words with ``mb.getBow`` and saves one document per job
    to the ``jobs_processed_midout`` collection of the ``Midout_Mailers``
    Mongo database on localhost. A summary e-mail is sent at the end.

    ``chunkID`` is only used to label the progress messages. The function
    depends on module-level names that are not defined here: ``start_time``,
    ``mb``, ``MySQLConnect``, ``MongoConnect``, ``cleanToken``, ``cleanHTML``,
    ``removeDup`` and ``send_email``.
    """
    # ---- SQL credentials -------------------------------------------------
    db_host = "172.22.66.204"
    db_user = "******"
    db_password = "******"
    db_name = "SumoPlus"
    db_socket = "/tmp/mysql.sock"
    db_port = 3306

    # ---- SQL query -------------------------------------------------------
    print("Loading Jobs From MySql....")
    sql_conn = MySQLConnect(db_name, db_host, db_user, db_password, db_socket, db_port)
    jobs_query = (
        '''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, '''
        '''la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, '''
        '''le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, '''
        '''group_concat(c.AttValueCustom,'') as keySkills, '''
        '''group_concat(fn.field_enu,'') as function, '''
        '''group_concat(l.city_desc,'') as location, '''
        '''group_concat(fn.sub_field_enu,'') as subfunction '''
        '''from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and '''
        '''(DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 '''
        '''OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) ) AS rj '''
        '''left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id '''
        '''left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id '''
        '''left join lookup_experience AS le1 on rj.minexperience = le1.value '''
        '''left join lookup_experience AS le2 on rj.maxexperience = le2.value '''
        '''left join recruiter_jobattribute as c on rj.jobid = c.jobid_id '''
        '''left join lookup_industry AS li on rj.industry=li.industry_id '''
        '''left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 '''
        '''left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 '''
        '''WHERE c.AttType in (3,12,13) group by rj.jobid '''
    )

    # ---- Run the query ---------------------------------------------------
    # Every print passes one pre-formatted string; the spacing reproduces
    # what the Python 2 "print a, b, c" form emits (items joined by a space).
    print('chnukID: %s : Loading jobs from SQL.... %s' % (chunkID, time.ctime()))
    jobs = sql_conn.query(jobs_query)
    print('chunkID: %s : Loading jobs from SQL....completed.. %s' % (chunkID, time.ctime()))
    print('chunkid: %s  : Number of jobs loaded:  %s' % (chunkID, len(jobs)))

    # ---- Mongo collection that receives the job documents ----------------
    print('Connecting to Mongodb..')
    collection_name = 'jobs_processed_midout'
    mongo_jobs = MongoConnect(collection_name, host='localhost', database='Midout_Mailers')
    mongo_jobs_cursor = mongo_jobs.getCursor()
    print('Connecting to Mongodb...finished')

    # ---- Transform and store each job ------------------------------------
    processed = 0
    for seq, job in enumerate(jobs):
        if seq % 1000 == 0:
            print('\tchunkID: %s  numRecords: %s  completed in  %s  seconds'
                  % (chunkID, seq, time.time() - start_time))

        job_id = job['Jobid']
        title = cleanToken(job['JobTitle'])
        max_exp = cleanToken(job['ExpMax'])
        min_exp = cleanToken(job['ExpMin'])
        max_sal = cleanToken(job['SalaryMax'])
        min_sal = cleanToken(job['SalaryMin'])
        description = cleanHTML(cleanToken(job['JD']))
        industry = cleanToken(job['Industry'])
        location = removeDup(job['location'])
        subfunction = removeDup(job['subfunction'])
        function = removeDup(job['function'])
        skills = removeDup(cleanToken(job['keySkills']))

        # Bag of words: fields are repeated to weight them (title and skills
        # x5, description x1, industry/function/subfunction x2).
        weighted_parts = [
            (" " + title) * 5,
            (" " + skills) * 5,
            (" " + description) * 1,
            (" " + industry) * 2,
            (" " + function) * 2,
            (" " + subfunction) * 2,
        ]
        bow_text = ' '.join(weighted_parts).replace('candidates', ' ')
        bag_of_words = mb.getBow(bow_text, getbowdict=0)

        mongo_jobs.saveToTable({
            'job_id': job_id,
            'job_title': title,
            'job_function': function,
            'job_maxexp': max_exp,
            'job_minexp': min_exp,
            'job_location': location,
            'job_subfunction': subfunction,
            'job_maxsal': max_sal,
            'job_minsal': min_sal,
            'job_skills': skills,
            'job_bow': bag_of_words,
            'job_industry': industry,
            'job_jd': description,
        })
        processed = seq + 1

    # ---- Report ----------------------------------------------------------
    print("Processing finished.....")
    print('chunkID: %s  Total time taken is:  %s  seconds.'
          % (chunkID, time.time() - start_time))
    finished_at = time.time()
    elapsed = finished_at - start_time
    send_email(['*****@*****.**', '*****@*****.**'], "Midout Mailers",
               'Jobs Processed ' + str(processed) + ' in :' + str(elapsed) + ' seconds')
    del mongo_jobs
    del sql_conn
def getArticlesData():
    """Read all rows of ``wp_posts`` and build article lookup structures.

    Every row of the ``wp_posts`` table in the ``tap_sg_new`` database is
    read. For each row that passes the type/status filter, a text string is
    built from the post content followed by five copies of the cleaned
    title, lower-cased and passed through ``strip_tags`` and
    ``getASCIIString``.

    Depends on names defined elsewhere in the module: ``MySQLConnect``,
    ``callRemoveHtml``, ``strip_tags`` and ``getASCIIString``.

    Returns:
        A four-item list ``[articleText, articlesDict, snoToArticleDict,
        articlesTitlesToDetailsDict]``:

        * ``articleText`` - list of the processed text strings, in row order;
        * ``articlesDict`` - post ID -> article details dict;
        * ``snoToArticleDict`` - serial number (0-based position among the
          kept articles) -> article details dict;
        * ``articlesTitlesToDetailsDict`` - cleaned title -> article details
          dict (a later article with the same title replaces an earlier one).
    """
    # 1. Connect to the MySQL database holding table wp_posts.
    host = "127.0.0.1"
    user = "******"
    password = "******"
    database = "tap_sg_new"
    mysql_conn = MySQLConnect(database, host, user, password)

    # 2. Containers for the results.
    articleText = []
    articlesDict = {}  # article id -> article details
    snoToArticleDict = {}  # sno -> article details
    articlesTitlesToDetailsDict = {}  # article title -> article details

    try:
        cmd = '''select * from wp_posts'''
        articles = mysql_conn.query(cmd)

        i = 0  # serial number of the next kept article
        for article in articles:
            article_content = article['post_content']
            articleid = article['ID']
            article_title = callRemoveHtml(article['post_title'])
            article_url = article['guid']
            article_type = article['post_type']
            article_status = article['post_status']

            # NOTE(review): the original comment says this keeps only live
            # articles, yet the test is "post OR publish", which also keeps
            # unpublished posts and published non-post rows. Left unchanged
            # because callers may rely on it - confirm whether "and" was
            # intended.
            if not ((article_type == 'post') or (article_status == 'publish')):
                continue

            # Title is appended five times after the content so that it
            # carries more weight in the text.
            article_text = article_content + (' ' + article_title) * 5
            article_text = article_text.lower()
            article_text = getASCIIString(strip_tags(article_text))
            articleText.append(article_text)

            articleDetailsDict = {
                'article_text': article_text,
                'articleid': articleid,
                'article_title': article_title,
                'article_url': article_url,
                'article_sno': i,
            }
            articlesDict[articleid] = articleDetailsDict
            articlesTitlesToDetailsDict[article_title] = articleDetailsDict
            snoToArticleDict[i] = articleDetailsDict
            i += 1
    finally:
        # 3. Always release the MySQL connection, even when the query or the
        # processing of a row raises.
        mysql_conn.close()

    return [
        articleText, articlesDict, snoToArticleDict,
        articlesTitlesToDetailsDict
    ]
def getDataFromSQL(): ###################################### '''Fetching the Jobs from SQL''' ###################################### #host="172.22.65.157" host = "172.22.66.204" user="******" password="******" database="SumoPlus" unix_socket="/tmp/mysql.sock" port = 3306 print "Loading Jobs From MySql...." mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4) and rj.jobid%''' + str(numChunks) + '=' + str(chunkID) #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on 
rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)''' #print cmd cmd1='''drop table if exists SumoPlus.XY''' cmd2='''create table SumoPlus.XY as SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date from SumoPlus.backoffice_accountsales a1 where enabled in (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id) group by 1 ''' cmd3='''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)''' cmd4='''SELECT rj.jobid as Jobid, rj.jobtitle as JobTitle, rj.description as JD, rj.companyid_id as Company_id, rj.displayname as Company_name, la1.text_value_MAX as SalaryMax, la2.text_value_MIN as SalaryMin, le1.display as ExpMin, le2.display as ExpMax, li.industry_desc as Industry, group_concat(c.AttValueCustom,'') as keySkills, group_concat(fn.field_enu,'') as function, group_concat(l.city_desc,'') as location, group_concat(fn.sub_field_enu,'') as subfunction, K.Applications as Applications, K.MatchedApplications as MatchedApplications, case account_type when 0 THEN "Company" when 1 THEN "Consultant" when 2 THEN "Others" when 3 THEN "Enterprise" ELSE "Not Specified" END AS account_type, IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag' from (select * from recruiter_job where recruiter_job.jobstatus in (3,9) and ((DATEDIFF( 
CURDATE(),DATE(recruiter_job.publisheddate)) < 51 AND (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > 6)) OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 51 AND (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > 6))) ) AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on l.city_id = c.AttValue AND c.AttType = 13 left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id left join SumoPlus.backoffice_companyaccount AS F on F.id= rj.companyid_id left join ShineReport.LiveJobsApplications as K on K.JobId = rj.jobid WHERE c.AttType in (3,12,13) group by rj.jobid ''' cmd5= '''drop table if exists SumoPlus.XY ''' #print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime() mysql_conn.query(cmd1) mysql_conn.query(cmd2) mysql_conn.query(cmd3) jobs = mysql_conn.query(cmd4) mysql_conn.query(cmd5)