def computeAlertsChunk(chunkID):

    #########################################################################################################             
    ############-----------------Creating the connection to Mongo (172.22.66.233)
    #########################################################################################################
    #monconn_users_static = MongoConnect('candidates_processed_4', host = 'localhost', database = 'JobAlerts')
    monconn_users_static = MongoConnect('candidates_processed_4', host = '172.22.66.198', database = 'JobAlerts')	
    monconn_users_static_cur = monconn_users_static.getCursor()
    
    #monconn_applications = MongoConnect('candidate_applications', host = 'localhost', database = 'JobAlerts')
    monconn_applications = MongoConnect('candidate_applications', host = '172.22.66.198', database = 'JobAlerts')
    monconn_applications_cur = monconn_users_static.getCursor()
    
    tablename = 'candidates_processed_5'
    #monconn_recommendations = MongoConnect(tablename, host='localhost', database='JobAlerts')    
    monconn_recommendations = MongoConnect(tablename, host='172.22.66.198', database='JobAlerts')
    print 'Chunk:', chunkID, 'initiated at:', time.ctime()
    
    myCondition = {'p':chunkID}  
    users = monconn_users_static.loadFromTable(myCondition)
  
    for row in users :
        user_profiletitle = row['user_profiletitle']
        user_industry  = row['user_industry']
        user_functionalarea = row['user_functionalarea']
        user_jobtitle = row['user_jobtitle']
        user_skills   = row['user_skills']
        preferred_subfa = row["preferred_sub_fa"]
        subject_status = row["subject_status"]
        user_experience = row["user_experience"]
        
        apply_data = monconn_applications.loadFromTable({'fcu':row['_id']})
        apply_data_list = list(apply_data)
        application_list = []
        if len(apply_data) == 0:
            pass
        else:
            for element  in apply_data_list:
                application_list.append(element['fjj'])
                
        application_list.sort()
        row['application_list'] = application_list
        application_count = len(application_list)
        row['application_count'] = application_count
        
        if application_count == 0:
            monconn_recommendations.saveToTable(row)
def createUserData():
    # Connecting to user log table from which data needs to be retreived
    monconn_get=MongoConnect("users",database="capi_logs")
    monconn_cur=monconn_get.getCursor()
    
    i=0
    userarticleDict={}
    
    for user in monconn_cur.find():
        userid=user["userid"]
        articleid=user["articleid"]
        date_str=user["_id"].generation_time.strftime('%m/%d/%Y')
        
        date=datetime.strptime(date_str,"%m/%d/%Y")
        days=(datetime.now()-date).days
        
          
        if i%10000==0:
            print i
            
        i+=1
        
        #print days
        if days>90:
            continue
 
    
        if userid is None:
            continue
        
        if userid in userarticleDict:
            if articleid in userarticleDict[userid]:
                continue
            else:
                userarticleDict[userid].append(articleid)
        else:
            userarticleDict[userid]=[]
            userarticleDict[userid].append(articleid)    
    
    return userarticleDict
Beispiel #3
0
def computeAlertsChunk(chunkID):

    #########################################################################################################
    ############-----------------Creating a connection to output mongodb
    #########################################################################################################
    tablename = 'DailyMsgQueue'
    monconn_recommendations = MongoConnect(tablename,
                                           host='localhost',
                                           database='mailer_daily_midout')
    print 'Chunk:', chunkID, 'initiated at:', time.ctime()

    #########################################################################################################
    ############-----------------Fetch the user data from the database
    #########################################################################################################
    tablename = "candidates_processed_midout"
    monconn_users = MongoConnect(tablename,
                                 host='localhost',
                                 database='Midout_Mailers')
    mongo_users_cur = monconn_users.getCursor()

    myCondition = {'pid': chunkID}
    users = monconn_users.loadFromTable(myCondition)

    #########################################################################################################
    ############-----------------Loop to generate recommendations and save in Mongo
    #########################################################################################################
    count = 0
    for user in users:
        count += 1

        #########################################################################################################
        ############-----------------Extracting the user details
        #########################################################################################################

        _id = user['user_email']
        user_ctc = user['user_ctc']
        user_exp = user['user_experience']
        user_id = user['user_id']
        user_email = user['user_email']
        user_bow = user['user_bow']['bow']
        user_current_time = datetime.datetime.now()
        user_countrycode = user.get('user_countrycode', '')
        user_pid = user['pid']
        user_phone_number = "+" + str(user_countrycode) + "-" + str(
            user.get('user_phone_number', ''))
        user_firstname = user.get('user_firstname', '')
        user_lastname = user.get('user_lastname', '')
        user_fullname = user_firstname + " " + user_lastname

        if len(user_fullname) < 3:
            user_fullname = "Candidate"

        try:
            user_registration_start_date = rfc3339(user.get(
                'user_registration_start_date', ''),
                                                   utc=True)
        except:
            user_registration_start_date = ""

        lsi_user = lsiModel[tfIdfModel[user_bow]]
        simScrChunk = index[lsi_user]
        sortingExcelSheetList = []

        for (jobIntIndex, lsiCosine) in simScrChunk:

            if lsiCosine < 0.18:
                continue

            #########################################################################################################
            ############-----------------Loading the Jobs Data
            #########################################################################################################

            job = jobIntIdToJobDict[jobIntIndex]
            jobid = job['job_id']
            job_title = job['job_title']
            job_skills = job['job_skills']
            job_minsal = job['job_minsal']
            job_maxsal = job['job_maxsal']
            job_minexp = job['job_minexp']
            job_maxexp = job['job_maxexp']
            job_bow = job['job_bow']['bow']

            #########################################################################################################
            ############-----------------Calculating the CTC and Experience Match Scores
            #########################################################################################################
            ctc_match_score = CTCMatchScore(job_minsal, job_maxsal,
                                            user_ctc).CTCMatchScore()
            exp_match_score = ExpMatchScore(job_minexp, job_maxexp,
                                            user_exp).ExpMatchScore()

            #########################################################################################################
            ############-----------------Calculating the City Score between a candidate and a job
            #########################################################################################################

            if ctc_match_score == 1 and exp_match_score == 1:
                jobid = job['job_id']
                #lsiCosine = getLSICosine(user_bow, job_bow).getLSICosine()

                #City Score
                try:
                    job_city = job['job_location']
                except:
                    job_city = 'Delhi'
                try:
                    user_city = user['user_location']
                except:
                    user_city = 'Delhi'

                #print user_city, job_city
                try:
                    user_city_list = user_city.lower().replace(
                        'other', '').strip().split(',')
                    user_city_list = [x.strip() for x in user_city_list]
                except:
                    user_city_list = ['']

                try:
                    job_city_list = job_city.lower().replace(
                        'other', '').strip().split(',')
                    job_city_list = [x.strip() for x in job_city_list]
                except:
                    job_city_list = ['']
                #print user_city_list, job_city_list
                try:
                    cityScore = cm.getCityScore(user_city_list, job_city_list)
                except:
                    cityScore = 0

                #if cityScore == 0:
                #count = count +1
                #print user_city_list, job_city_list, cityScore
                paidboost = 0

                #########################################################################################################
                ############-----------------Calculating the overall match score
                #########################################################################################################
                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paidboost)

                s = (user_id, user_email, jobid, overallMatchScore, job_title,
                     job_skills, job_minsal, job_maxsal, job_minexp,
                     job_maxexp)
                sortingExcelSheetList.append(s)

            else:
                continue

        ##############################################################################################################
        ############-----------------Finding the top 10 Jobs based on Overall Score
        ##############################################################################################################
        topN = 10
        sortingExcelSheetListTopNJobs = heapq.nlargest(topN,
                                                       sortingExcelSheetList,
                                                       key=lambda x: x[3])

        jobs2bsent = []
        for (user_id, user_email, jobid, overallMatchScore, job_title,
             job_skills, job_minsal, job_maxsal, job_minexp,
             job_maxexp) in sortingExcelSheetListTopNJobs:

            jobs2bsent.append(int(jobid))

        ##############################################################################################################
        ############-----------------Creating a document to be saved in mongo collection
        ##############################################################################################################

        document = {"_id":user_email, \
                    "fn" : user_fullname,\
                    "c": user_id,\
                    "mj":jobs2bsent, \
                    "oj":[], \
                    "s": False ,\
                    "t": user_current_time ,\
                    "pid": user_pid,\
                    "m": user_phone_number, \
                    "pd":user_registration_start_date
                    }

        ##############################################################################################################
        ############-----------------Dumping the document in mongo collection if recommendations were generated
        ##############################################################################################################

        if len(jobs2bsent) > 0:
            monconn_recommendations.saveToTable(document)

    monconn_recommendations.close()
Beispiel #4
0
        #########################################################################################################
        ############-----------------  Load the city Mapping in RAM
        #########################################################################################################
        cityScoreMatrixFilename = '../Features/CityScore/CityMatrix.json'
        stateCapitalMappingFileName = '../Features/CityScore/stateCapitalMapping.csv'
        cm = CityMatch(cityScoreMatrixFilename, stateCapitalMappingFileName)

        #########################################################################################################
        ############-----------------  Fetch the jobs data from the Mongo
        #########################################################################################################
        print "Fetching the jobs data from Mongodb"
        tablename = "jobs_processed_midout"
        monconn_jobs = MongoConnect(tablename,
                                    host='localhost',
                                    database='Midout_Mailers')
        mongo_jobs_cur = monconn_jobs.getCursor()
        print "Fetching the jobs data from Mongodb....completed"

        myCondition = {}
        jobs = monconn_jobs.loadFromTable(myCondition)

        #########################################################################################################
        ############-----------------  Creating Index on Jobs
        #########################################################################################################
        jobs_bow = []
        i = 0
        jobIntIdToJobDict = {}

        for job in jobs:
            job_bow = job['job_bow']['bow']
            jobs_bow.append(job_bow)
Beispiel #5
0
def preProcessChunk(chunkID):

    #########################################################################################################
    ############-----------------    SQL Credentials
    #########################################################################################################
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    #########################################################################################################
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.isbocreated as back_office_job,
         rj.publisheddate as publisheddate,
         rj.republisheddate as republisheddate,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 16 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 16) 
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    #########################################################################################################
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    #########################################################################################################
    ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='mailer_weekly')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    #########################################################################################################
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'
        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_published_date = job['publisheddate']
        job_republished_date = job['republisheddate']
        job_back_office = int(job['back_office_job'])
        job_location = job_location.replace(', ', ',').lower().split(',')
        if job_company_id == 421880:  #######---------- Altimetrik Jobs removed
            continue

        #########################################################################################################
        ############-----------------Creating Bag of Words for Text
        #########################################################################################################
        text = 5 * (" " + job_title) + ' ' + 5 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        #########################################################################################################
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,
             'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office
             }

        #########################################################################################################
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################
        monconn_jobs_local.saveToTable(document)
        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email([
        '*****@*****.**',
        '*****@*****.**'
    ], "Revival Mailer Weekly", 'Jobs Processed ' + str(i) + ' in :' +
               str(end_time - start_time) + ' seconds')

    #########################################################################################################
    ############-----------------Changing the status of completion and deleting the mongo connections
    #########################################################################################################
    del (monconn_jobs_local)
    del (mysql_conn)
Beispiel #6
0
        #########################################################################################################
        print 'Loading the mappings for bow'
        synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
        keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
        mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
        print 'Loading the mappings for bow...finished'

        #########################################################################################################
        ############-----------------    Dropping the existing collection of Jobs
        #########################################################################################################
        print 'Connecting to Mongodb..'
        tableName = 'jobs_processed'
        monconn_jobs_local = MongoConnect(tableName,
                                          host='localhost',
                                          database='mailer_weekly')
        monconn_jobs_local_cur = monconn_jobs_local.getCursor()
        monconn_jobs_local.dropTable()
        print 'Connecting to Mongodb...finished'
        del (monconn_jobs_local)

        #########################################################################################################
        ############----------------- Initiating Multiprocessing and extracting Jobs
        ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid)
        #########################################################################################################
        numChunks = 100
        chunkIDs = range(0, numChunks)
        print chunkIDs
        pprocessing = 0
        if pprocessing == 0:
            preProcessChunk(1)
            pass
def computeAlertsChunk(chunkID):

    #########################################################################################################
    ############-----------------Creating a connection to output mongodb
    #########################################################################################################
    tablename = 'JobSuggestions'
    monconn_recommendations = MongoConnect(tablename,
                                           host='localhost',
                                           database='similar_jobs_onsite')

    print 'Chunk:', chunkID, 'initiated at:', time.ctime()

    #########################################################################################################
    ############-----------------Fetch the 3 month jobs data from mongo
    #########################################################################################################
    tablename = "active_jobs_dump"
    monconn_jobs_1 = MongoConnect(tablename,
                                  host='localhost',
                                  database='similar_jobs_onsite')
    mongo_jobs_1_cur = monconn_jobs_1.getCursor()
    myCondition = {'pid': chunkID}
    jobs_1 = monconn_jobs_1.loadFromTable(myCondition)

    #########################################################################################################
    ############-----------------Calculating the overall score of a 3month jobs based on cosine,ctc,
    ############-----------------experience,city scores for each 1month Job
    #########################################################################################################

    count = 0

    for job_1 in jobs_1:
        count += 1
        jobid_1 = job_1['job_id']
        job_title_1 = job_1['job_title']
        job_skills_1 = job_1['job_skills']
        job_minsal_1 = job_1['job_minsal']
        job_maxsal_1 = job_1['job_maxsal']
        job_minexp_1 = job_1['job_minexp']
        job_maxexp_1 = job_1['job_maxexp']
        job_bow_1 = job_1['job_bow']['bow']
        job_index_1 = job_1['job_index']

        lsi_job_1 = lsiModel[tfIdfModel[job_bow_1]]
        simScrChunk = index[lsi_job_1]

        sortingExcelSheetList = []

        for (jobIntIndex, lsiCosine) in simScrChunk:

            job = jobIntIdToJobDict[jobIntIndex]
            jobid = job['job_id']
            job_title = job['job_title']
            job_skills = job['job_skills']
            job_minsal = job['job_minsal']
            job_maxsal = job['job_maxsal']
            job_minexp = job['job_minexp']
            job_maxexp = job['job_maxexp']
            job_bow = job['job_bow']['bow']
            job_index = job['job_index']
            job_company_id = job['job_company_id']

            #########################################################################################################
            ############-----------------Calculating the CTC and Experience and City Match Scores
            #########################################################################################################

            ctc_match = CTCMatchScore(job_minsal_1, job_maxsal_1, job_minsal,
                                      job_maxsal)
            ctc_match_score = ctc_match.CTCMatchScore()
            exp_match_score = ExpMatchScore(job_minexp_1, job_maxexp_1,
                                            job_minexp,
                                            job_maxexp).ExpMatchScore()
            paid_boost = 0
            if ctc_match_score == 1 and exp_match_score == 1:
                if jobid != jobid_1:
                    try:
                        job_city_1 = job_1['job_location']
                    except:
                        job_city_1 = ["Delhi"]

                    try:
                        job_city = job['job_location']
                    except:
                        job_city = ["Delhi"]

                    #lsiCosine = getLSICosine(user_bow, job_bow).getLSICosine()
                    try:
                        cityScore = cm.getCityScore(job_city_1, job_city)
                    except:
                        cityScore = 0

                    overallMatchScore = getOverallMatchScore(
                        lsiCosine, cityScore, paid_boost)
                    s = (jobid_1, job_index_1, jobid, job_index,
                         overallMatchScore, job_company_id)
                    sortingExcelSheetList.append(s)

                else:
                    continue
            else:
                continue

        #########################################################################################################
        ############-----------------Finding the top 10 Jobs based on overall sccore
        #########################################################################################################

        topN = 30
        sortingExcelSheetListTopNJobs = heapq.nlargest(topN,
                                                       sortingExcelSheetList,
                                                       key=lambda x: x[4])

        jobs2bsent = []
        company_ids = []
        for (jobid_1, job_index_1, jobid, job_index, overallMatchScore,
             job_company_id) in sortingExcelSheetListTopNJobs:
            if job_company_id not in company_ids:
                company_ids.append(job_company_id)
                jobs2bsent.append(int(jobid))
            else:
                if company_ids.count(job_company_id) < 2:
                    company_ids.append(job_company_id)
                    jobs2bsent.append(int(jobid))
                else:
                    pass

            if len(jobs2bsent) >= 10:
                break
            else:
                pass

        ##############################################################################################################
        ############-----------------Creating a document to be saved in mongo collection
        ##############################################################################################################                                     \
        document = {
            '_id': jobid_1,
            'sj': jobs2bsent,
            'sjlen': len(jobs2bsent),
            'lud': datetime.datetime.now()
        }

        ##############################################################################################################
        ############-----------------Dumping the document in mongo collection if recommendations were generated
        ##############################################################################################################
        monconn_recommendations.saveToTable(document)

    monconn_recommendations.close()
def preProcessChunk(chunkID):
    
    #########################################################################################################             
    ############-----------------    SQL Credentials
    #########################################################################################################
    
    #Connect to SQL table and get the jobs data
    #host="172.16.66.64"
    #user="******"
    #password="******"
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''

    host="172.22.66.204"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3306



    #########################################################################################################             
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd='''SELECT
        rj.jobid as Jobid,
        rj.jobtitle as JobTitle,
        rj.description as JD,
        la1.text_value_MAX as SalaryMax,
        la2.text_value_MIN as SalaryMin,
        le1.display as ExpMin,
        le2.display as ExpMax,
        li.industry_desc as Industry,
        group_concat(c.AttValueCustom,'') as keySkills,
        group_concat(fn.field_enu,'') as function,
        group_concat(l.city_desc,'') as location,
        group_concat(fn.sub_field_enu,'') as subfunction 
        
        from 
        (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8)  
        ) AS rj 
        left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
        left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
        left join lookup_experience AS le1 on rj.minexperience = le1.value 
        left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
        left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
        left join  lookup_industry AS li on rj.industry=li.industry_id 
        left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
        left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
        
        WHERE 
        
        c.AttType in (3,12,13) 
        
        group by rj.jobid
        
        
        '''


    #########################################################################################################             
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    jobs = mysql_conn.query(cmd)
    print 'chunkID:', chunkID,': Loading jobs from SQL....completed..', time.ctime()
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)




    #########################################################################################################             
    ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed_midout'
    monconn_jobs_local = MongoConnect(tableName , host = 'localhost', database = 'Midout_Mailers')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'
    
    
        
    #########################################################################################################             
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i%1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:' , i,  ' completed in ', time.time() - start_time, ' seconds'
        
        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])  
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])  
        job_jd = cleanHTML(cleanToken(job['JD']) )
        job_industry = cleanToken(job['Industry'])
        job_location=removeDup(job['location'])
        job_subfunction=removeDup(job['subfunction'])
        job_function=removeDup(job['function'])
        job_skills=removeDup(cleanToken(job['keySkills']))
        

        
        #########################################################################################################             
        ############-----------------Creating Bag of Words for Text
        #########################################################################################################
        text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict = 0)
    


        #########################################################################################################             
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################        
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd
             }



        #########################################################################################################             
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################        
        monconn_jobs_local.saveToTable(document)
    
        i += 1
        

    print "Processing finished....."    
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time() - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email(['*****@*****.**', '*****@*****.**'],"Midout Mailers",'Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')
    #os.system(' echo "Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Midout Mailers" [email protected] ,[email protected]')
    del(monconn_jobs_local)
    del(mysql_conn)
        ######################################

        print 'preProcessing Jobs...', time.ctime()
        start_time = time.time()
        htmls = HTMLStripper()

        #############################################
        '''Status Check'''
        #############################################

        print 'Connecting to Mongodb..'
        tableName = 'jobs_status_check'
        monconn_status_check = MongoConnect(tableName,
                                            host='localhost',
                                            database='jam_status')
        monconn_status_check_cur = monconn_status_check.getCursor()
        monconn_status_check.dropTable()
        del (monconn_status_check)
        #monconn_status_check.saveToTable({'_id':1,'status':0})

        ######################################
        '''Load the mapping for Bag of Words'''
        ######################################

        print 'Loading the mappings for bow'
        synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
        keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
        mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
        print 'Loading the mappings for bow...finished'

        print 'Connecting to Mongodb..'
Beispiel #10
0
def ApplicationIndexing():

    #######################################
    'Initiating and Declaring variables '
    #######################################

    user_mapping = {}
    i = 0
    user_index = 0

    ###############################
    ' Creating the previous date '
    ###############################
    todayDate = date.today()
    previousDate = todayDate + relativedelta(days=-183)
    day1 = datetime.combine(previousDate, time(0, 0))
    day2 = datetime.combine(todayDate, time(0, 0))

    ###########################################################
    ' Connecting to the Candidate Apply DB (without indexes) '
    ###########################################################
    tablename = 'candidate_applications'
    mongo_conn = MongoConnect(tablename,
                              host='172.22.66.198',
                              database='JobAlerts')
    mongo_conn_cur = mongo_conn.getCursor()

    #################################################################
    ' Connecting to DB where indexed applications are to be dumped '
    #################################################################
    tablename = "apply_data"
    monconn_user = MongoConnect(tablename,
                                host='172.22.66.198',
                                database='SimilarJobs')

    ######################
    ' Creating indexes  '
    ######################

    last_user_ObjectId = 1
    previous_id = "0"
    id = "0"

    #recency_score =

    try:
        while True:
            myCondition = {"fcu": {'$gt': id}}
            data = mongo_conn_cur.find(myCondition).sort('fcu').limit(100000)
            insert = []

            for row in data:
                try:
                    userid = row['fcu']
                    user_ObjectID = row['_id']

                    if userid == previous_id:
                        pass
                    else:
                        previous_id = userid
                        user_index += 1

                    index = user_index
                except:
                    continue

                jobid = row['fjj']
                application_date = row['ad']
                #print "application_date",application_date
                current_time = datetime.now()
                #print "today",current_time

                difference = abs((current_time - application_date).days)
                #print difference
                #recency_score = 1/(1+ math.sqrt(difference))
                if difference <= 10:
                    recency_score = 1
                elif difference > 10 and difference <= 20:
                    recency_score = 0.9
                elif difference > 20 and difference <= 30:
                    recency_score = 0.8
                else:
                    recency_score = 0.6

                #print "recency_score",recency_score

                #break
                pid = i % 5000
                document = {"userid":userid,\
                                "user_index":index, \
                                "jobid": jobid , \
                                'score':recency_score, \
                                'application_date':application_date, \
                                '_id': user_ObjectID , \
                                'pid':pid
                                }

                insert.append(document)
                id = row['fcu']
                i += 1
                if i % 100000 == 0:
                    print "Records Processed :", i
                    #sys.exit(0)

            monconn_user.insert(insert)
    except Exception as E:
        print E
        username = '******'
        password = '******'

        #########################################################################################################
        ############----------------- Dictionary for LookupExperience
        #########################################################################################################
        print "Loading Dictionary for Experience"
        tableName = 'LookupExperience'
        monconn_users = MongoConnect(tableName,
                                     host='172.22.65.88',
                                     port=27018,
                                     database='sumoplus',
                                     username=username,
                                     password=password,
                                     authenticate=True)
        monconn_users_cur = monconn_users.getCursor()
        user_experience_dict = {}
        for user in monconn_users_cur.find():
            user_experience_dict[user['v']] = user['d']

        #########################################################################################################
        ############----------------- Dictionary for LookupJobTitle
        #########################################################################################################
        print "Loading Dictionary for JobTitle"
        tableName = 'LookupJobTitle'
        monconn_users = MongoConnect(tableName,
                                     host='172.22.65.88',
                                     port=27018,
                                     database='sumoplus',
                                     username=username,
                                     password=password,
#########################################################################################################
############-----------------Try Except to provide alert in case of code failure
#########################################################################################################
try:
    #########################################################################################################
    ############-----------------Creating a mongo connection to miscellaneous DB
    #########################################################################################################
    monconn_users = MongoConnect(tableName,
                                 host='172.22.65.88',
                                 port=27018,
                                 database='miscellaneous',
                                 username=username,
                                 password=password,
                                 authenticate=True)
    monconn_users_cur = monconn_users.getCursor()
    myCondition = {"cd": {'$gt': date1}}
    users = monconn_users.loadFromTable(myCondition)
    print "Number of recoreds : " + str(len(users))

    #########################################################################################################
    ############-----------------Creating a mongo connection to resume dump DB Mongo(172.22.66.233)
    #########################################################################################################
    tableName = 'candidate_data'
    monconn_resume = MongoConnect(tableName,
                                  host='172.22.66.198',
                                  database='ResumeDump')
    monconn_resume_cur = monconn_resume.getCursor()

    #########################################################################################################
    ############-----------------Extracting the resume data and dumping in local Mongo
Beispiel #13
0
    print "Candidates with last login less than 183 days removed"

    #########################################################################################################
    ############-----------------Connecting to Mongo CandidateStatic and CandidatePreferences
    #########################################################################################################
    username = '******'
    password = '******'
    #monconn_users_static = MongoConnect('CandidateStatic', host = '172.22.65.157', port = 27018, database = 'sumoplus', username = username, password = password, authenticate = True).getCursor()
    monconn_users_preferences = MongoConnect('CandidatePreferences',
                                             host='172.22.65.88',
                                             port=27018,
                                             database='sumoplus',
                                             username=username,
                                             password=password,
                                             authenticate=True)
    monconn_users_preferences_cur = monconn_users_preferences.getCursor()

    #########################################################################################################
    ############-----------------Creating a Dictionary of Subfa to FA
    #########################################################################################################
    ifile = open('subfa_fa.csv', 'r')
    reader = csv.reader(ifile)
    reader.next()
    sub_fa_dict = {}
    for row in reader:
        sub_fa_dict[int(row[3])] = [row[4], int(row[1])]

    #########################################################################################################
    ############-----------------Loading the FA lookup by passing filename to LoadFA_lookup function in
    ############-----------------JobAlert_Functions script
    #########################################################################################################
Beispiel #14
0
def preProcessChunk(chunkId1, chunkId2):

    ######################################
    '''Fetching the Jobs from SQL'''
    ######################################

    #host="172.22.65.157"
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         rj.publisheddate as Published_Date,
         rj.republisheddate as RePublished_Date,
         rj.expirydate as Expiry_Date,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         lj.Applications as Application_Number,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where ( (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) <= %s) OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) <= %s))) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         left join ShineReport.LiveJobsApplications AS lj on rj.jobid = lj.JobId
         
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         ''' % (chunkId1, chunkId2, chunkId1, chunkId2)

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    print 'chnukID:', chunkId1, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    print 'cmd1'
    mysql_conn.query(cmd2)
    print 'cmd2'
    mysql_conn.query(cmd3)
    print 'cmd3'
    jobs = mysql_conn.query(cmd4)
    print 'jobs'
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkId1, ': Loading jobs from SQL....completed..', time.ctime(
    )

    print 'chunkid:', chunkId1, ' : Number of jobs loaded: ', len(jobs)

    ######################################
    '''Connecting to Mongo 233 Server'''
    ######################################

    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed_9months'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='172.22.66.198',
                                      database='SimilarJobs')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    ######################################
    '''Processing the Jobs'''
    ######################################
    global i
    #i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i % 1000 == 0:
            print '\tchunkID:', chunkId1, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_index = i
        job_publishedate = job['Published_Date']
        job_repubslisheddate = job['RePublished_Date']
        job_expirydate = job['Expiry_Date']
        pid = i % 5000
        job_applications = job['Application_Number']
        job_location = job_location.replace(', ', ',').lower().split(',')

        #################################################
        '''Creating Bag of Words from the text fields'''
        #################################################

        text = 5 * (" " + job_title) + ' ' + 3 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        ##################################################
        '''Dumping Job Details in Mongo (172.22.66.253)'''
        ##################################################

        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,'job_index':job_index, \
             'application_number': job_applications,'pid':pid,'job_publishedate':job_publishedate , \
             'job_repubslisheddate':job_repubslisheddate,'job_expirydate':job_expirydate
             }

        monconn_jobs_local.saveToTable(document)

        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkId1, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    monconn_jobs_local.doIndexing('pid')
    #send_email(['*****@*****.**', '*****@*****.**','*****@*****.**'],"Similar Jobs Mailer 9 Month Jobs",'Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')
    #os.system(' echo "Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Similar Jobs Mailer" [email protected], [email protected], [email protected]')
    del (monconn_jobs_local)
    del (mysql_conn)
Beispiel #15
0
        #########################################################################################################
        ############-----------------  Load the city Mapping in RAM
        #########################################################################################################
        cityScoreMatrixFilename = '../Features/CityScore/CityMatrix.json'
        stateCapitalMappingFileName = '../Features/CityScore/stateCapitalMapping.csv'
        cm = CityMatch(cityScoreMatrixFilename, stateCapitalMappingFileName)

        #########################################################################################################
        ############-----------------  Fetch the jobs data from the Mongo
        #########################################################################################################
        print "Fetching the jobs data from Mongodb"
        tablename = "jobs_processed"
        monconn_jobs = MongoConnect(tablename,
                                    host='localhost',
                                    database='mailer_monthly')
        mongo_jobs_cur = monconn_jobs.getCursor()
        print "Fetching the jobs data from Mongodb....completed"

        myCondition = {}
        jobs = monconn_jobs.loadFromTable(myCondition)

        #########################################################################################################
        ############-----------------  Creating Index on Jobs
        #########################################################################################################
        jobs_bow = []
        i = 0
        jobIntIdToJobDict = {}

        for job in jobs:
            job_bow = job['job_bow']['bow']
            jobs_bow.append(job_bow)
Beispiel #16
0
from Main.getLSICosine import getLSICosine  #Custom Module - /data/Projects/JobAlerts/Main/getLSICosine.py
from DataConnections.MySQLConnect.MySQLConnect import MySQLConnect  #Custom Module - /data/Projects/JobAlerts/DataConnections/MySQLConnect/MySQLConnect.py
from DataConnections.MongoConnect.MongoConnect import MongoConnect  #Custom Module - /data/Projects/JobAlerts/DataConnections/MongoConnect/MongoConnect.py
from Utils.Utils_1 import cleanToken  #Custom Module - /data/Projects/JobAlerts/Utils/Utils_1.py
from Utils.HtmlCleaner import HTMLStripper  #Custom Module - /data/Projects/JobAlerts/Utils/HtmlCleaner.py
from Utils.Cleaning import *  #Custom Module - /data/Projects/JobAlerts/Utils/Cleaning.py
from Notifier.Notifier import send_email  #Custom Module - /data/Projects/JobAlerts/Notifier/Notifier.py

#########################################################################################################
############-----------------    Creating a Mongo Connection to Jobs Database
#########################################################################################################
tableName = 'jobs_processed'
monconn_jobs_local = MongoConnect(tableName,
                                  host='172.22.66.198',
                                  database='JobAlerts')
monconn_jobs_local_cur = monconn_jobs_local.getCursor()
jobs_processed_count = monconn_jobs_local_cur.count()
del (monconn_jobs_local)

#########################################################################################################
############-----------------    Creating a Mongo Connection to Tech dump of Jobs
#########################################################################################################
tableName = 'JobDesc_analytics'
monconn_jobs_local_1 = MongoConnect(tableName,
                                    host='172.22.66.198',
                                    database='JobDescDB_analytics')
#monconn_jobs_local_1 = MongoConnect(tableName, host = 'localhost', database = 'JobDescDB_analytics')
monconn_jobs_local_cur_1 = monconn_jobs_local_1.getCursor()
jobs_processed_tech_dump_count = monconn_jobs_local_cur_1.count()
del (monconn_jobs_local_1)
Beispiel #17
0
def computeAlertsChunk(chunkID):

    #########################################################################################################
    ############-----------------Creating a connection to output mongodb
    #########################################################################################################
    tablename = 'MonthlyMsgQueue'
    monconn_recommendations = MongoConnect(tablename,
                                           host='localhost',
                                           database='mailer_monthly')

    print 'Chunk:', chunkID, 'initiated at:', time.ctime()

    ifile = open('CompanyNames.csv', 'r')
    reader = csv.reader(ifile)
    company_dict = {}
    for row in reader:
        company_dict[row[0]] = row[1]

    #########################################################################################################
    ############-----------------Fetch the user data from the database
    #########################################################################################################
    tablename = "candidates_processed"
    monconn_users = MongoConnect(tablename,
                                 host='localhost',
                                 database='mailer_monthly')
    mongo_users_cur = monconn_users.getCursor()

    myCondition = {'p': chunkID}
    #myCondition = {}
    users = monconn_users.loadFromTable(myCondition)
    #print "Fetching the users data from Mongodb....completed for ChunkID:",chunkID

    #########################################################################################################
    ############-----------------Loop to generate recommendations and save in Mongo
    #########################################################################################################
    count = 0

    for user in users:

        #########################################################################################################
        ############-----------------Extracting the user details
        #########################################################################################################

        count += 1
        user_ctc = user['user_ctc']
        user_exp = user['user_experience']
        user_id = user['user_id']
        user_email = user['user_email']
        user_bow = user['user_bow']['bow']
        user_current_time = datetime.datetime.now()
        user_jobtitle = user['user_jobtitle']
        user_lastlogin = user['user_lastlogin']
        user_phone = user['user_phone']
        user_gender = user['user_gender']
        user_current_company = user['user_current_company']
        user_functionalarea_id = user['user_functionalarea_id']
        user_lastmodified = user['user_lastmodified']
        user_fullname = user['user_fullname']
        user_phone_verified = user['user_phone_verified']
        user_location_id = user['user_location_id']
        user_ctc_id = user['user_ctc_id']
        user_highest_qual = user['user_highest_qual']
        user_edu_special = user['user_edu_special']
        user_email_verified = user['user_email_verified']
        user_spam_status = user['user_spam_status']
        user_bounce_status = user['user_bounce_status']
        user_email_alert_status = user['user_email_alert_status']
        user_functionalarea = user['user_functionalarea']
        user_industry = user['user_industry']
        user_jobtitle = user['user_jobtitle']
        user_profiletitle = user['user_profiletitle']
        user_edom = user['user_edom']
        user_industry = user['user_industry']
        user_skills = user['user_skills']
        user_profiletitle = user['user_profiletitle']
        user_pid = user['p']
        user_firstname = user_fullname.split(" ")[0]

        lsi_user = lsiModel[tfIdfModel[user_bow]]
        simScrChunk = index[lsi_user]
        sortingExcelSheetList = []

        for (jobIntIndex, lsiCosine) in simScrChunk:

            if lsiCosine < 0.18:
                continue

            #########################################################################################################
            ############-----------------Loading the Jobs Data
            #########################################################################################################

            job = jobIntIdToJobDict[jobIntIndex]
            jobid = job['job_id']
            job_title = job['job_title']
            job_skills = job['job_skills']
            job_minsal = job['job_minsal']
            job_maxsal = job['job_maxsal']
            job_minexp = job['job_minexp']
            job_maxexp = job['job_maxexp']
            job_bow = job['job_bow']['bow']
            job_accounttype = job['job_accounttype']
            job_flag = job['job_flag']
            job_companyname = job['job_company_name']
            job_companyid = job['job_company_id']

            #########################################################################################################
            ############-----------------Calculating the CTC and Experience Match Scores
            #########################################################################################################
            ctc_match_score = CTCMatchScore(job_minsal, job_maxsal,
                                            user_ctc).CTCMatchScore()
            exp_match_score = ExpMatchScore(job_minexp, job_maxexp,
                                            user_exp).ExpMatchScore()
            paid_boost = PaidBoostScore(job_flag,
                                        job_accounttype).PaidBoostScore()

            #########################################################################################################
            ############-----------------Calculating the City Score between a candidate and a job
            #########################################################################################################

            if ctc_match_score == 1 and exp_match_score == 1:
                jobid = job['job_id']

                try:
                    job_city = job['job_location']
                except:
                    job_city = 'Delhi'
                try:
                    user_city = user['user_location']
                except:
                    user_city = 'Delhi'

                #print user_city, job_city
                try:
                    user_city_list = user_city.lower().replace(
                        'other', '').strip().split(',')
                    user_city_list = [x.strip() for x in user_city_list]
                except:
                    user_city_list = ['']

                try:
                    job_city_list = job_city.lower().replace(
                        'other', '').strip().split(',')
                    job_city_list = [x.strip() for x in job_city_list]
                except:
                    job_city_list = ['']
                #print user_city_list, job_city_list
                try:
                    cityScore = cm.getCityScore(user_city_list, job_city_list)
                except:
                    cityScore = 0

                #########################################################################################################
                ############-----------------Calculating the overall match score and appending the details to the list
                ############-----------------based on job's published date
                #########################################################################################################
                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paid_boost)

                s = (user_id, user_email, jobid, overallMatchScore, job_title,
                     job_skills, job_minsal, job_maxsal, job_minexp,
                     job_maxexp, job_companyid)
                sortingExcelSheetList.append(s)

            else:
                continue

        ##############################################################################################################
        ############-----------------Finding the top 10 Jobs based on Overall Score
        ##############################################################################################################
        topN = 30
        sortingExcelSheetListTopNJobs = heapq.nlargest(topN,
                                                       sortingExcelSheetList,
                                                       key=lambda x: x[3])
        #pprint(sortingExcelSheetListTopNJobs)

        jobs2bsent = []
        company_ids = []
        cosine_score = []
        for (user_id, user_email, jobid, overallMatchScore, job_title,
             job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp,
             job_companyid) in sortingExcelSheetListTopNJobs:
            #print (userid, jobid, lsiCosine, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp)
            if job_companyid not in company_ids:
                company_ids.append(job_companyid)
                jobs2bsent.append(int(jobid))
                cosine_score.append(round(overallMatchScore, 2))
            else:
                if company_ids.count(job_companyid) < 3:
                    company_ids.append(job_companyid)
                    jobs2bsent.append(int(jobid))
                    cosine_score.append(round(overallMatchScore, 2))
                else:
                    pass
            if len(jobs2bsent) >= 10:
                break
            else:
                pass
        #print user_id
        #print company_ids
        #print jobs2bsent

        companies = []
        #print company_ids
        for comp_id in company_dict.keys():

            if int(comp_id) in company_ids:

                companies.append(company_dict[comp_id])
                #print companies
                #print "Hello"
            else:
                pass

        ##############################################################################################################
        ############-----------------Creating Subject Line for a candidate
        ##############################################################################################################                                     \

        if len(companies) != 0:
            try:
                user_subject = user_firstname + ": " + ', '.join(
                    companies
                ) + " and other top company jobs matching your profile"
                #print user_subject
            except Exception as e:
                pass
        else:
            try:
                if user_functionalarea == "Fresher (No Experience)":
                    user_subject = user_firstname + ", don't miss out on these new jobs"
                else:
                    user_subject = user_firstname + ", new " + user_functionalarea.replace(
                        ' /', ',') + " jobs for you"
                #print user_subject
            except Exception as e:
                user_subject = user_firstname + ", don't miss out on these new jobs"

        ##############################################################################################################
        ############-----------------Creating a document to be saved in mongo collection
        ##############################################################################################################

        document = {
            "c": user_id,
            "_id": user_email,
            "m": user_phone,
            "te": user_exp,
            "cr": user_jobtitle,
            "g": user_gender,
            "cc": user_current_company,
            "fa": user_functionalarea,
            "faid": user_functionalarea_id,
            "pd": user_lastmodified,
            "fn": user_fullname,
            "cpv": user_phone_verified,
            "sCLID": user_location_id,
            "sASID": user_ctc_id,
            "eq": user_highest_qual,
            "es": user_edu_special,
            "ev": user_email_verified,
            "ll": user_lastlogin,
            "sal": user_ctc,
            "cosine": cosine_score,
            "edom": user_edom,
            "t": user_current_time,
            "mj": jobs2bsent,
            "bj": [],
            "oj": [],
            "pid": user_pid,
            "s": False,
            "sub": user_subject
        }

        ##############################################################################################################
        ############-----------------Dumping the document in mongo collection if recommendations were generated
        ##############################################################################################################

        if len(jobs2bsent) > 0:
            monconn_recommendations.saveToTable(document)

        #print 'Chunk:', chunkID, 'processed in:', time.ctime()

    monconn_recommendations.close()
Beispiel #18
0
def preProcessChunk(chunkID):

    #########################################################################################################
    ############-----------------    SQL Credentials
    #########################################################################################################
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    #########################################################################################################
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.publisheddate as publisheddate,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 20 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 20)  
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    #########################################################################################################
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    #########################################################################################################
    ############-----------------    Connecting to Jobs Tech Dump Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'JobDesc_weekly'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='JobDescDB')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    #########################################################################################################
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'
        _id = job['Jobid']
        comp_name = cleanToken_1(job.get('Company_name', None))
        loc = (removeDup(job.get('location', None))).replace(', ',
                                                             ',').split(',')
        min_exp = job.get('ExpMin', None)
        title = cleanToken_1(job.get('JobTitle', None))
        max_exp = job.get('ExpMax', None)
        pub_date = job.get('publisheddate', None)
        id = job['Jobid']
        job_flag = job.get('flag')

        p = 0
        if job_flag == "Paid":
            p = 1
        else:
            p = 0

        desc = None

        #########################################################################################################
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################
        document = {
            '_id': _id,
            'comp_name': comp_name,
            'loc': loc,
            'min_exp': min_exp,
            'title': title,
            'max_exp': max_exp,
            'pub_date': pub_date,
            'id': id,
            'p': p,
            'desc': desc
        }

        #########################################################################################################
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################
        monconn_jobs_local.saveToTable(document)
        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email([
        '*****@*****.**',
        '*****@*****.**'
    ], "Revival Mailer Weekly", 'TEch Dump Jobs Processed ' + str(i) +
               ' in :' + str(end_time - start_time) + ' seconds')

    #########################################################################################################
    ############-----------------Deleting the mongo connections
    #########################################################################################################
    del (monconn_jobs_local)
    del (mysql_conn)
def preProcessChunk(chunkID):

    print 'Connecting to Mongodb..'
    tableName = 'jobs_status_check'
    monconn_status_check = MongoConnect(tableName,
                                        host='localhost',
                                        database='jam_status')
    monconn_status_check_cur = monconn_status_check.getCursor()

    ######################################
    '''Fetching the Jobs from SQL'''
    ######################################

    #Connect to SQL table and get the jobs data
    host = "172.22.65.157"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3308

    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.isbocreated as back_office_job,
         rj.publisheddate as publisheddate,
         rj.republisheddate as republisheddate,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) 
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )

    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    ######################################
    '''Connecting to Mongo 233 Server'''
    ######################################

    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='JobAlerts')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    ######################################
    '''Processing the Jobs'''
    ######################################

    i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_published_date = job['publisheddate']
        job_republished_date = job['republisheddate']
        job_back_office = int(job['back_office_job'])

        if job_company_id == 421880:  ################## Altimetrik Jobs removed ##########################
            continue

        job_location = job_location.replace(', ', ',').lower().split(',')

        ##Extract additional fields like bow
        text = 5 * (" " + job_title) + ' ' + 5 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        '''
        try:
            text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
            text = text.replace('candidates', ' ')
            
        except:
            text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd)
            text = text.replace('candidates', ' ')
        '''

        job_bow = mb.getBow(text, getbowdict=0)

        #job_keySkills = ','.join([x for x in jobKeySkills.split(',') if x.strip() != ''])

        #pprint(job_bow)
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,
             'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office
             }

        monconn_jobs_local.saveToTable(document)

        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    os.system(
        ' echo "Jobs Processed ' + str(i) + ' in :' +
        str(end_time - start_time) + ' seconds' +
        ' " | mutt -s "Job Alert Mailer " [email protected] ,[email protected]'
    )
    del (monconn_jobs_local)
    del (mysql_conn)
    monconn_status_check.saveToTable({'_id': 1, 'status': 1})
    del (monconn_status_check)
Beispiel #20
0
def computeAlertsChunk(chunkID):

    #########################################
    '''Creating a connection to output mongodb'''
    #########################################

    tablename = 'WeeklyMsgQueue'
    monconn_recommendations = MongoConnect(tablename,
                                           host='localhost',
                                           database='mailer_weekly')

    print 'Chunk:', chunkID, 'initiated at:', time.ctime()

    #################################################
    '''Fetch the user data from the database'''
    #################################################

    #print "Fetching the users data from Mongodb for ChunkID:",chunkID
    #tablename="candidates_processed"
    tablename = "candidates_processed"
    monconn_users = MongoConnect(tablename,
                                 host='localhost',
                                 database='mailer_weekly')
    mongo_users_cur = monconn_users.getCursor()

    myCondition = {'p': chunkID}
    #myCondition = {}
    users = monconn_users.loadFromTable(myCondition)
    #print "Fetching the users data from Mongodb....completed for ChunkID:",chunkID

    ##################################################################
    '''Get the top 10 matching jobs based on cosine for each candidate'''
    ##################################################################

    count = 0

    for user in users:

        count += 1
        user_ctc = user['user_ctc']
        user_exp = user['user_experience']
        user_id = user['user_id']
        user_email = user['user_email']
        user_bow = user['user_bow']['bow']
        user_current_time = datetime.datetime.now()
        user_jobtitle = user['user_jobtitle']
        user_lastlogin = user['user_lastlogin']
        user_phone = user['user_phone']
        user_gender = user['user_gender']
        user_current_company = user['user_current_company']
        user_functionalarea_id = user['user_functionalarea_id']
        user_lastmodified = user['user_lastmodified']
        user_fullname = user['user_fullname']
        user_phone_verified = user['user_phone_verified']
        user_location_id = user['user_location_id']
        user_ctc_id = user['user_ctc_id']
        user_highest_qual = user['user_highest_qual']
        user_edu_special = user['user_edu_special']
        user_email_verified = user['user_email_verified']
        user_spam_status = user['user_spam_status']
        user_bounce_status = user['user_bounce_status']
        user_email_alert_status = user['user_email_alert_status']
        user_functionalarea = user['user_functionalarea']
        user_industry = user['user_industry']
        user_jobtitle = user['user_jobtitle']
        user_profiletitle = user['user_profiletitle']
        user_edom = user['user_edom']
        user_industry = user['user_industry']
        user_skills = user['user_skills']
        user_profiletitle = user['user_profiletitle']
        user_pid = user['p']

        lsi_user = lsiModel[tfIdfModel[user_bow]]
        simScrChunk = index[lsi_user]
        sortingExcelSheetList = []

        for (jobIntIndex, lsiCosine) in simScrChunk:

            if lsiCosine < 0.18:
                continue

            job = jobIntIdToJobDict[jobIntIndex]
            jobid = job['job_id']
            job_title = job['job_title']
            job_skills = job['job_skills']
            job_minsal = job['job_minsal']
            job_maxsal = job['job_maxsal']
            job_minexp = job['job_minexp']
            job_maxexp = job['job_maxexp']
            job_bow = job['job_bow']['bow']
            job_accounttype = job['job_accounttype']
            job_flag = job['job_flag']

            #######################################################
            ''' Calculating the CTC and Experience Match Scores'''
            #######################################################

            ctc_match_score = CTCMatchScore(job_minsal, job_maxsal,
                                            user_ctc).CTCMatchScore()
            exp_match_score = ExpMatchScore(job_minexp, job_maxexp,
                                            user_exp).ExpMatchScore()
            paid_boost = PaidBoostScore(job_flag,
                                        job_accounttype).PaidBoostScore()
            #ctc_match_score = 1
            #exp_match_score = 1
            paid_boost = 0

            #######################################################
            ''' For Low earning desperate guy uncomment this '''
            #######################################################
            '''
            if (1 + user_ctc)/(1 + user_exp) < 0.3: 
                ctc_match_score = 1
                exp_match_score = 1
            
            '''

            if ctc_match_score == 1 and exp_match_score == 1:
                jobid = job['job_id']

                try:
                    job_city = job['job_location']
                except:
                    job_city = 'Delhi'
                try:
                    user_city = user['user_location']
                except:
                    user_city = 'Delhi'

                #print user_city, job_city
                try:
                    user_city_list = user_city.lower().replace(
                        'other', '').strip().split(',')
                    user_city_list = [x.strip() for x in user_city_list]
                except:
                    user_city_list = ['']

                try:
                    job_city_list = job_city.lower().replace(
                        'other', '').strip().split(',')
                    job_city_list = [x.strip() for x in job_city_list]
                except:
                    job_city_list = ['']
                #print user_city_list, job_city_list
                try:
                    cityScore = cm.getCityScore(user_city_list, job_city_list)
                except:
                    cityScore = 0

                #if cityScore == 0:
                #count = count +1
                #print user_city_list, job_city_list, cityScore

                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paid_boost)

                s = (user_id, user_email, jobid, overallMatchScore, job_title,
                     job_skills, job_minsal, job_maxsal, job_minexp,
                     job_maxexp)
                sortingExcelSheetList.append(s)

            else:
                continue

        #################################
        '''Finding the top 10 Jobs'''
        #################################
        topN = 10
        sortingExcelSheetListTopNJobs = heapq.nlargest(topN,
                                                       sortingExcelSheetList,
                                                       key=lambda x: x[3])
        #pprint(sortingExcelSheetListTopNJobs)

        jobs2bsent = []
        for (user_id, user_email, jobid, overallMatchScore, job_title,
             job_skills, job_minsal, job_maxsal, job_minexp,
             job_maxexp) in sortingExcelSheetListTopNJobs:
            #print (userid, jobid, lsiCosine, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp)

            jobs2bsent.append(int(jobid))

        document = {
            "c": user_id,
            "_id": user_email,
            "m": user_phone,
            "te": user_exp,
            "cr": user_jobtitle,
            "g": user_gender,
            "cc": user_current_company,
            "fa": user_functionalarea,
            "faid": user_functionalarea_id,
            "pd": user_lastmodified,
            "fn": user_fullname,
            "cpv": user_phone_verified,
            "sCLID": user_location_id,
            "sASID": user_ctc_id,
            "eq": user_highest_qual,
            "es": user_edu_special,
            "ev": user_email_verified,
            "ll": user_lastlogin,
            "sal": user_ctc,
            "edom": user_edom,
            "t": user_current_time,
            "mj": jobs2bsent,
            "bj": [],
            "oj": [],
            "pid": user_pid,
            "s": False
        }

        if len(jobs2bsent) > 0:
            monconn_recommendations.saveToTable(document)

        #print 'Chunk:', chunkID, 'processed in:', time.ctime()

    monconn_recommendations.close()