def preProcessChunk(chunkID):
    
    #########################################################################################################             
    ############-----------------    SQL Credentials
    #########################################################################################################
    
    #Connect to SQL table and get the jobs data
    #host="172.16.66.64"
    #user="******"
    #password="******"
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''

    host="172.22.66.204"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3306



    #########################################################################################################             
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket, port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd='''SELECT
        rj.jobid as Jobid,
        rj.jobtitle as JobTitle,
        rj.description as JD,
        la1.text_value_MAX as SalaryMax,
        la2.text_value_MIN as SalaryMin,
        le1.display as ExpMin,
        le2.display as ExpMax,
        li.industry_desc as Industry,
        group_concat(c.AttValueCustom,'') as keySkills,
        group_concat(fn.field_enu,'') as function,
        group_concat(l.city_desc,'') as location,
        group_concat(fn.sub_field_enu,'') as subfunction 
        
        from 
        (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8)  
        ) AS rj 
        left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
        left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
        left join lookup_experience AS le1 on rj.minexperience = le1.value 
        left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
        left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
        left join  lookup_industry AS li on rj.industry=li.industry_id 
        left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
        left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
        
        WHERE 
        
        c.AttType in (3,12,13) 
        
        group by rj.jobid
        
        
        '''


    #########################################################################################################             
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    jobs = mysql_conn.query(cmd)
    print 'chunkID:', chunkID,': Loading jobs from SQL....completed..', time.ctime()
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)




    #########################################################################################################             
    ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed_midout'
    monconn_jobs_local = MongoConnect(tableName , host = 'localhost', database = 'Midout_Mailers')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'
    
    
        
    #########################################################################################################             
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i%1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:' , i,  ' completed in ', time.time() - start_time, ' seconds'
        
        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])  
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])  
        job_jd = cleanHTML(cleanToken(job['JD']) )
        job_industry = cleanToken(job['Industry'])
        job_location=removeDup(job['location'])
        job_subfunction=removeDup(job['subfunction'])
        job_function=removeDup(job['function'])
        job_skills=removeDup(cleanToken(job['keySkills']))
        

        
        #########################################################################################################             
        ############-----------------Creating Bag of Words for Text
        #########################################################################################################
        text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict = 0)
    


        #########################################################################################################             
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################        
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd
             }



        #########################################################################################################             
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################        
        monconn_jobs_local.saveToTable(document)
    
        i += 1
        

    print "Processing finished....."    
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time() - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email(['*****@*****.**', '*****@*****.**'],"Midout Mailers",'Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')
    #os.system(' echo "Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Midout Mailers" [email protected] ,[email protected]')
    del(monconn_jobs_local)
    del(mysql_conn)
def computeAlertsChunk(chunkID):

    #########################################################################################################
    ############-----------------Creating a connection to output mongodb
    #########################################################################################################
    tablename = 'JobSuggestions'
    monconn_recommendations = MongoConnect(tablename,
                                           host='localhost',
                                           database='similar_jobs_onsite')

    print 'Chunk:', chunkID, 'initiated at:', time.ctime()

    #########################################################################################################
    ############-----------------Fetch the 3 month jobs data from mongo
    #########################################################################################################
    tablename = "active_jobs_dump"
    monconn_jobs_1 = MongoConnect(tablename,
                                  host='localhost',
                                  database='similar_jobs_onsite')
    mongo_jobs_1_cur = monconn_jobs_1.getCursor()
    myCondition = {'pid': chunkID}
    jobs_1 = monconn_jobs_1.loadFromTable(myCondition)

    #########################################################################################################
    ############-----------------Calculating the overall score of a 3month jobs based on cosine,ctc,
    ############-----------------experience,city scores for each 1month Job
    #########################################################################################################

    count = 0

    for job_1 in jobs_1:
        count += 1
        jobid_1 = job_1['job_id']
        job_title_1 = job_1['job_title']
        job_skills_1 = job_1['job_skills']
        job_minsal_1 = job_1['job_minsal']
        job_maxsal_1 = job_1['job_maxsal']
        job_minexp_1 = job_1['job_minexp']
        job_maxexp_1 = job_1['job_maxexp']
        job_bow_1 = job_1['job_bow']['bow']
        job_index_1 = job_1['job_index']

        lsi_job_1 = lsiModel[tfIdfModel[job_bow_1]]
        simScrChunk = index[lsi_job_1]

        sortingExcelSheetList = []

        for (jobIntIndex, lsiCosine) in simScrChunk:

            job = jobIntIdToJobDict[jobIntIndex]
            jobid = job['job_id']
            job_title = job['job_title']
            job_skills = job['job_skills']
            job_minsal = job['job_minsal']
            job_maxsal = job['job_maxsal']
            job_minexp = job['job_minexp']
            job_maxexp = job['job_maxexp']
            job_bow = job['job_bow']['bow']
            job_index = job['job_index']
            job_company_id = job['job_company_id']

            #########################################################################################################
            ############-----------------Calculating the CTC and Experience and City Match Scores
            #########################################################################################################

            ctc_match = CTCMatchScore(job_minsal_1, job_maxsal_1, job_minsal,
                                      job_maxsal)
            ctc_match_score = ctc_match.CTCMatchScore()
            exp_match_score = ExpMatchScore(job_minexp_1, job_maxexp_1,
                                            job_minexp,
                                            job_maxexp).ExpMatchScore()
            paid_boost = 0
            if ctc_match_score == 1 and exp_match_score == 1:
                if jobid != jobid_1:
                    try:
                        job_city_1 = job_1['job_location']
                    except:
                        job_city_1 = ["Delhi"]

                    try:
                        job_city = job['job_location']
                    except:
                        job_city = ["Delhi"]

                    #lsiCosine = getLSICosine(user_bow, job_bow).getLSICosine()
                    try:
                        cityScore = cm.getCityScore(job_city_1, job_city)
                    except:
                        cityScore = 0

                    overallMatchScore = getOverallMatchScore(
                        lsiCosine, cityScore, paid_boost)
                    s = (jobid_1, job_index_1, jobid, job_index,
                         overallMatchScore, job_company_id)
                    sortingExcelSheetList.append(s)

                else:
                    continue
            else:
                continue

        #########################################################################################################
        ############-----------------Finding the top 10 Jobs based on overall sccore
        #########################################################################################################

        topN = 30
        sortingExcelSheetListTopNJobs = heapq.nlargest(topN,
                                                       sortingExcelSheetList,
                                                       key=lambda x: x[4])

        jobs2bsent = []
        company_ids = []
        for (jobid_1, job_index_1, jobid, job_index, overallMatchScore,
             job_company_id) in sortingExcelSheetListTopNJobs:
            if job_company_id not in company_ids:
                company_ids.append(job_company_id)
                jobs2bsent.append(int(jobid))
            else:
                if company_ids.count(job_company_id) < 2:
                    company_ids.append(job_company_id)
                    jobs2bsent.append(int(jobid))
                else:
                    pass

            if len(jobs2bsent) >= 10:
                break
            else:
                pass

        ##############################################################################################################
        ############-----------------Creating a document to be saved in mongo collection
        ##############################################################################################################                                     \
        document = {
            '_id': jobid_1,
            'sj': jobs2bsent,
            'sjlen': len(jobs2bsent),
            'lud': datetime.datetime.now()
        }

        ##############################################################################################################
        ############-----------------Dumping the document in mongo collection if recommendations were generated
        ##############################################################################################################
        monconn_recommendations.saveToTable(document)

    monconn_recommendations.close()
     ############-----------------  Loading the mappings for bow
     #########################################################################################################
     print "Loading Mapping for BOW"
     synMappingFileName = '/data/Projects/JobAlerts/Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
     keywordIdMappingFileName = '/data/Projects/JobAlerts/Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' 
     mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
 
     
     
     
     #########################################################################################################             
     ############-----------------Creating the connection to candidates_processed_5 and dropping if already exist
     #########################################################################################################
 
     tablename = 'candidates_processed_5'
     monconn_recommendations = MongoConnect(tablename, host='localhost', database='JobAlerts')
     monconn_recommendations.dropTable()
     monconn_recommendations.close()
 
 
 
     #########################################################################################################             
     ############-----------------  Initiating Multiprocessing and computing Recommendations
     ######################################################################################################### 
     print "Starting the Process"      
     pprocessing = 1
 
     if pprocessing == 0:            
         numChunks = 80
         computeAlertsChunk(0)
 
Exemple #4
0
def preProcessChunk(chunkID):

    #########################################################################################################
    ############-----------------    SQL Credentials
    #########################################################################################################
    '''
    host="172.22.65.157"
    user="******"
    password="******"
    database="SumoPlus"
    unix_socket="/tmp/mysql.sock"
    port = 3308
    '''
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    #########################################################################################################
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.publisheddate as publisheddate,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 20 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 20)  
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    #########################################################################################################
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    #########################################################################################################
    ############-----------------    Connecting to Jobs Tech Dump Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'JobDesc_weekly'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='JobDescDB')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    #########################################################################################################
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'
        _id = job['Jobid']
        comp_name = cleanToken_1(job.get('Company_name', None))
        loc = (removeDup(job.get('location', None))).replace(', ',
                                                             ',').split(',')
        min_exp = job.get('ExpMin', None)
        title = cleanToken_1(job.get('JobTitle', None))
        max_exp = job.get('ExpMax', None)
        pub_date = job.get('publisheddate', None)
        id = job['Jobid']
        job_flag = job.get('flag')

        p = 0
        if job_flag == "Paid":
            p = 1
        else:
            p = 0

        desc = None

        #########################################################################################################
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################
        document = {
            '_id': _id,
            'comp_name': comp_name,
            'loc': loc,
            'min_exp': min_exp,
            'title': title,
            'max_exp': max_exp,
            'pub_date': pub_date,
            'id': id,
            'p': p,
            'desc': desc
        }

        #########################################################################################################
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################
        monconn_jobs_local.saveToTable(document)
        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    send_email([
        '*****@*****.**',
        '*****@*****.**'
    ], "Revival Mailer Weekly", 'TEch Dump Jobs Processed ' + str(i) +
               ' in :' + str(end_time - start_time) + ' seconds')

    #########################################################################################################
    ############-----------------Deleting the mongo connections
    #########################################################################################################
    del (monconn_jobs_local)
    del (mysql_conn)
Exemple #5
0
def salary_data():

    date1 = datetime.now() - timedelta(days=183)
    print datetime.now()
    print date1

    ofile = open('/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv',
                 'w')
    writer = csv.writer(ofile)
    writer.writerow([
        'user_id', 'specialization', 'specialization_id', 'total_exp_months',
        'city', 'city_id', 'industry', 'industry_id', 'company', 'company_id',
        'salary_lacs', 'job_title'
    ])

    ###### Loading Mongo Cursors #############
    ##########################################

    mongo_conn = getMongoMaster()
    collection = getattr(mongo_conn, "candidates_processed_4")
    lookup_industry = MongoConnect('LookupIndustry',
                                   host='172.22.65.88',
                                   port=27018,
                                   database='sumoplus',
                                   username=username,
                                   password=password,
                                   authenticate=True).getCursor()
    lookup_company = MongoConnect('LookupCompanyName',
                                  host='172.22.65.88',
                                  port=27018,
                                  database='sumoplus',
                                  username=username,
                                  password=password,
                                  authenticate=True).getCursor()

    ###### Creating Industry Dict#############
    ##########################################

    industry_dict = {}
    Industry_Name = lookup_industry.find({}, {'ii': 1, 'idesc': 1})
    for records in Industry_Name:
        industry_dict[records['idesc']] = records['ii']

    ####### Creating Specialization Dict###########
    ###############################################

    specialization_dict = {}
    ifile = open(
        '/data/Projects/Salary_Tool_HT_Campus/Output/Specilization.csv', 'rb')
    reader = csv.reader(ifile)
    for records in reader:
        specialization_dict[records[0].strip()] = records[1]

    ####### Creating Company Dict ############
    ##########################################

    company_dict = {}
    Company_Name = lookup_company.find({}, {'v': 1, 'd': 1})
    for records in Company_Name:
        company_dict[records['d']] = records['v']

    ######Fetching Last Six Months Active Cands#############
    ########################################################

    required_data = collection.find({
        'user_lastlogin': {
            '$gt': str(date1)
        }
    }).limit(100000)
    #required_data = collection.find({'_id':'10000083'})

    try:
        for data in required_data:

            try:
                user_id = data.get('_id', '')
            except:
                user

            try:
                specialization = str(data.get('user_edu_special', ''))
                print specialization
            except:
                specialization = ''

            try:
                specialization_id = specialization_dict[str(
                    data.get('user_edu_special', ''))]
                print specialization_id
            except:
                specialization_id = ''

            try:
                total_exp = str(data.get('user_experience', ''))
                total_exp = re.split('Yrs|Yr|Months|Month', total_exp)
                exp_yrs = int(str(total_exp[0]).strip())

            except:
                exp_yrs = 0

            try:
                exp_months = int(str(total_exp[1]).strip())
            except:
                exp_months = 0

            total_exp_months = exp_yrs * 12 + exp_months

            try:
                city = data.get('user_location', '')
                city = str(city[0])

            except:
                city = ''

            try:
                city_id = data.get('user_location_id', '')
            except:
                city_id = ''

            try:
                industry = data.get('user_industry')

            except:
                industry = ''

            try:
                industry_id = industry_dict[data.get('user_industry')]

            except:
                industry_id = ''

            try:
                company = str(data.get('user_current_company', '')).title()
            except:
                company = ''

            try:
                company_id = company_dict[str(
                    data.get('user_current_company', '')).title()]
            except:
                company_id = ''

            try:
                salary = str(data.get('user_ctc', ''))
                salary = re.split('-|Lakh', salary)
                salary = str(salary[1]).strip()
            except:
                salary = ''

            try:
                job_title = str(data.get('user_jobtitle', '')).title()
            except:
                job_title = ''

            writer.writerow([
                user_id, specialization, specialization_id, total_exp_months,
                city, city_id, industry, industry_id, company, company_id,
                salary, job_title
            ])

    except:
        print user_id, specialization, specialization_id, total_exp_months, city, city_id, industry, industry_id, company, company_id, salary, job_title
    ofile.close()
Exemple #6
0
    #########################################################################################################
    ############-----------------    Loading the mapping for Bag of Words
    #########################################################################################################
    print 'Loading the mappings for bow'
    synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
    keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
    mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
    print 'Loading the mappings for bow...finished'

    #########################################################################################################
    ############-----------------    Dropping the existing collection of Jobs
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'JobDesc_analytics'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='172.22.66.198',
                                      database='JobDescDB_analytics')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    monconn_jobs_local.dropTable()
    print 'Connecting to Mongodb...finished'
    del (monconn_jobs_local)

    #########################################################################################################
    ############----------------- Initiating Multiprocessing and extracting Jobs
    ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid)
    #########################################################################################################
    numChunks = 100
    chunkIDs = range(0, numChunks)
    print chunkIDs
    pprocessing = 0
    if pprocessing == 0:
Exemple #7
0
from Features.LSI_common.MyBOW import MyBOW  #Custom Module - /data/Projects/JobAlerts/Features/LSI_common/MyBOW.py
from Model.getOverallMatchScore import getOverallMatchScore  #Custom Module - /data/Projects/JobAlerts/Model/getOverallMatchScore.py
from Main.getLSICosine import getLSICosine  #Custom Module - /data/Projects/JobAlerts/Main/getLSICosine.py
from DataConnections.MySQLConnect.MySQLConnect import MySQLConnect  #Custom Module - /data/Projects/JobAlerts/DataConnections/MySQLConnect/MySQLConnect.py
from DataConnections.MongoConnect.MongoConnect import MongoConnect  #Custom Module - /data/Projects/JobAlerts/DataConnections/MongoConnect/MongoConnect.py
from Utils.Utils_1 import cleanToken  #Custom Module - /data/Projects/JobAlerts/Utils/Utils_1.py
from Utils.HtmlCleaner import HTMLStripper  #Custom Module - /data/Projects/JobAlerts/Utils/HtmlCleaner.py
from Utils.Cleaning import *  #Custom Module - /data/Projects/JobAlerts/Utils/Cleaning.py
from Notifier.Notifier import send_email  #Custom Module - /data/Projects/JobAlerts/Notifier/Notifier.py

#########################################################################################################
############-----------------    Creating a Mongo Connection to Jobs Database
#########################################################################################################
# Count the processed jobs in JobAlerts.jobs_processed; only the count is kept,
# the connection object is released immediately afterwards.
tableName = 'jobs_processed'
monconn_jobs_local = MongoConnect(tableName,
                                  host='172.22.66.198',
                                  database='JobAlerts')
monconn_jobs_local_cur = monconn_jobs_local.getCursor()
jobs_processed_count = monconn_jobs_local_cur.count()
del (monconn_jobs_local)

#########################################################################################################
############-----------------    Creating a Mongo Connection to Tech dump of Jobs
#########################################################################################################
# Same pattern for the analytics/tech dump of jobs; note this connection is
# NOT deleted here — presumably it is reused further down the file (verify).
tableName = 'JobDesc_analytics'
monconn_jobs_local_1 = MongoConnect(tableName,
                                    host='172.22.66.198',
                                    database='JobDescDB_analytics')
#monconn_jobs_local_1 = MongoConnect(tableName, host = 'localhost', database = 'JobDescDB_analytics')
monconn_jobs_local_cur_1 = monconn_jobs_local_1.getCursor()
jobs_processed_tech_dump_count = monconn_jobs_local_cur_1.count()
def computeAlertsChunk(chunkID):

    #########################################################################################################
    ############-----------------Creating a connection to output mongodb
    #########################################################################################################
    tablename = 'WeeklyMsgQueue'
    monconn_recommendations = MongoConnect(tablename,
                                           host='localhost',
                                           database='mailer_weekly')

    print 'Chunk:', chunkID, 'initiated at:', time.ctime()

    ifile = open('CompanyNames.csv', 'r')
    reader = csv.reader(ifile)
    company_dict = {}
    for row in reader:
        company_dict[row[0]] = row[1]

    #########################################################################################################
    ############-----------------Fetch the user data from the database
    #########################################################################################################
    tablename = "candidates_processed"
    monconn_users = MongoConnect(tablename,
                                 host='localhost',
                                 database='mailer_weekly')
    mongo_users_cur = monconn_users.getCursor()
    myCondition = {'p': chunkID}
    users = monconn_users.loadFromTable(myCondition)

    #########################################################################################################
    ############-----------------Loop to generate recommendations and save in Mongo
    #########################################################################################################
    count = 0

    for user in users:

        #########################################################################################################
        ############-----------------Extracting the user details
        #########################################################################################################

        count += 1
        user_ctc = user['user_ctc']
        user_exp = user['user_experience']
        user_id = user['user_id']
        user_email = user['user_email']
        user_bow = user['user_bow']['bow']
        user_current_time = datetime.datetime.now()
        user_jobtitle = user['user_jobtitle']
        user_lastlogin = user['user_lastlogin']
        user_phone = user['user_phone']
        user_gender = user['user_gender']
        user_current_company = user['user_current_company']
        user_functionalarea_id = user['user_functionalarea_id']
        user_lastmodified = user['user_lastmodified']
        user_fullname = user['user_fullname']
        user_phone_verified = user['user_phone_verified']
        user_location_id = user['user_location_id']
        user_ctc_id = user['user_ctc_id']
        user_highest_qual = user['user_highest_qual']
        user_edu_special = user['user_edu_special']
        user_email_verified = user['user_email_verified']
        user_spam_status = user['user_spam_status']
        user_bounce_status = user['user_bounce_status']
        user_email_alert_status = user['user_email_alert_status']
        user_functionalarea = user['user_functionalarea']
        user_industry = user['user_industry']
        user_jobtitle = user['user_jobtitle']
        user_profiletitle = user['user_profiletitle']
        user_edom = user['user_edom']
        user_industry = user['user_industry']
        user_skills = user['user_skills']
        user_profiletitle = user['user_profiletitle']
        user_pid = user['p']
        user_firstname = user_fullname.split(" ")[0]

        lsi_user = lsiModel[tfIdfModel[user_bow]]
        simScrChunk = index[lsi_user]
        sortingExcelSheetList = []

        for (jobIntIndex, lsiCosine) in simScrChunk:

            if lsiCosine < 0.18:
                continue

            #########################################################################################################
            ############-----------------Loading the Jobs Data
            #########################################################################################################

            job = jobIntIdToJobDict[jobIntIndex]
            jobid = job['job_id']
            job_title = job['job_title']
            job_skills = job['job_skills']
            job_minsal = job['job_minsal']
            job_maxsal = job['job_maxsal']
            job_minexp = job['job_minexp']
            job_maxexp = job['job_maxexp']
            job_bow = job['job_bow']['bow']
            job_accounttype = job['job_accounttype']
            job_flag = job['job_flag']
            job_companyname = job['job_company_name']
            job_companyid = job['job_company_id']

            #########################################################################################################
            ############-----------------Calculating the CTC and Experience Match Scores
            #########################################################################################################
            ctc_match_score = CTCMatchScore(job_minsal, job_maxsal,
                                            user_ctc).CTCMatchScore()
            exp_match_score = ExpMatchScore(job_minexp, job_maxexp,
                                            user_exp).ExpMatchScore()
            paid_boost = PaidBoostScore(job_flag,
                                        job_accounttype).PaidBoostScore()

            #########################################################################################################
            ############-----------------Calculating the City Score between a candidate and a job
            #########################################################################################################
            if ctc_match_score == 1 and exp_match_score == 1:
                jobid = job['job_id']

                try:
                    job_city = job['job_location']
                except:
                    job_city = 'Delhi'
                try:
                    user_city = user['user_location']
                except:
                    user_city = 'Delhi'

                #print user_city, job_city
                try:
                    user_city_list = user_city.lower().replace(
                        'other', '').strip().split(',')
                    user_city_list = [x.strip() for x in user_city_list]
                except:
                    user_city_list = ['']

                try:
                    job_city_list = job_city.lower().replace(
                        'other', '').strip().split(',')
                    job_city_list = [x.strip() for x in job_city_list]
                except:
                    job_city_list = ['']
                #print user_city_list, job_city_list
                try:
                    cityScore = cm.getCityScore(user_city_list, job_city_list)
                except:
                    cityScore = 0

                #########################################################################################################
                ############-----------------Calculating the overall match score and appending the details to the list
                ############-----------------based on job's published date
                #########################################################################################################
                overallMatchScore = getOverallMatchScore(
                    lsiCosine, cityScore, paid_boost)

                s = (user_id, user_email, jobid, overallMatchScore, job_title,
                     job_skills, job_minsal, job_maxsal, job_minexp,
                     job_maxexp, job_companyid)
                sortingExcelSheetList.append(s)

            else:
                continue

        ##############################################################################################################
        ############-----------------Finding the top 10 Jobs based on Overall Score
        ##############################################################################################################
        topN = 10
        sortingExcelSheetListTopNJobs = heapq.nlargest(topN,
                                                       sortingExcelSheetList,
                                                       key=lambda x: x[3])

        jobs2bsent = []
        company_ids = []
        cosine_score = []
        for (user_id, user_email, jobid, overallMatchScore, job_title,
             job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp,
             job_companyid) in sortingExcelSheetListTopNJobs:
            #print (userid, jobid, lsiCosine, job_title, job_skills, job_minsal, job_maxsal, job_minexp, job_maxexp)
            if job_companyid not in company_ids:
                company_ids.append(job_companyid)
                jobs2bsent.append(int(jobid))
                cosine_score.append(round(overallMatchScore, 2))
            else:
                if company_ids.count(job_companyid) < 3:
                    company_ids.append(job_companyid)
                    jobs2bsent.append(int(jobid))
                    cosine_score.append(round(overallMatchScore, 2))
                else:
                    pass
            if len(jobs2bsent) >= 10:
                break
            else:
                pass

        companies = []
        #print company_ids
        for comp_id in company_dict.keys():

            if int(comp_id) in company_ids:

                companies.append(company_dict[comp_id])

            else:
                pass

        ##############################################################################################################
        ############-----------------Creating Subject Line for a candidate
        ##############################################################################################################                                     \
        if len(companies) != 0:
            try:
                user_subject = user_firstname + ": " + ', '.join(
                    companies
                ) + " and other top company jobs matching your profile"
                #print user_subject
            except Exception as e:
                pass
        else:
            try:
                if user_functionalarea == "Fresher (No Experience)":
                    user_subject = user_firstname + ", don't miss out on these new jobs"
                else:
                    user_subject = user_firstname + ", new " + user_functionalarea.replace(
                        ' /', ',') + " jobs for you"
                #print user_subject
            except Exception as e:
                user_subject = user_firstname + ", don't miss out on these new jobs"

        ##############################################################################################################
        ############-----------------Creating a document to be saved in mongo collection
        ##############################################################################################################
        document = {
            "c": user_id,
            "_id": user_email,
            "m": user_phone,
            "te": user_exp,
            "cr": user_jobtitle,
            "g": user_gender,
            "cc": user_current_company,
            "fa": user_functionalarea,
            "faid": user_functionalarea_id,
            "pd": user_lastmodified,
            "fn": user_fullname,
            "cpv": user_phone_verified,
            "sCLID": user_location_id,
            "sASID": user_ctc_id,
            "eq": user_highest_qual,
            "es": user_edu_special,
            "ev": user_email_verified,
            "ll": user_lastlogin,
            "sal": user_ctc,
            "edom": user_edom,
            "cosine": cosine_score,
            "t": user_current_time,
            "mj": jobs2bsent,
            "bj": [],
            "oj": [],
            "pid": user_pid,
            "s": False,
            "sub": user_subject
        }

        ##############################################################################################################
        ############-----------------Dumping the document in mongo collection if recommendations were generated
        ##############################################################################################################

        if len(jobs2bsent) > 0:
            monconn_recommendations.saveToTable(document)

        #print 'Chunk:', chunkID, 'processed in:', time.ctime()

    monconn_recommendations.close()
Exemple #9
0
    #########################################################################################################
    mongo_conn = getMongoMaster()
    collection = getattr(mongo_conn, "candidates_processed_4")
    collection.remove({'user_lastlogin': {'$lt': str(date1)}})
    print "Candidates with last login less than 183 days removed"

    #########################################################################################################
    ############-----------------Connecting to Mongo CandidateStatic and CandidatePreferences
    #########################################################################################################
    username = '******'
    password = '******'
    #monconn_users_static = MongoConnect('CandidateStatic', host = '172.22.65.157', port = 27018, database = 'sumoplus', username = username, password = password, authenticate = True).getCursor()
    monconn_users_preferences = MongoConnect('CandidatePreferences',
                                             host='172.22.65.88',
                                             port=27018,
                                             database='sumoplus',
                                             username=username,
                                             password=password,
                                             authenticate=True)
    monconn_users_preferences_cur = monconn_users_preferences.getCursor()

    #########################################################################################################
    ############-----------------Creating a Dictionary of Subfa to FA
    #########################################################################################################
    ifile = open('subfa_fa.csv', 'r')
    reader = csv.reader(ifile)
    reader.next()
    sub_fa_dict = {}
    for row in reader:
        sub_fa_dict[int(row[3])] = [row[4], int(row[1])]
Exemple #10
0
        #########################################################################################################
        ############-----------------    Loading the mapping for Bag of Words
        #########################################################################################################
        print 'Loading the mappings for bow'
        synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
        keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
        mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
        print 'Loading the mappings for bow...finished'

        #########################################################################################################
        ############-----------------    Dropping the existing collection of Jobs
        #########################################################################################################
        print 'Connecting to Mongodb..'
        tableName = 'jobs_processed'
        monconn_jobs_local = MongoConnect(tableName,
                                          host='localhost',
                                          database='mailer_monthly')
        monconn_jobs_local_cur = monconn_jobs_local.getCursor()
        monconn_jobs_local.dropTable()
        print 'Connecting to Mongodb...finished'
        del (monconn_jobs_local)

        #########################################################################################################
        ############----------------- Initiating Multiprocessing and extracting Jobs
        ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid)
        #########################################################################################################
        numChunks = 100
        chunkIDs = range(0, numChunks)
        print chunkIDs
        pprocessing = 0
        if pprocessing == 0:
from _sqlite3 import Row
sys.path.append('./../')
from pprint import pprint
from DataConnections.MySQLConnect.MySQLConnect import MySQLConnect
from DataConnections.MongoConnect.MongoConnect import MongoConnect
import pdb
import csv
import time
from multiprocessing import Pool
import os
import datetime
from datetime import timedelta
from Features.JobAlert_Functions import *

# Connection to the processed-candidates collection used by this script.
monconn_users_static = MongoConnect('candidates_processed_4',
                                    host='localhost',
                                    database='JobAlerts')
# Commented-out one-off script (kept as a bare string by the original author)
# that counted rows of a campaign-opens CSV export.
'''
ifile = open('JAM_27_JAN_opens.csv','r')
reader = csv.reader(ifile)
reader.next()
count = 0

i = 0
for row in reader:
    i+=1
    if i%5000 == 0:
        print i
    #print row
'''
# Running counter used by the processing loop further down (outside this view).
count = 0
Exemple #12
0
def preProcessChunk(chunkId1, chunkId2):

    ######################################
    '''Fetching the Jobs from SQL'''
    ######################################

    #host="172.22.65.157"
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    print "Loading Jobs From MySql...."
    mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                              port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         rj.publisheddate as Published_Date,
         rj.republisheddate as RePublished_Date,
         rj.expirydate as Expiry_Date,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         lj.Applications as Application_Number,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where ( (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) <= %s) OR (DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) > %s AND DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) <= %s))) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         left join ShineReport.LiveJobsApplications AS lj on rj.jobid = lj.JobId
         
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         ''' % (chunkId1, chunkId2, chunkId1, chunkId2)

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    print 'chnukID:', chunkId1, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    print 'cmd1'
    mysql_conn.query(cmd2)
    print 'cmd2'
    mysql_conn.query(cmd3)
    print 'cmd3'
    jobs = mysql_conn.query(cmd4)
    print 'jobs'
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkId1, ': Loading jobs from SQL....completed..', time.ctime(
    )

    print 'chunkid:', chunkId1, ' : Number of jobs loaded: ', len(jobs)

    ######################################
    '''Connecting to Mongo 233 Server'''
    ######################################

    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed_9months'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='172.22.66.198',
                                      database='SimilarJobs')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    ######################################
    '''Processing the Jobs'''
    ######################################
    global i
    #i = 0
    for job in jobs:
        #pprint(job)
        #print i
        if i % 1000 == 0:
            print '\tchunkID:', chunkId1, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'

        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_index = i
        job_publishedate = job['Published_Date']
        job_repubslisheddate = job['RePublished_Date']
        job_expirydate = job['Expiry_Date']
        pid = i % 5000
        job_applications = job['Application_Number']
        job_location = job_location.replace(', ', ',').lower().split(',')

        #################################################
        '''Creating Bag of Words from the text fields'''
        #################################################

        text = 5 * (" " + job_title) + ' ' + 3 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        ##################################################
        '''Dumping Job Details in Mongo (172.22.66.253)'''
        ##################################################

        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,'job_index':job_index, \
             'application_number': job_applications,'pid':pid,'job_publishedate':job_publishedate , \
             'job_repubslisheddate':job_repubslisheddate,'job_expirydate':job_expirydate
             }

        monconn_jobs_local.saveToTable(document)

        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkId1, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    monconn_jobs_local.doIndexing('pid')
    #send_email(['*****@*****.**', '*****@*****.**','*****@*****.**'],"Similar Jobs Mailer 9 Month Jobs",'Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')
    #os.system(' echo "Jobs Processing 9 Months Completed !!\nJobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Similar Jobs Mailer" [email protected], [email protected], [email protected]')
    del (monconn_jobs_local)
    del (mysql_conn)
Exemple #13
0
        ######################################

        print 'Loading the mappings for bow'
        synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
        keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
        mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
        print 'Loading the mappings for bow...finished'

        #############################################
        '''Dropping the existing collection of jobs'''
        #############################################

        print 'Connecting to Mongodb..'
        tableName = 'jobs_processed_9months'
        monconn_jobs_local = MongoConnect(tableName,
                                          host='172.22.66.198',
                                          database='SimilarJobs')
        monconn_jobs_local_cur = monconn_jobs_local.getCursor()
        #monconn_jobs_local.dropTable()
        print 'Connecting to Mongodb...finished'
        #del(monconn_jobs_local)

        ######################################
        '''Preprocessing of Jobs'''
        ######################################

        numChunks = 100
        chunkIDs = range(0, numChunks)
        #print chunkIDs
        pprocessing = 0
        chunkId1 = 0
Exemple #14
0
import datetime
from datetime import timedelta
from datetime import *
#from datetime import date, datetime, time
import calendar
import pandas as pd
import numpy as np
from random import sample

username = '******'
password = '******'

mongo_conn = MongoConnect('CandidateStatic',
                          host='172.22.65.88',
                          port=27018,
                          database='sumoplus',
                          username=username,
                          password=password,
                          authenticate=True).getCursor()
monconn_users_static = MongoConnect('candidates_processed_4',
                                    host='172.22.66.198',
                                    database='JobAlerts').getCursor()
mon_conn_sub_fa = MongoConnect('LookupSubFunctionalArea',
                               host='172.22.65.88',
                               port=27018,
                               database='sumoplus',
                               username=username,
                               password=password,
                               authenticate=True).getCursor()

ifile = open('/data/Projects/Cold_Calling/Pycode/concentrix_leads_v1.csv',
Exemple #15
0
if __name__ == '__main__':
    try:

        #os.system(' echo "Application Indexing Started.... '' " | mutt -s "Similar Jobs Mailer" [email protected],[email protected], [email protected]')
        send_email([
            '*****@*****.**',
            '*****@*****.**'
        ], "Similar Jobs Mailer applies preprocessing",
                   'Application Indexing Started.... !!')
        #send_email(['*****@*****.**'],"Similar Jobs Mailer applies preprocessing",'Application Indexing Started.... !!')
        #############################
        'Dropping the old collection'
        #############################
        tablename = "apply_data"
        monconn_user = MongoConnect(tablename,
                                    host='172.22.66.198',
                                    database='SimilarJobs')
        monconn_user.dropTable()
        monconn_user.close()

        #############################
        'Starting Index Creation'
        #############################
        ApplicationIndexing()

        #############################
        'Creating Index on Collection'
        #############################
        tablename = "apply_data"
        monconn_user = MongoConnect(tablename,
                                    host='172.22.66.198',
Exemple #16
0
        #########################################################################################################
        ############-----------------    Loading the mapping for Bag of Words
        #########################################################################################################
        print 'Loading the mappings for bow'
        synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
        keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
        mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
        print 'Loading the mappings for bow...finished'

        #########################################################################################################
        ############-----------------    Dropping the existing collection of Jobs
        #########################################################################################################
        print 'Connecting to Mongodb..'
        tableName = 'new_jobs_dump'
        monconn_jobs_local = MongoConnect(tableName,
                                          host='localhost',
                                          database='similar_jobs_onsite')
        monconn_jobs_local_cur = monconn_jobs_local.getCursor()
        monconn_jobs_local.dropTable()
        print 'Connecting to Mongodb...finished'
        del (monconn_jobs_local)

        #########################################################################################################
        ############----------------- Initiating Multiprocessing and extracting Jobs
        ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid)
        #########################################################################################################
        numChunks = 100
        chunkIDs = range(0, numChunks)
        print chunkIDs
        pprocessing = 0
Exemple #17
0
def ApplicationIndexing():

    #######################################
    'Initiating and Declaring variables '
    #######################################

    user_mapping = {}
    i = 0
    user_index = 0

    ###############################
    ' Creating the previous date '
    ###############################
    todayDate = date.today()
    previousDate = todayDate + relativedelta(days=-183)
    day1 = datetime.combine(previousDate, time(0, 0))
    day2 = datetime.combine(todayDate, time(0, 0))

    ###########################################################
    ' Connecting to the Candidate Apply DB (without indexes) '
    ###########################################################
    tablename = 'candidate_applications'
    mongo_conn = MongoConnect(tablename,
                              host='172.22.66.198',
                              database='JobAlerts')
    mongo_conn_cur = mongo_conn.getCursor()

    #################################################################
    ' Connecting to DB where indexed applications are to be dumped '
    #################################################################
    tablename = "apply_data"
    monconn_user = MongoConnect(tablename,
                                host='172.22.66.198',
                                database='SimilarJobs')

    ######################
    ' Creating indexes  '
    ######################

    last_user_ObjectId = 1
    previous_id = "0"
    id = "0"

    #recency_score =

    try:
        while True:
            myCondition = {"fcu": {'$gt': id}}
            data = mongo_conn_cur.find(myCondition).sort('fcu').limit(100000)
            insert = []

            for row in data:
                try:
                    userid = row['fcu']
                    user_ObjectID = row['_id']

                    if userid == previous_id:
                        pass
                    else:
                        previous_id = userid
                        user_index += 1

                    index = user_index
                except:
                    continue

                jobid = row['fjj']
                application_date = row['ad']
                #print "application_date",application_date
                current_time = datetime.now()
                #print "today",current_time

                difference = abs((current_time - application_date).days)
                #print difference
                #recency_score = 1/(1+ math.sqrt(difference))
                if difference <= 10:
                    recency_score = 1
                elif difference > 10 and difference <= 20:
                    recency_score = 0.9
                elif difference > 20 and difference <= 30:
                    recency_score = 0.8
                else:
                    recency_score = 0.6

                #print "recency_score",recency_score

                #break
                pid = i % 5000
                document = {"userid":userid,\
                                "user_index":index, \
                                "jobid": jobid , \
                                'score':recency_score, \
                                'application_date':application_date, \
                                '_id': user_ObjectID , \
                                'pid':pid
                                }

                insert.append(document)
                id = row['fcu']
                i += 1
                if i % 100000 == 0:
                    print "Records Processed :", i
                    #sys.exit(0)

            monconn_user.insert(insert)
    except Exception as E:
        print E
Exemple #18
0
def preProcessChunk(chunkID):
    """Extract recent jobs from MySQL, clean every field, build a bag-of-words
    vector and persist each job into the local MongoDB collection
    similar_jobs_onsite.new_jobs_dump.

    Depends on module-level globals defined elsewhere in this file:
    mb (MyBOW), start_time, cleanToken, cleanHTML, removeDup, MySQLConnect,
    MongoConnect, send_email, time.

    :param chunkID: shard identifier; in this revision it is only used in log
        messages (the ``jobid %% numChunks`` sharding WHERE clause is present
        only in the commented-out query below).
    """

    #########################################################################################################
    ############-----------------    SQL Credentials
    #########################################################################################################

    #Connect to SQL table and get the jobs data
    #host="172.16.66.64"
    #user="******"
    #password="******"

    # SECURITY(review): plaintext production DB credentials are committed to
    # source control here -- move them to a config/secret store and rotate.
    host1 = "172.22.65.157"
    user1 = "analytics"
    password1 = "Anal^tics@11"
    database1 = "SumoPlus"
    unix_socket1 = "/tmp/mysql.sock"
    port1 = 3308

    # Fallback MySQL endpoint (credentials scrubbed in this copy).
    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    #########################################################################################################
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    # Try the primary analytics host first, then fall back to the secondary.
    # NOTE(review): the bare except also masks auth/config errors, not just
    # connectivity failures -- consider catching the driver's error class.
    try:
        mysql_conn = MySQLConnect(database1, host1, user1, password1,
                                  unix_socket1, port1)
    except:
        mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                                  port)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)  and rj.jobid%''' + str(numChunks) + '=' + str(chunkID)
    #cmd = '''SELECT rj.jobid as Jobid,rj.jobtitle as JobTitle,rj.description as JD,la1.text_value_MAX as SalaryMax,la2.text_value_MIN as SalaryMin,le1.display as ExpMin,le2.display as ExpMax,li.industry_desc as Industry,c.AttValueCustom as keySkills,l.city_desc as location,fn.field_enu as function,fn.sub_field_enu as subfunction from recruiter_job AS rj left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id left join lookup_experience AS le1 on rj.minexperience = le1.value left join  lookup_experience AS le2 on rj.maxexperience = le2.value left join recruiter_jobattribute as c on rj.jobid = c.jobid_id left join  lookup_industry AS li on rj.industry=li.industry_id left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 WHERE rj.jobstatus in (3,5,6,9) and c.AttType in (3,12,13) and (DATEDIFF( CURDATE(),DATE(rj.publisheddate)) < 4 OR DATEDIFF( CURDATE(),DATE(rj.republisheddate)) < 4)'''
    #print cmd
    # cmd1/cmd5: drop the scratch table SumoPlus.XY before and after use.
    cmd1 = '''drop table if exists SumoPlus.XY'''
    # cmd2: build the scratch table -- one row per company account with the
    # summed sale price and latest expiry date (used for the Paid/Free flag).
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    # cmd3: index the scratch table for the join in cmd4.
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    # cmd4: main extraction -- one row per job with status 3 or 9 that was
    # published/republished in the last 30 days, joined to salary, experience,
    # industry, city and function lookups, with a Paid/Free flag from XY.
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 30 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 30)  
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    #########################################################################################################
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)  # list of dict-like rows keyed by the aliases in cmd4
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )

    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    #########################################################################################################
    ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'new_jobs_dump'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='localhost',
                                      database='similar_jobs_onsite')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    #########################################################################################################
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        #pprint(job)
        #print i
        # Progress log every 1000 jobs; start_time is a module-level global.
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'

        # Clean/normalise each raw SQL field before storage.
        job_id = job['Jobid']
        job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        job_jd = cleanHTML(cleanToken(job['JD']))  # description may contain HTML
        job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        job_subfunction = removeDup(cleanToken(job['subfunction']))
        job_function = removeDup(cleanToken(job['function']))
        job_skills = removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = int(job['Company_id'])
        job_company_name = cleanToken(job['Company_name'])
        job_index = i
        # Split the comma-joined city string into a lowercase list.
        job_location = job_location.replace(', ', ',').lower().split(',')

        #########################################################################################################
        ############-----------------Creating Bag of Words for Text
        #########################################################################################################
        # Weighted concatenation: title x5, skills x3, JD x1, industry x2,
        # function x2, subfunction x2 -- repetition boosts term counts in BOW.
        text = 5 * (" " + job_title) + ' ' + 3 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_industry) + ' ' + 2 * (
                    " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = text.replace('candidates', ' ')  # drop a boilerplate token
        '''
        try:
            text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
            text = text.replace('candidates', ' ')
            
        except:
            text = 5*(" "+job_title) + ' ' + 3*(" "+job_skills) + ' ' + 1*(" "+job_jd)
            text = text.replace('candidates', ' ')
        '''
        # mb is the module-level MyBOW instance loaded at startup.
        job_bow = mb.getBow(text, getbowdict=0)

        #########################################################################################################
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,'job_index':job_index
             }

        #########################################################################################################
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################
        monconn_jobs_local.saveToTable(document)

        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    # Notify operators how many jobs were processed and how long it took.
    send_email([
        '*****@*****.**',
        '*****@*****.**'
    ], "SJ Onsite", '1 Month Jobs Processed ' + str(i) + ' in :' +
               str(end_time - start_time) + ' seconds')
    #os.system(' echo "1 Month Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds' +' " | mutt -s "Similar Jobs OnSite" [email protected] ,[email protected]')
    del (monconn_jobs_local)
    del (mysql_conn)
Exemple #19
0
 username = '******'
 password = '******'
 print 'Mongo connect module:'
 
 output = open('cold_calling_file.csv',"w") 
 writer = csv.writer(output, lineterminator='\n')
 
 
 i=0
 user_email_list = []
 
 date1 = datetime.now() - timedelta(days= 2)
 date1 = date1.isoformat()	
 print date1
 
 monconn_users_static = MongoConnect('candidates_processed_4', host = 'localhost', database = 'JobAlerts').getCursor()
 mon_conn_sub_fa = MongoConnect('LookupSubFunctionalArea', host = '172.22.65.88', port = 27018,database = 'sumoplus',username= username,password = password,authenticate = True).getCursor()
 
 print 'Mongo_Connected',monconn_users_static
 
 data_user = monconn_users_static.find({'user_lastlogin':{'$gt':date1}})
 data_user_1 = monconn_users_static.find({'user_lastlogin':{'$gt':date1}}).count()
 
 sub_fa_lookup = mon_conn_sub_fa.find()
 sub_fa = {}
 for records in sub_fa_lookup:
     sub_fa[records['sfe']] = records['fe']
     
 print 'Candidates_picked:',str(data_user_1)
 
 writer.writerow(["Email",'Candidate_Name','Phone','City','cpv','applications','edu_qual','loc_id','Total_Experience','Industry','Salary','Functional_Area','last_login','Sub_FA'])        
password = '******'
tableName = 'ResumeParserDump'
date1 = datetime.now() - timedelta(days=2)
print "Date : ", datetime.now()

#########################################################################################################
############-----------------Try Except to provide alert in case of code failure
#########################################################################################################
try:
    #########################################################################################################
    ############-----------------Creating a mongo connection to miscellaneous DB
    #########################################################################################################
    monconn_users = MongoConnect(tableName,
                                 host='172.22.65.88',
                                 port=27018,
                                 database='miscellaneous',
                                 username=username,
                                 password=password,
                                 authenticate=True)
    monconn_users_cur = monconn_users.getCursor()
    myCondition = {"cd": {'$gt': date1}}
    users = monconn_users.loadFromTable(myCondition)
    print "Number of recoreds : " + str(len(users))

    #########################################################################################################
    ############-----------------Creating a mongo connection to resume dump DB Mongo(172.22.66.233)
    #########################################################################################################
    tableName = 'candidate_data'
    monconn_resume = MongoConnect(tableName,
                                  host='172.22.66.198',
                                  database='ResumeDump')
Exemple #21
0
        #########################################################################################################
        ############-----------------    Loading the mapping for Bag of Words
        #########################################################################################################
        print 'Loading the mappings for bow'
        synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
        keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
        mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
        print 'Loading the mappings for bow...finished'

        #########################################################################################################
        ############-----------------    Dropping the existing collection of Jobs
        #########################################################################################################
        print 'Connecting to Mongodb..'
        tableName = 'JobDesc_weekly'
        monconn_jobs_local = MongoConnect(tableName,
                                          host='localhost',
                                          database='JobDescDB')
        monconn_jobs_local_cur = monconn_jobs_local.getCursor()
        monconn_jobs_local.dropTable()
        print 'Connecting to Mongodb...finished'
        del (monconn_jobs_local)

        #########################################################################################################
        ############----------------- Initiating Multiprocessing and extracting Jobs
        ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid)
        #########################################################################################################
        numChunks = 100
        chunkIDs = range(0, numChunks)
        print chunkIDs
        pprocessing = 0
        if pprocessing == 0:
        ], "Midout Mailers", "Midouts Candidates Processing  Started!!!!")

        print 'Mongo connect module:'

        username = '******'
        password = '******'

        #########################################################################################################
        ############----------------- Dictionary for LookupExperience
        #########################################################################################################
        print "Loading Dictionary for Experience"
        tableName = 'LookupExperience'
        monconn_users = MongoConnect(tableName,
                                     host='172.22.65.88',
                                     port=27018,
                                     database='sumoplus',
                                     username=username,
                                     password=password,
                                     authenticate=True)
        monconn_users_cur = monconn_users.getCursor()
        user_experience_dict = {}
        for user in monconn_users_cur.find():
            user_experience_dict[user['v']] = user['d']

        #########################################################################################################
        ############----------------- Dictionary for LookupJobTitle
        #########################################################################################################
        print "Loading Dictionary for JobTitle"
        tableName = 'LookupJobTitle'
        monconn_users = MongoConnect(tableName,
                                     host='172.22.65.88',
Exemple #23
0
def getedu_details():
    """Join candidate rows from Cand_Data.csv with their education records in
    Mongo and emit two CSVs: a per-education-row dump (Cand_Edu_Data.csv) and
    a per-candidate latest-education summary (Institute_Level_Data.csv).

    Relies on module-level globals username/password and MongoConnect.
    Candidates whose lookups fail get a blank row (see the inner except).
    """

    ######### Creating Mongo Cursors#########
    #########################################

    monconn_users_edu = MongoConnect('CandidateEducation',
                                     host='172.22.65.88',
                                     port=27018,
                                     database='sumoplus',
                                     username=username,
                                     password=password,
                                     authenticate=True).getCursor()
    lookup_educationstudy = MongoConnect('LookupEducationStream',
                                         host='172.22.65.88',
                                         port=27018,
                                         database='sumoplus',
                                         username=username,
                                         password=password,
                                         authenticate=True).getCursor()
    lookup_institute = MongoConnect('LookupEducationInstitute',
                                    host='172.22.65.88',
                                    port=27018,
                                    database='sumoplus',
                                    username=username,
                                    password=password,
                                    authenticate=True).getCursor()

    ###### Creating Study Field Dict ########
    #########################################

    # Map stream id ('si') -> stream description ('sd').
    Study_Field = lookup_educationstudy.find({}, {'si': 1, 'sd': 1})
    study_field_dict = {}
    for records in Study_Field:
        study_field_dict[records['si']] = records['sd']

    ###### Creating Institute Dict ###########
    ##########################################

    # Map institute id ('asi') -> institute description ('asd').
    institute_dict = {}
    Institute_Name = lookup_institute.find({}, {'asi': 1, 'asd': 1})
    for records in Institute_Name:
        institute_dict[records['asi']] = records['asd']

    ifile = open('/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv',
                 'rb')  #### Loading Candidate Level csv File ######
    reader = csv.reader(ifile)
    reader.next()  # skip the header row
    ofile = open(
        '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Edu_Data.csv', 'wb')
    writer = csv.writer(ofile)
    writer.writerow([
        'user_id', 'institute', 'institute_id', 'stream', 'stream_id',
        'course_type', 'course_type_id', 'most_recent'
    ])

    try:
        # One output row per education record; column 0 of the input CSV is
        # assumed to be the candidate id ('fcu') -- TODO confirm schema.
        for records in reader:
            try:
                required_data = monconn_users_edu.find(
                    {'fcu': str(records[0])})
                for data in required_data:
                    user_id = data.get('fcu', '')
                    # Prefer the institute lookup by id ('ins'); fall back to
                    # the free-text institute name ('inc').
                    if data.has_key('ins') == True and data.get(
                            'ins', '') is not None:
                        institute = institute_dict[data['ins']].encode(
                            'utf8', 'ignore').encode('utf-8')
                    else:
                        institute = data.get('inc').encode(
                            'utf8', 'ignore').encode('utf-8')
                    ins_id = data.get('ins', '')
                    stream = study_field_dict[data.get('el')]
                    stream_id = data.get('el', '')
                    course_type_id = data.get('ct', '')
                    # NOTE(review): if 'ct' is not 1/2/3, course_type is left
                    # unassigned here -- a NameError on first use routes to the
                    # blank-row except below; later records may silently reuse
                    # the previous value.  Confirm intended behaviour.
                    if course_type_id == 1:
                        course_type = 'Full Time'
                    if course_type_id == 2:
                        course_type = 'Part Time'
                    if course_type_id == 3:
                        course_type = 'Correspondence'
                    mr = data.get('mr', '')
                    writer.writerow([
                        user_id, institute, ins_id, stream, stream_id,
                        course_type, course_type_id, mr
                    ])
            except:
                # Any lookup/encoding failure: emit a blank row so the
                # candidate still appears in the output.
                user_id = records[0]
                institute = ''
                ins_id = ''
                stream = ''
                stream_id = ''
                course_type = ''
                course_type_id = ''
                mr = ''
                writer.writerow([
                    user_id, institute, ins_id, stream, stream_id, course_type,
                    course_type_id, mr
                ])

    except:
        # Abort on unexpected failures; log the candidate being processed.
        print records[0]

    ofile.close()
    df = pd.read_csv(
        '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Edu_Data.csv')

    ########Imputing Missing Value of "mr" field with -100 #######################
    ##############################################################################
    df[['most_recent']] = df[['most_recent']].fillna(value=-100)

    ##### Sorting Dataframe ascending on user id and descending on mr field ######
    ##############################################################################
    # NOTE(review): DataFrame.sort and .ix below are legacy pandas APIs
    # (removed in modern pandas) -- this code pins an old pandas version.
    df_1 = df.sort(['user_id', 'most_recent'], ascending=[1, 0])

    ##### Grouping on User_Id Level to Fetch Latest Institute of Candidate #######
    ##############################################################################
    df_2 = df_1.groupby(
        'user_id',
        group_keys=False).apply(lambda x: x.ix[x.most_recent.idxmax()])

    df_3 = df_2[[
        'user_id', 'institute', 'institute_id', 'stream', 'stream_id',
        'course_type', 'course_type_id', 'most_recent'
    ]]
    df_3.to_csv(
        '/data/Projects/Salary_Tool_HT_Campus/Output/Institute_Level_Data.csv')
Exemple #24
0
        user_email_list.append(row[0])
        i += 1
    print len(user_email_list)
    ''' 
        #if i>1000:
        #    break
        
    
    #print user_email_list    
    '''
    date1 = datetime.datetime.now() - datetime.timedelta(days=58)

    monconn_users_static = MongoConnect('CandidateStatic',
                                        host='172.22.65.88',
                                        port=27018,
                                        database='sumoplus',
                                        username=username,
                                        password=password,
                                        authenticate=True).getCursor()

    j = 0
    while True:
        emails_list = user_email_list[j:j + 5000]
        j = j + 5000

        print j

        data_user = monconn_users_static.find({'e': {'$in': emails_list}})
        #data_user = monconn_users_static.find({'ut':1,'rsd':{'$gt':date1}},{'_id':1,'ut':1,'red':1,'rsd':1})

        count = 0
            '*****@*****.**',
            '*****@*****.**'
        ], "SJ Onsite", "Similar Jobs Onsite Creation started !! ")

        #########################################################################################################
        ############-----------------  Start the timer
        #########################################################################################################
        start_time = time.time()
        print "Started at time", start_time, "seconds"

        #########################################################################################################
        ############-----------------  Remove the previous Mongo dump of Similar Jobs
        #########################################################################################################
        tablename = 'JobSuggestions'
        monconn_recommendations = MongoConnect(tablename,
                                               host='localhost',
                                               database='similar_jobs_onsite')
        monconn_recommendations.dropTable()
        monconn_recommendations.close()

        #########################################################################################################
        ############-----------------  Load the LSI and tfidf models
        #########################################################################################################

        tfIdfModelFilename_unifiedtke = '/data/Projects/JobAlerts/Model/tfidf_model.tfidf'
        lsiModelFilename_unifiedtke = '/data/Projects/JobAlerts/Model/lsi_model.lsi'
        tfIdfModel = gensim.models.tfidfmodel.TfidfModel.load(
            tfIdfModelFilename_unifiedtke)
        lsiModel = models.lsimodel.LsiModel.load(lsiModelFilename_unifiedtke)

        #########################################################################################################
Exemple #26
0
    #send_email(['*****@*****.**', '*****@*****.**'],"Job Alert Mailer","Jobs Processing from SQL Started!!!")

    #########################################################################################################
    ############-----------------    Start the timer
    #########################################################################################################
    print 'preProcessing Jobs...', time.ctime()
    start_time = time.time()
    htmls = HTMLStripper()

    #########################################################################################################
    ############-----------------    Remove the completion status if already exist
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_status_check'
    monconn_status_check = MongoConnect(tableName,
                                        host='172.22.66.198',
                                        database='jam_status')
    monconn_status_check_cur = monconn_status_check.getCursor()
    monconn_status_check.dropTable()
    del (monconn_status_check)
    #monconn_status_check.saveToTable({'_id':1,'status':0})

    #########################################################################################################
    ############-----------------    Loading the mapping for Bag of Words
    #########################################################################################################
    print 'Loading the mappings for bow'
    synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
    keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv'  #This file is created
    mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
    print 'Loading the mappings for bow...finished'
Exemple #27
0
# Script setup: dump candidates' job applications from the local
# 'candidate_applications' MongoDB collection to User_Applications.csv,
# driven by the user ids listed in UserData.csv (Python 2 script).
import sys
sys.path.append('./../')
from DataConnections.MongoConnect.MongoConnect import MongoConnect
import csv
import os
import datetime
from datetime import timedelta

# Local MongoDB collection holding the candidate-application documents
# (queried later by the 'fcu' user-id field).
monconn_users_static = MongoConnect('candidate_applications',
                                    host='localhost',
                                    database='JobAlerts')

# Input: user list; the first (header) row is skipped with reader.next().
ifile = open('UserData.csv', 'rb')
reader = csv.reader(ifile)
reader.next()

# Output: one row per (user, applied job) pair, written as it is found.
ofile = open("User_Applications.csv", "w")
writer = csv.writer(ofile)
writer.writerow(['User_Id', 'Job_Applied', 'Application_Date'])

# Unused in the active code path: the loop that would fill candidate_id is
# commented out below (kept from a previous version of the script).
candidate_id = []
'''for records in reader:
    candidate_id.append(str(records[0]).strip())'''
#print len(candidate_id)
for records in reader:
    data_user = monconn_users_static.loadFromTable({"fcu": str(records[0])})
    for records in data_user:
        try:
            User_Id = records.get('fcu', 'N/A')
            Job_Applied = records.get('fjj', 'N/A')
Exemple #28
0
def preProcessChunk(chunkID):

    #########################################################################################################
    ############-----------------    Creating a Mongo Connection to status collection
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_status_check'
    monconn_status_check = MongoConnect(tableName,
                                        host='172.22.66.198',
                                        database='jam_status')
    monconn_status_check_cur = monconn_status_check.getCursor()

    #########################################################################################################
    ############-----------------    SQL Credentials
    #########################################################################################################

    host1 = "172.22.65.157"
    user1 = "analytics"
    password1 = "Anal^tics@11"
    database1 = "SumoPlus"
    unix_socket1 = "/tmp/mysql.sock"
    port1 = 3308

    host = "172.22.66.204"
    user = "******"
    password = "******"
    database = "SumoPlus"
    unix_socket = "/tmp/mysql.sock"
    port = 3306

    #########################################################################################################
    ############-----------------    Creating the SQL Query
    #########################################################################################################
    print "Loading Jobs From MySql...."
    try:
        mysql_conn = MySQLConnect(database1, host1, user1, password1,
                                  unix_socket1, port1)
    except:
        mysql_conn = MySQLConnect(database, host, user, password, unix_socket,
                                  port)
    cmd1 = '''drop table if exists SumoPlus.XY'''
    cmd2 = '''create table SumoPlus.XY as 
         SELECT company_account_id,SUM(final_sale_price)as price,enabled,MAX(expiry_date)as expiry_date 
         from SumoPlus.backoffice_accountsales a1 
         where enabled in 
         (select min(enabled) from SumoPlus.backoffice_accountsales where a1.company_account_id=company_account_id)
         group by 1
        '''
    cmd3 = '''ALTER TABLE SumoPlus.XY add index company_account_id (company_account_id)'''
    cmd4 = '''SELECT
         rj.jobid as Jobid,
         rj.jobtitle as JobTitle,
         rj.description as JD,
         rj.isbocreated as back_office_job,
         rj.publisheddate as publisheddate,
         rj.republisheddate as republisheddate,
         rj.companyid_id as Company_id,
         rj.displayname as Company_name,
         la1.text_value_MAX as SalaryMax,
         la2.text_value_MIN as SalaryMin,
         le1.display as ExpMin,
         le2.display as ExpMax,
         li.industry_desc as Industry,
         group_concat(c.AttValueCustom,'') as keySkills,
         group_concat(fn.field_enu,'') as function,
         group_concat(fn.field_id,'') as faid,
         group_concat(l.city_desc,'') as location,
         group_concat(fn.sub_field_enu,'') as subfunction,
         case account_type
         when 0 THEN "Company"
         when 1 THEN "Consultant"
         when 2 THEN "Others"
         when 3 THEN "Enterprise"
         ELSE "Not Specified"
         END AS account_type,
         IF(XY.enabled = 1 AND XY.price != 0 AND XY.expiry_date > CURDATE(),'Paid','Free') AS 'flag'        
         
         from 
         (select * from recruiter_job 
            where recruiter_job.jobstatus in (3,9) 
            and (DATEDIFF( CURDATE(),DATE(recruiter_job.publisheddate)) < 8 OR DATEDIFF( CURDATE(),DATE(recruiter_job.republisheddate)) < 8) 
         ) AS rj 
         left join lookup_annualsalary AS la1 on rj.salarymax = la1.salary_id 
         left join  lookup_annualsalary AS la2 on rj.salarymin = la2.salary_id 
         left join lookup_experience AS le1 on rj.minexperience = le1.value 
         left join  lookup_experience AS le2 on rj.maxexperience = le2.value 
         left join recruiter_jobattribute as c on rj.jobid = c.jobid_id 
         left join  lookup_industry AS li on rj.industry=li.industry_id 
         left join lookup_subfunctionalarea_new163 AS fn on fn.sub_field_id = c.AttValue AND c.AttType = 12 
         left join lookup_city_new512 AS l on  l.city_id = c.AttValue AND c.AttType = 13 
         left join SumoPlus.XY AS XY on XY.company_account_id = rj.companyid_id
         left join SumoPlus.backoffice_companyaccount AS F on  F.id= rj.companyid_id       
         WHERE 
        
         c.AttType in (3,12,13) 
        
         group by rj.jobid
         '''

    cmd5 = '''drop table if exists SumoPlus.XY
        '''

    #########################################################################################################
    ############-----------------    Executing the SQL Query
    #########################################################################################################
    print 'chnukID:', chunkID, ': Loading jobs from SQL....', time.ctime()
    mysql_conn.query(cmd1)
    mysql_conn.query(cmd2)
    mysql_conn.query(cmd3)
    jobs = mysql_conn.query(cmd4)
    mysql_conn.query(cmd5)
    print 'chunkID:', chunkID, ': Loading jobs from SQL....completed..', time.ctime(
    )
    print 'chunkid:', chunkID, ' : Number of jobs loaded: ', len(jobs)

    #########################################################################################################
    ############-----------------Connecting to Jobs Collections Mongo (172.22.66.233)
    #########################################################################################################
    print 'Connecting to Mongodb..'
    tableName = 'jobs_processed'
    monconn_jobs_local = MongoConnect(tableName,
                                      host='172.22.66.198',
                                      database='JobAlerts')
    monconn_jobs_local_cur = monconn_jobs_local.getCursor()
    print 'Connecting to Mongodb...finished'

    #########################################################################################################
    ############-----------------Processing the Jobs data extracted from SQL
    #########################################################################################################
    i = 0
    for job in jobs:
        if i % 1000 == 0:
            print '\tchunkID:', chunkID, ' numRecords:', i, ' completed in ', time.time(
            ) - start_time, ' seconds'
        job_id = job['Jobid']
        #job_title = cleanToken(job['JobTitle'])
        job_maxexp = cleanToken(job['ExpMax'])
        job_minexp = cleanToken(job['ExpMin'])
        job_maxsal = cleanToken(job['SalaryMax'])
        job_minsal = cleanToken(job['SalaryMin'])
        #job_jd = cleanHTML(cleanToken(job['JD']) )
        #job_industry = cleanToken(job['Industry'])
        job_location = removeDup(job['location'])
        #job_subfunction=removeDup(cleanToken(job['subfunction']))
        #job_function=removeDup(cleanToken(job['function']))
        #job_skills=removeDup(cleanToken(job['keySkills']))
        job_flag = job['flag']
        job_accounttype = job['account_type']
        job_company_id = job['Company_id']
        job_company_name = cleanToken(job['Company_name'])
        job_published_date = job['publisheddate']
        job_republished_date = job['republisheddate']
        #job_faid = job['faid']
        job_back_office = int(job['back_office_job'])
        job_location = job_location.replace(', ', ',').lower().split(',')
        if job_company_id == 421880:  #######---------- Altimetrik Jobs removed
            continue

        job_faid = job['faid']
        job_title = cleanText(
            job['JobTitle']
        )  #######---------- cleanText Function is present in Cleaning.py in Utils folder
        job_jd = cleanText(cleanHTML(job['JD']))
        job_industry = cleanText(job['Industry'])
        job_function = removeDup(cleanText(job['function']))
        job_subfunction = removeDup(cleanText(job['subfunction']))
        job_skills = removeDup(cleanText(job['keySkills']))

        #########################################################################################################
        ############-----------------Creating Bag of Words for Text
        #########################################################################################################
        text = 5 * (" " + job_title) + ' ' + 5 * (
            " " + job_skills) + ' ' + 1 * (" " + job_jd) + ' ' + 2 * (
                " " + job_function) + ' ' + 2 * (" " + job_subfunction)
        text = re.sub(' +', ' ', text).strip()
        '''
        try:
            text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills.replace(',', ' ')) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
        except:
            text = 5*(" "+job_title) + ' ' + 5*(" "+job_skills) + ' ' + 1*(" "+job_jd) +' '+2*(" "+job_industry)+' '+2*(" "+job_function)+' '+2*(" "+job_subfunction)
        '''
        #text = text.replace('candidates', ' ')
        job_bow = mb.getBow(text, getbowdict=0)

        #########################################################################################################
        ############-----------------Creating Job document to be saved in Mongo
        #########################################################################################################
        document = {'job_id': job_id, 'job_title': job_title,'job_function':job_function, \
             'job_maxexp': job_maxexp, 'job_minexp': job_minexp,\
             'job_location':job_location, 'job_subfunction':job_subfunction,\
             'job_maxsal':job_maxsal,'job_minsal':job_minsal, 'job_skills': job_skills, \
             'job_bow': job_bow, 'job_industry': job_industry, 'job_jd': job_jd, \
             'job_flag':job_flag,'job_accounttype':job_accounttype, \
             'job_company_id':job_company_id,'job_company_name':job_company_name,
             'job_published':job_published_date,'job_republished':job_republished_date,'job_back_office':job_back_office,'job_faid':job_faid
             }

        #########################################################################################################
        ############-----------------Saving the document in Job collection Mongo (172.22.66.233)
        #########################################################################################################
        monconn_jobs_local.saveToTable(document)
        i += 1

    print "Processing finished....."
    print 'chunkID:', chunkID, ' Total time taken is: ', time.time(
    ) - start_time, ' seconds.'
    end_time = time.time()
    time_taken = end_time - start_time
    #send_email(['*****@*****.**', '*****@*****.**'],"Job Alert Mailer",'Jobs Processed '+str(i)+' in :' + str(end_time - start_time) + ' seconds')

    #########################################################################################################
    ############-----------------Changing the status of completion and deleting the mongo connections
    #########################################################################################################
    del (monconn_jobs_local)
    del (mysql_conn)
    monconn_status_check.saveToTable({'_id': 1, 'status': 1})
    del (monconn_status_check)
def computeAlertsChunk(chunkID):

    #########################################################################################################             
    ############-----------------Creating the connection to Mongo (172.22.66.233)
    #########################################################################################################
    monconn_users_static = MongoConnect('candidates_processed_4', host = 'localhost', database = 'JobAlerts')
    monconn_users_static_cur = monconn_users_static.getCursor()
    
    monconn_applications = MongoConnect('candidate_applications', host = 'localhost', database = 'JobAlerts')
    monconn_applications_cur = monconn_users_static.getCursor()
    
    tablename = 'candidates_processed_5'
    monconn_recommendations = MongoConnect(tablename, host='localhost', database='JobAlerts')    
    print 'Chunk:', chunkID, 'initiated at:', time.ctime()
    
    myCondition = {'p':chunkID}  
    users = monconn_users_static.loadFromTable(myCondition)
  
    for row in users :
        user_profiletitle = row['user_profiletitle']
        user_industry  = row['user_industry']
        user_functionalarea = row['user_functionalarea']
        user_jobtitle = row['user_jobtitle']
        user_skills   = row['user_skills']
        preferred_subfa = row["preferred_sub_fa"]
        subject_status = row["subject_status"]
        user_experience = row["user_experience"]
        
        apply_data = monconn_applications.loadFromTable({'fcu':row['_id']})
        apply_data_list = list(apply_data)
        application_list = []
        if len(apply_data) == 0:
            pass
        else:
            for element  in apply_data_list:
                application_list.append(element['fjj'])
                
        application_list.sort()
        row['application_list'] = application_list
        application_count = len(application_list)
        row['application_count'] = application_count
        
        if application_count == 0:
            monconn_recommendations.saveToTable(row)
        ############-----------------    Loading the mapping for Bag of Words
        #########################################################################################################                
        print 'Loading the mappings for bow'
        synMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist.csv'
        keywordIdMappingFileName = '../Features/rawData/LSI/Model_UnifiedTKE/unifiedtkelist_numbered.csv' #This file is created    
        mb = MyBOW(synMappingFileName, keywordIdMappingFileName)
        print 'Loading the mappings for bow...finished'
        
            

        #########################################################################################################             
        ############-----------------    Dropping the existing collection of Jobs
        #########################################################################################################                
        print 'Connecting to Mongodb..'
        tableName = 'jobs_processed_midout'
        monconn_jobs_local = MongoConnect(tableName, host = 'localhost', database = 'Midout_Mailers')
        monconn_jobs_local_cur = monconn_jobs_local.getCursor()
        monconn_jobs_local.dropTable()
        print 'Connecting to Mongodb...finished'
        del(monconn_jobs_local)
        
        

        
        #########################################################################################################             
        ############----------------- Initiating Multiprocessing and extracting Jobs
        ############----------------- Set flag pprocessing = 1 for multiprocessing (avoid)
        #########################################################################################################                
        numChunks = 100
        chunkIDs = range(0, numChunks)   
        print chunkIDs