예제 #1
0
    except:
        monconn_users_static = MongoConnect('CandidateStatic',
                                            host='172.22.65.157',
                                            port=27017,
                                            database='sumoplus',
                                            username=username,
                                            password=password,
                                            authenticate=True).getCursor()
    #data_new_registrations = monconn_users_static.find({'$or': [{'red':{'$gt':date_object,'$lt':custom_date_object}}, {'rsd':{'$gt':date_object,'$lt':custom_date_object}}],'red':{'$lt':custom_date_object}})
    data_new_registrations = monconn_users_static.find({
        '$or': [{
            'red': {
                '$gt': date_object,
                '$lt': current_date_object
            }
        }, {
            'rsd': {
                '$gt': date_object,
                '$lt': current_date_object
            }
        }]
    })
    #data_new_registrations = monconn_users_static.find({'$or': [{'red':{'$gt':date_object,'$lt':custom_date_object}}, {'rsd':{'$gt':date_object,'$lt':custom_date_object}}]})
    #data_new_registrations = monconn_users_static.find({'$or': [{'red':{'$gt':date_object,'$lt':date_last}}, {'rsd':{'$gt':date_object,'$lt':date_last}}]})
    #monconn_users_local = MongoConnect('candidates_processed_4', host = '172.22.66.233', database = 'JobAlerts').getCursor()
    monconn_users_local = MongoConnect('candidates_processed_4',
                                       host='172.22.66.198',
                                       database='JobAlerts').getCursor(
                                       )  ### Change Done on "03-August-2017###

    cumulative_registration_file = open(
예제 #2
0
def _edu_csv_text(value):
    """Return ``value`` in a form the Python 2 ``csv`` writer can emit.

    ``None`` becomes an empty string and ``unicode`` text is encoded to UTF-8
    exactly once; every other value is returned unchanged.
    """
    if value is None:
        return ''
    if isinstance(value, unicode):  # Python 2 text type
        return value.encode('utf-8')
    return value


def _edu_row(data, institute_dict, study_field_dict):
    """Build one Cand_Edu_Data.csv row from a CandidateEducation document.

    Args:
        data: the education document (dict-like) returned by Mongo.
        institute_dict: maps the document's 'ins' value to an institute name.
        study_field_dict: maps the document's 'el' value to a stream name.

    Returns:
        A list in the column order user_id, institute, institute_id, stream,
        stream_id, course_type, course_type_id, most_recent.
    """
    course_type_labels = {1: 'Full Time', 2: 'Part Time', 3: 'Correspondence'}

    # Prefer the looked-up institute name; when the id is absent or unknown
    # fall back to the document's own 'inc' value instead of failing the row.
    institute = None
    if data.get('ins') is not None:
        institute = institute_dict.get(data['ins'])
    if institute is None:
        institute = data.get('inc')

    course_type_id = data.get('ct', '')
    return [
        _edu_csv_text(data.get('fcu', '')),
        _edu_csv_text(institute),
        _edu_csv_text(data.get('ins', '')),
        _edu_csv_text(study_field_dict.get(data.get('el'), '')),
        _edu_csv_text(data.get('el', '')),
        # Computed per record so an unknown id can never inherit the label
        # of the previously processed record.
        course_type_labels.get(course_type_id, ''),
        course_type_id,
        data.get('mr', ''),
    ]


def getedu_details():
    """Export education details for every candidate listed in Cand_Data.csv.

    Steps:
      1. Build id -> name dictionaries from the LookupEducationStream and
         LookupEducationInstitute collections.
      2. For each user id in the first column of Cand_Data.csv (header row
         skipped), write one row per matching CandidateEducation document
         to Cand_Edu_Data.csv. If processing a user fails, a row holding
         only the user id is written instead.
      3. Reload that CSV with pandas, fill missing 'most_recent' values
         with -100, keep for each user the row with the highest
         'most_recent', and write the result to Institute_Level_Data.csv.

    Relies on the module-level names ``MongoConnect``, ``username``,
    ``password``, ``csv`` and ``pd``. Returns None.
    """
    cand_path = '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv'
    edu_path = '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Edu_Data.csv'
    columns = [
        'user_id', 'institute', 'institute_id', 'stream', 'stream_id',
        'course_type', 'course_type_id', 'most_recent'
    ]

    def sumoplus_cursor(collection_name):
        # All three collections live on the same host/database.
        return MongoConnect(collection_name,
                            host='172.22.65.88',
                            port=27018,
                            database='sumoplus',
                            username=username,
                            password=password,
                            authenticate=True).getCursor()

    ######### Creating Mongo Cursors#########
    monconn_users_edu = sumoplus_cursor('CandidateEducation')
    lookup_educationstudy = sumoplus_cursor('LookupEducationStream')
    lookup_institute = sumoplus_cursor('LookupEducationInstitute')

    ###### Creating Study Field Dict ########
    study_field_dict = {}
    for entry in lookup_educationstudy.find({}, {'si': 1, 'sd': 1}):
        study_field_dict[entry['si']] = entry['sd']

    ###### Creating Institute Dict ###########
    institute_dict = {}
    for entry in lookup_institute.find({}, {'asi': 1, 'asd': 1}):
        institute_dict[entry['asi']] = entry['asd']

    # 'rb' / 'wb' are the modes the Python 2 csv module expects. The context
    # managers guarantee both files are closed (and the output flushed)
    # before pandas reads the result back, even if the export stops early.
    with open(cand_path, 'rb') as ifile:
        with open(edu_path, 'wb') as ofile:
            reader = csv.reader(ifile)
            next(reader, None)  # skip header; tolerate an empty file
            writer = csv.writer(ofile)
            writer.writerow(columns)

            user_id = None
            try:
                for row in reader:
                    if not row:
                        continue  # blank line: no user id to look up
                    user_id = row[0]
                    try:
                        matches = monconn_users_edu.find(
                            {'fcu': str(user_id)})
                        for data in matches:
                            writer.writerow(
                                _edu_row(data, institute_dict,
                                         study_field_dict))
                    except Exception:
                        # Best effort: keep the user in the output with
                        # empty education fields rather than aborting.
                        writer.writerow([user_id, '', '', '', '', '', '', ''])
            except Exception as exc:
                # Reading the input (or writing the fallback row) failed;
                # report where the export stopped and continue with
                # whatever has been written so far.
                print('getedu_details stopped at user_id %s: %s' %
                      (user_id, exc))

    df = pd.read_csv(edu_path)

    ########Imputing Missing Value of "mr" field with -100 #######################
    df[['most_recent']] = df[['most_recent']].fillna(value=-100)

    ##### Sorting Dataframe ascending on user id and descending on mr field ######
    # DataFrame.sort was removed from later pandas releases; use sort_values
    # when it exists and fall back to the legacy method otherwise.
    sort_frame = df.sort_values if hasattr(df, 'sort_values') else df.sort
    df_1 = sort_frame(['user_id', 'most_recent'], ascending=[True, False])

    ##### Grouping on User_Id Level to Fetch Latest Institute of Candidate #######
    # idxmax() yields an index label, so label-based .loc selects the same
    # row the deprecated .ix accessor did.
    df_2 = df_1.groupby(
        'user_id',
        group_keys=False).apply(lambda x: x.loc[x.most_recent.idxmax()])

    df_3 = df_2[columns]
    df_3.to_csv(
        '/data/Projects/Salary_Tool_HT_Campus/Output/Institute_Level_Data.csv')
예제 #3
0
def _salary_utf8(value):
    """Encode ``unicode`` text to UTF-8 bytes; return other values unchanged.

    The Python 2 ``csv`` writer encodes unicode with the ASCII codec, so a
    single non-ASCII cell would otherwise raise while writing the row.
    """
    if isinstance(value, unicode):  # Python 2 text type
        return value.encode('utf-8')
    return value


def _salary_text(value):
    """Return ``value`` as a byte string; ``None`` becomes ``''``.

    Unlike a bare ``str(value)`` this does not fail on non-ASCII unicode.
    """
    if value is None:
        return ''
    value = _salary_utf8(value)
    return value if isinstance(value, str) else str(value)


def _salary_lookup(mapping, key):
    """Return ``mapping[key]``, or ``''`` if the key is missing/unhashable."""
    try:
        return _salary_utf8(mapping[key])
    except (KeyError, TypeError):
        return ''


def _salary_experience_months(raw):
    """Convert an experience string such as '2 Yrs 6 Months' to months.

    The text is split on 'Yrs', 'Yr', 'Months' or 'Month'; the first piece
    is read as years and the second as months. A piece that is missing or
    not an integer counts as 0.
    """
    # NOTE(review): a months-only value such as '6 Months' has its number in
    # the first piece and is therefore counted as years (same as before);
    # confirm whether such values occur in the data.
    parts = re.split('Yrs|Yr|Months|Month', _salary_text(raw))
    numbers = []
    for part in parts[:2]:
        try:
            numbers.append(int(part.strip()))
        except ValueError:
            numbers.append(0)
    while len(numbers) < 2:
        numbers.append(0)
    return numbers[0] * 12 + numbers[1]


def _salary_lacs(raw):
    """Return the second piece of ``raw`` split on '-' or 'Lakh', stripped.

    For example '2 - 3 Lakh' gives '3'. Returns ``''`` when there is no
    second piece.
    """
    parts = re.split('-|Lakh', _salary_text(raw))
    return parts[1].strip() if len(parts) > 1 else ''


def _salary_row(data, specialization_dict, industry_dict, company_dict):
    """Build one Cand_Data.csv row from a candidates_processed_4 document."""
    specialization = _salary_text(data.get('user_edu_special', ''))

    try:
        # First element of the stored location value.
        city = _salary_text(data.get('user_location', '')[0])
    except (IndexError, KeyError, TypeError):
        city = ''

    industry = data.get('user_industry')
    company = _salary_text(data.get('user_current_company', '')).title()

    return [
        _salary_utf8(data.get('_id', '')),
        specialization,
        _salary_lookup(specialization_dict, specialization),
        _salary_experience_months(data.get('user_experience', '')),
        city,
        _salary_utf8(data.get('user_location_id', '')),
        _salary_utf8(industry),
        _salary_lookup(industry_dict, industry),
        company,
        _salary_lookup(company_dict, company),
        _salary_lacs(data.get('user_ctc', '')),
        _salary_text(data.get('user_jobtitle', '')).title(),
    ]


def salary_data():
    """Export recently active candidates to Cand_Data.csv.

    Selects up to 100000 documents from ``candidates_processed_4`` whose
    'user_lastlogin' is greater than the timestamp 183 days ago, enriches
    each with ids from the industry, company and specialization lookups,
    and writes one CSV row per candidate. Values that cannot be parsed or
    looked up are written as empty strings (experience as 0).

    Relies on the module-level names ``getMongoMaster``, ``MongoConnect``,
    ``username``, ``password``, ``csv``, ``re``, ``datetime`` and
    ``timedelta``. Returns None.
    """
    date1 = datetime.now() - timedelta(days=183)
    print(datetime.now())
    print(date1)

    def sumoplus_cursor(collection_name):
        # Both lookup collections live on the same host/database.
        return MongoConnect(collection_name,
                            host='172.22.65.88',
                            port=27018,
                            database='sumoplus',
                            username=username,
                            password=password,
                            authenticate=True).getCursor()

    ###### Loading Mongo Cursors #############
    mongo_conn = getMongoMaster()
    collection = getattr(mongo_conn, "candidates_processed_4")
    lookup_industry = sumoplus_cursor('LookupIndustry')
    lookup_company = sumoplus_cursor('LookupCompanyName')

    ###### Creating Industry Dict#############
    industry_dict = {}
    for entry in lookup_industry.find({}, {'ii': 1, 'idesc': 1}):
        industry_dict[entry['idesc']] = entry['ii']

    ####### Creating Specialization Dict###########
    specialization_dict = {}
    with open('/data/Projects/Salary_Tool_HT_Campus/Output/Specilization.csv',
              'rb') as ifile:
        for row in csv.reader(ifile):
            if len(row) >= 2:  # ignore blank or incomplete lines
                specialization_dict[row[0].strip()] = row[1]

    ####### Creating Company Dict ############
    company_dict = {}
    for entry in lookup_company.find({}, {'v': 1, 'd': 1}):
        company_dict[entry['d']] = entry['v']

    ######Fetching Last Six Months Active Cands#############
    # NOTE(review): the cutoff is compared as the text str(date1)
    # ('YYYY-MM-DD HH:MM:SS.ffffff'); confirm it matches the stored
    # 'user_lastlogin' format.
    required_data = collection.find({
        'user_lastlogin': {
            '$gt': str(date1)
        }
    }).limit(100000)

    user_id = None
    with open('/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv',
              'w') as ofile:
        writer = csv.writer(ofile)
        writer.writerow([
            'user_id', 'specialization', 'specialization_id',
            'total_exp_months', 'city', 'city_id', 'industry', 'industry_id',
            'company', 'company_id', 'salary_lacs', 'job_title'
        ])
        try:
            for data in required_data:
                user_id = data.get('_id', '')
                writer.writerow(
                    _salary_row(data, specialization_dict, industry_dict,
                                company_dict))
        except Exception as exc:
            # Best effort, as before: report where the export stopped and
            # keep the rows already written (the file is closed on exit).
            print('salary_data stopped at user_id %s: %s' % (user_id, exc))
예제 #4
0
 writer = csv.writer(output, lineterminator='\n')
 
 
 i=0
 user_email_list = []
 
 date1 = datetime.now() - timedelta(days= 2)
 date1 = date1.isoformat()	
 print date1
 
 monconn_users_static = MongoConnect('candidates_processed_4', host = 'localhost', database = 'JobAlerts').getCursor()
 mon_conn_sub_fa = MongoConnect('LookupSubFunctionalArea', host = '172.22.65.88', port = 27018,database = 'sumoplus',username= username,password = password,authenticate = True).getCursor()
 
 print 'Mongo_Connected',monconn_users_static
 
 data_user = monconn_users_static.find({'user_lastlogin':{'$gt':date1}})
 data_user_1 = monconn_users_static.find({'user_lastlogin':{'$gt':date1}}).count()
 
 sub_fa_lookup = mon_conn_sub_fa.find()
 sub_fa = {}
 for records in sub_fa_lookup:
     sub_fa[records['sfe']] = records['fe']
     
 print 'Candidates_picked:',str(data_user_1)
 
 writer.writerow(["Email",'Candidate_Name','Phone','City','cpv','applications','edu_qual','loc_id','Total_Experience','Industry','Salary','Functional_Area','last_login','Sub_FA'])        
 count = 0     
 for row in data_user :
     #print row
     count += 1 
     id = row.get('_id',0)
예제 #5
0
    monconn_users_static = MongoConnect('CandidateStatic',
                                        host='172.22.65.88',
                                        port=27018,
                                        database='sumoplus',
                                        username=username,
                                        password=password,
                                        authenticate=True).getCursor()

    j = 0
    while True:
        emails_list = user_email_list[j:j + 5000]
        j = j + 5000

        print j

        data_user = monconn_users_static.find({'e': {'$in': emails_list}})
        #data_user = monconn_users_static.find({'ut':1,'rsd':{'$gt':date1}},{'_id':1,'ut':1,'red':1,'rsd':1})

        count = 0
        for row in data_user:
            count += 1

            #if count%5000 == 0:
            #    print count
            id = row.get('_id', 0)
            #st = row.get('st',0)
            #sl = row.get('sl',0)
            e = row.get('e', 0)
            ut = row.get('ut', None)
            red = row.get('red', None)
            rsd = row.get('rsd', None)
            data_user = monconn_users_static.find({
                '$and': [{
                    'rm': 1
                }, {
                    'mo': {
                        '$in': [0, 2]
                    }
                }, {
                    'eas': {
                        '$in': [1, 3]
                    }
                }, {
                    'bs': {
                        '$ne': 1
                    }
                }, {
                    'ss': {
                        '$ne': 1
                    }
                }, {
                    'll': {
                        '$gt': date1
                    }
                }, {
                    '_id': {
                        '$gt': id
                    }
                }]
            }).sort('_id').limit(1000)
            print id
예제 #7
0
# Output file for the e-mail -> functional-area export; the header row is
# written immediately. ('wb' is the mode the Python 2 csv module expects.)
ofile = open('/data/Projects/Cold_Calling/Pycode/email_fa_mapping.csv', 'wb')
writer = csv.writer(ofile)
writer.writerow(['Email', 'Candidate_Name', 'City', 'Industy', 'Salary',
                 'Total_Experience', 'Functional_Area',
                 'Sub_Functional_Area'])

# First column of every input row, with surrounding whitespace removed.
email_id = [str(records[0]).strip() for records in reader]
print(len(email_id))
print(email_id[1:5])  # small sample of the collected values

# All candidate documents whose 'user_email' is one of the collected values.
data_user = monconn_users_static.find({'user_email': {'$in': email_id}})

# Map each lookup document's 'sfe' value to its 'fe' value.
sub_fa = {}
sub_fa_lookup = mon_conn_sub_fa.find()
for records in sub_fa_lookup:
    sub_fa[records['sfe']] = records['fe']

for records in data_user:
    try:
        email = records['user_email']
    except:
        email = ''
    try:
        user_sub_functionalarea = row.get(
            "user_functionalarea", "None").encode('ascii',
                                                  'ignore').decode('ascii')
예제 #8
0
'''ofile = open('/data/Projects/Cold_Calling/Pycode/consolidated_cold_calling_files_corrected.csv','wb')
writer = csv.writer(ofile)
writer.writerow(['Email','Candidate_Name','Phone','City','Industry','Functional_Area','Salary','Total_Experience','Sub_FA'])'''

# Output file for the e-mail -> cell-phone export, header row first.
# ('wb' is the mode the Python 2 csv module expects.)
ofile_1 = open(
    '/data/Projects/Cold_Calling/Pycode/corrected_numbers.csv', 'wb')
writer_1 = csv.writer(ofile_1)
writer_1.writerow(['Email', 'CellPhone'])

# First column of every input row, with surrounding whitespace removed.
email_id = [str(records[0]).strip() for records in reader]
print(len(email_id))
print(email_id[1:5])  # small sample of the collected values

# Fetch only the 'e' and 'cp' fields of the matching documents.
required_data = mongo_conn.find({'e': {'$in': email_id}}, {'e': 1, 'cp': 1})

# Write one row per document; ``count`` ends up as the number of rows.
count = 0
for count, rows in enumerate(required_data, 1):
    email, cellphone = rows['e'], rows['cp']
    writer_1.writerow([email, cellphone])
'''
    
    

df_1 = pd.read_csv('/data/Projects/Cold_Calling/Pycode/consolidated_cold_calling_files.csv')
df_2 = pd.read_csv('/data/Projects/Cold_Calling/Pycode/corrected_numbers.csv')