except: monconn_users_static = MongoConnect('CandidateStatic', host='172.22.65.157', port=27017, database='sumoplus', username=username, password=password, authenticate=True).getCursor() #data_new_registrations = monconn_users_static.find({'$or': [{'red':{'$gt':date_object,'$lt':custom_date_object}}, {'rsd':{'$gt':date_object,'$lt':custom_date_object}}],'red':{'$lt':custom_date_object}}) data_new_registrations = monconn_users_static.find({ '$or': [{ 'red': { '$gt': date_object, '$lt': current_date_object } }, { 'rsd': { '$gt': date_object, '$lt': current_date_object } }] }) #data_new_registrations = monconn_users_static.find({'$or': [{'red':{'$gt':date_object,'$lt':custom_date_object}}, {'rsd':{'$gt':date_object,'$lt':custom_date_object}}]}) #data_new_registrations = monconn_users_static.find({'$or': [{'red':{'$gt':date_object,'$lt':date_last}}, {'rsd':{'$gt':date_object,'$lt':date_last}}]}) #monconn_users_local = MongoConnect('candidates_processed_4', host = '172.22.66.233', database = 'JobAlerts').getCursor() monconn_users_local = MongoConnect('candidates_processed_4', host='172.22.66.198', database='JobAlerts').getCursor( ) ### Change Done on "03-August-2017### cumulative_registration_file = open(
def getedu_details():
    """Export candidates' education rows and their most recent institute.

    Steps, in order:

    1. Load the stream (``si`` -> ``sd``) and institute (``asi`` -> ``asd``)
       lookup collections into dictionaries.
    2. For every user id in the first column of ``Cand_Data.csv`` (header
       skipped), write one row per matching ``CandidateEducation`` document
       to ``Cand_Edu_Data.csv``.  If anything fails while processing a user,
       an extra row holding only the user id is written (rows already
       written for that user are kept).
    3. Reload that CSV with pandas, keep the row with the largest
       ``most_recent`` value per user (missing values count as -100) and
       save the result as ``Institute_Level_Data.csv``.

    Relies on the module-level ``MongoConnect``, ``username``, ``password``,
    ``csv`` and ``pd`` names.  Returns nothing.
    """
    # All three collections live on the same server with the same credentials.
    conn_args = dict(host='172.22.65.88', port=27018, database='sumoplus',
                     username=username, password=password, authenticate=True)
    monconn_users_edu = MongoConnect('CandidateEducation',
                                     **conn_args).getCursor()
    lookup_educationstudy = MongoConnect('LookupEducationStream',
                                         **conn_args).getCursor()
    lookup_institute = MongoConnect('LookupEducationInstitute',
                                    **conn_args).getCursor()

    # Stream id -> stream description.
    study_field_dict = {}
    for record in lookup_educationstudy.find({}, {'si': 1, 'sd': 1}):
        study_field_dict[record['si']] = record['sd']

    # Institute id -> institute name.
    institute_dict = {}
    for record in lookup_institute.find({}, {'asi': 1, 'asd': 1}):
        institute_dict[record['asi']] = record['asd']

    # Any other course type id yields an empty label.  The original chain of
    # independent ``if`` statements left the previous record's label in place
    # (or raised NameError on the very first record) in that case.
    course_types = {1: 'Full Time', 2: 'Part Time', 3: 'Correspondence'}

    columns = [
        'user_id', 'institute', 'institute_id', 'stream', 'stream_id',
        'course_type', 'course_type_id', 'most_recent'
    ]

    current_user = ''
    with open('/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv',
              'rb') as ifile:
        reader = csv.reader(ifile)
        next(reader)  # skip the header row
        with open(
                '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Edu_Data.csv',
                'wb') as ofile:
            writer = csv.writer(ofile)
            writer.writerow(columns)
            try:
                for records in reader:
                    if not records:
                        # Blank line in the input: there is no user id to use.
                        continue
                    current_user = records[0]
                    try:
                        required_data = monconn_users_edu.find(
                            {'fcu': str(current_user)})
                        for data in required_data:
                            user_id = data.get('fcu', '')
                            ins_id = data.get('ins', '')
                            if 'ins' in data and ins_id is not None:
                                institute = institute_dict[data['ins']]
                            else:
                                institute = data.get('inc')
                            # Encode exactly once.  Encoding the already
                            # encoded byte string a second time makes
                            # Python 2 decode it as ASCII first, which fails
                            # for every non-ASCII institute name.
                            institute = institute.encode('utf-8', 'ignore')
                            stream = study_field_dict[data.get('el')]
                            stream_id = data.get('el', '')
                            course_type_id = data.get('ct', '')
                            course_type = course_types.get(course_type_id, '')
                            mr = data.get('mr', '')
                            writer.writerow([
                                user_id, institute, ins_id, stream, stream_id,
                                course_type, course_type_id, mr
                            ])
                    except Exception:
                        # Best effort: keep the user in the output with empty
                        # education fields instead of dropping the user.
                        writer.writerow(
                            [current_user, '', '', '', '', '', '', ''])
            except Exception:
                # Reading the input (or writing a fallback row) failed; report
                # the user being processed and continue with what was written.
                print(current_user)

    df = pd.read_csv(
        '/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Edu_Data.csv')

    # Impute missing "most_recent" values with -100.
    df[['most_recent']] = df[['most_recent']].fillna(value=-100)

    # Sort ascending on user id and descending on most_recent.
    # NOTE(review): ``DataFrame.sort`` and ``.ix`` only exist in old pandas
    # releases; they are kept so the installed version is not assumed to
    # provide ``sort_values`` -- confirm before upgrading pandas.
    df_1 = df.sort(['user_id', 'most_recent'], ascending=[1, 0])

    # One row per user: the education record with the highest most_recent.
    df_2 = df_1.groupby(
        'user_id',
        group_keys=False).apply(lambda x: x.ix[x.most_recent.idxmax()])
    df_3 = df_2[columns]
    df_3.to_csv(
        '/data/Projects/Salary_Tool_HT_Campus/Output/Institute_Level_Data.csv')
def _to_int_or_zero(text):
    """Return ``int(text)`` after stripping whitespace, or 0 if not a number."""
    try:
        return int(str(text).strip())
    except Exception:
        return 0


def _experience_in_months(raw_experience):
    """Convert an experience string such as ``'2 Yrs 6 Months'`` to months.

    The text is split on the ``Yrs``/``Yr``/``Months``/``Month`` markers; the
    first piece is the year count and the second the month count.  A value
    that names months but no years (``'6 Months'``) is treated as months, not
    as years.  Any piece that is not an integer counts as 0.
    """
    try:
        text = str(raw_experience)
    except Exception:
        return 0
    parts = re.split('Yrs|Yr|Months|Month', text)
    if 'Yr' not in text and 'Month' in text:
        # Months only: the leading number must not be read as years.
        year_part, month_part = '', parts[0]
    else:
        year_part = parts[0]
        month_part = parts[1] if len(parts) > 1 else ''
    return _to_int_or_zero(year_part) * 12 + _to_int_or_zero(month_part)


def _candidate_row(data, specialization_dict, industry_dict, company_dict):
    """Build one ``Cand_Data.csv`` row from a candidate document.

    Every derived field falls back to ``''`` (0 for the experience) when it
    is missing or cannot be converted, so one bad field never drops the row.
    """
    user_id = data.get('_id', '')

    # The two prints below are the per-row debug output of the original code.
    try:
        specialization = str(data.get('user_edu_special', ''))
        print(specialization)
    except Exception:
        specialization = ''
    try:
        specialization_id = specialization_dict[str(
            data.get('user_edu_special', ''))]
        print(specialization_id)
    except Exception:
        specialization_id = ''

    total_exp_months = _experience_in_months(data.get('user_experience', ''))

    try:
        # Only the first element of user_location is exported.
        city = str(data.get('user_location', '')[0])
    except Exception:
        city = ''
    city_id = data.get('user_location_id', '')

    industry = data.get('user_industry')
    try:
        industry_id = industry_dict[industry]
    except Exception:
        industry_id = ''

    try:
        company = str(data.get('user_current_company', '')).title()
    except Exception:
        company = ''
    try:
        company_id = company_dict[str(
            data.get('user_current_company', '')).title()]
    except Exception:
        company_id = ''

    try:
        # Second piece after splitting on '-' / 'Lakh'.
        salary = str(
            re.split('-|Lakh', str(data.get('user_ctc', '')))[1]).strip()
    except Exception:
        salary = ''

    try:
        job_title = str(data.get('user_jobtitle', '')).title()
    except Exception:
        job_title = ''

    return [
        user_id, specialization, specialization_id, total_exp_months, city,
        city_id, industry, industry_id, company, company_id, salary, job_title
    ]


def _format_row(row):
    """Join row values with single spaces, like a multi-value print would."""
    return ' '.join('%s' % (value, ) for value in row)


def salary_data():
    """Write profile and salary fields of recently active candidates to CSV.

    Selects up to 100000 documents from ``candidates_processed_4`` whose
    ``user_lastlogin`` is greater than the timestamp 183 days ago (compared
    as a string) and writes one row per candidate to ``Cand_Data.csv``.
    Industry and company ids come from Mongo lookup collections, and
    specialization ids from ``Specilization.csv``.

    A row that cannot be written is printed and skipped; an error while
    reading from the cursor is reported and ends the export.  Relies on the
    module-level ``getMongoMaster``, ``MongoConnect``, ``username`` and
    ``password`` names.  Returns nothing.
    """
    date1 = datetime.now() - timedelta(days=183)
    print(datetime.now())
    print(date1)

    with open('/data/Projects/Salary_Tool_HT_Campus/Output/Cand_Data.csv',
              'w') as ofile:
        writer = csv.writer(ofile)
        writer.writerow([
            'user_id', 'specialization', 'specialization_id',
            'total_exp_months', 'city', 'city_id', 'industry', 'industry_id',
            'company', 'company_id', 'salary_lacs', 'job_title'
        ])

        # Mongo cursors.
        mongo_conn = getMongoMaster()
        collection = getattr(mongo_conn, "candidates_processed_4")
        lookup_industry = MongoConnect('LookupIndustry',
                                       host='172.22.65.88',
                                       port=27018,
                                       database='sumoplus',
                                       username=username,
                                       password=password,
                                       authenticate=True).getCursor()
        lookup_company = MongoConnect('LookupCompanyName',
                                      host='172.22.65.88',
                                      port=27018,
                                      database='sumoplus',
                                      username=username,
                                      password=password,
                                      authenticate=True).getCursor()

        # Industry description -> industry id.
        industry_dict = {}
        for record in lookup_industry.find({}, {'ii': 1, 'idesc': 1}):
            industry_dict[record['idesc']] = record['ii']

        # Specialization name -> specialization id.
        specialization_dict = {}
        with open(
                '/data/Projects/Salary_Tool_HT_Campus/Output/Specilization.csv',
                'rb') as ifile:
            for record in csv.reader(ifile):
                if len(record) < 2:
                    # Blank or short line: no name/id pair to load.
                    continue
                specialization_dict[record[0].strip()] = record[1]

        # Company name -> company id.
        company_dict = {}
        for record in lookup_company.find({}, {'v': 1, 'd': 1}):
            company_dict[record['d']] = record['v']

        # Candidates active in the last six months.
        required_data = collection.find({
            'user_lastlogin': {
                '$gt': str(date1)
            }
        }).limit(100000)

        row = []
        try:
            for data in required_data:
                row = _candidate_row(data, specialization_dict, industry_dict,
                                     company_dict)
                try:
                    writer.writerow(row)
                except Exception:
                    # One unwritable row must not end the whole export.
                    print(_format_row(row))
        except Exception:
            # Reading from the cursor failed; show the last row handled.
            print(_format_row(row))
writer = csv.writer(output, lineterminator='\n') i=0 user_email_list = [] date1 = datetime.now() - timedelta(days= 2) date1 = date1.isoformat() print date1 monconn_users_static = MongoConnect('candidates_processed_4', host = 'localhost', database = 'JobAlerts').getCursor() mon_conn_sub_fa = MongoConnect('LookupSubFunctionalArea', host = '172.22.65.88', port = 27018,database = 'sumoplus',username= username,password = password,authenticate = True).getCursor() print 'Mongo_Connected',monconn_users_static data_user = monconn_users_static.find({'user_lastlogin':{'$gt':date1}}) data_user_1 = monconn_users_static.find({'user_lastlogin':{'$gt':date1}}).count() sub_fa_lookup = mon_conn_sub_fa.find() sub_fa = {} for records in sub_fa_lookup: sub_fa[records['sfe']] = records['fe'] print 'Candidates_picked:',str(data_user_1) writer.writerow(["Email",'Candidate_Name','Phone','City','cpv','applications','edu_qual','loc_id','Total_Experience','Industry','Salary','Functional_Area','last_login','Sub_FA']) count = 0 for row in data_user : #print row count += 1 id = row.get('_id',0)
monconn_users_static = MongoConnect('CandidateStatic', host='172.22.65.88', port=27018, database='sumoplus', username=username, password=password, authenticate=True).getCursor() j = 0 while True: emails_list = user_email_list[j:j + 5000] j = j + 5000 print j data_user = monconn_users_static.find({'e': {'$in': emails_list}}) #data_user = monconn_users_static.find({'ut':1,'rsd':{'$gt':date1}},{'_id':1,'ut':1,'red':1,'rsd':1}) count = 0 for row in data_user: count += 1 #if count%5000 == 0: # print count id = row.get('_id', 0) #st = row.get('st',0) #sl = row.get('sl',0) e = row.get('e', 0) ut = row.get('ut', None) red = row.get('red', None) rsd = row.get('rsd', None)
# Query at most 1000 documents, ordered by _id, whose _id is greater than the
# current value of `id` and that also satisfy every other condition listed
# under '$and' (rm == 1, mo in [0, 2], eas in [1, 3], bs != 1, ss != 1,
# ll > date1).
# NOTE(review): this looks like one step of _id-based paging where `id` holds
# the last _id already processed; the enclosing loop is not visible here --
# confirm.
data_user = monconn_users_static.find({
    '$and': [{
        'rm': 1
    }, {
        'mo': {
            '$in': [0, 2]
        }
    }, {
        'eas': {
            '$in': [1, 3]
        }
    }, {
        'bs': {
            '$ne': 1
        }
    }, {
        'ss': {
            '$ne': 1
        }
    }, {
        'll': {
            '$gt': date1
        }
    }, {
        '_id': {
            '$gt': id
        }
    }]
}).sort('_id').limit(1000)
# Print the _id lower bound used for this batch.
print id
ofile = open('/data/Projects/Cold_Calling/Pycode/email_fa_mapping.csv', 'wb') writer = csv.writer(ofile) writer.writerow([ 'Email', 'Candidate_Name', 'City', 'Industy', 'Salary', 'Total_Experience', 'Functional_Area', 'Sub_Functional_Area' ]) email_id = [] for records in reader: email_id.append(str(records[0]).strip()) print len(email_id) print email_id[1:5] data_user = monconn_users_static.find({'user_email': {'$in': email_id}}) sub_fa_lookup = mon_conn_sub_fa.find() sub_fa = {} for records in sub_fa_lookup: sub_fa[records['sfe']] = records['fe'] for records in data_user: try: email = records['user_email'] except: email = '' try: user_sub_functionalarea = row.get( "user_functionalarea", "None").encode('ascii', 'ignore').decode('ascii')
'''ofile = open('/data/Projects/Cold_Calling/Pycode/consolidated_cold_calling_files_corrected.csv','wb') writer = csv.writer(ofile) writer.writerow(['Email','Candidate_Name','Phone','City','Industry','Functional_Area','Salary','Total_Experience','Sub_FA'])''' ofile_1 = open('/data/Projects/Cold_Calling/Pycode/corrected_numbers.csv', 'wb') writer_1 = csv.writer(ofile_1) writer_1.writerow(['Email', 'CellPhone']) email_id = [] for records in reader: email_id.append(str(records[0]).strip()) print len(email_id) print email_id[1:5] required_data = mongo_conn.find({'e': {'$in': email_id}}, {'e': 1, 'cp': 1}) count = 0 for rows in required_data: #print rows email = rows['e'] cellphone = rows['cp'] writer_1.writerow([email, cellphone]) count = count + 1 ''' df_1 = pd.read_csv('/data/Projects/Cold_Calling/Pycode/consolidated_cold_calling_files.csv') df_2 = pd.read_csv('/data/Projects/Cold_Calling/Pycode/corrected_numbers.csv')