def get_start_end_date_of_curr_movement_db(self, employer_name, list_employers_of_candidate):
    """Return the start and end date of the movement for ``employer_name``.

    The end date is the validated end-date cell of the row labelled ``0``.
    The start date is taken from the first row whose employer name matches
    ``employer_name`` (case-insensitive) and for which either

    * the gap between that row's start date and the end date of the row
      labelled ``row_index + 1`` is at least 45 days, or
    * one of those two validated dates is ``None`` (including the case where
      there is no row ``row_index + 1`` at all).

    The row labels are used arithmetically (``0`` and ``row_index + 1``), so
    the frame is expected to carry consecutive integer labels.
    Presumably rows are ordered most-recent employer first -- TODO confirm
    against the caller.

    Args:
        employer_name: Employer to look for; compared upper-cased.
        list_employers_of_candidate: pandas DataFrame holding the columns
            named by ``templData.db_col_employer_name``,
            ``templData.db_col_start_date`` and ``templData.db_col_end_date``.

    Returns:
        A ``(start, end)`` tuple of strings. A value that could not be
        determined is the string ``"None"``.

    Errors:
        ``AttributeError`` (including a non-DataFrame input) and
        ``ValueError`` are printed, not raised; whatever was computed up to
        that point is returned.
    """
    clean_date = CleanupDateTime()
    new_start_date = None
    new_end_date = None
    employers = list_employers_of_candidate
    try:
        if not isinstance(employers, pd.DataFrame):
            raise AttributeError
        if employers.empty:
            # No rows at all: nothing to look up, report both dates as unknown
            # instead of failing on the lookup of row 0.
            return (str(new_start_date), str(new_end_date))

        # ``.at`` is the label-based scalar accessor that replaces the
        # removed ``DataFrame.get_value``.
        new_end_date = clean_date.validateDateTime(
            employers.at[0, templData.db_col_end_date])

        wanted_name = employer_name.upper()
        for row_index in employers.index:
            row_name = employers.at[row_index, templData.db_col_employer_name]
            if row_name.upper() != wanted_name:
                continue

            start_date_curr = clean_date.validateDateTime(
                employers.at[row_index, templData.db_col_start_date])

            # The matching employer may be the last row; in that case there
            # is no following row to read an end date from.
            next_index = row_index + 1
            if next_index in employers.index:
                end_date_past = clean_date.validateDateTime(
                    employers.at[next_index, templData.db_col_end_date])
            else:
                end_date_past = None

            # NOTE(review): this fallback branch was paired with the
            # None-check while reconstructing whitespace-collapsed source --
            # confirm against the original layout.
            if None in (start_date_curr, end_date_past):
                new_start_date = start_date_curr
                break

            diff_date = (strToDate(start_date_curr) - strToDate(end_date_past)).days
            if diff_date >= 45:
                new_start_date = start_date_curr
                break
    except AttributeError as err:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print(err)
        print("Input Employers_data_of_candidate is a dataframe")
    except ValueError as err:
        print(err)
    return (str(new_start_date), str(new_end_date))
def cal_avg_time_movement_db(self, list_employers_of_candidate):
    """Add each candidate's mean current-job duration to the employer rows.

    Steps:
      1. Clean up the raw data with
         ``CleanupDateTime.cleanDateTimeOfDBRawData``.
      2. Calculate the current time of each employer with
         ``self.cal_curr_time_employer_db``.
      3. Group by ``templData.db_col_candidate_id`` and take the mean of
         ``templData.temp_col_current_job_years``; the result column is
         renamed to ``templData.temp_col_avg_time``.
      4. Left-merge that per-candidate mean back onto the rows from step 2.

    Args:
        list_employers_of_candidate: raw employer data; presumably a pandas
            DataFrame -- TODO confirm against ``cleanDateTimeOfDBRawData``.

    Returns:
        The merged data. If an ``AttributeError`` occurs it is printed and
        whatever ``new_data`` held at that point is returned instead (the
        untouched input, or the result of step 2).
    """
    new_data = list_employers_of_candidate
    try:
        clean_date = CleanupDateTime()
        # Clean up Data
        cleaned_data = clean_date.cleanDateTimeOfDBRawData(
            list_employers_of_candidate)
        # Calculate current time of each employer
        new_data = self.cal_curr_time_employer_db(cleaned_data)

        candidate_col = templData.db_col_candidate_id
        years_col = templData.temp_col_current_job_years
        # The string "mean" selects the same groupby mean that numpy.mean was
        # mapped to, without the callable-deprecation warning of newer pandas.
        avg_per_candidate = new_data.groupby(
            candidate_col, as_index=False).agg({years_col: "mean"})
        avg_per_candidate = avg_per_candidate.rename(
            columns={years_col: templData.temp_col_avg_time})

        # A single join key is enough; the key used to be listed twice.
        new_data = pd.merge(
            new_data, avg_per_candidate, how='left', on=candidate_col)
    except AttributeError as err:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print(err)
    return new_data
def cal_avg_time_movement_db(self, list_employers_of_candidate):
    """Add each candidate's mean current-job duration to the employer rows.

    Steps:
      1. Clean up the raw data with
         ``CleanupDateTime.cleanDateTimeOfDBRawData``.
      2. Calculate the current time of each employer with
         ``self.cal_curr_time_employer_db``.
      3. Group by ``templData.db_col_candidate_id`` and take the mean of
         ``templData.temp_col_current_job_years``; the result column is
         renamed to ``templData.temp_col_avg_time``.
      4. Left-merge that per-candidate mean back onto the rows from step 2.

    Args:
        list_employers_of_candidate: raw employer data; presumably a pandas
            DataFrame -- TODO confirm against ``cleanDateTimeOfDBRawData``.

    Returns:
        The merged data. If an ``AttributeError`` occurs it is printed and
        whatever ``new_data`` held at that point is returned instead (the
        untouched input, or the result of step 2).
    """
    new_data = list_employers_of_candidate
    try:
        clean_date = CleanupDateTime()
        # Clean up Data
        cleaned_data = clean_date.cleanDateTimeOfDBRawData(
            list_employers_of_candidate)
        # Calculate current time of each employer
        new_data = self.cal_curr_time_employer_db(cleaned_data)

        candidate_col = templData.db_col_candidate_id
        years_col = templData.temp_col_current_job_years
        # The string "mean" selects the same groupby mean that numpy.mean was
        # mapped to, without the callable-deprecation warning of newer pandas.
        avg_per_candidate = new_data.groupby(
            candidate_col, as_index=False).agg({years_col: "mean"})
        avg_per_candidate = avg_per_candidate.rename(
            columns={years_col: templData.temp_col_avg_time})

        # A single join key is enough; the key used to be listed twice.
        new_data = pd.merge(
            new_data, avg_per_candidate, how='left', on=candidate_col)
    except AttributeError as err:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print(err)
    return new_data
# Scratch driver: reads the group-by-employer test CSV and prints the output
# of WorkingTimeCalculation.cal_avg_time_movement_db for it.
import pandas as pd

from src.cleanup.CleanupDateTime import CleanupDateTime
from src.cleanup.CleanMovement import CleanMovement
import src.const.TemplateData as templData
import src.process.WorkingTimeCalculation as w

cl = CleanupDateTime()

# Path is relative to the working directory the script is started from.
test_data = "../data/test/data_test_group_by_employer.csv"
sample_data = pd.read_csv(test_data)
print("111")

# --- disabled: inspect the raw sample / date clean-up helpers ---
# print(sample_data)
# print(sample_data[templData.col_start_date][1])
# a = cl.cleanDateTimeOfRawData(sample_data)
# a = cl.validateDateTime("1995-05-01")
# print(a)

cm = CleanMovement()

# --- disabled: start/end date of the current movement for one employer ---
# employer_name = "Epson Electronics America"
# # employer_data = a.ix[employer_name]
# b = cm.get_start_end_date_of_curr_movement_db(employer_name, sample_data)
# print(b)

# Active experiment: average time per movement for the whole sample.
a = w.WorkingTimeCalculation()
print(type(sample_data))
test = a.cal_avg_time_movement_db(sample_data)
print(test)
# Scratch driver: prints what CleanupDateTime.validateDateTime returns for the
# input "July 2013". The other experiments below are switched off.
import pandas as pd

from src.cleanup.CleanupDateTime import CleanupDateTime
from src.cleanup.CleanMovement import CleanMovement
import src.const.TemplateData as templData
# import src.process.WorkingTimeCalculation as w

cl = CleanupDateTime()

# --- disabled: load and inspect the group-by-employer sample ---
# test_data = "../data/test/data_test_group_by_employer.csv"
# sample_data = pd.read_csv(test_data)
# print("111")
# print(sample_data)
# print(sample_data[templData.col_start_date][1])
# a = cl.cleanDateTimeOfRawData(sample_data)

# Active experiment (alternative kept for reference on the next line).
a = cl.validateDateTime("July 2013")
# a = cl.check_format("August 2013")
print(a)

# --- disabled: start/end date of the current movement for one employer ---
# cm = CleanMovement()
# employer_name = "Epson Electronics America"
# # employer_data = a.ix[employer_name]
# b = cm.get_start_end_date_of_curr_movement_db(employer_name, sample_data)
# print(b)
#
# --- disabled: average time per movement for the whole sample ---
# a = w.WorkingTimeCalculation()
# print(type(sample_data))
# test = a.cal_avg_time_movement_db(sample_data)
# print(test)