Exemple #1
0
    def get_start_end_date_of_curr_movement_db(self, employer_name, list_employers_of_candidate):
        """Return the start and end dates of the current movement at an employer.

        The end date is the validated end date of the row labelled ``0``.
        Rows are then scanned in index order.  For each row whose employer
        name equals ``employer_name`` (case-insensitive), the row's start
        date is compared with the end date of the row labelled
        ``row_index + 1``.  The scan stops at the first matching row where
        that gap is at least 45 days, or where either date is unavailable
        (including when there is no following row).

        NOTE(review): the ``0`` / ``row_index + 1`` lookups presumably expect
        a default 0..n-1 integer index with the most recent job first -
        confirm against callers.

        Args:
            employer_name: Employer name to look for.
            list_employers_of_candidate: pandas DataFrame holding the
                ``templData.db_col_employer_name``,
                ``templData.db_col_start_date`` and
                ``templData.db_col_end_date`` columns.

        Returns:
            Tuple ``(str(start_date), str(end_date))``.  A date that could
            not be determined is rendered as the string ``"None"``.
        """
        # Minimum gap, in days, between two jobs that marks a new movement.
        min_gap_days = 45
        employers = list_employers_of_candidate
        cleanDate = CleanupDateTime()
        new_start_date = None
        new_end_date = None
        try:
            if not isinstance(employers, pd.DataFrame):
                raise AttributeError(
                    "Input list_employers_of_candidate is not a dataframe")
            if employers.empty:
                # Nothing to scan: both dates stay undetermined.
                return (str(new_start_date), str(new_end_date))

            # ``.at`` is the label-based scalar accessor that replaces the
            # deprecated ``DataFrame.get_value``.
            new_end_date = cleanDate.validateDateTime(
                employers.at[0, templData.db_col_end_date])
            wanted_name = employer_name.upper()
            for row_index in employers.index:
                row_name = employers.at[row_index, templData.db_col_employer_name]
                if row_name.upper() != wanted_name:
                    continue

                start_date_curr = cleanDate.validateDateTime(
                    employers.at[row_index, templData.db_col_start_date])
                next_index = row_index + 1
                if next_index in employers.index:
                    end_date_past = cleanDate.validateDateTime(
                        employers.at[next_index, templData.db_col_end_date])
                else:
                    # Last row: there is no following job to compare with, so
                    # treat it like a missing date instead of raising KeyError.
                    end_date_past = None

                if None in (start_date_curr, end_date_past):
                    new_start_date = start_date_curr
                    break

                gap_days = (strToDate(start_date_curr) - strToDate(end_date_past)).days
                if gap_days >= min_gap_days:
                    new_start_date = start_date_curr
                    break
        except AttributeError as err:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(err)
        except ValueError as err:
            print(err)

        return (str(new_start_date), str(new_end_date))
    def cal_avg_time_movement_db(self, list_employers_of_candidate):
        """Add each candidate's average time per employer to the employer rows.

        The input is cleaned with ``CleanupDateTime.cleanDateTimeOfDBRawData``
        and passed through ``self.cal_curr_time_employer_db``.  The mean of
        ``templData.temp_col_current_job_years`` per
        ``templData.db_col_candidate_id`` is then left-merged back onto the
        rows as ``templData.temp_col_avg_time``.

        Args:
            list_employers_of_candidate: Raw employer rows, as accepted by
                ``CleanupDateTime.cleanDateTimeOfDBRawData``.

        Returns:
            The rows with the average column merged in.  If an
            ``AttributeError`` is raised along the way it is printed and the
            last successfully computed value is returned (the untouched input
            when the failure happens before the per-employer calculation).
        """
        new_data = list_employers_of_candidate
        try:
            cleaned_data = CleanupDateTime().cleanDateTimeOfDBRawData(
                list_employers_of_candidate)
            # Calculate the time spent at each employer.
            new_data = self.cal_curr_time_employer_db(cleaned_data)

            candidate_col = templData.db_col_candidate_id
            years_col = templData.temp_col_current_job_years
            avg_per_candidate = new_data.groupby(
                candidate_col, as_index=False).agg({years_col: "mean"})
            avg_per_candidate = avg_per_candidate.rename(
                columns={years_col: templData.temp_col_avg_time})
            # Join on the candidate id once; the key used to be listed twice.
            new_data = pd.merge(new_data,
                                avg_per_candidate,
                                how='left',
                                on=candidate_col)
        except AttributeError as err:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(err)

        return new_data
Exemple #3
0
    def get_start_end_date_of_curr_movement_db(self, employer_name, list_employers_of_candidate):
        """Return the start and end dates of the current movement at an employer.

        The end date is the validated end date of the row labelled ``0``.
        Rows are then scanned in index order.  For each row whose employer
        name equals ``employer_name`` (case-insensitive), the row's start
        date is compared with the end date of the row labelled
        ``row_index + 1``.  The scan stops at the first matching row where
        that gap is at least 45 days, or where either date is unavailable
        (including when there is no following row).

        NOTE(review): the ``0`` / ``row_index + 1`` lookups presumably expect
        a default 0..n-1 integer index with the most recent job first -
        confirm against callers.

        Args:
            employer_name: Employer name to look for.
            list_employers_of_candidate: pandas DataFrame holding the
                ``templData.db_col_employer_name``,
                ``templData.db_col_start_date`` and
                ``templData.db_col_end_date`` columns.

        Returns:
            Tuple ``(str(start_date), str(end_date))``.  A date that could
            not be determined is rendered as the string ``"None"``.
        """
        # Minimum gap, in days, between two jobs that marks a new movement.
        min_gap_days = 45
        employers = list_employers_of_candidate
        cleanDate = CleanupDateTime()
        new_start_date = None
        new_end_date = None
        try:
            if not isinstance(employers, pd.DataFrame):
                raise AttributeError(
                    "Input list_employers_of_candidate is not a dataframe")
            if employers.empty:
                # Nothing to scan: both dates stay undetermined.
                return (str(new_start_date), str(new_end_date))

            # ``.at`` is the label-based scalar accessor that replaces the
            # deprecated ``DataFrame.get_value``.
            new_end_date = cleanDate.validateDateTime(
                employers.at[0, templData.db_col_end_date])
            wanted_name = employer_name.upper()
            for row_index in employers.index:
                row_name = employers.at[row_index, templData.db_col_employer_name]
                if row_name.upper() != wanted_name:
                    continue

                start_date_curr = cleanDate.validateDateTime(
                    employers.at[row_index, templData.db_col_start_date])
                next_index = row_index + 1
                if next_index in employers.index:
                    end_date_past = cleanDate.validateDateTime(
                        employers.at[next_index, templData.db_col_end_date])
                else:
                    # Last row: there is no following job to compare with, so
                    # treat it like a missing date instead of raising KeyError.
                    end_date_past = None

                if None in (start_date_curr, end_date_past):
                    new_start_date = start_date_curr
                    break

                gap_days = (strToDate(start_date_curr) - strToDate(end_date_past)).days
                if gap_days >= min_gap_days:
                    new_start_date = start_date_curr
                    break
        except AttributeError as err:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(err)
        except ValueError as err:
            print(err)

        return (str(new_start_date), str(new_end_date))
    def cal_avg_time_movement_db(self, list_employers_of_candidate):
        """Add each candidate's average time per employer to the employer rows.

        The input is cleaned with ``CleanupDateTime.cleanDateTimeOfDBRawData``
        and passed through ``self.cal_curr_time_employer_db``.  The mean of
        ``templData.temp_col_current_job_years`` per
        ``templData.db_col_candidate_id`` is then left-merged back onto the
        rows as ``templData.temp_col_avg_time``.

        Args:
            list_employers_of_candidate: Raw employer rows, as accepted by
                ``CleanupDateTime.cleanDateTimeOfDBRawData``.

        Returns:
            The rows with the average column merged in.  If an
            ``AttributeError`` is raised along the way it is printed and the
            last successfully computed value is returned (the untouched input
            when the failure happens before the per-employer calculation).
        """
        new_data = list_employers_of_candidate
        try:
            cleaned_data = CleanupDateTime().cleanDateTimeOfDBRawData(
                list_employers_of_candidate)
            # Calculate the time spent at each employer.
            new_data = self.cal_curr_time_employer_db(cleaned_data)

            candidate_col = templData.db_col_candidate_id
            years_col = templData.temp_col_current_job_years
            avg_per_candidate = new_data.groupby(
                candidate_col, as_index=False).agg({years_col: "mean"})
            avg_per_candidate = avg_per_candidate.rename(
                columns={years_col: templData.temp_col_avg_time})
            # Join on the candidate id once; the key used to be listed twice.
            new_data = pd.merge(new_data,
                                avg_per_candidate,
                                how='left',
                                on=candidate_col)
        except AttributeError as err:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(err)

        return new_data
Exemple #5
0
from src.cleanup.CleanupDateTime import CleanupDateTime
from src.cleanup.CleanMovement import CleanMovement
import  pandas as pd
import src.const.TemplateData as templData
import src.process.WorkingTimeCalculation as w

# Manual smoke test: load the sample CSV and print the result of
# WorkingTimeCalculation.cal_avg_time_movement_db for it.
# print() is called with a single argument throughout so the script behaves
# the same on Python 2 and parses on Python 3.
cl = CleanupDateTime()
test_data = "../data/test/data_test_group_by_employer.csv"
sample_data = pd.read_csv(test_data)
print("111")  # progress marker: the CSV was read

# Earlier experiments with the date clean-up helpers, kept for reference:
# print sample_data
# print  sample_data[templData.col_start_date][1]
# a = cl.cleanDateTimeOfRawData(sample_data)
# a = cl.validateDateTime("1995-05-01")
# print a

# `cm` is only used by the commented-out movement check below.
cm = CleanMovement()
# employer_name = "Epson Electronics America"
# # employer_data = a.ix[employer_name]
# b = cm.get_start_end_date_of_curr_movement_db(employer_name, sample_data)
# print b

a = w.WorkingTimeCalculation()
print(type(sample_data))
test = a.cal_avg_time_movement_db(sample_data)
print(test)



Exemple #6
0
from src.cleanup.CleanupDateTime import CleanupDateTime
from src.cleanup.CleanMovement import CleanMovement
import  pandas as pd
import src.const.TemplateData as templData
# import src.process.WorkingTimeCalculation as w

# Manual smoke test: print what CleanupDateTime.validateDateTime returns for
# a "Month YYYY" string.  print() is called with a single argument so the
# script behaves the same on Python 2 and parses on Python 3.
cl = CleanupDateTime()

# Earlier experiments against the sample CSV, kept for reference:
# test_data = "../data/test/data_test_group_by_employer.csv"
# sample_data = pd.read_csv(test_data)
# print "111"
# print sample_data
# print  sample_data[templData.col_start_date][1]
# a = cl.cleanDateTimeOfRawData(sample_data)
a = cl.validateDateTime("July 2013")
# a = cl.check_format("August 2013")
print(a)

# Movement and average-time checks, currently disabled:
# cm = CleanMovement()
# employer_name = "Epson Electronics America"
# # employer_data = a.ix[employer_name]
# b = cm.get_start_end_date_of_curr_movement_db(employer_name, sample_data)
# print b
#
# a = w.WorkingTimeCalculation()
# print type(sample_data)
# test = a.cal_avg_time_movement_db(sample_data)
# print test