def allYears_df_name_salary_position():
    """Collect the per-year name/salary/position data for every salary file.

    Each file name of the form "srYY.txt" is reduced to its two-character
    year id "YY", which is passed to ``df_name_salary_position(year_id, True)``.

    NOTE(review): a function with this same name is defined again later in
    this file; the later definition shadows this one.

    Returns:
        dict: maps each year id (e.g. "96", "00", "14") to whatever
        ``df_name_salary_position`` returns for it (presumably a pandas
        DataFrame -- confirm against that helper).
    """
    salary_files = (
        "sr96.txt", "sr97.txt", "sr98.txt", "sr99.txt", "sr00.txt",
        "sr01.txt", "sr02.txt", "sr03.txt", "sr04.txt", "sr05.txt",
        "sr06.txt", "sr07.txt", "sr08.txt", "sr09.txt", "sr10.txt",
        "sr11.txt", "sr12.txt", "sr13.txt", "sr14.txt",
    )

    frames_by_year = {}
    for salary_file in salary_files:
        # Strip the "sr" prefix and the ".txt" extension to leave the year id.
        year_id = salary_file.replace("sr", "").replace(".txt", "")
        frames_by_year[year_id] = df_name_salary_position(year_id, True)
    return frames_by_year
def make_csv_allData():
    """Write one CSV containing the salary data of every year, with a Year column.

    For each two-digit year id the per-year list data is looked up in the
    dictionary returned by ``allYears_listData_dict()`` and the per-year frame
    is obtained from ``df_name_salary_position(year_id, True)``. A "Year"
    column holding the four-digit year is added to each frame, the frames are
    concatenated, and the result is written to
    "uvm_employee_salary_data_1994-2014.csv" with the columns
    Year, Name, Position, Salary (no index column).

    NOTE(review): this function is defined twice in this file; the later
    definition is the one in effect.
    NOTE(review): the output file name says 1994 but the first year processed
    here is 1996 -- confirm before renaming, other code may rely on the name.

    Raises:
        ValueError: if the salary, employee and position lists of a year do
            not all have the same length.
    """
    # Key is the two-digit year (e.g. "96"); value is a dictionary of lists
    # with at least "salary_list", "employee_list" and "position_list".
    master_dictionary = allYears_listData_dict()
    k_years = ["96", "97", "98", "99", "00", "01", "02", "03", "04", "05",
               "06", "07", "08", "09", "10", "11", "12", "13", "14"]

    df_list = []
    for year_key in k_years:
        year_data = master_dictionary[year_key]
        df = df_name_salary_position(year_key, True)

        # All three lists must have the same length. The previous condition
        # only tested that position_list was non-empty, so a mismatch could
        # slip through or fail later with an unhelpful pandas error.
        n_salaries = len(year_data["salary_list"])
        n_employees = len(year_data["employee_list"])
        n_positions = len(year_data["position_list"])
        if not (n_salaries == n_employees == n_positions):
            raise ValueError(
                "list lengths differ for year %s: salary=%d, employee=%d, "
                "position=%d" % (year_key, n_salaries, n_employees, n_positions)
            )

        # Ids below 20 belong to the 2000s, the rest to the 1900s.
        # Will have to change once data from 2020 onward is added!
        if int(year_key) < 20:
            year_str = "20" + year_key
        else:
            year_str = "19" + year_key

        # Add the year as a new column next to name, position and salary.
        df["Year"] = [year_str] * n_salaries
        df_list.append(df)

    # Stack the per-year frames and write them out as a single CSV.
    df_allData = pd.concat(df_list)
    df_allData.to_csv(path_or_buf="uvm_employee_salary_data_1994-2014.csv",
                      index=False,
                      columns=["Year", "Name", "Position", "Salary"])
def allYears_df_name_salary_position():
    """Return a dict of per-year results keyed by two-character year id.

    The year id is what remains of each "srYY.txt" file name after removing
    "sr" and ".txt"; the value stored under it is the result of
    ``df_name_salary_position(year_id, True)`` (presumably a pandas
    DataFrame -- confirm against that helper).

    NOTE(review): an earlier definition with the same name exists in this
    file; this later one replaces it when the module is loaded.
    """
    file_names = ["sr96.txt", "sr97.txt", "sr98.txt", "sr99.txt", "sr00.txt",
                  "sr01.txt", "sr02.txt", "sr03.txt", "sr04.txt", "sr05.txt",
                  "sr06.txt", "sr07.txt", "sr08.txt", "sr09.txt", "sr10.txt",
                  "sr11.txt", "sr12.txt", "sr13.txt", "sr14.txt"]

    # "sr96.txt" -> "96": drop the prefix and the extension.
    year_ids = [name.replace("sr", "").replace(".txt", "")
                for name in file_names]

    # One helper call per year, in file order.
    return {year_id: df_name_salary_position(year_id, True)
            for year_id in year_ids}
def make_csv_allData():
    """Combine every year's salary data into a single CSV with a Year column.

    Steps, per two-digit year id in ``k_years``:
      1. look up that year's dictionary of lists in the result of
         ``allYears_listData_dict()``;
      2. fetch the year's frame via ``df_name_salary_position(year_id, True)``;
      3. verify that "salary_list", "employee_list" and "position_list" have
         the same length;
      4. add a "Year" column holding the four-digit year.
    The frames are then concatenated and written, without the index, to
    "uvm_employee_salary_data_1994-2014.csv" using the column order
    Year, Name, Position, Salary.

    NOTE(review): an earlier definition with the same name exists in this
    file; this later one replaces it when the module is loaded.
    NOTE(review): the file name mentions 1994 although the earliest year
    handled here is 1996 -- left unchanged in case something depends on it.

    Raises:
        ValueError: if the three lists of any year differ in length.
    """
    # Key is "96"-style year id, value is that year's dictionary of lists.
    master_dictionary = allYears_listData_dict()
    k_years = [
        "96", "97", "98", "99", "00", "01", "02", "03", "04", "05",
        "06", "07", "08", "09", "10", "11", "12", "13", "14"
    ]

    df_list = []
    for year_id in k_years:
        year_data = master_dictionary[year_id]
        df = df_name_salary_position(year_id, True)

        # Compare all three lengths. The old test
        # `len(a) == len(b) and len(c)` only checked that the position list
        # was non-empty, not that it matched the other two.
        lengths = {
            "salary_list": len(year_data["salary_list"]),
            "employee_list": len(year_data["employee_list"]),
            "position_list": len(year_data["position_list"]),
        }
        if len(set(lengths.values())) != 1:
            raise ValueError(
                "list lengths differ for year %s: salary=%d, employee=%d, "
                "position=%d" % (year_id,
                                 lengths["salary_list"],
                                 lengths["employee_list"],
                                 lengths["position_list"])
            )

        # Two-digit ids under 20 are 20xx, everything else is 19xx.
        # Will have to change once data from 2020 onward is added!
        century = "20" if int(year_id) < 20 else "19"
        year_str = century + year_id

        # New column alongside name, position and salary.
        df["Year"] = [year_str] * lengths["salary_list"]
        df_list.append(df)

    # Concatenate the yearly frames and make a CSV of all the data.
    df_allData = pd.concat(df_list)
    df_allData.to_csv(path_or_buf="uvm_employee_salary_data_1994-2014.csv",
                      index=False,
                      columns=["Year", "Name", "Position", "Salary"])