def allYears_df_name_salary_position():
    """Collect the per-year name/salary/position frames into one dict.

    Every report file name of the form ``srYY.txt`` is reduced to its
    two-digit year id ``YY``; that id is handed to
    ``df_name_salary_position`` together with ``True``.

    Returns:
        dict: maps each two-digit year id string ("96" through "14") to
        whatever ``df_name_salary_position`` returned for that year.
    """
    report_files = (
        "sr96.txt", "sr97.txt", "sr98.txt", "sr99.txt",
        "sr00.txt", "sr01.txt", "sr02.txt", "sr03.txt",
        "sr04.txt", "sr05.txt", "sr06.txt", "sr07.txt",
        "sr08.txt", "sr09.txt", "sr10.txt", "sr11.txt",
        "sr12.txt", "sr13.txt", "sr14.txt",
    )

    frames_by_year = {}
    for report_file in report_files:
        # Strip the "sr" prefix and ".txt" suffix to leave only the year id.
        year_id = report_file.replace("sr", "").replace(".txt", "")
        frames_by_year[year_id] = df_name_salary_position(year_id, True)

    return frames_by_year
def make_csv_allData():
    """Write every year's name/position/salary rows into a single CSV file.

    For each two-digit year key ("96" through "14") the frame returned by
    ``df_name_salary_position`` gets a ``Year`` column holding the
    four-digit year. The per-year frames are then concatenated and written
    to ``uvm_employee_salary_data_1994-2014.csv`` with the columns
    ``Year``, ``Name``, ``Position`` and ``Salary`` and no index column.

    Raises:
        ValueError: if a year's ``salary_list``, ``employee_list`` and
            ``position_list`` do not all have the same length.
    """
    # Keyed by the two-digit year string (e.g. "96"); each value is a dict
    # of lists that is indexed below by "salary_list", "employee_list" and
    # "position_list".
    master_dictionary = allYears_listData_dict()
    k_years = [
        "96", "97", "98", "99", "00", "01", "02", "03", "04", "05",
        "06", "07", "08", "09", "10", "11", "12", "13", "14",
    ]

    df_list = []
    for year_key in k_years:
        year_data = master_dictionary[year_key]
        df = df_name_salary_position(year_key, True)

        # All three lists must line up row for row. The previous check only
        # tested that position_list was non-empty, and on a mismatch fell
        # through to assigning an empty Year column, which failed inside
        # pandas with an unhelpful length error.
        n_salary = len(year_data["salary_list"])
        n_employee = len(year_data["employee_list"])
        n_position = len(year_data["position_list"])
        if not (n_salary == n_employee == n_position):
            raise ValueError(
                "year {0}: list lengths differ (salary={1}, employee={2}, "
                "position={3})".format(
                    year_key, n_salary, n_employee, n_position
                )
            )

        # Two-digit keys below 20 are treated as 20xx, everything else as
        # 19xx; this pivot has to be revisited for data from 2020 onward.
        century = "20" if int(year_key) < 20 else "19"
        year_str = century + year_key

        # One Year entry per row; pandas still rejects the assignment if
        # the frame's row count differs from the list length.
        df["Year"] = [year_str] * n_salary
        df_list.append(df)

    df_allData = pd.concat(df_list)

    # NOTE(review): the file name says 1994 although the earliest year key
    # above is "96"; left unchanged because other code may rely on the path.
    df_allData.to_csv(
        path_or_buf="uvm_employee_salary_data_1994-2014.csv",
        index=False,
        columns=["Year", "Name", "Position", "Salary"],
    )
def allYears_df_name_salary_position():
    """Return a dict of per-year name/salary/position frames.

    The two-digit year id is taken from each ``srYY.txt`` report file name
    by removing the ``sr`` and ``.txt`` parts, and
    ``df_name_salary_position(year_id, True)`` is called once per file in
    list order.

    Returns:
        dict: two-digit year id string ("96" through "14") mapped to the
        result of ``df_name_salary_position`` for that year.
    """
    report_files = [
        "sr96.txt", "sr97.txt", "sr98.txt", "sr99.txt", "sr00.txt",
        "sr01.txt", "sr02.txt", "sr03.txt", "sr04.txt", "sr05.txt",
        "sr06.txt", "sr07.txt", "sr08.txt", "sr09.txt", "sr10.txt",
        "sr11.txt", "sr12.txt", "sr13.txt", "sr14.txt",
    ]
    # Reduce each file name to the bare year id used as the dict key.
    year_ids = [
        name.replace("sr", "").replace(".txt", "") for name in report_files
    ]
    return {
        year_id: df_name_salary_position(year_id, True)
        for year_id in year_ids
    }
예제 #4
0
def make_csv_allData():
    """Combine all yearly salary frames and export them as one CSV file.

    Each two-digit year key ("96" through "14") is expanded to a four-digit
    year, which is stored in a new ``Year`` column on the frame returned by
    ``df_name_salary_position`` for that key. The frames are concatenated
    and written to ``uvm_employee_salary_data_1994-2014.csv`` with the
    columns ``Year``, ``Name``, ``Position`` and ``Salary`` and no index.

    Raises:
        ValueError: if a year's ``salary_list``, ``employee_list`` and
            ``position_list`` do not all have the same length.
    """
    # Keyed by the two-digit year string (e.g. "96"); each value is a dict
    # of lists read below under "salary_list", "employee_list" and
    # "position_list".
    master_dictionary = allYears_listData_dict()
    k_years = [
        "96", "97", "98", "99", "00", "01", "02", "03", "04", "05", "06", "07",
        "08", "09", "10", "11", "12", "13", "14"
    ]

    df_list = []
    for year_key in k_years:
        year_data = master_dictionary[year_key]
        df = df_name_salary_position(year_key, True)

        # Require all three lists to have the same length. The earlier
        # condition `len(a) == len(b) and len(c)` only checked that
        # position_list was non-empty, and a mismatch led to an empty Year
        # column being assigned, which pandas rejects with an opaque error.
        n_salary = len(year_data["salary_list"])
        n_employee = len(year_data["employee_list"])
        n_position = len(year_data["position_list"])
        if not (n_salary == n_employee == n_position):
            raise ValueError(
                "year {0}: list lengths differ (salary={1}, employee={2}, "
                "position={3})".format(
                    year_key, n_salary, n_employee, n_position
                )
            )

        # Keys below 20 belong to the 2000s, the rest to the 1900s; this
        # cut-off must change once data from 2020 onward is added.
        if int(year_key) < 20:
            year_str = "20" + year_key
        else:
            year_str = "19" + year_key

        # One Year value per row; pandas still raises if the frame's row
        # count does not match the list length.
        df["Year"] = [year_str] * n_salary
        df_list.append(df)

    df_allData = pd.concat(df_list)

    # NOTE(review): the file name says 1994 although the earliest year key
    # above is "96"; kept as-is because other code may depend on the path.
    df_allData.to_csv(path_or_buf="uvm_employee_salary_data_1994-2014.csv",
                      index=False,
                      columns=["Year", "Name", "Position", "Salary"])