from Metadata.DATA import data FamilyID = data()["fid16"] StillAlive = data()["alive_a16_p"] # print(FamilyID[0]) # print(StillAlive[0]) # quit() f = open(".\\HandledFile\\data.txt", 'r') ff = f.readlines() n = 2 # 注:储存的是FID16中家庭中只有一个人的数据,与下面的存储索引的列表区分 OnePeople = [] # 循环读取数据 for i in ff: try: if int(i[-3:]) != 1: pass else: OnePeople.append(int(eval(i[:-3]))) except: pass print("这是家庭中仅为一人的FID16编号:{0:}".format(OnePeople)) # 存储所有样本的的索引!注意 是索引! Code = [] # 通过对编码进行索引获取样本死亡的索引,请注意是索引! # 如果一个样本死亡,此列表将存储他的序列 并从Code中剔除 StateOfAlive = [] # 用来储存存活样本的索引! FinalList = [] # print(OnePeople) for i in OnePeople:
# Load the per-sample age and urban/rural files produced by the preprocessing
# scripts and turn them into single-column dicts for later analysis.
from sklearn.cluster import KMeans
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from Metadata.DATA import data

FamilyID = data()["fid16"]

# Context managers close the files as soon as their lines have been read.
with open("..\\HandledFile\\Age.txt", "r") as F:
    Age = F.readlines()

# A line containing "E" (the "Error" marker) becomes 0; any other line is kept
# as text without its newline. rstrip("\n") is used instead of [:-1] so a final
# line without a trailing newline does not lose its last character.
# NOTE(review): valid ages stay strings while missing ones are the int 0 --
# confirm the consumer copes with the mixed types.
AgeList = [0 if "E" in line else line.rstrip("\n") for line in Age]
AgeDict = {"Age": AgeList}
print(AgeDict)

with open("..\\HandledFile\\Urban.txt", "r") as F2:
    Urban = F2.readlines()

# Code each line: 1 if it contains "U", 2 if it contains "R", otherwise 0.
# NOTE(review): this is a substring test, so a label such as "Unknown" would
# be coded 1 and "Refuse" 2 -- verify against the labels actually present.
UrbanList = []
for line in Urban:
    if "U" in line:
        UrbanList.append(1)
    elif "R" in line:
        UrbanList.append(2)
    else:
        UrbanList.append(0)
UrbanDict = {"Urban": UrbanList}
print(UrbanDict)
# Compute, for every sample with a usable year of death, the number of years
# lived (year of death minus year of birth) and accumulate the total.
from Metadata.DATA import data

YearOfDeath = data()["ta4y_a16_p"]
YearOfBirth = data()["tb1y_a_p"]

n = 0            # samples whose year of death is not a string label
index = 0        # 1-based position of the sample being processed
YearOfLive = {}  # 1-based position -> years lived, or "Error"
error_num = 0    # samples whose lifespan could not be computed
# Total years lived over all computable samples; used for the average.
Full = 0

# Convert each column once instead of on every loop iteration.
DeathYears = YearOfDeath.to_numpy()
BirthYears = YearOfBirth.to_numpy()

for index, YearOfDeathCopy in enumerate(DeathYears, start=1):
    # String entries are labels rather than years; skip them.
    if isinstance(YearOfDeathCopy, str):
        continue
    n += 1
    YearOfBirthCopy = BirthYears[index - 1]
    try:
        YearOfLives = YearOfDeathCopy - YearOfBirthCopy
    except TypeError:
        # The birth year is not numeric (e.g. a string label), so the
        # subtraction is undefined for this sample.
        YearOfLive[index] = "Error"
        error_num += 1
    else:
        YearOfLive[index] = YearOfLives

print(n)
print(YearOfLive)
print("有{}个无法计算存活时间".format(error_num))

for values in YearOfLive.values():
    # "Error" is the only string ever stored; a type check avoids comparing
    # numeric values against a string.
    if not isinstance(values, str):
        Full += values
""" date:2019 03 07 author: 亓志国 本文件是对个体的最高受教育程度进行独热值编码,编码结果在HDE_Coding.txt文件中。 """ from Metadata.DATA import data from sklearn.preprocessing import OneHotEncoder import os data = data().dropna() HighestDegreeOfEducation = data()['tb4_a16_p'] X = HighestDegreeOfEducation.to_numpy().reshape(-1, 1) enc = OneHotEncoder(categories='auto').fit(X) FeatureName = OneHotEncoder(categories='auto').fit(X).get_feature_names() result = enc.transform(X).toarray() File_state = os.listdir(path=".\\HandledFile\\") if 'HDE_Coding.txt' in File_state: raise ValueError("文件已经存在!请删除HDE_Coding.txt文件重新运行。") else: pass f = open('HDE_Coding.txt', 'a') f.write('tb4_a16_p[HighestDegreeOfEducation]' + '\n' + str(FeatureName) + '\n') for i in result: f.write(str(i) + '\n') f.close() print("编码完毕,请查看HandledFile文件及下的HDE_Coding.txt文件。")
""" date:2019 03 10 author:亓志国 本文件是计算样本的年龄,计算结果在Age.txt文件中,供后续的使用。 """ from Metadata.DATA import data BirthOfYear = data()['tb1y_a_p'] OtherState = frozenset(["Not applicable", "Missing"]) AgeFile = open("..\\HandledFile\\Age.txt", "a") # AgeFile.write("tb1y_a_p[BirthOfYear,Now There Are Information Of Age]" + "\n") for _ in BirthOfYear: if str(_) in OtherState: AgeFile.write("Error\n") else: AgeFile.write(str(2019.0 - float(_)) + "\n") print("计算完毕,如需查看请打开Age.txt文件进行查看。") AgeFile.close()
"Senior high school/secondary school/technical school/vocational senior school": 12, "3-year college": 15, "4-year college": 16, "Not applicable": 0, "Master's degree": 19, "Missing": 0, "Unknown": 0, "Doctoral degree": 22, "Refuse": 0 } # 删除缺失值所在行的数据 HighestDegreeOfEducation = data()['tb4_a16_p'] F = HighestDegreeOfEducation.to_numpy() # 将数据写入到txt文档中,方便后续的调用 HDE_File = open('..\\HandledFile\\HDE_Coding.txt', 'a') # HDE_File.write('tb4_a16_p[HighestDegreeOfEducation]\n') for i in F: HDE_File.write(str(int(Lables.get(i, 0))) + '\n') HDE_File.close() print('编码完成!')
# Dump the fid_urban16 column to Urban.txt, one value per line.
from Metadata.DATA import data

Urban = data()["fid_urban16"].to_numpy()

# The context manager closes the file even if a write fails. str() keeps
# string values unchanged and lets non-string entries (e.g. NaN) be written
# instead of raising TypeError on `value + "\n"`.
with open(".\\HandledFile\\Urban.txt", "a") as UrbanFile:
    UrbanFile.writelines(str(i) + "\n" for i in Urban)