Example #1
0
from Metadata.DATA import data
# Columns read from the dataset returned by data().
FamilyID = data()["fid16"]
StillAlive = data()["alive_a16_p"]

# Read every line up front; the context manager closes the handle even when
# reading fails part-way.
with open(".\\HandledFile\\data.txt", 'r') as f:
    ff = f.readlines()
n = 2
# Note: holds the FID16 values of families that have exactly one member;
# not to be confused with the index lists defined further down.
OnePeople = []
# The last three characters of each line (newline included) are parsed as an
# integer; only lines where that integer is 1 contribute their leading part.
for i in ff:
    try:
        if int(i[-3:]) == 1:
            # NOTE(review): eval() executes whatever text the file contains.
            # Kept because the exact line format is not visible here; replace
            # with float()/int() once the format is confirmed.
            OnePeople.append(int(eval(i[:-3])))
    except Exception:
        # Best effort: lines that cannot be parsed are skipped, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        continue
print("这是家庭中仅为一人的FID16编号:{0:}".format(OnePeople))
# Holds the indices of all samples -- indices, not values!
Code = []
# Indices of deceased samples, found by indexing through the codes.
# When a sample is deceased its index is stored here and removed from Code.
StateOfAlive = []
# Holds the indices of the surviving samples.
FinalList = []

for i in OnePeople:
Example #2
0
from sklearn.cluster import KMeans
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from Metadata.DATA import data
FamilyID = data()["fid16"]

# Load the age file; the context manager guarantees the handle is closed.
with open("..\\HandledFile\\Age.txt", "r") as F:
    Age = F.readlines()
# Lines containing "E" become 0; every other line is kept as text with only
# its trailing newline removed. rstrip("\n") (rather than slicing off the last
# character) keeps the final line intact when the file has no newline at EOF.
AgeList = [0 if "E" in line else line.rstrip("\n") for line in Age]
AgeDict = {"Age": AgeList}
print(AgeDict)

# Load the urban/rural file the same way.
with open("..\\HandledFile\\Urban.txt", "r") as F2:
    Urban = F2.readlines()
# Encode each line: contains "U" -> 1, otherwise contains "R" -> 2, else 0.
UrbanList = [
    1 if "U" in line else 2 if "R" in line else 0
    for line in Urban
]
UrbanDict = {"Urban": UrbanList}
print(UrbanDict)
Example #3
0
from Metadata.DATA import data
YearOfDeath = data()["ta4y_a16_p"]
YearOfBirth = data()["tb1y_a_p"]
n = 0
index = 0
# Maps the 1-based sample position to its lifespan, or to "Error" when the
# subtraction could not be carried out.
YearOfLive = {}
error_num = 0
# Total lifespan over all samples, used to compute the average lifespan.
Full = 0

# Convert both columns once instead of on every iteration.
DeathYears = YearOfDeath.to_numpy()
BirthYears = YearOfBirth.to_numpy()
for index, (YearOfDeathCopy, YearOfBirthCopy) in enumerate(
        zip(DeathYears, BirthYears), start=1):
    # String entries in the death-year column are skipped entirely.
    if isinstance(YearOfDeathCopy, str):
        continue
    n += 1
    try:
        YearOfLives = YearOfDeathCopy - YearOfBirthCopy
    except (TypeError, ValueError):
        # The birth year is not something that can be subtracted.
        YearOfLive[index] = "Error"
        error_num += 1
    else:
        YearOfLive[index] = YearOfLives
print(n)
print(YearOfLive)
print("有{}个无法计算存活时间".format(error_num))
# Sum every lifespan that was computed successfully.
for values in YearOfLive.values():
    if values == "Error":
        continue
    Full += values
Example #4
0
"""
date:2019 03 07
author: 亓志国
This file one-hot encodes each individual's highest level of education; the encoded result is written to HDE_Coding.txt.
"""
from Metadata.DATA import data
from sklearn.preprocessing import OneHotEncoder
import os
# Drop rows with missing values. The result gets its own name: rebinding
# `data` to the DataFrame made the following `data()` call raise TypeError.
frame = data().dropna()
HighestDegreeOfEducation = frame['tb4_a16_p']
# OneHotEncoder expects a 2-D array: one column, one row per sample.
X = HighestDegreeOfEducation.to_numpy().reshape(-1, 1)
enc = OneHotEncoder(categories='auto').fit(X)
# Reuse the fitted encoder instead of fitting a second one just for the names.
# NOTE(review): newer scikit-learn releases expose get_feature_names_out()
# instead -- confirm against the installed version.
FeatureName = enc.get_feature_names()
result = enc.transform(X).toarray()

# Refuse to overwrite a previous run. The file is checked for and written in
# the same directory (the original checked HandledFile but wrote to the CWD).
OutputDir = ".\\HandledFile\\"
File_state = os.listdir(path=OutputDir)
if 'HDE_Coding.txt' in File_state:
    raise ValueError("文件已经存在!请删除HDE_Coding.txt文件重新运行。")

# Header line, feature names, then one encoded row per line.
with open(os.path.join(OutputDir, 'HDE_Coding.txt'), 'a') as f:
    f.write('tb4_a16_p[HighestDegreeOfEducation]' + '\n' + str(FeatureName) + '\n')
    for i in result:
        f.write(str(i) + '\n')

print("编码完毕,请查看HandledFile文件及下的HDE_Coding.txt文件。")
Example #5
0
"""
date:2019 03 10
author:亓志国
This file computes each sample's age; the results are written to Age.txt for later use.
"""
from Metadata.DATA import data
BirthOfYear = data()['tb1y_a_p']
# Values whose string form is one of these labels get an "Error" line.
OtherState = frozenset(["Not applicable", "Missing"])
# Append one line per sample; the context manager closes the file even if a
# value cannot be converted by float().
with open("..\\HandledFile\\Age.txt", "a") as AgeFile:
    for BirthYear in BirthOfYear:
        if str(BirthYear) in OtherState:
            AgeFile.write("Error\n")
        else:
            # Age is computed relative to the hard-coded year 2019.
            AgeFile.write(str(2019.0 - float(BirthYear)) + "\n")

print("计算完毕,如需查看请打开Age.txt文件进行查看。")
Example #6
0
    "Senior high school/secondary school/technical school/vocational senior school":
    12,
    "3-year college":
    15,
    "4-year college":
    16,
    "Not applicable":
    0,
    "Master's degree":
    19,
    "Missing":
    0,
    "Unknown":
    0,
    "Doctoral degree":
    22,
    "Refuse":
    0
}
# Read the highest-education column. No rows are dropped here: any value
# missing from Lables is written as 0 through the .get() default.
HighestDegreeOfEducation = data()['tb4_a16_p']
F = HighestDegreeOfEducation.to_numpy()
# Append one integer code per sample to a text file for later use; the
# context manager closes the file even if a lookup or conversion fails.
with open('..\\HandledFile\\HDE_Coding.txt', 'a') as HDE_File:
    for i in F:
        HDE_File.write(str(int(Lables.get(i, 0))) + '\n')

print('编码完成!')
Example #7
0
from Metadata.DATA import data
Urban = data()["fid_urban16"].to_numpy()
# Append one value per line; the context manager closes the file even if an
# entry cannot be concatenated with "\n".
with open(".\\HandledFile\\Urban.txt", "a") as UrbanFile:
    for i in Urban:
        UrbanFile.write(i + "\n")