# Example no. 1
0
print(text)

# Field codes to extract from each raw record, in output-column order.
columns = [
    "byr",  # Birth Year
    "iyr",  # Issue Year
    "eyr",  # Expiration Year
    "hgt",  # Height
    "hcl",  # Hair Color
    "ecl",  # Eye Color
    "pid",  # Passport ID
    "cid",  # Country ID
]

# Load the input without a header row. The first column is named "Raw" and is
# followed by one column per field code. Note that ``data`` is rebound from the
# input source to the resulting DataFrame.
data = pd.read_csv(data, names=["Raw", *columns], header=None)

def datacheck(check, field=None):
    """Extract the value of one ``key:value`` field from a raw record string.

    Args:
        check: Raw record text made of whitespace-separated ``key:value``
            tokens. Non-string input (for example the NaN pandas stores for
            an empty cell) is treated as containing no fields.
        field: Key to look up. When omitted, the module-level name ``col``
            is used, which is how single-argument callers select the field.

    Returns:
        The text between ``field:`` and the next whitespace, or ``None`` when
        the field is not present.
    """
    if field is None:
        # Backward-compatible path: the caller sets the global ``col``.
        field = col
    if not isinstance(check, str):
        return None
    # The key must start a token (no non-space character right before it) so
    # that a field at index 0 is found, and so that the key is not matched
    # inside another token's value or as the tail of a longer key.
    match = re.search(r"(?<!\S)" + re.escape(field) + r":(\S*)", check)
    return match.group(1) if match else None
# Fill one column per field code from the raw record text. The loop variable
# must stay named ``col``: datacheck looks up the module-level ``col`` to know
# which field to extract when called with a single argument.
for col in columns:
    data[col] = data["Raw"].apply(datacheck)

data = data.drop(columns=["Raw"])

# Missing fields per record = number of expected fields minus the non-null
# cells in those field columns (None/NaN are not counted by ``count``).
data['missing'] = len(columns) - data[columns].count(axis=1)
print(data['missing'].value_counts())
print(data)