Esempio n. 1
0
def _save_wp_as_csv(env_path, out_file_path, wp_ids):
    """Copy the workplaces listed in *wp_ids* into a new CSV file.

    Reads ``<env_path>/workplaces.csv`` line by line.  The header row
    (recognised by ``wkb_geometry`` in the sixth cell) and every row whose
    second cell is in *wp_ids* are written to *out_file_path* with extra
    leading content: the header gets ``made-longitude,made-latitude``, data
    rows get whatever ``wp.to_long_lat_from_hex`` returns for the geometry
    (presumably a ``longitude,latitude`` string -- confirm against ``wp``).

    Args:
        env_path: Directory that contains ``workplaces.csv``.
        out_file_path: Path of the output file; it is overwritten.
        wp_ids: Set of workplace ID strings to keep.

    Raises:
        Exception: If an ID in *wp_ids* (other than ``''`` and
            ``'workplace_id'``) never appears in the input file.
        IndexError: If an input line has fewer than six comma-separated
            cells.
    """
    in_file_path = env_path + '/workplaces.csv'
    columns = 8
    seen_ids = set()
    with open(out_file_path, 'w') as fout:
        print('writing', os.path.abspath(out_file_path))
        with open(in_file_path) as fin:
            print('reading', in_file_path)
            for raw in fin:
                line = raw.strip('\n')
                cells = line.split(',')
                # Drop the first and last character of the geometry cell
                # (presumably surrounding quotes) before using it.
                wkb_hex = cells[5][1:-1]
                wp_id = cells[1]
                if wkb_hex == 'wkb_geometry':
                    row = 'made-longitude,made-latitude,' + line
                    aid.write_and_check_columns(fout, row, columns)
                elif wp_id in wp_ids:
                    row = wp.to_long_lat_from_hex(wkb_hex) + ',' + line
                    aid.write_and_check_columns(fout, row, columns)
                # Every ID read is recorded, whether or not it was written,
                # so the check below reports only IDs absent from the file.
                seen_ids.add(wp_id)
    missing = wp_ids.difference(seen_ids)
    # These two values are not real workplaces and are never reported.
    missing.discard('')
    missing.discard('workplace_id')
    if missing:
        # One message string (not two positional args) so the exception
        # prints as a sentence, matching _save_sc_as_csv.
        raise Exception(str(missing) + " are not found!")
Esempio n. 2
0
def _save_pp_as_csv(in_file_paths, pp_path, gq_pp_path):
    """Merge person files into a household CSV and a group-quarters CSV.

    Every data row is extended with a ``made-sporder`` cell: the 1-based
    position of the row among the rows seen so far with the same household
    ID (cell 7).  Rows whose cell 1 equals ``'1'`` go to *pp_path*, all
    other rows go to *gq_pp_path*.  Only the first header line (a line
    starting with ``RT``) across all inputs is written, to both outputs.

    A warning is printed, and the row is still written, when a row has a
    school ID and an age above 19.

    Args:
        in_file_paths: Iterable of input CSV paths, read in order.
        pp_path: Output path for rows of type ``'1'``.
        gq_pp_path: Output path for every other row.

    Returns:
        A tuple ``(hids, sc_ids, wp_ids)`` of sets: household IDs of rows
        whose cell 17 is ``'0'``, all school-ID cells, and all workplace-ID
        cells (both may include the empty string).
    """
    type_column = 1
    hid_column = 7
    age_column = 14
    relp_column = 17
    school_column = 26
    workplace_column = 27
    columns = 29
    members_seen = {}
    hids = set()
    wp_ids = set()
    sc_ids = set()
    aid.mkdir(pp_path)
    aid.mkdir(gq_pp_path)
    emit = aid.write_and_check_columns
    with open(pp_path, 'w') as pp_csv, open(gq_pp_path, 'w') as gq_pp_csv:
        print('writing', os.path.abspath(pp_path), os.path.abspath(gq_pp_path))
        headers_seen = 0
        for in_file_path in in_file_paths:
            with open(in_file_path, 'r') as fin:
                print('reading', in_file_path)
                for raw in fin:
                    line = raw.strip('\n')
                    if line.startswith('RT'):
                        headers_seen += 1
                        # Only the very first header is copied to the outputs.
                        if headers_seen == 1:
                            header = line + ',made-sporder'
                            for out in (pp_csv, gq_pp_csv):
                                emit(out, header, columns)
                        continue
                    cells = line.split(',')
                    school_id = cells[school_column]
                    age = cells[age_column]
                    if school_id and int(age) > 19:
                        print('Warning: too old at age of', age,
                              'to go to school ID =', school_id, ':', line)
                    sc_ids.add(school_id)
                    hid = cells[hid_column]
                    if cells[relp_column] == '0':
                        hids.add(hid)
                    order = members_seen.get(hid, 0) + 1
                    members_seen[hid] = order
                    wp_ids.add(cells[workplace_column])
                    if cells[type_column] == '1':
                        out = pp_csv
                    else:
                        out = gq_pp_csv
                    emit(out, line + ',' + str(order), columns)
    return hids, sc_ids, wp_ids
Esempio n. 3
0
def _save_hh_as_csv(in_file_paths, hid2cnt, hh_path, gq_path):
    """Write one household row per distinct SYNTHETIC_HID.

    Input column layout (index of the first column on each line):

         0 SERIALNO,puma_id,place_id,SYNTHETIC_HID,longitude,
         5 latitude,AGEGRP,HRSWRK,IMMSTAT,INCTAX,
        10 MODE,OCC,POB,RELIGION,SEX,
        15 SYNTHETIC_PID

    The first row seen for each household ID is written to *hh_path* with
    two appended cells: an empty ``made-empty`` cell and ``made-persons``,
    taken from ``hid2cnt[hid]``.  *gq_path* receives only the header row.
    Only the first header line (starting with ``SERIALNO``) across all
    inputs is written.

    Args:
        in_file_paths: Iterable of input CSV paths, read in order.
        hid2cnt: Mapping from household ID to its person count.
        hh_path: Output path for household rows.
        gq_path: Output path that gets the header only.

    Raises:
        KeyError: If a household ID in the input is missing from *hid2cnt*.
    """
    hid_column = 3
    more_header = 'made-empty,made-persons'
    columns = 18
    aid.mkdir(hh_path)
    # gq_path is opened for writing below, so it gets the same preparation
    # as hh_path (the sibling _save_* functions prepare both outputs).
    aid.mkdir(gq_path)
    with open(hh_path, 'w') as hh_csv, open(gq_path, 'w') as gq_csv:
        print('writing', os.path.abspath(hh_path), os.path.abspath(gq_path))
        file_count = 0
        hids = set()
        for in_file_path in in_file_paths:
            print('reading', in_file_path)
            with open(in_file_path, 'r') as fin:
                for raw in fin:
                    line = raw.strip('\n')
                    if line.startswith('SERIALNO'):
                        file_count += 1
                        # Headers of the second and later files are dropped.
                        if file_count == 1:
                            row = line + ',' + more_header
                            aid.write_and_check_columns(hh_csv, row, columns)
                            aid.write_and_check_columns(gq_csv, row, columns)
                        continue
                    hid = line.split(',')[hid_column]
                    if hid in hids:
                        # Only the first row of each household is kept.
                        continue
                    hids.add(hid)
                    # The doubled comma leaves the made-empty cell blank.
                    row = line + ',,' + str(hid2cnt[hid])
                    aid.write_and_check_columns(hh_csv, row, columns)
Esempio n. 4
0
def _save_hh_as_csv(in_file_paths, hh_path, gq_path):
    """Extract the rows whose RELP cell is ``'0'`` into two CSV files.

    A line is treated as a header when its cell 17 equals ``'RELP'``; only
    the first header across all inputs is written (to both outputs, with
    ``made-gq_type`` appended).  Each data row with ``'0'`` in cell 17 is
    written with one empty trailing cell: to *hh_path* when cell 1 equals
    ``'1'``, otherwise to *gq_path*.  All other rows are skipped.

    Args:
        in_file_paths: Iterable of input CSV paths, read in order.
        hh_path: Output path for rows of type ``'1'``.
        gq_path: Output path for the remaining ``'0'`` rows.
    """
    type_column = 1
    relp_column = 17
    columns = 29
    aid.mkdir(hh_path)
    aid.mkdir(gq_path)
    emit = aid.write_and_check_columns
    with open(hh_path, 'w') as hh_csv, open(gq_path, 'w') as gq_csv:
        print('writing', os.path.abspath(hh_path), os.path.abspath(gq_path))
        headers_seen = 0
        for in_file_path in in_file_paths:
            print('reading', in_file_path)
            with open(in_file_path, 'r') as fin:
                for raw in fin:
                    line = raw.strip('\n')
                    cells = line.split(',')
                    relate = cells[relp_column]
                    if relate == 'RELP':
                        headers_seen += 1
                        if headers_seen == 1:
                            header = line + ',made-gq_type'
                            for out in (hh_csv, gq_csv):
                                emit(out, header, columns)
                    elif relate == '0':
                        if cells[type_column] == '1':
                            out = hh_csv
                        else:
                            out = gq_csv
                        # The trailing comma adds the empty made-gq_type cell.
                        emit(out, line + ',', columns)
Esempio n. 5
0
def _save_sc_as_csv(env_path, out_file_path, sc_ids):
    """Copy the schools listed in *sc_ids* into one combined CSV file.

    Reads ``public_schools.csv`` and then ``private_schools.csv`` from
    *env_path*.  The first header line (starting with ``"","School"``) is
    written with ``made-empty`` appended; later headers are skipped.  A
    data row is written when its third cell, minus its first and last
    character, is in *sc_ids*.  Such a row gets an empty trailing cell, and
    if it is then still shorter than 11 cells, two empty cells are inserted
    before cell 5.

    Args:
        env_path: Directory that contains the two school files.
        out_file_path: Path of the output file; it is overwritten.
        sc_ids: Set of school ID strings to keep.

    Returns:
        The set of all school IDs read from the input files.

    Raises:
        Exception: If an ID in *sc_ids* (other than ``''`` and
            ``'school_id'``) never appears in the input files.
    """
    long_column = 5
    columns = 11
    in_file_paths = [
        env_path + '/' + name
        for name in ('public_schools.csv', 'private_schools.csv')
    ]
    ids = set()
    with open(out_file_path, 'w') as fout:
        print('writing', os.path.abspath(out_file_path))
        headers_seen = 0
        for in_file_path in in_file_paths:
            with open(in_file_path) as fin:
                print('reading', in_file_path)
                for raw in fin:
                    cells = raw.rstrip('\n').split(',')
                    sc_id = cells[2][1:-1]
                    if raw.startswith('"","School"'):
                        headers_seen += 1
                        if headers_seen > 1:
                            # Repeated header: skipped entirely, including
                            # the ID bookkeeping at the end of the loop.
                            continue
                        header = ','.join(cells) + ',made-empty'
                        aid.write_and_check_columns(fout, header, columns)
                    elif sc_id in sc_ids:
                        cells.append('')
                        head = ','.join(cells[:long_column])
                        tail = ','.join(cells[long_column:])
                        # Short rows get two blank cells before cell 5.
                        padding = ',,' if len(cells) < columns else ''
                        row = head + padding + ',' + tail
                        aid.write_and_check_columns(fout, row, columns)
                    ids.add(sc_id)
    missing = sc_ids.difference(ids)
    for placeholder in ('', 'school_id'):
        missing.discard(placeholder)
    if missing:
        raise Exception(str(missing) + " are not found!")
    return ids
Esempio n. 6
0
def _save_hh_as_csv(in_file_paths, hid2hincome, hh_path, gq_path):
    """Write one row per distinct SYNTHETIC_HID with derived columns.

    Output column layout (index of the first column on each line):

         0 COUNTRY,YEAR,SERIALNO,PERSONS,puma_id,
         5 HHTYPE,PERNUM,place_id,SYNTHETIC_HID,longitude,
        10 latitude,AGE,SEX,RACE,SCHOOL,
        15 INCTOT,SYNTHETIC_PID+made-age,made-race,made-income,
        20 made-empty

    The first row seen for each household ID is extended with
    ``_to_agep(AGE)``, the ``race2rac1p`` translation of RACE (the raw
    value when there is no entry), ``hid2hincome[hid]`` and an empty cell.
    Rows with HHTYPE ``'11'`` go to *gq_path*, the rest to *hh_path*.  Only
    the first header line (starting with ``COUNTRY``) is written, to both
    outputs.  A warning is printed when PERSONS exceeds 20.

    Args:
        in_file_paths: Iterable of input CSV paths, read in order.
        hid2hincome: Mapping from household ID to household income.
        hh_path: Output path for rows whose HHTYPE is not ``'11'``.
        gq_path: Output path for rows whose HHTYPE is ``'11'``.
    """
    persons_column = 3
    hhtype_column = 5
    hid_column = 8
    age_column = 11
    race_column = 13
    more_header = 'made-age,made-race,made-income,made-empty'
    columns = 21
    written_hids = set()
    aid.mkdir(hh_path)
    aid.mkdir(gq_path)
    emit = aid.write_and_check_columns
    with open(hh_path, 'w') as hh_csv, open(gq_path, 'w') as gq_csv:
        print('writing', os.path.abspath(hh_path), os.path.abspath(gq_path))
        headers_seen = 0
        for in_file_path in in_file_paths:
            with open(in_file_path, 'r') as fin:
                print('reading', in_file_path)
                for raw in fin:
                    line = raw.strip('\n')
                    if line.startswith('COUNTRY'):
                        headers_seen += 1
                        if headers_seen == 1:
                            header = line + ',' + more_header
                            for out in (hh_csv, gq_csv):
                                emit(out, header, columns)
                        continue
                    cells = line.split(',')
                    hid = cells[hid_column]
                    if hid in written_hids:
                        # Later rows of an already written household.
                        continue
                    written_hids.add(hid)
                    age = cells[age_column]
                    race = cells[race_column]
                    made_cells = [
                        _to_agep(age),
                        str(race2rac1p.get(race, race)),
                        str(hid2hincome[hid]),
                        '',
                    ]
                    row = ','.join([line] + made_cells)
                    if cells[hhtype_column] == '11':
                        out = gq_csv
                    else:
                        out = hh_csv
                    emit(out, row, columns)
                    persons = cells[persons_column]
                    if int(persons) > 20:
                        print('Warning: max persons of NP is 20 but got',
                              persons, ':', row)
Esempio n. 7
0
def _save_pp_as_csv(in_file_paths, pp_path, gq_path):
    """Write person rows with derived columns and total income per household.

    Output column layout (index of the first column on each line):

         0 COUNTRY,YEAR,SERIALNO,PERSONS,puma_id,
         5 HHTYPE,PERNUM,place_id,SYNTHETIC_HID,longitude,
        10 latitude,AGE,SEX,RACE,SCHOOL,
        15 INCTOT,SYNTHETIC_PID+made-sporder,made-age,made-empty,
        20 made-race

    Each data row is extended with its 1-based order within the household,
    ``_to_agep(AGE)``, an empty cell, and the ``race2rac1p`` translation of
    RACE (the raw value when there is no entry).  Rows with HHTYPE ``'11'``
    go to *gq_path*, the rest to *pp_path*.  Only the first header line
    (starting with ``COUNTRY``) is written, to both outputs.

    Args:
        in_file_paths: Iterable of input CSV paths, read in order.
        pp_path: Output path for rows whose HHTYPE is not ``'11'``.
        gq_path: Output path for rows whose HHTYPE is ``'11'``.

    Returns:
        A tuple ``(hids, hid2hincome)``: the set of household IDs seen and
        a dict mapping each household ID to the sum of its INCTOT cells.

    Raises:
        ValueError: If an INCTOT cell is neither empty nor an integer.
    """
    hhtype_column = 5
    hid_column = 8
    age_column = 11
    race_column = 13
    inctot_column = 15
    more_header = 'made-sporder,made-age,made-empty,made-race'
    columns = 21
    hid2cnt = {}
    hid2hincome = {}
    aid.mkdir(pp_path)
    aid.mkdir(gq_path)
    with open(pp_path, 'w') as pp_csv, open(gq_path, 'w') as gq_csv:
        print('writing', os.path.abspath(pp_path), os.path.abspath(gq_path))
        file_count = 0
        for in_file_path in in_file_paths:
            with open(in_file_path, 'r') as fin:
                print('reading', in_file_path)
                for raw in fin:
                    line = raw.rstrip('\n')
                    if line.startswith('COUNTRY'):
                        file_count += 1
                        # Headers of the second and later files are dropped.
                        if file_count == 1:
                            row = ','.join([line, more_header])
                            aid.write_and_check_columns(pp_csv, row, columns)
                            aid.write_and_check_columns(gq_csv, row, columns)
                        continue
                    cells = line.split(',')
                    hid = cells[hid_column]
                    order = hid2cnt.get(hid, 0) + 1
                    age = cells[age_column]
                    race = cells[race_column]
                    row = ','.join([
                        line,
                        str(order),
                        _to_agep(age), '',
                        str(race2rac1p.get(race, race))
                    ])
                    csv = gq_csv if cells[hhtype_column] == '11' else pp_csv
                    aid.write_and_check_columns(csv, row, columns)
                    hid2cnt[hid] = order
                    inctot = cells[inctot_column]
                    # The '0' prefix turns an empty cell into 0, but it
                    # breaks signed values: int('0-500') is not a valid
                    # literal.  Signed cells are therefore parsed directly.
                    if inctot[:1] in ('-', '+'):
                        income = int(inctot)
                    else:
                        income = int('0' + inctot)
                    hid2hincome[hid] = hid2hincome.get(hid, 0) + income
    return set(hid2cnt), hid2hincome
Esempio n. 8
0
def _save_pp_as_csv(in_file_paths, pp_path, gq_pp_path):
    """Write person rows with derived order, sex and age columns.

    Input column layout (index of the first column on each line):

         0 SERIALNO,puma_id,place_id,SYNTHETIC_HID,longitude,
         5 latitude,AGEGRP,HRSWRK,IMMSTAT,INCTAX,
        10 MODE,OCC,POB,RELIGION,SEX,
        15 SYNTHETIC_PID

    Every data row is written to *pp_path* with four appended cells: its
    1-based order within the household, an empty cell, the
    ``_reversed_sex`` translation of SEX (the raw value when there is no
    entry) and ``_to_age(AGEGRP)``.  *gq_pp_path* receives only the header
    row.  Only the first header line (starting with ``SERIALNO``) across
    all inputs is written.

    Args:
        in_file_paths: Iterable of input CSV paths, read in order.
        pp_path: Output path for person rows.
        gq_pp_path: Output path that gets the header only.

    Returns:
        A dict mapping each household ID to the number of its rows.
    """
    hid_column = 3
    agegrp_column = 6
    sex_column = 14
    more_headers = 'made-sporder,made-empty,made-sex,made-age'
    columns = 20
    hid2cnt = {}
    aid.mkdir(pp_path)
    aid.mkdir(gq_pp_path)
    emit = aid.write_and_check_columns
    with open(pp_path, 'w') as pp_csv, open(gq_pp_path, 'w') as gq_pp_csv:
        print('writing', os.path.abspath(pp_path), os.path.abspath(gq_pp_path))
        headers_seen = 0
        for in_file_path in in_file_paths:
            with open(in_file_path, 'r') as fin:
                print('reading', in_file_path)
                for raw in fin:
                    line = raw.rstrip('\n')
                    if line.startswith('SERIALNO'):
                        headers_seen += 1
                        if headers_seen == 1:
                            header = line + ',' + more_headers
                            for out in (pp_csv, gq_pp_csv):
                                emit(out, header, columns)
                        continue
                    cells = line.split(',')
                    sex = cells[sex_column]
                    agegroup = cells[agegrp_column]
                    hid = cells[hid_column]
                    order = hid2cnt.get(hid, 0) + 1
                    hid2cnt[hid] = order
                    made_cells = (
                        str(order),
                        '',
                        _reversed_sex.get(sex, sex),
                        _to_age(agegroup),
                    )
                    emit(pp_csv, ','.join((line,) + made_cells), columns)
    return hid2cnt