def nycep_acs_extract_tracts(dpath=None):
    """Copy the NYC census-tract records out of the 2012 5-year ACS files.

    Every file in ``<dpath>/2012/5`` whose name contains ``'000.txt'`` is
    read line by line.  A line is kept when its sixth comma-separated field
    is one of the Logical Record Numbers returned by
    ``nycep_acs_tract_logrecno(dpath=dpath)``.  Kept lines are written,
    unmodified, to a file of the same name under ``../output/acs/2012/5``
    (relative to the current working directory).

    Parameters
    ----------
    dpath : str, optional
        Root directory of the ACS data.  If it is ``None`` a message is
        printed and nothing is done.

    Returns
    -------
    None
    """

    # -- defaults
    if dpath is None:
        print("Must set ACS data path!!!")
        return
    d5path = os.path.join(dpath,'2012','5')


    # -- utilities
    opath = os.path.join('../output','acs','2012','5')

    # make sure the destination exists, otherwise the first open(...,'w')
    # below fails on a fresh checkout
    if not os.path.isdir(opath):
        os.makedirs(opath)


    # -- get the file list
    flist = sorted([i for i in os.listdir(d5path) if '000.txt' in i])


    # -- get the Logical Record Numbers for NYC census tracts
    # NOTE(review): assumes the helper returns an iterable of hashable
    # record-number strings; a set makes the per-line lookup O(1).
    logrecno = set(nycep_acs_tract_logrecno(dpath=dpath))


    # -- loop through the files and extract the appropriate lines
    for fl in flist:
        src = os.path.join(d5path,fl)
        dst = os.path.join(opath,fl)

        # context managers close both handles even if a line fails to parse
        with open(src,'r') as in_file, open(dst,'w') as out_file:

            print("Writing file {0}...".format(dst))

            out_file.writelines(line for line in in_file
                                if line.split(',')[5] in logrecno)

    return
# Example #2
# 0
def nycep_acs_read_table(tlabel, year=2012, summary=5, dpath=None, 
                         margins=False):
    """Read the cells of one ACS table from a New York summary file.

    The table is located through
    ``<dpath>/<year>/<summary>/Sequence_Number_and_Table_Number_Lookup.txt``:
    among the lines containing ``'CELL'``, the first one whose second
    comma-separated field equals ``tlabel`` supplies the sequence number
    (field 3), the start position (field 5) and the number of cells (the
    digits of field 6).  The data are then read from
    ``<e|m><year><summary>ny<seqnum>000.txt`` in the same directory.

    Parameters
    ----------
    tlabel : str
        Table identifier as it appears in the lookup file.
    year : int, optional
        Year used to build the directory and file names.
    summary : int, optional
        1 or 5; any other value is rejected.
    dpath : str, optional
        Root directory of the ACS data.  Required.
    margins : bool, optional
        Read the ``'m'`` file instead of the ``'e'`` (estimates) file.

    Returns
    -------
    dict, list or None
        * ``summary == 5``: a dict with keys ``'fileid'``, ``'filetype'``,
          ``'stusab'``, ``'chariter'``, ``'sequence'`` (taken from the last
          matching line), ``'logrecno'`` (list) and ``'vals'`` (list of
          lists of strings).  Only lines whose sixth field is in the result
          of ``nycep_acs_tract_logrecno(dpath=dpath)`` are kept.
        * ``summary == 1``: a list with one list of cell strings per line
          of the data file.
        * ``None`` when ``dpath`` is missing or ``summary`` is unsupported
          (a message is printed).

    Raises
    ------
    ValueError
        If ``tlabel`` is not present in the lookup file.
    """

    # -- check path
    if dpath is None:
        print("Must set data path to ACS data!")
        return


    # -- reject unsupported summaries before any file is opened, so the
    #    message is actually reachable and no handle is left open
    if summary not in (1, 5):
        print("Only 1 and 5 year summaries supported!!!")
        return


    # -- utilities
    eom   = 'm' if margins else 'e'
    ddir  = os.path.join(dpath,str(year),str(summary))
    sfile = os.path.join(ddir,'Sequence_Number_and_Table_Number_Lookup.txt')


    # -- read in the sequence number file
    with open(sfile,'r') as fopen:
        slines = [line for line in fopen if 'CELL' in line]


    # -- get the sequence number, start, and # of cells for the requested table
    labels = [line.split(',')[1] for line in slines]
    recs   = slines[labels.index(tlabel)].split(',')
    seqnum = recs[2]
    cstart = int(recs[4])-1   # lookup value is decremented to index a list
    ncell  = int(''.join([i for i in recs[5] if i.isdigit()]))
    cstop  = cstart+ncell


    # -- read the estimates (or margins) file
    dfile  = os.path.join(ddir,
                          eom+str(year)+str(summary)+'ny'+seqnum+'000.txt')

    if summary==1:
        # 1-year summaries: keep the requested cells of every line
        with open(dfile,'r') as fopen:
            return [line.split(',')[cstart:cstop] for line in fopen]

    # 5-year summaries: keep only the NYC census-tract records
    data = {'fileid'   : '',
            'filetype' : '',
            'stusab'   : '',
            'chariter' : '',
            'sequence' : '',
            'logrecno' : [],
            'vals'     : []}

    # NOTE(review): assumes the helper returns an iterable of hashable
    # record-number strings; a set makes the per-line lookup O(1).
    logrecno = set(nycep_acs_tract_logrecno(dpath=dpath))

    with open(dfile,'r') as fopen:
        for line in fopen:
            recs = line.split(',')
            if recs[5] not in logrecno:
                continue
            data['fileid']   = recs[0]
            data['filetype'] = recs[1]
            data['stusab']   = recs[2]
            data['chariter'] = recs[3]
            data['sequence'] = recs[4]
            data['logrecno'].append(recs[5])
            data['vals'].append(recs[cstart:cstop])


    # -- return data
    return data