def execute(context):
    df_codes = context.stage("data.spatial.codes")
    requested_communes = df_codes["commune_id"].unique()

    # First, load work
    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBPRO_2015.dbf" % context.config("data_path"))
    records = []

    with context.progress(label="Reading work flows ...", total=table.numrec) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            f = df_chunk["COMMUNE"].isin(requested_communes)
            f |= df_chunk["ARM"].isin(requested_communes)
            f &= df_chunk["DCLT"].isin(requested_communes)

            df_chunk = df_chunk[f]
            df_chunk = df_chunk[["COMMUNE", "ARM", "TRANS", "IPONDI", "DCLT"]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/work.hdf" % context.cache_path, "movements")

    # Second, load education
    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBSCO_2015.dbf" % context.config("data_path"))
    records = []

    with context.progress(label="Reading education flows ...", total=4782736) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            f = df_chunk["COMMUNE"].isin(requested_communes)
            f |= df_chunk["ARM"].isin(requested_communes)
            f &= df_chunk["DCETUF"].isin(requested_communes)

            df_chunk = df_chunk[f]
            df_chunk = df_chunk[["COMMUNE", "ARM", "IPONDI", "DCETUF"]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/education.hdf" % context.cache_path, "movements")
def execute(context):
    # First, load work
    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBPRO_2015.dbf" % context.config("data_path"))
    records = []

    with context.progress(label="Reading work flows ...", total=7943392) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            f = df_chunk["REGION"] == "11"
            f |= df_chunk["REGLT"] == "11"

            df_chunk = df_chunk[f]
            df_chunk = df_chunk[["COMMUNE", "ARM", "TRANS", "IPONDI", "DCLT", "REGLT"]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/work.hdf" % context.cache_path, "movements")

    # Second, load education
    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBSCO_2015.dbf" % context.config("data_path"))
    records = []

    with context.progress(label="Reading education flows ...", total=4782736) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            f = df_chunk["REGION"] == "11"
            f |= df_chunk["REGETUD"] == "11"

            df_chunk = df_chunk[f]
            df_chunk = df_chunk[["COMMUNE", "ARM", "IPONDI", "DCETUF", "REGETUD"]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/education.hdf" % context.cache_path, "movements")
def county_info(year: int) -> pd.DataFrame:
    _check_for_files_on_disk(year)
    zippath = zip_path(year)

    # NOTE: UTF-8 fails for 2012
    dbf = simpledbf.Dbf5(zippath.replace('.zip', '.dbf'), codec='ISO-8859-1')
    df = dbf.to_dataframe()
    df = _clean_county_info(df)
    return df
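The explicit codec above works around a year whose attribute text is not UTF-8. A minimal sketch of a codec fallback, assuming only simpledbf and pandas (the helper name and the candidate codec list are my own, not part of the original code):

import pandas as pd
import simpledbf

def load_dbf_with_fallback(path: str) -> pd.DataFrame:
    # Hypothetical helper: try UTF-8 first, then codecs that older
    # attribute tables (e.g. the 2012 file above) commonly need.
    for codec in ("utf-8", "ISO-8859-1", "cp1252"):
        try:
            return simpledbf.Dbf5(path, codec=codec).to_dataframe()
        except UnicodeDecodeError:
            continue
    raise ValueError("could not decode {} with any attempted codec".format(path))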
def execute(context):
    table = simpledbf.Dbf5("%s/bpe_2018/bpe18_ensemble_xy.dbf" % context.config("data_path"), codec="latin1")
    df_records = []

    with context.progress(total=2504782, label="Reading enterprise census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            df_chunk = df_chunk[df_chunk["REG"] == "11"]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                df_records.append(df_chunk)

    df_records = pd.concat(df_records)
    return df_records
def execute(context):
    table = simpledbf.Dbf5("%s/rp_2015/FD_INDCVIZA_2015.dbf" % context.config("data_path"))
    records = []

    with context.progress(total=4320619, label="Reading census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            df_chunk = df_chunk[df_chunk["REGION"] == "11"]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/census.hdf" % context.path(), "census")
def execute(context):
    df_codes = context.stage("data.spatial.codes")
    requested_departements = df_codes["departement_id"].unique()

    table = simpledbf.Dbf5(
        "%s/%s" % (context.config("data_path"), context.config("census_path")))
    records = []

    with context.progress(total=4320619, label="Reading census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            df_chunk = df_chunk[df_chunk["DEPT"].isin(requested_departements)]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/census.hdf" % context.path(), "census")
def dbf_to_csv(path):
    assert os.path.splitext(path)[1] == '.dbf'

    # Name output csv file same as input dbf
    [dirname, filename] = os.path.split(path)
    csv_file = dirname + "/" + os.path.splitext(filename)[0] + '.csv'

    # Read dbf object
    print('Reading ' + path)
    dbf = simpledbf.Dbf5(path, codec='utf-8')

    # Write csv file
    if os.path.isfile(csv_file):
        os.remove(csv_file)
    print('Writing ' + csv_file)
    dbf.to_csv(csv_file)

    return csv_file
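A possible call site for the helper above; the input path is a placeholder, and the round trip through pandas only demonstrates that the output is an ordinary CSV:

import pandas as pd

csv_path = dbf_to_csv("data/parcels.dbf")   # hypothetical input file
df = pd.read_csv(csv_path)
print(df.head())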
def execute(context):
    df_codes = context.stage("data.spatial.codes")
    requested_departements = df_codes["departement_id"].unique()

    table = simpledbf.Dbf5(
        "%s/%s" % (context.config("data_path"), context.config("bpe_path")),
        codec="latin1")
    df_records = []

    with context.progress(total=2539520, label="Reading enterprise census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            df_chunk = df_chunk[df_chunk["DEP"].isin(requested_departements)]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                df_records.append(df_chunk)

    return pd.concat(df_records)
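Several of the pipeline stages above repeat the same chunked read-filter-concatenate idiom. A generic sketch of that idiom, stripped of the pipeline's context/progress machinery (the helper name, its arguments, and the commented usage are assumptions, not part of the original code):

import pandas as pd
import simpledbf

def read_filtered_dbf(path, keep_columns, row_filter, codec="utf-8", chunksize=10240):
    # Stream the DBF in chunks so the full table never has to fit in memory,
    # keeping only the rows accepted by row_filter and the requested columns.
    table = simpledbf.Dbf5(path, codec=codec)
    chunks = []
    for df_chunk in table.to_dataframe(chunksize=chunksize):
        df_chunk = df_chunk[row_filter(df_chunk)]
        if len(df_chunk) > 0:
            chunks.append(df_chunk[keep_columns])
    return pd.concat(chunks) if chunks else pd.DataFrame(columns=keep_columns)

# Hypothetical usage mirroring the departement filter above:
# df = read_filtered_dbf(
#     "bpe.dbf", ["DEP", "LAMBERT_X", "LAMBERT_Y"],
#     lambda df: df["DEP"].isin(["75", "92"]), codec="latin1")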
def _load_state_dbf(state_fips: str) -> pd.DataFrame:
    dbf_path = _blocks_shape_path(state_fips).replace('.shp', '.dbf')
    if not os.path.isfile(dbf_path):
        _unzip_block_dbf(state_fips)
    df = simpledbf.Dbf5(dbf_path).to_dataframe()
    return df
    Output file: \n {}
    -------------------------------------------------------------------------------
    Compiling...
    """.format(pars, dates, DATEMIN, os.path.join(os.getcwd(), OUT_FILE)))

    dfdict = {par: [] for par in pars}

    for f in allfiles:
        date = re.findall(DPAT, f)[0]
        par = f.split('\\')[1]
        print(f)
        print("date: {}, par: {}".format(date, par))

        try:
            df = simpledbf.Dbf5(f, codec='utf-8').to_dataframe()
        except ValueError:
            w = "Cannot read table {} ! \n" \
                "There may be cells in the .dbf file that are incorrectly formatted. Edit these and try again.".format(f)
            warnings.warn(w, UserWarning)
            continue

        if df.isnull().values.any():
            w = "Dataframe has empty cells! It may not be complete."
            warnings.warn(w, UserWarning)

        df['date'] = date
        df['time'] = (datetime.strptime(date, DATEFORM) - DATEMIN).days
        df['par'] = par  # col not used but leaving this in in case I need it later.
        df = df.dropna(axis=1, how='all')  # remove empty columns
        dfdict[par].append(df)
def pull_population(api_key: str, year: int = 2020) -> pd.DataFrame:
    """
    Pull county population data from the Census API.

    Also, make some clean ups for our data set. In particular:
    * Make Alaska one county
    * Change Shannon County, SD, to Oglala Lakota County, SD.

    Args:
        api_key: Your census API key
        year: The decennial Census year you're using. Must be in [1992, 2022)

    Returns:
        A DataFrame with columns "id" (which is the 5-digit county FIPS as a str)
        and "population" which is the integer population.
    """
    decennial_year = ((year - 2) // 10) * 10
    if decennial_year not in [1990, 2000, 2010]:
        raise ValueError(f"Year must be in [1992, 2022), not {year}")

    census = Census(api_key)
    if decennial_year == 2010:
        data = census.sf1.state_county("P001001", "*", "*", year=decennial_year)
        df = pd.DataFrame(data).rename(columns={"P001001": "population"})
    elif decennial_year == 2000:
        # Something is busted with the Census package for 2000 SF1s
        # Note that the 1990 SF1 is down :-/
        df = pd.read_json(
            f"https://api.census.gov/data/{decennial_year}/dec/sf1?get=P001001&for=county:*&in=state:*&key={api_key}",
            orient="values",
        )
        df = df.iloc[1:]
        df.columns = ["population", "state", "county"]
    elif decennial_year == 1990:
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            cnty_zipfile = tmpdir / "cnty.zip"

            with requests.get(
                "https://www2.cdc.gov/nceh/lead/census90/house11/files/cnty.zip",
                stream=True,
            ) as response:
                response.raise_for_status()
                with open(cnty_zipfile, "wb") as outfile:
                    for chunk in response.iter_content(chunk_size=8192):
                        outfile.write(chunk)

            with zipfile.ZipFile(cnty_zipfile) as infile:
                infile.extract("CNTY.dbf", path=tmpdir)

            dbf = simpledbf.Dbf5(str(tmpdir / "CNTY.dbf"))
            df = dbf.to_dataframe()
            df = df[["P0010001", "STATEFP", "CNTY"]].rename(columns={
                "P0010001": "population",
                "STATEFP": "state",
                "CNTY": "county",
            })
    else:
        raise NotImplementedError("Only support years 1990, 2000, and 2010")

    df["population"] = df["population"].astype(int)

    # Fix Alaska
    just_alaska = df[df["state"] == "02"]
    just_alaska = pd.DataFrame({
        "state": ["02"],
        "county": ["000"],
        "population": [just_alaska["population"].sum()],
    })
    df = pd.concat([df[df["state"] != "02"], just_alaska])

    df["id"] = df["state"] + df["county"]
    df = df.drop(columns=["state", "county"])

    # Finally, Shannon County South Dakota got renamed in 2015. Fix this.
    if year >= 2015:
        df["id"] = df["id"].apply(lambda x: "46102" if x == "46113" else x)

    return df
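A hedged usage sketch of the function above; the API key is a placeholder and the call requires network access:

df = pull_population(api_key="YOUR_CENSUS_API_KEY", year=2020)
print(df.sort_values("population", ascending=False).head())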
def getEPHdbf(censusstring):
    print("Downloading", censusstring)

    ### First I will check that it is not already there
    if not os.path.isfile("data/Individual_" + censusstring + ".dbf"):
        if os.path.isfile('Individual_' + censusstring + ".dbf"):
            # if in the current dir just move it
            if os.system("mv " + 'Individual_' + censusstring + ".dbf " + "data/"):
                print("Error moving file!, Please check!")
        # otherwise start looking for the zip file
        else:
            if not os.path.isfile("data/" + censusstring + "_dbf.zip"):
                if not os.path.isfile(censusstring + "_dbf.zip"):
                    os.system("curl -O http://www.indec.gob.ar/ftp/cuadros/menusuperior/eph/"
                              + censusstring + "_dbf.zip")
                ### To move it I use the os.system() functions to run bash commands with arguments
                os.system("mv " + censusstring + "_dbf.zip " + "data/")
            ### unzip the csv
            os.system("unzip " + "data/" + censusstring + "_dbf.zip -d data/")

    if not os.path.isfile("data/" + 'Individual_' + censusstring + ".dbf"):
        print("WARNING!!! something is wrong: the file is not there!")
    else:
        print("file in place, creating CSV file")

    trimestre = censusstring

    dbf = simpledbf.Dbf5('data/Individual_' + trimestre + '.dbf', codec='latin1')
    indRaw = dbf.to_dataframe()

    indNoW = indRaw.loc[indRaw.REGION == 1, [
        'CODUSU', 'NRO_HOGAR', 'COMPONENTE', 'AGLOMERADO', 'PONDERA',
        'CH03', 'CH04', 'CH06',
        #'CH10',  ## remove
        'CH12',  ## schoolLevel
        'CH13', 'CH14', 'ESTADO',
        #'NIVEL_ED',  ## remove
        'CAT_OCUP', 'CAT_INAC', 'ITF', 'IPCF', 'P47T', 'P21',
        'DECCFR', 'DECIFR', 'CH07', 'CH09', 'CH15'
    ]]

    indNoW.columns = [
        'CODUSU', 'NRO_HOGAR', 'COMPONENTE', 'AGLOMERADO', 'PONDERA',
        'familyRelation', 'female', 'age',
        #'schooled',  ## remove
        'schoolLevel',  ## schoolLevel
        'finishedYear', 'lastYear', 'activity',
        #'educLevel',  ## remove
        'empCond', 'unempCond', 'ITF', 'IPCF', 'P47T', 'P21',
        'DECCFR', 'DECIFR', 'maritalStatus', 'reading', 'placeOfBirth'
    ]
    indNoW.index = range(0, indNoW.shape[0])

    dbf2 = simpledbf.Dbf5('data/Hogar_' + trimestre + '.dbf', codec='latin1')
    indRaw2 = dbf2.to_dataframe()

    indNoW2 = indRaw2.loc[indRaw2.REGION == 1, [
        'CODUSU', 'NRO_HOGAR', 'REGION', 'PONDERA', 'IV1', 'IV1_ESP', 'IV2',
        'IV3', 'IV3_ESP', 'IV4', 'IV5', 'IV6', 'IV7', 'IV7_ESP', 'IV8', 'IV9',
        'IV10', 'IV11', 'IV12_1', 'IV12_2', 'IV12_3', 'II1', 'II2', 'II3',
        'II3_1', 'II4_1', 'II4_2', 'II4_3', 'II7', 'II7_ESP', 'II8', 'II8_ESP',
        'II9', 'V1', 'IX_TOT', 'IX_MEN10', 'IX_MAYEQ10', 'ITF', 'VII1_1',
        'VII1_2', 'VII2_1', 'VII2_2', 'VII2_3', 'VII2_4', 'ITF', 'DECIFR',
        'RDECIFR', 'IPCF', 'DECCFR', 'RDECCFR'
    ]]

    indNoW2.columns = [
        'CODUSU', 'NRO_HOGAR', 'REGION', 'PONDERA', 'HomeType', 'HomeTypeesp',
        'RoomsNumber', 'FloorMaterial', 'FloorMaterialesp', 'RoofMaterial',
        'RoofCoat', 'Water', 'WaterType', 'WaterTypeesp', 'Toilet',
        'ToiletLocation', 'ToiletType', 'Sewer', 'DumpSites', 'Flooding',
        'EmergencyLoc', 'UsableTotalRooms', 'SleepingRooms', 'OfficeRooms',
        'OnlyWork', 'Kitchen', 'Sink', 'Garage', 'Ownership', 'Ownershipesp',
        'CookingCombustible', 'CookingCombustibleesp', 'BathroomUse', 'Working',
        'HouseMembers', 'Memberless10', 'Membermore10', 'TotalHouseHoldIncome',
        'DomesticService1', 'DomesticService2', 'DomesticService3',
        'DomesticService4', 'DomesticService5', 'DomesticService6',
        'TotalFamilyIncome', 'TotalFamilyIncomeDec', 'TotalFamilyIncomeDecReg',
        'PerCapInc', 'PerCapIncDec', 'PerCapIncDecReg'
    ]
    indNoW2.index = range(0, indNoW2.shape[0])

    indNoW2.to_csv('data/cleanDataHousehold' + trimestre + '.csv', index=False)
    print('csv file cleanDataHousehold', trimestre, '.csv successfully created in folder data/')

    indNoW.to_csv('data/cleanData' + trimestre + '.csv', index=False)
    print('csv file cleanData', trimestre, '.csv successfully created in folder data/')

    return
def read_df_dbf(dbf_file: str, **simpledbf_kwargs) -> T_DF:
    assert isinstance(dbf_file, str)
    dbf = simpledbf.Dbf5(dbf_file, **simpledbf_kwargs)
    df = pandas.concat((df for df in dbf.to_dataframe(chunksize=10000)), ignore_index=True)
    return sd_checks.check_df(df)
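A possible call, forwarding a codec through the keyword arguments (file name and codec are assumptions):

# Hypothetical usage: simpledbf keyword arguments such as codec are
# passed straight through to Dbf5 by read_df_dbf.
df = read_df_dbf("zones.dbf", codec="latin1")
print(df.shape)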
def join_tables(self, threshold, user_def_column_name, output_file_name):
    print("joining tables \n")

    # make a list of all the tables we have. These are already dataframes
    possible_dfs = [
        self.emissions, self.forest_loss, self.biomass_weight, self.forest_extent
    ]

    # get rid of df's we don't have
    df_list = [x for x in possible_dfs if x is not None]

    # to get the column names to keep (extent, emissions, loss), go through each df
    # and grab its third column, which is the analysis name
    analysis_names = [x.columns.values[3] for x in df_list]

    # convert original SUM values into the right units
    for index, item in enumerate(analysis_names):
        if item == 'forest_loss':
            analysis_names[index] = 'forest_loss_ha'
            self.forest_loss['forest_loss'] = self.forest_loss.forest_loss.astype(float)
            self.forest_loss['forest_loss_ha'] = self.forest_loss['forest_loss'] / 10000

        if item == 'forest_extent':
            analysis_names[index] = 'forest_extent_ha'
            self.forest_extent['forest_extent_ha'] = self.forest_extent['forest_extent'] / 10000

        if item == 'biomass_weight':
            analysis_names[index] = 'biomass_weight_Tg'
            self.biomass_weight['biomass_weight_Tg'] = self.biomass_weight['biomass_weight'] / 1000000

    # join all the data frames together on VALUE and ID. VALUE is the tcd/loss code
    # (e.g. 41 = loss in 2001 at 1-10% tcd, or loss in 2001 at >30% tcd).
    # ID is the unique ID of the feature in the shapefile
    merged = pd.concat([df.set_index(['VALUE', 'ID']) for df in df_list], axis=1)
    merged = merged.reset_index()

    # To get 2 outputs from a single function and apply them to 2 different columns in the dataframe:
    # http://stackoverflow.com/questions/12356501/pandas-create-two-new-columns-in-a-dataframe-with-
    # values-calculated-from-a-pre?rq=1
    # the tcd and year columns are set to the first and second outputs of value_to_tcd_year
    try:
        merged['tcd'], merged['year'] = list(
            zip(*merged["VALUE"].map(post_processing.value_to_tcd_year)))
    except KeyError:
        print("oops, loss mosaic doesn't have the arithmetic function applied. Refer to readme file")
        sys.exit()

    # the value_to_tcd_year function is good for when the user runs all thresholds, but not just one.
    # so, overwrite the tcd column when it comes back
    if threshold != "all":
        merged['tcd'] = "> {}%".format(threshold)

    # get the input shapefile into df format
    final_aoi_dbf = self.final_aoi.replace(".shp", ".dbf")
    final_aoi_dbf = simpledbf.Dbf5(final_aoi_dbf)

    # convert dbf to pandas dataframe
    final_aoi_df = final_aoi_dbf.to_dataframe()

    # reset index of final_aoi_df
    final_aoi_df = final_aoi_df.reset_index()

    if user_def_column_name:
        merged = final_columns.user_cols(user_def_column_name, final_aoi_df, merged, analysis_names)
    else:
        columns_to_keep = ['ID', 'tcd', 'year']
        columns_to_keep.extend(analysis_names)
        merged = merged[columns_to_keep]

    print('SAMPLE OF OUTPUT:')
    print(merged.head(5))

    final_output_csv = os.path.join(self.root_dir, 'result', '{}.csv'.format(output_file_name))
    merged.to_csv(final_output_csv, index=False)
tables_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'tables')
z_stats_tbl = os.path.join(tables_dir, 'output_{}.dbf'.format(i))

start_time = datetime.datetime.now()

print("running zstats")
outzstats = ZonalStatisticsAsTable(zone, "VALUE", value, z_stats_tbl, "DATA", "SUM")

end_time = datetime.datetime.now() - start_time
print("debug: time elapsed: {}".format(end_time))

# convert the output zstats table into a pandas DF
dbf = simpledbf.Dbf5(z_stats_tbl)
df = dbf.to_dataframe()

# populate a new field "ID" with the FID and the analysis column with the sum
df['ID'] = i
df[analysis] = df['SUM']

# sometimes this value came back as an object, so here we are fixing that bug
df.VALUE = df.VALUE.astype(int)

# name of the sql database to store the sql table
zstats_results_db = os.path.join(tables_dir, 'zstats_results_db.db')

# create a connection to the sql database
conn = sqlite3.connect(zstats_results_db)
transit_seatcap_df = pd.read_csv(SEATCAP_FILE)
transit_seatcap_df.columns = transit_seatcap_df.columns.str.replace('%', 'pct')
transit_seatcap_df.rename(columns={
    "VEHTYPE": "veh_type_updated",
    "100pctCapacity": "standcap"
}, inplace=True)
logging.info("Read {}\n{}".format(SEATCAP_FILE, transit_seatcap_df.head()))

# read the transit files
all_trn_df = pd.DataFrame()
for timeperiod in ['AM', 'EA', 'EV', 'MD', 'PM']:
    trn_file = os.path.join(trn_dir, 'trnlink{}_ALLMSA.dbf'.format(timeperiod))
    dbf = simpledbf.Dbf5(trn_file)
    trn_df = dbf.to_dataframe()
    trn_df["period"] = timeperiod
    logging.info("Read {} links from {}".format(len(trn_df), trn_file))
    # print(trn_df.head())
    all_trn_df = pd.concat([all_trn_df, trn_df])

logging.info("Read {} total links".format(len(all_trn_df)))

# drop columns we won't be updating/using so they don't cause confusion
all_trn_df.drop(columns=[
    "AB_XITA", "AB_BRDB", "BA_VOL", "BA_BRDA", "BA_XITA", "BA_BRDB", "BA_XITB"
], inplace=True)

# sort by mode, line name, time period, sequence
        print("Parameter {} not found.".format(par))
        continue

    dates = get_dates_from_filenames(outlist)
    print("Parameter: {}\nFound {} dates: {}".format(par, len(dates), dates))

    dflist = []
    for date in dates:
        # python 3 requires conversion from filter to list
        infile = list(filter(lambda x: date in x, outlist))
        if len(infile) == 1:  # make sure there is only one file for each date
            infile = infile[0]
        else:  # this condition should not occur. If necessary an exception can be added here
            print('Error: Multiple files found for date {}.'.format(date))
            break

        df = simpledbf.Dbf5(infile).to_dataframe()
        df['date'] = date
        if DATEFORM == 'YYYYMMDD':
            time = datetime(int(date[0:4]), int(date[4:6]), int(date[6:])) - DATEMIN
            df['time'] = time.days
        df = df.dropna(axis=1, how='all')  # remove empty columns
        df = df[TO_KEEP]
        dflist.append(df)

    out_df = pd.concat(dflist)
    out_df.columns = [c.replace(REP, par) for c in out_df.columns]
    dflist_all.append(out_df)

finaldf = reduce(lambda x, y: pd.merge(x, y, on=JOIN_ON, how='outer'), dflist_all)
#!/usr/bin/env python
# thanks to:
# https://gist.github.com/bertspaan/8220892

import simpledbf as sdbf
import os
import sys

filename = sys.argv[1]
print(filename)

if filename.endswith('.dbf') or filename.endswith('.DBF'):
    print("Converting %s to csv" % filename)
    csv_fn = filename[:-4] + ".csv"
    with open(csv_fn, 'wb') as csvfile:
        dbf = sdbf.Dbf5(filename)
        dbf.to_csv(csv_fn)
    print("Done...")
else:
    print("Filename does not end with .dbf")
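Expected invocation of the script above, with the DBF path as the single positional argument (the script file name is a placeholder):

# python dbf_to_csv.py parcels.dbf
# -> prints the filename, converts it, and writes parcels.csv next to the input.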
outPolygons = "BfQ_polygon.shp"
arcpy.RasterToPolygon_conversion(outRas, outPolygons)

# Set local variables
inZoneData = outPolygons
zoneField = "id"
inClassData = outPolygons
classField = "id"
outTable = "BfQ_polygon_table.dbf"
processingCellSize = 0.01

# Execute TabulateArea
TabulateArea(inZoneData, zoneField, inClassData, classField, outTable,
             processingCellSize, "CLASSES_AS_ROWS")

BfQ_area_dbf = simpledbf.Dbf5(env.workspace + '\\' + outTable)
BfQ_partial_area = BfQ_area_dbf.to_dataframe()
BfQ_area = np.sum(np.array(BfQ_partial_area['Area']))

del BfQ_area_dbf
del BfQ_partial_area
#del BfQ_area

arcpy.Delete_management(outPolygons)
arcpy.Delete_management(outTable)

# Reverse
#Flow = Flow[::-1]
#CalArea = CalArea[::-1]

# Non-dimensionalization
dflist = []
for i, date in enumerate(dates):
    # python 3 requires conversion from filter to list
    infol = list(filter(lambda x: date in x, outlist))
    if len(infol) == 1:  # make sure there is only one file for each date
        infol = infol[0]
        print('Date {} OK!'.format(date))
    else:  # this condition should not occur. If necessary an exception can be added here
        print('Warning: Multiple or no files found for date {}. Skipping.'.format(date))
        continue

    df = simpledbf.Dbf5(os.path.join(infol, POLFILE)).to_dataframe()
    df['date'] = date
    df['time'] = (dates_dt[i] - DATEMIN).days
    df['X'] = pd.to_numeric(df['X'])  # otherwise might be strings
    df['Y'] = pd.to_numeric(df['Y'])
    df = df.dropna(axis=1, how='all')  # remove empty columns
    dflist.append(df)

out_df = pd.concat(dflist)  # vertical concatenation
out_df = out_df[['X', 'Y', 'time', 'date'] + COLS]  # trim
out_df.columns = [
    par + '_' + col if col in COLS else col for col in out_df.columns
]
dflist_all.append(out_df)

finaldf = reduce(lambda x, y: pd.merge(x, y, on=JOIN_ON, how='outer'),
                    help="Location of transit assignment files (trn_link_onoffs_[EA,AM,MD,PM,EV].dbf")
parser.add_argument("--byclass", action="store_true",
                    help="Include user class outputs (VOL, ONA, OFFB)")
args = parser.parse_args()

all_linko_df = pandas.DataFrame()
for time_period in TIME_PERIODS.keys():
    # for testing:
    # for time_period in ['EA']:
    linko_file = os.path.join(args.trn_dir, "trn_link_onoffs_{}.dbf".format(time_period))
    linko_dbf = simpledbf.Dbf5(linko_file)
    linko_df = linko_dbf.to_dataframe()
    print("Read {} lines from {}".format(len(linko_df), linko_file))

    # for now, we want boardings so
    # filter down to just transit links (no access/egress)
    linko_df = linko_df.loc[linko_df.MODE < 900]
    # and those with positive ONA
    linko_df = linko_df.loc[linko_df.ONA > 0]
    print("Filtered to {} transit links with boardings".format(len(linko_df)))

    # drop columns starting with REV
    colnames = list(linko_df.columns)
    rev_colnames = [