Example #1
def execute(context):
    df_codes = context.stage("data.spatial.codes")
    requested_communes = df_codes["commune_id"].unique()

    # First, load work

    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBPRO_2015.dbf" %
                           context.config("data_path"))
    records = []

    with context.progress(label="Reading work flows ...",
                          total=table.numrec) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            f = df_chunk["COMMUNE"].isin(requested_communes)
            f |= df_chunk["ARM"].isin(requested_communes)
            f &= df_chunk["DCLT"].isin(requested_communes)

            df_chunk = df_chunk[f]
            df_chunk = df_chunk[["COMMUNE", "ARM", "TRANS", "IPONDI", "DCLT"]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/work.hdf" % context.cache_path, "movements")

    # Second, load education

    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBSCO_2015.dbf" %
                           context.config("data_path"))
    records = []

    with context.progress(label="Reading education flows ...",
                          total=4782736) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            f = df_chunk["COMMUNE"].isin(requested_communes)
            f |= df_chunk["ARM"].isin(requested_communes)
            f &= df_chunk["DCETUF"].isin(requested_communes)

            df_chunk = df_chunk[f]
            df_chunk = df_chunk[["COMMUNE", "ARM", "IPONDI", "DCETUF"]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/education.hdf" % context.cache_path,
                              "movements")
Example #2
def execute(context):
    # First, load work

    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBPRO_2015.dbf" %
                           context.config("data_path"))
    records = []

    with context.progress(label="Reading work flows ...",
                          total=7943392) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))
            f = df_chunk["REGION"] == "11"
            f |= df_chunk["REGLT"] == "11"
            df_chunk = df_chunk[f]
            df_chunk = df_chunk[[
                "COMMUNE", "ARM", "TRANS", "IPONDI", "DCLT", "REGLT"
            ]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/work.hdf" % context.cache_path, "movements")

    # Second, load education

    table = simpledbf.Dbf5("%s/rp_2015/FD_MOBSCO_2015.dbf" %
                           context.config("data_path"))
    records = []

    with context.progress(label="Reading education flows ...",
                          total=4782736) as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))
            f = df_chunk["REGION"] == "11"
            f |= df_chunk["REGETUD"] == "11"
            df_chunk = df_chunk[f]
            df_chunk = df_chunk[[
                "COMMUNE", "ARM", "IPONDI", "DCETUF", "REGETUD"
            ]]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/education.hdf" % context.cache_path,
                              "movements")
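The two examples above share one pattern: stream the large DBF in chunks with to_dataframe(chunksize=...), filter each chunk, and concatenate only the surviving rows. A minimal standalone sketch of that pattern; the file name, the "CODE" column and the kept values are placeholders, not from the original pipelines:

import pandas as pd
import simpledbf

# Placeholder input file and filter column.
table = simpledbf.Dbf5("records.dbf")
chunks = []

for df_chunk in table.to_dataframe(chunksize=10240):
    # Keep only rows whose CODE is in the requested set.
    df_chunk = df_chunk[df_chunk["CODE"].isin(["75056", "69123"])]
    if len(df_chunk) > 0:
        chunks.append(df_chunk)

df = pd.concat(chunks)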
Example #3
def county_info(year: int) -> pd.DataFrame:
    _check_for_files_on_disk(year)
    zippath = zip_path(year)

    # NOTE: UTF-8 fails for 2012
    dbf = simpledbf.Dbf5(zippath.replace('.zip', '.dbf'),
                         codec='ISO-8859-1')
    df = dbf.to_dataframe()

    df = _clean_county_info(df)

    return df
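Example #3 hard-codes ISO-8859-1 because the 2012 file is not valid UTF-8. A minimal sketch of a codec fallback, assuming a wrong codec surfaces as a UnicodeDecodeError while decoding; the helper name and codec list are mine, not part of simpledbf:

import simpledbf

def read_dbf_with_fallback(path, codecs=("utf-8", "ISO-8859-1")):
    # Try each codec in turn and return the first DataFrame that decodes cleanly.
    last_error = None
    for codec in codecs:
        try:
            return simpledbf.Dbf5(path, codec=codec).to_dataframe()
        except UnicodeDecodeError as error:  # assumption: a codec mismatch raises here
            last_error = error
    raise last_error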
Example #4
def execute(context):
    table = simpledbf.Dbf5("%s/bpe_2018/bpe18_ensemble_xy.dbf" % context.config("data_path"), codec = "latin1")
    df_records = []

    with context.progress(total = 2504782, label = "Reading enterprise census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize = 10240):
            progress.update(len(df_chunk))
            df_chunk = df_chunk[df_chunk["REG"] == "11"]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                df_records.append(df_chunk)


    df_records = pd.concat(df_records)
    return df_records
Example #5
def execute(context):
    table = simpledbf.Dbf5("%s/rp_2015/FD_INDCVIZA_2015.dbf" %
                           context.config("data_path"))
    records = []

    with context.progress(total=4320619,
                          label="Reading census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))
            df_chunk = df_chunk[df_chunk["REGION"] == "11"]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/census.hdf" % context.path(), "census")
Example #6
def execute(context):
    df_codes = context.stage("data.spatial.codes")
    requested_departements = df_codes["departement_id"].unique()

    table = simpledbf.Dbf5(
        "%s/%s" % (context.config("data_path"), context.config("census_path")))
    records = []

    with context.progress(total=4320619,
                          label="Reading census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            df_chunk = df_chunk[df_chunk["DEPT"].isin(requested_departements)]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                records.append(df_chunk)

    pd.concat(records).to_hdf("%s/census.hdf" % context.path(), "census")
Example #7
def dbf_to_csv(path):
    assert os.path.splitext(path)[1] == '.dbf'

    # Name the output csv file the same as the input dbf
    dirname, filename = os.path.split(path)
    csv_file = os.path.join(dirname, os.path.splitext(filename)[0] + '.csv')

    # Read the dbf object
    print('Reading ' + path)
    dbf = simpledbf.Dbf5(path, codec='utf-8')

    # Remove any existing csv file, then write the new one
    if os.path.isfile(csv_file):
        os.remove(csv_file)

    print('Writing ' + csv_file)
    dbf.to_csv(csv_file)

    return csv_file
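A usage sketch for the helper above; the input path is hypothetical:

# Hypothetical input file; dbf_to_csv writes parcels.csv next to it.
output_path = dbf_to_csv("data/parcels.dbf")
print(output_path)  # data/parcels.csv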
Example #8
def execute(context):
    df_codes = context.stage("data.spatial.codes")
    requested_departements = df_codes["departement_id"].unique()

    table = simpledbf.Dbf5(
        "%s/%s" % (context.config("data_path"), context.config("bpe_path")),
        codec="latin1")
    df_records = []

    with context.progress(total=2539520,
                          label="Reading enterprise census ...") as progress:
        for df_chunk in table.to_dataframe(chunksize=10240):
            progress.update(len(df_chunk))

            df_chunk = df_chunk[df_chunk["DEP"].isin(requested_departements)]
            df_chunk = df_chunk[COLUMNS]

            if len(df_chunk) > 0:
                df_records.append(df_chunk)

    return pd.concat(df_records)
Example #9
def _load_state_dbf(state_fips: str) -> pd.DataFrame:
    dbf_path = _blocks_shape_path(state_fips).replace('.shp', '.dbf')
    if not os.path.isfile(dbf_path):
        _unzip_block_dbf(state_fips)
    df = simpledbf.Dbf5(dbf_path).to_dataframe()
    return df
Example #10
Output file: \n {}
-------------------------------------------------------------------------------

Compiling...
""".format(pars, dates, DATEMIN, os.path.join(os.getcwd(), OUT_FILE)))

dfdict = {par: [] for par in pars}

for f in allfiles:
    date = re.findall(DPAT, f)[0]
    par = f.split('\\')[1]
    print(f)
    print("date: {}, par: {}".format(date, par))

    try:
        df = simpledbf.Dbf5(f, codec='utf-8').to_dataframe()
    except ValueError:
        w = "Cannot read table {} ! \n" \
            "There may be cells in the .dbf file that are incorrectly formatted. Edit these and try again.".format(f)
        warnings.warn(w, UserWarning)
        continue
    if df.isnull().values.any():
        w = "Dataframe has empty cells! It may not be complete."
        warnings.warn(w, UserWarning)

    df['date'] = date
    df['time'] = (datetime.strptime(date, DATEFORM) - DATEMIN).days
    df['par'] = par  # col not used but leaving this in in case I need it later.
    df = df.dropna(axis=1, how='all')  # remove empty columns
    dfdict[par].append(df)
Example #11
def pull_population(api_key: str, year: int = 2020) -> pd.DataFrame:
    """
    Pull county population data from the Census API. Also, make some clean ups
    for our data set. In particular:
        * Make Alaska one county
        * Change Shannon County, SD, to Oglala Lakota County, SD.

    Args:
        api_key: Your census API key
        year: The decennial Census year you're using. Must be in [1992, 2022)

    Returns:
        A DataFrame with columns "id" (which is the 5-digit county FIPS as a str) and
            "population" which is the integer population.
    """
    decennial_year = ((year - 2) // 10) * 10
    if decennial_year not in [1990, 2000, 2010]:
        raise ValueError(f"Year must be in [1992, 2022), not {year}")

    census = Census(api_key)

    if decennial_year == 2010:
        data = census.sf1.state_county("P001001",
                                       "*",
                                       "*",
                                       year=decennial_year)
        df = pd.DataFrame(data).rename(columns={"P001001": "population"})

    elif decennial_year == 2000:
        # Something is busted with the Census package for 2000 SF1s
        # Note that the 1990 SF1 is down :-/
        df = pd.read_json(
            f"https://api.census.gov/data/{decennial_year}/dec/sf1?get=P001001&for=county:*&in=state:*&key={api_key}",
            orient="values",
        )
        df = df.iloc[1:]
        df.columns = ["population", "state", "county"]

    elif decennial_year == 1990:
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            cnty_zipfile = tmpdir / "cnty.zip"
            with requests.get(
                    "https://www2.cdc.gov/nceh/lead/census90/house11/files/cnty.zip",
                    stream=True,
            ) as response:
                response.raise_for_status()
                with open(cnty_zipfile, "wb") as outfile:
                    for chunk in response.iter_content(chunk_size=8192):
                        outfile.write(chunk)

            with zipfile.ZipFile(cnty_zipfile) as infile:
                infile.extract("CNTY.dbf", path=tmpdir)

            dbf = simpledbf.Dbf5(str(tmpdir / "CNTY.dbf"))
            df = dbf.to_dataframe()
            df = df[["P0010001", "STATEFP", "CNTY"]].rename(columns={
                "P0010001": "population",
                "STATEFP": "state",
                "CNTY": "county"
            })
    else:
        raise NotImplementedError("Only support years 1990, 2000, and 2010")

    df["population"] = df["population"].astype(int)

    # Fix Alaska
    just_alaska = df[df["state"] == "02"]
    just_alaska = pd.DataFrame({
        "state": ["02"],
        "county": ["000"],
        "population": [just_alaska["population"].sum()],
    })
    df = pd.concat([df[df["state"] != "02"], just_alaska])

    df["id"] = df["state"] + df["county"]
    df = df.drop(columns=["state", "county"])

    # Finally, Shannon County South Dakota got renamed in 2015. Fix this.
    if year >= 2015:
        df["id"] = df["id"].apply(lambda x: "46102" if x == "46113" else x)

    return df
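A usage sketch for pull_population above; the key is a placeholder and a real Census API key is required:

# Placeholder key; year=2020 resolves to the 2010 decennial counts.
df_population = pull_population(api_key="YOUR_CENSUS_API_KEY", year=2020)
print(df_population.head())  # columns: "population", "id"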
Example #12
def getEPHdbf(censusstring):
    print("Downloading", censusstring)
    ### First, check that the file is not already there
    if not os.path.isfile("data/Individual_" + censusstring + ".dbf"):
        if os.path.isfile('Individual_' + censusstring + ".dbf"):
            # if in the current dir just move it
            if os.system("mv " + 'Individual_' + censusstring + ".dbf " +
                         "data/"):
                print("Error moving file!, Please check!")
        # otherwise start looking for the zip file
        else:
            if not os.path.isfile("data/" + censusstring + "_dbf.zip"):
                if not os.path.isfile(censusstring + "_dbf.zip"):
                    os.system(
                        "curl -O http://www.indec.gob.ar/ftp/cuadros/menusuperior/eph/"
                        + censusstring + "_dbf.zip")
                ### To move it, use the os.system() function to run bash commands with arguments
                os.system("mv " + censusstring + "_dbf.zip " + "data/")
            ### unzip the csv
            os.system("unzip " + "data/" + censusstring + "_dbf.zip -d data/")

    if not os.path.isfile("data/" + 'Individual_' + censusstring + ".dbf"):
        print("WARNING!!! something is wrong: the file is not there!")

    else:
        print("file in place, creating CSV file")

    trimestre = censusstring

    dbf = simpledbf.Dbf5('data/Individual_' + trimestre + '.dbf',
                         codec='latin1')
    indRaw = dbf.to_dataframe()

    indNoW = indRaw.loc[indRaw.REGION == 1, [
        'CODUSU',
        'NRO_HOGAR',
        'COMPONENTE',
        'AGLOMERADO',
        'PONDERA',
        'CH03',
        'CH04',
        'CH06',
        #'CH10', ## borraR
        'CH12',  ## schoolLevel
        'CH13',
        'CH14',
        'ESTADO',
        #'NIVEL_ED',## borrar
        'CAT_OCUP',
        'CAT_INAC',
        'ITF',
        'IPCF',
        'P47T',
        'P21',
        'DECCFR',
        'DECIFR',
        'CH07',
        'CH09',
        'CH15'
    ]]

    indNoW.columns = [
        'CODUSU',
        'NRO_HOGAR',
        'COMPONENTE',
        'AGLOMERADO',
        'PONDERA',
        'familyRelation',
        'female',
        'age',
        #'schooled',## borrar
        'schoolLevel',  ## schoolLevel
        'finishedYear',
        'lastYear',
        'activity',
        #'educLevel',## borrar
        'empCond',
        'unempCond',
        'ITF',
        'IPCF',
        'P47T',
        'P21',
        'DECCFR',
        'DECIFR',
        'maritalStatus',
        'reading',
        'placeOfBirth'
    ]

    indNoW.index = range(0, indNoW.shape[0])

    dbf2 = simpledbf.Dbf5('data/Hogar_' + trimestre + '.dbf', codec='latin1')
    indRaw2 = dbf2.to_dataframe()

    indNoW2 = indRaw2.loc[indRaw2.REGION == 1, [
        'CODUSU', 'NRO_HOGAR', 'REGION', 'PONDERA', 'IV1', 'IV1_ESP', 'IV2',
        'IV3', 'IV3_ESP', 'IV4', 'IV5', 'IV6', 'IV7', 'IV7_ESP', 'IV8', 'IV9',
        'IV10', 'IV11', 'IV12_1', 'IV12_2', 'IV12_3', 'II1', 'II2', 'II3',
        'II3_1', 'II4_1', 'II4_2', 'II4_3', 'II7', 'II7_ESP', 'II8', 'II8_ESP',
        'II9', 'V1', 'IX_TOT', 'IX_MEN10', 'IX_MAYEQ10', 'ITF', 'VII1_1',
        'VII1_2', 'VII2_1', 'VII2_2', 'VII2_3', 'VII2_4', 'ITF', 'DECIFR',
        'RDECIFR', 'IPCF', 'DECCFR', 'RDECCFR'
    ]]

    indNoW2.columns = [[
        'CODUSU', 'NRO_HOGAR', 'REGION', 'PONDERA', 'HomeType', 'HomeTypeesp',
        'RoomsNumber', 'FloorMaterial', 'FloorMaterialesp', 'RoofMaterial',
        'RoofCoat', 'Water', 'WaterType', 'WaterTypeesp', 'Toilet',
        'ToiletLocation', 'ToiletType', 'Sewer', 'DumpSites', 'Flooding',
        'EmergencyLoc', 'UsableTotalRooms', 'SleepingRooms', 'OfficeRooms',
        'OnlyWork', 'Kitchen', 'Sink', 'Garage', 'Ownership', 'Ownershipesp',
        'CookingCombustible', 'CookingCombustibleesp', 'BathroomUse',
        'Working', 'HouseMembers', 'Memberless10', 'Membermore10',
        'TotalHouseHoldIncome', 'DomesticService1', 'DomesticService2',
        'DomesticService3', 'DomesticService4', 'DomesticService5',
        'DomesticService6', 'TotalFamilyIncome', 'TotalFamilyIncomeDec',
        'TotalFamilyIncomeDecReg', 'PerCapInc', 'PerCapIncDec',
        'PerCapIncDecReg'
    ]]

    indNoW2.index = range(0, indNoW2.shape[0])

    indNoW2.to_csv('data/cleanDataHousehold' + trimestre + '.csv', index=False)
    print('csv file cleanDataHousehold' + trimestre + '.csv successfully created in folder data/')

    indNoW.to_csv('data/cleanData' + trimestre + '.csv', index=False)
    print('csv file cleanData' + trimestre + '.csv successfully created in folder data/')
    return
Example #13
def read_df_dbf(dbf_file: str, **simpledbf_kwargs) -> T_DF:
    assert isinstance(dbf_file, str)
    dbf = simpledbf.Dbf5(dbf_file, **simpledbf_kwargs)
    df = pandas.concat((df for df in dbf.to_dataframe(chunksize=10000)),
                       ignore_index=True)
    return sd_checks.check_df(df)
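A usage sketch for read_df_dbf above; the path is hypothetical and extra keyword arguments are forwarded to simpledbf.Dbf5 (e.g. codec):

# Hypothetical file; codec is passed through to simpledbf.Dbf5.
df = read_df_dbf("parcels.dbf", codec="latin1")
print(len(df))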
Example #14
    def join_tables(self, threshold, user_def_column_name, output_file_name):
        print("joining tables \n")

        # make a list of all the tables we have. These are already dataframes
        possible_dfs = [
            self.emissions, self.forest_loss, self.biomass_weight,
            self.forest_extent
        ]

        # get rid of df's we don't have
        df_list = [x for x in possible_dfs if x is not None]

        # how to get column names to keep? like extent, emissions, loss? i'm going through and getting
        # third column for each df which is the analysis name
        analysis_names = [x.columns.values[3] for x in df_list]

        # convert original SUM values into the right units
        for index, item in enumerate(analysis_names):

            if item == 'forest_loss':
                analysis_names[index] = 'forest_loss_ha'

                self.forest_loss[
                    'forest_loss'] = self.forest_loss.forest_loss.astype(float)
                self.forest_loss[
                    'forest_loss_ha'] = self.forest_loss['forest_loss'] / 10000

            if item == 'forest_extent':
                analysis_names[index] = 'forest_extent_ha'
                self.forest_extent['forest_extent_ha'] = self.forest_extent[
                    'forest_extent'] / 10000

            if item == 'biomass_weight':
                analysis_names[index] = 'biomass_weight_Tg'
                self.biomass_weight['biomass_weight_Tg'] = self.biomass_weight[
                    'biomass_weight'] / 1000000

        # join all the data frames together on Value and ID. Value is the tcd/loss code (41 = loss in 2001 at 1-10%tcd
        # or loss in 2001 at >30% tcd. ID is the unique ID of the feature in the shapefile
        merged = pd.concat([df.set_index(['VALUE', 'ID']) for df in df_list],
                           axis=1)
        merged = merged.reset_index()

        # To get 2 outputs from a single function and apply them to 2 different columns in the dataframe:
        # http://stackoverflow.com/questions/12356501/pandas-create-two-new-columns-in-a-dataframe-with-
        # values-calculated-from-a-pre?rq=1
        # tcd and year columns is equal to the first and second output from the function: value_to_tcd_year

        try:
            merged['tcd'], merged['year'] = list(
                zip(*merged["VALUE"].map(post_processing.value_to_tcd_year)))
        except KeyError:
            print(
                "oops, loss mosaic doesn't have the arithmetic function applied. Refer to readme file"
            )
            sys.exit()

        # the value_to_tcd_year function is good for when user runs all thresholds, but not just one.
        # so, overwrite the tcd column when it comes back
        if threshold != "all":
            merged['tcd'] = "> {}%".format(threshold)

        # get the input shapefile into df format
        final_aoi_dbf = self.final_aoi.replace(".shp", ".dbf")
        final_aoi_dbf = simpledbf.Dbf5(final_aoi_dbf)

        # convert dbf to pandas dataframe
        final_aoi_df = final_aoi_dbf.to_dataframe()

        # reset index of final_aoi_df
        final_aoi_df = final_aoi_df.reset_index()

        if user_def_column_name:
            merged = final_columns.user_cols(user_def_column_name,
                                             final_aoi_df, merged,
                                             analysis_names)

        else:
            columns_to_keep = ['ID', 'tcd', 'year']
            columns_to_keep.extend(analysis_names)

            merged = merged[columns_to_keep]

        print('SAMPLE OF OUTPUT:')
        print(merged.head(5))

        final_output_csv = os.path.join(self.root_dir, 'result',
                                        '{}.csv'.format(output_file_name))
        merged.to_csv(final_output_csv, index=False)
Example #15
    tables_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'tables')

    z_stats_tbl = os.path.join(tables_dir, 'output_{}.dbf'.format(i))

    start_time = datetime.datetime.now()

    print("running zstats")
    outzstats = ZonalStatisticsAsTable(zone, "VALUE", value, z_stats_tbl,
                                       "DATA", "SUM")

    end_time = datetime.datetime.now() - start_time
    print("debug:time elapsed: {}".format(end_time))

    # convert the output zstats table into a pandas DF
    dbf = simpledbf.Dbf5(z_stats_tbl)
    df = dbf.to_dataframe()

    # populate a new field "id" with the FID and analysis with the sum
    df['ID'] = i
    df[analysis] = df['SUM']

    # sometimes this value came back as an object, so here we are fixing that bug
    df.VALUE = df.VALUE.astype(int)

    # name of the sql database to store the sql table
    zstats_results_db = os.path.join(tables_dir, 'zstats_results_db.db')

    # create a connection to the sql database
    conn = sqlite3.connect(zstats_results_db)
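The fragment above ends right after opening the SQLite connection; a hedged sketch of how the zonal-statistics frame could then be persisted (the table name is hypothetical, not from the original script):

    # Hypothetical continuation: append this feature's zonal stats to the results database.
    df.to_sql("zstats_results", conn, if_exists="append", index=False)
    conn.close()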
Example #16
    transit_seatcap_df = pd.read_csv(SEATCAP_FILE)
    transit_seatcap_df.columns = transit_seatcap_df.columns.str.replace(
        '%', 'pct')
    transit_seatcap_df.rename(columns={
        "VEHTYPE": "veh_type_updated",
        "100pctCapacity": "standcap"
    },
                              inplace=True)
    logging.info("Read {}\n{}".format(SEATCAP_FILE, transit_seatcap_df.head()))

    # read the transit files
    all_trn_df = pd.DataFrame()
    for timeperiod in ['AM', 'EA', 'EV', 'MD', 'PM']:
        trn_file = os.path.join(trn_dir,
                                'trnlink{}_ALLMSA.dbf'.format(timeperiod))
        dbf = simpledbf.Dbf5(trn_file)
        trn_df = dbf.to_dataframe()
        trn_df["period"] = timeperiod
        logging.info("Read {} links from {}".format(len(trn_df), trn_file))
        # print(trn_df.head())
        all_trn_df = pd.concat([all_trn_df, trn_df])
    logging.info("Read {} total links".format(len(all_trn_df)))

    # drop columns we won't be updating/using so they don't cause confusion
    all_trn_df.drop(columns=[
        "AB_XITA", "AB_BRDB", "BA_VOL", "BA_BRDA", "BA_XITA", "BA_BRDB",
        "BA_XITB"
    ],
                    inplace=True)

    # sort by mode, line name, time period, sequence
Example #17
        print("Parameter {} not found.".format(par))
        continue
    dates = get_dates_from_filenames(outlist)
    print("Parameter: {}\nFound {} dates: {}".format(par, len(dates), dates))

    dflist = []
    for date in dates:
        infile = list(
            filter(lambda x: date in x,
                   outlist))  #python 3 requires conversion from filter to list
        if len(infile) == 1:  #make sure there is only one file for each date
            infile = infile[0]
        else:  #this condition should not occur. If necessary an exception can be added here
            print('Error: Multiple files found for date {}.'.format(date))
            break
        df = simpledbf.Dbf5(infile).to_dataframe()
        df['date'] = date

        if DATEFORM == 'YYYYMMDD':
            time = datetime(int(date[0:4]), int(date[4:6]), int(
                date[6:])) - DATEMIN
            df['time'] = time.days
        df = df.dropna(axis=1, how='all')  #remove empty columns
        df = df[TO_KEEP]
        dflist.append(df)
    out_df = pd.concat(dflist)
    out_df.columns = [c.replace(REP, par) for c in out_df.columns]
    dflist_all.append(out_df)

finaldf = reduce(lambda x, y: pd.merge(x, y, on=JOIN_ON, how='outer'),
                 dflist_all)
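The closing reduce call above merges every per-parameter frame on the JOIN_ON columns with outer joins; a self-contained sketch of that idiom (the column names and values here are illustrative only):

from functools import reduce
import pandas as pd

# Two small frames sharing a join key; reduce folds pd.merge over the list.
frames = [
    pd.DataFrame({"date": ["20200101", "20200102"], "a": [1, 2]}),
    pd.DataFrame({"date": ["20200101", "20200102"], "b": [3, 4]}),
]
merged = reduce(lambda x, y: pd.merge(x, y, on="date", how="outer"), frames)
# merged now has columns: date, a, b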
Example #18
#!/usr/bin/env python
#thanks to:
#https://gist.github.com/bertspaan/8220892

import simpledbf as sdbf
import os
import sys

filename = sys.argv[1]
print(filename)
if filename.endswith('.dbf') or filename.endswith('.DBF'):
    print("Converting %s to csv" % filename)
    csv_fn = filename[:-4] + ".csv"
    dbf = sdbf.Dbf5(filename)
    dbf.to_csv(csv_fn)  # to_csv opens and writes the output file itself
    print("Done...")
else:
    print("Filename does not end with .dbf")
Example #19
            outPolygons = "BfQ_polygon.shp"
            arcpy.RasterToPolygon_conversion(outRas, outPolygons)

            # Set local variables
            inZoneData = outPolygons
            zoneField = "id"
            inClassData = outPolygons
            classField = "id"
            outTable = "BfQ_polygon_table.dbf"
            processingCellSize = 0.01

            # Execute TabulateArea
            TabulateArea(inZoneData, zoneField, inClassData, classField,
                         outTable, processingCellSize, "CLASSES_AS_ROWS")

            BfQ_area_dbf = simpledbf.Dbf5(env.workspace + '\\' + outTable)
            BfQ_partial_area = BfQ_area_dbf.to_dataframe()
            BfQ_area = np.sum(np.array(BfQ_partial_area['Area']))

            del BfQ_area_dbf
            del BfQ_partial_area
            #del BfQ_area

            arcpy.Delete_management(outPolygons)
            arcpy.Delete_management(outTable)

    # Reverse
    #Flow = Flow[::-1]
    #CalArea = CalArea[::-1]

    # Non-dimensionalization
    dflist = []

    for i, date in enumerate(dates):
        infol = list(
            filter(lambda x: date in x,
                   outlist))  #python 3 requires conversion from filter to list
        if len(infol) == 1:  #make sure there is only one file for each date
            infol = infol[0]
            print('Date {} OK!'.format(date))
        else:  #this condition should not occur. If necessary an exception can be added here
            print('Warning: Multiple or no files found for date {}. Skipping.'.
                  format(date))
            continue

        df = simpledbf.Dbf5(os.path.join(infol, POLFILE)).to_dataframe()
        df['date'] = date
        df['time'] = (dates_dt[i] - DATEMIN).days
        df['X'] = pd.to_numeric(df['X'])  #otherwise might be strings
        df['Y'] = pd.to_numeric(df['Y'])
        df = df.dropna(axis=1, how='all')  #remove empty columns
        dflist.append(df)

    out_df = pd.concat(dflist)  #vertical concatenation
    out_df = out_df[['X', 'Y', 'time', 'date'] + COLS]  #trim
    out_df.columns = [
        par + '_' + col if col in COLS else col for col in out_df.columns
    ]
    dflist_all.append(out_df)

finaldf = reduce(lambda x, y: pd.merge(x, y, on=JOIN_ON, how='outer'),
Example #21
        help=
        "Location of transit assignment files (trn_link_onoffs_[EA,AM,MD,PM,EV].dbf"
    )
    parser.add_argument("--byclass",
                        action="store_true",
                        help="Include user class outputs (VOL, ONA, OFFB)")
    args = parser.parse_args()

    all_linko_df = pandas.DataFrame()
    for time_period in TIME_PERIODS.keys():
        #   for testing:
        #    for time_period in ['EA']:

        linko_file = os.path.join(args.trn_dir,
                                  "trn_link_onoffs_{}.dbf".format(time_period))
        linko_dbf = simpledbf.Dbf5(linko_file)
        linko_df = linko_dbf.to_dataframe()

        print("Read {} lines from {}".format(len(linko_df), linko_file))

        # for now, we want boardings so
        # filter down to just transit links (no access/egress)
        linko_df = linko_df.loc[linko_df.MODE < 900]
        # and those with positive ONA
        linko_df = linko_df.loc[linko_df.ONA > 0]
        print("Filtered to {} transit links with boardings".format(
            len(linko_df)))

        # drop columns starting with REV
        colnames = list(linko_df.columns)
        rev_colnames = [