Exemple #1
0
def add_extra_columns_func(df):
    """
    Fill in default and derived columns on ``df``.

    The price/rent columns and the redfin sale columns are always
    overwritten; the remaining columns are only added when ``df`` does not
    already have them. ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``parcel_id`` column when ``parcel_size`` is absent.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for col in ["residential_price", "non_residential_rent"]:
        df[col] = 0

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # print() with one pre-formatted argument emits the same text on
        # Python 2 and 3; the bare print statement only parses on Python 2
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        # NOTE(review): the looked-up Series is labelled by parcel id and the
        # assignment aligns on df's index - confirm df is indexed by parcel id
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and \
            "building_type" not in df:
        # the injectable is called once per row (axis=1)
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(form_to_btype_func, axis=1)

    return df
def add_extra_columns_func(df):
    """
    Fill in default and derived columns on ``df``.

    Always overwritten: ``residential_price``, ``non_residential_rent``,
    ``redfin_sale_year`` and ``redfin_sale_price``. Added only when missing:
    ``deed_restricted_units``, ``residential_units``, ``parcel_size``,
    ``year_built`` and ``building_type`` (the last two only if the
    corresponding Orca injectable is registered).

    Parameters
    ----------
    df : pandas.DataFrame
        Modified in place. Needs a ``parcel_id`` column when ``parcel_size``
        is absent.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for col in ["residential_price", "non_residential_rent"]:
        df[col] = 0

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # function-call form of print so the module also parses on Python 3
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        # NOTE(review): assignment aligns the parcel-id-labelled Series on
        # df's index - confirm df is indexed by parcel id
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and \
            "building_type" not in df:
        # the injectable is applied to each row (axis=1)
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(form_to_btype_func, axis=1)

    return df
Exemple #3
0
    def get_dynamic_filepath(self):
        """
        Build the concrete file path by filling in the dynamic placeholders.

        ``%RUN%`` and ``%ITER%`` are replaced with the Orca injectables ``run_id`` and
        ``iter_var`` respectively, or with ``0`` when the injectable is not registered.
        ``%TS%`` is replaced with the current UTC time formatted as ``YYYYMMDD-HHMMSS``.

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If ``self.path`` is ``None``.

        """
        if self.path is None:
            raise ValueError("Please provide a file path")

        run_id = orca.get_injectable('run_id') if orca.is_injectable('run_id') else 0
        iteration = orca.get_injectable('iter_var') if orca.is_injectable('iter_var') else 0
        timestamp = datetime.datetime.utcnow().strftime('%Y%m%d-%H%M%S')

        return (self.path
                .replace('%RUN%', str(run_id))
                .replace('%ITER%', str(iteration))
                .replace('%TS%', timestamp))
Exemple #4
0
def assert_injectable_is_registered(injectable_name):
    """
    Raise unless an Orca injectable with the given name is registered.

    Parameters
    ----------
    injectable_name : str

    Raises
    ------
    OrcaAssertionError
        If ``orca.is_injectable(injectable_name)`` is false.

    """
    if orca.is_injectable(injectable_name):
        return
    raise OrcaAssertionError(
        "Injectable '%s' is not registered" % injectable_name)
Exemple #5
0
def large_area_id(jobs, buildings):
    """
    Return the ``jobs_large_area_lookup`` injectable selected by ``jobs.index``.

    The lookup is (re)built with
    ``misc.reindex(buildings.large_area_id, jobs.building_id)`` and registered
    as a cached, non-autocalled Orca injectable whenever it is not registered
    yet or is registered but empty.
    """
    lookup_name = "jobs_large_area_lookup"

    needs_build = not orca.is_injectable(lookup_name)
    if not needs_build:
        # registered already; still rebuild when the stored lookup is empty
        needs_build = len(orca.get_injectable(lookup_name)) == 0

    if needs_build:
        lookup = misc.reindex(buildings.large_area_id, jobs.building_id)
        orca.add_injectable(lookup_name, lookup, autocall=False, cache=True)

    return orca.get_injectable(lookup_name).loc[jobs.index]
Exemple #6
0
def open_pipeline_store(overwrite=False):
    """
    Open the pipeline checkpoint store and add an orca injectable to access it

    Parameters
    ----------
    overwrite : bool
        delete file before opening (unless resuming)

    Raises
    ------
    RuntimeError
        if a 'pipeline_store' injectable is already registered
    """

    if orca.is_injectable('pipeline_store'):
        raise RuntimeError("Pipeline store is already open!")

    pipeline_file_path = orca.get_injectable('pipeline_path')

    if overwrite:
        try:
            if os.path.isfile(pipeline_file_path):
                logger.debug("removing pipeline store: %s" %
                             pipeline_file_path)
                os.unlink(pipeline_file_path)
        except Exception as e:
            # removal is best-effort: report the failure and still open the
            # store below. The message needs both the path and the error;
            # formatting it with only (e, ) raised TypeError in this handler.
            print(e)
            logger.warning("Error removing %s: %s" % (pipeline_file_path, e))

    # mode 'a' opens an existing file or creates it when missing
    store = pd.HDFStore(pipeline_file_path, mode='a')

    orca.add_injectable('pipeline_store', store)

    logger.debug("opened pipeline_store")
Exemple #7
0
def assert_injectable_not_registered(injectable_name):
    """
    Raise if an Orca injectable with the given name is already registered.

    Parameters
    ----------
    injectable_name : str

    Raises
    ------
    OrcaAssertionError
        If ``orca.is_injectable(injectable_name)`` is true.

    """
    if not orca.is_injectable(injectable_name):
        return
    raise OrcaAssertionError(
        "Injectable '%s' is already registered" % injectable_name)
Exemple #8
0
def get_pipeline_store():
    """
    Return the open pipeline hdf5 checkpoint store, or None if it has not been opened

    "Opened" here means that a 'pipeline_store' orca injectable is registered.
    """
    if orca.is_injectable('pipeline_store'):
        return orca.get_injectable('pipeline_store')
    return None
Exemple #9
0
def preload_injectables():
    """
    Fetch the 'preload_injectables' orca injectable when one is registered.

    The timing token from ``print_elapsed_time()`` is handed back to
    ``print_elapsed_time`` under the label "preload_injectables" afterwards.
    """
    started = print_elapsed_time()

    # the fetched value is discarded - presumably the fetch is wanted only for
    # its loading side effect (TODO confirm)
    if orca.is_injectable('preload_injectables'):
        orca.get_injectable('preload_injectables')

    print_elapsed_time("preload_injectables", started)
Exemple #10
0
def add_extra_columns_func(df):
    """
    Fill in default and derived columns on ``df``.

    ``source``, the price/rent columns, ``preserved_units`` and the redfin
    sale columns are always overwritten. The unit-count columns are created
    as 0 when absent and otherwise have their totals printed. The remaining
    columns are added only when missing. ``df`` is modified in place and
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``parcel_id`` column when ``parcel_size`` is absent.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    def init_or_report(column, message):
        # create the column as 0 when absent, otherwise print its total
        if column in df.columns:
            print(message % df[column].sum())
        else:
            df[column] = 0

    df['source'] = 'developer_model'

    for price_col in ("residential_price", "non_residential_rent"):
        df[price_col] = 0

    # statement order is kept so new columns are appended in the same order
    init_or_report("deed_restricted_units",
                   "Number of deed restricted units built = %d")
    df["preserved_units"] = 0.0
    init_or_report("inclusionary_units",
                   "Number of inclusionary units built = %d")
    init_or_report("subsidized_units",
                   "Number of subsidized units built = %d")

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        parcel_sizes = orca.get_table("parcels").parcel_size
        df["parcel_size"] = parcel_sizes.loc[df.parcel_id]

    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and "building_type" not in df:
        # the injectable is applied to each row (axis=1)
        to_building_type = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(to_building_type, axis=1)

    return df
Exemple #11
0
def large_area_id(households, buildings):
    """
    Return the ``households_large_area_lookup`` injectable selected by
    ``households.index``.

    When the injectable is not registered, or is registered but empty, it is
    first built with
    ``misc.reindex(buildings.large_area_id, households.building_id)`` and
    registered as a cached, non-autocalled Orca injectable.
    """
    key = "households_large_area_lookup"

    def lookup_missing_or_empty():
        # the length check only runs once the injectable is known to exist
        if not orca.is_injectable(key):
            return True
        return len(orca.get_injectable(key)) == 0

    if lookup_missing_or_empty():
        rebuilt = misc.reindex(buildings.large_area_id,
                               households.building_id)
        orca.add_injectable(key, rebuilt, autocall=False, cache=True)

    return orca.get_injectable(key).loc[households.index]
Exemple #12
0
def add_extra_columns_func(df):
    """
    Fill in default and derived columns on ``df``.

    Price/rent columns and ``residential_units`` are created as 0 when
    absent and otherwise have their missing values filled with 0. The redfin
    sale columns are always overwritten. The remaining columns are added
    only when missing. ``df`` is modified in place and returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``ave_unit_size`` when ``sqft_per_unit`` is absent, and
        ``parcel_id`` when ``parcel_size`` is absent.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for col in ["residential_price", "non_residential_rent"]:
        if col not in df.columns:
            df[col] = 0
        else:
            df[col] = df[col].fillna(0)

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # function-call form of print so the module also parses on Python 3;
        # with a single pre-formatted argument the output is unchanged on 2
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df.columns:
        df["residential_units"] = 0
    else:
        df["residential_units"] = df["residential_units"].fillna(0)

    # we're keeping sqft per unit in the buildings table but we need
    # to make sure we get a comparable column out of the feasibility
    # table which is what generates new buildings. ave_unit_size is
    # the closest thing, even though its calculated at the parcel level
    # rather than the building level
    if 'sqft_per_unit' not in df.columns:
        df['sqft_per_unit'] = df['ave_unit_size']

    if "parcel_size" not in df:
        # NOTE(review): assignment aligns the parcel-id-labelled Series on
        # df's index - confirm df is indexed by parcel id
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and \
            "building_type" not in df:
        # the injectable is applied to each row (axis=1)
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(form_to_btype_func, axis=1)

    return df
Exemple #13
0
def start_pipeline(resume_after=None):
    """
    Start the pipeline for a fresh run, or resume it from a stored checkpoint.

    With ``resume_after`` set, the pipeline store is opened without overwriting
    and the named checkpoint is loaded. Otherwise the store is opened with
    ``overwrite=True`` and the initial checkpoint is added.

    Parameters
    ----------
    resume_after : str or None
        name of checkpoint to load from pipeline store
    """

    logger.info("start_pipeline...")

    timer = print_elapsed_time()

    # fetch the skim injectables that are registered before opening the store;
    # the fetched values are discarded here
    preloads = (
        ('skim_dict', "load skim_dict"),
        ('skim_stack', "load skim_stack"),
    )
    for injectable_name, label in preloads:
        if orca.is_injectable(injectable_name):
            orca.get_injectable(injectable_name)
            timer = print_elapsed_time(label, timer)

    if not resume_after:
        # open new, empty pipeline
        open_pipeline_store(overwrite=True)
        add_checkpoint(_INITIAL_CHECKPOINT_NAME)
        print_elapsed_time(
            "add_checkpoint '%s'" % _INITIAL_CHECKPOINT_NAME, timer)
    else:
        # open existing pipeline
        open_pipeline_store(overwrite=False)
        load_checkpoint(resume_after)
        print_elapsed_time("load_checkpoint '%s'" % resume_after, timer)

    logger.debug("start_pipeline complete")
Exemple #14
0
def run(models, resume_after=None):
    """
    run the specified list of models, optionally loading checkpoint and resuming after specified
    checkpoint.

    Since we use model_name as checkpoint name, the same model may not be run more than once.

    If resume_after checkpoint is specified and a model with that name appears in the models list,
    then we only run the models after that point in the list. This allows the user always to pass
    the same list of models, but specify a resume_after point if desired.

    Parameters
    ----------
    models : [str]
        list of model_names
    resume_after : str or None
        model_name of checkpoint to load checkpoint and AFTER WHICH to resume model run
    """

    if resume_after and resume_after in models:
        # drop everything up to and including the resume_after model
        models = models[models.index(resume_after) + 1:]

    t0 = print_elapsed_time()

    open_pipeline(resume_after)
    t0 = print_elapsed_time('open_pipeline', t0)

    # preload any bulky injectables (e.g. skims) not in pipeline
    if orca.is_injectable('preload_injectables'):
        orca.get_injectable('preload_injectables')
        t0 = print_elapsed_time('preload_injectables', t0)

    # restart the timer so the final figure covers only the model loop
    t0 = print_elapsed_time()
    for model in models:
        t1 = print_elapsed_time()
        run_model(model)
        # the label used to end with a stray ')' ("run_model %s)")
        print_elapsed_time("run_model %s" % model, t1)

        logger.debug('#mem after %s, %s' % (model, memory_info()))

    print_elapsed_time("run (%s models)" % len(models), t0)
Exemple #15
0
def get_injectable(name, default=None):
    """
    Return the orca injectable ``name``, or ``default`` when it is not registered.
    """
    if not orca.is_injectable(name):
        return default
    return orca.get_injectable(name)
Exemple #16
0
def is_injectable(name):
    """
    Return the result of ``orca.is_injectable(name)``.
    """
    registered = orca.is_injectable(name)
    return registered
Exemple #17
0
def add_extra_columns_func(df):
    """
    Fill in default, derived and alias columns on ``df``, then merge in
    columns from the ``parcels`` and ``buildings`` Orca tables.

    The input frame is modified in place up to the first merge; the value
    returned is the new frame produced by the merges.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs either a ``parcel_id`` or a ``PARCEL_ID`` column.

    Returns
    -------
    pandas.DataFrame
        Merged frame, re-indexed on the ``index`` column created by
        ``reset_index()``.
    """
    if 'parcel_id' not in df.columns:
        df["parcel_id"] = df["PARCEL_ID"]
    for col in ["residential_price", "non_residential_rent"]:
        df[col] = 0

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # function-call form of print so the module also parses on Python 3;
        # with a single pre-formatted argument the output is unchanged on 2
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        # NOTE(review): assignment aligns the parcel-id-labelled Series on
        # df's index - confirm df is indexed by parcel id
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and \
            "building_type" not in df:
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(form_to_btype_func, axis=1)

    # (alias, source) pairs, processed in order: an alias column is copied
    # from its source only when the alias is missing and the source exists.
    # The former trailing parcel_id/PARCEL_ID pair is omitted because
    # parcel_id is guaranteed to exist by the first statement of the function.
    column_aliases = [
        ("GEOM_ID", "geom_id"),
        ("bld_year", "year_built"),
        ("bldg_sqft", "building_sqft"),
        ("bldgt_id", "building_type"),
        ("nres_r_ft", "non_residential_rent"),
        ("nres_sqft", "non_residential_sqft"),
        ("redf_year", "redfin_sale_year"),
        ("res_sqft", "residential_sqft"),
        ("res_units", "residential_units"),
        ("sale_price", "building_purchase_price"),
    ]
    for alias, source in column_aliases:
        if alias not in df.columns and source in df.columns:
            df[alias] = df[source]

    # NOTE(review): `on` is combined with `right_index=True` here; recent
    # pandas releases reject that combination - confirm against the pandas
    # version in use (left_on='parcel_id' may be what is intended)
    df = pd.merge(df, orca.get_table('parcels').to_frame(['APN', 'parcel_id']),
                  on='parcel_id', right_index=True, how='left')

    # This holds the column names that devs is missing from buildings, plus
    # APN, since it needs something to join on. I'm not sure, but I think it's
    # possible for more than one building to have the same APN. If that ever
    # happens, a new column will need to be chosen to merge devs to buildings
    buildings = orca.get_table('buildings')
    missing = ['APN'] + [column for column in buildings.columns
                         if column not in df.columns]

    # reset_index() moves the current index into a column named 'index'
    # (assuming the index is unnamed), which is restored after the merge
    df = pd.merge(df.reset_index(), buildings.to_frame(missing),
                  on='APN', how='left').set_index('index')

    return df
Exemple #18
0
 def wrapper(**kwargs):
     """
     Call ``func`` with the given keyword arguments, after calling
     ``abort(404)`` when ``kwargs["inj_name"]`` is not a registered Orca
     injectable.
     """
     if not orca.is_injectable(kwargs["inj_name"]):
         abort(404)
     return func(**kwargs)
Exemple #19
0
 def wrapper(**kwargs):
     """
     Guard ``func``: ``abort(404)`` is called when ``kwargs['inj_name']`` does
     not name a registered Orca injectable, then ``func`` is called with the
     same keyword arguments.
     """
     injectable_known = orca.is_injectable(kwargs['inj_name'])
     if not injectable_known:
         abort(404)
     return func(**kwargs)
Exemple #20
0
def get_injectable(name, default=_NO_DEFAULT):
    """
    Return the orca injectable ``name``, falling back to ``default``.

    When ``name`` is not registered and no ``default`` was supplied, the
    lookup is still delegated to ``orca.get_injectable`` so that orca's own
    handling of an unknown name applies.

    Parameters
    ----------
    name : str
    default : object, optional
        Value returned when ``name`` is not registered.
    """
    # identity test for the sentinel: `==` would be evaluated element-wise for
    # array-like defaults (numpy arrays, pandas objects) and make the `or` fail
    if orca.is_injectable(name) or default is _NO_DEFAULT:
        return orca.get_injectable(name)
    return default