def interaction_simulate_data(
    name="non_mandatory_tour_frequency",
    edb_directory="output/estimation_data_bundle/{name}/",
    settings_file="{name}_model_settings.yaml",
    spec_file="{name}_SPEC.csv",
    alt_def_file="{name}_alternatives.csv",
    coefficients_files="{segment_name}/{name}_coefficients_{segment_name}.csv",
    chooser_data_files="{segment_name}/{name}_choosers_combined.csv",
    alt_values_files="{segment_name}/{name}_interaction_expression_values.csv",
):
    """
    Load the estimation data bundle for an interaction-simulate component.

    Per-segment coefficient, chooser, and interaction-expression-value files
    are read for every segment named in the model settings, along with the
    shared spec and the alternatives definition table.

    Parameters
    ----------
    name : str
        Model component name; fills the "{name}" placeholder in the other
        filename arguments.
    edb_directory : str
        Template path to the estimation data bundle.
    settings_file, spec_file, alt_def_file : str
        Template filenames within the EDB.
    coefficients_files, chooser_data_files, alt_values_files : str
        Template filenames carrying both "{name}" and "{segment_name}"
        placeholders; one file is read per segment.

    Returns
    -------
    Dict
        Loaded settings, spec, alternative definitions, and per-segment
        dicts of coefficients, chooser data, and expression values.
    """
    edb_directory = edb_directory.format(name=name)

    def _load(filename, **kwargs):
        # every EDB filename may still carry a "{name}" placeholder
        return pd.read_csv(
            os.path.join(edb_directory, filename.format(name=name)), **kwargs
        )

    with open(
        os.path.join(edb_directory, settings_file.format(name=name)), "r"
    ) as yf:
        settings = yaml.load(yf, Loader=yaml.SafeLoader)

    coefficients = {}
    chooser_data = {}
    alt_values = {}
    for seg in [s['NAME'] for s in settings['SPEC_SEGMENTS']]:
        coefficients[seg] = _load(
            coefficients_files.format(name=name, segment_name=seg),
            index_col='coefficient_name',
        )
        chooser_data[seg] = _load(
            chooser_data_files.format(name=name, segment_name=seg),
        )
        alt_values[seg] = _load(
            alt_values_files.format(name=name, segment_name=seg),
        )

    spec = remove_apostrophes(_load(spec_file), ['Label'])
    alt_def = _load(alt_def_file.format(name=name), index_col=0)

    return Dict(
        edb_directory=Path(edb_directory),
        settings=settings,
        chooser_data=chooser_data,
        coefficients=coefficients,
        alt_values=alt_values,
        spec=spec,
        alt_def=alt_def,
    )
def generate_alternatives(n_persons):
    """
    Generate a dictionary of CDAP alternatives.

    The keys are the names of the patterns — every combination of the three
    basic activity codes, one letter per person (e.g. "MNH") — and the
    values are alternative code numbers, numbered consecutively from 1.

    Parameters
    ----------
    n_persons : int
        Number of persons in the household; each pattern name has this
        many letters.

    Returns
    -------
    Dict
    """
    activity_codes = ("M", "N", "H")
    pattern_names = [
        "".join(combo)
        for combo in itertools.product(activity_codes, repeat=n_persons)
    ]
    return Dict(zip(pattern_names, np.arange(1, len(pattern_names) + 1)))
def stop_frequency_data(
    edb_directory="output/estimation_data_bundle/{name}/",
    settings_file="{name}_model_settings.yaml",
    chooser_data_file="{name}_values_combined.csv",
    values_index_col="tour_id",
):
    """
    Load and harmonize the stop frequency estimation data bundle.

    The stop frequency model is segmented by tour primary purpose, with a
    separate spec and coefficient file per segment.  This function reads all
    segments, merges their coefficient tables into a single master table
    (renaming coefficients that share a name but have different starting
    values across segments), rewrites each segment's spec to use the
    harmonized names (written as ``stop_frequency_SPEC_.csv``), and loads
    the chooser data per segment.

    Parameters
    ----------
    edb_directory : str
        Template path to the estimation data bundle; "{name}" is replaced
        with "stop_frequency".
    settings_file, chooser_data_file : str
        Template filenames within the EDB.
    values_index_col : str
        Column used as the index of each segment's chooser data.

    Returns
    -------
    Dict
        Per-segment specs, chooser data, and alternative code mappings,
        plus the merged master coefficient frame and the per-segment
        original-to-harmonized coefficient name map.
    """
    import re  # stdlib; used for safe whole-word coefficient renaming below

    name = 'stop_frequency'
    edb_directory = edb_directory.format(name=name)
    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )
    segments = [i['primary_purpose'] for i in settings['SPEC_SEGMENTS']]

    master_coef = {}  # harmonized coefficient name -> starting value
    prior_segs = []  # segments already merged into master_coef
    coef_map = {seg: {} for seg in segments}  # per-segment original -> harmonized name
    segment_coef = {}
    for seg_ in settings["SPEC_SEGMENTS"]:
        seg_purpose = seg_['primary_purpose']
        seg_subdir = Path(os.path.join(edb_directory, seg_purpose))
        segment_coef[seg_['primary_purpose']] = pd.read_csv(
            seg_subdir / seg_['COEFFICIENTS'],
            index_col="coefficient_name",
        )
    for seg in segments:
        for cname, value in segment_coef[seg].value.items():
            if cname in master_coef:
                if master_coef[cname] == value:
                    # same name, same value: share the master coefficient
                    coef_map[seg][cname] = cname
                else:
                    # same name, different value: reuse a prior segment's
                    # renamed copy if its value matches, else mint a new
                    # segment-suffixed coefficient
                    for pseg in prior_segs:
                        if master_coef.get(f"{cname}_{pseg}", None) == value:
                            coef_map[seg][cname] = f"{cname}_{pseg}"
                            break
                    else:  # no break
                        master_coef[f"{cname}_{seg}"] = value
                        coef_map[seg][cname] = f"{cname}_{seg}"
            else:
                master_coef[cname] = value
                coef_map[seg][cname] = cname
        prior_segs.append(seg)

    # rewrite revised spec files with harmonized coefficient names
    for seg in segments:
        seg_subdir = Path(os.path.join(edb_directory, seg))
        with open(seg_subdir / "stop_frequency_SPEC.csv", 'rt') as f:
            spec = f.read()
        if coef_map[seg]:
            # BUG FIX: the previous implementation chained plain
            # `str.replace` calls, which corrupted the spec whenever one
            # coefficient name was a substring of another (e.g. "coef_1"
            # inside "coef_10"), or when a replacement's output contained a
            # later key.  Do all renames in a single whole-word regex pass,
            # with longest-first alternation to break ties among
            # overlapping names.  (Assumes coefficient names are \w+
            # tokens, which is the ActivitySim convention.)
            mapping = coef_map[seg]
            pattern = re.compile(
                r"\b("
                + "|".join(
                    re.escape(k)
                    for k in sorted(mapping, key=len, reverse=True)
                )
                + r")\b"
            )
            spec = pattern.sub(lambda match: mapping[match.group(0)], spec)
        with open(seg_subdir / "stop_frequency_SPEC_.csv", 'wt') as f:
            f.write(spec)

    master_coef_df = pd.DataFrame(data=master_coef, index=['value']).T
    master_coef_df.index.name = "coefficient_name"

    seg_coefficients = []
    seg_spec = []
    seg_alt_names = []
    seg_alt_codes = []
    seg_alt_names_to_codes = []
    seg_alt_codes_to_names = []
    seg_chooser_data = []
    for seg in settings["SPEC_SEGMENTS"]:
        seg_purpose = seg['primary_purpose']
        seg_subdir = Path(os.path.join(edb_directory, seg_purpose))
        coeffs_ = pd.read_csv(
            seg_subdir / seg['COEFFICIENTS'],
            index_col="coefficient_name",
        )
        # tag each segment's coefficient names with its purpose
        coeffs_.index = pd.Index(
            [f"{i}_{seg_purpose}" for i in coeffs_.index],
            name="coefficient_name",
        )
        seg_coefficients.append(coeffs_)
        spec = pd.read_csv(seg_subdir / "stop_frequency_SPEC_.csv")
        spec = remove_apostrophes(spec, ["Label"])
        seg_spec.append(spec)
        # alternatives are the spec columns after Label/Description/Expression
        alt_names = list(spec.columns[3:])
        alt_codes = np.arange(1, len(alt_names) + 1)
        seg_alt_names.append(alt_names)
        seg_alt_codes.append(alt_codes)
        seg_alt_names_to_codes.append(dict(zip(alt_names, alt_codes)))
        seg_alt_codes_to_names.append(dict(zip(alt_codes, alt_names)))
        chooser_data = pd.read_csv(
            seg_subdir / chooser_data_file.format(name=name),
            index_col=values_index_col,
        )
        seg_chooser_data.append(chooser_data)

    return Dict(
        edb_directory=Path(edb_directory),
        settings=settings,
        chooser_data=seg_chooser_data,
        coefficients=master_coef_df,
        spec=seg_spec,
        alt_names=seg_alt_names,
        alt_codes=seg_alt_codes,
        alt_names_to_codes=seg_alt_names_to_codes,
        alt_codes_to_names=seg_alt_codes_to_names,
        segments=segments,
        coefficient_map=coef_map,
        segment_coefficients=segment_coef,
    )
def location_choice_model(
    name="workplace_location",
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    size_spec_file="{name}_size_terms.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    landuse_file="{name}_landuse.csv",
    return_data=False,
):
    """
    Construct a larch Model for a location/destination choice component.

    Reads the estimation data bundle (spec, coefficients, size terms,
    chooser and alternative data, land use), rebuilds the utility and
    size (quantity) functions, and attaches the assembled data.

    Parameters
    ----------
    name : str
        Model component name; fills the "{name}" placeholder in the other
        filename arguments and selects special-case handling (e.g.
        "trip_destination").
    edb_directory : str
        Template path to the estimation data bundle.
    coefficients_file, spec_file, size_spec_file, alt_values_file,
    chooser_file, settings_file, landuse_file : str
        Template filenames within the EDB.
    return_data : bool
        If True, also return a Dict of the loaded data alongside the model.

    Returns
    -------
    larch.Model or (larch.Model, Dict)
    """
    # Derive the size-term "model_selector" from the component name by
    # stripping the usual suffixes (e.g. "workplace_location" -> "workplace").
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")
    if model_selector == 'joint':
        # joint tours share the non-mandatory size terms
        model_selector = 'non_mandatory'
    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, **kwargs):
        # every EDB filename may carry a "{name}" placeholder
        filename = filename.format(name=name)
        return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)

    coefficients = _read_csv(
        coefficients_file,
        index_col="coefficient_name",
    )
    spec = _read_csv(spec_file, comment="#")
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)
    landuse = _read_csv(landuse_file, index_col="zone_id")
    master_size_spec = _read_csv(size_spec_file)

    # remove temp rows from spec, ASim uses them to calculate the other values written
    # to the EDB, but they are not actually part of the utility function themselves.
    spec = spec.loc[~spec.Expression.isna()]
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    # settings may point to a secondary settings file to merge in
    include_settings = settings.get("include_settings")
    if include_settings:
        include_settings = os.path.join(edb_directory, include_settings)
    if include_settings and os.path.exists(include_settings):
        with open(include_settings, "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        # fall back to a plain SEGMENTS list, mapping each name to itself
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    SIZE_TERM_SELECTOR = settings.get('SIZE_TERM_SELECTOR', model_selector)

    # filter size spec for this location choice only
    size_spec = (master_size_spec.query(
        f"model_selector == '{SIZE_TERM_SELECTOR}'").drop(
            columns="model_selector").set_index("segment"))
    # drop land-use columns that are all-zero for this selector
    size_spec = size_spec.loc[:, size_spec.max() > 0]

    size_coef = size_coefficients_from_spec(size_spec)

    # spec rows to strip before re-estimation, identified by label/expression
    indexes_to_drop = [
        "util_size_variable",  # pre-computed size (will be re-estimated)
        "util_size_variable_atwork",  # pre-computed size (will be re-estimated)
        "util_utility_adjustment",  # shadow pricing (ignored in estimation)
        "@df['size_term'].apply(np.log1p)",  # pre-computed size (will be re-estimated)
    ]
    if 'Label' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Label.to_numpy()
        ]
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Expression.to_numpy()
        ]
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    # When the spec has no Label column, synthesize stable surrogate labels
    # for each expression so they can be matched against alt_values.
    expression_labels = None
    if label_column_name == 'Expression':
        expression_labels = {
            expr: f"variable_label{n:04d}"
            for n, expr in enumerate(spec.Expression.to_numpy())
        }

    # Remove shadow pricing and pre-existing size expression for re-estimation
    spec = (spec.set_index(label_column_name).drop(
        index=indexes_to_drop).reset_index())

    if label_column_name == 'Expression':
        spec.insert(0, "Label", spec['Expression'].map(expression_labels))
        alt_values['variable'] = alt_values['variable'].map(expression_labels)
        label_column_name = "Label"

    if name == 'trip_destination':
        # trip destination is implicitly segmented by primary purpose,
        # with one spec column per purpose
        CHOOSER_SEGMENT_COLUMN_NAME = 'primary_purpose'
        primary_purposes = spec.columns[3:]
        SEGMENT_IDS = {pp: pp for pp in primary_purposes}

    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    # pivot the long-format alternative values into idca (case-alt) format
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]]))

    if CHOOSER_SEGMENT_COLUMN_NAME is not None:
        # label segments with names
        SEGMENT_IDS_REVERSE = {v: k for k, v in SEGMENT_IDS.items()}
        x_co["_segment_label"] = x_co[CHOOSER_SEGMENT_COLUMN_NAME].apply(
            lambda x: SEGMENT_IDS_REVERSE[x])
    else:
        x_co["_segment_label"] = size_spec.index[0]

    # compute total size values by segment
    for segment in size_spec.index:
        total_size_segment = pd.Series(0, index=landuse.index)
        x_co["total_size_" + segment] = 0
        for land_use_field in size_spec.loc[segment].index:
            total_size_segment += (landuse[land_use_field] *
                                   size_spec.loc[segment, land_use_field])
        # size of each chooser's observed (override) choice zone
        x_co["total_size_" + segment] = total_size_segment.loc[
            x_co["override_choice"]].to_numpy()

    # for each chooser, collate the appropriate total size value
    x_co["total_size_segment"] = 0
    for segment in size_spec.index:
        labels = "total_size_" + segment
        rows = x_co["_segment_label"] == segment
        x_co.loc[rows, "total_size_segment"] = x_co[labels][rows]

    # Remove choosers with invalid observed choice (appropriate total size value = 0)
    valid_observed_zone = x_co["total_size_segment"] > 0
    x_co = x_co[valid_observed_zone]
    x_ca = x_ca[x_ca.index.get_level_values(chooser_index_name).isin(
        x_co.index)]

    # Merge land use characteristics into CA data
    try:
        x_ca_1 = pd.merge(x_ca, landuse, on="zone_id", how="left")
    except KeyError:
        # Missing the zone_id variable?
        # Use the alternative id's instead, which assumes no sampling of alternatives
        x_ca_1 = pd.merge(x_ca,
                          landuse,
                          left_on=x_ca.index.get_level_values(1),
                          right_index=True,
                          how="left")
    # merge may drop the MultiIndex; restore it
    x_ca_1.index = x_ca.index

    # Availability of choice zones: a flag column equal to 1 marks an
    # unavailable zone; otherwise all zones are available.
    if "util_no_attractions" in x_ca_1:
        av = x_ca_1["util_no_attractions"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    elif "@df['size_term']==0" in x_ca_1:
        av = x_ca_1["@df['size_term']==0"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    else:
        av = 1

    d = DataFrames(co=x_co, ca=x_ca_1, av=av)

    m = Model(dataservice=d)
    # Three spec layouts are supported: a single generic "coefficient"
    # column, a single segment column, or one column per segment.
    if len(spec.columns) == 4 and all(
            spec.columns == ['Label', 'Description', 'Expression',
                             'coefficient']):
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    # The size (quantity) function is rebuilt from the size spec, with one
    # parameter per (segment, land-use column) pair having a nonzero weight.
    if CHOOSER_SEGMENT_COLUMN_NAME is None:
        assert len(size_spec) == 1
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) for i in size_spec.index
            for q in size_spec.columns if size_spec.loc[i, q] != 0)
    else:
        # gate each segment's size terms on the chooser's segment id
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) *
            X(f"{CHOOSER_SEGMENT_COLUMN_NAME}=={str_repr(SEGMENT_IDS[i])}")
            for i in size_spec.index for q in size_spec.columns
            if size_spec.loc[i, q] != 0)

    apply_coefficients(coefficients, m)
    # size coefficients are estimated in log space; keep them bounded
    apply_coefficients(size_coef, m, minimum=-6, maximum=6)

    # estimate against the surveyed (override) choice
    m.choice_co_code = "override_choice"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                landuse=landuse,
                spec=spec,
                size_spec=size_spec,
                master_size_spec=master_size_spec,
                model_selector=model_selector,
                settings=settings,
            ),
        )

    return m
def cdap_data(
    name="cdap",
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    interaction_coeffs_file="{name}_interaction_coefficients.csv",
    households_file="../../final_households.csv",
    persons_file="../../final_persons.csv",
    spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
    settings_file="{name}_model_settings.yaml",
    chooser_data_file="{name}_values_combined.csv",
):
    """
    Load the estimation data bundle for the CDAP model.

    Reads settings, coefficients, interaction coefficients, the single-person
    spec, and combined chooser values from the EDB, plus the household and
    person tables (looked up inside the EDB first, then at their literal
    paths), and attaches each person's CDAP rank to the chooser values.

    Returns
    -------
    Dict
        Loaded tables keyed by role (person_data, spec1, interaction_coef,
        coefficients, households, settings) plus the EDB path.

    Raises
    ------
    FileNotFoundError
        If the estimation data bundle directory does not exist.
    """
    edb_directory = edb_directory.format(name=name)
    if not os.path.exists(edb_directory):
        raise FileNotFoundError(edb_directory)

    def _edb_path(filename):
        # every EDB filename may carry a "{name}" placeholder
        return os.path.join(edb_directory, filename.format(name=name))

    def read_csv(filename, **kwargs):
        return pd.read_csv(_edb_path(filename), **kwargs)

    def read_yaml(filename, **kwargs):
        with open(_edb_path(filename), 'rt') as f:
            return yaml.load(f, Loader=yaml.SafeLoader, **kwargs)

    settings = read_yaml(settings_file)

    # household/person tables: prefer copies inside the EDB, fall back to
    # the literal (typically relative) paths
    try:
        hhs = read_csv(households_file)
    except FileNotFoundError:
        hhs = pd.read_csv(households_file)
    try:
        persons = read_csv(persons_file)
    except FileNotFoundError:
        persons = pd.read_csv(persons_file)
    person_rank = asim_cdap.assign_cdap_rank(persons)

    coefficients = read_csv(
        coefficients_file,
        index_col='coefficient_name',
        comment="#",
    )
    # interaction_ptypes must stay a string (e.g. leading zeros, blanks)
    interaction_coef = read_csv(
        interaction_coeffs_file,
        dtype={'interaction_ptypes': str},
        keep_default_na=False,
        comment="#",
    )
    spec1 = read_csv(spec1_file, comment='#')

    values = read_csv(chooser_data_file, comment='#')
    values['cdap_rank'] = person_rank

    return Dict(
        edb_directory=Path(edb_directory),
        person_data=values,
        spec1=spec1,
        interaction_coef=interaction_coef,
        coefficients=coefficients,
        households=hhs,
        settings=settings,
    )
def schedule_choice_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    return_data=False,
):
    """
    Construct a larch Model for a scheduling choice component.

    Reads the estimation data bundle (spec, coefficients, chooser and
    alternative data, settings), rebuilds the utility function, recodes
    the alternatives to 1-based integer codes, and derives alternative
    availability from hard-constrained large-negative coefficients.

    Parameters
    ----------
    name : str
        Model component name; fills the "{name}" placeholder in the other
        filename arguments.
    edb_directory : str
        Template path to the estimation data bundle.
    coefficients_file, spec_file, alt_values_file, chooser_file,
    settings_file : str
        Template filenames within the EDB.
    return_data : bool
        If True, also return a Dict of the loaded data alongside the model.

    Returns
    -------
    larch.Model or (larch.Model, Dict)
    """
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")
    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, optional=False, **kwargs):
        # every EDB filename may carry a "{name}" placeholder
        filename = filename.format(name=name)
        try:
            return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)
        except FileNotFoundError:
            if optional:
                return None
            else:
                raise

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    try:
        coefficients = _read_csv(
            coefficients_file,
            index_col="coefficient_name",
        )
    except FileNotFoundError:
        # possibly a mis-named file; the correct name may be in settings
        coefficients_file = settings.get('COEFFICIENTS', coefficients_file)
        coefficients = _read_csv(
            coefficients_file,
            index_col="coefficient_name",
        )

    spec = _read_csv(spec_file, comment='#')
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)

    # remove temp rows from spec, ASim uses them to calculate the other values
    # written to the EDB, but they are not actually part of the utility
    # function themselves.
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    # settings may point to a secondary settings file to merge in
    include_settings = settings.get("include_settings")
    if include_settings:
        with open(os.path.join(edb_directory, include_settings), "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        # fall back to a plain SEGMENTS list, mapping each name to itself
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    if 'Label' in spec.columns:
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    m = Model()
    # Three spec layouts are supported: a single generic "coefficient"
    # column, a single segment column, or one column per segment.
    if len(spec.columns) == 4 and ([c.lower() for c in spec.columns] == [
            'label', 'description', 'expression', 'coefficient'
    ]):
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    apply_coefficients(coefficients, m, minimum=-25, maximum=25)

    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    alt_values.fillna(0, inplace=True)
    # pivot the long-format alternative values into idca (case-alt) format
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]]),
        required_labels=spec[label_column_name],
    )

    # Re-code alternatives to consecutive 1-based integers; larch reserves
    # code zero for the root of the nesting graph.
    alt_codes = np.arange(len(x_ca.index.levels[1])) + 1
    # BUG FIX: previously `set_levels(alt_codes, 1)` passed the level
    # positionally, which is deprecated and removed in pandas >= 2.0.
    x_ca.index = x_ca.index.set_levels(alt_codes, level=1)
    x_co["override_choice_plus1"] = x_co["override_choice"] + 1
    x_co["model_choice_plus1"] = x_co["model_choice"] + 1

    # Alternatives carrying a hard-constrained, large-negative coefficient
    # on some data column are unavailable wherever that column > 0.
    unavail_coefs = coefficients.query(
        "(constrain == 'T') & (value < -900)").index
    unavail_data = [i.data for i in m.utility_ca if i.param in unavail_coefs]
    if len(unavail_data):
        joint_unavail = "|".join(f"({i}>0)" for i in unavail_data)
        joint_avail = f"~({joint_unavail})"
    else:
        joint_avail = 1

    d = DataFrames(co=x_co, ca=x_ca, av=joint_avail)
    m.dataservice = d
    # estimate against the surveyed (override) choice; switch to
    # "model_choice_plus1" to replicate the simulated choice instead
    m.choice_co_code = "override_choice_plus1"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                spec=spec,
                model_selector=model_selector,
                joint_avail=joint_avail,
            ),
        )

    return m
def mode_choice_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
    override_filenames=None,
):
    """
    Construct a larch ModelGroup for a mode choice component.

    Builds one nested-logit model per tour purpose from a shared nesting
    tree and spec, specializing each alternative's utility function via the
    purpose columns of the coefficient template, then groups the models for
    joint estimation.

    Parameters
    ----------
    name : str
        Model component name (e.g. "tour_mode_choice"); names containing
        "atwork" are treated as a single-purpose model.
    edb_directory : str
        Template path to the estimation data bundle.
    return_data : bool
        If True, also return a Dict of the loaded data alongside the group.
    override_filenames : dict, optional
        Extra keyword arguments forwarded to `simple_simulate_data` to
        override individual EDB filenames.

    Returns
    -------
    larch.model.model_group.ModelGroup or (ModelGroup, Dict)
    """
    if override_filenames is None:
        override_filenames = {}
    data = simple_simulate_data(
        name=name,
        edb_directory=edb_directory,
        **override_filenames,
    )
    coefficients = data.coefficients
    coef_template = data.coef_template
    spec = data.spec
    chooser_data = data.chooser_data
    settings = data.settings

    chooser_data = clean_values(
        chooser_data,
        alt_names_to_codes=data.alt_names_to_codes,
        choice_code="override_choice_code",
    )

    tree = construct_nesting_tree(data.alt_names, settings["NESTS"])

    # one model per purpose; atwork components collapse to a single purpose
    purposes = list(coef_template.columns)
    if "atwork" in name:
        purposes = ['atwork']
    elif 'atwork' in purposes:
        purposes.remove('atwork')

    # Setup purpose specific models
    m = {purpose: Model(graph=tree, title=purpose) for purpose in purposes}
    for alt_code, alt_name in tree.elemental_names().items():
        # Read in base utility function for this alt_name
        u = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=alt_name,
            ignore_x=("#", ),
        )
        for purpose in purposes:
            # Modify utility function based on template for purpose:
            # swap each parameter for its purpose-specific template name
            # (falling back to the generic name when not templated).
            u_purp = sum((P(coef_template[purpose].get(i.param, i.param)) *
                          i.data * i.scale) for i in u)
            m[purpose].utility_co[alt_code] = u_purp

    for model in m.values():
        explicit_value_parameters(model)
    apply_coefficients(coefficients, m)

    avail = construct_availability(m[purposes[0]], chooser_data,
                                   data.alt_codes_to_names)

    d = DataFrames(
        co=chooser_data,
        av=avail,
        alt_codes=data.alt_codes,
        alt_names=data.alt_names,
    )

    for purpose, model in m.items():
        if 'atwork' not in name:
            # each purpose's model sees only its own tours
            model.dataservice = d.selector_co(f"tour_type=='{purpose}'")
        else:
            model.dataservice = d
        model.choice_co_code = "override_choice_code"

    from larch.model.model_group import ModelGroup

    mg = ModelGroup(m.values())

    if return_data:
        return (
            mg,
            Dict(
                # BUG FIX: previously returned Path(edb_directory), but this
                # function never formats the "{name}" placeholder itself, so
                # callers received a path containing the literal "{name}".
                # Return the formatted path from simple_simulate_data, as
                # simple_simulate_model does.
                edb_directory=data.edb_directory,
                chooser_data=chooser_data,
                avail=avail,
                coefficients=coefficients,
                coef_template=coef_template,
                spec=spec,
                settings=settings,
            ),
        )

    return mg
def simple_simulate_data(
    name="tour_mode_choice",
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    coefficients_template="{name}_coefficients_template.csv",
    spec_file="{name}_SPEC.csv",
    settings_file="{name}_model_settings.yaml",
    chooser_data_file="{name}_values_combined.csv",
    values_index_col="tour_id",
):
    """
    Load the estimation data bundle for a simple-simulate component.

    Reads the settings, coefficients (plus optional coefficient template),
    spec, and combined chooser data from the EDB, deriving the alternative
    names and codes from the spec columns.

    Parameters
    ----------
    name : str
        Model component name; fills the "{name}" placeholder in the other
        filename arguments.
    edb_directory : str
        Template path to the estimation data bundle.
    coefficients_file, coefficients_template, spec_file, settings_file,
    chooser_data_file : str
        Template filenames within the EDB.
    values_index_col : str
        Column used as the index of the chooser data.

    Returns
    -------
    Dict
        Loaded settings, chooser data, coefficients, (possibly None)
        coefficient template, spec, and alternative name/code mappings.
    """
    edb_directory = edb_directory.format(name=name)

    def _load(filename, **kwargs):
        # every EDB filename may carry a "{name}" placeholder
        return pd.read_csv(
            os.path.join(edb_directory, filename.format(name=name)),
            **kwargs,
        )

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as f:
        settings = yaml.load(f, Loader=yaml.SafeLoader)

    try:
        coefficients = _load(coefficients_file, index_col="coefficient_name")
        try:
            coef_template = _load(
                coefficients_template, index_col="coefficient_name"
            )
        except FileNotFoundError:
            # the coefficient template is optional
            coef_template = None

        spec = remove_apostrophes(_load(spec_file, comment="#"), ["Label"])
        # Drop temp rows: ASim uses them to compute other values written to
        # the EDB, but they are not part of the utility function itself.
        spec = spec.loc[~spec.Expression.isna()]
        spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

        # alternatives are the spec columns after Label/Description/Expression
        alt_names = list(spec.columns[3:])
        alt_codes = np.arange(1, len(alt_names) + 1)
        alt_names_to_codes = dict(zip(alt_names, alt_codes))
        alt_codes_to_names = dict(zip(alt_codes, alt_names))

        chooser_data = _load(chooser_data_file, index_col=values_index_col)
    except Exception:
        # when an error happens in reading anything other than settings,
        # print settings to aid debugging, then re-raise
        from pprint import pprint

        pprint(settings)
        raise

    return Dict(
        edb_directory=Path(edb_directory),
        settings=settings,
        chooser_data=chooser_data,
        coefficients=coefficients,
        coef_template=coef_template,
        spec=spec,
        alt_names=alt_names,
        alt_codes=alt_codes,
        alt_names_to_codes=alt_names_to_codes,
        alt_codes_to_names=alt_codes_to_names,
    )
def simple_simulate_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
    choices=None,
    construct_avail=False,
    values_index_col="household_id",
):
    """
    Construct a larch Model for a simple-simulate component.

    Loads the estimation data bundle, builds a multinomial or nested logit
    model (nested when the settings declare ``LOGIT_TYPE: NL``) from the
    spec, and attaches the cleaned chooser data.

    Parameters
    ----------
    name : str
        Model component name; locates the estimation data bundle.
    edb_directory : str
        Template path to the estimation data bundle.
    return_data : bool
        If True, also return a Dict of the loaded data alongside the model.
    choices : dict, optional
        Mapping of alternative names to codes; overrides the mapping
        derived from the spec when given (and truthy).
    construct_avail : bool
        If True, derive per-alternative availability from the chooser data;
        otherwise all alternatives are available.
    values_index_col : str
        Column used as the index of the chooser data.

    Returns
    -------
    larch.Model or (larch.Model, Dict)
    """
    data = simple_simulate_data(
        name=name,
        edb_directory=edb_directory,
        values_index_col=values_index_col,
    )
    coefficients = data.coefficients
    spec = data.spec
    settings = data.settings
    alt_names = data.alt_names
    alt_codes = data.alt_codes

    from .general import clean_values

    chooser_data = clean_values(
        data.chooser_data,
        alt_names_to_codes=choices if choices else data.alt_names_to_codes,
        choice_code="override_choice_code",
    )

    # nested logit when the settings request it, else plain MNL
    if settings.get('LOGIT_TYPE') == 'NL':
        m = Model(graph=construct_nesting_tree(data.alt_names,
                                               settings["NESTS"]))
    else:
        m = Model(alts=data.alt_codes_to_names)

    m.utility_co = dict_of_linear_utility_from_spec(
        spec,
        "Label",
        dict(zip(alt_names, alt_codes)),
    )

    apply_coefficients(coefficients, m)

    avail = (construct_availability(m, chooser_data, data.alt_codes_to_names)
             if construct_avail else True)

    m.dataservice = DataFrames(
        co=chooser_data,
        av=avail,
        alt_codes=alt_codes,
        alt_names=alt_names,
    )
    # estimate against the surveyed (override) choice
    m.choice_co_code = "override_choice_code"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=data.edb_directory,
                chooser_data=chooser_data,
                coefficients=coefficients,
                spec=spec,
                alt_names=alt_names,
                alt_codes=alt_codes,
                settings=settings,
            ),
        )

    return m
def auto_ownership_model(
    name="auto_ownership",
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
):
    """
    Construct a larch Model for the auto ownership component.

    Loads the estimation data bundle and builds a multinomial logit model
    whose alternatives (numbers of cars owned) are coded from zero upward.

    Parameters
    ----------
    name : str
        Model component name; locates the estimation data bundle.
    edb_directory : str
        Template path to the estimation data bundle.
    return_data : bool
        If True, also return a Dict of the loaded data alongside the model.

    Returns
    -------
    larch.Model or (larch.Model, Dict)
    """
    data = simple_simulate_data(
        name=name,
        edb_directory=edb_directory,
        values_index_col="household_id",
    )
    coefficients = data.coefficients
    spec = data.spec
    chooser_data = data.chooser_data
    settings = data.settings

    # alternatives are the spec columns after Label/Description/Expression,
    # coded 0, 1, 2, ... in column order
    alt_names = list(spec.columns[3:])
    alt_codes = range(len(alt_names))

    chooser_data = remove_apostrophes(chooser_data)
    chooser_data.fillna(0, inplace=True)
    # Remove choosers with invalid observed choice
    chooser_data = chooser_data[chooser_data["override_choice"] >= 0]

    m = Model()
    # One of the alternatives is coded as 0, so we must explicitly
    # initialize the MNL nesting graph with a nonzero root_id.
    m.initialize_graph(alternative_codes=alt_codes, root_id=99)
    m.utility_co = dict_of_linear_utility_from_spec(
        spec,
        "Label",
        dict(zip(alt_names, alt_codes)),
    )

    apply_coefficients(coefficients, m)

    m.dataservice = DataFrames(
        co=chooser_data,
        av=True,
        alt_codes=alt_codes,
        alt_names=alt_names,
    )
    # estimate against the surveyed (override) choice
    m.choice_co_code = "override_choice"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=data.edb_directory,
                chooser_data=chooser_data,
                coefficients=coefficients,
                spec=spec,
                altnames=alt_names,
                altcodes=alt_codes,
            ),
        )

    return m