def from_to_travel_diary_read(
        trips: pd.DataFrame,
        persons_attributes: Union[pd.DataFrame, None] = None,
        hhs_attributes: Union[pd.DataFrame, None] = None,
        include_loc=False,
        sort_by_seq: Union[bool, None] = False,
):
    """
    Build a core.Population from trips described by origin and destination activities.

    Each person's plan starts with an activity taken from the first trip's 'oact'
    (origin activity), followed by one leg and one activity per trip, where the
    activity type is the trip's 'dact' (destination activity). No activity
    inference and no home location are required. A warning is logged for every
    plan whose first activity is not 'home'.

    :param trips: DataFrame of trips; this reader uses the columns 'hid', 'pid', 'oact',
        'dact', 'mode', 'ozone', 'dzone', 'tst' and 'tet', plus 'seq' when sorting and
        'start_loc'/'end_loc' when include_loc is set
    :param persons_attributes: DataFrame looked up with .loc[pid]; an optional 'freq' entry
        is removed from the attributes and passed as the person freq
    :param hhs_attributes: DataFrame looked up with .loc[hid]; an optional 'freq' entry
        is removed from the attributes and passed as the household freq
    :param include_loc=False, bool, optionally include location data from the 'start_loc'
        and 'end_loc' columns (documented as shapely Point geometries)
    :param sort_by_seq=False, True sorts each person's trips by 'seq'; None sorts only
        when trips has a 'seq' column; False keeps the incoming row order
    :return: core.Population
    """
    # TODO check for required col headers and give useful error?
    logger = logging.getLogger(__name__)

    # None means "decide from the data": sort whenever a 'seq' column exists.
    if sort_by_seq is None and 'seq' in trips.columns:
        sort_by_seq = True

    population = core.Population()

    for hid, hh_trips in trips.groupby('hid'):
        hh_attrs = {} if hhs_attributes is None else hhs_attributes.loc[hid].to_dict()
        # 'freq' is taken out of the attributes dict before the household is built.
        hh_freq = hh_attrs.pop('freq', None)
        household = core.Household(hid, attributes=hh_attrs, freq=hh_freq)

        for pid, diary in hh_trips.groupby('pid'):
            if sort_by_seq:
                diary = diary.sort_values('seq')

            p_attrs = {} if persons_attributes is None else persons_attributes.loc[pid].to_dict()
            p_freq = p_attrs.pop('freq', None)
            person = core.Person(pid, attributes=p_attrs, freq=p_freq)

            opening_trip = diary.iloc[0]
            first_act = opening_trip.oact.lower()
            if first_act != "home":
                logger.warning(
                    f" Person pid:{pid} hid:{hid} plan does not start with 'home' activity"
                )

            first_loc = diary.start_loc.iloc[0] if include_loc else None

            # Opening activity, located at the origin of the first trip.
            opening_activity = activity.Activity(
                seq=0,
                act=first_act,
                area=opening_trip.ozone,
                loc=first_loc,
                start_time=utils.parse_time(0),
            )
            person.add(opening_activity)

            # 'mode' is read with [] because attribute access would hit Series.mode.
            for n in range(len(diary)):
                trip = diary.iloc[n]

                if include_loc:
                    start_loc, end_loc = trip.start_loc, trip.end_loc
                else:
                    start_loc = end_loc = None

                purpose = trip.dact.lower()

                leg = activity.Leg(
                    seq=n,
                    purp=purpose,
                    mode=trip['mode'].lower(),
                    start_area=trip.ozone,
                    end_area=trip.dzone,
                    start_loc=start_loc,
                    end_loc=end_loc,
                    start_time=utils.parse_time(trip.tst),
                    end_time=utils.parse_time(trip.tet),
                )
                person.add(leg)

                # The activity at the destination starts when the trip ends.
                arrival = activity.Activity(
                    seq=n + 1,
                    act=purpose,
                    area=trip.dzone,
                    loc=end_loc,
                    start_time=utils.parse_time(trip.tet),
                )
                person.add(arrival)

            person.plan.finalise_activity_end_times()
            household.add(person)

        population.add(household)

    return population
def tour_based_travel_diary_read(
        trips: pd.DataFrame,
        persons_attributes: Union[pd.DataFrame, None] = None,
        hhs_attributes: Union[pd.DataFrame, None] = None,
        include_loc=False,
        sort_by_seq: Union[bool, None] = None,
):
    """
    Complex travel diary reader.

    Plans are assembled with every activity type left unset (act=None); the plan is
    then asked to infer activities from tour purposes and to set the leg purposes.

    :param trips: DataFrame of trips; this reader uses the columns 'hid', 'pid', 'hzone',
        'ozone', 'dzone', 'purp', 'mode', 'tst', 'tet' and 'freq', plus 'seq' when sorting
        and 'start_loc'/'end_loc' when include_loc is set
    :param persons_attributes: DataFrame looked up with .loc[pid]; an optional 'freq' entry
        is removed from the attributes and passed as the person freq
    :param hhs_attributes: DataFrame looked up with .loc[hid]; an optional 'freq' entry
        is removed from the attributes and passed as the household freq
    :param include_loc=False, bool, optionally include location data from the 'start_loc'
        and 'end_loc' columns (documented as shapely Point geometries)
    :param sort_by_seq=None, optionally force trip sorting as True or False; None sorts
        only when trips has a 'seq' column
    :return: core.Population
    """
    # NOTE(review): this function name is defined a second time further down the file;
    # the later definition replaces this one at import time - confirm which is intended.

    # None means "decide from the data": sort whenever a 'seq' column exists.
    if sort_by_seq is None and 'seq' in trips.columns:
        sort_by_seq = True

    population = core.Population()

    for hid, hh_trips in trips.groupby('hid'):
        hh_attrs = {} if hhs_attributes is None else hhs_attributes.loc[hid].to_dict()
        # 'freq' is taken out of the attributes dict before the household is built.
        hh_freq = hh_attrs.pop('freq', None)
        household = core.Household(hid, attributes=hh_attrs, freq=hh_freq)

        for pid, diary in hh_trips.groupby('pid'):
            if sort_by_seq:
                diary = diary.sort_values('seq')

            p_attrs = {} if persons_attributes is None else persons_attributes.loc[pid].to_dict()

            # The home zone is read from the person's first trip record.
            home_area = diary.hzone.iloc[0]
            p_freq = p_attrs.pop('freq', None)
            person = core.Person(
                pid,
                attributes=p_attrs,
                home_area=home_area,
                freq=p_freq,
            )

            first_loc = diary.start_loc.iloc[0] if include_loc else None

            # Opening activity of unknown type at the origin of the first trip.
            opening_activity = activity.Activity(
                seq=0,
                act=None,
                area=diary.ozone.iloc[0],
                loc=first_loc,
                start_time=utils.parse_time(0),
            )
            person.add(opening_activity)

            # 'mode' is read with [] because attribute access would hit Series.mode.
            for n in range(len(diary)):
                trip = diary.iloc[n]

                if include_loc:
                    start_loc, end_loc = trip.start_loc, trip.end_loc
                else:
                    start_loc = end_loc = None

                leg = activity.Leg(
                    seq=n,
                    purp=trip.purp.lower(),
                    mode=trip['mode'].lower(),
                    start_area=trip.ozone,
                    end_area=trip.dzone,
                    start_loc=start_loc,
                    end_loc=end_loc,
                    start_time=utils.parse_time(trip.tst),
                    end_time=utils.parse_time(trip.tet),
                    freq=trip.freq,
                )
                person.add(leg)

                # Activity type stays None here; it is inferred once the plan is complete.
                arrival = activity.Activity(
                    seq=n + 1,
                    act=None,
                    area=trip.dzone,
                    loc=end_loc,
                    start_time=utils.parse_time(trip.tet),
                )
                person.add(arrival)

            plan = person.plan
            plan.finalise_activity_end_times()
            plan.infer_activities_from_tour_purpose()
            plan.set_leg_purposes()

            household.add(person)

        population.add(household)

    return population
def tour_based_travel_diary_read(
        trips: pd.DataFrame,
        persons_attributes: Union[pd.DataFrame, None] = None,
        hhs_attributes: Union[pd.DataFrame, None] = None,
        include_loc=False,
        sort_by_seq: Union[bool, None] = None,
):
    """
    Complex travel diary reader.

    Households and persons come from build_population(); this function fills each
    person's plan from their trips. Activity types are left unset (act=None) while
    the plan is assembled; the plan is then asked to infer activities from tour
    purposes and to set the leg purposes. Persons without any trip rows are told to
    stay at home.

    :param trips: DataFrame of trips; this reader uses the columns 'hid', 'pid', 'ozone',
        'dzone', 'purp', 'mode', 'tst', 'tet' and 'freq', plus 'seq' when sorting and
        'start_loc'/'end_loc' when include_loc is set
    :param persons_attributes: DataFrame, passed through to build_population
    :param hhs_attributes: DataFrame, passed through to build_population
    :param include_loc=False, bool, optionally include location data from the 'start_loc'
        and 'end_loc' columns (documented as shapely Point geometries)
    :param sort_by_seq=None, optionally force trip sorting as True or False; None sorts
        only when trips has a 'seq' column
    :return: core.Population
    """
    population = build_population(
        trips=trips,
        persons_attributes=persons_attributes,
        hhs_attributes=hhs_attributes,
    )

    # None means "decide from the data": sort whenever a 'seq' column exists.
    if sort_by_seq is None and 'seq' in trips.columns:
        sort_by_seq = True

    for hid, household in population:
        for pid, person in household:

            person_trips = trips.loc[(trips.hid == hid) & (trips.pid == pid)]

            if person_trips.empty:
                # No trips recorded for this person.
                person.stay_at_home()
                continue

            if sort_by_seq:
                person_trips = person_trips.sort_values('seq')

            loc = person_trips.start_loc.iloc[0] if include_loc else None

            # Opening activity of unknown type at the origin of the first trip.
            person.add(
                activity.Activity(
                    seq=0,
                    act=None,
                    area=person_trips.ozone.iloc[0],
                    loc=loc,
                    start_time=utils.parse_time(0),
                )
            )

            # 'mode' is read with [] because attribute access would hit Series.mode.
            for n in range(len(person_trips)):
                trip = person_trips.iloc[n]

                start_loc = trip.start_loc if include_loc else None
                end_loc = trip.end_loc if include_loc else None

                person.add(
                    activity.Leg(
                        seq=n,
                        purp=trip.purp.lower(),
                        mode=trip['mode'].lower(),
                        start_area=trip.ozone,
                        end_area=trip.dzone,
                        start_loc=start_loc,
                        end_loc=end_loc,
                        start_time=utils.parse_time(trip.tst),
                        end_time=utils.parse_time(trip.tet),
                        freq=trip.freq,
                    )
                )

                # Activity type stays None here; it is inferred once the plan is complete.
                person.add(
                    activity.Activity(
                        seq=n + 1,
                        act=None,
                        area=trip.dzone,
                        loc=end_loc,
                        start_time=utils.parse_time(trip.tet),
                    )
                )

            person.plan.finalise_activity_end_times()
            person.plan.infer_activities_from_tour_purpose()
            person.plan.set_leg_purposes()

    return population
def trip_based_travel_diary_read(
        trips: pd.DataFrame,
        persons_attributes: Union[pd.DataFrame, None] = None,
        hhs_attributes: Union[pd.DataFrame, None] = None,
        include_loc=False,
        sort_by_seq: Union[bool, None] = None,
):
    """
    Turn Activity Plan tabular data inputs (derived from travel survey and attributes) into
    core population format.

    This is a variation of the standard load_travel_diary() method because it does not
    require activity inference: every plan is started with a 'home' activity and each
    trip's 'purp' value is used directly as the type of the activity at its destination.
    A warning is logged when the first trip's origin zone differs from the person's
    home zone. Persons without any trip rows are told to stay at home.

    Households and persons come from build_population(); this function only fills the
    plans of those existing persons. It does not create new persons or re-add anything
    to the population while iterating over it.

    :param trips: DataFrame of trips; this reader uses the columns 'hid', 'pid', 'hzone',
        'ozone', 'dzone', 'purp', 'mode', 'tst' and 'tet', plus 'seq' when sorting and
        'start_loc'/'end_loc' when include_loc is set
    :param persons_attributes: DataFrame, passed through to build_population
    :param hhs_attributes: DataFrame, passed through to build_population
    :param include_loc=False, bool, optionally include location data from the 'start_loc'
        and 'end_loc' columns (documented as shapely Point geometries)
    :param sort_by_seq=None, optionally force trip sorting as True or False; None sorts
        only when trips has a 'seq' column
    :return: core.Population
    """
    logger = logging.getLogger(__name__)

    population = build_population(
        trips=trips,
        persons_attributes=persons_attributes,
        hhs_attributes=hhs_attributes,
    )

    # None means "decide from the data": sort whenever a 'seq' column exists.
    if sort_by_seq is None and 'seq' in trips.columns:
        sort_by_seq = True

    for hid, household in population:
        for pid, person in household:

            person_trips = trips.loc[(trips.hid == hid) & (trips.pid == pid)]

            if person_trips.empty:
                # No trips recorded for this person.
                person.stay_at_home()
                continue

            if sort_by_seq:
                person_trips = person_trips.sort_values('seq')

            home_area = person_trips.hzone.iloc[0]
            origin_area = person_trips.ozone.iloc[0]

            if origin_area != home_area:
                logger.warning(
                    f" Person pid:{pid} plan does not start with 'home' activity"
                )

            loc = person_trips.start_loc.iloc[0] if include_loc else None

            # The plan is assumed to start at home, whatever the first origin zone is.
            person.add(
                activity.Activity(
                    seq=0,
                    act='home',
                    area=origin_area,
                    loc=loc,
                    start_time=utils.parse_time(0),
                )
            )

            # 'mode' is read with [] because attribute access would hit Series.mode.
            for n in range(len(person_trips)):
                trip = person_trips.iloc[n]

                start_loc = trip.start_loc if include_loc else None
                end_loc = trip.end_loc if include_loc else None

                purpose = trip.purp.lower()

                person.add(
                    activity.Leg(
                        seq=n,
                        purp=purpose,
                        mode=trip['mode'].lower(),
                        start_area=trip.ozone,
                        end_area=trip.dzone,
                        start_loc=start_loc,
                        end_loc=end_loc,
                        start_time=utils.parse_time(trip.tst),
                        end_time=utils.parse_time(trip.tet),
                    )
                )

                # The activity at the destination starts when the trip ends.
                person.add(
                    activity.Activity(
                        seq=n + 1,
                        act=purpose,
                        area=trip.dzone,
                        loc=end_loc,
                        start_time=utils.parse_time(trip.tet),
                    )
                )

            person.plan.finalise_activity_end_times()

    return population
def test_time_parse(a, expected):
    """Check that parse_time(a) equals expected.

    Both arguments are presumably supplied by a parametrize decorator or fixtures
    defined outside this view - confirm.
    """
    parsed = parse_time(a)
    assert parsed == expected