def test_bounded_generator_should_limit_with_both_bound():
    """bound_value with both bounds must clamp values into [lb, ub]."""
    # fix: test name had a typo ("limnit") — renamed; pytest discovers by prefix,
    # nothing calls a test by name, so this is safe
    bound_f = operations.bound_value(lb=10, ub=15)

    assert bound_f(5) == 10    # below lower bound -> clamped up
    assert bound_f(10) == 10   # at lower bound -> unchanged
    assert bound_f(12) == 12   # inside bounds -> unchanged
    assert bound_f(15) == 15   # at upper bound -> unchanged
    assert bound_f(20) == 15   # above upper bound -> clamped down
def add_mobility_action(circus, params):
    """
    Attaches a "customer_mobility" story to the circus: whenever its timer
    fires, a customer moves to one of its POSSIBLE_SITES and the move is
    logged.
    """
    logging.info(" creating customer mobility action")

    customers = circus.actors["customers"]

    # hourly mobility profile: two peaks, around 9am and 6pm
    hourly_weights = [
        1., 1., 1., 1., 1., 1., 1., 1., 5., 10., 5., 1.,
        1., 1., 1., 1., 1., 5., 10., 5., 1., 1., 1., 1.,
    ]
    timer_gen = CyclicTimerGenerator(
        clock=circus.clock,
        seed=next(circus.seeder),
        config=CyclicTimerProfile(
            profile=hourly_weights,
            profile_time_steps="1H",
            start_date=pd.Timestamp("12 September 2016 00:00.00"),
        ))

    # per-customer daily activity ~ N(mean, std), floored at .5
    activity_gen = NumpyRandomGenerator(
        method="normal",
        loc=params["mean_daily_customer_mobility_activity"],
        scale=params["std_daily_customer_mobility_activity"],
        seed=next(circus.seeder)).map(f=bound_value(lb=.5))

    story = circus.create_story(
        name="customer_mobility",
        initiating_actor=customers,
        actorid_field="CUST_ID",
        timer_gen=timer_gen,
        activity_gen=activity_gen)

    logging.info(" adding operations")

    story.set_operations(
        customers.ops.lookup(
            id_field="CUST_ID",
            select={"CURRENT_SITE": "PREV_SITE"}),

        # selects a destination site (or maybe the same as current... ^^)
        customers.get_relationship("POSSIBLE_SITES")
                 .ops.select_one(from_field="CUST_ID", named_as="NEW_SITE"),

        # update the SITE attribute of the customers accordingly
        customers.get_attribute("CURRENT_SITE")
                 .ops.update(id_field="CUST_ID", copy_from_field="NEW_SITE"),

        circus.clock.ops.timestamp(named_as="TIME"),

        # create mobility logs
        FieldLogger(log_id="customer_mobility_logs",
                    cols=["TIME", "CUST_ID", "PREV_SITE", "NEW_SITE"]),
    )
def add_mobility_action(circus, params):
    """
    Attaches a "field_agent_mobility" story to the circus: during work
    hours, field agents relocate to one of their POSSIBLE_SITES, and each
    move is logged.
    """
    logging.info(" creating field agent mobility action")

    # Field agents move only during the work hours
    move_timer_gen = WorkHoursTimerGenerator(clock=circus.clock,
                                             seed=next(circus.seeder))

    # translate the requested daily mean/std into activity levels for
    # this timer profile
    mean_activity = move_timer_gen.activity(
        n=params["mean_daily_fa_mobility_activity"],
        per=pd.Timedelta("1day"))
    std_activity = move_timer_gen.activity(
        n=params["std_daily_fa_mobility_activity"],
        per=pd.Timedelta("1day"))

    # gaussian number of moves, never less than 1
    move_activity_gen = NumpyRandomGenerator(
        method="normal",
        loc=mean_activity,
        scale=std_activity,
        seed=next(circus.seeder)).map(f=bound_value(lb=1))

    agents = circus.actors["field_agents"]

    story = circus.create_story(name="field_agent_mobility",
                                initiating_actor=agents,
                                actorid_field="FA_ID",
                                timer_gen=move_timer_gen,
                                activity_gen=move_activity_gen)

    logging.info(" adding operations")

    story.set_operations(
        agents.ops.lookup(
            id_field="FA_ID",
            select={"CURRENT_SITE": "PREV_SITE"}),

        # selects a destination site (or maybe the same as current... ^^)
        agents.get_relationship("POSSIBLE_SITES")
              .ops.select_one(from_field="FA_ID", named_as="NEW_SITE"),

        # update the SITE attribute of the field agents accordingly
        agents.get_attribute("CURRENT_SITE")
              .ops.update(id_field="FA_ID", copy_from_field="NEW_SITE"),

        circus.clock.ops.timestamp(named_as="TIME"),

        # create mobility logs
        FieldLogger(log_id="field_agent_mobility_logs",
                    cols=["TIME", "FA_ID", "PREV_SITE", "NEW_SITE"]),
    )
def test_populations_during_working_hours():
    """A work-hours timer at ~5 events/member/day over 30 days of
    simulation with 100 members should produce roughly 15k log rows."""
    with path.tempdir() as log_parent_folder:
        log_folder = os.path.join(log_parent_folder, "logs")

        circus = Circus(name="tested_circus",
                        master_seed=1,
                        start=pd.Timestamp("8 June 2016"),
                        step_duration=pd.Timedelta("1h"))

        field_agents = circus.create_population(
            name="fa",
            size=100,
            ids_gen=SequencialGenerator(max_length=3, prefix="id_"))

        timer_gen = WorkHoursTimerGenerator(clock=circus.clock,
                                            seed=next(circus.seeder))

        # target: ~5 events per member per day, std .5
        loc = timer_gen.activity(n=5, per=pd.Timedelta("1day"))
        scale = timer_gen.activity(n=.5, per=pd.Timedelta("1day"))
        activity_gen = NumpyRandomGenerator(
            method="normal",
            loc=loc,
            scale=scale,
            seed=1).map(bound_value(lb=1))

        # just a dummy operation to produce some logs
        story = circus.create_story(name="test_story",
                                    initiating_population=field_agents,
                                    member_id_field="some_id",
                                    timer_gen=timer_gen,
                                    activity_gen=activity_gen)
        story.set_operations(circus.clock.ops.timestamp(named_as="TIME"),
                             FieldLogger(log_id="the_logs"))

        circus.run(duration=pd.Timedelta("30 days"),
                   log_output_folder=log_folder)

        logging.info("loading produced logs")
        logs = load_all_logs(log_folder)["the_logs"]
        logging.info("number of produced logs: {} logs".format(logs.shape[0]))

        # 30 days of simulation should produce 100 * 5 * 30 == 15k logs
        assert 14e3 <= logs.shape[0] <= 16e3
def scale_quantity_gen(stock_size_gen, scale_factor):
    """
    Builds a generator that multiplies the values produced by
    stock_size_gen by scale_factor, bounding the results below by 1 so
    the scaled "quantities" stay positive.

    stock_size_gen must be a generator of positive numbers (think of
    them as quantities of stuff, i.e. on a ratio scale). If scale_factor
    is None, stock_size_gen is returned unchanged.
    """
    # guard clause instead of wrapping the whole body in the condition
    if scale_factor is None:
        return stock_size_gen

    return stock_size_gen \
        .map(f_vect=operations.scale(factor=scale_factor)) \
        .map(f=operations.bound_value(lb=1))
def add_listen_story(the_circus):
    """
    Adds a "listen_events" story: during work hours, users pick a random
    genre and a random song of that genre, and each listen is logged.
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock,
        seed=next(the_circus.seeder))

    # activity level drawn from a "truncated normal distribution": very
    # high and very low activities are clipped away
    activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5).map(ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=activity_gen)

    repo = the_circus.populations["music_repository"]

    listen.set_operations(
        users.ops.lookup(id_field="UID",
                         select={
                             "FIRST_NAME": "USER_FIRST_NAME",
                             "LAST_NAME": "USER_LAST_NAME",
                         }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(from_field="GENRE",
                                                      named_as="SONG_ID"),

        ops.FieldLogger("events"))
def test_bounded_generator_should_limit_with_upper_bound():
    """bound_value with only ub set caps values above, leaves the rest."""
    # fix: test name had a typo ("limnit") — renamed; pytest discovers by prefix,
    # nothing calls a test by name, so this is safe
    bound_f = operations.bound_value(ub=15)

    assert bound_f(10) == 10   # below upper bound -> unchanged
    assert bound_f(15) == 15   # at upper bound -> unchanged
    assert bound_f(20) == 15   # above upper bound -> clamped down
def test_bounding_function_should_not_modify_unbounded_values():
    """With no bounds at all, bound_value must be the identity function."""
    bound_f = operations.bound_value(lb=None, ub=None)

    # fix: the original step of 10000 exceeded the range span (3000), so the
    # loop executed exactly once (only x == -1000 was ever checked); step 10
    # actually sweeps the intended range
    for x in np.arange(-1000, 2000, 10):
        assert x == bound_f(x)
def add_listen_and_share_stories_with_details(the_circus):
    """
    This is again a copy-paste of add_listen_and_share_stories,
    (hopefully this helps to illustrate the progression), here showing the
    supplementary look-up on the attributes of the songs
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(clock=the_circus.clock,
                                                  seed=next(the_circus.seeder))

    # this generate activity level distributed as a "truncated normal
    # distribution", i.e. very high and low activities are prevented.
    bounded_gaussian_activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5).map(ops.bound_value(lb=10, ub=30))

    # both stories share the same timer and activity generators
    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    share = the_circus.create_story(name="share_events",
                                    initiating_population=users,
                                    member_id_field="UID",
                                    timer_gen=timer_gen,
                                    activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]
    songs = the_circus.populations["song"]

    # this Chain is shared by both stories below, so genre/song selection
    # (and the song-detail look-up) is defined only once
    select_genre_and_song = ops.Chain(

        users.ops.lookup(id_field="UID",
                         select={
                             "FIRST_NAME": "USER_FIRST_NAME",
                             "LAST_NAME": "USER_LAST_NAME",
                         }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(from_field="GENRE",
                                                      named_as="SONG_ID"),

        # now also reporting details of listened or shared songs
        songs.ops.lookup(id_field="SONG_ID",
                         select={
                             "artist_name": "SONG_ARTIST",
                             "title": "SONG_TITLE",
                             "recording_year": "SONG_YEAR",
                             "duration_seconds": "SONG_DURATION",
                         }),
    )

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    share.set_operations(
        select_genre_and_song,

        # picks a user this song is shared to
        users.ops.select_one(named_as="SHARED_TO_UID"),

        # note we could post-check when user shared a song to their own uid
        # here, in which case we can use DropRow to discard that share event

        ops.FieldLogger("share_events"))
def add_listen_and_share_stories(the_circus):
    """
    Builds on add_listen_story by also creating a "share_events" story;
    both stories reuse one ops.Chain that picks a genre and a song, which
    is the Chain re-usability being illustrated here.
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock,
        seed=next(the_circus.seeder))

    # activity level drawn from a "truncated normal distribution": very
    # high and very low activities are clipped away
    activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5).map(ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=activity_gen)

    share = the_circus.create_story(
        name="share_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=activity_gen)

    repo = the_circus.populations["music_repository"]

    # shared by both stories: genre/song selection is defined only once
    select_genre_and_song = ops.Chain(
        users.ops.lookup(id_field="UID",
                         select={
                             "FIRST_NAME": "USER_FIRST_NAME",
                             "LAST_NAME": "USER_LAST_NAME",
                         }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(from_field="GENRE",
                                                      named_as="SONG_ID"),
    )

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    share.set_operations(
        select_genre_and_song,

        # picks a user this song is shared to
        users.ops.select_one(named_as="SHARED_TO_UID"),

        # note we could post-check when user shared a song to their own uid
        # here, in which case we can use DropRow to discard that share event
        ops.FieldLogger("share_events"))
def noisified(df, col, lb, col_type=float):
    """
    Returns a noisy copy of the df[col] series: each value is first
    bounded below by lb, then multiplied by ~N(1, .1) gaussian noise
    (i.e. roughly +/-10% multiplicative jitter), and cast to col_type.

    fix: the default col_type was np.float, a deprecated alias removed in
    NumPy 1.24 (it raised AttributeError on import-time evaluation of this
    def). np.float was literally the builtin float, so substituting float
    is behavior-identical on older NumPy versions too.
    """
    fact = np.random.normal(1, .1, size=df.shape[0])
    col2 = df[col].apply(operations.bound_value(lb=lb)) * fact
    return col2.astype(col_type)