def add_report_story(the_circus):
    """
    Adds a story that logs the stock level of each POS at the end of
    each day.
    """
    pos = the_circus.populations["point_of_sale"]

    # constant timer firing exactly at the last clock iteration of each day
    daily_timer = gen.ConstantDependentGenerator(
        value=the_circus.clock.n_iterations(
            duration=pd.Timedelta("24h")) - 1)

    story = the_circus.create_story(
        name="report",
        initiating_population=pos,
        member_id_field="POS_ID",
        timer_gen=daily_timer)

    story.set_operations(
        the_circus.clock.ops.timestamp(named_as="TIME", random=False,
                                       log_format="%Y-%m-%d"),

        # current stock level == size of the "items" relationship of the POS
        pos.get_relationship("items").ops.get_neighbourhood_size(
            from_field="POS_ID",
            named_as="STOCK_LEVEL"),

        ops.FieldLogger(log_id="report",
                        cols=["TIME", "POS_ID", "STOCK_LEVEL"])
    )
def add_periodic_restock_story_with_combined_generator(the_circus):
    """
    Variation of add_periodic_restock_story that shows how to obtain the
    same result by plugging generators into each other instead of
    explicitly generating intermediary fields in the story_data.
    """
    pos = the_circus.populations["point_of_sale"]

    # this timer makes a POS more likely to trigger a re-stock during day
    # hours rather than at night
    timer_gen = profilers.DefaultDailyTimerGenerator(
        clock=the_circus.clock, seed=next(the_circus.seeder))

    restock_story = the_circus.create_story(
        name="restock",
        initiating_population=pos,
        member_id_field="POS_ID",
        timer_gen=timer_gen,
        # A ConstantGenerator gives every POS the same average activity of
        # exactly one story per week. Since the timing itself is random,
        # the period between 2 restocks is in general not exactly 7 days.
        activity_gen=gen.ConstantGenerator(
            value=timer_gen.activity(n=1, per=pd.Timedelta("7 days"))),
    )

    stock_size_gen = gen.NumpyRandomGenerator(
        method="choice",
        a=[5, 15, 20, 25],
        p=[0.1, 0.2, 0.5, 0.2],
        seed=next(the_circus.seeder))

    # the bulk size generator feeds directly into the bulk item generator:
    # no intermediary "RESTOCK_VOLUME" field is needed in the story data
    item_bulk_gen = stock_size_gen.flatmap(
        gen.DependentBulkGenerator(
            element_generator=the_circus.generators["items_gen"]))

    restock_story.set_operations(
        the_circus.clock.ops.timestamp(named_as="TIME",
                                       log_format="%Y-%m-%d"),

        # expose the POS NAME attribute as a "POS_NAME" field
        pos.ops.lookup(id_field="POS_ID", select={"NAME": "POS_NAME"}),

        item_bulk_gen.ops.generate(named_as="NEW_ITEM_IDS"),

        pos.get_relationship("items").ops.add_grouped(
            from_field="POS_ID",
            grouped_items_field="NEW_ITEM_IDS"),

        ops.FieldLogger(log_id="restock",
                        cols=["TIME", "POS_ID", "POS_NAME"])
    )
def add_dealer_bulk_sim_purchase_story(self, dealers, distributors):
    """
    Adds a SIM purchase story from agents to dealer, with impact on the
    stock of both populations.
    """
    logging.info("Creating bulk purchase story")

    timegen = HighWeekDaysTimerGenerator(clock=self.clock,
                                         seed=next(self.seeder))

    purchase_activity_gen = ConstantGenerator(value=100)

    build_purchases = self.create_story(
        name="bulk_purchases",
        initiating_population=dealers,
        member_id_field="DEALER_ID",
        timer_gen=timegen,
        activity_gen=purchase_activity_gen)

    build_purchases.set_operations(
        self.clock.ops.timestamp(named_as="DATETIME"),

        dealers.get_relationship("DISTRIBUTOR").ops.select_one(
            from_field="DEALER_ID", named_as="DISTRIBUTOR"),

        dealers.ops.lookup(id_field="DEALER_ID",
                           select={"BULK_BUY_SIZE": "BULK_BUY_SIZE"}),

        distributors.get_relationship("SIM").ops.select_many(
            from_field="DISTRIBUTOR",
            named_as="SIM_BULK",
            quantity_field="BULK_BUY_SIZE",
            # a selected SIM is popped, i.e. removed from the distributor's
            # stock (out-of-stock providers are not modeled, to keep the
            # example simple...)
            pop=True),

        dealers.get_relationship("SIM").ops.add_grouped(
            from_field="DEALER_ID",
            grouped_items_field="SIM_BULK"),

        # money transfer is not modeled, to keep the example simple:
        # just logging the number of sims instead of the sims themselves
        operations.Apply(source_fields="SIM_BULK",
                         named_as="NUMBER_OF_SIMS",
                         f=lambda s: s.map(len),
                         f_args="series"),

        # finally, triggering some re-stocking by the distributor
        distributors.get_attribute("SIMS_TO_RESTOCK").ops.add(
            member_id_field="DISTRIBUTOR",
            added_value_field="NUMBER_OF_SIMS"),

        self.get_story("distributor_restock").ops.force_act_next(
            member_id_field="DISTRIBUTOR"),

        operations.FieldLogger(
            log_id="bulk_purchases",
            cols=["DEALER_ID", "DISTRIBUTOR", "NUMBER_OF_SIMS"]),
    )
def add_agent_reviews_stories(self, agents):
    """
    Illustrates the dynamic creation of new populations: reviews are
    modeled as a "population" (even though they are mostly inactive data
    containers) whose members are created dynamically and linked to
    agents.

    Most of the time reviews would probably be modeled as logs instead of
    populations, but let's just ignore that for illustration purposes... ^^
    """
    timegen = HighWeekDaysTimerGenerator(clock=self.clock,
                                         seed=next(self.seeder))

    review_activity_gen = NumpyRandomGenerator(method="choice",
                                               a=range(1, 4),
                                               seed=next(self.seeder))

    # the system starts with no reviews
    review_population = self.create_population(name="rev", size=0)
    for attribute_name in ("DATE", "TEXT", "AGENT_ID", "AGENT_NAME"):
        review_population.create_attribute(attribute_name)

    reviews = self.create_story(
        name="agent_reviews",
        initiating_population=agents,
        member_id_field="AGENT",
        timer_gen=timegen,
        activity_gen=review_activity_gen,
    )

    review_id_gen = SequencialGenerator(start=0, prefix="REVIEW_ID")
    text_id_gen = FakerGenerator(method="text", seed=next(self.seeder))

    reviews.set_operations(
        self.clock.ops.timestamp(named_as="DATETIME"),

        agents.ops.lookup(id_field="AGENT",
                          select={"AGENT_NAME": "AGENT_NAME"}),

        review_id_gen.ops.generate(named_as="REVIEW_ID"),
        text_id_gen.ops.generate(named_as="REVIEW_TEXT"),

        # each story execution inserts new members into the review
        # population, copying their attributes from the story fields
        review_population.ops.update(
            id_field="REVIEW_ID",
            copy_attributes_from_fields={
                "DATE": "DATETIME",
                "TEXT": "REVIEW_TEXT",
                "AGENT_ID": "AGENT",
                "AGENT_NAME": "AGENT_NAME",
            }),

        # actually, here we're modelling reviews both as populations and logs..
        operations.FieldLogger(log_id="reviews"))
def add_user_preference_story(the_circus):
    """
    Adds a story logging, once a day per user, a random preference value
    for a randomly picked product.
    """
    users = the_circus.populations["users"]

    # constant timer: triggers at the last clock iteration of each day.
    # (An alternative would be a profilers.WorkHoursTimerGenerator so users
    # only act during work hours, combined with a truncated-normal activity
    # generator built from gen.NumpyRandomGenerator mapped through
    # ops.bound_value to prevent very high and very low activity levels.)
    timer_gen = gen.ConstantDependentGenerator(
        value=the_circus.clock.n_iterations(
            duration=pd.Timedelta("24h")) - 1)

    prefer = the_circus.create_story(
        name="prefer_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
    )

    repo = the_circus.populations["products"]

    prefer.set_operations(
        users.ops.lookup(
            id_field="UID",
            select={
                "FIRST_NAME": "USER_FIRST_NAME",
                "LAST_NAME": "USER_LAST_NAME",
            }),

        # add a user preference value
        gen.NumpyRandomGenerator(
            method="uniform",
            low=-1,
            high=1,
            seed=next(the_circus.seeder)).ops.generate(named_as="PREFERENCE"),

        # picks a product at random
        repo.ops.select_one(named_as="PRODUCT_ID"),

        # add timestamp column
        the_circus.clock.ops.timestamp(named_as="DATETIME"),

        ops.FieldLogger("prefer_events"))
def add_inactive_restock_story(the_circus):
    """
    Copy-paste of add_periodic_restock_story(), but without the timer nor
    the activity levels => as-is, this story never triggers.
    """
    pos = the_circus.populations["point_of_sale"]

    # no timer_gen / activity_gen: the story stays permanently inactive
    restock_story = the_circus.create_story(
        name="restock",
        initiating_population=pos,
        member_id_field="POS_ID")

    stock_size_gen = gen.NumpyRandomGenerator(
        method="choice",
        a=[5, 15, 20, 25],
        p=[0.1, 0.2, 0.5, 0.2],
        seed=next(the_circus.seeder))

    item_bulk_gen = gen.DependentBulkGenerator(
        element_generator=the_circus.generators["items_gen"])

    restock_story.set_operations(
        the_circus.clock.ops.timestamp(named_as="TIME"),

        # expose the POS NAME attribute as a "POS_NAME" field
        pos.ops.lookup(id_field="POS_ID", select={"NAME": "POS_NAME"}),

        # stock level before the restock
        pos.get_relationship("items").ops.get_neighbourhood_size(
            from_field="POS_ID",
            named_as="PREV_STOCK_LEVEL"),

        stock_size_gen.ops.generate(named_as="RESTOCK_VOLUME"),

        # generate RESTOCK_VOLUME new item ids per POS...
        item_bulk_gen.ops.generate(named_as="NEW_ITEM_IDS",
                                   observed_field="RESTOCK_VOLUME"),

        # ...and add them to the "items" relationship of each POS
        pos.get_relationship("items").ops.add_grouped(
            from_field="POS_ID",
            grouped_items_field="NEW_ITEM_IDS"),

        # stock level after the restock
        pos.get_relationship("items").ops.get_neighbourhood_size(
            from_field="POS_ID",
            named_as="NEW_STOCK_LEVEL"),

        ops.FieldLogger(log_id="restock",
                        cols=["TIME", "POS_ID", "POS_NAME",
                              "RESTOCK_VOLUME", "PREV_STOCK_LEVEL",
                              "NEW_STOCK_LEVEL"])
    )
def add_topups(self, sims, recharge_gen):
    """
    The topups are not triggered by a timer_gen and a decrementing timer
    => by itself this story is permanently inactive. It is meant to be
    triggered externally (from the "calls" or "sms" stories).
    """
    logging.info("Adding topups stories")

    # The topup story itself: basically just a selection of an agent and
    # the subsequent computation of the value.
    topup_story = self.create_story(
        name="topups",
        initiating_population=sims,
        member_id_field="SIM_ID",
        # note that no timer_gen is specified => the clock is not ticking
        # => the story can only be triggered externally (cf calls story)
    )

    topup_story.set_operations(
        sims.ops.lookup(
            id_field="SIM_ID",
            select={
                "MSISDN": "CUSTOMER_NUMBER",
                "OPERATOR": "OPERATOR",
                "MAIN_ACCT": "MAIN_ACCT_OLD"
            }),

        sims.get_relationship("POSSIBLE_AGENTS").ops.select_one(
            from_field="SIM_ID", named_as="AGENT"),

        recharge_gen.ops.generate(named_as="VALUE"),

        # new account balance = old balance + topup value
        operations.Apply(source_fields=["VALUE", "MAIN_ACCT_OLD"],
                         named_as="MAIN_ACCT",
                         f=np.add,
                         f_args="series"),

        sims.get_attribute("MAIN_ACCT").ops.update(
            member_id_field="SIM_ID",
            copy_from_field="MAIN_ACCT"),

        self.clock.ops.timestamp(named_as="TIME"),

        operations.FieldLogger(
            log_id="topups",
            cols=["TIME", "CUSTOMER_NUMBER", "AGENT", "VALUE",
                  "OPERATOR", "MAIN_ACCT_OLD", "MAIN_ACCT"]),
    )
def _add_message_story(self):
    """
    Adds the "hello_world" story: persons send a quote to one of their
    friends, at low/medium/high daily frequency.
    """
    story_timer_gen = DefaultDailyTimerGenerator(
        clock=self.clock, seed=next(self.seeder))

    # three possible activity levels: 3, 10 or 20 messages per day on average
    low_activity = story_timer_gen.activity(n=3, per=pd.Timedelta("1 day"))
    med_activity = story_timer_gen.activity(n=10, per=pd.Timedelta("1 day"))
    high_activity = story_timer_gen.activity(n=20, per=pd.Timedelta("1 day"))

    activity_gen = NumpyRandomGenerator(
        method="choice",
        a=[low_activity, med_activity, high_activity],
        p=[.2, .7, .1],
        seed=next(self.seeder))

    person = self.populations["person"]

    hello_world = self.create_story(
        name="hello_world",
        initiating_population=person,
        member_id_field="PERSON_ID",
        timer_gen=story_timer_gen,
        activity_gen=activity_gen
    )

    hello_world.set_operations(
        self.clock.ops.timestamp(named_as="TIME"),

        # the message is one of the emitter's quotes, at random
        person.get_relationship("quotes").ops.select_one(
            from_field="PERSON_ID", named_as="MESSAGE"),

        # the receiver is one of the emitter's friends, at random
        person.get_relationship("friends").ops.select_one(
            from_field="PERSON_ID", named_as="OTHER_PERSON"),

        person.ops.lookup(id_field="PERSON_ID",
                          select={"NAME": "EMITTER_NAME"}),

        person.ops.lookup(id_field="OTHER_PERSON",
                          select={"NAME": "RECEIVER_NAME"}),

        operations.FieldLogger(log_id="hello_6")
    )
def build_site_product_pos_target(circus, params):
    """
    Generates some random target of amount of pos per site, based on the
    actual number of pos per site.

    One row is produced per (site, product type) pair; the result is saved
    as site_product_pos_target.csv in the circus namespace folder.
    """
    target_file = os.path.join(db.namespace_folder(circus.name),
                               "site_product_pos_target.csv")

    sites = circus.actors["sites"]

    # small throw-away operation chain just to count the POS of each site
    target_action = operations.Chain(
        sites.relationships["POS"].ops.get_neighbourhood_size(
            from_field="site_id",
            named_as="pos_count_target"),
        operations.FieldLogger(log_id="logs"))

    sites_df = pd.DataFrame({"site_id": sites.ids})

    _, logs = target_action(sites_df)
    target_df = logs["logs"]

    # cartesian product of all sites with all product types, through a
    # merge on a constant column
    target_df["cartesian_product"] = "cp"
    products = pd.DataFrame({
        # list() required: pandas does not accept a py3 dict view here
        "product_type_id": list(params["products"].keys()),
        "cartesian_product": "cp"
    })
    target_df = pd.merge(left=target_df, right=products,
                         on="cartesian_product")

    # randomize the target around the observed count (factor ~ N(1, .1)),
    # with a floor of 10 POS per site
    fact = np.random.normal(1, .1, size=target_df.shape[0])
    target_df["pos_count_target"] = target_df["pos_count_target"] * fact
    # np.int was removed from numpy: the builtin int is equivalent here
    target_df["pos_count_target"] = target_df["pos_count_target"].astype(int)
    # .ix was removed from pandas: .loc handles this boolean assignment
    target_df.loc[target_df["pos_count_target"] < 10,
                  "pos_count_target"] = 10
    target_df.drop(["cartesian_product"], axis=1, inplace=True)

    target_df.to_csv(target_file, index=False)
def add_listen_story(the_circus):
    """
    Adds a story logging, at a bounded-gaussian daily rate, a random song
    listened to by each user during work hours.
    """
    users = the_circus.populations["user"]

    # with this timer, users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock, seed=next(the_circus.seeder))

    # activity level distributed as a "truncated normal distribution",
    # i.e. very high and very low activities are prevented
    bounded_gaussian_activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5).map(ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]

    listen.set_operations(
        users.ops.lookup(
            id_field="UID",
            select={
                "FIRST_NAME": "USER_FIRST_NAME",
                "LAST_NAME": "USER_LAST_NAME",
            }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(
            from_field="GENRE", named_as="SONG_ID"),

        ops.FieldLogger("events"))
def add_distributor_recharge_story(self, distributors, sim_generator):
    """
    Adds a story that increases the stock of a distributor. This is
    triggered externally, by the bulk purchase story.
    """
    restocking = self.create_story(
        name="distributor_restock",
        initiating_population=distributors,
        member_id_field="DISTRIBUTOR_ID",
        # here again: no activity gen nor time profile since the story is
        # triggered externally
    )

    restocking.set_operations(
        self.clock.ops.timestamp(named_as="DATETIME"),

        distributors.ops.lookup(
            id_field="DISTRIBUTOR_ID",
            select={"SIMS_TO_RESTOCK": "SIMS_TO_RESTOCK"}),

        # create as many new SIM ids as requested by SIMS_TO_RESTOCK...
        sim_generator.ops.generate(
            named_as="NEW_SIMS_IDS",
            quantity_field="SIMS_TO_RESTOCK"),

        # ...and add them to the distributor's stock
        distributors.get_relationship("SIM").ops.add_grouped(
            from_field="DISTRIBUTOR_ID",
            grouped_items_field="NEW_SIMS_IDS"),

        # back to zero
        distributors.get_attribute("SIMS_TO_RESTOCK").ops.subtract(
            member_id_field="DISTRIBUTOR_ID",
            subtracted_value_field="SIMS_TO_RESTOCK"),

        operations.FieldLogger(
            log_id="distributor_restock",
            cols=["DATETIME", "DISTRIBUTOR_ID", "SIMS_TO_RESTOCK"]),
    )
def add_listen_and_share_stories_with_details(the_circus):
    """
    This is again a copy-paste of add_listen_and_share_stories (hopefully
    this helps to illustrate the progression), here showing the
    supplementary look-up on the attributes of the songs.
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock, seed=next(the_circus.seeder))

    # this generates activity levels distributed as a "truncated normal
    # distribution", i.e. very high and low activities are prevented
    bounded_gaussian_activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5).map(ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    share = the_circus.create_story(
        name="share_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]
    songs = the_circus.populations["song"]

    # operations common to both stories, re-used as a Chain
    select_genre_and_song = ops.Chain(
        users.ops.lookup(
            id_field="UID",
            select={
                "FIRST_NAME": "USER_FIRST_NAME",
                "LAST_NAME": "USER_LAST_NAME",
            }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(
            from_field="GENRE", named_as="SONG_ID"),

        # now also reporting details of listened or shared songs
        songs.ops.lookup(
            id_field="SONG_ID",
            select={
                "artist_name": "SONG_ARTIST",
                "title": "SONG_TITLE",
                "recording_year": "SONG_YEAR",
                "duration_seconds": "SONG_DURATION",
            }),
    )

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    share.set_operations(
        select_genre_and_song,

        # picks a user this song is shared to
        users.ops.select_one(named_as="SHARED_TO_UID"),

        # note we could post-check when a user shared a song to their own
        # uid, in which case we can use DropRow to discard that share event
        ops.FieldLogger("share_events"))
def add_listen_and_share_stories(the_circus):
    """
    This is essentially a copy-paste of add_listen_story, + the update for
    the share story, in order to show the Chain re-usability clearly.
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock, seed=next(the_circus.seeder))

    # activity level distributed as a "truncated normal distribution",
    # i.e. very high and very low activities are prevented
    bounded_gaussian_activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5).map(ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    share = the_circus.create_story(
        name="share_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]

    # the Chain is shared between both stories
    select_genre_and_song = ops.Chain(
        users.ops.lookup(
            id_field="UID",
            select={
                "FIRST_NAME": "USER_FIRST_NAME",
                "LAST_NAME": "USER_LAST_NAME",
            }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(
            from_field="GENRE", named_as="SONG_ID"),
    )

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    share.set_operations(
        select_genre_and_song,

        # picks a user this song is shared to
        users.ops.select_one(named_as="SHARED_TO_UID"),

        # note we could post-check when a user shared a song to their own
        # uid, in which case we can use DropRow to discard that share event
        ops.FieldLogger("share_events"))
def add_uganda_geography(self, force_build=False):
    """
    Loads the cells definition from Uganda + adds 2 stories to control
    cell breakdowns and repairs.
    """
    logging.info(" adding Uganda Geography")
    seeder = seed_provider(12345)

    if force_build:
        uganda_cells, uganda_cities, timer_config = \
            build_uganda_populations(self)
    else:
        uganda_cells = db.load_population(namespace="uganda",
                                          population_id="cells")
        uganda_cities = db.load_population(namespace="uganda",
                                           population_id="cities")
        timer_config = db.load_timer_gen_config(
            "uganda", "cell_repair_timer_profile")

    # same profiler for breakdown and repair: they are both related to
    # typical human activity
    repair_n_fix_timer = CyclicTimerGenerator(clock=self.clock,
                                              seed=next(self.seeder),
                                              config=timer_config)

    # tendency is inversed in case of broken cell: its probability of
    # accepting a call is much lower
    unhealthy_level_gen = build_unhealthy_level_gen(next(seeder))
    healthy_level_gen = build_healthy_level_gen(next(seeder))

    logging.info(" adding Uganda Geography6")

    cell_break_down_story = self.create_story(
        name="cell_break_down",
        initiating_population=uganda_cells,
        member_id_field="CELL_ID",
        timer_gen=repair_n_fix_timer,
        # fault activity is very low: most cells tend never to break down
        # (hopefully...)
        activity_gen=ParetoGenerator(xmin=5, a=1.4, seed=next(self.seeder))
    )

    cell_repair_story = self.create_story(
        name="cell_repair_down",
        initiating_population=uganda_cells,
        member_id_field="CELL_ID",
        timer_gen=repair_n_fix_timer,
        # repair activity is much higher
        activity_gen=ParetoGenerator(xmin=100, a=1.2,
                                     seed=next(self.seeder)),
        # repair is not re-scheduled at the end of a repair, but only
        # triggered from a "break-down" story
        auto_reset_timer=False
    )

    cell_break_down_story.set_operations(
        unhealthy_level_gen.ops.generate(named_as="NEW_HEALTH_LEVEL"),

        uganda_cells.get_attribute("HEALTH").ops.update(
            member_id_field="CELL_ID",
            copy_from_field="NEW_HEALTH_LEVEL"),

        # a breakdown schedules the corresponding repair
        cell_repair_story.ops.reset_timers(member_id_field="CELL_ID"),

        self.clock.ops.timestamp(named_as="TIME"),

        operations.FieldLogger(
            log_id="cell_status",
            cols=["TIME", "CELL_ID", "NEW_HEALTH_LEVEL"]),
    )

    cell_repair_story.set_operations(
        healthy_level_gen.ops.generate(named_as="NEW_HEALTH_LEVEL"),

        uganda_cells.get_attribute("HEALTH").ops.update(
            member_id_field="CELL_ID",
            copy_from_field="NEW_HEALTH_LEVEL"),

        self.clock.ops.timestamp(named_as="TIME"),

        # note that both stories are contributing to the same
        # "cell_status" log
        operations.FieldLogger(
            log_id="cell_status",
            cols=["TIME", "CELL_ID", "NEW_HEALTH_LEVEL"]),
    )

    return uganda_cells, uganda_cities
def add_communications(self, subs, sims, cells):
    """
    Adds Calls and SMS story, which in turn may trigger topups story.
    """
    logging.info("Adding calls and sms story ")

    # generators for topups and call duration
    voice_duration_generator = NumpyRandomGenerator(
        method="choice", a=range(20, 240), seed=next(self.seeder))

    # call and sms timer generator, depending on the day of the week
    call_timegen = HighWeekDaysTimerGenerator(clock=self.clock,
                                              seed=next(self.seeder))

    # probability of doing a topup, with high probability when the depended
    # variable (i.e. the main account value, see below) gets close to 0
    recharge_trigger = DependentTriggerGenerator(
        value_to_proba_mapper=operations.logistic(k=-0.01, x0=1000),
        seed=next(self.seeder))

    # call activity level, under normal and "excited" states
    normal_call_activity = ParetoGenerator(xmin=10, a=1.2,
                                           seed=next(self.seeder))
    excited_call_activity = ParetoGenerator(xmin=100, a=1.1,
                                            seed=next(self.seeder))

    # after a call or SMS, excitability is the probability of getting into
    # "excited" mode (i.e. having a shorter expected delay until next call)
    excitability_gen = NumpyRandomGenerator(method="beta", a=7, b=3,
                                            seed=next(self.seeder))
    subs.create_attribute(name="EXCITABILITY", init_gen=excitability_gen)

    # same "basic" trigger, without any value mapper
    flat_trigger = DependentTriggerGenerator(seed=next(self.seeder))

    back_to_normal_prob = NumpyRandomGenerator(method="beta", a=3, b=7,
                                               seed=next(self.seeder))

    # Calls and SMS stories themselves
    calls = self.create_story(
        name="calls",
        initiating_population=subs,
        member_id_field="A_ID",
        timer_gen=call_timegen,
        activity_gen=normal_call_activity,
        states={
            "excited": {
                "activity": excited_call_activity,
                "back_to_default_probability": back_to_normal_prob
            }
        })

    sms = self.create_story(
        name="sms",
        initiating_population=subs,
        member_id_field="A_ID",
        timer_gen=call_timegen,
        activity_gen=normal_call_activity,
        states={
            "excited": {
                "activity": excited_call_activity,
                "back_to_default_probability": back_to_normal_prob
            }
        })

    # common logic between Call and SMS: selecting A and B + their related
    # fields
    compute_ab_fields = Chain(
        self.clock.ops.timestamp(named_as="DATETIME"),

        # selects a B party
        subs.get_relationship("FRIENDS").ops.select_one(
            from_field="A_ID",
            named_as="B_ID",
            one_to_one=True),

        # fetches information about all SIMs of A and B
        subs.get_relationship("SIMS").ops.select_all(from_field="A_ID",
                                                     named_as="A_SIMS"),
        sims.ops.lookup(
            id_field="A_SIMS",
            select={
                "OPERATOR": "OPERATORS_A",
                "MSISDN": "MSISDNS_A",
                "MAIN_ACCT": "MAIN_ACCTS_A"
            }),

        subs.get_relationship("SIMS").ops.select_all(from_field="B_ID",
                                                     named_as="B_SIMS"),
        sims.ops.lookup(
            id_field="B_SIMS",
            select={
                "OPERATOR": "OPERATORS_B",
                "MSISDN": "MSISDNS_B"
            }),

        # A selects the sims and related values based on the best match
        # between the sims of A and B
        operations.Apply(
            source_fields=["MSISDNS_A", "OPERATORS_A", "A_SIMS",
                           "MAIN_ACCTS_A", "MSISDNS_B", "OPERATORS_B",
                           "B_SIMS"],
            named_as=["MSISDN_A", "OPERATOR_A", "SIM_A", "MAIN_ACCT_OLD",
                      "MSISDN_B", "OPERATOR_B", "SIM_B"],
            f=select_sims),

        operations.Apply(source_fields=["OPERATOR_A", "OPERATOR_B"],
                         named_as="TYPE",
                         f=compute_cdr_type),
    )

    # Both CELL_A and CELL_B might drop the call based on their current HEALTH
    compute_cell_status = Chain(
        # some static fields
        subs.ops.lookup(
            id_field="A_ID",
            select={
                "CELL": "CELL_A",
                "EXCITABILITY": "EXCITABILITY_A"
            }),
        subs.ops.lookup(
            id_field="B_ID",
            select={
                "CELL": "CELL_B",
                "EXCITABILITY": "EXCITABILITY_B"
            }),

        cells.ops.lookup(id_field="CELL_A",
                         select={"HEALTH": "CELL_A_HEALTH"}),
        cells.ops.lookup(id_field="CELL_B",
                         select={"HEALTH": "CELL_B_HEALTH"}),

        flat_trigger.ops.generate(observed_field="CELL_A_HEALTH",
                                  named_as="CELL_A_ACCEPTS"),
        flat_trigger.ops.generate(observed_field="CELL_B_HEALTH",
                                  named_as="CELL_B_ACCEPTS"),

        operations.Apply(source_fields=["CELL_A_ACCEPTS", "CELL_B_ACCEPTS"],
                         named_as="STATUS",
                         f=compute_call_status))

    # update the main account based on the value of this CDR
    update_accounts = Chain(
        operations.Apply(source_fields=["MAIN_ACCT_OLD", "VALUE"],
                         named_as="MAIN_ACCT_NEW",
                         f=np.subtract,
                         f_args="series"),
        sims.get_attribute("MAIN_ACCT").ops.update(
            member_id_field="SIM_A",
            copy_from_field="MAIN_ACCT_NEW"),
    )

    # triggers the topup story if the main account is low
    trigger_topups = Chain(
        # A subscribers with low account are now more likely to topup the
        # SIM they just used to make a call
        recharge_trigger.ops.generate(observed_field="MAIN_ACCT_NEW",
                                      named_as="SHOULD_TOP_UP"),

        self.get_story("topups").ops.force_act_next(
            member_id_field="SIM_A",
            condition_field="SHOULD_TOP_UP"),
    )

    # get BOTH sms and Call "bursty" after EITHER a call or an sms
    get_bursty = Chain(
        # Trigger to get into "excited" mode because A gave a call or sent
        # an SMS
        flat_trigger.ops.generate(observed_field="EXCITABILITY_A",
                                  named_as="A_GETTING_BURSTY"),
        calls.ops.transit_to_state(member_id_field="A_ID",
                                   condition_field="A_GETTING_BURSTY",
                                   state="excited"),
        sms.ops.transit_to_state(member_id_field="A_ID",
                                 condition_field="A_GETTING_BURSTY",
                                 state="excited"),

        # Trigger to get into "excited" mode because B received a call
        flat_trigger.ops.generate(observed_field="EXCITABILITY_B",
                                  named_as="B_GETTING_BURSTY"),

        # transiting to excited mode, according to trigger value
        calls.ops.transit_to_state(member_id_field="B_ID",
                                   condition_field="B_GETTING_BURSTY",
                                   state="excited"),
        sms.ops.transit_to_state(member_id_field="B_ID",
                                 condition_field="B_GETTING_BURSTY",
                                 state="excited"),

        # B party needs to have their timer reset explicitly since they
        # were not active at this round. A party will be reset automatically.
        calls.ops.reset_timers(member_id_field="B_ID"),
        sms.ops.reset_timers(member_id_field="B_ID"),
    )

    calls.set_operations(
        compute_ab_fields,
        compute_cell_status,

        ConstantGenerator(value="VOICE").ops.generate(named_as="PRODUCT"),
        voice_duration_generator.ops.generate(named_as="DURATION"),
        operations.Apply(source_fields=["DURATION", "DATETIME", "TYPE"],
                         named_as="VALUE",
                         f=compute_call_value),

        update_accounts,
        trigger_topups,
        get_bursty,

        # final CDRs
        operations.FieldLogger(
            log_id="voice_cdr",
            cols=["DATETIME", "MSISDN_A", "MSISDN_B", "STATUS",
                  "DURATION", "VALUE", "CELL_A", "OPERATOR_A",
                  "CELL_B", "OPERATOR_B", "TYPE", "PRODUCT"]),
    )

    sms.set_operations(
        compute_ab_fields,
        compute_cell_status,

        ConstantGenerator(value="SMS").ops.generate(named_as="PRODUCT"),
        operations.Apply(source_fields=["DATETIME", "TYPE"],
                         named_as="VALUE",
                         f=compute_sms_value),

        update_accounts,
        trigger_topups,
        get_bursty,

        # final CDRs
        operations.FieldLogger(
            log_id="sms_cdr",
            cols=["DATETIME", "MSISDN_A", "MSISDN_B", "STATUS",
                  "VALUE", "CELL_A", "OPERATOR_A", "CELL_B",
                  "OPERATOR_B", "TYPE", "PRODUCT"]),
    )
def add_mobility(self, subs, cells):
    """
    Adds a CELL attribute to the customer population + a mobility story
    that randomly moves customers from CELL to CELL among their used
    cells.
    """
    logging.info("Adding mobility ")

    # mobility time profile: assign high mobility activities to busy hours
    # of the day
    mov_prof = [
        1., 1., 1., 1., 1., 1., 1., 1., 5., 10., 5., 1.,
        1., 1., 1., 1., 1., 5., 10., 5., 1., 1., 1., 1.
    ]
    mobility_time_gen = CyclicTimerGenerator(
        clock=self.clock,
        seed=next(self.seeder),
        config=CyclicTimerProfile(
            profile=mov_prof,
            profile_time_steps="1H",
            start_date=pd.Timestamp("12 September 2016 00:00.00")))

    # Mobility network, i.e. choice of cells per user, i.e. these are the
    # weighted "used cells" (as in "most used cells") for each user
    mobility_weight_gen = NumpyRandomGenerator(method="exponential",
                                               scale=1.,
                                               seed=next(self.seeder))

    mobility_rel = subs.create_relationship("POSSIBLE_CELLS")

    logging.info(" creating bipartite graph ")
    mobility_df = pd.DataFrame.from_records(
        make_random_bipartite_data(subs.ids, cells.ids, 0.4,
                                   seed=next(self.seeder)),
        columns=["USER_ID", "CELL"])

    logging.info(" adding mobility relationships to customer")
    mobility_rel.add_relations(
        from_ids=mobility_df["USER_ID"],
        to_ids=mobility_df["CELL"],
        weights=mobility_weight_gen.generate(mobility_df.shape[0]))

    logging.info(" creating customer's CELL attribute ")
    # Initialize the mobility by allocating one first random cell to each
    # customer among its network
    subs.create_attribute(name="CELL", init_relationship="POSSIBLE_CELLS")

    # Mobility story itself, basically just a random hop from cell to cell,
    # that updates the "CELL" attributes + generates mobility logs
    logging.info(" creating mobility story")
    mobility_story = self.create_story(
        name="mobility",
        initiating_population=subs,
        member_id_field="A_ID",
        timer_gen=mobility_time_gen,
    )

    logging.info(" adding operations")
    mobility_story.set_operations(
        subs.ops.lookup(id_field="A_ID", select={"CELL": "PREV_CELL"}),

        # selects a destination cell (or maybe the same as current... ^^)
        mobility_rel.ops.select_one(from_field="A_ID",
                                    named_as="NEW_CELL"),

        # update the CELL attribute of the customers accordingly
        subs.get_attribute("CELL").ops.update(member_id_field="A_ID",
                                              copy_from_field="NEW_CELL"),

        self.clock.ops.timestamp(named_as="TIME"),

        # create mobility logs
        operations.FieldLogger(
            log_id="mobility_logs",
            cols=["TIME", "A_ID", "PREV_CELL", "NEW_CELL"]),
    )

    logging.info(" done")
def add_bulk_restock_actions(circus, params, buyer_actor_name,
                             seller_actor_name):
    """
    Creates, for each product in params["products"], a bulk purchase action
    through which a buyer actor restocks from a seller actor.

    Each story moves items of that product from the seller's relationship to
    the buyer's, logs the purchase (stock levels + exploded per-item
    transaction lines) and, when the seller's own stock gets low, triggers
    the seller's upstream bulk purchase action.

    These stories have no timer nor activity: bulk purchases are triggered
    externally (e.g. by the low-stock trigger of the level below).
    """
    buyer = circus.actors[buyer_actor_name]
    seller = circus.actors[seller_actor_name]

    # average number of POS per buyer, used to scale stock-related quantities
    pos_per_buyer = circus.actors["pos"].size / buyer.size

    for product, description in params["products"].items():
        action_name = "{}_{}_bulk_purchase".format(buyer_actor_name, product)
        upper_level_restock_action_name = "{}_{}_bulk_purchase".format(
            seller_actor_name, product)

        logging.info("creating {} action".format(action_name))

        # generators of item prices and types: one value per purchased item
        item_price_gen = random_generators.NumpyRandomGenerator(
            method="choice", a=description["item_prices"],
            seed=next(circus.seeder))
        item_prices_gen = random_generators.DependentBulkGenerator(
            element_generator=item_price_gen)

        item_type_gen = random_generators.NumpyRandomGenerator(
            method="choice", a=circus.actors[product].ids,
            seed=next(circus.seeder))
        item_types_gen = random_generators.DependentBulkGenerator(
            element_generator=item_type_gen)

        tx_gen = random_generators.SequencialGenerator(
            prefix="_".join(["TX", buyer_actor_name, product]))
        tx_seq_gen = random_generators.DependentBulkGenerator(
            element_generator=tx_gen)

        # trigger for another bulk purchase done by the seller if their own
        # stock gets low
        seller_low_stock_bulk_purchase_trigger = \
            random_generators.DependentTriggerGenerator(
                value_to_proba_mapper=operations.bounded_sigmoid(
                    x_min=pos_per_buyer,
                    x_max=description["max_pos_stock_triggering_pos_restock"]
                        * pos_per_buyer,
                    shape=description["restock_sigmoid_shape"],
                    incrementing=False))

        # bulk size distribution is a scaled version of POS bulk size
        # distribution
        bulk_size_gen = scale_quantity_gen(
            stock_size_gen=circus.generators[
                "pos_{}_bulk_size_gen".format(product)],
            scale_factor=pos_per_buyer)

        bulk_purchase_action = circus.create_story(
            name=action_name,
            initiating_actor=buyer,
            actorid_field="BUYER_ID",
            # no timer or activity: bulk purchases are triggered externally
        )

        bulk_purchase_action.set_operations(
            circus.clock.ops.timestamp(named_as="TIME"),

            buyer.get_relationship("{}__provider".format(product))
                 .ops.select_one(from_field="BUYER_ID",
                                 named_as="SELLER_ID"),

            bulk_size_gen.ops.generate(named_as="REQUESTED_BULK_SIZE"),

            buyer.get_relationship(product).ops.get_neighbourhood_size(
                from_field="BUYER_ID", named_as="OLD_BUYER_STOCK"),

            # TODO: the perfect case would prevent to go over max_stock at
            # this point

            # selecting and removing items from the seller's stock
            seller.get_relationship(product).ops.select_many(
                from_field="SELLER_ID",
                named_as="ITEM_IDS",
                quantity_field="REQUESTED_BULK_SIZE",
                # if an item is selected, it is removed from the seller stock
                pop=True,
                # TODO: put this back to False and log the failed purchases
                discard_missing=True),

            # ... and adding them to the buyer's stock
            buyer.get_relationship(product).ops.add_grouped(
                from_field="BUYER_ID", grouped_items_field="ITEM_IDS"),

            # We do not track the old and new stock of the seller since the
            # result is misleading: all purchases are performed in parallel,
            # so if a seller is selected several times, its stock level after
            # the select_many() is the level _after_ all purchases are done,
            # which is typically not what we want to include in the log.
            buyer.get_relationship(product).ops.get_neighbourhood_size(
                from_field="BUYER_ID", named_as="NEW_BUYER_STOCK"),

            # actual number of bought items might differ from the requested
            # bulk size, due to out of stock
            operations.Apply(source_fields="ITEM_IDS",
                             named_as="BULK_SIZE",
                             f=lambda s: s.map(len), f_args="series"),

            # generate item prices; note that the same items will have a
            # different price through the whole distribution chain
            item_prices_gen.ops.generate(named_as="ITEM_PRICES",
                                         observed_field="BULK_SIZE"),
            item_types_gen.ops.generate(named_as="ITEM_TYPES",
                                        observed_field="BULK_SIZE"),
            tx_seq_gen.ops.generate(named_as="TX_IDS",
                                    observed_field="BULK_SIZE"),

            operations.FieldLogger(log_id="{}_stock".format(action_name),
                                   cols=["TIME", "BUYER_ID", "SELLER_ID",
                                         "OLD_BUYER_STOCK",
                                         "NEW_BUYER_STOCK", "BULK_SIZE"]),

            operations.FieldLogger(log_id=action_name,
                                   cols=["TIME", "BUYER_ID", "SELLER_ID"],
                                   exploded_cols=["TX_IDS", "ITEM_IDS",
                                                  "ITEM_PRICES",
                                                  "ITEM_TYPES"]),

            trigger_action_if_low_stock(
                circus,
                stock_relationship=seller.get_relationship(product),
                actor_id_field="SELLER_ID",
                restock_trigger=seller_low_stock_bulk_purchase_trigger,
                triggered_action_name=upper_level_restock_action_name)
        )
def add_agent_sim_purchase_story(self, agents, dealers):
    """
    Adds a SIM purchase story from agents to dealer, with impact on stock of
    both populations
    """
    logging.info("Creating purchase story")

    # purchases mostly happen on working days
    purchase_timer = HighWeekDaysTimerGenerator(clock=self.clock,
                                                seed=next(self.seeder))

    # each agent draws its own average activity: 1, 2 or 3 purchases
    activity_levels = NumpyRandomGenerator(method="choice",
                                           a=range(1, 4),
                                           seed=next(self.seeder))

    # TODO: if we merge profiler and generator, we could have higher probs here
    # based on calendar
    # TODO2: or not, maybe we should have a sub-operation with its own counter
    # to "come back to normal", instead of sampling a random variable at
    # each turn => would improve efficiency
    purchase_story = self.create_story(
        name="purchases",
        initiating_population=agents,
        member_id_field="AGENT",
        timer_gen=purchase_timer,
        activity_gen=activity_levels,
        states={
            "on_holiday": {
                "activity": ConstantGenerator(value=0),
                "back_to_default_probability": ConstantGenerator(value=0)
            }
        })

    purchase_story.set_operations(
        self.clock.ops.timestamp(named_as="DATETIME"),

        # pick one of the dealers related to this agent
        agents.get_relationship("DEALERS").ops.select_one(
            from_field="AGENT", named_as="DEALER"),

        dealers.get_relationship("SIM").ops.select_one(
            from_field="DEALER",
            named_as="SOLD_SIM",
            # each SIM can only be sold once
            one_to_one=True,
            # a selected SIM is removed from the dealer's stock
            pop=True,
            # a dealer with empty stock yields a None SOLD_SIM instead of
            # dropping the story_data row: this marks the sale as failed
            discard_empty=False),

        operations.Apply(source_fields="SOLD_SIM",
                         named_as="FAILED_SALE",
                         f=pd.isnull, f_args="series"),

        # any agent who failed to buy a SIM will try again at next round
        # (we could do that probabilistically as well, just add a trigger..)
        purchase_story.ops.force_act_next(member_id_field="AGENT",
                                          condition_field="FAILED_SALE"),

        # no columns specified => log everything by default; the sale is
        # logged before dropping, so failed sales are kept in the logs
        operations.FieldLogger(log_id="purchases"),

        # only successful sales actually add a SIM to agents
        operations.DropRow(condition_field="FAILED_SALE"),
        agents.get_relationship("SIM").ops.add(from_field="AGENT",
                                               item_field="SOLD_SIM"),
    )
def add_agent_holidays_story(self, agents):
    """
    Adds stories that reset to 0 the activity level of the purchases story
    of some populations
    """
    logging.info("Adding 'holiday' periods for agents ")

    # TODO: this is a bit weird: what we would really need is a profiler
    # returning durations (i.e. timer counts) with a probability related to
    # the time until the next typical holidays :) We could call this a
    # YearProfile, though the internal mechanics would differ from the week
    # and day profilers.
    holiday_timer = HighWeekDaysTimerGenerator(clock=self.clock,
                                               seed=next(self.seeder))

    # TODO: these weights would obviously have to be adapted for longer
    # periods; also, this interface is not very intuitive => add a method
    # where one can specify the expected inter-event interval and convert
    # that into an activity
    start_activity_gen = ParetoGenerator(xmin=.25, a=1.2,
                                         seed=next(self.seeder))
    end_activity_gen = ParetoGenerator(xmin=150, a=1.2,
                                       seed=next(self.seeder))

    going_on_holidays = self.create_story(
        name="agent_start_holidays",
        initiating_population=agents,
        member_id_field="AGENT",
        timer_gen=holiday_timer,
        activity_gen=start_activity_gen)

    returning_from_holidays = self.create_story(
        name="agent_stops_holidays",
        initiating_population=agents,
        member_id_field="AGENT",
        timer_gen=holiday_timer,
        activity_gen=end_activity_gen,
        # the return is armed by the departure story, not by its own timer
        auto_reset_timer=False)

    going_on_holidays.set_operations(
        # freeze the purchase activity of this agent
        self.get_story("purchases").ops.transit_to_state(
            member_id_field="AGENT", state="on_holiday"),

        # arm the timer that will eventually bring the agent back
        returning_from_holidays.ops.reset_timers(member_id_field="AGENT"),

        # just for the logs
        self.clock.ops.timestamp(named_as="TIME"),
        ConstantGenerator(value="going").ops.generate(named_as="STATES"),
        operations.FieldLogger(log_id="holidays"),
    )

    returning_from_holidays.set_operations(
        # resume the normal purchase activity of this agent
        self.get_story("purchases").ops.transit_to_state(
            member_id_field="AGENT", state="default"),

        # just for the logs
        self.clock.ops.timestamp(named_as="TIME"),
        ConstantGenerator(value="returning").ops.generate(named_as="STATES"),
        operations.FieldLogger(log_id="holidays"),
    )
def create_purchase_story(the_circus):
    """
    Creates the "purchase" story: during work hours, customers buy items
    from points of sale; bought items move from the POS "items" relationship
    to the customer's "my_items" relationship, and each purchase is logged.
    """
    work_hours_timer = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock, seed=next(the_circus.seeder))

    customers = the_circus.populations["customer"]

    # heterogeneous activity: customers collectively perform 1 story per day
    # on average, but each customer draws its own average rate from an
    # exponential distribution, so some buy more often than others
    rate_per_customer = gen.NumpyRandomGenerator(
        method="exponential",
        scale=work_hours_timer.activity(n=1, per=pd.Timedelta("24h")),
        seed=next(the_circus.seeder))

    purchase_story = the_circus.create_story(
        name="purchase",
        initiating_population=customers,
        member_id_field="CUST_ID",
        timer_gen=work_hours_timer,
        activity_gen=rate_per_customer)

    customers_items = customers.get_relationship("my_items")
    pos = the_circus.populations["point_of_sale"]
    pos_items = pos.get_relationship("items")

    purchase_story.set_operations(
        customers.ops.lookup(id_field="CUST_ID",
                             select={"FIRST_NAME": "BUYER_FIRST_NAME",
                                     "LAST_NAME": "BUYER_LAST_NAME"}),

        # pick the POS where this purchase happens
        pos.ops.select_one(named_as="POS_ID"),
        pos.ops.lookup(id_field="POS_ID", select={"COMPANY": "POS_NAME"}),

        # pick an item from the vendor's stock: join the POS "items"
        # relationship on the POS_ID field of the story_data and put the
        # result into BOUGHT_ITEM_ID
        pos_items.ops.select_one(
            from_field="POS_ID",
            named_as="BOUGHT_ITEM_ID",
            # joined items must be unique: 2 customers cannot buy the same one
            one_to_one=True,
            # the bought item leaves the POS stock
            pop=True,
            # if some POS is out of stock, just drop the story_data row
            # (alternatively we could keep it and trigger retries for the
            # empty value later on)
            discard_empty=True),

        # the bought item now belongs to the customer
        customers_items.ops.add(
            item_field="BOUGHT_ITEM_ID",
            from_field="CUST_ID"
        ),

        ops.FieldLogger(log_id="purchases")
    )
# NOTE(review): the next line continues a generator call opened before this
# chunk (presumably building "age_gen") — confirm against the full file.
scale=5, seed=next(example_circus.seeder))

# generator of fake person names, seeded from the circus seeder so the
# scenario stays reproducible
name_gen = FakerGenerator(method="name", seed=next(example_circus.seeder))

# population of 1000 persons, with NAME and AGE attributes initialised from
# the generators above (id_gen / age_gen are defined earlier in the file)
person = example_circus.create_population(name="person", size=1000,
                                          ids_gen=id_gen)
person.create_attribute("NAME", init_gen=name_gen)
person.create_attribute("AGE", init_gen=age_gen)

# "hello_world" story: a constant timer value of 1 makes every person
# trigger the story at a fixed, regular cadence
hello_world = example_circus.create_story(
    name="hello_world",
    initiating_population=example_circus.populations["person"],
    member_id_field="PERSON_ID",
    timer_gen=ConstantDependentGenerator(value=1))

# each execution logs a timestamp plus a constant "hello world" message
hello_world.set_operations(
    example_circus.clock.ops.timestamp(named_as="TIME"),
    ConstantGenerator(value="hello world").ops.generate(named_as="MESSAGE"),
    operations.FieldLogger(log_id="hello"))

# run the simulation for 48h, writing the logs as CSV under output/
example_circus.run(duration=pd.Timedelta("48h"),
                   log_output_folder="output/example_scenario",
                   delete_existing_logs=True)

# -- DEBUG output printout
df = pd.read_csv("output/example_scenario/hello.csv")
print(df.head(10))
print(df.tail(10))
def add_initial_stock_as_purchases(circus, buyer_actor_name, params):
    """
    Logs the initial stock of the buyer actor as if it had been obtained
    through one bulk purchase from its provider, so consumers of the
    purchase logs also see the items present at the start of the simulation.

    For each product in params["products"], a story named
    "initial_<buyer>_<product>_bulk_purchase" is created and made to execute
    exactly once at the beginning; it emits the same two logs
    ("<action>_stock" and the exploded per-item transaction log) as the
    regular bulk purchase actions.
    """
    # loop-invariant: the buyer actor does not depend on the product
    buyer = circus.actors[buyer_actor_name]

    for product, description in params["products"].items():
        action_name = "{}_{}_bulk_purchase".format(buyer_actor_name, product)
        logging.info("adding initial {} stock of {} as purchases".format(
            product, buyer_actor_name))

        # generators of item prices and types: one value per item in the bulk
        item_price_gen = random_generators.NumpyRandomGenerator(
            method="choice", a=description["item_prices"],
            seed=next(circus.seeder))
        item_prices_gen = random_generators.DependentBulkGenerator(
            element_generator=item_price_gen)

        item_type_gen = random_generators.NumpyRandomGenerator(
            method="choice", a=circus.actors[product].ids,
            seed=next(circus.seeder))
        item_types_gen = random_generators.DependentBulkGenerator(
            element_generator=item_type_gen)

        tx_gen = random_generators.SequencialGenerator(
            prefix="_".join(["TX_initial", buyer_actor_name, product]))
        tx_seq_gen = random_generators.DependentBulkGenerator(
            element_generator=tx_gen)

        log_stock = circus.create_story(
            name="initial_{}".format(action_name),
            initiating_actor=buyer,
            actorid_field="BUYER_ID",
            # everybody executes this action once, at the beginning
            timer_gen=random_generators.ConstantDependentGenerator(value=0),
            auto_reset_timer=False)

        # reset timer once so that it executes once
        log_stock.reset_timers()

        log_stock.set_operations(
            circus.clock.ops.timestamp(named_as="TIME", random=False),

            # the provider this initial stock is attributed to
            buyer.get_relationship(
                "{}__provider".format(product)).ops.select_one(
                    from_field="BUYER_ID", named_as="SELLER_ID"),

            # the whole current stock is reported as one single bulk
            buyer.get_relationship(product).ops.select_all(
                from_field="BUYER_ID", named_as="ITEM_IDS"),

            operations.Apply(source_fields="ITEM_IDS",
                             named_as="BULK_SIZE",
                             f=lambda s: s.map(len), f_args="series"),

            item_prices_gen.ops.generate(named_as="ITEM_PRICES",
                                         observed_field="BULK_SIZE"),
            item_types_gen.ops.generate(named_as="ITEM_TYPES",
                                        observed_field="BULK_SIZE"),
            tx_seq_gen.ops.generate(named_as="TX_IDS",
                                    observed_field="BULK_SIZE"),

            # by convention, the initial stock "grows" from 0 to BULK_SIZE
            random_generators.ConstantGenerator(value=0).ops.generate(
                named_as="OLD_BUYER_STOCK"),
            operations.Apply(source_fields="BULK_SIZE",
                             named_as="NEW_BUYER_STOCK",
                             f=lambda s: s, f_args="series"),

            operations.FieldLogger(log_id="{}_stock".format(action_name),
                                   cols=["TIME", "BUYER_ID", "SELLER_ID",
                                         "OLD_BUYER_STOCK",
                                         "NEW_BUYER_STOCK", "BULK_SIZE"]),

            operations.FieldLogger(log_id=action_name,
                                   cols=["TIME", "BUYER_ID", "SELLER_ID"],
                                   exploded_cols=["TX_IDS", "ITEM_IDS",
                                                  "ITEM_PRICES",
                                                  "ITEM_TYPES"]))