def trigger_action_if_low_stock(circus, stock_relationship, actor_id_field,
                                restock_trigger, triggered_action_name,
                                field_prefix=""):
    """
    Looks up the stock level of the specified relationship and submits that
    to the provided restock_trigger. If the result is true, we send a
    "force_act_next" to the specified action.

    :param circus: the circus containing the story to trigger
    :param stock_relationship: relationship whose neighbourhood size is used
        as the current stock level
    :param actor_id_field: story data field containing the actor ids
    :param restock_trigger: generator deciding, from the observed stock
        level, whether to trigger a restock
    :param triggered_action_name: name of the story to force-activate
    :param field_prefix: prefix for the intermediate fields created by this
        operation
    :return: a Chain operation that looks up the stock level and triggers
        the specified story where needed
    """
    return operations.Chain(
        stock_relationship.ops.get_neighbourhood_size(
            from_field=actor_id_field,
            named_as="{}CURRENT_STOCK".format(field_prefix)),

        restock_trigger.ops.generate(
            named_as="{}SHOULD_RESTOCK".format(field_prefix),
            observed_field="{}CURRENT_STOCK".format(field_prefix)),

        circus.get_story(triggered_action_name).ops.force_act_next(
            member_id_field=actor_id_field,
            condition_field="{}SHOULD_RESTOCK".format(field_prefix)),
    )
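
# A minimal wiring sketch for trigger_action_if_low_stock. The names below
# (purchase_story, pos, restock_trigger) are illustrative assumptions, not
# taken from the function above: "pos" is assumed to be a population with a
# "stocks" relationship, "restock" an existing story in the circus, and
# restock_trigger any generator whose ops.generate(observed_field=...)
# emits booleans from the observed stock level.
purchase_story.set_operations(
    # ... operations consuming items from the stock would go here ...

    trigger_action_if_low_stock(
        circus,
        stock_relationship=pos.get_relationship("stocks"),
        actor_id_field="POS_ID",
        restock_trigger=restock_trigger,
        triggered_action_name="restock"),

    operations.FieldLogger(log_id="purchases"))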
def test_chain_of_3_operation_should_return_merged_logs():
    cdrs1 = pd.DataFrame(np.random.rand(12, 3), columns=["A", "B", "duration"])
    op1 = mockops.FakeOp(input, {"cdrs1": cdrs1})

    cdrs2 = pd.DataFrame(np.random.rand(12, 3), columns=["A", "B", "duration"])
    op2 = mockops.FakeOp(input, {"cdrs2": cdrs2})

    cdrs3 = pd.DataFrame(np.random.rand(12, 3), columns=["A", "B", "duration"])
    op3 = mockops.FakeOp(input, {"cdrs3": cdrs3})

    chain = operations.Chain(op1, op2, op3)

    prev_data = pd.DataFrame(columns=[])
    story_data, all_logs = chain(prev_data)

    assert set(all_logs.keys()) == {"cdrs1", "cdrs2", "cdrs3"}
    assert all_logs["cdrs1"].equals(cdrs1)
    assert all_logs["cdrs2"].equals(cdrs2)
    assert all_logs["cdrs3"].equals(cdrs3)
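
# For reference, a minimal sketch of the operation contract that Chain
# relies on in this test: each member is a callable taking the current
# story data and returning an (output_data, logs_dict) pair, and Chain
# merges the logs_dict of all its members. This stand-in is an assumption
# for illustration, not the actual mockops.FakeOp implementation.
class PassThroughOp(object):
    def __init__(self, logs):
        self.logs = logs

    def __call__(self, story_data):
        # forwards the story data unchanged and emits its fixed logs
        return story_data, self.logs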
def build_site_product_pos_target(circus, params):
    """
    Generates a random target number of POS per site and product, based on
    the actual number of POS per site
    """
    target_file = os.path.join(db.namespace_folder(circus.name),
                               "site_product_pos_target.csv")

    sites = circus.actors["sites"]

    target_action = operations.Chain(
        sites.relationships["POS"].ops.get_neighbourhood_size(
            from_field="site_id",
            named_as="pos_count_target"),
        operations.FieldLogger(log_id="logs"))

    sites_df = pd.DataFrame({"site_id": sites.ids})
    _, logs = target_action(sites_df)
    target_df = logs["logs"]

    # cartesian product of sites and product types, joined on a dummy key
    target_df["cartesian_product"] = "cp"
    products = pd.DataFrame({
        "product_type_id": list(params["products"].keys()),
        "cartesian_product": "cp"
    })
    target_df = pd.merge(left=target_df, right=products,
                         on="cartesian_product")

    # randomly perturbs the actual POS counts by about 10% and floors the
    # resulting target at 10
    fact = np.random.normal(1, .1, size=target_df.shape[0])
    target_df["pos_count_target"] = target_df["pos_count_target"] * fact
    target_df["pos_count_target"] = target_df["pos_count_target"].astype(int)
    target_df.loc[target_df["pos_count_target"] < 10, "pos_count_target"] = 10

    target_df.drop(["cartesian_product"], axis=1, inplace=True)
    target_df.to_csv(target_file, index=False)
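
# A quick sanity-check sketch of the file produced above, using only the
# column names visible in the code: one row per (site, product type) pair,
# with the target floored at 10. The assertions are illustrative, and
# target_file is the path computed inside the function.
target_df = pd.read_csv(target_file)
assert {"site_id", "product_type_id", "pos_count_target"} <= set(target_df.columns)
assert (target_df["pos_count_target"] >= 10).all()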
def add_listen_and_share_stories_with_details(the_circus):
    """
    This is again a copy-paste of add_listen_and_share_stories (hopefully
    this helps to illustrate the progression), here showing the
    supplementary look-up on the attributes of the songs
    """

    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock, seed=next(the_circus.seeder))

    # this generates an activity level distributed as a "truncated normal
    # distribution", i.e. very high and low activities are prevented.
    bounded_gaussian_activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5
    ).map(ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    share = the_circus.create_story(
        name="share_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]
    songs = the_circus.populations["song"]

    select_genre_and_song = ops.Chain(
        users.ops.lookup(
            id_field="UID",
            select={
                "FIRST_NAME": "USER_FIRST_NAME",
                "LAST_NAME": "USER_LAST_NAME",
            }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(
            from_field="GENRE", named_as="SONG_ID"),

        # now also reporting details of listened or shared songs
        songs.ops.lookup(
            id_field="SONG_ID",
            select={
                "artist_name": "SONG_ARTIST",
                "title": "SONG_TITLE",
                "recording_year": "SONG_YEAR",
                "duration_seconds": "SONG_DURATION",
            }),
    )

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    share.set_operations(
        select_genre_and_song,

        # picks a user this song is shared to
        users.ops.select_one(named_as="SHARED_TO_UID"),

        # note we could post-check whether a user shared a song to their own
        # uid here, in which case we could use DropRow to discard that share
        # event (a sketch of this follows below)
        ops.FieldLogger("share_events"))
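
# A sketch of the self-share filtering mentioned in the comment above,
# assuming trumania-style ops.Apply and ops.DropRow operations (their exact
# signatures are assumptions): Apply derives a boolean field from the story
# data, and DropRow discards the rows where it is true, so self-shares never
# reach the logger.
share.set_operations(
    select_genre_and_song,
    users.ops.select_one(named_as="SHARED_TO_UID"),

    # flags events where a user shared a song with themselves
    ops.Apply(source_fields=["UID", "SHARED_TO_UID"],
              named_as="IS_SELF_SHARE",
              f=lambda df: df["UID"] == df["SHARED_TO_UID"],
              f_args="dataframe"),

    # discards the flagged events before logging
    ops.DropRow(condition_field="IS_SELF_SHARE"),

    ops.FieldLogger("share_events"))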
def add_song_populations(the_circus):
    songs = the_circus.create_population(
        name="song", size=0,
        ids_gen=gen.SequencialGenerator(prefix="SONG_"))

    # since the size of the population is 0, we can create attributes
    # without providing any initialization
    songs.create_attribute(name="artist_name")
    songs.create_attribute(name="song_genre")
    songs.create_attribute(name="title")
    songs.create_attribute(name="duration_seconds")
    songs.create_attribute(name="recording_year")

    song_id_gen = gen.SequencialGenerator(prefix="S_")

    # generates artist names by picking from a list of randomly generated
    # ones, so we have some redundancy in the generated dataset
    artist_name_gen = gen.NumpyRandomGenerator(
        method="choice",
        a=gen.FakerGenerator(method="name",
                             seed=next(the_circus.seeder)).generate(size=200),
        seed=next(the_circus.seeder))

    title_gen = gen.FakerGenerator(method="sentence",
                                   seed=next(the_circus.seeder),
                                   nb_words=4,
                                   variable_nb_words=True)

    # generates recording years within a desired date range
    year_gen = gen.FakerGenerator(
            method="date_time_between_dates",
            seed=next(the_circus.seeder),
            datetime_start=pd.Timestamp("1910-10-20"),
            datetime_end=pd.Timestamp("2016-12-02")) \
        .map(f=lambda d: d.year)

    duration_gen = gen.ParetoGenerator(xmin=60,
                                       seed=next(the_circus.seeder),
                                       force_int=True,
                                       a=1.2)

    repo = the_circus.populations["music_repository"]
    repo_genre_rel = repo.get_attribute("genre_name")
    for genre_id, genre_name in repo_genre_rel.get_values().items():

        # an operation capable of creating songs of that genre
        init_attribute = ops.Chain(
            artist_name_gen.ops.generate(named_as="artist_name"),
            title_gen.ops.generate(named_as="title"),
            year_gen.ops.generate(named_as="recording_year"),
            duration_gen.ops.generate(named_as="duration_seconds"),
            gen.ConstantGenerator(value=genre_name).ops.generate(
                named_as="song_genre"))

        # dataframe of empty songs: just with one SONG_ID column for now
        song_ids = song_id_gen.generate(size=1000)
        empty_songs = story.Story.init_story_data(
            member_id_field_name="SONG_ID",
            active_ids=song_ids)

        # we can already add the generated songs to the music repo
        # relationship
        repo.get_relationship("songs").add_grouped_relations(
            from_ids=[genre_id],
            grouped_ids=[song_ids])

        # here we generate all desired columns in the dataframe
        initialized_songs, _ = init_attribute(empty_songs)
        initialized_songs.drop(["SONG_ID"], axis=1, inplace=True)

        # this works because the columns generated by init_attribute match
        # exactly the attributes of the song population
        songs.update(initialized_songs)

    # makes sure year and duration are handled as integers
    songs.get_attribute("recording_year").transform_inplace(int)
    songs.get_attribute("duration_seconds").transform_inplace(int)
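
# A short inspection sketch for the generated songs, re-using the
# get_attribute(...).get_values() call from the code above and assuming it
# returns a pandas Series (its .items() usage above suggests so).
titles = songs.get_attribute("title").get_values()
years = songs.get_attribute("recording_year").get_values()
print(titles.head())
print(years.head())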
def add_listen_and_share_stories(the_circus):
    """
    This is essentially a copy-paste of add_listen_story, plus the addition
    of the share story, in order to clearly show the re-usability of the
    Chain
    """

    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock, seed=next(the_circus.seeder))

    # this generates an activity level distributed as a "truncated normal
    # distribution", i.e. very high and low activities are prevented.
    bounded_gaussian_activity_gen = gen.NumpyRandomGenerator(
        method="normal",
        seed=next(the_circus.seeder),
        loc=timer_gen.activity(n=20, per=pd.Timedelta("1 day")),
        scale=5
    ).map(ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    share = the_circus.create_story(
        name="share_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]

    select_genre_and_song = ops.Chain(
        users.ops.lookup(
            id_field="UID",
            select={
                "FIRST_NAME": "USER_FIRST_NAME",
                "LAST_NAME": "USER_LAST_NAME",
            }),

        # picks a genre at random
        repo.ops.select_one(named_as="GENRE"),

        # picks a song at random for that genre
        repo.get_relationship("songs").ops.select_one(
            from_field="GENRE", named_as="SONG_ID"),
    )

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    share.set_operations(
        select_genre_and_song,

        # picks a user this song is shared to
        users.ops.select_one(named_as="SHARED_TO_UID"),

        # note we could post-check whether a user shared a song to their own
        # uid here, in which case we could use DropRow to discard that share
        # event
        ops.FieldLogger("share_events"))
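
# Once the stories are configured, the scenario can be executed. This is a
# minimal sketch assuming a trumania-style Circus.run API (the duration,
# log_output_folder and delete_existing_logs parameters are assumptions
# about that signature); the listen_events and share_events logs would then
# end up as csv files in the given output folder.
the_circus.run(
    duration=pd.Timedelta("5 days"),
    log_output_folder="output/music_example",
    delete_existing_logs=True)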