Beispiel #1
0
def trigger_action_if_low_stock(circus,
                                stock_relationship,
                                actor_id_field,
                                restock_trigger,
                                triggered_action_name,
                                field_prefix=""):
    """
    Builds a chain of 3 operations that forces a story to act when the stock
    is judged too low:

    1. the neighbourhood size of stock_relationship is read for the ids found
       in actor_id_field and stored as "<field_prefix>CURRENT_STOCK"
    2. restock_trigger observes that field and stores its output as
       "<field_prefix>SHOULD_RESTOCK"
    3. "force_act_next" is sent to the story named triggered_action_name,
       conditioned on that second field

    :param circus: object providing get_story(), used to find the story to
        trigger
    :param stock_relationship: relationship whose neighbourhood size is used
        as the stock level
    :param actor_id_field: name of the field containing the ids of the actors
        whose stock is checked
    :param restock_trigger: generator deciding, from the observed stock
        level, whether the story must be triggered
    :param triggered_action_name: name of the story to trigger
    :param field_prefix: prefix added to the name of the 2 fields created by
        this chain
    :return: the operations.Chain made of the 3 steps above
    """

    # both intermediate fields are shared between consecutive steps
    stock_field = "{}CURRENT_STOCK".format(field_prefix)
    decision_field = "{}SHOULD_RESTOCK".format(field_prefix)

    measure_stock = stock_relationship.ops.get_neighbourhood_size(
        from_field=actor_id_field,
        named_as=stock_field)

    evaluate_trigger = restock_trigger.ops.generate(
        named_as=decision_field,
        observed_field=stock_field)

    triggered_story = circus.get_story(triggered_action_name)
    force_story = triggered_story.ops.force_act_next(
        member_id_field=actor_id_field,
        condition_field=decision_field)

    return operations.Chain(measure_stock, evaluate_trigger, force_story)
Beispiel #2
0
def test_chain_of_3_operation_should_return_merged_logs():
    """
    Chaining 3 fake operations that each emit one distinct log must yield a
    log dictionary containing exactly those 3 logs, unmodified.
    """

    expected_logs = {}
    fake_ops = []

    # one random 12x3 dataframe per operation, logged under its own id
    for log_id in ("cdrs1", "cdrs2", "cdrs3"):
        cdrs = pd.DataFrame(np.random.rand(12, 3),
                            columns=["A", "B", "duration"])
        expected_logs[log_id] = cdrs
        fake_ops.append(mockops.FakeOp(input, {log_id: cdrs}))

    chain = operations.Chain(*fake_ops)

    # the chain is executed on an empty dataframe: only the logs matter here
    _, all_logs = chain(pd.DataFrame(columns=[]))

    assert set(all_logs.keys()) == {"cdrs1", "cdrs2", "cdrs3"}
    for log_id, cdrs in expected_logs.items():
        assert all_logs[log_id].equals(cdrs)
Beispiel #3
0
def build_site_product_pos_target(circus, params):
    """
    Generates some random target of amount of pos per site and per product,
    based on the actual number of pos per site, and saves it as
    "site_product_pos_target.csv" in the namespace folder of the circus.

    The target of each (site, product) pair is the actual pos count of the
    site multiplied by a random factor drawn from a normal distribution
    (mean 1, standard deviation 0.1), truncated to an integer and never lower
    than 10.

    :param circus: circus providing the "sites" actors and their "POS"
        relationship; its name determines the output folder
    :param params: dictionary whose "products" entry is a dictionary keyed by
        product type id
    :return: None, the result is only written to the csv file
    """

    target_file = os.path.join(db.namespace_folder(circus.name),
                               "site_product_pos_target.csv")

    sites = circus.actors["sites"]

    # counts the pos of each site and exposes the result through the logs
    target_action = operations.Chain(
        sites.relationships["POS"].ops.get_neighbourhood_size(
            from_field="site_id", named_as="pos_count_target"),
        operations.FieldLogger(log_id="logs"))

    sites_df = pd.DataFrame({"site_id": sites.ids})

    _, logs = target_action(sites_df)

    # constant join key on both sides => the merge below is a cartesian
    # product of sites and products
    target_df = logs["logs"]
    target_df["cartesian_product"] = "cp"

    products = pd.DataFrame({
        # materialised as a list so that the constructor receives a plain
        # sequence instead of a dictionary view
        "product_type_id": list(params["products"].keys()),
        "cartesian_product": "cp"
    })

    target_df = pd.merge(left=target_df,
                         right=products,
                         on="cartesian_product")

    fact = np.random.normal(1, .1, size=target_df.shape[0])
    target_df["pos_count_target"] = target_df["pos_count_target"] * fact

    # builtin int instead of the np.int alias, which no longer exists in
    # recent numpy versions
    target_df["pos_count_target"] = target_df["pos_count_target"].astype(int)

    # enforces a minimum target of 10 (.loc replaces the removed .ix indexer)
    target_df.loc[target_df["pos_count_target"] < 10, "pos_count_target"] = 10
    target_df.drop(["cartesian_product"], axis=1, inplace=True)

    target_df.to_csv(target_file, index=False)
Beispiel #4
0
def add_listen_and_share_stories_with_details(the_circus):
    """
    This is again a copy-paste of add_listen_and_share_stories (hopefully
    this helps to illustrate the progression), here adding a supplementary
    look-up on the attributes of the selected song.

    Creates the "listen_events" and "share_events" stories, both initiated
    by the "user" population and both based on the same chain of operations
    selecting a genre and a song.

    :param the_circus: circus in which the 2 stories are created; must
        already contain the "user", "music_repository" and "song" populations
    :return: None
    """

    users = the_circus.populations["user"]

    # with this timer, users are only active during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(clock=the_circus.clock,
                                                  seed=next(the_circus.seeder))

    # activity levels follow a "truncated normal distribution": values are
    # drawn from a gaussian and then bounded to [10, 30], which prevents
    # very high and very low activities
    activity_seed = next(the_circus.seeder)
    mean_activity = timer_gen.activity(n=20, per=pd.Timedelta("1 day"))
    gaussian_activity_gen = gen.NumpyRandomGenerator(method="normal",
                                                     seed=activity_seed,
                                                     loc=mean_activity,
                                                     scale=5)
    bounded_gaussian_activity_gen = gaussian_activity_gen.map(
        ops.bound_value(lb=10, ub=30))

    # both stories share the same population, timer and activity levels
    story_settings = dict(initiating_population=users,
                          member_id_field="UID",
                          timer_gen=timer_gen,
                          activity_gen=bounded_gaussian_activity_gen)

    listen = the_circus.create_story(name="listen_events", **story_settings)
    share = the_circus.create_story(name="share_events", **story_settings)

    repo = the_circus.populations["music_repository"]
    songs = the_circus.populations["song"]

    lookup_user_names = users.ops.lookup(
        id_field="UID",
        select={
            "FIRST_NAME": "USER_FIRST_NAME",
            "LAST_NAME": "USER_LAST_NAME",
        })

    # a genre is picked at random...
    pick_genre = repo.ops.select_one(named_as="GENRE")

    # ... then a song is picked at random among the ones of that genre
    pick_song = repo.get_relationship("songs").ops.select_one(
        from_field="GENRE", named_as="SONG_ID")

    # details of the listened or shared song are now reported as well
    lookup_song_details = songs.ops.lookup(
        id_field="SONG_ID",
        select={
            "artist_name": "SONG_ARTIST",
            "title": "SONG_TITLE",
            "recording_year": "SONG_YEAR",
            "duration_seconds": "SONG_DURATION",
        })

    select_genre_and_song = ops.Chain(lookup_user_names,
                                      pick_genre,
                                      pick_song,
                                      lookup_song_details)

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    # the share story additionally picks the user the song is shared to.
    # Note that nothing prevents a user from sharing a song to their own
    # uid: this could be post-checked here, using DropRow to discard such
    # share events
    pick_recipient = users.ops.select_one(named_as="SHARED_TO_UID")

    share.set_operations(select_genre_and_song,
                         pick_recipient,
                         ops.FieldLogger("share_events"))
Beispiel #5
0
def add_song_populations(the_circus):
    """
    Creates the "song" population and fills it with 1000 randomly generated
    songs for each genre of the "music_repository" population.

    The ids of the generated songs are also added to the "songs"
    relationship of the music repository, grouped by genre.

    :param the_circus: circus in which the population is created; must
        already contain the "music_repository" population
    :return: None
    """

    songs = the_circus.create_population(
        name="song", size=0, ids_gen=gen.SequencialGenerator(prefix="SONG_"))

    # the population is created with a size of 0, so its attributes can be
    # declared without any initialization
    for attribute_name in ("artist_name", "song_genre", "title",
                           "duration_seconds", "recording_year"):
        songs.create_attribute(name=attribute_name)

    song_id_gen = gen.SequencialGenerator(prefix="S_")

    # artist names are chosen among a fixed pool of 200 random names, which
    # brings some redundancy in the generated dataset
    artist_pool = gen.FakerGenerator(
        method="name", seed=next(the_circus.seeder)).generate(size=200)
    artist_name_gen = gen.NumpyRandomGenerator(method="choice",
                                               a=artist_pool,
                                               seed=next(the_circus.seeder))

    title_gen = gen.FakerGenerator(method="sentence",
                                   seed=next(the_circus.seeder),
                                   nb_words=4,
                                   variable_nb_words=True)

    # recording years: a random date within the desired range, of which only
    # the year is kept
    recording_date_gen = gen.FakerGenerator(
        method="date_time_between_dates",
        seed=next(the_circus.seeder),
        datetime_start=pd.Timestamp("1910-10-20"),
        datetime_end=pd.Timestamp("2016-12-02"))
    year_gen = recording_date_gen.map(f=lambda d: d.year)

    duration_gen = gen.ParetoGenerator(xmin=60,
                                       seed=next(the_circus.seeder),
                                       force_int=True,
                                       a=1.2)

    repo = the_circus.populations["music_repository"]
    genre_names = repo.get_attribute("genre_name").get_values()

    for genre_id, genre_name in genre_names.items():

        # chain of operations generating all the attributes of the songs of
        # this genre
        genre_gen = gen.ConstantGenerator(value=genre_name)
        attribute_steps = [
            artist_name_gen.ops.generate(named_as="artist_name"),
            title_gen.ops.generate(named_as="title"),
            year_gen.ops.generate(named_as="recording_year"),
            duration_gen.ops.generate(named_as="duration_seconds"),
            genre_gen.ops.generate(named_as="song_genre"),
        ]
        init_attribute = ops.Chain(*attribute_steps)

        # story data of "empty" songs, built from the new song ids only
        song_ids = song_id_gen.generate(size=1000)
        empty_songs = story.Story.init_story_data(
            member_id_field_name="SONG_ID", active_ids=song_ids)

        # the ids are enough to register the new songs in the relationship
        # of the music repository
        genre_songs = repo.get_relationship("songs")
        genre_songs.add_grouped_relations(from_ids=[genre_id],
                                          grouped_ids=[song_ids])

        # all the attribute columns are generated here
        initialized_songs, _ = init_attribute(empty_songs)
        initialized_songs.drop(["SONG_ID"], axis=1, inplace=True)

        # the remaining columns match exactly the attributes of the
        # population, which is why the dataframe can be used as-is
        songs.update(initialized_songs)

    # year and duration must be handled as integers
    for attribute_name in ("recording_year", "duration_seconds"):
        songs.get_attribute(attribute_name).transform_inplace(int)
0
def add_listen_and_share_stories(the_circus):
    """
    This is essentially a copy-paste of add_listen_story, plus the addition
    of the share story, in order to show clearly how a Chain can be re-used.

    Creates the "listen_events" and "share_events" stories, both initiated
    by the "user" population and both based on the same chain of operations
    selecting a genre and a song.

    :param the_circus: circus in which the 2 stories are created; must
        already contain the "user" and "music_repository" populations
    :return: None
    """

    users = the_circus.populations["user"]

    # with this timer, users are only active during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(clock=the_circus.clock,
                                                  seed=next(the_circus.seeder))

    # activity levels follow a "truncated normal distribution": values are
    # drawn from a gaussian and then bounded to [10, 30], which prevents
    # very high and very low activities
    activity_seed = next(the_circus.seeder)
    mean_activity = timer_gen.activity(n=20, per=pd.Timedelta("1 day"))
    gaussian_activity_gen = gen.NumpyRandomGenerator(method="normal",
                                                     seed=activity_seed,
                                                     loc=mean_activity,
                                                     scale=5)
    bounded_gaussian_activity_gen = gaussian_activity_gen.map(
        ops.bound_value(lb=10, ub=30))

    # both stories share the same population, timer and activity levels
    story_settings = dict(initiating_population=users,
                          member_id_field="UID",
                          timer_gen=timer_gen,
                          activity_gen=bounded_gaussian_activity_gen)

    listen = the_circus.create_story(name="listen_events", **story_settings)
    share = the_circus.create_story(name="share_events", **story_settings)

    repo = the_circus.populations["music_repository"]

    lookup_user_names = users.ops.lookup(
        id_field="UID",
        select={
            "FIRST_NAME": "USER_FIRST_NAME",
            "LAST_NAME": "USER_LAST_NAME",
        })

    # a genre is picked at random...
    pick_genre = repo.ops.select_one(named_as="GENRE")

    # ... then a song is picked at random among the ones of that genre
    pick_song = repo.get_relationship("songs").ops.select_one(
        from_field="GENRE", named_as="SONG_ID")

    select_genre_and_song = ops.Chain(lookup_user_names,
                                      pick_genre,
                                      pick_song)

    listen.set_operations(select_genre_and_song,
                          ops.FieldLogger("listen_events"))

    # the share story additionally picks the user the song is shared to.
    # Note that nothing prevents a user from sharing a song to their own
    # uid: this could be post-checked here, using DropRow to discard such
    # share events
    pick_recipient = users.ops.select_one(named_as="SHARED_TO_UID")

    share.set_operations(select_genre_and_song,
                         pick_recipient,
                         ops.FieldLogger("share_events"))