예제 #1
0
def test_bounded_generator_should_limnit_with_both_bound():
    """Check bound_value(lb=10, ub=15) against values around both bounds."""
    clamp = operations.bound_value(lb=10, ub=15)

    # (input, expected): below the range, on each edge, inside, above
    expectations = [(5, 10), (10, 10), (12, 12), (15, 15), (20, 15)]

    for value, expected in expectations:
        assert clamp(value) == expected
예제 #2
0
def add_mobility_action(circus, params):
    """
    Register the "customer_mobility" story on the circus.

    The story is initiated by the "customers" actor. Its timer follows a
    cyclic profile of 24 one-hour steps, and its activity is drawn from a
    normal distribution then passed through bound_value(lb=.5).

    :param circus: object providing the clock, seeder, actors and
        create_story used below
    :param params: mapping providing the keys
        "mean_daily_customer_mobility_activity" and
        "std_daily_customer_mobility_activity"
    """
    logging.info(" creating customer mobility action")

    # one weight per "1H" step of the profile (24 values)
    hourly_weights = [
        1., 1., 1., 1., 1., 1., 1., 1., 5., 10., 5., 1., 1., 1., 1., 1., 1.,
        5., 10., 5., 1., 1., 1., 1.
    ]

    # the timer seed is drawn first so the seeder is consumed in the same
    # order as the generators are declared
    timer_seed = next(circus.seeder)
    timer_profile = CyclicTimerProfile(
        profile=hourly_weights,
        profile_time_steps="1H",
        start_date=pd.Timestamp("12 September 2016 00:00.00"),
    )
    mobility_time_gen = CyclicTimerGenerator(clock=circus.clock,
                                             seed=timer_seed,
                                             config=timer_profile)

    activity_mean = params["mean_daily_customer_mobility_activity"]
    activity_std = params["std_daily_customer_mobility_activity"]
    raw_activity_gen = NumpyRandomGenerator(method="normal",
                                            loc=activity_mean,
                                            scale=activity_std,
                                            seed=next(circus.seeder))
    mobility_activity_gen = raw_activity_gen.map(f=bound_value(lb=.5))

    customers = circus.actors["customers"]

    story = circus.create_story(name="customer_mobility",
                                initiating_actor=customers,
                                actorid_field="CUST_ID",
                                timer_gen=mobility_time_gen,
                                activity_gen=mobility_activity_gen)

    logging.info(" adding operations")

    # keeps the current site under the name PREV_SITE
    remember_previous_site = customers.ops.lookup(
        id_field="CUST_ID",
        select={"CURRENT_SITE": "PREV_SITE"})

    # selects a destination site (or maybe the same as current... ^^)
    possible_sites = customers.get_relationship("POSSIBLE_SITES")
    pick_new_site = possible_sites.ops.select_one(from_field="CUST_ID",
                                                  named_as="NEW_SITE")

    # update the SITE attribute of the customers accordingly
    current_site = customers.get_attribute("CURRENT_SITE")
    move_to_new_site = current_site.ops.update(id_field="CUST_ID",
                                               copy_from_field="NEW_SITE")

    add_timestamp = circus.clock.ops.timestamp(named_as="TIME")

    # create mobility logs
    log_mobility = FieldLogger(
        log_id="customer_mobility_logs",
        cols=["TIME", "CUST_ID", "PREV_SITE", "NEW_SITE"])

    story.set_operations(
        remember_previous_site,
        pick_new_site,
        move_to_new_site,
        add_timestamp,
        log_mobility,
    )
예제 #3
0
def add_mobility_action(circus, params):
    """
    Register the "field_agent_mobility" story on the circus.

    The story is initiated by the "field_agents" actor and timed by a
    WorkHoursTimerGenerator. Its activity is drawn from a normal
    distribution whose mean and standard deviation are the daily figures
    from params, each passed through mobility_time_gen.activity(), and the
    drawn values are then passed through bound_value(lb=1).

    :param circus: object providing the clock, seeder, actors and
        create_story used below
    :param params: mapping providing the keys
        "mean_daily_fa_mobility_activity" and
        "std_daily_fa_mobility_activity"
    """
    logging.info(" creating field agent mobility action")

    # Field agents move only during the work hours
    mobility_time_gen = WorkHoursTimerGenerator(
        clock=circus.clock,
        seed=next(circus.seeder))

    # both figures are expressed per day in params
    activity_loc = mobility_time_gen.activity(
        n=params["mean_daily_fa_mobility_activity"],
        per=pd.Timedelta("1day"))
    activity_scale = mobility_time_gen.activity(
        n=params["std_daily_fa_mobility_activity"],
        per=pd.Timedelta("1day"))

    raw_activity_gen = NumpyRandomGenerator(
        method="normal",
        loc=activity_loc,
        scale=activity_scale,
        seed=next(circus.seeder))
    mobility_activity_gen = raw_activity_gen.map(f=bound_value(lb=1))

    field_agents = circus.actors["field_agents"]

    story = circus.create_story(
        name="field_agent_mobility",
        initiating_actor=field_agents,
        actorid_field="FA_ID",
        timer_gen=mobility_time_gen,
        activity_gen=mobility_activity_gen)

    logging.info(" adding operations")

    # keeps the current site under the name PREV_SITE
    remember_previous_site = field_agents.ops.lookup(
        id_field="FA_ID",
        select={"CURRENT_SITE": "PREV_SITE"})

    # selects a destination site (or maybe the same as current... ^^)
    possible_sites = field_agents.get_relationship("POSSIBLE_SITES")
    pick_new_site = possible_sites.ops.select_one(from_field="FA_ID",
                                                  named_as="NEW_SITE")

    # update the SITE attribute of the field agents accordingly
    current_site = field_agents.get_attribute("CURRENT_SITE")
    move_to_new_site = current_site.ops.update(id_field="FA_ID",
                                               copy_from_field="NEW_SITE")

    add_timestamp = circus.clock.ops.timestamp(named_as="TIME")

    # create mobility logs
    log_mobility = FieldLogger(
        log_id="field_agent_mobility_logs",
        cols=["TIME", "FA_ID", "PREV_SITE", "NEW_SITE"])

    story.set_operations(
        remember_previous_site,
        pick_new_site,
        move_to_new_site,
        add_timestamp,
        log_mobility,
    )
예제 #4
0
def test_populations_during_working_hours():
    """
    Run a 100-member population for 30 simulated days with a work-hours
    timer and check that the number of produced logs lies in [14e3, 16e3].
    """
    with path.tempdir() as tmp_root:
        logs_dir = os.path.join(tmp_root, "logs")

        circus = Circus(
            name="tested_circus",
            master_seed=1,
            start=pd.Timestamp("8 June 2016"),
            step_duration=pd.Timedelta("1h"))

        id_gen = SequencialGenerator(max_length=3, prefix="id_")
        field_agents = circus.create_population(name="fa",
                                                size=100,
                                                ids_gen=id_gen)

        mobility_time_gen = WorkHoursTimerGenerator(
            clock=circus.clock,
            seed=next(circus.seeder))

        # mean of 5 and standard deviation of .5, both given per day
        activity_loc = mobility_time_gen.activity(
            n=5, per=pd.Timedelta("1day"))
        activity_scale = mobility_time_gen.activity(
            n=.5, per=pd.Timedelta("1day"))

        raw_activity_gen = NumpyRandomGenerator(
            method="normal",
            loc=activity_loc,
            scale=activity_scale,
            seed=1)
        mobility_activity_gen = raw_activity_gen.map(bound_value(lb=1))

        # just a dummy operation to produce some logs
        story = circus.create_story(
            name="test_story",
            initiating_population=field_agents,
            member_id_field="some_id",
            timer_gen=mobility_time_gen,
            activity_gen=mobility_activity_gen)

        add_timestamp = circus.clock.ops.timestamp(named_as="TIME")
        story.set_operations(add_timestamp, FieldLogger(log_id="the_logs"))

        circus.run(
            duration=pd.Timedelta("30 days"),
            log_output_folder=logs_dir)

        logging.info("loading produced logs")
        all_logs = load_all_logs(logs_dir)
        logs = all_logs["the_logs"]

        logging.info("number of produced logs: {} logs".format(logs.shape[0]))

        # 30 days of simulation should produce 100 * 5 * 30 == 15k logs
        assert 14e3 <= logs.shape[0] <= 16e3
예제 #5
0
def scale_quantity_gen(stock_size_gen, scale_factor):
    """
    Build a scaled version of a generator of quantities.

    stock_size_gen must be a generator of positive numbers (think of them
    as "quantities" of stuff, i.e. on a ratio scale).

    :param stock_size_gen: the generator to scale
    :param scale_factor: factor handed to operations.scale, or None to
        leave the generator untouched
    :return: stock_size_gen itself when scale_factor is None; otherwise
        stock_size_gen mapped through operations.scale(factor=scale_factor)
        and then through operations.bound_value(lb=1), so that scaled
        values are never below 1
    """
    # nothing to scale: hand back the very same generator
    if scale_factor is None:
        return stock_size_gen

    scaled_gen = stock_size_gen.map(
        f_vect=operations.scale(factor=scale_factor))

    return scaled_gen.map(f=operations.bound_value(lb=1))
예제 #6
0
def add_listen_story(the_circus):
    """
    Register the "listen_events" story on the circus.

    The story is initiated by the "user" population. For each event it
    looks up the user's names, selects a genre from the "music_repository"
    population, selects a song through its "songs" relationship and logs
    the result under "events".

    :param the_circus: object providing the populations, clock, seeder and
        create_story used below
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock,
        seed=next(the_circus.seeder))

    # normally distributed activity levels, afterwards passed through
    # bound_value(lb=10, ub=30) so that very high and very low activities
    # are prevented
    activity_seed = next(the_circus.seeder)
    mean_activity = timer_gen.activity(n=20, per=pd.Timedelta("1 day"))
    gaussian_activity_gen = gen.NumpyRandomGenerator(method="normal",
                                                     seed=activity_seed,
                                                     loc=mean_activity,
                                                     scale=5)
    bounded_gaussian_activity_gen = gaussian_activity_gen.map(
        ops.bound_value(lb=10, ub=30))

    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]

    user_names = {
        "FIRST_NAME": "USER_FIRST_NAME",
        "LAST_NAME": "USER_LAST_NAME",
    }
    lookup_user = users.ops.lookup(id_field="UID", select=user_names)

    # picks a genre at random
    pick_genre = repo.ops.select_one(named_as="GENRE")

    # picks a song at random for that genre
    songs_of_genre = repo.get_relationship("songs")
    pick_song = songs_of_genre.ops.select_one(from_field="GENRE",
                                              named_as="SONG_ID")

    listen.set_operations(
        lookup_user,
        pick_genre,
        pick_song,
        ops.FieldLogger("events"),
    )
예제 #7
0
def test_bounded_generator_should_limnit_with_upper_bound():
    """Check bound_value(ub=15) against values below, on and above 15."""
    cap = operations.bound_value(ub=15)

    # (input, expected): below the bound, on the bound, above the bound
    expectations = [(10, 10), (15, 15), (20, 15)]

    for value, expected in expectations:
        assert cap(value) == expected
예제 #8
0
def test_bounding_function_should_not_modify_unbounded_values():
    """Check that bound_value without any bound returns its input as is."""
    bound_f = operations.bound_value(lb=None, ub=None)

    # The step has to be smaller than the [-1000, 2000) span: with a step
    # larger than the span, np.arange yields the start value only and the
    # loop would check a single number. A step of 10 covers negative, zero
    # and positive inputs.
    for x in np.arange(-1000, 2000, 10):
        assert x == bound_f(x)
예제 #9
0
def add_listen_and_share_stories_with_details(the_circus):
    """
    Register the "listen_events" and "share_events" stories on the circus.

    This is again a copy-paste of add_listen_and_share_stories (hopefully
    this helps to illustrate the progression), here showing the
    supplementary look-up on the attributes of the songs.

    :param the_circus: object providing the populations, clock, seeder and
        create_story used below
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock,
        seed=next(the_circus.seeder))

    # normally distributed activity levels, afterwards passed through
    # bound_value(lb=10, ub=30) so that very high and very low activities
    # are prevented
    activity_seed = next(the_circus.seeder)
    mean_activity = timer_gen.activity(n=20, per=pd.Timedelta("1 day"))
    gaussian_activity_gen = gen.NumpyRandomGenerator(method="normal",
                                                     seed=activity_seed,
                                                     loc=mean_activity,
                                                     scale=5)
    bounded_gaussian_activity_gen = gaussian_activity_gen.map(
        ops.bound_value(lb=10, ub=30))

    # both stories share the same timer and activity generators
    listen = the_circus.create_story(
        name="listen_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    share = the_circus.create_story(
        name="share_events",
        initiating_population=users,
        member_id_field="UID",
        timer_gen=timer_gen,
        activity_gen=bounded_gaussian_activity_gen)

    repo = the_circus.populations["music_repository"]
    songs = the_circus.populations["song"]

    user_names = {
        "FIRST_NAME": "USER_FIRST_NAME",
        "LAST_NAME": "USER_LAST_NAME",
    }
    lookup_user = users.ops.lookup(id_field="UID", select=user_names)

    # picks a genre at random
    pick_genre = repo.ops.select_one(named_as="GENRE")

    # picks a song at random for that genre
    songs_of_genre = repo.get_relationship("songs")
    pick_song = songs_of_genre.ops.select_one(from_field="GENRE",
                                              named_as="SONG_ID")

    # now also reporting details of listened or shared songs
    song_details = {
        "artist_name": "SONG_ARTIST",
        "title": "SONG_TITLE",
        "recording_year": "SONG_YEAR",
        "duration_seconds": "SONG_DURATION",
    }
    lookup_song = songs.ops.lookup(id_field="SONG_ID", select=song_details)

    # the same chain instance is used by both stories
    select_genre_and_song = ops.Chain(
        lookup_user,
        pick_genre,
        pick_song,
        lookup_song,
    )

    listen.set_operations(
        select_genre_and_song,
        ops.FieldLogger("listen_events"))

    # picks a user this song is shared to
    pick_recipient = users.ops.select_one(named_as="SHARED_TO_UID")

    # note we could post-check when user shared a song to their own uid
    # here, in which case we can use DropRow to discard that share event
    share.set_operations(
        select_genre_and_song,
        pick_recipient,
        ops.FieldLogger("share_events"))
예제 #10
0
def add_listen_and_share_stories(the_circus):
    """
    Register the "listen_events" and "share_events" stories on the circus.

    This is essentially a copy-paste of add_listen_story, plus the share
    story, in order to show clearly how a Chain can be re-used by several
    stories.

    :param the_circus: object providing the populations, clock, seeder and
        create_story used below
    """
    users = the_circus.populations["user"]

    # using this timer means users only listen to songs during work hours
    timer_gen = profilers.WorkHoursTimerGenerator(
        clock=the_circus.clock,
        seed=next(the_circus.seeder))

    # normally distributed activity levels, afterwards passed through
    # bound_value(lb=10, ub=30) so that very high and very low activities
    # are prevented
    activity_seed = next(the_circus.seeder)
    mean_activity = timer_gen.activity(n=20, per=pd.Timedelta("1 day"))
    gaussian_activity_gen = gen.NumpyRandomGenerator(method="normal",
                                                     seed=activity_seed,
                                                     loc=mean_activity,
                                                     scale=5)
    bounded_gaussian_activity_gen = gaussian_activity_gen.map(
        ops.bound_value(lb=10, ub=30))

    # everything but the name is identical for the two stories
    common_story_args = {
        "initiating_population": users,
        "member_id_field": "UID",
        "timer_gen": timer_gen,
        "activity_gen": bounded_gaussian_activity_gen,
    }
    listen = the_circus.create_story(name="listen_events",
                                     **common_story_args)
    share = the_circus.create_story(name="share_events",
                                    **common_story_args)

    repo = the_circus.populations["music_repository"]

    user_names = {
        "FIRST_NAME": "USER_FIRST_NAME",
        "LAST_NAME": "USER_LAST_NAME",
    }
    lookup_user = users.ops.lookup(id_field="UID", select=user_names)

    # picks a genre at random
    pick_genre = repo.ops.select_one(named_as="GENRE")

    # picks a song at random for that genre
    songs_of_genre = repo.get_relationship("songs")
    pick_song = songs_of_genre.ops.select_one(from_field="GENRE",
                                              named_as="SONG_ID")

    # the same chain instance is used by both stories
    select_genre_and_song = ops.Chain(lookup_user, pick_genre, pick_song)

    listen.set_operations(
        select_genre_and_song,
        ops.FieldLogger("listen_events"))

    # picks a user this song is shared to
    pick_recipient = users.ops.select_one(named_as="SHARED_TO_UID")

    # note we could post-check when user shared a song to their own uid
    # here, in which case we can use DropRow to discard that share event
    share.set_operations(
        select_genre_and_song,
        pick_recipient,
        ops.FieldLogger("share_events"))
예제 #11
0
def noisified(df, col, lb, col_type=float):
    """
    Return column `col` of `df`, lower-bounded then multiplied by noise.

    Each value of the column goes through operations.bound_value(lb=lb)
    and is then multiplied by a factor drawn from a normal distribution of
    mean 1 and standard deviation .1, using numpy's global random state.
    Since the bound is applied before the noise, results may end up below
    `lb`.

    :param df: table-like object exposing `shape` and `df[col]`
        (presumably a pandas DataFrame -- confirm against callers)
    :param col: name of the column to read
    :param lb: lower bound handed to operations.bound_value
    :param col_type: type the result is cast to with `astype`; defaults to
        the builtin float
    :return: the noisy column, cast to `col_type`
    """
    # The default used to be np.float, which was only an alias of the
    # builtin float and was removed in NumPy 1.24: evaluating it in the
    # signature made this definition fail on recent NumPy versions.
    noise = np.random.normal(1, .1, size=df.shape[0])
    bounded = df[col].apply(operations.bound_value(lb=lb))
    return (bounded * noise).astype(col_type)