def connect_agent_to_dealer(self, agents, dealers):
    """
    Creates the agents -> dealers "DEALERS" relationship as a random
    bipartite graph with exponentially-distributed weights, and additionally
    connects every agent to the "broke_dealer".
    """
    logging.info("Randomly connecting agents to dealer ")

    # edge probability of the bipartite graph
    # NOTE(review): average_agent_degree / n_agents * n_dealers — confirm this
    # is the intended probability (p = avg_degree / n_dealers would be the
    # usual way to target an average degree)
    deg_prob = params["average_agent_degree"] / params[
        "n_agents"] * params["n_dealers"]

    # relationship weights, drawn from an exponential distribution
    agent_weight_gen = NumpyRandomGenerator(method="exponential", scale=1.,
                                            seed=1)

    agent_customer_df = pd.DataFrame.from_records(
        make_random_bipartite_data(agents.ids, dealers.ids, deg_prob,
                                   seed=next(self.seeder)),
        columns=["AGENT", "DEALER"])

    agent_customer_rel = agents.create_relationship(name="DEALERS")

    agent_customer_rel.add_relations(
        from_ids=agent_customer_df["AGENT"],
        to_ids=agent_customer_df["DEALER"],
        weights=agent_weight_gen.generate(agent_customer_df.shape[0]))

    # every agent is also connected to the "broke dealer", to make sure this
    # one gets out of stock quickly
    agent_customer_rel.add_relations(
        from_ids=agents.ids,
        to_ids=np.repeat("broke_dealer", agents.ids.shape),
        weights=4)
def add_mobility_action(circus, params):
    """
    Creates the "customer_mobility" story: at each triggering, a customer
    hops to one of its POSSIBLE_SITES, its CURRENT_SITE attribute is updated
    and the move is logged.
    """
    logging.info(" creating customer mobility action")

    # 24 hourly weights with two activity peaks per day
    mov_prof = [
        1., 1., 1., 1., 1., 1., 1., 1., 5., 10., 5., 1., 1., 1., 1., 1., 1.,
        5., 10., 5., 1., 1., 1., 1.
    ]
    mobility_time_gen = CyclicTimerGenerator(
        clock=circus.clock,
        seed=next(circus.seeder),
        config=CyclicTimerProfile(
            profile=mov_prof,
            profile_time_steps="1H",
            start_date=pd.Timestamp("12 September 2016 00:00.00"),
        ))

    # daily mobility activity per customer: gaussian, clipped below at .5
    gaussian_activity = NumpyRandomGenerator(
        method="normal",
        loc=params["mean_daily_customer_mobility_activity"],
        scale=params["std_daily_customer_mobility_activity"],
        seed=next(circus.seeder))
    mobility_activity_gen = gaussian_activity.map(f=bound_value(lb=.5))

    mobility_action = circus.create_story(
        name="customer_mobility",
        initiating_actor=circus.actors["customers"],
        actorid_field="CUST_ID",
        timer_gen=mobility_time_gen,
        activity_gen=mobility_activity_gen)

    logging.info(" adding operations")

    mobility_action.set_operations(
        circus.actors["customers"].ops.lookup(
            id_field="CUST_ID", select={"CURRENT_SITE": "PREV_SITE"}),

        # selects a destination site (or maybe the same as current... ^^)
        circus.actors["customers"]
        .get_relationship("POSSIBLE_SITES")
        .ops.select_one(from_field="CUST_ID", named_as="NEW_SITE"),

        # update the SITE attribute of the customers accordingly
        circus.actors["customers"]
        .get_attribute("CURRENT_SITE")
        .ops.update(id_field="CUST_ID", copy_from_field="NEW_SITE"),

        circus.clock.ops.timestamp(named_as="TIME"),

        # create mobility logs
        FieldLogger(log_id="customer_mobility_logs",
                    cols=["TIME", "CUST_ID", "PREV_SITE", "NEW_SITE"]),
    )
def step4():
    """
    Woah, this got drastically slower

    Tutorial step: the hello_world story now also picks a random SITE from a
    5-site-per-person relationship and a random DURATION, runs for 48
    simulated hours, then prints the number of logged lines.
    """
    example1 = circus.Circus(name="example1",
                             master_seed=12345,
                             start=pd.Timestamp("1 Jan 2017 00:00"),
                             step_duration=pd.Timedelta("1h"))

    person = example1.create_population(
        name="person", size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"))

    person.create_attribute("NAME",
                            init_gen=FakerGenerator(method="name",
                                                    seed=next(
                                                        example1.seeder)))

    # pool of 1000 site ids to choose from
    sites = SequencialGenerator(prefix="SITE_").generate(1000)
    random_site_gen = NumpyRandomGenerator(method="choice",
                                           a=sites,
                                           seed=next(example1.seeder))

    allowed_sites = person.create_relationship(name="sites")

    # 5 random sites per person (duplicates are possible)
    for i in range(5):
        allowed_sites \
            .add_relations(from_ids=person.ids,
                           to_ids=random_site_gen.generate(person.size))

    hello_world = example1.create_story(
        name="hello_world",
        initiating_population=person,
        member_id_field="PERSON_ID",

        # after each story, reset the timer to 0, so that it will get
        # executed again at the next clock tick (next hour)
        timer_gen=ConstantDependentGenerator(value=0))

    duration_gen = NumpyRandomGenerator(method="exponential",
                                        scale=60,
                                        seed=next(example1.seeder))

    hello_world.set_operations(
        person.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),
        duration_gen.ops.generate(named_as="DURATION"),
        allowed_sites.ops.select_one(from_field="PERSON_ID", named_as="SITE"),
        example1.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"))

    example1.run(duration=pd.Timedelta("48h"),
                 log_output_folder="output/example1",
                 delete_existing_logs=True)

    # first line of the csv is the header, hence the -1
    with open("output/example1/hello.csv") as f:
        print("Logged {} lines".format(len(f.readlines()) - 1))
def add_mobility_action(circus, params): logging.info(" creating field agent mobility action") # Field agents move only during the work hours mobility_time_gen = WorkHoursTimerGenerator(clock=circus.clock, seed=next(circus.seeder)) fa_mean_weekly_activity = mobility_time_gen.activity( n=params["mean_daily_fa_mobility_activity"], per=pd.Timedelta("1day")) fa_weekly_std = mobility_time_gen.activity( n=params["std_daily_fa_mobility_activity"], per=pd.Timedelta("1day")) gaussian_activity = NumpyRandomGenerator(method="normal", loc=fa_mean_weekly_activity, scale=fa_weekly_std, seed=next(circus.seeder)) mobility_activity_gen = gaussian_activity.map(f=bound_value(lb=1)) field_agents = circus.actors["field_agents"] mobility_action = circus.create_story(name="field_agent_mobility", initiating_actor=field_agents, actorid_field="FA_ID", timer_gen=mobility_time_gen, activity_gen=mobility_activity_gen) logging.info(" adding operations") mobility_action.set_operations( field_agents.ops.lookup( id_field="FA_ID", select={"CURRENT_SITE": "PREV_SITE"}), # selects a destination site (or maybe the same as current... ^^) field_agents \ .get_relationship("POSSIBLE_SITES") \ .ops.select_one(from_field="FA_ID", named_as="NEW_SITE"), # update the SITE attribute of the field agents accordingly field_agents \ .get_attribute("CURRENT_SITE") \ .ops.update( id_field="FA_ID", copy_from_field="NEW_SITE"), circus.clock.ops.timestamp(named_as="TIME"), # create mobility logs FieldLogger(log_id="field_agent_mobility_logs", cols=["TIME", "FA_ID", "PREV_SITE", "NEW_SITE"]), )
def test_populations_during_working_hours():
    """
    Runs a 30-day simulation where 100 members trigger a dummy story about
    5 times per day during work hours, and checks the produced log volume.
    """
    with path.tempdir() as log_parent_folder:

        log_folder = os.path.join(log_parent_folder, "logs")

        circus = Circus(name="tested_circus",
                        master_seed=1,
                        start=pd.Timestamp("8 June 2016"),
                        step_duration=pd.Timedelta("1h"))

        field_agents = circus.create_population(
            name="fa", size=100,
            ids_gen=SequencialGenerator(max_length=3, prefix="id_"))

        mobility_time_gen = WorkHoursTimerGenerator(clock=circus.clock,
                                                    seed=next(circus.seeder))

        # activity: mean of 5 triggerings a day, std of .5
        five_per_day = mobility_time_gen.activity(n=5,
                                                  per=pd.Timedelta("1day"))
        std_per_day = mobility_time_gen.activity(n=.5,
                                                 per=pd.Timedelta("1day"))

        gaussian_activity = NumpyRandomGenerator(method="normal",
                                                 loc=five_per_day,
                                                 scale=std_per_day,
                                                 seed=1)
        mobility_activity_gen = gaussian_activity.map(bound_value(lb=1))

        # just a dummy operation to produce some logs
        story = circus.create_story(name="test_story",
                                    initiating_population=field_agents,
                                    member_id_field="some_id",
                                    timer_gen=mobility_time_gen,
                                    activity_gen=mobility_activity_gen)

        story.set_operations(circus.clock.ops.timestamp(named_as="TIME"),
                             FieldLogger(log_id="the_logs"))

        circus.run(duration=pd.Timedelta("30 days"),
                   log_output_folder=log_folder)

        logging.info("loading produced logs")
        logs = load_all_logs(log_folder)["the_logs"]
        logging.info("number of produced logs: {} logs".format(logs.shape[0]))

        # 30 days of simulation should produce 100 * 5 * 30 == 15k logs
        assert 14e3 <= logs.shape[0] <= 16e3
class _MaybeBackToDefault(SideEffectOnly):
    """
    This is an internal operation of story, that transits members back to
    default with probability as declared in back_to_default_probability
    """

    def __init__(self, story):
        # uniform [0, 1) draws used as the acceptance baseline
        self.judge = NumpyRandomGenerator(method="uniform", seed=1234)
        self.story = story

    def side_effect(self, story_data):
        # only transiting members that have ran during this clock tick
        ticked_timers = self.story.timer.loc[story_data.index]
        candidates = ticked_timers[ticked_timers["state"] != "default"].index

        if candidates.shape[0] == 0:
            return

        probs = self.story.get_param("back_to_default_probability",
                                     candidates)

        # short-circuit the degenerate cases to avoid burning random draws
        if np.all(probs == 0):
            switching = [False] * candidates.shape[0]
        elif np.all(probs == 1):
            switching = [True] * candidates.shape[0]
        else:
            switching = probs > self.judge.generate(probs.shape[0])

        chosen = probs[switching].index
        self.story.transit_to_state(ids=chosen,
                                    states=["default"] * chosen.shape[0])
def numpy_generators_read_from_disk_should_generate_same_sequence_as_original(
):
    """
    Saves a seeded NumpyRandomGenerator to disk, reloads it, and checks both
    produce the exact same sequence of values.
    """
    with path.tempdir() as p:

        # making sure we're not using the default seed
        tested = NumpyRandomGenerator(method="normal", loc=10, scale=4,
                                      seed=123456)

        gen_file = os.path.join(p, "tested2.json")
        tested.save_to(gen_file)

        reloaded = Generator.load_generator(gen_type="NumpyRandomGenerator",
                                            input_file=gen_file)

        # bug fix: a bare `==` between the two generated sequences compares
        # numpy arrays element-wise, and asserting on the resulting array
        # raises "truth value ... is ambiguous" instead of testing anything;
        # np.array_equal performs the intended whole-sequence comparison
        assert np.array_equal(tested.generate(size=10000),
                              reloaded.generate(size=10000))
def add_survey_action(circus):
    """
    Creates the "pos_surveys" story: during work hours, each field agent
    picks one POS at its current site and logs a completed survey task.
    """
    logging.info(" creating field agent survey action")

    field_agents = circus.actors["field_agents"]

    # Surveys only happen during work hours
    survey_timer_gen = WorkHoursTimerGenerator(clock=circus.clock,
                                               seed=next(circus.seeder))

    # per-agent activity chosen between 10 and 100 surveys per week
    min_activity = survey_timer_gen.activity(
        n=10,
        per=pd.Timedelta("7 days"),
    )
    max_activity = survey_timer_gen.activity(
        n=100,
        per=pd.Timedelta("7 days"),
    )
    survey_activity_gen = NumpyRandomGenerator(
        method="choice",
        a=np.arange(min_activity, max_activity),
        seed=next(circus.seeder))

    survey_action = circus.create_story(name="pos_surveys",
                                        initiating_actor=field_agents,
                                        actorid_field="FA_ID",
                                        timer_gen=survey_timer_gen,
                                        activity_gen=survey_activity_gen)

    survey_action.set_operations(
        field_agents.ops.lookup(id_field="FA_ID",
                                select={"CURRENT_SITE": "SITE"}),

        # TODO: We should select a POS irrespectively of the relationship
        # weight
        circus.actors["sites"].get_relationship("POS").ops.select_one(
            from_field="SITE",
            named_as="POS_ID",

            # a field agent in a location without a POS won't serve any
            discard_empty=True),
        circus.actors["pos"].ops.lookup(id_field="POS_ID",
                                        select={
                                            "LATITUDE": "POS_LATITUDE",
                                            "LONGITUDE": "POS_LONGITUDE",
                                            "AGENT_NAME": "POS_NAME",
                                        }),
        SequencialGenerator(prefix="TASK").ops.generate(named_as="TASK_ID"),
        ConstantGenerator(value="Done").ops.generate(named_as="STATUS"),
        circus.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="pos_surveys",
                    cols=[
                        "TASK_ID", "FA_ID", "POS_ID", "POS_NAME",
                        "POS_LATITUDE", "POS_LONGITUDE", "TIME", "STATUS"
                    ]))
def _add_pos_latlong(circus, params):
    """
    Generates POS coordinates by adding small gaussian noise to each POS's
    site coordinates, and copies the site's per-product distributor info
    into POS attributes/relationships.
    """
    logging.info("Generating POS attributes from Sites info (coord, dist l2)")

    pos = circus.actors["pos"]
    sites = circus.actors["sites"]

    # 1 deg is about 1km at 40 degree north => standard deviation of about 200m
    coord_noise = NumpyRandomGenerator(method="normal",
                                       loc=0,
                                       scale=1 / (85 * 5),
                                       seed=next(circus.seeder))

    # using a Chain at build time (outside of any story) to generate the
    # random values :)
    pos_coord_act = Chain(
        pos.ops.lookup(id_field="POS_ID", select={"SITE": "SITE_ID"}),
        sites.ops.lookup(id_field="SITE_ID",
                         select={
                             "LATITUDE": "SITE_LATITUDE",
                             "LONGITUDE": "SITE_LONGITUDE"
                         }),
        coord_noise.ops.generate(named_as="LAT_NOISE"),
        coord_noise.ops.generate(named_as="LONG_NOISE"),
        Apply(source_fields=["SITE_LATITUDE", "LAT_NOISE"],
              named_as="POS_LATITUDE",
              f=np.add,
              f_args="series"),
        Apply(source_fields=["SITE_LONGITUDE", "LONG_NOISE"],
              named_as="POS_LONGITUDE",
              f=np.add,
              f_args="series"),
    )

    # also looks up the dist l1 and dist l2 associated to the site
    # NOTE(review): the site's "<product>__dist_l2" is exposed under the name
    # "<product>__provider" — looks intentional (l2 == provider) but confirm
    for product in params["products"].keys():
        pos_coord_act.append(
            sites.ops.lookup(id_field="SITE_ID",
                             select={
                                 "{}__dist_l2".format(product):
                                     "{}__provider".format(product),
                                 "{}__dist_l1".format(product):
                                     "{}__dist_l1".format(product),
                             }), )

    pos_info, _ = pos_coord_act(Story.init_story_data("POS_ID", pos.ids))

    pos.create_attribute("LATITUDE", init_values=pos_info["POS_LATITUDE"])
    pos.create_attribute("LONGITUDE", init_values=pos_info["POS_LONGITUDE"])

    # copies the dist l1 and l2 of the site to the pos
    for product in params["products"].keys():
        rel_l2 = pos.create_relationship(name="{}__provider".format(product))
        rel_l2.add_relations(
            from_ids=pos_info["POS_ID"],
            to_ids=pos_info["{}__provider".format(product)])
def load_empirical_discrete_generator(namespace, gen_id, seed):
    """
    Loads an empirical discrete distribution stored as a csv (columns "x"
    and "px") and wraps it into a seeded "choice" NumpyRandomGenerator.
    """
    folder = _empirical_discrete_gen_folder(namespace)
    distribution = pd.read_csv(os.path.join(folder, "%s.csv" % gen_id))

    return NumpyRandomGenerator(method="choice",
                                a=distribution["x"].tolist(),
                                p=distribution["px"].tolist(),
                                seed=seed)
def _create_attractiveness_attributes(circus, pos): logging.info("generating pos attractiveness values and evolutions") # "base" attractiveness, in [-50, 50] attractiveness_base_gen = NumpyRandomGenerator(method="choice", a=range(-50, 50), seed=next(circus.seeder)) pos.create_attribute("ATTRACT_BASE", init_gen=attractiveness_base_gen) # attractiveness itself is ATTRACT_BASE going through a sigmoid ac = _attractiveness_sigmoid()(pos.get_attribute_values("ATTRACT_BASE")) pos.create_attribute("ATTRACTIVENESS", init_values=ac) # evolution steps of the base attractiveness attractiveness_delta_gen = NumpyRandomGenerator(method="choice", a=[-2, -1, 0, 1, 2], p=[.1, .25, .3, .25, .1], seed=next(circus.seeder)) pos.create_attribute("ATTRACT_DELTA", init_gen=attractiveness_delta_gen)
def add_agent_reviews_stories(self, agents):
    """
    This illustrates the dynamic creation of new populations: reviews are
    modeled as "population" (even though they are mostly inactive data
    container) that are created dynamically and linked to agents.

    I guess most of the time reviews would be modeled as logs instead of
    populations, but let's just ignore that for illustration purposes... ^^
    """
    timegen = HighWeekDaysTimerGenerator(clock=self.clock,
                                         seed=next(self.seeder))

    # activity level drawn uniformly in {1, 2, 3}
    review_activity_gen = NumpyRandomGenerator(method="choice",
                                               a=range(1, 4),
                                               seed=next(self.seeder))

    # the system starts with no reviews
    review_population = self.create_population(name="rev", size=0)
    review_population.create_attribute("DATE")
    review_population.create_attribute("TEXT")
    review_population.create_attribute("AGENT_ID")
    review_population.create_attribute("AGENT_NAME")

    reviews = self.create_story(
        name="agent_reviews",
        initiating_population=agents,
        member_id_field="AGENT",
        timer_gen=timegen,
        activity_gen=review_activity_gen,
    )

    review_id_gen = SequencialGenerator(start=0, prefix="REVIEW_ID")
    text_id_gen = FakerGenerator(method="text", seed=next(self.seeder))

    reviews.set_operations(
        self.clock.ops.timestamp(named_as="DATETIME"),
        agents.ops.lookup(id_field="AGENT",
                          select={"AGENT_NAME": "AGENT_NAME"}),
        review_id_gen.ops.generate(named_as="REVIEW_ID"),
        text_id_gen.ops.generate(named_as="REVIEW_TEXT"),

        # presumably inserts the new REVIEW_IDs as fresh members of the
        # "rev" population — confirm update() creates unseen ids
        review_population.ops.update(id_field="REVIEW_ID",
                                     copy_attributes_from_fields={
                                         "DATE": "DATETIME",
                                         "TEXT": "REVIEW_TEXT",
                                         "AGENT_ID": "AGENT",
                                         "AGENT_NAME": "AGENT_NAME",
                                     }),

        # actually, here we're modelling review both as populations and logs..
        operations.FieldLogger(log_id="reviews"))
def create_field_agents(circus, params):
    """
    Creates the field_agents population, wires a POSSIBLE_SITES relationship
    (random bipartite graph to the sites, exponential weights) and
    initializes each agent's CURRENT_SITE from that relationship.
    """
    logging.info(" adding {} field agents".format(params["n_field_agents"]))

    field_agents = circus.create_population(
        name="field_agents",
        size=params["n_field_agents"],
        ids_gen=SequencialGenerator(prefix="FA_"))

    logging.info(" adding mobility relationships to field agents")

    mobility_rel = field_agents.create_relationship("POSSIBLE_SITES")

    # TODO: make sure the number of sites per field agent is "reasonable"
    mobility_df = pd.DataFrame.from_records(
        make_random_bipartite_data(field_agents.ids,
                                   circus.actors["sites"].ids,
                                   0.4,
                                   seed=next(circus.seeder)),
        columns=["FA_ID", "SID"])

    mobility_weight_gen = NumpyRandomGenerator(method="exponential",
                                               scale=1.,
                                               seed=next(circus.seeder))

    mobility_rel.add_relations(from_ids=mobility_df["FA_ID"],
                               to_ids=mobility_df["SID"],
                               weights=mobility_weight_gen.generate(
                                   mobility_df.shape[0]))

    # Initialize the mobility by allocating one first random site to each
    # field agent among its network
    field_agents.create_attribute(name="CURRENT_SITE",
                                  init_relationship="POSSIBLE_SITES")

    return field_agents
def test_random_generator_should_provide_correct_amount_of_single_values():
    """
    Without a quantity_field, ops.generate should append exactly one scalar
    per story_data row.
    """
    generator = NumpyRandomGenerator(method="gamma", scale=10, shape=1.8,
                                     seed=1)
    generate_op = generator.ops.generate(named_as="rand")

    input_data = pd.DataFrame(np.random.rand(10, 5),
                              columns=list("ABCDE"))

    output, logs = generate_op(input_data)

    # the generated column is appended after the original ones
    assert output.columns.tolist() == ["A", "B", "C", "D", "E", "rand"]

    # should be float and not list of values
    assert output["rand"].dtype == float
def add_pos(circus, params):
    """
    Creates the "pos" population: attractiveness attributes, a SITE chosen
    with probability proportional to geo-level-1 population, coordinates,
    identity/contact attributes, the site -> POS relationship, and the
    per-product initial stock.
    """
    logging.info("creating {} POS".format(params["n_pos"]))
    pos = circus.create_population(
        name="pos",
        size=params["n_pos"],
        ids_gen=SequencialGenerator(prefix="POS_"))

    _create_attractiveness_attributes(circus, pos)

    logging.info("assigning a site to each POS")

    # probability of each site to be chosen, based on geo_level1 population
    site_weight = circus.actors["sites"] \
        .get_attribute("GEO_LEVEL_1_POPULATION") \
        .get_values(None)

    site_gen = NumpyRandomGenerator(method="choice",
                                    seed=next(circus.seeder),
                                    a=circus.actors["sites"].ids,
                                    p=site_weight.values / sum(site_weight))

    pos.create_attribute("SITE", init_gen=site_gen)

    # generate a random pos location from around the SITE location
    _add_pos_latlong(circus, params)

    pos.create_attribute("MONGO_ID", init_gen=MongoIdGenerator())

    pos.create_attribute("AGENT_NAME",
                         init_gen=snd_constants.gen("POS_NAMES",
                                                    next(circus.seeder)))

    pos.create_attribute("CONTACT_NAME",
                         init_gen=snd_constants.gen("CONTACT_NAMES",
                                                    next(circus.seeder)))

    pos.create_attribute("CONTACT_PHONE",
                         init_gen=FakerGenerator(method="phone_number",
                                                 seed=next(circus.seeder)))

    logging.info("recording the list POS per site in site relationship")
    pos_rel = circus.actors["sites"].create_relationship("POS")
    pos_rel.add_relations(from_ids=pos.get_attribute_values("SITE"),
                          to_ids=pos.ids)

    for product, description in params["products"].items():
        _init_pos_product(circus, product, description)
def _add_message_story(self):
    """
    Creates the "hello_world" message story: each person sends one of its
    quotes to one of its friends, with a low/medium/high daily activity.
    """
    story_timer_gen = DefaultDailyTimerGenerator(
        clock=self.clock, seed=next(self.seeder))

    # activity levels: 3, 10 or 20 messages a day
    low_activity = story_timer_gen.activity(n=3, per=pd.Timedelta("1 day"))
    med_activity = story_timer_gen.activity(n=10, per=pd.Timedelta("1 day"))
    high_activity = story_timer_gen.activity(n=20, per=pd.Timedelta("1 day"))

    # 20% low, 70% medium, 10% high senders
    activity_gen = NumpyRandomGenerator(
        method="choice",
        a=[low_activity, med_activity, high_activity],
        p=[.2, .7, .1],
        seed=next(self.seeder))

    hello_world = self.create_story(
        name="hello_world",
        initiating_population=self.populations["person"],
        member_id_field="PERSON_ID",
        timer_gen=story_timer_gen,
        activity_gen=activity_gen
    )

    hello_world.set_operations(
        self.clock.ops.timestamp(named_as="TIME"),

        # the message is one of the sender's quotes
        self.populations["person"].get_relationship("quotes")
        .ops.select_one(from_field="PERSON_ID", named_as="MESSAGE"),

        # the receiver is one of the sender's friends
        self.populations["person"]
        .get_relationship("friends")
        .ops.select_one(from_field="PERSON_ID", named_as="OTHER_PERSON"),

        self.populations["person"]
        .ops.lookup(id_field="PERSON_ID", select={"NAME": "EMITTER_NAME"}),
        self.populations["person"]
        .ops.lookup(id_field="OTHER_PERSON",
                    select={"NAME": "RECEIVER_NAME"}),

        operations.FieldLogger(log_id="hello_6")
    )
def _init_pos_product(circus, product, description):
    """
    Initializes the stock-related generators for one product (bulk purchase
    size, initial stock size, product ids), attaches them to the circus and
    creates the POS stock relationship for that product.
    """
    logging.info(
        "Building a generator of {} POS bulk purchase size".format(product))
    bulk_size_gen = NumpyRandomGenerator(
        method="choice",
        a=description["pos_bulk_purchase_sizes"],
        p=description["pos_bulk_purchase_sizes_dist"],
        seed=next(circus.seeder))
    circus.attach_generator("pos_{}_bulk_size_gen".format(product),
                            bulk_size_gen)

    logging.info(
        "Building a generators of {} POS initial stock size".format(product))
    if "pos_init_distro" in description:
        logging.info(" using pre-defined initial distribution")

        # "namespace/gen_id" points at an empirical distribution on disk
        gen_namespace, gen_id = description["pos_init_distro"].split("/")

        # TODO: with the new save/load, this is now a mere numpyGenerator
        init_stock_size_gen = db.load_empirical_discrete_generator(
            namespace=gen_namespace,
            gen_id=gen_id,
            seed=next(circus.seeder))
    else:
        logging.info(" using bulk size distribution")
        init_stock_size_gen = bulk_size_gen
    circus.attach_generator("pos_{}_init_stock_size_gen".format(product),
                            init_stock_size_gen)

    logging.info("Building a generator of {} ids".format(product))
    product_id_gen = SequencialGenerator(prefix="{}_".format(product))
    circus.attach_generator("{}_id_gen".format(product), product_id_gen)

    logging.info("Initializing POS {} stock".format(product))

    # generator of variable-size bulks of fresh product ids
    stock_gen = init_stock_size_gen.flatmap(
        DependentBulkGenerator(element_generator=product_id_gen))

    circus.actors["pos"].create_stock_relationship_grp(
        name=product, stock_bulk_gen=stock_gen)
def _add_person_population(self):
    """
    Creates the 1000-member "person" population with NAME and AGE
    attributes, plus a "quotes" relationship holding 4 random quotes per
    person.
    """
    id_gen = SequencialGenerator(prefix="PERSON_")

    # NOTE(review): loc=3 gives a mean age of 3 — confirm this is intended
    age_gen = NumpyRandomGenerator(method="normal",
                                   loc=3,
                                   scale=5,
                                   seed=next(self.seeder))
    name_gen = FakerGenerator(method="name", seed=next(self.seeder))

    person = self.create_population(name="person", size=1000, ids_gen=id_gen)
    person.create_attribute("NAME", init_gen=name_gen)
    person.create_attribute("AGE", init_gen=age_gen)

    quote_generator = FakerGenerator(method="sentence",
                                     nb_words=6,
                                     variable_nb_words=True,
                                     seed=next(self.seeder))

    quotes_rel = self.populations["person"].create_relationship("quotes")

    # 4 quotes per person, with increasing weights 0..3
    # NOTE(review): the first batch gets weight 0 — confirm weight-0 edges
    # can still be selected
    for w in range(4):
        quotes_rel.add_relations(
            from_ids=person.ids,
            to_ids=quote_generator.generate(size=person.size),
            weights=w
        )
def create_circus_with_population():
    """
    Builds a demo circus containing a 1000-member "person" population with
    a faker-generated NAME and a gaussian "age" attribute.
    """
    the_circus = circus.Circus(name="example",
                               master_seed=12345,
                               start=pd.Timestamp("1 Jan 2017 00:00"),
                               step_duration=pd.Timedelta("1h"))

    person = the_circus.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"))

    # note: generator creation order matters, each consumes one seed
    name_gen = FakerGenerator(method="name", seed=next(the_circus.seeder))
    person.create_attribute("NAME", init_gen=name_gen)

    age_gen = NumpyRandomGenerator(method="normal",
                                   loc=35,
                                   scale=5,
                                   seed=next(the_circus.seeder))
    person.create_attribute("age", init_gen=age_gen)

    return the_circus
def test_random_generator_should_provide_correct_amount_of_list_of_values():
    """
    With a quantity_field, ops.generate should append one list per row,
    whose length equals that row's requested quantity.
    """
    generator = NumpyRandomGenerator(method="gamma", scale=10, shape=1.8,
                                     seed=1)

    input_data = pd.DataFrame(np.random.rand(10, 5),
                              columns=["A", "B", "C", "D", "E"])
    requested_sizes = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    input_data["how_many"] = pd.Series(requested_sizes)

    generate_op = generator.ops.generate(named_as="rand",
                                         quantity_field="how_many")
    output, logs = generate_op(input_data)

    assert output.columns.tolist() == [
        "A", "B", "C", "D", "E", "how_many", "rand"
    ]

    # each cell should hold a list of the requested length
    assert output["rand"].dtype == list
    assert output["rand"].apply(len).tolist() == requested_sizes
# --- example script: minimal circus with a person population and a
# "hello_world" story triggering at every clock step ---
from tabulate import tabulate
from trumania.core import circus, operations
from trumania.core.random_generators import SequencialGenerator, FakerGenerator, NumpyRandomGenerator, ConstantDependentGenerator, ConstantGenerator
import trumania.core.util_functions as util_functions

util_functions.setup_logging()

# NOTE(review): `pd` (pandas) is assumed to be imported earlier in the file
example_circus = circus.Circus(name="example",
                               master_seed=12345,
                               start=pd.Timestamp("1 Jan 2017 00:00"),
                               step_duration=pd.Timedelta("1h"))

id_gen = SequencialGenerator(prefix="PERSON_")
# NOTE(review): loc=3 gives a mean age of 3 — confirm this is intended
age_gen = NumpyRandomGenerator(method="normal",
                               loc=3,
                               scale=5,
                               seed=next(example_circus.seeder))
name_gen = FakerGenerator(method="name", seed=next(example_circus.seeder))

person = example_circus.create_population(name="person",
                                          size=1000,
                                          ids_gen=id_gen)
person.create_attribute("NAME", init_gen=name_gen)
person.create_attribute("AGE", init_gen=age_gen)

# constant timer: every member gets the same fixed timer value
hello_world = example_circus.create_story(
    name="hello_world",
    initiating_population=example_circus.populations["person"],
    member_id_field="PERSON_ID",
    timer_gen=ConstantDependentGenerator(value=1))
seed=next(example_circus.seeder))) return example_circus the_circus = create_circus_with_population() hello_world = the_circus.create_story( name="hello_world", initiating_population=the_circus.populations["person"], member_id_field="PERSON_ID", # each population instance is now going to have 10, 20 or 30 # trigger of this story per week activity_gen=NumpyRandomGenerator( method="choice", a=[10, 20, 30], seed=next(the_circus.seeder) ), # story now only tiggers during office hours timer_gen=WorkHoursTimerGenerator( clock=the_circus.clock, seed=next(the_circus.seeder)) ) hello_world.set_operations( # adding a random timestamp, within the current clock step the_circus.clock .ops .timestamp(named_as="TIME"),
def build_healthy_level_gen(seed):
    """
    Returns a seeded beta(a=1, b=999) generator, i.e. values heavily
    concentrated near 0.
    """
    healthy_gen = NumpyRandomGenerator(method="beta", a=1, b=999, seed=seed)
    return healthy_gen
def add_communications(self, subs, sims, cells):
    """
    Adds Calls and SMS story, which in turn may trigger topups story.

    Both stories share the same operation chains: select the B party and the
    best SIM pair, check whether both cells accept the call, bill the CDR on
    SIM A's main account, maybe force a topup, and maybe switch both parties
    into an "excited" (bursty) state.
    """
    logging.info("Adding calls and sms story ")

    # generators for topups and call duration
    voice_duration_generator = NumpyRandomGenerator(
        method="choice", a=range(20, 240), seed=next(self.seeder))

    # call and sms timer generator, depending on the day of the week
    call_timegen = HighWeekDaysTimerGenerator(clock=self.clock,
                                              seed=next(self.seeder))

    # probability of doing a topup, with high probability when the depended
    # variable (i.e. the main account value, see below) gets close to 0
    recharge_trigger = DependentTriggerGenerator(
        value_to_proba_mapper=operations.logistic(k=-0.01, x0=1000),
        seed=next(self.seeder))

    # call activity level, under normal and "excited" states
    normal_call_activity = ParetoGenerator(xmin=10, a=1.2,
                                           seed=next(self.seeder))
    excited_call_activity = ParetoGenerator(xmin=100, a=1.1,
                                            seed=next(self.seeder))

    # after a call or SMS, excitability is the probability of getting into
    # "excited" mode (i.e., having a shorted expected delay until next call
    excitability_gen = NumpyRandomGenerator(method="beta", a=7, b=3,
                                            seed=next(self.seeder))

    subs.create_attribute(name="EXCITABILITY", init_gen=excitability_gen)

    # same "basic" trigger, without any value mapper
    flat_trigger = DependentTriggerGenerator(seed=next(self.seeder))

    back_to_normal_prob = NumpyRandomGenerator(method="beta", a=3, b=7,
                                               seed=next(self.seeder))

    # Calls and SMS stories themselves
    calls = self.create_story(
        name="calls",
        initiating_population=subs,
        member_id_field="A_ID",
        timer_gen=call_timegen,
        activity_gen=normal_call_activity,
        states={
            "excited": {
                "activity": excited_call_activity,
                "back_to_default_probability": back_to_normal_prob
            }
        })

    sms = self.create_story(
        name="sms",
        initiating_population=subs,
        member_id_field="A_ID",
        timer_gen=call_timegen,
        activity_gen=normal_call_activity,
        states={
            "excited": {
                "activity": excited_call_activity,
                "back_to_default_probability": back_to_normal_prob
            }
        })

    # common logic between Call and SMS: selecting A and B + their related
    # fields
    compute_ab_fields = Chain(
        self.clock.ops.timestamp(named_as="DATETIME"),

        # selects a B party
        subs.get_relationship("FRIENDS").ops.select_one(from_field="A_ID",
                                                        named_as="B_ID",
                                                        one_to_one=True),

        # fetches information about all SIMs of A and B
        subs.get_relationship("SIMS").ops.select_all(from_field="A_ID",
                                                     named_as="A_SIMS"),
        sims.ops.lookup(id_field="A_SIMS",
                        select={
                            "OPERATOR": "OPERATORS_A",
                            "MSISDN": "MSISDNS_A",
                            "MAIN_ACCT": "MAIN_ACCTS_A"
                        }),
        subs.get_relationship("SIMS").ops.select_all(from_field="B_ID",
                                                     named_as="B_SIMS"),
        sims.ops.lookup(id_field="B_SIMS",
                        select={
                            "OPERATOR": "OPERATORS_B",
                            "MSISDN": "MSISDNS_B"
                        }),

        # A selects the sims and related values based on the best match
        # between the sims of A and B
        operations.Apply(source_fields=[
            "MSISDNS_A", "OPERATORS_A", "A_SIMS", "MAIN_ACCTS_A",
            "MSISDNS_B", "OPERATORS_B", "B_SIMS"
        ],
                         named_as=[
                             "MSISDN_A", "OPERATOR_A", "SIM_A",
                             "MAIN_ACCT_OLD", "MSISDN_B", "OPERATOR_B",
                             "SIM_B"
                         ],
                         f=select_sims),
        operations.Apply(source_fields=["OPERATOR_A", "OPERATOR_B"],
                         named_as="TYPE",
                         f=compute_cdr_type),
    )

    # Both CELL_A and CELL_B might drop the call based on their current HEALTH
    compute_cell_status = Chain(
        # some static fields
        subs.ops.lookup(id_field="A_ID",
                        select={
                            "CELL": "CELL_A",
                            "EXCITABILITY": "EXCITABILITY_A"
                        }),
        subs.ops.lookup(id_field="B_ID",
                        select={
                            "CELL": "CELL_B",
                            "EXCITABILITY": "EXCITABILITY_B"
                        }),
        cells.ops.lookup(id_field="CELL_A",
                         select={"HEALTH": "CELL_A_HEALTH"}),
        cells.ops.lookup(id_field="CELL_B",
                         select={"HEALTH": "CELL_B_HEALTH"}),
        flat_trigger.ops.generate(observed_field="CELL_A_HEALTH",
                                  named_as="CELL_A_ACCEPTS"),
        flat_trigger.ops.generate(observed_field="CELL_B_HEALTH",
                                  named_as="CELL_B_ACCEPTS"),
        operations.Apply(
            source_fields=["CELL_A_ACCEPTS", "CELL_B_ACCEPTS"],
            named_as="STATUS",
            f=compute_call_status))

    # update the main account based on the value of this CDR
    update_accounts = Chain(
        operations.Apply(source_fields=["MAIN_ACCT_OLD", "VALUE"],
                         named_as="MAIN_ACCT_NEW",
                         f=np.subtract,
                         f_args="series"),
        sims.get_attribute("MAIN_ACCT").ops.update(
            member_id_field="SIM_A", copy_from_field="MAIN_ACCT_NEW"),
    )

    # triggers the topup story if the main account is low
    trigger_topups = Chain(
        # A subscribers with low account are now more likely to topup the
        # SIM they just used to make a call
        recharge_trigger.ops.generate(observed_field="MAIN_ACCT_NEW",
                                      named_as="SHOULD_TOP_UP"),
        self.get_story("topups").ops.force_act_next(
            member_id_field="SIM_A", condition_field="SHOULD_TOP_UP"),
    )

    # get BOTH sms and Call "bursty" after EITHER a call or an sms
    get_bursty = Chain(
        # Trigger to get into "excited" mode because A gave a call or sent an
        # SMS
        flat_trigger.ops.generate(observed_field="EXCITABILITY_A",
                                  named_as="A_GETTING_BURSTY"),
        calls.ops.transit_to_state(member_id_field="A_ID",
                                   condition_field="A_GETTING_BURSTY",
                                   state="excited"),
        sms.ops.transit_to_state(member_id_field="A_ID",
                                 condition_field="A_GETTING_BURSTY",
                                 state="excited"),

        # Trigger to get into "excited" mode because B received a call
        flat_trigger.ops.generate(observed_field="EXCITABILITY_B",
                                  named_as="B_GETTING_BURSTY"),

        # transiting to excited mode, according to trigger value
        calls.ops.transit_to_state(member_id_field="B_ID",
                                   condition_field="B_GETTING_BURSTY",
                                   state="excited"),
        sms.ops.transit_to_state(member_id_field="B_ID",
                                 condition_field="B_GETTING_BURSTY",
                                 state="excited"),

        # B party need to have their time reset explicitally since they were
        # not active at this round. A party will be reset automatically
        calls.ops.reset_timers(member_id_field="B_ID"),
        sms.ops.reset_timers(member_id_field="B_ID"),
    )

    calls.set_operations(
        compute_ab_fields,
        compute_cell_status,
        ConstantGenerator(value="VOICE").ops.generate(named_as="PRODUCT"),
        voice_duration_generator.ops.generate(named_as="DURATION"),
        operations.Apply(source_fields=["DURATION", "DATETIME", "TYPE"],
                         named_as="VALUE",
                         f=compute_call_value),
        update_accounts,
        trigger_topups,
        get_bursty,

        # final CDRs
        operations.FieldLogger(log_id="voice_cdr",
                               cols=[
                                   "DATETIME", "MSISDN_A", "MSISDN_B",
                                   "STATUS", "DURATION", "VALUE", "CELL_A",
                                   "OPERATOR_A", "CELL_B", "OPERATOR_B",
                                   "TYPE", "PRODUCT"
                               ]),
    )

    sms.set_operations(
        compute_ab_fields,
        compute_cell_status,
        ConstantGenerator(value="SMS").ops.generate(named_as="PRODUCT"),
        operations.Apply(source_fields=["DATETIME", "TYPE"],
                         named_as="VALUE",
                         f=compute_sms_value),
        update_accounts,
        trigger_topups,
        get_bursty,

        # final CDRs
        operations.FieldLogger(log_id="sms_cdr",
                               cols=[
                                   "DATETIME", "MSISDN_A", "MSISDN_B",
                                   "STATUS", "VALUE", "CELL_A", "OPERATOR_A",
                                   "CELL_B", "OPERATOR_B", "TYPE", "PRODUCT"
                               ]),
    )
def add_mobility(self, subs, cells):
    """
    Attaches a CELL attribute to the customer population and registers a
    "mobility" story that randomly hops each customer from cell to cell
    within their personal set of used cells.
    """
    logging.info("Adding mobility ")

    # Hourly mobility profile: peaks (5, 10, 5) around the morning and
    # evening busy hours, flat activity of 1 otherwise.
    hourly_profile = [1.] * 8 + [5., 10., 5.] + [1.] * 6 + [5., 10., 5.] + [1.] * 4
    timer = CyclicTimerGenerator(
        clock=self.clock,
        seed=next(self.seeder),
        config=CyclicTimerProfile(
            profile=hourly_profile,
            profile_time_steps="1H",
            start_date=pd.Timestamp("12 September 2016 00:00.00")))

    # Mobility network: each user's weighted set of "most used cells".
    # Exponential weights make a few cells dominate per user.
    weight_gen = NumpyRandomGenerator(method="exponential",
                                      scale=1.,
                                      seed=next(self.seeder))

    possible_cells = subs.create_relationship("POSSIBLE_CELLS")

    logging.info(" creating bipartite graph ")
    graph_df = pd.DataFrame.from_records(
        make_random_bipartite_data(subs.ids, cells.ids, 0.4,
                                   seed=next(self.seeder)),
        columns=["USER_ID", "CELL"])

    logging.info(" adding mobility relationships to customer")
    possible_cells.add_relations(
        from_ids=graph_df["USER_ID"],
        to_ids=graph_df["CELL"],
        weights=weight_gen.generate(graph_df.shape[0]))

    logging.info(" creating customer's CELL attribute ")

    # Seed the mobility state: each customer starts on one random cell
    # drawn from their own POSSIBLE_CELLS network.
    subs.create_attribute(name="CELL", init_relationship="POSSIBLE_CELLS")

    # The story itself: a random hop from cell to cell that updates the
    # CELL attribute and emits mobility logs.
    logging.info(" creating mobility story")
    story = self.create_story(
        name="mobility",
        initiating_population=subs,
        member_id_field="A_ID",
        timer_gen=timer,
    )

    logging.info(" adding operations")
    story.set_operations(
        subs.ops.lookup(id_field="A_ID", select={"CELL": "PREV_CELL"}),

        # pick a destination cell (possibly the same as the current one)
        possible_cells.ops.select_one(from_field="A_ID",
                                      named_as="NEW_CELL"),

        # persist the move into the customer's CELL attribute
        subs.get_attribute("CELL").ops.update(
            member_id_field="A_ID",
            copy_from_field="NEW_CELL"),

        self.clock.ops.timestamp(named_as="TIME"),

        # emit one log line per hop
        operations.FieldLogger(
            log_id="mobility_logs",
            cols=["TIME", "A_ID", "PREV_CELL", "NEW_CELL"]),
    )
    logging.info(" done")
def create_subs_and_sims(self):
    """
    Creates the subs and sims + a relationship between them + an agent
    relationship.

    We have at least one sim per subs: sims.size >= subs.size

    The sims population contains the "OPERATOR", "MAIN_ACCT" and "MSISDN"
    attributes. The subs population has a "SIMS" relationship that points
    to the sims owned by each subs. The sims population also has a
    relationship to the set of agents where this sim can be topped up.
    """
    rng = RandomState(seed=next(self.seeder))

    # subs start out bare: the "CELLS" and "EXCITABILITY" attributes are
    # attached to them later on
    subs = self.create_population(
        name="subs",
        size=self.params["n_subscribers"],
        ids_gen=SequencialGenerator(prefix="SUBS_"))

    # how many distinct operators each subscriber uses (1 to 4)
    operators_per_sub = rng.choice(a=range(1, 5), size=subs.size)
    operator_ids = build_ids(size=4, prefix="OPERATOR_", max_length=1)

    def pick_operators(qty):
        """
        randomly choose a set of unique operators of specified size
        """
        return rng.choice(a=operator_ids,
                          p=[.8, .05, .1, .05],
                          size=qty,
                          replace=False).tolist()

    # one list of operators per subscriber
    chosen_operators = [pick_operators(qty) for qty in operators_per_sub]

    # Wide dataframe: one row per subs, up to 4 operator columns. Rows
    # with fewer operators are padded with None, which the stack() below
    # silently discards.
    operators_wide = pd.DataFrame(data=chosen_operators, index=subs.ids)

    # Same info, vertically: index = sub id (with duplicates) and value =
    # one of the operators of that subs
    sub_to_operator = operators_wide.stack()
    sub_to_operator.index = sub_to_operator.index.droplevel(level=1)

    # SIM population, one sim per (subs, operator) pair, each carrying an
    # OPERATOR and MAIN_ACCT attribute
    sims = self.create_population(
        name="sims",
        size=sub_to_operator.size,
        ids_gen=SequencialGenerator(prefix="SIMS_"))
    sims.create_attribute("OPERATOR", init_values=sub_to_operator.values)

    # every sim starts with the same fixed balance
    recharge_gen = ConstantGenerator(value=1000.)
    sims.create_attribute(name="MAIN_ACCT", init_gen=recharge_gen)

    # keep track of the subs -> sims ownership as a relationship
    sims_of_subs = subs.create_relationship("SIMS")
    sims_of_subs.add_relations(from_ids=sub_to_operator.index,
                               to_ids=sims.ids)

    msisdn_gen = MSISDNGenerator(
        countrycode="0032",
        prefix_list=["472", "473", "475", "476", "477", "478", "479"],
        length=6,
        seed=next(self.seeder))
    sims.create_attribute(name="MSISDN", init_gen=msisdn_gen)

    # Finally, one more relationship defining the set of possible shops
    # where each SIM can be topped up.
    # TODO: to make this a bit more realistic, we should probably generate
    # such relationship first from the subs to their favourite shops, and
    # then copy that info to each SIM, maybe with some fluctuations to
    # account for the fact that not all shops provide topups of all
    # operators.
    agents = build_ids(self.params["n_agents"], prefix="AGENT_",
                       max_length=3)
    sim_agent_df = pd.DataFrame.from_records(
        make_random_bipartite_data(sims.ids, agents, 0.3,
                                   seed=next(self.seeder)),
        columns=["SIM_ID", "AGENT"])

    logging.info(" creating random sim/agent relationship ")
    sims_agents_rel = sims.create_relationship("POSSIBLE_AGENTS")

    agent_weight_gen = NumpyRandomGenerator(method="exponential",
                                            scale=1.,
                                            seed=next(self.seeder))
    sims_agents_rel.add_relations(
        from_ids=sim_agent_df["SIM_ID"],
        to_ids=sim_agent_df["AGENT"],
        weights=agent_weight_gen.generate(sim_agent_df.shape[0]))

    return subs, sims, recharge_gen
def step7():
    """
    Builds the "example1" circus: 1000 persons, each with a small set of
    weighted allowed sites and a random friendship graph, plus an hourly
    "hello_world" story that logs a priced interaction with a friend.
    Runs the simulation for 48 simulated hours and prints the number of
    logged lines.
    """
    demo = circus.Circus(name="example1",
                         master_seed=12345,
                         start=pd.Timestamp("1 Jan 2017 00:00"),
                         step_duration=pd.Timedelta("1h"))

    persons = demo.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"))
    persons.create_attribute(
        "NAME",
        init_gen=FakerGenerator(method="name", seed=next(demo.seeder)))
    persons.create_attribute(
        "POPULARITY",
        init_gen=NumpyRandomGenerator(method="uniform",
                                      low=0,
                                      high=1,
                                      seed=next(demo.seeder)))

    site_ids = SequencialGenerator(prefix="SITE_").generate(1000)
    site_picker = NumpyRandomGenerator(method="choice",
                                       a=site_ids,
                                       seed=next(demo.seeder))
    site_rel = persons.create_relationship(name="sites")

    # SITES ------------------

    # one HOME site (weight .4), one WORK site (weight .3) and three
    # OTHER sites (weight .1 each) per person
    for site_weight in [0.4, 0.3, 0.1, 0.1, 0.1]:
        site_rel.add_relations(
            from_ids=persons.ids,
            to_ids=site_picker.generate(persons.size),
            weights=site_weight)

    # FRIENDS ------------------
    friends_rel = persons.create_relationship(name="friends")
    friendship_df = pd.DataFrame.from_records(
        make_random_bipartite_data(
            persons.ids,
            persons.ids,
            # probability for a node to be connected to another one:
            # 5 friends on average = 5/1000
            p=0.005,
            seed=next(demo.seeder)),
        columns=["A", "B"])
    friends_rel.add_relations(from_ids=friendship_df["A"],
                              to_ids=friendship_df["B"])

    # PRICE ------------------
    def compute_price(story_data):
        # 5 cents per unit of duration, everything billed in euros
        billed = pd.DataFrame(index=story_data.index)
        billed["PRICE"] = story_data["DURATION"] * 0.05
        billed["CURRENCY"] = "EUR"
        return billed

    # STORIES ------------------
    hello_world = demo.create_story(
        name="hello_world",
        initiating_population=persons,
        member_id_field="PERSON_ID",
        # resetting the timer to 0 after each execution makes the story
        # fire again at the very next clock tick (i.e. every hour)
        timer_gen=ConstantDependentGenerator(value=0))

    duration_gen = NumpyRandomGenerator(method="exponential",
                                        scale=60,
                                        seed=next(demo.seeder))

    hello_world.set_operations(
        persons.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(
            named_as="HELLO"),
        duration_gen.ops.generate(named_as="DURATION"),
        friends_rel.ops.select_one(
            from_field="PERSON_ID",
            named_as="COUNTERPART_ID",
            weight=persons.get_attribute_values("POPULARITY"),
            # Without this flag, persons with no friends would make the
            # POPULARITY lookup crash on a None counterpart; discard
            # them instead.
            discard_empty=True),
        persons.ops.lookup(id_field="COUNTERPART_ID",
                           select={"NAME": "COUNTER_PART_NAME"}),
        site_rel.ops.select_one(from_field="PERSON_ID", named_as="SITE"),
        site_rel.ops.select_one(from_field="COUNTERPART_ID",
                                named_as="COUNTERPART_SITE"),
        Apply(source_fields=["DURATION", "SITE", "COUNTERPART_SITE"],
              named_as=["PRICE", "CURRENCY"],
              f=compute_price,
              f_args="dataframe"),
        demo.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"))

    demo.run(duration=pd.Timedelta("48h"),
             log_output_folder="output/example1",
             delete_existing_logs=True)

    with open("output/example1/hello.csv") as f:
        print("Logged {} lines".format(len(f.readlines()) - 1))
def __init__(self, story, seed=1234):
    """
    :param story: the story instance this object will act upon
    :param seed: seed of the internal uniform random generator used as
        "judge"; defaults to 1234 to preserve the previous hard-coded
        behavior, but can now be overridden for reproducible variations
    """
    # uniform generator used to take random decisions about the story
    self.judge = NumpyRandomGenerator(method="uniform", seed=seed)
    self.story = story
util_functions.setup_logging()
logging.info("building circus")

# minimal circus: hourly clock starting on Jan 1st 2017
example_circus = circus.Circus(name="example",
                               master_seed=12345,
                               start=pd.Timestamp("1 Jan 2017 00:00"),
                               step_duration=pd.Timedelta("1h"))

population = example_circus.create_population(
    name="person",
    size=1000,
    ids_gen=SequencialGenerator(prefix="PERSON_"))

# one fake human name per person
population.create_attribute(
    "NAME",
    init_gen=FakerGenerator(method="name",
                            seed=next(example_circus.seeder)))

# ages drawn from a normal distribution centred on 35
population.create_attribute(
    "age",
    init_gen=NumpyRandomGenerator(method="normal",
                                  loc=35,
                                  scale=5,
                                  seed=next(example_circus.seeder)))

# 48 simulated hours; the circus has no story, so this only exercises
# the population creation above
example_circus.run(duration=pd.Timedelta("48h"),
                   log_output_folder="output/example2",
                   delete_existing_logs=True)

logging.info(
    "10 first persons: \n" +
    tabulate(population.to_dataframe().head(10),
             headers='keys',
             tablefmt='psql'))
def add_purchase_actions(circus, params):
    """
    For every product in params["products"], registers a
    "customer_<product>_purchase" story: customers periodically buy one
    item of that product from a POS at their current site, which may in
    turn trigger a bulk restock of that POS when its stock runs low.
    """
    customers = circus.actors["customers"]
    pos = circus.actors["pos"]
    sites = circus.actors["sites"]

    for product, product_cfg in params["products"].items():
        logging.info("creating customer {} purchase action".format(product))
        daily_timer = DefaultDailyTimerGenerator(circus.clock,
                                                 next(circus.seeder))

        # activity bounds derived from the configured purchase periods:
        # buying once per *min* period yields the *highest* activity
        max_activity = daily_timer.activity(
            n=1,
            per=pd.Timedelta(
                days=product_cfg["customer_purchase_min_period_days"]))
        min_activity = daily_timer.activity(
            n=1,
            per=pd.Timedelta(
                days=product_cfg["customer_purchase_max_period_days"]))

        # draw a uniform purchase *period*, then invert it back into an
        # activity level
        activity_gen = NumpyRandomGenerator(
            method="uniform",
            low=1 / max_activity,
            high=1 / min_activity,
            seed=next(circus.seeder)).map(f=lambda per: 1 / per)

        # the closer the POS stock gets to empty, the likelier a restock
        restock_trigger = DependentTriggerGenerator(
            value_to_proba_mapper=bounded_sigmoid(
                x_min=1,
                x_max=product_cfg["max_pos_stock_triggering_pos_restock"],
                shape=product_cfg["restock_sigmoid_shape"],
                incrementing=False))

        price_gen = NumpyRandomGenerator(method="choice",
                                         a=product_cfg["item_prices"],
                                         seed=next(circus.seeder))

        story_name = "customer_{}_purchase".format(product)
        purchase_story = circus.create_story(
            name=story_name,
            initiating_actor=customers,
            actorid_field="CUST_ID",
            timer_gen=daily_timer,
            activity_gen=activity_gen)

        purchase_story.set_operations(
            customers.ops.lookup(id_field="CUST_ID",
                                 select={"CURRENT_SITE": "SITE"}),

            sites.get_relationship("POS").ops.select_one(
                from_field="SITE",
                named_as="POS",
                weight=pos.get_attribute_values("ATTRACTIVENESS"),
                # TODO: this means customer in a location without POS do
                # not buy anything => we could add a re-try mechanism here
                discard_empty=True),

            sites.get_relationship("CELLS").ops.select_one(
                from_field="SITE",
                named_as="CELL_ID"),

            # injecting geo level 2 and distributor in purchase action:
            # this is only required for approximating targets of that
            # distributor
            sites.ops.lookup(
                id_field="SITE",
                select={
                    "GEO_LEVEL_2": "geo_level2_id",
                    "{}__dist_l1".format(product): "distributor_l1"
                }),

            # pop=True: the sold item leaves the POS stock
            pos.get_relationship(product).ops.select_one(
                from_field="POS",
                named_as="INSTANCE_ID",
                pop=True,
                discard_empty=False),

            circus.actors[product].ops.select_one(named_as="PRODUCT_ID"),

            # a null INSTANCE_ID means that POS was out of stock
            Apply(source_fields="INSTANCE_ID",
                  named_as="FAILED_SALE_OUT_OF_STOCK",
                  f=pd.isnull,
                  f_args="series"),

            SequencialGenerator(
                prefix="TX_CUST_{}".format(product)).ops.generate(
                    named_as="TX_ID"),
            price_gen.ops.generate(named_as="VALUE"),
            circus.clock.ops.timestamp(named_as="TIME"),
            FieldLogger(log_id=story_name),

            patterns.trigger_action_if_low_stock(
                circus,
                stock_relationship=pos.get_relationship(product),
                actor_id_field="POS",
                restock_trigger=restock_trigger,
                triggered_action_name="pos_{}_bulk_purchase".format(
                    product)),
        )