def add_customers(circus, params):
    """
    Creates the "customers" population in the circus and gives each customer
    a weighted "POSSIBLE_SITES" mobility relationship plus an initial
    "CURRENT_SITE" attribute drawn from that relationship.

    :param circus: the circus to add the population to; must already contain
        a "sites" population with a "GEO_LEVEL_1_POPULATION" attribute
    :param params: dict with at least "n_customers" and
        "mean_known_sites_per_customer"
    :return: the created customers population
    """
    logging.info(" adding customers")

    customers = circus.create_population(
        name="customers",
        size=params["n_customers"],
        ids_gen=SequencialGenerator(prefix="CUST_"))

    logging.info(" adding 'possible sites' mobility relationship to customers")

    mobility_rel = customers.create_relationship("POSSIBLE_SITES")

    # probability of each site to be chosen, based on geo_level1 population
    site_weight = circus.actors["sites"] \
        .get_attribute("GEO_LEVEL_1_POPULATION") \
        .get_values(None)

    # samples of customer ids without replacement (used to pick, at each
    # iteration below, which customers receive one extra site)
    customer_gen = NumpyRandomGenerator(method="choice",
                                        seed=next(circus.seeder),
                                        a=customers.ids,
                                        replace=False)

    # site picker, biased towards sites with large geo-level-1 population
    site_gen = NumpyRandomGenerator(method="choice",
                                    seed=next(circus.seeder),
                                    a=circus.actors["sites"].ids,
                                    p=site_weight.values / sum(site_weight))

    mobility_weight_gen = NumpyRandomGenerator(method="exponential",
                                               scale=1.,
                                               seed=next(circus.seeder))

    # Everybody gets at least one site
    mobility_rel.add_relations(from_ids=customers.ids,
                               to_ids=site_gen.generate(customers.size),
                               weights=mobility_weight_gen.generate(
                                   customers.size))

    # at each iteration, give a new site to a sample of people
    # the sample will be of proportion p
    p = 0.5

    # to get an average site per customer of mean_known_sites_per_customer,
    # if each iteration samples with p,
    # we need mean_known_sites_per_customer/p iterations
    #
    # we remove one iteration that already happened for everybody here above
    for i in range(int(params["mean_known_sites_per_customer"] / p) - 1):
        sample = customer_gen.generate(int(customers.size * p))
        mobility_rel.add_relations(from_ids=sample,
                                   to_ids=site_gen.generate(len(sample)),
                                   weights=mobility_weight_gen.generate(
                                       len(sample)))

    logging.info(" assigning a first random site to each customer")
    customers.create_attribute(name="CURRENT_SITE",
                               init_relationship="POSSIBLE_SITES")

    return customers
class _MaybeBackToDefault(SideEffectOnly):
    """
    This is an internal operation of story, that transits members back to
    default with probability as declared in back_to_default_probability
    """

    def __init__(self, story):
        # NOTE(review): seed is hard-coded to 1234 rather than derived from
        # the circus master seed — confirm this is intentional (it makes the
        # back-to-default draws identical across all stories)
        self.judge = NumpyRandomGenerator(method="uniform", seed=1234)
        self.story = story

    def side_effect(self, story_data):
        # only transiting members that have ran during this clock tick
        active_timer = self.story.timer.loc[story_data.index]
        non_default_ids = active_timer[
            active_timer["state"] != "default"].index

        # nobody outside the default state => nothing to transit
        if non_default_ids.shape[0] == 0:
            return

        # per-member probability of falling back to the default state
        back_prob = self.story.get_param("back_to_default_probability",
                                         non_default_ids)

        # short-circuit the two degenerate cases to avoid drawing random
        # numbers when the outcome is already determined
        if np.all(back_prob == 0):
            cond = [False] * non_default_ids.shape[0]
        elif np.all(back_prob == 1):
            cond = [True] * non_default_ids.shape[0]
        else:
            # Bernoulli draw per member: transit iff probability beats a
            # uniform [0, 1) baseline
            baseline = self.judge.generate(back_prob.shape[0])
            cond = back_prob > baseline

        member_ids = back_prob[cond].index
        states = ["default"] * member_ids.shape[0]

        self.story.transit_to_state(ids=member_ids, states=states)
def connect_agent_to_dealer(self, agents, dealers):
    """
    Relationship from agents to dealers.

    Creates a weighted "DEALERS" relationship on the agents population:
    a random bipartite graph between agents and dealers (edge probability
    derived from the configured average agent degree), plus one extra edge
    from every agent to the special "broke_dealer".

    :param agents: agents population (gets the "DEALERS" relationship)
    :param dealers: dealers population
    """
    logging.info("Randomly connecting agents to dealer ")

    # BUG FIX: `params` was referenced as a bare, undefined name (NameError
    # at call time). The scenario parameters live on the circus instance,
    # consistently with the rest of this class (cf. create_subs_and_sims,
    # which reads self.params).
    deg_prob = self.params["average_agent_degree"] / self.params[
        "n_agents"] * self.params["n_dealers"]

    # NOTE(review): seed is hard-coded to 1 here instead of
    # next(self.seeder) as used everywhere else — kept as-is to preserve
    # the generated data, but confirm this is intentional.
    agent_weight_gen = NumpyRandomGenerator(method="exponential",
                                            scale=1.,
                                            seed=1)

    agent_customer_df = pd.DataFrame.from_records(
        make_random_bipartite_data(agents.ids,
                                   dealers.ids,
                                   deg_prob,
                                   seed=next(self.seeder)),
        columns=["AGENT", "DEALER"])

    agent_customer_rel = agents.create_relationship(name="DEALERS")

    agent_customer_rel.add_relations(
        from_ids=agent_customer_df["AGENT"],
        to_ids=agent_customer_df["DEALER"],
        weights=agent_weight_gen.generate(agent_customer_df.shape[0]))

    # every agent is also connected to the "broke dealer", to make sure this
    # one gets out of stock quickly
    agent_customer_rel.add_relations(from_ids=agents.ids,
                                     to_ids=np.repeat("broke_dealer",
                                                      agents.ids.shape),
                                     weights=4)
def step4():
    """ Woah, this got drastically slower """
    # build a circus with a fixed master seed and an hourly clock
    example1 = circus.Circus(name="example1",
                             master_seed=12345,
                             start=pd.Timestamp("1 Jan 2017 00:00"),
                             step_duration=pd.Timedelta("1h"))

    person = example1.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"))

    person.create_attribute(
        "NAME",
        init_gen=FakerGenerator(method="name",
                                seed=next(example1.seeder)))

    sites = SequencialGenerator(prefix="SITE_").generate(1000)
    random_site_gen = NumpyRandomGenerator(method="choice",
                                           a=sites,
                                           seed=next(example1.seeder))

    # each person is linked to 5 randomly chosen sites (duplicates possible)
    allowed_sites = person.create_relationship(name="sites")
    for i in range(5):
        allowed_sites \
            .add_relations(from_ids=person.ids,
                           to_ids=random_site_gen.generate(person.size))

    hello_world = example1.create_story(
        name="hello_world",
        initiating_population=person,
        member_id_field="PERSON_ID",

        # after each story, reset the timer to 0, so that it will get
        # executed again at the next clock tick (next hour)
        timer_gen=ConstantDependentGenerator(value=0))

    duration_gen = NumpyRandomGenerator(method="exponential",
                                        scale=60,
                                        seed=next(example1.seeder))

    # per tick: look up the name, generate a greeting + duration,
    # pick one of the person's sites, timestamp it, and log the row
    hello_world.set_operations(
        person.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),
        duration_gen.ops.generate(named_as="DURATION"),
        allowed_sites.ops.select_one(from_field="PERSON_ID", named_as="SITE"),
        example1.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"))

    example1.run(duration=pd.Timedelta("48h"),
                 log_output_folder="output/example1",
                 delete_existing_logs=True)

    # minus 1 for the csv header line
    with open("output/example1/hello.csv") as f:
        print("Logged {} lines".format(len(f.readlines()) - 1))
def numpy_generators_read_from_disk_should_generate_same_sequence_as_original(
):
    """
    Saving a NumpyRandomGenerator to disk and reloading it must yield a
    generator that produces the exact same random sequence.
    """
    with path.tempdir() as p:
        # making sure we're not using the default seed
        tested = NumpyRandomGenerator(method="normal",
                                      loc=10,
                                      scale=4,
                                      seed=123456)

        gen_file = os.path.join(p, "tested2.json")
        tested.save_to(gen_file)

        reloaded = Generator.load_generator(gen_type="NumpyRandomGenerator",
                                            input_file=gen_file)

        # BUG FIX: `assert a == b` on numpy arrays raises
        # "ValueError: The truth value of an array ... is ambiguous" because
        # == is element-wise. Comparing as element lists asserts exact
        # sequence equality and works whether generate() returns a list or
        # an ndarray.
        assert list(tested.generate(size=10000)) == \
            list(reloaded.generate(size=10000))
def create_field_agents(circus, params):
    """
    Adds the "field_agents" population to the circus, together with a
    weighted "POSSIBLE_SITES" relationship (random bipartite graph towards
    the existing "sites" population) and a "CURRENT_SITE" attribute
    initialized from that relationship.

    :param circus: circus that must already contain a "sites" population
    :param params: dict providing "n_field_agents"
    :return: the created field_agents population
    """
    n_field_agents = params["n_field_agents"]
    logging.info(" adding {} field agents".format(n_field_agents))

    field_agents = circus.create_population(
        name="field_agents",
        size=n_field_agents,
        ids_gen=SequencialGenerator(prefix="FA_"))

    logging.info(" adding mobility relationships to field agents")
    possible_sites = field_agents.create_relationship("POSSIBLE_SITES")

    # TODO: make sure the number of sites per field agent is "reasonable"
    site_ids = circus.actors["sites"].ids
    fa_site_pairs = pd.DataFrame.from_records(
        make_random_bipartite_data(field_agents.ids,
                                   site_ids,
                                   0.4,
                                   seed=next(circus.seeder)),
        columns=["FA_ID", "SID"])

    weight_gen = NumpyRandomGenerator(method="exponential",
                                      scale=1.,
                                      seed=next(circus.seeder))
    edge_weights = weight_gen.generate(fa_site_pairs.shape[0])

    possible_sites.add_relations(from_ids=fa_site_pairs["FA_ID"],
                                 to_ids=fa_site_pairs["SID"],
                                 weights=edge_weights)

    # Initialize the mobility by allocating one first random site to each
    # field agent among its network
    field_agents.create_attribute(name="CURRENT_SITE",
                                  init_relationship="POSSIBLE_SITES")

    return field_agents
def add_mobility(self, subs, cells):
    """
    adds a CELL attribute to the customer population + a mobility story that
    randomly moves customers from CELL to CELL among their used cells.

    :param subs: the customer population (receives the "CELL" attribute and
        the "POSSIBLE_CELLS" relationship)
    :param cells: the cells population customers can move between
    """
    logging.info("Adding mobility ")

    # mobility time profile: assign high mobility activities to busy hours
    # of the day
    mov_prof = [
        1., 1., 1., 1., 1., 1., 1., 1., 5., 10., 5., 1., 1., 1., 1., 1., 1.,
        5., 10., 5., 1., 1., 1., 1.
    ]
    mobility_time_gen = CyclicTimerGenerator(
        clock=self.clock,
        seed=next(self.seeder),
        config=CyclicTimerProfile(
            profile=mov_prof,
            profile_time_steps="1H",
            start_date=pd.Timestamp("12 September 2016 00:00.00")))

    # Mobility network, i.e. choice of cells per user, i.e. these are the
    # weighted "used cells" (as in "most used cells) for each user
    mobility_weight_gen = NumpyRandomGenerator(method="exponential",
                                               scale=1.,
                                               seed=next(self.seeder))

    mobility_rel = subs.create_relationship("POSSIBLE_CELLS")

    logging.info(" creating bipartite graph ")
    mobility_df = pd.DataFrame.from_records(make_random_bipartite_data(
        subs.ids, cells.ids, 0.4, seed=next(self.seeder)),
                                            columns=["USER_ID", "CELL"])

    logging.info(" adding mobility relationships to customer")
    mobility_rel.add_relations(from_ids=mobility_df["USER_ID"],
                               to_ids=mobility_df["CELL"],
                               weights=mobility_weight_gen.generate(
                                   mobility_df.shape[0]))

    logging.info(" creating customer's CELL attribute ")

    # Initialize the mobility by allocating one first random cell to each
    # customer among its network
    subs.create_attribute(name="CELL", init_relationship="POSSIBLE_CELLS")

    # Mobility story itself, basically just a random hop from cell to cell,
    # that updates the "CELL" attributes + generates mobility logs
    logging.info(" creating mobility story")
    mobility_story = self.create_story(
        name="mobility",
        initiating_population=subs,
        member_id_field="A_ID",
        timer_gen=mobility_time_gen,
    )

    logging.info(" adding operations")
    mobility_story.set_operations(
        subs.ops.lookup(id_field="A_ID", select={"CELL": "PREV_CELL"}),

        # selects a destination cell (or maybe the same as current... ^^)
        mobility_rel.ops.select_one(from_field="A_ID", named_as="NEW_CELL"),

        # update the CELL attribute of the customers accordingly
        subs.get_attribute("CELL").ops.update(member_id_field="A_ID",
                                              copy_from_field="NEW_CELL"),

        self.clock.ops.timestamp(named_as="TIME"),

        # create mobility logs
        operations.FieldLogger(
            log_id="mobility_logs",
            cols=["TIME", "A_ID", "PREV_CELL", "NEW_CELL"]),
    )

    logging.info(" done")
def create_subs_and_sims(self):
    """
    Creates the subs and sims + a relationship between them + an agent
    relationship.

    We have at least one sim per subs: sims.size >= subs.size

    The sims population contains the "OPERATOR", "MAIN_ACCT" and "MSISDN"
    attributes.

    The subs population has a "SIMS" relationship that points to the sims
    owned by each subs.

    The sims population also has a relationship to the set of agents where
    this sim can be topped up.

    :return: (subs population, sims population, recharge value generator)
    """
    npgen = RandomState(seed=next(self.seeder))

    # subs are empty here but will receive a "CELLS" and "EXCITABILITY"
    # attributes later on
    subs = self.create_population(
        name="subs",
        size=self.params["n_subscribers"],
        ids_gen=SequencialGenerator(prefix="SUBS_"))

    # each subs has between 1 and 4 operators
    number_of_operators = npgen.choice(a=range(1, 5), size=subs.size)
    operator_ids = build_ids(size=4, prefix="OPERATOR_", max_length=1)

    def pick_operators(qty):
        """
        randomly choose a set of unique operators of specified size
        """
        return npgen.choice(a=operator_ids,
                            p=[.8, .05, .1, .05],
                            size=qty,
                            replace=False).tolist()

    # set of operators of each subs
    subs_operators_list = map(pick_operators, number_of_operators)

    # Dataframe with 4 columns for the 1rst, 2nd,... operator of each subs.
    # Since the subs_operators_list entries don't all have the same size,
    # some entries of this dataframe contain None, which are just discarded
    # by the stack() below
    subs_operators_df = pd.DataFrame(data=list(subs_operators_list),
                                     index=subs.ids)

    # same info, vertically: the index contains the sub id (with duplicates)
    # and "operator" one of the operators of this subs
    subs_ops_mapping = subs_operators_df.stack()
    subs_ops_mapping.index = subs_ops_mapping.index.droplevel(level=1)

    # SIM population, each with an OPERATOR and MAIN_ACCT attributes
    sims = self.create_population(
        name="sims",
        size=subs_ops_mapping.size,
        ids_gen=SequencialGenerator(prefix="SIMS_"))
    sims.create_attribute("OPERATOR", init_values=subs_ops_mapping.values)
    # every sim starts with the same main account balance
    recharge_gen = ConstantGenerator(value=1000.)
    sims.create_attribute(name="MAIN_ACCT", init_gen=recharge_gen)

    # keeping track of the link between population and sims as a relationship
    sims_of_subs = subs.create_relationship("SIMS")
    sims_of_subs.add_relations(from_ids=subs_ops_mapping.index,
                               to_ids=sims.ids)

    msisdn_gen = MSISDNGenerator(
        countrycode="0032",
        prefix_list=["472", "473", "475", "476", "477", "478", "479"],
        length=6,
        seed=next(self.seeder))
    sims.create_attribute(name="MSISDN", init_gen=msisdn_gen)

    # Finally, adding one more relationship that defines the set of possible
    # shops where we can topup each SIM.
    # TODO: to make this a bit more realistic, we should probably generate
    # such relationship first from the subs to their favourite shops, and then
    # copy that info to each SIM, maybe with some fluctuations to account
    # for the fact that not all shops provide topups of all operators.
    agents = build_ids(self.params["n_agents"],
                       prefix="AGENT_",
                       max_length=3)

    agent_df = pd.DataFrame.from_records(make_random_bipartite_data(
        sims.ids, agents, 0.3, seed=next(self.seeder)),
                                         columns=["SIM_ID", "AGENT"])

    logging.info(" creating random sim/agent relationship ")
    sims_agents_rel = sims.create_relationship("POSSIBLE_AGENTS")

    agent_weight_gen = NumpyRandomGenerator(method="exponential",
                                            scale=1.,
                                            seed=next(self.seeder))

    sims_agents_rel.add_relations(from_ids=agent_df["SIM_ID"],
                                  to_ids=agent_df["AGENT"],
                                  weights=agent_weight_gen.generate(
                                      agent_df.shape[0]))

    return subs, sims, recharge_gen
def step7():
    """
    Demo step: persons greet a randomly selected friend, with a site and a
    priced call duration per greeting, logging one row per event.
    """
    example1 = circus.Circus(name="example1",
                             master_seed=12345,
                             start=pd.Timestamp("1 Jan 2017 00:00"),
                             step_duration=pd.Timedelta("1h"))

    person = example1.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"))

    person.create_attribute(
        "NAME",
        init_gen=FakerGenerator(method="name",
                                seed=next(example1.seeder)))
    # popularity in [0, 1): used below to bias friend selection
    person.create_attribute(
        "POPULARITY",
        init_gen=NumpyRandomGenerator(method="uniform",
                                      low=0,
                                      high=1,
                                      seed=next(example1.seeder)))

    sites = SequencialGenerator(prefix="SITE_").generate(1000)
    random_site_gen = NumpyRandomGenerator(method="choice",
                                           a=sites,
                                           seed=next(example1.seeder))

    allowed_sites = person.create_relationship(name="sites")

    # SITES ------------------

    # Add HOME sites
    allowed_sites.add_relations(from_ids=person.ids,
                                to_ids=random_site_gen.generate(person.size),
                                weights=0.4)

    # Add WORK sites
    allowed_sites.add_relations(from_ids=person.ids,
                                to_ids=random_site_gen.generate(person.size),
                                weights=0.3)

    # Add OTHER sites
    for i in range(3):
        allowed_sites \
            .add_relations(from_ids=person.ids,
                           to_ids=random_site_gen.generate(person.size),
                           weights=0.1)

    # FRIENDS ------------------
    friends = person.create_relationship(name="friends")
    friends_df = pd.DataFrame.from_records(
        make_random_bipartite_data(
            person.ids,
            person.ids,
            p=0.005,  # probability for a node to be connected to
            # another one : 5 friends on average = 5/1000
            seed=next(example1.seeder)),
        columns=["A", "B"])
    friends.add_relations(from_ids=friends_df["A"],
                          to_ids=friends_df["B"])

    # PRICE ------------------
    def price(story_data):
        # price is proportional to call duration, always billed in EUR
        result = pd.DataFrame(index=story_data.index)
        result["PRICE"] = story_data["DURATION"] * 0.05
        result["CURRENCY"] = "EUR"
        return result

    # STORIES ------------------
    hello_world = example1.create_story(
        name="hello_world",
        initiating_population=person,
        member_id_field="PERSON_ID",

        # after each story, reset the timer to 0, so that it will get
        # executed again at the next clock tick (next hour)
        timer_gen=ConstantDependentGenerator(value=0))

    duration_gen = NumpyRandomGenerator(method="exponential",
                                        scale=60,
                                        seed=next(example1.seeder))

    hello_world.set_operations(
        person.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),
        duration_gen.ops.generate(named_as="DURATION"),
        friends.ops.select_one(
            from_field="PERSON_ID",
            named_as="COUNTERPART_ID",
            weight=person.get_attribute_values("POPULARITY"),

            # For people that do not have friends, it will try to find
            # the POPULARITY attribute of a None and crash miserably
            # Adding this flag will discard people that do not have friends
            discard_empty=True),
        person.ops.lookup(id_field="COUNTERPART_ID",
                          select={"NAME": "COUNTER_PART_NAME"}),
        allowed_sites.ops.select_one(from_field="PERSON_ID",
                                     named_as="SITE"),
        allowed_sites.ops.select_one(from_field="COUNTERPART_ID",
                                     named_as="COUNTERPART_SITE"),
        Apply(source_fields=["DURATION", "SITE", "COUNTERPART_SITE"],
              named_as=["PRICE", "CURRENCY"],
              f=price,
              f_args="dataframe"),
        example1.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"))

    example1.run(duration=pd.Timedelta("48h"),
                 log_output_folder="output/example1",
                 delete_existing_logs=True)

    # minus 1 for the csv header line
    with open("output/example1/hello.csv") as f:
        print("Logged {} lines".format(len(f.readlines()) - 1))
def test_numpy_random_generator_should_delegate_to_numpy_correctly():
    """
    Smoke test: building the generator and drawing a sample of the requested
    size proves the numpy method lookup and delegation work — it does not
    check the distribution itself.
    """
    generator = NumpyRandomGenerator(method="normal", loc=10, scale=4, seed=1)
    sample = generator.generate(size=10)
    assert len(sample) == 10