Example #1
def add_customers(circus, params):

    logging.info(" adding customers")
    customers = circus.create_population(
        name="customers",
        size=params["n_customers"],
        ids_gen=SequencialGenerator(prefix="CUST_"))

    logging.info(" adding 'possible sites' mobility relationship to customers")

    mobility_rel = customers.create_relationship("POSSIBLE_SITES")

    # probability of each site to be chosen, based on geo_level1 population
    site_weight = circus.actors["sites"] \
        .get_attribute("GEO_LEVEL_1_POPULATION") \
        .get_values(None)

    customer_gen = NumpyRandomGenerator(method="choice",
                                        seed=next(circus.seeder),
                                        a=customers.ids,
                                        replace=False)

    site_gen = NumpyRandomGenerator(method="choice",
                                    seed=next(circus.seeder),
                                    a=circus.actors["sites"].ids,
                                    p=site_weight.values / sum(site_weight))

    mobility_weight_gen = NumpyRandomGenerator(method="exponential",
                                               scale=1.,
                                               seed=next(circus.seeder))

    # Everybody gets at least one site
    mobility_rel.add_relations(from_ids=customers.ids,
                               to_ids=site_gen.generate(customers.size),
                               weights=mobility_weight_gen.generate(
                                   customers.size))

    # at each iteration, give a new site to a sample of people
    # the sample will be of proportion p
    p = 0.5

    # to reach an average of mean_known_sites_per_customer sites per customer,
    # with each iteration sampling a proportion p of the customers,
    # we need mean_known_sites_per_customer / p iterations in total.
    #
    # We subtract the one iteration above that already gave a site to everybody.
    for i in range(int(params["mean_known_sites_per_customer"] / p) - 1):
        sample = customer_gen.generate(int(customers.size * p))
        mobility_rel.add_relations(from_ids=sample,
                                   to_ids=site_gen.generate(len(sample)),
                                   weights=mobility_weight_gen.generate(
                                       len(sample)))

    logging.info(" assigning a first random site to each customer")
    customers.create_attribute(name="CURRENT_SITE",
                               init_relationship="POSSIBLE_SITES")

    return customers
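A minimal invocation sketch (not part of the original source): it assumes `the_circus` is an already-built Circus whose "sites" population carries the GEO_LEVEL_1_POPULATION attribute read above, and the params values below are made up.

params = {
    "n_customers": 5000,
    "mean_known_sites_per_customer": 2.0,
}
customers = add_customers(the_circus, params)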
Example #2
    class _MaybeBackToDefault(SideEffectOnly):
        """
        This is an internal operation of story, that transits members
        back to default with probability as declared in
        back_to_default_probability
        """
        def __init__(self, story):
            self.judge = NumpyRandomGenerator(method="uniform", seed=1234)
            self.story = story

        def side_effect(self, story_data):
            # only transit members that have run during this clock tick
            active_timer = self.story.timer.loc[story_data.index]

            non_default_ids = active_timer[
                active_timer["state"] != "default"].index

            if non_default_ids.shape[0] == 0:
                return

            back_prob = self.story.get_param("back_to_default_probability",
                                             non_default_ids)

            if np.all(back_prob == 0):
                cond = [False] * non_default_ids.shape[0]
            elif np.all(back_prob == 1):
                cond = [True] * non_default_ids.shape[0]
            else:
                baseline = self.judge.generate(back_prob.shape[0])
                cond = back_prob > baseline

            member_ids = back_prob[cond].index
            states = ["default"] * member_ids.shape[0]

            self.story.transit_to_state(ids=member_ids, states=states)
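The per-member decision above boils down to comparing each revert probability against an independent uniform draw. A standalone sketch of that rule in plain numpy (no trumania dependency; the probabilities are made up):

import numpy as np

rng = np.random.RandomState(1234)
back_prob = np.array([0.0, 0.3, 0.9])   # per-member revert probabilities
baseline = rng.uniform(size=back_prob.shape[0])
reverts = back_prob > baseline          # True entries transit back to "default"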
Example #3
    def connect_agent_to_dealer(self, agents, dealers):
        """
        Relationship from agents to dealers
        """
        logging.info("Randomly connecting agents to dealer ")

        deg_prob = (params["average_agent_degree"] / params["n_agents"]
                    * params["n_dealers"])

        agent_weight_gen = NumpyRandomGenerator(method="exponential",
                                                scale=1.,
                                                seed=1)

        agent_customer_df = pd.DataFrame.from_records(
            make_random_bipartite_data(agents.ids,
                                       dealers.ids,
                                       deg_prob,
                                       seed=next(self.seeder)),
            columns=["AGENT", "DEALER"])

        agent_customer_rel = agents.create_relationship(name="DEALERS")

        agent_customer_rel.add_relations(from_ids=agent_customer_df["AGENT"],
                                         to_ids=agent_customer_df["DEALER"],
                                         weights=agent_weight_gen.generate(
                                             agent_customer_df.shape[0]))

        # every agent is also connected to the "broke dealer", to make sure this
        # one gets out of stock quickly
        agent_customer_rel.add_relations(from_ids=agents.ids,
                                         to_ids=np.repeat(
                                             "broke_dealer", agents.ids.shape),
                                         weights=4)
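A hypothetical call site, assuming this method lives on a Circus subclass and that the "agents" and "dealers" populations (including the special "broke_dealer" id) were created beforehand:

my_circus.connect_agent_to_dealer(agents=my_circus.actors["agents"],
                                  dealers=my_circus.actors["dealers"])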
Example #4
def step4():
    """
    Woah, this got drastically slower
    """

    example1 = circus.Circus(name="example1",
                             master_seed=12345,
                             start=pd.Timestamp("1 Jan 2017 00:00"),
                             step_duration=pd.Timedelta("1h"))

    person = example1.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"))

    person.create_attribute("NAME",
                            init_gen=FakerGenerator(method="name",
                                                    seed=next(
                                                        example1.seeder)))

    sites = SequencialGenerator(prefix="SITE_").generate(1000)
    random_site_gen = NumpyRandomGenerator(method="choice",
                                           a=sites,
                                           seed=next(example1.seeder))

    allowed_sites = person.create_relationship(name="sites")
    for i in range(5):
        allowed_sites \
            .add_relations(from_ids=person.ids,
                           to_ids=random_site_gen.generate(person.size))

    hello_world = example1.create_story(
        name="hello_world",
        initiating_population=person,
        member_id_field="PERSON_ID",

        # after each story, reset the timer to 0, so that it will get
        # executed again at the next clock tick (next hour)
        timer_gen=ConstantDependentGenerator(value=0))

    duration_gen = NumpyRandomGenerator(method="exponential",
                                        scale=60,
                                        seed=next(example1.seeder))

    hello_world.set_operations(
        person.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),
        duration_gen.ops.generate(named_as="DURATION"),
        allowed_sites.ops.select_one(from_field="PERSON_ID", named_as="SITE"),
        example1.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"))

    example1.run(duration=pd.Timedelta("48h"),
                 log_output_folder="output/example1",
                 delete_existing_logs=True)

    with open("output/example1/hello.csv") as f:
        print("Logged {} lines".format(len(f.readlines()) - 1))
Example #5
def numpy_generators_read_from_disk_should_generate_same_sequence_as_original(
):

    with path.tempdir() as p:

        # making sure we're not using the default seed
        tested = NumpyRandomGenerator(method="normal",
                                      loc=10,
                                      scale=4,
                                      seed=123456)

        gen_file = os.path.join(p, "tested2.json")
        tested.save_to(gen_file)

        reloaded = Generator.load_generator(gen_type="NumpyRandomGenerator",
                                            input_file=gen_file)

        assert tested.generate(size=10000) == reloaded.generate(size=10000)
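The property under test is seeded reproducibility of the underlying numpy draws. The same idea in plain numpy, without trumania's save/load round-trip:

import numpy as np

a = np.random.RandomState(123456).normal(loc=10, scale=4, size=10000)
b = np.random.RandomState(123456).normal(loc=10, scale=4, size=10000)
assert (a == b).all()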
Example #6
def create_field_agents(circus, params):

    logging.info(" adding {} field agents".format(params["n_field_agents"]))

    field_agents = circus.create_population(
        name="field_agents",
        size=params["n_field_agents"],
        ids_gen=SequencialGenerator(prefix="FA_"))

    logging.info(" adding mobility relationships to field agents")

    mobility_rel = field_agents.create_relationship("POSSIBLE_SITES")

    # TODO: make sure the number of sites per field agent is "reasonable"
    mobility_df = pd.DataFrame.from_records(
        make_random_bipartite_data(field_agents.ids,
                                   circus.actors["sites"].ids,
                                   0.4,
                                   seed=next(circus.seeder)),
        columns=["FA_ID", "SID"])

    mobility_weight_gen = NumpyRandomGenerator(method="exponential",
                                               scale=1.,
                                               seed=next(circus.seeder))

    mobility_rel.add_relations(from_ids=mobility_df["FA_ID"],
                               to_ids=mobility_df["SID"],
                               weights=mobility_weight_gen.generate(
                                   mobility_df.shape[0]))

    # Initialize the mobility by allocating one first random site to each
    # field agent among its network
    field_agents.create_attribute(name="CURRENT_SITE",
                                  init_relationship="POSSIBLE_SITES")

    return field_agents
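A hypothetical call, assuming `the_circus` already contains the "sites" population that the bipartite graph above connects field agents to, and using a made-up params value:

params = {"n_field_agents": 100}
field_agents = create_field_agents(the_circus, params)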
Example #7
    def add_mobility(self, subs, cells):
        """
        adds a CELL attribute to the customer population + a mobility story that
        randomly moves customers from CELL to CELL among their used cells.
        """
        logging.info("Adding mobility ")

        # mobility time profile: assign high mobility activities to busy hours
        # of the day
        mov_prof = [
            1., 1., 1., 1., 1., 1., 1., 1., 5., 10., 5., 1., 1., 1., 1., 1.,
            1., 5., 10., 5., 1., 1., 1., 1.
        ]
        mobility_time_gen = CyclicTimerGenerator(
            clock=self.clock,
            seed=next(self.seeder),
            config=CyclicTimerProfile(
                profile=mov_prof,
                profile_time_steps="1H",
                start_date=pd.Timestamp("12 September 2016 00:00.00")))

        # Mobility network, i.e. the choice of cells per user: these are the
        # weighted "used cells" (as in "most used cells") for each user
        mobility_weight_gen = NumpyRandomGenerator(method="exponential",
                                                   scale=1.,
                                                   seed=next(self.seeder))

        mobility_rel = subs.create_relationship("POSSIBLE_CELLS")

        logging.info(" creating bipartite graph ")
        mobility_df = pd.DataFrame.from_records(
            make_random_bipartite_data(subs.ids, cells.ids, 0.4,
                                       seed=next(self.seeder)),
            columns=["USER_ID", "CELL"])

        logging.info(" adding mobility relationships to customer")
        mobility_rel.add_relations(from_ids=mobility_df["USER_ID"],
                                   to_ids=mobility_df["CELL"],
                                   weights=mobility_weight_gen.generate(
                                       mobility_df.shape[0]))

        logging.info(" creating customer's CELL attribute ")

        # Initialize the mobility by allocating one first random cell to each
        # customer among its network
        subs.create_attribute(name="CELL", init_relationship="POSSIBLE_CELLS")

        # Mobility story itself, basically just a random hop from cell to cell,
        # that updates the "CELL" attributes + generates mobility logs
        logging.info(" creating mobility story")
        mobility_story = self.create_story(
            name="mobility",
            initiating_population=subs,
            member_id_field="A_ID",
            timer_gen=mobility_time_gen,
        )

        logging.info(" adding operations")
        mobility_story.set_operations(
            subs.ops.lookup(id_field="A_ID", select={"CELL": "PREV_CELL"}),

            # selects a destination cell (or maybe the same as current... ^^)
            mobility_rel.ops.select_one(from_field="A_ID",
                                        named_as="NEW_CELL"),

            # update the CELL attribute of the customers accordingly
            subs.get_attribute("CELL").ops.update(member_id_field="A_ID",
                                                  copy_from_field="NEW_CELL"),
            self.clock.ops.timestamp(named_as="TIME"),

            # create mobility logs
            operations.FieldLogger(
                log_id="mobility_logs",
                cols=["TIME", "A_ID", "PREV_CELL", "NEW_CELL"]),
        )

        logging.info(" done")
Example #8
    def create_subs_and_sims(self):
        """
        Creates the subs and sims + a relationship between them + an agent
        relationship.

        We have at least one sim per subs: sims.size >= subs.size

        The sims population contains the "OPERATOR", "MAIN_ACCT" and "MSISDN" attributes.

        The subs population has a "SIMS" relationship that points to the sims owned by
        each subs.

        The sims population also has a relationship to the set of agents where this sim
        can be topped up.
        """

        npgen = RandomState(seed=next(self.seeder))

        # subs are empty here but will receive "CELLS" and "EXCITABILITY"
        # attributes later on
        subs = self.create_population(
            name="subs",
            size=self.params["n_subscribers"],
            ids_gen=SequencialGenerator(prefix="SUBS_"))

        number_of_operators = npgen.choice(a=range(1, 5), size=subs.size)
        operator_ids = build_ids(size=4, prefix="OPERATOR_", max_length=1)

        def pick_operators(qty):
            """
            randomly choose a set of unique operators of specified size
            """
            return npgen.choice(a=operator_ids,
                                p=[.8, .05, .1, .05],
                                size=qty,
                                replace=False).tolist()

        # set of operators of each subs
        subs_operators_list = map(pick_operators, number_of_operators)

        # Dataframe with 4 columns for the 1st, 2nd,... operator of each subs.
        # Since the entries of subs_operators_list don't all have the same size,
        # some cells of this dataframe contain None, which are simply discarded
        # by the stack() below
        subs_operators_df = pd.DataFrame(data=list(subs_operators_list),
                                         index=subs.ids)

        # same info, vertically: the index contains the sub id (with duplicates)
        # and each value is one of the operators of that subs
        subs_ops_mapping = subs_operators_df.stack()
        subs_ops_mapping.index = subs_ops_mapping.index.droplevel(level=1)

        # SIM population, each with an OPERATOR and MAIN_ACCT attributes
        sims = self.create_population(
            name="sims",
            size=subs_ops_mapping.size,
            ids_gen=SequencialGenerator(prefix="SIMS_"))
        sims.create_attribute("OPERATOR", init_values=subs_ops_mapping.values)
        recharge_gen = ConstantGenerator(value=1000.)
        sims.create_attribute(name="MAIN_ACCT", init_gen=recharge_gen)

        # keeping track of the link between population and sims as a relationship
        sims_of_subs = subs.create_relationship("SIMS")
        sims_of_subs.add_relations(from_ids=subs_ops_mapping.index,
                                   to_ids=sims.ids)

        msisdn_gen = MSISDNGenerator(
            countrycode="0032",
            prefix_list=["472", "473", "475", "476", "477", "478", "479"],
            length=6,
            seed=next(self.seeder))
        sims.create_attribute(name="MSISDN", init_gen=msisdn_gen)

        # Finally, adding one more relationship that defines the set of possible
        # shops where we can topup each SIM.
        # TODO: to make this a bit more realistic, we should probably generate
        # such relationship first from the subs to their favourite shops, and then
        # copy that info to each SIM, maybe with some fluctuations to account
        # for the fact that not all shops provide topups of all operators.
        agents = build_ids(self.params["n_agents"],
                           prefix="AGENT_",
                           max_length=3)

        agent_df = pd.DataFrame.from_records(
            make_random_bipartite_data(sims.ids, agents, 0.3,
                                       seed=next(self.seeder)),
            columns=["SIM_ID", "AGENT"])

        logging.info(" creating random sim/agent relationship ")
        sims_agents_rel = sims.create_relationship("POSSIBLE_AGENTS")

        agent_weight_gen = NumpyRandomGenerator(method="exponential",
                                                scale=1.,
                                                seed=next(self.seeder))

        sims_agents_rel.add_relations(from_ids=agent_df["SIM_ID"],
                                      to_ids=agent_df["AGENT"],
                                      weights=agent_weight_gen.generate(
                                          agent_df.shape[0]))

        return subs, sims, recharge_gen
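The stack()/droplevel() step above is what turns the wide "one column per operator" frame into one (subscriber, operator) row per SIM to create. The same trick on a tiny hand-made frame, independent of trumania:

import pandas as pd

wide = pd.DataFrame([["OP_A", "OP_B"], ["OP_C", None]],
                    index=["SUBS_0", "SUBS_1"])
mapping = wide.stack()                       # the None entry is dropped
mapping.index = mapping.index.droplevel(level=1)
# mapping now maps SUBS_0 -> OP_A, SUBS_0 -> OP_B, SUBS_1 -> OP_C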
Example #9
def step7():

    example1 = circus.Circus(name="example1",
                             master_seed=12345,
                             start=pd.Timestamp("1 Jan 2017 00:00"),
                             step_duration=pd.Timedelta("1h"))

    person = example1.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"))

    person.create_attribute("NAME",
                            init_gen=FakerGenerator(method="name",
                                                    seed=next(
                                                        example1.seeder)))
    person.create_attribute("POPULARITY",
                            init_gen=NumpyRandomGenerator(
                                method="uniform",
                                low=0,
                                high=1,
                                seed=next(example1.seeder)))

    sites = SequencialGenerator(prefix="SITE_").generate(1000)
    random_site_gen = NumpyRandomGenerator(method="choice",
                                           a=sites,
                                           seed=next(example1.seeder))

    allowed_sites = person.create_relationship(name="sites")

    # SITES ------------------

    # Add HOME sites
    allowed_sites.add_relations(from_ids=person.ids,
                                to_ids=random_site_gen.generate(person.size),
                                weights=0.4)

    # Add WORK sites
    allowed_sites.add_relations(from_ids=person.ids,
                                to_ids=random_site_gen.generate(person.size),
                                weights=0.3)

    # Add OTHER sites
    for i in range(3):
        allowed_sites \
            .add_relations(from_ids=person.ids,
                           to_ids=random_site_gen.generate(person.size),
                           weights=0.1)

    # FRIENDS ------------------

    friends = person.create_relationship(name="friends")

    friends_df = pd.DataFrame.from_records(
        make_random_bipartite_data(
            person.ids,
            person.ids,
            p=0.005,  # probability for a node to be connected to
            # another one: 5 friends on average = 5/1000
            seed=next(example1.seeder)),
        columns=["A", "B"])

    friends.add_relations(from_ids=friends_df["A"], to_ids=friends_df["B"])

    # PRICE ------------------

    def price(story_data):

        result = pd.DataFrame(index=story_data.index)

        result["PRICE"] = story_data["DURATION"] * 0.05
        result["CURRENCY"] = "EUR"

        return result

    # STORIES ------------------

    hello_world = example1.create_story(
        name="hello_world",
        initiating_population=person,
        member_id_field="PERSON_ID",

        # after each story, reset the timer to 0, so that it will get
        # executed again at the next clock tick (next hour)
        timer_gen=ConstantDependentGenerator(value=0))

    duration_gen = NumpyRandomGenerator(method="exponential",
                                        scale=60,
                                        seed=next(example1.seeder))

    hello_world.set_operations(
        person.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),
        duration_gen.ops.generate(named_as="DURATION"),
        friends.ops.select_one(
            from_field="PERSON_ID",
            named_as="COUNTERPART_ID",
            weight=person.get_attribute_values("POPULARITY"),
            # For people that do not have friends, it will try to find
            # the POPULARITY attribute of a None and crash miserably
            # Adding this flag will discard people that do not have friends
            discard_empty=True),
        person.ops.lookup(id_field="COUNTERPART_ID",
                          select={"NAME": "COUNTER_PART_NAME"}),
        allowed_sites.ops.select_one(from_field="PERSON_ID", named_as="SITE"),
        allowed_sites.ops.select_one(from_field="COUNTERPART_ID",
                                     named_as="COUNTERPART_SITE"),
        Apply(source_fields=["DURATION", "SITE", "COUNTERPART_SITE"],
              named_as=["PRICE", "CURRENCY"],
              f=price,
              f_args="dataframe"),
        example1.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"))

    example1.run(duration=pd.Timedelta("48h"),
                 log_output_folder="output/example1",
                 delete_existing_logs=True)

    with open("output/example1/hello.csv") as f:
        print("Logged {} lines".format(len(f.readlines()) - 1))
Example #10
def test_numpy_random_generator_should_delegate_to_numpy_correctly():

    # basic "smoke" test, if it does not crash it at least proves it's able
    # to load the appropriate method
    tested = NumpyRandomGenerator(method="normal", loc=10, scale=4, seed=1)
    assert len(tested.generate(size=10)) == 10
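For comparison, the plain numpy call the generator presumably delegates to: a seeded RandomState exposing a "normal" method with the same loc/scale/size keywords (an illustration only, not a claim about trumania's internals):

import numpy as np

assert len(np.random.RandomState(1).normal(loc=10, scale=4, size=10)) == 10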