def add_music_repo(the_circus):
    """
    Creates the "music_repository" population in the circus: one member per
    music genre, identified by GENRE_-prefixed ids, carrying a "genre_name"
    attribute and owning a "songs" relationship.
    """
    genre_names = ["blues", "jazz", "electro", "pop", "rock"]
    genre_id_gen = gen.SequencialGenerator(prefix="GENRE_")

    music_repo = the_circus.create_population(
        name="music_repository",
        size=len(genre_names),
        ids_gen=genre_id_gen)

    music_repo.create_attribute(name="genre_name", init_values=genre_names)
    music_repo.create_relationship(name="songs")
def add_items_to_pos_stock(the_circus):
    """
    Generates 5 new ITEM_-prefixed ids for every point of sale and adds them
    to the "items" relationship of that POS. The id generator is attached to
    the circus under the name "items_gen".
    """
    point_of_sales = the_circus.populations["point_of_sale"]

    item_id_gen = gen.SequencialGenerator(prefix="ITEM_")
    the_circus.attach_generator("items_gen", item_id_gen)

    # one batch of 5 item ids per point of sale
    stock_per_pos = []
    for _ in point_of_sales.ids:
        stock_per_pos.append(item_id_gen.generate(5))

    items_rel = point_of_sales.get_relationship("items")
    items_rel.add_grouped_relations(
        from_ids=point_of_sales.ids,
        grouped_ids=stock_per_pos)
def build_music_repo():
    """
    Builds a music repository population of 5 genres that is not attached to
    any circus (it is constructed with circus=None).

    :return: the created population, with a "genre_name" attribute and a
        "songs" relationship created with seed 18
    """
    genre_id_gen = gen.SequencialGenerator(prefix="GENRE_")

    # "detached" population: built directly instead of through a circus
    music_repo = population.Population(
        circus=None,
        size=5,
        ids_gen=genre_id_gen)

    genre_names = ["blues", "jazz", "electro", "pop", "rock"]
    music_repo.create_attribute(name="genre_name", init_values=genre_names)

    music_repo.create_relationship(name="songs", seed=18)

    return music_repo
def add_listener(the_circus):
    """
    Creates a "user" population of 5 members (ids prefixed with user_) and
    gives each of them Faker-generated FIRST_NAME and LAST_NAME attributes.
    """
    listeners = the_circus.create_population(
        name="user",
        size=5,
        ids_gen=gen.SequencialGenerator(prefix="user_"))

    # (attribute name, Faker method), handled in this order so that the
    # seeds are drawn from the circus seeder in the same sequence as before
    name_attributes = (
        ("FIRST_NAME", "first_name"),
        ("LAST_NAME", "last_name"),
    )

    for attribute_name, faker_method in name_attributes:
        listeners.create_attribute(
            name=attribute_name,
            init_gen=gen.FakerGenerator(method=faker_method,
                                        seed=next(the_circus.seeder)))
def add_songids_to_repos(the_circus):
    """
    Generates 1000 S_-prefixed song ids for each member of the
    "music_repository" population and stores them in its "songs"
    relationship.
    """
    music_repo = the_circus.populations["music_repository"]
    song_id_gen = gen.SequencialGenerator(prefix="S_")

    # one list of 1000 song ids per genre of the repository
    songs_per_genre = []
    for _ in music_repo.ids:
        songs_per_genre.append(song_id_gen.generate(size=1000))

    songs_rel = music_repo.get_relationship("songs")
    songs_rel.add_grouped_relations(
        # one "from" id per genre...
        from_ids=music_repo.ids,
        # ...matched with its list of song ids
        grouped_ids=songs_per_genre)
def create_products(circus, params):
    """
    Creates one population per entry of params["products"] and decorates the
    "sim" and "handset" actors with their descriptive attributes.

    :param circus: circus in which populations are created; its seeder
        provides the seed of every generator built here
    :param params: parameters; params["products"] maps each product name to
        a description holding at least "prefix" and "product_types_num"
    :return: None
    """

    def seeded_faker(method):
        # Faker-based generator seeded with the next seed of the circus
        return random_generators.FakerGenerator(
            method=method, seed=next(circus.seeder))

    def seeded_constant_gen(key):
        # generator obtained from snd_constants for the given key, seeded
        # with the next seed of the circus
        return snd_constants.gen(key, next(circus.seeder))

    for product_name, description in params["products"].items():
        product_id_gen = random_generators.SequencialGenerator(
            prefix=description["prefix"])

        product = circus.create_population(
            name=product_name,
            ids_gen=product_id_gen,
            size=description["product_types_num"])

        product.create_attribute(name="product_description",
                                 init_gen=seeded_faker("text"))

    # SIM specific attributes
    sims = circus.actors["sim"]
    sims.create_attribute("type", init_gen=seeded_constant_gen("SIM_TYPES"))
    sims.create_attribute("ean", init_gen=seeded_faker("ean"))

    # handset specific attributes
    handsets = circus.actors["handset"]
    handsets.create_attribute("tac_id", init_gen=seeded_faker("ean"))
    handsets.create_attribute(
        "category", init_gen=seeded_constant_gen("HANDSET_CATEGORY"))
    handsets.create_attribute(
        "internet_technology", init_gen=seeded_constant_gen("SIM_CAP"))
    handsets.create_attribute(
        "brand", init_gen=seeded_constant_gen("HANDSET_BRANDS"))
    handsets.create_attribute("ean", init_gen=seeded_faker("ean"))
def create_customer_population(the_circus):
    """
    Adds a "customer" population of 2500 members (CUS_-prefixed ids) to the
    circus, with Faker-generated first and last names and a "my_items"
    relationship.
    """
    customer_id_gen = gen.SequencialGenerator(prefix="CUS_")
    customer = the_circus.create_population(
        name="customer", size=2500, ids_gen=customer_id_gen)

    first_name_gen = gen.FakerGenerator(
        method="first_name", seed=next(the_circus.seeder))
    customer.create_attribute(name="FIRST_NAME", init_gen=first_name_gen)

    last_name_gen = gen.FakerGenerator(
        method="last_name", seed=next(the_circus.seeder))
    customer.create_attribute(name="LAST_NAME", init_gen=last_name_gen)

    customer.create_relationship(name="my_items")
def add_products_populations(the_circus, size=100,
                             output_path='results/product_score.txt'):
    """
    Creates a products population, attaches it to the circus and exports the
    score of each product to a tab-separated file.

    The population is named "products", uses PID_-prefixed ids and gets two
    Faker-generated attributes, "ProductName" (2-word sentence) and
    "ProductScore" (random integer between 0 and 1000), plus a
    "my_products" relationship.

    :param the_circus: circus in which the population is created; its seeder
        provides the seeds of the attribute generators
    :param size: number of products to create
    :param output_path: file receiving the PID / ProductScore export. Its
        parent directory is created when it does not exist yet.
    :return: the created products population
    """
    import csv
    import os

    products = the_circus.create_population(
        name="products", size=size,
        ids_gen=gen.SequencialGenerator(prefix="PID_"))

    product_name_gen = gen.FakerGenerator(method="sentence", nb_words=2,
                                          seed=next(the_circus.seeder))
    products.create_attribute(name="ProductName", init_gen=product_name_gen)

    score_gen = gen.FakerGenerator(method="random_int", min=0, max=1000,
                                   seed=next(the_circus.seeder))
    products.create_attribute(name="ProductScore", init_gen=score_gen)

    products.create_relationship(name="my_products")

    # Write products to the score file.
    #
    # Example product_score.txt:
    # ============================
    # 2939    600
    # 2123    300
    # ============================
    scores = products.to_dataframe().copy()

    # the product id is the index of the dataframe: expose it as a PID column
    # converted with seq2int (presumably stripping the 'PID_' prefix to get
    # an integer -- helper defined elsewhere, to be confirmed)
    scores['PID'] = scores.index
    scores['PID'] = scores['PID'].apply(lambda x: seq2int(x, 'PID_'))
    scores.sort_values(by=['PID'], inplace=True)

    # to_csv does not create missing directories: make sure the target
    # directory exists instead of failing after the population was built
    output_dir = os.path.dirname(output_path)
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    scores.to_csv(output_path, sep='\t', index=False,
                  quoting=csv.QUOTE_NONNUMERIC,
                  columns=['PID', 'ProductScore'])
    print('Written to {}'.format(output_path))

    return products
def create_pos_population(the_circus):
    """
    Adds a "point_of_sale" population of 1000 members (POS_-prefixed ids) to
    the circus, with Faker-generated NAME, CITY and COMPANY attributes and an
    "items" relationship.
    """
    pos = the_circus.create_population(
        name="point_of_sale",
        size=1000,
        ids_gen=gen.SequencialGenerator(prefix="POS_"))

    # (attribute name, Faker method), in the order in which seeds are drawn
    faker_attributes = (
        ("NAME", "name"),
        ("CITY", "city"),
        ("COMPANY", "company"),
    )

    for attribute_name, faker_method in faker_attributes:
        attribute_gen = gen.FakerGenerator(method=faker_method,
                                           seed=next(the_circus.seeder))
        pos.create_attribute(attribute_name, init_gen=attribute_gen)

    pos.create_relationship(name="items")
def add_user_population(the_circus, size=100):
    """
    Adds a "users" population (UID_-prefixed ids) to the circus, with
    Faker-generated first and last names and a "my_items" relationship.

    :param the_circus: circus in which the population is created
    :param size: number of users to create
    :return: the created users population
    """

    def seeded_name_gen(faker_method):
        # Faker generator seeded with the next seed of the circus
        return gen.FakerGenerator(method=faker_method,
                                  seed=next(the_circus.seeder))

    users = the_circus.create_population(
        name="users", size=size,
        ids_gen=gen.SequencialGenerator(prefix="UID_"))

    users.create_attribute(name="FIRST_NAME",
                           init_gen=seeded_name_gen("first_name"))
    users.create_attribute(name="LAST_NAME",
                           init_gen=seeded_name_gen("last_name"))

    users.create_relationship(name="my_items")

    return users
def add_song_populations(the_circus):
    """
    Creates the "song" population and fills it with 1000 generated songs per
    genre of the "music_repository" population.

    The population starts empty; for each genre, song ids are generated,
    linked to the genre through the "songs" relationship of the repository,
    and their attributes (artist, title, recording year, duration, genre)
    are generated and added to the population.
    """
    songs = the_circus.create_population(
        name="song", size=0,
        ids_gen=gen.SequencialGenerator(prefix="SONG_"))

    # the population is empty at this point, so attributes can be declared
    # without any initial value
    for attribute_name in ("artist_name", "song_genre", "title",
                           "duration_seconds", "recording_year"):
        songs.create_attribute(name=attribute_name)

    song_id_gen = gen.SequencialGenerator(prefix="S_")

    # artist names are picked among 200 pre-generated ones, which creates
    # some redundancy in the generated dataset
    artist_pool = gen.FakerGenerator(
        method="name", seed=next(the_circus.seeder)).generate(size=200)
    artist_name_gen = gen.NumpyRandomGenerator(
        method="choice", a=artist_pool, seed=next(the_circus.seeder))

    title_gen = gen.FakerGenerator(
        method="sentence", seed=next(the_circus.seeder),
        nb_words=4, variable_nb_words=True)

    # recording dates within the desired range, reduced to their year
    recording_date_gen = gen.FakerGenerator(
        method="date_time_between_dates",
        seed=next(the_circus.seeder),
        datetime_start=pd.Timestamp("1910-10-20"),
        datetime_end=pd.Timestamp("2016-12-02"))
    year_gen = recording_date_gen.map(f=lambda d: d.year)

    duration_gen = gen.ParetoGenerator(
        xmin=60, seed=next(the_circus.seeder), force_int=True, a=1.2)

    repo = the_circus.populations["music_repository"]
    genre_names = repo.get_attribute("genre_name").get_values()

    for genre_id, genre_name in genre_names.items():

        # chain of operations generating every attribute of a song of
        # this genre
        genre_gen = gen.ConstantGenerator(value=genre_name)
        init_attribute = ops.Chain(
            artist_name_gen.ops.generate(named_as="artist_name"),
            title_gen.ops.generate(named_as="title"),
            year_gen.ops.generate(named_as="recording_year"),
            duration_gen.ops.generate(named_as="duration_seconds"),
            genre_gen.ops.generate(named_as="song_genre"))

        # dataframe of empty songs: only a SONG_ID column for now
        song_ids = song_id_gen.generate(size=1000)
        empty_songs = story.Story.init_story_data(
            member_id_field_name="SONG_ID",
            active_ids=song_ids)

        # the new songs can already be linked to their genre
        repo.get_relationship("songs").add_grouped_relations(
            from_ids=[genre_id], grouped_ids=[song_ids])

        # generation of all the attribute columns
        initialized_songs, _ = init_attribute(empty_songs)
        initialized_songs.drop(["SONG_ID"], axis=1, inplace=True)

        # works because the columns produced by init_attribute match the
        # attribute names of the population exactly
        songs.update(initialized_songs)

    # year and duration must be handled as integers
    for int_attribute in ("recording_year", "duration_seconds"):
        songs.get_attribute(int_attribute).transform_inplace(int)
def add_bulk_restock_actions(circus, params,
                             buyer_actor_name, seller_actor_name):
    """
    Creates, for each product of params["products"], the story
    "<buyer_actor_name>_<product>_bulk_purchase" in which buyers purchase a
    bulk of items from one of their providers.

    The story has no timer: as noted below, it is meant to be triggered
    externally. Its operations select a seller, move items from the seller's
    stock to the buyer's stock, generate prices, types and transaction ids
    for the moved items, log the result, and finally hand over to
    trigger_action_if_low_stock for the seller with the action
    "<seller_actor_name>_<product>_bulk_purchase".

    :param circus: circus providing the actors, the clock, the seeder and
        the "pos_<product>_bulk_size_gen" generators
    :param params: parameters; each description in params["products"] is
        read for "item_prices", "max_pos_stock_triggering_pos_restock" and
        "restock_sigmoid_shape"
    :param buyer_actor_name: name (in circus.actors) of the buying actor
    :param seller_actor_name: name (in circus.actors) of the selling actor
    """

    buyer = circus.actors[buyer_actor_name]
    seller = circus.actors[seller_actor_name]

    # ratio between the number of POS and the number of buyers, used below
    # to scale POS-level quantities up to the buyer level
    # NOTE(review): this is an integer division on Python 2 -- confirm the
    # targeted interpreter
    pos_per_buyer = circus.actors["pos"].size / buyer.size

    for product, description in params["products"].items():
        action_name = "{}_{}_bulk_purchase".format(buyer_actor_name, product)

        # name of the equivalent purchase story one level up the chain,
        # where the seller is the one buying
        upper_level_restock_action_name = "{}_{}_bulk_purchase".format(
            seller_actor_name, product)

        logging.info("creating {} action".format(action_name))

        # generator of item prices and type
        item_price_gen = random_generators.NumpyRandomGenerator(
            method="choice", a=description["item_prices"],
            seed=next(circus.seeder))

        item_prices_gen = random_generators.DependentBulkGenerator(
            element_generator=item_price_gen)

        item_type_gen = random_generators.NumpyRandomGenerator(
            method="choice",
            a=circus.actors[product].ids,
            seed=next(circus.seeder))

        item_types_gen = random_generators.DependentBulkGenerator(
            element_generator=item_type_gen)

        # transaction ids, prefixed with TX_<buyer_actor_name>_<product>
        tx_gen = random_generators.SequencialGenerator(
            prefix="_".join(["TX", buyer_actor_name, product]))

        tx_seq_gen = random_generators.DependentBulkGenerator(
            element_generator=tx_gen)

        # trigger for another bulk purchase done by the seller if their own
        # stock gets low
        seller_low_stock_bulk_purchase_trigger = random_generators.DependentTriggerGenerator(
            value_to_proba_mapper=operations.bounded_sigmoid(
                x_min=pos_per_buyer,
                x_max=description["max_pos_stock_triggering_pos_restock"] * pos_per_buyer,
                shape=description["restock_sigmoid_shape"],
                incrementing=False))

        # bulk size distribution is a scaled version of POS bulk size distribution
        bulk_size_gen = scale_quantity_gen(stock_size_gen=circus.generators[
            "pos_{}_bulk_size_gen".format(product)],
                                           scale_factor=pos_per_buyer)

        build_purchase_action = circus.create_story(
            name=action_name,
            initiating_actor=buyer,
            actorid_field="BUYER_ID",

            # no timer or activity: dealers bulk purchases are triggered externally
        )

        build_purchase_action.set_operations(
            circus.clock.ops.timestamp(named_as="TIME"),

            # seller taken from the "<product>__provider" relationship of
            # the buyer
            buyer.get_relationship("{}__provider".format(product))
                 .ops.select_one(from_field="BUYER_ID", named_as="SELLER_ID"),

            bulk_size_gen.ops.generate(named_as="REQUESTED_BULK_SIZE"),

            # buyer stock level before the purchase
            buyer.get_relationship(product).ops
                 .get_neighbourhood_size(
                     from_field="BUYER_ID",
                     named_as="OLD_BUYER_STOCK"),

            # TODO: the perfect case would prevent to go over max_stock at this point

            # selecting and removing Sims from dealers
            seller.get_relationship(product).ops \
                  .select_many(
                       from_field="SELLER_ID",
                       named_as="ITEM_IDS",
                       quantity_field="REQUESTED_BULK_SIZE",

                       # if an item is selected, it is removed from the dealer's stock
                       pop=True,

                       # TODO: put this back to False and log the failed purchases
                       discard_missing=True),

            # and adding them to the buyer
            buyer.get_relationship(product).ops.add_grouped(
                from_field="BUYER_ID",
                grouped_items_field="ITEM_IDS"),

            # We do not track the old and new stock of the dealer since the result
            # is misleading: since all purchases are performed in parallel,
            # if a dealer is selected several times, its stock level after the
            # select_many() is the level _after_ all purchases are done, which is
            # typically not what we want to include in the log.
            buyer.get_relationship(product).ops \
                 .get_neighbourhood_size(
                     from_field="BUYER_ID",
                     named_as="NEW_BUYER_STOCK"),

            # actual number of bought items might be different due to out of stock
            operations.Apply(source_fields="ITEM_IDS",
                             named_as="BULK_SIZE",
                             f=lambda s: s.map(len), f_args="series"),

            # Generate some item prices. Note that the same items will have a
            # different price through the whole distribution chain
            item_prices_gen.ops.generate(
                named_as="ITEM_PRICES",
                observed_field="BULK_SIZE"
            ),

            item_types_gen.ops.generate(
                named_as="ITEM_TYPES",
                observed_field="BULK_SIZE"
            ),

            tx_seq_gen.ops.generate(
                named_as="TX_IDS",
                observed_field="BULK_SIZE"
            ),

            # stock-level log of the purchase, under "<action_name>_stock"
            operations.FieldLogger(log_id="{}_stock".format(action_name),
                                   cols=["TIME", "BUYER_ID", "SELLER_ID",
                                         "OLD_BUYER_STOCK", "NEW_BUYER_STOCK",
                                         "BULK_SIZE"]),

            # item-level log of the purchase, under "<action_name>"
            operations.FieldLogger(log_id=action_name,
                                   cols=["TIME", "BUYER_ID", "SELLER_ID"],
                                   exploded_cols=["TX_IDS", "ITEM_IDS",
                                                  "ITEM_PRICES", "ITEM_TYPES"]),

            # the seller may in turn need to restock from its own provider
            trigger_action_if_low_stock(
                circus,
                stock_relationship=seller.get_relationship(product),
                actor_id_field="SELLER_ID",
                restock_trigger=seller_low_stock_bulk_purchase_trigger,
                triggered_action_name=upper_level_restock_action_name
            )
        )
def add_initial_stock_as_purchases(circus, buyer_actor_name, params):
    """
    Creates, for each product of params["products"], a story named
    "initial_<buyer_actor_name>_<product>_bulk_purchase" that logs the
    current stock of every buyer as if it had just been purchased.

    The logs use the same ids and columns as the regular
    "<buyer_actor_name>_<product>_bulk_purchase" logs, with an old stock
    of 0 and a new stock equal to the size of the logged bulk.

    :param circus: circus providing the actors, the clock and the seeder
    :param buyer_actor_name: name (in circus.actors) of the actor whose
        stock is logged
    :param params: parameters; each description in params["products"] is
        read for "item_prices"
    """

    def as_bulk_gen(element_generator):
        # wraps a generator of single elements in a DependentBulkGenerator
        return random_generators.DependentBulkGenerator(
            element_generator=element_generator)

    for product, description in params["products"].items():
        action_name = "{}_{}_bulk_purchase".format(buyer_actor_name, product)

        logging.info("adding initial {} stock of {} as purchases".format(
            product, buyer_actor_name))

        buyer = circus.actors[buyer_actor_name]

        # generators of the prices and types of the items of a bulk
        price_choice_gen = random_generators.NumpyRandomGenerator(
            method="choice",
            a=description["item_prices"],
            seed=next(circus.seeder))
        bulk_prices_gen = as_bulk_gen(price_choice_gen)

        type_choice_gen = random_generators.NumpyRandomGenerator(
            method="choice",
            a=circus.actors[product].ids,
            seed=next(circus.seeder))
        bulk_types_gen = as_bulk_gen(type_choice_gen)

        # generator of the transaction ids of a bulk
        tx_prefix = "_".join(["TX_initial", buyer_actor_name, product])
        bulk_tx_ids_gen = as_bulk_gen(
            random_generators.SequencialGenerator(prefix=tx_prefix))

        # executed by everybody once, at the beginning: constant timer of 0
        # that is never automatically reset...
        initial_stock_story = circus.create_story(
            name="initial_{}".format(action_name),
            initiating_actor=buyer,
            actorid_field="BUYER_ID",
            timer_gen=random_generators.ConstantDependentGenerator(0),
            auto_reset_timer=False)

        # ...and reset manually one single time
        initial_stock_story.reset_timers()

        story_steps = [
            circus.clock.ops.timestamp(named_as="TIME", random=False),

            buyer.get_relationship("{}__provider".format(product))
                 .ops.select_one(from_field="BUYER_ID",
                                 named_as="SELLER_ID"),

            # the whole current stock of the buyer is the "purchased" bulk
            buyer.get_relationship(product)
                 .ops.select_all(from_field="BUYER_ID",
                                 named_as="ITEM_IDS"),

            operations.Apply(source_fields="ITEM_IDS",
                             named_as="BULK_SIZE",
                             f=lambda s: s.map(len), f_args="series"),

            bulk_prices_gen.ops.generate(named_as="ITEM_PRICES",
                                         observed_field="BULK_SIZE"),

            bulk_types_gen.ops.generate(named_as="ITEM_TYPES",
                                        observed_field="BULK_SIZE"),

            bulk_tx_ids_gen.ops.generate(named_as="TX_IDS",
                                         observed_field="BULK_SIZE"),

            # stock goes from 0 to the size of the bulk
            random_generators.ConstantGenerator(value=0).ops.generate(
                named_as="OLD_BUYER_STOCK"),

            operations.Apply(source_fields="BULK_SIZE",
                             named_as="NEW_BUYER_STOCK",
                             f=lambda s: s, f_args="series"),

            operations.FieldLogger(
                log_id="{}_stock".format(action_name),
                cols=["TIME", "BUYER_ID", "SELLER_ID", "OLD_BUYER_STOCK",
                      "NEW_BUYER_STOCK", "BULK_SIZE"]),

            operations.FieldLogger(
                log_id=action_name,
                cols=["TIME", "BUYER_ID", "SELLER_ID"],
                exploded_cols=["TX_IDS", "ITEM_IDS", "ITEM_PRICES",
                               "ITEM_TYPES"]),
        ]

        initial_stock_story.set_operations(*story_steps)
"n_telcos": 1, "n_field_agents": 100, "n_customers": 20000, } if __name__ == "__main__": util_functions.setup_logging() snd = circus.Circus(name=static_params["circus_name"], master_seed=12345, start=pd.Timestamp(static_params["clock_start_date"]), step_duration=pd.Timedelta( static_params["clock_time_step"])) distributor_id_gen = random_generators.SequencialGenerator(prefix="DIST_") snd_products.create_products(snd, static_params) snd_geo.load_geo_actors(snd, static_params) snd_customers.add_customers(snd, static_params) snd_pos.add_pos(snd, static_params) snd_dealer.add_telcos(snd, static_params, distributor_id_gen) snd_dealer.prepare_dealers(snd, params=static_params) snd_field_agents.create_field_agents(snd, static_params) logging.info("created circus:\n{}".format(snd)) snd.save_to_db(overwrite=True)