Example #1
    def load_from_db(circus_name):

        logging.info("loading circus {}".format(circus_name))

        namespace_folder = db.namespace_folder(namespace=circus_name)
        config_file = os.path.join(namespace_folder, "circus_config.json")

        with open(config_file, "r") as config_h:
            config = json.load(config_h)

            clock_config = {
                "start": pd.Timestamp(config["clock_config"]["start"]),
                "step_duration": pd.Timedelta(
                    str(config["clock_config"]["step_duration"]))
            }

            circus = Circus(name=circus_name, master_seed=config["master_seed"],
                            **clock_config)

            for population_id in db.list_populations(namespace=circus_name):
                circus.load_population(population_id)

            for gen_type, gen_id in db.list_generators(namespace=circus_name):
                circus.load_generator(gen_type=gen_type, gen_id=gen_id)

            return circus
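For reference, a minimal usage sketch, assuming load_from_db is exposed as a static method on Circus (the enclosing class and any decorator are not shown in this excerpt):

import logging

logging.basicConfig(level=logging.INFO)

# rebuilds the circus from circus_config.json in the namespace folder,
# then reloads its populations and generators
circus = Circus.load_from_db("my_circus")
print(circus.name)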
Example #2
def save_pos_as_partial_ids_csv(circus, params):

    target_file = os.path.join(db.namespace_folder(circus.name),
                               "pos_id_msisdn.csv")

    # Right now, all POS sell all products, so every POS appears once per
    # product in the partial_ids reference table
    pos_df = circus.actors["pos"].to_dataframe().reset_index()

    pos_df = pos_df.rename(columns={
        "MONGO_ID": "id",
        "index": "partial_id",
        "CONTACT_PHONE": "msisdn",
    })[["id", "partial_id", "msisdn"]]

    # Build one copy of the POS table per product. tmp_df must be a copy,
    # otherwise each iteration would mutate pos_df in place, and
    # DataFrame.append is gone in pandas 2.0, hence pd.concat at the end.
    product_frames = []
    for product in params["products"].keys():
        tmp_df = pos_df.copy()
        tmp_df["product_type_id"] = product
        product_frames.append(tmp_df)

    partial_ids_df = pd.concat(product_frames, ignore_index=True)

    partial_ids_df = partial_ids_df.reindex(
        columns=["id", "product_type_id", "partial_id", "msisdn"])

    partial_ids_df.to_csv(target_file, index=False)
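The per-product loop above is effectively a cross join between the POS table and the product list. A self-contained toy version of the same pattern, with made-up data and no trumania dependency:

import pandas as pd

pos = pd.DataFrame({"id": ["p1", "p2"], "msisdn": ["111", "222"]})
products = ["sim", "handset"]

# one copy of the POS table per product, stacked into a single frame
frames = []
for product in products:
    tmp = pos.copy()  # copy, so the loop does not mutate pos
    tmp["product_type_id"] = product
    frames.append(tmp)

result = pd.concat(frames, ignore_index=True)
# 4 rows: one per (pos, product) combination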
Example #3
def save_providers_csv(circus, params):

    target_file = os.path.join(db.namespace_folder(circus.name),
                               "distributor_agent_product.csv")

    pos_df = circus.actors["pos"].to_dataframe().reset_index()

    # collect the per-product link tables and concatenate them once at the
    # end (DataFrame.append is gone in pandas 2.0)
    provider_frames = []

    for product in params["products"].keys():

        # add links between POS and DIST_L1

        pos_dist_l2 = circus.actors["pos"] \
            .relationships["{}__provider".format(product)].get_relations()

        dist_l2_dist_l1 = circus.actors["dist_l2"] \
            .relationships["{}__provider".format(product)].get_relations()

        # join the dist_l1 responsible for each pos
        pos_dist_l1 = pd.merge(left=pos_dist_l2,
                               right=dist_l2_dist_l1[["from", "to"]],
                               left_on="to",
                               right_on="from",
                               suffixes=('_pos', '_dist_l1'))

        # join pos mongo id
        pos_dist_l1 = pd.merge(left=pos_dist_l1,
                               right=pos_df[["index", "MONGO_ID"]],
                               left_on="from_pos",
                               right_on="index")

        pos_dist_l1 = pos_dist_l1.rename(columns={
            "MONGO_ID": "agent_id",
            "to_dist_l1": "distributor_id"
        })

        pos_dist_l1["product_type_id"] = product

        pos_dist_l1 = pos_dist_l1.reindex(
            columns=["distributor_id", "agent_id", "product_type_id"])

        provider_frames.append(pos_dist_l1)

        # add links between DIST_L2 and DIST_L1

        dist_l2_dist_l1 = dist_l2_dist_l1.rename(columns={
            "from": "agent_id",
            "to": "distributor_id"
        })

        dist_l2_dist_l1["product_type_id"] = product

        dist_l2_dist_l1 = dist_l2_dist_l1.reindex(
            columns=["distributor_id", "agent_id", "product_type_id"])

        provider_frames.append(dist_l2_dist_l1)

    providers_df = pd.concat(provider_frames, ignore_index=True)

    providers_df.to_csv(target_file, index=False)
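The heart of this function is a two-hop join: the pos -> dist_l2 relations are merged with the dist_l2 -> dist_l1 relations to recover which dist_l1 ultimately serves each POS. A self-contained sketch of that join on toy relation tables, in the same "from"/"to" shape that get_relations() returns:

import pandas as pd

pos_dist_l2 = pd.DataFrame({"from": ["pos_1", "pos_2"],
                            "to": ["d2_a", "d2_a"]})
dist_l2_dist_l1 = pd.DataFrame({"from": ["d2_a"], "to": ["d1_x"]})

# chain the hops: a pos's dist_l2 ("to") matches the second table's "from"
pos_dist_l1 = pd.merge(left=pos_dist_l2,
                       right=dist_l2_dist_l1,
                       left_on="to",
                       right_on="from",
                       suffixes=("_pos", "_dist_l1"))

# from_pos is the pos id, to_dist_l1 its responsible dist_l1
print(pos_dist_l1[["from_pos", "to_dist_l1"]])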
Example #4
def save_pos_as_mobile_sync_csv(circus):

    target_file = os.path.join(db.namespace_folder(circus.name),
                               "points_of_interest.csv")

    logging.info(
        "generating a mobile-sync csv pos file in {}".format(target_file))

    pos_df = circus.actors["pos"].to_dataframe().reset_index()
    sites_df = circus.actors["sites"].to_dataframe()

    pos_df = pd.merge(left=pos_df,
                      right=sites_df[["GEO_LEVEL_1", "GEO_LEVEL_2"]],
                      left_on="SITE",
                      right_index=True)

    pos_df = pos_df.rename(columns={
        "MONGO_ID": "id",
        "index": "agent_code",
        "AGENT_NAME": "name",
        "CONTACT_NAME": "contact_name",
        "CONTACT_PHONE": "contact_phone_number",
        "LONGITUDE": "longitude",
        "LATITUDE": "latitude",
        "GEO_LEVEL_1": "geo_level_1",
        "GEO_LEVEL_2": "geo_level_2"
    }).drop(["ATTRACT_BASE", "ATTRACTIVENESS", "ATTRACT_DELTA", "SITE"],
            axis=1)

    pos_df["pos_type"] = "grocery store"
    pos_df["pos_channel"] = "franchise"

    for col in ["picture_uri", "geo_level_3", "geo_level_4", "geo_level_5"]:
        pos_df[col] = "some_{}".format(col)

    for bcol in [
            "is_pos", "electronic_recharge_activity_flag",
            "physical_recharge_activity_flag", "sim_activity_flag",
            "handset_activity_flag", "mfs_activity_flag"
    ]:
        pos_df[bcol] = True

    pos_df = pos_df.reindex(columns=[
        "id", "name", "latitude", "longitude", "agent_code", "geo_level_1",
        "geo_level_2", "geo_level_3", "geo_level_4", "geo_level_5", "is_pos",
        "contact_name", "contact_phone_number", "pos_type", "pos_channel",
        "picture_uri", "electronic_recharge_activity_flag",
        "physical_recharge_activity_flag", "sim_activity_flag",
        "handset_activity_flag", "mfs_activity_flag"
    ])

    pos_df.to_csv(target_file, index=False)
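The site attributes are pulled in with a column-to-index merge (right_index=True), since sites_df keeps its ids in the index rather than in a column. A toy version of that join:

import pandas as pd

pos = pd.DataFrame({"SITE": ["s1", "s2"], "name": ["a", "b"]})
sites = pd.DataFrame({"GEO_LEVEL_1": ["north", "south"],
                      "GEO_LEVEL_2": ["n1", "s1"]},
                     index=["s1", "s2"])

# right_index=True matches pos["SITE"] against the index of sites
enriched = pd.merge(left=pos, right=sites,
                    left_on="SITE", right_index=True)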
Example #5
    def save_params_to_db(self, params_type, params):
        """
        Saves the params object to the circus folder in the DB for future reference
        :param params_type: "build", "run" or "target"
        :param params: the params object
        """
        target_file = os.path.join(db.namespace_folder(self.name),
                                   "params_{}.json".format(params_type))

        with open(target_file, "w") as outfile:
            json.dump(params, outfile)
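A minimal usage sketch, assuming circus is an instance of the class defining this method; the params dict shown here is hypothetical (anything json.dump can serialise works):

# writes params_build.json into the circus namespace folder
build_params = {"n_pos": 1000, "products": {"sim": {}, "handset": {}}}
circus.save_params_to_db("build", build_params)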
Example #6
def create_distl1_daily_targets(product, write_mode):
    """
    Create some fake sellin and sellout target per distributor l1
    based on actual.
    """

    target_file = os.path.join(
        db.namespace_folder(circus_name),
        "distributor_product_sellin_sellout_target.csv")

    logging.info(" producing sellin sellout target for dist_l1s in {}".format(
        target_file))

    # contains info on dist_l1 bulk purchases (dist_l1 buys from telco)
    # TIME, BUYER_ID, SELLER_ID, OLD_BUYER_STOCK, NEW_BUYER_STOCK, BULK_SIZE
    input_file_name = "output/{}/dist_l1_" \
                      "{}_bulk_purchase_stock.csv".format(
                          circus_name, product
                      )

    bulk_purchases = pd.read_csv(input_file_name, parse_dates=[0])
    bulk_purchases["day"] = bulk_purchases["TIME"].apply(
        lambda s: s.strftime("%D"))

    # named aggregation: the dict-of-functions form of .agg was removed
    # from pandas
    mean_daily_sells = bulk_purchases\
        .groupby(["BUYER_ID", "day"])["BULK_SIZE"]\
        .agg(target_units="count", target_value="sum")\
        .groupby(level=0)\
        .median()

    for direction in ["sellin", "sellout"]:
        for metric in ["target_units", "target_value"]:
            col = "_".join([direction, metric])
            if metric == "target_units":
                mean_daily_sells[col] = noisified(mean_daily_sells,
                                                  col=metric,
                                                  lb=1,
                                                  col_type=int)
            else:
                mean_daily_sells[col] = noisified(mean_daily_sells,
                                                  col=metric,
                                                  lb=100)

    mean_daily_sells.drop(["target_units", "target_value"],
                          axis=1,
                          inplace=True)
    mean_daily_sells.reset_index(inplace=True)
    mean_daily_sells["product_type_id"] = product
    mean_daily_sells.rename(columns={"BUYER_ID": "distributor_id"},
                            inplace=True)

    to_csv(mean_daily_sells, target_file, write_mode)
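noisified is not shown in these excerpts. As an illustration only, a plausible implementation consistent with how it is called here (a column name, a lower bound lb and an optional col_type) might look like this sketch:

import numpy as np

def noisified(df, col, lb, col_type=float):
    # hypothetical sketch: scale the column by gaussian noise around 1,
    # clip at the lower bound lb and cast to the requested type
    noise = np.random.normal(loc=1, scale=0.1, size=df.shape[0])
    return (df[col] * noise).clip(lower=lb).astype(col_type)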
Example #7
def create_distl1_daily_geo_targets(product, write_mode, nrows=None):
    """
    Create some fake daily geo_l2 target per product/distributor
    """

    target_file = os.path.join(db.namespace_folder(circus_name),
                               "distributor_product_geol2_sellout_target.csv")

    logging.info(" producing geo_l2 sellout target for dist_l1s in {}".format(
        target_file))

    # contains info on dist_l1 bulk purchases (dist_l1 buys from telco)
    # CUST_ID, SITE, POS, CELL_ID, geo_level2_id, distributor_l1, INSTANCE_ID,
    # PRODUCT_ID,FAILED_SALE_OUT_OF_STOCK,TX_ID,VALUE,TIME
    input_file_name = "output/{}/customer_{}_purchase.csv".format(
        circus_name, product)

    customer_purchases = pd.read_csv(input_file_name,
                                     parse_dates=[11],
                                     nrows=nrows)
    customer_purchases["day"] = customer_purchases["TIME"].apply(
        lambda s: s.strftime("%D"))

    customer_purchases["product_type_id"] = product

    # named aggregation, as above
    mean_daily_sells = customer_purchases \
        .groupby(["product_type_id", "distributor_l1",
                  "geo_level2_id", "day"])["VALUE"] \
        .agg(sellout_target_units="count", sellout_target_value="sum") \
        .groupby(level=[0, 1, 2]) \
        .median()\
        .reset_index()

    mean_daily_sells = mean_daily_sells.rename(
        columns={"distributor_l1": "distributor_id"})

    mean_daily_sells["sellout_target_units"] = noisified(
        mean_daily_sells, col="sellout_target_units", lb=25, col_type=np.int)

    mean_daily_sells["sellout_target_value"] = noisified(
        mean_daily_sells, col="sellout_target_value", lb=100)

    to_csv(mean_daily_sells, target_file, write_mode)
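Both target builders follow the same shape: aggregate per (entity, day), then take the median across days to get a "typical day" per entity. A toy version of that two-level groupby, with made-up data:

import pandas as pd

purchases = pd.DataFrame({
    "distributor": ["d1", "d1", "d1", "d2"],
    "day": ["01/01/19", "01/01/19", "01/02/19", "01/01/19"],
    "VALUE": [10.0, 20.0, 40.0, 5.0],
})

# per-day transaction count and total value per distributor
daily = purchases.groupby(["distributor", "day"])["VALUE"] \
    .agg(target_units="count", target_value="sum")

# median across days -> one "typical day" row per distributor
typical_day = daily.groupby(level=0).median()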
Example #8
def build_site_product_pos_target(circus, params):
    """
    Generates a random target count of POS per site and product, based on
    the actual number of POS per site
    """

    target_file = os.path.join(db.namespace_folder(circus.name),
                               "site_product_pos_target.csv")

    sites = circus.actors["sites"]

    target_action = operations.Chain(
        sites.relationships["POS"].ops.get_neighbourhood_size(
            from_field="site_id", named_as="pos_count_target"),
        operations.FieldLogger(log_id="logs"))

    sites_df = pd.DataFrame({"site_id": sites.ids})

    _, logs = target_action(sites_df)

    target_df = logs["logs"]
    target_df["cartesian_product"] = "cp"

    products = pd.DataFrame({
        "product_type_id": list(params["products"].keys()),
        "cartesian_product": "cp"
    })

    target_df = pd.merge(left=target_df,
                         right=products,
                         on="cartesian_product")

    # jitter the actual counts with ~10% gaussian noise, then floor at 10
    fact = np.random.normal(1, .1, size=target_df.shape[0])
    target_df["pos_count_target"] = target_df["pos_count_target"] * fact
    target_df["pos_count_target"] = target_df["pos_count_target"].astype(int)

    target_df.loc[target_df["pos_count_target"] < 10, "pos_count_target"] = 10
    target_df.drop(["cartesian_product"], axis=1, inplace=True)

    target_df.to_csv(target_file, index=False)
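The constant "cartesian_product" key is the classic pre-1.2 pandas trick for a cross join: every site row matches every product row because both sides share the same key. On pandas >= 1.2 the same result is available directly; a toy comparison:

import pandas as pd

sites = pd.DataFrame({"site_id": ["s1", "s2"]})
products = pd.DataFrame({"product_type_id": ["sim", "handset"]})

# constant-key trick, as in the function above
via_key = sites.assign(key="cp") \
    .merge(products.assign(key="cp"), on="key") \
    .drop(columns="key")

# equivalent on pandas >= 1.2
via_cross = sites.merge(products, how="cross")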