Exemple #1
0
def run_data_loader(conn_string, multi_region, cities, num_users, num_rides,
                    num_vehicles, num_histories, num_promo_codes, num_threads,
                    skip_reload_tables, echo_sql):
    """Load generated MovR data, splitting the cities across worker threads.

    The requested totals are divided evenly (rounding up) across the cities,
    and the cities are divided across at most ``num_threads`` threads, each of
    which runs ``load_movr_data`` for its slice of cities.

    Args:
        conn_string: Database connection string, passed through to ``MovR``
            and to every worker.
        multi_region: Passed to ``MovR`` as ``multi_region``.
        cities: Sequence of city names to populate. Must not be empty.
        num_users: Approximate total number of users to generate (> 0).
        num_rides: Approximate total number of rides to generate (> 0).
        num_vehicles: Approximate total number of vehicles to generate (> 0).
        num_histories: Approximate total number of location histories.
        num_promo_codes: Approximate total number of promo codes.
        num_threads: Requested number of worker threads (> 0). Capped at one
            thread per city.
        skip_reload_tables: When true, ``MovR`` is opened with
            ``reset_tables=False``.
        echo_sql: Passed to ``MovR`` and the workers to echo SQL.

    Raises:
        ValueError: If a required object count is not positive, if ``cities``
            is empty, or if ``num_threads`` is not positive.
    """
    if num_users <= 0 or num_rides <= 0 or num_vehicles <= 0:
        raise ValueError("The number of objects to generate must be > 0.")
    # The city count and the thread count are both used as divisors below.
    # Reject bad values here, before the database is touched, instead of
    # failing later with a ZeroDivisionError.
    if not cities:
        raise ValueError("At least one city is required to load data.")
    if num_threads <= 0:
        raise ValueError("The number of threads must be > 0.")

    start_time = time.time()

    logging.info("Loading MovR")

    # The MovR context is opened only for whatever setup it performs with the
    # given options (reset_tables is passed through); the handle is not used.
    with MovR(conn_string,
              multi_region=multi_region,
              reset_tables=(not skip_reload_tables),
              echo=echo_sql):

        logging.info("Loading cities %s.", cities)
        logging.info(
            "Loading movr data with ~%d users, ~%d vehicles, ~%d rides, ~%d histories, and ~%d promo codes.",
            num_users, num_vehicles, num_rides, num_histories, num_promo_codes)

    # don't create more than 1 thread per city
    usable_threads = min(num_threads, len(cities))
    if usable_threads < num_threads:
        logging.info(
            "Only using %d of %d requested threads, since we only create at most one thread per city.",
            usable_threads, num_threads)

    num_users_per_city = int(math.ceil(float(num_users) / len(cities)))
    num_rides_per_city = int(math.ceil(float(num_rides) / len(cities)))
    num_vehicles_per_city = int(math.ceil(float(num_vehicles) / len(cities)))
    num_histories_per_city = int(math.ceil(float(num_histories) / len(cities)))

    cities_per_thread = int(math.ceil(float(len(cities)) / usable_threads))
    # Promo codes are generated once per worker rather than once per city, so
    # the total is divided by the (fractional) number of city slices.
    num_promo_codes_per_thread = int(
        math.ceil(
            float(num_promo_codes) / (float(len(cities)) / cities_per_thread)))

    original_city_count = len(cities)
    remaining_cities = cities
    for _ in range(usable_threads):
        if not remaining_cities:
            # Rounding up cities_per_thread can exhaust the list early.
            break
        city_slice = remaining_cities[:cities_per_thread]
        remaining_cities = remaining_cities[cities_per_thread:]
        worker = threading.Thread(
            target=load_movr_data,
            args=(conn_string, num_users_per_city, num_vehicles_per_city,
                  num_rides_per_city, num_histories_per_city,
                  num_promo_codes_per_thread, city_slice, echo_sql))
        worker.start()

    # keep main thread alive so we can catch ctrl + c
    while threading.active_count() > 1:
        time.sleep(0.1)

    duration = time.time() - start_time

    logging.info("Populated %s cities in %f seconds.", original_city_count,
                 duration)
Exemple #2
0
def load_movr_data(conn_string, num_users, num_vehicles, num_rides, num_histories, num_promo_codes_per_thread, cities, echo_sql=False):
    """Generate users, vehicles, rides and histories for each city, then promo codes.

    Intended to run as a worker thread body. Cities are processed in order;
    if the module-level ``TERMINATE_GRACEFULLY`` flag is set, the worker stops
    before starting the next city and does not generate promo codes.

    Args:
        conn_string: Database connection string used for ``MovR`` and for the
            engine handed to the ``add_*`` helpers.
        num_users: Number of users to generate per city (> 0).
        num_vehicles: Number of vehicles to generate per city (> 0).
        num_rides: Number of rides to generate per city (> 0).
        num_histories: Number of vehicle location histories per city.
        num_promo_codes_per_thread: Number of promo codes this worker adds
            once all of its cities are populated.
        cities: Iterable of city names handled by this worker.
        echo_sql: Passed to ``MovR`` and ``create_engine`` to echo SQL.

    Raises:
        ValueError: If ``num_users``, ``num_rides`` or ``num_vehicles`` is not
            positive.
    """
    if num_users <= 0 or num_rides <= 0 or num_vehicles <= 0:
        raise ValueError("The number of objects to generate must be > 0.")

    start_time = time.time()
    with MovR(conn_string, echo=echo_sql):
        engine = create_engine(
            conn_string, echo=echo_sql)
        try:
            for city in cities:
                if TERMINATE_GRACEFULLY:
                    break

                logging.info("Generating user data for %s...", city)
                add_users(engine, num_users, city)
                logging.info("Generating vehicle data for %s...", city)
                add_vehicles(engine, num_vehicles, city)
                logging.info("Generating ride data for %s...", city)
                add_rides(engine, num_rides, city)
                logging.info("Generating location history data for %s...", city)
                add_vehicle_location_histories(engine, num_histories, city)
                # start_time is taken once, so this is the time elapsed since
                # the worker started, not the time spent on this city alone.
                logging.info("Populated %s in %f seconds.",
                             city, time.time() - start_time)

            # A termination request means "stop doing work": skip the promo
            # code batch as well instead of running it after the loop exits.
            if TERMINATE_GRACEFULLY:
                logging.debug("Terminating...")
                return

            logging.info("Generating %s promo codes...",
                         num_promo_codes_per_thread)
            add_promo_codes(engine, num_promo_codes_per_thread)
        finally:
            # The engine is private to this call; release its pooled
            # connections rather than leaving that to garbage collection.
            engine.dispose()
Exemple #3
0
def configure_multi_region(conn_string, primary_region, city_list, region_city_pair, echo_sql, preview):
    """Assign cities to regions and apply (or preview) the multi-region schema changes.

    Opens ``MovR`` in multi-region mode, fetches the regions and cities,
    builds a region map with ``assign_regions`` and then either prints the
    transformation queries (``preview`` truthy) or runs them.

    Exits the process with status 1 when ``get_regions`` or ``get_cities``
    returns ``None``, and with status 0 after printing a preview.
    """
    started_at = time.time()

    with MovR(conn_string, primary_region=primary_region, multi_region=True, echo=echo_sql) as movr:
        # Both lookups run before either result is validated.
        available_regions = movr.get_regions()
        known_cities = movr.get_cities(city_list)

        if available_regions is None:
            logging.error("To configure your database for multi-region features, you must specify cluster regions at startup.")
            sys.exit(1)

        if known_cities is None:
            logging.error("To configure your database for multi-region features, the database must have rows of data with city values.")
            sys.exit(1)

        region_map = assign_regions(known_cities, available_regions, movr.primary_region, region_city_pair)

        if not preview:
            movr.run_multi_region_transformations(region_map)
        else:
            # Preview mode only prints the statements and then stops the process.
            for statement in movr.get_multi_region_transformations(region_map):
                print(statement)
            sys.exit(0)

    elapsed = time.time() - started_at

    logging.info("Configured multi-region schema in {0} seconds.".format(elapsed))
Exemple #4
0
def run_load_generator(conn_string, read_percentage,
                       connection_duration_in_seconds, city_list,
                       use_multi_region, follower_reads, echo_sql,
                       num_threads):
    """Warm up per-city state, start the load threads, and report stats forever.

    For every city the users, vehicles and active rides are fetched through
    ``MovR``; the process exits with status 1 if a city has no users or no
    vehicles. ``num_threads`` threads running ``simulate_movr_load`` are then
    started, and the calling thread loops indefinitely, printing statistics
    and opening a new stats window every 15 seconds.

    Raises:
        ValueError: If ``read_percentage`` is below 0 or above 1.
    """
    if read_percentage < 0 or read_percentage > 1:
        raise ValueError("read percentage must be between 0 and 1")

    logging.info("simulating movr load for cities %s", city_list)

    local_objects = {}
    global_objects = {}
    movr_objects = {"local": local_objects, "global": global_objects}

    logging.info("warming up....")
    active_rides = []
    with MovR(conn_string, multi_region=use_multi_region,
              echo=echo_sql) as movr:
        for city in city_list:
            city_users = movr.get_users(city, follower_reads)
            city_vehicles = movr.get_vehicles(city, follower_reads)
            local_objects[city] = {
                "users": city_users,
                "vehicles": city_vehicles
            }

            # Load cannot be simulated for a city without both users and vehicles.
            if len(list(city_vehicles)) == 0 or len(list(city_users)) == 0:
                logging.error(
                    "must have users and vehicles for city '%s' in the movr database to generate load. try running with the 'load' command.",
                    city)
                sys.exit(1)

            active_rides.extend(movr.get_active_rides(city, follower_reads))
        global_objects["promo_codes"] = movr.get_promo_codes()

    if not use_multi_region:
        logging.info("running single region queries...")
    else:
        logging.info("running multi-region queries...")

    workers = []
    worker_args = (conn_string, use_multi_region, city_list, movr_objects,
                   active_rides, read_percentage, follower_reads,
                   connection_duration_in_seconds, echo_sql)
    for _ in range(num_threads):
        worker = threading.Thread(target=simulate_movr_load, args=worker_args)
        worker.start()
        workers.append(worker)

    # keep main thread alive to catch exit signals
    while True:
        time.sleep(15)

        stats.print_stats(action_list=[
            ACTION_ADD_VEHICLE, ACTION_GET_VEHICLES, ACTION_UPDATE_RIDE_LOC,
            ACTION_NEW_CODE, ACTION_APPLY_CODE, ACTION_NEW_USER,
            ACTION_START_RIDE, ACTION_END_RIDE
        ])

        stats.new_window()
Exemple #5
0
def simulate_movr_load(conn_string,
                       cities,
                       movr_objects,
                       active_rides,
                       read_percentage,
                       follower_reads,
                       connection_duration_in_seconds,
                       echo_sql=False):
    """Run a randomized read/write workload against MovR until asked to stop.

    Intended as a worker thread body. A ``MovR`` connection is opened and
    used for ``connection_duration_in_seconds``, then reopened. Each tick
    picks a random city and either performs a vehicle read (with probability
    ``read_percentage``) or a write tick. The latency of every operation is
    recorded through ``stats.add_latency_measurement``.

    Args:
        conn_string: Database connection string.
        cities: Non-empty sequence of city names to pick from.
        movr_objects: Dict with ``"local"`` (city -> ``{"users", "vehicles"}``
            lists) and ``"global"`` (``"promo_codes"`` list) entries. It is
            updated in place as new objects are created.
        active_rides: List of ride dicts with ``'city'`` and ``'id'`` keys;
            appended to when rides start and popped from when rides end.
        read_percentage: Probability in [0, 1] that a tick is a read.
        follower_reads: Passed through to ``movr.get_vehicles``.
        connection_duration_in_seconds: Lifetime of each connection.
        echo_sql: Passed to ``MovR`` to echo SQL.

    Returns:
        None, once the module-level ``TERMINATE_GRACEFULLY`` flag is set.
    """

    datagen = Faker()
    while True:
        logging.debug(
            "creating a new connection to %s, which will reset in %d seconds",
            conn_string, connection_duration_in_seconds)
        try:
            with MovR(conn_string, echo=echo_sql) as movr:
                # refresh connections so load can balance among cluster nodes
                # even if the cluster size changes
                timeout = time.time() + connection_duration_in_seconds
                while True:

                    if TERMINATE_GRACEFULLY:
                        logging.debug("Terminating thread.")
                        return

                    if time.time() > timeout:
                        break

                    active_city = random.choice(cities)

                    if random.random() < read_percentage:
                        # simulate user loading screen
                        start = time.time()
                        movr.get_vehicles(active_city, follower_reads, 25)
                        stats.add_latency_measurement("get vehicles",
                                                      time.time() - start)

                    else:

                        # every write tick, simulate the various vehicles updating their locations if they are being used for rides
                        for ride in active_rides[0:10]:

                            latlong = MovRGenerator.generate_random_latlong()
                            start = time.time()
                            movr.update_ride_location(ride['city'],
                                                      ride_id=ride['id'],
                                                      lat=latlong['lat'],
                                                      long=latlong['long'])
                            stats.add_latency_measurement(
                                ACTION_UPDATE_RIDE_LOC,
                                time.time() - start)

                        # do write operations randomly
                        # NOTE: every branch below draws a fresh random number,
                        # so each threshold is conditional on the earlier
                        # branches not having been taken.
                        if random.random() < .03:
                            # simulate a movr marketer creating a new promo code
                            start = time.time()
                            promo_code = movr.create_promo_code(
                                code="_".join(datagen.words(nb=3)) + "_" +
                                str(time.time()),
                                description=datagen.paragraph(),
                                expiration_time=datetime.datetime.now() +
                                datetime.timedelta(days=random.randint(0, 30)),
                                rules={
                                    "type": "percent_discount",
                                    "value": "10%"
                                })
                            stats.add_latency_measurement(
                                ACTION_NEW_CODE,
                                time.time() - start)
                            # setdefault stores the list when the key is
                            # missing; .get(..., []) appended to a throwaway
                            # list and lost the new code.
                            movr_objects["global"].setdefault(
                                "promo_codes", []).append(promo_code)

                        elif random.random() < .1:
                            # simulate a user applying a promo code to her account
                            known_promo_codes = movr_objects["global"].get(
                                "promo_codes")
                            # random.choice raises on an empty sequence, and
                            # that error is not a DBAPIError, so it would end
                            # this worker. Skip the tick when there is nothing
                            # to apply.
                            if known_promo_codes:
                                start = time.time()
                                movr.apply_promo_code(
                                    active_city,
                                    random.choice(
                                        movr_objects["local"][active_city]
                                        ["users"])['id'],
                                    random.choice(known_promo_codes))
                                stats.add_latency_measurement(
                                    ACTION_APPLY_CODE,
                                    time.time() - start)
                        elif random.random() < .3:
                            # simulate new signup
                            start = time.time()
                            new_user = movr.add_user(
                                active_city, datagen.name(), datagen.address(),
                                datagen.credit_card_number())
                            stats.add_latency_measurement(
                                ACTION_NEW_USER,
                                time.time() - start)
                            movr_objects["local"][active_city]["users"].append(
                                new_user)

                        elif random.random() < .1:
                            # simulate a user adding a new vehicle to the population
                            start = time.time()
                            # Generate the vehicle type once and use it for
                            # both the type and its metadata. The previous
                            # code passed the builtin `type` to
                            # generate_vehicle_metadata.
                            vehicle_type = MovRGenerator.generate_random_vehicle()
                            new_vehicle = movr.add_vehicle(
                                active_city,
                                owner_id=random.choice(
                                    movr_objects["local"][active_city]
                                    ["users"])['id'],
                                type=vehicle_type,
                                vehicle_metadata=MovRGenerator.
                                generate_vehicle_metadata(vehicle_type),
                                status=MovRGenerator.get_vehicle_availability(
                                ),
                                current_location=datagen.address())
                            stats.add_latency_measurement(
                                ACTION_ADD_VEHICLE,
                                time.time() - start)
                            movr_objects["local"][active_city][
                                "vehicles"].append(new_vehicle)

                        elif random.random() < .5:
                            # simulate a user starting a ride
                            start = time.time()
                            ride = movr.start_ride(
                                active_city,
                                random.choice(movr_objects["local"]
                                              [active_city]["users"])['id'],
                                random.choice(movr_objects["local"]
                                              [active_city]["vehicles"])['id'])
                            stats.add_latency_measurement(
                                ACTION_START_RIDE,
                                time.time() - start)
                            active_rides.append(ride)

                        else:
                            if len(active_rides):
                                # simulate a ride ending
                                ride = active_rides.pop()
                                start = time.time()
                                movr.end_ride(ride['city'], ride['id'])
                                stats.add_latency_measurement(
                                    ACTION_END_RIDE,
                                    time.time() - start)
        except DBAPIError:
            # Back off, then let the outer loop open a fresh connection.
            logging.error("lost connection to the db. sleeping for 10 seconds")
            time.sleep(10)
Exemple #6
0
            rows.append([partition, partition_zone_map[partition]])
        print(
            tabulate(
                rows,
                ["partition", "zone where partitioned data will be moved"]),
            "\n")

        rows = []
        for partition in partition_zone_map:
            rows.append(["promo_codes", partition_zone_map[partition]])
        print(
            tabulate(rows, [
                "reference table", "zones where index data will be replicated"
            ]), "\n")

        with MovR(conn_string, init_tables=False, echo=args.echo_sql) as movr:
            if args.preview_queries:
                queries = movr.get_geo_partitioning_queries(
                    partition_city_map, partition_zone_map)
                print("queries to geo-partition the database")

                rows = []

                print("===table and index partitions===")
                for query in queries["table_partitions"]:
                    print(query)

                for query in queries["index_partitions"]:
                    print(query)

                print("===table and index zones===")
Exemple #7
0
        run_data_loader(conn_string,
                        multi_region=args.multi_region,
                        cities=get_city_list(args.city),
                        num_users=args.num_users,
                        num_rides=args.num_rides,
                        num_vehicles=args.num_vehicles,
                        num_histories=args.num_histories,
                        num_promo_codes=args.num_promo_codes,
                        num_threads=args.num_threads,
                        skip_reload_tables=args.skip_reload_tables,
                        echo_sql=args.echo_sql)

    elif args.subparser_name == "configure-multi-region":
        with MovR(conn_string,
                  primary_region=args.primary_region,
                  echo=args.echo_sql) as movr:
            regions = movr.get_regions()
            cities = movr.get_cities()
            if regions is None:
                logging.error(
                    "To configure your database for multi-region features, you must specify cluster regions at startup."
                )
                sys.exit(1)
            if cities is None:
                logging.error(
                    "To configure your database for multi-region features, the database must have rows of data with city values."
                )
                sys.exit(1)
            region_map = assign_regions(cities, regions, args.primary_region,
                                        args.region_city_pair)
Exemple #8
0
        run_data_loader(conn_string,
                        cities=get_cities(args.city),
                        num_users=args.num_users,
                        num_rides=args.num_rides,
                        num_vehicles=args.num_vehicles,
                        num_histories=args.num_histories,
                        num_promo_codes=args.num_promo_codes,
                        num_threads=args.num_threads,
                        use_multi_region=args.multi_region,
                        skip_reload_tables=args.skip_reload_tables,
                        echo_sql=args.echo_sql)

    elif args.subparser_name == "configure-multi-region":
        if args.preview_queries:
            with MovR(conn_string,
                      multi_region=True,
                      init_tables=False,
                      echo=args.echo_sql) as movr:

                queries = movr.get_multi_region_transformations()
                print(
                    "DDL to convert a single region database to multi-region")

                print("===primary key alters===")
                for query in queries["pk_alters"]:
                    print(query)

                print("===foreign key alters and index drops ===")
                for query in queries["fk_alters"]:
                    print(query)
            sys.exit(0)
        else: