def run_data_loader(conn_string, multi_region, cities, num_users, num_rides,
                    num_vehicles, num_histories, num_promo_codes, num_threads,
                    skip_reload_tables, echo_sql):
    """Populate the MovR database, loading groups of cities in parallel.

    The requested totals are divided evenly (rounding up) across ``cities``,
    and the cities are split into contiguous chunks, one chunk per worker
    thread. Each worker runs ``load_movr_data`` for its chunk. At most one
    thread per city is created.

    Args:
        conn_string: Connection string passed to ``MovR`` and to each worker.
        multi_region: Forwarded to ``MovR`` as ``multi_region``.
        cities: Sequence of cities to load; must not be empty.
        num_users: Total users to generate across all cities (> 0).
        num_rides: Total rides to generate across all cities (> 0).
        num_vehicles: Total vehicles to generate across all cities (> 0).
        num_histories: Total location-history rows across all cities.
        num_promo_codes: Total promo codes, split across worker threads.
        num_threads: Maximum number of worker threads to use (> 0).
        skip_reload_tables: When true, ``MovR`` is opened with
            ``reset_tables=False``.
        echo_sql: Forwarded to ``MovR`` and to each worker as the echo flag.

    Raises:
        ValueError: If a required count is not positive, ``cities`` is empty,
            or ``num_threads`` is not positive.
    """
    if num_users <= 0 or num_rides <= 0 or num_vehicles <= 0:
        raise ValueError("The number of objects to generate must be > 0.")
    # Both values are used as divisors below; fail with a clear message
    # instead of a ZeroDivisionError.
    if len(cities) == 0:
        raise ValueError("At least one city is required to load data.")
    if num_threads <= 0:
        raise ValueError("The number of threads must be > 0.")

    start_time = time.time()
    city_count = len(cities)

    def per_city(total):
        # Round up so the requested total is always reached.
        return int(math.ceil(float(total) / city_count))

    logging.info("Loading MovR")
    with MovR(conn_string, multi_region=multi_region,
              reset_tables=(not skip_reload_tables), echo=echo_sql):
        logging.info("Loading cities %s.", cities)
        logging.info(
            "Loading movr data with ~%d users, ~%d vehicles, ~%d rides, ~%d histories, and ~%d promo codes.",
            num_users, num_vehicles, num_rides, num_histories,
            num_promo_codes)

        # don't create more than 1 thread per city
        usable_threads = min(num_threads, city_count)
        if usable_threads < num_threads:
            logging.info(
                "Only using %d of %d requested threads, since we only create at most one thread per city.",
                usable_threads, num_threads)

        num_users_per_city = per_city(num_users)
        num_rides_per_city = per_city(num_rides)
        num_vehicles_per_city = per_city(num_vehicles)
        num_histories_per_city = per_city(num_histories)

        cities_per_thread = int(
            math.ceil(float(city_count) / usable_threads))
        num_promo_codes_per_thread = int(
            math.ceil(
                float(num_promo_codes) /
                (float(city_count) / cities_per_thread)))

        # One worker per contiguous chunk of cities. The number of chunks
        # never exceeds usable_threads because cities_per_thread is rounded up.
        for offset in range(0, city_count, cities_per_thread):
            worker = threading.Thread(
                target=load_movr_data,
                args=(conn_string, num_users_per_city, num_vehicles_per_city,
                      num_rides_per_city, num_histories_per_city,
                      num_promo_codes_per_thread,
                      cities[offset:offset + cities_per_thread], echo_sql))
            worker.start()

        # keep main thread alive so we can catch ctrl + c
        while threading.active_count() > 1:
            time.sleep(0.1)

    duration = time.time() - start_time
    logging.info("Populated %s cities in %f seconds.", city_count, duration)
def load_movr_data(conn_string, num_users, num_vehicles, num_rides,
                   num_histories, num_promo_codes_per_thread, cities,
                   echo_sql=False):
    """Generate MovR data for each city in ``cities``, then add promo codes.

    For every city this adds users, vehicles, rides, and vehicle location
    histories, in that order. The per-city loop stops early when the
    module-level ``TERMINATE_GRACEFULLY`` flag is set. Promo codes are
    generated once, after the loop.

    Args:
        conn_string: Connection string used for ``MovR`` and the engine.
        num_users: Users to generate per city (> 0).
        num_vehicles: Vehicles to generate per city (> 0).
        num_rides: Rides to generate per city (> 0).
        num_histories: Location-history rows to generate per city.
        num_promo_codes_per_thread: Promo codes to generate in this call.
        cities: Iterable of cities to populate.
        echo_sql: Forwarded to ``MovR`` and ``create_engine`` as ``echo``.

    Raises:
        ValueError: If ``num_users``, ``num_rides``, or ``num_vehicles`` is
            not positive.
    """
    if num_users <= 0 or num_rides <= 0 or num_vehicles <= 0:
        raise ValueError("The number of objects to generate must be > 0.")

    start_time = time.time()
    with MovR(conn_string, echo=echo_sql):
        engine = create_engine(conn_string, echo=echo_sql)
        try:
            for city in cities:
                if TERMINATE_GRACEFULLY:
                    logging.debug("Terminating...")
                    break

                logging.info("Generating user data for %s...", city)
                add_users(engine, num_users, city)

                logging.info("Generating vehicle data for %s...", city)
                add_vehicles(engine, num_vehicles, city)

                logging.info("Generating ride data for %s...", city)
                add_rides(engine, num_rides, city)

                logging.info("Generating location history data for %s...",
                             city)
                add_vehicle_location_histories(engine, num_histories, city)

                logging.info("Populated %s in %f seconds.", city,
                             time.time() - start_time)

            logging.info("Generating %s promo codes...",
                         num_promo_codes_per_thread)
            add_promo_codes(engine, num_promo_codes_per_thread)
        finally:
            # Release this worker's pooled connections even if a generator
            # step fails; otherwise every loader thread leaks its pool.
            # NOTE(review): assumes create_engine is SQLAlchemy's and returns
            # an Engine with dispose() -- confirm against the file's imports.
            engine.dispose()
def configure_multi_region(conn_string, primary_region, city_list,
                           region_city_pair, echo_sql, preview):
    """Apply, or preview, the multi-region schema transformations.

    Opens a ``MovR`` session in multi-region mode, fetches the regions and
    cities, and builds a region map with ``assign_regions``. With ``preview``
    set, each transformation query is printed and the process exits with
    status 0; otherwise the transformations are run and the elapsed time is
    logged.

    The process exits with status 1 when either the regions or the cities
    lookup returns ``None``.
    """
    began = time.time()

    with MovR(conn_string, primary_region=primary_region, multi_region=True,
              echo=echo_sql) as movr:
        regions = movr.get_regions()
        cities = movr.get_cities(city_list)

        # Checked in this order; the first missing prerequisite aborts.
        prerequisites = (
            (regions,
             "To configure your database for multi-region features, you must specify cluster regions at startup."),
            (cities,
             "To configure your database for multi-region features, the database must have rows of data with city values."),
        )
        for value, complaint in prerequisites:
            if value is None:
                logging.error(complaint)
                sys.exit(1)

        region_map = assign_regions(cities, regions, movr.primary_region,
                                    region_city_pair)

        if not preview:
            movr.run_multi_region_transformations(region_map)
        else:
            for statement in movr.get_multi_region_transformations(region_map):
                print(statement)
            sys.exit(0)

    elapsed = time.time() - began
    logging.info("Configured multi-region schema in {0} seconds.".format(elapsed))
def run_load_generator(conn_string, read_percentage,
                       connection_duration_in_seconds, city_list,
                       use_multi_region, follower_reads, echo_sql,
                       num_threads):
    """Warm up from the database, then run load-simulation worker threads.

    During warm-up the users, vehicles, and active rides for each city, plus
    the promo codes, are read through a ``MovR`` session and shared with the
    workers via ``movr_objects`` and ``active_rides``. The process exits with
    status 1 if any city has no users or no vehicles.

    After starting ``num_threads`` workers running ``simulate_movr_load``,
    the calling thread loops forever, printing stats and opening a new stats
    window every 15 seconds. This function does not return normally.

    Args:
        conn_string: Connection string for ``MovR`` and the workers.
        read_percentage: Fraction of ticks that are reads; must be in [0, 1].
        connection_duration_in_seconds: Passed to each worker.
        city_list: Cities to simulate load for.
        use_multi_region: Forwarded to the warm-up ``MovR`` session and
            selects which "running ... queries" message is logged.
        follower_reads: Forwarded to the read helpers and to each worker.
        echo_sql: Forwarded to ``MovR`` and to each worker.
        num_threads: Number of worker threads to start.

    Raises:
        ValueError: If ``read_percentage`` is outside [0, 1].
    """
    if read_percentage < 0 or read_percentage > 1:
        raise ValueError("read percentage must be between 0 and 1")

    logging.info("simulating movr load for cities %s", city_list)

    movr_objects = {"local": {}, "global": {}}

    logging.info("warming up....")
    with MovR(conn_string, multi_region=use_multi_region,
              echo=echo_sql) as movr:
        active_rides = []
        for city in city_list:
            movr_objects["local"][city] = {
                "users": movr.get_users(city, follower_reads),
                "vehicles": movr.get_vehicles(city, follower_reads)
            }
            if len(list(movr_objects["local"][city]["vehicles"])) == 0 or len(
                    list(movr_objects["local"][city]["users"])) == 0:
                logging.error(
                    "must have users and vehicles for city '%s' in the movr database to generate load. try running with the 'load' command.",
                    city)
                sys.exit(1)

            active_rides.extend(movr.get_active_rides(city, follower_reads))
        movr_objects["global"]["promo_codes"] = movr.get_promo_codes()

    if use_multi_region:
        logging.info("running multi-region queries...")
    else:
        logging.info("running single region queries...")

    # The argument tuple must match simulate_movr_load's signature:
    # (conn_string, cities, movr_objects, active_rides, read_percentage,
    #  follower_reads, connection_duration_in_seconds, echo_sql).
    # Passing use_multi_region as an extra positional argument made every
    # worker fail on start with a TypeError.
    for _ in range(num_threads):
        worker = threading.Thread(
            target=simulate_movr_load,
            args=(conn_string, city_list, movr_objects, active_rides,
                  read_percentage, follower_reads,
                  connection_duration_in_seconds, echo_sql))
        worker.start()

    while True:  # keep main thread alive to catch exit signals
        time.sleep(15)
        stats.print_stats(action_list=[
            ACTION_ADD_VEHICLE, ACTION_GET_VEHICLES, ACTION_UPDATE_RIDE_LOC,
            ACTION_NEW_CODE, ACTION_APPLY_CODE, ACTION_NEW_USER,
            ACTION_START_RIDE, ACTION_END_RIDE
        ])
        stats.new_window()
def simulate_movr_load(conn_string, cities, movr_objects, active_rides,
                       read_percentage, follower_reads,
                       connection_duration_in_seconds, echo_sql=False):
    """Run simulated MovR traffic until asked to terminate.

    Repeatedly opens a ``MovR`` session that lives for
    ``connection_duration_in_seconds`` and, on every tick, picks a random
    city and performs either a read (with probability ``read_percentage``)
    or a write tick. Latency of each operation is recorded through
    ``stats.add_latency_measurement``. Returns when the module-level
    ``TERMINATE_GRACEFULLY`` flag is set. On ``DBAPIError`` the worker logs,
    sleeps 10 seconds, and reconnects.

    Args:
        conn_string: Connection string for each ``MovR`` session.
        cities: Non-empty sequence of cities to choose from.
        movr_objects: Shared dict; ``movr_objects["local"][city]`` holds
            ``"users"`` and ``"vehicles"`` lists (items indexed by ``'id'``)
            and ``movr_objects["global"]["promo_codes"]`` holds promo codes.
            New users, vehicles, and promo codes are appended to these lists.
        active_rides: Shared list of rides (items indexed by ``'city'`` and
            ``'id'``); rides are appended on start and popped on end.
        read_percentage: Probability that a tick is a read.
        follower_reads: Forwarded to ``movr.get_vehicles``.
        connection_duration_in_seconds: Lifetime of each session.
        echo_sql: Forwarded to ``MovR`` as ``echo``.
    """
    datagen = Faker()
    while True:
        logging.debug(
            "creating a new connection to %s, which will reset in %d seconds",
            conn_string, connection_duration_in_seconds)
        try:
            with MovR(conn_string, echo=echo_sql) as movr:
                # refresh connections so load can balance among cluster nodes
                # even if the cluster size changes
                timeout = time.time() + connection_duration_in_seconds
                while True:
                    if TERMINATE_GRACEFULLY:
                        logging.debug("Terminating thread.")
                        return

                    if time.time() > timeout:
                        break

                    active_city = random.choice(cities)
                    local_objects = movr_objects["local"]

                    if random.random() < read_percentage:
                        # simulate user loading screen
                        start = time.time()
                        movr.get_vehicles(active_city, follower_reads, 25)
                        stats.add_latency_measurement(ACTION_GET_VEHICLES,
                                                      time.time() - start)
                        continue

                    # every write tick, simulate the various vehicles updating
                    # their locations if they are being used for rides
                    for ride in active_rides[0:10]:
                        latlong = MovRGenerator.generate_random_latlong()
                        start = time.time()
                        movr.update_ride_location(ride['city'],
                                                  ride_id=ride['id'],
                                                  lat=latlong['lat'],
                                                  long=latlong['long'])
                        stats.add_latency_measurement(ACTION_UPDATE_RIDE_LOC,
                                                      time.time() - start)

                    # do write operations randomly. Each branch draws a fresh
                    # random number, so every threshold is conditional on the
                    # earlier branches not having been taken.
                    if random.random() < .03:
                        # simulate a movr marketer creating a new promo code
                        start = time.time()
                        promo_code = movr.create_promo_code(
                            code="_".join(datagen.words(nb=3)) + "_" +
                            str(time.time()),
                            description=datagen.paragraph(),
                            expiration_time=datetime.datetime.now() +
                            datetime.timedelta(days=random.randint(0, 30)),
                            rules={
                                "type": "percent_discount",
                                "value": "10%"
                            })
                        stats.add_latency_measurement(ACTION_NEW_CODE,
                                                      time.time() - start)
                        # setdefault keeps the new code in the shared dict
                        # even when the key was missing; a .get() default
                        # list would be discarded.
                        movr_objects["global"].setdefault(
                            "promo_codes", []).append(promo_code)

                    elif random.random() < .1:
                        # simulate a user applying a promo code to her account
                        promo_codes = movr_objects["global"].get("promo_codes")
                        # With no promo codes yet there is nothing to apply;
                        # random.choice on an empty list would raise and kill
                        # this worker.
                        if promo_codes:
                            start = time.time()
                            movr.apply_promo_code(
                                active_city,
                                random.choice(
                                    local_objects[active_city]["users"])['id'],
                                random.choice(promo_codes))
                            stats.add_latency_measurement(
                                ACTION_APPLY_CODE, time.time() - start)

                    elif random.random() < .3:
                        # simulate new signup
                        start = time.time()
                        new_user = movr.add_user(active_city, datagen.name(),
                                                 datagen.address(),
                                                 datagen.credit_card_number())
                        stats.add_latency_measurement(ACTION_NEW_USER,
                                                      time.time() - start)
                        local_objects[active_city]["users"].append(new_user)

                    elif random.random() < .1:
                        # simulate a user adding a new vehicle to the population
                        start = time.time()
                        owner_id = random.choice(
                            local_objects[active_city]["users"])['id']
                        # Bind the generated type to a local so the metadata
                        # is built for it; a bare `type` in the argument list
                        # refers to the builtin, not the keyword argument.
                        vehicle_type = MovRGenerator.generate_random_vehicle()
                        new_vehicle = movr.add_vehicle(
                            active_city,
                            owner_id=owner_id,
                            type=vehicle_type,
                            vehicle_metadata=MovRGenerator.
                            generate_vehicle_metadata(vehicle_type),
                            status=MovRGenerator.get_vehicle_availability(),
                            current_location=datagen.address())
                        stats.add_latency_measurement(ACTION_ADD_VEHICLE,
                                                      time.time() - start)
                        local_objects[active_city]["vehicles"].append(
                            new_vehicle)

                    elif random.random() < .5:
                        # simulate a user starting a ride
                        start = time.time()
                        ride = movr.start_ride(
                            active_city,
                            random.choice(
                                local_objects[active_city]["users"])['id'],
                            random.choice(
                                local_objects[active_city]["vehicles"])['id'])
                        stats.add_latency_measurement(ACTION_START_RIDE,
                                                      time.time() - start)
                        active_rides.append(ride)

                    else:
                        if len(active_rides):
                            # simulate a ride ending
                            ride = active_rides.pop()
                            start = time.time()
                            movr.end_ride(ride['city'], ride['id'])
                            stats.add_latency_measurement(
                                ACTION_END_RIDE, time.time() - start)
        except DBAPIError:
            logging.error("lost connection to the db. sleeping for 10 seconds")
            time.sleep(10)
rows.append([partition, partition_zone_map[partition]]) print( tabulate( rows, ["partition", "zone where partitioned data will be moved"]), "\n") rows = [] for partition in partition_zone_map: rows.append(["promo_codes", partition_zone_map[partition]]) print( tabulate(rows, [ "reference table", "zones where index data will be replicated" ]), "\n") with MovR(conn_string, init_tables=False, echo=args.echo_sql) as movr: if args.preview_queries: queries = movr.get_geo_partitioning_queries( partition_city_map, partition_zone_map) print("queries to geo-partition the database") rows = [] print("===table and index partitions===") for query in queries["table_partitions"]: print(query) for query in queries["index_partitions"]: print(query) print("===table and index zones===")
run_data_loader(conn_string, multi_region=args.multi_region, cities=get_city_list(args.city), num_users=args.num_users, num_rides=args.num_rides, num_vehicles=args.num_vehicles, num_histories=args.num_histories, num_promo_codes=args.num_promo_codes, num_threads=args.num_threads, skip_reload_tables=args.skip_reload_tables, echo_sql=args.echo_sql) elif args.subparser_name == "configure-multi-region": with MovR(conn_string, primary_region=args.primary_region, echo=args.echo_sql) as movr: regions = movr.get_regions() cities = movr.get_cities() if regions is None: logging.error( "To configure your database for multi-region features, you must specify cluster regions at startup." ) sys.exit(1) if cities is None: logging.error( "To configure your database for multi-region features, the database must have rows of data with city values." ) sys.exit(1) region_map = assign_regions(cities, regions, args.primary_region, args.region_city_pair)
run_data_loader(conn_string, cities=get_cities(args.city), num_users=args.num_users, num_rides=args.num_rides, num_vehicles=args.num_vehicles, num_histories=args.num_histories, num_promo_codes=args.num_promo_codes, num_threads=args.num_threads, use_multi_region=args.multi_region, skip_reload_tables=args.skip_reload_tables, echo_sql=args.echo_sql) elif args.subparser_name == "configure-multi-region": if args.preview_queries: with MovR(conn_string, multi_region=True, init_tables=False, echo=args.echo_sql) as movr: queries = movr.get_multi_region_transformations() print( "DDL to convert a single region database to multi-region") print("===primary key alters===") for query in queries["pk_alters"]: print(query) print("===foreign key alters and index drops ===") for query in queries["fk_alters"]: print(query) sys.exit(0) else: