K_train = matrix.BigSymmetricMatrix(args.train_key, bucket="pictureweb") K_test = matrix.BigMatrix(args.test_key, bucket="pictureweb") model = matrix.BigMatrix(args.model_key, bucket="pictureweb", shape=(K_train.shape[0], int(np.max(y_train) + 1)), shard_sizes=(4096, 1000), write_header=True) config = wc.default() config['runtime']['s3_bucket'] = 'pictureweb' config['runtime'][ 's3_key'] = 'pywren.runtime/pywren_runtime-3.6-pictureweb.tar.gz' config['standalone']['sqs_queue_name'] = 'pictureweb' print("please launch some standalone instances for this script....") pwex = pywren.standalone_executor(config=config) print("Evaluating Train") t = time.time() y_train_pred = gemm(pwex, K_train, model, overwrite=False, tasks_per_job=1, gemm_impl=2) e = time.time() print("Train Eval took {0}".format(e - t)) print("Downloading train") y_train_pred_local = y_train_pred.numpy() train_top1 = fv.top_k_accuracy(y_train, y_train_pred_local, k=1) train_top5 = fv.top_k_accuracy(y_train, y_train_pred_local, k=5) print("Train top 5 accuracy {0}, Train top 1 accuracy {1}".format(
def run_experiment(problem_size, shard_size, pipeline, num_priorities, lru,
                   eager, truncate, max_cores, start_cores, trial,
                   launch_granularity, timeout, log_granularity,
                   autoscale_policy, standalone, warmup, verify, matrix_exists,
                   read_limit, write_limit):
    """Run one sharded Cholesky benchmark and upload the collected metrics.

    The function builds (or re-opens) the sharded input matrix, compiles the
    Cholesky program, launches workers through pywren, polls SQS/Redis
    counters every ``log_granularity`` seconds while the program is running,
    and finally uploads the ``exp`` dictionary (serialized with dill) to
    ``lambdapack/<program.hash>/runtime.pickle`` in ``program.bucket``.

    Args:
        problem_size: Number of rows of the random column vector ``X`` whose
            outer product ``X X^T`` is factorized; also part of the matrix
            key name.
        shard_size: Row shard size of ``X``; also part of the matrix key name.
        pipeline: Pipeline width handed to ``job_runner.lambdapack_run``.
        num_priorities: Forwarded to ``lp.LambdaPackProgram``.
        lru: When truthy the workers get ``cache_size=5``, otherwise ``0``.
        eager: Forwarded to ``lp.LambdaPackProgram``.
        truncate: Forwarded to ``compiler._chol``.
        max_cores: Upper bound on workers; also the number of warmup tasks.
        start_cores: Number of workers launched right after ``program.start()``.
        trial: Only hashed into the log file name and recorded in ``exp``.
        launch_granularity: Minimum number of seconds between two launches
            under the ``"dynamic"`` policy.
        timeout: Forwarded to ``job_runner.lambdapack_run``; under the
            ``"constant_timeout"`` policy a new batch of ``max_cores`` workers
            is launched once ``0.85 * timeout`` seconds have passed.
        log_granularity: Seconds to sleep between two polls.
        autoscale_policy: ``"dynamic"`` or ``"constant_timeout"``.
        standalone: Selects ``pywren.standalone_executor`` (and forwards AWS
            credentials from the environment) instead of the default executor.
        warmup: When truthy, keeps submitting sleeping tasks until
            ``max_cores`` workers report as up.
        verify: When truthy, computes a local Cholesky factor with NumPy and
            prints the maximum absolute difference to the sharded result.
        matrix_exists: When truthy, re-opens existing sharded matrices
            instead of generating and multiplying new ones.
        read_limit: Forwarded to ``lp.LambdaPackProgram``.
        write_limit: Forwarded to ``lp.LambdaPackProgram``.

    Raises:
        KeyError: If ``standalone`` is truthy and ``AWS_ACCESS_KEY_ID`` or
            ``AWS_SECRET_ACCESS_KEY`` is missing from the environment.
        Exception: ``"unknown autoscale policy"`` for any other
            ``autoscale_policy``; like every other error raised while polling
            it is re-raised after the program has been stopped.
    """

    def int_or_zero(raw):
        """Convert a counter to int, treating a missing (None) value as 0."""
        return 0 if raw is None else int(raw)

    def giga_or_zero(raw):
        """Scale a raw counter by 1e9, treating a missing (None) value as 0."""
        return 0 if raw is None else int(raw) / 1e9

    def safe_div(numerator, denominator):
        """Divide, yielding 0.0 when the denominator is not positive."""
        return numerator / denominator if denominator > 0 else 0.0

    # set up logging
    logger = logging.getLogger()
    region = wc.default()["account"]["aws_region"]
    print("REGION", region)
    # Silence every already-registered logger; only the root logger talks.
    for key in logging.Logger.manager.loggerDict:
        logging.getLogger(key).setLevel(logging.CRITICAL)
    logger.setLevel(logging.DEBUG)
    # The log file name is derived from the experiment arguments.
    arg_bytes = pickle.dumps(
        (problem_size, shard_size, pipeline, num_priorities, lru, eager,
         truncate, max_cores, start_cores, trial, launch_granularity, timeout,
         log_granularity, autoscale_policy, read_limit, write_limit))
    arg_hash = hashlib.md5(arg_bytes).hexdigest()
    log_file = "{0}.log".format(arg_hash)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info("Logging to {0}".format(log_file))

    if standalone:
        extra_env = {
            "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
            # The secret must come from its own variable, not the key id.
            "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            "OMP_NUM_THREADS": "1",
            "AWS_DEFAULT_REGION": region
        }
        config = wc.default()
        config['runtime']['s3_bucket'] = 'numpywrenpublic'
        key = "pywren.runtime/pywren_runtime-3.6-numpywren-standalone.tar.gz"
        config['runtime']['s3_key'] = key
        pwex = pywren.standalone_executor(config=config)
    else:
        extra_env = {"AWS_DEFAULT_REGION": region}
        config = wc.default()
        config['runtime']['s3_bucket'] = 'numpywrenpublic-us-east-1'
        key = "pywren.runtime/pywren_runtime-3.6-numpywren-08-25-2018.tar.gz"
        config['runtime']['s3_key'] = key
        pwex = pywren.default_executor(config=config)

    if not matrix_exists:
        X = np.random.randn(problem_size, 1)
        shard_sizes = [shard_size, 1]
        X_sharded = BigMatrix("cholesky_test_{0}_{1}".format(
            problem_size, shard_size),
                              shape=X.shape,
                              shard_sizes=shard_sizes,
                              write_header=True,
                              autosqueeze=False,
                              bucket="numpywrentop500test",
                              hash_keys=False)
        shard_matrix(X_sharded, X)
        print("Generating PSD matrix...")
        t = time.time()
        print(X_sharded.shape)
        XXT_sharded = binops.gemm(pwex,
                                  X_sharded,
                                  X_sharded.T,
                                  overwrite=False)
        e = time.time()
        print("GEMM took {0}".format(e - t))
    else:
        X_sharded = BigMatrix("cholesky_test_{0}_{1}".format(
            problem_size, shard_size),
                              autosqueeze=False,
                              hash_keys=False,
                              bucket="numpywrentop500test")
        key_name = binops.generate_key_name_binop(X_sharded, X_sharded.T,
                                                  "gemm")
        XXT_sharded = BigMatrix(key_name,
                                hash_keys=False,
                                bucket="numpywrentop500test")
    XXT_sharded.lambdav = problem_size * 10

    if verify:
        A = XXT_sharded.numpy()
        print("Computing local cholesky")
        L = np.linalg.cholesky(A)

    t = time.time()
    instructions, trailing, L_sharded = compiler._chol(XXT_sharded,
                                                       truncate=truncate)
    # Use the caller-supplied width so the run matches the value that is
    # hashed into the log name and recorded in exp["pipeline"].
    pipeline_width = pipeline
    cache_size = 5 if lru else 0
    pywren_config = pwex.config
    config = npw.config.default()
    program = lp.LambdaPackProgram(instructions,
                                   executor=pywren.lambda_executor,
                                   pywren_config=pywren_config,
                                   num_priorities=num_priorities,
                                   eager=eager,
                                   config=config,
                                   write_limit=write_limit,
                                   read_limit=read_limit)

    warmup_start = time.time()
    if warmup:
        warmup_sleep = 170

        def warmup_fn(x):
            program.incr_up(1)
            time.sleep(warmup_sleep)
            program.decr_up(1)

        print("Warming up...")
        pwex.map(warmup_fn, range(max_cores))
        last_spinup = time.time()
        while True:
            # Re-submit before the previous batch of sleepers has finished.
            if (time.time() - last_spinup) > 0.75 * warmup_sleep:
                print("Calling pwex.map..")
                pwex.map(warmup_fn, range(max_cores))
                last_spinup = time.time()
            time.sleep(2)
            up_workers = int_or_zero(program.get_up())
            print("{0} workers alive".format(up_workers))
            if up_workers >= max_cores:
                time.sleep(warmup_sleep)
                break
    warmup_end = time.time()
    print("Warmup took {0} seconds".format(warmup_end - warmup_start))
    e = time.time()
    print("Program compile took {0} seconds".format(e - t))
    print("program.hash", program.hash)
    REDIS_CLIENT = program.control_plane.client

    done_counts = []
    ready_counts = []
    post_op_counts = []
    not_ready_counts = []
    running_counts = []
    sqs_invis_counts = []
    sqs_vis_counts = []
    up_workers_counts = []
    busy_workers_counts = []
    read_objects = []
    write_objects = []
    all_read_timeouts = []
    all_write_timeouts = []
    all_redis_timeouts = []
    # Each series starts with one baseline sample so the indices line up.
    times = [time.time()]
    flops = [0]
    reads = [0]
    writes = [0]
    print("LRU", lru)
    print("eager", eager)
    exp = {
        "redis_done_counts": done_counts,
        "redis_ready_counts": ready_counts,
        "redis_post_op_counts": post_op_counts,
        "redis_not_ready_counts": not_ready_counts,
        "redis_running_counts": running_counts,
        "sqs_invis_counts": sqs_invis_counts,
        "sqs_vis_counts": sqs_vis_counts,
        "busy_workers": busy_workers_counts,
        "up_workers": up_workers_counts,
        "times": times,
        "lru": lru,
        "priority": num_priorities,
        "eager": eager,
        "truncate": truncate,
        "max_cores": max_cores,
        "problem_size": problem_size,
        "shard_size": shard_size,
        "pipeline": pipeline,
        "flops": flops,
        "reads": reads,
        "writes": writes,
        "read_objects": read_objects,
        "write_objects": write_objects,
        "read_timeouts": all_read_timeouts,
        "write_timeouts": all_write_timeouts,
        "redis_timeouts": all_redis_timeouts,
        "trial": trial,
        "launch_granularity": launch_granularity,
        "log_granularity": log_granularity,
        "autoscale_policy": autoscale_policy,
        "standalone": standalone,
        "program": program,
        "time_steps": 1,
        "failed": False,
    }

    program.start()
    t = time.time()
    logger.info("Starting with {0} cores".format(start_cores))
    invoker = fs.ThreadPoolExecutor(1)

    def run_worker(x):
        return job_runner.lambdapack_run(program,
                                         pipeline_width=pipeline_width,
                                         cache_size=cache_size,
                                         timeout=timeout)

    def launch_workers(count):
        """Run pwex.map on the invoker thread and return its Future.

        ``count`` is bound when the task is submitted, so a later change of
        the caller's variable cannot alter a launch that is still queued.
        """
        return invoker.submit(pwex.map,
                              run_worker,
                              range(count),
                              extra_env=extra_env)

    # all_futures holds one Future (of a pwex.map call) per launch.
    all_futures = [launch_workers(start_cores)]
    start_time = time.time()
    last_run_time = start_time
    print(program.program_status())
    print("QUEUE URLS", len(program.queue_urls))
    try:
        sqs_client = boto3.client('sqs')
        block_bytes = None
        while program.program_status() == lp.PS.RUNNING:
            time.sleep(log_granularity)
            curr_time = int(time.time() - start_time)
            p = program.get_progress()
            if p is None:
                print("no progress...")
                continue
            max_pc = int(p)
            times.append(int(time.time()))

            waiting = 0
            running = 0
            for queue_url in program.queue_urls:
                attrs = sqs_client.get_queue_attributes(
                    QueueUrl=queue_url,
                    AttributeNames=[
                        'ApproximateNumberOfMessages',
                        'ApproximateNumberOfMessagesNotVisible'
                    ])['Attributes']
                waiting += int(attrs["ApproximateNumberOfMessages"])
                running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
            sqs_invis_counts.append(running)
            sqs_vis_counts.append(waiting)

            busy_workers = int_or_zero(
                REDIS_CLIENT.get("{0}_busy".format(program.hash)))
            up_workers = int_or_zero(program.get_up())
            up_workers_counts.append(up_workers)
            busy_workers_counts.append(busy_workers)
            logger.debug("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))
            if (curr_time % INFO_FREQ) == 0:
                logger.info("Waiting: {0}, Currently Processing: {1}".format(
                    waiting, running))
                logger.info("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                    up_workers, busy_workers, curr_time))

            current_gflops = giga_or_zero(program.get_flops())
            flops.append(current_gflops)
            current_gbytes_read = giga_or_zero(program.get_read())
            reads.append(current_gbytes_read)
            current_gbytes_write = giga_or_zero(program.get_write())
            writes.append(current_gbytes_write)

            # times[0] is a float and later samples are truncated to int, so
            # the elapsed time can be zero (or negative) early on.
            elapsed = times[-1] - times[0]
            gflops_rate = safe_div(flops[-1], elapsed)
            greads_rate = safe_div(reads[-1], elapsed)
            gwrites_rate = safe_div(writes[-1], elapsed)
            # Object counts: bytes divided by b * b * 8 for shard size b.
            if block_bytes is None:
                b = XXT_sharded.shard_sizes[0]
                block_bytes = b * b * 8
            current_objects_read = (current_gbytes_read * 1e9) / block_bytes
            current_objects_write = (current_gbytes_write * 1e9) / block_bytes
            read_objects.append(current_objects_read)
            write_objects.append(current_objects_write)
            read_rate = safe_div(read_objects[-1], elapsed)
            write_rate = safe_div(write_objects[-1], elapsed)
            avg_workers = np.mean(up_workers_counts)

            smooth_len = 10
            if len(flops) > smooth_len + 5:
                window = times[-1] - times[-smooth_len]
                smoothed_gflops = safe_div(flops[-1] - flops[-smooth_len],
                                           window)
                smoothed_gread = safe_div(reads[-1] - reads[-smooth_len],
                                          window)
                smoothed_gwrite = safe_div(writes[-1] - writes[-smooth_len],
                                           window)
                smoothed_read_rate = safe_div(
                    read_objects[-1] - read_objects[-smooth_len], window)
                smoothed_write_rate = safe_div(
                    write_objects[-1] - write_objects[-smooth_len], window)
                smoothed_workers = np.mean(up_workers_counts[-smooth_len:])
            else:
                smoothed_gflops = "N/A"
                smoothed_gread = "N/A"
                smoothed_gwrite = "N/A"
                smoothed_workers = "N/A"
                smoothed_read_rate = "N/A"
                smoothed_write_rate = "N/A"

            # A counter that was never written comes back as None.
            read_timeouts = int_or_zero(REDIS_CLIENT.get("s3.timeouts.read"))
            write_timeouts = int_or_zero(
                REDIS_CLIENT.get("s3.timeouts.write"))
            redis_timeouts = int_or_zero(REDIS_CLIENT.get("redis.timeouts"))
            all_read_timeouts.append(read_timeouts)
            all_write_timeouts.append(write_timeouts)
            all_redis_timeouts.append(redis_timeouts)
            # Nothing has been transferred on the first polls; report 0.0
            # instead of dividing by zero.
            read_timeouts_fraction = safe_div(read_timeouts,
                                              current_objects_read)
            write_timeouts_fraction = safe_div(write_timeouts,
                                               current_objects_write)

            print("=======================================")
            print("Max PC is {0}".format(max_pc))
            print("Waiting: {0}, Currently Processing: {1}".format(
                waiting, running))
            print("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))
            print(
                "{0}: Total GFLOPS {1}, Total GBytes Read {2}, Total GBytes Write {3}"
                .format(curr_time, current_gflops, current_gbytes_read,
                        current_gbytes_write))
            print(
                "{0}: Average GFLOPS rate {1}, Average GBytes Read rate {2}, Average GBytes Write rate {3}, Average Worker Count {4}"
                .format(curr_time, gflops_rate, greads_rate, gwrites_rate,
                        avg_workers))
            print("{0}: Average read txns/s {1}, Average write txns/s {2}".
                  format(curr_time, read_rate, write_rate))
            print(
                "{0}: smoothed GFLOPS rate {1}, smoothed GBytes Read rate {2}, smoothed GBytes Write rate {3}, smoothed Worker Count {4}"
                .format(curr_time, smoothed_gflops, smoothed_gread,
                        smoothed_gwrite, smoothed_workers))
            print("{0}: smoothed read txns/s {1}, smoothed write txns/s {2}".
                  format(curr_time, smoothed_read_rate, smoothed_write_rate))
            print(
                "{0}: Read timeouts: {1}, Write timeouts: {2}, Redis timeouts: {3} "
                .format(curr_time, read_timeouts, write_timeouts,
                        redis_timeouts))
            print(
                "{0}: Read timeouts fraction: {1}, Write timeouts fraction: {2}"
                .format(curr_time, read_timeouts_fraction,
                        write_timeouts_fraction))
            print("=======================================")

            time_since_launch = time.time() - last_run_time
            if autoscale_policy == "dynamic":
                if (time_since_launch > launch_granularity
                        and up_workers < np.ceil(
                            waiting * 0.5 / pipeline_width)
                        and up_workers < max_cores):
                    cores_to_launch = int(
                        min(
                            np.ceil(waiting / pipeline_width) - up_workers,
                            max_cores - up_workers))
                    logger.info(
                        "launching {0} new tasks....".format(cores_to_launch))
                    # A single Future is appended; it is not iterable, so
                    # extend() would raise TypeError.
                    all_futures.append(launch_workers(cores_to_launch))
                    last_run_time = time.time()
            elif autoscale_policy == "constant_timeout":
                if time_since_launch > (0.85 * timeout):
                    cores_to_launch = max_cores
                    logger.info(
                        "launching {0} new tasks....".format(cores_to_launch))
                    all_futures.append(launch_workers(cores_to_launch))
                    last_run_time = time.time()
            else:
                raise Exception("unknown autoscale policy")
            exp["time_steps"] += 1

        if verify:
            L_sharded_local = L_sharded.numpy()
            print("max diff", np.max(np.abs(L_sharded_local - L)))
    except KeyboardInterrupt:
        # A manual interrupt still uploads whatever was collected so far.
        exp["failed"] = True
        program.stop()
    except Exception:
        traceback.print_exc()
        exp["failed"] = True
        program.stop()
        raise

    print(program.program_status())
    exp["all_futures"] = all_futures
    exp_bytes = dill.dumps(exp)
    client = boto3.client('s3')
    client.put_object(Key="lambdapack/{0}/runtime.pickle".format(program.hash),
                      Body=exp_bytes,
                      Bucket=program.bucket)
    # If no poll ever recorded progress, times holds only the baseline
    # sample and the elapsed time is zero.
    total_elapsed = times[-1] - times[0]
    print("=======================")
    print("=======================")
    print("Execution Summary:")
    print("Executed Program ID: {0}".format(program.hash))
    print("Program Success: {0}".format((not exp["failed"])))
    print("Problem Size: {0}".format(exp["problem_size"]))
    print("Shard Size: {0}".format(exp["shard_size"]))
    print("Total Execution time: {0}".format(total_elapsed))
    print("Average Flop Rate (GFlop/s): {0}".format(
        safe_div(exp["flops"][-1], total_elapsed)))
    with open("/tmp/last_run", "w+") as f:
        f.write(program.hash)
def toyjoin(key, share):
    """Run one join task through a pywren standalone executor.

    ``run_command`` is mapped over a one-element list containing ``key``; the
    list of results (one ``[t_fin, t_start]`` pair per future) is appended to
    ``share``. Nothing is returned.

    ``key`` is indexed with 'partition_num', 'rounds', 'em', 'taskId',
    'appName', 'type', 'names' and 'dtypes'.

    NOTE(review): the source text this block was recovered from had lost its
    line structure; the nesting below (in particular what sits under the
    ``sys.getsizeof`` check) is a reconstruction -- confirm against history.
    """

    def run_command(key):
        """Fetch partitions from the queue, join them against a synthetic
        left table, and return ``[t_fin, t_start]`` (finish time first)."""
        pywren.wrenlogging.default_config('INFO')
        logging.basicConfig(level=logging.DEBUG)
        logger = logging.getLogger(__name__)
        logger.info("before everything")
        partition_num = key['partition_num']
        rounds = key['rounds']  # read but not used below
        em = JiffyClient(host=key['em'])
        reduceId = key['taskId']
        appName = key['appName']
        alg_type = key['type']
        data_ques1 = open_or_create_jiffy_queues(em, appName, partition_num, 1,
                                                 'receiver')
        logger.info("queue opened")
        names = key['names']
        dtypes = key['dtypes']
        ############# left table
        # Synthetic left table: 100000 string keys with random float values.
        left_table = 100000
        indices = tm.makeStringIndex(left_table).values
        # NOTE(review): this rebinds the `key` parameter; every key[...] lookup
        # has to happen above this line.
        key = np.tile(indices[:left_table], 1)
        left = DataFrame({"key": key, "value": np.random.randn(left_table)})
        t_start = time.time()
        ############### initialize join functions
        # print(left)
        lim = 0  # iteration counter that bounds both fetch loops
        ############## keeps fetching
        fin_num = 0
        if alg_type == 'pipelined':
            # State that pipeline_merge returns and receives again on the
            # next call.
            leftsorter = None
            leftcount = None
            orizer = None
            intrizer = None
            count = 0  # only referenced by the commented-out timing code
            # NOTE(review): fin_num is never updated in this branch, so the
            # loop ends only through the `lim < 15` bound.
            while fin_num < partition_num and lim < 15:
                #### read table
                lim += 1
                time.sleep(0.01)
                logger.info("before get")
                obj = data_ques1[0].get()
                if sys.getsizeof(obj) > 1000:
                    part_data = pd.read_table(BytesIO(obj),
                                              header=None,
                                              delimiter="|",
                                              names=['key', 'value2'])
                    # ds, fin_num = read_jiffy_splits(names, dtypes, reduceId, data_ques1, fin_num, batch_size = 1, fin_size = partition_num)
                    # NOTE(review): with the assignment above commented out,
                    # `ds` is never bound in this branch, so the next line
                    # raises NameError when reached; `part_data` is parsed but
                    # not used. Presumably `ds` was meant to be `part_data` --
                    # confirm before changing.
                    logger.info(ds)
                    logger.info(fin_num)
                    # print(fin_num)
                    if len(ds) > 0:
                        ### join
                        # start = timeit.default_timer()
                        result, orizer, intrizer, leftsorter, leftcount = pipeline_merge(
                            left,
                            ds,
                            factorizer=orizer,
                            intfactorizer=intrizer,
                            leftsorter=leftsorter,
                            leftcount=leftcount,
                            slices=8,
                            how="pipeline")
                        time.sleep(0.8)
                        logger.info("merged")
                        # end = timeit.default_timer()
                        # count += (end - start)
                        # logger.info(str(i) + " chunks take time " + str(end - start) + " Accum time: " + str(count))
        elif alg_type == 'origin':
            # Accumulate every received split, then join once at the end.
            ds = pd.DataFrame()
            while fin_num < partition_num and lim < 1500:
                lim += 1
                #### read table
                dd, fin_num = read_jiffy_splits(names,
                                                dtypes,
                                                reduceId,
                                                data_ques1,
                                                fin_num,
                                                batch_size=1,
                                                fin_size=partition_num)
                if len(dd) > 0:
                    ds = ds.append(dd)
            print("this is ds:")
            print(ds)
            result = merge(left, ds, how="inner")
            print(fin_num)
        # Any other alg_type falls through and only the timings are returned;
        # the join result itself is never returned.
        t_fin = time.time()
        # share.append([t_start,t_fin, fin_num])
        return ([t_fin, t_start])

    # wrenexec = pywren.default_executor()
    wrenexec = pywren.standalone_executor()
    keylist = []
    keylist.append(key)
    print(keylist)
    futures = wrenexec.map(run_command, keylist)
    # for key in keylist:
    #     run_command(key)
    pywren.wait(futures)
    results = [f.result() for f in futures]
    share.append(results)
def get_near_duplicates_chunks(cd_ref_pairs,
                               imgnt,
                               cds,
                               top_k,
                               dssim_window_size,
                               use_pywren,
                               return_ndc_results,
                               distance_metrics=['l2', 'fc7', 'dssim'],
                               cache=True,
                               cache_root="ndc_cache"):
    """Compute nearest neighbours for every (candidates, references) pair.

    Each pair is handed to ``compute_nearest_neighbors``, either remotely
    through a pywren standalone executor (``use_pywren`` truthy, with one
    retry round for failed calls) or sequentially in this process.

    Returns ``(merged_results, timing_results)``: ``merged_results`` maps
    each distance metric to the output of ``merge_result`` (or to an empty
    dict when ``return_ndc_results`` is falsy), and ``timing_results`` is the
    second element of every per-pair result.

    ``imgnt`` and ``cds`` are accepted but not used here.
    """
    pywren_config = wc.default()
    pywren_config["runtime"].update({
        "s3_bucket": "imagenet2pywren",
        "s3_key": "pywren.runtime/pywren_runtime-3.6-imagenet2pywren.meta.json",
    })

    def get_results(cd_ref_pair):
        # Drop the distance payload unless the caller asked for it.
        cands, refs = cd_ref_pair
        outcome = compute_nearest_neighbors(distance_metrics, cands, refs,
                                            top_k, dssim_window_size, cache,
                                            cache_root)
        return outcome if return_ndc_results else (None, outcome[1])

    if not use_pywren:
        all_results = [get_results(pair) for pair in cd_ref_pairs]
    else:
        pwex = pywren.standalone_executor(config=pywren_config)
        print("pywren config", pwex.config)
        print('Number of pywren calls', len(cd_ref_pairs))
        extra_env = {
            "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
            "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            "OMP_NUM_THREADS": "1",
            "AWS_DEFAULT_REGION": os.environ["AWS_DEFAULT_REGION"]
        }
        map_kwargs = dict(exclude_modules=["site-packages"],
                          extra_env=extra_env)

        # First round: collect what succeeds, remember what failed.
        first_round = pwex.map(get_results, cd_ref_pairs, **map_kwargs)
        print('Waiting for futures')
        results, call_id_to_failed_future = wait_for_futures(
            first_round, print_frequency=100, raise_exception=False)
        print('Got {} results'.format(len(results)))
        print('{} futures failed'.format(len(call_id_to_failed_future)))
        all_results = list(results)

        # The call id of a failed future indexes into cd_ref_pairs.
        failed_cd_ref_pairs = [
            cd_ref_pairs[int(call_id)]
            for call_id, _ in call_id_to_failed_future.items()
        ]
        if failed_cd_ref_pairs:
            # Retry failed futures; this time a failure is raised.
            print('Retrying {} failed futures'.format(
                len(failed_cd_ref_pairs)))
            second_round = pwex.map(get_results, failed_cd_ref_pairs,
                                    **map_kwargs)
            results, call_id_to_failed_future = wait_for_futures(
                second_round, print_frequency=1, raise_exception=True)
            all_results.extend(results)
        print('Retrieved {} results'.format(len(all_results)))

    assert len(all_results) == len(cd_ref_pairs)
    timing_results = [timing for _, timing in all_results]

    start = timer()
    print('Beginning merge')
    merged_results = {metric: {} for metric in distance_metrics}
    if return_ndc_results:
        # Regroup the per-pair dictionaries by metric before merging.
        per_metric = {metric: [] for metric in distance_metrics}
        for dist_res, _ in all_results:
            for metric, distance_dict in dist_res.items():
                per_metric[metric].append(distance_dict)
        for metric in distance_metrics:
            merged_results[metric] = merge_result(per_metric[metric], top_k)
    end = timer()
    print('Merge took {} seconds'.format(end - start))
    return merged_results, timing_results
def run_experiment(problem_size, shard_size, pipeline, priority, lru, eager,
                   truncate, max_cores, start_cores, trial,
                   launch_granularity, timeout, log_granularity,
                   autoscale_policy, standalone):
    """Run one sharded Cholesky experiment and pickle the collected metrics.

    The function shards a random column vector, forms ``X X^T`` with a
    distributed GEMM, compiles the Cholesky program, launches workers through
    pywren and polls SQS/Redis counters until the program stops running. It
    then gathers per-block profiling information and writes the ``exp``
    dictionary to ``optimization_experiments/<arg_hash>.pickle``; the log
    goes to ``optimization_experiments/<arg_hash>.log``.

    Args:
        problem_size: Number of rows of the random column vector ``X``.
        shard_size: Row shard size of ``X``.
        pipeline: Pipeline width handed to ``job_runner.lambdapack_run``.
        priority: When truthy the program uses 5 priorities, otherwise 1.
        lru: When truthy the workers get ``cache_size=5``, otherwise ``0``.
        eager: Forwarded to ``lp.LambdaPackProgram``.
        truncate: When not ``None``, only the first ``truncate`` instructions
            are kept.
        max_cores: Upper bound on workers for both autoscale policies.
        start_cores: Number of workers launched right after ``program.start()``.
        trial: Only hashed into the file names and recorded in ``exp``.
        launch_granularity: Minimum number of seconds between two launches
            under the ``"dynamic"`` policy.
        timeout: Forwarded to ``job_runner.lambdapack_run``; under the
            ``"constant_timeout"`` policy ``max_cores`` new workers are
            launched once ``0.99 * timeout`` seconds have passed.
        log_granularity: Seconds to sleep in every polling iteration.
        autoscale_policy: ``"dynamic"`` or ``"constant_timeout"``.
        standalone: Selects ``pywren.standalone_executor`` (and forwards AWS
            credentials from the environment) instead of the default executor.

    Raises:
        KeyError: If ``standalone`` is truthy and ``AWS_ACCESS_KEY_ID`` or
            ``AWS_SECRET_ACCESS_KEY`` is missing from the environment.
        Exception: ``"unknown autoscale policy"`` for any other
            ``autoscale_policy``.
    """

    def int_or_zero(raw):
        """Convert a counter to int, treating a missing (None) value as 0."""
        return 0 if raw is None else int(raw)

    def giga_or_zero(raw):
        """Scale a raw counter by 1e9, treating a missing (None) value as 0."""
        return 0 if raw is None else int(raw) / 1e9

    # set up logging
    logger = logging.getLogger()
    # Silence every already-registered logger; only the root logger talks.
    for key in logging.Logger.manager.loggerDict:
        logging.getLogger(key).setLevel(logging.CRITICAL)
    logger.setLevel(logging.DEBUG)
    arg_bytes = pickle.dumps(
        (problem_size, shard_size, pipeline, priority, lru, eager, truncate,
         max_cores, start_cores, trial, launch_granularity, timeout,
         log_granularity, autoscale_policy))
    arg_hash = hashlib.md5(arg_bytes).hexdigest()
    # The FileHandler below opens its file immediately, so the output
    # directory has to exist before logging is configured.
    os.makedirs("optimization_experiments/", exist_ok=True)
    log_file = "optimization_experiments/{0}.log".format(arg_hash)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info("Logging to {0}".format(log_file))

    X = np.random.randn(problem_size, 1)
    if standalone:
        # Credentials are taken from the environment; secrets must never be
        # committed to source control.
        redis_env = {
            "REDIS_ADDR": os.environ.get("REDIS_ADDR", ""),
            "REDIS_PASS": os.environ.get("REDIS_PASS", ""),
            "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"],
            "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"],
            "OMP_NUM_THREADS": "1"
        }
        config = wc.default()
        config['runtime']['s3_bucket'] = 'pictureweb'
        config['runtime'][
            's3_key'] = 'pywren.runtime/pywren_runtime-3.6-numpywren_avx512.tar.gz'
        pwex = pywren.standalone_executor(config=config)
    else:
        redis_env = {
            "REDIS_ADDR": os.environ.get("REDIS_ADDR", ""),
            "REDIS_PASS": os.environ.get("REDIS_PASS", "")
        }
        config = wc.default()
        config['runtime']['s3_bucket'] = 'pictureweb'
        config['runtime'][
            's3_key'] = 'pywren.runtime/pywren_runtime-3.6-numpywren.tar.gz'
        pwex = pywren.default_executor(config=config)

    shard_sizes = [shard_size, 1]
    X_sharded = BigMatrix("cholesky_test_{0}_{1}".format(
        problem_size, shard_size),
                          shape=X.shape,
                          shard_sizes=shard_sizes,
                          write_header=True)
    shard_matrix(X_sharded, X)
    print("Generating PSD matrix...")
    t = time.time()
    XXT_sharded = binops.gemm(pwex, X_sharded, X_sharded.T, overwrite=False)
    e = time.time()
    print("GEMM took {0}".format(e - t))
    XXT_sharded.lambdav = problem_size * 10
    instructions, L_sharded, trailing = lp._chol(XXT_sharded)
    # Use the caller-supplied width so the run matches the value that is
    # hashed into the file names and recorded in exp["pipeline"].
    pipeline_width = pipeline
    num_priorities = 5 if priority else 1
    cache_size = 5 if lru else 0
    REDIS_CLIENT = redis.StrictRedis(REDIS_ADDR,
                                     port=REDIS_PORT,
                                     password=REDIS_PASS,
                                     db=0,
                                     socket_timeout=5)
    if truncate is not None:
        instructions = instructions[:truncate]
    config = pwex.config
    program = lp.LambdaPackProgram(instructions,
                                   executor=pywren.lambda_executor,
                                   pywren_config=config,
                                   num_priorities=num_priorities,
                                   eager=eager)

    done_counts = []
    ready_counts = []
    post_op_counts = []
    not_ready_counts = []
    running_counts = []
    sqs_invis_counts = []
    sqs_vis_counts = []
    up_workers_counts = []
    busy_workers_counts = []
    times = []
    flops = []
    reads = []
    writes = []
    print("LRU", lru)
    print("eager", eager)
    exp = {
        "redis_done_counts": done_counts,
        "redis_ready_counts": ready_counts,
        "redis_post_op_counts": post_op_counts,
        "redis_not_ready_counts": not_ready_counts,
        "redis_running_counts": running_counts,
        "sqs_invis_counts": sqs_invis_counts,
        "sqs_vis_counts": sqs_vis_counts,
        "busy_workers": busy_workers_counts,
        "up_workers": up_workers_counts,
        "times": times,
        "lru": lru,
        "priority": priority,
        "eager": eager,
        "truncate": truncate,
        "max_cores": max_cores,
        "problem_size": problem_size,
        "shard_size": shard_size,
        "pipeline": pipeline,
        "flops": flops,
        "reads": reads,
        "writes": writes,
        "trial": trial,
        "launch_granularity": launch_granularity,
        "log_granularity": log_granularity,
        "autoscale_policy": autoscale_policy,
        "standalone": standalone,
    }
    logger.info("Longest Path: {0}".format(program.longest_path))
    program.start()
    t = time.time()
    logger.info("Starting with {0} cores".format(start_cores))

    def launch_workers(count):
        """Map ``count`` worker invocations and return pwex.map's result."""
        return pwex.map(lambda x: job_runner.lambdapack_run(
            program,
            pipeline_width=pipeline_width,
            cache_size=cache_size,
            timeout=timeout),
                        range(count),
                        extra_env=redis_env)

    all_futures = launch_workers(start_cores)
    start_time = time.time()
    last_run_time = start_time
    sqs_client = boto3.client('sqs')
    while program.program_status() == lp.PS.RUNNING:
        curr_time = int(time.time() - start_time)
        max_pc = program.get_max_pc()
        times.append(int(time.time()))
        time.sleep(log_granularity)

        waiting = 0
        running = 0
        for queue_url in program.queue_urls:
            attrs = sqs_client.get_queue_attributes(
                QueueUrl=queue_url,
                AttributeNames=[
                    'ApproximateNumberOfMessages',
                    'ApproximateNumberOfMessagesNotVisible'
                ])['Attributes']
            waiting += int(attrs["ApproximateNumberOfMessages"])
            running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
        sqs_invis_counts.append(running)
        sqs_vis_counts.append(waiting)

        busy_workers = int_or_zero(
            REDIS_CLIENT.get("{0}_busy".format(program.hash)))
        up_workers = int_or_zero(program.get_up())
        up_workers_counts.append(up_workers)
        busy_workers_counts.append(busy_workers)
        logger.debug("Waiting: {0}, Currently Processing: {1}".format(
            waiting, running))
        logger.debug("{2}: Up Workers: {0}, Busy Workers: {1}".format(
            up_workers, busy_workers, curr_time))
        if (curr_time % INFO_FREQ) == 0:
            logger.info("Max PC is {0}".format(max_pc))
            logger.info("Waiting: {0}, Currently Processing: {1}".format(
                waiting, running))
            logger.info("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))

        flops.append(giga_or_zero(program.get_flops()))
        reads.append(giga_or_zero(program.get_read()))
        writes.append(giga_or_zero(program.get_write()))

        time_since_launch = time.time() - last_run_time
        if autoscale_policy == "dynamic":
            if (time_since_launch > launch_granularity
                    and up_workers < np.ceil(waiting * 0.5 / pipeline_width)
                    and up_workers < max_cores):
                cores_to_launch = int(
                    min(
                        np.ceil(waiting / pipeline_width) - up_workers,
                        max_cores - up_workers))
                logger.info(
                    "launching {0} new tasks....".format(cores_to_launch))
                new_futures = launch_workers(cores_to_launch)
                last_run_time = time.time()
                all_futures.extend(new_futures)
        elif autoscale_policy == "constant_timeout":
            if time_since_launch > (0.99 * timeout):
                cores_to_launch = max_cores
                logger.info(
                    "launching {0} new tasks....".format(cores_to_launch))
                new_futures = launch_workers(cores_to_launch)
                last_run_time = time.time()
                all_futures.extend(new_futures)
        else:
            raise Exception("unknown autoscale policy")
    exp["all_futures"] = all_futures

    # Count instruction blocks whose start counter is not exactly 1.
    doubles = 0
    for pc in range(program.num_inst_blocks):
        run_count = int_or_zero(
            REDIS_CLIENT.get("{0}_{1}_start".format(program.hash, pc)))
        if run_count != 1:
            logger.warning("PC: {0}, Run Count: {1}".format(pc, run_count))
            doubles += 1
    print("Number of repeats: {0}".format(doubles))
    e = time.time()
    time.sleep(10)
    logger.info(program.program_status())
    logger.info("PROGRAM STATUS " + str(program.program_status()))
    logger.info("PROGRAM HASH " + str(program.hash))
    logger.info("Took {0} seconds".format(e - t))

    # Collect profiling info for every block in parallel; the context manager
    # shuts the pool down once all results are in.
    with fs.ThreadPoolExecutor(72) as executor:
        futures = [
            executor.submit(program.get_profiling_info, i)
            for i in range(program.num_inst_blocks)
        ]
        profiled_blocks = [f.result() for f in futures]
    serializer = serialize.SerializeIndependent()
    byte_string = serializer([profiled_blocks])[0][0]
    exp["profiled_block_pickle_bytes"] = byte_string
    _, _, total_flops, bins, _, _ = lp.perf_profile(profiled_blocks,
                                                    num_bins=100)
    flop_rate = sum(total_flops) / max(bins)
    exp["flop_rate"] = flop_rate
    print("Average Flop rate of {0}".format(flop_rate))

    # save other stuff
    exp_bytes = pickle.dumps(exp)
    dump_path = "optimization_experiments/{0}.pickle".format(arg_hash)
    print("Dumping experiment pickle to {0}".format(dump_path))
    with open(dump_path, "wb+") as f:
        f.write(exp_bytes)