def logger(pipe, log_file, node_list, manager, startTime, interval, nodes,
           services, close_pipe):
    f = open("{0}_results.csv".format(log_file), 'w', newline='')
    f2 = open("{0}_observations.csv".format(log_file), 'w', newline='')
    close_flag = False
    logWriter = csv.writer(f, dialect='excel')
    logWriter2 = csv.writer(f2, dialect='excel')
    logWriter.writerow([
        "SQL CPU", "Web Worker CPU", "SQL Memory", "Web Worker Memory",
        "# SQL Containers", "# Web Worker Containers", "Delta Requests",
        "# Requests", "Iteration", "Minutes", "Seconds"
    ])
    logWriter2.writerow([
        "SQL CPU", "Web Worker CPU", "SQL Memory", "Web Worker Memory",
        "Minutes", "Seconds"
    ])
    #services = {}
    #nodes = {}
    #getNodeIDs(node_list, nodes)
    #getServices(services, manager)
    sql_cpu_avg = 0
    web_worker_cpu_avg = 0
    sql_mem_avg = 0
    web_worker_mem_avg = 0
    sql_cpu_usages = []
    sql_mem_usages = []
    web_worker_cpu_usages = []
    web_worker_mem_usages = []
    for service_name, service in services.items():
        get_tasks(service, manager)
    sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
        services, sql_cpu_usages, sql_mem_usages, web_worker_cpu_usages,
        web_worker_mem_usages, nodes)
    diff_time = time.time() - startTime
    logWriter2.writerow([
        sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg,
        diff_time // 60, diff_time % 60
    ])
    while not close_flag:
        while not close_pipe.poll():
            time.sleep(interval)
            sql_cpu_usages = []
            sql_mem_usages = []
            web_worker_cpu_usages = []
            web_worker_mem_usages = []
            if pipe.poll():
                pipe_tuple = pipe.recv()
                if pipe_tuple == "close":
                    print("Logger shutting down")
                    close_flag = True
                    f.close()
                    f2.close()
                else:
                    if pipe_tuple[11] == True:
                        #time.sleep(interval)
                        for service_name, service in services.items():
                            get_tasks(service, manager)
                    logWriter.writerow(pipe_tuple[:11])
            sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
                services, sql_cpu_usages, sql_mem_usages,
                web_worker_cpu_usages, web_worker_mem_usages, nodes)
            diff_time = time.time() - startTime
            logWriter2.writerow([
                sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg,
                web_worker_mem_avg, diff_time // 60, diff_time % 60
            ])
        while not close_flag:
            if pipe.poll():
                pipe_tuple = pipe.recv()
                if pipe_tuple == "close":
                    print("Logger shutting down")
                    close_flag = True
                    f.close()
                    f2.close()
                else:
                    logWriter.writerow(pipe_tuple[:11])
def compute_features(data, time_interval):
    df = pd.DataFrame(data, columns=['timestamp', 'x', 'y', 'z'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    x = np.array(df['x'])
    y = np.array(df['y'])
    z = np.array(df['z'])
    timestamp = pd.Series(df['timestamp'])

    # Flip the x and y axes if needed to ensure standard orientation.
    # For correct orientation, the x-angle should be mostly negative,
    # so if the median x-angle is positive, flip both x and y axes.
    # Ref: https://github.com/wadpac/hsmm4acc/blob/524743744068e83f468a4e217dde745048a625fd/UKMovementSensing/prepacc.py
    angx = np.arctan2(x, np.sqrt(y * y + z * z)) * 180.0 / math.pi
    if np.median(angx) > 0:
        x *= -1
        y *= -1

    ENMO = get_ENMO(x, y, z)
    angle_x, angle_y, angle_z = get_tilt_angles(x, y, z)
    LIDS = get_LIDS(timestamp, ENMO)
    _, ENMO_stats = get_stats(df['timestamp'], ENMO, time_interval)
    _, angle_z_stats = get_stats(df['timestamp'], angle_z, time_interval)
    timestamp_agg, LIDS_stats = get_stats(df['timestamp'], LIDS, time_interval)
    feat = np.hstack((ENMO_stats, angle_z_stats, LIDS_stats))
    return feat
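# A minimal check of the orientation heuristic above on synthetic
# accelerometer values (all numbers illustrative, not from any dataset):
# when gravity points mostly along +x, the median x-angle comes out
# strongly positive, which is what triggers the axis flip.
import math
import numpy as np

x = np.array([0.9, 0.8, 0.95])   # gravity mostly along +x
y = np.array([0.1, 0.05, 0.0])
z = np.array([0.2, 0.3, 0.1])
angx = np.arctan2(x, np.sqrt(y * y + z * z)) * 180.0 / math.pi
print(np.median(angx))            # ~ +70 degrees -> flip x and y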
def test_get_stats(self):
    self.assertIsInstance(get_stats({'a': [10, 2, 0, 0]}), list,
                          'result is not list')
    self.assertEqual(get_stats({'a': [10, 2, 0, 0]})[0], 0.2, 'wrong compute')
    self.assertEqual(get_stats({'a': [20, 5, 0, 0]})[0], 0.25, 'wrong compute')
    self.assertEqual(get_stats({'a': [0, 0, 0, 0]})[0], 0, 'wrong compute')
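# A minimal sketch of a get_stats that would satisfy the assertions above
# (the actual implementation is not included in this snippet): for each
# counter list it returns the ratio of the second element to the first,
# guarding against division by zero.
def get_stats(counters):
    stats = []
    for key, (total, hits, *_rest) in counters.items():
        stats.append(hits / total if total else 0)
    return stats

assert get_stats({'a': [10, 2, 0, 0]})[0] == 0.2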
def stats(self):
    """ Query play stats for a given user """
    parser = argparse.ArgumentParser(description=get_stats.__doc__,
                                     prog=f'{self.parser.prog} stats')
    parser.add_argument('username', help="Username of the user to query")
    args = vars(parser.parse_args(argv[2:]))
    args['headers'] = self.bot.headers
    get_stats(**args)
def main(args):
    if args.mode == Mode.STATS:
        return stats_mode(args)
    if args.mode == Mode.CHECK:
        return check_mode(args)
    if args.mode == Mode.SCATTER:
        return scatter_mode(args)
    if args.mode == Mode.FULL:
        for f in args.files:
            lines = utils.get_log_lines([f])
            utils.get_stats(lines)
        return
def augment_examples(X, y, s=1, weights_on_scarce=.75, dataset_name="",
                     random_state=42, show_stats=True):
    '''
    generate randomly transformed example images
    :param X: list of 3D image dataset, shape = (r, c, channel)
    :param y: labels
    :param s: integer scale factor
    :param weights_on_scarce: correction factor on scarce label classes
    :param dataset_name: name of dataset being boosted
    :param random_state: seed used when shuffling the augmented set
    :return: len(X) * s number of boosted examples
    '''
    n_channel = X[0].shape[-1]
    n_train = len(y)
    train_freq = plots.get_label_dist(y)
    train_freq_normalized = minmax_normalization(train_freq, eps=1e-8)
    n_transform_list = np.floor(
        (1 - weights_on_scarce * train_freq_normalized) * s)
    X_augmented = []
    y_augmented = []
    for i, image in enumerate(X):
        sys.stdout.write('\r>> Augmenting image %s (%.1f%%)' %
                         (str(i), float(i + 1) / float(n_train) * 100.0))
        sys.stdout.flush()
        n_transform = int(n_transform_list[0][y[i]])
        for j in range(n_transform):
            image = random_transform(image)
            image = minmax_normalization(image)
            X_augmented.append(image)
            y_augmented.append(y[i])
    X_augmented = np.array(X_augmented).reshape(len(X_augmented), 32, 32,
                                                n_channel)
    X_augmented, y_augmented = shuffle(X_augmented, y_augmented,
                                       random_state=random_state)
    if show_stats:
        utils.get_stats(X_augmented, y_augmented, dataset_name)
    return X_augmented, y_augmented
def test_stats(self):
    _id = utils.insert_url(self.data["url"], self.data["code"])
    for i in range(1, 5):
        utils.bump_stats(_id)
        created_at, last_usage, usage_count = utils.get_stats(
            self.data["code"])
        self.assertEqual(usage_count, i)
def main(method, model_name, model_version, save_bbox_labels,
         frames_train_size, step, ae_model_path):
    useKfold = False
    if step > 0:
        # used for k-fold testing accuracy calculation
        useKfold = True
        games_stats = []
        for k in range(len(utils.test_games)):
            games_stats.append([])
    acc = []
    print("Running with method " + method)
    # Evaluate method for all test games
    for j, game in enumerate(utils.test_games):
        model_path = utils.trained_models_dir + model_name + model_version + '.pth'
        if (method == 'net'):
            model = utils.load_model_embed(model_path)
        elif (method == 'ae'):
            model = utils.load_model_embed(ae_model_path, isAE=True)
        else:
            model = []
        offset = 0
        while offset <= 512 - frames_train_size:
            print(" running for game " + game + " starting from frame " +
                  str(offset + 1))
            names, labels, gt_clusters = run_for_game(game, model, offset)
            result_clusters = [[] for y in range(2)]
            for m in range(len(labels)):
                result_clusters[labels[m]].append(names[m])
            two_way_acc = utils.get_stats(gt_clusters, result_clusters, 2)
            if useKfold:
                games_stats[j].append(two_way_acc)
            else:
                acc.append(two_way_acc)
            if save_bbox_labels:
                labels_file_path = 'results/' + game + '_labels_' + method + '.txt'
                f_results = open(labels_file_path, 'w')
                save_labels(f_results, names, labels)
                write_bbox_labels(game, names, labels, method)
                f_results.close()
                ex.add_artifact(labels_file_path)
            if step == 0:
                offset = 512
            else:
                offset = offset + step
    if (useKfold):
        acc = np.mean(games_stats, axis=1)
    print("mean acc: " + str(np.mean(acc)))
    print("mean error: " + str(1 - np.mean(acc)))
    print("std acc: " + str(np.std(acc)))
    print("standard error: " +
          str(np.std(acc) / math.sqrt(len(utils.test_games))))
def run_experiments(args):
    res = []
    for i in range(args.num_trials):
        print("Trial {}/{}".format(i + 1, args.num_trials))
        acc, _ = main(args)
        res.append(acc)
    mean, err_bd = get_stats(res, conf_interval=True)
    return mean, err_bd
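# A plausible sketch of the get_stats helper used here (an assumption, not
# the repo's actual code): it returns the mean of the trial accuracies and
# an error bound -- a 95% confidence half-width when conf_interval is set,
# otherwise the plain sample standard deviation.
import numpy as np

def get_stats(results, conf_interval=False):
    arr = np.asarray(results, dtype=float)
    mean = arr.mean()
    if conf_interval:
        # 1.96 * standard error approximates a 95% confidence interval
        err_bd = 1.96 * arr.std(ddof=1) / np.sqrt(len(arr))
    else:
        err_bd = arr.std(ddof=1)
    return mean, err_bd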
def print_results():
    name = {
        0: 'No method',
        1: 'Mean',
        2: 'Interpolation',
        3: 'Hotdeck',
        4: 'Regression'
    }[choice]
    print('- - - - - - - - Before imputation - - - - - - - -\n' +
          'Missing values:\t' + '{:.2%}'.format(na_fraction(before)) + '\n' +
          get_stats(before).to_string() + '\n')
    print('- - - - - - - - After imputation of ' + name + ' - - - - -\n' +
          'Missing values:\t' + '{:.2%}'.format(na_fraction(after)) + '\n' +
          get_stats(after).to_string() + '\n')
    print('- - - - - - - - Difference - - - - - - - -\n' +
          (get_stats(after) - get_stats(before)).to_string() + '\n')
def eval_pairs(user_vecs, rel_mat, t, dist_func):
    # renamed from `max`/`min`, which shadowed the Python builtins
    mean, std, max_dist, min_dist = ut.get_stats(user_vecs, dist_func)
    pairs = find_pairs_above_threshold(rel_mat, t)
    num_pairs_found = 0
    dist_sum = 0
    for ui, uj in pairs:
        dist = ut.get_dist(user_vecs, ui, uj, dist_func)
        if dist is not None:
            num_pairs_found += 1
            norm_dist = np.divide(np.subtract(dist, mean), std)
            dist_sum += norm_dist
    return dist_sum / num_pairs_found
def run_test(model, test_data, device):
    if isinstance(model, VecModel):
        print(f'Model weights: {model.w.weight}')
        print(f'Model biases: {model.w.bias}')
    model.to(device)
    predictions = model.predict(test_data, device=device)
    stats, class_stats = get_stats(test_data.labels,
                                   predictions,
                                   all_labels=list(model.class_map.keys()))
    normal_metrics = {
        f'Test {metric}': value
        for metric, value in stats.items()
    }
    class_risk_metrics = {
        f'Test class {idx} accuracy':
        class_stats[model.idx_to_class_map[idx]]['recall']
        for idx in range(2)
    }
    worst_risk_class, worst_risk_entry = max(
        list(class_stats.items()), key=lambda x: 1 - x[1]['recall'])
    entry = {
        **normal_metrics,
        **class_risk_metrics,
        'Test risk histogram':
        wandb.Image(
            class_stats_histogram(class_stats,
                                  lambda x: 1 - x['recall'],
                                  'Risk',
                                  bins=np.arange(11) * 0.1,
                                  range=(0, 1))),
        'Test class histogram':
        wandb.Image(
            class_stats_histogram(class_stats,
                                  lambda x: x['true'],
                                  'Frequency',
                                  cmp_fn=lambda x: -x,
                                  bins=10)),
        'Test worst class risk': 1 - worst_risk_entry['recall'],
        'Test worst class risk label': worst_risk_class
    }
    wandb.log(entry)
    pprint(entry)
    with open(os.path.join(wandb.run.dir, 'test_result.json'), 'w') as out_f:
        out_f.write(json.dumps(class_stats))
def url_stats(code: str):
    """Shows stats for a given code"""
    _, exists = utils.code_exists(code)
    if not exists:
        return jsonify(error="Code Not Found"), 404
    else:
        created_at, last_usage, usage_count = utils.get_stats(code)
        result = {
            'created_at': utils.to_iso8601(created_at),
            'usage_count': usage_count
        }
        if last_usage:
            result['last_usage'] = utils.to_iso8601(last_usage)
        return jsonify(result), 200
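# Hypothetical usage via Flask's test client; the route path and the `app`
# object are assumptions, since the route decorator is not shown above:
with app.test_client() as client:
    resp = client.get('/stats/abc123')
    if resp.status_code == 200:
        print(resp.get_json())   # {'created_at': ..., 'usage_count': ...}
    else:
        print(resp.get_json())   # {'error': 'Code Not Found'}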
def shortcodeStats(shortcode):
    """
    Receives a shortcode, checks whether it's in the db and if so
    returns its stats as json
    """
    try:
        conn = utils.create_connection("test.db")
        entry, url = utils.check_entry(shortcode, conn)
        if entry:
            stats = utils.get_stats(shortcode, conn)
            stats = flask.jsonify(stats)
            conn.close()
            return flask.make_response(stats, 200)
        # unknown shortcode: previously fell off the end and returned None
        conn.close()
        return not_found()
    except Exception:
        conn.close()
        return not_found()
def main(unused_args):
    dataset = utils.ThyroidData()
    (x_train, y_train), (x_test, y_test) = dataset.load_data()
    if FLAGS.train:
        tf.gfile.MakeDirs(FLAGS.save_path)
        with tf.Session() as sess:
            dagmm = DAGMM(sess)
            sess.run(tf.global_variables_initializer())
            dagmm.fit(x_train)
    elif FLAGS.test:
        with tf.Session() as sess:
            dagmm = DAGMM(sess)
            dagmm.load(FLAGS.save_path)
            predictions = dagmm.predict(x_test)
            precision, recall, f1 = utils.get_stats(predictions=predictions,
                                                    labels=y_test)
            print("Precision: {}".format(precision))
            print("Recall: {}".format(recall))
            print("F1: {}".format(f1))
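# A plausible sketch of the utils.get_stats used above (an assumption, the
# real helper is not shown): given binary anomaly predictions and labels it
# computes precision = TP/(TP+FP), recall = TP/(TP+FN), and the F1 score.
import numpy as np

def get_stats(predictions, labels):
    predictions = np.asarray(predictions).astype(bool)
    labels = np.asarray(labels).astype(bool)
    tp = np.sum(predictions & labels)
    fp = np.sum(predictions & ~labels)
    fn = np.sum(~predictions & labels)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return precision, recall, f1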
def bot():
    global DETECTED_LANGUAGE
    data = request.form
    incoming_msg = request.values.get('Body', '').lower()
    phone_number = data.get("From").replace("whatsapp:+", "")
    DETECTED_LANGUAGE = detect_language(incoming_msg)
    incoming_msg = translate_text(
        "en", incoming_msg) if DETECTED_LANGUAGE != "en" else incoming_msg
    response = MessagingResponse()
    if 'details' in incoming_msg and EVENT:
        translate_response = translate_text(
            DETECTED_LANGUAGE, str('Here is the link: ')
        ) if DETECTED_LANGUAGE != "en" else str('Here is the link: ')
        message = response.message(translate_response + STATIC_MESSAGE['link'])
        message.media(STATIC_MESSAGE['detailed_image'])
        return str(response)
    elif 'opt in' in incoming_msg or 'optin' in incoming_msg:
        send_notification(phone_number, 'Welcome_to_aa')
        # this branch previously returned None; reply with the TwiML response
        return str(response)
    elif 'news' in incoming_msg:
        result = get_news(incoming_msg)
        for i in result:
            response.message(i)
        return str(response)
    elif 'stats' in incoming_msg or 'statistics' in incoming_msg:
        result = get_stats(incoming_msg)
        response.message(result)
        return str(response)
    elif 'regulations' in incoming_msg:
        result = regulations(incoming_msg)
        response.message(result)
        return str(response)
    else:
        response_dialogFlow = detect_intent_from_text(str(incoming_msg),
                                                      phone_number)
        translate_response = translate_text(
            DETECTED_LANGUAGE, str(response_dialogFlow.fulfillment_text)
        ) if DETECTED_LANGUAGE != "en" else str(
            response_dialogFlow.fulfillment_text)
        response.message(translate_response)
        return str(response)
def train(model, num_batch, train_batches, valid_batches, test_batches, opt,
          num_epochs, hidden_dim, verbose=True):
    epoch = 0
    step = 0
    best_epoch = 0
    best_val_ret = 0
    best_tst_ret = 0
    best_med_ret = 0
    best_std_ret = 0
    rpt_epoch = 1
    test_epoch = 1
    while epoch < num_epochs:
        batch_X_float, batch_X_embed, batch_label, batch_duration, batch_ret = next(
            train_batches)
        opt.zero_grad()
        loss, pred_ret = model(batch_X_float, batch_X_embed, batch_label,
                               batch_ret)
        loss.backward()
        #clip_grad_norm(model.parameters(), 1)
        opt.step()
        step += 1
        if step >= num_batch:
            epoch += 1
            step = 0
            if epoch % rpt_epoch == 0:
                med_diff, avg_diff, max_diff = get_stats(pred_ret, batch_ret)
                if verbose:
                    # loss.item() replaces the deprecated loss.data[0]
                    print('Train: epoch: %d, avg loss: %.3f, median diff: %.3f, mean: %.3f, max: %.3f'
                          % (epoch, loss.item(), med_diff, avg_diff, max_diff))
            valid_X_float, valid_X_embed, valid_label, valid_duration, valid_ret = next(
                valid_batches)
            _, ret = model(valid_X_float, valid_X_embed, valid_label,
                           valid_ret)
            valid_avg_ret, valid_med_ret, valid_std_ret = ret_strtgy(
                ret, valid_ret)
            test_X_float, test_X_embed, test_label, test_duration, test_ret = next(
                test_batches)
            _, ret = model(test_X_float, test_X_embed, test_label, test_ret)
            test_avg_ret, test_med_ret, test_std_ret = ret_strtgy(
                ret, test_ret)
            if valid_avg_ret > best_val_ret:
                best_epoch = epoch
                best_val_ret = valid_avg_ret
                best_tst_ret = test_avg_ret
                best_med_ret = test_med_ret
                best_std_ret = test_std_ret
                model_name = './models/regmse_' + str(hidden_dim) + 'dim_model.pt'
                torch.save(model.state_dict(), model_name)
            if epoch % test_epoch == 0 and verbose:
                print('Test epoch: %d, avg return: %.3f, median: %.3f, std: %.3f'
                      % (epoch, test_avg_ret, test_med_ret, test_std_ret))
    return best_epoch, best_tst_ret, best_med_ret, best_std_ret
def lone_test(functions, N=100, wr=2, times=3):
    # creating test arrays
    img = np.arange(N**2).reshape((N, N))
    # defining function parameters
    ws = 2 * wr + 1
    # velocity test
    for i, function in enumerate(functions):
        name = function.__name__
        function_stats = get_stats(function, times, return_val=True)
        print(f'Statistics for function: {name}(N={N}, ws={ws})')
        avg, std, minv, maxv, resp = function_stats(img, wr)
        rel_std = 100 * std / np.abs(avg)  # %
        amp = maxv - minv
        print(f'  avg±std:\t{avg:.4g}±{std:.4g} s\t(rel_std: {rel_std:.2f}%)')
        print(f'  amp: {amp:4g} = [{minv:.4g}, {maxv:.4g}] s')
        print(f'  function run {times} times.', end='\n\n')
def runtime_results(functions, Ns=[5], wss=[3, 5, 7], times=3):
    def print_stats(*stats):
        if len(stats) == 0:
            print('name\tN\tws\tavg\tstd\trel_std\tminv\tmaxv\tamp')
        else:
            print('\t'.join([str(i) for i in stats]))

    print_stats()
    for function in functions:
        name = function.__name__
        function_stats = get_stats(function, times)
        for N in Ns:
            img = np.random.rand(N, N)
            for ws in wss:
                wr = ws // 2 + 1
                avg, std, minv, maxv = function_stats(img, wr)
                rel_std = 100 * std / np.abs(avg)  # %
                amp = maxv - minv
                print_stats(name, N, ws, avg, std, rel_std, minv, maxv, amp)
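# A plausible sketch of the get_stats timing helper that lone_test and
# runtime_results assume (the actual helper is not shown): it wraps a
# function so that calling the wrapper runs it `times` times and returns
# timing statistics, plus the last return value when return_val=True.
import time
import numpy as np

def get_stats(function, times, return_val=False):
    def wrapper(*args, **kwargs):
        durations = []
        result = None
        for _ in range(times):
            start = time.perf_counter()
            result = function(*args, **kwargs)
            durations.append(time.perf_counter() - start)
        d = np.array(durations)
        stats = (d.mean(), d.std(), d.min(), d.max())
        return stats + (result,) if return_val else stats
    return wrapper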
def run(args, log_interval=5000, rerun=False):
    # see if we already ran this experiment
    code_root = os.path.dirname(os.path.realpath(__file__))
    exp_dir = utils.get_path_from_args(
        args) if not args.output_dir else args.output_dir
    path = "{}/results/{}".format(code_root, exp_dir)
    if not os.path.isdir(path):
        os.makedirs(path)
    if os.path.exists(os.path.join(path, "logs.pkl")) and not rerun:
        return utils.load_obj(os.path.join(path, "logs"))

    start_time = time.time()

    # correctly seed everything
    utils.set_seed(args.seed)

    # --- initialise everything ---
    task_family_train = tasks_sine.RegressionTasksSinusoidal(
        "train", args.skew_task_distribution)
    task_family_valid = tasks_sine.RegressionTasksSinusoidal(
        "valid", args.skew_task_distribution)

    # initialise network
    model_inner = MamlModel(
        task_family_train.num_inputs,
        task_family_train.num_outputs,
        n_weights=args.num_hidden_layers,
        device=args.device,
    ).to(args.device)
    model_outer = copy.deepcopy(model_inner)

    if args.detector == "minimax":
        task_sampler = TaskSampler(
            task_family_train.atoms //
            (2 if args.skew_task_distribution else 1)).to(args.device)
    elif args.detector == "neyman-pearson":
        constrainer = Constrainer(
            task_family_train.atoms //
            (2 if args.skew_task_distribution else 1)).to(args.device)

    # initialise meta-optimiser
    meta_optimiser = optim.Adam(model_outer.weights + model_outer.biases,
                                args.lr_meta)

    # initialise loggers
    logger = Logger()
    logger.best_valid_model = copy.deepcopy(model_outer)

    for i_iter in range(args.n_iter):
        # copy weights of network
        copy_weights = [w.clone() for w in model_outer.weights]
        copy_biases = [b.clone() for b in model_outer.biases]

        # get all shared parameters and initialise cumulative gradient
        meta_gradient = [
            0 for _ in range(
                len(copy_weights + copy_biases) +
                (2 if args.detector != "bayes" else 0))
        ]

        # sample tasks
        if args.detector == "minimax":
            task_idxs, task_probs = task_sampler(args.tasks_per_metaupdate)
        elif args.detector == "neyman-pearson":
            amplitude_idxs = torch.randint(
                task_family_train.atoms //
                (2 if args.skew_task_distribution else 1),
                (args.tasks_per_metaupdate, ),
            )
            phase_idxs = torch.randint(
                task_family_train.atoms //
                (2 if args.skew_task_distribution else 1),
                (args.tasks_per_metaupdate, ),
            )
            task_idxs = amplitude_idxs, phase_idxs
        else:
            task_idxs = None
        target_functions = task_family_train.sample_tasks(
            args.tasks_per_metaupdate, task_idxs=task_idxs)

        for t in range(args.tasks_per_metaupdate):
            # reset network weights
            model_inner.weights = [w.clone() for w in copy_weights]
            model_inner.biases = [b.clone() for b in copy_biases]

            # get data for current task
            train_inputs = task_family_train.sample_inputs(
                args.k_meta_train).to(args.device)

            for _ in range(args.num_inner_updates):
                # make prediction using the current model
                outputs = model_inner(train_inputs)

                # get targets
                targets = target_functions[t](train_inputs)

                # ------------ update on current task ------------

                # compute loss for current task
                loss_task = F.mse_loss(outputs, targets)

                # compute the gradient wrt current model
                params = [w for w in model_inner.weights
                          ] + [b for b in model_inner.biases]
                grads = torch.autograd.grad(loss_task,
                                            params,
                                            create_graph=True,
                                            retain_graph=True)

                # make an update on the inner model using the current model
                # (to build up computation graph)
                for i in range(len(model_inner.weights)):
                    if not args.first_order:
                        model_inner.weights[i] = (model_inner.weights[i] -
                                                  args.lr_inner * grads[i])
                    else:
                        model_inner.weights[i] = (
                            model_inner.weights[i] -
                            args.lr_inner * grads[i].detach())
                for j in range(len(model_inner.biases)):
                    if not args.first_order:
                        model_inner.biases[j] = (
                            model_inner.biases[j] -
                            args.lr_inner * grads[i + j + 1])
                    else:
                        model_inner.biases[j] = (
                            model_inner.biases[j] -
                            args.lr_inner * grads[i + j + 1].detach())

            # ------------ compute meta-gradient on test loss of current task ------------

            # get test data
            test_inputs = task_family_train.sample_inputs(
                args.k_meta_test).to(args.device)

            # get outputs after update
            test_outputs = model_inner(test_inputs)

            # get the correct targets
            test_targets = target_functions[t](test_inputs)

            # compute loss (will backprop through inner loop)
            if args.detector == "minimax":
                importance = task_probs[t]
            else:
                importance = 1.0 / args.tasks_per_metaupdate
            loss_meta_raw = F.mse_loss(test_outputs, test_targets)
            loss_meta = loss_meta_raw * importance
            if args.detector == "neyman-pearson":
                amplitude_idxs, phase_idxs = task_idxs
                aux_loss = constrainer(amplitude_idxs[t], phase_idxs[t],
                                       loss_meta_raw)
                loss_meta = loss_meta + aux_loss

            # compute gradient w.r.t. *outer model*
            outer_params = model_outer.weights + model_outer.biases
            if args.detector == "minimax":
                outer_params += [
                    task_sampler.tau_amplitude, task_sampler.tau_phase
                ]
            elif args.detector == "neyman-pearson":
                outer_params += [
                    constrainer.tau_amplitude, constrainer.tau_phase
                ]
            task_grads = torch.autograd.grad(
                loss_meta,
                outer_params,
                retain_graph=(args.detector != "bayes"))
            for i in range(len(outer_params)):
                meta_gradient[i] += task_grads[i].detach()

        # ------------ meta update ------------

        meta_optimiser.zero_grad()
        # print(meta_gradient)

        # assign meta-gradient
        for i in range(len(model_outer.weights)):
            model_outer.weights[i].grad = meta_gradient[i]
            meta_gradient[i] = 0
        for j in range(len(model_outer.biases)):
            model_outer.biases[j].grad = meta_gradient[i + j + 1]
            meta_gradient[i + j + 1] = 0
        if args.detector == "minimax":
            task_sampler.tau_amplitude.grad = -meta_gradient[i + j + 2]
            task_sampler.tau_phase.grad = -meta_gradient[i + j + 3]
            meta_gradient[i + j + 2] = 0
            meta_gradient[i + j + 3] = 0
        elif args.detector == "neyman-pearson":
            constrainer.tau_amplitude.grad = -meta_gradient[i + j + 2]
            constrainer.tau_phase.grad = -meta_gradient[i + j + 3]
            meta_gradient[i + j + 2] = 0
            meta_gradient[i + j + 3] = 0

        # do update step on outer model
        meta_optimiser.step()

        # ------------ logging ------------

        if i_iter % log_interval == 0:
            # evaluate on training set
            losses = eval(
                args,
                copy.copy(model_outer),
                task_family=task_family_train,
                num_updates=args.num_inner_updates,
            )
            loss_mean, loss_conf = utils.get_stats(np.array(losses))
            logger.train_loss.append(loss_mean)
            logger.train_conf.append(loss_conf)

            # evaluate on valid set
            losses = eval(
                args,
                copy.copy(model_outer),
                task_family=task_family_valid,
                num_updates=args.num_inner_updates,
            )
            loss_mean, loss_conf = utils.get_stats(np.array(losses))
            logger.valid_loss.append(loss_mean)
            logger.valid_conf.append(loss_conf)

            # save best model
            if logger.valid_loss[-1] == np.min(logger.valid_loss):
                print("saving best model at iter", i_iter)
                logger.best_valid_model = copy.copy(model_outer)

            # save logging results
            utils.save_obj(logger, os.path.join(path, "logs"))

            # print current results
            logger.print_info(i_iter, start_time)
            start_time = time.time()

    return logger
    sumoBinary = 'sumo.exe'
else:
    sumoBinary = 'sumo-gui.exe'

# initializations
max_steps = 5400  # seconds = 1 h 30 min each episode
total_episodes = 100
num_experiments = 1
learn = False
traffic_gen = TrafficGenerator(max_steps)
qmodel_filename, stats_filename = utils.get_file_names()
init_experiment, init_epoch = utils.get_init_epoch(stats_filename,
                                                   total_episodes)
print('init_experiment={} init_epoch={}'.format(init_experiment, init_epoch))
stats = utils.get_stats(stats_filename, num_experiments, total_episodes)
for experiment in range(init_experiment, num_experiments):
    env = SumoEnv(sumoBinary, max_steps)
    tl = TLAgent(env, traffic_gen, max_steps, num_experiments,
                 total_episodes, qmodel_filename, stats, init_epoch, learn)
    init_epoch = 0  # reset init_epoch after first experiment
    if learn:
        tl.train(experiment)
    else:
        seeds = np.load('seed.npy')
        tl.evaluate_model(experiment, seeds)
    stats = copy.deepcopy(tl.stats)
    print(stats['rewards'][0:experiment + 1, :])
    print(stats['intersection_queue'][0:experiment + 1, :])
path = 'A1-networks/'
from utils import get_stats, print_stats
from utils import convert_to_table
from os import walk

# compulsory data sets
files = ['toy/circle9.net', 'toy/star.net', 'toy/graph3+1+3.net',
         'toy/grid-p-6x6.net', 'model/homorand_N1000_K4_0.net',
         'model/ER1000k8.net', 'model/SF_1000_g2.7.net', 'model/ws1000.net',
         'real/zachary_unwh.net', 'real/airports_UW.net']
files_names = []

# get all the files in the directory (path)
for i in ['toy/', 'model/', 'real/']:
    lstDir = walk(path + i)
    for (root, dirs, fil) in lstDir:
        for f in fil:
            files_names += [i + f]
print(files_names)

#results = print_stats(files_names, path)
results = get_stats(files_names, path)

f = open('results.txt', 'w')
for i in results:
    for j in i:
        f.write(str(j) + ',')
    f.write('\n')
f.close()

convert_to_table('results.txt', 'table.txt')
# The pickled data is a dictionary with 4 key/value pairs:
#
# - `'features'` is a 4D array containing raw pixel data of the traffic sign
#   images, (num examples, width, height, channels).
# - `'labels'` is a 1D array containing the label/class id of the traffic
#   sign. The file `signnames.csv` contains id -> name mappings for each id.
# - `'sizes'` is a list containing tuples, (width, height), representing the
#   original width and height of the image.
# - `'coords'` is a list containing tuples, (x1, y1, x2, y2), representing
#   coordinates of a bounding box around the sign in the image. **THESE
#   COORDINATES ASSUME THE ORIGINAL IMAGE. THE PICKLED DATA CONTAINS RESIZED
#   VERSIONS (32 by 32) OF THESE IMAGES**
#
# Complete the basic data summary below. Use python, numpy and/or pandas
# methods to calculate the data summary rather than hard coding the results.
# For example, the [pandas shape method](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.shape.html)
# might be useful for calculating some of the summary results.

# ### Provide a Basic Summary of the Data Set Using Python, Numpy and/or Pandas

# In[5]:

utils.get_stats(train['features'], train['labels'], 'Train')
utils.get_stats(valid['features'], valid['labels'], 'Valid')
utils.get_stats(test['features'], test['labels'], 'Test')

# ### Include an exploratory visualization of the dataset
#
# Visualize the German Traffic Signs Dataset using the pickled file(s). This
# is open ended, suggestions include: plotting traffic sign images, plotting
# the count of each sign, etc.
#
# The [Matplotlib](http://matplotlib.org/) [examples](http://matplotlib.org/examples/index.html)
# and [gallery](http://matplotlib.org/gallery.html) pages are a great
# resource for doing visualizations in Python.
#
# **NOTE:** It's recommended you start with something simple first. If you
# wish to do more, come back to it after you've completed the rest of the
# sections. It can be interesting to look at the distribution of classes in
# the training, validation and test set. Is the distribution the same? Are
# there more examples of some classes than others?

# In[6]:
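# A minimal sketch of loading one pickled split and computing the summary by
# hand -- the file name 'train.p' is an assumption based on the description
# above, not taken from this notebook:
import pickle
import numpy as np

with open('train.p', 'rb') as f:
    train = pickle.load(f)

n_train = train['features'].shape[0]
image_shape = train['features'].shape[1:]       # e.g. (32, 32, 3)
n_classes = len(np.unique(train['labels']))
print(n_train, image_shape, n_classes)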
import dash_core_components as dcc
from dash.dependencies import Input, Output
import pandas as pd
from datetime import datetime
import utils
import math

# Load the dataset
events_df = pd.read_json('data/regen_event_list_ts.json', lines=True)
heavy_precipitation_events = events_df[events_df["si"] > 0.0]
normal_precipitation_events = events_df[events_df["si"] == 0.0]
ts_events_df = pd.read_json('data/regen_event_list_ts_expanded.json',
                            lines=True)
heavy_precipitation_events_ts = ts_events_df[ts_events_df["si"] > 0.0]
normal_precipitation_events_ts = ts_events_df[ts_events_df["si"] == 0.0]
stats_table = utils.get_stats(events_df, ts_events_df)

# Get options and range
si_min = 0  # float(min(events_df["si"].min(), ts_events_df["si"].min()))
si_max = math.ceil((max(events_df["si"].max(), ts_events_df["si"].max())))
length_min = int(ts_events_df["length"].min())
length_max = int(ts_events_df["length"].max())
area_min = 0  # float(ts_events_df["area"].min())
area_max = math.ceil(ts_events_df["area"].max())
min_date = events_df.datetime.min().date()
max_date = events_df.datetime.max().date()

# Create the Dash app
external_stylesheets = [
    {
        "href": "https://fonts.googleapis.com/css2?"
        x, cx, y = data
        zx, zcx, y = model(x.to(device)), model(cx.to(device)), y.to(device)
        loss = contastive_loss(zx, zcx, y)
        val_loss.append(loss.item() * x.shape[0])
print('>> Val-loss: %f' % np.mean(val_loss))

print('Computing features for testing set')
embeddings = np.zeros((len(testloader.dataset), args.emb_size))
test_loss = []
with torch.no_grad():
    for i, data in enumerate(testloader):
        x = data[0]
        z = model(x.to(device))
        embeddings[i * args.batch_size:(i + 1) * args.batch_size] = z.cpu()

dist_matrix = squareform(pdist(embeddings, metric='euclidean'))
print(dist_matrix.shape)
nearest_neighbors = np.argsort(dist_matrix, axis=1)
for k in [1, 3, 5]:
    print('===== k=%d =====' % k)
    utils.get_stats(testloader.dataset.image_paths, nearest_neighbors, k=k)
    # todo: save to file

# compute feature
        if (e + 1) % args.print_every == 0:
            log_format = "Epoch {}: loss={:.4f}, val_acc={:.4f}, final_test_acc={:.4f}"
            print(log_format.format(e + 1, train_loss, val_acc,
                                    final_test_acc))
    print("Best Epoch {}, final test acc {:.4f}".format(
        best_epoch, final_test_acc))
    return final_test_acc, sum(train_times) / len(train_times)


if __name__ == "__main__":
    args = parse_args()
    res = []
    train_times = []
    for i in range(args.num_trials):
        print("Trial {}/{}".format(i + 1, args.num_trials))
        acc, train_time = main(args)
        res.append(acc)
        train_times.append(train_time)
    mean, err_bd = get_stats(res)
    print("mean acc: {:.4f}, error bound: {:.4f}".format(mean, err_bd))
    out_dict = {
        "hyper-parameters": vars(args),
        "result": "{:.4f}(+-{:.4f})".format(mean, err_bd),
        "train_time": "{:.4f}".format(sum(train_times) / len(train_times))
    }
    with open(args.output_path, "w") as f:
        json.dump(out_dict, f, sort_keys=True, indent=4)
                                        temp_pos_weight.to(device))
            loss_cols = model.head_loss(preds_dict['cols'].reshape(-1),
                                        targets_cols.to(device),
                                        temp_pos_weight.to(device))
            loss_rows = model.head_loss(preds_dict['rows'].reshape(-1),
                                        targets_rows.to(device),
                                        temp_pos_weight.to(device))
            total_loss = loss_cells + loss_cols + loss_rows
            for k, v in preds_dict.items():
                preds_dict[k] = v.to(torch.device('cpu'))
            val_loss += total_loss.item()
            stat_dict = get_stats(preds_dict['cells'], preds_dict['cols'],
                                  preds_dict['rows'], targets_cells,
                                  targets_cols, targets_rows,
                                  prediction_thres)
            # Iterate over, put tensors to device
            loop.set_description(f'Val Epoch [{epoch+1}/{num_epochs}]')
            loop.set_postfix_str(
                s=f"Total_loss = {round(total_loss.item(),4)}, "
                  f"Cells = {round(loss_cells.item(),4)}, "
                  f"Cols = {round(loss_cols.item(),4)}, "
                  f"Rows = {round(loss_rows.item(),4)}, "
                  f"F1_Cells = {round(stat_dict['cells']['f1'],4)}, "
                  f"F1_Cols = {round(stat_dict['cols']['f1'],4)}, "
                  f"F1_Rows = {round(stat_dict['rows']['f1'],4)}")
        print(
            f"#####AVERAGE: Epoch [{epoch+1}/{num_epochs}] Train Loss: {train_loss/ct_train}, Val Loss: {val_loss/ct_val}####################"
        )
        if idx % 100 == 0:
            Stats['total_loss'].append(total_loss)
            Stats['loss_cells'].append(loss_cells)
            Stats['loss_cols'].append(loss_cols)
            Stats['loss_rows'].append(loss_rows)
def train(self, train_data, dev_data, model, train_tmp_dir,
          device=torch.device('cpu')):
    with open(os.path.join(train_tmp_dir, f'model.pkl'), 'wb') as out_f:
        pickle.dump(model.save_params(), out_f)
    wandb.save(os.path.join(train_tmp_dir, 'model.pkl'))
    print(set(dev_data.labels).difference(set(train_data.labels)))
    model = model.to(device)
    self.best_metrics: Dict[str, Tuple[int, float]] = {}
    loss_fn = self.create_loss(model, train_data, dev_data).to(device)
    tot_params = [{'params': list(model.parameters())}]
    if self.flags.loss_type in ['hcvar', 'cvar']:
        tot_params += [{
            'params': loss_fn.threshold,
            'lr': 10 * self.flags.lr,
            'momentum': 0.
        }]
    if self.flags.optimizer == 'SGD':
        optimizer = torch.optim.SGD(tot_params,
                                    lr=self.flags.lr,
                                    momentum=self.flags.momentum)
    else:
        optimizer = torch.optim.Adam(tot_params, lr=self.flags.lr)
    decay_sched = self.create_lr_scheduler(optimizer, loss_fn)
    epochs = self.flags.epochs
    train_plot = self._make_class_dist_plot(train_data)
    wandb.log({'Train class dist': wandb.Image(train_plot)})
    for epoch in range(epochs):
        start_time = datetime.datetime.now()
        model.train()
        epoch_loss = self._epoch_closure(train_data, model, epoch, loss_fn,
                                         optimizer, device)
        epoch_time = datetime.datetime.now() - start_time
        # Decay learning rate
        log_entry = {
            'epoch': epoch,
            'Train loss/sample': epoch_loss,
            'Time': epoch_time.total_seconds(),
            'Samples/sec': len(train_data) / epoch_time.total_seconds(),
        }
        if self.flags.lr_decay_type:
            current_lr = optimizer.param_groups[0]['lr']
            log_entry['Current LR'] = current_lr
            if self.flags.lr_decay_type == 'plateau':
                decay_sched.step(epoch_loss)
            else:
                decay_sched.step()
        if ((epoch + 1) % self.flags.dev_interval == 0
                or epoch == epochs - 1):
            model.eval()
            dev_preds = model.predict(dev_data, device=device)
            global_stats, class_stats = get_stats(
                dev_preds,
                dev_data.labels,
                all_labels=list(model.class_map.keys()))
            for metric_name in global_stats:
                self._save_best_metric(train_tmp_dir,
                                       model,
                                       epoch,
                                       stats_dict=global_stats,
                                       class_stats=class_stats)
            worst_risk_class, worst_risk_entry = max(
                list(class_stats.items()), key=lambda x: 1 - x[1]['recall'])
            log_entry = {
                **{
                    f'Dev {metric}': val
                    for metric, val in global_stats.items()
                },
                **log_entry,
                **{
                    f'Class {idx} dev risk':
                    1 - class_stats[model.idx_to_class_map[idx]]['recall']
                    for idx in range(2)
                },
                'Dev risk histogram':
                wandb.Image(
                    class_stats_histogram(class_stats,
                                          lambda x: 1 - x['recall'],
                                          'Risk',
                                          range=(0, 1),
                                          bins=np.arange(11) * 0.1)),
                'Dev worst class risk': 1 - worst_risk_entry['recall'],
                'Dev worst class risk label': worst_risk_class
            }
        pprint(log_entry)
        wandb.log(log_entry)
    # At the end save best metrics over the course of the run
    wandb.log({
        'Dev class histogram':
        wandb.Image(
            class_stats_histogram(class_stats,
                                  lambda x: x['true'],
                                  'Frequency',
                                  cmp_fn=lambda x: -x,
                                  bins=20))
    })
    pprint('Best metrics: ')
    pprint(self.best_metrics)
def controller(input_pipe, number_of_processes, node_list, req_list, manager,
               polling_interval, polls_per_update, log_file, nodes, services):
    close_flag = False
    # Node list
    #node_list = ["192.168.56.102:4000", "192.168.56.103:4000", "192.168.56.101:4000"]
    #manager = "192.168.56.102:4000"
    #services = {}

    # upper and lower cpu usage thresholds where scaling should happen
    cpu_upper_threshold = 50.0
    cpu_lower_threshold = 20.0

    # create list of processes and pipes
    process_list = []
    spike_list = []
    # pipes that main thread will read from and load threads will write to
    par_pipes = []
    spike_par_pipes = []
    # pipes that main thread will write to and load threads will read from
    child_pipes = []
    spike_child_pipes = []

    sql_cpu_usages = []
    sql_mem_usages = []
    web_worker_cpu_usages = []
    web_worker_mem_usages = []
    sql_cpu_avg = 0
    sql_mem_avg = 0
    web_worker_mem_avg = 0
    web_worker_cpu_avg = 0
    num_web_workers = 2
    num_sql = 1
    num_requests = 0

    # Storage variables
    prev_sql_cpu_avg = 0
    prev_sql_mem_avg = 0
    prev_web_worker_mem_avg = 0
    prev_web_worker_cpu_avg = 0
    prev_num_web_workers = 0
    prev_num_sql = 0
    prev_num_requests = 0

    spike_size = 3

    # CREATE SPECIFIED NUMBER OF PROCESSES
    for i in range(0, number_of_processes):
        # Create new pipe
        par_pipe, child_pipe = multiprocessing.Pipe()
        par_pipes.append(par_pipe)
        child_pipes.append(child_pipe)
        temp_process = multiprocessing.Process(target=load_process,
                                               args=(req_list,
                                                     child_pipes[i]))
        process_list.append(temp_process)
    for i in range(0, spike_size * 2):
        par_pipe, child_pipe = multiprocessing.Pipe()
        spike_par_pipes.append(par_pipe)
        spike_child_pipes.append(child_pipe)
        temp_process = multiprocessing.Process(target=load_process,
                                               args=(req_list,
                                                     spike_child_pipes[i]))
        spike_list.append(temp_process)

    # get services, nodes and tasks
    # Always start with 2 web workers and 1 sql
    scale(services["web-worker"], num_web_workers, manager)
    scale(services["mysql"], num_sql, manager)
    time.sleep(7)
    for service_name, service in services.items():
        get_tasks(service, manager)

    # get initial stats
    # get web-worker stats
    sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
        services, sql_cpu_usages, sql_mem_usages, web_worker_cpu_usages,
        web_worker_mem_usages, nodes)

    # initialize estimator
    init_x = np.asarray(
        (sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg))
    init_x = init_x.reshape(init_x.size, 1)
    estimator = kalmanEstimator(np.identity(4), np.random.random((4, 3)),
                                init_x)

    # APPROACH:
    # We need at least 4 measurements to ensure that a solution can be found
    # 1st & 2nd containers will remain the same

    # ********************************* 1st DIFF MEASUREMENT *********************************
    # store measurements
    prev_sql_cpu_avg = sql_cpu_avg
    prev_sql_mem_avg = sql_mem_avg
    prev_web_worker_cpu_avg = web_worker_cpu_avg
    prev_web_worker_mem_avg = web_worker_mem_avg
    prev_num_requests = num_requests
    prev_num_sql = num_sql
    prev_num_web_workers = num_web_workers

    # Start generating a load
    process_list[0].start()
    # Wait a couple seconds
    time.sleep(5)

    # Send poll request to the process we started
    par_pipes[0].send("poll")
    while not par_pipes[0].poll():
        pass
    # If the loop above has been broken then we can read the information from the pipe
    num_requests = par_pipes[0].recv()
    #print('BOOM {}'.format(num_requests))

    # get the stats
    sql_cpu_usages = []
    sql_mem_usages = []
    web_worker_cpu_usages = []
    web_worker_mem_usages = []
    sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
        services, sql_cpu_usages, sql_mem_usages, web_worker_cpu_usages,
        web_worker_mem_usages, nodes)

    # create some np arrays for the regression
    sql_cpu_history = np.asarray(sql_cpu_avg - prev_sql_cpu_avg)
    sql_mem_history = np.asarray(sql_mem_avg - prev_sql_mem_avg)
    web_worker_cpu_history = np.asarray(web_worker_cpu_avg -
                                        prev_web_worker_cpu_avg)
    web_worker_mem_history = np.asarray(web_worker_mem_avg -
                                        prev_web_worker_mem_avg)
    request_history = np.asarray(num_requests - prev_num_requests)
    web_work_history = np.asarray(num_web_workers - prev_num_web_workers)
    sql_history = np.asarray(num_sql - prev_num_sql)

    # As before we store the stats
    prev_sql_cpu_avg = sql_cpu_avg
    prev_sql_mem_avg = sql_mem_avg
    prev_web_worker_cpu_avg = web_worker_cpu_avg
    prev_web_worker_mem_avg = web_worker_mem_avg
    prev_num_requests = num_requests
    prev_num_sql = num_sql
    prev_num_web_workers = num_web_workers

    # Wait a couple more seconds
    time.sleep(5)

    # ********************************* 2nd DIFF MEASUREMENT *********************************
    # Send poll request to the process we started
    par_pipes[0].send("poll")
    while not par_pipes[0].poll():
        pass
    # If the loop above has been broken then we can read the information from the pipe
    num_requests = par_pipes[0].recv()

    # get the stats
    sql_cpu_usages = []
    sql_mem_usages = []
    web_worker_cpu_usages = []
    web_worker_mem_usages = []
    sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
        services, sql_cpu_usages, sql_mem_usages, web_worker_cpu_usages,
        web_worker_mem_usages, nodes)

    # Append new values to the histories
    sql_cpu_history = np.append(sql_cpu_history,
                                sql_cpu_avg - prev_sql_cpu_avg)
    sql_mem_history = np.append(sql_mem_history,
                                sql_mem_avg - prev_sql_mem_avg)
    web_worker_cpu_history = np.append(
        web_worker_cpu_history, web_worker_cpu_avg - prev_web_worker_cpu_avg)
    web_worker_mem_history = np.append(
        web_worker_mem_history, web_worker_mem_avg - prev_web_worker_mem_avg)
    request_history = np.append(request_history,
                                num_requests - prev_num_requests)
    web_work_history = np.append(web_work_history,
                                 num_web_workers - prev_num_web_workers)
    sql_history = np.append(sql_history, num_sql - prev_num_sql)
    print(web_worker_cpu_avg)

    # Store the stats
    prev_sql_cpu_avg = sql_cpu_avg
    prev_sql_mem_avg = sql_mem_avg
    prev_web_worker_cpu_avg = web_worker_cpu_avg
    prev_web_worker_mem_avg = web_worker_mem_avg
    prev_num_requests = num_requests
    prev_num_sql = num_sql
    prev_num_web_workers = num_web_workers
    print(web_worker_cpu_usages)

    # ********************************* 3rd DIFF MEASUREMENT *********************************
    print("Two measurements taken\n")
    # Start 2 new containers
    num_web_workers = num_web_workers + 1
    num_sql = num_sql + 1
    scale(services["web-worker"], num_web_workers, manager)
    scale(services["mysql"], num_sql, manager)
    # We also start another load generator
    process_list[1].start()
    # as before we sleep and will update
    time.sleep(5)

    # poll pipes [0] & [1]
    for i in range(0, 2):
        par_pipes[i].send("poll")
    pipes_ready = poll_pipes(par_pipes, 2)
    # reset number of requests
    num_requests = 0
    for i in range(0, 2):
        num_requests = num_requests + par_pipes[i].recv()

    # update tasks since we scaled
    for service_name, service in services.items():
        get_tasks(service, manager)

    # get the stats
    sql_cpu_usages = []
    sql_mem_usages = []
    web_worker_cpu_usages = []
    web_worker_mem_usages = []
    sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
        services, sql_cpu_usages, sql_mem_usages, web_worker_cpu_usages,
        web_worker_mem_usages, nodes)

    # Append new values to the histories
    sql_cpu_history = np.append(sql_cpu_history,
                                sql_cpu_avg - prev_sql_cpu_avg)
    sql_mem_history = np.append(sql_mem_history,
                                sql_mem_avg - prev_sql_mem_avg)
    web_worker_cpu_history = np.append(
        web_worker_cpu_history, web_worker_cpu_avg - prev_web_worker_cpu_avg)
    web_worker_mem_history = np.append(
        web_worker_mem_history, web_worker_mem_avg - prev_web_worker_mem_avg)
    request_history = np.append(request_history,
                                num_requests - prev_num_requests)
    web_work_history = np.append(web_work_history,
                                 num_web_workers - prev_num_web_workers)
    sql_history = np.append(sql_history, num_sql - prev_num_sql)

    # Store the stats
    prev_sql_cpu_avg = sql_cpu_avg
    prev_sql_mem_avg = sql_mem_avg
    prev_web_worker_cpu_avg = web_worker_cpu_avg
    prev_web_worker_mem_avg = web_worker_mem_avg
    prev_num_requests = num_requests
    prev_num_sql = num_sql
    prev_num_web_workers = num_web_workers

    # ********************************* 4th DIFF MEASUREMENT *********************************
    print("3 measurements taken\n")
    # Now we get the 4th measurement
    # Scale down the number of sql containers and scale up web-worker
    num_sql = num_sql - 1
    num_web_workers = num_web_workers + 1
    scale(services["web-worker"], num_web_workers, manager)
    scale(services["mysql"], num_sql, manager)
    # as before we sleep and will update
    time.sleep(5)
    for service_name, service in services.items():
        get_tasks(service, manager)
    for i in range(0, 2):
        par_pipes[i].send("poll")
    pipes_ready = poll_pipes(par_pipes, 2)
    # reset number of requests
    num_requests = 0
    for i in range(0, 2):
        num_requests = num_requests + par_pipes[i].recv()

    # get the stats
    sql_cpu_usages = []
    sql_mem_usages = []
    web_worker_cpu_usages = []
    web_worker_mem_usages = []
    sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
        services, sql_cpu_usages, sql_mem_usages, web_worker_cpu_usages,
        web_worker_mem_usages, nodes)

    # Append new values to the histories
    sql_cpu_history = np.append(sql_cpu_history,
                                sql_cpu_avg - prev_sql_cpu_avg)
    sql_mem_history = np.append(sql_mem_history,
                                sql_mem_avg - prev_sql_mem_avg)
    web_worker_cpu_history = np.append(
        web_worker_cpu_history, web_worker_cpu_avg - prev_web_worker_cpu_avg)
    web_worker_mem_history = np.append(
        web_worker_mem_history, web_worker_mem_avg - prev_web_worker_mem_avg)
    request_history = np.append(request_history,
                                num_requests - prev_num_requests)
    web_work_history = np.append(web_work_history,
                                 num_web_workers - prev_num_web_workers)
    sql_history = np.append(sql_history, num_sql - prev_num_sql)

    # Store the stats
    prev_sql_cpu_avg = sql_cpu_avg
    prev_sql_mem_avg = sql_mem_avg
    prev_web_worker_cpu_avg = web_worker_cpu_avg
    prev_web_worker_mem_avg = web_worker_mem_avg
    prev_num_requests = num_requests
    prev_num_sql = num_sql
    prev_num_web_workers = num_web_workers

    # ********************************* REGRESSION *********************************
    # Use these lines whenever we update the regression
    # TODO put this into a function
    target_mat = np.vstack([
        sql_cpu_history, web_worker_cpu_history, sql_mem_history,
        web_worker_mem_history
    ]).T
    design_mat = np.vstack([sql_history, web_work_history,
                            request_history]).T
    control_matrix = regularized_lin_regression(design_mat, target_mat,
                                                0.0001)
    #print(control_matrix)
    estimator.update_B(control_matrix.T)
    #print(control_matrix.T)
    obs = np.array(
        [[sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg,
          web_worker_mem_avg]]).T
    estimator.update(obs, np.identity(4))

    # Helper vars
    polls_since_update = 0
    processes_started = 2
    delta_web = 0
    delta_sql = 0
    delta_requests = 0
    scaling_triggered = False

    # TODO We have generated an initial estimate
    # Begin by starting up the rest of the load generators and then
    # monitoring and adjust
    close_flag = False
    #print("Experiment Started\n")
    output_pipe, log_pipe = multiprocessing.Pipe()
    close_pipe, log_close_pipe = multiprocessing.Pipe()
    startTime = time.time()
    log_process = multiprocessing.Process(
        target=logger,
        args=(log_pipe, log_file, node_list, manager, startTime,
              polling_interval / 4.0, nodes, services, log_close_pipe))
    log_process.start()
    iteration_count = 0
    #old_time = datetime.datetime.now()
    output_pipe.send([
        estimator.x[0][0], estimator.x[1][0], estimator.x[2][0],
        estimator.x[3][0], num_sql, num_web_workers, delta_requests,
        num_requests, iteration_count, 0.0, 0.0, True
    ])
    print("Experiment Started")
    spike = False
    spike_number = 0

    while not close_flag:
        #old_time = time.time()
        if input_pipe.poll():
            message = input_pipe.recv()
            if message == "Quit":
                close_flag = True
                print("Shutting down")
                for i in range(0, processes_started):
                    par_pipes[i].send("close")
                    process_list[i].join()
                    print("Load process {0}".format(i))
                print("Loads spun down")
                scale(services["web-worker"], 2, manager)
                scale(services["mysql"], 1, manager)
                output_pipe.send("close")
                close_pipe.send("close")
                log_process.join()
                print("Logger shut down")
                print(estimator.B)
                break
        if (processes_started < number_of_processes
                and (iteration_count % 20 == 0)):
            # We haven't started all of the load generators, so start another
            process_list[processes_started].start()
            processes_started = processes_started + 1
        # Sleep at the start since we need to sleep on first entry
        time.sleep(polling_interval)
        if scaling_triggered:
            for service_name, service in services.items():
                get_tasks(service, manager)
            output_pipe.send([
                estimator.x[0][0], estimator.x[1][0], estimator.x[2][0],
                estimator.x[3][0], num_sql, num_web_workers, delta_requests,
                num_requests, iteration_count, minutes, seconds,
                scaling_triggered
            ])
            scaling_triggered = False
        iteration_count = iteration_count + 1

        # ##################### TEST ABILITY TO REACT TO A SPIKE #####################
        if (iteration_count == 120):
            for i in range(0, spike_size):
                spike_list[i].start()
                #processes_started = processes_started + 1
            spike = True
            spike_number = 1
        if (iteration_count == 175):
            for i in range(0, spike_size):
                spike_par_pipes[i].send("close")
                #processes_started = processes_started - 1
            spike = False
        if (iteration_count == 230):
            for i in range(spike_size, 2 * spike_size):
                spike_list[i].start()
                #processes_started = processes_started + 1
            spike = True
            spike_number = 2
        if (iteration_count == 280):
            for i in range(spike_size, 2 * spike_size):
                spike_par_pipes[i].send("close")
                #processes_started = processes_started - 1
            spike = False

        for i in range(0, processes_started):
            par_pipes[i].send("poll")
        if spike:
            if spike_number == 1:
                for i in range(0, spike_size):
                    spike_par_pipes[i].send("poll")
                for i in range(0, spike_size):
                    while not spike_par_pipes[i].poll():
                        pass
            if spike_number == 2:
                for i in range(spike_size, 2 * spike_size):
                    spike_par_pipes[i].send("poll")
                for i in range(spike_size, 2 * spike_size):
                    while not spike_par_pipes[i].poll():
                        pass
        pipes_ready = poll_pipes(par_pipes, processes_started)
        # reset number of requests
        num_requests = 0
        for i in range(0, processes_started):
            num_requests = num_requests + par_pipes[i].recv()
        if spike:
            if spike_number == 1:
                for i in range(0, spike_size):
                    num_requests = num_requests + spike_par_pipes[i].recv()
            if spike_number == 2:
                for i in range(spike_size, 2 * spike_size):
                    num_requests = num_requests + spike_par_pipes[i].recv()
        delta_requests = num_requests - prev_num_requests

        # We've slept so poll
        sql_cpu_usages = []
        sql_mem_usages = []
        web_worker_cpu_usages = []
        web_worker_mem_usages = []

        # Check to see if we need to update the estimator
        if polls_since_update == polls_per_update:
            sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg, web_worker_mem_avg = get_stats(
                services, sql_cpu_usages, sql_mem_usages,
                web_worker_cpu_usages, web_worker_mem_usages, nodes)
            # need to update the estimator
            # Check to see if we have 100 entries in the history list
            if sql_cpu_history.size == 100:
                # We have 100 entries; randomly replace one of them
                replacement_index = random.randint(0, 99)
                # Use np.put to insert new value at replacement_index,
                # overwriting the previous value
                np.put(sql_cpu_history, replacement_index,
                       sql_cpu_avg - prev_sql_cpu_avg)
                np.put(sql_mem_history, replacement_index,
                       sql_mem_avg - prev_sql_mem_avg)
                np.put(web_worker_cpu_history, replacement_index,
                       web_worker_cpu_avg - prev_web_worker_cpu_avg)
                np.put(web_worker_mem_history, replacement_index,
                       web_worker_mem_avg - prev_web_worker_mem_avg)
                np.put(request_history, replacement_index,
                       num_requests - prev_num_requests)
                np.put(web_work_history, replacement_index,
                       num_web_workers - prev_num_web_workers)
                np.put(sql_history, replacement_index,
                       num_sql - prev_num_sql)
            else:
                # Don't have 100 entries. Append new values
                sql_cpu_history = np.append(sql_cpu_history,
                                            sql_cpu_avg - prev_sql_cpu_avg)
                sql_mem_history = np.append(sql_mem_history,
                                            sql_mem_avg - prev_sql_mem_avg)
                web_worker_cpu_history = np.append(
                    web_worker_cpu_history,
                    web_worker_cpu_avg - prev_web_worker_cpu_avg)
                web_worker_mem_history = np.append(
                    web_worker_mem_history,
                    web_worker_mem_avg - prev_web_worker_mem_avg)
                request_history = np.append(
                    request_history, num_requests - prev_num_requests)
                web_work_history = np.append(
                    web_work_history, num_web_workers - prev_num_web_workers)
                sql_history = np.append(sql_history, num_sql - prev_num_sql)

            # Do regression
            target_mat = np.vstack([
                sql_cpu_history, web_worker_cpu_history, sql_mem_history,
                web_worker_mem_history
            ]).T
            design_mat = np.vstack(
                [sql_history, web_work_history, request_history]).T
            control_matrix = regularized_lin_regression(
                design_mat, target_mat, 0.0001)
            estimator.update_B(control_matrix.T)
            # Also need to correct Kalman gain
            estimator.update(
                np.array([[
                    sql_cpu_avg, web_worker_cpu_avg, sql_mem_avg,
                    web_worker_mem_avg
                ]]).T, 0.002 * np.random.randn(4, 4))
            polls_since_update = 0
        else:
            polls_since_update = polls_since_update + 1

        # TODO For Carl: Get Estimate from Estimator, make scaling decision,
        # send values to logger
        prev_sql_cpu_avg = sql_cpu_avg
        prev_sql_mem_avg = sql_mem_avg
        prev_web_worker_cpu_avg = web_worker_cpu_avg
        prev_web_worker_mem_avg = web_worker_mem_avg
        prev_num_requests = num_requests
        prev_num_sql = num_sql
        prev_num_web_workers = num_web_workers

        estimate = estimator.estimate(np.array([[0, 0, delta_requests]]).T)
        #print(estimate)
        if (estimate[1] >= cpu_upper_threshold):
            # We assume the web worker needs scaling most of the time
            while not (estimate[1] < cpu_upper_threshold
                       or delta_web == search_range
                       or num_web_workers + (delta_web + 1) > max_containers):
                delta_web = delta_web + 1
                estimate = estimator.estimate(
                    np.array([[0, delta_web, delta_requests]]).T)
            scaling_triggered = True
        if (estimate[0] >= cpu_upper_threshold):
            while not (estimate[0] < cpu_upper_threshold
                       or delta_sql == search_range
                       or num_sql + (delta_sql + 1) > max_containers):
                delta_sql = delta_sql + 1
                estimate = estimator.estimate(
                    np.array([[delta_sql, delta_web, delta_requests]]).T)
            scaling_triggered = True
        if not scaling_triggered:
            # just to prevent two cases triggering
            if estimate[1] <= cpu_lower_threshold:
                while not (estimate[1] > cpu_lower_threshold
                           or abs(delta_web) == search_range
                           or num_web_workers + (delta_web - 1) < 1):
                    delta_web = delta_web - 1
                    estimate = estimator.estimate(
                        np.array([[0, delta_web, delta_requests]]).T)
                # We assume the web worker needs scaling most of the time
                scaling_triggered = True
            if (estimate[0] <= cpu_lower_threshold):
                while not (estimate[0] > cpu_lower_threshold
                           or abs(delta_sql) == search_range
                           or num_sql + (delta_sql - 1) < 1):
                    delta_sql = delta_sql - 1
                    estimate = estimator.estimate(
                        np.array([[delta_sql, delta_web,
                                   delta_requests]]).T)
                scaling_triggered = True

        # We have made our decision; actually update estimator
        estimator.predict(np.array([[delta_sql, delta_web,
                                     delta_requests]]).T)
        if scaling_triggered:
            # Actually do the scaling here
            num_web_workers = num_web_workers + delta_web
            num_sql = num_sql + delta_sql
            scale(services["web-worker"], num_web_workers, manager)
            scale(services["mysql"], num_sql, manager)
            delta_web = 0
            delta_sql = 0
            #scaling_triggered = 0
            #time.sleep(0.05)

        # Send the values to the logger
        # order will be sql_cpu web_worker_cpu sql_mem web_worker_mem num_sql num_web_workers
        # For each value we send actual then predicted
        diff_time = time.time() - startTime
        minutes, seconds = diff_time // 60, diff_time % 60
        if not scaling_triggered:
            #diff_time = time.time() - startTime
            #minutes, seconds = diff_time // 60, diff_time % 60
            output_pipe.send([
                estimator.x[0][0], estimator.x[1][0], estimator.x[2][0],
                estimator.x[3][0], num_sql, num_web_workers, delta_requests,
                num_requests, iteration_count, minutes, seconds,
                scaling_triggered
            ])
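# The controller above leans on a regularized_lin_regression helper that is
# not shown. A minimal ridge-regression sketch consistent with how it is
# called (design matrix X of shape (n, 3), target matrix Y of shape (n, 4),
# scalar regularizer lam) might look like this -- an assumption, not the
# repo's actual implementation:
import numpy as np

def regularized_lin_regression(X, Y, lam):
    # closed-form ridge solution: (X^T X + lam*I)^-1 X^T Y
    d = X.shape[1]
    return np.linalg.solve(X.T @ X + lam * np.identity(d), X.T @ Y)

# The (3, 4) result, transposed to (4, 3), matches the estimator's control
# matrix B initialised as np.random.random((4, 3)) above.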
        if (e + 1) % args.print_every == 0:
            log_format = "Epoch {}: loss={:.4f}, val_acc={:.4f}, final_test_acc={:.4f}"
            print(log_format.format(e + 1, train_loss, val_acc,
                                    final_test_acc))
    print("Best Epoch {}, final test acc {:.4f}".format(
        best_epoch, final_test_acc))
    return final_test_acc, sum(train_times) / len(train_times)


if __name__ == "__main__":
    args = parse_args()
    res = []
    train_times = []
    for i in range(args.num_trials):
        print("Trial {}/{}".format(i + 1, args.num_trials))
        acc, train_time = main(args)
        res.append(acc)
        train_times.append(train_time)
    mean, err_bd = get_stats(res, conf_interval=False)
    print("mean acc: {:.4f}, error bound: {:.4f}".format(mean, err_bd))
    out_dict = {
        "hyper-parameters": vars(args),
        "result": "{:.4f}(+-{:.4f})".format(mean, err_bd),
        "train_time": "{:.4f}".format(sum(train_times) / len(train_times))
    }
    with open(args.output_path, "w") as f:
        json.dump(out_dict, f, sort_keys=True, indent=4)
def train_agent(ppo: PPO, env, policy_iters, max_timesteps, memory,
                update_timestep, env_resets, log_interval, lam=0,
                n_parallel=500, var_type='reward'):
    running_reward = 0
    avg_length = 0
    time_step = 0
    n_updates = 0
    i_episode = 0
    prev_performance = np.array(
        [-np.inf for _ in range(len(env.model.models))])
    memory.clear_memory()
    rewards_history = deque(maxlen=6)
    best_weights = None
    is_done_func = env.model.is_done_func
    if var_type == 'reward':
        state_dynamics = False
    elif var_type == 'state':
        state_dynamics = True
    else:
        raise Exception("Variance must either be 'reward' or 'state'")
    for model in env.model.models.values():
        model.to(device)
    state_mean = torch.FloatTensor(env.state_filter.mean).to(device)
    state_stddev = torch.FloatTensor(env.state_filter.stdev).to(device)
    action_mean = torch.FloatTensor(env.action_filter.mean).to(device)
    action_stddev = torch.FloatTensor(env.action_filter.stdev).to(device)
    diff_mean = torch.FloatTensor(env.diff_filter.mean).to(device)
    diff_stddev = torch.FloatTensor(env.diff_filter.stdev).to(device)
    start_states = torch.FloatTensor(env_resets).to(device)
    done_true = [True for _ in range(n_parallel)]
    done_false = [False for _ in range(n_parallel)]
    while n_updates < policy_iters:
        i_episode += n_parallel
        state = start_states.clone()
        prev_done = done_false
        var = 0
        t = 0
        while t < max_timesteps:
            state_f = filter_torch(state, state_mean, state_stddev)
            time_step += n_parallel
            t += 1
            with torch.no_grad():
                action = ppo.policy_old.act(state_f, memory)
                action = torch.clamp(action, env.action_bounds.lowerbound[0],
                                     env.action_bounds.upperbound[0])
                action_f = filter_torch(action, action_mean, action_stddev)
                X = torch.cat((state_f, action_f), dim=1)
                y = random_env_forward(X, env)
                nextstate_f = state_f + filter_torch_invert(
                    y, diff_mean, diff_stddev)
                nextstate = filter_torch_invert(nextstate_f, state_mean,
                                                state_stddev)
            if is_done_func:
                done = is_done_func(nextstate).cpu().numpy()
                done[prev_done] = True
                prev_done = done
            else:
                if t >= max_timesteps:
                    done = done_true
                else:
                    done = done_false
            uncert = get_stats(env, X, state_f, action, diff_mean,
                               diff_stddev, state_mean, state_stddev, done,
                               state_dynamics)
            reward = torch_reward(env.name, nextstate, action, done)
            # mix the environment reward with the ensemble uncertainty bonus
            reward = (1 - lam) * reward + lam * uncert
            state = nextstate
            memory.rewards.append(reward)
            memory.is_terminals.append(done)
            running_reward += reward
            var += uncert**2

            # update if it's time
            if time_step % update_timestep == 0:
                ppo.update(memory)
                memory.clear_memory()
                time_step = 0
                n_updates += 1
                if n_updates > 10:
                    improved, prev_performance = validate_agent_with_ensemble(
                        ppo, env, start_states, state_mean, state_stddev,
                        action_mean, action_stddev, diff_mean, diff_stddev,
                        prev_performance, 0.7, memory, max_timesteps)
                    if improved:
                        best_weights = ppo.policy.state_dict()
                        best_update = n_updates
                    rewards_history.append(improved)
                    if len(rewards_history) > 5:
                        if rewards_history[0] > max(
                                np.array(rewards_history)[1:]):
                            print('Policy Stopped Improving after {} updates'
                                  .format(best_update))
                            ppo.policy.load_state_dict(best_weights)
                            ppo.policy_old.load_state_dict(best_weights)
                            return
        avg_length += t * n_parallel
        if i_episode % log_interval == 0:
            avg_length = int(avg_length / log_interval)
            running_reward = int((running_reward.sum() / log_interval))
            print(
                'Episode {} \t Avg length: {} \t Avg reward: {} \t Number of Policy Updates: {}'
                .format(i_episode, avg_length, running_reward, n_updates))
            running_reward = 0
            avg_length = 0