def get_mode_connections(p1, t1, p2, t2, eval_task, config):
    """Measure mode connectivity between two saved minima and persist the curve.

    Loads the models saved for (policy p1, task t1) and (policy p2, task t2),
    evaluates the connectivity path on eval_task's validation loader, and saves
    loss/accuracy/t-values as numpy arrays under the experiment directory.

    NOTE(review): relies on the module-level ``loaders`` dict being populated
    before this is called.
    """
    weights_a = flatten_params(
        load_task_model_by_policy(t1, p1, config['exp_dir']))
    weights_b = flatten_params(
        load_task_model_by_policy(t2, p2, config['exp_dir']))
    loss, acc, ts = calculate_mode_connectivity(
        weights_a, weights_b, loaders['sequential'][eval_task]['val'], config)
    save_path = '{}/mc_{}_{}_to_{}_{}_on_{}'.format(
        config['exp_dir'], p1, t1, p2, t2, eval_task)
    result = {'loss': loss, 'acc': acc, 'ts': ts}
    save_np_arrays(result, path=save_path)
    return result
def calculate_l2_distance(p1, t1, p2, t2, config):
    """Compute the L2 distance between two saved task models and log it to Comet.

    NOTE(review): reads the module-level ``experiment`` object.
    """
    model_a = load_task_model_by_policy(t1, p1, config['exp_dir'])
    model_b = load_task_model_by_policy(t2, p2, config['exp_dir'])
    metric_key = 'l2_{}_{}_to_{}_{}'.format(p1, t1, p2, t2)
    distance = get_norm_distance(model_a, model_b)
    log_comet_metric(experiment, metric_key, distance, 1)
    return distance
def plot_cka(p1, t1, p2, t2, eval_task, config):
    """Compute layer-wise CKA similarity between two saved models and plot it.

    Runs CKA over 50 validation batches of ``eval_task``, saves the raw scores
    as numpy arrays, and renders a heat map with axis labels that distinguish
    multitask minima (w*) from sequential minima (w-hat).
    """
    model_a = load_task_model_by_policy(t1, p1, config['exp_dir'])
    model_b = load_task_model_by_policy(t2, p2, config['exp_dir'])
    # NOTE(review): eval_task is not part of the file name, so results computed
    # on different eval tasks overwrite each other — confirm this is intended.
    save_path = '{}/cka_on_{}_{}_vs_{}_{}'.format(
        config['exp_dir'], p1, t1, p2, t2)
    scores, keys = calculate_CKA(
        model_a, model_b, loaders['sequential'][eval_task]['val'],
        num_batches=50)
    res = {'scores': scores, 'keys': keys}
    save_np_arrays(res, path=save_path)
    # 'mtl' minima are labelled w*, sequentially-trained minima w-hat.
    ylabel = r'$w^*_{}$'.format(t1) if p1 == 'mtl' else r'$\hat{{w}}_{}$'.format(t1)
    xlabel = r'$w^*_{}$'.format(t2) if p2 == 'mtl' else r'$\hat{{w}}_{}$'.format(t2)
    plot_heat_map(scores, keys, save_path, xlabel, ylabel)
    return res
def plot_graphs(config):
    """Produce mode-connectivity plots and loss-surface planes for tasks 1 and 2."""
    # Load every saved minimum: sequential for all tasks, multitask from task 2 on.
    models = {'seq': {}, 'mtl': {}}
    for t in range(1, config['num_tasks'] + 1):
        models['seq'][t] = flatten_params(
            load_task_model_by_policy(t, 'seq', config['exp_dir']))
        if t >= 2:
            models['mtl'][t] = flatten_params(
                load_task_model_by_policy(t, 'mtl', config['exp_dir']))

    # Disabled auxiliary plots kept for reference:
    # plot_l2_distances(config)
    # acc_fig_path = "{}/accs".format(config['exp_dir'])
    # plot_accs(config['num_tasks'], seq_meter.data, mtl_meter.data, acc_fig_path)
    # plot_cka_scores(config)

    # --- task 1 ---
    plot_mode_connections_for_minima('seq', 1, config)
    plot_mode_connections_for_minima('seq', 2, config)
    # get_custom_mode_connections_for_minima('seq', 1, config)

    # Loss plane through (seq task-1, mtl task-2, seq task-2),
    # evaluated on each of the first two tasks' validation sets.
    plane_models = [models['seq'][1], models['mtl'][2], models['seq'][2]]
    for eval_task in (1, 2):
        path = '{}/surface_{}_{}_{}_{}_{}_{}_on_{}'.format(
            config['exp_dir'], 'seq', 1, 'mtl', 2, 'seq', 2, eval_task)
        labels = [r"$\hat{w}_1$", r"$w^*_{2}$", r"$\hat{w}_{2}$"]
        plot_loss_plane(plane_models, loaders['sequential'][eval_task]['val'],
                        path, labels, config)
def plot_loss_plane(w, eval_loader, path, w_labels, config):
    """Evaluate and plot the loss surface on the 2-D plane spanned by three minima.

    Args:
        w: list of three flattened weight vectors (numpy-like 1-D arrays) that
           define the plane; w[0] is the origin.
        eval_loader: data loader used to compute the loss at each grid point.
        path: file path prefix for the saved arrays and the contour figure.
        w_labels: LaTeX labels for the three minima on the plot.
        config: experiment config; only 'exp_dir' is read here.

    Returns:
        dict with the evaluated 'grid', loss 'values', and the three minima's
        plane 'coords'.
    """
    # Build an orthonormal basis (u, v) of the plane through w[0], w[1], w[2]
    # via one Gram-Schmidt step; dx, dy are the original (un-normalized) scales.
    u = w[2] - w[0]
    dx = np.linalg.norm(u)
    u /= dx
    v = w[1] - w[0]
    v -= np.dot(u, v) * u
    dy = np.linalg.norm(v)
    v /= dy

    m = load_task_model_by_policy(0, 'init', config['exp_dir'])
    m.eval()

    # BUG FIX: np.stack requires a sequence of arrays; passing a generator was
    # deprecated in NumPy 1.16 and raises TypeError on modern NumPy.
    coords = np.stack([get_xy(p, w[0], u, v) for p in w])

    G = 15          # grid resolution per axis
    margin = 0.2    # extend the grid 20% beyond the triangle's bounding box
    alphas = np.linspace(0.0 - margin, 1.0 + margin, G)
    betas = np.linspace(0.0 - margin, 1.0 + margin, G)
    tr_loss = np.zeros((G, G))
    grid = np.zeros((G, G, 2))
    for i, alpha in enumerate(alphas):
        for j, beta in enumerate(betas):
            # Interpolated weight vector at plane coordinate (alpha*dx, beta*dy).
            p = w[0] + alpha * dx * u + beta * dy * v
            m = assign_weights(m, p).to(DEVICE)
            err = eval_single_epoch(m, eval_loader)['loss']
            grid[i, j] = [alpha * dx, beta * dy]
            tr_loss[i, j] = err

    contour = {'grid': grid, 'values': tr_loss, 'coords': coords}
    save_np_arrays(contour, path=path)
    # NOTE(review): dataset is hard-coded to 'mnist' rather than
    # config['dataset'] — confirm before reusing on other datasets.
    plot_contour(grid, tr_loss, coords, log_alpha=-5.0, N=7, path=path,
                 w_labels=w_labels, dataset='mnist')
    return contour
def main():
    """Run experiment 1: train each task sequentially, then multitask, and log metrics.

    For every task t: train a sequential model, record its Hessian eigenspectrum,
    evaluate it on all tasks <= t, and (for t > 1) train a multitask model on
    tasks 1..t, compare gradient/update directions against task 1's top
    eigenvectors, and evaluate it on all tasks <= t. Results go to Comet and to
    config['exp_dir'].
    """
    print('Started the trial >>', TRIAL_ID, 'for experiment 1')

    # init and save
    setup_experiment(experiment, config)

    # convention: init => initialization
    # convention: t_i_seq => task i (sequential)
    # convention: t_i_mtl => task 1 ... i (multitask)
    # convention: t_i_lcm => task 1 ... i (Linear Mode Connectivity)
    eigen_spectrum = {1: {}, 2: {}}
    for task in range(1, config['num_tasks'] + 1):
        print('---- Task {} (seq) ----'.format(task))
        seq_model = train_task_sequentially(
            task, loaders['sequential'][task]['train'], config)
        eigenvals, eigenvecs = get_model_eigenspectrum(
            seq_model, loaders['sequential'][task]['val'])
        eigen_spectrum[task]['eigenvals'] = eigenvals
        eigen_spectrum[task]['eigenvecs'] = eigenvecs
        save_task_model_by_policy(seq_model, task, 'seq', config['exp_dir'])

        # Evaluate the sequential model on every task seen so far.
        for prev_task in range(1, task + 1):
            metrics = eval_single_epoch(
                seq_model, loaders['sequential'][prev_task]['val'])
            seq_meter.update(task, prev_task, metrics['accuracy'])
            print(prev_task, metrics)
            log_comet_metric(experiment, 't_{}_seq_acc'.format(prev_task),
                             metrics['accuracy'], task)
            log_comet_metric(experiment, 't_{}_seq_loss'.format(prev_task),
                             round(metrics['loss'], 5), task)
            if task == 1:
                log_comet_metric(experiment, 'avg_acc', metrics['accuracy'], task)
                log_comet_metric(experiment, 'avg_loss', metrics['loss'], task)

        if task > 1:
            accs_mtl, losses_mtl = [], []
            print('---- Task {} (mtl) ----'.format(task))
            mtl_model = train_task_MTL(
                task, loaders['full-multitask'][task]['train'], config,
                loaders['sequential'][1]['val'])

            # grads_t1 = get_model_grads(mtl_model, loaders['sequential'][1]['val'])
            # grads_t2 = get_model_grads(mtl_model, loaders['sequential'][2]['val'])
            # Gradients of the saved task-1 sequential model on the multitask
            # and task-2 data, to compare against task 1's eigenvectors.
            grads_t1 = get_model_grads(
                load_task_model_by_policy(1, 'seq', config['exp_dir']).to(DEVICE),
                loaders['full-multitask'][2]['train'])
            grads_t3 = get_model_grads(
                load_task_model_by_policy(1, 'seq', config['exp_dir']).to(DEVICE),
                loaders['sequential'][2]['train'])
            seq_1 = flatten_params(
                load_task_model_by_policy(1, 'seq', config['exp_dir']),
                False).cpu()
            seq_2 = flatten_params(
                load_task_model_by_policy(2, 'seq', config['exp_dir']),
                False).cpu()
            cosines_t1 = compute_direction_cosines(
                grads_t1, eigen_spectrum[1]['eigenvecs'])
            # cosines_t2 = compute_direction_cosines(grads_t2, eigen_spectrum[2]['eigenvecs'])
            cosines_t3 = compute_direction_cosines(
                grads_t3, eigen_spectrum[1]['eigenvecs'])
            # Cosines of the actual weight-update directions (mtl - seq_1 and
            # seq_2 - seq_1) with task 1's top eigenvectors.
            cosine_d1 = compute_direction_cosines(
                (flatten_params(mtl_model, False).cpu() - seq_1),
                eigen_spectrum[1]['eigenvecs'])
            cosine_d2 = compute_direction_cosines(
                seq_2 - seq_1, eigen_spectrum[1]['eigenvecs'])
            print("cos 1 >> ", cosines_t1)
            # print("cos 2 >> ", cosines_t2)
            print("cos 3 >> ", cosines_t3)
            print("dir 1 >>", cosine_d1)
            print("dir 2 >>", cosine_d2)
            save_task_model_by_policy(mtl_model, task, 'mtl', config['exp_dir'])

            # Evaluate the multitask model on every task seen so far.
            for prev_task in range(1, task + 1):
                metrics_mtl = eval_single_epoch(
                    mtl_model, loaders['sequential'][prev_task]['val'])
                accs_mtl.append(metrics_mtl['accuracy'])
                losses_mtl.append(metrics_mtl['loss'])
                # BUG FIX: previously recorded metrics['accuracy'], a stale
                # value left over from the sequential evaluation loop above.
                mtl_meter.update(task, prev_task, metrics_mtl['accuracy'])
                print('MTL >> ', prev_task, metrics_mtl)
                log_comet_metric(experiment, 't_{}_mtl_acc'.format(prev_task),
                                 metrics_mtl['accuracy'], task)
                log_comet_metric(experiment, 't_{}_mtl_loss'.format(prev_task),
                                 round(metrics_mtl['loss'], 5), task)
            log_comet_metric(experiment, 'avg_acc_mtl', np.mean(accs_mtl), task)
            log_comet_metric(experiment, 'avg_loss_mtl', np.mean(losses_mtl), task)
        print()

    seq_meter.save(config)
    mtl_meter.save(config)
    experiment.log_asset_folder(config['exp_dir'])
    experiment.end()