def main(path):
    """Cluster the training images with k-means and score it as a classifier.

    Each cluster is labelled with the most frequent ground-truth label among
    the training images assigned to it; every test image then receives the
    label of the cluster it is predicted to belong to. The resulting accuracy
    is printed and the cluster centroids are passed to ``utils.visualize``.

    Args:
        path: Location forwarded to ``utils.load_training_data`` and
            ``utils.load_test_data``.
    """
    train_images, train_labels = utils.load_training_data(path)
    test_images, test_labels = utils.load_test_data(path)

    n_classes = 10
    model = KMeans(n_clusters=n_classes, n_init=1, max_iter=1)
    model.fit(train_images)

    # Which label does each cluster stand for?
    # 1. collect the data points assigned to the cluster
    # 2. look up their actual labels
    # 3. use the mode of those labels as the cluster's label
    cluster_label_dict = {}
    for cluster_num in range(n_classes):
        idx = utils.cluster_indices(cluster_num, model.labels_)
        original_labels = np.take(train_labels, idx)
        # np.unique returns sorted values and argmax picks the first maximum,
        # so ties resolve to the smallest label -- the same rule as
        # scipy.stats.mode, but without depending on the shape of its result
        # (which changed when SciPy switched to keepdims=False).
        values, counts = np.unique(original_labels, return_counts=True)
        cluster_label_dict[cluster_num] = values[np.argmax(counts)]

    # prediction
    predicted_cluster = model.predict(test_images)
    predicted_labels = np.vectorize(cluster_label_dict.get)(predicted_cluster)
    accuracy = utils.classification_accuracy(predicted_labels, test_labels)
    print(" K means clustering accuracy for cifar 10 = {}".format(accuracy))

    # visualise clusters
    cluster_centroids = model.cluster_centers_
    utils.visualize(cluster_centroids)
def report_progress(self, data):
    """Print a progress summary, optionally with test-run performance.

    The summary always contains the percentage of completed time steps and
    the wall-clock time since ``self.start_time``. If ``'rnn.loss_'`` is
    being monitored, the sum of the last ``self.report_interval`` recorded
    losses divided by ``self.report_interval`` is added. If
    ``self.report_accuracy`` or ``self.report_loss`` is set, a test
    simulation obtained from ``self.get_test_sim()`` is run on ``data`` and
    its accuracy and/or mean loss are appended.

    Args:
        data: Passed to the test simulation's ``run`` and to
            ``classification_accuracy``.
    """
    # Both figures are taken before any test run so the reported elapsed time
    # does not include the cost of producing this report.
    percent_done = np.round((self.i_t / self.total_time_steps) * 100, 2)
    seconds_elapsed = np.round(time.time() - self.start_time, 1)

    # The two placeholders in the header are filled in at the very end.
    pieces = ['\rProgress: {}% complete \nTime Elapsed: {}s \n']

    if 'rnn.loss_' in self.mons:
        window = self.report_interval
        recent_losses = self.mons['rnn.loss_'][-window:]
        pieces.append('Average loss: {} \n'.format(sum(recent_losses) / window))

    if self.report_accuracy or self.report_loss:
        test_sim = self.get_test_sim()
        test_sim.run(data,
                     mode='test',
                     monitors=['rnn.y_hat', 'rnn.loss_'],
                     verbose=False,
                     a_initial=np.copy(self.rnn.a))

        if self.report_accuracy:
            test_acc = classification_accuracy(data, test_sim.mons['rnn.y_hat'])
            pieces.append('Test accuracy: {} \n'.format(test_acc))

        if self.report_loss:
            mean_test_loss = np.mean(test_sim.mons['rnn.loss_'])
            pieces.append('Test loss: {} \n'.format(mean_test_loss))

    print(''.join(pieces).format(percent_done, seconds_elapsed))
def loss(self):
    """Return the classification loss and register an accuracy summary.

    ``self.inference`` is read once and handed, together with ``self.y``, to
    ``digits.classification_loss`` and ``digits.classification_accuracy``.
    A scalar summary named after the accuracy op is appended to
    ``self.summaries``; only the loss is returned.
    """
    logits = self.inference
    # An earlier variant used slim.losses.softmax_cross_entropy(model, self.y).
    loss_op = digits.classification_loss(logits, self.y)
    accuracy_op = digits.classification_accuracy(logits, self.y)
    self.summaries.append(
        tf.summary.scalar(accuracy_op.op.name, accuracy_op))
    return loss_op
def exp_web_images():
    """Run the saved model on the images in ``./from-web`` and print results.

    Loads the images and labels with ``load_web_images``, pre-processes each
    image with ``utils.pre_process``, restores the model named by
    ``param._model_fname`` and prints the accuracy, the labels, the
    predictions and the top-3 softmax entries per image.

    The session returned by ``load_model`` is closed before returning, also
    when evaluation fails.
    """
    # parameters
    param = ExperimentParam(n_rows=32, n_cols=32, n_channels=3, n_classes=43)

    # load data
    folder = './from-web'
    X, y = load_web_images(folder, param)
    X = np.array([utils.pre_process(image) for image in X], dtype=np.float32)

    # load model
    model_fname = param._model_fname
    net, sess = load_model(model_fname, param)
    try:
        preds, softmax = sess.run([net._preds, net._softmax], {
            net._X: X,
            net._is_training: False
        })
        accuracy = utils.classification_accuracy(y, preds)
        print('Accuracy on web images: ', accuracy)
        print('labels: ', y)
        print('predictions: ', preds)

        # top softmax
        topk = sess.run(tf.nn.top_k(tf.constant(softmax), k=3))
        print(topk)
    finally:
        # The session was previously left open when the function returned.
        sess.close()
def show_bad_cases(test_fname, param):
    """Display the test images that the saved model misclassifies.

    Args:
        test_fname: File handed to ``utils.load_data`` to obtain the test
            images and labels.
        param: Parameter object; its ``_n_rows``, ``_n_cols`` and
            ``_n_channels`` are overwritten from the shape of the loaded
            images, and ``_model_fname`` names the checkpoint to restore.
    """
    # Load the raw test set and a pre-processed float32 copy for the network.
    X_test, y_test = utils.load_data(test_fname)
    normalized = np.array([utils.pre_process(image) for image in X_test],
                          dtype=np.float32)

    # Unpacking requires a 4-D array; the leading (sample count) entry is
    # not needed.
    _, param._n_rows, param._n_cols, param._n_channels = X_test.shape

    # Rebuild the graph and restore the trained weights.
    n_classes = int(np.max(y_test) + 1)
    tf.reset_default_graph()
    net = network.TrafficSignNet(n_classes, param)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    tf.train.Saver().restore(sess, param._model_fname)

    # Predict on the whole test set in one run.
    feed = {net._X: normalized, net._is_training: False}
    preds_test = sess.run(net._preds, feed)
    test_accuracy = utils.classification_accuracy(y_test, preds_test)
    print('test accuracy: ', test_accuracy)
    sess.close()
    del normalized  # release the pre-processed copy before plotting

    # Show the test images that are not correctly classified.
    wrong = np.where(preds_test != y_test)[0]
    utils.show_images(X_test,
                      y_test,
                      wrong,
                      n_cols=5,
                      num_images=200,
                      preds=preds_test)
def test_model(model_fname, param, X, y):
    """Evaluate a saved model on ``X`` in batches.

    Args:
        model_fname: Checkpoint name passed to ``load_model``.
        param: Parameter object; ``_batch_size`` sets the batch size.
        X: Input array; batches are taken along its first axis.
        y: Ground-truth labels passed to ``utils.classification_accuracy``.

    Returns:
        A tuple ``(accuracy, preds)`` where ``preds`` is the concatenation
        of the per-batch predictions.
    """
    net, sess = load_model(model_fname, param)
    try:
        n_images = X.shape[0]
        batch_size = param._batch_size

        preds = []
        # Stepping by batch_size covers the trailing partial batch too;
        # slicing past the end simply truncates.
        for bt_start in range(0, n_images, batch_size):
            X_batch = X[bt_start:bt_start + batch_size]
            bt_preds = sess.run(net._preds, {
                net._X: X_batch,
                net._is_training: False
            })
            preds.append(bt_preds)
        preds = np.concatenate(preds)
    finally:
        # Close the session even when a batch fails, so it is not leaked.
        sess.close()

    accuracy = utils.classification_accuracy(y, preds)
    return accuracy, preds
def loss(self):
    """Compute the classification loss and log accuracy as a summary.

    Both ``digits.classification_loss`` and
    ``digits.classification_accuracy`` are called with ``self.inference``
    and ``self.y``. The accuracy is wrapped in a scalar summary (named after
    its op) and appended to ``self.summaries``.

    Returns:
        The value produced by ``digits.classification_loss``.
    """
    total_loss = digits.classification_loss(self.inference, self.y)
    acc = digits.classification_accuracy(self.inference, self.y)
    acc_summary = tf.summary.scalar(acc.op.name, acc)
    self.summaries.append(acc_summary)
    return total_loss
def train(
        trainloader,
        testloader,
        device,
        seed,
        debias_=True,
        specific=None,
        ratio=0.5,  # bias ratio in dataset
        n_epochs=5,
        model_lr=1e-3,
        n2v_lr=1e-3,
        combined_n2v_lr=1e-3,  # metalearning rate for n2v
        alpha=100,  # for debias,
        beta=0.1,  # for adversarial loss
        out_file=None,
        base_folder="",
        results_folder="",
        experiment="sgd",
        momentum=0,
        module="layer4",
        finetuned=False,
        adversarial=False,
        nonlinear=False,
        subset=False,
        subset_ratio=0.1,
        save_every=False,
        model_momentum=0,
        n2v_momentum=0,
        experimental=False,
        multiple=False,
        debias_multiple=False,
        reset=False,
        reset_counter=1,
        n2v_start=False,
        experiment2=None,
        adaptive_alpha=False,
        n2v_adam=False,
        single=False,
        imagenet=False,
        train_batch_size=64,
        constant_resize=False,
        adaptive_resize=False,
        no_class=False,
        gamma=0,
        partial_projection=False,
        norm='l2',
        constant_alpha=False,
        jump_alpha=False,
        linear_alpha=False,
        mean_debias=False,
        no_limit=False,
        dataset='bam',
        parallel=False,
        gpu_ids=[],
        switch_modes=True):
    """Train a classifier together with a probe, optionally debiasing it.

    Every batch is used twice: first to update the probe (``net``) with
    ``n2v_optim``, then to update the classifier (``model``) with
    ``model_optim``. When ``debias_`` is set, the gradient of
    ``alpha * debias.debias_loss(...)``, taken through one differentiable
    probe update (``higher.innerloop_ctx``), is added to the classifier
    gradients. When ``adversarial`` is set, ``-beta * adv_loss`` is added to
    the classification loss instead. ``debias_`` and ``adversarial`` may not
    both be set (asserted).

    A results dictionary of per-step/per-epoch logs is pickled to
    ``results_path`` at the start of every epoch and once more at the end;
    the probe and classifier state dicts are saved at the end.

    Args (only the less obvious ones):
        ratio: Used to build checkpoint and result file names.
        alpha: Weight of the debias term; rescheduled per epoch depending on
            ``adaptive_alpha`` / ``constant_alpha`` / ``jump_alpha`` /
            ``linear_alpha``.
        beta: Weight of the adversarial term.
        module: Name of the probed module; also a path component.
        finetuned, n2v_start: Select which existing checkpoints are used as
            initialisation.
        experiment2: If not None, replaces ``experiment`` in output paths.
        single: If set, ``get_vg`` returns the second-to-last probe row
            instead of the difference of the last two rows.
        gamma: Passed as ``t`` to ``models.load_models`` and to
            ``debias.debias_loss`` inside the debias step.
        no_limit: Disables the roll-back of the debias gradient when its
            largest per-parameter norm exceeds 100.
        dataset: ``'bam'`` and ``'coco'`` use the ``utils`` path helpers;
            anything else uses plain ``os.path.join`` layouts. ``'coco'``
            additionally switches to weighted BCE-with-logits criteria.

    The parameters ``out_file``, ``subset``, ``subset_ratio``,
    ``experimental``, ``debias_multiple``, ``no_class``, ``parallel`` and
    ``gpu_ids`` are not referenced in this body; ``momentum`` is only
    printed.

    Returns:
        None.
    """
    print("mu", momentum, "debias", debias_, "alpha", alpha, " | ratio:",
          ratio)

    def get_vg(W):
        # Direction used by the debias loss: either the second-to-last row of
        # the probe weights, or the difference of the last two rows.
        if single:
            return W[-2, :]
        else:
            return W[-2, :] - W[-1, :]

    # ------------------------------------------------------------------
    # Resolve the checkpoints used for initialisation.
    # ------------------------------------------------------------------
    if dataset == 'bam' or dataset == 'coco':
        model_init_path, n2v_init_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end="resnet_init" + '.pt',
            n2v_end="resnet_n2v_init" + '.pt',
            n2v_module=module,
            experiment=experiment,
            with_n2v=False)
    else:
        model_init_path = os.path.join(base_folder, str(seed), experiment,
                                       'resnet_init.pt')
        n2v_init_path = os.path.join(base_folder, str(seed), experiment,
                                     module, 'resnet_n2v_init.pt')
    if finetuned:
        # Start from an already trained classifier: 'post_train' by default,
        # or the experiment name with its '_finetuned' suffix stripped when
        # n2v_start is set.
        if dataset == 'bam' or dataset == 'coco':
            model_init_path = utils.get_model_path(
                base_folder,
                seed,
                specific,
                "resnet_" + str(ratio) + ".pt",
                experiment='post_train'
                if not n2v_start else experiment.split('_finetuned')[0])
        else:
            model_init_path = os.path.join(
                base_folder, str(seed), 'post_train'
                if not n2v_start else experiment.split('_finetuned')[0],
                'resnet.pt')
    # debias_ and adversarial are mutually exclusive (both may be off).
    assert (debias_ and not adversarial) or (
        adversarial
        and not debias_) or (not adversarial and not debias_)
    if debias_ and n2v_start:
        ext = "_n2v_" if not nonlinear else "_mlp_"
        if dataset == 'bam' or dataset == 'coco':
            n2v_init_path = utils.get_net2vec_path(
                base_folder,
                seed,
                specific,
                module,
                "resnet" + str(ext) + str(ratio) + ".pt",
                experiment=experiment.split('_finetuned')[0])
        else:
            n2v_init_path = os.path.join(base_folder, str(seed),
                                         experiment.split('_finetuned')[0],
                                         module,
                                         'resnet' + ext[:-1] + '.pt')
    # if we're also doing adversarial, make sure to load the matching n2v as init...
    if adversarial:
        ext = "_n2v_" if not nonlinear else "_mlp_"
        if dataset == 'bam' or dataset == 'coco':
            n2v_init_path = utils.get_net2vec_path(base_folder,
                                                   seed,
                                                   specific,
                                                   module,
                                                   "resnet" + str(ext) +
                                                   str(ratio) + ".pt",
                                                   experiment='post_train')
        else:
            n2v_init_path = os.path.join(base_folder, str(seed),
                                         'post_train', module,
                                         'resnet' + ext[:-1] + '.pt')

    # ------------------------------------------------------------------
    # Build classifier, probe and optimizers.
    # ------------------------------------------------------------------
    num_classes = 10
    num_attributes = 12
    if nonlinear:
        num_attributes = 2
    if multiple:
        num_attributes = 10 + 9 + 2 * 10
    if dataset == 'coco':
        num_classes = 79
        num_attributes = 81
    model, net, net_forward, activation_probe = models.load_models(
        device,
        lambda x, y, z: models.resnet_(pretrained=True,
                                       custom_path=x,
                                       device=y,
                                       initialize=z,
                                       num_classes=num_classes,
                                       size=50 if (dataset == 'bam' or dataset
                                                   == 'coco') else 34),
        model_path=model_init_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_init_path,
        module=module,
        num_attributes=num_attributes,
        # we want to make sure to save the inits if not finetuned...
        model_init=True if not finetuned else False,
        n2v_init=True if not (finetuned and
                              (adversarial or
                               (debias_ and n2v_start))) else False,
        loader=trainloader,
        nonlinear=nonlinear,
        # parameters if we want to initially project probes to have a certain amount of bias
        partial_projection=partial_projection,
        t=gamma)
    print(model_init_path, n2v_init_path)
    # Wrapper combining classifier and probe; it is the module handed to
    # higher.innerloop_ctx in the debias step below.
    model_n2v_combined = models.ProbedModel(model,
                                            net,
                                            module,
                                            switch_modes=switch_modes)
    if n2v_adam:
        combined_optim = torch.optim.Adam(
            [{
                'params': model_n2v_combined.model.parameters()
            }, {
                'params': model_n2v_combined.net.parameters()
            }],
            lr=n2v_lr)
        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.Adam(net.parameters(), lr=n2v_lr)
    else:
        combined_optim = torch.optim.SGD(
            [{
                'params': model_n2v_combined.model.parameters()
            }, {
                'params': model_n2v_combined.net.parameters(),
                'lr': combined_n2v_lr,
                'momentum': n2v_momentum
            }],
            lr=model_lr,
            momentum=model_momentum)
        # TODO: allow for momentum training as well
        n2v_optim = torch.optim.SGD(net.parameters(),
                                    lr=n2v_lr,
                                    momentum=n2v_momentum)
    model_optim = torch.optim.SGD(model.parameters(),
                                  lr=model_lr,
                                  momentum=model_momentum)

    # ------------------------------------------------------------------
    # Logs. The lists are shared with `results`, so appending to them during
    # training updates what gets pickled.
    # ------------------------------------------------------------------
    d_losses = []
    adv_losses = []
    n2v_train_losses = []
    n2v_accs = []
    n2v_val_losses = []
    class_train_losses = []
    class_accs = []
    class_val_losses = []
    alpha_log = []
    magnitudes = []
    magnitudes2 = []
    unreduced = []
    bias_grads = []
    loss_shapes = []
    loss_shapes2 = []
    results = {
        "debias_losses": d_losses,
        "n2v_train_losses": n2v_train_losses,
        "n2v_val_losses": n2v_val_losses,
        "n2v_accs": n2v_accs,
        "class_train_losses": class_train_losses,
        "class_val_losses": class_val_losses,
        "class_accs": class_accs,
        "adv_losses": adv_losses,
        "alphas": alpha_log,
        "magnitudes": magnitudes,
        "magnitudes2": magnitudes2,
        "unreduced": unreduced,
        "bias_grads": bias_grads,
        "loss_shapes": loss_shapes,
        "loss_shapes2": loss_shapes2
    }

    # ------------------------------------------------------------------
    # Output locations for results and final checkpoints.
    # ------------------------------------------------------------------
    if debias_:
        results_end = str(ratio) + "_debias.pck"
    elif adversarial:
        results_end = str(ratio) + "_adv.pck"
        if nonlinear:
            results_end = str(ratio) + "_mlp_adv.pck"
    else:
        results_end = str(ratio) + "_base.pck"
    if dataset == 'bam' or dataset == 'coco':
        results_path = utils.get_net2vec_path(
            results_folder, seed, specific, module, results_end,
            experiment if experiment2 is None else experiment2)
    else:
        results_path = os.path.join(
            results_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            results_end)
    if debias_:
        model_end = "resnet_debias_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_debias_" + str(ratio) + '.pt'
    elif adversarial:
        if not nonlinear:
            model_end = "resnet_adv_" + str(ratio) + '.pt'
        else:
            model_end = "resnet_adv_nonlinear_" + str(ratio) + '.pt'
        if not nonlinear:
            n2v_end = "resnet_n2v_adv_" + str(ratio) + '.pt'
        else:
            n2v_end = "resnet_mlp_adv_" + str(ratio) + '.pt'
    else:
        model_end = "resnet_base_" + str(ratio) + '.pt'
        n2v_end = "resnet_n2v_base_" + str(ratio) + '.pt'
    if dataset != 'bam' and dataset != 'coco':
        # Other datasets do not encode the ratio in the file name.
        model_end = model_end.replace('_' + str(ratio), '')
        n2v_end = n2v_end.replace('_' + str(ratio), '')
    if dataset == 'bam' or dataset == 'coco':
        model_path, n2v_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end=model_end,
            n2v_end=n2v_end,
            n2v_module=module,
            experiment=experiment if experiment2 is None else experiment2,
            with_n2v=True,
        )
    else:
        model_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            model_end)
        n2v_path = os.path.join(
            base_folder, str(seed),
            experiment if experiment2 is None else experiment2, module,
            n2v_end)
    # NOTE(review): the nesting of this `else` was reconstructed from
    # flattened text. As written, specific_idx stays unassigned when the
    # dataset has `idx_to_class` but no entry matches `specific` (or
    # `specific` is None) -- confirm against the original layout.
    if hasattr(trainloader.dataset, 'idx_to_class'):
        for key in trainloader.dataset.idx_to_class:
            if specific is not None and trainloader.dataset.idx_to_class[
                    key] in specific:
                specific_idx = int(key)
    else:
        specific_idx = 0
    # With a nonlinear probe only the last two label columns are trained on.
    train_labels = None if not nonlinear else [-2, -1]
    d_last = 0
    resize = constant_resize or adaptive_resize
    if imagenet:
        # The resulting loader is not used anywhere else in this body.
        imagenet_trainloaders, _ = dataload.get_imagenet_tz(
            './datasets/imagenet',
            workers=8,
            train_batch_size=train_batch_size // 8,
            resize=resize,
            constant=constant_resize)
        imagenet_trainloader = dataload.process_imagenet_loaders(
            imagenet_trainloaders)
    # All parameters of the combined model except the last two; the debias
    # gradients computed below are added to these.
    # presumably the last two parameters belong to the probe -- verify.
    params = list(model_n2v_combined.parameters())[:-2]
    init_alpha = alpha
    last_e = 0
    # setup training criteria
    if dataset == 'coco':
        object_weights = torch.FloatTensor(
            trainloader.dataset.getObjectWeights())
        gender_weights = torch.FloatTensor(
            trainloader.dataset.getGenderWeights())
        all_weights = torch.cat([object_weights, gender_weights])
        probe_criterion = nn.BCEWithLogitsLoss(weight=all_weights.to(device),
                                               reduction='elementwise_mean')
        downstream_criterion = nn.BCEWithLogitsLoss(
            weight=object_weights.to(device), reduction='elementwise_mean')
    else:
        probe_criterion = None
        downstream_criterion = nn.CrossEntropyLoss()

    for e in range(n_epochs):
        # save results every epoch...
        with open(results_path, 'wb') as f:
            print("saving results", e)
            print(results_path)
            pickle.dump(results, f)

        # ---- evaluation before this epoch's updates ----
        model.eval()
        with torch.no_grad():
            n2v_acc, n2v_val_loss = utils.net2vec_accuracy(
                testloader, net_forward, device, train_labels)
            n2v_accs.append(n2v_acc)
            n2v_val_losses.append(n2v_val_loss)
            if dataset != 'coco':
                class_acc, class_val_loss = utils.classification_accuracy(
                    testloader, model, device)
                class_accs.append(class_acc)
                class_val_losses.append(class_val_loss)
            else:
                f1, mAP = utils.detection_results(testloader, model, device)
                print("Epoch", e, "| f1:", f1, "| mAP:", mAP)
                class_accs.append([f1, mAP])
        # Current bias as measured on the probe weights (left at 0 in
        # adversarial mode).
        d_initial = 0
        if not adversarial:
            curr_W = net.weight.data.clone()
            if not multiple:
                vg = get_vg(curr_W).reshape(-1, 1)
                d_initial = debias.debias_loss(curr_W[:-2], vg, t=0).item()
                print("Epoch", e, "bias", str(d_initial), " | debias: ",
                      debias_)
            else:
                # One bias value per direction: rows 10/11 for i == 0, rows
                # 20+i / 29+i otherwise, each against the first 10 rows.
                ds = np.zeros(10)
                for i in range(10):
                    if i == 0:
                        vg = (curr_W[10, :] - curr_W[11, :]).reshape(-1, 1)
                    else:
                        vg = (curr_W[20 + i, :] - curr_W[29 + i, :]).reshape(
                            -1, 1)
                    ds[i] = debias.debias_loss(curr_W[:10], vg, t=0).item()
                print("Epoch", e, "bias", ds, " | debias: ", debias_)
                print("Accuracies:", n2v_acc)
                d_initial = ds[0]
        else:
            print("Epoch", e, "Adversarial", n2v_accs[-1])

        # ---- alpha schedule ----
        # NOTE(review): d_initial is 0 whenever `adversarial` is set, so the
        # divisions below raise for e > 0 if adaptive_alpha is combined with
        # adversarial -- confirm these flags are never used together.
        if adaptive_alpha and (e == 0 or
                               ((d_last / d_initial) >= (5 / 2**(e - 1)) or
                                (0.8 < (d_last / d_initial) < 1.2))):
            #alpha = alpha
            old_alpha = alpha
            # we don't want to increase too much if it's already decreasing
            if (e == 0 or (d_last / d_initial) >= (5 / 2**(e - 1))):
                alpha = min(
                    alpha * 2, (15 / (2**e)) / (d_initial + 1e-10)
                )  # numerical stability just in case d_initial gets really low
                #if e > 0 and old_alpha >= alpha:
                #    alpha = old_alpha # don't update if we're decreasing...
                print("Option 1")
            if e > 0 and alpha < old_alpha:
                # we want to increase if plateaud
                alpha = max(
                    old_alpha * 1.5, alpha
                )  # numerical stability just in case d_initial gets really low
                print("Option 2")
            # don't want to go over 1000...
            if alpha > 1000:
                alpha = 1000
            d_last = d_initial
        elif not adaptive_alpha and not constant_alpha:
            if dataset == 'coco' and jump_alpha:
                # Fixed three-stage schedule keyed on the epoch index.
                if e < 2:
                    alpha = 5e3
                elif e >= 2 and e < 4:
                    alpha = 1e4
                else:
                    alpha = init_alpha
            elif jump_alpha and (e - last_e) > 2:
                if not mean_debias:
                    if alpha < 100:
                        alpha = min(alpha * 2, 100)
                        last_e = e
                    else:
                        # two jumps
                        # if (e-last_e) >= ((n_epochs - last_e) // 2):
                        #     alpha = 1000
                        # else:
                        alpha = 1000
                else:
                    if alpha < 1000:
                        alpha = min(alpha * 2, 1000)
                        last_e = e
                    else:
                        alpha = 10000
            elif linear_alpha and (e - last_e) > 2:
                if alpha < 100:
                    alpha = min(alpha * 2, 100)
                    last_e = e
                else:
                    alpha += (1000 - 100) / (n_epochs - last_e)
            elif not jump_alpha and not linear_alpha:
                if (e + 1) % 3 == 0:
                    # apply alpha schedule?
                    # alpha = min(alpha * 1.2, max(init_alpha,1000))
                    alpha = alpha * 1.5
        alpha_log.append(alpha)
        print("Current Alpha:,", alpha)

        # Optional intermediate checkpoints (every 10th epoch, seed 0 only).
        if save_every and e % 10 == 0 and e > 0 and seed == 0 and debias_:
            torch.save(net.state_dict(),
                       n2v_path.split('.pt')[0] + '_' + str(e) + '.pt')
            torch.save(model.state_dict(),
                       model_path.split('.pt')[0] + '_' + str(e) + '.pt')
        # Optionally replace the probe with a freshly initialised one.
        # Only n2v_optim is rebuilt here; model_n2v_combined and
        # combined_optim are not touched by this branch.
        if reset and (e + 1) % reset_counter == 0 and e > 0:
            print("resetting")
            net, net_forward, activation_probe = net2vec.create_net2vec(
                model,
                module,
                num_attributes,
                device,
                pretrained=False,
                initialize=True,
                nonlinear=nonlinear)
            n2v_optim = torch.optim.SGD(net.parameters(),
                                        lr=n2v_lr,
                                        momentum=n2v_momentum)

        model.train()
        ct = 0  # batch counter; incremented below but not read in this body
        for X, y, genders in trainloader:
            ids = None
            ##### Part 1: Update the Embeddings #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            labels = utils.merge_labels(y, genders, device)
            logits = net_forward(X.to(device), switch_modes=switch_modes)
            # Now actually update net2vec embeddings, making sure to use the same batch
            if train_labels is not None:
                if logits.shape[1] == labels.shape[1]:
                    logits = logits[:, train_labels]
                labels = labels[:, train_labels]
            shapes = []
            shapes2 = []
            if dataset == 'coco':
                prelim_loss = probe_criterion(logits, labels)
            else:
                prelim_loss, ids = utils.balanced_loss(logits,
                                                       labels,
                                                       device,
                                                       0.5,
                                                       ids=ids,
                                                       multiple=multiple,
                                                       specific=specific_idx,
                                                       shapes=shapes)
            #print("prelim_loss:", prelim_loss.item())
            prelim_loss.backward()
            # we don't want to update these parameters, just in case
            model_optim.zero_grad()
            n2v_train_losses.append(prelim_loss.item())
            n2v_optim.step()
            # Best-effort logging of per-row weight norms; the bare except
            # swallows any failure.
            # presumably guards against probes without a `.weight` -- verify.
            try:
                magnitudes.append(
                    torch.norm(net.weight.data, dim=1).data.cpu().numpy())
            except:
                pass

            ##### Part 2: Update Conv parameters for classification #####
            model_optim.zero_grad()
            n2v_optim.zero_grad()
            class_logits = model(X.to(device))
            class_loss = downstream_criterion(class_logits, y.to(device))
            class_train_losses.append(class_loss.item())
            if debias_:
                # Log the unreduced bias of the current probe weights.
                W_curr = net.weight.data
                vg = get_vg(W_curr).reshape(-1, 1)
                unreduced.append(
                    debias.debias_loss(W_curr[:-2], vg, t=0,
                                       unreduced=True).data.cpu().numpy())
            loss = class_loss

            #### Part 2a: Debias Loss
            if debias_:
                model_optim.zero_grad()
                n2v_optim.zero_grad()
                labels = utils.merge_labels(y, genders, device)
                o = net.weight.clone()  # not used again in this body
                combined_optim.zero_grad()
                # Take one differentiable probe step and differentiate the
                # bias of the *updated* probe w.r.t. the pre-step parameters
                # (time=0), dropping the last two.
                with higher.innerloop_ctx(model_n2v_combined,
                                          combined_optim) as (fn2v,
                                                              diffopt_n2v):
                    models.update_probe(fn2v)
                    logits = fn2v(X.to(device))
                    if dataset == 'coco':
                        prelim_loss = probe_criterion(logits, labels)
                    else:
                        prelim_loss, ids = utils.balanced_loss(
                            logits,
                            labels,
                            device,
                            0.5,
                            ids=ids,
                            multiple=False,
                            specific=specific_idx,
                            shapes=shapes2)
                    diffopt_n2v.step(prelim_loss)
                    weights = list(fn2v.parameters())[-2]
                    vg = get_vg(weights).reshape(-1, 1)
                    d_loss = debias.debias_loss(weights[:-2],
                                                vg,
                                                t=gamma,
                                                norm=norm,
                                                mean=mean_debias)
                    # only want to save the actual bias...
                    d_losses.append(d_loss.item())
                    grad_of_grads = torch.autograd.grad(
                        alpha * d_loss,
                        list(fn2v.parameters(time=0))[:-2],
                        allow_unused=True)
                # Drop references to the unrolled graph.
                del prelim_loss
                del logits
                del vg
                del fn2v
                del diffopt_n2v

            #### Part 2b: Adversarial Loss
            if adversarial:
                logits = net_forward(
                    None, forward=True)[:, -2:]  # just use activation probe
                labels = genders.type(torch.FloatTensor).reshape(
                    genders.shape[0], -1).to(device)
                adv_loss, _ = utils.balanced_loss(logits,
                                                  labels,
                                                  device,
                                                  0.5,
                                                  ids=ids,
                                                  stable=True)
                adv_losses.append(adv_loss.item())
                # getting too strong, let it retrain...
                # NOTE(review): nesting of `loss += adv_loss` under this `if`
                # was inferred from flattened text -- confirm.
                if adv_loss < 2:
                    adv_loss = -beta * adv_loss
                    loss += adv_loss
            loss.backward()

            if debias_:
                # custom backward to include the bias regularization....
                max_norm_grad = -1
                param_idx = -1
                for ii in range(len(grad_of_grads)):
                    # Skip unused parameters, parameters without a gradient,
                    # and gradients that are NaN everywhere.
                    if (grad_of_grads[ii] is not None
                            and params[ii].grad is not None
                            and torch.isnan(grad_of_grads[ii]).long().sum() <
                            grad_of_grads[ii].reshape(-1).shape[0]):
                        # just in case some or nan for some reason?
                        not_nan = ~torch.isnan(grad_of_grads[ii])
                        params[ii].grad[not_nan] += grad_of_grads[ii][not_nan]
                        # Track the parameter with the largest debias
                        # gradient norm for logging and the limit check.
                        if grad_of_grads[ii][not_nan].norm().item(
                        ) > max_norm_grad:
                            max_norm_grad = grad_of_grads[ii][not_nan].norm(
                            ).item()
                            param_idx = ii
                bias_grads.append((param_idx, max_norm_grad))
                # undo the last step and apply a smaller alpha to prevent stability issues
                # (both mean_debias branches use the same threshold of 100;
                # the scaled re-application is commented out, so the debias
                # gradient is simply removed for this batch)
                if not no_limit and ((not mean_debias and max_norm_grad > 100)
                                     or
                                     (mean_debias and max_norm_grad > 100)):
                    for ii in range(len(grad_of_grads)):
                        if (grad_of_grads[ii] is not None
                                and params[ii].grad is not None and
                                torch.isnan(grad_of_grads[ii]).long().sum() <
                                grad_of_grads[ii].reshape(-1).shape[0]):
                            # just in case some or nan for some reason?
                            not_nan = ~torch.isnan(grad_of_grads[ii])
                            params[ii].grad[not_nan] -= grad_of_grads[ii][
                                not_nan]
                            # scale accordingly
                            # params[ii].grad[not_nan] += grad_of_grads[ii][not_nan] / max_norm_grad
            loss_shapes.append(shapes)
            loss_shapes2.append(shapes2)
            model_optim.step()
            #magnitudes2.append(
            #    torch.norm(net.weight.data, dim=1).data.cpu().numpy()
            #)
            ct += 1

    # save results every epoch...
    # NOTE(review): placement after the epoch loop was inferred from
    # flattened text; `e` is undefined here when n_epochs == 0.
    with open(results_path, 'wb') as f:
        print("saving results", e)
        print(results_path)
        pickle.dump(results, f)
    torch.save(net.state_dict(), n2v_path)
    torch.save(model.state_dict(), model_path)
def train_main(trainloader,
               testloader,
               device,
               seed,
               specific=None,
               p=0.5,
               n_epochs=5,
               lr=0.1,
               experiment="",
               out_file=None,
               base_folder="",
               dataset="bam",
               parallel=False,
               gpu_ids=[],
               linear_only=False):
    """Train the downstream ResNet classifier and save its weights.

    The model is created by ``models.resnet_`` from
    ``<base_folder>/<seed>/resnet_init.pt`` and optimised with SGD
    (momentum 0.9) under a ``LambdaLR`` schedule. Before every epoch the
    model is evaluated on ``testloader``: accuracy for non-COCO datasets,
    F1/mAP for ``'coco'`` (where every new best F1 also writes a
    checkpoint). After training, the final weights are saved.

    Args:
        trainloader: Yields ``(X, y, color)`` batches; for ``'coco'`` its
            dataset must provide ``getObjectWeights()``.
        testloader: Loader used for the per-epoch evaluation.
        device: Device the model and batches are moved to.
        seed: Path component for checkpoints.
        specific: Forwarded to ``utils.get_model_path``.
        p: Ratio; printed and embedded in checkpoint names.
        n_epochs: Number of training epochs.
        lr: Initial learning rate.
        experiment: Experiment name used in checkpoint paths.
        out_file: If given, progress lines are appended to this file instead
            of being printed to stdout.
        base_folder: Root folder for checkpoints.
        dataset: ``'bam'``, ``'coco'`` or anything else (plain path layout).
        parallel: Wrap the model in ``nn.DataParallel`` when true.
        gpu_ids: Device ids for ``nn.DataParallel``. The list default is
            kept for interface compatibility; it is only read, never
            mutated.
        linear_only: Forwarded to ``models.resnet_``.
    """
    f = open(out_file, 'a') if out_file is not None else None
    try:
        print("Downstream Training | Ratio: " + str(p) + " | lr = " + str(lr),
              file=f)
        num_classes = 10
        if dataset == 'coco':
            num_classes = 79
        model = models.resnet_(
            pretrained=True,
            custom_path=os.path.join(base_folder, str(seed),
                                     "resnet_init.pt"),
            device=device,
            num_classes=num_classes,
            initialize=True,
            size=50 if (dataset == 'bam' or dataset == 'coco') else 34,
            linear_only=linear_only)
        if parallel:
            model = nn.DataParallel(model, device_ids=gpu_ids)
        optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

        def scaler(epoch):
            # Step decay used for COCO: x0.75 every 10 epochs.
            return 0.75**(epoch // 10)

        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optim,
            lr_lambda=scaler if dataset == 'coco' else
            (lambda epoch: 0.95**epoch))
        start = time.time()
        criterion = nn.CrossEntropyLoss()
        best_f1 = 0
        if dataset == 'coco':
            object_weights = torch.FloatTensor(
                trainloader.dataset.getObjectWeights())
            criterion = nn.BCEWithLogitsLoss(weight=object_weights.to(device),
                                             reduction='elementwise_mean')

        for e in range(n_epochs):
            # Evaluate before this epoch's updates.
            if dataset != 'coco':
                with torch.no_grad():
                    acc = utils.classification_accuracy(
                        testloader, model, device)
                print("Epoch:", e, "| acc:", acc, file=f)
            else:
                with torch.no_grad():
                    f1, mAP = utils.detection_results(testloader, model,
                                                      device)
                print("Epoch:",
                      e,
                      "| f1:",
                      f1,
                      '| mAP:',
                      mAP,
                      '| lr:',
                      scheduler.get_lr(),
                      file=f)
                if f1 > best_f1:
                    # Keep a checkpoint for every new best F1.
                    save_file = utils.get_model_path(
                        base_folder,
                        seed,
                        specific,
                        'resnet_best' + str(p) + '_{}_{}.pt'.format(f1, mAP),
                        experiment=experiment)
                    best_f1 = f1
                    torch.save(model.state_dict(), save_file)

            model.train()
            for X, y, color in trainloader:
                optim.zero_grad()
                loss = criterion(model(X.to(device)), y.to(device))
                loss.backward()
                optim.step()
            scheduler.step()

        end = time.time()
        # Elapsed wall-clock seconds (the operands used to be swapped, which
        # printed a negative duration).
        print(end - start)

        if dataset == 'coco':
            with torch.no_grad():
                f1, mAP = utils.detection_results(testloader, model, device)
            # print("final", utils.classification_accuracy(testloader, model, device), file=f)
            save_file = utils.get_model_path(
                base_folder,
                seed,
                specific,
                'resnet_' + str(p) + '_{}_{}.pt'.format(f1, mAP),
                experiment=experiment)
        elif dataset == 'bam':
            # f1/mAP are only computed for 'coco', so formatting them into
            # the name raised NameError here. Fall back to the plain
            # 'resnet_<p>.pt' name, which is what the fine-tuning `train`
            # routine loads for this dataset.
            # NOTE(review): confirm the intended file name.
            save_file = utils.get_model_path(base_folder,
                                             seed,
                                             specific,
                                             'resnet_' + str(p) + '.pt',
                                             experiment=experiment)
        else:
            save_file = os.path.join(base_folder, str(seed), experiment,
                                     'resnet.pt')
        torch.save(model.state_dict(), save_file)
    finally:
        # Close the log file even when training or saving fails.
        if f is not None:
            f.close()
def train(pre_trained=None):
    """Train ``ConvNet`` with the settings in ``hp`` and checkpoint every epoch.

    A run folder ``<output_dir>/checkpoints/<net_type>_<timestamp>`` is
    created. It receives a pickle of ``hp``, an accuracy plot that is
    re-drawn after every epoch, and one checkpoint per epoch containing the
    model state, the optimizer state and the epoch index.

    Args:
        pre_trained: Optional path to a checkpoint written by this function;
            when given, model and optimizer state are restored and training
            resumes at the epoch after the stored one.
    """
    # Run folder named after the network type and the current time.
    run_name = hp['net_type'] + str(time.strftime("_%Y%m%d_%H%M%S"))
    run_dir = hp["output_dir"] + '/checkpoints/' + run_name
    os.makedirs(run_dir, exist_ok=True)

    # Keep a copy of the hyper parameters next to the checkpoints.
    with open(run_dir + "/hp.pkl", "wb") as hp_file:
        pickle.dump(hp, hp_file)

    # Data iterators (neither is shuffled; incomplete last batches are
    # dropped).
    train_loader = DataLoader(dataset=DataGenerator(hp),
                              batch_size=hp['batch_size'],
                              num_workers=hp['num_workers'],
                              pin_memory=True,
                              shuffle=False,
                              drop_last=True)
    val_loader = DataLoader(dataset=ValidationDataGenerator(hp),
                            batch_size=50,
                            num_workers=hp['num_workers'],
                            pin_memory=True,
                            shuffle=False,
                            drop_last=True)

    # Model, criterion and optimizer.
    model = ConvNet().to(device)
    criterion = CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=hp['learning_rate'])

    # Optionally resume from an earlier checkpoint.
    first_epoch = 0
    if pre_trained is not None:
        checkpoint = torch.load(pre_trained)
        model.load_state_dict(checkpoint['net'])
        optimizer.load_state_dict(checkpoint['opt'])
        first_epoch = checkpoint['epoch'] + 1

    # Per-epoch statistics; epochs that have not run yet stay at zero.
    n_epochs = hp['num_epochs']
    classification_loss = np.zeros(n_epochs)
    train_accuracy = np.zeros(n_epochs)
    val_accuracy = np.zeros(n_epochs)

    for epoch in range(first_epoch, n_epochs):
        running_loss = 0
        running_acc = 0
        for step, (img, label) in enumerate(train_loader):
            img = img.to(device, dtype=torch.float)
            label = label.to(device, dtype=torch.float)

            optimizer.zero_grad()
            logits = model(img)
            batch_loss = criterion(logits, label.long())
            batch_loss.backward()
            optimizer.step()
            running_loss += batch_loss.item()

            # Accuracy is computed on NumPy copies of logits and labels.
            logits = logits.detach().cpu().numpy()
            label = label.detach().cpu().numpy()
            running_acc += utils.classification_accuracy(logits, label)
            print("epoch = {}, Training_sample={}, classification loss ={}".
                  format(epoch, step, batch_loss.item()))

        # Averages over the batches seen this epoch.
        n_train_batches = step + 1
        classification_loss[epoch] = running_loss / n_train_batches
        train_accuracy[epoch] = running_acc / n_train_batches
        print("epoch = {}, average classification loss ={}".format(
            epoch, classification_loss[epoch]))
        print("epoch = {}, Training accuracy ={}".format(
            epoch, train_accuracy[epoch]))

        # Validation pass without gradient tracking.
        with torch.no_grad():
            running_val_acc = 0
            for val_step, (img, label) in enumerate(val_loader):
                img = img.to(device, dtype=torch.float)
                label = label.to(device, dtype=torch.float)
                logits = model(img)

                logits = logits.detach().cpu().numpy()
                label = label.detach().cpu().numpy()
                running_val_acc += utils.classification_accuracy(
                    logits, label)
            val_accuracy[epoch] = running_val_acc / (val_step + 1)
            print("epoch = {}, Validation set accuracy ={}".format(
                epoch, val_accuracy[epoch]))

        # Re-draw the accuracy curves and write this epoch's checkpoint.
        epochs_axis_train = range(1, len(train_accuracy) + 1)
        plt.plot(epochs_axis_train,
                 train_accuracy,
                 'b-',
                 label=" Train Accuracy")
        epochs_axis_val = range(1, len(val_accuracy) + 1)
        plt.plot(epochs_axis_val,
                 val_accuracy,
                 'r-',
                 label="Validation Accuracy")
        plt.xlabel("epochs")
        plt.ylabel("accuracy")
        plt.legend(loc='best')
        plt.savefig(run_dir + "/accuracy.jpeg", bbox_inches="tight")
        plt.clf()

        state = {
            'net': model.state_dict(),
            'opt': optimizer.state_dict(),
            'epoch': epoch
        }
        checkpoint_path = (
            run_dir + "/convnet_ethiopian_mnist_epoch{}.pth".format(epoch))
        torch.save(state, checkpoint_path)
dot_data = tree.export_graphviz(dt, out_file=None, feature_names=X_test.columns) graph = graphviz.Source(dot_data) graph.render("post-pruning-" + target_column) except: print( 'if you wish to render the pruning graphs please install graphviz') return svc, dt, bt, knn, ANN, X_train, y_train, X_test, y_test # read csv heart = pd.read_csv('./data/heart.csv') bank = pd.read_csv('./data/bank.csv', sep=';') # main areas heart_final, heart_scalers = preprocess_heart_data(heart) bank_final, heart_scalers = preprocess_bank_data(bank) SVC, DecisionTree, BoostedTrees, KNN, ANN, X_train, y_train, X_test, y_test = classifer_creation( heart_final, 'target') classification_accuracy(SVC, DecisionTree, BoostedTrees, KNN, ANN, X_train, y_train, X_test, y_test) print( '==========================================new area==========================================' ) SVC, DecisionTree, BoostedTrees, KNN, ANN, X_train, y_train, X_test, y_test = classifer_creation( bank_final, 'y') classification_accuracy(SVC, DecisionTree, BoostedTrees, KNN, ANN, X_train, y_train, X_test, y_test)
def train_pipeline(X_train, y_train, X_valid, y_valid, X_test, y_test, param):
    """Train ``network.TrafficSignNet`` and report validation/test accuracy.

    Training uses RMSProp on ``net._loss`` with mini-batches drawn from a
    fresh shuffle each epoch. Every ``param._valid_period`` epochs (and on
    the last epoch) the full validation and test sets are evaluated; the
    model is saved to ``param._model_fname`` whenever validation accuracy
    improves. The best figures are printed at the end.

    Args:
        X_train, y_train: Training images (4-D array) and labels.
        X_valid, y_valid: Validation images and labels.
        X_test, y_test: Test images and labels.
        param: Parameter object supplying learning rate, momentum, batch
            size, epoch count, augmentation/validation periods and the
            model file name.
    """
    # The 4-way unpack enforces a 4-D training array; only the sample count
    # is used afterwards.
    n_data, _, _, _ = X_train.shape
    oh_y_train = utils.one_hot_encode(y_train)

    # Network and training op. The control dependency ensures that the
    # update ops run before the train step.
    net = network.TrafficSignNet(param)
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        # train_op = tf.train.AdamOptimizer(learning_rate=param._learning_rate).minimize(net._loss)
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=param._learning_rate, momentum=param._momentum)
        train_op = optimizer.minimize(net._loss)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    n_batches = n_data // param._batch_size
    # Best validation accuracy (with the test accuracy at that point) and
    # best test accuracy (with the validation accuracy at that point).
    best_valid, best_valid_test = 0, 0
    best_test, best_test_valid = 0, 0

    for epoch in range(param._n_epochs):
        # Re-augment the training data periodically.
        if epoch > 0 and epoch % param._aug_data_period == 0:
            print('re-augment data')
            X_train, y_train, oh_y_train = gen_new_train(param)

        order = shuffle(np.array(range(n_data)))

        # One pass over the shuffled data in full batches.
        epoch_loss = 0
        for batch_no in range(n_batches):
            lo = batch_no * param._batch_size
            hi = (batch_no + 1) * param._batch_size
            picked = order[lo:hi]
            feed = {
                net._X: X_train[picked],
                net._y: oh_y_train[picked],
                net._is_training: True
            }
            _, batch_loss = sess.run([train_op, net._loss], feed_dict=feed)
            epoch_loss += batch_loss
        epoch_loss /= n_batches

        evaluate_now = (epoch % param._valid_period == 0
                        or epoch == param._n_epochs - 1)
        if not evaluate_now:
            print('epoch: ', epoch, ' loss: ', epoch_loss)
            continue

        # Predictions are made with the training flag switched off.
        preds_valid = sess.run(net._preds, {
            net._X: X_valid,
            net._is_training: False
        })
        valid_accuracy = utils.classification_accuracy(y_valid, preds_valid)

        preds_test = sess.run(net._preds, {
            net._X: X_test,
            net._is_training: False
        })
        test_accuracy = utils.classification_accuracy(y_test, preds_test)
        # preds_test1 = classify(sess, X_test, net, param)
        # test_accuracy1 = utils.classification_accuracy(y_test, preds_test1)

        if valid_accuracy > best_valid:
            best_valid, best_valid_test = valid_accuracy, test_accuracy
            saver.save(sess, param._model_fname)
        if test_accuracy > best_test:
            best_test, best_test_valid = test_accuracy, valid_accuracy

        print('epoch: ', epoch, ' loss: ', epoch_loss, ' valid accuracy: ',
              valid_accuracy, ' test accuracy: ',
              test_accuracy)  # , 'test accuracy1: ', test_accuracy1)

    sess.close()
    print('best valid: ', best_valid, ' best valid test: ', best_valid_test)
    print('best test: ', best_test, ' best_test_valid: ', best_test_valid)
# Load both splits from the same folder and reduce every image to its
# per-channel colour feature via utils.cifar_10_color.
training_set = utils.load_training_data("cifar-10-batches-py")
train_images, train_labels = training_set  # 50000,3072
test_set = utils.load_test_data("cifar-10-batches-py")
test_images, test_labels = test_set  # 10000,3072

mean_channel_train_images = utils.cifar_10_color(train_images)  # 50000,3
mean_channel_test_images = utils.cifar_10_color(test_images)  # 10000,3

# ---- task 1: naive Bayes ----
# The timer covers learning, prediction, scoring and the accuracy print.
tic = time.time()
naive_bayes_params = utils.naive_bayes_learn(mean_channel_train_images,
                                             train_labels)
mu, sigma, prior = naive_bayes_params  # 10x3 10x3 10x1
prediction = utils.cifar10_classifier_naivebayes(mean_channel_test_images,
                                                 mu, sigma, prior)
accuracy = utils.classification_accuracy(prediction, test_labels)
print("naive bayes accuracy = {}".format(accuracy))
toc = time.time()
print("Time taken for Naive Bayes = ", toc - tic)

print("----------------------------------")

# ---- task 2: Bayes with a covariance term ----
tic = time.time()
bayes_params = utils.bayes_learn(mean_channel_train_images, train_labels)
mu, covariance, prior = bayes_params
prediction = utils.cifar10_classifier_bayes(mean_channel_test_images, mu,
                                            covariance, prior)
accuracy = utils.classification_accuracy(prediction, test_labels)
print("Bayes accuracy = {}".format(accuracy))
toc = time.time()
print("Time taken for Bayes = ", toc - tic)
def loss(self):
    """Build the loss and record the accuracy as a scalar summary.

    ``self.inference`` is read once and reused for both
    ``digits.classification_loss`` and ``digits.classification_accuracy``
    (each also given ``self.y``). The accuracy summary, named after the
    accuracy op, is appended to ``self.summaries``.

    Returns:
        The result of ``digits.classification_loss``.
    """
    network_output = self.inference
    result = digits.classification_loss(network_output, self.y)
    acc_tensor = digits.classification_accuracy(network_output, self.y)
    summary_name = acc_tensor.op.name
    self.summaries.append(tf.summary.scalar(summary_name, acc_tensor))
    return result