def run_experiments(dataset,
                    net_type="large",
                    optimizer="sgd",
                    initial_lr=2e-3,
                    batch_size=100,
                    num_epochs=100,
                    num_exps=20,
                    num_repts=5,
                    l2_reg=200e-4,
                    bias_l2_reg=None,
                    curriculum="None",
                    sorted_indices=None,
                    data_augmentation=False,
                    comp_grads=False):
    """Train ``num_exps`` x ``num_repts`` models on ``dataset`` and persist results.

    For each experiment a pre-saved initial model is reloaded so every
    repetition starts from identical weights. Depending on ``comp_grads``,
    either (a) the model is trained one epoch at a time while per-sample and
    per-mini-batch gradients are compared against the full-data gradient, or
    (b) the model is trained normally through the data generator with the
    learning-rate schedule.

    Parameters
    ----------
    dataset : object
        Must expose ``data_path``, ``results_path``, ``num_classes`` and a
        ``load_data_cache(path)`` method returning the train/test splits.
    net_type : str
        Architecture key passed to ``create_model``; also part of the
        model/results directory layout.
    optimizer : str
        ``"sgd"`` or ``"adam"``; anything else raises ``ValueError``.
    initial_lr : float
        Initial learning rate; also drives the LR schedule callback.
    batch_size : int
        Mini-batch size for training and for the gradient-comparison batches.
    num_epochs : int
        Training epochs per repetition.
    num_exps : int
        Number of experiments (distinct saved initializations).
    num_repts : int
        Repetitions per experiment.
    l2_reg, bias_l2_reg : float or None
        L2 regularization factors for weights / biases.
    curriculum : str
        ``"None"`` disables curriculum ordering; any other value enables the
        schedule from ``get_curriculum_schedule``.
    sorted_indices : array-like
        Index order applied to the training set (curriculum order).
        NOTE(review): the code always indexes with it, so ``None`` would
        fail — confirm callers always supply it.
    data_augmentation : bool or str
        ``True`` or the legacy string ``"True"`` enables image augmentation.
    comp_grads : bool
        Enables the gradient-comparison training path and adds a
        ``comp_grads/`` component to the results path.

    Returns
    -------
    None
        Trained models, history pickles and gradient-distance pickles are
        written under ``dataset.results_path``.
    """
    cache_file = os.path.join(dataset.data_path, 'data.pkl')
    (x_train, cls_train, y_train), (x_test, cls_test, y_test) = \
        dataset.load_data_cache(cache_file)

    # Normalize pixel values to roughly [-1, 1].
    x_train = (x_train - 128.) / 128
    x_test = (x_test - 128.) / 128

    # Accept both the boolean True and the legacy string flag "True"
    # (previously only the string enabled augmentation, silently ignoring
    # a boolean True).
    if data_augmentation is True or data_augmentation == "True":
        datagen = DataGenerator(
            featurewise_center=True,            # set input mean to 0 over the dataset
            samplewise_center=False,            # set each sample mean to 0
            featurewise_std_normalization=True,     # divide inputs by dataset std
            samplewise_std_normalization=False,     # divide each input by its std
            zca_whitening=False,                # apply ZCA whitening
            rotation_range=20,                  # random rotation range (degrees)
            width_shift_range=0.1,              # horizontal shift (fraction of width)
            height_shift_range=0.1,             # vertical shift (fraction of height)
            horizontal_flip=True)               # randomly flip images
    else:
        print("No Aug")
        datagen = DataGenerator()

    # Attach the bookkeeping that the custom generator / callbacks expect.
    datagen.subset_index_array = np.arange(len(x_train))
    datagen.sorted_indices = sorted_indices
    datagen.steps_per_epoch = len(x_train) / batch_size
    datagen.num_classes = dataset.num_classes
    datagen.curriculum = False
    datagen.x_test = x_test
    datagen.y_test = y_test

    # Reorder the training data into curriculum order up front.
    x_train = x_train[sorted_indices]
    y_train = y_train[sorted_indices]
    datagen.x_train = x_train
    datagen.y_train = y_train

    if curriculum != "None":
        datagen.curriculum = True
        datagen.curriculum_schedule = get_curriculum_schedule(len(x_train))

    # Separate name for the path component instead of clobbering the flag
    # (the original rebound `comp_grads` to a string; truthiness-equivalent).
    comp_grads_dir = 'comp_grads/' if comp_grads else ''

    results_path = os.path.join(
        dataset.results_path,
        net_type + "/" + comp_grads_dir + optimizer + "2/" + str(initial_lr) +
        "/" + str(l2_reg) + "/" + curriculum + "/")
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    print(results_path)

    reduce_lr = LearningRateScheduler(get_lr_scheduler(initial_lr))
    train_acc = TrainHistory(datagen)

    if "subset" in dataset.data_path:
        dataset.data_path = subset_model_path

    # NOTE(review): loaded but not used below — kept so a missing
    # svm_results.pkl still fails loudly, as in the original.
    with open(os.path.join(dataset.data_path, 'svm_results.pkl'),
              mode='rb') as file:
        prob_estimates, preds_svm, _, _, _, _ = pickle.load(file)

    for exp in range(num_exps):
        grads_history1 = defaultdict(list)  # per-sample vs full-data gradient distances
        grads_history2 = defaultdict(list)  # per-mini-batch vs full-data gradient distances
        print("Experiment ", exp)
        results_path_ = os.path.join(results_path, "exp{0}/".format(exp))
        if not os.path.exists(results_path_):
            os.makedirs(results_path_)

        for rpt in range(num_repts):
            print("Rept. ", rpt)
            # Reload the saved initialization so every repetition starts
            # from identical weights.
            old_model = load_model(
                os.path.join(
                    dataset.data_path,
                    'model/' + net_type + '/model_init_{0}.h5'.format(exp)))
            model = create_model(net_type=net_type,
                                 n_classes=dataset.num_classes,
                                 reg_factor=l2_reg,
                                 bias_reg_factor=bias_l2_reg)
            model.set_weights(old_model.get_weights())

            if optimizer == "adam":
                opt = keras.optimizers.adam(lr=initial_lr,
                                            beta_1=0.9,
                                            beta_2=0.999,
                                            epsilon=None,
                                            decay=0.0,
                                            amsgrad=False)
            elif optimizer == "sgd":
                opt = keras.optimizers.sgd(lr=initial_lr)
            else:
                # Previously an unknown value fell through and crashed later
                # with UnboundLocalError on `opt`.
                raise ValueError("Unsupported optimizer: {0}".format(optimizer))

            model.compile(loss='categorical_crossentropy',
                          optimizer=opt,
                          metrics=['accuracy'])
            model.summary()
            gradients_fetcher = create_gradients_fetcher(
                model, model.trainable_weights)

            # Record epoch-0 (pre-training) performance as history entry 0.
            loss, acc = model.evaluate(x_train, y_train,
                                       batch_size=100, verbose=1)
            val_loss, val_acc = model.evaluate(x_test, y_test,
                                               batch_size=100, verbose=1)
            train_acc.datagen.history = defaultdict(list)
            epoch0 = {
                'acc': acc,
                'val_acc': val_acc,
                'val_loss': val_loss,
                'loss': loss
            }
            train_acc.datagen.history[0] = epoch0
            print(epoch0)

            if comp_grads:
                num_batches = int(len(x_train) / batch_size)  # np.int is removed in NumPy>=1.20
                for e in range(num_epochs + 1):
                    print("Epoch #", e)
                    # BUG FIX: `grads_full` was referenced below but the line
                    # computing it had been commented out, so this path always
                    # raised NameError. Per-sample gradients (batch_size=1)
                    # and the full-data gradient are fetched once per epoch.
                    grads_full = gradients_fetcher(x_train, y_train,
                                                   batch_size=1)
                    grads1 = gradients_fetcher(x_train, y_train,
                                               batch_size=len(x_train))
                    for i in range(len(x_train)):
                        grads2 = grads_full[i]
                        euc_dist = np.sum(np.subtract(grads1, grads2)**2)
                        cos_dist = scipy.spatial.distance.cosine(
                            grads1, grads2)
                        # `rad` and `sim` both hold the cosine similarity;
                        # the duplicate is kept so pickled tuples stay
                        # format-compatible with earlier runs.
                        rad = 1 - cos_dist
                        sim = 1 - cos_dist
                        angle = np.degrees(np.arccos(rad))
                        grads_history1[(rpt, e, i)].append(
                            (euc_dist, cos_dist, sim, rad, angle))
                    for b in range(num_batches):
                        grads2 = gradients_fetcher(
                            x_train[b * batch_size:(b + 1) * batch_size],
                            y_train[b * batch_size:(b + 1) * batch_size],
                            batch_size=batch_size)
                        euc_dist = np.sum(np.subtract(grads1, grads2)**2)
                        cos_dist = scipy.spatial.distance.cosine(
                            grads1, grads2)
                        rad = 1 - cos_dist
                        sim = 1 - cos_dist
                        angle = np.degrees(np.arccos(rad))
                        grads_history2[(rpt, e, b)].append(
                            (euc_dist, cos_dist, sim, rad, angle))
                    # Train exactly one epoch so gradients are re-measured
                    # after every pass over the data.
                    model.fit(
                        x=x_train,
                        y=y_train,
                        batch_size=100,  # batch_size
                        epochs=1,
                        verbose=2,
                        sample_weight=[],
                        validation_data=(x_test, y_test),
                        callbacks=[train_acc])
            else:
                model.fit_generator(
                    generator=datagen.flow(x=x_train,
                                           y=y_train,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           seed=seed),
                    steps_per_epoch=len(x_train) / batch_size,
                    epochs=num_epochs,
                    verbose=2,
                    validation_data=(x_test, y_test),
                    callbacks=[train_acc, reduce_lr],
                    workers=4)

            model.save(
                os.path.join(results_path_,
                             "model_trained{0}.h5".format(rpt)))
            with open(
                    os.path.join(results_path_,
                                 "trainHistoryDict{0}".format(rpt)),
                    'wb') as file_pi:
                pickle.dump(train_acc.datagen.history, file_pi)

        if comp_grads:
            with open(os.path.join(results_path_, "gradsHistoryDict1"),
                      'wb') as file_pi:
                pickle.dump(grads_history1, file_pi, protocol=4)
            with open(os.path.join(results_path_, "gradsHistoryDict2"),
                      'wb') as file_pi:
                pickle.dump(grads_history2, file_pi, protocol=4)
    return None