def train(train_data, ep, bz):
    """Fit a freshly created model on ``train_data`` and then run ``evaluate``.

    ``dim``, ``t`` and ``seed`` are not parameters; they are read from the
    enclosing module scope.

    Args:
        train_data: Handed to ``data_loader.load`` to obtain features/labels.
        ep: Number of epochs, forwarded to both ``fit`` and ``evaluate``.
        bz: Batch size, forwarded to both ``fit`` and ``evaluate``.
    """
    features, labels = data_loader.load(train_data, dim, t)

    print('Training model ...')
    fit_options = dict(epochs=ep, batch_size=bz, verbose=2)
    network = create_model()
    network.fit(features, labels, **fit_options)
    # Persisting the fitted model is currently switched off:
    # save(directory + 'model/', m_name, model)

    # Note that ``evaluate`` receives the model factory, not the instance
    # fitted above.
    evaluate(create_model, seed, features, labels, t, ep, bz)
def train_model(model, train_dataset, valid_dataset, out_vocab, num_epochs=20):
    """Pre-trains the model with a subset of the training set to simulate a
    data donation setup.

    Args:
      model: The model to be pre-trained.
      train_dataset: The training set, iterated as (input, output) batches.
      valid_dataset: The dataset on which the model is evaluated after
        every epoch.
      out_vocab: The output vocabulary, forwarded to `evaluate`.
      num_epochs: Number of epochs the model has to be pre-trained for.
    """
    loss_objective = MaskedLoss()
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    optimizer = tf.keras.optimizers.Adam()

    # The training step is traced once for rank-2 int32 inputs and outputs.
    train_step_signature = [
        tf.TensorSpec(shape=(None, None), dtype=tf.int32),
        tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    ]

    @tf.function(input_signature=train_step_signature)
    def train_step(inputs, outputs):
        # The decoder is fed everything but the last position and is trained
        # to predict the same sequence shifted left by one position.
        dec_inputs = outputs[:, :-1]
        dec_target = outputs[:, 1:]

        padding_mask, look_ahead_mask, pointer_mask = create_masks(
            inputs, dec_target)

        with tf.GradientTape() as tape:
            y_pred = model((inputs, dec_inputs, padding_mask, look_ahead_mask,
                            pointer_mask),
                           training=True)
            loss = loss_objective(dec_target, y_pred)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # Accumulate into the running mean reported at the end of the epoch.
        train_loss(loss)

    for epoch in range(num_epochs):
        # Start every epoch with a fresh running mean.
        train_loss.reset_states()

        for inp, out in train_dataset:
            train_step(inp, out)

        print('Epoch {} Loss {:.4f} '.format(epoch + 1, train_loss.result()))
        print('Validation Metrics :')
        # The returned accuracy was never used; `evaluate` is called only for
        # whatever it reports itself.
        evaluate(model, valid_dataset, out_vocab)
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 as MobileNet

from model_utils import SequentialConstructor, evaluate, pretty_print_history

# ImageNet-initialised MobileNetV2 without its classification top.
backbone = MobileNet(weights='imagenet', include_top=False)

# Pooling + normalisation followed by two dropout-regularised dense layers.
classifier_head = [
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
]

model = SequentialConstructor([backbone, *classifier_head], output_shape=(2, ))

# Run the shared evaluation routine for 10 epochs and print its history.
hist = evaluate(
    model,
    model_name='mobilenet-binary',
    train_dir='data/proc/binary/train/224/',
    num_epochs=10,
)
pretty_print_history(hist)
# Convolutional feature extractor; each helper call expands to several layers.
conv_stages = [
    *Conv2DLayer(filters=96, kernel_size=11, strides=4, batchnorm=True),
    *Conv2DLayer(filters=256, kernel_size=11, strides=1, batchnorm=True),
    *Conv2DLayer(filters=384, kernel_size=3, strides=1, pooling=False),
    *Conv2DLayer(filters=384, kernel_size=3, strides=1, pooling=False),
    *Conv2DLayer(filters=256, kernel_size=3, strides=1),
]

# Collapse the feature maps before the fully connected part.
bridge = [
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Flatten(),
]

# Fully connected classifier head.
dense_stages = [
    *DenseLayer(2048, activation='relu', dropout=0.4, batchnorm=True),
    *DenseLayer(1024, activation='relu', dropout=0.4),
    *DenseLayer(512, activation='relu'),
]

model = SequentialConstructor([*conv_stages, *bridge, *dense_stages],
                              output_shape=(2,))

# Train interactively in chunks of `num_epochs` until the user declines.
hists = []
iteration = 1
num_epochs = 5
while True:
    answer = input(f'Train for {str(num_epochs)} more epochs? y/n: ')
    if answer != 'y':
        break
    # The model name carries the cumulative epoch count of this chunk.
    hist = evaluate(
        model,
        model_name='olivernet-binary-'+str(iteration*num_epochs),
        train_dir='data/proc/binary/train/224/',
        num_epochs=num_epochs,
    )
    hists.append(hist)
    iteration += 1

# Print every collected history once training has stopped.
for hist in hists:
    pretty_print_history(hist)

# Optional plotting, currently disabled:
#if input('plot? y/n: ') == 'y':
#    import matplotlib.pyplot as plt
#    plt.plot(np.arange(num_epochs), hist['acc'], 'r-')
#    plt.plot(np.arange(num_epochs), hist['val_acc'], 'b-')
#    plt.show()
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.applications.densenet import DenseNet121 as DenseNet

from model_utils import SequentialConstructor, evaluate, pretty_print_history

# ImageNet-initialised DenseNet121 without its classification top.
feature_extractor = DenseNet(weights='imagenet', include_top=False)

# Pooling + normalisation followed by two dropout-regularised dense layers.
top_layers = [
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
]

model = SequentialConstructor([feature_extractor, *top_layers],
                              output_shape=(2, ))

# Run the shared evaluation routine (default epoch count) and print its history.
evaluation_options = {
    'model_name': 'densenet121-binary',
    'train_dir': 'data/proc/binary/train/224/',
}
hist = evaluate(model, **evaluation_options)
pretty_print_history(hist)
def cotrain(configs, data, iter_steps=1, train_ratio=0.2, device='cuda:0'):
    """Co-train two models, growing the training set with pseudo-labelled data.

    Every step trains one network per view on the current training set,
    checkpoints it, and uses both networks' predictions on the unlabelled
    pool to pick samples that are added (with predicted labels) to the
    training set for the next step.

    params:
    configs: exactly two model configs, one per view; each entry is replaced
        in place by the result of ``adjust_config`` at every step
    data: dataset with a 'train' split (source of the labelled/unlabelled
        pools) and a 'test' split whose element 1 holds the ground truth
    iter_steps: maximum iteration steps (must be >= 1)
    train_ratio: not used by this function; kept for interface compatibility
    device: device the networks are created on, trained and evaluated with
    """
    assert iter_steps >= 1
    assert len(configs) == 2
    train_data, untrain_data = dp.split_dataset(
        data['train'], seed=args.seed, num_per_class=args.num_per_class)
    gt_y = data['test'][1]

    new_train_data = deepcopy(train_data)
    add_num = 8000
    for step in range(iter_steps):
        pred_probs = []
        test_preds = []
        add_ids = []
        for view in range(2):
            print('Iter step: %d, view: %d, model name: %s' %
                  (step + 1, view, configs[view].model_name))
            configs[view] = adjust_config(configs[view], len(train_data[0]),
                                          step)
            net = models.create(configs[view].model_name).to(device)
            mu.train(net, new_train_data, configs[view], device)
            mu.evaluate(net, data['test'], configs[view], device)
            save_checkpoint(
                {
                    'state_dict': net.state_dict(),
                    'epoch': step + 1,
                },
                False,
                fpath=os.path.join('logs/cotrain/%s.epoch%d' %
                                   (configs[view].model_name, step)))
            test_preds.append(
                mu.predict_prob(net, data['test'], configs[view], device))
            # Only predict on the unlabelled pool while it is larger than
            # one batch.
            if len(untrain_data[0]) > configs[view].batch_size:
                view_probs = mu.predict_prob(net, untrain_data, configs[view],
                                             device)
                pred_probs.append(view_probs)
                # Use this view's own probabilities directly; indexing
                # pred_probs by `view` breaks if an earlier view was skipped.
                add_ids.append(dp.select_ids(view_probs, train_data, add_num))

        # Fused test accuracy of both views.
        fuse_y = np.argmax(sum(test_preds), axis=1)
        print('Fuse Acc:%0.4f' % np.mean(fuse_y == gt_y))

        # No view produced predictions for the unlabelled pool: there is
        # nothing left to pseudo-label, so stop instead of crashing on
        # argmax over an empty sum.
        if not pred_probs:
            break

        # update training data
        pred_y = np.argmax(sum(pred_probs), axis=1)
        # Union of the per-view selection masks. `np.bool` was removed in
        # NumPy 1.24, the builtin `bool` is the supported spelling.
        add_id = np.array(sum(add_ids), dtype=bool)
        if args.tricks:
            # Always rebuild from the original split and double the budget.
            new_train_data, _ = dp.update_train_untrain(
                add_id, train_data, untrain_data, pred_y)
            add_num += add_num
        else:
            if len(untrain_data[0]) < 1:
                break
            # Move the selected samples out of the unlabelled pool.
            new_train_data, untrain_data = dp.update_train_untrain(
                add_id, new_train_data, untrain_data, pred_y)
def main():
    """Runs the entire pipeline from loading the data and defining the model
    to training the model and evaluating the model.

    Every federated round trains on a random subset of clients, copies the
    server weights into a local Keras model, evaluates that model on the
    validation set, writes summaries, and saves a checkpoint whenever the
    validation semantic accuracy improves.
    """
    arg = parse_arguments()
    in_vocab, slot_vocab, intent_vocab = load_vocab(arg)
    in_vocab_size = len(in_vocab['vocab'])
    slot_vocab_size = len(slot_vocab['vocab'])
    intent_vocab_size = len(intent_vocab['vocab'])

    # Loading data
    train_in_data, train_slot_data, train_intent_data = load_dataset(
        arg, arg.train_data_path, in_vocab, slot_vocab, intent_vocab)
    valid_in_data, valid_slot_data, valid_intent_data = load_dataset(
        arg, arg.valid_data_path, in_vocab, slot_vocab, intent_vocab)
    test_in_data, test_slot_data, test_intent_data = load_dataset(
        arg, arg.test_data_path, in_vocab, slot_vocab, intent_vocab)

    valid_dataset = tf.data.Dataset.from_tensor_slices(
        (valid_in_data, valid_slot_data, valid_intent_data))
    valid_dataset = valid_dataset.batch(512, drop_remainder=False)

    # NOTE(review): the test dataset is built but not used in this function.
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_in_data, test_slot_data, test_intent_data))
    test_dataset = test_dataset.batch(512, drop_remainder=False)

    # Generate splits of data for federated simulation
    ftrain_data = generate_splits(train_in_data, train_slot_data,
                                  train_intent_data, arg)
    ftrain_data = tff.simulation.FromTensorSlicesClientData(ftrain_data)

    # -1 means "use every client in every round".
    if arg.clients_per_round == -1:
        arg.clients_per_round = arg.num_clients

    # Define a non-federated model for checkpointing
    local_model = create_keras_model(arg, in_vocab_size, slot_vocab_size,
                                     intent_vocab_size)

    checkpoint_manager, summary_writer = manage_checkpoints(local_model, arg)
    summary_writer.set_as_default()

    # Generate a sample dataset (only its element spec is used below)
    raw_example_dataset = ftrain_data.create_tf_dataset_for_client('1')
    example_dataset = preprocess(raw_example_dataset, arg)

    server_opt, client_opt = get_optimizers(arg)

    # Define the federated averaging process
    iterative_process = tff.learning.build_federated_averaging_process(
        lambda: create_tff_model(arg, in_vocab_size, slot_vocab_size,
                                 intent_vocab_size,
                                 example_dataset.element_spec),
        client_optimizer_fn=client_opt,
        server_optimizer_fn=server_opt)

    server_state = iterative_process.initialize()

    best_validation_acc = 0.0

    # Rounds are numbered from 1; the upper bound is inclusive so that exactly
    # `arg.num_rounds` rounds are executed (previously one round was skipped).
    for round_num in range(1, arg.num_rounds + 1):

        # Sample a subset of clients to be used for this round
        client_subset = np.random.choice(arg.num_clients,
                                         arg.clients_per_round,
                                         replace=False)
        ftrain_data_subset = make_federated_data(ftrain_data, client_subset,
                                                 arg)

        # Perform one round of federated training
        server_state, metrics = iterative_process.next(server_state,
                                                       ftrain_data_subset)

        # Compute and log validation metrics
        tff.learning.assign_weights_to_keras_model(local_model,
                                                   server_state.model)
        semantic_acc, intent_acc, f1_score = evaluate(local_model,
                                                      valid_dataset,
                                                      slot_vocab)

        train_metrics = metrics._asdict()
        tf.summary.scalar('Train loss', train_metrics['loss'], step=round_num)
        tf.summary.scalar('Train Intent Slot Accuracy',
                          train_metrics['intent_slot_accuracy'],
                          step=round_num)

        tf.summary.scalar('Validation Intent Slot Accuracy',
                          semantic_acc,
                          step=round_num)
        tf.summary.scalar('Validation f1 Score', f1_score, step=round_num)
        tf.summary.scalar('Validation Intent Accuracy',
                          intent_acc,
                          step=round_num)

        # Save the best model so far
        if semantic_acc > best_validation_acc:
            best_validation_acc = semantic_acc
            checkpoint_save_path = checkpoint_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                round_num, checkpoint_save_path))

        print('round {:2d}, metrics={}'.format(round_num, metrics))
def spaco(configs,
          data,
          iter_steps=1,
          gamma=0,
          train_ratio=0.2,
          regularizer='soft'):
    """
    self-paced co-training model implementation based on PyTorch

    One network per view is trained first to obtain initial predictions and
    sample weights for the unlabelled pool. Each following step updates the
    selected samples/weights of every view, retrains that view's network on
    the enlarged training set and refreshes its predictions.

    params:
    configs: one model config per view; each entry is replaced in place by
        the result of ``adjust_config``
    data: dataset for spaco model, with 'train' and 'test' splits; element 1
        of the 'test' split holds the ground-truth labels
    iter_steps: iteration rounds for spaco
    gamma: spaco hyperparameter
    train_ratio: not used by this function; kept for interface compatibility
    regularizer: regularizer name forwarded to the weight-update helpers
    """
    num_view = len(configs)
    train_data, untrain_data = dp.split_dataset(
        data['train'], seed=args.seed, num_per_class=args.num_per_class)
    add_num = 4000
    pred_probs = []
    test_preds = []
    sel_ids = []
    weights = []
    start_step = 0
    ###########
    # initiate classifier to get predictions
    ###########
    # The view index doubles as the device index throughout this function.
    for view in range(num_view):
        configs[view] = adjust_config(configs[view], len(train_data[0]), 0)
        net = models.create(configs[view].model_name).to(view)
        mu.train(net, train_data, configs[view], device=view)
        pred_probs.append(
            mu.predict_prob(net, untrain_data, configs[view], view))
        test_preds.append(
            mu.predict_prob(net, data['test'], configs[view], view))
        mu.evaluate(net, data['test'], configs[view], view)
        save_checkpoint(
            {
                'state_dict': net.state_dict(),
                'epoch': 0,
            },
            False,
            fpath=os.path.join('spaco/%s.epoch%d' %
                               (configs[view].model_name, 0)))
    pred_y = np.argmax(sum(pred_probs), axis=1)

    # initiate weights for unlabeled examples
    # (a leftover pdb.set_trace() breakpoint that halted every run was
    # removed from this loop)
    for view in range(num_view):
        sel_id, weight = dp.get_ids_weights(pred_probs[view], pred_y,
                                            train_data, add_num, gamma,
                                            regularizer)
        sel_ids.append(sel_id)
        weights.append(weight)

    # start iterative training
    gt_y = data['test'][1]
    for step in range(start_step, iter_steps):
        for view in range(num_view):
            print('Iter step: %d, view: %d, model name: %s' %
                  (step + 1, view, configs[view].model_name))

            # update sample weights
            sel_ids[view], weights[view] = dp.update_ids_weights(
                view, pred_probs, sel_ids, weights, pred_y, train_data,
                add_num, gamma, regularizer)

            # update model parameter
            new_train_data, _ = dp.update_train_untrain(
                sel_ids[view], train_data, untrain_data, pred_y,
                weights[view])
            configs[view] = adjust_config(configs[view], len(train_data[0]),
                                          step)
            # Place the network on the same device it is trained on, as the
            # initialisation loop above does.
            net = models.create(configs[view].model_name).to(view)
            mu.train(net, new_train_data, configs[view], device=view)

            # update y
            # (previously referenced an undefined name `model`)
            pred_probs[view] = mu.predict_prob(net,
                                               untrain_data,
                                               configs[view],
                                               device=view)

            # evaluate current model and save it
            acc = mu.evaluate(net, data['test'], configs[view], device=view)
            predictions = mu.predict_prob(net,
                                          data['train'],
                                          configs[view],
                                          device=view)
            save_checkpoint(
                {
                    'state_dict': net.state_dict(),
                    'epoch': step + 1,
                    'predictions': predictions,
                    'accuracy': acc
                },
                False,
                fpath=os.path.join('spaco/%s.epoch%d' %
                                   (configs[view].model_name, step + 1)))
            test_preds[view] = mu.predict_prob(net,
                                               data['test'],
                                               configs[view],
                                               device=view)
        # Grow the selection budget once per step, then report the fused
        # test accuracy of all views.
        add_num += 4000 * num_view
        fuse_y = np.argmax(sum(test_preds), axis=1)
        print('Acc:%0.4f' % np.mean(fuse_y == gt_y))
def train(path):
    """Load the data at ``path``, run ``evaluate`` on it and call ``save_model``.

    ``dim``, ``t``, ``seed``, ``directory`` and ``m_name`` are read from the
    enclosing module scope. The evaluation uses the fixed values 1000 and
    512 for its last two arguments.

    Args:
        path: Handed to ``data_loader.load`` to obtain features and labels.
    """
    features, labels = data_loader.load(path, dim, t)

    # The model factory itself (not an instance) is what gets evaluated.
    evaluate(create_model, seed, features, labels, t, 1000, 512)

    # NOTE(review): the object saved here is the `create_model` callable, not
    # a fitted model - confirm this is what `save_model` expects.
    target_dir = directory + 'model/'
    save_model(target_dir, m_name, create_model)
def run_training(model, cfg, test_features, test_labels, train_data,
                 train_labels, val_data, val_labels):
    """Train `model` with early stopping, then score the best weights on test.

    The run writes into a temporary directory under `MODEL_PATH` (config
    dump, best weights, result text) which is renamed at the end to a name
    containing the model name, test accuracies and a timestamp.

    Args:
        model: Network to train; its best weights (lowest validation loss)
            are reloaded before the final test evaluation.
        cfg: Run configuration; this function reads `lr`, `batch_size`,
            `n_epochs`, `patience`, `reg_ratio`, `verbose`, `model_name`,
            `model_weights_name`, `model_config_name` and calls `to_json()`.
        test_features, test_labels: Data for the final test evaluation.
        train_data, train_labels: Training data, batched by `cfg.batch_size`.
        val_data, val_labels: Validation data used for model selection.

    Returns:
        The test loss of the best checkpoint.
    """
    tmp_run_path = MODEL_PATH + "/tmp_" + get_datetime()
    model_weights_path = "{}/{}".format(tmp_run_path, cfg.model_weights_name)
    model_config_path = "{}/{}".format(tmp_run_path, cfg.model_config_name)
    result_path = "{}/result.txt".format(tmp_run_path)
    os.makedirs(tmp_run_path, exist_ok=True)
    # Use a context manager so the config file is flushed and closed.
    with open(model_config_path, "w") as config_file:
        json.dump(cfg.to_json(), config_file)

    # Defining loss and optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    criterion = torch.nn.CrossEntropyLoss()
    criterion = criterion.to(get_device())

    # Creating data generators
    test_iterator = BatchIterator(test_features, test_labels)
    train_iterator = BatchIterator(train_data, train_labels, cfg.batch_size)
    validation_iterator = BatchIterator(val_data, val_labels)

    # Placeholder reported in the first "improved" message, before any
    # training epoch has produced a real training loss.
    train_loss = 999
    # Infinity instead of a magic 999 so that the first finite validation
    # loss always produces a checkpoint to reload after training.
    best_val_loss = float("inf")
    train_acc = 0
    epochs_without_improvement = 0

    writer = SummaryWriter()

    # Running training
    for epoch in range(cfg.n_epochs):
        train_iterator.shuffle()
        if epochs_without_improvement == cfg.patience:
            break

        # Validation runs before this epoch's training step.
        val_loss, val_cm = evaluate(model, validation_iterator, criterion)

        if val_loss < best_val_loss:
            torch.save(model.state_dict(), model_weights_path)
            best_val_loss = val_loss
            best_val_acc = val_cm.accuracy
            best_val_unweighted_acc = val_cm.unweighted_accuracy
            epochs_without_improvement = 0
            # NOTE(review): the "weighted val acc" label is fed the
            # *unweighted* accuracy - confirm the intended wording.
            log_success(
                " Epoch: {} | Val loss improved to {:.4f} | val acc: {:.3f} | weighted val acc: {:.3f} | train loss: {:.4f} | train acc: {:.3f} | saved model to {}."
                .format(epoch, best_val_loss, best_val_acc,
                        best_val_unweighted_acc, train_loss, train_acc,
                        model_weights_path))

        train_loss, train_cm = train(model, train_iterator, optimizer,
                                     criterion, cfg.reg_ratio)
        train_acc = train_cm.accuracy

        writer.add_scalars('all/losses', {
            "val": val_loss,
            "train": train_loss
        }, epoch)
        writer.add_scalars('all/accuracy', {
            "val": val_cm.accuracy,
            "train": train_cm.accuracy
        }, epoch)
        writer.add_scalars(
            'all/unweighted_acc', {
                "val": val_cm.unweighted_accuracy,
                "train": train_cm.unweighted_accuracy
            }, epoch)
        writer.add_scalar('val/loss', val_loss, epoch)
        writer.add_scalar('val/val_acc', val_cm.accuracy, epoch)
        writer.add_scalar('val/val_unweighted_acc',
                          val_cm.unweighted_accuracy, epoch)
        writer.add_scalar('train/loss', train_loss, epoch)
        writer.add_scalar('train/train_acc', train_cm.accuracy, epoch)
        writer.add_scalar('train/train_unweighted_acc',
                          train_cm.unweighted_accuracy, epoch)

        # Incremented every epoch; an improving epoch resets it to 0 above.
        epochs_without_improvement += 1

        # Logged every epoch (the former `epoch % 1` guard was always true).
        log(
            f'| Epoch: {epoch+1} | Val Loss: {val_loss:.3f} | Val Acc: {val_cm.accuracy*100:.2f}% '
            f'| Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.3f}%',
            cfg.verbose)

    # Score the best checkpoint, not the weights of the last epoch.
    model.load_state_dict(torch.load(model_weights_path))
    test_loss, test_cm = evaluate(model, test_iterator, criterion)

    # NOTE(review): "Weighted Test Acc" is fed the *unweighted* accuracy.
    result = f'| Epoch: {epoch+1} | Test Loss: {test_loss:.3f} | Test Acc: {test_cm.accuracy*100:.2f}% | Weighted Test Acc: {test_cm.unweighted_accuracy*100:.2f}%\n Confusion matrix:\n {test_cm}'
    log_major("Train acc: {}".format(train_acc))
    log_major(result)
    log_major("Hyperparameters:{}".format(cfg.to_json()))
    with open(result_path, "w") as file:
        file.write(result)

    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()

    # Give the run directory its final, descriptive name.
    output_path = "{}/{}_{:.3f}Acc_{:.3f}UAcc_{}".format(
        MODEL_PATH, cfg.model_name, test_cm.accuracy,
        test_cm.unweighted_accuracy, strftime("%Y-%m-%d_%H:%M:%S", gmtime()))
    os.rename(tmp_run_path, output_path)

    return test_loss
def main():
    """Runs the entire pipeline from loading the data and defining the model
    to training the model and evaluating the model.

    The training set is split into a central part used for pre-training and
    a distributed part used for federated training. When personalization is
    enabled, the distributed part and the validation set are each split once
    more to obtain held-out personalization data.
    """
    arg = parse_arguments()
    in_vocab, out_vocab = load_vocab(arg)
    in_vocab_size = len(in_vocab['vocab'])
    out_vocab_size = len(out_vocab['vocab'])

    # Loading data
    train_in_data, train_out_data = load_dataset(arg, arg.train_data_path,
                                                 in_vocab, out_vocab)
    valid_in_data, valid_out_data = load_dataset(arg, arg.valid_data_path,
                                                 in_vocab, out_vocab)
    test_in_data, test_out_data = load_dataset(arg, arg.test_data_path,
                                               in_vocab, out_vocab)

    # Generating splits for pre-training, federated training and
    # personalization evaluation
    num_train = len(train_in_data)
    central_idxs = np.random.choice(num_train,
                                    int(arg.pre_train_ratio * num_train),
                                    replace=False)
    # Sorted complement of the central indices; setdiff1d avoids the
    # quadratic `idx not in array` scan.
    distributed_idxs = np.setdiff1d(np.arange(num_train), central_idxs)
    central_in_data = tf.gather(train_in_data, central_idxs)
    central_out_data = tf.gather(train_out_data, central_idxs)

    # For personalization, split training set again
    if arg.personalization:
        federated_training_idxs = np.random.choice(
            distributed_idxs,
            int(arg.p13n_ratio * len(distributed_idxs)),
            replace=False)
        # The personalization indices are the distributed indices that were
        # NOT picked for federated training. They used to be drawn from
        # positions 0..len(distributed_idxs)-1, which could overlap the
        # pre-training and federated training examples.
        p13_idxs = np.setdiff1d(distributed_idxs, federated_training_idxs)

        num_valid = len(valid_in_data)
        validation_training_idxs = np.random.choice(
            num_valid, int(arg.p13n_ratio * num_valid), replace=False)
        validation_p13_idxs = np.setdiff1d(np.arange(num_valid),
                                           validation_training_idxs)

        # Gather the personalization data before train/valid are narrowed.
        p13_in_data = tf.gather(train_in_data, p13_idxs)
        p13_out_data = tf.gather(train_out_data, p13_idxs)
        p13_valid_in_data = tf.gather(valid_in_data, validation_p13_idxs)
        p13_valid_out_data = tf.gather(valid_out_data, validation_p13_idxs)

        train_in_data, train_out_data = (
            tf.gather(train_in_data, federated_training_idxs),
            tf.gather(train_out_data, federated_training_idxs))
        valid_in_data, valid_out_data = (
            tf.gather(valid_in_data, validation_training_idxs),
            tf.gather(valid_out_data, validation_training_idxs))
    else:
        train_in_data, train_out_data = (
            tf.gather(train_in_data, distributed_idxs),
            tf.gather(train_out_data, distributed_idxs))

    # Define the dataset to be used for pre-training
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (central_in_data, central_out_data)).shuffle(1000)
    train_dataset = train_dataset.batch(32, drop_remainder=True)

    # Define the validation and test datasets on which the model will be
    # evaluated.
    valid_dataset = tf.data.Dataset.from_tensor_slices(
        (valid_in_data, valid_out_data))
    valid_dataset = valid_dataset.batch(2048, drop_remainder=False)

    # NOTE(review): the test dataset is built but not used in this function.
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_in_data, test_out_data))
    test_dataset = test_dataset.batch(2048, drop_remainder=False)

    # Generate splits of data for federated simulation
    ftrain_data = generate_splits(train_in_data, train_out_data, arg)
    ftrain_data = tff.simulation.FromTensorSlicesClientData(ftrain_data)

    # Get personalization splits
    if arg.personalization:
        federated_p13n_data = get_p13_data(p13_in_data, p13_out_data,
                                           p13_valid_in_data,
                                           p13_valid_out_data)

    # Set the correct number of clients per round (-1 means all clients).
    if arg.clients_per_round == -1:
        arg.clients_per_round = arg.num_clients

    # Define a non-federated model for checkpointing
    local_model = create_keras_model(arg, in_vocab_size, out_vocab_size)

    # Setup the checkpointing
    checkpoint_manager, summary_writer = manage_checkpoints(local_model, arg)
    summary_writer.set_as_default()

    # Pre-train the model
    train_model(local_model, train_dataset, valid_dataset, out_vocab)

    # Generate a sample dataset for the input spec
    raw_example_dataset = ftrain_data.create_tf_dataset_for_client('0')
    example_dataset = preprocess(raw_example_dataset, arg)

    server_opt, client_opt = get_optimizers(arg)

    model_fn = lambda: create_tff_model(arg, in_vocab_size, out_vocab_size,
                                        example_dataset.element_spec)

    # Define the federated averaging process
    iterative_process = tff.learning.build_federated_averaging_process(
        model_fn,
        client_optimizer_fn=client_opt,
        server_optimizer_fn=server_opt)

    if arg.personalization:
        p13n_eval = get_p13_eval(model_fn, evaluate_fn)

    server_state = iterative_process.initialize()

    # Initialize the server model with the pre-trained weights
    trainable_weights = [
        weights.numpy() for weights in local_model.trainable_weights
    ]
    server_state = tff.learning.state_with_new_model_weights(
        server_state, trainable_weights, local_model.non_trainable_weights)

    best_validation_acc = 0.0

    print('Training:')

    # Rounds are numbered from 1; the upper bound is inclusive so that exactly
    # `arg.num_rounds` rounds are executed (previously one round was skipped).
    for round_num in range(1, arg.num_rounds + 1):
        start = time.time()

        # Sample a subset of clients to be used for this round
        client_subset = np.random.choice(arg.num_clients,
                                         arg.clients_per_round,
                                         replace=False)
        ftrain_data_subset = make_federated_data(ftrain_data, client_subset,
                                                 arg)

        # Perform one round of federated training
        server_state, metrics = iterative_process.next(server_state,
                                                       ftrain_data_subset)

        # Compute and log validation metrics
        tff.learning.assign_weights_to_keras_model(local_model,
                                                   server_state.model)
        overall_accuracy = evaluate(local_model, valid_dataset, out_vocab)

        train_metrics = metrics._asdict()
        tf.summary.scalar('Train loss', train_metrics['loss'], step=round_num)
        tf.summary.scalar('Train Intent Slot Accuracy',
                          train_metrics['intent_slot_accuracy'],
                          step=round_num)
        tf.summary.scalar('Validation Intent Slot Accuracy',
                          overall_accuracy,
                          step=round_num)

        # If personalization has been enabled, print personalization metrics
        # every 20th round.
        if round_num % 20 == 0 and arg.personalization:
            p13n_metrics = p13n_eval(server_state.model, federated_p13n_data)

            print('Server model metrics:')
            global_model_acc = np.array(
                p13n_metrics['baseline_metrics']['intent_slot_accuracy'])
            print('Overall accuracy : {}'.format(
                np.mean(global_model_acc).item()))

            for strategy, label in (('sgd', 'SGD'), ('adam', 'Adam')):
                print('Personalized model metrics ({}):'.format(label))
                personalized_model_acc = np.array(
                    p13n_metrics[strategy]['final_model']
                    ['intent_slot_accuracy'])
                print('Overall accuracy : {}'.format(
                    np.mean(personalized_model_acc).item()))

        # Save the best model so far
        if overall_accuracy > best_validation_acc:
            best_validation_acc = overall_accuracy
            checkpoint_save_path = checkpoint_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                round_num, checkpoint_save_path))

        print('round {:2d}, metrics={}'.format(round_num, metrics))
        print('Time taken : {}'.format(time.time() - start))