criterion = torch.nn.CrossEntropyLoss()
if gpus is not None:
    model = torch.nn.DataParallel(model, gpus)
model.to(device=device, dtype=dtype)
criterion.to(device=device, dtype=dtype)

optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum,
                            weight_decay=decay, nesterov=True)

if args.find_clr:
    find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype,
                    min_lr=min_lr, max_lr=max_lr,
                    step_size=epochs_per_step * len(train_loader), mode=mode,
                    save_path=save_path)
    return

if args.clr:
    scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                         step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
else:
    scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

best_test = 0
if evaluate == 'true':
    loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
    return

csv_logger = CsvLogger(filepath=save_path, data=data)
csv_logger.save_params(sys.argv, args)
claimed_acc1 = None
claimed_acc5 = None
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    if args.model == "recnn":
        print("Training RECNN")
        model = RECNN()
        ex_model = RECNN_Extractor()
    else:
        print("Error: no model matched!")

    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss()

    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
        ex_model = torch.nn.DataParallel(ex_model, args.gpus)
    model.to(device=device, dtype=dtype)
    ex_model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                weight_decay=args.decay, nesterov=True)

    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype,
                        min_lr=args.min_lr, max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            ex_model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.extract_features:
        test_hdf5_list = [x for x in glob.glob(os.path.join(args.h5dir, 'test', '*.h5'))]
        test_hdf5_list.sort()
        print(test_hdf5_list)
        tcnt = 0
        for f in test_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                testx = torch.from_numpy(np.array(h5_file['data']))
                testy = torch.from_numpy(np.array(h5_file['label']))
            else:
                testcx = torch.from_numpy(np.array(h5_file['data']))
                testcy = torch.from_numpy(np.array(h5_file['label']))
                testx = torch.cat((testx, testcx), 0)
                testy = torch.cat((testy, testcy), 0)
        tex_shape = testx.shape
        testx = testx.view(tex_shape[0], 1, tex_shape[1], tex_shape[2], tex_shape[3])
        testxy = torch.utils.data.TensorDataset(testx, testy)
        val_loader = torch.utils.data.DataLoader(testxy, batch_size=args.batch_size, shuffle=False)

        (test_features, test_preds, test_target) = extract_features(model, ex_model, val_loader,
                                                                     criterion, device, dtype)
        test_features_numpy = test_features.cpu().numpy()
        test_preds_numpy = test_preds.cpu().numpy()
        test_target_numpy = test_target.cpu().numpy()
        test_data = {'test_features': test_features_numpy,
                     'test_preds': test_preds_numpy,
                     'test_target': test_target_numpy}
        test_mat_filename = 'test' + args.setting
        scipy.io.savemat(test_mat_filename, test_data)

        train_hdf5_list = [x for x in glob.glob(os.path.join(args.h5dir, 'train', '*.h5'))]
        train_hdf5_list.sort()
        tcnt = 0
        for f in train_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                trainx = torch.from_numpy(np.array(h5_file['data']))
                trainy = torch.from_numpy(np.array(h5_file['label']))
            else:
                traincx = torch.from_numpy(np.array(h5_file['data']))
                traincy = torch.from_numpy(np.array(h5_file['label']))
                trainx = torch.cat((trainx, traincx), 0)
                trainy = torch.cat((trainy, traincy), 0)
        trx_shape = trainx.shape
        trainx = trainx.view(trx_shape[0], 1, trx_shape[1], trx_shape[2], trx_shape[3])
        trainxy = torch.utils.data.TensorDataset(trainx, trainy)
        train_loader = torch.utils.data.DataLoader(trainxy, batch_size=args.batch_size, shuffle=False)

        (train_features, train_preds, train_target) = extract_features(model, ex_model, train_loader,
                                                                        criterion, device, dtype)
        train_features_numpy = train_features.cpu().numpy()
        train_preds_numpy = train_preds.cpu().numpy()
        train_target_numpy = train_target.cpu().numpy()
        train_data = {'train_features': train_features_numpy,
                      'train_preds': train_preds_numpy,
                      'train_target': train_target_numpy}
        train_mat_filename = 'train' + args.setting
        scipy.io.savemat(train_mat_filename, train_data)
        return

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    best_test = 10000000
    train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader,
                  optimizer, criterion, device, dtype, args.batch_size, args.log_interval,
                  csv_logger, save_path, claimed_acc1, claimed_acc5, best_test)
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = 'mar10_224_' + time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = STN_MobileNet2(input_size=args.input_size, scale=args.scaling,
                           shearing=args.shearing)
    # print(model.stnmod.fc_loc[0].bias.data)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(STN_MobileNet2,
                                    args.batch_size // len(args.gpus) if args.gpus is not None else args.batch_size,
                                    device, dtype, args.input_size, 3, args.scaling)))

    train_loader, val_loader, test_loader = get_loaders(args.dataroot, args.batch_size,
                                                        args.batch_size, args.input_size,
                                                        args.workers, args.b_weights)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    L1_criterion = torch.nn.L1Loss()
    PW_criterion = torch.nn.CosineSimilarity(dim=2, eps=1e-6)

    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)
    L1_criterion.to(device=device, dtype=dtype)
    PW_criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                weight_decay=args.decay, nesterov=True)

    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, PW_criterion, device, dtype,
                        min_lr=args.min_lr, max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        print('Use CLR')
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        print('Use scheduler')
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_val = 500

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            # args.start_epoch = checkpoint['epoch'] - 1
            # best_val = checkpoint['best_prec1']
            # best_test = checkpoint['best_prec1']
            args.start_epoch = 0
            best_val = 500
            state_dict = checkpoint['state_dict']

            # if weights from imagenet
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                # print(k, v.size())
                name = k
                if k == 'module.fc.bias':
                    new_state_dict[name] = torch.zeros(101)
                    continue
                elif k == 'module.fc.weight':
                    new_state_dict[name] = torch.ones(101, 1280)
                    continue
                else:
                    print('else:', name)
                    new_state_dict[name] = v

            model.load_state_dict(new_state_dict, strict=False)
            # optimizer.load_state_dict(checkpoint['optimizer'], strict=False)
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_val = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, test_mae = test(model, predefined_points, 0, test_loader, PW_criterion,
                              device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    # claimed_acc1 = None
    # claimed_acc5 = None
    # if args.input_size in claimed_acc_top1:
    #     if args.scaling in claimed_acc_top1[args.input_size]:
    #         claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
    #         claimed_acc5 = claimed_acc_top5[args.input_size][args.scaling]
    #         csv_logger.write_text(
    #             'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100., claimed_acc5 * 100.))

    train_network(args.start_epoch, args.epochs, scheduler, model, predefined_points,
                  train_loader, val_loader, test_loader, optimizer, PW_criterion, device, dtype,
                  args.batch_size, args.log_interval, csv_logger, save_path, best_val)
def train_gan(train_set, indices: List, samples_per_N: int, repetition_n: int, identifier: str,
              experiment_name: str, batch_size: int = 256, desired_epochs: int = 1000):
    """The GAN is trained for 1000 epochs. If a set of 60k samples is trained with a batch size
    of 256, then an epoch equals 226 iterations, and a budget of 100,000 iterations would equal
    roughly 426 epochs."""
    assert train_set.shape[0] > len(indices)
    print(train_set.shape)
    print(len(indices))
    my_ds = DataSetManager(train_set[indices])

    # print("Set number of iterations to train\n")
    v5 = (desired_epochs * (train_set[indices].shape[0])) // batch_size + 1
    print("ITERS " + str(v5))
    print("SIZE " + str(train_set[indices].shape))
    # print("Use pretrained model? (0 means No, some number different to 0 means yes)\n")
    decision_number = 0  # int(input())
    # print("Type a name to save the model with?\n")
    model_tag = str(round(samples_per_N)) + '_' + str(repetition_n)
    storing_path = 'data/' + experiment_name + "/" + model_tag + '_data/'
    model_path = storing_path + model_tag + '.ckpt'

    # Recall that os.mkdir isn't recursive, so it only makes one directory at a time
    try:
        # Create target directory
        os.mkdir(storing_path)
        print("Directory ", storing_path, " Created ")
    except FileExistsError:
        print("Directory ", storing_path, " already exists")

    # ===> Auxiliary functions <===
    """ ----------------8<-------------[ cut here ]------------------ """

    def save_history(files_prefix, gen_loss_record, disc_loss_record, jsd_error, current_epoch,
                     epoch_record, my_ds, iter_, epochs, global_iters):
        # Save losses per epoch
        df = pd.DataFrame(np.array(gen_loss_record))
        with open(files_prefix + '_gen_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        df = pd.DataFrame(np.array(disc_loss_record))
        with open(files_prefix + '_disc_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        df = pd.DataFrame(np.array(epoch_record))
        with open(files_prefix + '_epoch_record.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        # Save current iter and epochs
        df = pd.DataFrame(np.array([epochs + my_ds.epochs_completed, global_iters + iter_]))
        with open(files_prefix + '_training.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        with open(files_prefix + '_jsd_error.csv', 'a') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow([current_epoch, jsd_error])

    def send_bot_message(bot, my_ds, iter_, ITERS, identifier):
        """Not quite straightforward since the critic draws many more samples."""
        message = "\nEpochs [" + str(my_ds.epochs_completed) + "] Iter: " + str(iter_) + ";\t" + \
                  str(np.round(100 * iter_ / ITERS, 2)) + "% "
        message = message + identifier
        print(message)
        bot.set_status(message)
        # Send update message
        if bot.verbose:
            bot.send_message(message)
        print("\n")

    def save_gen_samples(gen_op, disc_op, sess, path, k, n=4):
        """k: number of epochs used to train the generator; n: number of batches to draw samples."""
        suffix = '_gen_samples_' + str(k) + '_epochs_' + '.csv'
        for k in range(n):
            samples = sess.run(gen_op)
            df = pd.DataFrame(np.array(samples))
            with open(path + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)
            # Score the samples using the critic
            scores = sess.run(disc_op)
            df = pd.DataFrame(np.array(scores))
            with open(path + 'scores_' + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # ===> Model Parameters <===
    """ ----------------8<-------------[ cut here ]------------------ """
    DIM = 512  # model dimensionality
    GEN_DIM = 100  # output dimension of the generator
    DIS_DIM = 1  # output dimension of the discriminator
    FIXED_GENERATOR = False  # whether to hold the generator fixed at real data plus Gaussian noise, as in the plots in the paper
    LAMBDA = .1  # smaller lambda makes things faster for toy tasks, but isn't necessary if you increase CRITIC_ITERS enough
    BATCH_SIZE = batch_size  # batch size
    ITERS = v5  # 100000 # how many generator iterations to train for
    FREQ = 250  # sample frequency
    CRITIC_ITERS = 5  # how many critic iterations per generator iteration

    def Generator_Softmax(n_samples, name='gen'):
        with tf.variable_scope(name):
            noise = tf.random_normal([n_samples, GEN_DIM])
            output01 = tf_utils.linear(noise, DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')
            output02 = tf_utils.linear(output01, DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')
            output03 = tf_utils.linear(output02, DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')
            output04 = tf_utils.linear(output03, DIM, name='fc-4')
            output04 = tf_utils.relu(output04, name='relu-4')
            output05 = tf_utils.linear(output04, GEN_DIM, name='fc-5')
            # Reminder: a logit can be modeled as a linear function of the predictors
            output05 = tf.nn.softmax(output05, name='softmax-1')
            return output05

    def Discriminator(inputs, is_reuse=True, name='disc'):
        with tf.variable_scope(name, reuse=is_reuse):
            print('is_reuse: {}'.format(is_reuse))
            output01 = tf_utils.linear(inputs, DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')
            output02 = tf_utils.linear(output01, DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')
            output03 = tf_utils.linear(output02, DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')
            output04 = tf_utils.linear(output03, DIM, name='fc-4')
            output04 = tf_utils.relu(output04, name='relu-4')
            output05 = tf_utils.linear(output04, DIS_DIM, name='fc-5')
            return output05

    real_data = tf.placeholder(tf.float32, shape=[None, GEN_DIM])
    fake_data = Generator_Softmax(BATCH_SIZE)
    disc_real = Discriminator(real_data, is_reuse=False)
    disc_fake = Discriminator(fake_data)

    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = -tf.reduce_mean(disc_fake)

    # WGAN gradient penalty parameters
    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
    interpolates = alpha * real_data + (1. - alpha) * fake_data
    disc_interpolates = Discriminator(interpolates)
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)
    disc_cost += LAMBDA * gradient_penalty

    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='disc')
    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gen')

    disc_lr = tf.placeholder(tf.float32, shape=())  # 1e-4
    gen_lr = tf.placeholder(tf.float32, shape=())  # 1e-4
    disc_train_op = tf.train.AdamOptimizer(learning_rate=disc_lr, beta1=0.5,
                                           beta2=0.9).minimize(disc_cost, var_list=disc_vars)
    if len(gen_vars) > 0:
        gen_train_op = tf.train.AdamOptimizer(learning_rate=gen_lr, beta1=0.5,
                                              beta2=0.9).minimize(gen_cost, var_list=gen_vars)
    else:
        gen_train_op = tf.no_op()

    """ ----------------8<-------------[ cut here ]------------------ """
    # ===> Model Parameters <===

    df = pd.DataFrame(np.array(indices))
    with open(storing_path + 'training_indices.csv', 'a') as f:
        df.to_csv(f, header=False, index=False)

    session_saver = tf.train.Saver()
    # files_prefix = 'model/' + model_tag
    if decision_number == 0:
        pre_trained = False
        gen_loss_record = []  # type: List[float]
        disc_loss_record = []  # type: List[float]
        epoch_record = []  # type: List[float]
        epochs = 0
        global_iters = 0
    else:
        pre_trained = True
        temp = pd.read_csv(storing_path + '_training.csv', header=None).values
        epochs, global_iters = temp.flatten()
        my_ds.epochs_completed = epochs
        gen_loss_record = (pd.read_csv(storing_path + '_gen_loss.csv', header=None).values).tolist()
        disc_loss_record = (pd.read_csv(storing_path + '_disc_loss.csv', header=None).values).tolist()
        epoch_record = (pd.read_csv(storing_path + '_epoch_record.csv', header=None).values).tolist()

    # Create a DLBot instance
    bot = DLBot(token=telegram_token, user_id=telegram_user_id)
    # Activate the bot
    bot.activate_bot()
    print("\nTelegram bot has been activated ")

    iters_per_epoch = my_ds.num_examples / BATCH_SIZE
    total_iters = int(np.ceil((desired_epochs * iters_per_epoch) / CRITIC_ITERS))
    critic_iters = np.round((5 / 6) * total_iters)
    gen_iters = np.round((1 / 6) * total_iters)
    ITERS = total_iters

    # Train loop
    with tf.Session() as sess:
        if pre_trained == False:  # False by default
            sess.run(tf.global_variables_initializer())
        if pre_trained == True:
            # tf.reset_default_graph()
            session_saver.restore(sess, model_path)
            # Duct-tape solution

        iter_ = 0
        """ while my_ds.epochs_completed < desired_epochs: iter_ += 1 """
        gen_lr_ = CyclicLR(base_lr=10 ** -4.72, max_lr=10 ** -3.72, step_size=gen_iters)
        disc_lr_ = CyclicLR(base_lr=10 ** -4.72, max_lr=10 ** -3.72, step_size=critic_iters)

        for iter_ in range(ITERS):
            batch_data, disc_cost_ = None, None
            previous_epoch = my_ds.epochs_completed

            # train critic
            for i_ in range(CRITIC_ITERS):
                batch_data = my_ds.next_batch(BATCH_SIZE)  # data_gen.__next__()
                disc_cost_, _ = sess.run([disc_cost, disc_train_op],
                                         feed_dict={real_data: batch_data,
                                                    disc_lr: disc_lr_.clr()})
                disc_lr_.on_batch_end()

            # train generator
            sess.run(gen_train_op, feed_dict={gen_lr: gen_lr_.clr()})
            gen_lr_.on_batch_end()
            gen_cost2 = sess.run(gen_cost)

            current_epoch = my_ds.epochs_completed
            condition2 = current_epoch % 5 == 0
            if current_epoch > previous_epoch and condition2:
                disc_loss_record.append(disc_cost_)
                gen_loss_record.append(gen_cost2)
                epoch_record.append(my_ds.epochs_completed)
                # print("Diff " + str(current_epoch - previous_epoch))

            if (np.mod(iter_, FREQ) == 0) or (iter_ + 1 == ITERS):
                """ print("===> Debugging") print(disc_loss_record) print(gen_loss_record) """
                bot.loss_hist.append(disc_cost_)
                fake_samples = sess.run(fake_data)  # , feed_dict={real_data: batch_data}
                # print("\n==> Sum-Simplex condition: " + str(np.sum(fake_samples, axis=1)))
                send_bot_message(bot, my_ds, iter_, ITERS, identifier)
                jsd_error = gan_error_all_species(fake_samples, train_set)
                current_epoch = my_ds.epochs_completed
                session_saver.save(sess, model_path)
                save_history(storing_path, gen_loss_record, disc_loss_record, jsd_error,
                             current_epoch, epoch_record, my_ds, iter_, epochs, global_iters)
                # save_gen_samples(fake_data, disc_fake, sess, storing_path, k)
                # fake_data = Generator_Softmax(BATCH_SIZE)
                utils.tick()  # _iter[0] += 1

            if iter_ == ITERS:
                session_saver.save(sess, model_path)

        # Create gan samples
        n_samples = len(indices)
        k_iter = n_samples // BATCH_SIZE + 1
        gan_samples_path = storing_path + "gan_samples_" + model_tag + '.csv'
        for k in range(k_iter):
            fake_samples = sess.run(fake_data)
            df = pd.DataFrame(fake_samples)
            with open(gan_samples_path, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # Clear variable values
    tf.reset_default_graph()
    current_epoch = my_ds.epochs_completed
    save_history(storing_path, gen_loss_record, disc_loss_record, jsd_error, current_epoch,
                 epoch_record, my_ds, iter_, epochs, global_iters)
    bot.stop_bot()
    print("Training is done")

    # Duct-taping the size of the gan sample set to avoid changing the TF graph
    temp1 = pd.read_csv(gan_samples_path, header=None).values
    temp1 = temp1[0:n_samples]
    df = pd.DataFrame(temp1)
    with open(gan_samples_path, 'w+') as f:
        df.to_csv(f, header=False, index=False)
    print("Training is done")
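# The loop above drives the Adam learning rates through a `CyclicLR` helper via `.clr()` and
# `.on_batch_end()`. That helper is not shown in this snippet; the sketch below is a minimal
# stand-in (an assumption, not the project's actual class) implementing the standard triangular
# policy from Smith's "Cyclical Learning Rates for Training Neural Networks", which is what such
# helpers typically compute.
import numpy as np

class SimpleCyclicLR:
    """Minimal triangular cyclic learning-rate calculator (hypothetical stand-in)."""

    def __init__(self, base_lr=1e-4, max_lr=1e-3, step_size=2000.):
        self.base_lr = base_lr      # lower bound of the cycle
        self.max_lr = max_lr        # upper bound of the cycle
        self.step_size = step_size  # iterations per half-cycle
        self.iterations = 0         # batches seen so far

    def clr(self):
        # Position within the current cycle: x goes 1 -> 0 -> 1 over one full cycle,
        # so the returned lr rises from base_lr to max_lr and falls back again.
        cycle = np.floor(1 + self.iterations / (2 * self.step_size))
        x = np.abs(self.iterations / self.step_size - 2 * cycle + 1)
        return self.base_lr + (self.max_lr - self.base_lr) * max(0., 1. - x)

    def on_batch_end(self):
        self.iterations += 1

# Example usage (values mirror the ones fed to the critic above):
# sched = SimpleCyclicLR(base_lr=10 ** -4.72, max_lr=10 ** -3.72, step_size=1000)
# lr_for_this_batch = sched.clr(); sched.on_batch_end()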
model.add(Flatten())
model.add(Dense(units=256, activation='relu', name='fc1'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=256, activation='relu', name='fc2'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=2, activation='softmax', name='predictions'))

Y_one_hot = to_categorical(np.ravel(Y_Train), 2)

# Cyclic Learning Rate
clr_triangular = CyclicLR(mode='triangular2', base_lr=base_lr, max_lr=max_lr, step_size=step_size)

gradientDescent = SGD()
model.compile(gradientDescent, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

model.fit_generator(datagen.flow(x=X_Train, y=Y_one_hot, batch_size=batch_size),
                    steps_per_epoch=len(X_Train) / batch_size,
                    callbacks=[clr_triangular], verbose=2, epochs=epochs,
                    use_multiprocessing=True)

# Plots of CLR
plt.figure(1)
plt.ylabel('Training accuracy')
plt.xlabel('Learning Rate')
plt.title("CLR - 'triangular2' Policy")
plt.plot(clr_triangular.history['lr'], clr_triangular.history['acc'])
checkpoint = torch.load(SAVED_MODEL_PATH)
model.load_state_dict(checkpoint['state_dict'])
# start_epoch = checkpoint['epoch']
print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

model.fc0.conv2.register_forward_hook(get_activation('fc0.conv2'))
model.fc1.conv2.register_forward_hook(get_activation('fc1.conv2'))

optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
scheduler = CyclicLR(optim, gamma=gamma, step_size=stepsize)

num_epochs = 1202
start_time = time.time()
train_time = 0
best_rank1 = -np.inf
best_epoch = 0

if evaluation:
    print("Evaluation in Progress")
    test(model, queryloader, galleryloader)
    sys.exit(0)

print("Training of model in progress")
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

clr = CyclicLR(base_lr=0.001, max_lr=0.006, step_size=20.)
tfb = keras.callbacks.TensorBoard(log_dir='logs', histogram_freq=0, batch_size=batch_size,
                                  write_graph=True, write_grads=True, write_images=False)

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=[clr, tfb])
score = model.evaluate(x_test, y_test, verbose=0)
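# The base_lr/max_lr pair above (0.001/0.006) matches the example values in the CLR paper; for a
# new model they are usually picked with an "LR range test": train for a few epochs while the
# learning rate sweeps from a very small to a large value, then read off where accuracy starts
# improving and where it degrades. A minimal sketch using the same CyclicLR callback (the bounds
# and epoch count here are illustrative, not values from this project):
range_test = CyclicLR(base_lr=1e-5, max_lr=1.0,
                      step_size=len(x_train) // batch_size * 3,  # sweep upward over ~3 epochs
                      mode='triangular')
# model.fit(x_train, y_train, batch_size=batch_size, epochs=3, callbacks=[range_test])
# plt.plot(range_test.history['lr'], range_test.history['acc'])  # pick base_lr/max_lr from this curve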
model.compile(loss={"main_output": geodistance_tensorflow,
                    "auxiliary_output": 'categorical_crossentropy',
                    "terrain_output": 'categorical_crossentropy'},
              optimizer=opt,
              loss_weights=[1 / 3, 1 / 3, 1 / 3],
              metrics={"main_output": geodistance_tensorflow,
                       "auxiliary_output": 'categorical_accuracy',
                       "terrain_output": 'categorical_crossentropy'})

step = 8 * len(X_train) // batch_size
clr = CyclicLR(base_lr=0.00001, max_lr=0.01, step_size=step, mode='triangular2')
earlyStopping = keras.callbacks.EarlyStopping(monitor='loss', patience=5, verbose=1,
                                              restore_best_weights=True)

history = model.fit_generator(
    generate_arrays_from_file(X_train, Y1_train, Y2_train, Y3_train, codes_matrix,
                              terrain_matrix, batch_size),
    epochs=epochs,
    steps_per_epoch=train_instances_sz / batch_size,
    callbacks=[clr, earlyStopping],
    validation_steps=test_instances_sz / batch_size,
    validation_data=generate_arrays_from_file(X_test, Y1_test, Y2_test, Y3_test, codes_matrix,
def main(): torch.manual_seed(1) torch.cuda.manual_seed_all(1) global args, best_prec1 best_prec1 = 0 args = parser.parse_args() time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') if args.evaluate: args.results_dir = '/tmp' if args.save is '': args.save = time_stamp save_path = os.path.join(args.results_dir, args.save) if not os.path.exists(save_path): os.makedirs(save_path) args.noise = not args.no_noise args.quant = not args.no_quantization args.act_quant = not args.no_act_quantization args.quant_edges = not args.no_quant_edges logging.info("saving to %s", save_path) logging.debug("run arguments: %s", args) if args.gpus is not None: args.gpus = [int(i) for i in args.gpus.split(',')] device = 'cuda:' + str(args.gpus[0]) cudnn.benchmark = True else: device = 'cpu' dtype = torch.float32 args.step_setup = None model = models.__dict__[args.model] model_config = { 'scale': args.scale, 'input_size': args.input_size, 'dataset': args.dataset, 'bitwidth': args.bitwidth, 'quantize': args.quant, 'noise': args.noise, 'step': args.step, 'depth': args.depth, 'act_bitwidth': args.act_bitwidth, 'act_quant': args.act_quant, 'quant_edges': args.quant_edges, 'step_setup': args.step_setup, 'quant_epoch_step': args.quant_epoch_step, 'quant_start_stage': args.quant_start_stage, 'normalize': args.no_pre_process_normalize, 'noise_mask': args.noise_mask } if args.model_config is not '': model_config = dict(model_config, **literal_eval(args.model_config)) # create model model = model(**model_config) logging.info("creating model %s", args.model) model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print("number of parameters: ", params) logging.info("created model with configuration: %s", model_config) print(model) data = None checkpoint_epoch = 0 # optionally resume from a checkpoint if args.evaluate: if not os.path.isfile(args.evaluate): parser.error('invalid checkpoint: {}'.format(args.evaluate)) checkpoint = torch.load(args.evaluate, map_location=device) load_model(model, checkpoint) logging.info("loaded checkpoint '%s' (epoch %s)", args.evaluate, checkpoint['epoch']) print("loaded checkpoint {0} (epoch {1})".format( args.evaluate, checkpoint['epoch'])) elif args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume, map_location=device) if not args.start_from_zero: args.start_epoch = checkpoint['epoch'] - 1 best_test = checkpoint['best_prec1'] checkpoint_epoch = checkpoint['epoch'] load_model(model, checkpoint) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) elif os.path.isdir(args.resume): checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar') csv_path = os.path.join(args.resume, 'results.csv') print("=> loading checkpoint '{}'".format(checkpoint_path)) checkpoint = torch.load(checkpoint_path, map_location=device) best_test = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) print("=> loaded checkpoint '{}' (epoch {})".format( checkpoint_path, checkpoint['epoch'])) data = [] with open(csv_path) as csvfile: reader = csv.DictReader(csvfile) for row in reader: data.append(row) else: print("=> no checkpoint found at '{}'".format(args.resume)) if args.gpus is not None: model = torch.nn.DataParallel( model, [args.gpus[0]] ) # Statistics need to be calculated on single GPU to be consistant with data among multiplr GPUs # Data loading code default_transform = { 'train': 
get_transform(args.dataset, input_size=args.input_size, augment=True, integer_values=args.quant_dataloader, norm=not args.no_pre_process_normalize), 'eval': get_transform(args.dataset, input_size=args.input_size, augment=False, integer_values=args.quant_dataloader, norm=not args.no_pre_process_normalize) } transform = getattr(model.module, 'input_transform', default_transform) val_data = get_dataset(args.dataset, 'val', transform['eval'], datasets_path=args.datapath) val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.val_batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) train_data = get_dataset(args.dataset, 'train', transform['train'], datasets_path=args.datapath) train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) statistics_train_loader = torch.utils.data.DataLoader( train_data, batch_size=args.act_stats_batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.decay, nesterov=True) model, criterion = model.to(device, dtype), criterion.to(device, dtype) if args.clr: scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr, step_size=args.epochs_per_step * len(train_loader), mode=args.mode) else: scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma) csv_logger = CsvLogger(filepath=save_path, data=data) csv_logger.save_params(sys.argv, args) csv_logger_training_stats = os.path.join(save_path, 'training_stats.csv') # pre-training activation and parameters statistics calculation #### if check_if_need_to_collect_statistics(model): for layer in model.modules(): if isinstance(layer, actquant.ActQuantBuffers): layer.pre_training_statistics = True # Turn on pre-training activation statistics calculation model.module.statistics_phase = True validate( statistics_train_loader, model, criterion, device, epoch=0, num_of_batches=80, stats_phase=True) # Run validation on training set for statistics model.module.quantize.get_act_max_value_from_pre_calc_stats( list(model.modules())) _ = model.module.quantize.set_weight_basis(list(model.modules()), None) for layer in model.modules(): if isinstance(layer, actquant.ActQuantBuffers): layer.pre_training_statistics = False # Turn off pre-training activation statistics calculation model.module.statistics_phase = False else: # Maximal activation values still need to be derived from loaded stats model.module.quantize.assign_act_clamp_during_val(list( model.modules()), print_clamp_val=True) model.module.quantize.assign_weight_clamp_during_val( list(model.modules()), print_clamp_val=True) # model.module.quantize.get_act_max_value_from_pre_calc_stats(list(model.modules())) if args.gpus is not None: # Return to Multi-GPU after statistics calculations model = torch.nn.DataParallel(model.module, args.gpus) model, criterion = model.to(device, dtype), criterion.to(device, dtype) # pre-training activation statistics calculation #### if args.evaluate: val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, device, epoch=0) print("val_prec1: ", val_prec1) return # fast forward to curr stage for i in range(args.quant_start_stage): model.module.switch_stage(0) for epoch in trange(args.start_epoch, args.epochs + 1): if not isinstance(scheduler, CyclicLR): scheduler.step() # scheduler.optimizer = 
optimizer train_loss, train_prec1, train_prec5 = train( train_loader, model, criterion, device, epoch, optimizer, scheduler, training_stats_logger=csv_logger_training_stats) for layer in model.modules(): if isinstance(layer, actquant.ActQuantBuffers): layer.print_clamp() # evaluate on validation set val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, device, epoch) # remember best prec@1 and save checkpoint is_best = val_prec1 > best_prec1 best_prec1 = max(val_prec1, best_prec1) save_checkpoint( { 'epoch': epoch + 1, 'model': args.model, 'config': args.model_config, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, 'layers_b_dict': model.module. layers_b_dict #TODO this doesn't work for multi gpu - need to del }, is_best, path=save_path) # New type of logging csv_logger.write({ 'epoch': epoch + 1, 'val_error1': 1 - val_prec1, 'val_error5': 1 - val_prec5, 'val_loss': val_loss, 'train_error1': 1 - train_prec1, 'train_error5': 1 - train_prec5, 'train_loss': train_loss }) csv_logger.plot_progress(title=args.model + str(args.depth)) csv_logger.write_text( 'Epoch {}: Best accuracy is {:.2f}% top-1'.format( epoch + 1, best_prec1 * 100.))
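# In the epoch loop above, the scheduler is only stepped once per epoch when it is *not* a
# CyclicLR ("if not isinstance(scheduler, CyclicLR): scheduler.step()"); a cyclic schedule has to
# advance once per optimizer update instead, which presumably happens inside train(). A minimal
# sketch of that split, assuming a batch_step()-style method as used elsewhere in these snippets
# (generic names, not this repo's actual train() implementation):
def train_one_epoch_sketch(train_loader, model, criterion, optimizer, scheduler):
    model.train()
    for inputs, targets in train_loader:
        if isinstance(scheduler, CyclicLR):
            scheduler.batch_step()   # advance the cyclic LR every batch
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()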
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = ShuffleNetV2(scale=args.scaling, c_tag=args.c_tag, SE=args.SE,
                         residual=args.residual, groups=args.groups)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(ShuffleNetV2,
                                    args.batch_size // len(args.gpus) if args.gpus is not None else args.batch_size,
                                    device, dtype, args.input_size, 3, args.scaling, 3, args.c_tag,
                                    1000, torch.nn.ReLU, args.SE, args.residual, args.groups)))

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size, args.batch_size,
                                           args.input_size, args.workers)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum,
                                weight_decay=args.decay, nesterov=True)

    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype,
                        min_lr=args.min_lr, max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if args.SE in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.SE]:
            claimed_acc1 = 1 - claimed_acc_top1[args.SE][args.scaling]
            csv_logger.write_text('Claimed accuracy is {:.2f}% top-1'.format(claimed_acc1 * 100.))
    train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader,
                  optimizer, criterion, device, dtype, args.batch_size, args.log_interval,
                  csv_logger, save_path, claimed_acc1, claimed_acc5, best_test)
class Solver(object):
    def __init__(self, config):
        self.n_classes = config['n_classes']
        self.model = UNET(1, self.n_classes)
        if self.n_classes > 1:
            self.criterion = nn.CrossEntropyLoss()
        else:
            self.criterion = nn.BCEWithLogitsLoss()
        self.lr = config['lr']
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=1e-3,
                                    betas=(0.8, 0.9))
        self.device = config['device']
        self.num_epochs = config['num_epochs']
        if config['N_subimgs'] % config['batch_size'] != 0:
            self.train_step = config['N_subimgs'] // config['batch_size'] + 1
        else:
            self.train_step = config['N_subimgs'] / config['batch_size']
        self.model_save_dir = config['save_pth']
        self.best_loss = 10
        self.scheduler = CyclicLR(self.optimizer,
                                  step_size=2 * (config['N_subimgs'] % config['batch_size']),
                                  mode='triangular2')
        if self.device is not None:
            self.device = torch.device(self.device)
            self.model.to(self.device)

    def restore_best(self):
        model_pth = os.path.join(self.model_save_dir, 'BEST_checkpoint.pth.tar')
        checkpoint = torch.load(model_pth)
        state_dict = checkpoint['model']
        best_loss = checkpoint['loss']
        epoch = checkpoint['epoch']
        return epoch + 1, best_loss

    def restore_model(self):
        model_pth = os.path.join(self.model_save_dir, 'checkpoint.pth.tar')
        checkpoint = torch.load(model_pth)
        state_dict = checkpoint['model']
        epoch = checkpoint['epoch']
        return epoch + 1

    def save_checkpoint(self, state, path):
        torch.save(state, os.path.join(path, 'BEST_checkpoint.pth.tar'))

    def update_lr(self, lr):
        for param in self.optimizer.param_groups:
            param['lr'] = lr

    def train(self, prefetcher, resume=True, best=True):
        if best and resume:
            start_epoch, best_loss = self.restore_best()
            self.best_loss = best_loss.to(self.device)
            print('Start from %d, so far the best loss is %.6f' % (start_epoch, best_loss))
        elif resume:
            start_epoch = self.restore_model()
            print('Start from %d' % (start_epoch))
        else:
            start_epoch = 0  # not really epoch, consider using step for naming

        for i in range(start_epoch, self.num_epochs):
            epoch_loss = 0
            self.model.train()
            self.scheduler.batch_step()
            for j in range(self.train_step):
                self.optimizer.zero_grad()
                img, label = prefetcher.next()
                img = Variable(img.to(self.device, dtype=torch.float32))
                label = Variable(label.to(self.device, dtype=torch.float32))
                output = self.model(img)
                loss = self.criterion(output, label)
                epoch_loss += loss
                loss.backward()
                self.optimizer.step()
                if loss < self.best_loss:
                    state = {}
                    state['loss'] = loss
                    state['model'] = self.model.state_dict()
                    state['epoch'] = i
                    print('loss decrease, saving model...........')
                    self.save_checkpoint(state, self.model_save_dir)
                    self.best_loss = loss
            aver_loss = epoch_loss / self.train_step
            print('training %d epoch, average loss is %.6f' % (i, aver_loss))
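# The step_size passed to CyclicLR above is derived from N_subimgs and batch_size. Smith's CLR
# paper recommends setting step_size to roughly 2-10 times the number of optimizer updates in one
# epoch, so a common way to compute it looks like the sketch below (hypothetical numbers, not this
# Solver's exact logic):
import math

def clr_step_size(n_samples, batch_size, epochs_per_half_cycle=2):
    iters_per_epoch = math.ceil(n_samples / batch_size)  # optimizer updates per epoch
    return epochs_per_half_cycle * iters_per_epoch       # half-cycle length in iterations

# Example: 190000 sub-images with batch size 32 -> 5938 iterations/epoch -> step_size = 11876.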
model.compile(optimizer=sgd,
              loss=weighted_pixelwise_crossentropy([0.00418313, 0.509627837, 1.]),
              # loss=keras_lovasz_softmax,
              sample_weight_mode="temporal",
              metrics=[bla_iou, r_iou, blu_iou])
print("Model compiled!")

# Callbacks
tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()), write_graph=True,
                          update_freq="batch")
print("Tensorboard loaded!")

# 5e-5
cyclical = CyclicLR(base_lr=float(FLAGS.lr), max_lr=float(FLAGS.max_lr),
                    step_size=train_data.__len__() * 2.5, mode="triangular2")
checkpoint = ModelCheckpoint(FLAGS.chkpt.rstrip("/") + "/weights-{epoch:02d}.hdf5",
                             verbose=1, period=1)


def train(data_generator, val_generator, callbacks):
    return model.fit_generator(generator=data_generator,
                               validation_data=val_generator,
                               callbacks=callbacks,
                               epochs=50,
                               verbose=1,
                               shuffle=False)
def main():
    seed = random.randint(1, 10000)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    results_dir = '/tmp'
    save = time_stamp
    save_path = os.path.join(results_dir, save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    gpus = 2  # [int(i) for i in gpus.split(',')]
    device = 'cuda:0'  # + str(args.gpus[0])
    cudnn.benchmark = True
    dtype = torch.float64

    input_size = 224
    scaling = 1.0
    batch_size = 20
    workers = 4
    learning_rate = 0.02
    momentum = 0.9
    decay = 0.00004
    max_lr = 1
    min_lr = 0.00001
    start_epoch = 0
    epochs = 400
    epochs_per_step = 20
    log_interval = 100
    mode = 'triangular2'
    evaluate = 'false'
    dataroot = "data"

    model = MobileNet2(input_size=input_size, scale=scaling)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    # print(model)
    """print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(MobileNet2,
                                    batch_size // len(gpus) if gpus is not None else batch_size,
                                    device, dtype, input_size, 3, scaling)))"""

    train_loader, val_loader = get_loaders(dataroot, batch_size, batch_size, input_size, workers)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    model = torch.nn.DataParallel(model)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum,
                                weight_decay=decay, nesterov=True)

    find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=min_lr,
                    max_lr=max_lr, step_size=epochs_per_step * len(train_loader), mode=mode,
                    save_path=save_path)

    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr,
                         step_size=epochs_per_step * len(train_loader), mode=mode)

    best_test = 0
    if evaluate == 'true':
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    data = []
    csv_logger = CsvLogger(filepath=save_path, data=data)
    # csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if input_size in claimed_acc_top1:
        if scaling in claimed_acc_top1[input_size]:
            claimed_acc1 = claimed_acc_top1[input_size][scaling]
            claimed_acc5 = claimed_acc_top5[input_size][scaling]
            csv_logger.write_text(
                'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100.,
                                                                              claimed_acc5 * 100.))

    train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer,
                  criterion, device, dtype, batch_size, log_interval, csv_logger, './data',
                  claimed_acc1, claimed_acc5, best_test)
    return 1
callbacks = []

train_source, val_source, classes, batches_per_epoch = \
    set_data(input_dims, dataset_path, val_split, train_batch_size, val_batch_size,
             use_aug, save_aug)

model, model_name = set_model(input_dims, model_type, classes)
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=metrics)

if lr_mode == 'test':
    callbacks.append(
        CyclicLR(bottom_lr=bottom_lr, top_lr=top_lr,
                 iter_per_cycle=epochs * batches_per_epoch,
                 mode='triangular', start_mode='bottom-start', cycle_mode='one-way'))
elif lr_mode == 'cyclic':
    callbacks.append(
        CyclicLR(bottom_lr=bottom_lr, top_lr=top_lr,
                 iter_per_cycle=epochs_per_clr * batches_per_epoch,
                 mode=clr_mode, profile_fn=clr_profile_fn, scale_fn=clr_scale_fn,
                 gamma=clr_gamma, omega=clr_omega, start_mode=clr_start_mode,
                 cycle_mode=clr_cycle_mode,
def train(self):
    kf = KFold(n_splits=self.n_folds, shuffle=True, random_state=self.seed)
    fold = 0
    for train_keys_, val_keys_ in kf.split(self.train_keys):
        if fold in self.exclude_folds:
            print('!!! Skipped fold ' + str(fold))
            fold += 1
            pass
        else:
            # print(train_keys_, self.additional_keys)
            train_data, val_data = self.ids_to_keys(train_keys_, self.additional_keys), \
                self.ids_to_keys(val_keys_)
            train_gen = self.get_generator(is_train=True, dataset=train_data)
            val_gen = self.get_generator(is_train=False, dataset=val_data)

            # model_ = zoo_unet_K.get_unet_512_spartial
            # model = model_(input_shape=(self.input_size, self.input_size, self.n_channels), classes=self.classes,
            #                batch_norm=self.batch_norm, n_filters=16, dropout_val=self.dropout)
            model = self.model(UNET_INPUT=self.input_size, classes=self.classes,
                               dropout_val=self.dropout, batch_norm=self.batch_norm)
            callback = KFold_cbk(model, fold)
            fold_checkpoint_name = 'model_at_fold_BN_%d.h5' % fold

            for epochs, lr, iter_n in self.lr_schedule:
                if iter_n == '1' or (iter_n != check_id):
                    with tf.device("/cpu:0"):
                        print('--------\nNew init, fold %d, starting iteration %s' % (fold, iter_n))
                        if iter_n != '1':
                            print('loading weights...')
                            model.load_weights(fold_checkpoint_name)
                            print('Done!')
                        if iter_n not in ['1', '2']:
                            print('VGG layers are trainable now')
                            for l in model.layers:
                                l.trainable = True
                        # if iter_n not in ['1', '2', '3', '4']:
                        #     print('HueSat, Contrast, Brightness are enabled ')
                        #     train_gen = self.get_generator(is_train=True, dataset=train_data, hue_br_contr=True)
                        parallel_model = multi_gpu_model(model, gpus=2)

                clr = CyclicLR(base_lr=lr * 0.8, max_lr=lr * 2, step_size=120.)
                parallel_model.compile(optimizer=self.optimizer(lr=lr, clipnorm=1.),
                                       loss=self.loss,
                                       metrics=[zoo_losses_K.dice_coef])
                parallel_model.fit_generator(train_gen,
                                             steps_per_epoch=np.ceil(len(train_keys_) / self.batch_size),
                                             epochs=epochs,
                                             validation_data=val_gen,
                                             validation_steps=np.ceil(len(val_keys_) / self.batch_size),
                                             verbose=2,
                                             callbacks=[callback],  # FIXME: turn clr back
                                             max_queue_size=30,
                                             use_multiprocessing=self.use_multiprocessing,
                                             workers=4)
                check_id = iter_n
            fold += 1
    print('Finished!')
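# Note the "FIXME: turn clr back" above: the CyclicLR callback is constructed but not passed to
# fit_generator, so it currently has no effect. If it were re-enabled, it would simply be added
# to the callbacks list, e.g. (sketch using the same objects as above):
# parallel_model.fit_generator(train_gen,
#                              steps_per_epoch=np.ceil(len(train_keys_) / self.batch_size),
#                              epochs=epochs,
#                              validation_data=val_gen,
#                              validation_steps=np.ceil(len(val_keys_) / self.batch_size),
#                              verbose=2,
#                              callbacks=[callback, clr],  # CLR re-enabled alongside the checkpoint callback
#                              max_queue_size=30,
#                              use_multiprocessing=self.use_multiprocessing,
#                              workers=4)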