def _evaluate(pipeline_name, dev_mode):
    meta = pd.read_csv(os.path.join(params.meta_dir,
                                    'stage{}_metadata.csv'.format(params.competition_stage)))
    meta_valid = meta[meta['is_valid'] == 1]

    if dev_mode:
        meta_valid = meta_valid.sample(30, random_state=1234)

    data = {'input': {'meta': meta_valid,
                      'meta_valid': None,
                      'train_mode': False,
                      'target_sizes': [(300, 300)] * len(meta_valid),
                      },
            }

    pipeline = PIPELINES[pipeline_name]['inference'](SOLUTION_CONFIG)
    output = pipeline.transform(data)
    pipeline.clean_cache()
    y_pred = output['y_pred']
    pipeline.clean_cache()

    y_true = read_masks(meta_valid[Y_COLUMNS_SCORING].values, params.data_dir, dataset="val")

    logger.info('Calculating mean precision and recall')
    precision, recall = mean_precision_and_recall(y_true, y_pred)
    logger.info('Mean precision on validation is {}'.format(precision))
    logger.info('Mean recall on validation is {}'.format(recall))
    ctx.channel_send('Precision', 0, precision)
    ctx.channel_send('Recall', 0, recall)
def _evaluate_pipeline(pipeline_name, validation_size):
    meta = pd.read_csv(os.path.join(params.meta_dir, 'stage1_metadata.csv'))
    meta_train = meta[meta['is_train'] == 1]
    valid_ids = eval(params.valid_category_ids)
    meta_train_split, meta_valid_split = train_valid_split(meta_train, validation_size,
                                                           valid_category_ids=valid_ids)

    data = {'input': {'meta': meta_valid_split,
                      'meta_valid': None,
                      'train_mode': False,
                      'target_sizes': meta_valid_split[SIZE_COLUMNS].values,
                      },
            }

    y_true = read_masks(meta_valid_split[Y_COLUMNS_SCORING].values)

    pipeline = PIPELINES[pipeline_name]['inference'](SOLUTION_CONFIG)
    pipeline.clean_cache()
    output = pipeline.transform(data)
    pipeline.clean_cache()
    y_pred = output['y_pred']

    logger.info('Calculating IOU and IOUT Scores')
    iou_score = intersection_over_union(y_true, y_pred)
    logger.info('IOU score on validation is {}'.format(iou_score))
    ctx.channel_send('IOU Score', 0, iou_score)
    iout_score = intersection_over_union_thresholds(y_true, y_pred)
    logger.info('IOUT score on validation is {}'.format(iout_score))
    ctx.channel_send('IOUT Score', 0, iout_score)
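
# intersection_over_union and intersection_over_union_thresholds above come from the
# project's metrics module; the helper below is only a minimal, assumed sketch of the
# plain IoU idea for one pair of binary masks, not the project's actual implementation.
import numpy as np

def iou_sketch(y_true_mask, y_pred_mask, eps=1e-7):
    """Intersection-over-union for a single pair of binary masks (illustrative only)."""
    y_true_mask = np.asarray(y_true_mask, dtype=bool)
    y_pred_mask = np.asarray(y_pred_mask, dtype=bool)
    intersection = np.logical_and(y_true_mask, y_pred_mask).sum()
    union = np.logical_or(y_true_mask, y_pred_mask).sum()
    return (intersection + eps) / (union + eps)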
def get_train_loaders(ifold, batch_size=8, dev_mode=False, pad_mode='edge',
                      meta_version=1, pseudo_label=False, depths=False):
    train_shuffle = True
    train_meta, val_meta = get_nfold_split(ifold, nfold=10, meta_version=meta_version)

    if pseudo_label:
        test_meta = get_test_meta()
        train_meta = train_meta.append(test_meta, sort=True)

    if dev_mode:
        train_shuffle = False
        train_meta = train_meta.iloc[:10]
        val_meta = val_meta.iloc[:10]
    print(train_meta.shape, val_meta.shape)

    img_mask_aug_train, img_mask_aug_val = get_img_mask_augments(pad_mode, depths)

    train_set = ImageDataset(True, train_meta,
                             augment_with_target=img_mask_aug_train,
                             image_augment=transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
                             image_transform=get_image_transform(pad_mode),
                             mask_transform=get_mask_transform(pad_mode))
    train_loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=train_shuffle,
                                   num_workers=4, collate_fn=train_set.collate_fn, drop_last=True)
    train_loader.num = len(train_set)

    val_set = ImageDataset(True, val_meta,
                           augment_with_target=img_mask_aug_val,
                           image_augment=None,
                           image_transform=get_image_transform(pad_mode),
                           mask_transform=get_mask_transform(pad_mode))
    val_loader = data.DataLoader(val_set, batch_size=batch_size, shuffle=False,
                                 num_workers=4, collate_fn=val_set.collate_fn)
    val_loader.num = len(val_set)
    val_loader.y_true = read_masks(val_meta[ID_COLUMN].values)

    return train_loader, val_loader
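
# Hypothetical usage sketch for get_train_loaders above; the fold index, the dev_mode
# flag, and the assumption that each batch unpacks into (images, masks) tensors are
# illustrative only and not confirmed by this snippet.
if __name__ == '__main__':
    train_loader, val_loader = get_train_loaders(ifold=0, batch_size=8, dev_mode=True)
    print('train samples:', train_loader.num, 'val samples:', val_loader.num)
    for images, masks in train_loader:
        print(images.shape, masks.shape)  # inspect a single batch, then stop
        break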
def set_params(self, transformer, validation_datagen, meta_valid=None, *args, **kwargs):
    self.model = transformer.model
    self.optimizer = transformer.optimizer
    self.loss_function = transformer.loss_function
    self.output_names = transformer.output_names
    self.validation_datagen = validation_datagen
    self.meta_valid = meta_valid
    # Guard against the default meta_valid=None before reading the validation masks.
    self.y_true = read_masks(self.meta_valid[Y_COLUMN].values) if self.meta_valid is not None else None
    self.activation_func = transformer.activation_func
    self.transformer = transformer
def get_train_loaders(ifold, batch_size=8, dev_mode=False):
    train_shuffle = True
    train_meta, val_meta = get_nfold_split(ifold, nfold=10)

    if dev_mode:
        train_shuffle = False
        train_meta = train_meta.iloc[:10]
        val_meta = val_meta.iloc[:10]
    print(train_meta[X_COLUMN].values[:5])
    print(train_meta[Y_COLUMN].values[:5])

    train_set = ImageDataset(True, train_meta,
                             augment_with_target=ImgAug(aug.crop_seq(crop_size=(H, W),
                                                                     pad_size=(28, 28),
                                                                     pad_method='reflect')),
                             image_augment=ImgAug(aug.brightness_seq),
                             image_transform=image_transform,
                             mask_transform=mask_transform)
    train_loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=train_shuffle,
                                   num_workers=4, collate_fn=train_set.collate_fn, drop_last=True)
    train_loader.num = len(train_set)

    val_set = ImageDataset(True, val_meta,
                           augment_with_target=ImgAug(aug.pad_to_fit_net(64, 'reflect')),
                           image_augment=None,
                           image_transform=image_transform,
                           mask_transform=mask_transform)
    val_loader = data.DataLoader(val_set, batch_size=batch_size, shuffle=False,
                                 num_workers=4, collate_fn=val_set.collate_fn)
    val_loader.num = len(val_set)
    val_loader.y_true = read_masks(val_meta[Y_COLUMN].values)

    return train_loader, val_loader
def train_net(net, epochs=5, batch_size=1, lr=0.01, val_percent=0.05, save_cp=True, gpu=True):
    # Define directories
    dir_img = 'E:/Dataset/Dataset10k/images/training/'
    dir_mask = 'E:/Dataset/Dataset10k/annotations/training/'
    val_dir_img = 'E:/Dataset/Dataset10k/images/validation/'
    val_dir_mask = 'E:/Dataset/Dataset10k/annotations/validation/'
    dir_checkpoint = 'checkpoints/'

    # Get lists of images and annotations
    train_images = os.listdir(dir_img)
    train_masks = os.listdir(dir_mask)
    train_size = len(train_images)
    val_images = os.listdir(val_dir_img)
    val_masks = os.listdir(val_dir_mask)
    val_size = len(val_images)

    val_imgs = np.array([read_image(val_dir_img + name) for name in val_images]).astype(np.float32)
    val_true_masks = np.array([read_masks(val_dir_mask + name) for name in val_masks])
    # Materialize the pairs so the validation set can be re-iterated every epoch
    # (a bare zip() would be exhausted after the first pass).
    val = list(zip(val_imgs, val_true_masks))

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, train_size, val_size, str(save_cp), str(gpu)))

    # Define optimizer and loss function
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005)
    criterion = nn.BCELoss()

    # Training epochs
    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()
        epoch_loss = 0
        n_batches = train_size // batch_size

        for i in range(n_batches):
            # Slice the current batch of file names, then load images and masks
            batch_images = train_images[i * batch_size:(i + 1) * batch_size]
            batch_masks = train_masks[i * batch_size:(i + 1) * batch_size]
            imgs = np.array([read_image(dir_img + name) for name in batch_images]).astype(np.float32)
            true_masks = np.array([read_masks(dir_mask + name) for name in batch_masks])

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)
            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / train_size, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch finished ! Loss: {}'.format(epoch_loss / max(n_batches, 1)))

        # Validation
        val_dice = eval_net(net, val, gpu)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
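
# eval_net above is assumed to report a Dice coefficient; the function below is a
# minimal, assumed sketch of that metric for binary masks (the 0.5 threshold and the
# epsilon smoothing are choices of this example, not taken from eval_net itself).
import torch

def dice_coeff_sketch(pred, target, eps=1e-7):
    """Threshold predictions at 0.5 and compute 2*|A∩B| / (|A|+|B|)."""
    pred = (pred > 0.5).float().view(-1)
    target = target.float().view(-1)
    intersection = (pred * target).sum()
    return (2.0 * intersection + eps) / (pred.sum() + target.sum() + eps)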
def train(FLAG):
    print("Reading dataset...")
    # Load data
    Xtrain, Ytrain = read_images(TRAIN_DIR), read_masks(TRAIN_DIR, onehot=True)
    Xtest, Ytest = read_images(VAL_DIR), read_masks(VAL_DIR, onehot=True)
    track = ["hw3-train-validation/validation/0008",
             "hw3-train-validation/validation/0097",
             "hw3-train-validation/validation/0107"]
    Xtrack, Ytrack = read_list(track)

    vgg16 = VGG16(classes=7, shape=(256, 256, 3))
    vgg16.build(vgg16_npy_path=FLAG.init_from, mode=FLAG.mode, keep_prob=FLAG.keep_prob)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

    def initialize_uninitialized(sess):
        global_vars = tf.global_variables()
        is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [v for (v, f) in zip(global_vars, is_not_initialized) if not f]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Hyperparameters
        batch_size = 32
        epoch = 500
        early_stop_patience = 50
        min_delta = 0.0001
        opt_type = 'adam'

        # Recorder
        epoch_counter = 0

        # Optimizer: passing global_step to minimize() will increment it at each step.
        global_step = tf.Variable(0, trainable=False)
        start_learning_rate = FLAG.lr
        half_cycle = 2000
        learning_rate = tf.train.exponential_decay(start_learning_rate, global_step,
                                                   half_cycle, 0.5, staircase=True)
        if opt_type == 'sgd':
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                             momentum=0.9, use_nesterov=True)
        else:
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        obj = vgg16.loss
        train_op = opt.minimize(obj, global_step=global_step)

        # Progress bars
        ptrain = IntProgress()
        pval = IntProgress()
        display(ptrain)
        display(pval)
        ptrain.max = int(Xtrain.shape[0] / batch_size)
        pval.max = int(Xtest.shape[0] / batch_size)

        # Re-initialize variables added for the new task (e.g. optimizer slots)
        initialize_uninitialized(sess)

        # Reset early-stopping state
        patience_counter = 0
        current_best_val_loss = float('inf')

        # Optimize until early stopping triggers or the epoch budget is exhausted
        while patience_counter < early_stop_patience and epoch_counter < epoch:
            # Training
            stime = time.time()
            bar_train = Bar('Training', max=int(Xtrain.shape[0] / batch_size),
                            suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            bar_val = Bar('Validation', max=int(Xtest.shape[0] / batch_size),
                          suffix='%(index)d/%(max)d - %(percent).1f%% - %(eta)ds')
            train_loss, train_accu = 0.0, 0.0
            for i in range(int(Xtrain.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, accu, _ = sess.run([obj, vgg16.accuracy, train_op],
                                         feed_dict={vgg16.x: Xtrain[st:ed, :],
                                                    vgg16.y: Ytrain[st:ed, :],
                                                    vgg16.is_train: True})
                train_loss += loss
                train_accu += accu
                ptrain.value += 1
                ptrain.description = "Training %s/%s" % (ptrain.value, ptrain.max)
            train_loss = train_loss / ptrain.value
            train_accu = train_accu / ptrain.value

            # Validation
            val_loss, val_accu = 0.0, 0.0
            for i in range(int(Xtest.shape[0] / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                loss, accu = sess.run([obj, vgg16.accuracy],
                                      feed_dict={vgg16.x: Xtest[st:ed, :],
                                                 vgg16.y: Ytest[st:ed, :],
                                                 vgg16.is_train: False})
                val_loss += loss
                val_accu += accu
                pval.value += 1
                pval.description = "Testing %s/%s" % (pval.value, pval.max)
            val_loss = val_loss / pval.value
            val_accu = val_accu / pval.value

            # Plot tracked predictions every 10 epochs
            if epoch_counter % 10 == 0:
                Xplot = sess.run(vgg16.pred,
                                 feed_dict={vgg16.x: Xtrack[:, :],
                                            vgg16.y: Ytrack[:, :],
                                            vgg16.is_train: False})
                for i, fname in enumerate(track):
                    saveimg = skimage.transform.resize(Xplot[i], output_shape=(512, 512),
                                                       order=0, preserve_range=True, clip=False)
                    saveimg = label2rgb(saveimg)
                    outpath = os.path.join(FLAG.save_dir,
                                           os.path.basename(fname) + "_pred_" + str(epoch_counter) + ".png")
                    imageio.imwrite(outpath, saveimg)
                    print(outpath)

            # Early stopping check
            if (current_best_val_loss - val_loss) > min_delta:
                current_best_val_loss = val_loss
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
            else:
                patience_counter += 1

            # Shuffle Xtrain and Ytrain for the next epoch
            idx = np.random.permutation(Xtrain.shape[0])
            Xtrain, Ytrain = Xtrain[idx, :, :, :], Ytrain[idx, :]

            # Epoch end
            epoch_counter += 1
            ptrain.value = 0
            pval.value = 0
            bar_train.finish()
            bar_val.finish()
            print("Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu: %.4f"
                  % (epoch_counter, patience_counter, round(time.time() - stime, 2),
                     train_loss, train_accu, val_loss, val_accu))
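
# The schedule in train() uses tf.train.exponential_decay with staircase=True, which
# halves the learning rate every half_cycle steps; below is a tiny framework-free sketch
# of the equivalent formula (the function name is made up, defaults mirror the snippet).
def staircase_decay_sketch(start_lr, step, half_cycle=2000, rate=0.5):
    return start_lr * rate ** (step // half_cycle)

# e.g. staircase_decay_sketch(0.01, 4500) -> 0.0025 (two halvings after 4000 steps)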