def get_optimize_op(self, global_step, learning_rate):
    """
    Build the training op: polynomially decayed learning rate + momentum optimizer.

    Need to override if you want to use different optimization policy.
    :param global_step: step counter; drives the decay schedule and is passed to ``minimize``
    :param learning_rate: initial learning rate handed to the decay schedule
    :return: (learning_rate, optimize_op) tuple, i.e. the decayed learning-rate
             tensor and the op returned by ``optimizer.minimize``
    """
    hyper = HyperParams.get()

    decayed_lr = tf.train.polynomial_decay(
        learning_rate,
        global_step,
        decay_steps=hyper.opt_decay_steps_deeplab,
        power=hyper.opt_decay_power_deeplab,
        end_learning_rate=0.0000001)

    # Ops registered under UPDATE_OPS must run together with the train step,
    # so the optimizer is created and applied under that control dependency.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        momentum_opt = tf.train.MomentumOptimizer(
            decayed_lr, momentum=hyper.opt_momentum)
        optimize_op = momentum_opt.minimize(
            self.get_loss_opt(),
            global_step,
            colocate_gradients_with_ops=True)

    return decayed_lr, optimize_op
def mask_size_normalize(data, target_size=None):
    """
    Rescale a sample so that its largest mask has (roughly) ``target_size``.

    When ``target_size`` is not given, half of the calls fall back to plain
    ``random_scaling`` and the other half draw a target size uniformly from
    [pre_size_norm_min, pre_size_norm_max].

    :param data: sample exposing ``img`` and ``masks``
    :param target_size: desired size of the largest mask; None to draw it at random
    :return: the rescaled sample
    """
    coin = random.randint(0, 1)
    if target_size is None and coin <= 0:
        return random_scaling(data)

    # getting maximum size of masks; degenerate masks cannot be normalized
    largest_mask = get_max_size_of_masks(data.masks)
    if largest_mask <= 1:
        return random_scaling(data)

    # normalize by the target size
    if target_size is None:
        target_size = random.uniform(HyperParams.get().pre_size_norm_min,
                                     HyperParams.get().pre_size_norm_max)

    short_edge = min(data.img.shape[:2])
    # keep the resulting shorter edge within [120, 3000] pixels
    upper_bound = 3000 / short_edge
    lower_bound = 120 / short_edge
    factor = max(lower_bound, min(upper_bound, target_size / largest_mask))

    return resize_shortedge(data, int(short_edge * factor))
def objective(args):
    """
    Objective for a hyper-parameter search trial: train a 'unet' and report 1 - mIoU.

    :param args: dict of hyper-parameter overrides; keys that are not existing
                 attributes of the HyperParams instance are ignored
    :return: result dict with 'loss' and 'status'; successful trials also carry
             'miou', 'model_name' and the TASK_* environment values
    """
    separator = '------------'
    print('------------ STARTED')
    trainer = Trainer()
    print(args)
    print(separator)

    # apply only the overrides that correspond to known hyper-parameters
    for key, value in args.items():
        if key in HyperParams.get().__dict__.keys():
            HyperParams.get().__dict__[key] = value
    print(HyperParams.get().__dict__)
    print(separator)

    miou, name = trainer.run('unet')
    print(miou, name)
    print('------------ FINISHED')

    if miou <= 0.0:
        return {'loss': 1.0, 'status': STATUS_FAIL}

    outcome = {'loss': 1.0 - miou, 'miou': miou, 'model_name': name}
    env_fields = (
        ('task_group_id', 'TASK_GROUP_ID'),
        ('task_group_name', 'TASK_GROUP_NAME'),
        ('task_name', 'TASK_NAME'),
    )
    for field, env_key in env_fields:
        outcome[field] = os.environ.get(env_key, '')
    outcome['status'] = STATUS_OK
    return outcome
def parse_merged_output(output, cutoff=0.5, cutoff_instance_max=0.8, cutoff_instance_avg=0.2):
    """
    Split 1-channel merged output for instance segmentation.

    :param output: (h, w, 1) segmentation image
    :param cutoff: pixels strictly above this value are treated as foreground
                   before connected components are labelled
    :param cutoff_instance_max: a component is dropped when its maximum
                                ``output`` value is below this threshold
    :param cutoff_instance_avg: a component is dropped when its mean ``output``
                                value is below this threshold
    :return: (instances, scores). ``instances`` is a list of boolean masks,
             sorted by ascending mask size before overlap removal; ``scores``
             holds the mean ``output`` value of each kept component.
    """
    cutoffed = output > cutoff
    # `label` is presumably skimage.measure.label -- confirm against the file's imports
    lab_img = label(cutoffed, connectivity=1)

    instances = []
    scores = []
    for i in range(1, lab_img.max() + 1):
        # builtin bool: the np.bool alias was removed from NumPy
        instance = (lab_img == i).astype(bool)
        masked = instance * output

        # TODO : max or avg?
        if np.max(masked) < cutoff_instance_max:
            continue
        instance_score_avg = np.sum(masked) / np.sum(instance)
        if instance_score_avg < cutoff_instance_avg:
            continue

        instances.append(instance)
        scores.append(instance_score_avg)

    # dilation (skipped when the hyper-parameter is 0)
    dilation_iter = HyperParams.get().post_dilation_iter
    if dilation_iter > 0:
        instances = [
            ndimage.binary_dilation(instance, iterations=dilation_iter)
            for instance in instances
        ]

    # sorted by size (stable, ascending)
    sorted_idx = sorted(range(len(instances)),
                        key=lambda idx: get_size_of_mask(instances[idx]))
    instances = [instances[x] for x in sorted_idx]
    scores = [scores[x] for x in sorted_idx]

    # make sure there are no overlaps
    instances, scores = Network.remove_overlaps(instances, scores)

    # fill holes
    if HyperParams.get().post_fill_holes:
        instances = [ndimage.binary_fill_holes(i) for i in instances]

    return instances, scores
def __init__(self, batchsize):
    """
    Set up sizes and input placeholders for the network.

    The image placeholder is larger than the mask/weight placeholders by
    ``pad_size`` pixels on every side.

    :param batchsize: batch size, forwarded to the parent constructor and stored
    """
    super().__init__(batchsize, unet_weight=True)

    self.img_size = 228
    self.num_block = HyperParams.get().unet_step_size
    self.inp_size = get_net_input_size(self.img_size, self.num_block)

    # the extra border must split evenly between both sides
    border = self.inp_size - self.img_size
    assert border % 2 == 0
    self.pad_size = border // 2

    self.batchsize = batchsize

    padded_edge = self.img_size + self.pad_size * 2
    self.input_batch = tf.placeholder(
        tf.float32,
        shape=(None, padded_edge, padded_edge, 3),
        name='image')

    target_shape = (None, self.img_size, self.img_size, 1)
    self.mask_batch = tf.placeholder(tf.float32, shape=target_shape, name='mask')
    self.weight_batch = tf.placeholder(tf.float32, shape=target_shape, name='weight')

    # filled in later (not by this constructor)
    self.unused = None
    self.logit = None
    self.output = None
def random_affine(data):
    """
    Randomly apply affine transformations including rotation, shearing, translation.

    The transform is applied on roughly one call out of three; otherwise the
    sample is returned untouched. The same augmenter is applied to the image
    and to every mask.

    :param data: CellImageData
    :return: CellImageData
    """
    if random.randint(0, 2) >= 1:
        return data

    def _symmetric_randint(limit):
        # np.random.randint(-limit, limit) raises ValueError for limit == 0
        # (low >= high); a zero limit simply disables that component.
        if limit == 0:
            return 0
        return np.random.randint(-limit, limit)

    hyper = HyperParams.get()
    rand_rotate = _symmetric_randint(hyper.pre_affine_rotate)
    rand_shear = _symmetric_randint(hyper.pre_affine_shear)
    rand_translate = np.random.uniform(-hyper.pre_affine_translate,
                                       hyper.pre_affine_translate)

    aug = iaa.Affine(scale=1.0,
                     translate_percent=rand_translate,
                     rotate=rand_rotate,
                     shear=rand_shear,
                     cval=0,
                     mode='reflect')
    data.img = aug.augment_image(data.img)
    data.masks = [aug.augment_image(mask) for mask in data.masks]
    return data
def random_scaling(data):
    """
    Randomly scale an image and masks.

    Half of the calls return the sample unchanged. Otherwise width and height
    are scaled independently by factors drawn from
    [1 - pre_scale_f1, 1 + pre_scale_f2].

    :param data: CellImageData
    :return: CellImageData
    """
    if random.randint(0, 1) <= 0:
        return data

    height, width = data.img.shape[:2]
    shrink = HyperParams.get().pre_scale_f1
    grow = HyperParams.get().pre_scale_f2

    # width is drawn first, then height
    new_w = int(random.uniform(1. - shrink, 1. + grow) * width)
    new_h = int(random.uniform(1. - shrink, 1. + grow) * height)
    target = (new_w, new_h)

    data.img = cv2.resize(data.img, target, interpolation=cv2.INTER_AREA)

    resized_masks = []
    for mask in data.masks:
        resized_masks.append(
            cv2.resize(mask, target, interpolation=cv2.INTER_AREA))
    data.masks = resized_masks

    data.img_w = new_w
    data.img_h = new_h
    return data
def get_optimize_op(self, global_step, learning_rate):
    """
    Build the training op: staircase exponential decay + the configured optimizer.

    Need to override if you want to use different optimization policy.
    :param global_step: step counter; drives the decay schedule and is passed to ``minimize``
    :param learning_rate: initial learning rate handed to the decay schedule
    :return: (learning_rate, optimize_op) tuple, i.e. the decayed learning-rate
             tensor and the op returned by ``optimizer.minimize``
    :raises Exception: when the ``optimizer`` hyper-parameter is not one of
                       'adam', 'rmsprop', 'sgd', 'momentum'
    """
    hyper = HyperParams.get()

    decayed_lr = tf.train.exponential_decay(
        learning_rate,
        global_step,
        decay_steps=hyper.opt_decay_steps,
        decay_rate=hyper.opt_decay_rate,
        staircase=True)

    # name -> lazy constructor, so only the selected optimizer is instantiated
    optimizer_factories = (
        ('adam', lambda: tf.train.AdamOptimizer(decayed_lr, epsilon=1e-8)),
        # not good
        ('rmsprop', lambda: tf.train.RMSPropOptimizer(
            decayed_lr, decay=0.9, momentum=hyper.opt_momentum)),
        # not optimized
        ('sgd', lambda: tf.train.GradientDescentOptimizer(decayed_lr)),
        ('momentum', lambda: tf.train.MomentumOptimizer(
            decayed_lr, momentum=hyper.opt_momentum)),
    )

    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        for opt_name, make_optimizer in optimizer_factories:
            if hyper.optimizer == opt_name:
                optimizer = make_optimizer()
                break
        else:
            raise Exception('invalid optimizer: %s' % hyper.optimizer)

        optimize_op = optimizer.minimize(
            self.get_loss_opt(),
            global_step,
            colocate_gradients_with_ops=True)

    return decayed_lr, optimize_op
def erosion_mask(data):
    """
    As described in the original paper, separation between cluttered cells is
    enhanced by using a morphological algorithm.

    Each mask first loses the pixels already claimed by earlier (eroded)
    masks, then is binarized and eroded ``pre_erosion_iter`` times. The input
    mask arrays are modified in place by the overlap removal.

    :param data: CellImageData
    :return: CellImageData (same object, with ``masks`` replaced by uint8 eroded masks)
    """
    if not len(data.masks):
        return data

    erosion_iter = HyperParams.get().pre_erosion_iter
    occupied = np.zeros_like(data.masks[0], dtype=np.uint8)
    eroded_masks = []

    for mask in data.masks:
        # drop pixels that belong to a previously processed mask
        mask[occupied > 0] = 0
        eroded = ndimage.morphology.binary_erosion(
            (mask > 0),
            border_value=1,
            iterations=erosion_iter)
        eroded = eroded.astype(np.uint8)

        occupied = occupied + eroded
        eroded_masks.append(eroded)

    data.masks = eroded_masks
    return data
    # NOTE(review): tail of a definition that begins before this chunk; kept verbatim.
    return get_multiple_metric(thr_list, instances, label)


def filter_by_voting(args):
    """
    Tell whether mask ``x`` collects enough votes from the masks in ``total_list``.

    A mask in ``total_list`` votes for ``x`` when ``get_iou(x, mask) > iou_th``.
    When both ``rect`` and ``rects`` are given, masks whose rectangles do not
    overlap ``rect`` on both axes are counted as non-votes without calling
    ``get_iou``.

    :param args: tuple ``(x, total_list, voting_th, iou_th, rect, rects)``;
                 ``rect`` is unpacked as (rmin, rmax, cmin, cmax) and
                 ``rects[i]`` is the rectangle of ``total_list[i]``
    :return: True when the number of votes is at least ``voting_th``
    """
    x, total_list, voting_th, iou_th, rect, rects = args
    voted = []
    for i2, x2 in enumerate(total_list):
        if rect is not None and rects is not None:
            rmin1, rmax1, cmin1, cmax1 = rect
            rmin2, rmax2, cmin2, cmax2 = rects[i2]
            # interval-overlap test per axis: an endpoint of one range lies in the other
            overlap_r = (rmin1 <= rmin2 <= rmax1 or rmin1 <= rmax2 <= rmax1
                         ) or (rmin2 <= rmin1 <= rmax2 or rmin2 <= rmax1 <= rmax2)
            overlap_c = (cmin1 <= cmin2 <= cmax1 or cmin1 <= cmax2 <= cmax1
                         ) or (cmin2 <= cmin1 <= cmax2 or cmin2 <= cmax1 <= cmax2)
            if not (overlap_r and overlap_c):
                # rectangles are disjoint: no vote, IoU computation skipped
                voted.append(0)
                continue
        voted.append(1 if get_iou(x, x2) > iou_th else 0)
    return sum(voted) >= voting_th


if __name__ == '__main__':
    # expose Trainer's methods as a command line interface
    fire.Fire(Trainer)
    print(HyperParams.get().__dict__)
def save(self):
    """
    Write every artifact of this submission to disk.

    Produces, in order: the submission CSV (ImageId / EncodedPixels), the
    hyper-parameter dump (JSON), one HTML overview each for the training,
    validation and test images, and a pickle holding ``valid_instances`` and
    ``test_instances``. All files are opened through ``with`` so handles are
    released even when a write fails.
    """
    # save a submission file
    sub = pd.DataFrame()
    sub['ImageId'] = self.test_ids
    sub['EncodedPixels'] = pd.Series(
        self.rles).apply(lambda x: ' '.join(str(y) for y in x))
    filepath = self.get_filepath()
    sub.to_csv(filepath, index=False)
    logger.info('%s saved at %s.' % (self.name, filepath))

    # save hyperparameters
    with open(self.get_confpath(), 'w') as f:
        f.write(json.dumps(HyperParams.get().__dict__, indent=4))

    total_html = "<html><body>Average Score=$avg_score$<br/><br/><table>" \
                 " <tr>" \
                 " <th>ID</th><th>Image</th>" \
                 " </tr>" \
                 " $rows$" \
                 "</table></body></html>"
    # the <img> tag is now properly closed (it used to run into </td>)
    row_html = "<tr>" \
               " <td><b>{idx}</b><br/>{iou}<br/>{iou2}</td><td><img src=\"./{idx}.jpg\"></td>" \
               "</tr>"

    # save training results, then validation results
    scored_pages = (
        (self.train_scores, self.get_train_htmlpath()),
        (self.valid_scores, self.get_valid_htmlpath()),
    )
    for score_table, html_path in scored_pages:
        rows = []
        metrics = []
        for idx, (loss, metric, metric_desc) in score_table.items():
            rows.append(row_html.format(idx=idx,
                                        iou=format(metric, '.3f'),
                                        iou2='<br/>'.join(metric_desc)))
            metrics.append(metric)
        # np.mean([]) yields nan but also emits a RuntimeWarning; skip the call
        avg_score = str(np.mean(metrics)) if metrics else 'nan'
        html = total_html.replace('$rows$', ''.join(rows)).replace(
            '$avg_score$', avg_score)
        with open(html_path, 'w') as f:
            f.write(html)

    # save test results
    total_html = "<html><body><table>" \
                 " <tr>" \
                 " <th>IDX</th><th>ID</th><th>Image</th>" \
                 " </tr>" \
                 " $rows$" \
                 "</table></body></html>"
    row_html = "<tr>" \
               " <td>{idx}</td><td><img src=\"./{idx}.jpg\"></td>" \
               "</tr>"
    rows = [row_html.format(idx=idx) for idx in self.test_scores]
    html = total_html.replace('$rows$', ''.join(rows))
    with open(self.get_test_htmlpath(), 'w') as f:
        f.write(html)

    # save pkl
    with open(self.get_pklpath(), 'wb') as f:
        pickle.dump(
            {
                'valid_instances': self.valid_instances,
                'test_instances': self.test_instances
            }, f, pickle.HIGHEST_PROTOCOL)
class KaggleSubmission:
    """
    Collects per-image results of one model run and writes/submits them.

    Everything is stored under ``BASEPATH/<name>``: visualisation JPEGs,
    HTML overviews, the submission CSV, the hyper-parameter dump and a pickle
    with the predicted instances.
    """
    # output root; depends on the dataset stage at import time
    BASEPATH = os.path.dirname(os.path.realpath(__file__)) + (
        "/submissions"
        if HyperParams.get().dataset_stage == 1 else "/submissions_stage2")
    CNAME = 'data-science-bowl-2018'

    def __init__(self, name):
        """
        :param name: run name; used as the output directory name
        """
        self.name = name
        self.test_ids = []
        self.rles = []
        self.train_scores = OrderedDict()
        self.valid_scores = OrderedDict()
        self.test_scores = OrderedDict()
        self.valid_instances = {}  # key : id -> (instances, scores)
        self.test_instances = {}

        # run directory plus its 'valid' and 'train' sub-directories
        for parts in ((), ('valid',), ('train',)):
            path = os.path.join(KaggleSubmission.BASEPATH, self.name, *parts)
            logger.info('creating: %s' % path)
            os.makedirs(path, exist_ok=True)

    @staticmethod
    def _as_text(idx):
        """Return ``idx`` as str, decoding UTF-8 bytes ids."""
        return idx.decode("utf-8") if isinstance(idx, bytes) else idx

    @staticmethod
    def _write_text(filepath, text):
        """Write ``text`` to ``filepath``, closing the handle even on failure."""
        with open(filepath, 'w') as f:
            f.write(text)

    def save_train_image(self, idx, image, loss=0.0, score=0.0, score_desc=None):
        """
        Store a training-set visualisation and remember its scores.

        :param idx: sample id (str or UTF-8 bytes)
        :param image: image passed to ``cv2.imwrite``
        :param loss: loss recorded for the sample
        :param score: metric recorded for the sample
        :param score_desc: list of description strings; defaults to an empty list
        """
        # decode before building the file name: bytes + '.jpg' raises TypeError
        idx = self._as_text(idx)
        cv2.imwrite(
            os.path.join(KaggleSubmission.BASEPATH, self.name, 'train',
                         idx + '.jpg'), image)
        self.train_scores[idx] = (
            loss, score, [] if score_desc is None else score_desc)

    def save_valid_image(self, idx, image, loss=0.0, score=0.0, score_desc=None):
        """
        Store a validation-set visualisation and remember its scores.

        :param idx: sample id (str or UTF-8 bytes)
        :param image: image passed to ``cv2.imwrite``
        :param loss: loss recorded for the sample
        :param score: metric recorded for the sample
        :param score_desc: list of description strings; defaults to an empty list
        """
        idx = self._as_text(idx)
        cv2.imwrite(
            os.path.join(KaggleSubmission.BASEPATH, self.name, 'valid',
                         idx + '.jpg'), image)
        self.valid_scores[idx] = (
            loss, score, [] if score_desc is None else score_desc)

    def save_image(self, idx, image, loss=0.0):
        """
        Store a test-set visualisation.

        :param idx: sample id (str or UTF-8 bytes)
        :param image: image passed to ``cv2.imwrite``
        :param loss: loss recorded for the sample
        """
        idx = self._as_text(idx)
        cv2.imwrite(
            os.path.join(KaggleSubmission.BASEPATH, self.name, idx + '.jpg'),
            image)
        self.test_scores[idx] = (loss, 0.0)

    def add_result(self, idx, instances):
        """
        Append the run-length encodings of one test image to the submission.

        An image without any encodable instance still gets one row with an
        empty encoding, so every image id appears in the CSV.

        :param idx: test sample id
        :param instances: list of (h, w, 1) numpy containing
        """
        added = 0
        for instance in instances:
            rles, cnt = rle_encoding(instance)
            if cnt < 3:
                # too few encoded pixels: not worth a row
                continue
            assert len(rles) % 2 == 0

            self.test_ids.append(idx)
            self.rles.append(rles)
            added += 1

        if added == 0:
            self.test_ids.append(idx)
            self.rles.append([])

    def get_filepath(self):
        """:return: path of the submission CSV"""
        return os.path.join(KaggleSubmission.BASEPATH, self.name,
                            'submission_%s.csv' % self.name)

    def get_confpath(self):
        """:return: path of the hyper-parameter JSON dump"""
        return os.path.join(KaggleSubmission.BASEPATH, self.name,
                            'config.json')

    def get_train_htmlpath(self):
        """:return: path of the training-set HTML overview"""
        return os.path.join(KaggleSubmission.BASEPATH, self.name, 'train',
                            'train.html')

    def get_valid_htmlpath(self):
        """:return: path of the validation-set HTML overview"""
        return os.path.join(KaggleSubmission.BASEPATH, self.name, 'valid',
                            'valid.html')

    def get_test_htmlpath(self):
        """:return: path of the test-set HTML overview"""
        return os.path.join(KaggleSubmission.BASEPATH, self.name, 'test.html')

    def get_pklpath(self):
        """:return: path of the pickled instances"""
        return os.path.join(KaggleSubmission.BASEPATH, self.name,
                            'submission.pkl')

    def save(self):
        """
        Write every artifact of this submission to disk.

        Produces, in order: the submission CSV, the hyper-parameter JSON, the
        train/valid/test HTML overviews and the instances pickle.
        """
        # save a submission file
        sub = pd.DataFrame()
        sub['ImageId'] = self.test_ids
        sub['EncodedPixels'] = pd.Series(
            self.rles).apply(lambda x: ' '.join(str(y) for y in x))
        filepath = self.get_filepath()
        sub.to_csv(filepath, index=False)
        logger.info('%s saved at %s.' % (self.name, filepath))

        # save hyperparameters
        self._write_text(self.get_confpath(),
                         json.dumps(HyperParams.get().__dict__, indent=4))

        total_html = "<html><body>Average Score=$avg_score$<br/><br/><table>" \
                     " <tr>" \
                     " <th>ID</th><th>Image</th>" \
                     " </tr>" \
                     " $rows$" \
                     "</table></body></html>"
        # the <img> tag is now properly closed (it used to run into </td>)
        row_html = "<tr>" \
                   " <td><b>{idx}</b><br/>{iou}<br/>{iou2}</td><td><img src=\"./{idx}.jpg\"></td>" \
                   "</tr>"

        # save training results, then validation results
        scored_pages = (
            (self.train_scores, self.get_train_htmlpath()),
            (self.valid_scores, self.get_valid_htmlpath()),
        )
        for score_table, html_path in scored_pages:
            rows = []
            metrics = []
            for idx, (loss, metric, metric_desc) in score_table.items():
                rows.append(row_html.format(idx=idx,
                                            iou=format(metric, '.3f'),
                                            iou2='<br/>'.join(metric_desc)))
                metrics.append(metric)
            # np.mean([]) yields nan but also emits a RuntimeWarning
            avg_score = str(np.mean(metrics)) if metrics else 'nan'
            html = total_html.replace('$rows$', ''.join(rows)).replace(
                '$avg_score$', avg_score)
            self._write_text(html_path, html)

        # save test results
        total_html = "<html><body><table>" \
                     " <tr>" \
                     " <th>IDX</th><th>ID</th><th>Image</th>" \
                     " </tr>" \
                     " $rows$" \
                     "</table></body></html>"
        row_html = "<tr>" \
                   " <td>{idx}</td><td><img src=\"./{idx}.jpg\"></td>" \
                   "</tr>"
        rows = [row_html.format(idx=idx) for idx in self.test_scores]
        self._write_text(self.get_test_htmlpath(),
                         total_html.replace('$rows$', ''.join(rows)))

        # save pkl
        with open(self.get_pklpath(), 'wb') as f:
            pickle.dump(
                {
                    'valid_instances': self.valid_instances,
                    'test_instances': self.test_instances
                }, f, pickle.HIGHEST_PROTOCOL)

    def submit_result(self, submit_msg='KakaoAutoML'):
        """
        Submit result to kaggle and wait for getting the result.

        Polls the submission list every 10 seconds, 30 times at most, until
        the newest entry is 'complete' and differs from the one seen before
        submitting.

        :param submit_msg: message attached to the submission
        :return: (submit_result, newest submission) or (submit_result, None)
                 when the leaderboard did not update in time
        """
        logger.info('kaggle.submit_result: initialization')
        api_client = KaggleApi()
        api_client.authenticate()
        submissions = api_client.competitionSubmissions(KaggleSubmission.CNAME)
        last_idx = submissions[0].ref if len(submissions) > 0 else -1

        # submit
        logger.info('kaggle.submit_result: trying to submit @ %s' %
                    self.get_filepath())
        submit_result = api_client.competitionSubmit(self.get_filepath(),
                                                     submit_msg,
                                                     KaggleSubmission.CNAME)
        logger.info('kaggle.submit_result: submitted!')

        # wait for the updated LB
        wait_interval = 10  # in seconds
        for _ in range(60 // wait_interval * 5):
            submissions = api_client.competitionSubmissions(
                KaggleSubmission.CNAME)
            if (len(submissions) > 0
                    and submissions[0].status == 'complete'
                    and submissions[0].ref != last_idx):
                # updated
                logger.info('kaggle.submit_result: LB Score Updated!')
                return submit_result, submissions[0]
            # always wait before polling again, also when the list is empty
            # (the empty case used to retry immediately without sleeping)
            time.sleep(wait_interval)

        logger.info('kaggle.submit_result: LB Score NOT Updated!')
        return submit_result, None
def build(self):
    """
    Assemble the U-Net graph, its sigmoid output and the cross-entropy loss.

    Sets ``self.logit``, ``self.output``, ``self.loss`` and ``self.loss_opt``.

    :return: the logit tensor produced by the final convolution
    """
    hyper = HyperParams.get()

    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/layers.py#L429
    bn_kwargs = dict(
        is_training=self.is_training,
        center=True,
        scale=True,
        decay=hyper.net_bn_decay,
        epsilon=hyper.net_bn_epsilon,
        fused=True,
        zero_debias_moving_mean=True,
    )
    dropout_kwargs = dict(
        keep_prob=hyper.net_dropout_keep,
        is_training=self.is_training,
    )
    conv_kwargs = dict(
        padding='SAME',
        weights_initializer=tf.truncated_normal_initializer(
            mean=0.0, stddev=hyper.net_init_stddev),
        normalizer_fn=slim.batch_norm,
        normalizer_params=bn_kwargs,
        activation_fn=tf.nn.elu,
    )

    net = self.input_batch
    skip_features = []  # encoder outputs, consumed in reverse by the decoder
    with slim.arg_scope([slim.convolution, slim.conv2d_transpose], **conv_kwargs):
        with slim.arg_scope([slim.dropout], **dropout_kwargs):
            step_size = hyper.unet_step_size
            base_feature_size = hyper.unet_base_feature
            max_feature_size = base_feature_size * (2 ** step_size)

            # down sampling steps
            for level in range(1, step_size + 1):
                down_channels = int(base_feature_size * (2 ** (level - 1)))
                net = NetworkUnet.double_conv(
                    net, down_channels, scope='down_conv_%d' % level)
                skip_features.append(net)
                net = slim.max_pool2d(
                    net, [2, 2], 2, padding='SAME', scope='pool%d' % level)

            # middle
            net = NetworkUnet.double_conv(
                net, max_feature_size, scope='middle_conv_1')

            # upsampling steps
            for i in range(step_size):
                up_channels = int(max_feature_size / (2 ** (i + 1)))
                net = slim.conv2d_transpose(
                    net, up_channels, [2, 2], 2,
                    scope='up_trans_conv_%d' % (i + 1))

                # upsample with origin version
                down_feat = skip_features.pop()
                assert net.shape[3] == down_feat.shape[3], '%d, %d, %d' % (
                    i, net.shape[3], down_feat.shape[3])

                net = tf.concat([down_feat, net], axis=-1)
                net = NetworkUnet.double_conv(
                    net, up_channels, scope='up_conv_%d' % (i + 1))

            # not in the original paper
            net = NetworkUnet.double_conv(net, 32, scope='output_conv_1')

    # original paper : one 1x1 conv
    # NOTE(review): placed outside the arg_scope above, so no batch norm is
    # applied here -- confirm against the original indentation.
    net = slim.convolution(
        net, 1, [3, 3], 1,
        scope='final_conv',
        activation_fn=None,
        padding='SAME',
        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01))

    self.logit = net
    self.output = tf.nn.sigmoid(net, 'visualization')

    # per-pixel weights when enabled, otherwise a uniform weight of 1.0
    loss_weights = self.weight_batch if self.unet_weight else 1.0
    self.loss = tf.losses.sigmoid_cross_entropy(
        multi_class_labels=self.mask_batch,
        logits=self.logit,
        weights=loss_weights)
    self.loss_opt = self.loss
    return net
from tensorpack.dataflow import PrefetchData from data_augmentation import data_to_segment_input, data_to_normalize01 from hyperparams import HyperParams logger = logging.getLogger('train') logger.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) ch.setLevel(logging.DEBUG) formatter = logging.Formatter( '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s') ch.setFormatter(formatter) logger.handlers = [] logger.addHandler(ch) if HyperParams.get().dataset_stage == 1: master_dir_train = '/data/public/rw/datasets/dsb2018/train' master_dir_train2 = None master_dir_test = '/data/public/rw/datasets/dsb2018/test' # train/valid set k folds implementation IDX_LIST = list(next(os.walk(master_dir_train))[1]) logger.info('Loading Dataset for Stage1(%d)' % (len(IDX_LIST))) if HyperParams.get().data_fold > 0: VALID_IDX_LIST = IDX_LIST[-94 * HyperParams.get().data_fold:][:94] TRAIN_IDX_LIST = sorted(list(set(IDX_LIST) - set(VALID_IDX_LIST))) assert len(VALID_IDX_LIST) == 94, len(VALID_IDX_LIST) assert len(TRAIN_IDX_LIST) == 576, len(TRAIN_IDX_LIST) else:
def run(self, model, epoch=600, batchsize=16, learning_rate=0.0001,
        early_rejection=False, valid_interval=10, tag='', save_result=True,
        checkpoint='', pretrain=False, skip_train=False, validate_train=True,
        validate_valid=True,
        logdir='/data/public/rw/kaggle-data-science-bowl/logs/', **kwargs):
    """
    Train a network, evaluate it and write the submission artifacts.

    :param model: network identifier passed to ``set_network``
    :param epoch: last epoch (exclusive) of the training loop
    :param batchsize: batch size; also used to derive the starting epoch from
                      the restored global step
    :param learning_rate: initial learning rate
    :param early_rejection: stop early when loss / mIoU do not improve
    :param valid_interval: validate every ``valid_interval`` epochs
    :param tag: run-name prefix; a timestamp is used when empty
    :param save_result: run inference on the test list and save the submission
    :param checkpoint: '' (fresh start), 'best', 'latest' or a checkpoint path
    :param pretrain: on a fresh start, load the matching variables from the
                     network's pretrain checkpoint (variables whose name
                     contains 'logit' are skipped)
    :param skip_train: skip the training loop
    :param validate_train: evaluate on the first 20 training ids
                           (True, 'True' or 'true' enable it)
    :param validate_valid: evaluate on the validation ids
                           (True, 'True' or 'true' enable it)
    :param logdir: prefix of the TensorBoard log directories
    :param kwargs: accepted and ignored
    :return: (best validation mIoU, run name)
    """
    self.set_network(model, batchsize)
    ds_train, ds_valid, ds_valid_full, ds_test = self.network.get_input_flow()
    self.network.build()
    print(HyperParams.get().__dict__)

    net_output = self.network.get_output()
    net_loss = self.network.get_loss()

    global_step = tf.Variable(0, trainable=False)
    learning_rate_v, train_op = self.network.get_optimize_op(
        global_step=global_step, learning_rate=learning_rate)

    best_loss_val = 999999
    best_miou_val = 0.0
    name = '%s_%s_lr=%.8f_epoch=%d_bs=%d' % (
        tag if tag else datetime.datetime.now().strftime("%y%m%dT%H%M%f"),
        model,
        learning_rate,
        epoch,
        batchsize,
    )
    model_path = os.path.join(KaggleSubmission.BASEPATH, name, 'model')
    best_ckpt_saver = BestCheckpointSaver(save_dir=model_path,
                                          num_to_keep=100,
                                          maximize=True)

    saver = tf.train.Saver()
    m_epoch = 0

    # initialize session
    self.init_session()

    # tensorboard
    tf.summary.scalar('loss', net_loss, collections=['train', 'valid'])
    s_train = tf.summary.merge_all('train')
    s_valid = tf.summary.merge_all('valid')
    train_writer = tf.summary.FileWriter(logdir + name + '/train',
                                         self.sess.graph)
    valid_writer = tf.summary.FileWriter(logdir + name + '/valid',
                                         self.sess.graph)

    logger.info('initialization+')
    if not checkpoint:
        self.sess.run(tf.global_variables_initializer())

        if pretrain:
            global_vars = tf.global_variables()

            from tensorflow.python import pywrap_tensorflow
            reader = pywrap_tensorflow.NewCheckpointReader(
                self.network.get_pretrain_path())
            var_to_shape_map = reader.get_variable_to_shape_map()
            saved_vars = list(var_to_shape_map.keys())

            # restore only variables present in the checkpoint, except logits
            var_list = [
                x for x in global_vars
                if x.name.replace(':0', '') in saved_vars
            ]
            var_list = [x for x in var_list if 'logit' not in x.name]
            logger.info('pretrained weights(%d) loaded : %s' %
                        (len(var_list), self.network.get_pretrain_path()))

            pretrain_loader = tf.train.Saver(var_list)
            pretrain_loader.restore(self.sess,
                                    self.network.get_pretrain_path())
    elif checkpoint == 'best':
        path = get_best_checkpoint(model_path)
        saver.restore(self.sess, path)
        logger.info('restored from best checkpoint, %s' % path)
    elif checkpoint == 'latest':
        path = tf.train.latest_checkpoint(model_path)
        saver.restore(self.sess, path)
        logger.info('restored from latest checkpoint, %s' % path)
    else:
        saver.restore(self.sess, checkpoint)
        logger.info('restored from checkpoint, %s' % checkpoint)

    # resume from the epoch implied by the restored global step
    step = self.sess.run(global_step)
    start_e = (batchsize * step) // len(CellImageDataManagerTrain.LIST)

    logger.info('training started+')
    if epoch > 0 and not skip_train:
        try:
            losses = []
            for e in range(start_e, epoch):
                loss_val_avg = []
                train_cnt = 0
                for dp_train in ds_train.get_data():
                    _, loss_val, summary_train = self.sess.run(
                        [train_op, net_loss, s_train],
                        feed_dict=self.network.get_feeddict(dp_train, True))
                    loss_val_avg.append(loss_val)
                    train_cnt += 1

                step, lr = self.sess.run([global_step, learning_rate_v])
                loss_val_avg = sum(loss_val_avg) / len(loss_val_avg)
                logger.info(
                    'training %d epoch %d step, lr=%.8f loss=%.4f train_iter=%d'
                    % (e + 1, step, lr, loss_val_avg, train_cnt))
                # NOTE(review): records the last batch loss although it is
                # later compared with the epoch average -- confirm intent.
                losses.append(loss_val)
                train_writer.add_summary(summary_train, global_step=step)

                if early_rejection and len(losses) > 100 and losses[
                        len(losses) - 100] * 1.05 < loss_val_avg:
                    logger.info('not improved, stop at %d' % e)
                    break

                # early rejection
                if early_rejection and ((e == 50 and loss_val > 0.5) or
                                        (e == 200 and loss_val > 0.2)):
                    logger.info('not improved training loss, stop at %d' % e)
                    break

                m_epoch = e
                avg = 10.0
                if loss_val < 0.20 and (e + 1) % valid_interval == 0:
                    avg = []
                    for _ in range(5):
                        ds_valid.reset_state()
                        ds_valid_d = ds_valid.get_data()
                        for dp_valid in ds_valid_d:
                            loss_val, summary_valid = self.sess.run(
                                [net_loss, s_valid],
                                feed_dict=self.network.get_feeddict(
                                    dp_valid, False))
                            avg.append(loss_val)
                        ds_valid_d.close()

                    avg = sum(avg) / len(avg)
                    logger.info('validation loss=%.4f' % (avg))
                    if best_loss_val > avg:
                        best_loss_val = avg
                    valid_writer.add_summary(summary_valid, global_step=step)

                if avg < 0.16 and e >= 100 and (e + 1) % valid_interval == 0:
                    # one zero-initialised counter per IoU threshold; the
                    # previous np.array(len(thr_list)) started every counter
                    # at len(thr_list) and skewed the metric
                    cnt_tps = np.zeros(len(thr_list), dtype=np.int32)
                    cnt_fps = np.zeros(len(thr_list), dtype=np.int32)
                    cnt_fns = np.zeros(len(thr_list), dtype=np.int32)
                    pool_args = []
                    ds_valid_full.reset_state()
                    ds_valid_full_d = ds_valid_full.get_data()
                    for idx, dp_valid in tqdm(
                            enumerate(ds_valid_full_d),
                            desc='validate using the iou metric',
                            total=len(CellImageDataManagerValid.LIST)):
                        image = dp_valid[0]
                        inference_result = self.network.inference(
                            self.sess, image, cutoff_instance_max=0.9)
                        instances, scores = inference_result[
                            'instances'], inference_result['scores']
                        pool_args.append((thr_list, instances, dp_valid[2]))
                    ds_valid_full_d.close()

                    # metric computation is farmed out to worker processes
                    pool = Pool(processes=8)
                    cnt_results = pool.map(do_get_multiple_metric, pool_args)
                    pool.close()
                    pool.join()
                    pool.terminate()
                    for cnt_result in cnt_results:
                        cnt_tps = cnt_tps + cnt_result[0]
                        cnt_fps = cnt_fps + cnt_result[1]
                        cnt_fns = cnt_fns + cnt_result[2]

                    ious = np.divide(cnt_tps, cnt_tps + cnt_fps + cnt_fns)
                    mIou = np.mean(ious)
                    logger.info('validation metric: %.5f' % mIou)
                    if best_miou_val < mIou:
                        best_miou_val = mIou
                    best_ckpt_saver.handle(
                        mIou, self.sess,
                        global_step)  # save & keep best model

                    # early rejection by mIou
                    if early_rejection and e > 50 and best_miou_val < 0.15:
                        break
                    if early_rejection and e > 100 and best_miou_val < 0.25:
                        break
        except KeyboardInterrupt:
            logger.info('interrupted. stop training, start to validate.')

    # best effort: evaluate with the best checkpoint when one exists
    try:
        chk_path = get_best_checkpoint(model_path, select_maximum_value=True)
        if chk_path:
            logger.info(
                'training is done. Start to evaluate the best model. %s' %
                chk_path)
            saver.restore(self.sess, chk_path)
    except Exception as e:
        logger.warning('error while loading the best model:' + str(e))

    # show sample in train set : show_train > 0
    kaggle_submit = KaggleSubmission(name)
    if validate_train in [True, 'True', 'true']:
        logger.info('Start to test on training set.... (may take a while)')
        train_metrics = []
        for single_id in tqdm(CellImageDataManagerTrain.LIST[:20],
                              desc='training set test'):
            result = self.single_id(None, None, single_id,
                                    set_type='train', show=False,
                                    verbose=False)
            image = result['image']
            labels = result['labels']
            instances = result['instances']
            score = result['score']
            score_desc = result['score_desc']

            img_vis = Network.visualize(image, labels, instances, None)
            kaggle_submit.save_train_image(single_id, img_vis,
                                           score=score,
                                           score_desc=score_desc)
            train_metrics.append(score)
        logger.info('trainset validation ends. score=%.4f' %
                    np.mean(train_metrics))

    # show sample in valid set : show_valid > 0
    if validate_valid in [True, 'True', 'true']:
        logger.info('Start to test on validation set.... (may take a while)')
        valid_metrics = []
        for single_id in tqdm(CellImageDataManagerValid.LIST,
                              desc='validation set test'):
            result = self.single_id(None, None, single_id,
                                    set_type='train', show=False,
                                    verbose=False)
            image = result['image']
            labels = result['labels']
            instances = result['instances']
            score = result['score']
            score_desc = result['score_desc']

            img_vis = Network.visualize(image, labels, instances, None)
            kaggle_submit.save_valid_image(single_id, img_vis,
                                           score=score,
                                           score_desc=score_desc)
            kaggle_submit.valid_instances[single_id] = (
                instances, result['instance_scores'])
            valid_metrics.append(score)
        logger.info('validation ends. score=%.4f' % np.mean(valid_metrics))

    # show sample in test set
    logger.info('saving...')
    if save_result:
        for i, single_id in tqdm(enumerate(CellImageDataManagerTest.LIST),
                                 total=len(CellImageDataManagerTest.LIST)):  # TODO
            try:
                result = self.single_id(None, None, single_id, 'test',
                                        False, False)
            except Exception as e:
                # one failing image must not abort the whole submission
                logger.warning('single_id=%s err=%s' % (single_id, str(e)))
                continue
            image = result['image']
            instances = result['instances']
            img_h, img_w = image.shape[:2]

            img_vis = Network.visualize(image, None, instances, None)

            # save to submit
            instances = Network.resize_instances(instances, (img_h, img_w))
            kaggle_submit.save_image(single_id, img_vis)
            kaggle_submit.test_instances[single_id] = (
                instances, result['instance_scores'])
            kaggle_submit.add_result(single_id, instances)

            # temporal saving
            if i % 500 == 0:
                kaggle_submit.save()
        kaggle_submit.save()

    logger.info(
        'done. epoch=%d best_loss_val=%.4f best_mIOU=%.4f name= %s' %
        (m_epoch, best_loss_val, best_miou_val, name))
    return best_miou_val, name
def ensemble_models_id(self, single_id, set_type='train', model='stage1_unet',
                       show=True, verbose=True):
    """
    Ensemble the stored predictions of all loaded models for one image.

    Pipeline: collect RCNN and UNet instances -> keep UNet instances with
    enough votes -> NMS -> (when RCNN results are used) keep instances
    confirmed by RCNN or scoring above ``ensemble_th_no_rcnn`` -> merge with
    RCNN instances and NMS again -> fill holes and remove overlaps ->
    optional score filter -> metric against the labels when available.

    :param single_id: image id
    :param set_type: 'train' reads each model's 'valid_instances', anything
                     else reads 'test_instances'
    :param model: ensemble identifier passed to ``_load_ensembles``
    :param show: display the visualisation with OpenCV instead of returning
    :param verbose: unused
    :return: None when ``show`` is true; otherwise a dict with
             'instance_scores', 'score', 'image', 'instances', 'labels'
             and 'score_desc'
    """
    self._load_ensembles(model)
    d = self._get_cell_data(single_id, set_type)
    logger.debug('image size=%dx%d' % (d.img_h, d.img_w))

    total_model_size = len(self.ensembles['rcnn']) + len(
        self.ensembles['unet'])
    logger.debug('total_model_size=%d rcnn=%d unet=%d' %
                 (total_model_size, len(self.ensembles['rcnn']),
                  len(self.ensembles['unet'])))

    rcnn_instances = []
    rcnn_scores = []

    # TODO : RCNN Ensemble
    rcnn_ensemble = False
    for idx, data in enumerate(self.ensembles['rcnn']):
        if set_type == 'train':
            instances, scores = data['valid_instances'].get(
                single_id, (None, None))
            rcnn_ensemble = True
        else:
            # TODO
            ls = data['test_instances'].get(single_id, None)
            if ls is None:
                instances = scores = None
            else:
                instances = [x[0] for x in ls]
                scores = [x[1] for x in ls]
                rcnn_ensemble = True

        if instances is None:
            logger.warning('Not found id=%s in RCNN %d Model' %
                           (single_id, idx + 1))
            continue
        # logged only after the None check: len(None) would raise TypeError
        logger.debug('rcnn # instances = %d' % len(instances))

        rcnn_instances.extend(
            [instance[:d.img_h, :d.img_w] for instance in instances])
        rcnn_scores.extend([
            s * HyperParams.get().rcnn_score_rescale for s in scores
        ])  # rescale scores

    total_instances = []
    total_scores = []

    # TODO : UNet Ensemble
    for idx, data in enumerate(self.ensembles['unet']):
        if set_type == 'train':
            instances, scores = data['valid_instances'].get(
                single_id, (None, None))
        else:
            instances, scores = data['test_instances'].get(
                single_id, (None, None))

        if instances is None:
            logger.warning('Not found id=%s in UNet %d Model' %
                           (single_id, idx + 1))
            continue

        total_instances.extend(instances)
        total_scores.extend(scores)

    watch = StopWatch()
    watch.start()
    logger.debug('voting+ size=%d' % len(total_instances))

    # TODO : Voting?
    voting_th = HyperParams.get().ensemble_voting_th

    # rectangles let the voting skip IoU for clearly disjoint masks
    rects = [get_rect_of_mask(a) for a in total_instances]
    voted = [
        filter_by_voting(
            (x, total_instances, voting_th, 0.3, rects[i], rects))
        for i, x in enumerate(total_instances)
    ]
    total_instances = list(compress(total_instances, voted))
    total_scores = list(compress(total_scores, voted))

    watch.stop()
    logger.debug('voting elapsed=%.5f' % watch.get_elapsed())
    watch.reset()

    # nms
    watch.start()
    logger.debug('nms+ size=%d' % len(total_instances))
    instances, scores = Network.nms(
        total_instances, total_scores, None,
        thresh=HyperParams.get().ensemble_nms_iou)
    watch.stop()
    logger.debug('nms elapsed=%.5f' % watch.get_elapsed())
    watch.reset()

    # high threshold if not exists in RCNN
    if rcnn_ensemble:
        voted = [
            filter_by_voting((x, rcnn_instances, 1, 0.3, None, None))
            for x in instances
        ]

        new_instances = []
        new_scores = []
        no_rcnn_th = HyperParams.get().ensemble_th_no_rcnn
        for instance, score, v in zip(instances, scores, voted):
            # keep when confirmed by RCNN, or confident enough on its own
            if v or score > no_rcnn_th:
                new_instances.append(instance)
                new_scores.append(score)
        instances, scores = new_instances, new_scores

    # nms with rcnn
    instances = instances + rcnn_instances
    scores = scores + rcnn_scores
    watch.start()
    logger.debug('nms_rcnn+ size=%d' % len(instances))
    instances, scores = Network.nms(
        instances, scores, None,
        thresh=HyperParams.get().ensemble_nms_iou)
    watch.stop()
    logger.debug('nms_rcnn- size=%d elapsed=%.5f' %
                 (len(instances), watch.get_elapsed()))
    watch.reset()

    # remove overlaps
    logger.debug('remove overlaps+')
    sorted_idx = sorted(range(len(instances)),
                        key=lambda i: get_size_of_mask(instances[i]))
    instances = [instances[x] for x in sorted_idx]
    scores = [scores[x] for x in sorted_idx]

    instances2 = [ndimage.binary_fill_holes(i) for i in instances]
    instances2, scores2 = Network.remove_overlaps(instances2, scores)

    # remove deleted instances
    logger.debug('remove deleted+ size=%d' % len(instances2))
    voted = [
        filter_by_voting((x, instances, 1, 0.75, None, None))
        for x in instances2
    ]
    instances = list(compress(instances2, voted))
    scores = list(compress(scores2, voted))

    # TODO : Filter by score?
    logger.debug('filter by score+ size=%d' % len(instances))
    score_filter_th = HyperParams.get().ensemble_score_th
    if score_filter_th > 0.0:
        logger.debug('filter_by_score=%.3f' % score_filter_th)
        # filter both lists from the same pairing; filtering `instances`
        # first and zipping again misaligned instances and scores
        kept = [(i, s) for i, s in zip(instances, scores)
                if s > score_filter_th]
        instances = [i for i, _ in kept]
        scores = [s for _, s in kept]

    logger.debug('finishing+ size=%d' % len(instances))
    image = d.image(is_gray=False)
    score_desc = []
    labels = []
    if len(d.masks) > 0:  # has label masks
        labels = list(d.multi_masks(transpose=False))
        tp, fp, fn = get_multiple_metric(thr_list, instances, labels)

        logger.debug('instances=%d, labels=%d' %
                     (len(instances), len(labels)))
        for i, thr in enumerate(thr_list):
            desc = 'score=%.3f, tp=%d, fp=%d, fn=%d --- iou %.2f' % (
                (tp / (tp + fp + fn))[i], tp[i], fp[i], fn[i], thr)
            logger.debug(desc)
            score_desc.append(desc)
        score = np.mean(tp / (tp + fp + fn))
        logger.debug('score=%.3f, tp=%.1f, fp=%.1f, fn=%.1f --- mean' %
                     (score, np.mean(tp), np.mean(fp), np.mean(fn)))
    else:
        score = 0.0

    if show:
        img_vis = Network.visualize(image, labels, instances, None)
        cv2.imshow('valid', img_vis)
        cv2.waitKey(0)
    else:
        return {
            'instance_scores': scores,
            'score': score,
            'image': image,
            'instances': instances,
            'labels': labels,
            'score_desc': score_desc
        }
def single_id(self, model, checkpoint, single_id, set_type='train',
              show=True, verbose=True):
    """
    Run test-time-augmented inference on a single sample and post-process it.

    The sample is inferred at its default scale, with both flip codes, at a
    rescaled size, and with both flip codes at that rescaled size. All
    candidate instances are then merged through voting, NMS and overlap
    removal. When the sample has label masks, the result is scored against
    them for every threshold in ``thr_list``.

    :param model: if truthy, passed to ``self.set_network`` before the
        network is built and the session is initialised.
    :param checkpoint: if truthy, restored into ``self.sess`` with
        ``tf.train.Saver``.
    :param single_id: identifier handed to ``self._get_cell_data``.
    :param set_type: data-set selector handed to ``self._get_cell_data``.
    :param show: if truthy, display the visualisation with ``cv2.imshow``
        and block on ``cv2.waitKey(0)``.
    :param verbose: if truthy, emit the info-level log lines.
    :return: a dict with keys ``instance_scores``, ``score``, ``image``,
        ``instances``, ``labels`` and ``score_desc`` when ``model`` is falsy;
        otherwise nothing is returned explicitly (``None``).
    """
    if model:
        self.set_network(model)
        self.network.build()
        self.init_session()
    if checkpoint:
        saver = tf.train.Saver()
        saver.restore(self.sess, checkpoint)
        if verbose:
            logger.info('restored from checkpoint, %s' % checkpoint)

    d = self._get_cell_data(single_id, set_type)
    # Size is taken before preprocessing; every mask is resized back to it.
    h, w = d.img.shape[:2]
    shortedge = min(h, w)
    logger.debug('%s image size=(%d x %d)' % (single_id, w, h))

    watch = StopWatch()
    logger.debug('preprocess+')
    d = self.network.preprocess(d)
    image = d.image(is_gray=False)

    cutoff_instance_max = HyperParams.get().post_cutoff_max_th
    cutoff_instance_avg = HyperParams.get().post_cutoff_avg_th

    def infer(img):
        # One forward pass, unpacked to an (instances, scores) pair.
        result = self.network.inference(
            self.sess, img,
            cutoff_instance_max=cutoff_instance_max,
            cutoff_instance_avg=cutoff_instance_avg)
        return result['instances'], result['scores']

    def to_original_size(masks):
        # Bring masks back to the (h, w) measured before preprocessing.
        return Network.resize_instances(masks, target_size=(h, w))

    def unflip(masks, orientation):
        # Undo the input flip by applying the same flip code to every mask.
        return [cv2.flip(m.astype(np.uint8), orientation) for m in masks]

    def inference_with_scale(img, scale):
        # Infer on a copy of img scaled by the same factor on both axes.
        scaled = cv2.resize(img.copy(), None, None, scale, scale,
                            interpolation=cv2.INTER_AREA)
        masks, mask_scores = infer(scaled)
        return to_original_size(masks), mask_scores

    total_instances = []
    total_scores = []

    # --- default scale (from_set tag 1)
    watch.start()
    logger.debug('inference at default scale+ %dx%d' % (w, h))
    instances_pre, scores_pre = infer(image)
    instances_pre = to_original_size(instances_pre)
    total_instances = total_instances + instances_pre
    total_scores = total_scores + scores_pre
    total_from_set = [1] * len(instances_pre)
    watch.stop()
    logger.debug('inference- elapsed=%.5f' % watch.get_elapsed())
    watch.reset()

    # --- flips at default scale (from_set tags 2 and 3)
    # The watch was reset above, so start it again for this stage; this
    # follows the start/stop/reset pattern the other stages use.
    watch.start()
    logger.debug('inference with flips+')
    for flip_orientation in range(2):
        flipped = cv2.flip(image.copy(), flip_orientation)
        instances_flip, scores_flip = infer(flipped)
        instances_flip = unflip(instances_flip, flip_orientation)
        instances_flip = to_original_size(instances_flip)
        total_instances = total_instances + instances_flip
        total_scores = total_scores + scores_flip
        total_from_set = (total_from_set +
                          [2 + flip_orientation] * len(instances_flip))
    watch.stop()
    logger.debug('inference- elapsed=%.5f' % watch.get_elapsed())
    watch.reset()

    # --- rescaled image (from_set tag 4)
    watch.start()
    logger.debug('inference with scaling+flips+')
    max_mask = get_max_size_of_masks(instances_pre)
    logger.debug('max_mask=%d' % max_mask)
    if max_mask > 0:
        resize_target = HyperParams.get().test_aug_scale_t / max_mask
        resize_target = min(HyperParams.get().test_aug_scale_max,
                            resize_target)
    else:
        # Nothing was detected at default scale, so the ratio is undefined
        # (division by zero). Use the upper clamp, which is what the ratio
        # tends to as the mask size shrinks.
        resize_target = HyperParams.get().test_aug_scale_max
    resize_target = max(HyperParams.get().test_aug_scale_min, resize_target)
    # Lower bound: scale * shortedge is never below 228.
    resize_target = max(228.0 / shortedge, resize_target)
    logger.debug('resize_target=%.4f' % resize_target)

    instances_rescale, scores_rescale = inference_with_scale(
        image, resize_target)
    total_instances = total_instances + instances_rescale
    total_scores = total_scores + scores_rescale
    total_from_set = total_from_set + [4] * len(instances_rescale)

    # --- flips of the rescaled image (from_set tags 5 and 6)
    for flip_orientation in range(2):
        flipped = cv2.flip(image.copy(), flip_orientation)
        instances_flip, scores_flip = inference_with_scale(
            flipped, resize_target)
        instances_flip = unflip(instances_flip, flip_orientation)
        instances_flip = to_original_size(instances_flip)
        total_instances = total_instances + instances_flip
        total_scores = total_scores + scores_flip
        total_from_set = (total_from_set +
                          [5 + flip_orientation] * len(instances_flip))
    watch.stop()
    logger.debug('inference- elapsed=%.5f' % watch.get_elapsed())
    watch.reset()

    # --- voting: keep only the candidates that filter_by_voting accepts
    watch.start()
    logger.debug('voting+ size=%d' % len(total_instances))
    voting_th = HyperParams.get().post_voting_th
    rects = [get_rect_of_mask(a) for a in total_instances]
    voted = [
        filter_by_voting(
            (x, total_instances, voting_th, 0.3, rects[i], rects))
        for i, x in enumerate(total_instances)
    ]
    total_instances = list(compress(total_instances, voted))
    total_scores = list(compress(total_scores, voted))
    total_from_set = list(compress(total_from_set, voted))
    watch.stop()
    logger.debug('voting elapsed=%.5f' % watch.get_elapsed())
    watch.reset()

    # --- nms
    watch.start()
    logger.debug('nms+ size=%d' % len(total_instances))
    instances, scores = Network.nms(
        total_instances, total_scores, total_from_set,
        thresh=HyperParams.get().test_aug_nms_iou)
    watch.stop()
    logger.debug('nms elapsed=%.5f' % watch.get_elapsed())
    watch.reset()

    # --- remove overlaps, processing the largest masks first
    logger.debug('remove overlaps+')
    sorted_idx = sorted(
        range(len(instances)),
        key=lambda idx: get_size_of_mask(instances[idx]),
        reverse=True)
    instances = [instances[idx] for idx in sorted_idx]
    scores = [scores[idx] for idx in sorted_idx]
    instances = [
        ndimage.morphology.binary_fill_holes(mask) for mask in instances
    ]
    instances, scores = Network.remove_overlaps(instances, scores)

    # TODO : Filter by score? (HyperParams post_filter_th; currently disabled)

    logger.debug('finishing+')
    image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)

    score_desc = []
    labels = []
    if len(d.masks) > 0:  # has label masks
        labels = list(d.multi_masks(transpose=False))
        labels = to_original_size(labels)
        # thr_list is not defined in this method; it is expected to come
        # from the enclosing module scope.
        tp, fp, fn = get_multiple_metric(thr_list, instances, labels)
        # Per-threshold score, computed once and reused for the mean.
        ratios = tp / (tp + fp + fn)

        if verbose:
            logger.info('instances=%d, reinf(%.3f) labels=%d' %
                        (len(instances), resize_target, len(labels)))
        for i, thr in enumerate(thr_list):
            desc = 'score=%.3f, tp=%d, fp=%d, fn=%d --- iou %.2f' % (
                ratios[i], tp[i], fp[i], fn[i], thr)
            if verbose:
                logger.info(desc)
            score_desc.append(desc)
        score = np.mean(ratios)
        if verbose:
            logger.info('score=%.3f, tp=%.1f, fp=%.1f, fn=%.1f --- mean' %
                        (score, np.mean(tp), np.mean(fp), np.mean(fn)))
    else:
        score = 0.0

    if show:
        img_vis = Network.visualize(image, labels, instances, None)
        cv2.imshow('valid', img_vis)
        cv2.waitKey(0)
    if not model:
        return {
            'instance_scores': scores,
            'score': score,
            'image': image,
            'instances': instances,
            'labels': labels,
            'score_desc': score_desc
        }