def train():
    """Run the training loop and return the final test-set metrics.

    All hyper-parameters and collaborators (``config``, ``m``, ``sess``,
    ``environ``, ``pb``, ``exp_logger``, ``test_iter``, ``train_step``,
    ``evaluate``, ``save``) are taken from the enclosing scope.

    The learning rate starts at ``config.base_learn_rate`` and is multiplied
    by 0.1 each time the 1-based iteration count reaches the next entry of
    ``config.lr_decay_steps``.

    Returns:
        psnr, ssim: the pair returned by ``evaluate(test_iter, -1)`` once the
            loop has finished.
    """
    lr = config.base_learn_rate
    # Work on a private copy: entries are popped as they are consumed, and
    # popping config.lr_decay_steps directly would wipe the shared schedule
    # for any later reader (or for a second call to train()).
    lr_decay_steps = list(config.lr_decay_steps)
    max_train_iter = config.max_train_iter
    m.assign_lr(sess, lr)

    if environ.verbose:
        loop = range(max_train_iter)
    else:
        loop = pb.get(max_train_iter)

    for niter in loop:
        step = niter + 1

        # Decrease the learning rate when the next scheduled step is reached.
        if lr_decay_steps and step == lr_decay_steps[0]:
            lr *= 0.1
            m.assign_lr(sess, lr)
            lr_decay_steps.pop(0)

        l2_loss = train_step()

        # Log on the very first iteration as well as on every period boundary.
        if step % config.disp_iter == 0 or niter == 0:
            exp_logger.log_train_loss(niter, l2_loss)

        if step % config.valid_iter == 0 or niter == 0:
            log.info("Experment ID {}".format(environ.exp_id))
            test_iter.reset()
            psnr, ssim = evaluate(test_iter, -1)
            exp_logger.log_valid_psnr(niter, psnr)
            exp_logger.log_valid_ssim(niter, ssim)

        if step % config.save_iter == 0:
            save()

    # Final evaluation after the last training step.
    test_iter.reset()
    psnr, ssim = evaluate(test_iter, -1)
    return psnr, ssim
def write_segmentation_data(opt, image_data, fname, num_per_shard=16000,
                            seed=2):
    """Generate segmentation training examples and write them to shards.

    Args:
        opt: dictionary of options; ``opt['neg_pos_ratio']`` is read here and
            the whole dictionary is forwarded to ``_image_to_segmentation``.
        image_data: list of dictionaries, each with an ``'object_info'``
            entry.
        fname: string, output file basename.
        num_per_shard: int, number of training examples per file shard.
        seed: seed for the ``np.random.RandomState`` handed to
            ``_image_to_segmentation``.
    """
    rng = np.random.RandomState(seed)
    neg_pos_ratio = opt['neg_pos_ratio']

    # Expected number of examples: object count scaled by (1 + ratio).
    num_images = len(image_data)
    num_objects = sum(
        len(image_data[ii]['object_info']) for ii in xrange(num_images))
    num_ex_final = num_objects * (1 + neg_pos_ratio)
    log.info('Preparing segmentation data, {} examples'.format(num_ex_final))

    num_shards = int(np.ceil(num_ex_final / float(num_per_shard)))
    log.info('Writing to {} in {} shards'.format(fname, num_shards))

    sharded_out = ShardedFile(fname, num_shards=num_shards)
    with ShardedFileWriter(sharded_out, num_ex_final) as writer:
        for ii in progress_bar.get(num_images):
            examples = _image_to_segmentation(
                opt, image_data[ii], random=rng)
            for example in examples:
                writer.write(example)
def train():
    """Run the training loop and return the final test-set accuracy.

    All hyper-parameters and collaborators (``config``, ``m``, ``sess``,
    ``environ``, ``pb``, ``exp_logger``, ``trainval_iter``, ``test_iter``,
    ``train_step``, ``evaluate``, ``save``) are taken from the enclosing
    scope.

    The learning rate starts at ``config.base_learn_rate`` and is multiplied
    by 0.1 each time the 1-based iteration count reaches the next entry of
    ``config.lr_decay_steps``.

    Returns:
        acc: the value returned by ``evaluate(test_iter, -1)`` once the loop
            has finished.
    """
    lr = config.base_learn_rate
    # Work on a private copy: entries are popped as they are consumed, and
    # popping config.lr_decay_steps directly would wipe the shared schedule
    # for any later reader (or for a second call to train()).
    lr_decay_steps = list(config.lr_decay_steps)
    max_train_iter = config.max_train_iter
    m.assign_lr(sess, lr)

    if environ.verbose:
        loop = range(max_train_iter)
    else:
        loop = pb.get(max_train_iter)

    for niter in loop:
        step = niter + 1

        # Decrease the learning rate when the next scheduled step is reached.
        if lr_decay_steps and step == lr_decay_steps[0]:
            lr *= 0.1
            m.assign_lr(sess, lr)
            lr_decay_steps.pop(0)

        ce = train_step()

        # Log on the very first iteration as well as on every period boundary.
        if step % config.disp_iter == 0 or niter == 0:
            exp_logger.log_train_ce(niter, ce)

        if step % config.valid_iter == 0 or niter == 0:
            # NOTE(review): the second argument (10 here, -1 for the test
            # set) presumably bounds the number of evaluated batches --
            # confirm against evaluate().
            acc = evaluate(trainval_iter, 10)
            exp_logger.log_train_acc(niter, acc)
            test_iter.reset()
            acc = evaluate(test_iter, -1)
            log.info("Experment ID {}".format(environ.exp_id))
            exp_logger.log_valid_acc(niter, acc)

        if step % config.save_iter == 0:
            save()

    # Final evaluation after the last training step.
    test_iter.reset()
    acc = evaluate(test_iter, -1)
    return acc
def get_segmentation_data(opt, image_data, seed=2):
    """Build the in-memory dataset for training segmentation.

    Each image is turned into a sequence of examples by
    ``_image_to_segmentation``; the examples are copied, in order, into
    pre-allocated arrays sized for ``(1 + opt['neg_pos_ratio'])`` examples
    per annotated object. Rows that are never filled stay zero.

    Args:
        opt: options. ``opt['neg_pos_ratio']`` and
            ``opt['output_window_size']`` are read here; the whole dictionary
            is forwarded to ``_image_to_segmentation``.
        image_data: dataset generated from get_image_data; each entry has an
            ``'object_info'`` field.
        seed: seed for the ``np.random.RandomState`` handed to
            ``_image_to_segmentation``.

    Returns:
        results: dictionary of arrays, with N the expected example count and
            S = ``opt['output_window_size']``:
            input: numpy.ndarray, [N, S, S, 3], dtype=uint8
            label_segmentation: numpy.ndarray, [N, S, S], dtype=uint8
            label_objectness: numpy.ndarray, [N, 1], dtype=uint8
    """
    rng = np.random.RandomState(seed)
    neg_pos_ratio = opt['neg_pos_ratio']
    outsize = opt['output_window_size']

    # Expected number of examples: object count scaled by (1 + ratio).
    num_images = len(image_data)
    num_objects = sum(
        len(image_data[ii]['object_info']) for ii in xrange(num_images))
    num_ex_final = num_objects * (1 + neg_pos_ratio)
    log.info('Preparing segmentation data, {} examples'.format(num_ex_final))

    # Pre-allocate the output arrays.
    input_data = np.zeros([num_ex_final, outsize, outsize, 3], dtype='uint8')
    label_segmentation = np.zeros(
        [num_ex_final, outsize, outsize], dtype='uint8')
    label_objectness = np.zeros([num_ex_final, 1], dtype='uint8')

    # Fill the arrays sequentially across all images.
    cursor = 0
    for ii in progress_bar.get(num_images):
        examples = _image_to_segmentation(opt, image_data[ii], random=rng)
        for example in examples:
            input_data[cursor] = example['input']
            label_segmentation[cursor] = example['label_segmentation']
            label_objectness[cursor] = example['label_objectness']
            cursor += 1

    results = {
        'input': input_data,
        'label_segmentation': label_segmentation,
        'label_objectness': label_objectness
    }
    return results
def get_segmentation_data(opt, image_data, seed=2):
    """Build the in-memory dataset for training segmentation.

    Each image is turned into a sequence of examples by
    ``_image_to_segmentation``; the examples are copied, in order, into
    pre-allocated arrays sized for ``(1 + opt['neg_pos_ratio'])`` examples
    per annotated object. Rows that are never filled stay zero.

    Args:
        opt: options. ``opt['neg_pos_ratio']`` and
            ``opt['output_window_size']`` are read here; the whole dictionary
            is forwarded to ``_image_to_segmentation``.
        image_data: dataset generated from get_image_data; each entry has an
            ``'object_info'`` field.
        seed: seed for the ``np.random.RandomState`` handed to
            ``_image_to_segmentation``.

    Returns:
        results: dictionary of arrays, with N the expected example count and
            S = ``opt['output_window_size']``:
            input: numpy.ndarray, [N, S, S, 3], dtype=uint8
            label_segmentation: numpy.ndarray, [N, S, S], dtype=uint8
            label_objectness: numpy.ndarray, [N, 1], dtype=uint8
    """
    rng = np.random.RandomState(seed)
    neg_pos_ratio = opt['neg_pos_ratio']
    outsize = opt['output_window_size']

    # Expected number of examples: object count scaled by (1 + ratio).
    num_images = len(image_data)
    num_objects = sum(
        len(image_data[ii]['object_info']) for ii in xrange(num_images))
    num_ex_final = num_objects * (1 + neg_pos_ratio)
    log.info('Preparing segmentation data, {} examples'.format(num_ex_final))

    # Pre-allocate the output arrays.
    input_data = np.zeros([num_ex_final, outsize, outsize, 3], dtype='uint8')
    label_segmentation = np.zeros(
        [num_ex_final, outsize, outsize], dtype='uint8')
    label_objectness = np.zeros([num_ex_final, 1], dtype='uint8')

    # Fill the arrays sequentially across all images.
    cursor = 0
    for ii in progress_bar.get(num_images):
        examples = _image_to_segmentation(opt, image_data[ii], random=rng)
        for example in examples:
            input_data[cursor] = example['input']
            label_segmentation[cursor] = example['label_segmentation']
            label_objectness[cursor] = example['label_objectness']
            cursor += 1

    results = {
        'input': input_data,
        'label_segmentation': label_segmentation,
        'label_objectness': label_objectness
    }
    return results
def get_raw_data(opt, seed=2):
    """Generate raw data: a random set of object descriptions per example.

    Args:
        opt: dictionary, options. Reads ``num_examples``,
            ``max_num_objects``, ``radius_lower``, ``radius_upper``,
            ``width``, ``height`` and ``num_object_types``.
        seed: int, rng seed.

    Returns:
        results: list with one entry per example; each entry is a list of
            dictionaries with keys ``'center'`` (an ``(x, y)`` tuple of
            ints), ``'radius'`` (int) and ``'type'`` (int).
    """
    num_examples = opt['num_examples']
    max_num_objects = opt['max_num_objects']
    radius_lower = opt['radius_lower']
    radius_upper = opt['radius_upper']
    width = opt['width']
    height = opt['height']
    num_object_types = opt['num_object_types']

    rng = np.random.RandomState(seed)
    log.info('Generating raw data')

    results = []
    for _ex_idx in progress_bar.get(num_examples):
        num_obj = int(np.ceil(rng.uniform(0, 1) * max_num_objects))
        objects = []
        for _obj_idx in xrange(num_obj):
            # Draw order (radius, x, y, type) determines the generated data
            # for a given seed, so it must not be rearranged.
            radius = int(np.ceil(rng.uniform(radius_lower, radius_upper)))
            # Centers are sampled so that the object stays inside the canvas.
            center = (
                int(np.ceil(rng.uniform(radius, width - radius))),
                int(np.ceil(rng.uniform(radius, height - radius))),
            )
            # The small epsilon keeps the floored value below
            # num_object_types.
            type_sample = rng.uniform(0, num_object_types - 1e-5)
            obj_type = int(np.floor(type_sample))
            objects.append({
                'center': center,
                'radius': radius,
                'type': obj_type
            })
        results.append(objects)

    return results
def run(mscoco, image_list, output_fname, num_shards):
    """Write the boxes and categories of every image to a sharded file.

    For each image the annotations are looked up through ``mscoco``; images
    whose annotations are ``None`` are skipped and reported at the end.

    Args:
        mscoco: MSCOCO API object.
        image_list: list of image paths; each is resolved to an image ID with
            ``mscoco.get_image_id_from_path`` and also used as the record key.
        output_fname: string, output file basename passed to ``ShardedFile``.
        num_shards: int, number of shards of the output file.
    """
    not_found = []
    cat_rev_dict = mscoco.get_cat_list_reverse()
    fout = ShardedFile(output_fname, num_shards=num_shards)
    pb = progress_bar.get(len(image_list))
    log.info('Running through all images')

    with ShardedFileWriter(fout, num_objects=len(image_list)) as writer:
        for i in writer:
            image_fname = image_list[i]
            image_id = mscoco.get_image_id_from_path(image_fname)
            anns = mscoco.get_image_annotations(image_id)
            if anns is None:
                not_found.append(image_id)
                # A skipped image is still a processed image; without this
                # the progress bar never reaches its total.
                pb.increment()
                continue

            num_ann = len(anns)
            boxes = numpy.zeros((num_ann, 4), dtype='int16')
            cats = numpy.zeros((num_ann, 1), dtype='int16')
            # Use a dedicated index so the writer index `i` is not shadowed.
            for ann_idx, ann in enumerate(anns):
                boxes[ann_idx, :] = numpy.floor(ann['bbox']).astype('int16')
                cats[ann_idx] = cat_rev_dict[ann['category_id']]

            data = {'boxes': boxes, 'categories': cats, 'image': image_fname}
            writer.write(data, key=image_fname)
            pb.increment()

    for image_id in not_found:
        log.error('Not found annotation for image {}'.format(image_id))
    log.error('Total {:d} image annotations not found.'.format(len(not_found)))
def write_segmentation_data(opt, image_data, fname, num_per_shard=16000,
                            seed=2):
    """Generate segmentation training examples and write them to shards.

    Args:
        opt: dictionary of options; ``opt['neg_pos_ratio']`` is read here and
            the whole dictionary is forwarded to ``_image_to_segmentation``.
        image_data: list of dictionaries, each with an ``'object_info'``
            entry.
        fname: string, output file basename.
        num_per_shard: int, number of training examples per file shard.
        seed: seed for the ``np.random.RandomState`` handed to
            ``_image_to_segmentation``.
    """
    rng = np.random.RandomState(seed)
    neg_pos_ratio = opt['neg_pos_ratio']

    # Expected number of examples: object count scaled by (1 + ratio).
    num_images = len(image_data)
    num_objects = sum(
        len(image_data[ii]['object_info']) for ii in xrange(num_images))
    num_ex_final = num_objects * (1 + neg_pos_ratio)
    log.info('Preparing segmentation data, {} examples'.format(num_ex_final))

    num_shards = int(np.ceil(num_ex_final / float(num_per_shard)))
    log.info('Writing to {} in {} shards'.format(fname, num_shards))

    sharded_out = ShardedFile(fname, num_shards=num_shards)
    with ShardedFileWriter(sharded_out, num_ex_final) as writer:
        for ii in progress_bar.get(num_images):
            examples = _image_to_segmentation(
                opt, image_data[ii], random=rng)
            for example in examples:
                writer.write(example)
def get_raw_data(opt, seed=2):
    """Generate raw data: a random set of object descriptions per example.

    Args:
        opt: dictionary, options. Reads ``num_examples``,
            ``max_num_objects``, ``radius_lower``, ``radius_upper``,
            ``width``, ``height`` and ``num_object_types``.
        seed: int, rng seed.

    Returns:
        results: list with one entry per example; each entry is a list of
            dictionaries with keys ``'center'`` (an ``(x, y)`` tuple of
            ints), ``'radius'`` (int) and ``'type'`` (int).
    """
    num_examples = opt['num_examples']
    max_num_objects = opt['max_num_objects']
    radius_lower = opt['radius_lower']
    radius_upper = opt['radius_upper']
    width = opt['width']
    height = opt['height']
    num_object_types = opt['num_object_types']

    rng = np.random.RandomState(seed)
    log.info('Generating raw data')

    results = []
    for _ex_idx in progress_bar.get(num_examples):
        num_obj = int(np.ceil(rng.uniform(0, 1) * max_num_objects))
        objects = []
        for _obj_idx in xrange(num_obj):
            # Draw order (radius, x, y, type) determines the generated data
            # for a given seed, so it must not be rearranged.
            radius = int(np.ceil(rng.uniform(radius_lower, radius_upper)))
            # Centers are sampled so that the object stays inside the canvas.
            center = (
                int(np.ceil(rng.uniform(radius, width - radius))),
                int(np.ceil(rng.uniform(radius, height - radius))),
            )
            # The small epsilon keeps the floored value below
            # num_object_types.
            type_sample = rng.uniform(0, num_object_types - 1e-5)
            obj_type = int(np.floor(type_sample))
            objects.append({
                'center': center,
                'radius': radius,
                'type': obj_type
            })
        results.append(objects)

    return results
def pack_data(mscoco, info_file, feature_file, local_feat, output_fname,
              num_ex_per_shards):
    """Pack question entries and per-image box features into a sharded file.

    For every entry of ``info_file`` the features of the matching image are
    looked up in ``feature_file`` and flattened into a single float32 vector:
    the entry's category followed, for each box, by
    ``[box (4), category, score, local features...]``.

    Args:
        mscoco: MSCOCO API object, used to map an image ID to its path.
        info_file: sharded file of entries with ``'image_id'``,
            ``'category'`` and ``'number'`` fields.
        feature_file: sharded file keyed by image path; each record has
            ``'boxes'``, ``'categories'``, ``'scores'`` and, when
            ``local_feat`` is given, a field of that name.
        local_feat: name of the per-box feature field to append, or None to
            append nothing.
        output_fname: string, output file basename.
        num_ex_per_shards: int, number of examples per output shard.

    Raises:
        Exception: if an image path is missing from ``feature_file`` or the
            ``'boxes'`` array is neither 1-D nor 2-D.
    """
    # Single source of truth for "are local features requested".
    has_local_feat = local_feat is not None

    with ShardedFileReader(info_file) as info_reader:
        num_obj = len(info_reader)
        log.info('Number of entries: {:d}'.format(num_obj))
        pb = progress_bar.get(num_obj)
        num_shards = int(np.ceil(num_obj / float(num_ex_per_shards)))
        output_file = ShardedFile(output_fname, num_shards=num_shards)

        with ShardedFileReader(feature_file) as feature_reader:
            with ShardedFileWriter(output_file, num_objects=num_obj) as writer:
                for question_entry in info_reader:
                    image_id = question_entry['image_id']
                    image_path = mscoco.get_image_path(image_id)
                    feature = feature_reader[image_path]
                    if feature is None:
                        raise Exception(
                            'Key {} not found in feature'.format(image_path))

                    if has_local_feat:
                        local_feat_dim = feature[local_feat].shape[-1]
                    else:
                        local_feat_dim = 0

                    # A 1-D boxes array holds one un-batched box.
                    boxes_rank = len(feature['boxes'].shape)
                    if boxes_rank == 1:
                        num_boxes = 1
                    elif boxes_rank == 2:
                        num_boxes = feature['boxes'].shape[0]
                    else:
                        raise Exception('Unknown shape for boxes {}'.format(
                            feature['boxes'].shape))

                    # Row layout: 4 box coords, category, score, local feats.
                    inp_dim = 4 + 1 + 1 + local_feat_dim
                    inp = np.zeros((num_boxes, inp_dim), dtype='float32')
                    if num_boxes == 1:
                        # Single box: fields are assigned whole, not indexed.
                        inp[0, :4] = feature['boxes']
                        inp[0, 4] = feature['categories']
                        inp[0, 5] = feature['scores']
                        if has_local_feat:
                            inp[0, 6:] = feature[local_feat]
                    else:
                        for box_i in xrange(num_boxes):
                            inp[box_i, :4] = feature['boxes'][box_i]
                            inp[box_i, 4] = feature['categories'][box_i]
                            inp[box_i, 5] = feature['scores'][box_i]
                            if has_local_feat:
                                inp[box_i, 6:] = feature[local_feat][box_i]

                    inp = inp.reshape(num_boxes * inp_dim)
                    cat = np.array([question_entry['category']])
                    total_inp = np.concatenate((cat, inp)).astype('float32')
                    data = {
                        'image_id': image_id,
                        'input': total_inp,
                        'label': int(question_entry['number'])
                    }
                    writer.write(data)
                    pb.increment()
def get_dataset(self):
    """Load, resize and assemble the dataset, caching the result.

    Recommended settings: 128 x 448.

    The dataset already held in ``self.dataset`` or returned by
    ``self.read_h5_data()`` is used when available. Otherwise the image IDs
    listed in ``<self.folder>/<self.split>.txt`` are shuffled with a fixed
    seed, the images and their ground truth are read and resized, and the
    result is stored in ``self.dataset`` and written out with
    ``self.write_h5_data()``.

    Returns:
        Dictionary with:
            input: uint8 array, [num_ex, height, width, 3]
            label_segmentation: uint8 array, [num_ex, timespan, height, width]
            label_score: uint8 array, [num_ex, timespan]; 1 for each object
                slot that is filled
            index_map: int array of length len(image IDs); entry k is the
                integer ID of the k-th example read
    """
    # Fast paths: in-memory copy first, then the on-disk cache.
    if self.dataset is not None:
        return self.dataset
    cached = self.read_h5_data()
    if cached:
        self.dataset = cached
        return self.dataset

    inp_height = self.opt['height']
    inp_width = self.opt['width']
    if 'num_examples' in self.opt:
        num_ex = self.opt['num_examples']
    else:
        num_ex = -1
    if 'timespan' in self.opt:
        timespan = self.opt['timespan']
    else:
        timespan = -1
    # cv2.resize takes its target size as (width, height).
    inp_shape = (inp_width, inp_height)

    ids_fname = os.path.join(self.folder, '{}.txt'.format(self.split))
    log.info('Reading image IDs')
    with open(ids_fname) as f_ids:
        img_ids = [line.strip('\n') for line in f_ids]
    if num_ex == -1:
        num_ex = len(img_ids)

    # Shuffle sequence (fixed seed, so the order is reproducible).
    rng = np.random.RandomState(2)
    order = np.arange(len(img_ids))
    rng.shuffle(order)

    # Read images.
    log.info('Reading {} images'.format(num_ex))
    idx_map = np.zeros(len(img_ids), dtype='int')
    inp_list = []
    segm_list = []
    max_num_obj = 0
    for idx in pb.get(num_ex):
        img_id = img_ids[order[idx]]
        idx_map[idx] = int(img_id)
        fname = '{}.png'.format(img_id)
        img_fname = os.path.join(self.image_folder, fname)
        gt_fname = os.path.join(self.gt_folder, fname)

        img = cv2.imread(img_fname)
        img = cv2.resize(img, inp_shape, interpolation=cv2.INTER_NEAREST)

        gt = cv2.imread(gt_fname).astype('float')
        segm = self.get_separate_labels(gt)
        max_num_obj = max(max_num_obj, len(segm))
        resized_masks = [
            cv2.resize(mask, inp_shape, interpolation=cv2.INTER_NEAREST)
            for mask in segm
        ]

        inp_list.append(img)
        segm_list.append(resized_masks)

    # Include one more slot than the largest object count.
    max_num_obj += 1
    if timespan == -1:
        timespan = max_num_obj
    else:
        timespan = max(timespan, max_num_obj)

    log.info('Assemble images')
    num_ex = len(inp_list)
    inp = np.zeros([num_ex, inp_height, inp_width, 3], dtype='uint8')
    label_segm = np.zeros(
        [num_ex, timespan, inp_height, inp_width], dtype='uint8')
    label_score = np.zeros([num_ex, timespan], dtype='uint8')
    log.info('Number of examples: {}'.format(num_ex))
    log.info('Input height: {} width: {}'.format(inp_height, inp_width))
    log.info('Input shape: {} label shape: {} {}'.format(
        inp.shape, label_segm.shape, label_score.shape))

    for ii, (img, masks) in enumerate(zip(inp_list, segm_list)):
        inp[ii] = img
        num_obj = len(masks)
        for jj, mask in enumerate(masks):
            label_segm[ii, jj] = mask
        label_score[ii, :num_obj] = 1

    # Single-argument parenthesised form: same output as `print idx_map`.
    print(idx_map)

    self.dataset = {
        'input': inp,
        'label_segmentation': label_segm,
        'label_score': label_score,
        'index_map': idx_map
    }
    self.write_h5_data()
    return self.dataset
def get_dataset(self):
    """Load, resize and assemble the dataset, caching the result.

    Recommended settings: 128 x 448.

    The dataset already held in ``self.dataset`` or returned by
    ``self.read_h5_data()`` is used when available. Otherwise the image IDs
    listed in ``<self.folder>/<self.split>.txt`` are shuffled with a fixed
    seed, the images and their ground truth are read and resized, and the
    result is stored in ``self.dataset`` and written out with
    ``self.write_h5_data()``.

    Returns:
        Dictionary with:
            input: uint8 array, [num_ex, height, width, 3]
            label_segmentation: uint8 array, [num_ex, timespan, height, width]
            label_score: uint8 array, [num_ex, timespan]; 1 for each object
                slot that is filled
            index_map: int array of length len(image IDs); entry k is the
                integer ID of the k-th example read
    """
    # Fast paths: in-memory copy first, then the on-disk cache.
    if self.dataset is not None:
        return self.dataset
    cached = self.read_h5_data()
    if cached:
        self.dataset = cached
        return self.dataset

    inp_height = self.opt['height']
    inp_width = self.opt['width']
    if 'num_examples' in self.opt:
        num_ex = self.opt['num_examples']
    else:
        num_ex = -1
    if 'timespan' in self.opt:
        timespan = self.opt['timespan']
    else:
        timespan = -1
    # cv2.resize takes its target size as (width, height).
    inp_shape = (inp_width, inp_height)

    ids_fname = os.path.join(self.folder, '{}.txt'.format(self.split))
    log.info('Reading image IDs')
    with open(ids_fname) as f_ids:
        img_ids = [line.strip('\n') for line in f_ids]
    if num_ex == -1:
        num_ex = len(img_ids)

    # Shuffle sequence (fixed seed, so the order is reproducible).
    rng = np.random.RandomState(2)
    order = np.arange(len(img_ids))
    rng.shuffle(order)

    # Read images.
    log.info('Reading {} images'.format(num_ex))
    idx_map = np.zeros(len(img_ids), dtype='int')
    inp_list = []
    segm_list = []
    max_num_obj = 0
    for idx in pb.get(num_ex):
        img_id = img_ids[order[idx]]
        idx_map[idx] = int(img_id)
        fname = '{}.png'.format(img_id)
        img_fname = os.path.join(self.image_folder, fname)
        gt_fname = os.path.join(self.gt_folder, fname)

        img = cv2.imread(img_fname)
        img = cv2.resize(img, inp_shape, interpolation=cv2.INTER_NEAREST)

        gt = cv2.imread(gt_fname).astype('float')
        segm = self.get_separate_labels(gt)
        max_num_obj = max(max_num_obj, len(segm))
        resized_masks = [
            cv2.resize(mask, inp_shape, interpolation=cv2.INTER_NEAREST)
            for mask in segm
        ]

        inp_list.append(img)
        segm_list.append(resized_masks)

    # Include one more slot than the largest object count.
    max_num_obj += 1
    if timespan == -1:
        timespan = max_num_obj
    else:
        timespan = max(timespan, max_num_obj)

    log.info('Assemble images')
    num_ex = len(inp_list)
    inp = np.zeros([num_ex, inp_height, inp_width, 3], dtype='uint8')
    label_segm = np.zeros(
        [num_ex, timespan, inp_height, inp_width], dtype='uint8')
    label_score = np.zeros([num_ex, timespan], dtype='uint8')
    log.info('Number of examples: {}'.format(num_ex))
    log.info('Input height: {} width: {}'.format(inp_height, inp_width))
    log.info('Input shape: {} label shape: {} {}'.format(
        inp.shape, label_segm.shape, label_score.shape))

    for ii, (img, masks) in enumerate(zip(inp_list, segm_list)):
        inp[ii] = img
        num_obj = len(masks)
        for jj, mask in enumerate(masks):
            label_segm[ii, jj] = mask
        label_score[ii, :num_obj] = 1

    # Single-argument parenthesised form: same output as `print idx_map`.
    print(idx_map)

    self.dataset = {
        'input': inp,
        'label_segmentation': label_segm,
        'label_score': label_score,
        'index_map': idx_map
    }
    self.write_h5_data()
    return self.dataset