def __init__(self, input, model, d_period=1, g_period=1):
    """
    Args:
        d_period(int): period of each d_opt run
        g_period(int): period of each g_opt run
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    assert min(d_period, g_period) == 1

    # Setup input
    cbs = input.setup(model.get_inputs_desc())
    self.register_callback(cbs)

    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True), \
            argscope(BatchNorm, internal_update=True):
        # Do not hook the BN updates onto either train_op; that would slow training down.
        self.tower_func(*input.get_input_tensors())
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if len(update_ops):
        logger.warn("Found {} ops in UPDATE_OPS collection!".format(len(update_ops)))
        logger.warn("Using SeparateGANTrainer with UPDATE_OPS may hurt your training speed a lot!")

    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        self.d_min = opt.minimize(
            model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = opt.minimize(
            model.g_loss, var_list=model.g_vars, name='g_min')
def get_depth_meta(cam_mat, *queries):
    assert isinstance(cam_mat, (np.ndarray, tf.Tensor)), type(cam_mat)
    responses = []
    for query in queries:
        if query == 'depth_min':
            responses.append(cam_mat[1, 3, 0])
        elif query == 'depth_interval':
            responses.append(cam_mat[1, 3, 1])
        elif query == 'depth_num':
            responses.append(cam_mat[1, 3, 2])
        elif query == 'depth_max':
            responses.append(cam_mat[1, 3, 3])
        elif query == 'extrinsic':
            responses.append(cam_mat[0])
        elif query == 'intrinsic':
            responses.append(cam_mat[1, :3, :3])
        elif query == 'R':
            responses.append(cam_mat[0, :3, :3])
        elif query == 'T':
            responses.append(cam_mat[0, :3, 3])
        else:
            logger.warn('unknown query: {}'.format(query))
            exit()
    return responses
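# Hedged usage sketch for get_depth_meta above, assuming the MVSNet-style camera
# layout implied by the indexing: cam_mat[0] is the 4x4 extrinsic [R|T] and
# cam_mat[1] holds the 3x3 intrinsic plus a depth row at cam_mat[1, 3, :] =
# (depth_min, depth_interval, depth_num, depth_max). All numbers are illustrative.
import numpy as np

cam_mat = np.zeros((2, 4, 4), dtype=np.float32)
cam_mat[0] = np.eye(4)                              # extrinsic: identity pose
cam_mat[1, :3, :3] = np.diag([500.0, 500.0, 1.0])   # intrinsic K
cam_mat[1, 3] = [425.0, 2.5, 192.0, 902.5]          # depth_min, interval, num, max

depth_min, depth_num, K = get_depth_meta(cam_mat, 'depth_min', 'depth_num', 'intrinsic')
assert depth_min == 425.0 and depth_num == 192.0 and K.shape == (3, 3)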
def get_imagenet_dataflow(
        datadir, name, batch_size,
        augmentors, parallel=None):
    """
    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
    assert datadir is not None
    assert isinstance(augmentors, list)
    isTrain = name == 'train'
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading
    if isTrain:
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls
        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds
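# Hedged usage sketch for get_imagenet_dataflow above. `fbresnet_augmentor` is
# assumed to be the usual ResNet augmentor factory defined elsewhere in this
# repo, and the dataset path is a placeholder. Newer tensorpack versions support
# direct iteration over the dataflow; older ones use ds.get_data() instead.
def _example_imagenet_input():
    augmentors = fbresnet_augmentor(isTrain=True)
    ds = get_imagenet_dataflow('/path/to/ILSVRC12', 'train',
                               batch_size=64, augmentors=augmentors)
    ds.reset_state()
    for images, labels in ds:
        # one batch of HWC BGR images and integer labels
        return images, labels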
def get_iNaturalist_dataflow( datadir, name, batch_size, augmentors, parallel=None): """ See explanations in the tutorial: http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html """ assert name in ['train', 'val', 'test'] assert datadir is not None assert isinstance(augmentors, list) isTrain = name == 'train' if parallel is None: parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading if isTrain: ds = dataset.iNaturalist(datadir, name, shuffle=True) ds = AugmentImageComponent(ds, augmentors, copy=False) if parallel < 16: logger.warn("DataFlow may become the bottleneck when too few processes are used.") ds = PrefetchDataZMQ(ds, parallel) ds = BatchData(ds, batch_size, remainder=False) else: ds = dataset.iNaturalistFiles(datadir, name, shuffle=False) aug = imgaug.AugmentorList(augmentors) def mapf(dp): fname, cls = dp im = cv2.imread(fname, cv2.IMREAD_COLOR) im = aug.augment(im) return im, cls ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True) ds = BatchData(ds, batch_size, remainder=True) ds = PrefetchDataZMQ(ds, 1) return ds
def Dropout(x, *args, **kwargs):
    """
    Same as `tf.layers.dropout`.
    However, for historical reasons, the first positional argument is
    interpreted as keep_prob rather than drop_prob.
    Explicitly use the `rate=` keyword argument to stay consistent.
    """
    if 'is_training' in kwargs:
        kwargs['training'] = kwargs.pop('is_training')

    if len(args) > 0:
        if args[0] != 0.5:
            logger.warn(
                "The first positional argument to tensorpack.Dropout is the probability to keep, rather than to drop. "
                "This is different from the rate argument in tf.layers.dropout due to historical reasons. "
                "To mimic tf.layers.dropout, explicitly use the keyword argument 'rate' instead.")
        rate = 1 - args[0]
    elif 'keep_prob' in kwargs:
        assert 'rate' not in kwargs, "Cannot set both keep_prob and rate!"
        rate = 1 - kwargs.pop('keep_prob')
    elif 'rate' in kwargs:
        rate = kwargs.pop('rate')
    else:
        rate = 0.5

    if kwargs.get('training', None) is None:
        kwargs['training'] = get_current_tower_context().is_training

    if get_tf_version_tuple() <= (1, 12):
        return tf.layers.dropout(x, rate=rate, **kwargs)
    else:
        return tf.nn.dropout(x, rate=rate if kwargs['training'] else 0.)
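# Hedged usage sketch for the Dropout wrapper above, assuming a TF1-style graph:
# prefer the explicit `rate=` keyword. `training=True` is passed here so the
# example does not depend on a TowerContext being active.
x_example = tf.placeholder(tf.float32, [None, 128], name='dropout_example_input')
y_new = Dropout(x_example, rate=0.2, training=True)   # drop 20% of activations
y_old = Dropout(x_example, 0.8, training=True)        # legacy positional arg: keep_prob=0.8, i.e. the same rate=0.2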
def _add_detection_gt(self, img, add_mask): """ Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection. If add_mask is True, also add 'segmentation' in coco poly format. """ # ann_ids = self.coco.getAnnIds(imgIds=img['image_id']) # objs = self.coco.loadAnns(ann_ids) objs = self.coco.imgToAnns[img['image_id']] # equivalent but faster than the above two lines # clean-up boxes valid_objs = [] width = img.pop('width') height = img.pop('height') for objid, obj in enumerate(objs): if obj.get('ignore', 0) == 1: continue x1, y1, w, h = obj['bbox'] # bbox is originally in float # x1/y1 means upper-left corner and w/h means true w/h. This can be verified by segmentation pixels. # But we do make an assumption here that (0.0, 0.0) is upper-left corner of the first pixel x1 = np.clip(float(x1), 0, width) y1 = np.clip(float(y1), 0, height) w = np.clip(float(x1 + w), 0, width) - x1 h = np.clip(float(y1 + h), 0, height) - y1 # Require non-zero seg area and more than 1x1 box size if obj['area'] > 1 and w > 0 and h > 0 and w * h >= 4: obj['bbox'] = [x1, y1, x1 + w, y1 + h] valid_objs.append(obj) if add_mask: segs = obj['segmentation'] if not isinstance(segs, list): assert obj['iscrowd'] == 1 obj['segmentation'] = None else: valid_segs = [np.asarray(p).reshape(-1, 2).astype('float32') for p in segs if len(p) >= 6] if len(valid_segs) == 0: logger.error("Object {} in image {} has no valid polygons!".format(objid, img['file_name'])) elif len(valid_segs) < len(segs): logger.warn("Object {} in image {} has invalid polygons!".format(objid, img['file_name'])) obj['segmentation'] = valid_segs # all geometrically-valid boxes are returned boxes = np.asarray([obj['bbox'] for obj in valid_objs], dtype='float32') # (n, 4) cls = np.asarray([ self.COCO_id_to_category_id[obj['category_id']] for obj in valid_objs], dtype='int32') # (n,) is_crowd = np.asarray([obj['iscrowd'] for obj in valid_objs], dtype='int8') # add the keys img['boxes'] = boxes # nx4 img['class'] = cls # n, always >0 img['is_crowd'] = is_crowd # n, if add_mask: # also required to be float32 img['segmentation'] = [ obj['segmentation'] for obj in valid_objs]
def eval_inference_results2(self, results, output=None, threshold=None, metric_only=False): # Compared with eval_inference_results, v2 version has an threshold # used to filter scores below. It is designed for SSL experiments. if not metric_only: if threshold is not None: logger.warn( "Use thresholding {} to filter final resulting boxes". format(threshold)) continuous_id_to_COCO_id = { v: k for k, v in self.COCO_id_to_category_id.items() } n = 0 final_results = [] for res in results: # convert to COCO's incontinuous category id if res["category_id"] in continuous_id_to_COCO_id: res["category_id"] = continuous_id_to_COCO_id[ res["category_id"]] if threshold is not None: if res["score"] < threshold: n += 1 continue # COCO expects results in xywh format box = res["bbox"] box[2] -= box[0] box[3] -= box[1] res["bbox"] = [round(float(x), 3) for x in box] final_results.append(res) results = final_results if output is not None: if not os.path.exists(os.path.dirname(output)): os.makedirs(os.path.dirname(output)) with open(output, "w") as f: json.dump(results, f) if threshold is not None: with open(output + "_boxcount.json", "w") as f: r = {"passed": len(results), "removed": n} print("Box thresholding stats: \n\t", r) json.dump(r, f) if len(results): metrics = self.print_coco_metrics(results) # save precision_recall data: precision_recall = self.cocoEval.precision_recall pr_path = os.path.join( os.path.split(output)[0], "precision_recall.npy") print("Saving precision_recall curve to {}".format(pr_path)) np.save(pr_path, {"pr": precision_recall}) # sometimes may crash if the results are empty? return metrics else: return {}
def get_imagenet_dataflow(datadir, name, batch_size, augmentors, parallel=None): """ See explanations in the tutorial: http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html """ assert name in ['train', 'val', 'test'] assert datadir is not None assert isinstance(augmentors, list) isTrain = name == 'train' if parallel is None: parallel = min(40, multiprocessing.cpu_count()) if isTrain: ds = dataset.ILSVRC12(datadir, name, shuffle=True) ds = AugmentImageComponent(ds, augmentors, copy=False) if parallel < 16: logger.warn( "DataFlow may become the bottleneck when too few processes are used." ) ds = PrefetchDataZMQ(ds, parallel) ds = BatchData(ds, batch_size, remainder=False) else: ds = dataset.ILSVRC12Files(datadir, name, shuffle=False) aug = imgaug.AugmentorList(augmentors) def mapf(dp): fname, cls = dp jpeg_filename = os.path.basename(fname) jpeg_dirname = os.path.basename(os.path.dirname(fname)) zip_filepath = os.path.dirname(fname) + '.zip' f = zipfile.ZipFile(zip_filepath, 'r') compress_jpeg = np.fromstring(f.read( os.path.join(jpeg_dirname, jpeg_filename)), dtype=np.uint8) im = cv2.imdecode(compress_jpeg, cv2.IMREAD_COLOR) #im = cv2.imread(fname, cv2.IMREAD_COLOR) im = aug.augment(im) return im, cls ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True) ds = BatchData(ds, batch_size, remainder=True) ds = PrefetchDataZMQ(ds, 1) return ds
def _process(self, grads):
    g = []
    to_print = []
    for grad, var in grads:
        if re.match(self._regex, var.op.name):
            g.append((grad, var))
        else:
            to_print.append(var.op.name)
    if self._verbose and len(to_print):
        message = ', '.join(to_print)
        logger.warn("No gradient w.r.t. these trainable variables: {}".format(message))
    return g
def _get_value_to_set(self):
    if self.current > self.best:
        self.best = self.current
        self.wait = 0
    else:
        self.wait += 1
        if self.wait > self.patience:
            self.wait = 0
            current_lr = self.get_current_value()
            self.base_lr = max(current_lr * self.factor, self.min_lr)
            logger.warn(
                "ReduceLROnPlateau reducing learning rate to {}".format(self.base_lr))
    return self.base_lr
def get_imagenet_dataflow(datadir, name, batch_size, augmentors,
                          parallel=None):  # build the ImageNet dataflow
    """
    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
    assert datadir is not None
    assert isinstance(augmentors, list)
    isTrain = name == 'train'
    if parallel is None:  # if the caller did not specify the parallelism
        # use half of the CPU cores, assuming hyperthreading
        parallel = min(40, multiprocessing.cpu_count() // 2)
    if isTrain:
        # ILSVRC12 reads the raw training set and yields (image, label) datapoints
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        # apply the image augmentors to the image component
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:  # too few worker processes
            logger.warn(
                "DataFlow may become the bottleneck when too few processes are used."
            )
        ds = PrefetchDataZMQ(ds, parallel)  # run the dataflow in `parallel` processes over ZMQ
        ds = BatchData(ds, batch_size, remainder=False)  # group datapoints into batches
    else:
        # at test time: read file names, then decode and augment in threads
        # ILSVRC12Files is the same as ILSVRC12, but yields file names instead of np arrays
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            # cv2.IMREAD_COLOR (the default flag): load a color image, ignoring transparency
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)  # augment the image
            return im, cls
        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)  # parallel decoding
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds
def get_config(model, nr_tower):
    batch = TOTAL_BATCH_SIZE // nr_tower
    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    dataset_train = get_data('train', batch, args.min_crop)
    dataset_val = get_data('val', batch, args.min_crop)

    # max_epoch = int(np.ceil(max_iter / base_step_size))
    step_size = 1280000 // TOTAL_BATCH_SIZE
    max_iter = int(step_size * args.epoch)
    max_epoch = (max_iter // step_size) + 1
    lr = args.lr
    lr_decay = np.exp(np.log(args.lr_ratio) / max_epoch)
    callbacks = [
        ModelSaver(),
        ScheduledHyperParamSetter('learning_rate',
                                  [(0, lr * 0.01), (step_size // 2, lr)],
                                  interp='linear', step_based=True),
        HyperParamSetterWithFunc('learning_rate',
                                 lambda e, x: x * lr_decay if e > 0 else x),
        ScheduledHyperParamSetter('bn_momentum',
                                  [(0, 0.9), (max_epoch // 3, 0.99), (max_epoch // 3 * 2, 0.999)]),
        EstimatedTimeLeft()
    ]
    try:
        callbacks.append(ScheduledHyperParamSetter(
            'dropblock_keep_prob', [(0, 0.9), (max_epoch - 1, 1.0)], interp='linear'))
    except Exception:
        logger.warn('Could not add the dropblock_keep_prob callback.')

    infs = [ClassificationError('wrong-top1', 'val-error-top1'),
            ClassificationError('wrong-top5', 'val-error-top5')]
    if nr_tower == 1:
        # single-GPU inference with queue prefetch
        callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
    else:
        # multi-GPU inference (with mandatory queue prefetch)
        callbacks.append(DataParallelInferenceRunner(
            dataset_val, infs, list(range(nr_tower))))

    return TrainConfig(
        model=model,
        dataflow=dataset_train,
        callbacks=callbacks,
        steps_per_epoch=step_size,
        max_epoch=max_epoch,
    )
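# Hedged usage sketch for get_config above, following the usual tensorpack launch
# pattern; `Model` is assumed to be defined elsewhere in this script, while
# `get_num_gpu`, `launch_train_with_config` and `SyncMultiGPUTrainerReplicated`
# come from tensorpack.
def _example_launch_training():
    nr_tower = max(get_num_gpu(), 1)
    config = get_config(Model(), nr_tower)
    launch_train_with_config(config, SyncMultiGPUTrainerReplicated(nr_tower))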
def evaluate(model, sess_init, args): """ use feedforward trainconfig of tensorpack :return: """ out_path = args.out pred_conf = PredictConfig( model=model, session_init=sess_init, input_names=['imgs', 'cams', 'gt_depth'], output_names=['prob_map', 'coarse_depth', 'refine_depth', 'imgs', 'loss', 'less_one_accuracy', 'less_three_accuracy'] ) ds_val = get_data(args, 'val') logger.warn('val size: %d' % len(ds_val)) pred_func = FeedfreePredictor(pred_conf, QueueInput(ds_val)) global_count = 0 avg_loss = 0. avg_less_one_acc = 0. avg_less_three_acc = 0. ds_len = len(ds_val) logger.info('begin evaluating') for i in range(ds_len): logger.info('datapoint %d' % i) prob_map, coarse_depth, refine_depth, imgs, loss, less_one_accuracy, less_three_accuracy = pred_func() batch_size, h, w, *_ = prob_map.shape ref_img = imgs[0] assert ref_img.shape[3] == 3, ref_img.shape for j in range(batch_size): # print(prob_map[j].shape) plt.imsave(path.join(out_path, str(global_count) + '_prob.png'), np.squeeze(prob_map[j]), cmap='rainbow') plt.imsave(path.join(out_path, str(global_count) + '_depth.png'), np.squeeze(coarse_depth[j]), cmap='rainbow') plt.imsave(path.join(out_path, str(global_count) + '_rgb.png'), np.squeeze(ref_img[j]).astype('uint8')) global_count += 1 avg_loss += loss avg_less_one_acc += less_one_accuracy avg_less_three_acc += less_three_accuracy avg_loss /= ds_len avg_less_one_acc /= ds_len avg_less_three_acc /= ds_len with open(path.join(out_path, '!log.txt'), 'w') as out_file: out_file.write(f'loss: {avg_loss}\n') out_file.write(f'less_one_acc: {avg_less_one_acc}\n') out_file.write(f'less_three_acc: {avg_less_three_acc}\n') return avg_loss, avg_less_three_acc, avg_less_one_acc
def sample_cat_hallucinations(self, layer_ops, merge_ops,
                              prob_at_layer=None, min_num_hallus=1,
                              hallu_input_choice=None):
    """
    prob_at_layer : probability of drawing an input from each layer. None is
        translated to the default, which samples a layer proportionally to its
        ch_dim. The ch_dim is computed using self, as we assume the last op is
        cat, and the cat determines the ch_dim.
    """
    assert self[-1].merge_op == LayerTypes.MERGE_WITH_CAT
    n_inputs = self.num_inputs()
    n_final_merge = len(self[-1].inputs)
    if prob_at_layer is None:
        prob_at_layer = np.ones(len(self) - 1)
        prob_at_layer[:n_inputs - 1] = n_final_merge
        prob_at_layer[n_inputs - 1] = n_final_merge * 1.5
        prob_at_layer = prob_at_layer / np.sum(prob_at_layer)
    assert len(prob_at_layer) >= len(self) - 1
    if len(prob_at_layer) > len(self) - 1:
        logger.warn("sample cell hallu cuts the prob_at_layer to len(info_list) - 1")
        prob_at_layer = prob_at_layer[:len(self) - 1]

    # choose inputs
    n_hallu_inputs = 2
    l_hallu = []
    for _ in range(min_num_hallus):
        # replace=False: a hallucination never connects twice to the same layer
        in_idxs = np.random.choice(list(range(len(prob_at_layer))),
                                   size=n_hallu_inputs, replace=False,
                                   p=prob_at_layer)
        in_ids = list(map(lambda idx: self[idx].id, in_idxs))
        main_ops = list(map(int, np.random.choice(layer_ops, size=n_hallu_inputs)))
        merge_op = int(np.random.choice(merge_ops))
        hallu = LayerInfo(layer_id=self[-1].id, inputs=in_ids,
                          operations=main_ops + [merge_op])
        l_hallu.append(hallu)
    return l_hallu
def get_sequential_loader(ds, isTrain, batch_size, augmentors, parallel=None):
    """ Load a Single-File LMDB (Sequential Read)
    Args:
        augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`

    Returns:
        A LMDBData which produces BGR images and labels.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html
    """
    assert isinstance(augmentors, list)
    aug = imgaug.AugmentorList(augmentors)

    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading

    if isTrain:
        ds = LocallyShuffleData(ds, 50000)
        ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)
        ds = AugmentImageComponent(ds, aug, copy=False)
        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = BatchData(ds, batch_size, remainder=False, use_list=True)
        ds = MultiProcessRunnerZMQ(ds, parallel)
    else:
        def mapper(data):
            im, label = data
            im = cv2.imdecode(im, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, label

        ds = MultiProcessMapDataZMQ(ds, parallel, mapper, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True, use_list=True)
    return ds
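# Hedged usage sketch for get_sequential_loader above: feed it an LMDB dataflow
# built with tensorpack's LMDBSerializer. The .lmdb path is a placeholder and
# `fbresnet_augmentor` is assumed to exist elsewhere in this repo.
def _example_sequential_train_loader():
    from tensorpack.dataflow import LMDBSerializer
    raw = LMDBSerializer.load('/path/to/ILSVRC12-train.lmdb', shuffle=False)
    return get_sequential_loader(raw, isTrain=True, batch_size=64,
                                 augmentors=fbresnet_augmentor(True))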
def _get_value_to_set(self): #label_loss = tf.get_default_graph().get_tensor_by_name("tower0/cls_loss/label_loss:0") #label_loss = label_loss.eval() if len(self._queue) > 0: moving_mean = np.asarray(self._queue).mean(axis=0) else: return self.base_lr if moving_mean < self.best: self.best = moving_mean self.wait = 0 else: self.wait += 1 if self.wait > self.patience: self.wait = 0 self.base_lr = max(self.base_lr * self.factor, self.min_lr) logger.warn( "ReduceLROnPlateau reducing learning rate to {}".format( self.base_lr)) return self.base_lr
def get_random_loader(ds, isTrain, batch_size, augmentors, parallel=None): """ DataFlow data (Random Read) Args: augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)` Returns: A DataFlow which produces BGR images and labels. See explanations in the tutorial: http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html """ assert isinstance(augmentors, list) aug = imgaug.AugmentorList(augmentors) if parallel is None: parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading if isTrain: ds = AugmentImageComponent(ds, aug, copy=False) if parallel < 16: logger.warn( "DataFlow may become the bottleneck when too few processes are used." ) ds = MultiProcessRunnerZMQ(ds, parallel) ds = BatchData(ds, batch_size, remainder=False) else: def mapf(dp): fname, cls = dp im = cv2.imread(fname, cv2.IMREAD_COLOR) im = aug.augment(im) return im, cls ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True) ds = BatchData(ds, batch_size, remainder=True) ds = MultiProcessRunnerZMQ(ds, 1) return ds
def __init__(self, shapes):
    """
    Args:
        shapes (list[list]): a list of fully-specified shapes.
    """
    self.shapes = shapes
    logger.warn("Using dummy input for debug!")

    def fn():
        tlist = []
        ctx = get_current_tower_context()
        assert ctx is not None
        assert len(self.shapes) == len(self._desc)
        for idx, p in enumerate(self._desc):
            tlist.append(tf.constant(
                0, dtype=p.type,
                name='dummy-{}-{}'.format(p.name, ctx.index),
                shape=self.shapes[idx]))
        return tlist
    super(DummyConstantInput, self).__init__(fn)
def get_data(args, mode): assert mode in ['train', 'val', 'test', 'fake'], 'invalid mode: {}'.format(mode) parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading if parallel < 16: logger.warn("DataFlow may become the bottleneck when too few processes are used.") if mode == 'train': # ds = PrefetchData(ds, 4, parallel) ds = DTU(args.data, args.view_num, mode, args.interval_scale, args.max_d) # ds = PrefetchDataZMQ(ds, nr_proc=parallel) ds = BatchData(ds, args.batch, remainder=False) elif mode == 'val': ds = DTU(args.data, args.view_num, mode, args.interval_scale, args.max_d) # ds = PrefetchData(ds, 4, parallel) ds = BatchData(ds, args.batch, remainder=True) # ds = FakeData([[3, 512, 640, 3], [3, 2, 4, 4], [512 // 4, 640 // 4, 1]], 1) # ds = BatchData(ds, args.batch, remainder=False) else: ds = FakeData([[3, 512, 640, 3], [3, 2, 4, 4], [512 // 4, 640 // 4, 1]], 20) ds = BatchData(ds, args.batch, remainder=False) return ds
def _match_vars(self, func):
    reader, chkpt_vars = SaverRestoreNoGlobalStep._read_checkpoint_vars(self.path)
    graph_vars = tf.global_variables()
    chkpt_vars_used = set()
    for v in graph_vars:
        name = get_savename_from_varname(v.name, varname_prefix=self.prefix)
        # skip the global step
        if name == "global_step:0":
            print("skip restoring global step!")
            continue
        if reader.has_tensor(name):
            func(reader, name, v)
            chkpt_vars_used.add(name)
        else:
            vname = v.op.name
            if not is_training_name(vname):
                logger.warn("Variable {} in the graph not found in checkpoint!".format(vname))
    if len(chkpt_vars_used) < len(chkpt_vars):
        unused = chkpt_vars - chkpt_vars_used
        for name in sorted(unused):
            if not is_training_name(name):
                logger.warn("Variable {} in checkpoint not found in the graph!".format(name))
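# Hedged usage sketch for the SaverRestoreNoGlobalStep variant above: it is
# assumed to be a drop-in SaverRestore subclass, so it plugs in wherever
# tensorpack expects a session_init; the checkpoint path is a placeholder.
def _example_resume_without_global_step(train_config):
    train_config.session_init = SaverRestoreNoGlobalStep('train_log/run1/model-5000')
    return train_config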
def _get_augment_params(img):
    cnt = 0
    h, w = img.shape[:2]

    def get_dest_size():
        if _is_scale:
            sx = np.random.uniform(xrange[0], xrange[1], size=[])
            if aspect_ratio_thres == 0:
                sy = sx
            else:
                sy = np.random.uniform(yrange[0], yrange[1], size=[])
            destX = max(sx * w, minimum[0])
            destY = max(sy * h, minimum[1])
        else:
            sx = np.random.uniform(xrange[0], xrange[1], size=[])
            if aspect_ratio_thres == 0:
                sy = sx * 1.0 / w * h
            else:
                sy = np.random.uniform(yrange[0], yrange[1], size=[])
            destX = max(sx, minimum[0])
            destY = max(sy, minimum[1])
        return (int(destX + 0.5), int(destY + 0.5))

    while True:
        destX, destY = get_dest_size()
        if aspect_ratio_thres > 0:  # don't check when thres == 0
            oldr = w * 1.0 / h
            newr = destX * 1.0 / destY
            diff = abs(newr - oldr) / oldr
            if diff >= aspect_ratio_thres + 1e-5:
                cnt += 1
                if cnt > 50:
                    logger.warn("RandomResize failed to augment an image")
                    return h, w, h, w
                continue
        return h, w, destY, destX
def get_depth_meta(cams, depth_num): """ :param cams: shape: batch, view_num :return: depth_start, depth_interval """ with tf.variable_scope('depth_meta'): ref_cam = cams[:, 0] logger.warn('cams shape: {}'.format(cams.get_shape().as_list())) logger.warn('ref_cam shape: {}'.format(ref_cam.get_shape().as_list())) logger.warn('ref_cam type: {}'.format(type(ref_cam))) batch_size = tf.shape(cams)[0] # depth_start = tf.reshape( # tf.slice(ref_cam, [0, 1, 3, 0], [batch_size, 1, 1, 1]), [batch_size], name='depth_start') depth_start = tf.reshape(tf.slice(cams, [0, 0, 1, 3, 0], [batch_size, 1, 1, 1, 1]), [batch_size], name='depth_start') # depth_interval = tf.reshape( # tf.slice(ref_cam, [0, 1, 3, 1], [batch_size, 1, 1, 1]), [batch_size], name='depth_interval') depth_interval = tf.reshape(tf.slice(cams, [0, 0, 1, 3, 1], [batch_size, 1, 1, 1, 1]), [batch_size], name='depth_interval') # depth_end = tf.add(depth_start, (tf.cast(depth_num, tf.float32) - 1) * depth_interval, name='depth_end') depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval depth_end = tf.identity(depth_end, 'depth_end') # depth_start = tf.map_fn(lambda cam: Cam.get_depth_meta(cam, 'depth_min'), ref_cam) # assert depth_start.get_shape().as_list() == [batch_size] # depth_interval = tf.map_fn(lambda cam: Cam.get_depth_meta(cam, 'depth_interval'), ref_cam) # assert depth_interval.get_shape().as_list() == [batch_size] return depth_start, depth_interval, depth_end
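# Hedged usage sketch for the tensor version of get_depth_meta above. It assumes
# `cams` is laid out as [batch, view_num, 2, 4, 4] with the depth row at
# cams[:, 0, 1, 3, :], which is what the slicing in the function implies.
cams_example = tf.placeholder(tf.float32, [None, 3, 2, 4, 4], name='cams_example')
depth_start, depth_interval, depth_end = get_depth_meta(cams_example, depth_num=192)
# each of the three returned tensors has shape [batch]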
def __call__(self, roidb): # fname, boxes, klass, is_crowd = roidb["file_name"], roidb[ "boxes"], roidb["class"], roidb["is_crowd"] assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape boxes = np.copy(boxes) im = cv2.imread(fname, cv2.IMREAD_COLOR) assert im is not None, fname im = im.astype("float32") height, width = im.shape[:2] # assume floatbox as input assert boxes.dtype == np.float32, "Loader has to return float32 boxes!" if not self.cfg.DATA.ABSOLUTE_COORD: boxes[:, 0::2] *= width boxes[:, 1::2] *= height ret = {} tfms = self.aug_weak.get_transform(im) im = tfms.apply_image(im) points = box_to_point8(boxes) points = tfms.apply_coords(points) boxes = point8_to_box(points) h, w = im.shape[:2] if self.aug_type != "default": boxes_backup = boxes.copy() try: assert len(boxes) > 0, "boxes after resizing becomes to zero" assert np.sum(np_area(boxes)) > 0, "boxes are all zero area!" bbs = array_to_bb(boxes) images_aug, bbs_aug, _ = self.aug_strong(images=[im], bounding_boxes=[bbs], n_real_box=len(bbs)) # convert to gt boxes array boxes = bb_to_array(bbs_aug[0]) boxes[:, 0] = np.clip(boxes[:, 0], 0, w) boxes[:, 1] = np.clip(boxes[:, 1], 0, h) boxes[:, 2] = np.clip(boxes[:, 2], 0, w) boxes[:, 3] = np.clip(boxes[:, 3], 0, h) # after affine, some boxes can be zero area. Let's remove them and their corresponding info boxes, mask = remove_empty_boxes(boxes) klass = klass[mask] assert len( klass ) > 0, "Empty boxes and kclass after removing empty ones" is_crowd = np.array( [0] * len(klass)) # do not ahve crowd annotations assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \ "Invalid category {}!".format(klass.max()) assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!" im = images_aug[0] except Exception as e: logger.warn("Error catched " + str(e) + "\n Use non-augmented data.") boxes = boxes_backup ret["image"] = im try: # Add rpn data to dataflow: if self.cfg.MODE_FPN: multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input( im, boxes, is_crowd) for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs): ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes else: ret["anchor_labels"], ret[ "anchor_boxes"] = self.get_rpn_anchor_input( im, boxes, is_crowd) boxes = boxes[is_crowd == 0] # skip crowd boxes in training target klass = klass[is_crowd == 0] ret["gt_boxes"] = boxes ret["gt_labels"] = klass except Exception as e: log_once( "Input {} is filtered for training: {}".format(fname, str(e)), "warn") return None return ret
def finalize_configs(is_training): """ Run some sanity checks, and populate some configs from others """ _C.freeze(False) # populate new keys now _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1 # +1 background _C.DATA.BASEDIR = os.path.expanduser(_C.DATA.BASEDIR) if isinstance(_C.DATA.VAL, six.string_types ): # support single string (the typical case) as well _C.DATA.VAL = (_C.DATA.VAL, ) assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN', 'None'], _C.BACKBONE.NORM if _C.BACKBONE.NORM != 'FreezeBN': assert not _C.BACKBONE.FREEZE_AFFINE assert _C.BACKBONE.FREEZE_AT in [0, 1, 2] _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS) assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES) # image size into the backbone has to be multiple of this number _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[ 3] # [3] because we build FPN with features r2,r3,r4,r5 if _C.MODE_FPN: size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1. _C.PREPROC.MAX_SIZE = np.ceil( _C.PREPROC.MAX_SIZE / size_mult) * size_mult assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint'] assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head') assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head') assert _C.FPN.NORM in ['None', 'GN'] if _C.FPN.CASCADE: # the first threshold is the proposal sampling threshold assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == len(_C.CASCADE.IOUS) if is_training: train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE if isinstance( train_scales, (list, tuple)) and train_scales[1] - train_scales[0] > 100: # don't autotune if augmentation is on os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0' os.environ['TF_AUTOTUNE_THRESHOLD'] = '1' assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER # setup NUM_GPUS if _C.TRAINER == 'horovod': import horovod.tensorflow as hvd ngpu = hvd.size() if ngpu == hvd.local_size(): logger.warn( "It's not recommended to use horovod for single-machine training. " "Replicated trainer is more stable and has the same efficiency." ) else: assert 'OMPI_COMM_WORLD_SIZE' not in os.environ ngpu = get_num_gpu() assert ngpu > 0, "Has to train with GPU!" assert ngpu % 8 == 0 or 8 % ngpu == 0, "Can only train with 1,2,4 or >=8 GPUs, but found {} GPUs".format( ngpu) else: # autotune is too slow for inference os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0' ngpu = get_num_gpu() if _C.TRAIN.NUM_GPUS is None: _C.TRAIN.NUM_GPUS = ngpu else: if _C.TRAINER == 'horovod': assert _C.TRAIN.NUM_GPUS == ngpu else: assert _C.TRAIN.NUM_GPUS <= ngpu _C.freeze() logger.info("Config: ------------------------------------------\n" + str(_C))
def predict_unlabeled(model, model_path, nr_visualize=100, output_dir='output_patch_samples'): """Predict the pseudo label information of unlabeled data.""" assert cfg.EVAL.PSEUDO_INFERENCE, 'set cfg.EVAL.PSEUDO_INFERENCE=True' df, dataset_size = get_eval_unlabeled_dataflow(cfg.DATA.TRAIN, return_size=True) df.reset_state() predcfg = PredictConfig( model=model, session_init=SmartInit(model_path), input_names=['image'], # ['image', 'gt_boxes', 'gt_labels'], output_names=[ 'generate_{}_proposals/boxes'.format( 'fpn' if cfg.MODE_FPN else 'rpn'), 'generate_{}_proposals/scores'.format( 'fpn' if cfg.MODE_FPN else 'rpn'), 'fastrcnn_all_scores', 'output/boxes', 'output/scores', # score of the labels 'output/labels', ]) pred = OfflinePredictor(predcfg) if os.path.isdir(output_dir): if os.path.isfile(os.path.join(output_dir, 'pseudo_data.npy')): os.remove(os.path.join(output_dir, 'pseudo_data.npy')) if not os.path.isdir(os.path.join(output_dir, 'vis')): os.makedirs(os.path.join(output_dir, 'vis')) else: shutil.rmtree(os.path.join(output_dir, 'vis')) fs.mkdir_p(output_dir + '/vis') else: fs.mkdir_p(output_dir) fs.mkdir_p(output_dir + '/vis') logger.warning('-' * 100) logger.warning('Write to {}'.format(output_dir)) logger.warning('-' * 100) with tqdm.tqdm(total=nr_visualize) as pbar: for idx, dp in itertools.islice(enumerate(df), nr_visualize): img, img_id = dp # dp['image'], dp['img_id'] rpn_boxes, rpn_scores, all_scores, \ final_boxes, final_scores, final_labels = pred(img) outs = { 'proposals_boxes': rpn_boxes, # (?,4) 'proposals_scores': rpn_scores, # (?,) 'boxes': final_boxes, 'scores': final_scores, 'labels': final_labels } ratios = [10, 10] # [top 20% as background, bottom 20% as background] bg_ind, fg_ind = custom.find_bg_and_fg_proposals(all_scores, ratios=ratios) bg_viz = draw_predictions(img, rpn_boxes[bg_ind], all_scores[bg_ind]) fg_viz = draw_predictions(img, rpn_boxes[fg_ind], all_scores[fg_ind]) results = [ DetectionResult(*args) for args in zip(final_boxes, final_scores, final_labels, [None] * len(final_labels)) ] final_viz = draw_final_outputs(img, results) viz = tpviz.stack_patches([bg_viz, fg_viz, final_viz], 2, 2) if os.environ.get('DISPLAY', None): tpviz.interactive_imshow(viz) assert cv2.imwrite('{}/vis/{:03d}.png'.format(output_dir, idx), viz) pbar.update() logger.info('Write {} samples to {}'.format(nr_visualize, output_dir)) ## Parallel inference the whole unlabled data pseudo_preds = collections.defaultdict(list) num_tower = max(cfg.TRAIN.NUM_GPUS, 1) graph_funcs = MultiTowerOfflinePredictor(predcfg, list( range(num_tower))).get_predictors() dataflows = [ get_eval_unlabeled_dataflow(cfg.DATA.TRAIN, shard=k, num_shards=num_tower) for k in range(num_tower) ] all_results = multithread_predict_dataflow(dataflows, graph_funcs) for id, result in tqdm.tqdm(enumerate(all_results)): img_id = result['image_id'] outs = { 'proposals_boxes': result['proposal_box'].astype(np.float16), # (?,4) 'proposals_scores': result['proposal_score'].astype(np.float16), # (?,) # 'frcnn_all_scores': result['frcnn_score'].astype(np.float16), 'boxes': result['bbox'].astype(np.float16), # (?,4) 'scores': result['score'].astype(np.float16), # (?,) 'labels': result['category_id'].astype(np.float16) # (?,) } pseudo_preds[img_id] = outs logger.warn('Writing to {}'.format( os.path.join(output_dir, 'pseudo_data.npy'))) try: dd.io.save(os.path.join(output_dir, 'pseudo_data.npy'), pseudo_preds) except RuntimeError: logger.error('Save failed. Check reasons manually...')
def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30, live_lost_as_eoe=True, max_num_frames=0): """ Args: rom_file: path to the rom frame_skip: skip every k frames and repeat the action viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. nullop_start: start with random number of null ops. live_losts_as_eoe: consider lost of lives as end of episode. Useful for training. max_num_frames: maximum number of frames per episode. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Error) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setInt(b"max_num_frames_per_episode", max_num_frames) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.action_space = spaces.Discrete(len(self.actions)) self.observation_space = spaces.Box( low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8) self._restart_episode()
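# Hedged usage sketch for the AtariPlayer above, which exposes a gym-style
# interface (reset/step plus action_space and observation_space); the ROM path
# is a placeholder.
def _example_random_episode(rom='breakout.bin'):
    env = AtariPlayer(rom, viz=0, frame_skip=4)
    env.reset()
    done, total_reward = False, 0.0
    while not done:
        act = env.action_space.sample()
        _obs, reward, done, _info = env.step(act)
        total_reward += reward
    return total_reward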
def __init__(self, rom_file, viz=0, height_range=(None, None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_losts_as_eoe: consider lost of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode()
def _add_detection_gt(self, img, add_mask): """ Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection. If add_mask is True, also add 'segmentation' in coco poly format. """ # ann_ids = self.coco.getAnnIds(imgIds=img['image_id']) # objs = self.coco.loadAnns(ann_ids) objs = self.coco.imgToAnns[ img["image_id"]] # equivalent but faster than the above two lines if "minival" not in self.annotation_file: # TODO better to check across the entire json, rather than per-image ann_ids = [ann["id"] for ann in objs] assert len(set(ann_ids)) == len(ann_ids), \ "Annotation ids in '{}' are not unique!".format(self.annotation_file) # clean-up boxes width = img.pop("width") height = img.pop("height") all_boxes = [] all_segm = [] all_cls = [] all_iscrowd = [] for objid, obj in enumerate(objs): if obj.get("ignore", 0) == 1: continue x1, y1, w, h = list(map(float, obj["bbox"])) # bbox is originally in float # x1/y1 means upper-left corner and w/h means true w/h. This can be verified by segmentation pixels. # But we do make an assumption here that (0.0, 0.0) is upper-left corner of the first pixel x2, y2 = x1 + w, y1 + h # np.clip would be quite slow here x1 = min(max(x1, 0), width) x2 = min(max(x2, 0), width) y1 = min(max(y1, 0), height) y2 = min(max(y2, 0), height) w, h = x2 - x1, y2 - y1 # Require non-zero seg area and more than 1x1 box size if obj["area"] > 1 and w > 0 and h > 0: all_boxes.append([x1, y1, x2, y2]) all_cls.append( self.COCO_id_to_category_id.get(obj["category_id"], obj["category_id"])) iscrowd = obj.get("iscrowd", 0) all_iscrowd.append(iscrowd) if add_mask: segs = obj["segmentation"] if not isinstance(segs, list): assert iscrowd == 1 all_segm.append(None) else: valid_segs = [ np.asarray(p).reshape(-1, 2).astype("float32") for p in segs if len(p) >= 6 ] if len(valid_segs) == 0: logger.error( "Object {} in image {} has no valid polygons!".format( objid, img["file_name"])) elif len(valid_segs) < len(segs): logger.warn("Object {} in image {} has invalid polygons!".format( objid, img["file_name"])) all_segm.append(valid_segs) # all geometrically-valid boxes are returned if len(all_boxes): img["boxes"] = np.asarray(all_boxes, dtype="float32") # (n, 4) else: img["boxes"] = np.zeros((0, 4), dtype="float32") cls = np.asarray(all_cls, dtype="int32") # (n,) if len(cls): assert cls.min() > 0, "Category id in COCO format must > 0!" img["class"] = cls # n, always >0 img["is_crowd"] = np.asarray(all_iscrowd, dtype="int8") # n, if add_mask: # also required to be float32 img["segmentation"] = all_segm
def __init__(self, rom_file, viz=0, height_range=(None, None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_losts_as_eoe: consider lost of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: if execute_only_once(): logger.warn( "https://github.com/mgbellemare/Arcade-Learning-Environment/pull/171 is not merged!" ) # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode()
def BatchNorm3d(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5, center=True, scale=True, beta_initializer=tf.zeros_initializer(), gamma_initializer=tf.ones_initializer(), virtual_batch_size=None, data_format='channels_last', internal_update=False, sync_statistics=None): """ Almost equivalent to `tf.layers.batch_normalization`, but different (and more powerful) in the following: 1. Accepts an alternative `data_format` option when `axis` is None. For 2D input, this argument will be ignored. 2. Default value for `momentum` and `epsilon` is different. 3. Default value for `training` is automatically obtained from tensorpack's `TowerContext`, but can be overwritten. 4. Support the `internal_update` option, which enables the use of BatchNorm layer inside conditionals. 5. Support the `sync_statistics` option, which is very useful in small-batch models. Args: internal_update (bool): if False, add EMA update ops to `tf.GraphKeys.UPDATE_OPS`. If True, update EMA inside the layer by control dependencies. They are very similar in speed, but `internal_update=True` can be used when you have conditionals in your model, or when you have multiple networks to train. Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/14699 sync_statistics: either None or "nccl". By default (None), it uses statistics of the input tensor to normalize. When set to "nccl", this layer must be used under tensorpack multi-gpu trainers, and it then uses per-machine (multiple GPU) statistics to normalize. Note that this implementation averages the per-tower E[x] and E[x^2] among towers to compute global mean&variance. The result is the global mean&variance only if each tower has the same batch size. This option has no effect when not training. This option is also known as "Cross-GPU BatchNorm" as mentioned in https://arxiv.org/abs/1711.07240. Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/18222 Variable Names: * ``beta``: the bias term. Will be zero-inited by default. * ``gamma``: the scale term. Will be one-inited by default. * ``mean/EMA``: the moving average of mean. * ``variance/EMA``: the moving average of variance. Note: Combinations of ``training`` and ``ctx.is_training``: * ``training == ctx.is_training``: standard BN, EMA are maintained during training and used during inference. This is the default. * ``training and not ctx.is_training``: still use batch statistics in inference. * ``not training and ctx.is_training``: use EMA to normalize in training. This is useful when you load a pre-trained BN and don't want to fine tune the EMA. EMA will not be updated in this case. 
""" # parse shapes data_format = get_data_format(data_format, tfmode=False) shape = inputs.get_shape().as_list() ndims = len(shape) # in 3d conv, we have 5d dim [batch, c, d, h, w] # assert ndims in [2, 4], ndims if sync_statistics is not None: sync_statistics = sync_statistics.lower() assert sync_statistics in [None, 'nccl', 'horovod'], sync_statistics if axis is None: if ndims == 2: data_format = 'NHWC' axis = 1 elif ndims == 5: axis = 1 if data_format == 'NCHW' else 4 else: axis = 1 if data_format == 'NCHW' else 3 else: data_format = 'NCHW' if axis == 1 else 'NHWC' num_chan = shape[axis] # parse training/ctx ctx = get_current_tower_context() if training is None: training = ctx.is_training training = bool(training) TF_version = get_tf_version_number() if not training and ctx.is_training: assert TF_version >= 1.4, \ "Fine tuning a BatchNorm model with fixed statistics is only " \ "supported after https://github.com/tensorflow/tensorflow/pull/12580 " if ctx.is_main_training_tower: # only warn in first tower logger.warn( "[BatchNorm] Using moving_mean/moving_variance in training.") # Using moving_mean/moving_variance in training, which means we # loaded a pre-trained BN and only fine-tuning the affine part. if sync_statistics is None or not (training and ctx.is_training): coll_bk = backup_collection([tf.GraphKeys.UPDATE_OPS]) with rename_get_variable({ 'moving_mean': 'mean/EMA', 'moving_variance': 'variance/EMA' }): tf_args = dict(axis=axis, momentum=momentum, epsilon=epsilon, center=center, scale=scale, beta_initializer=beta_initializer, gamma_initializer=gamma_initializer, fused=True, _reuse=tf.get_variable_scope().reuse) if TF_version >= 1.5: tf_args['virtual_batch_size'] = virtual_batch_size else: assert virtual_batch_size is None, "Feature not supported in this version of TF!" layer = tf.layers.BatchNormalization(**tf_args) xn = layer.apply(inputs, training=training, scope=tf.get_variable_scope()) # maintain EMA only on one GPU is OK, even in replicated mode. # because during training, EMA isn't used if ctx.is_main_training_tower: for v in layer.non_trainable_variables: add_model_variable(v) if not ctx.is_main_training_tower or internal_update: restore_collection(coll_bk) if training and internal_update: assert layer.updates with tf.control_dependencies(layer.updates): ret = tf.identity(xn, name='output') else: ret = tf.identity(xn, name='output') vh = ret.variables = VariableHolder( moving_mean=layer.moving_mean, mean=layer.moving_mean, # for backward-compatibility moving_variance=layer.moving_variance, variance=layer.moving_variance) # for backward-compatibility if scale: vh.gamma = layer.gamma if center: vh.beta = layer.beta else: red_axis = [0] if ndims == 2 else ( [0, 2, 3] if axis == 1 else [0, 1, 2]) if ndims == 5: red_axis = [0, 2, 3, 4] if axis == 1 else [0, 1, 2, 3] new_shape = None # don't need to reshape unless ... if ndims == 4 and axis == 1: new_shape = [1, num_chan, 1, 1] if ndims == 5 and axis == 1: new_shape = [1, num_chan, 1, 1, 1] batch_mean = tf.reduce_mean(inputs, axis=red_axis) batch_mean_square = tf.reduce_mean(tf.square(inputs), axis=red_axis) if sync_statistics == 'nccl': if six.PY3 and TF_version <= 1.8 and ctx.is_main_training_tower: logger.warn( "A TensorFlow bug will cause cross-GPU BatchNorm to fail. 
" "Apply this patch: https://github.com/tensorflow/tensorflow/pull/20360" ) from tensorflow.contrib.nccl.ops import gen_nccl_ops shared_name = re.sub('tower[0-9]+/', '', tf.get_variable_scope().name) num_dev = ctx.total batch_mean = gen_nccl_ops.nccl_all_reduce( input=batch_mean, reduction='sum', num_devices=num_dev, shared_name=shared_name + '_NCCL_mean') * (1.0 / num_dev) batch_mean_square = gen_nccl_ops.nccl_all_reduce( input=batch_mean_square, reduction='sum', num_devices=num_dev, shared_name=shared_name + '_NCCL_mean_square') * (1.0 / num_dev) elif sync_statistics == 'horovod': # Require https://github.com/uber/horovod/pull/331 # Proof-of-concept, not ready yet. import horovod.tensorflow as hvd batch_mean = hvd.allreduce(batch_mean, average=True) batch_mean_square = hvd.allreduce(batch_mean_square, average=True) batch_var = batch_mean_square - tf.square(batch_mean) batch_mean_vec = batch_mean batch_var_vec = batch_var beta, gamma, moving_mean, moving_var = get_bn_variables( num_chan, scale, center, beta_initializer, gamma_initializer) if new_shape is not None: batch_mean = tf.reshape(batch_mean, new_shape) batch_var = tf.reshape(batch_var, new_shape) # Using fused_batch_norm(is_training=False) is actually slightly faster, # but hopefully this call will be JITed in the future. xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var, tf.reshape(beta, new_shape), tf.reshape(gamma, new_shape), epsilon) else: xn = tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, epsilon) if ctx.is_main_training_tower: ret = update_bn_ema(xn, batch_mean_vec, batch_var_vec, moving_mean, moving_var, momentum, internal_update) else: ret = tf.identity(xn, name='output') vh = ret.variables = VariableHolder( moving_mean=moving_mean, mean=moving_mean, # for backward-compatibility moving_variance=moving_var, variance=moving_var) # for backward-compatibility if scale: vh.gamma = gamma if center: vh.beta = beta return ret
def play_one_episode(env, func): env.reset() env.prepare() r = 0 stats = [StatCounter() for _ in range(7)] while r == 0: last_cards_value = env.get_last_outcards() last_cards_char = to_char(last_cards_value) last_out_cards = Card.val2onehot60(last_cards_value) last_category_idx = env.get_last_outcategory_idx() curr_cards_char = to_char(env.get_curr_handcards()) is_active = True if last_cards_value.size == 0 else False s = env.get_state_prob() intention, r, category_idx = env.step_auto() if category_idx == 14: continue minor_cards_targets = pick_minor_targets(category_idx, to_char(intention)) if not is_active: if category_idx == Category.QUADRIC.value and category_idx != last_category_idx: passive_decision_input = 1 passive_bomb_input = intention[0] - 3 passive_decision_prob, passive_bomb_prob, _, _, _, _, _ = func( [ s.reshape(1, -1), last_out_cards.reshape(1, -1), np.zeros([s.shape[0]]) ]) stats[0].feed( int(passive_decision_input == np.argmax( passive_decision_prob))) stats[1].feed( int(passive_bomb_input == np.argmax(passive_bomb_prob))) else: if category_idx == Category.BIGBANG.value: passive_decision_input = 2 passive_decision_prob, _, _, _, _, _, _ = func([ s.reshape(1, -1), last_out_cards.reshape(1, -1), np.zeros([s.shape[0]]) ]) stats[0].feed( int(passive_decision_input == np.argmax( passive_decision_prob))) else: if category_idx != Category.EMPTY.value: passive_decision_input = 3 # OFFSET_ONE # 1st, Feb - remove relative card output since shift is hard for the network to learn passive_response_input = intention[0] - 3 if passive_response_input < 0: print("something bad happens") passive_response_input = 0 passive_decision_prob, _, passive_response_prob, _, _, _, _ = func( [ s.reshape(1, -1), last_out_cards.reshape(1, -1), np.zeros([s.shape[0]]) ]) stats[0].feed( int(passive_decision_input == np.argmax( passive_decision_prob))) stats[2].feed( int(passive_response_input == np.argmax( passive_response_prob))) else: passive_decision_input = 0 passive_decision_prob, _, _, _, _, _, _ = func([ s.reshape(1, -1), last_out_cards.reshape(1, -1), np.zeros([s.shape[0]]) ]) stats[0].feed( int(passive_decision_input == np.argmax( passive_decision_prob))) else: seq_length = get_seq_length(category_idx, intention) # ACTIVE OFFSET ONE! active_decision_input = category_idx - 1 active_response_input = intention[0] - 3 _, _, _, active_decision_prob, active_response_prob, active_seq_prob, _ = func( [ s.reshape(1, -1), last_out_cards.reshape(1, -1), np.zeros([s.shape[0]]) ]) stats[3].feed( int(active_decision_input == np.argmax(active_decision_prob))) stats[4].feed( int(active_response_input == np.argmax(active_response_prob))) if seq_length is not None: # length offset one seq_length_input = seq_length - 1 stats[5].feed( int(seq_length_input == np.argmax(active_seq_prob))) if minor_cards_targets is not None: main_cards = pick_main_cards(category_idx, to_char(intention)) handcards = curr_cards_char.copy() state = s.copy() for main_card in main_cards: handcards.remove(main_card) cards_onehot = Card.char2onehot60(main_cards) # we must make the order in each 4 batch correct... 
            discard_onehot_from_s_60(state, cards_onehot)
            is_pair = False
            minor_type = 0
            if category_idx == Category.THREE_TWO.value or category_idx == Category.THREE_TWO_LINE.value:
                is_pair = True
                minor_type = 1
            for target in minor_cards_targets:
                target_val = Card.char2value_3_17(target) - 3
                _, _, _, _, _, _, minor_response_prob = func([
                    state.copy().reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.array([minor_type])
                ])
                stats[6].feed(int(target_val == np.argmax(minor_response_prob)))
                cards = [target]
                handcards.remove(target)
                if is_pair:
                    if target not in handcards:
                        logger.warn('something wrong...')
                        logger.warn('minor: {}'.format(target))
                        logger.warn('main_cards: {}'.format(main_cards))
                        logger.warn('handcards: {}'.format(handcards))
                    else:
                        handcards.remove(target)
                        cards.append(target)
                # correct for one-hot state
                cards_onehot = Card.char2onehot60(cards)
                discard_onehot_from_s_60(state, cards_onehot)
    return stats
def __call__(self, roidbs): # # roidbs2 repsect to unlabeled data def prepare_data(roidb, aug, aug_type="default", is_unlabled=False): fname, boxes, klass, is_crowd, img_id = roidb["file_name"], roidb[ "boxes"], roidb["class"], roidb["is_crowd"], roidb["image_id"] assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape boxes = np.copy(boxes) im = cv2.imread(fname, cv2.IMREAD_COLOR) assert im is not None, fname im = im.astype("float32") height, width = im.shape[:2] # assume floatbox as input assert boxes.dtype == np.float32, "Loader has to return float32 boxes!" if not self.cfg.DATA.ABSOLUTE_COORD: boxes[:, 0::2] *= width boxes[:, 1::2] *= height ret = {} if not is_unlabled and aug_type == "default": tfms = aug.get_transform(im) im = tfms.apply_image(im) points = box_to_point8(boxes) points = tfms.apply_coords(points) boxes = point8_to_box(points) else: # It is strong augmentation # Load box informaiton from disk if is_unlabled: pseudo_target = self.get_pseudo_gt(img_id) # has no pseudo target found assert pseudo_target is not None boxes = pseudo_target["boxes"] klass = pseudo_target["labels"].astype(np.int32) assert len( boxes) > 0, "boxes after thresholding becomes to zero" is_crowd = np.array( [0] * len(klass)) # do not ahve crowd annotations else: # it is labeled data, use boxes loaded from roidb, klass, is_crowd pass if aug_type == "default": # use default augmentations, only happend for unlabeled data tfms = self.aug.get_transform(im) im = tfms.apply_image(im) points = box_to_point8(boxes) points = tfms.apply_coords(points) boxes = point8_to_box(points) # is_crowd = np.array([0]*len(klass)) # do not ahve crowd annotations else: # use strong augmentation with extra packages # resize first tfms = self.resize.get_transform(im) im = tfms.apply_image(im) points = box_to_point8(boxes) points = tfms.apply_coords(points) boxes = point8_to_box(points) boxes_backup = boxes.copy() h, w = im.shape[:2] # strong augmentation try: assert len( boxes) > 0, "boxes after resizing becomes to zero" assert np.sum( np_area(boxes)) > 0, "boxes are all zero area!" bbs = array_to_bb(boxes) images_aug, bbs_aug, _ = aug(images=[im], bounding_boxes=[bbs], n_real_box=len(bbs)) # # convert to gt boxes array boxes = bb_to_array(bbs_aug[0]) boxes[:, 0] = np.clip(boxes[:, 0], 0, w) boxes[:, 1] = np.clip(boxes[:, 1], 0, h) boxes[:, 2] = np.clip(boxes[:, 2], 0, w) boxes[:, 3] = np.clip(boxes[:, 3], 0, h) # after affine, some boxes can be zero area. Let's remove them and their corresponding info boxes, mask = remove_empty_boxes(boxes) klass = klass[mask] is_crowd = is_crowd[mask] assert len( klass ) > 0, "Empty boxes and kclass after removing empty ones" assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \ "Invalid category {}!".format(klass.max()) assert np.min( np_area(boxes)) > 0, "Some boxes have zero area!" 
im = images_aug[0] except Exception as e: # if augmentation makes the boxes become empty, we switch to # non-augmented one # logger.warn("Error catched " + str(e) + # "\n Use non-augmented data.") boxes = boxes_backup ret["image"] = im # Add rpn data to dataflow: if self.cfg.MODE_FPN: multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input( im, boxes, is_crowd) for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs): ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes else: ret["anchor_labels"], ret[ "anchor_boxes"] = self.get_rpn_anchor_input( im, boxes, is_crowd) boxes = boxes[is_crowd == 0] # skip crowd boxes in training target klass = klass[is_crowd == 0] ret["gt_boxes"] = boxes ret["gt_labels"] = klass if is_unlabled: ret["proposals_boxes"] = pseudo_target["proposals_boxes"] # ret["proposals_scores"] = pseudo_target['proposals_scores'] return ret try: roidb, roidb_u = roidbs results = {} if self.labeled_augment_type == "default": results.update(prepare_data(roidb, self.aug, is_unlabled=False)) else: results.update( prepare_data(roidb, self.aug_strong_labeled, aug_type=self.labeled_augment_type, is_unlabled=False)) # strong augmentation res_u = {} for k, v in prepare_data(roidb_u, self.aug_strong, aug_type=self.unlabeled_augment_type, is_unlabled=True).items(): res_u[k + "_strong"] = v results.update(res_u) except Exception as e: logger.warn("Input is filtered " + str(e)) return None return results