Exemple #1
0
 def _sync(self):
     """Publish the current variable values and wake up everything waiting.

     Evaluates every variable in ``self.vars``, stores the resulting
     ``{name: value}`` mapping under ``self.shared_dic['params']`` and then
     notifies all waiters on ``self.condvar``.
     """
     logger.info("Updating weights ...")
     self.shared_dic['params'] = {v.name: v.eval() for v in self.vars}
     # Hold the condition through its context manager so the underlying lock
     # is released even if notify_all() raises.
     with self.condvar:
         self.condvar.notify_all()
Exemple #2
0
    def __init__(self,
                 predictor_io_names,
                 player,
                 state_shape,
                 batch_size,
                 memory_size, init_memory_size,
                 exploration, end_exploration, exploration_epoch_anneal,
                 update_frequency, history_len):
        """
        Store the configuration on the instance and create the replay memory.

        Every argument (after ``init_memory_size`` is converted to int) is
        copied onto ``self`` under its own name.

        Args:
            predictor_io_names (tuple of list of str): input/output names to
                predict Q value from state.
            player (RLEnvironment): the player.
            state_shape: forwarded to ``ReplayMemory``.
            batch_size: stored on the instance; not used further here.
            memory_size: capacity argument forwarded to ``ReplayMemory``.
            init_memory_size: converted with ``int()`` before being stored.
            exploration, end_exploration, exploration_epoch_anneal: stored on
                the instance; not used further here.
            history_len (int): length of history frames to concat. Zero-filled
                initial frames.
            update_frequency (int): number of new transitions to add to memory
                after sampling a batch of transitions for training.
        """
        init_memory_size = int(init_memory_size)

        # NOTE: locals() is snapshotted here, so it contains exactly `self` and
        # the constructor arguments. Introducing a new local variable above
        # this loop would turn it into an instance attribute as well.
        for k, v in locals().items():
            if k != 'self':
                setattr(self, k, v)
        self.num_actions = player.get_action_space().num_actions()
        logger.info("Number of Legal actions: {}".format(self.num_actions))

        self.rng = get_rng(self)
        self._init_memory_flag = threading.Event()  # tell if memory has been initialized

        # TODO just use a semaphore?
        # a queue to receive notifications to populate memory
        self._populate_job_queue = queue.Queue(maxsize=5)

        self.mem = ReplayMemory(memory_size, state_shape, history_len)
Exemple #3
0
def get_config(model, nr_tower):
    """Assemble the ``TrainConfig`` for training ``model`` on ``nr_tower`` towers.

    The global ``TOTAL_BATCH_SIZE`` is divided evenly between the towers. The
    learning rate is decayed linearly from 0.5 to 0 over 3e5 steps, and
    top-1/top-5 validation error is computed by an inference callback.
    """
    per_tower_batch = TOTAL_BATCH_SIZE // nr_tower
    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, per_tower_batch))

    train_flow = get_data('train', per_tower_batch)
    val_flow = get_data('val', per_tower_batch)

    steps_per_epoch = 1280000 // TOTAL_BATCH_SIZE
    total_steps = 3 * 10**5
    num_epochs = (total_steps // steps_per_epoch) + 1

    lr_points = [(0, 0.5), (total_steps, 0)]
    callbacks = [
        ModelSaver(),
        ScheduledHyperParamSetter('learning_rate', lr_points,
                                  interp='linear', step_based=True),
    ]
    inferencers = [
        ClassificationError('wrong-top1', 'val-error-top1'),
        ClassificationError('wrong-top5', 'val-error-top5'),
    ]
    if nr_tower == 1:
        # single-GPU inference with queue prefetch
        runner = InferenceRunner(QueueInput(val_flow), inferencers)
    else:
        # multi-GPU inference (with mandatory queue prefetch)
        runner = DataParallelInferenceRunner(
            val_flow, inferencers, list(range(nr_tower)))
    callbacks.append(runner)

    return TrainConfig(
        model=model,
        dataflow=train_flow,
        callbacks=callbacks,
        steps_per_epoch=steps_per_epoch,
        max_epoch=num_epochs,
    )
Exemple #4
0
    def _init_memory(self):
        """Populate the replay memory until it holds ``init_memory_size`` entries.

        Shows a progress bar while filling and sets ``_init_memory_flag`` once
        the memory is ready.
        """
        logger.info("Populating replay memory with epsilon={} ...".format(self.exploration))

        progress = get_tqdm(total=self.init_memory_size)
        with progress as bar:
            # one bar tick per call to _populate_exp()
            while not len(self.mem) >= self.init_memory_size:
                self._populate_exp()
                bar.update()
        self._init_memory_flag.set()
Exemple #5
0
def eval_model_multithread(pred, nr_eval, get_player_fn):
    """
    Evaluate one predictor on several worker threads and log the scores.

    Args:
        pred (OfflinePredictor): state -> Qvalue
        nr_eval: number of episode scores to collect (forwarded to
            ``eval_with_funcs``).
        get_player_fn: player factory forwarded to ``eval_with_funcs``.
    """
    # Half of the cores, capped at 8, but never fewer than one worker: with
    # zero workers (single-core host) no score would ever be produced and the
    # evaluation would block forever.
    nr_proc = max(1, min(multiprocessing.cpu_count() // 2, 8))
    with pred.sess.as_default():
        # Distinct local names so the builtin `max` is not shadowed.
        mean_score, max_score = eval_with_funcs([pred] * nr_proc, nr_eval, get_player_fn)
    logger.info("Average Score: {}; Max Score: {}".format(mean_score, max_score))
Exemple #6
0
 def __init__(self, dirname, label='phoneme'):
     """Index all '.wav' files found below ``dirname``.

     Args:
         dirname: existing directory that is walked recursively.
         label: either 'phoneme' or 'letter'.
     """
     self.dirname = dirname
     assert os.path.isdir(dirname), dirname
     wav_paths = []
     for path in fs.recursive_walk(self.dirname):
         if path.endswith('.wav'):
             wav_paths.append(path)
     self.filelists = wav_paths
     logger.info("Found {} wav files ...".format(len(self.filelists)))
     assert len(self.filelists), "Found no '.wav' files!"
     assert label in ['phoneme', 'letter'], label
     self.label = label
Exemple #7
0
def compute_mean_std(db, fname):
    """Accumulate per-frame moments over the LMDB at ``db`` and dump them.

    Every row of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator; the resulting ``[mean, std]`` pair is
    serialized into the file ``fname``.
    """
    dataflow = LMDBSerializer.load(db, shuffle=False)
    dataflow.reset_state()
    moments = OnlineMoments()
    for datapoint in get_tqdm(dataflow):
        # first component is the feature matrix: len x dim
        for frame in datapoint[0]:
            moments.feed(frame)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as out:
        out.write(serialize.dumps([moments.mean, moments.std]))
Exemple #8
0
 def update_target_param():
     """Build the op that copies each online variable into its target copy.

     A global variable whose op name starts with the scope prefix ``target/``
     is assigned the value of the tensor that has the same name without that
     prefix.

     Returns:
         A grouped op named 'update_target_network'.
     """
     prefix = 'target/'
     graph = tf.get_default_graph()
     ops = []
     for var in tf.global_variables():
         target_name = var.op.name
         # Match the scope prefix exactly: a bare 'target' test would also
         # accept names such as 'target_x/W', for which nothing is stripped
         # and the variable would just be assigned to itself.
         if not target_name.startswith(prefix):
             continue
         # Strip only the leading prefix, not every later 'target/' occurrence.
         new_name = target_name[len(prefix):]
         logger.info("{} <- {}".format(target_name, new_name))
         ops.append(var.assign(graph.get_tensor_by_name(new_name + ':0')))
     return tf.group(*ops, name='update_target_network')
Exemple #9
0
def eval_with_funcs(predictors, nr_eval, get_player_fn, verbose=False):
    """
    Play episodes on one worker thread per predictor and collect score statistics.

    Args:
        predictors ([PredictorBase])
        nr_eval: number of scores to collect before the workers are asked to stop.
        get_player_fn: called as ``get_player_fn(train=False)`` inside each
            worker to create its player.
        verbose (bool): if true, log every collected score.

    Returns:
        tuple: ``(stat.average, stat.max)`` over all collected scores, or
        ``(0, 0)`` when no score was collected.
    """
    class Worker(StoppableThread, ShareSessionThread):
        # Thread that keeps playing episodes with one predictor and pushes
        # each episode score onto the shared queue until it is stopped.
        def __init__(self, func, queue):
            super(Worker, self).__init__()
            self._func = func
            self.q = queue

        def func(self, *args, **kwargs):
            # Raising here aborts an episode that is in progress once the
            # thread has been stopped; run() catches the RuntimeError.
            if self.stopped():
                raise RuntimeError("stopped!")
            return self._func(*args, **kwargs)

        def run(self):
            with self.default_sess():
                player = get_player_fn(train=False)
                while not self.stopped():
                    try:
                        score = play_one_episode(player, self.func)
                    except RuntimeError:
                        return
                    self.queue_put_stoppable(self.q, score)

    q = queue.Queue()
    threads = [Worker(f, q) for f in predictors]

    for k in threads:
        k.start()
        time.sleep(0.1)  # avoid simulator bugs
    stat = StatCounter()

    def fetch():
        # Blocks until a worker has produced a score.
        r = q.get()
        stat.feed(r)
        if verbose:
            logger.info("Score: {}".format(r))

    for _ in tqdm(range(nr_eval), **get_tqdm_kwargs()):
        fetch()
    # waiting is necessary, otherwise the estimated mean score is biased
    logger.info("Waiting for all the workers to finish the last run...")
    for k in threads:
        k.stop()
    for k in threads:
        k.join()
    # Drain the scores that were queued while the workers were shutting down.
    while q.qsize():
        fetch()

    if stat.count > 0:
        return (stat.average, stat.max)
    return (0, 0)
Exemple #10
0
def convert_param_name(param):
    """Return a copy of ``param`` whose keys went through ``name_conversion``.

    A failing conversion is logged together with the offending key and then
    re-raised.
    """
    converted = {}
    for old_name, value in six.iteritems(param):
        try:
            new_name = name_conversion(old_name)
        except Exception:
            logger.error("Exception when processing caffe layer {}".format(old_name))
            raise
        else:
            logger.info("Name Transform: " + old_name + ' --> ' + new_name)
            converted[new_name] = value
    return converted
Exemple #11
0
 def run(self):
     """Receive client messages in a loop and dispatch them to ``_process_msg``.

     Each message unpacks to ``(ident, state, reward, isOver)``. The loop ends
     when the zmq context is terminated.
     """
     self.clients = defaultdict(self.ClientState)
     try:
         while True:
             payload = self.c2s_socket.recv(copy=False).bytes
             ident, state, reward, isOver = loads(payload)
             client = self.clients[ident]
             # a freshly created client state does not know its identity yet
             if client.ident is None:
                 client.ident = ident
             # maybe check history and warn about dead client?
             self._process_msg(client, state, reward, isOver)
     except zmq.ContextTerminated:
         logger.info("[Simulator] Context was terminated.")
Exemple #12
0
def compute_mean_std(db, fname):
    """Accumulate per-frame moments over the LMDB at ``db`` and dump them.

    Every row of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator; the resulting ``[mean, std]`` pair is
    serialized into the file ``fname``.
    """
    dataflow = LMDBDataPoint(db, shuffle=False)
    dataflow.reset_state()
    moments = OnlineMoments()
    with get_tqdm(total=dataflow.size()) as progress:
        for datapoint in dataflow.get_data():
            # first component is the feature matrix: len x dim
            for frame in datapoint[0]:
                moments.feed(frame)
            progress.update()
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as out:
        out.write(serialize.dumps([moments.mean, moments.std]))
Exemple #13
0
 def _trigger_epoch(self):
     """Anneal the exploration rate and summarize the player statistics.

     While ``exploration`` is above ``end_exploration`` it is lowered by
     ``exploration_epoch_anneal``. For every entry of ``self.player.stats``
     the mean and the max are added as scalar summaries, then the player
     statistics are reset.
     """
     if self.exploration > self.end_exploration:
         # Clamp, so a large anneal step cannot push the value below its floor.
         self.exploration = max(
             self.end_exploration,
             self.exploration - self.exploration_epoch_anneal)
         logger.info("Exploration changed to {}".format(self.exploration))
     # log player statistics
     for name, values in six.iteritems(self.player.stats):
         try:
             if np.size(values) == 0:
                 # nothing recorded for this stat; np.max would raise on it
                 continue
             stat_mean, stat_max = np.mean(values), np.max(values)
             self.trainer.add_scalar_summary('expreplay/mean_' + name, stat_mean)
             self.trainer.add_scalar_summary('expreplay/max_' + name, stat_max)
         except Exception:
             # Summaries stay best effort, but the failure is reported and
             # KeyboardInterrupt/SystemExit are no longer swallowed.
             logger.error("Failed to summarize player stat {}".format(name))
     self.player.reset_stat()
Exemple #14
0
    def print_class_histogram(self, imgs):
        """Log a table with the number of non-crowd ground-truth boxes per class.

        Args:
            imgs: iterable of entries providing a 'class' array and an
                'is_crowd' array of matching length.
        """
        nr_class = len(COCOMeta.class_names)
        hist_bins = np.arange(nr_class + 1)

        # Histogram of ground-truth objects.
        # `np.int` was only an alias of the builtin `int` and no longer exists
        # in current NumPy releases; the builtin gives the same dtype.
        gt_hist = np.zeros((nr_class,), dtype=int)
        for entry in imgs:
            # keep labelled (class > 0), non-crowd boxes only
            keep = (entry['class'] > 0) & (entry['is_crowd'] == 0)
            gt_classes = entry['class'][np.where(keep)[0]]
            gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]
        data = [[COCOMeta.class_names[i], v] for i, v in enumerate(gt_hist)]
        data.append(['total', sum(x[1] for x in data)])
        table = tabulate(data, headers=['class', '#box'], tablefmt='pipe')
        logger.info("Ground-Truth Boxes:\n" + colored(table, 'cyan'))
Exemple #15
0
					def texture_loss(x, p=16):
						"""Gram-matrix MSE between p x p patches of two batch slices.

						The batch axis is split into three equal parts; only the
						first and the last part are compared (render and style,
						going by the original comment; the middle part is unused).
						"""
						_, h, w, c = x.get_shape().as_list()
						x = normalize(x)
						assert h % p == 0 and w % p == 0
						logger.info('Create texture loss for layer {} with shape {}'.format(x.name, x.get_shape()))

						rows, cols = h // p, w // p
						blocks = tf.space_to_batch_nd(x, [p, p], [[0, 0], [0, 0]])  # [p * p * b, h/p, w/p, c]
						blocks = tf.reshape(blocks, [p, p, -1, rows, cols, c])      # [p, p, b, h/p, w/p, c]
						blocks = tf.transpose(blocks, [2, 3, 4, 0, 1, 5])           # [b, h/p, w/p, p, p, c]
						first_part, _, last_part = tf.split(blocks, 3, axis=0)

						# flatten every patch into its own batch entry: [?, p, p, c]
						first_part = tf.reshape(first_part, [-1, p, p, c])
						last_part = tf.reshape(last_part, [-1, p, p, c])
						gram_first = gram_matrix(first_part)
						gram_last = gram_matrix(last_part)
						return tf.losses.mean_squared_error(
							gram_first,
							gram_last,
							reduction=tf.losses.Reduction.MEAN
						)
Exemple #16
0
    def _parameter_net(self, theta, kernel_shape=9):
        """Predict a convolution filter from ``theta`` with a three-layer MLP.

        Args:
            theta: angle of filter
            kernel_shape: size of each filter

        Returns:
            learned filter as [B, k, k, 1]
        """
        with argscope(FullyConnected, nl=tf.nn.leaky_relu):
            hidden = FullyConnected('fc1', theta, 64)
            hidden = FullyConnected('fc2', hidden, 128)

        # linear output layer: one unit per filter tap
        num_taps = kernel_shape ** 2
        flat_filter = FullyConnected('fc3', hidden, num_taps, nl=tf.identity)
        filter_shape = [BATCH, kernel_shape, kernel_shape, 1]
        pred_filter = tf.reshape(flat_filter, filter_shape, name="pred_filter")
        logger.info('Parameter net output: {}'.format(pred_filter.get_shape().as_list()))
        return pred_filter
Exemple #17
0
    def _parameter_net(self, theta, kernel_shape=9):
        """Predict a convolution filter from ``theta`` with a three-layer MLP.

        Args:
            theta: angle of filter
            kernel_shape: size of each filter

        Returns:
            learned filter as [B, k, k, 1]
        """
        with argscope(FullyConnected, nl=tf.nn.leaky_relu):
            features = FullyConnected('fc2', FullyConnected('fc1', theta, 64), 128)

        # the last layer is linear and emits k * k values per sample
        flat = FullyConnected('fc3', features, kernel_shape ** 2, nl=tf.identity)
        pred_filter = tf.reshape(
            flat, [BATCH, kernel_shape, kernel_shape, 1], name="pred_filter")
        output_shape = pred_filter.get_shape().as_list()
        logger.info('Parameter net output: {}'.format(output_shape))
        return pred_filter
Exemple #18
0
                def texture_loss(x, p=16):
                    """Gram-matrix MSE between p x p patches of the two batch halves.

                    The batch axis is split into two equal parts whose patch-wise
                    Gram matrices are compared.
                    """
                    _, h, w, c = x.get_shape().as_list()
                    x = normalize(x)
                    assert h % p == 0 and w % p == 0
                    logger.info('Create texture loss for layer {} with shape {}'.format(x.name, x.get_shape()))

                    rows, cols = h // p, w // p
                    blocks = tf.space_to_batch_nd(x, [p, p], [[0, 0], [0, 0]])  # [p * p * b, h/p, w/p, c]
                    blocks = tf.reshape(blocks, [p, p, -1, rows, cols, c])      # [p, p, b, h/p, w/p, c]
                    blocks = tf.transpose(blocks, [2, 3, 4, 0, 1, 5])           # [b, h/p, w/p, p, p, c]
                    first_half, second_half = tf.split(blocks, 2, axis=0)

                    # flatten every patch into its own batch entry: [?, p, p, c]
                    first_half = tf.reshape(first_half, [-1, p, p, c])
                    second_half = tf.reshape(second_half, [-1, p, p, c])
                    gram_first = gram_matrix(first_half)
                    gram_second = gram_matrix(second_half)
                    return tf.losses.mean_squared_error(
                        gram_first,
                        gram_second,
                        reduction=Reduction.MEAN
                    )
    def __init__(self, predictor_io_names, predictor_refine_io_names, env,
                 state_shape, batch_size, memory_size, init_memory_size,
                 init_exploration, update_frequency):
        """
        Store the configuration, create both replay memories and reset the env.

        Every argument (after ``init_memory_size`` is converted to int) is
        copied onto ``self`` under its own name.

        Args:
            predictor_io_names (tuple of list of str): input/output names to
                predict Q value from state.
            predictor_refine_io_names: stored on the instance; presumably the
                same kind of names for the refine stage (TODO confirm).
            env: the environment. It is reset here and must provide
                ``focus_image``, ``history``, ``action_space`` and
                ``action_space_refine``.
            state_shape: forwarded to ``ReplayMemory`` and ``ReplayMemoryRefine``.
            batch_size: stored on the instance; not used further here.
            memory_size: forwarded to both replay memories.
            init_memory_size: converted with ``int()`` before being stored.
            init_exploration: initial value of ``self.exploration``.
            update_frequency (int): number of new transitions to add to memory
                after sampling a batch of transitions for training.
        """
        init_memory_size = int(init_memory_size)

        # NOTE: locals() is snapshotted before `items`, `k` and `v` exist, so
        # only `self` and the constructor arguments are copied.
        items = locals().items()
        for k, v in items:
            if k != 'self':
                setattr(self, k, v)
        self.exploration = init_exploration
        self.env = env  # already set by the loop above

        self.rng = get_rng(self)
        # print('RNG------------------------------------------', self.rng.randint(10))
        self._init_memory_flag = threading.Event(
        )  # tell if memory has been initialized

        # a queue to receive notifications to populate memory
        self._populate_job_queue = queue.Queue(maxsize=5)

        self.mem = ReplayMemory(memory_size, state_shape)
        self.mem_refine = ReplayMemoryRefine(memory_size, state_shape)
        self.env.reset()
        self._current_ob, self._current_history = self.env.focus_image, self.env.history
        # stage 1 ar actions
        self._action_space = self.env.action_space
        # stage 2 actions
        self._action_space_refine = self.env.action_space_refine
        logger.info(
            "Number of Legal actions: stage-1-ar {}, stage-2 {}".format(
                len(self._action_space), len(self._action_space_refine)))
        self._player_scores = StatCounter()
        self._current_game_score = StatCounter()
        self.state_shape = state_shape  # already set by the loop above
Exemple #20
0
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Training images need at least one non-crowd box.
    # This filter must not be applied for testing.
    num_before = len(roidbs)
    roidbs = [img for img in roidbs if len(img["boxes"][img["is_crowd"] == 0]) > 0]
    num_removed = num_before - len(roidbs)
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            num_removed, len(roidbs)
        )
    )

    ds = DataFromList(roidbs, shuffle=True)

    preprocess = TrainingDataPreprocessor(cfg)

    if cfg.DATA.NUM_WORKERS <= 0:
        # no workers requested: preprocess in the calling process
        return MapData(ds, preprocess)

    if cfg.TRAINER == "horovod":
        # MPI does not like fork(), so use threads. There is one dataflow per
        # process, therefore a large buffer is not needed.
        buffer_size = cfg.DATA.NUM_WORKERS * 10
        return MultiThreadMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)

    buffer_size = cfg.DATA.NUM_WORKERS * 20
    return MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
Exemple #21
0
def get_val_dataflow(datadir,
                     batch_size,
                     augmentors=None,
                     parallel=None,
                     num_splits=None,
                     split_index=None):
    """Build the batched validation dataflow for ILSVRC12.

    Args:
        datadir: dataset directory, must not be None.
        batch_size: batch size of the returned dataflow; the last, smaller
            batch is kept.
        augmentors (list): augmentors applied to every image. Defaults to
            ``fbresnet_augmentor(False)``.
        parallel (int): number of mapper threads. Defaults to
            ``min(40, cpu_count)``.
        num_splits (int): when given, the validation file list is cut into
            this many equally sized shards.
        split_index (int): index of the shard to use; must be smaller than
            ``num_splits``.

    Returns:
        A dataflow producing batches of ``(image, label)``.
    """
    if augmentors is None:
        augmentors = fbresnet_augmentor(False)
    assert datadir is not None
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count())

    if num_splits is None:
        ds = dataset.ILSVRC12Files(datadir, 'val', shuffle=False)
    else:
        # shard validation data
        assert split_index < num_splits
        files = dataset.ILSVRC12Files(datadir, 'val', shuffle=False)
        files.reset_state()
        files = list(files.get_data())
        logger.info("Number of validation data = {}".format(len(files)))
        # NOTE(review): when len(files) is not a multiple of num_splits the
        # trailing remainder is not part of any shard - confirm this is wanted.
        split_size = len(files) // num_splits
        start, end = split_size * split_index, split_size * (split_index + 1)
        end = min(end, len(files))
        logger.info("Local validation split = {} - {}".format(start, end))
        files = files[start:end]
        ds = DataFromList(files, shuffle=False)
    aug = imgaug.AugmentorList(augmentors)

    def mapf(dp):
        fname, cls = dp
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread reports an unreadable file by returning None instead of
        # raising; fail here with the file name rather than inside the
        # augmentors with an unrelated error.
        assert im is not None, fname
        im = aug.augment(im)
        return im, cls

    ds = MultiThreadMapData(ds,
                            parallel,
                            mapf,
                            buffer_size=min(2000, ds.size()),
                            strict=True)
    ds = BatchData(ds, batch_size, remainder=True)
    # do not fork() under MPI
    return ds
 def _trigger_epoch(self):
     """Log the Exp3 statistics of the epoch and reset the accumulators.

     Marks the instance as active, keeps the accumulated reward in
     ``old_average`` and re-initializes the reward sums, maxima and counts
     for ``K`` arms.
     """
     self.active = True
     report = (
         ("Exp3: Average Reward: {}", self.average_reward / self.reward_cnt),
         ("Exp3: Max Reward: {}", self.max_reward),
         ("Exp3: Sample weights: {}", self.sample_w),
         ("Exp3: weights: {}", self.w),
     )
     for template, value in report:
         logger.info(template.format(value))
     self.old_average = self.average_reward
     num_arms = self.K
     self.average_reward = np.zeros(num_arms)
     self.max_reward = np.zeros(num_arms)
     # counts start at one, so the average above never divides by zero
     self.reward_cnt = np.ones(num_arms)
Exemple #23
0
def ShuffleNet(image, classes=5):
    """Build the ShuffleNet classifier graph and return its logits.

    The configuration is hard-wired to group=8, ratio=0.5 and the "v2" channel
    table. The former ``if not True:`` v1 branch could never run and has been
    dropped, together with the constant ``if True:`` around 'conv5'.

    Args:
        image: input batch in channels-last layout; it is transposed to
            channels-first before the first convolution.
        classes (int): number of output units of the final linear layer.

    Returns:
        The output of the final ``FullyConnected`` layer.
    """
    with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False):
        image_channel_first = tf.transpose(image, [0, 3, 1, 2])
        group = 8  # args.group
        ratio = 0.5
        # Copied from the paper
        channels = {0.5: [48, 96, 192], 1.: [116, 232, 464]}[ratio]
        first_chan = 24

        logger.info("#Channels: " + str([first_chan] + channels))

        l = Conv2D('conv1',
                   image_channel_first,
                   first_chan,
                   3,
                   strides=2,
                   activation=BNReLU)
        l = MaxPooling('pool1', l, 3, 2, padding='SAME')

        l = shufflenet_stage('stage2', l, channels[0], 4, group)
        l = shufflenet_stage('stage3', l, channels[1], 8, group)
        l = shufflenet_stage('stage4', l, channels[2], 4, group)

        # extra 1x1 convolution before pooling (was guarded by args.v2)
        l = Conv2D('conv5', l, 1024, 1, activation=BNReLU)

        l = GlobalAvgPooling('gap', l)
        output = FullyConnected('linear', l, classes)
        return output
def get_cifar_augmented_data(subset,
                             options,
                             do_multiprocess=True,
                             do_validation=False,
                             shuffle=None):
    """Return the augmented, batched CIFAR-10/100 dataflow for ``subset``.

    Training augmentation (paste, crop, flip, mean subtraction, cut-out) is
    applied only when ``subset == 'train'`` and ``do_multiprocess`` is set;
    otherwise only the per-pixel mean is subtracted. ``shuffle`` defaults to
    that same training flag.

    Raises:
        ValueError: if ``options.num_classes`` / ``options.ds_name`` do not
            describe cifar10 or cifar100.
    """
    isTrain = subset == 'train' and do_multiprocess
    if shuffle is None:
        shuffle = isTrain

    if options.num_classes == 10 and options.ds_name == 'cifar10':
        dataset_cls, cutout_length = dataset.Cifar10, 16
    elif options.num_classes == 100 and options.ds_name == 'cifar100':
        dataset_cls, cutout_length = dataset.Cifar100, 8
    else:
        raise ValueError(
            'Number of classes must be set to 10(default) or 100 for CIFAR')
    n_holes = 1
    ds = dataset_cls(subset, shuffle=shuffle, do_validation=do_validation)

    logger.info('{} set has n_samples: {}'.format(subset, len(ds.data)))
    pp_mean = ds.get_per_pixel_mean()

    def subtract_mean(x):
        return (x - pp_mean) / 128.0

    if isTrain:
        logger.info('Will do cut-out with length={} n_holes={}'.format(
            cutout_length, n_holes))
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(subtract_mean),
            Cutout(length=cutout_length, n_holes=n_holes),
        ]
    else:
        augmentors = [imgaug.MapImage(subtract_mean)]

    ds = AugmentImageComponent(ds, augmentors)
    per_gpu_batch = options.batch_size // options.nr_gpu
    # keep the last, smaller batch everywhere except in training
    ds = BatchData(ds, per_gpu_batch, remainder=not isTrain)
    if do_multiprocess:
        ds = PrefetchData(ds, 3, 2)
    return ds
Exemple #25
0
def get_eval_dataflow(name, is_aws, is_gcs, shard=0, num_shards=1):
    """
    Build the inference dataflow of (image, image_id) for one shard.

    Args:
        name (str): name of the dataset to evaluate
        is_aws: read the images from the S3 bucket
        is_gcs: read the images from the GCS bucket (only checked when
            ``is_aws`` is false); otherwise images are read from local files
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    logger.info("Found {} images for inference.".format(len(roidbs)))

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    first = shard * img_per_shard
    # the last shard also takes the remainder
    if shard + 1 < num_shards:
        last = (shard + 1) * img_per_shard
    else:
        last = num_imgs

    # unlike training, no images are filtered out here
    ds = DataFromListOfDict(roidbs[first:last], ["file_name", "image_id"])

    if is_aws:
        s3 = boto3.resource("s3")
    elif is_gcs:
        c = storage.Client.create_anonymous_client()
        bucket = c.get_bucket("determined-ai-coco-dataset")

    def decode(raw):
        # decode an in-memory encoded image into a color image
        return cv2.imdecode(np.asarray(bytearray(raw), dtype=np.uint8), cv2.IMREAD_COLOR)

    def f(fname):
        if is_aws:
            s3_object = s3.meta.client.get_object(Bucket="determined-ai-coco-dataset", Key=fname)
            im = decode(s3_object["Body"].read())
        elif is_gcs:
            im = decode(download_gcs_blob_with_backoff(bucket.blob(fname)))
        else:
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
Exemple #26
0
    def __init__(self, basedir, name):
        """Load COCO-style instance annotations, or fall back to fixed categories.

        Args:
            basedir: dataset root containing the image directory and
                'annotations/instances_{name}.json'. When None, nothing is
                loaded and the categories 'mating', 'single_cell' and 'crowd'
                (ids 1-3) are used.
            name: name of the split.
        """
        if basedir is None:
            annotation_file = None
            cat_ids = [1, 2, 3]
            cat_names = ['mating', 'single_cell', 'crowd']
        else:
            self.name = name
            split_dir = COCOMeta.INSTANCE_TO_BASEDIR.get(name, name)
            self._imgdir = os.path.realpath(os.path.join(basedir, split_dir))
            assert os.path.isdir(self._imgdir), self._imgdir
            annotation_file = os.path.join(
                basedir, 'annotations/instances_{}.json'.format(name))
            assert os.path.isfile(annotation_file), annotation_file

            from pycocotools.coco import COCO
            self.coco = COCO(annotation_file)

            # initialize the meta
            cat_ids = self.coco.getCatIds()
            cat_names = [c['name'] for c in self.coco.loadCats(cat_ids)]

        # the meta is created once; later instances must agree with it
        if COCOMeta.valid():
            assert COCOMeta.cat_names == cat_names
        else:
            COCOMeta.create(cat_ids, cat_names)

        if basedir is not None:
            logger.info("Instances loaded from {}.".format(annotation_file))
Exemple #27
0
    def __init__(self, basedir, name):
        """
        Locate the image directory of a split and load its instance annotations.

        Args:
            basedir (str): root to the dataset
            name (str): the name of the split, e.g. "train2017"
        """
        basedir = os.path.expanduser(basedir)
        self.name = name

        split_dir = self._INSTANCE_TO_BASEDIR.get(name, name)
        self._imgdir = os.path.realpath(os.path.join(basedir, split_dir))
        assert os.path.isdir(self._imgdir), "{} is not a directory!".format(
            self._imgdir)

        annotation_name = 'annotations/instances_{}.json'.format(name)
        annotation_file = os.path.join(basedir, annotation_name)
        assert os.path.isfile(annotation_file), annotation_file

        # imported lazily, only when a dataset is actually constructed
        from pycocotools.coco import COCO
        self.coco = COCO(annotation_file)
        logger.info("Instances loaded from {}.".format(annotation_file))
Exemple #28
0
def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False):
    """ Compute per-class precision/recall/AP for object detection.

        Predictions and ground truth are regrouped by class name and then by
        image id. Every class seen on either side gets an entry in both maps
        before ``eval_det_cls`` is run on it.

        Input:
            pred_all: map of {img_id: [(classname, bbox, score)]}
            gt_all: map of {img_id: [(classname, bbox)]}
            ovthresh: scalar, iou threshold
            use_07_metric: bool, if true use VOC07 11 point method
        Output:
            rec: {classname: rec}
            prec: {classname: prec_all}
            ap: {classname: scalar}
    """
    pred = {}  # map {classname: {img_id: [(bbox, score)]}}
    gt = {}  # map {classname: {img_id: [bbox]}}
    for img_id, detections in pred_all.items():
        for classname, bbox, score in detections:
            boxes = pred.setdefault(classname, {}).setdefault(img_id, [])
            # make sure the image also exists on the ground-truth side
            gt.setdefault(classname, {}).setdefault(img_id, [])
            boxes.append((bbox, score))
    for img_id, annotations in gt_all.items():
        for classname, bbox in annotations:
            gt_of_class = gt.setdefault(classname, {})
            pred.setdefault(classname, {})
            gt_of_class.setdefault(img_id, []).append(bbox)

    rec, prec, ap = {}, {}, {}
    # NOTE(review): use_07_metric is accepted but never forwarded to
    # eval_det_cls - confirm whether that is intended.
    for classname in gt:
        logger.info('Computing AP for class: ' + classname)
        cls_rec, cls_prec, cls_ap = eval_det_cls(
            pred[classname], gt[classname], ovthresh)
        rec[classname], prec[classname], ap[classname] = cls_rec, cls_prec, cls_ap
        logger.info(classname + ':' + str(ap[classname]))

    return rec, prec, ap
Exemple #29
0
def train_net(net, session_init, batch_size, num_epochs, train_dataflow,
              val_dataflow):
    """Train ``net`` with the synchronous multi-GPU parameter-server trainer.

    One tower is used per available GPU (at least one). The learning rate
    decays linearly from 0.5 to 0 over ``num_epochs - 1`` epochs, and
    top-1/top-5 validation error is evaluated on ``val_dataflow``.
    """
    num_towers = max(get_num_gpu(), 1)
    logger.info("Running on {} towers. Batch size per tower: {}".format(
        num_towers, batch_size // num_towers))

    steps_per_epoch = 1281167 // batch_size
    decay_steps = (num_epochs - 1) * steps_per_epoch
    lr_points = [(0, 0.5), (decay_steps, 0)]
    callbacks = [
        ModelSaver(),
        ScheduledHyperParamSetter('learning_rate', lr_points,
                                  interp='linear',
                                  step_based=True),
        EstimatedTimeLeft(),
    ]

    inferencers = [
        ClassificationError('wrong-top1', 'val-error-top1'),
        ClassificationError('wrong-top5', 'val-error-top5'),
    ]
    if num_towers == 1:
        # single-GPU inference with queue prefetch
        runner = InferenceRunner(input=QueueInput(val_dataflow),
                                 infs=inferencers)
    else:
        # multi-GPU inference (with mandatory queue prefetch)
        runner = DataParallelInferenceRunner(input=val_dataflow,
                                             infs=inferencers,
                                             gpus=list(range(num_towers)))
    callbacks.append(runner)

    config = TrainConfig(dataflow=train_dataflow,
                         model=net,
                         callbacks=callbacks,
                         session_init=session_init,
                         steps_per_epoch=steps_per_epoch,
                         max_epoch=num_epochs)
    trainer = SyncMultiGPUTrainerParameterServer(num_towers)
    launch_train_with_config(config=config, trainer=trainer)
Exemple #30
0
 def input_transform(self, points, k=3):
     """Predict the input transformation from ``points``.

     The points get a trailing axis, run through three convolutions, a max
     pooling over the point axis and two fully connected layers, and the
     result is turned into the transformation by ``TransformPoints``.
     ``k`` is not used.
     """
     # [B,N,3] --> [3, k]
     num_point = points.get_shape()[1]
     points = tf.expand_dims(points, -1)
     with argscope(Conv2D, nl=BNReLU, padding='VALID'), \
             argscope(FullyConnected, nl=BNReLU):
         net = LinearWrap(points)
         net = net.Conv2D('tconv0', 64, kernel_shape=[1, 3])
         net = net.Conv2D('tconv1', 128, kernel_shape=1)
         net = net.Conv2D('tconv2', 1024, kernel_shape=1)
         net = net.MaxPooling('tpool0', [num_point, 1])
         net = net.FullyConnected('tfc0', 512, nl=BNReLU)
         net = net.FullyConnected('tfc1', 256, nl=BNReLU)
         net = net.TransformPoints('transf_xyz', 3, in_dim=3)
         transmat = net()
     logger.info('transformation matrix: {}\n\n'.format(
         transmat.get_shape()))
     return transmat
Exemple #31
0
    def __init__(self,
                 predictor_io_names,
                 player,
                 state_shape,
                 batch_size,
                 memory_size, init_memory_size,
                 init_exploration,
                 update_frequency, history_len):
        """
        Store the configuration, create the replay memory and reset the player.

        Args:
            predictor_io_names (tuple of list of str): input/output names to
                predict Q value from state.
            player (gym.Env): the player.
            state_shape (tuple): h, w, c
            history_len (int): length of history frames to concat. Zero-filled
                initial frames.
            update_frequency (int): number of new transitions to add to memory
                after sampling a batch of transitions for training.
        """
        assert len(state_shape) == 3, state_shape
        init_memory_size = int(init_memory_size)

        # Mirror every constructor argument onto the instance. locals() is
        # snapshotted before the loop variables exist, so it holds only `self`
        # and the arguments.
        for attr_name, attr_value in locals().items():
            if attr_name == 'self':
                continue
            setattr(self, attr_name, attr_value)
        self.exploration = init_exploration
        self.num_actions = player.action_space.n
        logger.info("Number of Legal actions: {}".format(self.num_actions))

        self.rng = get_rng(self)
        # tell if memory has been initialized
        self._init_memory_flag = threading.Event()

        # a queue to receive notifications to populate memory
        self._populate_job_queue = queue.Queue(maxsize=5)

        self.mem = ReplayMemory(memory_size, state_shape, history_len)
        self._current_ob = self.player.reset()
        self.stack = []
        # one pair of score counters per level
        self._player_scores = []
        self._current_game_scores = []
        for _ in range(NUM_LEVELS):
            self._current_game_scores.append(StatCounter())
            self._player_scores.append(StatCounter())
Exemple #32
0
def print_class_histogram(roidbs):
    """
    Log a table with the number of non-crowd ground-truth boxes per class.

    Args:
        roidbs (list[dict]): the same format as the output of `load_training_roidbs`.
            Each entry is indexed with the keys 'class' and 'is_crowd'.
    """
    dataset = DetectionDataset()
    hist_bins = np.arange(dataset.num_classes + 1)

    # Histogram of ground-truth objects.
    # `np.int` was removed in NumPy 1.24; it was an alias of the builtin `int`.
    gt_hist = np.zeros((dataset.num_classes, ), dtype=int)
    for entry in roidbs:
        # keep only labelled (class > 0), non-crowd boxes
        gt_inds = np.where((entry['class'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_classes = entry['class'][gt_inds]
        gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]
    data = [[dataset.class_names[i], v] for i, v in enumerate(gt_hist)]
    data.append(['total', sum(x[1] for x in data)])
    table = tabulate(data, headers=['class', '#box'], tablefmt='pipe')
    logger.info("Ground-Truth Boxes:\n" + colored(table, 'cyan'))
Exemple #33
0
def mvsnet_gn(x,
              group=32,
              group_channel=8,
              epsilon=1e-5,
              channel_wise=True,
              data_format='channels_last',
              beta_initializer=tf.constant_initializer(),
              gamma_initializer=tf.constant_initializer(1.)):
    """
    Group-normalize a 4-D tensor and apply a learned per-channel scale and shift.

    Args:
        x (tf.Tensor): 4-D input; its static shape is read via `get_shape()`.
        group (int): upper bound on the number of groups, used when
            `channel_wise` is False (`g = min(group, c)`).
        group_channel (int): channels per group, used when `channel_wise`
            is True (`g = max(1, c // group_channel)`).
        epsilon (float): added to the variance before taking the square root.
        channel_wise (bool): selects how the number of groups is derived.
        data_format (str): 'channels_first' or 'channels_last'.
        beta_initializer: initializer of the 'beta' variable of shape [c].
        gamma_initializer: initializer of the 'gamma' variable of shape [c].

    Returns:
        tf.Tensor: the normalized tensor, in the same layout as the input.
    """
    assert len(x.get_shape().as_list()) == 4, len(x.get_shape().as_list())
    assert data_format in ['channels_first', 'channels_last'], data_format

    if data_format == 'channels_first':
        _, c, h, w = x.get_shape().as_list()

        logger.info('mvsnet_gn: data_format is %s' % data_format)
    else:
        _, h, w, c = x.get_shape().as_list()
        # work internally in channels-first layout
        x = tf.transpose(x, [0, 3, 1, 2])
    if channel_wise:
        g = tf.cast(tf.maximum(1, c // group_channel), tf.int32)
    else:
        g = tf.cast(tf.minimum(group, c), tf.int32)

    # normalization: statistics are taken per group over axes [2, 3, 4]
    x = tf.reshape(x, (-1, g, c // g, h, w))
    new_shape = [1, c, 1, 1]
    mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
    beta = tf.get_variable('beta', [c],
                           dtype=tf.float32,
                           initializer=beta_initializer)
    beta = tf.reshape(beta, new_shape)
    gamma = tf.get_variable('gamma', [c],
                            dtype=tf.float32,
                            initializer=gamma_initializer)
    gamma = tf.reshape(gamma, new_shape)
    x = (x - mean) / tf.sqrt(var + epsilon)
    x = tf.reshape(x, [-1, c, h, w]) * gamma + beta

    if data_format == 'channels_last':
        # restore the caller's layout
        x = tf.transpose(x, [0, 2, 3, 1])
    return x
def proceed_test(args, is_densecrf=False):
    """
    Predict every image of the Aerial "test" split and write the results.

    For each image a multi-scale prediction is computed, its arg-max label
    map (multiplied by 255) is written to ``test-<script name>/<name>.tif``
    and a copy compressed by ``gdal_translate`` is written to the
    ``compressed`` sub-directory. The result directory is deleted and
    re-created on every call.

    Args:
        args: parsed arguments; `base_dir`, `meta_dir` and `load` are read.
        is_densecrf (bool): forwarded to `predict_scaler`.
    """
    import cv2
    import shutil
    import subprocess
    from tensorpack.utils.fs import mkdir_p

    def strip_suffix(text, suffix):
        # str.rstrip(suffix) removes any trailing characters found in
        # `suffix` (e.g. "left.tif".rstrip(".tif") == "le"); remove the
        # exact suffix instead.
        return text[:-len(suffix)] if text.endswith(suffix) else text

    ds = dataset.Aerial(args.base_dir, args.meta_dir, "test")
    imglist = ds.imglist
    ds = BatchData(ds, 1)

    pred_config = PredictConfig(model=Model(),
                                session_init=get_model_loader(args.load),
                                input_names=['image'],
                                output_names=['prob'])
    predictor = OfflinePredictor(pred_config)

    result_dir = "test-{}".format(
        strip_suffix(os.path.basename(__file__), ".py"))
    shutil.rmtree(result_dir, ignore_errors=True)
    mkdir_p(result_dir)
    mkdir_p(os.path.join(result_dir, "compressed"))

    logger.info("start validation....")
    for img_path, image in tqdm(zip(imglist, ds.get_data()),
                                total=len(imglist)):
        name = strip_suffix(os.path.basename(img_path), ".tif")
        image = np.squeeze(image)
        prediction = predict_scaler(image,
                                    predictor,
                                    scales=[0.9, 1, 1.1],
                                    classes=CLASS_NUM,
                                    tile_size=CROP_SIZE,
                                    is_densecrf=is_densecrf)
        prediction = np.argmax(prediction, axis=2)
        prediction = prediction * 255  # to 0-255
        file_path = os.path.join(result_dir, "{}.tif".format(name))
        compressed_file_path = os.path.join(result_dir, "compressed",
                                            "{}.tif".format(name))
        cv2.imwrite(file_path, prediction)
        # Argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through unchanged.
        command = ["gdal_translate", "--config", "GDAL_PAM_ENABLED", "NO",
                   "-co", "COMPRESS=CCITTFAX4", "-co", "NBITS=1",
                   file_path, compressed_file_path]
        print(" ".join(command))
        subprocess.call(command)
Exemple #35
0
    def build_graph(self, image, label):
        """
        Tower function: attack the input batch, then compute the loss on the
        attacked images.

        Returns the (optionally scaled) total cost when `self.training` is
        true, otherwise returns None after the loss and attack-success ops
        have been created.
        """
        image = self.image_preprocess(image)
        assert self.data_format == 'NCHW'
        image = tf.transpose(image, [0, 3, 1, 2])

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            # BatchNorm always comes with trouble. We use the testing mode of it during attack.
            with freeze_collection([tf.GraphKeys.UPDATE_OPS]):
                with argscope(BatchNorm, training=False):
                    if hasattr(self, 'palatte'):
                        print("======palatte")
                        attack_args = (image, label,
                                       self.get_logits_raw, self.palatte)
                    else:
                        attack_args = (image, label, self.get_logits)
                    image, target_label = self.attacker.attack(*attack_args)
                    # no gradient flows back through the attack itself
                    image = tf.stop_gradient(image, name='adv_training_sample')

            logits = self.get_logits(image)

        loss = ImageNetModel.compute_loss_and_error(
            logits, label, label_smoothing=self.label_smoothing)
        AdvImageNetModel.compute_attack_success(logits, target_label)
        if not self.training:
            return

        regularizer = tf.contrib.layers.l2_regularizer(self.weight_decay)
        wd_loss = regularize_cost(self.weight_decay_pattern,
                                  regularizer,
                                  name='l2_regularize_loss')
        add_moving_summary(loss, wd_loss)
        total_cost = tf.add_n([loss, wd_loss], name='cost')
        self.step = 1
        if self.loss_scale == 1.:
            return total_cost
        logger.info("Scaling the total loss by {} ...".format(
            self.loss_scale))
        return total_cost * self.loss_scale
    def __init__(
            self,
            agent_name,
            player,
            state_shape,
            num_actions,
            batch_size,
            memory_size,
            init_memory_size,
            init_exploration,
            update_frequency,
            encoding_file='../AutoEncoder/encoding.npy'):
        """
        Store the arguments, load the encoding array, create the replay
        memory and bring the player into its initial state.

        `init_memory_size` is converted to int before being stored.
        `num_actions` is unpacked into two values for the log message.
        `encoding_file` is loaded with `np.load` into `self.encoding`.
        """
        init_memory_size = int(init_memory_size)

        # Copy every constructor argument onto the instance under its own name.
        for attr_name, attr_value in locals().items():
            if attr_name == 'self':
                continue
            setattr(self, attr_name, attr_value)
        self.agent_name = agent_name
        self.exploration = init_exploration
        self.num_actions = num_actions
        self.encoding = np.load(encoding_file)
        legal_actions_msg = "Number of Legal actions: {}, {}".format(
            *self.num_actions)
        logger.info(legal_actions_msg)

        self.rng = get_rng(self)
        # set once the memory has been initialized
        self._init_memory_flag = threading.Event()

        # bounded queue that receives notifications to populate memory
        self._populate_job_queue = queue.Queue(maxsize=5)

        self.mem = ReplayMemory(memory_size, state_shape)
        self.player.reset()
        self.player.prepare()
        self._comb_mask = True
        self._fine_mask = None
        state_and_actions = self.get_state_and_action_spaces()
        self._current_ob, self._action_space = state_and_actions
        self._player_scores = StatCounter()
        self._current_game_score = StatCounter()
def proceed_test_dir(args):
    """
    Run multi-scale prediction on every image file found in `args.test_dir`.

    The arg-max label map of each image is written to
    ``test-from-dir/final/<filename>`` and the input image concatenated with
    `visualize_label(prediction)` to
    ``test-from-dir/visualization/<filename>``. The result directory is
    deleted and re-created on every call. Directory entries that OpenCV
    cannot decode are skipped with a warning.

    Args:
        args: parsed arguments; `test_dir` and `load` are read.
    """
    import cv2
    import shutil
    from tensorpack.utils.fs import mkdir_p

    filenames = os.listdir(args.test_dir)

    pred_config = PredictConfig(model=Model(),
                                session_init=get_model_loader(args.load),
                                input_names=['image'],
                                output_names=['prob'])
    predictor = OfflinePredictor(pred_config)

    result_dir = "test-from-dir"
    visual_dir = os.path.join(result_dir, "visualization")
    final_dir = os.path.join(result_dir, "final")
    shutil.rmtree(result_dir, ignore_errors=True)
    mkdir_p(result_dir)
    mkdir_p(visual_dir)
    mkdir_p(final_dir)

    logger.info("start validation....")

    def mypredictor(input_img):
        # A leading batch axis is added before calling the predictor; the
        # first output of the first (only) batch element is returned.
        output = predictor(input_img[np.newaxis, :, :, :])
        return output[0][0]

    for filename in tqdm(filenames):
        image = cv2.imread(os.path.join(args.test_dir, filename))
        if image is None:
            # cv2.imread returns None for sub-directories and for files it
            # cannot decode; skip them instead of failing in the predictor.
            logger.warning("Skipping unreadable file: {}".format(filename))
            continue
        prediction = predict_scaler(image,
                                    mypredictor,
                                    scales=[0.5, 0.75, 1, 1.25, 1.5],
                                    classes=CLASS_NUM,
                                    tile_size=CROP_SIZE,
                                    is_densecrf=False)
        prediction = np.argmax(prediction, axis=2)
        cv2.imwrite(os.path.join(final_dir, "{}".format(filename)), prediction)
        cv2.imwrite(
            os.path.join(visual_dir, "{}".format(filename)),
            np.concatenate((image, visualize_label(prediction)), axis=1))
Exemple #38
0
 def train_single_model(self, model_idx, result_q):
     """
     Train one actor `self.train_times` times on a fixed random batch.

     The learning rates are derived from the base rates and `model_idx`.
     Each training step runs while holding `self.locks[model_idx]`.

     Args:
         model_idx (int): index into `self.locks` and `self.actors`.
         result_q: queue-like object; the list of critic losses (one per
             training step) is `put` on it when training is done.
     """
     logger.info("[train_single_model] model_idx:{}".format(model_idx))
     critic_loss_list = []
     lock = self.locks[model_idx]
     actor = self.actors[model_idx]
     lr_critic = self.lr_critic * (1.0 + 0.1 * model_idx)
     lr_actor = self.lr_actor * (1.0 - 0.05 * model_idx)
     # random data generated once and reused for every step
     states = np.random.random((self.BATCH_SIZE, param.state_dim))
     actions = np.random.random((self.BATCH_SIZE, param.action_dim))
     rewards = np.random.random(self.BATCH_SIZE)
     dones = np.array([False] * self.BATCH_SIZE, dtype='bool')
     new_states = np.random.random((self.BATCH_SIZE, param.state_dim))
     for _ in range(self.train_times):
         lock.acquire()
         try:
             _, _, critic_loss = actor.combine_train(states, actions, rewards,
                                                     dones, new_states,
                                                     lr_critic, lr_actor)
         finally:
             # release even when training raises, so that other users of
             # this lock are not blocked forever
             lock.release()
         critic_loss_list.append(critic_loss)
     result_q.put(critic_loss_list)
Exemple #39
0
                def texture_loss(x, p=16):
                    """
                    Sum of squared differences between the Gram matrices of
                    p x p patches taken from the two halves of the batch,
                    scaled by 1 / BATCH_SIZE.

                    Height and width of `x` must be multiples of `p`.
                    """
                    x = normalize(x)
                    _, height, width, channels = x.get_shape().as_list()
                    assert height % p == 0 and width % p == 0
                    message = 'Create texture loss for layer {} with shape {}'
                    logger.info(message.format(x.name, x.get_shape()))

                    no_padding = [[0, 0], [0, 0]]
                    x = tf.space_to_batch_nd(x, [p, p], no_padding)
                    x = tf.reshape(
                        x, [p, p, -1, height // p, width // p, channels])
                    x = tf.transpose(x, [2, 3, 4, 0, 1, 5])
                    # each half is b,h/p,w/p,p,p,c
                    patches_a, patches_b = tf.split(x, 2)

                    patch_shape = [-1, p, p, channels]
                    patches_a = tf.reshape(patches_a, patch_shape)
                    patches_b = tf.reshape(patches_b, patch_shape)
                    summed_error = tf.losses.mean_squared_error(
                        gram_matrix(patches_a),
                        gram_matrix(patches_b),
                        reduction=Reduction.SUM)
                    return summed_error * (1.0 / BATCH_SIZE)
Exemple #40
0
    def _before_train(self):
        """
        Export the meta graph, save one checkpoint, then terminate the
        process via `exit()`.

        Disk-related errors raised while saving are logged and ignored.
        """
        # graph is finalized, OK to write it now.
        stamp = datetime.now().strftime('%m%d-%H%M%S')
        meta_path = os.path.join(self.checkpoint_dir,
                                 'graph-{}.meta'.format(stamp))
        self.saver.export_meta_graph(
            meta_path,
            collection_list=self.graph.get_all_collection_keys())

        # save
        disk_errors = (OSError, IOError, tf.errors.PermissionDeniedError,
                       tf.errors.ResourceExhaustedError)
        try:
            self.saver.save(tf.get_default_session(),
                            self.path,
                            global_step=tf.train.get_global_step(),
                            write_meta_graph=False)
            ckpt_state = tf.train.get_checkpoint_state(self.checkpoint_dir)
            logger.info("Model saved to %s." % ckpt_state.model_checkpoint_path)
        except disk_errors:  # disk error sometimes.. just ignore it
            logger.exception("Exception in ModelSaver!")
        exit()
Exemple #41
0
    def __init__(self, basedir, name):
        """
        Load the COCO instance annotations of split `name` under `basedir`.

        Args:
            basedir (str): directory containing the image directories and
                the `annotations` directory.
            name (str): a key of `COCOMeta.INSTANCE_TO_BASEDIR`; also used to
                build the annotation file name `instances_<name>.json`.
        """
        assert name in COCOMeta.INSTANCE_TO_BASEDIR, name
        self.name = name
        image_subdir = COCOMeta.INSTANCE_TO_BASEDIR[name]
        self._imgdir = os.path.join(basedir, image_subdir)
        assert os.path.isdir(self._imgdir), self._imgdir
        annotation_name = 'annotations/instances_{}.json'.format(name)
        annotation_file = os.path.join(basedir, annotation_name)
        assert os.path.isfile(annotation_file), annotation_file

        self.coco = COCO(annotation_file)

        # Create the shared category metadata on first use; afterwards only
        # check that this annotation file lists the same category names.
        cat_ids = self.coco.getCatIds()
        cat_names = [cat['name'] for cat in self.coco.loadCats(cat_ids)]
        if COCOMeta.valid():
            assert COCOMeta.cat_names == cat_names
        else:
            COCOMeta.create(cat_ids, cat_names)

        logger.info("Instances loaded from {}.".format(annotation_file))
Exemple #42
0
    def __init__(self, basedir, name):
        """
        Open the COCO instance annotation file for the split `name`.

        Args:
            basedir (str): root directory holding the image directories and
                the `annotations` directory.
            name (str): must be a key of `COCOMeta.INSTANCE_TO_BASEDIR`; the
                annotation file read is `annotations/instances_<name>.json`.
        """
        assert name in COCOMeta.INSTANCE_TO_BASEDIR, name
        self.name = name
        self._imgdir = os.path.join(
            basedir, COCOMeta.INSTANCE_TO_BASEDIR[name])
        assert os.path.isdir(self._imgdir), self._imgdir
        relative_annotation = 'annotations/instances_{}.json'.format(name)
        annotation_file = os.path.join(basedir, relative_annotation)
        assert os.path.isfile(annotation_file), annotation_file

        self.coco = COCO(annotation_file)

        # initialize the meta once; later instances must agree with it
        cat_ids = self.coco.getCatIds()
        categories = self.coco.loadCats(cat_ids)
        cat_names = [category['name'] for category in categories]
        if COCOMeta.valid():
            assert COCOMeta.cat_names == cat_names
        else:
            COCOMeta.create(cat_ids, cat_names)

        logger.info("Instances loaded from {}.".format(annotation_file))
Exemple #43
0
    def __init__(self, args):
        """
        Read the FCN-specific options from `args`.

        `args.class_weight` is optional; when it is missing or None, a vector
        of ones of length `self.num_classes` is used instead.
        NOTE(review): `self.num_classes` is presumably set by the parent
        constructor -- confirm.
        """
        super(AnytimeFCN, self).__init__(None, args)

        # Class weight for fully convolutional networks
        self.class_weight = getattr(args, 'class_weight', None)
        if self.class_weight is None:
            self.class_weight = np.ones(self.num_classes, dtype=np.float32)
        logger.info('Class weights: {}'.format(self.class_weight))

        self.is_label_one_hot = args.is_label_one_hot
        self.eval_threshold = args.eval_threshold
        self.do_scale_feat_to_label = args.do_scale_feat_to_label
        # no pooling when features are scaled to the label size
        self.n_pools = 0 if self.do_scale_feat_to_label else args.n_pools
        self.is_atrous = args.is_atrous
        self.output_stride = args.output_stride
        # base_o_s / o_s * base_rate ; base_o_s == 16
        self.atrous_rates = [6, 12, 18]
        self.atrous_rates_base_output_stride = 16
Exemple #44
0
 def _setup_graph(self):
     """
     Create one assign op per trainable variable that copies its value into
     the model variable of the same name (after removing the
     `self.src_scope + '/'` prefix from the trainable variable's name).

     The ops are appended to `self.assign_ops`. Asserts that every trainable
     variable found a matching model variable.
     """
     trainable_collection = tf.get_collection_ref(
         tf.GraphKeys.TRAINABLE_VARIABLES)
     scope_prefix = self.src_scope + '/'
     # source variables keyed by their name without the scope prefix
     source_by_name = {
         var.name.replace(scope_prefix, ''): var
         for var in trainable_collection
     }
     model_collection = tf.get_collection_ref(tf.GraphKeys.MODEL_VARIABLES)
     assign_ops = [
         var.assign(source_by_name[var.name])
         for var in model_collection
         if var.name in source_by_name
     ]
     self.assign_ops.extend(assign_ops)
     assert len(assign_ops) == len(trainable_collection)
     message = '[WeightSyncCallBack] Create {} assign ops for WeightSyncCallBack, schedule = {}'
     logger.info(message.format(len(assign_ops), self.schedule))
def do_predict(predictor, input_file):
    """
    Run inference on one image from `test_images/` and save the drawing.

    The result is written to `<cwd>/test_inferences/<stem>.png`, where
    `<stem>` is `input_file` without its extension. Failures are logged
    (with traceback) and the file name is printed; they never propagate, so
    a batch of files can be processed without stopping at the first bad one.

    Args:
        predictor: passed through to `predict_image`.
        input_file (str): file name relative to `test_images/`.
    """
    try:
        img = cv2.imread(os.path.join('test_images', input_file),
                         cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals a missing/undecodable file with None
            logger.error("Cannot read image {}".format(input_file))
            print(input_file)
            return

        results = predict_image(img, predictor)
        if cfg.MODE_MASK:
            final = draw_final_outputs_blackwhite(img, results)
        else:
            final = draw_final_outputs(img, results)
        viz = final.copy()
        # splitext drops only the extension; split('.')[0] would cut
        # "a.b.jpg" down to "a" and let different inputs overwrite each other
        stem = os.path.splitext(input_file)[0]
        out_path = os.path.join(os.getcwd(), 'test_inferences', stem + ".png")
        if cv2.imwrite(out_path, viz):
            logger.info(
                "Inference output for {} Successful".format(input_file))
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still work
        logger.exception("Inference failed for {}".format(input_file))
        print(input_file)
Exemple #46
0
def print_class_histogram(roidbs):
    """
    Log a table with the number of non-crowd ground-truth boxes per category.

    Args:
        roidbs (list[dict]): the same format as the output of `training_roidbs`.
            Each entry is indexed with the keys "class" and "is_crowd".
    """
    # labels are in [1, NUM_CATEGORY], hence +2 for bins
    hist_bins = np.arange(cfg.DATA.NUM_CATEGORY + 2)

    # Histogram of ground-truth objects.
    # `np.int` was removed in NumPy 1.24; it was an alias of the builtin `int`.
    gt_hist = np.zeros((cfg.DATA.NUM_CATEGORY + 1,), dtype=int)
    for entry in roidbs:
        # keep only labelled (class > 0), non-crowd boxes
        gt_inds = np.where((entry["class"] > 0) & (entry["is_crowd"] == 0))[0]
        gt_classes = entry["class"][gt_inds]
        gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]
    data = [[cfg.DATA.CLASS_NAMES[i], v] for i, v in enumerate(gt_hist)]
    data.append(["total", sum(x[1] for x in data)])
    # the first line is BG
    table = tabulate(data[1:], headers=["class", "#box"], tablefmt="pipe")
    logger.info("Ground-Truth Boxes:\n" + colored(table, "cyan"))
Exemple #47
0
def finalize_configs(is_training):
    """
    Validate the global config `_C`, fill in the derived entries and log it.

    Args:
        is_training (bool): when true, the trainer / GPU-count settings are
            checked and completed; otherwise cuDNN autotune is disabled.
    """
    _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1  # +1 background
    num_anchor_sizes = len(_C.RPN.ANCHOR_SIZES)
    _C.RPN.NUM_ANCHOR = num_anchor_sizes * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == num_anchor_sizes
    # image size into the backbone has to be multiple of this number;
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        # round MAX_SIZE up to the next multiple of the requirement
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        num_multiples = np.ceil(_C.PREPROC.MAX_SIZE / size_mult)
        _C.PREPROC.MAX_SIZE = num_multiples * size_mult

    if not is_training:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    else:
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu
        if _C.TRAIN.NUM_GPUS is None:
            _C.TRAIN.NUM_GPUS = ngpu
        elif _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == ngpu
        else:
            assert _C.TRAIN.NUM_GPUS <= ngpu

    banner = "Config: ------------------------------------------\n"
    logger.info(banner + str(_C))
Exemple #48
0
    def get_logits(self, image):
        """
        Build the ShuffleNet body on `image` and return the 1000-way logits.

        The variant (`args.v2`), the group count (`args.group`) and the width
        ratio (`args.ratio`) are read from the module-level `args`.
        """
        with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm], data_format='channels_first'), \
                argscope(Conv2D, use_bias=False):

            group = args.group
            if args.v2:
                # Copied from the paper
                v2_channels = {
                    0.5: [48, 96, 192],
                    1.: [116, 232, 464]
                }
                channels = v2_channels[args.ratio]
                first_chan = 24
            else:
                # Copied from the paper
                v1_channels = {
                    3: [240, 480, 960],
                    4: [272, 544, 1088],
                    8: [384, 768, 1536]
                }
                mul = group * 4  # #chan has to be a multiple of this number
                channels = [int(math.ceil(base * args.ratio / mul) * mul)
                            for base in v1_channels[group]]
                # The first channel must be a multiple of group
                first_chan = int(math.ceil(24 * args.ratio / group) * group)

            logger.info("#Channels: " + str([first_chan] + channels))

            net = Conv2D('conv1', image, first_chan, 3, strides=2, activation=BNReLU)
            net = MaxPooling('pool1', net, 3, 2, padding='SAME')

            net = shufflenet_stage('stage2', net, channels[0], 4, group)
            net = shufflenet_stage('stage3', net, channels[1], 8, group)
            net = shufflenet_stage('stage4', net, channels[2], 4, group)

            if args.v2:
                net = Conv2D('conv5', net, 1024, 1, activation=BNReLU)

            net = GlobalAvgPooling('gap', net)
            return FullyConnected('linear', net, 1000)
Exemple #49
0
    def run(self):
        """
        Receive client messages forever and dispatch them to the callbacks.

        Each message unpacks to (ident, state, reward, isOver). The loop ends
        when the ZMQ context is terminated.
        """
        self.clients = defaultdict(self.ClientState)
        try:
            while True:
                raw_message = self.c2s_socket.recv(copy=False).bytes
                ident, state, reward, isOver = loads(raw_message)
                # TODO check history and warn about dead client
                client = self.clients[ident]

                # reward & isOver belong to the previous step, so they are
                # only valid once the client has at least one stored step
                # (in the first message, only state is valid)
                if len(client.memory) > 0:
                    client.memory[-1].reward = reward
                    if isOver:
                        callback = self._on_episode_over
                    else:
                        callback = self._on_datapoint
                    callback(ident)
                # feed state and return action
                self._on_state(state, ident)
        except zmq.ContextTerminated:
            logger.info("[Simulator] Context was terminated.")
Exemple #50
0
    def __init__(self,
                 predictor_io_names,
                 player,
                 state_shape,
                 batch_size,
                 memory_size, init_memory_size,
                 init_exploration,
                 update_frequency, history_len):
        """
        Store the arguments, create the replay memory and reset the player.

        Args:
            predictor_io_names (tuple of list of str): input/output names to
                predict Q value from state.
            player (RLEnvironment): the player.
            state_shape (tuple): h, w, c
            batch_size: kept as ``self.batch_size``.
            memory_size: capacity passed to ``ReplayMemory``.
            init_memory_size: converted to ``int`` before being stored.
            init_exploration: initial value of ``self.exploration``.
            update_frequency (int): number of new transitions to add to memory
                after sampling a batch of transitions for training.
            history_len (int): length of history frames to concat. Zero-filled
                initial frames.
        """
        assert len(state_shape) == 3, state_shape
        init_memory_size = int(init_memory_size)

        # Copy every constructor argument onto the instance under its own name.
        for attr_name, attr_value in locals().items():
            if attr_name == 'self':
                continue
            setattr(self, attr_name, attr_value)
        self.exploration = init_exploration
        self.num_actions = player.action_space.n
        logger.info("Number of Legal actions: {}".format(self.num_actions))

        self.rng = get_rng(self)
        # set once the memory has been initialized
        self._init_memory_flag = threading.Event()

        # bounded queue that receives notifications to populate memory
        self._populate_job_queue = queue.Queue(maxsize=5)

        self.mem = ReplayMemory(memory_size, state_shape, history_len)
        self._current_ob = self.player.reset()
        self._player_scores = StatCounter()
        self._current_game_score = StatCounter()
Exemple #51
0
def finalize_configs(is_training):
    """
    Sanity-check the global config `_C`, derive dependent entries, log it.

    Args:
        is_training (bool): when true, the trainer / GPU-count settings are
            checked and completed; otherwise cuDNN autotune is disabled.
    """
    _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1  # +1 background
    anchor_size_count = len(_C.RPN.ANCHOR_SIZES)
    _C.RPN.NUM_ANCHOR = anchor_size_count * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == anchor_size_count
    # image size into the backbone has to be multiple of this number;
    # [3] because we build FPN with features r2,r3,r4,r5
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]

    if _C.MODE_FPN:
        # round MAX_SIZE up to the next multiple of the requirement
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        rounded_up = np.ceil(_C.PREPROC.MAX_SIZE / size_mult)
        _C.PREPROC.MAX_SIZE = rounded_up * size_mult

    if not is_training:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    else:
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
        else:
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu
        if _C.TRAIN.NUM_GPUS is None:
            _C.TRAIN.NUM_GPUS = ngpu
        elif _C.TRAINER == 'horovod':
            assert _C.TRAIN.NUM_GPUS == ngpu
        else:
            assert _C.TRAIN.NUM_GPUS <= ngpu

    header = "Config: ------------------------------------------\n"
    logger.info(header + str(_C))
    def build_graph(self, image, label):
        """
        Build the classification loss (plus L2 weight decay when
        `self.weight_decay` is positive) and return the total cost,
        multiplied by `self.loss_scale` when that differs from 1.
        """
        image = ImageNetModel.image_preprocess(image, bgr=self.image_bgr)
        assert self.data_format in ['NCHW', 'NHWC']
        if self.data_format == 'NCHW':
            image = tf.transpose(image, [0, 3, 1, 2])

        logits = self.get_logits(image)
        loss = ImageNetModel.compute_loss_and_error(logits, label)

        use_weight_decay = self.weight_decay > 0
        if use_weight_decay:
            regularizer = tf.contrib.layers.l2_regularizer(self.weight_decay)
            wd_loss = regularize_cost(self.weight_decay_pattern,
                                      regularizer,
                                      name='l2_regularize_loss')
            add_moving_summary(loss, wd_loss)
            total_cost = tf.add_n([loss, wd_loss], name='cost')
        else:
            total_cost = tf.identity(loss, name='cost')
            add_moving_summary(total_cost)

        if self.loss_scale == 1.:
            return total_cost
        logger.info("Scaling the total loss by {} ...".format(self.loss_scale))
        return total_cost * self.loss_scale
# Load `get_config` from the user-supplied config script and build the config.
get_config_func = imp.load_source('config_script', args.config).get_config
config = get_config_func()
config.dataset.reset_state()

if args.output:
    # Dump images from component `args.index` of each datapoint as PNG files.
    mkdir_p(args.output)
    cnt = 0
    index = args.index
    for dp in config.dataset.get_data():
        imgbatch = dp[index]
        # The limit is checked once per batch, so the last batch is always
        # written completely. `>=` stops as soon as `args.number` images
        # exist; `>` would start one more batch when cnt == args.number.
        if cnt >= args.number:
            break
        for bi, img in enumerate(imgbatch):
            cnt += 1
            fname = os.path.join(args.output, '{:03d}-{}.png'.format(cnt, bi))
            cv2.imwrite(fname, img * args.scale)

NR_DP_TEST = args.number
logger.info("Testing dataflow speed:")
# -1: repeat the dataset indefinitely so the benchmark never runs dry
ds = RepeatedData(config.dataset, -1)
with tqdm.tqdm(total=NR_DP_TEST, leave=True, unit='data points') as pbar:
    for idx, dp in enumerate(ds.get_data()):
        del dp
        # `>=` keeps the number of updates equal to the bar's total;
        # `>` would produce NR_DP_TEST + 1 updates.
        if idx >= NR_DP_TEST:
            break
        pbar.update()



#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: checkpoint-manipulate.py
# Author: Yuxin Wu <*****@*****.**>


import numpy as np
from tensorpack.tfutils.varmanip import dump_chkpt_vars
from tensorpack.utils import logger
import argparse

# Inspect a checkpoint: list its variables, optionally dump them to an npy
# file and/or open an IPython shell with `params` in scope.
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint')
parser.add_argument('--dump', help='dump to an npy file')
parser.add_argument('--shell', action='store_true', help='start a shell with the params')
args = parser.parse_args()

if args.checkpoint.endswith('.npy'):
    # The npy file holds a pickled dict (a 0-d object array), which NumPy
    # >= 1.16.3 refuses to load unless allow_pickle=True is given.
    # NOTE: unpickling executes code from the file -- only load trusted files.
    params = np.load(args.checkpoint, allow_pickle=True).item()
else:
    params = dump_chkpt_vars(args.checkpoint)
logger.info("Variables in the checkpoint:")
logger.info(str(params.keys()))
if args.dump:
    np.save(args.dump, params)
if args.shell:
    import IPython as IP
    IP.embed(config=IP.terminal.ipapp.load_default_config())
Exemple #55
0
 def fetch():
     """Take one score from `q`, feed it to `stat`, log it when `verbose`."""
     score = q.get()
     stat.feed(score)
     if not verbose:
         return
     logger.info("Score: {}".format(score))
Exemple #56
0
def print_config():
    """
    Log every attribute of `config` whose name contains no lowercase
    letters, framed by two separator lines.
    """
    logger.info("Config: ------------------------------------------")
    constant_names = [name for name in dir(config) if name == name.upper()]
    for name in constant_names:
        value = getattr(config, name)
        logger.info("{} = {}".format(name, value))
    logger.info("--------------------------------------------------")
Exemple #57
0
def play_n_episodes(player, predfunc, nr, render=False):
    """
    Play `nr` episodes one after another and print each episode's score.

    Args:
        player, predfunc: passed through to `play_one_episode`.
        nr (int): number of episodes.
        render (bool): passed through to `play_one_episode`.
    """
    logger.info("Start Playing ... ")
    for episode in range(nr):
        score = play_one_episode(player, predfunc, render=render)
        progress = "{}/{}, score={}".format(episode, nr, score)
        print(progress)
Exemple #58
0
def finalize_configs(is_training):
    """
    Run some sanity checks, and populate some configs from others

    Derived values are written back into the global config ``_C``
    (``DATA.NUM_CLASS``, ``DATA.BASEDIR``, ``RPN.NUM_ANCHOR``,
    ``FPN.RESOLUTION_REQUIREMENT``, ``PREPROC.MAX_SIZE`` when ``MODE_FPN`` is
    set, and ``TRAIN.NUM_GPUS`` when training and it was left as None).
    Some TensorFlow-related environment variables are set as a side effect.
    At the end ``_C`` is frozen and its string form is logged.

    Args:
        is_training: truthy to run the training-only checks and resolve the
            number of GPUs; falsy to only turn cuDNN autotune off.

    Raises:
        AssertionError: if any of the consistency checks fails.
    """
    _C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1  # +1 background
    # Expand a leading "~" in the dataset directory.
    _C.DATA.BASEDIR = os.path.expanduser(_C.DATA.BASEDIR)

    assert _C.BACKBONE.NORM in ['FreezeBN', 'SyncBN', 'GN'], _C.BACKBONE.NORM
    # FREEZE_AFFINE is only accepted together with the 'FreezeBN' norm.
    if _C.BACKBONE.NORM != 'FreezeBN':
        assert not _C.BACKBONE.FREEZE_AFFINE
    assert _C.BACKBONE.FREEZE_AT in [0, 1, 2]

    # One anchor per (size, ratio) combination.
    _C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
    assert len(_C.FPN.ANCHOR_STRIDES) == len(_C.RPN.ANCHOR_SIZES)
    # image size into the backbone has to be multiple of this number
    _C.FPN.RESOLUTION_REQUIREMENT = _C.FPN.ANCHOR_STRIDES[3]  # [3] because we build FPN with features r2,r3,r4,r5

    if _C.MODE_FPN:
        # "* 1." makes the divisor a float; MAX_SIZE is rounded up to the next
        # multiple of RESOLUTION_REQUIREMENT.
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / size_mult) * size_mult
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
        assert _C.FPN.NORM in ['None', 'GN']

        if _C.FPN.CASCADE:
            num_cascade = _C.CASCADE.NUM_STAGES
            # the first threshold is the proposal sampling threshold
            assert len(_C.CASCADE.IOUS) == num_cascade
            assert _C.CASCADE.IOUS[0] == _C.FRCNN.FG_THRESH
            assert len(_C.CASCADE.BBOX_REG_WEIGHTS) == num_cascade

    if is_training:
        train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
        # A gap of more than 100 between the two short-edge bounds is treated
        # as "augmentation is on".
        if train_scales[1] - train_scales[0] > 100:
            # don't warmup if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        # Set for every training run, independent of the check above.
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
        assert _C.TRAINER in ['horovod', 'replicated'], _C.TRAINER

        # setup NUM_GPUS
        if _C.TRAINER == 'horovod':
            # Imported here so horovod is only needed when that trainer is selected.
            import horovod.tensorflow as hvd
            ngpu = hvd.size()
        else:
            # NOTE(review): presumably guards against launching the
            # 'replicated' trainer under Open MPI -- confirm.
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu > 0, "Has to run with GPU!"
        # ngpu has to be a multiple of 8 or a divisor of 8.
        assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu
        if _C.TRAIN.NUM_GPUS is None:
            # Not configured: use the detected count.
            _C.TRAIN.NUM_GPUS = ngpu
        else:
            # Configured: horovod needs an exact match, the replicated trainer
            # may use no more than the detected count.
            if _C.TRAINER == 'horovod':
                assert _C.TRAIN.NUM_GPUS == ngpu
            else:
                assert _C.TRAIN.NUM_GPUS <= ngpu
    else:
        # autotune is too slow for inference
        os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'

    # Freeze the config and log its final contents.
    _C.freeze()
    logger.info("Config: ------------------------------------------\n" + str(_C))
Exemple #59
0
        # manually build the graph with batch=1
        input_desc = [
            InputDesc(tf.float32, [1, 224, 224, 3], 'input'),
            InputDesc(tf.int32, [1], 'label')
        ]
        input = PlaceholderInput()
        input.setup(input_desc)
        with TowerContext('', is_training=False):
            model.build_graph(*input.get_input_tensors())
        model_utils.describe_trainable_vars()

        tf.profiler.profile(
            tf.get_default_graph(),
            cmd='op',
            options=tf.profiler.ProfileOptionBuilder.float_operation())
        logger.info("Note that TensorFlow counts flops in a different way from the paper.")
        logger.info("TensorFlow counts multiply+add as two flops, however the paper counts them "
                    "as 1 flop because it can be executed in one instruction.")
    else:
        if args.v2:
            name = "ShuffleNetV2-{}x".format(args.ratio)
        else:
            name = "ShuffleNetV1-{}x-g{}".format(args.ratio, args.group)
        logger.set_logger_dir(os.path.join('train_log', name))

        nr_tower = max(get_num_gpu(), 1)
        config = get_config(model, nr_tower)
        if args.load:
            config.session_init = get_model_loader(args.load)
        launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(nr_tower))
Exemple #60
0
        def additional_losses(a, b):
            """
            Build perceptual and texture losses between ``a`` and ``b``.

            ``a`` and ``b`` are concatenated along axis 0, reshaped to
            ``[2 * BATCH_SIZE, SHAPE_LR * 4, SHAPE_LR * 4, 3]``, scaled by 255
            and shifted by ``VGG_MEAN_TENSOR``. The result goes through one
            shared stack of VGG19-named layers, and the activations are split
            back into the two halves to compare them.

            Returns:
                list: ``[pool2_loss, pool5_loss, texture_loss_conv1_1,
                texture_loss_conv2_1, texture_loss_conv3_1]``.
            """
            with tf.variable_scope('VGG19'):
                # Stack both inputs so they go through the same layers in one pass.
                x = tf.concat([a, b], axis=0)
                x = tf.reshape(x, [2 * BATCH_SIZE, SHAPE_LR * 4, SHAPE_LR * 4, 3]) * 255.0
                x = x - VGG_MEAN_TENSOR
                # VGG 19
                # NOTE(review): freeze_variables presumably keeps these layers
                # out of training -- confirm against varreplace.
                with varreplace.freeze_variables():
                    with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu):
                        conv1_1 = Conv2D('conv1_1', x, 64)
                        conv1_2 = Conv2D('conv1_2', conv1_1, 64)
                        pool1 = MaxPooling('pool1', conv1_2, 2)  # 64
                        conv2_1 = Conv2D('conv2_1', pool1, 128)
                        conv2_2 = Conv2D('conv2_2', conv2_1, 128)
                        pool2 = MaxPooling('pool2', conv2_2, 2)  # 32
                        conv3_1 = Conv2D('conv3_1', pool2, 256)
                        conv3_2 = Conv2D('conv3_2', conv3_1, 256)
                        conv3_3 = Conv2D('conv3_3', conv3_2, 256)
                        conv3_4 = Conv2D('conv3_4', conv3_3, 256)
                        pool3 = MaxPooling('pool3', conv3_4, 2)  # 16
                        conv4_1 = Conv2D('conv4_1', pool3, 512)
                        conv4_2 = Conv2D('conv4_2', conv4_1, 512)
                        conv4_3 = Conv2D('conv4_3', conv4_2, 512)
                        conv4_4 = Conv2D('conv4_4', conv4_3, 512)
                        pool4 = MaxPooling('pool4', conv4_4, 2)  # 8
                        conv5_1 = Conv2D('conv5_1', pool4, 512)
                        conv5_2 = Conv2D('conv5_2', conv5_1, 512)
                        conv5_3 = Conv2D('conv5_3', conv5_2, 512)
                        conv5_4 = Conv2D('conv5_4', conv5_3, 512)
                        pool5 = MaxPooling('pool5', conv5_4, 2)  # 4

            # perceptual loss
            with tf.name_scope('perceptual_loss'):
                # pool2 / pool5 are rebound to their normalized versions here.
                pool2 = normalize(pool2)
                pool5 = normalize(pool5)
                # First half along axis 0 came from `a`, second half from `b`.
                phi_a_1, phi_b_1 = tf.split(pool2, 2, axis=0)
                phi_a_2, phi_b_2 = tf.split(pool5, 2, axis=0)

                logger.info('Create perceptual loss for layer {} with shape {}'.format(pool2.name, pool2.get_shape()))
                pool2_loss = tf.losses.mean_squared_error(phi_a_1, phi_b_1, reduction=Reduction.MEAN)
                logger.info('Create perceptual loss for layer {} with shape {}'.format(pool5.name, pool5.get_shape()))
                pool5_loss = tf.losses.mean_squared_error(phi_a_2, phi_b_2, reduction=Reduction.MEAN)

            # texture loss
            with tf.name_scope('texture_loss'):
                def texture_loss(x, p=16):
                    """
                    Mean squared error between ``gram_matrix`` of the p x p
                    patches of the two halves (along axis 0) of ``x``.

                    Args:
                        x: 4-D tensor with static height and width, both
                            divisible by ``p``.
                        p (int): patch side length.
                    """
                    _, h, w, c = x.get_shape().as_list()
                    x = normalize(x)
                    assert h % p == 0 and w % p == 0
                    # Note: x was rebound above, so the normalized tensor's name is logged.
                    logger.info('Create texture loss for layer {} with shape {}'.format(x.name, x.get_shape()))

                    # Below, B is the size of axis 0 of x (both halves together).
                    x = tf.space_to_batch_nd(x, [p, p], [[0, 0], [0, 0]])  # [p * p * B, h/p, w/p, c]
                    x = tf.reshape(x, [p, p, -1, h // p, w // p, c])       # [p, p, B, h/p, w/p, c]
                    x = tf.transpose(x, [2, 3, 4, 0, 1, 5])                # [B, h/p, w/p, p, p, c]
                    patches_a, patches_b = tf.split(x, 2, axis=0)          # each is [B/2, h/p, w/p, p, p, c]

                    patches_a = tf.reshape(patches_a, [-1, p, p, c])       # [B/2 * h/p * w/p, p, p, c]
                    patches_b = tf.reshape(patches_b, [-1, p, p, c])       # [B/2 * h/p * w/p, p, p, c]
                    return tf.losses.mean_squared_error(
                        gram_matrix(patches_a),
                        gram_matrix(patches_b),
                        reduction=Reduction.MEAN
                    )

                # tf.identity is used only to give each loss tensor a fixed name.
                texture_loss_conv1_1 = tf.identity(texture_loss(conv1_1), name='normalized_conv1_1')
                texture_loss_conv2_1 = tf.identity(texture_loss(conv2_1), name='normalized_conv2_1')
                texture_loss_conv3_1 = tf.identity(texture_loss(conv3_1), name='normalized_conv3_1')

            return [pool2_loss, pool5_loss, texture_loss_conv1_1, texture_loss_conv2_1, texture_loss_conv3_1]