Ejemplo n.º 1
0
    def test_serialise(self):
        """Serialise a randomly initialised network and inspect the result."""
        num_cls, num_layers = 10, 7
        im_dim = Shape(3, 512, 512)

        # Build an untrained network; no initial values are supplied, so the
        # variables hold whatever the initialiser produces.
        net = orpac_net.Orpac(
            self.sess, im_dim, num_layers, num_cls, None, False)
        self.sess.run(tf.global_variables_initializer())

        # The serialised state must be a dictionary with exactly three
        # entries, and the per-layer tables must be keyed by layer index.
        data = net.serialise()
        assert isinstance(data, dict)
        assert set(data.keys()) == {'weight', 'bias', 'num-layers'}
        for key in ('bias', 'weight'):
            assert set(data[key].keys()) == set(range(net.numLayers()))
        assert data['num-layers'] == num_layers

        # Every serialised array must match the live variable it came from.
        for layer in range(net.numLayers()):
            assert np.array_equal(net.getBias(layer), data['bias'][layer])
            assert np.array_equal(net.getWeight(layer), data['weight'][layer])
Ejemplo n.º 2
0
    def loadRawData(self, path, ft_dim, num_samples):
        """Locate, compile and load the training data stored under `path`.

        NOTE: the channel count of the caller's `ft_dim` object is updated in
        place; the very same object is also returned.

        Inputs:
            path: str
                Folder with the training images.
            ft_dim: Shape
                Requested feature size. Only its height and width are read;
                its `chan` attribute is overwritten.
            num_samples:
                Forwarded unchanged to `findTrainingFiles`.

        Returns:
            im_dim: Shape
                Image shape
            ft_dim: Shape
                Dimensions of training data.
            int2name: dict[int:str]
                A LUT to translate machine labels to human readable strings.
                For instance {0: 'None', 1: 'Cube 0', 2: 'Cube 1'}.
            train: N-List[TrainingSample]
                Training data.
        """
        # Collect the training files in the source folder.
        fnames = self.findTrainingFiles(path, num_samples)

        # Fetch the label LUT from the meta data that accompanies the files.
        # Its size is the number of classes.
        int2name = self.getLabelData(fnames)
        num_cls = len(int2name)

        # Derive the image height and width that correspond to the requested
        # feature height and width. The channel count is filled in below.
        ft_height, ft_width = ft_dim.hw()
        im_dim = orpac_net.waveletToImageDim(Shape(None, ft_height, ft_width))

        # Channel information: images are always RGB, whereas the number of
        # feature channels depends on the number of classes.
        im_dim.chan = 3
        ft_dim.chan = orpac_net.Orpac.numOutputChannels(num_cls)

        # Compile whichever features are still missing, then load the
        # compiled training data alongside each image.
        self.compileMissingFeatures(fnames, ft_dim)
        train = self.loadTrainingData(fnames, im_dim, ft_dim, num_cls)

        return im_dim, ft_dim, int2name, train
Ejemplo n.º 3
0
    def test_weights_and_biases(self):
        """Create default network and verify the shapes of its variables."""
        im_dim = Shape(3, 512, 512)
        num_cls, num_layers = 10, 7

        # Create network with random weights.
        net = orpac_net.Orpac(self.sess, im_dim, num_layers, num_cls, None, False)
        self.sess.run(tf.global_variables_initializer())

        # First layer must be compatible with input.
        assert net.getBias(0).shape == (64, 1, 1)
        assert net.getWeight(0).shape == (3, 3, net._xin.shape[1], 64)

        # The last filter is responsible for creating the various features we
        # train the network on. Its dimension must be 33x33 to achieve a large
        # receptive field on the input image.
        # NOTE: these two comparisons used to lack the `assert` keyword, which
        # means their results were computed and then silently discarded.
        num_ft_chan = net.outputShape().chan
        assert net.getBias(num_layers - 1).shape == (num_ft_chan, 1, 1)
        assert net.getWeight(num_layers - 1).shape == (33, 33, 64, num_ft_chan)

        # The output layer must have the correct number of features and
        # feature map size. This excludes the batch dimension.
        assert net.output().shape[1:] == net.outputShape().chw()
Ejemplo n.º 4
0
    def test_restore(self):
        """Construct a network from explicit bias and weight values.

        This test cannot be combined with `test_serialise` because of TFs
        idiosyncrasies with (not) sharing Tensor names. Instead, it fabricates
        constant values for three layers, hands them to the Ctor, and checks
        that the network variables hold exactly those values afterwards.
        """
        sess = self.sess
        num_cls, num_layers = 10, 3
        im_dim = Shape(3, 512, 512)

        # Use the utility functions to determine the number of channels of the
        # network output layer and the dimensions of the network input.
        num_ft_chan = orpac_net.Orpac.numOutputChannels(num_cls)
        dim_xin = orpac_net.imageToWaveletDim(im_dim)

        # Bias and weight dimensions for the first, middle and last layer. The
        # first layer must be adapted to the input, the middle layer is fixed
        # and the last layer must encode the features (ie BBox, isFg, Class).
        layer_dims = [
            ((64, 1, 1), (3, 3, dim_xin.chan, 64)),
            ((64, 1, 1), (3, 3, 64, 64)),
            ((num_ft_chan, 1, 1), (33, 33, 64, num_ft_chan)),
        ]

        # Fill every variable of layer `k` with the constant value `k`.
        bw_init = {'bias': {}, 'weight': {}}
        for k, (b_dim, W_dim) in enumerate(layer_dims):
            bw_init['bias'][k] = np.full(b_dim, k, np.float32)
            bw_init['weight'][k] = np.full(W_dim, k, np.float32)
        bw_init['num-layers'] = 3

        # Create a new network and restore its weights.
        net = orpac_net.Orpac(sess, im_dim, num_layers, num_cls, bw_init, False)
        sess.run(tf.global_variables_initializer())

        # Ensure the weights are as specified.
        for k in range(net.numLayers()):
            assert np.array_equal(net.getBias(k), bw_init['bias'][k])
            assert np.array_equal(net.getWeight(k), bw_init['weight'][k])
Ejemplo n.º 5
0
    def setup_class(cls):
        """Create the session, network and cost nodes shared by the tests."""
        num_cls, num_layers = 10, 7

        # Keep the feature map at a tiny 2x2 to simplify testing and
        # debugging, and derive the matching image dimensions from it.
        ft_dim = Shape(None, 2, 2)
        im_dim = orpac_net.waveletToImageDim(ft_dim)

        # Tensorflow session and a non-trainable dummy network.
        cls.sess = tf.Session()
        cls.net = orpac_net.Orpac(
            cls.sess, im_dim, num_layers, num_cls, None, train=False)
        assert cls.net.outputShape().hw() == (2, 2)

        # Placeholder with the same shape as the genuine network output. The
        # tests feed their own data into it to simulate network predictions.
        out_shape = cls.net.output().shape
        cls.y_pred_in = tf.placeholder(tf.float32, out_shape)

        # Attach the cost computation to the placeholder, then look up the
        # `y_true` tensor by its name in the default graph.
        cls.total_cost = orpac_net.createCostNodes(cls.y_pred_in)
        graph = tf.get_default_graph()
        cls.y_true_in = graph.get_tensor_by_name('orpac-cost/y_true:0')
Ejemplo n.º 6
0
def main():
    """Train the ORPAC network and save its state after every epoch.

    All state lives in the `netstate` folder (created on demand). If the meta
    file exists there, the configuration, training log and network variables
    are restored from it, otherwise training starts with a default
    configuration. The number of epochs and the learning rate range come from
    the command line (`param.N`, `param.lr0`, `param.lr1`).

    Press <Ctrl-C> to stop training; the interrupt is swallowed on purpose.
    """
    param = parseCmdline()
    sess = tf.Session()

    # File names.
    netstate_path = 'netstate'
    os.makedirs(netstate_path, exist_ok=True)
    fnames = {
        'meta': os.path.join(netstate_path, 'orpac-meta.pickle'),
        'orpac-net': os.path.join(netstate_path, 'orpac-net.pickle'),
        'checkpt': os.path.join(netstate_path, 'tf-checkpoint.pickle'),
    }
    del netstate_path

    # Restore the configuration if it exists, otherwise create a new one.
    print('\n----- Simulation Parameters -----')
    restore = os.path.exists(fnames['meta'])
    if restore:
        # NOTE: unpickling executes arbitrary code. This is acceptable only
        # because these files were written by this very program.
        with open(fnames['meta'], 'rb') as fd:
            meta = pickle.load(fd)
        conf, log = meta['conf'], meta['log']
        with open(fnames['orpac-net'], 'rb') as fd:
            bw_init = pickle.load(fd)
    else:
        log = collections.defaultdict(list)
        conf = config.NetConf(
            seed=0, epoch=0, num_layers=7, path=os.path.join('data', '3dflight'),
            ft_dim=Shape(None, 64, 64), num_samples=None
        )
        bw_init = None
        # NOTE(review): this message appears when nothing was restored.
        # Left untouched because it may be intentional - confirm.
        print(f'Restored from <{None}>')
    print('\n', conf)

    # Load the BBox training data.
    print('\n----- Data Set -----')
    ds = data_loader.ORPAC(conf.path, conf.ft_dim, conf.num_samples, conf.seed)
    ds.printSummary()
    int2name = ds.int2name()
    num_classes = len(int2name)
    im_dim = ds.imageShape()

    # Input/output/parameter tensors for network.
    print('\n----- Network Setup -----')

    # Create input tensor and trainable ORPAC net.
    net = orpac_net.Orpac(sess, im_dim, conf.num_layers, num_classes, bw_init, True)

    # Select cost function and optimiser, then initialise the TF graph.
    sess.run(tf.global_variables_initializer())

    # Ensure the network output shape matches the training output.
    assert net.outputShape() == ds.featureShape()
    print('Output feature map size: ', net.outputShape())

    # Restore the network from Tensorflow's checkpoint file.
    saver = tf.train.Saver()
    if restore:
        print('\nRestored Tensorflow graph from checkpoint file')
        saver.restore(sess, fnames['checkpt'])
    else:
        print('Starting with untrained network')

    print(f'\n----- Training for another {param.N} Epochs -----')
    try:
        epoch_ofs = conf.epoch + 1

        # One learning rate per epoch, logarithmically spaced from `lr0` to
        # `lr1`.
        lrates = np.logspace(np.log10(param.lr0), np.log10(param.lr1), param.N)
        t0_all = time.time()
        for epoch, lrate in enumerate(lrates):
            t0_epoch = time.time()
            tot_epoch = epoch + epoch_ofs
            print(f'\nEpoch {tot_epoch} ({epoch+1}/{param.N} in this training cycle)')

            ds.reset()
            trainEpoch(ds, net, log, lrate)

            # Save the network state and log data. The state is serialised
            # *before* the file is opened for writing so that a failure in
            # `serialise` cannot truncate the previous file. The context
            # managers guarantee the data is flushed and the handles closed.
            net_state = net.serialise()
            with open(fnames['orpac-net'], 'wb') as fd:
                pickle.dump(net_state, fd)
            conf = conf._replace(epoch=tot_epoch)
            meta = {'conf': conf, 'int2name': int2name, 'log': log}
            with open(fnames['meta'], 'wb') as fd:
                pickle.dump(meta, fd)
            saver.save(sess, fnames['checkpt'])

            # Determine training time for epoch
            etime = str(datetime.timedelta(seconds=int(time.time() - t0_epoch)))
            et_h, et_m, et_s = etime.split(':')
            etime_str = f'  Training time: {et_h}h {et_m}m {et_s}s'

            # Print basic stats about epoch.
            print(f'{etime_str}   Learning Rate: {lrate:.1E}')
        etime = str(datetime.timedelta(seconds=int(time.time() - t0_all)))
        et_h, et_m, et_s = etime.split(':')
        print(f'\nTotal training time: {et_h}h {et_m}m {et_s}s\n')
    except KeyboardInterrupt:
        # Deliberate: <Ctrl-C> is the way to end a training run early.
        pass
Ejemplo n.º 7
0
class Orpac:
    """Convolutional network that predicts BBoxes, fg/bg and class labels.

    The network operates on Wavelet decomposed images. Its output tensor has
    `numOutputChannels(num_classes)` channels: the first four hold the BBox
    data, the next two the foreground/background data and the remaining ones
    the class labels (see the `get*`/`set*` static methods).

    All nodes are created in Tensorflow's default graph and are looked up by
    name afterwards.
    """
    # Specify how many times to decompose the input image with Wavelets.
    _NUM_WAVELET_DECOMPOSITIONS = 3

    def __init__(self, sess, im_dim, num_layers, num_classes, bw_init, train):
        """Create the network nodes in the default Tensorflow graph.

        Inputs:
            sess: Tensorflow session
                Used for every `run` call this instance issues.
            im_dim: Shape
                Dimensions of the input images.
            num_layers: int
                Total number of conv layers, including the output layer.
            num_classes: int
                Number of classes; determines the number of output channels.
            bw_init:
                Passed verbatim to `unpackBiasAndWeight`. The callers in this
                code base supply either None or a dictionary with the layout
                returned by `serialise`.
            train: bool
                Create the cost and optimiser nodes if True.
        """
        # Decide if we want to create cost nodes or not.
        assert isinstance(train, bool)

        # Backup basic variables.
        self._trainable = train
        self.sess = sess
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.im_dim = im_dim

        # Create placeholder variable for Wavelet decomposed image.
        self._xin = self._createInputTensor(im_dim)

        # Setup the NMS nodes and Orpac network.
        self._setupNonMaxSuppression()
        with tf.variable_scope('orpac'):
            self.out = self._setupNetwork(self._xin, bw_init, np.float32)

        # Store shape of the output tensor (without the batch dimension).
        self.ft_dim = Shape(*self.out.shape.as_list()[1:])

        # Define the cost nodes and compile them into a dictionary if this
        # network is trainable, otherwise do nothing.
        if self._trainable:
            self._cost_nodes, self._optimiser = self._addOptimiser()
        else:
            self._cost_nodes, self._optimiser = {}, None

    def session(self):
        """Return Tensorflow session"""
        return self.sess

    def getBias(self, layer):
        """Return the current value of the bias variable of `layer`."""
        g = tf.get_default_graph().get_tensor_by_name
        return self.sess.run(g(f'orpac/b{layer}:0'))

    def getWeight(self, layer):
        """Return the current value of the weight variable of `layer`."""
        g = tf.get_default_graph().get_tensor_by_name
        return self.sess.run(g(f'orpac/W{layer}:0'))

    def numLayers(self):
        """Return the number of layers passed to the constructor."""
        return self.num_layers

    def numClasses(self):
        """Return the number of classes passed to the constructor."""
        return self.num_classes

    def outputShape(self):
        """Return the shape of the network output (exclusive Batch dimension).

        For example, the output may be Shape(chan=18, height=64, width=64).
        """
        # Sanity check: the number of output channels must match the value
        # returned by `numOutputChannels`.
        assert self.ft_dim.chan == self.numOutputChannels(self.numClasses())
        return self.ft_dim.copy()

    def imageShape(self):
        """Return a copy of the image shape passed to the constructor."""
        return self.im_dim.copy()

    def output(self):
        """Return the output tensor of the network."""
        return self.out

    def trainable(self):
        """Return True if this network has cost and optimiser nodes."""
        return self._trainable

    def costNodes(self):
        """Return a shallow copy of the cost node dictionary.

        The dictionary is empty unless the network is trainable.
        """
        return dict(self._cost_nodes)

    @staticmethod
    def numOutputChannels(num_classes: int):
        """Return the number of feature channels when there are `num_classes`.

        This value specifies the number of channels that the final network
        layer will return.

        NOTE: this returns the same value as `outputShape().chan` but does not
        require an Orpac instance since it is a static method.

        Input:
            num_classes: int
                The number of output channels depends on the number of classes
                in the data set. This variable specifies that number.

        Returns:
            int: number of channels in final network output layer.
        """
        # Four BBox channels, two isFg channels and one channel per class.
        return 4 + 2 + num_classes

    @staticmethod
    def setBBoxRects(y, val):
        """Return a copy of `y` with the BBox channels (0-3) set to `val`."""
        y = np.array(y)
        assert y.ndim == 3
        assert np.array(val).shape == y[:4].shape
        y[:4] = val
        return y

    @staticmethod
    def getBBoxRects(y):
        """Return the BBox channels (0-3) of the 3D feature array `y`."""
        assert y.ndim == 3
        return y[:4]

    @staticmethod
    def setIsFg(y, val):
        """Return a copy of `y` with the isFg channels (4-5) set to `val`."""
        y = np.array(y)
        assert y.ndim == 3
        assert np.array(val).shape == y[4:6].shape
        y[4:6] = val
        return y

    @staticmethod
    def getIsFg(y):
        """Return the isFg channels (4-5) of the 3D feature array `y`."""
        assert y.ndim == 3
        return y[4:6]

    @staticmethod
    def setClassLabel(y, val):
        """Return a copy of `y` with the class channels (6+) set to `val`."""
        y = np.array(y)
        assert y.ndim == 3
        assert np.array(val).shape == y[6:].shape
        y[6:] = val
        return y

    @staticmethod
    def getClassLabel(y):
        """Return the class label channels (6+) of the 3D feature array `y`."""
        assert y.ndim == 3
        return y[6:]

    def _createInputTensor(self, im_dim):
        """Return the placeholder for one Wavelet decomposed image.

        The placeholder is called `x_in` and has the shape
        (1, num_chan, height, width), ie the batch size is always one.
        """
        N = self._NUM_WAVELET_DECOMPOSITIONS

        # Every decomposition halves the height and width of the image.
        # NOTE: `hw()` returns (height, width), in that order. The original
        # code unpacked the two values the other way around, which was only
        # harmless because the images are square.
        ft_hw = np.array(im_dim.hw()) / (2**N)
        height, width = ft_hw.astype(np.int32).tolist()

        # Every decomposition splits each channel into four coefficient maps
        # and we start out with the three colour channels.
        num_chan = 3 * (4**N)
        x_dim = (1, num_chan, height, width)
        return tf.placeholder(tf.float32, x_dim, name='x_in')

    def _addOptimiser(self):
        """Create cost nodes, learning rate placeholder and Adam optimiser.

        Returns:
            nodes: dict[str:Tensor]
                The 'cls', 'bbox', 'isFg' and 'total' cost tensors.
            opt: Tensorflow operation
                Runs one minimisation step of the cost returned by
                `createCostNodes`.
        """
        cost = createCostNodes(self.out)
        g = tf.get_default_graph().get_tensor_by_name

        # The learning rate is a placeholder so that it can vary per step.
        lrate_in = tf.placeholder(tf.float32, name='lrate')
        opt = tf.train.AdamOptimizer(learning_rate=lrate_in).minimize(cost)
        nodes = {
            'cls': g('orpac-cost/cls:0'),
            'bbox': g('orpac-cost/bbox:0'),
            'isFg': g('orpac-cost/isFg:0'),
            'total': g('orpac-cost/total:0'),
        }
        return nodes, opt

    def _imageToInput(self, img):
        """Return Wavelet decomposed `img`

        The returned tensor is compatible with this class' `_xin` placeholder.

        The image dimensions must match those returned by `imageShape`, ie.
        it must be square, RGB and all its dimension must be powers of 2.

        Each colour channel will be decomposed self._NUM_WAVELET_DECOMPOSITIONS
        times.

        Inputs:
            img: UInt8 Array[height, width, 3]

        Output:
            Array[1, *imageToWaveletDim(img_shape)]
                The output dimension depends on the number of decompositions
                and the input size. For a 512x512x3 image with 3 decompositions
                the output would have Shape(chan=192, height=64, width=64).
        """
        # Sanity check.
        assert isinstance(img, np.ndarray) and img.dtype == np.uint8

        im_dim = self.imageShape()
        assert img.shape == im_dim.hwc()
        assert im_dim.isSquare() and im_dim.isPow2()

        # Normalise the image and put each colour channel as a separate image
        # into a work list.
        img = img.astype(np.float32) / 255
        src = list(img.transpose([2, 0, 1]))

        # Decompose each channel.
        for i in range(self._NUM_WAVELET_DECOMPOSITIONS):
            # Side length of the coefficient maps after this decomposition.
            N = im_dim.width >> (i + 1)

            # Apply wavelet transform to every image in the worklist and place
            # the results in an output list.
            # NOTE: `pop` removes images from the *end* of the work list. This
            # determines the channel order of the result and must not change.
            dst = []
            while src:
                cA, (cH, cV, cD) = pywt.dwt2(src.pop(),
                                             'db2',
                                             mode='symmetric')

                # All coefficients must be square and have identical dimensions.
                assert cA.shape == cH.shape == cV.shape == cD.shape
                assert cA.ndim == 2 and cA.shape[0] == cA.shape[1]

                # The wavelet decomposition reduces dimension by roughly 2.
                # However, due to transients the outputs are a bit larger than
                # that which is why we must trim them. Here we compute the
                # start/stop indices for the trimming.
                excess = cA.shape[0] - N
                assert excess >= 0
                a = excess // 2
                b = a + N
                assert b <= cA.shape[0]

                # Trim the coefficients.
                dst.extend(coeff[a:b, a:b] for coeff in (cA, cH, cV, cD))

            # The output becomes the new work list for the next round.
            src = dst

        # Convert the Python list to Numpy and verify its shape.
        data = np.array(src, np.float32)
        assert data.shape == imageToWaveletDim(im_dim).chw()

        # Return the decomposed image with the leading batch dimension.
        return np.expand_dims(data, 0)

    def _setupNetwork(self, x_in, bw_init, dtype):
        """Create the conv layers and return the output tensor `out`.

        The network consists of `num_layers - 1` hidden layers (3x3 kernels,
        64 features, ReLU) followed by one linear output layer (33x33 kernel)
        with `numOutputChannels(num_classes)` features.

        Inputs:
            x_in: Tensor
                Network input in NCHW format.
            bw_init:
                Passed verbatim to `unpackBiasAndWeight` for every layer.
            dtype:
                Passed verbatim to `unpackBiasAndWeight` for every layer.
        """
        # Convenience: shared arguments for conv2d.
        opts = dict(padding='SAME', data_format='NCHW', strides=[1, 1, 1, 1])
        num_ft_chan = 64

        # Hidden conv layers. The number of input channels of each layer is
        # the number of channels (axis 1) of the previous tensor.
        prev = x_in
        for i in range(self.num_layers - 1):
            prev_shape = tuple(prev.shape.as_list())
            b_dim = (num_ft_chan, 1, 1)
            W_dim = (3, 3, prev_shape[1], num_ft_chan)
            b, W = unpackBiasAndWeight(bw_init, b_dim, W_dim, i, dtype)

            prev = tf.nn.relu(tf.nn.conv2d(prev, W, **opts) + b)

        # Conv output layer to learn the BBoxes and class labels. It uses the
        # integer channel count, just like the hidden layers above.
        num_out_chan = self.numOutputChannels(self.num_classes)
        prev_shape = tuple(prev.shape.as_list())
        b_dim = (num_out_chan, 1, 1)
        W_dim = (33, 33, prev_shape[1], num_out_chan)
        b, W = unpackBiasAndWeight(bw_init, b_dim, W_dim, self.num_layers - 1,
                                   dtype)
        return tf.add(tf.nn.conv2d(prev, W, **opts), b, name='out')

    def _setupNonMaxSuppression(self):
        """Create non-maximum-suppression nodes.

        These are irrelevant for training but useful in the predictor to cull
        the flood of possible bounding boxes.
        """
        with tf.variable_scope('non-max-suppression'):
            r_in = tf.placeholder(tf.float32, [None, 4], name='bb_rects')
            s_in = tf.placeholder(tf.float32, [None], name='scores')

            # Select at most 30 boxes with an IoU threshold of 0.2.
            tf.image.non_max_suppression(r_in, s_in, 30, 0.2, name='op')

    def nonMaxSuppression(self, bb_rects, scores):
        """ Wrapper around Tensorflow's non-max-suppression function.

        Input:
            bb_rects: Array[N, 4]
                BBox rectangles, one per row.
            scores: Array[N]
                One scalar score for each BBox.

        Returns:
            idx: Array
                List of BBox indices that survived the operation.
        """
        g = tf.get_default_graph().get_tensor_by_name
        fd = {
            g('non-max-suppression/scores:0'): scores,
            g('non-max-suppression/bb_rects:0'): bb_rects,
        }
        return self.sess.run(g('non-max-suppression/op:0'), feed_dict=fd)

    def train(self, img, y, lrate, mask_cls, mask_bbox, mask_isFg):
        """Run one optimisation step for a single image and return the costs.

        The network must have been created with `train=True`.

        Inputs:
            img: UInt8 Array[height, width, 3]
                Training image, see `_imageToInput`.
            y: Array[*outputShape().chw()]
                Expected network output for `img`.
            lrate: float
                Learning rate for this step. Must be positive.
            mask_cls, mask_bbox, mask_isFg: Array[height, width]
                Masks for the respective cost terms. Their shape must match
                the height and width of `y`.

        Returns:
            dict[str:float]: value of every node in `costNodes()`.
        """
        assert self._trainable

        # Sanity checks
        assert lrate > 0
        assert mask_cls.shape == mask_bbox.shape == mask_isFg.shape
        assert y.shape == self.ft_dim.chw()
        assert y.shape[1:] == mask_cls.shape

        # Feed dictionary. The expected output needs a batch dimension.
        g = tf.get_default_graph().get_tensor_by_name
        fd = {
            self._xin: self._imageToInput(img),
            g('lrate:0'): lrate,
            g('orpac-cost/y_true:0'): np.expand_dims(y, 0),
            g('orpac-cost/mask_cls:0'): mask_cls,
            g('orpac-cost/mask_bbox:0'): mask_bbox,
            g('orpac-cost/mask_isFg:0'): mask_isFg,
        }

        # Run one optimisation step and return the costs.
        nodes = [self._cost_nodes, self._optimiser]
        costs, _ = self.sess.run(nodes, feed_dict=fd)
        return costs

    def predict(self, img):
        """Return the network output for `img` without the batch dimension.

        Inputs:
            img: UInt8 Array[height, width, 3]
                See `_imageToInput`.
        """
        # Run predictor network.
        g = tf.get_default_graph().get_tensor_by_name
        out = self.sess.run(g('orpac/out:0'),
                            feed_dict={self._xin: self._imageToInput(img)})
        assert out.ndim == 4 and out.shape[0] == 1
        return out[0]

    def serialise(self):
        """Return the biases and weights of all layers in a dictionary.

        Returns:
            dict: the keys 'weight' and 'bias' each hold a dictionary that
                maps the layer index to the variable value; 'num-layers' holds
                the number of layers.
        """
        out = {'weight': {}, 'bias': {}, 'num-layers': self.numLayers()}
        for i in range(self.num_layers):
            out['bias'][i] = self.getBias(i)
            out['weight'][i] = self.getWeight(i)
        return out