Example 1
    def network_backprop_setup(self, grad_req, arg_names, arg_shapes,
                               eval_metric):
        """Prepare gradient buffers, optimizer, updater and evaluation metric.

        Parameters
        ----------
        grad_req : str
            Gradient request ('write', 'add' or 'null'); 'null' skips
            gradient-buffer allocation entirely.
        arg_names : list of str
            Argument names of the symbol, aligned with ``arg_shapes``.
        arg_shapes : list of tuple
            Shapes of the corresponding arguments.
        eval_metric : str or EvalMetric
            Metric spec passed to ``metric.create``.

        Returns
        -------
        EvalMetric
            The instantiated evaluation metric.
        """
        # Inputs and label-like arguments never receive gradients; every
        # other argument gets a zero-initialized gradient buffer on self.ctx.
        frozen_suffixes = ('data', 'mean_face', 'cls_label', 'proj_weight',
                           'proj_label', 'ground_truth', 'ellipse_label',
                           'bbox_weight')
        if grad_req != 'null':
            self.grad_params = {
                name: mx.nd.zeros(shape, self.ctx)
                for name, shape in zip(arg_names, arg_shapes)
                if not name.endswith(frozen_suffixes)
            }

        # Replace the optimizer spec stored on self with a concrete instance,
        # then derive the updater closure from it.
        self.optimizer = opt.create(self.optimizer,
                                    rescale_grad=1.0,
                                    **(self.kwargs))
        self.updater = get_updater(self.optimizer)
        return metric.create(eval_metric)
Example 2
    def init_optimizer(self, kvstore='local', optimizer='sgd',
                       optimizer_params=(('learning_rate', 0.01),), force_init=False):
        """Install and initialize optimizers.

        Parameters
        ----------
        kvstore : str or KVStore
            Default `'local'`.
        optimizer : str or Optimizer
            Default `'sgd'`
        optimizer_params : dict
            Default `(('learning_rate', 0.01),)`. The default value is not a dictionary,
            just to avoid pylint warning of dangerous default values.
        force_init : bool
            Default `False`, indicating whether we should force re-initializing the
            optimizer in the case an optimizer is already installed.
        """
        assert self.binded and self.params_initialized

        # A second call is a no-op unless the caller explicitly forces it.
        if self.optimizer_initialized and not force_init:
            self.logger.warning('optimizer already initialized, ignoring...')
            return

        kvstore, update_on_kvstore = _create_kvstore(
            kvstore, len(self._context), self._arg_params)

        # In synchronous distributed training the effective batch size spans
        # every worker, so gradients are rescaled by the global batch.
        batch_size = self._exec_group.batch_size
        if kvstore and 'dist' in kvstore.type and '_sync' in kvstore.type:
            batch_size *= kvstore.num_workers
        rescale_grad = 1.0 / batch_size

        if isinstance(optimizer, str):
            # Map executor gradient-array indices to parameter names so the
            # optimizer can honor per-parameter settings.
            param_names = self._exec_group.param_names
            if update_on_kvstore:
                idx2name = dict(enumerate(param_names))
            else:
                # Without kvstore-side updates each device holds its own copy
                # of every parameter, so indices interleave across devices.
                idx2name = {}
                for dev in range(len(self._context)):
                    for pos, pname in enumerate(param_names):
                        idx2name[pos * len(self._context) + dev] = pname
            optimizer_params = dict(optimizer_params)
            optimizer_params.setdefault('rescale_grad', rescale_grad)
            optimizer = opt.create(optimizer,
                                   sym=self.symbol, param_idx2name=idx2name,
                                   **optimizer_params)
        else:
            assert isinstance(optimizer, opt.Optimizer)
            # Warn (rather than silently override) when a hand-built optimizer
            # carries a rescale_grad that disagrees with the batch size.
            if optimizer.rescale_grad != rescale_grad:
                #pylint: disable=no-member
                warnings.warn(
                    "Optimizer created manually outside Module but rescale_grad " +
                    "is not normalized to 1.0/batch_size/num_workers (%s vs. %s). "%(
                        optimizer.rescale_grad, rescale_grad) +
                    "Is this intended?", stacklevel=2)

        self._optimizer = optimizer
        self._kvstore = kvstore
        self._update_on_kvstore = update_on_kvstore
        self._updater = None

        if kvstore:
            # copy initialized local parameters to kvstore
            _initialize_kvstore(kvstore=kvstore,
                                param_arrays=self._exec_group.param_arrays,
                                arg_params=self._arg_params,
                                param_names=self._param_names,
                                update_on_kvstore=update_on_kvstore)
        if update_on_kvstore:
            # Updates are applied server-side by the kvstore.
            kvstore.set_optimizer(self._optimizer)
        else:
            # Updates are applied locally through an updater closure.
            self._updater = opt.get_updater(optimizer)

        self.optimizer_initialized = True

        # States preloaded before the optimizer existed are applied now.
        if self._preload_opt_states is not None:
            self.load_optimizer_states(self._preload_opt_states)
            self._preload_opt_states = None
Example 3
    def init_optimizer(self, kvstore='local', optimizer='sgd',
                       optimizer_params=(('learning_rate', 0.01),), force_init=False):
        """Install and initialize optimizers.

        Parameters
        ----------
        kvstore : str or KVStore
            Default `'local'`.
        optimizer : str or Optimizer
            Default `'sgd'`
        optimizer_params : dict
            Default `(('learning_rate', 0.01),)`. The default value is not a dictionary,
            just to avoid pylint warning of dangerous default values.
        force_init : bool
            Default `False`, indicating whether we should force re-initializing the
            optimizer in the case an optimizer is already installed.
        """
        # Optimizer setup is only valid after bind() and init_params().
        assert self.binded and self.params_initialized

        if self.optimizer_initialized and not force_init:
            self.logger.warning('optimizer already initialized, ignoring...')
            return

        (kvstore, update_on_kvstore) = \
                _create_kvstore(kvstore, len(self._context), self._arg_params)

        # Gradients are normalized by the global batch size; in synchronous
        # distributed mode the global batch spans all workers.
        batch_size = self._exec_group.batch_size
        if kvstore and 'dist' in kvstore.type and '_sync' in kvstore.type:
            batch_size *= kvstore.num_workers
        rescale_grad = 1.0/batch_size

        if isinstance(optimizer, str):
            # Build an index->parameter-name map so the optimizer can apply
            # per-parameter settings (e.g. lr_mult / wd_mult).
            idx2name = {}
            if update_on_kvstore:
                idx2name.update(enumerate(self._exec_group.param_names))
            else:
                # Without kvstore-side updates each device keeps its own copy
                # of every parameter, so indices interleave across devices.
                for k in range(len(self._context)):
                    idx2name.update({i*len(self._context)+k: n
                                     for i, n in enumerate(self._exec_group.param_names)})
            optimizer_params = dict(optimizer_params)
            if 'rescale_grad' not in optimizer_params:
                optimizer_params['rescale_grad'] = rescale_grad
            optimizer = opt.create(optimizer,
                                   sym=self.symbol, param_idx2name=idx2name,
                                   **optimizer_params)
        else:
            assert isinstance(optimizer, opt.Optimizer)
            # A manually-built optimizer may carry a mismatched rescale_grad;
            # warn rather than silently overriding the caller's choice.
            if optimizer.rescale_grad != rescale_grad:
                #pylint: disable=no-member
                warnings.warn(
                    "Optimizer created manually outside Module but rescale_grad " +
                    "is not normalized to 1.0/batch_size/num_workers (%s vs. %s). "%(
                        optimizer.rescale_grad, rescale_grad) +
                    "Is this intended?", stacklevel=2)

        self._optimizer = optimizer
        self._kvstore = kvstore
        self._update_on_kvstore = update_on_kvstore
        self._updater = None

        if kvstore:
            # copy initialized local parameters to kvstore
            _initialize_kvstore(kvstore=kvstore,
                                param_arrays=self._exec_group.param_arrays,
                                arg_params=self._arg_params,
                                param_names=self._param_names,
                                update_on_kvstore=update_on_kvstore)
        if update_on_kvstore:
            # Server-side updates: the kvstore owns the optimizer.
            kvstore.set_optimizer(self._optimizer)
        else:
            # Local updates via an updater closure.
            self._updater = opt.get_updater(optimizer)

        self.optimizer_initialized = True

        # Apply optimizer states that were preloaded before init ran.
        if self._preload_opt_states is not None:
            self.load_optimizer_states(self._preload_opt_states)
            self._preload_opt_states = None
Example 4
 def fit(self, train_data, eval_data=None,
         eval_metric='acc',
         grad_req='write',
         epoch_end_callback=None,
         batch_end_callback=None,
         kvstore='local',
         logger=None):
     """Train the symbol on ``train_data``, optionally evaluating on ``eval_data``.

     Parameters
     ----------
     train_data : data iterator
         Must expose ``provide_data``, ``data_name``, ``label_name``,
         ``get_batch_size()`` and ``reset()``, and yield dict-like batches.
     eval_data : data iterator, optional
         Validation iterator with the same interface as ``train_data``.
     eval_metric : str or EvalMetric
         Metric spec passed to ``metric.create``. Default ``'acc'``.
     grad_req : str
         Gradient request ('write', 'add' or 'null'). Default ``'write'``.
     epoch_end_callback : callable, optional
         Called as ``callback(epoch, symbol, arg_params, aux_params)``.
     batch_end_callback : callable, optional
         Called with a ``BatchEndParam`` after every training batch.
     kvstore : str
         Unused; kept for interface compatibility.
     logger : logging-like object, optional
         Defaults to the root ``logging`` module.
     """
     if logger is None:
         logger = logging
     # Fix: log through the supplied logger, not the root logging module.
     logger.info('Start training with %s', str(self.ctx))
     arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(data=train_data.provide_data[0][1])
     arg_names = self.symbol.list_arguments()
     # Allocate gradient buffers for learnable arguments only; inputs and
     # labels never receive gradients.
     if grad_req != 'null':
         self.grad_params = {}
         for name, shape in zip(arg_names, arg_shapes):
             if not (name.endswith('data') or name.endswith('label')):
                 self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
     else:
         self.grad_params = None
     aux_names = self.symbol.list_auxiliary_states()
     self.aux_params = {k: nd.zeros(s) for k, s in zip(aux_names, aux_shapes)}
     data_name = train_data.data_name
     label_name = train_data.label_name
     # Rescale gradients by the batch size so the learning rate is
     # batch-size independent.
     self.optimizer = opt.create(self.optimizer, rescale_grad=(1.0/train_data.get_batch_size()), **(self.kwargs))
     self.updater = get_updater(self.optimizer)
     eval_metric = metric.create(eval_metric)
     # begin training
     for epoch in range(self.begin_epoch, self.num_epoch):
         nbatch = 0
         train_data.reset()
         eval_metric.reset()
         for data in train_data:
             nbatch += 1
             label_shape = data[label_name].shape
             self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
             # Flatten the spatial label map to (batch, H*W) for the softmax.
             self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                 label_shape[1]*label_shape[2]), self.ctx)
             self.exector = self.symbol.bind(self.ctx, self.arg_params,
                             args_grad=self.grad_params,
                             grad_req=grad_req,
                             aux_states=self.aux_params)
             assert len(self.symbol.list_arguments()) == len(self.exector.grad_arrays)
             # Fix: test `nd is not None` explicitly -- truth-testing an
             # NDArray is ambiguous and raises in modern MXNet (the sibling
             # implementations in this file already do this).
             update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
                 self.exector.grad_arrays) if nd is not None}
             output_dict = {}
             output_buff = {}
             for key, arr in zip(self.symbol.list_outputs(), self.exector.outputs):
                 output_dict[key] = arr
                 output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
             self.exector.forward(is_train=True)
             for key in output_dict:
                 output_dict[key].copyto(output_buff[key])
             self.exector.backward()
             for key, arr in update_dict.items():
                 if key != "bigscore_weight":
                     self.updater(key, arr, self.arg_params[key])
             pred_shape = self.exector.outputs[0].shape
             label = mx.nd.array(data[label_name].reshape(label_shape[0], label_shape[1]*label_shape[2]))
             pred = mx.nd.array(output_buff["softmax_output"].asnumpy().reshape(pred_shape[0], \
                 pred_shape[1], pred_shape[2]*pred_shape[3]))
             eval_metric.update([label], [pred])
             self.exector.outputs[0].wait_to_read()
             # Fix: the callback defaults to None, so guard before calling.
             if batch_end_callback is not None:
                 batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metric)
                 batch_end_callback(batch_end_params)
         if epoch_end_callback is not None:
             epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params)
         name, value = eval_metric.get()
         logger.info("                     --->Epoch[%d] Train-%s=%f", epoch, name, value)
         # evaluation
         if eval_data:
             logger.info(" in eval process...")
             nbatch = 0
             eval_data.reset()
             eval_metric.reset()
             for data in eval_data:
                 nbatch += 1
                 label_shape = data[label_name].shape
                 self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
                 self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                     label_shape[1]*label_shape[2]), self.ctx)
                 exector = self.symbol.bind(self.ctx, self.arg_params,
                                 args_grad=self.grad_params,
                                 grad_req=grad_req,
                                 aux_states=self.aux_params)
                 cpu_output_array = mx.nd.zeros(exector.outputs[0].shape)
                 exector.forward(is_train=False)
                 exector.outputs[0].copyto(cpu_output_array)
                 pred_shape = cpu_output_array.shape
                 label = mx.nd.array(data[label_name].reshape(label_shape[0], \
                     label_shape[1]*label_shape[2]))
                 pred = mx.nd.array(cpu_output_array.asnumpy().reshape(pred_shape[0], \
                     pred_shape[1], pred_shape[2]*pred_shape[3]))
                 eval_metric.update([label], [pred])
                 exector.outputs[0].wait_to_read()
         name, value = eval_metric.get()
         logger.info('batch[%d] Validation-%s=%f', nbatch, name, value)
Example 5
    def fit(self, train_data, eval_data=None,
            eval_metric='acc',
            grad_req='write',
            epoch_end_callback=None,
            batch_end_callback=None,
            kvstore='local',
            logger=None):
        """Train a detection network with a hand-rolled forward/backward loop.

        Binds a fresh executor per batch, runs forward/backward, applies
        updates via ``self.updater``, and tracks a MultiBox detection metric.
        The optional evaluation pass additionally computes a detection-validity
        metric (``self.valid_metric``) and a disparity-based distance-accuracy
        metric. Segmentation-related code paths are present but commented out.

        Parameters
        ----------
        train_data : data iterator
            Yields ``(batch, _)`` pairs; must expose ``provide_data``,
            ``provide_label``, ``batch_size`` and ``reset()``.
        eval_data : data iterator, optional
            Yields ``(batch, filenames)`` pairs for validation.
        eval_metric : str or EvalMetric
            Only used as a seed; replaced by ``CustomAccuracyMetric`` below.
        grad_req : str
            Gradient request ('write', 'add' or 'null').
        epoch_end_callback : callable, optional
            Checkpointing hook called at each epoch end.
        batch_end_callback : callable
            Progress hook called after every training batch (NOTE(review):
            called unguarded, so it must not be None in this implementation).
        kvstore : str
            Unused; kept for interface compatibility.
        logger : logging-like object, optional
            Defaults to the root ``logging`` module.
        """
        global outimgiter
        if logger is None:
            logger = logging
        logging.info('Start training with %s', str(self.ctx))
        logging.info(str(self.kwargs))
        # batch_size is the leading dimension of the first data input.
        batch_size = train_data.provide_data[0][1][0]
        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape( \
            data=tuple(train_data.provide_data[0][1]), label_det=(batch_size,200,6))
        arg_names = self.symbol.list_arguments()
        out_names = self.symbol.list_outputs()
        aux_names = self.symbol.list_auxiliary_states()

        # pprint([(n,s) for n,s in zip(arg_names,arg_shapes)])
        # pprint([(n,s) for n,s in zip(out_names,out_shapes)])
        # pprint([(n,s) for n,s in zip(aux_names,aux_shapes)])
        
        # Gradient buffers for learnable arguments; inputs/labels excluded.
        if grad_req != 'null':
            self.grad_params = {}
            for name, shape in zip(arg_names, arg_shapes):
                if not (name.endswith('data') or name.endswith('label')):
                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
        else:
            self.grad_params = None
        self.aux_params = {k : mx.nd.zeros(s, self.ctx) for k, s in zip(aux_names, aux_shapes)}
        data_name = train_data.provide_data[0][0]
        label_name_det = train_data.provide_label[0][0]
        label_name_seg = train_data.provide_label[1][0]
        input_names = [data_name, label_name_det, label_name_seg]

        print(train_data.provide_label)
        print(os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"])

        # Gradients rescaled by batch size so the LR is batch-size independent.
        self.optimizer = opt.create(self.optimizer, rescale_grad=(1.0/train_data.batch_size), **(self.kwargs))
        self.updater = get_updater(self.optimizer)
        eval_metric = CustomAccuracyMetric() # metric.create(eval_metric)
        multibox_metric = MultiBoxMetric()

        eval_metrics = metric.CompositeEvalMetric()
        eval_metrics.add(multibox_metric)
        # eval_metrics.add(eval_metric)
        
        # begin training
        for epoch in range(self.begin_epoch, self.num_epoch):
            nbatch = 0
            train_data.reset()
            eval_metrics.reset()
            logger.info('learning rate: '+str(self.optimizer.learning_rate))
            for data,_ in train_data:
                if self.evaluation_only:
                    break
                nbatch += 1
                label_shape_det = data.label[0].shape
                label_shape_seg = data.label[1].shape
                self.arg_params[data_name] = mx.nd.array(data.data[0], self.ctx)
                self.arg_params[label_name_det] = mx.nd.array(data.label[0], self.ctx)
                self.arg_params[label_name_seg] = mx.nd.array(data.label[1], self.ctx)
                output_names = self.symbol.list_outputs()

                ###################### analyze shapes ####################
                # pprint([(k,v.shape) for k,v in self.arg_params.items()])
                
                # NOTE(review): re-binding an executor on every batch is
                # expensive; presumably required because the arg_params arrays
                # are replaced above rather than copied into -- confirm.
                self.executor = self.symbol.bind(self.ctx, self.arg_params,
                    args_grad=self.grad_params, grad_req=grad_req, aux_states=self.aux_params)
                assert len(self.symbol.list_arguments()) == len(self.executor.grad_arrays)
                update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
                    self.executor.grad_arrays) if nd is not None}
                output_dict = {}
                output_buff = {}
                # Stage outputs into CPU-side buffers so metric updates do not
                # block the training device.
                for key, arr in zip(self.symbol.list_outputs(), self.executor.outputs):
                    output_dict[key] = arr
                    output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
                    # output_buff[key] = mx.nd.empty(arr.shape, ctx=self.ctx)

                # Debug-only monitor callback: prints per-op timing/statistics
                # when registered via set_monitor_callback (disabled below).
                def stat_helper(name, array):
                    """wrapper for executor callback"""
                    import ctypes
                    from mxnet.ndarray import NDArray
                    from mxnet.base import NDArrayHandle, py_str
                    array = ctypes.cast(array, NDArrayHandle)
                    if 0:
                        array = NDArray(array, writable=False).asnumpy()
                        print (name, array.shape, np.mean(array), np.std(array),
                               ('%.1fms' % (float(time.time()-stat_helper.start_time)*1000)))
                    else:
                        array = NDArray(array, writable=False)
                        array.wait_to_read()
                        elapsed = float(time.time()-stat_helper.start_time)*1000.
                        if elapsed>5:
                            print (name, array.shape, ('%.1fms' % (elapsed,)))
                    stat_helper.start_time=time.time()
                stat_helper.start_time=float(time.time())
                # self.executor.set_monitor_callback(stat_helper)

                tic = time.time()
                    
                self.executor.forward(is_train=True)
                for key in output_dict:
                    output_dict[key].copyto(output_buff[key])

                # exit(0) # for debugging forward pass only
                    
                self.executor.backward()
                for key, arr in update_dict.items():
                    if key != "bigscore_weight":
                        self.updater(key, arr, self.arg_params[key])

                for output in self.executor.outputs:
                    output.wait_to_read()
                if TIMING:
                    print("%.0fms" % ((time.time()-tic)*1000.,))
                        
                output_dict = dict(zip(output_names, self.executor.outputs))
                pred_det_shape = output_dict["det_out_output"].shape
                # pred_seg_shape = output_dict["seg_out_output"].shape
                # Flatten detection labels to (batch, num_boxes*6) for metrics.
                label_det = mx.nd.array(data.label[0].reshape((label_shape_det[0],
                                                               label_shape_det[1]*label_shape_det[2])))
                # label_seg = mx.nd.array(data.label[1].reshape((label_shape_seg[0],
                #                                                label_shape_seg[1]*label_shape_seg[2])))
                pred_det = mx.nd.array(output_buff["det_out_output"].reshape((pred_det_shape[0],
                    pred_det_shape[1], pred_det_shape[2])))
                # pred_seg = mx.nd.array(output_buff["seg_out_output"].reshape((pred_seg_shape[0],
                #     pred_seg_shape[1], pred_seg_shape[2]*pred_seg_shape[3])))
                if DEBUG:
                    print(data.label[0].asnumpy()[0,:2,:])

                if TIMING:
                    print("%.0fms" % ((time.time()-tic)*1000.,))
                    
                eval_metrics.get_metric(0).update([mx.nd.zeros(output_buff["cls_prob_output"].shape),
                                        mx.nd.zeros(output_buff["loc_loss_output"].shape),label_det],
                                       [output_buff["cls_prob_output"], output_buff["loc_loss_output"],
                                        output_buff["cls_label_output"]])
                # eval_metrics.get_metric(1).update([label_seg.as_in_context(self.ctx)], [pred_seg.as_in_context(self.ctx)])

                self.executor.outputs[0].wait_to_read()

                ##################### display results ##############################
                # out_det = output_dict["det_out_output"].asnumpy()
                # for imgidx in range(out_det.shape[0]):
                #     img = np.squeeze(data.data[0].asnumpy()[imgidx,:,:,:])
                #     det = out_det[imgidx,:,:]
                #     gt = label_det.asnumpy()[imgidx,:].reshape((-1,6))
                #     display_results(img, det, gt, self.class_names)
                #     [exit(0) if (cv2.waitKey(1)&0xff)==27 else None]
                # outimgiter += 1

                batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metrics)
                batch_end_callback(batch_end_params)

                if TIMING:
                    print("%.0fms" % ((time.time()-tic)*1000.,))
                    
                # exit(0) # for debugging only
                
            ##### save snapshot
            if (not self.evaluation_only) and (epoch_end_callback is not None):
                epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params)
                
            names, values = eval_metrics.get()
            for name, value in zip(names,values):
                logger.info("                     --->Epoch[%d] Train-%s=%f", epoch, name, value)
                
            # evaluation
            if eval_data:
                logger.info(" in eval process...")
                nbatch = 0
                depth_metric = DistanceAccuracyMetric(class_names=self.class_names)
                eval_data.reset()
                eval_metrics.reset()
                self.valid_metric.reset()
                depth_metric.reset()
                timing_results = []
                for data, fnames in eval_data:
                    nbatch += 1
                    label_shape_det = data.label[0].shape
                    # label_shape_seg = data.label[1].shape
                    self.arg_params[data_name] = mx.nd.array(data.data[0], self.ctx)
                    self.arg_params[label_name_det] = mx.nd.array(data.label[0], self.ctx)
                    # self.arg_params[label_name_seg] = mx.nd.array(data.label[1], self.ctx)
                    self.executor = self.symbol.bind(self.ctx, self.arg_params,
                        args_grad=self.grad_params, grad_req=grad_req, aux_states=self.aux_params)
                    
                    output_names = self.symbol.list_outputs()
                    output_dict = dict(zip(output_names, self.executor.outputs))

                    # cpu_output_array = mx.nd.zeros(output_dict["seg_out_output"].shape)

                    ############## monitor status
                    # def stat_helper(name, array):
                    #     """wrapper for executor callback"""
                    #     import ctypes
                    #     from mxnet.ndarray import NDArray
                    #     from mxnet.base import NDArrayHandle, py_str
                    #     array = ctypes.cast(array, NDArrayHandle)
                    #     if 1:
                    #         array = NDArray(array, writable=False).asnumpy()
                    #         print (name, array.shape, np.mean(array), np.std(array),
                    #                ('%.1fms' % (float(time.time()-stat_helper.start_time)*1000)))
                    #     else:
                    #         array = NDArray(array, writable=False)
                    #         array.wait_to_read()
                    #         elapsed = float(time.time()-stat_helper.start_time)*1000.
                    #         if elapsed>5:
                    #             print (name, array.shape, ('%.1fms' % (elapsed,)))
                    #     stat_helper.start_time=time.time()
                    # stat_helper.start_time=float(time.time())
                    # self.executor.set_monitor_callback(stat_helper)
                    
                    ############## forward
                    tic = time.time()
                    # NOTE(review): is_train=True during evaluation -- looks
                    # intentional (metric outputs need training-mode graph),
                    # but confirm it does not perturb batch-norm statistics.
                    self.executor.forward(is_train=True)
                    # output_dict["seg_out_output"].wait_to_read()
                    timing_results.append((time.time()-tic)*1000.)
                    
                    # output_dict["seg_out_output"].copyto(cpu_output_array)
                    # pred_shape = output_dict["seg_out_output"].shape
                    # label = mx.nd.array(data.label[1].reshape((label_shape_seg[0], label_shape_seg[1]*label_shape_seg[2])))
                    # output_dict["seg_out_output"].wait_to_read()
                    # seg_out_output = output_dict["seg_out_output"].asnumpy()

                    pred_det_shape = output_dict["det_out_output"].shape
                    # pred_seg_shape = output_dict["seg_out_output"].shape
                    label_det = mx.nd.array(data.label[0].reshape((label_shape_det[0], label_shape_det[1]*label_shape_det[2])))
                    # label_seg = mx.nd.array(data.label[1].reshape((label_shape_seg[0], label_shape_seg[1]*label_shape_seg[2])),ctx=self.ctx)
                    pred_det = mx.nd.array(output_dict["det_out_output"].reshape((pred_det_shape[0], pred_det_shape[1], pred_det_shape[2])))
                    # pred_seg = mx.nd.array(output_dict["seg_out_output"].reshape((pred_seg_shape[0], pred_seg_shape[1], pred_seg_shape[2]*pred_seg_shape[3])),ctx=self.ctx)

                    #### remove invalid boxes
                    # Filter each image's detections (class id >= 0, confidence
                    # > 0.25) into a fixed-size (batch, 200, 7) buffer padded
                    # with -1 so downstream metrics see a uniform shape.
                    out_dets = output_dict["det_out_output"].asnumpy()
                    assert len(out_dets.shape)==3
                    pred_det = np.zeros((batch_size, 200, 7), np.float32)-1.
                    for idx, out_det in enumerate(out_dets):
                        assert len(out_det.shape)==2
                        out_det = np.expand_dims(out_det, axis=0)
                        indices = np.where(out_det[:,:,0]>=0) # labeled as negative
                        out_det = np.expand_dims(out_det[indices[0],indices[1],:],axis=0)
                        indices = np.where(out_det[:,:,1]>.25) # higher confidence
                        out_det = np.expand_dims(out_det[indices[0],indices[1],:],axis=0)
                        pred_det[idx, :out_det.shape[1], :] = out_det
                        del out_det
                    pred_det = mx.nd.array(pred_det)
                    
                    ##### display results
                    if False: # self.evaluation_only:
                        # out_img = output_dict["seg_out_output"]
                        # out_img = mx.nd.split(out_img, axis=0, num_outputs=out_img.shape[0], squeeze_axis=0)
                        # if not isinstance(out_img,list):
                        #     out_img = [out_img]
                        for imgidx in range(eval_data.batch_size):
                            img = np.squeeze(data.data[0].asnumpy()[imgidx,:,:,:])
                            det = pred_det.asnumpy()[imgidx,:,:]
                            ### ground-truth
                            gt = label_det.asnumpy()[imgidx,:].reshape((-1,6))
                            # display result
                            display_img = display_results(img, det, gt, self.class_names)
                            res_fname = fnames[imgidx].replace("SegmentationClass","Results").replace("labelIds","results")
                            if cv2.imwrite(res_fname, display_img):
                                print(res_fname,'saved.')
                            [exit(0) if (cv2.waitKey()&0xff)==27 else None]
                        outimgiter += 1

                    if self.evaluation_only:
                        continue

                    eval_metrics.get_metric(0).update(None,
                                           [output_dict["cls_prob_output"], output_dict["loc_loss_output"],
                                            output_dict["cls_label_output"]])
                    # eval_metrics.get_metric(1).update([label_seg], [pred_seg])
                    self.valid_metric.update([mx.nd.slice_axis(data.label[0],axis=2,begin=0,end=5)], \
                                             [mx.nd.slice_axis(pred_det,axis=2,begin=0,end=6)])
                    # Disparity maps are loaded from files derived from the
                    # label filenames and fed to the distance-accuracy metric.
                    disparities = []
                    for imgidx in range(batch_size):
                        dispname = fnames[imgidx].replace("SegmentationClass","Disparity").replace("gtFine_labelTrainIds","disparity")
                        disparities.append(cv2.imread(dispname,-1))
                        assert disparities[0] is not None, dispname + " not found."
                    depth_metric.update(mx.nd.array(disparities),[pred_det])
                    
                    det_metric = self.valid_metric
                    det_names, det_values = det_metric.get()
                    depth_names, depth_values = depth_metric.get()
                    print("\r %d/%d speed=%.1fms %.1f%% %s=%.1f %s=%.1f" % \
                          (nbatch*eval_data.batch_size,eval_data.num_samples,
                           math.fsum(timing_results)/float(nbatch),
                           float(nbatch*eval_data.batch_size)*100./float(eval_data.num_samples),
                           det_names[-1],det_values[-1]*100.,
                           depth_names[-1],depth_values[-1]*100.,),end='\r')
                    
                names, values = eval_metrics.get()
                for name, value in zip(names,values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value)
                logger.info('----------------------------------------------')
                print(' & '.join(names))
                print(' & '.join(map(lambda v:'%.1f'%(v*100.,),values)))
                logger.info('----------------------------------------------')
                names, values = self.valid_metric.get()
                for name, value in zip(names,values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value)
                logger.info('----------------------------------------------')
                print(' & '.join(names))
                print(' & '.join(map(lambda v:'%.1f'%(v*100.,),values)))
                logger.info('----------------------------------------------')
                names, values = depth_metric.get()
                for name, value in zip(names,values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value)
                logger.info('----------------------------------------------')
                print(' & '.join(names))
                print(' & '.join(map(lambda v:'%.1f'%(v*100.,),values)))
                logger.info('----------------------------------------------')
                    
                if self.evaluation_only:
                    exit(0) ## for debugging only
Example 6
 def fit(self,
         train_data,
         eval_data=None,
         eval_metric='acc',
         grad_req='write',
         epoch_end_callback=None,
         batch_end_callback=None,
         kvstore='local',
         logger=None):
     """Train ``self.symbol`` by binding a fresh executor per batch.

     Parameters
     ----------
     train_data : data iterator
         Yields dicts keyed by ``data_name``/``label_name``; must expose
         ``provide_data``, ``data_name``, ``label_name``,
         ``get_batch_size()`` and ``reset()``.
     eval_data : data iterator, optional
         Same layout as ``train_data``; used for validation after every
         epoch when given.
     eval_metric : str or EvalMetric
         Passed through ``metric.create``.
     grad_req : str
         Gradient request forwarded to ``Symbol.bind`` ('write', 'add'
         or 'null'; 'null' disables gradient buffers entirely).
     epoch_end_callback : callable, optional
         Invoked as ``cb(epoch, symbol, arg_params, aux_params)``.
     batch_end_callback : callable, optional
         Invoked with a ``BatchEndParam`` after each training batch.
     kvstore : str
         Unused here; kept for interface compatibility.
     logger : logger, optional
         Defaults to the ``logging`` module.
     """
     if logger is None:
         logger = logging
     # Bug fix: log through the resolved logger instead of always going
     # through the root ``logging`` module.
     logger.info('Start training with %s', str(self.ctx))
     arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(
         data=train_data.provide_data[0][1])
     arg_names = self.symbol.list_arguments()
     if grad_req != 'null':
         # Gradient buffers for every learnable argument; the data and
         # label inputs receive no gradients.
         self.grad_params = {}
         for name, shape in zip(arg_names, arg_shapes):
             if not (name.endswith('data') or name.endswith('label')):
                 self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
     else:
         self.grad_params = None
     aux_names = self.symbol.list_auxiliary_states()
     self.aux_params = {
         k: nd.zeros(s)
         for k, s in zip(aux_names, aux_shapes)
     }
     data_name = train_data.data_name
     label_name = train_data.label_name
     input_names = [data_name, label_name]
     self.optimizer = opt.create(self.optimizer,
                                 rescale_grad=(1.0 /
                                               train_data.get_batch_size()),
                                 **(self.kwargs))
     self.updater = get_updater(self.optimizer)
     eval_metric = metric.create(eval_metric)
     # begin training
     for epoch in range(self.begin_epoch, self.num_epoch):
         nbatch = 0
         train_data.reset()
         eval_metric.reset()
         for data in train_data:
             nbatch += 1
             label_shape = data[label_name].shape
             self.arg_params[data_name] = mx.nd.array(
                 data[data_name], self.ctx)
             # Labels are flattened to (batch, H*W) for the softmax.
             self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                 label_shape[1]*label_shape[2]), self.ctx)
             output_names = self.symbol.list_outputs()
             # NOTE(review): attribute name 'exector' (sic) is kept for
             # backward compatibility with code that may reference it.
             self.exector = self.symbol.bind(self.ctx,
                                             self.arg_params,
                                             args_grad=self.grad_params,
                                             grad_req=grad_req,
                                             aux_states=self.aux_params)
             assert len(self.symbol.list_arguments()) == len(
                 self.exector.grad_arrays)
             update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
                 self.exector.grad_arrays) if nd is not None}
             output_dict = {}
             output_buff = {}
             for key, arr in zip(self.symbol.list_outputs(),
                                 self.exector.outputs):
                 output_dict[key] = arr
                 # CPU-side buffers so the metric can read outputs
                 # without holding device memory.
                 output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
             self.exector.forward(is_train=True)
             for key in output_dict:
                 output_dict[key].copyto(output_buff[key])
             self.exector.backward()
             for key, arr in update_dict.items():
                 if key != "bigscore_weight":
                     self.updater(key, arr, self.arg_params[key])
             pred_shape = self.exector.outputs[0].shape
             label = mx.nd.array(data[label_name].reshape(
                 label_shape[0], label_shape[1] * label_shape[2]))
             pred = mx.nd.array(output_buff["softmax_output"].asnumpy().reshape(pred_shape[0], \
                 pred_shape[1], pred_shape[2]*pred_shape[3]))
             eval_metric.update([label], [pred])
             self.exector.outputs[0].wait_to_read()
             # Bug fix: guard the callback — the original called it
             # unconditionally and crashed when batch_end_callback=None.
             if batch_end_callback is not None:
                 batch_end_params = BatchEndParam(epoch=epoch,
                                                  nbatch=nbatch,
                                                  eval_metric=eval_metric)
                 batch_end_callback(batch_end_params)
         if epoch_end_callback is not None:
             epoch_end_callback(epoch, self.symbol, self.arg_params,
                                self.aux_params)
         name, value = eval_metric.get()
         logger.info("                     --->Epoch[%d] Train-%s=%f",
                     epoch, name, value)
         # evaluation
         if eval_data:
             logger.info(" in eval process...")
             nbatch = 0
             eval_data.reset()
             eval_metric.reset()
             for data in eval_data:
                 nbatch += 1
                 label_shape = data[label_name].shape
                 self.arg_params[data_name] = mx.nd.array(
                     data[data_name], self.ctx)
                 self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                     label_shape[1]*label_shape[2]), self.ctx)
                 exector = self.symbol.bind(self.ctx,
                                            self.arg_params,
                                            args_grad=self.grad_params,
                                            grad_req=grad_req,
                                            aux_states=self.aux_params)
                 cpu_output_array = mx.nd.zeros(exector.outputs[0].shape)
                 exector.forward(is_train=False)
                 exector.outputs[0].copyto(cpu_output_array)
                 pred_shape = cpu_output_array.shape
                 label = mx.nd.array(data[label_name].reshape(label_shape[0], \
                     label_shape[1]*label_shape[2]))
                 pred = mx.nd.array(cpu_output_array.asnumpy().reshape(pred_shape[0], \
                     pred_shape[1], pred_shape[2]*pred_shape[3]))
                 eval_metric.update([label], [pred])
                 exector.outputs[0].wait_to_read()
             # Bug fix: this summary originally sat outside the
             # ``if eval_data:`` branch, so with no eval set it re-logged
             # the *training* metric under a "Validation" label.
             name, value = eval_metric.get()
             logger.info('batch[%d] Validation-%s=%f', nbatch, name, value)
Ejemplo n.º 7
0
def _train_rnn(
        symbol,
        ctx,
        marks,
        arg_names, param_names, aux_names,
        arg_params, aux_params,
        begin_epoch, end_epoch, epoch_size, optimizer,
        kvstore, update_on_kvstore, train_data,
        e_marks=None,
        eval_data=None, eval_metric=None,
        epoch_end_callback=None, batch_end_callback=None, time_step_callback=None,
        logger=None, work_load_list=None, monitor=None,
        eval_batch_end_callback=None, sym_gen=None,
        mutable_data_shape=False, max_data_shape=None):
    """Run the epoch loop for an RNN-style model over batches of time steps.

    ``marks`` (and ``e_marks`` for evaluation) annotate, per series, which
    images carry a label (1) or not (0); ``marks[nbatch]`` may itself be a
    per-batch list.  Each data batch is handed to ``_run_sax`` together
    with its marks, which performs the actual forward/backward/update and
    metric accumulation.
    """
    # TODO marks not working if label of SAX is different in one batch

    if logger is None:
        logger = logging
    executor_manager = DataParallelExecutorManager(symbol=symbol,
                                                   sym_gen=sym_gen,
                                                   ctx=ctx,
                                                   train_data=train_data,
                                                   param_names=param_names,
                                                   arg_names=arg_names,
                                                   aux_names=aux_names,
                                                   work_load_list=work_load_list,
                                                   logger=logger,
                                                   mutable_data_shape=mutable_data_shape,
                                                   max_data_shape=max_data_shape)
    if monitor:
        executor_manager.install_monitor(monitor)

    executor_manager.set_params(arg_params, aux_params)

    # A local updater is created even when updating on the kvstore,
    # because _run_sax receives it unconditionally.
    updater = get_updater(optimizer)

    if kvstore:
        _initialize_kvstore(kvstore=kvstore,
                            param_arrays=executor_manager.param_arrays,
                            arg_params=arg_params,
                            param_names=executor_manager.param_names,
                            update_on_kvstore=update_on_kvstore)

    if update_on_kvstore:
        kvstore.set_optimizer(optimizer)

    # Now start training
    train_data.reset()

    for epoch in range(begin_epoch, end_epoch):
        # Training phase
        tic = time.time()
        eval_metric.reset()
        nbatch = 0

        # Into Epoch
        #########################
        # acc_hist accumulates per-time-step accuracies across the epoch
        # and is handed to the epoch-end callbacks.
        acc_hist = []

        logger.info('Starting New Epoch...')
        while True:
            do_reset = True

            # iter on batch_size
            for data_batch_zoo in train_data:
                assert isinstance(data_batch_zoo, list), "Iter Error"
                if monitor is not None:
                    monitor.tic()

                # Start to iter on Time steps; marks may be global or
                # specified per batch.
                if isinstance(marks[nbatch], list):
                    M = marks[nbatch]
                else:
                    M = marks

                executor_manager, eval_metric, acc_hist = _run_sax(
                    data_batch_zoo, M, executor_manager, eval_metric, updater, ctx, kvstore, acc_hist,
                    monitor=monitor,
                    logger=logger,
                    update_on_kvstore=update_on_kvstore,
                    is_train=True,
                    callback= time_step_callback
                )

                nbatch += 1
                # batch callback (for print purpose)
                if batch_end_callback is not None:
                    batch_end_params = BatchEndParam(epoch=epoch,
                                                     nbatch=nbatch,
                                                     eval_metric=eval_metric,
                                                     locals=locals())
                    if isinstance(batch_end_callback, list):
                        for call in batch_end_callback:
                            call(batch_end_params)
                    else:
                        batch_end_callback(batch_end_params)

                # this epoch is done possibly earlier
                if epoch_size is not None and nbatch >= epoch_size:
                    do_reset = False
                    break

            # end on batch_size
            if do_reset is True:
                logger.debug('Epoch[%d] Resetting Data Iterator', epoch)
                train_data.reset()
                logger.debug('Epoch[%d] Resetting Eval Metric', epoch)
                eval_metric.reset()

            # this epoch is done
            if epoch_size is None or nbatch >= epoch_size:
                break

        toc = time.time()
        logger.info('Epoch[%d] Time cost=%.3f', epoch, (toc - tic))

        if epoch_end_callback or epoch + 1 == end_epoch:
            executor_manager.copy_to(arg_params, aux_params)

        if epoch_end_callback is not None:
            if isinstance(epoch_end_callback, list):
                for call in epoch_end_callback:
                    call(epoch, symbol, arg_params, aux_params,
                         acc_hist)
            else:
                epoch_end_callback(epoch, symbol, arg_params, aux_params,
                                   acc_hist)

        # evaluation
        if eval_data:
            assert e_marks is not None, 'e marks cannot be None'
            eval_metric.reset()
            eval_data.reset()
            for b, eval_zoo in enumerate(eval_data):

                if isinstance(e_marks[b], list):
                    M = e_marks[b]
                else:
                    M = e_marks

                executor_manager, eval_metric, acc_hist = _run_sax(
                    eval_zoo, M, executor_manager, eval_metric, updater, ctx, kvstore, acc_hist,
                    update_on_kvstore=update_on_kvstore,
                    is_train=False)

                if eval_batch_end_callback is not None:
                    # Bug fix: the original passed ``nbatch=i`` where
                    # ``i`` is undefined (the loop variable is ``b``),
                    # raising NameError as soon as a callback was given.
                    batch_end_params = BatchEndParam(epoch=epoch,
                                                     nbatch=b,
                                                     eval_metric=eval_metric,
                                                     locals=locals())
                    if isinstance(eval_batch_end_callback, list):
                        for call in eval_batch_end_callback:
                            call(batch_end_params)
                    else:
                        eval_batch_end_callback(batch_end_params)
            name_value = eval_metric.get_name_value()
            for name, value in name_value:
                logger.info('Epoch[%d] Validation-%s=%f', epoch, name, value)
    # end of all epochs
    return
Ejemplo n.º 8
0
def run(mxIter):
    model_prefix = '/data2/obj_detect/imagenet_models/resnet/resnet-101'
    load_epoch = 0
    #model_prefix = './stage1_models/tiny_face-06440'
    #load_epoch = 42
    #model_prefix = './tiny_face-06440'
    #load_epoch = 140
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)

    input_shapes = get_input_shapes(mxIter.batch_size)
    optimizer = 'sgd'
    optimizer_params = {
        'learning_rate': 0.0001,
        'momentum' : 0.90,
        'wd' : 0.0001}
    optimizer = opt.create(optimizer, rescale_grad=1.0 / mxIter.batch_size, **optimizer_params)
    updater = get_updater(optimizer)

    net = get_symbol_focal_loss()
    arg_params, aux_params = load_params_checkpoint(model_prefix, load_epoch)
    arg_names = net.list_arguments()
    param_names = [x for x in arg_names if x not in input_shapes]

    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
    delete_params_by_shape(net, arg_params, aux_params, input_shapes, initializer)
    exec_ = net.simple_bind(ctx=mx.gpu(2), **input_shapes)
    copy_params(arg_params, aux_params, exec_)

    param_arrays = [[exec_.arg_arrays[i]] for i,name in enumerate(arg_names) if name in param_names]
    grad_arrays = [[exec_.grad_arrays[i]] for i,name in enumerate(arg_names) if name in param_names]

    #monitor = mx.monitor.Monitor(interval=1, pattern='.*backward.*')
    #monitor.install(exec_)

    batch_size = mxIter.batch_size
    for epoch in range(load_epoch+1, 200):
        num_batch = 0
        metric = 0
        num_inst = 0
        num_reg_inst = 0
        reg_metric = 0
        for batch in mxIter:
            load_data(batch, exec_)
            #monitor.tic()
            exec_.forward(is_train=True)
            outputs = [output.asnumpy() for output in exec_._get_outputs()]
            exec_.backward()
            #monitor.toc_print()
            _update_params(param_arrays, grad_arrays, updater, 1, param_names=param_names)
            num_batch += 1

            # metric
            metric += np.sum(outputs[0])
            reg_metric += np.sum(outputs[1])
            print 'batch -> {}'.format(num_batch)
            print 'focal_loss -> {}'.format(metric / num_batch)
            print 'l1_loss -> {}'.format(reg_metric / num_batch)

            if num_batch % 1000 == 0:
                save_arg_params = {}
                for param_name in param_names:
                    save_arg_params[param_name] = exec_.arg_dict[param_name]
                save_aux_params = exec_.aux_dict
                save_checkpoint('./tiny_face', num_batch, epoch, net, save_arg_params, save_aux_params)

        mxIter.reset()
        save_arg_params = {}
        for param_name in param_names:
            save_arg_params[param_name] = exec_.arg_dict[param_name]
        save_aux_params = exec_.aux_dict
        save_checkpoint('./tiny_face', num_batch, epoch, net, save_arg_params, save_aux_params)
Ejemplo n.º 9
0
    def fit(self,
            train_data,
            eval_data=None,
            eval_metric='acc',
            grad_req='write',
            logger=None,
            softmax_metric=None,
            regression_metric=None,
            epoch_end_callback=None):

        f = open("log_rpn.txt", 'w')
        if logger is None:
            logger = logging
        logging.info('Start training with %s', str(self.ctx))
        f.write('Start training with %s\n' % str(self.ctx))
        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(
            data=(1, 3, 128, 128),
            mean_face=(10, 3),
            ground_truth=(10, 2),
            bbox_label=(10, 5))
        arg_names = self.symbol.list_arguments()
        if grad_req != 'null':
            self.grad_params = {}
            for name, shape in zip(arg_names, arg_shapes):
                if not (name.endswith('data') or name.endswith("mean_face")
                        or name.endswith('cls_label')
                        or name.endswith('proj_weight')
                        or name.endswith('proj_label')
                        or name.endswith('ground_truth')
                        or name.endswith('bbox_label')
                        or name.endswith("bbox_weight")):
                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
        else:
            self.grad_params = None

        aux_names = self.symbol.list_auxiliary_states()
        self.aux_params = {
            k: mx.nd.zeros(s, self.ctx)
            for k, s in zip(aux_names, aux_shapes)
        }

        data_name = train_data.data_name
        cls_label_name = train_data.cls_label_name
        proj_label_name = train_data.proj_label_name
        proj_weight_name = train_data.proj_weight_name
        ground_truth_name = train_data.ground_truth_name
        bbox_label_name = train_data.bbox_label_name
        bbox_weight_name = train_data.bbox_weight_name

        self.optimizer = opt.create(self.optimizer,
                                    rescale_grad=1.0,
                                    **(self.kwargs))
        self.updater = get_updater(self.optimizer)
        eval_metric = metric.create(eval_metric)

        for epoch in range(self.begin_epoch, self.num_epoch):
            if eval_data:
                logger.info(" in eval process...")
                f.write(" in eval process...")
                nbatch = 0
                softmax_proj = np.zeros((11, 3))
                proj_regression_loss = .0
                bbox_predict_loss = np.array([.0, .0])
                eval_data.reset()
                for data in eval_data:
                    nbatch += 1
                    print "Eval batch:", nbatch
                    softmax_shape = data[cls_label_name].shape
                    self.arg_params[data_name] = mx.nd.array(
                        data[data_name], self.ctx)
                    self.arg_params[cls_label_name] = mx.nd.array(
                        data[cls_label_name].reshape(
                            (softmax_shape[0],
                             softmax_shape[1] * softmax_shape[2])), self.ctx)
                    self.arg_params[proj_label_name] = mx.nd.array(
                        data[proj_label_name], self.ctx)
                    self.arg_params[proj_weight_name] = mx.nd.array(
                        data[proj_weight_name], self.ctx)
                    self.arg_params[ground_truth_name] = mx.nd.array(
                        data[ground_truth_name], self.ctx)
                    self.arg_params[bbox_label_name] = mx.nd.array(
                        data[bbox_label_name], self.ctx)
                    self.arg_params[bbox_weight_name] = mx.nd.array(
                        data[bbox_weight_name], self.ctx)
                    self.arg_params["mean_face"] = mx.nd.array(
                        train_data.mean_face, self.ctx)

                    executor = self.symbol.bind(self.ctx,
                                                self.arg_params,
                                                args_grad=self.grad_params,
                                                grad_req=grad_req,
                                                aux_states=self.aux_params)

                    softmax_output_array = mx.nd.zeros(
                        executor.outputs[0].shape)
                    proj_regression_output_array = mx.nd.zeros(
                        executor.outputs[1].shape)
                    bbox_predict_output_array = mx.nd.zeros(
                        executor.outputs[2].shape)
                    ell_label = mx.nd.zeros(executor.outputs[3].shape)
                    bbox_predict = mx.nd.zeros(executor.outputs[4].shape)
                    executor.forward(is_train=True)
                    executor.outputs[0].copyto(softmax_output_array)
                    executor.outputs[1].copyto(proj_regression_output_array)
                    executor.outputs[2].copyto(bbox_predict_output_array)
                    executor.outputs[3].copyto(ell_label)
                    executor.outputs[4].copyto(bbox_predict)

                    softmax_shape = softmax_output_array.shape
                    index_label = np.nonzero(data[cls_label_name].reshape(
                        softmax_shape[0], softmax_shape[2] *
                        softmax_shape[3]) - 255)
                    label = mx.nd.array(data[cls_label_name].reshape(
                        softmax_shape[0],
                        softmax_shape[2] * softmax_shape[3])[:,
                                                             index_label[1]])
                    pred = mx.nd.array((softmax_output_array.asnumpy().reshape(
                        softmax_shape[0], softmax_shape[1],
                        softmax_shape[2] * softmax_shape[3]))[...,
                                                              index_label[1]])
                    if softmax_metric:
                        tempt = softmax_metric(label, pred, 11)
                        softmax_proj += tempt

                    proj_label = data[proj_label_name]
                    proj_weight = data[proj_weight_name]
                    proj_pred = proj_regression_output_array.asnumpy().reshape(
                        data[proj_weight_name].shape)
                    index_nonzero = np.nonzero(data[proj_weight_name])
                    proj_regress_tmp = regression_metric(
                        proj_label[index_nonzero], proj_pred[index_nonzero],
                        proj_weight[index_nonzero])
                    proj_regression_loss += proj_regress_tmp

                    bbox_pred = bbox_predict_output_array.asnumpy()
                    bbox_predict_tmp = bbox_predict_metric(
                        ell_label.asnumpy(), bbox_pred)
                    bbox_predict_loss += bbox_predict_tmp

                    print "Validation-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f" % \
                          (epoch, nbatch, get_accuracy(tempt, self.bgfg), proj_regress_tmp,
                           bbox_predict_tmp[0], bbox_predict_tmp[1])
                    f.write(
                        "Validation-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f\n"
                        % (epoch, nbatch, get_accuracy(
                            tempt, self.bgfg), proj_regress_tmp,
                           bbox_predict_tmp[0], bbox_predict_tmp[1]))

                    img_info = eval_data.AllImg[nbatch - 1]
                    print "%s\twidth: %d height: %d num_face: %d" % \
                          (img_info.filename, img_info.width, img_info.height, img_info.num_faces)
                    f.write("%s\twidth: %d height: %d num_face: %d\n" %
                            (img_info.filename, img_info.width,
                             img_info.height, img_info.num_faces))

                    executor.outputs[0].wait_to_read()
                    executor.outputs[1].wait_to_read()
                    executor.outputs[2].wait_to_read()
                    executor.outputs[3].wait_to_read()

                print_accuracy(softmax_proj, f, train_data.class_names,
                               self.bgfg)
                logger.info("ALL Validation accuracy: %f",
                            get_accuracy(softmax_proj, self.bgfg))
                logger.info('Validation projection regression: %f',
                            proj_regression_loss / nbatch)
                logger.info('Validation bbox predict: %f %f',
                            bbox_predict_loss[0] / nbatch,
                            bbox_predict_loss[1] / nbatch)
                f.write("ALL Validation accuracy: %f\n" %
                        get_accuracy(softmax_proj, self.bgfg))
                f.write("Validation projection regression: %f\n" %
                        (proj_regression_loss / nbatch))
                f.write("Validation bbox predict: %f %f\n" %
                        (bbox_predict_loss[0] / nbatch,
                         bbox_predict_loss[1] / nbatch))

            nbatch = 0
            train_data.reset()
            eval_metric.reset()
            proj_regress_loss_t = .0
            proj_regress_loss_b = .0
            softmax_count = np.zeros((11, 3))
            softmax_batch = np.zeros((11, 3))
            bbox_predict_loss_t = np.array([.0, .0])
            bbox_predict_loss_b = np.array([.0, .0])
            for data in train_data:
                nbatch += 1
                softmax_shape = data[cls_label_name].shape
                self.arg_params[data_name] = mx.nd.array(
                    data[data_name], self.ctx)
                self.arg_params[cls_label_name] = mx.nd.array(
                    data[cls_label_name].reshape(
                        (softmax_shape[0],
                         softmax_shape[1] * softmax_shape[2])), self.ctx)
                self.arg_params[proj_label_name] = mx.nd.array(
                    data[proj_label_name], self.ctx)
                self.arg_params[proj_weight_name] = mx.nd.array(
                    data[proj_weight_name], self.ctx)
                self.arg_params[ground_truth_name] = mx.nd.array(
                    data[ground_truth_name], self.ctx)
                self.arg_params[bbox_label_name] = mx.nd.array(
                    data[bbox_label_name], self.ctx)
                self.arg_params[bbox_weight_name] = mx.nd.array(
                    data[bbox_weight_name], self.ctx)
                self.arg_params["mean_face"] = mx.nd.array(
                    train_data.mean_face, self.ctx)

                self.executor = self.symbol.bind(self.ctx,
                                                 self.arg_params,
                                                 args_grad=self.grad_params,
                                                 grad_req=grad_req,
                                                 aux_states=self.aux_params)
                assert len(self.symbol.list_arguments()) == len(
                    self.executor.grad_arrays)

                update_dict = {
                    name: nd
                    for name, nd in zip(self.symbol.list_arguments(),
                                        self.executor.grad_arrays) if nd
                }
                output_dict = {}
                output_buff = {}
                for key, arr in zip(self.symbol.list_outputs(),
                                    self.executor.outputs):
                    output_dict[key] = arr
                    output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
                self.executor.forward(is_train=True)
                for key in output_dict:
                    output_dict[key].copyto(output_buff[key])
                self.executor.backward()
                '''
                for i in xrange(0, 49):
                    if self.executor.grad_arrays[i] != None:
                        print i, arg_names[i], self.executor.grad_arrays[i].asnumpy()[0]
                '''

                for key, arr in update_dict.items():
                    if key != 'upsample_proposal_weight':
                        self.updater(key, arr, self.arg_params[key])
                        '''
                        if key == 'config_fc1_weight':
                            print 'config_fc1_weight'
                            print 'param:', self.arg_params[key].asnumpy()
                            print 'grad:', self.executor.grad_arrays[39].asnumpy()
                        if key == 'refine_proj_param_weight':
                            print 'refine_proj_param_weight'
                            print 'param:', self.arg_params[key].asnumpy()
                            print 'grad:', self.executor.grad_arrays[47].asnumpy()
                        '''

                pred_shape = self.executor.outputs[0].shape
                index_label = np.nonzero(data[cls_label_name].reshape(
                    softmax_shape[0], softmax_shape[1] * softmax_shape[2]) -
                                         255)
                label = mx.nd.array(data[cls_label_name].reshape(
                    softmax_shape[0],
                    softmax_shape[1] * softmax_shape[2])[:, index_label[1]])
                pred = mx.nd.array(
                    (output_buff["proposal_cls_loss_output"].asnumpy().reshape(
                        pred_shape[0], pred_shape[1],
                        pred_shape[2] * pred_shape[3]))[..., index_label[1]])
                if softmax_metric:
                    tempt = softmax_metric(label, pred, 11)
                    softmax_count += tempt
                    softmax_batch += tempt

                # for q in range(0, 50):
                #    print label.asnumpy()[0, q], ':', pred.asnumpy()[0, 0, q], pred.asnumpy()[0, 1, q]

                proj_label = data[proj_label_name]
                proj_weight = data[proj_weight_name]
                proj_pred = output_buff["proj_regression_loss_output"].asnumpy()\
                    .reshape(data[proj_weight_name].shape)
                index_nonzero = np.nonzero(data[proj_weight_name])
                proj_regress_tmp = regression_metric(
                    proj_label[index_nonzero], proj_pred[index_nonzero],
                    proj_weight[index_nonzero])
                proj_regress_loss_t += proj_regress_tmp
                proj_regress_loss_b += proj_regress_tmp

                ell_label = output_buff["ell_label_output"].asnumpy()
                bbox_pred = output_buff["ellipse_predict_loss_output"].asnumpy(
                )
                bbox_predict_tmp = bbox_predict_metric(ell_label, bbox_pred)
                bbox_predict_loss_t += bbox_predict_tmp
                bbox_predict_loss_b += bbox_predict_tmp

                self.executor.outputs[0].wait_to_read()
                self.executor.outputs[1].wait_to_read()
                self.executor.outputs[2].wait_to_read()
                self.executor.outputs[3].wait_to_read()

                print "Training-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f" % \
                      (epoch, nbatch, get_accuracy(tempt, self.bgfg), proj_regress_tmp,
                       bbox_predict_tmp[0], bbox_predict_tmp[1])
                f.write(
                    "Training-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f\n"
                    % (epoch, nbatch, get_accuracy(
                        tempt, self.bgfg), proj_regress_tmp,
                       bbox_predict_tmp[0], bbox_predict_tmp[1]))

                img_info = train_data.AllImg[nbatch - 1]
                print "%s\twidth: %d height: %d num_face: %d" % \
                      (img_info.filename, img_info.width, img_info.height, img_info.num_faces)
                f.write("%s\twidth: %d height: %d num_face: %d\n" % \
                        (img_info.filename, img_info.width, img_info.height, img_info.num_faces))

                if nbatch % 50 == 0:
                    print_accuracy(softmax_batch, f, train_data.class_names,
                                   self.bgfg)
                    softmax_batch = np.zeros((11, 3))
                    print "Keypoints projection regression smoothl1 loss:\t", proj_regress_loss_b / 50
                    f.write(
                        "Keypoints projection regression smoothl1 loss:\t%f\n"
                        % (proj_regress_loss_b / 50))
                    print "Bounding box regression:\t", bbox_predict_loss_b / 50
                    f.write("Bounding box regression: %f %f\n" %
                            (bbox_predict_loss_b[0] / 50,
                             bbox_predict_loss_b[1] / 50))
                    #print "Keypoints offset regression smoothl1 loss:\t", offset_regress_loss_b / 50
                    #f.write("Keypoints offset regression smoothl1 loss:\t%f\n" % (offset_regress_loss_b / 50))
                    #print "Keypoints visibility accuracy:\t", float(softmax_vis_batch[2]) / float(softmax_vis_batch[0])
                    #f.write("Keypoints visibility accuracy:\t%f\n" %
                    #        (float(softmax_vis_batch[2]) / float(softmax_vis_batch[0])))
                    softmax_vis_batch = np.zeros(3)
                    proj_regress_loss_b = .0
                    offset_regress_loss_b = .0
                    bbox_predict_loss_b = np.array([.0, .0])

                if nbatch % 1000 == 0:
                    if epoch_end_callback != None:
                        epoch_end_callback(epoch * 100000 + nbatch,
                                           self.symbol, self.arg_params,
                                           self.aux_params)

            name, value = eval_metric.get()
            print_accuracy(softmax_count, f, train_data.class_names, self.bgfg)
            logger.info("--->Epoch[%d] Train-cls-%s=%f", epoch, name, value)
            logger.info("--->Epoch[%d] Train-proj-reg-smoothl1=%f", epoch,
                        proj_regress_loss_t / nbatch)
            logger.info("--->Epoch[%d] Train-bbox-reg-smoothl1=%f, %f", epoch,
                        bbox_predict_loss_t[0] / nbatch,
                        bbox_predict_loss_t[1] / nbatch)
            #logger.info("--->Epoch[%d] Train-offset-reg-smoothl1=%f", epoch, offset_regress_loss_t / nbatch)
            #logger.info("--->Epoch[%d] Train-vis-acc=%f", epoch, float(softmax_vis_count[2]) / float(softmax_vis_count[0]))
            f.write("--->Epoch[%d] Train-cls-%s=%f\n" % (epoch, name, value))
            f.write("--->Epoch[%d] Train-proj-reg-smoothl1=%f\n" %
                    (epoch, proj_regress_loss_t / nbatch))
            f.write("--->Epoch[%d] Train-bbox-reg-smoothl1=%f, %f" %
                    (epoch, bbox_predict_loss_t[0] / nbatch,
                     bbox_predict_loss_t[1] / nbatch))
            #f.write("--->Epoch[%d] Train-offset-reg-smoothl1=%f\n" % (epoch, offset_regress_loss_t / nbatch))
            #f.write("--->Epoch[%d] Train-vis-acc=%f" % (epoch, float(softmax_vis_count[2]) / float(softmax_vis_count[0])))

        f.close()
Ejemplo n.º 10
0
    def fit(self,
            train_data,
            eval_data=None,
            eval_metric='acc',
            period=('train', 'val'),
            to_eval_train=True,
            grad_req='write',
            epoch_end_callback=None,
            batch_end_callback=None,
            kvstore='local',
            logger=None):
        """Train the bound symbol on `train_data` and optionally validate.

        Parameters
        ----------
        train_data : DataIter
            Training iterator; must expose ``provide_data`` / ``provide_label``
            and ``batch_size``.
        eval_data : DataIter, optional
            Validation iterator, used when ``'val'`` is in `period`.
        eval_metric : str
            ``'acc'`` (mxnet accuracy metric) or ``'meanIOU'``.
        period : sequence of str
            Which phases to run each epoch: ``'train'`` and/or ``'val'``.
            (Tuple default avoids the mutable-default-argument pitfall;
            membership tests are unchanged for callers.)
        to_eval_train : bool
            Whether to update `eval_metric` on training batches.
        grad_req : str
            Gradient request passed to ``bind`` (``'write'`` or ``'null'``).
        epoch_end_callback : callable, optional
            Invoked as ``cb(epoch, symbol, arg_params, aux_params)``.
        batch_end_callback : callable, optional
            Invoked with a ``BatchEndParam`` after every batch.
        kvstore : str
            Unused in this implementation; kept for interface compatibility.
        logger : logging-like object, optional
            Defaults to the root ``logging`` module.
        """
        if logger is None:
            logger = logging
        # BUG FIX: use the supplied logger, not the module-level logging.
        logger.info('Start training with %s', str(self.ctx))

        # region 1. prepare arguments: input data and label placeholders
        arg_names = self.symbol.list_arguments()
        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(
            data=train_data.provide_data[0][1])
        data_name = train_data.provide_data[0][0]
        label_name = train_data.provide_label[0][0]
        # (batch_size, channel, h, w) device buffer for the input batch
        self.arg_params[data_name] = mx.nd.empty(train_data.provide_data[0][1],
                                                 self.ctx)
        # (batch_size, h*w) device buffer for the labels
        self.arg_params[label_name] = mx.nd.empty(
            train_data.provide_label[0][1], self.ctx)
        # auxiliary states (e.g. BatchNorm moving statistics)
        aux_names = self.symbol.list_auxiliary_states()
        self.aux_params = {
            k: mx.nd.zeros(s)
            for k, s in zip(aux_names, aux_shapes)
        }
        # endregion

        # region 2. gradient buffers (data/label need no gradient)
        if grad_req != 'null':
            self.grad_params = {}
            for name, shape in zip(arg_names, arg_shapes):
                if not (name.endswith('data') or name.endswith('label')):
                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
        else:
            self.grad_params = None
        # endregion

        # region 3. bind model parameters and outputs
        self.executor = self.symbol.bind(self.ctx,
                                         self.arg_params,
                                         args_grad=self.grad_params,
                                         grad_req=grad_req,
                                         aux_states=self.aux_params)
        assert len(self.symbol.list_arguments()) == len(
            self.executor.grad_arrays)
        # CPU-side copies of the outputs so metric evaluation does not
        # serialize against the device
        output_dict = {}
        output_buff = {}
        for key, arr in zip(self.symbol.list_outputs(), self.executor.outputs):
            output_dict[key] = arr
            output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
        # endregion

        # region 4. optimizer and updater
        self.optimizer = opt.create(self.optimizer,
                                    rescale_grad=1.0 / train_data.batch_size,
                                    **self.kwargs)
        self.updater = get_updater(self.optimizer)
        # only the arguments that actually received a gradient array
        update_dict = {
            name: nd
            for name, nd in zip(self.symbol.list_arguments(),
                                self.executor.grad_arrays) if nd is not None
        }
        # endregion

        # region 5. evaluation metric
        if eval_metric == 'acc':
            eval_metric = metric.create(eval_metric)
        elif eval_metric == 'meanIOU':
            eval_metric = MeanIoU(c=1, )
        # endregion

        for epoch in range(self.begin_epoch, self.num_epoch):
            # region training phase
            if 'train' in period:
                logger.info(" in train process...")
                all_start = time.time()
                nbatch = 0
                train_data.reset()
                eval_metric.reset()
                for data in train_data:
                    nbatch += 1
                    # 1. copy the batch into the bound placeholders
                    self.arg_params[data_name][:] = data.data[0]
                    self.arg_params[label_name][:] = data.label[0]

                    # 2. forward
                    self.executor.forward(is_train=True)

                    # 3. backward + parameter update (name, grad, weight)
                    self.executor.backward()
                    for key, arr in update_dict.items():
                        if key != "bigscore_weight":
                            self.updater(key, arr, self.arg_params[key])

                    # 4. evaluate on the training batch
                    if to_eval_train:
                        for key in output_dict:
                            output_dict[key].copyto(output_buff[key])
                        pred_shape = output_buff['softmax_output'].shape
                        # flatten spatial dims: (n, c, h, w) -> (n, c, h*w)
                        pred = output_buff['softmax_output'].reshape(
                            (pred_shape[0], pred_shape[1],
                             pred_shape[2] * pred_shape[3]))
                        label = data.label[0]
                        eval_metric.update([label], [pred])
                    batch_end_params = BatchEndParam(
                        epoch=epoch,
                        nbatch=nbatch,
                        eval_metric=eval_metric if to_eval_train else None,
                    )
                    # BUG FIX: the parameter defaults to None; guard the call.
                    if batch_end_callback is not None:
                        batch_end_callback(batch_end_params)
                if epoch_end_callback is not None:
                    epoch_end_callback(epoch, self.symbol, self.arg_params,
                                       self.aux_params)

                if to_eval_train:
                    name, value = eval_metric.get()
                    logger.info(
                        "                     --->Epoch[%d] Train-%s=%f",
                        epoch, name, value)
                logger.info('train time per epoch: %f s' %
                            (time.time() - all_start))
            # endregion

            # region validation phase
            if 'val' in period and eval_data:
                logger.info(" in eval process...")
                nbatch = 0
                eval_data.reset()
                eval_metric.reset()
                for data in eval_data:
                    nbatch += 1
                    self.arg_params[data_name][:] = data.data[0]
                    self.arg_params[label_name][:] = data.label[0]

                    self.executor.forward(is_train=False)
                    pred_shape = self.executor.outputs[0].shape

                    # copy the prediction to CPU before reshaping/scoring
                    cpu_output_array = mx.nd.empty(pred_shape)
                    self.executor.outputs[0].copyto(cpu_output_array)

                    label = data.label[0]
                    pred = cpu_output_array.reshape(
                        (pred_shape[0], pred_shape[1],
                         pred_shape[2] * pred_shape[3]))
                    eval_metric.update([label], [pred])

                    batch_end_params = BatchEndParam(
                        epoch=epoch,
                        nbatch=nbatch,
                        eval_metric=None,
                    )
                    # BUG FIX: same guard as the training phase.
                    if batch_end_callback is not None:
                        batch_end_callback(batch_end_params)
                name, value = eval_metric.get()
                logger.info('Epoch[%d] Validation-%s=%f', epoch, name, value)
            # endregion
Ejemplo n.º 11
0
def _train_multi_device(symbol, ctx, arg_names, param_names, aux_names,
                        arg_params, aux_params,
                        begin_epoch, end_epoch, epoch_size, optimizer,
                        kvstore, update_on_kvstore,
                        train_data, eval_data=None, eval_metric=None,
                        epoch_end_callback=None, batch_end_callback=None,
                        logger=None, work_load_list=None, monitor=None,
                        eval_batch_end_callback=None, sym_gen=None):
    """Internal training function on multiple devices.
    This function will also work for single device as well.
    Parameters
    ----------
    symbol : Symbol
        The network configuration
    ctx : list of Context
        The training devices.
    arg_names: list of str
        Name of all arguments of the network.
    param_names: list of str
        Name of all trainable parameters of the network.
    aux_names: list of str
        Name of all auxiliary states of the network.
    arg_params : dict of str to NDArray
        Model parameter, dict of name to NDArray of net's weights.
    aux_params : dict of str to NDArray
        Model parameter, dict of name to NDArray of net's auxiliary states.
    begin_epoch : int
        The beginning training epoch.
    end_epoch : int
        The end training epoch.
    epoch_size : int, optional
        Number of batches in a epoch. In default, it is set to
        ceil(num_train_examples / batch_size)
    optimizer : Optimizer
        The optimization algorithm
    train_data : DataIter
        Training data iterator.
    eval_data : DataIter
        Validation data iterator.
    eval_metric : EvalMetric
        An evaluation function or a list of evaluation functions.
    epoch_end_callback : callable(epoch, symbol, arg_params, aux_states)
        A callback that is invoked at end of each epoch.
        This can be used to checkpoint model each epoch.
    batch_end_callback : callable(BatchEndParams)
        A callback that is invoked at end of each batch.
        This can be used to measure speed, get result from evaluation metric. etc.
    kvstore : KVStore
        The KVStore
    update_on_kvstore : bool
        whether or not perform weight updating on kvstore
    logger : logging logger
        When not specified, default logger will be used.
    work_load_list : list of float or int, optional
        The list of work load for different devices,
        in the same order as ctx
    monitor : Monitor, optional
        Monitor installed to executor,
        for monitoring outputs, weights, and gradients for debugging.
    eval_batch_end_callback : callable(BatchEndParams), optional
        A callback invoked after each validation batch.
    sym_gen : callable, optional
        Symbol generator, used for bucketing execution.
    Notes
    -----
    - This function will inplace update the NDArrays in arg_params and aux_states.
    """

    if logger is None:
        logger = logging
    executor_manager = DataParallelExecutorManager(symbol=symbol,
                                                   sym_gen=sym_gen,
                                                   ctx=ctx,
                                                   train_data=train_data,
                                                   param_names=param_names,
                                                   arg_names=arg_names,
                                                   aux_names=aux_names,
                                                   work_load_list=work_load_list,
                                                   logger=logger)
    if monitor:
        executor_manager.install_monitor(monitor)

    executor_manager.set_params(arg_params, aux_params)

    # local updates need an updater closure; kvstore updates do not
    if not update_on_kvstore:
        updater = get_updater(optimizer)

    if kvstore:
        _initialize_kvstore(kvstore=kvstore,
                            param_arrays=executor_manager.param_arrays,
                            arg_params=arg_params,
                            param_names=executor_manager.param_names,
                            update_on_kvstore=update_on_kvstore)

    if update_on_kvstore:
        logger.debug("Update on kvstore, setting optimizer")
        kvstore.set_optimizer(optimizer)

    # Now start training
    train_data.reset()
    for epoch in range(begin_epoch, end_epoch):
        # Training phase
        tic = time.time()
        eval_metric.reset()
        nbatch = 0
        # Iterate over training data; the outer while lets an "epoch" span
        # several passes of the iterator when epoch_size exceeds it.
        while True:
            do_reset = True
            for data_batch in train_data:
                executor_manager.load_data_batch(data_batch)
                if monitor is not None:
                    monitor.tic()

                executor_manager.forward(is_train=True)
                executor_manager.backward()
                if update_on_kvstore:

                    _update_params_on_kvstore(executor_manager.param_arrays,
                                              executor_manager.grad_arrays,
                                              kvstore)
                else:
                    _update_params(executor_manager.param_arrays,
                                   executor_manager.grad_arrays,
                                   updater=updater,
                                   num_device=len(ctx),
                                   kvstore=kvstore)

                if monitor is not None:
                    monitor.toc_print()

                # evaluate at end, so we can lazy copy
                executor_manager.update_metric(eval_metric, data_batch.label)

                nbatch += 1
                # batch callback (for print purpose)
                # PEP 8: compare to None with `is not None`, not `!=`.
                if batch_end_callback is not None:
                    batch_end_params = BatchEndParam(epoch=epoch,
                                                     nbatch=nbatch,
                                                     eval_metric=eval_metric,
                                                     locals=locals())
                    if isinstance(batch_end_callback, list):
                        for call in batch_end_callback:
                            call(batch_end_params)
                    else:
                        batch_end_callback(batch_end_params)

                # this epoch is done possibly earlier
                if epoch_size is not None and nbatch >= epoch_size:
                    do_reset = False
                    break

            if do_reset:
                logger.info('Epoch[%d] Resetting Data Iterator', epoch)
                train_data.reset()

            # this epoch is done
            if epoch_size is None or nbatch >= epoch_size:
                break

        toc = time.time()
        logger.info('Epoch[%d] Time cost=%.3f', epoch, (toc - tic))

        # copy parameters back when a callback needs them or training ends
        if epoch_end_callback or epoch + 1 == end_epoch:
            executor_manager.copy_to(arg_params, aux_params)

        if epoch_end_callback is not None:
            if isinstance(epoch_end_callback, list):
                for call in epoch_end_callback:
                    call(epoch, symbol, arg_params, aux_params)
            else:
                epoch_end_callback(epoch, symbol, arg_params, aux_params)

        # evaluation
        if eval_data:
            eval_metric.reset()
            eval_data.reset()
            for i, eval_batch in enumerate(eval_data):
                executor_manager.load_data_batch(eval_batch)
                executor_manager.forward(is_train=False)
                executor_manager.update_metric(eval_metric, eval_batch.label)
                if eval_batch_end_callback is not None:
                    batch_end_params = BatchEndParam(epoch=epoch,
                                                     nbatch=i,
                                                     eval_metric=eval_metric,
                                                     locals=locals())
                    if isinstance(eval_batch_end_callback, list):
                        for call in eval_batch_end_callback:
                            call(batch_end_params)
                    else:
                        eval_batch_end_callback(batch_end_params)
            name_value = eval_metric.get_name_value()
            for name, value in name_value:
                logger.info('Epoch[%d] Validation-%s=%f', epoch, name, value)
    # end of all epochs
    return
def fddb_finetune_fold(fold_index):
    target_index = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10"]
    num_train_feature = 0
    num_valid_feature = 0
    for index in target_index:
        if index != fold_index:
            num_train_feature += num_feature_fold[index]
        else:
            num_valid_feature += num_feature_fold[index]

    train_feature = np.zeros((num_train_feature, feature_len), dtype=np.float)
    train_label = np.zeros((num_train_feature, label_len), dtype=np.float)
    train_weight = np.zeros((num_train_feature, label_len), dtype=np.float)
    train_feature_index = 0
    valid_feature = np.zeros((num_valid_feature, feature_len), dtype=np.float)
    valid_label = np.zeros((num_valid_feature, label_len), dtype=np.float)
    valid_weight = np.zeros((num_valid_feature, label_len), dtype=np.float)
    valid_feature_index = 0
    for index in target_index:
        for i in xrange(num_feature_fold[index]):
            if index != fold_index:
                train_feature[train_feature_index] = feature_fold[index][i]
                train_label[train_feature_index] = label_fold[index][i]
                train_weight[train_feature_index] = weight_fold[index][i]
                train_feature_index += 1
            else:
                valid_feature[valid_feature_index] = feature_fold[index][i]
                valid_label[valid_feature_index] = label_fold[index][i]
                valid_weight[valid_feature_index] = weight_fold[index][i]
                valid_feature_index += 1

    if retrain:
        symbol_finetune = fddb_symbol_finetune.get_vgg16_finetune()
        args = {}
        auxs = {}
        arg_names = symbol_finetune.list_arguments()
        aux_names = symbol_finetune.list_auxiliary_states()
        arg_shapes, _, aux_shapes = symbol_finetune.infer_shape(
            data=(batchsize, feature_len))
        for name, shape in zip(arg_names, arg_shapes):
            if len(shape) < 1:
                continue
            fan_in, fan_out = np.prod(shape[1:]), shape[0]
            factor = fan_in
            scale = np.sqrt(2.34 / factor)
            tempt = np.random.uniform(-scale, scale, size=shape)
            args[name] = mx.nd.array(tempt, ctx)

        for name, shape in zip(aux_names, aux_shapes):
            if len(shape) < 1:
                continue
            fan_in, fan_out = np.prod(shape[1:]), shape[0]
            factor = fan_in
            scale = np.sqrt(2.34 / factor)
            tempt = np.random.uniform(-scale, scale, size=shape)
            auxs[name] = mx.nd.array(tempt, ctx)
    else:
        symbol_finetune = fddb_symbol_finetune.get_vgg16_finetune()
        _, args, auxs = mx.model.load_checkpoint(rpn_prefix, load_epoch)
        for k, v in args.items():
            if v.context != ctx:
                args[k] = mx.nd.zeros(v.shape, ctx)
                v.copyto(args[k])
        for k, v in auxs.items():
            if v.context != ctx:
                auxs[k] = mx.nd.zeros(v.shape, ctx)
                v.copyto(auxs[k])
        arg_names = symbol_finetune.list_arguments()
        arg_shapes, _, aux_shapes = symbol_finetune.infer_shape(
            data=(batchsize, feature_len))

    grad_params = {}
    for name, shape in zip(arg_names, arg_shapes):
        if not (name.endswith('ell_label') or name.endswith('bbox_weight')
                or name.endswith('data')):
            grad_params[name] = mx.nd.zeros(shape, ctx)

    num_train_batch = num_train_feature / batchsize
    lr = 0.03
    lr_decay = 0.33
    epoch_end_callback = mx.callback.do_checkpoint(finetune_prefix + "-" +
                                                   fold_index)

    for j in range(start_epoch, end_epoch):
        bbox_predict_loss = np.array([.0, .0, .0])
        if j % 50 == 0 or j == start_epoch:
            lr *= lr_decay
            optimizer = opt.create('sgd',
                                   rescale_grad=1.0 / batchsize,
                                   learning_rate=lr,
                                   momentum=0.9,
                                   wd=0.00001)
            updater = get_updater(optimizer)
        for i in range(num_train_batch):
            feature_b = train_feature[i * batchsize:(i + 1) * batchsize, :]
            label_b = train_label[i * batchsize:(i + 1) * batchsize, :]
            weight_b = train_weight[i * batchsize:(i + 1) * batchsize, :]
            args["data"] = mx.nd.array(feature_b, ctx)
            args["ell_label"] = mx.nd.array(label_b, ctx)
            args["bbox_weight"] = mx.nd.array(weight_b, ctx)
            executor = symbol_finetune.bind(ctx,
                                            args,
                                            args_grad=grad_params,
                                            grad_req='write',
                                            aux_states=auxs)
            assert len(symbol_finetune.list_arguments()) == len(
                executor.grad_arrays)

            update_dict = {
                name: nd
                for name, nd in zip(symbol_finetune.list_arguments(),
                                    executor.grad_arrays) if nd
            }
            output_dict = {}
            output_buff = {}
            for key, arr in zip(symbol_finetune.list_outputs(),
                                executor.outputs):
                output_dict[key] = arr
                output_buff[key] = mx.nd.zeros(arr.shape, ctx=mx.cpu())
            executor.forward(is_train=True)

            for key in output_dict:
                output_dict[key].copyto(output_buff[key])

            executor.backward()
            for key, arr in update_dict.items():
                updater(key, arr, args[key])

            executor.outputs[0].wait_to_read()

            face_pred = output_buff["ellipse_predict_loss_output"].asnumpy()

            bbox_predict_b = bbox_predict_metric(label_b, face_pred, weight_b)
            bbox_predict_loss += bbox_predict_b

            if i % 10 == 0:
                print "Training-fold[" + \
                      fold_index + \
                      "]-epoch[%d/%d]-batch[%d/%d]: lr:%f\tbbox_regress:%f\tbbox_angle:%f\tiou_regress:%f" % \
                    (j, end_epoch, i, num_train_batch, lr, bbox_predict_b[0], bbox_predict_b[1], bbox_predict_b[2])

        print "ALL Training: bbox_regress:%f\tbbox_angle:%f\tiou_regress:%f" % \
              (bbox_predict_loss[0] / float(num_train_batch), bbox_predict_loss[1] / float(num_train_batch),
               bbox_predict_loss[2] / float(num_train_batch))

        if j % 25 == 0:
            print "Saving the model:", j
            epoch_end_callback(j, symbol_finetune, args, auxs)

        args["data"] = mx.nd.array(valid_feature, ctx)
        args["ell_label"] = mx.nd.array(valid_label, ctx)
        args["bbox_weight"] = mx.nd.array(
            np.ones((valid_feature.shape[0], label_len), dtype=np.float), ctx)

        executor = symbol_finetune.bind(ctx,
                                        args,
                                        args_grad=None,
                                        grad_req='null',
                                        aux_states=auxs)
        output_dict = {}
        output_buff = {}
        for key, arr in zip(symbol_finetune.list_outputs(), executor.outputs):
            output_dict[key] = arr
            output_buff[key] = mx.nd.zeros(arr.shape, ctx=mx.cpu())
        executor.forward(is_train=True)
        for key in output_dict:
            output_dict[key].copyto(output_buff[key])
        executor.outputs[0].wait_to_read()
        face_pred = output_buff["ellipse_predict_loss_output"].asnumpy()

        print valid_label[0]
        print face_pred[0]
        bbox_predict_b = bbox_predict_metric(valid_label, face_pred,
                                             valid_weight)

        print "ALL Validation: bbox_regress:%f\tbbox_angle:%f\tiou_regress:%f" % \
              (bbox_predict_b[0], bbox_predict_b[1], bbox_predict_b[2])
Ejemplo n.º 13
0
            grad_params[name] = mx.nd.zeros(shape, ctx)

    # prepare aux_params
    aux_names = network.list_auxiliary_states()
    aux_params = {
        k: mx.nd.zeros(s, ctx)
        for k, s in zip(aux_names, aux_shapes)
    }

    # prepare optimizer
    optimizer = opt.create('adam',
                           rescale_grad=(1.0 / dataiter.get_batch_size()),
                           **({
                               'learning_rate': 0.01
                           }))
    updater = get_updater(optimizer)

    # create eval_metrix
    eval_metric = metric.create('rmse')

    data_name = dataiter.data_name
    label_name = dataiter.label_name
    arg_params = network_args
    aux_params = network_auxs

    batch_callback = mx.callback.Speedometer(1, 10)
    epoch_callback = mx.callback.do_checkpoint(save_model_prefix)

    # begin training
    for epoch in range(10000):
        nbatch = 0