# Example #1
# 0
def test_optimizer(optimizer, optimizer_params):
    """Create the named optimizer from the registry and drive it through a
    short sequence of updates, exercising the Sockeye-specific batch and
    checkpoint pre-update hooks when the optimizer supports them."""
    # Weights
    index = 0
    weight = nd.zeros(shape=(8,))
    # Optimizer from registry
    optimizer = opt.create(optimizer, **optimizer_params)
    state = optimizer.create_state(index, weight)
    # Run a few updates
    for i in range(1, 13):
        grad = nd.random_normal(shape=(8,))
        if isinstance(optimizer, SockeyeOptimizer):
            batch_state = BatchState(metric_val=random())
            optimizer.pre_update_batch(batch_state)
        optimizer.update(index, weight, grad, state)
        # Checkpoint every third update.
        if i % 3 == 0:
            if isinstance(optimizer, SockeyeOptimizer):
                # BUG FIX: the original used (i % 3 + 1), which is always 1
                # inside this branch (i % 3 == 0 here), so the checkpoint id
                # never advanced. i // 3 yields 1, 2, 3, 4 across the loop.
                checkpoint_state = CheckpointState(checkpoint=i // 3, metric_val=random())
                optimizer.pre_update_checkpoint(checkpoint_state)
# Example #2
# 0
def test_optimizer(optimizer, optimizer_params):
    """Instantiate an optimizer via the registry and run twelve updates on a
    small weight vector, firing the Sockeye pre-update hooks where relevant."""
    idx = 0
    weight = nd.zeros(shape=(8,))
    optimizer = opt.create(optimizer, **optimizer_params)
    state = optimizer.create_state(idx, weight)
    is_sockeye = isinstance(optimizer, SockeyeOptimizer)
    for step in range(1, 13):
        grad = nd.random_normal(shape=(8,))
        if is_sockeye:
            optimizer.pre_update_batch(BatchState(metric_val=random()))
        optimizer.update(idx, weight, grad, state)
        # Every third step counts as a checkpoint boundary.
        if step % 3 == 0 and is_sockeye:
            optimizer.pre_update_checkpoint(
                CheckpointState(checkpoint=(step % 3 + 1), metric_val=random()))
# Example #3
# 0
    def network_backprop_setup(self, grad_req, arg_names, arg_shapes,
                               eval_metric):
        """Allocate gradient buffers (unless ``grad_req`` is 'null'), build
        the optimizer/updater pair, and return the created evaluation metric."""
        # Argument-name suffixes whose gradients are never needed
        # (inputs, labels, and fixed auxiliary tensors).
        skip_suffixes = ('data', "mean_face", 'cls_label', 'proj_weight',
                         'proj_label', 'ground_truth', 'ellipse_label',
                         "bbox_weight")
        if grad_req != 'null':
            self.grad_params = {
                name: mx.nd.zeros(shape, self.ctx)
                for name, shape in zip(arg_names, arg_shapes)
                if not name.endswith(skip_suffixes)
            }

        # setting the required optimizer
        self.optimizer = opt.create(self.optimizer, rescale_grad=1.0,
                                    **(self.kwargs))
        self.updater = get_updater(self.optimizer)

        return metric.create(eval_metric)
# Example #4
# 0
    def init_optimizer(self, kvstore='local', optimizer='sgd',
                       optimizer_params=(('learning_rate', 0.01),), force_init=False):
        """Install and initialize optimizers.

        Parameters
        ----------
        kvstore : str or KVStore
            Default `'local'`.
        optimizer : str or Optimizer
            Default `'sgd'`
        optimizer_params : dict
            Default `(('learning_rate', 0.01),)`. The default value is not a dictionary,
            just to avoid pylint warning of dangerous default values.
        force_init : bool
            Default `False`, indicating whether we should force re-initializing the
            optimizer in the case an optimizer is already installed.
        """
        # Parameters must be bound and initialized before attaching an optimizer.
        assert self.binded and self.params_initialized

        # Idempotent unless force_init: a repeated call is a no-op with a warning.
        if self.optimizer_initialized and not force_init:
            self.logger.warning('optimizer already initialized, ignoring...')
            return

        # Resolve the kvstore and decide whether parameter updates happen on
        # the kvstore itself or locally via an updater closure.
        (kvstore, update_on_kvstore) = \
                _create_kvstore(kvstore, len(self._context), self._arg_params)

        # Gradients are normalized by the effective global batch size;
        # distributed synchronous training scales by the number of workers.
        batch_size = self._exec_group.batch_size
        if kvstore and 'dist' in kvstore.type and '_sync' in kvstore.type:
            batch_size *= kvstore.num_workers
        rescale_grad = 1.0/batch_size

        if isinstance(optimizer, str):
            # Build an index -> parameter-name mapping for the optimizer.
            idx2name = {}
            if update_on_kvstore:
                idx2name.update(enumerate(self._exec_group.param_names))
            else:
                # Local updates: one index slot per (parameter, device) pair.
                for k in range(len(self._context)):
                    idx2name.update({i*len(self._context)+k: n
                                     for i, n in enumerate(self._exec_group.param_names)})
            optimizer_params = dict(optimizer_params)
            # Only inject rescale_grad when the caller did not set it explicitly.
            if 'rescale_grad' not in optimizer_params:
                optimizer_params['rescale_grad'] = rescale_grad
            optimizer = opt.create(optimizer,
                                   sym=self.symbol, param_idx2name=idx2name,
                                   **optimizer_params)
        else:
            assert isinstance(optimizer, opt.Optimizer)
            # A pre-built optimizer is used as-is; warn if its gradient scaling
            # differs from the computed 1/batch_size normalization.
            if optimizer.rescale_grad != rescale_grad:
                #pylint: disable=no-member
                warnings.warn(
                    "Optimizer created manually outside Module but rescale_grad " +
                    "is not normalized to 1.0/batch_size/num_workers (%s vs. %s). "%(
                        optimizer.rescale_grad, rescale_grad) +
                    "Is this intended?", stacklevel=2)

        self._optimizer = optimizer
        self._kvstore = kvstore
        self._update_on_kvstore = update_on_kvstore
        self._updater = None

        if kvstore:
            # copy initialized local parameters to kvstore
            _initialize_kvstore(kvstore=kvstore,
                                param_arrays=self._exec_group.param_arrays,
                                arg_params=self._arg_params,
                                param_names=self._param_names,
                                update_on_kvstore=update_on_kvstore)
        # Either the kvstore applies updates itself, or we keep a local updater.
        if update_on_kvstore:
            kvstore.set_optimizer(self._optimizer)
        else:
            self._updater = opt.get_updater(optimizer)

        self.optimizer_initialized = True

        # Apply optimizer states preloaded from a checkpoint, if any.
        if self._preload_opt_states is not None:
            self.load_optimizer_states(self._preload_opt_states)
            self._preload_opt_states = None
# Example #5
# 0
 def fit(self, train_data, eval_data=None,
         eval_metric='acc',
         grad_req='write',
         epoch_end_callback=None,
         batch_end_callback=None,
         kvstore='local',
         logger=None):
      """Train the bound symbol on ``train_data``; optionally evaluate on
      ``eval_data`` after each epoch.

      Parameters
      ----------
      train_data : iterator yielding dicts keyed by its data/label names
      eval_data : optional iterator with the same interface as train_data
      eval_metric : str or EvalMetric, resolved through ``metric.create``
      grad_req : str, gradient request passed to ``bind`` ('write'/'add'/'null')
      epoch_end_callback : callable(epoch, symbol, arg_params, aux_params) or None
      batch_end_callback : callable(BatchEndParam) or None
      kvstore : accepted for interface compatibility; not used by this loop
      logger : logger-like object; defaults to the ``logging`` module
      """
      if logger is None:
          logger = logging
      logging.info('Start training with %s', str(self.ctx))
      arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(data=train_data.provide_data[0][1])
      arg_names = self.symbol.list_arguments()
      # Allocate gradient buffers for every argument except inputs and labels.
      if grad_req != 'null':
          self.grad_params = {}
          for name, shape in zip(arg_names, arg_shapes):
              if not (name.endswith('data') or name.endswith('label')):
                  self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
      else:
          self.grad_params = None
      aux_names = self.symbol.list_auxiliary_states()
      self.aux_params = {k : nd.zeros(s) for k, s in zip(aux_names, aux_shapes)}
      data_name = train_data.data_name
      label_name = train_data.label_name
      input_names = [data_name, label_name]
      self.optimizer = opt.create(self.optimizer, rescale_grad=(1.0/train_data.get_batch_size()), **(self.kwargs))
      self.updater = get_updater(self.optimizer)
      eval_metric = metric.create(eval_metric)
      # begin training
      for epoch in range(self.begin_epoch, self.num_epoch):
          nbatch = 0
          train_data.reset()
          eval_metric.reset()
          for data in train_data:
              nbatch += 1
              label_shape = data[label_name].shape
              self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
              self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                  label_shape[1]*label_shape[2]), self.ctx)
              output_names = self.symbol.list_outputs()
              # NOTE(review): the executor is re-bound on every batch; kept as
              # in the original flow (input shapes may vary between batches).
              self.exector = self.symbol.bind(self.ctx, self.arg_params,
                              args_grad=self.grad_params,
                              grad_req=grad_req,
                              aux_states=self.aux_params)
              assert len(self.symbol.list_arguments()) == len(self.exector.grad_arrays)
              # BUG FIX: use an explicit None check instead of `if nd`.
              # NDArray truthiness is ambiguous for multi-element arrays; the
              # intent is only to drop arguments that have no gradient buffer.
              update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
                  self.exector.grad_arrays) if nd is not None}
              output_dict = {}
              output_buff = {}
              for key, arr in zip(self.symbol.list_outputs(), self.exector.outputs):
                  output_dict[key] = arr
                  output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
              self.exector.forward(is_train=True)
              for key in output_dict:
                  output_dict[key].copyto(output_buff[key])
              self.exector.backward()
              for key, arr in update_dict.items():
                  if key != "bigscore_weight":
                      self.updater(key, arr, self.arg_params[key])
              pred_shape = self.exector.outputs[0].shape
              label = mx.nd.array(data[label_name].reshape(label_shape[0], label_shape[1]*label_shape[2]))
              pred = mx.nd.array(output_buff["softmax_output"].asnumpy().reshape(pred_shape[0], \
                  pred_shape[1], pred_shape[2]*pred_shape[3]))
              eval_metric.update([label], [pred])
              self.exector.outputs[0].wait_to_read()
              batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metric)
              # BUG FIX: batch_end_callback defaults to None; guard the call
              # (the epoch_end_callback below was already guarded).
              if batch_end_callback is not None:
                  batch_end_callback(batch_end_params)
          if epoch_end_callback is not None:
              epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params)
          name, value = eval_metric.get()
          logger.info("                     --->Epoch[%d] Train-%s=%f", epoch, name, value)
          # evaluation
          if eval_data:
              logger.info(" in eval process...")
              nbatch = 0
              eval_data.reset()
              eval_metric.reset()
              for data in eval_data:
                  nbatch += 1
                  label_shape = data[label_name].shape
                  self.arg_params[data_name] = mx.nd.array(data[data_name], self.ctx)
                  self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                      label_shape[1]*label_shape[2]), self.ctx)
                  exector = self.symbol.bind(self.ctx, self.arg_params,
                                  args_grad=self.grad_params,
                                  grad_req=grad_req,
                                  aux_states=self.aux_params)
                  cpu_output_array = mx.nd.zeros(exector.outputs[0].shape)
                  exector.forward(is_train=False)
                  exector.outputs[0].copyto(cpu_output_array)
                  pred_shape = cpu_output_array.shape
                  label = mx.nd.array(data[label_name].reshape(label_shape[0], \
                      label_shape[1]*label_shape[2]))
                  pred = mx.nd.array(cpu_output_array.asnumpy().reshape(pred_shape[0], \
                      pred_shape[1], pred_shape[2]*pred_shape[3]))
                  eval_metric.update([label], [pred])
                  exector.outputs[0].wait_to_read()
              # BUG FIX: log validation results only when evaluation actually
              # ran; previously a misleading "Validation" line (carrying
              # training metrics) was emitted even when eval_data was None.
              name, value = eval_metric.get()
              logger.info('batch[%d] Validation-%s=%f', nbatch, name, value)
# Example #6
# 0
    def fit(self, train_data, eval_data=None,
            eval_metric='acc',
            grad_req='write',
            epoch_end_callback=None,
            batch_end_callback=None,
            kvstore='local',
            logger=None):
        """Train the detection network on ``train_data``; after each epoch,
        optionally run evaluation (detection accuracy and a disparity-based
        depth metric) on ``eval_data``.

        NOTE(review): ``eval_metric`` and ``kvstore`` are accepted but unused
        by this loop — the training metric is hard-wired to MultiBoxMetric and
        updates are applied locally via ``get_updater``. Segmentation-related
        paths are present but commented out throughout.
        """
        global outimgiter
        if logger is None:
            logger = logging
        logging.info('Start training with %s', str(self.ctx))
        logging.info(str(self.kwargs))
        batch_size = train_data.provide_data[0][1][0]
        # Shape inference pins the detection label layout to (batch, 200, 6).
        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape( \
            data=tuple(train_data.provide_data[0][1]), label_det=(batch_size,200,6))
        arg_names = self.symbol.list_arguments()
        out_names = self.symbol.list_outputs()
        aux_names = self.symbol.list_auxiliary_states()

        # pprint([(n,s) for n,s in zip(arg_names,arg_shapes)])
        # pprint([(n,s) for n,s in zip(out_names,out_shapes)])
        # pprint([(n,s) for n,s in zip(aux_names,aux_shapes)])
        
        # Allocate gradient buffers for every argument except inputs/labels.
        if grad_req != 'null':
            self.grad_params = {}
            for name, shape in zip(arg_names, arg_shapes):
                if not (name.endswith('data') or name.endswith('label')):
                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
        else:
            self.grad_params = None
        self.aux_params = {k : mx.nd.zeros(s, self.ctx) for k, s in zip(aux_names, aux_shapes)}
        data_name = train_data.provide_data[0][0]
        label_name_det = train_data.provide_label[0][0]
        label_name_seg = train_data.provide_label[1][0]
        input_names = [data_name, label_name_det, label_name_seg]

        print(train_data.provide_label)
        print(os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"])

        self.optimizer = opt.create(self.optimizer, rescale_grad=(1.0/train_data.batch_size), **(self.kwargs))
        self.updater = get_updater(self.optimizer)
        eval_metric = CustomAccuracyMetric() # metric.create(eval_metric)
        multibox_metric = MultiBoxMetric()

        eval_metrics = metric.CompositeEvalMetric()
        eval_metrics.add(multibox_metric)
        # eval_metrics.add(eval_metric)
        
        # begin training
        for epoch in range(self.begin_epoch, self.num_epoch):
            nbatch = 0
            train_data.reset()
            eval_metrics.reset()
            logger.info('learning rate: '+str(self.optimizer.learning_rate))
            for data,_ in train_data:
                # In evaluation-only mode the training loop is skipped entirely.
                if self.evaluation_only:
                    break
                nbatch += 1
                label_shape_det = data.label[0].shape
                label_shape_seg = data.label[1].shape
                self.arg_params[data_name] = mx.nd.array(data.data[0], self.ctx)
                self.arg_params[label_name_det] = mx.nd.array(data.label[0], self.ctx)
                self.arg_params[label_name_seg] = mx.nd.array(data.label[1], self.ctx)
                output_names = self.symbol.list_outputs()

                ###################### analyze shapes ####################
                # pprint([(k,v.shape) for k,v in self.arg_params.items()])
                
                # NOTE(review): the executor is re-bound on every batch.
                self.executor = self.symbol.bind(self.ctx, self.arg_params,
                    args_grad=self.grad_params, grad_req=grad_req, aux_states=self.aux_params)
                assert len(self.symbol.list_arguments()) == len(self.executor.grad_arrays)
                update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
                    self.executor.grad_arrays) if nd is not None}
                output_dict = {}
                output_buff = {}
                for key, arr in zip(self.symbol.list_outputs(), self.executor.outputs):
                    output_dict[key] = arr
                    output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
                    # output_buff[key] = mx.nd.empty(arr.shape, ctx=self.ctx)

                def stat_helper(name, array):
                    """wrapper for executor callback"""
                    import ctypes
                    from mxnet.ndarray import NDArray
                    from mxnet.base import NDArrayHandle, py_str
                    array = ctypes.cast(array, NDArrayHandle)
                    if 0:
                        array = NDArray(array, writable=False).asnumpy()
                        print (name, array.shape, np.mean(array), np.std(array),
                               ('%.1fms' % (float(time.time()-stat_helper.start_time)*1000)))
                    else:
                        array = NDArray(array, writable=False)
                        array.wait_to_read()
                        elapsed = float(time.time()-stat_helper.start_time)*1000.
                        if elapsed>5:
                            print (name, array.shape, ('%.1fms' % (elapsed,)))
                    stat_helper.start_time=time.time()
                stat_helper.start_time=float(time.time())
                # self.executor.set_monitor_callback(stat_helper)

                tic = time.time()
                    
                self.executor.forward(is_train=True)
                for key in output_dict:
                    output_dict[key].copyto(output_buff[key])

                # exit(0) # for debugging forward pass only
                    
                self.executor.backward()
                for key, arr in update_dict.items():
                    if key != "bigscore_weight":
                        self.updater(key, arr, self.arg_params[key])

                for output in self.executor.outputs:
                    output.wait_to_read()
                if TIMING:
                    print("%.0fms" % ((time.time()-tic)*1000.,))
                        
                output_dict = dict(zip(output_names, self.executor.outputs))
                pred_det_shape = output_dict["det_out_output"].shape
                # pred_seg_shape = output_dict["seg_out_output"].shape
                label_det = mx.nd.array(data.label[0].reshape((label_shape_det[0],
                                                               label_shape_det[1]*label_shape_det[2])))
                # label_seg = mx.nd.array(data.label[1].reshape((label_shape_seg[0],
                #                                                label_shape_seg[1]*label_shape_seg[2])))
                pred_det = mx.nd.array(output_buff["det_out_output"].reshape((pred_det_shape[0],
                    pred_det_shape[1], pred_det_shape[2])))
                # pred_seg = mx.nd.array(output_buff["seg_out_output"].reshape((pred_seg_shape[0],
                #     pred_seg_shape[1], pred_seg_shape[2]*pred_seg_shape[3])))
                if DEBUG:
                    print(data.label[0].asnumpy()[0,:2,:])

                if TIMING:
                    print("%.0fms" % ((time.time()-tic)*1000.,))
                    
                eval_metrics.get_metric(0).update([mx.nd.zeros(output_buff["cls_prob_output"].shape),
                                        mx.nd.zeros(output_buff["loc_loss_output"].shape),label_det],
                                       [output_buff["cls_prob_output"], output_buff["loc_loss_output"],
                                        output_buff["cls_label_output"]])
                # eval_metrics.get_metric(1).update([label_seg.as_in_context(self.ctx)], [pred_seg.as_in_context(self.ctx)])

                self.executor.outputs[0].wait_to_read()

                ##################### display results ##############################
                # out_det = output_dict["det_out_output"].asnumpy()
                # for imgidx in range(out_det.shape[0]):
                #     img = np.squeeze(data.data[0].asnumpy()[imgidx,:,:,:])
                #     det = out_det[imgidx,:,:]
                #     gt = label_det.asnumpy()[imgidx,:].reshape((-1,6))
                #     display_results(img, det, gt, self.class_names)
                #     [exit(0) if (cv2.waitKey(1)&0xff)==27 else None]
                # outimgiter += 1

                batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metrics)
                batch_end_callback(batch_end_params)

                if TIMING:
                    print("%.0fms" % ((time.time()-tic)*1000.,))
                    
                # exit(0) # for debugging only
                
            ##### save snapshot
            if (not self.evaluation_only) and (epoch_end_callback is not None):
                epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params)
                
            names, values = eval_metrics.get()
            for name, value in zip(names,values):
                logger.info("                     --->Epoch[%d] Train-%s=%f", epoch, name, value)
                
            # evaluation
            if eval_data:
                logger.info(" in eval process...")
                nbatch = 0
                depth_metric = DistanceAccuracyMetric(class_names=self.class_names)
                eval_data.reset()
                eval_metrics.reset()
                self.valid_metric.reset()
                depth_metric.reset()
                timing_results = []
                for data, fnames in eval_data:
                    nbatch += 1
                    label_shape_det = data.label[0].shape
                    # label_shape_seg = data.label[1].shape
                    self.arg_params[data_name] = mx.nd.array(data.data[0], self.ctx)
                    self.arg_params[label_name_det] = mx.nd.array(data.label[0], self.ctx)
                    # self.arg_params[label_name_seg] = mx.nd.array(data.label[1], self.ctx)
                    self.executor = self.symbol.bind(self.ctx, self.arg_params,
                        args_grad=self.grad_params, grad_req=grad_req, aux_states=self.aux_params)
                    
                    output_names = self.symbol.list_outputs()
                    output_dict = dict(zip(output_names, self.executor.outputs))

                    # cpu_output_array = mx.nd.zeros(output_dict["seg_out_output"].shape)

                    ############## monitor status
                    # def stat_helper(name, array):
                    #     """wrapper for executor callback"""
                    #     import ctypes
                    #     from mxnet.ndarray import NDArray
                    #     from mxnet.base import NDArrayHandle, py_str
                    #     array = ctypes.cast(array, NDArrayHandle)
                    #     if 1:
                    #         array = NDArray(array, writable=False).asnumpy()
                    #         print (name, array.shape, np.mean(array), np.std(array),
                    #                ('%.1fms' % (float(time.time()-stat_helper.start_time)*1000)))
                    #     else:
                    #         array = NDArray(array, writable=False)
                    #         array.wait_to_read()
                    #         elapsed = float(time.time()-stat_helper.start_time)*1000.
                    #         if elapsed>5:
                    #             print (name, array.shape, ('%.1fms' % (elapsed,)))
                    #     stat_helper.start_time=time.time()
                    # stat_helper.start_time=float(time.time())
                    # self.executor.set_monitor_callback(stat_helper)
                    
                    ############## forward
                    tic = time.time()
                    self.executor.forward(is_train=True)
                    # output_dict["seg_out_output"].wait_to_read()
                    timing_results.append((time.time()-tic)*1000.)
                    
                    # output_dict["seg_out_output"].copyto(cpu_output_array)
                    # pred_shape = output_dict["seg_out_output"].shape
                    # label = mx.nd.array(data.label[1].reshape((label_shape_seg[0], label_shape_seg[1]*label_shape_seg[2])))
                    # output_dict["seg_out_output"].wait_to_read()
                    # seg_out_output = output_dict["seg_out_output"].asnumpy()

                    pred_det_shape = output_dict["det_out_output"].shape
                    # pred_seg_shape = output_dict["seg_out_output"].shape
                    label_det = mx.nd.array(data.label[0].reshape((label_shape_det[0], label_shape_det[1]*label_shape_det[2])))
                    # label_seg = mx.nd.array(data.label[1].reshape((label_shape_seg[0], label_shape_seg[1]*label_shape_seg[2])),ctx=self.ctx)
                    pred_det = mx.nd.array(output_dict["det_out_output"].reshape((pred_det_shape[0], pred_det_shape[1], pred_det_shape[2])))
                    # pred_seg = mx.nd.array(output_dict["seg_out_output"].reshape((pred_seg_shape[0], pred_seg_shape[1], pred_seg_shape[2]*pred_seg_shape[3])),ctx=self.ctx)

                    #### remove invalid boxes
                    # Keep only boxes with a non-negative class id and
                    # confidence > .25, padding the rest of the buffer with -1.
                    out_dets = output_dict["det_out_output"].asnumpy()
                    assert len(out_dets.shape)==3
                    pred_det = np.zeros((batch_size, 200, 7), np.float32)-1.
                    for idx, out_det in enumerate(out_dets):
                        assert len(out_det.shape)==2
                        out_det = np.expand_dims(out_det, axis=0)
                        indices = np.where(out_det[:,:,0]>=0) # labeled as negative
                        out_det = np.expand_dims(out_det[indices[0],indices[1],:],axis=0)
                        indices = np.where(out_det[:,:,1]>.25) # higher confidence
                        out_det = np.expand_dims(out_det[indices[0],indices[1],:],axis=0)
                        pred_det[idx, :out_det.shape[1], :] = out_det
                        del out_det
                    pred_det = mx.nd.array(pred_det)
                    
                    ##### display results
                    if False: # self.evaluation_only:
                        # out_img = output_dict["seg_out_output"]
                        # out_img = mx.nd.split(out_img, axis=0, num_outputs=out_img.shape[0], squeeze_axis=0)
                        # if not isinstance(out_img,list):
                        #     out_img = [out_img]
                        for imgidx in range(eval_data.batch_size):
                            img = np.squeeze(data.data[0].asnumpy()[imgidx,:,:,:])
                            det = pred_det.asnumpy()[imgidx,:,:]
                            ### ground-truth
                            gt = label_det.asnumpy()[imgidx,:].reshape((-1,6))
                            # display result
                            display_img = display_results(img, det, gt, self.class_names)
                            res_fname = fnames[imgidx].replace("SegmentationClass","Results").replace("labelIds","results")
                            if cv2.imwrite(res_fname, display_img):
                                print(res_fname,'saved.')
                            [exit(0) if (cv2.waitKey()&0xff)==27 else None]
                        outimgiter += 1

                    if self.evaluation_only:
                        continue

                    eval_metrics.get_metric(0).update(None,
                                           [output_dict["cls_prob_output"], output_dict["loc_loss_output"],
                                            output_dict["cls_label_output"]])
                    # eval_metrics.get_metric(1).update([label_seg], [pred_seg])
                    self.valid_metric.update([mx.nd.slice_axis(data.label[0],axis=2,begin=0,end=5)], \
                                             [mx.nd.slice_axis(pred_det,axis=2,begin=0,end=6)])
                    # Depth accuracy is computed against disparity maps loaded
                    # from files derived from the segmentation file names.
                    disparities = []
                    for imgidx in range(batch_size):
                        dispname = fnames[imgidx].replace("SegmentationClass","Disparity").replace("gtFine_labelTrainIds","disparity")
                        disparities.append(cv2.imread(dispname,-1))
                        assert disparities[0] is not None, dispname + " not found."
                    depth_metric.update(mx.nd.array(disparities),[pred_det])
                    
                    det_metric = self.valid_metric
                    det_names, det_values = det_metric.get()
                    depth_names, depth_values = depth_metric.get()
                    print("\r %d/%d speed=%.1fms %.1f%% %s=%.1f %s=%.1f" % \
                          (nbatch*eval_data.batch_size,eval_data.num_samples,
                           math.fsum(timing_results)/float(nbatch),
                           float(nbatch*eval_data.batch_size)*100./float(eval_data.num_samples),
                           det_names[-1],det_values[-1]*100.,
                           depth_names[-1],depth_values[-1]*100.,),end='\r')
                    
                names, values = eval_metrics.get()
                for name, value in zip(names,values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value)
                logger.info('----------------------------------------------')
                print(' & '.join(names))
                print(' & '.join(map(lambda v:'%.1f'%(v*100.,),values)))
                logger.info('----------------------------------------------')
                names, values = self.valid_metric.get()
                for name, value in zip(names,values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value)
                logger.info('----------------------------------------------')
                print(' & '.join(names))
                print(' & '.join(map(lambda v:'%.1f'%(v*100.,),values)))
                logger.info('----------------------------------------------')
                names, values = depth_metric.get()
                for name, value in zip(names,values):
                    logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value)
                logger.info('----------------------------------------------')
                print(' & '.join(names))
                print(' & '.join(map(lambda v:'%.1f'%(v*100.,),values)))
                logger.info('----------------------------------------------')
                    
                if self.evaluation_only:
                    exit(0) ## for debugging only
# Example #7
# 0
 def fit(self,
         train_data,
         eval_data=None,
         eval_metric='acc',
         grad_req='write',
         epoch_end_callback=None,
         batch_end_callback=None,
         kvstore='local',
         logger=None):
     if logger is None:
         logger = logging
     logging.info('Start training with %s', str(self.ctx))
     arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(
         data=train_data.provide_data[0][1])
     arg_names = self.symbol.list_arguments()
     if grad_req != 'null':
         self.grad_params = {}
         for name, shape in zip(arg_names, arg_shapes):
             if not (name.endswith('data') or name.endswith('label')):
                 self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
     else:
         self.grad_params = None
     aux_names = self.symbol.list_auxiliary_states()
     self.aux_params = {
         k: nd.zeros(s)
         for k, s in zip(aux_names, aux_shapes)
     }
     data_name = train_data.data_name
     label_name = train_data.label_name
     input_names = [data_name, label_name]
     self.optimizer = opt.create(self.optimizer,
                                 rescale_grad=(1.0 /
                                               train_data.get_batch_size()),
                                 **(self.kwargs))
     self.updater = get_updater(self.optimizer)
     eval_metric = metric.create(eval_metric)
     # begin training
     for epoch in range(self.begin_epoch, self.num_epoch):
         nbatch = 0
         train_data.reset()
         eval_metric.reset()
         for data in train_data:
             nbatch += 1
             label_shape = data[label_name].shape
             self.arg_params[data_name] = mx.nd.array(
                 data[data_name], self.ctx)
             self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                 label_shape[1]*label_shape[2]), self.ctx)
             output_names = self.symbol.list_outputs()
             self.exector = self.symbol.bind(self.ctx,
                                             self.arg_params,
                                             args_grad=self.grad_params,
                                             grad_req=grad_req,
                                             aux_states=self.aux_params)
             assert len(self.symbol.list_arguments()) == len(
                 self.exector.grad_arrays)
             update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \
                 self.exector.grad_arrays) if nd is not None}
             output_dict = {}
             output_buff = {}
             for key, arr in zip(self.symbol.list_outputs(),
                                 self.exector.outputs):
                 output_dict[key] = arr
                 output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
             self.exector.forward(is_train=True)
             for key in output_dict:
                 output_dict[key].copyto(output_buff[key])
             self.exector.backward()
             for key, arr in update_dict.items():
                 if key != "bigscore_weight":
                     self.updater(key, arr, self.arg_params[key])
             pred_shape = self.exector.outputs[0].shape
             label = mx.nd.array(data[label_name].reshape(
                 label_shape[0], label_shape[1] * label_shape[2]))
             pred = mx.nd.array(output_buff["softmax_output"].asnumpy().reshape(pred_shape[0], \
                 pred_shape[1], pred_shape[2]*pred_shape[3]))
             eval_metric.update([label], [pred])
             self.exector.outputs[0].wait_to_read()
             batch_end_params = BatchEndParam(epoch=epoch,
                                              nbatch=nbatch,
                                              eval_metric=eval_metric)
             batch_end_callback(batch_end_params)
         if epoch_end_callback is not None:
             epoch_end_callback(epoch, self.symbol, self.arg_params,
                                self.aux_params)
         name, value = eval_metric.get()
         logger.info("                     --->Epoch[%d] Train-%s=%f",
                     epoch, name, value)
         # evaluation
         if eval_data:
             logger.info(" in eval process...")
             nbatch = 0
             eval_data.reset()
             eval_metric.reset()
             for data in eval_data:
                 nbatch += 1
                 label_shape = data[label_name].shape
                 self.arg_params[data_name] = mx.nd.array(
                     data[data_name], self.ctx)
                 self.arg_params[label_name] = mx.nd.array(data[label_name].reshape(label_shape[0], \
                     label_shape[1]*label_shape[2]), self.ctx)
                 exector = self.symbol.bind(self.ctx,
                                            self.arg_params,
                                            args_grad=self.grad_params,
                                            grad_req=grad_req,
                                            aux_states=self.aux_params)
                 cpu_output_array = mx.nd.zeros(exector.outputs[0].shape)
                 exector.forward(is_train=False)
                 exector.outputs[0].copyto(cpu_output_array)
                 pred_shape = cpu_output_array.shape
                 label = mx.nd.array(data[label_name].reshape(label_shape[0], \
                     label_shape[1]*label_shape[2]))
                 pred = mx.nd.array(cpu_output_array.asnumpy().reshape(pred_shape[0], \
                     pred_shape[1], pred_shape[2]*pred_shape[3]))
                 eval_metric.update([label], [pred])
                 exector.outputs[0].wait_to_read()
         name, value = eval_metric.get()
         logger.info('batch[%d] Validation-%s=%f', nbatch, name, value)
    def fit(self,
            X,
            marks,
            e_marks=None,
            y=None, eval_data=None, eval_metric='acc',
            epoch_end_callback=None, batch_end_callback=None, time_step_callback=None,
            kvstore='local', logger=None,
            work_load_list=None, monitor=None, eval_batch_end_callback=None):
        """Overwrite of the base fit: train the (possibly bucketed) RNN symbol.

        Parameters mirror the base-class ``fit``; ``marks``/``e_marks`` and the
        extra callbacks are forwarded to ``_train_rnn`` unchanged.

        Raises:
            TypeError: if ``self.optimizer`` is neither an optimizer name
                (str) nor an ``opt.Optimizer`` instance.  Previously this
                case fell through silently and crashed later with
                ``NameError: optimizer``.
        """

        data = self._init_iter(X, y, is_train=True)
        eval_data = self._init_eval_iter(eval_data)

        if self.sym_gen:
            # Bucketing: materialize the concrete symbol for the default bucket.
            self.symbol = self.sym_gen(
                data.default_bucket_key)  # pylint: disable=no-member
            self._check_arguments()
        self.kwargs["sym"] = self.symbol

        param_dict = dict(data.provide_data + data.provide_label)
        arg_names, param_names, aux_names = self._init_params(param_dict)

        # setup metric
        if not isinstance(eval_metric, metric.EvalMetric):
            eval_metric = metric.create(eval_metric)

        # create kvstore
        (kvstore, update_on_kvstore) = _create_kvstore(
            kvstore, len(self.ctx), self.arg_params)

        # Map flat parameter index -> name so the optimizer can identify
        # individual weights; without kvstore each parameter occupies one
        # slot per device.
        param_idx2name = {}
        if update_on_kvstore:
            param_idx2name.update(enumerate(param_names))
        else:
            for i, n in enumerate(param_names):
                for k in range(len(self.ctx)):
                    param_idx2name[i * len(self.ctx) + k] = n
        self.kwargs["param_idx2name"] = param_idx2name

        # init optmizer
        if isinstance(self.optimizer, str):
            batch_size = data.batch_size
            if kvstore and kvstore.type == 'dist_sync':
                # Gradients are summed across workers; rescale accordingly.
                batch_size *= kvstore.num_workers
            optimizer = opt.create(self.optimizer,
                                   rescale_grad=(1.0 / batch_size),
                                   **(self.kwargs))
        elif isinstance(self.optimizer, opt.Optimizer):
            optimizer = self.optimizer
        else:
            raise TypeError(
                "optimizer must be an optimizer name (str) or an "
                "opt.Optimizer instance, got %s" % type(self.optimizer))

        # do training
        _train_rnn(self.symbol, self.ctx,
                   marks,
                   arg_names, param_names, aux_names,
                   self.arg_params, self.aux_params,
                   begin_epoch=self.begin_epoch, end_epoch=self.num_epoch,
                   epoch_size=self.epoch_size,
                   optimizer=optimizer,
                   train_data=data, eval_data=eval_data,
                   eval_metric=eval_metric,
                   epoch_end_callback=epoch_end_callback,
                   batch_end_callback=batch_end_callback,
                   time_step_callback=time_step_callback,
                   kvstore=kvstore, update_on_kvstore=update_on_kvstore,
                   logger=logger, work_load_list=work_load_list, monitor=monitor,
                   eval_batch_end_callback=eval_batch_end_callback,
                   sym_gen=self.sym_gen, e_marks=e_marks)
def fddb_finetune_fold(fold_index):
    target_index = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10"]
    num_train_feature = 0
    num_valid_feature = 0
    for index in target_index:
        if index != fold_index:
            num_train_feature += num_feature_fold[index]
        else:
            num_valid_feature += num_feature_fold[index]

    train_feature = np.zeros((num_train_feature, feature_len), dtype=np.float)
    train_label = np.zeros((num_train_feature, label_len), dtype=np.float)
    train_weight = np.zeros((num_train_feature, label_len), dtype=np.float)
    train_feature_index = 0
    valid_feature = np.zeros((num_valid_feature, feature_len), dtype=np.float)
    valid_label = np.zeros((num_valid_feature, label_len), dtype=np.float)
    valid_weight = np.zeros((num_valid_feature, label_len), dtype=np.float)
    valid_feature_index = 0
    for index in target_index:
        for i in xrange(num_feature_fold[index]):
            if index != fold_index:
                train_feature[train_feature_index] = feature_fold[index][i]
                train_label[train_feature_index] = label_fold[index][i]
                train_weight[train_feature_index] = weight_fold[index][i]
                train_feature_index += 1
            else:
                valid_feature[valid_feature_index] = feature_fold[index][i]
                valid_label[valid_feature_index] = label_fold[index][i]
                valid_weight[valid_feature_index] = weight_fold[index][i]
                valid_feature_index += 1

    if retrain:
        symbol_finetune = fddb_symbol_finetune.get_vgg16_finetune()
        args = {}
        auxs = {}
        arg_names = symbol_finetune.list_arguments()
        aux_names = symbol_finetune.list_auxiliary_states()
        arg_shapes, _, aux_shapes = symbol_finetune.infer_shape(
            data=(batchsize, feature_len))
        for name, shape in zip(arg_names, arg_shapes):
            if len(shape) < 1:
                continue
            fan_in, fan_out = np.prod(shape[1:]), shape[0]
            factor = fan_in
            scale = np.sqrt(2.34 / factor)
            tempt = np.random.uniform(-scale, scale, size=shape)
            args[name] = mx.nd.array(tempt, ctx)

        for name, shape in zip(aux_names, aux_shapes):
            if len(shape) < 1:
                continue
            fan_in, fan_out = np.prod(shape[1:]), shape[0]
            factor = fan_in
            scale = np.sqrt(2.34 / factor)
            tempt = np.random.uniform(-scale, scale, size=shape)
            auxs[name] = mx.nd.array(tempt, ctx)
    else:
        symbol_finetune = fddb_symbol_finetune.get_vgg16_finetune()
        _, args, auxs = mx.model.load_checkpoint(rpn_prefix, load_epoch)
        for k, v in args.items():
            if v.context != ctx:
                args[k] = mx.nd.zeros(v.shape, ctx)
                v.copyto(args[k])
        for k, v in auxs.items():
            if v.context != ctx:
                auxs[k] = mx.nd.zeros(v.shape, ctx)
                v.copyto(auxs[k])
        arg_names = symbol_finetune.list_arguments()
        arg_shapes, _, aux_shapes = symbol_finetune.infer_shape(
            data=(batchsize, feature_len))

    grad_params = {}
    for name, shape in zip(arg_names, arg_shapes):
        if not (name.endswith('ell_label') or name.endswith('bbox_weight')
                or name.endswith('data')):
            grad_params[name] = mx.nd.zeros(shape, ctx)

    num_train_batch = num_train_feature / batchsize
    lr = 0.03
    lr_decay = 0.33
    epoch_end_callback = mx.callback.do_checkpoint(finetune_prefix + "-" +
                                                   fold_index)

    for j in range(start_epoch, end_epoch):
        bbox_predict_loss = np.array([.0, .0, .0])
        if j % 50 == 0 or j == start_epoch:
            lr *= lr_decay
            optimizer = opt.create('sgd',
                                   rescale_grad=1.0 / batchsize,
                                   learning_rate=lr,
                                   momentum=0.9,
                                   wd=0.00001)
            updater = get_updater(optimizer)
        for i in range(num_train_batch):
            feature_b = train_feature[i * batchsize:(i + 1) * batchsize, :]
            label_b = train_label[i * batchsize:(i + 1) * batchsize, :]
            weight_b = train_weight[i * batchsize:(i + 1) * batchsize, :]
            args["data"] = mx.nd.array(feature_b, ctx)
            args["ell_label"] = mx.nd.array(label_b, ctx)
            args["bbox_weight"] = mx.nd.array(weight_b, ctx)
            executor = symbol_finetune.bind(ctx,
                                            args,
                                            args_grad=grad_params,
                                            grad_req='write',
                                            aux_states=auxs)
            assert len(symbol_finetune.list_arguments()) == len(
                executor.grad_arrays)

            update_dict = {
                name: nd
                for name, nd in zip(symbol_finetune.list_arguments(),
                                    executor.grad_arrays) if nd
            }
            output_dict = {}
            output_buff = {}
            for key, arr in zip(symbol_finetune.list_outputs(),
                                executor.outputs):
                output_dict[key] = arr
                output_buff[key] = mx.nd.zeros(arr.shape, ctx=mx.cpu())
            executor.forward(is_train=True)

            for key in output_dict:
                output_dict[key].copyto(output_buff[key])

            executor.backward()
            for key, arr in update_dict.items():
                updater(key, arr, args[key])

            executor.outputs[0].wait_to_read()

            face_pred = output_buff["ellipse_predict_loss_output"].asnumpy()

            bbox_predict_b = bbox_predict_metric(label_b, face_pred, weight_b)
            bbox_predict_loss += bbox_predict_b

            if i % 10 == 0:
                print "Training-fold[" + \
                      fold_index + \
                      "]-epoch[%d/%d]-batch[%d/%d]: lr:%f\tbbox_regress:%f\tbbox_angle:%f\tiou_regress:%f" % \
                    (j, end_epoch, i, num_train_batch, lr, bbox_predict_b[0], bbox_predict_b[1], bbox_predict_b[2])

        print "ALL Training: bbox_regress:%f\tbbox_angle:%f\tiou_regress:%f" % \
              (bbox_predict_loss[0] / float(num_train_batch), bbox_predict_loss[1] / float(num_train_batch),
               bbox_predict_loss[2] / float(num_train_batch))

        if j % 25 == 0:
            print "Saving the model:", j
            epoch_end_callback(j, symbol_finetune, args, auxs)

        args["data"] = mx.nd.array(valid_feature, ctx)
        args["ell_label"] = mx.nd.array(valid_label, ctx)
        args["bbox_weight"] = mx.nd.array(
            np.ones((valid_feature.shape[0], label_len), dtype=np.float), ctx)

        executor = symbol_finetune.bind(ctx,
                                        args,
                                        args_grad=None,
                                        grad_req='null',
                                        aux_states=auxs)
        output_dict = {}
        output_buff = {}
        for key, arr in zip(symbol_finetune.list_outputs(), executor.outputs):
            output_dict[key] = arr
            output_buff[key] = mx.nd.zeros(arr.shape, ctx=mx.cpu())
        executor.forward(is_train=True)
        for key in output_dict:
            output_dict[key].copyto(output_buff[key])
        executor.outputs[0].wait_to_read()
        face_pred = output_buff["ellipse_predict_loss_output"].asnumpy()

        print valid_label[0]
        print face_pred[0]
        bbox_predict_b = bbox_predict_metric(valid_label, face_pred,
                                             valid_weight)

        print "ALL Validation: bbox_regress:%f\tbbox_angle:%f\tiou_regress:%f" % \
              (bbox_predict_b[0], bbox_predict_b[1], bbox_predict_b[2])
Example #10
0
def run(mxIter):
    model_prefix = '/data2/obj_detect/imagenet_models/resnet/resnet-101'
    load_epoch = 0
    #model_prefix = './stage1_models/tiny_face-06440'
    #load_epoch = 42
    #model_prefix = './tiny_face-06440'
    #load_epoch = 140
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)

    input_shapes = get_input_shapes(mxIter.batch_size)
    optimizer = 'sgd'
    optimizer_params = {
        'learning_rate': 0.0001,
        'momentum' : 0.90,
        'wd' : 0.0001}
    optimizer = opt.create(optimizer, rescale_grad=1.0 / mxIter.batch_size, **optimizer_params)
    updater = get_updater(optimizer)

    net = get_symbol_focal_loss()
    arg_params, aux_params = load_params_checkpoint(model_prefix, load_epoch)
    arg_names = net.list_arguments()
    param_names = [x for x in arg_names if x not in input_shapes]

    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
    delete_params_by_shape(net, arg_params, aux_params, input_shapes, initializer)
    exec_ = net.simple_bind(ctx=mx.gpu(2), **input_shapes)
    copy_params(arg_params, aux_params, exec_)

    param_arrays = [[exec_.arg_arrays[i]] for i,name in enumerate(arg_names) if name in param_names]
    grad_arrays = [[exec_.grad_arrays[i]] for i,name in enumerate(arg_names) if name in param_names]

    #monitor = mx.monitor.Monitor(interval=1, pattern='.*backward.*')
    #monitor.install(exec_)

    batch_size = mxIter.batch_size
    for epoch in range(load_epoch+1, 200):
        num_batch = 0
        metric = 0
        num_inst = 0
        num_reg_inst = 0
        reg_metric = 0
        for batch in mxIter:
            load_data(batch, exec_)
            #monitor.tic()
            exec_.forward(is_train=True)
            outputs = [output.asnumpy() for output in exec_._get_outputs()]
            exec_.backward()
            #monitor.toc_print()
            _update_params(param_arrays, grad_arrays, updater, 1, param_names=param_names)
            num_batch += 1

            # metric
            metric += np.sum(outputs[0])
            reg_metric += np.sum(outputs[1])
            print 'batch -> {}'.format(num_batch)
            print 'focal_loss -> {}'.format(metric / num_batch)
            print 'l1_loss -> {}'.format(reg_metric / num_batch)

            if num_batch % 1000 == 0:
                save_arg_params = {}
                for param_name in param_names:
                    save_arg_params[param_name] = exec_.arg_dict[param_name]
                save_aux_params = exec_.aux_dict
                save_checkpoint('./tiny_face', num_batch, epoch, net, save_arg_params, save_aux_params)

        mxIter.reset()
        save_arg_params = {}
        for param_name in param_names:
            save_arg_params[param_name] = exec_.arg_dict[param_name]
        save_aux_params = exec_.aux_dict
        save_checkpoint('./tiny_face', num_batch, epoch, net, save_arg_params, save_aux_params)
    def fit(self,
            train_data,
            eval_data=None,
            eval_metric='acc',
            grad_req='write',
            logger=None,
            softmax_metric=None,
            regression_metric=None,
            epoch_end_callback=None):

        f = open("log_rpn.txt", 'w')
        if logger is None:
            logger = logging
        logging.info('Start training with %s', str(self.ctx))
        f.write('Start training with %s\n' % str(self.ctx))
        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(
            data=(1, 3, 128, 128),
            mean_face=(10, 3),
            ground_truth=(10, 2),
            bbox_label=(10, 5))
        arg_names = self.symbol.list_arguments()
        if grad_req != 'null':
            self.grad_params = {}
            for name, shape in zip(arg_names, arg_shapes):
                if not (name.endswith('data') or name.endswith("mean_face")
                        or name.endswith('cls_label')
                        or name.endswith('proj_weight')
                        or name.endswith('proj_label')
                        or name.endswith('ground_truth')
                        or name.endswith('bbox_label')
                        or name.endswith("bbox_weight")):
                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
        else:
            self.grad_params = None

        aux_names = self.symbol.list_auxiliary_states()
        self.aux_params = {
            k: mx.nd.zeros(s, self.ctx)
            for k, s in zip(aux_names, aux_shapes)
        }

        data_name = train_data.data_name
        cls_label_name = train_data.cls_label_name
        proj_label_name = train_data.proj_label_name
        proj_weight_name = train_data.proj_weight_name
        ground_truth_name = train_data.ground_truth_name
        bbox_label_name = train_data.bbox_label_name
        bbox_weight_name = train_data.bbox_weight_name

        self.optimizer = opt.create(self.optimizer,
                                    rescale_grad=1.0,
                                    **(self.kwargs))
        self.updater = get_updater(self.optimizer)
        eval_metric = metric.create(eval_metric)

        for epoch in range(self.begin_epoch, self.num_epoch):
            if eval_data:
                logger.info(" in eval process...")
                f.write(" in eval process...")
                nbatch = 0
                softmax_proj = np.zeros((11, 3))
                proj_regression_loss = .0
                bbox_predict_loss = np.array([.0, .0])
                eval_data.reset()
                for data in eval_data:
                    nbatch += 1
                    print "Eval batch:", nbatch
                    softmax_shape = data[cls_label_name].shape
                    self.arg_params[data_name] = mx.nd.array(
                        data[data_name], self.ctx)
                    self.arg_params[cls_label_name] = mx.nd.array(
                        data[cls_label_name].reshape(
                            (softmax_shape[0],
                             softmax_shape[1] * softmax_shape[2])), self.ctx)
                    self.arg_params[proj_label_name] = mx.nd.array(
                        data[proj_label_name], self.ctx)
                    self.arg_params[proj_weight_name] = mx.nd.array(
                        data[proj_weight_name], self.ctx)
                    self.arg_params[ground_truth_name] = mx.nd.array(
                        data[ground_truth_name], self.ctx)
                    self.arg_params[bbox_label_name] = mx.nd.array(
                        data[bbox_label_name], self.ctx)
                    self.arg_params[bbox_weight_name] = mx.nd.array(
                        data[bbox_weight_name], self.ctx)
                    self.arg_params["mean_face"] = mx.nd.array(
                        train_data.mean_face, self.ctx)

                    executor = self.symbol.bind(self.ctx,
                                                self.arg_params,
                                                args_grad=self.grad_params,
                                                grad_req=grad_req,
                                                aux_states=self.aux_params)

                    softmax_output_array = mx.nd.zeros(
                        executor.outputs[0].shape)
                    proj_regression_output_array = mx.nd.zeros(
                        executor.outputs[1].shape)
                    bbox_predict_output_array = mx.nd.zeros(
                        executor.outputs[2].shape)
                    ell_label = mx.nd.zeros(executor.outputs[3].shape)
                    bbox_predict = mx.nd.zeros(executor.outputs[4].shape)
                    executor.forward(is_train=True)
                    executor.outputs[0].copyto(softmax_output_array)
                    executor.outputs[1].copyto(proj_regression_output_array)
                    executor.outputs[2].copyto(bbox_predict_output_array)
                    executor.outputs[3].copyto(ell_label)
                    executor.outputs[4].copyto(bbox_predict)

                    softmax_shape = softmax_output_array.shape
                    index_label = np.nonzero(data[cls_label_name].reshape(
                        softmax_shape[0], softmax_shape[2] *
                        softmax_shape[3]) - 255)
                    label = mx.nd.array(data[cls_label_name].reshape(
                        softmax_shape[0],
                        softmax_shape[2] * softmax_shape[3])[:,
                                                             index_label[1]])
                    pred = mx.nd.array((softmax_output_array.asnumpy().reshape(
                        softmax_shape[0], softmax_shape[1],
                        softmax_shape[2] * softmax_shape[3]))[...,
                                                              index_label[1]])
                    if softmax_metric:
                        tempt = softmax_metric(label, pred, 11)
                        softmax_proj += tempt

                    proj_label = data[proj_label_name]
                    proj_weight = data[proj_weight_name]
                    proj_pred = proj_regression_output_array.asnumpy().reshape(
                        data[proj_weight_name].shape)
                    index_nonzero = np.nonzero(data[proj_weight_name])
                    proj_regress_tmp = regression_metric(
                        proj_label[index_nonzero], proj_pred[index_nonzero],
                        proj_weight[index_nonzero])
                    proj_regression_loss += proj_regress_tmp

                    bbox_pred = bbox_predict_output_array.asnumpy()
                    bbox_predict_tmp = bbox_predict_metric(
                        ell_label.asnumpy(), bbox_pred)
                    bbox_predict_loss += bbox_predict_tmp

                    print "Validation-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f" % \
                          (epoch, nbatch, get_accuracy(tempt, self.bgfg), proj_regress_tmp,
                           bbox_predict_tmp[0], bbox_predict_tmp[1])
                    f.write(
                        "Validation-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f\n"
                        % (epoch, nbatch, get_accuracy(
                            tempt, self.bgfg), proj_regress_tmp,
                           bbox_predict_tmp[0], bbox_predict_tmp[1]))

                    img_info = eval_data.AllImg[nbatch - 1]
                    print "%s\twidth: %d height: %d num_face: %d" % \
                          (img_info.filename, img_info.width, img_info.height, img_info.num_faces)
                    f.write("%s\twidth: %d height: %d num_face: %d\n" %
                            (img_info.filename, img_info.width,
                             img_info.height, img_info.num_faces))

                    executor.outputs[0].wait_to_read()
                    executor.outputs[1].wait_to_read()
                    executor.outputs[2].wait_to_read()
                    executor.outputs[3].wait_to_read()

                print_accuracy(softmax_proj, f, train_data.class_names,
                               self.bgfg)
                logger.info("ALL Validation accuracy: %f",
                            get_accuracy(softmax_proj, self.bgfg))
                logger.info('Validation projection regression: %f',
                            proj_regression_loss / nbatch)
                logger.info('Validation bbox predict: %f %f',
                            bbox_predict_loss[0] / nbatch,
                            bbox_predict_loss[1] / nbatch)
                f.write("ALL Validation accuracy: %f\n" %
                        get_accuracy(softmax_proj, self.bgfg))
                f.write("Validation projection regression: %f\n" %
                        (proj_regression_loss / nbatch))
                f.write("Validation bbox predict: %f %f\n" %
                        (bbox_predict_loss[0] / nbatch,
                         bbox_predict_loss[1] / nbatch))

            nbatch = 0
            train_data.reset()
            eval_metric.reset()
            proj_regress_loss_t = .0
            proj_regress_loss_b = .0
            softmax_count = np.zeros((11, 3))
            softmax_batch = np.zeros((11, 3))
            bbox_predict_loss_t = np.array([.0, .0])
            bbox_predict_loss_b = np.array([.0, .0])
            for data in train_data:
                nbatch += 1
                softmax_shape = data[cls_label_name].shape
                self.arg_params[data_name] = mx.nd.array(
                    data[data_name], self.ctx)
                self.arg_params[cls_label_name] = mx.nd.array(
                    data[cls_label_name].reshape(
                        (softmax_shape[0],
                         softmax_shape[1] * softmax_shape[2])), self.ctx)
                self.arg_params[proj_label_name] = mx.nd.array(
                    data[proj_label_name], self.ctx)
                self.arg_params[proj_weight_name] = mx.nd.array(
                    data[proj_weight_name], self.ctx)
                self.arg_params[ground_truth_name] = mx.nd.array(
                    data[ground_truth_name], self.ctx)
                self.arg_params[bbox_label_name] = mx.nd.array(
                    data[bbox_label_name], self.ctx)
                self.arg_params[bbox_weight_name] = mx.nd.array(
                    data[bbox_weight_name], self.ctx)
                self.arg_params["mean_face"] = mx.nd.array(
                    train_data.mean_face, self.ctx)

                self.executor = self.symbol.bind(self.ctx,
                                                 self.arg_params,
                                                 args_grad=self.grad_params,
                                                 grad_req=grad_req,
                                                 aux_states=self.aux_params)
                assert len(self.symbol.list_arguments()) == len(
                    self.executor.grad_arrays)

                update_dict = {
                    name: nd
                    for name, nd in zip(self.symbol.list_arguments(),
                                        self.executor.grad_arrays) if nd
                }
                output_dict = {}
                output_buff = {}
                for key, arr in zip(self.symbol.list_outputs(),
                                    self.executor.outputs):
                    output_dict[key] = arr
                    output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
                self.executor.forward(is_train=True)
                for key in output_dict:
                    output_dict[key].copyto(output_buff[key])
                self.executor.backward()
                '''
                for i in xrange(0, 49):
                    if self.executor.grad_arrays[i] != None:
                        print i, arg_names[i], self.executor.grad_arrays[i].asnumpy()[0]
                '''

                for key, arr in update_dict.items():
                    if key != 'upsample_proposal_weight':
                        self.updater(key, arr, self.arg_params[key])
                        '''
                        if key == 'config_fc1_weight':
                            print 'config_fc1_weight'
                            print 'param:', self.arg_params[key].asnumpy()
                            print 'grad:', self.executor.grad_arrays[39].asnumpy()
                        if key == 'refine_proj_param_weight':
                            print 'refine_proj_param_weight'
                            print 'param:', self.arg_params[key].asnumpy()
                            print 'grad:', self.executor.grad_arrays[47].asnumpy()
                        '''

                pred_shape = self.executor.outputs[0].shape
                index_label = np.nonzero(data[cls_label_name].reshape(
                    softmax_shape[0], softmax_shape[1] * softmax_shape[2]) -
                                         255)
                label = mx.nd.array(data[cls_label_name].reshape(
                    softmax_shape[0],
                    softmax_shape[1] * softmax_shape[2])[:, index_label[1]])
                pred = mx.nd.array(
                    (output_buff["proposal_cls_loss_output"].asnumpy().reshape(
                        pred_shape[0], pred_shape[1],
                        pred_shape[2] * pred_shape[3]))[..., index_label[1]])
                if softmax_metric:
                    tempt = softmax_metric(label, pred, 11)
                    softmax_count += tempt
                    softmax_batch += tempt

                # for q in range(0, 50):
                #    print label.asnumpy()[0, q], ':', pred.asnumpy()[0, 0, q], pred.asnumpy()[0, 1, q]

                proj_label = data[proj_label_name]
                proj_weight = data[proj_weight_name]
                proj_pred = output_buff["proj_regression_loss_output"].asnumpy()\
                    .reshape(data[proj_weight_name].shape)
                index_nonzero = np.nonzero(data[proj_weight_name])
                proj_regress_tmp = regression_metric(
                    proj_label[index_nonzero], proj_pred[index_nonzero],
                    proj_weight[index_nonzero])
                proj_regress_loss_t += proj_regress_tmp
                proj_regress_loss_b += proj_regress_tmp

                ell_label = output_buff["ell_label_output"].asnumpy()
                bbox_pred = output_buff["ellipse_predict_loss_output"].asnumpy(
                )
                bbox_predict_tmp = bbox_predict_metric(ell_label, bbox_pred)
                bbox_predict_loss_t += bbox_predict_tmp
                bbox_predict_loss_b += bbox_predict_tmp

                self.executor.outputs[0].wait_to_read()
                self.executor.outputs[1].wait_to_read()
                self.executor.outputs[2].wait_to_read()
                self.executor.outputs[3].wait_to_read()

                print "Training-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f" % \
                      (epoch, nbatch, get_accuracy(tempt, self.bgfg), proj_regress_tmp,
                       bbox_predict_tmp[0], bbox_predict_tmp[1])
                f.write(
                    "Training-epoch[%d]-batch[%d]: acc:%f\tproj_regress:%f\tbbox_regress:%f\tbbox_angle:%f\n"
                    % (epoch, nbatch, get_accuracy(
                        tempt, self.bgfg), proj_regress_tmp,
                       bbox_predict_tmp[0], bbox_predict_tmp[1]))

                img_info = train_data.AllImg[nbatch - 1]
                print "%s\twidth: %d height: %d num_face: %d" % \
                      (img_info.filename, img_info.width, img_info.height, img_info.num_faces)
                f.write("%s\twidth: %d height: %d num_face: %d\n" % \
                        (img_info.filename, img_info.width, img_info.height, img_info.num_faces))

                if nbatch % 50 == 0:
                    print_accuracy(softmax_batch, f, train_data.class_names,
                                   self.bgfg)
                    softmax_batch = np.zeros((11, 3))
                    print "Keypoints projection regression smoothl1 loss:\t", proj_regress_loss_b / 50
                    f.write(
                        "Keypoints projection regression smoothl1 loss:\t%f\n"
                        % (proj_regress_loss_b / 50))
                    print "Bounding box regression:\t", bbox_predict_loss_b / 50
                    f.write("Bounding box regression: %f %f\n" %
                            (bbox_predict_loss_b[0] / 50,
                             bbox_predict_loss_b[1] / 50))
                    #print "Keypoints offset regression smoothl1 loss:\t", offset_regress_loss_b / 50
                    #f.write("Keypoints offset regression smoothl1 loss:\t%f\n" % (offset_regress_loss_b / 50))
                    #print "Keypoints visibility accuracy:\t", float(softmax_vis_batch[2]) / float(softmax_vis_batch[0])
                    #f.write("Keypoints visibility accuracy:\t%f\n" %
                    #        (float(softmax_vis_batch[2]) / float(softmax_vis_batch[0])))
                    softmax_vis_batch = np.zeros(3)
                    proj_regress_loss_b = .0
                    offset_regress_loss_b = .0
                    bbox_predict_loss_b = np.array([.0, .0])

                if nbatch % 1000 == 0:
                    if epoch_end_callback != None:
                        epoch_end_callback(epoch * 100000 + nbatch,
                                           self.symbol, self.arg_params,
                                           self.aux_params)

            name, value = eval_metric.get()
            print_accuracy(softmax_count, f, train_data.class_names, self.bgfg)
            logger.info("--->Epoch[%d] Train-cls-%s=%f", epoch, name, value)
            logger.info("--->Epoch[%d] Train-proj-reg-smoothl1=%f", epoch,
                        proj_regress_loss_t / nbatch)
            logger.info("--->Epoch[%d] Train-bbox-reg-smoothl1=%f, %f", epoch,
                        bbox_predict_loss_t[0] / nbatch,
                        bbox_predict_loss_t[1] / nbatch)
            #logger.info("--->Epoch[%d] Train-offset-reg-smoothl1=%f", epoch, offset_regress_loss_t / nbatch)
            #logger.info("--->Epoch[%d] Train-vis-acc=%f", epoch, float(softmax_vis_count[2]) / float(softmax_vis_count[0]))
            f.write("--->Epoch[%d] Train-cls-%s=%f\n" % (epoch, name, value))
            f.write("--->Epoch[%d] Train-proj-reg-smoothl1=%f\n" %
                    (epoch, proj_regress_loss_t / nbatch))
            f.write("--->Epoch[%d] Train-bbox-reg-smoothl1=%f, %f" %
                    (epoch, bbox_predict_loss_t[0] / nbatch,
                     bbox_predict_loss_t[1] / nbatch))
            #f.write("--->Epoch[%d] Train-offset-reg-smoothl1=%f\n" % (epoch, offset_regress_loss_t / nbatch))
            #f.write("--->Epoch[%d] Train-vis-acc=%f" % (epoch, float(softmax_vis_count[2]) / float(softmax_vis_count[0])))

        f.close()
Example #12
0
    def fit(self,
            train_data,
            eval_data=None,
            eval_metric='acc',
            period=['train', 'val'],
            to_eval_train=True,
            grad_req='write',
            epoch_end_callback=None,
            batch_end_callback=None,
            kvstore='local',
            logger=None):
        """Train the bound symbol with a manually managed executor loop.

        Binds ``self.symbol`` on ``self.ctx``, then for each epoch runs a
        forward/backward pass per batch, applies the optimizer updater to
        every gradient array, and optionally evaluates on ``eval_data``.

        Parameters
        ----------
        train_data : DataIter
            Training iterator; its first ``provide_data`` / ``provide_label``
            entries define the input and label shapes used for binding.
        eval_data : DataIter, optional
            Validation iterator, used only when ``'val'`` is in ``period``.
        eval_metric : str or EvalMetric, optional
            ``'acc'`` or ``'meanIOU'`` are constructed here; any other value
            is used as-is (see region 5).
        period : list of str, optional
            Which phases to run: ``'train'`` and/or ``'val'``.
            NOTE(review): mutable default argument; harmless here because it
            is only read, but a tuple would be safer.
        to_eval_train : bool, optional
            Whether to compute the metric on training batches as well.
        grad_req : str, optional
            Gradient request passed to ``bind``; ``'null'`` disables
            gradient computation entirely.
        epoch_end_callback : callable, optional
            Invoked as ``(epoch, symbol, arg_params, aux_params)`` after
            each training epoch.
        batch_end_callback : callable
            Invoked with a ``BatchEndParam`` after every batch.
            NOTE(review): called unconditionally — passing ``None`` raises
            TypeError at the first batch; confirm all callers supply one.
        kvstore : str, optional
            Accepted but unused by this implementation.
        logger : logging logger, optional
            Defaults to the ``logging`` module itself when not given.
        """

        if logger is None:
            logger = logging
        # NOTE(review): uses the module-level ``logging`` rather than the
        # ``logger`` selected above — probably intended as ``logger.info``.
        logging.info('Start training with %s', str(self.ctx))
        # region 1. Prepare parameters, including input data and label data
        # Argument names of the FCN symbol
        arg_names = self.symbol.list_arguments()
        # Argument shapes of the FCN symbol, inferred from the data shape
        # print train_data.provide_data[0]
        arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(
            data=train_data.provide_data[0][1])
        # arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(data=(1, 3,
        #                                                                    train_data.resize_size[0],
        #                                                                    train_data.resize_size[1],
        #                                                                    ))
        # print train_data.provide_data[0][1]
        # quit()
        # Input data and label data
        data_name = train_data.provide_data[0][0]
        label_name = train_data.provide_label[0][0]
        # print data_name, label_name
        # input_names = [data_name, label_name]
        # batch_size, channel, h, w
        # data_shape = train_data.provide_data[0][1]
        self.arg_params[data_name] = mx.nd.empty(train_data.provide_data[0][1],
                                                 self.ctx)
        # # batch_size, h*w
        self.arg_params[label_name] = mx.nd.empty(
            train_data.provide_label[0][1], self.ctx)
        # quit()
        # Auxiliary states (e.g. BatchNorm moving statistics)
        aux_names = self.symbol.list_auxiliary_states()
        self.aux_params = {
            k: mx.nd.zeros(s)
            for k, s in zip(aux_names, aux_shapes)
        }
        # endregion

        # region 2. Prepare gradient buffers for the parameters
        if grad_req != 'null':
            self.grad_params = {}
            for name, shape in zip(arg_names, arg_shapes):
                # Data and label inputs get no gradient buffers.
                if not (name.endswith('data') or name.endswith('label')):
                    # print name,shape
                    self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
        else:
            self.grad_params = None
        # endregion
        # print self.arg_params
        # region 3. Bind model parameters and model outputs
        self.executor = self.symbol.bind(self.ctx,
                                         self.arg_params,
                                         args_grad=self.grad_params,
                                         grad_req=grad_req,
                                         aux_states=self.aux_params)
        # quit()
        assert len(self.symbol.list_arguments()) == len(
            self.executor.grad_arrays)
        # Bind output variables: keep a CPU-side buffer per output so device
        # outputs can be copied off for evaluation.
        output_dict = {}
        output_buff = {}
        for key, arr in zip(self.symbol.list_outputs(), self.executor.outputs):
            # print key, arr
            output_dict[key] = arr
            output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
        # endregion

        # region 4. Set up the optimizer
        self.optimizer = opt.create(self.optimizer,
                                    rescale_grad=1.0 / train_data.batch_size,
                                    **self.kwargs)
        self.updater = get_updater(self.optimizer)
        # Parameters whose gradients must be applied every step
        update_dict = {
            name: nd
            for name, nd in zip(self.symbol.list_arguments(),
                                self.executor.grad_arrays) if nd is not None
        }
        # endregion

        # region 5. Set up the evaluation metric
        if eval_metric == 'acc':
            eval_metric = metric.create(eval_metric)
        elif eval_metric == 'meanIOU':
            eval_metric = MeanIoU(c=1, )
        # endregion

        for epoch in range(self.begin_epoch, self.num_epoch):
            # region begin training
            if 'train' in period:
                logger.info(" in train process...")
                all_start = time.time()
                nbatch = 0
                train_data.reset()
                eval_metric.reset()
                for data in train_data:
                    nbatch += 1
                    # all_start = time.time()
                    # region 1. Prepare batch data
                    # start = time.time()
                    self.arg_params[data_name][:] = data.data[0]
                    # end = time.time()
                    # print end-start
                    # label_shape = data.label[0].shape
                    # print label_shape
                    self.arg_params[label_name][:] = data.label[0]
                    # end = time.time()
                    # print 'prepare data and label time: %s s' % (end - start)
                    # quit()
                    # print self.arg_params[label_name][:]
                    # endregion

                    # region 2. forward
                    # start = time.time()
                    self.executor.forward(is_train=True)
                    # end = time.time()
                    # print 'forward time: %s s' % (end - start)

                    # endregion

                    # region 3. backward
                    # start = time.time()
                    self.executor.backward()
                    for key, arr in update_dict.items():
                        if key != "bigscore_weight":
                            # args: parameter name, gradient, weight
                            self.updater(key, arr, self.arg_params[key])
                            # self.executor.outputs[0].wait_to_read()
                    # end = time.time()
                    # print 'backward time: %f s' % (end - start)
                    # endregion

                    # region 4. evaluation
                    # start = time.time()
                    if to_eval_train:
                        # start = time.time()
                        # Fetch outputs into the CPU-side buffers
                        for key in output_dict:
                            # print key
                            output_dict[key].copyto(output_buff[key])
                            # output_dict[key].wait_to_read()
                        # end = time.time()
                        # print 'output1 copy time: %s s' % (end - start)
                        # start = time.time()
                        pred_shape = output_buff['softmax_output'].shape
                        # print pred_shape, label_shape
                        # label = self.arg_params[label_name]
                        pred = output_buff['softmax_output'].reshape(
                            (pred_shape[0], pred_shape[1],
                             pred_shape[2] * pred_shape[3]))
                        # pred = pred.copyto(self.ctx)
                        # print pred.shape
                        label = data.label[0]
                        # quit()
                        # end = time.time()
                        # print 'output copy2 time: %s s' % (end - start)
                        # Update the metric
                        eval_metric.update([label], [pred])
                    batch_end_params = BatchEndParam(
                        epoch=epoch,
                        nbatch=nbatch,
                        eval_metric=eval_metric if to_eval_train else None,
                    )
                    # NOTE(review): raises TypeError if batch_end_callback is None
                    batch_end_callback(batch_end_params)
                    # end = time.time()
                    # print 'evaluation time: %s s' % (end - start)
                    # endregion
                    # all_end = time.time()
                    # print 'all time: %s s' % (all_end - all_start)
                    # if nbatch > 1:
                    #     quit()
                if epoch_end_callback is not None:
                    epoch_end_callback(epoch, self.symbol, self.arg_params,
                                       self.aux_params)

                # all_end = time.time()
                # print 'all time1: %s s' % (all_end - all_start)
                if to_eval_train:
                    name, value = eval_metric.get()
                    logger.info(
                        "                     --->Epoch[%d] Train-%s=%f",
                        epoch, name, value)
                logger.info('train time per epoch: %f s' %
                            (time.time() - all_start))
            # endregion
            # evaluation
            if 'val' in period and eval_data:
                logger.info(" in eval process...")
                nbatch = 0
                eval_data.reset()
                eval_metric.reset()
                # all_start = time.time()
                for data in eval_data:
                    nbatch += 1
                    # label_shape = data.label.shape

                    self.arg_params[data_name][:] = data.data[0]
                    self.arg_params[label_name][:] = data.label[0]

                    # Forward only; no gradients needed for validation.
                    self.executor.forward(is_train=False)
                    pred_shape = self.executor.outputs[0].shape

                    cpu_output_array = mx.nd.empty(pred_shape)
                    self.executor.outputs[0].copyto(cpu_output_array)

                    label = data.label[0]

                    pred = cpu_output_array.reshape(
                        (pred_shape[0], pred_shape[1],
                         pred_shape[2] * pred_shape[3]))

                    eval_metric.update([label], [pred])

                    batch_end_params = BatchEndParam(
                        epoch=epoch,
                        nbatch=nbatch,
                        eval_metric=None,
                    )
                    batch_end_callback(batch_end_params)

                    # if nbatch>200:
                    #     quit()
                    # quit()
                    # self.executor.outputs[0].wait_to_read()
                # all_end = time.time()
                # print 'all time1: %s s' % (all_end - all_start)
                # all_start = time.time()
                name, value = eval_metric.get()
                logger.info('Epoch[%d] Validation-%s=%f', epoch, name, value)
Example #13
0
    def fit(self, X, y=None, eval_data=None, eval_metric='acc',
            epoch_end_callback=None, batch_end_callback=None, kvstore='local', logger=None,
            work_load_list=None, monitor=None, eval_batch_end_callback=None):
        """Fit the model.

        Parameters
        ----------
        X : DataIter, or numpy.ndarray/NDArray
            Training data. If X is a DataIter, the name or, if not available,
            position, of its outputs should match the corresponding variable
            names defined in the symbolic graph.
        y : numpy.ndarray/NDArray, optional
            Training set label.
            If X is numpy.ndarray/NDArray, y is required to be set.
            While y can be 1D or 2D (with 2nd dimension as 1), its 1st dimension must be
            the same as X, i.e. the number of data points and labels should be equal.
        eval_data : DataIter or numpy.ndarray/list/NDArray pair
            If eval_data is numpy.ndarray/list/NDArray pair,
            it should be (valid_data, valid_label).
        eval_metric : metric.EvalMetric or str or callable
            The evaluation metric, name of evaluation metric.
            Or a customized evaluation function that returns the statistics
            based on minibatch.
        epoch_end_callback : callable(epoch, symbol, arg_params, aux_states)
            A callback that is invoked at end of each epoch.
            This can be used to checkpoint model each epoch.
        batch_end_callback: callable(epoch)
            A callback that is invoked at end of each batch,
            for printing purposes.
        kvstore: KVStore or str, optional
           The KVStore or a string kvstore type: 'local', 'dist_sync', 'dist_async'
           In default uses 'local', often no need to change for single machine.
        logger : logging logger, optional
            When not specified, default logger will be used.
        work_load_list : float or int, optional
            The list of work load for different devices,
            in the same order as ctx

        Raises
        ------
        TypeError
            If ``self.optimizer`` is neither a string name nor an
            ``opt.Optimizer`` instance.

        Note
        ----
        KVStore behavior
        - 'local', multi-devices on a single machine, will automatically choose best type.
        - 'dist_sync', multi-machines with BSP
        - 'dist_async', multi-machines with partial asynchronism
        """

        data = self._init_iter(X, y, is_train=True)
        eval_data = self._init_eval_iter(eval_data)

        if self.sym_gen:
            # Regenerate the symbol for the default bucket when bucketing.
            self.symbol = self.sym_gen(data.default_bucket_key)  # pylint: disable=no-member
            self._check_arguments()
        self.kwargs["sym"] = self.symbol

        arg_names, param_names, aux_names = \
                self._init_params(dict(data.provide_data + data.provide_label))
        # Map every device-sliced parameter slot back to its parameter name:
        # each parameter occupies one slot per context.
        param_idx2name = {}
        for i, n in enumerate(param_names):
            for k in range(len(self.ctx)):
                param_idx2name[i * len(self.ctx) + k] = n
        self.kwargs["param_idx2name"] = param_idx2name

        # setup metric
        if not isinstance(eval_metric, metric.EvalMetric):
            eval_metric = metric.create(eval_metric)

        # create kvstore
        (kvstore, update_on_kvstore) = _create_kvstore(
            kvstore, len(self.ctx), self.arg_params)

        # init optimizer
        if isinstance(self.optimizer, str):
            batch_size = data.batch_size
            if kvstore and kvstore.type == 'dist_sync':
                # Gradients are aggregated across workers in BSP mode, so
                # rescale by the global batch size.
                batch_size *= kvstore.num_workers
            optimizer = opt.create(self.optimizer,
                                   rescale_grad=(1.0 / batch_size),
                                   **(self.kwargs))
        elif isinstance(self.optimizer, opt.Optimizer):
            optimizer = self.optimizer
        else:
            # Previously this fell through silently and raised a confusing
            # NameError when `optimizer` was referenced below.
            raise TypeError("self.optimizer must be a str or an "
                            "opt.Optimizer instance, got %s"
                            % type(self.optimizer))

        # do training
        _train_multi_device(self.symbol, self.ctx, arg_names, param_names, aux_names,
                            self.arg_params, self.aux_params,
                            begin_epoch=self.begin_epoch, end_epoch=self.num_epoch,
                            epoch_size=self.epoch_size,
                            optimizer=optimizer,
                            train_data=data, eval_data=eval_data,
                            eval_metric=eval_metric,
                            epoch_end_callback=epoch_end_callback,
                            batch_end_callback=batch_end_callback,
                            kvstore=kvstore, update_on_kvstore=update_on_kvstore,
                            logger=logger, work_load_list=work_load_list, monitor=monitor,
                            eval_batch_end_callback=eval_batch_end_callback,
                            sym_gen=self.sym_gen)
Example #14
0
    def fit(self,
            X,
            marks,
            e_marks=None,
            y=None,
            eval_data=None,
            eval_metric='acc',
            epoch_end_callback=None,
            batch_end_callback=None,
            time_step_callback=None,
            kvstore='local',
            logger=None,
            work_load_list=None,
            monitor=None,
            eval_batch_end_callback=None):
        """Fit the RNN model (overrides the base ``fit``).

        Variant of ``fit`` that additionally forwards ``marks``,
        ``e_marks`` and ``time_step_callback`` to ``_train_rnn``.

        Raises
        ------
        TypeError
            If ``self.optimizer`` is neither a string name nor an
            ``opt.Optimizer`` instance.
        """

        data = self._init_iter(X, y, is_train=True)
        eval_data = self._init_eval_iter(eval_data)

        if self.sym_gen:
            # Regenerate the symbol for the default bucket when bucketing.
            self.symbol = self.sym_gen(data.default_bucket_key)  # pylint: disable=no-member
            self._check_arguments()
        self.kwargs["sym"] = self.symbol

        param_dict = dict(data.provide_data + data.provide_label)
        arg_names, param_names, aux_names = self._init_params(param_dict)

        # setup metric
        if not isinstance(eval_metric, metric.EvalMetric):
            eval_metric = metric.create(eval_metric)

        # create kvstore
        (kvstore, update_on_kvstore) = _create_kvstore(kvstore, len(self.ctx),
                                                       self.arg_params)

        # Map parameter indices to names: with update_on_kvstore there is
        # one slot per parameter, otherwise one slot per parameter per
        # context.
        param_idx2name = {}
        if update_on_kvstore:
            param_idx2name.update(enumerate(param_names))
        else:
            for i, n in enumerate(param_names):
                for k in range(len(self.ctx)):
                    param_idx2name[i * len(self.ctx) + k] = n
        self.kwargs["param_idx2name"] = param_idx2name

        # init optimizer
        if isinstance(self.optimizer, str):
            batch_size = data.batch_size
            if kvstore and kvstore.type == 'dist_sync':
                # Gradients are aggregated across workers in BSP mode, so
                # rescale by the global batch size.
                batch_size *= kvstore.num_workers
            optimizer = opt.create(self.optimizer,
                                   rescale_grad=(1.0 / batch_size),
                                   **(self.kwargs))
        elif isinstance(self.optimizer, opt.Optimizer):
            optimizer = self.optimizer
        else:
            # Previously this fell through silently and raised a confusing
            # NameError when `optimizer` was referenced below.
            raise TypeError("self.optimizer must be a str or an "
                            "opt.Optimizer instance, got %s"
                            % type(self.optimizer))

        # do training
        _train_rnn(self.symbol,
                   self.ctx,
                   marks,
                   arg_names,
                   param_names,
                   aux_names,
                   self.arg_params,
                   self.aux_params,
                   begin_epoch=self.begin_epoch,
                   end_epoch=self.num_epoch,
                   epoch_size=self.epoch_size,
                   optimizer=optimizer,
                   train_data=data,
                   eval_data=eval_data,
                   eval_metric=eval_metric,
                   epoch_end_callback=epoch_end_callback,
                   batch_end_callback=batch_end_callback,
                   time_step_callback=time_step_callback,
                   kvstore=kvstore,
                   update_on_kvstore=update_on_kvstore,
                   logger=logger,
                   work_load_list=work_load_list,
                   monitor=monitor,
                   eval_batch_end_callback=eval_batch_end_callback,
                   sym_gen=self.sym_gen,
                   e_marks=e_marks)
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in range(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    curTime = time.strftime("%Y%m%d%H%M%S", time.localtime())
    prefix = os.path.join(
        args.models_root,
        '%s-%s-%s-%s' % (curTime, args.network, args.loss, args.dataset),
        'model')
    prefix_dir = os.path.dirname(prefix)
    print('prefix', prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    args.ctx_num = len(ctx)
    args.batch_size = args.per_batch_size * args.ctx_num
    args.image_channel = config.image_shape[2]
    config.batch_size = args.batch_size
    config.per_batch_size = args.per_batch_size
    config.no_wd = args.no_wd
    config.last_gamma = args.last_gamma
    if (args.freeze_block == 1):
        config.bn_mom = 1.0

    print('bbbbbbbbbbbbbbbbbn', config.bn_mom)

    data_dir = config.dataset_path
    path_imgrec = None
    path_imglist = None
    image_size = config.image_shape[0:2]
    assert len(image_size) == 2
    #assert image_size[0]==image_size[1]
    print('image_size', image_size)
    print('num_classes', config.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")

    print('Called with argument:', args, config)
    data_shape = (args.image_channel, image_size[0], image_size[1])
    mean = None

    begin_epoch = 0
    if len(args.pretrained) == 0:
        arg_params = None
        aux_params = None
        sym = get_symbol(args)
    else:
        print('loading', args.pretrained, args.pretrained_epoch)
        _, arg_params, aux_params = mx.model.load_checkpoint(
            args.pretrained, args.pretrained_epoch)
        #for item in arg_params:
        #    print(item)
        #print(arg_params)
        #exit()
        sym = get_symbol(args)

    if args.model_visual:
        mx.viz.plot_network(sym,
                            title='model',
                            save_format='pdf',
                            shape={
                                'data': (64, 3, 224, 224),
                                'label': (64, )
                            }).view()
        exit(0)

    if config.count_flops:
        all_layers = sym.get_internals()
        pre_fix = ''
        if (config.emb_size == 2048):
            pre_fix = '2048_'
        _sym = all_layers[pre_fix + 'fc1_output']
        FLOPs = flops_counter.count_flops(_sym,
                                          data=(1, 3, image_size[0],
                                                image_size[1]))
        _str = flops_counter.flops_str(FLOPs)
        print('Network FLOPs: %s' % _str)

    #label_name = 'softmax_label'
    #label_shape = (args.batch_size,)
    emb_symbol = sym.get_internals()[pre_fix + 'fc1_output']
    fixed_param_names = []
    if (args.freeze_block == 1):
        fixed_param_names = emb_symbol.list_arguments()
    elif (args.freeze_block == 2):
        emb_symbol = sym.get_internals()[pre_fix + 'bn1_output']
        fixed_param_names = emb_symbol.list_arguments()
    print(fixed_param_names)
    #fixed_aux = emb_symbol.list_auxiliary_states()
    #fixed_param_names.extend(fixed_aux)
    #print('ffffffffffffffixed params : ', fixed_param_names)
    model = mx.mod.Module(context=ctx,
                          symbol=sym,
                          fixed_param_names=fixed_param_names)
    val_dataiter = None

    if config.loss_name.find('fusion') >= 0:
        from pair_fusion_class_image_iter import FaceImageIter
        triplet_params = [
            config.triplet_bag_size, config.triplet_alpha,
            config.triplet_max_ap
        ]
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=config.data_rand_mirror,
            mean=mean,
            cutoff=config.data_cutoff,
            ctx_num=args.ctx_num,
            images_per_identity=config.images_per_identity,
            triplet_params=triplet_params,
            mx_model=model,
            fairface_mode=config.fairface_mode,
        )
        _metric = LossValueMetric()
        eval_metrics = [mx.metric.create(_metric)]

    elif config.loss_name.find('triplet') >= 0:
        #from fair_face_triplet_iter import FaceImageIter
        from triplet_image_iter import FaceImageIter
        if (config.loss_name == 'triplet'):
            dis_type = 'e'
        elif (config.loss_name == 'atriplet'):
            dis_type = 'c'
        triplet_params = [
            config.triplet_bag_size, config.triplet_alpha,
            config.triplet_max_ap
        ]
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=config.data_rand_mirror,
            mean=mean,
            cutoff=config.data_cutoff,
            ctx_num=args.ctx_num,
            images_per_identity=config.images_per_identity,
            triplet_params=triplet_params,
            mx_model=model,
            fairface_mode=config.fairface_mode,
            dis_type=dis_type,
        )
        _metric = LossValueMetric()
        eval_metrics = [mx.metric.create(_metric)]

    elif config.loss_name.find('softmax') >= 0:
        from image_iter_gluon import FaceImageDataset
        train_dataset = FaceImageDataset(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=config.data_rand_mirror,
            mean=mean,
            cutoff=config.data_cutoff,
            color_jittering=config.data_color,
            images_filter=config.data_images_filter,
            selected_attributes=args.selected_attributes,
            label_name=['softmax_label'])

        train_data = mx.gluon.data.DataLoader(train_dataset,
                                              args.batch_size,
                                              shuffle=True,
                                              last_batch="rollover",
                                              num_workers=args.num_workers)
        train_dataiter = mx.contrib.io.DataLoaderIter(train_data)

        metric1 = AccMetric()
        eval_metrics = [mx.metric.create(metric1)]
        if config.ce_loss:
            metric2 = LossValueMetric()
            eval_metrics.append(mx.metric.create(metric2))
    else:
        from image_iter import FaceImageIter
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=config.data_rand_mirror,
            mean=mean,
            cutoff=config.data_cutoff,
            color_jittering=config.data_color,
            images_filter=config.data_images_filter,
        )

        metric1 = AccMetric()
        eval_metrics = [mx.metric.create(metric1)]
    if config.loss_name == 'final_softmax':
        _metric = LossValueMetric()
        eval_metrics = [mx.metric.create(_metric)]

        if config.ce_loss:
            metric2 = LossValueMetric()
            eval_metrics.append(mx.metric.create(metric2))

    initializer = mx.init.Xavier(rnd_type='gaussian',
                                 factor_type="out",
                                 magnitude=2)  #resnet style
    #initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)
    _rescale = 1.0 / args.ctx_num
    clip_gradient = None
    if config.fp_16:
        _rescale /= config.scale16
        clip_gradient = config.gradThres
    #opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)#, multi_precision=config.fp_16)
    opt = optimizer.create(args.opt,
                           learning_rate=args.lr,
                           momentum=config.mom,
                           wd=config.wd,
                           rescale_grad=_rescale,
                           multi_precision=config.fp_16,
                           clip_gradient=clip_gradient)
    _cb = mx.callback.Speedometer(args.batch_size, args.frequent)

    # cos learning rate scheduler
    if args.cos_lr:
        num_batches = config.num_training_samples // args.batch_size
        total_batches = default.end_epoch * num_batches

    ver_list = []
    ver_name_list = []
    for name in config.val_targets:
        path = os.path.join(data_dir, name + ".bin")
        if os.path.exists(path):
            data_set = verification.load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)
            print('ver', name)

    def ver_test(nbatch):
        results = []
        label_shape = None
        if (config.net_output == 'ECCV'):
            label_shape = (args.batch_size, 2)

        for i in range(len(ver_list)):
            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(
                ver_list[i], model, args.batch_size, 10, None, label_shape)
            print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
            #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' %
                  (ver_name_list[i], nbatch, acc2, std2))
            results.append(acc2)
        return results

    # One-element (or two-element) mutable lists act as closure-writable
    # counters for _batch_callback, which rebinds their items in place.
    highest_acc = [0.0, 0.0]  #lfw and target
    #  highest_acc.append(0.0)
    global_step = [0]
    save_step = [0]
    highestStep = [0]
    # LR decay milestones in batches, parsed from a comma-separated string.
    lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)
    def _batch_callback(param):
        """Per-batch callback: LR scheduling, logging, periodic eval/checkpoint.

        Mutates the closed-over list counters global_step/save_step/
        highest_acc/highestStep; exits the process once config.max_steps is
        exceeded.
        """
        #global global_step
        global_step[0] += 1
        mbatch = global_step[0]

        # Warmup phase: the LR is held at a tiny constant (the linear ramp is
        # commented out) until warmupSteps batches have run.
        if config.useWarmup and (mbatch < config.warmupSteps):
            #opt.lr = args.lr * mbatch / config.warmupSteps
            opt.lr = 1.0e-8
            #print("warmup lr: ", opt.lr)

        if (not config.useWarmup) or (config.useWarmup and
                                      (mbatch >= config.warmupSteps)):
            # Schedule position, counted from the end of warmup (if any).
            targetSteps = mbatch
            if config.useWarmup:
                if mbatch == config.warmupSteps:
                    # First post-warmup batch: restore the base LR.
                    opt.lr = args.lr

                targetSteps -= config.warmupSteps

            if args.cos_lr:
                # Cosine annealing from args.lr toward 0 over total_batches
                # (total_batches is defined above only when args.cos_lr is set).
                opt.lr = 0.5 * args.lr * (
                    1 + np.cos(np.pi * (targetSteps / total_batches)))
                if (targetSteps % 500) == 0:
                    print('cos lr change to', opt.lr)
            else:
                # Step schedule: multiply the LR by 0.1 at each milestone.
                for step in lr_steps:
                    if targetSteps == step:
                        opt.lr *= 0.1
                        print('lr change to', opt.lr)
                        break

        _cb(param)  # Speedometer throughput logging
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)

        # Periodic verification + (optional) checkpointing every args.verbose
        # batches.
        if mbatch >= 0 and mbatch % args.verbose == 0:
            acc_list = ver_test(mbatch)
            save_step[0] += 1
            msave = save_step[0]
            do_save = False
            is_highest = False
            if len(acc_list) > 0:
                score = sum(acc_list)
                # NOTE(review): "highest" is driven by the LAST target's flip
                # accuracy; the summed score only breaks exact ties, and
                # highest_acc[0] is updated only on that tie branch — confirm
                # this asymmetry is intended.
                if acc_list[-1] >= highest_acc[-1]:
                    if acc_list[-1] > highest_acc[-1]:
                        is_highest = True
                    else:
                        if score >= highest_acc[0]:
                            is_highest = True
                            highest_acc[0] = score
                    highest_acc[-1] = acc_list[-1]
                    highestStep[0] = save_step[0]

            # Checkpoint policy: ckpt==0 never saves, ckpt==2 always saves,
            # ckpt==3 saves only new bests and always overwrites slot 1;
            # otherwise save only when a new best was reached.
            if is_highest:
                do_save = True
            if args.ckpt == 0:
                do_save = False
            elif args.ckpt == 2:
                do_save = True
            elif args.ckpt == 3:
                msave = 1

            if do_save:
                print('saving', msave)
                arg, aux = model.get_params()
                if config.ckpt_embedding:
                    # Save only the embedding network: cut the graph at
                    # fc1_output and drop the fc7 (classifier) weights.
                    all_layers = model.symbol.get_internals()
                    _sym = all_layers['fc1_output']
                    _arg = {}
                    for k in arg:
                        if not k.startswith('fc7'):
                            _arg[k] = arg[k]
                    mx.model.save_checkpoint(prefix, msave, _sym, _arg, aux)
                else:
                    mx.model.save_checkpoint(prefix, msave, model.symbol, arg,
                                             aux)
            print('[%d]Accuracy-Highest: %1.5f, mbatch: %d' %
                  (mbatch, highest_acc[-1], highestStep[0]))
        if config.max_steps > 0 and mbatch > config.max_steps:
            sys.exit(0)

    epoch_cb = None
    if config.loss_name.find('triplet') < 0:
        # PrefetchingIter is incompatible with the triplet-loss data pipeline.
        train_dataiter = mx.io.PrefetchingIter(
            train_dataiter)  #triplet loss unavailable
    ######
    if (config.net_output == 'ECCV'):
        # Extra accuracy metric on the classification head; label/pred indices
        # follow the ECCV network's output layout.
        class_metric = AccMetric(acc_name='class_acc',
                                 label_index=1,
                                 pred_index=4)
        eval_metrics.append(mx.metric.create(class_metric))
        # Removed a stray no-op `eval_metrics,` expression statement that was
        # left dangling here (it built and discarded a one-element tuple).
    model.fit(
        train_dataiter,
        begin_epoch=begin_epoch,
        num_epoch=999999,  # effectively unbounded; stopping is via max_steps
        eval_data=val_dataiter,
        eval_metric=eval_metrics,
        kvstore=args.kvstore,
        optimizer=opt,
        #optimizer_params   = optimizer_params,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,  # tolerate params missing from loaded checkpoint
        batch_end_callback=_batch_callback,
        epoch_end_callback=epoch_cb)
Example #16
0
    def init_optimizer(self, kvstore='local', optimizer='sgd',
                       optimizer_params=(('learning_rate', 0.01),), force_init=False):
        """Install and initialize the optimizer for this module.

        Parameters
        ----------
        kvstore : str or KVStore
            Default `'local'`.
        optimizer : str or Optimizer
            Default `'sgd'`. A registry name, or an already-constructed
            Optimizer instance.
        optimizer_params : dict
            Default `(('learning_rate', 0.01),)`. A tuple of pairs rather than
            a dict literal only to avoid the mutable-default pylint warning.
        force_init : bool
            Default `False`. If `True`, re-initialize even when an optimizer
            is already installed.
        """
        assert self.binded and self.params_initialized

        if self.optimizer_initialized and not force_init:
            self.logger.warning('optimizer already initialized, ignoring...')
            return

        kvstore, update_on_kvstore = _create_kvstore(
            kvstore, len(self._context), self._arg_params)

        # Normalize gradients by the effective batch size; for synchronous
        # distributed training that spans all workers.
        effective_batch = self._exec_group.batch_size
        if kvstore and 'dist' in kvstore.type and '_sync' in kvstore.type:
            effective_batch *= kvstore.num_workers
        grad_rescale = 1.0 / effective_batch

        if isinstance(optimizer, str):
            names = self._exec_group.param_names
            if update_on_kvstore:
                index_to_name = dict(enumerate(names))
            else:
                # Every device holds its own copy of each parameter, so the
                # flat updater index is param_index * n_devices + device.
                n_dev = len(self._context)
                index_to_name = {i * n_dev + dev: name
                                 for dev in range(n_dev)
                                 for i, name in enumerate(names)}
            optimizer_params = dict(optimizer_params)
            optimizer_params.setdefault('rescale_grad', grad_rescale)
            optimizer = opt.create(optimizer,
                                   sym=self.symbol, param_idx2name=index_to_name,
                                   **optimizer_params)
        else:
            assert isinstance(optimizer, opt.Optimizer)
            if optimizer.rescale_grad != grad_rescale:
                #pylint: disable=no-member
                warnings.warn(
                    "Optimizer created manually outside Module but rescale_grad "
                    "is not normalized to 1.0/batch_size/num_workers (%s vs. %s). "
                    "Is this intended?" % (optimizer.rescale_grad, grad_rescale),
                    stacklevel=2)

        self._optimizer = optimizer
        self._kvstore = kvstore
        self._update_on_kvstore = update_on_kvstore
        self._updater = None

        if kvstore:
            # Push the freshly initialized local parameters to the kvstore so
            # every worker starts from identical values.
            _initialize_kvstore(kvstore=kvstore,
                                param_arrays=self._exec_group.param_arrays,
                                arg_params=self._arg_params,
                                param_names=self._param_names,
                                update_on_kvstore=update_on_kvstore)
        if update_on_kvstore:
            kvstore.set_optimizer(self._optimizer)
        else:
            self._updater = opt.get_updater(optimizer)

        self.optimizer_initialized = True

        if self._preload_opt_states is not None:
            self.load_optimizer_states(self._preload_opt_states)
            self._preload_opt_states = None
Example #17
0
    # Allocate zeroed gradient buffers for every learnable arg; pure input
    # args ('...data'/'...label') carry no gradient.
    grad_params = {}
    for name, shape in zip(arg_names, arg_shapes):
        if not (name.endswith('data') or name.endswith('label')):
            grad_params[name] = mx.nd.zeros(shape, ctx)

    # prepare aux_params
    aux_names = network.list_auxiliary_states()
    aux_params = {
        k: mx.nd.zeros(s, ctx)
        for k, s in zip(aux_names, aux_shapes)
    }

    # prepare optimizer
    # Gradients are rescaled by 1/batch_size so the update sees the mean
    # gradient rather than the sum.
    optimizer = opt.create('adam',
                           rescale_grad=(1.0 / dataiter.get_batch_size()),
                           **({
                               'learning_rate': 0.01
                           }))
    updater = get_updater(optimizer)

    # create eval_metrix
    eval_metric = metric.create('rmse')

    data_name = dataiter.data_name
    label_name = dataiter.label_name
    # NOTE(review): these rebind arg_params/aux_params to the pretrained
    # network_args/network_auxs, discarding the zero-filled aux_params dict
    # built above — confirm that zero dict is really unused in between.
    arg_params = network_args
    aux_params = network_auxs

    # Log throughput every 10 batches (batch_size reported as 1) and
    # checkpoint under save_model_prefix at each epoch end.
    batch_callback = mx.callback.Speedometer(1, 10)
    epoch_callback = mx.callback.do_checkpoint(save_model_prefix)