Example #1
File: yolotrain.py  Project: zfxu/tests
def parse_net_output(Y, numClass, box_per_cell):
    pred = nd.transpose(Y, (0, 2, 3, 1))
    pred = pred.reshape(
        (0, 0, 0, box_per_cell, numClass + 5))  #add one dim for boxes
    predCls = nd.slice_axis(pred, begin=0, end=numClass, axis=-1)
    predObject = nd.slice_axis(pred, begin=numClass, end=numClass + 1, axis=-1)
    #predObject = nd.sigmoid(predObject)
    predXY = nd.slice_axis(pred, begin=numClass + 1, end=numClass + 3, axis=-1)
    #predXY = nd.sigmoid(predXY)
    predWH = nd.slice_axis(pred, begin=numClass + 3, end=numClass + 5, axis=-1)
    #x,y = convert_xy(predXY)
    #w,h = convert_wh(predWH)
    #w = nd.clip(w,0,1)
    #h = nd.clip(h,0,1)
    #x0 = nd.clip(x, 0, 1)
    #y0 = nd.clip(y,0,1)
    #x1 = nd.clip(x0 + w,0,1)
    #y1 = np.clip(y0 + h, 0,1)
    #x = x0
    #y = y0
    #w = x1 - x0
    #h = y1 - y0
    XYWH = nd.concat(predXY, predWH, dim=-1)
    # pdb.set_trace()
    return predCls, predObject, XYWH
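A quick sanity check of the slicing above: feed the function a dummy tensor with the expected YOLO channel layout and inspect the output shapes. A minimal sketch, assuming numClass=20 and box_per_cell=5 (so the head carries 5 * (20 + 5) = 125 channels):

from mxnet import nd

numClass, box_per_cell = 20, 5
# fake network output: (batch, box_per_cell * (numClass + 5), H, W)
Y = nd.random.uniform(shape=(2, box_per_cell * (numClass + 5), 7, 7))
predCls, predObject, XYWH = parse_net_output(Y, numClass, box_per_cell)
print(predCls.shape)     # (2, 7, 7, 5, 20)
print(predObject.shape)  # (2, 7, 7, 5, 1)
print(XYWH.shape)        # (2, 7, 7, 5, 4)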
Example #2
def split_data(data, num_slice, batch_axis=0, even_split=True, multiplier=1):
    """Splits an NDArray into `num_slice` slices along `batch_axis`.
    Usually used for data parallelism where each slice is sent
    to one device (e.g. a GPU).

    Parameters
    ----------
    data : NDArray
        A batch of data.
    num_slice : int
        Number of desired slices.
    batch_axis : int, default 0
        The axis along which to slice.
    even_split : bool, default True
        Whether to force all slices to have the same number of elements.
        If `True`, an error will be raised when `num_slice` does not evenly
        divide `data.shape[batch_axis]`.
    multiplier : int, default 1
        The batch size must be a multiple of `multiplier`.

    Returns
    -------
    list of NDArray
        Return value is a list even if `num_slice` is 1.
    """
    size = data.shape[batch_axis]
    if even_split and size % num_slice != 0:
        raise ValueError(
            "data with shape %s cannot be evenly split into %d slices along axis %d. " \
            "Use a batch size that's multiple of %d or set even_split=False to allow " \
            "uneven partitioning of data."%(
                str(data.shape), num_slice, batch_axis, num_slice))

    step = (int(size / multiplier) // num_slice) * multiplier

    # If size < num_slice, make fewer slices
    if not even_split and size < num_slice:
        step = 1
        num_slice = size

    if batch_axis == 0:
        slices = [
            data[i * step:(i + 1) *
                 step] if i < num_slice - 1 else data[i * step:size]
            for i in range(num_slice)
        ]
    elif even_split:
        slices = ndarray.split(data, num_outputs=num_slice, axis=batch_axis)
    else:
        slices = [
            ndarray.slice_axis(data, batch_axis, i * step,
                               (i + 1) * step) if i < num_slice - 1 else
            ndarray.slice_axis(data, batch_axis, i * step, size)
            for i in range(num_slice)
        ]
    return slices
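For reference, a small usage sketch, assuming `ndarray` is `mxnet.ndarray`: six rows split into three even slices, then into four uneven ones:

from mxnet import ndarray

data = ndarray.arange(12).reshape((6, 2))
even = split_data(data, num_slice=3)
print([s.shape for s in even])    # [(2, 2), (2, 2), (2, 2)]

uneven = split_data(data, num_slice=4, even_split=False)
print([s.shape for s in uneven])  # [(1, 2), (1, 2), (1, 2), (3, 2)]; the last slice absorbs the remainder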
Example #3
File: yolo.py  Project: z01nl1o02/toy-yolo
    def format_net_output(self, Y):
        pred = nd.transpose(Y, (0, 2, 3, 1))  # move channel to last dim
        pred = pred.reshape((0, 0, 0, self.box_per_cell, self.numClass + 5))  # split last dim into (box_per_cell, numClass + 5)
        # here you are responsible for defining each field of the output
        predCls = nd.slice_axis(pred, begin=0, end=self.numClass, axis=-1)
        predObj = nd.slice_axis(pred, begin=self.numClass, end=self.numClass + 1, axis=-1)
        predXY = nd.slice_axis(pred, begin=self.numClass + 1, end=self.numClass + 3, axis=-1)
        predXY = nd.sigmoid(predXY)
        predWH = nd.slice_axis(pred, begin=self.numClass + 3, end=self.numClass + 5, axis=-1)
        XYWH = nd.concat(predXY, predWH, dim=-1)
        return predCls, predObj, XYWH
Example #4
    def update(self, labels, preds):
        self.count += 1
        preds = [preds[2]]
        labels = labels[0]
        #print(labels.stype)
        labels = nd.slice_axis(labels,
                               axis=0,
                               begin=0,
                               end=args.per_batch_size - args.num_unlabeled)
        labels = [labels]
        #print(preds)
        #print(labels)
        for label, pred_label in zip(labels, preds):
            if pred_label.shape != label.shape:
                pred_label = mx.ndarray.argmax(pred_label, axis=self.axis)
            pred_label = pred_label.asnumpy().astype('int32').flatten()
            label = label.asnumpy()
            if label.ndim == 2:
                label = label[:, 0]
            label = label.astype('int32').flatten()
            assert label.shape == pred_label.shape
            #print(pred_label, '(pred_label)\n')
            #print(label, '(label)\n')
            self.sum_metric += (pred_label.flat == label.flat).sum()
            #print(self.sum_metric, '(sum_metric)\n')
            self.num_inst += len(pred_label.flat)
Example #5
    def backward(self, out_grads=None):
        """Run backward on all devices. A backward should be called after
        a call to the forward function. Backward cannot be called unless
        ``self.for_training`` is ``True``.

        Parameters
        ----------
        out_grads : NDArray or list of NDArray, optional
            Gradient on the outputs to be propagated back.
            This parameter is only needed when bind is called
            on outputs that are not a loss function.
        """
        assert self.for_training, 're-bind with for_training=True to run backward'
        if out_grads is None:
            out_grads = []

        for i, (exec_, islice) in enumerate(zip(self.execs, self.slices)):
            out_grads_slice = []
            for grad, axis in zip(out_grads, self.output_layouts):
                if axis >= 0:
                    # pylint: disable=no-member
                    og_my_slice = nd.slice_axis(grad, axis=axis, begin=islice.start,
                                                end=islice.stop)
                    # pylint: enable=no-member
                    out_grads_slice.append(og_my_slice.as_in_context(self.contexts[i]))
                else:
                    out_grads_slice.append(grad.copyto(self.contexts[i]))
            exec_.backward(out_grads=out_grads_slice)
Example #6
def split_and_load_WithCPU(data, GPU_list, batch_axis=0, CPU_percentage=0.3):
    """Distribute the data between the CPU and the GPU(s)."""
    size = data.shape[batch_axis]
    CPU_size = round(size * CPU_percentage)

    CPU_data = slice_axis(data, axis=batch_axis, begin=0, end=CPU_size)
    CPU_data = CPU_data.as_in_context(mx.cpu())

    GPU_data = slice_axis(data, axis=batch_axis, begin=CPU_size, end=size)
    GPU_data = split_and_load(GPU_data,
                              GPU_list,
                              batch_axis=batch_axis,
                              even_split=False)

    return [CPU_data] + GPU_data
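A usage sketch, assuming one visible GPU and that `slice_axis` and `split_and_load` come from `mxnet.ndarray` and `mxnet.gluon.utils`: with CPU_percentage=0.25, the first quarter of an 8-sample batch stays on the CPU and the rest goes to the GPU list:

import mxnet as mx
from mxnet import nd

batch = nd.random.uniform(shape=(8, 3, 32, 32))
parts = split_and_load_WithCPU(batch, GPU_list=[mx.gpu(0)], CPU_percentage=0.25)
print([(p.shape, p.context) for p in parts])
# [((2, 3, 32, 32), cpu(0)), ((6, 3, 32, 32), gpu(0))]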
Example #7
def test_slice_axis_3d():
    x = nd.random.randn(1, 6, 64).astype("float32")
    ref = nd.slice_axis(x, axis=2, begin=0, end=-4).asnumpy().astype("float32")
    enc = bf.slice_axis(torch.from_numpy(x.asnumpy()), axis=2, begin=0,
                        end=-4).numpy().astype("float32")

    # 1 * 6 * 60 = 360 elements must all match
    assert np.sum(ref == enc) == 360
Example #8
def slice_axis(data, axis, begin, end):
    # Normalize negative indices: a negative `begin` counts from the end of
    # the axis, and `end <= 0` is shifted so that end=0 means "through the
    # last element"; then delegate to nd.slice_axis.
    dim = data.shape[axis]
    if begin < 0:
        begin += dim
    if end <= 0:
        end += dim
    return nd.slice_axis(data, axis, begin, end)
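The index normalization makes this wrapper behave like Python slicing, except that end=0 means "through the last element". A short sketch:

from mxnet import nd

x = nd.arange(6).reshape((2, 3))
print(slice_axis(x, axis=1, begin=-2, end=0))   # columns 1 and 2
print(slice_axis(x, axis=1, begin=0, end=-1))   # columns 0 and 1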
Example #9
    def update_metric(self, eval_metric, labels):
        """Accumulate the performance according to `eval_metric` on all devices
        by comparing outputs from [begin, end) to labels. By default use all
        outputs.

        Parameters
        ----------
        eval_metric : EvalMetric
            The metric used for evaluation.
        labels : list of NDArray
            Typically comes from `label` of a `DataBatch`.
        """
        for texec, islice in zip(self.execs, self.slices):
            labels_slice = []
            for label, axis in zip(labels, self.label_layouts):
                if axis == 0:
                    # slicing NDArray along axis 0 can avoid copying
                    labels_slice.append(label[islice])
                elif axis > 0:
                    # pylint: disable=no-member
                    label_my_slice = nd.slice_axis(label, axis=axis, begin=islice.start,
                                                   end=islice.stop).as_in_context(label.context)
                    # pylint: enable=no-member
                    labels_slice.append(label_my_slice)
                else:
                    labels_slice.append(label)

            labels_ = OrderedDict(zip(self.label_names, labels_slice))
            preds = OrderedDict(zip(self.output_names, texec.outputs))
            eval_metric.update_dict(labels_, preds)
Example #10
def test_slice_axis_1d():
    x = nd.array([[1., 2., 3., 4.],
                  [5., 6., 7., 8.],
                  [9., 10., 11., 12.]]).astype("float32")
    ans = nd.array([[1., 2.], [5., 6.], [9., 10.]])
    ref = nd.slice_axis(x, axis=1, begin=0, end=2)
    enc = bf.slice_axis(torch.from_numpy(x.asnumpy()), axis=1, begin=0, end=2)
    assert np.sum(ans.asnumpy() == enc.numpy()) == 6
    assert np.sum(ref.asnumpy() == enc.numpy()) == 6
Example #11
def test(data_set, mx_model, batch_size, nfolds=10, data_extra = None, label_shape = None):
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    model = mx_model
    embeddings_list = []
    if data_extra is not None:
        _data_extra = nd.array(data_extra)
    time_consumed = 0.0
    if label_shape is None:
        _label = nd.ones( (batch_size,) )
    else:
        _label = nd.ones( label_shape )
    for i in range( len(data_list) ):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba<data.shape[0]:
            bb = min(ba+batch_size, data.shape[0])
            count = bb-ba
            _data = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
            time0 = datetime.datetime.now()
            if data_extra is None:
                db = mx.io.DataBatch(data=(_data,), label=(_label,))
            else:
                db = mx.io.DataBatch(data=(_data,_data_extra), label=(_label,))
            model.forward(db, is_train=False)
            net_out = model.get_outputs()
            _embeddings = net_out[0].asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed+=diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
            embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
            ba = bb
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm=np.linalg.norm(_em)
            _xnorm+=_norm
            _xnorm_cnt+=1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = sklearn.preprocessing.normalize(embeddings)
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    accuracy = evaluate(embeddings, issame_list, nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc2, std2, _xnorm, embeddings_list
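One subtlety in the batching loop above: nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb) always extracts a full batch, so the final window overlaps the previous one whenever data.shape[0] is not a multiple of batch_size, and _embeddings[(batch_size-count):] then keeps only the genuinely new rows. A standalone sketch of that indexing, using only NumPy:

import numpy as np

n, batch_size = 10, 4
data = np.arange(n)
out = np.empty(n, dtype=data.dtype)
ba = 0
while ba < n:
    bb = min(ba + batch_size, n)
    count = bb - ba                           # rows still needed (may be < batch_size)
    window = data[bb - batch_size:bb]         # always a full batch; the tail overlaps
    out[ba:bb] = window[batch_size - count:]  # keep only the new rows
    ba = bb
assert (out == data).all()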
Example #12
    def ious(cls, label: np.ndarray) -> np.ndarray:
        """Convert the label provided to this layer to ious.

        The labels for this layer are structured a bit strangely:

            b x 9 x 22 x 72: placeholder for labels (hacky mxnet workaround)
            b x 36 x 22 x 72: predicted bounding boxes
            b x 36 x 22 x 72: actual bounding boxes

        Thus, we ignore the first set of values, and compute iou using the
        last two.
        """
        pred_idx = ANCHORS_PER_GRID * (1 + NUM_BBOX_ATTRS)
        pred_box = cls.reformat(nd.slice_axis(
            label, axis=1, begin=ANCHORS_PER_GRID, end=pred_idx))
        label_box = cls.reformat(nd.slice_axis(
            label, axis=1, begin=pred_idx, end=None))
        return batches_iou(pred_box, label_box)
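The channel arithmetic implied by the docstring, spelled out with the hypothetical constants ANCHORS_PER_GRID = 9 and NUM_BBOX_ATTRS = 4 (values assumed here to match the 9/36/36 layout described above):

ANCHORS_PER_GRID = 9  # assumed: one placeholder channel per anchor
NUM_BBOX_ATTRS = 4    # assumed: (x, y, w, h) per anchor -> 9 * 4 = 36 channels

pred_idx = ANCHORS_PER_GRID * (1 + NUM_BBOX_ATTRS)  # 45
# axis-1 channel ranges in the label tensor:
#   [0, 9)    placeholder
#   [9, 45)   predicted boxes (36 channels)
#   [45, 81)  actual boxes (36 channels; end=None slices to the end)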
Example #13
def dumpR(data_set,
          mx_model,
          batch_size,
          name='',
          data_extra=None,
          label_shape=None):
    print('dump verification embedding..')
    data_list = data_set[0]
    issame_list = data_set[1]
    print('issame_list:', issame_list)
    model = mx_model
    embeddings_list = []
    if data_extra is not None:
        _data_extra = nd.array(data_extra)
    time_consumed = 0.0
    if label_shape is None:
        _label = nd.ones((batch_size, ))
    else:
        _label = nd.ones(label_shape)
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            # print('_data.shape, _label.shape:',_data.shape, _label.shape)
            time0 = datetime.datetime.now()
            if data_extra is None:
                db = mx.io.DataBatch(data=(_data, ), label=(_label, ))
            else:
                db = mx.io.DataBatch(data=(_data, _data_extra),
                                     label=(_label, ))
            model.forward(db, is_train=False)
            net_out = model.get_outputs()
            _embeddings = net_out[0].asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)
    embeddings = embeddings_list[0] + embeddings_list[1]
    print('embeddings:', embeddings)
    embeddings = sklearn.preprocessing.normalize(embeddings)
    actual_issame = np.asarray(issame_list)
    outname = os.path.join('temp.bin')
    with open(outname, 'wb') as f:
        pickle.dump((embeddings, issame_list),
                    f,
                    protocol=pickle.HIGHEST_PROTOCOL)
Example #14
def test(data_set, extractor, batch_size, nfolds=10):
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size,
                                  end=bb).as_in_context(mx.gpu())
            time0 = datetime.datetime.now()
            net_out = extractor(_data)
            _embeddings = net_out.asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = preprocessing.normalize(embeddings)
    acc1, std1 = 0.0, 0.0
    #_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
    #acc1, std1 = np.mean(accuracy), np.std(accuracy)
    #print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
    #embeddings = np.concatenate(embeddings_list, axis=1)
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = preprocessing.normalize(embeddings)
    _, _, accuracy, val, val_std, far = evaluate(embeddings,
                                                 issame_list,
                                                 nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
Example #15
def parse_net_output(Y,numClass, box_per_cell):
    pred = nd.transpose(Y,(0,2,3,1))
    pred = pred.reshape((0,0,0,box_per_cell,numClass + 5)) #add one dim for boxes
    predCls = nd.slice_axis(pred, begin = 0, end = numClass,axis=-1)
    predObject = nd.slice_axis(pred,begin=numClass,end=numClass+1,axis=-1)
    #predObject = nd.sigmoid(predObject)
    predXY = nd.slice_axis(pred, begin = numClass + 1, end = numClass + 3, axis=-1)
    #predXY = nd.sigmoid(predXY)
    predWH = nd.slice_axis(pred, begin = numClass + 3, end = numClass + 5, axis=-1)
    #x,y = convert_xy(predXY)
    #w,h = convert_wh(predWH)
    #w = nd.clip(w,0,1)
    #h = nd.clip(h,0,1)
    #x0 = nd.clip(x, 0, 1)
    #y0 = nd.clip(y,0,1)
    #x1 = nd.clip(x0 + w,0,1)
    #y1 = np.clip(y0 + h, 0,1)
    #x = x0
    #y = y0
    #w = x1 - x0
    #h = y1 - y0
    XYWH = nd.concat(predXY,predWH,dim=-1)
    # pdb.set_trace()
    return predCls, predObject, XYWH
Example #16
    def mutil_get_feature(self, aligneds, batch_size=32):
        batch_size = min(len(aligneds), batch_size)
        # pdb.set_trace()
        # data = nd.array([mx.image.imdecode(_bin) for _bin in aligneds])
        data = nd.array(aligneds)
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            # _data = nd.array(aligneds[bb-batch_size:bb])
            #print(_data.shape, _label.shape)
            # time0 = datetime.datetime.now()
            _label = nd.ones((batch_size, ))
            db = mx.io.DataBatch(data=(_data, ))
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()
            #_arg, _aux = model.get_params()
            #__arg = {}
            #for k,v in _arg.iteritems():
            #  __arg[k] = v.as_in_context(_ctx)
            #_arg = __arg
            #_arg["data"] = _data.as_in_context(_ctx)
            #_arg["softmax_label"] = _label.as_in_context(_ctx)
            #for k,v in _arg.iteritems():
            #  print(k,v.context)
            #exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
            #exe.forward(is_train=False)
            #net_out = exe.outputs
            _embeddings = net_out[0].asnumpy()
            #   time_now = datetime.datetime.now()
            #   diff = time_now - time0
            #   time_consumed+=diff.total_seconds()
            print(_embeddings.shape)
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        # embeddings_list.append(embeddings)

        # input_blob = np.expand_dims(aligned, axis=0)
        # data = mx.nd.array(input_blob)
        # db = mx.io.DataBatch(data=(data,))
        # self.model.forward(db, is_train=False)
        # embedding = self.model.get_outputs()[0].asnumpy()
        embeddings = sklearn.preprocessing.normalize(embeddings)
        return embeddings
Example #17
def evaluate_accuracy(data_iterator, net, ctx, dataset):
    acc = mx.metric.Accuracy()
    for data, label in data_iterator:

        if dataset == "CIFAR10":
            data = nd.slice_axis(data=data, axis=3, begin=0, end=1)

        # as_in_context: returns an array on the target device with the same values.
        data = data.as_in_context(ctx).reshape((data.shape[0], -1))
        label = label.as_in_context(ctx)
        output = net(data)
        prediction = nd.argmax(output, axis=1)  # output shape: (batch, 10); argmax over axis=1
        acc.update(preds=prediction, labels=label)
    return acc.get()
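The CIFAR10 branch keeps only the first channel of NHWC-formatted data before flattening; a minimal shape check:

from mxnet import nd

batch = nd.zeros((4, 32, 32, 3))  # NHWC
gray = nd.slice_axis(data=batch, axis=3, begin=0, end=1)
print(gray.shape)                 # (4, 32, 32, 1)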
Example #18
def dumpR(data_set, mx_model, batch_size, name='', data_extra = None, label_shape = None):
  print('dump verification embedding..')
  data_list = data_set[0]
  issame_list = data_set[1]
  model = mx_model
  embeddings_list = []
  if data_extra is not None:
    _data_extra = nd.array(data_extra)
  time_consumed = 0.0
  if label_shape is None:
    _label = nd.ones( (batch_size,) )
  else:
    _label = nd.ones( label_shape )
  for i in range(len(data_list)):
    data = data_list[i]
    embeddings = None
    ba = 0
    while ba<data.shape[0]:
      bb = min(ba+batch_size, data.shape[0])
      count = bb-ba
      _data = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
      #print(_data.shape, _label.shape)
      time0 = datetime.datetime.now()
      if data_extra is None:
        db = mx.io.DataBatch(data=(_data,), label=(_label,))
      else:
        db = mx.io.DataBatch(data=(_data,_data_extra), label=(_label,))
      model.forward(db, is_train=False)
      net_out = model.get_outputs()
      _embeddings = net_out[0].asnumpy()
      time_now = datetime.datetime.now()
      diff = time_now - time0
      time_consumed+=diff.total_seconds()
      if embeddings is None:
        embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
      embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
      ba = bb
    embeddings_list.append(embeddings)
  embeddings = embeddings_list[0] + embeddings_list[1]
  embeddings = sklearn.preprocessing.normalize(embeddings)
  actual_issame = np.asarray(issame_list)
  outname = os.path.join('temp.bin')
  with open(outname, 'wb') as f:
    pickle.dump((embeddings, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL)
Example #19
    def lfw_test(nbatch):
      print('testing lfw..')
      embeddings_list = []
      for i in range(len(lfw_data_list)):
        lfw_data = lfw_data_list[i]
        embeddings = None
        ba = 0
        while ba<lfw_data.shape[0]:
          bb = min(ba+args.batch_size, lfw_data.shape[0])
          _data = nd.slice_axis(lfw_data, axis=0, begin=ba, end=bb)
          _label = nd.ones( (bb-ba,) )
          db = mx.io.DataBatch(data=(_data,), label=(_label,))
          model.forward(db, is_train=False)
          net_out = model.get_outputs()
          _embeddings = net_out[0].asnumpy()
          if embeddings is None:
            embeddings = np.zeros( (lfw_data.shape[0], _embeddings.shape[1]) )
          embeddings[ba:bb,:] = _embeddings
          ba = bb
        embeddings_list.append(embeddings)

      acc_list = []
      embeddings = embeddings_list[0]
      _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10)
      acc_list.append(np.mean(accuracy))
      print('[%d]Accuracy: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy)))
      print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
      embeddings = np.concatenate(embeddings_list, axis=1)
      embeddings = sklearn.preprocessing.normalize(embeddings)
      print(embeddings.shape)
      _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10)
      acc_list.append(np.mean(accuracy))
      print('[%d]Accuracy-Flip: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy)))
      print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
      pca = PCA(n_components=128)
      embeddings = pca.fit_transform(embeddings)
      embeddings = sklearn.preprocessing.normalize(embeddings)
      print(embeddings.shape)
      _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10)
      acc_list.append(np.mean(accuracy))
      print('[%d]Accuracy-PCA: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy)))
      print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
      return max(*acc_list)
Example #20
def yolo2_target(scores, output, labels, anchors, ignore_label=-1, thresh=0.5):
    """Generate training targets given predictions and labels."""
    b, h, w, n, _ = scores.shape
    anchors = np.reshape(np.array(anchors), (-1, 2))
    #scores = nd.slice_axis(outputs, begin=1, end=2, axis=-1)
    #boxes = nd.slice_axis(outputs, begin=2, end=6, axis=-1)
    gt_boxes = nd.slice_axis(labels, begin=1, end=5, axis=-1)
    target_score = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    target_id = nd.ones_like(target_score, ctx=scores.context) * ignore_label
    target_box = nd.zeros((b, h, w, n, 4), ctx=scores.context)
    sample_weight = nd.zeros((b, h, w, n, 1), ctx=scores.context)
    for b in range(output.shape[0]):
        # find the best match for each ground-truth
        label = labels[b].asnumpy()
        valid_label = label[np.where(label[:, 0] > -0.5)[0], :]
        # shuffle because multiple ground truths may match the same anchor; after shuffling, which match is kept last is random
        np.random.shuffle(valid_label)
        for l in valid_label:
            gx, gy, gw, gh = (l[1] + l[3]) / 2, (
                l[2] + l[4]) / 2, l[3] - l[1], l[4] - l[2]
            ind_x = int(gx * w)
            ind_y = int(gy * h)
            tx = gx * w - ind_x
            ty = gy * h - ind_y
            gw = gw * w
            gh = gh * h
            # find the best match using width and height only, assuming centers are identical
            intersect = np.minimum(anchors[:, 0], gw) * np.minimum(
                anchors[:, 1], gh)
            ovps = intersect / (gw * gh + anchors[:, 0] * anchors[:, 1] -
                                intersect)
            best_match = int(np.argmax(ovps))
            target_id[b, ind_y, ind_x, best_match, :] = l[0]
            target_score[b, ind_y, ind_x, best_match, :] = 1.0
            tw = np.log(gw / anchors[best_match, 0])
            th = np.log(gh / anchors[best_match, 1])
            target_box[b, ind_y, ind_x,
                       best_match, :] = mx.nd.array([tx, ty, tw, th])
            sample_weight[b, ind_y, ind_x, best_match, :] = 1.0
            # print('ind_y', ind_y, 'ind_x', ind_x, 'best_match', best_match, 't', tx, ty, tw, th, 'ovp', ovps[best_match], 'gt', gx, gy, gw/w, gh/h, 'anchor', anchors[best_match, 0], anchors[best_match, 1])
    return target_id, target_score, target_box, sample_weight
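The anchor matching inside the loop is plain IoU arithmetic on (width, height) pairs, with the centers assumed identical; a standalone sketch with made-up anchors:

import numpy as np

anchors = np.array([[1.0, 2.0], [3.0, 3.0], [5.0, 4.0]])  # (w, h) per anchor
gw, gh = 2.5, 2.5  # ground-truth width/height in grid units

intersect = np.minimum(anchors[:, 0], gw) * np.minimum(anchors[:, 1], gh)
ovps = intersect / (gw * gh + anchors[:, 0] * anchors[:, 1] - intersect)
best_match = int(np.argmax(ovps))
print(ovps.round(3), best_match)  # the 3x3 anchor wins for a 2.5x2.5 box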
Example #21
def evaluate_accuracy(data_iterator, network, ctx, dataset):
    numerator = 0
    denominator = 0

    for data, label in data_iterator:

        if dataset=="CIFAR10":
            data = nd.slice_axis(data=data, axis=3, begin=0, end=1)

        data = data.as_in_context(ctx).reshape((data.shape[0],-1))
        label = label.as_in_context(ctx)
        output = network(data)  # at test time the dropout rate must be 0.0

        predictions = nd.argmax(output, axis=1)  # output shape: (batch_size, num_outputs)

        predictions=predictions.asnumpy()
        label=label.asnumpy()
        numerator += sum(predictions == label)
        denominator += data.shape[0]

    return (numerator / denominator)
Example #22
def get_data(path, batch_size, use_bgr=False):
    image_size = (112, 112)
    data_set = verification.load_bin(path, image_size)

    data_list, issame_list = data_set
    _label = nd.ones((batch_size, 1))
    for i in range(1):  # originally range(len(data_list)); only the first set is used
        data = data_list[i]
        print('datasize: ', len(data))
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            if use_bgr:
                _data = _data[:, ::-1, :, :]
            #print(_data.shape, _label.shape)
            db = mx.io.DataBatch(data=(_data, ))  #, label=(_label,))
            ba = bb
            yield (db, count)
Example #23
    def gen_noise(self):
        self.b = nd.array(pre_fir().reshape((-1, 1)), ctx=ctx)
        self.pp = pre_fftfilt(self.b,
                              shape=(self.noiseAll_size, self.train.shape[-1]),
                              nfft=None)

        if ctx == mx.gpu():
            if self.localnoise is not None:
                print('local noise dataset!')
                # noise = nd.random.shuffle(self.localnoise).as_in_context(mx.gpu())
                # noise = self.localnoise.as_in_context(mx.gpu())
                return nd.slice_axis(data=self.localnoise.as_in_context(
                    mx.gpu()),
                                     axis=0,
                                     begin=0,
                                     end=self.noiseAll_size)
            noise = GenNoise_matlab_nd(shape=(self.noiseAll_size,
                                              self.train.shape[-1]),
                                       params=self.pp)
        else:
            raise RuntimeError('gen_noise requires a GPU context')
        return noise
Example #24
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd)>0:
      for i in range(len(cvd.split(','))):
        ctx.append(mx.gpu(i))
    if len(ctx)==0:
      ctx = [mx.cpu()]
      print('use cpu')
    else:
      print('gpu num:', len(ctx))
    prefix = args.prefix
    prefix_dir = os.path.dirname(prefix)
    if not os.path.exists(prefix_dir):
      os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    args.num_layers = int(args.network[1:])
    print('num_layers', args.num_layers)
    if args.per_batch_size==0:
      args.per_batch_size = 128
    args.batch_size = args.per_batch_size*args.ctx_num
    args.image_channel = 3

    data_dir = args.data_dir
    if args.task=='gender':
      data_dir = args.gender_data_dir
    elif args.task=='age':
      data_dir = args.age_data_dir
    print('data dir', data_dir)
    path_imgrec = None
    path_imglist = None
    prop = face_image.load_property(data_dir)
    args.num_classes = prop.num_classes
    image_size = prop.image_size
    args.image_h = image_size[0]
    args.image_w = image_size[1]
    print('image_size', image_size)
    assert(args.num_classes>0)
    print('num_classes', args.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")


    print('Called with argument:', args)
    data_shape = (args.image_channel,image_size[0],image_size[1])
    mean = None

    begin_epoch = 0
    net = get_model()
    #if args.task=='':
    #  test_net = get_model_test(net)
    #print(net.__class__)
    #net = net0[0]
    if args.network[0]=='r' or args.network[0]=='y':
      initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style
    elif args.network[0]=='i' or args.network[0]=='x':
      initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception
    else:
      initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)
    net.hybridize()
    if args.mode=='gluon':
      if len(args.pretrained)==0:
        pass
      else:
        net.load_params(args.pretrained, allow_missing=True, ignore_extra = True)
      net.initialize(initializer)
      net.collect_params().reset_ctx(ctx)

    val_iter = None
    if args.task=='':
      train_iter = FaceImageIter(
          batch_size           = args.batch_size,
          data_shape           = data_shape,
          path_imgrec          = path_imgrec,
          shuffle              = True,
          rand_mirror          = args.rand_mirror,
          mean                 = mean,
          cutoff               = args.cutoff,
      )
    else:
      train_iter = FaceImageIterAge(
          batch_size           = args.batch_size,
          data_shape           = data_shape,
          path_imgrec          = path_imgrec,
          task                 = args.task,
          shuffle              = True,
          rand_mirror          = args.rand_mirror,
          mean                 = mean,
          cutoff               = args.cutoff,
      )

    if args.task=='age':
      metric = CompositeEvalMetric([MAEMetric(), CUMMetric()])
    elif args.task=='gender':
      metric = CompositeEvalMetric([AccMetric()])
    else:
      metric = CompositeEvalMetric([AccMetric()])

    ver_list = []
    ver_name_list = []
    if args.task=='':
      for name in args.eval.split(','):
        path = os.path.join(data_dir,name+".bin")
        if os.path.exists(path):
          data_set = verification.load_bin(path, image_size)
          ver_list.append(data_set)
          ver_name_list.append(name)
          print('ver', name)

    def ver_test(nbatch):
      results = []
      for i in range(len(ver_list)):
        acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], net, ctx, batch_size = args.batch_size)
        print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
        #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
        print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
        results.append(acc2)
      return results

    def val_test(nbatch=0):
      acc = 0.0
      #if args.task=='age':
      if len(args.age_data_dir)>0:
        val_iter = FaceImageIterAge(
            batch_size           = args.batch_size,
            data_shape           = data_shape,
            path_imgrec          = os.path.join(args.age_data_dir, 'val.rec'),
            task                 = args.task,
            shuffle              = False,
            rand_mirror          = False,
            mean                 = mean,
        )
        _metric = MAEMetric()
        val_metric = mx.metric.create(_metric)
        val_metric.reset()
        _metric2 = CUMMetric()
        val_metric2 = mx.metric.create(_metric2)
        val_metric2.reset()
        val_iter.reset()
        for batch in val_iter:
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            for x in data:
                outputs.append(net(x)[2])
            val_metric.update(label, outputs)
            val_metric2.update(label, outputs)
        _value = val_metric.get_name_value()[0][1]
        print('[%d][VMAE]: %f'%(nbatch, _value))
        _value = val_metric2.get_name_value()[0][1]
        if args.task=='age':
          acc = _value
        print('[%d][VCUM]: %f'%(nbatch, _value))
      if len(args.gender_data_dir)>0:
        val_iter = FaceImageIterAge(
            batch_size           = args.batch_size,
            data_shape           = data_shape,
            path_imgrec          = os.path.join(args.gender_data_dir, 'val.rec'),
            task                 = args.task,
            shuffle              = False,
            rand_mirror          = False,
            mean                 = mean,
        )
        _metric = AccMetric()
        val_metric = mx.metric.create(_metric)
        val_metric.reset()
        val_iter.reset()
        for batch in val_iter:
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            for x in data:
                outputs.append(net(x)[1])
            val_metric.update(label, outputs)
        _value = val_metric.get_name_value()[0][1]
        if args.task=='gender':
          acc = _value
        print('[%d][VACC]: %f'%(nbatch, _value))
      return acc


    total_time = 0
    num_epochs = 0
    best_acc = [0]
    highest_acc = [0.0, 0.0]  #lfw and target
    global_step = [0]
    save_step = [0]
    if len(args.lr_steps)==0:
      lr_steps = [100000, 140000, 160000]
      p = 512.0/args.batch_size
      for l in range(len(lr_steps)):
        lr_steps[l] = int(lr_steps[l]*p)
    else:
      lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    kv = mx.kv.create('device')
    #kv = mx.kv.create('local')
    #_rescale = 1.0/args.ctx_num
    #opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
    #opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd)
    if args.mode=='gluon':
      trainer = gluon.Trainer(net.collect_params(), 'sgd', 
              {'learning_rate': args.lr, 'wd': args.wd, 'momentum': args.mom, 'multi_precision': True},
              kvstore=kv)
    else:
      _rescale = 1.0/args.ctx_num
      opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
      _cb = mx.callback.Speedometer(args.batch_size, 20)
      arg_params = None
      aux_params = None
      data = mx.sym.var('data')
      label = mx.sym.var('softmax_label')
      if args.margin_a>0.0:
        fc7 = net(data, label)
      else:
        fc7 = net(data)
      #sym = mx.symbol.SoftmaxOutput(data=fc7, label = label, name='softmax', normalization='valid')
      ceop = gluon.loss.SoftmaxCrossEntropyLoss()
      loss = ceop(fc7, label) 
      #loss = loss/args.per_batch_size
      loss = mx.sym.mean(loss)
      sym = mx.sym.Group( [mx.symbol.BlockGrad(fc7), mx.symbol.MakeLoss(loss, name='softmax')] )

    def _batch_callback():
      mbatch = global_step[0]
      global_step[0]+=1
      for _lr in lr_steps:
        if mbatch==_lr:
          args.lr *= 0.1
          if args.mode=='gluon':
            trainer.set_learning_rate(args.lr)
          else:
            opt.lr  = args.lr
          print('lr change to', args.lr)
          break

      #_cb(param)
      if mbatch%1000==0:
        print('lr-batch-epoch:',args.lr, mbatch)

      if mbatch>0 and mbatch%args.verbose==0:
        save_step[0]+=1
        msave = save_step[0]
        do_save = False
        is_highest = False
        if args.task=='age' or args.task=='gender':
          acc = val_test(mbatch)
          if acc>=highest_acc[-1]:
            highest_acc[-1] = acc
            is_highest = True
            do_save = True
        else:
          acc_list = ver_test(mbatch)
          if len(acc_list)>0:
            lfw_score = acc_list[0]
            if lfw_score>highest_acc[0]:
              highest_acc[0] = lfw_score
              if lfw_score>=0.998:
                do_save = True
            if acc_list[-1]>=highest_acc[-1]:
              highest_acc[-1] = acc_list[-1]
              if lfw_score>=0.99:
                do_save = True
                is_highest = True
        if args.ckpt==0:
          do_save = False
        elif args.ckpt>1:
          do_save = True
        if do_save:
          print('saving', msave)
          #print('saving gluon params')
          fname = os.path.join(args.prefix, 'model-gluon.params')
          net.save_params(fname)
          fname = os.path.join(args.prefix, 'model')
          net.export(fname, msave)
          #arg, aux = model.get_params()
          #mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
        print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1]))
      if args.max_steps>0 and mbatch>args.max_steps:
        sys.exit(0)

    def _batch_callback_sym(param):
      _cb(param)
      _batch_callback()


    if args.mode!='gluon':
      model = mx.mod.Module(
          context       = ctx,
          symbol        = sym,
      )
      model.fit(train_iter,
          begin_epoch        = 0,
          num_epoch          = args.end_epoch,
          eval_data          = None,
          eval_metric        = metric,
          kvstore            = 'device',
          optimizer          = opt,
          initializer        = initializer,
          arg_params         = arg_params,
          aux_params         = aux_params,
          allow_missing      = True,
          batch_end_callback = _batch_callback_sym,
          epoch_end_callback = None )
    else:
      loss_weight = 1.0
      if args.task=='age':
        loss_weight = 1.0/AGE
      #loss = gluon.loss.SoftmaxCrossEntropyLoss(weight = loss_weight)
      loss = nd.SoftmaxOutput
      #loss = gluon.loss.SoftmaxCrossEntropyLoss()
      while True:
          #trainer = update_learning_rate(opt.lr, trainer, epoch, opt.lr_factor, lr_steps)
          tic = time.time()
          train_iter.reset()
          metric.reset()
          btic = time.time()
          for i, batch in enumerate(train_iter):
              _batch_callback()
              #data = gluon.utils.split_and_load(batch.data[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0)
              #label = gluon.utils.split_and_load(batch.label[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0)
              data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
              label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
              outputs = []
              Ls = []
              with ag.record():
                  for x, y in zip(data, label):
                      #print(y.asnumpy())
                      if args.task=='':
                        if args.margin_a>0.0:
                          z = net(x,y)
                        else:
                          z = net(x)
                        #print(z[0].shape, z[1].shape)
                      else:
                        z = net(x)
                      if args.task=='gender':
                        L = loss(z[1], y)
                        #L = L/args.per_batch_size
                        Ls.append(L)
                        outputs.append(z[1])
                      elif args.task=='age':
                        for k in range(AGE):
                          _z = nd.slice_axis(z[2], axis=1, begin=k*2, end=k*2+2)
                          _y = nd.slice_axis(y, axis=1, begin=k, end=k+1)
                          _y = nd.flatten(_y)
                          L = loss(_z, _y)
                          #L = L/args.per_batch_size
                          #L /= AGE
                          Ls.append(L)
                        outputs.append(z[2])
                      else:
                        L = loss(z, y)
                        #L = L/args.per_batch_size
                        Ls.append(L)
                        outputs.append(z)
                      # store the loss and do backward after we have done forward
                      # on all GPUs for better speed on multiple GPUs.
                  ag.backward(Ls)
              #trainer.step(batch.data[0].shape[0], ignore_stale_grad=True)
              #trainer.step(args.ctx_num)
              n = batch.data[0].shape[0]
              #print(n,n)
              trainer.step(n)
              metric.update(label, outputs)
              if i>0 and i%20==0:
                  name, acc = metric.get()
                  if len(name)==2:
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f, %s=%f'%(
                                   num_epochs, i, args.batch_size/(time.time()-btic), name[0], acc[0], name[1], acc[1]))
                  else:
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f'%(
                                   num_epochs, i, args.batch_size/(time.time()-btic), name[0], acc[0]))
                  #metric.reset()
              btic = time.time()

          epoch_time = time.time()-tic

          # The first epoch is usually much slower than the subsequent epochs,
          # so don't factor it into the average.
          if num_epochs > 0:
            total_time = total_time + epoch_time

          #name, acc = metric.get()
          #logger.info('[Epoch %d] training: %s=%f, %s=%f'%(num_epochs, name[0], acc[0], name[1], acc[1]))
          logger.info('[Epoch %d] time cost: %f'%(num_epochs, epoch_time))
          num_epochs = num_epochs + 1
          #name, val_acc = test(ctx, val_data)
          #logger.info('[Epoch %d] validation: %s=%f, %s=%f'%(epoch, name[0], val_acc[0], name[1], val_acc[1]))

          # save model if meet requirements
          #save_checkpoint(epoch, val_acc[0], best_acc)
      if num_epochs > 1:
          print('Average epoch time: {}'.format(float(total_time)/(num_epochs - 1)))
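The age branch above treats z[2] as AGE independent two-way classifiers laid out side by side along axis 1, slicing out two logits per head; a standalone sketch of that slicing, with a hypothetical AGE = 3 and a batch of 2:

from mxnet import nd

AGE = 3
z2 = nd.random.uniform(shape=(2, AGE * 2))  # two logits per age head
for k in range(AGE):
    _z = nd.slice_axis(z2, axis=1, begin=k * 2, end=k * 2 + 2)
    print(k, _z.shape)                      # (2, 2) for every head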
Example #25
def test(data_set, net, ctx, batch_size, nfolds=10):
  print('testing verification..')
  data_list = data_set[0]
  issame_list = data_set[1]
  embeddings_list = []
  time_consumed = 0.0
  for i in range(len(data_list)):
    data = data_list[i]
    embeddings = None
    ba = 0
    while ba<data.shape[0]:
      bb = min(ba+batch_size, data.shape[0])
      count = bb-ba
      #print(ba, bb)
      x = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
      #print(_data.shape, _label.shape)
      time0 = datetime.datetime.now()
      #x = x.as_in_context(ctx[0])
      xs = gluon.utils.split_and_load(x, ctx_list=ctx, batch_axis=0)
      zs = []
      for x in xs:
        with mx.autograd.predict_mode():
          z = net.feature(x)
        zs.append(z)
      zss = []
      for z in zs:
        zss.append(z.asnumpy())
      zss = np.concatenate(zss, axis=0)
      #print(zss.shape)
      _embeddings = zss
      #_arg, _aux = model.get_params()
      #__arg = {}
      #for k,v in _arg.iteritems():
      #  __arg[k] = v.as_in_context(_ctx)
      #_arg = __arg
      #_arg["data"] = _data.as_in_context(_ctx)
      #_arg["softmax_label"] = _label.as_in_context(_ctx)
      #for k,v in _arg.iteritems():
      #  print(k,v.context)
      #exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
      #exe.forward(is_train=False)
      #net_out = exe.outputs
      #_embeddings = z.asnumpy()
      time_now = datetime.datetime.now()
      diff = time_now - time0
      time_consumed+=diff.total_seconds()
      #print(_embeddings.shape)
      if embeddings is None:
        embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
      embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
      ba = bb
    embeddings_list.append(embeddings)

  _xnorm = 0.0
  _xnorm_cnt = 0
  for embed in embeddings_list:
    for i in range(embed.shape[0]):
      _em = embed[i]
      _norm=np.linalg.norm(_em)
      #print(_em.shape, _norm)
      _xnorm+=_norm
      _xnorm_cnt+=1
  _xnorm /= _xnorm_cnt

  embeddings = embeddings_list[0].copy()
  embeddings = sklearn.preprocessing.normalize(embeddings)
  acc1 = 0.0
  std1 = 0.0
  #_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
  #acc1, std1 = np.mean(accuracy), np.std(accuracy)

  #print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
  #embeddings = np.concatenate(embeddings_list, axis=1)
  embeddings = embeddings_list[0] + embeddings_list[1]
  embeddings = sklearn.preprocessing.normalize(embeddings)
  print(embeddings.shape)
  print('infer time', time_consumed)
  _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
  acc2, std2 = np.mean(accuracy), np.std(accuracy)
  return acc1, std1, acc2, std2, _xnorm, embeddings_list
Example #26
    cls_id = cls_ids.slice_axis(axis=-1, begin=i, end=i + 1)
    score = scores.slice_axis(axis=-1, begin=i, end=i + 1)
    # per class results
    per_result = concat(*[cls_id, score, bboxes], dim=-1)
    results.append(per_result)
result = concat(*results, dim=1)
if nms_thresh > 0 and nms_thresh < 1:
    result = box_nms(result,
                     overlap_thresh=nms_thresh,
                     topk=nms_topk,
                     valid_thresh=0.01,
                     id_index=0,
                     score_index=1,
                     coord_start=2,
                     force_suppress=False)
    if post_nms > 0:
        result = result.slice_axis(axis=1, begin=0, end=post_nms)

class_IDs = slice_axis(result, axis=2, begin=0, end=1)
scores = slice_axis(result, axis=2, begin=1, end=2)
bounding_boxs = slice_axis(result, axis=2, begin=2, end=6)

#%% display the outputs
ax = utils.viz.plot_bbox(img,
                         bounding_boxs[0],
                         scores[0],
                         class_IDs[0],
                         class_names=net.classes,
                         thresh=0.5)
plt.show()
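After box_nms, each detection row is laid out as [class_id, score, x1, y1, x2, y2] (per the id_index/score_index/coord_start arguments above), which is exactly what the three slice_axis calls unpack; a shape-only sketch, assuming result has shape (batch, num_detections, 6) and slice_axis is mxnet.ndarray.slice_axis:

from mxnet import nd
from mxnet.ndarray import slice_axis

result = nd.zeros((1, 100, 6))                              # (batch, detections, 6)
class_IDs = slice_axis(result, axis=2, begin=0, end=1)      # (1, 100, 1)
scores = slice_axis(result, axis=2, begin=1, end=2)         # (1, 100, 1)
bounding_boxs = slice_axis(result, axis=2, begin=2, end=6)  # (1, 100, 4)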
Example #27
def test(data_set, mx_model, batch_size, nfolds=10, data_extra = None, label_shape = None):
  print('testing verification..')
  data_list = data_set[0]
  issame_list = data_set[1]
  model = mx_model
  embeddings_list = []
  if data_extra is not None:
    _data_extra = nd.array(data_extra)
  time_consumed = 0.0
  if label_shape is None:
    _label = nd.ones( (batch_size,) )
  else:
    _label = nd.ones( label_shape )
  for i in range(len(data_list)):
    data = data_list[i]
    embeddings = None
    ba = 0
    while ba<data.shape[0]:
      bb = min(ba+batch_size, data.shape[0])
      count = bb-ba
      _data = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
      #print(_data.shape, _label.shape)
      time0 = datetime.datetime.now()
      if data_extra is None:
        db = mx.io.DataBatch(data=(_data,), label=(_label,))
      else:
        db = mx.io.DataBatch(data=(_data,_data_extra), label=(_label,))
      model.forward(db, is_train=False)
      net_out = model.get_outputs()
      #_arg, _aux = model.get_params()
      #__arg = {}
      #for k,v in _arg.iteritems():
      #  __arg[k] = v.as_in_context(_ctx)
      #_arg = __arg
      #_arg["data"] = _data.as_in_context(_ctx)
      #_arg["softmax_label"] = _label.as_in_context(_ctx)
      #for k,v in _arg.iteritems():
      #  print(k,v.context)
      #exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
      #exe.forward(is_train=False)
      #net_out = exe.outputs
      _embeddings = net_out[0].asnumpy()
      time_now = datetime.datetime.now()
      diff = time_now - time0
      time_consumed+=diff.total_seconds()
      #print(_embeddings.shape)
      if embeddings is None:
        embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
      embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
      ba = bb
    embeddings_list.append(embeddings)

  _xnorm = 0.0
  _xnorm_cnt = 0
  for embed in embeddings_list:
    for i in range(embed.shape[0]):
      _em = embed[i]
      _norm=np.linalg.norm(_em)
      #print(_em.shape, _norm)
      _xnorm+=_norm
      _xnorm_cnt+=1
  _xnorm /= _xnorm_cnt

  embeddings = embeddings_list[0].copy()
  embeddings = sklearn.preprocessing.normalize(embeddings)
  acc1 = 0.0
  std1 = 0.0
  #_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
  #acc1, std1 = np.mean(accuracy), np.std(accuracy)

  #print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
  #embeddings = np.concatenate(embeddings_list, axis=1)
  embeddings = embeddings_list[0] + embeddings_list[1]
  embeddings = sklearn.preprocessing.normalize(embeddings)
  print(embeddings.shape)
  print('infer time', time_consumed)
  _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
  acc2, std2 = np.mean(accuracy), np.std(accuracy)
  return acc1, std1, acc2, std2, _xnorm, embeddings_list
Example #28
def argtopk(input, k, dim, descending=True):
    # Sort indices along `dim` (descending by default) and keep the first k.
    idx = nd.argsort(input, dim, is_ascend=not descending)
    return nd.slice_axis(idx, dim, 0, k)
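A quick usage sketch, assuming nd is mxnet.ndarray:

from mxnet import nd

x = nd.array([[0.1, 0.9, 0.5],
              [0.7, 0.2, 0.4]])
print(argtopk(x, k=2, dim=1))  # per-row indices of the two largest values
# [[1. 2.]
#  [0. 2.]]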
Example #29
def test_badcase(data_set, mx_model, batch_size, name='', data_extra = None, label_shape = None):
  print('testing verification badcase..')
  data_list = data_set[0]
  issame_list = data_set[1]
  model = mx_model
  embeddings_list = []
  if data_extra is not None:
    _data_extra = nd.array(data_extra)
  time_consumed = 0.0
  if label_shape is None:
    _label = nd.ones( (batch_size,) )
  else:
    _label = nd.ones( label_shape )
  for i in range(len(data_list)):
    data = data_list[i]
    embeddings = None
    ba = 0
    while ba<data.shape[0]:
      bb = min(ba+batch_size, data.shape[0])
      count = bb-ba
      _data = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
      #print(_data.shape, _label.shape)
      time0 = datetime.datetime.now()
      if data_extra is None:
        db = mx.io.DataBatch(data=(_data,), label=(_label,))
      else:
        db = mx.io.DataBatch(data=(_data,_data_extra), label=(_label,))
      model.forward(db, is_train=False)
      net_out = model.get_outputs()
      _embeddings = net_out[0].asnumpy()
      time_now = datetime.datetime.now()
      diff = time_now - time0
      time_consumed+=diff.total_seconds()
      if embeddings is None:
        embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
      embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
      ba = bb
    embeddings_list.append(embeddings)
  embeddings = embeddings_list[0] + embeddings_list[1]
  embeddings = sklearn.preprocessing.normalize(embeddings)
  thresholds = np.arange(0, 4, 0.01)
  actual_issame = np.asarray(issame_list)
  nrof_folds = 10
  embeddings1 = embeddings[0::2]
  embeddings2 = embeddings[1::2]
  assert(embeddings1.shape[0] == embeddings2.shape[0])
  assert(embeddings1.shape[1] == embeddings2.shape[1])
  nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
  nrof_thresholds = len(thresholds)
  k_fold = LFold(n_splits=nrof_folds, shuffle=False)
  
  tprs = np.zeros((nrof_folds,nrof_thresholds))
  fprs = np.zeros((nrof_folds,nrof_thresholds))
  accuracy = np.zeros((nrof_folds))
  indices = np.arange(nrof_pairs)
  
  diff = np.subtract(embeddings1, embeddings2)
  dist = np.sum(np.square(diff),1)
  data = data_list[0]

  pouts = []
  nouts = []
  
  for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
       
      # Find the best threshold for the fold
      acc_train = np.zeros((nrof_thresholds))
      #print(train_set)
      #print(train_set.__class__)
      for threshold_idx, threshold in enumerate(thresholds):
          p2 = dist[train_set]
          p3 = actual_issame[train_set]
          _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, p2, p3)
      best_threshold_index = np.argmax(acc_train)
      for threshold_idx, threshold in enumerate(thresholds):
          tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
      _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
      best_threshold = thresholds[best_threshold_index]
      for iid in test_set:
        ida = iid*2
        idb = ida+1
        asame = actual_issame[iid]
        _dist = dist[iid]
        violate = _dist - best_threshold
        if not asame:
          violate *= -1.0
        if violate>0.0:
          imga = data[ida].asnumpy().transpose( (1,2,0) )[...,::-1] #to bgr
          imgb = data[idb].asnumpy().transpose( (1,2,0) )[...,::-1]
          #print(imga.shape, imgb.shape, violate, asame, _dist)
          if asame:
            pouts.append( (imga, imgb, _dist, best_threshold, ida) )
          else:
            nouts.append( (imga, imgb, _dist, best_threshold, ida) )

        
  tpr = np.mean(tprs,0)
  fpr = np.mean(fprs,0)
  acc = np.mean(accuracy)
  pouts = sorted(pouts, key = lambda x: x[2], reverse=True)
  nouts = sorted(nouts, key = lambda x: x[2], reverse=False)
  print(len(pouts), len(nouts))
  print('acc', acc)
  gap = 10
  image_shape = (112,224,3)
  out_dir = "./badcases"
  if not os.path.exists(out_dir):
    os.makedirs(out_dir)
  if len(nouts)>0:
    threshold = nouts[0][3]
  else:
    threshold = pouts[-1][3]
  
  for item in [(pouts, 'positive(false_negative).png'), (nouts, 'negative(false_positive).png')]:
    cols = 4
    rows = 8000
    outs = item[0]
    if len(outs)==0:
      continue
    #if len(outs)==9:
    #  cols = 3
    #  rows = 3

    _rows = int(math.ceil(len(outs)/cols))
    rows = min(rows, _rows)
    hack = {}

    if name.startswith('cfp') and item[1].startswith('pos'):
      hack = {0:'manual/238_13.jpg.jpg', 6:'manual/088_14.jpg.jpg', 10:'manual/470_14.jpg.jpg', 25:'manual/238_13.jpg.jpg', 28:'manual/143_11.jpg.jpg'}

    filename = item[1]
    if len(name)>0:
      filename = name+"_"+filename
    filename = os.path.join(out_dir, filename)
    img = np.zeros( (image_shape[0]*rows+20, image_shape[1]*cols+(cols-1)*gap, 3), dtype=np.uint8 )
    img[:,:,:] = 255
    text_color = (0,0,153)
    text_color = (255,178,102)
    text_color = (153,255,51)
    for outi, out in enumerate(outs):
      row = outi//cols
      col = outi%cols
      if row==rows:
        break
      imga = out[0].copy()
      imgb = out[1].copy()
      if outi in hack:
        idx = out[4]
        print('noise idx',idx)
        aa = hack[outi]
        imgb = cv2.imread(aa)
        #if aa==1:
        #  imgb = cv2.transpose(imgb)
        #  imgb = cv2.flip(imgb, 1)
        #elif aa==3:
        #  imgb = cv2.transpose(imgb)
        #  imgb = cv2.flip(imgb, 0)
        #else:
        #  for ii in xrange(2):
        #    imgb = cv2.transpose(imgb)
        #    imgb = cv2.flip(imgb, 1)
      dist = out[2]
      _img = np.concatenate( (imga, imgb), axis=1 )
      k = "%.3f"%dist
      #print(k)
      font = cv2.FONT_HERSHEY_SIMPLEX
      cv2.putText(_img,k,(80,image_shape[0]//2+7), font, 0.6, text_color, 2)
      #_filename = filename+"_%d.png"%outi
      #cv2.imwrite(_filename, _img)
      img[row*image_shape[0]:(row+1)*image_shape[0], (col*image_shape[1]+gap*col):((col+1)*image_shape[1]+gap*col),:] = _img
    #threshold = outs[0][3]
    font = cv2.FONT_HERSHEY_SIMPLEX
    k = "threshold: %.3f"%threshold
    cv2.putText(img,k,(img.shape[1]//2-70,img.shape[0]-5), font, 0.6, text_color, 2)
    cv2.imwrite(filename, img)
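
Note: the badcase grid above depends on a calculate_accuracy helper defined elsewhere (not shown in this listing). A minimal NumPy sketch consistent with the call sites — threshold on squared L2 distance, returning (tpr, fpr, acc) — might look like the following; treat it as an illustration, not the project's exact code:

import numpy as np

def calculate_accuracy(threshold, dist, actual_issame):
    # predict "same person" when the squared distance is below the threshold
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
    tpr = 0.0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0.0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc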
Example #30
0
    def forward(self, input_vec, loss=None, training=True):
        # print('************* ' + str(input_vec.shape[1]) + ' *************')
        # print('############# ' + str(input_vec.shape) + ' #############')
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot(including global)
        inputs = {}
        for slot in self.slots:
            inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
        input_global = []
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        if (not self.sort_input_vec) and self.state_feature != 'dip':
            layer.append([])
            for slot in self.slots:
                layer[0].append(self.input_trans[slot](inputs[slot]))
            layer[0].append(self.input_trans['global'](inputs['global']))
        elif self.state_feature == 'dip':
            sorted_inputs = []
            for slot in self.slots:
                sorted_inputs.append(inputs[slot])
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))
        elif self.sort_input_vec:
            sorted_inputs = []
            for slot in self.slots:
                tmp = inputs[slot][:, :-2].sort(is_ascend=False)
                if tmp.shape[1] < 20:
                    tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
                else:
                    tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
                sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            if self.recurrent_mode is False:
                # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
                layer.append(self.ma_trans[i](layer[i], loss))
            else:
                layer.append(self.ma_trans(layer[i], loss))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op.forward(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op.forward(layer[-1][-1])

            # last_hidden_layer -> outputs
            outputs = []
            slotv_probs = []
            slotqs = []
            slot_probs = []
            top_decision = []
            for i in range(len(self.slots) + 1):
                if self.use_dueling is False:
                    # NOTE: this path only fills `outputs`; the return below
                    # assumes the dueling path set prob/value/top_decision
                    outputs.append(self.output_trans[i](layer[-1][i]))
                else:
                    if i < len(self.slots):
                        cur_slotv_prob = self.output_trans_local_valueP.forward(layer[-1][i], training=training)
                        cur_slotv_prob = nd.softmax(cur_slotv_prob)
                    else:
                        cur_slotv_prob = self.output_trans_global_valueP.forward(layer[-1][i], training=training)
                        cur_slotv_prob = nd.softmax(cur_slotv_prob)

                    if self.dueling_share_last:
                        if i < len(self.slots):
                            cur_slotq = self.output_trans_local_slotQ.forward(layer[-1][i], training=training)
                            cur_slot_prob = self.output_trans_local_slotP.forward(layer[-1][i], training=training).reshape(-1,1)
                            cur_slotv_prob = cur_slotv_prob*cur_slot_prob
                            # cur_slot_prob = nd.softmax(cur_slot_prob)
                            if self.shared_last_layer_use_bias:
                                cur_slotq = cur_slotq + nd.slice(self.value_bias_local.data(), begin=(i, ), end=(i + 1, ))
                        else:
                            cur_slotq = self.output_trans_global_slotQ.forward(layer[-1][i], training=training)
                            cur_slot_prob = self.output_trans_global_slotP.forward(layer[-1][i], training=training).reshape(-1,1)
                            cur_slotv_prob = cur_slotv_prob*cur_slot_prob
                            # cur_slot_prob = nd.softmax(cur_slot_prob)

                        top_decision.append(cur_slot_prob)
                    else:
                        # NOTE: without dueling_share_last this branch sets only
                        # cur_slotq; cur_slot_prob below would be unbound
                        cur_slotq = self.output_trans_value[i](layer[-1][i])

                    slotv_probs.append(cur_slotv_prob)
                    slot_probs.append(cur_slot_prob)
                    slotqs.append(cur_slotq)

            # batch_slotv_probs_list = []
            # slot_prob_softmax = nd.softmax(nd.concat(*slot_probs, dim=1))
            # slot_prob_split = nd.split(slot_prob_softmax, axis=1, num_outputs=len(self.slots)+1)
            # assert len(slotv_probs) == len(self.slots)+1
            # for i in range(len(slotv_probs)):
            #     tmp = slot_prob_split[i].reshape(-1,1)*slotv_probs[i]
            #     batch_slotv_probs_list.append(tmp)
            batch_slot_prob = nd.softmax(nd.concat(*slot_probs, dim=1))
            batch_slot_slotq = nd.concat(*slotqs, dim=1)
            batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
            batch_top_decision = nd.softmax(nd.concat(*top_decision,dim=1))

            # print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
            # print(batch_slotv_prob)
            # print(batch_slot_prob.shape)
            # print(batch_slot_slotq.shape)
            # print(batch_slotv_prob.shape)

            prob = batch_slotv_prob
            value = nd.max(batch_slot_slotq, axis=1)
            top_decision = batch_top_decision

            # CTname = threading.currentThread().getName()
            # print(CTname+' top decision is : ')
            # print(top_decision)

        # NOTE: prob/value/top_decision are defined only on the
        # share_last_layer == False path above
        return prob, value, top_decision
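
The sort_input_vec branch above normalizes every slot to a fixed width of 20 by zero-padding short rows and slicing long ones. A self-contained sketch of just that pad-or-slice step (the helper name and the toy shapes are mine; the original also carries the last two columns through separately):

from mxnet import nd

def pad_or_truncate(x, width=20):
    # sort each row in descending order, then force the row length to `width`:
    # zero-pad short rows, slice_axis long ones
    x = x.sort(is_ascend=False)
    if x.shape[1] < width:
        pad = nd.zeros((x.shape[0], width - x.shape[1]))
        return nd.concat(x, pad, dim=1)
    return nd.slice_axis(x, axis=1, begin=0, end=width)

print(pad_or_truncate(nd.random.uniform(shape=(2, 7))).shape)   # (2, 20)
print(pad_or_truncate(nd.random.uniform(shape=(2, 33))).shape)  # (2, 20)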
Example #31
0
def test(lfw_set, mx_model, batch_size):
    print('testing lfw..')
    lfw_data_list = lfw_set[0]
    issame_list = lfw_set[1]
    model = mx_model
    embeddings_list = []
    for i in xrange(len(lfw_data_list)):
        lfw_data = lfw_data_list[i]
        embeddings = None
        ba = 0
        while ba < lfw_data.shape[0]:
            bb = min(ba + batch_size, lfw_data.shape[0])
            _data = nd.slice_axis(lfw_data, axis=0, begin=ba, end=bb)
            _label = nd.ones((bb - ba, ))
            # print(_data.shape, _label.shape)
            db = mx.io.DataBatch(data=(_data, ), label=(_label, ))
            model.forward(db, is_train=False)
            net_out = model.get_outputs()
            # _arg, _aux = model.get_params()
            # __arg = {}
            # for k,v in _arg.iteritems():
            #  __arg[k] = v.as_in_context(_ctx)
            # _arg = __arg
            # _arg["data"] = _data.as_in_context(_ctx)
            # _arg["softmax_label"] = _label.as_in_context(_ctx)
            # for k,v in _arg.iteritems():
            #  print(k,v.context)
            # exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
            # exe.forward(is_train=False)
            # net_out = exe.outputs
            _embeddings = net_out[0].asnumpy()
            # print(_embeddings.shape)
            if embeddings is None:
                embeddings = np.zeros(
                    (lfw_data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings
            ba = bb
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in xrange(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            # print(_em.shape, _norm)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = sklearn.preprocessing.normalize(embeddings)
    _, _, accuracy, val, val_std, far = evaluate(embeddings,
                                                 issame_list,
                                                 nrof_folds=10)
    acc1, std1 = np.mean(accuracy), np.std(accuracy)
    # print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
    # embeddings = np.concatenate(embeddings_list, axis=1)
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    _, _, accuracy, val, val_std, far = evaluate(embeddings,
                                                 issame_list,
                                                 nrof_folds=10)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
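
The while-loop above is the standard slice_axis batching idiom: walk axis 0 in batch_size steps and let the final slice come up short. Stripped to its essentials (toy shapes assumed):

from mxnet import nd

def iter_batches(data, batch_size):
    # walk axis 0 in fixed-size steps; the last batch may be smaller
    ba = 0
    while ba < data.shape[0]:
        bb = min(ba + batch_size, data.shape[0])
        yield nd.slice_axis(data, axis=0, begin=ba, end=bb)
        ba = bb

data = nd.zeros((100, 3, 112, 112))
print([b.shape[0] for b in iter_batches(data, 32)])  # [32, 32, 32, 4]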
Example #32
0
def FNN(epoch = 100 , batch_size=128, save_period=10 , load_period=100 ,optimizer="sgd",learning_rate= 0.01 , dataset = "MNIST", ctx=mx.gpu(0)):

    #data selection
    if dataset =="MNIST":
        train_data , test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
        path = "weights/CIFAR10-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."

    '''Follow these steps:

    •Define network
    •Initialize parameters
    •Loop over inputs
    •Forward input through network to get output
    •Compute loss with output and label
    •Backprop gradient
    •Update parameters with gradient descent.
    '''
    # fully-connected network with one hidden layer
    net = gluon.nn.Sequential() # stacks 'Block's sequentially
    with net.name_scope():
        net.add(gluon.nn.Dense(units=50 , activation="relu", use_bias=True))
        net.add(gluon.nn.Dropout(0.2))
        net.add(gluon.nn.Dense(10,use_bias=True))

    #weights initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path , ctx=ctx) # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.1),ctx=ctx) # weights initialization

    #optimizer
    trainer = gluon.Trainer(net.collect_params() , optimizer, {"learning_rate" : learning_rate})

    #learning
    for i in tqdm(range(1,epoch+1,1)):
        for data , label in train_data:

            if dataset == "CIFAR10":
                data = nd.slice_axis(data= data , axis=3 , begin = 0 , end=1)

            data = data.as_in_context(ctx).reshape((batch_size,-1))
            label = label.as_in_context(ctx)

            with autograd.record(train_mode=True):
                output=net(data)
                
                #loss definition
                loss=gluon.loss.SoftmaxCrossEntropyLoss()(output,label)
                cost=nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(batch_size,ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i,cost))

        #weight_save
        if i % save_period==0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset=="MNIST":
                net.save_params("weights/MNIST-{}.params".format(i))

            elif dataset=="FashionMNIST":
                net.save_params("weights/FashionMNIST-{}.params".format(i))

            elif dataset=="CIFAR10":
                net.save_params("weights/CIFAR10-{}.params".format(i))

    test_accuracy = evaluate_accuracy(test_data , net , ctx , dataset)
    print("Test_acc : {}".format(test_accuracy))

    return "optimization completed"
Example #33
0
File: lfw.py Project: LHQ0308/insightface
def test(lfw_set, mx_model, batch_size):
  print('testing lfw..')
  lfw_data_list = lfw_set[0]
  issame_list = lfw_set[1]
  model = mx_model
  embeddings_list = []
  for i in xrange( len(lfw_data_list) ):
    lfw_data = lfw_data_list[i]
    embeddings = None
    ba = 0
    while ba<lfw_data.shape[0]:
      bb = min(ba+batch_size, lfw_data.shape[0])
      _data = nd.slice_axis(lfw_data, axis=0, begin=ba, end=bb)
      _label = nd.ones( (bb-ba,) )
      #print(_data.shape, _label.shape)
      db = mx.io.DataBatch(data=(_data,), label=(_label,))
      model.forward(db, is_train=False)
      net_out = model.get_outputs()
      #_arg, _aux = model.get_params()
      #__arg = {}
      #for k,v in _arg.iteritems():
      #  __arg[k] = v.as_in_context(_ctx)
      #_arg = __arg
      #_arg["data"] = _data.as_in_context(_ctx)
      #_arg["softmax_label"] = _label.as_in_context(_ctx)
      #for k,v in _arg.iteritems():
      #  print(k,v.context)
      #exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
      #exe.forward(is_train=False)
      #net_out = exe.outputs
      _embeddings = net_out[0].asnumpy()
      #print(_embeddings.shape)
      if embeddings is None:
        embeddings = np.zeros( (lfw_data.shape[0], _embeddings.shape[1]) )
      embeddings[ba:bb,:] = _embeddings
      ba = bb
    embeddings_list.append(embeddings)

  _xnorm = 0.0
  _xnorm_cnt = 0
  for embed in embeddings_list:
    for i in xrange(embed.shape[0]):
      _em = embed[i]
      _norm=np.linalg.norm(_em)
      #print(_em.shape, _norm)
      _xnorm+=_norm
      _xnorm_cnt+=1
  _xnorm /= _xnorm_cnt

  embeddings = embeddings_list[0].copy()
  embeddings = sklearn.preprocessing.normalize(embeddings)
  _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
  acc1, std1 = np.mean(accuracy), np.std(accuracy)
  #print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
  #embeddings = np.concatenate(embeddings_list, axis=1)
  embeddings = embeddings_list[0] + embeddings_list[1]
  embeddings = sklearn.preprocessing.normalize(embeddings)
  print(embeddings.shape)
  _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
  acc2, std2 = np.mean(accuracy), np.std(accuracy)
  return acc1, std1, acc2, std2, _xnorm, embeddings_list
Example #34
0
def test(data_set, mx_model, batch_size, nfolds=10, data_extra = None, label_shape = None):
  print('testing verification..')
  data_list = data_set[0]
  issame_list = data_set[1]
  model = mx_model
  embeddings_list = []
  if data_extra is not None:
    _data_extra = nd.array(data_extra)
  time_consumed = 0.0
  if label_shape is None:
    _label = nd.ones( (batch_size,) )
  else:
    _label = nd.ones( label_shape )
  for i in xrange( len(data_list) ):
    data = data_list[i]
    embeddings = None
    ba = 0
    while ba<data.shape[0]:
      bb = min(ba+batch_size, data.shape[0])
      count = bb-ba
      _data = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
      #print(_data.shape, _label.shape)
      time0 = datetime.datetime.now()
      if data_extra is None:
        db = mx.io.DataBatch(data=(_data,), label=(_label,))
      else:
        db = mx.io.DataBatch(data=(_data,_data_extra), label=(_label,))
      model.forward(db, is_train=False)
      net_out = model.get_outputs()
      #_arg, _aux = model.get_params()
      #__arg = {}
      #for k,v in _arg.iteritems():
      #  __arg[k] = v.as_in_context(_ctx)
      #_arg = __arg
      #_arg["data"] = _data.as_in_context(_ctx)
      #_arg["softmax_label"] = _label.as_in_context(_ctx)
      #for k,v in _arg.iteritems():
      #  print(k,v.context)
      #exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
      #exe.forward(is_train=False)
      #net_out = exe.outputs
      _embeddings = net_out[0].asnumpy()
      time_now = datetime.datetime.now()
      diff = time_now - time0
      time_consumed+=diff.total_seconds()
      #print(_embeddings.shape)
      if embeddings is None:
        embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
      embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
      ba = bb
    embeddings_list.append(embeddings)

  _xnorm = 0.0
  _xnorm_cnt = 0
  for embed in embeddings_list:
    for i in xrange(embed.shape[0]):
      _em = embed[i]
      _norm=np.linalg.norm(_em)
      #print(_em.shape, _norm)
      _xnorm+=_norm
      _xnorm_cnt+=1
  _xnorm /= _xnorm_cnt

  embeddings = embeddings_list[0].copy()
  embeddings = sklearn.preprocessing.normalize(embeddings)
  acc1 = 0.0
  std1 = 0.0
  #_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
  #acc1, std1 = np.mean(accuracy), np.std(accuracy)

  #print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
  #embeddings = np.concatenate(embeddings_list, axis=1)
  embeddings = embeddings_list[0] + embeddings_list[1]
  embeddings = sklearn.preprocessing.normalize(embeddings)
  print(embeddings.shape)
  print('infer time', time_consumed)
  _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
  acc2, std2 = np.mean(accuracy), np.std(accuracy)
  return acc1, std1, acc2, std2, _xnorm, embeddings_list
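
Unlike the lfw test above, this version always feeds the model a full batch: near the end of the data it slices a window of exactly batch_size ending at bb, then keeps only the last count embedding rows so already-processed rows are not double-counted. In isolation (toy numbers assumed):

from mxnet import nd

data, batch_size = nd.zeros((100, 4)), 32
ba = 96                                    # last, partial step
bb = min(ba + batch_size, data.shape[0])   # 100
count = bb - ba                            # 4
window = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
fresh = window[(batch_size - count):]      # only the 4 genuinely new rows
print(window.shape[0], fresh.shape[0])     # 32 4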
Example #35
0
def test(data_set,
         mx_model,
         batch_size,
         nfolds=10,
         data_extra=None,
         label_shape=None):
    # data_set: a tuple holding the data and the labels
    # print('mx_model:',mx_model)
    # sys.exit(0)
    # pdb.set_trace()
    print('testing verification..')
    data_list = data_set[0]  # data_list: two NDArrays, each of shape (12000, 3, 112, 112)
    issame_list = data_set[1]  # issame_list: a list of 6000 booleans
    model = mx_model
    embeddings_list = []
    if data_extra is not None:
        _data_extra = nd.array(data_extra)
    time_consumed = 0.0
    if label_shape is None:
        _label = nd.ones((batch_size, ))  # ones of shape (32,)
    else:
        _label = nd.ones(label_shape)

    # print('len(data_list);',len(data_list))
    # sys.exit(0)
    # pdb.set_trace()
    for i in range(len(data_list)):  # 2 views: original and flipped images
        data = data_list[i]  # NDArray of shape (12000, 3, 112, 112)

        embeddings = None
        ba = 0
        while ba < data.shape[0]:  # 12000
            bb = min(ba + batch_size, data.shape[0])  # 32,64,96,128,160,192...
            count = bb - ba  # 32,32,...

            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size,
                                  end=bb)  # (32L, 3L, 112L, 112L)
            # print('_data.shape, _label.shape:',_data.shape, _label.shape) #(64, 3, 112, 112) (64,)
            time0 = datetime.datetime.now()
            if data_extra is None:
                db = mx.io.DataBatch(data=(_data, ), label=(_label, ))
            else:
                db = mx.io.DataBatch(
                    data=(_data, _data_extra), label=(_label, )
                )  # DataBatch: data shapes: [(32L, 3L, 112L, 112L)] label shapes: [(32L,)]

            model.forward(db, is_train=False)
            net_out = model.get_outputs()  # len(net_out) == 1; the element is an NDArray of shape (32, 512)

            # _arg, _aux = model.get_params()
            # __arg = {}
            # for k,v in _arg.iteritems():
            #  __arg[k] = v.as_in_context(_ctx)
            # _arg = __arg
            # _arg["data"] = _data.as_in_context(_ctx)
            # _arg["softmax_label"] = _label.as_in_context(_ctx)
            # for k,v in _arg.iteritems():
            #  print(k,v.context)
            # exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
            # exe.forward(is_train=False)
            # net_out = exe.outputs
            _embeddings = net_out[0].asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            # print('_embeddings.shape:',_embeddings.shape)
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]  # fills the (12000, 512) buffer
            ba = bb
        embeddings_list.append(embeddings)  # after the loop: len == 2 (images and flipped images)
    # pdb.set_trace()
    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):  # 12000
            _em = embed[i]  # 512-d embedding
            _norm = np.linalg.norm(_em)  # sqrt of the sum of squared elements

            _xnorm += _norm
            _xnorm_cnt += 1

    _xnorm /= _xnorm_cnt

    acc1 = 0.0
    std1 = 0.0

    embeddings = embeddings_list[0] + embeddings_list[1]  # element-wise sum, shape (12000, 512)
    # if np.isnan(embeddings).any():
    #     print('found NaNs')
    #     embeddings_inf = np.isnan(embeddings)
    #     embeddings[embeddings_inf] = 0.0
    # #
    # if np.isfinite(embeddings).any():
    #     print('found infinities')
    #     embeddings_inf = np.isinf(embeddings)
    #     embeddings[embeddings_inf] = 0.0
    # L2-normalize
    embeddings = sklearn.preprocessing.normalize(embeddings)  # (12000, 512)

    print('infer time', time_consumed)

    # accuracy: shape (10,), holding the results of the 10 cross-validation folds
    _, _, accuracy, val, val_std, far = evaluate(embeddings,
                                                 issame_list,
                                                 nrof_folds=nfolds)

    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
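
The final fusion step sums the embeddings of the original and flipped images and then L2-normalizes, the usual flip-test trick (equivalent, up to scale, to averaging the two views). A tiny sketch with random data:

import numpy as np
import sklearn.preprocessing

emb = np.random.rand(6, 512)        # embeddings of the original images
emb_flip = np.random.rand(6, 512)   # embeddings of the flipped images
fused = sklearn.preprocessing.normalize(emb + emb_flip)
print(np.allclose(np.linalg.norm(fused, axis=1), 1.0))  # True: rows are unit-norm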
Example #36
0
def test_badcase(data_set, mx_model, batch_size, name='', data_extra = None, label_shape = None):
  print('testing verification badcase..')
  data_list = data_set[0]
  issame_list = data_set[1]
  model = mx_model
  embeddings_list = []
  if data_extra is not None:
    _data_extra = nd.array(data_extra)
  time_consumed = 0.0
  if label_shape is None:
    _label = nd.ones( (batch_size,) )
  else:
    _label = nd.ones( label_shape )
  for i in xrange( len(data_list) ):
    data = data_list[i]
    embeddings = None
    ba = 0
    while ba<data.shape[0]:
      bb = min(ba+batch_size, data.shape[0])
      count = bb-ba
      _data = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
      #print(_data.shape, _label.shape)
      time0 = datetime.datetime.now()
      if data_extra is None:
        db = mx.io.DataBatch(data=(_data,), label=(_label,))
      else:
        db = mx.io.DataBatch(data=(_data,_data_extra), label=(_label,))
      model.forward(db, is_train=False)
      net_out = model.get_outputs()
      _embeddings = net_out[0].asnumpy()
      time_now = datetime.datetime.now()
      diff = time_now - time0
      time_consumed+=diff.total_seconds()
      if embeddings is None:
        embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
      embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
      ba = bb
    embeddings_list.append(embeddings)
  embeddings = embeddings_list[0] + embeddings_list[1]
  embeddings = sklearn.preprocessing.normalize(embeddings)
  thresholds = np.arange(0, 4, 0.01)
  actual_issame = np.asarray(issame_list)
  nrof_folds = 10
  embeddings1 = embeddings[0::2]
  embeddings2 = embeddings[1::2]
  assert(embeddings1.shape[0] == embeddings2.shape[0])
  assert(embeddings1.shape[1] == embeddings2.shape[1])
  nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
  nrof_thresholds = len(thresholds)
  k_fold = LFold(n_splits=nrof_folds, shuffle=False)
  
  tprs = np.zeros((nrof_folds,nrof_thresholds))
  fprs = np.zeros((nrof_folds,nrof_thresholds))
  accuracy = np.zeros((nrof_folds))
  indices = np.arange(nrof_pairs)
  
  diff = np.subtract(embeddings1, embeddings2)
  dist = np.sum(np.square(diff),1)
  data = data_list[0]

  pouts = []
  nouts = []
  
  for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
       
      # Find the best threshold for the fold
      acc_train = np.zeros((nrof_thresholds))
      #print(train_set)
      #print(train_set.__class__)
      for threshold_idx, threshold in enumerate(thresholds):
          p2 = dist[train_set]
          p3 = actual_issame[train_set]
          _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, p2, p3)
      best_threshold_index = np.argmax(acc_train)
      for threshold_idx, threshold in enumerate(thresholds):
          tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
      _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
      best_threshold = thresholds[best_threshold_index]
      for iid in test_set:
        ida = iid*2
        idb = ida+1
        asame = actual_issame[iid]
        _dist = dist[iid]
        violate = _dist - best_threshold
        if not asame:
          violate *= -1.0  # for a negative pair, a distance below the threshold is the violation
        if violate>0.0:
          imga = data[ida].asnumpy().transpose( (1,2,0) )[...,::-1] #to bgr
          imgb = data[idb].asnumpy().transpose( (1,2,0) )[...,::-1]
          #print(imga.shape, imgb.shape, violate, asame, _dist)
          if asame:
            pouts.append( (imga, imgb, _dist, best_threshold, ida) )
          else:
            nouts.append( (imga, imgb, _dist, best_threshold, ida) )

        
  tpr = np.mean(tprs,0)
  fpr = np.mean(fprs,0)
  acc = np.mean(accuracy)
  pouts = sorted(pouts, key = lambda x: x[2], reverse=True)   # false negatives, largest distance first
  nouts = sorted(nouts, key = lambda x: x[2], reverse=False)  # false positives, smallest distance first
  print(len(pouts), len(nouts))
  print('acc', acc)
  gap = 10
  image_shape = (112,224,3)
  out_dir = "./badcases"
  if not os.path.exists(out_dir):
    os.makedirs(out_dir)
  if len(nouts)>0:
    threshold = nouts[0][3]
  else:
    threshold = pouts[-1][3]
  
  for item in [(pouts, 'positive(false_negative).png'), (nouts, 'negative(false_positive).png')]:
    cols = 4
    rows = 8000
    outs = item[0]
    if len(outs)==0:
      continue
    #if len(outs)==9:
    #  cols = 3
    #  rows = 3

    _rows = int(math.ceil(len(outs) / float(cols)))  # float division: under Python 2, len(outs)/cols would floor before ceil
    rows = min(rows, _rows)
    hack = {}

    if name.startswith('cfp') and item[1].startswith('pos'):
      hack = {0:'manual/238_13.jpg.jpg', 6:'manual/088_14.jpg.jpg', 10:'manual/470_14.jpg.jpg', 25:'manual/238_13.jpg.jpg', 28:'manual/143_11.jpg.jpg'}

    filename = item[1]
    if len(name)>0:
      filename = name+"_"+filename
    filename = os.path.join(out_dir, filename)
    img = np.zeros( (image_shape[0]*rows+20, image_shape[1]*cols+(cols-1)*gap, 3), dtype=np.uint8 )
    img[:,:,:] = 255
    text_color = (153,255,51)  # light green in BGR
    for outi, out in enumerate(outs):
      row = outi//cols
      col = outi%cols
      if row==rows:
        break
      imga = out[0].copy()
      imgb = out[1].copy()
      if outi in hack:
        idx = out[4]
        print('noise idx',idx)
        aa = hack[outi]
        imgb = cv2.imread(aa)
        #if aa==1:
        #  imgb = cv2.transpose(imgb)
        #  imgb = cv2.flip(imgb, 1)
        #elif aa==3:
        #  imgb = cv2.transpose(imgb)
        #  imgb = cv2.flip(imgb, 0)
        #else:
        #  for ii in xrange(2):
        #    imgb = cv2.transpose(imgb)
        #    imgb = cv2.flip(imgb, 1)
      dist = out[2]
      _img = np.concatenate( (imga, imgb), axis=1 )
      k = "%.3f"%dist
      #print(k)
      font = cv2.FONT_HERSHEY_SIMPLEX
      cv2.putText(_img,k,(80,image_shape[0]//2+7), font, 0.6, text_color, 2)
      #_filename = filename+"_%d.png"%outi
      #cv2.imwrite(_filename, _img)
      img[row*image_shape[0]:(row+1)*image_shape[0], (col*image_shape[1]+gap*col):((col+1)*image_shape[1]+gap*col),:] = _img
    #threshold = outs[0][3]
    font = cv2.FONT_HERSHEY_SIMPLEX
    k = "threshold: %.3f"%threshold
    cv2.putText(img,k,(img.shape[1]//2-70,img.shape[0]-5), font, 0.6, text_color, 2)
    cv2.imwrite(filename, img)
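
test_badcase also assumes an LFold splitter that is not shown here. A minimal wrapper consistent with LFold(n_splits=..., shuffle=False).split(indices) as called above; the single-split fallback is my assumption, not something this listing confirms:

from sklearn.model_selection import KFold

class LFold:
    # thin KFold wrapper; with n_splits == 1 it degenerates to a single
    # split where train and test coincide (assumed behavior)
    def __init__(self, n_splits=2, shuffle=False):
        self.n_splits = n_splits
        if self.n_splits > 1:
            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)

    def split(self, indices):
        if self.n_splits > 1:
            return self.k_fold.split(indices)
        return [(indices, indices)]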
Example #37
0
File: hack_mdmadqn.py Project: WowCZ/strac
    def forward(self, input_vec, loss=None):
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot(including global)
        inputs = {}
        for slot in self.slots:
            inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
        input_global = []
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        if (not self.sort_input_vec) and self.state_feature != 'dip':
            layer.append([])
            for slot in self.slots:
                layer[0].append(self.input_trans[slot](inputs[slot]))
            layer[0].append(self.input_trans['global'](inputs['global']))
        elif self.state_feature == 'dip':
            sorted_inputs = []
            for slot in self.slots:
                sorted_inputs.append(inputs[slot])
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans(sorted_inputs, loss))
        elif self.sort_input_vec:
            sorted_inputs = []
            for slot in self.slots:
                tmp = inputs[slot][:, :-2].sort(is_ascend=False)
                if tmp.shape[1] < 20:
                    tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
                else:
                    tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
                sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans(sorted_inputs, loss))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            if self.recurrent_mode is False:
                # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
                layer.append(self.ma_trans[i](layer[i], loss))
            else:
                layer.append(self.ma_trans(layer[i], loss))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op(layer[-1][-1])

            # last_hidden_layer -> outputs
            outputs = []
            for i in range(len(self.slots) + 1):
                if self.use_dueling is False:
                    outputs.append(self.output_trans[i](layer[-1][i]))
                else:
                    if i < len(self.slots):
                        # NOTE: sorted_inputs exists only on the 'dip' /
                        # sort_input_vec paths above
                        tmp_adv = self.output_trans_local_advantage(sorted_inputs[i])
                    else:
                        tmp_adv = self.output_trans_global_advantage(sorted_inputs[-1])
                    if self.dueling_share_last:
                        if i < len(self.slots):
                            cur_value = self.output_trans_local_value(layer[-1][i])
                            if self.shared_last_layer_use_bias:
                                cur_value = cur_value + nd.slice(self.value_bias_local.data(), begin=(i, ), end=(i + 1, ))
                        else:
                            cur_value = self.output_trans_global_value(layer[-1][i])
                    else:
                        cur_value = self.output_trans_value[i](layer[-1][i])
                    outputs.append(
                        cur_value +
                        tmp_adv - tmp_adv.mean(axis=1).reshape(
                            (tmp_adv.shape[0], 1)).broadcast_axes(axis=1, size=tmp_adv.shape[1]))
        else:
            outputs = []
            for i in range(len(self.slots)):
                output_i = self.output_trans_local(layer[-1][i])
                if self.shared_last_layer_use_bias:
                    output_i = output_i + self.output_trans_local_biases[i].data()
                outputs.append(output_i)
            outputs.append(self.output_trans_global(layer[-1][-1]))
        return nd.concat(*outputs, dim=1)
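
The dueling aggregation above combines a state value with mean-centred advantages, Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); subtracting the mean advantage keeps V and A identifiable. The same expression on toy tensors (shapes are mine):

from mxnet import nd

value = nd.array([[1.0], [2.0]])                      # (batch, 1)
adv = nd.array([[0.5, 1.5, -2.0], [3.0, 0.0, 0.0]])   # (batch, actions)
q = value + adv - adv.mean(axis=1).reshape((-1, 1)).broadcast_axes(axis=1, size=adv.shape[1])
print(q)  # per-action Q-values, one row per batch element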