Beispiel #1
0
    def generator_with_mask_ohem(self, graph, kerasModel, batchSize=16, inputSize=(512, 512), flipFlag=False, cropFlag=False,
                            shuffle=True, rotateFlag=True, nStackNum=1):
        '''
        Infinite batch generator for training with online hard example mining (OHEM).

        For every annotation row the image and its ground-truth heatmap are produced by
        self._prcoess_img, a category mask by generate_input_mask, and an OHEM mask /
        OHEM ground truth by generate_topk_mask_ohem (which runs kerasModel inside graph).

        :param graph: TF graph handed to generate_topk_mask_ohem for the prediction
        :param kerasModel: model used to score each sample for hard-keypoint selection
        :param batchSize: number of samples per yielded batch
        :param inputSize: (height, width) of the network input
        :param flipFlag: forwarded to self._prcoess_img
        :param cropFlag: forwarded to self._prcoess_img
        :param shuffle: reshuffle the annotation frame at the start of every pass
        :param rotateFlag: forwarded to self._prcoess_img
        :param nStackNum: how many times the plain ground truth is repeated in the targets
        :return: yields ([train_input, train_mask, train_ohem_mask], targets) where
                 train_input is (batchSize, H, W, 3), the other arrays are
                 (batchSize, H//2, W//2, getKpNum(self.category)), and targets is
                 nStackNum references to train_gthmap followed by train_ohem_gthmap.

        NOTE: the yielded arrays are the generator's own buffers and are overwritten
        while the next batch is being assembled; a consumer that needs to keep a batch
        must copy it. Samples left over at the end of a pass (fewer than batchSize)
        are not yielded on their own.
        '''
        xdf = self.annDataFrame

        targetHeight, targetWidth = inputSize
        kpNum = getKpNum(self.category)

        # Floor division keeps the heatmap shape integral under Python 3 as well.
        hmapShape = (batchSize, targetHeight // 2, targetWidth // 2, kpNum)

        # train_input: float, height, width, channels
        # the remaining buffers hold one channel per keypoint
        train_input = np.zeros((batchSize, targetHeight, targetWidth, 3), dtype=np.float64)
        train_mask = np.zeros(hmapShape, dtype=np.float64)
        train_gthmap = np.zeros(hmapShape, dtype=np.float64)
        train_ohem_mask = np.zeros(hmapShape, dtype=np.float64)
        train_ohem_gthmap = np.zeros(hmapShape, dtype=np.float64)

        ## generator need to be infinite loop
        while 1:
            # random shuffle at first
            if shuffle:
                xdf = xdf.sample(frac=1)
            count = 0
            for _index, _row in xdf.iterrows():
                xinput, xhmap = self._prcoess_img(_row, inputSize, rotateFlag, flipFlag, cropFlag, nobgFlag=True)
                if xinput is None:
                    # _prcoess_img returns (None, None) when the image cannot be read;
                    # skip the row instead of crashing inside the OHEM prediction.
                    continue

                xindex = count % batchSize
                # NOTE(review): the mask is requested at input resolution but stored in a
                # half-resolution buffer; presumably generate_input_mask halves it - confirm.
                xmask = generate_input_mask(_row['image_category'],
                                            (targetHeight, targetWidth, kpNum))

                xohem_mask, xohem_gthmap = generate_topk_mask_ohem([xinput, xmask], xhmap, kerasModel, graph,
                                            8, _row['image_category'], dynamicFlag=False)

                train_input[xindex, :, :, :] = xinput
                train_mask[xindex, :, :, :] = xmask
                train_gthmap[xindex, :, :, :] = xhmap
                train_ohem_mask[xindex, :, :, :] = xohem_mask
                train_ohem_gthmap[xindex, :, :, :] = xohem_gthmap

                count += 1

                # Yield as soon as the last slot has been filled, so every batch holds
                # batchSize consecutive samples and no sample is overwritten unseen.
                if xindex == batchSize - 1:
                    # every stack is trained against the plain ground truth ...
                    gthamplst = [train_gthmap] * nStackNum

                    # ... and the last output uses the ohem gthmap
                    gthamplst.append(train_ohem_gthmap)

                    yield [train_input, train_mask, train_ohem_mask], gthamplst
    def _net_inference_rotate(self, imgFile, imgCategory):
        '''
        Run the network on four rotated copies of one image and sum the heatmaps.

        The image is rotated by -20, -10, 10 and 20 degrees, each copy is padded to
        512x512 and normalized, and all copies are predicted in a single batch. Each
        resulting heatmap is resized by 2.0/scale, warped with the inverse rotation
        matrix to the original image size, and accumulated.

        :param imgFile: path of the image to load with cv2.imread
        :param imgCategory: category passed to generate_input_mask
        :return: array of shape (orig_height, orig_width, getKpNum(self.category))
                 holding the sum (not the mean) of the back-rotated heatmaps
        :raises IOError: if the image cannot be read
        '''
        from data_process import normalize_image, pad_image_inference, rotate_image_with_invrmat

        # load image and preprocess
        orgimage = cv2.imread(imgFile)
        if orgimage is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError("failed to read image: " + str(imgFile))

        anglelst = [-20, -10, 10, 20]
        kpNum = getKpNum(self.category)

        input_img = np.zeros(shape=(len(anglelst), 512, 512, 3),
                             dtype=np.float64)
        input_mask = np.zeros(shape=(len(anglelst), 256, 256, kpNum),
                              dtype=np.float64)

        # remember, per rotation, what is needed to map the heatmap back
        mlist = list()
        for i, angle in enumerate(anglelst):
            rotateimg, invRotMatrix, _orgImgSize = rotate_image_with_invrmat(
                orgimage, angle)
            padimg, scale = pad_image_inference(rotateimg, 512, 512)
            input_img[i, :, :, :] = normalize_image(padimg)
            mlist.append((scale, invRotMatrix))

        # the same category mask applies to every rotated copy
        mask = generate_input_mask(imgCategory, (512, 512, kpNum))
        input_mask[:] = mask

        # inference; a two-input model takes no separate ohem mask
        if len(self.net.input_layers) == 2:
            heatmap = self.net.predict([input_img, input_mask])
        else:
            heatmap = self.net.predict([input_img, input_mask, input_mask])
        heatmap = self._heatmap_sum(heatmap)

        # rotate back to original resolution
        sumheatmap = np.zeros(shape=(orgimage.shape[0], orgimage.shape[1], kpNum),
                              dtype=np.float64)
        for i, (_scale, _invRotMatrix) in enumerate(mlist):
            # heatmap is half the padded input size, hence the factor 2.0
            _heatmap = cv2.resize(heatmap[i, :, :, :],
                                  None,
                                  fx=2.0 / _scale,
                                  fy=2.0 / _scale)
            # warpAffine expects the output size as (width, height)
            _invheatmap = cv2.warpAffine(
                _heatmap, _invRotMatrix,
                (orgimage.shape[1], orgimage.shape[0]))
            sumheatmap += _invheatmap

        return sumheatmap
    def _net_inference_flip(self, imgFile, imgCategory):
        '''
        Run the network on an image and its horizontal mirror and combine the results.

        The image is padded to 512x512; the padded image and its left-right flip are
        predicted as a batch of two. The heatmap of the flipped copy is converted back
        with self._flip_out_heatmap and added to the heatmap of the original.

        :param imgFile: path of the image to load with cv2.imread
        :param imgCategory: category passed to generate_input_mask
        :return: (outheatmap, scale) where outheatmap has a leading batch axis of 1 and
                 is the sum (not the mean) of both heatmaps, and scale is the value
                 returned by pad_image_inference
        :raises IOError: if the image cannot be read
        :raises AssertionError: if the model does not have 2 or 3 input layers
        '''
        import cv2
        from data_process import normalize_image, pad_image_inference
        assert (len(self.net.input_layers) >
                1), "input layer need to more than 1"

        batch_size = 2
        kpNum = getKpNum(self.category)

        input_img = np.zeros(shape=(batch_size, 512, 512, 3), dtype=np.float64)
        input_mask = np.zeros(shape=(batch_size, 256, 256, kpNum),
                              dtype=np.float64)

        # load image and preprocess
        orgimage = cv2.imread(imgFile)
        if orgimage is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError("failed to read image: " + str(imgFile))

        padimg, scale = pad_image_inference(orgimage, 512, 512)
        flipimg = cv2.flip(padimg, flipCode=1)

        input_img[0, :, :, :] = normalize_image(padimg)
        input_img[1, :, :, :] = normalize_image(flipimg)

        # both copies share the same category mask
        mask = generate_input_mask(imgCategory, (512, 512, kpNum))
        input_mask[0, :, :, :] = mask
        input_mask[1, :, :, :] = mask

        # inference
        inputNum = len(self.net.input_layers)
        if inputNum == 2:
            heatmap = self.net.predict([input_img, input_mask])
        elif inputNum == 3:
            heatmap = self.net.predict([input_img, input_mask, input_mask])
        else:
            # raised explicitly so the check survives `python -O`
            raise AssertionError(str(inputNum) + " should be 2 or 3 ")

        # sum heatmap
        avgheatmap = self._heatmap_sum(heatmap)

        orgheatmap = avgheatmap[0, :, :, :]

        # convert to the same orientation/ordering as the original heatmap
        flipheatmap = avgheatmap[1, :, :, :]
        flipheatmap = self._flip_out_heatmap(flipheatmap)

        # add original and flipped heatmaps (no division by two is applied)
        outheatmap = flipheatmap + orgheatmap
        outheatmap = outheatmap[np.newaxis, :, :, :]

        return (outheatmap, scale)
Beispiel #4
0
    def _prcoess_img(self, dfrow, inputSize, rotateFlag, flipFlag, cropFlag, nobgFlag):
        '''
        Build one training sample (normalized image, ground-truth heatmap) from a row.

        Steps: parse the keypoint annotations, load the image, optionally flip it,
        pad it to inputSize, render the ground-truth heatmap, optionally rotate image
        and heatmap by a random angle in [-40, 40), then resize.

        :param dfrow: annotation row; indexed with getKpKeys(self.category), first entry
                      is the image name and the rest are keypoint strings
        :param inputSize: target size, passed to pad_image and cv2.resize
        :param rotateFlag: allow the random rotation (applied with probability 1/2)
        :param flipFlag: allow the random flip (applied with probability 1/2)
        :param cropFlag: currently unused by this method
        :param nobgFlag: currently unused by this method
        :return: (normalized image, heatmap resized to half of inputSize), or
                 (None, None) when the image file cannot be read
        '''
        annFields = dfrow[getKpKeys(self.category)]
        imgName = annFields[0]
        kpStr = annFields[1:]

        # parse every keypoint annotation string of the row
        kpAnnlst = [KpAnno.readFromStr(_kpstr) for _kpstr in kpStr]

        assert (len(kpAnnlst) == getKpNum(self.category)), str(len(kpAnnlst))+" is not the same as "+str(getKpNum(self.category))

        imgPath = os.path.join(self.train_img_path, imgName)
        xcvmat = cv2.imread(imgPath)
        if xcvmat is None:
            return None, None

        # flipping comes first; the coin is tossed even when flipping is disabled
        flipCoin = random.choice([0, 1])
        if flipCoin and flipFlag:
            xcvmat, kpAnnlst = self.flip_image(xcvmat, kpAnnlst)

        # pad image (and shift the annotations) to the requested input size
        paddedImg, kpAnnlst = pad_image(xcvmat, kpAnnlst, inputSize[0], inputSize[1])

        assert (len(kpAnnlst) == getKpNum(self.category)), str(len(kpAnnlst)) + " is not the same as " + str(
            getKpNum(self.category))

        # ground truth is rendered at padded resolution and shrunk afterwards
        trainGtHmap = self.__generate_hmap(paddedImg, kpAnnlst)

        # default: no rotation; again the coin is tossed regardless of the flag
        rotatedImage, rotatedGtHmap = paddedImg, trainGtHmap
        rotateCoin = random.choice([0, 1])
        if rotateCoin and rotateFlag:
            rAngle = np.random.randint(-40, 40)
            rotatedImage, _unusedKps = rotate_image(paddedImg, [], rAngle)
            rotatedGtHmap = rotate_mask(trainGtHmap, rAngle)

        # resize image to input size and heatmap to half of it
        halfSize = (inputSize[0]//2, inputSize[1]//2)
        resizedImg = cv2.resize(rotatedImage, inputSize)
        resizedGtHmap = cv2.resize(rotatedGtHmap, halfSize)

        return normalize_image(resizedImg), resizedGtHmap
def generate_topk_mask_ohem(input_data, gthmap, keras_model, graph, topK, image_category, dynamicFlag=False):
    '''
    Build the OHEM mask and the masked ground truth for a single sample.

    The sample is pushed through keras_model, a per-channel loss between the last
    output and the ground truth is computed with np_euclidean_l2, and adjsut_mask
    derives the new mask from those losses (per the original note: top-k channels
    set to 1.0, the rest to 0.0).

    :param input_data: pair (image, mask) for one sample, without batch axis
    :param gthmap: ground-truth heatmap, channels last
    :param keras_model: keras model used for the prediction
    :param graph: tf graph the prediction runs in (workaround for threading issues)
    :param topK: number of keypoints to select
    :param image_category: category used to derive topK when dynamicFlag is set
    :param dynamicFlag: if true, topK becomes getKpNum(image_category)//2
    :return: (ohem_mask, ohem_mask * gthmap)
    '''
    img, mask = input_data

    # add the batch axis expected by predict
    batch_img = img[np.newaxis, :, :, :]
    batch_mask = mask[np.newaxis, :, :, :]

    # a three-input model takes the plain mask as its ohem mask too
    net_inputs = [batch_img, batch_mask]
    if len(keras_model.input_layers) == 3:
        net_inputs.append(batch_mask)

    with graph.as_default():
        predictions = keras_model.predict(net_inputs)

    # only the heatmap of the last stage is scored
    last_stage = predictions[-1]

    # one loss value per heatmap channel
    channel_losses = [
        np_euclidean_l2(last_stage[0, :, :, ch], gthmap[:, :, ch])
        for ch in range(gthmap.shape[-1])
    ]

    # fixme: topk may differ between categories
    selected = getKpNum(image_category)//2 if dynamicFlag else topK

    ohem_mask = adjsut_mask(channel_losses, mask, selected)

    return ohem_mask, ohem_mask * gthmap
    def _net_inference_with_mask(self, imgFile, imgCategory):
        '''
        Run the network once on a single image together with its category mask.

        The image is padded to 512x512 and normalized; the mask comes from
        generate_input_mask. A two-input model receives [image, mask]; any other
        model receives [image, mask, mask], i.e. the plain mask doubles as ohem mask.

        :param imgFile: path of the image to load with cv2.imread
        :param imgCategory: category passed to generate_input_mask
        :return: (heatmap, scale) - the raw output of self.net.predict and the value
                 returned by pad_image_inference
        :raises IOError: if the image cannot be read
        '''
        import cv2
        from data_process import normalize_image, pad_image_inference
        assert (len(self.net.input_layers) >
                1), "input layer need to more than 1"

        # load image and preprocess
        img = cv2.imread(imgFile)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError("failed to read image: " + str(imgFile))

        img, scale = pad_image_inference(img, 512, 512)
        img = normalize_image(img)
        input_img = img[np.newaxis, :, :, :]

        input_mask = generate_input_mask(imgCategory,
                                         (512, 512, getKpNum(self.category)))
        input_mask = input_mask[np.newaxis, :, :, :]

        # inference; match the number of inputs the model actually has
        if len(self.net.input_layers) == 2:
            heatmap = self.net.predict([input_img, input_mask])
        else:
            heatmap = self.net.predict([input_img, input_mask, input_mask])

        return (heatmap, scale)
Beispiel #7
0
    def __generate_hmap(self, cvmat, kpAnnolst):
        '''
        Render the ground-truth heatmap for one image.

        For every annotated keypoint a Gaussian patch from make_gaussian is pasted
        into that keypoint's channel, centred on the keypoint and clipped to the
        image borders. Keypoints with visibility == -1 are left as all zeros.

        :param cvmat: image; only its height and width (shape[0], shape[1]) are used
        :param kpAnnolst: keypoint annotations exposing x, y and visibility
        :return: float array of shape (height, width, getKpNum(self.category))
        '''
        imgHeight, imgWidth = cvmat.shape[0], cvmat.shape[1]

        # one channel per keypoint (no extra background channel is allocated)
        gthmp = np.zeros((imgHeight, imgWidth, getKpNum(self.category)), dtype=np.float64)

        radius = 100
        # integer half-size so that every slice bound below stays an int
        half = radius // 2

        # the patch is identical for every keypoint, so build it once;
        # assumes make_gaussian returns a radius x radius array - TODO confirm
        gaussMask = make_gaussian(radius, radius, 20, None)

        for i, _kpAnn in enumerate(kpAnnolst):
            if _kpAnn.visibility == -1:
                continue

            kp_x, kp_y = int(_kpAnn.x), int(_kpAnn.y)

            # avoid out of boundary
            top_x, top_y = max(0, kp_x - half), max(0, kp_y - half)
            bottom_x, bottom_y = min(imgWidth, kp_x + half), min(imgHeight, kp_y + half)

            if bottom_x <= top_x or bottom_y <= top_y:
                # the whole patch lies outside the image; nothing to paste
                continue

            # how much of the patch was cut off on the left / top side
            top_x_offset = top_x - (kp_x - half)
            top_y_offset = top_y - (kp_y - half)

            gthmp[top_y:bottom_y, top_x:bottom_x, i] = gaussMask[top_y_offset:top_y_offset + bottom_y - top_y,
                                                                 top_x_offset:top_x_offset + bottom_x - top_x]

        return gthmp
Beispiel #8
0
    # Select the GPU: order devices by PCI bus id and expose only the requested one.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpuID)

    # TensorFlow session configuration
    config = tf.ConfigProto()

    # Don't pre-allocate memory; allocate as-needed
    config.gpu_options.allow_growth = True

    # Cap on the share of GPU memory this process may use; 1.0 means no cap below
    # the full device (the value is NOT one half).
    config.gpu_options.per_process_gpu_memory_fraction = 1.0

    # Create a session with the above options specified.
    k.tensorflow_backend.set_session(tf.Session(config=config))

    if not args.resume:
        # fresh run: build the requested network, then train it from scratch
        xnet = FashionNet(512, 512, getKpNum(args.category))
        xnet.build_model(modelName=args.network, show=True)
        xnet.train(args.category,
                   epochs=args.epochs,
                   batchSize=args.batchSize,
                   lrschedule=args.lrdecay)
    else:
        # resumed run: continue from args.resumeModel starting at args.initEpoch;
        # note that args.lrdecay is not passed on this path
        xnet = FashionNet(512, 512, getKpNum(args.category))
        xnet.resume_train(args.category,
                          args.resumeModel,
                          args.network,
                          args.initEpoch,
                          epochs=args.epochs,
                          batchSize=args.batchSize)