def generator_with_mask_ohem(self, graph, kerasModel, batchSize=16, inputSize=(512, 512), flipFlag=False,
                             cropFlag=False, shuffle=True, rotateFlag=True, nStackNum=1):
    '''
    Endlessly yield training batches with category masks and OHEM targets.

    Yields ``([train_input, train_mask, train_ohem_mask], gthamplst)`` where
    ``gthamplst`` holds ``nStackNum`` references to the ground-truth heatmap
    batch followed by the OHEM ground-truth heatmap batch.

    Input:  batch_size * Height * Width * Channel (3)
    Input:  batch_size * Height/2 * Width/2 * Channel (getKpNum(self.category)).
            Mask for each category.
    Output: batch_size * Height/2 * Width/2 * Channel (getKpNum(self.category))

    :param graph: tf graph forwarded to generate_topk_mask_ohem
    :param kerasModel: keras model forwarded to generate_topk_mask_ohem
    :param batchSize: number of samples per yielded batch
    :param inputSize: (height, width) of the network input
    :param flipFlag: forwarded to _prcoess_img
    :param cropFlag: forwarded to _prcoess_img
    :param shuffle: reshuffle the annotation dataframe before every pass
    :param rotateFlag: forwarded to _prcoess_img
    :param nStackNum: how many times the plain heatmap batch is repeated in the targets
    :raises RuntimeError: if a whole pass over the annotations yields no usable sample

    NOTE: the batch buffers are allocated once and refilled in place, so every
    yielded batch refers to the same arrays.
    NOTE: a batch is emitted right after slot 0 has been filled with the first
    sample of the following batch, so the very first sample of each pass is
    overwritten before it is ever yielded.
    '''
    xdf = self.annDataFrame
    targetHeight, targetWidth = inputSize
    kpNum = getKpNum(self.category)

    # Masks and heatmaps are half the input resolution. Floor division keeps the
    # shape integral on Python 3 (a float shape is rejected by np.zeros).
    hmapShape = (batchSize, targetHeight // 2, targetWidth // 2, kpNum)

    # np.float64 replaces the removed np.float alias (same dtype).
    train_input = np.zeros((batchSize, targetHeight, targetWidth, 3), dtype=np.float64)
    train_mask = np.zeros(hmapShape, dtype=np.float64)
    train_gthmap = np.zeros(hmapShape, dtype=np.float64)
    train_ohem_mask = np.zeros(hmapShape, dtype=np.float64)
    train_ohem_gthmap = np.zeros(hmapShape, dtype=np.float64)

    ## generator need to be infinite loop
    while 1:
        # random shuffle at first
        if shuffle:
            xdf = xdf.sample(frac=1)

        count = 0
        for _index, _row in xdf.iterrows():
            xindex = count % batchSize
            xinput, xhmap = self._prcoess_img(_row, inputSize, rotateFlag, flipFlag, cropFlag, nobgFlag=True)
            if xinput is None:
                # _prcoess_img returns (None, None) when the image cannot be read;
                # skip the row and let the next one take this batch slot.
                continue

            xmask = generate_input_mask(_row['image_category'], (targetHeight, targetWidth, kpNum))
            xohem_mask, xohem_gthmap = generate_topk_mask_ohem([xinput, xmask], xhmap, kerasModel, graph,
                                                               8, _row['image_category'], dynamicFlag=False)

            train_input[xindex, :, :, :] = xinput
            train_mask[xindex, :, :, :] = xmask
            train_gthmap[xindex, :, :, :] = xhmap
            train_ohem_mask[xindex, :, :, :] = xohem_mask
            train_ohem_gthmap[xindex, :, :, :] = xohem_gthmap

            # if refinenet enable, refinenet has two outputs, globalnet and refinenet
            if xindex == 0 and count != 0:
                gthamplst = [train_gthmap for _ in range(nStackNum)]
                # last stack will use ohem gthmap
                gthamplst.append(train_ohem_gthmap)
                yield [train_input, train_mask, train_ohem_mask], gthamplst
            count += 1

        if count == 0:
            # Nothing usable in a full pass: fail loudly instead of spinning forever.
            raise RuntimeError("generator_with_mask_ohem: no readable training sample found")
def _net_inference_rotate(self, imgFile, imgCategory):
    '''
    Run inference on four rotated copies of an image and sum the heatmaps
    after mapping each one back to the original image frame.

    :param imgFile: path of the image, loaded with cv2.imread
    :param imgCategory: category passed to generate_input_mask
    :return: array of shape (original height, original width, getKpNum(self.category))
             holding the sum of the de-rotated heatmaps
    '''
    # Local import, matching the sibling inference methods.
    import cv2
    from data_process import normalize_image, pad_image_inference, rotate_image_with_invrmat

    kpNum = getKpNum(self.category)

    # load image and preprocess
    orgimage = cv2.imread(imgFile)

    anglelst = [-20, -10, 10, 20]

    # np.float64 replaces the removed np.float alias (same dtype).
    input_img = np.zeros(shape=(len(anglelst), 512, 512, 3), dtype=np.float64)
    input_mask = np.zeros(shape=(len(anglelst), 256, 256, kpNum), dtype=np.float64)

    # one (scale, inverse rotation matrix) pair per angle, needed to undo the transform
    mlist = list()
    for i, angle in enumerate(anglelst):
        rotateimg, invRotMatrix, _ = rotate_image_with_invrmat(orgimage, angle)
        padimg, scale = pad_image_inference(rotateimg, 512, 512)
        input_img[i, :, :, :] = normalize_image(padimg)
        mlist.append((scale, invRotMatrix))

    # the same category mask is used for every rotated copy
    mask = generate_input_mask(imgCategory, (512, 512, kpNum))
    input_mask[:, :, :, :] = mask

    # inference
    heatmap = self.net.predict([input_img, input_mask, input_mask])
    heatmap = self._heatmap_sum(heatmap)

    # rotate back to original resolution
    sumheatmap = np.zeros(shape=(orgimage.shape[0], orgimage.shape[1], kpNum), dtype=np.float64)
    for i, (_scale, _invRotMatrix) in enumerate(mlist):
        _heatmap = heatmap[i, :, :, :]
        # undo the padding scale; the factor 2.0 presumably compensates for the
        # half-resolution heatmap (256 vs 512 input) -- confirm against the model
        _heatmap = cv2.resize(_heatmap, None, fx=2.0 / _scale, fy=2.0 / _scale)
        _invheatmap = cv2.warpAffine(_heatmap, _invRotMatrix, (orgimage.shape[1], orgimage.shape[0]))
        sumheatmap += _invheatmap

    return sumheatmap
def _net_inference_flip(self, imgFile, imgCategory):
    '''
    Run inference on an image and on its horizontal mirror, then sum the two
    heatmaps (the mirrored one is first converted back with _flip_out_heatmap).

    :param imgFile: path of the image, loaded with cv2.imread
    :param imgCategory: category passed to generate_input_mask
    :return: (outheatmap, scale) -- the summed heatmap with a leading axis of
             length 1, and the scale returned by pad_image_inference
    :raises AssertionError: if the network does not have 2 or 3 input layers
    '''
    import cv2
    from data_process import normalize_image, pad_image_inference

    # Explicit raises (same exception type and messages as the former asserts)
    # so the checks are not stripped when Python runs with -O.
    input_num = len(self.net.input_layers)
    if not input_num > 1:
        raise AssertionError("input layer need to more than 1")

    batch_size = 2
    kpNum = getKpNum(self.category)

    # np.float64 replaces the removed np.float alias (same dtype).
    input_img = np.zeros(shape=(batch_size, 512, 512, 3), dtype=np.float64)
    input_mask = np.zeros(shape=(batch_size, 256, 256, kpNum), dtype=np.float64)

    # load image and preprocess
    orgimage = cv2.imread(imgFile)
    padimg, scale = pad_image_inference(orgimage, 512, 512)
    flipimg = cv2.flip(padimg, flipCode=1)

    # slot 0: original image, slot 1: mirrored image
    input_img[0, :, :, :] = normalize_image(padimg)
    input_img[1, :, :, :] = normalize_image(flipimg)

    mask = generate_input_mask(imgCategory, (512, 512, kpNum))
    input_mask[0, :, :, :] = mask
    input_mask[1, :, :, :] = mask

    # inference
    if input_num == 2:
        heatmap = self.net.predict([input_img, input_mask])
    elif input_num == 3:
        heatmap = self.net.predict([input_img, input_mask, input_mask])
    else:
        raise AssertionError(str(input_num) + " should be 2 or 3 ")

    # sum heatmap
    summedheatmap = self._heatmap_sum(heatmap)

    orgheatmap = summedheatmap[0, :, :, :]

    # convert to same sequency with original heatmap
    flipheatmap = self._flip_out_heatmap(summedheatmap[1, :, :, :])

    # add original and flipped heatmaps (a sum, not a mean), then restore the batch axis
    outheatmap = flipheatmap + orgheatmap
    outheatmap = outheatmap[np.newaxis, :, :, :]

    return (outheatmap, scale)
def _prcoess_img(self, dfrow, inputSize, rotateFlag, flipFlag, cropFlag, nobgFlag):
    '''
    Load one annotated image and build the network input and its ground-truth heatmap.

    Steps: read keypoint annotations from the row, load the image, optionally
    flip, pad to inputSize, generate the heatmap, optionally rotate image and
    heatmap by the same random angle, then resize.

    :param dfrow: annotation row (pandas Series, as produced by DataFrame.iterrows)
    :param inputSize: target size pair; the heatmap is resized to half of it
    :param rotateFlag: allow a random rotation in [-40, 40) degrees (applied with 50% chance)
    :param flipFlag: allow a random flip (applied with 50% chance)
    :param cropFlag: currently unused (cropping is disabled)
    :param nobgFlag: currently unused in this method
    :return: (normalized image, ground-truth heatmap), or (None, None) when the
             image file cannot be read
    '''
    mlist = dfrow[getKpKeys(self.category)]
    # Explicit positional access: integer keys on a label-indexed Series are
    # deprecated in recent pandas. First entry is the image name, the rest are
    # the keypoint strings.
    imgName, kpStr = mlist.iloc[0], mlist.iloc[1:]

    # read kp annotation from csv file
    kpAnnlst = [KpAnno.readFromStr(_kpstr) for _kpstr in kpStr]

    assert (len(kpAnnlst) == getKpNum(self.category)), str(len(kpAnnlst))+" is not the same as "+str(getKpNum(self.category))

    xcvmat = cv2.imread(os.path.join(self.train_img_path, imgName))
    if xcvmat is None:
        return None, None

    # flip as first operation.
    # The random draw happens even when flipFlag is off, keeping the random
    # sequence independent of the flag.
    if random.choice([0, 1]) and flipFlag:
        xcvmat, kpAnnlst = self.flip_image(xcvmat, kpAnnlst)

    # pad image to the requested input size
    paddedImg, kpAnnlst = pad_image(xcvmat, kpAnnlst, inputSize[0], inputSize[1])

    assert (len(kpAnnlst) == getKpNum(self.category)), str(len(kpAnnlst)) + " is not the same as " + str(
        getKpNum(self.category))

    # ground truth heatmap at padded-image resolution; it is halved by the resize below
    trainGtHmap = self.__generate_hmap(paddedImg, kpAnnlst)

    if random.choice([0, 1]) and rotateFlag:
        rAngle = np.random.randint(-1*40, 40)
        # keypoints are not needed any more, so an empty list is rotated along
        rotatedImage, _ = rotate_image(paddedImg, list(), rAngle)
        rotatedGtHmap = rotate_mask(trainGtHmap, rAngle)
    else:
        rotatedImage = paddedImg
        rotatedGtHmap = trainGtHmap

    # resize image
    resizedImg = cv2.resize(rotatedImage, inputSize)
    resizedGtHmap = cv2.resize(rotatedGtHmap, (inputSize[0]//2, inputSize[1]//2))

    return normalize_image(resizedImg), resizedGtHmap
def generate_topk_mask_ohem(input_data, gthmap, keras_model, graph, topK, image_category, dynamicFlag=False):
    '''
    Build an OHEM mask and the matching masked ground truth for one sample.

    The model is run on the sample, a loss is computed per heatmap channel
    against the ground truth, and adjsut_mask turns those losses into a mask.

    :param input_data: pair (image, mask) for a single sample, without batch axis
    :param gthmap: ground truth heatmap, channels on the last axis
    :param keras_model: keras model used for the inference
    :param graph: tf graph entered around predict (workaround for a thread issue)
    :param topK: number of kp selected
    :param image_category: category name, only used when dynamicFlag is set
    :param dynamicFlag: when set, topK is replaced by getKpNum(image_category)//2
    :return: (ohem_mask, ohem_gthmap) with ohem_gthmap = ohem_mask * gthmap
    '''
    image, category_mask = input_data

    # add the batch axis expected by predict
    batched_image = image[np.newaxis, :, :, :]
    batched_mask = category_mask[np.newaxis, :, :, :]

    # a three-input model receives the original mask again as its ohem mask
    model_inputs = [batched_image, batched_mask]
    if len(keras_model.input_layers) == 3:
        model_inputs.append(batched_mask)

    with graph.as_default():
        predictions = keras_model.predict(model_inputs)

    # heatmap of last stage
    last_stage = predictions[-1]

    # one loss value per channel, prediction vs. ground truth
    channel_losses = [
        np_euclidean_l2(last_stage[0, :, :, ch], gthmap[:, :, ch])
        for ch in range(gthmap.shape[-1])
    ]

    # fixme: topk may different b/w category
    if dynamicFlag:
        topK = getKpNum(image_category) // 2

    # presumably keeps topK channels at 1.0 and zeroes the rest -- see adjsut_mask
    ohem_mask = adjsut_mask(channel_losses, category_mask, topK)

    return ohem_mask, ohem_mask * gthmap
def _net_inference_with_mask(self, imgFile, imgCategory):
    '''
    Run a single-image inference with the category mask as extra input.

    :param imgFile: path of the image, loaded with cv2.imread
    :param imgCategory: category passed to generate_input_mask
    :return: (heatmap, scale) -- the raw output of self.net.predict and the
             scale returned by pad_image_inference
    :raises AssertionError: if the network has fewer than 2 input layers
    '''
    import cv2
    from data_process import normalize_image, pad_image_inference

    # Explicit raise (same type and message as the former assert) so the check
    # is not stripped when Python runs with -O.
    input_num = len(self.net.input_layers)
    if not input_num > 1:
        raise AssertionError("input layer need to more than 1")

    # load image and preprocess
    img = cv2.imread(imgFile)

    img, scale = pad_image_inference(img, 512, 512)
    img = normalize_image(img)
    input_img = img[np.newaxis, :, :, :]

    input_mask = generate_input_mask(imgCategory, (512, 512, getKpNum(self.category)))
    input_mask = input_mask[np.newaxis, :, :, :]

    # inference: a two-input model gets (image, mask); otherwise the mask is
    # passed a second time for the ohem-mask input, as _net_inference_flip does
    if input_num == 2:
        heatmap = self.net.predict([input_img, input_mask])
    else:
        heatmap = self.net.predict([input_img, input_mask, input_mask])

    return (heatmap, scale)
def __generate_hmap(self, cvmat, kpAnnolst):
    '''
    Build the ground-truth heatmap for an image: one channel per keypoint,
    with a gaussian patch pasted around every annotated keypoint.

    :param cvmat: image array; only its height and width (shape[0], shape[1]) are used
    :param kpAnnolst: keypoint annotations exposing x, y and visibility;
                      entry i fills channel i
    :return: array of shape (height, width, getKpNum(self.category));
             channels of keypoints with visibility == -1 stay all zero
    '''
    imgHeight, imgWidth = cvmat.shape[0], cvmat.shape[1]

    # one channel per keypoint; np.float64 replaces the removed np.float alias
    gthmp = np.zeros((imgHeight, imgWidth, getKpNum(self.category)), dtype=np.float64)

    radius = 100
    # integer half-width: slice bounds must be ints on Python 3
    half = radius // 2
    # the patch is the same for every keypoint, so build it once
    gaussMask = make_gaussian(radius, radius, 20, None)

    for i, _kpAnn in enumerate(kpAnnolst):
        if _kpAnn.visibility == -1:
            continue

        kp_x, kp_y = int(_kpAnn.x), int(_kpAnn.y)

        # avoid out of boundary
        top_x, top_y = max(0, kp_x - half), max(0, kp_y - half)
        bottom_x, bottom_y = min(imgWidth, kp_x + half), min(imgHeight, kp_y + half)

        if bottom_x <= top_x or bottom_y <= top_y:
            # the patch does not touch the image at all; nothing to paste
            continue

        # how much of the patch was clipped on the top/left side
        top_x_offset = top_x - (kp_x - half)
        top_y_offset = top_y - (kp_y - half)

        gthmp[top_y:bottom_y, top_x:bottom_x, i] = gaussMask[
            top_y_offset:top_y_offset + bottom_y - top_y,
            top_x_offset:top_x_offset + bottom_x - top_x]

    return gthmp
# Select the GPU by PCI bus order so args.gpuID matches the hardware numbering.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpuID)

# TensorFlow session setup
config = tf.ConfigProto()

# Don't pre-allocate memory; allocate as-needed
config.gpu_options.allow_growth = True

# Memory cap expressed as a fraction of the GPU; 1.0 places no limit below the full device
config.gpu_options.per_process_gpu_memory_fraction = 1.0

# Hand a session with the above options to the Keras backend.
k.tensorflow_backend.set_session(tf.Session(config=config))

resuming = bool(args.resume)

# Both paths start from the same freshly constructed network object.
xnet = FashionNet(512, 512, getKpNum(args.category))

if resuming:
    # continue training from a saved model
    xnet.resume_train(args.category, args.resumeModel, args.network, args.initEpoch,
                      epochs=args.epochs, batchSize=args.batchSize)
else:
    # build the model from scratch and train it
    xnet.build_model(modelName=args.network, show=True)
    xnet.train(args.category, epochs=args.epochs, batchSize=args.batchSize, lrschedule=args.lrdecay)