tfconfig.gpu_options.visible_device_list = "%s" % (",".join([ "%s" % i for i in range(args.gpuid_start, args.gpuid_start + args.gpu) ])) with tf.Session(config=tfconfig) as sess: if not args.is_load_from_pb: initialize(config=args, sess=sess) for imgfile in tqdm(imglst, ascii=True): imgname = os.path.splitext(os.path.basename(imgfile))[0] frame = cv2.imread(imgfile) #frame = np.array(Image.open(imgfile)) im = frame.astype("float32") resized_image = resizeImage(im, args.short_edge_size, args.max_size) scale = (resized_image.shape[0] * 1.0 / im.shape[0] + \ resized_image.shape[1] * 1.0 / im.shape[1]) / 2.0 feed_dict = model.get_feed_dict_forward(resized_image) if args.add_mask: sess_input = [ model.final_boxes, model.final_labels, model.final_probs, model.final_masks ] final_boxes, final_labels, final_probs, final_masks = sess.run( sess_input, feed_dict=feed_dict) else: sess_input = [
def _load_forward_sample(config, filename):
    """Load one frame and its detector proposals for `forward`.

    Args:
        config: run configuration; reads `framepath`, `annopath`,
            `short_edge_size` and `max_size`.
        filename: frame id without extension. The part before "_F_" is used
            as the sub-directory (video name) under `config.framepath`.

    Returns:
        A `(dataset, frcnn_boxes, frcnn_probs)` tuple: the single-image
        `Dataset` to feed the tester, plus the untouched proposal boxes and
        probabilities read from `<annopath>/<filename>.npz`.

    Raises:
        IOError: if the image cannot be read by OpenCV.
    """
    videoname = filename.split("_F_")[0]
    image = os.path.join(config.framepath, videoname, "%s.jpg" % filename)
    box_npz = os.path.join(config.annopath, "%s.npz" % filename)

    # Read only the two arrays that are used and close the archive right away
    # instead of leaking one open file handle per frame.
    with np.load(box_npz) as npz:
        frcnn_boxes = npz['frcnn_boxes']  # [K, 4] per the original notes
        frcnn_probs = npz['frcnn_probs']  # [C, K] per the original notes

    im = cv2.imread(image, cv2.IMREAD_COLOR)
    if im is None:
        # cv2.imread signals failure by returning None; fail with the path
        # rather than with an AttributeError further down.
        raise IOError("cannot read image %s" % image)
    resized_image = resizeImage(im, config.short_edge_size, config.max_size)

    # The boxes are copied so that whatever the model pipeline does to them
    # cannot alter the arrays later used for NMS.
    data = Dataset(
        {"imgs": [image], "imgdata": [], "gt": [{"boxes": frcnn_boxes.copy()}]},
        add_gt=True)
    data.data['imgdata'] = [im]
    data.data['resized_image'] = [resized_image]
    return data, frcnn_boxes, frcnn_probs


def _detections_to_json(final_boxes, final_probs, final_labels):
    """Convert NMS output into the list of dicts written to the result file.

    Boxes arrive as (x1, y1, x2, y2) and are emitted as (x, y, w, h). The
    input arrays are not modified.
    """
    pred = []
    for box, prob, label in zip(final_boxes, final_probs, final_labels):
        xywh = list(box)
        xywh[2] -= xywh[0]
        xywh[3] -= xywh[1]
        cat_id = int(label)
        pred.append({
            "category_id": cat_id,
            "cat_name": targetid2class[cat_id],
            "score": float(round(prob, 4)),
            "bbox": [float(round(v, 1)) for v in xywh],
            "segmentation": None,
        })
    return pred


def forward(config):
    """Re-score detector boxes with the model and write one JSON per frame.

    `config.annopath` holds the box output of the Fast R-CNN model (one
    `.npz` per frame with `frcnn_boxes` and `frcnn_probs`). For every frame
    listed in `config.filelst` the boxes are classified again, optionally
    multiplied with the original probabilities (`config.use_mul`), passed
    through `nms_wrapper`, and saved to `<config.outbasepath>/<frame>.json`.

    Args:
        config: parsed run configuration (argparse-style namespace).

    Raises:
        IOError: if a listed frame image cannot be read.
        AssertionError: if `config.num_epochs` is not 1.
    """
    with open(config.filelst, "r") as filelst:
        all_filenames = [
            os.path.splitext(os.path.basename(line.strip()))[0]
            for line in filelst
        ]
    print("total image to test %s" % len(all_filenames))

    if not os.path.exists(config.outbasepath):
        os.makedirs(config.outbasepath)

    gpuids = list(range(config.gpuid_start, config.gpuid_start + config.gpu))
    models = [get_model(config, gpuid, controller=config.controller)
              for gpuid in gpuids]
    tester = Tester(models, config)

    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    if not config.use_all_mem:
        # grow GPU memory on demand instead of grabbing all of it up front
        tfconfig.gpu_options.allow_growth = True
    # only expose the configured GPUs to this session
    tfconfig.gpu_options.visible_device_list = ",".join(
        "%s" % gpuid for gpuid in gpuids)

    with tf.Session(config=tfconfig) as sess:
        initialize(load=True, load_best=config.load_best, config=config,
                   sess=sess)
        # num_epoch should be 1
        assert config.num_epochs == 1

        for filenames in tqdm(grouper(all_filenames, config.im_batch_size),
                              ascii=True):
            # grouper pads the last group with None
            filenames = [name for name in filenames if name is not None]
            this_batch_num = len(filenames)
            if this_batch_num != config.im_batch_size:
                # Pad an incomplete batch with frames from the start of the
                # list; their outputs are discarded below.
                # NOTE(review): if the whole list is shorter than the batch
                # size the batch stays incomplete -- confirm Tester copes.
                need = config.im_batch_size - this_batch_num
                filenames.extend(all_filenames[:need])

            datas = []  # one Dataset per image
            ori_probs = []
            ori_frcnn_boxes = []
            for filename in filenames:
                data, frcnn_boxes, frcnn_probs = _load_forward_sample(
                    config, filename)
                datas.append(data)
                ori_frcnn_boxes.append(frcnn_boxes)
                ori_probs.append(frcnn_probs)
            ori_box_nums = [len(boxes) for boxes in ori_frcnn_boxes]

            # Each image can have many boxes, so the batch is split into
            # several jobs of at most test_box_batch_size boxes per image.
            mini_datas = split_batch_by_box_num(
                ([], datas), config.test_box_batch_size)

            outputs = [[] for _ in range(this_batch_num)]
            for mini_data in mini_datas:
                this_outputs = tester.step(sess, mini_data)
                for i in range(this_batch_num):
                    outputs[i].append(this_outputs[i][0])

            for i in range(this_batch_num):
                # re-assemble the chunks and drop any rows beyond the real
                # number of boxes for this image
                output = np.concatenate(outputs[i], axis=0)
                output = output[:ori_box_nums[i], :]

                # [K, num_class] -> drop column 0 -> [C, K]
                dcr_prob = np.transpose(output[:, 1:], axes=[1, 0])
                num_fg_class = dcr_prob.shape[0]

                final_probs = dcr_prob
                # Read the flag from the config that was passed in; the
                # previous code reached for a module-level `args`.
                if config.use_mul:
                    final_probs = ori_probs[i] * dcr_prob

                ori_frcnn_box = ori_frcnn_boxes[i]
                if len(ori_frcnn_box.shape) == 2:
                    # class-agnostic boxes: repeat them once per class
                    ori_frcnn_box = np.tile(
                        np.expand_dims(ori_frcnn_box, axis=0),
                        [num_fg_class, 1, 1])

                final_boxes, final_labels, final_probs = nms_wrapper(
                    ori_frcnn_box, final_probs, config)
                pred = _detections_to_json(
                    final_boxes, final_probs, final_labels)

                resultfile = os.path.join(
                    config.outbasepath, "%s.json" % filenames[i])
                with open(resultfile, "w") as f:
                    json.dump(pred, f)
def get_feed_dict(self, batch, is_train=False):
    """Build the feed dict for one single-image batch.

    Args:
        batch: object whose `data` dict holds `imgs` (one image path) and
            `gt` (one annotation dict with `boxes` as (x1, y1, x2, y2) rows
            and, for training, `labels`). Optional `imgdata` and
            `resized_image` entries supply an already decoded / resized
            image and skip the corresponding work here.
        is_train: when true, optionally flip the image horizontally
            (`config.flip_image`), sample exactly
            `config.train_box_batch_size` boxes and feed labels (and sample
            weights if `config.use_weighted_loss`).

    Returns:
        dict mapping `self.boxes`, `self.image` and `self.is_train` (plus
        `self.gt_labels` / `self.sample_weights` when training) to values.

    Raises:
        AssertionError: if the batch does not hold exactly one image, the
            image cannot be read, labels and boxes differ in length, or
            there are no boxes.
    """
    config = self.config
    data = batch.data
    assert len(data['imgs']) == 1  # one image per gpu for now

    if "imgdata" in data:
        image = data['imgdata'][0]
    else:
        image_path = data['imgs'][0]
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # report the path; the image itself is None when this fires
        assert image is not None, image_path
    image = image.astype("float32")
    h, w = image.shape[:2]  # original height/width

    if "resized_image" in data:
        resized_image = data['resized_image'][0]
    else:
        resized_image = resizeImage(
            image, config.short_edge_size, config.max_size)
    newh, neww = resized_image.shape[:2]

    anno = data['gt'][0]
    o_boxes = anno['boxes']  # (x1, y1, x2, y2), not the coco format
    labels = None
    if is_train:
        labels = anno['labels']
        assert len(labels) == len(o_boxes)

    # Scale the boxes to the resized image. Fancy indexing returns a copy,
    # so the caller's annotation array is left untouched.
    boxes = o_boxes[:, [0, 2, 1, 3]]  # (x1, x2, y1, y2)
    boxes = boxes.reshape((-1, 2, 2))  # (x1, x2), (y1, y2)
    boxes[:, 0] = boxes[:, 0] * (neww * 1.0 / w)
    boxes[:, 1] = boxes[:, 1] * (newh * 1.0 / h)

    # random horizontal flip, training only
    if is_train and config.flip_image and random.random() > 0.5:
        resized_image = cv2.flip(resized_image, 1)  # 1 for horizontal
        boxes[:, 0] = neww - boxes[:, 0]
        # x_min becomes x_max after the flip; copy the reversed view so the
        # assignment never reads from memory it is writing to
        boxes[:, 0, :] = boxes[:, 0, ::-1].copy()

    boxes = boxes.reshape((-1, 4))[:, [0, 2, 1, 3]]  # (x1, y1, x2, y2)
    assert len(boxes) > 0

    feed_dict = {}
    if is_train:
        box_batch_size = config.train_box_batch_size
        # 1. replicate random boxes if there are not enough for a mini-batch
        if len(boxes) < box_batch_size:
            need = box_batch_size - len(boxes)
            replicate_indexes = np.random.choice(
                len(boxes), size=need, replace=True)
            full_indexes = np.concatenate(
                [np.arange(len(boxes)), replicate_indexes])
            boxes = boxes[full_indexes, :]
            labels = labels[full_indexes]
        # 2. randomly pick the mini-batch of boxes
        selected = np.random.choice(
            len(boxes), size=box_batch_size, replace=False)
        boxes = boxes[selected, :]
        labels = labels[selected]
        feed_dict[self.gt_labels] = labels

        # different weight for each sample in the mini-batch
        if config.use_weighted_loss:
            feed_dict[self.sample_weights] = np.array(
                [config.class_weights[gt_class] for gt_class in labels],
                dtype="float")

    feed_dict[self.boxes] = boxes
    feed_dict[self.image] = resized_image
    feed_dict[self.is_train] = is_train
    return feed_dict
def _build_models(config, gpuids, taskids):
    """Create one model replica for every (gpu id, task id) pair."""
    return [
        get_model(config, gpuid=gpuid, task=task, controller=config.controller)
        for gpuid, task in zip(gpuids, taskids)
    ]


def _run_validation(sess, tester, val_data, num_val_steps, eval_classes,
                    config):
    """Score the validation set and compute per-class average precision.

    Every validation image is read and resized here, its boxes are pushed
    through `tester` in chunks of `config.test_box_batch_size`, and the
    resulting class scores are paired with binary ground-truth labels.

    Returns:
        `(aps, aps_mul)`: two lists of `(class_name, AP)` in the order of
        `eval_classes`. `aps` uses the model scores alone, `aps_mul` the
        model scores multiplied by the annotation's `frcnn_probs`.
    """
    scores = {name: [] for name in eval_classes}
    scores_mul = {name: [] for name in eval_classes}

    val_batches_iter = val_data.get_batches(
        config.im_batch_size, num_batches=num_val_steps, shuffle=False)
    for val_batch_ in tqdm(val_batches_iter, total=num_val_steps, ascii=True,
                           smoothing=1):
        # several images at a time for parallel inference on multiple gpus
        _, val_batches = val_batch_

        for val_batch in val_batches:
            # load and resize the image here so get_feed_dict can reuse it
            image_path = val_batch.data['imgs'][0]
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            # report the path; the image itself is None when this fires
            assert image is not None, image_path
            image = image.astype("float32")
            val_batch.data['imgdata'] = [image]
            val_batch.data['resized_image'] = [resizeImage(
                image, config.short_edge_size, config.max_size)]

        # an image can have a lot of boxes, so break them down into chunks
        split_val_batches = split_batch_by_box_num(
            val_batch_, config.test_box_batch_size)
        ori_box_nums = [b.data['gt'][0]['boxes'].shape[0]
                        for b in val_batches]

        outputs = [[] for _ in val_batches]
        for split_val_batch in split_val_batches:
            this_outputs = tester.step(sess, split_val_batch)
            for i, this_output in enumerate(this_outputs):
                outputs[i].append(this_output[0])  # [K, num_class]

        for val_batch, chunks, box_num in zip(val_batches, outputs,
                                              ori_box_nums):
            # re-assemble the chunks and drop rows beyond the real box count
            box_yp = np.concatenate(chunks, axis=0)[:box_num, :]
            anno = val_batch.data['gt'][0]  # one val_batch is a single image
            assert len(anno['boxes']) == len(anno['labels']) == len(box_yp)

            for eval_class in eval_classes:
                class_idx = targetClass2id[eval_class]
                bin_labels = anno['labels'] == class_idx
                this_yp = box_yp[:, class_idx]  # [K]
                # frcnn_probs has no background row, hence the -1
                this_yp_mul = this_yp * anno['frcnn_probs'][class_idx - 1, :]
                scores[eval_class].extend(zip(this_yp, bin_labels))
                scores_mul[eval_class].extend(zip(this_yp_mul, bin_labels))

    aps = [(name, compute_AP(scores[name])) for name in eval_classes]
    aps_mul = [(name, compute_AP(scores_mul[name])) for name in eval_classes]
    return aps, aps_mul


def train(config):
    """Train the model, periodically checkpointing and validating it.

    Sets `config.class_weights`, `config.train_num_examples` and toggles
    `config.is_train` while building the train / eval model replicas. Every
    `config.save_period` steps (and optionally on the first step after
    loading a checkpoint) the model is saved to `config.save_dir`, evaluated
    on the validation set, and saved to `config.save_dir_best` when the
    better of mean AP / mean AP_mul improves.

    The process exits with status 1 if a training step raises or the loss
    becomes NaN.

    Args:
        config: parsed run configuration (argparse-style namespace).
    """
    # Classes used for validation; per-class details are logged in this order.
    eval_classes = ("Vehicle", "Person", "Prop", "Push_Pulled_Object", "Bike")

    # Class ids, for reference:
    #   "BG":0, "Vehicle":1, "Person":2, "Parking_Meter":3, "Tree":4,
    #   "Skateboard":5, "Prop_Overshoulder":6, "Construction_Barrier":7,
    #   "Door":8, "Dumpster":9, "Push_Pulled_Object":10,
    #   "Construction_Vehicle":11, "Prop":12, "Bike":13, "Animal":14
    # Weights for the weighted loss, if used.
    config.class_weights = {i: 1.0 for i in range(config.num_class)}
    for class_id in (10, 12, 13):
        config.class_weights[class_id] = 2.0

    train_data = read_data(config, config.filelst, config.annopath,
                           config.framepath, is_train=True)
    val_data = read_data(config, config.valfilelst, config.valannopath,
                         config.valframepath, is_train=False)
    config.train_num_examples = train_data.num_examples

    # the total number of steps (iterations) the model will run
    steps_per_epoch = int(math.ceil(
        train_data.num_examples / float(config.im_batch_size)))
    num_steps = steps_per_epoch * config.num_epochs
    num_val_steps = int(math.ceil(
        val_data.num_examples / float(config.im_batch_size)))

    # example: model_per_gpu=2, gpu=2, gpuid_start=0
    #   gpuids -> [0, 0, 1, 1], taskids -> [0, 1, 0, 1]
    visible_gpuids = list(
        range(config.gpuid_start, config.gpuid_start + config.gpu))
    gpuids = sorted(visible_gpuids * config.model_per_gpu)
    taskids = list(range(config.model_per_gpu)) * config.gpu

    models = _build_models(config, gpuids, taskids)
    # the evaluation replicas are built with is_train switched off
    config.is_train = False
    models_eval = _build_models(config, gpuids, taskids)
    config.is_train = True

    trainer = Trainer(models, config)
    tester = Tester(models_eval, config)

    saver = tf.train.Saver(max_to_keep=5)  # regular checkpoints
    bestsaver = tf.train.Saver(max_to_keep=5)  # best models only

    # allow_soft_placement: tf picks another device if the requested
    # tf.device(*) is not available
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    if not config.use_all_mem:
        # grow GPU memory on demand instead of grabbing all of it up front
        tfconfig.gpu_options.allow_growth = True
    # only expose the configured GPUs to this session
    tfconfig.gpu_options.visible_device_list = ",".join(
        "%s" % gpuid for gpuid in visible_gpuids)

    with tf.Session(config=tfconfig) as sess:
        initialize(load=config.load, load_best=config.load_best,
                   config=config, sess=sess)

        is_start = True
        best = (-1.0, 1, "AP_mul")  # (performance, step, metric name)
        loss_me, box_label_loss_me, wd_me, lr_me = [
            FIFO_ME(config.loss_me_step) for _ in range(4)]

        train_batches = train_data.get_batches(
            config.im_batch_size, num_batches=num_steps)
        for batch in tqdm(train_batches, total=num_steps, ascii=True,
                          smoothing=1):
            # starts from 0 or from the previously saved step
            global_step = sess.run(models[0].global_step) + 1

            first_eval_after_load = (
                config.load and is_start and
                ((config.ignore_vars is None) or config.force_first_eval))
            if (global_step % config.save_period == 0) or first_eval_after_load:
                tqdm.write("step:%s/%s (epoch:%.3f)" % (
                    global_step, num_steps,
                    (config.num_epochs * global_step / float(num_steps))))
                tqdm.write("\tsaving model %s..." % global_step)
                saver.save(sess, os.path.join(config.save_dir, "model"),
                           global_step=global_step)
                tqdm.write("\tdone")

                if config.skip_first_eval and is_start:
                    tqdm.write("skipped first eval...")
                    validation_performance = config.best_first
                    this_val_best_type = "null"
                else:
                    aps, aps_mul = _run_validation(
                        sess, tester, val_data, num_val_steps, eval_classes,
                        config)
                    average_ap = np.mean([ap for _, ap in aps])
                    average_ap_mul = np.mean([ap for _, ap in aps_mul])
                    validation_performance = max(
                        [average_ap_mul, average_ap])
                    this_val_best_type = (
                        "AP_mul" if average_ap_mul >= average_ap else "AP")

                    details = "|".join(
                        ["%s:%.5f" % (classname, ap)
                         for classname, ap in aps])
                    details_mul = "|".join(
                        ["%s:%.5f" % (classname, ap)
                         for classname, ap in aps_mul])
                    tqdm.write("\tval in %s at step %s, mean AP:%.5f, details: %s ---- mean AP_mul is %.5f, details: %s. ---- previous best at %s is %.5f, type: %s"%(num_val_steps, global_step, average_ap, details, average_ap_mul, details_mul, best[1], best[0], best[2]))

                # The explicit None check keeps this valid on Python 3,
                # where None cannot be ordered against a float.
                if (validation_performance is not None and
                        validation_performance > best[0]):
                    tqdm.write("\tsaving best model %s..." % global_step)
                    bestsaver.save(
                        sess, os.path.join(config.save_dir_best, "model"),
                        global_step=global_step)
                    tqdm.write("\tdone")
                    best = (validation_performance, global_step,
                            this_val_best_type)

                # NOTE(review): the flag is cleared after the first save/eval
                # round; confirm this nesting against the original layout.
                is_start = False

            # skip incomplete batches, usually the last few ones
            if len(batch[1]) != config.gpu:
                continue

            try:
                loss, wds, box_label_losses, lr = trainer.step(sess, batch)
            except Exception as err:
                print(err)
                print("trainer error, batch files:%s" % (
                    [b.data['imgs'] for b in batch[1]],))
                # non-zero status so that callers see the failure
                sys.exit(1)

            if math.isnan(loss):
                tqdm.write(
                    "warning, nan loss: loss:%s, box_label_loss:%s" % (
                        loss, box_label_losses))
                print("batch:%s" % ([b.data['imgs'] for b in batch[1]],))
                sys.exit(1)

            # use moving averages for the reported numbers
            loss_me.put(loss)
            lr_me.put(lr)
            for wd, box_label_loss in zip(wds, box_label_losses):
                wd_me.put(wd)
                box_label_loss_me.put(box_label_loss)

            if global_step % config.show_loss_period == 0:
                tqdm.write("step %s, moving average: learning_rate %.6f, loss %.6f, weight decay loss %.6f, box_label_loss %.6f" % (global_step, lr_me.me(), loss_me.me(), wd_me.me(), box_label_loss_me.me()))