def train_fpn():
    """Train the FPN/RPN model across all available GPUs.

    Builds the FPN network under a ``MirroredStrategy`` scope, optionally
    warm-starts backbone layers from a pre-trained ResNet checkpoint,
    trains for 10 epochs with SGD + early stopping on training loss, and
    saves the final weights to ``fpn_model8.h5``.
    """
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        net = resnet()
        rpn_model = net.fpn_net()
        if os.path.exists(cfg.RESNET_PATH):
            print("Successful loading the weight from pre training model.")
            # by_name=True: only layers whose names match the checkpoint get
            # weights, so the FPN-specific heads stay freshly initialised.
            rpn_model.load_weights(cfg.RESNET_PATH, by_name=True)
        # `learning_rate` replaces the deprecated `lr` keyword.
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)
        # One (cls, reg) loss pair per pyramid level: small / mid / large.
        rpn_model.compile(optimizer=optimizer,
                          loss=[rpn_loss_cls, rpn_reg_loss,
                                rpn_loss_cls, rpn_reg_loss,
                                rpn_loss_cls, rpn_reg_loss])
    early_stop = tf.keras.callbacks.EarlyStopping(monitor="loss",
                                                  patience=3,
                                                  verbose=2)
    gen = data_generator()
    # NOTE: `shuffle` is ignored by Model.fit when x is a generator, so it is
    # not passed here; any shuffling must happen inside data_generator.
    rpn_model.fit(x=gen.g,
                  steps_per_epoch=gen.step_per_epoch,
                  epochs=10,
                  verbose=1,
                  initial_epoch=0,
                  callbacks=[early_stop],
                  max_queue_size=32,
                  workers=8,
                  use_multiprocessing=True)
    rpn_model.save_weights("fpn_model8.h5")
def main():
    """Extract per-clip backbone features for every video in the dataset.

    Iterates the dataset sequentially (``shuffle=False`` so clips of the
    same video arrive in order), runs the chosen backbone ('c3d' or
    'resnet') in inference mode on the selected device, and streams the
    resulting features to ``args.save_dir`` via ``FeaturesWriter``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args = parser.parse_args()
    set_logger(log_file=args.log_file, debug_mode=args.debug_mode)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    cudnn.benchmark = True

    train_loader = VideoIter(dataset_path=args.dataset_path,
                             annotation_path=args.annotation_path,
                             clip_length=args.clip_length,
                             frame_stride=args.frame_interval,
                             video_transform=build_transforms(),
                             name='Features extraction')
    train_iter = torch.utils.data.DataLoader(train_loader,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    # Loading network
    if args.feature_extractor == 'c3d':
        network = C3D(pretrained=args.pretrained_c3d)
    elif args.feature_extractor == 'resnet':
        network = resnet(200)
        network.load_state_dict(
            torch.load('network/r3d200_K_200ep.pth')['state_dict'])
    else:
        # Fail fast instead of hitting a NameError on `network` below.
        raise ValueError(
            "Unknown feature extractor: {!r}".format(args.feature_extractor))
    network = network.to(device)

    if not path.exists(args.save_dir):
        mkdir(args.save_dir)

    features_writer = FeaturesWriter()

    for data, _target, sampled_idx, dirs, vid_names in tqdm(train_iter):
        with torch.no_grad():
            outputs = network(data.to(device)).detach().cpu().numpy()

        for i, (vid_dir, vid_name, start_frame) in enumerate(
                zip(dirs, vid_names, sampled_idx.cpu().numpy())):
            # `vid_dir` was named `dir` before, shadowing the builtin.
            vid_dir = path.join(args.save_dir, vid_dir)
            features_writer.write(feature=outputs[i],
                                  video_name=vid_name,
                                  idx=start_frame,
                                  dir=vid_dir)

    features_writer.dump()
def test_rpn():
    """Visual sanity check of the trained RPN.

    Runs 100 batches through the FPN/RPN and draws each pyramid level's
    proposals (strides 8 / 16 / 32) onto the corresponding input image.
    """
    net = resnet()
    generator = data_generator_rpn()
    fpn_rpn = net.fpn_net()
    fpn_rpn.load_weights(
        "/Network/Servers/lab.cs.uwlax.edu/nfs-homes/zhou2494/Desktop/expirements/fpn_model8.h5",
        by_name=True)
    for _ in range(100):
        data = next(generator.generator)
        output = fpn_rpn.predict_on_batch(data[0])
        (xclass_small, xloc_small,
         xclass_mid, xloc_mid,
         xclass_large, xloc_large) = output
        # `anchor_type` was called `type` before, shadowing the builtin.
        cy, cx, anchor_type = generate_cx(xclass_large)
        draw_rpn_result(cy, cx, anchor_type, xloc_large, data[0], 8, data[2])
        cy, cx, anchor_type = generate_cx(xclass_small)
        draw_rpn_result(cy, cx, anchor_type, xloc_small, data[0], 32, data[2])
        cy, cx, anchor_type = generate_cx(xclass_mid)
        draw_rpn_result(cy, cx, anchor_type, xloc_mid, data[0], 16, data[2])
def check_data_generator(generator, rpn_y, img_info, rpn_stride, name):
    """Draw ground-truth and RPN-target boxes to verify the data generator.

    Writes an image named ``tst<index><name>.png`` showing the ground-truth
    box (red), every anchor marked positive (green), and the same anchor
    after applying its regression target (blue).

    :param generator: data generator used to load the image.
    :param rpn_y: tuple ``(box_cls, box_loc)`` — the RPN class target
        and regression target for one feature-map level; shapes are
        [1, h, w, ...] (exact channel counts depend on the anchor count).
    :param img_info: roidb entry for the image (``index``, ``change_box`` …).
    :param rpn_stride: stride of the feature-map level being checked.
    :param name: suffix appended to the output file name.
    """
    std = cfg.TRAIN.std_scaling
    box_cls, box_loc = rpn_y
    f_width = box_loc.shape[2]
    f_height = box_loc.shape[1]
    index, box_loc = get_index(box_cls, box_loc)
    # Regression targets were stored scaled by `std`; undo it for drawing.
    draw_boxes_reg = box_loc[index] / std
    im, box = loadimg(img_info, generator)
    print(img_info.get('index'))
    np_im = np.array(im)

    gt_box = img_info.get('change_box')
    gt_w = gt_box[2] - gt_box[0]
    gt_h = gt_box[3] - gt_box[1]
    # Ground truth in red.
    np_im = drawbox(np_im, (255, 0, 0), [gt_box[0], gt_box[1], gt_w, gt_h], 3)

    # Enumerate every anchor [x, y, w, h] on this level's feature-map grid.
    larges = generator_anchor()
    record = []
    for fy in range(f_height):
        for fx in range(f_width):
            for large in larges:
                record.append([fx * rpn_stride - large[0] / 2,
                               fy * rpn_stride - large[1] / 2,
                               large[0], large[1]])
    record = np.array(record)
    draw_box = record[index]
    print('valid positive number is : ' + str(len(draw_boxes_reg)))
    for i in range(len(draw_boxes_reg)):
        # Raw positive anchor in green; regressed anchor in blue.
        np_im = drawbox(np_im, (0, 255, 0), draw_box[i], 1)
        box = transform_reg(draw_box[i], draw_boxes_reg[i])
        np_im = drawbox(np_im, (0, 0, 255), box, 1)
    cv2.imwrite('tst' + img_info.get('index') + name + '.png', np_im)
def __init__(self):
    """Create the training data generator backed by a ResNet network."""
    # Delegate dataset setup to the base class using the 'train' split.
    super().__init__('train', resnet())
    # Cache the FPN feature-map size for a 960x960 input.
    # (`map_whidth` keeps its historical spelling: external code reads it.)
    self.map_whidth, self.map_height = self.network.cal_fm_size(
        960, 960, isFPN=True)
def test():
    """Run the full FPN + Fast R-CNN pipeline over the test set.

    For every test image: generate proposals from the three RPN pyramid
    levels, classify/regress them with the Fast R-CNN heads, keep
    detections whose confidence exceeds 0.8, apply per-class NMS, and
    write the annotated image to ``tst<index>.png``.

    Fixes a copy-paste bug where the mid/small branches recorded the
    *large* level's confidence score. Write-only locals (``label_record``,
    ``roi_record``, ``gt_box``) from the original were removed.
    """
    data = imdb('test')
    data_gen = data_generator_rpn()
    img_input = tf.keras.layers.Input((None, None, 3))
    network = resnet()
    fpn_net = network.fpn_net(img_input)
    map_large, map_mid, map_small = network.resnet_50(is_share=True,
                                                      inpt=img_input,
                                                      is_FPN=True)
    roi_input = tf.keras.layers.Input(shape=(None, 4))
    classfier_large = network.fast_rcnn(map_large, roi_input, 8)
    classfier_mid = network.fast_rcnn(map_mid, roi_input, 16)
    classfier_small = network.fast_rcnn(map_small, roi_input, 32)
    model_classfier = tf.keras.models.Model(
        inputs=[img_input, roi_input],
        outputs=[classfier_large, classfier_mid, classfier_small])
    # Rename layers so by_name weight loading maps onto the "_base" names
    # used when the classifier heads were trained.
    for layer in model_classfier.layers:
        layer._name = layer.name + "_base"
    fpn_net.load_weights(
        '/Network/Servers/lab.cs.uwlax.edu/nfs-homes/zhou2494/Desktop/expirements/fpn_model8.h5',
        by_name=True)
    model_classfier.load_weights(
        '/Network/Servers/lab.cs.uwlax.edu/nfs-homes/zhou2494/Desktop/expirements/final.h5',
        by_name=True)

    record = 0
    right = 0  # NOTE(review): never incremented below; kept for the report.
    for img_info in data.roidb:
        record += 1
        # Renamed from `path`, which shadowed the os.path alias used
        # elsewhere in this module.
        img_path = os.path.join(cfg.IMAGESET, img_info.get("index") + ".jpg")
        print(img_info.get("index"))
        img = Image.open(img_path)
        img, box = data_gen.handle_origin_image(img, img_info.get('box'),
                                                img_info)
        img = np.array(img)[None]

        (xclass_small, xloc_small, xclass_mid, xloc_mid,
         xclass_large, xloc_large) = fpn_net.predict(img)
        rois_large = rpn_to_roi(xclass_large[0, :, :, :],
                                xloc_large[0, :, :, :], 8)
        print("the number of rois is :" + str(len(rois_large)))
        rois_mid = rpn_to_roi(xclass_mid[0, :, :, :],
                              xloc_mid[0, :, :, :], 16)
        print("the number of rois is :" + str(len(rois_mid)))
        rois_small = rpn_to_roi(xclass_small[0, :, :, :],
                                xloc_small[0, :, :, :], 32)
        print("the number of rois is :" + str(len(rois_small)))
        rois = np.concatenate([rois_small, rois_large, rois_mid],
                              axis=0)[:, :4]
        print("the number of rois is :" + str(len(rois)))
        rois_expend = np.expand_dims(rois, axis=0)

        reg_record = []
        cls_confidence = []
        index_record = []
        (class_large, loc_large, class_mid, loc_mid,
         class_small, loc_small) = model_classfier.predict([img, rois_expend])
        cls_score_large = np.squeeze(class_large)
        loc_reg_large = np.squeeze(loc_large)
        cls_score_mid = np.squeeze(class_mid)
        loc_reg_mid = np.squeeze(loc_mid)
        cls_score_small = np.squeeze(class_small)
        loc_reg_small = np.squeeze(loc_small)
        # Undo the regression-target scaling applied at training time.
        loc_reg_large = loc_reg_large / cfg.TRAIN.std_scaling
        loc_reg_mid = loc_reg_mid / cfg.TRAIN.std_scaling
        loc_reg_small = loc_reg_small / cfg.TRAIN.std_scaling

        for i in range(cls_score_large.shape[0]):
            w = rois[i][2] - rois[i][0]
            h = rois[i][3] - rois[i][1]
            # ROI as [cx, cy, w, h] for the regression transform.
            roi = [rois[i][0] + w / 2, rois[i][1] + h / 2, w, h]
            # Same selection logic for each pyramid level; the last class
            # index (NUM_CLASSES - 1) is background.
            for cls_score, loc_reg in ((cls_score_large, loc_reg_large),
                                       (cls_score_mid, loc_reg_mid),
                                       (cls_score_small, loc_reg_small)):
                index = np.argmax(cls_score[i, :])
                if index != cfg.NUM_CLASSES - 1 and max(cls_score[i, :]) > 0.8:
                    index_record.append(index)
                    roi_reg = cal_transform(
                        roi, loc_reg[i][index * 4:index * 4 + 4])
                    reg_record.append(roi_reg)
                    # BUG FIX: mid/small previously appended the *large*
                    # level's score here.
                    cls_confidence.append(cls_score[i, index])

        if len(reg_record) == 0:
            continue
        reg_record = np.array(reg_record)
        cls_confidence = np.expand_dims(np.array(cls_confidence), axis=1)
        index_record = np.expand_dims(np.array(index_record), axis=1)
        np_im = np.squeeze(img)
        # Each row: 4 box values, then confidence (col 4), class (col 5).
        reg_record = np.concatenate((reg_record, cls_confidence, index_record),
                                    axis=1)
        for cls_num in range(8):
            index = np.where(np.equal(reg_record[:, 5], cls_num))[0]
            name = cfg.CLASSES_NAME[cls_num]
            if len(index) == 0:
                continue
            boxes = nms(reg_record[index])
            for roi in boxes:
                np_im = debug.drawbox(np_im, (0, 0, 255), roi, 1, name)
        cv2.imwrite('tst' + img_info.get('index') + '.png', np_im)
    print(str(right))
    print(str(record))
def train_frcnn():
    """Train the Fast R-CNN classifier heads on frozen-RPN proposals.

    Builds the FPN/RPN and the per-level Fast R-CNN heads over a shared
    backbone, loads pre-trained RPN weights, then for each batch:
    predicts proposals with the RPN, matches them to ground truth, and
    trains the classifier heads in four 2-image sub-batches. After 3
    warm-up epochs the learning rates are dropped to 0.001. Weights are
    saved to ``final.h5`` / ``finalcomb.h5``.
    """
    network = resnet()
    img_input = keras.layers.Input(shape=(None, None, 3))
    roi_input = keras.layers.Input(shape=(None, 4))
    fpn_model = network.fpn_net(img_input)
    map_large, map_mid, map_small = network.resnet_50(is_share=True,
                                                      inpt=img_input,
                                                      is_FPN=True)
    (xclass_large, xloc_large, xclass_mid, xloc_mid,
     xclass_small, xloc_small) = network.fpn_output(img_input)
    classfier_large = network.fast_rcnn(map_large, roi_input, 8)
    classfier_mid = network.fast_rcnn(map_mid, roi_input, 16)
    classfier_small = network.fast_rcnn(map_small, roi_input, 32)
    model_classfier = keras.models.Model(
        inputs=[img_input, roi_input],
        outputs=[classfier_large, classfier_mid, classfier_small])
    # Rename so by_name loading matches the "_base"-suffixed layer names.
    for layer in model_classfier.layers:
        layer._name = layer.name + "_base"
    fpn_model.load_weights(
        "/Network/Servers/lab.cs.uwlax.edu/nfs-homes/zhou2494/Desktop/expirements/fpn_model8.h5"
    )
    model_classfier.load_weights(
        '/Network/Servers/lab.cs.uwlax.edu/nfs-homes/zhou2494/Desktop/expirements/fpn_model8.h5',
        by_name=True)
    # Combined model exists only so all weights can be saved together.
    comb_model = keras.models.Model([img_input, roi_input], [
        xclass_large, xloc_large, xclass_mid, xloc_mid, xclass_small,
        xloc_small, classfier_large, classfier_mid, classfier_small
    ])
    optimizer = keras.optimizers.SGD(lr=0.01, decay=0.0005, momentum=0.9)
    optimizer_classifier = keras.optimizers.SGD(lr=0.001 / 5,
                                                decay=0.0005,
                                                momentum=0.9)
    fpn_model.compile(optimizer=optimizer,
                      loss=[rpn_loss_cls, rpn_reg_loss,
                            rpn_loss_cls, rpn_reg_loss,
                            rpn_loss_cls, rpn_reg_loss])
    model_classfier.compile(
        optimizer=optimizer_classifier,
        loss=[cls_loss, reg_loss, cls_loss, reg_loss, cls_loss, reg_loss],
        metrics=['accuracy'])
    # The original compiled comb_model twice with identical arguments;
    # once is enough (it is only trained implicitly via the sub-models).
    comb_model.compile(optimizer='sgd', loss='mae')

    generator = data_generator()
    num_epochs = 5
    epoch_length = generator.step_per_epoch
    data_gene = generator.g
    for epoch in range(num_epochs):
        loss_record = np.zeros((epoch_length, 6))
        batch_num = 0
        progbar = tf.keras.utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        # first 3 epoch is warm up.
        if epoch == 3:
            backend.set_value(fpn_model.optimizer.lr, 0.001)
            backend.set_value(model_classfier.optimizer.lr, 0.001)
        while True:
            rpn_x, rpn_y, img_info = next(data_gene)
            roi_data = np.zeros((8, cfg.MAX_ROIS, 4), dtype=np.float32)
            classfiy_data = np.zeros((8, cfg.MAX_ROIS, cfg.NUM_CLASSES),
                                     dtype=np.float32)
            reg_data = np.zeros((8, cfg.MAX_ROIS, 8 * (cfg.NUM_CLASSES - 1)))
            (xclass_small, xloc_small, xclass_mid, xloc_mid,
             xclass_large, xloc_large) = fpn_model.predict_on_batch(rpn_x)
            record = []
            avaliabel = []
            for batch_index in range(8):
                rois_large = roi_helper.rpn_to_roi(
                    xclass_large[batch_index, :, :, :],
                    xloc_large[batch_index, :, :, :], 8)
                rois_mid = roi_helper.rpn_to_roi(
                    xclass_mid[batch_index, :, :, :],
                    xloc_mid[batch_index, :, :, :], 16)
                rois_small = roi_helper.rpn_to_roi(
                    xclass_small[batch_index, :, :, :],
                    xloc_small[batch_index, :, :, :], 32)
                rois = np.concatenate([rois_small, rois_large, rois_mid],
                                      axis=0)[:, :4]
                rois = rois.astype(np.float32)
                frcnn_label = roi_helper.match_gt_box(rois,
                                                      img_info[batch_index])
                if frcnn_label is None:
                    # No proposal matched a ground-truth box for this image.
                    record.append(batch_index)
                    continue
                rois, cls_label, reg_label = frcnn_label
                avaliabel.append(batch_index)
                train_label, selected_pos_label = select_sample(cls_label)
                roi_data[batch_index] = rois[train_label]
                classfiy_data[batch_index] = cls_label[train_label]
                reg_data[batch_index] = reg_label[train_label]
            if not avaliabel:
                # Every image in the batch failed to match; the original
                # crashed on avaliabel[0] here. Skip the batch instead.
                continue
            # Pad failed images with data from the first successful one so
            # the fixed-size sub-batches below stay valid.
            for batch_index in record:
                index = avaliabel[0]
                roi_data[batch_index] = roi_data[index]
                classfiy_data[batch_index] = classfiy_data[index]
                reg_data[batch_index] = reg_data[index]
            loss_classfiy0 = model_classfier.train_on_batch(
                [rpn_x[:2], roi_data[:2]], [
                    classfiy_data[:2], reg_data[:2], classfiy_data[:2],
                    reg_data[:2], classfiy_data[:2], reg_data[:2]
                ])
            loss_classfiy1 = model_classfier.train_on_batch(
                [rpn_x[2:4], roi_data[2:4]], [
                    classfiy_data[2:4], reg_data[2:4], classfiy_data[2:4],
                    reg_data[2:4], classfiy_data[2:4], reg_data[2:4]
                ])
            loss_classfiy2 = model_classfier.train_on_batch(
                [rpn_x[4:6], roi_data[4:6]], [
                    classfiy_data[4:6], reg_data[4:6], classfiy_data[4:6],
                    reg_data[4:6], classfiy_data[4:6], reg_data[4:6]
                ])
            loss_classfiy3 = model_classfier.train_on_batch(
                [rpn_x[6:8], roi_data[6:8]], [
                    classfiy_data[6:8], reg_data[6:8], classfiy_data[6:8],
                    reg_data[6:8], classfiy_data[6:8], reg_data[6:8]
                ])
            # Only the first sub-batch's losses feed the progress bar.
            loss_record[batch_num, 0] = loss_classfiy0[0]
            loss_record[batch_num, 1] = loss_classfiy0[1]
            progbar.update(
                batch_num,
                [('rpn', np.mean(loss_record[:batch_num + 1, 0])),
                 ('fast rcnn : ', np.mean(loss_record[:batch_num + 1, 1])),
                 ("average number of objects", len(selected_pos_label))])
            batch_num += 1
            if batch_num >= epoch_length:
                break
    model_classfier.save_weights("final.h5")
    comb_model.save_weights("finalcomb.h5")
def __init__(self):
    """Wire a shared ResNet-50 backbone to an RPN head."""
    backbone = resnet()
    self.input_layer = keras.layers.Input(shape=(None, None, 3))
    # Shared feature extractor; the RPN head predicts from its output map.
    self.feature_map = backbone.resnet_50(is_share=True,
                                          inpt=self.input_layer)
    self.rpn_output = backbone.rpn_net(self.feature_map)