def generate_AUCouple_ROI_mask_image(database_name, img_path):
    """Visualize every AU-couple ROI of one face image as a colored overlay.

    Crops the face, draws each landmark-derived ROI polygon (red outline plus
    ROI number), then for every distinct AU couple blends a randomly colored
    mask over the face.

    Args:
        database_name: AU database name passed to ``adaptive_AU_database``.
        img_path: path of the raw face image.

    Returns:
        Tuple ``(gen_face_lst, AU_couple_mask)`` where ``gen_face_lst`` maps
        AU couple -> blended visualization image and ``AU_couple_mask`` maps
        AU couple -> its binary ROI mask.
    """
    adaptive_AU_database(database_name)
    global MASK_COLOR
    # Pre-convert the palette once; one BGR tuple per configured mask color.
    mask_color_lst = [color_bgr(color) for color in MASK_COLOR]
    cropped_face, AU_mask_dict = FaceMaskCropper.get_cropface_and_mask(
        img_path, channel_first=False)
    AU_couple_dict = get_zip_ROI_AU()
    land = FaceLandMark(config.DLIB_LANDMARK_PRETRAIN)
    landmark, _, _ = land.landmark(image=cropped_face)
    roi_polygons = land.split_ROI(landmark)
    for roi_no, polygon_vertex_arr in roi_polygons.items():
        polygon_vertex_arr[0, :] = np.round(polygon_vertex_arr[0, :])
        polygon_vertex_arr[1, :] = np.round(polygon_vertex_arr[1, :])
        polygon_vertex_arr = sort_clockwise(polygon_vertex_arr.tolist())
        cv2.polylines(cropped_face, [polygon_vertex_arr], True,
                      color_bgr(RED), thickness=1)
        font = cv2.FONT_HERSHEY_SIMPLEX
        # Label the ROI at its centroid.
        cv2.putText(cropped_face, str(roi_no),
                    tuple(np.mean(polygon_vertex_arr, axis=0).astype(np.int32)),
                    font, 0.7, (0, 255, 255), thickness=1)
    already_fill_AU = set()
    gen_face_lst = dict()
    AU_couple_mask = dict()
    for AU in config.AU_ROI.keys():
        AU_couple = AU_couple_dict[AU]
        # Several AUs share one couple/mask; render each couple only once.
        if AU_couple in already_fill_AU:
            continue
        already_fill_AU.add(AU_couple)
        mask = AU_mask_dict[AU]
        AU_couple_mask[AU_couple] = mask
        color_mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
        # NOTE: color choice is random, so output differs between runs.
        color_mask[mask != 0] = random.choice(mask_color_lst)
        new_face = cv2.addWeighted(cropped_face, 0.75, color_mask, 0.25, 0)
        gen_face_lst[AU_couple] = new_face
    return gen_face_lst, AU_couple_mask
def get_BP4D_prescion_matrix(label_file_dir):
    """Estimate a sparse precision (inverse covariance) matrix of BP4D AU labels.

    Each CSV file under ``label_file_dir`` is one video; the first column is
    the frame number and the header row names the AU of every other column.
    Every frame becomes a binary AU-occurrence vector, and GraphLassoCV is fit
    over all frames of all videos.

    Args:
        label_file_dir: directory containing per-video AU label CSV files.

    Returns:
        dict with keys ``"prec"`` (precision matrix) and ``"cov"``
        (covariance matrix) from the fitted model.
    """
    adaptive_AU_database("BP4D")
    model = GraphLassoCV(alphas=100, cv=10, max_iter=10, tol=1e-5,
                         verbose=True, mode="lars", assume_centered=False,
                         n_jobs=100)
    X = []
    for file_name in os.listdir(label_file_dir):  # each file is a video
        AU_column_idx = {}
        with open(label_file_dir + "/" + file_name, "r") as au_file_obj:
            for idx, line in enumerate(au_file_obj):
                if idx == 0:
                    # Header row: map AU name -> column index (offset by 1
                    # because column 0 is the frame number).
                    # NOTE(review): the last header token likely keeps its
                    # trailing newline; confirm config.AU_ROI keys still match.
                    for col_idx, AU in enumerate(line.split(",")[1:]):
                        AU_column_idx[AU] = col_idx + 1
                    continue
                lines = line.split(",")
                au_labels = [AU for AU in config.AU_ROI.keys()
                             if int(lines[AU_column_idx[AU]]) == 1]
                AU_bin = np.zeros(len(config.AU_SQUEEZE))
                for AU in au_labels:
                    np.put(AU_bin, config.AU_SQUEEZE.inv[AU], 1)
                X.append(AU_bin)
    X = np.array(X)
    print(X.shape)
    model.fit(X)
    return {"prec": model.precision_, "cov": model.covariance_}
def generate_AUCouple_ROI_mask_image(self, database_name, img_path, roi_activate):
    """Render one overlay image per activated AU couple of a face image.

    The cropped face first gets every landmark ROI polygon outlined in red
    with its ROI number printed at the centroid; then, for each AU couple
    present in ``roi_activate``, a magenta mask is added on top of the face.

    Args:
        database_name: AU database name passed to ``adaptive_AU_database``.
        img_path: path of the raw face image.
        roi_activate: collection of AU couples to render.

    Returns:
        dict mapping AU couple -> overlay image.
    """
    adaptive_AU_database(database_name)
    cropped_face, AU_mask_dict = FaceMaskCropper.get_cropface_and_mask(
        img_path, channel_first=False)
    AU_couple_dict = get_zip_ROI_AU()
    land = FaceLandMark(config.DLIB_LANDMARK_PRETRAIN)
    landmark, _, _ = land.landmark(image=cropped_face)
    for region_id, vertices in land.split_ROI(landmark).items():
        vertices[0, :] = np.round(vertices[0, :])
        vertices[1, :] = np.round(vertices[1, :])
        ordered = sort_clockwise(vertices.tolist())
        cv2.polylines(cropped_face, [ordered], True, (0, 0, 255), thickness=1)
        centroid = tuple(np.mean(ordered, axis=0).astype(np.int32))
        cv2.putText(cropped_face, str(region_id), centroid,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), thickness=1)
    seen_couples = set()
    rendered = dict()
    for AU in config.AU_ROI.keys():
        couple = AU_couple_dict[AU]
        # Skip couples already drawn and couples that were not requested.
        if couple in seen_couples or couple not in roi_activate:
            continue
        seen_couples.add(couple)
        region_mask = AU_mask_dict[AU]
        overlay = cv2.cvtColor(region_mask, cv2.COLOR_GRAY2RGB)
        overlay[region_mask != 0] = (199, 21, 133)
        rendered[couple] = cv2.add(cropped_face, overlay)
    return rendered
def generate_landmark_image(database_name, face_img_path=None, face_img=None):
    """Detect facial landmarks and return the annotated landmark image.

    Args:
        database_name: AU database name passed to ``adaptive_AU_database``.
        face_img_path: path of the face image to load (used only when
            ``face_img`` is None).
        face_img: an already-loaded face image; takes precedence over
            ``face_img_path``.

    Returns:
        The landmark-annotated image produced by ``FaceLandMark.landmark``.

    Raises:
        ValueError: if neither ``face_img`` nor ``face_img_path`` is given.
    """
    adaptive_AU_database(database_name)
    land = FaceLandMark(config.DLIB_LANDMARK_PRETRAIN)
    trn_img = face_img
    if trn_img is None:
        if face_img_path is None:
            # FIX: fail fast instead of letting cv2.imread(None) fail later.
            raise ValueError("one of face_img / face_img_path must be provided")
        trn_img = cv2.imread(face_img_path, cv2.IMREAD_COLOR)
    landmark_dict, _, new_image = land.landmark(image=trn_img,
                                                need_txt_img=True)
    # The split_ROI result was only consumed by now-removed commented-out
    # debug drawing code. Call kept in case of side effects --
    # NOTE(review): confirm split_ROI is pure, then drop this line.
    land.split_ROI(landmark_dict)
    return new_image
def get_DISFA_prescion_matrix(label_file_dir):
    """Estimate a sparse precision (inverse covariance) matrix of DISFA AU labels.

    DISFA stores one file per (subject, AU); each line is ``frame,intensity``.
    Intensities are binarized (>= 3 -> 1, else 0), grouped per frame into a
    binary AU vector, and GraphLassoCV is fit over all frames of all subjects.

    Args:
        label_file_dir: directory of per-subject folders of AU label files.

    Returns:
        dict with keys ``"prec"`` (precision matrix) and ``"cov"``
        (covariance matrix) from the fitted model.
    """
    adaptive_AU_database("DISFA")
    model = GraphLassoCV(alphas=100, cv=10, max_iter=100, tol=1e-5,
                         verbose=True, mode="lars", assume_centered=False,
                         n_jobs=100)
    X = []
    for file_name in os.listdir(label_file_dir):
        subject_filename = label_file_dir + os.sep + file_name
        frame_label = defaultdict(dict)
        for au_file in os.listdir(subject_filename):
            abs_filename = subject_filename + "/" + au_file
            # Extract the AU number from the file name; the +3 skips the
            # "_au" part after the last underscore -- TODO confirm on real
            # DISFA file names.
            AU = au_file[au_file.rindex("_") + 3:au_file.rindex(".")]
            with open(abs_filename, "r") as file_obj:
                for line in file_obj:
                    frame, AU_label = line.strip().split(",")
                    # Binarize: intensity < 3 counts as inactive. (Original
                    # note: surprisingly, dropping <3 works well.)
                    AU_label = 0 if int(AU_label) < 3 else 1
                    frame_label[int(frame)][AU] = int(AU_label)
        for frame, AU_dict in frame_label.items():
            AU_bin = np.zeros(len(config.AU_SQUEEZE))
            for AU, AU_label in AU_dict.items():
                bin_idx = config.AU_SQUEEZE.inv[AU]
                np.put(AU_bin, bin_idx, AU_label)
            X.append(AU_bin)
    X = np.array(X)
    print(X.shape)
    model.fit(X)
    return {"prec": model.precision_, "cov": model.covariance_}
def main():
    """Train the Space-Time AU R-CNN end to end.

    Parses command-line options, builds the backbone AU R-CNN
    (VGG / ResNet101 / MobileNetV1, optionally two-stream RGB + optical
    flow), attaches the chosen spatio-temporal loss head, then sets up the
    optimizer, updater, trainer extensions and runs training.
    """
    parser = argparse.ArgumentParser(
        description='Space Time Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu', '-g', nargs='+', type=int,
                        help='GPU ID, multiple GPU split by space')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out', '-o', default='end_to_end_result',
                        help='Output directory')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--need_validate', action='store_true',
                        help='do or not validate during training')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone', default="mobilenet_v1",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer', default='SGD',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model_rgb',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument(
        '--pretrained_model_of',
        help="path of optical flow pretrained model (may be single stream OF model)")
    parser.add_argument('--pretrained_model_args', nargs='+', type=float,
                        help='you can pass in "1.0 224" or "0.75 224"')
    parser.add_argument('--spatial_edge_mode', type=SpatialEdgeMode,
                        choices=list(SpatialEdgeMode),
                        help='1:all_edge, 2:configure_edge, 3:no_edge')
    parser.add_argument('--spatial_sequence_type', type=SpatialSequenceType,
                        choices=list(SpatialSequenceType),
                        help='1:all_edge, 2:configure_edge, 3:no_edge')
    parser.add_argument('--temporal_edge_mode', type=TemporalEdgeMode,
                        choices=list(TemporalEdgeMode),
                        help='1:rnn, 2:attention_block, 3.point-wise feed forward(no temporal)')
    parser.add_argument('--two_stream_mode', type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='spatial/ temporal/ spatial_temporal')
    parser.add_argument('--conv_rnn_type', type=ConvRNNType,
                        choices=list(ConvRNNType),
                        help='conv_lstm or conv_sru')
    parser.add_argument("--bi_lstm", action="store_true",
                        help="whether to use bi-lstm as Edge/Node RNN")
    parser.add_argument(
        '--use_memcached', action='store_true',
        help='whether use memcached to boost speed of fetch crop&mask')
    # FIX: this option was commented out, but args.memcached_host is read
    # below whenever --use_memcached is passed (it raised AttributeError).
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--layers", type=int, default=1)
    parser.add_argument("--label_win_size", type=int, default=3)
    parser.add_argument("--fix", action="store_true",
                        help="fix parameter of conv2 update when finetune")
    parser.add_argument("--x_win_size", type=int, default=1)
    parser.add_argument("--use_label_dependency", action="store_true",
                        help="use label dependency layer after conv_lstm")
    parser.add_argument("--dynamic_backbone", action="store_true",
                        help="use dynamic backbone: conv lstm as backbone")
    parser.add_argument("--ld_rnn_dropout", type=float, default=0.4)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label", action="store_true",
                        help="only to use paper reported number of labels"
                             " to train")
    parser.add_argument(
        "--roi_align", action="store_true",
        help="whether to use roi align or roi pooling layer in CNN")
    parser.add_argument("--debug", action="store_true",
                        help="debug mode for 1/50 dataset")
    parser.add_argument("--sample_frame", '-sample', type=int, default=10)
    parser.add_argument(
        "--snap_individual", action="store_true",
        help="whether to snapshot each individual epoch/iteration")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument("--fetch_mode", type=int, default=1)
    parser.add_argument('--eval_mode', action='store_true',
                        help='Use test datasets for evaluation metric')
    args = parser.parse_args()

    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    # pid-file bookkeeping (actual write is intentionally disabled).
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))
    adaptive_AU_database(args.database)

    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())

    # Which intermediate feature maps the loss head requires depends on the
    # conv-RNN variant.
    use_feature_map_res45 = (args.conv_rnn_type != ConvRNNType.conv_rcnn) and (
        args.conv_rnn_type != ConvRNNType.fc_lstm)
    use_au_rcnn_loss = (args.conv_rnn_type == ConvRNNType.conv_rcnn)

    # --- backbone construction -------------------------------------------
    au_rcnn_train_chain_list = []
    if args.backbone == 'vgg':
        au_rcnn = AU_RCNN_VGG16(pretrained_model=args.pretrained_model_rgb,
                                min_size=config.IMG_SIZE[0],
                                max_size=config.IMG_SIZE[1],
                                mean_file=args.mean,
                                use_roi_align=args.roi_align)
        au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain)
    elif args.backbone == 'resnet101':
        if args.two_stream_mode != TwoStreamMode.spatial_temporal:
            # Single stream: whichever pretrained model is supplied wins.
            pretrained_model = args.pretrained_model_rgb if args.pretrained_model_rgb else args.pretrained_model_of
            au_rcnn = AU_RCNN_Resnet101(
                pretrained_model=pretrained_model,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                mean_file=args.mean,
                classify_mode=use_au_rcnn_loss,
                n_class=class_num,
                use_roi_align=args.roi_align,
                use_feature_map_res45=use_feature_map_res45,
                use_feature_map_res5=(
                    args.conv_rnn_type != ConvRNNType.fc_lstm
                    or args.conv_rnn_type == ConvRNNType.sep_conv_lstm),
                use_optical_flow_input=(
                    args.two_stream_mode == TwoStreamMode.optical_flow),
                temporal_length=args.sample_frame)
            au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain)
        else:
            # Two-stream: separate RGB and optical-flow backbones.
            au_rcnn_rgb = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_rgb,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                mean_file=args.mean,
                classify_mode=use_au_rcnn_loss,
                n_class=class_num,
                use_roi_align=args.roi_align,
                use_feature_map_res45=use_feature_map_res45,
                use_feature_map_res5=(
                    args.conv_rnn_type != ConvRNNType.fc_lstm
                    or args.conv_rnn_type == ConvRNNType.sep_conv_lstm),
                use_optical_flow_input=False,
                temporal_length=args.sample_frame)
            au_rcnn_optical_flow = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_of,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                mean_file=args.mean,
                classify_mode=use_au_rcnn_loss,
                n_class=class_num,
                use_roi_align=args.roi_align,
                use_feature_map_res45=use_feature_map_res45,
                use_feature_map_res5=(
                    args.conv_rnn_type != ConvRNNType.fc_lstm
                    or args.conv_rnn_type == ConvRNNType.sep_conv_lstm),
                use_optical_flow_input=True,
                temporal_length=args.sample_frame)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
            au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
                au_rcnn_optical_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)
    elif args.backbone == "mobilenet_v1":
        au_rcnn = AU_RCNN_MobilenetV1(
            pretrained_model_type=args.pretrained_model_args,
            min_size=config.IMG_SIZE[0],
            max_size=config.IMG_SIZE[1],
            mean_file=args.mean,
            classify_mode=use_au_rcnn_loss,
            n_class=class_num,
            use_roi_align=args.roi_align)
        au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
        # FIX: the chain was built but never appended, so the default
        # backbone left au_rcnn_train_chain_list empty.
        au_rcnn_train_chain_list.append(au_rcnn_train_chain)

    # --- loss head selection ---------------------------------------------
    if use_au_rcnn_loss:
        au_rcnn_train_loss = AU_RCNN_TrainChainLoss()
        loss_head_module = au_rcnn_train_loss
    elif args.conv_rnn_type == ConvRNNType.conv_lstm:
        label_dependency_layer = None
        if args.use_label_dependency:
            label_dependency_layer = LabelDependencyRNNLayer(
                args.database, in_size=2048, class_num=class_num,
                train_mode=True, label_win_size=args.label_win_size)
        space_time_conv_lstm = SpaceTimeConv(
            label_dependency_layer, args.use_label_dependency, class_num,
            spatial_edge_mode=args.spatial_edge_mode,
            temporal_edge_mode=args.temporal_edge_mode,
            conv_rnn_type=args.conv_rnn_type)
        loss_head_module = space_time_conv_lstm
    elif args.conv_rnn_type == ConvRNNType.sep_conv_lstm:
        space_time_sep_conv_lstm = SpaceTimeSepConv(
            database=args.database, class_num=class_num,
            spatial_edge_mode=args.spatial_edge_mode,
            temporal_edge_mode=args.temporal_edge_mode)
        loss_head_module = space_time_sep_conv_lstm
    elif args.conv_rnn_type == ConvRNNType.fc_lstm:
        space_time_fc_lstm = SpaceTimeSepFcLSTM(
            database=args.database, class_num=class_num,
            spatial_edge_mode=args.spatial_edge_mode,
            temporal_edge_mode=args.temporal_edge_mode)
        loss_head_module = space_time_fc_lstm

    model = Wrapper(au_rcnn_train_chain_list, loss_head_module, args.database,
                    args.sample_frame, use_feature_map=use_feature_map_res45,
                    two_stream_mode=args.two_stream_mode)

    # --- dataset / iterator ----------------------------------------------
    batch_size = args.batch_size
    img_dataset = AUDataset(database=args.database, fold=args.fold,
                            split_name='trainval',
                            split_index=args.split_idx,
                            mc_manager=mc_manager, train_all_data=False)
    train_video_data = AU_video_dataset(
        au_image_dataset=img_dataset,
        sample_frame=args.sample_frame,
        train_mode=(args.two_stream_mode != TwoStreamMode.optical_flow),
        paper_report_label_idx=paper_report_label_idx,
    )
    Transform = Transform3D
    # NOTE(review): for resnet101 + spatial_temporal only au_rcnn_rgb /
    # au_rcnn_optical_flow are bound, so `au_rcnn` here would raise
    # NameError -- confirm intended usage before running that configuration.
    train_video_data = TransformDataset(train_video_data,
                                        Transform(au_rcnn, mirror=False))
    if args.proc_num == 1:
        train_iter = SerialIterator(train_video_data,
                                    batch_size * args.sample_frame,
                                    repeat=True, shuffle=False)
    else:
        train_iter = MultiprocessIterator(
            train_video_data, batch_size=batch_size * args.sample_frame,
            n_processes=args.proc_num, repeat=True, shuffle=False,
            n_prefetch=10, shared_mem=10000000)

    if len(args.gpu) > 1:
        for gpu in args.gpu:
            chainer.cuda.get_device_from_id(gpu).use()
    else:
        chainer.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu(args.gpu[0])

    # --- optimizer --------------------------------------------------------
    optimizer = None
    if args.optimizer == 'AdaGrad':
        # Originally MomentumSGD(lr, momentum=0.9); switched to AdaGrad
        # because the loss became NaN.
        optimizer = chainer.optimizers.AdaGrad(lr=args.lr)
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        optimizer = chainer.optimizers.AdaDelta()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer_name = args.optimizer

    # --- snapshot file names ---------------------------------------------
    key_str = "{0}_fold_{1}".format(args.fold, args.split_idx)
    file_list = []
    file_list.extend(os.listdir(args.out))
    snapshot_model_file_name = args.out + os.sep + filter_last_checkpoint_filename(
        file_list, "model", key_str)
    use_paper_key_str = "use_paper_num_label" if args.use_paper_num_label else "all_avail_label"
    roi_align_key_str = "roi_align" if args.roi_align else "roi_pooling"
    label_dependency_layer_key_str = "label_dep_layer" if args.use_label_dependency else "no_label_dep"
    single_model_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@{7}@{8}@{9}@sampleframe#{10}_model.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.spatial_edge_mode, args.temporal_edge_mode,
            use_paper_key_str, roi_align_key_str,
            label_dependency_layer_key_str, args.conv_rnn_type,
            args.sample_frame)
    print(single_model_file_name)
    pretrained_optimizer_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@{7}@{8}@{9}@sampleframe#{10}_optimizer.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.spatial_edge_mode, args.temporal_edge_mode,
            use_paper_key_str, roi_align_key_str,
            label_dependency_layer_key_str, args.conv_rnn_type,
            args.sample_frame)
    print(pretrained_optimizer_file_name)

    # --- resume from snapshots -------------------------------------------
    if os.path.exists(pretrained_optimizer_file_name):
        # FIX: typo "snatshot" -> "snapshot" in the log message.
        print("loading optimizer snapshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)
    if args.snap_individual:
        if os.path.exists(snapshot_model_file_name) and os.path.isfile(
                snapshot_model_file_name):
            print("loading pretrained snapshot:{}".format(
                snapshot_model_file_name))
            chainer.serializers.load_npz(snapshot_model_file_name, model)
    else:
        if os.path.exists(single_model_file_name):
            print("loading pretrained snapshot:{}".format(
                single_model_file_name))
            chainer.serializers.load_npz(single_model_file_name, model)

    # --- optionally freeze early ResNet layers during finetuning ---------
    if args.fix:
        # NOTE(review): assumes Wrapper exposes a single au_rcnn_train_chain
        # attribute -- confirm for the two-stream configuration.
        au_rcnn = model.au_rcnn_train_chain.au_rcnn

        def _freeze(block, conv_names, bn_names):
            """Disable updates of the named conv/bn parameters of `block`."""
            for conv_name in conv_names:
                getattr(block, conv_name).W.update_rule.enabled = False
            for bn_name in bn_names:
                bn = getattr(block, bn_name)
                bn.gamma.update_rule.enabled = False
                bn.beta.update_rule.enabled = False

        au_rcnn.extractor.conv1.W.update_rule.enabled = False
        au_rcnn.extractor.bn1.gamma.update_rule.enabled = False
        au_rcnn.extractor.bn1.beta.update_rule.enabled = False
        for res2_name in ["a", "b1", "b2"]:
            block = getattr(au_rcnn.extractor.res2, res2_name)
            if res2_name == "a":
                # The "a" bottleneck has the extra projection conv4/bn4.
                _freeze(block, ["conv1", "conv2", "conv3", "conv4"],
                        ["bn1", "bn2", "bn3", "bn4"])
            elif res2_name.startswith("b"):
                _freeze(block, ["conv1", "conv2", "conv3"],
                        ["bn1", "bn2", "bn3"])

    # --- updater ----------------------------------------------------------
    if len(args.gpu) > 1:
        gpu_dict = {"main": args.gpu[0]}  # many gpu will use
        parallel_models = {"parallel": model.au_rcnn_train_chain}
        for slave_gpu in args.gpu[1:]:
            gpu_dict[slave_gpu] = int(slave_gpu)
        updater = PartialParallelUpdater(
            train_iter, optimizer, args.database, models=parallel_models,
            devices=gpu_dict,
            converter=lambda batch, device: concat_examples(
                batch, device, padding=0))
    else:
        print("only one GPU({0}) updater".format(args.gpu[0]))
        updater = chainer.training.StandardUpdater(
            train_iter, optimizer, device=args.gpu[0],
            converter=lambda batch, device: concat_examples(
                batch, device, padding=0))

    @training.make_extension(trigger=(1, "epoch"))
    def reset_order(trainer):
        """Re-shuffle the underlying video dataset after every epoch."""
        print("reset dataset order after one epoch")
        if args.debug:
            trainer.updater._iterators[
                "main"].dataset._dataset.reset_for_debug_mode()
        else:
            trainer.updater._iterators[
                "main"].dataset._dataset.reset_for_train_mode()

    # --- trainer and extensions ------------------------------------------
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(reset_order)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer,
        filename=os.path.basename(pretrained_optimizer_file_name)),
        trigger=(args.snapshot, 'iteration'))
    if not args.snap_individual:
        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=os.path.basename(single_model_file_name)),
            trigger=(args.snapshot, 'iteration'))
    else:
        snap_model_file_name = '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@{7}@{8}@{9}sampleframe#{10}@win#{11}_'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.spatial_edge_mode, args.temporal_edge_mode,
            use_paper_key_str, roi_align_key_str,
            label_dependency_layer_key_str, args.conv_rnn_type,
            args.sample_frame, args.label_win_size)
        snap_model_file_name = snap_model_file_name + "{.updater.iteration}.npz"
        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=snap_model_file_name),
            trigger=(args.snapshot, 'iteration'))

    log_interval = 100, 'iteration'
    print_interval = 10, 'iteration'
    plot_interval = 10, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer), trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="log_{0}_fold_{1}_{2}@{3}@{4}@{5}.log".format(
                args.fold, args.split_idx, args.backbone,
                args.spatial_edge_mode, args.temporal_edge_mode,
                args.conv_rnn_type)))
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/accuracy',
    ]), trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name='loss_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.spatial_edge_mode, args.temporal_edge_mode,
                args.conv_rnn_type),
            trigger=plot_interval), trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.spatial_edge_mode, args.temporal_edge_mode,
                args.conv_rnn_type),
            trigger=plot_interval), trigger=plot_interval)

    trainer.run()
def main():
    """Evaluate a pretrained ST-Relation / ST-Attention AU network.

    Parses command-line options, reconstructs the model configuration from
    the model file name, loads the test graph dataset, runs the
    ActionUnitEvaluator and writes the metrics next to the model file as a
    JSON report.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)'
                        )  # open_crf layer only works for CPU mode
    parser.add_argument(
        "--model", "-m",
        help="pretrained model file path")  # which contains pretrained target
    parser.add_argument("--test", "-tt", default="",
                        help="test txt folder path")
    parser.add_argument("--database", "-db", default="BP4D",
                        help="which database you want to evaluate")
    parser.add_argument(
        "--check", "-ck", action="store_true",
        help="default not to check the npy file and all list file generate correctly")
    parser.add_argument("--num_attrib", type=int, default=2048,
                        help="feature dimension")
    parser.add_argument("--geo_num_attrib", type=int, default=4,
                        help='geometry feature dimension')
    parser.add_argument("--train_edge", default="all",
                        help="all/spatio/temporal")
    parser.add_argument("--attn_heads", type=int, default=16)
    parser.add_argument("--layers", type=int, default=1,
                        help="layer number of edge/node rnn")
    parser.add_argument(
        "--bi_lstm", action="store_true",
        help="whether or not to use bi_lstm as edge/node rnn base")
    parser.add_argument(
        "--use_relation_net", action='store_true',
        help='whether to use st_relation_net instead of space_time_net')
    parser.add_argument(
        "--relation_net_lstm_first", action='store_true',
        help='whether to use relation_net_lstm_first_forward in st_relation_net')
    args = parser.parse_args()
    adaptive_AU_database(args.database)
    # The training configuration is encoded in the model file name.
    mode_dict = extract_mode(args.model)

    paper_report_label = OrderedDict()
    if mode_dict["use_paper_report_label_num"]:
        # FIX: the database -> paper AU set choice is loop-invariant; it was
        # re-evaluated every iteration and left paper_use_AU unbound
        # (NameError) for any database other than BP4D/DISFA.
        if args.database == "BP4D":
            paper_use_AU = config.paper_use_BP4D
        elif args.database == "DISFA":
            paper_use_AU = config.paper_use_DISFA
        else:
            raise ValueError("unsupported database: {}".format(args.database))
        for AU_idx, AU in sorted(config.AU_SQUEEZE.items(),
                                 key=lambda e: int(e[0])):
            if AU in paper_use_AU:
                paper_report_label[AU_idx] = AU
    paper_report_label_idx = list(paper_report_label.keys())
    if not paper_report_label_idx:
        paper_report_label_idx = None

    test_dir = args.test if not args.test.endswith("/") else args.test[:-1]
    assert args.database in test_dir
    dataset = GlobalDataSet(num_attrib=args.num_attrib,
                            num_geo_attrib=args.geo_num_attrib,
                            train_edge=args.train_edge)  # ../data_info.json
    # Pick the first .txt file of the test folder as the probe sample used to
    # build the CRF package structure.
    file_name = None
    for _file_name in os.listdir(args.test):
        if os.path.exists(args.test + os.sep + _file_name) and _file_name.endswith(".txt"):
            file_name = args.test + os.sep + _file_name
            break
    sample = dataset.load_data(file_name, npy_in_parent_dir=False,
                               paper_use_label_idx=paper_report_label_idx)
    print("pre load done")
    crf_pact_structure = CRFPackageStructure(
        sample, dataset, num_attrib=dataset.num_attrib_type, need_s_rnn=False)

    print("""
======================================
gpu:{4}
argument: neighbor_mode:{0} spatial_edge_mode:{1} temporal_edge_mode:{2}
use_geometry_features:{3} use_paper_report_label_num:{5}
======================================
""".format(mode_dict["neighbor_mode"], mode_dict["spatial_edge_mode"],
           mode_dict["temporal_edge_mode"], mode_dict["use_geo_feature"],
           args.gpu, mode_dict["use_paper_report_label_num"]))

    if args.use_relation_net:
        model = StRelationNetPlus(
            crf_pact_structure, in_size=dataset.num_attrib_type,
            out_size=dataset.label_bin_len, database=args.database,
            neighbor_mode=NeighborMode[mode_dict["neighbor_mode"]],
            spatial_edge_mode=SpatialEdgeMode[mode_dict["spatial_edge_mode"]],
            recurrent_block_type=RecurrentType[
                mode_dict["temporal_edge_mode"]],
            attn_heads=args.attn_heads, dropout=0.0,
            use_geometry_features=mode_dict["use_geo_feature"],
            layers=args.layers, bi_lstm=args.bi_lstm,
            lstm_first_forward=args.relation_net_lstm_first)
    else:
        model = StAttentioNetPlus(
            crf_pact_structure, dataset.num_attrib_type,
            dataset.label_bin_len, args.database,
            NeighborMode[mode_dict["neighbor_mode"]],
            SpatialEdgeMode[mode_dict["spatial_edge_mode"]],
            RecurrentType[mode_dict["temporal_edge_mode"]],
            attn_heads=args.attn_heads, dropout=0.0,
            use_geometry_features=mode_dict["use_geo_feature"],
            layers=args.layers, bi_lstm=args.bi_lstm)

    print("loading {}".format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    with chainer.no_backprop_mode():
        test_data = GraphDataset(directory=test_dir,
                                 attrib_size=dataset.num_attrib_type,
                                 global_dataset=dataset, need_s_rnn=True,
                                 npy_in_parent_dir=False,
                                 need_cache_factor_graph=False,
                                 get_geometry_feature=True,
                                 paper_use_label_idx=paper_report_label_idx)
        test_iter = chainer.iterators.SerialIterator(test_data, 1,
                                                     shuffle=False,
                                                     repeat=False)
        au_evaluator = ActionUnitEvaluator(
            test_iter, model, args.gpu, database=args.database,
            paper_report_label=paper_report_label)
        observation = au_evaluator.evaluate()
        # Write the metrics JSON next to the model file.
        with open(
                os.path.dirname(args.model) + os.sep +
                "evaluation_result_{0}@{1}@{2}@{3}@{4}.json".format(
                    args.database,
                    NeighborMode[mode_dict["neighbor_mode"]],
                    SpatialEdgeMode[mode_dict["spatial_edge_mode"]],
                    RecurrentType[mode_dict["temporal_edge_mode"]],
                    mode_dict["use_geo_feature"]), "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
def main():
    """Visualization demo: run a pretrained AU R-CNN on one face image and
    show a jet-colormap activation heatmap overlaid on each predicted ROI.

    Side effects: loads model weights from disk, may move models to GPU,
    and opens cv2 windows (blocks on cv2.waitKey per ROI).
    """
    parser = argparse.ArgumentParser(
        description='generate Graph desc file script')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument("--image",
                        default='C:/Users/machen/Downloads/tmp/face.jpg')
    parser.add_argument(
        "--model",
        default="C:/Users/machen/Downloads/tmp/BP4D_3_fold_1.npz")
    parser.add_argument("--pretrained_model_name", '-premodel',
                        default='resnet101')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory')
    parser.add_argument('--device', default=0, type=int,
                        help='GPU device number')
    args = parser.parse_args()
    adaptive_AU_database(args.database)
    # Build the backbone matching the pretrained snapshot's architecture.
    if args.pretrained_model_name == "resnet101":
        faster_rcnn = FasterRCNNResnet101(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model="resnet101",
            mean_file=args.mean,
            use_lstm=False,
            extract_len=1000
        )  # may be replaced by /home/machen/face_expr/result/snapshot_model.npz
    elif args.pretrained_model_name == "vgg":
        faster_rcnn = FasterRCNNVGG16(n_fg_class=len(config.AU_SQUEEZE),
                                      pretrained_model="imagenet",
                                      mean_file=args.mean,
                                      use_lstm=False,
                                      extract_len=1000)
    if os.path.exists(args.model):
        print("loading pretrained snapshot:{}".format(args.model))
        chainer.serializers.load_npz(args.model, faster_rcnn)
    if args.device >= 0:
        faster_rcnn.to_gpu(args.device)
        chainer.cuda.get_device_from_id(int(args.device)).use()
    # NOTE(review): the raw npz weight dict is handed to HeatMapGenerator —
    # presumably it reads conv weights to build CAM-style maps; confirm.
    heatmap_gen = HeatMapGenerator(np.load(args.model), use_relu=True)
    if args.device >= 0:
        heatmap_gen.to_gpu(args.device)
    cropped_face, AU_box_dict = FaceMaskCropper.get_cropface_and_box(
        args.image, args.image, channel_first=True)
    au_couple_dict = get_zip_ROI_AU()
    au_couple_child = get_AU_couple_child(
        au_couple_dict)  # AU couple tuple => child fetch list
    au_couple_box = dict()  # value is box (4 tuple coordinate) list
    for AU, AU_couple in au_couple_dict.items():
        au_couple_box[AU_couple] = AU_box_dict[AU]
    # Flatten the per-couple box lists into one list, remembering which
    # AU couple produced each ROI index.
    box_lst = []
    roi_no_AU_couple_dict = dict()
    roi_no = 0
    for AU_couple, couple_box_lst in au_couple_box.items():
        box_lst.extend(couple_box_lst)
        for _ in couple_box_lst:
            roi_no_AU_couple_dict[roi_no] = AU_couple
            roi_no += 1
    box_lst = np.asarray(box_lst)
    cropped_face = cropped_face.astype(np.float32)
    orig_face = cropped_face  # keep the un-normalized face for cropping/overlay
    cropped_face = faster_rcnn.prepare(
        cropped_face)  # substract mean pixel value
    box_lst = box_lst.astype(np.float32)
    orig_box_lst = box_lst
    batch = [
        (cropped_face, box_lst),
    ]
    cropped_face, box_lst = concat_examples(
        batch, args.device)  # N,3, H, W, ; N, F, 4
    if box_lst.shape[1] != config.BOX_NUM[args.database]:
        print("error box num {0} != {1}".format(box_lst.shape[1],
                                                config.BOX_NUM[args.database]))
        return
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        cropped_face = chainer.Variable(cropped_face)
        box_lst = chainer.Variable(box_lst)
        roi_preds, _ = faster_rcnn.predict(cropped_face, box_lst)  # R, 22
    roi_feature_maps = faster_rcnn.extract(orig_face, orig_box_lst,
                                           'res5')  # R, 2048 7,7
    roi_images = []
    box_lst = box_lst[0].data.astype(np.int32)
    for box in box_lst:
        # boxes are stored (y_min, x_min, y_max, x_max); slice the raw face
        y_min, x_min, y_max, x_max = box
        roi_image = orig_face[:, y_min:y_max + 1,
                              x_min:x_max + 1]  # N, 3, roi_H, roi_W
        roi_images.append(roi_image)  # list of N, 3, roi_H, roi_W
    cmap = plt.get_cmap('jet')
    # image_activate_map = np.zeros((cropped_face.shape[2], cropped_face.shape[3]), dtype=np.float32)
    for box_id, (roi_image, roi_feature_map) in enumerate(
            zip(roi_images, roi_feature_maps)):
        y_min, x_min, y_max, x_max = box_lst[box_id]
        # class_roi_activate_img shape: 22, roi_h, roi_w
        xp = chainer.cuda.get_array_module(roi_feature_map)
        roi_feature_map = xp.expand_dims(roi_feature_map, 0)
        # class_roi_overlay_img = 22, roi_h, roi_w
        class_roi_activate_img = heatmap_gen.generate_activate_roi_map(
            roi_feature_map, (y_max - y_min + 1, x_max - x_min + 1))
        roi_pred = roi_preds[box_id]  # 22
        # choice_activate_map = np.zeros((y_max-y_min+1, x_max-x_min+1), dtype=np.float32)
        # use_choice = False
        if len(np.nonzero(roi_pred)
               [0]) > 0:  # TODO: still need proper class selection, and combine heatmaps via cv2.add
            # pick ONE of the predicted AU classes at random for display
            class_idx = random.choice(np.nonzero(roi_pred)[0])
            AU = config.AU_SQUEEZE[class_idx]
            print(AU)
            choice_activate_map = class_roi_activate_img[
                class_idx]  # roi_h, roi_w
            activation_color_map = np.round(
                cmap(choice_activate_map)[:, :, :3] * 255).astype(np.uint8)
            # 50/50 blend of the raw ROI crop and the colorized heatmap
            overlay_img = roi_images[
                box_id] / 2 + activation_color_map.transpose(2, 0, 1) / 2
            overlay_img = np.transpose(overlay_img,
                                       (1, 2, 0)).astype(np.uint8)
            vis_img = cv2.cvtColor(overlay_img, cv2.COLOR_RGB2BGR)
            cv2.imshow("new", vis_img)
            cv2.waitKey(0)
for file_path in file_path_list: print("processing {}".format(file_path)) with open(file_path, "r") as file_obj: for line in file_obj: path = line.split()[0] print("processing {}".format(path)) database = line.split()[-1] abs_path = config.RGB_PATH[database] + "/" + path AU_group_box_area = stats_AU_group_area( abs_path, mc_cached, database) for AU_couple, area in AU_group_box_area.items(): all_AU_group[AU_couple].append(area) for AU_couple, area_list in all_AU_group.items(): print(AU_couple, sum(area_list) / len(area_list)) if __name__ == "__main__": database = "BP4D" file_path_list = [ "/home/machen/dataset/{}/idx/3_fold/id_trainval_1.txt".format( database), "/home/machen/dataset/{}/idx/3_fold/id_test_1.txt".format(database) ] from collections_toolkit.memcached_manager import PyLibmcManager adaptive_AU_database(database) mc_manager = PyLibmcManager('127.0.0.1') read_idx_file(file_path_list, mc_manager)
def main():
    """Extract AU R-CNN avg_pool features for every frame of a database split
    and write them, grouped by video sequence, into .npz files
    (keys: ``feature``, ``bbox``, ``label``).

    Output layout:
        <out_dir>/<db>_<fold>_fold_<split>/{train,test}/<seq_key>@<counter>.npz

    Raises:
        IOError: when ``--use_memcached`` is given but no memcached server
            answers on ``--memcached_host``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-b', type=int, default=-1,
                        help='each batch size will be a new file')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='gpu that used to extract feature')
    parser.add_argument("--out_dir", '-o',
                        default="/home/machen/dataset/new_graph/")
    parser.add_argument("--model", '-m',
                        help="the AU R-CNN pretrained model file to load to extract feature")
    parser.add_argument("--trainval_test", '-tt', help="train or test")
    parser.add_argument("--database", default="BP4D")
    parser.add_argument('--use_memcached', action='store_true',
                        help='whether use memcached to boost speed of fetch crop&mask')
    # BUG FIX: this option was commented out while args.memcached_host was
    # still referenced below, crashing whenever --use_memcached was given.
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument('--force_write', action='store_true')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--jump_exist_file', action='store_true',
                        help='image mean .npy file')
    args = parser.parse_args()
    adaptive_AU_database(args.database)

    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        # only meaningful inside this branch: without --use_memcached a
        # None manager is the intended (cache-less) configuration
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    # the model filename encodes fold / backbone / split index
    result_dict = extract_mode(args.model)
    fold = result_dict["fold"]
    backbone = result_dict["backbone"]
    split_idx = result_dict["split_idx"]
    if backbone == 'vgg':
        faster_rcnn = FasterRCNNVGG16(n_fg_class=len(config.AU_SQUEEZE),
                                      pretrained_model="imagenet",
                                      mean_file=args.mean, use_lstm=False,
                                      extract_len=1000, fix=False)
    elif backbone == 'resnet101':
        faster_rcnn = FasterRCNNResnet101(n_fg_class=len(config.AU_SQUEEZE),
                                          pretrained_model=backbone,
                                          mean_file=args.mean, use_lstm=False,
                                          extract_len=1000, fix=False)
    assert os.path.exists(args.model)
    print("loading model file : {}".format(args.model))
    chainer.serializers.load_npz(args.model, faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        faster_rcnn.to_gpu(args.gpu)

    dataset = AUExtractorDataset(database=args.database,
                                 fold=fold, split_name=args.trainval_test,
                                 split_index=split_idx,
                                 mc_manager=mc_manager, use_lstm=False,
                                 train_all_data=False,
                                 prefix="", pretrained_target="",
                                 pretrained_model=faster_rcnn,
                                 extract_key="avg_pool",
                                 device=-1, batch_size=args.batch_size)
    train_test = "train" if args.trainval_test == "trainval" else "test"
    # wrapper that skips images whose output file already exists
    jump_dataset = JumpExistFileDataset(dataset, args.out_dir, fold,
                                        args.database, split_idx,
                                        args.batch_size, train_test,
                                        args.jump_exist_file)
    dataset_iter = BatchKeepOrderIterator(jump_dataset,
                                          batch_size=args.batch_size,
                                          repeat=False, shuffle=False)

    file_key_counter = 0  # per-sequence running file index, reset on new video
    last_sequence_key = None
    for batch in dataset_iter:
        features = []
        bboxes = []
        labels = []
        file_key_counter += 1
        for idx, (feature, bbox, label, img_path,
                  _file_key_counter) in enumerate(batch):
            # sequence key = "<subject>_<session>" from the image path
            sequence_key = "_".join((img_path.split("/")[-3],
                                     img_path.split("/")[-2]))
            if last_sequence_key is None:
                last_sequence_key = sequence_key
            if sequence_key != last_sequence_key:
                file_key_counter = 1
                last_sequence_key = sequence_key
            # counter computed here must agree with the dataset's bookkeeping
            assert file_key_counter == _file_key_counter, \
                (file_key_counter, _file_key_counter, img_path)
            if feature is None:  # already extracted -> skipped by jump_dataset
                print("jump img_path : {}".format(img_path))
                continue
            features.extend(feature)
            bboxes.extend(bbox)
            labels.extend(label)
        if features:
            sub_dir = "/train" if args.trainval_test == "trainval" else "/test"
            file_name = args.out_dir + os.sep + \
                "{0}_{1}_fold_{2}".format(args.database, fold, split_idx) + \
                sub_dir + os.sep + sequence_key + "@" + \
                str(file_key_counter) + ".npz"
            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            features = np.stack(features)
            bboxes = np.stack(bboxes)
            labels = np.stack(labels)
            print("write : {}".format(file_name))
            assert not os.path.exists(file_name), file_name
            np.savez(file_name, feature=features, bbox=bboxes, label=labels)
def main():
    """Evaluate pretrained OpenCRF / StructuralRNN++ models found in
    ``--target_dir`` on the graph test set and dump the metrics to
    ``<target_dir>/evaluation_result.json``.

    Model files are matched by name: ``*opencrf*model.npz`` loads an
    OpenCRFLayer (CPU only), ``*srnn_plus*model.npz`` loads a
    StructuralRNNPlus (optionally GPU, CRF head kept on CPU).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    # open_crf layer only works for CPU mode
    parser.add_argument("--target_dir", "-t", default="result",
                        help="pretrained model file path")  # which contains pretrained target
    parser.add_argument("--test", "-tt", default="",
                        help="test txt folder path")
    parser.add_argument("--hidden_size", "-hs", default=1024, type=int,
                        help="hidden_size of srnn++")
    parser.add_argument("--database", "-db", default="BP4D",
                        help="which database you want to evaluate")
    parser.add_argument("--bi_lstm", "-bi", action="store_true",
                        help="srnn++ use bi_lstm or not, if pretrained model use bi_lstm, you must set this flag on")
    parser.add_argument("--check", "-ck", action="store_true",
                        help="default not to check the npy file and all list file generate correctly")
    parser.add_argument("--num_attrib", type=int, default=2048,
                        help="feature dimension")
    parser.add_argument("--train_edge", default="all",
                        help="all/spatio/temporal")
    args = parser.parse_args()
    adaptive_AU_database(args.database)
    test_dir = args.test if not args.test.endswith("/") else args.test[:-1]
    assert args.database in test_dir
    dataset = GlobalDataSet(num_attrib=args.num_attrib,
                            train_edge=args.train_edge)  # ../data_info.json
    # grab one sample file from the first sub-folder just to pre-load
    # the dataset metadata (feature sizes etc.)
    file_name = None
    for folder in os.listdir(args.test):
        if os.path.isdir(args.test + os.sep + folder):
            for _file_name in os.listdir(args.test + os.sep + folder):
                file_name = args.test + os.sep + folder + os.sep + _file_name
                break
            break
    sample = dataset.load_data(file_name)
    print("pre load done")

    target_dict = {}
    need_srnn = False
    use_crf = False
    # all model pretrained file in 3_fold_1's one folder, 3_fold_2 in another folder
    for model_path in os.listdir(args.target_dir):
        if not model_path.endswith("model.npz"):
            continue
        assert ("opencrf" in model_path or "srnn_plus" in model_path)
        if "opencrf" in model_path:
            assert not need_srnn
            use_crf = True
            # note that open_crf layer doesn't support GPU
            crf_pact_structure = CRFPackageStructure(
                sample, dataset, num_attrib=dataset.num_attrib_type,
                need_s_rnn=False)
            model = OpenCRFLayer(
                node_in_size=dataset.num_attrib_type,
                weight_len=crf_pact_structure.num_feature)
            # BUG FIX: the original passed a stray second argument
            # (`model`) to a one-placeholder format string
            print("loading {}".format(args.target_dir + os.sep + model_path))
            chainer.serializers.load_npz(
                args.target_dir + os.sep + model_path, model)
        elif "srnn_plus" in model_path:
            crf_pact_structure = CRFPackageStructure(
                sample, dataset, num_attrib=args.hidden_size,
                need_s_rnn=True)
            with_crf = "crf" in model_path
            need_srnn = True
            # if you trained a bi_lstm model, you need --bi_lstm here too
            model = StructuralRNNPlus(crf_pact_structure,
                                      in_size=dataset.num_attrib_type,
                                      out_size=dataset.num_label,
                                      hidden_size=args.hidden_size,
                                      with_crf=with_crf,
                                      use_bi_lstm=args.bi_lstm)
            print("loading {}".format(args.target_dir + os.sep + model_path))
            chainer.serializers.load_npz(
                args.target_dir + os.sep + model_path, model)
            if args.gpu >= 0:
                chainer.cuda.get_device_from_id(args.gpu).use()
                model.to_gpu(args.gpu)
                if with_crf:
                    model.open_crf.to_cpu()  # CRF head is CPU-only
        # key models by the leading digit groups of the file name,
        # e.g. "1_2_..." -> "1_2"
        trainer_keyword_pattern = re.compile(r".*?((\d+_)+)_*")
        matcher = trainer_keyword_pattern.match(model_path)
        assert matcher
        trainer_keyword = matcher.group(1)[:-1]
        target_dict[trainer_keyword] = model

    if len(target_dict) == 0:
        print("error , no pretrained npz file in {}".format(args.target_dir))
        return
    if args.check:
        check_pretrained_model_match_file(target_dict, args.test)

    with chainer.no_backprop_mode():
        # if there is one file that use structural_rnn, all the
        # pact_structure need structural_rnn
        test_data = GraphDataset(directory=args.test,
                                 attrib_size=args.hidden_size,
                                 global_dataset=dataset,
                                 need_s_rnn=need_srnn,
                                 need_cache_factor_graph=False,
                                 target_dict=target_dict)
        test_iter = chainer.iterators.SerialIterator(test_data, 1,
                                                     shuffle=False,
                                                     repeat=False)
        gpu = args.gpu if not use_crf else -1  # CRF forces CPU evaluation
        print('using gpu :{}'.format(gpu))
        chainer.config.train = False
        au_evaluator = ActionUnitRoILabelSplitEvaluator(
            test_iter, target_dict, device=gpu, database=args.database)
        observation = au_evaluator.evaluate()
        with open(args.target_dir + os.sep + "evaluation_result.json",
                  "w") as file_obj:
            file_obj.write(json.dumps(observation, indent=4,
                                      separators=(',', ': ')))
            file_obj.flush()
def main():
    """Train an OpenCRF layer on pre-extracted graph features.

    Loads one sample to size the model, trains with SGD + gradient clipping
    + weight decay, periodically snapshots model/optimizer, and optionally
    evaluates on ``--valid``. Returns early (silently) when the train
    folder's keyword tuple is not in the database's LABEL_SPLIT table.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--step_size', '-ss', type=int, default=3000,
                        help='step_size for lr exponential')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--pretrain', '-pr', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot', '-snap', type=int, default=100,
                        help='snapshot iteration for save checkpoint')
    parser.add_argument('--test_mode', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--valid', '-val', default='',
                        help='Test directory path contains test txt file')
    parser.add_argument('--test', '-tt', default='graph_test',
                        help='Test directory path contains test txt file')
    parser.add_argument('--train', '-tr', default="D:/toy/",
                        help='Train directory path contains train txt file')
    parser.add_argument('--train_edge', default="all",
                        help="train temporal/all to comparision")
    parser.add_argument('--database', default="BP4D", help="BP4D/DISFA")
    parser.add_argument(
        '--use_pure_python', action='store_true',
        help='you can use pure python code to check whether your optimized code works correctly')
    parser.add_argument('--lr', '-l', type=float, default=0.1)
    parser.add_argument("--profile", "-p", action="store_true",
                        help="whether to profile to examine speed bottleneck")
    parser.add_argument("--num_attrib", type=int, default=2048,
                        help="node feature dimension")
    parser.add_argument("--need_cache_graph", "-ng", action="store_true",
                        help="whether to cache factor graph to LRU cache")
    parser.add_argument("--eval_mode", '-eval', action="store_true",
                        help="whether to evaluation or not")
    parser.add_argument("--proc_num", "-pn", type=int, default=1)
    parser.add_argument("--resume", action="store_true",
                        help="resume from pretrained model")
    parser.set_defaults(test=False)
    args = parser.parse_args()
    config.OPEN_CRF_CONFIG["use_pure_python"] = args.use_pure_python
    # Imports are deferred on purpose: the config mutation above must happen
    # BEFORE these modules load, because we modify config.OPEN_CRF_CONFIG
    # thus will influence the open_crf layer.
    from graph_learning.dataset.crf_pact_structure import CRFPackageStructure
    from graph_learning.dataset.graph_dataset import GraphDataset
    from graph_learning.extensions.opencrf_evaluator import OpenCRFEvaluator
    from graph_learning.dataset.graph_dataset_reader import GlobalDataSet
    from graph_learning.updater.bptt_updater import convert
    from graph_learning.extensions.AU_roi_label_split_evaluator import ActionUnitEvaluator
    if args.use_pure_python:
        from graph_learning.model.open_crf.pure_python.open_crf_layer import OpenCRFLayer
    else:
        from graph_learning.model.open_crf.cython.open_crf_layer import OpenCRFLayer
    print_interval = 1, 'iteration'
    val_interval = (5, 'iteration')
    adaptive_AU_database(args.database)
    root_dir = os.path.dirname(os.path.dirname(args.train))
    dataset = GlobalDataSet(num_attrib=args.num_attrib,
                            train_edge=args.train_edge)
    # pre-load one .txt sample to learn feature dimensions before
    # constructing the model
    file_name = list(
        filter(lambda e: e.endswith(".txt"), os.listdir(args.train)))[0]
    sample = dataset.load_data(args.train + os.sep + file_name)
    print("pre load done")
    crf_pact_structure = CRFPackageStructure(
        sample, dataset, num_attrib=dataset.num_attrib_type, need_s_rnn=False)
    model = OpenCRFLayer(node_in_size=dataset.num_attrib_type,
                         weight_len=crf_pact_structure.num_feature)
    # the train folder's basename (e.g. "1_2") names this label split
    train_str = args.train
    if train_str[-1] == "/":
        train_str = train_str[:-1]
    trainer_keyword = os.path.basename(train_str)
    trainer_keyword_tuple = tuple(trainer_keyword.split("_"))
    LABEL_SPLIT = config.BP4D_LABEL_SPLIT if args.database == "BP4D" else config.DISFA_LABEL_SPLIT
    if trainer_keyword_tuple not in LABEL_SPLIT:
        return
    # assert "_" in trainer_keyword
    train_data = GraphDataset(args.train,
                              attrib_size=dataset.num_attrib_type,
                              global_dataset=dataset, need_s_rnn=False,
                              need_cache_factor_graph=args.need_cache_graph,
                              get_geometry_feature=False)
    if args.proc_num == 1:
        train_iter = chainer.iterators.SerialIterator(train_data, 1,
                                                      shuffle=True)
    elif args.proc_num > 1:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data, batch_size=1, n_processes=args.proc_num,
            repeat=True, shuffle=True, n_prefetch=10, shared_mem=31457280)
    optimizer = chainer.optimizers.SGD(lr=args.lr)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    updater = StandardUpdater(train_iter, optimizer, converter=convert)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)
    interval = 1
    if args.test_mode:
        chainer.config.train = False
    trainer.extend(
        PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            "opencrf_val/main/hit",
            # "opencrf_validation/main/U_hit",
            "opencrf_val/main/miss",
            # "opencrf_validation/main/U_miss",
            "opencrf_val/main/F1",
            # "opencrf_validation/main/U_F1"
            'opencrf_val/main/accuracy',
        ]),
        trigger=print_interval)
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=print_interval,
            log_name="open_crf_{}.log".format(trainer_keyword)))
    optimizer_snapshot_name = "{0}_{1}_opencrf_optimizer.npz".format(
        trainer_keyword, args.database)
    model_snapshot_name = "{0}_{1}_opencrf_model.npz".format(
        trainer_keyword, args.database)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name),
        trigger=(args.snapshot, 'iteration'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=model_snapshot_name),
        trigger=(args.snapshot, 'iteration'))
    # resume from the latest snapshots when present
    if args.resume and os.path.exists(args.out + os.sep + model_snapshot_name):
        print("loading model_snapshot_name to model")
        chainer.serializers.load_npz(args.out + os.sep + model_snapshot_name,
                                     model)
    if args.resume and os.path.exists(args.out + os.sep + optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(
            args.out + os.sep + optimizer_snapshot_name, optimizer)
    # trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1))
    # trainer.extend(chainer.training.extensions.snapshot(),
    #                trigger=(args.snapshot, 'epoch'))
    # trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.9), trigger=(1, 'epoch'))
    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name="{}_train_loss.png".format(trainer_keyword)),
            trigger=(100, "iteration"))
        trainer.extend(chainer.training.extensions.PlotReport(
            ['opencrf_val/F1', 'opencrf_val/accuracy'],
            file_name="{}_val_f1.png".format(trainer_keyword)),
            trigger=val_interval)
    if args.valid:
        valid_data = GraphDataset(
            args.valid, attrib_size=dataset.num_attrib_type,
            global_dataset=dataset, need_s_rnn=False,
            need_cache_factor_graph=args.need_cache_graph)
        validate_iter = chainer.iterators.SerialIterator(valid_data, 1,
                                                         repeat=False,
                                                         shuffle=False)
        evaluator = OpenCRFEvaluator(iterator=validate_iter, target=model,
                                     device=-1)
        trainer.extend(evaluator, trigger=val_interval)
    if args.profile:
        cProfile.runctx("trainer.run()", globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
    else:
        trainer.run()
def main():
    """Train the time-axis R-CNN (segment proposal + classification over the
    temporal axis of pre-extracted AU features).

    Writes a pid file, builds the backbone/head/SPN train chain, resumes from
    existing model/optimizer snapshots when present, and runs a chainer
    Trainer with logging, snapshots, LR schedules and plots.
    """
    parser = argparse.ArgumentParser(
        description='train script of Time-axis R-CNN:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu', '-g', type=int, help='GPU ID')
    parser.add_argument('--lr', '-l', type=float, default=0.0001)
    parser.add_argument('--out', '-o', default='output_time_axis_rcnn',
                        help='Output directory')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--optimizer', type=OptimizerType,
                        choices=list(OptimizerType))
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--feature_dim', type=int, default=2048)
    parser.add_argument('--roi_size', type=int, default=7)
    parser.add_argument('--snapshot', '-snap', type=int, default=5)
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument('--two_stream_mode', type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='rgb_flow/ optical_flow/ rgb')
    parser.add_argument("--faster_backbone", type=FasterBackboneType,
                        choices=list(FasterBackboneType), help='tcn/conv1d')
    parser.add_argument("--data_dir", type=str, default="/extract_features")
    parser.add_argument("--conv_layers", type=int, default=10)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label", action="store_true",
                        help="only to use paper reported number of labels"
                             " to train")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    args = parser.parse_args()
    args.data_dir = config.ROOT_PATH + "/" + args.data_dir
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    # record this process id so external scripts can manage the run
    pid = str(os.getpid())
    pid_file_path = args.pid + os.path.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()
    print('GPU: {}'.format(args.gpu))
    adaptive_AU_database(args.database)
    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)

    if args.faster_backbone == FasterBackboneType.tcn:
        Bone = TcnBackbone
    elif args.faster_backbone == FasterBackboneType.conv1d:
        Bone = FasterBackbone
    # BUG FIX: both stream-mode branches built the identical single-stream
    # chain, and the rgb_flow branch then referenced the undefined names
    # train_chain_rgb / train_chain_flow (NameError). The flow-specific
    # second chain was never implemented (it was commented out), so the
    # branches are merged into one construction path.
    faster_extractor_backbone = Bone(args.conv_layers, args.feature_dim, 1024)
    faster_head_module = FasterHeadModule(
        args.feature_dim, class_num + 1, args.roi_size
    )  # note that the class number here must include background
    initialW = chainer.initializers.Normal(0.001)
    spn = SegmentProposalNetwork(1024, n_anchors=len(config.ANCHOR_SIZE),
                                 initialW=initialW)
    train_chain = TimeSegmentRCNNTrainChain(faster_extractor_backbone,
                                            faster_head_module, spn)
    model = Wrapper(train_chain, two_stream_mode=args.two_stream_mode)

    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()
    optimizer = None
    if args.optimizer == OptimizerType.AdaGrad:
        # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to
        # AdaGrad because the loss became NaN
        optimizer = chainer.optimizers.AdaGrad(lr=args.lr)
    elif args.optimizer == OptimizerType.RMSprop:
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == OptimizerType.Adam:
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == OptimizerType.SGD:
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == OptimizerType.AdaDelta:
        optimizer = chainer.optimizers.AdaDelta()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    data_dir = args.data_dir + "/{0}_{1}_fold_{2}/train".format(
        args.database, args.fold, args.split_idx)
    dataset = NpzFeatureDataset(data_dir, args.database,
                                two_stream_mode=args.two_stream_mode,
                                T=10.0, use_mirror_data=True)
    dataset = TransformDataset(dataset, Transform(mirror=True))
    if args.proc_num == 1:
        train_iter = SerialIterator(dataset, args.batch_size, repeat=True,
                                    shuffle=True)
    else:
        train_iter = MultiprocessIterator(dataset,
                                          batch_size=args.batch_size,
                                          n_processes=args.proc_num,
                                          repeat=True, shuffle=True,
                                          n_prefetch=10,
                                          shared_mem=10000000)

    use_paper_classnum = "use_paper_num_label" if args.use_paper_num_label else "all_avail_label"
    model_file_name = args.out + os.path.sep + \
        'time_axis_rcnn_{0}_{1}_fold_{2}@{3}@{4}@{5}@{6}_model.npz'.format(
            args.database, args.fold, args.split_idx, use_paper_classnum,
            args.two_stream_mode, args.conv_layers, args.faster_backbone)
    print(model_file_name)
    pretrained_optimizer_file_name = args.out + os.path.sep + \
        'time_axis_rcnn_{0}_{1}_fold_{2}@{3}@{4}@{5}@{6}_optimizer.npz'.format(
            args.database, args.fold, args.split_idx, use_paper_classnum,
            args.two_stream_mode, args.conv_layers, args.faster_backbone)
    print(pretrained_optimizer_file_name)
    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snatshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name,
                                     optimizer)
    if os.path.exists(model_file_name):
        print("loading pretrained snapshot:{}".format(model_file_name))
        chainer.serializers.load_npz(model_file_name,
                                     model.time_seg_train_chain)

    print("only one GPU({0}) updater".format(args.gpu))
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu,
        converter=lambda batch, device: concat_examples_not_string(
            batch, device, padding=0))
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
        trigger=(args.snapshot, 'epoch'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        model.time_seg_train_chain,
        filename=os.path.basename(model_file_name)),
        trigger=(args.snapshot, 'epoch'))
    log_interval = 100, 'iteration'
    print_interval = 100, 'iteration'
    plot_interval = 100, 'iteration'
    # BUG FIX: args.optimizer is an OptimizerType enum, so the original
    # string comparisons ("Adam" / "AdaDelta") could never match — the lr
    # schedule was always attached and the Adam alpha schedule never was.
    if args.optimizer != OptimizerType.Adam and args.optimizer != OptimizerType.AdaDelta:
        trainer.extend(
            chainer.training.extensions.ExponentialShift('lr', 0.1),
            trigger=(20, 'epoch'))
    elif args.optimizer == OptimizerType.Adam:
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer), trigger=(10, 'epoch'))
    if args.optimizer != OptimizerType.AdaDelta:
        # AdaDelta has no lr attribute to observe
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="log_{0}_{1}_{2}_fold_{3}_{4}.log".format(
                args.faster_backbone, args.database, args.fold,
                args.split_idx, use_paper_classnum)))
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
        'main/roi_loc_loss', 'main/roi_cls_loss', 'main/rpn_loc_loss',
        'main/rpn_cls_loss', 'main/accuracy', 'main/rpn_accuracy',
    ]), trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))
    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name='loss_{0}_{1}_fold_{2}_{3}.png'.format(
                args.database, args.fold, args.split_idx,
                use_paper_classnum),
            trigger=plot_interval), trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_{1}_fold_{2}_{3}.png'.format(
                args.database, args.fold, args.split_idx,
                use_paper_classnum),
            trigger=plot_interval), trigger=plot_interval)
    trainer.run()
def main():
    """Train the space-time attention/relation network for AU recognition.

    Parses command-line arguments, builds the dataset and the model
    (``StRelationNetPlus`` when ``--use_relation_net`` is given, otherwise
    ``StAttentioNetPlus``), then runs a Chainer trainer with snapshot,
    logging and plotting extensions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', '-e', type=int, default=25,
                        help='Number of sweeps over the dataset to train')
    # open_crf layer only works for CPU mode
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--step_size', '-ss', type=int, default=3000,
                        help='step_size for lr exponential')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--snapshot', '-snap', type=int, default=1,
                        help='snapshot epochs for save checkpoint')
    parser.add_argument('--test_mode', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument("--test", '-tt', default='test',
                        help='Test directory path contains test txt file to evaluation')
    parser.add_argument('--train', '-t', default="train",
                        help='Train directory path contains train txt file')
    parser.add_argument('--database', default="BP4D",
                        help='database to train for')
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--neighbor_mode', type=NeighborMode, choices=list(NeighborMode),
                        help='1:concat_all,2:attention_fuse,3:random_neighbor,4.no_neighbor')
    parser.add_argument('--spatial_edge_mode', type=SpatialEdgeMode, choices=list(SpatialEdgeMode),
                        help='1:all_edge, 2:configure_edge, 3:no_edge')
    parser.add_argument('--temporal_edge_mode', type=RecurrentType, choices=list(RecurrentType),
                        help='1:rnn, 2:attention_block, 3.point-wise feed forward(no temporal)')
    parser.add_argument("--use_relation_net", action='store_true',
                        help='whether to use st_relation_net instead of space_time_net')
    parser.add_argument("--relation_net_lstm_first", action='store_true',
                        help='whether to use relation_net_lstm_first_forward in st_relation_net')
    parser.add_argument('--use_geometry_features', action='store_true',
                        help='whether to use geometry features')
    parser.add_argument("--num_attrib", type=int, default=2048,
                        help="number of dimension of each node feature")
    parser.add_argument('--geo_num_attrib', type=int, default=4,
                        help='geometry feature length')
    parser.add_argument('--attn_heads', type=int, default=16,
                        help='attention heads number')
    parser.add_argument('--layers', type=int, default=1,
                        help='edge rnn and node rnn layer')
    parser.add_argument("--use_paper_num_label", action="store_true",
                        help="only to use paper reported number of labels"
                             " to train")
    parser.add_argument("--bi_lstm", action="store_true",
                        help="whether to use bi-lstm as Edge/Node RNN")
    parser.add_argument('--weight_decay', type=float, default=0.0005,
                        help="weight decay")
    parser.add_argument("--proc_num", '-proc', type=int, default=1,
                        help="process number of dataset reader")
    parser.add_argument("--resume", action="store_true",
                        help="whether to load npz pretrained file")
    parser.add_argument('--resume_model', '-rm',
                        help='The relative path to restore model file')
    parser.add_argument("--snap_individual", action="store_true",
                        help='whether to snap shot each fixed step into '
                             'individual model file')
    parser.add_argument("--vis", action='store_true',
                        help='whether to visualize computation graph')
    parser.set_defaults(test=False)
    args = parser.parse_args()
    if args.use_relation_net:
        args.out += "_relationnet"
    print("output file to : {}".format(args.out))
    print_interval = 1, 'iteration'
    val_interval = 5, 'iteration'
    print("""
    ======================================
    argument:
    neighbor_mode:{0}
    spatial_edge_mode:{1}
    temporal_edge_mode:{2}
    use_geometry_features:{3}
    ======================================
    """.format(args.neighbor_mode, args.spatial_edge_mode,
               args.temporal_edge_mode, args.use_geometry_features))
    adaptive_AU_database(args.database)
    # the StructuralRNN constructor needs the first frame's factor graph backup
    dataset = GlobalDataSet(num_attrib=args.num_attrib,
                            num_geo_attrib=args.geo_num_attrib, train_edge="all")
    file_name = list(filter(lambda e: e.endswith(".txt"), os.listdir(args.train)))[0]
    paper_report_label = OrderedDict()
    if args.use_paper_num_label:
        # FIX: this database dispatch used to sit inside the AU loop, where it
        # is loop-invariant and left `paper_use_AU` unbound (NameError) for
        # any database other than BP4D/DISFA; hoist it and fail loudly.
        if args.database == "BP4D":
            paper_use_AU = config.paper_use_BP4D
        elif args.database == "DISFA":
            paper_use_AU = config.paper_use_DISFA
        else:
            raise ValueError(
                "unsupported database for paper label subset: {}".format(args.database))
        for AU_idx, AU in sorted(config.AU_SQUEEZE.items(), key=lambda e: int(e[0])):
            if AU in paper_use_AU:
                paper_report_label[AU_idx] = AU
    paper_report_label_idx = list(paper_report_label.keys())
    if not paper_report_label_idx:
        paper_report_label_idx = None  # None means "use all AU labels"
    # we load the first sample to construct the S-RNN; it must be passed as a
    # constructor argument
    sample = dataset.load_data(args.train + os.sep + file_name,
                               npy_in_parent_dir=False,
                               paper_use_label_idx=paper_report_label_idx)
    # Only the first frame of one video is read; the node count is relatively
    # stable across frames, so the RNN can be constructed from it.
    # Because multi-class hinge loss is used, num_label must be the number of
    # binary-form labels + 1 (the +1 stands for the all-zero label).
    crf_pact_structure = CRFPackageStructure(sample, dataset,
                                             num_attrib=dataset.num_attrib_type)
    if args.use_relation_net:
        model = StRelationNetPlus(crf_pact_structure, in_size=dataset.num_attrib_type,
                                  out_size=dataset.label_bin_len, database=args.database,
                                  neighbor_mode=args.neighbor_mode,
                                  spatial_edge_mode=args.spatial_edge_mode,
                                  recurrent_block_type=args.temporal_edge_mode,
                                  attn_heads=args.attn_heads, dropout=0.5,
                                  use_geometry_features=args.use_geometry_features,
                                  layers=args.layers, bi_lstm=args.bi_lstm,
                                  lstm_first_forward=args.relation_net_lstm_first)
    else:
        model = StAttentioNetPlus(crf_pact_structure, in_size=dataset.num_attrib_type,
                                  out_size=dataset.label_bin_len, database=args.database,
                                  neighbor_mode=args.neighbor_mode,
                                  spatial_edge_mode=args.spatial_edge_mode,
                                  recurrent_block_type=args.temporal_edge_mode,
                                  attn_heads=args.attn_heads, dropout=0.5,
                                  use_geometry_features=args.use_geometry_features,
                                  layers=args.layers, bi_lstm=args.bi_lstm)
    # note that the attrib_size below will be used by open_crf for its
    # parameter count, thus we cannot pass dataset.num_attrib_type blindly!
    train_data = GraphDataset(args.train, attrib_size=dataset.num_attrib_type,
                              global_dataset=dataset, need_s_rnn=True,
                              need_cache_factor_graph=False, npy_in_parent_dir=False,
                              get_geometry_feature=True,
                              paper_use_label_idx=paper_report_label_idx)  # train is a directory path
    train_iter = chainer.iterators.SerialIterator(train_data, 1, shuffle=True, repeat=True)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        if args.use_relation_net:
            model.st_relation_net.to_gpu(args.gpu)
        else:
            model.st_attention_net.to_gpu(args.gpu)
    # snapshot file names encode the whole configuration so that different
    # runs in the same output directory never clobber each other
    specific_key = "all_AU_train"
    if paper_report_label_idx:
        specific_key = "paper_AU_num_train"
    optimizer_snapshot_name = "{0}@{1}@st_attention_network_optimizer@{2}@{3}@{4}@{5}.npz".format(
        args.database, specific_key, args.neighbor_mode, args.spatial_edge_mode,
        args.temporal_edge_mode,
        "use_geo" if args.use_geometry_features else "no_geo")
    model_snapshot_name = "{0}@{1}@st_attention_network_model@{2}@{3}@{4}@{5}.npz".format(
        args.database, specific_key, args.neighbor_mode, args.spatial_edge_mode,
        args.temporal_edge_mode,
        "use_geo" if args.use_geometry_features else "no_geo")
    if args.snap_individual:
        # "{.updater.iteration}" is expanded by the snapshot extension, so the
        # saved file name embeds the trainer iteration number
        model_snapshot_name = "{0}@{1}@st_attention_network_model_snapshot_".format(
            args.database, specific_key)
        model_snapshot_name += "{.updater.iteration}"
        model_snapshot_name += "@{0}@{1}@{2}@{3}.npz".format(
            args.neighbor_mode, args.spatial_edge_mode, args.temporal_edge_mode,
            "use_geo" if args.use_geometry_features else "no_geo")
    if os.path.exists(args.out + os.sep + model_snapshot_name):
        print("found trained model file. load trained file: {}".format(
            args.out + os.sep + model_snapshot_name))
        chainer.serializers.load_npz(args.out + os.sep + model_snapshot_name, model)
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))
    updater = BPTTUpdater(train_iter, optimizer, int(args.gpu))
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    interval = (1, 'iteration')
    if args.test_mode:
        chainer.config.train = False
    trainer.extend(chainer.training.extensions.observe_lr(), trigger=print_interval)
    trainer.extend(chainer.training.extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss', "main/accuracy", ]),
        trigger=print_interval)
    log_name = "st_attention_network_{0}@{1}@{2}@{3}@{4}.log".format(
        args.database, args.neighbor_mode, args.spatial_edge_mode,
        args.temporal_edge_mode,
        "use_geo" if args.use_geometry_features else "no_geo")
    trainer.extend(chainer.training.extensions.LogReport(trigger=interval, log_name=log_name))
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name), trigger=(args.snapshot, 'epoch'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=model_snapshot_name), trigger=(args.snapshot, 'epoch'))
    trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                   trigger=(10, "epoch"))
    # FIX: guard against --resume without --resume_model; args.resume_model
    # defaults to None, which previously crashed the path concatenation below
    # with TypeError.
    if args.resume and args.resume_model and \
            os.path.exists(args.out + os.sep + args.resume_model):
        print("loading model_snapshot_name to model")
        chainer.serializers.load_npz(args.out + os.sep + args.resume_model, model)
    if args.resume and os.path.exists(args.out + os.sep + optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(args.out + os.sep + optimizer_snapshot_name, optimizer)
    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'], file_name="train_loss.png"), trigger=val_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'], file_name="train_accuracy.png"), trigger=val_interval)
    trainer.run()
def main():
    """Train the temporal LSTM model for AU recognition.

    Builds a ``TemporalLSTM`` over the graph dataset in ``--train``, then runs
    a Chainer trainer with snapshot, logging and plotting extensions.
    Supports resuming from either a single model snapshot or (with
    ``--snap_individual``) from the newest per-iteration snapshot.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--snapshot', '-snap', type=int, default=100,
                        help='snapshot epochs for save checkpoint')
    parser.add_argument('--valid', '-v', default='',
                        help='validate directory path contains validate txt file')
    parser.add_argument('--train', '-t', default="train",
                        help='Train directory path contains train txt file')
    parser.add_argument('--database', default="BP4D",
                        help='database to train for')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--hidden_size', type=int, default=1024,
                        help="hidden_size orignally used in open_crf")
    parser.add_argument('--eval_mode', action='store_true',
                        help='whether to evaluate the model')
    parser.add_argument("--need_cache_graph", "-ng", action="store_true",
                        help="whether to cache factor graph to LRU cache")
    parser.add_argument("--bi_lstm", '-bilstm', action='store_true',
                        help="Use bi_lstm as basic component of temporal_lstm")
    parser.add_argument("--num_attrib", type=int, default=2048,
                        help="node feature dimension")
    parser.add_argument("--resume", action="store_true",
                        help="whether to load npz pretrained file")
    parser.add_argument("--snap_individual", action="store_true",
                        help="whether to snapshot each individual epoch/iteration")
    parser.set_defaults(test=False)
    args = parser.parse_args()
    print_interval = 1, 'iteration'
    val_interval = 5, 'iteration'
    adaptive_AU_database(args.database)
    # the StructuralRNN constructor needs the first frame's factor graph backup
    dataset = GlobalDataSet(num_attrib=args.num_attrib)
    model = TemporalLSTM(box_num=config.BOX_NUM[args.database],
                         in_size=args.num_attrib,
                         out_size=dataset.label_bin_len,
                         use_bi_lstm=args.bi_lstm, initialW=None)
    train_data = GraphDataset(args.train, attrib_size=args.hidden_size,
                              global_dataset=dataset, need_s_rnn=True,
                              need_cache_factor_graph=args.need_cache_graph)
    train_iter = chainer.iterators.SerialIterator(train_data, 1,
                                                  shuffle=True, repeat=True)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    updater = BPTTUpdater(train_iter, optimizer, int(args.gpu))
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    print_interval = (1, 'iteration')
    trainer.extend(chainer.training.extensions.observe_lr(), trigger=print_interval)
    trainer.extend(chainer.training.extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss', "main/accuracy"]),
        trigger=print_interval)
    log_name = "temporal_lstm.log"
    trainer.extend(chainer.training.extensions.LogReport(trigger=print_interval,
                                                         log_name=log_name))
    optimizer_snapshot_name = "{0}_temporal_lstm_optimizer.npz".format(args.database)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name),
        trigger=(args.snapshot, 'iteration'))
    if not args.snap_individual:
        model_snapshot_name = "{0}_temporal_lstm_model.npz".format(args.database)
        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=model_snapshot_name),
            trigger=(args.snapshot, 'iteration'))
    else:
        # "{.updater.iteration}" is expanded by the snapshot extension
        model_snapshot_name = "{0}_temporal_lstm_model_".format(
            args.database) + "{.updater.iteration}.npz"
        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=model_snapshot_name),
            trigger=(args.snapshot, 'iteration'))
    trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.7),
                   trigger=(5, "epoch"))
    # load pretrained file
    if not args.snap_individual:
        if args.resume and os.path.exists(args.out + os.sep + model_snapshot_name):
            print("loading model_snapshot_name to model")
            chainer.serializers.load_npz(
                args.out + os.sep + model_snapshot_name, model)
    else:
        if args.resume:
            # FIX: previously every file in args.out had the span between its
            # last "_" and last "." parsed as an int, which crashed with
            # ValueError on the always-present optimizer snapshot
            # ("<db>_temporal_lstm_optimizer.npz" -> int("optimizer")) and with
            # IndexError on an empty directory. Only consider files matching
            # the individual-model naming pattern, and skip when none exist.
            prefix = "{0}_temporal_lstm_model_".format(args.database)
            suffix = ".npz"
            iteration_numbers = [
                filename[len(prefix):-len(suffix)]
                for filename in os.listdir(args.out)
                if filename.startswith(prefix) and filename.endswith(suffix)
                and filename[len(prefix):-len(suffix)].isdigit()
            ]
            if iteration_numbers:
                file_no = max(map(int, iteration_numbers))
                model_snapshot_name = "{0}_temporal_lstm_model_{1}.npz".format(
                    args.database, file_no)
                chainer.serializers.load_npz(
                    args.out + os.sep + model_snapshot_name, model)
    if args.resume and os.path.exists(args.out + os.sep + optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(
            args.out + os.sep + optimizer_snapshot_name, optimizer)
    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'], file_name="train_loss.png"),
            trigger=(100, "iteration"))
    trainer.run()
orig_from_path, "BP4D") file_obj.write("{}\n".format(line)) file_obj.flush() with open("{0}/id_test_{1}.txt".format(folder_path, i), "w") as file_obj: for subject_name in test_name_array: for img_file_path in subject_imgpath_dict[subject_name]: orig_from_path = "#" if img_file_path not in img_from else img_from[ img_file_path] video_dir = BP4D_data_reader.img_dir + os.sep + subject_name + os.sep + sequence_name + os.sep AU_set = enhance_mix_database[img_file_path] AU_set_str = ",".join(AU_set) line = "{0}\t{1}\t{2}\t{3}".format(img_file_path, AU_set_str, orig_from_path, "BP4D") file_obj.write("{}\n".format(line)) file_obj.flush() if __name__ == "__main__": from dataset_toolkit.adaptive_AU_config import adaptive_AU_database adaptive_AU_database("BP4D") gen_BP4D_subject_kfold_id_file("BP4D", "{0}/{1}".format(config.DATA_PATH["BP4D"], "idx"), kfold=10, drop_big_label=False) print("done")
for orientation, video_info_lst in subject_video[ video_name].items(): for video_info in video_info_lst: img_file_path = video_info["img_path"] img_file_path = os.sep.join( img_file_path.split("/")[-3:]) AU_set_str = ",".join(video_info["AU_label"]) if len(video_info["AU_label"]) == 0: AU_set_str = "0" orig_from_path = "#" file_obj.write("{0}\t{1}\t{2}\t{3}\n".format( img_file_path, AU_set_str, orig_from_path, video_info["database"])) file_obj.flush() if __name__ == "__main__": from dataset_toolkit.adaptive_AU_config import adaptive_AU_database # # adaptive_AU_database("BP4D") # partition = {"trn":"/home/machen/dataset/BP4D/idx/trn_partition.txt", # "valid":"/home/machen/dataset/BP4D/idx/validate_partition.txt"} # gen_BP4D_subject_id_file("{0}/{1}".format(config.DATA_PATH["BP4D"], "idx"), kfold=10, validation_size=1000) adaptive_AU_database("DISFA") # single_AU_RCNN_BP4D_subject_id_file("{0}/{1}".format(config.ROOT_PATH + os.sep+"/BP4D/", "idx"), kfold=3) gen_DISFA_subject_id_file("{0}/{1}".format( config.ROOT_PATH + os.sep + "/DISFA_1/", "idx"), kfold=3) # gen_BP4D_subject_id_file("{0}/{1}".format(config.DATA_PATH["BP4D"], "idx"), kfold=10) # gen_BP4D_subject_id_file("{0}/{1}".format(config.DATA_PATH["BP4D"], "idx"), kfold=3) # print("done")
for label, group in groupby(column): if label == 1: AU_segment_count[config.AU_SQUEEZE[AU_idx]] += 1 AU_continous_count[label].append(sum(1 for _ in group)) if 0 in AU_continous_count: del AU_continous_count[0] # only have 1 else: pass for label, val_list in AU_continous_count.items(): for sum_val in val_list: AU_all_count[config.AU_SQUEEZE[AU_idx]].append(sum_val) average_dict = {} for AU, sum_val_list in AU_all_count.items(): average_dict[AU] = sum(sum_val_list) / len(sum_val_list) return average_dict, AU_segment_count if __name__ == "__main__": adaptive_AU_database("DISFA", False) label_matrix_dict = read_idx_file( "/home/machen/dataset/DISFA/idx/3_fold/id_all.txt") average_dict, AU_segment_count = stats_frequency(label_matrix_dict) print("duration:") for AU, mean in sorted(average_dict.items(), key=lambda e: int(e[0])): if AU in config.paper_use_DISFA: print(AU, mean) print("segment count:") for AU, seg_count in sorted(AU_segment_count.items(), key=lambda e: int(e[0])): if AU in config.paper_use_DISFA: print(AU, seg_count)
def main():
    """Measure inference speed of a trained AU R-CNN model.

    Builds the backbone selected by ``--feature_model`` (or an FPN variant),
    loads the requested split of the chosen database, and runs
    ``SpeedEvaluator`` for ``--trail_times`` trails of
    ``--each_trail_iteration`` iterations each, dumping the timing
    observation to ``<out>/evaluation_speed_test.json``.
    """
    print("chainer cudnn enabled: {}".format(chainer.cuda.cudnn_enabled))
    parser = argparse.ArgumentParser(
        description='Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/AU_R_CNN/')
    parser.add_argument('--gpu', '-g', default="0",
                        help='GPU ID, multiple GPU split by comma, \ '
                             'Note that BPTT updater do not support multi-GPU')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=20)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--need_validate', action='store_true',
                        help='do or not validate during training')
    parser.add_argument('--mean', default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--feature_model', default="resnet101",
                        help="vgg16/vgg19/resnet101 for train")
    parser.add_argument('--extract_len', type=int, default=1000)
    parser.add_argument('--optimizer', default='RMSprop',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model', default='resnet101',
                        help='imagenet/vggface/resnet101/*.npz')
    parser.add_argument('--pretrained_model_args', nargs='+', type=float,
                        help='you can pass in "1.0 224" or "0.75 224"')
    parser.add_argument('--use_memcached', action='store_true',
                        help='whether use memcached to boost speed of fetch crop&mask')
    # FIX: this argument was commented out while args.memcached_host is still
    # read below, so --use_memcached always crashed with AttributeError.
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--snap_individual", action="store_true",
                        help="whether to snapshot each individual epoch/iteration")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument("--use_sigmoid_cross_entropy", "-sigmoid", action="store_true",
                        help="whether to use sigmoid cross entropy or softmax cross entropy")
    parser.add_argument("--is_pretrained", action="store_true",
                        help="whether is to pretrain BP4D later will for DISFA dataset or not")
    parser.add_argument("--pretrained_target", '-pt', default="",
                        help="whether pretrain label set will use DISFA or not")
    parser.add_argument("--fix", '-fix', action="store_true",
                        help="whether to fix first few conv layers or not")
    parser.add_argument('--occlude', default='',
                        help='whether to use occlude face of upper/left/right/lower/none to test')
    parser.add_argument("--prefix", '-prefix', default="",
                        help="_beta, for example 3_fold_beta")
    parser.add_argument('--eval_mode', action='store_true',
                        help='Use test datasets for evaluation metric')
    parser.add_argument("--img_resolution", type=int, default=512)
    parser.add_argument("--FERA", action='store_true',
                        help='whether to use FERA data split train and validate')
    parser.add_argument('--FPN', action="store_true",
                        help="whether to use feature pyramid network for training and prediction")
    parser.add_argument('--fake_box', action="store_true",
                        help="whether to use fake average box coordinate to predict")
    parser.add_argument('--roi_align', action="store_true",
                        help="whether to use roi_align or roi_pooling")
    parser.add_argument("--train_test", default="trainval", type=str)
    parser.add_argument("--trail_times", default=20, type=int)
    parser.add_argument("--each_trail_iteration", default=1000, type=int)
    args = parser.parse_args()
    if not os.path.exists(args.pid):
        os.makedirs(args.pid)
    # NOTE(review): the pid file is currently not written (see commented block
    # below); the variables are kept for when it is re-enabled.
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    # with open(pid_file_path, "w") as file_obj:
    #     file_obj.write(pid)
    #     file_obj.flush()
    config.IMG_SIZE = (args.img_resolution, args.img_resolution)
    print('GPU: {}'.format(args.gpu))
    if args.is_pretrained:
        adaptive_AU_database(args.pretrained_target)
    else:
        adaptive_AU_database(args.database)
    np.random.seed(args.seed)
    # An index txt file must be built beforehand (id_trainval_0.txt), where
    # each line is subject + "/" + emotion_seq + "/" + frame
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))
    # build the backbone network selected on the command line
    if args.FPN:
        faster_rcnn = FPN101(len(config.AU_SQUEEZE),
                             pretrained_resnet=args.pretrained_model,
                             use_roialign=args.roi_align,
                             mean_path=args.mean,
                             min_size=args.img_resolution,
                             max_size=args.img_resolution)
    elif args.feature_model == 'vgg16':
        faster_rcnn = FasterRCNNVGG16(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model=args.pretrained_model,
            mean_file=args.mean,
            min_size=args.img_resolution,
            max_size=args.img_resolution,
            extract_len=args.extract_len,
            fix=args.fix)  # can be changed to /home/nco/face_expr/result/snapshot_model.npz
    elif args.feature_model == 'vgg19':
        faster_rcnn = FasterRCNNVGG19(n_fg_class=len(config.AU_SQUEEZE),
                                      pretrained_model=args.pretrained_model,
                                      mean_file=args.mean,
                                      min_size=args.img_resolution,
                                      max_size=args.img_resolution,
                                      extract_len=args.extract_len,
                                      dataset=args.database,
                                      fold=args.fold,
                                      split_idx=args.split_idx)
    elif args.feature_model == 'resnet101':
        faster_rcnn = FasterRCNNResnet101(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model=args.pretrained_model,
            mean_file=args.mean,
            min_size=args.img_resolution,
            max_size=args.img_resolution,
            extract_len=args.extract_len
        )  # can be changed to /home/nco/face_expr/result/snapshot_model.npz
    elif args.feature_model == "mobilenet_v1":
        faster_rcnn = FasterRCNN_MobilenetV1(
            pretrained_model_type=args.pretrained_model_args,
            min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
            mean_file=args.mean, n_class=len(config.AU_SQUEEZE))
    # pure inference timing: no backprop graph, evaluation-mode configuration
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        test_data = AUDataset(database=args.database,
                              fold=args.fold,
                              img_resolution=args.img_resolution,
                              split_name=args.train_test,
                              split_index=args.split_idx,
                              mc_manager=mc_manager,
                              train_all_data=False,
                              prefix=args.prefix,
                              pretrained_target=args.pretrained_target,
                              is_FERA=args.FERA)
        test_data = TransformDataset(test_data, Transform(faster_rcnn, mirror=False))
        if args.fake_box:
            test_data = TransformDataset(test_data, FakeBoxTransform(args.database))
        if args.proc_num == 1:
            test_iter = SerialIterator(test_data, args.batch_size,
                                       repeat=False, shuffle=True)
        else:
            test_iter = MultiprocessIterator(test_data,
                                             batch_size=args.batch_size,
                                             n_processes=args.proc_num,
                                             repeat=False, shuffle=True,
                                             n_prefetch=10,
                                             shared_mem=10000000)
        # when multiple GPUs are passed, time only on the first one
        gpu = int(args.gpu) if "," not in args.gpu else int(
            args.gpu[:args.gpu.index(",")])
        chainer.cuda.get_device_from_id(gpu).use()
        faster_rcnn.to_gpu(gpu)
        evaluator = SpeedEvaluator(
            test_iter, faster_rcnn,
            lambda batch, device: concat_examples_not_none(
                batch, device, padding=-99),
            device=gpu,
            trail_times=args.trail_times,
            each_trail_iteration=args.each_trail_iteration,
            database=args.database)
        observation = evaluator.evaluate()
        with open(args.out + os.path.sep + "evaluation_speed_test.json",
                  "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
def main():
    """Entry point: train the (two-stream) Space-Time Action Unit R-CNN.

    Parses command-line options, builds one ROI-extractor chain per input
    stream (RGB and/or optical flow) according to ``--backbone`` and
    ``--two_stream_mode``, wraps them in ``Wrapper``, then runs the chainer
    Trainer under cProfile.  NOTE: the Trainer stop trigger is
    ``(10, 'iteration')`` — this script is a profiling run, not a full
    training run (see the cProfile call at the bottom).
    """
    parser = argparse.ArgumentParser(
        description='Space Time Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu', '-g', nargs='+', type=int,
                        help='GPU ID, multiple GPU split by space')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out', '-o', default='output_two_stream',
                        help='Output directory')
    # FIX: help text previously said "Output directory" (copy-paste slip).
    parser.add_argument('--database', default='BP4D',
                        help='database: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--mean_rgb',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--mean_flow',
                        default=config.ROOT_PATH + "BP4D/idx/mean_flow.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone', default="resnet101",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer', default='SGD',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model_rgb',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument(
        '--pretrained_model_flow',
        help="path of optical flow pretrained model, can also use the same npz with rgb")
    parser.add_argument('--two_stream_mode', type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='rgb_flow/ optical_flow/ rgb')
    parser.add_argument(
        '--use_memcached', action='store_true',
        help='whether use memcached to boost speed of fetch crop&mask')
    # BUG FIX: this argument was commented out, but args.memcached_host is
    # read below whenever --use_memcached is passed, which raised
    # AttributeError on the argparse Namespace.
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--fix", action="store_true",
                        help="fix parameter of conv2 update when finetune")
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label", action="store_true",
                        help="only to use paper reported number of labels"
                             " to train")
    parser.add_argument(
        "--roi_align", action="store_true",
        help="whether to use roi align or roi pooling layer in CNN")
    parser.add_argument("--T", '-T', type=int, default=10)
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    args = parser.parse_args()

    # Record this process's pid so external scripts can find/kill the run.
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()

    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))
    adaptive_AU_database(args.database)

    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())

    # Build one AU_RCNN_ROI_Extractor per input stream.
    au_rcnn_train_chain_list = []
    if args.backbone == 'vgg':
        au_rcnn = AU_RCNN_VGG16(pretrained_model=args.pretrained_model_rgb,
                                min_size=config.IMG_SIZE[0],
                                max_size=config.IMG_SIZE[1],
                                use_roi_align=args.roi_align)
        au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain)
    elif args.backbone == 'resnet101':
        if args.two_stream_mode != TwoStreamMode.rgb_flow:
            # Single stream: exactly one of the two pretrained paths is set.
            assert (args.pretrained_model_rgb == "" and args.pretrained_model_flow != "") or \
                   (args.pretrained_model_rgb != "" and args.pretrained_model_flow == "")
            pretrained_model = args.pretrained_model_rgb if args.pretrained_model_rgb else args.pretrained_model_flow
            au_rcnn = AU_RCNN_Resnet101(
                pretrained_model=pretrained_model,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=args.roi_align,
                use_optical_flow_input=(
                    args.two_stream_mode == TwoStreamMode.optical_flow),
                temporal_length=args.T)
            au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain)
        else:  # rgb_flow mode: one RGB branch plus one optical-flow branch
            au_rcnn_rgb = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_rgb,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=args.roi_align, use_optical_flow_input=False,
                temporal_length=args.T)
            au_rcnn_optical_flow = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_flow,
                min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
                use_roi_align=args.roi_align, use_optical_flow_input=True,
                temporal_length=args.T)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
            au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
                au_rcnn_optical_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)

    model = Wrapper(au_rcnn_train_chain_list, class_num, args.database,
                    args.T, two_stream_mode=args.two_stream_mode,
                    gpus=args.gpu)

    batch_size = args.batch_size
    img_dataset = AUDataset(database=args.database, L=args.T, fold=args.fold,
                            split_name='trainval', split_index=args.split_idx,
                            mc_manager=mc_manager,
                            two_stream_mode=args.two_stream_mode,
                            train_all_data=False,
                            paper_report_label_idx=paper_report_label_idx)
    train_dataset = TransformDataset(
        img_dataset,
        Transform(L=args.T, mirror=True, mean_rgb_path=args.mean_rgb,
                  mean_flow_path=args.mean_flow))
    if args.proc_num == 1:
        train_iter = SerialIterator(train_dataset, batch_size,
                                    repeat=True, shuffle=True)
    else:
        train_iter = MultiprocessIterator(train_dataset,
                                          batch_size=batch_size,
                                          n_processes=args.proc_num,
                                          repeat=True, shuffle=True,
                                          n_prefetch=3,
                                          shared_mem=53457280)

    # Multi-GPU: Wrapper handles device placement itself (gpus=args.gpu);
    # single-GPU: move the whole model here.
    if len(args.gpu) > 1:
        for gpu in args.gpu:
            chainer.cuda.get_device_from_id(gpu).use()
    else:
        chainer.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu(args.gpu[0])

    optimizer = None
    if args.optimizer == 'AdaGrad':
        # Originally MomentumSGD(lr, momentum=0.9); switched to AdaGrad
        # because the loss went NaN (per the original author's note).
        optimizer = chainer.optimizers.AdaGrad(lr=args.lr)
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        optimizer = chainer.optimizers.AdaDelta()
    else:
        # FIX: previously an unknown name fell through with optimizer = None
        # and crashed later in optimizer.setup(model) with AttributeError.
        raise ValueError("unsupported optimizer: {}".format(args.optimizer))
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Snapshot file names encode the full training configuration, e.g.
    # BP4D_3_fold_1_resnet101@rgb_flow@use_paper_num_label@roi_align@T#10_model.npz
    use_paper_key_str = "use_paper_num_label" if args.use_paper_num_label else "all_avail_label"
    roi_align_key_str = "roi_align" if args.roi_align else "roi_pooling"
    single_model_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_model.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.two_stream_mode, use_paper_key_str, roi_align_key_str,
            args.T)
    print(single_model_file_name)
    pretrained_optimizer_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_optimizer.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.two_stream_mode, use_paper_key_str, roi_align_key_str,
            args.T)
    print(pretrained_optimizer_file_name)

    # Resume from existing snapshots when present.
    if os.path.exists(pretrained_optimizer_file_name):
        # FIX: typo "snatshot" in the original log message.
        print("loading optimizer snapshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name,
                                     optimizer)
    if os.path.exists(single_model_file_name):
        print("loading pretrained snapshot:{}".format(single_model_file_name))
        chainer.serializers.load_npz(single_model_file_name, model)

    if args.fix:
        # Freeze conv1/bn1 and the whole res2 stage so finetuning leaves the
        # low-level features untouched.  Behavior-identical rewrite of the
        # original long list of per-parameter assignments.
        # NOTE(review): model is built with a chain *list*; confirm
        # `model.au_rcnn_train_chain` exists on Wrapper.
        au_rcnn = model.au_rcnn_train_chain.au_rcnn
        au_rcnn.extractor.conv1.W.update_rule.enabled = False
        au_rcnn.extractor.bn1.gamma.update_rule.enabled = False
        au_rcnn.extractor.bn1.beta.update_rule.enabled = False
        for res2_name in ["a", "b1", "b2"]:
            block = getattr(au_rcnn.extractor.res2, res2_name)
            if res2_name == "a":
                # Block "a" carries the projection shortcut (conv4/bn4).
                conv_names = ["conv1", "conv2", "conv3", "conv4"]
                bn_names = ["bn1", "bn2", "bn3", "bn4"]
            else:
                conv_names = ["conv1", "conv2", "conv3"]
                bn_names = ["bn1", "bn2", "bn3"]
            for conv_name in conv_names:
                getattr(block, conv_name).W.update_rule.enabled = False
            for bn_name in bn_names:
                bn = getattr(block, bn_name)
                bn.gamma.update_rule.enabled = False
                bn.beta.update_rule.enabled = False

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu[0],
        converter=lambda batch, device: concat_examples(
            batch, device, padding=0))
    # 10 iterations only — this run exists to produce Profile.prof below.
    trainer = training.Trainer(updater, (10, 'iteration'), out=args.out)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer,
        filename=os.path.basename(pretrained_optimizer_file_name)),
        trigger=(args.snapshot, 'iteration'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=os.path.basename(single_model_file_name)),
        trigger=(args.snapshot, 'iteration'))

    log_interval = 100, 'iteration'
    print_interval = 100, 'iteration'
    plot_interval = 10, 'iteration'
    # Adam exposes "alpha" instead of "lr"; AdaDelta has no tunable rate.
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(
            chainer.training.extensions.ExponentialShift('lr', 0.1),
            trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer), trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="log_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.log".format(
                args.database, args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str,
                args.T)))
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
        'main/accuracy',
    ]), trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name="loss_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.png".format(
                args.database, args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str,
                args.T),
            trigger=plot_interval), trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name="accuracy_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.png".format(
                args.database, args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str,
                args.T),
            trigger=plot_interval), trigger=plot_interval)

    # Profile the (short) training run instead of calling trainer.run()
    # directly.
    cProfile.runctx("trainer.run()", globals(), locals(), "Profile.prof")
    s = pstats.Stats("Profile.prof")
    s.strip_dirs().sort_stats("time").print_stats()
def main():
    """Entry point: extract per-RoI AU R-CNN features and dump them to .npz.

    The training configuration (database, fold, backbone, two-stream mode,
    temporal length T, ...) is decoded from the model file name via
    ``extract_mode``.  For each mirror setting the dataset is run through
    ``DumpRoIFeature``, which writes the extracted features under
    ``--out_dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-b', type=int, default=1,
                        help='each batch size will be a new file')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='gpu that used to extract feature')
    parser.add_argument("--mirror", action="store_true",
                        help="whether to mirror")
    parser.add_argument("--out_dir", '-o',
                        default="/home/machen/dataset/extract_features/")
    parser.add_argument(
        "--model", '-m',
        help="the AU R-CNN pretrained model file to load to extract feature")
    parser.add_argument("--trainval_test", '-tt', help="train or test")
    parser.add_argument("--database", default="BP4D")
    parser.add_argument(
        '--use_memcached', action='store_true',
        help='whether use memcached to boost speed of fetch crop&mask')
    parser.add_argument('--proc_num', type=int, default=10)
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument('--mean_rgb',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--mean_flow',
                        default=config.ROOT_PATH + "BP4D/idx/mean_flow.npy",
                        help='image mean .npy file')
    args = parser.parse_args()
    adaptive_AU_database(args.database)

    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    # Decode the training configuration encoded in the model file name.
    return_dict = extract_mode(args.model)
    database = return_dict["database"]
    fold = return_dict["fold"]
    split_idx = return_dict["split_idx"]
    backbone = return_dict["backbone"]
    use_paper_num_label = return_dict["use_paper_num_label"]
    roi_align = return_dict["use_roi_align"]
    two_stream_mode = return_dict["two_stream_mode"]
    T = return_dict["T"]

    class_num = len(config.paper_use_BP4D) if database == "BP4D" else len(
        config.paper_use_DISFA)
    paper_report_label_idx = sorted(list(config.AU_SQUEEZE.keys()))
    if use_paper_num_label:
        paper_report_label, class_num = squeeze_label_num_report(
            database, True)
        paper_report_label_idx = list(paper_report_label.keys())

    # Only the two-stream (RGB + optical flow) layout is supported here.
    assert two_stream_mode == TwoStreamMode.rgb_flow
    if two_stream_mode == TwoStreamMode.rgb_flow:
        au_rcnn_train_chain_list = []
        # NOTE(review): `backbone` (a name string) is passed as
        # pretrained_model; the real weights are loaded from args.model
        # below — confirm this is intended.
        au_rcnn_rgb = AU_RCNN_Resnet101(pretrained_model=backbone,
                                        min_size=config.IMG_SIZE[0],
                                        max_size=config.IMG_SIZE[1],
                                        use_roi_align=roi_align,
                                        use_optical_flow_input=False,
                                        temporal_length=T)
        au_rcnn_optical_flow = AU_RCNN_Resnet101(
            pretrained_model=backbone, min_size=config.IMG_SIZE[0],
            max_size=config.IMG_SIZE[1], use_roi_align=roi_align,
            use_optical_flow_input=True, temporal_length=T)
        au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
        au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
            au_rcnn_optical_flow)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)
        model = Wrapper(au_rcnn_train_chain_list, class_num, database, T,
                        two_stream_mode=two_stream_mode,
                        gpus=[args.gpu, args.gpu])

    assert os.path.exists(args.model)
    print("loading model file : {}".format(args.model))
    chainer.serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        # BUG FIX: this call was guarded by
        # `isinstance(model, FasterRCNNResnet101)`, which is always False
        # because `model` is constructed as a Wrapper above, so the network
        # was never moved to the GPU.  Sibling evaluation scripts in this
        # file call to_gpu unconditionally after selecting the device.
        model.to_gpu(args.gpu)

    img_dataset = AUDataset(database=database, L=T, fold=fold,
                            split_name=args.trainval_test,
                            split_index=split_idx, mc_manager=mc_manager,
                            train_all_data=False,
                            paper_report_label_idx=paper_report_label_idx,
                            jump_exists=True, npz_dir=args.out_dir)

    # Optionally do a second, mirrored pass over the training split.
    mirror_list = [
        False,
    ]
    if args.mirror and args.trainval_test == 'trainval':
        mirror_list.append(True)
    for mirror in mirror_list:
        train_dataset = TransformDataset(
            img_dataset,
            Transform(T, mean_rgb_path=args.mean_rgb,
                      mean_flow_path=args.mean_flow, mirror=mirror))
        if args.proc_num > 1:
            dataset_iter = MultiprocessIterator(train_dataset,
                                                batch_size=args.batch_size,
                                                n_processes=args.proc_num,
                                                repeat=False, shuffle=False,
                                                n_prefetch=10,
                                                shared_mem=10000000)
        else:
            dataset_iter = SerialIterator(train_dataset,
                                          batch_size=args.batch_size,
                                          repeat=False, shuffle=False)
        with chainer.no_backprop_mode(), chainer.using_config(
                'cudnn_deterministic', True), chainer.using_config(
                    'train', False):
            model_dump = DumpRoIFeature(
                dataset_iter, model, args.gpu, database,
                converter=lambda batch, device: concat_examples_not_string(
                    batch, device, padding=0),
                output_path=args.out_dir, trainval_test=args.trainval_test,
                fold_split_idx=split_idx, mirror_data=mirror)
            model_dump.evaluate()
if matcher: fold = matcher.group(1) split_idx = matcher.group(2) output = args.output if args.prefix: id_list_fold_path = config.DATA_PATH[ args.database] + "/idx/{0}_fold{1}/".format(fold, args.prefix) else: id_list_fold_path = config.DATA_PATH[ args.database] + "/idx/{0}_fold/".format(fold) train_subject, test_subject = load_train_test_id(id_list_fold_path, split_idx, args.database) os.makedirs(output, exist_ok=True) adaptive_AU_database(args.database) extract_key = "" if args.pretrained_model_name == "resnet101": faster_rcnn = FasterRCNNResnet101( n_fg_class=len(config.AU_SQUEEZE), pretrained_model="resnet101", mean_file=args.mean, use_lstm=args.use_lstm, extract_len=args.extract_len ) # 可改为/home/machen/face_expr/result/snapshot_model.npz extract_key = 'avg_pool' elif args.pretrained_model_name == "vgg": faster_rcnn = FasterRCNNVGG16(n_fg_class=len(config.AU_SQUEEZE), pretrained_model="imagenet", mean_file=args.mean,
def main():
    """Entry point: evaluate a pretrained space-time AU R-CNN model.

    Decodes the training configuration from the model file name
    (``extract_mode``), rebuilds the matching network (backbone + one of
    several conv/fc-LSTM loss heads), loads the weights, runs
    ``ActionUnitEvaluator`` over the test split and writes the metrics to an
    ``evaluation_result_*.json`` next to the model file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)'
                        )  # open_crf layer only works for CPU mode
    parser.add_argument(
        "--model", "-m",
        help="pretrained model file path")  # which contains pretrained target
    parser.add_argument("--pretrained_model", "-pre", default="resnet101")
    parser.add_argument("--memcached_host", default="127.0.0.1")
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--proc_num', type=int, default=10,
                        help="multiprocess fetch data process number")
    parser.add_argument('--two_stream_mode', type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='spatial/ temporal/ spatial_temporal')
    parser.add_argument('--batch', '-b', type=int, default=5,
                        help='mini batch size')
    args = parser.parse_args()
    # Only snapshots whose name ends in "model.npz" are evaluable.
    if not args.model.endswith("model.npz"):
        return

    # Training configuration is encoded in the model file name.
    mode_dict = extract_mode(args.model)
    database = mode_dict["database"]
    fold = mode_dict["fold"]
    split_idx = mode_dict["split_idx"]
    backbone = mode_dict["backbone"]
    spatial_edge_mode = mode_dict["spatial_edge_mode"]
    temporal_edge_mode = mode_dict["temporal_edge_mode"]
    use_paper_num_label = mode_dict["use_paper_num_label"]
    use_roi_align = mode_dict["use_roi_align"]
    use_label_dep_rnn_layer = mode_dict["label_dep_rnn_layer"]
    sample_frame = mode_dict["sample_frame"]
    conv_rnn_type = mode_dict["conv_rnn_type"]
    # Feature maps are needed by every head except conv_rcnn and fc_lstm.
    use_feature_map = (conv_rnn_type != ConvRNNType.conv_rcnn) and (
        conv_rnn_type != ConvRNNType.fc_lstm)
    use_au_rcnn_loss = (conv_rnn_type == ConvRNNType.conv_rcnn)
    adaptive_AU_database(database)

    paper_report_label, class_num = squeeze_label_num_report(
        database, use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    if not paper_report_label_idx:
        # Empty mapping -> evaluate over every AU in AU_SQUEEZE.
        paper_report_label_idx = None
        class_num = len(config.AU_SQUEEZE)
    else:
        class_num = len(paper_report_label_idx)

    # Pretty-print the decoded configuration for the log.
    model_print_dict = OrderedDict()
    for key, value in mode_dict.items():
        model_print_dict[key] = str(value)
    print("""
    {0}
    ======================================
    INFO:
    {1}
    ======================================
    """.format(args.model,
               json.dumps(model_print_dict, sort_keys=True, indent=8)))

    if backbone == 'resnet101':
        # NOTE(review): the `or` below makes the second clause redundant
        # (sep_conv_lstm already satisfies != fc_lstm); perhaps `and` was
        # intended — confirm against the training script.
        au_rcnn = AU_RCNN_Resnet101(
            pretrained_model=args.pretrained_model,
            min_size=config.IMG_SIZE[0], max_size=config.IMG_SIZE[1],
            mean_file=args.mean, classify_mode=use_au_rcnn_loss,
            n_class=class_num, use_roi_align=use_roi_align,
            use_feature_map_res45=use_feature_map,
            use_feature_map_res5=(conv_rnn_type != ConvRNNType.fc_lstm
                                  or conv_rnn_type == ConvRNNType.sep_conv_lstm),
            temporal_length=sample_frame)
    elif backbone == 'resnet50':
        # NOTE(review): any other backbone leaves au_rcnn unbound and the
        # next line raises NameError — confirm only these two occur.
        au_rcnn = AU_RCNN_Resnet50(pretrained_model=args.pretrained_model,
                                   min_size=config.IMG_SIZE[0],
                                   max_size=config.IMG_SIZE[1],
                                   mean_file=args.mean,
                                   classify_mode=use_au_rcnn_loss,
                                   n_class=class_num,
                                   use_roi_align=use_roi_align,
                                   use_feature_map=use_feature_map)
    au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)

    # if use_label_dep_rnn_layer:
    #     use_space = (spatial_edge_mode != SpatialEdgeMode.no_edge)
    #     use_temporal = (temporal_edge_mode != TemporalEdgeMode.no_temporal)
    #     label_dependency_layer = LabelDependencyLayer(database, out_size=class_num, train_mode=False,
    #                                                   label_win_size=2, x_win_size=1,
    #                                                   label_dropout_ratio=0.0, use_space=use_space,
    #                                                   use_temporal=use_temporal)

    # Pick the loss head matching the conv-RNN variant the model was
    # trained with.
    if conv_rnn_type == ConvRNNType.conv_lstm:
        space_time_conv_lstm = SpaceTimeConv(
            None, use_label_dep_rnn_layer, class_num,
            spatial_edge_mode=spatial_edge_mode,
            temporal_edge_mode=temporal_edge_mode,
            conv_rnn_type=conv_rnn_type)
        loss_head_module = space_time_conv_lstm
    elif conv_rnn_type == ConvRNNType.fc_lstm:
        space_time_fc_lstm = SpaceTimeSepFcLSTM(
            database, class_num, spatial_edge_mode=spatial_edge_mode,
            temporal_edge_mode=temporal_edge_mode)
        loss_head_module = space_time_fc_lstm
    elif conv_rnn_type == ConvRNNType.conv_rcnn:
        au_rcnn_train_loss = AU_RCNN_TrainChainLoss()
        loss_head_module = au_rcnn_train_loss
    elif conv_rnn_type == ConvRNNType.sep_conv_lstm:
        space_time_sep_conv_lstm = SpaceTimeSepConv(
            database, class_num, spatial_edge_mode=spatial_edge_mode,
            temporal_edge_mode=temporal_edge_mode)
        loss_head_module = space_time_sep_conv_lstm

    model = Wrapper(au_rcnn_train_chain, loss_head_module, database,
                    sample_frame, use_feature_map=use_feature_map,
                    gpu=args.gpu)
    chainer.serializers.load_npz(args.model, model)
    print("loading {}".format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    mc_manager = PyLibmcManager(args.memcached_host)
    img_dataset = AUDataset(
        database=database,
        fold=fold,
        split_name='test',  # FIXME
        split_index=split_idx,
        mc_manager=mc_manager,
        train_all_data=False)
    video_dataset = AU_video_dataset(
        au_image_dataset=img_dataset,
        sample_frame=sample_frame,
        train_mode=False,  # FIXME
        paper_report_label_idx=paper_report_label_idx,
        fetch_use_parrallel_iterator=True)
    video_dataset = TransformDataset(video_dataset,
                                     Transform3D(au_rcnn, mirror=False))

    # test_iter = SerialIterator(video_dataset, batch_size=sample_frame * args.batch,
    #                            repeat=False, shuffle=False)
    test_iter = MultiprocessIterator(video_dataset,
                                     batch_size=sample_frame * args.batch,
                                     n_processes=args.proc_num,
                                     repeat=False, shuffle=False,
                                     n_prefetch=10, shared_mem=10000000)

    with chainer.no_backprop_mode(), chainer.using_config(
            'cudnn_deterministic', True), chainer.using_config(
                'train', False):
        # Prediction npz goes next to the model, named pred_<model stem>.npz.
        npz_path = os.path.dirname(
            args.model) + os.path.sep + "pred_" + os.path.basename(
                args.model)[:os.path.basename(args.model).rindex("_")] + ".npz"
        print("npz_path: {}".format(npz_path))
        au_evaluator = ActionUnitEvaluator(
            test_iter, model, args.gpu, database=database,
            paper_report_label=paper_report_label,
            converter=lambda batch, device: concat_examples_not_labels(
                batch, device, padding=0),
            sample_frame=sample_frame, output_path=npz_path)
        observation = au_evaluator.evaluate()
        with open(os.path.dirname(args.model) + os.path.sep +
                  "evaluation_result_{0}.json".format(os.path.basename(args.model)\
                      [:os.path.basename(args.model).rindex("_")]
                  ), "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
def build_graph(faster_rcnn, reader_func, output_dir, database_name,
                force_generate, proc_num, cut: bool, extract_key,
                train_subject, test_subject):
    '''
    currently CRF can only deal with single label situation
    so use /home/machen/dataset/BP4D/label_dict.txt to regard combine label
    as new single label
    example(each file contains one video!):
    node_id kown_label features
    1_12 +1 np_file:/path/to/npy features:1,3,4,5,5,...
    node_id specific: ${frame}_${roi}, eg: 1_12
    or
    444 +[0,0,0,1,0,1,0] np_file:/path/to/npy features:1,3,4,5,5,...
    spatio can have two factor node here, for example spatio_1 means upper
    face, and spatio_2 means lower face relation
    #edge 143 4289 spatio_1
    #edge 143 4289 spatio_2
    #edge 112 1392 temporal
    mode: RNN or CRF
    '''
    adaptive_AU_database(database_name)
    adaptive_AU_relation(database_name)
    is_binary_AU = True
    # reader_func yields one video at a time along with the subject it
    # belongs to; one graph file is emitted per video.
    for video_info, subject_id in reader_func(
            output_dir, is_binary_AU=is_binary_AU,
            is_need_adaptive_AU_relation=False,
            force_generate=force_generate, proc_num=proc_num, cut=cut,
            train_subject=train_subject):
        node_list = []          # textual node lines for the graph file
        temporal_edges = []     # "#edge a b temporal" lines
        spatio_edges = []       # "#edge a b spatio" lines
        h_info_array = []       # extracted appearance feature per node
        box_geometry_array = []  # bounding box per node
        for entry_dict in video_info:
            frame = entry_dict["frame"]
            cropped_face = entry_dict["cropped_face"]
            print("processing frame:{}".format(frame))
            all_couple_mask_dict = entry_dict[
                "all_couple_mask_dict"]  # key is AU couple tuple; every RoI is returned whether or not the AU appears on the face
            image_labels = entry_dict[
                "all_labels"]  # each region has a label (binary or AU)
            bboxes = []
            labels = []
            AU_couple_bbox_dict = dict()
            for idx, (AU_couple, mask) in enumerate(
                    all_couple_mask_dict.items()
            ):  # AU may contain single_true AU or AU binary tuple (depends on need_adaptive_AU_relation)
                region_label = image_labels[
                    idx]  # str or tuple, so all_labels index must be the same as all_couple_mask_dict
                # Split each RoI mask into its connected components; each
                # component becomes one candidate bounding box.
                connect_arr = cv2.connectedComponents(mask, connectivity=8,
                                                      ltype=cv2.CV_32S)
                component_num = connect_arr[0]
                label_matrix = connect_arr[1]
                temp_boxes = []
                for component_label in range(1, component_num):
                    # Tight bounding box of this component's pixels.
                    row_col = list(
                        zip(*np.where(label_matrix == component_label)))
                    row_col = np.array(row_col)
                    y_min_index = np.argmin(row_col[:, 0])
                    y_min = row_col[y_min_index, 0]
                    x_min_index = np.argmin(row_col[:, 1])
                    x_min = row_col[x_min_index, 1]
                    y_max_index = np.argmax(row_col[:, 0])
                    y_max = row_col[y_max_index, 0]
                    x_max_index = np.argmax(row_col[:, 1])
                    x_max = row_col[x_max_index, 1]
                    # same region may be shared by different AU, we must
                    # deal with it
                    coordinates = (y_min, x_min, y_max, x_max)
                    # Skip degenerate single-pixel components.
                    if y_min == y_max and x_min == x_max:
                        continue
                    temp_boxes.append(coordinates)
                temp_boxes = sorted(temp_boxes, key=itemgetter(
                    3))  # must make sure each frame has the same box order
                for coordinates in temp_boxes:
                    if coordinates not in bboxes:
                        bboxes.append(coordinates)
                        labels.append(
                            region_label
                        )  # AU may contain single_true AU or AU binary tuple (depends on need_adaptive_AU_relation)
                        AU_couple_bbox_dict[coordinates] = AU_couple
                del label_matrix
            # Frames that do not produce the expected per-database box count
            # are dropped so every emitted frame has an identical layout.
            if len(bboxes) != config.BOX_NUM[database_name]:
                print("boxes num != {0}, real box num= {1}".format(
                    config.BOX_NUM[database_name], len(bboxes)))
                continue
            with chainer.no_backprop_mode(), chainer.using_config(
                    'train', False):
                bboxes = np.asarray(bboxes, dtype=np.float32)
                h = faster_rcnn.extract(cropped_face, bboxes,
                                        layer=extract_key)  # shape = R' x 2048
                assert h.shape[0] == len(bboxes)
                h = chainer.cuda.to_cpu(h)
                h = h.reshape(len(bboxes), -1)
            # Everything at this indent level still belongs to the same
            # image/frame.
            # print("box number, all_mask:", len(bboxes),len(all_couple_mask_dict))
            for box_idx, box in enumerate(bboxes):
                label = labels[
                    box_idx]  # label maybe single true AU or AU binary tuple
                if isinstance(label, tuple):
                    label_arr = np.char.mod("%d", label)
                    label = "({})".format(",".join(label_arr))
                h_flat = h[box_idx]
                # nonzero_idx = np.nonzero(h_flat)[0]
                # h_flat_nonzero = h_flat[nonzero_idx]
                # h_info = ",".join("{}:{:.4f}".format(idx, val) for idx,val in zip(nonzero_idx,h_flat_nonzero))
                node_id = "{0}_{1}".format(frame, box_idx)
                node_list.append("{0} {1} feature_idx:{2}".format(
                    node_id, label, len(h_info_array)))
                h_info_array.append(h_flat)
                box_geometry_array.append(box)
            # Pairwise combination of boxes within the same frame to decide
            # spatial edges; note that regions whose AU label is 0 (AU not
            # present) also participate in the connections.
            # NOTE(review): after the asarray above, bboxes[box_idx_a] is an
            # ndarray row while AU_couple_bbox_dict was keyed with tuples —
            # an ndarray is unhashable as a dict key; confirm the original
            # statement ordering / key type.
            for box_idx_a, box_idx_b in map(
                    sorted, itertools.combinations(range(len(bboxes)), 2)):
                node_id_a = "{0}_{1}".format(frame, box_idx_a)
                node_id_b = "{0}_{1}".format(frame, box_idx_b)
                AU_couple_a = AU_couple_bbox_dict[bboxes[
                    box_idx_a]]  # AU couple represents a region (maybe symmetric on the face)
                AU_couple_b = AU_couple_bbox_dict[bboxes[box_idx_b]]
                if AU_couple_a == AU_couple_b or has_edge(
                        AU_couple_a, AU_couple_b, database_name):
                    spatio_edges.append("#edge {0} {1} spatio".format(
                        node_id_a, node_id_b))
        # Link each box id to its successor in the next sampled frame to
        # form temporal edges.
        box_id_temporal_dict = defaultdict(
            list)  # key = roi/bbox id, value = node_id list cross temporal
        for node_info in node_list:
            node_id = node_info[0:node_info.index(" ")]
            box_id = node_id[node_id.index("_") + 1:]
            box_id_temporal_dict[box_id].append(node_id)
        for node_id_list in box_id_temporal_dict.values():
            for idx, node_id in enumerate(node_id_list):
                if idx + 1 < len(node_id_list):
                    node_id_next = node_id_list[idx + 1]
                    temporal_edges.append("#edge {0} {1} temporal".format(
                        node_id, node_id_next))
        # NOTE(review): if subject_id is in neither split, output_path is
        # unbound and os.makedirs below raises NameError — confirm every
        # subject belongs to one of the two lists.
        if subject_id in train_subject:
            output_path = "{0}/train/{1}.txt".format(
                output_dir, video_info[0]["video_id"])
        elif subject_id in test_subject:
            output_path = "{0}/test/{1}.txt".format(
                output_dir, video_info[0]["video_id"])
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        npz_path = output_path[:output_path.rindex(".")] + ".npz"
        np.savez(npz_path,
                 appearance_features=np.asarray(h_info_array,
                                                dtype=np.float32),
                 geometry_features=np.array(box_geometry_array,
                                            dtype=np.float32))
        with open(output_path, "w") as file_obj:
            for line in node_list:
                file_obj.write("{}\n".format(line))
            for line in spatio_edges:
                file_obj.write("{}\n".format(line))
            for line in temporal_edges:
                file_obj.write("{}\n".format(line))
            file_obj.flush()
        # Reset the per-video accumulators before the next video.
        node_list.clear()
        spatio_edges.clear()
        temporal_edges.clear()
        h_info_array.clear()
        box_geometry_array.clear()
def main():
    """Entry point: evaluate a pretrained time-axis segment R-CNN model.

    Rebuilds the segment-proposal predictor matching the configuration
    decoded from the model file name, loads the weights, evaluates on the
    pre-extracted npz features under ``--data_dir`` and writes metrics to an
    ``evaluation_result_*.json`` next to the model file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')  # open_crf layer only works for CPU mode
    parser.add_argument("--model", "-m",
                        help="pretrained model file path")  # which contains pretrained target
    parser.add_argument('--proc_num', type=int, default=10,
                        help="multiprocess fetch data process number")
    parser.add_argument("--data_dir", type=str,
                        default="/home/machen/dataset/extract_features")
    parser.add_argument('--batch', '-b', type=int, default=1,
                        help='mini batch size')
    args = parser.parse_args()
    # Only snapshots whose name ends in "model.npz" are evaluable.
    if not args.model.endswith("model.npz"):
        return

    # Training configuration is encoded in the model file name.
    mode_dict = extract_mode(args.model)
    database = mode_dict["database"]
    fold = mode_dict["fold"]
    split_idx = mode_dict["split_idx"]
    use_paper_num_label = mode_dict["use_paper_num_label"]
    conv_layers = mode_dict["conv_layers"]
    two_stream_mode = mode_dict["two_stream_mode"]
    faster_backbone_type = mode_dict["faster_backbone_type"]
    T = 10  # temporal window length used by NpzFeatureDataset
    data_dir = args.data_dir + "/{0}_{1}_fold_{2}/test".format(
        database, fold, split_idx)
    adaptive_AU_database(database)

    paper_report_label, class_num = squeeze_label_num_report(
        database, use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    class_num = len(config.AU_SQUEEZE)
    if use_paper_num_label:
        class_num = len(paper_report_label_idx)

    # Pretty-print the decoded configuration for the log.
    model_print_dict = OrderedDict()
    for key, value in mode_dict.items():
        model_print_dict[key] = str(value)
    print("""
    {0}
    ======================================
    INFO:
    {1}
    ======================================
    """.format(args.model,
               json.dumps(model_print_dict, sort_keys=True, indent=8)))

    # NOTE(review): any other FasterBackboneType leaves
    # faster_extractor_backbone unbound and raises NameError below —
    # confirm only conv1d/tcn occur.
    if faster_backbone_type == FasterBackboneType.conv1d:
        faster_extractor_backbone = FasterBackbone(conv_layers, 2048, 1024)
    elif faster_backbone_type == FasterBackboneType.tcn:
        faster_extractor_backbone = TcnBackbone(conv_layers, 2048, 1024)
    faster_head_module = FasterHeadModule(
        2048, class_num + 1,
        7)  # note that the class number here must include background
    initialW = chainer.initializers.Normal(0.001)
    spn = SegmentProposalNetwork(1024, n_anchors=len(config.ANCHOR_SIZE),
                                 initialW=initialW)
    seg_predictor = TimeSegmentRCNNPredictor(faster_extractor_backbone, spn,
                                             faster_head_module)
    model = WrapperPredictor(seg_predictor, class_num=class_num)
    # Weights were saved from the training chain, not the wrapper.
    chainer.serializers.load_npz(args.model, model.seg_predictor.train_chain)
    print("loading {}".format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    npz_dataset = NpzFeatureDataset(data_dir, database,
                                    two_stream_mode=two_stream_mode, T=T)
    test_iter = SerialIterator(npz_dataset, batch_size=1, repeat=False,
                               shuffle=False)
    with chainer.no_backprop_mode(), chainer.using_config(
            'cudnn_deterministic', True), chainer.using_config(
                'train', False):
        # time_axis_rcnn_BP4D_3_fold_1@use_paper_num_label@rgb_flow@30_model.npz
        pred_result_npz_path = os.path.dirname(
            args.model) + os.path.sep + os.path.basename(args.model)[
                :os.path.basename(args.model).rindex("_")] + "_pred_result.npz"
        au_evaluator = ActionUnitEvaluator(
            test_iter, model, args.gpu, database=database,
            paper_report_label=paper_report_label,
            converter=lambda batch, device: concat_examples_not_string(
                batch, device, padding=0),
            output_path=pred_result_npz_path)
        observation = au_evaluator.evaluate()
        with open(os.path.dirname(args.model) + os.path.sep +
                  "evaluation_result_{0}.json".format(os.path.basename(args.model)\
                      [:os.path.basename(args.model).rindex("_")]
                  ), "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
def build_graph_roi_single_label(faster_rcnn, reader_func, output_dir,
                                 database_name, force_generate, proc_num,
                                 cut: bool, extract_key, train_subject,
                                 test_subject):
    '''
    currently CRF can only deal with single label situation
    so use /home/machen/dataset/BP4D/label_dict.txt to regard combine label as new single label
    example(each file contains one video!):
        node_id kown_label features
        1_12 +1 np_file:/path/to/npy features:1,3,4,5,5,...
        node_id specific: ${frame}_${roi}, eg: 1_12
        or
        444 +[0,0,0,1,0,1,0] np_file:/path/to/npy features:1,3,4,5,5,...
    spatio can have two factor node here, for example spatio_1 means upper face,
    and spatio_2 means lower face relation
        #edge 143 4289 spatio_1
        #edge 143 4289 spatio_2
        #edge 112 1392 temporal
    mode: RNN or CRF
    '''
    adaptive_AU_database(database_name)
    adaptive_AU_relation(database_name)
    # value is AU couple tuple, each tuple denotes an RoI
    au_couple_dict = get_zip_ROI_AU()
    # max_au_couple_len = max(len(couple) for couple in au_couple_dict.values())  # we use itertools.product instead
    # each box/ROI only have 1 or 0
    label_bin_len = config.BOX_NUM[database_name]
    au_couple_set = set(au_couple_dict.values())
    au_couple_list = list(au_couple_set)
    au_couple_list.append(("1", "2", "5", "7"))  # because it is symmetric area
    is_binary_AU = True
    for video_info, subject_id in reader_func(
            output_dir,
            is_binary_AU=is_binary_AU,
            is_need_adaptive_AU_relation=False,
            force_generate=force_generate,
            proc_num=proc_num,
            cut=cut,
            train_subject=train_subject):
        # key = np.ndarray_hash , value = h. speed up
        extracted_feature_cache = dict()
        frame_box_cache = dict()  # key = frame, value = boxes
        frame_labels_cache = dict()
        frame_AU_couple_bbox_dict_cache = dict()
        # each video file is copied into multiple versions that differ only in label
        if database_name == "BP4D":
            label_split_list = config.BP4D_LABEL_SPLIT
        elif database_name == "DISFA":
            label_split_list = config.DISFA_LABEL_SPLIT
        # couples_tuple = ("1","3","5",.."4") cross AU_couple;
        # config.LABEL_SPLIT comes from frequent pattern statistics
        for couples_tuple in label_split_list:
            assert len(couples_tuple) == config.BOX_NUM[database_name]
            couples_tuple = tuple(map(str, sorted(map(int, couples_tuple))))
            # use cartesian product to iterate over
            couples_tuple_set = set(couples_tuple)
            if len(couples_tuple_set) < len(couples_tuple):
                continue  # limit too many combination
            # count = 0
            # for fp in fp_set:
            #     inter_set = couples_tuple_set & set(fp)
            #     union_set = couples_tuple_set | set(fp)
            #     iou = len(inter_set) / len(union_set)
            #     if iou > 0.6:
            #         count += 1
            # if count < 20:
            #     continue
            node_list = []
            temporal_edges = []
            spatio_edges = []
            h_info_array = []
            box_geometry_array = []
            for entry_dict in video_info:
                frame = entry_dict["frame"]
                cropped_face = entry_dict["cropped_face"]
                print("processing frame:{}".format(frame))
                # key is AU couple tuple; a mask is returned for every region,
                # whether or not that AU actually appears on the face
                all_couple_mask_dict = entry_dict["all_couple_mask_dict"]
                # each region has a label (binary or AU)
                image_labels = entry_dict["all_labels"]
                bboxes = []
                labels = []
                AU_couple_bbox_dict = OrderedDict()
                if frame in frame_box_cache:
                    bboxes = frame_box_cache[frame]
                    labels = frame_labels_cache[frame]
                    AU_couple_bbox_dict = frame_AU_couple_bbox_dict_cache[frame]
                else:
                    # We cannot sort this dict here, because region_label depends on
                    # the order of this dict. AU may contain a single true AU or an
                    # AU binary tuple (depends on need_adaptive_AU_relation)
                    for idx, (AU_couple, mask) in enumerate(all_couple_mask_dict.items()):
                        # str or tuple, so all_labels index must be the same
                        # as all_couple_mask_dict
                        region_label = image_labels[idx]
                        connect_arr = cv2.connectedComponents(mask, connectivity=8,
                                                              ltype=cv2.CV_32S)
                        component_num = connect_arr[0]
                        label_matrix = connect_arr[1]
                        # build one bounding box per connected component of the mask
                        for component_label in range(1, component_num):
                            row_col = list(zip(*np.where(label_matrix == component_label)))
                            row_col = np.array(row_col)
                            y_min_index = np.argmin(row_col[:, 0])
                            y_min = row_col[y_min_index, 0]
                            x_min_index = np.argmin(row_col[:, 1])
                            x_min = row_col[x_min_index, 1]
                            y_max_index = np.argmax(row_col[:, 0])
                            y_max = row_col[y_max_index, 0]
                            x_max_index = np.argmax(row_col[:, 1])
                            x_max = row_col[x_max_index, 1]
                            # same region may be shared by different AU, we must deal with it
                            coordinates = (y_min, x_min, y_max, x_max)
                            if y_min == y_max and x_min == x_max:
                                continue  # degenerate single-pixel component
                            if coordinates not in bboxes:
                                # bboxes and labels have the same order
                                bboxes.append(coordinates)
                                # AU may contain single_true AU or AU binary tuple
                                # (depends on need_adaptive_AU_relation)
                                labels.append(region_label)
                                AU_couple_bbox_dict[coordinates] = AU_couple
                        del label_matrix
                if len(bboxes) != config.BOX_NUM[database_name]:
                    print("boxes num != {0}, real box num= {1}".format(
                        config.BOX_NUM[database_name], len(bboxes)))
                    continue
                frame_box_cache[frame] = bboxes
                frame_AU_couple_bbox_dict_cache[frame] = AU_couple_bbox_dict
                frame_labels_cache[frame] = labels
                # box_idx => AU, cannot cache! because couples_tuple is different each time
                box_idx_AU_dict = dict()
                already_added_AU_set = set()
                for box_idx, _ in enumerate(bboxes):  # bboxes may come from cache
                    # AU_couple_bbox_dict may come from cache
                    AU_couple = list(AU_couple_bbox_dict.values())[box_idx]
                    # couples_tuple is not from cache, thus it changes after each iteration
                    for AU in couples_tuple:
                        if AU in AU_couple and AU not in already_added_AU_set:
                            box_idx_AU_dict[box_idx] = (AU, AU_couple)
                            already_added_AU_set.add(AU)
                            break
                # freeze the image buffer so its bytes can be hashed as a cache key
                cropped_face.flags.writeable = False
                key = hash(cropped_face.data.tobytes())
                if key in extracted_feature_cache:
                    h = extracted_feature_cache[key]
                else:
                    with chainer.no_backprop_mode(), chainer.using_config('train', False):
                        h = faster_rcnn.extract(cropped_face, bboxes,
                                                layer=extract_key)  # shape = R' x 2048
                        extracted_feature_cache[key] = h
                assert h.shape[0] == len(bboxes)
                h = chainer.cuda.to_cpu(h)
                h = h.reshape(len(bboxes), -1)
                # everything at this indent level is still inside one single image
                # print("box number, all_mask:", len(bboxes), len(all_couple_mask_dict))
                assert len(box_idx_AU_dict) == config.BOX_NUM[database_name]
                for box_idx, (AU, AU_couple) in sorted(box_idx_AU_dict.items(),
                                                       key=lambda e: int(e[0])):
                    # bin length became box number > AU_couple number
                    label = np.zeros(shape=label_bin_len, dtype=np.int32)
                    AU_squeeze_idx = config.AU_SQUEEZE.inv[AU]
                    # caching labels may be error prone:
                    # labels[box_idx] = 0,0,1,1,...,0 but we only want the specific idx
                    label[couples_tuple.index(AU)] = labels[box_idx][AU_squeeze_idx]
                    label = tuple(label)
                    label_arr = np.char.mod("%d", label)
                    label = "({})".format(",".join(label_arr))
                    h_flat = h[box_idx]
                    node_id = "{0}_{1}".format(frame, box_idx)
                    node_list.append(
                        "{0} {1} feature_idx:{2} AU_couple:{3} AU:{4}".format(
                            node_id, label, len(h_info_array), AU_couple, AU))
                    h_info_array.append(h_flat)
                    box_geometry_array.append(bboxes[box_idx])
                # pairwise combine the boxes of the same frame to decide spatial edges.
                # Note: regions with AU=0 (AU absent) also participate in the connection
                for box_idx_a, box_idx_b in map(
                        sorted, itertools.combinations(range(len(bboxes)), 2)):
                    node_id_a = "{0}_{1}".format(frame, box_idx_a)
                    node_id_b = "{0}_{1}".format(frame, box_idx_b)
                    # AU couple represents a region (maybe symmetric in the face)
                    AU_couple_a = AU_couple_bbox_dict[bboxes[box_idx_a]]
                    AU_couple_b = AU_couple_bbox_dict[bboxes[box_idx_b]]
                    if AU_couple_a == AU_couple_b or has_edge(AU_couple_a, AU_couple_b,
                                                             database_name):
                        spatio_edges.append("#edge {0} {1} spatio".format(
                            node_id_a, node_id_b))
            # key = roi/bbox id, value = node_id list across the temporal axis
            box_id_temporal_dict = defaultdict(list)
            for node_info in node_list:
                node_id = node_info[0:node_info.index(" ")]
                box_id = node_id[node_id.index("_") + 1:]
                box_id_temporal_dict[box_id].append(node_id)
            # chain consecutive frames of the same box with temporal edges
            for node_id_list in box_id_temporal_dict.values():
                for idx, node_id in enumerate(node_id_list):
                    if idx + 1 < len(node_id_list):
                        node_id_next = node_id_list[idx + 1]
                        temporal_edges.append("#edge {0} {1} temporal".format(
                            node_id, node_id_next))
            train_AU_out_path = "{0}/train/{1}/{2}.txt".format(
                output_dir, "_".join(map(str, couples_tuple)),
                video_info[0]["video_id"])
            test_AU_out_path = "{0}/test/{1}/{2}.txt".format(
                output_dir, "_".join(map(str, couples_tuple)),
                video_info[0]["video_id"])
            # NOTE(review): if subject_id is in neither train_subject nor
            # test_subject, output_path/npz_path are unbound below and raise
            # NameError — presumably every subject belongs to exactly one
            # split; verify against the caller.
            if subject_id in train_subject:
                output_path = train_AU_out_path
                npz_path = output_dir + os.sep + "train" + os.sep + \
                    os.path.basename(output_path)[:os.path.basename(output_path).rindex(".")] + ".npz"
            elif subject_id in test_subject:
                output_path = test_AU_out_path
                npz_path = output_dir + os.sep + "test" + os.sep + \
                    os.path.basename(output_path)[:os.path.basename(output_path).rindex(".")] + ".npz"
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            if not os.path.exists(npz_path):
                np.savez(npz_path,
                         appearance_features=h_info_array,
                         geometry_features=np.array(box_geometry_array, dtype=np.float32))
            with open(output_path, "w") as file_obj:
                for line in node_list:
                    file_obj.write("{}\n".format(line))
                for line in spatio_edges:
                    file_obj.write("{}\n".format(line))
                for line in temporal_edges:
                    file_obj.write("{}\n".format(line))
                file_obj.flush()
            node_list.clear()
            spatio_edges.clear()
            temporal_edges.clear()
            h_info_array.clear()
def main():
    """Train or evaluate the single-frame Action Unit R-CNN (ResNet backbone).

    In ``--eval_mode`` the script loads ``--test_model`` and runs the AU
    evaluator over the test split (optionally with an occluded face region);
    otherwise it trains on the trainval split, snapshotting the model and
    optimizer every ``--snapshot`` iterations.

    Fixes vs. the previous revision:
      * ``--memcached_host`` was commented out although ``args.memcached_host``
        is read whenever ``--use_memcached`` is passed (AttributeError).
      * "snatshot" typo corrected in a log message.
    """
    print("chainer cudnn enabled: {}".format(chainer.cuda.cudnn_enabled))
    parser = argparse.ArgumentParser(
        description='Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/AU_R_CNN/')
    parser.add_argument('--gpu', '-g', default="0",
                        help='GPU ID, multiple GPU split by comma, '
                             'Note that BPTT updater do not support multi-GPU')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out', '-o', default='result', help='Output directory')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--readtype', default='rgb', help='rgb/flow')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=20)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--mean', default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--feature_model', default="resnet101",
                        help="vgg or resnet101 for train")
    parser.add_argument('--optimizer', default='RMSprop',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model', default='resnet101',
                        help='imagenet/vggface/resnet101/*.npz')
    parser.add_argument('--use_memcached', action='store_true',
                        help='whether use memcached to boost speed of fetch crop&mask')
    # BUGFIX: this argument used to be commented out, but args.memcached_host
    # is dereferenced below whenever --use_memcached is set.
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument("--is_pretrained", action="store_true",
                        help="whether is to pretrain BP4D later will for DISFA dataset or not")
    parser.add_argument("--pretrained_target", '-pt', default="",
                        help="whether pretrain label set will use DISFA or not")
    parser.add_argument('--eval_mode', action='store_true',
                        help='Use test datasets for evaluation metric')
    parser.add_argument('--test_model', default="",
                        help='test model for evaluation')
    parser.add_argument('--occlude', default='',
                        help='whether to use occlude face of upper/left/right/lower/none to test')
    parser.add_argument("--img_resolution", type=int, default=512)
    args = parser.parse_args()
    config.IMG_SIZE = (args.img_resolution, args.img_resolution)
    # record this process's pid so external tooling can manage the run
    if not os.path.exists(args.pid):
        os.makedirs(args.pid)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.path.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()
    print('GPU: {}'.format(args.gpu))
    if args.is_pretrained:
        adaptive_AU_database(args.pretrained_target)
    else:
        adaptive_AU_database(args.database)
    np.random.seed(args.seed)
    # a list txt file must exist beforehand: id_trainval_0.txt, each line is
    # subject + "/" + emotion_seq + "/" + frame
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))
    resnet101 = ResNet(len(config.AU_SQUEEZE),
                       pretrained_model=args.pretrained_model)
    model = TrainChain(resnet101)
    if args.eval_mode:
        with chainer.no_backprop_mode(), chainer.using_config("train", False):
            if args.occlude:
                # evaluate robustness against an occluded face region
                test_data = ImageDataset(
                    database=args.database, fold=args.fold,
                    split_name='test', split_index=args.split_idx,
                    mc_manager=mc_manager, train_all_data=False,
                    pretrained_target=args.pretrained_target,
                    img_resolution=args.img_resolution)
                test_data = TransformDataset(
                    test_data, Transform(mean_rgb_path=args.mean, mirror=False))
                assert args.occlude in ["upper", "lower", "left", "right"]
                test_data = TransformDataset(test_data,
                                             OccludeTransform(args.occlude))
                if args.proc_num == 1:
                    test_iter = SerialIterator(test_data, 1, repeat=False,
                                               shuffle=True)
                else:
                    test_iter = MultiprocessIterator(
                        test_data, batch_size=1, n_processes=args.proc_num,
                        repeat=False, shuffle=True, n_prefetch=10,
                        shared_mem=10000000)
                single_model_file_name = args.test_model
                chainer.serializers.load_npz(single_model_file_name, resnet101)
                gpu = int(args.gpu)
                chainer.cuda.get_device_from_id(gpu).use()
                resnet101.to_gpu(gpu)
                evaluator = AUEvaluator(
                    test_iter, resnet101,
                    lambda batch, device: concat_examples(batch, device, padding=0),
                    args.database, "/home/machen/face_expr", device=gpu,
                    npz_out_path=args.out + os.path.sep +
                    "npz_occlude_{0}_split_{1}.npz".format(args.occlude,
                                                           args.split_idx))
                observation = evaluator.evaluate()
                with open(args.out + os.path.sep +
                          "evaluation_occlude_{0}_fold_{1}_result_test_mode.json"
                          .format(args.occlude, args.split_idx), "w") as file_obj:
                    file_obj.write(json.dumps(observation, indent=4,
                                              separators=(',', ': ')))
                    file_obj.flush()
            else:
                test_data = ImageDataset(
                    database=args.database, fold=args.fold,
                    split_name='test', split_index=args.split_idx,
                    mc_manager=mc_manager, train_all_data=False,
                    pretrained_target=args.pretrained_target,
                    img_resolution=args.img_resolution)
                test_data = TransformDataset(
                    test_data, Transform(mean_rgb_path=args.mean, mirror=False))
                if args.proc_num == 1:
                    test_iter = SerialIterator(test_data, 1, repeat=False,
                                               shuffle=False)
                else:
                    test_iter = MultiprocessIterator(
                        test_data, batch_size=1, n_processes=args.proc_num,
                        repeat=False, shuffle=False, n_prefetch=10,
                        shared_mem=10000000)
                single_model_file_name = args.test_model
                chainer.serializers.load_npz(single_model_file_name, resnet101)
                # multiple GPUs may be given comma-separated; evaluation uses the first
                gpu = int(args.gpu) if "," not in args.gpu else int(
                    args.gpu[:args.gpu.index(",")])
                chainer.cuda.get_device_from_id(gpu).use()
                resnet101.to_gpu(gpu)
                evaluator = AUEvaluator(
                    test_iter, resnet101,
                    lambda batch, device: concat_examples(batch, device, padding=0),
                    args.database, "/home/machen/face_expr", device=gpu,
                    npz_out_path=args.out + os.path.sep +
                    "npz_split_{}.npz".format(args.split_idx))
                observation = evaluator.evaluate()
                with open(args.out + os.path.sep +
                          "evaluation_split_{}_result_train_mode.json".format(
                              args.split_idx), "w") as file_obj:
                    file_obj.write(json.dumps(observation, indent=4,
                                              separators=(',', ': ')))
                    file_obj.flush()
        return
    # -------- training path --------
    train_data = ImageDataset(database=args.database, fold=args.fold,
                              split_name='trainval',
                              split_index=args.split_idx,
                              mc_manager=mc_manager,
                              train_all_data=args.is_pretrained,
                              read_type=args.readtype,
                              pretrained_target=args.pretrained_target,
                              img_resolution=args.img_resolution)
    train_data = TransformDataset(train_data, Transform(args.mean, mirror=True))
    if args.proc_num == 1:
        train_iter = SerialIterator(train_data, args.batch_size, True, True)
    else:
        train_iter = MultiprocessIterator(train_data,
                                          batch_size=args.batch_size,
                                          n_processes=args.proc_num,
                                          repeat=True, shuffle=True,
                                          n_prefetch=10, shared_mem=31457280)
    if "," in args.gpu:
        for gpu in args.gpu.split(","):
            chainer.cuda.get_device_from_id(int(gpu)).use()
    else:
        chainer.cuda.get_device_from_id(int(args.gpu)).use()
    optimizer = None
    if args.optimizer == 'AdaGrad':
        # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to AdaGrad
        # because the loss became NaN
        optimizer = chainer.optimizers.AdaGrad(lr=args.lr)
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        print("using Adam")
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        print("using AdaDelta")
        optimizer = chainer.optimizers.AdaDelta()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer_name = args.optimizer
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    pretrained_optimizer_file_name = '{0}_{1}_fold_{2}_{3}_{4}_optimizer.npz'.format(
        args.database, args.fold, args.split_idx, args.feature_model,
        optimizer_name)
    pretrained_optimizer_file_name = args.out + os.path.sep + pretrained_optimizer_file_name
    single_model_file_name = args.out + os.path.sep + \
        '{0}_{1}_fold_{2}_{3}_model.npz'.format(
            args.database, args.fold, args.split_idx, args.feature_model)
    # resume from optimizer/model snapshots when present
    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snapshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)
    if os.path.exists(single_model_file_name):
        print("loading pretrained snapshot:{}".format(single_model_file_name))
        chainer.serializers.load_npz(single_model_file_name, model.backbone)
    print(" GPU({0}) updater".format(args.gpu))
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=int(args.gpu),
        converter=lambda batch, device: concat_examples(batch, device, padding=0))
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
        trigger=(args.snapshot, 'iteration'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        model.backbone, filename=os.path.basename(single_model_file_name)),
        trigger=(args.snapshot, 'iteration'))
    log_interval = 100, 'iteration'
    print_interval = 100, 'iteration'
    plot_interval = 100, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        # Adam has no 'lr' attribute; decay 'alpha' instead
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.5, optimizer=optimizer), trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(chainer.training.extensions.LogReport(
        trigger=log_interval,
        log_name="{0}_fold_{1}.log".format(args.fold, args.split_idx)))
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
        'main/accuracy',
    ]), trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))
    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss', "validation/main/loss"],
            file_name='loss_{0}_fold_{1}.png'.format(args.fold, args.split_idx),
            trigger=plot_interval), trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_fold_{1}.png'.format(
                args.fold, args.split_idx),
            trigger=plot_interval), trigger=plot_interval)
    trainer.run()
def main():
    """Pre-fill the memcached cache with landmark / AU-box / crop data.

    For every image of the trainval split that is not yet cached, the script
    computes facial landmarks and connected-component AU boxes in a worker
    pool, crops the face, and stores the result dict in memcached under the
    key ``"<database>|<img_path>"``.

    Fix vs. the previous revision: the ``--memcached_host`` argument was
    commented out although ``args.memcached_host`` is dereferenced whenever
    ``--use_memcached`` is set (AttributeError).
    """
    parser = argparse.ArgumentParser(
        description='Space Time Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu', '-g', nargs='+', type=int,
                        help='GPU ID, multiple GPU split by space')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out', '-o', default='end_to_end_result',
                        help='Output directory')
    parser.add_argument('--trainval', default='train', help='train/test')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--need_validate', action='store_true',
                        help='do or not validate during training')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone', default="mobilenet_v1",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer', default='RMSprop',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model', default='mobilenet_v1',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument('--pretrained_model_args', nargs='+', type=float,
                        help='you can pass in "1.0 224" or "0.75 224"')
    parser.add_argument('--spatial_edge_mode', type=SpatialEdgeMode,
                        choices=list(SpatialEdgeMode),
                        help='1:all_edge, 2:configure_edge, 3:no_edge')
    parser.add_argument('--temporal_edge_mode', type=TemporalEdgeMode,
                        choices=list(TemporalEdgeMode),
                        help='1:rnn, 2:attention_block, 3.point-wise feed forward(no temporal)')
    parser.add_argument("--bi_lstm", action="store_true",
                        help="whether to use bi-lstm as Edge/Node RNN")
    parser.add_argument('--use_memcached', action='store_true',
                        help='whether use memcached to boost speed of fetch crop&mask')
    # BUGFIX: previously commented out, but args.memcached_host is read below.
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--layers", type=int, default=1)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label", action="store_true",
                        help="only to use paper reported number of labels to train")
    parser.add_argument("--previous_frame", type=int, default=50)
    parser.add_argument("--sample_frame", '-sample', type=int, default=25)
    parser.add_argument("--snap_individual", action="store_true",
                        help="whether to snapshot each individual epoch/iteration")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument('--eval_mode', action='store_true',
                        help='Use test datasets for evaluation metric')
    args = parser.parse_args()
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    # record this process's pid for external run management
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()
    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))
    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))
    train_data = AUDataset(
        database=args.database,
        fold=args.fold,
        split_name=args.trainval,
        split_index=args.split_idx,
        mc_manager=mc_manager,
        train_all_data=False,
    )
    # NOTE(review): the membership test below requires a working memcached;
    # without --use_memcached, mc_manager is None and `in None` raises TypeError.
    result_data = [
        img_path
        for img_path, AU_set, current_database_name in train_data.result_data
        if args.database + "|" + img_path not in mc_manager
    ]
    # process in ~100 chunks so each Pool stays short-lived
    sub_list = split_list(result_data, len(result_data) // 100)
    for img_path_lst in sub_list:
        with Pool(processes=50) as pool:
            input_list = [(img_path, None, None) for img_path in img_path_lst]
            result = pool.starmap(parallel_landmark_and_conn_component,
                                  input_list)
            pool.close()
            pool.join()
        for img_path, AU_box_dict, landmark_dict, box_is_whole_image in result:
            key_prefix = args.database + "|"
            key = key_prefix + img_path
            orig_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            new_face, rect = FaceMaskCropper.dlib_face_crop(
                orig_img, landmark_dict)
            print("write {}".format(key))
            if mc_manager is not None and key not in mc_manager:
                save_dict = {
                    "landmark_dict": landmark_dict,
                    "AU_box_dict": AU_box_dict,
                    "crop_rect": rect
                }
                mc_manager.set(key, save_dict)
def main():
    """Train the two-stream I3D R-CNN on BP4D/DISFA video clips.

    Builds RGB and/or optical-flow I3D extraction chains according to
    ``--two_stream_mode``, loads their pretrained weights, wraps them with the
    AU R-CNN loss head, and runs a chainer Trainer over ``--T``-frame clips.

    Fixes vs. the previous revision:
      * RGB-only branch loaded ``args.pretrained_flow`` into the RGB backbone
        and head (copy-paste bug; the rgb_flow branch shows the correct pairing).
      * ``args.sample_frame`` was used for the iterator batch size but never
        defined by this parser (AttributeError); the clip length is ``args.T``.
      * The PlotReport file names referenced ``args.spatial_edge_mode`` /
        ``args.temporal_edge_mode`` / ``args.conv_rnn_type``, none of which this
        script defines (crash at ``trainer.extend``).
      * ``--memcached_host`` was missing although ``args.memcached_host`` is
        read when ``--use_memcached`` is set.
      * "snatshot" typo corrected in a log message.
    """
    parser = argparse.ArgumentParser(description='I3D R-CNN train:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu', '-g', nargs='+', type=int,
                        help='GPU ID, multiple GPU split by space')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out', '-o', default='i3d_result',
                        help='Output directory')
    parser.add_argument('--database', default='BP4D',
                        help='Output directory: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone', default="mobilenet_v1",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer', default='SGD',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_rgb',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument('--pretrained_flow',
                        help="path of optical flow pretrained model (may be single stream OF model)")
    parser.add_argument('--two_stream_mode', type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='spatial/ temporal/ spatial_temporal')
    parser.add_argument('--use_memcached', action='store_true',
                        help='whether use memcached to boost speed of fetch crop&mask')
    # BUGFIX: args.memcached_host is dereferenced below when --use_memcached
    # is set, but the argument only existed as a comment.
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label", action="store_true",
                        help="only to use paper reported number of labels to train")
    parser.add_argument("--roi_align", action="store_true",
                        help="whether to use roi align or roi pooling layer in CNN")
    parser.add_argument("--T", '-T', type=int, default=10,
                        help="sequence length of one video clip")
    parser.add_argument("--out_channel", type=int, default=2048,
                        help="length of extract ROI feature")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    args = parser.parse_args()
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    # record this process's pid for external run management
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()
    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))
    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))
    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    au_rcnn_train_chain_list = []
    if args.backbone == 'i3d':
        if args.two_stream_mode == TwoStreamMode.rgb:
            i3d_feature_backbone = I3DFeatureExtractor(modality='rgb')
            i3d_roi_head = I3DRoIHead(out_channel=args.out_channel,
                                      roi_size=7, spatial_scale=1 / 16.,
                                      dropout_prob=0.)
            # BUGFIX: this branch previously loaded args.pretrained_flow into
            # the RGB backbone/head.
            chainer.serializers.load_npz(args.pretrained_rgb,
                                         i3d_feature_backbone)
            chainer.serializers.load_npz(args.pretrained_rgb, i3d_roi_head)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone, i3d_roi_head)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
        elif args.two_stream_mode == TwoStreamMode.optical_flow:
            i3d_feature_backbone_flow = I3DFeatureExtractor(modality='flow')
            i3d_roi_head = I3DRoIHead(out_channel=args.out_channel,
                                      roi_size=7, spatial_scale=1 / 16.,
                                      dropout_prob=0.)
            au_rcnn_train_chain_flow = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone_flow, i3d_roi_head)
            chainer.serializers.load_npz(args.pretrained_flow,
                                         i3d_feature_backbone_flow)
            chainer.serializers.load_npz(args.pretrained_flow, i3d_roi_head)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_flow)
        elif args.two_stream_mode == TwoStreamMode.rgb_flow:
            # RGB stream
            i3d_feature_backbone = I3DFeatureExtractor(modality='rgb')
            i3d_roi_head_rgb = I3DRoIHead(out_channel=args.out_channel,
                                          roi_size=7, spatial_scale=1 / 16.,
                                          dropout_prob=0.)
            chainer.serializers.load_npz(args.pretrained_rgb,
                                         i3d_feature_backbone)
            chainer.serializers.load_npz(args.pretrained_rgb, i3d_roi_head_rgb)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone, i3d_roi_head_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            # optical-flow stream
            i3d_feature_backbone_flow = I3DFeatureExtractor(modality='flow')
            i3d_roi_head_flow = I3DRoIHead(out_channel=args.out_channel,
                                           roi_size=7, spatial_scale=1 / 16.,
                                           dropout_prob=0.)
            au_rcnn_train_chain_flow = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone_flow, i3d_roi_head_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_flow)
            chainer.serializers.load_npz(args.pretrained_flow,
                                         i3d_feature_backbone_flow)
            chainer.serializers.load_npz(args.pretrained_flow,
                                         i3d_roi_head_flow)
    au_rcnn_train_loss = AU_RCNN_TrainChainLoss()
    loss_head_module = au_rcnn_train_loss
    model = Wrapper(au_rcnn_train_chain_list, loss_head_module, args.database,
                    args.T, args.two_stream_mode, args.gpu)
    batch_size = args.batch_size
    img_dataset = AUDataset(database=args.database, fold=args.fold,
                            split_name='trainval',
                            split_index=args.split_idx,
                            mc_manager=mc_manager, train_all_data=False)
    train_video_data = AU_video_dataset(
        au_image_dataset=img_dataset,
        sample_frame=args.T,
        train_mode=True,
        paper_report_label_idx=paper_report_label_idx)
    Transform = Transform3D
    substract_mean = SubStractMean(args.mean)
    train_video_data = TransformDataset(
        train_video_data, Transform(substract_mean, mirror=False))
    # BUGFIX: previously used args.sample_frame, which this parser never
    # defines; the clip length is args.T (also passed to AU_video_dataset).
    if args.proc_num == 1:
        train_iter = SerialIterator(train_video_data,
                                    batch_size * args.T,
                                    repeat=True, shuffle=False)
    else:
        train_iter = MultiprocessIterator(train_video_data,
                                          batch_size=batch_size * args.T,
                                          n_processes=args.proc_num,
                                          repeat=True, shuffle=False,
                                          n_prefetch=10,
                                          shared_mem=10000000)
    for gpu in args.gpu:
        chainer.cuda.get_device_from_id(gpu).use()
    optimizer = None
    if args.optimizer == 'AdaGrad':
        # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to AdaGrad
        # because the loss became NaN
        optimizer = chainer.optimizers.AdaGrad(lr=args.lr)
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        optimizer = chainer.optimizers.AdaDelta()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer_name = args.optimizer
    key_str = "{0}_fold_{1}".format(args.fold, args.split_idx)
    file_list = []
    file_list.extend(os.listdir(args.out))
    # snapshot names look like:
    # BP4D_3_fold_1_resnet101@rnn@no_temporal@use_paper_num_label@roi_align@label_dep_layer@conv_lstm@sampleframe#13_model.npz
    use_paper_key_str = "use_paper_num_label" if args.use_paper_num_label \
        else "all_{}_label".format(args.database)
    roi_align_key_str = "roi_align" if args.roi_align else "roi_pooling"
    single_model_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_model.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.two_stream_mode, use_paper_key_str, roi_align_key_str, args.T)
    print(single_model_file_name)
    pretrained_optimizer_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_optimizer.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.two_stream_mode, use_paper_key_str, roi_align_key_str, args.T)
    print(pretrained_optimizer_file_name)
    # resume from optimizer/model snapshots when present
    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snapshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)
    if os.path.exists(single_model_file_name):
        print("loading pretrained snapshot:{}".format(single_model_file_name))
        chainer.serializers.load_npz(single_model_file_name, model)
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu[0],
        converter=lambda batch, device: concat_examples(batch, device, padding=0))

    @training.make_extension(trigger=(1, "epoch"))
    def reset_order(trainer):
        # re-shuffle the underlying video dataset between epochs
        print("reset dataset order after one epoch")
        trainer.updater._iterators["main"].dataset._dataset.reset_for_train_mode()

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(reset_order)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
        trigger=(args.snapshot, 'iteration'))
    log_interval = 100, 'iteration'
    print_interval = 10, 'iteration'
    plot_interval = 10, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        # Adam has no 'lr' attribute; decay 'alpha' instead
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer), trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(chainer.training.extensions.LogReport(
        trigger=log_interval,
        log_name="log_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.log".format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.two_stream_mode, use_paper_key_str, roi_align_key_str,
            args.T)))
    # trainer.reporter.add_observer("main_par", model.loss_head_module)
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
        'main/accuracy',
    ]), trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))
    if chainer.training.extensions.PlotReport.available():
        # BUGFIX: the plot file names referenced args.spatial_edge_mode /
        # args.temporal_edge_mode / args.conv_rnn_type, none of which this
        # script defines; use the run-identifying strings computed above.
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name='loss_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str),
            trigger=plot_interval), trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str),
            trigger=plot_interval), trigger=plot_interval)
    trainer.run()
def main():
    """Train the Graph Attention (GAT) AU-recognition model with Chainer.

    Parses command-line options, builds the dataset/model/optimizer, wires up
    the Chainer Trainer with reporting, snapshot and plotting extensions, and
    runs training. Side effects: reads the train-list txt from ``--train``,
    writes logs, snapshots and plot images under ``--out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    # open_crf layer only works for CPU mode
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--step_size', '-ss', type=int, default=3000,
                        help='step_size for lr exponential')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--snapshot', '-snap', type=int, default=20,
                        help='snapshot epochs for save checkpoint')
    parser.add_argument('--train', '-t', default="train",
                        help='Train directory path contains train txt file')
    parser.add_argument('--database', default="BP4D",
                        help='database to train for')
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--hidden_size', type=int, default=1024,
                        help="the hidden dimension of the middle layers")
    parser.add_argument("--num_attrib", type=int, default=2048,
                        help="number of dimension of each node feature")
    parser.add_argument("--proc_num", '-proc', type=int, default=1,
                        help="process number of dataset reader")
    parser.add_argument("--need_cache_graph", "-ng", action="store_true",
                        help="whether to cache factor graph to LRU cache")
    parser.add_argument("--resume", action="store_true",
                        help="whether to load npz pretrained file")
    parser.add_argument('--atten_heads', type=int, default=4,
                        help="atten heads for parallel learning")
    parser.add_argument('--layer_num', type=int, default=2,
                        help='layer number of GAT')
    args = parser.parse_args()

    print_interval = 1, 'iteration'
    val_interval = 5, 'iteration'

    adaptive_AU_database(args.database)
    box_num = config.BOX_NUM[args.database]

    # The StructuralRNN constructor needs a first-frame factor graph, so we
    # load one sample up front and pass the populated dataset to the model.
    dataset = GlobalDataSet(num_attrib=args.num_attrib, train_edge="all")
    file_name = list(
        filter(lambda e: e.endswith(".txt"), os.listdir(args.train)))[0]
    dataset.load_data(args.train + os.sep + file_name, False)

    model = GraphAttentionModel(input_dim=dataset.num_attrib_type,
                                hidden_dim=args.hidden_size,
                                class_number=dataset.label_bin_len,
                                atten_heads=args.atten_heads,
                                layers_num=args.layer_num,
                                frame_node_num=box_num)

    # NOTE: attrib_size is consumed by open_crf for its parameter count, so it
    # must stay the fixed 2048 here rather than dataset.num_attrib_type.
    # args.train is a directory path here.
    train_data = GraphDataset(args.train, attrib_size=2048,
                              global_dataset=dataset, need_s_rnn=False,
                              need_cache_factor_graph=args.need_cache_graph,
                              need_adjacency_matrix=True,
                              npy_in_parent_dir=False, need_factor_graph=False)
    train_iter = chainer.iterators.SerialIterator(train_data, 1, shuffle=True,
                                                  repeat=True)

    if args.gpu >= 0:
        print("using gpu : {}".format(args.gpu))
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    updater = BPTTUpdater(train_iter, optimizer, device=args.gpu)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    interval = (1, 'iteration')
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=print_interval)
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
        "main/accuracy",
    ]), trigger=print_interval)
    log_name = "GAT.log"
    trainer.extend(
        chainer.training.extensions.LogReport(trigger=interval,
                                              log_name=log_name))

    optimizer_snapshot_name = "{0}_GAT_optimizer.npz".format(args.database)
    model_snapshot_name = "{0}_GAT_model.npz".format(args.database)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name),
        trigger=(args.snapshot, 'epoch'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=model_snapshot_name),
        trigger=(args.snapshot, 'epoch'))
    trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.7),
                   trigger=(5, "epoch"))

    # Resume from the latest snapshots in the output dir, if requested.
    if args.resume and os.path.exists(args.out + os.sep + model_snapshot_name):
        print("loading model_snapshot_name to model")
        chainer.serializers.load_npz(args.out + os.sep + model_snapshot_name,
                                     model)
    if args.resume and os.path.exists(
            args.out + os.sep + optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(
            args.out + os.sep + optimizer_snapshot_name, optimizer)

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'], file_name="train_loss.png"),
            trigger=(100, "iteration"))
        # Bug fix: the "{}" placeholder was never filled, producing a literal
        # "{}_val_f1.png" file; format in the database name like the
        # snapshot filenames above.
        trainer.extend(chainer.training.extensions.PlotReport(
            ['opencrf_val/F1', 'opencrf_val/accuracy'],
            file_name="{}_val_f1.png".format(args.database)),
            trigger=val_interval)

    trainer.run()