Example #1
def generate_AUCouple_ROI_mask_image(database_name, img_path):
    adaptive_AU_database(database_name)
    global MASK_COLOR

    mask_color_lst = []
    for color in MASK_COLOR:
        mask_color_lst.append(color_bgr(color))
    cropped_face, AU_mask_dict = FaceMaskCropper.get_cropface_and_mask(
        img_path, channel_first=False)
    AU_couple_dict = get_zip_ROI_AU()

    land = FaceLandMark(config.DLIB_LANDMARK_PRETRAIN)
    landmark, _, _ = land.landmark(image=cropped_face)
    roi_polygons = land.split_ROI(landmark)
    for roi_no, polygon_vertex_arr in roi_polygons.items():
        polygon_vertex_arr[0, :] = np.round(polygon_vertex_arr[0, :])
        polygon_vertex_arr[1, :] = np.round(polygon_vertex_arr[1, :])
        polygon_vertex_arr = sort_clockwise(polygon_vertex_arr.tolist())
        cv2.polylines(cropped_face, [polygon_vertex_arr],
                      True,
                      color_bgr(RED),
                      thickness=1)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(cropped_face,
                    str(roi_no),
                    tuple(
                        np.mean(polygon_vertex_arr, axis=0).astype(np.int32)),
                    font,
                    0.7, (0, 255, 255),
                    thickness=1)
    already_fill_AU = set()
    idx = 0
    gen_face_lst = dict()
    AU_couple_mask = dict()
    for AU in config.AU_ROI.keys():
        AU_couple = AU_couple_dict[AU]
        if AU_couple in already_fill_AU:
            continue
        already_fill_AU.add(AU_couple)
        mask = AU_mask_dict[AU]
        AU_couple_mask[AU_couple] = mask
        color_mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)

        color_mask[mask != 0] = random.choice(mask_color_lst)
        idx += 1
        new_face = cv2.addWeighted(cropped_face, 0.75, color_mask, 0.25, 0)
        gen_face_lst[AU_couple] = new_face
    return gen_face_lst, AU_couple_mask
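
# Hedged usage sketch (added for illustration, not part of the original example):
# assuming the imports above, render every AU-couple overlay for one face image
# and write it to disk; the image path and output directory are hypothetical.
def demo_AUCouple_ROI_mask(img_path="/tmp/face.jpg", out_dir="/tmp/AU_couple_vis"):
    os.makedirs(out_dir, exist_ok=True)
    gen_face_lst, AU_couple_mask = generate_AUCouple_ROI_mask_image("BP4D", img_path)
    for AU_couple, blended_face in gen_face_lst.items():
        # AU_couple is a tuple of AU label strings; join them for the file name
        out_path = "{0}/couple_{1}.png".format(out_dir, "_".join(map(str, AU_couple)))
        cv2.imwrite(out_path, blended_face)
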
def get_BP4D_prescion_matrix(label_file_dir):
    adaptive_AU_database("BP4D")
    alpha = 0.2
    model = GraphLassoCV(alphas=100,
                         cv=10,
                         max_iter=10,
                         tol=1e-5,
                         verbose=True,
                         mode="lars",
                         assume_centered=False,
                         n_jobs=100)

    X = []
    for file_name in os.listdir(label_file_dir):  # each file is a video
        AU_column_idx = {}
        with open(label_file_dir + "/" + file_name, "r") as au_file_obj:

            for idx, line in enumerate(au_file_obj):

                if idx == 0:  # header specify Action Unit
                    for col_idx, AU in enumerate(line.split(",")[1:]):
                        AU_column_idx[AU] = col_idx + 1  # read header
                    continue  # header row consumed, move on to frame lines

                lines = line.split(",")
                frame = lines[0]
                au_labels = [AU for AU in config.AU_ROI.keys() \
                                 if int(lines[AU_column_idx[AU]]) == 1]
                AU_bin = np.zeros(len(config.AU_SQUEEZE))
                for AU in au_labels:
                    bin_idx = config.AU_SQUEEZE.inv[AU]
                    np.put(AU_bin, bin_idx, 1)
                X.append(AU_bin)
    X = np.array(X)
    print(X.shape)
    # X = np.transpose(X)
    model.fit(X)
    cov_ = model.covariance_
    prec_ = model.precision_

    return {"prec": prec_, "cov": cov_}
Example #3
    def generate_AUCouple_ROI_mask_image(self, database_name, img_path,
                                         roi_activate):
        adaptive_AU_database(database_name)

        cropped_face, AU_mask_dict = FaceMaskCropper.get_cropface_and_mask(
            img_path, channel_first=False)
        AU_couple_dict = get_zip_ROI_AU()

        land = FaceLandMark(config.DLIB_LANDMARK_PRETRAIN)
        landmark, _, _ = land.landmark(image=cropped_face)
        roi_polygons = land.split_ROI(landmark)
        for roi_no, polygon_vertex_arr in roi_polygons.items():
            polygon_vertex_arr[0, :] = np.round(polygon_vertex_arr[0, :])
            polygon_vertex_arr[1, :] = np.round(polygon_vertex_arr[1, :])
            polygon_vertex_arr = sort_clockwise(polygon_vertex_arr.tolist())
            cv2.polylines(cropped_face, [polygon_vertex_arr],
                          True, (0, 0, 255),
                          thickness=1)
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(cropped_face,
                        str(roi_no),
                        tuple(
                            np.mean(polygon_vertex_arr,
                                    axis=0).astype(np.int32)),
                        font,
                        0.7, (0, 255, 255),
                        thickness=1)
        already_fill_AU = set()
        AUCouple_face_dict = dict()
        for AU in config.AU_ROI.keys():
            AU_couple = AU_couple_dict[AU]
            if AU_couple in already_fill_AU or AU_couple not in roi_activate:
                continue
            already_fill_AU.add(AU_couple)
            mask = AU_mask_dict[AU]
            color_mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
            color_mask[mask != 0] = (199, 21, 133)
            new_face = cv2.add(cropped_face, color_mask)
            AUCouple_face_dict[AU_couple] = new_face

        return AUCouple_face_dict
Example #4
def generate_landmark_image(database_name, face_img_path=None, face_img=None):
    adaptive_AU_database(database_name)
    land = FaceLandMark(config.DLIB_LANDMARK_PRETRAIN)
    trn_img = face_img
    if face_img is None:
        trn_img = cv2.imread(face_img_path, cv2.IMREAD_COLOR)
    landmark_dict, _, new_image = land.landmark(image=trn_img,
                                                need_txt_img=True)
    roi_polygons = land.split_ROI(landmark_dict)
    # for roi_no, polygon_vertex_arr in roi_polygons.items():
    #     # if int(roi_no) == 40 or int(roi_no) == 41:
    #     polygon_vertex_arr[0, :] = np.round(polygon_vertex_arr[0, :])
    #     polygon_vertex_arr[1, :] = np.round(polygon_vertex_arr[1, :])
    #     polygon_vertex_arr = sort_clockwise(polygon_vertex_arr.tolist())
    #     cv2.polylines(trn_img, [polygon_vertex_arr], True, 	(34,34,178), thickness=2)
    #     font = cv2.FONT_HERSHEY_SIMPLEX
    #     cv2.putText(trn_img, str(roi_no), tuple(np.mean(polygon_vertex_arr,axis=0).astype(np.int32)),
    #                 font,0.6,(0,255,255),thickness=1)
    # for i, x_y in landmark_txt.items():
    #     x, y = x_y
    #     cv2.putText(trn_img, str(i), (x, y), font, 0.4, (255, 255, 255), 1)
    return new_image
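
# Hedged usage sketch (added for illustration): draw the dlib landmarks for a
# single face image and save the annotated result; the paths are hypothetical.
def demo_landmark_image(face_img_path="/tmp/face.jpg"):
    annotated = generate_landmark_image("BP4D", face_img_path=face_img_path)
    cv2.imwrite("/tmp/face_landmark.png", annotated)
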
def get_DISFA_prescion_matrix(label_file_dir):
    adaptive_AU_database("DISFA")
    alpha = 0.2
    model = GraphLassoCV(alphas=100,
                         cv=10,
                         max_iter=100,
                         tol=1e-5,
                         verbose=True,
                         mode="lars",
                         assume_centered=False,
                         n_jobs=100)
    X = []
    for file_name in os.listdir(label_file_dir):
        subject_filename = label_file_dir + os.sep + file_name
        frame_label = defaultdict(dict)
        for au_file in os.listdir(subject_filename):
            abs_filename = subject_filename + "/" + au_file
            AU = au_file[au_file.rindex("_") + 3:au_file.rindex(".")]
            with open(abs_filename, "r") as file_obj:
                for line in file_obj:
                    frame, AU_label = line.strip().split(",")
                    # AU_label = int(AU_label)
                    AU_label = 0 if int(
                        AU_label) < 3 else 1  # surprisingly, intensities < 3 are dropped, yet this still yields excellent results
                    frame_label[int(frame)][AU] = int(AU_label)
        for frame, AU_dict in frame_label.items():
            AU_bin = np.zeros(len(config.AU_SQUEEZE))
            for AU, AU_label in AU_dict.items():
                bin_idx = config.AU_SQUEEZE.inv[AU]
                np.put(AU_bin, bin_idx, AU_label)
            X.append(AU_bin)
    X = np.array(X)
    print(X.shape)
    model.fit(X)
    cov_ = model.covariance_
    prec_ = model.precision_
    return {"prec": prec_, "cov": cov_}
def main():
    parser = argparse.ArgumentParser(
        description='Space Time Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/',
                        help='directory in which to write the pid file')
    parser.add_argument('--gpu',
                        '-g',
                        nargs='+',
                        type=int,
                        help='GPU ID, multiple GPU split by space')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out',
                        '-o',
                        default='end_to_end_result',
                        help='Output directory')
    parser.add_argument('--database',
                        default='BP4D',
                        help='database to train on: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--need_validate',
                        action='store_true',
                        help='do or not validate during training')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH +
                        "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone',
                        default="mobilenet_v1",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer',
                        default='SGD',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model_rgb',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument(
        '--pretrained_model_of',
        help=
        "path of optical flow pretrained model (may be single stream OF model)"
    )

    parser.add_argument('--pretrained_model_args',
                        nargs='+',
                        type=float,
                        help='you can pass in "1.0 224" or "0.75 224"')
    parser.add_argument('--spatial_edge_mode',
                        type=SpatialEdgeMode,
                        choices=list(SpatialEdgeMode),
                        help='1:all_edge, 2:configure_edge, 3:no_edge')
    parser.add_argument('--spatial_sequence_type',
                        type=SpatialSequenceType,
                        choices=list(SpatialSequenceType),
                        help='spatial sequence type (see SpatialSequenceType)')
    parser.add_argument(
        '--temporal_edge_mode',
        type=TemporalEdgeMode,
        choices=list(TemporalEdgeMode),
        help='1:rnn, 2:attention_block, 3.point-wise feed forward(no temporal)'
    )
    parser.add_argument('--two_stream_mode',
                        type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='spatial/ temporal/ spatial_temporal')
    parser.add_argument('--conv_rnn_type',
                        type=ConvRNNType,
                        choices=list(ConvRNNType),
                        help='conv_lstm or conv_sru')
    parser.add_argument("--bi_lstm",
                        action="store_true",
                        help="whether to use bi-lstm as Edge/Node RNN")
    parser.add_argument(
        '--use_memcached',
        action='store_true',
        help='whether to use memcached to speed up fetching of crops and masks')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--layers", type=int, default=1)
    parser.add_argument("--label_win_size", type=int, default=3)
    parser.add_argument("--fix",
                        action="store_true",
                        help="fix parameter of conv2 update when finetune")
    parser.add_argument("--x_win_size", type=int, default=1)
    parser.add_argument("--use_label_dependency",
                        action="store_true",
                        help="use label dependency layer after conv_lstm")
    parser.add_argument("--dynamic_backbone",
                        action="store_true",
                        help="use dynamic backbone: conv lstm as backbone")
    parser.add_argument("--ld_rnn_dropout", type=float, default=0.4)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label",
                        action="store_true",
                        help="only to use paper reported number of labels"
                        " to train")
    parser.add_argument(
        "--roi_align",
        action="store_true",
        help="whether to use roi align or roi pooling layer in CNN")
    parser.add_argument("--debug",
                        action="store_true",
                        help="debug mode for 1/50 dataset")
    parser.add_argument("--sample_frame", '-sample', type=int, default=10)
    parser.add_argument(
        "--snap_individual",
        action="store_true",
        help="whether to snapshot each individual epoch/iteration")

    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument("--fetch_mode", type=int, default=1)
    parser.add_argument('--eval_mode',
                        action='store_true',
                        help='Use test datasets for evaluation metric')
    args = parser.parse_args()
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    # with open(pid_file_path, "w") as file_obj:
    #     file_obj.write(pid)
    #     file_obj.flush()

    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))

    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    use_feature_map_res45 = (args.conv_rnn_type != ConvRNNType.conv_rcnn) and (
        args.conv_rnn_type != ConvRNNType.fc_lstm)
    use_au_rcnn_loss = (args.conv_rnn_type == ConvRNNType.conv_rcnn)
    au_rcnn_train_chain_list = []
    if args.backbone == 'vgg':
        au_rcnn = AU_RCNN_VGG16(pretrained_model=args.pretrained_model_rgb,
                                min_size=config.IMG_SIZE[0],
                                max_size=config.IMG_SIZE[1],
                                mean_file=args.mean,
                                use_roi_align=args.roi_align)
        au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain)
    elif args.backbone == 'resnet101':

        if args.two_stream_mode != TwoStreamMode.spatial_temporal:
            pretrained_model = args.pretrained_model_rgb if args.pretrained_model_rgb else args.pretrained_model_of
            au_rcnn = AU_RCNN_Resnet101(
                pretrained_model=pretrained_model,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                mean_file=args.mean,
                classify_mode=use_au_rcnn_loss,
                n_class=class_num,
                use_roi_align=args.roi_align,
                use_feature_map_res45=use_feature_map_res45,
                use_feature_map_res5=(args.conv_rnn_type != ConvRNNType.fc_lstm
                                      or args.conv_rnn_type
                                      == ConvRNNType.sep_conv_lstm),
                use_optical_flow_input=(
                    args.two_stream_mode == TwoStreamMode.optical_flow),
                temporal_length=args.sample_frame)
            au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain)
        else:
            au_rcnn_rgb = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_rgb,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                mean_file=args.mean,
                classify_mode=use_au_rcnn_loss,
                n_class=class_num,
                use_roi_align=args.roi_align,
                use_feature_map_res45=use_feature_map_res45,
                use_feature_map_res5=(args.conv_rnn_type != ConvRNNType.fc_lstm
                                      or args.conv_rnn_type
                                      == ConvRNNType.sep_conv_lstm),
                use_optical_flow_input=False,
                temporal_length=args.sample_frame)

            au_rcnn_optical_flow = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_of,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                mean_file=args.mean,
                classify_mode=use_au_rcnn_loss,
                n_class=class_num,
                use_roi_align=args.roi_align,
                use_feature_map_res45=use_feature_map_res45,
                use_feature_map_res5=(args.conv_rnn_type != ConvRNNType.fc_lstm
                                      or args.conv_rnn_type
                                      == ConvRNNType.sep_conv_lstm),
                use_optical_flow_input=True,
                temporal_length=args.sample_frame)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
            au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
                au_rcnn_optical_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)

    elif args.backbone == "mobilenet_v1":
        au_rcnn = AU_RCNN_MobilenetV1(
            pretrained_model_type=args.pretrained_model_args,
            min_size=config.IMG_SIZE[0],
            max_size=config.IMG_SIZE[1],
            mean_file=args.mean,
            classify_mode=use_au_rcnn_loss,
            n_class=class_num,
            use_roi_align=args.roi_align)
        au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain)

    if use_au_rcnn_loss:
        au_rcnn_train_loss = AU_RCNN_TrainChainLoss()
        loss_head_module = au_rcnn_train_loss

    elif args.conv_rnn_type == ConvRNNType.conv_lstm:
        label_dependency_layer = None
        if args.use_label_dependency:
            label_dependency_layer = LabelDependencyRNNLayer(
                args.database,
                in_size=2048,
                class_num=class_num,
                train_mode=True,
                label_win_size=args.label_win_size)
        space_time_conv_lstm = SpaceTimeConv(
            label_dependency_layer,
            args.use_label_dependency,
            class_num,
            spatial_edge_mode=args.spatial_edge_mode,
            temporal_edge_mode=args.temporal_edge_mode,
            conv_rnn_type=args.conv_rnn_type)
        loss_head_module = space_time_conv_lstm
    elif args.conv_rnn_type == ConvRNNType.sep_conv_lstm:
        space_time_sep_conv_lstm = SpaceTimeSepConv(
            database=args.database,
            class_num=class_num,
            spatial_edge_mode=args.spatial_edge_mode,
            temporal_edge_mode=args.temporal_edge_mode)
        loss_head_module = space_time_sep_conv_lstm

    elif args.conv_rnn_type == ConvRNNType.fc_lstm:
        space_time_fc_lstm = SpaceTimeSepFcLSTM(
            database=args.database,
            class_num=class_num,
            spatial_edge_mode=args.spatial_edge_mode,
            temporal_edge_mode=args.temporal_edge_mode)
        loss_head_module = space_time_fc_lstm

    model = Wrapper(au_rcnn_train_chain_list,
                    loss_head_module,
                    args.database,
                    args.sample_frame,
                    use_feature_map=use_feature_map_res45,
                    two_stream_mode=args.two_stream_mode)
    batch_size = args.batch_size
    img_dataset = AUDataset(database=args.database,
                            fold=args.fold,
                            split_name='trainval',
                            split_index=args.split_idx,
                            mc_manager=mc_manager,
                            train_all_data=False)

    train_video_data = AU_video_dataset(
        au_image_dataset=img_dataset,
        sample_frame=args.sample_frame,
        train_mode=(args.two_stream_mode != TwoStreamMode.optical_flow),
        paper_report_label_idx=paper_report_label_idx,
    )

    Transform = Transform3D

    train_video_data = TransformDataset(train_video_data,
                                        Transform(au_rcnn, mirror=False))

    if args.proc_num == 1:
        train_iter = SerialIterator(train_video_data,
                                    batch_size * args.sample_frame,
                                    repeat=True,
                                    shuffle=False)
    else:
        train_iter = MultiprocessIterator(train_video_data,
                                          batch_size=batch_size *
                                          args.sample_frame,
                                          n_processes=args.proc_num,
                                          repeat=True,
                                          shuffle=False,
                                          n_prefetch=10,
                                          shared_mem=10000000)

    if len(args.gpu) > 1:
        for gpu in args.gpu:
            chainer.cuda.get_device_from_id(gpu).use()
    else:
        chainer.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu(args.gpu[0])

    optimizer = None
    if args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(
            lr=args.lr
        )  # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to AdaGrad because the loss became NaN
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        optimizer = chainer.optimizers.AdaDelta()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer_name = args.optimizer

    key_str = "{0}_fold_{1}".format(args.fold, args.split_idx)
    file_list = []
    file_list.extend(os.listdir(args.out))
    snapshot_model_file_name = args.out + os.sep + filter_last_checkpoint_filename(
        file_list, "model", key_str)

    # BP4D_3_fold_1_resnet101@rnn@no_temporal@use_paper_num_label@roi_align@label_dep_layer@conv_lstm@sampleframe#13_model.npz
    use_paper_key_str = "use_paper_num_label" if args.use_paper_num_label else "all_avail_label"
    roi_align_key_str = "roi_align" if args.roi_align else "roi_pooling"
    label_dependency_layer_key_str = "label_dep_layer" if args.use_label_dependency else "no_label_dep"

    single_model_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@{7}@{8}@{9}@sampleframe#{10}_model.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.spatial_edge_mode, args.temporal_edge_mode,
            use_paper_key_str, roi_align_key_str,
            label_dependency_layer_key_str, args.conv_rnn_type,
            args.sample_frame)  # , args.label_win_size
    print(single_model_file_name)
    pretrained_optimizer_file_name = args.out + os.sep + \
        '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@{7}@{8}@{9}@sampleframe#{10}_optimizer.npz'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.spatial_edge_mode, args.temporal_edge_mode,
            use_paper_key_str, roi_align_key_str,
            label_dependency_layer_key_str, args.conv_rnn_type,
            args.sample_frame)  # , args.label_win_size
    print(pretrained_optimizer_file_name)

    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snatshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)

    if args.snap_individual:
        if os.path.exists(snapshot_model_file_name) and os.path.isfile(
                snapshot_model_file_name):
            print("loading pretrained snapshot:{}".format(
                snapshot_model_file_name))
            chainer.serializers.load_npz(snapshot_model_file_name, model)
    else:
        if os.path.exists(single_model_file_name):
            print("loading pretrained snapshot:{}".format(
                single_model_file_name))
            chainer.serializers.load_npz(single_model_file_name, model)

    if args.fix:
        au_rcnn = model.au_rcnn_train_chain.au_rcnn
        au_rcnn.extractor.conv1.W.update_rule.enabled = False
        au_rcnn.extractor.bn1.gamma.update_rule.enabled = False
        au_rcnn.extractor.bn1.beta.update_rule.enabled = False
        # freeze every conv/bn link inside res2; block "a" additionally owns
        # conv4/bn4 (the projection shortcut), blocks "b1"/"b2" do not
        for res2_name in ["a", "b1", "b2"]:
            block = getattr(au_rcnn.extractor.res2, res2_name)
            layer_names = ["conv1", "conv2", "conv3", "bn1", "bn2", "bn3"]
            if res2_name == "a":
                layer_names.extend(["conv4", "bn4"])
            for layer_name in layer_names:
                layer = getattr(block, layer_name)
                if layer_name.startswith("conv"):
                    layer.W.update_rule.enabled = False
                else:
                    layer.gamma.update_rule.enabled = False
                    layer.beta.update_rule.enabled = False

    # if (args.spatial_edge_mode in [SpatialEdgeMode.ld_rnn, SpatialEdgeMode.bi_ld_rnn] or args.temporal_edge_mode in \
    #     [TemporalEdgeMode.ld_rnn, TemporalEdgeMode.bi_ld_rnn]) or (args.conv_rnn_type != ConvRNNType.conv_rcnn):
    #     updater = BPTTUpdater(train_iter, optimizer, converter=lambda batch, device: concat_examples(batch, device,
    #                           padding=0), device=args.gpu[0])

    if len(args.gpu) > 1:
        gpu_dict = {"main": args.gpu[0]}  # many gpu will use
        parallel_models = {"parallel": model.au_rcnn_train_chain}
        for slave_gpu in args.gpu[1:]:
            gpu_dict[slave_gpu] = int(slave_gpu)

        updater = PartialParallelUpdater(
            train_iter,
            optimizer,
            args.database,
            models=parallel_models,
            devices=gpu_dict,
            converter=lambda batch, device: concat_examples(
                batch, device, padding=0))
    else:
        print("only one GPU({0}) updater".format(args.gpu[0]))
        updater = chainer.training.StandardUpdater(
            train_iter,
            optimizer,
            device=args.gpu[0],
            converter=lambda batch, device: concat_examples(
                batch, device, padding=0))

    @training.make_extension(trigger=(1, "epoch"))
    def reset_order(trainer):
        print("reset dataset order after one epoch")
        if args.debug:
            trainer.updater._iterators[
                "main"].dataset._dataset.reset_for_debug_mode()
        else:
            trainer.updater._iterators[
                "main"].dataset._dataset.reset_for_train_mode()

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(reset_order)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
                   trigger=(args.snapshot, 'iteration'))

    if not args.snap_individual:

        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=os.path.basename(single_model_file_name)),
                       trigger=(args.snapshot, 'iteration'))

    else:
        snap_model_file_name = '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@{7}@{8}@{9}sampleframe#{10}@win#{11}_'.format(
            args.database, args.fold, args.split_idx, args.backbone,
            args.spatial_edge_mode, args.temporal_edge_mode, use_paper_key_str,
            roi_align_key_str, label_dependency_layer_key_str,
            args.conv_rnn_type, args.sample_frame, args.label_win_size)

        snap_model_file_name = snap_model_file_name + "{.updater.iteration}.npz"

        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=snap_model_file_name),
                       trigger=(args.snapshot, 'iteration'))

    log_interval = 100, 'iteration'
    print_interval = 10, 'iteration'
    plot_interval = 10, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer),
                       trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="log_{0}_fold_{1}_{2}@{3}@{4}@{5}.log".format(
                args.fold, args.split_idx, args.backbone,
                args.spatial_edge_mode, args.temporal_edge_mode,
                args.conv_rnn_type)))
    # trainer.reporter.add_observer("main_par", model.loss_head_module)
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/accuracy',
    ]),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name='loss_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.spatial_edge_mode, args.temporal_edge_mode,
                args.conv_rnn_type),
            trigger=plot_interval),
                       trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.spatial_edge_mode, args.temporal_edge_mode,
                args.conv_rnn_type),
            trigger=plot_interval),
                       trigger=plot_interval)

    trainer.run()
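
# Hedged usage sketch: one plausible invocation of this training script. The
# script file name is hypothetical, the flag names follow the parser above, and
# the enum flag values are assumptions based on the help strings:
#
#   python train_space_time_au_rcnn.py --gpu 0 --database BP4D \
#       --backbone resnet101 --pretrained_model_rgb imagenet \
#       --spatial_edge_mode all_edge --temporal_edge_mode rnn \
#       --conv_rnn_type conv_lstm --two_stream_mode spatial \
#       --fold 3 --split_idx 1 --sample_frame 10 \
#       --use_paper_num_label --roi_align --out end_to_end_result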
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)'
                        )  # open_crf layer only works for CPU mode
    parser.add_argument(
        "--model", "-m",
        help="pretrained model file path")  # which contains pretrained target
    parser.add_argument("--test",
                        "-tt",
                        default="",
                        help="test txt folder path")
    parser.add_argument("--database",
                        "-db",
                        default="BP4D",
                        help="which database you want to evaluate")
    parser.add_argument(
        "--check",
        "-ck",
        action="store_true",
        help="check that the npy files and all list files were generated "
        "correctly (off by default)")
    parser.add_argument("--num_attrib",
                        type=int,
                        default=2048,
                        help="feature dimension")
    parser.add_argument("--geo_num_attrib",
                        type=int,
                        default=4,
                        help='geometry feature dimension')
    parser.add_argument("--train_edge",
                        default="all",
                        help="all/spatio/temporal")
    parser.add_argument("--attn_heads", type=int, default=16)
    parser.add_argument("--layers",
                        type=int,
                        default=1,
                        help="layer number of edge/node rnn")
    parser.add_argument(
        "--bi_lstm",
        action="store_true",
        help="whether or not to use bi_lstm as edge/node rnn base")
    parser.add_argument(
        "--use_relation_net",
        action='store_true',
        help='whether to use st_relation_net instead of space_time_net')
    parser.add_argument(
        "--relation_net_lstm_first",
        action='store_true',
        help='whether to use relation_net_lstm_first_forward in st_relation_net'
    )

    args = parser.parse_args()
    adaptive_AU_database(args.database)
    mode_dict = extract_mode(args.model)

    paper_report_label = OrderedDict()
    if mode_dict["use_paper_report_label_num"]:
        for AU_idx, AU in sorted(config.AU_SQUEEZE.items(),
                                 key=lambda e: int(e[0])):
            if args.database == "BP4D":
                paper_use_AU = config.paper_use_BP4D
            elif args.database == "DISFA":
                paper_use_AU = config.paper_use_DISFA
            if AU in paper_use_AU:
                paper_report_label[AU_idx] = AU
    paper_report_label_idx = list(paper_report_label.keys())
    if not paper_report_label_idx:
        paper_report_label_idx = None

    test_dir = args.test if not args.test.endswith("/") else args.test[:-1]
    assert args.database in test_dir
    dataset = GlobalDataSet(num_attrib=args.num_attrib,
                            num_geo_attrib=args.geo_num_attrib,
                            train_edge=args.train_edge)  # ../data_info.json
    file_name = None
    for _file_name in os.listdir(args.test):
        if os.path.exists(args.test + os.sep +
                          _file_name) and _file_name.endswith(".txt"):
            file_name = args.test + os.sep + _file_name
            break
    sample = dataset.load_data(file_name,
                               npy_in_parent_dir=False,
                               paper_use_label_idx=paper_report_label_idx)
    print("pre load done")

    crf_pact_structure = CRFPackageStructure(
        sample, dataset, num_attrib=dataset.num_attrib_type, need_s_rnn=False)
    print("""
        ======================================
        gpu:{4}
        argument: 
                neighbor_mode:{0}
                spatial_edge_mode:{1}
                temporal_edge_mode:{2}
                use_geometry_features:{3}
                use_paper_report_label_num:{5}
        ======================================
        """.format(mode_dict["neighbor_mode"], mode_dict["spatial_edge_mode"],
                   mode_dict["temporal_edge_mode"],
                   mode_dict["use_geo_feature"], args.gpu,
                   mode_dict["use_paper_report_label_num"]))
    if args.use_relation_net:
        model = StRelationNetPlus(
            crf_pact_structure,
            in_size=dataset.num_attrib_type,
            out_size=dataset.label_bin_len,
            database=args.database,
            neighbor_mode=NeighborMode[mode_dict["neighbor_mode"]],
            spatial_edge_mode=SpatialEdgeMode[mode_dict["spatial_edge_mode"]],
            recurrent_block_type=RecurrentType[
                mode_dict["temporal_edge_mode"]],
            attn_heads=args.attn_heads,
            dropout=0.0,
            use_geometry_features=mode_dict["use_geo_feature"],
            layers=args.layers,
            bi_lstm=args.bi_lstm,
            lstm_first_forward=args.relation_net_lstm_first)
    else:
        model = StAttentioNetPlus(
            crf_pact_structure,
            dataset.num_attrib_type,
            dataset.label_bin_len,
            args.database,
            NeighborMode[mode_dict["neighbor_mode"]],
            SpatialEdgeMode[mode_dict["spatial_edge_mode"]],
            RecurrentType[mode_dict["temporal_edge_mode"]],
            attn_heads=args.attn_heads,
            dropout=0.0,
            use_geometry_features=mode_dict["use_geo_feature"],
            layers=args.layers,
            bi_lstm=args.bi_lstm)
    print("loading {}".format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)
    with chainer.no_backprop_mode():
        test_data = GraphDataset(directory=test_dir,
                                 attrib_size=dataset.num_attrib_type,
                                 global_dataset=dataset,
                                 need_s_rnn=True,
                                 npy_in_parent_dir=False,
                                 need_cache_factor_graph=False,
                                 get_geometry_feature=True,
                                 paper_use_label_idx=paper_report_label_idx)
        test_iter = chainer.iterators.SerialIterator(test_data,
                                                     1,
                                                     shuffle=False,
                                                     repeat=False)
        au_evaluator = ActionUnitEvaluator(
            test_iter,
            model,
            args.gpu,
            database=args.database,
            paper_report_label=paper_report_label)
        observation = au_evaluator.evaluate()
        with open(
                os.path.dirname(args.model) + os.sep +
                "evaluation_result_{0}@{1}@{2}@{3}@{4}.json".format(
                    args.database, NeighborMode[mode_dict["neighbor_mode"]],
                    SpatialEdgeMode[mode_dict["spatial_edge_mode"]],
                    RecurrentType[mode_dict["temporal_edge_mode"]],
                    mode_dict["use_geo_feature"]), "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
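
# Hedged usage sketch: a plausible invocation of this evaluation script. The
# script and model file names are hypothetical; note that extract_mode parses
# the network configuration out of the model file name, so that name must
# follow the training script's naming scheme:
#
#   python space_time_net_evaluate.py --gpu 0 --database BP4D \
#       --model result/BP4D_3_fold_1_model.npz --test /path/to/BP4D_test_txt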
Example #8
def main():
    parser = argparse.ArgumentParser(
        description='generate Graph desc file script')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument("--image",
                        default='C:/Users/machen/Downloads/tmp/face.jpg')
    parser.add_argument(
        "--model", default="C:/Users/machen/Downloads/tmp/BP4D_3_fold_1.npz")
    parser.add_argument("--pretrained_model_name",
                        '-premodel',
                        default='resnet101')
    parser.add_argument('--database', default='BP4D', help='database to use')
    parser.add_argument('--device',
                        default=0,
                        type=int,
                        help='GPU device number')
    args = parser.parse_args()
    adaptive_AU_database(args.database)

    if args.pretrained_model_name == "resnet101":
        faster_rcnn = FasterRCNNResnet101(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model="resnet101",
            mean_file=args.mean,
            use_lstm=False,
            extract_len=1000
        )  # can be replaced with /home/machen/face_expr/result/snapshot_model.npz
    elif args.pretrained_model_name == "vgg":
        faster_rcnn = FasterRCNNVGG16(n_fg_class=len(config.AU_SQUEEZE),
                                      pretrained_model="imagenet",
                                      mean_file=args.mean,
                                      use_lstm=False,
                                      extract_len=1000)

    if os.path.exists(args.model):
        print("loading pretrained snapshot:{}".format(args.model))
        chainer.serializers.load_npz(args.model, faster_rcnn)
    if args.device >= 0:
        faster_rcnn.to_gpu(args.device)
        chainer.cuda.get_device_from_id(int(args.device)).use()

    heatmap_gen = HeatMapGenerator(np.load(args.model), use_relu=True)
    if args.device >= 0:
        heatmap_gen.to_gpu(args.device)
    cropped_face, AU_box_dict = FaceMaskCropper.get_cropface_and_box(
        args.image, args.image, channel_first=True)
    au_couple_dict = get_zip_ROI_AU()
    au_couple_child = get_AU_couple_child(
        au_couple_dict)  # AU couple tuple => child fetch list
    au_couple_box = dict()  # value is box (4 tuple coordinate) list

    for AU, AU_couple in au_couple_dict.items():
        au_couple_box[AU_couple] = AU_box_dict[AU]
    box_lst = []
    roi_no_AU_couple_dict = dict()
    roi_no = 0
    for AU_couple, couple_box_lst in au_couple_box.items():
        box_lst.extend(couple_box_lst)
        for _ in couple_box_lst:
            roi_no_AU_couple_dict[roi_no] = AU_couple
            roi_no += 1

    box_lst = np.asarray(box_lst)
    cropped_face = cropped_face.astype(np.float32)
    orig_face = cropped_face
    cropped_face = faster_rcnn.prepare(
        cropped_face)  # substract mean pixel value
    box_lst = box_lst.astype(np.float32)
    orig_box_lst = box_lst
    batch = [
        (cropped_face, box_lst),
    ]
    cropped_face, box_lst = concat_examples(
        batch, args.device)  # N,3, H, W, ;  N, F, 4

    if box_lst.shape[1] != config.BOX_NUM[args.database]:
        print("error box num {0} != {1}".format(box_lst.shape[1],
                                                config.BOX_NUM[args.database]))
        return
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        cropped_face = chainer.Variable(cropped_face)
        box_lst = chainer.Variable(box_lst)
        roi_preds, _ = faster_rcnn.predict(cropped_face, box_lst)  # R, 22
        roi_feature_maps = faster_rcnn.extract(orig_face, orig_box_lst,
                                               'res5')  # R, 2048 7,7

        roi_images = []
        box_lst = box_lst[0].data.astype(np.int32)
        for box in box_lst:
            y_min, x_min, y_max, x_max = box
            roi_image = orig_face[:, y_min:y_max + 1,
                                  x_min:x_max + 1]  # N, 3, roi_H, roi_W
            roi_images.append(roi_image)  # list of  N, 3, roi_H, roi_W
        cmap = plt.get_cmap('jet')
        # image_activate_map = np.zeros((cropped_face.shape[2], cropped_face.shape[3]), dtype=np.float32)
        for box_id, (roi_image, roi_feature_map) in enumerate(
                zip(roi_images, roi_feature_maps)):
            y_min, x_min, y_max, x_max = box_lst[box_id]
            # 22, roi_h, roi_w, 3
            xp = chainer.cuda.get_array_module(roi_feature_map)
            roi_feature_map = xp.expand_dims(roi_feature_map, 0)
            #   class_roi_overlay_img = 22, roi_h, roi_w
            class_roi_activate_img = heatmap_gen.generate_activate_roi_map(
                roi_feature_map, (y_max - y_min + 1, x_max - x_min + 1))
            roi_pred = roi_preds[box_id]  # 22
            # choice_activate_map = np.zeros((y_max-y_min+1, x_max-x_min+1), dtype=np.float32)
            # use_choice = False
            if len(np.nonzero(roi_pred)
                   [0]) > 0:  # TODO: still need a proper class-selection strategy; heatmaps should be combined additively with cv2.add
                class_idx = random.choice(np.nonzero(roi_pred)[0])
                AU = config.AU_SQUEEZE[class_idx]
                print(AU)
                choice_activate_map = class_roi_activate_img[
                    class_idx]  # roi_h, roi_w
                activation_color_map = np.round(
                    cmap(choice_activate_map)[:, :, :3] * 255).astype(np.uint8)
                overlay_img = roi_images[
                    box_id] / 2 + activation_color_map.transpose(2, 0, 1) / 2
                overlay_img = np.transpose(overlay_img,
                                           (1, 2, 0)).astype(np.uint8)
                vis_img = cv2.cvtColor(overlay_img, cv2.COLOR_RGB2BGR)
                cv2.imshow("new", vis_img)
                cv2.waitKey(0)
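
# Hedged usage sketch: a plausible invocation of this heat-map visualization
# script (script name hypothetical; --image/--model/--mean defaults come from
# the parser above):
#
#   python heatmap_vis.py --image /path/to/face.jpg \
#       --model /path/to/BP4D_3_fold_1.npz --pretrained_model_name resnet101 --device 0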
Example #9
    for file_path in file_path_list:
        print("processing {}".format(file_path))
        with open(file_path, "r") as file_obj:
            for line in file_obj:
                path = line.split()[0]
                print("processing {}".format(path))
                database = line.split()[-1]
                abs_path = config.RGB_PATH[database] + "/" + path
                AU_group_box_area = stats_AU_group_area(
                    abs_path, mc_cached, database)
                for AU_couple, area in AU_group_box_area.items():

                    all_AU_group[AU_couple].append(area)

    for AU_couple, area_list in all_AU_group.items():
        print(AU_couple, sum(area_list) / len(area_list))


if __name__ == "__main__":
    database = "BP4D"
    file_path_list = [
        "/home/machen/dataset/{}/idx/3_fold/id_trainval_1.txt".format(
            database),
        "/home/machen/dataset/{}/idx/3_fold/id_test_1.txt".format(database)
    ]
    from collections_toolkit.memcached_manager import PyLibmcManager

    adaptive_AU_database(database)
    mc_manager = PyLibmcManager('127.0.0.1')

    read_idx_file(file_path_list, mc_manager)
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-b', type=int, default=-1,
                        help='each batch size will be a new file')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='gpu that used to extract feature')

    parser.add_argument("--out_dir", '-o', default="/home/machen/dataset/new_graph/")
    parser.add_argument("--model",'-m', help="the AU R-CNN pretrained model file to load to extract feature")
    parser.add_argument("--trainval_test", '-tt', help="train or test")
    parser.add_argument("--database", default="BP4D")
    parser.add_argument('--use_memcached', action='store_true',
                        help='whether to use memcached to speed up fetching of crops and masks')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument('--force_write', action='store_true')
    parser.add_argument('--mean', default=config.ROOT_PATH + "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--jump_exist_file', action='store_true',
                        help='skip output files that already exist')
    args = parser.parse_args()

    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(args.memcached_host))

    result_dict = extract_mode(args.model)
    fold = result_dict["fold"]
    backbone = result_dict["backbone"]
    split_idx = result_dict["split_idx"]
    if backbone == 'vgg':
        faster_rcnn = FasterRCNNVGG16(n_fg_class=len(config.AU_SQUEEZE),
                                      pretrained_model="imagenet",
                                      mean_file=args.mean,
                                      use_lstm=False,
                                      extract_len=1000,
                                      fix=False)  # can be replaced with /home/nco/face_expr/result/snapshot_model.npz
    elif backbone == 'resnet101':
        faster_rcnn = FasterRCNNResnet101(n_fg_class=len(config.AU_SQUEEZE),
                                          pretrained_model=backbone,
                                          mean_file=args.mean,
                                          use_lstm=False,
                                          extract_len=1000, fix=False)
    assert os.path.exists(args.model)
    print("loading model file : {}".format(args.model))
    chainer.serializers.load_npz(args.model, faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        faster_rcnn.to_gpu(args.gpu)

    dataset = AUExtractorDataset(database=args.database,
                           fold=fold, split_name=args.trainval_test,
                           split_index=split_idx, mc_manager=mc_manager, use_lstm=False,
                           train_all_data=False,
                           prefix="", pretrained_target="", pretrained_model=faster_rcnn, extract_key="avg_pool",
                           device=-1, batch_size=args.batch_size
                           )
    train_test = "train" if args.trainval_test == "trainval" else "test"
    jump_dataset = JumpExistFileDataset(dataset, args.out_dir, fold, args.database, split_idx,
                                        args.batch_size, train_test, args.jump_exist_file)
    dataset_iter = BatchKeepOrderIterator(jump_dataset, batch_size=args.batch_size, repeat=False, shuffle=False)

    file_key_counter = 0
    last_sequence_key = None
    for batch in dataset_iter:
        features = []
        bboxes = []
        labels = []
        file_key_counter += 1
        for idx, (feature, bbox, label, img_path, _file_key_counter) in enumerate(batch):

            sequence_key = "_".join((img_path.split("/")[-3], img_path.split("/")[-2]))
            if last_sequence_key is None:
                last_sequence_key = sequence_key
            if sequence_key != last_sequence_key:
                file_key_counter = 1
                last_sequence_key = sequence_key
            assert file_key_counter == _file_key_counter, (file_key_counter, _file_key_counter, img_path)
            if feature is None:
                print("jump img_path : {}".format(img_path))
                continue

            features.extend(feature)
            bboxes.extend(bbox)
            labels.extend(label)
        if features:
            if args.trainval_test == "trainval":
                file_name = args.out_dir + os.sep + "{0}_{1}_fold_{2}".format(args.database,fold, split_idx) + "/train" +os.sep +sequence_key + "@" + str(file_key_counter) + ".npz"
            else:
                file_name = args.out_dir + os.sep + "{0}_{1}_fold_{2}".format(args.database,fold, split_idx) + "/test" + os.sep +sequence_key + "@" + str(file_key_counter) + ".npz"

            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            features = np.stack(features)
            bboxes = np.stack(bboxes)
            labels = np.stack(labels)
            print("write : {}".format(file_name))
            assert not os.path.exists(file_name), file_name
            np.savez(file_name, feature=features, bbox=bboxes, label=labels)
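
# Hedged usage sketch: a plausible invocation of this feature-extraction script.
# The script and model file names are hypothetical; fold, backbone and split_idx
# are parsed out of the model file name by extract_mode:
#
#   python extract_au_rcnn_feature.py --model BP4D_3_fold_1_resnet101_model.npz \
#       --trainval_test trainval --database BP4D --gpu 0 \
#       --out_dir /home/machen/dataset/new_graph/ --batch_size 256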
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')  # open_crf layer only works for CPU mode
    parser.add_argument("--target_dir", "-t", default="result", help="pretrained model file path") # which contains pretrained target
    parser.add_argument("--test", "-tt", default="", help="test txt folder path")
    parser.add_argument("--hidden_size", "-hs",default=1024, type=int, help="hidden_size of srnn++")
    parser.add_argument("--database","-db",default="BP4D", help="which database you want to evaluate")
    parser.add_argument("--bi_lstm","-bi", action="store_true", help="srnn++ use bi_lstm or not, if pretrained model use bi_lstm, you must set this flag on")
    parser.add_argument("--check", "-ck", action="store_true", help="default not to check the npy file and all list file generate correctly")
    parser.add_argument("--num_attrib",type=int,default=2048, help="feature dimension")
    parser.add_argument("--train_edge",default="all",help="all/spatio/temporal")
    args = parser.parse_args()
    adaptive_AU_database(args.database)
    test_dir = args.test if not args.test.endswith("/") else args.test[:-1]
    assert args.database in test_dir
    dataset = GlobalDataSet(num_attrib=args.num_attrib, train_edge=args.train_edge) # ../data_info.json
    file_name = None
    for folder in os.listdir(args.test):
        if os.path.isdir(args.test + os.sep + folder):
            for _file_name in os.listdir(args.test + os.sep + folder):
                file_name = args.test + os.sep + folder + os.sep + _file_name
                break
            break
    sample = dataset.load_data(file_name)
    print("pre load done")


    target_dict = {}
    need_srnn = False
    use_crf = False
    for model_path in os.listdir(args.target_dir):  # all model pretrained file in 3_fold_1's one folder, 3_fold_2 in another folder
        if model_path.endswith("model.npz"):
            assert ("opencrf" in model_path or "srnn_plus" in model_path)
            if "opencrf" in model_path:
                assert need_srnn == False
                use_crf = True
                # note that open_crf layer doesn't support GPU
                crf_pact_structure = CRFPackageStructure(sample, dataset, num_attrib=dataset.num_attrib_type, need_s_rnn=False)
                model = OpenCRFLayer(node_in_size=dataset.num_attrib_type, weight_len=crf_pact_structure.num_feature)
                print("loading {}".format(args.target_dir + os.sep + model_path, model))
                chainer.serializers.load_npz(args.target_dir + os.sep + model_path, model)
            elif "srnn_plus" in model_path:
                crf_pact_structure = CRFPackageStructure(sample, dataset, num_attrib=args.hidden_size, need_s_rnn=True)
                with_crf = "crf" in model_path
                need_srnn = True
                model = StructuralRNNPlus(crf_pact_structure, in_size=dataset.num_attrib_type,
                                          out_size=dataset.num_label,
                                          hidden_size=args.hidden_size, with_crf=with_crf,
                                          use_bi_lstm=args.bi_lstm)  # if the pretrained model was trained with bi_lstm, you must set bi_lstm=True here
                print("loading {}".format(args.target_dir + os.sep + model_path))
                chainer.serializers.load_npz(args.target_dir + os.sep + model_path, model)
                if args.gpu >= 0:
                    chainer.cuda.get_device_from_id(args.gpu).use()
                    model.to_gpu(args.gpu)
                    if with_crf:
                        model.open_crf.to_cpu()
            trainer_keyword_pattern = re.compile(r".*?((\d+_)+)_*")
            matcher = trainer_keyword_pattern.match(model_path)
            assert matcher
            trainer_keyword = matcher.group(1)[:-1]
            target_dict[trainer_keyword] = model
    if len(target_dict) == 0:
        print("error , no pretrained npz file in {}".format(args.target_dir))
        return
    if args.check:
        check_pretrained_model_match_file(target_dict, args.test)
    with chainer.no_backprop_mode():
        test_data = GraphDataset(directory=args.test, attrib_size=args.hidden_size, global_dataset=dataset,
                                 need_s_rnn=need_srnn, need_cache_factor_graph=False, target_dict=target_dict)  # if any file uses structural_rnn, all pact_structures need structural_rnn
        test_iter = chainer.iterators.SerialIterator(test_data, 1, shuffle=False, repeat=False)
        gpu = args.gpu if not use_crf else -1
        print('using gpu :{}'.format(gpu))
        chainer.config.train = False
        au_evaluator = ActionUnitRoILabelSplitEvaluator(test_iter, target_dict, device=gpu, database=args.database)
        observation = au_evaluator.evaluate()
        with open(args.target_dir + os.sep + "evaluation_result.json", "w") as file_obj:
            file_obj.write(json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
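The trainer_keyword pattern above extracts the leading digit groups from a pretrained file name and strips the trailing underscore. A minimal sketch of that behavior, using a hypothetical file name:

import re

pattern = re.compile(r".*?((\d+_)+)_*")
matcher = pattern.match("1_2_3__BP4D_srnn_plus_model.npz")  # hypothetical name
assert matcher.group(1)[:-1] == "1_2_3"  # trailing "_" removed, as in the code above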
Beispiel #12
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--step_size',
                        '-ss',
                        type=int,
                        default=3000,
                        help='step_size for lr exponential')
    parser.add_argument('--gradclip',
                        '-c',
                        type=float,
                        default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--pretrain',
                        '-pr',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot',
                        '-snap',
                        type=int,
                        default=100,
                        help='snapshot interval (iterations) for saving checkpoints')
    parser.add_argument('--test_mode',
                        action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--valid',
                        '-val',
                        default='',
                        help='Validation directory path containing validation txt files')
    parser.add_argument('--test',
                        '-tt',
                        default='graph_test',
                        help='Test directory path containing test txt files')
    parser.add_argument('--train',
                        '-tr',
                        default="D:/toy/",
                        help='Train directory path containing train txt files')
    parser.add_argument('--train_edge',
                        default="all",
                        help="train temporal/all to comparision")
    parser.add_argument('--database', default="BP4D", help="BP4D/DISFA")
    parser.add_argument(
        '--use_pure_python',
        action='store_true',
        help='use pure python code to check that the optimized code works correctly')
    parser.add_argument('--lr', '-l', type=float, default=0.1)
    parser.add_argument("--profile",
                        "-p",
                        action="store_true",
                        help="whether to profile to examine speed bottleneck")
    parser.add_argument("--num_attrib",
                        type=int,
                        default=2048,
                        help="node feature dimension")
    parser.add_argument("--need_cache_graph",
                        "-ng",
                        action="store_true",
                        help="whether to cache factor graph to LRU cache")
    parser.add_argument("--eval_mode",
                        '-eval',
                        action="store_true",
                        help="whether to evaluation or not")
    parser.add_argument("--proc_num", "-pn", type=int, default=1)
    parser.add_argument("--resume",
                        action="store_true",
                        help="resume from pretrained model")
    parser.set_defaults(test=False)
    args = parser.parse_args()
    config.OPEN_CRF_CONFIG["use_pure_python"] = args.use_pure_python
    # because we modify config.OPEN_CRF_CONFIG thus will influence the open_crf layer
    from graph_learning.dataset.crf_pact_structure import CRFPackageStructure
    from graph_learning.dataset.graph_dataset import GraphDataset
    from graph_learning.extensions.opencrf_evaluator import OpenCRFEvaluator
    from graph_learning.dataset.graph_dataset_reader import GlobalDataSet
    from graph_learning.updater.bptt_updater import convert
    from graph_learning.extensions.AU_roi_label_split_evaluator import ActionUnitEvaluator
    if args.use_pure_python:

        from graph_learning.model.open_crf.pure_python.open_crf_layer import OpenCRFLayer
    else:
        from graph_learning.model.open_crf.cython.open_crf_layer import OpenCRFLayer

    print_interval = 1, 'iteration'
    val_interval = (5, 'iteration')
    adaptive_AU_database(args.database)
    root_dir = os.path.dirname(os.path.dirname(args.train))
    dataset = GlobalDataSet(num_attrib=args.num_attrib,
                            train_edge=args.train_edge)
    file_name = list(
        filter(lambda e: e.endswith(".txt"), os.listdir(args.train)))[0]
    sample = dataset.load_data(args.train + os.sep + file_name)
    print("pre load done")

    crf_pact_structure = CRFPackageStructure(
        sample, dataset, num_attrib=dataset.num_attrib_type, need_s_rnn=False)
    model = OpenCRFLayer(node_in_size=dataset.num_attrib_type,
                         weight_len=crf_pact_structure.num_feature)

    train_str = args.train
    if train_str[-1] == "/":
        train_str = train_str[:-1]
    trainer_keyword = os.path.basename(train_str)
    trainer_keyword_tuple = tuple(trainer_keyword.split("_"))
    LABEL_SPLIT = config.BP4D_LABEL_SPLIT if args.database == "BP4D" else config.DISFA_LABEL_SPLIT
    if trainer_keyword_tuple not in LABEL_SPLIT:
        return
    # assert "_" in trainer_keyword

    train_data = GraphDataset(args.train,
                              attrib_size=dataset.num_attrib_type,
                              global_dataset=dataset,
                              need_s_rnn=False,
                              need_cache_factor_graph=args.need_cache_graph,
                              get_geometry_feature=False)
    if args.proc_num == 1:
        train_iter = chainer.iterators.SerialIterator(train_data,
                                                      1,
                                                      shuffle=True)
    elif args.proc_num > 1:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data,
            batch_size=1,
            n_processes=args.proc_num,
            repeat=True,
            shuffle=True,
            n_prefetch=10,
            shared_mem=31457280)
    optimizer = chainer.optimizers.SGD(lr=args.lr)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    updater = StandardUpdater(train_iter, optimizer, converter=convert)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    interval = 1
    if args.test_mode:
        chainer.config.train = False

    trainer.extend(
        PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            "opencrf_val/main/hit",  #"opencrf_validation/main/U_hit",
            "opencrf_val/main/miss",  #"opencrf_validation/main/U_miss",
            "opencrf_val/main/F1",  #"opencrf_validation/main/U_F1"
            'opencrf_val/main/accuracy',
        ]),
        trigger=print_interval)
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=print_interval,
            log_name="open_crf_{}.log".format(trainer_keyword)))

    optimizer_snapshot_name = "{0}_{1}_opencrf_optimizer.npz".format(
        trainer_keyword, args.database)
    model_snapshot_name = "{0}_{1}_opencrf_model.npz".format(
        trainer_keyword, args.database)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name),
                   trigger=(args.snapshot, 'iteration'))

    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=model_snapshot_name),
                   trigger=(args.snapshot, 'iteration'))

    if args.resume and os.path.exists(args.out + os.sep + model_snapshot_name):
        print("loading model_snapshot_name to model")
        chainer.serializers.load_npz(args.out + os.sep + model_snapshot_name,
                                     model)
    if args.resume and os.path.exists(args.out + os.sep +
                                      optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(
            args.out + os.sep + optimizer_snapshot_name, optimizer)

    # trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1))
    # trainer.extend(chainer.training.extensions.snapshot(),
    #                trigger=(args.snapshot, 'epoch'))

    # trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.9), trigger=(1, 'epoch'))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name="{}_train_loss.png".format(trainer_keyword)),
                       trigger=(100, "iteration"))
        trainer.extend(chainer.training.extensions.PlotReport(
            ['opencrf_val/main/F1', 'opencrf_val/main/accuracy'],
            file_name="{}_val_f1.png".format(trainer_keyword)),
                       trigger=val_interval)

    if args.valid:
        valid_data = GraphDataset(
            args.valid,
            attrib_size=dataset.num_attrib_type,
            global_dataset=dataset,
            need_s_rnn=False,
            need_cache_factor_graph=args.need_cache_graph)
        validate_iter = chainer.iterators.SerialIterator(valid_data,
                                                         1,
                                                         repeat=False,
                                                         shuffle=False)
        evaluator = OpenCRFEvaluator(iterator=validate_iter,
                                     target=model,
                                     device=-1)
        trainer.extend(evaluator, trigger=val_interval)

    if args.profile:
        cProfile.runctx("trainer.run()", globals(), locals(), "Profile.prof")
        s = pstats.Stats("Profile.prof")
        s.strip_dirs().sort_stats("time").print_stats()
    else:
        trainer.run()
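A minimal sketch of the optimizer-hook pattern used above: plain SGD with gradient clipping and weight decay attached as hooks (the Linear link is a stand-in for OpenCRFLayer).

import chainer
import chainer.links as L

model = L.Linear(10, 2)  # stand-in for the real model
optimizer = chainer.optimizers.SGD(lr=0.1)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))
optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))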
Beispiel #13
0
def main():
    parser = argparse.ArgumentParser(
        description='training script for Time-axis R-CNN')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu', '-g', type=int, help='GPU ID')
    parser.add_argument('--lr', '-l', type=float, default=0.0001)
    parser.add_argument('--out',
                        '-o',
                        default='output_time_axis_rcnn',
                        help='Output directory')
    parser.add_argument('--database',
                        default='BP4D',
                        help='database: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--optimizer',
                        type=OptimizerType,
                        choices=list(OptimizerType))
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--feature_dim', type=int, default=2048)
    parser.add_argument('--roi_size', type=int, default=7)
    parser.add_argument('--snapshot', '-snap', type=int, default=5)
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument('--two_stream_mode',
                        type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='rgb_flow / optical_flow / rgb')
    parser.add_argument("--faster_backbone",
                        type=FasterBackboneType,
                        choices=list(FasterBackboneType),
                        help='tcn/conv1d')
    parser.add_argument("--data_dir", type=str, default="/extract_features")
    parser.add_argument("--conv_layers", type=int, default=10)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label",
                        action="store_true",
                        help="only to use paper reported number of labels"
                        " to train")

    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    args = parser.parse_args()
    args.data_dir = config.ROOT_PATH + "/" + args.data_dir
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.path.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()

    print('GPU: {}'.format(args.gpu))

    adaptive_AU_database(args.database)

    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())

    if args.faster_backbone == FasterBackboneType.tcn:
        Bone = TcnBackbone
    elif args.faster_backbone == FasterBackboneType.conv1d:
        Bone = FasterBackbone

    if args.two_stream_mode == TwoStreamMode.rgb or args.two_stream_mode == TwoStreamMode.optical_flow:
        faster_extractor_backbone = Bone(args.conv_layers, args.feature_dim,
                                         1024)
        faster_head_module = FasterHeadModule(
            args.feature_dim, class_num + 1, args.roi_size
        )  # note that the class number here must include background
        initialW = chainer.initializers.Normal(0.001)
        spn = SegmentProposalNetwork(1024,
                                     n_anchors=len(config.ANCHOR_SIZE),
                                     initialW=initialW)
        train_chain = TimeSegmentRCNNTrainChain(faster_extractor_backbone,
                                                faster_head_module, spn)
        model = Wrapper(train_chain, two_stream_mode=args.two_stream_mode)

    elif args.two_stream_mode == TwoStreamMode.rgb_flow:
        faster_extractor_backbone = Bone(args.conv_layers, args.feature_dim,
                                         1024)
        faster_head_module = FasterHeadModule(
            args.feature_dim, class_num + 1, args.roi_size
        )  # note that the class number here must include background
        initialW = chainer.initializers.Normal(0.001)
        spn = SegmentProposalNetwork(1024,
                                     n_anchors=len(config.ANCHOR_SIZE),
                                     initialW=initialW)
        train_chain = TimeSegmentRCNNTrainChain(faster_extractor_backbone,
                                                faster_head_module, spn)

        # faster_extractor_backbone_flow = FasterBackbone(args.database, args.conv_layers, args.feature_dim, 1024)
        # faster_head_module_flow = FasterHeadModule(1024, class_num + 1,
        #                                       args.roi_size)  # note that the class number here must include background
        # initialW = chainer.initializers.Normal(0.001)
        # spn_flow = SegmentProposalNetwork(1024, n_anchors=len(config.ANCHOR_SIZE), initialW=initialW)
        # train_chain_flow = TimeSegmentRCNNTrainChain(faster_extractor_backbone_flow, faster_head_module_flow, spn_flow)
        # time_seg_train_chain_list = [train_chain_rgb, train_chain_flow]
        model = Wrapper(train_chain, two_stream_mode=args.two_stream_mode)

    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()

    optimizer = None
    if args.optimizer == OptimizerType.AdaGrad:
        optimizer = chainer.optimizers.AdaGrad(
            lr=args.lr
        )  # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to AdaGrad because the loss became NaN
    elif args.optimizer == OptimizerType.RMSprop:
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == OptimizerType.Adam:
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == OptimizerType.SGD:
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == OptimizerType.AdaDelta:
        optimizer = chainer.optimizers.AdaDelta()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    data_dir = args.data_dir + "/{0}_{1}_fold_{2}/train".format(
        args.database, args.fold, args.split_idx)
    dataset = NpzFeatureDataset(data_dir,
                                args.database,
                                two_stream_mode=args.two_stream_mode,
                                T=10.0,
                                use_mirror_data=True)

    dataset = TransformDataset(dataset, Transform(mirror=True))

    if args.proc_num == 1:
        train_iter = SerialIterator(dataset,
                                    args.batch_size,
                                    repeat=True,
                                    shuffle=True)
    else:
        train_iter = MultiprocessIterator(dataset,
                                          batch_size=args.batch_size,
                                          n_processes=args.proc_num,
                                          repeat=True,
                                          shuffle=True,
                                          n_prefetch=10,
                                          shared_mem=10000000)

    # BP4D_3_fold_1_resnet101@rnn@no_temporal@use_paper_num_label@roi_align@label_dep_layer@conv_lstm@sampleframe#13_model.npz
    use_paper_classnum = "use_paper_num_label" if args.use_paper_num_label else "all_avail_label"

    model_file_name = args.out + os.path.sep + \
                             'time_axis_rcnn_{0}_{1}_fold_{2}@{3}@{4}@{5}@{6}_model.npz'.format(args.database,
                                                                                args.fold, args.split_idx,
                                                                                use_paper_classnum, args.two_stream_mode,
                                                                                            args.conv_layers, args.faster_backbone)
    print(model_file_name)
    pretrained_optimizer_file_name = args.out + os.path.sep +\
                             'time_axis_rcnn_{0}_{1}_fold_{2}@{3}@{4}@{5}@{6}_optimizer.npz'.format(args.database,
                                                                                args.fold, args.split_idx,
                                                                                 use_paper_classnum, args.two_stream_mode,
                                                                                                args.conv_layers,args.faster_backbone)
    print(pretrained_optimizer_file_name)

    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snatshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)

    if os.path.exists(model_file_name):
        print("loading pretrained snapshot:{}".format(model_file_name))
        chainer.serializers.load_npz(model_file_name,
                                     model.time_seg_train_chain)

    print("only one GPU({0}) updater".format(args.gpu))
    updater = chainer.training.StandardUpdater(
        train_iter,
        optimizer,
        device=args.gpu,
        converter=lambda batch, device: concat_examples_not_string(
            batch, device, padding=0))

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
                   trigger=(args.snapshot, 'epoch'))

    trainer.extend(chainer.training.extensions.snapshot_object(
        model.time_seg_train_chain,
        filename=os.path.basename(model_file_name)),
                   trigger=(args.snapshot, 'epoch'))

    log_interval = 100, 'iteration'
    print_interval = 100, 'iteration'
    plot_interval = 100, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(20, 'epoch'))
    elif args.optimizer == "Adam":
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer),
                       trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="log_{0}_{1}_{2}_fold_{3}_{4}.log".format(
                args.faster_backbone, args.database, args.fold, args.split_idx,
                use_paper_classnum)))
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'main/accuracy',
        'main/rpn_accuracy',
    ]),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name='loss_{0}_{1}_fold_{2}_{3}.png'.format(
                args.database, args.fold, args.split_idx, use_paper_classnum),
            trigger=plot_interval),
                       trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_{1}_fold_{2}_{3}.png'.format(
                args.database, args.fold, args.split_idx, use_paper_classnum),
            trigger=plot_interval),
                       trigger=plot_interval)

    trainer.run()
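concat_examples_not_string above appears to be a project-local wrapper; chainer's own concat_examples shows the padding behavior the converter relies on: variable-length arrays in a batch are padded to a common shape with the given value. A minimal sketch:

import numpy as np
from chainer.dataset import concat_examples

batch = [np.ones((2, 3), dtype=np.float32), np.ones((4, 3), dtype=np.float32)]
padded = concat_examples(batch, device=-1, padding=0)
print(padded.shape)  # (2, 4, 3): the shorter example is zero-padded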
Beispiel #14
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', '-e', type=int, default=25,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')  # open_crf layer only works for CPU mode
    parser.add_argument('--step_size', '-ss', type=int, default=3000,
                        help='step_size for lr exponential')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--snapshot', '-snap', type=int, default=1, help='snapshot interval (epochs) for saving checkpoints')
    parser.add_argument('--test_mode', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument("--test", '-tt', default='test',help='Test directory path contains test txt file to evaluation')
    parser.add_argument('--train', '-t', default="train",
                        help='Train directory path containing train txt files')
    parser.add_argument('--database',  default="BP4D",
                        help='database to train for')
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--neighbor_mode', type=NeighborMode, choices=list(NeighborMode), help='1:concat_all, 2:attention_fuse, 3:random_neighbor, 4:no_neighbor')
    parser.add_argument('--spatial_edge_mode', type=SpatialEdgeMode, choices=list(SpatialEdgeMode), help='1:all_edge, 2:configure_edge, 3:no_edge')
    parser.add_argument('--temporal_edge_mode', type=RecurrentType, choices=list(RecurrentType), help='1:rnn, 2:attention_block, 3:point-wise feed forward (no temporal)')
    parser.add_argument("--use_relation_net", action='store_true', help='whether to use st_relation_net instead of space_time_net')
    parser.add_argument("--relation_net_lstm_first", action='store_true',
                        help='whether to use relation_net_lstm_first_forward in st_relation_net')
    parser.add_argument('--use_geometry_features',action='store_true', help='whether to use geometry features')
    parser.add_argument("--num_attrib", type=int, default=2048, help="number of dimension of each node feature")
    parser.add_argument('--geo_num_attrib', type=int, default=4, help='geometry feature length')
    parser.add_argument('--attn_heads', type=int, default=16, help='attention heads number')
    parser.add_argument('--layers', type=int, default=1, help='edge rnn and node rnn layer')
    parser.add_argument("--use_paper_num_label", action="store_true", help="only to use paper reported number of labels"
                                                                           " to train")
    parser.add_argument("--bi_lstm", action="store_true", help="whether to use bi-lstm as Edge/Node RNN")
    parser.add_argument('--weight_decay',type=float,default=0.0005, help="weight decay")
    parser.add_argument("--proc_num",'-proc', type=int,default=1, help="process number of dataset reader")
    parser.add_argument("--resume",action="store_true", help="whether to load npz pretrained file")
    parser.add_argument('--resume_model', '-rm', help='The relative path to restore model file')
    parser.add_argument("--snap_individual", action="store_true", help='whether to snap shot each fixed step into '
                                                                       'individual model file')
    parser.add_argument("--vis", action='store_true', help='whether to visualize computation graph')



    parser.set_defaults(test=False)
    args = parser.parse_args()
    if args.use_relation_net:
        args.out += "_relationnet"
        print("output file to : {}".format(args.out))
    print_interval = 1, 'iteration'
    val_interval = 5, 'iteration'
    print("""
    ======================================
        argument: 
            neighbor_mode:{0}
            spatial_edge_mode:{1}
            temporal_edge_mode:{2}
            use_geometry_features:{3}
    ======================================
    """.format(args.neighbor_mode, args.spatial_edge_mode, args.temporal_edge_mode, args.use_geometry_features))
    adaptive_AU_database(args.database)
    # the StructuralRNN constructor needs the first frame's factor graph
    dataset = GlobalDataSet(num_attrib=args.num_attrib, num_geo_attrib=args.geo_num_attrib,
                            train_edge="all")
    file_name = list(filter(lambda e: e.endswith(".txt"), os.listdir(args.train)))[0]

    paper_report_label = OrderedDict()
    if args.use_paper_num_label:
        for AU_idx,AU in sorted(config.AU_SQUEEZE.items(), key=lambda e:int(e[0])):
            if args.database == "BP4D":
                paper_use_AU = config.paper_use_BP4D
            elif args.database =="DISFA":
                paper_use_AU = config.paper_use_DISFA
            if AU in paper_use_AU:
                paper_report_label[AU_idx] = AU
    paper_report_label_idx = list(paper_report_label.keys())
    if not paper_report_label_idx:
        paper_report_label_idx = None


    sample = dataset.load_data(args.train + os.sep + file_name, npy_in_parent_dir=False,
                               paper_use_label_idx=paper_report_label_idx)  # load the first sample to construct the S-RNN; it must be passed as a constructor argument
    crf_pact_structure = CRFPackageStructure(sample, dataset, num_attrib=dataset.num_attrib_type)  # only the first frame of one video is read; node counts are relatively stable, so the RNN can be constructed from it
    # because we use a multi-class hinge loss, num_label must be the number of binary-form labels + 1 (the +1 stands for the all-zero label)

    if args.use_relation_net:
        model = StRelationNetPlus(crf_pact_structure, in_size=dataset.num_attrib_type, out_size=dataset.label_bin_len,
                              database=args.database, neighbor_mode=args.neighbor_mode,
                              spatial_edge_mode=args.spatial_edge_mode, recurrent_block_type=args.temporal_edge_mode,
                              attn_heads=args.attn_heads, dropout=0.5, use_geometry_features=args.use_geometry_features,
                              layers=args.layers, bi_lstm=args.bi_lstm, lstm_first_forward=args.relation_net_lstm_first)
    else:
        model = StAttentioNetPlus(crf_pact_structure, in_size=dataset.num_attrib_type, out_size=dataset.label_bin_len,
                              database=args.database, neighbor_mode=args.neighbor_mode,
                              spatial_edge_mode=args.spatial_edge_mode, recurrent_block_type=args.temporal_edge_mode,
                              attn_heads=args.attn_heads, dropout=0.5, use_geometry_features=args.use_geometry_features,
                              layers=args.layers, bi_lstm=args.bi_lstm)

    # note: attrib_size below is used by open_crf to size its parameters, so we must not pass dataset.num_attrib_type!
    train_data = GraphDataset(args.train, attrib_size=dataset.num_attrib_type, global_dataset=dataset, need_s_rnn=True,
                              need_cache_factor_graph=False, npy_in_parent_dir=False, get_geometry_feature=True,
                              paper_use_label_idx=paper_report_label_idx)  # args.train is a directory

    train_iter = chainer.iterators.SerialIterator(train_data, 1, shuffle=True, repeat=True)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        if args.use_relation_net:
            model.st_relation_net.to_gpu(args.gpu)
        else:
            model.st_attention_net.to_gpu(args.gpu)

    specific_key = "all_AU_train"
    if paper_report_label_idx:
        specific_key = "paper_AU_num_train"

    optimizer_snapshot_name = "{0}@{1}@st_attention_network_optimizer@{2}@{3}@{4}@{5}.npz".format(args.database,
                                                                                            specific_key,
                                                                                              args.neighbor_mode,
                                                                                              args.spatial_edge_mode,
                                                                                              args.temporal_edge_mode,
                                                                                              "use_geo" if args.use_geometry_features else "no_geo")
    model_snapshot_name = "{0}@{1}@st_attention_network_model@{2}@{3}@{4}@{5}.npz".format(args.database,
                                                                                          specific_key,
                                                                                      args.neighbor_mode,
                                                                                      args.spatial_edge_mode,
                                                                                      args.temporal_edge_mode,
                                                                                      "use_geo" if args.use_geometry_features else "no_geo")
    if args.snap_individual:
        model_snapshot_name = "{0}@{1}@st_attention_network_model_snapshot_".format(args.database,specific_key)
        model_snapshot_name += "{.updater.iteration}"
        model_snapshot_name += "@{0}@{1}@{2}@{3}.npz".format(args.neighbor_mode,
                                                             args.spatial_edge_mode,
                                                             args.temporal_edge_mode,
                                                             "use_geo" if args.use_geometry_features else "no_geo")
    if os.path.exists(args.out + os.sep + model_snapshot_name):
        print("found trained model file. load trained file: {}".format(args.out + os.sep + model_snapshot_name))
        chainer.serializers.load_npz(args.out + os.sep + model_snapshot_name, model)

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    # optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))
    updater = BPTTUpdater(train_iter, optimizer, int(args.gpu))
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    interval = (1, 'iteration')
    if args.test_mode:
        chainer.config.train = False
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=print_interval)
    trainer.extend(chainer.training.extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss', "main/accuracy",
         ]), trigger=print_interval)

    log_name = "st_attention_network_{0}@{1}@{2}@{3}@{4}.log".format(args.database,
                                                                      args.neighbor_mode,
                                                                      args.spatial_edge_mode,
                                                                      args.temporal_edge_mode,
                                                                "use_geo" if args.use_geometry_features else "no_geo")

    trainer.extend(chainer.training.extensions.LogReport(trigger=interval,log_name=log_name))
    # trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1, training_length=(args.epoch, 'epoch')))

    trainer.extend(
        chainer.training.extensions.snapshot_object(optimizer,
                                                    filename=optimizer_snapshot_name),
        trigger=(args.snapshot, 'epoch'))

    trainer.extend(
        chainer.training.extensions.snapshot_object(model,
                                                    filename=model_snapshot_name),
        trigger=(args.snapshot, 'epoch'))

    trainer.extend(chainer.training.extensions.ExponentialShift('lr',0.1), trigger=(10, "epoch"))

    if args.resume and os.path.exists(args.out + os.sep + args.resume_model):
        print("loading model_snapshot_name to model")
        chainer.serializers.load_npz(args.out + os.sep + args.resume_model, model)
    if args.resume and os.path.exists(args.out + os.sep + optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(args.out + os.sep + optimizer_snapshot_name, optimizer)

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(['main/loss'],
                                                              file_name="train_loss.png"),
                                                              trigger=val_interval)
        trainer.extend(chainer.training.extensions.PlotReport(['main/accuracy'],
                                                              file_name="train_accuracy.png"), trigger=val_interval)

    trainer.run()
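A minimal sketch of the paper-label filtering above, with a toy stand-in for config.AU_SQUEEZE (index -> AU name) and a hypothetical paper-reported AU set:

from collections import OrderedDict

AU_SQUEEZE = {0: "1", 1: "2", 2: "4", 3: "6"}  # hypothetical mapping
paper_use_AU = {"1", "2", "6"}                 # hypothetical paper set

paper_report_label = OrderedDict(
    (AU_idx, AU) for AU_idx, AU in sorted(AU_SQUEEZE.items())
    if AU in paper_use_AU)
print(list(paper_report_label.keys()))  # [0, 1, 3]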
Beispiel #15
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip',
                        '-c',
                        type=float,
                        default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--snapshot',
                        '-snap',
                        type=int,
                        default=100,
                        help='snapshot interval (iterations) for saving checkpoints')
    parser.add_argument(
        '--valid',
        '-v',
        default='',
        help='Validation directory path containing validation txt files')
    parser.add_argument('--train',
                        '-t',
                        default="train",
                        help='Train directory path containing train txt files')
    parser.add_argument('--database',
                        default="BP4D",
                        help='database to train for')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--hidden_size',
                        type=int,
                        default=1024,
                        help="hidden_size orignally used in open_crf")
    parser.add_argument('--eval_mode',
                        action='store_true',
                        help='whether to evaluate the model')
    parser.add_argument("--need_cache_graph",
                        "-ng",
                        action="store_true",
                        help="whether to cache factor graph to LRU cache")
    parser.add_argument("--bi_lstm",
                        '-bilstm',
                        action='store_true',
                        help="Use bi_lstm as basic component of temporal_lstm")
    parser.add_argument("--num_attrib",
                        type=int,
                        default=2048,
                        help="node feature dimension")
    parser.add_argument("--resume",
                        action="store_true",
                        help="whether to load npz pretrained file")
    parser.add_argument(
        "--snap_individual",
        action="store_true",
        help="whether to snapshot each individual epoch/iteration")

    parser.set_defaults(test=False)
    args = parser.parse_args()
    print_interval = 1, 'iteration'
    val_interval = 5, 'iteration'

    adaptive_AU_database(args.database)

    # the StructuralRNN constructor needs the first frame's factor graph
    dataset = GlobalDataSet(num_attrib=args.num_attrib)
    model = TemporalLSTM(box_num=config.BOX_NUM[args.database],
                         in_size=args.num_attrib,
                         out_size=dataset.label_bin_len,
                         use_bi_lstm=args.bi_lstm,
                         initialW=None)

    train_data = GraphDataset(args.train,
                              attrib_size=args.hidden_size,
                              global_dataset=dataset,
                              need_s_rnn=True,
                              need_cache_factor_graph=args.need_cache_graph)

    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  1,
                                                  shuffle=True,
                                                  repeat=True)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    updater = BPTTUpdater(train_iter, optimizer, int(args.gpu))
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    print_interval = (1, 'iteration')

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=print_interval)
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
        "main/accuracy"
    ]),
                   trigger=print_interval)
    log_name = "temporal_lstm.log"
    trainer.extend(
        chainer.training.extensions.LogReport(trigger=print_interval,
                                              log_name=log_name))
    # trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1, training_length=(args.epoch, 'epoch')))
    optimizer_snapshot_name = "{0}_temporal_lstm_optimizer.npz".format(
        args.database)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name),
                   trigger=(args.snapshot, 'iteration'))

    if not args.snap_individual:
        model_snapshot_name = "{0}_temporal_lstm_model.npz".format(
            args.database)
        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=model_snapshot_name),
                       trigger=(args.snapshot, 'iteration'))
    else:
        model_snapshot_name = "{0}_temporal_lstm_model_".format(
            args.database) + "{.updater.iteration}.npz"
        trainer.extend(chainer.training.extensions.snapshot_object(
            model, filename=model_snapshot_name),
                       trigger=(args.snapshot, 'iteration'))

    trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.7),
                   trigger=(5, "epoch"))

    # load pretrained file
    if not args.snap_individual:
        if args.resume and os.path.exists(args.out + os.sep +
                                          model_snapshot_name):
            print("loading model_snapshot_name to model")
            chainer.serializers.load_npz(
                args.out + os.sep + model_snapshot_name, model)
    else:
        if args.resume:
            file_lst = [
                filename[filename.rindex("_") + 1:filename.rindex(".")]
                for filename in os.listdir(args.out)
            ]
            file_no = sorted(map(int, file_lst))[-1]
            model_snapshot_name = "{0}_temporal_lstm_model_{1}.npz".format(
                args.database, file_no)
            chainer.serializers.load_npz(
                args.out + os.sep + model_snapshot_name, model)

    if args.resume and os.path.exists(args.out + os.sep +
                                      optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(
            args.out + os.sep + optimizer_snapshot_name, optimizer)

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'], file_name="train_loss.png"),
                       trigger=(100, "iteration"))
        # trainer.extend(chainer.training.extensions.PlotReport(['opencrf_val/F1','opencrf_val/accuracy'],
        #                                                       file_name="val_f1.png"), trigger=val_interval)

    # if args.valid:
    #     valid_data = S_RNNPlusDataset(args.valid, attrib_size=args.hidden_size, global_dataset=dataset,
    #                                   need_s_rnn=True, need_cache_factor_graph=args.need_cache_graph)  # attrib_size controls the weight length of the open-crf layer
    #     validate_iter = chainer.iterators.SerialIterator(valid_data, 1, shuffle=False, repeat=False)
    #     crf_evaluator = OpenCRFEvaluator(iterator=validate_iter, target=model, device=args.gpu)
    #     trainer.extend(crf_evaluator, trigger=val_interval, name="opencrf_val")

    trainer.run()
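A minimal sketch of the snap_individual resume logic above: the iteration number is parsed from snapshot file names and the newest snapshot is chosen (file names here are hypothetical).

files = ["BP4D_temporal_lstm_model_100.npz",
         "BP4D_temporal_lstm_model_1200.npz"]
numbers = [int(f[f.rindex("_") + 1:f.rindex(".")]) for f in files]
latest = sorted(numbers)[-1]
print("BP4D_temporal_lstm_model_{}.npz".format(latest))  # picks ..._1200.npz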
Beispiel #16
0
                                                       orig_from_path, "BP4D")
                    file_obj.write("{}\n".format(line))
            file_obj.flush()

        with open("{0}/id_test_{1}.txt".format(folder_path, i),
                  "w") as file_obj:
            for subject_name in test_name_array:
                for img_file_path in subject_imgpath_dict[subject_name]:
                    orig_from_path = "#" if img_file_path not in img_from else img_from[
                        img_file_path]
                    video_dir = BP4D_data_reader.img_dir + os.sep + subject_name + os.sep + sequence_name + os.sep
                    AU_set = enhance_mix_database[img_file_path]
                    AU_set_str = ",".join(AU_set)
                    line = "{0}\t{1}\t{2}\t{3}".format(img_file_path,
                                                       AU_set_str,
                                                       orig_from_path, "BP4D")
                    file_obj.write("{}\n".format(line))
            file_obj.flush()


if __name__ == "__main__":
    from dataset_toolkit.adaptive_AU_config import adaptive_AU_database

    adaptive_AU_database("BP4D")
    gen_BP4D_subject_kfold_id_file("BP4D",
                                   "{0}/{1}".format(config.DATA_PATH["BP4D"],
                                                    "idx"),
                                   kfold=10,
                                   drop_big_label=False)
    print("done")
Beispiel #17
0
                    for orientation, video_info_lst in subject_video[
                            video_name].items():
                        for video_info in video_info_lst:
                            img_file_path = video_info["img_path"]
                            img_file_path = os.sep.join(
                                img_file_path.split("/")[-3:])
                            AU_set_str = ",".join(video_info["AU_label"])
                            if len(video_info["AU_label"]) == 0:
                                AU_set_str = "0"
                            orig_from_path = "#"
                            file_obj.write("{0}\t{1}\t{2}\t{3}\n".format(
                                img_file_path, AU_set_str, orig_from_path,
                                video_info["database"]))
                file_obj.flush()


if __name__ == "__main__":
    from dataset_toolkit.adaptive_AU_config import adaptive_AU_database
    #
    # adaptive_AU_database("BP4D")
    # partition = {"trn":"/home/machen/dataset/BP4D/idx/trn_partition.txt",
    #              "valid":"/home/machen/dataset/BP4D/idx/validate_partition.txt"}
    # gen_BP4D_subject_id_file("{0}/{1}".format(config.DATA_PATH["BP4D"], "idx"), kfold=10, validation_size=1000)
    adaptive_AU_database("DISFA")
    # single_AU_RCNN_BP4D_subject_id_file("{0}/{1}".format(config.ROOT_PATH + os.sep+"/BP4D/", "idx"), kfold=3)
    gen_DISFA_subject_id_file("{0}/{1}".format(
        config.ROOT_PATH + os.sep + "/DISFA_1/", "idx"),
                              kfold=3)
    # gen_BP4D_subject_id_file("{0}/{1}".format(config.DATA_PATH["BP4D"], "idx"), kfold=10)
    # gen_BP4D_subject_id_file("{0}/{1}".format(config.DATA_PATH["BP4D"], "idx"), kfold=3)
    # print("done")
Beispiel #18
0
            for label, group in groupby(column):
                if label == 1:
                    AU_segment_count[config.AU_SQUEEZE[AU_idx]] += 1
                AU_continous_count[label].append(sum(1 for _ in group))
            AU_continous_count.pop(0, None)  # keep only runs of label 1
            for label, val_list in AU_continous_count.items():
                for sum_val in val_list:
                    AU_all_count[config.AU_SQUEEZE[AU_idx]].append(sum_val)
    average_dict = {}
    for AU, sum_val_list in AU_all_count.items():
        average_dict[AU] = sum(sum_val_list) / len(sum_val_list)
    return average_dict, AU_segment_count


if __name__ == "__main__":
    adaptive_AU_database("DISFA", False)
    label_matrix_dict = read_idx_file(
        "/home/machen/dataset/DISFA/idx/3_fold/id_all.txt")
    average_dict, AU_segment_count = stats_frequency(label_matrix_dict)
    print("duration:")
    for AU, mean in sorted(average_dict.items(), key=lambda e: int(e[0])):
        if AU in config.paper_use_DISFA:
            print(AU, mean)
    print("segment count:")
    for AU, seg_count in sorted(AU_segment_count.items(),
                                key=lambda e: int(e[0])):
        if AU in config.paper_use_DISFA:
            print(AU, seg_count)
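A minimal sketch of the run-length counting above: itertools.groupby collapses consecutive equal labels, so each run of 1s is one AU segment and its length is the segment duration in frames.

from itertools import groupby

column = [0, 1, 1, 1, 0, 0, 1, 1]  # hypothetical binary label column
runs = [(label, sum(1 for _ in group)) for label, group in groupby(column)]
print(runs)  # [(0, 1), (1, 3), (0, 2), (1, 2)] -> two segments of durations 3 and 2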
Beispiel #19
0
def main():
    print("chainer cudnn enabled: {}".format(chainer.cuda.cudnn_enabled))
    parser = argparse.ArgumentParser(
        description='Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/AU_R_CNN/')
    parser.add_argument('--gpu',
                        '-g',
                        default="0",
                        help='GPU ID; multiple GPUs split by comma. '
                        'Note that the BPTT updater does not support multi-GPU')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--database',
                        default='BP4D',
                        help='database: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=20)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--need_validate',
                        action='store_true',
                        help='whether to run validation during training')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--feature_model',
                        default="resnet101",
                        help="vgg16/vgg19/resnet101 for train")
    parser.add_argument('--extract_len', type=int, default=1000)
    parser.add_argument('--optimizer',
                        default='RMSprop',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model',
                        default='resnet101',
                        help='imagenet/vggface/resnet101/*.npz')
    parser.add_argument('--pretrained_model_args',
                        nargs='+',
                        type=float,
                        help='you can pass in "1.0 224" or "0.75 224"')
    parser.add_argument(
        '--use_memcached',
        action='store_true',
        help='whether to use memcached to speed up fetching crops & masks')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument(
        "--snap_individual",
        action="store_true",
        help="whether to snapshot each individual epoch/iteration")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument(
        "--use_sigmoid_cross_entropy",
        "-sigmoid",
        action="store_true",
        help="whether to use sigmoid cross entropy or softmax cross entropy")
    parser.add_argument(
        "--is_pretrained",
        action="store_true",
        help="whether this run pretrains on BP4D for later use with the DISFA dataset")
    parser.add_argument(
        "--pretrained_target",
        '-pt',
        default="",
        help="the label set used for pretraining, e.g. DISFA")
    parser.add_argument("--fix",
                        '-fix',
                        action="store_true",
                        help="whether to fix first few conv layers or not")
    parser.add_argument(
        '--occlude',
        default='',
        help='which part of the face to occlude for testing: upper/left/right/lower/none')
    parser.add_argument("--prefix",
                        '-prefix',
                        default="",
                        help="_beta, for example 3_fold_beta")
    parser.add_argument('--eval_mode',
                        action='store_true',
                        help='Use test datasets for evaluation metric')
    parser.add_argument("--img_resolution", type=int, default=512)
    parser.add_argument(
        "--FERA",
        action='store_true',
        help='whether to use FERA data split train and validate')
    parser.add_argument(
        '--FPN',
        action="store_true",
        help="whether to use a feature pyramid network for training and prediction")
    parser.add_argument(
        '--fake_box',
        action="store_true",
        help="whether to use fake average box coordinate to predict")
    parser.add_argument('--roi_align',
                        action="store_true",
                        help="whether to use roi_align or roi_pooling")
    parser.add_argument("--train_test", default="trainval", type=str)
    parser.add_argument("--trail_times", default=20, type=int)
    parser.add_argument("--each_trail_iteration", default=1000, type=int)
    args = parser.parse_args()
    if not os.path.exists(args.pid):
        os.makedirs(args.pid)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    # with open(pid_file_path, "w") as file_obj:
    #     file_obj.write(pid)
    #     file_obj.flush()

    config.IMG_SIZE = (args.img_resolution, args.img_resolution)

    print('GPU: {}'.format(args.gpu))
    if args.is_pretrained:
        adaptive_AU_database(args.pretrained_target)
    else:
        adaptive_AU_database(args.database)
    np.random.seed(args.seed)
    # first build a list txt file, e.g. id_trainval_0.txt; each line is subject + "/" + emotion_seq + "/" + frame
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    if args.FPN:
        faster_rcnn = FPN101(len(config.AU_SQUEEZE),
                             pretrained_resnet=args.pretrained_model,
                             use_roialign=args.roi_align,
                             mean_path=args.mean,
                             min_size=args.img_resolution,
                             max_size=args.img_resolution)
    elif args.feature_model == 'vgg16':
        faster_rcnn = FasterRCNNVGG16(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model=args.pretrained_model,
            mean_file=args.mean,
            min_size=args.img_resolution,
            max_size=args.img_resolution,
            extract_len=args.extract_len,
            fix=args.fix)  # may be replaced with /home/nco/face_expr/result/snapshot_model.npz
    elif args.feature_model == 'vgg19':
        faster_rcnn = FasterRCNNVGG19(n_fg_class=len(config.AU_SQUEEZE),
                                      pretrained_model=args.pretrained_model,
                                      mean_file=args.mean,
                                      min_size=args.img_resolution,
                                      max_size=args.img_resolution,
                                      extract_len=args.extract_len,
                                      dataset=args.database,
                                      fold=args.fold,
                                      split_idx=args.split_idx)
    elif args.feature_model == 'resnet101':
        faster_rcnn = FasterRCNNResnet101(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model=args.pretrained_model,
            mean_file=args.mean,
            min_size=args.img_resolution,
            max_size=args.img_resolution,
            extract_len=args.extract_len
        )  # may be replaced with /home/nco/face_expr/result/snapshot_model.npz
    elif args.feature_model == "mobilenet_v1":
        faster_rcnn = FasterRCNN_MobilenetV1(
            pretrained_model_type=args.pretrained_model_args,
            min_size=config.IMG_SIZE[0],
            max_size=config.IMG_SIZE[1],
            mean_file=args.mean,
            n_class=len(config.AU_SQUEEZE))

    batch_size = args.batch_size

    with chainer.no_backprop_mode(), chainer.using_config("train", False):

        test_data = AUDataset(database=args.database,
                              fold=args.fold,
                              img_resolution=args.img_resolution,
                              split_name=args.train_test,
                              split_index=args.split_idx,
                              mc_manager=mc_manager,
                              train_all_data=False,
                              prefix=args.prefix,
                              pretrained_target=args.pretrained_target,
                              is_FERA=args.FERA)
        test_data = TransformDataset(test_data,
                                     Transform(faster_rcnn, mirror=False))
        if args.fake_box:
            test_data = TransformDataset(test_data,
                                         FakeBoxTransform(args.database))
        if args.proc_num == 1:
            test_iter = SerialIterator(test_data,
                                       args.batch_size,
                                       repeat=False,
                                       shuffle=True)
        else:
            test_iter = MultiprocessIterator(test_data,
                                             batch_size=args.batch_size,
                                             n_processes=args.proc_num,
                                             repeat=False,
                                             shuffle=True,
                                             n_prefetch=10,
                                             shared_mem=10000000)

        gpu = int(args.gpu) if "," not in args.gpu else int(
            args.gpu[:args.gpu.index(",")])
        chainer.cuda.get_device_from_id(gpu).use()
        faster_rcnn.to_gpu(gpu)
        evaluator = SpeedEvaluator(
            test_iter,
            faster_rcnn,
            lambda batch, device: concat_examples_not_none(
                batch, device, padding=-99),
            device=gpu,
            trail_times=args.trail_times,
            each_trail_iteration=args.each_trail_iteration,
            database=args.database)
        observation = evaluator.evaluate()
        with open(args.out + os.path.sep + "evaluation_speed_test.json",
                  "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
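
The evaluator above converts each batch with concat_examples_not_none, whose implementation is not part of this listing. A minimal sketch of such a converter, assuming it merely drops None samples (e.g. frames whose face crop failed) before delegating to Chainer's standard concat_examples with the given padding value:

from chainer.dataset import concat_examples

def concat_examples_not_none(batch, device=None, padding=None):
    # Hypothetical sketch: filter out samples that are None (or contain a None
    # field) so a failed face crop cannot break the whole mini-batch.
    batch = [example for example in batch
             if example is not None and all(item is not None for item in example)]
    if not batch:
        return None
    return concat_examples(batch, device=device, padding=padding)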
Beispiel #20
def main():
    parser = argparse.ArgumentParser(
        description='Space Time Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu',
                        '-g',
                        nargs='+',
                        type=int,
                        help='GPU IDs; separate multiple GPUs with spaces')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out',
                        '-o',
                        default='output_two_stream',
                        help='Output directory')
    parser.add_argument('--database',
                        default='BP4D',
                        help='database: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--mean_rgb',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--mean_flow',
                        default=config.ROOT_PATH + "BP4D/idx/mean_flow.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone',
                        default="resnet101",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer',
                        default='SGD',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model_rgb',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument(
        '--pretrained_model_flow',
        help="path of the optical-flow pretrained model; may be the same npz as the RGB one")
    parser.add_argument('--two_stream_mode',
                        type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='rgb_flow/ optical_flow/ rgb')
    parser.add_argument(
        '--use_memcached',
        action='store_true',
        help='whether to use memcached to speed up fetching crops & masks')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--fix",
                        action="store_true",
                        help="fix parameter of conv2 update when finetune")
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label",
                        action="store_true",
                        help="only to use paper reported number of labels"
                        " to train")
    parser.add_argument(
        "--roi_align",
        action="store_true",
        help="whether to use roi align or roi pooling layer in CNN")
    parser.add_argument("--T", '-T', type=int, default=10)
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    args = parser.parse_args()
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()

    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))

    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())

    au_rcnn_train_chain_list = []
    if args.backbone == 'vgg':
        au_rcnn = AU_RCNN_VGG16(pretrained_model=args.pretrained_model_rgb,
                                min_size=config.IMG_SIZE[0],
                                max_size=config.IMG_SIZE[1],
                                use_roi_align=args.roi_align)
        au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain)

    elif args.backbone == 'resnet101':
        if args.two_stream_mode != TwoStreamMode.rgb_flow:
            assert (args.pretrained_model_rgb == "" and args.pretrained_model_flow != "") or\
                   (args.pretrained_model_rgb != "" and args.pretrained_model_flow == "")
            pretrained_model = args.pretrained_model_rgb if args.pretrained_model_rgb else args.pretrained_model_flow
            au_rcnn = AU_RCNN_Resnet101(
                pretrained_model=pretrained_model,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                use_roi_align=args.roi_align,
                use_optical_flow_input=(
                    args.two_stream_mode == TwoStreamMode.optical_flow),
                temporal_length=args.T)
            au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain)
        else:  # rgb_flow mode
            au_rcnn_rgb = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_rgb,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                use_roi_align=args.roi_align,
                use_optical_flow_input=False,
                temporal_length=args.T)
            au_rcnn_optical_flow = AU_RCNN_Resnet101(
                pretrained_model=args.pretrained_model_flow,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                use_roi_align=args.roi_align,
                use_optical_flow_input=True,
                temporal_length=args.T)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
            au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
                au_rcnn_optical_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)

    model = Wrapper(au_rcnn_train_chain_list,
                    class_num,
                    args.database,
                    args.T,
                    two_stream_mode=args.two_stream_mode,
                    gpus=args.gpu)
    batch_size = args.batch_size

    img_dataset = AUDataset(database=args.database,
                            L=args.T,
                            fold=args.fold,
                            split_name='trainval',
                            split_index=args.split_idx,
                            mc_manager=mc_manager,
                            two_stream_mode=args.two_stream_mode,
                            train_all_data=False,
                            paper_report_label_idx=paper_report_label_idx)

    train_dataset = TransformDataset(
        img_dataset,
        Transform(L=args.T,
                  mirror=True,
                  mean_rgb_path=args.mean_rgb,
                  mean_flow_path=args.mean_flow))

    if args.proc_num == 1:
        train_iter = SerialIterator(train_dataset,
                                    batch_size,
                                    repeat=True,
                                    shuffle=True)
    else:
        train_iter = MultiprocessIterator(train_dataset,
                                          batch_size=batch_size,
                                          n_processes=args.proc_num,
                                          repeat=True,
                                          shuffle=True,
                                          n_prefetch=3,
                                          shared_mem=53457280)

    if len(args.gpu) > 1:
        for gpu in args.gpu:
            chainer.cuda.get_device_from_id(gpu).use()
    else:
        chainer.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu(args.gpu[0])

    optimizer = None
    if args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(
            lr=args.lr
        )  # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to AdaGrad because the loss became NaN
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        optimizer = chainer.optimizers.AdaDelta()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer_name = args.optimizer

    # BP4D_3_fold_1_resnet101@rnn@no_temporal@use_paper_num_label@roi_align@label_dep_layer@conv_lstm@sampleframe#13_model.npz
    use_paper_key_str = "use_paper_num_label" if args.use_paper_num_label else "all_avail_label"
    roi_align_key_str = "roi_align" if args.roi_align else "roi_pooling"

    single_model_file_name = args.out + os.sep + \
                             '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_model.npz'.format(args.database,
                                                                                                 args.fold,
                                                                                                 args.split_idx,
                                                                                                 args.backbone,
                                                                                                 args.two_stream_mode,
                                                                                                 use_paper_key_str,
                                                                                                 roi_align_key_str,
                                                                                                 args.T)

    print(single_model_file_name)
    pretrained_optimizer_file_name = args.out + os.sep + \
                                     '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_optimizer.npz'.format(
                                         args.database,
                                         args.fold, args.split_idx,
                                         args.backbone, args.two_stream_mode,
                                         use_paper_key_str, roi_align_key_str,
                                         args.T)
    print(pretrained_optimizer_file_name)

    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snatshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)

    if os.path.exists(single_model_file_name):
        print("loading pretrained snapshot:{}".format(single_model_file_name))
        chainer.serializers.load_npz(single_model_file_name, model)

    if args.fix:
        au_rcnn = model.au_rcnn_train_chain.au_rcnn
        au_rcnn.extractor.conv1.W.update_rule.enabled = False
        au_rcnn.extractor.bn1.gamma.update_rule.enabled = False
        au_rcnn.extractor.bn1.beta.update_rule.enabled = False
        # freeze every conv/bn pair inside the res2 blocks; block "a" has a
        # fourth conv/bn pair (the projection shortcut), blocks "b1"/"b2" have three
        for res2_name in ["a", "b1", "b2"]:
            block = getattr(au_rcnn.extractor.res2, res2_name)
            pair_num = 4 if res2_name == "a" else 3
            for i in range(1, pair_num + 1):
                getattr(block, "conv{}".format(i)).W.update_rule.enabled = False
                getattr(block, "bn{}".format(i)).gamma.update_rule.enabled = False
                getattr(block, "bn{}".format(i)).beta.update_rule.enabled = False

    updater = chainer.training.StandardUpdater(
        train_iter,
        optimizer,
        device=args.gpu[0],
        converter=lambda batch, device: concat_examples(
            batch, device, padding=0))

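    # NOTE: the stop trigger below is only (10, 'iteration') because this script
    # profiles trainer.run() with cProfile at the bottom rather than doing a full run.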
    trainer = training.Trainer(updater, (10, 'iteration'), out=args.out)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
                   trigger=(args.snapshot, 'iteration'))

    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=os.path.basename(single_model_file_name)),
                   trigger=(args.snapshot, 'iteration'))

    log_interval = 100, 'iteration'
    print_interval = 100, 'iteration'
    plot_interval = 10, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer),
                       trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="log_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.log".format(
                args.database, args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str,
                args.T)))
    # trainer.reporter.add_observer("main_par", model.loss_head_module)
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/accuracy',
    ]),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name="loss_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.png".format(
                args.database, args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str,
                args.T),
            trigger=plot_interval),
                       trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name="accuracy_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.png".
            format(args.database, args.fold, args.split_idx, args.backbone,
                   args.two_stream_mode, use_paper_key_str, roi_align_key_str,
                   args.T),
            trigger=plot_interval),
                       trigger=plot_interval)

    # trainer.run()
    cProfile.runctx("trainer.run()", globals(), locals(), "Profile.prof")
    s = pstats.Stats("Profile.prof")
    s.strip_dirs().sort_stats("time").print_stats()
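
The profiling tail above prints every entry sorted by internal time. To dig into a specific bottleneck, the saved Profile.prof can be reloaded and the report restricted, for example (update_core is the StandardUpdater method driving each iteration):

import pstats

s = pstats.Stats("Profile.prof")
s.strip_dirs().sort_stats("cumulative").print_stats(20)  # top 20 by cumulative time
s.print_callers("update_core")  # who spends time inside the updater step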
Beispiel #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size',
                        '-b',
                        type=int,
                        default=1,
                        help='each batch is written out as a new file')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU used to extract features')
    parser.add_argument("--mirror",
                        action="store_true",
                        help="whether to mirror")
    parser.add_argument("--out_dir",
                        '-o',
                        default="/home/machen/dataset/extract_features/")
    parser.add_argument(
        "--model",
        '-m',
        help="the AU R-CNN pretrained model file to load to extract feature")
    parser.add_argument("--trainval_test", '-tt', help="train or test")
    parser.add_argument("--database", default="BP4D")
    parser.add_argument(
        '--use_memcached',
        action='store_true',
        help='whether to use memcached to speed up fetching crops & masks')
    parser.add_argument('--proc_num', type=int, default=10)
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument('--mean_rgb',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--mean_flow',
                        default=config.ROOT_PATH + "BP4D/idx/mean_flow.npy",
                        help='image mean .npy file')

    args = parser.parse_args()
    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    return_dict = extract_mode(args.model)
    database = return_dict["database"]
    fold = return_dict["fold"]
    split_idx = return_dict["split_idx"]
    backbone = return_dict["backbone"]
    use_paper_num_label = return_dict["use_paper_num_label"]
    roi_align = return_dict["use_roi_align"]
    two_stream_mode = return_dict["two_stream_mode"]
    T = return_dict["T"]

    class_num = len(config.paper_use_BP4D) if database == "BP4D" else len(
        config.paper_use_DISFA)
    paper_report_label_idx = sorted(list(config.AU_SQUEEZE.keys()))
    if use_paper_num_label:
        paper_report_label, class_num = squeeze_label_num_report(
            database, True)
        paper_report_label_idx = list(paper_report_label.keys())

    assert two_stream_mode == TwoStreamMode.rgb_flow
    if two_stream_mode == TwoStreamMode.rgb_flow:
        au_rcnn_train_chain_list = []
        au_rcnn_rgb = AU_RCNN_Resnet101(pretrained_model=backbone,
                                        min_size=config.IMG_SIZE[0],
                                        max_size=config.IMG_SIZE[1],
                                        use_roi_align=roi_align,
                                        use_optical_flow_input=False,
                                        temporal_length=T)

        au_rcnn_optical_flow = AU_RCNN_Resnet101(pretrained_model=backbone,
                                                 min_size=config.IMG_SIZE[0],
                                                 max_size=config.IMG_SIZE[1],
                                                 use_roi_align=roi_align,
                                                 use_optical_flow_input=True,
                                                 temporal_length=T)

        au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
        au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
            au_rcnn_optical_flow)

        au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
        au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)
        model = Wrapper(au_rcnn_train_chain_list,
                        class_num,
                        database,
                        T,
                        two_stream_mode=two_stream_mode,
                        gpus=[args.gpu, args.gpu])

    assert os.path.exists(args.model)
    print("loading model file : {}".format(args.model))
    chainer.serializers.load_npz(args.model, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        if isinstance(model, FasterRCNNResnet101):
            model.to_gpu(args.gpu)

    img_dataset = AUDataset(database=database,
                            L=T,
                            fold=fold,
                            split_name=args.trainval_test,
                            split_index=split_idx,
                            mc_manager=mc_manager,
                            train_all_data=False,
                            paper_report_label_idx=paper_report_label_idx,
                            jump_exists=True,
                            npz_dir=args.out_dir)
    mirror_list = [
        False,
    ]
    if args.mirror and args.trainval_test == 'trainval':
        mirror_list.append(True)
    for mirror in mirror_list:
        train_dataset = TransformDataset(
            img_dataset,
            Transform(T,
                      mean_rgb_path=args.mean_rgb,
                      mean_flow_path=args.mean_flow,
                      mirror=mirror))

        if args.proc_num > 1:
            dataset_iter = MultiprocessIterator(train_dataset,
                                                batch_size=args.batch_size,
                                                n_processes=args.proc_num,
                                                repeat=False,
                                                shuffle=False,
                                                n_prefetch=10,
                                                shared_mem=10000000)
        else:
            dataset_iter = SerialIterator(train_dataset,
                                          batch_size=args.batch_size,
                                          repeat=False,
                                          shuffle=False)

        with chainer.no_backprop_mode(), chainer.using_config(
                'cudnn_deterministic',
                True), chainer.using_config('train', False):
            model_dump = DumpRoIFeature(
                dataset_iter,
                model,
                args.gpu,
                database,
                converter=lambda batch, device: concat_examples_not_string(
                    batch, device, padding=0),
                output_path=args.out_dir,
                trainval_test=args.trainval_test,
                fold_split_idx=split_idx,
                mirror_data=mirror)
            model_dump.evaluate()
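
Several scripts in this listing recover their whole configuration from the model filename via extract_mode, which is not shown here. Given the naming convention used above ({database}_{fold}_fold_{split_idx}_{backbone}@{two_stream_mode}@{label_key}@{roi_key}@T#{T}_model.npz), a hedged sketch of such a parser might look like this (the real implementation and the exact string form of two_stream_mode may differ):

import os
import re

def extract_mode_sketch(model_file_name):
    # Hypothetical parser for names such as
    # BP4D_3_fold_1_resnet101@rgb_flow@use_paper_num_label@roi_align@T#10_model.npz
    pattern = re.compile(
        r"(\w+?)_(\d+)_fold_(\d+)_(\w+?)@(\w+?)@(\w+?)@(\w+?)@T#(\d+)_model\.npz")
    matcher = pattern.match(os.path.basename(model_file_name))
    assert matcher is not None, model_file_name
    return {
        "database": matcher.group(1),
        "fold": int(matcher.group(2)),
        "split_idx": int(matcher.group(3)),
        "backbone": matcher.group(4),
        "two_stream_mode": matcher.group(5),
        "use_paper_num_label": matcher.group(6) == "use_paper_num_label",
        "use_roi_align": matcher.group(7) == "roi_align",
        "T": int(matcher.group(8)),
    }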
Beispiel #22
    if matcher:
        fold = matcher.group(1)
        split_idx = matcher.group(2)
    output = args.output
    if args.prefix:
        id_list_fold_path = config.DATA_PATH[
            args.database] + "/idx/{0}_fold{1}/".format(fold, args.prefix)
    else:
        id_list_fold_path = config.DATA_PATH[
            args.database] + "/idx/{0}_fold/".format(fold)
    train_subject, test_subject = load_train_test_id(id_list_fold_path,
                                                     split_idx, args.database)
    os.makedirs(output, exist_ok=True)

    adaptive_AU_database(args.database)
    extract_key = ""

    if args.pretrained_model_name == "resnet101":
        faster_rcnn = FasterRCNNResnet101(
            n_fg_class=len(config.AU_SQUEEZE),
            pretrained_model="resnet101",
            mean_file=args.mean,
            use_lstm=args.use_lstm,
            extract_len=args.extract_len
        )  # may be replaced with /home/machen/face_expr/result/snapshot_model.npz
        extract_key = 'avg_pool'
    elif args.pretrained_model_name == "vgg":
        faster_rcnn = FasterRCNNVGG16(n_fg_class=len(config.AU_SQUEEZE),
                                      pretrained_model="imagenet",
                                      mean_file=args.mean)  # the rest of this branch is truncated in the source listing
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)'
                        )  # open_crf layer only works for CPU mode
    parser.add_argument(
        "--model", "-m",
        help="pretrained model file path")  # which contains pretrained target
    parser.add_argument("--pretrained_model", "-pre", default="resnet101")
    parser.add_argument("--memcached_host", default="127.0.0.1")
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--proc_num',
                        type=int,
                        default=10,
                        help="multiprocess fetch data process number")
    parser.add_argument('--two_stream_mode',
                        type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='rgb_flow/ optical_flow/ rgb')
    parser.add_argument('--batch',
                        '-b',
                        type=int,
                        default=5,
                        help='mini batch size')
    args = parser.parse_args()
    if not args.model.endswith("model.npz"):
        return
    mode_dict = extract_mode(args.model)
    database = mode_dict["database"]
    fold = mode_dict["fold"]
    split_idx = mode_dict["split_idx"]
    backbone = mode_dict["backbone"]
    spatial_edge_mode = mode_dict["spatial_edge_mode"]
    temporal_edge_mode = mode_dict["temporal_edge_mode"]
    use_paper_num_label = mode_dict["use_paper_num_label"]
    use_roi_align = mode_dict["use_roi_align"]
    use_label_dep_rnn_layer = mode_dict["label_dep_rnn_layer"]
    sample_frame = mode_dict["sample_frame"]
    conv_rnn_type = mode_dict["conv_rnn_type"]
    use_feature_map = (conv_rnn_type != ConvRNNType.conv_rcnn) and (
        conv_rnn_type != ConvRNNType.fc_lstm)
    use_au_rcnn_loss = (conv_rnn_type == ConvRNNType.conv_rcnn)

    adaptive_AU_database(database)
    paper_report_label, class_num = squeeze_label_num_report(
        database, use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    if not paper_report_label_idx:
        paper_report_label_idx = None
        class_num = len(config.AU_SQUEEZE)
    else:
        class_num = len(paper_report_label_idx)

    model_print_dict = OrderedDict()
    for key, value in mode_dict.items():
        model_print_dict[key] = str(value)
    print("""
        {0}
        ======================================
        INFO:
        {1}
        ======================================
        """.format(args.model,
                   json.dumps(model_print_dict, sort_keys=True, indent=8)))
    if backbone == 'resnet101':
        au_rcnn = AU_RCNN_Resnet101(
            pretrained_model=args.pretrained_model,
            min_size=config.IMG_SIZE[0],
            max_size=config.IMG_SIZE[1],
            mean_file=args.mean,
            classify_mode=use_au_rcnn_loss,
            n_class=class_num,
            use_roi_align=use_roi_align,
            use_feature_map_res45=use_feature_map,
            use_feature_map_res5=(conv_rnn_type != ConvRNNType.fc_lstm or
                                  conv_rnn_type == ConvRNNType.sep_conv_lstm),
            temporal_length=sample_frame)

    elif backbone == 'resnet50':
        au_rcnn = AU_RCNN_Resnet50(pretrained_model=args.pretrained_model,
                                   min_size=config.IMG_SIZE[0],
                                   max_size=config.IMG_SIZE[1],
                                   mean_file=args.mean,
                                   classify_mode=use_au_rcnn_loss,
                                   n_class=class_num,
                                   use_roi_align=use_roi_align,
                                   use_feature_map=use_feature_map)
    au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)

    # if use_label_dep_rnn_layer:
    #     use_space = (spatial_edge_mode != SpatialEdgeMode.no_edge)
    #     use_temporal = (temporal_edge_mode != TemporalEdgeMode.no_temporal)
    #     label_dependency_layer = LabelDependencyLayer(database, out_size=class_num, train_mode=False,
    #                                                   label_win_size=2, x_win_size=1,
    #                                                   label_dropout_ratio=0.0, use_space=use_space,
    #                                                   use_temporal=use_temporal)
    if conv_rnn_type == ConvRNNType.conv_lstm:
        space_time_conv_lstm = SpaceTimeConv(
            None,
            use_label_dep_rnn_layer,
            class_num,
            spatial_edge_mode=spatial_edge_mode,
            temporal_edge_mode=temporal_edge_mode,
            conv_rnn_type=conv_rnn_type)
        loss_head_module = space_time_conv_lstm
    elif conv_rnn_type == ConvRNNType.fc_lstm:
        space_time_fc_lstm = SpaceTimeSepFcLSTM(
            database,
            class_num,
            spatial_edge_mode=spatial_edge_mode,
            temporal_edge_mode=temporal_edge_mode)
        loss_head_module = space_time_fc_lstm
    elif conv_rnn_type == ConvRNNType.conv_rcnn:
        au_rcnn_train_loss = AU_RCNN_TrainChainLoss()
        loss_head_module = au_rcnn_train_loss
    elif conv_rnn_type == ConvRNNType.sep_conv_lstm:
        space_time_sep_conv_lstm = SpaceTimeSepConv(
            database,
            class_num,
            spatial_edge_mode=spatial_edge_mode,
            temporal_edge_mode=temporal_edge_mode)
        loss_head_module = space_time_sep_conv_lstm

    model = Wrapper(au_rcnn_train_chain,
                    loss_head_module,
                    database,
                    sample_frame,
                    use_feature_map=use_feature_map,
                    gpu=args.gpu)
    chainer.serializers.load_npz(args.model, model)
    print("loading {}".format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    mc_manager = PyLibmcManager(args.memcached_host)
    img_dataset = AUDataset(
        database=database,
        fold=fold,
        split_name='test',  # FIXME
        split_index=split_idx,
        mc_manager=mc_manager,
        train_all_data=False)

    video_dataset = AU_video_dataset(
        au_image_dataset=img_dataset,
        sample_frame=sample_frame,
        train_mode=False,  #FIXME
        paper_report_label_idx=paper_report_label_idx,
        fetch_use_parrallel_iterator=True)

    video_dataset = TransformDataset(video_dataset,
                                     Transform3D(au_rcnn, mirror=False))

    # test_iter = SerialIterator(video_dataset, batch_size=sample_frame * args.batch,
    #                                  repeat=False, shuffle=False)

    test_iter = MultiprocessIterator(video_dataset,
                                     batch_size=sample_frame * args.batch,
                                     n_processes=args.proc_num,
                                     repeat=False,
                                     shuffle=False,
                                     n_prefetch=10,
                                     shared_mem=10000000)

    with chainer.no_backprop_mode(), chainer.using_config(
            'cudnn_deterministic', True), chainer.using_config('train', False):
        npz_path = os.path.dirname(
            args.model) + os.path.sep + "pred_" + os.path.basename(
                args.model)[:os.path.basename(args.model).rindex("_")] + ".npz"
        print("npz_path: {}".format(npz_path))
        au_evaluator = ActionUnitEvaluator(
            test_iter,
            model,
            args.gpu,
            database=database,
            paper_report_label=paper_report_label,
            converter=lambda batch, device: concat_examples_not_labels(
                batch, device, padding=0),
            sample_frame=sample_frame,
            output_path=npz_path)
        observation = au_evaluator.evaluate()
        eval_json_path = os.path.dirname(args.model) + os.path.sep + \
            "evaluation_result_{0}.json".format(
                os.path.basename(args.model)[:os.path.basename(args.model).rindex("_")])
        with open(eval_json_path, "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
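
Both output paths above re-derive the model's stem with nested os.path.basename / rindex calls. A small hypothetical helper expresses the same pattern (cut at the last underscore, i.e. drop the trailing "model.npz") more readably:

import os

def model_stem(model_path):
    # "BP4D_3_fold_1_..._model.npz" -> "BP4D_3_fold_1_..."
    base = os.path.basename(model_path)
    return base[:base.rindex("_")]

# the prediction path above could then be written as:
# npz_path = os.path.dirname(args.model) + os.path.sep + "pred_" + model_stem(args.model) + ".npz"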
Beispiel #24
def build_graph(faster_rcnn, reader_func, output_dir, database_name,
                force_generate, proc_num, cut: bool, extract_key,
                train_subject, test_subject):
    '''
    Currently the CRF can only deal with the single-label situation,
    so /home/machen/dataset/BP4D/label_dict.txt is used to treat a combined label as a new single label.
    Example (each file contains one video!):
    node_id known_label features
    1_12 +1 np_file:/path/to/npy features:1,3,4,5,5,...
    node_id format: ${frame}_${roi}, e.g. 1_12
    or
    444 +[0,0,0,1,0,1,0] np_file:/path/to/npy features:1,3,4,5,5,...
    spatio edges can carry two factor nodes here, e.g. spatio_1 denotes the upper-face and spatio_2 the lower-face relation
    #edge 143 4289 spatio_1
    #edge 143 4289 spatio_2
    #edge 112 1392 temporal

    mode: RNN or CRF
    '''
    adaptive_AU_database(database_name)
    adaptive_AU_relation(database_name)

    is_binary_AU = True

    for video_info, subject_id in reader_func(
            output_dir,
            is_binary_AU=is_binary_AU,
            is_need_adaptive_AU_relation=False,
            force_generate=force_generate,
            proc_num=proc_num,
            cut=cut,
            train_subject=train_subject):

        node_list = []
        temporal_edges = []
        spatio_edges = []
        h_info_array = []
        box_geometry_array = []
        for entry_dict in video_info:
            frame = entry_dict["frame"]
            cropped_face = entry_dict["cropped_face"]
            print("processing frame:{}".format(frame))
            all_couple_mask_dict = entry_dict[
                "all_couple_mask_dict"]  # key is AU couple tuple,不管脸上有没有该AU都返回回来
            image_labels = entry_dict[
                "all_labels"]  # each region has a label(binary or AU)

            bboxes = []
            labels = []
            AU_couple_bbox_dict = dict()

            for idx, (AU_couple, mask) in enumerate(
                    all_couple_mask_dict.items()
            ):  # AU may contain single_true AU or AU binary tuple (depends on need_adaptive_AU_relation)
                region_label = image_labels[
                    idx]  # str or tuple, so all_labels index must be the same as all_couple_mask_dict
                connect_arr = cv2.connectedComponents(mask,
                                                      connectivity=8,
                                                      ltype=cv2.CV_32S)
                component_num = connect_arr[0]
                label_matrix = connect_arr[1]
                temp_boxes = []
                for component_label in range(1, component_num):
                    row_col = list(
                        zip(*np.where(label_matrix == component_label)))
                    row_col = np.array(row_col)
                    y_min_index = np.argmin(row_col[:, 0])
                    y_min = row_col[y_min_index, 0]
                    x_min_index = np.argmin(row_col[:, 1])
                    x_min = row_col[x_min_index, 1]
                    y_max_index = np.argmax(row_col[:, 0])
                    y_max = row_col[y_max_index, 0]
                    x_max_index = np.argmax(row_col[:, 1])
                    x_max = row_col[x_max_index, 1]
                    # same region may be shared by different AU, we must deal with it
                    coordinates = (y_min, x_min, y_max, x_max)

                    if y_min == y_max and x_min == x_max:
                        continue
                    temp_boxes.append(coordinates)
                temp_boxes = sorted(temp_boxes, key=itemgetter(
                    3))  # ensure every frame has the same box order
                for coordinates in temp_boxes:
                    if coordinates not in bboxes:
                        bboxes.append(coordinates)
                        labels.append(
                            region_label
                        )  # AU may contain single_true AU or AU binary tuple (depends on need_adaptive_AU_relation)
                        AU_couple_bbox_dict[coordinates] = AU_couple
                del label_matrix
            if len(bboxes) != config.BOX_NUM[database_name]:
                print("boxes num != {0}, real box num= {1}".format(
                    config.BOX_NUM[database_name], len(bboxes)))
                continue
            with chainer.no_backprop_mode(), chainer.using_config(
                    'train', False):
                bboxes = np.asarray(bboxes, dtype=np.float32)
                h = faster_rcnn.extract(cropped_face,
                                        bboxes,
                                        layer=extract_key)  # shape = R' x 2048
            assert h.shape[0] == len(bboxes)
            h = chainer.cuda.to_cpu(h)
            h = h.reshape(len(bboxes), -1)

            # everything at this indentation level is within the same image
            # print("box number, all_mask:", len(bboxes),len(all_couple_mask_dict))
            for box_idx, box in enumerate(bboxes):
                label = labels[
                    box_idx]  # label maybe single true AU or AU binary tuple
                if isinstance(label, tuple):
                    label_arr = np.char.mod("%d", label)
                    label = "({})".format(",".join(label_arr))
                h_flat = h[box_idx]
                # nonzero_idx = np.nonzero(h_flat)[0]
                # h_flat_nonzero = h_flat[nonzero_idx]
                # h_info = ",".join("{}:{:.4f}".format(idx, val) for idx,val in zip(nonzero_idx,h_flat_nonzero))

                node_id = "{0}_{1}".format(frame, box_idx)
                node_list.append("{0} {1} feature_idx:{2}".format(
                    node_id, label, len(h_info_array)))
                h_info_array.append(h_flat)
                box_geometry_array.append(box)

            # pair up boxes within the same frame and check for connecting edges; note that regions with AU=0 (AU absent) also take part in the connections
            for box_idx_a, box_idx_b in map(
                    sorted, itertools.combinations(range(len(bboxes)), 2)):
                node_id_a = "{0}_{1}".format(frame, box_idx_a)
                node_id_b = "{0}_{1}".format(frame, box_idx_b)
                AU_couple_a = AU_couple_bbox_dict[bboxes[
                    box_idx_a]]  # AU couple represent region( maybe symmetry in face)
                AU_couple_b = AU_couple_bbox_dict[bboxes[box_idx_b]]
                if AU_couple_a == AU_couple_b or has_edge(
                        AU_couple_a, AU_couple_b, database_name):
                    spatio_edges.append("#edge {0} {1} spatio".format(
                        node_id_a, node_id_b))

        box_id_temporal_dict = defaultdict(
            list)  # key = roi/bbox id, value = node_id list cross temporal
        for node_info in node_list:
            node_id = node_info[0:node_info.index(" ")]
            box_id = node_id[node_id.index("_") + 1:]
            box_id_temporal_dict[box_id].append(node_id)

        for node_id_list in box_id_temporal_dict.values():
            for idx, node_id in enumerate(node_id_list):
                if idx + 1 < len(node_id_list):
                    node_id_next = node_id_list[idx + 1]
                    temporal_edges.append("#edge {0} {1} temporal".format(
                        node_id, node_id_next))

        if subject_id in train_subject:
            output_path = "{0}/train/{1}.txt".format(output_dir,
                                                     video_info[0]["video_id"])
        elif subject_id in test_subject:
            output_path = "{0}/test/{1}.txt".format(output_dir,
                                                    video_info[0]["video_id"])
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        npz_path = output_path[:output_path.rindex(".")] + ".npz"

        np.savez(npz_path,
                 appearance_features=np.asarray(h_info_array,
                                                dtype=np.float32),
                 geometry_features=np.array(box_geometry_array,
                                            dtype=np.float32))
        with open(output_path, "w") as file_obj:
            for line in node_list:
                file_obj.write("{}\n".format(line))
            for line in spatio_edges:
                file_obj.write("{}\n".format(line))
            for line in temporal_edges:
                file_obj.write("{}\n".format(line))
            file_obj.flush()
            node_list.clear()
            spatio_edges.clear()
            temporal_edges.clear()
            h_info_array.clear()
            box_geometry_array.clear()
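
build_graph therefore emits one plain-text graph file per video: node lines of the form "{frame}_{roi} {label} feature_idx:{idx}" followed by "#edge a b spatio" / "#edge a b temporal" lines, with the features stored in a sibling .npz. A minimal reader for that format (a sketch under exactly those assumptions) could be:

import numpy as np

def load_graph_file(txt_path):
    # nodes maps node_id -> (label string, appearance feature row);
    # edges is a list of (node_a, node_b, edge_type) triples.
    features = np.load(txt_path[:txt_path.rindex(".")] + ".npz")["appearance_features"]
    nodes, edges = {}, []
    with open(txt_path, "r") as file_obj:
        for line in file_obj:
            line = line.strip()
            if not line:
                continue
            if line.startswith("#edge"):
                _, node_a, node_b, edge_type = line.split()
                edges.append((node_a, node_b, edge_type))
            else:
                node_id, label, feature_info = line.split(" ", 2)
                feature_idx = int(feature_info[feature_info.index(":") + 1:])
                nodes[node_id] = (label, features[feature_idx])
    return nodes, edges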
Beispiel #25
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')  # open_crf layer only works for CPU mode
    parser.add_argument("--model", "-m", help="pretrained model file path") # which contains pretrained target
    parser.add_argument('--proc_num', type=int, default=10, help="multiprocess fetch data process number")
    parser.add_argument("--data_dir", type=str, default="/home/machen/dataset/extract_features")
    parser.add_argument('--batch', '-b', type=int, default=1,
                        help='mini batch size')
    args = parser.parse_args()
    if not args.model.endswith("model.npz"):
        return

    mode_dict = extract_mode(args.model)


    database = mode_dict["database"]
    fold = mode_dict["fold"]
    split_idx = mode_dict["split_idx"]
    use_paper_num_label = mode_dict["use_paper_num_label"]
    conv_layers = mode_dict["conv_layers"]
    two_stream_mode = mode_dict["two_stream_mode"]
    faster_backbone_type = mode_dict["faster_backbone_type"]
    T = 10
    data_dir = args.data_dir + "/{0}_{1}_fold_{2}/test".format(database, fold, split_idx)

    adaptive_AU_database(database)
    paper_report_label, class_num = squeeze_label_num_report(database, use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    class_num = len(config.AU_SQUEEZE)
    if use_paper_num_label:
        class_num = len(paper_report_label_idx)

    model_print_dict = OrderedDict()
    for key, value in mode_dict.items():
        model_print_dict[key] = str(value)
    print("""
        {0}
        ======================================
        INFO:
        {1}
        ======================================
        """.format(args.model, json.dumps(model_print_dict, sort_keys=True, indent=8)))
    if faster_backbone_type == FasterBackboneType.conv1d:
        faster_extractor_backbone = FasterBackbone(conv_layers, 2048, 1024)
    elif faster_backbone_type == FasterBackboneType.tcn:
        faster_extractor_backbone = TcnBackbone(conv_layers, 2048, 1024)
    faster_head_module = FasterHeadModule(2048, class_num + 1, 7)  # note that the class number here must include background
    initialW = chainer.initializers.Normal(0.001)
    spn = SegmentProposalNetwork(1024, n_anchors=len(config.ANCHOR_SIZE), initialW=initialW)
    seg_predictor = TimeSegmentRCNNPredictor(faster_extractor_backbone, spn, faster_head_module)
    model = WrapperPredictor(seg_predictor, class_num=class_num)

    chainer.serializers.load_npz(args.model, model.seg_predictor.train_chain)
    print("loading {}".format(args.model))

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)
    npz_dataset = NpzFeatureDataset(data_dir, database, two_stream_mode=two_stream_mode, T=T)

    test_iter = SerialIterator(npz_dataset, batch_size=1,
                                     repeat=False, shuffle=False)


    with chainer.no_backprop_mode(), chainer.using_config('cudnn_deterministic', True), chainer.using_config('train', False):
        # time_axis_rcnn_BP4D_3_fold_1@use_paper_num_label@rgb_flow@30_model.npz
        pred_result_npz_path = os.path.dirname(args.model) + os.path.sep + os.path.basename(args.model)[
                                                                    :os.path.basename(args.model).rindex("_")] + "_pred_result.npz"
        au_evaluator = ActionUnitEvaluator(test_iter, model, args.gpu, database=database,
                                           paper_report_label=paper_report_label,
                                           converter=lambda batch, device: concat_examples_not_string(batch, device, padding=0),
                                           output_path=pred_result_npz_path)
        observation = au_evaluator.evaluate()
        eval_json_path = os.path.dirname(args.model) + os.path.sep + \
            "evaluation_result_{0}.json".format(
                os.path.basename(args.model)[:os.path.basename(args.model).rindex("_")])
        with open(eval_json_path, "w") as file_obj:
            file_obj.write(json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
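
Beispiel #24 above and Beispiel #26 below both derive a bounding box from each connected component of an RoI mask via np.where plus four argmin/argmax lookups. The same coordinates can be computed more directly; a compact equivalent:

import numpy as np

def component_bbox(label_matrix, component_label):
    # returns (y_min, x_min, y_max, x_max) for one connected component,
    # or None for a degenerate single-pixel component (skipped by the original code too)
    rows, cols = np.where(label_matrix == component_label)
    y_min, y_max = rows.min(), rows.max()
    x_min, x_max = cols.min(), cols.max()
    if y_min == y_max and x_min == x_max:
        return None
    return (y_min, x_min, y_max, x_max)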
Beispiel #26
def build_graph_roi_single_label(faster_rcnn, reader_func, output_dir,
                                 database_name, force_generate, proc_num,
                                 cut: bool, extract_key, train_subject,
                                 test_subject):
    '''
    Currently the CRF can only deal with the single-label situation,
    so /home/machen/dataset/BP4D/label_dict.txt is used to treat a combined label as a new single label.
    Example (each file contains one video!):
    node_id known_label features
    1_12 +1 np_file:/path/to/npy features:1,3,4,5,5,...
    node_id format: ${frame}_${roi}, e.g. 1_12
    or
    444 +[0,0,0,1,0,1,0] np_file:/path/to/npy features:1,3,4,5,5,...
    spatio edges can carry two factor nodes here, e.g. spatio_1 denotes the upper-face and spatio_2 the lower-face relation
    #edge 143 4289 spatio_1
    #edge 143 4289 spatio_2
    #edge 112 1392 temporal

    mode: RNN or CRF
    '''
    adaptive_AU_database(database_name)
    adaptive_AU_relation(database_name)
    au_couple_dict = get_zip_ROI_AU()  # value is an AU couple tuple; each tuple denotes an RoI
    # max_au_couple_len = max(len(couple) for couple in au_couple_dict.values())  # we use itertools.product instead
    label_bin_len = config.BOX_NUM[
        database_name]  # each box/RoI only has 1 or 0
    au_couple_set = set(au_couple_dict.values())
    au_couple_list = list(au_couple_set)
    au_couple_list.append(("1", "2", "5", "7"))  # because it is symmetric area
    is_binary_AU = True

    for video_info, subject_id in reader_func(
            output_dir,
            is_binary_AU=is_binary_AU,
            is_need_adaptive_AU_relation=False,
            force_generate=force_generate,
            proc_num=proc_num,
            cut=cut,
            train_subject=train_subject):

        extracted_feature_cache = dict()  # key = hash of the cropped-face bytes, value = extracted feature h (speed-up cache)
        frame_box_cache = dict()  # key = frame, value = boxes
        frame_labels_cache = dict()
        frame_AU_couple_bbox_dict_cache = dict()
        # each video file is copying multiple version but differ in label
        if database_name == "BP4D":
            label_split_list = config.BP4D_LABEL_SPLIT
        elif database_name == "DISFA":
            label_split_list = config.DISFA_LABEL_SPLIT
        for couples_tuple in label_split_list:  # couples_tuple = ("1","3","5",...,"4") across AU couples; config.LABEL_SPLIT comes from frequent-pattern statistics
            assert len(couples_tuple) == config.BOX_NUM[database_name]
            couples_tuple = tuple(map(str, sorted(map(int, couples_tuple))))
            couples_tuple_set = set(
                couples_tuple)  # used to skip tuples that contain duplicate AUs
            if len(couples_tuple_set) < len(couples_tuple):
                continue
            # limit too many combination
            # count = 0
            # for fp in fp_set:
            #     inter_set = couples_tuple_set & set(fp)
            #     union_set = couples_tuple_set | set(fp)
            #     iou = len(inter_set) / len(union_set)
            #     if iou > 0.6:
            #         count += 1
            # if count < 20:
            #     continue

            node_list = []
            temporal_edges = []
            spatio_edges = []
            h_info_array = []
            box_geometry_array = []
            for entry_dict in video_info:
                frame = entry_dict["frame"]
                cropped_face = entry_dict["cropped_face"]
                print("processing frame:{}".format(frame))
                all_couple_mask_dict = entry_dict[
                    "all_couple_mask_dict"]  # key is AU couple tuple,不管脸上有没有该AU都返回回来
                image_labels = entry_dict[
                    "all_labels"]  # each region has a label(binary or AU)

                bboxes = []
                labels = []
                AU_couple_bbox_dict = OrderedDict()

                if frame in frame_box_cache:
                    bboxes = frame_box_cache[frame]
                    labels = frame_labels_cache[frame]
                    AU_couple_bbox_dict = frame_AU_couple_bbox_dict_cache[
                        frame]
                else:

                    for idx, (AU_couple, mask) in enumerate(
                            all_couple_mask_dict.items()
                    ):  # We cannot sort this dict here because region_label depends on its order. AU may contain a single true AU or an AU binary tuple (depends on need_adaptive_AU_relation)
                        region_label = image_labels[
                            idx]  # str or tuple, so all_labels index must be the same as all_couple_mask_dict
                        connect_arr = cv2.connectedComponents(mask,
                                                              connectivity=8,
                                                              ltype=cv2.CV_32S)
                        component_num = connect_arr[0]
                        label_matrix = connect_arr[1]
                        for component_label in range(1, component_num):
                            row_col = list(
                                zip(*np.where(
                                    label_matrix == component_label)))
                            row_col = np.array(row_col)
                            y_min_index = np.argmin(row_col[:, 0])
                            y_min = row_col[y_min_index, 0]
                            x_min_index = np.argmin(row_col[:, 1])
                            x_min = row_col[x_min_index, 1]
                            y_max_index = np.argmax(row_col[:, 0])
                            y_max = row_col[y_max_index, 0]
                            x_max_index = np.argmax(row_col[:, 1])
                            x_max = row_col[x_max_index, 1]
                            # same region may be shared by different AU, we must deal with it
                            coordinates = (y_min, x_min, y_max, x_max)

                            if y_min == y_max and x_min == x_max:
                                continue

                            if coordinates not in bboxes:
                                bboxes.append(
                                    coordinates
                                )  # bboxes and labels have the same order
                                labels.append(
                                    region_label
                                )  # AU may contain single_true AU or AU binary tuple (depends on need_adaptive_AU_relation)
                                AU_couple_bbox_dict[coordinates] = AU_couple

                        del label_matrix
                    if len(bboxes) != config.BOX_NUM[database_name]:
                        print("box num != {0}, actual box num = {1}".format(
                            config.BOX_NUM[database_name], len(bboxes)))
                        continue
                frame_box_cache[frame] = bboxes
                frame_AU_couple_bbox_dict_cache[frame] = AU_couple_bbox_dict
                frame_labels_cache[frame] = labels
                box_idx_AU_dict = dict(
                )  # box_idx => AU; cannot be cached, because couples_tuple differs on every iteration
                already_added_AU_set = set()
                for box_idx, _ in enumerate(bboxes):  # bboxes may come from the cache
                    AU_couple = list(AU_couple_bbox_dict.values())[
                        box_idx]  # AU_couple_bbox_dict may come from the cache
                    for AU in couples_tuple:  # couples_tuple is not cached, so it changes after every iteration
                        if AU in AU_couple and AU not in already_added_AU_set:
                            box_idx_AU_dict[box_idx] = (AU, AU_couple)
                            already_added_AU_set.add(AU)
                            break

                cropped_face.flags.writeable = False
                key = hash(cropped_face.data.tobytes())
                if key in extracted_feature_cache:
                    h = extracted_feature_cache[key]
                else:
                    with chainer.no_backprop_mode(), chainer.using_config(
                            'train', False):
                        h = faster_rcnn.extract(
                            cropped_face, bboxes,
                            layer=extract_key)  # shape = R' x 2048
                        extracted_feature_cache[key] = h
                    assert h.shape[0] == len(bboxes)
                h = chainer.cuda.to_cpu(h)
                h = h.reshape(len(bboxes), -1)

                # everything at this indent level operates within a single image
                # print("box number, all_mask:", len(bboxes), len(all_couple_mask_dict))
                assert len(box_idx_AU_dict) == config.BOX_NUM[database_name]
                for box_idx, (AU,
                              AU_couple) in sorted(box_idx_AU_dict.items(),
                                                   key=lambda e: int(e[0])):
                    label = np.zeros(
                        shape=label_bin_len, dtype=np.int32
                    )  # the bin length equals the box number, which exceeds the AU_couple number
                    AU_squeeze_idx = config.AU_SQUEEZE.inv[AU]
                    label[couples_tuple.index(AU)] = labels[box_idx][
                        AU_squeeze_idx]  # the cached labels may be wrong; labels[box_idx] = 0,0,1,1,...,0, but we only want the value at this specific idx
                    label = tuple(label)
                    label_arr = np.char.mod("%d", label)
                    label = "({})".format(",".join(label_arr))
                    h_flat = h[box_idx]
                    node_id = "{0}_{1}".format(frame, box_idx)
                    node_list.append(
                        "{0} {1} feature_idx:{2} AU_couple:{3} AU:{4}".format(
                            node_id, label, len(h_info_array), AU_couple, AU))
                    h_info_array.append(h_flat)
                    box_geometry_array.append(bboxes[box_idx])

                # pair up all boxes within the same frame and check for edges; note that regions with AU = 0 (the AU is absent) also take part in the connections
                for box_idx_a, box_idx_b in map(
                        sorted, itertools.combinations(range(len(bboxes)), 2)):
                    node_id_a = "{0}_{1}".format(frame, box_idx_a)
                    node_id_b = "{0}_{1}".format(frame, box_idx_b)
                    AU_couple_a = AU_couple_bbox_dict[bboxes[
                        box_idx_a]]  # an AU couple represents a region (possibly symmetric on the face)
                    AU_couple_b = AU_couple_bbox_dict[bboxes[box_idx_b]]
                    if AU_couple_a == AU_couple_b or has_edge(
                            AU_couple_a, AU_couple_b, database_name):
                        spatio_edges.append("#edge {0} {1} spatio".format(
                            node_id_a, node_id_b))

            box_id_temporal_dict = defaultdict(
                list)  # key = roi/bbox id, value = list of node_ids across time
            for node_info in node_list:
                node_id = node_info[0:node_info.index(" ")]
                box_id = node_id[node_id.index("_") + 1:]
                box_id_temporal_dict[box_id].append(node_id)

            for node_id_list in box_id_temporal_dict.values():
                for idx, node_id in enumerate(node_id_list):
                    if idx + 1 < len(node_id_list):
                        node_id_next = node_id_list[idx + 1]
                        temporal_edges.append("#edge {0} {1} temporal".format(
                            node_id, node_id_next))
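            # temporal edges chain nodes that share the same box index across
            # frames, so every RoI forms its own sequence through time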
            train_AU_out_path = "{0}/train/{1}/{2}.txt".format(
                output_dir, "_".join(map(str, couples_tuple)),
                video_info[0]["video_id"])
            test_AU_out_path = "{0}/test/{1}/{2}.txt".format(
                output_dir, "_".join(map(str, couples_tuple)),
                video_info[0]["video_id"])
            if subject_id in train_subject:
                output_path = train_AU_out_path
                npz_path = output_dir + os.sep + "train" + os.sep + os.path.basename(
                    output_path)[:os.path.basename(output_path).
                                 rindex(".")] + ".npz"
            elif subject_id in test_subject:
                output_path = test_AU_out_path
                npz_path = output_dir + os.sep + "test" + os.sep + os.path.basename(
                    output_path)[:os.path.basename(output_path).
                                 rindex(".")] + ".npz"
            else:
                continue  # subject belongs to neither split; skip to avoid an unbound output_path
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            if not os.path.exists(npz_path):
                np.savez(npz_path,
                         appearance_features=h_info_array,
                         geometry_features=np.array(box_geometry_array,
                                                    dtype=np.float32))
            with open(output_path, "w") as file_obj:
                for line in node_list:
                    file_obj.write("{}\n".format(line))
                for line in spatio_edges:
                    file_obj.write("{}\n".format(line))
                for line in temporal_edges:
                    file_obj.write("{}\n".format(line))
                file_obj.flush()
                node_list.clear()
                spatio_edges.clear()
                temporal_edges.clear()
                h_info_array.clear()
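
For reference, a minimal sketch of reading back the node/edge txt files written above; parse_graph_file is a hypothetical helper (not part of this repository), and the two line layouts come from the format strings in the code:

def parse_graph_file(path):
    # node line:  "{node_id} {label} feature_idx:{idx} AU_couple:{couple} AU:{au}"
    # edge line:  "#edge {node_id_a} {node_id_b} spatio|temporal"
    nodes, edges = {}, []
    with open(path, "r") as file_obj:
        for line in file_obj:
            parts = line.split()
            if not parts:
                continue
            if parts[0] == "#edge":
                edges.append((parts[1], parts[2], parts[3]))
            else:
                node_id, label = parts[0], parts[1]
                feature_idx = int(parts[2].split(":")[1])
                nodes[node_id] = (label, feature_idx)
    return nodes, edges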
Beispiel #27
0
def main():
    print("chainer cudnn enabled: {}".format(chainer.cuda.cudnn_enabled))
    parser = argparse.ArgumentParser(
        description='Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/AU_R_CNN/')
    parser.add_argument('--gpu',
                        '-g',
                        default="0",
                        help='GPU ID, multiple GPUs split by comma; '
                        'note that the BPTT updater does not support multi-GPU')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--database',
                        default='BP4D',
                        help='database: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--readtype', default='rgb', help='rgb/flow')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=20)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--mean',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--feature_model',
                        default="resnet101",
                        help="vgg or resnet101 for train")
    parser.add_argument('--optimizer',
                        default='RMSprop',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model',
                        default='resnet101',
                        help='imagenet/vggface/resnet101/*.npz')
    parser.add_argument(
        '--use_memcached',
        action='store_true',
        help='whether to use memcached to speed up fetching crops & masks')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument(
        "--is_pretrained",
        action="store_true",
        help="whether to pretrain on BP4D for later use on the DISFA dataset")
    parser.add_argument(
        "--pretrained_target",
        '-pt',
        default="",
        help="which label set to use when pretraining (e.g. DISFA)")
    parser.add_argument('--eval_mode',
                        action='store_true',
                        help='Use test datasets for evaluation metric')
    parser.add_argument('--test_model',
                        default="",
                        help='test model for evaluation')
    parser.add_argument(
        '--occlude',
        default='',
        help=
        'occlude the upper/left/right/lower part of the face at test time; empty means no occlusion')
    parser.add_argument("--img_resolution", type=int, default=512)
    args = parser.parse_args()
    config.IMG_SIZE = (args.img_resolution, args.img_resolution)
    if not os.path.exists(args.pid):
        os.makedirs(args.pid)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.path.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()

    print('GPU: {}'.format(args.gpu))
    if args.is_pretrained:
        adaptive_AU_database(args.pretrained_target)
    else:
        adaptive_AU_database(args.database)
    np.random.seed(args.seed)
    # a list txt file must be built first: id_trainval_0.txt, where each line is subject + "/" + emotion_seq + "/" + frame
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))
    resnet101 = ResNet(len(config.AU_SQUEEZE),
                       pretrained_model=args.pretrained_model)
    model = TrainChain(resnet101)

    if args.eval_mode:
        with chainer.no_backprop_mode(), chainer.using_config("train", False):
            if args.occlude:
                test_data = ImageDataset(
                    database=args.database,
                    fold=args.fold,
                    split_name='test',
                    split_index=args.split_idx,
                    mc_manager=mc_manager,
                    train_all_data=False,
                    pretrained_target=args.pretrained_target,
                    img_resolution=args.img_resolution)
                test_data = TransformDataset(
                    test_data, Transform(mean_rgb_path=args.mean,
                                         mirror=False))
                assert args.occlude in ["upper", "lower", "left", "right"]
                test_data = TransformDataset(test_data,
                                             OccludeTransform(args.occlude))

                if args.proc_num == 1:
                    test_iter = SerialIterator(test_data,
                                               1,
                                               repeat=False,
                                               shuffle=True)
                else:
                    test_iter = MultiprocessIterator(test_data,
                                                     batch_size=1,
                                                     n_processes=args.proc_num,
                                                     repeat=False,
                                                     shuffle=True,
                                                     n_prefetch=10,
                                                     shared_mem=10000000)
                single_model_file_name = args.test_model
                chainer.serializers.load_npz(single_model_file_name, resnet101)
                gpu = int(args.gpu)
                chainer.cuda.get_device_from_id(gpu).use()
                resnet101.to_gpu(gpu)
                evaluator = AUEvaluator(test_iter,
                                        resnet101,
                                        lambda batch, device: concat_examples(
                                            batch, device, padding=0),
                                        args.database,
                                        "/home/machen/face_expr",
                                        device=gpu,
                                        npz_out_path=args.out + os.path.sep +
                                        "npz_occlude_{0}_split_{1}.npz".format(
                                            args.occlude, args.split_idx))
                observation = evaluator.evaluate()
                with open(
                        args.out + os.path.sep +
                        "evaluation_occlude_{0}_fold_{1}_result_test_mode.json"
                        .format(args.occlude, args.split_idx),
                        "w") as file_obj:
                    file_obj.write(
                        json.dumps(observation,
                                   indent=4,
                                   separators=(',', ': ')))
                    file_obj.flush()
            else:
                test_data = ImageDataset(
                    database=args.database,
                    fold=args.fold,
                    split_name='test',
                    split_index=args.split_idx,
                    mc_manager=mc_manager,
                    train_all_data=False,
                    pretrained_target=args.pretrained_target,
                    img_resolution=args.img_resolution)
                test_data = TransformDataset(
                    test_data, Transform(mean_rgb_path=args.mean,
                                         mirror=False))
                if args.proc_num == 1:
                    test_iter = SerialIterator(test_data,
                                               1,
                                               repeat=False,
                                               shuffle=False)
                else:
                    test_iter = MultiprocessIterator(test_data,
                                                     batch_size=1,
                                                     n_processes=args.proc_num,
                                                     repeat=False,
                                                     shuffle=False,
                                                     n_prefetch=10,
                                                     shared_mem=10000000)
                single_model_file_name = args.test_model
                chainer.serializers.load_npz(single_model_file_name, resnet101)

                gpu = int(args.gpu) if "," not in args.gpu else int(
                    args.gpu[:args.gpu.index(",")])
                chainer.cuda.get_device_from_id(gpu).use()
                resnet101.to_gpu(gpu)
                evaluator = AUEvaluator(
                    test_iter,
                    resnet101,
                    lambda batch, device: concat_examples(
                        batch, device, padding=0),
                    args.database,
                    "/home/machen/face_expr",
                    device=gpu,
                    npz_out_path=args.out + os.path.sep +
                    "npz_split_{}.npz".format(args.split_idx))
                observation = evaluator.evaluate()
                with open(
                        args.out + os.path.sep +
                        "evaluation_split_{}_result_train_mode.json".format(
                            args.split_idx), "w") as file_obj:
                    file_obj.write(
                        json.dumps(observation,
                                   indent=4,
                                   separators=(',', ': ')))
                    file_obj.flush()
        return

    train_data = ImageDataset(database=args.database,
                              fold=args.fold,
                              split_name='trainval',
                              split_index=args.split_idx,
                              mc_manager=mc_manager,
                              train_all_data=args.is_pretrained,
                              read_type=args.readtype,
                              pretrained_target=args.pretrained_target,
                              img_resolution=args.img_resolution)
    train_data = TransformDataset(train_data, Transform(args.mean,
                                                        mirror=True))
    if args.proc_num == 1:
        train_iter = SerialIterator(train_data, args.batch_size, True, True)
    else:
        train_iter = MultiprocessIterator(train_data,
                                          batch_size=args.batch_size,
                                          n_processes=args.proc_num,
                                          repeat=True,
                                          shuffle=True,
                                          n_prefetch=10,
                                          shared_mem=31457280)
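        # shared_mem (31457280 bytes = 30 MiB) must hold one serialized
        # example; presumably enlarged here because 512x512 training images
        # exceed the 10 MB buffers used by the test iterators above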

    if "," in args.gpu:
        for gpu in args.gpu.split(","):
            chainer.cuda.get_device_from_id(int(gpu)).use()
    else:
        chainer.cuda.get_device_from_id(int(args.gpu)).use()

    optimizer = None
    if args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(
            lr=args.lr
        )  # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to AdaGrad because the loss became NaN
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        print("using Adam")
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        print("using AdaDelta")
        optimizer = chainer.optimizers.AdaDelta()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer_name = args.optimizer

    if not os.path.exists(args.out):
        os.makedirs(args.out)
    pretrained_optimizer_file_name = '{0}_{1}_fold_{2}_{3}_{4}_optimizer.npz'.format(
        args.database, args.fold, args.split_idx, args.feature_model,
        optimizer_name)
    pretrained_optimizer_file_name = args.out + os.path.sep + pretrained_optimizer_file_name

    single_model_file_name = args.out + os.path.sep + '{0}_{1}_fold_{2}_{3}_model.npz'.format(
        args.database, args.fold, args.split_idx, args.feature_model)

    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snatshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)

    if os.path.exists(single_model_file_name):
        print("loading pretrained snapshot:{}".format(single_model_file_name))
        chainer.serializers.load_npz(single_model_file_name, model.backbone)

    print(" GPU({0}) updater".format(args.gpu))
    updater = chainer.training.StandardUpdater(
        train_iter,
        optimizer,
        device=int(args.gpu),
        converter=lambda batch, device: concat_examples(
            batch, device, padding=0))

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
                   trigger=(args.snapshot, 'iteration'))

    trainer.extend(chainer.training.extensions.snapshot_object(
        model.backbone, filename=os.path.basename(single_model_file_name)),
                   trigger=(args.snapshot, 'iteration'))

    log_interval = 100, 'iteration'
    print_interval = 100, 'iteration'
    plot_interval = 100, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        # use Adam
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.5, optimizer=optimizer),
                       trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="{0}_fold_{1}.log".format(args.fold, args.split_idx)))
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/accuracy',
    ]),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss', "validation/main/loss"],
            file_name='loss_{0}_fold_{1}.png'.format(args.fold,
                                                     args.split_idx),
            trigger=plot_interval),
                       trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_fold_{1}.png'.format(
                args.fold, args.split_idx),
            trigger=plot_interval),
                       trigger=plot_interval)

    trainer.run()
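
The converter lambda above relies on the padding behavior of chainer.dataset.concat_examples; a minimal, self-contained sketch with toy arrays (not repository data):

import numpy as np
from chainer.dataset import concat_examples

# padding=0 pads every example to the largest shape before stacking,
# which is exactly what the converter lambda passed to StandardUpdater does
batch = [np.ones((3, 4), dtype=np.float32), np.ones((3, 2), dtype=np.float32)]
stacked = concat_examples(batch, padding=0)
print(stacked.shape)  # (2, 3, 4); the second example is zero-padded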
Beispiel #28
0
def main():
    parser = argparse.ArgumentParser(
        description='Space Time Action Unit R-CNN training example:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu',
                        '-g',
                        nargs='+',
                        type=int,
                        help='GPU ID; multiple GPUs split by space')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out',
                        '-o',
                        default='end_to_end_result',
                        help='Output directory')
    parser.add_argument('--trainval', default='train', help='train/test')
    parser.add_argument('--database',
                        default='BP4D',
                        help='database: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--need_validate',
                        action='store_true',
                        help='whether to validate during training')
    parser.add_argument('--mean',
                        default=config.ROOT_PATH +
                        "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone',
                        default="mobilenet_v1",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer',
                        default='RMSprop',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_model',
                        default='mobilenet_v1',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument('--pretrained_model_args',
                        nargs='+',
                        type=float,
                        help='you can pass in "1.0 224" or "0.75 224"')
    parser.add_argument('--spatial_edge_mode',
                        type=SpatialEdgeMode,
                        choices=list(SpatialEdgeMode),
                        help='1:all_edge, 2:configure_edge, 3:no_edge')
    parser.add_argument(
        '--temporal_edge_mode',
        type=TemporalEdgeMode,
        choices=list(TemporalEdgeMode),
        help='1: rnn, 2: attention_block, 3: point-wise feed forward (no temporal)'
    )
    parser.add_argument("--bi_lstm",
                        action="store_true",
                        help="whether to use bi-lstm as Edge/Node RNN")
    parser.add_argument(
        '--use_memcached',
        action='store_true',
        help='whether to use memcached to speed up fetching crops & masks')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--layers", type=int, default=1)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label",
                        action="store_true",
                        help="only to use paper reported number of labels"
                        " to train")
    parser.add_argument("--previous_frame", type=int, default=50)
    parser.add_argument("--sample_frame", '-sample', type=int, default=25)
    parser.add_argument(
        "--snap_individual",
        action="store_true",
        help="whether to snapshot each individual epoch/iteration")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    parser.add_argument('--eval_mode',
                        action='store_true',
                        help='Use test datasets for evaluation metric')
    args = parser.parse_args()
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()

    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))

    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    train_data = AUDataset(
        database=args.database,
        fold=args.fold,
        split_name=args.trainval,
        split_index=args.split_idx,
        mc_manager=mc_manager,
        train_all_data=False,
    )
    result_data = [
        img_path
        for img_path, AU_set, current_database_name in train_data.result_data
        if args.database + "|" + img_path not in mc_manager
    ]
    sub_list = split_list(result_data, len(result_data) // 100)

    for img_path_lst in sub_list:
        with Pool(processes=50) as pool:
            input_list = [(img_path, None, None) for img_path in img_path_lst]
            result =\
                pool.starmap(parallel_landmark_and_conn_component, input_list)
            pool.close()
            pool.join()
            for img_path, AU_box_dict, landmark_dict, box_is_whole_image in result:
                key_prefix = args.database + "|"
                key = key_prefix + img_path
                orig_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
                new_face, rect = FaceMaskCropper.dlib_face_crop(
                    orig_img, landmark_dict)

                print("write {}".format(key))
                if mc_manager is not None and key not in mc_manager:
                    save_dict = {
                        "landmark_dict": landmark_dict,
                        "AU_box_dict": AU_box_dict,
                        "crop_rect": rect
                    }
                    mc_manager.set(key, save_dict)
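
A minimal sketch of reading one of these cache entries back; it assumes PyLibmcManager exposes a get() that mirrors the set() used above, and the image path is hypothetical:

key = "BP4D" + "|" + "/path/to/some_frame.jpg"  # hypothetical key
entry = mc_manager.get(key)  # assumed accessor, mirroring mc_manager.set
if entry is not None:
    landmark_dict = entry["landmark_dict"]
    AU_box_dict = entry["AU_box_dict"]
    crop_rect = entry["crop_rect"]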
Beispiel #29
0
def main():
    parser = argparse.ArgumentParser(description='I3D R-CNN train:')
    parser.add_argument('--pid', '-pp', default='/tmp/SpaceTime_AU_R_CNN/')
    parser.add_argument('--gpu',
                        '-g',
                        nargs='+',
                        type=int,
                        help='GPU ID; multiple GPUs split by space')
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--out',
                        '-o',
                        default='i3d_result',
                        help='Output directory')
    parser.add_argument('--database',
                        default='BP4D',
                        help='database: BP4D/DISFA/BP4D_DISFA')
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--batch_size', '-bs', type=int, default=1)
    parser.add_argument('--snapshot', '-snap', type=int, default=1000)
    parser.add_argument('--mean',
                        default=config.ROOT_PATH +
                        "BP4D/idx/mean_no_enhance.npy",
                        help='image mean .npy file')
    parser.add_argument('--backbone',
                        default="mobilenet_v1",
                        help="vgg/resnet101/mobilenet_v1 for train")
    parser.add_argument('--optimizer',
                        default='SGD',
                        help='optimizer: RMSprop/AdaGrad/Adam/SGD/AdaDelta')
    parser.add_argument('--pretrained_rgb',
                        help='imagenet/mobilenet_v1/resnet101/*.npz')
    parser.add_argument(
        '--pretrained_flow',
        help=
        "path of optical flow pretrained model (may be single stream OF model)"
    )
    parser.add_argument('--two_stream_mode',
                        type=TwoStreamMode,
                        choices=list(TwoStreamMode),
                        help='rgb / optical_flow / rgb_flow')
    parser.add_argument(
        '--use_memcached',
        action='store_true',
        help='whether to use memcached to speed up fetching crops & masks')
    parser.add_argument('--memcached_host', default='127.0.0.1')
    parser.add_argument("--fold", '-fd', type=int, default=3)
    parser.add_argument("--split_idx", '-sp', type=int, default=1)
    parser.add_argument("--use_paper_num_label",
                        action="store_true",
                        help="only to use paper reported number of labels"
                        " to train")
    parser.add_argument(
        "--roi_align",
        action="store_true",
        help="whether to use roi align or roi pooling layer in CNN")
    parser.add_argument("--T",
                        '-T',
                        type=int,
                        default=10,
                        help="sequence length of one video clip")
    parser.add_argument("--out_channel",
                        type=int,
                        default=2048,
                        help="length of extract ROI feature")
    parser.add_argument("--proc_num", "-proc", type=int, default=1)
    args = parser.parse_args()
    os.makedirs(args.pid, exist_ok=True)
    os.makedirs(args.out, exist_ok=True)
    pid = str(os.getpid())
    pid_file_path = args.pid + os.sep + "{0}_{1}_fold_{2}.pid".format(
        args.database, args.fold, args.split_idx)
    with open(pid_file_path, "w") as file_obj:
        file_obj.write(pid)
        file_obj.flush()

    print('GPU: {}'.format(",".join(list(map(str, args.gpu)))))

    adaptive_AU_database(args.database)
    mc_manager = None
    if args.use_memcached:
        from collections_toolkit.memcached_manager import PyLibmcManager
        mc_manager = PyLibmcManager(args.memcached_host)
        if mc_manager is None:
            raise IOError("no memcached found listen in {}".format(
                args.memcached_host))

    paper_report_label, class_num = squeeze_label_num_report(
        args.database, args.use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    au_rcnn_train_chain_list = []
    if args.backbone == 'i3d':
        if args.two_stream_mode == TwoStreamMode.rgb:
            i3d_feature_backbone = I3DFeatureExtractor(modality='rgb')
            i3d_roi_head = I3DRoIHead(out_channel=args.out_channel,
                                      roi_size=7,
                                      spatial_scale=1 / 16.,
                                      dropout_prob=0.)
            # the RGB stream loads the RGB-pretrained weights, matching the rgb_flow branch below
            chainer.serializers.load_npz(args.pretrained_rgb,
                                         i3d_feature_backbone)
            chainer.serializers.load_npz(args.pretrained_rgb, i3d_roi_head)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone, i3d_roi_head)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
        elif args.two_stream_mode == TwoStreamMode.optical_flow:
            i3d_feature_backbone_flow = I3DFeatureExtractor(modality='flow')
            i3d_roi_head = I3DRoIHead(out_channel=args.out_channel,
                                      roi_size=7,
                                      spatial_scale=1 / 16.,
                                      dropout_prob=0.)
            au_rcnn_train_chain_flow = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone_flow, i3d_roi_head)
            chainer.serializers.load_npz(args.pretrained_flow,
                                         i3d_feature_backbone_flow)
            chainer.serializers.load_npz(args.pretrained_flow, i3d_roi_head)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_flow)
        elif args.two_stream_mode == TwoStreamMode.rgb_flow:
            i3d_feature_backbone = I3DFeatureExtractor(modality='rgb')
            i3d_roi_head_rgb = I3DRoIHead(out_channel=args.out_channel,
                                          roi_size=7,
                                          spatial_scale=1 / 16.,
                                          dropout_prob=0.)
            chainer.serializers.load_npz(args.pretrained_rgb,
                                         i3d_feature_backbone)
            chainer.serializers.load_npz(args.pretrained_rgb, i3d_roi_head_rgb)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone, i3d_roi_head_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)

            i3d_feature_backbone_flow = I3DFeatureExtractor(modality='flow')
            i3d_roi_head_flow = I3DRoIHead(out_channel=args.out_channel,
                                           roi_size=7,
                                           spatial_scale=1 / 16.,
                                           dropout_prob=0.)
            au_rcnn_train_chain_flow = AU_RCNN_ROI_Extractor(
                i3d_feature_backbone_flow, i3d_roi_head_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_flow)

            chainer.serializers.load_npz(args.pretrained_flow,
                                         i3d_feature_backbone_flow)
            chainer.serializers.load_npz(args.pretrained_flow,
                                         i3d_roi_head_flow)

    au_rcnn_train_loss = AU_RCNN_TrainChainLoss()
    loss_head_module = au_rcnn_train_loss
    model = Wrapper(au_rcnn_train_chain_list, loss_head_module, args.database,
                    args.T, args.two_stream_mode, args.gpu)

    batch_size = args.batch_size
    img_dataset = AUDataset(database=args.database,
                            fold=args.fold,
                            split_name='trainval',
                            split_index=args.split_idx,
                            mc_manager=mc_manager,
                            train_all_data=False)

    train_video_data = AU_video_dataset(
        au_image_dataset=img_dataset,
        sample_frame=args.T,
        train_mode=True,
        paper_report_label_idx=paper_report_label_idx)

    Transform = Transform3D
    substract_mean = SubStractMean(args.mean)
    train_video_data = TransformDataset(
        train_video_data, Transform(substract_mean, mirror=False))

    if args.proc_num == 1:
        train_iter = SerialIterator(train_video_data,
                                    batch_size * args.T,
                                    repeat=True,
                                    shuffle=False)
    else:
        train_iter = MultiprocessIterator(train_video_data,
                                          batch_size=batch_size * args.T,
                                          n_processes=args.proc_num,
                                          repeat=True,
                                          shuffle=False,
                                          n_prefetch=10,
                                          shared_mem=10000000)

    for gpu in args.gpu:
        chainer.cuda.get_device_from_id(gpu).use()

    optimizer = None
    if args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(
            lr=args.lr
        )  # originally MomentumSGD(lr=args.lr, momentum=0.9); switched to AdaGrad because the loss became NaN
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    elif args.optimizer == "AdaDelta":
        optimizer = chainer.optimizers.AdaDelta()

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    optimizer_name = args.optimizer

    key_str = "{0}_fold_{1}".format(args.fold, args.split_idx)
    file_list = []
    file_list.extend(os.listdir(args.out))
    # BP4D_3_fold_1_resnet101@rnn@no_temporal@use_paper_num_label@roi_align@label_dep_layer@conv_lstm@sampleframe#13_model.npz
    use_paper_key_str = "use_paper_num_label" if args.use_paper_num_label else "all_{}_label".format(
        args.database)
    roi_align_key_str = "roi_align" if args.roi_align else "roi_pooling"

    single_model_file_name = args.out + os.sep + \
                             '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_model.npz'.format(args.database,
                                                                                args.fold, args.split_idx,
                                                                                args.backbone, args.two_stream_mode,
                                                                                use_paper_key_str, roi_align_key_str,
                                                                                 args.T)
    print(single_model_file_name)
    pretrained_optimizer_file_name = args.out + os.sep +\
                             '{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}_optimizer.npz'.format(args.database,
                                                                                args.fold, args.split_idx,
                                                                                args.backbone, args.two_stream_mode,
                                                                                use_paper_key_str, roi_align_key_str,
                                                                                 args.T)
    print(pretrained_optimizer_file_name)

    if os.path.exists(pretrained_optimizer_file_name):
        print("loading optimizer snatshot:{}".format(
            pretrained_optimizer_file_name))
        chainer.serializers.load_npz(pretrained_optimizer_file_name, optimizer)

    if os.path.exists(single_model_file_name):
        print("loading pretrained snapshot:{}".format(single_model_file_name))
        chainer.serializers.load_npz(single_model_file_name, model)

    updater = chainer.training.StandardUpdater(
        train_iter,
        optimizer,
        device=args.gpu[0],
        converter=lambda batch, device: concat_examples(
            batch, device, padding=0))

    @training.make_extension(trigger=(1, "epoch"))
    def reset_order(trainer):
        print("reset dataset order after one epoch")
        trainer.updater._iterators[
            "main"].dataset._dataset.reset_for_train_mode()

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(reset_order)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=os.path.basename(pretrained_optimizer_file_name)),
                   trigger=(args.snapshot, 'iteration'))

    log_interval = 100, 'iteration'
    print_interval = 10, 'iteration'
    plot_interval = 10, 'iteration'
    if args.optimizer != "Adam" and args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.1),
                       trigger=(10, 'epoch'))
    elif args.optimizer == "Adam":
        trainer.extend(chainer.training.extensions.ExponentialShift(
            "alpha", 0.1, optimizer=optimizer),
                       trigger=(10, 'epoch'))
    if args.optimizer != "AdaDelta":
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
    trainer.extend(
        chainer.training.extensions.LogReport(
            trigger=log_interval,
            log_name="log_{0}_{1}_fold_{2}_{3}@{4}@{5}@{6}@T#{7}.log".format(
                args.database, args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str, roi_align_key_str,
                args.T)))
    # trainer.reporter.add_observer("main_par", model.loss_head_module)
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/accuracy',
    ]),
                   trigger=print_interval)
    trainer.extend(
        chainer.training.extensions.ProgressBar(update_interval=100))

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'],
            file_name='loss_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str,
                roi_align_key_str),
            trigger=plot_interval),
                       trigger=plot_interval)
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/accuracy'],
            file_name='accuracy_{0}_fold_{1}_{2}@{3}@{4}@{5}.png'.format(
                args.fold, args.split_idx, args.backbone,
                args.two_stream_mode, use_paper_key_str,
                roi_align_key_str),
            trigger=plot_interval),
                       trigger=plot_interval)

    trainer.run()
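
The reset_order extension above uses the decorator form of chainer.training.make_extension; a minimal, self-contained sketch of the same pattern with a toy extension:

from chainer import training

@training.make_extension(trigger=(1, 'epoch'))
def report_epoch(trainer):
    # any callable that accepts the trainer becomes an extension once decorated
    print("finished epoch", trainer.updater.epoch)

# registered exactly like a built-in extension: trainer.extend(report_epoch)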
Beispiel #30
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)'
                        )  # open_crf layer only works for CPU mode
    parser.add_argument('--step_size',
                        '-ss',
                        type=int,
                        default=3000,
                        help='step_size for lr exponential')
    parser.add_argument('--gradclip',
                        '-c',
                        type=float,
                        default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--snapshot',
                        '-snap',
                        type=int,
                        default=20,
                        help='snapshot epochs for save checkpoint')
    parser.add_argument('--train',
                        '-t',
                        default="train",
                        help='train directory containing the train txt files')
    parser.add_argument('--database',
                        default="BP4D",
                        help='database to train for')
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--hidden_size',
                        type=int,
                        default=1024,
                        help="the hidden dimension of the middle layers")
    parser.add_argument("--num_attrib",
                        type=int,
                        default=2048,
                        help="number of dimension of each node feature")
    parser.add_argument("--proc_num",
                        '-proc',
                        type=int,
                        default=1,
                        help="process number of dataset reader")
    parser.add_argument("--need_cache_graph",
                        "-ng",
                        action="store_true",
                        help="whether to cache factor graph to LRU cache")
    parser.add_argument("--resume",
                        action="store_true",
                        help="whether to load npz pretrained file")
    parser.add_argument('--atten_heads',
                        type=int,
                        default=4,
                        help="atten heads for parallel learning")
    parser.add_argument('--layer_num',
                        type=int,
                        default=2,
                        help='number of GAT layers')

    args = parser.parse_args()
    print_interval = 1, 'iteration'
    val_interval = 5, 'iteration'

    adaptive_AU_database(args.database)

    box_num = config.BOX_NUM[args.database]
    # the StructuralRNN constructor needs the first frame's factor graph_backup
    dataset = GlobalDataSet(num_attrib=args.num_attrib, train_edge="all")
    file_name = list(
        filter(lambda e: e.endswith(".txt"), os.listdir(args.train)))[0]
    dataset.load_data(
        args.train + os.sep + file_name, False
    )  # load the first sample to construct the S-RNN; it must be passed as a constructor argument
    model = GraphAttentionModel(input_dim=dataset.num_attrib_type,
                                hidden_dim=args.hidden_size,
                                class_number=dataset.label_bin_len,
                                atten_heads=args.atten_heads,
                                layers_num=args.layer_num,
                                frame_node_num=box_num)
    # note that attrib_size below is used by open_crf for its parameter count, so we cannot pass dataset.num_attrib_type here!
    train_data = GraphDataset(args.train,
                              attrib_size=2048,
                              global_dataset=dataset,
                              need_s_rnn=False,
                              need_cache_factor_graph=args.need_cache_graph,
                              need_adjacency_matrix=True,
                              npy_in_parent_dir=False,
                              need_factor_graph=False)  # a train directory is passed in
    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  1,
                                                  shuffle=True,
                                                  repeat=True)
    if args.gpu >= 0:
        print("using gpu : {}".format(args.gpu))
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    updater = BPTTUpdater(train_iter, optimizer, device=args.gpu)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    interval = (1, 'iteration')
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=print_interval)
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        "main/accuracy",
    ]),
                   trigger=print_interval)

    log_name = "GAT.log"
    trainer.extend(
        chainer.training.extensions.LogReport(trigger=interval,
                                              log_name=log_name))
    # trainer.extend(chainer.training.extensions.ProgressBar(update_interval=1, training_length=(args.epoch, 'epoch')))
    optimizer_snapshot_name = "{0}_GAT_optimizer.npz".format(args.database)
    model_snapshot_name = "{0}_GAT_model.npz".format(args.database)
    trainer.extend(chainer.training.extensions.snapshot_object(
        optimizer, filename=optimizer_snapshot_name),
                   trigger=(args.snapshot, 'epoch'))

    trainer.extend(chainer.training.extensions.snapshot_object(
        model, filename=model_snapshot_name),
                   trigger=(args.snapshot, 'epoch'))
    trainer.extend(chainer.training.extensions.ExponentialShift('lr', 0.7),
                   trigger=(5, "epoch"))

    if args.resume and os.path.exists(args.out + os.sep + model_snapshot_name):
        print("loading model_snapshot_name to model")
        chainer.serializers.load_npz(args.out + os.sep + model_snapshot_name,
                                     model)
    if args.resume and os.path.exists(args.out + os.sep +
                                      optimizer_snapshot_name):
        print("loading optimizer_snapshot_name to optimizer")
        chainer.serializers.load_npz(
            args.out + os.sep + optimizer_snapshot_name, optimizer)

    if chainer.training.extensions.PlotReport.available():
        trainer.extend(chainer.training.extensions.PlotReport(
            ['main/loss'], file_name="train_loss.png"),
                       trigger=(100, "iteration"))
        trainer.extend(chainer.training.extensions.PlotReport(
            ['opencrf_val/F1', 'opencrf_val/accuracy'],
            file_name="{}_val_f1.png".format(args.database)),
                       trigger=val_interval)

    trainer.run()
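
For clarity, the schedule implied by ExponentialShift('lr', 0.7) with trigger=(5, "epoch") and the default --lr of 0.01 can be tabulated directly (a sketch, not repository code):

lr = 0.01  # the --lr default above
for epoch in (5, 10, 15, 20):
    lr *= 0.7  # ExponentialShift multiplies the rate at every trigger
    print("epoch {:2d}: lr = {:.6f}".format(epoch, lr))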