Code Example #1
    def wrapper(self, *args, **kw):
        # request start time
        start_ts = utils.timestamp()

        values = func(self, *args, **kw)

        web.header('Content-type', 'application/json')

        if not isinstance(values, (tuple, list)):
            values = [values]

        # data = { RESPONSE_FIELDS[i]: value for i, value in enumerate(values) if i < 3 }
        data = {
            field: values[i] if i < len(values) else None
            for i, field in enumerate(RESPONSE_FIELDS)
        }
        # include the server time
        data['time'] = int(utils.timestamp()) - 2
        data['ms'] = int((utils.timestamp() - start_ts) * 1000)
        data = utils.json_dumps(data, utils.JsonEncoder)
        # was a fourth value provided?
        # if so, this is a JSONP callback call and we wrap the payload
        if len(values) == 4:
            return '%s(%s)' % (values[3], data)

        return data
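Code Example #1 shows only the inner wrapper of a response decorator. Below is a minimal sketch of how such a wrapper is typically completed into a decorator and applied to a web.py handler; the decorator name api_response, the Ping handler, and the contents of RESPONSE_FIELDS are illustrative assumptions, not code from the original project.

import functools

# assumed field order; the real project defines its own RESPONSE_FIELDS
RESPONSE_FIELDS = ('code', 'msg', 'values')

def api_response(func):
    @functools.wraps(func)
    def wrapper(self, *args, **kw):
        # the body from Code Example #1 goes here; a pass-through is shown for brevity
        return func(self, *args, **kw)
    return wrapper

class Ping(object):
    @api_response
    def GET(self):
        # with the real wrapper, this would be serialized as
        # {"code": 0, "msg": "pong", "values": null, "time": ..., "ms": ...}
        return 0, 'pong'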
Code Example #2
File: views.py Project: uimeet/AmengSMS-API
 def _xloc_init(self):
     """
     位置初始化,用于异步缓存访问客户的位置,便于在需要的时候获取
     :return:
     """
     ip.location()
     return 'var T = %s;' % utils.timestamp()
Code Example #3
File: train_keras.py Project: csu-xiao-an/Papers
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).
    """

    # get some configs for the training
    n_workers = config.cfg.TRAIN.N_WORKERS
    n_epochs = config.cfg.TRAIN.N_EPOCHS
    dataset_name = config.cfg.DATASET_NAME
    model_name = '%s_%s' % (config.cfg.MODEL.NAME, utils.timestamp())

    # data generators
    data_generator_tr = __define_data_generator(is_training=True)
    data_generator_te = __define_data_generator(is_training=False)

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' % (data_generator_tr.n_samples, data_generator_tr.n_batches, config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' % (data_generator_te.n_samples, data_generator_te.n_batches, config.cfg.TEST.BATCH_SIZE))

    # callback to save the model
    save_callback = keras_utils.SaveCallback(dataset_name, model_name)

    # load model
    model = __define_timeception_model()
    logger.info(model.summary())

    # train the model
    model.fit_generator(epochs=n_epochs, generator=data_generator_tr, validation_data=data_generator_te, use_multiprocessing=True, workers=n_workers, callbacks=[save_callback], verbose=2)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())
Code Example #4
File: task.py Project: uimeet/AmengSMS-Core
    def __init__(self, **kwargs):
        super(Task, self).__init__(**kwargs)

        self.id = utils.intval(kwargs.get('id'))

        self.type = kwargs['type']
        if isinstance(self.type, (int, long)):
            self.type = enums.Task.Type.find(self.type)

        self.type_id = kwargs.get('type_id', 0)

        self.time_created = kwargs.get('time_created', int(utils.timestamp()))
        self.time_created_text = utils.timestamp2datefmt(self.time_created)
        self.active_time = kwargs.get('active_time', self.time_created)
        self.active_time_text = utils.timestamp2datefmt(self.active_time)
        self.last_time = kwargs.get('last_time', 0)

        self.tail_num = kwargs.get(
            'tail_num', utils.make_tail_num(utils.randint(0, 999999999)))
        # status
        self.status = kwargs.get('status', enums.Task.Status.Waiting)
        if isinstance(self.status, (int, long)):
            self.status = enums.Task.Status.find(self.status)
        self.status_text = kwargs.get('status_text')

        self.exec_times = utils.intval(kwargs.get('exec_times', 0))

        self.content = kwargs.get('content', {})
        if self.content and isinstance(self.content, (str, unicode)):
            self.content = utils.json_loads(self.content)

        # handler for this task, None by default
        self._handler = None
        # whether the task has been executed
        self._executed = False
Code Example #5
File: task.py Project: uimeet/AmengSMS-Core
 def mark_delay(self, message, seconds=5):
     """
     Mark one delayed-activation state.
     :param message: status note
     :param seconds: number of seconds to delay
     :return:
     """
     self.task.status = enums.Task.Status.Waiting
     self.task.status_text = message
     self.message_append('info', message)
     self.necessary = False
     # next activation time
     self.task.active_time = int(utils.timestamp()) + seconds
Code Example #6
File: task.py Project: uimeet/AmengSMS-Core
    def find_actives(tail_nums=None, active_time=None, limit=20):
        """
        Get tasks whose activation time has been reached.
        :param tail_nums: list of tail numbers
        :param active_time:
        :param limit:
        :return:
        """
        # default activation time is the current moment
        active_time = active_time or int(utils.timestamp())
        # additional query condition
        attach_query = ''
        # filter by tail numbers if given
        if tail_nums:
            attach_query = ' AND tail_num IN (%s)' % ','.join(
                [str(num) for num in tail_nums])

        # by default, fetch tasks whose status is waiting-to-execute
        rs = db.manager.slave_core.query("""
            SELECT * FROM task WHERE status = 0 AND active_time <= $active_time%s ORDER BY active_time ASC, id ASC LIMIT $limit;
        """ % attach_query,
                                         vars=locals())
        return [Task(**r) for r in rs] if rs else None
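A hedged usage sketch for find_actives: a simple polling worker that repeatedly fetches due tasks and hands them to a project-specific executor. The loop, interval, and the execute_task callable are assumptions for illustration and are not part of AmengSMS-Core.

import time

def run_worker(execute_task, interval=2):
    # poll for tasks whose activation time has arrived and dispatch each one;
    # find_actives returns None when nothing is due, hence the `or []`
    while True:
        tasks = Task.find_actives(limit=20)
        for task in (tasks or []):
            execute_task(task)
        time.sleep(interval)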
Code Example #7
def train_model_on_pickled_features():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    nodes_path = Pth('EPIC-Kitchens/features_centroids/features_random_%d.pkl',
                     (n_nodes, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)
    nodes = utils.pkl_load(nodes_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.Epic_Kitchens.i3d_keras_epic_kitchens()
    # we use out-of-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with last conv feature 7*7*1024 'mixed_5c'
    # to get a better performance, you need to write code to randomly sample new frames and extract their features every new epoch
    # please use this function for random sampling, instead of uniform sampling: Epic_Kitchens.__random_sample_frames_per_video_for_i3d()
    # then extract their features, as done in: Epic_Kitchens._901_extract_features_i3d()
    # then train on the extracted features. Please do so in every epoch. It's computationally heavy, but you cannot avoid random sampling to get better results.
    # Even better results if you replace I3D with a 2D/3D CNN that's previously fine-tuned on Epic-Kitchens
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
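The comments in Code Example #7 recommend re-sampling random frames for every video at each epoch instead of uniform sampling. The following is a generic sketch of that idea, not the project's Epic_Kitchens.__random_sample_frames_per_video_for_i3d: the function name, the padding behavior, and the assumption that each video has at least n_timesteps frames are all illustrative.

import random

def sample_random_segment_frames(frame_paths, n_timesteps=64, n_frames_per_segment=8):
    # split the frame list into n_timesteps uniform segments and pick a random
    # run of n_frames_per_segment consecutive frames inside each segment
    n = len(frame_paths)
    seg_len = n / float(n_timesteps)
    sampled = []
    for i in range(n_timesteps):
        start = int(i * seg_len)
        stop = int((i + 1) * seg_len)
        # latest index where a full consecutive run still fits in the segment
        last_start = max(start, stop - n_frames_per_segment)
        s = random.randint(start, last_start)
        run = frame_paths[s:s + n_frames_per_segment]
        # pad by repeating the last frame if the segment is shorter than the run
        run = run + [run[-1]] * (n_frames_per_segment - len(run))
        sampled.extend(run)
    return sampled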
Code Example #8
def __train_model_on_video_frames_videograph(n_epochs, n_timesteps,
                                             n_centroids, timestamp,
                                             is_resume_training,
                                             start_epoch_num):
    """
    Train the model on the 3rd GPU; train it on features extracted on the first 2 GPUs.
    """

    global TRAIN_STATE
    assert (start_epoch_num > 1 and is_resume_training) or (
        start_epoch_num == 0 and not is_resume_training
    ), 'sorry, either provide resume_epoch_num or set the model as not resuming with resume_epoch_num = 0'

    n_frames_per_segment = 8
    n_frames_per_video = n_frames_per_segment * n_timesteps

    # locations
    model_name = 'classifier_from_video_frames_%s' % (timestamp)
    resume_model_json_path = Pth('EPIC-Kitchens/models/%s/%03d.json',
                                 (model_name, start_epoch_num))
    resume_model_weights_path = Pth('EPIC-Kitchens/models/%s/%03d.pkl',
                                    (model_name, start_epoch_num))

    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')
    features_te_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames_te.h5',
        (n_frames_per_video, ))
    centroids_path = Pth(
        'EPIC-Kitchens/features_centroid/features_random_%d_centroids.pkl',
        (n_centroids, ))
    centroids_path = Pth(
        'EPIC-Kitchens/features_centroid/features_sobol_%d_centroids.pkl',
        (n_centroids, ))
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotation/video_names_splits.pkl')
    frame_relative_pathes_dict_path = Pth(
        'EPIC-Kitchens/annotation/frame_relative_pathes_dict_tr.pkl')
    annot_path = Pth(
        'EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')

    is_save_centroids = False
    is_save_model = True
    verbose = False

    n_gpus = 1
    n_classes = ds_epic_kitchens.N_NOUNS_MANY_SHOT

    batch_size_tr = 20
    batch_size_te = 40
    n_threads_te = 16

    n_feat_maps = 1024
    featmap_side_dim = 7
    input_shape = (None, n_timesteps, featmap_side_dim, featmap_side_dim,
                   n_feat_maps)

    # load centroids
    centroids = utils.pkl_load(centroids_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()

    # load new or previous model
    if is_resume_training:
        custom_objects = {
            'DepthwiseDilatedConv1DLayer': DepthwiseDilatedConv1DLayer,
            'DepthwiseConv1DLayer': DepthwiseConv1DLayer,
            'DepthwiseDenseLayer': DepthwiseDenseLayer,
            'ConvOverSpaceLayer': ConvOverSpaceLayer,
            'TransposeLayer': TransposeLayer,
            'ReshapeLayer': ReshapeLayer,
            'MeanLayer': MeanLayer,
            'MaxLayer': MaxLayer
        }
        model = keras_utils.load_model(resume_model_json_path,
                                       resume_model_weights_path,
                                       custom_objects=custom_objects,
                                       is_compile=False)
        model, _ = __compile_model_for_finetuning(model, n_gpus)
    else:
        model, _ = __load_model_action_vlad(n_classes,
                                            input_shape,
                                            n_gpus=n_gpus,
                                            is_load_weights=False,
                                            weight_path='')
        model, _ = __load_model_videograph(centroids, n_classes, input_shape)
        # model, _ = __load_model_timeception(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=False, weight_path='')
        # model, _ = __load_model_mlp_classifier_transformer_centroids_with_graph_embedding(centroids, n_classes, input_shape, n_gpus=n_gpus, is_load_weights=False, weight_path='')

    # dry run to get the model loaded in gpu
    dummy_feature = np.zeros(tuple([batch_size_tr] + list(input_shape[1:])),
                             dtype=np.float32)
    model.predict(dummy_feature)

    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=120, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data')
    t1 = time.time()

    (y_tr, _, _, y_te, _, _) = utils.pkl_load(annot_path)
    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict = utils.pkl_load(
        frame_relative_pathes_dict_path)
    x_te = utils.h5_load(features_te_path)
    print('... centroids: %s' % (centroids_path))

    n_tr = len(video_names_tr)
    n_te = len(video_names_te)

    # set list of video names and ground truth
    TRAIN_STATE.video_names_tr = video_names_tr
    TRAIN_STATE.class_nums_tr = y_tr

    # sample new frames
    sampled_video_frames_dict = ds_epic_kitchens.__random_sample_frames_per_video_for_i3d(
        TRAIN_STATE.video_names_tr, frames_root_path,
        frame_relative_pathes_dict, n_frames_per_segment, n_frames_per_video)
    TRAIN_STATE.video_frames_dict_tr = sampled_video_frames_dict

    del video_names_tr
    del video_names_te
    del y_tr

    n_batch_tr = keras_utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = keras_utils.calc_num_batches(n_te, batch_size_te)
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % duration)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te, n_gpus))

    # make model top ready
    TRAIN_STATE.model_top_ready = True
    sys.stdout.write('\n')
    for idx_epoch in range(start_epoch_num, n_epochs):

        epoch_num = idx_epoch + 1
        # wait until bottom parts start
        while TRAIN_STATE.model_bottom_1_epoch_start < epoch_num or TRAIN_STATE.model_bottom_2_epoch_start < epoch_num or TRAIN_STATE.model_bottom_3_epoch_start < epoch_num:
            threading._sleep(2.0)
            if verbose:
                print(
                    '... top part is waiting for bottom part to start extracting features for epoch %d'
                    % (epoch_num))

        # epoch started, update counter
        TRAIN_STATE.model_top_epoch_start = epoch_num

        # video names are obtained from the state at the beginning of each epoch
        video_names_tr = TRAIN_STATE.video_names_tr
        y_tr = TRAIN_STATE.class_nums_tr

        loss_tr = 0.0
        loss_tr_b = 0.0
        tt1 = time.time()
        waiting_duration_total = 0

        # loop and train
        for idx_batch_tr in range(n_batch_tr):

            batch_num_tr = idx_batch_tr + 1

            start_idx_batch = idx_batch_tr * batch_size_tr
            stop_idx_batch = (idx_batch_tr + 1) * batch_size_tr
            video_names_tr_batch = video_names_tr[
                start_idx_batch:stop_idx_batch]
            y_tr_b = y_tr[start_idx_batch:stop_idx_batch]
            is_missing_features = True

            # wait until the features are loaded
            t1 = time.time()
            while is_missing_features:
                is_missing_features = False
                for _v_name in video_names_tr_batch:
                    if _v_name not in TRAIN_STATE.feats_dict_tr_1 and _v_name not in TRAIN_STATE.feats_dict_tr_2 and _v_name not in TRAIN_STATE.feats_dict_tr_3:
                        is_missing_features = True
                        break
                if is_missing_features:
                    threading._sleep(1.0)
                    if verbose:
                        print(
                            '... model top is waiting for missing videos: %s' %
                            _v_name)
            t2 = time.time()

            x_tr_b = __get_features_from_dictionaries(video_names_tr_batch)
            x_tr_b = np.array(x_tr_b)

            loss_batch_tr = model.train_on_batch(x_tr_b, y_tr_b)

            # after training, remove the features from the dictionary (delete the value and remove the key)
            for _v_name in video_names_tr_batch:
                if _v_name in TRAIN_STATE.feats_dict_tr_1:
                    del TRAIN_STATE.feats_dict_tr_1[_v_name]
                    TRAIN_STATE.feats_dict_tr_1.pop(_v_name, None)
                elif _v_name in TRAIN_STATE.feats_dict_tr_2:
                    del TRAIN_STATE.feats_dict_tr_2[_v_name]
                    TRAIN_STATE.feats_dict_tr_2.pop(_v_name, None)
                elif _v_name in TRAIN_STATE.feats_dict_tr_3:
                    del TRAIN_STATE.feats_dict_tr_3[_v_name]
                    TRAIN_STATE.feats_dict_tr_3.pop(_v_name, None)

            loss_tr += loss_batch_tr
            loss_tr_b = loss_tr / float(batch_num_tr)
            tt2 = time.time()
            duration = tt2 - tt1
            waiting_duration = t2 - t1
            waiting_duration_total += waiting_duration
            msg = '%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss: %0.2f, waited: %.01f  ' % (
                duration, epoch_num, n_epochs, batch_num_tr, n_batch_tr,
                loss_tr_b, waiting_duration)
            if verbose:
                print(msg)
            else:
                sys.stdout.write('\r%s' % (msg))

        # test
        y_pred_te = model.predict(x_te, batch_size_te, verbose=0)
        map_te_avg = 100 * metrics.mean_avg_precision_sklearn(y_te, y_pred_te)
        loss_tr /= float(n_batch_tr)

        tt2 = time.time()
        duration = tt2 - tt1
        timestamp_now = utils.timestamp()
        msg = '%04ds - epoch: %02d/%02d, loss [tr]: %0.2f, map [te]: %0.2f%%, waited: %d, finished: %s   \n' % (
            duration, epoch_num, n_epochs, loss_tr, map_te_avg,
            waiting_duration_total, timestamp_now)
        if verbose:
            print(msg)
        else:
            sys.stdout.write('\r%s' % (msg))

        # after we're done with training and testing, shuffle the list of training videos, set it in TRAIN_STATE, and sample new frames
        video_names_tr, y_tr = __shuffle_training_data(
            TRAIN_STATE.video_names_tr, TRAIN_STATE.class_nums_tr)
        TRAIN_STATE.video_names_tr = video_names_tr
        TRAIN_STATE.class_nums_tr = y_tr
        del video_names_tr, y_tr

        # also, sample new frames
        sampled_video_frames_dict = ds_epic_kitchens.__random_sample_frames_per_video_for_i3d(
            TRAIN_STATE.video_names_tr, frames_root_path,
            frame_relative_pathes_dict, n_frames_per_segment,
            n_frames_per_video)
        TRAIN_STATE.video_frames_dict_tr = sampled_video_frames_dict

        # update counter so the bottom part starts extracting features for the next epoch
        TRAIN_STATE.model_top_epoch_end = epoch_num

        # save the model and nodes, if required
        if is_save_model:
            __save_model(model, model_name, epoch_num)

        if is_save_centroids:
            __save_centroids(model, model_name, epoch_num)

    print('--- finish time')
    print(datetime.datetime.now())
Code Example #9
def train_model_on_video_frames():
    """
    When training the model on images, the model won't fit in GPU memory.
    If trained on several GPUs, the batch size gets so small that BatchNorm is no longer applicable.
    The solution is to use the first 3 GPUs to extract features with the backbone model (i.e. the bottom part, for example I3D or ResNet),
    and to use the 4th GPU to train our model (i.e. the top part) on these features.
    """

    # this is to allow for small cpu utilization by numpy
    # has to be set before importing numpy
    # os.environ["MKL_NUM_THREADS"] = "1"
    # os.environ["NUMEXPR_NUM_THREADS"] = "1"
    # os.environ["OMP_NUM_THREADS"] = "1"

    # if training from scratch
    resume_epoch_num = 0
    is_resume_training = False
    resume_timestamp = ''

    # get the model part to run
    timestamp = utils.timestamp(
    ) if not is_resume_training else resume_timestamp
    starting_epoch_num = 0 if not is_resume_training else resume_epoch_num
    n_epochs = 500

    # for i3d-keras
    n_centroids = 128
    n_frames_bottom = 512
    n_frames_top = 64
    n_instances = 3
    model_bottom = __start_train_model_on_video_frames_backbone_i3d_keras
    model_top = __start_train_model_on_video_frames_videograph

    # create the shared object where the training state will be stored
    global TRAIN_STATE
    TRAIN_STATE = TrainingState()

    # bottom part, instance 1
    args_bottom_1 = (n_epochs, starting_epoch_num, n_frames_bottom,
                     n_instances, 1)
    thread_bottom_1 = threading.Thread(target=model_bottom, args=args_bottom_1)

    # bottom part, instance 2
    args_bottom_2 = (n_epochs, starting_epoch_num, n_frames_bottom,
                     n_instances, 2)
    thread_bottom_2 = threading.Thread(target=model_bottom, args=args_bottom_2)

    # bottom part, instance 3
    args_bottom_3 = (n_epochs, starting_epoch_num, n_frames_bottom,
                     n_instances, 3)
    thread_bottom_3 = threading.Thread(target=model_bottom, args=args_bottom_3)

    # top part
    args_top = (n_epochs, n_frames_top, n_centroids, timestamp,
                is_resume_training, starting_epoch_num)
    thread_top = threading.Thread(target=model_top, args=args_top)

    thread_top.start()
    thread_bottom_1.start()
    thread_bottom_2.start()
    thread_bottom_3.start()

    thread_top.join()
    thread_bottom_1.join()
    thread_bottom_2.join()
    thread_bottom_3.join()
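The docstring of train_model_on_video_frames describes a producer/consumer split: "bottom" threads extract backbone features while a "top" thread trains on them, coordinated through the shared TRAIN_STATE object by busy-waiting with short sleeps. The stripped-down sketch below illustrates that handshake pattern with a single producer and consumer; the class, names, and thresholds are illustrative, not the project's API.

import threading
import time

class SharedState(object):
    def __init__(self):
        self.feats = {}           # video_name -> feature array
        self.producer_epoch = 0   # last epoch the producer finished
        self.consumer_epoch = 0   # last epoch the consumer finished

STATE = SharedState()

def producer(n_epochs, video_names, extract_fn, max_preloaded=40):
    for epoch in range(1, n_epochs + 1):
        for name in video_names:
            # throttle: do not run too far ahead of the consumer
            while len(STATE.feats) > max_preloaded:
                time.sleep(1.0)
            STATE.feats[name] = extract_fn(name)
        STATE.producer_epoch = epoch
        # wait for the consumer to finish this epoch before starting the next one
        while STATE.consumer_epoch < epoch:
            time.sleep(1.0)

def consumer(n_epochs, video_names, train_fn):
    for epoch in range(1, n_epochs + 1):
        for name in video_names:
            # busy-wait until the producer has this video's features ready
            while name not in STATE.feats:
                time.sleep(0.5)
            train_fn(STATE.feats.pop(name))
        STATE.consumer_epoch = epoch

# threads are started and joined exactly as in Code Example #9:
# threading.Thread(target=producer, args=(...)).start(), etc.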
Code Example #10
def __extract_features_rgb(begin_num=None, end_num=None):
    root_path = c.DATA_ROOT_PATH  # './data'

    # this file is generated by charades.py
    annotation_path = '%s/Charades/annotation/frames_dict_trimmed_multi_label_i3d_160_frames.pkl' % (
        root_path)  # Charades annotation path
    features_root_path = '%s/Charades/features_i3d_charades_rgb_mixed_5c_trimmed_20_frames' % (
        root_path)  # where the features are saved
    video_frames_root_path = '%s/Charades/frames/Charades_v1_rgb' % (
        root_path)  # path of the video frames
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (
        root_path)  # path of the pre-trained model
    feature_name = 'Mixed_5c'  # which layer's features to save

    # 1. load the video annotation info
    (video_frames_dict_tr,
     video_frames_dict_te) = utils.pkl_load(annotation_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = video_frames_dict.keys()

    n_videos = len(video_names)
    frame_count = 0

    if not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # async reader; start loading images for the first video
    # NOTE (from the original author): the loader below has not been written yet
    img_reader = image_utils.AsyncImageReaderCharadesForI3DTorchModel(
        n_threads=20)  # loads the images
    img_reader.load_imgs_in_batch(
        __get_video_frame_pathes(video_names[0], video_frames_root_path,
                                 video_frames_dict))

    # load the model
    model = __load_i3d_model_rgb(model_path)
    torchsummary.summary(model, input_size=(3, 160, 224, 224))

    # loop on list of videos
    for idx_video in range(n_videos):
        video_num = idx_video + 1

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        video_name = video_names[idx_video]

        # wait until the image_batch is loaded
        t1 = time.time()
        while img_reader.is_busy(
        ):  # is_busy stays True until the images requested via load_imgs_in_batch above finish loading
            threading._sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1
        print('...... video %d/%d: %s, waited: %d' %
              (video_num, n_videos, video_name, duration_waited))

        # get the video frames
        video_frames = img_reader.get_images()

        # pre-load for the next video
        if video_num < n_videos:
            next_video_name = video_names[idx_video + 1]
            img_reader.load_imgs_in_batch(
                __get_video_frame_pathes(next_video_name,
                                         video_frames_root_path,
                                         video_frames_dict))

        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print ('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # Charades videos have a fixed number of frames: 160
        if len(video_frames) != 160:
            print('... wrong n frames: %d' % (video_num))
            continue

        # transpose to have the channel_first (160, 224, 224, 3) => (3, 160, 224, 224)
        video_frames = np.transpose(video_frames, (3, 0, 1, 2))

        # add one dimension to represent the batch size
        video_frames = np.expand_dims(video_frames, axis=0)  # (N,C,L,H,W)

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(video_frames).cuda()  # move the video tensor to the GPU
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (1, 1024, 20, 7, 7)

            # don't forget to clean up variables
            # these two variables must be deleted after each video's features are extracted, otherwise an error is raised
            del input_var
            del output_var

        # squeeze to remove the dimension of the batch_size
        features = features[0]  # (1024, 20, 7, 7)

        # transpose to have the channel_last
        features = np.transpose(
            features,
            (1, 2, 3,
             0))  # (20, 7, 7, 1024) == (T, H, W, C); if PyTorch is also used downstream, this step is not needed

        # path to save the features
        utils.pkl_dump(features, video_features_path, is_highest=True)  # save the features

        # increment counts
        frame_count += len(video_frames)

    t2 = time.time()
    print('finish extracting %d features in %d seconds' %
          (frame_count, t2 - t1))
    print('end time: %s' % utils.timestamp())
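The transposes in the extraction loop above are easy to get wrong. The following self-contained check walks through the same shape changes using dummy arrays; it uses no real frames and no I3D model, only numpy.

import numpy as np

# 160 RGB frames as returned by the image reader: (T, H, W, C)
video_frames = np.zeros((160, 224, 224, 3), dtype=np.float32)

# channel-first layout expected by the PyTorch I3D model: (C, T, H, W)
video_frames = np.transpose(video_frames, (3, 0, 1, 2))
assert video_frames.shape == (3, 160, 224, 224)

# add the batch dimension: (N, C, T, H, W)
video_frames = np.expand_dims(video_frames, axis=0)
assert video_frames.shape == (1, 3, 160, 224, 224)

# the Mixed_5c output for this input is (1, 1024, 20, 7, 7); drop the batch
# axis and move channels last, as done before pickling the features
features = np.zeros((1, 1024, 20, 7, 7), dtype=np.float32)
features = np.transpose(features[0], (1, 2, 3, 0))
assert features.shape == (20, 7, 7, 1024)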
Code Example #11
def train_model_videograph():
    """
    Train model.
    """

    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotations/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')

    nodes_path = Pth('EPIC-Kitchens/features/nodes_random_%d.pkl', (n_nodes, ))
    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)

    # either load nodes, or generate them on the fly, but remember to save them, as you need them at test time
    # nodes = utils.pkl_load(nodes_path)
    nodes = utils.generate_centroids(n_nodes, n_channels)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    t1 = time.time()
    # features are extracted using datasets.epic_kitchens.i3d_keras_epic_kitchens()
    # we use out-of-box i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens) with last conv feature 7*7*1024 'mixed_5c'
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()

    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    print(x_tr.shape)
    print(x_te.shape)
    print(y_tr.shape)
    print(y_te.shape)

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  epoch_offset,
                                                  model_root_path)
    score_callback = keras_utils.MapScoreCallback(model, None, None, x_te,
                                                  y_te, batch_size_te,
                                                  n_classes)
    model_callbacks = [save_callback, score_callback]
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=model_callbacks,
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
Code Example #12
def train_model_on_pickled_features():
    """
    Train model.
    """

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    is_spatial_pooling = False
    is_resume_training = False

    n_timesteps = 64
    batch_size_tr = 16
    batch_size_te = 40
    n_centroids = 128
    n_epochs = 100
    n_classes = N_CLASSES
    n_gpus = 1

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    model_root_path = Pth('Breakfast/models/')
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    centroids_path = Pth(
        'Breakfast/features_centroids/features_random_%d_centroids.pkl',
        (n_centroids, ))
    features_path = Pth(
        'Breakfast/features/features_i3d_mixed_5c_%d_frames_max_pool.h5',
        (n_timesteps * 8, )) if is_spatial_pooling else Pth(
            'Breakfast/features/features_i3d_mixed_5c_%d_frames.h5',
            (n_timesteps * 8, ))

    centroids = utils.pkl_load(centroids_path)
    (video_ids_tr, y_tr), (video_ids_te,
                           y_te) = utils.pkl_load(gt_activities_path)

    n_feat_maps, feat_map_side_dim = __get_model_feat_maps_info(
        model_type, feature_type)
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim,
                   n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()

    # root_model, model = __load_model_mlp_classifier_action_vlad(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    # root_model, model = __load_model_mlp_classifier_timeception(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    root_model, model = __load_model_mlp_classifier_video_graph(
        centroids,
        n_classes,
        input_shape,
        n_gpus=n_gpus,
        is_load_weights=is_resume_training,
        weight_path=model_weight_path)

    t2 = time.time()
    duration = t2 - t1
    print(root_model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_path))
    print('... centroids: %s' % (centroids_path))
    t1 = time.time()
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % (duration))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = __calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = __calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te, n_gpus))

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  model_root_path)
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=[save_callback],
              verbose=2)
    print('--- finish time')
    print(datetime.datetime.now())
Code Example #13
File: charades.py Project: Prudhvinik1/timeception
def extract_features_i3d_charades():
    """
    Extract features from i3d-model
    """

    n_frames_in = 1024
    n_frames_out = 128
    n_splits_per_video = 2

    root_path = '/content/'
    frames_annot_path = '%s/charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (root_path, n_frames_in)
    model_path = '%s/charades/baseline_models/i3d/rgb_charades.pt' % (root_path)
    frames_root_path = '%s/charades/frames/Charades_v1_rgb' % (root_path)
    features_root_path = '/local-ssd/nhussein/Charades/features_i3d_charades_rgb_mixed_5c_untrimmed_%d_frames' % (n_frames_out)

    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(frames_annot_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = video_frames_dict.keys()
    n_videos = len(video_names)
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8
    n_frames_per_segment = 8
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not is_local_machine and not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    t1 = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(n_threads=n_threads)

    # async reader; start loading images for the first video (we read the first group of videos)
    video_group_frames = __get_video_frame_pathes(video_names[0], frames_root_path, video_frames_dict)
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model
    model = i3d_factory.load_model_i3d_charades_rgb_for_testing(model_path)
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # import torchsummary
    # print torchsummary.summary(model, (8, 3, 224, 224))
    # NOTE: this early return (left in the original code) stops the function here,
    # so the extraction loop below never runs
    return

    # loop on list of videos
    for idx_video in range(n_videos):

        video_num = idx_video + 1
        video_name = video_names[idx_video]

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image_batch is loaded
        t1 = time.time()
        while video_reader_tr.is_busy():
            threading._sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1

        print('... video %04d, %04d, waited: %.02f' % (video_num, n_videos, duration_waited))

        # get the frames
        frames = video_reader_tr.get_images()  # (G*T*N, 224, 224, 3)

        # pre-load for the next video group, notice that we take into account the number of instances
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(video_names[idx_video + 1], frames_root_path, video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        if len(frames) != n_frames_in:
            raise Exception('... ... wrong n frames: %s' % (video_name))

        # reshape so that one dimension carries the frames per segment, while the other dimension represents the batch size
        frames = np.reshape(frames, (n_frames_out, n_frames_per_segment, 224, 224, 3))  # (T, 8, 224, 224, 3)

        # transpose to have the channel_first (G*T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(frames).cuda()
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)
            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have the channel_last
        features = np.transpose(features, (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # reshape to have the features for each video in a separate dimension
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024)

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print ('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    t2 = time.time()
    print('... finish extracting features in %d seconds' % (t2 - t1))
Code Example #14
def train_human_object_multiple_context_gating(soft_flag=True,
                                               backbone='rcnn'):

    n_epochs = 100
    batch_size_tr = 32
    batch_size_te = 32
    n_classes = N_CLASSES

    if backbone == 'rcnn':
        print('Using backbone rcnn')
        feature_path_interaction = Pth(
            'Hico/features/h5/features_base_subject_object.h5')
        n_channels, n_regions, channel_side_dim = 4096, 12, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction,
                                           ['x_tr', 'x_te'])
        x_tr = np.swapaxes(x_tr, 1, 2)
        x_te = np.swapaxes(x_te, 1, 2)
    elif backbone == 'pairatt':
        print('Using backbone pairatt')
        feature_path_interaction = Pth('Hico/features/h5/features_pairattn.h5')
        n_channels, n_regions, channel_side_dim = 4096, 3, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction,
                                           ['x_tr', 'x_te'])

    # Features of the pose: f_context
    feature_path_c3 = Pth('Hico/features/h5/deformation.h5')
    x_cs_shape = [(512, 1, 1, 1)]

    # Features of the pose: f_context
    feature_path_c1 = Pth('Hico/features/h5/lvis.h5')
    x_cs_shape = [(1300, 1, 1, 1)]

    feature_path_c2 = Pth('Hico/features/h5/local_scene.h5')
    x_cs_shape = [(2048, 1, 1, 1)]

    feature_path_context = Pth('Hico/features/h5/stuff.h5')
    x_cs_shape = [(649, 1, 1, 1)]

    # Features of the pose: f_context
    feature_path_context = Pth('Hico/features/h5/part_states.h5')
    x_cs_shape = [(1032, 1, 1, 1)]

    feature_path_c4 = Pth('Hico/features/h5/local_pose.h5')
    x_cs_shape = [(4096, 1, 1, 1)]

    x_cs_shape = [(1300, 1, 1, 1), (2048, 1, 1, 1), (512, 1, 1, 1),
                  (4096, 1, 1, 1)]

    # Annotation of the image
    annot_path = Pth('Hico/features/h5/anno_hico.pkl')
    model_name = 'classifier_%s' % (utils.timestamp())
    input_shape = (n_channels, n_regions, channel_side_dim, channel_side_dim)

    print('--- start time')
    print(datetime.datetime.now())

    print('... loading data')
    t1 = time.time()

    (img_names_tr, y_tr, y_tr_mask, img_names_te, y_te,
     y_te_mask) = utils.pkl_load(annot_path)
    y_tr = y_tr.astype(np.float32)
    y_te = y_te.astype(np.float32)

    y_tr_mask = y_tr_mask.astype(np.float32)
    y_te_mask = y_te_mask.astype(np.float32)

    print('... context features')
    (x_tr_c1, x_te_c1) = utils.h5_load_multi(feature_path_c1, ['x_tr', 'x_te'])
    #x_tr_c1 = expand_feats(x_tr_c1)
    #x_te_c1 = expand_feats(x_te_c1)

    (x_tr_c2, x_te_c2) = utils.h5_load_multi(feature_path_c2, ['x_tr', 'x_te'])
    x_tr_c2 = expand_feats(x_tr_c2)
    x_te_c2 = expand_feats(x_te_c2)

    (x_tr_c3, x_te_c3) = utils.h5_load_multi(feature_path_c3, ['x_tr', 'x_te'])
    x_tr_c3 = expand_feats(x_tr_c3)
    x_te_c3 = expand_feats(x_te_c3)

    (x_tr_c4, x_te_c4) = utils.h5_load_multi(feature_path_c4, ['x_tr', 'x_te'])
    x_tr_c4 = expand_feats(x_tr_c4)
    x_te_c4 = expand_feats(x_te_c4)

    print('train_set_shape_interaction: ', x_tr.shape)
    print('test_set_shape_interaction: ', x_te.shape)

    print('train_set_shape_context-1: ', x_tr_c1.shape)
    print('test_set_shape_context-1: ', x_te_c1.shape)

    print('train_set_shape_context-2: ', x_tr_c2.shape)
    print('test_set_shape_context-2: ', x_te_c2.shape)

    print('train_set_shape_context-3: ', x_tr_c3.shape)
    print('test_set_shape_context-3: ', x_te_c3.shape)

    print('train_set_shape_context-4: ', x_tr_c4.shape)
    print('test_set_shape_context-4: ', x_te_c4.shape)

    t2 = time.time()
    duration = t2 - t1
    print('... loading data, duration (sec): %d' % (duration))

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    if soft_flag == True:
        print('Training soft fusion model')
        model = ClassifierContextLateFusionMultiSoftGate(
            n_classes, input_shape, x_cs_shape)

    t2 = time.time()
    duration = t2 - t1
    model = model.cuda()
    input_sizes = [input_shape] + list(x_cs_shape)
    #pytorch_utils.model_summary_multi_input(model, input_sizes=input_sizes, batch_size=-1, device='cuda')
    print('... model built, duration (sec): %d' % (duration))

    # callbacks
    callbacks = []

    print(
        'Interaction_feat: %s, Context_feat-1: %s, Context_feat-2: %s, Context_feat-3: %s\n'
        % (feature_path_interaction, feature_path_c1, feature_path_c2,
           feature_path_c3))

    # start training
    pytorch_utils.train_model_custom_metric_mask(
        model,
        model._optimizer,
        model._loss_fn,
        model._metric_fn, [x_tr, x_tr_c1, x_tr_c2, x_tr_c3, x_tr_c4],
        y_tr,
        y_tr_mask, [x_te, x_te_c1, x_te_c2, x_te_c3, x_te_c4],
        y_te,
        y_te_mask,
        n_epochs,
        batch_size_tr,
        batch_size_te,
        callbacks=callbacks)

    print('--- finish time')
    print(datetime.datetime.now())
Code Example #15
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).
    """

    # get some configs for the training
    n_epochs = config.cfg.TRAIN.N_EPOCHS
    dataset_name = config.cfg.DATASET_NAME
    model_name = '%s_%s' % (config.cfg.MODEL.NAME, utils.timestamp())
    device = 'cuda'

    # data generators
    loader_tr, n_samples_tr, n_batches_tr = __define_loader(is_training=True)
    loader_te, n_samples_te, n_batches_te = __define_loader(is_training=False)

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_tr, n_batches_tr, config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_te, n_batches_te, config.cfg.TEST.BATCH_SIZE))

    # load model
    model, optimizer, loss_fn, metric_fn, metric_fn_name = __define_timeception_model(
        device)
    logger.info(
        pytorch_utils.summary(model,
                              model._input_shape[1:],
                              batch_size=2,
                              device='cuda'))

    # save the model
    model_saver = pytorch_utils.ModelSaver(model, dataset_name, model_name)

    # loop on the epochs
    sys.stdout.write('\n')
    for idx_epoch in range(n_epochs):

        epoch_num = idx_epoch + 1

        loss_tr = 0.0
        acc_tr = 0.0
        loss_te = 0.0
        acc_te = 0.0

        tt1 = time.time()

        # flag model as training
        model.train()

        # training
        for idx_batch, (x, y_true) in enumerate(loader_tr):
            batch_num = idx_batch + 1

            x, y_true = x.to(device), y_true.to(device)
            optimizer.zero_grad()
            y_pred = model(x)
            loss = loss_fn(y_pred, y_true)
            loss.backward()
            optimizer.step()

            # calculate accuracy
            y_true = y_true.cpu().numpy().astype(np.int32)
            y_pred = y_pred.cpu().detach().numpy()
            loss_b_tr = loss.cpu().detach().numpy()
            acc_b_tr = metric_fn(y_true, y_pred)

            loss_tr += loss_b_tr
            acc_tr += acc_b_tr
            loss_b_tr = loss_tr / float(batch_num)
            acc_b_tr = acc_tr / float(batch_num)
            tt2 = time.time()
            duration = tt2 - tt1
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                % (duration, epoch_num, n_epochs, batch_num, n_batches_tr,
                   metric_fn_name, loss_b_tr, acc_b_tr))

        # flag model as testing
        model.eval()

        # testing
        for idx_batch, (x, y_true) in enumerate(loader_te):
            batch_num = idx_batch + 1

            x, y_true = x.to(device), y_true.to(device)
            y_pred = model(x)
            loss_b_te = loss_fn(y_pred, y_true).cpu().detach().numpy()
            y_true = y_true.cpu().numpy().astype(np.int32)
            y_pred = y_pred.cpu().detach().numpy()
            acc_b_te = metric_fn(y_true, y_pred)

            loss_te += loss_b_te
            acc_te += acc_b_te
            loss_b_te = loss_te / float(batch_num)
            acc_b_te = acc_te / float(batch_num)
            tt2 = time.time()
            duration = tt2 - tt1
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [te]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                % (duration, epoch_num, n_epochs, batch_num, n_batches_te,
                   metric_fn_name, loss_b_te, acc_b_te))

        loss_tr /= float(n_batches_tr)
        loss_te /= float(n_batches_te)
        acc_tr /= float(n_batches_tr)
        acc_te /= float(n_batches_te)

        tt2 = time.time()
        duration = tt2 - tt1
        sys.stdout.write(
            '\r%04ds - epoch: %02d/%02d, [tr]: %0.2f, %0.2f, [te]: %0.2f, %0.2f           \n'
            %
            (duration, epoch_num, n_epochs, loss_tr, acc_tr, loss_te, acc_te))

        # after each epoch, save data
        model_saver.save(idx_epoch)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())
Code Example #16
def __train_model_on_video_frames_backbone_i3d_keras(n_epochs,
                                                     starting_epoch_num,
                                                     n_frames_per_video,
                                                     n_instances,
                                                     instance_num):
    """
    Extract features from i3d-model to be used by our model.
    """

    verbose = False
    global TRAIN_STATE  # type: TrainingState
    assert instance_num in [
        1, 2, 3
    ], 'Sorry, wrong instance number: %d' % (instance_num)
    assert n_instances == 3, 'Sorry, wrong number of instances %d' % (
        n_instances)

    n_threads = 16
    n_frames_per_segment = 8
    max_preloaded_feats = 40
    n_frames_in = n_frames_per_video
    n_frames_out = int(n_frames_in / float(n_frames_per_segment))
    assert n_frames_per_segment * n_frames_out == n_frames_in

    # load the model
    model = Inception_Inflated3d_Backbone()

    # reader for getting video frames
    video_reader = image_utils.AsyncImageReaderEpicKitchensForI3dKerasModel(
        n_threads=n_threads)

    # wait until model top is ready
    while not TRAIN_STATE.model_top_ready:
        threading._sleep(5.0)
        if verbose:
            print('... bottom part (%d) is waiting for top part to get ready' %
                  (instance_num))

    # extract features for n epoch
    for idx_epoch in range(starting_epoch_num, n_epochs):

        epoch_num = idx_epoch + 1

        video_frames_dict = TRAIN_STATE.video_frames_dict_tr
        video_names = TRAIN_STATE.video_names_tr
        n_videos = len(video_names)

        # only first instance can modify train_state and get videos from pickle
        if instance_num == 1:
            # model started, update count
            TRAIN_STATE.model_bottom_1_epoch_start = epoch_num
        elif instance_num == 2:
            # model started, update count
            TRAIN_STATE.model_bottom_2_epoch_start = epoch_num
        elif instance_num == 3:
            # model started, update count
            TRAIN_STATE.model_bottom_3_epoch_start = epoch_num
        else:
            raise Exception('Sorry, unknown instance number: %d' %
                            (instance_num))

        if verbose:
            print('epoch %d by instance %s' % (epoch_num, instance_num))

        # async reader; start loading images for the first video (we read the first group of videos)
        current_video_name = video_names[instance_num - 1]
        current_video_frames = video_frames_dict[current_video_name]

        # just for clarification, can be reshaped from (256,) into (T, N) = (32, 8)
        # where T is the number of segments in one video, and N is the number of frames in one segment
        # video_group_frames = np.reshape(video_group_frames, tuple([n_frames_out, n_segment_length] + list(video_group_frames.shape[1:])))
        video_reader.load_imgs_in_batch(current_video_frames)

        # extract features only for training videos
        t1 = time.time()

        if verbose:
            print('... extracting features tr')
            print('... start time: %s' % utils.timestamp())

        # loop on list of videos
        for idx_video in range(n_videos):

            if instance_num == 1:
                # wait looping if there are so many features in the dictionary
                while len(TRAIN_STATE.feats_dict_tr_1) > max_preloaded_feats:
                    threading._sleep(1.0)
                    if verbose:
                        print(
                            '... bottom part (%d) is waiting for features in the dictionary to get consumed by top part'
                            % (instance_num))

            elif instance_num == 2:
                # wait looping if there are so many features in the dictionary
                while len(TRAIN_STATE.feats_dict_tr_2) > max_preloaded_feats:
                    threading._sleep(1.0)
                    if verbose:
                        print(
                            '... bottom part (%d) is waiting for features in the dictionary to get consumed by top part'
                            % (instance_num))

            elif instance_num == 3:
                # wait looping if there are so many features in the dictionary
                while len(TRAIN_STATE.feats_dict_tr_3) > max_preloaded_feats:
                    threading._sleep(1.0)
                    if verbose:
                        print(
                            '... bottom part (%d) is waiting for features in the dictionary to get consumed by top part'
                            % (instance_num))

            # loop on groups according to instances
            if instance_num == 1 and idx_video % n_instances != 0:
                continue

            if instance_num == 2 and idx_video % n_instances != 1:
                continue

            if instance_num == 3 and idx_video % n_instances != 2:
                continue

            tg_1 = time.time()
            video_name = video_names[idx_video]
            video_num = idx_video + 1

            # wait until the image_batch is loaded
            t1 = time.time()
            while video_reader.is_busy():
                threading._sleep(0.1)
            t2 = time.time()
            duration_waited = t2 - t1
            if verbose:
                print(
                    '\n... ... model bottom (%d), video %d/%d, waited: %d, name: %s'
                    % (instance_num, video_num, n_videos, duration_waited,
                       video_name))

            # get the frames
            frames = video_reader.get_images()  # (G*T*N, 224, 224, 3)

            # pre-load for the next video group, notice that we take into account the number of instances
            if idx_video + n_instances < n_videos:
                next_video_num = video_num + n_instances
                next_video_name = video_names[idx_video + n_instances]
                next_video_frames = video_frames_dict[next_video_name]
                video_reader.load_imgs_in_batch(next_video_frames)
                if verbose:
                    print(
                        '\n... ... model bottom (%d), next video %d/%d, name: %s'
                        % (instance_num, next_video_num, n_videos,
                           next_video_name))

            if video_name in TRAIN_STATE.feats_dict_tr_1 or video_name in TRAIN_STATE.feats_dict_tr_2 or video_name in TRAIN_STATE.feats_dict_tr_3:
                raise Exception(
                    '... ... this should not be happening, but features for video %s already exist in the dictionary'
                    % (video_name))

            if len(frames) != n_frames_per_video:
                raise Exception('... ... wrong n frames for video: %s' % (video_name))

            # reshape so that one dimension carries the frames per segment, while the other dimension represents the batch size
            frames = np.reshape(
                frames, [n_frames_out, n_frames_per_segment, 224, 224, 3
                         ])  # (T, 8, 224, 224, 3)

            # get features
            features = model.predict(frames)  # (T, 1, 7, 7, 1024)

            # remove temporal axis, as it is one
            features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024)

            # add feature to the dictionary
            if instance_num == 1:
                TRAIN_STATE.feats_dict_tr_1[video_name] = features
            elif instance_num == 2:
                TRAIN_STATE.feats_dict_tr_2[video_name] = features
            elif instance_num == 3:
                TRAIN_STATE.feats_dict_tr_3[video_name] = features

            tg_2 = time.time()
            if verbose:
                print('took', tg_2 - tg_1)

        t2 = time.time()
        if verbose:
            print('... finish extracting features in %d seconds' % (t2 - t1))

        # after finishing epoch, update counters
        if instance_num == 1:
            TRAIN_STATE.model_bottom_1_epoch_end = epoch_num
        if instance_num == 2:
            TRAIN_STATE.model_bottom_2_epoch_end = epoch_num
        if instance_num == 3:
            TRAIN_STATE.model_bottom_3_epoch_end = epoch_num

        # wait until the other parts finish
        if instance_num == 1:
            while TRAIN_STATE.model_bottom_1_epoch_end > TRAIN_STATE.model_bottom_2_epoch_end or TRAIN_STATE.model_bottom_1_epoch_end > TRAIN_STATE.model_bottom_3_epoch_end:
                threading._sleep(1.0)
                if verbose:
                    print(
                        '... bottom part (1) is waiting for bottom part (2,3) to finish extracting features on epoch %d'
                        % (epoch_num))
        if instance_num == 2:
            while TRAIN_STATE.model_bottom_2_epoch_end > TRAIN_STATE.model_bottom_1_epoch_end or TRAIN_STATE.model_bottom_2_epoch_end > TRAIN_STATE.model_bottom_3_epoch_end:
                threading._sleep(1.0)
                if verbose:
                    print(
                        '... bottom part (2) is waiting for bottom part (1,3) to finish extracting features on epoch %d'
                        % (epoch_num))
        if instance_num == 3:
            while TRAIN_STATE.model_bottom_3_epoch_end > TRAIN_STATE.model_bottom_1_epoch_end or TRAIN_STATE.model_bottom_3_epoch_end > TRAIN_STATE.model_bottom_2_epoch_end:
                threading._sleep(1.0)
                if verbose:
                    print(
                        '... bottom part (3) is waiting for bottom part (1,2) to finish extracting features on epoch %d'
                        % (epoch_num))

        # if top part is not finished yet, then wait
        while TRAIN_STATE.model_top_epoch_end < TRAIN_STATE.model_bottom_1_epoch_end or TRAIN_STATE.model_top_epoch_end < TRAIN_STATE.model_bottom_2_epoch_end or TRAIN_STATE.model_top_epoch_end < TRAIN_STATE.model_bottom_3_epoch_end:
            threading._sleep(2.0)
            if verbose:
                print(
                    '... bottom part (%d) is waiting for top part to finish training on epoch: %d'
                    % (instance_num, TRAIN_STATE.model_top_epoch_end + 1))

    print('... finish extracting features for all epochs, goodbye!')
    print('... end time: %s' % utils.timestamp())
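
The coordination above is a polling loop over shared epoch counters in TRAIN_STATE: each bottom worker bumps its counter, waits for its peers, then waits for the top model to consume its features. A minimal sketch of that pattern, with hypothetical names that are not from the code above, could look like this:

import time

class SharedTrainState(object):
    # hypothetical, simplified stand-in for the TRAIN_STATE object used above
    def __init__(self, n_bottom_workers):
        self.bottom_epoch_end = [0] * n_bottom_workers  # last epoch finished by each bottom worker
        self.top_epoch_end = 0                          # last epoch the top model finished training on

def wait_for_bottom_peers(state, worker_idx, poll_sec=1.0):
    # block until no other bottom worker lags behind this worker's finished epoch
    my_epoch = state.bottom_epoch_end[worker_idx]
    while any(e < my_epoch for e in state.bottom_epoch_end):
        time.sleep(poll_sec)

def wait_for_top_model(state, worker_idx, poll_sec=2.0):
    # block until the top model has consumed the features of this worker's last epoch
    while state.top_epoch_end < state.bottom_epoch_end[worker_idx]:
        time.sleep(poll_sec)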
コード例 #17
0
def train_model_on_video_features_i3d():
    """
    Train a model on features stored on the local disk.
    """

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    is_spatial_pooling = False
    is_spatial_max = False
    is_save = True
    n_gpus = 1

    batch_size_tr = 20
    batch_size_te = 30
    n_threads = 20
    n_epochs = 500
    n_classes = N_CLASSES
    n_centroids = 128
    n_timesteps = 64
    n_frames = n_timesteps * 8

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''

    # i3d mixed_5c features
    features_root_path = Pth('Breakfast/features_i3d_mixed_5c_%s_frames',
                             (n_frames,))
    centroids_path = Pth(
        'Breakfast/features_centroids/features_random_%d_centroids.pkl',
        (n_centroids, ))
    video_annot_path = Pth('Breakfast/annotation/gt_activities.pkl')
    centroids = utils.pkl_load(centroids_path)

    n_feat_maps, feat_map_side_dim = __get_model_feat_maps_info(
        model_type, feature_type)
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim,
                   n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()

    # root_model, model = __load_model_mlp_classifier_conv_pool(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=False, weight_path=model_weight_path)
    # root_model, model = __load_model_mlp_classifier_action_vlad(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=False, weight_path=model_weight_path)
    # root_model, model = __load_model_mlp_classifier_timeception(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=False, weight_path=model_weight_path)
    root_model, model = __load_model_mlp_classifier_video_graph(
        centroids,
        n_classes,
        input_shape,
        n_gpus=n_gpus,
        is_load_weights=False,
        weight_path=model_weight_path)

    t2 = time.time()
    duration = t2 - t1
    print(root_model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_root_path))
    t1 = time.time()
    (v_names_tr, y_tr), (v_names_vl,
                         y_vl), (v_names_te,
                                 y_te) = utils.pkl_load(video_annot_path)
    v_names_tr = np.hstack((v_names_tr, v_names_vl))
    y_tr = np.hstack((y_tr, y_vl))
    del v_names_vl
    del y_vl
    action_ids = np.arange(1, N_CLASSES + 1)
    y_tr = utils.label_binarize(y_tr, action_ids)
    y_te = utils.label_binarize(y_te, action_ids)
    n_tr = len(v_names_tr)
    n_te = len(v_names_te)
    n_batch_tr = keras_utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = keras_utils.calc_num_batches(n_te, batch_size_te)
    t2 = time.time()
    print('... centroids: %s' % (centroids_path))
    print('... data loaded: %d' % (t2 - t1))

    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te, n_gpus))

    # load features async
    async_loader_tr = data_utils.AsyncVideoFeaturesLoaderBreakfast(
        features_root_path, y_tr, n_timesteps, batch_size_tr, n_feat_maps,
        feat_map_side_dim, n_threads)
    async_loader_te = data_utils.AsyncVideoFeaturesLoaderBreakfast(
        features_root_path, y_te, n_timesteps, batch_size_te, n_feat_maps,
        feat_map_side_dim, n_threads)

    # shuffle the data for the first time
    async_loader_tr.shuffle_data()

    # start getting features ready for the first batch
    async_loader_tr.load_feats_in_batch(1)
    async_loader_te.load_feats_in_batch(1)

    sys.stdout.write('\n')
    for idx_epoch in range(n_epochs):

        epoch_num = idx_epoch + 1

        loss_tr = 0.0
        loss_te = 0.0
        acc_tr = 0.0
        acc_te = 0.0
        tt1 = time.time()
        waiting_duration_total = 0

        # loop and train
        for idx_batch in range(n_batch_tr):

            batch_num = idx_batch + 1

            # wait until the feature batch is loaded
            t1 = time.time()
            while async_loader_tr.is_busy():
                time.sleep(0.1)
            t2 = time.time()

            # get batch of training samples
            x_tr_b, y_tr_b = async_loader_tr.get_batch_data()

            # start getting the next image_batch ready
            if batch_num < n_batch_tr:
                next_batch_num = batch_num + 1
                async_loader_tr.load_feats_in_batch(next_batch_num)

            # train and get predictions
            loss_batch_tr, acc_batch_tr = model.train_on_batch(x_tr_b, y_tr_b)

            loss_tr += loss_batch_tr
            acc_tr += acc_batch_tr
            loss_tr_b = loss_tr / float(batch_num)
            acc_tr_b = 100 * acc_tr / float(batch_num)

            tt2 = time.time()
            duration = tt2 - tt1
            waiting_duration = t2 - t1
            waiting_duration_total += waiting_duration
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss_tr: %.02f, acc_tr: %.02f, waited: %.01f       '
                % (duration, epoch_num, n_epochs, batch_num, n_batch_tr,
                   loss_tr_b, acc_tr_b, waiting_duration))

        # loop and test
        for idx_batch in range(n_batch_te):

            batch_num = idx_batch + 1

            # wait until the feature batch is loaded
            t1 = time.time()
            while async_loader_te.is_busy():
                time.sleep(0.1)
            t2 = time.time()

            # get batch of testing samples
            x_te_b, y_te_b = async_loader_te.get_batch_data()

            # start getting the next image_batch ready
            if batch_num < n_batch_te:
                next_batch_num = batch_num + 1
                async_loader_te.load_feats_in_batch(next_batch_num)

            # test and get predictions
            loss_batch_te, acc_batch_te = model.test_on_batch(x_te_b, y_te_b)

            loss_te += loss_batch_te
            acc_te += acc_batch_te
            loss_te_b = loss_te / float(batch_num)
            acc_te_b = 100 * acc_te / float(batch_num)

            tt2 = time.time()
            duration = tt2 - tt1
            waiting_duration = t2 - t1
            waiting_duration_total += waiting_duration
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [te]: %02d/%02d, loss_te: %.02f, acc_te: %.02f, waited: %.01f  '
                % (duration, epoch_num, n_epochs, batch_num, n_batch_te,
                   loss_te_b, acc_te_b, waiting_duration))

        loss_tr /= float(n_batch_tr)
        loss_te /= float(n_batch_te)

        acc_tr /= float(n_batch_tr)
        acc_te /= float(n_batch_te)

        acc_tr *= 100.0
        acc_te *= 100.0

        tt2 = time.time()
        duration = tt2 - tt1
        sys.stdout.write(
            '\r%04ds - epoch: %02d/%02d, loss_tr %.02f, acc_tr %.02f, loss_te %.02f, acc_te: %.02f, waited: %d   \n'
            % (duration, epoch_num, n_epochs, loss_tr, acc_tr, loss_te, acc_te,
               waiting_duration_total))

        # shuffle the data
        async_loader_tr.shuffle_data()

        # because we set a new data order, start getting the first batch ready
        async_loader_tr.load_feats_in_batch(1)
        async_loader_te.load_feats_in_batch(1)

        # save the model, if required
        if is_save:
            __save_model(root_model, model_name, epoch_num)

    print('--- finish time')
    print(datetime.datetime.now())
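
The batch counts in this example come from keras_utils.calc_num_batches, which is not shown in this listing. Under the usual assumption that the last, possibly partial batch is still counted, a ceil-division helper like the following would reproduce those counts; the name and behaviour here are assumptions, not the repository's actual code:

import math

def calc_num_batches(n_samples, batch_size):
    # number of batches needed to cover n_samples; the last batch may be partial
    return int(math.ceil(n_samples / float(batch_size)))

assert calc_num_batches(103, 20) == 6
assert calc_num_batches(40, 20) == 2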
コード例 #18
0
ファイル: train_pytorch.py プロジェクト: csu-xiao-an/Papers
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).
    """

    # get some configs for the training
    n_epochs = config.cfg.TRAIN.N_EPOCHS  #500
    dataset_name = config.cfg.DATASET_NAME  #Charades
    model_name = '%s_%s' % (config.cfg.MODEL.NAME,
                            utils.timestamp())  # e.g. 'charades_timeception_19.08.05-10:59:25'
    device = 'cuda'

    # data generators
    loader_tr, n_samples_tr, n_batches_tr = __define_loader(
        is_training=True)  # a torch DataLoader; e.g. n_samples_tr = 7811, n_batches_tr = 245
    loader_te, n_samples_te, n_batches_te = __define_loader(is_training=False)
    # e.g. n_samples_te = 1814, n_batches_te = 37

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_tr, n_batches_tr, config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_te, n_batches_te, config.cfg.TEST.BATCH_SIZE))

    # load model: build the model architecture defined by the configs
    model, optimizer, loss_fn, metric_fn, metric_fn_name = __define_timeception_model(
        device)

    print('batch_size=2, input_shape[1:]=', model._input_shape[1:])
    logger.info(
        pytorch_utils.summary(model,
                              model._input_shape[1:],
                              batch_size=2,
                              device='cuda'))  # print the model summary

    # helper to save the model state
    model_saver = pytorch_utils.ModelSaver(model, dataset_name, model_name)

    # loop on the epochs
    sys.stdout.write('\n')
    for idx_epoch in range(n_epochs):

        epoch_num = idx_epoch + 1
        # print(epoch_num)
        loss_tr = 0.0
        acc_tr = 0.0
        loss_te = 0.0
        acc_te = 0.0
        tt1 = time.time()

        # flag model as training
        model.train()  # set the model to training mode

        # training
        for idx_batch, (x, y_true) in enumerate(loader_tr):
            batch_num = idx_batch + 1

            x, y_true = x.to(device), y_true.to(
                device)  # x.shape = (32, 1024, 32, 7, 7), i.e. (batch, channels, T, h, w)
            optimizer.zero_grad()
            y_pred = model(x)

            loss = loss_fn(y_pred, y_true)
            loss.backward()
            optimizer.step()

            # calculate accuracy
            y_true = y_true.cpu().numpy().astype(np.int32)  # ground-truth labels
            y_pred = y_pred.cpu().detach().numpy()  # predicted scores
            loss_b_tr = loss.cpu().detach().numpy()
            acc_b_tr = metric_fn(y_true, y_pred)

            loss_tr += loss_b_tr
            acc_tr += acc_b_tr
            loss_b_tr = loss_tr / float(batch_num)  # running-average loss
            acc_b_tr = acc_tr / float(batch_num)
            tt2 = time.time()
            duration = tt2 - tt1
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                % (duration, epoch_num, n_epochs, batch_num, n_batches_tr,
                   metric_fn_name, loss_b_tr, acc_b_tr))

        # flag model as testing
        model.eval()

        # testing
        for idx_batch, (x, y_true) in enumerate(loader_te):
            batch_num = idx_batch + 1

            x, y_true = x.to(device), y_true.to(device)
            y_pred = model(x)
            loss_b_te = loss_fn(y_pred, y_true).cpu().detach().numpy()
            y_true = y_true.cpu().numpy().astype(np.int32)
            y_pred = y_pred.cpu().detach().numpy()
            acc_b_te = metric_fn(y_true, y_pred)

            loss_te += loss_b_te
            acc_te += acc_b_te
            loss_b_te = loss_te / float(batch_num)
            acc_b_te = acc_te / float(batch_num)
            tt2 = time.time()
            duration = tt2 - tt1
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [te]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                % (duration, epoch_num, n_epochs, batch_num, n_batches_te,
                   metric_fn_name, loss_b_te, acc_b_te))

        loss_tr /= float(n_batches_tr)
        loss_te /= float(n_batches_te)
        acc_tr /= float(n_batches_tr)
        acc_te /= float(n_batches_te)

        tt2 = time.time()
        duration = tt2 - tt1
        sys.stdout.write(
            '\r%04ds - epoch: %02d/%02d, [tr]: %0.2f, %0.2f, [te]: %0.2f, %0.2f           \n'
            %
            (duration, epoch_num, n_epochs, loss_tr, acc_tr, loss_te, acc_te))

        # after each epoch, save data
        model_saver.save(idx_epoch)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())
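
The per-batch bookkeeping above (accumulate each batch's loss and metric, then divide by the batch count to display a running mean) can be factored into a small meter. This is only an illustrative sketch, not part of the code above:

class AverageMeter(object):
    # accumulates values and reports their running mean
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def update(self, value):
        self.total += float(value)
        self.count += 1

    @property
    def average(self):
        return self.total / self.count if self.count else 0.0

# hypothetical usage inside the training loop:
# loss_meter = AverageMeter()
# loss_meter.update(loss_b_tr)
# print(loss_meter.average)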
コード例 #19
0
ファイル: charades.py プロジェクト: csu-xiao-an/Papers
def extract_features_i3d_charades(n_frames_in, n_frames_out):
    """
    Extract features from the I3D model.
    n_frames_in = 8 * n_frames_out,
    e.g. n_frames_in in {1024, 512, 256}
    and n_frames_out in {128, 64, 32}
    """

    # n_frames_in = 1024
    # n_frames_out = 128
    n_splits_per_video = 2

    root_path = '../data'
    root_Charades_path = '/home/r/renpengzhen/Datasets/Charades'
    frames_annot_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (
        root_path, n_frames_in)  # path of the sampled-frames annotation
    # model_path = '/home/r/renpengzhen/PyTorch/timeception-master/model/i3d_kinetics_model_rgb.pth'  # location of the model
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (
        root_path)  # location of the model
    frames_root_path = '%s/Charades_v1_rgb' % (root_Charades_path)  # location of all video frames
    # features_root_path = '%s/Charades/features_i3d_charades_rgb_mixed_5c_untrimmed_%d_frames' % (root_path, n_frames_out)  # where to store the i3d-extracted features
    features_root_path = '%s/Charades/features_i3d_pytorch_charades_rgb_mixed_5c_%df' % (
        root_path, n_frames_out)  # where to store the i3d-extracted features

    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(
        frames_annot_path
    )  # sampled-frame dicts for train and test: video name -> frame names, e.g. ('AXIW1', array(['AXIW1-000001.jpg', 'AXIW1-000002.jpg', ..., 'AXIW1-000770.jpg'], dtype='<U16'))
    video_frames_dict = dict()  # merged dict of video frames
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())  # the video names
    n_videos = len(video_names)  # total number of videos
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8  # number of threads
    n_frames_per_segment = 8  # frames per segment; these 8 frames are consecutive, as sampled
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not os.path.exists(features_root_path):
        os.makedirs(features_root_path)

    t_start = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(
        n_threads=n_threads)

    # async reader: start loading the frames of the first video
    video_group_frames = __get_video_frame_pathes(
        video_names[0], frames_root_path,
        video_frames_dict)  # all frame paths of the first video, as a numpy array
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model
    model = i3d_torch_charades_utils.load_model_i3d_charades_rgb_for_testing(
        model_path)

    # run one forward pass to print the model's input/output details
    print('input_size=(3, 8, 224, 224)')
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # loop over the list of videos (the whole dataset)
    for idx_video in range(n_videos):
        video_num = idx_video + 1
        video_name = video_names[idx_video]
        begin_num = 0
        end_num = n_videos

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image_batch is loaded
        t1 = time.time()
        while video_reader_tr.is_busy():
            time.sleep(0.1)
        t2 = time.time()
        duration_waited = t2 - t1

        print('... video %04d, %04d, waited: %.02f' %
              (video_num, n_videos, duration_waited))

        # get the frames
        frames = video_reader_tr.get_images(
        )  # (G*T*N, 224, 224, 3): the cropped frames of the current video

        # pre-load for the next video group, notice that we take into account the number of instances
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(
                video_names[idx_video + 1], frames_root_path,
                video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        if len(frames) != n_frames_in:
            raise Exception('... ... wrong n frames: %s' % (video_name))

        # reshape so that one dimension carries the frames per segment, while the other dimension represents the batch size
        frames = np.reshape(frames,
                            (n_frames_out, n_frames_per_segment, 224, 224,
                             3))  # (T, 8, 224, 224, 3); T is the number of segments (super-frames)

        # transpose to channels-first: (T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(
                frames).cuda()  # (T, 3, 8, 224, 224), T = 128, 64 or 32
            output_var = model(
                input_var)  # extract features: torch.Size([T, 1024, 1, 7, 7])
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)
            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have the channel_last
        features = np.transpose(features,
                                (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # reshape to have the features for each video in a separate dimension
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024),T=128,64,32

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        if os.path.exists(video_features_path):
            print('... features for video already exist: %s.pkl' %
                  (video_name))
            continue

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    t_end = time.time()
    print('... finish extracting features in %d seconds' % (t_end - t_start))
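
A quick shape check with dummy data (tiny sizes, random values) of the reshape and transpose used above, mapping a stack of T*8 frames to the (T, 3, 8, 224, 224) layout the I3D model consumes:

import numpy as np

n_frames_out, n_frames_per_segment = 4, 8  # tiny T for illustration
frames = np.random.rand(n_frames_out * n_frames_per_segment, 224, 224, 3).astype(np.float32)

frames = np.reshape(frames, (n_frames_out, n_frames_per_segment, 224, 224, 3))  # (T, 8, 224, 224, 3)
frames = np.transpose(frames, (0, 4, 1, 2, 3))                                  # (T, 3, 8, 224, 224)
assert frames.shape == (n_frames_out, 3, n_frames_per_segment, 224, 224)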
コード例 #20
0
ファイル: train_keras.py プロジェクト: cxqj/13-timeception
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).
    """

    # get some configs for the training
    # 1. get the basic configs
    n_workers = config.cfg.TRAIN.N_WORKERS  # 10
    n_epochs = config.cfg.TRAIN.N_EPOCHS  # 500
    dataset_name = config.cfg.DATASET_NAME
    model_name = '%s_%s' % (config.cfg.MODEL.NAME, utils.timestamp())

    # data generators
    # 2. build the data generators
    data_generator_tr = __define_data_generator(is_training=True)
    data_generator_te = __define_data_generator(is_training=False)

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (data_generator_tr.n_samples, data_generator_tr.n_batches,
                 config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (data_generator_te.n_samples, data_generator_te.n_batches,
                 config.cfg.TEST.BATCH_SIZE))

    # callback to save the model
    save_callback = keras_utils.SaveCallback(dataset_name, model_name)  # saves the model each epoch

    # load model
    model = __define_timeception_model()
    logger.info(model.summary())

    # train the model
    """
    Train on data batches generated one at a time by a Python generator. The generator runs in
    parallel with the model for efficiency; e.g. it allows real-time data augmentation on the CPU
    while the model is training on the GPU.

    The arguments of fit_generator are:

    generator: a generator whose output should be either
        - a tuple of the form (inputs, targets), or
        - a tuple of the form (inputs, targets, sample_weights).
      All returned arrays must contain the same number of samples. The generator loops over the
      dataset indefinitely; an epoch ends once the model has seen the configured number of samples.

    steps_per_epoch: integer; an epoch ends after the generator has yielded steps_per_epoch
      batches, then the next epoch starts.

    epochs: integer, number of iterations over the data.

    verbose: verbosity mode; 0 = no logging to stdout, 1 = progress bar, 2 = one line per epoch.

    validation_data: one of
        - a generator for the validation set,
        - a tuple of the form (inputs, targets), or
        - a tuple of the form (inputs, targets, sample_weights).

    validation_steps: when validation_data is a generator, the number of times to draw from it.

    class_weight: dictionary mapping classes to weights, commonly used for class-imbalanced data.

    sample_weight: numpy array of weights used to scale the loss function (training only). Pass a
      1D vector with the same length as the samples for 1:1 sample weighting, or, for temporal
      data, a matrix of shape (samples, sequence_length) to weight each timestep differently; in
      that case make sure sample_weight_mode='temporal' was set when compiling the model.

    workers: maximum number of processes.

    max_q_size: maximum size of the generator queue.

    pickle_safe: if True, use process-based threading. Because this relies on multiprocessing,
      non-picklable arguments cannot be passed to the generator, as they cannot easily be passed
      to child processes.

    initial_epoch: the epoch at which to start training, useful when resuming a previous run.
    """
    model.fit_generator(epochs=n_epochs,
                        generator=data_generator_tr,
                        validation_data=data_generator_te,
                        use_multiprocessing=True,
                        workers=n_workers,
                        callbacks=[save_callback],
                        verbose=2)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())
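
As a footnote to the fit_generator argument description above, a minimal, hypothetical generator of the required form (it yields (inputs, targets) tuples forever; the shapes are chosen only for illustration) could look like this:

import numpy as np

def dummy_batch_generator(batch_size=2, n_timesteps=32, side=7, n_channels=1024, n_classes=157):
    # loops indefinitely, as fit_generator expects
    while True:
        x = np.random.rand(batch_size, n_timesteps, side, side, n_channels).astype(np.float32)
        y = np.random.randint(0, 2, size=(batch_size, n_classes)).astype(np.float32)
        yield (x, y)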