def wrapper(self, *args, **kw):
    """Call the wrapped handler and serialise its result as JSON (or JSONP).

    The handler's return value is normalised to a sequence and mapped
    positionally onto ``RESPONSE_FIELDS``; fields without a matching value
    are set to ``None``. ``time`` and ``ms`` entries are then written into
    the payload before it is serialised. When the handler returned exactly
    four values, the fourth is used as the JSONP callback name wrapped
    around the serialised payload.
    """
    # Request start time.
    began_at = utils.timestamp()
    outcome = func(self, *args, **kw)
    web.header('Content-type', 'application/json')

    # A bare value is treated as a one-element result list.
    values = outcome if isinstance(outcome, (tuple, list)) else [outcome]
    n_values = len(values)

    payload = {}
    for position, field in enumerate(RESPONSE_FIELDS):
        payload[field] = values[position] if position < n_values else None

    # Include the server time and the elapsed handling time in milliseconds.
    # NOTE(review): the 2-second offset is not explained in this block -- confirm intent.
    payload['time'] = int(utils.timestamp()) - 2
    payload['ms'] = int((utils.timestamp() - began_at) * 1000)
    encoded = utils.json_dumps(payload, utils.JsonEncoder)

    # A fourth value means this is a JSONP callback invocation.
    if n_values == 4:
        return '%s(%s)' % (values[3], encoded)
    return encoded
def _xloc_init(self):
    """
    Location initialisation.

    Calls ``ip.location()`` first; per the original note this is meant to
    cache the visitor's location asynchronously so it can be fetched later
    when needed.

    :return: the string ``'var T = <timestamp>;'`` built from ``utils.timestamp()``
    """
    ip.location()
    now = utils.timestamp()
    return 'var T = %s;' % now
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).

    Reads the training settings from ``config.cfg``, builds the train/test
    data generators and the model, then fits the model with a save callback.
    Start and finish times are written to the logger.
    """
    # get some configs for the training
    n_workers = config.cfg.TRAIN.N_WORKERS
    n_epochs = config.cfg.TRAIN.N_EPOCHS
    dataset_name = config.cfg.DATASET_NAME
    model_name = '%s_%s' % (config.cfg.MODEL.NAME, utils.timestamp())

    # data generators
    data_generator_tr = __define_data_generator(is_training=True)
    data_generator_te = __define_data_generator(is_training=False)

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' % (
        data_generator_tr.n_samples, data_generator_tr.n_batches,
        config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' % (
        data_generator_te.n_samples, data_generator_te.n_batches,
        config.cfg.TEST.BATCH_SIZE))

    # callback to save the model
    save_callback = keras_utils.SaveCallback(dataset_name, model_name)

    # load model
    model = __define_timeception_model()
    # summary() prints its table and returns None, so logging its return
    # value only ever logged "None"; send each summary line to the logger.
    model.summary(print_fn=logger.info)

    # train the model
    model.fit_generator(epochs=n_epochs,
                        generator=data_generator_tr,
                        validation_data=data_generator_te,
                        use_multiprocessing=True,
                        workers=n_workers,
                        callbacks=[save_callback],
                        verbose=2)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())
def __init__(self, **kwargs):
    """Build a task from keyword fields (for example a database row).

    ``type`` is required; every other field falls back to a default.
    Integer ``type`` / ``status`` values are resolved through the matching
    enum's ``find``; a non-empty string ``content`` is decoded with
    ``utils.json_loads``.

    :param kwargs: task fields: ``id``, ``type``, ``type_id``,
        ``time_created``, ``active_time``, ``last_time``, ``tail_num``,
        ``status``, ``status_text``, ``exec_times``, ``content``
    :raises KeyError: when ``type`` is not supplied
    """
    super(Task, self).__init__(**kwargs)
    self.id = utils.intval(kwargs.get('id'))

    self.type = kwargs['type']
    if isinstance(self.type, (int, long)):
        self.type = enums.Task.Type.find(self.type)
    self.type_id = kwargs.get('type_id', 0)

    # Creation / activation times default to "now"; *_text holds the
    # value formatted by utils.timestamp2datefmt.
    self.time_created = kwargs.get('time_created', int(utils.timestamp()))
    self.time_created_text = utils.timestamp2datefmt(self.time_created)
    self.active_time = kwargs.get('active_time', self.time_created)
    self.active_time_text = utils.timestamp2datefmt(self.active_time)
    self.last_time = kwargs.get('last_time', 0)

    self.tail_num = kwargs.get(
        'tail_num', utils.make_tail_num(utils.randint(0, 999999999)))

    # Status
    self.status = kwargs.get('status', enums.Task.Status.Waiting)
    if isinstance(self.status, (int, long)):
        self.status = enums.Task.Status.find(self.status)
    # The original stored this under the misspelled name ``statsu_text``,
    # so ``status_text`` was never initialised here.
    self.status_text = kwargs.get('status_text')
    # Keep the misspelled attribute as an alias for any existing readers.
    self.statsu_text = self.status_text

    self.exec_times = utils.intval(kwargs.get('exec_times', 0))

    self.content = kwargs.get('content', {})
    if self.content and isinstance(self.content, (str, unicode)):
        self.content = utils.json_loads(self.content)

    # Handler for this task; None by default
    self._handler = None
    # Whether the task has been executed
    self._executed = False
def mark_delay(self, message, seconds=5):
    """
    Mark the task as waiting again, to be re-activated after a delay.

    :param message: status note; stored on the task and appended as an
        ``'info'`` message
    :param seconds: number of seconds to delay
    :return:
    """
    task = self.task
    task.status = enums.Task.Status.Waiting
    task.status_text = message
    self.message_append('info', message)
    self.necessary = False

    # Next activation time
    wake_at = int(utils.timestamp()) + seconds
    task.active_time = wake_at
def find_actives(tail_nums=None, active_time=None, limit=20):
    """
    Fetch tasks with status 0 whose activation time has been reached.

    :param tail_nums: optional list of tail numbers to restrict the query to
    :param active_time: activation-time threshold; defaults to the current time
    :param limit: maximum number of rows to fetch
    :return: list of ``Task`` objects, or ``None`` when nothing matched
    """
    # Default the activation time to the current moment.
    if not active_time:
        active_time = int(utils.timestamp())

    # Optional extra condition restricting the tail numbers.
    # NOTE(review): the values are interpolated into the SQL text -- confirm
    # callers only pass trusted numbers.
    if tail_nums:
        attach_query = ' AND tail_num IN (%s)' % ','.join(map(str, tail_nums))
    else:
        attach_query = ''

    # Only tasks with status = 0 are selected.
    sql = """
        SELECT * FROM task
        WHERE status = 0 AND active_time <= $active_time%s
        ORDER BY active_time ASC, id ASC
        LIMIT $limit;
    """ % attach_query
    rows = db.manager.slave_core.query(sql, vars=locals())

    if not rows:
        return None
    return [Task(**row) for row in rows]
def train_model_on_pickled_features():
    """
    Build the videograph model and fit it on features loaded from an h5 file.

    Annotations, nodes and features are read from fixed EPIC-Kitchens paths;
    the model is saved and scored through Keras callbacks during ``fit``.
    """
    # annotation
    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    # backbone / feature description
    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    # optimisation settings
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0

    # locations
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')
    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))
    nodes_path = Pth(
        'EPIC-Kitchens/features_centroids/features_random_%d.pkl',
        (n_nodes, ))

    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)
    nodes = utils.pkl_load(nodes_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    tic = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    elapsed = time.time() - tic
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (elapsed))

    # load data
    print('... loading data: %s' % (features_path))
    tic = time.time()
    # Per the original notes: the features come from
    # datasets.Epic_Kitchens.i3d_keras_epic_kitchens(), using an out-of-box
    # I3D (pre-trained on Kinetics, NOT fine-tuned on EPIC-Kitchens), last
    # conv feature 7*7*1024 'mixed_5c'.
    # For better performance, randomly sample new frames and re-extract
    # their features at every epoch (random rather than uniform sampling,
    # see Epic_Kitchens.__random_sample_frames_per_video_for_i3d() and
    # Epic_Kitchens._901_extract_features_i3d()), then train on those
    # features. It is computationally heavy but cannot be avoided.
    # Results improve further if I3D is replaced with a 2D/3D CNN that was
    # previously fine-tuned on EPIC-Kitchens.
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    elapsed = time.time() - tic
    print('... data loaded: %d' % (elapsed))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    for array in (x_tr, x_te, y_tr, y_te):
        print(array.shape)

    # callbacks: save the model, and score it on the test split
    save_callback = keras_utils.ModelSaveCallback(
        model, model_name, epoch_offset, model_root_path)
    score_callback = keras_utils.MapScoreCallback(
        model, None, None, x_te, y_te, batch_size_te, n_classes)

    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=[save_callback, score_callback],
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
def __train_model_on_video_frames_videograph(n_epochs, n_timesteps,
                                             n_centroids, timestamp,
                                             is_resume_training,
                                             start_epoch_num):
    """
    Train the top part of the model on features produced by the bottom parts.

    This function runs as the "top" worker: for every epoch it waits until
    the three bottom instances have signalled (through ``TRAIN_STATE``) that
    they started the epoch, then trains batch by batch as soon as the
    features of all videos of the batch are present in one of the
    ``TRAIN_STATE.feats_dict_tr_*`` dictionaries. After each epoch the model
    is evaluated on the pre-extracted test features, the training list is
    reshuffled, new frames are sampled and the model is optionally saved.

    :param n_epochs: total number of epochs
    :param n_timesteps: number of timesteps per video (8 frames per timestep)
    :param n_centroids: number of centroids; selects the centroids file
    :param timestamp: identifier appended to the model name
    :param is_resume_training: load a previously saved model instead of
        building a new one
    :param start_epoch_num: epoch to resume from (0 when not resuming)
    """
    global TRAIN_STATE

    assert (start_epoch_num > 1 and is_resume_training) or (
        start_epoch_num == 0 and not is_resume_training
    ), 'sorry, either provide resume_epoch_num or set the model as not resuming with resume_epoch_num = 0'

    n_frames_per_segment = 8
    n_frames_per_video = n_frames_per_segment * n_timesteps

    # locations
    model_name = 'classifier_from_video_frames_%s' % (timestamp)
    resume_model_json_path = Pth('EPIC-Kitchens/models/%s/%03d.json',
                                 (model_name, start_epoch_num))
    resume_model_weights_path = Pth('EPIC-Kitchens/models/%s/%03d.pkl',
                                    (model_name, start_epoch_num))
    frames_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')
    features_te_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames_te.h5',
        (n_frames_per_video, ))
    # The original first assigned the 'features_random_%d_centroids.pkl'
    # path and immediately overwrote it; only the sobol path was ever used.
    centroids_path = Pth(
        'EPIC-Kitchens/features_centroid/features_sobol_%d_centroids.pkl',
        (n_centroids, ))
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotation/video_names_splits.pkl')
    frame_relative_pathes_dict_path = Pth(
        'EPIC-Kitchens/annotation/frame_relative_pathes_dict_tr.pkl')
    annot_path = Pth(
        'EPIC-Kitchens/annotation/annot_video_level_many_shots.pkl')

    is_save_centroids = False
    is_save_model = True
    verbose = False

    n_gpus = 1
    n_classes = ds_epic_kitchens.N_NOUNS_MANY_SHOT
    batch_size_tr = 20
    batch_size_te = 40
    n_feat_maps = 1024
    featmap_side_dim = 7
    input_shape = (None, n_timesteps, featmap_side_dim, featmap_side_dim,
                   n_feat_maps)

    # load centroids
    centroids = utils.pkl_load(centroids_path)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()

    # load new or previous model
    if is_resume_training:
        custom_objects = {
            'DepthwiseDilatedConv1DLayer': DepthwiseDilatedConv1DLayer,
            'DepthwiseConv1DLayer': DepthwiseConv1DLayer,
            'DepthwiseDenseLayer': DepthwiseDenseLayer,
            'ConvOverSpaceLayer': ConvOverSpaceLayer,
            'TransposeLayer': TransposeLayer,
            'ReshapeLayer': ReshapeLayer,
            'MeanLayer': MeanLayer,
            'MaxLayer': MaxLayer
        }
        model = keras_utils.load_model(resume_model_json_path,
                                       resume_model_weights_path,
                                       custom_objects=custom_objects,
                                       is_compile=False)
        model, _ = __compile_model_for_finetuning(model, n_gpus)
    else:
        # NOTE(review): this action-vlad model is replaced by the videograph
        # model on the very next line -- confirm whether this call is still
        # needed or is a leftover alternative.
        model, _ = __load_model_action_vlad(n_classes,
                                            input_shape,
                                            n_gpus=n_gpus,
                                            is_load_weights=False,
                                            weight_path='')
        model, _ = __load_model_videograph(centroids, n_classes, input_shape)
        # other alternatives left by the original author:
        # model, _ = __load_model_timeception(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=False, weight_path='')
        # model, _ = __load_model_mlp_classifier_transformer_centroids_with_graph_embedding(centroids, n_classes, input_shape, n_gpus=n_gpus, is_load_weights=False, weight_path='')

    # dry run to get the model loaded in gpu
    dummy_feature = np.zeros(tuple([batch_size_tr] + list(input_shape[1:])),
                             dtype=np.float32)
    model.predict(dummy_feature)

    t2 = time.time()
    duration = t2 - t1
    print(model.summary(line_length=120, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data')
    t1 = time.time()
    (y_tr, _, _, y_te, _, _) = utils.pkl_load(annot_path)
    (video_names_tr, video_names_te) = utils.pkl_load(video_names_splits_path)
    frame_relative_pathes_dict = utils.pkl_load(
        frame_relative_pathes_dict_path)
    x_te = utils.h5_load(features_te_path)
    print('... centroids: %s' % (centroids_path))

    n_tr = len(video_names_tr)
    n_te = len(video_names_te)

    # set list of video names and ground truth
    TRAIN_STATE.video_names_tr = video_names_tr
    TRAIN_STATE.class_nums_tr = y_tr

    # sample new frames
    sampled_video_frames_dict = ds_epic_kitchens.__random_sample_frames_per_video_for_i3d(
        TRAIN_STATE.video_names_tr, frames_root_path,
        frame_relative_pathes_dict, n_frames_per_segment, n_frames_per_video)
    TRAIN_STATE.video_frames_dict_tr = sampled_video_frames_dict

    del video_names_tr
    del video_names_te
    del y_tr

    n_batch_tr = keras_utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = keras_utils.calc_num_batches(n_te, batch_size_te)
    t2 = time.time()
    duration = t2 - t1
    print('... data loaded: %d' % duration)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te, n_gpus))

    # make model top ready
    TRAIN_STATE.model_top_ready = True
    sys.stdout.write('\n')

    for idx_epoch in range(start_epoch_num, n_epochs):
        epoch_num = idx_epoch + 1

        # wait until bottom parts start
        # (time.sleep replaces the private threading._sleep, which does not
        # exist on Python 3)
        while (TRAIN_STATE.model_bottom_1_epoch_start < epoch_num
               or TRAIN_STATE.model_bottom_2_epoch_start < epoch_num
               or TRAIN_STATE.model_bottom_3_epoch_start < epoch_num):
            time.sleep(2.0)
            if verbose:
                print(
                    '... top part is waiting for bottom part to start extracting features for epoch %d'
                    % (epoch_num))

        # epoch started, update counter
        TRAIN_STATE.model_top_epoch_start = epoch_num

        # video names are obtained from the state at the beginning of each epoch
        video_names_tr = TRAIN_STATE.video_names_tr
        y_tr = TRAIN_STATE.class_nums_tr

        loss_tr = 0.0
        loss_tr_b = 0.0
        tt1 = time.time()
        waiting_duration_total = 0

        # loop and train
        for idx_batch_tr in range(n_batch_tr):
            batch_num_tr = idx_batch_tr + 1
            start_idx_batch = idx_batch_tr * batch_size_tr
            stop_idx_batch = (idx_batch_tr + 1) * batch_size_tr
            video_names_tr_batch = video_names_tr[
                start_idx_batch:stop_idx_batch]
            y_tr_b = y_tr[start_idx_batch:stop_idx_batch]

            # wait until the features of every video in the batch are loaded
            is_missing_features = True
            t1 = time.time()
            while is_missing_features:
                is_missing_features = False
                for _v_name in video_names_tr_batch:
                    if (_v_name not in TRAIN_STATE.feats_dict_tr_1
                            and _v_name not in TRAIN_STATE.feats_dict_tr_2
                            and _v_name not in TRAIN_STATE.feats_dict_tr_3):
                        is_missing_features = True
                        break
                if is_missing_features:
                    time.sleep(1.0)
                    if verbose:
                        print(
                            '... model top is waiting for missing videos: %s'
                            % _v_name)
            t2 = time.time()

            x_tr_b = __get_features_from_dictionaries(video_names_tr_batch)
            x_tr_b = np.array(x_tr_b)
            loss_batch_tr = model.train_on_batch(x_tr_b, y_tr_b)

            # after training, drop the used features from the first
            # dictionary that holds them (the original did `del` followed by
            # a redundant `pop` of the same key)
            for _v_name in video_names_tr_batch:
                for feats_dict in (TRAIN_STATE.feats_dict_tr_1,
                                   TRAIN_STATE.feats_dict_tr_2,
                                   TRAIN_STATE.feats_dict_tr_3):
                    if _v_name in feats_dict:
                        feats_dict.pop(_v_name, None)
                        break

            loss_tr += loss_batch_tr
            loss_tr_b = loss_tr / float(batch_num_tr)
            tt2 = time.time()
            duration = tt2 - tt1
            waiting_duration = t2 - t1
            waiting_duration_total += waiting_duration
            msg = '%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss: %0.2f, waited: %.01f ' % (
                duration, epoch_num, n_epochs, batch_num_tr, n_batch_tr,
                loss_tr_b, waiting_duration)
            if verbose:
                print(msg)
            else:
                sys.stdout.write('\r%s' % (msg))

        # test
        y_pred_te = model.predict(x_te, batch_size_te, verbose=0)
        map_te_avg = 100 * metrics.mean_avg_precision_sklearn(y_te, y_pred_te)
        loss_tr /= float(n_batch_tr)

        tt2 = time.time()
        duration = tt2 - tt1
        timestamp_now = utils.timestamp()
        msg = '%04ds - epoch: %02d/%02d, loss [tr]: %0.2f, map [te]: %0.2f%%, waited: %d, finished: %s   \n' % (
            duration, epoch_num, n_epochs, loss_tr, map_te_avg,
            waiting_duration_total, timestamp_now)
        if verbose:
            print(msg)
        else:
            sys.stdout.write('\r%s' % (msg))

        # after training and testing, shuffle the list of training videos
        # and store it back in the training state
        video_names_tr, y_tr = __shuffle_training_data(
            TRAIN_STATE.video_names_tr, TRAIN_STATE.class_nums_tr)
        TRAIN_STATE.video_names_tr = video_names_tr
        TRAIN_STATE.class_nums_tr = y_tr
        del video_names_tr, y_tr

        # also, sample new frames
        sampled_video_frames_dict = ds_epic_kitchens.__random_sample_frames_per_video_for_i3d(
            TRAIN_STATE.video_names_tr, frames_root_path,
            frame_relative_pathes_dict, n_frames_per_segment,
            n_frames_per_video)
        TRAIN_STATE.video_frames_dict_tr = sampled_video_frames_dict

        # update counter so the bottom part starts extracting features for the next epoch
        TRAIN_STATE.model_top_epoch_end = epoch_num

        # save the model and nodes, if required
        if is_save_model:
            __save_model(model, model_name, epoch_num)
        if is_save_centroids:
            __save_centroids(model, model_name, epoch_num)

    print('--- finish time')
    print(datetime.datetime.now())
def train_model_on_video_frames():
    """
    Launch the multi-threaded training on video frames.

    From the original note: when training on images the model won't fit in
    gpu, and splitting it over several gpus makes the batch so small that
    BatchNorm is no longer applicable. The solution is to use the first 3
    gpus to extract features from the backbone model (the bottom part, e.g.
    I3D or ResNet) and the 4th gpu to train our model (the top part) on
    these features.

    Three bottom threads and one top thread are created; the top thread is
    started first, and all four are joined before returning.
    """
    global TRAIN_STATE

    # From the original: to keep numpy's cpu utilization small, these have
    # to be set before importing numpy
    # os.environ["MKL_NUM_THREADS"] = "1"
    # os.environ["NUMEXPR_NUM_THREADS"] = "1"
    # os.environ["OMP_NUM_THREADS"] = "1"

    # if training from scratch
    resume_epoch_num = 0
    is_resume_training = False
    resume_timestamp = ''

    # choose the run identifier and the first epoch
    if is_resume_training:
        timestamp = resume_timestamp
        starting_epoch_num = resume_epoch_num
    else:
        timestamp = utils.timestamp()
        starting_epoch_num = 0

    n_epochs = 500

    # for i3d-keras
    n_centroids = 128
    n_frames_bottom = 512
    n_frames_top = 64
    n_instances = 3
    model_bottom = __start_train_model_on_video_frames_backbone_i3d_keras
    model_top = __start_train_model_on_video_frames_videograph

    # shared state through which the top and bottom parts communicate
    TRAIN_STATE = TrainingState()

    # bottom part: one thread per instance, numbered 1..n_instances
    bottom_threads = [
        threading.Thread(target=model_bottom,
                         args=(n_epochs, starting_epoch_num, n_frames_bottom,
                               n_instances, instance_num))
        for instance_num in range(1, n_instances + 1)
    ]

    # top part
    top_thread = threading.Thread(target=model_top,
                                  args=(n_epochs, n_frames_top, n_centroids,
                                        timestamp, is_resume_training,
                                        starting_epoch_num))

    # the top thread goes first, then the bottom instances in order
    workers = [top_thread] + bottom_threads
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def __extract_features_rgb(begin_num=None, end_num=None):
    """
    Extract I3D features for the Charades videos and pickle them per video.

    Frames of the next video are loaded asynchronously while the current
    video is pushed through the model. Videos that do not have exactly 160
    frames are reported and skipped.

    :param begin_num: together with ``end_num``, restricts processing to the
        1-based video numbers ``begin_num < video_num <= end_num``
    :param end_num: see ``begin_num``; both must be given for the
        restriction to apply
    :return: None (returns early if the features directory does not exist)
    """
    root_path = c.DATA_ROOT_PATH  # './data'
    # this file is generated by charades.py
    annotation_path = '%s/Charades/annotation/frames_dict_trimmed_multi_label_i3d_160_frames.pkl' % (
        root_path)  # charades annotation path
    features_root_path = '%s/Charades/features_i3d_charades_rgb_mixed_5c_trimmed_20_frames' % (
        root_path)  # where the features are saved
    video_frames_root_path = '%s/Charades/frames/Charades_v1_rgb' % (
        root_path)  # where the video frames live
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (
        root_path)  # pre-trained model
    # (the original also carried an unused feature_name = 'Mixed_5c',
    # annotated as the layer whose features are saved)

    # 1. load the video annotation
    (video_frames_dict_tr,
     video_frames_dict_te) = utils.pkl_load(annotation_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    # materialise the keys: a dict view cannot be indexed on Python 3
    video_names = list(video_frames_dict.keys())
    n_videos = len(video_names)
    frame_count = 0

    if not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    # Decide up-front which videos are processed. The original pre-loaded
    # video 0 and then skipped out-of-range videos inside the loop, so with
    # a range the frames handed out by the reader belonged to another video.
    if begin_num is not None and end_num is not None:
        selected_idxs = [
            idx for idx in range(n_videos) if begin_num < idx + 1 <= end_num
        ]
    else:
        selected_idxs = list(range(n_videos))

    t_start = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # async reader; start loading the images of the first video to process
    # (original author's note: this loader was not written)
    img_reader = image_utils.AsyncImageReaderCharadesForI3DTorchModel(
        n_threads=20)
    if selected_idxs:
        img_reader.load_imgs_in_batch(
            __get_video_frame_pathes(video_names[selected_idxs[0]],
                                     video_frames_root_path,
                                     video_frames_dict))

    # load the model
    model = __load_i3d_model_rgb(model_path)
    torchsummary.summary(model, input_size=(3, 160, 224, 224))

    # loop on list of videos
    n_selected = len(selected_idxs)
    for position, idx_video in enumerate(selected_idxs):
        video_num = idx_video + 1
        video_name = video_names[idx_video]

        # wait until the image batch is loaded (is_busy() stays True while
        # the reader is still loading); time.sleep replaces the private
        # threading._sleep, which does not exist on Python 3
        t_wait = time.time()
        while img_reader.is_busy():
            time.sleep(0.1)
        duration_waited = time.time() - t_wait
        print('...... video %d/%d: %s, waited: %d' %
              (video_num, n_videos, video_name, duration_waited))

        # get the video frames
        video_frames = img_reader.get_images()

        # pre-load for the next video that will actually be processed
        if position + 1 < n_selected:
            next_video_name = video_names[selected_idxs[position + 1]]
            img_reader.load_imgs_in_batch(
                __get_video_frame_pathes(next_video_name,
                                         video_frames_root_path,
                                         video_frames_dict))

        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print ('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # every charades video here is expected to have exactly 160 frames
        if len(video_frames) != 160:
            print('... wrong n frames: %d' % (video_num))
            continue

        # transpose to have the channel_first (160, 224, 224, 3) => (3, 160, 224, 224)
        video_frames = np.transpose(video_frames, (3, 0, 1, 2))

        # add one dimension to represent the batch size
        video_frames = np.expand_dims(video_frames, axis=0)  # (N,C,L,H,W)

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(video_frames).cuda()
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (1, 1024, 20, 7, 7)
            # don't forget to clean up variables; per the original note they
            # must be released after every video, otherwise an error occurs
            del input_var
            del output_var

        # squeeze to remove the dimension of the batch_size
        features = features[0]  # (1024, 20, 7, 7)

        # transpose to have the channel_last: (20, 7, 7, 1024) = (T,H,W,C);
        # original note: not needed if the downstream code is also PyTorch
        features = np.transpose(features, (1, 2, 3, 0))

        # save the features
        utils.pkl_dump(features, video_features_path, is_highest=True)

        # increment counts
        frame_count += len(video_frames)

    # total time is measured from the start of the run; the original reused
    # t1 inside the loop, so it only covered the last video
    t_end = time.time()
    print('finish extracting %d features in %d seconds' %
          (frame_count, t_end - t_start))
    print('end time: %s' % utils.timestamp())
def train_model_videograph():
    """
    Build the videograph model on freshly generated nodes and fit it on
    features loaded from an h5 file.
    """
    # annotation
    annotation_type = 'noun'
    annot_path = Pth(
        'EPIC-Kitchens/annotations/annot_video_level_many_shots.pkl')
    (y_tr, y_te), n_classes = __load_annotation(annot_path, annotation_type)

    # backbone / feature description
    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    n_nodes = 128
    n_timesteps = 64
    n_frames_per_segment = 8
    n_frames_per_video = n_timesteps * n_frames_per_segment

    # optimisation settings
    batch_size_tr = 20
    batch_size_te = 30
    n_epochs = 500
    epoch_offset = 0

    # locations
    model_name = 'classifier_%s' % (utils.timestamp())
    model_root_path = Pth('EPIC-Kitchens/models')
    nodes_path = Pth('EPIC-Kitchens/features/nodes_random_%d.pkl',
                     (n_nodes, ))
    features_path = Pth(
        'EPIC-Kitchens/features/features_i3d_mixed_5c_%d_frames.h5',
        (n_frames_per_video, ))

    n_channels, side_dim = utils.get_model_feat_maps_info(
        model_type, feature_type)
    input_shape = (None, n_timesteps, side_dim, side_dim, n_channels)

    # either load nodes, or generate them on the fly, but remember to save
    # them, as you need them in test time
    # nodes = utils.pkl_load(nodes_path)
    nodes = utils.generate_centroids(n_nodes, n_channels)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    tic = time.time()
    model = __load_model_videograph(nodes, n_classes, input_shape)
    elapsed = time.time() - tic
    print(model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (elapsed))

    # load data
    print('... loading data: %s' % (features_path))
    tic = time.time()
    # Per the original notes: features are extracted using
    # datasets.epic_kitchens.i3d_keras_epic_kitchens(), with an out-of-box
    # i3d (pre-trained on kinetics, NOT fine-tuned on epic-kitchens), last
    # conv feature 7*7*1024 'mixed_5c'
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    elapsed = time.time() - tic
    print('... data loaded: %d' % (elapsed))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = utils.calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size: %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr))
    print('... [te]: n, n_batch, batch_size: %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te))
    for array in (x_tr, x_te, y_tr, y_te):
        print(array.shape)

    # callbacks: save the model, and score it on the test split
    save_callback = keras_utils.ModelSaveCallback(
        model, model_name, epoch_offset, model_root_path)
    score_callback = keras_utils.MapScoreCallback(
        model, None, None, x_te, y_te, batch_size_te, n_classes)

    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=[save_callback, score_callback],
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
def train_model_on_pickled_features():
    """
    Build the video-graph classifier and fit it on Breakfast features
    loaded from an h5 file, saving the model through a Keras callback.
    """
    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    is_spatial_pooling = False
    is_resume_training = False

    n_timesteps = 64
    batch_size_tr = 16
    batch_size_te = 40
    n_centroids = 128
    n_epochs = 100
    n_classes = N_CLASSES
    n_gpus = 1

    # locations
    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    model_root_path = Pth('Breakfast/models/')
    gt_activities_path = Pth('Breakfast/annotation/gt_activities.pkl')
    centroids_path = Pth(
        'Breakfast/features_centroids/features_random_%d_centroids.pkl',
        (n_centroids, ))
    # the features file depends on whether spatial pooling is used
    if is_spatial_pooling:
        features_path = Pth(
            'Breakfast/features/features_i3d_mixed_5c_%d_frames_max_pool.h5',
            (n_timesteps * 8, ))
    else:
        features_path = Pth(
            'Breakfast/features/features_i3d_mixed_5c_%d_frames.h5',
            (n_timesteps * 8, ))

    centroids = utils.pkl_load(centroids_path)
    (video_ids_tr, y_tr), (video_ids_te,
                           y_te) = utils.pkl_load(gt_activities_path)

    n_feat_maps, feat_map_side_dim = __get_model_feat_maps_info(
        model_type, feature_type)
    if is_spatial_pooling:
        feat_map_side_dim = 1
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim,
                   n_feat_maps)

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    tic = time.time()
    # alternatives left by the original author:
    # root_model, model = __load_model_mlp_classifier_action_vlad(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    # root_model, model = __load_model_mlp_classifier_timeception(n_classes, input_shape, n_gpus=n_gpus, is_load_weights=is_resume_training, weight_path=model_weight_path)
    root_model, model = __load_model_mlp_classifier_video_graph(
        centroids,
        n_classes,
        input_shape,
        n_gpus=n_gpus,
        is_load_weights=is_resume_training,
        weight_path=model_weight_path)
    elapsed = time.time() - tic
    print(root_model.summary(line_length=130, positions=None, print_fn=None))
    print('... model built, duration (sec): %d' % (elapsed))

    # load data
    print('... loading data: %s' % (features_path))
    print('... centroids: %s' % (centroids_path))
    tic = time.time()
    (x_tr, x_te) = utils.h5_load_multi(features_path, ['x_tr', 'x_te'])
    elapsed = time.time() - tic
    print('... data loaded: %d' % (elapsed))

    n_tr = len(x_tr)
    n_te = len(x_te)
    n_batch_tr = __calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = __calc_num_batches(n_te, batch_size_te)
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te, n_gpus))

    save_callback = keras_utils.ModelSaveCallback(model, model_name,
                                                  model_root_path)
    model.fit(x_tr,
              y_tr,
              epochs=n_epochs,
              batch_size=batch_size_tr,
              validation_split=0.0,
              validation_data=(x_te, y_te),
              shuffle=True,
              callbacks=[save_callback],
              verbose=2)

    print('--- finish time')
    print(datetime.datetime.now())
def extract_features_i3d_charades():
    """
    Extract features from i3d-model

    Loads the Charades frame annotation, starts asynchronously loading the
    frames of the first video, loads the I3D model and prints its summary.

    NOTE(review): the function currently returns right after printing the
    summary, so the extraction loop below never runs. The early ``return``
    is kept to preserve behaviour -- confirm whether it is a debugging
    leftover.

    :raises ValueError: (in the extraction loop) when a video does not yield
        the expected number of frames
    """
    n_frames_in = 1024
    n_frames_out = 128

    root_path = '/content/'
    frames_annot_path = '%s/charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (root_path, n_frames_in)
    model_path = '%s/charades/baseline_models/i3d/rgb_charades.pt' % (root_path)
    frames_root_path = '%s/charades/frames/Charades_v1_rgb' % (root_path)
    features_root_path = '/local-ssd/nhussein/Charades/features_i3d_charades_rgb_mixed_5c_untrimmed_%d_frames' % (n_frames_out)

    (video_frames_dict_tr, video_frames_dict_te) = utils.pkl_load(frames_annot_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    # materialise the keys: a dict view cannot be indexed on Python 3
    video_names = list(video_frames_dict.keys())
    n_videos = len(video_names)
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8
    n_frames_per_segment = 8
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not is_local_machine and not os.path.exists(features_root_path):
        print('Sorry, path does not exist: %s' % (features_root_path))
        return

    t_start = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(n_threads=n_threads)

    # async reader: start loading the frames of the first video
    video_group_frames = __get_video_frame_pathes(video_names[0], frames_root_path, video_frames_dict)
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model
    model = i3d_factory.load_model_i3d_charades_rgb_for_testing(model_path)
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # import torchsummary
    # print torchsummary.summary(model, (8, 3, 224, 224))
    # NOTE(review): early return, everything below is unreachable.
    return

    # loop on list of videos
    for idx_video in range(n_videos):
        video_num = idx_video + 1
        video_name = video_names[idx_video]

        # NOTE(review): begin_num / end_num are not defined in this function;
        # presumably module-level names -- confirm before re-enabling the
        # loop. Also note the reader was pre-loaded with video 0, so frames
        # would not match the video when earlier videos are skipped here.
        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image_batch is loaded; time.sleep replaces the
        # private threading._sleep, which does not exist on Python 3
        t_wait = time.time()
        while video_reader_tr.is_busy():
            time.sleep(0.1)
        duration_waited = time.time() - t_wait
        print('... video %04d, %04d, waited: %.02f' % (video_num, n_videos, duration_waited))

        # get the frames
        frames = video_reader_tr.get_images()  # (G*T*N, 224, 224, 3)

        # pre-load for the next video
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(video_names[idx_video + 1], frames_root_path, video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        if len(frames) != n_frames_in:
            # the original raised a plain string, which is itself a
            # TypeError on Python 3
            raise ValueError('... ... wrong n frames: %s' % (video_name))

        # reshape so that one dimension carries the frames per segment,
        # while the other dimension represents the batch size
        frames = np.reshape(frames, (n_frames_out, n_frames_per_segment, 224, 224, 3))  # (T, 8, 224, 224, 3)

        # transpose to have the channel_first (T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(frames).cuda()
            output_var = model(input_var)
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)
            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have the channel_last
        features = np.transpose(features, (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # drop the singleton axis
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024)

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)
        # if os.path.exists(video_features_path):
        #     print ('... features for video already exist: %s.pkl' % (video_name))
        #     continue

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    # total time is measured from the start of the run; the original reused
    # t1 inside the loop, so it only covered the last video
    t_end = time.time()
    print('... finish extracting features in %d seconds' % (t_end - t_start))
def train_human_object_multiple_context_gating(soft_flag=True, backbone='rcnn'):
    """
    Train the multi-context late-fusion classifier on pre-extracted HICO features.

    Interaction features are read from an h5 file selected by ``backbone``,
    four context feature sets and the annotation pickle are loaded, then the
    model is built, moved to the GPU and trained through
    ``pytorch_utils.train_model_custom_metric_mask``.

    :param soft_flag: must be truthy; only the soft-gated fusion model
        (``ClassifierContextLateFusionMultiSoftGate``) is built by this function.
    :param backbone: ``'rcnn'`` or ``'pairatt'``; selects the interaction features.
    :raises NotImplementedError: if ``soft_flag`` is falsy.
    :raises ValueError: if ``backbone`` is not a supported name.
    :return: None
    """

    # fail fast: previously a falsy soft_flag only crashed (unbound `model`)
    # after all the features had been loaded
    if not soft_flag:
        raise NotImplementedError(
            'Only the soft fusion model is available, soft_flag must be True')

    n_epochs = 100
    batch_size_tr = 32
    batch_size_te = 32
    n_classes = N_CLASSES

    # interaction features, depending on the backbone
    if backbone == 'rcnn':
        print('Using backbone rcnn')
        feature_path_interaction = Pth(
            'Hico/features/h5/features_base_subject_object.h5')
        n_channels, n_regions, channel_side_dim = 4096, 12, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction,
                                           ['x_tr', 'x_te'])
        # only the rcnn features get axes 1 and 2 swapped
        x_tr = np.swapaxes(x_tr, 1, 2)
        x_te = np.swapaxes(x_te, 1, 2)
    elif backbone == 'pairatt':
        print('Using backbone pairatt')
        feature_path_interaction = Pth('Hico/features/h5/features_pairattn.h5')
        n_channels, n_regions, channel_side_dim = 4096, 3, 1
        (x_tr, x_te) = utils.h5_load_multi(feature_path_interaction,
                                           ['x_tr', 'x_te'])
    else:
        # previously an unknown backbone surfaced later as an unbound-variable error
        raise ValueError('Unknown backbone: %s' % (backbone, ))

    # context features; the shapes below are listed in the order c1, c2, c3, c4
    feature_path_c1 = Pth('Hico/features/h5/lvis.h5')
    feature_path_c2 = Pth('Hico/features/h5/local_scene.h5')
    feature_path_c3 = Pth('Hico/features/h5/deformation.h5')
    feature_path_c4 = Pth('Hico/features/h5/local_pose.h5')
    x_cs_shape = [(1300, 1, 1, 1), (2048, 1, 1, 1), (512, 1, 1, 1),
                  (4096, 1, 1, 1)]

    # annotation of the images
    annot_path = Pth('Hico/features/h5/anno_hico.pkl')

    model_name = 'classifier_%s' % (utils.timestamp())
    input_shape = (n_channels, n_regions, channel_side_dim, channel_side_dim)

    print('--- start time')
    print(datetime.datetime.now())

    print('... loading data')
    t_start = time.time()

    (img_names_tr, y_tr, y_tr_mask, img_names_te, y_te,
     y_te_mask) = utils.pkl_load(annot_path)
    y_tr = y_tr.astype(np.float32)
    y_te = y_te.astype(np.float32)
    y_tr_mask = y_tr_mask.astype(np.float32)
    y_te_mask = y_te_mask.astype(np.float32)

    print('... context features')
    # c1 is used exactly as loaded; only c2, c3 and c4 go through expand_feats
    (x_tr_c1, x_te_c1) = utils.h5_load_multi(feature_path_c1, ['x_tr', 'x_te'])

    (x_tr_c2, x_te_c2) = utils.h5_load_multi(feature_path_c2, ['x_tr', 'x_te'])
    x_tr_c2 = expand_feats(x_tr_c2)
    x_te_c2 = expand_feats(x_te_c2)

    (x_tr_c3, x_te_c3) = utils.h5_load_multi(feature_path_c3, ['x_tr', 'x_te'])
    x_tr_c3 = expand_feats(x_tr_c3)
    x_te_c3 = expand_feats(x_te_c3)

    (x_tr_c4, x_te_c4) = utils.h5_load_multi(feature_path_c4, ['x_tr', 'x_te'])
    x_tr_c4 = expand_feats(x_tr_c4)
    x_te_c4 = expand_feats(x_te_c4)

    print('train_set_shape_interaction: ', x_tr.shape)
    print('test_set_shape_interaction: ', x_te.shape)
    print('train_set_shape_context-1: ', x_tr_c1.shape)
    print('test_set_shape_context-1: ', x_te_c1.shape)
    print('train_set_shape_context-2: ', x_tr_c2.shape)
    print('test_set_shape_context-2: ', x_te_c2.shape)
    print('train_set_shape_context-3: ', x_tr_c3.shape)
    print('test_set_shape_context-3: ', x_te_c3.shape)
    print('train_set_shape_context-4: ', x_tr_c4.shape)
    print('test_set_shape_context-4: ', x_te_c4.shape)

    duration = time.time() - t_start
    print('... loading data, duration (sec): %d' % (duration))

    # building the model
    print('... building model %s' % (model_name))
    t_start = time.time()
    print('Training soft fusion model')
    model = ClassifierContextLateFusionMultiSoftGate(n_classes, input_shape,
                                                     x_cs_shape)
    # the reported duration covers construction only, not the transfer to the GPU
    duration = time.time() - t_start
    model = model.cuda()
    print('... model built, duration (sec): %d' % (duration))

    # callbacks
    callbacks = []

    print(
        'Interaction_feat: %s, Context_feat-1: %s, Context_feat-2: %s, Context_feat-3: %s\n'
        % (feature_path_interaction, feature_path_c1, feature_path_c2,
           feature_path_c3))

    # start training
    pytorch_utils.train_model_custom_metric_mask(
        model,
        model._optimizer,
        model._loss_fn,
        model._metric_fn, [x_tr, x_tr_c1, x_tr_c2, x_tr_c3, x_tr_c4],
        y_tr,
        y_tr_mask, [x_te, x_te_c1, x_te_c2, x_te_c3, x_te_c4],
        y_te,
        y_te_mask,
        n_epochs,
        batch_size_tr,
        batch_size_te,
        callbacks=callbacks)

    print('--- finish time')
    print(datetime.datetime.now())
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).

    For every epoch the model is trained on all the training batches, then
    evaluated on all the test batches. Running averages of the loss and of the
    metric are written to stdout, and the model is saved after each epoch.

    :return: None
    """

    # local import, needed only to disable autograd during evaluation
    import torch

    # get some configs for the training
    n_epochs = config.cfg.TRAIN.N_EPOCHS
    dataset_name = config.cfg.DATASET_NAME
    model_name = '%s_%s' % (config.cfg.MODEL.NAME, utils.timestamp())
    device = 'cuda'

    # data loaders
    loader_tr, n_samples_tr, n_batches_tr = __define_loader(is_training=True)
    loader_te, n_samples_te, n_batches_te = __define_loader(is_training=False)

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_tr, n_batches_tr, config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_te, n_batches_te, config.cfg.TEST.BATCH_SIZE))

    # load model
    model, optimizer, loss_fn, metric_fn, metric_fn_name = __define_timeception_model(
        device)
    logger.info(
        pytorch_utils.summary(model,
                              model._input_shape[1:],
                              batch_size=2,
                              device='cuda'))

    # save the model
    model_saver = pytorch_utils.ModelSaver(model, dataset_name, model_name)

    # loop on the epochs
    sys.stdout.write('\n')
    for idx_epoch in range(n_epochs):

        epoch_num = idx_epoch + 1

        # accumulated (summed) loss and metric over the batches of this epoch
        loss_tr = 0.0
        acc_tr = 0.0
        loss_te = 0.0
        acc_te = 0.0

        tt1 = time.time()

        # flag model as training
        model.train()

        # training
        for idx_batch, (x, y_true) in enumerate(loader_tr):
            batch_num = idx_batch + 1

            x, y_true = x.to(device), y_true.to(device)
            optimizer.zero_grad()
            y_pred = model(x)
            loss = loss_fn(y_pred, y_true)
            loss.backward()
            optimizer.step()

            # calculate accuracy
            y_true = y_true.cpu().numpy().astype(np.int32)
            y_pred = y_pred.cpu().detach().numpy()
            loss_b_tr = loss.cpu().detach().numpy()
            acc_b_tr = metric_fn(y_true, y_pred)

            loss_tr += loss_b_tr
            acc_tr += acc_b_tr
            # running averages over the batches seen so far
            loss_b_tr = loss_tr / float(batch_num)
            acc_b_tr = acc_tr / float(batch_num)
            tt2 = time.time()
            duration = tt2 - tt1
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                % (duration, epoch_num, n_epochs, batch_num, n_batches_tr,
                   metric_fn_name, loss_b_tr, acc_b_tr))

        # flag model as testing
        model.eval()

        # testing; no gradients are needed, so do not build the autograd graph
        with torch.no_grad():
            for idx_batch, (x, y_true) in enumerate(loader_te):
                batch_num = idx_batch + 1

                x, y_true = x.to(device), y_true.to(device)
                y_pred = model(x)
                loss_b_te = loss_fn(y_pred, y_true).cpu().detach().numpy()
                y_true = y_true.cpu().numpy().astype(np.int32)
                y_pred = y_pred.cpu().detach().numpy()
                acc_b_te = metric_fn(y_true, y_pred)

                loss_te += loss_b_te
                acc_te += acc_b_te
                # running averages over the batches seen so far
                loss_b_te = loss_te / float(batch_num)
                acc_b_te = acc_te / float(batch_num)
                tt2 = time.time()
                duration = tt2 - tt1
                sys.stdout.write(
                    '\r%04ds - epoch: %02d/%02d, batch [te]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                    % (duration, epoch_num, n_epochs, batch_num, n_batches_te,
                       metric_fn_name, loss_b_te, acc_b_te))

        # epoch averages
        loss_tr /= float(n_batches_tr)
        loss_te /= float(n_batches_te)
        acc_tr /= float(n_batches_tr)
        acc_te /= float(n_batches_te)

        tt2 = time.time()
        duration = tt2 - tt1
        # the [tr] pair reports the training metric (it used to print acc_te twice)
        sys.stdout.write(
            '\r%04ds - epoch: %02d/%02d, [tr]: %0.2f, %0.2f, [te]: %0.2f, %0.2f \n'
            % (duration, epoch_num, n_epochs, loss_tr, acc_tr, loss_te,
               acc_te))

        # after each epoch, save data
        model_saver.save(idx_epoch)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())
def __train_model_on_video_frames_backbone_i3d_keras(n_epochs, starting_epoch_num, n_frames_per_video, n_instances, instance_num):
    """
    Extract features from i3d-model to be used by our model.

    This function is one of three "bottom" instances. Instance ``k`` handles
    the videos whose index satisfies ``idx_video % n_instances == k - 1`` and
    stores the features of each video in ``TRAIN_STATE.feats_dict_tr_<k>``.
    The instances and the "top" part are synchronised through the epoch
    counters kept in the module-level ``TRAIN_STATE``.

    :param n_epochs: epochs are iterated as range(starting_epoch_num, n_epochs).
    :param starting_epoch_num: index of the first epoch to run.
    :param n_frames_per_video: frames expected per video, a multiple of 8.
    :param n_instances: number of bottom instances, has to be 3.
    :param instance_num: number of this instance: 1, 2 or 3.
    :return: None
    """

    verbose = False

    assert instance_num in [
        1, 2, 3
    ], 'Sorry, wrong instance number: %d' % (instance_num)
    assert n_instances == 3, 'Sorry, wrong number of instances %d' % (
        n_instances)
    # the asserts vanish under -O, keep a hard check before deriving attribute names
    if instance_num not in (1, 2, 3):
        raise Exception('Sorry, unknown instance number: %d' % (instance_num))

    n_threads = 16
    n_frames_per_segment = 8
    max_preloaded_feats = 40

    n_frames_in = n_frames_per_video
    n_frames_out = int(n_frames_in / float(n_frames_per_segment))
    assert n_frames_per_segment * n_frames_out == n_frames_in

    # names of the TRAIN_STATE attributes that belong to this instance
    epoch_start_attr = 'model_bottom_%d_epoch_start' % (instance_num)
    epoch_end_attr = 'model_bottom_%d_epoch_end' % (instance_num)
    feats_dict_attr = 'feats_dict_tr_%d' % (instance_num)
    other_instances = [i for i in (1, 2, 3) if i != instance_num]
    other_instances_str = ','.join(str(i) for i in other_instances)

    # load the model
    model = Inception_Inflated3d_Backbone()

    # reader for getting video frames
    video_reader = image_utils.AsyncImageReaderEpicKitchensForI3dKerasModel(
        n_threads=n_threads)

    # wait until model top is ready
    # (time.sleep replaces threading._sleep, which only existed in python 2)
    while not TRAIN_STATE.model_top_ready:
        time.sleep(5.0)
        if verbose:
            print('... bottom part (%d) is waiting for top part to get ready' %
                  (instance_num))

    # extract features for n epoch
    for idx_epoch in range(starting_epoch_num, n_epochs):

        epoch_num = idx_epoch + 1

        video_frames_dict = TRAIN_STATE.video_frames_dict_tr
        video_names = TRAIN_STATE.video_names_tr
        n_videos = len(video_names)

        # model started, update count
        setattr(TRAIN_STATE, epoch_start_attr, epoch_num)

        if verbose:
            print('epoch %d by instance %s' % (epoch_num, instance_num))

        # async reader: start loading the images of the first video of this instance
        current_video_name = video_names[instance_num - 1]
        current_video_frames = video_frames_dict[current_video_name]
        video_reader.load_imgs_in_batch(current_video_frames)

        # extract features only for training videos
        t_epoch_start = time.time()
        if verbose:
            print('... extracting features tr')
            print('... start time: %s' % utils.timestamp())

        # loop on list of videos
        for idx_video in range(n_videos):

            # wait looping if there are so many features in the dictionary
            while len(getattr(TRAIN_STATE,
                              feats_dict_attr)) > max_preloaded_feats:
                time.sleep(1.0)
                if verbose:
                    print(
                        '... bottom part (%d) is waiting for features in the dictionary to get consumed by top part'
                        % (instance_num))

            # loop on groups according to instances
            if idx_video % n_instances != instance_num - 1:
                continue

            tg_1 = time.time()
            video_name = video_names[idx_video]
            video_num = idx_video + 1

            # wait until the image_batch is loaded; separate timer so the
            # epoch start time is not overwritten
            t_wait_start = time.time()
            while video_reader.is_busy():
                time.sleep(0.1)
            duration_waited = time.time() - t_wait_start
            if verbose:
                print(
                    '\n... ... model bottom (%d), video %d/%d, waited: %d, name: %s'
                    % (instance_num, video_num, n_videos, duration_waited,
                       video_name))

            # get the frames
            frames = video_reader.get_images()  # (G*T*N, 224, 224, 3)

            # pre-load for the next video group, notice that we take into account the number of instances
            if idx_video + n_instances < n_videos:
                next_video_num = video_num + n_instances
                next_video_name = video_names[idx_video + n_instances]
                next_video_frames = video_frames_dict[next_video_name]
                video_reader.load_imgs_in_batch(next_video_frames)
                if verbose:
                    print(
                        '\n... ... model bottom (%d), next video %d/%d, name: %s'
                        % (instance_num, next_video_num, n_videos,
                           next_video_name))

            # a bare string cannot be raised, so proper exceptions are used here
            if (video_name in TRAIN_STATE.feats_dict_tr_1
                    or video_name in TRAIN_STATE.feats_dict_tr_2
                    or video_name in TRAIN_STATE.feats_dict_tr_3):
                raise RuntimeError(
                    '... ... this should not be happening, but features for video %s already exist in the dictionary'
                    % (video_name))

            if len(frames) != n_frames_per_video:
                raise ValueError('... ... wrong n frames for video: %s' %
                                 (video_name))

            # reshape to make one dimension carries the frames / segment, while the other dimension represents the batch size
            frames = np.reshape(
                frames,
                [n_frames_out, n_frames_per_segment, 224, 224, 3
                 ])  # (T, 8, 224, 224, 3)

            # get features
            features = model.predict(frames)  # (T, 1, 7, 7, 1024)

            # remove temporal axis, as it is one
            features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024)

            # add feature to the dictionary of this instance
            getattr(TRAIN_STATE, feats_dict_attr)[video_name] = features

            tg_2 = time.time()
            if verbose:
                print('took', tg_2 - tg_1)

        if verbose:
            print('... finish extracting features in %d seconds' %
                  (time.time() - t_epoch_start))

        # after finishing epoch, update counters
        setattr(TRAIN_STATE, epoch_end_attr, epoch_num)

        # wait until the other bottom parts finish this epoch as well; the
        # counters are re-read on every check as other threads update them
        while any(
                getattr(TRAIN_STATE, epoch_end_attr) > getattr(
                    TRAIN_STATE, 'model_bottom_%d_epoch_end' % (i))
                for i in other_instances):
            time.sleep(1.0)
            if verbose:
                print(
                    '... bottom part (%d) is waiting for bottom part (%s) to finish extracting features on epoch %d'
                    % (instance_num, other_instances_str, epoch_num))

        # if top part is not finished yet, then wait
        while (TRAIN_STATE.model_top_epoch_end <
               TRAIN_STATE.model_bottom_1_epoch_end
               or TRAIN_STATE.model_top_epoch_end <
               TRAIN_STATE.model_bottom_2_epoch_end
               or TRAIN_STATE.model_top_epoch_end <
               TRAIN_STATE.model_bottom_3_epoch_end):
            time.sleep(2.0)
            if verbose:
                print(
                    '... bottom part (%d) is waiting for top part to finish training on epoch: %d'
                    % (instance_num, TRAIN_STATE.model_top_epoch_end + 1))

    print('... finish extracting features for all epochs, goodbye!')
    print('... end time: %s' % utils.timestamp())
def train_model_on_video_features_i3d():
    """
    Train model of features stored on local disc.

    The features are read in batches by two asynchronous loaders (train and
    test). While the model runs on the current batch, the next batch is
    already being loaded. After every epoch the training data is shuffled and
    the model is saved if ``is_save`` is set.

    :return: None
    """

    model_type = 'i3d_rgb'
    feature_type = 'mixed_5c'
    is_spatial_pooling = False
    is_save = True

    n_gpus = 1
    batch_size_tr = 20
    batch_size_te = 30
    n_threads = 20
    n_epochs = 500
    n_classes = N_CLASSES
    n_centroids = 128
    n_timesteps = 64
    n_frames = n_timesteps * 8

    model_name = 'classifier_%s' % (utils.timestamp())
    model_weight_path = ''
    features_root_path = Pth('Breakfast/features_i3d_mixed_5c_%s_frames',
                             (n_frames))
    centroids_path = Pth(
        'Breakfast/features_centroids/features_random_%d_centroids.pkl',
        (n_centroids, ))
    video_annot_path = Pth('Breakfast/annotation/gt_activities.pkl')

    centroids = utils.pkl_load(centroids_path)

    n_feat_maps, feat_map_side_dim = __get_model_feat_maps_info(
        model_type, feature_type)
    feat_map_side_dim = 1 if is_spatial_pooling else feat_map_side_dim
    input_shape = (None, n_timesteps, feat_map_side_dim, feat_map_side_dim,
                   n_feat_maps)

    def _wait_until_loaded(loader):
        # block until the async loader has its batch ready, return the seconds waited
        t_wait_start = time.time()
        while loader.is_busy():
            time.sleep(0.1)
        return time.time() - t_wait_start

    print('--- start time')
    print(datetime.datetime.now())

    # building the model
    print('... building model %s' % (model_name))
    t1 = time.time()
    root_model, model = __load_model_mlp_classifier_video_graph(
        centroids,
        n_classes,
        input_shape,
        n_gpus=n_gpus,
        is_load_weights=False,
        weight_path=model_weight_path)
    t2 = time.time()
    duration = t2 - t1
    # summary() prints by itself; wrapping it in print() also printed its None result
    root_model.summary(line_length=130, positions=None, print_fn=None)
    print('... model built, duration (sec): %d' % (duration))

    # load data
    print('... loading data: %s' % (features_root_path))
    t1 = time.time()
    (v_names_tr, y_tr), (v_names_vl, y_vl), (v_names_te,
                                             y_te) = utils.pkl_load(video_annot_path)
    # the validation split is merged into the training split
    v_names_tr = np.hstack((v_names_tr, v_names_vl))
    y_tr = np.hstack((y_tr, y_vl))
    del v_names_vl
    del y_vl

    action_ids = np.arange(1, N_CLASSES + 1)
    y_tr = utils.label_binarize(y_tr, action_ids)
    y_te = utils.label_binarize(y_te, action_ids)

    n_tr = len(v_names_tr)
    n_te = len(v_names_te)
    n_batch_tr = keras_utils.calc_num_batches(n_tr, batch_size_tr)
    n_batch_te = keras_utils.calc_num_batches(n_te, batch_size_te)
    t2 = time.time()
    print('... centroids: %s' % (centroids_path))
    print('... data loaded: %d' % (t2 - t1))
    print('... [tr]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_tr, n_batch_tr, batch_size_tr, n_gpus))
    print('... [te]: n, n_batch, batch_size, n_gpus: %d, %d, %d, %d' %
          (n_te, n_batch_te, batch_size_te, n_gpus))

    # load features async
    async_loader_tr = data_utils.AsyncVideoFeaturesLoaderBreakfast(
        features_root_path, y_tr, n_timesteps, batch_size_tr, n_feat_maps,
        feat_map_side_dim, n_threads)
    async_loader_te = data_utils.AsyncVideoFeaturesLoaderBreakfast(
        features_root_path, y_te, n_timesteps, batch_size_te, n_feat_maps,
        feat_map_side_dim, n_threads)

    # shuffle the data for the first time
    async_loader_tr.shuffle_data()

    # start getting the features ready for the first batch
    async_loader_tr.load_feats_in_batch(1)
    async_loader_te.load_feats_in_batch(1)

    sys.stdout.write('\n')
    for idx_epoch in range(n_epochs):

        epoch_num = idx_epoch + 1

        # accumulated (summed) loss and accuracy over the batches of this epoch
        loss_tr = 0.0
        loss_te = 0.0
        acc_tr = 0.0
        acc_te = 0.0
        tt1 = time.time()
        waiting_duration_total = 0

        # loop and train
        for idx_batch in range(n_batch_tr):

            batch_num = idx_batch + 1

            # wait until the batch is loaded
            waiting_duration = _wait_until_loaded(async_loader_tr)

            # get batch of training samples
            x_tr_b, y_tr_b = async_loader_tr.get_batch_data()

            # start getting the next batch ready
            if batch_num < n_batch_tr:
                next_batch_num = batch_num + 1
                async_loader_tr.load_feats_in_batch(next_batch_num)

            # train and get predictions
            loss_batch_tr, acc_batch_tr = model.train_on_batch(x_tr_b, y_tr_b)

            loss_tr += loss_batch_tr
            acc_tr += acc_batch_tr
            # running averages: divide the accumulated sums, not the value of
            # the current batch only
            loss_tr_b = loss_tr / float(batch_num)
            acc_tr_b = 100 * acc_tr / float(batch_num)

            tt2 = time.time()
            duration = tt2 - tt1
            waiting_duration_total += waiting_duration
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss_tr: %.02f, acc_tr: %.02f, waited: %.01f '
                % (duration, epoch_num, n_epochs, batch_num, n_batch_tr,
                   loss_tr_b, acc_tr_b, waiting_duration))

        # loop and test
        for idx_batch in range(n_batch_te):

            batch_num = idx_batch + 1

            # wait until the batch is loaded
            waiting_duration = _wait_until_loaded(async_loader_te)

            # get batch of testing samples
            x_te_b, y_te_b = async_loader_te.get_batch_data()

            # start getting the next batch ready
            if batch_num < n_batch_te:
                next_batch_num = batch_num + 1
                async_loader_te.load_feats_in_batch(next_batch_num)

            # test and get predictions
            loss_batch_te, acc_batch_te = model.test_on_batch(x_te_b, y_te_b)

            loss_te += loss_batch_te
            acc_te += acc_batch_te
            # running averages over the batches seen so far
            loss_te_b = loss_te / float(batch_num)
            acc_te_b = 100 * acc_te / float(batch_num)

            tt2 = time.time()
            duration = tt2 - tt1
            waiting_duration_total += waiting_duration
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [te]: %02d/%02d, loss_te: %.02f, acc_te: %.02f, waited: %.01f '
                % (duration, epoch_num, n_epochs, batch_num, n_batch_te,
                   loss_te_b, acc_te_b, waiting_duration))

        # epoch averages, accuracies as percentages
        loss_tr /= float(n_batch_tr)
        loss_te /= float(n_batch_te)
        acc_tr /= float(n_batch_tr)
        acc_te /= float(n_batch_te)
        acc_tr *= 100.0
        acc_te *= 100.0

        tt2 = time.time()
        duration = tt2 - tt1
        sys.stdout.write(
            '\r%04ds - epoch: %02d/%02d, loss_tr %.02f, acc_tr %.02f, loss_te %.02f, acc_te: %.02f, waited: %d \n'
            % (duration, epoch_num, n_epochs, loss_tr, acc_tr, loss_te, acc_te,
               waiting_duration_total))

        # shuffle the data
        async_loader_tr.shuffle_data()

        # because we set a new data list, start getting the first batch
        async_loader_tr.load_feats_in_batch(1)
        async_loader_te.load_feats_in_batch(1)

        # save the model, if required
        if is_save:
            __save_model(root_model, model_name, epoch_num)

    print('--- finish time')
    print(datetime.datetime.now())
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).

    For every epoch the model is trained on all the training batches, then
    evaluated on all the test batches. Running averages of the loss and of the
    metric are written to stdout, and the model is saved after each epoch.

    :return: None
    """

    # local import, needed only to disable autograd during evaluation
    import torch

    # get some configs for the training
    n_epochs = config.cfg.TRAIN.N_EPOCHS  # e.g. 500
    dataset_name = config.cfg.DATASET_NAME  # e.g. Charades
    # e.g. 'charades_timeception_19.08.05-10:59:25'
    model_name = '%s_%s' % (config.cfg.MODEL.NAME, utils.timestamp())
    device = 'cuda'

    # data loaders for the train and test sets
    # (values seen in one run: n_samples_tr=7811, n_batches_tr=245,
    # n_samples_te=1814, n_batches_te=37)
    loader_tr, n_samples_tr, n_batches_tr = __define_loader(is_training=True)
    loader_te, n_samples_te, n_batches_te = __define_loader(is_training=False)

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_tr, n_batches_tr, config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (n_samples_te, n_batches_te, config.cfg.TEST.BATCH_SIZE))

    # load model, i.e. get the already defined model together with its optimizer, loss and metric
    model, optimizer, loss_fn, metric_fn, metric_fn_name = __define_timeception_model(
        device)
    print('batch_size=2, input_shape[1:]=', model._input_shape[1:])
    # print the model summary
    logger.info(
        pytorch_utils.summary(model,
                              model._input_shape[1:],
                              batch_size=2,
                              device='cuda'))

    # saver of the model state
    model_saver = pytorch_utils.ModelSaver(model, dataset_name, model_name)

    # loop on the epochs
    sys.stdout.write('\n')
    for idx_epoch in range(n_epochs):

        epoch_num = idx_epoch + 1

        # accumulated (summed) loss and metric over the batches of this epoch
        loss_tr = 0.0
        acc_tr = 0.0
        loss_te = 0.0
        acc_te = 0.0

        tt1 = time.time()

        # flag model as training
        model.train()

        # training
        for idx_batch, (x, y_true) in enumerate(loader_tr):
            batch_num = idx_batch + 1

            # original note: x.shape is (32, 1024, 32, 7, 7), i.e. (batch, channels, T, h, w)
            x, y_true = x.to(device), y_true.to(device)
            optimizer.zero_grad()
            y_pred = model(x)
            loss = loss_fn(y_pred, y_true)
            loss.backward()
            optimizer.step()

            # calculate accuracy
            y_true = y_true.cpu().numpy().astype(np.int32)  # ground-truth labels
            y_pred = y_pred.cpu().detach().numpy()  # predicted labels
            loss_b_tr = loss.cpu().detach().numpy()
            acc_b_tr = metric_fn(y_true, y_pred)

            loss_tr += loss_b_tr
            acc_tr += acc_b_tr
            # running averages over the batches seen so far
            loss_b_tr = loss_tr / float(batch_num)
            acc_b_tr = acc_tr / float(batch_num)
            tt2 = time.time()
            duration = tt2 - tt1
            sys.stdout.write(
                '\r%04ds - epoch: %02d/%02d, batch [tr]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                % (duration, epoch_num, n_epochs, batch_num, n_batches_tr,
                   metric_fn_name, loss_b_tr, acc_b_tr))

        # flag model as testing
        model.eval()

        # testing; no gradients are needed, so do not build the autograd graph
        with torch.no_grad():
            for idx_batch, (x, y_true) in enumerate(loader_te):
                batch_num = idx_batch + 1

                x, y_true = x.to(device), y_true.to(device)
                y_pred = model(x)
                loss_b_te = loss_fn(y_pred, y_true).cpu().detach().numpy()
                y_true = y_true.cpu().numpy().astype(np.int32)
                y_pred = y_pred.cpu().detach().numpy()
                acc_b_te = metric_fn(y_true, y_pred)

                loss_te += loss_b_te
                acc_te += acc_b_te
                # running averages over the batches seen so far
                loss_b_te = loss_te / float(batch_num)
                acc_b_te = acc_te / float(batch_num)
                tt2 = time.time()
                duration = tt2 - tt1
                sys.stdout.write(
                    '\r%04ds - epoch: %02d/%02d, batch [te]: %02d/%02d, loss, %s: %0.2f, %0.2f '
                    % (duration, epoch_num, n_epochs, batch_num, n_batches_te,
                       metric_fn_name, loss_b_te, acc_b_te))

        # epoch averages
        loss_tr /= float(n_batches_tr)
        loss_te /= float(n_batches_te)
        acc_tr /= float(n_batches_tr)
        acc_te /= float(n_batches_te)

        tt2 = time.time()
        duration = tt2 - tt1
        # the [tr] pair reports the training metric (it used to print acc_te twice)
        sys.stdout.write(
            '\r%04ds - epoch: %02d/%02d, [tr]: %0.2f, %0.2f, [te]: %0.2f, %0.2f \n'
            % (duration, epoch_num, n_epochs, loss_tr, acc_tr, loss_te,
               acc_te))

        # after each epoch, save data
        model_saver.save(idx_epoch)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())
def extract_features_i3d_charades(n_frames_in, n_frames_out):
    """
    Extract features from i3d-model
    n_frames_in = 8 * n_frames_out
    n_frames_in = 1024,512,256
    n_frames_out = 128,64,32

    The frames of each video are read asynchronously (the next video is
    pre-loaded while the current one is processed), passed through the i3d
    model on the GPU and the features are pickled to one file per video.
    Videos whose feature file already exists are left untouched.

    :param n_frames_in: number of sampled frames per video.
    :param n_frames_out: number of 8-frame segments per video.
    :raises ValueError: if a video does not provide n_frames_in frames.
    :return: None
    """

    root_path = '../data'
    root_Charades_path = '/home/r/renpengzhen/Datasets/Charades'
    # pickle with the sampled frame names of each video
    frames_annot_path = '%s/Charades/annotation/frames_dict_untrimmed_multi_label_i3d_%d_frames.pkl' % (
        root_path, n_frames_in)
    # where the model weights are stored
    model_path = '%s/Charades/baseline_models/i3d/rgb_charades.pt' % (
        root_path)
    # where the frames of all the videos are stored
    frames_root_path = '%s/Charades_v1_rgb' % (root_Charades_path)
    # where the features extracted with i3d are saved
    features_root_path = '%s/Charades/features_i3d_pytorch_charades_rgb_mixed_5c_%df' % (
        root_path, n_frames_out)

    # load the dictionaries of sampled frames, video name => array of frame
    # names, for the train and test sets, e.g. 'AXIW1' => ['AXIW1-000001.jpg', ...]
    (video_frames_dict_tr,
     video_frames_dict_te) = utils.pkl_load(frames_annot_path)
    video_frames_dict = dict()
    video_frames_dict.update(video_frames_dict_tr)
    video_frames_dict.update(video_frames_dict_te)
    video_names = list(video_frames_dict.keys())
    n_videos = len(video_names)
    del video_frames_dict_tr
    del video_frames_dict_te

    n_threads = 8
    # frames per video segment; original note: these 8 frames are consecutive,
    # they were already sampled as consecutive frames
    n_frames_per_segment = 8
    assert n_frames_per_segment * n_frames_out == n_frames_in

    if not os.path.exists(features_root_path):
        os.makedirs(features_root_path)

    t_start = time.time()
    print('extracting training features')
    print('start time: %s' % utils.timestamp())

    # reader for getting video frames
    video_reader_tr = image_utils.AsyncVideoReaderCharadesForI3DTorchModel(
        n_threads=n_threads)

    # async reader: start loading the frames of the first video
    # (original note: the frame paths of the video, as a numpy array)
    video_group_frames = __get_video_frame_pathes(video_names[0],
                                                  frames_root_path,
                                                  video_frames_dict)
    video_reader_tr.load_video_frames_in_batch(video_group_frames)

    # load the model
    model = i3d_torch_charades_utils.load_model_i3d_charades_rgb_for_testing(
        model_path)
    # run one forward pass to print the input/output details of the model
    print('input_size=(3, 8, 224, 224)')
    print(torchsummary.summary(model, input_size=(3, 8, 224, 224)))

    # range of videos (1-based, begin exclusive, end inclusive) to process;
    # with these values every video is processed
    # NOTE(review): narrowing the range skips a video before its pre-loaded
    # frames are consumed, which looks like it would desynchronise the reader
    begin_num = 0
    end_num = n_videos

    # loop on list of videos, i.e. over the whole dataset
    for idx_video in range(n_videos):

        video_num = idx_video + 1
        video_name = video_names[idx_video]

        if begin_num is not None and end_num is not None:
            if video_num <= begin_num or video_num > end_num:
                continue

        # wait until the image_batch is loaded; separate timer so the overall
        # start time is not overwritten
        t_wait_start = time.time()
        while video_reader_tr.is_busy():
            time.sleep(0.1)
        duration_waited = time.time() - t_wait_start
        print('... video %04d, %04d, waited: %.02f' %
              (video_num, n_videos, duration_waited))

        # get the frames
        # original note: presumably the cropped frames of the current video
        frames = video_reader_tr.get_images()  # (G*T*N, 224, 224, 3)

        # pre-load the frames of the next video
        if video_num < n_videos:
            next_video_frames = __get_video_frame_pathes(
                video_names[idx_video + 1], frames_root_path,
                video_frames_dict)
            video_reader_tr.load_video_frames_in_batch(next_video_frames)

        # a bare string cannot be raised, raise a proper exception instead
        if len(frames) != n_frames_in:
            raise ValueError('... ... wrong n frames: %s' % (video_name))

        # path to save the features
        video_features_path = '%s/%s.pkl' % (features_root_path, video_name)

        # skip before the expensive forward pass rather than after it; the
        # frames were already consumed above, so the reader stays in sync
        if os.path.exists(video_features_path):
            print('... features for video already exist: %s.pkl' %
                  (video_name))
            continue

        # reshape to make one dimension carries the frames / segment, while the other dimension represents the batch size
        # T is the number of video segments, i.e. of "super-frames"
        frames = np.reshape(frames, (n_frames_out, n_frames_per_segment, 224,
                                     224, 3))  # (T, 8, 224, 224, 3)

        # transpose to have the channel_first (T, 8, 224, 224, 3) => (T, 3, 8, 224, 224)
        frames = np.transpose(frames, (0, 4, 1, 2, 3))

        # prepare input variable
        with torch.no_grad():
            # extract features
            input_var = torch.from_numpy(frames).cuda()  # (T, 3, 8, 224, 224)
            output_var = model(input_var)  # e.g. torch.Size([128, 1024, 1, 7, 7])
            output_var = output_var.cpu()
            features = output_var.data.numpy()  # (T, 1024, 1, 7, 7)

            # don't forget to clean up variables
            del input_var
            del output_var

        # transpose to have the channel_last
        features = np.transpose(features,
                                (0, 2, 3, 4, 1))  # (T, 1, 7, 7, 1024)

        # remove the temporal axis, as it is one
        features = np.squeeze(features, axis=1)  # (T, 7, 7, 1024)

        # save features
        utils.pkl_dump(features, video_features_path, is_highest=True)

    t_end = time.time()
    print('... finish extracting features in %d seconds' % (t_end - t_start))
def train_tco():
    """
    Train Timeception layers based on the given configurations.
    This train scheme is Timeception-only (TCO).

    Builds the train and test data generators and the Timeception model, then
    trains with ``fit_generator``. The model is saved through a
    ``keras_utils.SaveCallback`` passed as a callback.

    :return: None
    """

    # 1. get some configs for the training
    n_workers = config.cfg.TRAIN.N_WORKERS  # e.g. 10
    n_epochs = config.cfg.TRAIN.N_EPOCHS  # e.g. 500
    dataset_name = config.cfg.DATASET_NAME
    model_name = '%s_%s' % (config.cfg.MODEL.NAME, utils.timestamp())

    # 2. data generators
    data_generator_tr = __define_data_generator(is_training=True)
    data_generator_te = __define_data_generator(is_training=False)

    logger.info('--- start time')
    logger.info(datetime.datetime.now())
    logger.info('... [tr]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (data_generator_tr.n_samples, data_generator_tr.n_batches,
                 config.cfg.TRAIN.BATCH_SIZE))
    logger.info('... [te]: n_samples, n_batch, batch_size: %d, %d, %d' %
                (data_generator_te.n_samples, data_generator_te.n_batches,
                 config.cfg.TEST.BATCH_SIZE))

    # callback to save the model
    save_callback = keras_utils.SaveCallback(dataset_name, model_name)

    # load model
    model = __define_timeception_model()
    # summary() returns None, so logging its result only logged "None";
    # send every line of the summary to the logger instead
    model.summary(print_fn=logger.info)

    # train the model
    #
    # Notes on fit_generator, translated from the original comment:
    # Batches are produced one by one by a python generator and used for
    # training. The generator runs in parallel with the model, e.g. data can
    # be augmented on the CPU while the model is trained on the GPU.
    #   generator: yields tuples (inputs, targets) or (inputs, targets,
    #       sample_weight), all with the same number of samples; it loops
    #       over the dataset indefinitely.
    #   steps_per_epoch: an epoch ends after the generator has been called
    #       this many times.
    #   epochs: number of passes over the data.
    #   verbose: 0 = silent, 1 = progress bar, 2 = one line per epoch.
    #   validation_data: a generator for the validation set, or a tuple
    #       (inputs, targets), or a tuple (inputs, targets, sample_weights).
    #   validation_steps: number of calls of the validation generator, when
    #       validation_data is a generator.
    #   class_weight: dictionary mapping classes to weights, commonly used
    #       for imbalanced data.
    #   sample_weight: numpy array of weights that scales the loss during
    #       training only; either one weight per sample, or a matrix of shape
    #       (samples, sequence_length) for temporal data, which requires
    #       sample_weight_mode='temporal' when compiling the model.
    #   workers: maximum number of processes.
    #   max_q_size: maximum size of the generator queue.
    #   pickle_safe: if true, process-based workers are used, hence
    #       non-picklable arguments cannot be passed to the generator.
    #   initial_epoch: epoch at which to start, useful to resume a training.
    model.fit_generator(epochs=n_epochs,
                        generator=data_generator_tr,
                        validation_data=data_generator_te,
                        use_multiprocessing=True,
                        workers=n_workers,
                        callbacks=[save_callback],
                        verbose=2)

    logger.info('--- finish time')
    logger.info(datetime.datetime.now())