Example #1
0
def benchmark_load_video(video_dir):
    """Time decoding frames from a video clip versus re-reading them as JPEGs.

    The frames listed in ``config.PREDICT_FRAMES`` are decoded from a
    hard-coded sample clip, written out as JPEG files under
    ``/opt/data_fast/tmp/<clip name>`` and then read back. The video decode
    and the JPEG read-back are each wrapped in ``utils.timeit_context`` so
    the two timings can be compared.

    Args:
        video_dir: Directory holding the sample clip. It is joined with the
            clip name using ``/``, so a ``pathlib.Path``-like object is
            expected. ``None`` falls back to ``config.RAW_VIDEO_DIR``.
    """
    # it's around 4x faster to load converted jpeg files
    video_id = '04GBFOZS5F.mp4'
    from PIL import Image

    if video_dir is None:
        video_dir = config.RAW_VIDEO_DIR

    with utils.timeit_context('load video files'):
        frames = utils.load_video_clip_frames(
            video_fn=video_dir / video_id,
            frames_numbers=config.PREDICT_FRAMES,
            output_size=(config.INPUT_ROWS, config.INPUT_COLS))

    # Strip the 4-character '.mp4' suffix to get the per-clip directory name.
    dest_dir = '/opt/data_fast/tmp/' + video_id[:-4]
    os.makedirs(dest_dir, exist_ok=True)

    # One file per requested frame; file numbering starts at 0002.
    frame_fns = [
        f'{dest_dir}/{i + 2:04}.jpg'
        for i, _ in enumerate(config.PREDICT_FRAMES)
    ]

    for i, fn in enumerate(frame_fns):
        # Clamp to the valid 8-bit range before the cast to uint8.
        img = Image.fromarray(np.clip(frames[i], 0, 255).astype(np.uint8))
        img.save(fn, quality=85)

    with utils.timeit_context('load images'):
        for fn in frame_fns:
            # scipy.misc.imread no longer exists in current SciPy; decode
            # with Pillow directly instead.
            with Image.open(fn) as img:
                # The decoded array is discarded: only the load time matters.
                _ = np.asarray(img).astype(np.float32)
Example #2
0
 def _train(self, X, y):
     """Fit a 1600-tree XGBoost classifier and pickle it to ``self.model_fn``.

     The fitted estimator is kept on ``self.model`` and also serialized
     with ``pickle`` to the resolved ``self.model_fn`` path.

     Args:
         X: Feature matrix passed straight to ``XGBClassifier.fit``.
         y: Two-dimensional per-class targets, one row per sample; reduced
             to class indices with ``argmax`` over axis 1 before fitting.
     """
     self.model = XGBClassifier(n_estimators=1600,
                                objective='multi:softprob',
                                learning_rate=0.03,
                                silent=False,
                                n_jobs=config.N_CORES)
     with utils.timeit_context('fit 1600 est'):
         y_cat = np.argmax(y, axis=1)
         self.model.fit(
             X, y_cat
         )  # , eval_set=[(X_test, y_test)], early_stopping_rounds=20, verbose=True)
     # Close the file deterministically so the pickle is fully flushed
     # before anything else tries to read it.
     with open(str(self.model_fn.resolve()), "wb") as model_file:
         pickle.dump(self.model, model_file)
Example #3
0
    def train(self):
        """
        Train model using saved L1 model out of fold predictions

        For every name in ``self.l1_model_names`` and every fold in
        ``config.TRAIN_FOLDS`` the data is loaded through ``self.pool`` with
        ``load_train_data``. Each model's folds are stacked row-wise, the
        per-model blocks are then joined column-wise into one feature
        matrix, and the result is handed to ``self._train`` together with
        class-index labels.
        """
        model_names = list(self.l1_model_names)
        folds = list(config.TRAIN_FOLDS)

        # (model_name, fold) keys, model-major, in the same order as requests
        fold_keys = [(model_name, fold)
                     for model_name in model_names
                     for fold in folds]
        requests = [(model_name, fold, self.preprocess_l1_model_output)
                    for model_name, fold in fold_keys]

        with utils.timeit_context('load all data'):
            results = self.pool.starmap(load_train_data, requests)

        # Results line up one-to-one with requests, so key them by
        # (model_name, fold) for direct lookup. Searching `requests` for the
        # 2-tuple could never match its 3-tuple entries.
        results_by_key = dict(zip(fold_keys, results))

        X_all_combined = []
        y_all_combined = []
        for model_name in model_names:
            X_combined = []
            y_combined = []
            for fold in folds:
                X, y, _video_ids = results_by_key[(model_name, fold)]
                logger.debug("Load %s %s %s %s",
                             model_name, fold, X.shape, y.shape)
                X_combined.append(X)
                y_combined.append(y)

            X_all_combined.append(np.vstack(X_combined))
            y_all_combined.append(np.vstack(y_combined))

        X = np.column_stack(X_all_combined)
        # Targets are taken from the first model only.
        # NOTE(review): presumably every model yields the same rows in the
        # same order for a given fold - confirm against load_train_data.
        y = y_all_combined[0]

        logger.debug("Training model %s %s", X.shape, y.shape)

        # NOTE(review): _train receives class indices here, not the
        # per-class matrix - confirm the concrete _train expects that.
        y_cat = np.argmax(y, axis=1)
        self._train(X, y_cat)