def benchmark_load_video(video_dir):
    """Time decoding frames from a video versus reading them back as JPEG files.

    Loads the frames listed in ``config.PREDICT_FRAMES`` from one hard-coded
    sample clip, writes each frame as a JPEG under ``/opt/data_fast/tmp/``,
    and then times reading those JPEGs back as float32 arrays.  Both timings
    are reported through ``utils.timeit_context``.

    Original author's note: it's around 4x faster to load converted jpeg
    files.

    Args:
        video_dir: directory containing the sample clip; it is joined with
            the file name via ``/`` (so presumably a ``pathlib.Path`` --
            confirm against callers).  ``None`` selects
            ``config.RAW_VIDEO_DIR``.
    """
    # Function-scope import kept from the original implementation.
    from PIL import Image

    video_id = '04GBFOZS5F.mp4'
    if video_dir is None:
        video_dir = config.RAW_VIDEO_DIR

    with utils.timeit_context('load video files'):
        frames = utils.load_video_clip_frames(
            video_fn=video_dir / video_id,
            frames_numbers=config.PREDICT_FRAMES,
            output_size=(config.INPUT_ROWS, config.INPUT_COLS))

    # Strip the 4-character '.mp4' suffix to get the per-clip directory name.
    dest_dir = '/opt/data_fast/tmp/' + video_id[:-4]
    os.makedirs(str(dest_dir), exist_ok=True)

    # One JPEG per requested frame; file names are numbered from 0002 upward
    # (position in PREDICT_FRAMES + 2), exactly as the original did.
    jpeg_paths = [
        str(dest_dir + f'/{i+2:04}.jpg')
        for i, _frame_number in enumerate(config.PREDICT_FRAMES)
    ]

    for i, jpeg_path in enumerate(jpeg_paths):
        img = Image.fromarray(np.clip(frames[i], 0, 255).astype(np.uint8))
        img.save(jpeg_path, quality=85)

    with utils.timeit_context('load images'):
        for jpeg_path in jpeg_paths:
            # scipy.misc.imread no longer exists in current SciPy; it was a
            # thin wrapper over Pillow, so read the file with Pillow directly.
            with Image.open(jpeg_path) as img:
                # The decoded array is discarded: only the load time matters.
                np.asarray(img).astype(np.float32)
def _train(self, X, y):
    """Fit a 1600-tree XGBoost multi-class model and pickle it to ``self.model_fn``.

    The fitted classifier is stored on ``self.model`` and then serialized
    with ``pickle`` to the resolved ``self.model_fn`` path.

    Args:
        X: feature matrix handed to ``XGBClassifier.fit``.
        y: training labels.  A 2-D array (one row per sample, one column
            per class) is reduced to class indices with ``argmax`` over
            axis 1; a 1-D array is taken to already hold class indices
            and is used unchanged.
    """
    self.model = XGBClassifier(n_estimators=1600,
                               objective='multi:softprob',
                               learning_rate=0.03,
                               silent=False,
                               n_jobs=config.N_CORES)

    with utils.timeit_context('fit 1600 est'):
        labels = np.asarray(y)
        # argmax(axis=1) raises on 1-D input, so only reduce 2-D labels.
        if labels.ndim > 1:
            labels = np.argmax(labels, axis=1)
        self.model.fit(X, labels)

    # Context manager guarantees the file is flushed and closed even if
    # pickling fails part-way through.
    with open(str(self.model_fn.resolve()), "wb") as model_file:
        pickle.dump(self.model, model_file)
def train(self):
    """Train model using saved L1 model out of fold predictions.

    For every name in ``self.l1_model_names`` and every fold in
    ``config.TRAIN_FOLDS`` the data is loaded with ``load_train_data``
    (dispatched through ``self.pool.starmap``).  Per model, the folds are
    stacked vertically; the per-model blocks are then stacked column-wise
    into a single feature matrix.  Labels are taken from the first model,
    converted to class indices with ``argmax`` over axis 1, and passed to
    ``self._train`` together with the features.

    Raises:
        ValueError: if there are no L1 models or no training folds.
    """
    model_names = list(self.l1_model_names)
    folds = list(config.TRAIN_FOLDS)

    # (model_name, fold) pairs in the order they are requested and consumed.
    keys = [(model_name, fold)
            for model_name in model_names
            for fold in folds]
    if not keys:
        raise ValueError(
            'No L1 model / fold combinations to train on: '
            'l1_model_names or config.TRAIN_FOLDS is empty')

    requests = [(model_name, fold, self.preprocess_l1_model_output)
                for model_name, fold in keys]

    with utils.timeit_context('load all data'):
        results = self.pool.starmap(load_train_data, requests)

    # Pair each result with its request key.  Looking results up through
    # requests.index((model_name, fold)) cannot work because the request
    # tuples carry a third element.
    # NOTE(review): relies on starmap returning results in request order
    # (true for multiprocessing.Pool) -- confirm self.pool is such a pool.
    results_by_key = dict(zip(keys, results))

    X_all_combined = []
    y_all_combined = []
    for model_name in model_names:
        X_combined = []
        y_combined = []
        for fold in folds:
            X, y, _video_ids = results_by_key[(model_name, fold)]
            logger.debug(f"Load {model_name} {fold} {X.shape} {y.shape}")
            X_combined.append(X)
            y_combined.append(y)

        # Folds become consecutive row blocks for this model.
        X_all_combined.append(np.vstack(X_combined))
        y_all_combined.append(np.vstack(y_combined))

    # Each model contributes its own group of feature columns.
    X = np.column_stack(X_all_combined)
    # NOTE(review): assumes every model yields the same rows in the same
    # order, so the first model's labels apply to all -- confirm.
    y = y_all_combined[0]

    logger.debug(f"Training model {X.shape} {y.shape}")

    y_cat = np.argmax(y, axis=1)
    self._train(X, y_cat)