def predict_on_test(model_name, fold, data_model_name=None, data_fold=None):
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    if data_model_name is None:
        data_model_name = model_name
    if data_fold is None:
        data_fold = fold

    with utils.timeit_context('load data'):
        X = load_test_data('../output/prediction_test_frames/', data_model_name, data_fold)
    print(X.shape)

    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    model.load_weights(f"../output/nn1_{model_name}_{fold}_full.pkl")

    with utils.timeit_context('predict'):
        prediction = model.predict(X)

    for col, cls in enumerate(classes):
        ds[cls] = np.clip(prediction[:, col], 0.001, 0.999)

    os.makedirs('../submissions', exist_ok=True)
    ds.to_csv(f'../submissions/submission_one_model_nn_{model_name}_{data_fold}.csv',
              index=False, float_format='%.7f')

def predict_on_test(model_name, fold, use_cache=False):
    # Predict test-set probabilities with a single-fold xgboost model and save a submission file.
    model = pickle.load(open(f"../output/xgb_{model_name}_{fold}_full.pkl", "rb"))
    print(model)

    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    print(classes)

    data_dir = '../output/prediction_test_frames'
    with utils.timeit_context('load data'):
        cache_fn = f'../output/prediction_test_frames/{model_name}_{fold}_cache.npy'
        if use_cache:
            X = np.load(cache_fn)
        else:
            X = load_test_data(data_dir, model_name, fold)
            np.save(cache_fn, X)
    print(X.shape)

    with utils.timeit_context('predict'):
        prediction = model.predict_proba(X)
        # Insert a zero-probability column at index 12 when the model predicted only 23 classes.
        if prediction.shape[1] == 23:
            prediction = np.insert(prediction, obj=12, values=0.0, axis=1)

    for col, cls in enumerate(classes):
        ds[cls] = np.clip(prediction[:, col], 0.001, 0.999)

    os.makedirs('../submissions', exist_ok=True)
    ds.to_csv(f'../submissions/submission_one_model_{model_name}_{fold}.csv',
              index=False, float_format='%.7f')

def train_model_lgb_combined_folds(combined_model_name, model_with_folds):
    # Train one LightGBM model on training data combined from several (model, fold) pairs.
    X_combined = []
    y_combined = []
    for model_name, fold in model_with_folds:
        with utils.timeit_context('load data'):
            X, y, video_ids = load_train_data(model_name, fold)
            X_combined.append(X)
            y_combined.append(y)

    X = np.row_stack(X_combined)
    y = np.row_stack(y_combined)
    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    with utils.timeit_context('fit 200 est'):
        param = {
            'num_leaves': 50,
            'objective': 'multiclass',
            'max_depth': 5,
            'learning_rate': .05,
            'max_bin': 200,
            'num_class': NB_CAT,
            'metric': ['multi_logloss']
        }
        model = lgb.train(param, lgb.Dataset(X, label=y_cat), num_boost_round=200)

    pickle.dump(model, open(f"../output/lgb_combined_folds_{combined_model_name}.pkl", "wb"))

def predict_unused_clips(data_model_name, data_fold, combined_model_name):
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]

    data_dir = '../output/prediction_unused_frames/'
    video_ids = [fn[:-4] for fn in os.listdir(data_dir) if fn.endswith('.csv')]

    with utils.timeit_context('load data'):
        X = load_test_data_uncached(data_dir, data_model_name, data_fold, video_ids)

    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    model.load_weights(f"../output/nn1_{combined_model_name}_0_full.pkl")

    with utils.timeit_context('predict'):
        prediction = model.predict(X)

    ds = pd.DataFrame(data={'filename': video_ids})
    for col, cls in enumerate(classes):
        ds[cls] = prediction[:, col]  # np.clip(prediction[:, col], 0.001, 0.999)

    # os.makedirs('../submissions', exist_ok=True)
    ds.to_csv(f'../output/prediction_unused_frames/{data_model_name}_{data_fold}.csv',
              index=False, float_format='%.7f')

def train_model_xgboost_combined_folds(combined_model_name, model_with_folds):
    # Train one XGBoost classifier on training data combined from several (model, fold) pairs.
    X_combined = []
    y_combined = []
    for model_name, fold in model_with_folds:
        with utils.timeit_context('load data'):
            X, y, video_ids = load_train_data(model_name, fold)
            X_combined.append(X)
            y_combined.append(y)

    X = np.row_stack(X_combined)
    y = np.row_stack(y_combined)
    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    model = XGBClassifier(n_estimators=500, objective='multi:softprob', learning_rate=0.1, silent=True)
    with utils.timeit_context('fit 500 est'):
        model.fit(X, y_cat)

    pickle.dump(model, open(f"../output/xgb_combined_folds_{combined_model_name}.pkl", "wb"))

def uncompress_zar(fn_src, fn_dst):
    # Copy a compressed zarr ChunkedDataset to an uncompressed one for faster reads.
    print(fn_src)
    print(fn_dst)
    print(zarr.storage.default_compressor)
    zarr.storage.default_compressor = None

    ds = ChunkedDataset(fn_src).open(cached=False)
    dst_dataset = ChunkedDataset(fn_dst)
    dst_dataset.initialize()
    # dst_dataset.initialize(
    #     'w',
    #     num_scenes=len(ds.scenes),
    #     num_frames=len(ds.frames),
    #     num_agents=len(ds.agents),
    #     num_tl_faces=len(ds.tl_faces)
    # )

    with utils.timeit_context("copy scenes"):
        dst_dataset.scenes.append(ds.scenes[:])
    with utils.timeit_context("copy frames"):
        dst_dataset.frames.append(ds.frames[:])
    with utils.timeit_context("copy agents"):
        # The agents array is large, so copy it in chunks.
        for i in tqdm(range(0, len(ds.agents), 1024 * 1024)):
            dst_dataset.agents.append(ds.agents[i:i + 1024 * 1024])
    with utils.timeit_context("copy tl_faces"):
        dst_dataset.tl_faces.append(ds.tl_faces[:])

def train_all_models_lgb_combined(combined_model_name, models_with_folds):
    # Train a single LightGBM model on features from all models:
    # folds are stacked as rows, different models are stacked as columns.
    X_all_combined = []
    y_all_combined = []

    requests = []
    results = []
    for model_with_folds in models_with_folds:
        for model_name, fold in model_with_folds:
            requests.append((model_name, fold))
            # results.append(load_one_model(requests[-1]))

    pool = Pool(40)
    with utils.timeit_context('load all data'):
        results = pool.starmap(load_train_data, requests)

    for model_with_folds in models_with_folds:
        X_combined = []
        y_combined = []
        for model_name, fold in model_with_folds:
            X, y, video_ids = results[requests.index((model_name, fold))]
            print(model_name, fold, X.shape)
            X_combined.append(X)
            y_combined.append(y)
        X_all_combined.append(np.row_stack(X_combined))
        y_all_combined.append(np.row_stack(y_combined))

    X = np.column_stack(X_all_combined)
    y = y_all_combined[0]
    print(X.shape, y.shape)

    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    with utils.timeit_context('fit'):
        param = {
            'num_leaves': 50,
            'objective': 'multiclass',
            'max_depth': 5,
            'learning_rate': .05,
            'max_bin': 300,
            'num_class': NB_CAT,
            'metric': ['multi_logloss']
        }
        model = lgb.train(param, lgb.Dataset(X, label=y_cat), num_boost_round=260)

    pickle.dump(model, open(f"../output/lgb_combined_{combined_model_name}.pkl", "wb"))

def predict_on_test_combined(combined_model_name, models_with_folds):
    # Predict with the combined xgboost model: stack per-model features for each fold,
    # predict per fold and average the fold predictions.
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    folds = [1, 2, 3, 4]

    X_combined = {fold: [] for fold in folds}
    try:
        X_combined = pickle.load(open(f"../output/X_combined_xgb_{combined_model_name}.pkl", 'rb'))
    except FileNotFoundError:
        requests = []
        for model_with_folds in models_with_folds:
            for data_model_name, data_fold in model_with_folds:
                data_dir = '../output/prediction_test_frames'
                with utils.timeit_context('load data'):
                    requests.append((data_dir, data_model_name, data_fold))
                    # X_combined[data_fold].append(load_test_data(data_dir, ds.filename))
                    # print(X_combined[-1].shape)

        pool = Pool(40)
        results = pool.map(load_test_data_one_model, requests)
        for data_fold, X in results:
            X_combined[data_fold].append(X)

        pickle.dump(X_combined, open(f"../output/X_combined_xgb_{combined_model_name}.pkl", "wb"))

    model = pickle.load(open(f"../output/xgb_combined_{combined_model_name}.pkl", "rb"))
    print(model)

    predictions = []
    with utils.timeit_context('predict'):
        for fold in [1, 2, 3, 4]:
            X = np.column_stack(X_combined[fold])
            predictions.append(model.predict_proba(X))
            print('prediction', predictions[-1].shape)

    prediction = np.mean(np.array(predictions).astype(np.float64), axis=0)

    os.makedirs('../submissions', exist_ok=True)
    print('predictions', prediction.shape)
    # Save submissions with several probability clipping levels.
    for clip10 in [5, 4, 3, 2]:
        clip = 10 ** (-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(prediction[:, col] * (1 - clip * 2) + clip, clip, 1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_combined_models_xgboost_{combined_model_name}_clip_{clip10}.csv',
            index=False, float_format='%.8f')

def predict_all_single_fold_models():
    # Weighted average of predictions from all single-fold xgboost models.
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))

    requests = []
    for model_with_folds in config.ALL_MODELS:
        for model_name, fold in model_with_folds:
            requests.append((model_name, fold))

    pool = Pool(8)
    with utils.timeit_context('load all data'):
        results = pool.starmap(load_test_data_from_std_path, requests)

    for models in config.ALL_MODELS:
        for model_name, fold in models:
            model = pickle.load(open(f"../output/xgb_{model_name}_{fold}_full.pkl", "rb"))
            print(model_name, fold, model)

            with utils.timeit_context('load data'):
                X = results[requests.index((model_name, fold))]
                # X = load_test_data_from_std_path(model_name, fold)
                print(X.shape)

            with utils.timeit_context('predict'):
                prediction = model.predict_proba(X)
                if prediction.shape[1] == 23:
                    prediction = np.insert(prediction, obj=12, values=0.0, axis=1)

            weight = config.MODEL_WEIGHTS[model_name]
            result += prediction * weight
            total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight
    for clip10 in [5, 4, 3, 2]:
        clip = 10 ** (-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip, 1.0 - clip)
        ds.to_csv(f'../submissions/submission_single_folds_models_xgboost_clip_{clip10}.csv',
                  index=False, float_format='%.8f')

def check_unet(weights):
    dataset = Dataset()
    model = model_unet(INPUT_SHAPE)
    model.load_weights(weights)
    batch_size = 16

    for batch_x, batch_y in dataset.generate_validation(batch_size=batch_size):
        print(batch_x.shape, batch_y.shape)
        with utils.timeit_context('predict 16 images'):
            prediction = model.predict_on_batch(batch_x)

        for i in range(batch_size):
            # plt.imshow(unprocess_input(batch_x[i]))
            # plt.imshow(prediction[i, :, :, 0], alpha=0.75)
            img = batch_x[i].astype(np.float32)
            mask = prediction[i, :, :, 0]
            utils.print_stats('img', img)
            utils.print_stats('mask', mask)
            img[:, :, 0] *= mask
            img[:, :, 1] *= mask
            img[:, :, 2] *= mask
            img = unprocess_input(img)
            plt.imshow(img)
            plt.show()

def check_performance(dataset, name="", num_samples=64 * 20, random_order=False): with utils.timeit_context(f"iterate {name} dataset"): sample = dataset[63] # print("image shape", sample["image"]["image_sem"].shape, sample["image"]["image_sem"].dtype) print("Keys:", sample.keys()) target_positions = sample["target_positions"] target_positions_world = transform_points(target_positions, sample["world_from_agent"]) # output_mask = sample["output_mask"] img = dataset.rasterizer.to_rgb(sample["image"].transpose(1, 2, 0)) plt.imshow(img) agents_history = sample["agents_history"] cur_frame_positions = agents_history[-1, :, :2] * 100.0 cur_frame_velocity = agents_history[-1, :, 2:4] * 10.0 cur_frame_positions_img = transform_points(cur_frame_positions, sample["raster_from_agent"]) plt.scatter(cur_frame_positions_img[:, 0], cur_frame_positions_img[:, 1]) plt.scatter(cur_frame_positions_img[:, 0] + cur_frame_velocity[:, 0] * 1.0, cur_frame_positions_img[:, 1] + cur_frame_velocity[:, 1] * 1.0, c='red') plt.show() nb_samples = len(dataset) for i in tqdm(range(num_samples)): if random_order: sample = dataset[np.random.randint(0, nb_samples)] else: sample = dataset[i] target_positions = sample["target_positions"]
def main():
    video = VideoCapture(video_sources.video_2)
    frame = video.read()
    backSubtractor = BackgroundSubtractorAVG(0.2, denoise(frame))

    for frame in video.frames():
        with utils.timeit_context():
            frame = denoise(frame)
            foreGround = backSubtractor.getForeground(frame)
            # Apply thresholding on the background and display the resulting mask
            ret, mask = cv2.threshold(foreGround, 15, 255, cv2.THRESH_BINARY)

        cv2.imshow('input', frame)
        cv2.imshow('foreground', foreGround)
        # Note: the mask is displayed as an RGB image; you can display a grayscale
        # image by converting 'foreGround' to grayscale before applying the threshold.
        cv2.imshow('mask', mask)

        if cv2.waitKey(10) & 0xFF == 27:
            break

    video.release()
    cv2.destroyAllWindows()

def check_dataset():
    with utils.timeit_context('load ds'):
        ds = NihDataset(fold=0, is_training=True, img_size=512, verbose=True)

    # print(ds.annotations(ds.patient_ids[0]))
    # patient_id = 10056  # ds.patient_ids[0]
    # plt.imshow(ds.images[patient_id])
    #
    # annotation_list = ds.training_samples.loc[[patient_id]]
    # for _, row in annotation_list.iterrows():
    #     plt.plot(
    #         [row[f'p{i}_x'] for i in [1, 2, 3, 4, 1]],
    #         [row[f'p{i}_y'] for i in [1, 2, 3, 4, 1]],
    #         c='y'
    #     )
    # plt.show()

    ds.is_training = False
    plt.imshow(ds[0]['img'])
    plt.figure()

    ds.is_training = True
    for sample in ds:
        print(sample['categories'])
        print(np.array(ds.categories)[sample['categories'] > 0.5])
        plt.cla()
        plt.imshow(sample['img'])
        plt.show()

def try_train_model_nn(model_name, fold):
    with utils.timeit_context('load data'):
        X, y, video_ids = load_train_data(model_name, fold)
    print(X.shape, y.shape)

    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-3), loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    batch_size = 64
    model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=128,
              verbose=1,
              validation_data=(X_test, y_test),
              callbacks=[ReduceLROnPlateau(factor=0.2, verbose=True, min_lr=1e-6)])

    prediction = model.predict(X_test)
    print(y_test.shape, prediction.shape)
    print(metrics.pri_matrix_loss(y_test, prediction))
    print(metrics.pri_matrix_loss(y_test, np.clip(prediction, 0.001, 0.999)))
    delta = prediction - y_test
    print(np.min(delta), np.max(delta), np.mean(np.abs(delta)), np.sum(np.abs(delta) > 0.5))

def main():
    from skimage.feature import peak_local_max
    from skimage.morphology import watershed
    import scipy.ndimage as ndi

    img = realImage()
    # img = testImage()
    img = fillHoles(img)
    thresh = img.copy()

    with utils.timeit_context():
        dst = ndi.distance_transform_edt(img)
        localMax = peak_local_max(dst, indices=False, min_distance=1, labels=thresh)
        markers = ndi.label(localMax)[0]
        labels = watershed(-dst, markers, mask=thresh)
        segmImg = (labels * (255 / labels.max())).astype(np.uint8)

    wnd = CvNamedWindow(flags=cv2.WINDOW_NORMAL)
    segmWnd = CvNamedWindow('segm', flags=cv2.WINDOW_NORMAL)
    wnd.imshow(img)
    segmWnd.imshow(segmImg)
    cvWaitKeys()

def train_model_nn(model_name, fold, load_cache=True):
    with utils.timeit_context('load data'):
        X, y, video_ids = load_train_data(model_name, fold)
    print(X.shape, y.shape)

    model = model_nn(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()

    batch_size = 64

    def scheduler(epoch):
        # Step-wise learning rate decay.
        if epoch < 32:
            return 1e-3
        if epoch < 48:
            return 4e-4
        if epoch < 80:
            return 1e-4
        return 1e-5

    model.fit(X, y,
              batch_size=batch_size,
              epochs=128,
              verbose=1,
              callbacks=[LearningRateScheduler(schedule=scheduler)])
    model.save_weights(f"../output/nn1_{model_name}_{fold}_full.pkl")

def try_train_all_models_nn_combined(models_with_folds):
    X_all_combined = []
    y_all_combined = []
    for model_with_folds in models_with_folds:
        X_combined = []
        y_combined = []
        for model_name, fold in model_with_folds:
            with utils.timeit_context('load data'):
                X, y, video_ids = load_train_data(model_name, fold)
                X_combined.append(X)
                y_combined.append(y)
        X_all_combined.append(np.row_stack(X_combined))
        y_all_combined.append(np.row_stack(y_combined))

    X = np.column_stack(X_all_combined)
    y = y_all_combined[0]
    print(X.shape, y.shape)

    model = model_nn_combined(input_size=X.shape[1])
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()

    batch_size = 64

    def scheduler(epoch):
        # Step-wise learning rate decay.
        if epoch < 32:
            return 1e-3
        if epoch < 48:
            return 4e-4
        if epoch < 80:
            return 1e-4
        return 1e-5

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=128,
              verbose=1,
              validation_data=(X_test, y_test),
              callbacks=[LearningRateScheduler(schedule=scheduler)])

    prediction = model.predict(X_test)
    print(y_test.shape, prediction.shape)
    print(metrics.pri_matrix_loss(y_test, prediction))
    print(metrics.pri_matrix_loss(y_test, np.clip(prediction, 0.001, 0.999)))
    print(metrics.pri_matrix_loss(y_test, np.clip(prediction, 0.0001, 0.9999)))
    delta = prediction - y_test
    print(np.min(delta), np.max(delta), np.mean(np.abs(delta)), np.sum(np.abs(delta) > 0.5))

def train_all_models_xgboost_combined(combined_model_name, models_with_folds):
    # Train one XGBoost classifier on features from all models:
    # folds are stacked as rows, different models are stacked as columns.
    X_all_combined = []
    y_all_combined = []

    requests = []
    results = []
    for model_with_folds in models_with_folds:
        for model_name, fold in model_with_folds:
            requests.append((model_name, fold))
            # results.append(load_one_model(requests[-1]))

    pool = Pool(40)
    with utils.timeit_context('load all data'):
        results = pool.starmap(load_train_data, requests)

    for model_with_folds in models_with_folds:
        X_combined = []
        y_combined = []
        for model_name, fold in model_with_folds:
            X, y, video_ids = results[requests.index((model_name, fold))]
            print(model_name, fold, X.shape)
            X_combined.append(X)
            y_combined.append(y)
        X_all_combined.append(np.row_stack(X_combined))
        y_all_combined.append(np.row_stack(y_combined))

    X = np.column_stack(X_all_combined)
    y = y_all_combined[0]
    print(X.shape, y.shape)

    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    model = XGBClassifier(n_estimators=1600, objective='multi:softprob', learning_rate=0.03, silent=False)
    with utils.timeit_context('fit 1600 est'):
        model.fit(X, y_cat)
        # model.fit(X, y_cat, eval_set=[(X_test, y_test)], early_stopping_rounds=20, verbose=True)

    pickle.dump(model, open(f"../output/xgb_combined_{combined_model_name}.pkl", "wb"))

def predict_combined_folds_models():
    # Weighted average of per-model NN predictions; each model group uses the weights
    # trained on all of its folds combined.
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))
    data_dir = '../output/prediction_test_frames/'

    pool = ThreadPool(8)

    for models in config.ALL_MODELS:
        combined_model_name = models[0][0] + '_combined'

        def load_data(request):
            model_name, fold = request
            return load_test_data(data_dir, model_name, fold)

        with utils.timeit_context('load 4 folds data'):
            X_for_folds = pool.map(load_data, models)

        model = model_nn(input_size=X_for_folds[0].shape[1])
        model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
        model.load_weights(f"../output/nn_combined_folds_{combined_model_name}.pkl")

        for (model_name, fold), X in zip(models, X_for_folds):
            with utils.timeit_context('predict'):
                prediction = model.predict(X)
                weight = config.MODEL_WEIGHTS[model_name]
                result += prediction * weight
                total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight
    for clip10 in [5, 4, 3, 2]:
        clip = 10 ** (-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip, 1.0 - clip)
        ds.to_csv(f'../submissions/submission_combined_folds_models_nn_clip_{clip10}.csv',
                  index=False, float_format='%.8f')

def check_color_dataset_aug():
    with utils.timeit_context('load ds'):
        ds = ClassificationDataset(fold=0, is_training=True)

    while True:
        sample = ds[1]
        plt.imshow(sample['img'])
        print(ds.samples[sample['idx']], sample['scale'])
        plt.show()

def check_performance():
    import pytorch_retinanet.dataloader
    import torch

    with utils.timeit_context('load ds'):
        ds = DetectionDataset(fold=0, is_training=True, img_size=512)

    dataloader_train = torch.utils.data.DataLoader(
        ds,
        num_workers=16,
        batch_size=12,
        shuffle=True,
        collate_fn=pytorch_retinanet.dataloader.collater2d)

    data_iter = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
    with utils.timeit_context('1000 batches:'):
        for iter_num, data in data_iter:
            if iter_num > 1000:
                break

def predict_masks(fold):
    # Predict masks for all clip frames and save them as png images.
    # weights = '../output/checkpoints/mask_unet/model_unet1/checkpoint-best-019-0.0089.hdf5'
    weights = '../output/ruler_masks_unet.h5'

    model = model_unet(INPUT_SHAPE)
    model.load_weights(weights)
    batch_size = 16

    input_samples = []
    processed_samples = 0
    dest_dir = '../output/ruler_masks'

    for dir_name in os.listdir(IMAGES_DIR):
        clip_dir = os.path.join(IMAGES_DIR, dir_name)
        os.makedirs(os.path.join(dest_dir, dir_name), exist_ok=True)
        for frame_name in os.listdir(clip_dir):
            if not frame_name.endswith('.jpg'):
                continue
            input_samples.append((dir_name, frame_name))

    # Split the work between two runs: fold 1 takes the first half of the samples, fold 2 the second.
    if fold == 1:
        input_samples = input_samples[:len(input_samples) // 2]
    elif fold == 2:
        input_samples = input_samples[len(input_samples) // 2:]

    pool = ThreadPool(processes=8)
    save_batch_size = 64

    for batch_input_samples in utils.chunks(input_samples, batch_size * save_batch_size):
        def process_sample(sample):
            img_data = scipy.misc.imread(os.path.join(IMAGES_DIR, sample[0], sample[1]))
            img_data = scipy.misc.imresize(img_data, 0.5, interp='cubic')
            return preprocess_input(img_data)

        def generate_x():
            while True:
                for samples in utils.chunks(batch_input_samples, batch_size):
                    yield np.array(pool.map(process_sample, samples))

        with utils.timeit_context('predict {} images, {}/{}, {:.1f}%'.format(
                batch_size * save_batch_size,
                processed_samples,
                len(input_samples),
                100.0 * processed_samples / len(input_samples))):
            predictions = model.predict_generator(generate_x(), steps=save_batch_size, verbose=1)

        for i in range(predictions.shape[0]):
            dir_name, fn = input_samples[processed_samples]
            processed_samples += 1
            fn = fn.replace('jpg', 'png')
            scipy.misc.imsave(os.path.join(dest_dir, dir_name, fn),
                              (predictions[i, :, :, 0] * 255.0).astype(np.uint8))

def check_dataset_aug():
    with utils.timeit_context('load ds'):
        ds = ClassificationDataset(fold=0, is_training=True, img_aug_level=20, geometry_aug_level=10)

    while True:
        sample = ds[1]
        utils.print_stats('img', sample['img'])
        plt.imshow(np.moveaxis(sample['img'], 0, 2)[:, :, :3])
        plt.show()

def check_dataset():
    with utils.timeit_context('load ds'):
        ds = ClassificationDataset(fold=0, is_training=False)

    for sample in ds:
        plt.cla()
        plt.imshow(sample['img'])
        print(ds.samples[sample['idx']], sample['scale'])
        plt.show()

def train_all_single_fold_models():
    # Train an xgboost model for every (model, fold) pair, skipping already trained ones.
    for models in config.ALL_MODELS:
        for model_name, fold in models:
            weights_fn = f"../output/xgb_{model_name}_{fold}_full.pkl"
            print(model_name, fold, weights_fn)
            if os.path.exists(weights_fn):
                print('skip existing file')
            else:
                with utils.timeit_context('train'):
                    model_xgboost(model_name, fold)

def predict_on_test_combined(combined_model_name, models_with_folds):
    # Predict with the combined NN model: stack per-model features for each fold,
    # predict per fold and average the fold predictions.
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    folds = [1, 2, 3, 4]

    X_combined = {fold: [] for fold in folds}
    for model_with_folds in models_with_folds:
        for data_model_name, data_fold in model_with_folds:
            data_dir = '../output/prediction_test_frames/'
            with utils.timeit_context('load data'):
                X_combined[data_fold].append(load_test_data(data_dir, data_model_name, data_fold))
                # print(X_combined[-1].shape)

    pickle.dump(X_combined, open(f"../output/X_combined_{combined_model_name}.pkl", "wb"))
    # X_combined = pickle.load(open(f"../output/X_combined_{combined_model_name}.pkl", 'rb'))

    model = model_nn_combined(input_size=np.column_stack(X_combined[1]).shape[1])
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    model.load_weights(f"../output/nn_{combined_model_name}_full.pkl")

    predictions = []
    with utils.timeit_context('predict'):
        for fold in [1, 2, 3, 4]:
            X = np.column_stack(X_combined[fold])
            predictions.append(model.predict(X))

    prediction = np.mean(np.array(predictions).astype(np.float64), axis=0)

    os.makedirs('../submissions', exist_ok=True)
    for clip10 in [5, 4, 3, 2]:
        clip = 10 ** (-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(prediction[:, col] * (1 - clip * 2) + clip, clip, 1.0 - clip)
        ds.to_csv(
            f'../submissions/submission_combined_models_nn_{combined_model_name}_clip_{clip10}.csv',
            index=False, float_format='%.8f')

def generate_train_benchmark(use_watershed):
    data = dataset.UVectorNetDataset(fold=1, batch_size=8, output_watershed=use_watershed)
    count = 0
    with utils.timeit_context('generate 10 batches'):
        for X, y in data.generate_train():
            count += 1
            if count >= 10:
                break

def predict_combined_folds_models():
    # Weighted average of per-model xgboost predictions; each model group uses the
    # classifier trained on all of its folds combined.
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))

    pool = Pool(16)

    for models in config.ALL_MODELS:
        combined_model_name = models[0][0] + '_combined'

        # def load_data(request):
        #     model_name, fold = request
        #     return load_test_data(data_dir, model_name, fold)

        with utils.timeit_context('load 4 folds data'):
            X_for_folds = pool.starmap(load_test_data_from_std_path, models)

        model = pickle.load(open(f"../output/xgb_combined_folds_{combined_model_name}.pkl", "rb"))

        for (model_name, fold), X in zip(models, X_for_folds):
            with utils.timeit_context('predict'):
                prediction = model.predict_proba(X)
                weight = config.MODEL_WEIGHTS[model_name]
                result += prediction * weight
                total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight
    for clip10 in [5, 4, 3, 2]:
        clip = 10 ** (-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip, 1.0 - clip)
        ds.to_csv(f'../submissions/submission_combined_folds_models_xgboost_clip_{clip10}.csv',
                  index=False, float_format='%.8f')

def predict_all_single_fold_models():
    # Weighted average of predictions from all single-fold NN models.
    ds = pd.read_csv(config.SUBMISSION_FORMAT)
    classes = list(ds.columns)[1:]
    total_weight = 0.0
    result = np.zeros((ds.shape[0], NB_CAT))
    data_dir = '../output/prediction_test_frames/'

    for models in config.ALL_MODELS:
        for model_name, fold in models:
            weights_fn = f"../output/nn1_{model_name}_{fold}_full.pkl"
            print(model_name, fold, weights_fn)

            with utils.timeit_context('load data'):
                X = load_test_data(data_dir, model_name, fold)
                print(X.shape)

            model = model_nn(input_size=X.shape[1])
            model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
            model.load_weights(weights_fn)

            with utils.timeit_context('predict'):
                prediction = model.predict(X)
                weight = config.MODEL_WEIGHTS[model_name]
                result += prediction * weight
                total_weight += weight

    os.makedirs('../submissions', exist_ok=True)
    result /= total_weight
    for clip10 in [5, 4, 3, 2]:
        clip = 10 ** (-clip10)
        for col, cls in enumerate(classes):
            ds[cls] = np.clip(result[:, col] * (1 - clip * 2) + clip, clip, 1.0 - clip)
        ds.to_csv(f'../submissions/submission_single_folds_models_nn_clip_{clip10}.csv',
                  index=False, float_format='%.8f')

def packer(map_name='wmap', save_to=None):
    import texture
    import packer

    utils.pg_init()
    textures = texture.TextureGroup.get_group(map_name)
    with utils.timeit_context('Packing'):
        pack = packer.ImagePack(img.image for _, img in textures.iter_all())
    if save_to:
        pg.image.save(pack.image, save_to)
    else:
        utils.show_surface(pack.image)

def pack_all():
    import texture
    import packer

    utils.pg_init()
    allNames = ['smap', 'wmap', 'mmap'] + ['fight{:03d}'.format(i) for i in range(110)]
    rates = []
    for name in allNames:
        with utils.timeit_context('Load and pack ' + name):
            try:
                textures = texture.TextureGroup.get_group(name)
                pack = packer.ImagePack(img.image for _, img in textures.iter_all())
                rates.append(pack.rate)
            except FileNotFoundError:
                pack = None
        # if pack:
        #     utils.show_surface(pack.image)
    pg.quit()

    import matplotlib.pyplot as plt
    plt.hist(rates)
    plt.show()

def load_image(name='smap.png'):
    utils.pg_init()
    with utils.timeit_context('Load image: ' + name):
        pg.image.load(name)