def featImpMDA_Clustered(clf, X, y, clstrs, n_splits=10):
    # Clustered MDA: shuffle all features of a cluster together and measure
    # the out-of-sample log-loss degradation attributable to that cluster.
    cvGen = KFold(n_splits=n_splits)
    scr0 = pd.Series(dtype='float64')
    scr1 = pd.DataFrame(columns=clstrs.keys())
    for i, (train, test) in enumerate(cvGen.split(X=X)):
        X0, y0 = X.iloc[train, :], y.iloc[train]
        X1, y1 = X.iloc[test, :], y.iloc[test]
        fit = clf.fit(X=X0, y=y0)
        prob = fit.predict_proba(X1)
        scr0.loc[i] = -log_loss(y1, prob, labels=clf.classes_)
        for j in scr1.columns:
            X1_ = X1.copy(deep=True)
            for k in clstrs[j]:
                np.random.shuffle(X1_[k].values)  # shuffle the whole cluster
            prob = fit.predict_proba(X1_)
            scr1.loc[i, j] = -log_loss(y1, prob, labels=clf.classes_)
    imp = (-1 * scr1).add(scr0, axis=0)
    imp = imp / (-1 * scr1)
    imp = pd.concat({'mean': imp.mean(),
                     'std': imp.std() * imp.shape[0] ** -.5}, axis=1)
    imp.index = ['C_' + str(i) for i in imp.index]
    return imp
def kfolded(data, folds, seed=1337):
    # Yield (train, test, fold_index) triples for a NumPy array.
    # Note: random_state only takes effect when shuffle=True.
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    for i, (train_index, test_index) in enumerate(kf.split(data)):
        if len(data.shape) > 1:
            yield (data[train_index, :], data[test_index, :], i)
        else:
            yield (data[train_index], data[test_index], i)
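# Hedged usage sketch for kfolded (not part of the original source): splits a
# toy 10x2 NumPy array into 5 folds. Assumes numpy as np and
# sklearn.model_selection.KFold are imported as in the snippets above;
# the names _demo, _train, _test, _fold are illustrative only.
_demo = np.arange(20).reshape(10, 2)
for _train, _test, _fold in kfolded(_demo, folds=5):
    print(_fold, _train.shape, _test.shape)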
def cv(config):
    np.random.seed(3435)
    data_file = config.get('data_file')
    with open(data_file, "rb") as f:
        (datasets, targets, vocab) = pickle.load(f)
    logger.info('Loaded vect_file: %s', data_file)

    if config.get('vector_type') == 'word2vec':
        w2v_file = config.get('w2v_file')
        w2v = load_bin_vec(w2v_file, vocab)
        add_unknown_words(w2v, vocab)
        initialW = []
        for entry in sorted(vocab.items(), key=lambda x: x[1]):
            initialW.append(w2v[entry[0]])
        initialW = np.array(initialW)
        logger.info('Loaded word2vec: %s', w2v_file)
    else:
        initialW = None

    model_config = {}
    model_config.update(config.get('model'))
    model_config['batch_size'] = config.get('batch_size')
    model_config['epoch'] = config.get('epoch')
    model_config['gpu'] = config.get('gpu')
    model_config['non_static'] = config.get('non_static')
    model_config['n_vocab'] = len(vocab)
    model_config['doc_length'] = datasets.shape[1]
    model_config['initialW'] = initialW

    for phase in range(1, config.get('phase') + 1):
        logger.info('Cross Validation: %d/%d', phase, config.get('phase'))
        kf = KFold(n_splits=config.get('split'))
        for train_index, test_index in kf.split(datasets):
            train_index = np.random.permutation(train_index)
            X_train = datasets[train_index]
            Y_train = targets[train_index]
            X_test = datasets[test_index]
            Y_test = targets[test_index]
            logger.info('Fitting: %s -> %s', X_train.shape, Y_train.shape)
            (_, clf) = create_classifier(**model_config)
            clf.fit(X_train, Y_train,
                    dataset_creator=lambda X, y, model: XyDataset(
                        X=X, y=y, model=model, X_dtype=np.int32))
            logger.info('Predicting: %s -> %s', X_test.shape, Y_test.shape)
            preds = clf.predict(X_test,
                                dataset_creator=lambda X, model: XyDataset(
                                    X=X, model=model, X_dtype=np.int32))
            logger.info('accuracy: {0}'.format(accuracy_score(Y_test, preds)))
            if config.get('fold_out'):
                break
    logger.info('Done')
class StackingModel(BaseEstimator, RegressorMixin, TransformerMixin):

    def __init__(self, mod, meta_model):
        self.mod = mod
        self.meta_model = meta_model
        self.kf = KFold(n_splits=5, random_state=42, shuffle=True)

    def fit(self, X, y):
        # Out-of-fold predictions of each base model become the training
        # features of the meta model.
        self.saved_model = [list() for i in self.mod]
        oof_train = np.zeros((X.shape[0], len(self.mod)))
        for i, model in enumerate(self.mod):
            for train_index, val_index in self.kf.split(X, y):
                renew_model = clone(model)
                renew_model.fit(X[train_index], y[train_index])
                self.saved_model[i].append(renew_model)
                oof_train[val_index, i] = renew_model.predict(X[val_index])
        self.meta_model.fit(oof_train, y)
        return self

    def predict(self, X):
        # Average each base model's per-fold predictions, then feed the
        # averaged columns to the meta model.
        whole_test = np.column_stack([
            np.column_stack([model.predict(X) for model in single_model]).mean(axis=1)
            for single_model in self.saved_model
        ])
        return self.meta_model.predict(whole_test)

    def get_oof(self, X, y, test_X):
        oof = np.zeros((X.shape[0], len(self.mod)))
        test_single = np.zeros((test_X.shape[0], 5))
        test_mean = np.zeros((test_X.shape[0], len(self.mod)))
        for i, model in enumerate(self.mod):
            for j, (train_index, val_index) in enumerate(self.kf.split(X, y)):
                clone_model = clone(model)
                clone_model.fit(X[train_index], y[train_index])
                oof[val_index, i] = clone_model.predict(X[val_index])
                test_single[:, j] = clone_model.predict(test_X)
            test_mean[:, i] = test_single.mean(axis=1)
        return oof, test_mean
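# Hedged usage sketch for StackingModel (not part of the original source):
# stacks two sklearn regressors under a linear meta-model on random data.
# Assumes numpy as np and sklearn.base.clone are imported as above; the
# names _X, _y, _stack are illustrative only.
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor

_X = np.random.rand(100, 4)
_y = _X @ np.array([1.0, -2.0, 0.5, 3.0]) + 0.1 * np.random.randn(100)
_stack = StackingModel(mod=[Ridge(alpha=1.0), DecisionTreeRegressor(max_depth=3)],
                       meta_model=LinearRegression())
_stack.fit(_X, _y)
print(_stack.predict(_X[:5]))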
def featImpMDA_Clustered(clf, X, y, clstrs, n_splits=10):
    """
    SNIPPET 6.5 Clustered MDA

    Args:
        clf: classifier implementing fit/predict_proba
        X: pandas.DataFrame of features
        y: pandas.Series of labels
        clstrs: dict mapping cluster name -> list of feature columns
        n_splits: number of cross-validation folds

    Returns:
        pandas.DataFrame with mean and std of importance per cluster
    """
    from sklearn.metrics import log_loss
    from sklearn.model_selection import KFold

    cvGen = KFold(n_splits=n_splits)
    scr0 = pd.Series(dtype='float64')
    scr1 = pd.DataFrame(columns=clstrs.keys())
    for i, (train, test) in enumerate(cvGen.split(X=X)):
        X0, y0 = X.iloc[train, :], y.iloc[train]
        X1, y1 = X.iloc[test, :], y.iloc[test]
        fit = clf.fit(X=X0, y=y0)
        prob = fit.predict_proba(X1)
        scr0.loc[i] = -log_loss(y1, prob, labels=clf.classes_)
        for j in scr1.columns:
            X1_ = X1.copy(deep=True)
            for k in clstrs[j]:
                np.random.shuffle(X1_[k].values)  # shuffle clusters
            prob = fit.predict_proba(X1_)
            scr1.loc[i, j] = -log_loss(y1, prob, labels=clf.classes_)
    imp = (-1 * scr1).add(scr0, axis=0)
    imp /= -1 * scr1
    imp = pd.concat({'mean': imp.mean(),
                     'std': imp.std() * imp.shape[0] ** -.5}, axis=1)
    imp.index = ['C_' + str(i) for i in imp.index]
    return imp
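# Hedged usage sketch for featImpMDA_Clustered (not from the original source):
# a RandomForest on a synthetic classification problem with two hand-made
# feature clusters. The names _X, _y and the cluster dict are illustrative;
# pandas as pd and numpy as np are assumed imported as above.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

_Xa, _ya = make_classification(n_samples=200, n_features=6, n_informative=3,
                               random_state=0)
_X = pd.DataFrame(_Xa, columns=[f'f{i}' for i in range(6)])
_y = pd.Series(_ya)
_clstrs = {0: ['f0', 'f1', 'f2'], 1: ['f3', 'f4', 'f5']}
print(featImpMDA_Clustered(RandomForestClassifier(n_estimators=50, random_state=0),
                           _X, _y, _clstrs, n_splits=5))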
def featImpMDA(clf, X, y, n_splits=10):
    """
    Feature importance based on OOS score reduction.
    SNIPPET 6.3 Implementation of MDA

    Args:
        clf: classifier implementing fit/predict_proba
        X: pandas.DataFrame of features
        y: pandas.Series of labels
        n_splits: number of cross-validation folds

    Returns:
        pandas.DataFrame with mean and std of importance per feature
    """
    from sklearn.metrics import log_loss
    from sklearn.model_selection import KFold

    cvGen = KFold(n_splits=n_splits)
    scr0 = pd.Series(dtype='float64')
    scr1 = pd.DataFrame(columns=X.columns)
    for i, (train, test) in enumerate(cvGen.split(X=X)):
        X0, y0 = X.iloc[train, :], y.iloc[train]
        X1, y1 = X.iloc[test, :], y.iloc[test]
        fit = clf.fit(X=X0, y=y0)  # the fit occurs here
        prob = fit.predict_proba(X1)  # prediction before shuffling
        scr0.loc[i] = -log_loss(y1, prob, labels=clf.classes_)
        for j in X.columns:
            X1_ = X1.copy(deep=True)
            np.random.shuffle(X1_[j].values)  # shuffle one column
            prob = fit.predict_proba(X1_)  # prediction after shuffling
            scr1.loc[i, j] = -log_loss(y1, prob, labels=clf.classes_)
    imp = (-1 * scr1).add(scr0, axis=0)
    imp /= -1 * scr1
    imp = pd.concat({'mean': imp.mean(),
                     'std': imp.std() * imp.shape[0] ** -.5}, axis=1)  # CLT
    return imp
def featImpMDA(clf, X, y, n_splits=10):
    # Feature importance based on OOS score reduction
    cvGen = KFold(n_splits=n_splits)
    scr0, scr1 = pd.Series(dtype='float64'), pd.DataFrame(columns=X.columns)
    for i, (train, test) in enumerate(cvGen.split(X=X)):
        x0, y0 = X.iloc[train, :], y.iloc[train]
        x1, y1 = X.iloc[test, :], y.iloc[test]
        fit = clf.fit(X=x0, y=y0)  # the fit occurs here
        prob = fit.predict_proba(x1)  # prediction before shuffling
        scr0.loc[i] = -log_loss(y1, prob, labels=clf.classes_)
        for j in X.columns:
            X1_ = x1.copy(deep=True)
            np.random.shuffle(X1_[j].values)  # shuffle one column
            prob = fit.predict_proba(X1_)  # prediction after shuffling
            scr1.loc[i, j] = -log_loss(y1, prob, labels=clf.classes_)
    imp = (-1 * scr1).add(scr0, axis=0)
    imp = imp / (-1 * scr1)
    imp = pd.concat({'mean': imp.mean(),
                     'std': imp.std() * imp.shape[0] ** -.5}, axis=1)  # CLT
    return imp
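# Hedged usage sketch for featImpMDA (not from the original source): per-feature
# MDA importance for a RandomForest on synthetic data. Assumes pandas, numpy,
# sklearn.model_selection.KFold and sklearn.metrics.log_loss are imported at
# module level as in the snippets above; _X and _y are illustrative names.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

_Xa, _ya = make_classification(n_samples=200, n_features=5, n_informative=3,
                               random_state=1)
_X = pd.DataFrame(_Xa, columns=[f'feat_{i}' for i in range(5)])
_y = pd.Series(_ya)
print(featImpMDA(RandomForestClassifier(n_estimators=50, random_state=1),
                 _X, _y, n_splits=5))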
def cross_validate_approximation(train_samples, train_vals, options, nfolds,
                                 method, random_folds=True):
    ntrain_samples = train_samples.shape[1]
    if random_folds != 'sklearn':
        fold_sample_indices = get_random_k_fold_sample_indices(
            ntrain_samples, nfolds, random_folds)
    else:
        from sklearn.model_selection import KFold
        sklearn_cv = KFold(nfolds)
        indices = np.arange(train_samples.shape[1])
        fold_sample_indices = [
            te for tr, te in sklearn_cv.split(train_vals, train_vals)
        ]

    approx_list = []
    residues_list = []
    cv_score = 0
    for kk in range(len(fold_sample_indices)):
        K = np.ones(ntrain_samples, dtype=bool)
        K[fold_sample_indices[kk]] = False
        train_samples_kk = train_samples[:, K]
        train_vals_kk = train_vals[K, :]
        test_samples_kk = train_samples[:, fold_sample_indices[kk]]
        test_vals_kk = train_vals[fold_sample_indices[kk]]
        approx_kk = approximate(train_samples_kk, train_vals_kk, method,
                                options).approx
        residues = approx_kk(test_samples_kk) - test_vals_kk
        approx_list.append(approx_kk)
        residues_list.append(residues)
        cv_score += np.sum(residues**2, axis=0)
    cv_score = np.sqrt(cv_score / ntrain_samples)
    return approx_list, residues_list, cv_score
def main():
    parser = make_parser()
    args = parser.parse_args()

    # load data
    train_x, train_y = read_input_data(args.train_h5)
    test_x, test_y = read_input_data(args.test_h5)  # used as val for now

    # SpaceNet
    all_ids = np.array(generate_ids(args.data_dirs, None))
    kfold = KFold(n_splits=2, shuffle=True)  # args.n_folds
    splits = [s for s in kfold.split(all_ids)]
    folds = [int(f) for f in '0'.split(",")]
    fold = folds[0]
    train_ind, test_ind = splits[fold]
    train_ids = all_ids[train_ind]
    val_ids = all_ids[test_ind]

    train_generator = MULSpacenetDataset(
        data_dirs=args.data_dirs,
        wdata_dir=args.wdata_dir,
        image_ids=train_ids,
        crop_shape=(args.crop_size, args.crop_size),
        batch_size=args.large_batch_size,
        seed=777,
        image_name_template='PS-MS/SN3_roads_train_AOI_5_Khartoum_PS-MS_{id}.tif',
        masks_dict=get_groundtruth(args.data_dirs)
    )
    val_generator = MULSpacenetDataset(
        data_dirs=args.data_dirs,
        wdata_dir=args.wdata_dir,
        image_ids=val_ids,
        batch_size=args.test_batch_size,
        crop_shape=(args.crop_size, args.crop_size),
        seed=777,
        image_name_template='PS-MS/SN3_roads_train_AOI_5_Khartoum_PS-MS_{id}.tif',
        masks_dict=get_groundtruth(args.data_dirs),
    )

    # x in shape (channels, width, height, num_images) ?
    x, y = next(train_generator)
    train_y_shape = y.shape

    images_scale = np.max(train_x)
    if images_scale > 1:
        print('Normalizing images by a factor of {}'.format(images_scale))
        train_x = train_x / images_scale
        test_x = test_x / images_scale
    input_data = (train_x, train_y, test_x, test_y)  # package for more concise argument passing

    if args.test_batch_size == 0:
        args.test_batch_size = test_y.shape[0]

    print('Data shapes:', train_x.shape, train_y.shape, test_x.shape, test_y.shape)
    if train_y.shape[0] % args.large_batch_size != 0:
        print("WARNING large batch size doesn't divide train set evenly")
    if test_y.shape[0] % args.test_batch_size != 0:
        print("WARNING batch size doesn't divide test set evenly")

    # get all_weights. Do it in 1 chunk if it fits into memory
    hf_weights = h5py.File(args.weights_h5, 'r')
    if args.stream_inputs:
        all_weights = hf_weights['all_weights']  # future work: change to streamds if you want it to be faster
    else:
        all_weights = np.array(hf_weights['all_weights'], dtype='f8')
    shapes = [literal_eval(s) for s in
              hf_weights.attrs['var_shapes'].decode('utf-8').split(';')]
    print(all_weights.shape)
    print(shapes)
    num_iters = min(args.max_iters, all_weights.shape[0] - 1)
    dim_sum = all_weights.shape[1]

    # set up output file
    output_name = args.output_h5
    if not output_name:
        # use default gradients name
        assert args.weights_h5[-8:] == '/weights'
        output_name = args.weights_h5[:-8] + '/gradients_adaptive'
        if args.max_iters < all_weights.shape[0] - 1:
            output_name += '_{}iters'.format(args.max_iters)
    print('Writing gradients to file {}'.format(output_name))

    dsets = {}
    hf_grads = h5py.File(output_name, 'w-')
    dsets['trainloss'] = hf_grads.create_dataset('trainloss', (num_iters + 1,),
                                                 dtype='f4', compression='gzip')
    dsets['testloss'] = hf_grads.create_dataset('testloss', (num_iters + 1,),
                                                dtype='f4', compression='gzip')
    dsets['num_splits'] = hf_grads.create_dataset('num_splits', (num_iters,),
                                                  dtype='i', compression='gzip')

    pool = ThreadPool(args.num_gpus)
    iters_to_calc = divide_with_remainder(num_iters, args.num_gpus)
    results = []
    overall_timerstart = time.time()
    for gpu in range(args.num_gpus):
        # each process writes to a different variable in the file
        dsets['grads_train_{}'.format(gpu)] = hf_grads.create_dataset(
            'grads_train_{}'.format(gpu),
            (len(iters_to_calc[gpu]) * args.default_num_splits + 1, dim_sum),
            maxshape=(None, dim_sum), dtype='f4', compression='gzip')
        dsets['grads_test_{}'.format(gpu)] = hf_grads.create_dataset(
            'grads_test_{}'.format(gpu),
            (len(iters_to_calc[gpu]) * args.default_num_splits + 1, dim_sum),
            maxshape=(None, dim_sum), dtype='f4', compression='gzip')
        if args.num_gpus > 1:
            ret = pool.apply_async(run_thread, (gpu, iters_to_calc[gpu], all_weights,
                                                shapes, train_y_shape, train_generator,
                                                val_generator, dim_sum, args, dsets,
                                                hf_grads))
            results.append(ret)
        else:
            run_thread(gpu, iters_to_calc[gpu], all_weights, shapes, train_y_shape,
                       train_generator, val_generator, dim_sum, args, dsets, hf_grads)
    pool.close()
    pool.join()
    print('return values: ', [res.get() for res in results])
    print('total time elapsed:', time.time() - overall_timerstart)
    hf_weights.close()
    hf_grads.close()
def main():
    parser = make_parser()
    args = parser.parse_args()

    if args.tf_seed != -1:
        tf.random.set_random_seed(args.tf_seed)
    if not args.no_shuffle and args.shuffle_seed != -1:
        np.random.seed(args.shuffle_seed)

    # load data
    train_x, train_y = read_input_data(args.train_h5)
    test_x, test_y = read_input_data(args.test_h5)  # used as val

    # SpaceNet
    all_ids = np.array(generate_ids(args.data_dirs, None))
    kfold = KFold(n_splits=2, shuffle=True)  # args.n_folds
    splits = [s for s in kfold.split(all_ids)]
    folds = [int(f) for f in '0'.split(",")]
    fold = folds[0]
    train_ind, test_ind = splits[fold]
    train_ids = all_ids[train_ind]
    val_ids = all_ids[test_ind]
    masks_dict = get_groundtruth(args.data_dirs)

    # Returns normalized to interval [-1, 1]
    train_generator = MULSpacenetDataset(
        data_dirs=args.data_dirs,
        wdata_dir=args.wdata_dir,
        image_ids=train_ids,
        batch_size=args.train_batch_size,
        crop_shape=(args.crop_size, args.crop_size),
        seed=777,
        image_name_template='PS-MS/SN3_roads_train_AOI_5_Khartoum_PS-MS_{id}.tif',
        masks_dict=masks_dict
    )
    val_generator = MULSpacenetDataset(
        data_dirs=args.data_dirs,
        wdata_dir=args.wdata_dir,
        image_ids=val_ids,
        batch_size=args.test_batch_size,
        crop_shape=(args.crop_size, args.crop_size),
        seed=777,
        image_name_template='PS-MS/SN3_roads_train_AOI_5_Khartoum_PS-MS_{id}.tif',
        masks_dict=masks_dict
    )

    # train_x in shape (batch_size, width, height, channels) = (train_batch_size, crop_size, crop_size, 12)
    # train_x, train_y = next(train_generator)
    # train_generator.reset()
    # test_x, test_y = next(val_generator)
    # val_generator.reset()
    # train_y_shape = train_y.shape

    images_scale = np.max(train_x)
    if images_scale > 1:
        print('Normalizing images by a factor of {}'.format(images_scale))
        train_x = train_x / images_scale
        test_x = test_x / images_scale

    if args.test_batch_size == 0:
        args.test_batch_size = test_y.shape[0]

    print('Data shapes:', train_x.shape, train_y.shape, test_x.shape, test_y.shape)
    if train_y.shape[0] % args.train_batch_size != 0:
        print("WARNING batch size doesn't divide train set evenly")
    if train_y.shape[0] % args.large_batch_size != 0:
        print("WARNING large batch size doesn't divide train set evenly")
    if test_y.shape[0] % args.test_batch_size != 0:
        print("WARNING batch size doesn't divide test set evenly")

    # build model
    if args.arch == 'linknet':
        model = network_builders.build_linknet()
    elif args.arch == 'fc':
        model = network_builders.build_network_fc(args)
    elif args.arch == 'fc_cust':
        model = network_builders.build_fc_adjustable(args)
    elif args.arch == 'lenet':
        model = network_builders.build_lenet_conv(args)
    elif args.arch == 'allcnn':
        model = network_builders.build_all_cnn(args)
    elif args.arch == 'resnet':
        model = network_builders.build_resnet(args)
    elif args.arch == 'vgg':
        model = network_builders.build_vgg_half(args)
    else:
        raise ValueError("Unknown architecture {}".format(args.arch))

    init_model(model, args)
    define_training(model, args)

    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
    sess.run(tf.global_variables_initializer())

    if args.init_weights_h5:
        load_initial_weights(sess, model, args)

    for collection in ['tb_train_step']:  # 'eval_train' and 'eval_test' added manually later
        tf.summary.scalar(collection + '_acc', model.accuracy, collections=[collection])
        tf.summary.scalar(collection + '_loss', model.loss, collections=[collection])

    tb_writer, hf = None, None
    dsets = {}
    if args.output_dir:
        tb_writer = tf.summary.FileWriter(args.output_dir, sess.graph)
        # set up output for gradients/weights
        if args.save_weights:
            dim_sum = sum([tf.size(var).eval() for var in
                           tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])
            total_iters = args.num_epochs * int(train_y.shape[0] / args.train_batch_size)
            total_chunks = int(total_iters / args.save_every)
            hf = h5py.File(args.output_dir + '/weights', 'w-')

            # write metadata
            var_shapes = np.string_(';'.join([str(var.get_shape()) for var in
                                              tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]))
            hf.attrs['var_shapes'] = var_shapes
            var_names = np.string_(';'.join([str(var.name) for var in
                                             tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]))
            hf.attrs['var_names'] = var_names

            # all individual weights at every iteration, where all_weights[i] = weights before iteration i:
            dsets['all_weights'] = hf.create_dataset('all_weights', (total_chunks + 1, dim_sum),
                                                     dtype='f8', compression='gzip')
            print(f'all_weights shape: ({total_chunks + 1}, {dim_sum})')
            if args.save_training_grads:
                dsets['training_grads'] = hf.create_dataset('training_grads', (total_chunks, dim_sum),
                                                            dtype='f8', compression='gzip')

    ########## Run main thing ##########
    print('=' * 100)
    train_and_eval(sess, model, train_x, train_y, test_x, test_y, tb_writer, dsets, args)
    # train_and_eval(sess, model, train_y_shape, train_generator, val_generator, tb_writer, dsets, args)

    if tb_writer:
        tb_writer.close()
    if hf:
        hf.close()
def feature_importance_mda(classifier, X, y, n_splits=10, plot=False, figsize=(10, 10)):
    """
    Feature importance based on out-of-sample Mean-Decrease Accuracy (MDA).

    Arguments
    ---------
    classifier : tree classifier
      Tree classifier to apply on data.
    X : pandas.DataFrame
      Data Frame with features as columns and samples as rows.
    y : pandas.Series
      Series containing class membership.
    n_splits : int
      Number of cross-validation folds.
    plot : bool
      Option to plot feature importance.
    figsize : (float, float)
      Dimensions of the plot.

    Returns
    -------
    pandas.DataFrame
      Data frame with features importance.

    Notes
    -----
    Function adapted from "Machine Learning for Asset Managers",
    Marcos López de Prado (2020).
    """
    # Checks
    if not isinstance(X, pd.DataFrame):
        raise AssertionError("X must be pandas.DataFrame.")
    if not isinstance(y, pd.Series):
        raise AssertionError("y must be pandas.Series.")

    # Generate K-fold cross validation
    cv_gen = KFold(n_splits=n_splits)
    scr0 = pd.Series(dtype='float64')
    scr1 = pd.DataFrame(columns=X.columns)

    # Loop over the folds
    for i, (train, test) in enumerate(cv_gen.split(X=X)):
        # Train/Test split
        X0, y0 = X.iloc[train, :], y.iloc[train]
        X1, y1 = X.iloc[test, :], y.iloc[test]
        # Fit
        fit = classifier.fit(X=X0, y=y0)
        # Prediction before shuffling
        prob = fit.predict_proba(X1)
        scr0.loc[i] = -log_loss(y1, prob, labels=classifier.classes_)
        for j in X.columns:
            X1_ = X1.copy(deep=True)
            # Shuffle one column
            np.random.shuffle(X1_[j].values)
            # Prediction after shuffling
            prob = fit.predict_proba(X1_)
            scr1.loc[i, j] = -log_loss(y1, prob, labels=classifier.classes_)

    fimp_df = (-1 * scr1).add(scr0, axis=0)
    fimp_df = fimp_df / (-1 * scr1)
    fimp_df = pd.concat({'Importance Mean': fimp_df.mean(),
                         'Importance Std': fimp_df.std() * fimp_df.shape[0] ** -.5},
                        axis=1)

    # Sort values
    sorted_fimp = fimp_df.sort_values(by='Importance Mean')

    # Plot
    if plot is True:
        plt.figure(figsize=figsize)
        plt.title("Feature importance based on out-of-sample Mean-Decrease Accuracy (MDA).")
        plt.barh(y=sorted_fimp.index, width=sorted_fimp['Importance Mean'],
                 xerr=sorted_fimp['Importance Std'])
        plt.show()

    return sorted_fimp

#---------#---------#---------#---------#---------#---------#---------#---------#---------#
def cross_validation():
    """
    Runs 5-fold cross-validation for multiple classifiers and saves their scores to a CSV file
    :return: Dict with classifiers scores
    """
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1, max_iter=1000),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis(),
        SGDClassifier(),
        LogisticRegression()
    ]
    names = [
        "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
        "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
        "Naive Bayes", "QDA", "SGD", "Logistic Regression"
    ]

    # Splitting data for cross validation
    kf = KFold(n_splits=5, shuffle=True)
    splits = list(kf.split(X))
    cv_result = {}

    # Iterates through classifiers, saving each score to a dict
    for classifier, name in zip(classifiers, names):
        model = classifier
        model_accuracy = []
        model_precision = []
        model_recall = []
        model_f1score = []
        for split in splits:
            train_indices, test_indices = split
            X_train = X[train_indices]
            X_test = X[test_indices]
            y_train = y[train_indices]
            y_test = y[test_indices]
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            model_accuracy.append(accuracy_score(y_test, y_pred))
            model_precision.append(precision_score(y_test, y_pred, average='weighted'))
            model_recall.append(recall_score(y_test, y_pred, average='weighted'))
            model_f1score.append(f1_score(y_test, y_pred, average='weighted'))
        cv_result[name] = [model_accuracy, model_precision, model_recall, model_f1score]

    # Saving to a data file
    export_df = pd.DataFrame()
    for key, values in cv_result.items():
        export_df[f'{key}'] = values
    export_df.to_csv('data/cv_result.csv')

    return cv_result
def main():
    if args.crop_size:
        print('Using crops of shape ({}, {})'.format(args.crop_size, args.crop_size))
    else:
        print('Using full size images')

    all_ids = np.array(generate_ids(args.data_dirs, args.clahe))
    np.random.seed(args.seed)
    kfold = KFold(n_splits=args.n_folds, shuffle=True)
    splits = [s for s in kfold.split(all_ids)]
    folds = [int(f) for f in args.fold.split(",")]

    for fold in folds:
        encoded_alias = encode_params(args.clahe, args.preprocessing_function, args.stretch_and_mean)
        city = "all"
        if args.city:
            city = args.city.lower()
        best_model_file = '{}/{}_{}_{}.h5'.format(args.models_dir, encoded_alias, city, args.network)

        channels = 8
        if args.ohe_city:
            channels = 12
        model = make_model(args.network, (None, None, channels))

        if args.weights is None:
            print('No weights passed, training from scratch')
        else:
            print('Loading weights from {}'.format(args.weights))
            model.load_weights(args.weights, by_name=True)
        freeze_model(model, args.freeze_till_layer)

        optimizer = RMSprop(lr=args.learning_rate)
        if args.optimizer:
            if args.optimizer == 'rmsprop':
                optimizer = RMSprop(lr=args.learning_rate)
            elif args.optimizer == 'adam':
                optimizer = Adam(lr=args.learning_rate)
            elif args.optimizer == 'sgd':
                optimizer = SGD(lr=args.learning_rate, momentum=0.9, nesterov=True)

        train_ind, test_ind = splits[fold]
        train_ids = all_ids[train_ind]
        val_ids = all_ids[test_ind]
        if args.city:
            val_ids = [id for id in val_ids if args.city in id[0]]
            train_ids = [id for id in train_ids if args.city in id[0]]
        print('Training fold #{}, {} in train_ids, {} in val_ids'.format(fold, len(train_ids), len(val_ids)))

        masks_gt = get_groundtruth(args.data_dirs)
        if args.clahe:
            template = 'CLAHE-MUL-PanSharpen/MUL-PanSharpen_{id}.tif'
        else:
            template = 'MUL-PanSharpen/MUL-PanSharpen_{id}.tif'

        train_generator = MULSpacenetDataset(
            data_dirs=args.data_dirs,
            wdata_dir=args.wdata_dir,
            clahe=args.clahe,
            batch_size=args.batch_size,
            image_ids=train_ids,
            masks_dict=masks_gt,
            image_name_template=template,
            seed=args.seed,
            ohe_city=args.ohe_city,
            stretch_and_mean=args.stretch_and_mean,
            preprocessing_function=args.preprocessing_function,
            crops_per_image=args.crops_per_image,
            crop_shape=(args.crop_size, args.crop_size),
            random_transformer=RandomTransformer(horizontal_flip=True, vertical_flip=True),
        )
        val_generator = MULSpacenetDataset(
            data_dirs=args.data_dirs,
            wdata_dir=args.wdata_dir,
            clahe=args.clahe,
            batch_size=1,
            image_ids=val_ids,
            image_name_template=template,
            masks_dict=masks_gt,
            seed=args.seed,
            ohe_city=args.ohe_city,
            stretch_and_mean=args.stretch_and_mean,
            preprocessing_function=args.preprocessing_function,
            shuffle=False,
            crops_per_image=1,
            crop_shape=(1280, 1280),
            random_transformer=None
        )

        best_model = ModelCheckpoint(filepath=best_model_file, monitor='val_dice_coef_clipped',
                                     verbose=1, mode='max',
                                     save_best_only=False, save_weights_only=True)

        model.compile(loss=make_loss(args.loss_function),
                      optimizer=optimizer,
                      metrics=[dice_coef, binary_crossentropy, ceneterline_loss, dice_coef_clipped])

        def schedule_steps(epoch, steps):
            for step in steps:
                if step[1] > epoch:
                    print("Setting learning rate to {}".format(step[0]))
                    return step[0]
            print("Setting learning rate to {}".format(steps[-1][0]))
            return steps[-1][0]

        callbacks = [best_model,
                     EarlyStopping(patience=20, verbose=1, monitor='val_dice_coef_clipped', mode='max')]
        if args.schedule is not None:
            steps = [(float(step.split(":")[0]), int(step.split(":")[1])) for step in args.schedule.split(",")]
            lrSchedule = LearningRateScheduler(lambda epoch: schedule_steps(epoch, steps))
            callbacks.insert(0, lrSchedule)
        if args.clr is not None:
            clr_params = args.clr.split(',')
            base_lr = float(clr_params[0])
            max_lr = float(clr_params[1])
            step = int(clr_params[2])
            mode = clr_params[3]
            clr = CyclicLR(base_lr=base_lr, max_lr=max_lr, step_size=step, mode=mode)
            callbacks.append(clr)

        steps_per_epoch = len(all_ids) / args.batch_size + 1
        if args.steps_per_epoch:
            steps_per_epoch = args.steps_per_epoch

        model.fit_generator(
            train_generator,
            steps_per_epoch=steps_per_epoch,
            epochs=args.epochs,
            validation_data=val_generator,
            validation_steps=len(val_ids),
            callbacks=callbacks,
            max_queue_size=30,
            verbose=1,
            workers=args.num_workers)

        del model
        K.clear_session()
        gc.collect()