def evaluation(self, global_step):
    """Score the dev set with the estimator and report the resulting metrics.

    A non-training input function is built over ``self.dev_features``; the
    ``"prob"`` output of every prediction batch is concatenated and fed to
    ``PRF`` (arg-max labels) and ``eval_reranker`` (first probability column).
    The combined metrics are passed to ``print_metrics`` with
    ``save_dir=self._log_save_path``.

    Args:
        global_step: Training step; echoed to stdout and stored in the
            metrics under ``'global_step'``.

    Returns:
        Tuple ``(MAP * 100, MRR)``.
    """
    dev_input_fn = self.input_fn_builder(
        features=self.dev_features,
        seq_length=self.max_seq_length,
        is_training=False,
        drop_remainder=False,
    )
    batches = self.estimator.predict(dev_input_fn, yield_single_examples=False)
    probs = np.concatenate([batch["prob"] for batch in batches], axis=0)

    gold = np.array(self.dev_label)
    metrics = PRF(gold, probs.argmax(axis=-1))

    print('\n Global step is : ', global_step)

    # Column 0 of the probability matrix is what the reranker evaluation scores.
    MAP, AvgRec, MRR = eval_reranker(self.dev_cid, self.dev_label, probs[:, 0])

    extras = (
        ('MAP', MAP),
        ('AvgRec', AvgRec),
        ('MRR', MRR),
        ('global_step', global_step),
    )
    for key, value in extras:
        metrics[key] = value

    print_metrics(metrics, 'dev', save_dir=self._log_save_path)
    return MAP * 100, MRR
def print_info(self, iter_time, name, loss):
    """Report one named loss plus run settings every ``print_freq`` iterations.

    Args:
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
        name: Key under which ``loss`` is reported.
        loss: Loss value to report.
    """
    if np.mod(iter_time, self.flags.print_freq) != 0:
        return

    flags = self.flags
    ord_output = collections.OrderedDict()
    ord_output[name] = loss
    ord_output['dataset'] = flags.dataset
    ord_output['discriminator'] = flags.discriminator
    ord_output['train_interval'] = np.float32(flags.train_interval)
    ord_output['gpu_index'] = flags.gpu_index

    utils.print_metrics(iter_time, ord_output)
def test(args, model, data_path, fold, gpu, dicts, data_loader):
    """Evaluate ``model`` on every batch of ``data_loader`` and return metrics.

    Each batch is moved to the device selected by ``args.gpu`` (``-1`` means
    CPU), run through the model without gradient tracking, and the sigmoid of
    the model output is collected both raw and rounded to 0/1.

    Args:
        args: Run configuration; only ``args.gpu`` is read here.
        model: Callable as ``model(inputs_id, labels, None)`` returning
            ``(output, loss)``.
        data_path: Training-data path; ``'train'`` is replaced by ``fold`` for
            the log message only.
        fold: Name of the evaluated split; also used in the returned loss key.
        gpu: Unused in this function (the device comes from ``args.gpu``).
        dicts: Mapping whose ``'ind2c'`` entry determines the label count.
        data_loader: Iterable of 4-tuples
            ``(inputs_id, labels, text_inputs, inputs_mask)``.

    Returns:
        The dict produced by ``all_metrics`` with the mean batch loss added
        under ``'loss_<fold>'``.
    """
    filename = data_path.replace('train', fold)
    if args.gpu != -1:
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = torch.device('cpu')
    print('file for evaluation: %s' % filename)

    num_labels = len(dicts['ind2c'])
    y, yhat, yhat_raw, losses = [], [], [], []

    model.eval()
    # One no_grad scope around the whole pass; iterate the loader directly
    # instead of pairing range(len(...)) with next().
    with torch.no_grad():
        for inputs_id, labels, _text_inputs, _inputs_mask in data_loader:
            inputs_id, labels = inputs_id.to(device), labels.to(device)

            output, loss = model(inputs_id, labels, None)
            probs = torch.sigmoid(output).data.cpu().numpy()

            losses.append(loss.item())
            y.append(labels.data.cpu().numpy())
            yhat_raw.append(probs)
            yhat.append(np.round(probs))

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    # A single cut-off for the 50-label setting, two cut-offs otherwise.
    k = 5 if num_labels == 50 else [8, 15]
    metrics = all_metrics(yhat, y, k=k, yhat_raw=yhat_raw)
    print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics
def train(model, train_data_loader, val_data_loader, loss_fn, optimizer, n_epochs, model_name):
    """Run the train/validate loop and checkpoint the best validation model.

    For each epoch the model is trained with ``run_batch`` and, when a
    validation loader is given, evaluated under ``paddle.no_grad()``. Whenever
    ``val_metrics[1]`` exceeds the best value seen so far, the state returned
    by ``get_model_params_state`` is saved to ``"<model_name>.pdparams"``.

    NOTE(review): ``args`` is not a parameter; it is read from the enclosing
    module scope when a checkpoint is written.

    Args:
        model: Model exposing ``train()`` and ``eval()``.
        train_data_loader: Loader passed to ``run_batch`` in ``"train"`` mode.
        val_data_loader: Loader for validation; falsy to skip validation.
        loss_fn: Loss function forwarded to ``run_batch``.
        optimizer: Optimizer forwarded to ``run_batch``.
        n_epochs: Number of epochs (1-based in the log output).
        model_name: Stem of the checkpoint file name.
    """
    best_auprc = -1

    for epoch_i in range(1, n_epochs + 1):
        start = time.time()

        # Training pass
        model.train()
        train_loss, train_metrics = run_batch(
            model, optimizer, train_data_loader, epoch_i, "train", loss_fn)

        # Validation pass
        model.eval()
        with paddle.no_grad():
            if val_data_loader:
                val_loss, val_metrics = run_batch(
                    model, optimizer, val_data_loader, epoch_i, "val", loss_fn)
                auprc = val_metrics[1]
                if best_auprc < auprc:
                    current_state = get_model_params_state(
                        model, args, epoch_i, *val_metrics)
                    paddle.save(current_state, f"{model_name}.pdparams")
                    best_auprc = val_metrics[1]

        if train_data_loader:
            elapsed = time.time() - start
            print(f"\n#### Epoch {epoch_i} time {elapsed:.4f}s")
            print_metrics(train_loss, 0, 0)

        if val_data_loader:
            print(f"#### Validation epoch {epoch_i}")
            print_metrics(val_loss, *val_metrics)
def main():
    """Fit a random forest and a logistic regression on the accident data.

    Reads the balanced accident CSV, ordinal-encodes and standardises the
    selected columns, holds out 20% for testing, then for each model prints
    metrics and produces the ROC, feature-importance and permutation-importance
    outputs through ``utils``.
    """
    path = '../data/accidents'
    data = pd.read_csv(f'{path}/accident_data_clean_balanced.csv', header=0)

    # Feature columns
    cat_cols = [
        'roadway_type', 'intersection', 'light_condition',
        'atmospheric_conditions', 'manner_of_collision', 'body_type',
        'vehicle_conditions', 'part_of_day',
    ]
    binary_cols = [
        'land_use_urban', 'national_highway_system',
        'previous_dwi_convictions', 'previous_speeding_convictions',
        'speeding_related', 'driver_vision_obscured', 'is_weekend',
        'multiple_vehicles', 'nonmotorist_involved', 'multiple_motorists',
        'drunk_driver_involved',
    ]
    numeric_cols = ['vehicle_year', 'speed_limit']

    data[cat_cols] = data[cat_cols].apply(lambda x: x.astype('category'))

    labels = data['multiple_fatalities']
    features = data[cat_cols + binary_cols + numeric_cols]
    feature_names = features.columns

    # All selected columns are ordinal-encoded, then standardised, using the
    # full data set (i.e. before the train/test split).
    encoded = OrdinalEncoder().fit_transform(features)
    features = StandardScaler().fit_transform(encoded)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=2020)

    print('Class Balance')
    print(y_test.value_counts())
    print()

    # (display name, file suffix, estimator)
    candidates = [
        ('Random Forest', 'rf',
         RandomForestClassifier(n_estimators=100, min_samples_leaf=5, random_state=2020)),
        ('Logistic Regression', 'lr',
         LogisticRegressionCV(cv=5, scoring='f1', max_iter=1000, random_state=2020)),
    ]

    for name, suffix, model in candidates:
        print(name)
        print('-' * 20)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_probs = model.predict_proba(X_test)[:, 1]

        utils.print_metrics(y_test, y_pred)
        utils.roc_curve(y_test, y_probs, name, suffix)
        utils.feature_importance(model, feature_names, name, suffix)
        utils.permutation_importances(model, X_test, y_test, feature_names, name, suffix)
        utils.permutation_importances(model, X_train, y_train, feature_names, name, suffix,
                                      dataset='train')
        print('#' * 50)
def print_info_integrated(self, loss, iter_time):
    """Report the four integrated losses every ``print_freq`` iterations.

    Args:
        loss: Indexable; positions 0-3 are reported as ``G_gen_loss``,
            ``Dy_dis_loss``, ``F_gen_loss`` and ``Dx_dis_loss``.
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
    """
    if np.mod(iter_time, self.flags.print_freq) != 0:
        return

    loss_names = ('G_gen_loss', 'Dy_dis_loss', 'F_gen_loss', 'Dx_dis_loss')

    entries = [('tar_iters', self.flags.iters)]
    entries.extend((label, loss[pos]) for pos, label in enumerate(loss_names))
    entries.append(('gpu_index', self.flags.gpu_index))

    utils.print_metrics(iter_time, collections.OrderedDict(entries))
def test(args, model, data_path, fold, gpu, dicts, data_loader):
    """Evaluate ``model`` on every batch of ``data_loader`` and return metrics.

    Two batch layouts are supported, selected by whether ``args.model``
    contains ``"bert"``:

    * bert: ``(inputs_id, segments, masks, labels)`` and the model is called
      as ``model(inputs_id, segments, masks, labels)``;
    * otherwise: ``(inputs_id, labels, text_inputs)`` and the model is called
      as ``model(inputs_id, labels, text_inputs)``.

    Args:
        args: Run configuration; only ``args.model`` is read here.
        model: Model returning ``(output, loss)``.
        data_path: Training-data path; ``'train'`` is replaced by ``fold`` for
            the log message only.
        fold: Name of the evaluated split; also used in the returned loss key.
        gpu: CUDA device index; tensors are moved to it when ``gpu >= 0``.
        dicts: Mapping whose ``'ind2c'`` entry determines the label count.
        data_loader: Iterable of batches in one of the layouts above.

    Returns:
        The dict produced by ``all_metrics`` with the mean batch loss added
        under ``'loss_<fold>'``.
    """
    filename = data_path.replace('train', fold)
    print('file for evaluation: %s' % filename)

    num_labels = len(dicts['ind2c'])
    y, yhat, yhat_raw, losses = [], [], [], []

    model.eval()

    # The batch layout does not change during the pass, so decide it once.
    is_bert = "bert" in args.model

    with torch.no_grad():
        for batch in data_loader:
            if is_bert:
                inputs_id, segments, masks, labels = batch
                inputs_id = torch.LongTensor(inputs_id)
                segments = torch.LongTensor(segments)
                masks = torch.LongTensor(masks)
                labels = torch.FloatTensor(labels)
                if gpu >= 0:
                    inputs_id = inputs_id.cuda(gpu)
                    segments = segments.cuda(gpu)
                    masks = masks.cuda(gpu)
                    labels = labels.cuda(gpu)
                output, loss = model(inputs_id, segments, masks, labels)
            else:
                inputs_id, labels, text_inputs = batch
                inputs_id = torch.LongTensor(inputs_id)
                labels = torch.FloatTensor(labels)
                if gpu >= 0:
                    # text_inputs is moved as delivered by the loader; it is
                    # not converted to a tensor here.
                    inputs_id = inputs_id.cuda(gpu)
                    labels = labels.cuda(gpu)
                    text_inputs = text_inputs.cuda(gpu)
                output, loss = model(inputs_id, labels, text_inputs)

            probs = torch.sigmoid(output).data.cpu().numpy()

            losses.append(loss.item())
            y.append(labels.data.cpu().numpy())
            yhat_raw.append(probs)
            yhat.append(np.round(probs))

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    # A single cut-off for the 50-label setting, two cut-offs otherwise.
    k = 5 if num_labels == 50 else [8, 15]
    metrics = all_metrics(yhat, y, k=k, yhat_raw=yhat_raw)
    print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics
def main(config_path='./configs/config.yaml'):
    """Train a model as described by a config file, with early stopping.

    Datasets, loaders, model, optimizer and loss are looked up by name (the
    ``type`` entries of the config) in the ``data``, ``models``, ``optims``
    and ``losses`` namespaces and built from the matching ``params`` entries.
    Per-epoch train and validation metrics are printed and written to two
    summary writers under ``config.tb_dir``.

    The summary writers are closed even if training raises, so no event file
    is left open on failure.

    Args:
        config_path: Path to the configuration file given to ``load_config``.
    """
    config = load_config(config_path)
    init_experiment(config)
    set_random_seed(config.seed)

    train_dataset = getattr(data, config.train.dataset.type)(
        config.data_root, **vars(config.train.dataset.params))
    train_loader = getattr(data, config.train.loader.type)(
        train_dataset, **vars(config.train.loader.params))

    val_dataset = getattr(data, config.val.dataset.type)(
        config.data_root, **vars(config.val.dataset.params))
    val_loader = getattr(data, config.val.loader.type)(
        val_dataset, **vars(config.val.loader.params))

    device = torch.device(config.device)

    model = getattr(models, config.model.type)(**vars(config.model.params)).to(device)
    optimizer = getattr(optims, config.optim.type)(
        model.parameters(), **vars(config.optim.params))
    # No scheduler is configured; the step call below is a hook for one.
    scheduler = None
    loss_f = getattr(losses, config.loss.type)(**vars(config.loss.params))

    early_stopping = EarlyStopping(save=config.model.save,
                                   path=config.model.save_path,
                                   **vars(config.stopper.params))

    train_writer = SummaryWriter(log_dir=os.path.join(config.tb_dir, 'train'))
    val_writer = SummaryWriter(log_dir=os.path.join(config.tb_dir, 'val'))

    try:
        for epoch in range(1, config.epochs + 1):
            print(f'Epoch {epoch}')

            train_metrics = train(model, optimizer, train_loader, loss_f, device)
            print_metrics('Train', train_metrics)
            write_metrics(epoch, train_metrics, train_writer)

            val_metrics = val(model, val_loader, loss_f, device)
            print_metrics('Val', val_metrics)
            write_metrics(epoch, val_metrics, val_writer)

            # will save the best model to disk
            early_stopping(val_metrics['avg_weighted_loss'], model)
            if early_stopping.early_stop:
                print(f'Early stopping after {epoch} epochs.')
                break

            if scheduler:
                scheduler.step()
    finally:
        train_writer.close()
        val_writer.close()

    if config.model.save:
        torch.save(
            model.state_dict(),
            config.model.save_path.replace('checkpoint', 'last_checkpoint'))
def print_info(self, loss, iter_time):
    """Report discriminator/generator losses every ``print_freq`` iterations.

    Args:
        loss: Indexable; ``loss[0]`` is reported as ``d_loss`` and ``loss[1]``
            as ``g_loss``.
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
    """
    flags = self.flags
    if np.mod(iter_time, flags.print_freq) != 0:
        return

    fields = [
        ('cur_iter', iter_time),
        ('tar_iters', flags.iters),
        ('batch_size', flags.batch_size),
        ('d_loss', loss[0]),
        ('g_loss', loss[1]),
        ('dataset', flags.dataset),
        ('gpu_index', flags.gpu_index),
    ]
    utils.print_metrics(iter_time, collections.OrderedDict(fields))
def print_info(self, loss):
    """Report the four model losses every ``print_freq`` iterations.

    The iteration counter is taken from ``self.iter_time``.

    Args:
        loss: Indexable; positions 0-3 are reported as ``G_loss``,
            ``Dy_loss``, ``F_loss`` and ``Dx_loss``.
    """
    step = self.iter_time
    if np.mod(step, self.flags.print_freq) != 0:
        return

    ord_output = collections.OrderedDict()
    for pos, label in enumerate(('G_loss', 'Dy_loss', 'F_loss', 'Dx_loss')):
        ord_output[label] = loss[pos]
    ord_output['dataset'] = self.dataset.name
    ord_output['gpu_index'] = self.flags.gpu_index

    utils.print_metrics(step, ord_output)
def measure(self, generated, vessels, masks, num_data, iter_time, phase, total_time):
    """Compute segmentation quality measures, print them and return a score.

    Args:
        generated: Model outputs to evaluate.
        vessels: Reference vessel maps.
        masks: Masks restricting which pixels are evaluated.
        num_data: Number of processed items; divisor for the average time.
        iter_time: Iteration forwarded to the printer and to
            ``self.model.measure_assign``.
        phase: ``'train'`` records the measures through the model;
            ``'test'`` stores the masked pixel values as ``.npy`` files in
            ``self.auc_out_dir``.
        total_time: Total processing time for ``num_data`` items.

    Returns:
        ``dice_coeff + acc + auc_pr`` (reported as ``auc_sum``).
    """
    # Restrict both the reference and the prediction to pixels inside the mask.
    vessels_in_mask, generated_in_mask = utils.pixel_values_in_mask(
        vessels, generated, masks)

    # Average processing time per item, multiplied by 1000.
    avg_pt = (total_time / num_data) * 1000

    # Area under the ROC and precision-recall curves.
    auc_roc = utils.AUC_ROC(vessels_in_mask, generated_in_mask)
    auc_pr = utils.AUC_PR(vessels_in_mask, generated_in_mask)

    # Binarise with Otsu's threshold before computing the Dice coefficient.
    binarys_in_mask = utils.threshold_by_otsu(generated, masks)
    dice_coeff = utils.dice_coefficient_in_train(vessels_in_mask, binarys_in_mask)
    acc, sensitivity, specificity = utils.misc_measures(
        vessels_in_mask, binarys_in_mask)

    score = auc_pr + auc_roc + dice_coeff + acc + sensitivity + specificity
    # Value returned to the caller.
    auc_sum = dice_coeff + acc + auc_pr

    ord_output = collections.OrderedDict()
    ord_output['auc_pr'] = auc_pr
    ord_output['auc_roc'] = auc_roc
    ord_output['dice_coeff'] = dice_coeff
    ord_output['acc'] = acc
    ord_output['sensitivity'] = sensitivity
    ord_output['specificity'] = specificity
    ord_output['score'] = score
    ord_output['auc_sum'] = auc_sum
    ord_output['best_auc_sum'] = self.best_auc_sum
    ord_output['avg_pt'] = avg_pt
    utils.print_metrics(iter_time, ord_output)

    if phase == 'train':
        # Recorded through the model in train mode only.
        self.model.measure_assign(auc_pr, auc_roc, dice_coeff, acc,
                                  sensitivity, specificity, score, iter_time)
    elif phase == 'test':
        # Stored in npy format for later evaluation.
        roc_path = os.path.join(self.auc_out_dir, "auc_roc.npy")
        pr_path = os.path.join(self.auc_out_dir, "auc_pr.npy")
        utils.save_obj(vessels_in_mask, generated_in_mask, roc_path, pr_path)

    return auc_sum
def main():
    """Build churn sequences from player histories and fit an SVM on them.

    Every CSV in ``player_dir`` is one player history indexed by
    ``timestamp`` (weeks 1..52). For each week that is not churned
    (``status != 2``) and has at least ``history`` weeks behind it, the five
    features of the last ``history`` weeks are flattened into one row. Rows
    with status 0 are collected as active ("ing"), all others as about to
    churn ("abt"). The active set is truncated to the size of the
    about-to-churn set before the data are shuffled, scaled, split 80/20 and
    classified.

    Reads the module-level names ``player_dir``, ``only_last_abt_week`` and
    ``history``.
    """
    csv_files = glob.glob(player_dir + "/*.csv")
    abt, ing = [], []
    n_files = len(csv_files)
    n_weeks = 52
    feature_cols = ('evolution', 'lvl_avg', 'time_hours',
                    'current_absence', 'week_present_ratio')

    for file_no, filename in enumerate(csv_files, start=1):
        print("elaborating file {} of {}".format(file_no, n_files))

        # get player history
        ph = pd.read_csv(filename)
        ph["timestamp"] = pd.to_numeric(ph["timestamp"], downcast='integer')
        ph.set_index(['timestamp'], inplace=True)

        for index in range(1, n_weeks + 1):
            # consider only the last ABT week: an about-to-churn week that is
            # followed by another one is relabelled as active
            if (only_last_abt_week and index < n_weeks
                    and ph.at[index, 'status'] == 1
                    and ph.at[index + 1, 'status'] == 1):
                ph.at[index, 'status'] = 0

            status = ph.at[index, 'status']
            # skip churned weeks and weeks without enough history
            if status == 2 or index <= history - 1:
                continue

            row = [ph.at[index - back, col]
                   for back in range(history)
                   for col in feature_cols]
            if status == 0:
                ing.append(row)  # active
            else:
                abt.append(row)  # about to churn

    print("ABT sequences: {}".format(len(abt)))
    print("ING sequences: {}".format(len(ing)))

    n_abt = len(abt)
    abt_labels = np.array([1] * n_abt)
    ing_labels = np.array([0] * len(ing))[:n_abt]
    abt = np.array(abt)
    ing = np.array(ing)[:n_abt]

    X, y = shuffle(np.concatenate((abt, ing)),
                   np.concatenate((abt_labels, ing_labels)))

    # scale the data
    X = StandardScaler().fit_transform(X)

    # prepare training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # use svm
    clf = SVC()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_pred, y_test)
    print("accuracy on test set: {:.3f}".format(acc))
    utils.print_metrics(y_pred, y_test)
def print_info(self, loss, iter_batch, iter_epoch, tar_batch):
    """Report batch/epoch progress and the total loss every ``print_freq`` batches.

    Args:
        loss: Indexable; ``loss[0]`` is reported as ``total_loss``.
        iter_batch: Current batch; nothing happens unless it is a multiple of
            ``self.flags.print_freq``.
        iter_epoch: Current epoch.
        tar_batch: Target number of batches.
    """
    flags = self.flags
    if np.mod(iter_batch, flags.print_freq) != 0:
        return

    ord_output = collections.OrderedDict()
    ord_output['cur_batch'] = iter_batch
    ord_output['tar_batch'] = tar_batch
    ord_output['cur_epoch'] = iter_epoch
    ord_output['tar_epochs'] = flags.epochs
    ord_output['batch_size'] = flags.batch_size
    ord_output['total_loss'] = loss[0]
    ord_output['gpu_index'] = flags.gpu_index

    utils.print_metrics(iter_batch, ord_output)
def main():
    """Predict with the BERT model on the test CSV, save and score the output.

    Loads tweets, targets and labels from ``data/all_test_clean.csv``, runs
    ``predict`` with the model from ``load_bert``, stores the predictions
    under ``preds/`` and prints the metrics against the labels.
    """
    tweets, targets, labels = load_csv('data/all_test_clean.csv')
    print('--- LOADED CSV ---')

    bert = load_bert()
    print('--- LOADED MODEL ---')

    predictions = predict(bert, tweets, targets)
    save_npy(predictions, 'ada_bert', 'preds/')
    print('--- SAVED PREDS ---')

    print_metrics(predictions, labels, 'ada_bert')
def train_ddi_vae(dim_z, hidden_layers_px, hidden_layers_qz, save_path):
    """Load the DDI data set and train a ``DdiVariationalAutoencoder`` on it.

    Args:
        dim_z: Dimensionality of the latent variable z.
        hidden_layers_px: Layer sizes of the network parameterising p(x|z).
        hidden_layers_qz: Layer sizes of the network parameterising q(z|x).
        save_path: Checkpoint path forwarded to ``VAE.train``.
    """
    # ---- Experiment parameters ----
    num_batches = 100     # minibatches per epoch; must divide the number of examples
    epochs = 3001         # passes through the full data set
    learning_rate = 3e-3  # ADAM learning rate
    l2_loss = 1e-6        # L2 regularisation weight
    seed = 31415          # RNG seed

    # ---- Load data set ----
    x_train, y_train, x_valid, y_valid, x_test, y_test = load_dataset(
        "/home/cdy/ykq/vae/ddi/train_dataset")

    # Print the (rows, columns) shape of every split.
    named_splits = (
        ('x_train', x_train), ('y_train', y_train),
        ('x_valid', x_valid), ('y_valid', y_valid),
        ('x_test', x_test), ('y_test', y_test),
    )
    utils.print_metrics(*[[label, arr.shape[0], arr.shape[1]]
                          for label, arr in named_splits])

    dim_x = x_train.shape[1]
    dim_y = y_train.shape[1]

    # ---- Train the variational auto-encoder ----
    VAE = DdiVariationalAutoencoder(
        dim_x=dim_x,
        dim_z=dim_z,
        hidden_layers_px=hidden_layers_px,
        hidden_layers_qz=hidden_layers_qz,
        l2_loss=l2_loss,
    )

    # draw_img draws original vs. reconstruction every n iterations
    # (0 disables drawing).
    VAE.train(
        x=x_train,
        x_valid=x_valid,
        epochs=epochs,
        num_batches=num_batches,
        save_path=save_path,
        learning_rate=learning_rate,
        seed=seed,
        stop_iter=30,
        print_every=10,
        draw_img=0,
    )
def print_info(self, loss, iter_time, epoch_time):
    """Report epoch progress and the four losses every ``print_freq`` iterations.

    Args:
        loss: Indexable; positions 0-3 are reported as ``G_loss``,
            ``Dy_loss``, ``F_loss`` and ``Dx_loss``.
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
        epoch_time: Current epoch.
    """
    flags = self.flags
    if np.mod(iter_time, flags.print_freq) != 0:
        return

    fields = [
        ('cur_epoch', epoch_time),
        ('tar_Epoch', flags.epoch),
        ('batch_size', flags.batch_size),
    ]
    fields += [(label, loss[pos]) for pos, label in
               enumerate(('G_loss', 'Dy_loss', 'F_loss', 'Dx_loss'))]
    fields += [
        ('dataset', flags.dataset),
        ('gpu_index', flags.gpu_index),
    ]
    utils.print_metrics(iter_time, collections.OrderedDict(fields))
def print_info(self, loss, iter_time):
    """Report content/style/tv/total losses every ``print_freq`` iterations.

    Args:
        loss: Indexable; positions 0-3 are reported as ``content_loss``,
            ``style_loss``, ``tv_loss`` and ``total_loss``.
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
    """
    if np.mod(iter_time, self.flags.print_freq) != 0:
        return

    ord_output = collections.OrderedDict()
    ord_output['cur_iter'] = iter_time
    ord_output['tar_iter'] = self.num_iters
    ord_output['batch_size'] = self.flags.batch_size
    ord_output['content_loss'] = loss[0]
    ord_output['style_loss'] = loss[1]
    ord_output['tv_loss'] = loss[2]
    ord_output['total_loss'] = loss[3]
    ord_output['gpu_index'] = self.flags.gpu_index

    utils.print_metrics(iter_time, ord_output)
def main(seg1_fname, seg2_fname, calc_rand_score=True, calc_rand_error=True,
         calc_variation_score=True, calc_variation_information=True,
         relabel2d=False, foreground_restricted=True, split_0_segment=True,
         other=None):
    '''
    Load two segmentations, compute their overlap matrix, evaluate every
    requested metric on it and print the results.

    Each metric contributes three entries to the printed results, keyed
    "<name> Full", "<name> Merge" and "<name> Split".
    '''
    print("Loading Data...")
    seg1 = io_utils.import_file(seg1_fname)
    seg2 = io_utils.import_file(seg2_fname)

    # Preprocessing steps selected by the two flags.
    prep_flags = [relabel2d, foreground_restricted]
    prep = utils.parse_fns(utils.prep_fns, prep_flags)
    seg1, seg2 = utils.run_preprocessing(seg1, seg2, prep)

    om = utils.calc_overlap_matrix(seg1, seg2, split_0_segment)

    # Metrics selected by the four calc_* flags.
    metric_flags = [calc_rand_score, calc_rand_error,
                    calc_variation_score, calc_variation_information]
    metrics = utils.parse_fns(utils.metric_fns, metric_flags)

    results = {}
    for name, metric_fn in metrics:
        # The metric function is given a "2D"-prefixed name when relabelling
        # is on; the result keys always use the plain name.
        full_name = "2D {}".format(name) if relabel2d else name

        full, merge, split = metric_fn(om, full_name, other)
        results["{} Full".format(name)] = full
        results["{} Merge".format(name)] = merge
        results["{} Split".format(name)] = split

    print("")
    utils.print_metrics(results)
def main():
    """Grid-search a KNN classifier on the one-hot encoded accident data.

    Reads the balanced accident CSV, one-hot encodes the categorical columns,
    holds out 20% for testing, tunes ``n_neighbors`` over the odd values
    1..19 with 5-fold CV on F1, then prints the best parameters, the test
    metrics and the ROC output.
    """
    path = '../data/accidents'
    data = pd.read_csv(f'{path}/accident_data_clean_balanced.csv', header=0)

    cat_cols = [
        'month', 'roadway_type', 'intersection', 'light_condition',
        'atmospheric_conditions', 'manner_of_collision', 'body_type',
        'vehicle_conditions', 'part_of_day',
    ]
    binary_cols = [
        'land_use_urban', 'national_highway_system',
        'previous_dwi_convictions', 'previous_speeding_convictions',
        'speeding_related', 'driver_vision_obscured', 'is_weekend',
        'multiple_vehicles', 'nonmotorist_involved', 'multiple_motorists',
        'drunk_driver_involved',
    ]
    numeric_cols = ['vehicle_year', 'speed_limit']

    data[cat_cols] = data[cat_cols].apply(lambda x: x.astype('category'))

    labels = data['multiple_fatalities']
    selected = cat_cols + binary_cols + numeric_cols
    features = data[selected]
    feature_names = features.columns

    # One-hot encode the categorical columns; no scaling is applied.
    features = pd.get_dummies(features, columns=cat_cols)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=2020)

    print('Class Balance')
    print(y_test.value_counts())
    print()

    search_space = {'n_neighbors': range(1, 20, 2)}
    model = GridSearchCV(estimator=KNeighborsClassifier(),
                         param_grid=search_space,
                         cv=5,
                         scoring='f1')
    model.fit(X_train, y_train)

    print(model.best_params_)
    print()

    y_pred = model.predict(X_test)
    y_probs = model.predict_proba(X_test)[:, 1]

    utils.print_metrics(y_test, y_pred)
    utils.roc_curve(y_test, y_probs, 'KNN', 'knn')
def print_results(plot=False):
    """Compute the evaluation metrics for the module-level predictions.

    Uses the globals ``y_test``, ``y_pred`` and ``y_train``.

    Args:
        plot: Forwarded to ``calculate_metrics``; when true the metrics are
            also printed with ``print_metrics``.

    Returns:
        Tuple ``(exp_var, mse, mae, r2, error_percentage, recall, precision)``
        so that callers can average over several runs.
    """
    from utils import print_error_hist, calculate_metrics, print_metrics, plot_conf_matrix
    import pandas as pd

    global y_test, y_pred, y_train

    (exp_var, mse, mae, r2,
     error_percentage, recall, precision) = calculate_metrics(y_test, y_pred, y_train, plot)

    summary = (exp_var, mse, mae, r2, error_percentage, recall, precision)
    if plot:
        print_metrics(*summary)

    # returned so that an average can be calculated by the caller
    return summary
def one_test(self, batch_dataset, config):
    """Evaluate on the test split of ``batch_dataset`` and print the metrics.

    Switches the object to evaluation mode (``is_train = False``) and resets
    ``cweight`` to ``[1., 1., 1.]`` before calling ``self.evaluate``.

    Args:
        batch_dataset: Provides ``batch_test_data``, ``test_steps_num`` and
            ``test_cID``.
        config: Only ``config.batch_size`` is read; test batches use twice
            that size.
    """
    eval_batch_size = 2 * config.batch_size
    test_data = list(batch_dataset.batch_test_data(eval_batch_size))
    steps = batch_dataset.test_steps_num
    cID = batch_dataset.test_cID

    self.is_train = False
    self.cweight = [1., 1., 1.]

    test_metrics, _ = self.evaluate(test_data, steps, 'test', cID)
    print_metrics(test_metrics, 'test',
                  categories_num=self.args.categories_num)
def print_info(self, loss, iter_time, num_try):
    """Report the mean context/prior/total losses every ``print_freq`` iterations.

    Args:
        loss: Indexable; the means of positions 0-2 are reported as
            ``context_loss``, ``prior_loss`` and ``total_loss``.
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
        num_try: Current try.
    """
    flags = self.flags
    if np.mod(iter_time, flags.print_freq) != 0:
        return

    ord_output = collections.OrderedDict()
    ord_output['num_try'] = num_try
    ord_output['tar_try'] = flags.num_try
    ord_output['cur_iter'] = iter_time
    ord_output['tar_iters'] = flags.iters
    ord_output['batch_size'] = flags.sample_batch
    for pos, label in enumerate(('context_loss', 'prior_loss', 'total_loss')):
        ord_output[label] = np.mean(loss[pos])
    ord_output['mask_type'] = flags.mask_type
    ord_output['gpu_index'] = flags.gpu_index

    utils.print_metrics(iter_time, ord_output)
def print_info(self, loss, iter_time):
    """Report the generator/discriminator loss breakdown every ``print_freq`` iterations.

    Args:
        loss: Indexable with at least nine entries; see ``loss_layout`` below
            for which position is reported under which name.
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
    """
    flags = self.flags
    if np.mod(iter_time, flags.print_freq) != 0:
        return

    # (reported name, position in ``loss``). Both cycle-loss entries read
    # position 3.
    loss_layout = (
        ('G_loss', 0),
        ('G_gen_loss', 1),
        ('G_cond_loss', 2),
        ('G_cycle_loss', 3),
        ('Dy_loss', 4),
        ('F_loss', 5),
        ('F_gen_loss', 6),
        ('F_cond_loss', 7),
        ('F_cycle_loss', 3),
        ('Dx_loss', 8),
    )

    ord_output = collections.OrderedDict()
    ord_output['cur_iter'] = iter_time
    ord_output['tar_iters'] = flags.iters
    ord_output['batch_size'] = flags.batch_size
    for label, pos in loss_layout:
        ord_output[label] = loss[pos]
    ord_output['dataset'] = flags.dataset
    ord_output['gpu_index'] = flags.gpu_index

    utils.print_metrics(iter_time, ord_output)
def k_stratified(self):
    '''
    Classify with a single stratified shuffle split (70% train, 30% test),
    which keeps the per-class proportions of the labels, and print the
    metrics of the predictions on the test part.
    '''
    splitter = StratifiedShuffleSplit(self.__label_list, n_iter=1,
                                      train_size=0.7, test_size=0.3)

    for train_index, test_index in splitter:
        print(len(train_index), len(test_index))

        instance_train = self.__data_vectorized[train_index]
        instance_test = self.__data_vectorized[test_index]
        label_train = self.__label_list[train_index]
        label_test = self.__label_list[test_index]

        fitted = self.__classifier.fit(instance_train, label_train)
        predicted = fitted.predict(instance_test)

        print_metrics(label_test, predicted)
def predict_labels(self, x_test, y_test):
    """Evaluate the classifier on a test set and print the four measures.

    The first ``dim_x`` columns of ``x_test`` are fed as ``x_labelled_mu`` and
    the next ``dim_x`` columns as ``x_labelled_lsgms``.

    Args:
        x_test: 2-D array with at least ``2 * self.dim_x`` columns.
        y_test: Labels fed to ``self.y_lab``.
    """
    # Initialise the variables registered in the TEST_VARIABLES collection.
    test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
    tf.variables_initializer(test_vars).run()

    dim_x = self.dim_x
    feed = {
        self.x_labelled_mu: x_test[:, :dim_x],
        self.x_labelled_lsgms: x_test[:, dim_x:2 * dim_x],
        self.y_lab: y_test,
    }
    fetches = [self.eval_accuracy, self.eval_cross_entropy,
               self.eval_precision, self.eval_recall]
    accuracy, cross_entropy, precision, recall = self.session.run(
        fetches, feed_dict=feed)

    utils.print_metrics('X',
                        ['Test', 'accuracy', accuracy],
                        ['Test', 'cross-entropy', cross_entropy],
                        ['Test', 'precision', precision],
                        ['Test', 'recall', recall])
def main(
    seg1_fname,
    seg2_fname,
    calc_rand_score=True,
    calc_rand_error=True,
    calc_variation_score=True,
    calc_variation_information=True,
    relabel2d=False,
    foreground_restricted=True,
    split_0_segment=True,
    other=None,
):
    """
    Compare two segmentation files: build their overlap matrix, run each
    selected metric on it and print all results.

    Every metric yields a (full, merge, split) triple that is stored under
    the keys "<name> Full", "<name> Merge" and "<name> Split".
    """
    print("Loading Data...")
    seg1 = io_utils.import_file(seg1_fname)
    seg2 = io_utils.import_file(seg2_fname)

    prep = utils.parse_fns(utils.prep_fns, [relabel2d, foreground_restricted])
    seg1, seg2 = utils.run_preprocessing(seg1, seg2, prep)

    om = utils.calc_overlap_matrix(seg1, seg2, split_0_segment)

    # Select the metric functions enabled by the calc_* flags.
    selected = [
        calc_rand_score,
        calc_rand_error,
        calc_variation_score,
        calc_variation_information,
    ]
    metrics = utils.parse_fns(utils.metric_fns, selected)

    results = {}
    for name, metric_fn in metrics:
        # Only the name handed to the metric function gets the "2D" prefix.
        if relabel2d:
            shown_name = "2D {}".format(name)
        else:
            shown_name = name

        f, m, s = metric_fn(om, shown_name, other)
        results.update({
            "{} Full".format(name): f,
            "{} Merge".format(name): m,
            "{} Split".format(name): s,
        })

    print("")
    utils.print_metrics(results)
def predict_labels(self, x_test, y_test):
    """Evaluate the classifier on a test set and print the four measures.

    The first ``dim_x`` columns of ``x_test`` are fed as ``x_labelled_mu`` and
    the next ``dim_x`` columns as ``x_labelled_lsgms``.

    Args:
        x_test: 2-D array with at least ``2 * self.dim_x`` columns.
        y_test: Labels fed to ``self.y_lab``.
    """
    test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
    # tf.initialize_variables is deprecated; tf.variables_initializer is its
    # direct replacement.
    tf.variables_initializer(test_vars).run()

    x_test_mu = x_test[:, :self.dim_x]
    x_test_lsgms = x_test[:, self.dim_x:2 * self.dim_x]

    accuracy, cross_entropy, precision, recall = self.session.run(
        [self.eval_accuracy, self.eval_cross_entropy,
         self.eval_precision, self.eval_recall],
        feed_dict={self.x_labelled_mu: x_test_mu,
                   self.x_labelled_lsgms: x_test_lsgms,
                   self.y_lab: y_test},
    )

    utils.print_metrics('X',
                        ['Test', 'accuracy', accuracy],
                        ['Test', 'cross-entropy', cross_entropy],
                        ['Test', 'precision', precision],
                        ['Test', 'recall', recall])
def measure(self, generated, vessels, masks, num_data, iter_time, phase, total_time):
    """Compute segmentation quality measures, print them and return the Dice score.

    Args:
        generated: Model outputs to evaluate.
        vessels: Reference vessel maps.
        masks: Masks restricting which pixels are evaluated.
        num_data: Number of processed items; divisor for the average time.
        iter_time: Iteration forwarded to the printer and to
            ``self.model.measure_assign``.
        phase: ``'train'`` records the measures through the model;
            ``'test'`` stores the masked pixel values as ``.npy`` files in
            ``self.auc_out_dir``.
        total_time: Total processing time for ``num_data`` items.

    Returns:
        The Dice coefficient of the Otsu-binarised prediction.
    """
    vessels_in_mask, generated_in_mask = utils.pixel_values_in_mask(
        vessels, generated, masks)

    # Average processing time per item, multiplied by 1000.
    avg_pt = (total_time / num_data) * 1000

    # Evaluation
    auc_roc = utils.AUC_ROC(vessels_in_mask, generated_in_mask)
    auc_pr = utils.AUC_PR(vessels_in_mask, generated_in_mask)
    binarys_in_mask = utils.threshold_by_otsu(generated, masks)
    dice_coeff = utils.dice_coefficient_in_train(vessels_in_mask, binarys_in_mask)
    acc, sensitivity, specificity = utils.misc_measures(
        vessels_in_mask, binarys_in_mask)
    score = auc_pr + auc_roc + dice_coeff + acc + sensitivity + specificity

    # Print information
    report = [
        ('auc_pr', auc_pr),
        ('auc_roc', auc_roc),
        ('dice_coeff', dice_coeff),
        ('acc', acc),
        ('sensitivity', sensitivity),
        ('specificity', specificity),
        ('score', score),
        ('best_dice_coeff', self.best_dice_coeff),
        ('avg_pt', avg_pt),
    ]
    utils.print_metrics(iter_time, collections.OrderedDict(report))

    if phase == 'train':
        # Recorded through the model.
        self.model.measure_assign(auc_pr, auc_roc, dice_coeff, acc,
                                  sensitivity, specificity, score, iter_time)
    elif phase == 'test':
        # Stored in npy format for later evaluation.
        roc_path = os.path.join(self.auc_out_dir, "auc_roc.npy")
        pr_path = os.path.join(self.auc_out_dir, "auc_pr.npy")
        utils.save_obj(vessels_in_mask, generated_in_mask, roc_path, pr_path)

    return dice_coeff
def one_train(self, epochs, batch_size, train_data, train_label, dev_data, dev_label):
    """Compile the model, then alternate one ``fit`` call with a dev evaluation.

    After every ``fit`` call the dev predictions are thresholded at 0.5,
    scored with ``PRF`` and printed together with the epoch number and the
    validation loss history of that call.

    Args:
        epochs: Number of fit/evaluate rounds.
        batch_size: Training batch size; prediction uses twice this size.
        train_data: Training inputs.
        train_label: Training targets.
        dev_data: Validation inputs.
        dev_label: Validation targets.
    """
    self.compile_model()

    for epoch_no in range(1, epochs + 1):
        history = self.model.fit(train_data, train_label,
                                 batch_size=batch_size,
                                 verbose=1,
                                 validation_data=(dev_data, dev_label))

        dev_out = self.model.predict(dev_data, batch_size=2 * batch_size, verbose=1)
        # Flatten the thresholded outputs into a 1-D vector of 0/1 labels.
        dev_pred = (dev_out > 0.5).astype('int32').reshape([-1])

        metrics = PRF(dev_label, dev_pred)
        metrics['epoch'] = epoch_no
        metrics['val_loss'] = history.history['val_loss']

        print_metrics(metrics,
                      metrics_type=self.__class__.__name__ + self.args.selfname,
                      save_dir=self.args.log_dir)
def main():
    """Grid-search a KNN classifier on the ordinal-encoded person data.

    Reads the person CSV, ordinal-encodes and standardises the selected
    columns, holds out 20% for testing, tunes ``n_neighbors`` over the odd
    values 1..19 with 5-fold CV on F1, then prints the best parameters, the
    test metrics and the ROC output.
    """
    path = '../data/persons'
    data = pd.read_csv(f'{path}/person_data_clean.csv', header=0)

    cat_cols = [
        'person_type', 'trafficway_type', 'manner_of_collision', 'body_type',
        'seating_position', 'ejection', 'safety_equipment_use',
    ]
    binary_cols = ['sex', 'land_use_urban', 'rollover', 'air_bag_deployed']
    numeric_cols = ['age']

    data[cat_cols] = data[cat_cols].apply(lambda x: x.astype('category'))

    labels = data['fatality']
    selected = cat_cols + binary_cols + numeric_cols
    features = data[selected]
    feature_names = features.columns

    # All selected columns are ordinal-encoded, then standardised, using the
    # full data set (i.e. before the train/test split).
    encoded = OrdinalEncoder().fit_transform(features)
    features = StandardScaler().fit_transform(encoded)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=2020)

    print('Class Balance')
    print(y_test.value_counts())
    print()

    search_space = {'n_neighbors': range(1, 20, 2)}
    model = GridSearchCV(estimator=KNeighborsClassifier(),
                         param_grid=search_space,
                         cv=5,
                         scoring='f1')
    model.fit(X_train, y_train)

    print(model.best_params_)
    print()

    y_pred = model.predict(X_test)
    y_probs = model.predict_proba(X_test)[:, 1]

    utils.print_metrics(y_test, y_pred)
    utils.roc_curve(y_test, y_probs, 'KNN', 'knn')
def test_seg_model(model, args):
    """Run the segmentation model over the test images and print the metrics.

    The test images are read from
    ``<args.data_dir + args.simu_type>/test_imgs``. Inputs and labels are
    moved to CUDA, so a GPU is required.

    Args:
        model: Segmentation model; switched to eval mode.
        args: Provides ``data_dir``, ``simu_type`` and ``batch_size``.
    """
    # Prepare the data set
    img_dir = os.path.join(args.data_dir + args.simu_type, "test_imgs")
    to_tensor = transforms.Compose([transforms.ToTensor(), ])
    test_dset = ChromosomeDataset(img_dir, transform=to_tensor)
    test_dataloader = DataLoader(test_dset, batch_size=args.batch_size,
                                 shuffle=False, num_workers=0)

    model.eval()

    metrics = defaultdict(float)
    epoch_samples = 0

    for inputs, labels in test_dataloader:
        inputs, labels = inputs.cuda(), labels.cuda()

        # Forward pass only; no gradient history is tracked.
        with torch.no_grad():
            outputs = model(inputs)
            # The returned loss is not used here; ``metrics`` is handed to
            # calc_loss, presumably to be accumulated in place (confirm).
            calc_loss(outputs, labels, metrics)

        epoch_samples += inputs.size(0)

    print_metrics(metrics, epoch_samples, "test")
def print_info(self, loss, iter_time, is_sup=True):
    """Report the supervised or unsupervised loss breakdown every ``print_freq`` iterations.

    Args:
        loss: Indexable; fifteen entries are read in supervised mode and
            seven in unsupervised mode (see the layouts below).
        iter_time: Current iteration; nothing happens unless it is a multiple
            of ``self.flags.print_freq``.
        is_sup: Selects the supervised (default) or unsupervised layout.
    """
    if np.mod(iter_time, self.flags.print_freq) != 0:
        return

    # (reported name, position in ``loss``). In both layouts the F cycle loss
    # reads the same position as the G cycle loss.
    if is_sup:
        layout = (
            ('G_loss_sup', 0),
            ('G_gen_loss_sup', 1),
            ('G_cond_loss', 2),
            ('G_gdl_loss', 3),
            ('G_perceptual_loss', 4),
            ('G_ssim_loss', 5),
            ('G_cycle_loss_sup', 6),
            ('Dy_loss_sup', 7),
            ('F_loss_sup', 8),
            ('F_gen_loss_sup', 9),
            ('F_cond_loss', 10),
            ('F_gdl_loss', 11),
            ('F_perceptual_loss', 12),
            ('F_ssim_loss', 13),
            ('F_cycle_loss_sup', 6),
            ('Dx_loss_sup', 14),
        )
    else:
        layout = (
            ('G_loss_unsup', 0),
            ('G_gen_loss_unsup', 1),
            ('G_cycle_loss_unsup', 2),
            ('Dy_loss_unsup', 3),
            ('F_loss_unsup', 4),
            ('F_gen_loss_unsup', 5),
            ('F_cycle_loss_unsup', 2),
            ('Dx_loss_unsup', 6),
        )

    ord_output = collections.OrderedDict()
    ord_output['tar_iters'] = self.flags.iters
    for label, pos in layout:
        ord_output[label] = loss[pos]
    ord_output['gpu_index'] = self.flags.gpu_index

    utils.print_metrics(iter_time, ord_output)
def train(self, x_labelled, y, x_unlabelled, epochs, x_valid, y_valid,
          print_every=1,
          learning_rate=3e-4,
          beta1=0.9,
          beta2=0.999,
          seed=31415,
          stop_iter=100,
          save_path=None,
          load_path=None):
    """Train the semi-supervised classifier with early stopping.

    Every ``print_every`` epochs the model is evaluated on the validation
    data; the checkpoint is saved whenever validation accuracy improves.
    Training stops once ``stop_iter`` epochs pass without an improvement.

    Note: ``x_unlabelled`` is shuffled in place on every epoch.

    Args:
        x_labelled: Labelled inputs; the first ``2 * self.dim_x`` columns
            hold the mu and lsgms parts.
        y: Labels for ``x_labelled`` (stacked to the right of it).
        x_unlabelled: Unlabelled inputs.
        epochs: Maximum number of epochs.
        x_valid: Validation inputs, laid out like ``x_labelled``.
        y_valid: Validation labels.
        print_every: Evaluate and print every this many epochs.
        learning_rate: Adam learning rate.
        beta1: Adam ``beta1``.
        beta2: Adam ``beta2``.
        seed: Seed for NumPy and TensorFlow.
        stop_iter: Epochs without improvement before stopping.
        save_path: Checkpoint path; a timestamped default is used if ``None``.
        load_path: ``'default'`` restores from ``self.save_path``; any other
            non-``None`` value restores from that path.
    """
    # ---- Session and summary ----
    if save_path is None:
        self.save_path = 'checkpoints/model_GC_{}-{}-{}_{}.cpkt'.format(
            self.num_lab, learning_rate, self.batch_size, time.time())
    else:
        self.save_path = save_path

    np.random.seed(seed)
    tf.set_random_seed(seed)

    with self.G.as_default():
        self.optimiser = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=beta1, beta2=beta2)
        self.train_op = self.optimiser.minimize(self.cost)
        # Replaces the deprecated tf.initialize_all_variables.
        init = tf.global_variables_initializer()
        self._test_vars = None

    _data_labelled = np.hstack([x_labelled, y])
    _data_unlabelled = x_unlabelled
    x_valid_mu = x_valid[:, :self.dim_x]
    x_valid_lsgms = x_valid[:, self.dim_x:2 * self.dim_x]

    with self.session as sess:

        sess.run(init)
        if load_path == 'default':
            self.saver.restore(sess, self.save_path)
        elif load_path is not None:
            self.saver.restore(sess, load_path)

        best_eval_accuracy = 0.
        # Defined up front so the report below cannot hit an unbound name
        # when the batch feeder yields nothing.
        training_cost = 0.
        stop_counter = 0

        for epoch in range(epochs):

            # ---- Shuffle data ----
            np.random.shuffle(_data_labelled)
            np.random.shuffle(_data_unlabelled)

            # ---- Training ----
            batches = utils.feed_numpy_semisupervised(
                self.num_lab_batch, self.num_ulab_batch,
                _data_labelled[:, :2 * self.dim_x],
                _data_labelled[:, 2 * self.dim_x:],
                _data_unlabelled)
            for x_l_mu, x_l_lsgms, y_l, x_u_mu, x_u_lsgms in batches:
                training_result = sess.run(
                    [self.train_op, self.cost],
                    feed_dict={self.x_labelled_mu: x_l_mu,
                               self.x_labelled_lsgms: x_l_lsgms,
                               self.y_lab: y_l,
                               self.x_unlabelled_mu: x_u_mu,
                               self.x_unlabelled_lsgms: x_u_lsgms})
                training_cost = training_result[1]

            # ---- Evaluation ----
            stop_counter += 1

            if epoch % print_every == 0:

                test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
                if test_vars:
                    # Rebuild the init op only when the collection changed.
                    if test_vars != self._test_vars:
                        self._test_vars = list(test_vars)
                        # Replaces the deprecated tf.initialize_variables.
                        self._test_var_init_op = tf.variables_initializer(test_vars)
                    self._test_var_init_op.run()

                eval_accuracy, eval_cross_entropy = sess.run(
                    [self.eval_accuracy, self.eval_cross_entropy],
                    feed_dict={self.x_labelled_mu: x_valid_mu,
                               self.x_labelled_lsgms: x_valid_lsgms,
                               self.y_lab: y_valid})

                if eval_accuracy > best_eval_accuracy:
                    best_eval_accuracy = eval_accuracy
                    self.saver.save(sess, self.save_path)
                    stop_counter = 0

                utils.print_metrics(epoch + 1,
                                    ['Training', 'cost', training_cost],
                                    ['Validation', 'accuracy', eval_accuracy],
                                    ['Validation', 'cross-entropy', eval_cross_entropy])

            if stop_counter >= stop_iter:
                print('Stopping GC training')
                print('No change in validation accuracy for {} iterations'.format(stop_iter))
                print('Best validation accuracy: {}'.format(best_eval_accuracy))
                print('Model saved in {}'.format(self.save_path))
                break
def train(self, x, x_valid, epochs, num_batches,
          print_every=1,
          learning_rate=3e-4,
          beta1=0.9,
          beta2=0.999,
          seed=31415,
          stop_iter=100,
          save_path=None,
          load_path=None,
          draw_img=1):
    """Train the variational auto-encoder with early stopping.

    Every ``print_every`` epochs the validation log-likelihood is evaluated;
    the checkpoint is saved whenever it improves. Training stops once
    ``stop_iter`` epochs pass without an improvement.

    Note: ``x`` is shuffled in place on every epoch.

    Args:
        x: Training data; its row count must be divisible by ``num_batches``.
        x_valid: Validation data.
        epochs: Maximum number of epochs.
        num_batches: Number of minibatches per epoch.
        print_every: Evaluate and print every this many epochs.
        learning_rate: Adam learning rate.
        beta1: Adam ``beta1``.
        beta2: Adam ``beta2``.
        seed: Seed for NumPy and TensorFlow.
        stop_iter: Epochs without improvement before stopping.
        save_path: Checkpoint path; a timestamped default is used if ``None``.
        load_path: ``'default'`` restores from ``self.save_path``; any other
            non-``None`` value restores from that path.
        draw_img: On evaluated epochs that are a multiple of this value, save
            a figure of five validation samples next to their
            reconstructions (reshaped to 28x28) under ``img/``. ``0``
            disables drawing.

    Raises:
        AssertionError: If the number of examples is not divisible by
            ``num_batches``.
    """
    self.num_examples = x.shape[0]
    self.num_batches = num_batches

    assert self.num_examples % self.num_batches == 0, '#Examples % #Batches != 0'

    self.batch_size = self.num_examples // self.num_batches

    # ---- Session and summary ----
    if save_path is None:
        self.save_path = 'checkpoints/model_VAE_{}-{}_{}.cpkt'.format(
            learning_rate, self.batch_size, time.time())
    else:
        self.save_path = save_path

    np.random.seed(seed)
    tf.set_random_seed(seed)

    with self.G.as_default():
        self.optimiser = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=beta1, beta2=beta2)
        self.train_op = self.optimiser.minimize(self.cost)
        # Replaces the deprecated tf.initialize_all_variables.
        init = tf.global_variables_initializer()
        self._test_vars = None

    with self.session as sess:

        sess.run(init)
        if load_path == 'default':
            self.saver.restore(sess, self.save_path)
        elif load_path is not None:
            self.saver.restore(sess, load_path)

        training_cost = 0.
        best_eval_log_lik = - np.inf
        stop_counter = 0

        for epoch in range(epochs):

            # ---- Shuffle data ----
            np.random.shuffle(x)

            # ---- Training ----
            for x_batch in utils.feed_numpy(self.batch_size, x):
                training_result = sess.run([self.train_op, self.cost],
                                           feed_dict={self.x: x_batch})
                training_cost = training_result[1]

            # ---- Evaluation ----
            stop_counter += 1

            if epoch % print_every == 0:

                test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES)
                if test_vars:
                    # Rebuild the init op only when the collection changed.
                    if test_vars != self._test_vars:
                        self._test_vars = list(test_vars)
                        # Replaces the deprecated tf.initialize_variables.
                        self._test_var_init_op = tf.variables_initializer(test_vars)
                    self._test_var_init_op.run()

                eval_log_lik, x_recon_eval = sess.run(
                    [self.eval_log_lik, self.x_recon_eval],
                    feed_dict={self.x: x_valid})

                if eval_log_lik > best_eval_log_lik:
                    best_eval_log_lik = eval_log_lik
                    self.saver.save(sess, self.save_path)
                    stop_counter = 0

                utils.print_metrics(epoch + 1,
                                    ['Training', 'cost', training_cost],
                                    ['Validation', 'log-likelihood', eval_log_lik])

                if draw_img > 0 and epoch % draw_img == 0:

                    import matplotlib
                    matplotlib.use('Agg')
                    import pylab
                    import seaborn as sns

                    # Row indices must lie in [0, N). random_integers(N)
                    # sampled 1..N inclusive, which could index one past the
                    # end and never selected row 0.
                    five_random = np.random.randint(0, x_valid.shape[0], size=5)
                    x_sample = x_valid[five_random]
                    x_recon_sample = x_recon_eval[five_random]

                    sns.set_style('white')
                    f, axes = pylab.subplots(5, 2, figsize=(8, 12))
                    for i, row in enumerate(axes):

                        # Left: original sample; right: its reconstruction.
                        row[0].imshow(x_sample[i].reshape(28, 28), vmin=0, vmax=1)
                        im = row[1].imshow(
                            x_recon_sample[i].reshape(28, 28), vmin=0, vmax=1,
                            cmap=sns.light_palette((1.0, 0.4980, 0.0549),
                                                   input="rgb", as_cmap=True))

                        pylab.setp([a.get_xticklabels() for a in row], visible=False)
                        pylab.setp([a.get_yticklabels() for a in row], visible=False)

                    f.subplots_adjust(left=0.0, right=0.9, bottom=0.0, top=1.0)
                    cbar_ax = f.add_axes([0.9, 0.1, 0.04, 0.8])
                    f.colorbar(im, cax=cbar_ax, use_gridspec=True)

                    pylab.tight_layout()
                    pylab.savefig('img/recon-' + str(epoch) + '.png', format='png')
                    pylab.clf()
                    pylab.close('all')

            if stop_counter >= stop_iter:
                print('Stopping VAE training')
                print('No change in validation log-likelihood for {} iterations'.format(stop_iter))
                print('Best validation log-likelihood: {}'.format(best_eval_log_lik))
                print('Model saved in {}'.format(self.save_path))
                break