def evaluate_model(config: Config, model: TransformersCRF, data_loader: DataLoader, name: str,
                   insts: List, print_each_type_metric: bool = False):
    ## evaluation
    p_dict, total_predict_dict, total_entity_dict = Counter(), Counter(), Counter()
    batch_size = data_loader.batch_size
    with torch.no_grad():
        for batch_id, batch in tqdm(enumerate(data_loader, 0), desc="--evaluating batch", total=len(data_loader)):
            one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
            batch_max_scores, batch_max_ids = model.decode(
                words=batch.input_ids.to(config.device),
                word_seq_lens=batch.word_seq_len.to(config.device),
                orig_to_tok_index=batch.orig_to_tok_index.to(config.device),
                input_mask=batch.attention_mask.to(config.device))
            # Accumulate per-entity-type counts: matched, predicted, and gold.
            batch_p, batch_predict, batch_total = evaluate_batch_insts(
                one_batch_insts, batch_max_ids, batch.label_ids, batch.word_seq_len, config.idx2labels)
            p_dict += batch_p
            total_predict_dict += batch_predict
            total_entity_dict += batch_total
    f1Scores = []
    if print_each_type_metric or config.print_detail_f1 or (config.earlystop_atr == "macro"):
        for key in total_entity_dict:
            precision_key, recall_key, fscore_key = get_metric(
                p_dict[key], total_entity_dict[key], total_predict_dict[key])
            print(f"[{key}] Prec.: {precision_key:.2f}, Rec.: {recall_key:.2f}, F1: {fscore_key:.2f}")
            f1Scores.append(fscore_key)
        if len(f1Scores) > 0:
            print(f"[{name} set Total] Macro F1: {sum(f1Scores) / len(f1Scores):.2f}")
    total_p = sum(p_dict.values())
    total_predict = sum(total_predict_dict.values())
    total_entity = sum(total_entity_dict.values())
    precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
    print(colored(f"[{name} set Total] Prec.: {precision:.2f}, Rec.: {recall:.2f}, Micro F1: {fscore:.2f}", 'blue'),
          flush=True)
    # For macro early stopping, report the macro-averaged F1 instead of micro F1.
    if config.earlystop_atr == "macro" and len(f1Scores) > 0:
        fscore = sum(f1Scores) / len(f1Scores)
    return [precision, recall, fscore]
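# --- Illustrative sketch (not from the source): the three-argument get_metric
# used by evaluate_model above takes (matched, gold, predicted) counts and
# returns percentage precision/recall/F1. Names and scaling are assumptions
# consistent with the call sites.
def get_metric(p_num, total_num, total_predicted_num):
    precision = p_num * 100.0 / total_predicted_num if total_predicted_num != 0 else 0.0
    recall = p_num * 100.0 / total_num if total_num != 0 else 0.0
    fscore = (2.0 * precision * recall / (precision + recall)
              if precision + recall != 0 else 0.0)
    return precision, recall, fscore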
def get_next_best_model(index, current_prediction, all_predictions, targets):
    """Return the index of the model whose averaged-in predictions most reduce
    the error, or -1 if no model improves on the current ensemble."""
    no_of_models = len(all_predictions)
    current_error = get_metric(current_prediction, targets)[0]
    next_best_model_index = -1
    for i in range(no_of_models):  # was Python 2 xrange
        # Running average: ensemble of `index` models plus candidate model i.
        temp_prediction = (index * current_prediction + all_predictions[i]) / (index + 1)
        metric = get_metric(temp_prediction, targets)
        if metric[0] < current_error:
            next_best_model_index = i
            current_error = metric[0]
    return next_best_model_index
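# --- Illustrative sketch (not from the source): a greedy driver that would
# call get_next_best_model, assuming get_metric(pred, targets)[0] is an error
# to minimize and predictions are stacked as 2-D numpy arrays (models x examples).
import numpy as np

def ensemble_prediction_greedy_sketch(validation_dataset, all_validation_predictions, all_test_predictions):
    errors = [get_metric(p, validation_dataset.labels)[0] for p in all_validation_predictions]
    best = int(np.argmin(errors))  # seed the ensemble with the single best model
    current_validation = all_validation_predictions[best].copy()
    current_test = all_test_predictions[best].copy()
    index = 1
    while True:
        nxt = get_next_best_model(index, current_validation,
                                  all_validation_predictions, validation_dataset.labels)
        if nxt == -1:  # no remaining model improves the validation error
            break
        current_validation = (index * current_validation + all_validation_predictions[nxt]) / (index + 1)
        current_test = (index * current_test + all_test_predictions[nxt]) / (index + 1)
        index += 1
    return current_test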
def main():
    import sys
    import pathlib
    __dir__ = pathlib.Path(os.path.abspath(__file__))
    sys.path.append(str(__dir__))
    sys.path.append(str(__dir__.parent.parent))
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from utils import Trainer
    from utils import get_post_processing
    from utils import get_metric
    config = anyconfig.load(open('config.yaml', 'rb'))
    train_loader = get_dataloader(config['dataset']['train'])
    validate_loader = get_dataloader(config['dataset']['validate'])
    criterion = build_loss(config['loss']).cuda()
    model = build_model(config['arch'])
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()
def test(model, criterion, val_loader, device):
    model.eval()
    # Run the model on some test examples
    with torch.no_grad():
        loss_total = 0
        mae_avg, f_score_avg = 0, 0
        tbar = tqdm(val_loader)
        for i, sample_batched in enumerate(tbar):
            rgb, dep, gt = (sample_batched['img'].to(device),
                            sample_batched['depth'].to(device),
                            sample_batched['gt'].to(device))
            output = model((rgb, dep))
            loss = criterion(output, gt)
            loss_total += loss.item()
            result = model.get_result(output)
            mae, f_score = utils.get_metric(sample_batched, result)
            mae_avg, f_score_avg = mae_avg + mae, f_score_avg + f_score
        loss_avg = loss_total / (i + 1)
        mae_avg = mae_avg / len(tbar)
        f_score_avg = (f_score_avg / len(tbar)).max().item()
        print(f'loss: {loss_avg:.3f} mae:{mae_avg:.4f} f_max:{f_score_avg:.4f}')
        wandb.log({
            'val_ave_loss': loss_avg,
            'val_ave_mae': mae_avg,
            'val_f_score_ave': f_score_avg
        })
        # Save the model in the exchangeable ONNX format
        # torch.onnx.export(model, sample_batched, "d3net_model.onnx")
        # wandb.save("d3net_model.onnx")
    return mae_avg, f_score_avg
def __init__(self, model_path, gpu_id=0):
    from models import build_model
    from data_loader import get_dataloader
    from post_processing import get_post_processing
    from utils import get_metric
    self.gpu_id = gpu_id
    if self.gpu_id is not None and isinstance(self.gpu_id, int) and torch.cuda.is_available():
        self.device = torch.device("cuda:%s" % self.gpu_id)
        torch.backends.cudnn.benchmark = True
    else:
        self.device = torch.device("cpu")
    print('load model:', model_path)
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    config = checkpoint['config']
    config['arch']['backbone']['pretrained'] = False
    self.validate_loader = get_dataloader(config['dataset']['validate'], config['distributed'])
    self.model = build_model(config['arch'].pop('type'), **config['arch'])
    self.model.load_state_dict(checkpoint['state_dict'])
    self.model.to(self.device)
    self.post_process = get_post_processing(config['post_processing'])
    self.metric_cls = get_metric(config['metric'])
def __init__(self, model_path, gpu_id=0):
    from models import build_model
    from data_loader import get_dataloader
    from post_processing import get_post_processing
    from utils import get_metric
    self.gpu_id = gpu_id
    if self.gpu_id is not None and isinstance(self.gpu_id, int) and torch.cuda.is_available():
        self.device = torch.device("cuda:%s" % self.gpu_id)
        torch.backends.cudnn.benchmark = True
    else:
        self.device = torch.device("cpu")
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    config = checkpoint['config']
    config['arch']['backbone']['pretrained'] = False
    # Hard-coded dataset lists override whatever paths the checkpoint was trained with.
    config['dataset']['train']['dataset']['args']['data_path'][0] = '/home/share/gaoluoluo/dbnet/datasets/train_zhen.txt'
    config['dataset']['validate']['dataset']['args']['data_path'][0] = '/home/share/gaoluoluo/dbnet/datasets/test_zhen.txt'
    print("config:", config)
    self.validate_loader = get_dataloader(config['dataset']['validate'], config['distributed'])
    self.model = build_model(config['arch'])
    self.model.load_state_dict(checkpoint['state_dict'])
    self.model.to(self.device)
    self.post_process = get_post_processing(config['post_processing'])
    self.metric_cls = get_metric(config['metric'])
def benchmark(self):
    model_path = os.path.join(BROOT, f'models/{self.dataset}-{self.model}-2.0')
    valid_data = os.path.join(BROOT, get_valid_data(self.dataset, self.model))
    inference_bin = os.path.join(BROOT, 'build/inference')
    # NOTE: `mode` and `batch_size` are assumed to be defined in an enclosing
    # scope (e.g., module level); self.mode and batch_size are reused below.
    ret = subprocess.run([
        inference_bin,
        '--logtostderr',
        '--model', model_path,
        '--data', valid_data,
        '--mode', mode,
        '--batch_size', str(batch_size),
        '--num_labels', get_num_labels(self.dataset),
        '--seq_lens', str(self.seq_len),
        '--min_graph', str(self.args.min_graph),
        '--ignore_copy', str(self.args.ignore_copy),
    ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if ret.returncode != 0:
        print(ret.stderr.decode('ascii'))
        assert False, 'Prediction failed.'
    # Parse throughput and per-example predictions from the binary's stdout.
    prediction = list()
    for line in ret.stdout.decode('ascii').splitlines():
        if line.startswith('Sents/s'):
            _, qps = line.split()
        else:
            prediction.append(int(line))
    prediction = np.asarray(prediction)
    testcase = os.path.join(BROOT, get_valid_labels(self.dataset))
    labels = read_label(testcase)
    metric = get_metric(self.dataset)
    ret = metric(prediction, labels)
    stat = {'Sents/s': float(qps)}
    stat['metric_value'] = ret
    stat['metric'] = metric.__name__
    stat['batch_size'] = batch_size
    stat['dataset'] = self.dataset
    stat['model'] = self.model + '-2.0'
    stat['mode'] = self.mode
    if self.seq_len == 0:
        stat['seq_len'] = 'dynamic'
    else:
        stat['seq_len'] = self.seq_len
    return stat
def get_sample_scores(self, epoch, pred_list):
    # Double argsort turns scores into ranks; column 0 is the 0-based rank of
    # the ground-truth item among the sampled candidates.
    pred_list = (-pred_list).argsort().argsort()[:, 0]
    HIT_1, NDCG_1, MRR = get_metric(pred_list, 1)
    HIT_5, NDCG_5, MRR = get_metric(pred_list, 5)
    HIT_10, NDCG_10, MRR = get_metric(pred_list, 10)
    post_fix = {
        "Epoch": epoch,
        "HIT@1": '{:.4f}'.format(HIT_1),
        "NDCG@1": '{:.4f}'.format(NDCG_1),
        "HIT@5": '{:.4f}'.format(HIT_5),
        "NDCG@5": '{:.4f}'.format(NDCG_5),
        "HIT@10": '{:.4f}'.format(HIT_10),
        "NDCG@10": '{:.4f}'.format(NDCG_10),
        "MRR": '{:.4f}'.format(MRR),
    }
    print(post_fix)
    with open(self.args.log_file, 'a') as f:
        f.write(str(post_fix) + '\n')
    return [HIT_1, NDCG_1, HIT_5, NDCG_5, HIT_10, NDCG_10, MRR], str(post_fix)
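# --- Illustrative sketch (not from the source): the two-argument
# get_metric(pred_list, k) assumed above, where pred_list holds the 0-based
# rank of the ground-truth item per example; returns (HIT@k, NDCG@k, MRR).
import numpy as np

def get_metric(pred_list, topk=10):
    NDCG, HIT, MRR = 0.0, 0.0, 0.0
    for rank in pred_list:
        MRR += 1.0 / (rank + 1.0)
        if rank < topk:
            NDCG += 1.0 / np.log2(rank + 2.0)  # DCG of a single relevant item
            HIT += 1.0
    n = float(len(pred_list))
    return HIT / n, NDCG / n, MRR / n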
def main(_):
    logger.info('Loading Models From {:}'.format(FLAGS.output_dir))
    logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None
    test_dataset = DataSet(csv_file_path=FLAGS.test_file,
                           smile_col_name=FLAGS.smile_col,
                           target_col_name=FLAGS.target_col,
                           logp_col_name=logp_col_name,
                           contract_rings=FLAGS.contract_rings)
    validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                 smile_col_name=FLAGS.smile_col,
                                 target_col_name=FLAGS.target_col,
                                 logp_col_name=logp_col_name,
                                 contract_rings=FLAGS.contract_rings)
    validation_predictions = np.empty((len(FLAGS.model_names), validation_dataset.num_examples))
    test_predictions_ = np.empty((len(FLAGS.model_names), test_dataset.num_examples))
    for i in range(len(FLAGS.model_names)):  # was Python 2 xrange
        predictions = get_prediction_from_model(FLAGS.model_names[i],
                                                FLAGS.model_params[i][0],
                                                FLAGS.model_params[i][1],
                                                FLAGS.model_params[i][2],
                                                test_dataset, validation_dataset)
        validation_predictions[i, :] = predictions[0]
        test_predictions_[i, :] = predictions[1]
    ensemble_predictor = [ensemble_prediction_rf_regression,
                          ensemble_prediction_top_k,
                          ensemble_prediction_greedy]
    predictor_names = ["Random forest regression", "Top 10", "Greedy"]
    for fun, name in zip(ensemble_predictor, predictor_names):
        ensemble_predictions = fun(validation_dataset, validation_predictions, test_predictions_)
        prediction_metric = get_metric(ensemble_predictions, test_dataset.labels)
        logger.info("Method {:} RMSE: {:}, AAE: {:}, R: {:}".format(
            name, prediction_metric[0], prediction_metric[1], prediction_metric[2]))
        final_prediction_path = os.path.join(FLAGS.output_dir, "ensemble_test_prediction.csv")
        save_results(final_prediction_path, test_dataset.labels, ensemble_predictions)
    logging.info("------------------------------DONE------------------------------")
    logging.info("")
    logging.info("")
def ensemble_prediction_top_k(validation_dataset, all_validation_predictions, all_test_predictions, k=10):
    no_of_models = len(all_validation_predictions)
    errors = []
    for i in range(no_of_models):  # was Python 2 xrange
        metric = get_metric(all_validation_predictions[i], validation_dataset.labels)
        errors.append(metric[0])
    errors = np.array(errors)
    index_of_best_networks = errors.argsort()[:k]
    # logging.info("Top {:} models: {:}".format(k, index_of_best_networks))
    ensemble_predictions = np.mean(all_test_predictions[index_of_best_networks], axis=0)
    return ensemble_predictions
def main(config):
    import torch
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from trainer import Trainer
    from post_processing import get_post_processing
    from utils import get_metric
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://",
                                             world_size=torch.cuda.device_count(),
                                             rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank
    train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
    assert train_loader is not None
    if 'validate' in config['dataset']:
        validate_loader = get_dataloader(config['dataset']['validate'], False)
    else:
        validate_loader = None
    criterion = build_loss(config['loss'].pop('type'), **config['loss']).cuda()
    config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    config['arch']['backbone']['pretrained'] = False
    model = build_model(config['arch']['type'], **config['arch'])
    post_p = get_post_processing(config['post_processing'])
    metric = get_metric(config['metric'])
    trainer = Trainer(config=config,
                      model=model,
                      criterion=criterion,
                      train_loader=train_loader,
                      post_process=post_p,
                      metric_cls=metric,
                      validate_loader=validate_loader)
    trainer.train()
def __init__(self, model_path, gpu_id=0):
    from models import get_model
    from data_loader import get_dataloader
    from post_processing import get_post_processing
    from utils import get_metric
    self.device = torch.device("cuda:%s" % gpu_id)
    if gpu_id is not None:
        torch.backends.cudnn.benchmark = True
    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
    config = checkpoint['config']
    config['arch']['args']['pretrained'] = False
    self.validate_loader = get_dataloader(config['dataset']['validate'], config['distributed'])
    self.model = get_model(config['arch'])
    self.model.load_state_dict(checkpoint['state_dict'])
    self.model.to(self.device)
    self.post_process = get_post_processing(config['post_processing'])
    self.metric_cls = get_metric(config['metric'])
def single_target_training(cfg, X_train, y_train, X_test, logger):
    model_name = list(cfg["model"].keys())[0]
    metric = utils.get_metric(cfg)
    metric_name = cfg["metric"]["name"]
    experiment_id = utils.get_experiment_id(cfg)
    run_name = cfg["mlflow"]["run_name"]
    logger.info(f"experiment config: {run_name}")
    logger.info(f"CV method: {cfg['split']['name']} {cfg['split']['params']['n_splits']}-Fold")
    with mlflow.start_run(run_name=run_name, experiment_id=experiment_id):
        trainer = get_trainer(cfg, model_name, X_train, y_train, X_test)
        y_oof, models, y_pred = training_step(trainer)
        metrics = utils.calc_metrics(cfg, metric, y_train.values, y_oof)
        fig = utils.plot_feature_importance(models, X_train, model_name)
        logger.info(f"CV score : {metrics[metric_name]}")
        utils.mlflow_logger(cfg, metrics, fig, targets=None)
    return y_pred
def multi_target_training(cfg, X_train, y_trains, X_test, logger):
    model_name = list(cfg["model"].keys())[0]
    targets = cfg["training"]["targets"]
    y_oof = np.zeros((len(X_train), len(targets)))
    y_pred = np.zeros((len(X_test), len(targets)))
    metric = utils.get_metric(cfg)
    metric_name = cfg["metric"]["name"]
    figs = []
    experiment_id = utils.get_experiment_id(cfg)
    run_name = cfg["mlflow"]["run_name"]
    logger.info(f"experiment config: {run_name}")
    logger.info(f"CV method: {cfg['split']['name']} {cfg['split']['params']['n_splits']}-Fold")
    with mlflow.start_run(run_name=run_name, experiment_id=experiment_id):
        for i, target in enumerate(targets):
            logger.info(f"Training for {target}")
            y_train = y_trains[target]
            trainer = get_trainer(cfg, model_name, X_train, y_train, X_test)
            y_oof_, models, y_pred_ = training_step(trainer)
            y_oof[:, i] = y_oof_
            y_pred[:, i] = y_pred_
            fig = utils.plot_feature_importance(models, X_train, model_name, target)
            figs.append(fig)
        metrics = utils.calc_metrics(cfg, metric, y_trains.values, y_oof)
        logger.info(f"CV score : {metrics[metric_name]}")
        utils.mlflow_logger(cfg, metrics, figs, targets)
    return y_pred
def train(self, sess, epochs, train_dataset, validation_dataset, output_dir,
          enable_plotting=0, Targets_UnNormalization_fn=lambda x: x):
    '''
    Returns:
        dict with keys ['primary', 'secondary'] mapping to validation scores
        (accuracy, auc OR rmse, r2)
    '''
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(output_dir + '/train', sess.graph)
    train_metric = self.evaluate(sess, train_dataset, Targets_UnNormalization_fn=Targets_UnNormalization_fn)
    validation_metric = self.evaluate(sess, validation_dataset, Targets_UnNormalization_fn=Targets_UnNormalization_fn)
    # train_results_file_path = os.path.join(output_dir, "train_result.csv")
    if enable_plotting:
        plt.subplot(2, 1, 1)
        plt.title('Training data set')
        plt.axis([0, epochs, 0, train_metric['primary']])
        plt.subplot(2, 1, 2)
        plt.title('Validation data set')
        plt.axis([0, epochs, 0, validation_metric['primary']])
        plt.ion()
    logger.info('Start Training')
    steps_in_epoch = train_dataset.num_examples // self.batch_size
    # self.get_g_structure(sess, train_dataset)
    abort = 0
    for epoch in range(0, epochs):
        if abort:
            break
        for i in range(0, steps_in_epoch):
            feed_dict = self.fill_feed_dict(train_dataset, self.batch_size)
            sess.run([self.train_op], feed_dict=feed_dict)
        summ = sess.run(merged_summaries)
        train_writer.add_summary(summ, epoch)
        train_dataset.reset_epoch(permute=True)
        sess.run([self.global_step_update_op])
        if epoch % 5 == 0:
            train_metric = self.evaluate(sess, train_dataset, Targets_UnNormalization_fn=Targets_UnNormalization_fn)
            validation_metric = self.evaluate(sess, validation_dataset, Targets_UnNormalization_fn=Targets_UnNormalization_fn)
            if enable_plotting:
                plt.subplot(2, 1, 1)
                plt.scatter(epoch, train_metric['primary'], color='red', marker=".")
                plt.scatter(epoch, train_metric['secondary'], color='blue', marker=".")
                plt.subplot(2, 1, 2)
                plt.scatter(epoch, validation_metric['primary'], color='red', marker=".")
                plt.scatter(epoch, validation_metric['secondary'], color='blue', marker=".")
                plt.pause(0.05)
            learning_rate = self.get_learning_rate(sess)
            if np.isnan(train_metric['primary']) and np.isnan(train_metric['secondary']):
                logger.info("Epoch: {:}, Learning rate {:.8f}, All metrics are NaN: aborting training".format(
                    epoch, learning_rate[0]))
                abort = 1
                break
            if 'rmse' in train_metric:
                logger.info(
                    "Epoch: {:}, Learning rate {:.8f}, Train RMSE: {:.4f}, Train R2: {:.4f}, "
                    "Validation RMSE {:.4f}, Validation R2 {:.4f}".format(
                        epoch, learning_rate[0],
                        train_metric['rmse'], train_metric['r2'],
                        validation_metric['rmse'], validation_metric['r2']))
            else:
                logger.info(
                    "Epoch: {:}, Learning rate {:.8f}, Train Accuracy {:.4f}, Train AUC: {:.4f}, "
                    "Validation Accuracy {:.4f}, Validation AUC {:.4f}".format(
                        epoch, learning_rate[0],
                        train_metric['accuracy'], train_metric['auc'],
                        validation_metric['accuracy'], validation_metric['auc']))
    training_predictions = Targets_UnNormalization_fn(self.predict(sess, train_dataset))
    save_results(output_dir, Targets_UnNormalization_fn(train_dataset.labels), training_predictions,
                 additional_str='_train')
    validation_predictions = Targets_UnNormalization_fn(self.predict(sess, validation_dataset))
    save_results(output_dir, Targets_UnNormalization_fn(validation_dataset.labels), validation_predictions,
                 additional_str='_valid')
    logger.info('Training Finished')
    # Two python dicts: metrics for the training set and for the validation set.
    return (get_metric(training_predictions, Targets_UnNormalization_fn(train_dataset.labels)),
            get_metric(validation_predictions, Targets_UnNormalization_fn(validation_dataset.labels)))
def evaluate(self, sess, dataset, Targets_UnNormalization_fn=lambda x: x):
    predictions = self.predict(sess, dataset)
    targets = dataset.labels
    return get_metric(Targets_UnNormalization_fn(predictions),
                      Targets_UnNormalization_fn(targets))
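# --- Illustrative sketch (not from the source): a typical
# Targets_UnNormalization_fn simply inverts z-score normalization of the
# regression targets; the statistics below are hypothetical placeholders.
import numpy as np

train_mean, train_std = 2.31, 0.87  # hypothetical training-set statistics

def unnormalize(x):
    return np.asarray(x) * train_std + train_mean

# metric = model.evaluate(sess, validation_dataset, Targets_UnNormalization_fn=unnormalize)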
def main(config):
    import torch
    from models import build_model, build_loss
    from data_loader import get_dataloader
    from trainer import Trainer
    from post_processing import get_post_processing
    from utils import get_metric
    from utils import setup_logger
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://",
                                             world_size=torch.cuda.device_count(),
                                             rank=args.local_rank)
        config['distributed'] = True
    else:
        config['distributed'] = False
    config['local_rank'] = args.local_rank
    config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train']['dataset']['args']['img_mode'] != 'GRAY' else 1
    model = build_model(config['arch'])
    if config['local_rank'] == 0:
        save_dir = os.path.join(config['trainer']['output_dir'], config['name'] + '_' + model.name)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logger = setup_logger(os.path.join(save_dir, 'train.log'))
    if 'evolve' in config.keys() and config['evolve']['flag'] and not config['distributed']:
        # Hyperparameter evolution meta: (gain, lower limit, upper limit) per key.
        meta = {
            'optimizer.args.lr': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lr_scheduler.args.warmup_epoch': (1, 0, 5),
            'loss.alpha': (1, 0.5, 3),
            'loss.beta': (2, 5, 20),
            'loss.ohem_ratio': (1, 1, 5),
            'post_processing.args.box_thresh': (0.3, 0.4, 1.0),
            'dataset.train.dataset.args.pre_processes.[1].args.min_crop_side_ratio': (1, 0.1, 0.9),
            'dataset.train.dataset.args.pre_processes.[2].args.thresh_max': (0.3, 0.4, 1.0),
        }
        config['notest'] = True
        config['nosave'] = True
        saved_path = os.path.join(config['trainer']['output_dir'], config['name'] + '_' + model.name)
        if not os.path.exists(os.path.join(saved_path, 'evolve')):
            os.makedirs(os.path.join(saved_path, 'evolve'))
        yaml_file = os.path.join(saved_path, 'evolve', 'hyp_evolved.yaml')
        evolve_file = os.path.join(saved_path, 'evolve', 'evolve.txt')
        for _ in range(300):
            if os.path.exists(evolve_file):
                # Select parent(s) from previous evolution results.
                parent = 'single'
                x = np.loadtxt(evolve_file, ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min()  # weights
                if len(x) == 1:
                    x = x[0]
                elif parent == 'single':
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                # for i, k in enumerate(hyp.keys()):
                #     plt.hist(v.ravel(), 300)
                #     hyp[k] = float(x[i + 7] * v[i])  # mutate
                for i, k in enumerate(meta.keys()):
                    # Build a config['a']['b'][...] access string from the dotted key.
                    config_keys = k.split('.')
                    str_config = 'config'
                    for config_key in config_keys:
                        if config_key.startswith('[') and config_key.endswith(']'):
                            str_config = str_config + config_key
                        else:
                            str_config = str_config + '[\'' + config_key + '\']'
                    exec(str_config + '=x[i]*v[i]')  # mutate
            # Constrain each hyperparameter to its limits and round it.
            meta_value = []
            for k, v in meta.items():
                config_keys = k.split('.')
                str_config = 'config'
                for config_key in config_keys:
                    if config_key.startswith('[') and config_key.endswith(']'):
                        str_config = str_config + config_key
                    else:
                        str_config = str_config + '[\'' + config_key + '\']'
                # str_config = 'config[\'' + '\'][\''.join(k.split('.')) + '\']'
                exec('print(' + str_config + ')')
                exec(str_config + ' = max(' + str_config + ', v[1])')  # lower limit
                exec(str_config + ' = min(' + str_config + ', v[2])')  # upper limit
                exec(str_config + ' = round(' + str_config + ', 5)')  # significant digits
                exec('meta_value.append(' + str_config + ')')
            train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
            assert train_loader is not None
            if 'validate' in config['dataset']:
                validate_loader = get_dataloader(config['dataset']['validate'], False)
            else:
                validate_loader = None
            criterion = build_loss(config['loss']).cuda()
            post_p = get_post_processing(config['post_processing'])
            metric = get_metric(config['metric'])
            trainer = Trainer(config=config,
                              model=model,
                              criterion=criterion,
                              train_loader=train_loader,
                              post_process=post_p,
                              metric_cls=metric,
                              validate_loader=validate_loader,
                              logger=(logger if config['local_rank'] == 0 else None))
            results = trainer.train()
            print_mutation(results, yaml_file, evolve_file, meta_value)
    else:
        train_loader = get_dataloader(config['dataset']['train'], config['distributed'])
        assert train_loader is not None
        if 'validate' in config['dataset']:
            validate_loader = get_dataloader(config['dataset']['validate'], False)
        else:
            validate_loader = None
        criterion = build_loss(config['loss']).cuda()
        post_p = get_post_processing(config['post_processing'])
        metric = get_metric(config['metric'])
        trainer = Trainer(config=config,
                          model=model,
                          criterion=criterion,
                          train_loader=train_loader,
                          post_process=post_p,
                          metric_cls=metric,
                          validate_loader=validate_loader,
                          logger=(logger if config['local_rank'] == 0 else None))
        trainer.train()
def BootstrapCI(pred1, labels, score_func, pred2=None, cluster=None, type_of_ci='bca',
                confidence_level=0.95, sample_size=None, num_bootstrap=2000):
    # Ensure all inputs are converted into numpy arrays for convenience.
    pred1 = np.array(pred1).reshape(-1)
    labels = np.array(labels).reshape(-1)
    if pred2 is not None:
        pred2 = np.array(pred2).reshape(-1)
    if cluster is not None:
        cluster = np.array(cluster).reshape(-1)
    # Check the validity of the arguments.
    assert len(pred1) == len(labels), \
        f"There are {len(pred1)} predictions but {len(labels)} ground truth entries."
    # Check that the second model has the same number of outputs.
    if pred2 is not None:
        assert len(pred1) == len(pred2), \
            f"There are {len(pred1)} predictions from model 1 but {len(pred2)} predictions from model 2."
    # Check that the requested CI method matches the inputs.
    if type_of_ci.startswith("paired") and pred2 is None:
        raise ValueError("Predictions from a second model are required to compute paired confidence intervals.")
    if not type_of_ci.startswith("paired") and pred2 is not None:
        raise ValueError("Non-paired confidence intervals cannot be applied to a pair of model outputs.")
    assert 0.0 < confidence_level < 1.0, "Confidence level must be within the range (0.0, 1.0)."
    if cluster is None and type_of_ci.startswith('cluster'):
        raise ValueError("If no clustering info is provided, please use non-clustered CI methods for better performance.")
    # Resolve the score function if it is given by name.
    if isinstance(score_func, str):
        try:
            score_func = get_metric(score_func)
        except AttributeError:
            print(f"Specified metric \"{score_func}\" is not supported. "
                  f"Please refer to the documentation for available metrics or build your own.")
            exit(0)
    # Default the bootstrap sample size to the dataset size.
    if sample_size is None:
        sample_size = len(labels)
    if pred2 is None:
        preds = (pred1,)
    else:
        preds = (pred1, pred2)
    # Run the statistical test.
    ci_func = get_ci(type_of_ci)
    lo, hi, scores = ci_func(*preds, labels, score_func, cluster, confidence_level, sample_size, num_bootstrap)
    return lo, hi, scores
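# --- Illustrative usage (not from the source): a 95% BCa interval for a single
# model's accuracy; the synthetic data and seed are placeholders.
import numpy as np
from sklearn.metrics import accuracy_score

rng = np.random.default_rng(0)
labels = rng.integers(0, 2, size=500)
pred = np.where(rng.random(500) < 0.85, labels, 1 - labels)  # ~85% accurate predictions
lo, hi, scores = BootstrapCI(pred, labels, accuracy_score,
                             type_of_ci='bca', confidence_level=0.95)
print(f"accuracy 95% CI: [{lo:.3f}, {hi:.3f}]")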
filename = args.filename if args.filename else 'output.csv'

# get all instances
instances = utils.get_all_instances(resource)

# process and write to csv
with open(filename, 'w') as csvfile:
    # initialize csv writer
    csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # write the headers to csv
    csvwriter.writerow(utils.csv_headers[resource])
    # loop through each instance
    for instance in instances:
        # get datapoints and process
        if resource == 'ec2':
            instance_id = instance.id
        elif resource == 'rds':
            instance_id = instance['DBInstanceIdentifier']
        result = utils.get_metric(resource, instance_id, period, days)
        item_list_arr = utils.process_metric(result)
        # write metrics to csv
        utils.write_to_csv(resource, csvwriter, instance, item_list_arr)

print('CSV file %s created.' % filename)
def evaluate(self, sess, dataset):
    predictions = self.predict(sess, dataset)
    targets = dataset.labels
    return get_metric(predictions, targets)
def validation(self, epoch, result_save_path=None):
    print('Validation :')
    self.model.eval()
    metric_all = np.zeros(2)
    for index, val_loader in enumerate(self.val_loaders):
        dataset = self.p['val_datasets'][index].split('/')[-1]
        print('Validation [{}]'.format(dataset))
        result_save_path_tmp = None
        if result_save_path is not None:
            result_save_path_tmp = os.path.join(result_save_path, dataset)
            os.makedirs(result_save_path_tmp, exist_ok=True)
        loss_total = 0
        tbar = tqdm(val_loader)
        mae_avg, f_score_avg = 0, 0
        for i, sample_batched in enumerate(tbar):
            input = self.model.get_input(sample_batched)
            gt = self.model.get_gt(sample_batched)
            with torch.no_grad():
                output = self.model(input)
                loss = self.model.get_loss(output, gt)
                loss_total += loss.item()
                tbar.set_description('Loss: {:.3f}'.format(loss_total / (i + 1)))
                result = self.model.get_result(output)
                mae, f_score = utils.get_metric(sample_batched, result, result_save_path_tmp)
                mae_avg, f_score_avg = mae_avg + mae, f_score_avg + f_score
        print('Loss: %.3f' % (loss_total / (i + 1)))
        mae_avg, f_score_avg = mae_avg / len(tbar), f_score_avg / len(tbar)
        metric = np.array([mae_avg, f_score_avg.max().item()])
        print('[{}]-> mae:{:.4f} f_max:{:.4f}'.format(dataset, metric[0], metric[1]))
        metric_all += metric
    metric_all = metric_all / len(self.val_loaders)
    is_best = utils.metric_better_than(metric_all, self.best_metric)
    self.best_metric = metric_all if is_best else self.best_metric
    print('Metric_Select[MAE]: {:.4f} ({:.4f})'.format(metric_all[0], self.best_metric[0]))
    pth_state = {
        'current_epoch': epoch,
        'best_metric': self.best_metric,
        'model': self.model.state_dict(),
        'optimizer': self.optimizer.state_dict(),
        'scheduler': self.scheduler.state_dict()
    }
    if self.p['if_save_checkpoint']:
        torch.save(pth_state, os.path.join(self.p['snapshot_path'], 'checkpoint.pth'))
        if is_best:
            torch.save(pth_state, os.path.join(self.p['snapshot_path'], 'best.pth'))
    if self.p['if_use_tensorboard']:
        self.writer.add_scalar('Loss/test', (loss_total / (i + 1)), epoch)
        self.writer.add_scalar('Metric/mae', metric_all[0], epoch)
        self.writer.add_scalar('Metric/f_max', metric_all[1], epoch)
def evaluate(self, X, y_true, metric="accuracy_score"):
    y_pred = self.predict(X)
    # If the model outputs per-class scores but labels are class indices,
    # one-hot encode the labels so the shapes match.
    if (len(y_pred.shape) == 2 and y_pred.shape[1] > 1
            and (len(y_true.shape) == 1 or y_true.shape[1] == 1)):
        y_true = one_hot(y_true)
    return get_metric(metric)(y_true, y_pred)
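# --- Illustrative sketch (not from the source): several snippets above pass a
# metric *name* to get_metric; one plausible implementation resolves the name
# on sklearn.metrics, which also matches the AttributeError handling in the
# BootstrapCI snippet above.
import sklearn.metrics

def get_metric(name):
    if callable(name):  # already a scorer, pass it through
        return name
    return getattr(sklearn.metrics, name)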