def train_test_base(X_train, X_test, y_train, y_test, task, name, inputs, seed):
    mtl = 1 if y_test.shape[1] > 1 else 0  # multi-label
    if name == 'lr':
        # print('Start training Logistic Regression:')
        model = LogisticRegression()
    else:
        # print('Start training Random Forest:')
        model = RandomForestClassifier()
    if mtl:
        model = OneVsRestClassifier(model)
    else:
        y_train, y_test = y_train[:, 0], y_test[:, 0]
    t0 = time.time()
    model.fit(X_train, y_train)
    t1 = time.time()
    # print('Running time:', t1 - t0)
    probs = model.predict_proba(X_test)
    metrics = []
    if mtl:
        for idx in range(y_test.shape[1]):
            metric = cal_metric(y_test[:, idx], probs[:, idx])
            # print(idx + 1, metric)
            metrics.append(metric)
        # print('Avg', np.mean(metrics, axis=0).tolist())
    else:
        metric = cal_metric(y_test, probs[:, 1])
        f1, auc, aupr = metric
        # print(metric)
        print(f'{task},{name},{inputs},{seed},{f1},{auc},{aupr}')
def convert(self, kind="user"): """Convert underlying metric objects. Conversion to user format returns a dictionary with each element mapping metric name to metric value. Conversion to db format returns a list of dictionaries, each with keys "name", "scoring", and "value" mapping to their respective values. Both formats convert np.floating values to Python floats. Parameters ---------- kind : str One of "user" or "db" """ if kind=="user": metrics = {} for m in self._list: metrics.update(m.convert(kind="user")) elif kind=="db": metrics = [] for m in self._list: metrics.append(m.convert(kind="db")) else: ValueError("Bad kind: {}".format(kind)) return metrics
def train(projectname, models, x_train, y_train): for k in range(MODEL_NUMBER): mn0, mn1, metrics0, metrics1 = [], [], [], [] print("Training ", k, "th model...") for i in range(y_train.shape[0]): if y_train[i] == 0: mn0.append(x_train[0][i]) metrics0.append(x_train[1][i]) else: mn1.append(x_train[0][i]) metrics1.append(x_train[1][i]) size = (int)(len(mn1) * SUBSETSIZE) temp_set = [] mn0 = np.array(mn0) mn1 = np.array(mn1) metrics0 = np.array(metrics0) metrics1 = np.array(metrics1) indices0 = np.arange(mn0.shape[0]) indices1 = np.arange(mn1.shape[0]) #np.random.shuffle(indices0) #np.random.shuffle(indices1) indices0 = shuffle(indices0) indices1 = shuffle(indices1) mn0 = mn0[indices0[:size]] mn1 = mn1[indices1[:size]] metrics0 = metrics0[indices0[:size]] metrics1 = metrics1[indices1[:size]] temp_set = [] for i in range(size): temp_set.append([mn0[i], metrics0[i], 0]) temp_set.append([mn1[i], metrics1[i], 1]) np.random.shuffle(temp_set) y = [] mn = [] metrics = [] for i in range(len(temp_set)): mn.append(temp_set[i][0]) metrics.append(temp_set[i][1]) y.append(temp_set[i][2]) mn = np.array(mn) metrics = np.array(metrics) x = [mn, metrics] y = np.array(y) models[k].fit(x, y, epochs=10, batch_size=5, verbose=0) json_string = models[k].to_json() open( 'D:/TSE/python/largeclass/model/' + projectname + '-' + (str)(k) + '.json', 'w').write(json_string) models[k].save_weights('D:/TSE/python/largeclass/model/' + projectname + '-' + (str)(k) + '.h5') return models
def train_test_base(X_train, X_test, y_train, y_test, name):
    mtl = 1 if y_test.shape[1] > 1 else 0  # multi-label
    if name == 'lr':
        print('Start training Logistic Regression:')
        model = LogisticRegression()
        param_grid = {'penalty': ['l1', 'l2']}
    else:
        print('Start training Random Forest:')
        model = RandomForestClassifier()
        param_grid = {
            'n_estimators': [x for x in range(20, 40, 5)],
            'max_depth': [None, 20, 40, 60, 80, 100]
        }
    if mtl:
        model = OneVsRestClassifier(model)
    else:
        y_train, y_test = y_train[:, 0], y_test[:, 0]
    t0 = time.time()
    gridsearch = GridSearchCV(model, param_grid, scoring='roc_auc', cv=5)
    gridsearch.fit(X_train, y_train)
    model = gridsearch.best_estimator_
    t1 = time.time()
    print('Running time:', t1 - t0)
    probs = model.predict_proba(X_test)
    metrics = []
    if mtl:
        for idx in range(y_test.shape[1]):
            metric = cal_metric(y_test[:, idx], probs[:, idx])
            print(idx + 1, metric)
            metrics.append(metric)
        print('Avg', np.mean(metrics, axis=0).tolist())
    else:
        metric = cal_metric(y_test, probs[:, 1])
        print(metric)
def average_metric(true: list, predicted: list, metric) -> float:
    """
    Computes an average metric from a list of true and predicted values.

    This function iterates from 1 to `len(predicted)`; in each iteration it
    computes the metric over `true` and `predicted[:current_index]`, and at
    the end the average of these metrics is returned.

    Parameters
    ----------
    true : list of str
        List of true elements.
    predicted : list of str
        List of predicted elements.
    metric : callable
        A metric function.

    Returns
    -------
    float
        Average metric.
    """
    metrics = []
    for k in range(1, len(predicted) + 1):
        predicted_k = predicted[:k]
        p = metric(true, predicted_k)
        metrics.append(p)
    try:
        return statistics.mean(metrics)
    except statistics.StatisticsError:
        return 0.0
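# A minimal usage sketch for average_metric (hypothetical data; the helper
# below is an illustrative precision@k-style callable, not part of the
# original module).
def _precision_at_k(true, predicted_k):
    # fraction of the predicted prefix that appears in the true list
    return sum(1 for p in predicted_k if p in true) / len(predicted_k)

# average_metric evaluates the metric on every prefix of `predicted`
# ([:1], [:2], ...) and returns the mean of those values, e.g.:
# average_metric(["a", "b", "c"], ["a", "x", "b"], _precision_at_k)
# -> mean of (1/1, 1/2, 2/3) ≈ 0.72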
def test(models, x_test, y_test): mn0, mn1, metrics0, metrics1 = [], [], [], [] for i in range(y_test.shape[0]): if y_test[i] == 0: mn0.append(x_test[0][i]) metrics0.append(x_test[1][i]) else: mn1.append(x_test[0][i]) metrics1.append(x_test[1][i]) size = (int)(len(mn0) / 0.9329 * 0.0671 + 0.5) temp_set = [] mn0 = np.array(mn0) mn1 = np.array(mn1) metrics0 = np.array(metrics0) metrics1 = np.array(metrics1) indices0 = np.arange(mn0.shape[0]) indices1 = np.arange(mn1.shape[0]) np.random.shuffle(indices0) np.random.shuffle(indices1) mn1 = mn1[indices1[:size]] metrics1 = metrics1[indices1[:size]] temp_set = [] for i in range(mn0.shape[0]): temp_set.append([mn0[i], metrics0[i], 0]) for i in range(size): temp_set.append([mn1[i], metrics1[i], 1]) np.random.shuffle(temp_set) print('---', len(temp_set), '---') y = [] mn = [] metrics = [] for i in range(len(temp_set)): mn.append(temp_set[i][0]) metrics.append(temp_set[i][1]) y.append(temp_set[i][2]) mn = np.array(mn) metrics = np.array(metrics) x = [mn, metrics] y = np.array(y) predict = [] for i in range(MODEL_NUMBER): predict.append(models[i].predict(x)) y_pre = [] for i in range(y.shape[0]): t = 0.0 for j in range(MODEL_NUMBER): t += predict[j][i] y_pre.append(t / MODEL_NUMBER) return eval(y_pre, y)
def predict_original_for_loader(loaded_models, dataloader, config, is_pseudo): predicts = defaultdict(lambda: {'original': defaultdict(dict), 'target': defaultdict(dict), 'processed': defaultdict(dict)}) predicts_each = defaultdict() metrics = [] for i, key in enumerate(loaded_models.keys()): print('【%d/%d】' % (i+1, len(loaded_models.keys()))) model = loaded_models[key]['pseudo_model' if is_pseudo else 'model'] for images, targets, image_ids in tqdm.tqdm(dataloader): image_id = image_ids[0] preds, _ = model(images, targets) metrics.append(calculate_score_for_each(preds, targets)) if i == 0: predicts[image_id]['target']['boxes'] = targets[0]['boxes'] predicts_each[image_id] = defaultdict(dict) predicts_each[image_id][key]['boxes'] = preds[0]['boxes'] predicts_each[image_id][key]['scores'] = preds[0]['scores'] for image_id, d in predicts_each.items(): idx = 0 all_empty = False while not all_empty: top_scores = [] top_boxes = [] for key in predicts_each[image_id].keys(): if d[key]['scores'].shape[0] <= idx: continue top_scores.append(d[key]['scores'][idx]) top_boxes.append(d[key]['boxes'][idx, :]) if len(top_scores) == 0: all_empty = True continue top_scores = np.array(top_scores) top_boxes = np.array(top_boxes) top_sorted_idx = np.argsort(top_scores)[::-1] top_boxes = top_boxes[top_sorted_idx, :] top_scores = top_scores[top_sorted_idx] if 'boxes' not in predicts[image_id]['original']: predicts[image_id]['original']['boxes'] = top_boxes predicts[image_id]['original']['scores'] = top_scores else: if config['apply']: keeped_top_score = predicts[image_id]['original']['scores'][-1] top_scores -= np.max([0.0, (top_scores[0] - keeped_top_score + config['subtraction'])]) predicts[image_id]['original']['boxes'] = np.concatenate([predicts[image_id]['original']['boxes'], top_boxes], axis=0) predicts[image_id]['original']['scores'] = np.concatenate([predicts[image_id]['original']['scores'], top_scores], axis=0) sorted_idx = np.argsort(predicts[image_id]['original']['scores'])[::-1] predicts[image_id]['original']['boxes'] = predicts[image_id]['original']['boxes'][sorted_idx, :] predicts[image_id]['original']['scores'] = predicts[image_id]['original']['scores'][sorted_idx] idx += 1 return predicts, np.array(metrics)
def fit(self, X, Y):
    """
    Run a cross-validated grid search over `self.parameters` and record
    per-split scores.

    @param: X (np.ndarray)
    @param: Y (np.ndarray)
    @returns: results (pd.DataFrame) with all cv results
    """
    assert X.shape[0] == Y.shape[0]
    self.splits = list(self.kfold.split(X))

    # set up results dictionary
    results = {"params": [], "mean_test_score": []}
    for param_title in self.parameters.keys():
        results[f"param_{param_title}"] = []
    for i in range(self.kfold.get_n_splits()):
        results[f"test{i}_score"] = []

    self.best_estimator_, self.best_score_, self.best_params_ = None, np.inf, None
    for params in itertools.product(*self.parameters.values()):
        # set param
        param_dict = {}
        for param_idx, param_title in enumerate(self.parameters.keys()):
            results[f"param_{param_title}"].append(params[param_idx])
            param_dict[param_title] = params[param_idx]
        results["params"].append(param_dict)

        # perform split
        models, metrics = [], []
        for split_idx, elem in enumerate(self.splits):
            train_idx, valid_idx = elem
            # model = self.model_func(param_dict, self.feat_params[split_idx])
            model = base.clone(self.model)
            model = model.set_params(**{**param_dict, **self.feat_params[split_idx]})
            Xtrain, Xtest = X[train_idx], X[valid_idx]
            Ytrain, Ytest = Y[train_idx], Y[valid_idx]
            if self.input_valid:
                model.fit(Xtrain, Ytrain.flatten(), Xtest, Ytest.flatten())
            else:
                model.fit(Xtrain, Ytrain.flatten())
            pred = model.predict(Xtest)
            metric = self.metric_func(Ytest, pred)

            # bookkeeping
            models.append(model)
            metrics.append(metric)
            results[f"test{split_idx}_score"].append(metric)

        avg_score = np.average(metrics)
        results["mean_test_score"].append(avg_score)
        if avg_score < self.best_score_:
            self.best_score_ = avg_score
            self.best_estimator_ = models[np.argmin(metrics)]
            self.best_params_ = param_dict

    return pd.DataFrame(results)
def evaluate_metrics_by_forecast_horizon(ds, column=None, model=sklearn.linear_model.LinearRegression(), metric=sklearn.metrics.r2_score, fit_set='training', predict_set='test'): logger = logging.getLogger() if column is None: column = ds.input_all.columns if not checks.is_iterable_not_string(column): column = [column] assert fit_set in ('training', 'validation', 'test') assert predict_set in ('training', 'validation', 'test') if fit_set == 'training': fit_sets = ds.training_set all_fit_sets = ds.all_training_sets elif fit_set == 'validation': fit_sets = ds.validation_set all_fit_sets = ds.all_validation_sets else: fit_sets = ds.test_set all_fit_sets = ds.all_test_sets if predict_set == 'training': predict_sets = ds.training_set elif predict_set == 'validation': predict_sets = ds.validation_set else: predict_sets = ds.test_set logger.info('Evaluating the metric for column(s) %s' % ', '.join(['"%s"' % c for c in column])) metrics = [] if len(fit_sets) == len(predict_sets): for fs, ps in zip(fit_sets, predict_sets): x_train = fs.input[column].values y_train = fs.output.values model.fit(x_train, y_train) x_predict = ps.input[column].values y_predict = ps.output.values y_predict_pred = model.predict(x_predict) predict_set_metrics = [] for i in range(len(ds.forecast_horizon)): predict_set_metrics.append(metric(y_predict[:,i], y_predict_pred[:,i])) metrics.append(predict_set_metrics) else: x_train = all_fit_sets.input[column].values y_train = all_fit_sets.output.values model.fit(x_train, y_train) for ps in predict_sets: x_predict = ps.input[column].values y_predict = ps.output.values y_predict_pred = model.predict(x_predict) predict_set_metrics = [] for i in range(len(ds.forecast_horizon)): predict_set_metrics.append(metric(y_predict[:,i], y_predict_pred[:,i])) metrics.append(predict_set_metrics) # The mean is taken over the predict sets return np.mean(metrics, axis=0)
def calculate_metrics(self, y_predicted: np.array, y_true: np.array) -> List[ClassMetrics]:
    classes_num = self.get_classes_num(y_true)
    metrics = []
    for class_idx in range(classes_num):
        class_stats = self.calculate_class_metrics(y_predicted, y_true, class_idx)
        metrics.append(class_stats)
    return metrics
def evaluate_significance(null_predictions, *,
                          metric=sklearn.metrics.balanced_accuracy_score):
    """
    Prints several summary metrics of classification performance.

    Parameters
    ----------
    null_predictions : list
        Length [n_perms+1,]. Each item is a [labels, prediction] pair, with
        `predictions[0]` being the unshuffled result (typically the output of
        `compute_null_predictions()`).
    metric : optional
        Function conforming to the `sklearn.metrics` interface.

    Returns
    -------
    metrics : np.array
        `metric` calculated for every item in `null_predictions`.
    """
    metrics = []
    for labels, predictions in null_predictions:
        # Reorder for sklearn
        # Easier to convert to string and let sklearn sort a common coding
        predictions = utils.to_categorical(predictions, to_string=True)
        # Not strictly necessary, but throws error on missing entries
        labels = labels[predictions.index.levels[-1]]
        labels = utils.to_categorical(labels, to_string=True)
        labels = labels.reindex(predictions.index, level=-1)

        metrics.append(metric(labels, predictions))
    metrics = np.asarray(metrics)

    # Summarise
    print("True accuracy: {: .2f}".format(metrics[0]))
    print("Null accuracy [+/- s.d.]: {: .2f} [+/- {:.2f}]".format(
        np.mean(metrics[1:]), np.std(metrics[1:])))
    print("Approx (2.5%, 97.5%) CI: {: .2f}, {:.2f}".format(
        np.percentile(metrics[1:], 2.5), np.percentile(metrics[1:], 97.5)))

    # Include true in permutation distribution
    # Phipson & Smyth, 2010: https://doi.org/10.2202/1544-6115.1585
    # https://stats.stackexchange.com/a/112352
    k = np.sum(metrics >= metrics[0])
    n = len(metrics)
    print("p(True > Null) [95% CI]: {: .3f} [{:.2e}, {:.2e}]".format(
        k / n, *scipy.stats.beta.interval(0.95, k, n - k)))
    print()

    return metrics
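# A small self-contained sketch of the p-value step used above (counting the
# observed statistic as part of the permutation distribution, following
# Phipson & Smyth, 2010). Illustrative numbers only.
import numpy as np
import scipy.stats

null_metrics = np.concatenate(
    [[0.81], np.random.default_rng(0).normal(0.5, 0.05, 999)])
k = np.sum(null_metrics >= null_metrics[0])  # permutations at least as extreme (incl. observed)
n = len(null_metrics)                        # total count, observed included
p_value = k / n                              # never exactly zero by construction
ci_lo, ci_hi = scipy.stats.beta.interval(0.95, k, n - k)  # binomial-style CI on the p-value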
def get_metric_options() -> typing.List[typing.Dict[str, str]]:
    """
    Returns:
        A list of all metrics that may be used, in a format that may be read
        and interpreted by other apps
    """
    metrics: typing.List[typing.Dict[str, str]] = list()

    for metric in get_all_metrics():
        metrics.append({
            "name": metric.get_name(),
            "description": metric.get_descriptions(),
            "identifier": metric.get_identifier()
        })

    return metrics
def _bootstrap(metric: Metric, y_true: np.ndarray, y_pred: np.ndarray,
               n_bootstrap: int, conf_interval: float, seed: int,
               **kwargs) -> BootstrapResults:
    """Performs bootstrapping on a given `Metric`.

    Args:
        metric: An instance of a `Metric`.
        y_true: Ground truth (correct) target values.
        y_pred: Estimated targets as returned by a classifier.
        n_bootstrap: An integer denoting the number of bootstrap iterations.
        conf_interval: A float denoting the width of confidence interval.
        seed: An int denoting the seed for the PRNG.
        **kwargs: Additional keyword arguments passed to each Metric's `func`.

    Returns:
        A BootstrapResults namedtuple of the mean, standard deviation, and
        lower and upper bounds for conf_interval of the `Metric` over
        `n_bootstrap` bootstrapping iterations. If n_bootstrap=0, i.e., no
        bootstrapping is used, the returned standard deviation and lower and
        upper bounds are numpy.nan.
    """
    if n_bootstrap == 0:
        return BootstrapResults(metric(y_true, y_pred, **kwargs), np.nan,
                                np.nan, np.nan, np.nan)
    prng = np.random.RandomState(seed)
    lo_perc = (100 - conf_interval) / 2
    hi_perc = 100 - lo_perc
    metrics = []
    num_observations = len(y_pred)
    while len(metrics) < n_bootstrap:
        idx = prng.randint(0, high=num_observations, size=num_observations)
        sample_true = y_true[idx]
        sample_preds = y_pred[idx]
        if metric.binary_only and len(np.unique(sample_true)) < 2:
            continue
        metrics.append(metric(sample_true, sample_preds, **kwargs))
    metric_mean = np.mean(metrics, axis=0)
    metric_std = np.std(metrics, axis=0)
    metric_lo, metric_hi = np.percentile(metrics, [lo_perc, hi_perc], axis=0)
    return BootstrapResults(metric_mean, metric_std, conf_interval, metric_lo,
                            metric_hi)
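# A minimal usage sketch for _bootstrap (hypothetical data). The Metric
# constructor call mirrors the one used in _build_default_metrics below and
# may differ in the real code base.
# auc_metric = Metric('auc', sklearn.metrics.roc_auc_score, binary_only=True)
# y_true = np.array([0, 1, 1, 0, 1, 0, 1, 1])
# y_scores = np.array([0.1, 0.8, 0.7, 0.3, 0.9, 0.4, 0.6, 0.2])
# results = _bootstrap(auc_metric, y_true, y_scores,
#                      n_bootstrap=1000, conf_interval=95, seed=42)
# The returned namedtuple summarises the resampled AUC distribution via its
# mean, standard deviation, and lower/upper percentile bounds.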
def run_check(check_file: str):
    gt_df = model.widerface.WIDERFACEDataset(
        root='data/WIDER_val/images',
        meta='data/wider_face_split/wider_face_val_bbx_gt.txt')
    check_df = model.widerface.WIDERFACEDataset(root='./', meta=check_file)

    metrics = []
    pred = []
    labels = []
    for idx, image in enumerate(check_df):
        bboxes = [[b['x'], b['y'], b['w'], b['h']] for b in image.bboxes]
        scores = [b['blur'] for b in image.bboxes]

        gt_image = gt_df[image.filename]
        gt_bboxes = [[b['x'], b['y'], b['w'], b['h']] for b in gt_image.bboxes]

        if len(gt_bboxes) > len(bboxes):
            for _ in range(0, len(gt_bboxes) - len(bboxes)):
                metrics.append(0)
                pred.append(0)
                labels.append(1)

        if len(bboxes) == 0:
            continue

        iou = iou_scores(bboxes, gt_bboxes)
        for iou_idx, iou_score in enumerate(iou):
            metrics.append(scores[iou_idx])
            if iou_score >= 0.5:
                pred.append(1)
                labels.append(1)
            else:
                pred.append(1)
                labels.append(0)

    metrics = np.array(metrics)
    labels = np.array(labels)
    pred = np.array(pred)

    p, r, _ = sklearn.metrics.precision_recall_curve(labels, metrics)
    print('AP:', sklearn.metrics.auc(r, p))
def objective(cls, args):
    metrics = []
    for seed in range(1, cls.cv_fold + 1):
        model = cls.interpreter(args[0])
        data_path = args[1]
        targets_path = args[2]

        X_train, Y_train, X_test, Y_test = cls.load_data(
            data_path=data_path,
            targets_path=targets_path,
            train_set=cls.train_set,
            random_state=seed
        )

        model.fit(X_train, Y_train)
        metric = cls.evaluate(model, X_test, Y_test)
        metrics.append(metric)

    metrics_mean = np.array(metrics).mean()
    print(metrics_mean)
    return 1 - metrics_mean
def evaluate_metrics_by_forecast_horizon( ds, column=None, model=sklearn.linear_model.LinearRegression(), metric=sklearn.metrics.r2_score): logger = logging.getLogger() if column is None: column = ds.input_all.columns if not checks.is_iterable_not_string(column): column = [column] logger.info('Evaluating the metric for column(s) %s' % ', '.join(['"%s"' % c for c in column])) metrics = [] if len(ds.training_set) == len(ds.validation_set): for ts, vs in zip(ds.training_set, ds.validation_set): x_train = ts.input[column].values y_train = ts.output.values model.fit(x_train, y_train) x_validation = vs.input[column].values y_validation = vs.output.values y_validation_pred = model.predict(x_validation) validation_set_metrics = [] for i in range(len(ds.forecast_horizon)): validation_set_metrics.append( metric(y_validation[:, i], y_validation_pred[:, i])) metrics.append(validation_set_metrics) else: x_train = ds.all_training_sets.input[column].values y_train = ds.all_training_sets.output.values model.fit(x_train, y_train) for vs in ds.validation_set: x_validation = vs.input[column].values y_validation = vs.output.values y_validation_pred = model.predict(x_validation) validation_set_metrics = [] for i in range(len(ds.forecast_horizon)): validation_set_metrics.append( metric(y_validation[:, i], y_validation_pred[:, i])) metrics.append(validation_set_metrics) # The mean is taken over the validation sets return np.mean(metrics, axis=0)
def _build_default_metrics(binary: bool) -> List[Metric]:
    """Builds and returns the default set of `Metric`s."""
    metrics = [
        Metric('num', lambda y_true, y_pred: len(y_true), binary_only=binary)
    ]
    if binary:
        metrics.extend([
            Metric('auc', sklearn.metrics.roc_auc_score, binary_only=True),
            Metric('auprc', sklearn.metrics.average_precision_score,
                   binary_only=True),
            TopPercentileMetric('freq', compute_frequency, binary_only=True,
                                top_percentile=100),
        ])
        for top_percentile in [10, 5, 1]:
            metrics.append(
                TopPercentileMetric('freq @{:04.1f}'.format(top_percentile),
                                    compute_frequency, binary_only=True,
                                    top_percentile=top_percentile))
    else:
        metrics.extend([
            Metric('pearson', sp.stats.pearsonr, binary_only=False),
            Metric('spearman', sp.stats.spearmanr, binary_only=False),
            Metric('mse', sklearn.metrics.mean_squared_error, binary_only=False),
            Metric('mae', sklearn.metrics.mean_absolute_error, binary_only=False),
        ])
    return metrics
def _create_metrics(self, folder):
    columns = ["F1 score", "Matthews"]
    indices = [
        self._get_classifier_name(index) for index in self.classifier_indices
    ]
    metrics = pd.DataFrame(columns=columns)
    LOG.info("Start creating metrics")
    for index in self.classifier_indices:
        tmp = np.empty((0, 2))
        classifier = self.classifiers[index]
        Xtrain, ytrain, Xtest, ytest = self.dataset.get_dataset()
        for i in range(self.experiments):
            LOG.info("Experiment {}/{}".format(
                index * i + i,
                self.experiments * len(self.classifier_indices)))
            classifier.fit(Xtrain, ytrain)
            prediction = classifier.predict(Xtest)
            f1_score, matthews = get_metrics(prediction, ytest)
            entry = np.array([[f1_score, matthews]])
            tmp = np.concatenate((tmp, entry))
        mean = np.mean(tmp, axis=0)
        # DataFrame.append was removed in pandas 2.0; pd.concat adds the
        # per-classifier row in the same way.
        metrics = pd.concat([
            metrics,
            pd.DataFrame(np.reshape(mean, (1, 2)),
                         index=[self._get_classifier_name(index)],
                         columns=columns)
        ])
    basename = self._get_basename()
    name = "metrics_" + basename + "_" + ".csv"
    path = os.path.join(folder, name)
    LOG.info("Saving metrics as: {}".format(path))
    metrics.to_csv(path, sep=',')
def train(model, dataset, cfg): print("Our config:") pprint.pprint(cfg) dataset_name = cfg.dataset + "-" + cfg.model + "-" + cfg.name device = 'cuda' if cfg.cuda else 'cpu' if not torch.cuda.is_available() and cfg.cuda: device = 'cpu' print( "WARNING: cuda was requested but is not available, using cpu instead." ) print(f'Using device: {device}') print(cfg.output_dir) if not exists(cfg.output_dir): os.makedirs(cfg.output_dir) # Setting the seed np.random.seed(cfg.seed) random.seed(cfg.seed) torch.manual_seed(cfg.seed) if cfg.cuda: torch.cuda.manual_seed_all(cfg.seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Dataset train_size = int(0.8 * len(dataset)) valid_size = len(dataset) - train_size torch.manual_seed(cfg.seed) train_dataset, valid_dataset = torch.utils.data.random_split( dataset, [train_size, valid_size]) #disable data aug valid_dataset.data_aug = None # fix labels train_dataset.labels = dataset.labels[train_dataset.indices] valid_dataset.labels = dataset.labels[valid_dataset.indices] # Dataloader train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=cfg.shuffle, num_workers=cfg.threads, pin_memory=cfg.cuda) valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=cfg.batch_size, shuffle=cfg.shuffle, num_workers=cfg.threads, pin_memory=cfg.cuda) #print(model) # Optimizer optim = torch.optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=1e-5, amsgrad=True) print(optim) criterion = torch.nn.BCEWithLogitsLoss() # Checkpointing start_epoch = 0 best_metric = 0. weights_for_best_validauc = None auc_test = None metrics = [] weights_files = glob(join( cfg.output_dir, f'{dataset_name}-e*.pt')) # Find all weights files if len(weights_files): # Find most recent epoch epochs = np.array([ int(w[len(join(cfg.output_dir, f'{dataset_name}-e')):-len('.pt')]. split('-')[0]) for w in weights_files ]) start_epoch = epochs.max() weights_file = [ weights_files[i] for i in np.argwhere(epochs == np.amax(epochs)).flatten() ][0] model.load_state_dict(torch.load(weights_file).state_dict()) with open(join(cfg.output_dir, f'{dataset_name}-metrics.pkl'), 'rb') as f: metrics = pickle.load(f) best_metric = metrics[-1]['best_metric'] weights_for_best_validauc = model.state_dict() print("Resuming training at epoch {0}.".format(start_epoch)) print("Weights loaded: {0}".format(weights_file)) model.to(device) for epoch in range(start_epoch, cfg.num_epochs): avg_loss = train_epoch(cfg=cfg, epoch=epoch, model=model, device=device, optimizer=optim, train_loader=train_loader, criterion=criterion) auc_valid = valid_test_epoch(name='Valid', epoch=epoch, model=model, device=device, data_loader=valid_loader, criterion=criterion)[0] if np.mean(auc_valid) > best_metric: best_metric = np.mean(auc_valid) weights_for_best_validauc = model.state_dict() torch.save(model, join(cfg.output_dir, f'{dataset_name}-best.pt')) # only compute when we need to stat = { "epoch": epoch + 1, "trainloss": avg_loss, "validauc": auc_valid, 'best_metric': best_metric } metrics.append(stat) with open(join(cfg.output_dir, f'{dataset_name}-metrics.pkl'), 'wb') as f: pickle.dump(metrics, f) torch.save(model, join(cfg.output_dir, f'{dataset_name}-e{epoch + 1}.pt')) return metrics, best_metric, weights_for_best_validauc
def predict_sentences_reporting_bias(negative_sample_weighting=1, number_of_models=1, positives_per_pdf=1): X, y, X_sents, vec, study_sent_indices = _get_sentence_level_X_y() kf = KFold(len(study_sent_indices), n_folds=5, shuffle=True) metrics = [] for fold_i, (train, test) in enumerate(kf): print "making test sentences" test_indices = [study_sent_indices[i] for i in test] train_indices = [study_sent_indices[i] for i in train] X_sents_test = sublist(X_sents, test_indices) # [X_sents[i] for i in test] print "done!" # pdb.set_trace() # print "generating split" X_train = X[np_indices(train_indices)] y_train = y[np_indices(train_indices)] X_test = X[np_indices(test_indices)] y_test = y[np_indices(test_indices)] # print "done!" all_indices = np.arange(len(y_train)) train_positives = np.nonzero(y_train)[0] train_negatives = all_indices[~train_positives] total_positives = len(train_positives) if (negative_sample_weighting * total_positives) > len(train_negatives): sample_negative_examples = len(train_negatives) else: sample_negative_examples = negative_sample_weighting * total_positives models = [] print "fitting models..." p = progressbar.ProgressBar(number_of_models, timer=True) for model_no in range(number_of_models): p.tap() train_negatives_sample = np.random.choice(train_negatives, sample_negative_examples, replace=False) train_sample = np.concatenate([train_positives, train_negatives_sample]) clf = SGDClassifier(loss="hinge", penalty="l2") clf.fit(X_train[train_sample], y_train[train_sample]) models.append(clf) TP = 0 FP = 0 TN = 0 FN = 0 print "testing..." p = progressbar.ProgressBar(len(test_indices), timer=True) for start, end in test_indices: p.tap() study_X = X[np_indices((start, end))] study_y = y[np_indices((start, end))] preds_all = np.mean([clf.predict(study_X) for clf in models], 0) max_indices = preds_all.argsort()[-positives_per_pdf:][::-1] + start real_index = np.where(study_y == 1)[0][0] + start if real_index in max_indices: TP += 1 TN += len(study_y) - positives_per_pdf FP += positives_per_pdf - 1 # FN += 0 else: # TP += 0 TN += len(study_y) - positives_per_pdf - 1 FN += 1 FP += positives_per_pdf print len(study_y) precision = float(TP) / (float(TP) + float(FP)) recall = float(TP) / (float(TP) + float(FN)) f1 = 2 * ((precision * recall) / (precision + recall)) accuracy = float(TP) / len(test_indices) metrics.append({"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}) print pprint(metrics) metric_types = ["precision", "recall", "f1", "accuracy"] for metric_type in metric_types: metric_vec = [metric[metric_type] for metric in metrics] metric_mean = np.mean(metric_vec) print "%s: %.5f" % (metric_type, metric_mean)
def true_hybrid_prediction_test(model, test_mode=False): print "True Hybrid prediction" print "=" * 40 print s = model s.generate_data() # some variations use the quote data internally # for sentence prediction (for additional features) s_cheat = HybridModel(test_mode=False) s_cheat.generate_data() for test_domain in CORE_DOMAINS: print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40) domain_uids = s.domain_uids(test_domain) no_studies = len(domain_uids) kf = KFold(no_studies, n_folds=5, shuffle=False) print "making scorer" ftwo_scorer = make_scorer(fbeta_score, beta=2) tuned_parameters = [{"alpha": np.logspace(-4, -1, 10)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 10)]}] clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring=ftwo_scorer) metrics = [] for fold_i, (train, test) in enumerate(kf): print "training doc level model with test data, please wait..." d = DocumentLevelModel(test_mode=False) d.generate_data(uid_filter=domain_uids[train]) d.vectorize() doc_X, doc_y = d.X_domain_all(domain=test_domain), d.y_domain_all(domain=test_domain) doc_tuned_parameters = {"alpha": np.logspace(-4, -1, 10)} doc_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), doc_tuned_parameters, scoring='f1') doc_clf.fit(doc_X, doc_y) s.set_doc_model(doc_clf, d.vectorizer) s_cheat.vectorize(test_domain) s.vectorize(test_domain, use_vectorizer=s_cheat.vectorizer) X_train, y_train = s_cheat.X_y_uid_filtered(domain_uids[train], test_domain) # train on the *true* labels X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain) clf.fit(X_train, y_train) y_preds = clf.predict(X_test) fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1] metrics.append(fold_metric) # get the scores for positive instances print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2]) metrics.append(fold_metric) # get the scores for positive instances # summary score summary_metrics = np.mean(metrics, axis=0) print "=" * 40 print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2])
def main(train_function): client = AdvisorClient() # Get or create the study study_configuration = { "goal": "MINIMIZE", "randomInitTrials": 1, "maxTrials": 5, "maxParallelTrials": 1, "params": [ { "parameterName": "gamma", "type": "DOUBLE", "minValue": 0.001, "maxValue": 0.01, "feasiblePoints": "", "scallingType": "LINEAR" }, { "parameterName": "C", "type": "DOUBLE", "minValue": 0.5, "maxValue": 1.0, "feasiblePoints": "", "scallingType": "LINEAR" }, { "parameterName": "kernel", "type": "CATEGORICAL", "minValue": 0, "maxValue": 0, "feasiblePoints": "linear, poly, rbf, sigmoid, precomputed", "scallingType": "LINEAR" }, { "parameterName": "coef0", "type": "DOUBLE", "minValue": 0.0, "maxValue": 0.5, "feasiblePoints": "", "scallingType": "LINEAR" }, ] } study = client.create_study("Study", study_configuration, "BayesianOptimization") #study = client.get_study_by_id(6) # Get suggested trials trials = client.get_suggestions(study.id, 3) # Generate parameters parameter_value_dicts = [] for trial in trials: parameter_value_dict = json.loads(trial.parameter_values) print("The suggested parameters: {}".format(parameter_value_dict)) parameter_value_dicts.append(parameter_value_dict) # Run training metrics = [] for i in range(len(trials)): metric = train_function(**parameter_value_dicts[i]) #metric = train_function(parameter_value_dicts[i]) metrics.append(metric) # Complete the trial for i in range(len(trials)): trial = trials[i] client.complete_trial_with_one_metric(trial, metrics[i]) is_done = client.is_study_done(study.id) best_trial = client.get_best_trial(study.id) print("The study: {}, best trial: {}".format(study, best_trial))
def from_list_db(cls, l):
    metrics = cls()
    for item in l:
        metrics.append(Metric.from_dict(item, kind="db"))
    return metrics
def document_prediction_test(model=DocumentLevelModel(test_mode=False)): print "Document level prediction" print "=" * 40 print d = model d.generate_data() # some variations use the quote data internally # for sentence prediction (for additional features) d.vectorize() for test_domain in CORE_DOMAINS: print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40) # f1_prefer_nos = make_scorer(f1_score, pos_label="NO") tuned_parameters = {"alpha": np.logspace(-4, -1, 10)} clf = GridSearchCV(SGDClassifier(loss="log", penalty="L2"), tuned_parameters, scoring='f1') # clf = SGDClassifier(loss="hinge", penalty="L2") domain_uids = d.domain_uids(test_domain) no_studies = len(domain_uids) kf = KFold(no_studies, n_folds=5, shuffle=False) metrics = [] for fold_i, (train, test) in enumerate(kf): X_train, y_train = d.X_y_uid_filtered(domain_uids[train], test_domain) X_test, y_test = d.X_y_uid_filtered(domain_uids[test], test_domain) clf.fit(X_train, y_train) y_preds = clf.predict(X_test) fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds, labels=RoB_CLASSES))[:3] print ('fold %d\t' % (fold_i)) + '\t'.join(RoB_CLASSES) # for metric_type, scores in zip(["prec.", "recall", "f1"], fold_metric): # print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2]) # print # print clf.best_params_ #### START CONFUSION real_no_indices = (y_test=="NO") print "The actual NOs were predicted as..." print collections.Counter(y_preds[real_no_indices]) #### END CONFUSION metrics.append(fold_metric) # get the scores for positive instances # print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2]) mean_scores = np.mean(metrics, axis=0) print "=" * 40 print 'means \t' + '\t'.join(RoB_CLASSES) for metric_type, scores in zip(["prec.", "recall", "f1"], mean_scores): print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2]) print # then train all for most informative features clf = SGDClassifier(loss="hinge", penalty="L2", alpha=0.01) X_all = d.X_domain_all(test_domain) y_all = d.y_domain_all(test_domain) clf.fit(X_all, y_all) print show_most_informative_features_ynu(d.vectorizer, clf)
def extract_spearman_for_fold(metrics, fold, i, predictions, truth,
                              y_ground_truth, test, y_pred, learn_options):
    spearman = util.spearmanr_nonan(y_ground_truth[test].flatten(),
                                    y_pred.flatten())[0]
    assert not np.isnan(spearman), "found nan spearman"
    metrics.append(spearman)
def extract_NDCG_for_fold(metrics, fold, i, predictions, truth,
                          y_ground_truth, test, y_pred, learn_options):
    NDCG_fold = ranking_metrics.ndcg_at_k_ties(y_ground_truth[test].flatten(),
                                               y_pred.flatten(),
                                               learn_options["NDGC_k"])
    metrics.append(NDCG_fold)
def hybrid_doc_prediction_test(model=HybridDocModel(test_mode=False)): print "Hybrid doc level prediction" print "=" * 40 print d = model d.generate_data() # some variations use the quote data internally # for sentence prediction (for additional features) for test_domain in CORE_DOMAINS: print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40) domain_uids = d.domain_uids(test_domain) no_studies = len(domain_uids) kf = KFold(no_studies, n_folds=5, shuffle=False) tuned_parameters = {"alpha": np.logspace(-4, -1, 5)} clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='f1') metrics = [] for fold_i, (train, test) in enumerate(kf): s = SentenceModel(test_mode=False) s.generate_data(uid_filter=domain_uids[train]) s.vectorize() sents_X, sents_y = s.X_domain_all(domain=test_domain), s.y_domain_all(domain=test_domain) sent_tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 2, 10)]}] sent_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall') sent_clf.fit(sents_X, sents_y) d.set_sent_model(sent_clf, s.vectorizer) d.vectorize(test_domain) X_train, y_train = d.X_y_uid_filtered(domain_uids[train], test_domain) X_test, y_test = d.X_y_uid_filtered(domain_uids[test], test_domain) clf.fit(X_train, y_train) y_preds = clf.predict(X_test) fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds, labels=RoB_CLASSES))[:3] print ('fold %d\t' % (fold_i)) + '\t'.join(RoB_CLASSES) for metric_type, scores in zip(["prec.", "recall", "f1"], fold_metric): print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2]) print metrics.append(fold_metric) # get the scores for positive instances # print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2]) mean_scores = np.mean(metrics, axis=0) print "=" * 40 print 'means \t' + '\t'.join(RoB_CLASSES) for metric_type, scores in zip(["prec.", "recall", "f1"], mean_scores): print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2]) print
def main(): """Model training routine.""" ws = Workspace.from_config() # Get the dataset from the workspace data = Dataset.get_by_name(ws, "house_prices") data = data.to_pandas_dataframe() # Split features and labels X = data.drop(columns="price") y = data["price"] # Get the run to start logging run = Run.get_context() # Log training data and CV params run.log("Training size", X.shape[0]) run.log("CV splits", cv_splits) run.log("CV test proportion", test_prop) # Run cross-validation metrics = [] results = [] model = LinearRegression() cv = ShuffleSplit(n_splits=cv_splits, test_size=test_prop) for train, test in cv.split(X): model.fit(X.loc[train, :], y[train]) y_true = y[test] y_pred = model.predict(X.loc[test, :]) results.append( pd.DataFrame({ "actual": y_true, "predicted": y_pred, "residual": y_pred - y_true, })) metrics.append(compute_metrics(y_true, y_pred, log_metrics)) results = pd.concat(results) metrics = pd.DataFrame(metrics) # Log accuracy metrics in AzureML (mean from splits) for metric in metrics.columns: run.log(metric.replace("_", " ").title(), metrics[metric].mean()) # Log predictions and residuals histograms run.log_predictions("Predictions", histogram_predictions(results["predicted"])) run.log_residuals("Residuals", histogram_residuals(results["residual"])) # Register if percentage error below threshold performance = sklearn.metrics.mean_absolute_percentage_error( results["actual"], results["predicted"]) if performance <= performance_threshold: # Retrain on all data model = model.fit(X, y) # Complete the run so files get uploaded joblib.dump(model, model_file) run.upload_file(model_file, model_file) # Register the model run.register_model( model_name=model_id, model_path=model_file, model_framework=Model.Framework.SCIKITLEARN, model_framework_version=sklearn.__version__, description=f"Mean Absolute Percentage Error: {performance}", )
def binary_hybrid_doc_prediction_test(model=HybridDocModel, test_mode=False): print "Binary hybrid doc level prediction version 2 (maybe quicker!!)" print "=" * 40 print d = model(test_mode=test_mode) d.generate_data(binarize=True) # some variations use the quote data internally # for sentence prediction (for additional features) for test_domain in CORE_DOMAINS: print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40) domain_uids = d.domain_uids(test_domain) no_studies = len(domain_uids) kf = KFold(no_studies, n_folds=5, shuffle=False) tuned_parameters = {"alpha": np.logspace(-4, -1, 10), "class_weight": [{1: i, -1: 1} for i in np.logspace(-1, 1, 10)]} clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='precision') metrics = [] s = SentenceModel(test_mode=test_mode) s.generate_data(uid_filter=domain_uids) s.vectorize() for fold_i, (train, test) in enumerate(kf): sents_X, sents_y = s.X_y_uid_filtered(domain_uids[test], test_domain) sent_tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 2, 10)]}] sent_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall') sent_clf.fit(sents_X, sents_y) d.set_sent_model(sent_clf, s.vectorizer) d.vectorize(test_domain) X_train, y_train = d.X_y_uid_filtered(domain_uids[train], test_domain) X_test, y_test = d.X_y_uid_filtered(domain_uids[test], test_domain) clf.fit(X_train, y_train) y_preds = clf.predict(X_test) fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1] metrics.append(fold_metric) # get the scores for positive instances print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2]) metrics.append(fold_metric) # get the scores for positive instances if fold_i == 0: # make a plot of the first curve probas_ = clf.best_estimator_.predict_proba(X_test) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1]) roc_auc = auc(fpr, tpr) print("Area under the ROC curve : %f" % roc_auc) # Plot ROC curve pl.clf() pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc) pl.plot([0, 1], [0, 1], 'k--') pl.xlim([0.0, 1.0]) pl.ylim([0.0, 1.0]) pl.xlabel('False Positive Rate') pl.ylabel('True Positive Rate') pl.title(test_domain) pl.legend(loc="lower right") pl.show() summary_metrics = np.mean(metrics, axis=0) print "=" * 40 print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2]) # then train all for most informative features sents_X, sents_y = s.X_domain_all(domain=test_domain), s.y_domain_all(domain=test_domain) sent_tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 2, 10)]}] sent_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall') sent_clf.fit(sents_X, sents_y) d.set_sent_model(sent_clf, s.vectorizer) d.vectorize(test_domain) X_all, y_all = d.X_y_uid_filtered(domain_uids, test_domain) clf.fit(X_all, y_all) print show_most_informative_features(d.vectorizer, clf.best_estimator_)
def sentence_prediction_test(class_weight={1: 5, -1:1}, model=SentenceModel(test_mode=True)): print print print print "Sentence level prediction" print "=" * 40 print s = model print "Model name:\t" + s.__class__.__name__ print s.__doc__ print "class_weight=%s" % (str(class_weight),) s.generate_data() s.vectorize() for test_domain in CORE_DOMAINS: print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40) domain_uids = s.domain_uids(test_domain) no_studies = len(domain_uids) kf = KFold(no_studies, n_folds=5, shuffle=False, indices=True) # # tuned_parameters = {"alpha": np.logspace(-4, -1, 10)} # tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 5)]}] # clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall') print "making scorer" ftwo_scorer = make_scorer(fbeta_score, beta=2) tuned_parameters = [{"alpha": np.logspace(-4, -1, 10)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 10)]}] clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring=ftwo_scorer) metrics = [] for fold_i, (train, test) in enumerate(kf): X_train, y_train = s.X_y_uid_filtered(domain_uids[train], test_domain) X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain) clf.fit(X_train, y_train) y_preds = clf.predict(X_test) fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1] metrics.append(fold_metric) # get the scores for positive instances print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2]) # if not sample and list_features: # # not an obvious way to get best features for ensemble # print show_most_informative_features(s.vectorizer, clf) # summary score summary_metrics = np.mean(metrics, axis=0) print "=" * 40 print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2]) # then train all for most informative features clf = SGDClassifier(loss="hinge", penalty="L2", alpha=0.01, class_weight={1: 5, -1: 1}) X_all = s.X_domain_all(test_domain) y_all = s.y_domain_all(test_domain) clf.fit(X_all, y_all) print show_most_informative_features(s.vectorizer, clf)
def simple_hybrid_prediction_test(model=HybridModel(test_mode=True)): print "Hybrid prediction" print "=" * 40 print s = model s.generate_data() # some variations use the quote data internally # for sentence prediction (for additional features) for test_domain in CORE_DOMAINS: s.vectorize(test_domain) print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40) domain_uids = s.domain_uids(test_domain) no_studies = len(domain_uids) kf = KFold(no_studies, n_folds=5, shuffle=False) # tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 5)]}] # clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='f1') print "making scorer" ftwo_scorer = make_scorer(fbeta_score, beta=2) tuned_parameters = [{"alpha": np.logspace(-4, -1, 10)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 10)]}] clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring=ftwo_scorer) metrics = [] for fold_i, (train, test) in enumerate(kf): X_train, y_train = s.X_y_uid_filtered(domain_uids[train], test_domain) X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain) clf.fit(X_train, y_train) y_preds = clf.predict(X_test) fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1] metrics.append(fold_metric) # get the scores for positive instances print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2]) metrics.append(fold_metric) # get the scores for positive instances # summary score summary_metrics = np.mean(metrics, axis=0) print "=" * 40 print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2])
def run_predict(self, modelfile, weightfile): """ Run model prediction """ #=========================== #== SET DATA #=========================== logger.info("Setting input data from data loader ...") status= self.__set_data() if status<0: logger.error("Input data set failed!") return -1 #=========================== #== LOAD MODEL #=========================== #- Create the network architecture and weights from file logger.info("Loading model architecture and weights from files %s %s ..." % (modelfile, weightfile)) if self.__load_model(modelfile, weightfile)<0: logger.warn("Failed to load model from files!") return -1 if self.model is None: logger.error("Loaded model is None!") return -1 #=========================== #== PREDICT #=========================== # - Get predicted output data logger.info("Predicting model output data ...") predout= self.model.predict( x=self.test_data_generator, steps=1, verbose=2, workers=self.nworkers, use_multiprocessing=self.use_multiprocessing ) print("predout") print(type(predout)) print(predout.shape) # - Convert one-hot encoding to target ids logger.info("Retrieving target ids from predicted output ...") self.targets_pred= np.argmax(predout, axis=1) print("targets_pred") print(self.targets_pred) print(type(self.targets_pred)) print(self.targets_pred.shape) # - Get predicted output class id logger.info("Computing predicted class ids from targets ...") self.classids_pred= [self.classid_remap_inv[item] for item in self.targets_pred] print("classids_pred") print(self.classids_pred) print(type(self.classids_pred)) # - Get predicted output class prob logger.info("Predicting output classid ...") self.probs_pred= [predout[i,self.targets_pred[i]] for i in range(predout.shape[0])] print("probs_pred") print(self.probs_pred) print(type(self.probs_pred)) # - Save predicted data to file logger.info("Saving prediction data to file %s ..." 
% (self.outfile)) N= predout.shape[0] snames= np.array(self.source_names).reshape(N,1) objids= np.array(self.source_ids).reshape(N,1) objids_pred= np.array(self.classids_pred).reshape(N,1) probs_pred= np.array(self.probs_pred).reshape(N,1) outdata= np.concatenate( (snames, objids, objids_pred, probs_pred), axis=1 ) head= "# sname id id_pred prob" Utils.write_ascii(outdata, self.outfile, head) #================================ #== COMPUTE AND SAVE METRICS #================================ # - Retrieve metrics logger.info("Computing classification metrics on predicted data ...") report= classification_report(self.target_ids, self.targets_pred, target_names=self.target_names, output_dict=True) self.accuracy= report['accuracy'] self.precision= report['weighted avg']['precision'] self.recall= report['weighted avg']['recall'] self.f1score= report['weighted avg']['f1-score'] self.class_precisions= [] self.class_recalls= [] self.class_f1scores= [] for class_name in self.target_names: class_precision= report[class_name]['precision'] class_recall= report[class_name]['recall'] class_f1score= report[class_name]['f1-score'] self.class_precisions.append(class_precision) self.class_recalls.append(class_recall) self.class_f1scores.append(class_f1score) logger.info("accuracy=%f" % (self.accuracy)) logger.info("precision=%f" % (self.precision)) logger.info("recall=%f" % (self.recall)) logger.info("f1score=%f" % (self.f1score)) logger.info("--> Metrics per class") print("classnames") print(self.target_names) print("precisions") print(self.class_precisions) print("recall") print(self.class_recalls) print("f1score") print(self.class_f1scores) # - Retrieving confusion matrix logger.info("Retrieving confusion matrix ...") cm= confusion_matrix(self.target_ids, self.targets_pred) print("confusion matrix") print(cm) # - Saving metrics to file logger.info("Saving metrics to file %s ..." % (self.outfile_metrics)) metrics= [self.accuracy, self.precision, self.recall, self.f1score] metric_names= ["accuracy","precision","recall","f1score"] for i in range(len(self.target_names)): classname= self.target_names[i] precision= self.class_precisions[i] recall= self.class_recalls[i] f1score= self.class_f1scores[i] metrics.append(precision) metrics.append(recall) metrics.append(f1score) metric_names.append("precision_" + classname) metric_names.append("recall_" + classname) metric_names.append("f1score_" + classname) Nmetrics= len(metrics) metric_data= np.array(metrics).reshape(1,Nmetrics) metric_names_str= ' '.join(str(item) for item in metric_names) head= '{} {}'.format("# ",metric_names_str) print("metric_data") print(metrics) print(len(metrics)) print(metric_data.shape) Utils.write_ascii(metric_data, self.outfile_metrics, head) return 0
def main(train_function): client = AdvisorClient() # Get or create the study study_configuration = { "goal": "MINIMIZE", "randomInitTrials": 1, "maxTrials": 5, "maxParallelTrials": 1, "params": [ { "parameterName": "gamma", "type": "DOUBLE", "minValue": 0.001, "maxValue": 0.01, "feasiblePoints": "", "scalingType": "LINEAR" }, { "parameterName": "C", "type": "DOUBLE", "minValue": 0.5, "maxValue": 1.0, "feasiblePoints": "", "scalingType": "LINEAR" }, { "parameterName": "kernel", "type": "CATEGORICAL", "minValue": 0, "maxValue": 0, "feasiblePoints": "linear, poly, rbf, sigmoid, precomputed", "scalingType": "LINEAR" }, { "parameterName": "coef0", "type": "DOUBLE", "minValue": 0.0, "maxValue": 0.5, "feasiblePoints": "", "scalingType": "LINEAR" }, ] } study = client.create_study("Study", study_configuration, "BayesianOptimization") #study = client.get_study_by_id(6) num_trials = 20 for i in range(num_trials): # Get suggested trials trials = client.get_suggestions(study.name, 3) # Generate parameters parameter_value_dicts = [] for trial in trials: parameter_value_dict = json.loads(trial.parameter_values) print("The suggested parameters: {}".format(parameter_value_dict)) parameter_value_dicts.append(parameter_value_dict) # Run training metrics = [] for i in range(len(trials)): metric = train_function(**parameter_value_dicts[i]) #metric = train_function(parameter_value_dicts[i]) metrics.append(metric) # Complete the trial for i in range(len(trials)): trial = trials[i] client.complete_trial_with_one_metric(trial, metrics[i]) is_done = client.is_study_done(study.name) best_trial = client.get_best_trial(study.name) print("The study: {}, best trial: {}".format(study, best_trial)) print(best_trial.parameter_values)
with tqdm.tqdm(test_dataloader) as tq:
    for step, (input_nodes, pos_graph, neg_graph, mfgs) in enumerate(tq):
        # feature copy from CPU to GPU takes place here
        inputs = mfgs[0].srcdata['feat']
        outputs = model(mfgs, inputs).float()
        pos_score = pred(pos_graph, outputs)
        neg_score = pred(neg_graph, outputs)
        # print("Positive Score: ", pos_score[:100])
        # print("Negative Score: ", neg_score[:100])
        batches += 1

        metrics = []
        metrics.append(compute_auc(pos_score, neg_score))
        # metrics.append(compute_f1(pos_score, neg_score))
        # metrics.append(compute_prec(pos_score, neg_score))
        # metrics.append(compute_recall(pos_score, neg_score))

        print('Step: ', step)
        print('ROC-AUC Score: ', metrics[0])
        # print('F1-Score: ', metrics[1])
        # print('Precision Score: ', metrics[2])
        # print('Recall Score: ', metrics[3])

        score_array[0] += metrics[0]
        # score_array[1] += metrics[1]
        # score_array[2] += metrics[2]
        # score_array[3] += metrics[3]

        roc_auc.append(score_array[0] / batches)
        100 * true_positives / all_real_signals,
        100 * purity,
        100 * efficiency,
        100 * accuracy
    ]  # TODO report only two decimal places

import glob

validation_paths = glob.glob(
    '/storage/users/Muciaccia/data/validation/**/*.netCDF4', recursive=True)
# TODO 10 ends up sorted after 1 instead of after 9

metrics = []
for path in validation_paths:
    validation_dataset = xarray.open_dataset(path)
    metrics.append(compute_metrics(validation_dataset))

# TODO save the results to a file
# TODO make various plots of the results
# TODO take into account that the images are truncated from 148 to 128
# TODO maybe also run the validation with white noise without holes (and with the true relative amplitude of the signal)

import pandas

# TODO 'false alarms' is ugly and unclear; use something like 'misclassified noise'
metrics = pandas.DataFrame(metrics, columns=[
    'signal_intensity',
    'all_validation_samples',
    'rejected noise (%)',
    'false alarms (%)',
def from_dict_user(cls, d):
    metrics = cls()
    for key in d:
        metrics.append(Metric.from_dict({key: d[key]}, kind="user"))
    return metrics
def select(self, ds): assert len(ds.training_set ) > 0, 'Must have at least one training set in the dataset' assert len( ds.validation_set ) > 0, 'Must have at least one validation set in the dataset' logger = logging.getLogger() included_columns = [] for c in ds.input_working.columns: if self.__exclude_column_re is not None and self.__exclude_column_re.match( c): logger.info('- Excluding column due to exclude_column_re: %s' % c) continue if self.__include_column_re is not None and not self.__include_column_re.match( c): logger.info('- Including column due to include_column_re: %s' % c) continue included_columns.append(c) selected_columns = [] prev_best_metric = None best_metric = None best_metric_column = None while len(selected_columns) < len(included_columns): for i, next_column in enumerate(included_columns): logger.info('- Trying column %d of %d, "%s"' % (i + 1, len(included_columns), next_column)) if next_column in selected_columns: logger.info(' Column "%s" already selected, continuing' % next_column) continue current_columns = [] current_columns.extend(selected_columns) current_columns.append(next_column) metrics = [] if len(ds.training_set) == len(ds.validation_set): for ts, vs in zip(ds.training_set, ds.validation_set): x_train = ts.input[current_columns].values y_train = ts.output.values self.__model.fit(x_train, y_train) x_validation = vs.input[current_columns].values y_validation = vs.output.values y_validation_pred = self.__model.predict(x_validation) if self.__weight_metric_by_forecast_horizon: metric = 0. for k, fh in enumerate(ds.forecast_horizon): metric += (fh / np.max( ds.forecast_horizon)) * self.__metric( y_validation[:, k], y_validation_pred[:, k]) else: metric = self.__metric(y_validation, y_validation_pred) metrics.append(metric) else: x_train = ds.all_training_sets.input[ current_columns].values y_train = ds.all_training_sets.output.values self.__model.fit(x_train, y_train) for vs in ds.validation_set: x_validation = vs.input[current_columns].values y_validation = vs.output.values y_validation_pred = self.__model.predict(x_validation) if self.__weight_metric_by_forecast_horizon: metric = 0. for k, fh in enumerate(ds.forecast_horizon): metric += (fh / np.max( ds.forecast_horizon)) * self.__metric( y_validation[:, k], y_validation_pred[:, k]) else: metric = self.__metric(y_validation, y_validation_pred) metrics.append(metric) mean_metric = np.mean(metrics) log_message = ' Achieved metric %05f' % mean_metric if best_metric is not None: log_message += ', the maximum being %05f' % best_metric logger.info(' Achieved metric %05f' % mean_metric) if best_metric is None or best_metric < mean_metric: best_metric = mean_metric best_metric_column = next_column if prev_best_metric is not None and best_metric - prev_best_metric < self.__metric_improvement_threshold: break selected_columns.append(best_metric_column) logger.info( '*** Selected column "%s", which improved the metric to %05f' % (best_metric_column, best_metric)) logger.info('*** So far, selected %d columns: %s' % (len(selected_columns), ', '.join( ['"%s"' % sc for sc in selected_columns]))) prev_best_metric = best_metric logger.info('*** Final metric: %05f' % best_metric) return selected_columns
def xgboost_baseline(X,
                     y,
                     regression=False,
                     n_splits=10,
                     test_size=0.25,
                     eval_metric="auc",
                     optimized_metric=metrics.average_precision_score,
                     max_evals=10,
                     weight_imbalanced=False,
                     verbose=False,
                     random_state=777):
    """
    Quickly run the benchmark multiple times on a dataset to evaluate an
    xgboost model on it.

    At each iteration, we split the dataset into train / valid / test sets
    and train a tuned xgboost model. We return different classification
    performance metrics. If there are categorical features in the dataset,
    each categorical feature is transformed with LabelEncoder (from
    scikit-learn), then the data is one-hot encoded.

    :param X: whole feature set (pandas DataFrame)
    :param y: target vector (pandas Series)
    :param n_splits: number of iterations to repeat.
    :param test_size: size of the test set (this value is reused to split the
        (1-test_size) train set into train and valid sets).
    :param max_evals: try max_evals parameter combinations before returning
        the best hyperparameter combination.
    :param random_state: seed used by the random number generator for
        reproducibility.
    :return: classification performance metrics for each iteration: f1_score,
        f2_score, precision_score, recall_score,
        metrics.average_precision_score, roc_auc_score.
    """
    metrics = []
    rs = ShuffleSplit(
        n_splits=n_splits, test_size=test_size, random_state=random_state)
    for train_index, test_index in tqdm(rs.split(X), total=n_splits):
        # get train and test set
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]
        # Add valid set
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=test_size, random_state=random_state)
        # XGboost model
        if regression:
            xgboost = XGBRegressorTuning(
                X_train,
                y_train,
                X_val,
                y_val,
                eval_metric=eval_metric,
                optimized_metric=optimized_metric,
                max_evals=max_evals,
                verbose=verbose,
                random_state=random_state)
            metrics.append(
                regression_score_metrics(xgboost.model, X_test, y_test))
        else:
            xgboost = XGBClassifierTuning(
                X_train,
                y_train,
                X_val,
                y_val,
                eval_metric=eval_metric,
                optimized_metric=optimized_metric,
                max_evals=max_evals,
                weight_imbalanced=weight_imbalanced,
                verbose=verbose,
                random_state=random_state)
            metrics.append(
                classification_score_metrics(xgboost.model, X_test, y_test))
    return metrics
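# A minimal usage sketch for the benchmark helper above (hypothetical data);
# it relies only on the signature shown and on scikit-learn's
# make_classification, not on any project-specific code.
# from sklearn.datasets import make_classification
# import pandas as pd
# X_arr, y_arr = make_classification(n_samples=500, n_features=10, random_state=0)
# X_df, y_ser = pd.DataFrame(X_arr), pd.Series(y_arr)
# per_split_metrics = xgboost_baseline(X_df, y_ser, n_splits=3, max_evals=5)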
def binary_doc_prediction_test(model=DocumentLevelModel, test_mode=False): print print print print "Binary doc prediction" print "=" * 40 print s = model(test_mode=test_mode) s.generate_data(binarize=True) s.vectorize() for test_domain in CORE_DOMAINS: print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40) domain_uids = s.domain_uids(test_domain) no_studies = len(domain_uids) kf = KFold(no_studies, n_folds=5, shuffle=False, indices=True) # # tuned_parameters = {"alpha": np.logspace(-4, -1, 10)} # tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 5)]}] # clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall') # print "making scorer" # ftwo_scorer = make_scorer(fbeta_score, beta=2) tuned_parameters = {"alpha": np.logspace(-4, -1, 10), "class_weight": [{1: i, -1: 1} for i in np.logspace(-1, 1, 10)]} clf = GridSearchCV(SGDClassifier(loss="log", penalty="L2"), tuned_parameters, scoring="precision") metrics = [] for fold_i, (train, test) in enumerate(kf): X_train, y_train = s.X_y_uid_filtered(domain_uids[train], test_domain) X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain) clf.fit(X_train, y_train) y_preds = clf.predict(X_test) fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1] metrics.append(fold_metric) # get the scores for positive instances print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2]) if fold_i == 0: # make a plot of the first curve probas_ = clf.best_estimator_.predict_proba(X_test) # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1]) roc_auc = auc(fpr, tpr) print("Area under the ROC curve : %f" % roc_auc) # Plot ROC curve pl.clf() pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc) pl.plot([0, 1], [0, 1], 'k--') pl.xlim([0.0, 1.0]) pl.ylim([0.0, 1.0]) pl.xlabel('False Positive Rate') pl.ylabel('True Positive Rate') pl.title(test_domain) pl.legend(loc="lower right") pl.show() # summary score summary_metrics = np.mean(metrics, axis=0) print "=" * 40 print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2]) X_all, y_all = s.X_y_uid_filtered(domain_uids, test_domain) clf.fit(X_all, y_all) print show_most_informative_features(s.vectorizer, clf.best_estimator_)