Example no. 1
def eval_features(probs, labels, data_loader, num_features, split, is_acc):
    if is_acc:
        ImageNet_folder_label_dict = misc.load_ImageNet_label_dict()
        loader_label_folder_dict = {
            v: k
            for k, v in data_loader.dataset.data.class_to_idx.items()
        }
        loader_label_holder = labels
    else:
        top1, top5 = "N/A", "N/A"

    m_scores, m_std = calculate_kl_div(probs[:num_features], splits=split)

    if is_acc:
        converted_labels = []
        for loader_label in loader_label_holder:
            converted_labels.append(ImageNet_folder_label_dict[
                loader_label_folder_dict[loader_label]])
        pred = torch.argmax(probs, 1).detach().cpu().numpy() - 1
        top1 = top_k_accuracy_score([i + 1 for i in converted_labels],
                                    probs[:, 1:1001].detach().cpu().numpy(),
                                    k=1)
        top5 = top_k_accuracy_score([i + 1 for i in converted_labels],
                                    probs[:, 1:1001].detach().cpu().numpy(),
                                    k=5)
    return m_scores, m_std, top1, top5
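
A minimal sketch of the call pattern this snippet builds up to, with made-up shapes
instead of the real ImageNet setup: top_k_accuracy_score takes integer targets plus
one score column per class, and passing labels explicitly keeps the column-to-class
alignment unambiguous.

# Minimal sketch with made-up shapes (not the ImageNet data above).
import numpy as np
from sklearn.metrics import top_k_accuracy_score

rng = np.random.default_rng(0)
n_samples, n_classes = 8, 10
y_true = rng.integers(0, n_classes, size=n_samples)   # integer class ids
y_score = rng.random((n_samples, n_classes))          # one column per class

top1 = top_k_accuracy_score(y_true, y_score, k=1, labels=np.arange(n_classes))
top5 = top_k_accuracy_score(y_true, y_score, k=5, labels=np.arange(n_classes))
print(top1, top5)
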
Example no. 2
    def compute_and_log_metrics(self, y_true: np.ndarray, y_pred: np.ndarray,
                                y_proba: np.ndarray, subset: str):
        self.log(f"{subset}_macro_f1",
                 f1_score(y_true, y_pred, average="macro", zero_division=0))

        # if only two classes, skip top_k accuracy since not enough classes
        if self.num_classes > 2:
            for k in DEFAULT_TOP_K:
                if k < self.num_classes:
                    self.log(
                        f"{subset}_top_{k}_accuracy",
                        top_k_accuracy_score(
                            # top-k accuracy only supports the single-label case
                            y_true.argmax(axis=1),
                            y_proba,
                            labels=np.arange(y_proba.shape[1]),
                            k=k,
                        ),
                    )
        else:
            self.log(f"{subset}_accuracy", accuracy_score(y_true, y_pred))

        for metric_name, label, metric in compute_species_specific_metrics(
                y_true, y_pred, self.species):
            self.log(f"species/{subset}_{metric_name}/{label}", metric)
Example no. 3
def accuracy(y_true, y_pred, top_k=2):
    y_pred = bug_probability(y_pred)
    y_true = np.argmax(y_true, axis=1)
    top_k_acc_score = top_k_accuracy_score(y_true,
                                           y_pred,
                                           k=top_k,
                                           labels=np.arange(80))
    return top_k_acc_score
def main():
    args = parse_arguments()
    path_to_csv = os.path.join(args.out_dir, 'perf_clf.csv')

    if args.quantize:
        suffix = '_q'
    else:
        suffix = ''

    model_name = f'model_{args.model_arch}.pt'

    embeddings_train = np.load(os.path.join(args.input_dir, f'embeddings_{args.model_arch}_train{suffix}.npy'))
    embeddings_val = np.load(os.path.join(args.input_dir, f'embeddings_{args.model_arch}_val{suffix}.npy'))

    X_train = embeddings_train[:, :-1]
    y_train = embeddings_train[:, -1].astype(np.int64)
    X_val = embeddings_val[:, :-1]
    y_val = embeddings_val[:, -1].astype(np.int64)

    clf = GaussianNaiveBayes()
    clf.fit(X_train, y_train)

    y_jll_train = clf.predict_likelihood(X_train)
    y_jll_val = clf.predict_likelihood(X_val)

    acc_train = accuracy_score(y_train, y_jll_train.argmax(-1))
    acc_val = accuracy_score(y_val, y_jll_val.argmax(-1))

    print(model_name)
    print('\tAccuracy@1: {:.2%}'.format(top_k_accuracy_score(y_val, y_jll_val, k=1)))
    print('\tAccuracy@5: {:.2%}'.format(top_k_accuracy_score(y_val, y_jll_val, k=5)))
    print('\tAccuracy@10: {:.2%}'.format(top_k_accuracy_score(y_val, y_jll_val, k=10)))

    header = ''
    mode = 'a'
    if not os.path.exists(path_to_csv):
        header += 'architecture;train_acc;val_acc'
        mode += '+'

    with open(path_to_csv, mode) as csv:
        if header:
            csv.write(f'{header}\n')

        result = f'{args.model_arch}{suffix};{acc_train};{acc_val}'
        csv.write(f'{result}\n')
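
The scores passed to top_k_accuracy_score here (y_jll_val, apparently joint
log-likelihoods) are not probabilities, and that is fine: only the per-row ranking
matters, so any row-wise monotonic normalization gives the same result. A sketch on
synthetic data, using scikit-learn's GaussianNB as a stand-in for the custom
GaussianNaiveBayes above:

from sklearn.datasets import make_classification
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import top_k_accuracy_score

X, y = make_classification(n_samples=400, n_features=6, n_informative=4,
                           n_classes=5, random_state=0)
clf = GaussianNB().fit(X, y)

# The ranking is invariant to the (monotonic) normalization from
# log-probabilities to probabilities, so both calls give the same Accuracy@3.
top3_from_proba = top_k_accuracy_score(y, clf.predict_proba(X), k=3)
top3_from_logp = top_k_accuracy_score(y, clf.predict_log_proba(X), k=3)
assert top3_from_proba == top3_from_logp
print('Accuracy@3: {:.2%}'.format(top3_from_proba))
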
 def test_top_k_accuracy_evaluator(self):
     gts = [[0.4, 1.0, 1.0], [0.4666666, 0.7333333, 1.0]]
     for k_idx, top_k in enumerate([1, 2, 5]):
         for i, (targets, predictions) in enumerate(zip(self.TARGETS, self.PREDICTIONS)):
             eval = TopKAccuracyEvaluator(top_k)
             eval.add_predictions(predictions, targets)
             top_k_acc = eval.get_report()[f"accuracy_top{top_k}"]
             import sklearn.metrics as sm
             if predictions.shape[1] == 2:
                 predictions = predictions[:, 1]
             self.assertAlmostEqual(sm.top_k_accuracy_score(targets, predictions, k=top_k), top_k_acc)
             self.assertAlmostEqual(top_k_acc, gts[i][k_idx], places=5)
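
The slice predictions[:, 1] above matches scikit-learn's binary convention: for two
classes, top_k_accuracy_score expects a 1-D score for the class with the greater
label. A toy sketch:

import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 1, 1, 0])
proba = np.array([[0.8, 0.2],
                  [0.3, 0.7],
                  [0.6, 0.4],
                  [0.9, 0.1]])

top1 = top_k_accuracy_score(y_true, proba[:, 1], k=1)
print(top1)  # 0.75: the sample at index 2 is positive but scores only 0.4
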
def multiclass_report(x_train,
                      y_train,
                      x_val,
                      y_val,
                      clf=None,
                      dataset_name=None):
    """Utility function to score classifier
    Pass in the classifier if you want to test train, test times etc.
    """
    n_classes = len(set(y_train))
    labels = sorted(list(set(y_train)))

    with Timer() as train_time:
        clf.fit(x_train, y_train)

    with Timer() as test_time:
        y_pred_proba = clf.predict_proba(x_val)

    y_pred = np.argmax(y_pred_proba, axis=1)

    results = {
        'Train time': train_time.elapsed,
        'Test time': test_time.elapsed
    }
    results['clf'] = clf.__class__.__name__
    results['dataset'] = dataset_name
    results['Weighted Fscore'] = metrics.f1_score(y_val,
                                                  y_pred,
                                                  average='weighted')
    results['Top-1 score'] = metrics.top_k_accuracy_score(y_val,
                                                          y_pred_proba,
                                                          k=1)
    results['Top-5 score'] = metrics.top_k_accuracy_score(
        y_val, y_pred_proba, k=5) if n_classes > 5 else None
    results['n_classes'] = n_classes
    results['n_train_samples'] = len(x_train)
    results['n_test_samples'] = len(x_val)

    return results
Example no. 7
    def get_metrics(self):
        all_true = self.true_labels
        all_scores = self.pred_scores

        metric_dict = {}

        for k in [1, 3, 5]:
            metric_dict[f"Acc@{k}"] = metrics.top_k_accuracy_score(
                y_true=all_true,
                y_score=all_scores,
                k=k,
                labels=list(range(all_scores.shape[1])))

        return metric_dict
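
Why these snippets keep passing labels=list(range(n_classes)): if labels is omitted,
the classes are inferred from y_true, and any class missing from the current batch
makes the inferred count disagree with y_score.shape[1]. A toy sketch of the failure
and the fix:

import numpy as np
from sklearn.metrics import top_k_accuracy_score

n_classes = 4
y_true = np.array([0, 2, 2, 1])          # class 3 never occurs in this batch
y_score = np.random.default_rng(0).random((4, n_classes))

try:
    top_k_accuracy_score(y_true, y_score, k=2)
except ValueError as err:
    print("without labels:", err)

print("with labels:",
      top_k_accuracy_score(y_true, y_score, k=2, labels=list(range(n_classes))))
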
Example no. 8
    def test(self, images, targets, use_aux=False):
        result = self.net.forward(images)
        result = self.wrap_result(result, use_aux)
        loss_record = self.calculate_losses(result, targets, use_aux)

        # Accuracy counts
        model_prediction = result['final'].cpu().numpy()
        targets_np = targets.cpu().numpy()
        accuracy = {
            'top1':
            top_k_accuracy_score(targets_np,
                                 model_prediction,
                                 k=1,
                                 normalize=False,
                                 labels=range(self.n_classes)),
            'top5':
            top_k_accuracy_score(targets_np,
                                 model_prediction,
                                 k=5,
                                 normalize=False,
                                 labels=range(self.n_classes))
        }

        return [loss_record, accuracy]
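
With normalize=False, as used above, the metric returns the number of correctly
ranked samples instead of a fraction, which is convenient when accumulating counts
across batches. A toy sketch:

import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 1, 2, 2])
y_score = np.array([[0.7, 0.2, 0.1],
                    [0.1, 0.2, 0.7],
                    [0.2, 0.3, 0.5],
                    [0.4, 0.4, 0.2]])

n_hits = top_k_accuracy_score(y_true, y_score, k=2, normalize=False,
                              labels=np.arange(3))
print(n_hits)  # 3 of the 4 samples rank their true class in the top 2
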
    def evaluate_objective(self, data_split, neg_sampling_strategy=None, negative_factor=1):
        at = [1, 3, 5, 10]
        count = 0
        scores = defaultdict(list)

        for input_nodes, seeds, blocks in getattr(self, f"{data_split}_loader"):
            blocks = [blk.to(self.device) for blk in blocks]

            if self.masker is None:
                masked = None
            else:
                masked = self.masker.get_mask(self.seeds_to_python(seeds))

            src_embs = self._graph_embeddings(input_nodes, blocks, masked=masked)
            node_embs_, element_embs_, labels = self.prepare_for_prediction(
                src_embs, seeds, self.target_embedding_fn, negative_factor=negative_factor,
                neg_sampling_strategy=neg_sampling_strategy,
                train_embeddings=False
            )
            # indices = self.seeds_to_global(seeds).tolist()
            # labels = self.target_embedder[indices]
            # labels = torch.LongTensor(labels).to(self.device)
            acc, loss, logits = self.compute_acc_loss(node_embs_, element_embs_, labels, return_logits=True)

            y_pred = nn.functional.softmax(logits, dim=-1).to("cpu").numpy()
            y_true = np.zeros(y_pred.shape)
            y_true[np.arange(0, y_true.shape[0]), labels.to("cpu").numpy()] = 1.

            if self.measure_scores:
                if count % self.dilate_scores == 0:
                    y_true_onehot = np.array(y_true)
                    labels = list(range(y_true_onehot.shape[1]))

                    for k in at:
                        scores[f"ndcg@{k}"].append(ndcg_score(y_true, y_pred, k=k))
                        scores[f"acc@{k}"].append(
                            top_k_accuracy_score(y_true_onehot.argmax(-1), y_pred, k=k, labels=labels)
                        )

            scores["Loss"].append(loss.item())
            scores["Accuracy"].append(acc)
            count += 1

        if count == 0:
            count += 1

        scores = {key: sum_scores(val) for key, val in scores.items()}
        return scores
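
ndcg_score and top_k_accuracy_score want different target encodings, which is why
the code above builds a one-hot matrix and then argmaxes it back: ndcg_score takes a
relevance matrix, top_k_accuracy_score takes integer labels. A toy sketch:

import numpy as np
from sklearn.metrics import ndcg_score, top_k_accuracy_score

y_pred = np.array([[0.5, 0.3, 0.2],
                   [0.1, 0.6, 0.3]])
y_true_labels = np.array([0, 2])                 # integer targets
y_true_onehot = np.eye(3)[y_true_labels]         # relevance matrix for ndcg

print(ndcg_score(y_true_onehot, y_pred, k=2))
print(top_k_accuracy_score(y_true_labels, y_pred, k=2, labels=np.arange(3)))
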
 def validation_step(self, batch, batch_idx):
     sequence = batch["sequence"]
     sequence_lengths = batch["sequence_lengths"]
     target = batch["target"]
     last_items = batch["last_item"]
     logits = self.forward(batch)
     loss = self.criterion(logits, target, sequence_lengths)
     last_item_predictions = torch.softmax(logits[:, -1], dim=1)
     accuracies = {
         f"valid_acc@{k}":
         top_k_accuracy_score(last_items.detach().cpu().numpy(),
                              last_item_predictions.detach().cpu().numpy(),
                              k=k,
                              labels=np.arange(self.num_items))
         for k in [20, 50, 100]
     }
     return {"valid_loss": loss, **accuracies}
    def _update_metrics(self, prediction, ground_truth):
        top_k_accuracy = top_k_accuracy_score(ground_truth,
                                              prediction,
                                              k=self._k)
        prediction = prediction.argmax(axis=1)
        accuracy = accuracy_score(ground_truth, prediction)
        precision, recall, fscore, _ = precision_recall_fscore_support(
            ground_truth, prediction, average='macro', zero_division=1)

        self.accuracy = self._compute_moving_average(self.accuracy,
                                                     accuracy * 100)
        self.top_k_accuracy = self._compute_moving_average(
            self.top_k_accuracy, top_k_accuracy * 100)
        self.precision = self._compute_moving_average(self.precision,
                                                      precision * 100)
        self.recall = self._compute_moving_average(self.recall, recall * 100)
        self.fscore = self._compute_moving_average(self.fscore, fscore * 100)
Example no. 12
    def test_model(self, test_category_to_docIDs: dict,
                   categories_to_corpus: dict) -> dict:
        """
        Test model performance on the test set. Calculates metrics: accuracy,
        top2, top3, precision, recall and prediction time.

        Parameters
        ----------
        test_category_to_docIDs: dict
            Category to DocIDs for test data.
        categories_to_corpus: dict
            Category to category market matrix corpus.

        Returns
        -------
        metrics: dict
            Dictionary with accuracy, top2, top3, precision, recall and time
        """
        n_batches = 10
        metrics = {
            "accuracy": 0,
            "top2": 0,
            "top3": 0,
            "precision": 0,
            "recall": 0,
            "time": 0,
        }

        # Load data:
        X_test_all, y_test_all = self.load_batch_data(categories_to_corpus,
                                                      test_category_to_docIDs,
                                                      -1)
        n_docs_per_batch = int(len(X_test_all) / n_batches)
        for i in range(n_batches - 1):
            print(f"Test {i}")
            X_test = X_test_all[i * n_docs_per_batch:(i + 1) *
                                n_docs_per_batch]
            y_test = y_test_all[i * n_docs_per_batch:(i + 1) *
                                n_docs_per_batch]
            # Predict data:
            y_predicted = self.skmodel.predict(X_test)
            start = time()
            y_score = self.skmodel.predict_proba(X_test)
            end = time()
            # Calculate time elapsed
            total_time = end - start
            # Calculate metrics:
            test_accuracy = accuracy_score(y_true=y_test, y_pred=y_predicted)
            test_precision = precision_score(y_true=y_test,
                                             y_pred=y_predicted,
                                             average="macro")
            test_recall = recall_score(y_true=y_test,
                                       y_pred=y_predicted,
                                       average="macro")
            test_top2 = top_k_accuracy_score(y_true=y_test,
                                             y_score=y_score,
                                             k=2,
                                             labels=np.unique(y_test))
            test_top3 = top_k_accuracy_score(y_true=y_test,
                                             y_score=y_score,
                                             k=3,
                                             labels=np.unique(y_test))
            # Save and Report metrics:
            metrics["accuracy"] = (metrics["accuracy"] + test_accuracy) / 2
            metrics["top2"] = (metrics["top2"] + test_top2) / 2
            metrics["top3"] = (metrics["top3"] + test_top3) / 2
            metrics["precision"] = (metrics["precision"] + test_precision) / 2
            metrics["recall"] = (metrics["recall"] + test_recall) / 2
            metrics["time"] = (metrics["time"] + total_time) / 2
            print(metrics)
            # Delete to save space:
            del X_test
            del y_test

        return metrics
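
One note on the bookkeeping above: metrics["x"] = (metrics["x"] + new) / 2 is an
exponentially decaying average that weights later batches more heavily, not the mean
over all batches. If a plain mean is the intent, a count-based incremental update is
a small change; a sketch with made-up batch values:

running = {"accuracy": 0.0, "top2": 0.0}
n_batches_seen = 0

def update_mean(running, batch_metrics, n):
    # incremental mean: new_mean = old_mean + (x - old_mean) / n_seen
    n += 1
    for key, value in batch_metrics.items():
        running[key] += (value - running[key]) / n
    return n

n_batches_seen = update_mean(running, {"accuracy": 0.8, "top2": 0.9}, n_batches_seen)
n_batches_seen = update_mean(running, {"accuracy": 0.6, "top2": 0.7}, n_batches_seen)
print(running)  # roughly {'accuracy': 0.7, 'top2': 0.8}
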
Example no. 13
# normalization

x_test = x_test.astype('float32')/255

# load model
model = load_model('../Models/fruits_keras_model6.h5')
# evaluate model on test dataset
pred_prob = model.predict(x_test)
#pred_class = model.predict_classes(x_test)
pred_class = np.argmax(pred_prob, axis=-1)
# reduce to 1D array
# pred_class = pred_class[:, 0]
# print(pred_class)
# metrics
accuracy = accuracy_score(y_test, pred_class)
k_accuracy = top_k_accuracy_score(y_test, pred_prob, k=5)
print('accuracy =  %.3f' % (accuracy * 100.0),
      'top-5 accuracy = %.3f' % (k_accuracy*100))
"""precision = precision_score(y_test, pred_class)
recall = recall_score(y_test, pred_class)"""
report = classification_report(y_test, pred_class)
print("Classification Report: ")
print(report)
"""f1 = f1_score(y_test, pred_class,average='macro')
print("f1 score: ", f1)"""
confusionMatrix = confusion_matrix(
    y_test, pred_class)  # row(true), column(predicted)
np.set_printoptions(threshold=sys.maxsize)
print("Confusion matrix: ")
print(confusionMatrix)
np.set_printoptions(threshold=1000)  # restore the default summarization threshold
Example no. 14
def run(config: dict, holdout: bool, debug: bool) -> None:
    log("Run with configuration:")
    log(f"{config}")
    seed_everything(config["seed"])

    with span("Load train and test set:"):
        train_test_set = load_train_test_set(config)
        log(f"{train_test_set.shape}")
        emb_df = pd.read_csv("./data/interim/emb_df.csv")
        n_emb = emb_df.shape[1] - 1
        emb_cols = [str(i) for i in range(n_emb)]
        emb_df.rename(columns={"city_id": "past_city_id"}, inplace=True)

    with span("Preprocessing:"):
        with span("Shift target values for input sequence."):
            unk_city_id = 0
            train_test_set["past_city_id"] = (
                train_test_set.groupby("utrip_id")["city_id"].shift(1).fillna(
                    unk_city_id).astype(int))
            unk_hotel_country = "UNK"
            train_test_set["past_hotel_country"] = (
                train_test_set.groupby("utrip_id")["hotel_country"].shift(
                    1).fillna(unk_hotel_country).astype(str))
            train_test_set = pd.merge(train_test_set,
                                      emb_df,
                                      on="past_city_id",
                                      how="left")
            train_test_set[emb_cols] = train_test_set[emb_cols].fillna(0)
            train_test_set["city_embedding"] = train_test_set[emb_cols].apply(
                lambda x: list(x), axis=1)

        with span("Encode of target values."):
            target_le = preprocessing.LabelEncoder()
            train_test_set["city_id"] = target_le.fit_transform(
                train_test_set["city_id"])
            train_test_set["past_city_id"] = target_le.transform(
                train_test_set["past_city_id"])

        with span("Add features."):
            log("Convert data type of checkin and checkout.")
            train_test_set["checkin"] = pd.to_datetime(
                train_test_set["checkin"])
            train_test_set["checkout"] = pd.to_datetime(
                train_test_set["checkout"])

            log("Create month_checkin feature.")
            train_test_set["month_checkin"] = train_test_set[
                "checkin"].dt.month
            train_test_set["year_checkin"] = train_test_set["checkin"].dt.year

            log("Create days_stay feature.")
            train_test_set["days_stay"] = (
                train_test_set["checkout"] -
                train_test_set["checkin"]).dt.days.apply(lambda x: np.log10(x))

            log("Create num_checkin feature.")
            train_test_set["num_checkin"] = (train_test_set.groupby(
                "utrip_id")["checkin"].rank().apply(lambda x: np.log10(x)))

            log("Create days_move feature.")
            train_test_set["past_checkout"] = train_test_set.groupby(
                "utrip_id")["checkout"].shift(1)
            train_test_set["days_move"] = (
                (train_test_set["checkin"] - train_test_set["past_checkout"]
                 ).dt.days.fillna(0).apply(lambda x: np.log1p(x)))

            log("Create aggregation features.")
            num_visit_drop_duplicates = train_test_set.query("city_id != 0")[[
                "user_id", "city_id"
            ]].drop_duplicates().groupby("city_id").size().apply(
                lambda x: np.log1p(x)).reset_index()
            num_visit_drop_duplicates.columns = [
                "past_city_id", "num_visit_drop_duplicates"
            ]
            num_visit = train_test_set.query("city_id != 0")[[
                "user_id", "city_id"
            ]].groupby("city_id").size().apply(
                lambda x: np.log1p(x)).reset_index()
            num_visit.columns = ["past_city_id", "num_visit"]
            num_visit_same_city = train_test_set[
                train_test_set['city_id'] == train_test_set['city_id'].shift(
                    1)].groupby("city_id").size().apply(
                        lambda x: np.log1p(x)).reset_index()
            num_visit_same_city.columns = [
                "past_city_id", "num_visit_same_city"
            ]
            train_test_set = pd.merge(train_test_set,
                                      num_visit_drop_duplicates,
                                      on="past_city_id",
                                      how="left")
            train_test_set = pd.merge(train_test_set,
                                      num_visit,
                                      on="past_city_id",
                                      how="left")
            train_test_set = pd.merge(train_test_set,
                                      num_visit_same_city,
                                      on="past_city_id",
                                      how="left")
            train_test_set["num_visit_drop_duplicates"].fillna(0, inplace=True)
            train_test_set["num_visit"].fillna(0, inplace=True)
            train_test_set["num_visit_same_city"].fillna(0, inplace=True)
            train_test_set["num_stay_consecutively"] = train_test_set.groupby(
                ["utrip_id", "past_city_id"])["past_city_id"].rank(
                    method="first").fillna(1).apply(lambda x: np.log1p(x))

        with span("Encode of categorical values."):
            cat_le = {}
            for c in CATEGORICAL_COLS:
                le = preprocessing.LabelEncoder()
                train_test_set[c] = le.fit_transform(
                    train_test_set[c].fillna("UNK").astype(str).values)
                cat_le[c] = le

        train = train_test_set[train_test_set["row_num"].isnull()]
        test = train_test_set[~train_test_set["row_num"].isnull()]

        with span("aggregate features by utrip_id"):
            x_train, x_test_using_train, x_test = [], [], []
            for c in ["city_id", "past_city_id"
                      ] + CATEGORICAL_COLS + NUMERICAL_COLS:
                x_train.append(train.groupby("utrip_id")[c].apply(list))
                x_test.append(test.groupby("utrip_id")[c].apply(list))
                x_test_using_train.append(
                    test.groupby("utrip_id")[c].apply(lambda x: list(x)[:-1]))
            x_train = pd.concat(x_train, axis=1)
            x_test = pd.concat(x_test, axis=1)
            x_test_using_train = pd.concat(x_test_using_train, axis=1)

        with span("sampling training data"):
            x_train["n_trips"] = x_train["city_id"].map(lambda x: len(x))
            x_test_using_train["n_trips"] = x_test_using_train["city_id"].map(
                lambda x: len(x))
            x_train = (x_train.query("n_trips > 2").sort_values(
                "n_trips").reset_index(drop=True))
            x_test_using_train = (
                x_test_using_train.sort_values("n_trips").reset_index(
                    drop=True))
            x_test = x_test.reset_index(drop=True)
            log(f"x_train: {x_train.shape}, x_test: {x_test.shape}")

        if debug:
            log("'--debug' specified. Shrink data size into 1000.")
            x_train = x_train.iloc[:1000]
            x_test = x_test.iloc[:1000]
            config["params"]["num_epochs"] = 2
            log(f"x_train: {x_train.shape}, x_test: {x_test.shape}")

    with span("Prepare data loader for test:"):
        test_dataset = Dataset(x_test, is_train=False)
        test_dataloader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=1,
            num_workers=os.cpu_count(),
            pin_memory=True,
            collate_fn=Collator(is_train=False),
            shuffle=False,
        )

    with span("Get folds:"):
        cv = StratifiedKFold(
            n_splits=config["fold"]["n_splits"],
            shuffle=config["fold"]["shuffle"],
        )
        folds = cv.split(x_train, pd.cut(x_train["n_trips"], 5, labels=False))

    log("Training:")
    oof_preds = np.zeros((len(x_train), len(target_le.classes_)),
                         dtype=np.float32)
    test_preds = np.zeros((len(x_test), len(target_le.classes_)),
                          dtype=np.float32)

    for i_fold, (trn_idx, val_idx) in enumerate(folds):
        if holdout and i_fold > 0:
            break
        with span(f"Fold = {i_fold}"):
            x_trn = x_train.loc[trn_idx, :]
            x_val = x_train.loc[val_idx, :]
            x_trn = pd.concat([x_trn, x_test_using_train],
                              axis=0,
                              ignore_index=True)
            train_dataset = Dataset(x_trn, is_train=True)
            valid_dataset = Dataset(x_val, is_train=True)
            train_dataloader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size=config["params"]["bacth_size"],
                num_workers=os.cpu_count(),
                pin_memory=True,
                collate_fn=Collator(is_train=True),
                shuffle=True,
            )
            valid_dataloader = torch.utils.data.DataLoader(
                valid_dataset,
                batch_size=1,
                num_workers=os.cpu_count(),
                pin_memory=True,
                collate_fn=Collator(is_train=True),
                shuffle=False,
            )
            model_cls = MODELS[config["model_name"]]
            model = model_cls(
                n_city_id=len(target_le.classes_),
                n_booker_country=len(cat_le["booker_country"].classes_),
                n_device_class=len(cat_le["device_class"].classes_),
                n_affiliate_id=len(cat_le["affiliate_id"].classes_),
                n_month_checkin=len(cat_le["month_checkin"].classes_),
                n_hotel_country=len(cat_le["past_hotel_country"].classes_),
                emb_dim=config["params"]["emb_dim"],
                rnn_dim=config["params"]["rnn_dim"],
                dropout=config["params"]["dropout"],
                rnn_dropout=config["params"]["rnn_dropout"],
            )
            if i_fold == 0:
                log(f"{summary(model)}")

            criterion = FocalLossWithOutOneHot(gamma=0.5)
            # Prepare optimizer
            param_optimizer = list(model.named_parameters())
            no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
            optimizer_grouped_parameters = [
                {
                    "params": [
                        p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)
                    ],
                    "weight_decay":
                    0.01,
                },
                {
                    "params": [
                        p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)
                    ],
                    "weight_decay":
                    0.0,
                },
            ]
            optimizer = AdamW(
                optimizer_grouped_parameters,
                lr=1e-4,
                weight_decay=0.01,
            )
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=30, eta_min=1e-6)
            logdir = (Path(config["output_dir_path"]) / config["exp_name"] /
                      f"fold{i_fold}")
            loaders = {"train": train_dataloader, "valid": valid_dataloader}
            runner = CustomRunner(device=DEVICE)
            runner.train(
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                scheduler=scheduler,
                loaders=loaders,
                main_metric="accuracy04",
                minimize_metric=False,
                logdir=logdir,
                num_epochs=config["params"]["num_epochs"],
                verbose=True,
            )

            log("Predictions using validation data")
            oof_preds[val_idx, :] = np.array(
                list(
                    map(
                        lambda x: x.cpu().numpy()[-1, :],
                        runner.predict_loader(
                            loader=valid_dataloader,
                            resume=f"{logdir}/checkpoints/best.pth",
                            model=model,
                        ),
                    )))
            y_val = x_val["city_id"].map(lambda x: x[-1]).values
            score = top_k_accuracy_score(y_val,
                                         oof_preds[val_idx, :],
                                         k=4,
                                         labels=np.arange(
                                             len(target_le.classes_)))
            log(f"val acc@4: {score}")
            np.save(
                Path(config["output_dir_path"]) / config["exp_name"] /
                f"y_val_pred_fold{i_fold}",
                oof_preds[val_idx, :],
            )

            test_preds_ = np.array(
                list(
                    map(
                        lambda x: x.cpu().numpy()[-1, :],
                        runner.predict_loader(
                            loader=test_dataloader,
                            resume=f"{logdir}/checkpoints/best.pth",
                            model=model,
                        ),
                    )))
            test_preds += test_preds_ / cv.n_splits
            np.save(
                Path(config["output_dir_path"]) / config["exp_name"] /
                f"y_test_pred_fold{i_fold}",
                test_preds_,
            )

    log("Evaluation OOF valies:")
    y_train = x_train["city_id"].map(lambda x: x[-1])
    score = top_k_accuracy_score(y_train,
                                 oof_preds,
                                 k=4,
                                 labels=np.arange(len(target_le.classes_)))
    log(f"oof acc@4: {score}")

    log("Save files:")
    np.save(
        Path(config["output_dir_path"]) / config["exp_name"] / f"y_oof_pred",
        oof_preds,
    )
    np.save(
        Path(config["output_dir_path"]) / config["exp_name"] / f"y_test_pred",
        test_preds,
    )
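
The labels=np.arange(len(target_le.classes_)) passed to the acc@4 calls above works
because LabelEncoder maps the raw city ids to 0..n_classes-1 in sorted order, so the
model's output columns and the encoded targets line up; inverse_transform maps a
column index back to the raw id. A toy sketch with made-up city ids:

import numpy as np
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
encoded = le.fit_transform([1002, 17, 555, 17, 1002])   # made-up city ids
print(le.classes_)                   # [  17  555 1002] -> score columns 0, 1, 2
scores = np.array([0.1, 0.2, 0.7])   # one row of model output
top1_column = scores.argmax()
print(le.inverse_transform([top1_column]))   # -> [1002]
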
Example no. 15
        verb_labels = list(verbs_classes.values())
        noun_labels = list(nouns_classes.values())
    else:
        verb_labels = list(verbs_categories_classes.values())
        noun_labels = list(nouns_categories_classes.values())

    # compute and display the metrics
    print('\nCompute metrics')
    y_true_verb_labels = np.argmax(y_true_verb, axis=-1)
    y_pred_verb_labels = np.argmax(y_pred_verb, axis=-1)
    metrics = {
        'verb_top_1_accuracy':
        [accuracy_score(y_true_verb_labels, y_pred_verb_labels)],
        'verb_top_3_accuracy': [
            top_k_accuracy_score(y_true_verb_labels,
                                 y_pred_verb,
                                 k=3,
                                 labels=verb_labels)
        ],
        'verb_top_5_accuracy': [
            top_k_accuracy_score(y_true_verb_labels,
                                 y_pred_verb,
                                 k=5,
                                 labels=verb_labels)
        ],
        'verb_confusion_matrix': [
            confusion_matrix(y_true_verb_labels,
                             y_pred_verb_labels,
                             labels=verb_labels)
        ]
    }
Example no. 16
def analyse_od(model: str, dataset: str, split: str, pivot_file: TextIO):
    """
    TODO
    """
    if split == "kh":
        return

    source_dataset = load_dataset(f"{dataset}.txt")

    label_indices = get_label_indices(source_dataset)

    numeric_labels = list(range(len(label_indices)))
    num_labels = len(numeric_labels)

    split_name = split if split != "kh" else f"kh-{model}"
    split_path = f"{dataset}.strat-0.15.{split_name}.splits"

    holdout_dataset = load_dataset(os.path.join(split_path, "holdout.txt"))

    schedule_dataset = load_dataset(os.path.join(split_path, "schedule.txt"))

    y_true = [label_indices[label] for label in holdout_dataset.values()]

    splitter = TopNSplitter(50)

    iteration = 0
    cumulative_corrections = 0
    _, remaining_dataset = splitter(schedule_dataset)
    while True:
        holdout_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/predictions")

        if not os.path.exists(holdout_predictions_path):
            break

        holdout_predictions = load_rois_predictions(holdout_predictions_path,
                                                    holdout_dataset,
                                                    num_labels)

        y_score = list(holdout_predictions.values())

        y_score = [
            coerce_incorrect(num_labels, truth, prediction)
            for truth, prediction in zip(y_true, y_score)
        ]

        top_1 = top_k_accuracy_score(y_true,
                                     y_score,
                                     k=1,
                                     labels=numeric_labels,
                                     normalize=True)

        pivot_file.write(",".join(
            map(str, [
                model, dataset, split, iteration, "holdout", "accuracy", top_1
            ])) + "\n")

        update_dataset, remaining_dataset = splitter(remaining_dataset)

        update_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/update_predictions")

        if os.path.exists(update_predictions_path):
            update_y_true = [
                label_indices[label] for label in update_dataset.values()
            ]
            update_predictions = load_rois_predictions(update_predictions_path,
                                                       update_dataset,
                                                       num_labels)
            update_y_score = list(update_predictions.values())
            update_y_score = [
                coerce_incorrect(num_labels, truth, prediction)
                for truth, prediction in zip(update_y_true, update_y_score)
            ]
            update_top_1 = top_k_accuracy_score(update_y_true,
                                                update_y_score,
                                                k=1,
                                                labels=numeric_labels,
                                                normalize=True)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update", "accuracy",
                    update_top_1
                ])) + "\n")
            cumulative_corrections += int((1 - update_top_1) * 50)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update",
                    "cumulative_corrections", cumulative_corrections
                ])) + "\n")

        iteration += 1
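
With k=1, as used for the holdout and update rows above, the score reduces (up to
tie-breaking) to ordinary top-1 accuracy of the argmax prediction. A toy sketch:

import numpy as np
from sklearn.metrics import accuracy_score, top_k_accuracy_score

y_true = [0, 1, 2, 1]
y_score = np.array([[0.8, 0.1, 0.1],
                    [0.2, 0.5, 0.3],
                    [0.3, 0.4, 0.3],
                    [0.1, 0.7, 0.2]])

top1 = top_k_accuracy_score(y_true, y_score, k=1, labels=[0, 1, 2])
acc = accuracy_score(y_true, np.argmax(y_score, axis=1))
assert top1 == acc  # 0.75 here: sample 2 is ranked as class 1
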
Example no. 17
def train_and_validate(model, train_data_loader, val_data_loader, cfg, experiment=None):

    # Set visible devices
    parallel_model = cfg.performance.parallel_mode
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # Set cuda
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        torch.cuda.empty_cache()
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model.to(device)

    # CUDNN Auto-tuner. Use True when input size and model is static
    torch.backends.cudnn.benchmark = cfg.performance.cuddn_auto_tuner

    if cfg.training.freeze_lower:
        for p in model.parameters():
            p.requires_grad = False
        model.Linear_layer.weight.requires_grad = True
        model.Linear_layer.bias.requires_grad = True

    # Create Criterion and Optimizer
    if cfg.optimizer.loss_function == 'hinge':
        metric_name = 'R2'
        goal_type = 'regression'
        # Set loss criterion
        criterion = HingeLossRegression(cfg.optimizer.loss_epsilon, reduction=None)
        # Hinge loss is dependent on L2 regularization so we cannot use AdamW
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                     lr=cfg.optimizer.learning_rate, weight_decay=cfg.optimizer.weight_decay)
    elif cfg.optimizer.loss_function == 'mse':
        metric_name = 'R2'
        goal_type = 'regression'
        # Set loss criterion
        criterion = nn.MSELoss(reduction='none')
        # Set optimizer
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                      lr=cfg.optimizer.learning_rate, weight_decay=cfg.optimizer.weight_decay)
    elif cfg.optimizer.loss_function == 'cross-entropy':
        metric_name = 'Accuracy'
        goal_type = 'classification'
        # Get counts for each class
        # Instantiate class counts to 1 instead of 0 to prevent division by zero in case data is missing
        class_counts = np.array(cfg.model.n_classes*[1])
        for i in train_data_loader.dataset.unique_exams['target'].value_counts().index:
            class_counts[i] = train_data_loader.dataset.unique_exams['target'].value_counts().loc[i]
        # Calculate the inverse normalized ratio for each class
        weights = class_counts / class_counts.sum()
        weights = 1 / weights
        weights = weights / weights.sum()
        weights = torch.FloatTensor(weights).cuda()
        criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                      lr=cfg.optimizer.learning_rate, weight_decay=cfg.optimizer.weight_decay)

    elif cfg.optimizer.loss_function == 'all-threshold':
        metric_name = 'Accuracy'
        goal_type = 'ordinal-regression'
        # Get counts for each class
        # Instantiate class counts to 1 instead of 0 to prevent division by zero in case data is missing
        class_counts = np.array(len(train_data_loader.dataset.unique_exams['target'].unique())*[1])
        for i in train_data_loader.dataset.unique_exams['target'].value_counts().index:
            class_counts[i] = train_data_loader.dataset.unique_exams['target'].value_counts().loc[i]
        # Calculate the inverse normalized ratio for each class
        weights = class_counts / class_counts.sum()
        weights = 1 / weights
        weights = weights / weights.sum()
        weights = torch.FloatTensor(weights).cuda()
        criterion = OrdinalRegressionAT(sample_weights=weights, reduction=None)
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                      lr=cfg.optimizer.learning_rate, weight_decay=cfg.optimizer.weight_decay)

    if parallel_model:
        print("Available GPUS: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)

    use_half_prec = cfg.performance.half_precision

    # Initialize GradScaler for autocasting
    scaler = GradScaler(enabled=use_half_prec)

    print('Model parameters: {}'.format(sum(p.numel() for p in model.parameters() if p.requires_grad)))

    mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])
    mem_buffs = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])
    mem = mem_params + mem_buffs  # in bytes
    print('Model memory size: {}'.format(mem))

    # Initialize scheduler
    use_scheduler = cfg.optimizer.use_scheduler
    if use_scheduler:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=cfg.optimizer.s_patience, factor=cfg.optimizer.s_factor)
        #scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, total_steps=len(train_data_loader))
        #scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,150,350], gamma=0.1)

    # Maximum value used for gradient clipping = max fp16/2
    gradient_clipping = cfg.performance.gradient_clipping
    max_norm = cfg.performance.gradient_clipping_max_norm

    # Set anomaly detection
    torch.autograd.set_detect_anomaly(cfg.performance.anomaly_detection)

    # Begin training

    max_val_metric = -10000

    for i in range(cfg.training.epochs):

        start_time_epoch = time.time()

        batch_time_t = AverageMeter()
        data_time_t = AverageMeter()
        losses_t = AverageMeter()
        metric_values_t = AverageMeter()

        batch_time_v = AverageMeter()
        data_time_v = AverageMeter()
        losses_v = AverageMeter()
        metric_values_v = AverageMeter()
        if goal_type == 'classification':
            top3_values_v = AverageMeter()
            top5_values_v = AverageMeter()

        end_time_t = time.time()
        # Training

        model.train()
        for j, (inputs_t, targets_t, indexes_t, _, _) in enumerate(train_data_loader):
            # Update timer for data retrieval
            data_time_t.update(time.time() - end_time_t)
            
            # Move input to CUDA if available
            if cuda_available:
                if len(inputs_t) > 1:
                    for p, inp in enumerate(inputs_t):
                        if not torch.isfinite(inp).all():
                            raise ValueError('Input from dataloader not finite')
                        inputs_t[p] = inp.to(device, non_blocking=True)
                else:
                    if not torch.isfinite(inputs_t).all():
                        raise ValueError('Input from dataloader not finite')
                    inputs_t = inputs_t.to(device, non_blocking=True)
                if goal_type == 'classification':
                    targets_t = targets_t.long().squeeze()
                targets_t = targets_t.to(device, non_blocking=True)



            # Do forward and backwards pass

            # Get model train output and train loss
            with autocast(enabled=use_half_prec):
                outputs_t = model(inputs_t)
                if goal_type == 'ordinal-regression':
                    loss_t = criterion(outputs_t, targets_t, model.module.thresholds)
                else:
                    loss_t = criterion(outputs_t, targets_t)
                loss_mean_t = loss_t.mean()
            if cfg.data_loader.weighted_sampler:
                for index, loss in zip(indexes_t, loss_t.cpu().detach()):
                    loss_ratio = loss/loss_mean_t.cpu().detach()
                    loss_ratio = torch.clamp(loss_ratio, min=0.1, max=3)
                    train_data_loader.sampler.weights[index] = loss_ratio
            # Zero grads
            optimizer.zero_grad()

            # Backwards pass
            scaler.scale(loss_mean_t).backward()

            # Gradient Clipping
            if gradient_clipping:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_value_(model.parameters(), max_norm)

            # Step and update
            scaler.step(optimizer)
            scaler.update()
            # Calculate and update metrics
            try:
                if not torch.isfinite(outputs_t).all():
                    raise ValueError('Output from model not finite')
                metric_targets_t = targets_t.cpu().detach().numpy()
                metric_outputs_t = outputs_t.cpu().detach().numpy()
                if goal_type == 'regression':
                    metric_t = r2_score(metric_targets_t, metric_outputs_t)
                elif goal_type == 'classification':
                    predictions_t = np.argmax(metric_outputs_t, 1)
                    metric_t = accuracy_score(metric_targets_t, predictions_t)
                elif goal_type == 'ordinal-regression':
                    labels_t = get_ORAT_labels(metric_outputs_t, model.module.thresholds)
                    metric_t = accuracy_score(metric_targets_t, labels_t)
                metric_values_t.update(metric_t)
                losses_t.update(loss_mean_t)
            except ValueError as ve:
                print('Failed to calculate {} with error: {} and output: {}'.format(metric_name, ve, outputs_t))

            # Update timer for batch
            batch_time_t.update(time.time() - end_time_t)

            if j % 100 == 0:
                print('Training Batch: [{}/{}] in epoch: {} \t '
                      'Training Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) \t '
                      'Training Data Time: {data_time.val:.3f} ({data_time.avg:.3f}) \t '
                      'Training Loss: {loss.val:.4f} ({loss.avg:.4f}) \t '
                      'Training {metric_name} Score: {metric.val:.3f} ({metric.avg:.3f}) \t'
                      .format(j+1, len(train_data_loader), i + 1, batch_time=batch_time_t, data_time=data_time_t,
                              loss=losses_t, metric_name=metric_name, metric=metric_values_t))
            # Reset end timer
            end_time_t = time.time()
        
        # End of training epoch prints and updates
        print('Finished Training Epoch: {} \t '
              'Training Time: {batch_time.avg:.3f} \t '
              'Training Data Time: {data_time.avg:.3f} \t '
              'Training Loss: {loss.avg:.4f} \t '
              'Training {metric_name} score: {metric.avg:.3f} \t'
              .format(i+1, batch_time=batch_time_t, data_time=data_time_t, loss=losses_t, metric_name=metric_name,
                      metric=metric_values_t))

        if cfg.logging.logging_enabled:
            log_train_metrics(experiment, losses_t.avg, metric_values_t.avg, optimizer.param_groups[0]['lr'])

        # Validation

        # Only run validation every 10 epochs to save training time
        if i % 10 == 0:
            end_time_v = time.time()
            model.eval()
            if goal_type == 'classification':
                all_result_v = np.zeros((0, cfg.model.n_classes))
            else:
                all_result_v = np.zeros((0))
            all_target_v = np.zeros((0))
            all_uids_v = np.zeros((0))
            all_loss_v = np.zeros((0))
            for k, (inputs_v, targets_v, _, uids_v, _) in enumerate(val_data_loader):
                # Update timer for data retrieval
                data_time_v.update(time.time() - end_time_v)

                # Move input to CUDA if available
                if cuda_available:
                    if len(inputs_v) > 1:
                        for p, inp in enumerate(inputs_v):
                            inputs_v[p] = inp.to(device, non_blocking=True)
                    else:
                        inputs_v = inputs_v.to(device, non_blocking=True)
                    if goal_type == 'classification':
                        targets_v = targets_v.long().squeeze()
                    targets_v = targets_v.to(device, non_blocking=True)

                with torch.no_grad():
                    # Get model validation output and validation loss
                    with autocast(enabled=use_half_prec):
                        outputs_v = model(inputs_v)
                        if goal_type == 'ordinal-regression':
                            loss_v = criterion(outputs_v, targets_v, model.module.thresholds)
                        else:
                            loss_v = criterion(outputs_v, targets_v)
                        loss_mean_v = loss_v.mean()

                # Update timer for batch
                batch_time_v.update(time.time() - end_time_v)

                # Update metrics
                if cfg.evaluation.use_best_sample:
                    if goal_type == 'classification':
                        all_result_v = np.concatenate((all_result_v, outputs_v.cpu().detach().numpy()))
                        all_target_v = np.concatenate((all_target_v, targets_v.cpu().detach().numpy()))
                        all_loss_v = np.concatenate((all_loss_v, loss_v.cpu().detach().numpy()))
                    else:
                        all_result_v = np.concatenate((all_result_v, outputs_v.squeeze(dim=1).cpu().detach().numpy()))
                        all_target_v = np.concatenate((all_target_v, targets_v.squeeze(dim=1).cpu().detach().numpy()))
                        all_loss_v = np.concatenate((all_loss_v, loss_v.cpu().squeeze(dim=1).detach().numpy()))
                    all_uids_v = np.concatenate((all_uids_v, uids_v))

                else:
                    metric_targets_v = targets_v.cpu().detach().numpy()
                    metric_outputs_v = outputs_v.cpu().detach().numpy()
                    if goal_type == 'regression':
                        metric_v = r2_score(metric_targets_v, metric_outputs_v)
                    elif goal_type == 'classification':
                        predictions_v = np.argmax(metric_outputs_v, 1)
                        metric_v = accuracy_score(metric_targets_v, predictions_v)
                    elif goal_type == 'ordinal-regression':
                        labels_v = get_ORAT_labels(metric_outputs_v, model.module.thresholds)
                        metric_v = accuracy_score(metric_targets_v, labels_v)
                    metric_values_v.update(metric_v)
                    losses_v.update(loss_mean_v)

                    if k % 100 == 0:
                        print('Validation Batch: [{}/{}] in epoch: {} \t '
                              'Validation Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) \t '
                              'Validation Data Time: {data_time.val:.3f} ({data_time.avg:.3f}) \t '
                              'Validation Loss: {loss.val:.4f} ({loss.avg:.4f}) \t '
                              'Validation {metric_name}: {metric.val:.3f} ({metric.avg:.3f})\t'
                              .format(k + 1, len(val_data_loader), i + 1, batch_time=batch_time_v, data_time=data_time_v,
                                      loss=losses_v, metric_name=metric_name, metric=metric_values_v))

                end_time_v = time.time()

            if cfg.evaluation.use_best_sample:
                # As results are over all possible combinations of views in each examination
                # each different combination needs to have a weight equal to its ratio.
                val_data = np.array((all_uids_v, all_loss_v))
                val_data = val_data.transpose(1, 0)
                pd_val_data = pd.DataFrame(val_data, columns=['us_id', 'loss'])
                pd_val_data['loss'] = pd_val_data['loss'].astype(np.float32)
                val_ue = pd_val_data.drop_duplicates(subset='us_id')[['us_id', 'loss']]
                all_mean_loss = []
                for ue in val_ue.itertuples():
                    exam_results = pd_val_data[pd_val_data['us_id'] == ue.us_id]
                    num_combinations = len(exam_results)
                    weight = 1/num_combinations
                    mean_exam_loss = exam_results['loss'].mean()
                    all_mean_loss.append(mean_exam_loss)
                    for indx in exam_results.index:
                        pd_val_data.loc[indx, 'metric_weight'] = weight
                np_loss = np.array(all_mean_loss, dtype=np.float32)
                loss_mean_v = np_loss.mean()
                weights = pd_val_data['metric_weight'].to_numpy()
                if goal_type == 'regression':
                    metric_v = r2_score(all_target_v, all_result_v, sample_weight=weights)
                elif goal_type == 'classification':
                    top3_v = top_k_accuracy_score(all_target_v.astype(int), all_result_v, k=3, sample_weight=weights)
                    top5_v = top_k_accuracy_score(all_target_v.astype(int), all_result_v, k=5, sample_weight=weights)
                    predictions_v = np.argmax(all_result_v, 1)
                    metric_v = accuracy_score(all_target_v.astype(int), predictions_v, sample_weight=weights)
                elif goal_type == 'ordinal-regression':
                    labels_v = get_ORAT_labels(all_result_v, model.module.thresholds)
                    metric_v = accuracy_score(all_target_v.astype(int), labels_v, sample_weight=weights)
            else:
                loss_mean_v = losses_v.avg
                metric_v = metric_values_v.avg

            # End of validation epoch prints and updates
            print('Finished Validation Epoch: {} \t '
                  'Validation Time: {batch_time.avg:.3f} \t '
                  'Validation Data Time: {data_time.avg:.3f} \t '
                  'Validation Loss: {loss:.4f} \t '
                  'Validation {metric_name}: {metric:.3f}\t'
                  .format(i+1, batch_time=batch_time_v, data_time=data_time_v, loss=loss_mean_v, metric_name=metric_name
                          , metric=metric_v))
            if goal_type == 'regression':
                print('Example targets: {} \n Example outputs: {}'.format(torch.squeeze(targets_v), torch.squeeze(outputs_v)))
            
            if use_scheduler:
                scheduler.step(loss_mean_v)

            if cfg.training.checkpointing_enabled and cfg.logging.logging_enabled:
                experiment_id = experiment["sys/id"].fetch()
                if metric_v > max_val_metric:
                    checkpoint_name = cfg.training.checkpoint_save_path + cfg.model.name + '_' + cfg.data.type + '_'\
                                      + cfg.data.name + '_exp_' + experiment_id + '.pth'
                    save_checkpoint(checkpoint_name, model, optimizer)
                    max_val_metric = metric_v
            elif cfg.training.checkpointing_enabled:
                if metric_v > max_val_metric:
                    checkpoint_name = cfg.training.checkpoint_save_path + cfg.model.name + '_' + cfg.data.type + '_'\
                                      + cfg.data.name + '_test' + '.pth'
                    save_checkpoint(checkpoint_name, model, optimizer)
                    max_val_metric = metric_v

            if cfg.logging.logging_enabled:
                log_val_metrics(experiment, loss_mean_v, metric_v, max_val_metric)

        epoch_time = time.time() - start_time_epoch
        rem_epochs = cfg.training.epochs - (i+1)
        rem_time = rem_epochs * epoch_time
        print('Epoch {} completed. Time to complete: {}. Estimated remaining time: {}'.format(i+1, epoch_time, format_time(rem_time)))
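
A small sketch of the sample_weight mechanism the use_best_sample branch above
relies on: each example's hit or miss is reweighted, so exams that contribute many
view combinations can be down-weighted to count once overall. Toy data:

import numpy as np
from sklearn.metrics import top_k_accuracy_score

y_true = np.array([0, 0, 1])                 # two rows from one exam, one from another
y_score = np.array([[0.9, 0.1, 0.0],
                    [0.2, 0.5, 0.3],
                    [0.1, 0.8, 0.1]])
weights = np.array([0.5, 0.5, 1.0])          # each exam sums to a weight of 1

print(top_k_accuracy_score(y_true, y_score, k=1,
                           labels=np.arange(3), sample_weight=weights))  # -> 0.75
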