name="valid_accuracy")
    ])
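
# The callbacks list above is truncated at the top of this excerpt; its tail
# suggests an EpochScoring entry. A hypothetical reconstruction with skorch's
# built-in callback (only the name= value appears in the original):
#
#     from skorch.callbacks import EpochScoring
#     valid_accuracy = EpochScoring(scoring="accuracy",
#                                   lower_is_better=False,
#                                   name="valid_accuracy")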

# Enable verbose output (1 = on)
net.verbose = 1

#%% Fit the model

io = net.fit(WD)

# Save model
net.save_params(f_params='models/baselineNN.pkl')
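
# skorch can also persist optimizer state and training history alongside the
# module weights; the extra file names below are illustrative, not from the
# original:
#
#     net.save_params(f_params="models/baselineNN.pkl",
#                     f_optimizer="models/baselineNN_opt.pkl",
#                     f_history="models/baselineNN_history.json")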

#%% Or load it from disk

net.initialize()
net.load_params(f_params="models/baselineNN.pkl")

#%% Predict on train

# Raw network outputs
yhat = net.predict(WD)
# Predicted classes via argmax over the output dimension
yhatc = yhat.argmax(axis=1)
# True labels
ytrue = WD.y
# Fraction of correct predictions (accuracy)
(ytrue == yhatc).sum() / yhatc.size
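# Equivalently, via sklearn (same result as the manual computation above):
from sklearn.metrics import accuracy_score
print(accuracy_score(ytrue, yhatc))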

# Classification report
from sklearn import metrics
print(metrics.classification_report(ytrue, yhatc))
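# A confusion matrix gives a per-class breakdown of the same predictions:
print(metrics.confusion_matrix(ytrue, yhatc))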
Example #2
    callbacks=[
        cp_best_model,
        cp_best_train,
        progressbar,
        # cyclicLR,
        epoch_MAE_train,
    ])
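
# The callback objects referenced above (cp_best_model, cp_best_train,
# progressbar, epoch_MAE_train) are constructed earlier in the original
# script, above this excerpt. A plausible sketch with skorch's built-in
# callbacks (file names and scoring choices here are assumptions):
#
#     from skorch.callbacks import Checkpoint, ProgressBar, EpochScoring
#     cp_best_model = Checkpoint(monitor="valid_loss_best",
#                                f_params="best_model.pt")
#     cp_best_train = Checkpoint(monitor="train_loss_best",
#                                f_params="best_train.pt")
#     progressbar = ProgressBar()
#     epoch_MAE_train = EpochScoring(scoring="neg_mean_absolute_error",
#                                    on_train=True, lower_is_better=False,
#                                    name="train_MAE")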

#######################################################################
# TRAIN MODEL
#######################################################################
print("############################################################")
print("\n\t\tTRAINING MODEL\n")
print("############################################################\n")

model.initialize()
# print("size of inputs fed to optimizer:\t", len(X_train), len(Y_train))
model.fit(X_train, Y_train)
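
# A quick sanity check on held-out data; X_val / Y_val are assumed names and
# are not part of the original script:
#
#     import numpy as np
#     Y_pred = model.predict(X_val)   # NeuralNet.predict returns an ndarray
#     print("validation MAE:", np.abs(Y_pred - Y_val).mean())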

# KFold cross-validation Gridsearch
# from sklearn.model_selection import GridSearchCV
# params = {
#     'optimizer__weight_decay': [0.0005, 0.001, 0.005, 0.01],
#     'max_epochs': [40],
#     'lr': [0.05]
# }
# gs = GridSearchCV(model, params, refit=False, scoring=MAE_scorer)
#
# gs.fit(X_train, Y_train)
# print("Best grid search score:\t", gs.best_score_, gs.best_params_)
Example #3
from hashlib import sha256
import random

import numpy as np
import torch
from torch import nn, optim
from sklearn.base import BaseEstimator
from skorch import NeuralNet

# `Linear` (the torch module passed to NeuralNet below) is defined elsewhere
# in the original source and is not reproduced here.


class CovClassifier(BaseEstimator):
    def __init__(
        self,
        weight_decay=1e-4,
        lr=0.01,
        seed=42,
        device="cpu",
        tuning=False,
        momentum=0.9,
        opt="sgd",
    ):
        self.seed = seed
        self.tuning = tuning
        self.weight_decay = weight_decay
        self.lr = lr
        self.device = device
        self.momentum = momentum
        self.opt = opt

        if opt == "adagrad":
            kwargs = dict(
                optimizer=optim.Adagrad,
                optimizer__weight_decay=self.weight_decay,
                optimizer__lr=lr,
            )
        elif opt == "asgd":
            kwargs = dict(
                optimizer=optim.ASGD,
                optimizer__weight_decay=self.weight_decay,
                optimizer__lr=lr,
                optimizer__t0=1e3,
            )
        else:
            kwargs = dict(
                optimizer=optim.SGD,
                optimizer__weight_decay=self.weight_decay,
                optimizer__lr=lr,
                optimizer__momentum=self.momentum,
                optimizer__nesterov=True,
            )
        # warm_start=True with max_epochs=1 means each fit() call trains one
        # more epoch; batch_size=-1 feeds the full dataset as a single batch.
        self.model = NeuralNet(
            module=Linear,
            lr=lr,
            criterion=nn.CrossEntropyLoss,
            warm_start=True,
            max_epochs=1,
            batch_size=-1,
            train_split=None,
            device=device,
            **kwargs,
        )
        super().__init__()

    def _set_seed(self):
        seed = self.seed
        assert seed is not None, "Specify seed, don't leave seed=None"
        # Hash repeated copies of the seed string and reduce the digest to a
        # 32-bit value so it is accepted by every RNG seeded below.
        s = str(seed) * 10
        sha = sha256(bytes(s, "ascii"))
        randint = int("0x" + sha.hexdigest(), 0)
        capped = randint % (2**32 - 1)

        torch.manual_seed(capped)
        random.seed(capped)
        return np.random.RandomState(capped)

    def initialize(self):
        self.rng_ = self._set_seed()
        if hasattr(self.model, "initialized_") and self.model.initialized_:
            raise ValueError("Reinitializing!")
        self.model.initialize()
        #         self.model_ = Net()
        #         self.optimizer_ = optim.Adagrad(weight_decay=self.weight_decay)
        assert self.model.initialized_
        self.initialized_ = True

        self.history_ = []
        self.models_ = []
        self.meta_ = {
            "model_updates": 0,
            "num_examples": 0,
            "len_dataset": int(200e3),
            **self.get_params(),
        }
        # [1]:https://www.kaggle.com/c/forest-cover-type-prediction/data
        if self.tuning:
            # During tuning only 80% of the data is used; keep the count an int.
            self.meta_["len_dataset"] = int(self.meta_["len_dataset"] * 0.8)
        return self
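
# A hypothetical usage sketch (X, y assumed numpy arrays; not in the original
# source). Because the wrapped NeuralNet uses warm_start=True and
# max_epochs=1, every fit() call advances training by exactly one epoch:
#
#     clf = CovClassifier(lr=0.05, opt="adagrad", seed=42)
#     clf.initialize()
#     for _ in range(10):
#         clf.model.fit(X, y)   # one full-batch epoch per call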