import functools

import torch
from creme import optim, stream
from keras import backend as K
from keras import optimizers
from sklearn import datasets

LR = .01
KERAS_EPS = K.epsilon()  # use Keras' epsilon everywhere so the three libraries match

# For each optimizer: its creme, PyTorch, and Keras counterparts with matching
# hyperparameters. The PyTorch ones are partials because they need the model's
# parameters at instantiation time.
OPTIMIZERS = {
    'SGD': (
        optim.SGD(lr=LR),
        functools.partial(torch.optim.SGD, lr=LR),
        optimizers.SGD(lr=LR)
    ),
    'Adam': (
        optim.Adam(lr=LR, beta_1=.9, beta_2=.999, eps=KERAS_EPS),
        functools.partial(torch.optim.Adam, lr=LR, betas=(.9, .999), eps=KERAS_EPS),
        optimizers.Adam(lr=LR, beta_1=.9, beta_2=.999)
    ),
    'AdaDelta': (
        optim.AdaDelta(rho=.95, eps=KERAS_EPS),
        functools.partial(torch.optim.Adadelta, rho=.95, eps=KERAS_EPS),
        optimizers.Adadelta(rho=.95)
    ),
    'AdaGrad': (
        optim.AdaGrad(lr=LR, eps=KERAS_EPS),
        functools.partial(torch.optim.Adagrad, lr=LR),
        optimizers.Adagrad(lr=LR)
    ),
    'Momentum': (
        optim.Momentum(lr=LR, rho=.1),
        functools.partial(torch.optim.SGD, lr=LR, momentum=.1),
        optimizers.SGD(lr=LR, momentum=.1)
    )
}


def add_intercept(x):
    """Append a constant feature so the intercept is learned as a regular weight."""
    return {**x, 'intercept': 1.}


for name, (creme_optim, torch_optim, keras_optim) in OPTIMIZERS.items():

    X_y = stream.iter_sklearn_dataset(
        dataset=datasets.load_boston(),
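# --- Sketch, not from the original (whose loop body is truncated above): one
# --- way the creme side of the comparison could run once the stream is built.
# --- `linear_model` is creme's; the PyTorch/Keras counterparts would wrap
# --- equivalent single-layer models and consume the same stream.
from creme import linear_model

creme_model = linear_model.LinearRegression(
    optimizer=creme_optim,
    intercept_lr=0  # the intercept is supplied as a feature by add_intercept
)

for x, y in X_y:
    x = add_intercept(x)
    y_pred = creme_model.predict_one(x)  # predict before fitting (progressive validation)
    creme_model = creme_model.fit_one(x, y)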
# Hyperparameters come from the experiment config. `lr` is read here too (the
# original assumed it was already in scope).
lr = config['lr']
rho = config['rho']
beta_1 = config['beta_1']
beta_2 = config['beta_2']
eps = config['eps']
gamma = config['gamma']
final_lr = config['final_lr']
alpha = config['alpha']
FTRL_l1 = config['FTRL_l1']
FTRL_l2 = config['FTRL_l2']

if opt == "AdaBound":
    optimizer = optim.AdaBound(lr, beta_1, beta_2, eps, gamma, final_lr)
elif opt == "AdaDelta":
    optimizer = optim.AdaDelta(rho, eps)
elif opt == "AdaGrad":
    optimizer = optim.AdaGrad(lr, eps)
elif opt == "Adam":
    optimizer = optim.Adam(lr, beta_1, beta_2, eps)
elif opt == "FTRLProximal":
    # `beta`, `l1` and `l2` were undefined in the original; use the config's
    # FTRL values and fall back to creme's default for beta.
    optimizer = optim.FTRLProximal(alpha=alpha, l1=FTRL_l1, l2=FTRL_l2)
elif opt == "Momentum":
    optimizer = optim.Momentum(lr, rho)
elif opt == "RMSProp":
    optimizer = optim.RMSProp(lr, rho, eps)
elif opt == "VanillaSGD":
    optimizer = optim.VanillaSGD(lr)
elif opt == "NesterovMomentum":
    optimizer = optim.NesterovMomentum(lr, rho)
else:
    optimizer = None
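# Downstream-usage sketch (assumed; not in the original snippet). Failing fast
# beats silently carrying a None optimizer around. Assumes `from creme import
# linear_model` alongside the `optim` import used above.
if optimizer is None:
    raise ValueError(f"unknown optimizer: {opt!r}")

model = linear_model.LogisticRegression(optimizer=optimizer)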
@pytest.mark.parametrize(
    'lm, dataset',
    [
        pytest.param(
            lm(optimizer=copy.deepcopy(optimizer), initializer=initializer, l2=0),
            dataset,
            id=f'{lm.__name__} - {optimizer} - {initializer}'
        )
        # The loop variable `lm` shadows the linear_model alias; this works
        # because the first iterable is evaluated in the enclosing scope,
        # where `lm` is still the module.
        for lm, dataset in [
            (lm.LinearRegression, datasets.TrumpApproval()),
            (lm.LogisticRegression, datasets.Bananas())
        ]
        for optimizer, initializer in itertools.product(
            [
                optim.AdaBound(),
                optim.AdaDelta(),
                optim.AdaGrad(),
                optim.AdaMax(),
                optim.Adam(),
                optim.AMSGrad(),
                # TODO: check momentum optimizers
                # optim.Momentum(),
                # optim.NesterovMomentum(),
                optim.RMSProp(),
                optim.SGD()
            ],
            [
                optim.initializers.Zeros(),
                optim.initializers.Normal(mu=0, sigma=1, seed=42)
            ]
        )
    ]
)
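# A sketch of a test body this parametrization could feed (assumed; the
# original test function is not shown). It uses creme's fit_one API and checks
# that the learned weights stay finite for every optimizer/initializer pair.
import math

def test_weights_finite(lm, dataset):
    for x, y in itertools.islice(dataset, 100):
        lm.fit_one(x, y)
    assert all(math.isfinite(w) for w in lm.weights.values())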
    ]),
    'Logistic regression w/ VanillaSGD': compose.Pipeline([
        preprocessing.StandardScaler(),
        linear_model.LogisticRegression(
            optimizer=optim.VanillaSGD(lr=optim.OptimalLR())
        )
    ]),
    'Logistic regression w/ Adam': compose.Pipeline([
        preprocessing.StandardScaler(),
        linear_model.LogisticRegression(optim.Adam(optim.OptimalLR()))
    ]),
    'Logistic regression w/ AdaGrad': compose.Pipeline([
        preprocessing.StandardScaler(),
        linear_model.LogisticRegression(optim.AdaGrad(optim.OptimalLR()))
    ]),
    'Logistic regression w/ RMSProp': compose.Pipeline([
        preprocessing.StandardScaler(),
        linear_model.LogisticRegression(optim.RMSProp(optim.OptimalLR()))
    ])
}

fig, ax = plt.subplots(figsize=(10, 6))

for name, model in models.items():
    print(name)
    metric, train_duration, pred_duration = evaluate_model(
        X_y=datasets.fetch_electricity(),
        model=model,
        metric=metrics.Accuracy()
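# `evaluate_model` is not defined in this snippet. A minimal sketch of what it
# might do, inferred from the call site (the timing scheme and return shape are
# assumptions): progressive validation, timing prediction and fitting separately.
import time

def evaluate_model(X_y, model, metric):
    train_duration = 0.
    pred_duration = 0.
    for x, y in X_y:
        tic = time.perf_counter()
        y_pred = model.predict_one(x)
        pred_duration += time.perf_counter() - tic
        metric.update(y_true=y, y_pred=y_pred)
        tic = time.perf_counter()
        model.fit_one(x, y)
        train_duration += time.perf_counter() - tic
    return metric, train_duration, pred_duration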