def setUp(self):
    """Build the shared test fixture.

    Creates a tiny 2-sample dataset, a two-layer greedy kernel network
    (``self.kn``) with hand-set weights so outputs are reproducible, and
    an ensemble version of the same network (``self.kn_ensemble``) built
    from the base network's layers, so tests can compare the two.
    """
    #########
    # toy data
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # two 2-d points with one label each; floats for features, int64 for CE loss
    self.X = torch.tensor([[1, 2], [3, 4]], dtype=torch.float, device=device)
    self.Y = torch.tensor([0, 1], dtype=torch.int64, device=device)
    # batch_size=2 puts the whole toy set in a single batch
    self.train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(self.X, self.Y),
        batch_size=2)

    #########
    # base kn
    self.kn = greedyFeedforward()
    self.kn.add_layer(
        kFullyConnected(X=self.X, n_out=2, kernel='gaussian', sigma=3, bias=True))
    self.kn.add_layer(
        kFullyConnected(X=self.X, n_out=2, kernel='gaussian', sigma=2, bias=True))
    # manually set some weights (fixed values -> deterministic expected outputs)
    self.kn.layer0.weight.data = torch.Tensor([[.1, .2], [.5, .7]])
    self.kn.layer0.bias.data = torch.Tensor([0., 0.])
    self.kn.layer1.weight.data = torch.Tensor([[1.2, .3], [.2, 1.7]])
    self.kn.layer1.bias.data = torch.Tensor([0.1, 0.2])
    # critic uses the top layer's kernel feature map; losses/metrics are
    # registered pairwise (cosine for the hidden layer, CE for the output)
    self.kn.add_critic(self.kn.layer1.phi)
    self.kn.add_loss(torch.nn.CosineSimilarity())
    self.kn.add_metric(torch.nn.CosineSimilarity())
    self.kn.add_loss(torch.nn.CrossEntropyLoss(reduction='sum'))
    self.kn.add_metric(torch.nn.CrossEntropyLoss(reduction='sum'))

    #########
    # ensemble
    # same network, but each layer split into batch_size=1 ensemble
    # components; weights are copied from self.kn by K.to_ensemble
    self.kn_ensemble = greedyFeedforward()
    self.kn_ensemble.add_layer(K.to_ensemble(self.kn.layer0, batch_size=1))
    self.kn_ensemble.add_layer(K.to_ensemble(self.kn.layer1, batch_size=1))
    self.kn_ensemble.add_critic(self.kn.layer1.phi)
    self.kn_ensemble.add_loss(torch.nn.CosineSimilarity())
    self.kn_ensemble.add_metric(torch.nn.CosineSimilarity())
    self.kn_ensemble.add_loss(torch.nn.CrossEntropyLoss(reduction='sum'))
    self.kn_ensemble.add_metric(torch.nn.CrossEntropyLoss(reduction='sum'))

    self.kn.to(device)
    self.kn_ensemble.to(device)
def __init__(self, in_channels=4, sigma=1., n_centers=100):
    """NatureDQN-style convolutional body whose final fully-connected
    layer is a kernelized layer with trainable random centers.

    Parameters
    ----------
    in_channels : int
        Number of input image channels (default 4, e.g. stacked frames).
    sigma : float
        Gaussian kernel bandwidth for the kernelized layer.
    n_centers : int
        Number of randomly initialized (trainable) kernel centers.
    """
    super(kNatureConvBody, self).__init__()
    self.feature_dim = 512
    # flattened size of the conv stack's output
    # (assumes the standard 84x84 Atari input -- TODO confirm)
    flat_dim = 7 * 7 * 64
    self.conv1 = layer_init(
        nn.Conv2d(in_channels, 32, kernel_size=8, stride=4))
    self.conv2 = layer_init(
        nn.Conv2d(32, 64, kernel_size=4, stride=2))
    self.conv3 = layer_init(
        nn.Conv2d(64, 64, kernel_size=3, stride=1))
    # kernelized replacement for the original nn.Linear(flat_dim, feature_dim);
    # trainable_X lets the random centers be learned along with the weights
    self.fc4 = kFullyConnected(
        X=torch.rand(n_centers, flat_dim),
        n_out=self.feature_dim,
        sigma=sigma,
        trainable_X=True)
def __init__(self, state_dim, hidden_units=(64, 64), sigma=1., gate=F.relu, n_centers=100):
    """Two-layer feature body: a linear first layer followed by a
    kernelized fully-connected layer with trainable random centers.

    Parameters
    ----------
    state_dim : int
        Dimensionality of the input state.
    hidden_units : tuple
        Widths of the two hidden layers.
    sigma : float
        Gaussian kernel bandwidth for the kernelized layer.
    gate : callable
        Activation used between layers (stored by the caller's forward;
        not referenced here).
    n_centers : int
        Number of randomly initialized (trainable) kernel centers.
    """
    super(kFCBody, self).__init__()
    first_width, second_width = hidden_units[0], hidden_units[1]
    self.layer0 = nn.Linear(state_dim, first_width)
    # random centers live in layer0's output space; trainable_X lets them
    # be learned along with the layer's weights
    self.layer1 = kFullyConnected(
        X=torch.rand(n_centers, first_width),
        n_out=second_width,
        sigma=sigma,
        trainable_X=True)
    self.feature_dim = second_width
def to_ensemble(layer, batch_size):
    """Break a layer object into an equivalent ensemble layer object.

    The centers ``layer.X`` are split into chunks of at most ``batch_size``
    and one ``kFullyConnected`` component is created per chunk, receiving a
    copy of the matching column slice of the original weight matrix. Only
    the first component carries the bias (if the layer has one), so the
    ensemble as a whole adds the bias exactly once.

    Parameters
    ----------
    layer : kFullyConnected
        Supports kFullyConnected only.
    batch_size : int
        Maximum number of centers per ensemble component.

    Returns
    -------
    ensemble_layer : kFullyConnectedEnsemble
    """
    # Import locally to avoid a circular import with
    # kernet.layers.kernelized_layer (a top-level import breaks kn.py).
    from kernet.layers.kernelized_layer import kFullyConnected, kFullyConnectedEnsemble

    assert isinstance(layer, kFullyConnected)

    X = layer.X
    ensemble_layer = kFullyConnectedEnsemble()
    for i, x in enumerate(get_batch(X, batch_size=batch_size)):
        # only the first component gets the bias, otherwise it would be
        # applied once per component when the outputs are combined
        use_bias = layer.bias is not None and i == 0
        component = kFullyConnected(
            X=x[0],
            n_out=layer.n_out,
            kernel=layer.kernel,
            sigma=layer.phi.sigma,  # TODO only supports the Gaussian kernel
            bias=use_bias)

        # .clone() copies the underlying storage so the returned ensemble
        # does not share parameters with the given layer
        component.weight.data = \
            layer.weight[:, i * batch_size:(i + 1) * batch_size].clone()
        if use_bias:
            component.bias.data = layer.bias.clone()

        ensemble_layer.add_comp(component)
    return ensemble_layer
shuffle=False) x_train, y_train = next(iter(dummy_train_loader)) # get a balanced subset of size n as centers x_train2, y_train2 = K.get_subset(X=x_train, Y=y_train, n=n_center2, shuffle=True) layer1 = LeNet5_conv(1, padding=2) # layer1 = LeNet5(1, padding=2) # a kernelized, fully-connected layer. X is the set of centers, n_out is the number of kernel machines on this layer layer2 = kFullyConnected(X=x_train2, n_out=n_class, kernel='gaussian', sigma=sigma2, bias=True) if ensemble: layer2 = layer2.to_ensemble(component_size) # add optimizer to each layer. There is no need to assign each optimizer to the parameters of the corresponding layer manually, this will later be done by the model in net._compile() when you call net.fit(). net.add_optimizer( torch.optim.Adam(params=layer1.parameters(), lr=lr1, weight_decay=w_decay1)) net.add_optimizer( torch.optim.Adam(params=layer2.parameters(), lr=lr2, weight_decay=w_decay2))
x_train, y_train = next(iter(dummy_train_loader)) # get a balanced subset of size n as centers x_train2, y_train2 = K.get_subset(X=x_train, Y=y_train, n=n_center2, shuffle=True) x_train3, y_train3 = K.get_subset(X=x_train, Y=y_train, n=n_center3, shuffle=True) # a kernelized, fully-connected layer. X is the set of centers, n_out is the number of kernel machines on this layer layer1 = kFullyConnected(X=x_train, n_out=hidden_dim1, kernel='gaussian', sigma=sigma1, bias=True) layer2 = kFullyConnected(X=x_train2, n_out=hidden_dim2, kernel='gaussian', sigma=sigma2, bias=True) layer3 = kFullyConnected(X=x_train3, n_out=n_class, kernel='gaussian', sigma=sigma3, bias=True) if not ensemble: net.add_layer(layer1)