def tucker_decomposition_linear_layer(layer, decompose_rate):
    """Split a linear layer into two smaller linear layers (Tucker-1).

    The weight of ``layer`` is factorized along mode 1 with
    ``partial_tucker`` and rebuilt as ``projection -> core``.

    Args:
        layer: linear layer whose ``weight`` and optional ``bias`` are read.
        decompose_rate: forwarded unchanged to ``estimate_linear_ranks``.

    Returns:
        ``nn.Sequential(projection, core)`` made of two ``torch.nn.Linear``
        modules.
    """
    ranks = estimate_linear_ranks(layer, decompose_rate)
    print(layer, "Auto Estimated ranks", ranks)
    core, [last] = partial_tucker(
        layer.weight.data, modes=[1], ranks=ranks, init='svd', tol=1)

    has_bias = layer.bias is not None

    # Maps the reduced rank back to the original output features.
    core_layer = torch.nn.Linear(core.shape[1], core.shape[0], bias=has_bias)
    # Projects the input features onto the reduced rank.
    last_layer = torch.nn.Linear(last.shape[0], last.shape[1], bias=False)

    if has_bias:
        core_layer.bias.data = layer.bias.data

    # nn.Linear stores its weight as (out_features, in_features); the mode-1
    # factor is laid out the other way round, so it has to be transposed.
    # (The previous code appended two singleton axes, which is the Conv2d
    # layout and is not a valid Linear weight.)
    last_layer.weight.data = last.t().contiguous()
    core_layer.weight.data = core

    return nn.Sequential(last_layer, core_layer)
def tucker1_decompose_conv_layer(layer, rank=None, criterion=tucker1_rank):
    """Replace a conv layer with a two-layer Tucker-1 approximation.

    Only mode 0 of ``layer.weight`` is factorized, giving a core convolution
    followed by a 1x1 convolution.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.
        rank: rank passed to ``partial_tucker``; ``None`` or ``-1`` means
            "ask ``criterion``".
        criterion: callable taking ``layer`` and returning the rank.

    Returns:
        ``nn.Sequential(core_layer, last_layer)``.
    """
    if rank is None or rank == -1:
        rank = criterion(layer)

    core, [last] = partial_tucker(
        layer.weight.data, modes=[0], ranks=rank, init='svd')

    # Spatial convolution with a reduced number of output channels.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Only create a bias when the source layer has one; otherwise the new
    # layer would keep a randomly initialised bias and shift the output.
    has_bias = layer.bias is not None

    # 1x1 convolution that restores the original number of output channels.
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    # Conv2d weights are 4-D, so the 2-D factor gets two trailing unit axes.
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(core_layer, last_layer)
def tucker_for_first_conv_layer(layer, rank):
    """Decompose a conv layer along its input-channel mode only.

    ``layer.weight`` is factorized along mode 1 and rebuilt as a 1x1
    convolution followed by the core convolution.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.
        rank: indexable sequence; only ``rank[0]`` is used.

    Returns:
        ``nn.Sequential(pointwise, core)``.
    """
    ranks = [rank[0]]
    print(layer, "Auto Estimated ranks", ranks)
    core, [last] = partial_tucker(
        layer.weight.data, modes=[1], ranks=ranks, init='svd')

    has_bias = layer.bias is not None

    # Spatial convolution operating on the reduced channels.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=has_bias,
    )

    # 1x1 convolution reducing the input channels to the rank.  The weight
    # assigned below has shape (last.shape[1], last.shape[0], 1, 1), so the
    # declared in/out channel counts must follow the same axes (the previous
    # code declared the rank as in_channels).
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[0],
        out_channels=last.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    if has_bias:
        core_layer.bias.data = layer.bias.data

    last_layer.weight.data = \
        torch.transpose(last, 1, 0).unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(last_layer, core_layer)
def _rank_reduce(weights, rank_ratio):
    """Return a low-rank reconstruction of ``weights``.

    The same rank, ``int(min(weights.shape[:2]) * rank_ratio)``, is used for
    modes 0 and 1; any further axes are left untouched.

    NOTE(review): ``tucker_to_tensor`` is called with ``core`` and ``factors``
    as two positional arguments - confirm this matches the installed
    tensorly version.
    """
    # Only the first two axes take part in the rank computation.
    smallest_dim = min(weights.shape[:2])
    reduced = int(smallest_dim * rank_ratio)
    decomposition = partial_tucker(
        weights,
        modes=[0, 1],
        init="svd",
        svd="truncated_svd",
        rank=[reduced, reduced],
    )
    core, factors = decomposition
    return tensorly.tucker_to_tensor(core, factors)
def factorize_conv2d_rtk(tensor, params):
    """Factorize a 4-order kernel after reshaping its last two axes.

    Axes 2 and 3 of ``tensor`` are reshaped into ``input_shape`` and
    ``output_shape`` respectively, and ``partial_tucker`` is applied to all
    of the resulting axes (everything except axes 0 and 1).

    Args:
        tensor: 4-order array.
        params: mapping with keys ``"input_shape"``, ``"output_shape"`` and
            ``"ranks"``.  The shapes may be lists or tuples; ``ranks`` must
            hold one rank per entry of the two shapes.

    Returns:
        ``[input_factors, core_factor, output_factors]`` where the core is
        reshaped back to 4-order and each output factor is transposed.

    Raises:
        AssertionError: if the tensor order, rank count or shape products do
            not match.
    """
    # Normalise to lists so tuple shapes can be concatenated below.
    input_shape = list(params["input_shape"])
    output_shape = list(params["output_shape"])
    ranks = params["ranks"]

    shape = tensor.shape
    assert len(shape) == 4, "The input tensor should be 4-order."

    input_order, output_order = len(input_shape), len(output_shape)
    assert input_order + output_order == len(ranks), \
        "The length of ranks should be the sum of lengths of input and output shapes."
    assert shape[2] == np.prod(input_shape), \
        "The product of input_shape should match the 3rd-dimension of the tensor."
    assert shape[3] == np.prod(output_shape), \
        "The product of output_shape should match the 4th-dimension of the tensor."

    tensor = np.reshape(tensor, list(shape[:2]) + input_shape + output_shape)
    core_factor, factors = partial_tucker(
        tensor, list(range(2, 2 + input_order + output_order)), ranks)

    input_factors = factors[:input_order]
    # int(...) keeps the target shape integral even when a rank slice is
    # empty (np.prod of an empty sequence is the float 1.0).
    core_factor = np.reshape(
        core_factor,
        (shape[0], shape[1],
         int(np.prod(ranks[:input_order])),
         int(np.prod(ranks[input_order:]))))
    output_factors = [np.transpose(factor) for factor in factors[input_order:]]
    return [input_factors, core_factor, output_factors]
def tucker_for_first_linear_layer(layer, decompose_rate):
    """Replace a linear layer with three smaller linear layers (Tucker-2).

    ``layer.weight`` is factorized along modes 0 and 1 and rebuilt as
    ``first -> core -> last``.

    Args:
        layer: linear layer whose ``weight`` and optional ``bias`` are read.
        decompose_rate: forwarded unchanged to ``estimate_ranks``.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    ranks = estimate_ranks(layer, decompose_rate)
    print(layer, "VBMF Estimated ranks", ranks)
    core, [last, first] = partial_tucker(
        layer.weight.data, modes=[0, 1], ranks=ranks, init='svd')

    has_bias = layer.bias is not None

    # Projects the input features onto the mode-1 rank.
    first_layer = torch.nn.Linear(first.shape[0], first.shape[1], bias=False)
    # Mixes the two reduced spaces.
    core_layer = torch.nn.Linear(core.shape[1], core.shape[0], bias=False)
    # Expands the mode-0 rank back to the original output features.
    last_layer = torch.nn.Linear(last.shape[1], last.shape[0], bias=has_bias)

    if has_bias:
        last_layer.bias.data = layer.bias.data

    # The factors are already 2-D.  The former squeeze(-1) calls were a no-op
    # in the common case but dropped an axis whenever a rank or feature count
    # was 1, producing an invalid Linear weight.
    first_layer.weight.data = torch.transpose(first, 1, 0)
    print('first layer shape is :', first_layer.weight.data.shape)
    last_layer.weight.data = last
    print('last layer shape is :', last_layer.weight.data.shape)
    core_layer.weight.data = core
    print('core layer shape is :', core_layer.weight.data.shape)

    return nn.Sequential(first_layer, core_layer, last_layer)
def tucker_decomposition_conv_layer(layer, rank, conv_count):
    """Replace a conv layer with a three-layer Tucker-2 approximation.

    ``layer.weight`` is factorized along modes 0 and 1 and rebuilt as
    1x1 conv -> core conv -> 1x1 conv.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.
        rank: indexable sequence of ranks; ``rank[conv_count]`` is used for
            mode 0 and ``rank[conv_count - 1]`` for mode 1.
        conv_count: index of this layer inside ``rank``.
            NOTE(review): for ``conv_count == 0`` the mode-1 rank is
            ``rank[-1]`` (negative indexing) - confirm this is intended.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    ranks = [rank[conv_count], rank[conv_count - 1]]
    print(layer, "Auto Estimated ranks", ranks)
    core, [last, first] = partial_tucker(
        layer.weight.data, modes=[0, 1], ranks=ranks, init='svd')

    # 1x1 convolution reducing the input channels.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    # Spatial convolution on the reduced channels; it alone carries the
    # original stride and padding.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Layers without a bias (e.g. convs followed by batch norm) used to
    # crash on ``layer.bias.data``.
    has_bias = layer.bias is not None

    # 1x1 convolution restoring the original output channels.
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    first_layer.weight.data = \
        torch.transpose(first, 1, 0).unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(first_layer, core_layer, last_layer)
def linear_decomp(self, layer):
    """Factorize a linear layer into two linear layers.

    The rank comes from ``self.estimate_rank``; the factorization method is
    selected by ``self.decomp_type`` (``'tucker'`` or ``'cp'``).

    Args:
        layer: linear layer whose ``weight`` and optional ``bias`` are read.

    Returns:
        ``layer`` itself when the estimated rank equals 0, otherwise
        ``nn.Sequential(first_layer, last_layer)``.

    Raises:
        ValueError: if ``self.decomp_type`` is not a supported method
            (previously this surfaced as an UnboundLocalError).
    """
    W = layer.weight.data
    ranks = self.estimate_rank(W)
    print('Ranks:', ranks)
    if ranks == 0:
        return layer

    # Only give the last layer a bias when there is one to copy; otherwise
    # it would keep a randomly initialised bias.
    has_bias = layer.bias is not None

    if self.decomp_type == 'tucker':
        # Mode-0 factorization: the returned core plays the role of the
        # first layer's weight.
        first, [last] = partial_tucker(W, modes=[0], ranks=ranks, init='svd')
        first_layer = nn.Linear(first.shape[1], first.shape[0], bias=False)
        first_layer.weight.data = first
        last_layer = nn.Linear(last.shape[1], last.shape[0], bias=has_bias)
        last_layer.weight.data = last
    elif self.decomp_type == 'cp':
        # NOTE(review): the CP weight vector is not applied to the factors;
        # presumably it is all ones - confirm against the parafac settings.
        _weights, [last, first] = parafac(W, rank=ranks, init='random')
        first_layer = nn.Linear(first.shape[0], first.shape[1], bias=False)
        first_layer.weight.data = first.t()
        last_layer = nn.Linear(last.shape[1], last.shape[0], bias=has_bias)
        last_layer.weight.data = last
    else:
        raise ValueError(
            "Unsupported decomp_type: {!r}".format(self.decomp_type))

    if has_bias:
        last_layer.bias.data = layer.bias.data

    return nn.Sequential(first_layer, last_layer)
def tucker_decomposition_conv_layer(layer):
    """Return a Tucker-2 decomposition of a conv layer as ``nn.Sequential``.

    The ranks come from ``estimate_ranks`` (a Python implementation of VBMF,
    https://github.com/CasvandenBogaard/VBMF).  When it returns ``None`` the
    layer is returned unchanged.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.

    Returns:
        ``layer`` itself, or ``nn.Sequential(first, core, last)``.
    """
    ranks = estimate_ranks(layer)
    if ranks is None:
        return layer
    print(layer, "VBMF Estimated ranks", ranks)
    core, [last, first] = partial_tucker(
        layer.weight.data, modes=[0, 1], rank=ranks, init="svd")

    # 1x1 convolution reducing the input channels.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    # Spatial convolution on the reduced channels.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Bias-free source layers used to crash on ``layer.bias.data``.
    has_bias = layer.bias is not None

    # 1x1 convolution restoring the original output channels.
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    first_layer.weight.data = \
        torch.transpose(first, 1, 0).unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(first_layer, core_layer, last_layer)
def tucker_decomposition_conv_layer_BN(layer):
    """Tucker-2 decomposition of a conv layer with BatchNorm after each conv.

    The ranks come from ``estimate_ranks`` (a Python implementation of VBMF,
    https://github.com/CasvandenBogaard/VBMF).  The factorization runs on a
    NumPy copy of the weights.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.

    Returns:
        ``nn.Sequential(first, bn, core, bn, last, bn)``.
    """
    ranks = estimate_ranks(layer)
    print(layer, "VBMF Estimated ranks", ranks)

    device = layer.weight.device
    # .cpu() makes the NumPy conversion work for GPU-resident layers too.
    core, [last, first] = partial_tucker(
        layer.weight.data.cpu().numpy(), modes=[0, 1], ranks=ranks, init='svd')

    # The pointwise convolutions use stride 1: only the core convolution may
    # carry the original stride, otherwise it is applied three times and the
    # output is downsampled far more than by the source layer.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Bias-free source layers used to crash on ``layer.bias.data``.
    has_bias = layer.bias is not None
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    # One BatchNorm per convolution, sized to that convolution's output.
    bn_first = nn.BatchNorm2d(first.shape[1])
    bn_core = nn.BatchNorm2d(core.shape[0])
    bn_last = nn.BatchNorm2d(last.shape[0])

    # Conv2d weights are 4-D float32: transpose the input factor and append
    # two unit axes to both factors.
    first_w = np.ascontiguousarray(
        first.transpose((1, 0))[:, :, np.newaxis, np.newaxis], dtype=np.float32)
    last_w = np.ascontiguousarray(
        last[:, :, np.newaxis, np.newaxis], dtype=np.float32)
    core_w = np.ascontiguousarray(core, dtype=np.float32)

    first_layer.weight.data = torch.from_numpy(first_w).to(device)
    last_layer.weight.data = torch.from_numpy(last_w).to(device)
    core_layer.weight.data = torch.from_numpy(core_w).to(device)

    new_layers = [first_layer, bn_first,
                  core_layer, bn_core,
                  last_layer, bn_last]
    return nn.Sequential(*new_layers)
def tucker_decomp(W, rank):
    """Factorize ``W + eps`` along modes 0 and 1 with random initialisation.

    Returns:
        ``[first_transposed, core, last]`` where the mode-1 factor has been
        transposed in place.
    """
    decomposition = partial_tucker(
        W + eps, modes=[0, 1], ranks=rank, init='random')
    core, (last, first) = decomposition
    # t_() transposes the factor in place and returns it.
    return [first.t_(), core, last]
def tucker_decomposition_conv_layer(layer):
    """Return a Tucker-2 decomposition of a conv layer as ``nn.Sequential``.

    The ranks come from ``estimate_ranks`` (a Python implementation of VBMF,
    https://github.com/CasvandenBogaard/VBMF).  The factorization runs on a
    NumPy copy of the weights.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.

    Returns:
        ``nn.Sequential(first, core, last)``.
    """
    ranks = estimate_ranks(layer)
    print(layer, "VBMF Estimated ranks", ranks)

    device = layer.weight.device
    # .cpu() makes the NumPy conversion work for GPU-resident layers too.
    core, [last, first] = partial_tucker(
        layer.weight.data.cpu().numpy(), modes=[0, 1], ranks=ranks, init='svd')

    # The pointwise convolutions use stride 1: only the core convolution may
    # carry the original stride, otherwise it is applied three times.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Bias-free source layers used to crash on ``layer.bias.data``.
    has_bias = layer.bias is not None
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    # Conv2d weights are 4-D float32: transpose the input factor and append
    # two unit axes to both factors.
    first_w = np.ascontiguousarray(
        first.transpose((1, 0))[:, :, np.newaxis, np.newaxis], dtype=np.float32)
    last_w = np.ascontiguousarray(
        last[:, :, np.newaxis, np.newaxis], dtype=np.float32)
    core_w = np.ascontiguousarray(core, dtype=np.float32)

    first_layer.weight.data = torch.from_numpy(first_w).to(device)
    last_layer.weight.data = torch.from_numpy(last_w).to(device)
    core_layer.weight.data = torch.from_numpy(core_w).to(device)

    return nn.Sequential(first_layer, core_layer, last_layer)
def factorize_conv2d_tk(tensor, params):
    """Factorize axes 2 and 3 of a 4-order kernel.

    Args:
        tensor: 4-order array.
        params: mapping whose ``"ranks"`` entry holds the two ranks.

    Returns:
        ``[input_factor, core, output_factor]`` with both factors reshaped
        to 4-order arrays with leading ``(1, 1)`` axes; the output factor is
        transposed first.
    """
    ranks = params["ranks"]
    shape = tensor.shape
    assert len(shape) == 4, "The input tensor should be 4-order."
    assert len(ranks) == 2, "The length of ranks should be 2."

    core, factors = partial_tucker(tensor, [2, 3], ranks)

    input_factor = np.reshape(factors[0], (1, 1, shape[2], ranks[0]))
    output_factor = np.reshape(
        np.transpose(factors[1]), (1, 1, ranks[1], shape[3]))
    return [input_factor, core, output_factor]
def tucker_decomposition(layer, tucker_rank):
    """Decompose a Keras Conv2D layer into three Conv2D layers.

    :param layer: layer exposing ``get_config()`` and ``get_weights()``;
        the first weight array is the kernel, the optional second one the bias.
        (assumes the kernel is laid out as (k, k, c, f) - TODO confirm)
    :param tucker_rank: list [r1, r2] passed to ``partial_tucker`` for
        modes 2 and 3
    :return: list of Conv2D layers [input_layer, core_layer, output_layer]
        - input layer kernel has shape (1, 1, c, r1)
        - core layer kernel has shape (k, k, r1, r2)
        - output layer kernel has shape (1, 1, r2, f)
    """
    config = layer.get_config()
    strides = config['strides']
    padding = config['padding']

    layer_weights = layer.get_weights()
    weights = layer_weights[0]
    bias = layer_weights[1] if len(layer_weights) > 1 else None
    use_bias = bias is not None

    # core - (k, k, r1, r2), I - (c, r1), O - (f, r2)
    core, [I, O] = partial_tucker(
        weights, modes=[2, 3], ranks=tucker_rank, init='svd')

    input_layer = Conv2D(filters=I.shape[1], kernel_size=1, strides=(1, 1),
                         padding='valid', use_bias=False)
    # Take both spatial extents from the core so non-square kernels work.
    core_layer = Conv2D(filters=core.shape[-1],
                        kernel_size=tuple(core.shape[:2]),
                        strides=strides, padding=padding, use_bias=False)
    # Without a source bias the layer must be built bias-free; passing
    # ``None`` to set_weights is not a valid weight array.
    output_layer = Conv2D(filters=O.shape[0], kernel_size=1, strides=(1, 1),
                          padding='valid', use_bias=use_bias)

    input_layer.build(input_shape=[None, None, I.shape[0]])
    core_layer.build(input_shape=[None, None, core.shape[-2]])
    output_layer.build(input_shape=[None, None, core.shape[-1]])

    input_layer.set_weights([I[np.newaxis, np.newaxis]])
    core_layer.set_weights([core])

    output_weights = [np.transpose(O)[np.newaxis, np.newaxis]]
    if use_bias:
        output_weights.append(bias)
    output_layer.set_weights(output_weights)

    return [input_layer, core_layer, output_layer]
def __init__(self, layer, ranks='evbmf', init=True):
    """Build the three convolutions that stand in for ``layer``.

    Args:
        layer: source convolution; its weight, bias, kernel size, stride,
            padding and dilation are read.
        ranks: forwarded to ``self.choose_ranks`` together with the weight.
        init: when true, the new layers are initialised from a
            ``partial_tucker`` factorization of the source weight; otherwise
            they keep their default initialisation.
    """
    super(DecomposedConv2d, self).__init__()

    target_device = layer.weight.device
    source_weight = layer.weight.data
    n_out, n_in, _, _ = source_weight.shape
    rank_out, rank_in = self.choose_ranks(source_weight, ranks)

    # Settings shared by the two 1x1 convolutions.
    pointwise = dict(kernel_size=1, stride=1, padding=0,
                     dilation=layer.dilation)

    self.in_channel_layer = nn.Conv2d(
        in_channels=n_in, out_channels=rank_in, bias=False, **pointwise
    ).to(target_device)

    self.core_layer = nn.Conv2d(
        in_channels=rank_in,
        out_channels=rank_out,
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    ).to(target_device)

    self.out_channel_layer = nn.Conv2d(
        in_channels=rank_out,
        out_channels=n_out,
        bias=layer.bias is not None,
        **pointwise
    ).to(target_device)

    if not init:
        return

    core, factors = decomp.partial_tucker(
        source_weight, modes=[0, 1], ranks=(rank_out, rank_in), init='svd')
    out_channel_factor, in_channel_factor = factors

    if self.out_channel_layer.bias is not None:
        self.out_channel_layer.bias.data = layer.bias.data

    # Conv2d weights are 4-D, so the 2-D factors get two trailing unit axes.
    in_weight = torch.transpose(in_channel_factor, 1, 0)
    self.in_channel_layer.weight.data = in_weight.unsqueeze(-1).unsqueeze(-1)
    self.out_channel_layer.weight.data = \
        out_channel_factor.unsqueeze(-1).unsqueeze(-1)
    self.core_layer.weight.data = core
def tucker(W: torch.Tensor, ranks):
    """Build a three-convolution Tucker-2 approximation of a conv weight.

    Args:
        W: 4-D convolution weight tensor; may live on any device.
        ranks: ranks passed to ``partial_tucker`` for modes 0 and 1.

    Returns:
        ``nn.Sequential(conv1, conv2, conv3)`` without biases.  Stride,
        padding and dilation are left at the ``nn.Conv2d`` defaults because
        only the weight tensor is available here.
    """
    # .cpu() lets the NumPy-based factorization accept GPU tensors as well.
    core, [last, first] = partial_tucker(
        W.detach().cpu().numpy(), modes=[0, 1], rank=ranks, init='svd')
    kernel_size = tuple(W.shape[2:])

    conv1 = nn.Conv2d(in_channels=first.shape[0], out_channels=first.shape[1],
                      kernel_size=1, bias=False)
    conv2 = nn.Conv2d(in_channels=core.shape[1], out_channels=core.shape[0],
                      kernel_size=kernel_size, bias=False)
    conv3 = nn.Conv2d(in_channels=last.shape[1], out_channels=last.shape[0],
                      kernel_size=1, bias=False)

    def _to_weight(array):
        # Put the factor back on the device and dtype of the source weight.
        return torch.from_numpy(array).to(device=W.device, dtype=W.dtype)

    conv1.weight.data = \
        _to_weight(np.transpose(first)).unsqueeze(-1).unsqueeze(-1)
    conv2.weight.data = _to_weight(core)
    conv3.weight.data = _to_weight(last).unsqueeze(-1).unsqueeze(-1)
    return nn.Sequential(conv1, conv2, conv3)
def tucker_decompose_conv(layer, ranks=None, criterion=tucker_ranks):
    """Decompose a conv layer into three convolutions (Tucker-2).

    A filter of shape NxCxHxW becomes three filters: R1xCx1x1, R2xR1xHxW
    and NxR2x1x1.  Unlike other decomposition methods this needs 2 ranks.
    https://github.com/CasvandenBogaard/VBMF

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.
        ranks: the two ranks; ``None``, ``-1`` or ``[-1, -1]`` means
            "ask ``criterion``".
        criterion: callable taking ``layer`` and returning the ranks.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    if ranks is None or ranks == [-1, -1] or ranks == -1:
        ranks = criterion(layer)

    core, [last, first] = partial_tucker(
        layer.weight.data, modes=[0, 1], rank=ranks, init='svd')

    # 1x1 convolution reducing the input channels.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    # Spatial convolution on the reduced channels.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Only create a bias when the source layer has one; otherwise the new
    # layer would keep a randomly initialised bias and shift the output.
    has_bias = layer.bias is not None

    # 1x1 convolution restoring the original output channels.
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    first_layer.weight.data = \
        torch.transpose(first, 1, 0).unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(first_layer, core_layer, last_layer)
def f(self, x):
    """Objective value for one candidate pair of Tucker ranks.

    The value is the relative reconstruction error of the module-level
    ``conv`` weight plus the ratio of decomposed to original weight-product
    counts.  A ratio above 1.0 is replaced by the fixed penalty 5.0.

    Args:
        x: 2-D array-like; columns 0 and 1 hold the two candidate ranks.

    Returns:
        The objective as a Python float (also printed with its components).
    """
    x1 = x[:, 0]
    x2 = x[:, 1]
    ranks = [int(x1), int(x2)]

    dense = conv.weight.data.cpu().numpy()
    core, (last, first) = partial_tucker(
        dense, modes=[0, 1], ranks=ranks, init="svd")

    approx = tl.tucker_to_tensor((core, [last, first]))
    recon_error = tl.norm(dense - approx, 2) / tl.norm(dense, 2)
    # recon_error = np.nan_to_num(recon_error)

    dims = conv.weight.data.shape
    kernel_area = dims[2] * dims[3]
    original_computation = dims[0] * dims[1] * kernel_area

    first_cost = first.shape[0] * first.shape[1]
    core_cost = core.shape[1] * core.shape[0] * kernel_area
    last_cost = last.shape[1] * last.shape[0]
    decomposed_computation = first_cost + core_cost + last_cost

    computation_error = decomposed_computation / original_computation
    if computation_error > 1.0:
        # Decomposition costs more than the original: apply a flat penalty.
        computation_error = 5.0

    Error = float(recon_error + computation_error)
    print("%d, %d, %f, %f, %f" %
          (x1, x2, recon_error, computation_error, Error))
    return Error
def basisCompute(self):
    """Compress ``self.unitMat`` with a partial Tucker decomposition.

    Modes 0 and 1 are factorized with ranks ``self.user_dim`` and
    ``self.user_dim2`` and tolerance ``self.tol``.  Nothing is returned;
    the results are stored on the instance:

    * ``self.core``    - the core tensor converted to a NumPy array
    * ``self.factors`` - the factor matrices as returned by tensorly
    """
    data = tl.tensor(self.unitMat)
    target_ranks = [self.user_dim, self.user_dim2]
    self.core, self.factors = tld.partial_tucker(
        data[:, :, :],
        modes=[0, 1],
        tol=self.tol,
        ranks=target_ranks,
    )
    self.core = tlb.to_numpy(self.core)
def tucker_decomp(layer):
    """Return a Tucker-2 decomposition of a conv layer as ``nn.Sequential``.

    The ranks come from ``tucker_rank(layer)``; ``layer.weight`` is
    factorized along modes 0 and 1.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    W = layer.weight.data
    rank = tucker_rank(layer)
    core, [last, first] = partial_tucker(W, modes=[0, 1], rank=rank, init="svd")

    # 1x1 convolution reducing the input channels.
    first_layer = nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        padding=0,
        bias=False,
    )
    # Spatial convolution on the reduced channels.
    core_layer = nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Only create a bias when the source layer has one; otherwise the new
    # layer would keep a randomly initialised bias and shift the output.
    has_bias = layer.bias is not None
    # 1x1 convolution restoring the original output channels.
    last_layer = nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        padding=0,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    # Conv2d weights are 4-D: transpose the input factor and append two
    # unit axes to both factors.
    first_layer.weight.data = first.t().unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(first_layer, core_layer, last_layer)
def tucker_xavier(layer):
    """Build a Tucker-2 shaped replacement for ``layer`` and re-initialise it.

    The factorization is only used to obtain the layer sizes; the weights of
    the three new convolutions are set by ``xavier_weights2``, not by the
    factors.  The ranks come from ``estimate_ranks`` (VBMF).

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    ranks = estimate_ranks(layer)
    print(layer, "VBMF Estimated ranks", ranks)
    # .cpu() makes the NumPy conversion work for GPU-resident layers too.
    core, [last, first] = partial_tucker(
        layer.weight.data.cpu().numpy(), modes=[0, 1], ranks=ranks, init='svd')

    # The pointwise convolutions use stride 1: only the core convolution may
    # carry the original stride, otherwise it is applied three times.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Bias-free source layers used to crash on ``layer.bias.data``.
    has_bias = layer.bias is not None
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    new_layers = [first_layer, core_layer, last_layer]

    # Xavier init:
    for new_layer in new_layers:
        xavier_weights2(new_layer)

    return nn.Sequential(*new_layers)
def get_weight(self, weights):
    """Factorize a kernel and return the three replacement kernels.

    The squeezed input is permuted with axes ``(3, 2, 0, 1)`` before being
    factorized along modes 0 and 1; the results are permuted back.

    :param weights: array-like kernel
        (assumes a 4-D layout after squeezing - TODO confirm)
    :return: ``None`` when ``self.estimate_ranks`` returns ``None``,
        otherwise the list ``[first_kernel, core_kernel, last_kernel]``
    """
    kernel = np.transpose(np.squeeze(weights), (3, 2, 0, 1))

    ranks = self.estimate_ranks(kernel)
    if ranks is None:
        return None
    print("new ranks : ({})".format(ranks))

    core, (last, first) = partial_tucker(
        kernel, modes=[0, 1], ranks=ranks, init='svd')

    # Factors get two leading unit axes; the core is permuted back to the
    # axis order of the input.
    first_kernel = first[np.newaxis, np.newaxis, :, :]
    core_kernel = np.transpose(core, (2, 3, 1, 0))
    last_kernel = np.transpose(last, (1, 0))[np.newaxis, np.newaxis, :, :]
    return [first_kernel, core_kernel, last_kernel]
def tucker_reconstruction_loss(layer, rank):
    """Mean squared error between a kernel and its Tucker reconstruction.

    :param layer: object whose ``get_weights()[0]`` is the kernel
        (assumes a (k, k, c, f) layout - TODO confirm)
    :param rank: ranks passed to ``partial_tucker`` for modes 2 and 3
    :return: mean of the squared element-wise difference between the kernel
        and the tensor rebuilt from the core and the factors
    """
    weights = layer.get_weights()[0]
    modes = [2, 3]
    core, factors = partial_tucker(weights, modes=modes, ranks=rank, init='svd')

    # Multiply each factor back onto its mode to rebuild the kernel.
    approximation = core
    for factor, mode in zip(factors, modes):
        approximation = tl.tenalg.mode_dot(approximation, factor, mode)

    residual = weights - approximation
    return np.mean(residual ** 2)
def tucker_decomp(layer, rank):
    """Return the three conv layers of a Tucker-2 decomposition.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.
        rank: ranks passed to ``partial_tucker`` for modes 0 and 1.

    Returns:
        The plain list ``[first_layer, core_layer, last_layer]`` (not wrapped
        in ``nn.Sequential``).
    """
    W = layer.weight.data

    # TODO: find how to init when SVD already computed
    # http://tensorly.org/stable/_modules/tensorly/decomposition/_tucker.html
    core, [last, first] = partial_tucker(W, modes=[0, 1], rank=rank, init='svd')

    # 1x1 convolution reducing the input channels.
    first_layer = nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        padding=0,
        bias=False,
    )
    # Spatial convolution on the reduced channels.
    core_layer = nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Only create a bias when the source layer has one; otherwise the new
    # layer would keep a randomly initialised bias and shift the output.
    has_bias = layer.bias is not None
    # 1x1 convolution restoring the original output channels.
    last_layer = nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        padding=0,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    # Conv2d weights are 4-D: transpose the input factor and append two
    # unit axes to both factors.
    first_layer.weight.data = first.t().unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return [first_layer, core_layer, last_layer]
def tucker_decomposition_conv_layer(layer, ranks):
    """Return a Tucker-2 decomposition of a conv layer as ``nn.Sequential``.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.
        ranks: ranks passed to ``partial_tucker`` for modes 0 and 1.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    core, [last, first] = partial_tucker(
        layer.weight.data, modes=[0, 1], ranks=ranks, init='svd')

    # 1x1 convolution reducing the input channels.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    # Spatial convolution on the reduced channels.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # Bias-free source layers used to crash on ``layer.bias.data``.
    has_bias = layer.bias is not None

    # 1x1 convolution restoring the original output channels.
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = layer.bias.data

    first_layer.weight.data = \
        torch.transpose(first, 1, 0).unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(first_layer, core_layer, last_layer)
def BayesOpt_tucker_decomposition():
    """Decompose the module-level ``conv`` with BayesOpt-estimated ranks.

    The ranks come from ``estimate_ranks_BayesOpt()``.  The factorization
    runs on a NumPy copy of ``conv.weight`` and the resulting weights are
    moved to CUDA.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    ranks = estimate_ranks_BayesOpt()
    print(conv, "BayesOpt estimated ranks", ranks)
    core, [last, first] = partial_tucker(
        conv.weight.data.cpu().numpy(),
        modes=[0, 1],
        tol=10e-5,
        ranks=ranks,
        init="svd",
    )

    # The first pointwise convolution must not have a bias: the Conv2d
    # default would add a randomly initialised one in front of the core.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        bias=False,
    )
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=conv.kernel_size,
        stride=conv.stride,
        padding=conv.padding,
        dilation=conv.dilation,
        bias=False,
    )

    # The last layer carries the original bias when there is one, instead
    # of keeping a random default.
    has_bias = conv.bias is not None
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = conv.bias.data.cuda()

    first = torch.from_numpy(first.copy())
    last = torch.from_numpy(last.copy())
    core = torch.from_numpy(core.copy())

    # Conv2d weights are 4-D, so the 2-D factors get two trailing unit axes.
    first_layer.weight.data = (
        torch.transpose(first, 1, 0).unsqueeze(-1).unsqueeze(-1).data.cuda())
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1).data.cuda()
    core_layer.weight.data = core.data.cuda()

    return nn.Sequential(first_layer, core_layer, last_layer)
def f(self, x):
    """Objective value for one candidate pair of Tucker ranks.

    The value is the relative reconstruction error of the module-level
    ``conv`` weight plus the ratio of decomposed to original weight-product
    counts.

    Args:
        x: 2-D array-like; columns 0 and 1 hold the two candidate ranks.

    Returns:
        The objective as a Python float.
    """
    x1 = x[:, 0]
    x2 = x[:, 1]
    ranks = [int(x1), int(x2)]

    weight = conv.weight.data
    core, (last, first) = partial_tucker(
        weight, modes=[0, 1], ranks=ranks, init='svd')

    approx = tl.tucker_to_tensor((core, [last, first]))
    recon_error = tl.norm(weight - approx, 2) / tl.norm(weight, 2)
    #recon_error = np.nan_to_num(recon_error)

    dims = weight.shape
    kernel_area = dims[2] * dims[3]
    original_computation = dims[0] * dims[1] * kernel_area

    first_cost = first.shape[0] * first.shape[1]
    core_cost = core.shape[1] * core.shape[0] * kernel_area
    last_cost = last.shape[1] * last.shape[0]
    decomposed_computation = first_cost + core_cost + last_cost

    computation_error = decomposed_computation / original_computation
    Error = float(recon_error + computation_error)
    return Error
def BayesOpt_tucker_decomposition():
    """Decompose the module-level ``conv`` with BayesOpt-estimated ranks.

    The ranks come from ``estimate_ranks_BayesOpt()``; ``conv.weight`` is
    factorized along modes 0 and 1.

    Returns:
        ``nn.Sequential(first_layer, core_layer, last_layer)``.
    """
    ranks = estimate_ranks_BayesOpt()
    print(conv, "BayesOpt estimated ranks", ranks)
    core, [last, first] = partial_tucker(
        conv.weight.data, modes=[0, 1], tol=10e-5, ranks=ranks, init='svd')

    # 1x1 convolution reducing the input channels.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=conv.dilation,
        bias=False,
    )
    # Spatial convolution on the reduced channels.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=conv.kernel_size,
        stride=conv.stride,
        padding=conv.padding,
        dilation=conv.dilation,
        bias=False,
    )

    # 1x1 convolution restoring the original output channels.  Fixes:
    #  * out_channels follows axis 0 of the factor, matching the weight
    #    assigned below (it used to reuse axis 1);
    #  * dilation mirrors the source layer (0 is not a valid dilation);
    #  * the bias is copied from ``conv`` instead of left random.
    has_bias = conv.bias is not None
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=conv.dilation,
        bias=has_bias,
    )
    if has_bias:
        last_layer.bias.data = conv.bias.data

    first_layer.weight.data = \
        torch.transpose(first, 1, 0).unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core

    return nn.Sequential(first_layer, core_layer, last_layer)
def setData(self, Y=None, X=None, X_r=None, X_o=None, basis=None, nbasis=None, **kwargs):
    """Store the data tensor and derive the inputs of the covariance terms.

    ``Y`` is projected onto a signal basis (``basis``); the residual of that
    projection is projected onto a second basis (``nbasis``).  A basis that
    is not supplied is computed with ``partial_tucker`` over every mode
    except mode 0.

    Args:
        Y: data tensor; required in practice because its ``shape`` and
            ``ndim`` are read immediately.
        X: value for ``self.covar_r.X``; overridden by ``X_r`` when given.
        X_r: alternative spelling of ``X``.
        X_o: value for ``self.covar_o.X``.
        basis: optional sequence of factor matrices, one per non-leading
            mode; ranks ``self.bn`` are used when it is computed here.
        nbasis: optional sequence of factor matrices for the residual;
            ranks ``self.nbn`` are used when it is computed here.
        **kwargs: accepted and ignored.
    """
    self.Y = Y
    self.n = Y.shape[0]
    # Sizes of all modes after the first one.
    self.t = SP.zeros([Y.ndim - 1, ], dtype=int)
    self.t[:] = Y.shape[1:]
    self.nt = SP.prod(Y.shape)

    # Identity checks: comparing an array with ``!= None`` is element-wise
    # and cannot be used as an ``if`` condition.
    if X_r is not None:
        X = X_r
    if X is not None:
        self.covar_r.X = X
    if X_o is not None:
        self.covar_o.X = X_o

    modes = range(1, Y.ndim)

    # Signal part: either factorize Y or project it onto the given basis.
    if basis is None:
        self.Y_hat, self.basis = partial_tucker(
            Y, modes=modes, ranks=self.bn, init='svd', tol=10e-5)
    else:
        self.basis = basis
        self.Y_hat = tl.tenalg.multi_mode_dot(
            Y, [factor.T for factor in basis], modes=modes)

    # Rebuild Y slice by slice along mode 0 and take the residual.
    reconstruction = SP.zeros(self.Y.shape)
    for i in range(self.Y_hat.shape[0]):
        reconstruction[i, :] = tl.tucker_to_tensor(
            self.Y_hat[i, :], self.basis)
    res = Y - reconstruction

    # Residual part: same choice, applied to the residual.
    if nbasis is None:
        temp, self.nbasis = partial_tucker(
            res, modes=modes, ranks=self.nbn, init='svd', tol=10e-5)
    else:
        self.nbasis = nbasis
        temp = tl.tenalg.multi_mode_dot(
            res, [factor.T for factor in nbasis], modes=modes)

    for i in range(Y.ndim - 1):
        self.covar_c[i].X = unfold(self.Y_hat, mode=i + 1)
    for i in range(Y.ndim - 1):
        self.covar_s[i].X = unfold(temp, mode=i + 1)

    self._invalidate_cache()
def tucker_decomposition(layer, rank, device):
    """Return a Tucker-2 decomposition of a conv layer and the ranks used.

    Args:
        layer: convolution whose ``weight``, ``bias``, ``kernel_size``,
            ``stride``, ``padding`` and ``dilation`` are read.
        rank: an ``int``, the string ``'VBMF'`` (ranks are then taken from
            ``estimate_ranks(layer)``), or an explicit sequence of ranks.
        device: device the new weights (and bias) are moved to.

    Returns:
        Tuple ``(nn.Sequential(first, core, last), ranks)``.

    Raises:
        ValueError: if ``rank`` is a string other than ``'VBMF'``.
    """
    if isinstance(rank, int):
        ranks = rank
    elif isinstance(rank, str):
        if rank != 'VBMF':
            raise ValueError("Unsupported rank specification: {!r}".format(rank))
        ranks = estimate_ranks(layer)
    else:
        # Explicit per-mode ranks; this case used to leave ``ranks`` unbound.
        ranks = rank
    print(ranks)

    core, [last, first] = partial_tucker(
        layer.weight.data.cpu().numpy(), modes=[0, 1], rank=ranks, init='svd')

    # 1x1 convolution reducing the input channels.
    first_layer = torch.nn.Conv2d(
        in_channels=first.shape[0],
        out_channels=first.shape[1],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=False,
    )

    # Spatial convolution on the reduced channels.
    core_layer = torch.nn.Conv2d(
        in_channels=core.shape[1],
        out_channels=core.shape[0],
        kernel_size=layer.kernel_size,
        stride=layer.stride,
        padding=layer.padding,
        dilation=layer.dilation,
        bias=False,
    )

    # 1x1 convolution restoring the original output channels; it has a bias
    # exactly when the source layer has one.
    has_bias = layer.bias is not None
    last_layer = torch.nn.Conv2d(
        in_channels=last.shape[1],
        out_channels=last.shape[0],
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=layer.dilation,
        bias=has_bias,
    )
    if has_bias:
        # Keep the bias on the same device as the weights set below.
        last_layer.bias.data = layer.bias.data.to(device)

    first_tensor = torch.from_numpy(first.copy()).to(device)
    last_tensor = torch.from_numpy(last.copy()).to(device)
    core_tensor = torch.from_numpy(core.copy()).to(device)

    first_layer.weight.data = \
        torch.transpose(first_tensor, 1, 0).unsqueeze(-1).unsqueeze(-1)
    last_layer.weight.data = last_tensor.unsqueeze(-1).unsqueeze(-1)
    core_layer.weight.data = core_tensor

    new_layers = [first_layer, core_layer, last_layer]
    return nn.Sequential(*new_layers), ranks
def _tucker_decomposition(self, layer, rank, offline=False, filename=''):
    """Decompose a conv layer into three layers with a Tucker-2 factorisation.

    The weight of ``layer`` is decomposed with ``partial_tucker`` along
    modes 0 and 1 and rebuilt as ``1x1 conv -> core conv -> 1x1 conv``.
    Only the core convolution keeps the stride of ``layer``; giving the
    stride to the pointwise layers as well would apply it three times.

    Args:
        layer: the conv layer to decompose.
        rank: the ranks of the Tucker decomposition, forwarded to
            ``partial_tucker`` for modes ``[0, 1]``.
        offline: bool, request loading precomputed weights from ``filename``.
            Not supported for Tucker, see Raises.
        filename: string, file the offline weights would be loaded from.
            Currently unused.

    Returns:
        A list ``[first_layer, core_layer, last_layer]`` with the three
        layers that substitute the original one.

    Raises:
        NotImplementedError: if ``offline`` is true. The previous offline
            path loaded CP factors and never produced the core tensor the
            rest of this function needs, so it could not succeed.
    """
    print('[Decomposer]: computing Tucker decomposition of the layer {} with rank {}'.format(layer, rank))

    # TODO: this should be enhanced by using a subprocess call which calls
    # the MATLAB script and retrieves the result.
    if offline:
        raise NotImplementedError(
            'offline Tucker decomposition is not supported: '
            'no core tensor can be loaded from {!r}'.format(filename))

    # ``.cpu()`` so that weights living on an accelerator can be converted.
    core, [last, first] = partial_tucker(layer.weight.data.cpu().numpy(),
                                         modes=[0, 1], ranks=rank, init='svd')

    # A pointwise convolution that reduces the channels from S to R3
    first_layer = torch.nn.Conv2d(in_channels=first.shape[0],
                                  out_channels=first.shape[1],
                                  kernel_size=1,
                                  stride=1,
                                  padding=0,
                                  dilation=layer.dilation,
                                  bias=False)

    # A regular 2D convolution layer with R3 input channels
    # and R4 output channels; the only layer that strides.
    core_layer = torch.nn.Conv2d(in_channels=core.shape[1],
                                 out_channels=core.shape[0],
                                 kernel_size=layer.kernel_size,
                                 stride=layer.stride,
                                 padding=layer.padding,
                                 dilation=layer.dilation,
                                 bias=False)

    # A pointwise convolution that increases the channels from R4 to T.
    # It carries the original bias, if there is one.
    has_bias = layer.bias is not None
    last_layer = torch.nn.Conv2d(in_channels=last.shape[1],
                                 out_channels=last.shape[0],
                                 kernel_size=1,
                                 stride=1,
                                 padding=0,
                                 dilation=layer.dilation,
                                 bias=has_bias)
    if has_bias:
        last_layer.bias.data = layer.bias.data

    # Transpose and add dimensions to fit into the PyTorch tensors
    first = first.transpose((1, 0))
    first_layer.weight.data = torch.from_numpy(np.float32(
        np.expand_dims(np.expand_dims(first.copy(), axis=-1), axis=-1)))
    last_layer.weight.data = torch.from_numpy(np.float32(
        np.expand_dims(np.expand_dims(last.copy(), axis=-1), axis=-1)))
    core_layer.weight.data = torch.from_numpy(np.float32(core.copy()))

    new_layers = [first_layer, core_layer, last_layer]
    return new_layers
def _fit(self, tensor):
    """Fill the missing entries of ``tensor`` by repeated low-rank reconstruction.

    Entries that are NaN inside ``self.mask`` are initialised to zero, then
    on every iteration the tensor is decomposed (``self.decomp_type``:
    ``'hooi'``, ``'trunchosvd'`` or ``'parafac'``, case-insensitive),
    reconstructed from the factors, and the known entries are copied back
    over the reconstruction. Entries selected by ``self.inverse_mask`` are
    kept at zero during the iterations and set to NaN at the end.

    The convergence error of an iteration is the root-mean-square change of
    the missing entries, divided by the standard deviation of the known
    entries. Iteration stops after ``self.nitemax`` rounds, or earlier when
    that error drops to ``self.toliter`` or below (with
    ``self.early_stopping`` also when its change between two iterations is
    below ``self.toliter``).

    ``tensor`` is modified in place unless centering replaces it first.
    Results are stored on the instance: ``final_iter``, ``conv_error``,
    ``grad_conv_error``, ``reconstructed_tensor``, ``core_tensor``,
    ``factors`` and, with ``self.with_energy``, the per-component
    ``total_energy_<k>``, ``explained_energy_<k>`` and
    ``explained_energy_ratio_<k>`` arrays.

    Args:
        tensor: array to reconstruct; ``self.mask[:, :, None]`` must
            broadcast against it.

    Raises:
        Exception: if ``self.decomp_type`` is not a supported method.
    """
    if self.to_center:
        tensor, *means = utils.center_3d_tensor(tensor, lat_lon_separately=self.lat_lon_sep_centering)

    # Initial guess
    nan_mask = np.logical_and(np.isnan(tensor), self.mask[:, :, None])
    non_nan_mask = np.logical_and(~nan_mask, self.mask[:, :, None])
    tensor[nan_mask] = 0
    tensor[self.inverse_mask] = 0  # Outside of an investigated area everything is considered to be zero

    pbar = trange(self.nitemax, desc='Reconstruction')
    conv_error = 0
    energy_per_iter = []
    for i in pbar:
        decomp_type = self.decomp_type.lower()
        if decomp_type == 'hooi':
            ranks = np.repeat(self.R, tensor.ndim) if isinstance(self.R, int) else self.R
            G, A = tld.partial_tucker(tensor, modes=list(range(len(ranks))), ranks=ranks,
                                      tol=self.tol, n_iter_max=self.td_iter_max)
        elif decomp_type == 'trunchosvd':
            G, A = self.trunc_hosvd(tensor)
        elif decomp_type == 'parafac':
            G, A = self.parafac(tensor, self.R, n_iter_max=self.td_iter_max, tol=self.tol)
        else:
            raise Exception(f'{self.decomp_type} is unsupported.')

        # Save energy characteristics for this iteration
        if self.with_energy:
            energy_i = self.calculate_energy(tensor, G, A)
            energy_per_iter.append(energy_i)

        tensor_hat = self.recontruct_tensor_by_factors(G, A)
        tensor_hat[non_nan_mask] = tensor[non_nan_mask]  # Transfer known points from the previous iteration
        tensor_hat[self.inverse_mask] = 0  # Keeping outer area zeroed

        # RMS change of the imputed entries, scaled by the spread of the known ones.
        new_conv_error = np.sqrt(np.mean(np.power(tensor_hat[nan_mask] - tensor[nan_mask], 2))) / tensor[non_nan_mask].std()
        tensor = tensor_hat

        pbar.set_postfix(error=new_conv_error, rel_error=abs(new_conv_error - conv_error))
        logger.info(f'Error/Relative Error at iteraion {i}: {new_conv_error}, {abs(new_conv_error - conv_error)}')

        grad_conv_error = abs(new_conv_error - conv_error)
        conv_error = new_conv_error

        if self.early_stopping:
            break_condition = (conv_error <= self.toliter) or (grad_conv_error < self.toliter)
        else:
            break_condition = (conv_error <= self.toliter)

        if break_condition:
            break

    energy_per_iter = np.array(energy_per_iter)

    if self.to_center:
        tensor = utils.decenter_3d_tensor(tensor, *means, lat_lon_separately=self.lat_lon_sep_centering)

    if self.keep_non_negative_only:
        tensor[tensor < 0] = 0

    tensor[self.inverse_mask] = np.nan  # Put NaNs outside the investigated area

    # Save energies in model for distinct components (lat, lon, t).
    # The loop variable must not be ``i``: that name still holds the index of
    # the last reconstruction iteration, which is stored as ``final_iter``.
    if self.with_energy:
        for component in range(tensor.ndim):
            setattr(self, f'total_energy_{component}', np.array(energy_per_iter[:, component, 0]))
            setattr(self, f'explained_energy_{component}', np.array(energy_per_iter[:, component, 1]))
            setattr(self, f'explained_energy_ratio_{component}', np.array(energy_per_iter[:, component, 2]))

    self.final_iter = i
    self.conv_error = conv_error
    self.grad_conv_error = grad_conv_error
    self.reconstructed_tensor = tensor
    self.core_tensor = G
    self.factors = A
def decompose_tensor(X, R3, R4):
    """Partially Tucker-decompose ``X`` along modes 2 and 3.

    ``X`` is converted with ``X.numpy()`` and handed to
    ``td.partial_tucker`` with ranks ``[R3, R4]`` for modes ``[2, 3]``.

    Args:
        X: object exposing ``.numpy()``.
        R3: rank requested for mode 2.
        R4: rank requested for mode 3.

    Returns:
        The ``(core, factors)`` pair produced by ``td.partial_tucker``.
    """
    dense = X.numpy()
    decomposed_modes = [2, 3]
    target_ranks = [R3, R4]
    core, factors = td.partial_tucker(dense,
                                      modes=decomposed_modes,
                                      ranks=target_ranks)
    return core, factors