def __init__(self, net):
    """Wrap an Inception net, storing ImageNet normalization stats as frozen parameters."""
    super(WrapInception, self).__init__()
    self.net = net
    # ImageNet channel statistics, shaped (1, C, 1, 1) to broadcast over NCHW batches.
    imagenet_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, -1, 1, 1)
    imagenet_std = torch.tensor([0.229, 0.224, 0.225]).view(1, -1, 1, 1)
    self.mean = P(imagenet_mean, requires_grad=False)
    self.std = P(imagenet_std, requires_grad=False)
def __init__(self, output_size, eps=1e-5, momentum=0.1, cross_replica=False, mybn=False):
    """Batchnorm wrapper selecting cross-replica, custom, or plain running-stat BN.

    Args:
        output_size: number of channels to normalize.
        eps: numerical-stability epsilon added to the variance.
        momentum: running-statistics update rate.
        cross_replica: use a synchronized (cross-replica) BN backend.
        mybn: use the custom myBN backend instead.
    """
    super(bn, self).__init__()
    self.output_size = output_size
    # Learnable per-channel scale and shift (affine part lives here, not in self.bn).
    self.gain = P(torch.ones(output_size), requires_grad=True)
    self.bias = P(torch.zeros(output_size), requires_grad=True)
    # epsilon to avoid dividing by 0
    self.eps = eps
    self.momentum = momentum
    self.cross_replica = cross_replica
    self.mybn = mybn
    if self.cross_replica:
        # affine=False: gain/bias above already provide the affine transform.
        self.bn = SyncBN2d(output_size, eps=self.eps, momentum=self.momentum, affine=False)
    elif mybn:
        self.bn = myBN(output_size, self.eps, self.momentum)
    else:
        # Plain path: keep running statistics in buffers on this module.
        self.register_buffer('stored_mean', torch.zeros(output_size))
        self.register_buffer('stored_var', torch.ones(output_size))
def __init__(self, dim, activ=True, groups=32):
    """Store configuration and allocate trainable per-dimension parameters."""
    super().__init__()
    self.dim = dim
    self.groups = groups
    self.activ = activ
    # Per-dimension scale (gamma), shift (beta), and auxiliary weight (v).
    self.gamma = P(torch.ones(dim))
    self.beta = P(torch.zeros(dim))
    self.v = P(torch.ones(dim))
def __init__(self, model, img_res=224):
    """Wrap `model`, keeping the target resolution and ImageNet stats at hand."""
    super().__init__()
    self.model = model
    self.img_res = img_res
    # ImageNet mean/std, shaped (1, C, 1, 1) for broadcasting over NCHW input.
    mean = torch.tensor([0.485, 0.456, 0.406])
    std = torch.tensor([0.229, 0.224, 0.225])
    self.mean = P(mean.view(1, -1, 1, 1), requires_grad=False)
    self.std = P(std.view(1, -1, 1, 1), requires_grad=False)
def __init__(self, net):
    """Wrap an r2plus1d_18 video net, exposing a head-less pooling model."""
    super(WrapR2plus1d_18, self).__init__()
    self.net = net
    children = list(self.net.children())
    # Split off the final child (the classifier head) and keep the rest.
    self.removed = children[-1]
    self.remained = children[:-1]
    self.poolModel = torch.nn.Sequential(*self.remained)
    # xiaodan: mean and std stats from
    # https://pytorch.org/docs/stable/torchvision/models.html#video-classification
    self.mean = P(torch.tensor([0.43216, 0.394666, 0.37645]).view(1, -1, 1, 1),
                  requires_grad=False)
    self.std = P(torch.tensor([0.22803, 0.22145, 0.216989]).view(1, -1, 1, 1),
                 requires_grad=False)
def __init__(self, shard_dim, shard_in_dim, shard_out_dim):
    """Allocate per-shard weights: a kernel plus pre/post bias tensors."""
    super().__init__()
    self.shard_dim = shard_dim
    self.shard_in_dim = shard_in_dim
    self.shard_out_dim = shard_out_dim
    # Small random init; 0.05 * randn keeps initial magnitudes modest.
    scale = 0.05
    self.pre_bias = P(scale * torch.randn(shard_in_dim, shard_dim))
    self.kernel = P(scale * torch.randn(shard_in_dim, shard_out_dim, shard_dim))
    self.post_bias = P(scale * torch.randn(shard_out_dim, shard_dim))
def __init__(self, in_features, out_features, threshold, bias=True):
    """Linear layer with variational-dropout parameters (weight mean + log sigma)."""
    super(LinearSVDO, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.threshold = threshold
    # Weight mean and per-weight log-sigma; values filled by reset_parameters().
    self.W = P(torch.Tensor(out_features, in_features))
    self.log_sigma = P(torch.Tensor(out_features, in_features))
    # NOTE(review): the `bias` flag is accepted but ignored — a bias parameter
    # is always created. Confirm whether bias=False should skip it.
    self.bias = P(torch.Tensor(1, out_features))
    self.reset_parameters()
def __init__(self, in_height, in_width, out_classes, embed_dim, inputs_per_neuron, outputs_per_neuron, num_neurons, latent_dim, ticks_per_sample, graph_optimizer, g, g_optimizer, d, d_optimizer, clf, clf_optimizer): super().__init__(ticks_per_sample) # Configuration. in_dim = in_height * in_width self.in_height = in_height self.in_width = in_width self.in_dim = in_dim self.out_classes = out_classes self.embed_dim = embed_dim self.inputs_per_neuron = inputs_per_neuron self.outputs_per_neuron = outputs_per_neuron self.num_neurons = num_neurons self.latent_dim = latent_dim self.ticks_per_sample = ticks_per_sample # State (sent to next tick). # # - Activations (num neurons). self.register_buffer('activations', torch.randn(num_neurons)) # Graph (ie, embeddings). # # - Input sinks (embed dim, outputs per neuron, in dim). # - Neuron sinks (embed dim, outputs per neuron, num neurons). # - Neuron sources (embed dim, inputs per neuron, num neurons). x = self.make_input_sinks(in_height, in_width, embed_dim) x = x.unsqueeze(1) x = x.repeat(1, outputs_per_neuron, 1) self.input_sinks = P(x) self.neuron_sinks = P( torch.randn(embed_dim, outputs_per_neuron, num_neurons)) self.neuron_sources = P( torch.randn(embed_dim, inputs_per_neuron, num_neurons)) parameters = self.input_sinks, self.neuron_sinks, self.neuron_sources self.graph_optimizer = graph_optimizer(parameters) # Neurons (ie, a sharded GAN). # # - Generator (latent dim, num neurons) -> (inputs per neuron, num neurons). # - Discriminator (inputs per neuron, num neurons) -> (1, num neurons). self.g = g self.g_optimizer = g_optimizer(g.parameters()) self.d = d self.d_optimizer = d_optimizer(d.parameters()) # Output (ie, a model). # # - Model: (num neurons) -> (clf dim). self.clf = clf self.clf_optimizer = clf_optimizer(clf.parameters())
def __init__(
    self,
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    shape=(1, -1, 1, 1, 1),
    rescale=True,
):
    """Store normalization constants as frozen parameters.

    Args:
        mean: per-channel means (defaults are the ImageNet statistics).
        std: per-channel standard deviations (ImageNet defaults).
        shape: view applied to mean/std so they broadcast over the input
            (default suits 5-D NCTHW video batches).
        rescale: flag stored for use at normalization time.

    Fix: the default mean/std were mutable lists (a Python anti-pattern);
    tuples are behavior-equivalent here since torch.tensor accepts either.
    """
    super().__init__()
    self.shape = shape
    self.mean = P(torch.tensor(mean).view(shape), requires_grad=False)
    self.std = P(torch.tensor(std).view(shape), requires_grad=False)
    self.rescale = rescale
def __init__(self, input_size, hidden_size, bias=True, dropout=0.0, jit=False):
    """Allocate per-gate LSTM weights and (optionally trainable) biases."""
    super(SlowLSTM, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.bias = bias
    self.dropout = dropout
    # Input-to-hidden weights, one matrix per gate (input/forget/output/cell).
    self.w_xi = P(T(hidden_size, input_size))
    self.w_xf = P(T(hidden_size, input_size))
    self.w_xo = P(T(hidden_size, input_size))
    self.w_xc = P(T(hidden_size, input_size))
    # Hidden-to-hidden weights, one matrix per gate.
    self.w_hi = P(T(hidden_size, hidden_size))
    self.w_hf = P(T(hidden_size, hidden_size))
    self.w_ho = P(T(hidden_size, hidden_size))
    self.w_hc = P(T(hidden_size, hidden_size))
    # Zero-initialized biases: trainable parameters when `bias`,
    # otherwise no-grad variables.
    wrap = P if bias else V
    self.b_i = wrap(T(hidden_size).fill_(0))
    self.b_f = wrap(T(hidden_size).fill_(0))
    self.b_o = wrap(T(hidden_size).fill_(0))
    self.b_c = wrap(T(hidden_size).fill_(0))
    self.reset_parameters()
    self.jit = jit
def __init__(self, in_planes, out_planes, stride, layer_index):
    """Pre-activation-style basic block with scalar gain/bias parameters."""
    super(BasicBlock, self).__init__()
    self.activation = nn.ReLU(inplace=True)
    self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.equalInOut = in_planes == out_planes
    # 1x1 projection shortcut only when the channel counts differ.
    if self.equalInOut:
        self.convShortcut = None
    else:
        self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1,
                                      stride=stride, padding=0, bias=False)
    # Scalar gain and four scalar biases, broadcastable over NCHW.
    self.gain = P(torch.ones(1, 1, 1, 1))
    self.biases = nn.ParameterList([P(torch.zeros(1, 1, 1, 1)) for _ in range(4)])
    # layer index
    self.layer_index = layer_index
def __init__(self, in_height, in_width, out_classes, embed_dim,
             inputs_per_neuron, outputs_per_neuron, num_neurons, latent_dim,
             ticks_per_sample):
    """Wire up graph-endpoint embeddings, a sharded predictor, and one
    Adam optimizer over all of them.

    NOTE(review): `latent_dim` is accepted but never stored or used here —
    confirm whether it should be kept.
    """
    super().__init__(ticks_per_sample)
    in_dim = in_height * in_width
    # Configuration.
    self.in_height = in_height
    self.in_width = in_width
    self.in_dim = in_dim
    self.out_classes = out_classes
    self.embed_dim = embed_dim
    self.inputs_per_neuron = inputs_per_neuron
    self.outputs_per_neuron = outputs_per_neuron
    self.num_neurons = num_neurons
    self.ticks_per_sample = ticks_per_sample
    # Input destinations: (embed dim, outputs per neuron, in dim).
    x = torch.randn(embed_dim, outputs_per_neuron, in_dim)
    self.input_dests = P(x)
    # Neuron sources: (embed dim, inputs per neuron, num neurons).
    x = torch.randn(embed_dim, inputs_per_neuron, num_neurons)
    self.neuron_sources = P(x)
    # Predictor maps, per shard (neuron):
    # (batch size, inputs per neuron, num neurons)
    # ::
    # (batch size, outputs per neuron, num neurons)
    mids_per_neuron = (inputs_per_neuron + outputs_per_neuron) // 2
    self.predict = nn.Sequential(
        ShardedLinearBlock(num_neurons, inputs_per_neuron, mids_per_neuron),
        ShardedLinearBlock(num_neurons, mids_per_neuron, outputs_per_neuron),
        ShardedLinear(num_neurons, outputs_per_neuron, outputs_per_neuron),
    )
    # Last prediction, carried across ticks as a buffer (not trained).
    self.register_buffer('pred_out', torch.rand(outputs_per_neuron, num_neurons))
    # Neuron destinations: (embed dim, outputs per neuron, num neurons).
    x = torch.randn(embed_dim, outputs_per_neuron, num_neurons)
    self.neuron_dests = P(x)
    # Output sources: (embed dim, inputs per neuron, out classes).
    x = torch.randn(embed_dim, inputs_per_neuron, out_classes)
    self.output_sources = P(x)
    # One optimizer over every learnable piece above; keep creation last.
    parameters = [self.input_dests, self.neuron_sources] + \
        list(self.predict.parameters()) + \
        [self.neuron_dests, self.output_sources]
    self.optimizer = Adam(parameters)
def __init__(self, ch, conv_func=nn.Conv2d, **kwargs):
    """Self-attention projections built from 1x1 convs of `conv_func`.

    NOTE(review): **kwargs is accepted but unused here — confirm intent.
    """
    super().__init__()
    self.ch = ch
    self.conv_func = conv_func

    def conv1x1(c_in, c_out):
        # All projections are bias-free 1x1 convs.
        return self.conv_func(c_in, c_out, kernel_size=1, padding=0, bias=False)

    self.theta = conv1x1(self.ch, self.ch // 8)
    self.phi = conv1x1(self.ch, self.ch // 8)
    self.g = conv1x1(self.ch, self.ch // 2)
    self.o = conv1x1(self.ch // 2, self.ch)
    # Learnable gain, initialized to zero.
    self.gamma = P(torch.tensor(0.0), requires_grad=True)
def __init__(self, ch, time_steps, which_conv=SNConv3d, name='full_attention'):
    """Attention over full spatio-temporal volumes.

    Assumes input tensors shaped (B, T, C, H, W).
    """
    super(FullAttention, self).__init__()
    self.ch = ch
    self.T = time_steps
    self.which_conv = which_conv
    # Bias-free 1x1 projections: theta/phi to ch//8, g to ch//2, o back to ch.
    self.theta = self.which_conv(self.ch, self.ch // 8, kernel_size=1,
                                 padding=0, bias=False)
    self.phi = self.which_conv(self.ch, self.ch // 8, kernel_size=1,
                               padding=0, bias=False)
    self.g = self.which_conv(self.ch, self.ch // 2, kernel_size=1,
                             padding=0, bias=False)
    self.o = self.which_conv(self.ch // 2, self.ch, kernel_size=1,
                             padding=0, bias=False)
    # Learnable gain, initialized to zero.
    self.gamma = P(torch.tensor(0.), requires_grad=True)
def __init__(self, ch, which_conv=SNConv2d, name='attention'):
    """Self-attention block built from bias-free 1x1 convs of `which_conv`."""
    super(Attention, self).__init__()
    self.ch = ch
    self.which_conv = which_conv

    def proj(c_in, c_out):
        # Shared shape for all four projections.
        return self.which_conv(c_in, c_out, kernel_size=1, padding=0, bias=False)

    self.theta = proj(self.ch, self.ch // 8)
    self.phi = proj(self.ch, self.ch // 8)
    self.g = proj(self.ch, self.ch // 2)
    self.o = proj(self.ch // 2, self.ch)
    # Learnable gain, initialized to zero.
    self.gamma = P(torch.tensor(0.), requires_grad=True)
def __init__(self, channels):
    """Attention built from spectral-norm 1x1 convs over `channels` features."""
    super(Attention, self).__init__()
    self.channels = channels
    c = channels
    # f/g project to c//8, h to c//2, v maps back up to c.
    self.conv_f = sn_conv1x1(c, c // 8)
    self.conv_g = sn_conv1x1(c, c // 8)
    self.conv_h = sn_conv1x1(c, c // 2)
    self.conv_v = sn_conv1x1(c // 2, c)
    # Learnable gain, initialized to zero.
    self.gamma = P(torch.zeros(1), requires_grad=True)
def __init__(self, model, action_size=1, init_value=0.0, *args, **kwargs):
    """Gaussian policy with a learnable per-dimension log standard deviation."""
    super(DiagonalGaussianPolicy, self).__init__(model, *args, **kwargs)
    self.init_value = init_value
    # Trainable log-std, one value per action dimension, all set to init_value.
    self.logstd = P(th.zeros((1, action_size)) + self.init_value)
    # Cached constants.
    # NOTE(review): despite its name, halflog2pie holds pi*e = 0.5*(2*pi*e),
    # not 0.5*log(2*pi*e); presumably the log is taken at the use site — confirm.
    self.halflog2pie = V(T([2 * pi * exp(1)])) * 0.5
    self.halflog2pi = V(T([2.0 * pi])) * 0.5
    self.pi = V(T([pi]))
def __init__(self, ch, which_conv, pool_size_per_cluster, num_k, feature_dim,
             warmup_total_iter=1000, cp_momentum=0.3, device='cuda'):
    """Attention module backed by a clustered concept-prototype memory pool.

    Args:
        ch: input channel count for the conv projections.
        which_conv: conv factory used for all 1x1 projections.
        pool_size_per_cluster: memory slots reserved per cluster.
        num_k: number of clusters / prototypes.
        feature_dim: dimensionality of pooled concept features.
        warmup_total_iter: warmup iterations before the pool is clustered.
        cp_momentum: momentum for concept-pool updates.
        device: device string stored for later use (default 'cuda').
    """
    super(ConceptAttentionProto, self).__init__()
    self.myid = "atten_concept_prototypes"
    self.device = device
    self.pool_size_per_cluster = pool_size_per_cluster
    self.num_k = num_k
    self.feature_dim = feature_dim
    self.ch = ch  # input channel
    self.total_pool_size = self.num_k * self.pool_size_per_cluster
    # Memory pool and per-cluster prototypes, kept as buffers (not trained).
    self.register_buffer(
        'concept_pool', torch.rand(self.feature_dim, self.total_pool_size))
    self.register_buffer('concept_proto',
                         torch.rand(self.feature_dim, self.num_k))
    # concept pool is arranged as memory cell, i.e. linearly arranged as a 2D
    # tensor, use get_cluster_ptr to get starting pointer for each cluster
    # states that indicating the warmup
    self.register_buffer('warmup_iter_counter', torch.FloatTensor([0.]))
    self.warmup_total_iter = warmup_total_iter
    self.register_buffer(
        'pool_structured', torch.FloatTensor([0.])
    )  # 0 means pool is un clustered, 1 mean pool is structured as clusters arrays
    # register attention module
    self.which_conv = which_conv
    # Three projections into feature_dim plus the output projection back to ch.
    self.theta = self.which_conv(self.ch, self.feature_dim,
                                 kernel_size=1, padding=0, bias=False)
    self.phi = self.which_conv(self.ch, self.feature_dim,
                               kernel_size=1, padding=0, bias=False)
    self.g = self.which_conv(self.ch, self.feature_dim,
                             kernel_size=1, padding=0, bias=False)
    self.o = self.which_conv(self.feature_dim, self.ch,
                             kernel_size=1, padding=0, bias=False)
    # Learnable gain parameter, initialized to zero.
    self.gamma = P(torch.tensor(0.), requires_grad=True)
    # self.momentum
    self.cp_momentum = cp_momentum
def __init__(self, ch, n_hashes, q_cluster_size, k_cluster_size,
             q_attn_size=None, k_attn_size=None, max_iters=10, r=1,
             clustering_algo='lsh', progress=False, which_conv=SNConv2d,
             name='attention'):
    """SmyrfAttention for BigGAN: standard attention projections plus a
    clustered (SMYRF) attention backend."""
    super(AttentionApproximation, self).__init__()
    self.ch = ch
    self.which_conv = which_conv

    def sn1x1(c_in, c_out):
        # Bias-free 1x1 projection via which_conv.
        return self.which_conv(c_in, c_out, kernel_size=1, padding=0, bias=False)

    self.theta = sn1x1(self.ch, self.ch // 8)  # queries
    self.phi = sn1x1(self.ch, self.ch // 8)    # keys
    self.g = sn1x1(self.ch, self.ch // 2)
    self.o = sn1x1(self.ch // 2, self.ch)
    # Learnable gain, initialized to zero.
    self.gamma = P(torch.tensor(0.), requires_grad=True)
    self.smyrf = SmyrfAttention(n_hashes=n_hashes,
                                q_cluster_size=q_cluster_size,
                                k_cluster_size=k_cluster_size,
                                q_attn_size=q_attn_size,
                                k_attn_size=k_attn_size,
                                max_iters=max_iters,
                                clustering_algo=clustering_algo,
                                r=r)
    self.progress = progress
def __init__(self, manipulator_model, detector_model, manipulate_func='video'):
    """Pair a manipulator with a detector and pick the manipulation callable.

    Args:
        manipulator_model: model providing manipulate_video.
        detector_model: downstream detector model.
        manipulate_func: 'video' or 'frame'; anything else falls back to video.

    Fix: the dict lookup's fallback used to be the *string* 'video', so an
    unrecognized manipulate_func left self.manipulate non-callable. The
    fallback is now the video-manipulation method itself.
    """
    super().__init__()
    self.manipulator_model = manipulator_model
    self.detector_model = detector_model
    # Learnable amplification factor, broadcastable over (N, C, H, W).
    self.amp_param = P(5 * torch.ones(1, 1, 1, 1))
    self.manipulator_func = manipulate_func
    dispatch = {
        'video': self.manipulator_model.manipulate_video,
        'frame': self.manipulate_frame,
    }
    self.manipulate = dispatch.get(manipulate_func, dispatch['video'])
def __init__(self, Node_feature_dim, Axis_feature_dim, hidden, attention=False, possible_split_parts=None, reorder_len=4): super(Agent, self).__init__() # All parameters are here, but not all initialized here self._hidden = hidden self._node_cell = NodeCell(Node_feature_dim, hidden) self._axis_cell = AxisCell(Axis_feature_dim, hidden) self._reduce_axis_cell = AxisCell(Axis_feature_dim, hidden) # whether to use attention if attention: self._attention = P(torch.rand([2 * hidden, hidden], requires_grad=True)) self._with_attention = attention # useful states self._up_states = {} self._down_states = {} self._last_h = torch.ones(hidden) # use to decide how many parts Split Schedule can split self._possible_split_parts = possible_split_parts if possible_split_parts else [1, 2, 3, 4] # a layer before deciders self._trans = nn.Linear(hidden * 2, hidden) # a decider whether ot compute inline, [inline, not_inline] self._inline_decider = nn.Linear(hidden, 2) # a decider whether and where to compute at, [at, not_at, where] self._at_decider = nn.Linear(hidden, 3) # a decider whether and how to split an axis, [p1, p2...] self._split_decider = nn.Linear(hidden + Axis_feature_dim, len(self._possible_split_parts)) # a decider whether to fuse with the latter axis, [fuse, not_fuse] self._fuse_decider = nn.Linear(hidden + Axis_feature_dim, 2) # a decider whether to reorder the axes, [v1, v2...] self._reorder_decider = nn.Linear(hidden + Axis_feature_dim * reorder_len, reorder_len) # a decider whether to parallel the axis, [parallel, not_parallel] self._parallel_decider = nn.Linear(hidden + Axis_feature_dim, 2) # a decider whether to unroll the axis, [unroll, not_unroll] self._unroll_decider = nn.Linear(hidden + Axis_feature_dim, 2) # a decider whether to vectorize the axis, [vectorize, not_vectorize] self._vectorize_decider = nn.Linear(hidden + Axis_feature_dim, 2) # use to mark whether reset self._new = False self._first_call = True
def __init__(self, ch, which_conv=SNConv2d):
    """Attention mixing conv features with 512-dimensional concept vectors.

    theta/o project the conv feature map via 1x1 convs; phi/g are linear
    layers over 512-d inputs.
    """
    super(ConceptAttention, self).__init__()
    self.myid = "atten"
    self.ch = ch
    self.which_conv = which_conv
    half = self.ch // 2
    self.theta = self.which_conv(self.ch, half, kernel_size=1, padding=0,
                                 bias=False)
    self.phi = nn.Linear(512, half)
    self.g = nn.Linear(512, half)
    self.o = self.which_conv(half, self.ch, kernel_size=1, padding=0,
                             bias=False)
    # Learnable gain, initialized to zero.
    self.gamma = P(torch.tensor(0.), requires_grad=True)
def __init__(self, *inner, rate=0.05):
    """Wrap `inner` layers in a Sequence and store `rate` in logit space.

    raw_rate holds inverse_sigmoid(rate); presumably a sigmoid at the use
    site recovers the effective rate.
    """
    super().__init__()
    self.inner = Sequence(*inner)
    self.raw_rate = P(inverse_sigmoid(rate))
import torch
from torch.nn import Parameter as P

# Sanity-check script: per-class prototype distances selected with `gather`
# must match boolean-mask selection.
# Fixes: renamed `all` -> `dist_all` and `sum` -> `total` so the builtins are
# no longer shadowed; `mask[:] == False` replaced with the idiomatic `~mask`.

wei = P(torch.rand(1, 10, 100, 5))   # (1, classes, protos, feat)
x = P(torch.rand(64, 5))             # (batch, feat)
y = torch.randint(0, 5, [64])        # one class label per sample
wei = wei.expand(64, -1, -1, -1)     # broadcast to (batch, classes, protos, feat)
# Distance of each sample to each prototype: (batch, classes, protos).
dist_all = (wei - x.unsqueeze(1).unsqueeze(1)).norm(dim=-1)
# Row mask marking each sample's own class.
mask = torch.zeros(size=(wei.size(0), wei.size(1)), dtype=torch.bool)
mask[torch.arange(0, 64), y] = True
# Selection via gather.
cha = dist_all.gather(1, y.unsqueeze(1).unsqueeze(2).expand(-1, -1, 100)).squeeze()
total = 0
# Selection via boolean mask; must agree exactly with `cha`.
cha_1 = dist_all[mask.unsqueeze(2).expand(-1, -1, 100)].reshape(mask.size(0), -1)
total = (cha - cha_1).sum()
assert total == 0
# Cross-check each entry against a directly recomputed norm.
for i in range(0, 64):
    for j in range(0, 100):
        total += cha_1[i, j] - (wei[i, y[i], j, :] - x[i, :]).norm()
    if total != 0:
        print(i, total)
# Hardest (largest) same-class distance per sample.
pull = cha_1.max(-1)[0]
mask_fan = ~mask
# Distances to prototypes of the other 9 classes: (batch, 9, protos).
cha_fan = dist_all[mask_fan.unsqueeze(2).expand(-1, -1, 100)].reshape(
    mask.size(0), -1, dist_all.size(-1))
def __init__(self, MaxValue=1, weight=None, size_average=None, reduce=None, reduction='mean'):
    """Ordinal loss setup.

    Args:
        MaxValue: maximum label value (must be nonzero; used as a divisor).
        weight, size_average, reduce, reduction: forwarded to the loss base class.

    Fix: the stored attribute was misspelled `MaxVaule`; the correctly
    spelled `MaxValue` is now set as well, with the old name kept as a
    backward-compatible alias for existing readers.
    """
    super(OrdinalLoss, self).__init__(weight, size_average, reduce, reduction)
    # NOTE: assert is stripped under `python -O`; kept for compatibility.
    assert MaxValue != 0
    self.MaxValue = MaxValue
    # Backward-compat alias for the original misspelled attribute name.
    self.MaxVaule = MaxValue
    # Two-element trainable weight vector (values filled elsewhere).
    self.w = P(t.Tensor(2))