def forward(self, x):
    """Embed a batch of token sequences and return per-position log-probs.

    Builds a combined one-hot encoding (position in the first `maxlen`
    slots, symbol in the last `syms` slots), multiplies by the embedding
    matrix, runs the transformer blocks, projects to the output vocabulary
    and applies logsoftmax.

    NOTE(review): assumes x is an integer tensor of shape (batch, seq) with
    seq <= self.maxlen and token values < self.syms -- confirm with callers.
    """
    batch, seqlen = x.shape[0], x.shape[1]
    tokens = x.cpu().data.astype(np.int32)
    width = self.maxlen + self.syms
    encoded = np.zeros((batch, seqlen, width), dtype=np.float32)
    rows = range(batch)
    for pos in range(seqlen):
        # Mark the position slot and the symbol slot for every batch row.
        encoded[rows, pos, pos] = 1
        encoded[rows, pos, self.maxlen + tokens[:, pos]] = 1
    flat = encoded.reshape(batch * seqlen, width)
    hidden = Tensor(flat, device=x.device).dot(self.embed)
    hidden = hidden.reshape(shape=(batch, seqlen, -1))
    hidden = hidden.sequential(self.tbs)
    # Collapse batch/seq, project to vocab, then restore the batch dim.
    logits = hidden.reshape(shape=(-1, hidden.shape[-1])).dot(self.final).logsoftmax()
    return logits.reshape(shape=(batch, -1, logits.shape[-1]))
def load_weights(self, url):
    """Load darknet-format weights from *url* into the conv/batchnorm layers.

    Darknet weight files begin with a 5-int32 header (major, minor,
    revision, images-seen stored across two ints) followed by every
    parameter as one flat float32 stream, in layer order.
    """
    weights = fetch(url)
    # Header: major, minor, revision, images seen.
    header = np.frombuffer(weights, dtype=np.int32, count=5)
    self.seen = header[3]

    def numel(tensor):
        # Total element count of a tensor (product of its shape dims).
        return int(np.prod(tensor.shape))

    # Skip the 5-int32 header; the rest is a flat float32 parameter stream.
    weights = np.frombuffer(weights, dtype=np.float32)[5:]
    ptr = 0
    for i in range(len(self.module_list)):
        module_type = self.blocks[i + 1]["type"]
        if module_type != "convolutional":
            continue
        model = self.module_list[i]
        # FIX: was `try: ... except: batch_normalize = 0` with a bare except
        # that swallowed every error; a missing key simply means "no
        # batchnorm", which .get expresses without hiding real failures.
        batch_normalize = int(self.blocks[i + 1].get("batch_normalize", 0))
        conv = model[0]
        if batch_normalize:
            bn = model[1]
            # Batchnorm stores bias, weight, running_mean, running_var
            # consecutively, each `num_bn_biases` floats long.
            num_bn_biases = numel(bn.bias)
            bn_biases = Tensor(weights[ptr:ptr + num_bn_biases])
            ptr += num_bn_biases
            bn_weights = Tensor(weights[ptr:ptr + num_bn_biases])
            ptr += num_bn_biases
            bn_running_mean = Tensor(weights[ptr:ptr + num_bn_biases])
            ptr += num_bn_biases
            bn_running_var = Tensor(weights[ptr:ptr + num_bn_biases])
            ptr += num_bn_biases
            # Cast the flat slices into the model parameter shapes and copy.
            bn.bias = bn_biases.reshape(shape=tuple(bn.bias.shape))
            bn.weight = bn_weights.reshape(shape=tuple(bn.weight.shape))
            bn.running_mean = bn_running_mean.reshape(shape=tuple(bn.running_mean.shape))
            bn.running_var = bn_running_var.reshape(shape=tuple(bn.running_var.shape))
        else:
            # No batchnorm: the conv layer's own biases come first.
            num_biases = numel(conv.bias)
            conv_biases = Tensor(weights[ptr:ptr + num_biases])
            ptr += num_biases
            conv.bias = conv_biases.reshape(shape=tuple(conv.bias.shape))
        # Conv kernel weights always follow the biases/batchnorm values.
        num_weights = numel(conv.weight)
        conv_weights = Tensor(weights[ptr:ptr + num_weights])
        ptr += num_weights
        conv.weight = conv_weights.reshape(shape=tuple(conv.weight.shape))