Example #1
0
    def forward(self, x):
        """Embed *x* with a combined positional+symbol one-hot, run the
        transformer blocks, and return per-position log-probabilities.

        NOTE(review): assumes x is an integer tensor of shape
        (batch, seq) with seq <= self.maxlen — confirm against callers.
        Returns a Tensor of shape (batch, seq, vocab) of logsoftmax scores.
        """
        bs, seqlen = x.shape[0], x.shape[1]
        feat = self.maxlen + self.syms
        xnp = x.cpu().data.astype(np.int32)

        # Build a (bs, seq, maxlen + syms) one-hot: position part first,
        # symbol part offset by maxlen.
        onehot = np.zeros((bs, seqlen, feat), dtype=np.float32)
        for pos in range(seqlen):
            onehot[:, pos, pos] = 1  # positional one-hot (same for every batch row)
            onehot[np.arange(bs), pos, self.maxlen + xnp[:, pos]] = 1  # symbol one-hot
        flat = onehot.reshape(bs * seqlen, feat)

        # Project to the embedding space, run the transformer stack, then
        # the final projection + logsoftmax over the vocabulary.
        emb = Tensor(flat, device=x.device).dot(self.embed).reshape(shape=(bs, seqlen, -1))
        hidden = emb.sequential(self.tbs)
        scores = hidden.reshape(shape=(-1, hidden.shape[-1])).dot(self.final).logsoftmax()
        return scores.reshape(shape=(bs, -1, scores.shape[-1]))
Example #2
0
  def load_weights(self, url):
    """Download darknet-format weights from *url* and copy them into the model.

    File layout: a 5-value int32 header (major, minor, subversion, images
    seen, ...) followed by one flat float32 array. For each convolutional
    block the array holds either the four batchnorm arrays (bias, weight,
    running_mean, running_var) or, when batchnorm is absent, the conv bias;
    the conv weights follow in both cases.

    Mutates the conv/batchnorm layers in ``self.module_list`` in place.
    """
    raw = fetch(url)
    # First 5 int32 values are the header (major, minor, subversion, images seen).
    header = np.frombuffer(raw, dtype=np.int32, count=5)
    self.seen = header[3]

    def numel(tensor):
      # Total number of elements, from the tensor's shape.
      return int(np.prod(tensor.shape))

    # Everything after the header is one flat float32 array.
    weights = np.frombuffer(raw, dtype=np.float32)[5:]

    ptr = 0
    def read_like(template):
      # Consume numel(template) floats from the stream and return them as a
      # Tensor shaped like *template*.
      nonlocal ptr
      n = numel(template)
      vals = Tensor(weights[ptr:ptr + n]).reshape(shape=tuple(template.shape))
      ptr += n
      return vals

    for i in range(len(self.module_list)):
      if self.blocks[i + 1]["type"] != "convolutional":
        continue
      model = self.module_list[i]
      conv = model[0]

      # The "batch_normalize" key decides the on-disk layout: with batchnorm
      # the conv has no bias and the 4 BN arrays precede the conv weights;
      # without it a conv bias precedes them. (Was a bare `except:`; only a
      # missing key is expected here.)
      batch_normalize = int(self.blocks[i + 1].get("batch_normalize", 0))

      if batch_normalize:
        bn = model[1]
        bn.bias = read_like(bn.bias)
        bn.weight = read_like(bn.weight)
        bn.running_mean = read_like(bn.running_mean)
        bn.running_var = read_like(bn.running_var)
      else:
        conv.bias = read_like(conv.bias)

      conv.weight = read_like(conv.weight)