Beispiel #1
0
 def _setup_weights(self):
     """
     Initialize one weight tensor per featurizer with random values.

     When ``self.tie_init`` / ``self.tie_dc`` are set, the 'SignalInit' /
     'SignalDC' weights are tied across the output dimension via
     ``expand``, which broadcasts a single underlying value over all
     output columns. All other featurizers are always tied this way.
     :return: None
     """
     # Fixed seed so weight initialization is reproducible across runs.
     torch.manual_seed(42)
     # Reset bookkeeping before (re)building the weight list.
     self.weight_tensors = ParameterList()
     self.tensor_tuple = ()
     self.feature_id = []
     self.W = None
     for featurizer in self.featurizers:
         self.feature_id.append(featurizer.id)
         if featurizer.id == 'SignalInit':
             if self.tie_init:
                 # One scalar broadcast over output_dim (tied weights).
                 signals_W = Parameter(
                     torch.randn(1).expand(1, self.output_dim))
             else:
                 signals_W = Parameter(torch.randn(1, self.output_dim))
         elif featurizer.id == 'SignalDC':
             if self.tie_dc:
                 # One weight per DC signal, shared across output_dim.
                 signals_W = Parameter(
                     torch.randn(featurizer.count,
                                 1).expand(-1, self.output_dim))
             else:
                 signals_W = Parameter(
                     torch.randn(featurizer.count, self.output_dim))
         else:
             # Default: every other featurizer is tied across outputs.
             signals_W = Parameter(
                 torch.randn(featurizer.count,
                             1).expand(-1, self.output_dim))
         self.weight_tensors.append(signals_W)
     return
Beispiel #2
0
 def __init__(self, env, feat_info, output_dim, bias=False):
     """
     Build one weight (and optionally one bias) parameter per featurizer.

     :param env: environment/config object, stored for later use
     :param feat_info: iterable of featurizer entries, each exposing
         ``learnable``, ``size`` and ``init_weight``
     :param output_dim: output dimension of the layer
     :param bias: when True, also create a per-featurizer bias parameter
     """
     super(TiedLinear, self).__init__()
     self.env = env
     # in_features accumulates integer feature sizes, so start from the
     # integer 0 (was 0.0, which silently made the attribute a float).
     self.in_features = 0
     self.weight_list = ParameterList()
     if bias:
         self.bias_list = ParameterList()
     else:
         self.register_parameter('bias', None)
     self.output_dim = output_dim
     self.bias_flag = bias
     # Iterate over featurizer info list
     for feat_entry in feat_info:
         learnable = feat_entry.learnable
         feat_size = feat_entry.size
         init_weight = feat_entry.init_weight
         self.in_features += feat_size
         feat_weight = Parameter(init_weight * torch.ones(1, feat_size),
                                 requires_grad=learnable)
         if learnable:
             # Only learnable weights get a randomized re-initialization.
             self.reset_parameters(feat_weight)
         self.weight_list.append(feat_weight)
         if bias:
             feat_bias = Parameter(torch.zeros(1, feat_size),
                                   requires_grad=learnable)
             if learnable:
                 self.reset_parameters(feat_bias)
             self.bias_list.append(feat_bias)
Beispiel #3
0
    def __init__(self):
        """
        Build the transition-action validity table and its mask tensors.

        Maps a previous action (or a (left_empty, right_empty) stack-state
        tuple) to the list of actions allowed next, then pre-computes one
        frozen 0/1 mask tensor per table entry.
        """
        super().__init__()
        # NOTE(review): self.valid_actions is not created here, so it is
        # presumably initialized by the parent __init__ — confirm.
        directional_actions = [
            Action.LEFT_ARC, Action.NO_LEFT_ARC, Action.RIGHT_ARC,
            Action.NO_RIGHT_ARC
        ]
        directional_and_shift = directional_actions + [Action.SHIFT]
        # If the previous action was directional, the next step may keep
        # making arc decisions, or shift.
        for action in directional_actions:
            self.valid_actions[action] = directional_and_shift

        common_actions = [
            Action.PRED_GEN, Action.NO_PRED, Action.NO_LEFT_ARC,
            Action.NO_RIGHT_ARC
        ]
        # After NO-PRED, validity is keyed by whether the left/right
        # stacks are empty: keys are (left_empty, right_empty) tuples.
        # 0. both sides non-empty
        self.valid_actions[(False, False)] = common_actions
        # 1. left empty, right non-empty
        self.valid_actions[(True, False)] = [Action.RIGHT_ARC] + common_actions
        # 2. left non-empty, right empty
        self.valid_actions[(False, True)] = [Action.LEFT_ARC] + common_actions
        # 3. both sides empty
        self.valid_actions[(True, True)] = [Action.SHIFT]

        # One frozen 0/1 mask per table entry, indexed through key_to_id.
        self.masks = ParameterList()
        self.key_to_id = dict()
        for k, v in self.valid_actions.items():
            values = set(a.value for a in v)
            self.key_to_id[k] = len(self.masks)
            self.masks.append(
                Parameter(torch.tensor(
                    [1 if i in values else 0 for i in range(len(Action))]),
                          requires_grad=False))
Beispiel #4
0
 def __init__(self,
              in_size,
              out_size,
              in_rank,
              out_rank,
              alpha=1,
              beta=0.1,
              c=1e-3,
              **kwargs):
     """Tucker-style tensorized linear layer: per-mode factor matrices,
     a core tensor, a flat bias, rank-wise scale vectors and frozen
     hyperparameters."""
     super(tensorizedlinear, self).__init__()
     # Keep mode sizes and ranks as plain lists.
     self.in_size = list(in_size)
     self.out_size = list(out_size)
     self.in_rank = list(in_rank)
     self.out_rank = list(out_rank)
     # Input factors are (rank x size); output factors are (size x rank).
     self.factors_in = ParameterList(
         [Parameter(torch.Tensor(rank, size))
          for rank, size in zip(in_rank, in_size)])
     self.factors_out = ParameterList(
         [Parameter(torch.Tensor(size, rank))
          for rank, size in zip(out_rank, out_size)])
     # Core maps the flattened input ranks to the flattened output ranks.
     self.core = Parameter(
         torch.Tensor(np.prod(out_rank), np.prod(in_rank)))
     self.bias = Parameter(torch.Tensor(np.prod(out_size)))
     # Per-rank scaling vectors, initialized to ones.
     self.lamb_in = ParameterList(
         [Parameter(torch.ones(rank)) for rank in in_rank])
     self.lamb_out = ParameterList(
         [Parameter(torch.ones(rank)) for rank in out_rank])
     # Hyperparameters stored as frozen parameters so they follow the
     # module on .to(device) moves but never receive gradients.
     for name, value in (('alpha', alpha), ('beta', beta), ('c', c)):
         setattr(self, name,
                 Parameter(torch.tensor(value), requires_grad=False))
     self._initialize_weights()
Beispiel #5
0
    def __init__(self, base_model, h_dim, out_dim=1, device='cpu', seed=None):
        '''
        :Parameters:
        base_model: torch.nn.Module: task-agnostic model
        h_dim: int: dimension of base_model output
        out_dim: output dimension of task-specific tensor \omega
        (dimension of loss_function input)
        device: torch device string the model is moved to
        seed: optional seed for the torch random generator
        '''
        super().__init__()
        self.base = base_model
        self.h_dim = h_dim
        self.out_dim = out_dim
        self.device = device
        # Replay buffers for previous tasks (torch.utils.data.Subset objects).
        self.tasks_replay_buffers = []
        # Task-specific tensors applied to the base model outputs.
        self.tasks_omegas = ParameterList()

        if out_dim == 1:
            # Binary case: logits -> BCE loss.
            self.loss_func = nn.BCEWithLogitsLoss()

            # Predicts a distribution over the two classes.
            def pred_func(input):
                # torch.sigmoid replaces the deprecated F.sigmoid.
                pred = torch.sigmoid(input)
                return torch.stack([1. - pred, pred], dim=-1).squeeze()

            self.pred_func = pred_func
        elif out_dim > 1:
            # Multi-class case: branches are mutually exclusive, so use
            # elif instead of a second independent if.
            self.loss_func = nn.CrossEntropyLoss()
            self.pred_func = nn.Softmax()

        self.torch_gen = create_torch_random_gen(seed)
        self.to(self.device)
Beispiel #6
0
    def __init__(self,
                 size,
                 rank,
                 alpha=1,
                 beta=0.2,
                 c=1,
                 d=1e6,
                 e=1,
                 init='unif'):
        """Bayesian Tucker factorization state: per-mode factors, a core
        tensor, rank-wise lambda vectors and frozen hyperparameters."""
        super(bftucker, self).__init__()
        self.size = size
        self.rank = rank
        self.dim = len(size)

        # tau is the only trainable scalar here; the remaining
        # hyperparameters are frozen so they follow .to(device) moves
        # without ever receiving gradients.
        self.tau = Parameter(torch.tensor(1.0))
        for name, value in (('alpha', alpha), ('beta', beta), ('c', c),
                            ('d', d), ('e', e)):
            setattr(self, name,
                    Parameter(torch.tensor(value), requires_grad=False))

        # One lambda vector per mode rank; one factor matrix per mode.
        self.lamb = ParameterList([Parameter(torch.Tensor(r)) for r in rank])
        self.factors = ParameterList(
            [Parameter(torch.Tensor(n, r)) for n, r in zip(size, rank)])
        self.core = Parameter(torch.zeros(rank))
        self.reset_parameters(init)
Beispiel #7
0
 def __init__(self, F, l_h, l_a, C, l_keep_prob=None):
     """
     Fully-connected feed-forward network.

     :param F: input feature dimension
     :param l_h: list of hidden-layer sizes
     :param l_a: list of activation functions, one per layer
     :param C: output (class) dimension
     :param l_keep_prob: optional list of keep probabilities per layer;
         defaults to keeping everything (probability 1)
     """
     super(FFNN, self).__init__()

     sizes = [F] + l_h + [C]
     self.Ws = ParameterList([
         Parameter(torch.randn(sizes[i], sizes[i + 1]))
         for i in range(len(sizes) - 1)
     ])
     self.bs = ParameterList([Parameter(torch.zeros(h)) for h in sizes[1:]])
     self.fs = l_a
     # 'is not None' (identity) instead of '!= None' (equality check).
     self.dropout_prob = (l_keep_prob if l_keep_prob is not None
                          else [1 for _ in range(len(l_a) + 1)])
Beispiel #8
0
    def __init__(self, args_dict):
        """
        Two-channel integrator network configured from ``args_dict``.

        Every key/value in args_dict becomes an attribute. Encoder and
        decoder vectors are created (optionally orthonormalized), the
        first pair is normalized, aligned and rescaled, and the 2x2
        recurrent matrix is rescaled to the requested spectral radius.
        """
        super(TwoTwoNet, self).__init__()
        self.is_W_parametrized = False
        self.is_dale_constrained = False
        # Promote every config entry to an attribute.
        for k, v in args_dict.items():
            setattr(self, k, v)
        assert self.n_channels == 1
        if len(self.saturations) > 2:
            logging.error(
                'ManyChannelsIntegrator.saturations should be [low, high], not {}'
                .format(self.saturations))  # was bare `saturations`: NameError
        std = 1. / sqrt(self.n)
        # Fixed (non-trainable) random encoder/decoder vectors per channel.
        self.encoders = ParameterList([
            Parameter(tch.zeros(self.n).normal_(0, std), requires_grad=False)
            for _ in range(self.n_channels)
        ])
        self.decoders = ParameterList([
            Parameter(tch.zeros(self.n).normal_(0, std), requires_grad=False)
            for _ in range(self.n_channels)
        ])
        if self.init_vectors_type == 'random':
            pass
        elif self.init_vectors_type == 'orthonormal':
            logging.info('Orthogonalizing encoders and decoders')
            # Stack all io vectors as columns, orthonormalize, write back.
            plop = tch.zeros(self.n, 2 * self.n_channels)
            for idx, item in enumerate(self.encoders):
                plop[:, idx] = item.data
            for idx, item in enumerate(self.decoders):
                plop[:, len(self.encoders) + idx] = item.data
            plop = orth(plop)
            for idx, item in enumerate(self.encoders):
                item.data = plop[:, idx]
            for idx, item in enumerate(self.decoders):
                item.data = plop[:, len(self.encoders) + idx]

        # Normalize the first encoder/decoder to unit norm.
        self.encoders[0].data = self.encoders[0].data / tch.sqrt(
            (self.encoders[0].data**2).sum())
        self.decoders[0].data = self.decoders[0].data / tch.sqrt(
            (self.decoders[0].data**2).sum())
        # Align the encoder / decoder
        self.decoders[0].data = (
            (1. - self.init_vectors_overlap) * self.decoders[0].data +
            self.init_vectors_overlap * self.encoders[0].data)
        # Rescale the io vectors
        self.decoders[0].data = self.init_vectors_scales[0] * self.decoders[
            0].data / tch.sqrt((self.decoders[0].data**2).sum())
        self.encoders[
            0].data = self.encoders[0].data * self.init_vectors_scales[1]

        # Trainable 2x2 recurrent matrix rescaled to spectral radius
        # init_radius. NOTE(review): tch.eig is removed in recent torch;
        # kept as-is to preserve behavior on the pinned version.
        self.w = Parameter(tch.zeros(2, 2).normal_(0, std), requires_grad=True)
        eigs, _ = tch.eig(self.w, eigenvectors=False)
        spectral_rad = tch.sqrt((eigs**2).sum(dim=1).max()).item()
        assert spectral_rad != 0
        self.w.data = self.init_radius * self.w.data / spectral_rad
        self.device = tch.device(self.device_name)
        self.to(self.device)
        os.makedirs(self.save_folder, exist_ok=True)
        self.compute_relevant_quantities()
Beispiel #9
0
    def __init__(self, F, l_h, l_a, C):
        """Feed-forward net: one weight matrix and one bias vector per
        layer transition; ``fs`` holds the activation functions."""
        super(FFNN, self).__init__()

        dims = [F] + l_h + [C]
        # Weight shapes pair consecutive layer sizes.
        self.Ws = ParameterList(
            [Parameter(torch.randn(fan_in, fan_out))
             for fan_in, fan_out in zip(dims[:-1], dims[1:])])
        self.bs = ParameterList(
            [Parameter(torch.zeros(fan_out)) for fan_out in dims[1:]])
        self.fs = l_a
class HM_color(nn.Module):
    """Color model built from three independent grayscale HM_bw models,
    one per RGB channel; their parameters are collected manually so the
    optimizer can see them."""

    def __init__(self, layers=None):
        # Default layer sizes for the flattened-image architecture.
        super(HM_color, self).__init__()

        if layers is None:
            layers = [38804, 2048, 128, 32]

        # One grayscale model per channel (R, G, B). NOTE(review): this is
        # a plain list, so the submodules are not registered with the
        # Module machinery — hence the explicit ParameterList below.
        self.rgb_models = [
            HM_bw(layers),
            HM_bw(layers),
            HM_bw(layers),
        ]

        self.params = ParameterList()
        for model in self.rgb_models:
            self.params.extend(model.parameters())

    def forward(self, x):
        """
        x must have shape N x C x H x W
        """
        # Flatten each channel to (N, H*W) and run it through its model.
        x = torch.round(x)
        flat_dim = x.shape[-1] * x.shape[-2]
        color_layers = [x[:, i].reshape(-1, flat_dim) for i in range(3)]
        outputs = [
            model.forward(layer)
            for model, layer in zip(self.rgb_models, color_layers)
        ]
        return outputs

    def loss_function(self, *fwd_outputs):
        # Total loss is the sum of the per-channel losses.
        losses = [
            model.loss_function(*output)
            for model, output in zip(self.rgb_models, fwd_outputs)
        ]
        return sum(losses)

    def sample(self, num_samples):
        # Draw per-channel fantasies and stack them into color images.
        fake_x = torch.zeros(num_samples)
        fantasies = [
            model.run_sleep(fake_x)[2][-1] for model in self.rgb_models
        ]
        return torch.stack(fantasies, dim=-1)

    def reconstruct(self, x):
        # Flatten channels, reconstruct each independently, then restack.
        x = torch.round(x)
        flat_dim = x.shape[-1] * x.shape[-2]
        color_layers = [x[:, i].reshape(-1, flat_dim) for i in range(3)]
        images = [
            model.reconstruct(layer)
            for model, layer in zip(self.rgb_models, color_layers)
        ]

        return torch.stack(images, dim=-1)
Beispiel #11
0
    def __init__(self, dim: int, n_components: int) -> None:
        """Mixture model: trainable mixture logits plus one log-alpha
        vector of size ``dim`` per component."""
        super(DMM, self).__init__()
        self._dim = dim
        self._n_components = n_components

        # All-zero logits give a uniform mixture at initialization.
        self.mixture_logits = Parameter(
            torch.zeros((n_components, ), dtype=torch.float))

        # Per-component log-alpha vectors with small random init.
        self.log_alphas = ParameterList([
            Parameter(torch.randn(dim, dtype=torch.float) / 3)
            for _ in range(n_components)
        ])
Beispiel #12
0
    def __init__(self,
                 metadata: Metadata,
                 min_embedding_size: int = 2,
                 max_embedding_size: int = 50) -> None:
        """
        Build per-variable input handling from the metadata.

        Numerical/binary variables pass through (one unit each);
        categorical variables get a manually-managed embedding matrix
        whose size is clamped to [min_embedding_size, max_embedding_size].

        :param metadata: dataset metadata describing independent variables
        :param min_embedding_size: lower bound for categorical embeddings
        :param max_embedding_size: upper bound for categorical embeddings
        :raises Exception: on an unexpected variable type
        """
        super(MultiInputLayer, self).__init__()

        self.metadata = metadata

        self.has_categorical = False
        self.output_size = 0

        # our embeddings need to be referenced like this to be considered in the parameters of this model
        self.embeddings = ParameterList()
        # this reference is for using the embeddings during the forward pass
        self.embedding_by_variable = {}

        for i, variable_metadata in enumerate(
                self.metadata.get_by_independent_variable()):
            # if it is a numerical variable
            if variable_metadata.is_binary() or variable_metadata.is_numerical(
            ):
                assert variable_metadata.get_size() == 1
                self.output_size += 1

            # if it is a categorical variable
            elif variable_metadata.is_categorical():
                variable_size = variable_metadata.get_size()

                # this is an arbitrary rule of thumb taken from several blog posts
                embedding_size = compute_embedding_size(
                    variable_size, min_embedding_size, max_embedding_size)

                # the embedding is implemented manually to be able to use one hot encoding
                # PyTorch embedding only accepts as input label encoding
                embedding = Parameter(data=torch.Tensor(
                    variable_size, embedding_size).normal_(),
                                      requires_grad=True)

                self.embeddings.append(embedding)
                self.embedding_by_variable[
                    variable_metadata.get_name()] = embedding

                self.output_size += embedding_size
                self.has_categorical = True

            # if it is another type
            else:
                raise Exception(
                    "Unexpected variable type '{}' for variable '{}'.".format(
                        variable_metadata.get_type(),
                        variable_metadata.get_name()))
Beispiel #13
0
    def __init__(self, tensor, gradient_update="S", rank=10):
        """Variational tensor-factorization state: one (mean, scale)
        parameter pair per column of every tensor dimension, plus
        sampling and optimization settings."""
        super().__init__()

        self.tensor = tensor
        self.num_train = len(tensor.train_vals)
        self.dims = tensor.dims
        self.ndim = len(self.dims)
        self.rank = rank
        self.datatype = tensor.datatype
        self.gradient_update = gradient_update

        self.means = ModuleList()
        self.chols = ModuleList()

        # Per dimension: a list of mean vectors and a list of scale
        # vectors (ones plus small noise), one pair per column.
        for ncol in self.dims:
            means_for_dim = ParameterList(
                [Parameter(torch.randn(rank), requires_grad=True)
                 for _ in range(ncol)])
            chols_for_dim = ParameterList(
                [Parameter(torch.ones(rank) + 0.25 * torch.randn(rank),
                           requires_grad=True)
                 for _ in range(ncol)])
            self.means.append(means_for_dim)
            self.chols.append(chols_for_dim)

        self.standard_multi_normal = MultivariateNormal(torch.zeros(rank),
                                                        torch.eye(rank))
        self.sigma = 1
        self.batch_size = 64
        self.lambd = 1 / self.batch_size
        self.round_robins_indices = [0 for _ in self.dims]
        self.k1 = 128
    def __init__(self, layers=None):
        """Three independent HM_bw models, one per RGB channel, with their
        parameters gathered so the optimizer can see all of them."""
        super(HM_color, self).__init__()

        if layers is None:
            layers = [38804, 2048, 128, 32]

        # One grayscale model per color channel (R, G, B).
        self.rgb_models = [HM_bw(layers) for _ in range(3)]

        # The models live in a plain list, so register their parameters
        # explicitly through a ParameterList.
        self.params = ParameterList()
        for model in self.rgb_models:
            self.params.extend(model.parameters())
Beispiel #15
0
    def __init__(self, layers=None, scale=.1, p=None, lr=.1, lam=None):
        """Small MLP with Gaussian-initialized weights and biases scaled
        by ``scale``, plus training hyperparameters."""
        super().__init__()
        if layers is None:
            layers = [2, 100, 2]
        # One weight matrix per consecutive layer pair, one bias per layer.
        self.weights = ParameterList(
            [Parameter(scale * torch.randn(fan_in, fan_out))
             for fan_in, fan_out in zip(layers[:-1], layers[1:])])
        self.biases = ParameterList(
            [Parameter(scale * torch.randn(fan_out))
             for fan_out in layers[1:]])

        self.p = p
        self.lr = lr
        self.lam = lam
        # NOTE(review): this shadows nn.Module.train(); renaming would
        # change the public interface, so it is kept as-is.
        self.train = False
Beispiel #16
0
    def __init__(
        self,
        mixture_size: int,
        do_layer_norm: bool = False,
        initial_scalar_parameters: List[float] = None,
        trainable: bool = True,
    ) -> None:
        """Scalar mixture of ``mixture_size`` tensors: one trainable
        weight per tensor plus a global gamma scale."""
        super().__init__()
        self.mixture_size = mixture_size
        self.do_layer_norm = do_layer_norm

        if initial_scalar_parameters is None:
            initial_scalar_parameters = [0.0] * mixture_size
        elif len(initial_scalar_parameters) != mixture_size:
            raise ConfigurationError(
                "Length of initial_scalar_parameters {} differs "
                "from mixture_size {}".format(initial_scalar_parameters,
                                              mixture_size))

        # Build the weight list explicitly, one entry per mixed tensor.
        weights = []
        for k in range(mixture_size):
            weights.append(
                Parameter(torch.FloatTensor([initial_scalar_parameters[k]]),
                          requires_grad=trainable))
        self.scalar_parameters = ParameterList(weights)
        self.gamma = Parameter(torch.FloatTensor([1.0]),
                               requires_grad=trainable)
    def __init__(
        self,
        mixture_size: int,
        do_layer_norm: bool = False,
        initial_scalar_parameters: List[float] = None,
        trainable: bool = True,
    ) -> None:
        """Learned scalar mixture over ``mixture_size`` input tensors."""
        super().__init__()
        self.mixture_size = mixture_size
        self.do_layer_norm = do_layer_norm

        if initial_scalar_parameters is None:
            initial_scalar_parameters = [0.0] * mixture_size
        elif len(initial_scalar_parameters) != mixture_size:
            raise ValueError(
                f"Length of `initial_scalar_parameters` {initial_scalar_parameters} differs "
                f"from `mixture_size` {mixture_size}")

        # One scalar weight per mixed tensor, then a global scale.
        self.scalar_parameters = ParameterList([
            Parameter(torch.FloatTensor([init_val]), requires_grad=trainable)
            for init_val in initial_scalar_parameters
        ])
        self.gamma = Parameter(torch.FloatTensor([1.0]),
                               requires_grad=trainable)
Beispiel #18
0
    def __init__(self, mixture_size: int, trainable: bool = False) -> None:
        """
        Inits scalar mix implementation.
        ``mixture = gamma * sum(s_k * tensor_k)`` where ``s = softmax(w)``, with ``w`` and ``gamma`` scalar parameters.
        :param mixture_size: size of mixtures (usually the number of layers)
        :param trainable: whether the mixing weights receive gradients
        """
        super(ScalarMix, self).__init__()
        self.mixture_size = mixture_size

        # Every mixing weight starts at zero (uniform after softmax);
        # gamma starts at one. All tensors live on flair.device.
        self.scalar_parameters = ParameterList([
            Parameter(
                torch.tensor([0.0], dtype=torch.float, device=flair.device),
                requires_grad=trainable,
            ) for _ in range(mixture_size)
        ])
        self.gamma = Parameter(
            torch.tensor([1.0], dtype=torch.float, device=flair.device),
            requires_grad=trainable)
    def __init__(self, args):
        """
        Parse and validate module-type / module-count specifications.

        ``args.type_modules`` and ``args.num_modules`` may each be a
        comma-separated string or a list; both are normalized to lists of
        matching length.
        """
        # Assert module specifications are consistent.
        if not hasattr(self, 'type_modules'):
            self.type_modules = []
        if self.type_modules != []:
            # Already populated (e.g. by a subclass) — keep as-is.
            pass
        elif isinstance(args.type_modules, str):
            self.type_modules = args.type_modules.split(',')
        else:
            # isinstance replaces the non-idiomatic type(x) == type(...) checks.
            assert isinstance(args.type_modules, list)
            assert isinstance(args.type_modules[0], str)
            self.type_modules = args.type_modules
        if isinstance(args.num_modules, str):
            self.num_modules = list(map(int, args.num_modules.split(',')))
        else:
            assert isinstance(args.num_modules, list)
            assert isinstance(args.num_modules[0], int)
            self.num_modules = args.num_modules
        self.num_types = len(self.num_modules)
        assert len(self.type_modules) == self.num_types, (str(self.type_modules) + \
            ' should have '+str(self.num_types)+' elts.')
        self.tot_modules = sum(self.num_modules)

        # NOTE(review): presumably an epsilon to avoid division by zero in
        # usage statistics — confirm against the consumer of this field.
        self.usage_normalization = 1e-9
        self.has_global_variable = False
        self.StructureParameters = ParameterList()
Beispiel #20
0
    def __init__(self, out_cls=10):
        """Hierarchical BNN container: a shared base network ``w0``, a
        ModuleList of per-class networks, and gamma mean/scale
        parameters."""
        super(Hbnn, self).__init__()
        # Priors (kept as in the original; reportedly unused).
        self.prior_v = 100
        self.prior_tau_0_reciprocal = 1000

        self.num_net = 0
        self.out_cls = out_cls
        self.w0 = Net(out_cls)    # shared base network w0
        self.hbnn = ModuleList()  # one network per class
        self.mu_gamma_g = ParameterList()
        self.sigma_gamma_g = ParameterList()
        self.mu_gamma = Parameter(torch.ones(1))
        self.sigma_gamma = Parameter(torch.ones(1))

        if torch.cuda.is_available():
            self.w0 = self.w0.cuda()
Beispiel #21
0
def _create_candecomp_cores_unconstrained(tensor_modes, order):
    """
    Create one zero-initialized CP factor matrix per tensor mode.

    :param tensor_modes: iterable of mode sizes
    :param order: CP rank (number of columns in every factor)
    :return: ParameterList of (mode, order) zero tensors
    """
    # torch.zeros replaces the legacy torch.Tensor(...).zero_() pattern
    # (same float32 zero tensors, without allocating uninitialized memory).
    return ParameterList(
        [Parameter(torch.zeros(mode, order)) for mode in tensor_modes])
Beispiel #22
0
    def __init__(self,
                 tau_in,
                 tau_out,
                 weight_init='randn',
                 real=False,
                 gain=1,
                 device=torch.device('cpu'),
                 dtype=torch.float):
        """
        Mix scalar representations: one weight per (input, output) type.

        :param tau_in: input type multiplicities (trailing zeros trimmed)
        :param tau_out: output multiplicities, or an int broadcast to the
            length of tau_in
        :param weight_init: initialization scheme name
        :param real: whether the weights are real-valued
        :param gain: initialization gain
        :param device: device for the created weights
        :param dtype: dtype for the created weights
        """
        super(MixRepsScalar, self).__init__()

        # Work on copies: the original popped elements from the caller's
        # lists in place, mutating the arguments as a side effect.
        tau_in = list(tau_in)
        # Remove extra trailing zeros in input/output type
        while not tau_in[-1]:
            tau_in.pop()

        if type(tau_out) is int:
            tau_out = [tau_out] * len(tau_in)
        else:
            tau_out = list(tau_out)
            while not tau_out[-1]:
                tau_out.pop()

        self.tau_in = list(tau_in)
        self.tau_out = list(tau_out)

        self.real = real
        self.cat_dim = -1 if real else -2

        weights = init_mix_reps_weights(tau_in,
                                        tau_out,
                                        weight_init,
                                        real=real,
                                        gain=gain,
                                        device=device,
                                        dtype=dtype)
        self.weights = ParameterList([Parameter(weight) for weight in weights])
Beispiel #23
0
    def __init__(self,
                 mixture_size: int,
                 do_layer_norm: bool = False,
                 initial_scalar_parameters: List[float] = None,
                 trainable: bool = True,
                 dropout: float = None,
                 dropout_value: float = -1e20) -> None:
        """Scalar mixture with optional layer dropout: dropped layers get
        ``dropout_value`` substituted for their pre-softmax weight."""
        super(ScalarMixWithDropout, self).__init__()
        self.mixture_size = mixture_size
        self.do_layer_norm = do_layer_norm
        self.dropout = dropout

        if initial_scalar_parameters is None:
            initial_scalar_parameters = [0.0] * mixture_size
        elif len(initial_scalar_parameters) != mixture_size:
            raise ConfigurationError(
                "Length of initial_scalar_parameters {} differs "
                "from mixture_size {}".format(initial_scalar_parameters,
                                              mixture_size))

        # One scalar weight per mixed tensor, plus a global gamma scale.
        self.scalar_parameters = ParameterList(
            [Parameter(torch.FloatTensor([value]), requires_grad=trainable)
             for value in initial_scalar_parameters])
        self.gamma = Parameter(torch.FloatTensor([1.0]),
                               requires_grad=trainable)

        if self.dropout:
            # Buffers (not parameters): they follow device moves but
            # never receive gradients.
            n_weights = len(self.scalar_parameters)
            self.register_buffer("dropout_mask", torch.zeros(n_weights))
            self.register_buffer("dropout_fill",
                                 torch.empty(n_weights).fill_(dropout_value))
Beispiel #24
0
    def __init__(
        self,
        mixture_size: int,
        do_layer_norm: bool = False,
        initial_scalar_parameters: Optional[List[float]] = None,
        trainable: bool = True,
    ) -> None:
        """Weighted scalar mixture; defaults to uniform 1/mixture_size
        initial weights."""
        super().__init__()
        self.mixture_size = mixture_size
        self.do_layer_norm = do_layer_norm

        if initial_scalar_parameters is None:
            # Uniform initial weights across all mixed tensors.
            initial_scalar_parameters = [1.0 / mixture_size] * mixture_size
        elif len(initial_scalar_parameters) != mixture_size:
            raise ValueError(
                "initial_scalar_parameters & mixture_size not match.")

        scalars = []
        for init_val in initial_scalar_parameters:
            scalars.append(
                Parameter(torch.FloatTensor([init_val]),
                          requires_grad=trainable))
        self.scalar_parameters = ParameterList(scalars)
        self.gamma = Parameter(torch.FloatTensor([1.0]),
                               requires_grad=trainable)
Beispiel #25
0
    def __init__(self, F, l_h, l_a, C, params=None):
        """
        Feed-forward network with optional per-activation parameters.

        :param F: input feature dimension
        :param l_h: list of hidden-layer sizes
        :param l_a: activation functions, one per layer
        :param C: output dimension
        :param params: optional list of scalar activation parameters;
            entries that are None get no parameter
        """
        super(FFNN, self).__init__()

        sizes = [F] + l_h + [C]
        self.Ws = ParameterList([
            Parameter(torch.randn(sizes[i], sizes[i + 1]))
            for i in range(len(sizes) - 1)
        ])
        self.bs = ParameterList([Parameter(torch.zeros(h)) for h in sizes[1:]])
        self.fs = l_a
        if params is None:
            self.params = [None for _ in l_a]
        else:
            # 'is not None' so a legitimate 0.0 parameter is not silently
            # dropped (the truthiness of 0.0 is False).
            self.params = [
                Parameter(torch.tensor(p)) if p is not None else None
                for p in params
            ]
        # Same fix here: a zero-valued Parameter is falsy as a tensor and
        # would otherwise be excluded from the registered list.
        self.params_list = ParameterList(
            [p for p in self.params if p is not None])
 def reset_layer_num(self):
     """Re-create the first scalar mix so it matches the ELMo LSTM's
     current number of layers (weights reset to 0, gamma to 1)."""
     n_layers = self._elmo._elmo_lstm.num_layers
     fresh_weights = ParameterList(
         [Parameter(torch.FloatTensor([0.0])) for _ in range(n_layers)])
     mix = self._elmo._scalar_mixes[0]
     mix.scalar_parameters = fresh_weights
     mix.gamma = Parameter(torch.FloatTensor([1.0]))
Beispiel #27
0
    def __init__(self, mixture_size: int, do_layer_norm: bool = False) -> None:
        """Scalar mix with zero-initialized mixing weights and a unit
        gamma scale."""
        super(ScalarMix, self).__init__()

        self.mixture_size = mixture_size
        self.do_layer_norm = do_layer_norm

        # One zero-initialized weight per mixed tensor.
        self.scalar_parameters = ParameterList(
            [Parameter(torch.FloatTensor([0.0]))
             for _ in range(mixture_size)])
        self.gamma = Parameter(torch.FloatTensor([1.0]))
Beispiel #28
0
def _create_candecomp_cores(in_modes, out_modes, order):
    """
    Create zero-initialized CP factor matrices for all input/output modes.

    :param in_modes: list of input mode sizes
    :param out_modes: list of output mode sizes (same length as in_modes)
    :param order: CP rank (number of columns per factor)
    :return: ParameterList with one (mode, order) zero tensor per mode
    :raises AssertionError: on mismatched mode lists or non-positive order
    """
    assert len(in_modes) == len(out_modes)
    assert order > 0
    modes = in_modes + out_modes  # input factors first, then output factors
    # torch.zeros replaces the legacy torch.Tensor(...).zero_() pattern.
    return ParameterList(
        [Parameter(torch.zeros(mode, order)) for mode in modes])
Beispiel #29
0
def _create_tucker_params(in_modes, out_modes, ranks):
    """
    Create a Tucker core and factor matrices with standard-normal init.

    :param in_modes: input mode sizes
    :param out_modes: output mode sizes (same length as in_modes/ranks)
    :param ranks: Tucker ranks, one per mode pair
    :return: (core, factors) — core has shape ranks+ranks; factors is a
        ParameterList of (mode, rank) matrices
    :raises AssertionError: on mismatched input lengths
    """
    assert len(in_modes) == len(out_modes) == len(ranks)
    modes = in_modes + out_modes  # extend list
    # torch.empty(...).normal_() replaces the legacy torch.Tensor(*sizes)
    # constructor (uninitialized storage) followed by normal_().
    core = Parameter(torch.empty(*(ranks + ranks)).normal_())
    factors = ParameterList([
        Parameter(torch.empty(mode, rank).normal_())
        for mode, rank in zip(modes, ranks + ranks)
    ])
    return core, factors
 def __init__(self, num_tensors, trainable=True):
     """Scalar mixture over ``num_tensors`` tensors: one zero-initialized
     weight each, plus a gamma scale starting at 1."""
     super(ScalarMix, self).__init__()
     self.num_tensors = num_tensors
     weights = []
     for _ in range(num_tensors):
         weights.append(Parameter(torch.FloatTensor([0.0]),
                                  requires_grad=trainable))
     self.scalar_parameters = ParameterList(weights)
     self.gamma = Parameter(torch.FloatTensor([1.0]),
                            requires_grad=trainable)