def __init__(self, act_fn, combinator, neurons, normalize=None, init='random',
             alpha_dropout=None, hr_test=None):
    super(MIX, self).__init__()
    self.combinator = combinator        # name of the combinator, e.g. "Linear"
    self.act_fn = act_fn                # basic activation functions to be mixed, e.g. ["tanh", "sigmoid"]
    self.normalize = normalize          # normalize alpha, e.g. with a Sigmoid
    self.neurons = neurons              # number of neurons of the layer
    self.alpha_dropout = alpha_dropout  # apply a dropout on alpha (only for MLP_ATT)
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.act_module = {'relu': nn.ReLU(),  # dictionary mapping names to activation modules
                       'sigmoid': nn.Sigmoid(),
                       'tanh': nn.Tanh(),
                       'antirelu': Antirelu(),
                       'identity': Identity(),
                       'softmax': nn.Softmax(dim=-1)}
    self.hr_test = hr_test  # TODO: assert hr_test != False implies combinator == 'MLP_ATT_b'

    if combinator == 'Linear':  # 3 different alpha initializations for the Linear combinator
        assert init in ['normal', 'uniform', 'random'], "init must be 'normal', 'uniform' or 'random'"
        if init == 'normal':     # sample from a gaussian N(0, 1)
            self.alpha = nn.Parameter(torch.randn(neurons, len(act_fn)), requires_grad=True)
        elif init == 'uniform':  # same init for each alpha, equal to 1/(num of act_fn)
            self.alpha = nn.Parameter(torch.ones(neurons, len(act_fn)) / len(act_fn),
                                      requires_grad=True)
        elif init == 'random':   # sample alpha in a uniform interval
            self.alpha = nn.Parameter(torch.FloatTensor(neurons, len(act_fn)).uniform_(-0.5, 0.5),
                                      requires_grad=True)

    elif combinator in MLP_list + ATT_list:  # create one small MLP combinator per neuron
        self.MLP_list = nn.ModuleList([MLP(combinator).to(self.device)
                                       for _ in range(neurons)]).to(self.device)
        if combinator == 'MLP_ATT_b':
            # Move the tensor to the device *before* wrapping it in nn.Parameter;
            # calling .to() on the Parameter would return a plain tensor and beta
            # would not be registered (and hence not optimized).
            self.beta = nn.Parameter(torch.FloatTensor(neurons).uniform_(-0.5, 0.5).to(self.device),
                                     requires_grad=True)

    elif combinator == 'Hybrid':  # Hybrid uses two ReLUs and one MLP1 per group of 3 neurons
        self.MLP_list = nn.ModuleList([])
        for i in range(neurons // 3):
            self.MLP_list.extend([self.act_module['relu']])
            self.MLP_list.extend([self.act_module['relu']])
            self.MLP_list.extend([MLP('MLP1')])

    elif combinator == 'MLPr':  # MLPr is a mix of MLP1 and MLP2
        self.MLP_list = nn.ModuleList([])
        for i in range(neurons // 2):
            self.MLP_list.extend([MLP('MLP1')])
            self.MLP_list.extend([MLP('MLP2')])
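# --- Usage sketch for MIX (illustration only, not part of the original file) ---
# Assumptions: MIX is the enclosing nn.Module class, its forward pass combines the
# listed activations per neuron with the chosen combinator, and MLP_list / ATT_list
# are module-level lists of combinator names defined elsewhere in the file.
#
#   mix = MIX(act_fn=['tanh', 'sigmoid', 'relu'],
#             combinator='Linear',   # per-neuron learnable mixing weights alpha
#             neurons=128,
#             init='uniform')        # each alpha starts at 1 / len(act_fn)
#   print(mix.alpha.shape)           # torch.Size([128, 3]), learned with the network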
def _get_module(self):
    module_type = self.cfg.MODULE.TYPE
    if module_type == "GAP":
        module = GAP()
    elif module_type == "Identity":
        module = Identity()
    else:
        raise NotImplementedError
    return module
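# --- Illustration for _get_module (hypothetical config values, not from the original) ---
# Assumption: self.cfg is a yacs/EasyDict-style config whose MODULE.TYPE field selects
# the head module; GAP presumably stands for a global-average-pooling head.
#
#   cfg.MODULE.TYPE = "GAP"       # -> GAP() head
#   cfg.MODULE.TYPE = "Identity"  # -> Identity(), features pass through unchanged
#   cfg.MODULE.TYPE = "Flatten"   # -> raises NotImplementedError (unsupported type)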
def __init__(self,
             num_steps,
             x_size,
             window_size,
             z_what_size,
             rnn_hidden_size,
             encoder_net=[],
             decoder_net=[],
             predict_net=[],
             embed_net=None,
             bl_predict_net=[],
             non_linearity='ReLU',
             decoder_output_bias=None,
             decoder_output_use_sigmoid=False,
             use_masking=True,
             use_baselines=True,
             baseline_scalar=None,
             scale_prior_mean=3.0,
             scale_prior_sd=0.1,
             pos_prior_mean=0.0,
             pos_prior_sd=1.0,
             likelihood_sd=0.3,
             use_cuda=False):
    super(AIR, self).__init__()

    self.num_steps = num_steps
    self.x_size = x_size
    self.window_size = window_size
    self.z_what_size = z_what_size
    self.rnn_hidden_size = rnn_hidden_size
    self.use_masking = use_masking
    self.use_baselines = use_baselines
    self.baseline_scalar = baseline_scalar
    self.likelihood_sd = likelihood_sd
    self.use_cuda = use_cuda
    prototype = torch.tensor(0.).cuda() if use_cuda else torch.tensor(0.)
    self.options = dict(dtype=prototype.dtype, device=prototype.device)

    self.z_pres_size = 1
    self.z_where_size = 3
    # By making these parameters they will be moved to the gpu
    # when necessary. (They are not registered with pyro for
    # optimization.)
    self.z_where_loc_prior = nn.Parameter(
        torch.FloatTensor([scale_prior_mean, pos_prior_mean, pos_prior_mean]),
        requires_grad=False)
    self.z_where_scale_prior = nn.Parameter(
        torch.FloatTensor([scale_prior_sd, pos_prior_sd, pos_prior_sd]),
        requires_grad=False)

    # Create nn modules.
    rnn_input_size = x_size ** 2 if embed_net is None else embed_net[-1]
    rnn_input_size += self.z_where_size + z_what_size + self.z_pres_size
    nl = getattr(nn, non_linearity)

    self.rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.encode = Encoder(window_size ** 2, encoder_net, z_what_size, nl)
    self.decode = Decoder(window_size ** 2, decoder_net, z_what_size,
                          decoder_output_bias, decoder_output_use_sigmoid, nl)
    self.predict = Predict(rnn_hidden_size, predict_net, self.z_pres_size, self.z_where_size, nl)
    self.embed = Identity() if embed_net is None else MLP(x_size ** 2, embed_net, nl, True)

    self.bl_rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.bl_predict = MLP(rnn_hidden_size, bl_predict_net + [1], nl)
    self.bl_embed = Identity() if embed_net is None else MLP(x_size ** 2, embed_net, nl, True)

    # Create parameters.
    self.h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.bl_h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.bl_c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.z_where_init = nn.Parameter(torch.zeros(1, self.z_where_size))
    self.z_what_init = nn.Parameter(torch.zeros(1, self.z_what_size))

    if use_cuda:
        self.cuda()
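# --- Usage sketch for the AIR constructor above (an assumption based on its
# signature, not part of the original file). The sizes mirror a typical
# multi-MNIST setup (50x50 canvases, 28x28 attention windows); adjust to your data.
#
#   air = AIR(num_steps=3,            # max number of objects attended per image
#             x_size=50,              # input images are x_size x x_size
#             window_size=28,         # size of the attended/decoded patch
#             z_what_size=50,         # latent appearance code per object
#             rnn_hidden_size=256,
#             encoder_net=[200],      # hidden layer sizes of the encoder MLP
#             decoder_net=[200],      # hidden layer sizes of the decoder MLP
#             predict_net=[200],
#             bl_predict_net=[200],   # baseline predictor for the REINFORCE terms
#             use_cuda=torch.cuda.is_available())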
def __init__(self,
             num_steps,
             x_size,
             window_size,
             z_what_size,
             rnn_hidden_size,
             encoder_net=[],
             decoder_net=[],
             predict_net=[],
             embed_net=None,
             bl_predict_net=[],
             non_linearity='ReLU',
             decoder_output_bias=None,
             decoder_output_use_sigmoid=False,
             use_masking=True,
             use_baselines=True,
             baseline_scalar=None,
             fudge_z_pres=False,
             use_cuda=False):
    super(AIR, self).__init__()

    self.num_steps = num_steps
    self.x_size = x_size
    self.window_size = window_size
    self.z_what_size = z_what_size
    self.rnn_hidden_size = rnn_hidden_size
    self.use_masking = use_masking and not fudge_z_pres
    self.use_baselines = use_baselines and not fudge_z_pres
    self.baseline_scalar = baseline_scalar
    self.fudge_z_pres = fudge_z_pres
    self.use_cuda = use_cuda

    self.z_pres_size = 1
    self.z_where_size = 3
    # By making these parameters they will be moved to the gpu
    # when necessary. (They are not registered with pyro for
    # optimization.)
    self.z_where_mu_prior = nn.Parameter(torch.FloatTensor([3.0, 0, 0]), requires_grad=False)
    self.z_where_sigma_prior = nn.Parameter(torch.FloatTensor([0.1, 1, 1]), requires_grad=False)

    # Create nn modules.
    rnn_input_size = x_size ** 2 if embed_net is None else embed_net[-1]
    rnn_input_size += self.z_where_size + z_what_size + self.z_pres_size
    nl = getattr(nn, non_linearity)

    self.rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.encode = Encoder(window_size ** 2, encoder_net, z_what_size, nl)
    self.decode = Decoder(window_size ** 2, decoder_net, z_what_size,
                          decoder_output_bias, decoder_output_use_sigmoid, nl)
    self.predict = Predict(rnn_hidden_size, predict_net, self.z_pres_size, self.z_where_size, nl)
    self.embed = Identity() if embed_net is None else MLP(x_size ** 2, embed_net, nl, True)

    self.bl_rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.bl_predict = MLP(rnn_hidden_size, bl_predict_net + [1], nl)
    self.bl_embed = Identity() if embed_net is None else MLP(x_size ** 2, embed_net, nl, True)

    # Create parameters. (`zeros` is assumed to be a module-level helper that
    # returns zero-initialised tensors of the given shape.)
    self.h_init = zeros(1, rnn_hidden_size)
    self.c_init = zeros(1, rnn_hidden_size)
    self.bl_h_init = zeros(1, rnn_hidden_size)
    self.bl_c_init = zeros(1, rnn_hidden_size)
    self.z_where_init = zeros(1, self.z_where_size)
    self.z_what_init = zeros(1, self.z_what_size)

    if use_cuda:
        self.cuda()