Example #1
    def __init__(self, input_dim=88, z_dim=100, emission_dim=100,
                 transition_dim=200, rnn_dim=600, num_layers=1, rnn_dropout_rate=0.0,
                 num_iafs=0, iaf_dim=50, use_cuda=False):
        super().__init__()
        # instantiate PyTorch modules used in the model and guide below
        self.emitter = Emitter(input_dim, z_dim, emission_dim)
        self.trans = GatedTransition(z_dim, transition_dim)
        self.combiner = Combiner(z_dim, rnn_dim)
        # dropout only applies between stacked RNN layers, so disable it for a single layer
        rnn_dropout_rate = 0. if num_layers == 1 else rnn_dropout_rate
        self.rnn = nn.RNN(input_size=input_dim, hidden_size=rnn_dim, nonlinearity='relu',
                          batch_first=True, bidirectional=False, num_layers=num_layers,
                          dropout=rnn_dropout_rate)

        # if we're using normalizing flows, instantiate those too
        self.iafs = [affine_autoregressive(z_dim, hidden_dims=[iaf_dim]) for _ in range(num_iafs)]
        self.iafs_modules = nn.ModuleList(self.iafs)

        # define trainable parameters z_0 and z_q_0 that help define the probability
        # distributions p(z_1) and q(z_1)
        # (since for t = 1 there are no previous latents to condition on)
        self.z_0 = nn.Parameter(torch.zeros(z_dim))
        self.z_q_0 = nn.Parameter(torch.zeros(z_dim))
        # define a (trainable) parameter for the initial hidden state of the rnn
        self.h_0 = nn.Parameter(torch.zeros(1, 1, rnn_dim))

        self.use_cuda = use_cuda
        # if on GPU, move all PyTorch (sub)modules to CUDA
        if use_cuda:
            self.cuda()
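The `iafs` list built above is what makes the variational posterior more expressive than a diagonal Gaussian. As a minimal standalone sketch (not part of the example; the dimensions are illustrative), this is how such a stack of IAFs is typically composed with Pyro's TransformedDistribution, following the pattern in Pyro's DMM tutorial:

    import torch
    import pyro.distributions as dist
    from pyro.distributions import TransformedDistribution
    from pyro.distributions.transforms import affine_autoregressive

    z_dim, iaf_dim, num_iafs = 100, 50, 2
    iafs = [affine_autoregressive(z_dim, hidden_dims=[iaf_dim]) for _ in range(num_iafs)]

    # each IAF has event_dim=1, so the transformed distribution treats the
    # last dimension as one event of size z_dim
    z_dist = TransformedDistribution(
        dist.Normal(torch.zeros(z_dim), torch.ones(z_dim)), iafs)
    z = z_dist.rsample()        # reparameterized sample pushed through the flows
    log_q = z_dist.log_prob(z)  # density stays tractable, as SVI requires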
Example #2
    def __init__(self, _c: "VAEConfig"):
        super().__init__()
        self._c = _c
        self.image_flatten_dim = _c.image_dim[0] * _c.image_dim[1]

        # hyperparameters for the ClippedAdam optimizer, which adds
        # gradient-norm clipping (clip_norm) and per-step lr decay (lrd) to Adam
        adam_params = {
            "lr": _c.init_lr,
            "betas": (0.96, 0.999),
            "clip_norm": 10.0,
            "lrd": 0.99996,
            "weight_decay": 2.0
        }
        self.optimizer = ClippedAdam(adam_params)

        self.emitter = Decoder(_c.z_dim,
                               _c.emitter_channel,
                               dropout_p=_c.dropout_rate)
        self.trans = GatedTransition(_c.z_dim, _c.transition_dim)
        self.combiner = Combiner(_c.z_dim, _c.rnn_dim)

        self.crnn = ConvRNN(_c.image_dim,
                            _c.rnn_dim,
                            _c.rnn_layers,
                            _c.dropout_rate,
                            use_lstm=_c.use_lstm,
                            channels=_c.crnn_channel)
        self.iafs = [
            affine_autoregressive(_c.z_dim, hidden_dims=[_c.iaf_dim])
            for _ in range(_c.num_iafs)
        ]
        self.iafs_modules = nn.ModuleList(self.iafs)

        # trainable parameters that define p(z_1) and q(z_1), plus the
        # initial hidden state of the recurrent encoder
        self.z_0 = nn.Parameter(torch.zeros(_c.z_dim))
        self.z_q_0 = nn.Parameter(torch.zeros(_c.z_dim))
        self.h_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim))
        if _c.use_lstm:
            self.c_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim))
        self.cuda()
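ClippedAdam here is presumably pyro.optim.ClippedAdam, which combines Adam with gradient-norm clipping (clip_norm) and per-step learning-rate decay (lrd). A hedged usage sketch; `vae`, its model/guide methods, and `mini_batch` are hypothetical stand-ins for objects defined elsewhere:

    from pyro.infer import SVI, Trace_ELBO
    from pyro.optim import ClippedAdam

    optimizer = ClippedAdam({"lr": 1e-3, "betas": (0.96, 0.999),
                             "clip_norm": 10.0, "lrd": 0.99996, "weight_decay": 2.0})
    svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())
    loss = svi.step(mini_batch)  # one gradient step on the negative ELBO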
Example #3
    def test_affine_autoregressive_shapes(self):
        for stable in [True, False]:
            for shape in [(3,), (3, 4), (3, 4, 2)]:
                input_dim = shape[-1]
                self._test_shape(
                    shape, T.affine_autoregressive(input_dim, stable=stable))
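The shape test relies on affine_autoregressive returning a bijective Transform whose forward pass preserves the input's shape. A standalone sketch of the same check (values are illustrative):

    import torch
    from pyro.distributions import transforms as T

    t = T.affine_autoregressive(input_dim=4, stable=True)
    x = torch.randn(3, 4)
    y = t(x)                   # forward pass through the flow
    assert y.shape == x.shape  # the transform is shape-preserving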
Example #4
    def test_affine_autoregressive_inverses(self):
        for stable in [True, False]:
            for input_dim in [2, 5, 10]:
                self._test_inverse(
                    input_dim,
                    T.affine_autoregressive(input_dim, stable=stable))
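The inverse test exercises the other direction: an IAF's forward pass is a single vectorized computation, while the inverse is recovered dimension by dimension. A roundtrip sketch with illustrative values:

    import torch
    from pyro.distributions import transforms as T

    t = T.affine_autoregressive(5)
    x = torch.randn(7, 5)
    y = t(x)
    x_roundtrip = t.inv(y)  # inverts the flow (Pyro also caches the latest pair)
    assert torch.allclose(x, x_roundtrip, atol=1e-5)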
Example #5
    def __init__(self,
                 input_channels=1,
                 z_channels=16,
                 emission_channels=[32, 16],
                 transition_channels=32,
                 flatten_channels=[16, 32],
                 rnn_input_dim=32,
                 rnn_channels=32,
                 kernel_size=3,
                 height=100,
                 width=100,
                 num_layers=1,
                 rnn_dropout_rate=0.0,
                 num_iafs=0,
                 iaf_dim=50,
                 use_cuda=False):
        super().__init__()
        self.input_channels = input_channels
        self.rnn_input_dim = rnn_input_dim
        self.height = height
        self.width = width

        # Instantiate model components
        self.emitter = Emitter(width, height, input_channels, z_channels,
                               emission_channels, kernel_size)
        self.trans = GatedTransition(z_channels, transition_channels)
        self.combiner = Combiner(z_channels, rnn_channels)
        self.flatten = Flattener(width, height, input_channels, rnn_input_dim,
                                 flatten_channels, kernel_size)

        # Select device
        if use_cuda:
            self.device = 'cuda'
        else:
            self.device = 'cpu'

        # Set up the RNN
        rnn_dropout_rate = 0. if num_layers == 1 else rnn_dropout_rate
        self.rnn = nn.RNN(input_size=rnn_input_dim,
                          hidden_size=rnn_channels,
                          batch_first=True,
                          bidirectional=False,
                          num_layers=num_layers,
                          dropout=rnn_dropout_rate)

        # Normalizing flows, Inverse Autoregressive Flows
        self.iafs = [
            affine_autoregressive(z_channels, hidden_dims=[iaf_dim])
            for _ in range(num_iafs)
        ]
        self.iafs_modules = nn.ModuleList(self.iafs)

        # Trainable parameters z_0 and z_q_0 that define the probability
        # distributions p(z_1) and q(z_1)
        self.z_0 = nn.Parameter(torch.zeros(z_channels))
        self.z_q_0 = nn.Parameter(torch.zeros(z_channels))

        # Initial hidden state of the rnn
        self.h_0 = nn.Parameter(torch.zeros(1, 1, rnn_channels))

        # If on GPU, move all (sub)modules to CUDA
        self.use_cuda = use_cuda
        if use_cuda:
            self.cuda()
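The learned h_0 above is stored with singleton batch dimensions, so before unrolling the RNN it has to be broadcast across the mini-batch, as in Pyro's DMM guide. A self-contained sketch of that pattern (dimensions are illustrative):

    import torch
    import torch.nn as nn

    batch_size, seq_len, rnn_input_dim, rnn_channels = 8, 20, 32, 32
    rnn = nn.RNN(input_size=rnn_input_dim, hidden_size=rnn_channels, batch_first=True)
    h_0 = nn.Parameter(torch.zeros(1, 1, rnn_channels))

    x = torch.randn(batch_size, seq_len, rnn_input_dim)
    # expand the single learned initial state across the batch; contiguous()
    # is needed because nn.RNN expects a contiguous hidden-state tensor
    h_0_contig = h_0.expand(1, batch_size, rnn_channels).contiguous()
    rnn_output, _ = rnn(x, h_0_contig)  # rnn_output: (batch, seq_len, rnn_channels)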
Example #6
    def __init__(
        self,
        latent_dim,
        num_item,
        hidden_dim=16,
        ability_merge='mean',
        conditional_posterior=False,
        generative_model='irt',
        num_iafs=0,
        iaf_dim=32,
    ):
        super().__init__()

        self.latent_dim = latent_dim
        self.ability_dim = latent_dim
        self.response_dim = 1
        self.hidden_dim = hidden_dim
        self.num_item = num_item
        self.ability_merge = ability_merge
        self.conditional_posterior = conditional_posterior
        self.generative_model = generative_model
        self.num_iafs = num_iafs
        self.iaf_dim = iaf_dim

        self._set_item_feat_dim()
        self._set_irt_num()

        if self.num_iafs > 0:
            self.iafs = [
                affine_autoregressive(self.latent_dim,
                                      hidden_dims=[self.iaf_dim])
                for _ in range(self.num_iafs)
            ]
            self.iafs_modules = nn.ModuleList(self.iafs)

        if self.conditional_posterior:
            self.ability_encoder = ConditionalAbilityInferenceNetwork(
                self.ability_dim,
                self.response_dim,
                self.item_feat_dim,
                self.hidden_dim,
                ability_merge=self.ability_merge,
            )
        else:
            self.ability_encoder = AbilityInferenceNetwork(
                self.ability_dim,
                self.response_dim,
                self.hidden_dim,
                ability_merge=self.ability_merge,
            )

        self.item_encoder = ItemInferenceNetwork(self.num_item,
                                                 self.item_feat_dim)

        if self.generative_model == 'link':
            self.decoder = LinkedIRT(
                irt_model=f'{self.irt_num}pl',
                hidden_dim=self.hidden_dim,
            )
        elif self.generative_model == 'deep':
            self.decoder = DeepIRT(
                self.ability_dim,
                irt_model=f'{self.irt_num}pl',
                hidden_dim=self.hidden_dim,
            )
        elif self.generative_model == 'residual':
            self.decoder = ResidualIRT(
                self.ability_dim,
                irt_model=f'{self.irt_num}pl',
                hidden_dim=self.hidden_dim,
            )

        self.apply(self.weights_init)
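When num_iafs > 0, the IAFs built in this variational IRT model can enrich the ability posterior exactly as in the earlier examples. A hedged sketch; ability_mu and ability_std stand in for the ability encoder's outputs and are not taken from the source:

    import torch
    import pyro.distributions as dist
    from pyro.distributions import TransformedDistribution
    from pyro.distributions.transforms import affine_autoregressive

    latent_dim = 8
    iafs = [affine_autoregressive(latent_dim, hidden_dims=[32]) for _ in range(2)]

    ability_mu = torch.zeros(latent_dim)   # placeholder for the encoder's output
    ability_std = torch.ones(latent_dim)   # placeholder for the encoder's output
    posterior = TransformedDistribution(dist.Normal(ability_mu, ability_std), iafs)
    ability = posterior.rsample()          # more expressive posterior sample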