Example #1
def create_initializer(init_type, scale=None, fillvalue=None):
    if init_type == 'identity':
        return initializers.Identity() if scale is None else initializers.Identity(scale=scale)
    if init_type == 'constant':
        return initializers.Constant(fillvalue)
    if init_type == 'zero':
        return initializers.Zero()
    if init_type == 'one':
        return initializers.One()
    if init_type == 'normal':
        return initializers.Normal() if scale is None else initializers.Normal(scale)
    if init_type == 'glorotNormal':
        return initializers.GlorotNormal() if scale is None else initializers.GlorotNormal(scale)
    if init_type == 'heNormal':
        return initializers.HeNormal() if scale is None else initializers.HeNormal(scale)
    if init_type == 'orthogonal':
        return initializers.Orthogonal() if scale is None else initializers.Orthogonal(scale)
    if init_type == 'uniform':
        return initializers.Uniform() if scale is None else initializers.Uniform(scale)
    if init_type == 'leCunUniform':
        return initializers.LeCunUniform() if scale is None else initializers.LeCunUniform(scale)
    if init_type == 'glorotUniform':
        return initializers.GlorotUniform() if scale is None else initializers.GlorotUniform(scale)
    if init_type == 'heUniform':
        return initializers.HeUniform() if scale is None else initializers.HeUniform(scale)
    raise ValueError("Unknown initializer type: {0}".format(init_type))
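A minimal usage sketch for the factory above, assuming `initializers` is `chainer.initializers` (the shape and scale here are illustrative). Chainer initializers fill a pre-allocated array in place, and `Orthogonal(scale=s)` makes the flattened rows mutually orthogonal with norm `s`:

import numpy as np
from chainer import initializers

init = create_initializer('orthogonal', scale=1.5)
W = np.empty((4, 8), dtype=np.float32)
init(W)  # fills W in place
# rows of W are orthogonal with squared norm scale**2
print(np.allclose(W.dot(W.T), 1.5 ** 2 * np.eye(4), atol=1e-4))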
Example #2

def check_orthogonality(self, w):
    # With scale=2.0, the flattened rows of w should be mutually
    # orthogonal with squared norm 4, i.e. w.dot(w.T) == 4 * I.
    initializer = initializers.Orthogonal(scale=2.0)
    initializer(w)
    n = 1 if w.ndim == 0 else len(w)
    w = w.astype(numpy.float64).reshape(n, -1)
    dots = w.dot(w.T)
    testing.assert_allclose(dots, numpy.identity(n) * 4,
                            **self.check_options)
Example #3
    def __init__(self,
                 in_capsules,
                 out_capsules,
                 ksize,
                 stride=1,
                 pad=0,
                 nobias=False,
                 initialW=None,
                 initial_bias=None):
        super().__init__()

        pi, n_pi = in_capsules
        rho, n_rho = out_capsules
        pi_dim = pi.dim
        rho_dim = rho.dim

        pi = group.induced_representation(pi, ksize)
        basis = group.intertwiner_basis(pi, rho)
        basis = basis.astype(chainer.config.dtype)
        hom_dim = basis.shape[1]

        # Mix the intertwiner basis with a random scaled orthogonal matrix.
        basis_randomizer = np.empty((hom_dim, hom_dim), dtype=basis.dtype)
        basis_scale = math.sqrt(basis.shape[0] / hom_dim)
        initializers.Orthogonal(scale=basis_scale)(basis_randomizer)
        basis = np.matmul(basis, basis_randomizer)

        basis = basis.reshape((rho_dim, pi_dim * ksize * ksize, hom_dim))
        self.add_persistent('basis', basis)

        if n_pi is None:
            in_channels = None
        else:
            in_channels = n_pi * pi_dim
        out_channels = n_rho * rho_dim

        self.ksize = ksize
        self.stride = stride
        self.pad = pad
        self.in_channels = in_channels
        self.out_channels = out_channels

        with self.init_scope():
            W_initializer = initializers._get_initializer(initialW)
            self.W = variable.Parameter(W_initializer)
            if in_channels is not None:
                self._initialize_params(in_channels)
            if nobias:
                self.b = None
            else:
                if initial_bias is None:
                    initial_bias = 0
                bias_initializer = initializers._get_initializer(initial_bias)
                self.b = variable.Parameter(bias_initializer, out_channels)
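The constructor above registers `W` without a shape, so initialization is deferred until the input size is known. A minimal sketch of that deferred-initialization pattern (class and method names here are illustrative, not from the original):

import chainer
from chainer import initializers, variable

class DeferredLink(chainer.Link):
    def __init__(self, out_channels=8):
        super().__init__()
        self.out_channels = out_channels
        with self.init_scope():
            # No shape given: the parameter stays uninitialized for now.
            self.W = variable.Parameter(initializers.Orthogonal())

    def _initialize_params(self, in_channels):
        # Give the parameter its concrete shape; the stored
        # initializer fills it at this point.
        self.W.initialize((self.out_channels, in_channels))

link = DeferredLink()
link._initialize_params(16)  # W is now an 8 x 16 matrix with orthogonal rows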
def setUp(self):
    self.w = numpy.empty(self.shape, dtype=numpy.float32)
    self.initializer = initializers.Orthogonal(scale=1.0)

def setUp(self):
    self.w = numpy.empty(0, dtype=numpy.float32)
    self.initializer = initializers.Orthogonal()

def check_shaped_initializer(self, xp):
    initializer = initializers.Orthogonal(scale=2.0, dtype=self.dtype)
    w = initializers.generate_array(initializer, self.shape, xp)
    self.assertIs(cuda.get_array_module(w), xp)
    self.assertTupleEqual(w.shape, self.shape)
    self.assertEqual(w.dtype, self.dtype)

def check_initializer(self, w):
    initializer = initializers.Orthogonal(scale=2.0)
    initializer(w)
    self.assertTupleEqual(w.shape, self.shape)
    self.assertEqual(w.dtype, self.dtype)
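`generate_array`, used in the test above, allocates an array of the requested shape on the given array module and fills it with the initializer in one step. A minimal NumPy-backend sketch:

import numpy
from chainer import initializers

init = initializers.Orthogonal(scale=2.0, dtype=numpy.float32)
w = initializers.generate_array(init, (4, 6), numpy)
print(w.shape, w.dtype)  # (4, 6) float32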
    def __init__(self,
                 vocab,
                 essay_info_dict,
                 para_info_dict,
                 max_n_spans_para,
                 max_n_paras,
                 max_n_tokens,
                 settings,
                 baseline_heuristic=False,
                 use_elmo=True,
                 decoder="proposed"):

        ##########################
        # set default attributes #
        ##########################
        self.vocab = vocab
        self.essay_info_dict = essay_info_dict
        self.para_info_dict = para_info_dict
        self.encVocabSize = len(vocab)
        self.eDim = settings.eDim
        self.hDim = settings.hDim
        self.dropout = settings.dropout
        self.dropout_lstm = settings.dropout_lstm
        self.dropout_embedding = settings.dropout_embedding
        self.max_n_para = max_n_paras
        self.max_n_spans = max_n_spans_para
        self.max_n_tokens = max_n_tokens
        self.decoder = decoder

        self.args = settings

        ###############
        # Select LSTM #
        ###############
        self.lstm_ac = settings.lstm_ac
        self.lstm_shell = settings.lstm_shell
        self.lstm_ac_shell = settings.lstm_ac_shell
        self.lstm_type = settings.lstm_type

        ########################
        # position information #
        ########################
        self.position_info_size = self.max_n_spans * 3
        self.relative_position_info_size = 21

        ################
        # elmo setting #
        ################
        self.use_elmo = use_elmo
        if self.use_elmo:
            self.eDim = 1024

        ##########
        # others #
        ##########
        self.baseline_heuristic = baseline_heuristic

        ##############################
        # hidden representation size #
        ##############################
        self.lstm_out = self.hDim * 2

        if self.use_elmo:
            self.bow_feature_size = len(self.vocab)
        else:
            self.bow_feature_size = len(self.vocab) + 3 * self.eDim

        self.bow_rep_size = self.lstm_out

        # the size of representation created with LSTM-minus
        self.span_rep_size = self.lstm_out * 2

        # output of AC layer
        if self.lstm_ac:
            self.ac_rep_size = self.lstm_out
        else:
            self.ac_rep_size = self.span_rep_size

        # output of AM layer
        if self.lstm_shell:
            self.shell_rep_size = self.lstm_out
        else:
            self.shell_rep_size = self.span_rep_size

        # the size of ADU representation
        self.ac_shell_rep_size_in = self.ac_rep_size +\
            self.shell_rep_size + self.position_info_size + self.bow_rep_size

        # output of ADU layer
        if self.lstm_ac_shell:
            self.ac_shell_rep_size_out = self.lstm_out
        else:
            self.ac_shell_rep_size_out = self.ac_shell_rep_size_in

        # output of Encoder (ADU-level)
        self.reps_for_type_classification = self.ac_shell_rep_size_out

        # the size of ADU representations for link identification
        if self.lstm_type:
            self.type_rep_size = self.lstm_out
        else:
            self.type_rep_size = self.ac_shell_rep_size_out

        # the size of ADU pair representation
        self.span_pair_size = self.type_rep_size * 3 + self.relative_position_info_size

        n_ac_shell_lstm_layers = 1

        super(BaseArgStrParser, self).__init__()

        with self.init_scope():
            self.Embed_x = chaLink.EmbedID(self.encVocabSize,
                                           self.eDim,
                                           ignore_label=-1)

            self.Bilstm = chaLink.NStepBiLSTM(n_layers=1,
                                              in_size=self.eDim,
                                              out_size=self.hDim,
                                              dropout=self.dropout_lstm)

            if self.lstm_ac:
                self.AcBilstm = chaLink.NStepBiLSTM(n_layers=1,
                                                    in_size=self.span_rep_size,
                                                    out_size=self.hDim,
                                                    dropout=self.dropout_lstm)
            if self.lstm_shell:
                self.ShellBilstm = chaLink.NStepBiLSTM(
                    n_layers=1,
                    in_size=self.span_rep_size,
                    out_size=self.hDim,
                    dropout=self.dropout_lstm)

            self.AcShellBilstm = chaLink.NStepBiLSTM(
                n_layers=n_ac_shell_lstm_layers,
                in_size=self.ac_shell_rep_size_in,
                out_size=self.hDim,
                dropout=self.dropout_lstm)

            self.LastBilstm = chaLink.NStepBiLSTM(
                n_layers=1,
                in_size=self.ac_shell_rep_size_out,
                out_size=self.hDim,
                dropout=self.dropout_lstm)

            self.AcTypeLayer = chaLink.Linear(
                in_size=self.reps_for_type_classification,
                out_size=3,
                initialW=chaInit.Uniform(0.05),
                initial_bias=chaInit.Uniform(0.05))

            self.LinkTypeLayer = chaLink.Linear(
                in_size=self.reps_for_type_classification,
                out_size=2,
                initialW=chaInit.Uniform(0.05),
                initial_bias=chaInit.Uniform(0.05))

            self.RelationLayer = chaLink.Linear(
                in_size=self.span_pair_size,
                out_size=1,
                initialW=chaInit.Uniform(0.05),
                initial_bias=chaInit.Uniform(0.05))

            self.BowFCLayer = chaLink.Linear(
                in_size=self.bow_feature_size,
                out_size=self.bow_rep_size,
                initialW=chaInit.Uniform(0.05),
                initial_bias=chaInit.Uniform(0.05))

            self.root_embedding = chainer.Parameter(
                initializer=chaInit.Uniform(0.05), shape=self.type_rep_size)

            # self.position_info[0:12]: forward position
            # self.position_info[12:24]: backward position
            # self.position_info[24:28]: paragraph type
            if self.use_elmo:
                self.elmo_task_gamma = chainer.Parameter(
                    initializer=chaInit.Constant(1), shape=1)
                self.elmo_task_s = chainer.Parameter(
                    initializer=chaInit.Constant(1), shape=3)

            # Re-initialize the LSTM weight matrices in place with orthogonal
            # rows; 1-D bias vectors are skipped because Orthogonal requires
            # at least as many columns as rows after flattening.
            for param in self.Bilstm.params():
                if param.ndim >= 2:
                    chaInit.Orthogonal()(param.array)
            if self.lstm_ac:
                for param in self.AcBilstm.params():
                    if param.ndim >= 2:
                        chaInit.Orthogonal()(param.array)
            if self.lstm_shell:
                for param in self.ShellBilstm.params():
                    if param.ndim >= 2:
                        chaInit.Orthogonal()(param.array)
            for param in self.AcShellBilstm.params():
                if param.ndim >= 2:
                    chaInit.Orthogonal()(param.array)
            for param in self.LastBilstm.params():
                if param.ndim >= 2:
                    chaInit.Orthogonal()(param.array)
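A standalone sketch of the in-place re-initialization pattern from the end of this constructor, assuming Chainer's NStepBiLSTM (the sizes are illustrative; only the 2-D weight matrices are touched, since Orthogonal rejects arrays with more rows than columns after flattening):

import chainer.links as L
import chainer.initializers as I

lstm = L.NStepBiLSTM(n_layers=1, in_size=16, out_size=16, dropout=0.0)
for param in lstm.params():
    if param.ndim >= 2:              # skip 1-D bias vectors
        I.Orthogonal()(param.array)  # fills the weight in place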
import chainer
import chainer.functions as F
import chainer.links as L
import chainer.initializers as I

import numpy as np

# A single orthogonal initializer shared by all convolutions below
# (Chainer's Orthogonal defaults to scale=1.1).
initW = I.Orthogonal(dtype=np.float32)


class FitNet1(chainer.Chain):
    def __init__(self, class_labels=10):
        super(FitNet1, self).__init__()
        with self.init_scope():
            self.conv1_1 = L.Convolution2D(3,
                                           16,
                                           ksize=(3, 3),
                                           pad=1,
                                           initialW=initW)
            self.conv1_2 = L.Convolution2D(16,
                                           16,
                                           ksize=(3, 3),
                                           pad=1,
                                           initialW=initW)
            self.conv1_3 = L.Convolution2D(16,
                                           16,
                                           ksize=(3, 3),
                                           pad=1,
                                           initialW=initW)

            self.conv2_1 = L.Convolution2D(16,