Example #1
    def __init__(self,
                 filter_height,
                 filter_width,
                 channels,
                 num_filters,
                 stride,
                 exp_global=Ref(Path("exp_global"))):
        """
    Args:
      filter_height: filter heights, one per convolutional layer
      filter_width: filter widths, one per convolutional layer
      channels: input channel counts, one per convolutional layer
      num_filters: output filter counts, one per convolutional layer
      stride: convolution stride, e.g. (2, 2)
    """
        model = exp_global.dynet_param_collection.param_col
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.channels = channels
        self.num_filters = num_filters
        self.stride = stride  # (2,2)
        self.hidden_states = {}

        normalInit = dy.NormalInitializer(0, 0.1)
        self.filters1 = model.add_parameters(
            dim=(self.filter_height[0], self.filter_width[0], self.channels[0],
                 self.num_filters[0]),
            init=normalInit)
        self.filters2 = model.add_parameters(
            dim=(self.filter_height[1], self.filter_width[1], self.channels[1],
                 self.num_filters[1]),
            init=normalInit)
        self.filters3 = model.add_parameters(
            dim=(self.filter_height[2], self.filter_width[2], self.channels[2],
                 self.num_filters[2]),
            init=normalInit)
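Note: in all three variants of this snippet, dy.NormalInitializer(0, 0.1) takes the variance (not the standard deviation) as its second argument. A minimal standalone sketch of the same call, with an arbitrary illustrative shape:

import dynet as dy

pc = dy.ParameterCollection()
# Draw a 100x100 weight matrix from N(mean=0, var=0.1)
W = pc.add_parameters(dim=(100, 100), init=dy.NormalInitializer(0, 0.1))
print(W.as_array().std())  # roughly sqrt(0.1) ~ 0.316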
Example #2
    def __init__(self, filter_height, filter_width, channels, num_filters,
                 stride):
        """
    :param filter_height: filter heights, one per convolutional layer
    :param filter_width: filter widths, one per convolutional layer
    :param channels: input channel counts, one per convolutional layer
    :param num_filters: output filter counts, one per convolutional layer
    :param stride: convolution stride, e.g. (2, 2)
    """
        model = model_globals.dynet_param_collection.param_col
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.channels = channels
        self.num_filters = num_filters
        self.stride = stride  # (2,2)

        normalInit = dy.NormalInitializer(0, 0.1)
        self.filters1 = model.add_parameters(
            dim=(self.filter_height[0], self.filter_width[0], self.channels[0],
                 self.num_filters[0]),
            init=normalInit)
        self.filters2 = model.add_parameters(
            dim=(self.filter_height[1], self.filter_width[1], self.channels[1],
                 self.num_filters[1]),
            init=normalInit)
        self.filters3 = model.add_parameters(
            dim=(self.filter_height[2], self.filter_width[2], self.channels[2],
                 self.num_filters[2]),
            init=normalInit)
Example #3
    def __init__(self, filter_height, filter_width, channels, num_filters,
                 stride):
        """
    Args:
      filter_height: filter heights, one per convolutional layer
      filter_width: filter widths, one per convolutional layer
      channels: input channel counts, one per convolutional layer
      num_filters: output filter counts, one per convolutional layer
      stride: convolution stride, e.g. (2, 2)
    """
        model = ParamManager.my_params(self)
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.channels = channels
        self.num_filters = num_filters
        self.stride = stride  # (2,2)
        self.hidden_states = {}

        normalInit = dy.NormalInitializer(0, 0.1)
        self.filters1 = model.add_parameters(
            dim=(self.filter_height[0], self.filter_width[0], self.channels[0],
                 self.num_filters[0]),
            init=normalInit)
        self.filters2 = model.add_parameters(
            dim=(self.filter_height[1], self.filter_width[1], self.channels[1],
                 self.num_filters[1]),
            init=normalInit)
        self.filters3 = model.add_parameters(
            dim=(self.filter_height[2], self.filter_width[2], self.channels[2],
                 self.num_filters[2]),
            init=normalInit)
Example #4
    def __init__(self, du, nu, de, pc, pre_user=None):
        super(LookupUserRecognizer, self).__init__(pc)

        self.du, self.nu = du, nu
        if pre_user is None:
            init = dy.NormalInitializer(1 / self.du, np.sqrt(1 / self.du))
            self.U_p = self.pc.add_lookup_parameters((self.nu, self.du), init=init, name='U')
        else:
            self.U_p = self.pc.lookup_parameters_from_numpy(pre_user, name='U')
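For context, a minimal sketch of how such a lookup table is typically consumed (the user count and embedding size below are made up for illustration):

import dynet as dy

pc = dy.ParameterCollection()
nu, du = 10, 4  # illustrative user count and embedding size
U_p = pc.add_lookup_parameters((nu, du),
                               init=dy.NormalInitializer(1 / du, (1 / du) ** 0.5))
dy.renew_cg()
u = U_p[3]          # expression for the embedding of user id 3
print(u.npvalue())  # a du-dimensional vector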
Example #5
    def __init__(self, v, du, nu, de, pc, pretrained_BU=None):
        super(LogFactVocabUserRecognizer, self).__init__(pc)

        self.du = du
        self.v, self.nu = v, nu
        # User vectors
        self.U_p = self.pc.add_lookup_parameters((nu, 1, du), init=dy.ConstInitializer(0), name='U')
        init = dy.NormalInitializer(1 / self.du, np.sqrt(1 / self.du))
        # Biases
        self.B_p = self.pc.add_parameters((v, du), init=init, name='B')
        self.avg = None
        self.BU_p = None
Example #6
File: conv.py Project: hoangcuong2011/xnmt
    def __init__(self,
                 input_dim,
                 window_receptor,
                 output_dim,
                 num_layers,
                 internal_dim,
                 non_linearity='linear',
                 exp_global=Ref(Path("exp_global"))):
        """
    Args:
      num_layers: number of layers after the first receptor convolution
      input_dim: size of the inputs
      window_receptor: window for the receptor
      output_dim: size of the outputs
      internal_dim: size of the hidden (internal) dimension
      non_linearity: nonlinearity to apply between layers
      """

        model = exp_global.dynet_param_collection.param_col
        self.input_dim = input_dim
        self.window_receptor = window_receptor
        self.internal_dim = internal_dim
        self.non_linearity = non_linearity
        self.output_dim = output_dim
        if self.non_linearity == 'linear':
            self.gain = 1.0
        elif self.non_linearity == 'tanh':
            self.gain = 1.0
        elif self.non_linearity == 'relu':
            self.gain = 0.5
        elif self.non_linearity == 'sigmoid':
            self.gain = 4.0

        normalInit = dy.NormalInitializer(0, 0.1)

        self.pConv1 = model.add_parameters(dim=(self.input_dim,
                                                self.window_receptor, 1,
                                                self.internal_dim),
                                           init=normalInit)
        self.pBias1 = model.add_parameters(dim=(self.internal_dim))
        self.builder_layers = []
        for _ in range(num_layers):
            conv = model.add_parameters(dim=(self.internal_dim, 1, 1,
                                             self.internal_dim),
                                        init=normalInit)
            bias = model.add_parameters(dim=(self.internal_dim))
            self.builder_layers.append((conv, bias))

        self.last_conv = model.add_parameters(dim=(self.internal_dim, 1, 1,
                                                   self.output_dim),
                                              init=normalInit)
        self.last_bias = model.add_parameters(dim=(self.output_dim))
Example #7
    def __init__(self,
                 input_dim: numbers.Integral,
                 window_receptor: numbers.Integral,
                 output_dim: numbers.Integral,
                 num_layers: numbers.Integral,
                 internal_dim: numbers.Integral,
                 non_linearity: str = 'linear') -> None:
        """
    Args:
      num_layers: number of layers after the first receptor convolution
      input_dim: size of the inputs
      window_receptor: window for the receptor
      output_dim: size of the outputs
      internal_dim: size of the hidden (internal) dimension
      non_linearity: nonlinearity to apply between layers
      """

        model = param_collections.ParamManager.my_params(self)
        self.input_dim = input_dim
        self.window_receptor = window_receptor
        self.internal_dim = internal_dim
        self.non_linearity = non_linearity
        self.output_dim = output_dim
        if self.non_linearity == 'linear':
            self.gain = 1.0
        elif self.non_linearity == 'tanh':
            self.gain = 1.0
        elif self.non_linearity == 'relu':
            self.gain = 0.5
        elif self.non_linearity == 'sigmoid':
            self.gain = 4.0

        normalInit = dy.NormalInitializer(0, 0.1)

        self.pConv1 = model.add_parameters(dim=(self.input_dim,
                                                self.window_receptor, 1,
                                                self.internal_dim),
                                           init=normalInit)
        self.pBias1 = model.add_parameters(dim=(self.internal_dim, ))
        self.builder_layers = []
        for _ in range(num_layers):
            conv = model.add_parameters(dim=(self.internal_dim, 1, 1,
                                             self.internal_dim),
                                        init=normalInit)
            bias = model.add_parameters(dim=(self.internal_dim, ))
            self.builder_layers.append((conv, bias))

        self.last_conv = model.add_parameters(dim=(self.internal_dim, 1, 1,
                                                   self.output_dim),
                                              init=normalInit)
        self.last_bias = model.add_parameters(dim=(self.output_dim, ))
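For reference, DyNet's conv2d expects filters laid out as (rows, cols, input channels, output channels), which is why these snippets build 4-tuples for each filter bank. A rough sketch of applying one such filter bank (shapes are illustrative, not taken from xnmt):

import dynet as dy

pc = dy.ParameterCollection()
# 3x3 filters, 1 input channel, 16 output channels
F = pc.add_parameters(dim=(3, 3, 1, 16), init=dy.NormalInitializer(0, 0.1))
dy.renew_cg()
x = dy.inputTensor([[[0.0]] * 20] * 20)  # a 20x20x1 input
y = dy.conv2d(x, dy.parameter(F), stride=[2, 2], is_valid=True)
print(y.dim())  # ((9, 9, 16), 1): ceil((20-3+1)/2) = 9 along each spatial axis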
Example #8
  def __init__(self, in_height, out_height):
    """
    Args:
      num_layers: depth of the RNN
      input_dim: size of the inputs
      hidden_dim: size of the outputs (and intermediate RNN layer representations)
      """

    model = ParamManager.my_params(self)
    self.in_height = in_height
    self.out_height = out_height

    normalInit=dy.NormalInitializer(0, 0.1)
    self.pW = model.add_parameters(dim = (self.out_height, self.in_height), init=normalInit)
    self.pb = model.add_parameters(dim = self.out_height)
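A typical forward pass for this kind of layer (a sketch of the presumed usage, not code from the original class) applies W and b with a single fused affine transform:

import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters(dim=(2, 3), init=dy.NormalInitializer(0, 0.1))
b = pc.add_parameters(dim=2)
dy.renew_cg()
x = dy.inputVector([1.0, 2.0, 3.0])
y = dy.affine_transform([dy.parameter(b), dy.parameter(W), x])  # y = W x + b
print(y.npvalue())  # a 2-dimensional output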
Example #9
File: ff.py Project: pmichel31415/xnmt
    def __init__(self, in_height, out_height, nonlinearity='linear'):
        """
    Args:
      in_height: input dimension of the affine transform
      out_height: output dimension of the affine transform
      nonlinearity: nonlinear activation function
    """
        model = ParamManager.my_params(self)
        self.in_height = in_height
        self.out_height = out_height
        self.nonlinearity = nonlinearity

        normalInit = dy.NormalInitializer(0, 0.1)
        self.pW = model.add_parameters(dim=(self.out_height, self.in_height),
                                       init=normalInit)
        self.pb = model.add_parameters(dim=self.out_height)
Example #10
    def __init__(self, num_layers, input_dim, hidden_dim, model,
                 rnn_builder_factory, chn_dim, num_filters, filter_size_time,
                 filter_size_freq, stride):
        """
    :param num_layers: depth of the bidirectional RNN stack
    :param input_dim: size of the inputs
    :param hidden_dim: size of the outputs (and intermediate RNN layer representations)
    :param model: DyNet parameter collection
    :param rnn_builder_factory: RNNBuilder subclass, e.g. LSTMBuilder
    :param chn_dim: number of input channels
    :param num_filters: number of convolutional filters, e.g. 32
    :param filter_size_time: filter size along the time axis, e.g. 3
    :param filter_size_freq: filter size along the frequency axis, e.g. 3
    :param stride: convolution stride, e.g. (2, 2)
    """
        assert num_layers > 0
        assert hidden_dim % 2 == 0
        assert input_dim % chn_dim == 0

        self.chn_dim = chn_dim
        self.freq_dim = input_dim // chn_dim
        self.num_filters = num_filters  # 32
        self.filter_size_time = filter_size_time  # 3
        self.filter_size_freq = filter_size_freq  # 3
        self.stride = stride  # (2,2)

        normalInit = dy.NormalInitializer(0, 0.1)
        self.filters1 = model.add_parameters(
            dim=(self.filter_size_time, self.filter_size_freq, self.chn_dim,
                 self.num_filters),
            init=normalInit)
        self.filters2 = model.add_parameters(
            dim=(self.filter_size_time, self.filter_size_freq,
                 self.num_filters, self.num_filters),
            init=normalInit)
        conv_dim_l1 = math.ceil(
            float(self.freq_dim - self.filter_size_freq + 1) /
            float(self.stride[1]))
        conv_dim_l2 = int(
            math.ceil(
                float(conv_dim_l1 - self.filter_size_freq + 1) /
                float(self.stride[1])))
        conv_dim_out = conv_dim_l2 * self.num_filters

        self.builder_layers = []
        f = rnn_builder_factory(1, conv_dim_out, hidden_dim // 2, model)
        b = rnn_builder_factory(1, conv_dim_out, hidden_dim // 2, model)
        self.builder_layers.append((f, b))
        for _ in range(num_layers - 1):
            f = rnn_builder_factory(1, hidden_dim, hidden_dim // 2, model)
            b = rnn_builder_factory(1, hidden_dim, hidden_dim // 2, model)
            self.builder_layers.append((f, b))
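To make the dimension bookkeeping above concrete, here is the same arithmetic with illustrative values (freq_dim = 40, filter size 3, stride 2, 32 filters; these numbers are not from the original code):

import math

freq_dim, filter_size_freq, stride_freq, num_filters = 40, 3, 2, 32
conv_dim_l1 = math.ceil((freq_dim - filter_size_freq + 1) / stride_freq)     # ceil(38/2) = 19
conv_dim_l2 = math.ceil((conv_dim_l1 - filter_size_freq + 1) / stride_freq)  # ceil(17/2) = 9
conv_dim_out = conv_dim_l2 * num_filters                                     # 9 * 32 = 288
print(conv_dim_out)  # 288 features fed into each direction of the first BiRNN layer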
Example #11
    def __init__(self, v, du, nu, de, pc, pretrained_BU=None):
        super(FactVocabUserRecognizer, self).__init__(pc)

        # prediction parameters
        self.Wh_p = self.pc.add_parameters((de, de), name='Wh')
        self.bh_p = self.pc.add_parameters((de,), name='bh', init=dy.ConstInitializer(0))
        self.Su_p = self.pc.add_parameters((du, de), name='Su')
        self.bu_p = self.pc.add_parameters((du,), name='bu', init=dy.ConstInitializer(0))
        self.du = du
        self.v, self.nu = v, nu
        # User vectors
        self.U_p = self.pc.add_lookup_parameters((nu, du), init=dy.ConstInitializer(0), name='U')
        init = dy.NormalInitializer(1 / self.du, np.sqrt(1 / self.du))
        # Biases
        self.B_p = self.pc.add_parameters((v, du), init=init, name='B')
        self.avg = None
        self.BU_p = None
Example #12
    def __init__(self, in_height, out_height):
        """
      :param in_height: input dimension of the affine transform
      :param out_height: output dimension of the affine transform
      """

        model = model_globals.dynet_param_collection.param_col
        self.in_height = in_height
        self.out_height = out_height

        normalInit = dy.NormalInitializer(0, 0.1)
        self.pW = model.add_parameters(dim=(self.out_height, self.in_height),
                                       init=normalInit)
        self.pb = model.add_parameters(dim=self.out_height)
Example #13
    def __init__(self,
                 in_height,
                 out_height,
                 nonlinearity='linear',
                 exp_global=Ref(Path("exp_global"))):
        """
      :param in_height: input dimension of the affine transform
      :param out_height: output dimension of the affine transform
      :param nonlinearity: nonlinear activation function
    """
        model = exp_global.dynet_param_collection.param_col
        self.in_height = in_height
        self.out_height = out_height
        self.nonlinearity = nonlinearity

        normalInit = dy.NormalInitializer(0, 0.1)
        self.pW = model.add_parameters(dim=(self.out_height, self.in_height),
                                       init=normalInit)
        self.pb = model.add_parameters(dim=self.out_height)
Example #14
    def __init__(self,
                 in_height,
                 out_height,
                 exp_global=Ref(Path("exp_global"))):
        """
    Args:
      in_height: input dimension of the affine transform
      out_height: output dimension of the affine transform
      """

        model = exp_global.dynet_param_collection.param_col
        self.in_height = in_height
        self.out_height = out_height

        normalInit = dy.NormalInitializer(0, 0.1)
        self.pW = model.add_parameters(dim=(self.out_height, self.in_height),
                                       init=normalInit)
        self.pb = model.add_parameters(dim=self.out_height)
Example #15
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.001):
        self._model = dy.ParameterCollection()

        self._input_dim = input_dim
        self._hidden_dim = hidden_dim
        self._output_dim = output_dim

        self._rnn = dy.SimpleRNNBuilder(self.LAYERS, self._input_dim,
                                        self._hidden_dim, self._model)
        # self._rnn.disable_dropout()
        self._W = self._model.add_parameters(
            (self._output_dim, self._hidden_dim), init=dy.NormalInitializer())

        self._learning_rate = learning_rate
        self._trainer = dy.MomentumSGDTrainer(
            self._model, learning_rate=self._learning_rate)

        self._l2_param = 0.0006
        # self._l2_param = 0.0

        self._init_layers()
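The training step that usually accompanies a setup like this (a generic DyNet sketch, not taken from the original class) renews the computation graph per example, runs forward, and lets the trainer apply the gradients:

import dynet as dy

model = dy.ParameterCollection()
W = model.add_parameters((1, 3), init=dy.NormalInitializer())
trainer = dy.MomentumSGDTrainer(model, learning_rate=0.001)

for step in range(10):
    dy.renew_cg()                          # fresh graph per example
    x = dy.inputVector([1.0, 2.0, 3.0])
    y_hat = dy.parameter(W) * x            # stand-in for the RNN forward pass
    loss = dy.squared_distance(y_hat, dy.inputVector([0.5]))
    loss.value()                           # run forward
    loss.backward()                        # accumulate gradients
    trainer.update()                       # apply them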
Example #16
    def __init__(self,
                 head_count: int,
                 model_dim: int,
                 downsample_factor: int = 1,
                 input_dim: int = None,
                 ignore_masks: bool = False,
                 plot_attention: typing.Optional[str] = None,
                 diag_gauss_mask: typing.Union[bool, numbers.Real] = False,
                 square_mask_std: bool = True,
                 cross_pos_encoding_type: typing.Optional[str] = None,
                 kq_pos_encoding_type: typing.Optional[str] = None,
                 kq_pos_encoding_size: int = 40,
                 max_len: int = 1500,
                 param_init: xnmt.param_initializers.ParamInitializer = xnmt.param_initializers.GlorotInitializer(),
                 bias_init: xnmt.param_initializers.ParamInitializer = xnmt.param_initializers.ZeroInitializer(),
                 linear_kvq=None,
                 kq_positional_embedder=None,
                 layer_norm=None,
                 res_shortcut=None,
                 desc: typing.Any = None) -> None:
        if input_dim is None: input_dim = model_dim
        self.input_dim = input_dim
        assert model_dim % head_count == 0
        self.dim_per_head = model_dim // head_count
        self.model_dim = model_dim
        self.head_count = head_count
        assert downsample_factor >= 1
        self.downsample_factor = downsample_factor
        self.plot_attention = plot_attention
        self.plot_attention_counter = 0
        self.desc = desc

        self.ignore_masks = ignore_masks
        self.diag_gauss_mask = diag_gauss_mask
        self.square_mask_std = square_mask_std

        self.kq_pos_encoding_type = kq_pos_encoding_type
        self.kq_pos_encoding_size = kq_pos_encoding_size
        self.max_len = max_len

        subcol = param_collections.ParamManager.my_params(self)

        if self.kq_pos_encoding_type is None:
            self.linear_kvq = self.add_serializable_component(
                "linear_kvq", linear_kvq,
                lambda: transforms.Linear(input_dim * downsample_factor,
                                          head_count * self.dim_per_head * 3,
                                          param_init=param_init,
                                          bias_init=bias_init))
        else:
            self.linear_kq, self.linear_v = \
              self.add_serializable_component("linear_kvq",
                                              linear_kvq,
                                              lambda: [
                                                transforms.Linear(input_dim * downsample_factor + self.kq_pos_encoding_size,
                                                                  head_count * self.dim_per_head * 2, param_init=param_init,
                                                                  bias_init=bias_init),
                                                transforms.Linear(input_dim * downsample_factor, head_count * self.dim_per_head,
                                                                  param_init=param_init, bias_init=bias_init)])
            assert self.kq_pos_encoding_type == "embedding"
            self.kq_positional_embedder = self.add_serializable_component(
                "kq_positional_embedder", kq_positional_embedder, lambda:
                embedders.PositionEmbedder(max_pos=self.max_len,
                                           emb_dim=self.kq_pos_encoding_size,
                                           param_init=param_init))

        if self.diag_gauss_mask:
            if self.diag_gauss_mask == "rand":
                rand_init = np.exp(
                    (np.random.random(size=(self.head_count, ))) *
                    math.log(1000))
                self.diag_gauss_mask_sigma = subcol.add_parameters(
                    dim=(1, 1, self.head_count),
                    init=dy.NumpyInitializer(rand_init))
            else:
                self.diag_gauss_mask_sigma = subcol.add_parameters(
                    dim=(1, 1, self.head_count),
                    init=dy.ConstInitializer(self.diag_gauss_mask))

        self.layer_norm = self.add_serializable_component(
            "layer_norm", layer_norm, lambda: norms.LayerNorm(model_dim))

        if model_dim != input_dim * downsample_factor:
            self.res_shortcut = self.add_serializable_component(
                "res_shortcut", res_shortcut,
                lambda: transforms.Linear(input_dim * downsample_factor,
                                          model_dim,
                                          param_init=param_init,
                                          bias_init=bias_init))
        self.cross_pos_encoding_type = cross_pos_encoding_type
        if cross_pos_encoding_type == "embedding":
            self.cross_pos_emb_p1 = subcol.add_parameters(
                dim=(self.max_len, self.dim_per_head, self.head_count),
                init=dy.NormalInitializer(mean=1.0, var=0.001))
            self.cross_pos_emb_p2 = subcol.add_parameters(
                dim=(self.max_len, self.dim_per_head, self.head_count),
                init=dy.NormalInitializer(mean=1.0, var=0.001))
        elif cross_pos_encoding_type is not None:
            raise NotImplementedError()
Example #17
from functools import partial

import dynet as dy

from tupa.config import Config
from tupa.features.feature_params import MISSING_VALUE

TRAINERS = {
    "sgd": (dy.SimpleSGDTrainer, "e0"),
    "cyclic": (dy.CyclicalSGDTrainer, "e0_min"),
    "momentum": (dy.MomentumSGDTrainer, "e0"),
    "adagrad": (dy.AdagradTrainer, "e0"),
    "adadelta": (dy.AdadeltaTrainer, None),
    "rmsprop": (dy.RMSPropTrainer, "e0"),
    "adam": (partial(dy.AdamTrainer, beta_2=0.9), "alpha"),
}

INITIALIZERS = {
    "glorot_uniform": dy.GlorotInitializer(),
    "normal": dy.NormalInitializer(),
    "uniform": dy.UniformInitializer(1),
    "const": dy.ConstInitializer(0),
}

ACTIVATIONS = {
    "square": dy.square,
    "cube": dy.cube,
    "tanh": dy.tanh,
    "sigmoid": dy.logistic,
    "relu": dy.rectify,
}


class NeuralNetwork(Classifier):
    """
Example #18
    def __init__(self, vocab, pos, xpos, rels, w2i, c2i, ext_words_train,
                 ext_words_devtest, options):

        self.model = dy.ParameterCollection()
        self.pretrained_embs = dy.ParameterCollection()
        self.learning_rate = options.learning_rate
        self.trainer = dy.AdamTrainer(self.model,
                                      alpha=self.learning_rate,
                                      beta_1=0.9,
                                      beta_2=0.9,
                                      eps=1e-12)

        self.dropout = float(options.dropout)
        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.posdims = options.posembedding_dims
        self.pred_batch_size = options.pred_batch_size
        self.ext_words_train = {
            word: ind + 2
            for word, ind in ext_words_train.items()
        }
        self.ext_words_devtest = {
            word: ind + 2
            for word, ind in ext_words_devtest.items()
        }
        self.wordsCount = vocab
        self.vocab = {word: ind + 2 for word, ind in w2i.items()}
        self.pos = {word: ind + 2 for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for word, ind in self.pos.items()}
        self.xpos = {word: ind + 2 for ind, word in enumerate(xpos)}
        self.id2xpos = {ind: word for word, ind in self.xpos.items()}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = {ind: word for word, ind in self.rels.items()}
        self.vocab['PAD'] = 1
        self.pos['PAD'] = 1
        self.xpos['PAD'] = 1

        self.external_embedding, self.edim, self.edim_out = None, 0, 0
        if options.external_embedding is not None:

            self.external_embedding = np.load(options.external_embedding)
            self.ext_voc = pickle.load(
                open(options.external_embedding_voc, "rb"))
            self.edim = self.external_embedding.shape[1]
            self.projected_embs = Lin_Projection(self.model, self.edim,
                                                 self.wdims)
            self.elookup_train = self.pretrained_embs.add_lookup_parameters(
                (len(self.ext_words_train) + 2, self.edim))
            for word, i in self.ext_words_train.items():
                self.elookup_train.init_row(
                    i, self.external_embedding[self.ext_voc[word], :])
            self.elookup_train.init_row(0, np.zeros(self.edim))
            self.elookup_train.init_row(1, np.zeros(self.edim))

            self.elookup_devtest = self.pretrained_embs.add_lookup_parameters(
                (len(self.ext_words_devtest) + 2, self.edim))
            for word, i in self.ext_words_devtest.items():
                self.elookup_devtest.init_row(
                    i, self.external_embedding[self.ext_voc[word], :])
            self.elookup_devtest.init_row(0, np.zeros(self.edim))
            self.elookup_devtest.init_row(1, np.zeros(self.edim))

            self.ext_words_train['PAD'] = 1
            self.ext_words_devtest['PAD'] = 1

            print(
                'Loaded external embeddings. External embedding vector dimension:',
                self.edim)

        #LSTMs
        self.fwdLSTM1 = LSTM(self.model,
                             self.wdims + self.posdims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM1 = LSTM(self.model,
                             self.wdims + self.posdims,
                             self.ldims,
                             forget_bias=0.0)
        self.fwdLSTM2 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM2 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)
        self.fwdLSTM3 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)
        self.bwdLSTM3 = LSTM(self.model,
                             self.ldims,
                             self.ldims,
                             forget_bias=0.0)

        self.biaffineParser = DeepBiaffineAttentionDecoder(
            self.model,
            len(self.rels),
            src_ctx_dim=self.ldims * 2,
            n_arc_mlp_units=400,
            n_label_mlp_units=100,
            arc_mlp_dropout=self.dropout,
            label_mlp_dropout=self.dropout)

        self.HybridCharembs = HybridCharacterAttention(self.model,
                                                       ldims=400,
                                                       input_size=self.cdims,
                                                       output_size=self.wdims,
                                                       dropout=self.dropout)

        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 2, self.wdims), init=dy.ConstInitializer(0))
        # 0 for unknown, 1 for [PAD]
        self.poslookup = self.model.add_lookup_parameters(
            (len(self.pos) + 2, self.posdims), init=dy.ConstInitializer(0))
        # 0 for unknown, 1 for [PAD]
        self.xposlookup = self.model.add_lookup_parameters(
            (len(self.xpos) + 2, self.posdims), init=dy.ConstInitializer(0))
        # 0 for unknown, 1 for [PAD]

        self.clookup = self.model.add_lookup_parameters(
            (len(c2i), self.cdims), init=dy.NormalInitializer())
        self.ROOT = self.model.add_parameters((self.wdims * 2),
                                              init=dy.ConstInitializer(0))
Example #19
    def __init__(self, pc, d_i, d_h, d_o):
        self.i2h = pc.add_parameters((d_h, d_i), init=dy.NormalInitializer())
        self.bh = pc.add_parameters((d_h, ), init=dy.NormalInitializer())
        self.h2o = pc.add_parameters((d_o, d_h), init=dy.NormalInitializer())
        self.bo = pc.add_parameters((d_o, ), init=dy.NormalInitializer())
Example #20
    def initializer(self, dim, is_lookup=False, num_shared=1):
        return dy.NormalInitializer(mean=self.mean, var=self.var)
Example #21
def main():
    dy.renew_cg()
    
    try:
        train_file = open(sys.argv[1])
        test_file = open(sys.argv[2])
    except (IndexError, IOError):
        print("usage: python classification_dynet.py <train_file> <test_file>")
        sys.exit(1)

    train_text_set, train_content_label_set, train_type_label_set, unique_content, unique_type = extract_from_json(train_file)
    test_text_set, test_content_label_set, test_type_label_set, _, _ = extract_from_json(test_file)
    

    word_dict = {}
    word_dict = extract_dictionary(train_text_set, word_dict)
    word_dict = extract_dictionary(test_text_set, word_dict)

    train_feature_matrix = generate_feature_matrix(train_text_set, word_dict)
    test_feature_matrix = generate_feature_matrix(test_text_set, word_dict)


    features_total = len(train_feature_matrix[0])
    para_collec = dy.ParameterCollection()
    pW1 = para_collec.add_parameters((150, 200), dy.NormalInitializer())
    pBias1 = para_collec.add_parameters((150), dy.ConstInitializer(0))
    pW2_content = para_collec.add_parameters((100, 150), dy.NormalInitializer())
    pBias2_content = para_collec.add_parameters((100), dy.ConstInitializer(0))
    pW3_content = para_collec.add_parameters((len(unique_content), 100), dy.NormalInitializer())
    pBias3_content = para_collec.add_parameters((len(unique_content)), dy.ConstInitializer(0))
    pW2_type = para_collec.add_parameters((50, 150), dy.NormalInitializer())
    pBias2_type = para_collec.add_parameters((50), dy.ConstInitializer(0))
    pW3_type = para_collec.add_parameters((len(unique_type), 50), dy.NormalInitializer())
    pBias3_type = para_collec.add_parameters((len(unique_type)), dy.ConstInitializer(0))
    lookup = para_collec.add_lookup_parameters((features_total, 200), dy.NormalInitializer())

    trainer = dy.SimpleSGDTrainer(para_collec)
    
    for i in range(0, 1):
        # resample minority and majority classes
        majority, majority_content_label, majority_type_label, minority, minority_content_label, minority_type_label = label_separator("type", train_feature_matrix, train_content_label_set, train_type_label_set)
        minority_u_text, minority_u_content_label, minority_u_type_label = resample(minority, minority_content_label, minority_type_label, replace=True, n_samples=int(len(majority) * 3), random_state=123)

        X_train = train_feature_matrix
        y_train_content = train_content_label_set
        y_train_type = train_type_label_set

        for index in range(0, 500):

            w1 = dy.parameter(pW1)
            bias1 = dy.parameter(pBias1)
            w2_content = dy.parameter(pW2_content)
            bias2_content = dy.parameter(pBias2_content)
            w3_content = dy.parameter(pW3_content)
            bias3_content = dy.parameter(pBias3_content)
            w2_type = dy.parameter(pW2_type)
            bias2_type = dy.parameter(pBias2_type)
            w3_type = dy.parameter(pW3_type)
            bias3_type = dy.parameter(pBias3_type)
            
            input_text = []
            input_array = X_train[index]
            
            for i in range(0, X_train[index].size):
                if X_train[index][i] > 0:
                    input_text.append(lookup[i])  # look up each active feature by its index, not by its count

            x = dy.concatenate(input_text, 1)
            e_in = dy.sum_dim(x, [1])/features_total
            e_affin1 = dy.affine_transform([bias1, w1, e_in])
            e_affin1 = dy.rectify(e_affin1)
            e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
            e_content_affin2 = dy.dropout(e_content_affin2, 0.5)
            e_content_affin2 = dy.rectify(e_content_affin2)
            e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
            e_content_affin3 = dy.dropout(e_content_affin3, 0.5)
            e_content_affin3 = dy.rectify(e_content_affin3)
            e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
            e_type_affin2 = dy.dropout(e_type_affin2, 0.5)
            e_type_affin2 = dy.rectify(e_type_affin2)
            e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
            e_type_affin3 = dy.dropout(e_type_affin3, 0.5)
            e_type_affin3 = dy.rectify(e_type_affin3)
            content_output = dy.pickneglogsoftmax(e_content_affin3, y_train_content[index])
            content_loss = content_output.scalar_value()
            type_output = dy.pickneglogsoftmax(e_type_affin3, y_train_type[index])
            type_loss = type_output.scalar_value()
            
            if index % 100 == 0:
                print(index, ": content_loss: ", content_loss, "type_loss", type_loss)
            
            content_output.backward()
            trainer.update()
            type_output.backward()
            trainer.update()

            dy.cg_checkpoint()

    print("testing...")
    pred_content = []
    pred_type = []

    w1 = dy.parameter(pW1)
    bias1 = dy.parameter(pBias1)
    w2_content = dy.parameter(pW2_content)
    bias2_content = dy.parameter(pBias2_content)
    w3_content = dy.parameter(pW3_content)
    bias3_content = dy.parameter(pBias3_content)
    w2_type = dy.parameter(pW2_type)
    bias2_type = dy.parameter(pBias2_type)
    w3_type = dy.parameter(pW3_type)
    bias3_type = dy.parameter(pBias3_type)

    for index in range(0, len(test_feature_matrix)):
       
        input_text = []
        line = test_text_set[index]
        for word in line:
            # check if RT
            if word == "RT":
                input_text.append(lookup[len(word_dict)])
            # check if hashtag
            if word[0] == "#":
                input_text.append(lookup[len(word_dict) + 1])

            # check if mention
            if word[0] == "@":
                input_text.append(lookup[len(word_dict) + 2])

            # just word itself 
            if word in word_dict:
                input_text.append(lookup[word_dict[word]])

            try:
                # lowercased version of the word
                lower_word = str(word).lower()
                input_text.append(lookup[word_dict[lower_word]])
                # punctuation-stripped version of the word
                clean_word = str(word).translate(str.maketrans('', '', string.punctuation))
                input_text.append(lookup[word_dict[clean_word]])
            except KeyError:
                continue

        x = dy.concatenate(input_text, 1)
        e_in = dy.sum_dim(x, [1]) / features_total
        e_affin1 = dy.affine_transform([bias1, w1, e_in])
        e_affin1 = dy.rectify(e_affin1)
        e_content_affin2 = dy.affine_transform([bias2_content, w2_content, e_affin1])
        e_content_affin2 = dy.rectify(e_content_affin2)
        e_content_affin3 = dy.affine_transform([bias3_content, w3_content, e_content_affin2])
        e_content_affin3 = dy.rectify(e_content_affin3)
        e_type_affin2 = dy.affine_transform([bias2_type, w2_type, e_affin1])
        e_type_affin2 = dy.rectify(e_type_affin2)
        e_type_affin3 = dy.affine_transform([bias3_type, w3_type, e_type_affin2])
        e_type_affin3 = dy.rectify(e_type_affin3)
        content_output = np.argmax(e_content_affin3.npvalue())
        pred_content.append(content_output)
        type_output = np.argmax(e_type_affin3.npvalue())
        pred_type.append(type_output)

    misclassification_content = 0
    misclassification_type = 0
    for index in range(0, len(pred_content)):
        if pred_content[index] != test_content_label_set[index]:
            misclassification_content += 1
        if pred_type[index] != test_type_label_set[index]:
            misclassification_type += 1
    
    print("content acc: ", (1 - float(misclassification_content/len(pred_content))))
    print("type acc: ", (1 - float(misclassification_type/len(pred_type))))
Example #22
trainer = dy.SimpleSGDTrainer(
    model,  # Trainer
    opt.learning_rate,
    opt.learning_rate_decay)
trainer.set_clip_threshold(-1)  # Disable gradient clipping

# Create the parameters
params = []  # This holds the parameters for each layer
for i, (di, do) in enumerate(zip(
        dims,
        dims[1:])):  # Iterate over the input/output dimensions for each layer
    var = 2 / (di + do) / (
        1 if (i == num_layers - 1) else gain
    )  # Variance for the initialization (see Glorot & Bengio, 2010)
    W_p = model.add_parameters(
        (do, di), init=dy.NormalInitializer(0, var))  # Sample weights
    b_p = model.add_parameters(
        (do, ), init=dy.ConstInitializer(0))  # Initialize biases at 0
    params.append((W_p, b_p))  # Add to the list

# Load existing model
if opt.model_in is not None:
    print('Loading from file:', opt.model_in)
    params_list = model.load(opt.model_in)
    params = [
        (W_p, b_p)
        for W_p, b_p in zip(params_list[:num_layers], params_list[num_layers:])
    ]


def run_MLP(x):
Example #23
    def initializer(self,
                    dim,
                    is_lookup: bool = False,
                    num_shared: numbers.Integral = 1) -> dy.NormalInitializer:
        return dy.NormalInitializer(mean=self.mean, var=self.var)
Example #24
import dynet
"""
various helper mappings
"""
## DyNet adds init option to choose initializer: https://github.com/clab/dynet/blob/master/python/CHANGES.md
INITIALIZER_MAP = {
    'glorot': dynet.GlorotInitializer(),
    'constant': dynet.ConstInitializer(0.01),
    'uniform': dynet.UniformInitializer(0.1),
    'normal': dynet.NormalInitializer(mean=0, var=1)
}

TRAINER_MAP = {
    "sgd": dynet.SimpleSGDTrainer,
    "adam": dynet.AdamTrainer,
    "adadelta": dynet.AdadeltaTrainer,
    "adagrad": dynet.AdagradTrainer,
    "momentum": dynet.MomentumSGDTrainer
}

ACTIVATION_MAP = {"tanh": dynet.tanh, "rectify": dynet.rectify}

BUILDERS = {
    "lstm": dynet.LSTMBuilder,  # dynet.LSTMBuilder is dynet.VanillaLSTMBuilder (cf. https://github.com/clab/dynet/issues/474)
    "lstmc": dynet.CoupledLSTMBuilder,
    "gru": dynet.GRUBuilder,
    "rnn": dynet.SimpleRNNBuilder
}
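These maps are meant to be indexed by a name coming from configuration; a hypothetical sketch of how they would be consumed:

pc = dynet.ParameterCollection()
init = INITIALIZER_MAP["normal"]  # e.g. chosen by an --initializer flag
W = pc.add_parameters((8, 8), init=init)
builder = BUILDERS["lstm"](1, 8, 8, pc)  # 1 layer, input dim 8, hidden dim 8
act = ACTIVATION_MAP["tanh"]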
Example #25
# Model parameters
num_classes = len(set(train_y)) # Number of classes
input_length = train_x.shape[1] # Dimension of the input
dh = opt.hidden_dim             # Hidden dimension
di = 1                          # Input dimension

# Create model
model = dy.Model()                      # DyNet Model
trainer = dy.SimpleSGDTrainer(model,    # Trainer
                              opt.learning_rate,
                              opt.learning_rate_decay)
trainer.set_clip_threshold(-1)          # Disable gradient clipping

# Create the parameters
Wx_p = model.add_parameters((dh, di), init=dy.NormalInitializer(0, 0.001))                      # Sample weights
Wh_p = model.add_parameters((dh, dh), init=dy.IdentityInitializer())                            # Sample weights
bh_p = model.add_parameters((dh,), init=dy.ConstInitializer(0))                                 # Initialize biases at 0
A_p = model.add_parameters((num_classes, dh), init=dy.NormalInitializer(0, 1/(dh+num_classes))) # Sample weights
b_p = model.add_parameters((num_classes,), init=dy.ConstInitializer(0))                         # Initialize biases at 0

# Load existing model
if opt.model_in is not None:
    print('Loading from file:', opt.model_in)
    Wx_p, Wh_p, bh_p, A_p, b_p = model.load(opt.model_in)


def run_IRNN(x):
    """
    Runs the IRNN to get the last layer before softmax
    """