Example 1
    def setUp(self):
        events.clear()
        ParamManager.init_param_col()

        # Load a pre-trained model
        load_experiment = LoadSerialized(filename="test/data/tiny_jaen.model",
                                         overwrite=[
                                             {
                                                 "path": "train",
                                                 "val": None
                                             },
                                             {
                                                 "path": "status",
                                                 "val": None
                                             },
                                         ])
        EXP_DIR = '.'
        EXP = "decode"
        uninitialized_experiment = YamlPreloader.preload_obj(load_experiment,
                                                             exp_dir=EXP_DIR,
                                                             exp_name=EXP)
        loaded_experiment = initialize_if_needed(uninitialized_experiment)
        ParamManager.populate()

        # Pull out the parts we need from the experiment
        self.model = loaded_experiment.model
        src_vocab = self.model.src_reader.vocab
        trg_vocab = self.model.trg_reader.vocab

        event_trigger.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("test/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("test/data/head.en"))
Example 2
    def setUp(self):
        layer_dim = 512
        events.clear()
        ParamManager.init_param_col()
        src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
        trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
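        # Assemble a small encoder-decoder translator: BiLSTM encoder, MLP attention, autoregressive LSTM decoder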
        self.model = DefaultTranslator(
            src_reader=PlainTextReader(vocab=src_vocab),
            trg_reader=PlainTextReader(vocab=trg_vocab),
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                        hidden_dim=layer_dim),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="model.decoder.rnn"),
                transform=NonLinear(input_dim=layer_dim * 2,
                                    output_dim=layer_dim),
                scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        event_trigger.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
Example 3
  def setUp(self):
    xnmt.events.clear()
    ParamManager.init_param_col()

    self.src_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.ja.vocab"))
    self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
    self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))
Example 4
    def setUp(self):
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        layer_dim = 32
        xnmt.events.clear()
        ParamManager.init_param_col()

        edge_vocab = Vocab(vocab_file="examples/data/parse/head.en.edge_vocab")
        node_vocab = Vocab(vocab_file="examples/data/parse/head.en.node_vocab")
        value_vocab = Vocab(vocab_file="examples/data/head.en.vocab")

        self.src_reader = input_readers.PlainTextReader(vocab=value_vocab)
        self.trg_reader = input_readers.CoNLLToRNNGActionsReader(
            surface_vocab=value_vocab,
            nt_vocab=node_vocab,
            edg_vocab=edge_vocab)

        self.layer_dim = layer_dim
        self.src_data = list(
            self.src_reader.read_sents("examples/data/head.en"))
        self.trg_data = list(
            self.trg_reader.read_sents("examples/data/parse/head.en.conll"))
        self.loss_calculator = MLELoss()
        self.head_composer = composer.DyerHeadComposer(
            fwd_combinator=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                hidden_dim=layer_dim),
            bwd_combinator=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                hidden_dim=layer_dim),
            transform=AuxNonLinear(input_dim=layer_dim,
                                   aux_input_dim=layer_dim,
                                   output_dim=layer_dim))

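        # RNNG translator: lookup embeddings with an identity encoder and an RNNG decoder over CoNLL-derived parse actions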
        self.model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=LookupEmbedder(emb_dim=layer_dim,
                                        vocab_size=len(value_vocab)),
            encoder=IdentitySeqTransducer(),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            decoder=RNNGDecoder(
                input_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim),
                transform=AuxNonLinear(input_dim=layer_dim,
                                       output_dim=layer_dim,
                                       aux_input_dim=layer_dim),
                bridge=NoBridge(dec_dim=layer_dim, dec_layers=1),
                graph_reader=self.trg_reader,
                head_composer=self.head_composer))
        event_trigger.set_train(True)

        my_batcher = batchers.TrgBatcher(batch_size=1)
        self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
        dy.renew_cg(immediate_compute=True, check_validity=True)
Example 5
  def setUp(self):
    # Seeding
    numpy.random.seed(2)
    random.seed(2)
    layer_dim = 4
    xnmt.events.clear()
    ParamManager.init_param_col()
    self.segment_encoder_bilstm = BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim)
    self.segment_composer = SumComposer()

    self.src_reader = CharFromWordTextReader(vocab=Vocab(vocab_file="test/data/head.ja.charvocab"))
    self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="test/data/head.en.vocab"))
    self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5)

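    # Components for learned segmentation via policy gradient: baseline, policy network, and epsilon-greedy exploration with a Poisson prior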
    baseline = Linear(input_dim=layer_dim, output_dim=1)
    policy_network = Linear(input_dim=layer_dim, output_dim=2)
    self.poisson_prior = PoissonPrior(mu=3.3)
    self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
    self.conf_penalty = ConfidencePenalty()
    self.policy_gradient = PolicyGradient(input_dim=layer_dim,
                                          output_dim=2,
                                          baseline=baseline,
                                          policy_network=policy_network,
                                          z_normalization=True,
                                          conf_penalty=self.conf_penalty)
    self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)
    self.segmenting_encoder = SegmentingSeqTransducer(
      embed_encoder=self.segment_encoder_bilstm,
      segment_composer=self.segment_composer,
      final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      policy_learning=self.policy_gradient,
      eps_greedy=self.eps_greedy,
      length_prior=self.length_prior,
    )

    self.model = DefaultTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
      encoder=self.segmenting_encoder,
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim, yaml_path="decoder"),
                                    transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim,
                                                           aux_input_dim=layer_dim),
                                    scorer=Softmax(vocab_size=100, input_dim=layer_dim),
                                    embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
                                    bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
    )
    event_trigger.set_train(True)

    self.layer_dim = layer_dim
    self.src_data = list(self.model.src_reader.read_sents("test/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("test/data/head.en"))
    my_batcher = batchers.TrgBatcher(batch_size=3)
    self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
    dy.renew_cg(immediate_compute=True, check_validity=True)
Example 6
    def setUp(self):
        events.clear()
        ParamManager.init_param_col()

        src_vocab = Vocab(vocab_file="test/data/head.ja.vocab")
        trg_vocab = Vocab(vocab_file="test/data/head.en.vocab")
        self.src_reader = PlainTextReader(vocab=src_vocab)
        self.trg_reader = PlainTextReader(vocab=trg_vocab)
        self.src_data = list(self.src_reader.read_sents("test/data/head.ja"))
        self.trg_data = list(self.trg_reader.read_sents("test/data/head.en"))
Example 7
    def setUp(self):
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        layer_dim = 4
        xnmt.events.clear()
        ParamManager.init_param_col()
        self.segment_composer = SumComposer()
        self.src_reader = CharFromWordTextReader(vocab=Vocab(
            vocab_file="examples/data/head.ja.charvocab"))
        self.trg_reader = PlainTextReader(vocab=Vocab(
            vocab_file="examples/data/head.en.vocab"))
        self.loss_calculator = FeedbackLoss(child_loss=MLELoss(), repeat=5)
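        # Segmenting encoder: sum-compose characters into segments, then encode the segments with a BiLSTM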
        self.segmenting_encoder = SegmentingSeqTransducer(
            segment_composer=self.segment_composer,
            final_transducer=BiLSTMSeqTransducer(input_dim=layer_dim,
                                                 hidden_dim=layer_dim),
        )

        self.model = DefaultTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
            encoder=self.segmenting_encoder,
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="decoder"),
                transform=AuxNonLinear(input_dim=layer_dim,
                                       output_dim=layer_dim,
                                       aux_input_dim=layer_dim),
                scorer=Softmax(vocab_size=100, input_dim=layer_dim),
                embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
                bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        event_trigger.set_train(True)

        self.layer_dim = layer_dim
        self.src_data = list(
            self.model.src_reader.read_sents("examples/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("examples/data/head.en"))
        my_batcher = batchers.TrgBatcher(batch_size=3)
        self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
        dy.renew_cg(immediate_compute=True, check_validity=True)
Example 8
    def setUp(self):
        # Seeding
        numpy.random.seed(2)
        random.seed(2)
        layer_dim = 32
        xnmt.events.clear()
        ParamManager.init_param_col()

        self.src_reader = PlainTextReader(vocab=Vocab(
            vocab_file="test/data/head.ja.vocab"))
        self.trg_reader = PlainTextReader(vocab=Vocab(
            vocab_file="test/data/head.en.vocab"))
        self.layer_dim = layer_dim
        self.src_data = list(self.src_reader.read_sents("test/data/head.ja"))
        self.trg_data = list(self.trg_reader.read_sents("test/data/head.en"))
        self.input_vocab_size = len(self.src_reader.vocab.i2w)
        self.output_vocab_size = len(self.trg_reader.vocab.i2w)
        self.loss_calculator = MLELoss()

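        # Simultaneous translation model with a unidirectional LSTM encoder and an autoregressive LSTM decoder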
        self.model = SimultaneousTranslator(
            src_reader=self.src_reader,
            trg_reader=self.trg_reader,
            src_embedder=SimpleWordEmbedder(emb_dim=layer_dim,
                                            vocab_size=self.input_vocab_size),
            encoder=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim),
            attender=MlpAttender(input_dim=layer_dim,
                                 state_dim=layer_dim,
                                 hidden_dim=layer_dim),
            decoder=AutoRegressiveDecoder(
                input_dim=layer_dim,
                rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                         hidden_dim=layer_dim,
                                         decoder_input_dim=layer_dim,
                                         yaml_path="decoder"),
                transform=AuxNonLinear(input_dim=layer_dim,
                                       output_dim=layer_dim,
                                       aux_input_dim=layer_dim),
                scorer=Softmax(vocab_size=self.output_vocab_size,
                               input_dim=layer_dim),
                embedder=SimpleWordEmbedder(emb_dim=layer_dim,
                                            vocab_size=self.output_vocab_size),
                bridge=NoBridge(dec_dim=layer_dim, dec_layers=1)),
        )
        event_trigger.set_train(True)

        my_batcher = batchers.TrgBatcher(batch_size=3)
        self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
        dy.renew_cg(immediate_compute=True, check_validity=True)
Example 9
    def __init__(self,
                 input_dim=Ref("exp_global.default_layer_dim"),
                 param_init=Ref("exp_global.param_init",
                                default=bare(GlorotInitializer)),
                 bias_init=Ref("exp_global.bias_init",
                               default=bare(ZeroInitializer)),
                 num_heads=8):
        assert (input_dim % num_heads == 0)

        param_collection = ParamManager.my_params(self)

        self.input_dim = input_dim
        self.num_heads = num_heads
        self.head_dim = input_dim // num_heads

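        # Four (input_dim, input_dim) projection matrices (queries, keys, values, output) and their (1, input_dim) biases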
        self.pWq, self.pWk, self.pWv, self.pWo = [
            param_collection.add_parameters(dim=(input_dim, input_dim),
                                            init=param_init.initializer(
                                                (input_dim, input_dim)))
            for _ in range(4)
        ]
        self.pbq, self.pbk, self.pbv, self.pbo = [
            param_collection.add_parameters(dim=(1, input_dim),
                                            init=bias_init.initializer((1, input_dim)))
            for _ in range(4)
        ]
Example 10
 def __init__(self,
              e0: numbers.Real = 0.1,
              eps: numbers.Real = 1e-20,
              skip_noisy: bool = False) -> None:
     super().__init__(optimizer=dy.AdagradTrainer(
         ParamManager.global_collection(), e0, eps=eps),
                      skip_noisy=skip_noisy)
Example 11
    def __init__(self, filter_height, filter_width, channels, num_filters,
                 stride):
        """
    Args:
      num_layers: depth of the RNN
      input_dim: size of the inputs
      hidden_dim: size of the outputs (and intermediate RNN layer representations)
    """
        model = ParamManager.my_params(self)
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.channels = channels
        self.num_filters = num_filters
        self.stride = stride  # (2,2)
        self.hidden_states = {}

        normalInit = dy.NormalInitializer(0, 0.1)
        self.filters1 = model.add_parameters(
            dim=(self.filter_height[0], self.filter_width[0], self.channels[0],
                 self.num_filters[0]),
            init=normalInit)
        self.filters2 = model.add_parameters(
            dim=(self.filter_height[1], self.filter_width[1], self.channels[1],
                 self.num_filters[1]),
            init=normalInit)
        self.filters3 = model.add_parameters(
            dim=(self.filter_height[2], self.filter_width[2], self.channels[2],
                 self.num_filters[2]),
            init=normalInit)
Example 12
 def __init__(self,
              eps: numbers.Real = 1e-6,
              rho: numbers.Real = 0.95,
              skip_noisy: bool = False) -> None:
     super().__init__(optimizer=dy.AdadeltaTrainer(
         ParamManager.global_collection(), eps, rho),
                      skip_noisy=skip_noisy)
Example 13
 def __init__(self,
              e0: numbers.Real = 0.01,
              mom: numbers.Real = 0.9,
              skip_noisy: bool = False) -> None:
     super().__init__(optimizer=dy.MomentumSGDTrainer(
         ParamManager.global_collection(), e0, mom),
                      skip_noisy=skip_noisy)
Example 14
    def __init__(self,
                 layers,
                 input_dim,
                 hidden_dim,
                 param_init=Ref("exp_global.param_init",
                                default=bare(GlorotInitializer)),
                 bias_init=Ref("exp_global.bias_init",
                               default=bare(ZeroInitializer))):
        if layers != 1:
            raise RuntimeError(
                "CustomLSTMSeqTransducer supports only exactly one layer")
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        model = ParamManager.my_params(self)

        # [i; f; o; g]
        self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, input_dim)))
        self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, hidden_dim)))
        self.p_b = model.add_parameters(dim=(hidden_dim * 4, ),
                                        init=bias_init.initializer(
                                            (hidden_dim * 4, )))
Example 15
  def setUp(self):
    # Seeding
    numpy.random.seed(2)
    random.seed(2)
    layer_dim = 32
    xnmt.events.clear()
    ParamManager.init_param_col()
   
    src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
    self.src_reader = CompoundReader(readers=[
      PlainTextReader(vocab=src_vocab),
      SimultActionTextReader()
    ], vocab=src_vocab)

    self.trg_reader = PlainTextReader(vocab=Vocab(vocab_file="examples/data/head.en.vocab"))
    self.layer_dim = layer_dim
    self.src_data = list(self.src_reader.read_sents(["examples/data/head.ja", "examples/data/simult/head.jaen.actions"]))
    self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))
    self.input_vocab_size = len(self.src_reader.vocab.i2w)
    self.output_vocab_size = len(self.trg_reader.vocab.i2w)
    self.loss_calculator = loss_calculators.MLELoss()
    
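    # Simultaneous translator with a two-action MLP policy network; oracle action sequences are used at both train and test time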
    self.model = SimultaneousTranslator(
      src_reader=self.src_reader,
      trg_reader=self.trg_reader,
      src_embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=self.input_vocab_size),
      encoder=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
      attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim),
      decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                    rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim,
                                                             decoder_input_dim=layer_dim, yaml_path="decoder"),
                                    transform=AuxNonLinear(input_dim=layer_dim, output_dim=layer_dim,
                                                           aux_input_dim=layer_dim),
                                    scorer=Softmax(vocab_size=self.output_vocab_size, input_dim=layer_dim),
                                    embedder=LookupEmbedder(emb_dim=layer_dim, vocab_size=self.output_vocab_size),
                                    bridge=NoBridge(dec_dim=layer_dim, dec_layers=1)),
      policy_network=network.PolicyNetwork(transforms.MLP(2 * self.layer_dim, self.layer_dim, 2)),
      policy_train_oracle=True,
      policy_test_oracle=True
    )
    event_trigger.set_train(True)

    my_batcher = batchers.TrgBatcher(batch_size=3)
    self.src, self.trg = my_batcher.pack(self.src_data, self.trg_data)
    dy.renew_cg(immediate_compute=True, check_validity=True)
Example 16
 def __init__(self,
              alpha: numbers.Real = 0.001,
              beta_1: numbers.Real = 0.9,
              beta_2: numbers.Real = 0.999,
              eps: numbers.Real = 1e-8,
              skip_noisy: bool = False) -> None:
     super().__init__(optimizer=dy.AdamTrainer(
         ParamManager.global_collection(), alpha, beta_1, beta_2, eps),
                      skip_noisy=skip_noisy)
Example 17
    def __init__(self,
                 layers=1,
                 input_dim=Ref("exp_global.default_layer_dim"),
                 hidden_dim=Ref("exp_global.default_layer_dim"),
                 dropout=Ref("exp_global.dropout", default=0.0),
                 weightnoise_std=Ref("exp_global.weight_noise", default=0.0),
                 param_init=Ref("exp_global.param_init",
                                default=bare(GlorotInitializer)),
                 bias_init=Ref("exp_global.bias_init",
                               default=bare(ZeroInitializer)),
                 yaml_path=None,
                 decoder_input_dim=Ref("exp_global.default_layer_dim",
                                       default=None),
                 decoder_input_feeding=True):
        self.num_layers = layers
        model = ParamManager.my_params(self)
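        # When built as a decoder RNN with input feeding, the input is widened by decoder_input_dim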
        if yaml_path is not None and "decoder" in yaml_path:
            if decoder_input_feeding:
                input_dim += decoder_input_dim
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout
        self.weightnoise_std = weightnoise_std
        self.input_dim = input_dim

        if not isinstance(param_init, Sequence):
            param_init = [param_init] * layers
        if not isinstance(bias_init, Sequence):
            bias_init = [bias_init] * layers

        # [i; f; o; g]
        self.p_Wx = [
            model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                 init=param_init[0].initializer(
                                     (hidden_dim * 4, input_dim),
                                     num_shared=4))
        ]
        self.p_Wx += [
            model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                 init=param_init[i].initializer(
                                     (hidden_dim * 4, hidden_dim),
                                     num_shared=4)) for i in range(1, layers)
        ]
        self.p_Wh = [
            model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                 init=param_init[i].initializer(
                                     (hidden_dim * 4, hidden_dim),
                                     num_shared=4)) for i in range(layers)
        ]
        self.p_b = [
            model.add_parameters(dim=(hidden_dim * 4, ),
                                 init=bias_init[i].initializer(
                                     (hidden_dim * 4, ), num_shared=4))
            for i in range(layers)
        ]

        self.dropout_mask_x = None
        self.dropout_mask_h = None
Example 18
 def __init__(self,
              alpha=0.001,
              beta_1=0.9,
              beta_2=0.999,
              eps=1e-8,
              update_every: int = 1,
              skip_noisy: bool = False):
     super().__init__(optimizer=dy.AdamTrainer(
         ParamManager.global_collection(), alpha, beta_1, beta_2, eps),
                      skip_noisy=skip_noisy)
Example 19
  def __init__(self, layers=1, input_dim=512, h=1,
               dropout=0.0, attn_dropout=False, layer_norm=False, **kwargs):
    dy_model = ParamManager.my_params(self)
    self.layer_names = []
    for i in range(1, layers + 1):
      name = 'l{}'.format(i)
      layer = EncoderLayer(dy_model, input_dim, h, attn_dropout, layer_norm)
      self.layer_names.append((name, layer))

    self.dropout_val = dropout
Example 20
    def __init__(self,
                 filter_height,
                 filter_width,
                 channels,
                 num_filters,
                 stride,
                 rhn_num_hidden_layers,
                 rhn_dim,
                 rhn_microsteps,
                 attention_dim,
                 residual=False):
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.channels = channels
        self.num_filters = num_filters
        self.stride = stride
        self.rhn_num_hidden_layers = rhn_num_hidden_layers
        self.rhn_dim = rhn_dim
        self.rhn_microsteps = rhn_microsteps
        self.attention_dim = attention_dim
        self.residual = residual

        model = ParamManager.my_params(self)
        # Convolutional layer
        self.filter_conv = model.add_parameters(dim=(self.filter_height,
                                                     self.filter_width,
                                                     self.channels,
                                                     self.num_filters))
        # Recurrent highway layer
        self.recur = []
        self.linear = []
        self.init = []
        self.attention = []

        input_dim = num_filters
        for _ in range(rhn_num_hidden_layers):
            self.init.append(model.add_parameters((rhn_dim, )))
            self.linear.append((model.add_parameters((rhn_dim, input_dim)),
                                model.add_parameters((
                                    rhn_dim,
                                    input_dim,
                                ))))
            input_dim = rhn_dim
            recur_layer = []
            for _ in range(self.rhn_microsteps):
                recur_layer.append((model.add_parameters(
                    (rhn_dim, rhn_dim)), model.add_parameters(
                        (rhn_dim, )), model.add_parameters((
                            rhn_dim,
                            rhn_dim,
                        )), model.add_parameters((rhn_dim, ))))
            self.recur.append(recur_layer)
        # Attention layer
        self.attention.append((model.add_parameters(
            (attention_dim, rhn_dim)), model.add_parameters(attention_dim, )))
Example 21
 def __init__(self,
              child: SeqTransducer,
              input_dim: int,
              layer_norm: bool = False):
     self.child = child
     self.input_dim = input_dim
     self.layer_norm = layer_norm
     if layer_norm:
         model = ParamManager.my_params(self)
         self.ln_g = model.add_parameters(dim=(input_dim, ))
         self.ln_b = model.add_parameters(dim=(input_dim, ))
Example 22
 def setUp(self):
   # Seeding
   np.random.seed(2)
   random.seed(2)
   layer_dim = 4
   xnmt.events.clear()
   ParamManager.init_param_col()
   self.src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
   self.src_char_vocab = CharVocab(vocab_file="examples/data/head.ja.vocab")
   self.ngram_vocab = Vocab(vocab_file="examples/data/head.ngramcount.ja")
   self.trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")
   
   self.src_reader = CharFromWordTextReader(vocab=self.src_vocab, char_vocab=self.src_char_vocab)
   self.trg_reader = PlainTextReader(vocab=self.trg_vocab)

   self.layer_dim = layer_dim
   self.src_data = list(self.src_reader.read_sents("examples/data/head.ja"))
   self.trg_data = list(self.trg_reader.read_sents("examples/data/head.en"))
   self.src, self.trg = batchers.TrgBatcher(batch_size=3).pack(self.src_data, self.trg_data)
   dy.renew_cg(immediate_compute=True, check_validity=True)
Example 23
 def __init__(self,
              ngram_size,
              param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
              bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
              embed_dim=Ref("exp_global.default_layer_dim"),
              hidden_dim=Ref("exp_global.default_layer_dim")):
   model = ParamManager.my_params(self)
   dim = (1, ngram_size, embed_dim, hidden_dim)
   self.filter = model.add_parameters(dim=dim, init=param_init.initializer(dim))
   self.bias = model.add_parameters(dim=(embed_dim,), init=bias_init.initializer((embed_dim,)))
   self.ngram_size = ngram_size
   self.embed_dim = embed_dim
Example 24
    def __init__(self,
                 input_dim,
                 window_receptor,
                 output_dim,
                 num_layers,
                 internal_dim,
                 non_linearity='linear'):
        """
    Args:
      num_layers: num layers after first receptor conv
      input_dim: size of the inputs
      window_receptor: window for the receptor
      ouput_dim: size of the outputs
      internal_dim: size of hidden dimension, internal dimension
      non_linearity: Non linearity to apply between layers
      """

        model = ParamManager.my_params(self)
        self.input_dim = input_dim
        self.window_receptor = window_receptor
        self.internal_dim = internal_dim
        self.non_linearity = non_linearity
        self.output_dim = output_dim
        if self.non_linearity == 'linear':
            self.gain = 1.0
        elif self.non_linearity == 'tanh':
            self.gain = 1.0
        elif self.non_linearity == 'relu':
            self.gain = 0.5
        elif self.non_linearity == 'sigmoid':
            self.gain = 4.0

        normalInit = dy.NormalInitializer(0, 0.1)

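        # One receptor convolution over the input window, num_layers 1x1 convolutions, and a final 1x1 projection to output_dim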
        self.pConv1 = model.add_parameters(dim=(self.input_dim,
                                                self.window_receptor, 1,
                                                self.internal_dim),
                                           init=normalInit)
        self.pBias1 = model.add_parameters(dim=(self.internal_dim, ))
        self.builder_layers = []
        for _ in range(num_layers):
            conv = model.add_parameters(dim=(self.internal_dim, 1, 1,
                                             self.internal_dim),
                                        init=normalInit)
            bias = model.add_parameters(dim=(self.internal_dim, ))
            self.builder_layers.append((conv, bias))

        self.last_conv = model.add_parameters(dim=(self.internal_dim, 1, 1,
                                                   self.output_dim),
                                              init=normalInit)
        self.last_bias = model.add_parameters(dim=(self.output_dim, ))
Example 25
  def __init__(self, layers=1, input_dim=512, h=1,
               dropout=0.0, attn_dropout=False, layer_norm=False,
               vocab_size=None, vocab=None,
               trg_reader=Ref("model.trg_reader")):
    dy_model = ParamManager.my_params(self)
    self.layer_names = []
    for i in range(1, layers + 1):
      name = 'l{}'.format(i)
      layer = DecoderLayer(dy_model, input_dim, h, attn_dropout, layer_norm)
      self.layer_names.append((name, layer))

    self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
    self.output_affine = LinearSent(dy_model, input_dim, self.vocab_size)
    self.dropout_val = dropout
Example 26
  def __init__(self, in_height, out_height):
    """
    Args:
      num_layers: depth of the RNN
      input_dim: size of the inputs
      hidden_dim: size of the outputs (and intermediate RNN layer representations)
      """

    model = ParamManager.my_params(self)
    self.in_height = in_height
    self.out_height = out_height

    normalInit = dy.NormalInitializer(0, 0.1)
    self.pW = model.add_parameters(dim=(self.out_height, self.in_height), init=normalInit)
    self.pb = model.add_parameters(dim=(self.out_height,))
Example 27
 def update(self) -> None:
   self.global_step += 1
   if self.rescale_grads:
     torch.nn.utils.clip_grad_norm_(ParamManager.global_collection().parameters(), self.rescale_grads)
   self.scheduler.step()
   if settings.USE_TENSORBOARD:
     tee.tensorboard_writer.add_scalars(name="lr", tag_scalar_dict={"lr": self.learning_rate * self.lr_factor},
                                        global_step=self.global_step)
     if not self.skip_noisy:
       tee.tensorboard_writer.add_scalars(name="grad", tag_scalar_dict={"norm": np.exp(self.grad_log_norm())},
                                          global_step=self.global_step)
   if not (self.skip_noisy and self.check_gradients_noisy()):
     self.optimizer.step()
   else:
     logger.info("skipping noisy update")
Example 28
 def __init__(self,
              word_vocab=None,
              src_vocab=Ref(Path("model.src_reader.vocab")),
              hidden_dim=Ref("exp_global.default_layer_dim"),
              vocab_size=25000):
   super().__init__()
   param_collection = ParamManager.my_params(self)
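   # With no word_vocab given, fall back to an open vocabulary of up to vocab_size entries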
   if word_vocab is None:
     word_vocab = Vocab()
     dict_entry = vocab_size
   else:
     dict_entry = len(word_vocab)
   self.src_vocab = src_vocab
   self.word_vocab = word_vocab
   self.embedding = param_collection.add_lookup_parameters((dict_entry, hidden_dim))
Example 29
 def __init__(self,
              e0: numbers.Real = 0.1,
              momentum: numbers.Real = 0.0,
              weight_decay: numbers.Real = 0.0,
              dampening: numbers.Real = 0.0,
              nesterov: bool = False,
              skip_noisy: bool = False,
              rescale_grads: numbers.Real = 5.0) -> None:
   super().__init__(optimizer=torch.optim.SGD(params=ParamManager.global_collection().parameters(),
                                              lr=e0,
                                              momentum=momentum,
                                              weight_decay=weight_decay,
                                              dampening=dampening,
                                              nesterov=nesterov),
                    skip_noisy=skip_noisy,
                    rescale_grads=rescale_grads)
Example 30
 def __init__(self,
              alpha: numbers.Real = 1.0,
              dim: numbers.Integral = 512,
              warmup_steps: Optional[numbers.Integral] = 4000,
              beta_1: numbers.Real = 0.9,
              beta_2: numbers.Real = 0.98,
              eps: numbers.Real = 1e-9,
              skip_noisy: bool = False,
              rescale_grads: numbers.Real = 5.0) -> None:
   super().__init__(optimizer=torch.optim.Adam(params=ParamManager.global_collection().parameters(),
                                               lr=alpha, betas=(beta_1, beta_2), eps=eps),
                    skip_noisy=skip_noisy,
                    rescale_grads=rescale_grads)
   self.dim = dim
   self.warmup_steps = warmup_steps
   self.steps = 0