Example No. 1
    def __init__(self, model, training_src, training_tgt):
        self.model = model
        self.training_src, self.src_vocab, self.rsrc_vocab = self.change_word2id_genevoc(
            training_src)
        self.training_tgt, self.tgt_vocab, self.rtgt_vocab = self.change_word2id_genevoc_output(
            training_tgt)
        self.src_vocab_size = len(self.src_vocab)
        self.tgt_vocab_size = len(self.tgt_vocab)
        self.embed_size = 128
        self.src_lookup = model.add_lookup_parameters(
            (self.src_vocab_size, self.embed_size))
        self.tgt_lookup = model.add_lookup_parameters(
            (self.tgt_vocab_size, self.embed_size))
        self.hidden_size = 128
        self.layers = 1
        self.contextsize = self.hidden_size * 2
        self.l2r_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                         self.hidden_size, model)
        self.r2l_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                         self.hidden_size, model)
        self.dec_builder = dy.GRUBuilder(self.layers,
                                         self.embed_size + self.contextsize,
                                         self.hidden_size * 2, model)

        self.W_y = model.add_parameters((
            self.tgt_vocab_size,
            self.hidden_size * 2 + self.contextsize,
        ))
        self.b_y = model.add_parameters(self.tgt_vocab_size)

        self.max_len = 50
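
A minimal standalone sketch (not part of the example above; all names and sizes are illustrative) of how a forward/backward GRU pair like l2r_builder and r2l_builder is typically driven to produce the 2 * hidden_size context vectors the decoder consumes:

import dynet as dy

model = dy.ParameterCollection()
embed_size, hidden_size, vocab_size = 128, 128, 1000
lookup = model.add_lookup_parameters((vocab_size, embed_size))
l2r = dy.GRUBuilder(1, embed_size, hidden_size, model)
r2l = dy.GRUBuilder(1, embed_size, hidden_size, model)

def encode(src_ids):
    # embed the ids, run both directions, and concatenate per position
    embs = [lookup[w] for w in src_ids]
    fwd = l2r.initial_state().transduce(embs)
    bwd = list(reversed(r2l.initial_state().transduce(list(reversed(embs)))))
    return [dy.concatenate([f, b]) for f, b in zip(fwd, bwd)]

dy.renew_cg()
context = encode([3, 17, 42])  # each vector has dimension 2 * hidden_size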
Example No. 2
    def add_parameters(self,
                       dropout,
                       lstm_size,
                       optimizer,
                       model_type,
                       gru=True):

        if model_type == "gru":
            self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                             lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS,
                                             EMBEDDING_SIZE + lstm_size,
                                             lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)
        else:

            self.encoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                               lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            self.decoder_rnn = dy.LSTMBuilder(NUM_LAYERS,
                                              EMBEDDING_SIZE + lstm_size,
                                              lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)

        global DROPOUT
        DROPOUT = dropout

        self.W1 = self.model.add_parameters((200, lstm_size))
        self.b1 = self.model.add_parameters((200, 1))
        self.W2 = self.model.add_parameters((100, 200))
        self.b2 = self.model.add_parameters((100, 1))
        self.W3 = self.model.add_parameters((len(self.C2I), 100))
        self.b3 = self.model.add_parameters((len(self.C2I), 1))
        self.W_query = self.model.add_parameters((lstm_size, lstm_size))
        self.W_key = self.model.add_parameters((lstm_size, lstm_size))
        self.W_val = self.model.add_parameters((lstm_size, lstm_size))
        self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
        self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
        self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
        self.b_att = self.model.add_parameters((lstm_size, 1))
        self.b_direct = self.model.add_parameters((len(self.C2I), 1))
        self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))

        if optimizer == "sgd":
            self.trainer = dy.SimpleSGDTrainer(self.model)
        elif optimizer == "rms":
            self.trainer = dy.RMSPropTrainer(self.model)
        if optimizer == "cyclic":
            self.trainer = dy.CyclicalSGDTrainer(self.model)
        elif optimizer == "adam":
            self.trainer = dy.AdamTrainer(self.model)
        else:
            self.trainer = dy.AdagradTrainer(self.model)
Example No. 3
    def __init__(self, Config):
        self.Config = Config
        self.model = dy.Model()

        VOCAB_SIZE = Config.data.vocab_size
        EMBEDDINGS_SIZE = Config.model.embed_dim
        LSTM_NUM_OF_LAYERS = Config.model.num_layers
        STATE_SIZE = Config.model.num_units

        with open(
                os.path.join(Config.data.base_path, Config.data.processed_path,
                             'embed.pkl'), 'rb') as f:
            embed = np.asarray(cPickle.load(f))
        oov = np.random.random((4 + Config.data.oov_size, EMBEDDINGS_SIZE))

        # self.embed = self.model.lookup_parameters_from_numpy(np.transpose(np.asarray(embed)))
        # self.oov = self.model.add_lookup_parameters((4 + Config.data.oov_size, EMBEDDINGS_SIZE), init='uniform')
        self.input_lookup = self.model.lookup_parameters_from_numpy(
            np.concatenate((oov, embed)))
        self.enc_fwd_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                          STATE_SIZE, self.model)
        self.enc_bwd_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                          STATE_SIZE, self.model)
        self.sess_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE,
                                       STATE_SIZE, self.model)
        self.decoder_w = self.model.add_parameters((VOCAB_SIZE, STATE_SIZE),
                                                   init='uniform')
        self.decoder_b = self.model.add_parameters((VOCAB_SIZE),
                                                   init='uniform')
Example No. 4
    def __init__(self, model, training_src, training_tgt, lang_li):
        self.model = model
        self.lang_li = lang_li

        self.src_vocab, self.rsrc_vocab = self.change_word2id_genevoc(
            training_src)
        self.src_vocab_size = len(self.src_vocab)
        print('src_vocab_size', self.src_vocab_size)
        self.embed_size = 128
        self.src_lookup = self.model.add_lookup_parameters(
            (self.src_vocab_size, self.embed_size))

        self.tgt_vocab = {}
        self.rtgt_vocab = {}
        self.tgt_vocab_size = {}
        self.tgt_lookup = {}
        for ele in lang_li:
            self.tgt_vocab[ele], self.rtgt_vocab[
                ele] = self.change_word2id_genevoc_output(training_tgt[ele])
            self.tgt_vocab_size[ele] = len(self.tgt_vocab[ele])
            self.tgt_lookup[ele] = self.model.add_lookup_parameters(
                (self.tgt_vocab_size[ele], self.embed_size))
        print('tgt_vocab_size', self.tgt_vocab_size)

        self.hidden_size = 128
        self.layers = 1
        self.contextsize = self.hidden_size * 2
        self.l2r_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                         self.hidden_size, self.model)
        self.r2l_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                         self.hidden_size, self.model)

        self.dec_builder = {}
        self.W_y = {}
        self.b_y = {}
        for ele in lang_li:
            self.dec_builder[ele] = dy.GRUBuilder(
                self.layers, self.embed_size + self.contextsize,
                self.hidden_size * 2, self.model)
            self.W_y[ele] = self.model.add_parameters(
                (self.tgt_vocab_size[ele],
                 self.hidden_size * 2 + self.contextsize))
            self.b_y[ele] = self.model.add_parameters(self.tgt_vocab_size[ele])

        self.attention_size = 128
        self.lang_eb_size = 10
        self.langeb_lookup = self.model.add_lookup_parameters(
            (len(lang_li), self.lang_eb_size))
        self.W1_att_e = self.model.add_parameters(
            (self.attention_size, 2 * self.hidden_size))
        self.W1_att_f = self.model.add_parameters(
            (self.attention_size, 2 * self.hidden_size))
        self.W1_att_lang = self.model.add_parameters(
            (self.attention_size, self.lang_eb_size))
        self.w2_att = self.model.add_parameters((1, self.attention_size))

        self.max_len = 50
Example No. 5
    def __init__(self, data, opt):
        self.opt = opt
        self.model = dy.ParameterCollection()
        self.trainer = dy.MomentumSGDTrainer(self.model)
        self.w2i = data.w2i
        self.wdims = opt.embedding_size
        self.ldims = opt.hidden_size
        self.attsize = opt.attention_size

        self.ext_embeddings = data.ext_embeddings
        # Model Parameters
        self.wlookup = self.model.add_lookup_parameters(
            (len(self.w2i), self.wdims))

        self.__load_external_embeddings()

        if self.opt.encoder_dir == "single":
            if self.opt.encoder_type == "lstm":
                self.sentence_rnn = [
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims,
                                          self.model)
                ]
            elif self.opt.encoder_type == "gru":
                self.sentence_rnn = [
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model)
                ]
            self.attention_w = self.model.add_parameters(
                (self.attsize, self.ldims))
            self.attention_b = self.model.add_parameters(self.attsize)
            self.att_context = self.model.add_parameters(self.attsize)
            self.mlp_w = self.model.add_parameters(
                (1, self.ldims + 2 * self.ldims))
            self.mlp_b = self.model.add_parameters(1)
        elif self.opt.encoder_dir == "bidirectional":
            if self.opt.encoder_type == "lstm":
                self.sentence_rnn = [
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims,
                                          self.model),
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims,
                                          self.model),
                ]
            elif self.opt.encoder_type == "gru":
                self.sentence_rnn = [
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                ]

            self.attention_w = self.model.add_parameters(
                (self.attsize, 2 * self.ldims))
            self.attention_b = self.model.add_parameters(self.attsize)
            self.att_context = self.model.add_parameters(self.attsize)
            self.mlp_w = self.model.add_parameters(
                (1, 2 * self.ldims + 4 * self.ldims))
            self.mlp_b = self.model.add_parameters(1)
Example No. 6
 def __init__(self, pc, n_in, n_out, dropout_rate, reuse=True):
     self.pc = pc
     self.n_in = n_in
     self.n_out = n_out
     self.dropout_rate = dropout_rate
     self.reuse = reuse
     if self.reuse:
         self.gru = dy.GRUBuilder(layers=1, input_dim=self.n_in, hidden_dim=self.n_out, model=self.pc)
     else:
         self.gru_f = dy.GRUBuilder(layers=1, input_dim=self.n_in, hidden_dim=self.n_out, model=self.pc)
         self.gru_b = dy.GRUBuilder(layers=1, input_dim=self.n_in, hidden_dim=self.n_out, model=self.pc)
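
A hedged sketch of a companion method (an assumption, not shown in the original class) illustrating how the reuse flag above would typically be consumed: with reuse=True the single shared GRU processes both the forward and the reversed sequence, otherwise gru_f and gru_b each handle one direction.

 def transduce(self, xs):
     if self.reuse:
         fwd = self.gru.initial_state().transduce(xs)
         bwd = self.gru.initial_state().transduce(list(reversed(xs)))
     else:
         fwd = self.gru_f.initial_state().transduce(xs)
         bwd = self.gru_b.initial_state().transduce(list(reversed(xs)))
     bwd = list(reversed(bwd))
     # concatenated outputs have size 2 * n_out per position
     return [dy.concatenate([f, b]) for f, b in zip(fwd, bwd)]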
Example No. 7
    def __init__(self, config):
        self.config = config
        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model)
        self.rnn = []
        self.rnn_upsample_w = []
        self.rnn_linear_w = [None]
        rnn_input_size = config.FS[0]
        first = True
        for ls, fs, ups in zip(config.rnn_layers, config.FS, config.upsample):
            self.rnn.append(dy.GRUBuilder(1, rnn_input_size, ls, self.model))
            self.rnn_upsample_w.append([self.model.add_parameters((ls, ls))] * ups)
            if first:
                first = False
            else:
                self.rnn_linear_w.append(self.model.add_parameters((ls, fs)))
            rnn_input_size = ls

        layer_is = rnn_input_size
        self.mlp_w = []
        self.mlp_b = []
        for layer_os in config.mlp:
            self.mlp_w.append(self.model.add_parameters((layer_os, layer_is)))
            self.mlp_b.append(self.model.add_parameters((layer_os)))
            layer_is = layer_os
        self.mlp_w.append(self.model.add_parameters((256, layer_is)))
        self.mlp_b.append(self.model.add_parameters((256)))
Example No. 8
 def __init__(self, in_vocab, hidden_dim, model):
     self.in_vocab = in_vocab
     self.hidden_dim = hidden_dim
     self.embedding_enc = model.add_lookup_parameters(
         (self.in_vocab, self.hidden_dim))
     self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim,
                                  model)
Example No. 9
    def __init__(self, args, src_vocab, tgt_vocab, src_vocab_id2word,
                 tgt_vocab_id2word):
        model = self.model = dy.Model()
        self.args = args
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab
        self.src_vocab_id2word = src_vocab_id2word
        self.tgt_vocab_id2word = tgt_vocab_id2word

        self.src_lookup = self.model.add_lookup_parameters(
            (args.src_vocab_size, args.embed_size))
        self.tgt_lookup = self.model.add_lookup_parameters(
            (args.tgt_vocab_size, args.embed_size))

        self.enc_forward_builder = dy.GRUBuilder(1, args.embed_size,
                                                 args.hidden_size, model)
        self.enc_backward_builder = dy.GRUBuilder(1, args.embed_size,
                                                  args.hidden_size, model)

        self.dec_builder = dy.GRUBuilder(
            1, args.embed_size + args.hidden_size * 2, args.hidden_size, model)

        # target word embedding
        self.W_y = model.add_parameters((args.tgt_vocab_size, args.embed_size))
        self.b_y = model.add_parameters((args.tgt_vocab_size))
        self.b_y.zero()

        # transformation of decoder hidden states and context vectors before reading out target words
        self.W_h = model.add_parameters(
            (args.embed_size, args.hidden_size + args.hidden_size * 2))
        self.b_h = model.add_parameters((args.embed_size))
        self.b_h.zero()

        # transformation of context vectors at t_0 in decoding
        self.W_s = model.add_parameters(
            (args.hidden_size, args.hidden_size * 2))
        self.b_s = model.add_parameters((args.hidden_size))
        self.b_s.zero()

        self.W1_att_f = model.add_parameters(
            (args.attention_size, args.hidden_size * 2))
        self.W1_att_e = model.add_parameters(
            (args.attention_size, args.hidden_size))
        self.W2_att = model.add_parameters((1, args.attention_size))
Example No. 10
    def _create_model(self):
        self.logger.info('Creating the model...')

        model = dy.ParameterCollection()

        # context gru encoders
        c_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)
        c_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)

        # question gru encoders
        q_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)
        q_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                                 self.model_args["gru_input_dim"],
                                 self.model_args["gru_hidden_dim"],
                                 model)

        # embedding parameter
        lookup_params = model.add_lookup_parameters((self.model_args["vocab_size"],
                                                     self.model_args["gru_input_dim"]),
                                                    dy.UniformInitializer(self.model_args["lookup_init_scale"]))

        unk_lookup_params = model.add_lookup_parameters((self.model_args["number_of_unks"],
                                                         self.model_args["gru_input_dim"]),
                                                        dy.UniformInitializer(self.model_args["lookup_init_scale"]))

        self.logger.info('Done creating the model')

        model_parameters = {"c_fwdRnn": c_fwdRnn,
                            "c_bwdRnn": c_bwdRnn,
                            "q_fwdRnn": q_fwdRnn,
                            "q_bwdRnn": q_bwdRnn,
                            "lookup_params": lookup_params,
                            "unk_lookup_params": unk_lookup_params}
        return model, model_parameters
Example No. 11
 def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH):
     self.hidden_dim = hidden_dim
     self.out_vocab = out_vocab
     self.max_length = max_length
     self.embedding_dec = model.add_lookup_parameters(
         (self.out_vocab, self.hidden_dim))
     self.w_attn = model.add_parameters(
         (self.max_length, self.hidden_dim * 2))
     self.b_attn = model.add_parameters((self.max_length, ))
     self.w_attn_combine = model.add_parameters(
         (self.hidden_dim, self.hidden_dim * 2))
     self.b_attn_combine = model.add_parameters((self.hidden_dim, ))
     self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim,
                                  model)
     self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim))
     self.b_dec = model.add_parameters((self.out_vocab, ))
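
A hypothetical single decoding step (an assumption about how the attention parameters above fit together, in the fixed-max_length style of the PyTorch seq2seq tutorial): encoder_outputs is assumed to be a hidden_dim x max_length matrix of encoder states, and state is assumed to be a GRU state already primed, e.g. via self.rnn_dec.initial_state([enc_summary]).

 def decode_step(self, prev_id, state, encoder_outputs):
     emb = self.embedding_dec[prev_id]
     # attention weights over the (padded) source positions
     attn = dy.softmax(self.w_attn * dy.concatenate([emb, state.output()])
                       + self.b_attn)
     context = encoder_outputs * attn
     combined = dy.rectify(self.w_attn_combine * dy.concatenate([emb, context])
                           + self.b_attn_combine)
     state = state.add_input(combined)
     # distribution over the output vocabulary
     probs = dy.softmax(self.w_dec * state.output() + self.b_dec)
     return probs, state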
Example No. 12
def main():
	import sys
	action = sys.argv[1]
	emb_name = sys.argv[2]
	filen = sys.argv[3]
	testname = sys.argv[4]
	
	print('setting up')
	train_words, train_tags, tag_idx, idx_tag, test_words, test_tags = setup(filen, testname)
	num_tags = len(tag_idx.keys())
	gru_model = dy.ParameterCollection()
	word_index, embeddings_mat = import_embeddings(emb_name, 300)
	hidden_layer_len = 200
	layers = 1 
	eparams = gru_model.lookup_parameters_from_numpy(embeddings_mat.A)  # convert embedding matrix to a plain ndarray
	gru_unit = dy.GRUBuilder(layers, 300, hidden_layer_len, gru_model)
	param_mat = gru_model.add_parameters((hidden_layer_len, num_tags))
	param_bias = gru_model.add_parameters((num_tags))
	#gmodel.save("grumodel.model")
	#mdl2 = dy.ParameterCollection()
	#ep = mdl2.lookup_parameters_from_numpy(embeddings_mat.A)
	#parmat = mdl2.add_parameters((200, num_tags))
	#parbias = mdl2.add_parameters((num_tags))
	#gmodel.populate("grumodel.model")
	
	if action == 'train':
		print('training')
		bsize, gmodel = training(3, eparams, gru_unit, idx_tag, gru_model, train_words, train_tags, word_index, param_mat, param_bias)
	if action == 'tune':
		print('tuning')
		for r in range(3, 6):
			print('training')
			print('epochs: ', r)
			bsize,gmodel = training(r, eparams, gru_unit, idx_tag, gru_model, train_words, train_tags, word_index, param_mat, param_bias)
			print('testing')
			testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags, word_index, param_mat, param_bias, eparams)
	if action == 'test':
		print('training')
		#use 5 epochs
		bsize, gmodel = training(5, eparams, gru_unit, idx_tag, gru_model, train_words, train_tags, word_index, param_mat, param_bias)
		print('testing')
		testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags, word_index, param_mat, param_bias, eparams)

	return
Example No. 13
    def _initialize_model(self):
        self.model = dy.Model()
        self.input_lookup = self.model.add_lookup_parameters(
            (self.vocab_size, self.embedding_size))

        # Attention params
        self.attention_w1 = self.model.add_parameters(
            (self.state_size, self.state_size))
        self.attention_w2 = self.model.add_parameters(
            (self.state_size, self.state_size))
        self.attention_v = self.model.add_parameters((1, self.state_size))

        # Predictive alignment params
        self.w_p = self.model.add_parameters(
            (self.state_size, self.state_size))
        self.v_p = self.model.add_parameters((1, self.state_size))

        # LSTM/GRU and last layer projection matrix
        self.lstm = dy.GRUBuilder(self.lstm_num_of_layers, self.embedding_size,
                                  self.state_size, self.model)

        self.output_w = self.model.add_parameters(
            (self.num_of_classes, self.state_size))
        self.output_b = self.model.add_parameters((self.num_of_classes))
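
An illustrative usage sketch (assumed, not part of the original class; it ignores the attention and alignment parameters) showing how self.lstm and the output projection would typically score one sequence of token ids:

    def _run_one(self, word_ids):
        dy.renew_cg()
        vectors = [self.input_lookup[w] for w in word_ids]
        outputs = self.lstm.initial_state().transduce(vectors)
        # classify from the final GRU state
        return dy.softmax(self.output_w * outputs[-1] + self.output_b)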
Example No. 14
    def __init__(self, dataset, config):
        self.clip = 5 * dataset.sample_rate  # set to zero for full training
        self.sample_rate = dataset.sample_rate
        self.config = config
        self.dataset = dataset

        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model)

        #lookups
        self.phoneme_lookup = self.model.add_lookup_parameters(
            (len(dataset.phoneme2int), config.phone_embeddings_size))
        self.context_lookup = [
            self.model.add_lookup_parameters(
                (len(ctx), config.context_embeddings_size))
            for ctx in dataset.context2int
        ]

        #encoder
        inp_sz = config.phone_embeddings_size * 5  #+ len(dataset.context2int) * config.context_embeddings_size
        self.encoder_fw = [
            dy.LSTMBuilder(1, inp_sz, config.encoder_size, self.model)
        ]
        self.encoder_bw = [
            dy.LSTMBuilder(1, inp_sz, config.encoder_size, self.model)
        ]
        for _ in range(config.encoder_layers - 1):
            self.encoder_fw.append(
                dy.LSTMBuilder(1, config.encoder_size * 2, config.encoder_size,
                               self.model))
            self.encoder_bw.append(
                dy.LSTMBuilder(1, config.encoder_size * 2, config.encoder_size,
                               self.model))

        #receptive network
        receptive_input = config.receptive_input
        self.receptive_w = []
        self.receptive_b = []
        for size in config.receptive_layers:
            receptive_output = size
            self.receptive_w.append(
                self.model.add_parameters((receptive_output, receptive_input)))
            self.receptive_b.append(
                self.model.add_parameters((receptive_output)))
            receptive_input = receptive_output

        #attention network
        attention_input = config.receptive_input
        self.attention_w = []
        self.attention_b = []
        for size in config.attention_layers:
            attention_output = size
            self.attention_w.append(
                self.model.add_parameters((attention_output, attention_input)))
            self.attention_b.append(
                self.model.add_parameters((attention_output)))
            attention_input = attention_output

        #decoder
        self.decoder = dy.GRUBuilder(
            config.decoder_layers,
            config.encoder_size * 2 + config.receptive_layers[-1],
            config.decoder_size, self.model)

        #attention
        self.att_w1 = self.model.add_parameters(
            (config.att_proj_size, config.encoder_size * 2))
        self.att_w2 = self.model.add_parameters(
            (config.att_proj_size, config.decoder_size))
        self.att_w3 = self.model.add_parameters(
            (config.att_proj_size, config.att_lsa_filters))
        self.att_w4 = self.model.add_parameters(
            (config.att_proj_size, config.attention_layers[-1]))
        self.att_v = self.model.add_parameters((1, config.att_proj_size))
        self.cnn_attention = CNN(self.model)
        self.cnn_attention.add_layer_conv(config.att_lsa_input_size,
                                          1,
                                          1,
                                          1,
                                          config.att_lsa_filters,
                                          same=True)

        #output

        presoftmax_input = config.decoder_size + config.receptive_layers[
            -1] + config.sample_trail_size
        self.presoftmax_w = []
        self.presoftmax_b = []
        for size in config.presoftmax_layers:
            presoftmax_output = size
            self.presoftmax_w.append(
                self.model.add_parameters(
                    (presoftmax_output, presoftmax_input)))
            self.presoftmax_b.append(
                self.model.add_parameters((presoftmax_output)))
            presoftmax_input = presoftmax_output

        self.softmax_w = self.model.add_parameters((257, presoftmax_input))
        self.softmax_b = self.model.add_parameters((257))
Example No. 15
def main(datapath,
         train=None,
         test=None,
         num_epochs=2,
         batch_size=256,
         embedding_approach='random',
         embedding_size=300):

    if train is None and test is None:
        print("Either train or test!")
        sys.exit()
    ################################################################
    RNN_model = dy.ParameterCollection()

    ################################################################
    # HYPERPARAMETERS
    ################################################################
    # size of word embedding (if using "random", otherwise, dependent on the loaded embeddings)
    # embedding_size = 300
    # size of hidden layer of `RNN`
    hidden_size = 200
    # number of layers in `RNN`
    num_layers = 1
    # type of trainer
    trainer = dy.SimpleSGDTrainer(m=RNN_model, learning_rate=0.01)
    ################################################################
    ## Load the training and test data
    all_tokens, all_labels = import_emails(
        datapath)  #Add test_tokens, test_labels
    ################################################################
    #print("train_tokens = ", train_tokens[:10], "train_labels = ", train_labels[:10])
    if embedding_approach == "pretrained":
        emb_matrix_pretrained, w2i_pretrained = load_pretrained_embeddings(
            path.join(datapath + "../../hw/", "pretrained_embeddings.txt"),
            take=10000)
        embedding_parameters = RNN_model.lookup_parameters_from_numpy(
            emb_matrix_pretrained)
        embedding_size = emb_matrix_pretrained.shape[
            1]  ## Rewriting `embedding_size`
        w2i = w2i_pretrained  # ensure we use the correct lookup table
        print("embedding matrix shape: {}".format(emb_matrix_pretrained.shape))

    elif embedding_approach == "random":
        #### randomly initialized embeddings
        w2i_random = build_w2i_lookup(train_tokens)
        embedding_parameters = RNN_model.add_lookup_parameters(
            (len(w2i_random) + 1, embedding_size))
        w2i = w2i_random  # ensure we use the correct lookup table
    else:
        raise Exception("Choose a proper embedding approach")

    ###### CHOOSE HERE which approach you want to use. ######
    # RNN_unit = dy.LSTMBuilder(num_layers, embedding_size, hidden_size, RNN_model)
    RNN_unit = dy.GRUBuilder(num_layers, embedding_size, hidden_size,
                             RNN_model)
    ################################################################
    #10 fold Cross validation
    fold_size = int(np.ceil(len(all_labels) / 10))
    print(fold_size, " fold size of a 10-Fold Cross validation")
    i = 0
    test_tokens = []
    test_labels = []
    train_labels = []
    train_tokens = []
    email_accuracy_folds = []
    while i < len(all_labels):
        if i > 0:  #just doing one fold, to save time. Run predict on that
            break
        test_tokens = all_tokens[i:i + fold_size]
        test_labels = all_labels[i:i + fold_size]
        train_tokens = all_tokens[0:i]
        train_tokens.extend(all_tokens[i + fold_size:])
        train_labels = all_labels[0:i]
        train_labels.extend(all_labels[i + fold_size:])
        print("len(test_l)= ", len(test_labels), " len(train_labels)= ",
              len(train_labels), " fold_sz= ", fold_size)
        i += fold_size

        ## convert the labels to ids
        l2i = labels_to_index_map(train_labels)
        #print("len(l2i) = ",l2i)
        train_labels = [l2i[l] for l in train_labels]
        #print("After mapping train_labels = ", train_labels[:10] )
        test_labels = [l2i[l] for l in test_labels]

        # training hyperparams
        # batch_size = 256
        num_batches_training = int(np.ceil(len(train_tokens) / batch_size))
        num_batches_testing = int(np.ceil(len(test_tokens) / batch_size))
        # num_epochs = 1

        ## Projection layer
        # W (hidden x num_labels)
        pW = RNN_model.add_parameters((hidden_size, len(list(l2i.keys()))))

        # b (1 x num_labels)
        pb = RNN_model.add_parameters((len(list(l2i.keys()))))
        print("train_tokens len = ", len(train_tokens), "train_labels len =",
              len(train_labels))
        if train is not None:
            modelPath = train
            print("in train", train)

            trainAlgo(train_tokens, train_labels, num_epochs,
                      num_batches_training, batch_size, w2i,
                      embedding_parameters, pW, pb, modelPath, RNN_unit,
                      trainer, RNN_model)

        if test is not None:
            modelPath = test
            print("in test", test)
            final_predictions = testAlgo(test_tokens, test_labels,
                                         num_batches_testing, batch_size, w2i,
                                         embedding_parameters, pW, pb,
                                         modelPath, RNN_unit, RNN_model)
            email_accuracy = evaluate(final_predictions, test_labels)
            print("Email overall accuracy : {}".format(email_accuracy))
            email_accuracy_folds.append(email_accuracy)
    print("Average over 10 folds = ",
          float(sum(email_accuracy_folds) / len(email_accuracy_folds)))
Example No. 16
 def setUp(self):
     # create model
     self.m = dy.Model()
     self.rnn = dy.GRUBuilder(2, 10, 10, self.m)
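
For reference, a minimal hypothetical test body that exercises the builder created in setUp, stepping it with add_input and reading the top-layer output:

 def test_forward(self):
     dy.renew_cg()
     state = self.rnn.initial_state()
     for _ in range(3):
         state = state.add_input(dy.inputVector([0.5] * 10))
     # the output of the 2-layer, 10-unit GRU is a 10-dimensional vector
     self.assertEqual(state.output().dim()[0], (10,))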
Example No. 17
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_opi = params.dim_opi
        self.dim_y_asp = params.n_asp_tags
        self.dim_y_opi = params.n_opi_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.opi_label2tag = {0: 'O', 1: 'T'}
        self.dropout_asp = params.dropout_asp
        self.dropout_opi = params.dropout_opi
        self.dropout = params.dropout
        self.rnn_type = params.rnn_type
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        #self.ASP_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        #self.OPI_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_opi, dropout_rate=self.dropout_opi)
        # use dynet RNNBuilder rather than the self-defined RNN classes
        if self.rnn_type == 'LSTM':
            self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        elif self.rnn_type == 'GRU':
            # NOT TRIED!
            self.ASP_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        else:
            raise Exception("Invalid RNN type!!!")
        self.THA = THA(pc=self.pc, n_steps=self.n_steps, n_in=2*self.dim_asp)
        if self.attention_type == 'bilinear':
            self.STN = ST_bilinear(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        # here dot attention is not applicable since the aspect representation and opinion representation
        # have different dimensions
        # elif self.attention_type == 'dot':
        #    self.STN = ST_dot(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        elif self.attention_type == 'concat':
            self.STN = ST_concat(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        else:
            raise Exception("Invalid attention type!!!")

        self.ASP_FC = Linear(pc=self.pc, n_in=2*self.dim_asp+2*self.dim_opi, n_out=self.dim_y_asp)
        self.OPI_FC = Linear(pc=self.pc, n_in=2*self.dim_opi, n_out=self.dim_y_opi)

        self.layers = [self.ASP_FC, self.OPI_FC, self.THA, self.STN]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            # use default value of adadelta
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Example No. 18
def main(datapath,
         train=None,
         test=None,
         num_epochs=2,
         batch_size=256,
         embedding_approach='random',
         embedding_size=300):

    if train is None and test is None:
        print("Either train or test!")
        sys.exit()

    ### initialize empty model
    ################################################################
    RNN_model = dy.ParameterCollection()

    ################################################################
    # HYPERPARAMETERS
    ################################################################
    # size of word embedding (if using "random", otherwise, dependent on the loaded embeddings)
    # embedding_size = 300
    # size of hidden layer of `RNN`
    hidden_size = 200
    # number of layers in `RNN`
    num_layers = 1
    # type of trainer
    trainer = dy.SimpleSGDTrainer(m=RNN_model, learning_rate=0.01)

    ################################################################
    ## Load the training and test data
    train_tokens, train_labels, _, _, test_tokens, test_labels = u.import_ptb(
        datapath)
    ################################################################

    if embedding_approach == "pretrained":
        emb_matrix_pretrained, w2i_pretrained = u.load_pretrained_embeddings(
            path.join(datapath, "pretrained_embeddings.txt"), take=10000)
        embedding_parameters = RNN_model.lookup_parameters_from_numpy(
            emb_matrix_pretrained)
        embedding_size = emb_matrix_pretrained.shape[
            1]  ## Rewriting `embedding_size`
        w2i = w2i_pretrained  # ensure we use the correct lookup table
        print("embedding matrix shape: {}".format(emb_matrix_pretrained.shape))

    elif embedding_approach == "random":
        #### randomly initialized embeddings
        w2i_random = u.build_w2i_lookup(train_tokens)
        embedding_parameters = RNN_model.add_lookup_parameters(
            (len(w2i_random) + 1, embedding_size))
        w2i = w2i_random  # ensure we use the correct lookup table
    else:
        raise Exception("Choose a proper embedding approach")

    ###### CHOOSE HERE which approach you want to use. ######
    # RNN_unit = dy.LSTMBuilder(num_layers, embedding_size, hidden_size, RNN_model)
    RNN_unit = dy.GRUBuilder(num_layers, embedding_size, hidden_size,
                             RNN_model)
    ################################################################

    ## convert the labels to ids
    l2i = u.labels_to_index_map(train_labels)
    train_labels = [[l2i[l] for l in sent] for sent in train_labels]
    test_labels = [[l2i[l] for l in sent] for sent in test_labels]

    # training hyperparams
    # batch_size = 256
    num_batches_training = int(np.ceil(len(train_tokens) / batch_size))
    num_batches_testing = int(np.ceil(len(test_tokens) / batch_size))
    # num_epochs = 1

    ## Projection layer
    # W (hidden x num_labels)
    pW = RNN_model.add_parameters((hidden_size, len(list(l2i.keys()))))

    # b (1 x num_labels)
    pb = RNN_model.add_parameters((len(list(l2i.keys()))))

    if train is not None:
        modelPath = train
        trainAlgo(train_tokens, train_labels, num_epochs, num_batches_training,
                  batch_size, w2i, embedding_parameters, pW, pb, modelPath,
                  RNN_unit, trainer, RNN_model)

    if test is not None:
        modelPath = test
        final_predictions = testAlgo(test_tokens, test_labels,
                                     num_batches_testing, batch_size, w2i,
                                     embedding_parameters, pW, pb, modelPath,
                                     RNN_unit, RNN_model)
        overall_accuracy, sentence_accuracy = evaluate(final_predictions,
                                                       test_labels)
        print("overall accuracy: {}".format(overall_accuracy))
Example No. 19
def test_wsj():
    print()
    print('# test on wsj subset')

    data, n_types, n_labels = pickle.load(open('wsj.pkl', 'rb'))

    d_emb = 50
    d_rnn = 51
    d_hid = 52
    d_actemb = 5

    minibatch_size = 5
    n_epochs = 10
    preprocess_minibatch = True

    model = dy.ParameterCollection()

    embed_word = model.add_lookup_parameters((n_types, d_emb))
    f_gru = dy.GRUBuilder(1, d_emb, d_rnn, model)
    b_gru = dy.GRUBuilder(1, d_emb, d_rnn, model)
    embed_action = model.add_lookup_parameters((n_labels, d_actemb))
    combine_arh_W = model.add_parameters((d_hid, d_actemb + d_rnn * 2 + d_hid))
    combine_arh_b = model.add_parameters(d_hid)

    initial_h = model.add_parameters(d_hid, dy.ConstInitializer(0))
    initial_actemb = model.add_parameters(d_actemb, dy.ConstInitializer(0))

    policy_W = model.add_parameters((n_labels, d_hid))
    policy_b = model.add_parameters(n_labels)

    optimizer = dy.AdamTrainer(model, alpha=0.01)

    for _ in range(n_epochs):

        total_loss = 0
        for batch in minibatch(data, minibatch_size, True):
            dy.renew_cg()

            combine_arh_We = dy.parameter(combine_arh_W)
            combine_arh_be = dy.parameter(combine_arh_b)

            policy_We = dy.parameter(policy_W)
            policy_be = dy.parameter(policy_b)

            loss = 0

            if preprocess_minibatch:
                # for efficiency, combine RNN outputs on entire
                # minibatch in one go (requires padding with zeros,
                # should be masked but isn't right now)
                all_tokens = [ex.tokens for ex in batch]
                max_length = max(map(len, all_tokens))
                all_tokens = [[x[i] if len(x) > i else 0 for x in all_tokens]
                              for i in range(max_length)]
                all_e = [dy.lookup_batch(embed_word, x) for x in all_tokens]
                all_rnn_out = bi_gru(f_gru, b_gru, all_e)

            losses = []
            for batch_id, ex in enumerate(batch):
                N = len(ex.tokens)
                if preprocess_minibatch:
                    rnn_out = [
                        dy.pick_batch_elem(x, batch_id)
                        for x in all_rnn_out[:N]
                    ]
                else:
                    e = [embed_word[x] for x in ex.tokens]
                    rnn_out = bi_gru(f_gru, b_gru, e)
                prev_h = dy.parameter(initial_h)  # previous hidden state
                actemb = dy.parameter(
                    initial_actemb)  # embedding of previous action
                output = []
                for t in range(N):
                    # update hidden state based on most recent
                    # *predicted* action (not ground truth)
                    inputs = [actemb, prev_h, rnn_out[t]]
                    h = dy.rectify(
                        dy.affine_transform([
                            combine_arh_be, combine_arh_We,
                            dy.concatenate(inputs)
                        ]))

                    # make prediction
                    pred_vec = dy.affine_transform([policy_be, policy_We, h])
                    pred = pred_vec.npvalue().argmin()
                    output.append(pred)

                    # accumulate loss (squared error against costs)
                    truth = np.ones(n_labels)
                    truth[ex.labels[t]] = 0
                    losses.append(
                        dy.squared_distance(pred_vec, dy.inputTensor(truth)))

                    # cache hidden state, previous action embedding
                    prev_h = h
                    actemb = embed_action[pred]

                # print 'output=%s, truth=%s' % (output, ex.labels)

            loss = dy.esum(losses)
            loss.backward()
            total_loss += loss.value()
            optimizer.update()
        print(total_loss)
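
The loop above relies on a bi_gru helper that is not shown in this snippet; a plausible definition (an assumption, not the original) consistent with how its per-position outputs are indexed and with the d_rnn * 2 width expected by combine_arh_W:

def bi_gru(f_gru, b_gru, inputs):
    # run forward and backward GRUs and concatenate their outputs per position
    fwd = f_gru.initial_state().transduce(inputs)
    bwd = list(reversed(b_gru.initial_state().transduce(list(reversed(inputs)))))
    return [dy.concatenate([f, b]) for f, b in zip(fwd, bwd)]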
Example No. 20
    def __init__(self,
                 task_in_size,
                 task_hid_size,
                 task_out_size,
                 adv_in_size,
                 adv_hid_size,
                 adv_out_size,
                 adv_count,
                 vocab_size,
                 dropout,
                 lstm_size,
                 adv_depth=1,
                 rnn_dropout=0.0,
                 rnn_type='lstm'):
        model = dy.Model()

        if rnn_type == 'lstm':
            self._rnn = dy.LSTMBuilder(lstm_size, 300, task_in_size, model)
        elif rnn_type == 'gru':
            self._rnn = dy.GRUBuilder(lstm_size, 300, task_in_size, model)
        else:
            self._rnn = dy.SimpleRNNBuilder(lstm_size, 300, task_in_size,
                                            model)

        params = {}

        params['w_lookup'] = model.add_lookup_parameters((vocab_size, 300))

        in_task = task_in_size
        params["task_w1"] = model.add_parameters((task_hid_size, in_task))
        params["task_b1"] = model.add_parameters((task_hid_size))
        params["task_w2"] = model.add_parameters(
            (task_out_size, task_hid_size))
        params["task_b2"] = model.add_parameters((task_out_size))

        for i in range(adv_count):
            for j in range(adv_depth):
                params["adv_" + str(i) + "_w" +
                       str(j + 1)] = model.add_parameters(
                           (adv_hid_size, adv_in_size))
                params["adv_" + str(i) + "_b" +
                       str(j + 1)] = model.add_parameters((adv_hid_size))
            params["adv_" + str(i) + "_w" +
                   str(adv_depth + 1)] = model.add_parameters(
                       (adv_out_size, adv_hid_size))
            params["adv_" + str(i) + "_b" +
                   str(adv_depth + 1)] = model.add_parameters((adv_out_size))

        params["contra_adv_w1"] = model.add_parameters(
            (adv_hid_size, adv_in_size))
        params["contra_adv_b1"] = model.add_parameters((adv_hid_size))
        params["contra_adv_w2"] = model.add_parameters(
            (adv_out_size, adv_hid_size))
        params["contra_adv_b2"] = model.add_parameters((adv_out_size))

        self._model = model
        self._hid_dim = task_hid_size
        self._in_dim = task_in_size
        self._adv_count = adv_count
        self._adv_depth = adv_depth
        self._params = params
        self._dropout = dropout
        self._rnn_dropout = rnn_dropout
Example No. 21
print(len(train_sentences_en))

print(len(valid_sentences_de))
print(len(valid_sentences_en))

VOCAB_SIZE_DE = len(wids)
VOCAB_SIZE_EN = VOCAB_SIZE_DE

train_sentences = zip(train_sentences_de, train_sentences_en)
valid_sentences = zip(valid_sentences_de, valid_sentences_en)

#Specify model
model = dy.Model()

if config.GRU:
    encoder = dy.GRUBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    revcoder = dy.GRUBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    decoder = dy.GRUBuilder(LAYER_DEPTH, EMB_SIZE + HIDDEN_SIZE, HIDDEN_SIZE,
                            model)
else:
    encoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    revcoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    decoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE + HIDDEN_SIZE, HIDDEN_SIZE,
                             model)

encoder_params = {}
encoder_params["lookup"] = model.add_lookup_parameters(
    (VOCAB_SIZE_DE, EMB_SIZE))

decoder_params = {}
if config.sharing:
Example No. 22
 def setUp(self):
     # create model
     self.m = dy.ParameterCollection()
     self.rnn = dy.GRUBuilder(2, 10, 10, self.m)
Example No. 23
    if len(f) <= input_size:
        training_pairs += [f]
    else:
        training_pairs += [
            f[i:i + input_size] for i in range(0,
                                               len(f) - input_size)
        ]
rng.shuffle(training_pairs)
training_pairs = training_pairs[:500]
TRAINING_PAIRS = len(training_pairs)
print(TRAINING_PAIRS, " training pairs")
print(len(typed_output_vocab) - 1, " possible types")

# set up the neural net
pc = dy.ParameterCollection()
srnn = dy.GRUBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
params: Dict[str, dy.Expression] = {}
params["lookup"] = pc.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM))
params["R"] = pc.add_parameters((OUTPUT_VOCAB_SIZE, HIDDEN_DIM))
params["bias"] = pc.add_parameters((OUTPUT_VOCAB_SIZE))

# Load training data from disk if it exists
model = Path(MODEL_FILE)
if model.is_file():
    print("Model file found, loading... ", end="", flush=True)
    try:
        pc.populate(MODEL_FILE)
        print("OK")
    except Exception as e:
        print("Failed")
        print("Message was:\n\t%s" % (str(e), ))