Example #1
 def dynet_trainer_for_args(self, args):
     if args.trainer.lower() == "sgd":
         trainer = dy.SimpleSGDTrainer(
             model_globals.dynet_param_collection.param_col,
             learning_rate=args.learning_rate)
     elif args.trainer.lower() == "adam":
         trainer = dy.AdamTrainer(
             model_globals.dynet_param_collection.param_col,
             alpha=args.learning_rate)
     else:
         raise RuntimeError("Unknown trainer {}".format(args.trainer))
     return trainer
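A minimal usage sketch (not one of the scraped examples; the toy linear model and data here are illustrative assumptions) showing how a trainer produced by such a factory is typically driven in DyNet:

import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters((1, 2))
b = pc.add_parameters((1,))
trainer = dy.SimpleSGDTrainer(pc, learning_rate=0.1)

data = [([1.0, 0.0], 1.0), ([0.0, 1.0], -1.0)]
for epoch in range(10):
    for x, y in data:
        dy.renew_cg()                                # fresh computation graph per example
        w, bias = dy.parameter(W), dy.parameter(b)   # load parameters into the graph
        pred = w * dy.inputVector(x) + bias          # forward pass of the toy model
        loss = dy.squared_distance(pred, dy.inputVector([y]))
        loss.value()                                 # run the forward pass
        loss.backward()                              # compute gradients
        trainer.update()                             # apply the SGD step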
Example #2
    def build_optimizer(self):
        assert self.model is not None, 'build model first'

        print(f'Building {self.optimizer_type} optimizer...')
        if self.optimizer_type == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.model,
                                                 learning_rate=self.lr)
        elif self.optimizer_type == 'adam':
            self.optimizer = dy.AdamTrainer(self.model, alpha=self.lr)

        self.optimizer.set_clip_threshold(self.max_grad_norm)
        self.model.set_weight_decay(self.weight_decay)
Example #3
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_y_asp = params.n_asp_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.dropout_asp = params.dropout_asp
        self.dropout = params.dropout
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        
        self.DEP_RecNN = DTreeBuilder(pc=self.pc, n_in=self.win * self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        
        self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)

        self.BiAttention_F = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_B = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_T = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.MultiWeightLayer = MultiWeightLayer(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.ASP_FC = Linear(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_y_asp)
        
        self.layers = [self.ASP_FC, self.DEP_RecNN, self.BiAttention_F, self.BiAttention_B, self.BiAttention_T, self.MultiWeightLayer]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Example #4
 def setUp(self):
     # Create model
     self.m = dy.ParameterCollection()
     # Parameters
     self.p1 = self.m.add_parameters((10, 10), init=dy.ConstInitializer(1))
     self.p2 = self.m.add_parameters((10, 10), init=dy.ConstInitializer(1))
     self.lp1 = self.m.add_lookup_parameters((10, 10),
                                             init=dy.ConstInitializer(1))
     self.lp2 = self.m.add_lookup_parameters((10, 10),
                                             init=dy.ConstInitializer(1))
     # Trainer
     self.trainer = dy.SimpleSGDTrainer(self.m, learning_rate=0.1)
     self.trainer.set_clip_threshold(-1)
Example #5
    def __init__(self,
                 embedding,
                 output_size,
                 hidden_size,
                 seed=1,
                 crf=False,
                 dropout_rate=0.5,
                 optimizer="sgd",
                 learning_rate=0.1):
        self.tmp_patience_filename = str(uuid4()) + ".model"
        self.set_seed(seed)

        self.model = dy.ParameterCollection()

        if optimizer == "sgd":
            self.trainer = dy.SimpleSGDTrainer(self.model,
                                               learning_rate=learning_rate)
        elif optimizer == "adam":
            self.trainer = dy.AdamTrainer(self.model, alpha=learning_rate)
        else:
            raise ValueError("Unknown optimizer")

        # CRF
        if crf:
            self.num_tags = output_size + 2  # Add 2 to account for start and end tags in CRF
            self.trans_mat = self.model.add_parameters(
                (self.num_tags, self.num_tags))
            self._loss = self._calculate_crf_loss
            self._predict = self._crf_predict_sentence
        else:
            self.num_tags = output_size
            self._loss = self._calculate_loss
            self._predict = self._predict_sentence

        # Embedding
        self.lookup = self.model.lookup_parameters_from_numpy(
            embedding.vectors)
        (embed_size, _), _ = self.lookup.dim()

        # Bi-LSTM
        self.bilstm = dy.BiRNNBuilder(num_layers=2,
                                      input_dim=embed_size,
                                      hidden_dim=hidden_size * 2,
                                      model=self.model,
                                      rnn_builder_factory=dy.LSTMBuilder)

        # Dense layer
        self.w = self.model.add_parameters((self.num_tags, hidden_size * 2))
        self.b = self.model.add_parameters(self.num_tags)

        self.dropout_rate = dropout_rate
Example #6
    def __init__(self, words_size, chars_size,
            w_embed_size, c_embed_size, lstm_hidden_size,
            clstm_hidden_size, lstm_num_layers,
            out_hidden_size, out_size):

        self.words_size = words_size
        self.chars_size = chars_size
        self.out_size = out_size

        self.w_embed_size = w_embed_size
        self.c_embed_size = c_embed_size
        self.clstm_hidden_size = clstm_hidden_size
        self.lstm_hidden_size = lstm_hidden_size * 2 # must be even
        self.lstm_num_layers = lstm_num_layers
        self.out_hidden_size = out_hidden_size

        self.model = dy.ParameterCollection()
        self.trainer = dy.SimpleSGDTrainer(self.model, learning_rate=0.1)

        # words and tags, entities embeddings
        self.wlookup = self.model.add_lookup_parameters((words_size, self.w_embed_size))
        self.clookup = self.model.add_lookup_parameters((chars_size, self.c_embed_size))

        # feature extractor
        self.lstm = dy.LSTMBuilder(
                self.lstm_num_layers,
                self.w_embed_size +self.clstm_hidden_size,
                self.lstm_hidden_size,
                self.model,
        )

        # char encoder
        self.clstm = dy.LSTMBuilder(
                self.lstm_num_layers,
                self.c_embed_size,
                self.clstm_hidden_size,
                self.model,
        )
        self.char_to_lstm      = self.model.add_parameters((self.clstm_hidden_size, self.c_embed_size))
        self.char_to_lstm_bias = self.model.add_parameters((self.clstm_hidden_size))

        # transform word+pos vector into a vector similar to the lstm output
        # used to generate padding vectors
        self.word_to_lstm      = self.model.add_parameters((self.lstm_hidden_size, self.w_embed_size + self.clstm_hidden_size))
        self.word_to_lstm_bias = self.model.add_parameters((self.lstm_hidden_size))

        self.output_hidden      = self.model.add_parameters((self.out_hidden_size, self.lstm_hidden_size))
        self.output_hidden_bias = self.model.add_parameters((self.out_hidden_size))
        self.output      = self.model.add_parameters((self.out_size, self.out_hidden_size))
        self.output_bias = self.model.add_parameters((self.out_size))
Example #7
    def fit(self, train_X, train_Y, num_iterations, train_algo, seed=None):
        """
        train the tagger
        """
        print("read training data", file=sys.stderr)

        if seed:
            print(">>> using seed: ", seed, file=sys.stderr)
            random.seed(seed)  #setting random seed

        # init lookup parameters and define graph
        print("build graph", file=sys.stderr)

        num_words = len(self.w2i)
        num_chars = len(self.c2i)

        self.predictors, self.char_rnn, self.wembeds, self.cembeds = self.build_computation_graph(
            num_words, num_chars)

        if train_algo == "sgd":
            trainer = dynet.SimpleSGDTrainer(self.model)
        elif train_algo == "adam":
            trainer = dynet.AdamTrainer(self.model)

        assert (len(train_X) == len(train_Y))
        train_data = list(zip(train_X, train_Y))

        for cur_iter in range(num_iterations):
            total_loss = 0.0
            total_tagged = 0.0
            random.shuffle(train_data)
            for ((word_indices, char_indices), y) in train_data:
                # use same predict function for training and testing
                output = self.predict(word_indices, char_indices, train=True)

                loss1 = dynet.esum([
                    self.pick_neg_log(pred, gold)
                    for pred, gold in zip(output, y)
                ])
                lv = loss1.value()
                total_loss += lv
                total_tagged += len(word_indices)

                loss1.backward()
                trainer.update()

            print("iter {2} {0:>12}: {1:.2f}".format("total loss",
                                                     total_loss / total_tagged,
                                                     cur_iter),
                  file=sys.stderr)
Example #8
    def train(self, examples, clusters):
        # num_examples = len(examples)
        num_examples = 10
        trainer = dy.SimpleSGDTrainer(self.params)

        # Conditional Language Model
        for epoch in range(self.num_epochs):
            batch_loss = []
            loss_sum = 0
            for idx in range(num_examples):
                z_list = clusters[idx]
                onehot_zlist = []
                for z in z_list:
                    onehot_z = np.zeros(self.num_clusters)
                    onehot_z[z] = 1
                    onehot_z = dy.inputVector(onehot_z)
                    onehot_zlist.append(onehot_z)
                loss = self.lm_train_example(examples[idx], onehot_zlist)
                batch_loss.append(loss)

                # Minibatching:
                if (idx % self.minibatch == 0) or (idx + 1 == num_examples):
                    batch_loss = dy.esum(batch_loss)
                    loss_sum += batch_loss.value()
                    batch_loss.backward()
                    batch_loss = []
                    trainer.update()
                    dy.renew_cg()
            print("(Language Model) Epoch: {} | Loss: {}".format(
                epoch + 1, loss_sum))

        # Latent Variable Prediction
        for epoch in range(self.num_epochs):
            batch_loss = []
            loss_sum = 0
            for idx in range(num_examples):
                z_list = clusters[idx]
                loss = self.latent_variable_prediction(examples[idx])
                batch_loss.append(loss)

                # Minibatching:
                if (idx % self.minibatch == 0) or (idx + 1 == num_examples):
                    batch_loss = dy.esum(batch_loss)
                    loss_sum += batch_loss.value()
                    batch_loss.backward()
                    batch_loss = []
                    trainer.update()
                    dy.renew_cg()
            print("(Latent Variable Prediction) Epoch: {} | Loss: {}".format(
                epoch + 1, loss_sum))
Example #9
def get_trainer(opt, s2s):
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(s2s.pc, learning_rate=opt.learning_rate)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.pc,
                                        learning_rate_min=opt.learning_rate /
                                        10.0,
                                        learning_rate_max=opt.learning_rate)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.pc,
                                        learning_rate=opt.learning_rate)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.pc, learning_rate=opt.learning_rate)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.pc, opt.learning_rate)
    else:
        print('Trainer name invalid or not provided, using SGD',
              file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.pc, learning_rate=opt.learning_rate)

    trainer.set_clip_threshold(opt.gradient_clip)

    return trainer
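A related sketch (the decay factor and the empty epoch body are placeholder assumptions) of decaying the step size between epochs via the trainer's learning_rate attribute, the same attribute the softmax test example near the end of this page assigns directly:

import dynet as dy

pc = dy.ParameterCollection()
trainer = dy.SimpleSGDTrainer(pc, learning_rate=0.1)
trainer.set_clip_threshold(5.0)      # gradient clipping, as in get_trainer above

decay = 0.9                          # illustrative decay factor
for epoch in range(20):
    # ... one epoch of renew_cg() / loss.backward() / trainer.update() goes here ...
    trainer.learning_rate *= decay   # shrink the step size after each epoch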
Example #10
def main():
    training_log = open('training-'+str(datetime.now())+'.log','w')
    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    training_src = read_file(sys.argv[1])
    word_freq_src = build_dict(training_src)

    training_tgt = read_file(sys.argv[2])
    word_freq_tgt = build_dict(training_tgt)

    training_src = sentence_clean(training_src,word_freq_src)
    training_tgt = sentence_clean(training_tgt,word_freq_tgt)
    dev_src = sentence_clean(read_file(sys.argv[3]),word_freq_src)
    dev_tgt = sentence_clean(read_file(sys.argv[4]),word_freq_tgt)
    test_src = sentence_clean(read_file(sys.argv[5]),word_freq_src)
    attention = Attention(model, list(training_src), list(training_tgt))

    train_data = zip(training_src, training_tgt)
    train_data.sort(key=lambda x: -len(x[0]))

    train_src = [sent[0] for sent in train_data]
    train_tgt = [sent[1] for sent in train_data]

    start = time.time()
    for epoch in range(150):
        epoch_loss = 0
        train_zip = zip(train_src, train_tgt)
        i = 0
        while i < len(train_zip):
            esum,num_words = attention.step_batch(train_zip[i:i+attention.BATCH_SIZE])
            i += attention.BATCH_SIZE
            epoch_loss += esum.scalar_value()
            esum.backward()
            trainer.update()
        # if epoch_loss < 10:
        #     end = time.time()
        #     print 'TIME ELAPSED:', end - start, 'SECONDS'
        #     break
        print 'Epoch:',epoch
        training_log.write("Epoch %d: loss=%f \n" % (epoch, epoch_loss))
        training_log.flush()
        trainer.update_epoch(1.0)
        #training_log.write(attention.translate_sentence(training_src[0])+'\n')
        if epoch % 5 == 0:
            #attention.save(epoch)
            ori_sentence = attention.translate_sentence_ori(training_src[0])
            training_log.write('ori:'+ori_sentence+'\n')
            #print '----ori finished----'
            training_log.write('new:'+attention.translate_sentence_beam(training_src[0])+'\n')
Example #11
    def __init__(self):
        print("hello from Talha Yılmaz :)")

        self.word_number = 15
        self.epoch_num = 4
        self.embedding_layer_size = 32
        self.hidden_layer_size = 32
        self.min_count = 2

        self.model = dy.Model()
        self.trainer = dy.SimpleSGDTrainer(self.model)

        self.pW_hidden = self.model.add_parameters(
            (self.hidden_layer_size, self.embedding_layer_size))
        self.pB_hidden = self.model.add_parameters(self.hidden_layer_size)
Example #12
def train(network, train_set, val_set, epochs):
    global TEXTE
    TEXTE += "<ul>"
    MAX_STRING_LEN = 50

    def get_val_set_loss(network, val_set):
        loss = [
            network.get_loss(input_string, output_string).value()
            for input_string, output_string in val_set
        ]
        return sum(loss)

    trainer = dy.SimpleSGDTrainer(network.model)
    losses = list()
    iterations = list()
    occurences = 0

    for i in range(epochs):
        print "Epoch ", i
        for training_example in train_set:
            occurences += 1
            input_string, output_string = training_example

            loss = network.get_loss(input_string, output_string)
            # performing a forward through the network.
            loss_value = loss.value()
            # an optimization step
            loss.backward()
            trainer.update()

            if occurences % ((len(train_set) * epochs) / 100) == 0:
                val_loss = get_val_set_loss(network, val_set)
                losses.append(val_loss)
                iterations.append(occurences /
                                  (((len(train_set) * epochs) / 100)))

        plot_name = 'plots/' + str(network).split()[0].split('.')[1] + '.png'
        plt.ioff()
        fig = plt.figure()
        plt.plot(iterations, losses)
        plt.axis([0, 100, 0, len(val_set) * MAX_STRING_LEN])
        if not os.path.exists("plots"):
            os.makedirs("plots")
        plt.savefig(plot_name)
        plt.close(fig)
        TEXTE += "<il>Epoche %d - loss on validation set is %.9f </il>" % (
            i, val_loss)
    TEXTE += '</ul><img src="%s">' % plot_name
Example #13
 def set_trainer(self, optimization):
     if optimization == 'MomentumSGD':
         self.trainer = dy.MomentumSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
     elif optimization == 'CyclicalSGD':
         self.trainer = dy.CyclicalSGDTrainer(
             self.model,
             learning_rate_max=self.hp.learning_rate_max,
             learning_rate_min=self.hp.learning_rate_min)
     elif optimization == 'Adam':
         self.trainer = dy.AdamTrainer(self.model)
     elif optimization == 'RMSProp':
         self.trainer = dy.RMSPropTrainer(self.model)
     else:  # 'SimpleSGD'
         self.trainer = dy.SimpleSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
Example #14
def trainExample(model):
    #SGD Trainer
    trainer = pc.SimpleSGDTrainer(model)
    symbols = []
    # words, symbols = read(TRAIN_FILEPATH_SRC)

    for i in xrange(EPOCHS):
        s = 0
        for src, target in read(TRAIN_FILEPATH_SRC):
            loss_value = train(model, trainer, src, target)
            if (s % 1000 == 0):
                print "Epoch: ", i, " Sentence: ", s, " Loss: ", loss_value
            s = s + 1
        print "Epoch: ", i, "Loss: ", loss_value

        model.save(MODEL_PATH)
Example #15
def main():
    model = dy.Model()

    trainer = dy.SimpleSGDTrainer(model)
    #training_src = read_file(sys.argv[1])
    #training_tgt = read_file(sys.argv[2])
    #trainFileName_src = "train.en-de.low.filt.de"
    #trainFileName_tgt = "train.en-de.low.filt.en"
    trainFileName_src = sys.argv[1]
    trainFileName_tgt = sys.argv[2]
    training_src = []
    training_tgt = []
    for line in open(trainFileName_src, 'r'):
        fields = line.strip().split(' ')
        training_src.append(fields)
    for line in open(trainFileName_tgt, 'r'):
        fields = line.strip().split(' ')
        training_tgt.append(fields)

    attention = Attention(model, training_src, training_tgt)

    (attention.src_lookup, attention.tgt_lookup, attention.l2r_builder,
     attention.r2l_builder, attention.dec_builder, attention.W_y,
     attention.b_y, attention.W1_att_f, attention.W1_att_e,
     attention.w2_att) = model.load('myModel')
    attention.l2r_builder.disable_dropout()
    attention.r2l_builder.disable_dropout()
    attention.dec_builder.disable_dropout()
    testFileName = sys.argv[3]
    testSent = []

    for line in open(testFileName, 'r'):
        fields = line.strip().split(' ')
        testSent.append(fields)

    rst = []
    #translate
    ccc = 0
    for testS in testSent:
        ccc += 1
        rst.append(attention.translate_sentence(testS))

    f = open('rst', 'w')
    for i in rst:
        f.write(i + '\n')
    f.close()
Example #16
    def __init__(self, char_dim, feat_dim, hidden_dim, char_size, feat_sizes):
        self._char_dim = char_dim
        self._feat_dim = feat_dim

        self._pc = dy.ParameterCollection()

        if config.adam:
            self._trainer = dy.AdamTrainer(self._pc, config.learning_rate, config.beta_1, config.beta_2, config.epsilon)
        else:
            # self._trainer = dy.AdadeltaTrainer(self._pc)
            self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
            self._trainer.set_clip_threshold(config.clip_threshold)

        # self._trainer.set_clip_threshold(1.0)

        self.params = dict()

        self.lp_c = self._pc.add_lookup_parameters((char_size, char_dim))
        self.lp_feats = []
        for idx in range(len(feat_sizes)):
            self.lp_feats.append(self._pc.add_lookup_parameters((feat_sizes[idx], feat_dim), init=dy.ConstInitializer(0.)))

        # self._pdrop_embs = pdrop_embs
        # self._pdrop_lstm = pdrop_lstm
        # self._pdrop_mlp = pdrop_mlp

        self.LSTM_builders = []

        f = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
        b = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)

        self.LSTM_builders.append((f, b))
        for i in range(config.layers - 1):
            f = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
            b = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
            self.LSTM_builders.append((f, b))

        self.dec_LSTM = dy.VanillaLSTMBuilder(1, hidden_dim, hidden_dim, self._pc)

        self.MLP = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
        self.MLP_bias = self._pc.add_parameters((hidden_dim))
        self.classifier = self._pc.add_parameters((hidden_dim, char_size))
        self.classifier_bias = self._pc.add_parameters((char_size))
        self.MLP_attn = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
        self.MLP_attn_bias = self._pc.add_parameters((hidden_dim))
        self.attn_weight = self._pc.add_parameters((char_dim))
Example #17
def train(seq2seq, sentence_pairs, n_round=200):
    trainer = dn.SimpleSGDTrainer(seq2seq.model)
    for i in xrange(n_round):
        if (i + 1) % ((n_round + 19) / 20) == 0:
            from random import randint
            idx = randint(0, len(sentence_pairs) - 1)
        else:
            idx = -1
        for i, (in_s, out_s) in enumerate(sentence_pairs):
            loss = seq2seq.compute_loss(in_s, out_s[::-1])
            loss_value = loss.value()
            loss.backward()
            trainer.update()
            if i == idx:
                print loss_value, idx,
                print in_s, " >>> ",
                print seq2seq.generate(in_s)[::-1]
Example #18
def train(lstm, params, train_data, dev_data, epochs):
    trainer = dy.SimpleSGDTrainer(pc)
    for ep in range(epochs):
        i = 0
        print("EPOCH {}".format(ep))
        np.random.shuffle(train_data)
        #print("train_data {}".format(train_data[0]))
        for train_y, sentence in train_data:
            #print("sentence\n{}\ntrain_y{}".format(sentence, train_y))
            loss, _ = do_one_sentence(lstm, params, sentence, train_y)
            #print("after do one sent")
            loss.backward()
            trainer.update()
            if i % 200 == 0:
                dev_loss, dev_acc = check_loss(lstm, params, dev_data, ep > 1)
                print("loss: {:.4f}\tacc: {:.2f}".format(dev_loss, dev_acc))
            i += 1
Example #19
def train_network(train_data, dev_data, encoder, network):
    global prev_acc, prev_acc_ex0, model_file, report
    model = network.model
    trainer = dy.SimpleSGDTrainer(model)  

    prev_acc = prev_acc or 0.5
    prev_acc_ex0 = prev_acc_ex0 or 0.5
    
    report = []
    tagged = loss = 0
    i = 1
    t0 = time.clock()
    for ep in range(EPOCHS):
        random.shuffle(train_data)
        for s in train_data:
            i += 1
            if i % 20000 == 0:
                print("average loss last 500 cycles: {}".format(loss / tagged))
                acc, acc_ex0 = test_a_classifier_on_dev(network, dev_data) 
                print("dev accuracy after {} cycles: {}, {}".format(i, acc, acc_ex0))
                ti = time.clock() 
                report.append(OrderedDict([
                    ("cycles", i),
                    ("dev_accuracy", acc),
                    ("dev_accuracy_except_common", acc_ex0),
                    ("loss", loss / tagged),
                    ("clock_time",  round(ti-t0,2)),
                    ("saved", 0)
                ]))
                loss = 0
                tagged = 0
                ti = t0
                if acc > prev_acc:
                    print("saving")
                    network.save(model_file)
                    report[-1]["saved"] = 1
                    prev_acc = acc
                if acc_ex0 > prev_acc_ex0:
                    prev_acc_ex0 = acc_ex0
            
            sum_errs = single_training_pass(s, encoder, network)
            loss += sum_errs.scalar_value()
            tagged += len(s)
            sum_errs.backward()
            trainer.update() 
Example #20
def train(network, train_set, val_set, epochs):
    global TEXTE
    TEXTE += "<ul>"
    MAX_STRING_LEN = 50  # for the scale of the plot of gradient descent

    def get_val_set_loss(network, val_set):
        loss = [
            network.get_loss(input_string, output_string).value()
            for input_string, output_string in val_set
        ]
        return sum(loss)

    trainer = dy.SimpleSGDTrainer(network.model)
    losses = list()
    iterations = list()
    occurences = 0

    for i in range(epochs):
        print "Epoch ", i
        for training_example in train_set:
            occurences += 1
            input_string, output_string = training_example

            loss = network.get_loss(input_string, output_string)
            loss_value = loss.value()
            loss.backward()
            trainer.update()

            if occurences % ((len(train_set) * epochs) / 100) == 0:
                val_loss = get_val_set_loss(network, val_set)
                losses.append(val_loss)
                iterations.append(occurences /
                                  (((len(train_set) * epochs) / 100)))

        plt.ioff()
        fig = plt.figure()
        plt.plot(iterations, losses)
        plt.axis([0, 100, 0, len(val_set) * MAX_STRING_LEN])
        if not os.path.exists("plots"):
            os.makedirs("plots")
        plt.savefig('plots/plot.png')
        plt.close(fig)
        TEXTE += "<li>Epoche %d - loss on validation set is %.9f </li>" % (
            i, val_loss)
    TEXTE += '</ul><img src="plots/plot.png">'
Example #21
def main():

    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    training_src = read_file(sys.argv[1])
    training_tgt = read_file(sys.argv[2])
    dev_src = read_file(sys.argv[3])
    dev_tgt = read_file(sys.argv[4])
    test_src = read_file(sys.argv[5])
    model_name = sys.argv[6]

    src_vector_file = None
    if len(sys.argv) > 7:
        src_vector_file = sys.argv[7]
    test_sense_src = None
    if len(sys.argv) > 8 and not sys.argv[8].startswith("--"):
        test_sense_src = read_file_sense(sys.argv[8])

    dev = [(x, y) for (x, y) in zip(dev_src, dev_tgt)]

    if OLAF:
        print("Burrrr!  The vectors are frozen!")
    else:
        print("The vectors are not frozen and olaf is melting!")

    if DEV:
        print("In DEV mode, limiting each corpus to {0} sentences".format(
            DEV_LIMIT))

    attention = Attention(model,
                          training_src,
                          training_tgt,
                          model_name,
                          src_vectors_file=src_vector_file,
                          frozen_vectors=OLAF)

    out_language = sys.argv[1].split('.')[-1]

    if LOAD_MODEL:
        attention.load_model()
    if TRAIN:
        attention.train_batch(dev, trainer, test_src, True,
                              'test.' + out_language, test_sense_src)

    attention.translate(test_src, 'test.' + out_language)
Example #22
    def __init__(self, input_path, n):
        self.n = n
        self.vocabs = {}
        self.sentences = []
        self.initialize_vocab(input_path)
        self.vocab_size = len(self.vocabs)

        self.model = dy.Model()  # must not be garbage-collected/out of scope
        self.trainer = dy.SimpleSGDTrainer(self.model)
        self.embedding = self.model.add_lookup_parameters(
            (self.vocab_size, EMBEDDING_DIMENSION))
        self.w1 = self.model.add_parameters(
            (HIDDEN_SIZE, (self.n - 1) * EMBEDDING_DIMENSION))
        self.b1 = self.model.add_parameters(HIDDEN_SIZE)
        self.w2 = self.model.add_parameters((self.vocab_size, HIDDEN_SIZE))
        self.b2 = self.model.add_parameters(self.vocab_size)

        self.unknown_words = set()
Example #23
    def __init__(self, init_learning_rate, vw):
        self.model = dt.Model()
        self.vw = vw
        n_words = vw.size()

        self.learner = dt.SimpleSGDTrainer(self.model, e0=init_learning_rate)
        self.E = self.model.add_lookup_parameters(
            (n_words, SqaModel.WORD_EMBEDDING_DIM))
        # similarity(v,o): v^T o
        self.SelColW = self.model.add_parameters((4))
        self.SelColWhereW = self.model.add_parameters((4))
        self.NulW = self.model.add_parameters((SqaModel.WORD_EMBEDDING_DIM))
        self.ColW = self.model.add_parameters((SqaModel.WORD_EMBEDDING_DIM))

        # LSTM question representation
        self.builders = [
            dt.LSTMBuilder(1, SqaModel.WORD_EMBEDDING_DIM,
                           SqaModel.LSTM_HIDDEN_DIM, self.model),
            dt.LSTMBuilder(1, SqaModel.WORD_EMBEDDING_DIM,
                           SqaModel.LSTM_HIDDEN_DIM, self.model)
        ]
        self.pH = self.model.add_parameters(
            (SqaModel.WORD_EMBEDDING_DIM, SqaModel.LSTM_HIDDEN_DIM * 2))

        if config.d["USE_PRETRAIN_WORD_EMBEDDING"]:
            n_hit_pretrain = 0.0
            trie = config.d["embeddingtrie"]
            print("beginning to load embeddings....")
            for i in range(n_words):
                word = self.vw.i2w[i].lower()
                results = trie.items(word + config.d["recordtriesep"])
                if len(results) == 1:
                    pretrain_v = np.array(list(results[0][1]))
                    pretrain_v = pretrain_v / np.linalg.norm(pretrain_v)
                    self.E.init_row(i, pretrain_v)
                    n_hit_pretrain += 1
                else:
                    pretrain_v = self.E[i].npvalue()
                    pretrain_v = pretrain_v / np.linalg.norm(pretrain_v)
                    self.E.init_row(i, pretrain_v)

            print("the number of words that are in pretrain", n_hit_pretrain,
                  n_words, n_hit_pretrain / n_words)
            print("loading complete!")
Example #24
    def __init__(self, args):
        dy.renew_cg()

        self.args = args  # save for later
        self.model = dy.Model()

        if args.trainer.lower() == "sgd":
            self.trainer = dy.SimpleSGDTrainer(self.model,
                                               e0=args.learning_rate)
        elif args.trainer.lower() == "adam":
            self.trainer = dy.AdamTrainer(self.model, alpha=args.learning_rate)
        else:
            raise RuntimeError("Unknown trainer {}".format(args.trainer))

        if args.lr_decay > 1.0 or args.lr_decay <= 0.0:
            raise RuntimeError(
                "illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
        self.learning_scale = 1.0
        self.early_stopping_reached = False

        # Create the model serializer
        self.create_model()
        # single mode
        if args.batch_size is None or args.batch_size == 1 or args.batch_strategy.lower(
        ) == 'none':
            print('Start training in non-minibatch mode...')
            self.logger = NonBatchLossTracker(args.eval_every,
                                              self.total_train_sent)

        # minibatch mode
        else:
            print('Start training in minibatch mode...')
            self.batcher = Batcher.select_batcher(args.batch_strategy)(
                args.batch_size)
            if args.input_format == "contvec":
                assert self.train_src[0].nparr.shape[
                    1] == self.input_embedder.emb_dim, "input embed dim is different size than expected"
                self.batcher.pad_token = np.zeros(self.input_embedder.emb_dim)
            self.train_src, self.train_trg = self.batcher.pack(
                self.train_src, self.train_trg)
            self.dev_src, self.dev_trg = self.batcher.pack(
                self.dev_src, self.dev_trg)
            self.logger = BatchLossTracker(args.eval_every,
                                           self.total_train_sent)
Example #25
def train(model, data):
    trainer = dy.SimpleSGDTrainer(model)
    for n in range(EPOCHS):
        totalloss = 0
        random.shuffle(data)
        for i, ex in enumerate(data):
            stdout.write('EPOCH %u: ex %u of %u\r' % (n+1,i+1,len(data)))
            tab,label,wf = ex
            loss = get_loss(tab,label,wf)
            totalloss += loss.value()
            loss.backward()
            trainer.update()
        print()
        print(totalloss/len(data))
        for ex in data[:10]:
            tab, label, wf = ex
            print('input:',''.join([tab,label]),
                  'sys:',generate(tab,label),
                  'gold:',''.join(wf))
Example #26
def main():
    training_log = open('training-' + str(datetime.now()) + '.log', 'w')
    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    training_src = read_file(sys.argv[1])
    training_tgt = read_file(sys.argv[2])
    dev_src = read_file(sys.argv[3])
    dev_tgt = read_file(sys.argv[4])
    test_src = read_file(sys.argv[5])
    attention = Attention(model, list(training_src), list(training_tgt))

    train_data = zip(training_src, training_tgt)
    train_data.sort(key=lambda x: -len(x[0]))

    train_src = [sent[0] for sent in train_data]
    train_tgt = [sent[1] for sent in train_data]

    start = time.time()
    for epoch in range(5000):
        epoch_loss = 0
        train_zip = zip(train_src, train_tgt)
        i = 0
        while i < len(train_zip):
            esum, num_words = attention.step_batch(
                train_zip[i:i + attention.BATCH_SIZE])
            i += attention.BATCH_SIZE
            epoch_loss += esum.scalar_value()  #/ num_words
            esum.backward()
            trainer.update()
        # if epoch_loss < 10:
        #     end = time.time()
        #     print 'TIME ELAPSED:', end - start, 'SECONDS'
        #     break
        #print 'Epoch:',epoch
        print "Epoch %d: loss=%f \n" % (epoch, epoch_loss)
        training_log.write("Epoch %d: loss=%f \n" % (epoch, epoch_loss))
        training_log.flush()
        trainer.update_epoch(1.0)
        training_log.write(
            attention.translate_sentence(training_src[0]) + '\n')
        if epoch % 100 == 0:
            attention.save(epoch)
Example #27
def train(model, data):
    trainer = dy.SimpleSGDTrainer(model)
    for n in range(5):
        totalloss = 0
        random.shuffle(data)
        for i, io in enumerate(data):
            if i > 5000:
                break
            stdout.write('EPOCH %u: ex %u of %u\r' % (n+1,i+1,len(data)))
            input,output = io
            loss = get_loss(input, output, enc_fwd_lstm, enc_bwd_lstm, dec_lstm)
            totalloss += loss.value()
            loss.backward()
            trainer.update()
        print()
        print(totalloss/len(data))
        for input, output in data[:10]:
            print('input:',''.join(input),
                  'sys:',generate(input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm),
                  'gold:',''.join(output))
Example #28
    def __init__(self, vocab_size):
        self.model = dy.Model()
        self.trainer = dy.SimpleSGDTrainer(self.model)
        self.layers = 2
        self.embed_size = 1
        self.hidden_size = 1
        self.src_vocab_size = vocab_size
        self.tgt_vocab_size = vocab_size

        self.enc_builder = dy.LSTMBuilder(self.layers, self.embed_size,
                                          self.hidden_size, self.model)
        self.dec_builder = dy.LSTMBuilder(self.layers, self.embed_size,
                                          self.hidden_size, self.model)
        self.src_lookup = self.model.add_lookup_parameters(
            (self.src_vocab_size, self.embed_size))
        self.tgt_lookup = self.model.add_lookup_parameters(
            (self.tgt_vocab_size, self.embed_size))
        self.W_y = self.model.add_parameters(
            (self.tgt_vocab_size, self.hidden_size))
        self.b_y = self.model.add_parameters((self.tgt_vocab_size))
Example #29
    def train(self, examples):
        num_examples = len(examples)
        trainer = dy.SimpleSGDTrainer(self.params)

        for epoch in range(self.num_epochs):
            batch_loss = []
            loss_sum = 0
            for idx in range(num_examples):
                loss = self.train_example(examples[idx])
                batch_loss.append(loss)

                # Minibatching:
                if (idx % self.minibatch == 0) or (idx + 1 == num_examples):
                    batch_loss = dy.esum(batch_loss)
                    loss_sum += batch_loss.value()
                    batch_loss.backward()
                    batch_loss = []
                    trainer.update()
                    dy.renew_cg()
            print("Epoch: {} | Loss: {}".format(epoch + 1, loss_sum))
Example #30
def test_softmax_model():
    """Train softmax model for a number of steps."""
    config = Config()

    # Generate random data to train the model on
    np.random.seed(1234)
    inputs = np.random.rand(config.n_samples, config.n_features)
    labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32)
    labels[:, 1] = 1
    #for i in xrange(config.n_samples):
    #    labels[i, i%config.n_classes] = 1

    mini_batches = [[
        inputs[k:k + config.batch_size], labels[k:k + config.batch_size]
    ] for k in xrange(0, config.n_samples, config.batch_size)]

    m = dy.ParameterCollection()
    trainer = dy.SimpleSGDTrainer(m)
    trainer.learning_rate = config.lr
    net = SoftmaxModel(config, m)
    for epoch in range(config.n_epochs):
        start_time = time.time()
        for mini_batch in mini_batches:
            dy.renew_cg()
            losses = []
            for ix in xrange(config.batch_size):
                l = net.create_network_return_loss(
                    np.array(mini_batch[0][ix]).reshape(1, config.n_features),
                    np.array(mini_batch[1][ix]).reshape(1, config.n_classes))
                losses.append(l)
            loss = dy.esum(losses) / config.batch_size
            loss.forward()
            loss.backward()
            trainer.update()
        duration = time.time() - start_time
        print 'Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(
            epoch, loss.value(), duration)

    print loss.value()
    assert loss.value() < .5
    print "Basic (non-exhaustive) classifier tests pass"