Example #1
import os

import torch
import torch.nn as nn
import torch.optim as optim

# LSTM, Dataset, train, test, settings and args are defined elsewhere in this example.

def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size,
                 settings.hidden_size, settings.num_layers, settings.out_dim, settings.drop_out)
    # initialize the embedding layer with pre-trained word embeddings
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(torch.from_numpy(dataset.get_wordembedding()))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criteria = nn.CrossEntropyLoss()
    best_dev_acc = 0.0
    best_test_acc = 0.0

    # one step per batch; integer division keeps the step count whole
    for i in range(dataset.size // settings.batch_size * settings.max_epochs):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criteria)
        if (i + 1) % settings.validate_freq == 0:
            print("validating...")
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(args.model_dir, "sa_{}.model".format(best_dev_acc)))
            epoch = i * settings.batch_size / float(dataset.size)
            log_line = ("epoch: {}, dev acc: {}, test acc: {}, "
                        "batch loss: {}, best dev acc: {}, best test acc: {}".format(
                            epoch, dev_acc, test_acc, loss.item(), best_dev_acc, best_test_acc))
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write(log_line + "\n")
            print(log_line)
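The train helper called by main() is not part of this example. A minimal sketch, assuming batch_data is an (inputs, labels) pair of LongTensors and the model returns class logits; these shapes are assumptions, not part of the original:

def train(model, batch_data, optimizer, criteria):
    # assumed batch layout: (inputs, labels) LongTensors
    inputs, labels = batch_data
    if torch.cuda.is_available():
        inputs, labels = inputs.cuda(), labels.cuda()
    model.train()
    optimizer.zero_grad()
    logits = model(inputs)           # (batch_size, out_dim) class scores
    loss = criteria(logits, labels)  # cross-entropy against gold labels
    loss.backward()
    optimizer.step()
    return loss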
Example #2
def predict(self, inputs_data):
    """Run the prediction graph on raw inputs and return its outputs.
    (A ModelWrapper method, shown here as a fragment.)
    """
    # preprocess the raw inputs into a single standardized batch
    x_batch = Dataset.preprocess_for_prediction(inputs_data, self.settings)
    # print(x_batch)  # debug

    feed_dict = self._feed_data_predict(x_batch)
    outputs = self._sess.run(self._outputs_predict, feed_dict=feed_dict)

    return outputs
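The _feed_data_predict helper it calls is not shown. A minimal sketch, assuming the graph exposes a single input placeholder (the _input_x attribute name is an assumption):

def _feed_data_predict(self, x_batch):
    # map the standardized batch onto the graph's input placeholder
    return {self._input_x: x_batch}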
Example #3
#
model_tag = 'cnn'
#

if model_tag == 'cnn':
    from model_graph_cnn import build_graph
elif model_tag == 'csm':
    from model_graph_csm import build_graph
elif model_tag == 'rnn':
    from model_graph_rnn import build_graph
elif model_tag == 'mlp':
    from model_graph_mlp import build_graph
else:
    raise ValueError('unknown model_tag: %s' % model_tag)

#
# data (Dataset, ModelSettings and ModelWrapper come from the project's own modules)
dataset = Dataset()
dataset.load_vocab_tokens_and_emb()
#

#
config = ModelSettings()
config.vocab = dataset.vocab
config.model_tag = model_tag
config.model_graph = build_graph
config.is_train = False
config.check_settings()
#
model = ModelWrapper(config)
model.prepare_for_prediction()
#
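With the model prepared, prediction is a single call. A hedged usage sketch; the sample inputs and output handling are illustrative assumptions:

# hypothetical raw inputs; preprocess_for_prediction handles standardization
sample_texts = ["this film is wonderful", "a dull and lifeless plot"]
outputs = model.predict(sample_texts)
print(outputs)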
Example #4
#

if model_tag == 'cnn':
    from model_graph_cnn import build_graph
elif model_tag == 'csm':
    from model_graph_csm import build_graph
elif model_tag == 'rnf':
    from model_graph_rnf import build_graph
elif model_tag == 'rnn':
    from model_graph_rnn import build_graph
elif model_tag == 'mlp':
    from model_graph_mlp import build_graph
else:
    raise ValueError('unknown model_tag: %s' % model_tag)

#
# data
dataset = Dataset()
#

#
flag_load_data = True
# data
if flag_load_data:
    dataset.load_preprocessed_data()
else:
    dataset.pretrained_emb_file = None
    dataset.emb_dim = 200
    dataset.max_seq_len = 200
    dataset.prepare_preprocessed_data(load_vocab=False)
#
data_train, data_test = dataset.split_train_and_test()
#
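Example #4 stops after splitting the data. A hedged sketch of the wiring that would typically follow, mirroring the ModelSettings pattern of the prediction script above (prepare_for_train_and_valid and train_and_valid appear in the listing below; is_train=True is an assumption):

config = ModelSettings()
config.vocab = dataset.vocab
config.model_tag = model_tag
config.model_graph = build_graph
config.is_train = True
config.check_settings()
#
model = ModelWrapper(config)
model.prepare_for_train_and_valid()
model.train_and_valid(data_train, data_test)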
    def train_and_valid(self, train_data, valid_data):
        """Train the model, periodically evaluating on valid_data and
        keeping the checkpoint with the best validation metric.
        """
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)
        if not os.path.exists(self.model_dir + '_best'):
            os.mkdir(self.model_dir + '_best')
        
        print('Training and evaluating...')
        #start_time = time.time()
        total_batch = 0
        best_acc_val = 0.0 
        last_improved = 0
        
        lr = self.learning_rate_base
        with self._graph.as_default():
            self._sess.run(tf.assign(self._lr, tf.constant(lr, dtype=tf.float32)))
        
        valid_batches = Dataset.do_batching_data(valid_data, self.batch_size_eval)
        valid_batches = Dataset.do_standardizing_batches(valid_batches, self.settings)
        
        print('Creating model for evaluation ...')
        # the evaluation model shares self.settings, with dropout disabled
        config_e = self.settings
        config_e.keep_prob = 1.0
        model_e = ModelWrapper(config_e)
        model_e.prepare_for_train_and_valid()
        
        flag_stop = False
        for epoch in range(self.num_epochs):
            print('Epoch: %d, training ...' % (epoch + 1) )
            
            train_batches = Dataset.do_batching_data(train_data, self.batch_size)
            train_batches = Dataset.do_standardizing_batches(train_batches, self.settings)
            
            for data_batch in train_batches:
                feed_dict = self._feed_data_train(data_batch)
                
                # valid
                if total_batch % self.valid_per_batch == 0:
                    # load
                    ckpt = tf.train.get_checkpoint_state(self.model_dir)
                    if ckpt and ckpt.model_checkpoint_path:
                        model_e._saver.restore(model_e._sess, ckpt.model_checkpoint_path)
                    #print('evaluation-model created')                                        
                    loss_val, acc_val = model_e.evaluate(valid_batches)                    
                    #print('evaluated')
                    
                    # save best (>= so that ties also refresh the best checkpoint)
                    if acc_val >= best_acc_val:
                        best_acc_val = acc_val
                        last_improved = total_batch
                        model_e._saver_best.save(model_e._sess,
                                                 os.path.join(model_e.model_dir + '_best', model_e.model_name),
                                                 global_step = total_batch)
                        
                        # pb
                        constant_graph = graph_util.convert_variables_to_constants(
                                model_e._sess, model_e._sess.graph_def,
                                output_node_names = self.pb_outputs_name)
                        with tf.gfile.FastGFile(model_e.pb_file, mode='wb') as f:
                            f.write(constant_graph.SerializeToString())
                        #
                    
                    # stop
                    if total_batch - last_improved >= self.patience_stop:
                        str_info = "no improvement for a long time, stop optimization at curr_batch: %d" \
                                    % total_batch
                        self._log_info(str_info)
                        print(str_info)
                        #
                        flag_stop = True
                        break # for batch
                    
                    # decay
                    if total_batch - last_improved >= self.patience_decay or \
                    (not self.use_metric and \
                     total_batch > 0 and \
                     total_batch % self.patience_decay == 0):
                        lr *= self.ratio_decay
                        with self._graph.as_default():
                            self._sess.run(tf.assign(self._lr, tf.constant(lr, dtype=tf.float32)))
                        last_improved = total_batch
                        #
                        str_info = 'learning_rate DECAYED at total_batch: %d' % total_batch
                        self._log_info(str_info)
                        print(str_info)
                    
                    # time
                    # time_cost = time.time() - start_time
                    #
                    str_info = 'loss, metric, best_metric: %.6f, %.4f, %.4f' % (loss_val,
                                                                                acc_val, best_acc_val)
                    self._log_info(str_info)
                    # print(str_info)
                    #
                    str_info = 'curr_batch: %d, lr: %f' % (total_batch, lr)
                    self._log_info(str_info)
                    # print(str_info)

                # optim
                self._sess.run(self._train_op, feed_dict = feed_dict)
                total_batch += 1
                    
                # save
                if total_batch % self.save_per_batch == 0:
                    #s = session.run(merged_summary, feed_dict=feed_dict)
                    #writer.add_summary(s, total_batch)
                    loss = self._sess.run(self._loss_tensor, feed_dict = feed_dict)
                    metric = 0.0
                    if self.use_metric:
                        metric = self._sess.run(self._metric_tensor, feed_dict = feed_dict)
                    #
                    self._log_info("")
                    # print()
                    str_info = "epoch: %d" % (epoch + 1)
                    self._log_info(str_info)
                    # print(str_info)
                    #
                    str_info = "loss, metric of train: %f, %f" % (loss, metric)
                    self._log_info(str_info)                    
                    # print(str_info)
                    #                    
                    
                    self._saver.save(self._sess,
                                    os.path.join(self.model_dir, self.model_name),
                                    global_step = total_batch)
                #
            #
            if flag_stop: break # for epoch
            #
        #
        str_info = "training ended after total epoches: %d" % (epoch + 1)
        self._log_info(str_info)
        self._log_info("")
#
model_tag = 'rnn'
#

if model_tag == 'cnn':
    from model_graph_cnn import build_graph
elif model_tag == 'csm':
    from model_graph_csm import build_graph
elif model_tag == 'rnn':
    from model_graph_rnn import build_graph
elif model_tag == 'rnf':
    from model_graph_rnf import build_graph
else:
    raise ValueError('unknown model_tag: %s' % model_tag)

#
# data
dataset = Dataset()
#

#
flag_load_data = True
# data
if flag_load_data:
    dataset.load_preprocessed_data()
else:
    dataset.pretrained_emb_file = None
    dataset.prepare_preprocessed_data(load_vocab=False)
#
data_train = dataset.data_idx_train, dataset.labels_idx_train
data_valid = dataset.data_idx_valid, dataset.labels_idx_valid
#
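As with Example #4, a hedged completion that builds the model and starts training (same assumptions as the sketch above):

config = ModelSettings()
config.vocab = dataset.vocab
config.model_tag = model_tag
config.model_graph = build_graph
config.is_train = True
config.check_settings()
#
model = ModelWrapper(config)
model.prepare_for_train_and_valid()
model.train_and_valid(data_train, data_valid)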