# --- Example #1 (scraped fragment marker; score: 0) ---
        times += 1
    pred_vector = np.argmax(np.array(pred_matrix), axis=1)
    c_m = confusion_matrix(np.array(y_true),
                           pred_vector,
                           labels=range(FLAGS.n_label))
    loss /= times
    accuracy = np.sum([c_m[i][i] for i in range(FLAGS.n_label)]) / np.sum(c_m)
    return loss, accuracy, c_m


if __name__ == "__main__":
    dataset_name = ['train', 'dev', 'test']
    datamanager = DataManager(FLAGS)
    data = {}
    for tmp in dataset_name:
        data[tmp] = datamanager.load_data(FLAGS.data_dir, '%s.json' % tmp)
    vocab, embed, vocab_dict = datamanager.build_vocab(
        '%s/%s' % (FLAGS.data_dir, FLAGS.wordvec_name), data['train'])

    print('model parameters: %s' % str(FLAGS))
    print("Use cuda: %s" % use_cuda)
    print('train data: %s, dev data: %s, test data: %s' %
          (len(data['train']), len(data['dev']), len(data['test'])))

    model = rnnCapsule(FLAGS.word_dim,
                       FLAGS.hidden_dim,
                       FLAGS.n_layer,
                       FLAGS.n_label,
                       batch_size=FLAGS.batch_size,
                       max_length=FLAGS.max_length,
                       learning_rate=FLAGS.learning_rate,
# --- Example #2 (scraped fragment marker; score: 0) ---
class AS_Capsules(object):
    def __init__(self):
        """Load the three corpus splits, build the vocabulary/embeddings,
        and construct the aspect-sentiment bridge model from FLAGS."""
        self.dataset_name = ('train', 'valid', 'test')
        self.datamanager = DataManager(FLAGS)
        # One entry per split, read from '<split>.txt' in the data directory.
        self.data = {
            split: self.datamanager.load_data(FLAGS.data_dir, '%s.txt' % split)
            for split in self.dataset_name
        }
        # The vocabulary and pretrained embeddings are built over the union
        # of all three splits.
        full_corpus = (self.data['train'] + self.data['valid'] +
                       self.data['test'])
        vocab, embed, vocab_dict = self.datamanager.build_vocab(
            '%s/%s' % (FLAGS.data_dir, FLAGS.path_wordvec), full_corpus)

        print('model parameters: %s' % str(FLAGS))
        print("Use cuda: %s" % use_cuda)
        for split in self.dataset_name:
            print('Dataset Statictis: %s data: %s' %
                  (split, len(self.data[split])))

        # All hyperparameters come straight from FLAGS; vocab/embed feed the
        # model's word-embedding layer.
        self.model = bridgeModel(FLAGS.dim_word,
                                 FLAGS.dim_hidden,
                                 FLAGS.n_layer,
                                 FLAGS.n_label,
                                 FLAGS.n_aspect,
                                 batch_size=FLAGS.batch_size,
                                 max_length_sen=FLAGS.max_length_sen,
                                 learning_rate=FLAGS.learning_rate,
                                 lr_word_vector=FLAGS.lr_word_vector,
                                 weight_decay=FLAGS.weight_decay,
                                 vocab=vocab,
                                 embed=embed,
                                 embed_dropout_rate=FLAGS.embed_dropout,
                                 cell_dropout_rate=FLAGS.cell_dropout,
                                 final_dropout_rate=FLAGS.final_dropout,
                                 bidirectional=FLAGS.bidirectional,
                                 optim_type=FLAGS.optim_type,
                                 rnn_type=FLAGS.rnn_type,
                                 lambda1=FLAGS.lambda1,
                                 use_cuda=use_cuda)

    def train(self):
        """Run the optimization loop for FLAGS.iter_num steps.

        Every FLAGS.per_checkpoint steps, prints timing and loss statistics
        and evaluates on each split in self.dataset_name. Relies on
        module-level helpers `train` and `evaluate` and on `num_loss`
        (none visible in this chunk) — TODO confirm their contracts.
        """
        # Accumulates the per-component loss averaged over a checkpoint
        # window; `num_loss` presumably matches the length of the vector
        # returned by the module-level train() helper — verify.
        loss_step, time_step = np.ones((num_loss, )), 0
        start_time = time.time()
        for step in range(FLAGS.iter_num):
            if step % FLAGS.per_checkpoint == 0:
                # Format a 1-D float array as "[a b c]" with 4 decimals.
                show = lambda a: '[%s]' % (' '.join(['%.4f' % x for x in a]))
                time_step = time.time() - start_time
                print(
                    "------------------------------------------------------------------"
                )
                print('Time of iter training %.2f s' % time_step)
                # Losses are exponentiated before printing — looks like they
                # are kept in log space (perplexity-style); confirm against
                # the loss definition.
                print("On iter step %s:, global step %d Loss-step %s" %
                      (step / FLAGS.per_checkpoint, step,
                       show(np.exp(loss_step))))
                # self.model.save_model("%s/%s" % ("./model", FLAGS.name_model), int(step/FLAGS.per_checkpoint))

                # Evaluate on every split; dict_eva appears to hold nested
                # metrics keyed by 'Asp'/'Sen'/'All' plus 'micro_F1_Asp'.
                for name in self.dataset_name:
                    loss, dict_eva = evaluate(self.model, self.datamanager,
                                              self.data[name])
                    print(
                        'In dataset %s: Loss is %s, Accu-Asp is %s, F1-Asp is %s'
                        % (name, show(
                            np.exp(loss)), show(dict_eva['Asp']['acc']),
                           show(dict_eva['Asp']['f1'])))
                    print('Loss is %s, Accu-Sen is %.4f, F1-Sen is %s' %
                          (show(np.exp(loss)), dict_eva['Sen']['acc'],
                           show(dict_eva['Sen']['f1'])))
                    print('Loss is %s, Accu-All is %.4f, F1-All is %s' %
                          (show(np.exp(loss)), dict_eva['All']['acc'],
                           show(dict_eva['All']['f1'])))
                    print('For Asp, Micro-F1 is %s' % dict_eva['micro_F1_Asp'])
                    print('For Sen, C_M is \n%s' % dict_eva['Sen']['c_m'])
                    print('For All, C_M is \n%s' % dict_eva['All']['c_m'])

                # Reset the window timer and loss accumulator.
                start_time = time.time()
                loss_step = np.zeros((num_loss, ))

            # One training step on the train split; the returned loss vector
            # is pre-divided so loss_step ends up as the window average.
            loss_step += train(self.model, self.datamanager,
                               self.data['train']) / FLAGS.per_checkpoint