Code Example #1
File: TreeDriver.py  Project: chriswtanner/CRETE
    def __init__(self, scope, num_dirs, opt="adagrad", lr=0.025):
        print("num_dirs:", num_dirs)
        sub_dir = "ecb_" + scope + "/"

        # init stuff
        print("TORCH VERSION:", torch.__version__)
        # parse configuration arguments for this run
        self.args = config.parse_known_args()

        self.args.cuda = self.args.cuda and torch.cuda.is_available()
        device = torch.device("cuda:0" if self.args.cuda else "cpu")
        torch.manual_seed(self.args.seed)
        random.seed(self.args.seed)

        print("TREELSTM:", opt, "lr:", lr)

        # paths
        train_dir = os.path.join(self.args.data, str(num_dirs), 'train/',
                                 sub_dir)
        dev_dir = os.path.join(self.args.data, str(num_dirs), 'dev/', sub_dir)
        test_dir = os.path.join(self.args.data, str(num_dirs), 'test/',
                                sub_dir)

        print("train_dir:", train_dir)
        print("dev_dir:", dev_dir)

        # builds vocabulary
        sick_vocab_file = Helper.build_entire_vocab(
            os.path.join(self.args.data, str(num_dirs), 'sick.vocab'),
            train_dir, dev_dir, test_dir)
        vocab = Vocab(filename=sick_vocab_file,
                      data=[
                          Constants.PAD_WORD, Constants.UNK_WORD,
                          Constants.BOS_WORD, Constants.EOS_WORD
                      ])
        print('==> SICK vocabulary size : %d ' % vocab.size())

        # loads SICKDataset: Trees, sentences, and labels
        self.train_dataset = Helper.load_data(
            train_dir,
            os.path.join(self.args.data, str(num_dirs), 'sick_train.pth'),
            vocab, self.args.num_classes)
        self.dev_dataset = Helper.load_data(
            dev_dir, os.path.join(self.args.data, str(num_dirs),
                                  'sick_dev.pth'), vocab,
            self.args.num_classes)
        self.test_dataset = Helper.load_data(
            test_dir,
            os.path.join(self.args.data, str(num_dirs), 'sick_test.pth'),
            vocab, self.args.num_classes)

        # creates the TreeLSTM
        model = SimilarityTreeLSTM(vocab.size(), self.args.input_dim,
                                   self.args.mem_dim, self.args.hidden_dim,
                                   self.args.num_classes, self.args.sparse,
                                   self.args.freeze_embed, vocab)
        criterion = nn.KLDivLoss()  # alternative: nn.CrossEntropyLoss()

        # loads glove embeddings
        emb = Helper.load_embeddings(
            self.args,
            os.path.join(self.args.data, str(num_dirs), 'sick_embed.pth'),
            vocab, device)

        # sets up the model
        # plug the pre-trained embeddings into the model's embedding matrix
        model.emb.weight.data.copy_(emb)
        model.to(device)
        criterion.to(device)
        # chooses the optimizer: Adam when requested, otherwise Adagrad with
        # weight decay (check the `opt` string before binding the optimizer object)
        if opt == "adam":
            optimizer = optim.Adam(
                filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
        else:
            optimizer = optim.Adagrad(
                filter(lambda p: p.requires_grad, model.parameters()),
                lr=lr, weight_decay=self.args.wd)

        self.metrics = Metrics(self.args.num_classes)

        # create trainer object for training and testing
        self.trainer = Trainer(self.args, model, criterion, optimizer,
                               device, vocab)
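
For context, a minimal usage sketch follows. The constructor above assumes the usual module-level imports (os, random, torch, torch.nn as nn, torch.optim as optim) plus the project's own config, Helper, Vocab, Constants, SimilarityTreeLSTM, Metrics, and Trainer modules. The scope string, directory count, and learning rate below are illustrative assumptions, not values taken from the repository.

    # hypothetical usage sketch: construct a driver over the ECB data
    # directories; the resulting Trainer is available as driver.trainer
    # (all argument values here are assumptions)
    driver = TreeDriver(scope="doc", num_dirs=25, opt="adam", lr=0.001)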