Ejemplo n.º 1
0
    def __init__(self, opt):
        """Load the ABSA dataset with BERT features, build bucketed train/test
        iterators, and place the model on the configured device."""
        self.opt = opt

        # Reader truncates/pads sentences to 70 tokens and emits BERT inputs.
        reader = ABSADatesetReader(
            dataset=opt.dataset, embed_dim=opt.embed_dim, use_bert=True, max_len=70)

        # Bucketed batching (max 90 tokens per bucket); only training is shuffled.
        self.train_data_loader = BucketIterator(
            data=reader.train_data, batch_size=opt.batch_size, shuffle=True, max_len=90)
        self.test_data_loader = BucketIterator(
            data=reader.test_data, batch_size=opt.batch_size, shuffle=False, max_len=90)

        self.model = opt.model_class(reader.embedding_matrix, opt).to(opt.device)
        self._print_args()
        self.global_f1 = 0.  # best macro-F1 observed so far

        if torch.cuda.is_available():
            print('cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))
Ejemplo n.º 2
0
    def __init__(self, opt):
        """Load the ABSA dataset, create bucketed train/test iterators, and move
        the model to the configured device.

        Args:
            opt: parsed options; must provide dataset, embed_dim, batch_size,
                 model_class and device.
        """
        self.opt = opt

        absa_dataset = ABSADatesetReader(dataset=opt.dataset,
                                         embed_dim=opt.embed_dim)

        # Only the training split is shuffled.
        self.train_data_loader = BucketIterator(data=absa_dataset.train_data,
                                                batch_size=opt.batch_size,
                                                shuffle=True)
        self.test_data_loader = BucketIterator(data=absa_dataset.test_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False)

        self.model = opt.model_class(absa_dataset.embedding_matrix,
                                     opt).to(opt.device)
        # NOTE(review): removed dead commented-out DataParallel/DistributedDataParallel
        # setup; recover it from version control if multi-GPU support is reinstated.

        self._print_args()
        self.global_f1 = 0.  # best macro-F1 observed so far

        if torch.cuda.is_available():
            print('cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))
Ejemplo n.º 3
0
    def __init__(self, opt, post_vocab):
        """Build loaders and the model, wiring in the shared adjacency matrix and
        frequency embedding produced by the dataset reader.

        Fix: the reader was given ``args['post_vocab']`` where ``args`` is
        undefined in this scope (guaranteed NameError); it now uses the
        ``post_vocab`` parameter that callers already supply.
        """
        self.opt = opt

        absa_dataset = ABSADatesetReader(dataset=opt.dataset,
                                         embed_dim=opt.embed_dim,
                                         post_vocab=post_vocab)
        self.train_data_loader = BucketIterator(data=absa_dataset.train_data,
                                                batch_size=opt.batch_size,
                                                shuffle=True)
        self.test_data_loader = BucketIterator(data=absa_dataset.test_data,
                                               batch_size=opt.batch_size,
                                               shuffle=False)
        common_adj = absa_dataset.common_adj
        fre_embedding = absa_dataset.fre_embedding
        # Tensors passed into the model constructor are moved to GPU up front.
        if torch.cuda.is_available():
            common_adj = common_adj.cuda()
            fre_embedding = fre_embedding.cuda()

        self.model = opt.model_class(absa_dataset.embedding_matrix, common_adj,
                                     fre_embedding, post_vocab,
                                     opt).to(opt.device)
        self._print_args()
        self.global_f1 = 0.  # best macro-F1 observed so far

        if torch.cuda.is_available():
            print('cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))
Ejemplo n.º 4
0
 def __init__(self, opt):
     """Create train/test/final data loaders and move the model to opt.device."""
     self.opt = opt
     reader = ABSADatesetReader(dataset=opt.dataset, embed_dim=opt.embed_dim, max_seq_len=opt.max_seq_len)
     # Training uses mini-batches; test and final splits are evaluated in one batch each.
     self.train_data_loader = DataLoader(dataset=reader.train_data, batch_size=opt.batch_size, shuffle=True)
     self.test_data_loader = DataLoader(dataset=reader.test_data, batch_size=len(reader.test_data), shuffle=False)
     self.final_data_loader = DataLoader(dataset=reader.final_data, batch_size=len(reader.final_data), shuffle=False)
     self.model = opt.model_class(reader.embedding_matrix, opt).to(opt.device)
     self._init_and_print_parameters()
Ejemplo n.º 5
0
    def __init__(self, opt):
        """Prepare the dataset, bucketed loaders, and the model for training."""
        self.opt = opt

        reader = ABSADatesetReader(dataset=opt.dataset, embed_dim=opt.embed_dim)
        # Shuffle only the training split.
        self.train_data_loader = BucketIterator(
            data=reader.train_data, batch_size=opt.batch_size, shuffle=True)
        self.test_data_loader = BucketIterator(
            data=reader.test_data, batch_size=opt.batch_size, shuffle=False)

        self.model = opt.model_class(reader.embedding_matrix, opt).to(opt.device)
        self._print_args()
        self.global_f1 = 0.  # best macro-F1 observed so far
Ejemplo n.º 6
0
    def __init__(self, opt):
        """Split the training data 90/10 into train/validation, build bucketed
        loaders, and wrap a BERT-backed model in DataParallel.

        Fix: the original sliced ``train_data[:-valset_len]``; when the dataset
        has fewer than 10 items ``valset_len`` is 0 and ``[:-0]`` yields an
        EMPTY training set. An explicit split index keeps all data in training
        in that case and is otherwise identical.
        """
        self.opt = opt

        absa_dataset = ABSADatesetReader(dataset=opt.dataset, embed_dim=opt.embed_dim)
        # Hold out 10% of the training split for validation.
        valset_len = int(len(absa_dataset.train_data) * 0.1)
        split = len(absa_dataset.train_data) - valset_len
        self.train_data_loader = BucketIterator(data=absa_dataset.train_data[:split], batch_size=opt.batch_size, shuffle=True)
        self.val_data_loader = BucketIterator(data=absa_dataset.train_data[split:], batch_size=opt.batch_size, shuffle=False)
        # Chinese pretrained model; to use ALBERT instead, wrap ALBERT, import it
        # here, and change the model parameters accordingly.
        bert = BertModel.from_pretrained('bert-base-chinese')
        self.model = nn.DataParallel(opt.model_class(bert, opt).to(opt.device))

        self._print_args()
Ejemplo n.º 7
0
    def __init__(self, opt):
        """Build dataset loaders for the selected corpus and construct the model.

        Fix: an unrecognized ``opt.dataset`` previously fell through both
        branches silently and crashed later with AttributeError on
        ``self.my_dataset``; it now fails fast with a clear ValueError.
        """
        self.opt = opt
        print('> training arguments:')
        for arg in vars(opt):
            print('>>> {0}: {1}'.format(arg, getattr(opt, arg)))

        if opt.dataset in ['restaurant', 'laptop']:
            self.my_dataset = ABSADatesetReader(dataset=opt.dataset,
                                                embed_dim=opt.embed_dim,
                                                max_seq_len=opt.max_seq_len)
            self.train_data_loader = DataLoader(
                dataset=self.my_dataset.train_data,
                batch_size=opt.batch_size,
                shuffle=True)
            # No separate dev split for these corpora: dev == test, evaluated
            # in a single full-size batch.
            self.dev_data_loader = DataLoader(
                dataset=self.my_dataset.test_data,
                batch_size=len(self.my_dataset.test_data),
                shuffle=False)
            self.test_data_loader = DataLoader(
                dataset=self.my_dataset.test_data,
                batch_size=len(self.my_dataset.test_data),
                shuffle=False)

        elif opt.dataset in ['zol_cellphone']:
            # Multimodal corpus: the reader additionally extracts CNN features.
            self.my_dataset = ZOLDatesetReader(
                dataset=opt.dataset,
                embed_dim=opt.embed_dim,
                max_seq_len=opt.max_seq_len,
                cnn_model_name=opt.cnn_model_name)
            self.train_data_loader = DataLoader(
                dataset=self.my_dataset.train_data,
                batch_size=opt.batch_size,
                shuffle=True)
            self.dev_data_loader = DataLoader(dataset=self.my_dataset.dev_data,
                                              batch_size=len(
                                                  self.my_dataset.dev_data),
                                              shuffle=False)
            self.test_data_loader = DataLoader(
                dataset=self.my_dataset.test_data,
                batch_size=len(self.my_dataset.test_data),
                shuffle=False)

        else:
            raise ValueError('unsupported dataset: {0}'.format(opt.dataset))

        self.idx2word = self.my_dataset.idx2word
        self.writer = SummaryWriter(log_dir=opt.logdir)
        self.model = opt.model_class(self.my_dataset.embedding_matrix,
                                     opt).to(opt.device)
        self.reset_parameters()
Ejemplo n.º 8
0
    def __init__(self, opt):
        """Load the dataset, build train/test loaders, and construct the model.

        Fix: ``torch.cuda.memory_allocated`` was called unconditionally, which
        raises on CPU-only installations; it is now guarded with
        ``torch.cuda.is_available()`` like the other trainers in this file.
        """
        self.opt = opt

        absa_dataset = ABSADatesetReader(dataset=opt.dataset,
                                         embed_dim=opt.embed_dim,
                                         max_seq_len=opt.max_seq_len)
        self.train_data_loader = DataLoader(dataset=absa_dataset.train_data,
                                            batch_size=opt.batch_size,
                                            shuffle=True)
        self.test_data_loader = DataLoader(dataset=absa_dataset.test_data,
                                           batch_size=opt.batch_size,
                                           shuffle=False)

        self.model = opt.model_class(absa_dataset.embedding_matrix,
                                     opt).to(opt.device)
        if torch.cuda.is_available():
            # NOTE(review): reports device 0 regardless of opt.device — confirm.
            print("cuda memory allocated:", torch.cuda.memory_allocated(device=0))
        self._print_args()
Ejemplo n.º 9
0
    def __init__(self, args):
        """Seed every RNG, build train/dev/test loaders (dev split is optional),
        and construct the model and its cross-entropy criterion."""
        self.args = args
        # Deterministic runs: seed torch, CUDA (when selected), numpy and random.
        torch.manual_seed(self.args.seed)
        if self.args.device == "cuda":
            torch.cuda.set_device(self.args.gpu)
            torch.cuda.manual_seed(self.args.seed)
        np.random.seed(self.args.seed)
        random.seed(self.args.seed)

        print('> training arguments:')
        for arg in vars(args):
            print('>>> {0}: {1}'.format(arg, getattr(args, arg)))

        reader = ABSADatesetReader(dataset=args.dataset,
                                   embed_dim=args.embed_dim,
                                   max_seq_len=args.max_seq_len)
        if self.args.dev > 0.0:
            # Carve a dev split off the shuffled training data, in place.
            random.shuffle(reader.train_data.data)
            dev_num = int(len(reader.train_data.data) * self.args.dev)
            reader.dev_data.data = reader.train_data.data[:dev_num]
            reader.train_data.data = reader.train_data.data[dev_num:]

        self.train_data_loader = DataLoader(dataset=reader.train_data,
                                            batch_size=args.batch_size,
                                            shuffle=True)
        if self.args.dev > 0.0:
            # Dev and test splits are each evaluated in one full-size batch.
            self.dev_data_loader = DataLoader(dataset=reader.dev_data,
                                              batch_size=len(reader.dev_data),
                                              shuffle=False)
        self.test_data_loader = DataLoader(dataset=reader.test_data,
                                           batch_size=len(reader.test_data),
                                           shuffle=False)

        self.mdl = args.model_class(
            self.args,
            embedding_matrix=reader.embedding_matrix,
            aspect_embedding_matrix=reader.aspect_embedding_matrix)
        self.reset_parameters()
        # Keep both embedding tables trainable after reset_parameters().
        self.mdl.encoder.weight.requires_grad = True
        self.mdl.encoder_aspect.weight.requires_grad = True
        # NOTE(review): uses a module-level `device`, not args.device — confirm
        # they always agree.
        self.mdl.to(device)
        self.criterion = nn.CrossEntropyLoss()
Ejemplo n.º 10
0
    def __init__(self, opt):
        """Set up image transforms, multimodal data loaders, a ResNet-152 image
        encoder, and the text model."""
        self.opt = opt
        print('> training arguments:')
        for arg in vars(opt):
            print('>>> {0}: {1}'.format(arg, getattr(opt, arg)))

        # Ensure the checkpoint directory exists before training starts.
        if not os.path.exists(opt.checkpoint):
            os.mkdir(opt.checkpoint)

        # Standard ImageNet augmentation/normalization pipeline;
        # opt.crop_size defaults to 224 upstream.
        transform = transforms.Compose([
            transforms.RandomCrop(opt.crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])

        reader = ABSADatesetReader(transform,
                                   dataset=opt.dataset,
                                   embed_dim=opt.embed_dim,
                                   max_seq_len=opt.max_seq_len,
                                   path_image=opt.path_image)
        self.train_data_loader = DataLoader(dataset=reader.train_data,
                                            batch_size=opt.batch_size,
                                            shuffle=True)
        self.dev_data_loader = DataLoader(dataset=reader.dev_data,
                                          batch_size=opt.batch_size,
                                          shuffle=False)
        self.test_data_loader = DataLoader(dataset=reader.test_data,
                                           batch_size=opt.batch_size,
                                           shuffle=False)
        #self.writer = SummaryWriter(log_dir=opt.logdir)

        print('building model')
        # Load ResNet-152 weights from disk and wrap in the project encoder.
        backbone = getattr(resnet, 'resnet152')()
        backbone.load_state_dict(
            torch.load(os.path.join(opt.resnet_root, 'resnet152.pth')))
        # NOTE(review): encoder/model are moved to a module-level `device` while
        # myResnet receives self.opt.device — confirm the two always agree.
        self.encoder = myResnet(backbone, opt.fine_tune_cnn,
                                self.opt.device).to(device)
        self.model = opt.model_class(reader.embedding_matrix, opt).to(device)
        self.reset_parameters()
Ejemplo n.º 11
0
    def __init__(self,
                 module_class,
                 model_name,
                 dataset='twitter',
                 embed_dim=100,
                 max_seq_len=40,
                 batch_size=128):
        """Build train/test loaders, a TensorBoard writer named after the model,
        and the model itself on the module-level device."""
        reader = ABSADatesetReader(dataset=dataset,
                                   embed_dim=embed_dim,
                                   max_seq_len=max_seq_len)
        self.train_data_loader = DataLoader(dataset=reader.train_data,
                                            batch_size=batch_size,
                                            shuffle=True)
        # The whole test split is evaluated in a single batch.
        self.test_data_loader = DataLoader(dataset=reader.test_data,
                                           batch_size=len(reader.test_data),
                                           shuffle=False)
        self.writer = SummaryWriter(log_dir='{0}_logs'.format(model_name))

        self.model = module_class(reader.embedding_matrix).to(device)
Ejemplo n.º 12
0
    def __init__(self, opt):
        """Echo the run configuration, build loaders and a TensorBoard writer,
        then construct and (re)initialize the model."""
        self.opt = opt
        print('> training arguments:')
        for arg in vars(opt):
            print('>>> {0}: {1}'.format(arg, getattr(opt, arg)))

        reader = ABSADatesetReader(dataset=opt.dataset,
                                   embed_dim=opt.embed_dim,
                                   max_seq_len=opt.max_seq_len)
        self.train_data_loader = DataLoader(dataset=reader.train_data,
                                            batch_size=opt.batch_size,
                                            shuffle=True)
        # Evaluate the whole test split in one batch.
        self.test_data_loader = DataLoader(dataset=reader.test_data,
                                           batch_size=len(reader.test_data),
                                           shuffle=False)
        self.writer = SummaryWriter(log_dir=opt.logdir)

        self.model = opt.model_class(reader.embedding_matrix, opt).to(device)
        self.reset_parameters()
Ejemplo n.º 13
0
    def __init__(self, opt):
        """Echo arguments, build loaders for the requested fold, and either load
        a model checkpoint or create a fresh model."""
        self.opt = opt
        print('> training arguments:')
        for arg in vars(opt):
            print('>>> {0}: {1}'.format(arg, getattr(opt, arg)))

        reader = ABSADatesetReader(dataset=opt.dataset,
                                   embed_dim=opt.embed_dim,
                                   max_seq_len=opt.max_seq_len,
                                   fold_num=opt.fold_num)
        self.train_data_loader = DataLoader(dataset=reader.train_data,
                                            batch_size=opt.batch_size,
                                            shuffle=True)
        self.test_data_loader = DataLoader(dataset=reader.test_data,
                                           batch_size=opt.batch_size,
                                           shuffle=False)
        # Resume from a checkpoint when a path is supplied; otherwise build anew.
        if opt.checkpoint_path != "":
            # NOTE(review): the loaded model is not moved to `device` here —
            # confirm checkpoints are saved on the intended device.
            self.model = torch.load(opt.checkpoint_path)
        else:
            self.model = opt.model_class(reader.embedding_matrix, opt).to(device)
        self.reset_parameters()