Example 1
    def __init__(self, opt):
        self.opt = opt

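        # choose between the original and the V2 data reader via opt.v2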
        if opt.v2:
            absa_data_reader = ABSADataReaderV2(data_dir=opt.data_dir)
        else:
            absa_data_reader = ABSADataReader(data_dir=opt.data_dir)
        tokenizer = build_tokenizer(data_dir=opt.data_dir)
        embedding_matrix = build_embedding_matrix(opt.data_dir,
                                                  tokenizer.word2idx,
                                                  opt.embed_dim, opt.dataset)
        self.idx2tag, self.idx2polarity = absa_data_reader.reverse_tag_map, absa_data_reader.reverse_polarity_map
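        # wrap the train/dev/test splits in bucketing iterators (only the training split is shuffled)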
        self.train_data_loader = BucketIterator(
            data=absa_data_reader.get_train(tokenizer),
            batch_size=opt.batch_size,
            shuffle=True)
        self.dev_data_loader = BucketIterator(
            data=absa_data_reader.get_dev(tokenizer),
            batch_size=opt.batch_size,
            shuffle=False)
        self.test_data_loader = BucketIterator(
            data=absa_data_reader.get_test(tokenizer),
            batch_size=opt.batch_size,
            shuffle=False)
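        # instantiate the model class chosen in opt and move it to the target device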
        self.model = opt.model_class(embedding_matrix, opt, self.idx2tag,
                                     self.idx2polarity).to(opt.device)
        self._print_args()

        if torch.cuda.is_available():
            print('>>> cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))
Example 2
    def __init__(self, opt):
        self.opt = opt

        # build the data reader, tokenizer and pretrained embedding matrix for the dataset
        absa_data_reader = ABSADataReader(data_dir=opt.data_dir)
        self.tokenizer = build_tokenizer(data_dir=opt.data_dir)
        embedding_matrix = build_embedding_matrix(opt.data_dir, self.tokenizer.word2idx, opt.embed_dim, opt.dataset)
        self.idx2tag, self.idx2polarity = absa_data_reader.reverse_tag_map, absa_data_reader.reverse_polarity_map
        self.model = opt.model_class(embedding_matrix, opt, self.idx2tag, self.idx2polarity).to(opt.device)
        print('loading model {0} ...'.format(opt.model_name))
        # load the saved weights (mapped to CPU storage regardless of where they were saved)
        self.model.load_state_dict(torch.load(opt.state_dict_path, map_location=lambda storage, loc: storage))
        # switch model to evaluation mode
        self.model.eval()
        # inference only: no gradients are needed
        torch.autograd.set_grad_enabled(False)
Example 3
    def __init__(self, opt):
        self.opt = opt
       
        absa_data_reader = ABSADataReader(data_dir=opt.data_dir)
        self.tokenizer = build_tokenizer(data_dir=opt.data_dir)
        embedding_matrix = build_embedding_matrix(opt.data_dir, self.tokenizer.word2idx, opt.embed_dim, opt.dataset)
        self.idx2tag, self.idx2polarity = absa_data_reader.reverse_tag_map, absa_data_reader.reverse_polarity_map
        self.model = opt.model_class(embedding_matrix, opt, self.idx2tag, self.idx2polarity).to(opt.device)
        print('loading model {0} ...'.format(opt.model_name))
        # self.model.load_state_dict(torch.load(opt.state_dict_path, map_location=lambda storage, loc: storage))
        # switch model to evaluation mode
        self.model.eval()

        # get a handle on s3
        session = boto3.Session(
            aws_access_key_id='XXXXXXXXXXXX',
            aws_secret_access_key='XXXXXXXX',
            region_name='XXXXXXXX')

        self.s3 = session.resource('s3')
        self.bucket = self.s3.Bucket('surveybuddy-responses')  # example: energy_market_procesing

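        # disable gradient tracking globally; this class only runs inference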
        torch.autograd.set_grad_enabled(False)
Example 4
    def __init__(self, opt):
        self.opt = opt

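        # the BERT-based model variants use their own data reader, tokenizer and bucket iterator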
        if opt.model in [
                'bote', 'bote_v0_ablation', 'bote_v1_ablation',
                'bote_v2_ablation', 'bote_v3_ablation', 'bote_v4'
        ]:
            absa_data_reader = ABSADataReaderBERT(data_dir=opt.data_dir)
            tokenizer = BertTokenizer(opt.bert_model, opt.case, opt.spacy_lang,
                                      opt.lang)
            embedding_matrix = []
            self.train_data_loader = BucketIteratorBert(
                data=absa_data_reader.get_train(tokenizer),
                batch_size=opt.batch_size,
                shuffle=True)
            self.dev_data_loader = BucketIteratorBert(
                data=absa_data_reader.get_dev(tokenizer),
                batch_size=opt.batch_size,
                shuffle=False)
            self.test_data_loader = BucketIteratorBert(
                data=absa_data_reader.get_test(tokenizer),
                batch_size=opt.batch_size,
                shuffle=False)
        else:
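            # all other models use the plain tokenizer and a GloVe embedding matrix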
            absa_data_reader = ABSADataReader(data_dir=opt.data_dir)
            tokenizer = build_tokenizer(data_dir=opt.data_dir)
            embedding_matrix = build_embedding_matrix(opt.data_dir,
                                                      tokenizer.word2idx,
                                                      opt.embed_dim,
                                                      opt.dataset,
                                                      opt.glove_fname)
            self.train_data_loader = BucketIterator(
                data=absa_data_reader.get_train(tokenizer),
                batch_size=opt.batch_size,
                shuffle=True)
            self.dev_data_loader = BucketIterator(
                data=absa_data_reader.get_dev(tokenizer),
                batch_size=opt.batch_size,
                shuffle=False)
            self.test_data_loader = BucketIterator(
                data=absa_data_reader.get_test(tokenizer),
                batch_size=opt.batch_size,
                shuffle=False)

        self.idx2tag, self.idx2polarity = absa_data_reader.reverse_tag_map, absa_data_reader.reverse_polarity_map
        self.model = opt.model_class(embedding_matrix, opt, self.idx2tag,
                                     self.idx2polarity).to(opt.device)
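        # per-step precision/recall/F1 history for aspect, opinion and triplet extraction on train and dev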
        self.history_metrics = {
            'epoch': [],
            'step': [],
            'train_ap_precision': [],
            'train_ap_recall': [],
            'train_ap_f1': [],
            'train_op_precision': [],
            'train_op_recall': [],
            'train_op_f1': [],
            'train_triplet_precision': [],
            'train_triplet_recall': [],
            'train_triplet_f1': [],
            'dev_ap_precision': [],
            'dev_ap_recall': [],
            'dev_ap_f1': [],
            'dev_op_precision': [],
            'dev_op_recall': [],
            'dev_op_f1': [],
            'dev_triplet_precision': [],
            'dev_triplet_recall': [],
            'dev_triplet_f1': []
        }

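        # aggregated evaluation results (precision/recall/F1) for each extraction sub-task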
        self.results = {
            'aspect_extraction': {
                'precision': [],
                'recall': [],
                'f1': []
            },
            'opinion_extraction': {
                'precision': [],
                'recall': [],
                'f1': []
            },
            'triplet_extraction': {
                'precision': [],
                'recall': [],
                'f1': []
            }
        }

        self._print_args()

        if torch.cuda.is_available():
            print('>>> cuda memory allocated:',
                  torch.cuda.memory_allocated(device=opt.device.index))