def load_samples(self):
    """Load the MAMS-ACSA MIL samples from the original ABSA data directory.

    Returns:
        The samples parsed from the dataset JSON by the base-class loader.
    """
    base_dir = common_path.get_task_data_dir('absa', is_original=True)
    # Fixed filename: was 'AMS-ACSA-mil.json', which does not match its own
    # 'MAMS-ACSA-mil' directory; sibling loaders consistently name the JSON
    # after the directory, so the leading 'M' was evidently dropped.
    filepath = os.path.join(base_dir, 'MAMS-for-ABSA', 'MAMS-ACSA-mil',
                            'MAMS-ACSA-mil.json')
    samples = super()._load_samples_by_filepath(filepath)
    return samples
def load_samples(self):
    """Load the SemEval-2014 Task 4 restaurant MIL samples.

    Returns:
        The samples parsed from the dataset JSON by the base-class loader.
    """
    base_dir = common_path.get_task_data_dir('absa', is_original=True)
    filepath = os.path.join(
        base_dir,
        'SemEval-2014-Task-4-REST',
        'SemEval-2014-Task-4-REST-mil',
        'SemEval-2014-Task-4-REST-mil.json',
    )
    return super()._load_samples_by_filepath(filepath)
def load_samples(self):
    """Load SemEval-2014 Task 4 restaurant MIL samples, keeping only the
    'hard' subset.

    A sample is kept when its lower-cased text appears in the hard-sentence
    list shipped next to the dataset JSON.

    Returns:
        The filtered list of samples.
    """
    base_dir = common_path.get_task_data_dir('absa', is_original=True)
    dataset_dir = os.path.join(base_dir, 'SemEval-2014-Task-4-REST',
                               'SemEval-2014-Task-4-REST-mil')
    all_samples = super()._load_samples_by_filepath(
        os.path.join(dataset_dir, 'SemEval-2014-Task-4-REST-mil.json'))
    hard_path = os.path.join(dataset_dir,
                             'SemEval-2014-Task-4-REST-hard-mil.txt')
    # set membership makes the per-sample test O(1)
    hard_sentences = set(file_utils.read_all_lines(hard_path))
    hard_samples = []
    for sample in all_samples:
        # NOTE(review): only the sample text is lower-cased here — presumably
        # the hard-sentence file is already lower-case; verify against data.
        if sample.text.lower() in hard_sentences:
            hard_samples.append(sample)
    return hard_samples
import collections
import copy
import logging  # added: used by getLogger() below but missing from the imports
import pickle
import sys
from collections import defaultdict
from typing import List

from bs4 import BeautifulSoup
from bs4.element import Tag
from sklearn.model_selection import train_test_split

from nlp_tasks.common import common_path
from nlp_tasks.utils import file_utils

logger = logging.getLogger(__name__)

base_data_dir = common_path.get_task_data_dir('absa', is_original=True)


class AspectTerm:
    """An aspect-term annotation for an ABSA sample.

    Holds the term surface string, its sentiment polarity, its character
    span in the sentence, and optionally an aspect category.
    """

    def __init__(self, term, polarity, from_index, to_index, category=None):
        self.term = term
        self.polarity = polarity
        # Character span of the term in the sentence:
        # from_index is inclusive, to_index is exclusive.
        self.from_index = int(from_index)
        self.to_index = int(to_index)
        self.category = category
import os import logging import sys import pickle import numpy as np from nlp_tasks.common import common_path from nlp_tasks.utils import file_utils from nlp_tasks.absa.data_adapter import data_object from nlp_tasks.utils import word_processor from nlp_tasks.utils import tokenizers from nlp_tasks.utils import tokenizer_wrappers task_dir = common_path.get_task_data_dir('absa') class ModelTrainTemplate: """ 1. 2. 3. 4. 5. """ def __init__(self, configuration: dict): self.configuration = configuration # 、、;, # if 'data_type' not in self.configuration:
import os import logging import sys import pickle import numpy as np from nlp_tasks.common import common_path from nlp_tasks.utils import file_utils from nlp_tasks.text_segmentation.data_adapter import data_object from nlp_tasks.utils import word_processor from nlp_tasks.utils import tokenizers from nlp_tasks.utils import tokenizer_wrappers task_dir = common_path.get_task_data_dir('text_segmentation') class ModelTrainTemplate: """ 1. 2. 3. 4. 5. """ def __init__(self, configuration: dict): self.configuration = configuration self.base_data_dir = task_dir + ('{task_name}/{current_dataset}/{model_name_complete}/{timestamp}/'\ .format_map(self.configuration))