Example #1
    def load_samples(self):
        base_dir = common_path.get_task_data_dir('absa', is_original=True)

        # MAMS-ACSA data in the -mil format; the JSON file is named after its directory.
        filepath = os.path.join(base_dir, 'MAMS-for-ABSA', 'MAMS-ACSA-mil',
                                'MAMS-ACSA-mil.json')
        samples = super()._load_samples_by_filepath(filepath)
        return samples
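
These loaders delegate to a _load_samples_by_filepath helper inherited from a base dataset class that is not shown in these examples. A minimal sketch of what such a helper could look like, assuming the JSON file holds a list of records with text and label fields (the record layout and the Sample container here are assumptions made purely for illustration):

import collections
import json

# Hypothetical sample container; the real base class defines its own sample type.
Sample = collections.namedtuple('Sample', ['text', 'label'])


def load_samples_by_filepath(filepath):
    """Read a JSON file containing a list of {"text": ..., "label": ...} records."""
    with open(filepath, encoding='utf-8') as input_file:
        records = json.load(input_file)
    return [Sample(record['text'], record['label']) for record in records]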
Example #2
    def load_samples(self):
        base_dir = common_path.get_task_data_dir('absa', is_original=True)

        filepath = os.path.join(base_dir, 'SemEval-2014-Task-4-REST',
                                'SemEval-2014-Task-4-REST-mil',
                                'SemEval-2014-Task-4-REST-mil.json')
        samples = super()._load_samples_by_filepath(filepath)
        return samples
Example #3
    def load_samples(self):
        base_dir = common_path.get_task_data_dir('absa', is_original=True)

        filepath = os.path.join(base_dir, 'SemEval-2014-Task-4-REST',
                                'SemEval-2014-Task-4-REST-mil',
                                'SemEval-2014-Task-4-REST-mil.json')
        samples = super()._load_samples_by_filepath(filepath)
        # The hard subset is a plain-text file with one sentence per line.
        filepath_hard = os.path.join(base_dir, 'SemEval-2014-Task-4-REST',
                                     'SemEval-2014-Task-4-REST-mil',
                                     'SemEval-2014-Task-4-REST-hard-mil.txt')
        hard_sentences = set(file_utils.read_all_lines(filepath_hard))
        # Keep only the samples whose lowercased text appears in the hard subset.
        result = [
            sample for sample in samples
            if sample.text.lower() in hard_sentences
        ]
        return result
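
Example #3 also relies on file_utils.read_all_lines, which is not shown. A plausible sketch, under the assumption that it simply returns every line of a UTF-8 text file with trailing newlines stripped:

def read_all_lines(filepath):
    """Return all lines of a UTF-8 text file with trailing newlines stripped."""
    with open(filepath, encoding='utf-8') as input_file:
        return [line.rstrip('\n') for line in input_file]

Note that the filter compares sample.text.lower() against the raw lines of the hard file, which suggests the sentences in SemEval-2014-Task-4-REST-hard-mil.txt are stored in lowercase.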
Example #4
import logging
import pickle
import sys
from collections import defaultdict
from typing import List
import copy
import collections

from bs4 import BeautifulSoup
from bs4.element import Tag
from sklearn.model_selection import train_test_split

from nlp_tasks.common import common_path
from nlp_tasks.utils import file_utils

logger = logging.getLogger(__name__)
base_data_dir = common_path.get_task_data_dir('absa', is_original=True)


class AspectTerm:
    """
    aspect term
    """
    def __init__(self, term, polarity, from_index, to_index, category=None):
        self.term = term
        self.polarity = polarity
        # character offset of the term's first character in the sentence (inclusive)
        self.from_index = int(from_index)
        # character offset just past the term's last character (exclusive)
        self.to_index = int(to_index)
        self.category = category
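
A short usage example for AspectTerm (the sentence, offsets and labels below are invented for illustration), showing how the inclusive from_index and exclusive to_index recover the term by plain slicing:

sentence = 'The pizza was great but the service was slow.'
term = AspectTerm('pizza', 'positive', 4, 9, category='food')

# from_index is inclusive and to_index is exclusive, so a slice gives the term back.
assert sentence[term.from_index:term.to_index] == term.term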
Example #5
import os
import logging
import sys
import pickle

import numpy as np

from nlp_tasks.common import common_path
from nlp_tasks.utils import file_utils
from nlp_tasks.absa.data_adapter import data_object
from nlp_tasks.utils import word_processor
from nlp_tasks.utils import tokenizers
from nlp_tasks.utils import tokenizer_wrappers

task_dir = common_path.get_task_data_dir('absa')


class ModelTrainTemplate:
    """
    1.
    2.
    3.
    4.
    5.
    """
    def __init__(self, configuration: dict):
        self.configuration = configuration
        if 'data_type' not in self.configuration:
Example #6
import os
import logging
import sys
import pickle

import numpy as np

from nlp_tasks.common import common_path
from nlp_tasks.utils import file_utils
from nlp_tasks.text_segmentation.data_adapter import data_object
from nlp_tasks.utils import word_processor
from nlp_tasks.utils import tokenizers
from nlp_tasks.utils import tokenizer_wrappers


task_dir = common_path.get_task_data_dir('text_segmentation')


class ModelTrainTemplate:
    """
    1.
    2.
    3.
    4.
    5.
    """

    def __init__(self, configuration: dict):
        self.configuration = configuration
        self.base_data_dir = task_dir + ('{task_name}/{current_dataset}/{model_name_complete}/{timestamp}/'
                                         .format_map(self.configuration))
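
The path template spells out which keys the configuration dict must provide: task_name, current_dataset, model_name_complete and timestamp. A small illustration (all values below are invented) of the relative directory that format_map produces before task_dir is prepended:

configuration = {
    'task_name': 'text_segmentation',
    'current_dataset': 'choi',
    'model_name_complete': 'textseg-bilstm',
    'timestamp': '20200101_120000',
}

relative_dir = '{task_name}/{current_dataset}/{model_name_complete}/{timestamp}/'.format_map(configuration)
print(relative_dir)  # text_segmentation/choi/textseg-bilstm/20200101_120000/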