def test_get_full_module_name(self):
        """``utils.get_full_module_name`` should give a class's dotted path.

        The path asserted here is the defining module followed by the class
        name, for ``LowerCaserProcessor``.
        """
        from forte.processors.misc import LowerCaserProcessor

        expected_path = (
            "forte.processors.misc.lowercaser_processor.LowerCaserProcessor"
        )
        resolved_path = utils.get_full_module_name(LowerCaserProcessor)

        self.assertEqual(resolved_path, expected_path)
Exemple #2
0
 def __init__(
     self,
     from_cache: bool = False,
     cache_directory: Optional[str] = None,
     append_to_cache: bool = False,
 ):
     """Run the parent initialiser, then record name and cache options.

     Args:
         from_cache: Stored unchanged on ``self.from_cache``.
         cache_directory: Stored unchanged on ``self._cache_directory``.
         append_to_cache: Stored unchanged on ``self.append_to_cache``.
     """
     super().__init__()

     # Name computed from this instance by ``get_full_module_name``.
     self.component_name = get_full_module_name(self)

     # Cache options, kept exactly as the caller passed them.
     self.from_cache = from_cache
     self.append_to_cache = append_to_cache
     self._cache_directory = cache_directory
Exemple #3
0
 def __init__(
     self,
     from_cache: bool = False,
     cache_directory: Optional[str] = None,
     append_to_cache: bool = False,
     cache_in_memory: bool = False,
 ):
     """Run the parent initialiser, then record name and cache options.

     Args:
         from_cache: Stored unchanged on ``self.from_cache``.
         cache_directory: Stored unchanged on ``self._cache_directory``.
         append_to_cache: Stored unchanged on ``self.append_to_cache``.
         cache_in_memory: Stored unchanged on ``self._cache_in_memory``.
     """
     super().__init__()

     # Name computed from this instance by ``get_full_module_name``.
     self.component_name = get_full_module_name(self)

     # Cache options, kept exactly as the caller passed them.
     self.from_cache = from_cache
     self.append_to_cache = append_to_cache
     self._cache_directory = cache_directory
     self._cache_in_memory = cache_in_memory

     # Cache state: starts as "not ready" with no packs held.
     self._cache_ready: bool = False
     self._data_packs: List[PackType] = []
Exemple #4
0
    def test_get_data(self):
        """Exercise ``data_pack.get_data`` and ``get_single`` on the fixture.

        Covers sentence and document contexts, the ``skip_k`` argument
        (including a value past the end), and a request with extra entry
        types and fields.
        """
        argument_spec = {"fields": [], "unit": "Token"}
        link_spec = {
            "component": utils.get_full_module_name(OntonotesReader),
            "fields": ["parent", "child", "arg_type"],
        }
        entry_request = {
            Sentence: ["speaker"],
            Token: ["pos", "sense"],
            EntityMention: [],
            PredicateMention: [],
            PredicateArgument: argument_spec,
            PredicateLink: link_spec,
        }

        # Case 1: sentence contexts, starting from the first sentence.
        all_sentences = list(self.data_pack.get_data(Sentence))
        self.assertEqual(len(all_sentences), 2)
        first_sentence, second_sentence = all_sentences
        self.assertEqual(
            second_sentence["offset"], len(first_sentence["context"]) + 1
        )

        # Case 2: sentence contexts after skipping the first one.
        later_sentences = list(self.data_pack.get_data(Sentence, skip_k=1))
        self.assertEqual(len(later_sentences), 1)
        self.assertEqual(later_sentences[0]["offset"], 165)

        # Case 3: document context.
        documents = list(self.data_pack.get_data(Document, skip_k=0))
        self.assertEqual(len(documents), 1)
        self.assertEqual(documents[0]["offset"], 0)

        # Case 3.1: ``get_single`` returns a document with the same text.
        document: Document = self.data_pack.get_single(Document)
        self.assertEqual(document.text, documents[0]["context"])

        # Case 4: skipping more sentences than exist gives nothing.
        skipped_past_end = list(self.data_pack.get_data(Sentence, skip_k=10))
        self.assertEqual(len(skipped_past_end), 0)

        # Case 5: ask for extra entries along with the sentence context.
        detailed = list(
            self.data_pack.get_data(
                Sentence, request=entry_request, skip_k=1
            )
        )
        instance = detailed[0]
        self.assertEqual(len(instance.keys()), 9)
        self.assertEqual(len(instance["PredicateLink"]), 4)
        self.assertEqual(len(instance["Token"]), 5)
        self.assertEqual(len(instance["EntityMention"]), 3)
    def __init__(
        self,
        from_cache: bool = False,
        cache_directory: Optional[str] = None,
        append_to_cache: bool = False,
        cache_in_memory: bool = False,
    ):
        """Run the parent initialiser, then set name, cache and timing state.

        Args:
            from_cache: Stored unchanged on ``self.from_cache``.
            cache_directory: Stored unchanged on ``self._cache_directory``.
            append_to_cache: Stored unchanged on ``self.append_to_cache``.
            cache_in_memory: Stored unchanged on ``self._cache_in_memory``.
        """
        super().__init__()

        # Name computed from this instance by ``get_full_module_name``.
        self.component_name = get_full_module_name(self)

        # Cache options, kept exactly as the caller passed them.
        self.from_cache = from_cache
        self.append_to_cache = append_to_cache
        self._cache_directory = cache_directory
        self._cache_in_memory = cache_in_memory

        # Cache state: starts as "not ready" with no packs held.
        self._cache_ready: bool = False
        self._data_packs: List[PackType] = []

        # Fields for time profiling of the reader; profiling starts disabled
        # and both timing values start at zero.
        self._enable_profiling: bool = False
        self._start_time: float = 0.0
        self.time_profile: float = 0.0
Exemple #6
0
 def __init__(self):
     """Attach a ``DummySelector`` and record this component's name."""
     # A new selector instance for each object.
     self.selector = DummySelector()
     # Name computed from this instance by ``get_full_module_name``.
     self.component_name = get_full_module_name(self)