def test_get_full_module_name(self):
    # Verifies that ``utils.get_full_module_name`` returns the fully
    # qualified dotted name (defining module + class name) for a class
    # that is imported through its package-level re-export.
    from forte.processors.misc import LowerCaserProcessor

    expected_name = (
        "forte.processors.misc.lowercaser_processor.LowerCaserProcessor"
    )
    self.assertEqual(
        utils.get_full_module_name(LowerCaserProcessor), expected_name
    )
def __init__(
    self,
    from_cache: bool = False,
    cache_directory: Optional[str] = None,
    append_to_cache: bool = False,
):
    """Initialise the parent class and store the cache settings.

    Args:
        from_cache: Stored as ``self.from_cache``. Presumably selects
            reading from a cache instead of the original source --
            confirm against the code that consumes it.
        cache_directory: Stored as ``self._cache_directory``; may be
            ``None``.
        append_to_cache: Stored as ``self.append_to_cache``.
    """
    super().__init__()

    # Plain attribute stores for the cache location/mode.
    self._cache_directory = cache_directory
    self.from_cache = from_cache

    # The component name is derived from this instance by the helper.
    self.component_name = get_full_module_name(self)

    self.append_to_cache = append_to_cache
def __init__(
    self,
    from_cache: bool = False,
    cache_directory: Optional[str] = None,
    append_to_cache: bool = False,
    cache_in_memory: bool = False,
):
    """Initialise the parent class, cache settings and in-memory state.

    Args:
        from_cache: Stored as ``self.from_cache``. Presumably selects
            reading from a cache instead of the original source --
            confirm against the code that consumes it.
        cache_directory: Stored as ``self._cache_directory``; may be
            ``None``.
        append_to_cache: Stored as ``self.append_to_cache``.
        cache_in_memory: Stored as ``self._cache_in_memory``.
    """
    super().__init__()

    # Plain attribute stores for the cache location/mode.
    self._cache_directory = cache_directory
    self.from_cache = from_cache

    # The component name is derived from this instance by the helper.
    self.component_name = get_full_module_name(self)

    self._cache_in_memory = cache_in_memory
    self.append_to_cache = append_to_cache

    # In-memory bookkeeping starts out empty / not ready.
    self._data_packs: List[PackType] = []
    self._cache_ready: bool = False
def test_get_data(self):
    # Exercises ``get_data`` (with and without ``skip_k`` / ``request``)
    # and ``get_single`` on the data pack held by this test case.

    # Request specification used by case 5; built first so that the
    # component name lookup happens before any assertion runs.
    request_spec = {
        Sentence: ["speaker"],
        Token: ["pos", "sense"],
        EntityMention: [],
        PredicateMention: [],
        PredicateArgument: {"fields": [], "unit": "Token"},
        PredicateLink: {
            "component": utils.get_full_module_name(OntonotesReader),
            "fields": ["parent", "child", "arg_type"],
        },
    }
    pack = self.data_pack

    # case 1: sentence contexts from the start of the pack; the second
    # sentence's offset is one past the length of the first context.
    all_sentences = list(pack.get_data(Sentence))
    self.assertEqual(len(all_sentences), 2)
    self.assertEqual(
        all_sentences[1]["offset"], len(all_sentences[0]["context"]) + 1
    )

    # case 2: skipping one instance leaves only the second sentence.
    skipped_one = list(pack.get_data(Sentence, skip_k=1))
    self.assertEqual(len(skipped_one), 1)
    self.assertEqual(skipped_one[0]["offset"], 165)

    # case 3: document-level context.
    documents = list(pack.get_data(Document, skip_k=0))
    self.assertEqual(len(documents), 1)
    self.assertEqual(documents[0]["offset"], 0)

    # case 3.1: ``get_single`` returns the document whose text equals
    # the document context obtained above.
    document: Document = pack.get_single(Document)
    self.assertEqual(document.text, documents[0]["context"])

    # case 4: skipping past the end yields no instances.
    beyond_end = list(pack.get_data(Sentence, skip_k=10))
    self.assertEqual(len(beyond_end), 0)

    # case 5: requesting additional entry types alongside the context.
    with_entries = list(
        pack.get_data(Sentence, request=request_spec, skip_k=1)
    )
    first_instance = with_entries[0]
    self.assertEqual(len(first_instance.keys()), 9)
    self.assertEqual(len(first_instance["PredicateLink"]), 4)
    self.assertEqual(len(first_instance["Token"]), 5)
    self.assertEqual(len(first_instance["EntityMention"]), 3)
def __init__(
    self,
    from_cache: bool = False,
    cache_directory: Optional[str] = None,
    append_to_cache: bool = False,
    cache_in_memory: bool = False,
):
    """Initialise the parent class, cache settings and profiling state.

    Args:
        from_cache: Stored as ``self.from_cache``. Presumably selects
            reading from a cache instead of the original source --
            confirm against the code that consumes it.
        cache_directory: Stored as ``self._cache_directory``; may be
            ``None``.
        append_to_cache: Stored as ``self.append_to_cache``.
        cache_in_memory: Stored as ``self._cache_in_memory``.
    """
    super().__init__()

    # Plain attribute stores for the cache location/mode.
    self._cache_directory = cache_directory
    self.from_cache = from_cache

    # The component name is derived from this instance by the helper.
    self.component_name = get_full_module_name(self)

    self._cache_in_memory = cache_in_memory
    self.append_to_cache = append_to_cache

    # In-memory bookkeeping starts out empty / not ready.
    self._data_packs: List[PackType] = []
    self._cache_ready: bool = False

    # State used for time profiling of the reader; profiling is off and
    # both timers are zero until something else changes them.
    self.time_profile: float = 0.0
    self._start_time: float = 0.0
    self._enable_profiling: bool = False
def __init__(self):
    """Record this component's full module name and create its selector.

    ``component_name`` is obtained from ``get_full_module_name`` applied
    to this instance; ``selector`` is a freshly constructed
    ``DummySelector``.
    """
    full_name = get_full_module_name(self)
    self.component_name = full_name

    default_selector = DummySelector()
    self.selector = default_selector