def _tagging(self, sentence, region_start, region_end,
             premodifier: 'e.g. a number, like 至少 of "至少22k"',
             posttoken: 'e.g. a , like 22k of "至少22k"'):
    """Tag ``sentence[region_start:region_end]`` as a premodified concept.

    A ``Concept`` typed as the tagger's own class is built over the region,
    signed with ``_Premodifier``, and added to ``sentence``.  The concept
    values map the tagger's premodifier/posttoken labels to the two given
    arguments.

    Args:
        sentence: Token container; must support slicing and ``add_concept``.
        region_start: Start index of the tagged region (inclusive).
        region_end: End index of the tagged region (exclusive).
        premodifier: Value stored under ``self.get_premodifier_label()``.
        posttoken: Value stored under ``self.get_posttoken_label()``.
    """
    surface = "".join(sentence[region_start:region_end])

    # The concept is typed as the concrete (derived) tagger class.
    tagger_type = self.__class__

    values = {}
    values[self.get_premodifier_label()] = premodifier
    values[self.get_posttoken_label()] = posttoken

    tagged = Concept(region_start, region_end, surface, tagger_type, values)
    tagged.sign(_Premodifier)

    # Hook order matters: create hook, then registration, then add hook.
    self._on_create_concept(sentence, tagged)
    sentence.add_concept(tagged)
    self._on_add_concept(sentence, tagged)
def _tagging_number_and_unit(self, sentence, region_start, region_end,
                             number_concept: 'a number, like 22 of 22k',
                             synonym: 'a unit, like k of 22k'):
    """Tag a "number + unit" region (e.g. ``22k``) as a ``RealNumber``.

    The resulting value is the number's value multiplied by
    ``self.get_unit_size()`` (e.g. 22k = 22 x 1000 = 22000).  The new
    concept is signed with ``_MetricPrefixUnit`` and added to ``sentence``.

    Args:
        sentence: Token container; must support slicing and ``add_concept``.
        region_start: Start index of the tagged region (inclusive).
        region_end: End index of the tagged region (exclusive).
        number_concept: Concept whose ``concept_values['value']`` holds the
            numeric part.
        synonym: The unit text, stored under ``'metric_prefix'``.
    """
    surface = "".join(sentence[region_start:region_end])

    # Always typed as RealNumber, not as the tagger's own class.
    concept_type = RealNumber

    base_number = number_concept.concept_values['value']
    scaled = base_number * self.get_unit_size()

    values = dict(number=base_number, metric_prefix=synonym, value=scaled)

    tagged = Concept(region_start, region_end, surface, concept_type, values)
    tagged.sign(_MetricPrefixUnit)

    # Hook order matters: create hook, then registration, then add hook.
    self._on_create_concept(sentence, tagged)
    sentence.add_concept(tagged)
    self._on_add_concept(sentence, tagged)
def _tagging_unit(self, sentence, region_start, region_end,
                  synonym: 'just the unit itself'):
    """Tag a bare unit (no leading number) over the given region.

    The concept is typed as the tagger's own class, carries the unit text
    and ``self.get_unit_size()`` as its value, is signed with
    ``_MetricPrefixUnit``, and is added to ``sentence``.

    Args:
        sentence: Token container; must support slicing and ``add_concept``.
        region_start: Start index of the tagged region (inclusive).
        region_end: End index of the tagged region (exclusive).
        synonym: The unit text, stored under ``'unit'``.
    """
    surface = "".join(sentence[region_start:region_end])
    tagger_type = self.__class__

    values = {}
    values['unit'] = synonym
    values['value'] = self.get_unit_size()

    tagged = Concept(region_start, region_end, surface, tagger_type, values)
    tagged.sign(_MetricPrefixUnit)

    # NOTE: no _on_create_concept / _on_add_concept hooks are fired for a
    # bare unit; the create-hook call was disabled in the original code.
    sentence.add_concept(tagged)
def _on_add_concept(self, sentence, concept):
    """Merge a just-added concept with a trailing "decimal part", if any.

    Handles the pattern ``integer_part + unit + decimal_part``.  Starting
    right after ``concept`` (whitespace skipped), the sentence is asked for
    a concept at that position dominated by ``IntegerNumber``,
    ``RealNumber`` or ``rawMetricPrefixUnit``.  When one is found, a new
    ``RealNumber`` concept spanning both is added to ``sentence``, signed
    with ``_MetricPrefixUnit``.

    Value rules for the trailing part:
      * ``rawMetricPrefixUnit`` (e.g. 2萬2百 -> [2萬]+[2百]): used as-is.
      * otherwise, a value below 10 is a shorthand digit and is scaled to
        one tenth of the unit (2萬5 -> 5 means 5000 = 5 * 10000 / 10;
        2千5 -> 5 means 500).
      * otherwise (2萬5000, 2千500): used as-is.

    Args:
        sentence: Token container providing ``length``,
            ``get_prefix_dominated_concept``, slicing and ``add_concept``.
        concept: The concept that was just added; supplies the integer
            part via ``concept_values['value']``.

    Returns:
        None.  Returns early when nothing follows the concept or no
        suitable trailing concept exists.
    """
    at = self._skip_whitespaces(sentence, concept.end)
    if at >= sentence.length():
        return

    # Find the dominating number concept that starts right after `concept`.
    decimal_concept = sentence.get_prefix_dominated_concept(
        at, IntegerNumber, RealNumber, rawMetricPrefixUnit)
    # Identity check: `== None` would dispatch to any custom __eq__ defined
    # on the concept type and could misreport a real concept as missing.
    if decimal_concept is None:
        # Cases such as '一萬萬' (unit followed by unit) are not handled here.
        return

    region_start = concept.start
    region_end = decimal_concept.end
    entity = "".join(sentence[region_start:region_end])

    # Decimal part.
    value = decimal_concept.concept_values['value']
    is_raw_unit = issubclass(decimal_concept.type, rawMetricPrefixUnit)
    if not is_raw_unit and value < 10:
        # Single shorthand digit: worth one tenth of this tagger's unit.
        # True division is kept deliberately; `value` may be non-integral.
        value = value * self.get_unit_size() / 10

    # Plus the integer part.
    value += concept.concept_values['value']

    concept_values = {
        'tagger': self.__class__,
        'value': value,
    }

    merged = Concept(region_start, region_end, entity, RealNumber,
                     concept_values)
    merged.sign(_MetricPrefixUnit)
    sentence.add_concept(merged)