Esempio n. 1
0
    def _tagging(self, sentence,
                 region_start, region_end,
                 premodifier: 'e.g. a number, like 至少 of "至少22k"',
                 posttoken: 'e.g. a , like 22k of "至少22k"'):
        """Wrap the region [region_start, region_end) in a Concept and add it.

        The concept is typed with the tagger's own class, stores
        ``premodifier`` and ``posttoken`` under the labels returned by
        ``get_premodifier_label()`` / ``get_posttoken_label()``, and is
        signed with ``_Premodifier``.  ``_on_create_concept`` runs before
        the concept is added to ``sentence`` and ``_on_add_concept`` runs
        right after.  Nothing is returned.
        """
        # Surface text covered by the region, joined into a single string.
        surface = "".join(sentence[region_start:region_end])

        # The concept is attributed to the concrete (derived) tagger class.
        handled_type = self.__class__

        # Payload: the two parts, keyed by tagger-provided labels.
        payload = {}
        payload[self.get_premodifier_label()] = premodifier
        payload[self.get_posttoken_label()] = posttoken

        tagged = Concept(region_start, region_end,
                         surface, handled_type, payload)
        tagged.sign(_Premodifier)

        # Hook -> register -> hook, in that order.
        self._on_create_concept(sentence, tagged)
        sentence.add_concept(tagged)
        self._on_add_concept(sentence, tagged)
    def _tagging_number_and_unit(self, sentence, region_start, region_end,
                                 number_concept: 'a number, like 22 of 22k',
                                 synonym: 'a unit, like k of 22k'):
        """Tag a "number + unit" region (e.g. 22k) as a RealNumber concept.

        The stored ``'value'`` is the number concept's ``'value'``
        multiplied by ``self.get_unit_size()``; the raw number and the
        matched unit text (``synonym``) are kept under ``'number'`` and
        ``'metric_prefix'``.  The concept is signed with
        ``_MetricPrefixUnit``, passed to ``_on_create_concept``, added to
        ``sentence`` and finally passed to ``_on_add_concept``.
        """
        surface = "".join(sentence[region_start:region_end])

        # The result is typed as RealNumber rather than the tagger's class.
        handled_type = RealNumber

        # e.g. 22k = 22 x 1000 = 22000
        base_number = number_concept.concept_values['value']
        scaled_value = base_number * self.get_unit_size()

        payload = {
            'number': base_number,
            'metric_prefix': synonym,
            'value': scaled_value,
        }

        tagged = Concept(region_start, region_end, surface,
                         handled_type, payload)
        tagged.sign(_MetricPrefixUnit)

        # Hook -> register -> hook, in that order.
        self._on_create_concept(sentence, tagged)
        sentence.add_concept(tagged)
        self._on_add_concept(sentence, tagged)
    def _tagging_unit(self, sentence, region_start, region_end,
                      synonym: 'just the unit itself'):
        """Tag a bare unit (no leading number) as a concept of this class.

        The concept stores the matched unit text under ``'unit'`` and
        ``self.get_unit_size()`` under ``'value'``, is signed with
        ``_MetricPrefixUnit`` and added to ``sentence``.  Unlike the other
        tagging methods, neither ``_on_create_concept`` nor
        ``_on_add_concept`` is invoked here.
        """
        surface = "".join(sentence[region_start:region_end])

        # The concept is attributed to the concrete (derived) tagger class.
        handled_type = self.__class__

        payload = {
            'unit': synonym,
            'value': self.get_unit_size(),
        }

        tagged = Concept(region_start, region_end, surface,
                         handled_type, payload)
        tagged.sign(_MetricPrefixUnit)

        sentence.add_concept(tagged)
    def _on_add_concept(self, sentence, concept):
        """Merge a trailing number into a just-added "number + unit" concept.

        Handles the pattern ``integer_part + unit + decimal_part``: after
        skipping whitespace that follows ``concept``, the sentence is asked
        for a concept at that position of type ``IntegerNumber``,
        ``RealNumber`` or ``rawMetricPrefixUnit``.  If one is found, a new
        ``RealNumber`` concept spanning from ``concept.start`` to the end of
        the trailing concept is added to ``sentence`` with the combined
        value.

        Args:
            sentence: the sentence being tagged; must provide ``length()``,
                slicing, ``get_prefix_dominated_concept()`` and
                ``add_concept()``.
            concept: the concept that was just added; its ``start``, ``end``
                and ``concept_values['value']`` are read.

        Returns:
            None.  The only effect is the optional ``sentence.add_concept``.
        """
        at = self._skip_whitespaces(sentence, concept.end)
        if at >= sentence.length():
            # Nothing follows the concept.
            return

        # ------------------------------------------
        # Handles the decimal part
        #
        #    integer_part + unit + 'decimal_part'
        # ------------------------------------------

        # Finds the dominator of number concepts starting at `at`.
        decimal_concept = sentence.get_prefix_dominated_concept(
            at, IntegerNumber, RealNumber, rawMetricPrefixUnit)
        # Identity test: `== None` would dispatch to any __eq__ the concept
        # type defines, which is not what "no concept found" means.
        if decimal_concept is None:
            # NOTE: postponed evaluation for input such as '一萬萬' is
            # currently disabled, so there is nothing more to do here.
            return

        region_start = concept.start
        region_end = decimal_concept.end
        entity = "".join(sentence[region_start:region_end])

        # [concept_values] decimal part
        value = decimal_concept.concept_values['value']
        trailing_is_unit = issubclass(decimal_concept.type,
                                      rawMetricPrefixUnit)
        if not trailing_is_unit and value < 10:
            # A bare value below 10 counts in the next lower decimal place:
            #   2萬5 -> 5 means 5000 (= 5 * 10000 / 10)
            #   2千5 -> 5 means 500  (= 5 * 1000 / 10)
            # True division, so the result is a float.
            value = value * self.get_unit_size() / 10
        # Otherwise the trailing value is taken as-is:
        #   2萬2百  -> [2萬] + [2百]; [2百] is a rawMetricPrefixUnit
        #   2萬5000 -> 5000 is still 5000
        #   2千500  -> 500 is still 500

        # [concept_values] plus integer part
        value += concept.concept_values['value']

        concept_values = {
            'tagger': self.__class__,
            'value': value,
        }

        # Creates a concept to wrap the combined info; a separate name is
        # used so the `concept` argument is not shadowed.
        merged = Concept(region_start, region_end, entity, RealNumber,
                         concept_values)
        merged.sign(_MetricPrefixUnit)
        sentence.add_concept(merged)