Python parse 예제들, quantulum.parser.parse Python 예제들

예제 #1

0

파일 보기

파일: evaluate.py 프로젝트: crazyapidev/question-analysis

def validate_numeric_distance(user_input,answer):
    """Compare a user's distance answer with the expected one and build feedback.

    Both strings are parsed with quantulum, the first quantity found in each
    is converted to kilometers with pint, and the ratio
    ``user / expected`` decides which feedback sentence is returned.

    Args:
        user_input: Free text typed by the user, e.g. ``"3 miles"``.
        answer: The expected answer as text; it is echoed back verbatim in
            the feedback, so it must be a string.

    Returns:
        str: A phrase drawn from ``supporting_list`` when the ratio is exactly
        1.0, that phrase plus a hint when the ratio lies in [0.96, 1.03], and
        otherwise a sentence stating the right answer. A unit hint is returned
        when pint does not know the unit.

    Note:
        ``supporting_list``, ``random`` and ``logger`` are module-level names
        that are not visible in this snippet.
    """
    from quantulum import parser
    from pint import UnitRegistry,UndefinedUnitError

    right_answer = answer
    ureg = UnitRegistry()

    def pint_unit_name(quantity):
        # Normalizing of unit names: '-' and ' ' are replaced by '_' before
        # the name is handed to pint.
        return str(quantity.unit.name.replace("-","_").replace(" ","_"))

    def in_kilometers(quantity, unit_name):
        return ureg.Quantity(quantity.value,
                             ureg.parse_expression(unit_name)).to('kilometers')

    try:
        parsed_input = parser.parse(user_input)
        parsed_answer = parser.parse(answer)

        input_unit = pint_unit_name(parsed_input[0])
        answer_unit = pint_unit_name(parsed_answer[0])
        given = in_kilometers(parsed_input[0], input_unit)
        expected = in_kilometers(parsed_answer[0], answer_unit)

        # Ratio of the two distances (1.0 means an exact match).
        ratio = given.magnitude/expected.magnitude

        if ratio == 1.0:
            # NOTE(review): index 0 of supporting_list is never selected --
            # confirm that this is intended.
            return supporting_list[random.randint(1,len(supporting_list)-1)]
        if .96 <= ratio <= 1.03:
            return supporting_list[random.randint(1,len(supporting_list)-1)] + \
                    " Your answer is in well accepted range. Still if you are curious the exact Value is "+ right_answer
        return "the right answer is " + right_answer

    except UndefinedUnitError:
        return " Distance is expressed in meters,kilometers,miles,astronomical unit,light year"
    except Exception as e:
        # Best effort: any other failure (nothing parsed, incompatible units,
        # division by zero, ...) falls back to revealing the answer.
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        logger.info("exception : %s",e)
        return "the right answer is " + right_answer

예제 #2

0

파일 보기

파일: proc_chat.py 프로젝트: gracelin999/chatbot-hdp-public

def get_distance(line_orig, entities=None):
	"""Estimate a distance in miles from a free-text chat line.

	The line is lower-cased and parsed for quantities. Only the LAST parsed
	quantity is considered: a dimensionless or 'mile' quantity is returned
	as-is, an 'hour' quantity is multiplied by MILES_PER_HOUR, and any other
	unit yields the default. When nothing is parsed, a few key phrases select
	between the default (MILES_PER_HOUR) and ANYWHERE_MILES.

	`entities` is accepted but not used here.
	"""
	default_miles = MILES_PER_HOUR
	anywhere_miles = ANYWHERE_MILES

	text = line_orig.lower()
	found = quan_parser.parse(text)
	if found:
		# TODO: other quantities?
		last = found[-1]
		unit_name = last.unit.name
		if unit_name in ('dimensionless', 'mile'):
			return last.value
		if unit_name == 'hour':
			# convert with miles per hour
			return last.value * default_miles
		return default_miles

	# Hedging phrases take precedence over 'anywhere' and keep the default.
	hedges = ("can't", "cannot", "can not", 'not sure', 'depend')
	if any(phrase in text for phrase in hedges):
		return default_miles
	if 'anywhere' in text:
		return anywhere_miles
	return default_miles

예제 #3

0

파일 보기

파일: Unit_Mgmt_Python2.7.py 프로젝트: SaeedSarabchi/Extending-Tables-using-a-WebTable-Corpus

def Tag_Unit_for_Value(value):
    """Tag a value as ["text"], ["numeric", unit, entity] or ["date"].

    Dates are assumed to consist of several tokens, so a lone number such as
    2017 is treated as numeric rather than as a date.

    Args:
        value: The cell text to classify.

    Returns:
        list: ``["date"]`` when the value has more than one token and
        ``is_date`` accepts it; ``["numeric", unit_name, entity_name]`` when
        the quantity parser finds a quantity; ``["text"]`` otherwise.
    """
    # Count tokens after turning '/' and '-' into spaces and dropping '.' and
    # ',', so that single numbers are not handed to the date check.
    normalized = (value.replace("/", " ")
                       .replace("-", " ")
                       .replace(".", "")
                       .replace(",", ""))
    word_count = len(word_tokenize(normalized))
    if word_count > 1:
        date_Result = is_date(value)
        if date_Result == True:
            return ["date"]

    try:
        # Try if the value is numeric: the first parsed quantity decides.
        quants = parser.parse(value.lower())
        unit = quants[0].unit.name
        entity = quants[0].unit.entity.name
        return ["numeric", unit, entity]
    except Exception:
        # Best effort: nothing parsed (IndexError) or a parser failure means
        # the value is not numeric. KeyboardInterrupt/SystemExit propagate.
        pass

    # If not numeric and not a date, the value is considered text.
    return ["text"]

예제 #4

0

파일 보기

파일: __init__.py 프로젝트: prp20/mycroft-homeassistant

    def handle_sensor_intent(self, message):
        """Speak the current reading of the sensor named in the intent.

        Looks the entity up among Home Assistant sensors, reads its name,
        state and unit, optionally lets quantulum normalise the unit name and
        value for pronunciation, and speaks the 'homeassistant.sensor' dialog.
        Speaks an error dialog and returns early when setup failed, the
        lookup raised RequestException, or no entity matched.
        """
        self._setup()
        if self.ha is None:
            self.speak_dialog('homeassistant.error.setup')
            return

        requested = message.data["Entity"]
        LOGGER.debug("Entity: %s" % requested)
        try:
            match = self.ha.find_entity(requested, ['sensor'])
        except RequestException:
            self.speak_dialog('homeassistant.error.offline')
            return
        if match is None:
            self.speak_dialog('homeassistant.device.unknown',
                              data={"dev_name": requested})
            return

        # IDEA: set context for 'read it out again' or similar
        # self.set_context('Entity', match['dev_name'])

        attrs = self.ha.find_entity_attr(match['id'])
        # The unit is only read when the entity reports a state.
        unit = attrs['unit_measure'] if attrs['state'] is not None else ''
        name = attrs['name']
        state = attrs['state']

        # Extract unit for correct pronunciation; quantulum is fully optional.
        try:
            from quantulum import parser
        except ImportError:
            parser = None

        # NOTE(review): this compares the attribute mapping itself to '';
        # possibly the unit string was meant -- confirm before changing.
        if parser is not None and attrs != '':
            sentence = u'{} is {} {}'.format(name, state, unit)
            parsed = parser.parse(sentence)
            if len(parsed) > 0:
                first = parsed[0]
                if (first.unit.name != "dimensionless"
                        and first.uncertainty <= 0.5):
                    unit = first.unit.name
                    state = first.value

        spoken = {"dev_name": name, "value": state, "unit": unit}
        self.speak_dialog('homeassistant.sensor', data=spoken)

예제 #5

0

파일 보기

파일: tests.py 프로젝트: MMatlacz/quantulum

    def test_parse(self):
        """Test for parser.parse() function.

        Runs every case from ``load_tests_from_json()`` (shortest request
        first) and prints how many matched their expected result. Mismatches
        are counted rather than raised, so this method only reports.
        """
        result = {'passed': 0, 'not': 0}

        all_tests = load_tests_from_json()
        for test in sorted(all_tests, key=lambda x: len(x['req'])):
            try:
                self.assertEqual(parser.parse(test['req']),
                                 test['res'])
            except AssertionError:
                result['not'] += 1
            else:
                # Count a pass only when the assertion actually held.
                result['passed'] += 1
        print("Passed: {}, not passed: {}".format(result['passed'],
                                                  result['not']))

예제 #6

0

파일 보기

    def handle_sensor_intent(self, message):
        """Speak the current state of the sensor or switch named in the intent.

        Resolves the entity through ``self._find_entity``, reads its name,
        state and unit, optionally lets quantulum normalise unit and value
        for pronunciation, formats numeric states with ``nice_number`` and
        speaks the 'homeassistant.sensor' dialog. Returns silently when no
        entity is found.
        """
        entity = message.data["Entity"]
        LOGGER.debug("Entity: %s" % entity)

        ha_entity = self._find_entity(entity, ['sensor', 'switch'])
        if not ha_entity:
            return

        entity = ha_entity['id']

        # IDEA: set context for 'read it out again' or similar
        # self.set_context('Entity', ha_entity['dev_name'])

        unit_measurement = self.ha.find_entity_attr(entity)
        sensor_unit = unit_measurement.get('unit_measure') or ''

        sensor_name = unit_measurement['name']
        sensor_state = unit_measurement['state']
        # Extract unit for correct pronunciation; this is fully optional.
        try:
            from quantulum import parser
            quantulumImport = True
        except ImportError:
            quantulumImport = False

        # NOTE(review): this compares the attribute mapping itself to '';
        # possibly sensor_unit was meant -- confirm before changing.
        if quantulumImport and unit_measurement != '':
            quantity = parser.parse(
                (u'{} is {} {}'.format(sensor_name, sensor_state,
                                       sensor_unit)))
            if len(quantity) > 0:
                quantity = quantity[0]
                if (quantity.unit.name != "dimensionless"
                        and quantity.uncertainty <= 0.5):
                    sensor_unit = quantity.unit.name
                    sensor_state = quantity.value

        try:
            value = float(sensor_state)
            sensor_state = nice_number(value, lang=self.language)
        except (ValueError, TypeError):
            # Non-numeric text raises ValueError and a missing (None) state
            # raises TypeError; in both cases the raw state is spoken.
            pass

        self.speak_dialog('homeassistant.sensor',
                          data={
                              "dev_name": sensor_name,
                              "value": sensor_state,
                              "unit": sensor_unit
                          })

예제 #7

0

파일 보기

파일: ExtractDataCropView.py 프로젝트: Rubsy777/Text-Mining

    def checkDict(list, dictionary):
        """Print the sections of a crop description and the quantities in them.

        ``list[1]`` is expected to hold a paragraph containing the markers
        "USE" and "GROWING PERIOD"; it is split into a brief description, the
        uses and the growing period. The function only prints its findings
        and returns None.

        Args:
            list: Sequence whose second element is the paragraph text.
            dictionary: Iterable of regular-expression patterns; a match in
                the uses section reports the crop as a food.

        Raises:
            IndexError: If either marker is missing from the paragraph.
        """
        # Every print uses the single-argument parenthesised form, which
        # behaves the same as a Python 2 print statement and as the Python 3
        # print function.
        paragraph = list[1]

        BriefDescription = paragraph.split("USE", 1)[0]
        Uses = paragraph.split("USE", 1)[1]
        Uses = Uses.split("GROWING PERIOD")[0]
        GrowingPeriod = paragraph.split("GROWING PERIOD", 1)[1]

        print('BriefDescription = ' + BriefDescription)
        print('Uses = ' + Uses)
        print('GrowingPeriod = ' + GrowingPeriod)

        for dictWord in dictionary:
            if re.search(dictWord, Uses, re.IGNORECASE):
                print('crop is a food')
                break

        if re.search('annual', GrowingPeriod, re.IGNORECASE):
            print('crop is Annual')

        # Parse only a window around the word "height". The start is clamped
        # at 0: a negative start would count from the end of the string and
        # select the wrong text.
        hpos = BriefDescription.find("height")
        if hpos >= 0:
            window = BriefDescription[max(hpos - 20, 0):hpos + 26]
            quantsDescription = parser.parse(window)
        else:
            quantsDescription = []
        quantsUses = parser.parse(Uses)
        quantsPeriod = parser.parse(GrowingPeriod)
        print('BriefDescription Quant:')
        print(quantsDescription)
        print('Uses Quant:')
        print(quantsUses)
        print('GrowingPeriod Quant:')
        print(quantsPeriod)
        if quantsDescription:
            print(quantsDescription[0].unit.name)  # the unit of the quantity
            print(quantsDescription[0].value)  # the amount of the quantity

        return

예제 #8

0

파일 보기

파일: Unit_Mgmt_Python2.7.py 프로젝트: SaeedSarabchi/Extending-Tables-using-a-WebTable-Corpus

def Extract_Unit_From_Text(text):
    """Test each subsequence of the input text for containing a unit.

    The text is lower-cased, stripped of '.', tokenized, and every
    subsequence produced by ``Find_All_Subsequences`` is prefixed with "1 "
    and handed to the quantity parser.

    Args:
        text: The text to search for a unit.

    Returns:
        ``["numeric", unit_name, entity_name]`` for the first subsequence
        that parses to a non-dimensionless unit, or the string ``"Null"``
        when none does.
    """
    cleansedText = text.replace(".", "").lower()
    tokens = word_tokenize(cleansedText)
    SubSeq = Find_All_Subsequences(tokens)
    for s in SubSeq:
        try:
            quants = parser.parse("1 " + s)
            unit = quants[0].unit.name
            entity = quants[0].unit.entity.name
            if unit != "dimensionless":
                return ["numeric", unit, entity]
        except Exception:
            # Best effort: a candidate that cannot be parsed is skipped.
            # KeyboardInterrupt/SystemExit are no longer swallowed, so the
            # loop can be interrupted.
            pass

    # If nothing found, then:
    return "Null"

예제 #9

0

파일 보기

파일: tests.py 프로젝트: MMatlacz/quantulum

def wiki_test(page='CERN'):
    """Download a wikipedia page and test the parser on its content.

    The content is shown interactively in parts of 1000 characters with the
    parsed quantities embedded; Enter advances to the next part, and
    Ctrl-C or end-of-input stops.

    Pages full of units:
        CERN
        Hubble_Space_Telescope,
        Herschel_Space_Observatory
    """
    content = wikipedia.page(page).content
    parsed = parser.parse(content)
    parts = int(round(len(content) * 1.0 / 1000))

    end_char = 0
    for chunk in range(parts):
        _ = os.system('clear')
        lower = chunk * 1000
        upper = lower + 1000
        # Half-open window [lower, upper): a quantity that starts exactly on
        # a boundary (including offset 0) belongs to exactly one part.
        quants = [j for j in parsed if lower <= j.span[0] < upper]
        beg_char = max(lower, end_char)
        text, end_char = embed_text(quants, beg_char, chunk, content)
        print(COLOR2 % text)
        try:
            _ = input('--------- End part %d of %d\n' % (chunk + 1, parts))
        except (KeyboardInterrupt, EOFError):
            return

예제 #10

0

파일 보기

def main():
    """Convert recipes.json into recipesjson.txt and allingredients.txt.

    For every recipe, each ingredient line is parsed for quantities and
    units; the cleaned ingredient names, quantities and units are appended
    to the module-level ``outJSON["recipes"]`` list. Finally ``outJSON`` is
    dumped to "recipesjson.txt" and the set of all ingredient names is
    written to "allingredients.txt".
    """
    completeListIngredients = []
    recipesDict = parseRecipes("recipes.json")

    # Matches strings made only of digits and spaces (i.e. no real content).
    digits_only = re.compile(r"^[0-9 ]+$")

    for recipe in tqdm(recipesDict, desc="Generating JSON files"):
        recipeNumsList = []
        recipeUnitsList = []
        recipeContent = []

        for ingredient in recipe["ingredients"]:
            is_dimensionless = False

            # Do some string cleanup
            ingredient = ingredient.lower()
            # Replace weird 1-1/2 notation
            ingredient = re.sub(r"(\d)\-(\d/\d)", r"\1 \2", ingredient)

            try:
                quants = parser.parse(ingredient)
            except Exception:
                # The parser failed: fall back to taking the first run of
                # digits as the quantity and the rest of the line as the
                # ingredient.
                # NOTE(review): this path stores a bare string in
                # recipeNumsList and adds nothing to recipeUnitsList, unlike
                # the main path below -- confirm the intended output shape.
                first_number = re.search(r'\d+', ingredient)
                if first_number:
                    quantity = first_number.group(0)
                    parsedIngredient = ingredient[first_number.end():]
                    parsedIngredient = stemIngredient(parsedIngredient)
                    if not digits_only.match(parsedIngredient):
                        recipeNumsList.append(quantity)
                        recipeContent.append(parsedIngredient)
                # Always move on: there is no parse result for this
                # ingredient, and the result of a previous ingredient must
                # not be reused.
                continue

            if not quants:
                continue

            # Lists to add to recipeNumsList and recipeUnitsList in case we
            # have more than 2 nums/units
            recipeNums = []
            recipeUnits = []

            for quant in quants:
                # If there aren't any units identified
                if quant.unit.name == "dimensionless":
                    is_dimensionless = True
                    # Either there is a number and the ingredient is
                    # "3 pieces of meat"
                    if any(char.isdigit() for char in ingredient):
                        numStr = str(quant.value)
                        ingredient = ingredient[
                            ingredient.find(numStr) +
                            len(numStr):].lstrip(" ")
                        ingredient = stemIngredient(ingredient)
                        if not digits_only.match(ingredient):
                            if quant.value not in recipeNums:
                                recipeNums.append(quant.value)
                            if ingredient not in recipeContent:
                                recipeContent.append(ingredient)
                            recipeUnits.append("")
                    # Or there isn't any number and we just append the
                    # ingredient
                    else:
                        ingredient = stemIngredient(ingredient)
                        if not digits_only.match(ingredient):
                            if ingredient not in recipeContent:
                                recipeContent.append(ingredient)
                            recipeUnits.append("")
                else:
                    unit_name = str(quant.unit.name)
                    if not digits_only.match(unit_name):
                        if quant.value not in recipeNums:
                            recipeNums.append(quant.value)
                        recipeUnits.append(unit_name)

            if not is_dimensionless:
                cleanIngredient = deleteUnits(ingredient, recipeUnits)
                cleanIngredient = stemIngredient(cleanIngredient)
                if not digits_only.match(cleanIngredient):
                    recipeContent.append(cleanIngredient)
            recipeUnitsList.append(recipeUnits)
            recipeNumsList.append(recipeNums)

        completeListIngredients.extend(recipeContent)
        outJSON["recipes"].append({
            "name": recipe["name"],
            "yield": recipe["yield"],
            "image": recipe["image"],
            "quantities": recipeNumsList,
            "units": recipeUnitsList,
            "content": recipeContent
        })

    with open("recipesjson.txt", "w") as outfile:
        json.dump(outJSON, outfile)

    # The with-block closes the file; no explicit close() is needed.
    with open("allingredients.txt", "w") as final_file:
        final_file.write(str(set(completeListIngredients)))

예제 #11

0

파일 보기

파일: romanlp.py 프로젝트: cgoliver/nlplotlib

def get_action_from_sentence(text, columns=None):
    """Extract the action (verb and complement) and its values from a sentence.

    Two forms are handled. Without a double quote in ``text``, the values
    are the numeric values of the quantities parsed from the sentence. With
    a double quote, the first quoted substring is the single value and the
    verb is prepended to the complement.

    Args:
        text: The instruction sentence; must be non-empty.
        columns: Optional iterable of column names. When truthy, the columns
            that occur in the sentence are returned instead of the values.

    Returns:
        tuple: ``(complement, value)``, or
        ``(complement, ("data.csv", used_cols))`` when ``columns`` is given.
    """
    if "\"" not in text:
        # Capitalise the first character only.
        text = text[0].upper() + text[1:]
        words = nltk.tokenize.word_tokenize(text)
        full_text = text
        regex = re.compile('[^0-9a-zA-Z !?]')
        text = regex.sub('', text)
        # Numbers are replaced by the placeholder "many" before the verb and
        # complement helpers run.
        numbers = find_number(text)
        for num in numbers:
            text = text.replace(num, "many")
        verbs = get_action_verb_from_string(text)
        if verbs == []:
            # No verb detected: fall back to the first word of the sentence.
            verbs = [words[0]]
        verb = verbs[0]
        complement = get_complement_to_verb(text, verb)
        # Take the numeric value of every parsed quantity, dimensionless or
        # not.
        value = [quantity.value for quantity in qp.parse(full_text)]
        order = nltk.tokenize.word_tokenize(text)

        # Keep the complement words in sentence order.
        complement.sort(key=lambda x: order.index(x))
        if "many" in complement:
            complement.remove("many")
        complement, value = comp_to_val(complement, value)
    else:
        # The first quoted substring is the value; it is replaced by the
        # placeholder " something" before the sentence is analysed.
        value = [text.split("\"")[1]]
        words = nltk.tokenize.word_tokenize(text)
        full_text = text
        text = text.replace(value[0], " something")
        text = text[0].upper() + text[1:]
        regex = re.compile('[^a-zA-Z !?]')
        text = regex.sub('', text)
        numbers = find_number(text)
        for num in numbers:
            text = text.replace(num, "many")
        verbs = get_action_verb_from_string(text)
        if verbs == []:
            verbs = [words[0]]
        verb = verbs[0]
        complement = get_complement_to_verb(text, verb)
        order = nltk.tokenize.word_tokenize(text)

        complement.sort(key=lambda x: order.index(x))
        if "many" in complement:
            complement.remove("many")
        # NOTE(review): the complement returned by comp_to_val is discarded
        # here and the verb is prepended to the untransformed complement,
        # unlike the branch above -- confirm that this is intended.
        comp, value = comp_to_val(complement, value)
        complement = [verb] + complement
    if columns:
        print(f"PARSER: {text}")
        print(f"PARSER COLUMNS: {columns}")
        used_cols = [col for col in columns if col in full_text]
        print(f"used columns parser: {used_cols}")
        return (complement, ("data.csv", used_cols))
    else:
        return (complement, value)

예제 #12

0

파일 보기

# -*- coding: utf-8 -*-
"""
Created on Thu Mar 15 13:36:10 2018

@author: kach
"""

from quantulum import parser

# Parse one sample sentence and print the quantities the parser found in it.
sample_sentence = 'i want 2 liters of water'
quants = parser.parse(sample_sentence)
print(quants)

예제 #13

0

파일 보기

파일: dataset_title_helper.py 프로젝트: EmmaArnold/hdx-python-api

    def fuzzy_match_dates_in_title(cls, title, ranges, ignore_wrong_years):
        # type: (str, List[Tuple[datetime,datetime]], List[int]) -> str
        """
        Fuzzy match dates in title appending to ranges

        Numbers that the quantity parser recognises with a unit are first
        added to ignore_wrong_years. Every remaining match of
        cls.YEAR_PATTERN is then examined together with up to 13 characters
        to its left and to its right; the narrower resulting date range is
        kept, its text is removed from the title, and the range is appended
        to ranges. Finally the whole remaining title is tried as one precise
        date. Both ranges and ignore_wrong_years are modified in place.

        Args:
            title (str): Title to parse
            ranges (List[Tuple[datetime,datetime]]): List of date ranges found so far
            ignore_wrong_years (List[int]): Numbers identified as years that probably are not years

        Returns:
            str: Title with dates removed

        """
        try:
            for quant in parser.parse(title):
                if quant.unit.name == 'dimensionless':
                    continue
                # A number followed by a unit is a measurement, not a year.
                ignore_wrong_years.append(int(quant.value))
        except UnboundLocalError:  # quantulum on Py2 has a bug
            pass
        for match in cls.YEAR_PATTERN.finditer(title):
            year = match.group(0)
            if int(year) in ignore_wrong_years:
                continue
            start = match.start()
            end = match.end()
            # Window ending at the year (text to its left).
            stringlr = title[max(start - 13, 0):end]
            fuzzylr = dict()
            startdatelr = None
            enddatelr = None
            deltalr = timedelta(days=1000)
            try:
                startdatelr, enddatelr = parse_date_range(stringlr, fuzzy=fuzzylr, zero_time=True)
                if startdatelr and enddatelr:
                    deltalr = enddatelr - startdatelr
            except ParserError:
                pass
            # Window starting at the year (text to its right).
            fuzzyrl = dict()
            stringrl = title[start:min(end + 13, len(title))]
            startdaterl = None
            enddaterl = None
            deltarl = timedelta(days=1000)
            try:
                startdaterl, enddaterl = parse_date_range(stringrl, fuzzy=fuzzyrl, zero_time=True)
                if startdaterl and enddaterl:
                    deltarl = enddaterl - startdaterl
            except ParserError:
                pass
            # Prefer the left-hand window when its range is not wider.
            if startdatelr and deltalr <= deltarl:
                date_components = fuzzylr['date']
                ranges.append((startdatelr, enddatelr))
            elif startdaterl:
                date_components = fuzzyrl['date']
                ranges.append((startdaterl, enddaterl))
            else:
                # One-element tuple: a bare string would be iterated
                # character by character in the removal loop below.
                date_components = (year,)
                ranges.append(parse_date_range(year, zero_time=True))
            newtitle = title
            for date_component in date_components:
                newtitle = remove_string(newtitle, date_component, PUNCTUATION_MINUS_BRACKETS)
            logger.info('Removing date from title: %s -> %s' % (title, newtitle))
            title = newtitle
        try:
            fuzzy = dict()
            startdate, enddate = parse_date_range(title, fuzzy=fuzzy, zero_time=True)
            if startdate == enddate and len(fuzzy['date']) == 1:  # only accept dates where day, month and year are
                # all together not split throughout the string and where the date is a precise day not a range
                ranges.append((startdate, enddate))
                date_component = fuzzy['date'][0]
                newtitle = remove_string(title, date_component, PUNCTUATION_MINUS_BRACKETS)
                logger.info('Removing date from title: %s -> %s' % (title, newtitle))
                title = newtitle
        except (ParserError, OverflowError):
            pass

        return title