def validate_numeric_distance(user_input, answer):
    """Compare a user's distance answer with the expected one and build a reply.

    Both strings are parsed with quantulum; the first quantity found in each
    is converted to kilometres with pint and the two are compared as a ratio
    (user value / expected value).

    Args:
        user_input: Text typed by the user.
        answer: The expected answer as text; echoed back in some replies.

    Returns:
        str: One of
            * a phrase from ``supporting_list`` when the ratio is exactly 1.0;
            * that phrase plus a note with the exact value when the ratio
              lies in [0.96, 1.03];
            * a hint listing distance units when pint does not know the unit;
            * ``"the right answer is " + answer`` in every other case,
              including any parsing or conversion failure.
    """
    from quantulum import parser
    from pint import UnitRegistry, UndefinedUnitError

    right_answer = answer
    ureg = UnitRegistry()
    try:
        user_quants = parser.parse(user_input)
        answer_quants = parser.parse(answer)

        # Normalise unit names so that pint can parse them as expressions.
        user_unit = user_quants[0].unit.name.replace("-", "_").replace(" ", "_")
        answer_unit = answer_quants[0].unit.name.replace("-", "_").replace(" ", "_")

        user_km = ureg.Quantity(
            user_quants[0].value, ureg.parse_expression(str(user_unit))
        ).to('kilometers')
        answer_km = ureg.Quantity(
            answer_quants[0].value, ureg.parse_expression(str(answer_unit))
        ).to('kilometers')

        ratio = user_km.magnitude / answer_km.magnitude
        if 0.96 <= ratio <= 1.03:
            # NOTE(review): index 0 of supporting_list is never selected —
            # confirm that this is intentional.
            praise = supporting_list[random.randint(1, len(supporting_list) - 1)]
            if ratio == 1.0:
                return praise
            return praise + \
                " Your answer is in well accepted range. Still if you are curious the exact Value is " + right_answer
        return "the right answer is " + right_answer
    except UndefinedUnitError:
        return " Distance is expressed in meters,kilometers,miles,astronomical unit,light year"
    except Exception as e:
        # Best effort: any parsing/conversion problem (no quantity found,
        # division by zero, ...) falls back to showing the right answer.
        # Exception rather than BaseException so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        logger.info("exception : %s", e)
        return "the right answer is " + right_answer
def get_distance(line_orig, entities=None):
    """Derive a distance in miles from a free-text reply.

    The lower-cased text is run through ``quan_parser``. When quantities are
    found, only the last one is used: a dimensionless number or a value in
    miles is returned as is, a value in hours is multiplied by
    ``MILES_PER_HOUR``, and any other unit yields the default. When nothing
    is parsed, a few phrases are recognised instead.

    Args:
        line_orig: The reply text.
        entities: Unused by this function.

    Returns:
        The distance; ``MILES_PER_HOUR`` is the default.
    """
    default_miles = MILES_PER_HOUR
    unlimited_miles = ANYWHERE_MILES
    text = line_orig.lower()
    found = quan_parser.parse(text)

    if found:
        # Only the last parsed quantity is looked at.
        # TODO: other quantities?
        last = found[-1]
        if last.unit.name in ('dimensionless', 'mile'):
            return last.value
        if last.unit.name == 'hour':
            # convert with miles per hour
            return last.value * default_miles
        return default_miles

    # Non-committal replies keep the default; they are checked before
    # 'anywhere' so that they take precedence over it.
    non_committal = ("can't", "cannot", "can not", "not sure", "depend")
    if any(phrase in text for phrase in non_committal):
        return default_miles
    if 'anywhere' in text:
        return unlimited_miles
    return default_miles
def Tag_Unit_for_Value(value):
    """Tag a value as date, numeric (with unit and entity) or text.

    Dates are assumed to consist of several tokens, so a lone number such as
    ``2017`` is never treated as a date; the date check is only attempted
    when the value tokenizes into more than one word.

    Args:
        value: The raw string to classify.

    Returns:
        list: ``["date"]``, ``["numeric", unit_name, entity_name]`` or
        ``["text"]``.
    """
    # Separators are neutralised before counting words so that e.g.
    # "1/2/2017" counts as three tokens.
    normalised = (
        value.replace("/", " ").replace("-", " ").replace(".", "").replace(",", "")
    )
    if len(word_tokenize(normalised)) > 1:
        # Explicit comparison kept: only a result equal to True counts.
        if is_date(value) == True:  # noqa: E712
            return ["date"]

    try:
        first = parser.parse(value.lower())[0]
        return ["numeric", first.unit.name, first.unit.entity.name]
    except Exception:
        # Best effort: no quantity found (or the parser failed), so the value
        # is not numeric. Exception instead of a bare except so that
        # KeyboardInterrupt/SystemExit still propagate.
        pass

    # Neither date nor numeric.
    return ["text"]
def handle_sensor_intent(self, message):
    """Speak the name, state and unit of the sensor named in *message*.

    Args:
        message: Intent message; ``message.data["Entity"]`` holds the spoken
            entity name.

    Returns:
        None. The outcome is reported through ``self.speak_dialog``:
        a setup error when ``self.ha`` is missing, an offline error when the
        lookup raises ``RequestException``, an unknown-device message when
        nothing matches, otherwise the sensor reading.
    """
    self._setup()
    if self.ha is None:
        self.speak_dialog('homeassistant.error.setup')
        return

    entity = message.data["Entity"]
    LOGGER.debug("Entity: %s", entity)
    try:
        ha_entity = self.ha.find_entity(entity, ['sensor'])
    except RequestException:
        self.speak_dialog('homeassistant.error.offline')
        return
    if ha_entity is None:
        self.speak_dialog('homeassistant.device.unknown',
                          data={"dev_name": entity})
        return
    entity = ha_entity['id']

    # IDEA: set context for 'read it out again' or similar
    # self.set_context('Entity', ha_entity['dev_name'])

    unit_measurement = self.ha.find_entity_attr(entity)
    sensor_name = unit_measurement['name']
    sensor_state = unit_measurement['state']
    # The unit is only used when there is a state to report; a missing or
    # None unit becomes '' so that "None" is never spoken.
    if sensor_state is not None:
        sensor_unit = unit_measurement.get('unit_measure') or ''
    else:
        sensor_unit = ''

    # extract unit for correct pronunciation
    # this is fully optional
    try:
        from quantulum import parser
    except ImportError:
        parser = None

    # The former `unit_measurement != ''` guard was dropped: it compared the
    # attribute mapping with a string and therefore was always true.
    if parser is not None:
        quantities = parser.parse(
            u'{} is {} {}'.format(sensor_name, sensor_state, sensor_unit))
        if quantities:
            quantity = quantities[0]
            uncertainty = quantity.uncertainty
            # A None uncertainty cannot be ordered against a float on
            # Python 3; treat it as acceptable.
            if (quantity.unit.name != "dimensionless"
                    and (uncertainty is None or uncertainty <= 0.5)):
                sensor_unit = quantity.unit.name
                sensor_state = quantity.value

    self.speak_dialog('homeassistant.sensor', data={
        "dev_name": sensor_name,
        "value": sensor_state,
        "unit": sensor_unit
    })
def test_parse(self):
    """Test for parser.parse() function.

    Runs every case returned by ``load_tests_from_json()``, shortest request
    first, and prints how many matched their expected result. Mismatches are
    counted rather than raised, so this method only reports.
    """
    result = {'passed': 0, 'not': 0}
    all_tests = load_tests_from_json()
    for test in sorted(all_tests, key=lambda x: len(x['req'])):
        try:
            self.assertEqual(parser.parse(test['req']), test['res'])
        except AssertionError:
            result['not'] += 1
        else:
            # Count a pass only when the comparison actually succeeded.
            result['passed'] += 1
    print("Passed: {}, not passed: {}".format(result['passed'],
                                              result['not']))
def handle_sensor_intent(self, message):
    """Speak the name, state and unit of the sensor or switch in *message*.

    Args:
        message: Intent message; ``message.data["Entity"]`` holds the spoken
            entity name.

    Returns:
        None. Returns without speaking when ``self._find_entity`` yields
        nothing; otherwise the reading is spoken via ``self.speak_dialog``.
    """
    entity = message.data["Entity"]
    LOGGER.debug("Entity: %s", entity)

    ha_entity = self._find_entity(entity, ['sensor', 'switch'])
    if not ha_entity:
        return
    entity = ha_entity['id']

    # IDEA: set context for 'read it out again' or similar
    # self.set_context('Entity', ha_entity['dev_name'])

    unit_measurement = self.ha.find_entity_attr(entity)
    sensor_unit = unit_measurement.get('unit_measure') or ''
    sensor_name = unit_measurement['name']
    sensor_state = unit_measurement['state']

    # extract unit for correct pronunciation
    # this is fully optional
    try:
        from quantulum import parser
    except ImportError:
        parser = None

    # The former `unit_measurement != ''` guard was dropped: it compared the
    # attribute mapping with a string and therefore was always true.
    if parser is not None:
        quantities = parser.parse(
            u'{} is {} {}'.format(sensor_name, sensor_state, sensor_unit))
        if quantities:
            quantity = quantities[0]
            uncertainty = quantity.uncertainty
            # A None uncertainty cannot be ordered against a float on
            # Python 3; treat it as acceptable.
            if (quantity.unit.name != "dimensionless"
                    and (uncertainty is None or uncertainty <= 0.5)):
                sensor_unit = quantity.unit.name
                sensor_state = quantity.value

    try:
        value = float(sensor_state)
        sensor_state = nice_number(value, lang=self.language)
    except (TypeError, ValueError):
        # Non-numeric states are spoken unchanged. TypeError covers a None
        # state, for which float() does not raise ValueError.
        pass

    self.speak_dialog('homeassistant.sensor', data={
        "dev_name": sensor_name,
        "value": sensor_state,
        "unit": sensor_unit
    })
def checkDict(list, dictionary):
    """Print what can be extracted from a crop description paragraph.

    The paragraph (``list[1]``) is split into the text before ``"USE"``, the
    text between ``"USE"`` and ``"GROWING PERIOD"``, and the text after
    ``"GROWING PERIOD"``. The function prints the three sections, whether
    any pattern from *dictionary* occurs in the uses section, whether the
    growing period mentions "annual", and the quantities parsed from each
    section.

    Only single-argument ``print(...)`` calls are used, which behave the
    same on Python 2 and Python 3.

    Args:
        list: Sequence whose second item is the paragraph text.
        dictionary: Iterable of regular-expression patterns searched
            (case-insensitively) in the uses section.

    Returns:
        None.

    Raises:
        IndexError: If the paragraph lacks the "USE" or "GROWING PERIOD"
            marker.
    """
    paragraph = list[1]
    use_split = paragraph.split("USE", 1)
    brief_description = use_split[0]
    uses = use_split[1].split("GROWING PERIOD")[0]
    growing_period = paragraph.split("GROWING PERIOD", 1)[1]

    print('BriefDescription = ' + brief_description)
    print('Uses = ' + uses)
    print('GrowingPeriod = ' + growing_period)

    if any(re.search(word, uses, re.IGNORECASE) for word in dictionary):
        print('crop is a food')
    if re.search('annual', growing_period, re.IGNORECASE):
        print('crop is Annual')

    # Parse only a window around the word "height". The start is clamped at
    # 0 so a match near the beginning does not wrap into a negative index,
    # and a missing "height" (find() == -1) yields no quantities at all.
    hpos = brief_description.find("height")
    if hpos == -1:
        quants_description = []
    else:
        window = brief_description[max(hpos - 20, 0):hpos + 26]
        quants_description = parser.parse(window)
    quants_uses = parser.parse(uses)
    quants_period = parser.parse(growing_period)

    print('BriefDescription Quant:')
    print(quants_description)
    print('Uses Quant:')
    print(quants_uses)
    print('GrowingPeriod Quant:')
    print(quants_period)

    if quants_description:
        print(quants_description[0].unit.name)  # the unit of the quantity
        print(quants_description[0].value)  # the amount of the quantity
    return
def Extract_Unit_From_Text(text):
    """Find the first sub-sequence of *text* that names a unit.

    The text is lower-cased, stripped of full stops and tokenized; each
    candidate produced by ``Find_All_Subsequences`` is prefixed with ``"1 "``
    and parsed, and the first one that yields a non-dimensionless unit wins.

    Args:
        text: The text to scan.

    Returns:
        ``["numeric", unit_name, entity_name]`` for the first match, or the
        string ``"Null"`` when no candidate contains a unit.
    """
    tokens = word_tokenize(text.replace(".", "").lower())
    for candidate in Find_All_Subsequences(tokens):
        try:
            first = parser.parse("1 " + candidate)[0]
            unit = first.unit.name
            entity = first.unit.entity.name
        except Exception:
            # Best effort: a candidate that yields no quantity (or makes the
            # parser fail) is skipped. Exception instead of a bare except so
            # that KeyboardInterrupt/SystemExit still propagate.
            continue
        if unit != "dimensionless":
            return ["numeric", unit, entity]

    # If nothing found, then:
    return "Null"
def wiki_test(page='CERN'):
    """Download a wikipedia page and test the parser on its content.

    The page is shown in parts of 1000 characters; after each part the
    function waits for the user to press Enter. Ctrl-C or end-of-input stops
    the walk-through.

    Pages full of units:
        CERN
        Hubble_Space_Telescope,
        Herschel_Space_Observatory

    Args:
        page: Title of the wikipedia page to fetch.

    Returns:
        None.
    """
    chunk_size = 1000
    content = wikipedia.page(page).content
    parsed = parser.parse(content)
    # Ceiling division, so that a trailing partial chunk is shown as well
    # (round() could drop it).
    parts = -(-len(content) // chunk_size)

    end_char = 0
    for chunk in range(parts):
        _ = os.system('clear')

        lower = chunk * chunk_size
        upper = lower + chunk_size
        # Half-open interval: a quantity starting exactly on a chunk boundary
        # (including offset 0) belongs to the chunk that starts there.
        quants = [j for j in parsed if lower <= j.span[0] < upper]

        beg_char = max(lower, end_char)
        text, end_char = embed_text(quants, beg_char, chunk, content)
        print(COLOR2 % text)

        try:
            _ = input('--------- End part %d of %d\n' % (chunk + 1, parts))
        except (KeyboardInterrupt, EOFError):
            return
def main():
    """Split every recipe ingredient into quantities, units and content.

    Recipes are loaded with ``parseRecipes("recipes.json")``. Each ingredient
    line is lower-cased, parsed for quantities and reduced to the ingredient
    text; the per-recipe results are appended to ``outJSON["recipes"]``.

    Side effects:
        Writes ``outJSON`` as JSON to ``recipesjson.txt`` and the string form
        of the set of all collected ingredient texts to
        ``allingredients.txt``.
    """
    digits_only = re.compile("^[0-9 ]+$")
    completeListIngredients = []
    recipesDict = parseRecipes("recipes.json")

    for recipe in tqdm(recipesDict, desc="Generating JSON files"):
        recipeNumsList = []
        recipeUnitsList = []
        recipeContent = []

        for ingredient in recipe["ingredients"]:
            is_dimensionless = False

            # Do some string cleanup
            ingredient = ingredient.lower()
            # Replace weird 1-1/2 notation
            ingredient = re.sub(r"(\d)\-(\d/\d)", r"\1 \2", ingredient)

            try:
                quants = parser.parse(ingredient)
            except Exception:
                # The parser failed on this line. Tackle the case where there
                # are quantities but no units: take the first run of digits
                # as the quantity and everything after it as the ingredient.
                first_number = re.search(r'\d+', ingredient)
                if first_number:
                    quantity = first_number.group(0)
                    parsedIngredient = stemIngredient(
                        ingredient[first_number.end():])
                    if not digits_only.match(parsedIngredient):
                        recipeNumsList.append(quantity)
                        recipeContent.append(parsedIngredient)
                continue

            if not quants:
                # Nothing quantifiable: keep the line as it is.
                recipeContent.append(ingredient)
                continue

            # Collected per ingredient, because one line may hold several
            # numbers/units.
            recipeNums = []
            recipeUnits = []
            for quant in quants:
                if quant.unit.name == "dimensionless":
                    # No unit was identified.
                    is_dimensionless = True
                    if any(char.isdigit() for char in ingredient):
                        # A bare count, e.g. "3 pieces of meat": drop the
                        # number and keep what follows.
                        # NOTE(review): str(quant.value) need not occur
                        # verbatim in the text; find() then returns -1 and
                        # the cut lands at len(numStr) - 1 — confirm this is
                        # the intended offset.
                        numStr = str(quant.value)
                        ingredient = ingredient[
                            ingredient.find(numStr) + len(numStr):].lstrip(" ")
                        ingredient = stemIngredient(ingredient)
                        if not digits_only.match(ingredient):
                            if quant.value not in recipeNums:
                                recipeNums.append(quant.value)
                            if ingredient not in recipeContent:
                                recipeContent.append(ingredient)
                            recipeUnits.append("")
                    else:
                        # No number at all: just record the ingredient.
                        ingredient = stemIngredient(ingredient)
                        if not digits_only.match(ingredient):
                            if ingredient not in recipeContent:
                                recipeContent.append(ingredient)
                            recipeUnits.append("")
                else:
                    if not digits_only.match(str(quant.unit.name)):
                        if quant.value not in recipeNums:
                            recipeNums.append(quant.value)
                        recipeUnits.append(str(quant.unit.name))

            if not is_dimensionless:
                # Strip the recognised unit words from the ingredient text.
                cleanIngredient = deleteUnits(ingredient, recipeUnits)
                cleanIngredient = stemIngredient(cleanIngredient)
                if not digits_only.match(cleanIngredient):
                    recipeContent.append(cleanIngredient)

            recipeUnitsList.append(recipeUnits)
            recipeNumsList.append(recipeNums)

        completeListIngredients.extend(recipeContent)
        outJSON["recipes"].append({
            "name": recipe["name"],
            "yield": recipe["yield"],
            "image": recipe["image"],
            "quantities": recipeNumsList,
            "units": recipeUnitsList,
            "content": recipeContent
        })

    with open("recipesjson.txt", "w") as outfile:
        json.dump(outJSON, outfile)

    # The with-block closes the file; no explicit close() is needed.
    with open("allingredients.txt", "w") as final_file:
        final_file.write(str(set(completeListIngredients)))
def get_action_from_sentence(text, columns=None):
    """Extract the action (verb plus complement) and its value from a sentence.

    Two kinds of sentences are handled:

    * without a double quote: the values are the quantities that
      ``qp.parse`` finds in the sentence;
    * with a double quote: the value is the text between the first pair of
      quotes, which is replaced by " something" before the verb is looked up.

    Args:
        text: The sentence to analyse.
        columns: Optional iterable of column names. When truthy, the names
            occurring in the sentence are returned instead of the value.

    Returns:
        tuple: ``(complement, value)`` where ``complement`` starts with the
        verb, or ``(complement, ("data.csv", used_cols))`` when *columns* is
        given.
    """
    if '"' not in text:
        text = text[:1].upper() + text[1:]
        words = nltk.tokenize.word_tokenize(text)
        full_text = text
        text = re.sub('[^0-9a-zA-Z !?]', '', text)
        text, verb, complement = _verb_and_complement(text, words)
        # The former `"dimensionless" in value` branch was removed: it tested
        # a string against the parsed quantities and, had it ever matched,
        # would have read `.value` on the whole result instead of an item.
        value = [quantity.value for quantity in qp.parse(full_text)]
        complement, value = comp_to_val(complement, value)
    else:
        value = [text.split('"')[1]]
        words = nltk.tokenize.word_tokenize(text)
        full_text = text
        text = text.replace(value[0], " something")
        text = text[:1].upper() + text[1:]
        text = re.sub('[^a-zA-Z !?]', '', text)
        text, verb, complement = _verb_and_complement(text, words)
        # NOTE(review): unlike the unquoted case, the complement returned by
        # comp_to_val is discarded here — confirm that this is intended.
        _, value = comp_to_val(complement, value)

    complement = [verb] + complement
    if columns:
        print(f"PARSER: {text}")
        print(f"PARSER COLUMNS: {columns}")
        used_cols = [col for col in columns if col in full_text]
        print(f"used columns parser: {used_cols}")
        return (complement, ("data.csv", used_cols))
    return (complement, value)


def _verb_and_complement(text, words):
    """Mask numbers in *text* as "many" and return (text, verb, complement).

    The verb is the first one reported by ``get_action_verb_from_string``,
    falling back to the first of *words*. The complement is ordered by word
    position in the masked text and the "many" placeholder is dropped.
    """
    for num in find_number(text):
        text = text.replace(num, "many")

    verbs = get_action_verb_from_string(text)
    verb = verbs[0] if verbs else words[0]

    complement = get_complement_to_verb(text, verb)
    order = nltk.tokenize.word_tokenize(text)
    complement.sort(key=order.index)
    if "many" in complement:
        complement.remove("many")
    return text, verb, complement
# -*- coding: utf-8 -*-
"""
Minimal demonstration of the quantulum parser on one sentence.

Created on Thu Mar 15 13:36:10 2018

@author: kach
"""
from quantulum import parser

# Parse a sample sentence and show whatever the parser returns for it.
quants = parser.parse(
    'i want 2 liters of water'
)
print(quants)
def fuzzy_match_dates_in_title(cls, title, ranges, ignore_wrong_years):
    # type: (str, List[Tuple[datetime,datetime]], List[int]) -> str
    """
    Fuzzy match dates in title appending to ranges

    For every year matched by ``cls.YEAR_PATTERN`` the text up to 13
    characters to its left and to its right is parsed as a date range; the
    narrower of the two results is kept, falling back to the bare year. The
    matched date text is removed from the title. Finally the remaining title
    is parsed once more for a single precise date.

    Args:
        title (str): Title to parse
        ranges (List[Tuple[datetime,datetime]]): List of date ranges found so far; appended to in place
        ignore_wrong_years (List[int]): Numbers identified as years that probably are not years; appended to in place

    Returns:
        str: Title with dates removed

    """
    def parse_window(window):
        """Parse *window* and return (start, end, span, fuzzy tokens).

        Start and end are None when parsing fails; the span then keeps a
        1000-day default so that a successful parse on the other side wins.
        """
        fuzzy = dict()
        startdate = None
        enddate = None
        delta = timedelta(days=1000)
        try:
            startdate, enddate = parse_date_range(window, fuzzy=fuzzy, zero_time=True)
            if startdate and enddate:
                delta = enddate - startdate
        except ParserError:
            pass
        return startdate, enddate, delta, fuzzy

    # Numbers that carry a unit are quantities, not years.
    try:
        for quant in parser.parse(title):
            if quant.unit.name == 'dimensionless':
                continue
            ignore_wrong_years.append(int(quant.value))
    except UnboundLocalError:  # quantulum on Py2 has a bug
        pass

    # NOTE(review): the match offsets refer to the title as passed to
    # finditer(), while the slices below are taken from the current title,
    # which may already have been shortened — confirm this is intended.
    for match in cls.YEAR_PATTERN.finditer(title):
        year = match.group(0)
        if int(year) in ignore_wrong_years:
            continue
        start = match.start()
        end = match.end()

        startdatelr, enddatelr, deltalr, fuzzylr = parse_window(
            title[max(start - 13, 0):end])
        startdaterl, enddaterl, deltarl, fuzzyrl = parse_window(
            title[start:min(end + 13, len(title))])

        if startdatelr and deltalr <= deltarl:
            date_components = fuzzylr['date']
            ranges.append((startdatelr, enddatelr))
        elif startdaterl:
            date_components = fuzzyrl['date']
            ranges.append((startdaterl, enddaterl))
        else:
            # One-element list: a bare `(year)` is just the string, and
            # iterating it would remove the digits one character at a time.
            date_components = [year]
            ranges.append(parse_date_range(year, zero_time=True))

        newtitle = title
        for date_component in date_components:
            newtitle = remove_string(newtitle, date_component, PUNCTUATION_MINUS_BRACKETS)
        logger.info('Removing date from title: %s -> %s', title, newtitle)
        title = newtitle

    try:
        fuzzy = dict()
        startdate, enddate = parse_date_range(title, fuzzy=fuzzy, zero_time=True)
        if startdate == enddate and len(fuzzy['date']) == 1:
            # only accept dates where day, month and year are all together
            # not split throughout the string and where the date is a
            # precise day not a range
            ranges.append((startdate, enddate))
            date_component = fuzzy['date'][0]
            newtitle = remove_string(title, date_component, PUNCTUATION_MINUS_BRACKETS)
            logger.info('Removing date from title: %s -> %s', title, newtitle)
            title = newtitle
    except (ParserError, OverflowError):
        pass

    return title