Example #1
0
def get_loc_params(phrase, label, mode=None):
    """
    Return the number, unit and direction described by one movement phrase.

    :param phrase: sequence of tokens; element 0 of every token is the word
    :param label: label of the phrase; "Obstacle" phrases carry no quantity
    :param mode: 1 selects the turning preset (degrees); any other value
        selects the straight-movement preset (metre). Only consulted when
        the phrase contains one of the ``little`` words.
    :return: tuple ``(number, unit, direction)``. When the phrase is an
        obstacle, or no quantity can be parsed from it, the result is
        ``(0, -1, <last word of the phrase>)``.
    """
    # Obstacle phrases have no quantity: the direction is simply the last word.
    if label == "Obstacle":
        return 0, -1, phrase[-1][0]

    word_list = [word[0] for word in phrase]
    if contains_a_word_in(word_list, little):
        # Vague amounts ("a little") map to a fixed preset per mode.
        if mode == 1:
            number = LITTLE_BIT_TURN
            unit = "degrees"
        else:
            number = LITTLE_BIT_MOVE
            unit = "metre"
    else:
        # Parse once and reuse the result instead of parsing the text twice.
        quants = parser.parse(" ".join(word_list))
        if not quants:
            return 0, -1, phrase[-1][0]
        unit = quants[0].unit.name
        number = quants[0].value

    direction = get_direction(phrase)
    return float(number), unit, direction
Example #2
0
def preprocess(text):
    """
    Normalise a command and extract its location phrases and keywords.

    Punctuation is stripped, the leading word of each parsed quantity is
    replaced by the integer value of that quantity, and everything up to and
    including the first space is dropped. The resulting text is printed and
    then handed to ``nlp_util.match_regex_and_keywords``.

    @param text: the original text (must be in lowercase)
    @return: the ``(locPhrase, keywords)`` pair produced by nlp_util
    """
    cleaned = text.translate(str.maketrans('', '', string.punctuation))

    for found in parser.parse(cleaned):
        leading_word = str(found).split(' ')[0]
        cleaned = cleaned.replace(leading_word, str(int(found.value)))

    # Drop the first word when there is more than one.
    _first, separator, rest = cleaned.partition(' ')
    if separator:
        cleaned = rest
    print(cleaned)

    chunk_grammar = r"""
    DirectionFirst: {(((<TO|IN>)<DT>)?<RB|VBD|JJ|VBP|NN|VBN><CD><NNS|NN>?)}
    NumberFirst: {(<CD><NNS|NN>?((<TO|IN>)<DT>)?<RB|VBD|JJ|VBP|NN|VBN>)}
    """
    keywords_of_interest = [
        "degrees", "left", "right", "forward", "backward", "clockwise",
        "counterclockwise"
    ]

    return nlp_util.match_regex_and_keywords(
        cleaned, chunk_grammar, keywords_of_interest)
Example #3
0
 def find_quantities(self, position):
     """
     Parse the product quantities mentioned in a shopping-list position.

     :param position: position in shopping list
     :return: the parsed quantities, or a single dimensionless quantity of
         1 when the parser finds nothing
     """
     parsed = qparser.parse(position)
     if parsed:
         return parsed
     return [Quantity(1, self.dimensionless)]
def find_quantities(text):
    """Append one span record per quantity parsed from ``text`` to ``quantity_spans``.

    Each record holds the start/end offsets, the matched surface text, the
    entity name of the unit and the parsed uncertainty. Nothing is returned.
    """
    for found in parser.parse(text):
        record = dict(
            start=found.span[0],
            end=found.span[1],
            word=found.surface,
            entity=found.unit.entity.name,
            uncertainty=found.uncertainty,
        )
        quantity_spans.append(record)
Example #5
0
def extract_metrics(txt, dimensions=ref_dimensions, return_noise=False):
    """
    Extract metrics from the input text, keeping only selected dimensions.

    Parameters
    ----------
    txt : str
        Text from which the numerical values are to be extracted
    dimensions : container of str, optional
        Entity names to keep; a quantity is selected when the entity name
        of its unit is in this container. Defaults to ``ref_dimensions``.
    return_noise : bool, optional, default=False
        If true, also return the metrics whose entity was not selected

    Returns
    -------
    list, or tuple of two lists
        The selected metrics (converted with ``quantulum_to_metric(q, True)``).
        When ``return_noise`` is true, a pair ``(selected, noise)`` where the
        noise is converted with ``quantulum_to_metric(q, False)``.
    """
    selected = []
    rejected = []

    # Split the parsed quantities by whether their entity is wanted.
    for candidate in list(parser.parse(txt)):
        if candidate.unit.entity.name in dimensions:
            selected.append(candidate)
        elif return_noise:
            rejected.append(candidate)

    # Map the Quantity objects to Metric objects.
    metrics = [quantulum_to_metric(item, True) for item in selected]
    if return_noise:
        noise_metrics = [quantulum_to_metric(item, False) for item in rejected]
        return metrics, noise_metrics
    return metrics
Example #6
0
 def find_quantity_for_product(cls, product: Product):
     """
     Find the quantity and unit of an offered product, e.g. pair (=2).

     The product name is inspected first, then its description. In each
     text only the first quantity whose unit is not listed in
     ``cls.not_scalable_units`` is considered, and it is accepted only when
     its value is greater than 0.01.

     :param product: Product instance
     :return: pair (count, unit); ``(1, cls.dimensionless)`` if nothing fits
     """
     for source_text in (product.name, product.description):
         first_scalable = None
         for candidate in qparser.parse(source_text):
             if str(candidate.unit) not in cls.not_scalable_units:
                 first_scalable = candidate
                 break
         if first_scalable and first_scalable.value > 0.01:
             return first_scalable.value, str(first_scalable.unit)
     return 1, cls.dimensionless
Example #7
0
def get_loc_params(phrase):
    """Return ``(number, unit, direction)`` for a labelled movement phrase.

    The first quantity parsed from the phrase supplies the number and unit.
    The direction word is the last token for "NumberFirst" phrases;
    otherwise it is the second-to-last token when the unit is
    "dimensionless" and the third-to-last token when a unit is present.
    """
    sentence = " ".join(token[0] for token in phrase)
    first = parser.parse(sentence)[0]
    unit = first.unit.name
    number = first.value

    if phrase.label() == "NumberFirst":
        offset = -1
    elif unit == "dimensionless":
        offset = -2
    else:
        offset = -3
    direction = phrase[offset][0]

    return int(number), unit, direction
Example #8
0
    def _handle_sensor(self, message):
        entity = message.data["Entity"]
        self.log.debug("Entity: %s" % entity)

        ha_entity = self._find_entity(entity, ['sensor', 'switch'])
        if not ha_entity or not self._check_availability(ha_entity):
            return

        entity = ha_entity['id']

        # IDEA: set context for 'read it out again' or similar
        # self.set_context('Entity', ha_entity['dev_name'])

        unit_measurement = self.ha.find_entity_attr(entity)
        sensor_unit = unit_measurement.get('unit_measure') or ''

        sensor_name = unit_measurement['name']
        sensor_state = unit_measurement['state']
        # extract unit for correct pronounciation
        # this is fully optional
        try:
            from quantulum3 import parser
            quantulumImport = True
        except ImportError:
            quantulumImport = False

        if quantulumImport and unit_measurement != '':
            quantity = parser.parse(
                (u'{} is {} {}'.format(sensor_name, sensor_state,
                                       sensor_unit)))
            if len(quantity) > 0:
                quantity = quantity[0]
                if (quantity.unit.name != "dimensionless"
                        and (quantity.uncertainty is None
                             or quantity.uncertainty <= 0.5)):
                    sensor_unit = quantity.unit.name
                    sensor_state = quantity.value

        try:
            value = float(sensor_state)
            sensor_state = nice_number(value, lang=self.language)
        except ValueError:
            pass

        self.speak_dialog('homeassistant.sensor',
                          data={
                              "dev_name": sensor_name,
                              "value": sensor_state,
                              "unit": sensor_unit
                          })
Example #9
0
def amount(reference_ingredient, new_ingredient, data):
    """Estimate how much of ``new_ingredient`` is used relative to ``reference_ingredient``.

    Every line of ``data`` that mentions both ingredient names is decoded as
    a JSON recipe (without its last character). For each such recipe the
    first quantity parsed from the last ingredient text mentioning each name
    is converted to cups, and the ratio new/reference is scaled by 1000 and
    rounded.

    :param reference_ingredient: name of the ingredient used as baseline
    :param new_ingredient: name of the ingredient being compared
    :param data: iterable of text lines, one JSON recipe per line
    :return: rounded median of the per-recipe scores, or -1 if no recipe
        produced a score
    """
    values = []
    ureg = UnitRegistry()
    for line in data:
        # Best effort: a recipe that cannot be decoded, parsed or converted
        # is skipped. ``Exception`` (not a bare except) keeps Ctrl-C working.
        try:
            if reference_ingredient not in line or new_ingredient not in line:
                continue
            reference_q = []
            test_q = []
            recipe = json.loads(line[:-1])  # Don't include the ending comma
            for ingredient in recipe["ingredients"]:
                text = ingredient["text"]
                if reference_ingredient in text:
                    reference_q = parser.parse(text)
                if new_ingredient in text:
                    test_q = parser.parse(text)
            if not (reference_q and test_q):
                continue

            fq = str(reference_q[0].value) + " " + str(reference_q[0].unit)
            tq = str(test_q[0].value) + " " + str(test_q[0].unit)
            reference_volume = ureg.parse_expression(fq).to(ureg.cup)
            test_volume = ureg.parse_expression(tq).to(ureg.cup)
            score = test_volume.magnitude / reference_volume.magnitude * 1000
            values.append(round(score))
        except Exception:
            continue

    if values:
        return round(statistics.median(values))
    return -1
Example #10
0
def get_quants_info(all_data):
    """Flag which texts start with a quantity that has a real unit.

    :param all_data: iterable of texts
    :return: list with one int per text: 1 when the first parsed quantity
        has a unit other than 'dimensionless', 0 when nothing is parsed, the
        first quantity is dimensionless, or parsing fails
    """
    quants = []
    for text in all_data:
        try:
            parsed = parser.parse(text)
            has_unit = bool(parsed) and str(
                parsed[0].unit.name) != 'dimensionless'
        except Exception:
            # Problems inside the parser itself (should be rare) count as
            # "no quantity" instead of aborting the whole batch.
            has_unit = False
        quants.append(1 if has_unit else 0)
    return quants
Example #11
0
def preprocess(text):
    """Normalise a spoken command before it is parsed further.

    Punctuation is removed, a few fixed substrings are substituted
    ("seats"/"seat" -> "feet", the degree sign -> " degrees", "one" -> "1"),
    leading whitespace is stripped, and the expanded spelling of every
    parsed quantity value is replaced by its integer value.
    """
    text = text.translate(str.maketrans('', '', string.punctuation))

    substitutions = (
        ("seats", "feet"),
        ("seat", "feet"),
        (u"°", " degrees"),
        ("one", "1"),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    text = text.lstrip()

    for found in parser.parse(text):
        spelled_out = parser.inline_parse_and_expand(str(found.value))
        spelled_out = spelled_out.replace("-", " ")
        text = text.replace(spelled_out, str(int(found.value)))
    return text
Example #12
0
    def annotate(self, text):
        """Yield one ``Entity`` per quantity parsed from ``text``.

        The entity type is the uri of the unit's entity, followed by ":" and
        the unit uri when the two differ. The entity data is a copy of the
        quantity's attribute dict with the unit and its entity flattened to
        dicts, a "spoken" form added, and "span" and "surface" removed.

        NOTE: the parsed quantity objects are mutated in place (see below).
        """

        for e in parser.parse(text):
            # Must be computed first: the lines below replace e.unit by a dict.
            spoken = e.to_spoken()
            # ``data`` aliases the quantity's own attribute dict, so every
            # write to ``data`` also changes the attributes of ``e``.
            data = e.__dict__
            data["unit"] = e.unit.__dict__
            # e.unit is now a plain dict, hence the subscript access.
            data["unit"]["entity"] = e.unit["entity"].__dict__
            e_type = e.unit["entity"]["uri"]
            if e.unit["uri"] != e.unit["entity"]["uri"]:
                e_type = e.unit["entity"]["uri"] + ":" + e.unit["uri"]
            data["spoken"] = spoken
            data.pop("span")
            if data["unit"]["currency_code"] is None:
                data["unit"].pop("currency_code")
            # Shallow copy so that dropping "surface" leaves e.surface
            # readable for the Entity constructor below.
            data = data.copy()
            data.pop("surface")
            yield Entity(e.surface, e_type, source_text=text, data=data)
Example #13
0
    def _get_quantities(self, s):
        """Split ``s`` into parsed quantities and the remaining text.

        :param s: text to analyse
        :return: tuple ``(quantities, remainder)`` where ``quantities`` is a
            list of ``(value, unit)`` pairs and ``remainder`` is ``s`` with
            numbers and unit names stripped out. When nothing is parsed the
            result is ``([(1, "")], s)``.
        """
        quantums = q3_parser.parse(s)
        if len(quantums) == 0:
            return [(1, "")], s

        remainder = s
        quantities = []
        for quantum in quantums:
            quantity = quantum.value
            unit = quantum.unit.name
            # Dimensionless quantities are reported with an empty unit.
            if unit == "dimensionless":
                unit = ""

            # Pluralisation is disabled (see commented code), so this stays "".
            unit_pluralized = ""
            # if unit != "":
            #     unit_pluralized = unit if quantity <= 1 else pattern.en.pluralize(unit)

            # Remove the value as written by Fraction (e.g. "3" or "1/2") ...
            f = Fraction(quantity)
            remainder = remainder.replace(str(f), "")
            # ... and the decimal spelling of a few common fractions.
            for common_fraction in [0.5, 0.25, 0.75]:
                remainder = remainder.replace(str(common_fraction), "")

            if unit != "":
                if quantity > 1:
                    # NOTE(review): unit_pluralized is always "", so this
                    # replace changes nothing and the unit name stays in
                    # the remainder for quantities > 1.
                    remainder = remainder.replace(unit_pluralized, "")
                else:
                    remainder = remainder.replace(unit, "")

            remainder = self._clean_string(remainder)

            # A single leftover word with no parsed unit is taken as the unit.
            words = remainder.split(" ")
            if len(words) == 1 and unit == "":
                remainder = ""
                unit = words[0]

            if unit == "pound-mass":
                unit = "pound"
            quantities.append((quantity, unit))

        # Drop one bare "1" entry when other quantities were found.
        if len(quantities) > 1 and (1, "") in quantities:
            quantities.remove((1, ""))
        return quantities, remainder
Example #14
0
def get_value_unit(quantity_with_units):
    """Separate a quantity string into its numeric value and a pint unit.

    The string is first parsed with quantulum; the unit text is taken from
    the matched surface (minus the leading number when the surface starts
    with a digit) and converted with pint. If anything in that path fails,
    the whole input is parsed with pint and the value defaults to 1.

    :param quantity_with_units: text such as "5 kg" or just "kg"
    :return: tuple ``(value, unit)`` where ``unit`` is the result of
        ``UnitRegistry.parse_expression``
    """
    ureg = UnitRegistry()
    try:  # to parse using quantulum
        parsed_units = parser.parse(quantity_with_units)[0]

        value = parsed_units.value
        surface = parsed_units.surface

        # A leading digit means "number + units"; otherwise only units.
        if surface[0].isdigit():
            unit_text = ' '.join(surface.split()[1:])  # drop the number
        else:
            unit_text = surface
        unit = ureg.parse_expression(unit_text)
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed by the fallback.
        print('Using Pint')
        unit = ureg.parse_expression(quantity_with_units)
        value = 1
    return value, unit
def get_ingredients(lst):
    """Map ingredient names to their measurement text.

    For every ingredient string the quantities are parsed and combined into
    one measurement string, the quantity surfaces are removed from the text,
    and the noun phrases of the remaining text name the ingredient.

    :param lst: iterable of ingredient strings
    :return: tuple ``(measurements, names)``: a dict from ingredient name to
        measurement string, and a flat list of names (split into single
        words when exactly one noun phrase is found). Ingredients with more
        than one noun phrase are not recorded.
    """
    measurements = {}
    ingredient_lst = []

    for ingredient in lst:
        quants = parser.parse(ingredient)
        if len(quants) == 0:
            measurements[ingredient] = ""
            ingredient_lst.append(ingredient)
            continue

        # A bare count followed by a second quantity, e.g. "1 1/2 cups"
        # (summed) or "2 400 g" (multiplied).
        count_then_amount = len(quants) == 2 and str(quants[0].unit) == ""
        if count_then_amount and quants[1].value < 1:
            measurement = str(quants[0].value + quants[1].value)
            measurement = measurement + " " + str(quants[1].unit)
        elif count_then_amount and quants[1].value > 1:
            measurement = str(quants[1].value * quants[0].value) + " " + str(
                quants[1].unit)
        else:
            measurement = str(quants[0].value) + " " + str(quants[0].unit)

        # Delete the measurements from the ingredient text.
        for quant in quants:
            ingredient = ingredient.replace(quant.surface, "")

        blob = TextBlob(ingredient)
        phrase_count = len(blob.noun_phrases)

        if phrase_count == 0:
            name = str(blob).lstrip()
            measurements[name] = measurement
            ingredient_lst.append(name)
        elif phrase_count == 1:
            phrase = blob.noun_phrases[0].lstrip()
            measurements[phrase] = measurement
            ingredient_lst.extend(str(phrase).split(" "))

    return measurements, ingredient_lst
    def resolve(self, text):
        """
        Identify measurements in ``text``.

        Quantities whose entity is "dimensionless", "time" or "unknown" are
        ignored, as are quantities whose surface does not occur in ``text``.

        :param text: The text from which resolver should identify measurement
        :return: list of dicts with the keys 'tag', 'entity', 'resolvedTo',
            'start' and 'end'; an empty list when ``text`` is not a string
            or when resolving fails (the failure is logged)
        """
        if not isinstance(text, str):
            return []

        try:
            resolved_mappings = []
            for quant_each in parser.parse(text):
                entity_name = quant_each.unit.entity.name
                if entity_name in ("dimensionless", "time", "unknown"):
                    continue
                start, end = get_word_position(quant_each.span, text)
                resp = {'tag': entity_name.upper(), 'entity': quant_each.surface,
                        'resolvedTo': {'unit': quant_each.unit.name, 'quantity': quant_each.value,
                                       'baseEntity': quant_each.surface}, 'start': start, 'end': end}
                if quant_each.surface in text:
                    resolved_mappings.append(resp)
            return resolved_mappings

        except Exception as ex:
            # logger.exception already records the traceback, so it is
            # logged once instead of twice.
            logger.exception(ex)
            return []
Example #17
0
def ingredient_from_string(s):
    """Build an ``Ingredient`` from one line of a recipe's ingredient list.

    The first quantity parsed from the line becomes the ingredient quantity.
    The name is what remains after the quantity (spoken form, unit name,
    fraction, digits) and a fixed list of filler words are removed and the
    text is cut at the first ", " or " - ".

    :param s: raw ingredient line
    :return: ``Ingredient(name, quantity, original)``, or ``None`` when no
        quantity can be parsed from the line
    """
    original = s
    # Normalise non-breaking spaces (the former replace was a no-op).
    s = s.replace("\u00a0", " ")

    quantums = parser.parse(s)
    if not quantums:
        return None
    qu = quantums[0]

    s = s.replace(qu.to_spoken(), "")
    unit_name = str(qu.unit)
    if unit_name != "":
        # Plural first, so no stray "s" is left behind.
        s = s.replace(f"{unit_name}s", "")
        s = s.replace(unit_name, "")
    s = s.replace(str(Fraction(qu.value)), "")
    s = s.split(", ")[0]
    s = s.split(" - ")[0]
    # Strip every digit 0-9 (the former range(0, 9) skipped 9).
    for digit in range(10):
        s = s.replace(str(digit), "")
    s = s.replace("/", "")
    for word in [
            "oz.", "small", "medium", "large", "Tbsp.", "finely", "freshly",
            "chopped", "tsp.", "kosher", " can ", "fresh",
            "reduced-sodium", "creamy", "lb.", "¾", "(. ounce)", "( ounce)",
            "()", "( )"
    ]:
        s = s.replace(word, "")
    s = s.replace("  ", " ")
    s = s.lstrip(".")
    s = s.lstrip(" ")

    return Ingredient(
        name=s,
        quantity=qu,
        original=original,
    )
        # Build one ``dict_service`` record per scraped product.
        for product in data['products']:
            dict_service = {}

            # NOTE(review): ``url`` is read here before it is rebound two
            # lines below, so this stores whatever ``url`` held before this
            # iteration (the previous product's url from the second pass
            # on) — confirm this is intended.
            product['search_url'] = url
            title = product['title']
            url = product['url']
            rating = product['rating']
            reviews = product['reviews']
            # Keep only the part after the rupee sign when one is present.
            priceList = str(product['price']).split('₹')
            if (priceList.__len__() > 1):
                price = priceList[1]
            else:
                price = product['price']

            # Product quantity: surface text of the first quantity in the title
            listQuantity = parser.parse(title)
            if (listQuantity.__len__() > 0):
                quantity = listQuantity[0].surface
            else:
                quantity = ''

            # Product name: first two space-separated words of the title
            # NOTE(review): str.split always returns at least one element,
            # so this guard is always true and listProduct[1] raises
            # IndexError for a title without a space.
            listProduct = title.split(' ')
            if (listProduct.__len__() > 0):
                productName = listProduct[0] + ' ' + listProduct[1]

            dict_service['title'] = title
            dict_service['url'] = url
            dict_service['rating'] = rating
            dict_service['reviews'] = reviews
            dict_service['price'] = price
 def extract_most_informative(self):
     """Return every quantity the parser finds in ``self.text``."""
     return parser.parse(self.text)
Example #20
0
def get_quantulum(text: str) -> Optional[Quantity]:
    """Return the first quantity parsed from ``text``, or ``None`` if there is none."""
    found = parser.parse(text)
    return found[0] if found else None
Example #21
0
def respond_to_mentions(api, since_id):
    '''
    Replies to twitter mentions of the bot
    Considers any tweet after since_id to be fresh and in need of reply

    Mentions that are themselves replies, and mentions in which no quantity
    can be parsed, are skipped (they still advance the returned id).

    Args
        api       --  tweepy api object with initialized authentication
        since_id  --  tweepy tweet ID object associated with the last successfully replied to tweet

    Returns
        updated_since_id -- tweepy tweet ID object associated with the (new) last successfully replied to tweet
    '''
    updated_since_id = since_id
    #Extended mode is necessary now that twitter bumped certain (now all?) languages to allow 280 characters
    #Legacy version may be safer in other languages
    for tweet in tweepy.Cursor(
            api.mentions_timeline, since_id=since_id,
            tweet_mode="extended").items():  #Use cursor to ignore pagination
        updated_since_id = max(tweet.id, updated_since_id)

        #Don't respond to replies, only fresh tweets.
        #Skip this tweet only; returning here would abandon the remaining
        #mentions and lose the updated id.
        if tweet.in_reply_to_status_id is not None:
            continue

        #Parse units; nothing to obtusify when no quantity is found
        quantities = parser.parse(tweet.full_text)
        if not quantities:
            continue
        quantunit = quantities[0]
        value = quantunit.value
        unit = quantunit.unit.name

        #Convert to base SI units
        pintunit = value * ureg.parse_expression(unit)
        dims = [
            pintunit.dimensionality.get(name)
            for name in ['[mass]', '[length]', '[time]', '[current]']
        ]
        #Define and convert to a unit in terms of base SI units
        bu = ['kilogram', 'meter', 'second', 'ampere']
        baseunit = ''
        for i, u in zip(dims, bu):
            if i != 0:
                baseunit = baseunit + u + '**' + str(i) + '*'

        ureg.define('base_unit = ' + baseunit[:-1] + '= bu')
        pintunit.ito('bu')

        #Obtusify the unit
        obtuse_quant = bf.obtusify(pintunit.magnitude,
                                   dims,
                                   cf.derived_units,
                                   cf.prefixes,
                                   minvalord=-8,
                                   maxvalord=8)

        #Reply with the original tweet quoted, replacing the units
        handle = " @%s" % tweet.user.screen_name
        if len(tweet.full_text) + len(obtuse_quant) + len(handle) > 280:
            #Can't fit quotation in reply, just reply with quantity
            if len(obtuse_quant) + len(handle) > 280:
                #This will never trigger under the base parameters, but catch anyway
                log.info("Obtuse quantitiy too long to tweet, aborting reply")
                log.info("Quantity: %s", obtuse_quant)
                continue
            log.info(
                "Tweet too long to quote with obtuse unit, replying with just unit"
            )
            log.info("Tweet ID: %s", tweet.id)
            reply = obtuse_quant + handle
        else:
            #Plain string replacement: the tweet text is not a regex pattern
            scrubbed_tweet = tweet.full_text.replace("@obtuse_units", "")
            reply = scrubbed_tweet.replace(quantunit.surface,
                                           obtuse_quant) + handle

        #Reply
        api.update_status(status=reply, in_reply_to_status_id=tweet.id)

    #Only after every mention has been handled
    return updated_since_id
Example #22
0
        steps = recipe["steps"].replace("Please enable targetting cookies to show this banner if (window.innerWidth <= 10000 && window.innerWidth >= 768) { propertag.cmd.push(function() { proper_display('jamieoliver_leftrail'); }); }", '').strip()

        steps_array = []

        steps.replace('\n', '')
        steps.replace('40.', '').replace('39.', '').replace('38.', '').replace('37.', '').replace('36.', '').replace('35.', '').replace('34.', '').replace('33.', '').replace('32.', '').replace('31.', '').replace('30.', '').replace('29.', '').replace('28.', '').replace('27.', '').replace('26.', '').replace('25.', '').replace('24.', '').replace('23.', '').replace('22.', '').replace('21.', '').replace('20.', '').replace('19.', '').replace('18.', '').replace('17.', '').replace('16.', '').replace('15.', '').replace('14.', '').replace('13.', '').replace('12.', '').replace('11.', '').replace('10.', '').replace('9.', '').replace('8.', '').replace('7.', '').replace('6.', '').replace('5.', '').replace('4.', '').replace('3.', '').replace('2.', '').replace('1.', '')
        steps = re.sub(r'(\d+)/(\d+)', lambda m: str(int(m.group(1))/int(m.group(2))), steps)

        steps_data = steps.split('.')

        print(recipe["_id"])
        for step in steps_data:
            steps_array.append({
                "text": step.strip(),
                "interpol": parser.inline_parse(step.strip()),
                "quantities": list(map(lambda s: {"value": s.surface, "type": s.unit.name }, parser.parse(step.strip() ) ))
            })

        ings_array = []

        for ing in recipe["ingredients"]:
            hnewIng = ing.replace('-', ' ').strip()
            newIng = re.sub(r'(\d+)/(\d+)', lambda m: str(int(m.group(1))/int(m.group(2))), hnewIng)
            ings_array.append({
                "description" : newIng,
                "interpol" : parser.inline_parse(newIng.strip()),
                "products" : parse_ingredient(newIng),
                "quantities": list(map(lambda s: {"value": s.surface, "type": s.unit.name }, parser.parse(newIng) ))
            })

        if recipe["yeild"]:
Example #23
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Try to create a test case from a sentence """

import sys
import json

from quantulum3 import parser, classes

if __name__ == "__main__":
    # The sentence is everything given on the command line.
    sentence = " ".join(sys.argv[1:])

    res = []
    for parsed in parser.parse(sentence):
        assert isinstance(parsed, classes.Quantity)
        unit = parsed.unit
        res.append({
            "value": parsed.value,
            "unit": unit.name,
            "surface": parsed.surface,
            "entity": unit.entity.name,
            "dimensions": unit.entity.dimensions,
            "uncertainty": parsed.uncertainty,
        })

    # Emit the request/response pair as a ready-to-paste JSON test case.
    print(json.dumps({"req": sentence, "res": res}, indent=2))
Example #24
0
    async def on_message(self, message):
        """Offer unit and currency conversions for quantities in a message.

        Messages written by the bot itself are ignored. Every quantity
        parsed from the message content adds one field to an embed,
        depending on its unit: a configured measure, a configured currency,
        degrees fahrenheit, AWG wire gauge, or years (one channel only).
        If at least one field was added, the bot waits for a reaction
        confirmation and then sends the embed to the message's channel.
        """
        if message.author == self.bot.user:
            return

        try:
            quants = parser.parse(message.content)
        except KeyError:
            print("[!] quantulum3 internal error")
            return
        channel = message.channel

        embed = discord.Embed(title=" ")
        react_emoji = '📝'

        for q in quants:
            unit_name = str(q.unit)

            if unit_name in self.measures:
                # measures entry: (target unit, factor, offset)
                conversion = self.measures[unit_name]
                result = q.value * conversion[1] + conversion[2]
                output_unit = conversion[0]

                embed.add_field(
                    name="Unit Conversion:",
                    value=f"{q.value} {q.unit} = {result:.2f} {output_unit}",
                    inline=False)
                react_emoji = '📏'

            elif unit_name in self.currencies:
                converter = CurrencyConverter()
                source_currency = self.currencies[unit_name]

                # Every configured currency except the source one.
                other_currencies = self.currencies.copy()
                other_currencies.pop(unit_name)

                lines = []
                for currency in other_currencies.values():
                    result = converter.convert(q.value, source_currency,
                                               currency)
                    lines.append(
                        f"{q.value} {source_currency} = {result:.2f} {currency}"
                    )

                embed.add_field(name="Currency Conversion:",
                                value="\n".join(lines),
                                inline=False)
                react_emoji = '💵'

            # SPECIAL CASES
            elif unit_name == 'degree fahrenheit':
                result = (q.value - 32) * (5 / 9)
                output_unit = "C"

                embed.add_field(
                    name="Unit Conversion:",
                    value=f"{q.value} {q.unit} = {result:.2f} {output_unit}",
                    inline=False)
                react_emoji = '🌡️'

            elif unit_name == "attowatt gausses":  # AWG
                gauge = q.value
                result = 0.127 * pow(92, ((36 - gauge) / 39))
                result_area = 3.14159 * pow(result / 2, 2)

                embed.add_field(
                    name="Unit Conversion:",
                    value=f"{q.value} AWG = {result:.4f} Ømm\n{q.value} AWG = {result_area:.4f} mm2",
                    inline=False)
                react_emoji = '🔌'

            elif unit_name == 'year':  # Years to seconds (regs only)
                if channel.id == 260957117412802561:
                    output_string = f"**{q.value} years is:**\n{12 * q.value} months\n{365 * q.value} days\n{525600 * q.value} minutes\n{31536000 * q.value} seconds"
                    embed.add_field(
                        name=
                        "xkuyax wanted this key feature, but Blaa complains about it a lot:",
                        value=output_string,
                        inline=False)
                    react_emoji = '📆'

        # Nothing convertible was found.
        if len(embed.fields) == 0:
            return

        send_embed, user = await bot_utils.await_react_confirm(
            message, self.bot, emoji=react_emoji, confirm_time=300)
        if send_embed:
            embed.set_footer(text=f"Conversion Requested By: {user}")
            await channel.send(embed=embed)
Example #25
0
def _fractions_to_decimals(text):
    """Rewrite simple fractions such as ``1/2`` in *text* as decimals (``0.5``)."""

    def _convert(match):
        denominator = int(match.group(2))
        # A zero denominator is not a usable quantity; keep the text as
        # written instead of raising ZeroDivisionError.
        if denominator == 0:
            return match.group(0)
        return str(int(match.group(1)) / denominator)

    return re.sub(r'(\d+)/(\d+)', _convert, text)


def _clean_steps(text):
    """Strip step-number markers (1-40) and newlines from instruction text."""
    # Only treat "N." as a marker at the start of the text or right after
    # sentence punctuation / a newline, and never when a digit follows, so
    # decimals ("1.5") and sentence-ending counts ("bake for 20.") survive.
    text = re.sub(r'(?:\A|(?<=[.!?:\n]))\s*(?:40|[1-3]\d|[1-9])\.(?!\d)', '',
                  text)
    # A space (rather than nothing) keeps words on adjacent lines apart.
    return text.replace('\n', ' ')


def _surface_quantities(text):
    """Return the quantities ``parser`` finds in *text* as value/type dicts."""
    return [{
        "value": quantity.surface,
        "type": quantity.unit.name
    } for quantity in parser.parse(text)]


def _top_intent(predictions, threshold=0.25):
    """Return the first predicted label if its score beats *threshold*, else 'n/a'."""
    if predictions and predictions[0][1] > threshold:
        return predictions[0][0]
    return 'n/a'


def _rank_images(dirpath, files):
    """Prune and rank the images in *dirpath*; return the mean kept score.

    Files named ``hold`` are ignored. An image is deleted when it is not
    larger than 600x600, when OCR finds any text in it, or when its score is
    not above 34.5. The survivors are renamed ``0.jpg``, ``1.jpg``, ... in
    descending score order. Returns 0 when nothing is kept.
    """
    kept = []
    for filename in files:
        if filename == 'hold':
            continue
        path = dirpath + '/' + filename
        # Close the image handle before the file is deleted or renamed.
        with Image.open(path) as img:
            width, height = img.size
            usable = (width > 600 and height > 600
                      and not pytesseract.image_to_string(img))
        if usable:
            score = b.get_score(path)
            if score > 34.5:
                kept.append({"file": filename, "score": score})
                print(score)
                continue
        os.remove(path)

    # Average in discovery order, before sorting.
    average = sum(entry["score"] for entry in kept) / len(kept) if kept else 0

    ranked = sorted(kept, key=lambda entry: entry['score'], reverse=True)
    for position, entry in enumerate(ranked):
        os.rename(dirpath + '/' + entry["file"],
                  dirpath + '/' + str(position) + ".jpg")
    return average


def import_recipe():
    """Scrape a recipe from a URL, enrich it, store it and fetch images for it.

    Reads a JSON body from ``request.data`` with the keys ``url`` (page to
    scrape) and ``id`` (stored as ``addedBy``). The scraped recipe is
    normalised (name, steps, ingredients, yield), given a complexity figure
    and type/cuisine/course/temperature classifications, and inserted into
    ``recipe_collection``. Candidate images are then downloaded into a
    folder named after the inserted id, filtered and ranked, and an
    ``analytics`` sub-document is written for the recipe.

    Returns:
        A 200 response whose JSON body is ``{'done': True, 'id': <id>}``.
        The response is returned on every path once the recipe has been
        inserted, including when no images could be fetched.
    """
    data = json.loads(request.data)
    url = data.get("url")
    user_id = data.get("id")

    scraper = scrape_me(url)
    recipe = {
        "name": scraper.title(),
        "time": scraper.total_time(),
        "ingredients": scraper.ingredients(),
        "steps": scraper.instructions(),
        "yeild": scraper.yields(),
        "image": scraper.image(),
        "refDir": scraper.title().lower().replace(" ", "_"),
        "src": url,
        "addedBy": user_id,
    }

    name = recipe["name"].replace('Recipe', '').strip()
    steps = recipe["steps"].replace(
        "Please enable targetting cookies to show this banner if (window.innerWidth <= 10000 && window.innerWidth >= 768) { propertag.cmd.push(function() { proper_display('jamieoliver_leftrail'); }); }",
        '').strip()

    # str.replace returns a new string, so the cleaned text must be rebound.
    steps = _clean_steps(steps)
    steps = _fractions_to_decimals(steps)

    # Split into sentences, but not inside decimals such as "0.5".
    steps_array = []
    for segment in re.split(r'\.(?!\d)', steps):
        text = segment.strip()
        if not text:
            # e.g. the empty tail after the final period
            continue
        steps_array.append({
            "text": text,
            "interpol": parser.inline_parse(text),
            "quantities": _surface_quantities(text),
        })

    ings_array = []
    for ing in recipe["ingredients"]:
        description = _fractions_to_decimals(ing.replace('-', ' ').strip())
        ings_array.append({
            "description": description,
            "interpol": parser.inline_parse(description.strip()),
            "products": parse_ingredient(description),
            "quantities": _surface_quantities(description),
        })

    # The yield text may contain no parsable quantity at all.
    yield_quantities = parser.parse(recipe["yeild"]) if recipe["yeild"] else []
    yeild = yield_quantities[0].surface if yield_quantities else 'n/a'

    # NOTE(review): len(steps) is the character count of the cleaned
    # instruction text, not the number of steps - confirm this is intended.
    # A missing total time is treated as 0 instead of raising TypeError.
    complexity = len(recipe["ingredients"]) * (len(steps) +
                                               (recipe["time"] or 0))

    recipe["name"] = name
    recipe["steps"] = steps_array
    recipe["yeild"] = yeild
    recipe["complexity"] = complexity
    recipe["ingredients"] = ings_array

    eval_string = recipe["name"] + ': ' + '. '.join(
        step["text"] for step in steps_array)

    recipe["classification"] = {
        "type": _top_intent(eval_types(eval_string)),
        "course": _top_intent(eval_courses(eval_string)),
        "cusine": _top_intent(eval_cusines(eval_string)),
        "temp": _top_intent(eval_temps(eval_string))
    }

    r = recipe_collection.insert_one(recipe)

    print(r.inserted_id)

    base = '/run/media/anfa/1C8A5B0249DD18B9/h32/'
    target_folder = base + str(r.inserted_id)

    if not os.path.exists(target_folder):
        os.makedirs(target_folder)

        with webdriver.Chrome(executable_path=driver_path) as wd:
            search_name = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,']", "",
                                 recipe["name"].strip())
            image_urls = fetch_image_urls(search_name, 10, wd=wd)

        if image_urls is not None:
            for image_url in image_urls:
                persist_image(target_folder, image_url)

            for dirpath, _dirs, files in os.walk(target_folder):
                if not files:
                    continue

                avg = _rank_images(dirpath, files)

                db.recipe.update_one({'_id': r.inserted_id}, {
                    '$set': {
                        'analytics': {
                            'veiws': 0,
                            'likes': 0,
                            'shares': 0,
                            'saves': 0,
                            'cooks': 0,
                            'aesthtetic': avg,
                        }
                    }
                },
                                     upsert=False)
                # Only the first directory that holds files is processed.
                break

    # Always answer the request; previously several paths fell through and
    # returned None.
    return make_response(
        jsonify({
            'done': True,
            'id': str(r.inserted_id)
        }), 200)
Example #26
0
def check_quantities(txt):
    """Return True when ``parser.parse`` finds at least one quantity in *txt*."""
    return len(parser.parse(txt)) != 0
Example #27
0
def extract_quantities(txt):
    """Return the ``surface`` text of every quantity ``parser.parse`` finds in *txt*."""
    surfaces = []
    for found in parser.parse(txt):
        surfaces.append(found.surface)
    return surfaces