Example #1
0
    def create_single_wine(unparsed_wine, title, url, article, tags):
        """Build a ``Wine`` from a single-wine article and save it.

        Args:
            unparsed_wine: Iterable of ``(key, value)`` HTML fragment pairs
                describing the wine. When falsy, nothing is looked up or saved.
            title: Wine name; HTML entities in it are unescaped.
            url: Article URL used to find or create the ``History`` row.
            article: Article markup. ``str(article)`` is scanned for ``<p>``
                paragraphs to build the description and is stored verbatim
                as ``harvest_data``.
            tags: Iterable of tag strings, used to guess the wine colour.

        Returns:
            ``False`` when a previously harvested wine with the same name,
            colour and history is flagged as modified or deleted (it is left
            untouched); ``True`` otherwise.
        """
        color, eyes, nose, mouth, overall, producer = ('N/A', ) * 6
        price = 0
        region, sub_region, variety, vintage, description, abv = ('', ) * 6

        # Wine Description: join the text of every plain <p> paragraph.
        single_wine_descrip = re.compile(r"""<p><strong>|<p.*?>(.*?)<\/p>""",
                                         re.M | re.S)
        descrip_results = single_wine_descrip.findall(str(article))
        description_soup = BeautifulSoup(' '.join(descrip_results),
                                         'html.parser')
        description = description_soup.getText()

        title = html.unescape(title)

        # Wine color: a "red" tag takes precedence over a "white" tag.
        lowered_tags = [tag.lower() for tag in tags]
        if 'white' in lowered_tags:
            color = 'White'
        if 'red' in lowered_tags:
            color = 'Red'

        # Process single-wine results
        if unparsed_wine:
            for key, value in unparsed_wine:
                value_soup = BeautifulSoup(value, 'html.parser')
                value = value_soup.getText()
                key_soup = BeautifulSoup(key, 'html.parser')
                key = key_soup.getText().lower()

                # "sub-region" must be tested before "region" because the
                # latter is a substring of the former.
                if "eyes" in key:
                    eyes = value
                elif "nose" in key:
                    nose = value
                elif "mouth" in key:
                    mouth = value
                elif "all in all" in key:
                    overall = value
                elif "producer" in key:
                    producer = value
                elif "price" in key:
                    price = value.replace("$", "")
                elif "sub-region" in key:
                    sub_region = value
                elif "region" in key:
                    region = value
                elif "variety" in key:
                    variety = str(value)
                elif "vintage" in key:
                    vintage = value
                elif "abv" in key:
                    abv = value

            # Fall back to guessing the variety from the title. The match is
            # case-insensitive and the first known variety found wins.
            variety_types = (
                'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
                'Cabernet Sauvignon', 'Merlot', 'Pinot Noir'
            )
            if not variety:
                variety = 'N/A'
                lowered_title = title.lower()
                for variety_type in variety_types:
                    if variety_type.lower() in lowered_title:
                        variety = variety_type
                        break

            # Search for previous history
            previous_history = History.objects.filter(url=url)
            if previous_history:
                h = History.objects.get(url=url)
                h.date = datetime.datetime.now(datetime.timezone.utc)
                h.save()
                previous_wine = Wine.wines.filter(name=title,
                                                  color=color,
                                                  harvested_from=h)
                if previous_wine:
                    p_wine = Wine.wines.get(name=title,
                                            color=color,
                                            harvested_from=h)
                    if p_wine.was_modified or p_wine.deleted:
                        print(
                            "Wine has been modified or deleted, not updating!")
                        return False
                    # Unmodified harvest: drop it so it can be re-created.
                    print("Removing wine")
                    p_wine.harvest_delete()
            else:
                # Create History
                h = History(url=url,
                            wine_count=1,
                            date=datetime.datetime.now(datetime.timezone.utc))
                h.save()

            # Save the wine to the database
            w = Wine(name=title,
                     color=color,
                     eyes=eyes,
                     nose=nose,
                     mouth=mouth,
                     overall=overall,
                     producer=producer,
                     price=price,
                     region=region,
                     sub_region=sub_region,
                     variety=variety,
                     vintage=vintage,
                     abv=abv,
                     description=description,
                     tags=tags,
                     harvest_data=str(article),
                     harvested_from=h,
                     harvested_date=datetime.datetime.now(
                         datetime.timezone.utc))
            w.harvest_save()
        return True
Example #2
0
    def create_multi_wines_exception_a(url, article):
        """Harvest every underlined wine entry found in ``article``.

        ``str(article)`` is split into wine chunks, each starting at an
        underlined ``<span>``. For every chunk a title, a variety guess and
        the ``<strong>label</strong>value`` pairs are extracted, and a
        ``Wine`` is saved against a ``History`` row keyed by ``url`` and the
        number of chunks found.

        Args:
            url: Article URL used to find or create the ``History`` row.
            article: Article markup; stored verbatim as ``harvest_data``.

        Returns:
            ``False`` as soon as a previously harvested wine is flagged as
            modified or deleted (processing stops there); otherwise ``None``
            once every chunk has been handled.
        """
        wines_regex = re.compile(
            r"""<span style="text-decoration: underline;">(.*?)(?=<span)""",
            re.M | re.S | re.I)
        wines = wines_regex.findall(str(article))

        title_regex = re.compile(r"""<strong>(.*?) –""")
        wine_info_regex = re.compile(r"""<strong>(.*?)<\/strong>(.*?)(?=<)""",
                                     re.I | re.M | re.S)

        # Varieties recognised in a title, in priority order.
        known_varieties = (
            'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
            'Cabernet Sauvignon', 'Merlot', 'Pinot Noir'
        )

        # Create history
        previous_history = History.objects.filter(url=url,
                                                  wine_count=len(wines))
        if previous_history:
            h = History.objects.get(url=url, wine_count=len(wines))
            h.date = datetime.datetime.now(datetime.timezone.utc)
            h.save()
        else:
            h = History(url=url,
                        wine_count=len(wines),
                        date=datetime.datetime.now(datetime.timezone.utc))
            h.save()

        for wine in wines:
            info = wine_info_regex.findall(wine)

            color, eyes, nose, mouth, overall, producer = ('N/A', ) * 6
            price = 0
            region, sub_region, variety, vintage, description, abv = ('', ) * 6

            title = title_regex.findall(wine)

            if title:
                title = title[0]
            else:
                print("Couldn't determine title!")
                continue

            # Guess the variety from the title; each candidate is actually
            # tested against the title, and it stays '' when none matches.
            for known_variety in known_varieties:
                if known_variety in title:
                    variety = known_variety
                    break

            for key, value in info:
                value_soup = BeautifulSoup(value, 'html.parser')
                value = value_soup.getText()
                key_soup = BeautifulSoup(key, 'html.parser')
                key = key_soup.getText()
                # "Sub-Region" must be tested before "Region" because the
                # latter is a substring of the former.
                if ("Eyes" in key) or ("eyes" in key):
                    eyes = value
                elif ("Nose" in key) or ("nose" in key):
                    nose = value
                elif ("Mouth" in key) or ("mouth" in key):
                    mouth = value
                elif ("All in all" in key) or ("all in all" in key):
                    overall = value
                elif ("Producer" in key) or ("producer" in key):
                    producer = value
                elif ("Price" in key) or ("price" in key):
                    price = value.replace("$", "")
                elif ("Sub-Region" in key) or ("sub-region" in key):
                    sub_region = value
                elif ("Region" in key) or ("region" in key):
                    region = value
                elif ("Variety" in key) or ("variety" in key):
                    # An explicit variety label overrides the title guess.
                    variety = value
                elif ("Vintage" in key) or ("vintage" in key):
                    vintage = value
                elif ("ABV" in key) or ("abv" in key):
                    abv = value

            # Search for previous history
            if previous_history:
                previous_wine = Wine.wines.filter(name=title,
                                                  color=color,
                                                  harvested_from=h)
                if previous_wine:
                    p_wine = Wine.wines.get(name=title,
                                            color=color,
                                            harvested_from=h)
                    if p_wine.was_modified or p_wine.deleted:
                        print(
                            "Wine has been modified or deleted, not updating!")
                        return False
                    # Unmodified harvest: drop it so it can be re-created.
                    print("Removing wine and updating")
                    p_wine.harvest_delete()

            w = Wine(name=title,
                     color=color,
                     eyes=eyes,
                     nose=nose,
                     mouth=mouth,
                     overall=overall,
                     producer=producer,
                     price=price,
                     region=region,
                     sub_region=sub_region,
                     variety=variety,
                     vintage=vintage,
                     abv=abv,
                     description=description,
                     tags='',
                     harvest_data=str(article),
                     harvested_from=h,
                     harvested_date=datetime.datetime.now(
                         datetime.timezone.utc))
            w.harvest_save()
Example #3
0
    def create_multi_wines(wines, url, article, tags):
        """Save one ``Wine`` per entry of a multi-wine article.

        Args:
            wines: Sequence of 3-item entries
                ``(title markup, "(region) $price" text, description)``.
            url: Article URL used to find or create the ``History`` row
                (matched together with ``len(wines)``).
            article: Article markup; stored verbatim as ``harvest_data``.
            tags: Article tags; stored as ``str(tags)``.

        Returns:
            ``False`` as soon as a previously harvested wine is flagged as
            modified or deleted (processing stops there); ``True`` once every
            entry has been handled.
        """
        # Regex
        title_regex = re.compile(r"""<strong>(.*)""", re.M | re.S | re.I)
        alt_title_regex = re.compile(r"""<span .*>(.*?)</span>""")
        # Compiled once here rather than on every loop iteration.
        region_price_regex = re.compile(r"""\((.*?)\) \$(.+)|\((.*?)\)""",
                                        re.M | re.S | re.I)
        # Varieties recognised in a title, in priority order.
        known_varieties = (
            'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
            'Cabernet Sauvignon', 'Merlot', 'Pinot Noir'
        )

        # Create history
        previous_history = History.objects.filter(url=url,
                                                  wine_count=len(wines))
        if previous_history:
            h = History.objects.get(url=url, wine_count=len(wines))
            h.date = datetime.datetime.now(datetime.timezone.utc)
            h.save()
        else:
            h = History(url=url,
                        wine_count=len(wines),
                        date=datetime.datetime.now(datetime.timezone.utc))
            h.save()

        print("Length of wines:" + str(len(wines)))

        for wine in wines:  # ['title', 'subregion/region price', 'description']

            title = title_regex.findall(wine[0])
            if not title:
                title = alt_title_regex.findall(wine[0])
            if not title:
                # If there is no title there is no wine, so skip this entry
                # and keep the history count in step.
                print("No wine??")
                h.wine_count -= 1
                h.save()
                continue

            # Both patterns yield a list of matches; use the first one
            # rather than handing the whole list to strip_tags.
            title = html.unescape(strip_tags(title[0]))

            color, eyes, nose, mouth, overall, producer = ('N/A', ) * 6
            price = 0
            region, sub_region, variety, vintage, description, abv = ('', ) * 6

            for known_variety in known_varieties:
                if known_variety in title:
                    variety = known_variety
                    break

            # The colour is inferred from the CSS colour code in the title
            # markup; later checks override earlier ones.
            if '#ff99cc' in wine[0]:
                color = 'Rosé'
            if '#339966' in wine[0]:
                color = 'White'
            if '#800000' in wine[0]:
                color = 'Red'

            region_price = region_price_regex.findall(wine[1])
            total_region = ''
            if region_price:
                # Groups: (region when a price follows, price, region alone).
                priced_region, price_text, bare_region = region_price[0]

                if priced_region:
                    total_region = priced_region

                if price_text:
                    price = price_text

                if bare_region:
                    total_region = bare_region

                if total_region:
                    region_list = total_region.split(',')
                    region_count = len(region_list)
                    if region_count == 1:
                        region = region_list[0].strip()
                    elif region_count == 2:
                        region = region_list[1].strip()
                        sub_region = region_list[0].strip()
                    elif region_count > 2:
                        # The last item is the region; the two items before
                        # it are joined to form the sub-region.
                        region = region_list[-1].strip()
                        sub_region = (region_list[-3].strip() + ", " +
                                      region_list[-2].strip())

            description = str(wine[2].strip())

            # Search for previous history
            if previous_history:
                previous_wine = Wine.wines.filter(name=title,
                                                  color=color,
                                                  harvested_from=h)
                if previous_wine:
                    p_wine = Wine.wines.get(name=title,
                                            color=color,
                                            harvested_from=h)
                    if p_wine.was_modified or p_wine.deleted:
                        print(
                            "Wine has been modified or deleted, not updating!")
                        return False
                    # Unmodified harvest: drop it so it can be re-created.
                    print("Removing wine and updating")
                    p_wine.harvest_delete()

            w = Wine(name=title,
                     color=color,
                     eyes=eyes,
                     nose=nose,
                     mouth=mouth,
                     overall=overall,
                     producer=producer,
                     price=price,
                     region=region,
                     sub_region=sub_region,
                     variety=variety,
                     vintage=vintage,
                     abv=abv,
                     description=str(description),
                     tags=str(tags),
                     harvest_data=str(article),
                     harvested_from=h,
                     harvested_date=datetime.datetime.now(
                         datetime.timezone.utc))
            w.harvest_save()
        return True
Example #4
0
    def create_single_wine(unparsed_wine, title, url, article, tags):
        """Turn a single-wine article into a saved ``Wine``.

        Args:
            unparsed_wine: Iterable of ``(key, value)`` HTML fragment pairs.
                When falsy, nothing is looked up or saved.
            title: Wine name; HTML entities in it are unescaped.
            url: Article URL used to find or create the ``History`` row.
            article: Article markup. ``str(article)`` supplies the description
                paragraphs and is stored verbatim as ``harvest_data``.
            tags: Tags checked for ``white``/``White`` and ``red``/``Red``.

        Returns:
            ``False`` when an earlier harvest of this wine is flagged as
            modified or deleted; ``True`` otherwise.
        """
        # Description: text of every plain <p> paragraph, space-joined.
        paragraph_pattern = re.compile(r"""<p><strong>|<p.*?>(.*?)<\/p>""", re.M | re.S)
        paragraphs = paragraph_pattern.findall(str(article))
        description = BeautifulSoup(' '.join(paragraphs), 'html.parser').getText()

        title = html.unescape(title)

        # Try to figure out the colour; a red tag wins over a white one.
        color = 'N/A'
        if 'white' in tags or 'White' in tags:
            color = 'White'
        if 'red' in tags or 'Red' in tags:
            color = 'Red'

        # Without any parsed key/value pairs there is nothing to store.
        if not unparsed_wine:
            return True

        details = {
            'eyes': 'N/A', 'nose': 'N/A', 'mouth': 'N/A', 'overall': 'N/A', 'producer': 'N/A',
            'price': 0,
            'region': '', 'sub_region': '', 'variety': '', 'vintage': '', 'abv': '',
        }
        # Ordered (label spellings, field) table; the first hit wins, so
        # "Sub-Region" has to come before "Region".
        label_table = (
            (("Eyes", "eyes"), 'eyes'),
            (("Nose", "nose"), 'nose'),
            (("Mouth", "mouth"), 'mouth'),
            (("All in all", "all in all"), 'overall'),
            (("Producer", "producer"), 'producer'),
            (("Price", "price"), 'price'),
            (("Sub-Region", "sub-region"), 'sub_region'),
            (("Region", "region"), 'region'),
            (("Variety", "variety"), 'variety'),
            (("Vintage", "vintage"), 'vintage'),
            (("ABV", "abv"), 'abv'),
        )

        for raw_label, raw_text in unparsed_wine:
            text = BeautifulSoup(raw_text, 'html.parser').getText()
            label = BeautifulSoup(raw_label, 'html.parser').getText()

            for spellings, field in label_table:
                if not any(spelling in label for spelling in spellings):
                    continue
                if field == 'price':
                    text = text.replace("$", "")
                elif field == 'variety':
                    text = str(text)
                details[field] = text
                break

        # No explicit variety: take the first known one named in the title.
        if not details['variety']:
            known_varieties = ('Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
                               'Cabernet Sauvignon', 'Merlot', 'Pinot Noir')
            details['variety'] = next(
                (name for name in known_varieties if name in title), 'N/A')

        # Search for previous history
        previous_history = History.objects.filter(url=url)
        if not previous_history:
            # Create History
            h = History(url=url, wine_count=1, date=datetime.datetime.now(datetime.timezone.utc))
            h.save()
        else:
            h = History.objects.get(url=url)
            h.date = datetime.datetime.now(datetime.timezone.utc)
            h.save()
            earlier = Wine.wines.filter(name=title, color=color, harvested_from=h)
            if earlier:
                stale = Wine.wines.get(name=title, color=color, harvested_from=h)
                if stale.was_modified or stale.deleted:
                    print("Wine has been modified or deleted, not updating!")
                    return False
                print("Removing wine")
                stale.harvest_delete()

        # Save the wine to the database
        w = Wine(name=title,
                 color=color,
                 description=description,
                 tags=tags,
                 harvest_data=str(article),
                 harvested_from=h,
                 harvested_date=datetime.datetime.now(datetime.timezone.utc),
                 **details)
        w.harvest_save()
        return True
Example #5
0
    def create_multi_wines_exception_a(url, article):
        """Harvest every underlined wine entry found in ``article``.

        ``str(article)`` is split into wine chunks, each starting at an
        underlined ``<span>``. For every chunk a title, a variety guess and
        the ``<strong>label</strong>value`` pairs are extracted, and a
        ``Wine`` is saved against a ``History`` row keyed by ``url`` and the
        number of chunks found.

        Args:
            url: Article URL used to find or create the ``History`` row.
            article: Article markup; stored verbatim as ``harvest_data``.

        Returns:
            ``False`` as soon as a previously harvested wine is flagged as
            modified or deleted (processing stops there); otherwise ``None``
            once every chunk has been handled.
        """
        wines_regex = re.compile(r"""<span style="text-decoration: underline;">(.*?)(?=<span)""",
                                 re.M | re.S | re.I)
        wines = wines_regex.findall(str(article))

        title_regex = re.compile(r"""<strong>(.*?) –""")
        wine_info_regex = re.compile(r"""<strong>(.*?)<\/strong>(.*?)(?=<)""", re.I | re.M | re.S)

        # Varieties recognised in a title, in priority order.
        known_varieties = ('Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
                           'Cabernet Sauvignon', 'Merlot', 'Pinot Noir')

        # Create history
        previous_history = History.objects.filter(url=url, wine_count=len(wines))
        if previous_history:
            h = History.objects.get(url=url, wine_count=len(wines))
            h.date = datetime.datetime.now(datetime.timezone.utc)
            h.save()
        else:
            h = History(url=url, wine_count=len(wines), date=datetime.datetime.now(datetime.timezone.utc))
            h.save()

        for wine in wines:
            info = wine_info_regex.findall(wine)

            color, eyes, nose, mouth, overall, producer = ('N/A',) * 6
            price = 0
            region, sub_region, variety, vintage, description, abv = ('',) * 6

            title = title_regex.findall(wine)

            if title:
                title = title[0]
            else:
                print("Couldn't determine title!")
                continue

            # Guess the variety from the title; each candidate is actually
            # tested against the title, and it stays '' when none matches.
            for known_variety in known_varieties:
                if known_variety in title:
                    variety = known_variety
                    break

            for key, value in info:
                value_soup = BeautifulSoup(value, 'html.parser')
                value = value_soup.getText()
                key_soup = BeautifulSoup(key, 'html.parser')
                key = key_soup.getText()
                # "Sub-Region" must be tested before "Region" because the
                # latter is a substring of the former.
                if ("Eyes" in key) or ("eyes" in key):
                    eyes = value
                elif ("Nose" in key) or ("nose" in key):
                    nose = value
                elif ("Mouth" in key) or ("mouth" in key):
                    mouth = value
                elif ("All in all" in key) or ("all in all" in key):
                    overall = value
                elif ("Producer" in key) or ("producer" in key):
                    producer = value
                elif ("Price" in key) or ("price" in key):
                    price = value.replace("$", "")
                elif ("Sub-Region" in key) or ("sub-region" in key):
                    sub_region = value
                elif ("Region" in key) or ("region" in key):
                    region = value
                elif ("Variety" in key) or ("variety" in key):
                    # An explicit variety label overrides the title guess.
                    variety = value
                elif ("Vintage" in key) or ("vintage" in key):
                    vintage = value
                elif ("ABV" in key) or ("abv" in key):
                    abv = value

            # Search for previous history
            if previous_history:
                previous_wine = Wine.wines.filter(name=title, color=color, harvested_from=h)
                if previous_wine:
                    p_wine = Wine.wines.get(name=title, color=color, harvested_from=h)
                    if p_wine.was_modified or p_wine.deleted:
                        print("Wine has been modified or deleted, not updating!")
                        return False
                    # Unmodified harvest: drop it so it can be re-created.
                    print("Removing wine and updating")
                    p_wine.harvest_delete()

            w = Wine(name=title, color=color, eyes=eyes, nose=nose, mouth=mouth, overall=overall,
                     producer=producer, price=price, region=region, sub_region=sub_region,
                     variety=variety,
                     vintage=vintage, abv=abv, description=description, tags='',
                     harvest_data=str(article),
                     harvested_from=h,
                     harvested_date=datetime.datetime.now(datetime.timezone.utc))
            w.harvest_save()
Example #6
0
    def create_multi_wines(wines, url, article, tags):
        """Save one ``Wine`` per entry of a multi-wine article.

        Args:
            wines: Sequence of 3-item entries
                ``(title markup, "(region) $price" text, description)``.
            url: Article URL used to find or create the ``History`` row
                (matched together with ``len(wines)``).
            article: Article markup; stored verbatim as ``harvest_data``.
            tags: Article tags; stored as ``str(tags)``.

        Returns:
            ``False`` as soon as a previously harvested wine is flagged as
            modified or deleted (processing stops there); ``True`` once every
            entry has been handled.
        """
        # Regex
        title_regex = re.compile(r"""<strong>(.*)""", re.M | re.S | re.I)
        alt_title_regex = re.compile(r"""<span .*>(.*?)</span>""")
        # Compiled once here rather than on every loop iteration.
        region_price_regex = re.compile(r"""\((.*?)\) \$(.+)|\((.*?)\)""", re.M | re.S | re.I)
        # Varieties recognised in a title, in priority order.
        known_varieties = ('Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
                           'Cabernet Sauvignon', 'Merlot', 'Pinot Noir')

        # Create history
        previous_history = History.objects.filter(url=url, wine_count=len(wines))
        if previous_history:
            h = History.objects.get(url=url, wine_count=len(wines))
            h.date = datetime.datetime.now(datetime.timezone.utc)
            h.save()
        else:
            h = History(url=url, wine_count=len(wines), date=datetime.datetime.now(datetime.timezone.utc))
            h.save()

        print("Length of wines:" + str(len(wines)))

        for wine in wines:  # ['title', 'subregion/region price', 'description']

            title = title_regex.findall(wine[0])
            if not title:
                title = alt_title_regex.findall(wine[0])
            if not title:
                # If there is no title there is no wine, so skip this entry
                # and keep the history count in step.
                print("No wine??")
                h.wine_count -= 1
                h.save()
                continue

            # Both patterns yield a list of matches; use the first one
            # rather than handing the whole list to strip_tags.
            title = html.unescape(strip_tags(title[0]))

            color, eyes, nose, mouth, overall, producer = ('N/A',) * 6
            price = 0
            region, sub_region, variety, vintage, description, abv = ('',) * 6

            for known_variety in known_varieties:
                if known_variety in title:
                    variety = known_variety
                    break

            # The colour is inferred from the CSS colour code in the title
            # markup; later checks override earlier ones.
            if '#ff99cc' in wine[0]:
                color = 'Rosé'
            if '#339966' in wine[0]:
                color = 'White'
            if '#800000' in wine[0]:
                color = 'Red'

            region_price = region_price_regex.findall(wine[1])
            total_region = ''
            if region_price:
                # Groups: (region when a price follows, price, region alone).
                priced_region, price_text, bare_region = region_price[0]

                if priced_region:
                    total_region = priced_region

                if price_text:
                    price = price_text

                if bare_region:
                    total_region = bare_region

                if total_region:
                    region_list = total_region.split(',')
                    region_count = len(region_list)
                    if region_count == 1:
                        region = region_list[0].strip()
                    elif region_count == 2:
                        region = region_list[1].strip()
                        sub_region = region_list[0].strip()
                    elif region_count > 2:
                        # The last item is the region; the two items before
                        # it are joined to form the sub-region.
                        region = region_list[-1].strip()
                        sub_region = region_list[-3].strip() + ", " + region_list[-2].strip()

            description = str(wine[2].strip())

            # Search for previous history
            if previous_history:
                previous_wine = Wine.wines.filter(name=title, color=color, harvested_from=h)
                if previous_wine:
                    p_wine = Wine.wines.get(name=title, color=color, harvested_from=h)
                    if p_wine.was_modified or p_wine.deleted:
                        print("Wine has been modified or deleted, not updating!")
                        return False
                    # Unmodified harvest: drop it so it can be re-created.
                    print("Removing wine and updating")
                    p_wine.harvest_delete()

            w = Wine(name=title, color=color, eyes=eyes, nose=nose, mouth=mouth, overall=overall,
                     producer=producer, price=price, region=region, sub_region=sub_region,
                     variety=variety,
                     vintage=vintage, abv=abv, description=str(description), tags=str(tags),
                     harvest_data=str(article),
                     harvested_from=h,
                     harvested_date=datetime.datetime.now(datetime.timezone.utc))
            w.harvest_save()
        return True