예제 #1
0
    def get_artist_data(self, soup, url):
        """Extract an artist's details from a parsed artist page and store them.

        Called by self.get_artwork_listings_slave(). Reads the name, country
        and description from ``soup``, records the artist key under
        ``KEY_INFO[url]`` and writes the artist via
        ``TheAuthour.write_artist``. No birth year is extracted here, so that
        slot of the pack is always ``None``.

        Args:
            soup: Parsed artist page exposing a BeautifulSoup-style ``find``.
            url: Artist page URL, used as the ``KEY_INFO`` key.

        Raises:
            AttributeError: If the name block cannot be found. This is
                deliberate: it signals that the page layout has changed and
                the selectors need to be fixed.
        """
        # Deliberately unguarded (see Raises above).
        artist_resume = soup.find('div', class_='artist-resume').find(
            'div', class_='artist-resume_text')
        name = artist_resume.h1.text.strip()
        print(name)

        try:
            # First line of the location paragraph, last comma-separated part.
            location = artist_resume.find('p', class_='location').text.strip()
            country = location.split('\n')[0].split(',')[-1].strip()
            print(country)
        except AttributeError:
            country = None

        try:
            about = soup.find('div', id='about').text.strip()
        except AttributeError:
            # The page has no description block; store None instead of
            # aborting the whole artist record.
            about = None

        # pack = [name, born, country, about]
        artist_data_pack = [name, None, country, about]
        KEY_INFO[url] = db.Artist.key_maker(artist_data_pack)
        TheAuthour.write_artist(*artist_data_pack)
예제 #2
0
    def get_artist_data(self, soup, url):
        """Template for scraping one artist page and storing the result.

        The extraction steps are placeholders: this block never assigns
        ``name``, ``country``, ``born`` or ``about``; it only prints them.
        Once those values exist, they are packed as
        ``[name, born, country, about]``, the artist key is stored under
        ``KEY_INFO[url]`` and the artist is written via
        ``TheAuthour.write_artist``.

        NOTE(review): as written, ``print(name)`` raises NameError unless
        ``name`` is defined in an enclosing scope, and the ``except
        AttributeError`` handlers below would not catch the NameError raised
        by the other unassigned names. Fill in the site-specific selectors
        before using this method.

        Args:
            soup: Parsed artist page (not used until the selectors are added).
            url: Artist page URL, used as the ``KEY_INFO`` key.
        """
        # Called by self.get_artwork_listings_slave()
        # Pick name, born, country, about

        # Name: pick the artist's name here (placeholder, nothing is assigned).
        print(name)
        # If an error occurs here, it's because the page layout has changed
        # and the code needs to be fixed.

        if name is not None:
            try:
                # Pick the artist's country here (placeholder).
                print(country)
            except AttributeError:
                country = None

            try:
                # Pick the birth year here (placeholder).
                print(born)
            except AttributeError:
                born = None

            try:
                # Pick the artist's description here (placeholder).
                print(about)
            except AttributeError:
                about = None

            artist_data_pack = [name, born, country, about]
            # pack = [name, born, country, about]
            # Updating KEY_INFO dictionary.
            KEY_INFO[url] = db.Artist.key_maker(artist_data_pack)
            # Updating the dB with artist listings.
            TheAuthour.write_artist(*artist_data_pack)
예제 #3
0
    def key_maker(artist_url):
        """Scrape an artist page in a headless Firefox and register the artist.

        Loads ``artist_url``, reads the name and country from the
        ``h2.font_2`` headings and the description from the ``p.font_8``
        paragraphs, stores the artist key under ``KEY_INFO[artist_url]`` and
        writes the artist via ``TheAuthour.write_artist``. No birth year is
        extracted, so that slot of the pack is ``None``.

        Args:
            artist_url: URL of the artist page; also the ``KEY_INFO`` key.

        Returns:
            The value stored in ``KEY_INFO[artist_url]``, or ``None`` when no
            name heading is found on the page.
        """
        options = Options()
        options.headless = True
        driver = webdriver.Firefox(options=options)
        try:
            visited.discard(artist_url)
            # A freshly started browser has no document loaded, so navigate
            # to the artist page before reading page_source.
            driver.get(artist_url)
            # NOTE(review): the URL is passed as the second argument; confirm
            # this BeautifulSoup accepts that (bs4's second parameter is the
            # parser name).
            soup = BeautifulSoup(driver.page_source, artist_url)
            if soup is None:
                return None

            n_c = soup.find_all('h2', class_='font_2')

            # Artist's name: first heading. Without it there is nothing to key.
            try:
                name = n_c[0].text.strip()
            except IndexError:
                print(n_c)
                return None

            # Country: second heading, which may be absent.
            try:
                country = n_c[1].text.strip()
            except (IndexError, AttributeError):
                country = None

            # About: every paragraph's text, each followed by a single space.
            try:
                about = "".join(
                    f"{paragraph.text.strip()} "
                    for paragraph in soup.find_all('p', class_='font_8'))
            except (AttributeError, TypeError):
                about = None

            # pack = [name, born, country, about]
            artist_data_pack = [name, None, country, about]
            # Updating KEY_INFO dictionary.
            KEY_INFO[artist_url] = db.Artist.key_maker(artist_data_pack)
            key = KEY_INFO.get(artist_url)
            # Updating the dB with artist listings.
            TheAuthour.write_artist(*artist_data_pack)
            return key
        finally:
            # Always release the browser, including on errors and early
            # returns, so Firefox processes are not leaked.
            driver.quit()
예제 #4
0
    def get_artist_data(self, soup, url):
        """Extract an artist's details from the biography block and store them.

        Called by self.get_artwork_listings_slave(). Reads name, birth year,
        country and description from ``div#biography``, stores the artist key
        under ``KEY_INFO[url]`` and writes the artist via
        ``TheAuthour.write_artist``.

        Args:
            soup: Parsed artist page exposing a BeautifulSoup-style ``find``.
            url: Artist page URL, used as the ``KEY_INFO`` key.

        Raises:
            AttributeError: If the biography block or its ``h1`` is missing.
                This is deliberate: the code should break if the name goes
                missing.
        """
        # Deliberately unguarded (see Raises above).
        biography = soup.find('div', id='biography')
        name = biography.h1.text.strip()

        # Sub-title row holding the birthday and country spans; may be None,
        # in which case the lookups below fall back to None.
        sub_title = biography.find('div', class_='sub-title col-sm-9 col-xs-12')

        # Born: keep only the digits of the birthday text.
        try:
            birthday = sub_title.find('span', class_='birthday-date').text
            born = int("".join(ch for ch in birthday if ch.isdigit()))
        except (AttributeError, ValueError):
            # Span missing, or present but without any digit (int("") fails).
            born = None

        # Country: text of the first span in the sub-title row.
        try:
            country = sub_title.span.text.strip()
        except AttributeError:
            country = None

        # About: split on double spaces and re-join the pieces line by line.
        try:
            raw_about = biography.find(
                'div', class_='col-sm-9 col-xs-12 biography').text.strip()
            segments = raw_about.split("  ")
            # NOTE(review): the final segment is dropped, as in the original
            # loop over range(len(ab) - 1); confirm this is intentional, since
            # a biography without any double space yields ''.
            about = "\n".join(part.strip() for part in segments[:-1]).strip()
        except AttributeError:
            about = None

        # pack = [name, born, country, about]
        artist_data_pack = [name, born, country, about]
        KEY_INFO[url] = db.Artist.key_maker(artist_data_pack)
        TheAuthour.write_artist(*artist_data_pack)
예제 #5
0
    def get_artist_data(self, soup, url):
        """Extract an artist's details from the intro/bio sections and store them.

        Reads name, birth year, country and description from ``soup``, stores
        the artist key under ``KEY_INFO[url]`` and writes the artist via
        ``TheAuthour.write_artist``. Nothing is written when the name cannot
        be found. The page is not fetched here; ``soup`` is already parsed.

        Args:
            soup: Parsed artist page exposing a BeautifulSoup-style ``find``.
            url: Artist page URL, used as the ``KEY_INFO`` key.
        """
        try:
            name = soup.find('div', class_='artist-intro').find('h1').text
            name = str(name).strip()
        except AttributeError:
            name = None

        if name is None:
            # Without a name there is no artist record to create.
            return

        # Born: keep only the digits of the "born" paragraph.
        try:
            born_text = soup.find('p', class_='born').text.strip()
            born = int("".join(ch for ch in born_text if str(ch).isdigit()))
            if born > 3000:
                # Not a plausible year, so more than one number was
                # concatenated (e.g. birth and death years); keep the first
                # four digits, as an int like every other value.
                born = int(str(born)[:4])
        except (AttributeError, ValueError):
            born = None

        # Country: last "|"-separated field of the intro sub-heading.
        try:
            intro = soup.find('div', class_="artist-intro")
            fields = intro.find('div', class_='h2').text.strip().split("|")
            country = str(fields[-1]).strip()
        except AttributeError:
            country = None

        # About
        try:
            bio = soup.find('section', class_='artist-bio')
            about = bio.find('div', class_='resume').text.strip()
        except AttributeError:
            about = None

        # pack = [name, born, country, about]
        artist_data_pack = [name, born, country, about]
        # Updating KEY_INFO dictionary.
        KEY_INFO[url] = db.Artist.key_maker(artist_data_pack)
        # Updating the dB with artist listings.
        TheAuthour.write_artist(*artist_data_pack)
예제 #6
0
    def get_artist_data(self, soup, url):
        """Extract an artist's details from a parsed artist page and store them.

        Called by self.get_artwork_listings_slave(). Reads the name from the
        first ``h1``, the nationality and birth year from the ``h2`` of the
        second matching grid cell, and the description from the element
        following the "Bio" label. Stores the artist key under
        ``KEY_INFO[url]`` and writes the artist via
        ``TheAuthour.write_artist``.

        Args:
            soup: Parsed artist page exposing BeautifulSoup-style
                ``find``/``find_all``.
            url: Artist page URL, used as the ``KEY_INFO`` key.

        Raises:
            AttributeError: If the page has no ``h1``. This is deliberate: it
                signals that the page layout has changed and the code needs
                to be fixed.
        """
        # Nationality adjectives mapped to country names. They are matched by
        # substring, in this order, after the two exact matches below.
        demonym_to_country = (
            ("French", "France"),
            ("Argentine", "Argentina"),
            ("Dutch", "Netherlands"),
            ("Indian", "India"),
            ("Pakistani", "Pakistan"),
            ("Italian", "Italy"),
            ("English", "UK"),
            ("Chinese", "China"),
            ("Hispanic", "Spain"),
            ("German", "Germany"),
            ("Spanish", "Spain"),
            ("Russian", "Russia"),
            ("British", "UK"),
            ("Mexican", "Mexico"),
            ("Brazilian", "Brazil"),
            ("Canadian", "Canada"),
            ("Belgian", "Belgium"),
            ("Israeli", "Israel"),
            ("Venezuelan", "Venezuela"),
            ("Polish", "Poland"),
        )

        cells = soup.find_all(
            'div',
            class_=re.compile(
                r'Box-sc-15se88d-0 GridColumns__Cell-sc-1g9p6xx-1\.*'))
        # Deliberately unguarded (see Raises above).
        name = soup.find('h1').text.strip()

        try:
            # The heading reads "<nationality>, <years>".
            parts = cells[1].find('h2').text.strip().split(",")
            country = parts[0].strip()
            if country == "American":
                country = "USA"
            elif country == "Japanese":
                country = "Japan"
            else:
                for demonym, mapped in demonym_to_country:
                    if demonym in country:
                        country = mapped
                        break
                else:
                    # Unknown nationality: a digit means the first field was
                    # not a nationality at all, so discard it.
                    if any(str(ch).isnumeric() for ch in country):
                        country = None

            try:
                # Birth year: digits before the en dash of the last field.
                birth_part = str(parts[-1]).strip().split("–")[0]
                born = int("".join(ch for ch in birth_part if ch.isnumeric()))
            except ValueError:
                born = None
        except (AttributeError, IndexError):
            # Fewer than two matching cells, or the cell has no h2.
            born = None
            country = None

        about = None
        try:
            # The description is the sibling that follows the "Bio" label.
            about_blocks = soup.find_all(
                'div',
                class_=re.compile(
                    r'Box-sc-15se88d-0 Text-sc-18gcpao-0\.*'))
            for block in about_blocks:
                if block.text.strip() == 'Bio':
                    about = block.nextSibling.text.strip()
                    break
        except AttributeError:
            about = None

        # pack = [name, born, country, about]
        artist_data_pack = [name, born, country, about]
        # Updating KEY_INFO dictionary.
        KEY_INFO[url] = db.Artist.key_maker(artist_data_pack)
        # Updating the dB with artist listings.
        TheAuthour.write_artist(*artist_data_pack)
예제 #7
0
    def get_artwork_data_slave(self, url, driver):
        """Scrape one artwork page and write the artwork, price and image.

        Loads ``url`` in ``driver``, resolves the seller and artist ids,
        reads the price and the description fields, and hands everything to
        ``TheAuthour.write_artwork_price_image``. The page is skipped (with a
        printed message) when the seller id, artist id or price cannot be
        determined.

        Args:
            url: Artwork page URL.
            driver: Selenium-style driver exposing ``get`` and
                ``page_source``.

        Raises:
            AttributeError: If the artist block or the image container is
                missing once the price has been found; the code is meant to
                break when the artist's name cannot be read.
        """
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, url)
        if soup is None:
            print(f"Soup not returned for {url}")
            return

        # Field initiation. Fields that this method never fills stay None.
        artwork = None
        price = None
        type_ = None
        dimensions = None
        frame = None
        authenticity = None
        about = None
        artist_id = None
        image_loc = None
        year = None
        support = None
        signature = None
        # Material to be added to technique
        technique = ""
        seller_id = None
        artist = None
        medium = None

        # Seller_id
        try:
            seller_url = soup.find('div', class_='WncCi').find('a')['href']
            seller_id = self.get_seller_id(seller_url)
        except (AttributeError, TypeError):
            # Seller doesn't have a page: find('a') returned None, so the
            # subscript raised TypeError. The seller's display text is used
            # as the key instead.
            try:
                seller_url = soup.find('div', class_='WncCi').text.strip()
                if seller_url not in SELLER_INFO:
                    # Make a Kazoart style bundle, and write it to obtain a
                    # seller_id.
                    # [seller_url, platform_id(from name), Seller's name,
                    #  Location, website]
                    bundle = [
                        seller_url, self.website.platform,
                        'EMERGINGARTISTPLATFOM', None, None
                    ]
                    TheAuthour.write_seller(*bundle)
                seller_id = SELLER_INFO.get(seller_url)
            except AttributeError:
                # No seller block at all; seller_id stays None and the page
                # is skipped below.
                pass

        # Artist_id
        try:
            artist_url = soup.find('div', class_='WncCi').a.get('href')
            if str(artist_url).endswith(".com"):
                # Drop every literal ".com", then restore the one that
                # belongs to the platform's own domain.
                artist_url = artist_url.replace('.com', "")
                artist_url = artist_url.replace(
                    'emergingartistplatform', 'emergingartistplatform.com')
            artist_id = self.get_artist_id(artist_url)
        except AttributeError:
            # No artist link: register the artist under the display name,
            # taking the country from the description when it is listed.
            try:
                artist_url = soup.find('div', class_='WncCi').text.strip()
                country = None
                for block in soup.find_all('pre'):
                    if block.get('data-hook') != 'description':
                        continue
                    for paragraph in block.find_all('p'):
                        text = paragraph.text
                        if ('Country' in text or 'country' in text
                                or 'COUNTRY' in text):
                            country = text.split(":")[-1].strip()

                # pack = [name, born, country, about]
                artist_data_pack = [artist_url, None, country, None]
                # Updating KEY_INFO dictionary.
                KEY_INFO[artist_url] = db.Artist.key_maker(artist_data_pack)
                key = KEY_INFO.get(artist_url)
                # Updating the dB with artist listings.
                TheAuthour.write_artist(*artist_data_pack)
                artist_id = ARTIST_INFO[key]
            except AttributeError:
                artist_id = None

        # Continue fetching data only if seller_id and artist_id are found.
        # (RULE :: 3, 4)
        if seller_id is None or artist_id is None:
            print(
                f"Skipping : {url}\nSeller_id = {seller_id}, Artist_id = {artist_id}, medium = {medium}"
            )
            return

        # Price: digits and dots of the formatted price, scaled by ``rate``.
        # NOTE(review): ``rate`` is defined outside this method, presumably a
        # currency conversion factor; confirm.
        try:
            digits = ""
            for span in soup.find_all('span'):
                if span.get('data-hook') == "formatted-primary-price":
                    digits += "".join(
                        ch for ch in span.text
                        if str(ch).isnumeric() or str(ch) == ".")
            price = float(digits) * rate
        except (AttributeError, ValueError):
            price = None

        # RULE : 5
        if price is None:
            print(f"Skipping {url}\n PRICE : {price}")
            return

        # Artist. Deliberately unguarded: the code should break if the
        # artist's name is not found.
        artist = soup.find('div', class_='WncCi').text.strip()

        # Artwork title, year, dimensions, technique and about all come from
        # "Label: value" paragraphs of the description block.
        for block in soup.find_all('pre'):
            if block.get('data-hook') != 'description':
                continue
            for paragraph in block.find_all('p'):
                text = paragraph.text
                fields = text.split(":")
                value = fields[-1].strip()

                if 'Title' in text or 'title' in text or 'TITLE' in text:
                    # Titles are capped at 255 characters.
                    artwork = value[:255]

                if 'Date' in text:
                    year = value

                if 'Size' in text:
                    dimensions = value

                if 'Medium' in text:
                    technique = value

                # The first non-empty paragraph without a "label:" prefix is
                # taken as the free-text description.
                if len(fields) == 1 and about is None and value:
                    about = value

        # Medium (RULE : 3)
        if "Sculptures" in self.website.start_url:
            medium = "Sculpture"
        else:
            medium = "Painting"

        # image_loc
        image = soup.find('div', class_='main-media-image-wrapper-hook')
        image = image.find('div', id='get-image-item-id')
        image_loc = image.get('href')

        artwork_bundle = {
            "artwork_title": artwork,
            "artist_name": artist,
            "year": year,
            "price": price,
            "Medium": medium,
            "Type": type_,
            "Dimensions": dimensions,
            "Support": support,
            "Frame": frame,
            "Signature": signature,
            "Authenticity": authenticity,
            "About": about,
            "platform": self.website.platform,
            "image_addr": image_loc,
            "seller_id": seller_id,
            "artist_id": artist_id,
            "url": url,
            "technique": technique
        }

        TheAuthour.write_artwork_price_image(**artwork_bundle)