예제 #1
0
    def write_seller_data(self, *args):
        """Hand the given seller fields to the sellers-table writer.

        All positional arguments are forwarded, unchanged and in order, to
        ``db.Sellers().insert_data_sellers``.

        Args:
            *args: Seller fields. An earlier inline note lists them as
                ``[url, seller_name, location (may be None), website]`` --
                confirm against ``insert_data_sellers``.
        """
        # NOTE: much like the artist class in dataStructures, a dedicated seller
        # wrapper looks redundant for now, so values go straight to the DB layer.
        seller_writer = db.Sellers()
        seller_writer.insert_data_sellers(*args)
예제 #2
0
def main():
    """Load seller and artist data, then mine the two Artsper listings.

    The sculptors listing is mined first, followed by the painters listing,
    each through a fresh ``Website``/``Artsper`` pair.
    """
    # Seller data is read up front; per the original note it backs SELLER_INFO,
    # which is used when artwork entries are written.
    seller_reader = db.Sellers()
    seller_reader.read_data_sellers()
    # NOTE: an earlier attempt to close this connection right here (by calling
    # __del__) raised an error; doing it inside a function may work instead.

    # Artist data is read up front as well (ARTIST_INFO per the original note).
    artist_reader = db.Artist()
    artist_reader.read_artist_data()

    listing_urls = (
        'https://www.artsper.com/us/contemporary-artists/youngtalents/sculptors-artists',
        'https://www.artsper.com/us/contemporary-artists/youngtalents/painters',
    )
    for listing_url in listing_urls:
        site = Website('https://www.artsper.com', listing_url, "ARTSPER")
        scraper = Artsper(site)
        scraper.miner()
예제 #3
0
 def write_seller(*args):
     """Normalise raw seller fields through ``SellerDS`` and insert them.

     Args:
         *args: Raw seller fields. An earlier inline note lists them as
             ``[url, platform, seller_name, location (may be None), website]``
             -- confirm against the ``SellerDS`` constructor.
     """
     # SellerDS turns the raw fields into the bundle the DB writer expects;
     # the earlier note describes that bundle as
     # [url, seller_name, location (may be None), website].
     seller_fields = SellerDS(*args).seller_bundle()
     db.Sellers().insert_data_sellers(*seller_fields)
예제 #4
0
def main():
    """Load seller and artist data, mine the Artsy listing, then pause 10 s."""
    # Seller data first (SELLER_INFO per the original note; used with artwork entries).
    seller_reader = db.Sellers()
    seller_reader.read_data_sellers()

    # Artist data next (ARTIST_INFO per the original note; used with artwork entries).
    artist_reader = db.Artist()
    artist_reader.read_artist_data()

    base_url = 'https://www.artsy.net'
    listing_url = 'https://www.artsy.net/collection/new-and-noteworthy-artists?additional_gene_ids%5B0%5D=painting&additional_gene_ids%5B1%5D=sculpture'
    site = Website(base_url, listing_url, 'ARTSY')
    scraper = Artsy(site)
    scraper.miner()

    time.sleep(10)
예제 #5
0
def main():
    """Load seller and artist data, mine the Singulart paintings listing.

    Sleeps for 10 seconds after mining and then prints ``FINISHED``.
    """
    # Seller data first (SELLER_INFO per the original note; used with artwork entries).
    seller_reader = db.Sellers()
    seller_reader.read_data_sellers()

    # Artist data next (ARTIST_INFO per the original note; used with artwork entries).
    artist_reader = db.Artist()
    artist_reader.read_artist_data()

    base_url = 'https://www.singulart.com/en'
    listing_url = 'https://www.singulart.com/en/painting'
    site = Website(base_url, listing_url, "SINGULART")
    scraper = Singulart(site)
    scraper.miner()

    time.sleep(10)

    print("FINISHED")
예제 #6
0
def main():
    """Load seller and artist data, then mine both Emerging Artist Platform listings.

    Paintings are mined first, then sculptures, each through a fresh
    ``Website``/``EAP`` pair.
    """
    seller_reader = db.Sellers()
    seller_reader.read_data_sellers()
    artist_reader = db.Artist()
    artist_reader.read_artist_data()

    # Both URLs end in "page=", so the page number is presumably appended by
    # the miner -- confirm in EAP.
    listing_urls = (
        'https://www.emergingartistplatform.com/browse?Collection=Paintings&page=',
        'https://www.emergingartistplatform.com/browse?Collection=Sculptures&page=',
    )
    for listing_url in listing_urls:
        # NOTE(review): the platform string is spelled "...PLATFOM"; it is a
        # runtime value, so it is left exactly as it was.
        site = Website(
            'https://www.emergingartistplatform.com',
            listing_url,
            "EMERGINGARTISTPLATFOM")
        scraper = EAP(site)
        scraper.miner()
예제 #7
0
def main():
    """Run a timed Artsper scrape: painters, sculptors, then image handling.

    The elapsed time since the start of the run is printed after each of the
    three stages.
    """
    start = time.perf_counter()

    # Seller data (SELLER_INFO per the original note).
    seller_reader = db.Sellers()
    seller_reader.read_data_sellers()

    # Artist data (ARTIST_INFO per the original note).
    artist_reader = db.Artist()
    artist_reader.read_artist_data()

    # Stage 1: painters listing.
    painters_site = Website(
        'https://www.artsper.com',
        'https://www.artsper.com/us/contemporary-artists/youngtalents/painters?',
        "ARTSPER")
    Artsper(painters_site).artsper_mine()

    finish = time.perf_counter()
    print(
        f"Lap Completed in {round(finish - start, 2)}, seconds.\n Starting sculptures"
    )

    # Stage 2: sculptors listing.
    sculptors_site = Website(
        'https://www.artsper.com',
        'https://www.artsper.com/us/contemporary-artists/youngtalents/sculptors-artists',
        "ARTSPER")
    Artsper(sculptors_site).artsper_mine()

    finish = time.perf_counter()
    print(
        f"Lap Completed in {round(finish - start, 2)}, seconds.\n Downloading and updating images"
    )

    # Stage 3: image handling, delegated to TheMiner.
    TheMiner.sir_image_manager()

    finish = time.perf_counter()
    print(f"Finished in {round(finish - start, 2)}, seconds")
예제 #8
0
def main():
    """Load seller and artist data, then mine the two Kazoart listings.

    The sculpture listing is mined first; after a 10-second pause the
    paintings listing is mined.
    """
    # Seller data first (SELLER_INFO per the original note; used with artwork entries).
    seller_reader = db.Sellers()
    seller_reader.read_data_sellers()

    # Artist data next (ARTIST_INFO per the original note; used with artwork entries).
    artist_reader = db.Artist()
    artist_reader.read_artist_data()

    base_url = 'https://www.kazoart.com'

    sculpture_site = Website(
        base_url,
        'https://www.kazoart.com/en/artistes/technique/sculpture?eme=1',
        "KAZOART")
    Kazoart(sculpture_site).miner()

    # Pause between the two listings.
    time.sleep(10)

    painting_site = Website(
        base_url,
        'https://www.kazoart.com/en/artistes/technique/peintures?eme=1',
        "KAZOART")
    Kazoart(painting_site).miner()
예제 #9
0
    def get_art_data_core(self, url):
        """Scrape one artwork page and store its artwork, image and price rows.

        Steps, in order:
          1. Fetch the page with ``TheMiner.fetch_page``; if that returns
             ``None`` nothing else happens.
          2. Resolve the seller via ``self.seller_info``. A seller that comes
             back with trigger ``0`` and a data bundle is inserted into the
             sellers table and cached in ``SELLER_INFO``.
          3. Parse the headline fields (artist, title, year, price, image URL)
             and the description fields (dimensions, medium, ...). Fields that
             cannot be parsed are left as ``None``.
          4. Insert the artwork, then its image record, then its price record.

        Args:
            url: Address of the artwork page to scrape.

        Returns:
            None. Results are written to the database.
        """
        platform = self.website.platform

        soup = TheMiner.fetch_page(url)
        if soup is None:
            return

        # THE SELLER HANDLING BELOW NEEDS TO STAY CONSISTENT ACROSS ALL THE
        # WEBSITE MODULES.
        # Per the original notes, seller_id_trigger is 0, 1 or a real id:
        #   * a real id arrives with bundle None,
        #   * 0 arrives with seller data in the bundle (seller not stored yet),
        #   * 1 arrives with no data in the bundle.
        seller_id_trigger, seller_bundle = self.seller_info(soup)
        if seller_bundle is not None and seller_id_trigger == 0:
            s_bundle = SellerData(*seller_bundle).seller_bundle()
            # Write the seller to table "sellers".
            s_agent = db.Sellers()
            s_agent.create_table_sellers()
            seller_id = s_agent.insert_data_sellers(*s_bundle)
            # Cache the id so later artworks by this seller skip the lookup.
            seller_name = seller_bundle[0]
            location = seller_bundle[1]
            # NOTE(review): this join raises TypeError if location is None --
            # confirm seller_info never returns a bundle without a location.
            SELLER_INFO["_".join([seller_name, location])] = seller_id
        else:
            seller_id = seller_id_trigger

        artist_name, artwork_title, year, price, image_addr = self._parse_headline(soup)
        details = self._parse_details(soup)

        artwork_item = ArtworkData(
            artwork_title=artwork_title, artist_name=artist_name, year=year,
            price=price, platform=platform, image_addr=image_addr,
            seller_id=seller_id, **details)
        # Images are not downloaded here. Per the original notes, only the
        # image URL and artwork id are recorded now and the image pool is
        # downloaded at the end of the run by TheMiner (module dataStructures).
        # DON'T THREAD ANYTHING AROUND THE DOWNLOAD FUNCTION: it is itself
        # launched on a thread.
        art_bund = artwork_item.artwork_bundle()

        # WRITING ARTWORK
        dbartwork_agent = db.Artwork()
        dbartwork_agent.create_table_artwork()
        artwork_id = dbartwork_agent.insert_data_artwork(*art_bund)

        # WRITING IMAGE INFO (needs the artwork id from the insert above)
        image_bundle = artwork_item.image_bundle(artwork_id)
        dbimage_agent = db.Images()
        dbimage_agent.create_table_images()
        dbimage_agent.insert_data_images(*image_bundle)

        # WRITING PRICES -- the price bundle can only be built once the artwork
        # is in the db, because it needs the artwork id.
        price_bund = artwork_item.price_bundle(artwork_id)
        dbprice_agent = db.Price()
        dbprice_agent.create_table_prices()
        dbprice_agent.insert_data_prices(*price_bund)

    @staticmethod
    def _parse_headline(soup):
        """Return (artist_name, artwork_title, year, price, image_addr) from the 'informations' section.

        Each field is parsed independently; a field that fails to parse is
        ``None``. If the section itself cannot be located, all five are ``None``.
        """
        artist_name = artwork_title = year = price = image_addr = None

        try:
            info = soup.find('section', id='informations')
            heading = info.find('div', class_='relative')
        except Exception:
            # No usable 'informations' section: every headline field stays None.
            return artist_name, artwork_title, year, price, image_addr

        try:
            # ARTIST'S NAME
            artist_name = heading.find('span', class_='primary-title').text.strip()
        except Exception:
            artist_name = None

        try:
            # ARTWORK'S NAME and YEAR share one comma-separated caption; the
            # last segment is taken as the year, everything before it as title.
            caption = heading.find('span', class_='secondary-title').text.strip()
            *title_parts, year_part = caption.split(',')
            # The first segment is kept verbatim, later ones are stripped.
            # NOTE(review): a caption without any comma yields an empty title
            # and puts the whole caption into year -- confirm that is intended.
            artwork_title = ", ".join(
                title_parts[:1] + [part.strip() for part in title_parts[1:]])
            year = year_part.strip()
        except Exception:
            artwork_title = None
            year = None

        try:
            # PRICE: keep only the digits that appear before the first '-'
            # (presumably the lower end of a price range -- confirm).
            price_text = info.find('p', class_='media-price price').text.strip()
            digits = "".join(
                ch for ch in price_text.partition('-')[0] if ch.isdigit())
            price = int(digits)
        except Exception:
            price = None

        try:
            # IMAGE URL
            img_box = info.find('div', id='img-container')
            image_addr = img_box.find('img', id='img_original')['data-src']
        except Exception:
            image_addr = None

        return artist_name, artwork_title, year, price, image_addr

    @staticmethod
    def _parse_details(soup):
        """Return the description-tab fields as a dict of ``ArtworkData`` keyword arguments.

        Keys: Dimensions, Medium, Type, Support, Frame, Signature, Authenticity,
        About. Parsing is all-or-nothing: if any list item fails to parse,
        every field is reported as ``None``.
        """
        field_names = ("Dimensions", "Medium", "Type", "Support", "Frame",
                       "Signature", "Authenticity", "About")
        details = dict.fromkeys(field_names)

        try:
            items = soup.find('div', id='tabs-description').ul.find_all('li')
            for item in items:
                label = item.text

                # The description item can mention any of the other keywords
                # in its free text, so it is recognised first and excluded
                # from every other rule.
                if 'About the artwork' in label:
                    lead = item.find('p', class_="marg-bot-10")
                    if lead is not None:
                        rest = item.find(
                            'div',
                            class_="description-catalog see-more text-justify").text.strip()
                        details["About"] = lead.text.strip() + rest
                    else:
                        details["About"] = item.find('p', class_='').text.strip()

                elif 'Dimensions' in label and 'Support' not in label:
                    details["Dimensions"] = (
                        item.find('p', class_='pull-right').strong.text.strip()
                        + ' (Height x Width x Depth)')

                # Medium (Sculpture/Painting)
                elif 'Medium' in label:
                    details["Medium"] = item.find('p', class_='pull-right').a.text.strip()

                elif 'Type' in label:
                    details["Type"] = item.find(
                        'p', class_='pull-right text-right').text.strip().split('  ')[0]

                # Support (base)
                elif 'Support' in label:
                    try:
                        chunks = item.find(
                            'p', class_='pull-right text-right').text.strip().split('  ')
                        support = chunks[0] + '. ' + chunks[1].strip('\n')
                        support += item.find(
                            'p', class_='pull-right text-right').strong.text.strip().strip('\n')
                    except IndexError:
                        # No double-space separator: use the text as it is.
                        support = item.find(
                            'p', class_='pull-right text-right').text.strip()
                    details["Support"] = support

                elif 'Framing' in label:
                    details["Frame"] = item.find('p', class_='pull-right').text.strip()

                elif 'Signature' in label:
                    details["Signature"] = item.find('p', class_='pull-right').text.strip()

                elif 'Authenticity' in label:
                    details["Authenticity"] = item.find(
                        'p', class_='pull-right text-right').text.strip()
        except Exception:
            # Any failure discards whatever was collected so far.
            return dict.fromkeys(field_names)

        return details