def write_seller_data(self, *args):
    """Insert one seller row, forwarding the positional arguments unchanged.

    Per the original comment the arguments are expected to be
    ``[url, seller_name, location (may be None), website]``; they are passed
    straight to ``db.Sellers.insert_data_sellers`` without validation.
    """
    # Just like the artist class in dataStructures, sellers also seems
    # redundant as of now, so no intermediate data structure is built here.
    sellers_table = db.Sellers()
    sellers_table.insert_data_sellers(*args)
def main():
    """Run the Artsper miner over the sculptors listing, then the painters listing."""
    # Creating SELLER_INFO === To be used with artwork entry
    sellers = db.Sellers()
    sellers.read_data_sellers()
    # NOTE: the handle is intentionally kept alive; calling sellers.__del__()
    # here to close the connection was observed to throw an error.

    # Creating ARTIST_INFO === To be used with artwork entry
    artists = db.Artist()
    artists.read_artist_data()

    listing_urls = (
        'https://www.artsper.com/us/contemporary-artists/youngtalents/sculptors-artists',
        'https://www.artsper.com/us/contemporary-artists/youngtalents/painters',
    )
    for listing_url in listing_urls:
        artsperpainters = Website('https://www.artsper.com', listing_url, "ARTSPER")
        artsper = Artsper(artsperpainters)
        artsper.miner()
def write_seller(*args):
    """Normalise seller arguments through ``SellerDS`` and insert the resulting row.

    Per the original comments, ``args`` is expected to be
    ``[url, platform, seller_name, location (may be None), website]`` and the
    bundle produced by ``SellerDS.seller_bundle()`` is
    ``[url, seller_name, location (may be None), website]``.
    """
    # Running data through the seller class of dataStructures.
    row = SellerDS(*args).seller_bundle()
    sellers_table = db.Sellers()
    sellers_table.insert_data_sellers(*row)
def main():
    """Run the Artsy miner over the new-and-noteworthy collection, then pause 10 seconds."""
    base_url = 'https://www.artsy.net'
    listing_url = 'https://www.artsy.net/collection/new-and-noteworthy-artists?additional_gene_ids%5B0%5D=painting&additional_gene_ids%5B1%5D=sculpture'

    # Creating SELLER_INFO === To be used with artwork entry
    sellers = db.Sellers()
    sellers.read_data_sellers()

    # Creating ARTIST_INFO === To be used with artwork entry
    artists = db.Artist()
    artists.read_artist_data()

    webagent = Website(base_url, listing_url, 'ARTSY')
    artsy = Artsy(webagent)
    artsy.miner()

    time.sleep(10)
def main():
    """Run the Singulart miner over the painting listing, pause 10 seconds, report completion."""
    base_url = 'https://www.singulart.com/en'
    listing_url = 'https://www.singulart.com/en/painting'

    # Creating SELLER_INFO === To be used with artwork entry
    sellers = db.Sellers()
    sellers.read_data_sellers()

    # Creating ARTIST_INFO === To be used with artwork entry
    artists = db.Artist()
    artists.read_artist_data()

    webagent = Website(base_url, listing_url, "SINGULART")
    singulart = Singulart(webagent)
    singulart.miner()

    time.sleep(10)
    print("FINISHED")
def main():
    """Run the Emerging Artist Platform miner over Paintings, then Sculptures."""
    # Load the seller and artist data before any artwork is processed.
    sellers = db.Sellers()
    sellers.read_data_sellers()
    artists = db.Artist()
    artists.read_artist_data()

    # Each URL ends in "page=" with no page number attached.
    collection_urls = (
        'https://www.emergingartistplatform.com/browse?Collection=Paintings&page=',
        'https://www.emergingartistplatform.com/browse?Collection=Sculptures&page=',
    )
    for collection_url in collection_urls:
        agent = Website(
            'https://www.emergingartistplatform.com',
            collection_url,
            "EMERGINGARTISTPLATFOM")
        eap = EAP(agent)
        eap.miner()
def main():
    """Mine Artsper painters, then sculptors, then run the image manager, printing timings.

    Every printed duration is measured from the start of the whole run, not
    from the start of the individual lap.
    """
    began = time.perf_counter()

    def seconds_so_far():
        # Seconds since the run began, rounded to two decimals.
        return round(time.perf_counter() - began, 2)

    # Creating SELLER_INFO
    sellers = db.Sellers()
    sellers.read_data_sellers()

    # Creating ARTIST_INFO
    artists = db.Artist()
    artists.read_artist_data()

    site = Website(
        'https://www.artsper.com',
        'https://www.artsper.com/us/contemporary-artists/youngtalents/painters?',
        "ARTSPER")
    a_m = Artsper(site)
    a_m.artsper_mine()
    print(f"Lap Completed in {seconds_so_far()}, seconds.\n Starting sculptures")

    site = Website(
        'https://www.artsper.com',
        'https://www.artsper.com/us/contemporary-artists/youngtalents/sculptors-artists',
        "ARTSPER")
    a_m = Artsper(site)
    a_m.artsper_mine()
    print(f"Lap Completed in {seconds_so_far()}, seconds.\n Downloading and updating images")

    TheMiner.sir_image_manager()
    print(f"Finished in {seconds_so_far()}, seconds")
def main():
    """Run the Kazoart miner over sculpture, wait 10 seconds, then over paintings."""
    # Creating SELLER_INFO === To be used with artwork entry
    sellers = db.Sellers()
    sellers.read_data_sellers()

    # Creating ARTIST_INFO === To be used with artwork entry
    artists = db.Artist()
    artists.read_artist_data()

    technique_urls = (
        'https://www.kazoart.com/en/artistes/technique/sculpture?eme=1',
        'https://www.kazoart.com/en/artistes/technique/peintures?eme=1',
    )
    for position, technique_url in enumerate(technique_urls):
        if position > 0:
            # Pause between the two runs only; there is no trailing sleep.
            time.sleep(10)
        kazoart_webagent = Website('https://www.kazoart.com', technique_url, "KAZOART")
        kazoart = Kazoart(kazoart_webagent)
        kazoart.miner()
def get_art_data_core(self, url):
    """Scrape one artwork page and store its artwork, image and price rows.

    The page at ``url`` is fetched with ``TheMiner.fetch_page``. When nothing
    is fetched the method does nothing. Otherwise it resolves the seller id,
    parses the headline block (artist, title, year, price, image URL) and the
    description list (dimensions, medium, type, support, framing, signature,
    authenticity, description), wraps everything in ``ArtworkData`` and
    writes the artwork, image and price tables through ``db``.

    Parsing is best effort: a field that cannot be extracted is stored as
    ``None``. If anything fails while walking the description list, all
    description fields are reset to ``None``.

    Args:
        url: Address of the artwork page to scrape.

    Returns:
        None.
    """
    platform = self.website.platform

    # Description fields; each stays None unless its row is found on the page.
    dimensions = None
    medium = None
    art_type = None
    support = None
    frame = None
    signature = None
    authenticity = None
    about = None

    soup = TheMiner.fetch_page(url)
    if soup is None:
        # NOTE(review): assumes the whole parse-and-store sequence was guarded
        # by the soup check in the original layout - confirm.
        return

    # Data to be picked here.
    # Artist's name, artwork's name, year, Artwork description, Price, Dimensions,
    # Medium(Sculpture/Painting), Type (Copies or Unique), Frame, Support,
    # Authenticity, Website, Image (12)
    seller_id_trigger, seller_bundle = self.seller_info(soup)
    # seller_id_trigger could be 0, 1 or a real id (a real id comes with bundle = None).
    # seller_id_trigger 0 comes with some data in the bundle.
    # seller_id_trigger 1 comes with no data in the bundle.
    # THE FOLLOWING BLOCK OF CODE NEEDS TO BE CONSISTENT ACROSS ALL THE WEBSITE MODULES.
    if seller_bundle is not None and seller_id_trigger == 0:
        s_bundle = SellerData(*seller_bundle).seller_bundle()
        # Write data to table "sellers"
        seller_agent = db.Sellers()
        seller_agent.create_table_sellers()
        seller_id = seller_agent.insert_data_sellers(*s_bundle)
        # Cache the seller for quick reuse and to reduce the number of clicks.
        # NOTE(review): the join raises TypeError if location is None - confirm
        # seller_info never returns a bundle without a location.
        seller_name, location = seller_bundle[0], seller_bundle[1]
        SELLER_INFO["_".join([seller_name, location])] = seller_id
    else:
        seller_id = seller_id_trigger

    # ---- Headline block: artist, title, year, price, image ----
    # `except Exception` (not a bare except) keeps the best-effort parsing but
    # lets KeyboardInterrupt / SystemExit stop the scrape.
    try:
        info = soup.find('section', id='informations')
        header = info.find('div', class_='relative')
    except Exception:
        artist_name = None
        artwork_title = None
        year = None
        price = None
        image_addr = None
    else:
        try:
            # ARTIST'S NAME
            artist_name = header.find('span', class_='primary-title').text.strip()
        except Exception:
            artist_name = None

        try:
            # ARTWORK'S NAME: every comma-separated piece except the last one.
            # The first piece is kept as is; later pieces are stripped.
            headline = header.find('span', class_='secondary-title').text.strip()
            pieces = headline.split(',')
            artwork_title = ", ".join(
                piece if position == 0 else piece.strip()
                for position, piece in enumerate(pieces[:-1])
            )
            # ARTWORK YEAR: the last comma-separated piece.
            year = pieces[-1].strip()
        except Exception:
            artwork_title = None
            year = None

        try:
            # PRICE: the digits that appear before the first '-'.
            price_text = info.find('p', class_='media-price price').text.strip()
            digits = "".join(ch for ch in price_text.split('-')[0] if ch.isdigit())
            # int('') raises ValueError when no digit was found -> price is None.
            price = int(digits)
        except Exception:
            price = None

        try:
            # Image url
            image_box = info.find('div', id='img-container')
            image_addr = image_box.find('img', id='img_original')['data-src']
        except Exception:
            image_addr = None

    # ---- Description list ----
    try:
        # Contains:: image, dimensions, medium, type, Frame, Support, authenticity, signature
        rows = soup.find('div', id='tabs-description').ul.find_all('li')
        for row in rows:
            text = row.text

            # Artwork Description; the other branches only apply to rows
            # that do not contain this heading.
            if 'About the artwork' in text:
                lead = row.find('p', class_="marg-bot-10")
                if lead is not None:
                    remainder = row.find(
                        'div', class_="description-catalog see-more text-justify").text.strip()
                    about = lead.text.strip() + remainder
                else:
                    about = row.find('p', class_='').text.strip()
            # Dimensions
            elif 'Dimensions' in text and 'Support' not in text:
                dimensions = (row.find('p', class_='pull-right').strong.text.strip()
                              + ' (Height x Width x Depth)')
            # Medium (Sculpture/Painting)
            elif 'Medium' in text:
                medium = row.find('p', class_='pull-right').a.text.strip()
            # Type
            elif 'Type' in text:
                art_type = row.find('p', class_='pull-right text-right').text.strip().split(' ')[0]
            # Support (base)
            elif 'Support' in text:
                cell = row.find('p', class_='pull-right text-right')
                try:
                    words = cell.text.strip().split(' ')
                    support = words[0] + '. ' + words[1].strip('\n')
                    support += cell.strong.text.strip().strip('\n')
                except IndexError:
                    # Single-word value: keep the raw text.
                    support = cell.text.strip()
            # Framing
            elif 'Framing' in text:
                frame = row.find('p', class_='pull-right').text.strip()
            # Signature
            elif 'Signature' in text:
                signature = row.find('p', class_='pull-right').text.strip()
            # Authenticity
            elif 'Authenticity' in text:
                authenticity = row.find('p', class_='pull-right text-right').text.strip()
    except Exception:
        # Make all the fields Null
        dimensions = None
        medium = None
        art_type = None
        support = None
        frame = None
        signature = None
        authenticity = None
        about = None

    result = {"artwork_title": artwork_title, "artist_name": artist_name, "year": year,
              "price": price, "Dimensions": dimensions, "Medium": medium, "Type": art_type,
              "Support": support, "Frame": frame, "Signature": signature,
              "Authenticity": authenticity, "About": about, 'platform': platform,
              "image_addr": image_addr, "seller_id": seller_id}
    artwork_item = ArtworkData(**result)

    # Images are not downloaded here: only the image url and artwork id are
    # recorded, and the pool of images is downloaded at the end of the execution
    # by the function living with TheMiner in module dataStructures.
    # DON'T THREAD ANYTHING WITH THE DATA DOWNLOAD FUNCTION AS IT ITSELF IS
    # LAUNCHED ON A THREAD.
    art_bund = artwork_item.artwork_bundle()

    # WRITING ARTWORK
    artwork_agent = db.Artwork()
    artwork_agent.create_table_artwork()
    artwork_id = artwork_agent.insert_data_artwork(*art_bund)

    # Writing image-info
    image_bundle = artwork_item.image_bundle(artwork_id)
    image_agent = db.Images()
    image_agent.create_table_images()
    image_agent.insert_data_images(*image_bundle)

    # Price bundle can only be created once the artwork is written in the db
    price_bund = artwork_item.price_bundle(artwork_id)

    # WRITING PRICES
    price_agent = db.Price()
    price_agent.create_table_prices()
    price_agent.insert_data_prices(*price_bund)