# Scrape every hotel card found in `soup`, store each one in the database
# named by `args.dbname`, then dump everything collected so far to a CSV file.
# Relies on `soup`, `args`, `connect`, `pandas` and `scraped_info_list`
# being set up earlier in the script.
all_hotels = soup.find_all("div", {"class": "hotelCardListing"})

for hotel in all_hotels:
    hotel_dict = {}
    hotel_dict["name"] = hotel.find(
        "h3", {"class": "listingHotelDescription_hotelName"}
    ).text
    # The itemprop uses schema.org's "streetAddress" spelling; the previous
    # "streetAdress" would not match it.
    # NOTE(review): confirm against the live page markup.
    hotel_dict["address"] = hotel.find(
        "span", {"itemprop": "streetAddress"}
    ).text
    hotel_dict["price"] = hotel.find(
        "span", {"class": "listingPrice_finalPrice"}
    ).text

    # find() returns None when a card has no rating element, and None.text
    # raises AttributeError. Always store a "rating" key so that every row
    # has the same number of values in the same order.
    try:
        hotel_dict["rating"] = hotel.find(
            "span", {"class": "hotelRating_ratingSummary"}
        ).text
    except AttributeError:
        hotel_dict["rating"] = None

    parent_amenities_element = hotel.find("div", {"class": "amenityWrapper"})
    amenities_list = []
    # A card without an amenity wrapper simply yields an empty amenity list.
    if parent_amenities_element is not None:
        # Match on the class alone; the tag name of an amenity entry is not
        # known from this script.
        for amenity in parent_amenities_element.find_all(
            attrs={"class": "amenityWrapper_amenity"}
        ):
            amenities_list.append(
                amenity.find("span", {"class": "d-body-sm"}).text.strip()
            )

    # The last entry is deliberately dropped.
    # NOTE(review): presumably it is a "+N more" placeholder - confirm.
    hotel_dict["amenities"] = ', '.join(amenities_list[:-1])

    scraped_info_list.append(hotel_dict)
    # The database layer is handed the row as a tuple of values.
    connect.insert_into_table(args.dbname, tuple(hotel_dict.values()))

# pandas builds the table from the list of per-hotel dictionaries.
dataFrame = pandas.DataFrame(scraped_info_list)
print("Creating csv file....")
dataFrame.to_csv("0yo.csv")

connect.get_hotel_info(args.dbname)
# Scrape every product thumbnail found in `soup`, store each product in the
# database named by `args.dbname`, then print the stored rows.
# find_all("tag", {"attribute": "value"}) returns a list of all elements
# that satisfy the given condition.
all_products = soup.find_all("div", {"class": "thumbnail"})

# Column names for the first four comma-separated parts of the description.
description_fields = ("Screen-size", "Processor", "RAM", "ROM")

for product in all_products:
    product_dic = {}
    product_dic["name"] = product.find("a", {"class": "title"}).text
    product_dic["price"] = product.find("h4", {"class": "price"}).text
    product_dic["review"] = product.find("p", {"class": "pull-right"}).text

    pro_desc = product.find("p", {"class": "description"}).text
    pro_desc_list = pro_desc.split(',')
    # A description with fewer than four parts used to raise IndexError.
    # Pad with empty strings so every product yields the same columns.
    pro_desc_list += [""] * (len(description_fields) - len(pro_desc_list))
    for field_name, field_value in zip(description_fields, pro_desc_list):
        product_dic[field_name] = field_value

    scraped_info_list.append(product_dic)
    # The database layer is handed the row as a tuple of values.
    connect.insert_into_table(args.dbname, tuple(product_dic.values()))

# Print the scraped data in the terminal.
connect.get_product_info(args.dbname)
import connect

# Scrape phone names and prices from Flipkart search result pages, store
# them in the database given by --dbname and also export them to flip.csv.
parser = argparse.ArgumentParser()
parser.add_argument("--dbname", help="Enter the name of db", type=str)
args = parser.parse_args()

flip_url = "https://www.flipkart.com/search?q=mobiles&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
page_num_MAX = 3
scraped_info_list = []
connect.connect(args.dbname)

# NOTE(review): the range end is exclusive, so with page_num_MAX = 3 only
# pages 1 and 2 are requested - confirm that this is intended.
for page_num in range(1, page_num_MAX):
    # The page number must be its own query parameter. Concatenating it
    # directly turned the last parameter into "as=off1", "as=off2", ...
    # and never selected a different page.
    req = requests.get(flip_url + "&page=" + str(page_num))
    content = req.content
    soup = BeautifulSoup(content, "html.parser")

    all_phone = soup.find_all("div", {"class": "_3O0U0u"})
    for phone in all_phone:
        phone_dict = {}
        phone_dict["name"] = phone.find("div", {"class": "_3wU53n"}).text
        phone_dict["rate"] = phone.find("div", {"class": "_6BWGkk"}).text
        scraped_info_list.append(phone_dict)
        # The database layer is handed the row as a tuple of values.
        connect.insert_into_table(args.dbname, tuple(phone_dict.values()))

# The output is stored both in the database and in a CSV file.
dataframe = pandas.DataFrame(scraped_info_list)
dataframe.to_csv("flip.csv")

connect.get_phone_info(args.dbname)
# Scrape Yamaha RX100 listings from OLX, store each listing in the database
# whose name is read from the user, and export all listings to olx.csv.
dbname = input("Enter the db name:")
connect.connect(dbname)

olx_url = "https://www.olx.in/items/q-yamaha-rx100?isSearchCall=true"
scraped_info_list = []

req = requests.get(olx_url)
content = req.content
soup = BeautifulSoup(content, "html.parser")

all_bikes = soup.find_all("div", {"class": "IKo3_"})
for bike in all_bikes:
    bike_dict = {}
    bike_dict["prices"] = bike.find("span", {"class": "_89yzn"}).text

    # find() returns None when a listing has no year element, and None.text
    # raises AttributeError. Store None instead of skipping the key:
    # otherwise the tuple inserted below is one value short and the title
    # and address shift into the wrong columns.
    try:
        bike_dict["year"] = bike.find("span", {"class": "_2TVI3"}).text
    except AttributeError:
        bike_dict["year"] = None

    bike_dict["title"] = bike.find("span", {"class": "_2tW1I"}).text
    bike_dict["address"] = bike.find("span", {"class": "tjgMj"}).text

    scraped_info_list.append(bike_dict)
    # The database layer is handed the row as a tuple of values.
    connect.insert_into_table(dbname, tuple(bike_dict.values()))

dataFrame = pandas.DataFrame(scraped_info_list)
dataFrame.to_csv("olx.csv")

connect.get_bike_info(dbname)
# NOTE(review): these statements read `mobile`, `feature_list` and
# `mobile_dict`, which are created above this chunk - presumably inside a
# per-mobile loop. Re-indent to match that loop when merging.

# Collect the text of every feature bullet of the current mobile.
parent_features = mobile.find('div', {'class': 'fMghEO'})
# find() returns None when the feature container is missing; in that case
# no features are added instead of failing with AttributeError.
if parent_features is not None:
    for feature in parent_features.find_all('ul', {'class': '_1xgFaf'}):
        for feature_item in feature.find_all('li', {'class': 'rgWa7D'}):
            feature_list.append(feature_item.text)

# A list cannot be stored in a CSV cell, so the features are joined into
# one string separated by a comma and a space.
mobile_dict['Features'] = ', '.join(feature_list)

# All collected info is kept in a list of dictionaries, which is the form
# pandas processes.
scraped_info_list.append(mobile_dict)
# The values have to be passed as a tuple.
connect.insert_into_table(args.dbname, tuple(mobile_dict.values()))

# CSV export is currently disabled:
#dataFrame=pandas.DataFrame(scraped_info_list)   # data structure used by pandas
#print('creating csv file...')
#dataFrame.to_csv('Flipkart.csv')                # converts the data structure to a csv file

# NOTE(review): presumably this runs once, after the enclosing loop has
# finished - confirm its indentation against the loop header.
connect.get_info(args.dbname)
except AttributeError: product_dict["rating"] = "" product_dict["price"] = product.find('div', class_='_1vC4OE _2rQ-NK').text try: product_dict["discount"] = product.find('div', class_='VGWI6T').text except AttributeError: product_dict["discount"] = "" parent_feature_element = product.find('ul', class_='vFw0gD') features_list = [] for feature in parent_feature_element.find_all('li', class_='tVe95H'): features_list.append(feature.text) product_dict["features"] = ', '.join(features_list) scraped_info_list.append(product_dict) connect.insert_into_table(tuple(product_dict.values())) dataFrame = pandas.DataFrame(scraped_info_list) dataFrame.to_csv("laptops.csv") connect.get_laptop_info() ''' Output :- Table created successfully ('Dell 14 3000 Core i3 7th Gen - (4 GB/1 TB HDD/Linux) inspiron 3481 Laptop', '4.1', '₹25,490', '13% off', 'Intel Core i3 Processor (7th Gen), 4 GB DDR4 RAM, Linux/Ubuntu Operating System, 1 TB HDD, 35.56 cm (14 inch) Display, 1 Year Limited Hardware Warranty, InHome Service After Remote Diagnosis') ('HP 14q Core i3 7th Gen - (8 GB/256 GB SSD/Windows 10 Home) 14q-cs0023TU Thin and Light Laptop', '4.3', '₹32,990', '3% off', 'Intel Core i3 Processor (7th Gen), 8 GB DDR4 RAM, 64 bit Windows 10 Operating System, 256 GB SSD, 35.56 cm (14 inch) Display, 1 Year Onsite Warranty') ('Lenovo Ideapad 130 Core i3 7th Gen - (4 GB/1 TB HDD/Windows 10 Home) 130-15IKB Laptop', '4', '₹27,990', '15% off', 'Intel Core i3 Processor (7th Gen), 4 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty') ('HP 15 Pentium Gold - (4 GB/1 TB HDD/Windows 10 Home) 15-di0001TU Laptop', '4.4', '₹23,490', '8% off', 'Intel Pentium Gold Processor, 4 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty') ('HP 15s Ryzen 3 Dual Core - (4 GB/256 GB SSD/Windows 10 Home) 15s-eq0007AU Thin and Light Laptop', '4.5', '₹30,990', '4% off', 'AMD Ryzen 3 Dual Core Processor, 4 GB 
DDR4 RAM, 64 bit Windows 10 Operating System, 256 GB SSD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty') ('Lenovo Ideapad L340 Core i5 9th Gen - (8 GB/1 TB HDD/128 GB SSD/Windows 10 Home/4 GB Graphics/NVIDIA G...', '4.5', '₹59,990', '39% off', '60 Hz Refresh Rate- It can display upto 60 frames per second., Intel Core i5 Processor (9th Gen), 8 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD|128 GB SSD, 39.62 cm (15.6 inch) Display, 1 Year Onsite Warranty') ('Acer Nitro 5 Core i7 9th Gen - (8 GB/1 TB HDD/256 GB SSD/Windows 10 Home/4 GB Graphics/NVIDIA Geforce ...', '4.6', '₹69,990', '33% off', '60 Hz Refresh Rate- It can display upto 60 frames per second., Intel Core i7 Processor (9th Gen), 8 GB DDR4 RAM, 64 bit Windows 10 Operating System, 1 TB HDD|256 GB SSD, 39.62 cm (15.6 inch) Display, 1 Year International Travelers Warranty (ITW)')