from urllib.request import urlopen as req
from bs4 import BeautifulSoup as soup


def Temperature(pvid):
    try:
        # 1- Build the URL (pvid = province id from the TMD website)
        url = 'https://www.tmd.go.th/province.php?id=' + str(pvid)
        # 2- Request the URL
        webopen = req(url)
        page_html = webopen.read()
        webopen.close()
        # 3- Convert page_html to a Soup object
        data = soup(page_html, 'html.parser')
        # 4- Find the elements (td.strokeme = temperature, span.title = province name)
        temp = data.findAll('td', {'class': 'strokeme'})
        province = data.findAll('span', {'class': 'title'})
        pv = province[0].text.replace(' ', '')
        result = temp[0].text
        text = f'จังหวัด: {pv} อุณหภูมิ: {result}'  # 'Province: ... Temperature: ...'
        print(text)
        return text
    except Exception:
        print('No Result')
        return 'No Result'
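# Usage sketch for Temperature(); the province id is an arbitrary example
# value, not one confirmed by the source:
# Temperature(38)   # -> 'จังหวัด: ... อุณหภูมิ: 28.2 °C' on success, 'No Result' otherwise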
def single():
    url = 'https://www.matichon.co.th/economy'
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    news = data.findAll('h3', {'class': 'entry-title td-module-title'})
    print('COUNT: ', len(news))
    alltext = ''
    all_title = []
    all_link = []
    for i in news:
        title = i.a['title']
        link = i.a['href']
        text = "TITLE: {}\nLINK: {}\n\n".format(title, link)
        alltext = alltext + text
        all_title.append(title)
        all_link.append(link)
    return (alltext, all_title, all_link)
def Translate(vocab, fulltext=False):
    url = 'https://dict.longdo.com/mobile.php?search={}'.format(vocab)
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    tables = data.find_all('table', {'class': 'result-table'})
    result = []
    for t in tables[:4]:
        for row in t.find_all('tr'):
            cl = row.find_all('td')
            if len(cl) < 2:
                continue
            full = cl[1].text
            meaning = full.split(',')[0]
            # Longdo marks dictionary entries with a leading part-of-speech
            # bracket, e.g. '[N]'
            if meaning and meaning[0] == '[':
                entry = {'vocab': cl[0].text, 'meaning': meaning}
                if fulltext:
                    entry['full'] = full
                result.append(entry)
    return result[0] if result else None
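# Usage sketch for Translate(); the word is an arbitrary example and the
# output shape follows the dict built above:
# Translate('cat')                  # -> {'vocab': '...', 'meaning': '[N] ...'} or None
# Translate('cat', fulltext=True)   # adds the untruncated 'full' field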
from urllib.error import HTTPError

data = []  # module-level list that getData() appends into (assumed from the original context)


def getData(url):
    try:
        response = req(url)
    except HTTPError:
        return None
    try:
        data_raw = response.read()
        page_soup = soup(data_raw, "html.parser")
        containers = page_soup.findAll("div", {"class": "biz-listing-large"})
        for container in containers:
            title = container.div.div.h3.a.span.get_text()
            title = title.strip()  # remove surrounding whitespace
            r = container.findAll("span", {"class": "review-count rating-qualifier"})
            rating = r[0].text
            rating = rating.strip().split(' ')[0]  # split on whitespace, keep the first part
            # construct a dictionary for this listing
            item = {'name': title, 'rating': rating, 'position': 0}
            data.append(item)
    except AttributeError:
        return None
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup, CData

# blogs, cleaning and book_creator are module-level objects from the original
# project (assumed here, not defined).


def parseWorker(name):
    print("name is {}".format(name))
    url = blogs[name]['url']
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
    toSend = Request(url=url, headers=headers)
    xml = urlopen(toSend).read()
    rss_feed = BeautifulSoup(xml, 'html.parser')
    output = rss_feed.find('item')
    if 'custom_parse' in blogs[name]:
        output = cleaning.findFirst(name, output)
    else:
        # pick the longest CDATA section in the first <item>; feeds usually
        # put the full post body there
        max_len = 0
        for cd in output.findAll(text=True):
            if isinstance(cd, CData) and len(cd) > max_len:
                output = BeautifulSoup(cd, 'html.parser')
                max_len = len(cd)
    book_creator.createHTML(name, output)
    return output
def GoldPrice():
    url = 'https://www.goldtraders.or.th/default.aspx'
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    allclass = [
        'DetailPlace_uc_goldprices1_lblBLSell',
        'DetailPlace_uc_goldprices1_lblBLBuy',
        'DetailPlace_uc_goldprices1_lblOMSell',
        'DetailPlace_uc_goldprices1_lblOMBuy'
    ]
    allprice = []
    for al in allclass:
        rawdata = data.find_all('span', {'id': al})
        allprice.append(float(rawdata[0].text.replace(',', '')))
    # Thai headers: gold bar sell / buy, gold ornament sell / buy
    header = [
        'ทองคำแท่ง-ขายออก', 'ทองคำแท่ง-รับซื้อ',
        'ทองรูปพรรณ-ขายออก', 'ทองรูปพรรณ-รับซื้อ'
    ]
    result = dict(zip(header, allprice))
    return result
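# Usage sketch for GoldPrice(); the keys are the Thai headers above and the
# numeric values here are illustrative only:
# GoldPrice()   # -> {'ทองคำแท่ง-ขายออก': 26650.0, 'ทองคำแท่ง-รับซื้อ': 26550.0, ...}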
from bs4 import BeautifulSoup

# name_Array and cryptoList are module-level state from the original script
# (assumed): a list of coin names and a list of coin objects exposing
# an add_price() method.


def crypto_scrape_continue():
    price_array = []
    raw_url = 'https://coinmarketcap.com/'
    page = req(raw_url)
    page_html = page.read()
    page.close()
    page_soup = BeautifulSoup(page_html, "html.parser")
    table = page_soup.find(id="currencies")
    crypto_containers = table.tbody.find_all('tr')
    for container in crypto_containers:
        cryptoNames = container.findAll("a", {
            "class": "currency-name-container link-secondary night-mode-bold"
        })
        name_Array.append(cryptoNames[0].string)
    for container in crypto_containers:
        cryptoPrices = container.findAll("a", {"class": "price"})
        price_array.append(cryptoPrices[0].string)
    print("Printing all Cryptocurrency name and prices:")
    for x in range(len(price_array)):
        print(name_Array[x], " ", price_array[x])
    for y in range(len(price_array)):
        cryptoList[y].add_price(price_array[y])
def Checkprice(name='bitcoin', sttime='20171201', etime=dt):
    # dt is a module-level default for "today" as a YYYYMMDD string
    # (assumed from the original context)
    url = ('https://coinmarketcap.com/currencies/{}/historical-data/'
           '?start={}&end={}').format(name, sttime, etime)
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    table = data.findAll('tr', {'class': 'text-right'})
    update = []
    open_price = []
    close_price = []
    dictdata = {}
    for row in table:
        column = row.findAll('td')
        update.append(column[0].text)
        open_price.append(float(column[1].text))
        close_price.append(float(column[4].text))
        dictdata[column[0].text] = {'open': float(column[1].text),
                                    'close': float(column[4].text)}
    # the lists are reversed to oldest-first order, but only dictdata is returned
    update.reverse()
    open_price.reverse()
    close_price.reverse()
    return dictdata
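# Usage sketch for Checkprice(); the date range is an arbitrary example and
# the key format follows the site's date column as scraped:
# prices = Checkprice('bitcoin', '20171201', '20171231')
# prices['Dec 31, 2017']   # -> {'open': ..., 'close': ...}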
import requests
from urllib.request import urlopen as req
from bs4 import BeautifulSoup as bs4
from flask import request, render_template


# Flask view function (the route decorator lives elsewhere in the original app)
def index():
    if request.method == 'POST':
        # obtain the search string entered in the form
        searchString = request.form['content'].replace(" ", "")
        try:
            # prepare the URL to search for the product on Flipkart
            flipkart_url = "https://www.flipkart.com/search?q=" + searchString
            uClient = req(flipkart_url)  # request the webpage from the internet
            flipkartPage = uClient.read()  # read the webpage
            uClient.close()  # close the connection to the web server
            flipkart_html = bs4(flipkartPage, "html.parser")  # parse the webpage as HTML
            # search for the tag that wraps each product link
            boxes = flipkart_html.findAll("div", {"class": "bhgxx2 col-12-12"})
            # the first 3 members of the list contain no relevant information, so delete them
            del boxes[0:3]
            box = boxes[0]  # take the first result (for demo)
            # extract the actual product link
            productLink = "https://www.flipkart.com" + box.div.div.div.a['href']
            prodRes = requests.get(productLink)  # get the product page from the server
            prod_html = bs4(prodRes.text, "html.parser")  # parse the product page as HTML
            # find the HTML section containing the customer comments
            commentBoxes = prod_html.find_all('div', {'class': "_3nrCtb"})
            # (the original also sketched saving the details to a MongoDB
            # collection or a local CSV file here; that code was left commented out)
            reviews = []  # initialize an empty list for reviews
            # iterate over the comment section to get each customer's details
            for commentBox in commentBoxes:
                try:
                    name = commentBox.div.div.find_all('p', {'class': '_3LYOAd _3sxSiS'})[0].text
                except Exception:
                    name = 'No Name'
                try:
                    rating = commentBox.div.div.div.div.text
                except Exception:
                    rating = 'No Rating'
                try:
                    commentHead = commentBox.div.div.div.p.text
                except Exception:
                    commentHead = 'No Comment Heading'
                try:
                    commentTag = commentBox.div.div.find_all('div', {'class': ''})
                    customerComment = commentTag[0].div.text
                except Exception:
                    customerComment = 'No Customer Comment'
                # save the details for this review to a dictionary
                pydict = {"Product": searchString, "Name": name, "Rating": rating,
                          "CommentHead": commentHead, "Comment": customerComment}
                reviews.append(pydict)  # append the comment to the review list
            # show the reviews to the user
            return render_template('results.html', reviews=reviews)
        except Exception:
            return 'something is wrong'
import messenger  # the project's messaging helper (assumed): sticker(), sendtext(), sendimage()


def checkprice(stock, check):
    url = ('https://www.settrade.com/C04_01_stock_quote_p1.jsp'
           '?txtSymbol={}&ssoPageId=9&selectPage=1').format(stock)
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    rawdata = data.find_all('div', 'col-xs-6')
    stock = rawdata[0].text  # stock name
    price = float(rawdata[2].text)  # stock price
    update = data.find_all('div', 'flex-item text-left padding-8')
    update1 = update[0].text
    # printing [update1] as a list reveals the hidden characters (tabs,
    # newlines), which tells us what to strip out with replace()
    update1 = update1.replace('\n', '')
    update1 = update1.replace('สถานะตลาด : Closed', '')  # 'Market status: Closed'
    update1 = update1.replace('ข้อมูลล่าสุด', '')[1:]  # drop the 'Latest data' label and the leading space
    if price < check:
        messenger.sticker(623, 4)
        text = '\n{}\nStock: {} price:{} baht'.format(update1, stock, price)
        messenger.sendtext(text)
        messenger.sendimage(
            'https://img.freepik.com/free-photo/happy-man-with-cash-dollars-flying-home-office-rich-from-business-online-concept_1150-4999.jpg?size=626&ext=jpg'
        )
# A simpler variant of Temperature() that just prints the result:
def Temperature(pid):
    url = 'https://www.tmd.go.th/province.php?id={}'.format(pid)
    webopen = req(url)  # open the page
    page_html = webopen.read()  # read the HTML
    webopen.close()
    data = soup(page_html, 'html.parser')  # convert to a Soup object
    # target element, e.g.:
    # <TD width='100%' align='left' style='FONT-SIZE:40px; color: #F6E207;
    #     padding-left:25px;' class='strokeme'>28.2 °C</TD>
    province = data.select("span.title")
    province = province[0].text
    temp = data.find_all('td', {'class': 'strokeme'})
    temp = temp[0].text  # .text strips the surrounding tags
    print(province, temp)
def checkprice(stock):
    url = ('https://www.settrade.com/C04_01_stock_quote_p1.jsp'
           '?txtSymbol={}&ssoPageId=9&selectPage=1').format(stock)
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    rawdata = data.find_all('div', 'col-xs-6')
    stock = rawdata[0].text  # stock name
    price = float(rawdata[2].text)  # stock price
    update = data.find_all('div', 'flex-item text-left padding-8')
    update1 = update[0].text
    # printing [update1] as a list reveals the hidden characters (tabs,
    # newlines), which tells us what to strip out with replace()
    update1 = update1.replace('\n', '')
    update1 = update1.replace('สถานะตลาด : Closed', '')  # 'Market status: Closed'
    update1 = update1.replace('ข้อมูลล่าสุด', '')[1:]  # drop the 'Latest data' label and the leading space
    text = '\n{}\nStock: {} price:{} baht'.format(update1, stock, price)
    print(text)
    print('----' * 10)
    messenger.sendtext(text)
def checkprice(name='bitcoin', start='20200101', end='20200131'):
    url = ('https://coinmarketcap.com/currencies/{}/historical-data/'
           '?start={}&end={}').format(name, start, end)
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    table = data.findAll('tr')
    list_days = []
    list_dict = {}
    for row in table[3:]:
        rw = row.findAll('div')
        days = []
        for i, r in enumerate(rw):
            if 0 < i < 5:
                days.append(float(r.text.replace(',', '')))  # open/high/low/close
            elif i > 4:
                days.append(int(r.text.replace(',', '')))  # volume / market cap
            else:
                days.append(r.text.replace(',', ''))  # date string
        list_days.append(days)
        list_dict[days[0]] = days
    return (list_days, list_dict)
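# Usage sketch for the historical checkprice(); return shape per the code
# above, date-key format as scraped from the site:
# days, by_date = checkprice('bitcoin', '20200101', '20200131')
# days[0]                  # [date, open, high, low, close, volume, market cap]
# by_date['Jan 31, 2020']  # the same row keyed by its date string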
def checkprice(stockcode):
    url = ('http://www.settrade.com/C04_02_stock_historical_p1.jsp?txtSymbol='
           + stockcode + '&ssoPageId=10&selectPage=2')
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    stockweb = soup(page_html, 'html.parser')
    data = stockweb.findAll('div', {'class': 'col-xs-6'})
    stockname = data[0].text
    stockprice = data[2].text
    return (stockname, stockprice)

#checkprice('HANA')
#x, y = checkprice('TMB')
#print(x)
#print(y)
def checkstock(code):
    url = ('https://www.settrade.com/C04_02_stock_historical_p1.jsp'
           '?txtSymbol={}&ssoPageId=10&selectPage=2').format(code)
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    price = data.findAll('div', {'class': 'col-xs-6'})
    title = price[0].text
    stockprice = price[2].text
    # strip whitespace noise, then slice off the fixed-length label text
    change = price[3].text
    for ch in ('\n', '\r', '\t', ' '):
        change = change.replace(ch, '')
    change = change[11:]
    pchange = price[4].text
    for ch in ('\n', '\r', ' '):
        pchange = pchange.replace(ch, '')
    pchange = pchange[12:]
    update = data.findAll('span', {'class': 'stt-remark'})
    stockupdate = update[0].text[13:]
    return [title, stockprice, change, pchange, stockupdate]
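# Usage sketch for checkstock(); list order per the return statement above:
# title, price, change, pchange, updated = checkstock('PTT')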
def get_page(url):
    """Fetch the URL and parse the page with BeautifulSoup; return "" on failure."""
    try:
        client = req(url)
        page = client.read()
        client.close()
        return soup(page, "html.parser")
    except Exception:
        return ""
def ThaiCovid19():
    url = 'https://ddc.moph.go.th/viralpneumonia/'
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    alldata = []
    result = {}
    table = data.findAll('div', {'class': 'popup_blog'})
    for tb in table[:2]:  # only the first two popup_blog tables hold the data rows
        for rw in tb.findAll('tr'):
            alldata.append([r.text for r in rw.findAll('td')])
    # Thai keys below: update time, cumulative cases, new cases, severe cases,
    # deaths, recovered, cumulative / new patients under investigation
    result['อัพเดต'] = f'{alldata[0][0]} {alldata[9][0]}'
    result['ผู้ป่วยสะสม'] = alldata[3][0]
    result['ผู้ป่วยรายใหม่'] = alldata[3][1]
    result['ผู้ป่วยรุนแรง'] = alldata[5][0]
    result['ผู้ป่วยเสียชีวิต'] = alldata[5][1]
    result['ผู้ป่วยกลับบ้านแล้ว'] = alldata[5][2]
    result['ผู้ป่วยเฝ้าระวังสะสม'] = alldata[8][0]
    result['ผู้ป่วยเฝ้าระวังรายใหม่'] = alldata[8][1]
    # The DDC site was redesigned: it now reports cumulative screened
    # travellers instead of the hospitalised / discharged / under-observation
    # rows, which shifted the remaining rows down to start at index 11.
    for d in alldata[11:]:
        result['ผู้เดินทางที่คัดกรองสะสมจาก' + d[0]] = d[1]
    result['อ้างอิง'] = url  # reference URL
    return result
def GlobalCovid19():
    url = 'https://www.worldometers.info/coronavirus/'
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    table = data.findAll('table', {'id': 'main_table_countries_today'})
    country = table[0].findAll('tr')
    # header row
    result = {'header': {'list': None}}
    hd = country[0].findAll('th')
    result['header']['list'] = [hd[i].text for i in range(9)]
    # one entry per country row, keyed by the lowercased country name
    for rw in country[1:]:
        cl = rw.findAll('td')
        index = cl[0].text.lower()
        result[index] = {
            'list': [cl[i].text for i in range(9)],
            'country': cl[0].text,
            'total': cl[1].text,
            'new_cases': cl[2].text,
            'total_deaths': cl[3].text,
            'new_deaths': cl[4].text,
            'total_recovered': cl[5].text,
            'active_cases': cl[6].text,
            'serious_critical': cl[7].text,
            'totalcase_per1million': cl[8].text,
        }
    # rename the site's 'Total:' row to a cleaner key
    result['total'] = result['total:']
    del result['total:']
    return result
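# Usage sketch for GlobalCovid19(); country keys are the lowercased names
# scraped from the table:
# covid = GlobalCovid19()
# covid['header']['list']       # column names
# covid['thailand']['total']    # cumulative cases for one country
# covid['total']['new_deaths']  # the worldwide summary row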
def html_parser(url):
    """Open the web page, get the HTML and parse it."""
    client = req(url)
    page_html = client.read()
    client.close()
    page_soup = soup(page_html, "html.parser")
    return page_soup
import json


def getJSONResponse(extension):
    base_url = 'https://min-api.cryptocompare.com/data/'
    complete_url = base_url + extension
    response = req(complete_url)
    data = response.read()
    # the endpoint returns JSON, so decode the response bytes directly
    json_data = json.loads(data)
    return json_data
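# Usage sketch for getJSONResponse(); the path is cryptocompare's single-price
# endpoint, shown here as an illustrative value:
# getJSONResponse('price?fsym=BTC&tsyms=USD')   # -> {'USD': ...}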
# bs = BeautifulSoup (from bs4 import BeautifulSoup as bs); the 'lxml' parser
# requires the lxml package to be installed.
def __init__(self, url):
    page_client = req(url)
    page_html = page_client.read()
    page_client.close()
    page_soup = bs(page_html, "lxml")
    search = page_soup.find(
        'div', id='taplc_location_reviews_list_resp_hr_resp_0')
    self.url = url
    self.links = set()
    self.search = search
# An earlier revision of ThaiCovid19(), from before the DDC site update noted
# above: the hospitalised / discharged / under-observation rows still existed.
def ThaiCovid19():
    url = 'https://ddc.moph.go.th/viralpneumonia/'
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    alldata = []
    result = {}
    table = data.findAll('div', {'class': 'popup_blog'})
    for tb in table[:2]:  # only the first two popup_blog tables hold the data rows
        for rw in tb.findAll('tr'):
            alldata.append([r.text for r in rw.findAll('td')])
    result['อัพเดต'] = f'{alldata[0][0]} {alldata[12][0]}'
    result['ผู้ป่วยสะสม'] = alldata[3][0]
    result['ผู้ป่วยรายใหม่'] = alldata[3][1]
    result['ผู้ป่วยรุนแรง'] = alldata[5][0]
    result['ผู้ป่วยเสียชีวิต'] = alldata[5][1]
    result['ผู้ป่วยกลับบ้านแล้ว'] = alldata[5][2]
    result['ผู้ป่วยเฝ้าระวังสะสม'] = alldata[8][0]
    result['ผู้ป่วยเฝ้าระวังรายใหม่'] = alldata[8][1]
    result['รักษาพยาบาลอยู่รพ.'] = alldata[11][0]
    result['รักษาพยาบาลกลับบ้าน'] = alldata[11][1]
    result['รักษาพยาบาลสังเกตอาการ'] = alldata[11][2]
    for d in alldata[14:]:
        result['ผู้เดินทางที่คัดกรองสะสมจาก' + d[0]] = d[1]
    result['อ้างอิง'] = url
    return result
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup


def correct():
    url = "http://www.overcomingbias.com/feed"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
    toSend = Request(url=url, headers=headers)
    xml = urlopen(toSend).read()
    soup = BeautifulSoup(xml, 'html.parser')
    # rss_feed = soup.find('content:encoded')
    rss_feed = soup.find('item')
    # a context manager guarantees the file is flushed and closed after writing
    with open("testing.html", "w") as text_file:
        text_file.write(str(rss_feed))
def checkprice(name=None):
    url = ('https://www.settrade.com/C04_02_stock_historical_p1.jsp'
           '?txtSymbol={}&ssoPageId=10&selectPage=2').format(name)
    webopen = req(url)  # fetch the page without launching a browser
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    price = data.findAll('div', {'class': 'col-xs-6'})
    st_title = price[0].text
    st_price = price[2].text
    text = "STOCK: {} PRICE: {} BAHT".format(st_title, st_price)
    print(text)
def free(code):
    url = ('https://www.settrade.com/C04_05_stock_majorshareholder_p1.jsp'
           '?txtSymbol={}&ssoPageId=14&selectPage=5').format(code)
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    freefloat = data.findAll('div', {'class': 'row separate-content'})
    # strip newlines, keep the last 6 characters (the free-float figure)
    # and drop the '%' sign
    freefloat = freefloat[0].text
    freefloat = freefloat.replace('\n', '').replace('\r', '')
    freefloat = freefloat[-6:].replace('%', '')
    return [freefloat]
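# Usage sketch for free(); the figure is illustrative only:
# free('PTT')   # -> ['48.50'], the free float percentage as a one-item list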
def scrapingNAV(self):
    webopen = req(self.url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    nav = data.find(id="ctl00_ContentPlaceHolder1_lblNAV").text
    fundCode = data.find(id="ctl00_ContentPlaceHolder1_lblFundCode").text
    lastUpdateDate = data.find(
        id="ctl00_ContentPlaceHolder1_lblLastUpdateDate").text
    return fundCode, nav, lastUpdateDate
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from flask import request, Response

# returnFirst() is a project helper (assumed, not defined here) that fetches
# the first article behind the list of <guid> links.


def findFirst():
    url = request.values.get('url')
    # url = 'https://stratechery.com/feed'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
    toSend = Request(url=url, headers=headers)
    xml = urlopen(toSend).read()
    soup = BeautifulSoup(xml, 'xml')  # the 'xml' parser requires lxml
    links = soup.find_all('guid')
    firstArticle = returnFirst(links)
    html = firstArticle.html
    r = Response(html, status=200)
    return r
import yaml
from bs4 import BeautifulSoup as bs

# get_urls() and parse_expertise() are project helpers (assumed): one yields
# the pages to crawl, the other extracts (question_list, expertise) from a page.


def main():
    questions = {}
    urls = get_urls()
    for url in urls:
        try:
            client = req(url)
        except Exception:
            continue
        expertise_page = client.read()
        expertise_page_html = bs(expertise_page, "html.parser")
        question_list, expertise = parse_expertise(expertise_page_html, url)
        questions[expertise] = question_list
    with open(r'questions.yaml', 'w', encoding="utf-8") as file:
        yaml.dump(questions, file, allow_unicode=True)
import pandas as pd


def mobile_cost(brand):
    print(brand)
    model_list = []
    price_list = []
    ratings_list = []
    for i in range(1, 10):
        my_url = ('https://www.flipkart.com/search?q=' + brand +
                  '%20mobiles&otracker=search&otracker1=search'
                  '&marketplace=FLIPKART&as-show=on&as=off&page=' + str(i))
        uclient = req(my_url)
        page_html = uclient.read()
        page_soup = soup(page_html, 'html.parser')
        uclient.close()
        # the attrs argument must be a dict mapping attribute -> value
        containers = page_soup.findAll("div", {"class": "_1-2Iqu row"})
        for container in containers:
            model_container = container.findAll("div", {"class": "_3wU53n"})
            model = model_container[0].text.strip().replace(",", "-")
            model_list.append(model)
            price_container = container.findAll("div", {"class": "_1vC4OE _2rQ-NK"})
            price = price_container[0].text.strip()
            price = price.replace(",", "").replace("₹", "INR")
            price_list.append(price)
            ratings_container = container.findAll("div", {"class": "hGSR34"})
            if len(ratings_container) > 0:
                ratings = ratings_container[0].text.strip()
            else:
                ratings = 'NA'
            ratings_list.append(ratings)
            print(model + "," + price + "," + ratings + "\n")
    df1 = pd.DataFrame(model_list, columns=['model_list'])
    df1['price_list'] = price_list
    df1['ratings_list'] = ratings_list
    df1.to_csv(brand + '_Mobiles.csv', index=False)
def checkprice(name='PTT'):
    url = ('https://www.settrade.com/C04_02_stock_historical_p1.jsp?txtSymbol='
           + name + '&ssoPageId=10&selectPage=2')
    webopen = req(url)
    page_html = webopen.read()
    webopen.close()
    data = soup(page_html, 'html.parser')
    price = data.findAll('div', {'class': 'col-xs-6'})
    stockname = price[0].text
    stockprice = price[2].text
    print('-------------------')
    print(f"Stock Name: {stockname}")
    print(f"Price: {stockprice} Baht")
    print('-------------------')
from urllib.request import Request as req, urlopen as open_url
from bs4 import BeautifulSoup as soup

my_url = ('http://fmovies.cloud/movie/filter/movie/latest/'
          '1-2-120-125-7-25-126-119-112-122-6-121-10-118-123-3-23-22/all/all/all/all/3/')
file_name = 'fmovies_data.csv'
f = open(file_name, 'w+')
header = 'MOVIE_NAME,QUALITY\n'
f.write(header)

headers = {}
headers['User-Agent'] = ('Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 '
                         '(KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17')
access_request = req(my_url, headers=headers)
client = open_url(access_request)
page_html = client.read()
client.close()

page_soup = soup(page_html, 'html.parser')
containers = page_soup.findAll('div', {'class': 'ml-item'})
for container in containers:
    moviename_container = container.findAll('span', {'class': 'mli-info'})
    movie_name = moviename_container[0].text.strip()
    quality_container = container.findAll('span', {'class': 'mli-quality'})
    # the original snippet ended here; the lines below complete the loop with
    # the obvious write-out step (an assumption, not from the source)
    quality = quality_container[0].text.strip()
    f.write(movie_name.replace(',', '-') + ',' + quality + '\n')
f.close()