import random
from time import sleep

import lxml.html as lh

# xpathDict, db, ct, and tableName are module-level names defined elsewhere
# in the script; extractHours is likewise a helper that is not shown here.


def iterateOverDataDivs(driver):
    elements = findElementsByXPath(driver, xpathDict['data_div'])
    length = len(elements)
    while True:
        # Walk the listing divs on the current page from last to first.
        while length != 0:
            try:
                dataDict = dict()
                elements[length - 1].click()
                sleep(2)
                dataDict['Name'] = extractTextFromElement(
                    driver, xpathDict['shop_name']).replace(
                        '\nUnclaimed', '').replace('\nClaimed', '')
                website = extractAttrFromElement(
                    driver, xpathDict['shop_website'], "value")
                if website is None:
                    website = "N/A"
                dataDict['Website'] = website
                dataDict['Full_Address'] = extractTextFromElement(
                    driver, xpathDict['shop_address'])
                dataDict['Rating'] = extractAttrFromElement(
                    driver, xpathDict['shop_rating'], 'title')
                dataDict['Phone'] = extractTextFromElement(
                    driver, xpathDict['shop_number'])
                #dataDict['Email'] = extractAttrFromElement(driver, xpathDict['shop_email'], 'href')
                dataDict['Reviews'] = extractTextFromElement(
                    driver, xpathDict['shop_review'])
                dataDict['Hours'] = extractHours(driver)
                dataDict['City'] = ct
                db.writeToDB(dataDict, tableName)
                # Return to the results page before touching the next div.
                driver.back()
                sleep(2)
            except Exception as e:
                print('Exception occurred-------------------->>', e)
            finally:
                # driver.back() rebuilds the DOM, so re-query the stale list.
                elements = findElementsByXPath(driver, xpathDict['data_div'])
                length -= 1
        # Paginate: stop when there is no "next" button left to click.
        nextButton = findElementByXPath(driver, xpathDict['next_button'])
        if nextButton is None:
            print("No more elements")
            break
        nextButton.click()
        sleep(3)
        elements = findElementsByXPath(driver, xpathDict['data_div'])
        length = len(elements)
    return ""
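# --- Assumed Selenium helpers (not shown in the source) ----------------------
# iterateOverDataDivs relies on wrappers that swallow lookup failures and
# return None / [] instead of raising; that contract is implied by the
# `website is None` and `nextButton is None` checks above. A minimal sketch
# under those assumptions:
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException


def findElementsByXPath(driver, xpath):
    # find_elements already returns an empty list when nothing matches.
    return driver.find_elements(By.XPATH, xpath)


def findElementByXPath(driver, xpath):
    try:
        return driver.find_element(By.XPATH, xpath)
    except NoSuchElementException:
        return None


def extractTextFromElement(driver, xpath):
    element = findElementByXPath(driver, xpath)
    return element.text if element is not None else None


def extractAttrFromElement(driver, xpath, attr):
    element = findElementByXPath(driver, xpath)
    return element.get_attribute(attr) if element is not None else None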
# Variant of scrapePage that routes requests through a proxy and writes
# straight to the "texas" table.
def scrapePage(s, url):
    print(url)
    resp = s.get(url, proxies=proxies)
    # Randomized delay between requests to stay under rate limits.
    sleep(random.uniform(0.9, 5.1))
    tree = lh.fromstring(resp.text)
    dataDict = dict()
    name = removeExtraText(
        getTextFromElement(tree.xpath(xpathDict['shop_name'])))
    dataDict['Name'] = name
    dataDict['Website'] = getAttrFromElement(
        tree.xpath(xpathDict['shop_website']), 'href')
    dataDict['Phone'] = getTextFromElement(
        tree.xpath(xpathDict['shop_number']))
    dataDict['Full_Address'] = getTextFromElement(
        tree.xpath(xpathDict['shop_address']))
    dataDict['Reviews'] = getTextFromElement(
        tree.xpath(xpathDict['shop_review']))
    dataDict['Rating'] = getTextFromElement(
        tree.xpath(xpathDict['shop_rating']))
    db.writeToDB(dataDict, "texas")
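# --- Assumed request setup (not shown in the source) -------------------------
# Both scrapePage variants expect a requests.Session `s`, and the one above
# also reads a module-level `proxies` mapping. A plausible setup, with a
# placeholder proxy URL and user agent, might look like:
import requests

proxies = {
    "http": "http://user:pass@proxy.example.com:8080",
    "https": "http://user:pass@proxy.example.com:8080",
}

s = requests.Session()
s.headers.update({"User-Agent": "Mozilla/5.0"})  # any realistic desktop UA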
# Variant of scrapePage with URL dedup (aList) and email extraction; writes
# to the table named by the module-level tableName.
def scrapePage(s, url):
    if url not in aList:
        global tableName
        resp = s.get(url)
        sleep(random.uniform(0.9, 5.1))
        tree = lh.fromstring(resp.text)
        dataDict = dict()
        name = removeExtraText(
            getTextFromElement(tree.xpath(xpathDict['shop_name'])))
        dataDict['Name'] = name
        dataDict['Website'] = getAttrFromElement(
            tree.xpath(xpathDict['shop_website']), 'href')
        email = getAttrFromElement(tree.xpath(xpathDict['shop_email']), 'href')
        # Guard against a missing email link before stripping the mailto: prefix.
        dataDict['Email'] = email.replace("mailto:", '') if email else email
        dataDict['Phone'] = getTextFromElement(
            tree.xpath(xpathDict['shop_number']))
        dataDict['Full_Address'] = getTextFromElement(
            tree.xpath(xpathDict['shop_address']))
        #dataDict['Reviews'] = getTextFromElement(tree.xpath(xpathDict['shop_review']))
        #dataDict['Rating'] = getTextFromElement(tree.xpath(xpathDict['shop_rating']))
        db.writeToDB(dataDict, tableName)
        aList.append(url)
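# --- Assumed lxml helpers (not shown in the source) ---------------------------
# getTextFromElement / getAttrFromElement take the raw list that tree.xpath()
# returns and degrade to None when it is empty; removeExtraText strips the
# "Claimed"/"Unclaimed" badge text, mirroring the replaces done in
# iterateOverDataDivs. A minimal sketch under those assumptions:
def getTextFromElement(elements):
    return elements[0].text_content().strip() if elements else None


def getAttrFromElement(elements, attr):
    return elements[0].get(attr) if elements else None


def removeExtraText(text):
    if text is None:
        return None
    return text.replace('\nUnclaimed', '').replace('\nClaimed', '')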
# Variant of writeToDb that saves a single record.
def writeToDb(d):
    try:
        db.writeToDB(d, tableName)
    except Exception as e:
        # Log and continue rather than letting one bad record kill the run.
        print("Error saving", e)
# Variant of writeToDb that flushes every record buffered in the
# module-level dataArray.
def writeToDb():
    for d in dataArray:
        try:
            db.writeToDB(d, tableName)
        except Exception as e:
            print("Error saving", e)
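# --- Assumed db module (not shown in the source) ------------------------------
# Every snippet funnels records through db.writeToDB(dict, table). A minimal
# sqlite3-backed sketch of that interface; the real module may target a
# different database, and the dynamic SQL here assumes trusted key/table names.
import sqlite3

_conn = sqlite3.connect("scraper.db")


def writeToDB(d, table):
    # Create the table on first use, one TEXT column per dict key.
    cols = ", ".join(f'"{k}" TEXT' for k in d)
    _conn.execute(f'CREATE TABLE IF NOT EXISTS "{table}" ({cols})')
    names = ", ".join(f'"{k}"' for k in d)
    placeholders = ", ".join("?" for _ in d)
    _conn.execute(
        f'INSERT INTO "{table}" ({names}) VALUES ({placeholders})',
        list(d.values()),
    )
    _conn.commit()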