Esempio n. 1
0
def _extractShopData(driver):
    """Collect the fields of the listing currently shown in *driver*.

    Reads the XPaths from the module-level ``xpathDict`` and the city from
    the module-level ``ct``.

    Returns:
        A dict with the keys Name, Website, Full_Address, Rating, Phone,
        Reviews, Hours and City, inserted in that order.
    """
    dataDict = dict()
    # Remove any '\nUnclaimed' / '\nClaimed' marker from the name text.
    dataDict['Name'] = extractTextFromElement(
        driver, xpathDict['shop_name']).replace('\nUnclaimed', '').replace(
            '\nClaimed', '')
    website = extractAttrFromElement(driver, xpathDict['shop_website'],
                                     "value")
    if website is None:
        website = "N/A"
    dataDict['Website'] = website
    dataDict['Full_Address'] = extractTextFromElement(
        driver, xpathDict['shop_address'])
    dataDict['Rating'] = extractAttrFromElement(
        driver, xpathDict['shop_rating'], 'title')
    dataDict['Phone'] = extractTextFromElement(
        driver, xpathDict['shop_number'])
    dataDict['Reviews'] = extractTextFromElement(
        driver, xpathDict['shop_review'])
    dataDict['Hours'] = extractHours(driver)
    dataDict['City'] = ct
    return dataDict


def iterateOverDataDivs(driver):
    """Open every listing on every results page and store its details.

    For the current page, the elements matching ``xpathDict['data_div']``
    are visited from the last to the first: each one is clicked, its fields
    are extracted and written with ``db.writeToDB`` into ``tableName``, and
    the browser is sent back to the results page. When the page is
    exhausted the ``next_button`` element is clicked; the loop ends when no
    such button is found.

    Errors while handling a single listing are printed and the listing is
    skipped. If the failure happened while a listing's detail page was open,
    the browser is navigated back first so the remaining listings and the
    pagination keep working.

    Args:
        driver: browser driver exposing ``back()``; presumably a Selenium
            WebDriver -- confirm against the caller.

    Returns:
        Always the empty string.
    """
    elements = findElementsByXPath(driver, xpathDict['data_div'])
    length = len(elements)
    while True:
        while length != 0:
            # True between a successful click and a successful back().
            onDetailPage = False
            try:
                elements[length - 1].click()
                onDetailPage = True
                sleep(2)
                db.writeToDB(_extractShopData(driver), tableName)
                driver.back()
                onDetailPage = False
                sleep(2)
            except Exception as e:
                print('Exception occured-------------------->>', e)
                if onDetailPage:
                    # Without this the driver stays on the detail page, every
                    # later index fails and the next-page lookup runs on the
                    # wrong page.
                    try:
                        driver.back()
                        sleep(2)
                    except Exception as backError:
                        print('Exception occured-------------------->>',
                              backError)
            finally:
                # Element handles from before the navigation are re-queried
                # on every iteration.
                elements = findElementsByXPath(driver, xpathDict['data_div'])
            length -= 1
        nextButton = findElementByXPath(driver, xpathDict['next_button'])
        if nextButton is None:
            print("No more elements")
            break
        nextButton.click()
        sleep(3)
        elements = findElementsByXPath(driver, xpathDict['data_div'])
        length = len(elements)

    return ""
Esempio n. 2
0
def scrapePage(s, url):
    """Download one shop page through the proxies and store its fields.

    Prints *url*, fetches it with ``s.get`` using the module-level
    ``proxies``, pauses for a random 0.9-5.1 seconds, parses the response
    text with ``lh.fromstring`` and writes Name, Website, Phone,
    Full_Address, Reviews and Rating via ``db.writeToDB`` with the target
    "texas" (presumably a table name -- confirm against ``db``).

    Args:
        s: object exposing ``get(url, proxies=...)``; presumably a
            ``requests`` session -- confirm against the caller.
        url: address of the page to scrape.
    """
    print(url)
    response = s.get(url, proxies=proxies)
    sleep(random.uniform(0.9, 5.1))
    page = lh.fromstring(response.text)

    def textAt(key):
        # Text of whatever the XPath registered under *key* matches.
        return getTextFromElement(page.xpath(xpathDict[key]))

    record = {
        'Name': removeExtraText(textAt('shop_name')),
        'Website': getAttrFromElement(
            page.xpath(xpathDict['shop_website']), 'href'),
        'Phone': textAt('shop_number'),
        'Full_Address': textAt('shop_address'),
        'Reviews': textAt('shop_review'),
        'Rating': textAt('shop_rating'),
    }
    db.writeToDB(record, "texas")
Esempio n. 3
0
def scrapePage(s, url):
    """Scrape one shop page unless its URL was already processed.

    URLs present in the module-level ``aList`` are skipped. Otherwise the
    page is fetched with ``s.get``, a random 0.9-5.1 second pause follows,
    the response text is parsed with ``lh.fromstring`` and Name, Website,
    Email, Phone and Full_Address are written via ``db.writeToDB`` into
    ``tableName``. The URL is appended to ``aList`` only after the write
    returns. Reviews and Rating are not collected here.

    Args:
        s: object exposing ``get(url)``; presumably a ``requests`` session
            -- confirm against the caller.
        url: address of the page to scrape.
    """
    if url in aList:
        return

    response = s.get(url)
    sleep(random.uniform(0.9, 5.1))
    page = lh.fromstring(response.text)

    def textAt(key):
        # Text of whatever the XPath registered under *key* matches.
        return getTextFromElement(page.xpath(xpathDict[key]))

    def hrefAt(key):
        # 'href' attribute of whatever the XPath under *key* matches.
        return getAttrFromElement(page.xpath(xpathDict[key]), 'href')

    record = {
        'Name': removeExtraText(textAt('shop_name')),
        'Website': hrefAt('shop_website'),
        # Keep only the address part of the mailto link.
        'Email': hrefAt('shop_email').replace("mailto:", ''),
        'Phone': textAt('shop_number'),
        'Full_Address': textAt('shop_address'),
    }
    db.writeToDB(record, tableName)
    aList.append(url)
Esempio n. 4
0
def writeToDb(d):
    """Write record *d* into ``tableName`` without propagating save errors.

    Any ``Exception`` raised by ``db.writeToDB`` is reported on stdout with
    its cause and then dropped. ``KeyboardInterrupt`` and ``SystemExit`` are
    deliberately not caught, so the program can still be stopped while a
    save is in progress.

    Args:
        d: the record handed unchanged to ``db.writeToDB``.
    """
    try:
        db.writeToDB(d, tableName)
    except Exception as e:
        print("Error saving:", e)
Esempio n. 5
0
def writeToDb():
    """Write every record of the module-level ``dataArray`` into ``tableName``.

    Each record is saved independently: an ``Exception`` raised by
    ``db.writeToDB`` for one record is reported on stdout with its cause and
    the loop continues with the next record. ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not caught, so a long batch can still be
    interrupted.
    """
    for record in dataArray:
        try:
            db.writeToDB(record, tableName)
        except Exception as e:
            print("Error saving:", e)