예제 #1
0
def main():
    """Run a complete SportStats scraping session.

    Asks ``GUI('SportStats')`` for the URLs to scrape and the settings,
    publishes them through the ``urlPages`` and ``outputFolder`` module
    globals, and runs ``PageScrapping`` on every URL with one shared Chrome
    driver.

    A URL whose scrape raises is retried immediately. Once more than 30
    seconds have passed since the first attempt for that URL, the error is
    reported through ``GUIChangeError``, the driver is closed, ``GUIKill`` is
    called and the function returns.
    """
    from selenium import webdriver

    global urlPages, outputFolder

    urlPages, settings = GUI('SportStats')

    outputFolder = settings['Path']

    lenurls = len(urlPages)
    if lenurls > 0:
        # Only start a browser when there is something to scrape.
        driver = webdriver.Chrome()

    for urlPage in urlPages:
        urlInfo = 'URL: ' + urlPage
        initialTime = int(time.time())
        while True:
            try:
                PageScrapping(driver, urlInfo, urlPage)
                break
            except Exception:
                # Catch Exception rather than everything, so KeyboardInterrupt
                # and SystemExit are never swallowed and retried.
                if int(time.time()) - initialTime > 30:
                    GUIChangeError(urlInfo + '\n Runtime ALOK Error - 202')
                    driver.quit()
                    GUIKill()
                    # The driver is closed: another retry could never succeed.
                    return

    if lenurls > 0:
        driver.quit()

    GUIKill()
    return
예제 #2
0
def CollectInnerData(driver):
    """Collect the pop-up table behind every name link of the current page.

    Each link matched by ``//tr[@role='row']//td[4]//a`` is clicked through
    JavaScript. The ``athlete-popup`` element is then polled until its
    ``innerHTML`` or ``style`` differs from the previous snapshot. The new
    HTML is parsed with ``TableToData``; its first table, indexed by column 0
    and transposed, is appended to the result.

    A link whose click raises contributes an empty row instead. Clicks that
    fail before any pop-up has been read are only counted, and their empty
    rows are appended right after the first successful row.

    If the pop-up does not change within 100 seconds the error is reported
    through ``GUIChangeError``, the driver is closed and ``GUIKill`` is
    called.

    Args:
        driver: Browser driver exposing ``find_elements_by_xpath`` and
            ``execute_script``.

    Returns:
        The accumulated table (presumably a pandas DataFrame), or ``None``
        when no pop-up was read.
    """
    import pandas as pd

    data = None
    innerXpath = "//div[@id='athlete-popup']"
    void = 0  # clicks that failed before the first pop-up was read

    innerTableStyleOld = CollectTable(driver,
                                      innerXpath)[0].get_attribute('style')
    innerTableHTMLOld = CollectTable(driver,
                                     innerXpath)[0].get_attribute('innerHTML')
    wait_clickability_element(driver, "//tr[@role='row']//td[4]//a")
    namesHTML = driver.find_elements_by_xpath("//tr[@role='row']//td[4]//a")

    # NOTE(review): DataFrame.append was removed in pandas 2.0, so this
    # function needs an older pandas - confirm the pinned version.
    for nameHTML in namesHTML:
        try:
            driver.execute_script("arguments[0].click();", nameHTML)
        except Exception:
            if data is None:
                void += 1
            else:
                data = data.append(pd.Series(), ignore_index=True)
            continue

        initialTime = int(time.time())
        while True:
            try:
                innerTableHTML = CollectTable(
                    driver, innerXpath)[0].get_attribute('innerHTML')
                innerTableStyle = CollectTable(
                    driver, innerXpath)[0].get_attribute('style')
            except Exception:
                # A failed read counts as "not changed yet". Falling through
                # (instead of `continue`) keeps the timeout below reachable
                # even when every read fails.
                popupChanged = False
            else:
                popupChanged = (innerTableHTMLOld != innerTableHTML
                                or innerTableStyleOld != innerTableStyle)

            if popupChanged:
                innerTableStyleOld = innerTableStyle
                innerTableHTMLOld = innerTableHTML
                innerDataRow = TableToData(innerTableHTML)[0].set_index(0).T
                if data is None:
                    data = innerDataRow
                    # Add the empty rows owed for the earlier failed clicks.
                    for _ in range(void):
                        data = data.append(pd.Series(), ignore_index=True)
                else:
                    data = data.append(innerDataRow)
                break

            if int(time.time()) - initialTime > 100:
                # Presumably GUIKill ends the program; if it returns, the
                # polling simply goes on - TODO confirm.
                GUIChangeError('Runtime Error - 83')
                driver.quit()
                GUIKill()

    return data
예제 #3
0
def PageScrapping(driver, urlInfo, urlPage):
    """Scrape every result page reachable from ``urlPage`` and write the file.

    The page is opened through ``CollectData``. While the current page shows
    at least one name link, the function updates the GUI status, clicks every
    row control whose ``aria-expanded`` attribute contains ``'true'``,
    collects the page content with ``CollectContentPage`` and clicks the
    "next" arrow. It then waits until the table wrapper's HTML changes before
    handling the following page. The loop ends when no "next" arrow exists.

    If the table does not change within 10 seconds of clicking "next", the
    error is reported through ``GUIChangeError``, the driver is closed and
    ``GUIKill`` is called.

    Args:
        driver: Browser driver handed to ``CollectData``; the driver it
            returns is used from then on.
        urlInfo: Text prefixed to the page number in the GUI status line.
        urlPage: URL of the results to scrape.
    """
    driver = CollectData(driver, urlPage)
    data = None
    pageNumber = 1

    # Keep paging for as long as the current page lists at least one name.
    while driver.find_elements_by_xpath("//tr[@role='row']//td[4]//a"):

        GUIChangeStatus(urlInfo + ' Page: ' + str(pageNumber))

        viewbtnHTML = driver.find_elements_by_xpath(
            "//tr[@role='row']//div[contains(@aria-expanded, 'true')]")
        for viewbtn in viewbtnHTML:
            driver.execute_script("arguments[0].click();", viewbtn)
            time.sleep(1)

        data = CollectContentPage(data, driver)
        # Snapshot of the table, used to detect that the next page loaded.
        firstLineHTMLOld = driver.find_elements_by_xpath(
            "//div[@class='ui-datatable-tablewrapper']")[0].get_attribute(
                'innerHTML')
        nxtbtnHTML = driver.find_elements_by_xpath(
            "//div[@id='mainForm:pageNav']//a[contains(@class, 'fa-angle-right')]"
        )
        if not nxtbtnHTML:
            # No "next" arrow: this was the last page.
            break

        driver.execute_script("arguments[0].click();", nxtbtnHTML[0])
        pageNumber += 1
        initialTime = int(time.time())
        while True:
            try:
                firstLineHTML = driver.find_elements_by_xpath(
                    "//div[@class='ui-datatable-tablewrapper']"
                )[0].get_attribute('innerHTML')
            except Exception:
                # A failed read counts as "not changed yet". Falling through
                # (instead of `continue`) keeps the timeout below reachable
                # even when every read fails.
                tableChanged = False
            else:
                tableChanged = firstLineHTMLOld != firstLineHTML

            if tableChanged:
                time.sleep(1)
                break

            if int(time.time()) - initialTime > 10:
                # Presumably GUIKill ends the program; if it returns, the
                # polling simply goes on - TODO confirm.
                GUIChangeError('Runtime Error - 170')
                driver.quit()
                GUIKill()

    data = ProcessData(data)

    CreateFile(urlPage, data, driver)
예제 #4
0
def CollectContent(data,driver):
    """Distribute the result cells of the current page over the data columns.

    The cells are the elements matched by
    ``//div[contains(@class,'link-to-irp')]/div``. The text of cell ``i`` is
    appended to ``data[i % len(data)]``. A cell that cannot be stored is
    reported through ``GUIChangeError`` and skipped.

    Args:
        data: Indexable collection of column lists to extend in place, or
            ``None`` to start from ``CollectHeader(driver)``.
        driver: Browser driver exposing ``find_elements_by_xpath``.

    Returns:
        The same ``data`` object, extended with the cell texts.
    """
    results = driver.find_elements_by_xpath("//div[contains(@class,'link-to-irp')]/div")

    if data is None:
        data = CollectHeader(driver)
    num_columns = len(data)

    for i, result in enumerate(results):
        try:
            data[i % num_columns].append(result.text)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit still propagate instead of being reported as a
            # scraping error.
            GUIChangeError('Runtime Error - 67')

    return data
예제 #5
0
def CreateFile(urlPage, data, driver):
    """Build the output file name and heading for a race and write the file.

    The race title is the text of the first ``h1`` under ``div#main``. The
    date/type line is the text of the first ``p`` there. When that line
    splits on ``•`` into exactly two parts, the file name is
    ``<title>_<date>_<type>``. Otherwise only the title is used and
    "Procedure Error - 127" is reported through ``GUIChangeError``. Every
    ``/`` in the name is replaced by ``-``, and the result is appended to the
    ``outputFolder`` global with a ``.txt`` suffix.

    Args:
        urlPage: URL of the scraped page, written as the first heading line.
        data: Scraped data handed unchanged to ``CreateFinalFile``.
        driver: Browser driver exposing ``find_elements_by_xpath``.
    """
    global outputFolder

    raceTitleHTML = driver.find_elements_by_xpath("//div[@id='main']//h1[1]")
    raceTitle = raceTitleHTML[0].text
    raceDateTypeHTML = driver.find_elements_by_xpath("//div[@id='main']//p[1]")
    # Read the date/type line once. A page without it yields an empty string,
    # so the title-only fallback below is used and the heading cannot fail
    # with an IndexError on the missing element.
    raceDateTypeText = raceDateTypeHTML[0].text if raceDateTypeHTML else ''

    try:
        [raceDate, raceType] = re.split(r"•", raceDateTypeText)
        fileString = raceTitle + "_" + raceDate + "_" + raceType
    except Exception:
        # Typically the unpacking ValueError raised when the line does not
        # split into exactly two parts.
        fileString = raceTitle
        GUIChangeError("Procedure Error - 127")

    fileString = fileString.replace('/', '-')
    # outputFolder is joined as-is - presumably it already ends with a path
    # separator; verify against the GUI settings.
    fileName = outputFolder + fileString + ".txt"

    heading = (urlPage + '\n' + raceTitle + '\n' + raceDateTypeText +
               '\n\nline 1\nline 2\nline 3\nline 4\n')

    CreateFinalFile(fileName, data, heading)
예제 #6
0
def main():
    """Run a complete AthLinks scraping session.

    Asks ``GUI('AthLinks')`` for the URLs to scrape and the settings,
    publishes them through the ``urlPages`` and ``outputFolder`` module
    globals, and runs ``PageScrapping`` on every URL with one shared Chrome
    driver. The driver is started maximized with log level 3 and its window
    is then minimized.

    A URL whose scrape raises is retried immediately. Once more than 30
    seconds have passed since the first attempt for that URL, the error is
    reported through ``GUIChangeError``, the driver is closed, ``GUIKill`` is
    called and the function returns.
    """

    global urlPages,outputFolder

    urlPages,settings = GUI('AthLinks')

    outputFolder = settings['Path']

    lenurls = len(urlPages)
    if lenurls>0:
        # Only start a browser when there is something to scrape.
        chrome_options = webdriver.ChromeOptions()
        #chrome_options.add_argument("headless")
        #chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_argument('--log-level=3')
        driver = webdriver.Chrome(options=chrome_options)
        driver.minimize_window()

    for urlPage in urlPages:
        urlInfo = 'URL: '+urlPage
        initialTime = int(time.time())
        while True:
            try:
                PageScrapping(driver,urlInfo,urlPage)
                break
            except Exception:
                # Catch Exception rather than everything, so KeyboardInterrupt
                # and SystemExit are never swallowed and retried.
                if int(time.time())-initialTime>30:
                    GUIChangeError(urlInfo+'\n Runtime Error - 186')
                    driver.quit()
                    GUIKill()
                    # The driver is closed: another retry could never succeed.
                    return

    if lenurls>0:
        driver.quit()

    GUIKill()
    return