    # initialize selenium webdriver pointed at the local chromedriver binary
    driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver')

    # iteratively load website / raw table data
    for offset in range(OFFSET_RANGE[0], OFFSET_RANGE[1], 100):

        driver.get(HLTV_BASE_URL + str(offset))
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")

        # transcribe raw html to condensed tabular data
        headers = driver.find_elements_by_class_name('standard-headline')
        header_text = [header.text for header in headers]
        match_time = calc_average_header_date(header_text)
        table = driver.find_elements_by_class_name('result')
        result_text = [row.text for row in table]
        match_data = transcribe_table_data(result_text, match_time)
        logger.info('Finished processing %s rows for offset %s.',
                    len(table), offset)

        # insert to db
        if ENVIRONMENT == 'PRODUCTION' and len(table) > 0:
            logger.info('Upserting %s rows into database.', len(table))
            postgres_db_upsert(match_data, DB_CREDENTIALS)
        elif len(table) == 0:
            logger.warning('HLTV data scrape produced 0 data points.')
        else:
            logger.info('Produced data: %s', match_data)

        # sleep between requests to avoid hammering the site
        time.sleep(random.uniform(1, 3))
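
The helper calc_average_header_date is not shown above. A minimal sketch, assuming the standard-headline elements read like 'Results for May 3rd 2020' (the exact HLTV header format and the '%B %d %Y' layout are assumptions), could average the parsed dates:

import re
from datetime import datetime
from statistics import mean

def calc_average_header_date(header_text):
    # parse each header into a date, stripping the ordinal suffix ('3rd' -> '3');
    # the 'Results for ...' prefix is an assumption, not taken from the original
    timestamps = []
    for header in header_text:
        raw = header.replace('Results for ', '')
        raw = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', raw)
        timestamps.append(datetime.strptime(raw, '%B %d %Y').timestamp())
    # represent the whole page by the mean of its header dates
    return datetime.fromtimestamp(mean(timestamps))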
Example no. 2
if __name__ == '__main__':

	logger.info('Starting scrape job for rivalry table data.')

	# initialize headless selenium webdriver
	chrome_options = webdriver.ChromeOptions()
	chrome_options.add_argument('--headless')
	chrome_options.add_argument('--no-sandbox')
	chrome_options.add_argument('--disable-dev-shm-usage')
	driver = webdriver.Chrome(options=chrome_options)

	# load website / raw table data
	driver.get(RIVALRY_URL)
	time.sleep(5)  # give webpage time to load table
	driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")  # scroll down to load dynamic content
	time.sleep(1)
	table = driver.find_element_by_id('__nuxt')
	table = table.text.split('\n')
	table = transcribe_table_data(table)

	logger.info('Finished processing %s rows.', len(table))

	# insert to db
	if ENVIRONMENT == 'PRODUCTION' and len(table) > 0:
		logger.info('Inserting %s rows into database.', len(table))
		postgres_db_insert(table, DB_CREDENTIALS)
	elif len(table) == 0:
		logger.warning('EGB data scrape produced 0 data points.')
	else:
		logger.info('Produced data: %s', table)
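
postgres_db_insert (and the postgres_db_upsert used in the first example) are likewise external helpers. A minimal sketch with psycopg2, assuming DB_CREDENTIALS is a dict of connection keyword arguments and each row is a (team_one, team_two, odds_one, odds_two) tuple against a hypothetical csgo_odds table, might be:

import psycopg2

def postgres_db_insert(table, db_credentials):
    # table and column names here are assumptions for illustration only
    conn = psycopg2.connect(**db_credentials)
    try:
        # 'with conn' commits on success and rolls back on error
        with conn, conn.cursor() as cur:
            cur.executemany(
                'INSERT INTO csgo_odds (team_one, team_two, odds_one, odds_two)'
                ' VALUES (%s, %s, %s, %s)',
                table,
            )
    finally:
        conn.close()

An upsert variant would add an ON CONFLICT ... DO UPDATE clause keyed on whatever uniquely identifies a match.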
Example no. 3
    # load website
    driver.get(GGBET_URL)
    time.sleep(5)  # give webpage time to load table
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")  # scroll down to load dynamic content
    time.sleep(1)

    # transcribe data table
    table = driver.find_element_by_id('betting__container').text
    table_text = remove_header(table)  # .text already yields plain text, so no HTML parsing is needed
    table_text = insert_row_breaks(table_text)
    table_rows = table_text.split('_ROW_BREAK_')
    formatted_data = transcribe_table_data(table_rows)[1:]
    if len(formatted_data) == 1:
        logger.info('Finished processing %s row', len(formatted_data))
    else:
        logger.info('Finished processing %s rows', len(formatted_data))

    # insert to db
    if ENVIRONMENT == "PRODUCTION":
        if len(formatted_data) > 0:
            logger.info('Inserting %s rows into database', len(formatted_data))
            postgres_db_insert(formatted_data, DB_CREDENTIALS)
        else:
            logger.warning('GGBET data scrape produced 0 data points')
    elif ENVIRONMENT == "DEVELOPMENT":
        logger.info('Produced data: %s', formatted_data)
    else:
        pass  # any other ENVIRONMENT value takes no action
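
remove_header and insert_row_breaks are also external helpers; remove_header presumably strips a leading header block from the scraped text. Only the _ROW_BREAK_ token is visible in the original, so a sketch of insert_row_breaks that starts a new row at each kick-off time (that heuristic is an assumption) could be:

import re

def insert_row_breaks(table_text):
    # mark a row boundary wherever a line begins with an HH:MM time;
    # the time-based heuristic is an assumption, only the token is original
    return re.sub(r'(?m)^(?=\d{1,2}:\d{2}\b)', '_ROW_BREAK_', table_text)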