def likes():
    hashtag_list = open("hashtaglist.txt").readlines()
    tag = -1
    goalLikes = int(
        input('\033[0;33mHow many likes should we do in each hashtag?:\033[m '))
    for hashtag in hashtag_list:
        currentLikes = 0
        tag = tag + 1
        print('\033[0;33mLiking the hashtag:\033[m ' + hashtag_list[tag])
        webdriver.get('https://www.instagram.com/explore/tags/' + hashtag_list[tag] + '/')
        image_img = webdriver.find_element_by_xpath(
            '/html/body/div[1]/section/main/article/div[2]/div/div[1]/div[1]')
        sleep(1)
        image_img.click()
        sleep(1)
        try:
            while currentLikes != goalLikes:
                sleep(3)
                image_like_svg = webdriver.find_element_by_css_selector(
                    'body > div._2dDPU.CkGkG > div.zZYga > div > article > div.eo2As > section.ltpMr.Slqrh > span.fr66n > button > div > span > svg')
                image_like_label = image_like_svg.get_attribute("aria-label")
                if image_like_label == "Like":
                    image_like_svg.click()
                    currentLikes += 1
                    print('Liked images: \033[0;33m{}\033[m'.format(currentLikes))
                    print("Looking for image...")
                    sleep(randint(3, 7))
                    image_next = webdriver.find_element_by_class_name(
                        'coreSpriteRightPaginationArrow')
                    image_next.click()
                elif image_like_label == "Unlike":
                    print('\033[0;33mImage already liked.\033[m')
                    image_next = webdriver.find_element_by_class_name(
                        'coreSpriteRightPaginationArrow')
                    image_next.click()
                    sleep(randint(1, 4))
                else:
                    sleep(5)
                    image_next = webdriver.find_element_by_class_name(
                        'coreSpriteRightPaginationArrow')
                    image_next.click()
                    sleep(1)
        except:
            print("Oops, Instagram is having trouble in this tag, let's go to the next one.")
            image_next = webdriver.find_element_by_class_name(
                'coreSpriteRightPaginationArrow')
            image_next.click()
            continue
        print("\033[0;33mFinished liking hashtag:\033[m " + hashtag_list[tag])
        sleep(1)
    print("\033[0;33mFinished liking all hashtags in the hashtag list.\033[m")
    goHome()
    sleep(2)
    menu()
def code_2fa_recieve(self, webdriver, code: str):
    # The target element carries the compound class "input challengeEmailCode",
    # which the class-name locator does not accept, so a CSS selector is used.
    wait_for_element = WebDriverWait(webdriver, TIMEOUT).until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '.input.challengeEmailCode')))
    code_input = webdriver.find_element_by_css_selector('.input.challengeEmailCode')
    code_input.click()
    code_input.send_keys(code + Keys.RETURN)
    self.status = 'Epicgames 2fa activated'
def _send_all_keys_to_forms(webdriver, form_html_classes, keys):
    for key, form_html_class in zip(keys, form_html_classes):
        print(key, form_html_class)
        form = webdriver.find_element_by_class_name(form_html_class)
        form.send_keys(key)
    triggering_form = webdriver.find_element_by_class_name('query')
    triggering_form.submit()
    return None
def downloadGameResults(webdriver, partidos):
    print('Games to download: {}'.format(len(partidos)))
    cont = 1
    total = len(partidos)
    # we loop through the gathered download links
    for partido in partidos:
        print('Downloading Game result {}/{} {}'.format(str(cont), str(total), partido.titulo))
        try:
            cont += 1
            webdriver.get(partido.link)
            time.sleep(2)
            # if not ('P.Pospuesto' in partido.titulo):
            download = webdriver.find_element_by_class_name('s_pdf_download_link')
            partido.download_link = download.get_attribute('href')
            webdriver.get(download.get_attribute('href'))
            partido.downloaded = True
        except:
            # partido.download_link = ''
            partido.downloaded = False
            print('Encountered an issue downloading results for {}'.format(partido.titulo))
            # traceback.print_exc()
    return partidos
def getVideoURL():
    elem = webdriver.find_element_by_class_name("_97aPb")
    source_code = elem.get_attribute("outerHTML")
    soup = BeautifulSoup(source_code, features="html.parser")
    for link in soup.find_all('video'):
        if link.get('src') not in urls:
            urls.append(link.get('src'))
def countNumberOfPost():
    try:
        print("Counting number of posts")
        return int(webdriver.find_element_by_class_name('g47SY').text)
    except:
        print("Some error occurred")
        return 0
def scrape_page(webdriver, links, username):
    '''This function will go to all links provided and scrape each picture
    for the number of likes and the caption. If the link is a video, no
    information is recorded. The function will only save the caption if the
    title is the identified user.

    Parameters: the active webdriver, a set of picture links, the username
    of the page you are scraping
    Returns: a list of lists with the number of likes and caption
    '''
    picture_info = []
    for link in links:
        # Open new tab
        webdriver.execute_script("window.open('');")
        time.sleep(3)
        # Switch to the new window
        webdriver.switch_to.window(webdriver.window_handles[1])
        webdriver.get(link)
        time.sleep(5)
        try:
            likes_list = webdriver.find_elements_by_class_name('zV_Nj')
            if len(likes_list) != 0:  # If the length is 0, then it is a video
                if len(likes_list) == 1:  # No common friends liked the photo
                    num_likes = webdriver.find_elements_by_class_name('Nm9Fw')[0].text.split(' ')[0]
                else:
                    num_likes = int(likes_list[1].text.split(' ')[0]) + 1
                try:
                    title = webdriver.find_element_by_class_name('_6lAjh').text
                    if title == username:
                        caption_list = webdriver.find_elements_by_xpath("//div[@class='C4VMK']//span")
                        # This works but is not active since I did not use the information:
                        # num_of_comments = len(caption_list)
                        caption = caption_list[0].text
                    else:
                        caption = None  # the user was not the title
                except:
                    caption = None  # photo does not have a caption or any comments
                picture_info.append([num_likes, caption])
        except:
            pass
        webdriver.close()
        # Switch focus back to main tab
        webdriver.switch_to.window(webdriver.window_handles[0])
        time.sleep(5)
    return picture_info
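A minimal usage sketch for scrape_page, assuming a logged-in driver bound to the module-level name webdriver; the post links and username below are placeholders, not values from the original script:

# Hypothetical inputs: in practice the links would come from crawling the profile grid first.
links = {'https://www.instagram.com/p/EXAMPLE1/', 'https://www.instagram.com/p/EXAMPLE2/'}
picture_info = scrape_page(webdriver, links, 'some_username')
for num_likes, caption in picture_info:
    print(num_likes, (caption or '')[:50])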
def like(link):
    webdriver.switch_to.window(webdriver.window_handles[0])
    webdriver.get(link)
    sleep(2)
    like = webdriver.find_element_by_class_name('fr66n')
    print(like)
    like.click()
    sleep(2)
def extract_days_to_expiration(webdriver):
    dt_expiration = webdriver.find_element_by_class_name(
        name='bc-options-toolbar__second-row')
    a = dt_expiration.find_element_by_xpath(
        "//strong[contains(text(), 'Days')]")
    out = re.match(string=a.text, pattern='^([0-9]+)')
    return out.group(1)
def get_signal(ticker, interval):
    # Declare variable
    analysis = []
    # Open tradingview's site
    webdriver.get(
        "https://s.tradingview.com/embed-widget/technical-analysis/?locale=en#%7B%22interval%22%3A%22{}%22%2C%22width%22%3A%22100%25%22%2C%22isTransparent%22%3Afalse%2C%22height%22%3A%22100%25%22%2C%22symbol%22%3A%22{}%22%2C%22showIntervalTabs%22%3Atrue%2C%22colorTheme%22%3A%22dark%22%2C%22utm_medium%22%3A%22widget_new%22%2C%22utm_campaign%22%3A%22technical-analysis%22%7D"
        .format(interval, ticker))
    webdriver.refresh()
    # Wait for site to load elements
    while len(webdriver.find_elements_by_class_name(
            "speedometerSignal-pyzN--tL")) == 0:
        sleep(0.1)
    # Recommendation
    recommendation_element = webdriver.find_element_by_class_name(
        "speedometerSignal-pyzN--tL")
    analysis.append(recommendation_element.get_attribute('innerHTML'))
    # Counters
    counter_elements = webdriver.find_elements_by_class_name(
        "counterNumber-3l14ys0C")
    # Sell
    analysis.append(int(counter_elements[0].get_attribute('innerHTML')))
    # Neutral
    analysis.append(int(counter_elements[1].get_attribute('innerHTML')))
    # Buy
    analysis.append(int(counter_elements[2].get_attribute('innerHTML')))
    last_analysis = analysis
    signal = last_analysis[0]
    num_sell = last_analysis[1]
    num_neutral = last_analysis[2]
    num_buy = last_analysis[3]
    line = '-' * 50
    ticker = ticker.strip('"')
    interval = interval.strip('"')
    line = line.strip('"')
    signal = signal.strip('"')
    ticker = json.dumps(ticker)
    interval = json.dumps(interval)
    signal = json.dumps(signal)
    num_sell = json.dumps(num_sell)
    num_neutral = json.dumps(num_neutral)
    num_buy = json.dumps(num_buy)
    line = json.dumps(line)
    value = (f'TradingView Data for {ticker} for {interval}: ' + '<br/>' + line + '<br/>'
             + f'Overall Signal: {signal}' + '<br/>'
             + f'Number of Sell Indicators: {num_sell}' + '<br/>'
             + f'Number of Neutral Indicators: {num_neutral}' + '<br/>'
             + f'Number of Buy Indicators: {num_buy}')
    return value
def find_patch_commit_parent(wd, patch_commit_id, cms_url):
    time.sleep(random.randint(SLEEP_TIME_MIN, SLEEP_TIME_MAX))
    wd.get("{}/commit/{}".format(cms_url, patch_commit_id))
    try:
        href = wd.find_element_by_class_name('sha').get_attribute('href')
        href = href.split('/')[-1]
    except:
        href = "Not Found !"
    print(href)
    return href
def form_data(webdriver):
    input1 = webdriver.find_element_by_tag_name("input")
    input1.send_keys("Ivan")
    input2 = webdriver.find_element_by_name("last_name")
    input2.send_keys("Petrov")
    input3 = webdriver.find_element_by_class_name("city")
    input3.send_keys("Smolensk")
    input4 = webdriver.find_element_by_id("country")
    input4.send_keys("Russia")
    button = webdriver.find_element_by_xpath(xpath)
    button.click()
    return
def initNJUJw(user, pwd):
    userInput = webdriver.find_element_by_name('userName')
    pwdInput = webdriver.find_element_by_name('password')
    userInput.send_keys(user)
    pwdInput.send_keys(pwd)
    sub = webdriver.find_element_by_class_name('Btn')
    sub.click()
    check = 'UserInfo'
    try:
        webdriver.find_element_by_id(check)
    except Exception as e:
        raise e
        return False  # unreachable: the raise above exits first
def Search():
    SpeakText("Which Song?")
    # SELECTION OF SONG
    with sr.Microphone() as source2:
        print("Listening...")
        audio2 = r.listen(source2)
        MyText = r.recognize_google(audio2)
        MyText = MyText.lower()
    webdriver.get("https://www.youtube.com/results?search_query=" + MyText)
    sleep(2)
    x = webdriver.find_element_by_class_name(
        "style-scope ytd-video-renderer").click()
    sleep(2)
def register(driver):
    register_link = driver.find_element_by_link_text("Click to Register!")
    register_link.click()
    form_fields = driver.find_elements_by_class_name("form-control")
    username = form_fields[0]
    username.send_keys(TEST_USER)
    email = form_fields[1]
    email.send_keys("*****@*****.**")
    password = form_fields[2]
    password.send_keys(TEST_PW)
    repeat_password = form_fields[3]
    repeat_password.send_keys(TEST_PW)
    register = driver.find_element_by_class_name("btn")
    register.click()
def smart_wait(webdriver, method, element_path):
    for i in range(10):
        if i >= 9:
            print("timeout")
            break
        try:
            if method == "xpath":
                if webdriver.find_element_by_xpath(element_path):
                    break
            if method == "class_name":
                if webdriver.find_element_by_class_name(element_path):
                    break
            if method == "tag_name":
                if webdriver.find_element_by_tag_name(element_path):
                    break
        except:
            print("wait for find element")
            time.sleep(1)
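For comparison, Selenium's own WebDriverWait plus expected_conditions covers the same polling pattern; the sketch below is an assumed equivalent of smart_wait (the helper name smart_wait_builtin is made up here), not code from the original:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def smart_wait_builtin(webdriver, method, element_path, timeout=10):
    # Map the snippet's "method" strings onto Selenium's By locators.
    by = {"xpath": By.XPATH, "class_name": By.CLASS_NAME, "tag_name": By.TAG_NAME}[method]
    # Poll (every 0.5 s by default) until the element is present or the timeout expires.
    return WebDriverWait(webdriver, timeout).until(
        EC.presence_of_element_located((by, element_path)))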
def openFirstPicFromThumbnail():
    print("Opening First Thumbnail")
    sleep(2)
    try:
        openFirst = webdriver.find_element_by_class_name('v1Nh3')
        # openFirst = webdriver.find_element_by_xpath("/html/body/div[1]/section/main/div/div[3]/article/div[1]/div/div[1]/div[1]")
        hover = ActionChains(webdriver).move_to_element(openFirst).click().perform()
        openFirst.click()
        sleep(3)
    except:
        tryAgain = input(
            "Cannot open image: would you like to try again? Press 'Y' for YES, 'N' for NO: ")
        if tryAgain in ('Y', 'y', 'YES', 'yes'):
            openFirstPicFromThumbnail()
        else:
            exit()
def find(webdriver, by, css_selector_val):
    '''
    Wrapper function of selenium python to find an element using a locator
    and locator value (css_selector_val).

    Arguments
    ---------
    webdriver - object of selenium.webdriver.chrome.webdriver.WebDriver.
    by - element locator name.
         constraint: expected values are id, name, xpath, link_text,
         partial_link_text, tag_name, class_name, css_selector;
         any other value will return None.
    css_selector_val - value for the element locator, i.e. arg 'by'.
         example: to find all elements with class_name=contact, the value
         for css_selector_val is 'contact'.

    Return
    ---------
    WebElement - if the value of arg 'by' is an expected value, or
    None - if the value of arg 'by' is an unexpected value.
    '''
    if by == 'id':
        return webdriver.find_element_by_id(css_selector_val)
    if by == 'name':
        return webdriver.find_element_by_name(css_selector_val)
    if by == 'xpath':
        return webdriver.find_element_by_xpath(css_selector_val)
    if by == 'link_text':
        return webdriver.find_element_by_link_text(css_selector_val)
    if by == 'partial_link_text':
        return webdriver.find_element_by_partial_link_text(css_selector_val)
    if by == 'tag_name':
        return webdriver.find_element_by_tag_name(css_selector_val)
    if by == 'class_name':
        return webdriver.find_element_by_class_name(css_selector_val)
    if by == 'css_selector':
        return webdriver.find_element_by_css_selector(css_selector_val)
    return None
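The same dispatch can be written as a single call to Selenium's generic find_element with the By constants; this is a sketch under that assumption (find_generic and _BY_MAP are illustrative names, not part of the original):

from selenium.webdriver.common.by import By

_BY_MAP = {
    'id': By.ID, 'name': By.NAME, 'xpath': By.XPATH,
    'link_text': By.LINK_TEXT, 'partial_link_text': By.PARTIAL_LINK_TEXT,
    'tag_name': By.TAG_NAME, 'class_name': By.CLASS_NAME,
    'css_selector': By.CSS_SELECTOR,
}

def find_generic(webdriver, by, locator_value):
    # Return None for unexpected 'by' values, matching the wrapper above.
    if by not in _BY_MAP:
        return None
    return webdriver.find_element(_BY_MAP[by], locator_value)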
def searchVideoLength(webdriver, search_url, base_url, content_title):
    time.sleep(random.randint(3, 6))
    try:
        webdriver.get(search_url)
        content_length_info = webdriver.find_element_by_class_name(
            'txt-type01').text
        content_length = int(content_length_info.split()[2][:-1]) * 60
    except Exception:
        webdriver.switch_to_alert().accept()
        """webdriver.get(base_url)
        button_list = webdriver.find_elements_by_tag_name('button')
        ActionChains(webdriver).move_to_element(button_list[0]).click(button_list[0]).perform()
        input_content_title = webdriver.find_element_by_id('search-ip')
        input_content_title.send_keys(content_title)
        input_content_title.send_keys(Keys.ENTER)
        best_search_result = webdriver.find_element_by_xpath("//span/strong/span")
        ActionChains(webdriver).move_to_element(best_search_result).click(best_search_result).perform()
        content_length_info = webdriver.find_element_by_class_name('txt-type01').text
        content_length = int(content_length_info.split()[2][:-1])*60"""
        return -1
    return content_length
def retrieve_image(search_query, webdriver, dir_name, img_name):
    try:
        logger.log("image_scraping function start")
        image_name = ''
        # Variable that holds the number of images to fetch
        number_of_images_to_fetch = 1
        index = 0
        # Scroll down the webpage to load more images
        scroll_down(webdriver)
        time.sleep(5)
        # Save all of the html image elements from our google search
        # 'rg_i' is the class name that the images have
        image_elements = webdriver.find_elements_by_class_name('rg_i')
        target_dir = basest_dir + "/" + dir_name
        # Check if the directory that we want to put our images in already exists
        if not os.path.exists(target_dir):
            # If not, make that directory
            os.mkdir(target_dir)
        found_image_count = 0
        attempt_count = 0
        logger.log("begin finding images")
        for element in image_elements:
            attempt_count += 1
            try:
                # Check if you've downloaded all the images you want
                if found_image_count == number_of_images_to_fetch:
                    break
                # Click on the image you want to download
                element.click()
                # Give the browser some time to catch up
                time.sleep(2)
                # After clicking on the image, get the larger version
                found_image = webdriver.find_element_by_class_name('n3VNCb')
                # Find the source of the image, i.e. its url
                image_url = found_image.get_attribute('src')
                logger.log("attempt " + str(attempt_count) + ": " + image_url[0:10])
                # Make sure that the image url is a valid source
                if 'http' in image_url:
                    logger.log("successful image found")
                    # Download this image as a BytesIO object
                    image_file = io.BytesIO(requests.get(image_url).content)
                    # Convert our BytesIO object into an actual image
                    image = Image.open(image_file).convert('RGB')
                    # Create the name of the image we're downloading
                    image_name = img_name + '.jpg'
                    logger.log(image_name)
                    # Build the path that we want to save the image to
                    # The directory will be the same name as the search query
                    image_path = target_dir + '/' + image_name
                    # Save the image
                    image.save(image_path, 'JPEG', quality=85)
                    found_image_count += 1
            except:
                logger.log("couldn't find enhanced images")
        # Close the web browser
        # webdriver.close()
        if attempt_count > 3:
            logger.log("multiple attempts: " + search_query + "<=======")
        else:
            logger.log(image_name)
        return image_name
    except:
        logger.log("retrieve image crash")
        webdriver.close()
for index, row in nomadlist_data[73:100].iterrows():
    driver.get(row.URL)
    driver.execute_script('window.scrollTo(0, 4000)')
    with open(str("nomadlist" + str(row.Date) + '.txt'), "w", encoding="utf-8") as f:
        f.write(driver.page_source)

element = driver.find_element_by_css_selector(".dynamic-item-75")
driver.get('https://web.archive.org/web/20180718015001/https://nomadlist.com/')
if not element.is_displayed():
    driver.execute_script("arguments[0].scrollIntoView();", element)
driver.execute_script("arguments[0].scrollIntoView();", element)
webdriver.find_element_by_class_name('dynamic-item-75')
driver.execute_script('scrollTo(0, 4000)')

'''Below can be ignored for now'''
# Trying to get all the city data is kind of a lot--might just focus on Tbilisi
# nomadlist_city_data = pd.DataFrame(columns=["Date", "City", "Nomad Score", "Nomad Cost", "Internet Speed", "Air Quality", "Temperature", "Region"])
# nomadlist_city_data['Date'] = day_url_dict.keys()
# nomadlist_city_data = nomadlist_city_data.set_index([nomadlist_city_data['Date']])
#
#
# for key in day_url_dict.keys():
#     url = day_url_dict[key]
#     driver.get(url)
#     for city in
#         nomadlist_city_data.loc[str(key)].
chrome_options = Options()
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--headless")
webdriver = webdriver.Chrome(options=chrome_options,
                             executable_path='/usr/local/bin/chromedriver')
sleep(3)
webdriver.get('https://www.instagram.com/accounts/login/?source=auth_switcher')
sleep(5)
username = webdriver.find_element_by_name('username')
username.send_keys('limevilleofficial')
password = webdriver.find_element_by_name('password')
password.send_keys('Ic_ndoit2')
button_login = webdriver.find_element_by_class_name("L3NKy")
button_login.click()
sleep(5)

hashtag_list = [
    'quotesdaily',
    'quotes',
    'motivationalquotes',
    'quotestoliveby',
    'lovequotes',
    'love',
]
hashtag = choice(hashtag_list)
data = pd.read_csv('insta.csv')
prev_user_list = list(data['0'])
new_followed = []
def nextMedia():
    nextMedia = webdriver.find_element_by_class_name("coreSpriteRightChevron")
    nextMedia.click()
def nextPost():
    nextPost = webdriver.find_element_by_class_name(
        "coreSpriteRightPaginationArrow")
    nextPost.click()
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup
# Imports needed by the calls below
from time import sleep
from selenium import webdriver

webdriver = webdriver.Chrome()
sleep(2)
webdriver.get('https://www.instagram.com/accounts/login/?source=auth_switcher')
sleep(20)
print("working")
username = webdriver.find_element_by_name('username')
username.send_keys('username')
password = webdriver.find_element_by_name('password')
password.send_keys('password')
button_login = webdriver.find_element_by_class_name("y3zKF")
button_login.click()
sleep(10)
notification = webdriver.find_element_by_class_name("HoLwm")
notification.click()
sleep(2)
webdriver.get('https://www.instagram.com/virat.kohli/')
sleep(10)
soup = BeautifulSoup(webdriver.page_source, 'html.parser')
# print(soup)
allimages = soup.select('img')
print(allimages)
imglink = []
for img in allimages:
def checkElementExist(element_class):
    try:
        webdriver.find_element_by_class_name(element_class)
    except NoSuchElementException:
        return False
    return True
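A short usage sketch for checkElementExist, assuming the module-level webdriver used throughout these snippets; the class name below is only an example borrowed from the other Instagram snippets:

from selenium.common.exceptions import NoSuchElementException  # required by checkElementExist

# Only click the pagination arrow when it is actually present on the page.
if checkElementExist('coreSpriteRightPaginationArrow'):
    webdriver.find_element_by_class_name('coreSpriteRightPaginationArrow').click()
else:
    print('No next-post arrow found on this page.')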
if len(passw.get_attribute('value')) == 6:  # enter password from sms on your phone, only 6 numbers
    env = webdriver.find_element_by_css_selector(
        '#react-root > section > main > div > article > div > div:nth-child(1) > div > form > div.Igw0E.IwRSH.eGOV_._4EzTm.MGdpg.CovQj.jKUp7.iHqQ7 > button')
    env.click()
    sleep(7)
else:
    sleep(5)
but = webdriver.find_element_by_css_selector(
    'body > div.RnEpo.Yx5HN > div > div > div.mt3GC > button.aOOlW.HoLwm')
but.click()
sleep(2)
search = webdriver.find_element_by_class_name('TqC_a')
search.click()
searc = webdriver.find_element_by_css_selector(
    '#react-root > section > nav > div._8MQSO.Cx7Bp > div > div > div.LWmhU._0aCwM > input')
searc.send_keys('marizhelby')
sleep(2)
s = webdriver.find_element_by_class_name('Ap253')
s.click()
sleep(2)
e = webdriver.find_element_by_css_selector(
    '#react-root > section > main > div > header > section > ul > li:nth-child(2) > a')
chromedriver_path = 'C:/Users/tomas/PycharmProjects/Repo_Learning/Web_Scraping/chromedriver.exe'  # Change this to your own chromedriver path!
webdriver = webdriver.Chrome(executable_path=chromedriver_path)
sleep(2)
webdriver.get('https://www.instagram.com/accounts/login/?source=auth_switcher')
sleep(3)
username = webdriver.find_element_by_name('username')
username.send_keys('*****@*****.**')
password = webdriver.find_element_by_name('password')
password.send_keys('Widzew01+')
login_in_enter = webdriver.find_element_by_name("password").send_keys(Keys.RETURN)
sleep(3)
alert_no = webdriver.find_element_by_class_name("mt3GC")
alert_no.click()

hashtag_list = ['traveler', 'landscape', 'drone', 'dronephotos', 'dronephotography',
                'goprophotography', 'traveldestination', 'photographer', 'vacation',
                'instatravel', 'travelblogger', 'photooftheday', 'instapic',
                'inspiration', 'instacool', 'happiness', 'blogger', 'travel',
                'follow', 'me', 'art', 'style']
# hashtag_list = ['fashion', 'follow', 'me', 'art', 'style', 'family', 'amazing', 'bestoftheday', 'nice', 'landscape', 'photographer', 'vacation', 'instagramers', 'instatravel']

prev_user_list = []  # if it's the first time you run it, use this line and comment out the two below
# prev_user_list = pd.read_csv('20181203-224633_users_followed_list.csv', delimiter=',').iloc[:, 1:2]  # useful to build a user log
# prev_user_list = list(prev_user_list['0'])
new_followed = []
# User Credentials
username = webdriver.find_element_by_name('username')
username.send_keys('enter_your_username')
password = webdriver.find_element_by_name('password')
password.send_keys('enter_your_password')

# Click 'not now' buttons
buttons = webdriver.find_elements_by_tag_name('button')
buttons[2].click()
sleep(3)
buttons = webdriver.find_elements_by_tag_name('button')
buttons[1].click()
sleep(3)
notnow = webdriver.find_element_by_class_name("aOOlW")
notnow.click()
sleep(3)

# Desired Hashtags
hashtag_list = ['travel', 'summer', 'design']
tag = -1

# Loop Through Hashtags
for hashtag in hashtag_list:
    tag += 1
    webdriver.get('https://www.instagram.com/explore/tags/' + hashtag_list[tag] + '/')
    sleep(5)
    first_thumbnail = webdriver.find_element_by_xpath(
        '//*[@id="react-root"]/section/main/article/div[1]/div/div/div[1]/div[1]/a/div')