def scrape_users(self, usernames: list):
    """Scrape an Instagram Profile for each username.

    Uses this instance's ``headers`` and ``driver`` for every request and
    returns the scraped ``Profile`` objects in input order.
    """
    def _scrape_one(username):
        # Build the Profile and scrape it in place before returning it.
        profile = Profile(username)
        profile.scrape(headers=self.headers, webdriver=self.driver)
        return profile

    return [_scrape_one(username) for username in usernames]
def insta():
    """Flask view: look up a user's HD profile picture and render it.

    Reads the target username from the submitted form field ``name``,
    scrapes only the ``profile_pic_url_hd`` data point, and passes the
    resulting image URL to the ``index.html`` template.

    Fixes: removed a leftover debug ``print`` and the misleading local
    names (``mydict``, ``google``).
    """
    form_data = request.form
    insta_username = str(form_data['name'])
    profile = Profile(insta_username)
    # Restrict scraping to the single key this view needs.
    profile.scrape(keys=['profile_pic_url_hd'])
    image_link = profile['profile_pic_url_hd']
    # Render the picture on the template.
    return render_template('index.html', image_link=image_link)
def test_from_username(profile):
    """``Profile.from_username`` should agree with the fixture profile."""
    expected_username = '******'
    result: Profile = Profile.from_username(username=expected_username)
    # Name and URL must match what the already-scraped fixture reports.
    assert result.name == profile.data_points[0].username
    assert result.url == profile.url
def test_from_username():
    """``from_username`` builds a Profile with the expected name and URL."""
    username = "instagram"
    expected_url = f"https://www.instagram.com/{username}/"
    result: Profile = Profile.from_username(username=username)
    assert result.name == username
    assert result.url == expected_url
# See this tutorial to find your sessionid:
# http://valvepress.com/how-to-get-instagram-session-cookie/
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

# Selenium driver used for the dynamic (scroll-based) post collection.
webdriver = Chrome("path/to/chromedriver.exe")

# An authenticated session cookie is required; paste yours below.
SESSIONID = 'ENTER_YOUR_SESSION_ID_HERE'
headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": f"sessionid={SESSIONID};",
}

# Scrape Joe Biden's profile page.
joe = Profile("joebiden")
joe.scrape(headers=headers)

# Collect the profile's posts, then scrape each one; the 10 s pause keeps
# Instagram from rate limiting / redirecting the session.
posts = joe.get_posts(webdriver=webdriver, login_first=True)
scraped, unscraped = scrape_posts(posts, silent=False, headers=headers, pause=10)
def test_from_soup(self, get_request, page_instance):
    """A Profile built from a BeautifulSoup object scrapes correctly."""
    soup = BeautifulSoup(get_request.text, features='lxml')
    profile = Profile(soup)
    profile.scrape()
    # Follower count must match the fixture scraped from the same page.
    assert profile.followers == page_instance.followers
def test_from_html(self, get_request, page_instance):
    """A Profile built from raw HTML scrapes correctly."""
    profile = Profile(get_request.text)
    profile.scrape()
    # Follower count must match the fixture scraped from the same page.
    assert profile.followers == page_instance.followers
def page_instance(self, url, headers):
    """Fixture: return a Profile scraped from ``url`` using ``headers``."""
    profile = Profile(url)
    profile.scrape(headers=headers)
    return profile
from instascrape import Profile

# Load the pycoders profile and fetch its most recent posts.
profile = Profile.from_username('pycoders')
profile.load()
recent = profile.get_recent_posts()

# Download every photo post (videos are skipped), naming each file
# after its upload timestamp.
for post in recent:
    if post.is_video:
        continue
    stamp = post.upload_date.strftime("%Y-%m-%d %Hh%Mm")
    post.download(f"{stamp}.png")
def page_instance(self):
    """Fixture: a Profile loaded from chris_greening's page."""
    profile = Profile("https://www.instagram.com/chris_greening/")
    profile.load()
    return profile
def profile() -> Profile:
    """Fixture: a statically loaded Profile for chris_greening's page."""
    profile_obj = Profile("https://www.instagram.com/chris_greening/")
    # static_load parses the page without a webdriver.
    profile_obj.static_load()
    return profile_obj
# Scrape the "google" Instagram profile and export its data to CSV.
# Fixes: removed the unused `import json` (and its comment wrongly
# claiming json needs `pip3 install` — it is part of the stdlib).
from instascrape import Profile  # pip3 install insta-scrape
import pandas as pd  # pip3 install pandas

google = Profile.from_username("google")  # declare profile
google.load()  # scrape profile

# to_dict() yields scalar values; wrap each in a list so pandas can
# build a single-row DataFrame from the mapping.
google_data = google.to_dict()
google_data = {key: [val] for key, val in google_data.items()}

# Transpose so each data point becomes its own row, then write the CSV.
df = pd.DataFrame(google_data).transpose()
df.to_csv("google.csv", encoding="utf-8")
# Scrape a profile's posts and download each one as a PNG.
# Fixes: deleted commented-out dead code and renamed the Chrome instance
# to `driver` — the original `webdriver = Chrome(...)` shadowed the
# `webdriver` module imported from selenium.
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts, Post

username = '******'

# Selenium driver for the scroll-based post collection.
driver = Chrome(
    "C:/Users/Tro/Downloads/chromedriver_win32/chromedriver.exe")

# Authenticated request headers; fill in a real sessionid before running.
headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": "sessionid=YOUR_SESSION_ID"
}

pr = Profile(username)
pr.scrape(headers=headers)

posts = pr.get_posts(webdriver=driver, login_first=True)
scraped_posts, unscraped_posts = scrape_posts(posts, headers=headers,
                                              pause=0.5, silent=False)

# Download each scraped post, named after its upload timestamp.
for post in scraped_posts:
    fname = post.upload_date.strftime("%Y-%m-%d %Hh%Mm")
    post.download(f"{fname}.png")
import pandas as pd  # tabular output
from selenium.webdriver import Chrome  # webdriver for dynamic scraping
from instascrape import Profile, scrape_posts
import matplotlib.pyplot as plt  # for plots

# Get the session id from the browser (inspect page source -> Application
# -> cookies -> sessionid) and paste it into the cookie below.
headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": "sessionid=sessionid"
}

# Target Instagram account name, entered interactively.
address = input('Enter Profile Name: ')

# Point selenium at the local chromedriver binary.
webdriver = Chrome("PATH/chromedriver.exe")

profile = Profile(address)
profile.scrape(headers=headers)  # uses the session id above

# Collect the profile's posts (opens an Instagram browser window).
posts = profile.get_posts(webdriver=webdriver, login_first=False)

# If pause is set too low, Instagram may redirect you.
scraped_posts, unscraped_posts = scrape_posts(posts, headers=headers,
                                              pause=10, silent=False)

# Turn the scraped posts into a DataFrame and write it to a CSV file.
posts_data = [post.to_dict() for post in scraped_posts]
posts_df = pd.DataFrame(posts_data)
posts_df.to_csv(address + '.csv', index=False)
print(posts_df[['upload_date', 'comments']])
# Read post shortcodes from input_data.txt, then scrape and print the
# caption of each corresponding nafnafcol post.
import parser_libraries
from selenium import webdriver
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts, Post, Hashtag
import time

# Authenticated request headers; the sessionid must be filled in.
headers = {"user-agent": "Chrome/87.0.4389.23", "cookie": "sessionid= "}

# Each input line appears to be "<label>: <shortcode parts>" — the text
# after the colon is split on '-' to recover the post shortcode.
# NOTE(review): the meaning of the len == 4 case depends on the exact
# input_data.txt format — confirm against a sample file.
f = open('input_data.txt', 'r')
posts_codes = []
for line in f:
    line = line.split(':')[1]
    line = line.split('-')
    if (len(line) == 4):
        # Four '-'-separated pieces: keep only the first token.
        line = line[0].strip(' ')
    else:
        # Otherwise re-join the first two pieces with a '-'.
        line = line[0].strip(' ') + '-' + line[1].strip(' ')
    posts_codes.append(line)
print(posts_codes)

nafnaf = Profile('nafnafcol')
nafnaf.scrape(headers=headers)

# Scrape each post individually; the 10 s sleep spaces out requests
# (presumably to avoid Instagram rate limiting — verify).
for i in range(len(posts_codes)):
    nafnaf_post = Post(posts_codes[i])
    nafnaf_post.scrape()
    print('----------------------')
    print(nafnaf_post.to_dict()['caption'])
    time.sleep(10)
# URL INSTAGRAM WEB SCRAPER FOR NAFNAF
# Fixes: the original launched TWO Chrome instances and rebound the
# `webdriver` module name to the second one; a single driver is reused.
import selenium
import parser_libraries
from selenium import webdriver
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

path = "C:/Users/Moni/Downloads/chromedriver.exe"
driver = webdriver.Chrome(path)
driver.get("https://www.instagram.com/")

# Authenticated request headers; the sessionid must be filled in.
headers = {"user-agent": "Chrome/87.0.4389.23", "cookie": "sessionid= "}

nafnaf = Profile("nafnafcol")
nafnaf.scrape(headers=headers)

# Reuse the already-open driver for the dynamic post collection.
posts = nafnaf.get_posts(webdriver=driver, login_first=True, amount=20)
scraped_posts, unscraped_posts = scrape_posts(posts, headers=headers,
                                              pause=5, silent=False)
def page_instance(self, url):
    """Fixture: return a Profile loaded from ``url``."""
    profile = Profile(url)
    profile.load()
    return profile
def test_from_username(self, page_instance):
    """``from_username`` should yield the same URL as the fixture."""
    username = "******"
    result: Profile = Profile.from_username(username=username)
    assert result.url == page_instance.url
# Statically scrape the "google" Instagram profile and export it to CSV.
# Fixes: removed the unused `import json` (and its comment wrongly
# claiming json needs `pip3 install` — it is part of the stdlib).
from instascrape import Profile  # pip3 install insta-scrape
import pandas as pd  # pip3 install pandas

google = Profile.from_username('google')  # declare profile
google.static_load()  # scrape profile

# The first data point holds the scraped attributes; wrap each scalar
# value in a list so pandas can build a single-row DataFrame.
google_data = google.data_points[0].to_dict()
google_data = {key: [val] for key, val in google_data.items()}

# Write the one-row table to "google.csv" without the index column.
df = pd.DataFrame(google_data)
df.to_csv('google.csv', encoding='utf-8', index=False)