Example #1
0
 def scrape_users(self, usernames: list):
     """Scrape a Profile for each username and return them as a list.

     Uses this instance's headers and webdriver for every request.
     """
     def _scraped(name):
         # One authenticated scrape per username.
         prof = Profile(name)
         prof.scrape(headers=self.headers, webdriver=self.driver)
         return prof

     return [_scraped(name) for name in usernames]
Example #2
0
def insta():
    """Flask view: look up a profile picture for the submitted username."""
    # Pull the requested account name out of the POSTed form.
    form_data = request.form
    insta_username = str(form_data['name'])

    # Scrape only the HD profile-picture URL for that account.
    account = Profile(insta_username)
    account.scrape(keys=['profile_pic_url_hd'])
    image_link = account['profile_pic_url_hd']
    print(image_link)

    # render on the template
    return render_template('index.html', image_link=image_link)
def test_from_username(profile):
    """from_username should build a Profile matching the fixture's identity."""
    username = '******'
    result = Profile.from_username(username=username)

    # Name and URL must agree with the already-scraped fixture profile.
    assert result.name == profile.data_points[0].username
    assert result.url == profile.url
Example #4
0
def test_from_username():
    """from_username('instagram') should set both the name and canonical URL."""
    username = "instagram"
    expected_url = f"https://www.instagram.com/{username}/"

    result = Profile.from_username(username=username)

    assert result.name == username
    assert result.url == expected_url
Example #5
0
# See this tutorial to find your sessionid:
# http://valvepress.com/how-to-get-instagram-session-cookie/

from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

# Selenium driver used for the dynamic (logged-in) post collection.
webdriver = Chrome("path/to/chromedriver.exe")

# Authenticated request headers built from the Instagram session cookie.
SESSIONID = 'ENTER_YOUR_SESSION_ID_HERE'
headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": f"sessionid={SESSIONID};",
}

# Scrape Joe Biden's profile, then collect and scrape his posts.
joe = Profile("joebiden")
joe.scrape(headers=headers)
posts = joe.get_posts(webdriver=webdriver, login_first=True)
scraped, unscraped = scrape_posts(posts, silent=False, headers=headers, pause=10)
Example #6
0
 def test_from_soup(self, get_request, page_instance):
     """A Profile built from a BeautifulSoup object scrapes like the fixture."""
     soup = BeautifulSoup(get_request.text, features='lxml')
     from_soup = Profile(soup)
     from_soup.scrape()
     # Follower count is the representative field compared against the fixture.
     assert from_soup.followers == page_instance.followers
Example #7
0
 def test_from_html(self, get_request, page_instance):
     """A Profile built from raw HTML text scrapes like the fixture."""
     from_html = Profile(get_request.text)
     from_html.scrape()
     # Follower count is the representative field compared against the fixture.
     assert from_html.followers == page_instance.followers
Example #8
0
 def page_instance(self, url, headers):
     """Return a Profile scraped from *url* with the given request headers."""
     scraped = Profile(url)
     scraped.scrape(headers=headers)
     return scraped
Example #9
0
from instascrape import Profile

# Load the pycoders profile and fetch its most recent posts.
user_name = Profile.from_username('pycoders')
user_name.load()

recent = user_name.get_recent_posts()
# Keep still images only; skip video posts.
profile_photos = [post for post in recent if not post.is_video]

# Download each photo, named by its upload timestamp.
for post in profile_photos:
    stamp = post.upload_date.strftime("%Y-%m-%d %Hh%Mm")
    post.download(f"{stamp}.png")
Example #10
0
 def page_instance(self):
     """Fixture: a loaded Profile for the chris_greening account."""
     loaded = Profile("https://www.instagram.com/chris_greening/")
     loaded.load()
     return loaded
Example #11
0
def profile() -> Profile:
    """Fixture: a statically loaded Profile for the chris_greening account."""
    fixture = Profile("https://www.instagram.com/chris_greening/")
    fixture.static_load()
    return fixture
Example #12
0
import json  # stdlib; kept from the original example (not used below)

from instascrape import Profile  # pip3 install insta-scrape

import pandas as pd  # pip3 install pandas

# Scrape the "google" account.
google = Profile.from_username("google")
google.load()

# Wrap each scraped value in a one-element list so pandas can build a
# single-column frame, then transpose to one row per field.
google_data = {key: [val] for key, val in google.to_dict().items()}
df = pd.DataFrame(google_data).transpose()

# Write the scraped data to "google.csv".
df.to_csv(
    "google.csv",
    encoding="utf-8")
Example #13
0
from selenium.webdriver import Chrome
from selenium import webdriver
from instascrape import Profile, scrape_posts, Post

# Account to scrape and the Selenium driver used for dynamic page loads.
username = '******'
# Fix: named `driver` instead of `webdriver`, which shadowed the
# `selenium.webdriver` module imported above.
driver = Chrome(
    "C:/Users/Tro/Downloads/chromedriver_win32/chromedriver.exe")

# Authenticated headers: Instagram requires a valid sessionid cookie.
headers = {
    "user-agent":
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": "sessionid=YOUR_SESSION_ID"
}

# Scrape the profile, then collect and scrape its posts.
pr = Profile(username)
pr.scrape(headers=headers)
posts = pr.get_posts(webdriver=driver, login_first=True)
scraped_posts, unscraped_posts = scrape_posts(posts,
                                              headers=headers,
                                              pause=0.5,
                                              silent=False)

# Save each scraped post as a PNG named by its upload timestamp.
for post in scraped_posts:
    fname = post.upload_date.strftime("%Y-%m-%d %Hh%Mm")
    post.download(f"{fname}.png")
Example #14
0
import pandas as pd  # dictionaries and visual tables
from selenium.webdriver import Chrome  # webdriver
from instascrape import Profile, scrape_posts
import matplotlib.pyplot as plt  # plots

# Get the session id (inspect page source -> Application -> sessionid)
# and paste it into sessionid='' below.
headers = {
    "user-agent":
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.57",
    "cookie": "sessionid=sessionid"
}

# Target Instagram account name, entered interactively.
address = input('Enter Profile Name: ')
# Point selenium at the chromedriver binary.
webdriver = Chrome("PATH/chromedriver.exe")

profile = Profile(address)
profile.scrape(headers=headers)  # authenticated scrape via the session id
posts = profile.get_posts(webdriver=webdriver,
                          login_first=False)  # opens an Instagram browser window

# If pause is set to a low value, Instagram might redirect you.
scraped_posts, unscraped_posts = scrape_posts(posts,
                                              headers=headers,
                                              pause=10,
                                              silent=False)

# Collect every scraped post into a DataFrame and write it to CSV.
posts_data = [post.to_dict() for post in scraped_posts]
posts_df = pd.DataFrame(posts_data)
posts_df.to_csv(address + '.csv', index=False)
print(posts_df[['upload_date', 'comments']])
Example #15
0
import parser_libraries
from selenium import webdriver
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts, Post, Hashtag
import time

headers = {"user-agent": "Chrome/87.0.4389.23", "cookie": "sessionid= "}

# Parse post shortcodes out of input_data.txt.  Each line looks like
# "label: PART-PART-..."; four dash-parts means the code is the first part
# alone, otherwise the code spans the first two parts joined by '-'.
posts_codes = []
# Fix: the original opened the file without ever closing it; `with`
# guarantees the handle is released.
with open('input_data.txt', 'r') as f:
    for line in f:
        parts = line.split(':')[1].split('-')
        if len(parts) == 4:
            code = parts[0].strip(' ')
        else:
            code = parts[0].strip(' ') + '-' + parts[1].strip(' ')
        posts_codes.append(code)

print(posts_codes)
nafnaf = Profile('nafnafcol')
nafnaf.scrape(headers=headers)

# Scrape each post and print its caption; sleep between requests to
# avoid Instagram rate-limiting.  (Direct iteration replaces the
# original `for i in range(len(...))` index loop.)
for code in posts_codes:
    nafnaf_post = Post(code)
    nafnaf_post.scrape()
    print('----------------------')
    print(nafnaf_post.to_dict()['caption'])
    time.sleep(10)
Example #16
0
# URL INSTAGRAM WEB SCRAPER FOR NAFNAF

import selenium
import parser_libraries
from selenium import webdriver
from selenium.webdriver import Chrome
from instascrape import Profile, scrape_posts

# First driver: navigate straight to instagram.com.
path = "C:/Users/Moni/Downloads/chromedriver.exe"
driver = webdriver.Chrome(path)
driver.get("https://www.instagram.com/")

# Second driver, handed to instascrape for dynamic post collection.
# NOTE(review): this rebinds the name `webdriver`, shadowing the selenium
# module imported above — confirm the second driver is intentional.
webdriver = Chrome("C:/Users/Moni/Downloads/chromedriver.exe")

headers = {"user-agent": "Chrome/87.0.4389.23", "cookie": "sessionid= "}

# Scrape the profile, collect up to 20 posts, then scrape each post.
nafnaf = Profile("nafnafcol")
nafnaf.scrape(headers=headers)
posts = nafnaf.get_posts(webdriver=webdriver, login_first=True, amount=20)
scraped_posts, unscraped_posts = scrape_posts(posts,
                                              headers=headers,
                                              pause=5,
                                              silent=False)
Example #17
0
 def page_instance(self, url):
     """Fixture: a Profile loaded from *url*."""
     loaded = Profile(url)
     loaded.load()
     return loaded
Example #18
0
    def test_from_username(self, page_instance):
        """from_username should yield the same URL as the page fixture."""
        username = "******"
        result = Profile.from_username(username=username)

        assert result.url == page_instance.url
import json # stdlib; kept from the original example (not used below)

from instascrape import Profile # pip3 install insta-scrape

import pandas as pd # pip3 install pandas

# Statically scrape the "google" account.
google = Profile.from_username('google')
google.static_load()

# The first data point holds the scraped fields; wrap each value in a
# one-element list so pandas builds a single-row DataFrame.
raw_data = google.data_points[0].to_dict()
google_data = {key: [val] for key, val in raw_data.items()}
df = pd.DataFrame(google_data)

# Write the scraped data to "google.csv" without the index column.
df.to_csv('google.csv', encoding='utf-8', index=False)