def get_posts_by_username(username, size=(0, 50)):
    """Collect shortcode and display URL for a slice of a profile's posts.

    For every selected post the metadata JSON is downloaded with Instaloader
    into a scratch directory named after the profile, parsed, and the scratch
    directory is removed again before the next post is handled.

    Args:
        username: Instagram username of the profile to inspect.
        size: ``(start, stop)`` slice bounds applied to the posts in the
            order ``profile.get_posts()`` yields them.

    Returns:
        ``{'status': 'bad'}`` if the profile could not be loaded, otherwise
        ``{'list': [...], 'status': 'ok', 'owner': ...}``. Every list entry
        has the keys ``'code'`` and ``'img'``; ``owner`` is the ``owner``
        node of the last processed post (``None`` if none was processed).
    """
    from itertools import islice

    loader = Instaloader(download_pictures=False,
                         download_comments=False,
                         compress_json=False,
                         download_videos=False,
                         download_geotags=False)
    try:
        profile = Profile.from_username(loader.context, username)
    except Exception:
        return {'status': 'bad'}

    start, stop = size[0], size[1]
    try:
        # Stay lazy: only the posts inside the requested window are fetched.
        selected = islice(profile.get_posts(), start, stop)
    except ValueError:
        # islice rejects negative bounds; keep list-slicing semantics there.
        selected = list(profile.get_posts())[start:stop]

    response_ans = []
    owner = None
    for post in selected:
        try:
            loader.download_post(post, target=profile.username)
            metadata_files = glob.glob(profile.username + "/*.json")
            if not metadata_files:
                # Nothing was written for this post; skip it.
                continue
            with open(metadata_files[0]) as json_file:
                node = json.load(json_file)['node']
            post_info = {
                'code': node['shortcode'],
                'img': node['display_url'],
            }
            owner = node['owner']
            response_ans.append(post_info)
        finally:
            # Wipe the scratch directory after every post so the next glob
            # can only see that post's own metadata file, and so nothing is
            # left behind when an error interrupts the loop.
            shutil.rmtree(profile.username, ignore_errors=True)

    return {'list': response_ans, 'status': 'ok', 'owner': owner}
def load_photo_from_post(short_code):
    """Return the display resources and owner of a single Instagram post.

    The post metadata is downloaded with Instaloader into a scratch directory
    named after the shortcode, parsed, and the directory is always removed
    afterwards, even when downloading or parsing fails.

    Args:
        short_code: Shortcode of the post to look up.

    Returns:
        ``{'status': 'bad'}`` if the post could not be looked up, otherwise a
        dict with the keys ``'img'`` (a list with one ``display_resources``
        entry per image; sidecar posts contribute one entry per child),
        ``'owner'`` and ``'status'`` (``'ok'``).

    Raises:
        IndexError: If the download produced no metadata JSON file.
    """
    loader = Instaloader(download_pictures=False,
                         download_comments=False,
                         compress_json=False,
                         download_videos=True)
    try:
        post = Post.from_shortcode(loader.context, short_code)
    except Exception:
        return {'status': 'bad'}

    try:
        loader.download_post(post, target=short_code)
        super_json = glob.glob(short_code + '/*.json')[0]
        # Context manager closes the file handle the original left open.
        with open(super_json) as json_file:
            node = json.load(json_file)['node']
    finally:
        # Everything needed is in memory now; never leave the directory behind.
        shutil.rmtree(short_code, ignore_errors=True)

    response = {}
    if 'edge_sidecar_to_children' in node:
        response['img'] = [
            edge['node']['display_resources']
            for edge in node['edge_sidecar_to_children']['edges']
        ]
    else:
        response['img'] = [node['display_resources']]
    response['owner'] = node['owner']
    response['status'] = 'ok'
    return response
def download_hashtags(self):
    """Download the posts of every hashtag given as input.

    For each name in ``self.hashtags_list`` the posts are downloaded with
    Instaloader, stopping after ``self.hashtags_max`` posts when that
    attribute is not ``None``. Afterwards each hashtag's download directory
    is moved to ``self._get_path() + <hashtag>``.

    Failures while downloading or moving a hashtag are reported on stdout
    and do not abort the processing of the remaining hashtags.
    """
    loader = Instaloader()
    # Directory Instaloader actually wrote to, per requested hashtag. It is
    # named after ``hashtag.name``, which Instaloader lowercases, so it can
    # differ from the requested spelling.
    download_dirs = {}

    for hs in self.hashtags_list:
        try:
            hashtag = Hashtag.from_name(loader.context, hs)
            download_dirs[hs] = hashtag.name
            post_counter = 0
            for post in hashtag.get_posts():
                loader.download_post(post, target=hashtag.name)
                post_counter += 1
                if (self.hashtags_max is not None
                        and post_counter == self.hashtags_max):
                    break
            print("Downloaded ", post_counter, " from ", hashtag.name)
        except Exception as e:
            print("Nao foi possivel baixar todos os posts da tag: ", hs)
            print("Dica: Tente coletar menos posts por hashtag")
            print(e)

    for hs in self.hashtags_list:
        try:
            # Move from the real download directory; fall back to the raw
            # name when the hashtag lookup itself failed.
            os.rename(download_dirs.get(hs, hs), self._get_path() + hs)
        except Exception as e:
            print("Erro ao mover arquivos da hashtag: ", hs)
            print(e)
def download_photos(username):
    """Download every post listed in a user's stored shortcode list.

    The shortcodes are read from ``../Data/Posts_list/<username>.json`` and
    each post is downloaded below ``../Data/Photos/<username>`` without
    videos, video thumbnails or comments.

    Args:
        username: Name of the shortcode list file (without extension); also
            used as the download target.
    """
    loader = Instaloader(
        download_videos=False,
        download_video_thumbnails=False,
        download_comments=False,
        compress_json=False,
        dirname_pattern="../Data/Photos/{target}",
    )

    shortcode_file = "../Data/Posts_list/" + username + ".json"
    with open(shortcode_file) as handle:
        shortcodes = json.load(handle)

    for shortcode in shortcodes:
        loader.download_post(Post.from_shortcode(loader.context, shortcode), username)
def download(self):
    """Download a percentage of the posts of every source profile.

    For each username in ``self.sources`` a fresh, configured loader is
    created, the profile's posts are sorted by like count and the first
    ``self.percentage`` percent of them (rounded up, relative to the
    profile's ``mediacount``) are downloaded into
    ``../Media/<self.username>/``.
    """
    for source_name in self.sources:
        loader = Instaloader()
        loader.dirname_pattern = f"../Media/{self.username}/"
        self.configure(loader)

        # Keep the source name and the Profile object in separate variables
        # instead of rebinding the loop variable.
        profile = Profile.from_username(loader.context, source_name)

        # NOTE(review): the sort is ascending, so the posts with the FEWEST
        # likes are selected. Confirm whether reverse=True was intended.
        posts_sorted_by_likes = sorted(profile.get_posts(), key=lambda p: p.likes)

        wanted = ceil(profile.mediacount * self.percentage / 100)
        for post in islice(posts_sorted_by_likes, wanted):
            # download_post expects a target *name*, not a Profile object.
            loader.download_post(post, profile.username)
def scrape_instagram_hashtag_posts(session: Instaloader, hashtag: str, max_posts: int = 50) -> None:
    """Download up to ``max_posts`` posts of an Instagram profile.

    Despite the function name, ``hashtag`` is resolved with
    ``Profile.from_username``, i.e. it is treated as a profile username.
    Posts are saved by ``session.download_post`` under a target named after
    the profile's username.

    Args:
        session: Instaloader instance used for the lookup and the downloads.
        hashtag: Username of the profile whose posts are downloaded.
        max_posts: Maximum number of posts to download (a post can contain
            several images). ``None`` means no limit; zero or a negative
            value downloads nothing.
    """
    from itertools import islice

    profile = Profile.from_username(session.context, hashtag)

    # The original compared a counter for equality *after* each download, so
    # a limit <= 0 was never reached and every post was downloaded.
    limit = None if max_posts is None else max(max_posts, 0)
    for post in islice(profile.get_posts(), limit):
        session.download_post(post, target=profile.username)
def showDialog5(self):
    """Ask for an Instagram name and download posts sorted by like count.

    The current entry of ``self.combo_box3`` decides how many posts are
    downloaded: ``"Top 5"``, ``"Top 10"`` or ``"Alle"`` (all posts). Any
    other entry, or cancelling the input dialog, does nothing.
    """
    # Number of posts to download per combo-box entry; None means no limit.
    limits = {"Top 5": 5, "Top 10": 10, "Alle": None}

    # Content of the currently selected combo-box item.
    content = self.combo_box3.currentText()
    if content not in limits:
        return

    text, ok = QInputDialog.getText(self, 'Input Dialog', 'Der Instagram Name:')
    if not ok:
        return

    PROFILE = text
    L = Instaloader(save_metadata=True,
                    compress_json=False,
                    download_video_thumbnails=False,
                    download_comments=False,
                    post_metadata_txt_pattern="{likes}")
    profile = Profile.from_username(L.context, PROFILE)

    # NOTE(review): the sort is ascending (the original passed
    # reverse=False), so "Top N" yields the N posts with the FEWEST likes.
    # Confirm whether reverse=True was intended.
    posts_sorted_by_likes = sorted(profile.get_posts(), key=lambda post: post.likes)

    for post in islice(posts_sorted_by_likes, limits[content]):
        L.download_post(post, PROFILE)
from os import chdir, mkdir
from pathlib import Path

from instaloader import Instaloader, Post, Profile, load_structure_from_file

# NOTE(review): `datetime`, `argv` and `glob` are used below but are not
# imported in the visible part of this script; confirm they are imported
# elsewhere.

L = Instaloader()
today = datetime.date.today()

# Make sure the user passed a target name.
try:
    TARGET = argv[1]
except IndexError:
    raise SystemExit("Profile name required as argument.")

# Download new posts.
for post in Profile.from_username(L.context, TARGET).get_posts():
    L.download_post(post, TARGET)

chdir(TARGET)

# Open the history file for TARGET. Append mode creates the file when it
# does not exist yet, so a single open() replaces the former
# "create with w+, then reopen with a" sequence, which leaked the first
# file handle.
file = open(f"{TARGET}_deletedposts.txt", "a")

# Obtain set of posts on HD
offline_posts = set(
    filter(lambda s: isinstance(s, Post),
           (load_structure_from_file(L.context, metadata_path)
            for metadata_path in (glob('*UTC.json.xz') + glob('*UTC.json')))))

# Obtain set of posts that are currently online
def download_post(client: Instaloader, post: Post) -> bool:
    """Download ``post`` through ``client`` into a target named after its owner.

    The value returned by ``client.download_post`` is ignored; this wrapper
    always reports ``True``.
    """
    target = post.owner_username
    client.download_post(post, target)
    return True
loader = Instaloader()

# Ask which profile to scan and get the iterable of its posts.
PROFILE = input("Enter the username of the profile you would like to scan: ")
posts = Profile.from_username(loader.context, PROFILE).get_posts()

# Read both ends of the date range as "mm/dd/yyyy" and split each into
# its [mm, dd, yyyy] parts.
since = input("Enter the date of where you'd like to start (mm/dd/yyyy): ")
since_dates = since.split('/')
until = input("Enter the date of where you'd like to end (mm/dd/yyyy): ")
until_dates = until.split('/')

# datetime.datetime takes (year, month, day), hence the index order 2, 0, 1.
SINCE = datetime.datetime(int(since_dates[2]), int(since_dates[0]), int(since_dates[1]))
UNTIL = datetime.datetime(int(until_dates[2]), int(until_dates[0]), int(until_dates[1]))

# takewhile stops at the first post that is not newer than SINCE; dropwhile
# skips the leading posts that are newer than UNTIL.
# NOTE(review): this only selects the intended window if posts are yielded
# newest first — confirm.
for post in dropwhile(lambda p: p.date > UNTIL, takewhile(lambda p: p.date > SINCE, posts)):
    print(post.date)
    loader.download_post(post, PROFILE)


# Holds a profile; the remainder of this class is not visible in this chunk.
class Scraper():
    def __init__(self, profile):
        self.profile = profile

    def get_post_data(self, begin, end):
        # NOTE(review): the `begin` argument is overwritten with a date built
        # from the module-level `since_dates`, so the value passed by the
        # caller is ignored — confirm this is intended.
        begin = datetime.datetime(int(since_dates[2]), int(since_dates[0]), int(since_dates[1]))
# TODO: another program that feeds this one with a post that fits the parameters # Test with this post: https://www.instagram.com/p/BvFHV32B5mA/ # TODO: build out to paste in url and remove everything but shortcode. #Name = input('What is the post?: ') #should name folder the name of the profile Name = 'gothamroasters' SHORTCODE = 'BvFHV32B5mA' # can get this from json data['node']['shortcode'] # Creating .json files of each post # See parameters here: https://instaloader.github.io/as-module.html#instaloader-main-class L = Instaloader(download_pictures=False, download_videos=False, download_geotags=False, compress_json=False, download_comments=False, download_video_thumbnails=False, post_metadata_txt_pattern='') post = Post.from_shortcode(L.context, SHORTCODE) folder_save_name = Name # Downloads the post into a folder download = L.download_post(post, folder_save_name) # looks for follow, like, comment, tag ############################################################################ # TODO add into an excel file called "currently_entered.xlsx" # opens json file for parsing # TODO make this link dynamic with open('C:/Users/Tyler Wooten/Documents/GitHub/Python/Instagram_Bot/gothamroasters/2019-03-16_18-52-08_UTC.json') as complex_data: # stores dictionary version of json file in data data = json.loads(complex_data.read())
from itertools import islice
from math import ceil

from instaloader import Instaloader, Profile

PROFILE = ...        # profile to download from
X_percentage = 10    # percentage of posts that should be downloaded

L = Instaloader()
profile = Profile.from_username(L.context, PROFILE)

# Rank all posts by likes plus comments, highest first.
posts_sorted_by_likes = list(profile.get_posts())
posts_sorted_by_likes.sort(key=lambda p: p.likes + p.comments, reverse=True)

# Download the leading X percent of the ranking, rounded up.
download_count = ceil(profile.mediacount * X_percentage / 100)
for post in islice(posts_sorted_by_likes, download_count):
    L.download_post(post, PROFILE)
# Date window for the publications: SINCE is the newer bound and UNTIL the
# older bound.
SINCE = datetime(2020, 2, 10)
UNTIL = datetime(2019, 1, 10)

profile = Profile.from_username(L.context, USERSPROFILE)
# Show the profile in the console.
print(profile)

# Rank the publications by likes plus comments, highest first.
posts_sorted_by_likes = list(profile.get_posts())
posts_sorted_by_likes.sort(key=lambda p: p.likes + p.comments, reverse=True)
# Show the complete, sorted list of publications.
print(posts_sorted_by_likes)

# Walk through the top X percent of the ranking (rounded up).
top_count = ceil(profile.mediacount * X_percentage / 100)
for post in islice(posts_sorted_by_likes, top_count):
    print("4")
    # Skip publications outside the date window.
    if not (UNTIL <= post.date <= SINCE):
        continue
    # Announce the download in the console, then download.
    print('Downloading post with the caption:', post.caption)
    L.download_post(post, USERSPROFILE)
# Planned workflow:
#   - Check a set list of Instagram accounts/hashtags for new posts.
#   - Check each new post for "giveaway, free, win", "by tonight", "by 12pm".
#   - See whether the post says to like, comment, or tag others, and do that.
#   - Notify me via email if something was won (monitor notifications).
#
# References:
#   https://www.promptcloud.com/blog/how-to-scrape-instagram-data-using-python
#   https://instaloader.github.io/
#
# TODO find out how I want to get new posts

USERNAME = '******'

# Only the .json file of each post is wanted, so every optional download is
# switched off.
# See parameters here: https://instaloader.github.io/as-module.html#instaloader-main-class
L = Instaloader(
    post_metadata_txt_pattern='',
    **dict.fromkeys(
        (
            'download_pictures',
            'download_videos',
            'download_geotags',
            'compress_json',
            'download_comments',
            'download_video_thumbnails',
        ),
        False,
    ),
)

profile = Profile.from_username(L.context, USERNAME)
for post in profile.get_posts():
    L.download_post(post, target=profile.username)
def repost(basedir, username, ig_link):
    """Download an Instagram post and queue one of its images for reposting.

    Loads the per-user configuration below ``basedir / 'config' / username``,
    downloads the post referenced by ``ig_link`` into ``basedir / 'temp'``,
    lets the user pick an image when several were downloaded, copies it to
    ``basedir / 'posts' / username``, writes the caption next to it and
    submits both through ``submit_to_buffer``. ``cleanup(basedir)`` is
    called at the end and before exiting on a failed download.

    Args:
        basedir: Base directory (path object) holding ``config``, ``temp``
            and ``posts``.
        username: Name of the configuration/post sub-directory to use.
        ig_link: Link to the Instagram post to repost.

    Raises:
        SystemExit: With status 1 if the download failed or produced no
            ``.jpg`` image.
        ValueError: If the entered image choice is not a number within the
            offered range.
    """
    conf_dir = basedir / 'config' / username

    # Load configuration
    conf = load_config(conf_dir / 'config.yaml')
    hashtags = load_hashtags(conf_dir / 'hashtags.txt')
    blacklist = load_hashtags(conf_dir / 'blacklist.txt')
    caption = load_template(conf_dir / 'template.txt')

    # Get Instagram postId
    postId = post_id_from_link(ig_link)
    print('ImageId:', postId)

    # Download image with instaloader
    L = Instaloader(request_timeout=2, max_connection_attempts=3, quiet=True)
    P = Post.from_shortcode(L.context, postId)
    profile = P.profile
    post_hashtags = P.caption_hashtags
    print('Profile:', profile)
    if not L.download_post(P, basedir / 'temp'):
        print('Download of image failed! Please try again.')
        cleanup(basedir)
        # Signal the failure to the calling shell with a non-zero status.
        sys.exit(1)

    # Select image and copy to post folder
    images = sorted((basedir / 'temp').glob('*.jpg'))
    images_len = len(images)
    if not images:
        # E.g. a video-only post: exit cleanly, not with an IndexError.
        print('Download of image failed! Please try again.')
        cleanup(basedir)
        sys.exit(1)
    if images_len > 1:
        print(
            'Which image would you like to repost? (1-{}):'.format(images_len),
            end=' ')
        choice = int(input())
        # Explicit check, not assert: asserts are stripped under -O.
        if not 1 <= choice <= images_len:
            raise ValueError(
                'Image choice must be between 1 and {}.'.format(images_len))
        image = images[choice - 1]
    else:
        image = images[0]

    post_dir = basedir / 'posts' / username
    post_dir.mkdir(parents=True, exist_ok=True)
    name = image.name.split('.')[0]
    target = post_dir / '{}.jpg'.format(name)
    caption_path = post_dir / '{}.txt'.format(name)
    shutil.copy(image, target)

    # Create caption and hashtags
    if conf['HASHTAG_INCLUDE']:
        # Take over the post's own hashtags unless they are blacklisted or
        # already part of the configured list.
        post_hashtags = [
            '#' + hashtag for hashtag in post_hashtags
            if hashtag not in blacklist and hashtag not in hashtags
        ]
        shuffle(post_hashtags)
    else:
        post_hashtags = []

    hashtags = ['#' + hashtag for hashtag in hashtags] + post_hashtags
    hashtags = hashtags[:conf['HASHTAG_LIMIT']]
    hashtags_str = conf['HASHTAG_SEPARATOR'].join(hashtags)
    caption_str = caption.substitute(username='******'.format(profile),
                                     hashtags=hashtags_str)
    # Context manager makes sure the caption is flushed and the file closed
    # before it is handed to submit_to_buffer.
    with open(caption_path, 'w') as caption_file:
        caption_file.write(caption_str)

    print('Please enter location (or leave blank):', end=' ')
    location = input()

    submit_to_buffer(conf['IG_USER'],
                     caption_path,
                     target,
                     location,
                     conf['BUFFER_MAIL'],
                     conf['BUFFER_PASS'],
                     driver=conf['DRIVER'])
    cleanup(basedir)
from instaloader import Instaloader, Profile
import os.path

PROFILE = "mindoftoto"  # Insert profile name here

L = Instaloader()

# Obtain profile
profile = Profile.from_username(L.context, PROFILE)

# Collect all posts ordered by ascending number of likes, so the post with
# the most likes ends up last.
posts_sorted_by_likes = list(profile.get_posts())
posts_sorted_by_likes.sort(key=lambda post: post.likes)

# Download the post with the most likes and create the numbered directories.
current_directory = os.getcwd()
for x in range(10):
    final_directory = os.path.join(current_directory, str(x))
    # NOTE(review): the selected post does not depend on x, so every
    # iteration handles the same (most liked) post, and final_directory is
    # created but never used as a download target — confirm the intent.
    most_liked = posts_sorted_by_likes[-1]
    L.download_post(most_liked, PROFILE)
    print(most_liked)
    if os.path.exists(final_directory):
        continue
    os.makedirs(final_directory)