class TestIgramscraper(unittest.TestCase):
    """Account-lookup tests for the ``Instagram`` client.

    Relies on the module-level ``username``, ``password`` and ``user_agent``
    settings to decide whether the shared client logs in.
    """

    @classmethod
    def setUpClass(cls):
        """Create the client shared by all tests, logging in if possible."""
        cwd = os.getcwd()
        session_folder = cwd + os.path.sep + 'sessions' + os.path.sep
        if username is None or password is None:
            # No credentials configured: use an anonymous client.
            cls.instagram = Instagram()
        else:
            # NOTE(review): this relies on with_credentials returning a client
            # when called on the class - confirm against the installed
            # igramscraper version.
            cls.instagram = Instagram.with_credentials(
                username, password, session_folder)
            cls.instagram.login()

        if user_agent is not None:
            # TODO: set user agent
            pass

    @classmethod
    def tearDownClass(cls):
        """Nothing to clean up."""
        pass

    def test_get_account_by_username(self):
        """Looking up 'kevin' by name yields identifier '3'."""
        account = self.instagram.get_account('kevin')
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_account_by_id(self):
        """Looking up id 3 yields the 'kevin' account."""
        account = self.instagram.get_account_by_id(3)
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)
def __init__(self, username, password):
    """Create the Instagram client, call ``self._login`` and reset state.

    Args:
        username: Forwarded to ``self._login``.
        password: Forwarded to ``self._login``.
    """
    self.instagram = Instagram()
    self._login(username, password)

    # Result containers start out empty.
    self.likes = dict()
    self.month_posts = dict()
    self.shared_followings = list()
    self.likes_contributors = dict()
def __init__(self, username, password):
    """Store the credentials, fetch the user's own account, then log in.

    Args:
        username: Instagram user name; also the account loaded into
            ``self.target``.
        password: Password handed to ``with_credentials``.
    """
    self.username, self.password = username, password

    client = Instagram()
    self.scraper = client
    # The account lookup happens before the credentials are applied.
    self.target = client.get_account(username)
    client.with_credentials(username, password)
    client.login()
def __init__(self, useProxy=True, autoRotate=True, maxRequestcount=120,
             logindata=None):
    """Set up statistics, authenticate and hook the session's ``get``.

    Args:
        useProxy: Whether ``self.Auth()`` should select a proxy. Previously
            this argument was ignored and the flag was always ``True``.
        autoRotate: Stored as ``self.autoRotate``.
        maxRequestcount: Stored as ``self.maxRequestCount``.
        logindata: Optional credentials stored as ``self.logindata`` for
            ``self.Auth()``.
    """
    # State for statistics, login and proxy management. Rotating logins and
    # proxies does not work well at the moment, so it is not really used.
    self.useProxy = useProxy
    self.overallRequestCount = 0
    self.ready = False
    self.currentCredit = None
    self.logindata = logindata
    self.autoRotate = autoRotate
    self.maxRequestCount = maxRequestcount

    # Second argument is the standard sleep after each request.
    Instagram.__init__(self, 1.3)
    # Shortcut to the superclass proxy so super(...) need not be repeated.
    self.parent = super(customInstagram, self)
    self.Auth()  # login and proxy selection

    # Inject the hook: ``_Instagram__req`` is the name-mangled private
    # session of the superclass. Its ``get`` is replaced so requests can be
    # counted and additional time/max-count limits applied.
    self.oldReqGet = self._Instagram__req.get
    self._Instagram__req.verify = True
    self._Instagram__req.get = self.hookedRequest
    self.requestCount = 0
def getigStats(username):
    """Collect account statistics and track the follower count in Firebase.

    Builds a ``{'account': {...}}`` dict from the Instagram account and, when
    an entry for today's date is involved, adds ``'diff'``: the difference
    between the current follower count and the stored value.

    NOTE(review): the original source was collapsed onto one line; the
    nesting below is a reconstruction - confirm against the upstream file.
    """
    instagram = Instagram()
    data = { 'account': {}}
    account = instagram.get_account(username)
    data['account']['id'] = account.identifier
    data['account']['username'] = account.username
    data['account']['Full name'] = account.full_name
    data['account']['Biography'] = account.biography
    data['account']['Profile pic url'] = account.get_profile_picture_url()
    data['account']['Number of published posts'] = account.media_count
    data['account']['Number of followers'] = account.followed_by_count
    data['account']['Number of follows'] = account.follows_count
    # Date string used as the key for today's stored count.
    today = date.today().strftime("%Y-%m-%d")
    firebse = firebase.FirebaseApplication('https://covidai-1dd78.firebaseio.com/', None)
    previous = firebse.get('https://covidai-1dd78.firebaseio.com/covidai-1dd78/followcount/', '')
    keys = previous.keys()
    print(keys)
    # Stays 0 when no stored key matches ``username``.
    newEntry = 0
    for key in keys:
        if(username == key):
            newEntry = 1
            # Keeps the last child key of this user's node.
            for sna in previous[key].keys():
                stamp = sna
            print(stamp)
            Dataurl = 'https://covidai-1dd78.firebaseio.com/covidai-1dd78/followcount/'+username+'/'
            flag = 0
            # ``previous`` is rebound here; it is indexed like a list below.
            previous = firebse.get(Dataurl+stamp, '')
            print(previous)
            da = previous[-1].get(today, '')
            print(da)
            if(da):
                # Last stored entry already has a value for today.
                diff = account.followed_by_count - previous[-1][today]
                data['account']['diff'] = diff
                flag = 1
                print(diff)
                if(diff != 0):
                    # Count changed since the stored value: overwrite it.
                    previous[-1][today] = account.followed_by_count
                    snap = str(len(previous) - 1)
                    print('diff not')
                    result = firebse.put(Dataurl+stamp, snap, {today:previous[-1][today]})
                    break
            if(flag == 0):
                # No value for today yet: append one and store the list.
                previous.append({ today: account.followed_by_count })
                result = firebse.put(Dataurl, stamp, previous)
                # Computed against the entry appended just above.
                diff = account.followed_by_count - previous[-1][today]
                print('update')
                data['account']['diff'] = diff
                break
    if(newEntry == 0):
        # Unknown user: create the first record.
        print('new entry')
        Dataurl = 'https://covidai-1dd78.firebaseio.com/covidai-1dd78/followcount/'+username
        firebse.post(Dataurl, [{today: account.followed_by_count}])
    return data
def get_first_post(username: str) -> str:
    """Return the link of the first media returned for ``username``.

    An empty string is returned when the lookup raises
    ``InstagramException`` / ``InstagramNotFoundException`` or when no
    media comes back.
    """
    try:
        medias = Instagram().get_medias(username, 1)
        return medias[0].link
    except (InstagramException, InstagramNotFoundException, IndexError):
        return ''
def getcomments(username):
    """Collect comment texts over the user's medias.

    Requests up to 1000 medias and up to 10000 comments per media.

    Returns:
        ``{'comments': [text, ...]}``.
    """
    client = Instagram()
    texts = []
    for media in client.get_medias(username, 1000):
        fetched = client.get_media_comments_by_id(media.identifier, 10000)
        texts.extend(entry.text for entry in fetched['comments'])
    return {'comments': texts}
def test():
    """Fetch medias tagged 'vacation' and print the type of the first one."""
    instagram = Instagram()
    # Anonymous access. To authenticate, call
    # instagram.with_credentials('username', 'password', 'path/to/cache/folder')
    # followed by instagram.login() before fetching.
    medias = instagram.get_medias_by_tag('vacation', count=1000)
    print(medias[0].type)
def getlatest(username):
    """Return selected fields of the media fetched for ``username``.

    One media is requested; the result is an empty dict when none comes back.
    """
    client = Instagram()
    latest = {}
    for media in client.get_medias(username, 1):
        latest.update(
            created_time=media.created_time,
            caption=media.caption,
            likes_count=media.likes_count,
            comments_count=media.comments_count,
            image_high_resolution_url=media.image_high_resolution_url,
            link=media.link,
        )
    return latest
def get_first_posts(num: int, username: str) -> list:
    """Return the captions of the ``num`` medias fetched for ``username``."""
    client = Instagram()
    captions = []
    for post in client.get_medias(username, num):
        captions.append(post.caption)
    return captions
def get_instagram_medias_by_tag(data):
    """Fetch up to 100 medias for a hashtag and store the unseen ones.

    Args:
        data: Mapping with the keys ``id_escucha``, ``id_campana``,
            ``id_red`` and ``query`` (the hashtag, with or without ``#``).

    A media is saved as a ``data_red`` row only when no row with the same
    publication id exists for this escucha/campana pair. Fetch failures are
    logged and nothing is stored.
    """
    data_escucha = escucha.objects.get(id=data['id_escucha'])
    data_campana = campana_publicitaria.objects.get(id=data['id_campana'])
    data_red_social = red_social.objects.get(id=data['id_red'])
    instagram = Instagram()

    nombre_hashtag = data['query']
    if nombre_hashtag.startswith('#'):
        # Drops every occurrence of the leading character ('#').
        nombre_hashtag = nombre_hashtag.replace(nombre_hashtag[0], '')

    try:
        medias = instagram.get_medias_by_tag(nombre_hashtag, count=100)
    except Exception:
        # Best effort: log and give up, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except`` swallowed those too).
        logger.error(
            "*********************************************************************************"
        )
        logger.error("Error obteniendo medias por query de instagram")
        logger.error(
            "********************************************************************************"
        )
    else:
        for media in medias:
            if not media.identifier:
                continue
            data_red_tag = data_red.objects.filter(
                publicacion_id=media.identifier,
                data_red_escucha=data_escucha,
                data_red_campana=data_campana).values()
            if data_red_tag:
                continue  # already stored
            try:
                texto = media.caption
            except Exception:
                texto = ""
            new_publication = data_red(
                publicacion_id=media.identifier,
                publicacion_fecha=datetime.fromtimestamp(
                    media.created_time),
                publicacion_texto=texto,
                publicacion_likes=media.likes_count,
                publicacion_comentarios=media.comments_count,
                publicacion_compartidos=0,
                publicacion_user=media.owner.identifier,
                is_from_hashtag=True,
                data_red_escucha=data_escucha,
                data_red_campana=data_campana,
                data_red_social=data_red_social)
            new_publication.save()
        # Reported only when the fetch succeeded.
        # NOTE(review): emitted at error level - confirm that is intended.
        logger.error(
            'Task get instagram medias by tag has finished succesfully')
def setUpClass(self):
    """Create the shared ``Instagram`` client, logging in when possible.

    Uses the module-level ``username`` / ``password``; when either is
    ``None`` the client stays anonymous. Sessions are stored under
    ``<cwd>/sessions/``.
    """
    cwd = os.getcwd()
    session_folder = cwd + os.path.sep + 'sessions' + os.path.sep

    self.instagram = Instagram()
    if username is not None and password is not None:
        self.instagram.with_credentials(username, password, session_folder)
        self.instagram.login()

    if user_agent is not None:
        # TODO: set user agent
        pass
def accountChecker(self, username):
    """Tell whether ``username`` passes the size filter.

    An account passes when it has more than 5 medias and between 50 and
    35000 followers (both exclusive). When the lookup fails the account is
    treated as passing.

    Returns:
        bool: ``True`` when the account passes or could not be checked.
    """
    instagram = Instagram()
    instagram.set_proxies({
        'http': 'http://*****:*****@167.71.74.160:7015',
        'https': 'http://*****:*****@167.71.74.160:7015'
    })
    try:
        account = instagram.get_account(username)
        return (account.media_count > 5
                and 50 < account.followed_by_count < 35000)
    except Exception:
        # Deliberate best effort: lookup errors do not reject the account.
        return True
def ig_scraper():
    """Log in, load the medias of 'ICmemes666' and render them.

    Returns:
        The rendered ``ig_scraper.html`` template.
    """
    client = Instagram()
    # NOTE(review): credentials are hard-coded in source - consider moving
    # them to configuration.
    client.with_credentials('ICmemes666', 'devandcaitlyn')
    client.login()

    profile = client.get_account('ICmemes666')
    # The account object also exposes profile fields (identifier, full_name,
    # biography, media_count, followed_by_count, follows_count, is_private,
    # is_verified, ...) that are not used here.
    user_medias = client.get_medias_by_user_id(profile.identifier)
    return render_template('ig_scraper.html',
                           medias=user_medias,
                           title="Instagram")
def __init__(self, set_proxy: bool = False):
    """Create the Instagram client, optionally routing it through a proxy.

    Args:
        set_proxy: When true, the fixed proxy address below is applied.
    """
    config = configparser.ConfigParser()
    config.read(settings.CONFIG_INI_PATH)
    self.instagram = Instagram()
    # Login is currently disabled; it would use config['INSTA']['LOGIN'] and
    # config['INSTA']['PASSWORD'] with with_credentials() and login().

    if not set_proxy:
        return

    # The address list needs to be extended so that Instagram does not
    # block the requests.
    proxy_url = 'http://123.45.67.8:1087'
    self.instagram.set_proxies({'http': proxy_url, 'https': proxy_url})
def Main():
    """Command-line entry point: search accounts and export their details.

    Requires ``--exact`` or ``--broad``; when both are given the exact
    search result is the one that is exported.
    """
    printBanner()
    args = parser.parse_args()
    if not args.exact and not args.broad:
        parser.error('Add search method --exact or --broad')

    instagram = Instagram()
    # Login is currently disabled (loginInstagram(instagram, args.login)).
    if args.proxy:
        setProxies(instagram, args.proxy)

    print("Read inputfile")
    keywordList = readKeywordList(args.input)

    print("Fetch Accounts")
    if args.broad:
        accountlist = broadAccountSearch(instagram, keywordList)
    if args.exact:
        accountlist = exactAccountSearch(instagram, keywordList)

    print("print account details to CSV")
    printDetailsCSV(accountlist, args.output)

    if args.media:
        print("Fetch Media")
        downloadMedia(instagram, args.media, accountlist, args.output)

    print("Finished")
class MatrixModule(PollingService):
    """Polling service that posts new Instagram medias into rooms.

    NOTE(review): the original source was collapsed onto one line; the
    nesting of ``known_ids.add`` and of the rescheduling at the end is a
    reconstruction - confirm against the upstream file.
    """

    def __init__(self, name):
        super().__init__(name)
        self.instagram = Instagram()
        self.service_name = 'Instagram'

    async def poll_implementation(self, bot, account, roomid, send_messages):
        """Poll one account for one room and schedule the next poll.

        Fetches 5 medias; when ``send_messages`` is true, medias whose
        identifier is not yet in ``self.known_ids`` are sent to the room.
        """
        try:
            medias = self.instagram.get_medias(account, 5)
            self.logger.info(f'Polling instagram account {account} for room {roomid} - got {len(medias)} posts.')
            for media in medias:
                if send_messages:
                    if media.identifier not in self.known_ids:
                        await bot.send_html(bot.get_room_by_id(roomid), f'<a href="{media.link}">Instagram {account}:</a> {media.caption}', f'{account}: {media.caption} {media.link}')
                # Remember the media so it is not announced again.
                self.known_ids.add(media.identifier)
        except InstagramNotFoundException:
            # Unknown account: stop polling it for this room.
            self.logger.error(f"{account} does not exist - deleting from room")
            self.account_rooms[roomid].remove(account)
            bot.save_settings()
        except Exception:
            # Any other failure is logged and polling continues later.
            self.logger.error('Polling instagram account failed:')
            traceback.print_exc(file=sys.stderr)
        # Next poll in 30-59 minutes.
        polldelay = timedelta(minutes=30 + randrange(30))
        self.next_poll_time[roomid] = datetime.now() + polldelay
def start():
    """Poll the follower list forever and report changes to Discord.

    Each round logs in, fetches up to ``FOLLOWER_LIMIT`` followers of the
    module-level ``username`` and compares them with the list saved in
    ``follower_list.txt``. When a saved list exists, the changes are sent
    through ``discord_webhook.send_msg``. The file is then rewritten with the
    current list and the loop sleeps ``MINS_TO_SLEEP`` minutes.

    Ctrl-C during a round exits the process; any other error is printed and
    the loop carries on.
    """
    while True:
        try:
            print("iter")
            instagram = Instagram()
            instagram.with_credentials(insta_username, insta_password)
            instagram.login(force=False, two_step_verificator=True)
            sleep(2)  # Delay to mimic user

            account = instagram.get_account(username)
            sleep(1)
            curr_time = datetime.datetime.now(timezone('Asia/Kolkata'))
            curr_time = curr_time.strftime("%b %d, %Y - %H:%M:%S")

            # Fetched 100 at a time, with a delay between requests.
            followers = instagram.get_followers(
                account.identifier, FOLLOWER_LIMIT, 100, delayed=True)
            current_followers = [
                follower.username for follower in followers['accounts']
            ]
            del followers

            if path.exists("follower_list.txt"):
                # Context managers close the file even if parsing fails.
                with open("follower_list.txt", "r") as f:
                    old_followers = ast.literal_eval(f.read())
                unfollowers = check_unfollowers(current_followers, old_followers)
                followers = check_followers(current_followers, old_followers)
                follower_change = len(current_followers) - len(old_followers)
                follow_count = len(followers)
                unfollow_count = len(unfollowers)
                discord_webhook.send_msg(
                    username, follower_change, followers, unfollowers,
                    follow_count, unfollow_count, curr_time,
                    discord_webhook_url)

            # Persist the current list for the next comparison.
            with open("follower_list.txt", "w") as f:
                f.write(str(current_followers))
        except KeyboardInterrupt:
            print("Exiting...")
            sys.exit(0)
        except Exception as e:
            print(e)
        sleep(MINS_TO_SLEEP*60)
class TestIgramscraper(unittest.TestCase):
    """Account and media lookup tests for the ``Instagram`` client.

    Relies on the module-level ``username``, ``password`` and ``user_agent``
    settings to decide whether the shared client logs in.
    """

    @classmethod
    def setUpClass(cls):
        """Create the client shared by all tests, logging in if possible."""
        cwd = os.getcwd()
        session_folder = cwd + os.path.sep + 'sessions' + os.path.sep
        if username is None or password is None:
            # No credentials configured: use an anonymous client.
            cls.instagram = Instagram()
        else:
            # NOTE(review): this relies on with_credentials returning a client
            # when called on the class - confirm against the installed
            # igramscraper version.
            cls.instagram = Instagram.with_credentials(
                username, password, session_folder)
            cls.instagram.login()

        if user_agent is not None:
            # TODO: set user agent
            pass

    @classmethod
    def tearDownClass(cls):
        """Nothing to clean up."""
        pass

    def test_get_account_by_username(self):
        """Looking up 'kevin' by name yields identifier '3'."""
        account = self.instagram.get_account('kevin')
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_account_by_id(self):
        """Looking up id 3 yields the 'kevin' account."""
        account = self.instagram.get_account_by_id(3)
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_medias(self):
        """Requesting 80 medias returns exactly 80."""
        medias = self.instagram.get_medias('kevin', 80)
        self.assertEqual(80, len(medias))

    def test_get_hundred_medias(self):
        """Requesting 100 medias returns exactly 100."""
        medias = self.instagram.get_medias('kevin', 100)
        self.assertEqual(100, len(medias))

    def test_get_medias_by_tag(self):
        """Requesting 20 medias for a tag returns exactly 20."""
        medias = self.instagram.get_medias_by_tag('youneverknow', 20)
        self.assertEqual(20, len(medias))

    def test_get_medias_by_code(self):
        """The media with code 'BHaRdodBouH' is owned by 'kevin'."""
        media = self.instagram.get_medias_by_code('BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)

    def test_get_media_by_url(self):
        """The media behind the post URL is owned by 'kevin'."""
        media = self.instagram.get_media_by_url(
            'https://www.instagram.com/p/BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)
def insta_usr(): instagram = Instagram() # authentication supported instagram.with_credentials(username, password) instagram.login() account = instagram.get_account(usr) print(f"\t{account}") server = DiscordWebhooks(webhook_url) try: server.set_image(url=account.profile_pic_url) server.add_field(name="UserName : "******"Hacking is not a trick. It's an state of mind :)") server.set_author(name="HackersBrain Instagram Analyser Bot", url="http://gauravraj.gq/", icon_url="https://source.unsplash.com/35x35/?man") server.add_field(name="Full Name : ", value=account.full_name) server.add_field(name="Bio : ", value=account.biography) server.add_field(name="No. of Posts : ", value=account.media_count) server.add_field(name="No. of Followers : ", value=account.followed_by_count) server.add_field(name="No. of Follows : ", value=account.follows_count) server.add_field(name="Is Private : ", value=account.is_private) server.add_field(name="Is Verified : ", value=account.is_verified) server.send() print(Fore.GREEN + "\t Message Sent Successfully...\n" + Style.RESET_ALL) except KeyboardInterrupt as key_err: print(" Exiting Program... \tProject by : HackersBrain\n") except Exception as err: print(f"\n {err}\n Exiting Program... \tProject by : HackersBrain\n")
def get_instagram_medias_by_user(data):
    """Fetch up to 100 medias of a user and store the unseen ones.

    Args:
        data: Mapping with the keys ``id_escucha``, ``id_campana``,
            ``id_red`` and ``nombre_usuario``.

    A media is saved as a ``data_red`` row only when no row with the same
    publication id exists for this escucha/campana pair. Fetch failures are
    logged and nothing is stored.
    """
    data_escucha = escucha.objects.get(id=data['id_escucha'])
    data_campana = campana_publicitaria.objects.get(id=data['id_campana'])
    data_red_social = red_social.objects.get(id=data['id_red'])
    instagram = Instagram()

    try:
        medias = instagram.get_medias(data['nombre_usuario'], 100)
    except Exception:
        # Best effort: log and give up, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except`` swallowed those too).
        logger.error(
            "*********************************************************************************"
        )
        logger.error("Error obteniendo medias por usuario de instagram")
        logger.error(
            "********************************************************************************"
        )
    else:
        for media in medias:
            if not (media and media.identifier):
                continue
            data_red_escucha = data_red.objects.filter(
                publicacion_id=media.identifier,
                data_red_escucha=data_escucha,
                data_red_campana=data_campana).values()
            if data_red_escucha:
                continue  # already stored
            try:
                texto = media.caption
            except Exception:
                texto = ""
            new_publication = data_red(
                publicacion_id=media.identifier,
                publicacion_fecha=datetime.fromtimestamp(
                    media.created_time),
                publicacion_texto=texto,
                publicacion_likes=media.likes_count,
                publicacion_comentarios=media.comments_count,
                publicacion_compartidos=0,
                publicacion_user=media.owner.identifier,
                data_red_escucha=data_escucha,
                data_red_campana=data_campana,
                data_red_social=data_red_social)
            new_publication.save()
        # Reported only when the fetch succeeded.
        # NOTE(review): emitted at error level - confirm that is intended.
        logger.error(
            'Task get instagram medias by username has finished succesfully')
def parseinst(linkinst):
    """Crawl a post link for its media id, then dump its comment texts.

    Runs the 'comment' spider with its feed written to ``f1.json``, takes
    the part of the first item's ``idstr`` after the first '=', fetches up
    to 10000 comments for that id and pickles their texts into
    ``comments.txt``. The number of comments is printed.
    """
    # Start from an empty feed file.
    with open('f1.json', 'w'):
        pass

    crawl_settings = get_project_settings()
    crawl_settings.overrides['FEED_FORMAT'] = 'json'
    crawl_settings.overrides['FEED_URI'] = 'f1.json'
    crawler = CrawlerProcess(crawl_settings)
    crawler.crawl('comment', link=linkinst)
    crawler.start()

    with open('f1.json') as feed:
        items = json.load(feed)
        raw_id = items[0]['idstr']
    media_id = raw_id[raw_id.index('=') + 1:]

    client = Instagram()
    # NOTE(review): credentials are hard-coded in source - consider moving
    # them to configuration.
    client.with_credentials('grouchysalmon', 'ulofob37', '')
    client.login()
    fetched = client.get_media_comments_by_id(media_id, 10000)

    comments_list = [entry.text for entry in fetched['comments']]
    print(len(comments_list))

    with open('comments.txt', 'wb') as out:
        pickle.dump(comments_list, out)
def captions_from_tag(tag,
                      begin_date,
                      number=1000,
                      ID='*****@*****.**',
                      PASSWORD='******'):
    """Collect caption data for medias carrying ``tag``.

    Args:
        tag: Hashtag to search for.
        begin_date: Passed to the client as ``min_timestamp``.
        number: Maximum number of medias to request.
        ID: Login name.
        PASSWORD: Login password.

    Returns:
        tuple: ``(captions_data, medias)`` where ``captions_data`` is a
        pandas DataFrame with one row per media and ``medias`` is the raw
        result from the client.
    """
    instagram = Instagram()
    instagram.with_credentials(ID, PASSWORD)
    instagram.login()
    medias = instagram.get_medias_by_tag(tag,
                                         count=number,
                                         min_timestamp=begin_date)

    captions_data = []
    for media in medias:
        caption_data = {}
        caption_data['ID'] = media.identifier
        caption_data['time'] = datetime.fromtimestamp(
            media.created_time).strftime("%d-%b-%Y (%H:%M)")
        # The previous check compared the type object with the string 'str',
        # which is never equal, so newlines were never flattened.
        if isinstance(media.caption, str):
            caption_data['text'] = media.caption.replace("\n", " ")
        else:
            print("Possible error, no string datatype")
            caption_data['text'] = media.caption
        caption_data['likes'] = media.likes_count
        caption_data['comments'] = media.comments_count
        caption_data['usedID'] = media.owner.identifier
        caption_data['URL'] = media.link
        caption_data['timestamp'] = media.created_time
        captions_data.append(caption_data)

    captions_data = pd.DataFrame(captions_data)
    return captions_data, medias
def get_media_from_hashtag(tag, media_type, quality, max_images, path):
    """Download the medias found for a hashtag.

    Args:
        tag: Hashtag to search for.
        media_type: ``'image'``, ``'video'`` or ``'all'``.
        quality: Preferred image quality: ``'low'``, ``'standard'`` or
            ``'high'``; the other qualities are tried when it is missing.
        max_images: Maximum number of medias to request.
        path: Target directory; files go to ``<path>/images`` or
            ``<path>/videos``.

    Ads and medias excluded by ``media_type`` have no URL and are reported
    through the "Failed downloading" message.
    """
    instagram = Instagram()
    medias = instagram.get_medias_by_tag(tag, count=max_images)
    count = 1
    for media in medias:
        # Multi-item posts are handled as images.
        if media.type in ('sidecar', 'carousel'):
            media.type = 'image'

        # Reset for every media; otherwise a filtered-out media would reuse
        # the previous media's URL (or hit an unbound name on the first one).
        url = None

        # Extracting Image URL. The type filter is grouped explicitly:
        # ``a and b or c`` previously let media_type == 'all' match any
        # media regardless of its own type.
        if (media.type == 'image' and media_type in ('image', 'all')
                and not media.is_ad):
            url = getattr(media, f"image_{quality}_resolution_url")
            if not url:
                # Preferred quality unavailable: try the remaining ones.
                all_quality = ['low', 'standard', 'high']
                all_quality.remove(quality)
                for q in all_quality:
                    url = getattr(media, f"image_{q}_resolution_url")
                    if url:
                        break

        # Extracting Video URL (same grouping fix as above).
        if (media.type == 'video' and media_type in ('video', 'all')
                and not media.is_ad):
            # The media is fetched again by id before the video URLs are read.
            media = instagram.get_media_by_id(media.identifier)
            url = (media.video_standard_resolution_url
                   or media.video_low_bandwidth_url
                   or media.video_low_resolution_url
                   or media.video_url)

        # Downloading the media
        if url:
            extension = 'jpg' if media.type == 'image' else 'mp4'
            urllib.request.urlretrieve(
                url, f"{path}/{media.type}s/{media.type}{count}.{extension}")
            print(f"{count}/{max_images} media downloaded")
        else:
            print(
                f"[{count}] Failed downloading the media {media.link} (id - {media.identifier})"
            )
        count += 1
def followed_accounts(
        follower: Account, client: Instagram, config: dict,
        logger: logging.Logger) -> Generator[Account, None, None]:
    """Lazily yield the accounts that ``follower`` follows.

    Args:
        follower: Account whose followings are requested.
        client: Instagram client used for the request.
        config: Provides ``max_followed_scraped`` and ``follows_page_size``.
        logger: Accepted but not used here.

    Returns:
        A generator of ``account_from_obj`` results; empty when the response
        has no ``'accounts'`` entry.
    """
    reply = client.get_following(
        account_id=follower.identifier,
        count=config['max_followed_scraped'],
        page_size=config['follows_page_size'],
    )
    if 'accounts' in reply:
        raw_accounts = reply['accounts']
    else:
        raw_accounts = []
    return (account_from_obj(raw) for raw in raw_accounts)
def Auth(self):
    """Re-initialise the client, pick a proxy and log in.

    Sets ``self.ready`` to ``True`` only when every step succeeded.
    """
    Instagram.__init__(self, 0.1)

    if self.useProxy and not self.RotateProxy():
        self.ready = False
        return

    # ``logindata`` means the user has provided specific credentials; it is
    # meant for the private account.
    if not self.logindata:
        if not self.tryLogin():
            self.ready = False
            return
        self.ready = True
    else:
        self.with_credentials(self.logindata[0], self.logindata[1])
        try:
            self.login()
            self.ready = True
        except Exception:
            # Login failure only marks the client as not ready;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            self.ready = False
def getPosts(account, amount):
    """Return the account's medias whose likes exceed its engagement limit.

    Args:
        account: Account name; also the key into ``engagementLimits`` and
            ``niche``.
        amount: Number of medias to request.

    Returns:
        A list of ``[identifier, niche, image url, likes, caption, md5 of
        the image url]`` rows, or ``None`` when no media qualifies.
    """
    client = Instagram()
    selected = [
        [
            media.identifier, niche[account],
            media.image_high_resolution_url, media.likes_count,
            media.caption,
            md5(media.image_high_resolution_url)
        ]
        for media in client.get_medias(account, amount)
        # Keep only posts above the account's minimum accepted engagement.
        if media.likes_count > engagementLimits[account]
    ]
    return selected or None
def search_account_by_username(data):
    """Look up an Instagram account and store it if it is not known yet.

    Args:
        data: Mapping with the keys ``id_escucha``, ``id_campana``,
            ``id_red``, ``escucha_type`` and ``nombre_usuario``.

    A ``cuentas_empresa`` row is created only when none exists for the same
    username and escucha/campana pair. Lookup failures are logged and
    nothing is stored.
    """
    data_escucha = escucha.objects.get(id=data['id_escucha'])
    data_campana = campana_publicitaria.objects.get(id=data['id_campana'])
    data_red_social = red_social.objects.get(id=data['id_red'])
    escucha_type = data['escucha_type']
    instagram = Instagram()

    try:
        account = instagram.get_account(data['nombre_usuario'])
    except Exception:
        # Best effort: log and give up, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except`` swallowed those too).
        logger.error(
            "*********************************************************************************"
        )
        logger.error(
            "No se ha encontrado datos de cuenta en la tarea de obtener cuenta por tag"
        )
        logger.error(
            "********************************************************************************"
        )
    else:
        data_red_cuentas = cuentas_empresa.objects.filter(
            username=account.username,
            data_red_escucha=data_escucha,
            data_red_campana=data_campana).values()
        if not data_red_cuentas:
            new_account = cuentas_empresa(
                identifier=account.identifier,
                username=account.username,
                followers_count=account.followed_by_count,
                following_count=account.follows_count,
                post_count=account.media_count,
                # NOTE(review): listed_count reuses media_count - confirm.
                listed_count=account.media_count,
                fullname=account.full_name,
                profile_pic_url=account.profile_pic_url,
                is_competition=escucha_type,
                data_red_escucha=data_escucha,
                data_red_campana=data_campana,
                data_red_social=data_red_social)
            new_account.save()
        # Reported only when the lookup succeeded.
        # NOTE(review): emitted at error level - confirm that is intended.
        logger.error('Task get instagram accounts has finished succesfully')
def instagram_login_with_auth(uname, pwd):
    """Return an ``Instagram`` client after calling ``login()`` on it.

    Args:
        uname: User name passed to ``with_credentials``.
        pwd: Password passed to ``with_credentials``.

    The module-level ``IG_SESSION_STORE`` is passed as the third argument.
    """
    client = Instagram()
    client.with_credentials(uname, pwd, IG_SESSION_STORE)
    client.login()
    return client
class Scraper():
    """Logged-in Instagram client that exports a target's followers."""

    def __init__(self, username, password):
        """Store the credentials and log in.

        Args:
            username: Login name.
            password: Login password.
        """
        self.username = username
        self.password = password
        self.scraper = Instagram()
        self.scraper.with_credentials(username, password)
        self.scraper.login()

    def target_by_username(self, target_name, tweet_count=None):
        """Return a table of the followers of ``target_name``.

        Args:
            target_name: User name of the account whose followers are read.
            tweet_count: Maximum number of followers to fetch. When falsy,
                the target's follower count is used.

        Returns:
            list: A header row followed by one row per follower.
        """
        rows = [[
            'username', 'full name', 'biography', 'prive', 'verfied',
            'picture'
        ]]
        target = self.scraper.get_account(target_name)
        if not tweet_count:
            # Followers are being fetched, so default to the number of
            # followers (followed_by_count); follows_count is the number of
            # accounts the target follows.
            tweet_count = target.followed_by_count
        followers = self.scraper.get_followers(target.identifier,
                                               tweet_count,
                                               100,
                                               delayed=True)
        rows.extend([
            item.username, item.full_name, item.biography, item.is_private,
            item.is_verified, item.profile_pic_url
        ] for item in followers['accounts'])
        return rows