class TestIgramscraper(unittest.TestCase):
    """Tests for the ``Instagram`` client using fixed accounts and media.

    One client is created for the whole class and shared by every test.
    NOTE(review): the expected values (account ``kevin`` with id ``3``,
    media code ``BHaRdodBouH``) presumably refer to live Instagram data,
    so these tests likely need network access -- confirm.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared client, logging in only when credentials exist.

        ``username``, ``password`` and ``user_agent`` are read from module
        scope; when either credential is missing an anonymous client is used.
        """
        session_folder = os.getcwd() + os.path.sep + 'sessions' + os.path.sep

        client = Instagram()
        if username is not None and password is not None:
            # Call ``with_credentials`` on an instance rather than on the
            # class. If the library configures the client in place the call
            # returns nothing and ``client`` is already set up; if it returns
            # a configured client instead, use that one.
            configured = client.with_credentials(
                username, password, session_folder)
            if configured is not None:
                client = configured
            client.login()
        cls.instagram = client

        if user_agent is not None:
            # TODO set user agent
            pass

    @classmethod
    def tearDownClass(cls):
        """Nothing to clean up."""
        pass

    def test_get_account_by_username(self):
        """Looking up ``kevin`` by name yields username ``kevin`` and id ``3``."""
        account = self.instagram.get_account('kevin')
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_account_by_id(self):
        """Looking up id ``3`` yields the same account as the name lookup."""
        account = self.instagram.get_account_by_id(3)
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_medias(self):
        """Requesting 80 medias returns exactly 80 items."""
        medias = self.instagram.get_medias('kevin', 80)
        self.assertEqual(80, len(medias))

    def test_get_hundred_medias(self):
        """Requesting 100 medias returns exactly 100 items."""
        medias = self.instagram.get_medias('kevin', 100)
        self.assertEqual(100, len(medias))

    def test_get_medias_by_tag(self):
        """Requesting 20 medias for a tag returns exactly 20 items."""
        medias = self.instagram.get_medias_by_tag('youneverknow', 20)
        self.assertEqual(20, len(medias))

    def test_get_medias_by_code(self):
        """A media fetched by short code reports ``kevin`` as its owner."""
        media = self.instagram.get_medias_by_code('BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)

    def test_get_media_by_url(self):
        """A media fetched by URL reports ``kevin`` as its owner."""
        media = self.instagram.get_media_by_url(
            'https://www.instagram.com/p/BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)
class InstaParser:
    """Fetch account, post and comment data from Instagram.

    Wraps an ``Instagram`` client and converts the objects it returns into
    plain dictionaries.
    """

    def __init__(self, set_proxy: bool = False):
        """Create an anonymous client, optionally routed through a proxy.

        Args:
            set_proxy: When true, requests go through the hard-coded proxy
                defined below.
        """
        # The config is read for the login step, which is currently disabled.
        config = configparser.ConfigParser()
        config.read(settings.CONFIG_INI_PATH)
        self.instagram = Instagram()
        # Authenticated access, currently switched off:
        # self.instagram.with_credentials(
        #     config['INSTA']['LOGIN'],
        #     config['INSTA']['PASSWORD'])
        # self.instagram.login()
        if set_proxy:
            # The address list should be extended so that Instagram does not
            # block the requests.
            proxies = {
                'http': 'http://123.45.67.8:1087',
                'https': 'http://123.45.67.8:1087',
            }
            self.instagram.set_proxies(proxies)

    def get_account_info(self, account: str) -> Dict:
        """Return meta information about an account.

        Args:
            account: Account name to look up.

        Returns:
            Dict with ``posts_count``, ``followers`` and ``follows``.
        """
        # A separate name keeps the ``account`` argument intact.
        profile = self.instagram.get_account(account)
        return {
            'posts_count': profile.media_count,
            'followers': profile.followed_by_count,
            'follows': profile.follows_count,
        }

    def get_last_posts(self, account: str) -> List[Dict]:
        """Return up to the 100 most recent captioned posts of an account.

        Posts without a caption are skipped. If fetching fails, an empty list
        is returned instead of raising.

        Args:
            account: Account name whose posts are requested.

        Returns:
            One dict per post with ``id``, ``text``, ``date``, ``likes``
            and ``url``.
        """
        try:
            medias = self.instagram.get_medias(account, 100)
        except Exception:
            # Best effort: a failed fetch means "no posts". Catching
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate.
            medias = []
        return [
            {
                'id': media.identifier,
                'text': media.caption,
                'date': datetime.datetime.fromtimestamp(media.created_time),
                'likes': media.likes_count,
                'url': media.link,
            }
            for media in medias
            if media.caption is not None
        ]

    @staticmethod
    def _comment_to_dict(comment) -> Dict:
        """Convert one comment object into the row used by both comment getters."""
        return {
            'date': datetime.datetime.fromtimestamp(comment.created_at),
            'text': comment.text,
            '_is_fake': comment._is_fake,
            'id': comment.identifier,
            'from_username': comment.owner.username,
            'from_full_name': comment.owner.full_name,
        }

    def get_comments_by_id(self, post_id: str) -> List[Dict]:
        """Return up to the last 100 comments of a post, looked up by post ID.

        Args:
            post_id: Identifier of the post.

        Returns:
            One dict per comment with ``date``, ``text``, ``_is_fake``,
            ``id``, ``from_username`` and ``from_full_name``. Empty when the
            response has no ``'comments'`` entry.
        """
        comments = self.instagram.get_media_comments_by_id(post_id, 100)
        if 'comments' not in comments:
            return []
        return [self._comment_to_dict(item) for item in comments['comments']]

    def get_comments_by_code(self, code: str) -> List[Dict]:
        """Return up to the last 100 comments of a post, looked up by code.

        (Works only intermittently.)

        Args:
            code: Short code of the post.

        Returns:
            One dict per comment with the same keys as
            ``get_comments_by_id`` plus ``username``, which duplicates
            ``from_username`` for callers that already rely on it.
        """
        comments = self.instagram.get_media_comments_by_code(code, 100)
        if 'comments' not in comments:
            return []
        result = []
        for item in comments['comments']:
            row = self._comment_to_dict(item)
            # Historical key of this method, kept alongside the shared one.
            row['username'] = row['from_username']
            result.append(row)
        return result

    def get_post_by_url(self, url: str) -> Dict:
        """Return a post looked up by its URL.

        Args:
            url: Link to the post.

        Returns:
            Dict with ``id``, ``text``, ``date`` and ``likes``.
        """
        media = self.instagram.get_media_by_url(media_url=url)
        return {
            'id': media.identifier,
            'text': media.caption,
            'date': datetime.datetime.fromtimestamp(media.created_time),
            'likes': media.likes_count,
        }

    @staticmethod
    def _url_path_segments(url: str) -> List[str]:
        """Split the path of ``url`` into its non-empty segments."""
        # Imported locally so the block does not depend on the file header.
        from urllib.parse import urlparse

        # Without a scheme, urlparse would treat the host as part of the path.
        if '://' not in url:
            url = 'https://' + url
        return [part for part in urlparse(url).path.split('/') if part]

    def url2acc(self, url: str) -> str:
        """Extract the account name from an account link.

        Query strings, a missing ``www.`` and a missing scheme are tolerated.

        Args:
            url: Account link, e.g. https://www.instagram.com/new_ulyanovsk/

        Returns:
            Account name usable with the parsing methods: new_ulyanovsk

        Raises:
            IndexError: If the link has no path segment to use as a name.
        """
        return self._url_path_segments(url)[0]

    def url2code(self, url: str) -> str:
        """Extract the post code from a post link.

        The code is the path segment following ``p``; query strings and a
        trailing slash are ignored.

        Args:
            url: Post link, e.g. https://www.instagram.com/p/CHDn3eMMenH

        Returns:
            Post code: CHDn3eMMenH

        Raises:
            IndexError: If the link contains no ``/p/<code>`` part.
        """
        segments = self._url_path_segments(url)
        try:
            return segments[segments.index('p') + 1]
        except ValueError:
            # Keep the exception type callers saw before for non-post links.
            raise IndexError(
                'not an Instagram post URL: {!r}'.format(url)) from None

    def get_post_likes(self, code: str) -> int:
        """Return the likes of a post looked up by its code.

        The value is passed through unchanged from
        ``get_media_likes_by_code``.
        NOTE(review): the annotation says ``int`` (a like count); confirm
        that this is what the client actually returns.

        Args:
            code: Short code of the post.

        Returns:
            Whatever ``get_media_likes_by_code`` returns for ``code``.
        """
        return self.instagram.get_media_likes_by_code(code)
class TestIgramscraper(unittest.TestCase):
    """Tests for the ``Instagram`` client and the ``Media`` id/code helpers.

    One client is created for the whole class and shared by every test.
    NOTE(review): the expected values (account ``kevin`` with id ``3``,
    location ``1`` named ``Dog Patch Labs``) presumably refer to live
    Instagram data, so these tests likely need network access -- confirm.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared client, logging in only when credentials exist.

        ``username``, ``password`` and ``user_agent`` are read from module
        scope; when either credential is missing an anonymous client is used.
        """
        session_folder = os.getcwd() + os.path.sep + 'sessions' + os.path.sep

        cls.instagram = Instagram()
        if username is not None and password is not None:
            cls.instagram.with_credentials(username, password, session_folder)
            cls.instagram.login()

        if user_agent is not None:
            # TODO set user agent
            pass

    @classmethod
    def tearDownClass(cls):
        """Nothing to clean up."""
        pass

    def test_get_account_by_username(self):
        """Looking up ``kevin`` by name yields username ``kevin`` and id ``3``."""
        account = self.instagram.get_account('kevin')
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_account_by_id(self):
        """Looking up id ``3`` yields the same account as the name lookup."""
        account = self.instagram.get_account_by_id(3)
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_medias(self):
        """Requesting 80 medias returns exactly 80 items."""
        medias = self.instagram.get_medias('kevin', 80)
        self.assertEqual(80, len(medias))

    def test_get_hundred_medias(self):
        """Requesting 100 medias returns exactly 100 items."""
        medias = self.instagram.get_medias('kevin', 100)
        self.assertEqual(100, len(medias))

    def test_get_medias_by_tag(self):
        """Requesting 20 medias for a tag returns exactly 20 items."""
        medias = self.instagram.get_medias_by_tag('youneverknow', 20)
        self.assertEqual(20, len(medias))

    def test_get_medias_by_code(self):
        """A media fetched by short code reports ``kevin`` as its owner."""
        media = self.instagram.get_medias_by_code('BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)

    def test_get_media_by_url(self):
        """A media fetched by URL reports ``kevin`` as its owner."""
        media = self.instagram.get_media_by_url(
            'https://www.instagram.com/p/BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)

    def test_get_account_by_id_with_invalid_numeric_id(self):
        """An id that matches no account must raise with the expected message."""
        # sys.maxsize is far larger than the greatest id so far and thus
        # does not represent a valid account.
        # assertRaises makes the test fail when no exception is raised at
        # all; a plain try/except would pass silently in that case.
        with self.assertRaises(Exception) as raised:
            self.instagram.get_account_by_id(sys.maxsize)
        self.assertEqual(
            str(raised.exception), 'Failed to fetch account with given id')

    def test_get_location_top_medias_by_id(self):
        """Top medias for location ``1`` come back as 9 items."""
        # Use the location lookup; the tag-name lookup does not fit a
        # location-id test.
        medias = self.instagram.get_current_top_medias_by_location_id(1)
        self.assertEqual(9, len(medias))

    def test_get_location_medias_by_id(self):
        """Requesting 56 medias for location ``1`` returns exactly 56 items."""
        medias = self.instagram.get_medias_by_location_id(1, 56)
        self.assertEqual(56, len(medias))

    def test_get_location_by_id(self):
        """Location ``1`` is named ``Dog Patch Labs``."""
        location = self.instagram.get_location_by_id(1)
        self.assertEqual('Dog Patch Labs', location.name)

    def test_get_code_from_id(self):
        """``Media.get_code_from_id`` accepts str, ``<id>_<owner>`` and int ids."""
        code = Media.get_code_from_id('1270593720437182847')
        self.assertEqual('BGiDkHAgBF_', code)
        code = Media.get_code_from_id('1270593720437182847_3')
        self.assertEqual('BGiDkHAgBF_', code)
        code = Media.get_code_from_id(1270593720437182847)
        self.assertEqual('BGiDkHAgBF_', code)

    def test_get_id_from_code(self):
        """``Media.get_id_from_code`` maps a short code back to the numeric id."""
        media_id = Media.get_id_from_code('BGiDkHAgBF_')
        self.assertEqual(1270593720437182847, media_id)

    def test_get_media_comments_by_code(self):
        """Fetch 40 comments of a media by its short code."""
        comments = self.instagram.get_media_comments_by_code('BR5Njq1gKmB', 40)
        # TODO: check why returns less comments
        # NOTE(review): this asserts 40 <= len(comments), i.e. "at least 40",
        # which conflicts with the TODO above; the return value may also be
        # a container holding the comment list rather than the list itself.
        # Confirm the intended check before changing it.
        self.assertLessEqual(40, len(comments))

    def test_get_username_by_id(self):
        """Id ``3`` resolves to the username ``kevin``."""
        username = self.instagram.get_username_by_id(3)
        self.assertEqual('kevin', username)

    def test_get_medias_by_user_id(self):
        """Without an explicit count, 12 medias are returned for user ``3``."""
        medias = self.instagram.get_medias_by_user_id(3)
        self.assertEqual(12, len(medias))
from igramscraper.instagram import Instagram

# Example: look up a single media item by its URL and print it with its owner.
MEDIA_URL = 'https://www.instagram.com/p/BHaRdodBouH'

# Public accounts can be queried without authentication.
instagram = Instagram()

# For a private account that you are subscribed to, log in first:
# instagram.with_credentials('username', 'password', 'cachePath')
# instagram.login()

media = instagram.get_media_by_url(MEDIA_URL)

print(media)
print(media.owner)
class InstagramHelper:
    """Collect the comments of an Instagram post and filter or sample them."""

    # Class-level placeholder; each instance gets its own client in __init__.
    instagram = None

    def __init__(self):
        """Create an anonymous ``Instagram`` client."""
        self.instagram = Instagram()

    def set_credentials(self, username, password):
        """Store the credentials on the client and log in immediately."""
        self.instagram.with_credentials(username, password)
        self.instagram.login()

    def get_all_comments(self, url=None, private=False):
        """Return the comment objects of the post at ``url``.

        Args:
            url: Link to the post; a trailing ``?igshid=...`` part is dropped.
            private: Accepted for compatibility; not used by this method.

        Returns:
            ``None`` when no URL is given; otherwise the list stored under
            ``'comments'`` in the client's response, or an empty list when
            anything goes wrong (the error is printed).
        """
        if url is None:
            return None  # nothing to look up without a URL

        all_comment_objs = []
        try:
            # Drop the igshid share-tracking suffix.
            if '?igshid=' in url:
                url = str(url).split('?igshid=')[0]

            # Resolve the URL to a media id, then request up to 10000 comments.
            media = self.instagram.get_media_by_url(url)
            media_id = media.identifier
            comments = self.instagram.get_media_comments_by_id(media_id, 10000)
            all_comment_objs = comments['comments']
        except Exception as e:
            # Best effort: report the problem and fall back to an empty list.
            print(e)
        return all_comment_objs

    def filter_comments(self, comments, filter_string):
        """Keep the comments whose text contains every filter term.

        Matching is case-insensitive substring matching. Terms may be plain
        words or hashtags.

        Args:
            comments: Iterable of objects with a ``text`` attribute.
            filter_string: Comma-separated filter terms; whitespace around
                each term is ignored.

        Returns:
            List of the matching comments, in their original order.
        """
        # Strip each term so "a,b", "a, b" and "a ,  b" all behave the same,
        # and lowercase the terms once instead of once per comment.
        terms = [term.strip().lower() for term in filter_string.split(',')]

        valid_comments = []
        for comment in comments:
            text = str(comment.text).lower()
            if all(term in text for term in terms):
                valid_comments.append(comment)
        return valid_comments

    def mention_count_filter(self, comments, required_count=0):
        """Keep the comments that contain at least ``required_count`` ``@``.

        Args:
            comments: Iterable of objects with a ``text`` attribute.
            required_count: Minimum number of ``@`` characters in the text.

        Returns:
            List of the matching comments, in their original order.
        """
        return [
            comment for comment in comments
            if str(comment.text).count('@') >= required_count
        ]

    def get_random_comment(self, comments):
        """Return one randomly chosen comment, or ``None`` if there are none.

        Args:
            comments: Sequence of comments to pick from.
        """
        if not comments:
            # randint(0, -1) would raise; an empty pool simply has no winner.
            return None
        return comments[random.randint(0, len(comments) - 1)]
from igramscraper.instagram import Instagram
import sys
import json

# Command-line helper: the first argument is the URL of an Instagram post.
# Prints the post's standard-resolution video URL when it has one, otherwise
# its standard-resolution image URL; prints nothing when neither is set.
instagram = Instagram()
media = instagram.get_media_by_url(sys.argv[1])

# ``or`` keeps the video-first preference and only reads the image URL when
# the video URL is empty.
direct_url = (media.video_standard_resolution_url
              or media.image_standard_resolution_url)
if direct_url:
    print(direct_url)