Exemple #1
0
class TestIgramscraper(unittest.TestCase):
    """Tests for the ``Instagram`` client.

    Every test calls the shared client created in ``setUpClass`` and compares
    the result with hard-coded expectations (for example the account
    ``'kevin'`` whose identifier is ``'3'``).
    """

    @classmethod
    def setUpClass(cls):
        """Create the ``Instagram`` client shared by all tests in this class.

        Reads the module-level names ``username``, ``password`` and
        ``user_agent``, which are defined outside this class. When both
        credentials are present the client is created with them and logged
        in; otherwise an anonymous client is used.
        """
        session_folder = os.getcwd() + os.path.sep + 'sessions' + os.path.sep

        if username is None or password is None:
            cls.instagram = Instagram()
        else:
            # NOTE(review): assumes `with_credentials` can be called on the
            # class and returns a client instance — confirm against the
            # installed igramscraper version.
            cls.instagram = Instagram.with_credentials(
                username, password, session_folder)
            cls.instagram.login()

        if user_agent is not None:
            # TODO set user agent
            pass

    @classmethod
    def tearDownClass(cls):
        """Nothing to clean up."""
        pass

    def test_get_account_by_username(self):
        """Looking up 'kevin' by name yields that username and identifier '3'."""
        account = self.instagram.get_account('kevin')
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_account_by_id(self):
        """Looking up id 3 yields the 'kevin' account."""
        account = self.instagram.get_account_by_id(3)
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_medias(self):
        """Requesting 80 medias returns exactly 80 items."""
        medias = self.instagram.get_medias('kevin', 80)
        self.assertEqual(80, len(medias))

    def test_get_hundred_medias(self):
        """Requesting 100 medias returns exactly 100 items."""
        medias = self.instagram.get_medias('kevin', 100)
        self.assertEqual(100, len(medias))

    def test_get_medias_by_tag(self):
        """Requesting 20 medias for a tag returns exactly 20 items."""
        medias = self.instagram.get_medias_by_tag('youneverknow', 20)
        self.assertEqual(20, len(medias))

    def test_get_medias_by_code(self):
        """The media with code 'BHaRdodBouH' is owned by 'kevin'."""
        media = self.instagram.get_medias_by_code('BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)

    def test_get_media_by_url(self):
        """The media at the given post URL is owned by 'kevin'."""
        media = self.instagram.get_media_by_url(
            'https://www.instagram.com/p/BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)
Exemple #2
0
class InstaParser:
    """Fetch information from Instagram through an ``Instagram`` client."""

    def __init__(self, set_proxy: bool = False):
        """Create the client, optionally routing it through a proxy.

        Args:
            set_proxy (bool): When true, install the hard-coded HTTP/HTTPS
                proxy on the client.
        """
        # The config file is parsed here, but its values are only used by the
        # disabled login code below.
        config = configparser.ConfigParser()
        config.read(settings.CONFIG_INI_PATH)
        self.instagram = Instagram()
        # self.instagram.with_credentials(
        #     config['INSTA']['LOGIN'],
        #     config['INSTA']['PASSWORD'])
        # self.instagram.login()
        if set_proxy:
            # The address list should be extended so Instagram does not block us.
            proxies = {
                'http': 'http://123.45.67.8:1087',
                'https': 'http://123.45.67.8:1087',
            }
            self.instagram.set_proxies(proxies)

    def get_account_info(self, account: str) -> Dict:
        """Return meta-information about an account.

        Args:
            account (str): Account name passed to the client's ``get_account``.

        Returns:
            Dict: ``posts_count``, ``followers`` and ``follows`` counters.
        """
        info = self.instagram.get_account(account)
        return {
            'posts_count': info.media_count,
            'followers': info.followed_by_count,
            'follows': info.follows_count
        }

    def get_last_posts(self, account: str) -> List[Dict]:
        """Return up to 100 of the account's latest posts that have a caption.

        Args:
            account (str): Account name passed to the client's ``get_medias``.

        Returns:
            List[Dict]: One dict per captioned post; empty when the fetch
            fails.
        """
        try:
            medias = self.instagram.get_medias(account, 100)
        except Exception:
            # Best effort: a failed fetch yields no posts. Only ``Exception``
            # is caught so KeyboardInterrupt/SystemExit still propagate.
            medias = []
        return [
            {
                'id': media.identifier,
                'text': media.caption,
                'date': datetime.datetime.fromtimestamp(media.created_time),
                'likes': media.likes_count,
                'url': media.link,
            }
            for media in medias
            if media.caption is not None
        ]

    @staticmethod
    def _comments_to_dicts(comments, username_key: str) -> List[Dict]:
        """Convert a client comments response into a list of plain dicts.

        ``username_key`` is the dict key under which the author's username is
        stored; the two public callers use different keys.
        """
        if 'comments' not in comments:
            return []
        return [
            {
                'date': datetime.datetime.fromtimestamp(comment.created_at),
                'text': comment.text,
                '_is_fake': comment._is_fake,
                'id': comment.identifier,
                username_key: comment.owner.username,
                'from_full_name': comment.owner.full_name,
            }
            for comment in comments['comments']
        ]

    def get_comments_by_id(self, post_id: str) -> List[Dict]:
        """Return up to 100 comments of a post, looked up by post ID.

        Args:
            post_id (str): Post identifier.

        Returns:
            List[Dict]: Comments with meta-information; the author's username
            is stored under ``from_username``.
        """
        comments = self.instagram.get_media_comments_by_id(post_id, 100)
        return self._comments_to_dicts(comments, 'from_username')

    def get_comments_by_code(self, code: str) -> List[Dict]:
        """Return up to 100 comments of a post, looked up by post code.

        (Works only intermittently.)

        Args:
            code (str): Post code.

        Returns:
            List[Dict]: Comments with meta-information; the author's username
            is stored under ``username``.
        """
        comments = self.instagram.get_media_comments_by_code(code, 100)
        return self._comments_to_dicts(comments, 'username')

    def get_post_by_url(self, url: str) -> Dict:
        """Return a post looked up by its URL.

        Args:
            url (str): Link to the post.

        Returns:
            Dict: ``id``, ``text``, ``date`` and ``likes`` of the post.
        """
        media = self.instagram.get_media_by_url(media_url=url)
        return {
            'id': media.identifier,
            'text': media.caption,
            'date': datetime.datetime.fromtimestamp(media.created_time),
            'likes': media.likes_count
        }

    @staticmethod
    def _url_path_segments(url: str) -> List[str]:
        """Return the non-empty path segments of *url*, without query/fragment."""
        from urllib.parse import urlparse

        # Without a scheme urlparse treats the host as part of the path, so
        # mark scheme-less input as a network location first.
        parsed = urlparse(url if '://' in url else '//' + url)
        return [segment for segment in parsed.path.split('/') if segment]

    def url2acc(self, url: str) -> str:
        """Extract the account name from a link to an account.

        The scheme, host, query string and trailing slash are ignored.

        Args:
            url (str): Link to the account, e.g.
                https://www.instagram.com/new_ulyanovsk/

        Returns:
            str: Account name usable by the parsing methods: new_ulyanovsk

        Raises:
            IndexError: If the URL has no path segment.
        """
        return self._url_path_segments(url)[0]

    def url2code(self, url: str) -> str:
        """Extract the post code from a post URL.

        The code is the path segment that follows ``p``; any query string
        (such as ``?igshid=...``) is ignored.

        Args:
            url (str): Link to the post, e.g.
                https://www.instagram.com/p/CHDn3eMMenH

        Returns:
            str: Post code: CHDn3eMMenH

        Raises:
            IndexError: If the URL contains no ``p/<code>`` part.
        """
        segments = self._url_path_segments(url)
        try:
            return segments[segments.index('p') + 1]
        except ValueError:
            # Keep the exception type callers already see for unusable URLs.
            raise IndexError(
                'no post code found in URL: {!r}'.format(url)) from None

    def get_post_likes(self, code: str) -> int:
        """Return the likes of a post, looked up by its code.

        Args:
            code (str): Post code.

        Returns:
            int: The value returned by the client's
            ``get_media_likes_by_code`` (NOTE(review): annotated as a count —
            confirm the client's actual return type).
        """
        return self.instagram.get_media_likes_by_code(code)
class TestIgramscraper(unittest.TestCase):
    """Tests for the ``Instagram`` client and the ``Media`` id/code helpers.

    Every client test uses the shared instance created in ``setUpClass`` and
    compares the result with hard-coded expectations.
    """

    @classmethod
    def setUpClass(cls):
        """Create the ``Instagram`` client shared by all tests in this class.

        Reads the module-level names ``username``, ``password`` and
        ``user_agent``, which are defined outside this class. When both
        credentials are present the client is configured with them and
        logged in; otherwise it stays anonymous.
        """
        session_folder = os.getcwd() + os.path.sep + 'sessions' + os.path.sep

        cls.instagram = Instagram()
        if username is not None and password is not None:
            cls.instagram.with_credentials(username, password, session_folder)
            cls.instagram.login()

        if user_agent is not None:
            # TODO set user agent
            pass

    @classmethod
    def tearDownClass(cls):
        """Nothing to clean up."""
        pass

    def test_get_account_by_username(self):
        """Looking up 'kevin' by name yields that username and identifier '3'."""
        account = self.instagram.get_account('kevin')
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_account_by_id(self):
        """Looking up id 3 yields the 'kevin' account."""
        account = self.instagram.get_account_by_id(3)
        self.assertEqual('kevin', account.username)
        self.assertEqual('3', account.identifier)

    def test_get_medias(self):
        """Requesting 80 medias returns exactly 80 items."""
        medias = self.instagram.get_medias('kevin', 80)
        self.assertEqual(80, len(medias))

    def test_get_hundred_medias(self):
        """Requesting 100 medias returns exactly 100 items."""
        medias = self.instagram.get_medias('kevin', 100)
        self.assertEqual(100, len(medias))

    def test_get_medias_by_tag(self):
        """Requesting 20 medias for a tag returns exactly 20 items."""
        medias = self.instagram.get_medias_by_tag('youneverknow', 20)
        self.assertEqual(20, len(medias))

    def test_get_medias_by_code(self):
        """The media with code 'BHaRdodBouH' is owned by 'kevin'."""
        media = self.instagram.get_medias_by_code('BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)

    def test_get_media_by_url(self):
        """The media at the given post URL is owned by 'kevin'."""
        media = self.instagram.get_media_by_url('https://www.instagram.com/p/BHaRdodBouH')
        self.assertEqual('kevin', media.owner.username)

    def test_get_account_by_id_with_invalid_numeric_id(self):
        """An id that matches no account raises with the expected message."""
        # sys.maxsize is far larger than the greatest id so far and thus does
        # not represent a valid account. assertRaises makes the test fail when
        # no exception is raised at all.
        with self.assertRaises(Exception) as caught:
            self.instagram.get_account_by_id(sys.maxsize)
        self.assertEqual(str(caught.exception), 'Failed to fetch account with given id')

    def test_get_location_top_medias_by_id(self):
        """The top-medias call returns 9 items."""
        # NOTE(review): the test name refers to a location id, but the call is
        # the tag-name variant — confirm which method was intended.
        medias = self.instagram.get_current_top_medias_by_tag_name(1)
        self.assertEqual(9, len(medias))

    def test_get_location_medias_by_id(self):
        """Requesting 56 medias for location 1 returns exactly 56 items."""
        medias = self.instagram.get_medias_by_location_id(1, 56)
        self.assertEqual(56, len(medias))

    def test_get_location_by_id(self):
        """Location 1 is named 'Dog Patch Labs'."""
        location = self.instagram.get_location_by_id(1)
        self.assertEqual('Dog Patch Labs', location.name)

    def test_get_id_from_code(self):
        """``Media.get_code_from_id`` accepts str, 'id_owner' str and int ids."""
        # NOTE(review): the test name says id-from-code, but the method under
        # test is get_code_from_id — the two test names look swapped.
        code = Media.get_code_from_id('1270593720437182847')
        self.assertEqual('BGiDkHAgBF_', code)
        code = Media.get_code_from_id('1270593720437182847_3')
        self.assertEqual('BGiDkHAgBF_', code)
        code = Media.get_code_from_id(1270593720437182847)
        self.assertEqual('BGiDkHAgBF_', code)

    def test_get_code_from_id(self):
        """``Media.get_id_from_code`` maps the code back to the numeric id."""
        media_id = Media.get_id_from_code('BGiDkHAgBF_')
        self.assertEqual(1270593720437182847, media_id)

    def test_get_media_comments_by_code(self):
        """Requesting 40 comments satisfies the length check below."""
        comments = self.instagram.get_media_comments_by_code('BR5Njq1gKmB', 40)
        #TODO: check why returns less comments
        # NOTE(review): this asserts 40 <= len(comments), while the TODO above
        # speaks of fewer comments being returned — confirm the direction.
        self.assertLessEqual(40, len(comments))

    def test_get_username_by_id(self):
        """Id 3 resolves to the username 'kevin'."""
        fetched_username = self.instagram.get_username_by_id(3)
        self.assertEqual('kevin', fetched_username)

    def test_get_medias_by_user_id(self):
        """Fetching medias by user id without a count returns 12 items."""
        medias = self.instagram.get_medias_by_user_id(3)
        self.assertEqual(12, len(medias))
from igramscraper.instagram import Instagram

# Public accounts can be queried anonymously, so no credentials are set here.
instagram = Instagram()

# For a private account that you follow, authenticate before querying:
# instagram.with_credentials('username', 'password', 'cachePath')
# instagram.login()

# Look up a single post by its public URL.
post_url = 'https://www.instagram.com/p/BHaRdodBouH'
media = instagram.get_media_by_url(post_url)

# Show the post first, then the account that owns it.
print(media)
owner = media.owner
print(owner)
Exemple #5
0
class InstagramHelper:
    """Fetch the comments of a post and filter or sample them."""

    # Client used for all requests; set per instance in __init__.
    instagram = None

    def __init__(self):
        """Create an anonymous ``Instagram`` client."""
        self.instagram = Instagram()

    def set_credentials(self, username, password):
        """Configure the client with the given credentials and log in."""
        self.instagram.with_credentials(username, password)
        self.instagram.login()

    def get_all_comments(self, url=None, private=False):
        """Return the comment objects of the post at *url*.

        Args:
            url: Link to the post. A trailing ``?igshid=...`` part is removed
                before the lookup.
            private: Accepted for callers but not used by this method.

        Returns:
            ``None`` when no URL is given; otherwise the list of comment
            objects, or an empty list when fetching fails (the error is
            printed).
        """
        if url is None:
            return None  # nothing to look up without a URL

        all_comment_objs = []

        try:
            # Drop the share-tracking suffix.
            if '?igshid=' in url:
                url = str(url).split('?igshid=')[0]

            # Resolve the post to its media id, then request its comments
            # (up to 10000).
            media = self.instagram.get_media_by_url(url)
            comments = self.instagram.get_media_comments_by_id(
                media.identifier, 10000)
            all_comment_objs = comments['comments']
        except Exception as e:
            # Best effort: report the problem and return what we have.
            print(e)

        return all_comment_objs

    def filter_comments(self, comments, filter_string):
        """Return the comments whose text contains every requested filter.

        Args:
            comments: Iterable of objects with a ``text`` attribute.
            filter_string: Comma-separated list of filters (hashtags are
                fine). Whitespace around each entry is ignored and matching
                is case-insensitive.

        Returns:
            list: The matching comments, in their original order.
        """
        # Strip every entry so any amount of spacing around the commas is
        # tolerated. Blank entries would match any text, so dropping them
        # does not change the result.
        filters = [part.strip().lower() for part in filter_string.split(',')]
        filters = [wanted for wanted in filters if wanted]

        return [
            comment for comment in comments
            if all(wanted in str(comment.text).lower() for wanted in filters)
        ]

    def mention_count_filter(self, comments, required_count=0):
        """Return the comments containing at least *required_count* '@' signs.

        Args:
            comments: Iterable of objects with a ``text`` attribute.
            required_count: Minimum number of '@' characters in the text.

        Returns:
            list: The matching comments, in their original order.
        """
        return [
            comment for comment in comments
            if str(comment.text).count('@') >= required_count
        ]

    def get_random_comment(self, comments):
        """Return one randomly chosen element of *comments*.

        Raises:
            ValueError: If *comments* is empty.
        """
        if not comments:
            raise ValueError('cannot pick a random comment from an empty list')
        return random.choice(comments)
Exemple #6
0
  
from igramscraper.instagram import Instagram
import sys
import json

# Fetch the post whose URL is given as the first command-line argument.
instagram = Instagram()
media = instagram.get_media_by_url(sys.argv[1])

# Prefer the video URL and fall back to the image URL; print nothing when
# neither is set.
media_url = (media.video_standard_resolution_url
             or media.image_standard_resolution_url)
if media_url:
    print(media_url)