Ejemplo n.º 1
0
    def test_pr_122(self):
        """Feature implemented by @susundberg.

        Check that the access time and modification time of downloaded
        media are set according to the media's Instagram date, for
        ``PostLooter.download``, ``download_pictures`` and
        ``download_videos``.
        """

        def check_times(filename, media):
            # Both timestamps of the downloaded file must match the
            # 'taken_at_timestamp' field of the media.
            details = self.destfs.getdetails(filename)
            self.assertEqual(
                details.raw["details"]["accessed"], media['taken_at_timestamp'])
            self.assertEqual(
                details.raw["details"]["modified"], media['taken_at_timestamp'])

        code = 'BY77tSfBnRm'
        looter = ProfileLooter(
            'franz_ferdinand', template='{code}', session=self.session)
        info = looter.get_post_info(code)

        # Single post downloaded through a PostLooter
        single = PostLooter(code, session=self.session, template='{code}')
        single.download(self.destfs)
        check_times('BY77tSfBnRm.jpg', info)

        # First picture of the profile
        pic = next(m for m in looter.medias() if not m['is_video'])
        looter.download_pictures(self.destfs, media_count=1)
        check_times('{}.jpg'.format(pic['shortcode']), pic)

        # First video of the profile
        vid = next(m for m in looter.medias() if m['is_video'])
        looter.download_videos(self.destfs, media_count=1)
        check_times('{}.mp4'.format(vid['shortcode']), vid)
Ejemplo n.º 2
0
def instagram_p(url):
    """Download the picture of an Instagram post and open the saved file.

    Args:
        url: Value handed to ``PostLooter`` to identify the post.

    Returns:
        The file ``./pictures/<id>.jpg`` opened in binary read mode. The
        handle is not closed here, so the caller has to close it.
    """
    post = PostLooter(url)

    # Downloading: read the id first, it is used as the file name below.
    media_id = post.info['id']
    post.download('./pictures/')

    return open('./pictures/{}.jpg'.format(media_id), 'rb')
Ejemplo n.º 3
0
def instagram_v(url):
    """Download the video of an Instagram post and open the saved file.

    Args:
        url: Value handed to ``PostLooter`` to identify the post.

    Returns:
        The file ``./videos/<id>.mp4`` opened in binary read mode. The
        handle is not closed here, so the caller has to close it.
    """
    post = PostLooter(url)

    # Downloading: read the id first, it is used as the file name below.
    media_id = post.info['id']
    post.download_videos('./videos/')

    return open('./videos/{}.mp4'.format(media_id), 'rb')
Ejemplo n.º 4
0
def get_insta(request):
    """Download the post named in a POST request into ``~/Downloads``.

    A POST containing ``get_video`` downloads the videos of the post given
    in the ``links`` field; a POST containing ``get_photo`` downloads the
    post itself. Any other request downloads nothing.

    Returns:
        Whatever ``redir(request)`` returns.
    """
    target_dir = os.path.expanduser("~") + '/Downloads'
    if request.method == 'POST':
        # 'get_video' takes precedence when both keys are present.
        if 'get_video' in request.POST:
            PostLooter(request.POST.get('links')).download_videos(target_dir)
        elif 'get_photo' in request.POST:
            PostLooter(request.POST.get('links')).download(target_dir)
    return redir(request)
Ejemplo n.º 5
0
def get_post(post_id):
    """Fetch a post and return a small summary of it.

    Args:
        post_id: Post identifier, passed both to ``PostLooter`` and to
            ``get_post_info``.

    Returns:
        A dict with the keys ``id`` (the post shortcode), ``user_name``
        and ``image_url``.
    """
    print("get post : %s" % post_id)
    details = PostLooter(post_id).get_post_info(post_id)
    return {
        "id": details["shortcode"],
        "user_name": details["owner"]["username"],
        "image_url": details["display_url"],
    }
Ejemplo n.º 6
0
    def test_issue_094(self):
        """Thanks to @jeanmarctst for raising this issue.

        Ensure the caption of an image downloaded from a post code is
        extracted and written to the image metadata.
        """
        code = "BY77tSfBnRm"
        PostLooter(
            code,
            add_metadata=True,
            template='{code}',
            session=self.session,
        ).download(self.destfs)

        raw_image = self.destfs.getbytes("{}.jpg".format(code))
        exif = piexif.load(raw_image, True)
        self.assertTrue(exif['Exif']['UserComment'])
Ejemplo n.º 7
0
def download_instagram_vid(post_id):
    """Attempt to download the video of a post.

    Exits the process with status 1 when ``PostLooter`` rejects the post
    with a ``ValueError``.

    Args:
        post_id: Post identifier, passed to ``PostLooter`` and
            ``get_post_info``.

    Returns:
        A tuple ``(post id, owner username, download result)`` where the
        last item is the return value of ``download_videos``.
    """
    try:
        looter = PostLooter(post_id)
    except ValueError:
        print("Couldn't get video from the link. The user's profile may be private.")
        sys.exit(1)

    post = looter.get_post_info(post_id)
    video_id, owner_name = post['id'], post['owner']['username']
    # `destination` is not defined in this function; it is looked up in
    # the enclosing scope.
    downloaded = looter.download_videos(destination)
    return video_id, owner_name, downloaded
Ejemplo n.º 8
0
    def test_pr_122_download_post(self):
        """Feature implemented by @susundberg.

        Check that the access time and modification time of a downloaded
        post are set according to its Instagram date.
        """
        code = 'BY77tSfBnRm'
        looter = PostLooter(code, session=self.session, template='{code}')
        info = looter.get_post_info(code)
        looter.download(self.destfs)

        details = self.destfs.getdetails('{}.jpg'.format(code))
        for field in ("accessed", "modified"):
            self.assertEqual(
                details.raw["details"][field], info['taken_at_timestamp'])
Ejemplo n.º 9
0
def instalooterw():
    """Build a ``PostLooter`` from a full Instagram post URL.

    The looter is only constructed; nothing is downloaded or returned.
    """
    post_url = (
        "https://www.instagram.com/p/CGW8ClsFRip/?utm_source=ig_web_copy_link"
    )
    looter = PostLooter(code=post_url)
Ejemplo n.º 10
0
    def test_issue_184(self, _):
        """Feature request by @ghost.

        A post can be looted directly from its URL: the looter must
        extract the post code from it.
        """
        url = "https://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k"
        self.assertEqual(PostLooter(url).code, "BJlIB9WhdRn")
Ejemplo n.º 11
0
    def test_issue_039(self):
        """Feature request by @verafide.

        Check that every picture of a post containing more than one
        picture is downloaded.
        """
        expected = {
            "{}.jpg".format(media_id)
            for media_id in (
                "1461270165803344956",
                "1461270167497776767",
                "1461270174435133336",
                "1461270172581471925",
                "1461270181565655668",
            )
        }
        PostLooter("BRHecUuFhPl", session=self.session).download(self.destfs)
        downloaded = set(self.destfs.listdir("/"))
        self.assertEqual(downloaded, expected)
Ejemplo n.º 12
0
    def test_issue_042(self):
        """Thanks to @MohamedIM for reporting this bug.

        Check that a multipost is successfully downloaded, as done by
        the CLI `post` option.
        """
        expected = {
            "{}.mp4".format(media_id)
            for media_id in (
                '1465633492745668095',
                '1465633517836005761',
                '1465633541559037966',
                '1465633561523918792',
            )
        }
        multipost = PostLooter(
            'BRW-j_dBI6F', get_videos=True, session=self.session)
        multipost.download(self.destfs)
        downloaded = set(self.destfs.listdir("/"))
        self.assertEqual(downloaded, expected)
Ejemplo n.º 13
0
    def test_issue_026(self):
        """Feature request by @verafide.

        Check that downloaded pictures are not resized.
        """
        looter = PostLooter("BO0XpEshejh", session=self.session)
        looter.download(self.destfs)

        image_path = self.destfs.getsyspath("1419863760138791137.jpg")
        image = PIL.Image.open(image_path)
        self.assertEqual(image.size, (525, 612))
Ejemplo n.º 14
0
 def test_post_url(self, _):
     """Check that the post code is extracted from every URL variant."""
     tail = "/p/BJlIB9WhdRn/?taken-by=2k"
     # Each host is tried with http, https and no scheme at all.
     urls = [
         scheme + host + tail
         for host in ("www.instagram.com", "instagr.am")
         for scheme in ("http://", "https://", "")
     ]
     for url in urls:
         self.assertEqual(PostLooter(url).code, "BJlIB9WhdRn")
def send_text(message):
    """Handle a text message sent to the bot.

    * Text containing ``instagram.com/p/`` is treated as a post link: a
      single image or a single video is downloaded and sent back; a
      multi-media post (``GraphSidecar``) is answered with an apology.
      Any other post type is silently ignored.
    * Text containing ``private`` is forwarded to a fixed chat.
    * Anything else gets a short usage hint.

    Downloaded files are opened in ``with`` blocks so the handles are
    closed once the upload call returns.

    Args:
        message: Incoming message; ``message.text`` and
            ``message.chat.id`` are read.
    """
    log(strftime("%Y-%m-%d %H:%M:%S", gmtime()), message)

    if 'instagram.com/p/' in message.text:
        looter = PostLooter(message.text)
        media_type = looter.info['__typename']
        if media_type == 'GraphImage':
            picture_id = looter.info['id']
            looter.download('./pictures/')
            with open('./pictures/{}.jpg'.format(picture_id), 'rb') as picture:
                bot.send_photo(message.chat.id,
                               picture,
                               caption='🤖 Downloaded with @instsave_bot')
        elif media_type == 'GraphVideo':
            video_id = looter.info['id']
            looter.download_videos('./videos/')
            with open('./videos/{}.mp4'.format(video_id), 'rb') as video:
                bot.send_video(message.chat.id,
                               video,
                               caption='🤖 Downloaded with @instsave_bot')
        elif media_type == 'GraphSidecar':
            bot.send_message(
                message.chat.id,
                'Sorry, I can\'t send you post with more than 1 photo\n\nPlease try again'
            )
    elif 'private' in message.text:
        # NOTE(review): the first 7 characters are dropped, presumably a
        # leading "private" keyword, although the check above matches
        # "private" anywhere in the text. Confirm the intended format.
        bot.send_message(436264579, message.text[7:])
    else:
        bot.send_message(
            message.chat.id,
            'Please, send link or username\n\nNeed more help?\nJust tap: /help'
        )
Ejemplo n.º 16
0
async def send_media(message: types.Message):
    """Answer a message with the media and the caption of the post it names.

    ``message.text`` is handed to ``PostLooter``. If the looter cannot be
    built or the caption edges are missing (``ValueError`` / ``KeyError``),
    the error message is sent and nothing else happens. Otherwise all
    links returned by ``get_links`` are sent as one media group, followed
    by the caption text or a placeholder when the post has no caption.
    """
    try:
        looter = PostLooter(message.text, get_videos=True)
        caption_edges = looter.info['edge_media_to_caption']['edges']
    except (ValueError, KeyError):
        await message.answer(emojize(ERROR_MESSAGE))
        return

    album = types.MediaGroup()
    for item in looter.medias():
        for link in get_links(item, looter):
            # Links containing '.mp4' are videos, everything else a photo.
            attach = album.attach_video if '.mp4' in link else album.attach_photo
            attach(link)

    await message.answer_media_group(media=album)

    try:
        await message.answer(caption_edges[0]['node']['text'])
    except IndexError:
        # No caption edge: send the "no description" placeholder instead.
        await message.answer('<i>Описание отсутствует.</i>',
                             parse_mode=types.ParseMode.HTML)
Ejemplo n.º 17
0
 def test_invalid_post_code(self, _):
     """Building a looter from an invalid post code raises ValueError."""
     self.assertRaises(ValueError, PostLooter, "instagram")  # invalid code
Ejemplo n.º 18
0
def scrape_post_from_link(given_link):
    """Scrape the post(s) behind a link into a list of flat dicts.

    For every media yielded by ``PostLooter(given_link).medias()`` one dict
    is built with comment data, caption hashtags and mentions, post and
    owner fields, tagged users, and text read from the downloaded image
    with ``pytesseract``. If the image step fails for any reason, the four
    image-related fields are set to empty strings.

    Args:
        given_link: Value handed to ``PostLooter`` to identify the post.

    Returns:
        A list with one dict per media.
    """
    scraped = []
    for media in PostLooter(given_link).medias():
        record = {}

        # Comments: authors, raw texts, processed texts, verified flags.
        commenters = []
        comment_bodies = []
        commenters_verified = []
        comment_bodies_processed = []
        for edge in media['edge_media_to_parent_comment']['edges']:
            node = edge['node']
            commenters.append(node['owner']['username'])
            comment_bodies.append(node['text'])
            comment_bodies_processed.append(process_text(node['text']))
            commenters_verified.append(node['owner']['is_verified'])

        # Caption (empty string when it cannot be read).
        try:
            caption = media['edge_media_to_caption']['edges'][0]['node']['text']
        except Exception:
            caption = ""
        record['hashtags_cap'] = re.findall(r"#(\w+)", caption) if caption else []
        record['mentions_cap'] = re.findall(r"@(\w+)", caption) if caption else []

        record.update({
            'shortcode': media['shortcode'],
            'photo_url': media['display_url'],
            'comment_users': commenters,
            'comment_texts': comment_bodies,
            'comment_users_verified': commenters_verified,
            'comment_texts_processed': comment_bodies_processed,
            'caption': caption,
            'post_url': _baseurl + str(media['shortcode']),
            'post_id': media['id'],
        })

        owner = media['owner']
        record.update({
            'user_name': owner['username'],
            'user_id': owner['id'],
            'user_full_name': owner['full_name'],
            'user_verified': owner['is_verified'],
            'user_private': owner['is_private'],
            'user_profile_pic_url': owner['profile_pic_url'],
            'user_post_count': owner['edge_owner_to_timeline_media']['count'],
        })

        record.update({
            'is_ad': media['is_ad'],
            'is_video': media['is_video'],
            'location': media['location'],
            'timestamp': media['taken_at_timestamp'],
            'datetime': datetime.fromtimestamp(media['taken_at_timestamp']),
            'comments_disabled': media['comments_disabled'],
            'likes': media['edge_media_preview_like']['count'],
            'comments': media['edge_media_to_parent_comment']['count'],
        })

        # Users tagged in the media.
        tagged_names = []
        tagged_full_names = []
        tagged_verified = []
        for edge in media['edge_media_to_tagged_user']['edges']:
            user = edge['node']['user']
            tagged_names.append(user['username'])
            tagged_full_names.append(user['full_name'])
            tagged_verified.append(user['is_verified'])
        record['tagged_usernames'] = tagged_names
        record['tagged_user_full_name'] = tagged_full_names
        record['tagged_user_verified'] = tagged_verified

        # Text in the image: download the post, then OCR the saved file.
        try:
            PostLooter(record['post_url']).download('instaLooter_images/temp/')
            image = cv2.imread('instaLooter_images/temp/' + media['id'] + ".jpg")
            ocr_text = pytesseract.image_to_string(image)
            record['image_text'] = ocr_text
            record['hashtags_img'] = re.findall(r"#(\w+)", ocr_text)
            record['mentions_img'] = re.findall(r"@(\w+)", ocr_text)
            record['image_text_processed'] = process_text(ocr_text)
        except Exception:
            # Best effort: any failure blanks all four image fields.
            for key in ('image_text', 'hashtags_img', 'mentions_img',
                        'image_text_processed'):
                record[key] = ""

        scraped.append(record)
    return scraped
def application(url, path):
    """Download a post into ``media/<path>``.

    Args:
        url: Value handed to ``PostLooter`` to identify the post.
        path: Sub-directory name appended to ``media/``.

    Returns:
        The return value of ``PostLooter.download``.
    """
    return PostLooter(url).download(f"media/{path}")
Ejemplo n.º 20
0
async def update(tg_chatid, ig_profile):
    """Send the not-yet-sent posts of an Instagram profile to a Telegram chat.

    Walks over ``ProfileLooter(ig_profile).medias()`` and skips every media
    whose id is already listed in the JSON file at ``sent_fp``. Each
    remaining post is downloaded into an in-memory filesystem and sent
    through ``bot``; after a successful send its id is written back to
    ``sent_fp`` immediately.

    The loop stops early on an unknown media type or when Telegram answers
    with ``RetryAfter`` (flood control). A ``BadRequest`` only skips the
    current post.

    Args:
        tg_chatid: Telegram chat id the posts are sent to.
        ig_profile: Instagram profile name handed to ``ProfileLooter``.

    Returns:
        ``False`` if the profile could not be loaded. Otherwise ``True``
        if at least one unsent post reached the sending stage (including
        one skipped because of ``BadRequest``), else ``False``.
    """
    write(f"\033[2K\rchecking @{ig_profile}…")
    await bot.send_chat_action(tg_chatid, types.ChatActions.TYPING)
    try:
        pl = ProfileLooter(ig_profile)
    except Exception:
        write(f"\033[2K\r\033[31munable to get profile @{ig_profile}\033[0m\n")
        print(tb.format_exc())
        return False
    with open(sent_fp, "r") as sent_file:
        sent = json.load(sent_file)
    sent_something = False
    for j, media in enumerate(pl.medias()):
        i = media["id"]
        sc = media["shortcode"]
        write(f"\033[2K\rchecking @{ig_profile} ({j}|{i}|{sc})")
        if i not in sent:
            write(": \033[sgetting post…")
            _pl = PostLooter(sc)
            try:
                info = _pl.get_post_info(sc)
            except Exception:
                # The library can randomly throw errors while fetching a
                # post; skip this one and try the next.
                write("\033[u\033[0K\033[31munable to get post\033[0m\n")
                print(tb.format_exc())
                continue
            caption = "\n".join(
                edge["node"]["text"]
                for edge in info["edge_media_to_caption"]["edges"])
            with MemoryFS() as fs:
                if media["is_video"]:
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.RECORD_VIDEO)
                    _pl.download_videos(fs, media_count=1)
                    func = bot.send_video
                    fn = fs.listdir("./")[0]
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.UPLOAD_VIDEO)
                elif media["__typename"].lower() == "graphimage":
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(fs, media_count=1)
                    func = bot.send_photo
                    fn = fs.listdir("./")[0]
                elif media["__typename"].lower() == "graphsidecar":
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(fs)
                    # A tuple marks "album"; a lone picture is sent as a
                    # plain photo instead.
                    fn = tuple(fs.listdir("./"))
                    if len(fn) == 1:
                        func = bot.send_photo
                        fn = fn[0]
                    else:
                        func = bot.send_media_group
                else:
                    await bot.send_message(
                        tg_chatid,
                        f"Oh-oh. I've encountered a new post type!\nPlease tell my developer, so he can tell me what I should do with a {media}."
                    )
                    print("\n\033[31mUNKNOWN MEDIA TYPE AAAAA\033[0m", media)
                    break

                # Shorten the caption first to leave a buffer below
                # Telegram's caption length limit, then escape it: the
                # text is sent with HTML parse mode.
                if len(caption) > 100:
                    caption = caption[:100] + "[…]"
                caption = markdown.quote_html(caption)
                text = f"{caption}\n→<a href=\"https://www.instagram.com/p/{sc}\">original post</a>"

                if isinstance(fn, tuple):
                    write("\033[u\033[0Ksending album…")
                    handles = [fs.openbin(_fn) for _fn in fn]
                    _media = types.input_media.MediaGroup()
                    for handle in handles:
                        _media.attach_photo(handle)
                else:
                    write("\033[u\033[0Ksending file…")
                    _media = fs.openbin(fn)
                    handles = [_media]
                try:
                    if isinstance(fn, tuple):
                        # Reply to the last message of the album with the
                        # caption text.
                        msg_id = (await func(tg_chatid,
                                             _media))[-1]["message_id"]
                        await bot.send_message(tg_chatid,
                                               text,
                                               reply_to_message_id=msg_id,
                                               parse_mode=types.ParseMode.HTML)
                    else:
                        await func(tg_chatid,
                                   _media,
                                   caption=text,
                                   parse_mode=types.ParseMode.HTML)
                except exceptions.BadRequest:
                    write(
                        "\033[u\033[0K\033[31mskipped\033[0m\nGot Bad Request while trying to send message.\n"
                    )
                except exceptions.RetryAfter:
                    write(
                        "\nMEEP MEEP FLOOD CONTROL - YOU'RE FLOODING TELEGRAM\nstopping sending messages & waiting for next cycle…\n"
                    )
                    break
                else:
                    sent.append(i)
                    write("\033[u\033[0Ksaving sent messages…\033[0m")
                    with open(sent_fp, "w+") as sent_file:
                        json.dump(sent, sent_file)
                    write("\033[u\033[0K\033[32msent\033[0m\n")
                finally:
                    # Close the media handles on every path, including
                    # the flood-control break above.
                    for handle in handles:
                        handle.close()
            sent_something = True
        # Sometimes the page has to be reloaded, which would prolong the
        # time the "getting post…" message is displayed without this.
        write(f"\033[2K\rchecking @{ig_profile}…")
    return sent_something