Exemple #1
0
async def send_media(message: types.Message):
    """Answer a message with the media and caption of the post it references.

    ``message.text`` is passed to ``PostLooter`` as the post reference.
    Every link produced by ``get_links`` for every media of the post is
    attached to a single media group: as a video when the link contains
    ``.mp4``, otherwise as a photo. After the media group, the text of the
    first caption edge is sent; if the post has no caption edges a
    placeholder line is sent instead.

    If building the looter or reading the caption edges raises
    ``ValueError`` or ``KeyError``, ``ERROR_MESSAGE`` is sent and the
    handler returns without sending anything else.
    """
    try:
        looter = PostLooter(message.text, get_videos=True)
        edges = looter.info['edge_media_to_caption']['edges']
    except (ValueError, KeyError):
        await message.answer(emojize(ERROR_MESSAGE))
        return

    media = types.MediaGroup()
    has_media = False
    for m in looter.medias():
        for link in get_links(m, looter):
            if '.mp4' in link:
                media.attach_video(link)
            else:
                media.attach_photo(link)
            has_media = True

    # A media group with no items cannot be sent; skip it so the caption
    # (or the placeholder) is still delivered.
    if has_media:
        await message.answer_media_group(media=media)

    # Only the lookup is guarded: an IndexError raised while sending must
    # not be mistaken for "the post has no caption".
    try:
        description = edges[0]['node']['text']
    except IndexError:
        # Placeholder text is Russian for "No description."
        await message.answer('<i>Описание отсутствует.</i>',
                             parse_mode=types.ParseMode.HTML)
    else:
        await message.answer(description)
Exemple #2
0
def scrape_post_from_link(given_link):
    """Scrape the post behind ``given_link`` into a list of flat dicts.

    One dict is produced per media yielded by ``PostLooter(given_link)``.
    Each dict holds, in this order:

    * hashtags and mentions parsed from the caption (``*_cap``);
    * post fields (shortcode, display URL, id, URL built from ``_baseurl``);
    * comment authors, texts, processed texts and verified flags;
    * owner fields (``user_*``);
    * flags, location, timestamp (raw and as ``datetime``), like and
      comment counts;
    * tagged users (username, full name, verified flag);
    * text recognised in the downloaded image (``image_text``), the
      hashtags/mentions found in it (``*_img``) and its processed form.

    The image step is best-effort: the post is downloaded into
    ``instaLooter_images/temp/`` and ``<post id>.jpg`` is read and passed
    to ``pytesseract``. If anything in that step fails, the image fields
    are filled with empty values (``""`` for texts, ``[]`` for the
    hashtag/mention lists) and scraping continues.

    Missing keys in the post data other than the caption raise
    ``KeyError`` to the caller.
    """
    temp_dir = 'instaLooter_images/temp/'
    looter = PostLooter(given_link)
    post_list = []
    for post_info in looter.medias():
        comment_nodes = [
            edge['node']
            for edge in post_info['edge_media_to_parent_comment']['edges']
        ]
        comment_users = [node['owner']['username'] for node in comment_nodes]
        comment_texts = [node['text'] for node in comment_nodes]
        comment_texts_processed = [
            process_text(node['text']) for node in comment_nodes
        ]
        comment_users_verified = [
            node['owner']['is_verified'] for node in comment_nodes
        ]

        # A post without a caption has no caption edges (or no text).
        try:
            caption = post_info['edge_media_to_caption']['edges'][0]['node'][
                'text']
        except (KeyError, IndexError, TypeError):
            caption = ""

        tagged_users = [
            edge['node']['user']
            for edge in post_info['edge_media_to_tagged_user']['edges']
        ]
        owner = post_info['owner']

        # Key order is kept stable in case callers rely on it
        # (e.g. for column order when tabulating the result).
        post_dict = {
            'hashtags_cap': re.findall(r"#(\w+)", caption) if caption else [],
            'mentions_cap': re.findall(r"@(\w+)", caption) if caption else [],
            'shortcode': post_info['shortcode'],
            'photo_url': post_info['display_url'],
            'comment_users': comment_users,
            'comment_texts': comment_texts,
            'comment_users_verified': comment_users_verified,
            'comment_texts_processed': comment_texts_processed,
            'caption': caption,
            'post_url': _baseurl + str(post_info['shortcode']),
            'post_id': post_info['id'],
            'user_name': owner['username'],
            'user_id': owner['id'],
            'user_full_name': owner['full_name'],
            'user_verified': owner['is_verified'],
            'user_private': owner['is_private'],
            'user_profile_pic_url': owner['profile_pic_url'],
            'user_post_count': owner['edge_owner_to_timeline_media']['count'],
            'is_ad': post_info['is_ad'],
            'is_video': post_info['is_video'],
            'location': post_info['location'],
            'timestamp': post_info['taken_at_timestamp'],
            'datetime': datetime.fromtimestamp(
                post_info['taken_at_timestamp']),
            'comments_disabled': post_info['comments_disabled'],
            'likes': post_info['edge_media_preview_like']['count'],
            'comments': post_info['edge_media_to_parent_comment']['count'],
            'tagged_usernames': [user['username'] for user in tagged_users],
            'tagged_user_full_name': [
                user['full_name'] for user in tagged_users
            ],
            'tagged_user_verified': [
                user['is_verified'] for user in tagged_users
            ],
        }

        # Best-effort text extraction from the downloaded image. Any failure
        # (download, unreadable file, OCR, text processing) falls back to
        # empty values instead of aborting the scrape.
        try:
            PostLooter(post_dict['post_url']).download(temp_dir)
            img = cv2.imread(temp_dir + post_info['id'] + ".jpg")
            text = pytesseract.image_to_string(img)
            image_fields = {
                'image_text': text,
                'hashtags_img': re.findall(r"#(\w+)", text),
                'mentions_img': re.findall(r"@(\w+)", text),
                'image_text_processed': process_text(text),
            }
        except Exception:
            # Lists, not strings, so the fallback has the same types as the
            # success path and as the caption fallback.
            image_fields = {
                'image_text': "",
                'hashtags_img': [],
                'mentions_img': [],
                'image_text_processed': "",
            }
        post_dict.update(image_fields)

        post_list.append(post_dict)
    return post_list