コード例 #1
0
def run():
    """Poll a fixed set of Instagram accounts forever and record new posts.

    On every pass the newest post of each account in ``user_dict`` is fetched
    and its timestamp is compared with the value returned by
    ``db.get_instagram_timestamp``.  When the post is newer, the stored
    timestamp is updated and the post is logged.  A message row is inserted
    via ``db.insert_message`` for new posts on every pass except the first one
    after start-up.

    This function never returns.
    """
    web_api = Client(auto_patch=True, drop_incompat_keys=False)

    startup = True
    user_dict = {
        "SpaceX": "20311520",
        "jclishman.testing": "7400533474"
    }
    # Reverse mapping so an owner id can be resolved without scanning user_dict.
    screen_names_by_id = {id_str: name for name, id_str in user_dict.items()}

    while True:
        # Start from an empty list on every pass; otherwise results from all
        # earlier passes pile up and are reprocessed forever.
        feed = []
        for id_str in user_dict.values():
            try:
                feed.append(web_api.user_feed(id_str, count=1))
                time.sleep(5)
            except Exception as e:
                # Best effort: report the failure, back off, and carry on with
                # the next account.
                logger.error(str(e))
                logger.error("Error getting feed. Sleeping for 30s")
                time.sleep(30)

        for items in feed:
            # An account without posts yields an empty result.
            if not items:
                continue

            post = items[0]["node"]
            user_id_str = post["owner"]["id"]
            shortcode = post["shortcode"]
            timestamp = post["created_time"]

            # Empty string if there isn't a caption
            try:
                caption = post["caption"]["text"]
            except (KeyError, TypeError):
                caption = ''

            # Match ID number to screenname
            user_screen_name = screen_names_by_id.get(user_id_str)
            if user_screen_name is None:
                logger.error(f"Unknown Instagram owner id {user_id_str}, skipping post")
                continue

            stored_timestamp = db.get_instagram_timestamp(user_screen_name)

            if int(timestamp) > stored_timestamp:
                start_time = time.time()
                db.update_instagram_timestamp(user_screen_name, int(timestamp))

                logger.info(f"New Instagram post by @{user_screen_name}, id {user_id_str}")
                logger.info(f"Post shortcode: {shortcode}")
                logger.info(f"Post caption: {caption}")
                logger.info(f"Post timestamp: {timestamp}")

                url = f"https://instagram.com/p/{shortcode}"
                # The first pass only primes the stored timestamps.
                if not startup:
                    db.insert_message('Instagram', user_screen_name, caption.replace("\n", " "), url, start_time)

        time.sleep(10)
        startup = False
コード例 #2
0
def instagram_feed(user_handle="google", user_id=1067259270):
    """Return the latest posts of an Instagram user as a JSON response.

    Args:
        user_handle: Handle reported in each item's ``provider_handle`` field.
        user_id: Instagram user id whose feed is fetched.

    Returns:
        An ``app.response_class`` whose body is the JSON-encoded list of feed
        items, with content type ``application/json``.

    Raises:
        RuntimeError: If a feed item lacks an expected field or has a
            malformed timestamp.
    """
    # TODO: generate user_id given a user_handle
    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    user_feed_info = web_api.user_feed(user_id, count=50)  # up to fifty posts

    feed_list = []
    for feeds in user_feed_info:
        try:
            raw_item = feeds["node"]
            # pubDate is labelled GMT below, so the timestamp must be rendered
            # in UTC rather than in the server's local time zone.
            date = datetime.utcfromtimestamp(
                int(raw_item.get('taken_at_timestamp')))

            # Posts without a caption have an empty edge list.
            caption_edges = raw_item["edge_media_to_caption"]["edges"]
            if caption_edges:
                caption = caption_edges[0]["node"]["text"] or ''
            else:
                caption = ''

            feed_info = {
                "provider": "instagram",
                "provider_handle": user_handle or '',
                "link": raw_item["link"] or '',
                "likes": raw_item["likes"]["count"] or 0,
                "media": [],
                "video_views": raw_item.get('video_view_count') or 0,
                "caption": caption,
            }

            feed_info['pubDate'] = date.strftime(
                '%a, %d %b %Y %H:%M:%S') + ' GMT'
            img_link = raw_item.get('display_src') or raw_item.get(
                'thumbnail_src')
            if img_link:
                feed_info['media'].append(img_link)
            if raw_item["is_video"]:
                # NOTE(review): this stores display_url, not a video URL --
                # confirm that is intended.
                feed_info["videos"] = raw_item["display_url"]
                vid_link = feed_info["videos"]
                if vid_link:
                    feed_info['media'].append(vid_link)

            feed_list.append(feed_info)
        except (KeyError, IndexError, TypeError, ValueError):
            # Raising a bare string is itself a TypeError; raise a real
            # exception carrying the intended message instead.
            raise RuntimeError(
                "Could not get instagram feed or Feed does not exist")

    # BytesIO needs bytes; json.dumps returns text on Python 3.
    payload = json.dumps(feed_list).encode("utf-8")
    return app.response_class(BytesIO(payload),
                              content_type='application/json')
コード例 #3
0
ファイル: feed.py プロジェクト: southrop/instagram-to-twitter
def _group_gallery_media(edges, max_images=4):
    """Split gallery edges into per-tweet media groups, preserving order.

    Each video becomes its own single-item group; consecutive images are
    batched into groups of at most ``max_images`` URLs.
    """
    groups = []
    images = []
    for edge in edges:
        item = edge['node']
        if item['__typename'] == MediaType.VIDEO.value:
            # A video ends the image batch in progress, if any.
            if images:
                groups.append(images)
                images = []
            groups.append([item['video_url']])
        else:
            # Tweets only allow a limited number of images, so start a new
            # batch once the current one is full.
            if len(images) == max_images:
                groups.append(images)
                images = []
            images.append(item['display_url'])
    # Commit unfinished batch if one exists
    if images:
        groups.append(images)
    return groups


def get_feed(twitter_api):
    """Post Instagram media newer than the last processed id via twitter_api.

    Posts of the user named by the ``INSTAGRAM_USERID`` environment variable
    are walked in reverse feed order.  Every post whose numeric id is greater
    than the stored "last" id is turned into one status per media group; the
    second and later statuses of a post reply to the previous one and carry
    only the metadata line.  Afterwards the highest processed id is stored
    via ``src.last.set_last``.

    Args:
        twitter_api: When ``ENV`` is ``production`` this object's
            ``PostUpdate`` method is called; otherwise its ``write`` method
            receives the text that would have been posted.
    """
    last = src.last.get_last(src.last.PostType.MEDIA)
    highest = last

    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    user_feed = web_api.user_feed(os.getenv('INSTAGRAM_USERID'), count=23)

    is_production = os.getenv('ENV', 'dev') == 'production'

    for post in reversed(user_feed):
        node = post['node']

        # ID comes in the format 'POSTID_USERID'
        post_id = int(node['id'].split('_')[0])

        # Skip posts that have been processed already
        if post_id <= last:
            continue

        # Hashtag
        tweet_metadata = ['#鈴木このみ', ' ']

        # Format timestamp
        timestamp = datetime.datetime.fromtimestamp(
            node['taken_at_timestamp'],
            pytz.timezone('Asia/Tokyo'))
        tweet_metadata += [timestamp.strftime('%Y-%m-%d %H:%M'), '\n']

        # Post URL
        tweet_metadata.append(node['link'])

        # Caption: tolerate a missing caption (None) instead of crashing.
        caption = (node.get('caption') or {}).get('text') or ''
        tweet_content = ['\n\n', caption] if caption else []

        # List of media groups; each group is a list of URLs for one tweet
        if node['__typename'] == MediaType.GALLERY.value:
            media = _group_gallery_media(
                node['edge_sidecar_to_children']['edges'])
        elif node['__typename'] == MediaType.VIDEO.value:
            media = [[node['video_url']]]
        else:
            media = [[node['display_url']]]

        tweet_str = twutils.truncate_status(''.join(tweet_metadata +
                                                    tweet_content))

        prev_status = 0
        for tweet_media in media:
            replyto = None
            if prev_status > 0:
                # Follow-up tweets carry only the metadata line and reply to
                # the previous status.
                tweet_str = twutils.truncate_status(''.join(tweet_metadata))
                replyto = prev_status

            if is_production:
                prev_status = twitter_api.PostUpdate(
                    tweet_str,
                    tweet_media,
                    in_reply_to_status_id=replyto).id
            else:
                prev_status += 1
                twitter_api.write(tweet_str + '\n\n')
                twitter_api.write('\n'.join(tweet_media) + '\n\n')

        # Update highest ID if higher
        if post_id > highest:
            highest = post_id

    if highest > last:
        src.last.set_last(str(highest), src.last.PostType.MEDIA)
コード例 #4
0
        new_str = new_str + doc
    ret = new_str.strip()
    ret = re.sub("[\n,/,\\\]", "", ret)
    # ret = ret.replace("  ", " ")
    return ret

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python3 get_feed.py [query] ')
        sys.exit(0)

    query = sys.argv[1]

    api = Client(auto_patch=True, drop_incompat_keys=False)

    user_feed = api.user_feed(query, count=10)

    feed_data = {
        "username": user_feed[0]["node"]["owner"]["username"],
        "user_id": user_feed[0]["node"]["owner"]["id"],
        "media": []
    }

    for photo in user_feed:

        media_shortcode = photo["node"]["shortcode"]
        data = photo["node"]

        media_data = {
            "media_shortcode": media_shortcode,
            "photo_url": data["display_url"],
コード例 #5
0
class InstagramSession(session.StreamSession):
    """Stream session that reads Instagram user feeds through the web client."""

    # Default number of posts requested per feed call.
    BATCH_COUNT = 25

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Warnings raised while constructing the client are suppressed.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.web_api = Client(
                proxy=self.proxies.get("https") if self.proxies else None,
                auto_patch=True,
                drop_incompat_keys=False)
        # Pagination cursor per user name, built with a None default factory.
        self.end_cursors = DefaultAttrDict(lambda: None)

    @memo(region="long")
    def user_name_to_id(self, user_name):
        """Return the Instagram user id for ``user_name``.

        Raises:
            SGException: If the lookup fails for any reason.
        """
        try:
            user_id = self.web_api.user_info2(user_name)["id"]
        except Exception as e:
            raise SGException(f"user id for {user_name} not found") from e
        return user_id

    def _carousel_item_source(self, item):
        """Return a media source for one carousel entry, or None if its type is unknown."""
        if item["type"] == "image":
            return self.provider.new_media_source(
                item["images"]["standard_resolution"]["url"],
                media_type="image")
        if item["type"] == "video":
            return self.provider.new_media_source(item["video_url"],
                                                  media_type="video")
        return None

    def get_feed_items(self, user_name, count=BATCH_COUNT):
        """Yield one ``AttrDict`` per post in the next batch of a user's feed.

        Args:
            user_name: Instagram user name whose feed is read.
            count: Number of posts to request.

        Yields:
            ``AttrDict`` with ``guid``, ``title``, ``post_type``, ``created``
            and ``content``.  Nothing is yielded when the feed request fails
            with a connection error.
        """
        try:
            feed = self.web_api.user_feed(
                self.user_name_to_id(user_name),
                count=count,
                end_cursor=self.end_cursors[user_name])
        except ClientConnectionError as e:
            # Without a feed there is nothing to iterate; stop instead of
            # failing on the unbound variable below.
            logger.warning(f"connection error: {e}")
            return

        for post in feed:
            node = post["node"]

            # NOTE(review): this reads the cursor from the comment page info;
            # confirm it is the right cursor for paging the feed.
            try:
                cursor = (node["edge_media_to_comment"]["page_info"]
                          ["end_cursor"])
                if cursor:
                    self.end_cursors[user_name] = cursor
            except KeyError:
                pass

            post_id = node["id"]

            try:
                title = node["caption"]["text"].replace("\n", "")
            except (TypeError, KeyError):
                # caption is None (or absent) for posts without text
                title = "(no caption)"

            media_type = node["type"]
            if media_type == "video":
                post_type = "video"
                content = self.provider.new_media_source(
                    node["videos"]["standard_resolution"]["url"],
                    media_type="video")
            elif media_type == "image":
                if "carousel_media" in node:
                    post_type = "story"
                    # Entries of unknown type stay in the list as None.
                    content = [
                        self._carousel_item_source(m)
                        for m in node["carousel_media"]
                    ]
                else:
                    post_type = "image"
                    content = self.provider.new_media_source(
                        node["images"]["standard_resolution"]["url"],
                        media_type="image")
            else:
                logger.warning(f"no content for post {post_id}")
                continue

            yield (AttrDict(guid=post_id,
                            title=title.strip(),
                            post_type=post_type,
                            created=datetime.fromtimestamp(
                                int(node["created_time"])),
                            content=content))
コード例 #6
0
ファイル: socialsafe.py プロジェクト: analytec/shieldware-pro
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError

from sightengine.client import SightengineClient
# NOTE(review): the two literals passed to SightengineClient look like API
# credentials committed in source -- consider loading them from the
# environment or a secrets store, and rotating these values.
client = SightengineClient('630881392', 'St5TPUomwvLYq7eiXd4G')
web_api = Client(auto_patch=True, drop_incompat_keys=False)
# This feed request runs at import time. The result is not referenced by any
# of the functions visible below -- TODO confirm whether it is still needed.
user_feed_info = web_api.user_feed('232192182')


def checkDrugs(my_url):
    """Run the module-level client's 'wad' check on ``my_url`` and return its 'drugs' entry."""
    return client.check('wad').set_url(my_url)['drugs']


def checkWeapons(my_url):
    """Run the module-level client's 'wad' check on ``my_url`` and return its 'weapon' entry."""
    return client.check('wad').set_url(my_url)['weapon']


def checkAlcohol(my_url):
    """Run the module-level client's 'wad' check on ``my_url`` and return its 'alcohol' entry."""
    return client.check('wad').set_url(my_url)['alcohol']


def checkCaption(my_url):
    """Print the full result of the module-level client's 'wad' check on ``my_url``."""
    print(client.check('wad').set_url(my_url))

コード例 #7
0
class InstagramCrawler:
    """Crawl one Instagram user's feed and PUT every post to a social endpoint."""

    # Web API client; created in __init__.
    api = None

    # Id of the user whose feed is crawled.
    user_id = '327416611'

    # Base URL that receives the posts; read from SOCIAL_ENDPOINT in __init__.
    social_endpoint = None

    def __init__(self):
        self.api = Client(auto_patch=True, drop_incompat_keys=False)
        self.social_endpoint = os.getenv('SOCIAL_ENDPOINT',
                                         'http://localhost:8080')

    def fetch(self, end_cursor=None):
        """Process every page of the user's feed, starting at ``end_cursor``.

        Pages are walked in a loop rather than by recursion so that long
        feeds cannot exhaust the interpreter's recursion limit.
        """
        while True:
            result = self.api.user_feed(self.user_id,
                                        count=50,
                                        extract=False,
                                        end_cursor=end_cursor)

            info = self.parse_http_result(result)

            for post in info['posts']:
                self.process_post(post)

            page_info = info['page_info']
            if not page_info.get('has_next_page', False):
                break

            # Pause between pages before requesting the next one.
            time.sleep(2)
            end_cursor = page_info['end_cursor']

    def parse_http_result(self, result):
        """Extract count, post nodes and page info from a raw feed response.

        Exits the process via ``sys.exit`` when the response status is not
        ``'ok'``.
        """
        status = result.get('status', 'error')

        if status != 'ok':
            sys.exit('api response not ok')

        media = result['data']['user']['edge_owner_to_timeline_media']

        return {
            'count': media['count'],
            'posts': [edge['node'] for edge in media['edges']],
            'page_info': media['page_info']
        }

    def process_post(self, post):
        """PUT the parsed post to ``<social_endpoint>/instagram``.

        Exits the process via ``sys.exit`` with the response text when the
        endpoint does not answer with status 200.
        """
        payload = self.parse_post(post)

        r = requests.put('%s/instagram' % self.social_endpoint, json=payload)

        if r.status_code != 200:
            sys.exit(r.text)

    def parse_post(self, post):
        """Convert a feed node into the payload dict sent to the endpoint.

        The caption has whitespace runs collapsed, ``". "`` sequences removed
        and the trailing run of hashtags stripped.  ``tags`` holds the
        distinct lower-cased hashtags in sorted order.
        """
        text_edges = post['edge_media_to_caption']['edges']

        if not text_edges:
            caption = ''
            tags = []
        else:
            text = text_edges[0]['node']['text']

            # Raw strings keep the regex escapes from being read as
            # (invalid) string escapes.
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'\.\s+', '', text)
            # Sorted so the payload is deterministic instead of depending on
            # set iteration order.
            tags = sorted({
                tag.strip().lower()
                for tag in re.findall(r'(?<=#)[^# ]+(?=#|$| )', text)
            })
            caption = re.sub(r'(#[^# ]+ )*(#[^# ]+$)', '', text)

        created = datetime.fromtimestamp(int(post['created_time']),
                                         tz=timezone.utc)

        return {
            'shortcode': post['shortcode'],
            'caption': caption,
            'tags': tags,
            'likes': post['likes']['count'],
            'comments': post['comments']['count'],
            'type': post['type'],
            'thumbnail': post['images']['thumbnail']['url'],
            'image': post['images']['standard_resolution']['url'],
            'timestamp': created.isoformat()
        }