コード例 #1
0
def get_custom_comment(tweet: dict) -> dict:
    """Build a comment record from a tweet that refers to another tweet.

    The tweet is classified, in this order of precedence, as:

    * ``'retweet'`` - the key ``'retweeted_status'`` is present;
    * ``'quote'``   - ``tweet['is_quote_status']`` is truthy;
    * ``'reply'``   - ``tweet['in_reply_to_status_id']`` is truthy.

    A tweet matching none of these yields ``None``.

    Otherwise the record is produced by ``extract_fields`` (defined outside
    this block; presumably it fills the seed dict according to the spec -
    confirm against its definition), after which ``post_id`` and
    ``comment_id`` are prefixed with ``'twi_'``.

    :param tweet: tweet payload as a dict.
    :return: the comment record, or ``None`` if the tweet is not a
        retweet, quote or reply.
    """
    # Decide the comment kind plus where the parent id / metadata live.
    if 'retweeted_status' in tweet:
        kind = 'retweet'
        post_id_path = 'retweeted_status:id_str'
        metadata_path = 'retweeted_status'
    elif tweet.get('is_quote_status', False):
        kind = 'quote'
        post_id_path = 'quoted_status_id_str'
        metadata_path = 'quoted_status'
    elif tweet.get('in_reply_to_status_id', False):
        kind = 'reply'
        post_id_path = 'in_reply_to_status_id_str'
        metadata_path = None  # replies carry no embedded parent tweet
    else:
        return None

    spec = {
        'comment_text': ('text', str),
        'comment_date': ('created_at', str),
        'comment_id': ('id_str', str),
        'post_id': (post_id_path, str),
    }
    seed = {'type': kind}
    if metadata_path is None:
        # Replies get an empty metadata value set directly on the record.
        seed['metadata'] = ''
    else:
        spec['metadata'] = (metadata_path, str)

    record = extract_fields(tweet, spec, seed)
    for id_key in ('post_id', 'comment_id'):
        record[id_key] = 'twi_' + record[id_key]

    return record
コード例 #2
0
def get_custom_media(tweet: dict) -> dict:
    """Build one media record per entry in ``extended_entities:media``.

    Each record contains:

    * the fields returned by ``extract_fields`` for ``file_url`` and
      ``video_url`` (helper defined outside this block);
    * ``post_id``   - ``'twi_'`` + ``tweet['id_str']``;
    * ``video_url`` - the ``'url'`` of the last extracted variant, or ``''``
      when the extracted value is falsy;
    * ``file_path`` - last path segment of ``file_url``, with a ``/``
      inserted after its first four characters;
    * ``metadata``  - the raw media entry.

    :param tweet: tweet payload as a dict; must contain ``'id_str'``.
    :return: list of media records, or ``None`` when the media list is empty.
    """
    media_items = extract_field(tweet, 'extended_entities:media', list)
    prefixed_id = 'twi_' + tweet['id_str']

    if len(media_items) == 0:
        return None

    records = []
    for item in media_items:
        spec = {
            'file_url': ('media_url', str),
            'video_url': ('video_info:variants', str),
        }
        record = extract_fields(item, spec)
        record['post_id'] = prefixed_id

        # Keep only the URL of the last listed variant, if there is one.
        variants = record['video_url']
        if variants:
            record['video_url'] = variants[-1]['url']
        else:
            record['video_url'] = ''

        # Index 2 of the split result is the URL path component.
        basename = parse.urlsplit(record['file_url'])[2].split(r'/')[-1]
        record['file_path'] = r'/'.join([basename[:4], basename[4:]])
        record['metadata'] = item

        records.append(record)
    return records
コード例 #3
0
def get_custom_media(post):
    """Build one media record per media node of an Instagram post payload.

    The media nodes are the ``node`` of every edge under
    ``edge_sidecar_to_children:edges`` followed by the post node
    (``graphql:shortcode_media``) itself.

    Each record contains:

    * ``video_url`` - as returned by ``extract_fields`` (helper defined
      outside this block);
    * ``file_url``  - ``src`` of the last entry in ``display_resources``;
    * ``post_id``   - ``'insta_'`` + the post node's ``id``;
    * ``file_path`` - last path segment of ``file_url``, with a ``/``
      inserted after its first four characters;
    * ``metadata``  - the raw media node.

    :param post: post payload as a dict.
    :return: list of media records (never empty: the post node is always
        included).
    :raises KeyError: if an edge lacks ``'node'`` or a media node lacks
        ``'display_resources'``.
    """
    node = extract_field(post, 'graphql:shortcode_media', dict)
    post_id = extract_field(node, 'id', str)
    sidecar_edges = extract_field(node, 'edge_sidecar_to_children:edges', list)

    # The post node always carries one image, so it is processed as well.
    # Build a fresh list instead of appending to `sidecar_edges`:
    # NOTE(review): extract_field may hand back the very list stored inside
    # `post`; appending to it would mutate the caller's data, make the post
    # node reference itself, and add a duplicate entry on every repeat call.
    media_nodes = [edge['node'] for edge in sidecar_edges]
    media_nodes.append(node)

    extract_spec = {
        'video_url': ('video_url', str),
    }

    rets = []
    for media in media_nodes:
        custom_media = extract_fields(media, extract_spec)
        custom_media['file_url'] = media['display_resources'][-1]['src']
        custom_media['post_id'] = 'insta_' + post_id

        file_name = parse.urlsplit(custom_media['file_url']).path.split('/')[-1]
        custom_media['file_path'] = '/'.join([file_name[:4], file_name[4:]])
        custom_media['metadata'] = media
        rets.append(custom_media)

    return rets
コード例 #4
0
def get_custom_post(post: dict) -> dict:
    """Build a post record from an Instagram post payload.

    Fields are pulled from the ``graphql:shortcode_media`` node via
    ``extract_fields`` (helper defined outside this block), then:

    * ``post``     - when the extracted captions are a list, the ``text`` of
      every caption node joined with ``'\\n---\\n'``;
    * ``hash_tag`` - the part of ``post`` starting at its first ``'#'`` with
      newlines removed, or ``''`` when ``post`` contains no ``'#'``;
    * ``url``      - ``https://www.instagram.com/p/<shortcode>/``;
    * ``post_id``  - prefixed with ``'insta_'``;
    * ``metadata`` - the raw post node.

    :param post: post payload as a dict.
    :return: the post record.
    """
    post_head = 'graphql:shortcode_media'
    node = extract_field(post, post_head, dict)

    extract_spec = {
        'post_id': ('id', str),
        'post': ('edge_media_to_caption:edges', dict),
        'like_count': ('edge_media_preview_like:count', int),
        'post_date': ('taken_at_timestamp', int),
        'comment_count': ('edge_media_to_comment:count', int),
    }

    custom_post = extract_fields(node, extract_spec)

    # Join all caption edges into a single text.
    if isinstance(custom_post['post'], list):
        custom_post['post'] = '\n---\n'.join(x['node']['text']
                                             for x in custom_post['post'])

    # Extract the hashtag part of the post text.
    # str.find returns -1 when there is no '#'; slicing with -1 would wrongly
    # keep the last character of the caption, so that case maps to ''.
    caption = custom_post['post']
    hash_start = caption.find('#')
    if hash_start == -1:
        custom_post['hash_tag'] = ''
    else:
        custom_post['hash_tag'] = caption[hash_start:].replace('\n', '')

    custom_post['url'] = "https://www.instagram.com/p/%s/" % extract_field(
        node, 'shortcode', str)
    custom_post['post_id'] = 'insta_' + custom_post['post_id']
    custom_post['metadata'] = node

    return custom_post
コード例 #5
0
def get_custom_post(tweet: dict) -> dict:
    """Build a post record from a tweet payload.

    Fields are pulled via ``extract_fields`` (helper defined outside this
    block), then:

    * ``post_id``  - prefixed with ``'twi_'``;
    * ``url``      - ``https://twitter.com/<screen_name>/status/<id_str>``;
    * ``extended`` - normalised to a plain ``bool``;
    * ``metadata`` - the raw tweet.

    :param tweet: tweet payload as a dict; must contain ``'id_str'`` and
        ``'user'['screen_name']``.
    :return: the post record.
    """
    # NOTE(review): 'hash_tag' and 'like_count' are requested as str while
    # 'comment_count' is int - confirm this is what extract_fields expects.
    spec = {
        'post_id': ('id_str', str),
        'post': ('text', str),
        'hash_tag': ('entities:hashtags', str),
        'like_count': ('favorite_count', str),
        'post_date': ('created_at', str),
        'comment_count': ('retweet_count', int),
        'extended': ('extended_tweet', bool),
    }

    record = extract_fields(tweet, spec)
    record['post_id'] = 'twi_' + record['post_id']

    screen_name = tweet['user']['screen_name']
    tweet_id = tweet['id_str']
    record['url'] = 'https://twitter.com/{user_screenid}/status/{tweet_id}'.format(
        user_screenid=screen_name, tweet_id=tweet_id)

    record['extended'] = bool(record['extended'])
    record['metadata'] = tweet
    return record
コード例 #6
0
def get_custom_comment(post):
    """Build one comment record per comment edge of an Instagram post payload.

    Comment nodes are read from ``edge_media_to_comment:edges`` under the
    ``graphql:shortcode_media`` node. Each record contains the fields
    returned by ``extract_fields`` (helper defined outside this block):

    * ``post_id``      - the node's ``id``, prefixed with ``'insta_'``;
    * ``comment_text`` - the node's ``text``;
    * ``comment_date`` - the node's ``created_at``;
    * ``metadata``     - the raw comment node.

    :param post: post payload as a dict.
    :return: list of comment records, or ``None`` when there are no comments.
    """
    post_head = 'graphql:shortcode_media'
    node = extract_field(post, post_head, dict)

    list_comments = extract_field(node, 'edge_media_to_comment:edges', list)
    if not list_comments:
        return None

    # The text/date sources were previously swapped ('comment_text' read
    # 'created_at' and 'comment_date' read 'text'); each key now reads the
    # field its name describes.
    # NOTE(review): 'post_id' is taken from the comment node's own 'id', which
    # looks like the comment's id rather than the parent post's - confirm
    # against the consumers of this record before changing it.
    extract_spec = {
        'post_id': ('id', str),
        'comment_text': ('text', str),
        'comment_date': ('created_at', int),
    }

    rets = []
    for edge in list_comments:
        comment = edge['node']
        custom_comment = extract_fields(comment, extract_spec)

        custom_comment['post_id'] = 'insta_' + custom_comment['post_id']
        custom_comment['metadata'] = comment
        rets.append(custom_comment)

    return rets
コード例 #7
0
        meta = extract_field(node, 'metadata', dict)
        post_id = extract_field(meta, 'id', str)
        list_media = extract_field(meta, 'edge_sidecar_to_children:edges', list)

        # 무조건 이미지 하나는 포함됨
        list_media.append({'node': meta})

        rets = []

        extract_spec = {
            'video_url': ('video_url', str),
        }

        _dup_check = []
        for media in (x['node'] for x in list_media):
            custom_media = extract_fields(media, extract_spec)
            custom_media['file_url'] = media['display_resources'][-1]['src']
            if custom_media['file_url'] in _dup_check:
                continue
            custom_media['post_id'] = 'insta_' + post_id
            custom_media['file_path'] = parse.urlsplit(custom_media['file_url']).path.split(r'/')[-1]
            custom_media['file_path'] = (lambda s: r'/'.join([s[:4], s[4:]]))(custom_media['file_path'])
            custom_media['metadata'] = media
            rets.append(custom_media)

        json_media.extend(rets)

    # print(json_media)
    # print(file_path.split('_'))

    with open(file_path, 'w', encoding = 'utf-8') as fp: