Esempio n. 1
0
def process_likes(request_identifier, likes_raw):
    """Record a data point and an engagement event for every liked tweet.

    Args:
        request_identifier: Identifier handed through to each data point and
            engagement event created here.
        likes_raw: Text of the likes export. The leading
            ``window.YTD.like.part0 = `` assignment is removed and the
            remainder is parsed as JSON (a list of ``{'like': {...}}``
            entries is what the loop below reads).
    """
    payload_json = likes_raw.replace('window.YTD.like.part0 = ', '')

    for entry in json.loads(payload_json):
        liked = entry['like']
        tweet_id = liked['tweetId']

        # Only hashed/encrypted forms of the identifier and text are stored.
        point = {}
        point['pdk_hashed_tweetId'] = hash_content(tweet_id)
        point['pdk_encrypted_tweetId'] = encrypt_content(
            tweet_id.encode('utf-8'))
        point['pdk_encrypted_fullText'] = encrypt_content(
            liked['fullText'].encode('utf-8'))

        annotate_field(point, 'fullText', liked['fullText'])

        # The export has no per-like timestamp, so the import time is used.
        imported_at = timezone.now()

        DataPoint.objects.create_data_point(
            'pdk-external-twitter-like',
            request_identifier,
            point,
            user_agent='Passive Data Kit External Importer',
            created=imported_at)

        create_engagement_event(
            source='twitter',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='reaction',
            start=imported_at)
Esempio n. 2
0
def process_ad_impressions(request_identifier, ads_raw):
    """Record a data point and an engagement event per ad impression.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the data points and the engagement events.
        ads_raw: Text of the ad impressions export. The leading
            ``window.YTD.ad_impressions.part0 = `` assignment is removed and
            the remainder is parsed as JSON.
    """
    cleaned = ads_raw.replace('window.YTD.ad_impressions.part0 = ', '')

    for ad_view in json.loads(cleaned):
        impressions = ad_view['ad']['adsUserData']['adImpressions'][
            'impressions']

        for impression in impressions:
            seen_at = arrow.get(impression['impressionTime']).datetime

            # Skip impressions that include_data rejects.
            if not include_data(request_identifier, seen_at, impression):
                continue

            if 'promotedTweetInfo' in impression:
                promoted_text = impression['promotedTweetInfo']['tweetText']
                annotate_field(impression, 'tweet_text', promoted_text)

            DataPoint.objects.create_data_point(
                'pdk-external-twitter-ad-viewed',
                request_identifier,
                impression,
                user_agent='Passive Data Kit External Importer',
                created=seen_at)

            create_engagement_event(
                source='twitter',
                identifier=request_identifier,
                outgoing_engagement=0.0,
                engagement_type='advertising',
                start=seen_at)
Esempio n. 3
0
def process_search_history(request_identifier, searches_raw):
    """Queue a search data point for every text attachment of each search.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the engagement events and the queued data points.
        searches_raw: JSON text whose top-level ``searches`` key holds the
            search entries.
    """
    parsed = json.loads(searches_raw)

    for search in parsed['searches']:
        searched_at = None

        # Try the timestamp as given first; if arrow rejects it, retry with
        # the value divided by 1000.
        try:
            searched_at = arrow.get(search['timestamp']).datetime
        except ValueError:
            try:
                searched_at = arrow.get(search['timestamp'] / 1000).datetime
            except ValueError:
                pass

        if searched_at is None:
            continue

        if not include_data(request_identifier, searched_at, search):
            continue

        if 'attachments' not in search:
            continue

        for attachment in search['attachments']:
            if 'data' not in attachment:
                continue

            for data in attachment['data']:
                if 'text' not in data:
                    continue

                query_text = data['text']

                payload = {
                    'pdk_encrypted_query':
                    encrypt_content(query_text.encode('utf-8'))
                }

                annotate_field(payload, 'query', query_text)

                create_engagement_event(
                    source='facebook',
                    identifier=request_identifier,
                    outgoing_engagement=0.5,
                    engagement_type='search',
                    start=searched_at)

                queue_batch_insert(
                    DataPoint.objects.create_data_point(
                        'pdk-external-facebook-search',
                        request_identifier,
                        payload,
                        user_agent='Passive Data Kit External Importer',
                        created=searched_at,
                        skip_save=True,
                        skip_extract_secondary_identifier=True))
Esempio n. 4
0
def process_tweets(request_identifier, tweets_raw):
    """Record a data point and an engagement event for every tweet.

    Identifier, text, entity and URL fields are replaced in place by hashed
    and/or encrypted ``pdk_*`` fields before the tweet is stored.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the data points and the engagement events.
        tweets_raw: Text of the tweet export. The leading
            ``window.YTD.tweet.part0 = `` assignment is removed and the
            remainder is parsed as JSON.
    """
    tweets_raw = tweets_raw.replace('window.YTD.tweet.part0 = ', '')

    tweets = json.loads(tweets_raw)

    for tweet in tweets:
        # Entries may wrap the actual tweet under a 'tweet' key.
        if 'tweet' in tweet:
            tweet = tweet['tweet']

        created = arrow.get(tweet['created_at'],
                            'ddd MMM DD HH:mm:ss Z YYYY').datetime

        if not include_data(request_identifier, created, tweet):
            continue

        if 'id' in tweet:
            tweet['pdk_hashed_id'] = hash_content(tweet['id'])
            tweet['pdk_encrypted_id'] = encrypt_content(
                tweet['id'].encode('utf-8'))
            del tweet['id']

        if 'id_str' in tweet:
            tweet['pdk_hashed_id_str'] = hash_content(tweet['id_str'])
            tweet['pdk_encrypted_id_str'] = encrypt_content(
                tweet['id_str'].encode('utf-8'))
            del tweet['id_str']

        if 'full_text' in tweet:
            tweet['pdk_encrypted_full_text'] = encrypt_content(
                tweet['full_text'].encode('utf-8'))

            annotate_field(tweet, 'full_text', tweet['full_text'])

            del tweet['full_text']

        if 'entities' in tweet:
            entities_str = json.dumps(tweet['entities'], indent=2)
            tweet['pdk_encrypted_entities'] = encrypt_content(
                entities_str.encode('utf-8'))

            del tweet['entities']

        if 'urls' in tweet:
            urls_str = json.dumps(tweet['urls'], indent=2)
            # Encrypt the serialized URLs themselves. The previous code
            # called the string as a function and passed the entities text.
            tweet['pdk_encrypted_urls'] = encrypt_content(
                urls_str.encode('utf-8'))

            del tweet['urls']

        DataPoint.objects.create_data_point(
            'pdk-external-twitter-tweet',
            request_identifier,
            tweet,
            user_agent='Passive Data Kit External Importer',
            created=created)

        create_engagement_event(source='twitter',
                                identifier=request_identifier,
                                outgoing_engagement=1.0,
                                engagement_type='post',
                                start=created)
def process_messages_new(request_identifier, username, messages_raw):
    """Queue a direct-message data point and log an engagement per message.

    Nothing is done when the parsed JSON is not a dict.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        username: Compared with each message's ``sender_name``; a match is
            logged as outgoing engagement, anything else as incoming.
        messages_raw: JSON text with ``messages`` and ``participants`` keys.
    """
    thread = json.loads(messages_raw)

    if not isinstance(thread, dict):
        return

    for message in thread['messages']:
        # timestamp_ms is divided by 1000 before being handed to arrow.
        sent_at = arrow.get(message['timestamp_ms'] / 1000).datetime

        if not include_data(request_identifier, sent_at, message):
            continue

        point = {'pdk_recipients_count': len(thread['participants']) - 1}

        sender = message['sender_name']
        point['pdk_hashed_senderId'] = hash_content(sender.encode('utf-8'))
        point['pdk_encrypted_sender'] = encrypt_content(
            sender.encode('utf-8'))
        point['created_at'] = message['timestamp_ms']

        content = message.get('content')

        if content is not None:
            annotate_field(point, 'content', content)
            point['pdk_encrypted_content'] = encrypt_content(
                content.encode('utf-8'))

        if 'share' in message:
            share = message['share']

            point['pdk_encrypted_media_url'] = encrypt_content(
                share['link'].encode('utf-8'))

            if 'share_text' in share:
                annotate_field(point, 'share_text', share['share_text'])

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-instagram-direct-message',
                request_identifier,
                point,
                user_agent='Passive Data Kit External Importer',
                created=sent_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        # Direction decides which engagement keyword receives the weight.
        if message['sender_name'] == username:
            direction = {'outgoing_engagement': 1.0}
        else:
            direction = {'incoming_engagement': 1.0}

        create_engagement_event(source='instagram',
                                identifier=request_identifier,
                                engagement_type='message',
                                start=sent_at,
                                **direction)
def process_post_comments(request_identifier, post_comments_raw):
    """Queue a data point and log an engagement for each posted comment.

    Nothing is done when the parsed JSON is not a dict or lacks the
    ``comments_media_comments`` key. Entries whose structure raises
    ``TypeError`` are skipped; the first such entry is printed once as a
    warning.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        post_comments_raw: JSON text of the post comments export.
    """
    post_comments = json.loads(post_comments_raw)

    if not isinstance(post_comments, dict):
        return

    if 'comments_media_comments' not in post_comments:
        return

    warned = False

    for post_comment in post_comments['comments_media_comments']:
        try:
            # Subscript before calling any helper so a malformed entry
            # reaches the TypeError handler directly.
            title = post_comment['title']
            post_comment['encrypted_title'] = encrypt_content(
                title.encode('utf-8'))
            del post_comment['title']

            string_list_data = post_comment['string_list_data']
            value = string_list_data['value']

            string_list_data['encrypted_value'] = encrypt_content(
                value.encode('utf-8'))
            annotate_field(string_list_data, 'value', value)
            del string_list_data['value']

            created = arrow.get(
                post_comment['string_map_data']['Time']['timestamp']).datetime

            if include_data(request_identifier, created, post_comment):
                queue_batch_insert(
                    DataPoint.objects.create_data_point(
                        'pdk-external-instagram-comment-posted',
                        request_identifier,
                        post_comment,
                        user_agent='Passive Data Kit External Importer',
                        created=created,
                        skip_save=True,
                        skip_extract_secondary_identifier=True))

                create_engagement_event(source='instagram',
                                        identifier=request_identifier,
                                        outgoing_engagement=1.0,
                                        engagement_type='comment',
                                        start=created)
        except TypeError:
            # Warn only once per file to avoid flooding the output.
            if not warned:
                print(
                    'Unexpected structure encountered (process_post_comments): %s'
                    % json.dumps(post_comment, indent=2))
                warned = True
Esempio n. 7
0
def process_unfollows(request_identifier, unfollows):
    """Record a data point and an engagement event for each unfollow item.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the data points and the engagement events.
        unfollows: Iterable of items, each read via its ``timestamp`` and
            ``blog_name`` keys.
    """
    for entry in unfollows:
        occurred_at = arrow.get(entry['timestamp']).datetime

        if not include_data(request_identifier, occurred_at, entry):
            continue

        blog_name = entry['blog_name']

        # Only hashed/encrypted forms of the blog name are stored.
        point = {}
        point['pdk_hashed_blog_name'] = hash_content(blog_name)
        point['pdk_encrypted_blog_name'] = encrypt_content(
            blog_name.encode('utf-8'))
        point['timestamp'] = entry['timestamp']

        annotate_field(point, 'blog_name', entry['blog_name'])

        DataPoint.objects.create_data_point(
            'pdk-external-tumblr-unfollow',
            request_identifier,
            point,
            user_agent='Passive Data Kit External Importer',
            created=occurred_at)

        # NOTE(review): the event type is 'follow' even though this handles
        # unfollows - confirm that this is intended.
        create_engagement_event(
            source='tumblr',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='follow',
            start=occurred_at)
def process_messages(request_identifier, file_html):
    """Queue a chat-message data point for each dated ``<li>`` in the HTML.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        file_html: HTML text parsed with BeautifulSoup (``lxml`` parser).
    """
    document = bs4.BeautifulSoup(file_html, features='lxml')

    for item in document.findAll('li'):
        sent_at = None

        for node in item.contents:
            try:
                if node.startswith('Sent at '):
                    stamp = node.replace('Sent at ', '')
                    stamp = stamp.replace('while watching ', '')

                    sent_at = arrow.get(stamp).datetime
            except TypeError:
                # Not a string
                pass

        if sent_at is None:
            continue

        if not include_data(request_identifier, sent_at, item):
            continue

        # The last child of the list item is taken as the message; it is
        # ignored when it is a tag.
        message = item.contents[-1]

        if isinstance(message, bs4.element.Tag):
            continue

        if message is None:
            message = ''

        payload = {
            'pdk_encrypted_message': encrypt_content(message.encode('utf-8'))
        }

        annotate_field(payload, 'message', message)

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-youtube-chat-message',
                request_identifier,
                payload,
                user_agent='Passive Data Kit External Importer',
                created=sent_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='chatroom',
            start=sent_at)
Esempio n. 9
0
def process_messages(request_identifier, messages_raw, full_names):
    """Queue a message data point and log an engagement for each message.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the engagement events and the queued data points.
        messages_raw: JSON text whose ``messages`` key holds the messages.
        full_names: Container of names; a message whose ``sender_name`` is
            in it is marked outgoing, otherwise incoming.
    """
    thread = json.loads(messages_raw)

    for original in thread['messages']:
        # Work on a copy so the parsed input is left untouched.
        message = copy.deepcopy(original)

        sent_at = None

        # Try the timestamp as given first; if arrow rejects it, retry with
        # the value divided by 1000.
        try:
            sent_at = arrow.get(message['timestamp_ms']).datetime
        except ValueError:
            try:
                sent_at = arrow.get(message['timestamp_ms'] / 1000).datetime
            except ValueError:
                pass

        if sent_at is None:
            continue

        if not include_data(request_identifier, sent_at, message):
            continue

        if 'content' in message:
            message['pdk_encrypted_content'] = encrypt_content(
                message['content'].encode('utf-8'))

            annotate_field(message, 'content', message['content'])

            del message['content']

        if 'share' in message:
            share = message['share']

            # Iterate over a snapshot because the share is edited in the loop.
            for share_key in tuple(share):
                if share_key != 'link':
                    continue

                share['pdk_encrypted_link'] = encrypt_content(
                    share[share_key].encode('utf-8'))

                annotate_field(share, 'link', share[share_key])

                del share[share_key]

        is_outgoing = message['sender_name'] in full_names

        if is_outgoing:
            message['pdk_direction'] = 'outgoing'

            create_engagement_event(
                source='facebook',
                identifier=request_identifier,
                outgoing_engagement=1.0,
                engagement_type='message',
                start=sent_at)
        else:
            message['pdk_direction'] = 'incoming'

            create_engagement_event(
                source='facebook',
                identifier=request_identifier,
                incoming_engagement=1.0,
                engagement_type='message',
                start=sent_at)

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-facebook-message',
                request_identifier,
                message,
                user_agent='Passive Data Kit External Importer',
                created=sent_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))
def process_likes(request_identifier, file_json):
    """Queue a like data point and log an engagement for each liked item.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        file_json: JSON text holding the liked items; each is read via its
            ``snippet`` (``publishedAt``, ``title``) and optional
            ``contentDetails`` keys.
    """
    # (source key, destination key) for sub-objects that are serialized,
    # encrypted and then removed from the item.
    encrypted_sections = (
        ('snippet', 'pdk_encrypted_snippet'),
        ('contentDetails', 'pdk_encrypted_contentDetails'),
    )

    for like in json.loads(file_json):
        liked_at = arrow.get(like['snippet']['publishedAt']).datetime

        if not include_data(request_identifier, liked_at, like):
            continue

        title = like['snippet']['title']

        like['pdk_encrypted_title'] = encrypt_content(title.encode('utf-8'))
        like['pdk_length_title'] = len(title)

        annotate_field(like, 'title', like['snippet']['title'])

        for source_key, target_key in encrypted_sections:
            if source_key in like:
                serialized = json.dumps(like[source_key], indent=2)
                like[target_key] = encrypt_content(
                    serialized.encode('utf-8'))

                del like[source_key]

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-youtube-like',
                request_identifier,
                like,
                user_agent='Passive Data Kit External Importer',
                created=liked_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='reaction',
            start=liked_at)
def process_uploads(request_identifier, file_json):
    """Queue an upload data point and log an engagement for each upload.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        file_json: JSON text holding the uploads; each is read via its
            ``snippet`` (``publishedAt``, ``title``) and optional
            ``contentDetails`` keys.
    """
    # (source key, destination key) for sub-objects that are serialized,
    # encrypted and then removed from the item.
    encrypted_sections = (
        ('snippet', 'pdk_encrypted_snippet'),
        ('contentDetails', 'pdk_encrypted_contentDetails'),
    )

    for upload in json.loads(file_json):
        uploaded_at = arrow.get(upload['snippet']['publishedAt']).datetime

        if not include_data(request_identifier, uploaded_at, upload):
            continue

        upload['pdk_encrypted_title'] = encrypt_content(
            upload['snippet']['title'].encode('utf-8'))

        annotate_field(upload, 'title', upload['snippet']['title'])

        for source_key, target_key in encrypted_sections:
            if source_key in upload:
                serialized = json.dumps(upload[source_key], indent=2)
                upload[target_key] = encrypt_content(
                    serialized.encode('utf-8'))

                del upload[source_key]

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-youtube-upload',
                request_identifier,
                upload,
                user_agent='Passive Data Kit External Importer',
                created=uploaded_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='upload',
            start=uploaded_at)
Esempio n. 12
0
def process_page_reactions(request_identifier, reactions_raw):
    """Queue a reaction data point and log an engagement per page like.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        reactions_raw: JSON text whose ``page_likes`` key holds the entries.
    """
    parsed = json.loads(reactions_raw)

    for page_like in parsed['page_likes']:
        liked_at = arrow.get(page_like['timestamp']).datetime

        if not include_data(request_identifier, liked_at, page_like):
            continue

        if 'name' in page_like:
            page_like['pdk_encrypted_name'] = encrypt_content(
                page_like['name'].encode('utf-8'))

            annotate_field(page_like, 'name', page_like['name'])

            del page_like['name']

        # Every entry from this file is stored as a 'like' of a 'page'.
        page_like['content_type'] = 'page'
        page_like['reaction'] = 'like'

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-facebook-reaction',
                request_identifier,
                page_like,
                user_agent='Passive Data Kit External Importer',
                created=liked_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='facebook',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='reaction',
            start=liked_at)
def process_comments(request_identifier, comments_raw):
    """Queue a comment data point and log an engagement for each comment.

    Nothing is done when the parsed JSON is not a dict. Every value of the
    dict is iterated as a list of comments, and each comment is read by
    position: index 0 is the timestamp, 1 the comment text, 2 the profile.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        comments_raw: JSON text of the comments export.
    """
    grouped = json.loads(comments_raw)

    if not isinstance(grouped, dict):
        return

    for comment_list in grouped.values():
        for comment in comment_list:
            # The parsed time has its tzinfo replaced with US/Pacific.
            commented_at = arrow.get(comment[0]).replace(
                tzinfo=pytz.timezone('US/Pacific')).datetime

            if not include_data(request_identifier, commented_at, comment):
                continue

            point = {
                'pdk_encrypted_comment':
                encrypt_content(comment[1].encode('utf-8'))
            }

            annotate_field(point, 'comment', comment[1])

            point['pdk_hashed_profile'] = hash_content(comment[2])
            point['pdk_encrypted_profile'] = encrypt_content(
                comment[2].encode('utf-8'))

            queue_batch_insert(
                DataPoint.objects.create_data_point(
                    'pdk-external-instagram-comment',
                    request_identifier,
                    point,
                    user_agent='Passive Data Kit External Importer',
                    created=commented_at,
                    skip_save=True,
                    skip_extract_secondary_identifier=True))

            create_engagement_event(
                source='instagram',
                identifier=request_identifier,
                outgoing_engagement=1.0,
                engagement_type='comment',
                start=commented_at)
def process_search_history(request_identifier, file_json):
    """Queue a search data point and log an engagement for each search.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        file_json: JSON text holding the search entries; each is read via
            its ``time``, ``title`` and optional ``titleUrl`` keys.
    """
    for search in json.loads(file_json):
        searched_at = arrow.get(search['time']).datetime

        if not include_data(request_identifier, searched_at, search):
            continue

        # The title is encrypted, annotated and then removed from the entry.
        search['pdk_encrypted_title'] = encrypt_content(
            search['title'].encode('utf-8'))

        annotate_field(search, 'title', search['title'])

        del search['title']

        if 'titleUrl' in search:
            title_url = search['titleUrl']

            search['pdk_encrypted_titleUrl'] = encrypt_content(
                title_url.encode('utf-8'))
            search['pdk_length_titleUrl'] = len(title_url)

            del search['titleUrl']

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-youtube-search',
                request_identifier,
                search,
                user_agent='Passive Data Kit External Importer',
                created=searched_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='search',
            start=searched_at)
def process_watch_history(request_identifier, file_json):
    """Queue a watch data point and log an engagement per watched item.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        file_json: JSON text holding the watch entries; each is read via
            its ``time`` and ``title`` keys.
    """
    for watch in json.loads(file_json):
        watched_at = arrow.get(watch['time']).datetime

        if not include_data(request_identifier, watched_at, watch):
            continue

        # Unlike the search importer, the title is annotated but kept as-is.
        annotate_field(watch, 'title', watch['title'])

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-youtube-watch',
                request_identifier,
                watch,
                user_agent='Passive Data Kit External Importer',
                created=watched_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='watch',
            start=watched_at)
def process_posts_made(request_identifier, posts_made_raw):
    """Queue a post data point and log an engagement for each post.

    Nothing is done when the parsed JSON is not a list.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        posts_made_raw: JSON text holding the posts; each post has a
            ``media`` list whose first element supplies the timestamp.
    """
    posts = json.loads(posts_made_raw)

    if not isinstance(posts, list):
        return

    for post in posts:
        posted_at = arrow.get(post['media'][0]['creation_timestamp']).datetime

        if not include_data(request_identifier, posted_at, post):
            continue

        for media in post['media']:
            media['encrypted_title'] = encrypt_content(
                media['title'].encode('utf-8'))
            annotate_field(media, 'title', media['title'])
            del media['title']

            # Drop the EXIF data when the nested keys exist; otherwise
            # leave the media entry as it is.
            try:
                del media['media_metadata']['photo_metadata']['exif_data']
            except KeyError:
                pass

        queue_batch_insert(
            DataPoint.objects.create_data_point(
                'pdk-external-instagram-post',
                request_identifier,
                post,
                user_agent='Passive Data Kit External Importer',
                created=posted_at,
                skip_save=True,
                skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='instagram',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='post',
            start=posted_at)
Esempio n. 17
0
def process_comments(request_identifier, comments_raw): # pylint: disable=too-many-branches
    """Queue a comment data point and log an engagement for each comment.

    Entries are read from the ``comments`` key and then from the
    ``comments_v2`` key, whichever are present; both are handled the same
    way. Each comment is deep-copied before its title, comment text and
    author are replaced by encrypted/hashed ``pdk_*`` fields.

    Args:
        request_identifier: Identifier handed through to ``include_data``,
            the queued data points and the engagement events.
        comments_raw: JSON text of the comments export.
    """
    comments = json.loads(comments_raw)

    # Both keys share one code path instead of two duplicated branches.
    for collection_key in ('comments', 'comments_v2'):
        if collection_key not in comments:
            continue

        for comment in comments[collection_key]: # pylint: disable=too-many-nested-blocks
            comment = copy.deepcopy(comment)

            created = arrow.get(comment['timestamp']).datetime

            if not include_data(request_identifier, created, comment):
                continue

            if 'title' in comment:
                comment['pdk_encrypted_title'] = encrypt_content(comment['title'].encode('utf-8'))

                annotate_field(comment, 'title', comment['title'])

                del comment['title']

            if 'data' in comment:
                for datum in comment['data']:
                    if 'comment' not in datum:
                        continue

                    comment_obj = datum['comment']

                    if 'comment' in comment_obj:
                        comment_obj['pdk_encrypted_comment'] = encrypt_content(comment_obj['comment'].encode('utf-8'))

                        annotate_field(comment_obj, 'comment', comment_obj['comment'])

                        del comment_obj['comment']

                    if 'author' in comment_obj:
                        comment_obj['pdk_hashed_author'] = hash_content(comment_obj['author'])
                        comment_obj['pdk_encrypted_author'] = encrypt_content(comment_obj['author'].encode('utf-8'))

                        del comment_obj['author']

            queue_batch_insert(DataPoint.objects.create_data_point('pdk-external-facebook-comment', request_identifier, comment, user_agent='Passive Data Kit External Importer', created=created, skip_save=True, skip_extract_secondary_identifier=True))

            create_engagement_event(source='facebook', identifier=request_identifier, outgoing_engagement=1.0, engagement_type='comment', start=created)
Esempio n. 18
0
def _import_facebook_visits(request_identifier, entries, generator, engagement_type):
    """
    Queue one data point and one engagement event per visit entry.

    Each entry must carry a ``timestamp`` and a ``data`` mapping holding
    ``uri`` and ``name``. Entries rejected by ``include_data`` are skipped
    untouched. For accepted entries the clear-text ``uri`` and ``name`` are
    replaced in place by encrypted and hashed counterparts before the entry
    is queued under ``generator``.
    """
    for entry in entries:
        created = arrow.get(entry['timestamp']).datetime

        if not include_data(request_identifier, created, entry):
            continue

        details = entry['data']

        details['pdk_encrypted_uri'] = encrypt_content(details['uri'].encode('utf-8'))
        details['pdk_hashed_uri'] = hash_content(details['uri'].encode('utf-8'))

        del details['uri']

        details['pdk_encrypted_name'] = encrypt_content(details['name'].encode('utf-8'))
        details['pdk_hashed_name'] = hash_content(details['name'].encode('utf-8'))

        # The annotation is attached to the entry itself (not to its 'data'
        # mapping) and has to run before the clear-text name is dropped.
        annotate_field(entry, 'name', details['name'])

        del details['name']

        queue_batch_insert(DataPoint.objects.create_data_point(generator, request_identifier, entry, user_agent='Passive Data Kit External Importer', created=created, skip_save=True, skip_extract_secondary_identifier=True))

        create_engagement_event(source='facebook', identifier=request_identifier, outgoing_engagement=0.0, engagement_type=engagement_type, start=created)


def process_visited(request_identifier, viewed_raw):
    """
    Import the "visited things" sections of a Facebook export file.

    ``viewed_raw`` is a JSON document with a top-level ``visited_things``
    list. Every element has a ``name``; the four names listed in
    ``visit_kinds`` are imported, any other section is ignored.

    Parameters:
        request_identifier: identifier the created data points and
            engagement events are filed under.
        viewed_raw: JSON text of the export file.

    Raises:
        KeyError: if ``visited_things`` or a field of an imported entry is
            missing.
    """
    # (section name in the export, data point generator, engagement type)
    visit_kinds = (
        ('Profile visits', 'pdk-external-facebook-profile-visit', 'profile'),
        ('Page visits', 'pdk-external-facebook-page-visit', 'page'),
        ('Events visited', 'pdk-external-facebook-event-visit', 'event'),
        ('Groups visited', 'pdk-external-facebook-group-visit', 'group'),
    )

    metadata = json.loads(viewed_raw)

    for thing in metadata['visited_things']:
        for section_name, generator, engagement_type in visit_kinds:
            if thing['name'] == section_name:
                _import_facebook_visits(request_identifier, thing['entries'], generator, engagement_type)

                break
def _import_instagram_media_item(request_identifier, item, generator,
                                 engagement_type):
    """
    Queue a data point and an engagement event for one photo or video.

    ``item`` must carry ``taken_at`` and ``caption``; ``location`` is
    optional. Caption and location are replaced in place by encrypted
    versions before the item is queued under ``generator``. Items rejected
    by ``include_data`` are left untouched.
    """
    # NOTE(review): the parsed timestamp is re-labelled as US/Pacific via
    # replace(tzinfo=...) rather than converted - confirm that this matches
    # how the export writes 'taken_at'.
    created = arrow.get(item['taken_at']).replace(
        tzinfo=pytz.timezone('US/Pacific')).datetime

    if not include_data(request_identifier, created, item):
        return

    item['pdk_encrypted_caption'] = encrypt_content(
        item['caption'].encode('utf-8'))

    annotate_field(item, 'caption', item['caption'])

    del item['caption']

    if 'location' in item:
        item['pdk_encrypted_location'] = encrypt_content(
            item['location'].encode('utf-8'))

        annotate_field(item, 'location', item['location'])

        del item['location']

    queue_batch_insert(
        DataPoint.objects.create_data_point(
            generator,
            request_identifier,
            item,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

    create_engagement_event(source='instagram',
                            identifier=request_identifier,
                            outgoing_engagement=1.0,
                            engagement_type=engagement_type,
                            start=created)


def process_media(request_identifier, media_raw):
    """
    Import the photos and videos listed in an Instagram media export.

    ``media_raw`` is JSON text. Its ``photos`` and ``videos`` lists, when
    present, are imported in that order; every other key is ignored.

    Parameters:
        request_identifier: identifier the created data points and
            engagement events are filed under.
        media_raw: JSON text of the export file.

    Raises:
        KeyError: if an imported item lacks ``taken_at`` or ``caption``.
    """
    # (key in the export, data point generator, engagement type)
    media_kinds = (
        ('photos', 'pdk-external-instagram-photo', 'photo'),
        ('videos', 'pdk-external-instagram-video', 'video'),
    )

    media = json.loads(media_raw)

    for key, generator, engagement_type in media_kinds:
        # Membership test (not .get) so a non-mapping document is tolerated.
        if key not in media:
            continue

        for item in media[key]:
            _import_instagram_media_item(request_identifier, item, generator,
                                         engagement_type)
Esempio n. 20
0
def _import_facebook_reaction(request_identifier, reaction):
    """
    Scrub one reaction record in place and queue it for insertion.

    The clear-text ``title`` is encrypted, annotated, used to derive
    ``content_type`` and then removed. Within ``data`` each reaction name is
    lower-cased and each ``actor`` is encrypted, annotated and removed.
    Records rejected by ``include_data`` are left untouched.
    """
    # (marker looked for in the title, resulting content type); first match wins.
    content_markers = (
        ('\'s post', 'post'),
        ('\'s comment', 'comment'),
        ('\'s photo', 'photo'),
        ('\'s video', 'video'),
    )

    created = arrow.get(reaction['timestamp']).datetime

    if not include_data(request_identifier, created, reaction):
        return

    if 'title' in reaction:
        title = reaction['title']

        reaction['pdk_encrypted_title'] = encrypt_content(title.encode('utf-8'))

        annotate_field(reaction, 'title', title)

        for marker, content_type in content_markers:
            if marker in title:
                reaction['content_type'] = content_type

                break
        else:
            reaction['content_type'] = 'unknown'

        del reaction['title']

    if 'data' in reaction:
        for data_item in reaction['data']:
            if 'reaction' in data_item:
                details = data_item['reaction']

                details['reaction'] = details['reaction'].lower()

                if 'actor' in details:
                    details['pdk_encrypted_actor'] = encrypt_content(details['actor'].encode('utf-8'))

                    annotate_field(details, 'actor', details['actor'])

                    del details['actor']

        # NOTE(review): the insert is nested under the 'data' check, so a
        # reaction without a 'data' key is never recorded. Other importers in
        # this file insert regardless; kept as-is here - confirm the intent.
        queue_batch_insert(DataPoint.objects.create_data_point('pdk-external-facebook-reaction', request_identifier, reaction, user_agent='Passive Data Kit External Importer', created=created, skip_save=True, skip_extract_secondary_identifier=True))

        create_engagement_event(source='facebook', identifier=request_identifier, outgoing_engagement=0.5, engagement_type='reaction', start=created)


def process_post_comment_reactions(request_identifier, reactions_raw):
    """
    Import the reactions listed in a Facebook export file.

    ``reactions_raw`` is JSON text. Records are read from the ``reactions``
    key and then from the ``reactions_v2`` key, whichever are present; both
    are processed identically.

    Parameters:
        request_identifier: identifier the created data points and
            engagement events are filed under.
        reactions_raw: JSON text of the export file.

    Raises:
        KeyError: if a record lacks ``timestamp``.
    """
    reactions = json.loads(reactions_raw)

    for key in ('reactions', 'reactions_v2'):
        if key not in reactions:
            continue

        for reaction in reactions[key]:
            _import_facebook_reaction(request_identifier, reaction)
Esempio n. 21
0
def _seal_post_field(container, field, annotate=True, serialize=False):
    """
    Replace ``container[field]`` by an encrypted ``pdk_encrypted_<field>``.

    Does nothing when ``field`` is absent. With ``serialize`` the value is
    first rendered as indented JSON text. With ``annotate`` the clear text
    is handed to ``annotate_field`` before the original key is deleted.
    """
    if field not in container:
        return

    cleartext = container[field]

    if serialize:
        cleartext = json.dumps(cleartext, indent=2)

    container['pdk_encrypted_' + field] = encrypt_content(cleartext.encode('utf-8'))

    if annotate:
        annotate_field(container, field, cleartext)

    del container[field]


def process_posts(request_identifier, posts_raw): # pylint: disable=too-many-branches, too-many-statements
    """
    Import the posts contained in a Facebook export file.

    A JSON object is tagged as coming from 'others' (and unwrapped from
    ``wall_posts_sent_to_you.activity_log_data`` when that path exists); any
    other document is tagged as coming from the 'user'. A document that
    itself has a ``timestamp`` is treated as a single post. Elements that
    are not JSON objects are skipped.
    """
    posts = json.loads(posts_raw)

    source = 'others' if isinstance(posts, dict) else 'user'

    if source == 'others':
        wall_key = 'wall_posts_sent_to_you'

        if wall_key in posts and 'activity_log_data' in posts[wall_key]:
            posts = posts[wall_key]['activity_log_data']

    if 'timestamp' in posts:
        posts = [posts]

    for original_post in posts: # pylint: disable=too-many-nested-blocks
        # Work on a copy so the parsed document is never modified.
        post = copy.deepcopy(original_post)

        if not isinstance(post, dict):
            continue

        created = arrow.get(post['timestamp']).datetime

        if not include_data(request_identifier, created, post):
            continue

        _seal_post_field(post, 'title')

        for datum in post.get('data', []):
            _seal_post_field(datum, 'post')

        for attachment in post.get('attachments', []):
            if 'data' not in attachment:
                continue

            for datum in attachment['data']:
                if 'event' in datum:
                    event = datum['event']

                    _seal_post_field(event, 'name')
                    _seal_post_field(event, 'description')
                    _seal_post_field(event, 'place', serialize=True)

                if 'external_context' in datum:
                    _seal_post_field(datum['external_context'], 'url')

                if 'media' in datum:
                    media = datum['media']

                    _seal_post_field(media, 'title')
                    _seal_post_field(media, 'description')
                    _seal_post_field(media, 'uri')

                    # Metadata is encrypted only; it is not annotated.
                    _seal_post_field(media, 'media_metadata', annotate=False, serialize=True)

                # Same for a place attached directly to the datum.
                _seal_post_field(datum, 'place', annotate=False, serialize=True)

        post['pdk_facebook_source'] = source

        queue_batch_insert(DataPoint.objects.create_data_point('pdk-external-facebook-post', request_identifier, post, user_agent='Passive Data Kit External Importer', created=created, skip_save=True, skip_extract_secondary_identifier=True))

        create_engagement_event(source='facebook', identifier=request_identifier, outgoing_engagement=1.0, engagement_type='post', start=created)
Esempio n. 22
0
def _infer_own_twitter_id(conversations):
    """
    Guess the archive owner's account ID from the conversation IDs.

    Every ``conversationId`` is split on '-' into participant IDs. The owner
    is the participant common to the conversations, so the candidates from
    the first conversation are narrowed to those that also appear in each
    following one until a single ID is left.

    Returns the first remaining candidate, or ``None`` when there are no
    conversations. If the conversations never disambiguate (for example
    there is only one), the first ID of the first conversation is returned.
    """
    candidates = []

    for conversation in conversations:
        if len(candidates) == 1:
            break

        tokens = conversation['dmConversation']['conversationId'].split('-')

        if not candidates:
            candidates = tokens

            continue

        # Narrow (intersect) rather than merge: merging can only grow the
        # candidate list, so it would never settle on a single ID.
        shared = [candidate for candidate in candidates if candidate in tokens]

        # An ID with an unexpected shape must not wipe out the candidates.
        if shared:
            candidates = shared

    if not candidates:
        return None

    return candidates[0]


def process_direct_messages(request_identifier, messages_raw):  # pylint: disable=too-many-branches
    """
    Import direct messages and message reactions from a Twitter archive.

    The JavaScript assignment prefix of the archive file is stripped and the
    rest parsed as JSON. ``messageCreate`` entries become
    'pdk-external-twitter-direct-message' data points and ``reactionCreate``
    entries become 'pdk-external-twitter-direct-message-reaction' data
    points, with identifiers and text stored hashed and/or encrypted.
    Entries sent by the inferred archive owner are logged as outgoing
    engagement, all others as incoming. Any other entry type is printed.

    Parameters:
        request_identifier: identifier the created data points and
            engagement events are filed under.
        messages_raw: text of the archive's direct message file.
    """
    messages_raw = messages_raw.replace('window.YTD.direct_message.part0 = ',
                                        '')
    messages_raw = messages_raw.replace('window.YTD.direct_messages.part0 = ',
                                        '')

    conversations = json.loads(messages_raw)

    my_id = _infer_own_twitter_id(conversations)

    if my_id is None:
        return

    for conversation in conversations:  # pylint: disable=too-many-nested-blocks
        for message in conversation['dmConversation']['messages']:
            if 'messageCreate' in message:
                msg_data = message['messageCreate']

                created = arrow.get(msg_data['createdAt']).datetime

                if not include_data(request_identifier, created, msg_data):
                    continue

                pdk_message = {
                    'pdk_hashed_recipientId':
                    hash_content(msg_data['recipientId']),
                    'pdk_encrypted_recipientId':
                    encrypt_content(msg_data['recipientId'].encode('utf-8')),
                    'pdk_hashed_senderId':
                    hash_content(msg_data['senderId']),
                    'pdk_encrypted_senderId':
                    encrypt_content(msg_data['senderId'].encode('utf-8')),
                    'pdk_encrypted_text':
                    encrypt_content(msg_data['text'].encode('utf-8')),
                    'id':
                    msg_data['id'],
                    'conversationId':
                    conversation['dmConversation']['conversationId'],
                    'createdAt':
                    msg_data['createdAt']
                }

                annotate_field(pdk_message, 'text', msg_data['text'])

                # .get(): a message without the key is treated like one with
                # an empty media list.
                if msg_data.get('mediaUrls'):
                    media_urls_str = json.dumps(msg_data['mediaUrls'],
                                                indent=2)
                    pdk_message['pdk_encrypted_mediaUrls'] = encrypt_content(
                        media_urls_str.encode('utf-8'))

                DataPoint.objects.create_data_point(
                    'pdk-external-twitter-direct-message',
                    request_identifier,
                    pdk_message,
                    user_agent='Passive Data Kit External Importer',
                    created=created)

                if my_id == msg_data['senderId']:
                    create_engagement_event(source='twitter',
                                            identifier=request_identifier,
                                            outgoing_engagement=1.0,
                                            engagement_type='message',
                                            start=created)
                else:
                    create_engagement_event(source='twitter',
                                            identifier=request_identifier,
                                            incoming_engagement=1.0,
                                            engagement_type='message',
                                            start=created)
            elif 'reactionCreate' in message:
                msg_data = message['reactionCreate']

                created = arrow.get(msg_data['createdAt']).datetime

                if not include_data(request_identifier, created, msg_data):
                    continue

                pdk_message = {
                    'pdk_hashed_senderId':
                    hash_content(msg_data['senderId']),
                    'pdk_encrypted_senderId':
                    encrypt_content(msg_data['senderId'].encode('utf-8')),
                    'eventId':
                    msg_data['eventId'],
                    'reactionKey':
                    msg_data['reactionKey'],
                    'createdAt':
                    msg_data['createdAt']
                }

                DataPoint.objects.create_data_point(
                    'pdk-external-twitter-direct-message-reaction',
                    request_identifier,
                    pdk_message,
                    user_agent='Passive Data Kit External Importer',
                    created=created)

                if my_id == msg_data['senderId']:
                    create_engagement_event(source='twitter',
                                            identifier=request_identifier,
                                            outgoing_engagement=0.5,
                                            engagement_type='reaction',
                                            start=created)
                else:
                    create_engagement_event(source='twitter',
                                            identifier=request_identifier,
                                            incoming_engagement=0.5,
                                            engagement_type='reaction',
                                            start=created)
            else:
                # Unrecognised entry type: dump it for inspection.
                print('TWITTER/MSG: ' + json.dumps(message, indent=2))
Esempio n. 23
0
def _protect_viewed_field(entry, field):
    """Store encrypted and hashed copies of ``entry['data'][field]``.

    The results of ``encrypt_content`` and ``hash_content`` are written to
    ``entry['data']`` under ``pdk_encrypted_<field>`` and
    ``pdk_hashed_<field>`` (in that order). The cleartext value is left in
    place; the caller deletes it once it is no longer needed.
    """
    cleartext = entry['data'][field]

    entry['data']['pdk_encrypted_' + field] = encrypt_content(cleartext.encode('utf-8'))
    entry['data']['pdk_hashed_' + field] = hash_content(cleartext.encode('utf-8'))


def _import_viewed_entry(request_identifier, entry, generator, engagement_type, # pylint: disable=too-many-arguments
                         secret_fields=('uri',), optional_fields=(), duration_key=None):
    """Import one entry of a "viewed things" list as a data point plus engagement event.

    Args:
        request_identifier: Identifier passed through to ``include_data``,
            ``create_data_point`` and ``create_engagement_event``.
        entry: Dict with a ``timestamp`` and a ``data`` dict; modified in place.
        generator: Generator identifier given to ``create_data_point``.
        engagement_type: ``engagement_type`` given to ``create_engagement_event``.
        secret_fields: Keys of ``entry['data']`` that are replaced, in order,
            by their encrypted/hashed forms before ``name`` is handled.
        optional_fields: Subset of ``secret_fields`` that is silently skipped
            when absent; any other missing field raises ``KeyError``.
        duration_key: When given, ``entry['data'][duration_key]`` is passed as
            ``duration`` to ``create_engagement_event``.

    Entries rejected by ``include_data`` are left untouched.
    """
    created = arrow.get(entry['timestamp']).datetime

    if not include_data(request_identifier, created, entry):
        return

    for field in secret_fields:
        if field in optional_fields and field not in entry['data']:
            continue

        _protect_viewed_field(entry, field)

        del entry['data'][field]

    # The name is annotated while its cleartext is still present, and only
    # removed afterwards.
    _protect_viewed_field(entry, 'name')

    annotate_field(entry, 'name', entry['data']['name'])

    del entry['data']['name']

    queue_batch_insert(DataPoint.objects.create_data_point(
        generator,
        request_identifier,
        entry,
        user_agent='Passive Data Kit External Importer',
        created=created,
        skip_save=True,
        skip_extract_secondary_identifier=True))

    engagement = {
        'source': 'facebook',
        'identifier': request_identifier,
        'outgoing_engagement': 0.0,
        'engagement_type': engagement_type,
        'start': created,
    }

    # Read the duration only now, after the data point has been queued, so a
    # missing key surfaces at the same point as it always has.
    if duration_key is not None:
        engagement['duration'] = entry['data'][duration_key]

    create_engagement_event(**engagement)


def process_viewed(request_identifier, viewed_raw): # pylint: disable=too-many-branches, too-many-statements
    """Import the ``viewed_things`` sections of a Facebook "viewed" JSON document.

    ``viewed_raw`` is parsed with ``json.loads``. Each recognised section
    (matched on its ``name``) has its entries handed to
    ``_import_viewed_entry`` with the generator identifier and engagement
    type for that section:

    * Facebook Watch Videos and Shows / Shows, Time Viewed -> watch / video
      (Time Viewed also reports ``watch_position_seconds`` as the duration)
    * Facebook Live Videos -> watch / video
    * Articles -> link / link (``share`` is protected as well as ``uri``)
    * Marketplace Interactions / Marketplace Items -> market / shopping
    * Ads -> ad-viewed / advertising (``uri`` may be absent)
    """
    metadata = json.loads(viewed_raw)

    for thing in metadata['viewed_things']: # pylint: disable=too-many-nested-blocks
        if thing['name'] == 'Facebook Watch Videos and Shows':
            for child in thing['children']:
                if child['name'] == 'Shows':
                    for entry in child['entries']:
                        _import_viewed_entry(request_identifier, entry, 'pdk-external-facebook-watch', 'video')

                elif child['name'] == 'Time Viewed':
                    for entry in child['entries']:
                        _import_viewed_entry(request_identifier, entry, 'pdk-external-facebook-watch', 'video', duration_key='watch_position_seconds')

        elif thing['name'] == 'Facebook Live Videos':
            for entry in thing['entries']:
                _import_viewed_entry(request_identifier, entry, 'pdk-external-facebook-watch', 'video')

        elif thing['name'] == 'Articles':
            for entry in thing['entries']:
                _import_viewed_entry(request_identifier, entry, 'pdk-external-facebook-link', 'link', secret_fields=('uri', 'share'))

        elif thing['name'] == 'Marketplace Interactions':
            for child in thing['children']:
                if child['name'] == 'Marketplace Items':
                    for entry in child['entries']:
                        _import_viewed_entry(request_identifier, entry, 'pdk-external-facebook-market', 'shopping')

        elif thing['name'] == 'Ads':
            for entry in thing['entries']:
                _import_viewed_entry(request_identifier, entry, 'pdk-external-facebook-ad-viewed', 'advertising', optional_fields=('uri',))