Exemple #1
0
def influencer_fetch_post_analytics_instances(pa_ids, inf_ids=None):
    """Connect PostAnalytics rows to posts and report mismatching data.

    For every PostAnalytics row whose id is in ``pa_ids``,
    ``connect_url_to_post`` is called with the row's ``post_url``, its id and
    the influencer id given for that PostAnalytics id. Afterwards every
    PostAnalytics that has a post is compared with it, and an admin email is
    sent when the titles or the dates differ.

    :param pa_ids: sequence of PostAnalytics ids.
    :param inf_ids: optional sequence of influencer ids, parallel to
        ``pa_ids``; when omitted ``None`` is used for every entry.
    """
    from debra.models import PostAnalytics
    from debra.helpers import send_admin_email_via_mailsnake

    if inf_ids is None:
        inf_ids = [None] * len(pa_ids)

    # An ``id__in`` filter returns rows in no guaranteed order, so the
    # influencer id has to be looked up by PostAnalytics id instead of being
    # paired with the rows by position.
    inf_id_by_pa_id = dict(zip(pa_ids, inf_ids))

    pa_values = PostAnalytics.objects.filter(id__in=pa_ids).values_list(
        'id', 'post_url')

    for pa_id, post_url in pa_values:
        connect_url_to_post(post_url, pa_id, inf_id_by_pa_id.get(pa_id))

    pas = PostAnalytics.objects.filter(id__in=pa_ids).prefetch_related('post')

    for pa in pas:
        post = pa.post
        if not post:
            continue

        # ``create_date`` may be empty; a missing date is not a mismatch and
        # must not be dereferenced when building the email.
        post_date = post.create_date.date() if post.create_date else None

        titles_differ = post.title != pa.post_title
        dates_differ = post_date is not None and post_date != pa.post_date

        if titles_differ or dates_differ:
            send_admin_email_via_mailsnake(
                'PostAnalytics={} and Post={} data mismatch'.format(
                    pa.id, post.id),
                'Titles: "{}", "{}" <br />Dates: "{}", "{}"'.format(
                    pa.post_title, post.title, pa.post_date, post_date))
Exemple #2
0
def genereate_new_report_notification(collection_id):
    """Tell admins and Intercom that a report generation was requested.

    Loads the PostAnalyticsCollection with the given id, emails the admins a
    short summary (timestamp, collection id and name, user) and tracks a
    ``generate-report-requested`` Intercom event for the collection's user.

    :param collection_id: id of the PostAnalyticsCollection.
    """
    from debra.models import PostAnalyticsCollection
    from debra import account_helpers
    from debra import helpers

    collection = PostAnalyticsCollection.objects.get(id=collection_id)

    started_at = datetime.datetime.now().strftime('%c')
    requested_by = collection.user or 'No user'

    body = '''Started to generate a new report at {}:
                collection_id = {},
                collection_name = {},
                user = {}
            '''.format(started_at, collection.id, collection.name,
                       requested_by)
    subject = 'New report for collection_id={}, {}'.format(
        collection.id, started_at)

    helpers.send_admin_email_via_mailsnake(subject, body)

    event_payload = {
        'collection_name': collection.name,
        'collection_id': collection.id,
    }
    account_helpers.intercom_track_event(
        None, "generate-report-requested", event_payload,
        user=collection.user)
Exemple #3
0
def find_and_connect_user_to_influencer(user_prof, to_save=True, **kwargs):
    """
    Link a user profile with the influencer found/created for its blog url.

    When no influencer comes back, admins are emailed and the profile gets an
    error marker. Otherwise name, emails and the user are copied onto the
    influencer and the profile is pointed at it. With ``to_save`` set, both
    objects are saved, intercom is updated, the influencer is either flagged
    as ready to invite or sent through sequential processing, and finally the
    profile/influencer connectivity check is run.

    *param: user_prof - the user profile to connect
    *param: to_save - persist the changes when truthy
    *return: None
    """
    from debra.models import Influencer
    from debra.helpers import create_influencer_and_blog_platform, send_admin_email_via_mailsnake
    from platformdatafetcher import platformutils, postprocessing

    influencer = create_influencer_and_blog_platform(
        user_prof.blog_page, 'blogger_signup', to_save, False)
    log.info("Found %r possible influencer for profile [%s %s]" %
             (influencer, user_prof.user, user_prof.blog_page))

    if influencer:
        log.info("Found %s influencer for signed up user %s" %
                 (influencer, user_prof))
        signup_email = user_prof.user.email
        influencer.name = user_prof.name
        influencer.email_for_advertising_or_collaborations = signup_email
        influencer.email = signup_email
        user_prof.influencer = influencer
        influencer.shelf_user = user_prof.user
        influencer.append_source('blogger_signup')
        log.info("Done connecting User: [%s, %s] with Influencer: [%s, %s]" %
                 (user_prof.blog_page, user_prof.user.email,
                  influencer.email_for_advertising_or_collaborations,
                  influencer.blog_url))
    else:
        log.info("No influencer found for User_prof_id: %s" % (user_prof.id, ))
        send_admin_email_via_mailsnake("No influencer found for user",
                                       "User_prof_id: %s" % (user_prof.id, ))
        user_prof.error_when_connecting_to_influencer = "NO INFLUENCERS"

    if not to_save:
        return

    user_prof.save()
    if influencer:
        influencer.save()
        user_prof.update_intercom()

        # Invite when the influencer is not flagged yet and either already
        # shows on search (so the profile must be ok) or has been QA-ed.
        should_invite = not influencer.ready_to_invite and (
            influencer.show_on_search
            or (influencer.validated_on
                and 'info' in influencer.validated_on))

        if should_invite:
            influencer.ready_to_invite = True
            influencer.save()
            user_prof.update_intercom()
        else:
            # everything else goes through the complete processing
            postprocessing.process_new_influencer_sequentially(
                influencer.id, assume_blog=True)

    check_user_prof_influencer_connectivity(user_prof.id)
Exemple #4
0
def send_missing_emails_report():
    """Email admins how many influencers have missing emails.

    The number reported is the number of keys in the mapping returned by
    ``Influencer.objects.missing_emails_data()``.
    """
    from debra import helpers
    from debra.models import Influencer

    missing = Influencer.objects.missing_emails_data()
    total = len(missing.keys())

    subject = "Missing emails daily report: {} found".format(total)
    body = (
        "During our daily check we have found {} influencers with missing emails"
        .format(total))

    helpers.send_admin_email_via_mailsnake(subject, body)
Exemple #5
0
 def visit_page(page_url):
     """Open ``page_url`` in a Selenium browser and report load failures.

     A failing page load is not re-raised; the traceback is emailed to the
     admins instead. ``pa`` is not defined here - presumably it comes from
     the enclosing function this snippet was taken from (TODO confirm).

     :param page_url: url to load.
     """
     log.info('* Opening {} with Selenium...'.format(page_url))
     with xbrowser.XBrowser(
             headless_display=settings.AUTOCREATE_HEADLESS_DISPLAY) as xb:
         xb.driver.set_page_load_timeout(60)
         xb.driver.set_script_timeout(60)
         xb.driver.implicitly_wait(10)
         try:
             xb.load_url(page_url)
         except Exception:
             # ``Exception`` instead of a bare except, so KeyboardInterrupt
             # and SystemExit are no longer swallowed and emailed.
             send_admin_email_via_mailsnake(
                 "'influencer_tracking_verification' Selenium exception for PostAnalytics={} (url={})"
                 .format(pa.id, page_url),
                 '<br />'.join(traceback.format_exc().splitlines()))
Exemple #6
0
def check_user_prof_influencer_connectivity(user_profile_id):
    """Email admins when a profile/influencer link is incomplete.

    One email is sent when the profile has no influencer; otherwise one is
    sent when that influencer has no ``shelf_user``.

    :param user_profile_id: id of the UserProfile to check.
    """
    from debra.helpers import send_admin_email_via_mailsnake
    from debra.models import UserProfile

    profile = UserProfile.objects.get(id=user_profile_id)
    influencer = profile.influencer

    if not influencer:
        send_admin_email_via_mailsnake(
            "No influencer found for %s" % profile,
            "Blog %s User %s Email %s " %
            (profile.blog_page, profile.user, profile.user.email))
        return

    if influencer.shelf_user:
        return

    send_admin_email_via_mailsnake(
        "No shelf_user found for %s" % influencer.id,
        "Influencer %s; User_profile_id %s" % (influencer, user_profile_id))
Exemple #7
0
 def get_datapoint_entity(result, datapoint_id):
     """Return the entity for ``datapoint_id`` from ``result``, or None.

     A ClickMeterException is emailed to the admins and yields None. A falsy
     lookup result also yields None. ``contract_id`` is not defined here -
     presumably it comes from the enclosing scope (TODO confirm).
     """
     try:
         found = result.find_entity(datapoint_id)
     except ClickMeterException as exc:
         send_admin_email_via_mailsnake(
             'ClickMeterException for Contract={}'.format(contract_id),
             exc.error_html)
         return None

     # No admin email is sent for a missing entity (that notification is
     # disabled); the caller just gets None.
     return found if found else None
Exemple #8
0
def send_post_analytics_report_to_admins(collection_id):
    """Email admins a data-quality summary for a finished collection report.

    Nothing is sent while ``collection.updated`` is falsy. The email lists
    how many of the collection's unique post analytics lack a title, a date
    or a comment count, together with the brand name and the user.

    :param collection_id: id of the PostAnalyticsCollection.
    """
    from debra.models import PostAnalyticsCollection
    from debra import helpers

    collection = PostAnalyticsCollection.objects.get(id=collection_id)

    if not collection.updated:
        return

    analytics = list(
        collection.get_unique_post_analytics().prefetch_related('post'))

    def _post_value_missing(pa, field):
        # A PostAnalytics without a linked post has neither title nor date,
        # so it counts as missing instead of raising AttributeError.
        return pa.post is None or getattr(pa.post, field) is None

    # ``sum`` over a generator works on Python 2 and 3, unlike
    # ``len(filter(...))``.
    missing_titles = sum(
        1 for pa in analytics if _post_value_missing(pa, 'title'))
    missing_dates = sum(
        1 for pa in analytics if _post_value_missing(pa, 'create_date'))
    missing_comments = sum(
        1 for pa in analytics if pa.post_num_comments in [-1, None])

    user = collection.user
    if user and user.userprofile and user.userprofile.name is not None:
        user_name = user.userprofile.name
    else:
        user_name = 'No name'
    user_email = user.email if user else 'No email'

    stats = [
        missing_titles,
        missing_dates,
        missing_comments,
        collection.creator_brand.name if collection.creator_brand else None,
        user.id if user else None,
        '{} ({})'.format(user_name, user_email),
    ]

    subject = 'Report for PostAnalyticsCollection.id={} is ready'.format(
        collection.id)
    body = '<br />'.join([
        'Number of missing titles: {}',
        'Number of missing dates: {}',
        'Number of missing comment counts: {}',
        'Brand name: {}',
        'User ID: {}',
        'Username: {}',
    ]).format(*stats)

    helpers.send_admin_email_via_mailsnake(subject, body)
Exemple #9
0
def update_campaign_tracking_stats(campaign_id):
    """Store a snapshot of ClickMeter click/view totals for a campaign.

    Does nothing when the campaign has no tracking group. A
    ClickMeterException is emailed to the admins. When the group's entity is
    found, a PostAnalyticsCollectionTimeSeries row is created from its
    counters (0 for any counter that is absent).

    :param campaign_id: id of the BrandJobPost.
    """
    from debra.models import (BrandJobPost, PostAnalyticsCollectionTimeSeries)
    from debra.helpers import send_admin_email_via_mailsnake

    campaign = get_object_or_404(BrandJobPost, id=campaign_id)

    group_id = campaign.tracking_group
    if group_id is None:
        return

    query = {
        'timeframe': 'beginning',
        'status': 'active',
    }
    summary = ClickMeterListResult(clickmeter_api,
                                   '/aggregated/summary/groups', query)

    try:
        entity = summary.find_entity(group_id)
    except ClickMeterException as exc:
        send_admin_email_via_mailsnake(
            'ClickMeterException for Campaign={}'.format(campaign_id),
            exc.error_html)
        return

    if not entity:
        # The "cannot find entity" admin email stays disabled because it was
        # spamming out emails.
        return

    PostAnalyticsCollectionTimeSeries.objects.create(
        collection=campaign.post_collection,
        count_clicks=entity.get('totalClicks', 0),
        count_unique_clicks=entity.get('uniqueClicks', 0),
        count_views=entity.get('totalViews', 0),
        count_unique_views=entity.get('uniqueViews', 0),
        snapshot_date=datetime.datetime.now())
Exemple #10
0
    def check_visit(datapoint, url):
        """Check whether ``url`` shows up in the datapoint's click stream.

        Sleeps for ``delay`` seconds, fetches ``/clickstream`` for the
        datapoint and compares ``url`` (surrounding whitespace and slashes
        stripped) with the first CLICKMETER_EVENTS_VERIFICATION_NUMBER
        destination urls. ``pa``, ``n``, ``delay``, ``headers`` and
        ``unquote`` are not defined here - presumably they come from the
        enclosing function (TODO confirm).

        :return: True when the url is found; None otherwise, including when
            an exception occurred (it is emailed to the admins).
        """
        log.info('* Attempt id={}, #{}'.format(pa.id, n + 1))
        log.info('* Sleeping for {} secs... id={}, #{}'.format(
            delay, pa.id, n + 1))

        time.sleep(delay)
        try:
            log.info('* Getting /clickstream... id={}, #{}'.format(
                pa.id, n + 1))

            resp = requests.get(constants.CLICKMETER_BASE_URL + '/clickstream',
                                headers=headers,
                                params={'datapoint': datapoint})
            try:
                urls = [
                    unquote(x.get('realDestinationUrl', '')).strip().strip('/')
                    for x in resp.json()['rows']
                ][:constants.CLICKMETER_EVENTS_VERIFICATION_NUMBER]
            except KeyError:
                # response without 'rows': treat as an empty click stream
                urls = []

            log.info('* Urls found={} for id={}, #{}'.format(
                len(urls), pa.id, n + 1))

            if url.strip().strip('/') in urls:
                log.info('* Post URL is found... id={}, #{}'.format(
                    pa.id, n + 1))

                return True
        except Exception:
            # ``Exception`` instead of a bare except, so KeyboardInterrupt
            # and SystemExit are no longer swallowed and emailed.
            log.info(
                '* Exception, sending email to admins... id={}, #{}'.format(
                    pa.id, n + 1))

            send_admin_email_via_mailsnake(
                "'influencer_tracking_verification' exception for PostAnalytics={}"
                .format(pa.id),
                '<br />'.join(traceback.format_exc().splitlines()))
Exemple #11
0
    def create_post(url):
        """Return a post for ``url``, creating a stub post when none is found.

        An influencer is looked up/created for the url's blog. Without one,
        admins are emailed and None is returned. An influencer that is not
        QA-ed yet ('info' missing from ``validated_on``) is processed
        sequentially before the post lookup.
        """
        blog_url = utils.post_to_blog_url(url)
        inf = helpers.create_influencer_and_blog_platform(
            blog_url, 'import_from_post_analytics', True, True)

        if not inf:
            print("No valid influencer found")
            helpers.send_admin_email_via_mailsnake(
                "Post Analytics: No valid influencer found %r" % url,
                "During our post analytics, we didn't find an influencer for this Post.url=%r"
                % (url))
            return None

        platform = inf.blog_platform
        print("Inf.validated_on: %r" % inf.validated_on)

        qa_done = inf.validated_on and 'info' in inf.validated_on
        if not qa_done:
            # not QA-ed yet, so run the sequential processing first
            postprocessing.process_new_influencer_sequentially(inf.id, True)

        # by now the influencer's data should be there; look for the post
        post = find_post_by_url(url, True, platform=platform)
        if post is not None:
            return post

        # quick artificial post (ideally this post would have been fetched)
        return Posts.objects.create(platform=platform,
                                    influencer=inf,
                                    show_on_search=inf.show_on_search,
                                    url=url)
Exemple #12
0
def mandrill_webhook(request):
    """Store a Mandrill webhook batch and queue it for processing.

    Non-POST requests and payloads that cannot be parsed get an empty
    response. When the batch cannot be saved, admins are emailed and a 500
    response is returned. Otherwise ``handle_webhook`` is queued on
    ``celery_mandrill_2`` with the new batch id.

    :param request: the incoming HTTP request; the payload is read from the
        ``mandrill_events`` POST field.
    :return: HttpResponse
    """
    from debra import models
    from debra import helpers

    if request.method != "POST":
        return HttpResponse()

    try:
        webhook_data = json.loads(request.POST.get('mandrill_events', ''))
    except Exception:
        # ``Exception`` instead of a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a logged "parse error".
        log.exception("Mandrill JSON parse error.")
        return HttpResponse()

    try:
        batch = models.MandrillBatch.objects.create(data=webhook_data)
    except Exception:
        helpers.send_admin_email_via_mailsnake("Failed to save mandrill batch",
                                               webhook_data)
        return HttpResponse(status=500)

    handle_webhook.apply_async([batch.id, True, False, False],
                               queue="celery_mandrill_2")

    return HttpResponse()
Exemple #13
0
def handle_inbound(inbound_data, admins_only=False, inbound_backup=None):
    """Relay inbound Mandrill messages between brands and influencers.

    For each inbound unit a MandrillEvent backup is created when none was
    supplied. The recipient prefix ("i" or "b") selects the direction and
    the MailProxy thread. A message not yet stored for that thread is sent
    to the clients (unless ``admins_only``), saved as a MailProxyMessage,
    and its attachments are uploaded. A copy of every message is always sent
    to the admins. The function sleeps for 10 seconds before returning.

    :param inbound_data: list of inbound event dicts (``ts`` and ``msg``
        keys are read).
    :param admins_only: when truthy, new messages are only copied to admins.
    :param inbound_backup: optional list of MandrillEvent backups parallel
        to ``inbound_data``.
    """
    from debra import models
    from debra import helpers

    log.info("Got %d inbound data units" % len(inbound_data))

    if len(inbound_data) == 0:
        log.info("Returning")
        return

    if inbound_backup is None:
        log.info("No backup provided")
        inbound_backup = [None] * len(inbound_data)

    for n, (data, backup) in enumerate(zip(inbound_data, inbound_backup)):

        if backup is None:
            log.info(
                "No backup found for #{} message, creating one...".format(n))
            backup = models.MandrillEvent.objects.create(
                data=data, type=models.MandrillEvent.TYPE_INBOUND)
            log.info("Backup for #{} message created, id={}".format(
                n, backup.id))

        log.info("Preparing message #{}, backup_id={}".format(n, backup.id))

        ts = data.get('ts')
        msg = data.get('msg')
        raw_msg = msg.get('raw_msg')
        email_to = msg.get('email')
        attachments = msg.get('attachments', [])
        images = msg.get('images', [])

        try:
            r_type = email_to.split('_')[0]
        except Exception:
            # this should never happen if all emails are ok
            # (single pre-formatted message: the logging call takes no
            # extra positional argument without a matching placeholder)
            log.exception(
                "Wrong email_to, can't split it by underscore. "
                "Wrong email: %s" % (email_to, ))
            return HttpResponse()

        if r_type == "i":
            # mail from brand to influencer
            direction = models.MailProxyMessage.DIRECTION_BRAND_2_INFLUENCER
            thread = models.MailProxy.objects.get(influencer_mail=email_to)
        elif r_type == "b":
            # mail from influencer to brand
            direction = models.MailProxyMessage.DIRECTION_INFLUENCER_2_BRAND
            thread = models.MailProxy.objects.get(brand_mail=email_to)
        else:
            # Without this branch ``direction``/``thread`` would be unbound
            # on the first message, or silently reuse the previous message's
            # thread on later ones.
            log.error(
                "#{} message: unknown recipient prefix {!r} in {!r}, "
                "skipping".format(n, r_type, email_to))
            continue

        log.info("#{} message: Brand={}, Influencer={}".format(
            n, thread.brand_id, thread.influencer_id))

        mandrill_message = {
            'text': msg.get('text'),
            'html': msg.get('html'),
            'subject': msg.get('subject'),
            'attachments': attachments,
            'images': images,
        }
        if direction == models.MailProxyMessage.DIRECTION_BRAND_2_INFLUENCER:
            complete_message_brand_2_influencer(mandrill_message,
                                                thread,
                                                ts=ts)
        if direction == models.MailProxyMessage.DIRECTION_INFLUENCER_2_BRAND:
            complete_message_influencer_2_brand(mandrill_message,
                                                thread,
                                                ts=ts)

        db_params = dict(direction=direction,
                         thread=thread,
                         msg=raw_msg,
                         type=models.MailProxyMessage.TYPE_EMAIL)

        try:
            log.info(
                '\nSearching for #{} message (thread={}, direction={}) in '
                'database...'.format(n, thread.id, direction))
            msg = models.MailProxyMessage.objects.filter(**db_params)[0]
            log.info("#{} message is found".format(n))
        except IndexError:
            # not stored yet: deliver it and record it
            log.info("#{} message is not found".format(n))
            if admins_only:
                continue
            log.info("Sending #{} message to clients".format(n))
            resp = mailsnake_send(mandrill_message)

            log.info("Set status of backup.id=%r to STATUS_SENT" % backup.id)
            backup.status = models.MandrillEvent.STATUS_SENT
            backup.save()

            mandrill_id = get_mandrill_id(resp)
            log.info("Got ID for #{} message: {}".format(n, mandrill_id))

            log.info("Saving #{} message...".format(n))
            msg = models.MailProxyMessage.objects.create(
                ts=datetime.datetime.utcfromtimestamp(int(ts)),
                mandrill_id=mandrill_id,
                **db_params)

            # a reply moves mappings waiting on a response into negotiation
            thread.candidate_mapping.filter(
                campaign_stage=models.InfluencerJobMapping.
                CAMPAIGN_STAGE_WAITING_ON_RESPONSE).update(
                    campaign_stage=models.InfluencerJobMapping.
                    CAMPAIGN_STAGE_NEGOTIATION,
                    campaign_stage_prev=models.InfluencerJobMapping.
                    CAMPAIGN_STAGE_WAITING_ON_RESPONSE)

            log.info("#{} message is saved: id={}".format(n, msg.id))
            log.info("Set status of backup.id=%r to STATUS_SAVED" % backup.id)
            backup.status = models.MandrillEvent.STATUS_SAVED
            backup.save()

            if is_invalid(resp):
                log.info(
                    "[ADMIN NOTIFICATION] Message #{} is invalid".format(n))
                body = 'INVALID mandrill message. Msg_id: {}'.format(msg.id)
                log.error(body)
                helpers.send_admin_email_via_mailsnake(body, body)

            if is_status(resp, 'rejected'):
                log.info("[ADMIN NOTIFICATION] Message #{} rejected".format(n))
                helpers.send_admin_email_via_mailsnake(
                    '''REJECTED mandrill message.
                        models.MailProxyMessage(id={})
                    '''.format(msg.id), 'Response: {}'.format(resp))

            if attachments:
                log.info("Saving #{} message's attachments to DB...".format(n))
                msg.attachments = upload_message_attachments_to_s3(
                    msg, attachments.values(), True)
                msg.save()

            log.info("Set status of backup.id=%r to STATUS_PROCESSED" %
                     backup.id)
            backup.status = models.MandrillEvent.STATUS_PROCESSED
            backup.save()
        finally:
            # runs for found, new and admins_only messages alike
            log.info("Sending #{} message copy to admins...".format(n))
            mailsnake_send(mandrill_message, admins_only=True)

    print("Sleeping for 10s")
    time.sleep(10)
Exemple #14
0
def influencer_tracking_verification(pa_id, attempts=3, delay=30):
    """Verify that the tracking of a PostAnalytics entry registers visits.

    The entry is first marked as verifying. For post types other than
    'Blog' the post url is simply requested and the status is set from the
    HTTP status code. For blog posts every tracked url is opened with
    Selenium and the ClickMeter click stream of its datapoint is polled up
    to ``attempts`` times; the first url that never shows up marks the entry
    with a verification problem and emails the admins.

    :param pa_id: id of the PostAnalytics.
    :param attempts: number of click stream polls per url.
    :param delay: seconds to sleep before each poll.
    """
    from urllib2 import unquote
    from debra.models import Contract, PostAnalytics
    from debra.helpers import send_admin_email_via_mailsnake
    from xpathscraper import xbrowser

    pa = get_object_or_404(PostAnalytics, id=pa_id)
    contract = pa.contract

    pa.tracking_status = pa.TRACKING_STATUS_VERIFYING
    pa.save()

    def visit_page(page_url):
        # Load the page in a browser; failures are emailed, not raised.
        log.info('* Opening {} with Selenium...'.format(page_url))
        with xbrowser.XBrowser(
                headless_display=settings.AUTOCREATE_HEADLESS_DISPLAY) as xb:
            xb.driver.set_page_load_timeout(60)
            xb.driver.set_script_timeout(60)
            xb.driver.implicitly_wait(10)
            try:
                xb.load_url(page_url)
            except Exception:
                # not a bare except: KeyboardInterrupt/SystemExit propagate
                send_admin_email_via_mailsnake(
                    "'influencer_tracking_verification' Selenium exception for PostAnalytics={} (url={})"
                    .format(pa.id, page_url),
                    '<br />'.join(traceback.format_exc().splitlines()))

    def check_visit(datapoint, url):
        # True when ``url`` is among the datapoint's latest destination
        # urls; None otherwise. Reads ``n`` and ``headers`` from the
        # enclosing scope at call time.
        log.info('* Attempt id={}, #{}'.format(pa.id, n + 1))
        log.info('* Sleeping for {} secs... id={}, #{}'.format(
            delay, pa.id, n + 1))

        time.sleep(delay)
        try:
            log.info('* Getting /clickstream... id={}, #{}'.format(
                pa.id, n + 1))

            resp = requests.get(constants.CLICKMETER_BASE_URL + '/clickstream',
                                headers=headers,
                                params={'datapoint': datapoint})
            try:
                urls = [
                    unquote(x.get('realDestinationUrl', '')).strip().strip('/')
                    for x in resp.json()['rows']
                ][:constants.CLICKMETER_EVENTS_VERIFICATION_NUMBER]
            except KeyError:
                # response without 'rows': treat as an empty click stream
                urls = []

            log.info('* Urls found={} for id={}, #{}'.format(
                len(urls), pa.id, n + 1))

            if url.strip().strip('/') in urls:
                log.info('* Post URL is found... id={}, #{}'.format(
                    pa.id, n + 1))

                return True
        except Exception:
            # not a bare except: KeyboardInterrupt/SystemExit propagate
            log.info(
                '* Exception, sending email to admins... id={}, #{}'.format(
                    pa.id, n + 1))

            send_admin_email_via_mailsnake(
                "'influencer_tracking_verification' exception for PostAnalytics={}"
                .format(pa.id),
                '<br />'.join(traceback.format_exc().splitlines()))

    if pa.post_type not in ['Blog']:
        response = requests.get(pa.post_url)
        if response.status_code == 200:
            pa.tracking_status = pa.TRACKING_STATUS_VERIFIED
        else:
            pa.tracking_status = pa.TRACKING_STATUS_VERIFICATION_PROBLEM
        pa.save()
        return

    log.info('* Exctracting tracking data...')

    # (url to visit, ClickMeter datapoint, whether to check it at all)
    check_data = [
        (pa.post_url, contract.tracking_pixel, True),
        (contract.product_url, contract.tracking_link,
         contract.campaign.product_sending_status
         not in ['no_product_sending', 'no_product_page']),
        (contract.campaign.client_url, contract.tracking_brand_link, True),
    ]

    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'X-Clickmeter-Authkey': constants.CLICKMETER_API_KEY,
    }

    # Keeps ``n`` bound for the log lines below when ``attempts`` is 0;
    # otherwise the failure path raised NameError before recording the
    # verification problem.
    n = -1

    for url, datapoint, to_check in check_data:
        if not to_check:
            continue
        success = False
        visit_page(url)
        for n in xrange(attempts):
            # short-circuits: no more polls once one attempt succeeded
            success = success or check_visit(datapoint, url)
        if not success:
            log.info('* Nothing is found. id={}, #{}, url={}'.format(
                pa.id, n + 1, url))

            pa.tracking_status = pa.TRACKING_STATUS_VERIFICATION_PROBLEM
            pa.save()

            log.info(
                "* PostAnalytics updated with 'Verification Problem' status. id={}, #{}"
                .format(pa.id, n + 1))
            log.info(
                '* Sending email to admins about failure. id={}, #{}'.format(
                    pa.id, n + 1))

            send_admin_email_via_mailsnake(
                'Verification problem on PostAnalytics={}'.format(pa.id), '''
                # of attempts = {}, delay = {} secs<br />
                searched for url={}
                '''.format(attempts, delay, url))
            return

    pa.tracking_status = pa.TRACKING_STATUS_VERIFIED
    pa.save()

    log.info(
        "* PostAnalytics updated with 'Verified' status. id={}, #{}".format(
            pa.id, n + 1))
Exemple #15
0
def handle_new_influencers(urls, tag_id=None, brand_id=None, user_id=None):
    """Import influencers for customer-uploaded urls and keep admins posted.

    The urls are converted to blog urls and influencers are found/created
    for them. Every influencer not yet shown on search is processed
    sequentially, and all of them are added to the tag when one is given.
    Admins get an email when the import starts, when the influencers are
    created and when it finishes (with statistics).

    :param urls: iterable of post or blog urls.
    :param tag_id: optional id of the InfluencersGroup to add influencers to.
    :param brand_id: optional id of the Brands row the import is done for.
    :param user_id: optional id of the User who triggered the import.
    """
    from debra.helpers import create_influencer_and_blog_platform_bunch
    from debra.models import (InfluencersGroup, Brands, User, Influencer)
    from debra.helpers import send_admin_email_via_mailsnake
    from platformdatafetcher import postprocessing
    from xpathscraper import utils

    # A real list, so ``len(urls)`` also works where ``map`` is lazy.
    urls = [utils.post_to_blog_url(url) for url in urls]

    brand = Brands.objects.get(id=brand_id) if brand_id else None
    user = User.objects.get(id=user_id) if user_id else None
    tag = InfluencersGroup.objects.get(id=tag_id) if tag_id else None

    # ``brand_id`` is optional, so ``brand`` may be None; ``brand.name``
    # must not be dereferenced unconditionally.
    brand_name = brand.name if brand else "No Brand Given"

    send_admin_email_via_mailsnake(
        'Influencer import started for {}'.format(brand_name), '''
        <p>Influencer import started:</p></br>
        <ul>
        <li>adding {} urls</li>
        <li>collection: {}</li>
        <li>brand: {}</li>
        <li>user: {}</li>
        </ul>
        '''.format(len(urls), tag, brand, user))

    infs = create_influencer_and_blog_platform_bunch(
        urls, 'customer_uploaded', None, tags=['customer_uploaded'])

    send_admin_email_via_mailsnake(
        'Influencer Import: Influencers created now for {} and tag {}'.format(
            brand_name, tag.name if tag else "No Tag Given"), '''
        <p>Created {}<p>'''.format(len(infs)))

    infs_added_to_tag = []

    for inf in infs:
        print("Checking influencer %r blog_url %r show_on_search %r" %
              (inf.id, inf.blog_url, inf.show_on_search))
        if not inf.show_on_search:
            # it's not on search yet, so let's process this sequentially and then it'll show up on an admin table
            postprocessing.process_new_influencer_sequentially(inf.id, True)
        if tag and tag.add_influencer(inf):
            infs_added_to_tag.append(inf)
            print("influencer %r added to the tag group" % inf)

    def _count(influencers, attr):
        # Number of influencers with a truthy ``attr``.
        return sum(1 for inf in influencers if getattr(inf, attr))

    send_admin_email_via_mailsnake(
        'Influencer import finished for {}'.format(brand_name),
        '''
        <p>Please check new influencers in the <a href={}>admin table</a>. </p>
        <p>Stats:</p></br>
        <ul>
        <li>{} influencers were found/created</li>
        <li>{} urls were passed</li>
        <li>{} new influencers were added to the tag</li>
        <li>{} with show_on_search=True among newly added to the tag influencers</li>
        <li>{} with old_show_on_search=True among newly added to the tag influencers</li>
        <li>{} with show_on_search=True among all passed influencers</li>
        <li>{} with old_show_on_search=True among all passed influencers</li>
        </ul>
        '''.format(
            "https://app.theshelf.com/admin/upgrade/influencer/uploaded_by_customers/",
            len(infs),
            len(urls),
            len(infs_added_to_tag),
            _count(infs_added_to_tag, 'show_on_search'),
            _count(infs_added_to_tag, 'old_show_on_search'),
            _count(infs, 'show_on_search'),
            _count(infs, 'old_show_on_search'),
        ),
        extra_emails=['*****@*****.**'])
Exemple #16
0
def connect_url_to_post(self, url, post_analytics_id, influencer_id=None):
    """
    Attach a Posts row to the PostAnalytics record `post_analytics_id` and
    refresh the engagement counters stored on that record.

    High level goal: Given a url, we ultimately need to add that post and Influencer in our database.
        So, first we check if we have a post for that url.
            => Yes, then we can find the influencer as well associated with that post.
            => No
                => First, we need to see if there already exists an Influencer for this blog domain
                    => If yes, then use that influencer
                    => Else, create an influencer with source='import_from_post_analytics'
                => Now, that we have an influencer for this url but we dont have that post fetched. So,
                   we need to run our crawler to get that url.
                   QUESTION: do we have a method to fetch a given url?

    Parameters:
        self: the task instance; the body reads `self.request.retries` /
            `self.request.id` and calls `self.retry(...)`
            (presumably a bound Celery task -- the decorator is not visible here).
        url: only used in the admin email sent when no post could be found;
            the lookup itself works from the PostAnalytics' own `post_url`.
        post_analytics_id: id of the PostAnalytics row to process.
        influencer_id: optional Influencer id. When given and the analytics'
            `post_type` is neither None nor 'Blog', the post is looked up on
            that influencer's platform of the same type instead of by URL.

    Returns None. May raise the exception produced by `self.retry(...)` when
    the comment fetcher crashes or reports a connection error.
    """
    from debra.models import Posts, PostAnalytics, Platform, Influencer
    from debra import helpers
    from platformdatafetcher import (pbfetcher, fetcher, postprocessing, feeds,
                                     socialfetcher)
    from xpathscraper import utils
    from platformdatafetcher.fetch_blog_posts_manually import get_all_comments_number
    from platformdatafetcher.fetch_blog_posts_date import fetch_blog_posts_date
    from masuka.image_manipulator import upload_post_image

    def create_post(url):
        """Find or create the influencer for `url`'s blog and return a Posts
        row for `url` (existing or newly created); None if no influencer."""
        blog_url = utils.post_to_blog_url(url)
        inf = helpers.create_influencer_and_blog_platform(
            blog_url, 'import_from_post_analytics', True, True)

        if not inf:
            print("No valid influencer found")
            helpers.send_admin_email_via_mailsnake(
                "Post Analytics: No valid influencer found %r" % url,
                "During our post analytics, we didn't find an influencer for this Post.url=%r"
                % (url))
            return None

        platform = inf.blog_platform
        print("Inf.validated_on: %r" % inf.validated_on)
        if not inf.validated_on or 'info' not in inf.validated_on:
            # it's not QA-ed yet, so let's process this sequentially
            postprocessing.process_new_influencer_sequentially(inf.id, True)
        # at this point, we should have data for the influencer
        # now, let's check if got the post
        post = find_post_by_url(url, True, platform=platform)

        if post is None:
            # here we just create a quick post artifically (ideally we should have fetched this post)
            post = Posts.objects.create(platform=platform,
                                        influencer=inf,
                                        show_on_search=inf.show_on_search,
                                        url=url)
        return post

    def pick_the_best_post(posts):
        """
        we may get multiple posts for the same url
        1. If there is only one, return it
        2. Else, if the influencer is show_on_search >> validated_on >> others
        """
        print("pick_the_best_post: for %r" % posts)
        # Prefer posts that live on a blog platform when there are any.
        blog_posts = posts.filter(
            platform__platform_name__in=Platform.BLOG_PLATFORMS)
        if len(blog_posts) > 0:
            posts = blog_posts
        if len(posts) == 1:
            print("\tCase 1: Only one post found, so returning it")
            return posts[0]

        # first pick the best influencer
        influencers = list(posts.values_list('influencer', flat=True))
        if len(set(influencers)) == 1:
            influencer = influencers[0]
        else:
            # Several influencers own a post for this url: let the model
            # decide which one should stay.
            influencer_id = influencers[0]
            influencer_objects = Influencer.objects.filter(id__in=influencers)
            influencer_object = Influencer.objects.get(id=influencer_id)
            influencer_object = influencer_object._select_influencer_to_stay(
                influencer_objects)
            influencer = influencer_object.id
        print("selected influencer= %r" % influencer)
        posts = posts.filter(influencer=influencer)
        # Case 2: return post belonging to the best influencer
        if len(posts) == 1:
            print("\tCase 2: returning post belonging to the best influencer")
            return posts[0]
        # Case 3: return post belonging to the non url_not_found platform from the best influencer
        # NOTE(review): ascending order on 'has_content' looks like it puts
        # posts WITHOUT content first -- confirm whether '-has_content' was meant.
        posts_without_url_not_found = posts.exclude(
            platform__url_not_found=True).extra(
                select={
                    'has_content': 'content IS NOT NULL',
                }).order_by('has_content')
        if len(posts_without_url_not_found) >= 1:
            print(
                "\tCase 3: returning post belonging to the platform that is not url_not_found"
            )
            return posts_without_url_not_found[0]
        # Case 4: just use any post
        print(
            "\tInfluencer for this post are neither show_on_search nor validated, so returning with arbitrary one"
        )
        return posts[0]

    def get_resolved_url(pa):
        """Return `pa.post_url` after following HTTP redirects, falling back
        to the original url if resolution fails."""
        # use the url that we found after resolving the link
        # so the postanalytics.url will be what the user added and
        # the associated post will use the resolved link
        print('* Handling URL redirection for PA={}'.format(pa.id))
        try:
            redirected_url = utils.resolve_http_redirect(pa.post_url)
            if pa.post_url != redirected_url:
                PostAnalytics.objects.handle_redirect(pa, redirected_url)
            url = redirected_url
            print('* Resolved URL={}'.format(redirected_url))
        except Exception:
            # Best effort: any failure while resolving keeps the original url.
            # (Not a bare except, so SystemExit/KeyboardInterrupt propagate.)
            url = pa.post_url
        print('* Original URL={}'.format(pa.post_url))
        print('* Resulting URL={}'.format(url))
        return url

    def get_social_post(pa, influencer):
        """Return (post, was_created) for a non-blog analytics record, looking
        the post up on the influencer's platform matching `pa.post_type`.
        Never creates a post, so was_created is always False."""
        was_created = False
        if pa.post is not None:
            post = pa.post
        else:
            # not sure if we need this
            url = get_resolved_url(pa)
            plats = influencer.platforms().filter(platform_name=pa.post_type)
            if plats.count() > 1:
                plats = plats.exclude(url_not_found=True)
            # @TODO: add IndexError exception handling
            try:
                plat = plats[0]
            except Exception:
                # No usable platform of that type for this influencer.
                post = None
            else:
                # May be None; creating a post here is intentionally disabled.
                post = find_post_by_url(url, True, platform=plat)
        return post, was_created

    def get_post(pa):
        """Return (post, was_created) for a blog (or untyped) analytics
        record: reuse `pa.post`, else pick the best existing post for the
        resolved url, else create one."""
        post, was_created = None, False
        if pa.post is not None:
            post = pa.post
        else:
            url = get_resolved_url(pa)
            posts = find_post_by_url(url, False)
            if posts and len(posts) >= 1:
                # Re-query by id so pick_the_best_post gets a queryset it can filter.
                post_ids = [p.id for p in posts]
                posts_qs = Posts.objects.filter(id__in=post_ids)
                post = pick_the_best_post(posts_qs)
            else:
                # create a blog post only for Blog
                post = create_post(url)
                # NOTE(review): set even when create_post returned None.
                was_created = True
        return post, was_created

    def extract_post_comments(post, analytics_for_url):
        """Fetch the number of comments for `post.url` and store it on the
        (unsaved) post; -1 means the count could not be obtained.
        `analytics_for_url` is currently unused."""
        num_comments = -1
        method = None
        try:
            num_comments, method = get_all_comments_number(post.url)
            if num_comments == -1 and method == 'captcha_squarespace':
                logger.error(
                    'Captcha (Squarespace) required for comment fetcher. Post ID and url provided.',
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url
                    })
        except Exception as e:
            num_comments = -1

            if self.request.retries >= 5:
                # Out of retries: log and carry on with num_comments == -1.
                logger.error(
                    'Comment fetcher constantly crashed for %s retries.' %
                    self.request.retries,
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url,
                        'exception': e
                    })
            else:
                # Retrying the method in 1 minute when we get an exception from comment fetcher.
                logger.warning('Retrying task due to comment fetcher crash...',
                               exc_info=1,
                               extra={
                                   'post_id': post.id,
                                   'post_url': post.url,
                                   'exception': e,
                                   'retry_number': self.request.retries,
                                   'task_id': self.request.id
                               })

                raise self.retry(countdown=60 * 1, max_retries=5)

        # Retrying the method in 1 minute when we get a connection error result.
        if num_comments == -1 and method == 'connection_error':

            if self.request.retries < 3:
                logger.warning(
                    'Retrying task due to connection_error result in comment_fetcher',
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url,
                        'retry_number': self.request.retries,
                        'task_id': self.request.id
                    })

                raise self.retry(countdown=60 * 1, max_retries=3)

        post.engagement_media_numcomments = num_comments
        post.ext_num_comments = num_comments

    def extract_post_date(post):
        """Fill in `post.create_date` (and title) from the page when the post
        has no creation date yet. Failures are logged, never raised."""
        if not post.create_date:
            try:
                post_data_result = fetch_blog_posts_date(post.url)
                if post_data_result['date_published'] is not None:
                    post.create_date = post_data_result['date_published']
                if post_data_result['title'] is not None:
                    post.title = post_data_result['title']
            except Exception as e:
                # The message has no placeholder, so it must not be
                # %-formatted: doing so raised TypeError inside this handler
                # and turned a logged failure into a task crash.
                logger.error(
                    'Date fetcher has crashed. Post ID and url provided.',
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url,
                        'exception': e,
                        'retry_number': self.request.retries
                    })

    origin_analytics = PostAnalytics.objects.get(id=post_analytics_id)
    if influencer_id is not None:
        influencer = Influencer.objects.get(id=influencer_id)
    else:
        influencer = None
    collection_id = origin_analytics.collection_id

    # Social post types are resolved through the given influencer's platform;
    # blog / untyped ones are resolved by URL.
    if influencer is not None and origin_analytics.post_type not in [
            None, 'Blog'
    ]:
        post, was_created = get_social_post(origin_analytics, influencer)
    else:
        post, was_created = get_post(origin_analytics)

    if not post:
        helpers.send_admin_email_via_mailsnake(
            "Post Analytics: No valid post found %r" % url,
            "During our post analytics, we didn't find a post for this url=%r"
            % (url))
        print("No post was found for %r, so returning." %
              origin_analytics.post_url)
        return
    print("Using post: %r for URL: %r" % (post, origin_analytics.post_url))

    # The same post is already tracked in this collection under a different
    # url: drop the current record instead of keeping two for one post.
    post_exists = PostAnalytics.objects.filter(
        collection_id=collection_id,
        post=post).exclude(post_url=origin_analytics.post_url).exists()
    if post_exists:
        print('* PA with post={} and another post_url exists, removing current one.'.format(
            post.id))
        origin_analytics.delete()
        return

    origin_analytics.post = post
    origin_analytics.save()

    # BE CAREFUL IF YOU MODIFY ANY OF THE POST ANALYTICS FROM THIS LIST !!!
    if post.platform:
        analytics_for_url = PostAnalytics.objects.filter(post=post)
        post_type = post.platform.platform_name
        if post_type in Platform.BLOG_PLATFORMS:
            post_type = 'Blog'
        analytics_for_url.update(post_type=post_type)
        # Reload so the in-memory record sees the post_type just written.
        origin_analytics = PostAnalytics.objects.get(id=origin_analytics.id)
    else:
        analytics_for_url = PostAnalytics.objects.filter(
            post_url__iexact=origin_analytics.post_url)
    analytics_for_url = list(analytics_for_url)

    if post.post_image is None:
        upload_post_image(post)
        # Reload the post after the image upload.
        post = Posts.objects.get(id=post.id)

    if origin_analytics.post_type in ['Blog', None]:
        extract_post_comments(post, analytics_for_url)
        extract_post_date(post)
        post.save()
    else:
        try:
            print('* Start fetching post interactions.')
            f = fetcher.fetcher_for_platform(post.platform)
            f.fetch_post_interactions([post])
        except Exception:
            # Best effort: report the failure to admins and continue.
            helpers.send_admin_email_via_mailsnake(
                "'connect_url_to_post' exception during post interactions fetching (Post={})"
                .format(post.id),
                '<br />'.join(traceback.format_exc().splitlines()))

    def get_number_of_shares(pa):
        """Return the share count for `pa`: the platform-specific counter of
        its post for Facebook/Twitter/Pinterest, otherwise the sum of the
        record's own social counters (missing values count as 0)."""
        if pa.post_type == 'Facebook':
            return pa.post.engagement_media_numfbshares
        elif pa.post_type == 'Twitter':
            return pa.post.engagement_media_numretweets
        elif pa.post_type == 'Pinterest':
            return pa.post.engagement_media_numrepins
        else:
            return sum([
                pa.count_tweets or 0,
                pa.count_fb_shares or 0,
                pa.count_fb_likes or 0,
                pa.count_fb_comments or 0,
                pa.count_gplus_plusone or 0,
                pa.count_pins or 0,
            ])

    # Copy the post's engagement numbers onto the analytics record.
    origin_analytics.count_video_impressions = post.impressions
    origin_analytics.count_likes = post.engagement_media_numlikes
    origin_analytics.post_comments = post.ext_num_comments
    origin_analytics.count_shares = get_number_of_shares(origin_analytics)
    origin_analytics.save()

    contract = connect_post_analytics_to_contract(origin_analytics, False)

    # Propagate the post (and the contract, within the same collection) to
    # the other analytics records collected above.
    for analytics in analytics_for_url:
        if analytics.post is None:
            analytics.post = post
            analytics.post_found = True
        if contract and analytics.contract_id is None and analytics.collection_id == collection_id:
            analytics.contract_id = contract.id
        analytics.save()

    print("Collection_id=%r" % collection_id)
Exemple #17
0
def handle_events(events_data, to_save=True, events_backup=None):
    """Store a batch of Mandrill events and update related campaign statuses.

    Steps, in order:
      1. drop events rejected by `exclude_from_events`;
      2. map each event's `_id` to a mail thread via existing
         MailProxyMessage rows of TYPE_EMAIL, emailing admins when an id maps
         to several rows or to none;
      3. build one MailProxyMessage per event and make sure every event has a
         MandrillEvent backup (created here when missing);
      4. when `to_save` is true, save the messages and mark backups
         STATUS_SAVED, then move InfluencerJobMapping rows that are
         STATUS_INVITED and whose mailbox saw an 'open'/'click' event to
         STATUS_EMAIL_RECEIVED;
      5. mark every backup STATUS_PROCESSED and sleep 10 seconds.

    Parameters:
        events_data: sized sequence of event dicts; the keys read here are
            '_id', 'ts' and 'event'.
        to_save: when false, nothing is written for messages or campaign
            statuses (backups are still created and marked processed).
        events_backup: optional list of MandrillEvent objects (or None
            entries) parallel to `events_data`; defaults to all None.

    Returns None.
    """
    from debra.models import (MailProxyMessage, InfluencerJobMapping,
                              MandrillEvent)
    from debra import helpers

    log.info("Got %d evnets data units" % len(events_data))

    if len(events_data) == 0:
        log.info("Returning")
        return

    if events_backup is None:
        events_backup = [None] * len(events_data)

    event_2_type = {
        "send": MailProxyMessage.TYPE_SEND,
        "open": MailProxyMessage.TYPE_OPEN,
        "click": MailProxyMessage.TYPE_CLICK,
        "spam": MailProxyMessage.TYPE_SPAM,
        "hard_bounce": MailProxyMessage.TYPE_BOUNCE,
    }

    log.info("Got {} events".format(len(events_data)))

    # Evaluate the exclusion predicate once per event and apply the same
    # verdicts to both parallel lists so they stay aligned.
    keep_flags = [not exclude_from_events(x) for x in events_data]
    events_backup = [
        backup for keep, backup in zip(keep_flags, events_backup) if keep
    ]
    events_data = [data for keep, data in zip(keep_flags, events_data) if keep]
    log.info("Got {} events after filtering".format(len(events_data)))

    if len(events_data) == 0:
        log.info("Returning as nothing needs to be done")
        return

    mandrill_ids = [x.get('_id') for x in events_data if x.get('_id')]

    log.info("Getting corresponding messages from DB for %r" % mandrill_ids)

    messages_from_db = list(
        MailProxyMessage.objects.filter(
            mandrill_id__in=mandrill_ids,
            type=MailProxyMessage.TYPE_EMAIL).values_list(
                'mandrill_id', 'thread'))

    mandrill_id_2_thread = dict(messages_from_db)

    log.info("Mandrill_id_2_thread: %r" % mandrill_id_2_thread)
    log.info("Performing checks...")

    # check #1 for thread uniqueness
    thread_counts = Counter(
        mandrill_id for mandrill_id, _ in messages_from_db)
    if any(count > 1 for count in thread_counts.values()):
        log.info("[ADMIN NOTIFICATION] Thread uniqueness check")
        helpers.send_admin_email_via_mailsnake(
            "mandrill_id was found in multiple mailboxes",
            "('mandrill_id', 'thread_id') pairs: {},\n counts: {}".format(
                messages_from_db, thread_counts))

    # check #2 for missing messages in DB
    diff = set(mandrill_ids) - set(
        mandrill_id for mandrill_id, _ in messages_from_db)
    if diff:
        log.info("[ADMIN NOTIFICATION] Missing messages in DB check")
        helpers.send_admin_email_via_mailsnake(
            "Mandrill events fired for missing emails", str(diff))

    events_for_save = []

    for n, (data, backup) in enumerate(zip(events_data, events_backup)):
        mandrill_id = data.get('_id')
        events_for_save.append(
            MailProxyMessage(thread_id=mandrill_id_2_thread.get(mandrill_id),
                             msg=json.dumps(data),
                             ts=datetime.datetime.utcfromtimestamp(
                                 int(data.get('ts'))),
                             direction=MailProxyMessage.DIRECTION_NONE,
                             type=event_2_type.get(data.get('event')),
                             mandrill_id=mandrill_id))
        if backup is None:
            log.info(
                "No backup found for #{} event, creating one...".format(n))
            events_backup[n] = MandrillEvent.objects.create(
                data=data, type=MandrillEvent.TYPE_EVENT)

        log.info("Event found MandrillID: {} ThreadID: {}".format(
            mandrill_id, mandrill_id_2_thread.get(mandrill_id)))
    if not to_save:
        log.info("to_save == False, so not creating events data in DB")
    else:
        # save to DB
        log.info("Saving events data to DB")
        for n, (event, backup) in enumerate(
                zip(events_for_save, events_backup)):
            try:
                event.save()
            except Exception:
                # Best effort: one bad event must not stop the batch, but keep
                # the traceback so the failure can be diagnosed.
                # NOTE(review): the backup is still marked STATUS_SAVED below
                # even though the save failed -- confirm this is intended.
                log.exception("Failed to save event #{}".format(n))
            log.info("Saved backup.id %s to STATUS_SAVED" % backup.id)
            backup.status = MandrillEvent.STATUS_SAVED
            backup.save()

    # handle 'open' and 'click' events
    opened_and_clicked_emails = [
        x.get('_id') for x in events_data
        if x.get('event') in ('open', 'click')
    ]

    log.info("Got {} 'open' and 'click' events".format(
        len(opened_and_clicked_emails)))

    opened_and_clicked_threads = set(
        mandrill_id_2_thread.get(x) for x in opened_and_clicked_emails)
    # Events without a matching message have no thread; a NULL inside an
    # IN (...) filter can never match, so leave it out of the query.
    opened_and_clicked_threads.discard(None)

    log.info("Getting associated campaigns...")
    job_mappings = InfluencerJobMapping.objects.filter(
        status=InfluencerJobMapping.STATUS_INVITED,
        mailbox__in=opened_and_clicked_threads,
    )

    log.info("Should change campaigns statuses to RECEIVED: {}".format(
        job_mappings.values_list('id', flat=True)))

    if not to_save:
        log.info("to_save == False, so not changing campaign statuses DB")
    else:
        # save to DB
        log.info("Saving changes data to campaign statuses in DB")
        job_mappings.update(status=InfluencerJobMapping.STATUS_EMAIL_RECEIVED)

    # Every backup is non-None at this point: missing ones were created above.
    for backup in events_backup:
        log.info("Saved backup.id %s to STATUS_PROCESSED" % backup.id)
        backup.status = MandrillEvent.STATUS_PROCESSED
        backup.save()

    print("Sleeping for 10s")
    time.sleep(10)