def influencer_fetch_post_analytics_instances(pa_ids, inf_ids=None):
    from debra.models import PostAnalytics
    from debra.helpers import send_admin_email_via_mailsnake

    if inf_ids is None:
        inf_ids = [None] * len(pa_ids)

    pa_values = PostAnalytics.objects.filter(id__in=pa_ids).values_list(
        'id', 'post_url')

    for inf_id, (pa_id, post_url) in zip(inf_ids, pa_values):
        connect_url_to_post(post_url, pa_id, inf_id)

    pas = PostAnalytics.objects.filter(id__in=pa_ids).prefetch_related('post')
    for pa in pas:
        if not pa.post:
            continue
        if pa.post.title != pa.post_title or (
                pa.post.create_date and
                pa.post.create_date.date() != pa.post_date):
            send_admin_email_via_mailsnake(
                'PostAnalytics={} and Post={} data mismatch'.format(
                    pa.id, pa.post.id),
                'Titles: "{}", "{}" <br />Dates: "{}", "{}"'.format(
                    pa.post_title, pa.post.title, pa.post_date,
                    pa.post.create_date.date() if pa.post.create_date
                    else None))
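
# Usage sketch (hypothetical IDs): `pa_ids` and `inf_ids` must be
# index-aligned, one influencer id (or None) per PostAnalytics id.
#
#   influencer_fetch_post_analytics_instances([101, 102], inf_ids=[55, None])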
def genereate_new_report_notification(collection_id):
    from debra.models import PostAnalyticsCollection
    from debra import account_helpers
    from debra import helpers

    collection = PostAnalyticsCollection.objects.get(id=collection_id)
    now = datetime.datetime.strftime(datetime.datetime.now(), '%c')
    body = '''Started to generate a new report at {}:
        collection_id = {},
        collection_name = {},
        user = {}
    '''.format(now, collection.id, collection.name,
               collection.user or 'No user')
    subject = 'New report for collection_id={}, {}'.format(collection.id, now)
    helpers.send_admin_email_via_mailsnake(subject, body)
    account_helpers.intercom_track_event(
        None, "generate-report-requested", {
            'collection_name': collection.name,
            'collection_id': collection.id,
        }, user=collection.user)
def find_and_connect_user_to_influencer(user_prof, to_save=True, **kwargs):
    """Connect a UserProfile with an Influencer object and update this data
    in Intercom. Sends an email to admins in case of errors.

    :param user_prof: the UserProfile to connect
    :return: None
    """
    from debra.models import Influencer
    from debra.helpers import (create_influencer_and_blog_platform,
                               send_admin_email_via_mailsnake)
    from platformdatafetcher import platformutils, postprocessing

    blog_url = user_prof.blog_page
    influencer = create_influencer_and_blog_platform(
        blog_url, 'blogger_signup', to_save, False)
    log.info("Found %r possible influencer for profile [%s %s]" % (
        influencer, user_prof.user, user_prof.blog_page))
    if not influencer:
        log.info("No influencer found for User_prof_id: %s" % (user_prof.id,))
        send_admin_email_via_mailsnake(
            "No influencer found for user",
            "User_prof_id: %s" % (user_prof.id,))
        user_prof.error_when_connecting_to_influencer = "NO INFLUENCERS"
    else:
        log.info("Found %s influencer for signed up user %s" % (
            influencer, user_prof))
        influencer.name = user_prof.name
        influencer.email_for_advertising_or_collaborations = user_prof.user.email
        influencer.email = user_prof.user.email
        user_prof.influencer = influencer
        influencer.shelf_user = user_prof.user
        influencer.append_source('blogger_signup')
        log.info("Done connecting User: [%s, %s] with Influencer: [%s, %s]" % (
            user_prof.blog_page, user_prof.user.email,
            influencer.email_for_advertising_or_collaborations,
            influencer.blog_url))
    if to_save:
        user_prof.save()
        if influencer:
            influencer.save()
            user_prof.update_intercom()
            # if influencer is showing on search, their profile must be ok,
            # so invite them
            if influencer.show_on_search and not influencer.ready_to_invite:
                influencer.ready_to_invite = True
                influencer.save()
                user_prof.update_intercom()
            # if they have been already qa-ed, invite them
            elif (influencer.validated_on and
                    'info' in influencer.validated_on and
                    not influencer.ready_to_invite):
                influencer.ready_to_invite = True
                influencer.save()
                user_prof.update_intercom()
            # now, if this influencer is not validated or not showing on
            # search, issue the complete processing
            else:
                postprocessing.process_new_influencer_sequentially(
                    influencer.id, assume_blog=True)
    check_user_prof_influencer_connectivity(user_prof.id)
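
# Usage sketch: passing to_save=False runs the matching logic without
# persisting the profile/influencer link (the invite/postprocessing branch
# above is gated on to_save being true).
#
#   find_and_connect_user_to_influencer(user_prof, to_save=False)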
def send_missing_emails_report():
    from debra import helpers
    from debra.models import Influencer

    mp_data = Influencer.objects.missing_emails_data()
    count = len(mp_data.keys())
    helpers.send_admin_email_via_mailsnake(
        "Missing emails daily report: {} found".format(count),
        "During our daily check we have found {} influencers with missing "
        "emails".format(count))
def check_user_prof_influencer_connectivity(user_profile_id):
    from debra.helpers import send_admin_email_via_mailsnake
    from debra.models import UserProfile

    prof = UserProfile.objects.get(id=user_profile_id)
    if not prof.influencer:
        send_admin_email_via_mailsnake(
            "No influencer found for %s" % prof,
            "Blog %s User %s Email %s " % (
                prof.blog_page, prof.user, prof.user.email))
    else:
        inf = prof.influencer
        if not inf.shelf_user:
            send_admin_email_via_mailsnake(
                "No shelf_user found for %s" % inf.id,
                "Influencer %s; User_profile_id %s" % (inf, user_profile_id))
def get_datapoint_entity(result, datapoint_id):
    # NOTE: `contract_id` is not defined in this scope; this helper appears
    # to have been lifted out of a task that defines it.
    try:
        entity = result.find_entity(datapoint_id)
    except ClickMeterException as e:
        send_admin_email_via_mailsnake(
            'ClickMeterException for Contract={}'.format(contract_id),
            e.error_html)
    else:
        if not entity:
            # send_admin_email_via_mailsnake(
            #     'Cannot find ClickMeter EntityId={} for Contract={}'.format(
            #         datapoint_id, campaign_id),
            #     'Cannot find ClickMeter EntityId={} for Contract={}'.format(
            #         datapoint_id, campaign_id),
            # )
            return
        return entity
def send_post_analytics_report_to_admins(collection_id):
    from debra.models import PostAnalyticsCollection
    from debra import helpers

    collection = PostAnalyticsCollection.objects.get(id=collection_id)
    if not collection.updated:
        return
    qs = collection.get_unique_post_analytics().prefetch_related('post')
    qs = list(qs)
    stats = [
        len(filter(lambda x: x.post.title is None, qs)),
        len(filter(lambda x: x.post.create_date is None, qs)),
        len(filter(lambda x: x.post_num_comments in [-1, None], qs)),
        collection.creator_brand.name if collection.creator_brand else None,
        collection.user.id if collection.user else None,
        '{} ({})'.format(
            collection.user.userprofile.name
            if collection.user and collection.user.userprofile and
            collection.user.userprofile.name is not None else 'No name',
            collection.user.email if collection.user else 'No email'),
    ]
    subject = 'Report for PostAnalyticsCollection.id={} is ready'.format(
        collection.id)
    body = '<br />'.join([
        'Number of missing titles: {}',
        'Number of missing dates: {}',
        'Number of missing comment counts: {}',
        'Brand name: {}',
        'User ID: {}',
        'Username: {}',
    ]).format(*stats)
    helpers.send_admin_email_via_mailsnake(subject, body)
def update_campaign_tracking_stats(campaign_id):
    from debra.models import (BrandJobPost, PostAnalyticsCollectionTimeSeries)
    from debra.helpers import send_admin_email_via_mailsnake

    campaign = get_object_or_404(BrandJobPost, id=campaign_id)
    if campaign.tracking_group is None:
        return
    result = ClickMeterListResult(
        clickmeter_api, '/aggregated/summary/groups', {
            'timeframe': 'beginning',
            'status': 'active',
        })
    try:
        entity = result.find_entity(campaign.tracking_group)
    except ClickMeterException as e:
        send_admin_email_via_mailsnake(
            'ClickMeterException for Campaign={}'.format(campaign_id),
            e.error_html)
    else:
        if not entity:
            # TODO: commented out because it was spamming admin emails
            # send_admin_email_via_mailsnake(
            #     'Cannot find ClickMeter EntityId={} for Campaign={}'.format(
            #         campaign.tracking_group, campaign_id),
            #     'Cannot find ClickMeter EntityId={} for Campaign={}'.format(
            #         campaign.tracking_group, campaign_id),
            # )
            pass
        else:
            PostAnalyticsCollectionTimeSeries.objects.create(
                collection=campaign.post_collection,
                count_clicks=entity.get('totalClicks', 0),
                count_unique_clicks=entity.get('uniqueClicks', 0),
                count_views=entity.get('totalViews', 0),
                count_unique_views=entity.get('uniqueViews', 0),
                snapshot_date=datetime.datetime.now())
def mandrill_webhook(request):
    from debra import models
    from debra import helpers

    if request.method != "POST":
        return HttpResponse()
    try:
        webhook_data = json.loads(request.POST.get('mandrill_events', ''))
    except:
        log.exception("Mandrill JSON parse error.")
        return HttpResponse()
    try:
        batch = models.MandrillBatch.objects.create(data=webhook_data)
    except Exception:
        helpers.send_admin_email_via_mailsnake(
            "Failed to save mandrill batch", webhook_data)
        return HttpResponse(status=500)
    else:
        handle_webhook.apply_async(
            [batch.id, True, False, False], queue="celery_mandrill_2")
        return HttpResponse()
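
# Payload shape, for reference: Mandrill delivers webhook batches as
# form-encoded POSTs whose 'mandrill_events' field holds a JSON-encoded list
# of event dicts, e.g. (illustrative values only):
#
#   mandrill_events=[{"event": "inbound", "ts": 1421338800, "msg": {...}}]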
def handle_inbound(inbound_data, admins_only=False, inbound_backup=None):
    from debra import models
    from debra import helpers

    log.info("Got %d inbound data units" % len(inbound_data))
    if len(inbound_data) == 0:
        log.info("Returning")
        return
    if inbound_backup is None:
        log.info("No backup provided")
        inbound_backup = [None] * len(inbound_data)
    for n, (data, backup) in enumerate(zip(inbound_data, inbound_backup)):
        if backup is None:
            log.info(
                "No backup found for #{} message, creating one...".format(n))
            backup = models.MandrillEvent.objects.create(
                data=data, type=models.MandrillEvent.TYPE_INBOUND)
            log.info("Backup for #{} message created, id={}".format(
                n, backup.id))
        log.info("Preparing message #{}, backup_id={}".format(n, backup.id))
        ts = data.get('ts')
        msg = data.get('msg')
        raw_msg = msg.get('raw_msg')
        email_to = msg.get('email')
        attachments = msg.get('attachments', [])
        images = msg.get('images', [])
        try:
            r_type = email_to.split('_')[0]
        except Exception:
            # this should never happen if all emails are ok
            log.exception(
                "Wrong email_to, can't split it by underscore. "
                "Wrong email: %s" % (email_to,))
            return HttpResponse()
        # mail from brand to influencer
        if r_type == "i":
            direction = models.MailProxyMessage.DIRECTION_BRAND_2_INFLUENCER
            thread = models.MailProxy.objects.get(influencer_mail=email_to)
        # mail from influencer to brand
        if r_type == "b":
            direction = models.MailProxyMessage.DIRECTION_INFLUENCER_2_BRAND
            thread = models.MailProxy.objects.get(brand_mail=email_to)
        log.info("#{} message: Brand={}, Influencer={}".format(
            n, thread.brand_id, thread.influencer_id))
        mandrill_message = {
            'text': msg.get('text'),
            'html': msg.get('html'),
            'subject': msg.get('subject'),
            'attachments': attachments,
            'images': images,
        }
        if direction == models.MailProxyMessage.DIRECTION_BRAND_2_INFLUENCER:
            complete_message_brand_2_influencer(mandrill_message, thread, ts=ts)
        if direction == models.MailProxyMessage.DIRECTION_INFLUENCER_2_BRAND:
            complete_message_influencer_2_brand(mandrill_message, thread, ts=ts)
        db_params = dict(
            direction=direction,
            thread=thread,
            msg=raw_msg,
            type=models.MailProxyMessage.TYPE_EMAIL)
        try:
            log.info(
                '\nSearching for #{} message (thread={}, direction={}) in '
                'database...'.format(n, thread.id, direction))
            msg = models.MailProxyMessage.objects.filter(**db_params)[0]
            log.info("#{} message is found".format(n))
        except IndexError:
            log.info("#{} message is not found".format(n))
            if admins_only:
                continue
            log.info("Sending #{} message to clients".format(n))
            resp = mailsnake_send(mandrill_message)
            log.info("Set status of backup.id=%r to STATUS_SENT" % backup.id)
            backup.status = models.MandrillEvent.STATUS_SENT
            backup.save()
            mandrill_id = get_mandrill_id(resp)
            log.info("Got ID for #{} message: {}".format(n, mandrill_id))
            log.info("Saving #{} message...".format(n))
            msg = models.MailProxyMessage.objects.create(
                ts=datetime.datetime.utcfromtimestamp(int(ts)),
                mandrill_id=mandrill_id,
                **db_params)
            thread.candidate_mapping.filter(
                campaign_stage=models.InfluencerJobMapping.
                CAMPAIGN_STAGE_WAITING_ON_RESPONSE
            ).update(
                campaign_stage=models.InfluencerJobMapping.
                CAMPAIGN_STAGE_NEGOTIATION,
                campaign_stage_prev=models.InfluencerJobMapping.
                CAMPAIGN_STAGE_WAITING_ON_RESPONSE)
            log.info("#{} message is saved: id={}".format(n, msg.id))
            log.info("Set status of backup.id=%r to STATUS_SAVED" % backup.id)
            backup.status = models.MandrillEvent.STATUS_SAVED
            backup.save()
            if is_invalid(resp):
                log.info(
                    "[ADMIN NOTIFICATION] Message #{} is invalid".format(n))
                body = 'INVALID mandrill message. Msg_id: {}'.format(msg.id)
                log.error(body)
                helpers.send_admin_email_via_mailsnake(body, body)
            if is_status(resp, 'rejected'):
                log.info(
                    "[ADMIN NOTIFICATION] Message #{} rejected".format(n))
                helpers.send_admin_email_via_mailsnake(
                    '''REJECTED mandrill message.
                    models.MailProxyMessage(id={})
                    '''.format(msg.id),
                    'Response: {}'.format(resp))
            if attachments:
                log.info(
                    "Saving #{} message's attachments to DB...".format(n))
                msg.attachments = upload_message_attachments_to_s3(
                    msg, attachments.values(), True)
                msg.save()
            log.info(
                "Set status of backup.id=%r to STATUS_PROCESSED" % backup.id)
            backup.status = models.MandrillEvent.STATUS_PROCESSED
            backup.save()
        finally:
            log.info("Sending #{} message copy to admins...".format(n))
            mailsnake_send(mandrill_message, admins_only=True)
            print("Sleeping for 10s")
            time.sleep(10)
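
# Proxy-address routing convention (inferred from handle_inbound above): the
# local part is prefixed with the recipient type, so a hypothetical address
# like "i_x7f3@mail.example.com" routes brand -> influencer (matched via
# MailProxy.influencer_mail), while "b_x7f3@..." routes influencer -> brand
# (matched via MailProxy.brand_mail).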
def influencer_tracking_verification(pa_id, attempts=3, delay=30):
    from urllib2 import unquote
    from debra.models import Contract, PostAnalytics
    from debra.helpers import send_admin_email_via_mailsnake
    from xpathscraper import xbrowser

    # contract = get_object_or_404(Contract, id=contract_id)
    pa = get_object_or_404(PostAnalytics, id=pa_id)
    contract = pa.contract

    pa.tracking_status = pa.TRACKING_STATUS_VERIFYING
    pa.save()

    def visit_page(page_url):
        log.info('* Opening {} with Selenium...'.format(page_url))
        with xbrowser.XBrowser(
                headless_display=settings.AUTOCREATE_HEADLESS_DISPLAY) as xb:
            xb.driver.set_page_load_timeout(60)
            xb.driver.set_script_timeout(60)
            xb.driver.implicitly_wait(10)
            try:
                xb.load_url(page_url)
            except:
                send_admin_email_via_mailsnake(
                    "'influencer_tracking_verification' Selenium exception "
                    "for PostAnalytics={} (url={})".format(pa.id, page_url),
                    '<br />'.join(traceback.format_exc().splitlines()))

    def check_visit(datapoint, url):
        log.info('* Attempt id={}, #{}'.format(pa.id, n + 1))
        log.info('* Sleeping for {} secs... id={}, #{}'.format(
            delay, pa.id, n + 1))
        time.sleep(delay)
        try:
            log.info('* Getting /clickstream... id={}, #{}'.format(
                pa.id, n + 1))
            resp = requests.get(
                constants.CLICKMETER_BASE_URL + '/clickstream',
                headers=headers,
                params={'datapoint': datapoint})
            try:
                urls = [
                    unquote(x.get('realDestinationUrl', '')).strip().strip('/')
                    for x in resp.json()['rows']
                ][:constants.CLICKMETER_EVENTS_VERIFICATION_NUMBER]
            except KeyError:
                urls = []
            log.info('* Urls found={} for id={}, #{}'.format(
                len(urls), pa.id, n + 1))
            if url.strip().strip('/') in urls:
                log.info('* Post URL is found... id={}, #{}'.format(
                    pa.id, n + 1))
                return True
        except:
            log.info(
                '* Exception, sending email to admins... id={}, #{}'.format(
                    pa.id, n + 1))
            send_admin_email_via_mailsnake(
                "'influencer_tracking_verification' exception for "
                "PostAnalytics={}".format(pa.id),
                '<br />'.join(traceback.format_exc().splitlines()))

    if pa.post_type not in ['Blog']:
        response = requests.get(pa.post_url)
        if response.status_code == 200:
            pa.tracking_status = pa.TRACKING_STATUS_VERIFIED
        else:
            pa.tracking_status = pa.TRACKING_STATUS_VERIFICATION_PROBLEM
        pa.save()
        return

    log.info('* Extracting tracking data...')
    check_data = [
        (pa.post_url, contract.tracking_pixel, True),
        (contract.product_url, contract.tracking_link,
         contract.campaign.product_sending_status not in [
             'no_product_sending', 'no_product_page']),
        (contract.campaign.client_url, contract.tracking_brand_link, True),
    ]

    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'X-Clickmeter-Authkey': constants.CLICKMETER_API_KEY,
    }

    for url, datapoint, to_check in check_data:
        if not to_check:
            continue
        success = False
        visit_page(url)
        for n in xrange(attempts):
            success = success or check_visit(datapoint, url)
        if not success:
            log.info('* Nothing is found. id={}, #{}, url={}'.format(
                pa.id, n + 1, url))
            pa.tracking_status = pa.TRACKING_STATUS_VERIFICATION_PROBLEM
            pa.save()
            log.info(
                "* PostAnalytics updated with 'Verification Problem' status. "
                "id={}, #{}".format(pa.id, n + 1))
            log.info(
                '* Sending email to admins about failure. id={}, #{}'.format(
                    pa.id, n + 1))
            send_admin_email_via_mailsnake(
                'Verification problem on PostAnalytics={}'.format(pa.id), '''
                # of attempts = {}, delay = {} secs<br />
                searched for url={}
                '''.format(attempts, delay, url))
            return

    pa.tracking_status = pa.TRACKING_STATUS_VERIFIED
    pa.save()
    log.info(
        "* PostAnalytics updated with 'Verified' status. id={}, #{}".format(
            pa.id, n + 1))
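
# Verification flow, summarized: for each (page_url, datapoint) pair the task
# opens the page in a headless browser (which should fire the ClickMeter
# pixel/link), then polls ClickMeter's /clickstream for up to `attempts`
# tries, `delay` seconds apart, looking for the page URL among the recorded
# real destination URLs. Illustrative call (hypothetical id):
#
#   influencer_tracking_verification(12345, attempts=3, delay=30)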
def handle_new_influencers(urls, tag_id=None, brand_id=None, user_id=None):
    from debra.helpers import (create_influencer_and_blog_platform_bunch,
                               send_admin_email_via_mailsnake)
    from debra.models import (InfluencersGroup, Brands, User, Influencer)
    from platformdatafetcher import postprocessing
    from xpathscraper import utils

    urls = map(utils.post_to_blog_url, urls)

    brand = Brands.objects.get(id=brand_id) if brand_id else None
    user = User.objects.get(id=user_id) if user_id else None
    tag = InfluencersGroup.objects.get(id=tag_id) if tag_id else None

    send_admin_email_via_mailsnake(
        'Influencer import started for {}'.format(brand.name), '''
        <p>Influencer import started:</p><br />
        <ul>
            <li>adding {} urls</li>
            <li>collection: {}</li>
            <li>brand: {}</li>
            <li>user: {}</li>
        </ul>
        '''.format(len(urls), tag, brand, user))

    infs = create_influencer_and_blog_platform_bunch(
        urls, 'customer_uploaded', None, tags=['customer_uploaded'])

    send_admin_email_via_mailsnake(
        'Influencer Import: Influencers created now for {} and tag {}'.format(
            brand.name, tag.name if tag else "No Tag Given"),
        '''<p>Created {}</p>'''.format(len(infs)))

    infs_added_to_tag = []
    for inf in infs:
        print("Checking influencer %r blog_url %r show_on_search %r" % (
            inf.id, inf.blog_url, inf.show_on_search))
        if not inf.show_on_search:
            # it's not on search yet, so let's process this sequentially and
            # then it'll show up on an admin table
            postprocessing.process_new_influencer_sequentially(inf.id, True)
        if tag:
            if tag.add_influencer(inf):
                infs_added_to_tag.append(inf)
                print("influencer %r added to the tag group" % inf)

    send_admin_email_via_mailsnake(
        'Influencer import finished for {}'.format(brand.name), '''
        <p>Please check new influencers in the <a href={}>admin table</a>.</p>
        <p>Stats:</p><br />
        <ul>
            <li>{} influencers were found/created</li>
            <li>{} urls were passed</li>
            <li>{} new influencers were added to the tag</li>
            <li>{} with show_on_search=True among newly added to the tag influencers</li>
            <li>{} with old_show_on_search=True among newly added to the tag influencers</li>
            <li>{} with show_on_search=True among all passed influencers</li>
            <li>{} with old_show_on_search=True among all passed influencers</li>
        </ul>
        '''.format(
            "https://app.theshelf.com/admin/upgrade/influencer/uploaded_by_customers/",
            len(infs),
            len(urls),
            len(infs_added_to_tag),
            len(filter(lambda inf: inf.show_on_search, infs_added_to_tag)),
            len(filter(lambda inf: inf.old_show_on_search, infs_added_to_tag)),
            len(filter(lambda inf: inf.show_on_search, infs)),
            len(filter(lambda inf: inf.old_show_on_search, infs)),
        ),
        extra_emails=['*****@*****.**'])
def connect_url_to_post(self, url, post_analytics_id, influencer_id=None):
    """
    High-level goal: given a url, we ultimately need to add that post and
    its Influencer to our database.

    First we check if we already have a post for that url.
    => Yes: then we can also find the influencer associated with that post.
    => No:
       => First, check whether an Influencer already exists for this blog
          domain.
          => If yes, use that influencer.
          => Else, create an influencer with
             source='import_from_post_analytics'.
       => Now we have an influencer for this url, but the post itself has
          not been fetched, so we need to run our crawler to get that url.
          QUESTION: do we have a method to fetch a given url?
    """
    from debra.models import Posts, PostAnalytics, Platform, Influencer
    from debra import helpers
    from platformdatafetcher import (pbfetcher, fetcher, postprocessing,
                                     feeds, socialfetcher)
    from xpathscraper import utils
    from platformdatafetcher.fetch_blog_posts_manually import get_all_comments_number
    from platformdatafetcher.fetch_blog_posts_date import fetch_blog_posts_date
    from masuka.image_manipulator import upload_post_image

    def create_post(url):
        blog_url = utils.post_to_blog_url(url)
        inf = helpers.create_influencer_and_blog_platform(
            blog_url, 'import_from_post_analytics', True, True)
        if inf:
            platform = inf.blog_platform
            print("Inf.validated_on: %r" % inf.validated_on)
            if not inf.validated_on or not 'info' in inf.validated_on:
                # it's not QA-ed yet, so let's process this sequentially
                postprocessing.process_new_influencer_sequentially(
                    inf.id, True)
            # at this point, we should have data for the influencer
            # now, let's check if we got the post
            # post = Posts.objects.filter(platform=platform, url__iexact=url)
            # print("Got post: %r" % post)
            # if post.exists():
            #     return post[0]
            post = find_post_by_url(url, True, platform=platform)
            if post is None:
                # here we just create a quick post artificially (ideally we
                # should have fetched this post)
                post = Posts.objects.create(
                    platform=platform,
                    influencer=inf,
                    show_on_search=inf.show_on_search,
                    url=url)
            return post
        print("No valid influencer found")
        helpers.send_admin_email_via_mailsnake(
            "Post Analytics: No valid influencer found %r" % url,
            "During our post analytics, we didn't find an influencer for "
            "this Post.url=%r" % (url,))
        return None

    def pick_the_best_post(posts):
        """We may get multiple posts for the same url.

        1. If there is only one, return it.
        2. Else, prefer the influencer that is show_on_search >>
           validated_on >> others.
        """
        print("pick_the_best_post: for %r" % posts)
        blog_posts = posts.filter(
            platform__platform_name__in=Platform.BLOG_PLATFORMS)
        if len(blog_posts) > 0:
            posts = blog_posts
        if len(posts) == 1:
            print("\tCase 1: Only one post found, so returning it")
            return posts[0]
        # first pick the best influencer
        influencers = list(posts.values_list('influencer', flat=True))
        if len(set(influencers)) == 1:
            influencer = influencers[0]
        else:
            influencer_id = influencers[0]
            influencer_objects = Influencer.objects.filter(id__in=influencers)
            influencer_object = Influencer.objects.get(id=influencer_id)
            influencer_object = influencer_object._select_influencer_to_stay(
                influencer_objects)
            influencer = influencer_object.id
        print("selected influencer= %r" % influencer)
        posts = posts.filter(influencer=influencer)
        # Case 2: return post belonging to the best influencer
        if len(posts) == 1:
            print("\tCase 2: returning post belonging to the best influencer")
            return posts[0]
        # Case 3: return post belonging to a non url_not_found platform of
        # the best influencer
        posts_without_url_not_found = posts.exclude(
            platform__url_not_found=True).extra(
                select={
                    'has_content': 'content IS NOT NULL',
                }).order_by('has_content')
        if len(posts_without_url_not_found) >= 1:
            print("\tCase 3: returning post belonging to the platform that "
                  "is not url_not_found")
            return posts_without_url_not_found[0]
        # Case 4: just use any post
        print("\tInfluencers for this post are neither show_on_search nor "
              "validated, so returning an arbitrary one")
        return posts[0]

    def get_resolved_url(pa):
        # use the url that we found after resolving the link, so
        # postanalytics.url will be what the user added and the associated
        # post will use the resolved link
        print '* Handling URL redirection for PA={}'.format(pa.id)
        try:
            redirected_url = utils.resolve_http_redirect(pa.post_url)
            if pa.post_url != redirected_url:
                PostAnalytics.objects.handle_redirect(pa, redirected_url)
            url = redirected_url
            print '* Resolved URL={}'.format(redirected_url)
        except:
            url = pa.post_url
            print '* Original URL={}'.format(pa.post_url)
        print '* Resulting URL={}'.format(url)
        return url

    def get_social_post(pa, influencer):
        was_created = False
        if pa.post is not None:
            post = pa.post
        else:
            # not sure if we need this
            url = get_resolved_url(pa)
            plats = influencer.platforms().filter(platform_name=pa.post_type)
            if plats.count() > 1:
                plats = plats.exclude(url_not_found=True)
            # @TODO: add IndexError exception handling
            try:
                plat = plats[0]
            except Exception:
                post = None
            else:
                post = find_post_by_url(url, True, platform=plat)
                if post is None:
                    pass
                    # # create a new post
                    # print '* Creating a new post'
                    # post = Posts.objects.create(
                    #     platform=plat,
                    #     influencer=influencer,
                    #     show_on_search=influencer.show_on_search,
                    #     url=url
                    # )
                    # print '* created. id={}'.format(post.id)
                    # was_created = True
        return post, was_created

    def get_post(pa):
        post, was_created = None, False
        if pa.post is not None:
            post = pa.post
        else:
            url = get_resolved_url(pa)
            posts = find_post_by_url(url, False)
            if posts and len(posts) >= 1:
                post_ids = [p.id for p in posts]
                posts_qs = Posts.objects.filter(id__in=post_ids)
                post = pick_the_best_post(posts_qs)
            else:
                # create a blog post only for Blog
                post = create_post(url)
                was_created = True
        return post, was_created

    def extract_post_comments(post, analytics_for_url):
        num_comments = -1
        method = None
        try:
            num_comments, method = get_all_comments_number(post.url)
            if num_comments == -1 and method == 'captcha_squarespace':
                # helpers.send_admin_email_via_mailsnake(
                #     "Comment fetcher: Captcha required (Squarespace) for %r" % post.url,
                #     "Please check out this Post.id=%r Post.url=%r" % (post.id, post.url))
                logger.error(
                    'Captcha (Squarespace) required for comment fetcher. '
                    'Post ID and url provided.',
                    exc_info=1,
                    extra={'post_id': post.id, 'post_url': post.url})
        except Exception as e:
            num_comments = -1
            if self.request.retries >= 5:
                logger.error(
                    'Comment fetcher constantly crashed for %s retries.' %
                    self.request.retries,
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url,
                        'exception': e,
                    })
                # helpers.send_admin_email_via_mailsnake(
                #     "Comment fetcher crashed for %r and was retried %s times." % (post.url, self.request.retries),
                #     "Please check out this Post.id=%r Post.url=%r\n Exception: %s" % (post.id, post.url, e))
            else:
                # Retry the task in 1 minute when the comment fetcher raises.
                logger.warning(
                    'Retrying task due to comment fetcher crash...',
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url,
                        'exception': e,
                        'retry_number': self.request.retries,
                        'task_id': self.request.id,
                    })
                raise self.retry(countdown=60 * 1, max_retries=5)
        # Retry the task in 1 minute when we get a connection-error result.
        if num_comments == -1 and method == 'connection_error':
            if self.request.retries < 3:
                logger.warning(
                    'Retrying task due to connection_error result in '
                    'comment_fetcher',
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url,
                        'retry_number': self.request.retries,
                        'task_id': self.request.id,
                    })
                raise self.retry(countdown=60 * 1, max_retries=3)
        # num_comments = max(
        #     num_comments,
        #     max(map(lambda x: x.post_comments, analytics_for_url)))
        post.engagement_media_numcomments = num_comments
        post.ext_num_comments = num_comments

    def extract_post_date(post):
        if not post.create_date:
            try:
                post_data_result = fetch_blog_posts_date(post.url)
                if post_data_result['date_published'] is not None:
                    post.create_date = post_data_result['date_published']
                if post_data_result['title'] is not None:
                    post.title = post_data_result['title']
            except Exception as e:
                # helpers.send_admin_email_via_mailsnake(
                #     "Date fetcher crashed for %r" % post.url,
                #     "Please check out this Post.id=%r Post.url=%r\n Exception: %s" % (post.id, post.url, e)
                # )
                logger.error(
                    'Date fetcher has crashed. Post ID and url provided.',
                    exc_info=1,
                    extra={
                        'post_id': post.id,
                        'post_url': post.url,
                        'exception': e,
                    })

    origin_analytics = PostAnalytics.objects.get(id=post_analytics_id)

    if influencer_id is not None:
        influencer = Influencer.objects.get(id=influencer_id)
    else:
        influencer = None

    collection_id = origin_analytics.collection_id

    if influencer is not None and origin_analytics.post_type not in [
            None, 'Blog']:
        post, was_created = get_social_post(origin_analytics, influencer)
    else:
        post, was_created = get_post(origin_analytics)

    if not post:
        helpers.send_admin_email_via_mailsnake(
            "Post Analytics: No valid post found %r" % url,
            "During our post analytics, we didn't find a post for this "
            "url=%r" % (url,))
        print("No post was found for %r, so returning." %
              origin_analytics.post_url)
        return

    print("Using post: %r for URL: %r" % (post, origin_analytics.post_url))

    post_exists = PostAnalytics.objects.filter(
        collection_id=collection_id,
        post=post).exclude(post_url=origin_analytics.post_url).exists()
    if post_exists:
        print '* PA with post={} and another post_url exists, removing current one.'.format(post.id)
        origin_analytics.delete()
        return

    origin_analytics.post = post
    origin_analytics.save()

    # BE CAREFUL IF YOU MODIFY ANY OF THE POST ANALYTICS FROM THIS LIST !!!
    if post.platform:
        analytics_for_url = PostAnalytics.objects.filter(post=post)
        post_type = post.platform.platform_name
        if post_type in Platform.BLOG_PLATFORMS:
            post_type = 'Blog'
        analytics_for_url.update(post_type=post_type)
        origin_analytics = PostAnalytics.objects.get(id=origin_analytics.id)
    else:
        analytics_for_url = PostAnalytics.objects.filter(
            post_url__iexact=origin_analytics.post_url)
    analytics_for_url = list(analytics_for_url)

    if post.post_image is None:
        # upload_post_image_task.apply_async([post.id], queue='celery')
        upload_post_image(post)
        post = Posts.objects.get(id=post.id)

    if origin_analytics.post_type in ['Blog', None]:
        extract_post_comments(post, analytics_for_url)
        extract_post_date(post)
        post.save()
    else:
        try:
            print '* Start fetching post interactions.'
            f = fetcher.fetcher_for_platform(post.platform)
            f.fetch_post_interactions([post])
        except Exception:
            helpers.send_admin_email_via_mailsnake(
                "'connect_url_to_post' exception during post interactions "
                "fetching (Post={})".format(post.id),
                '<br />'.join(traceback.format_exc().splitlines()))

    def get_number_of_shares(pa):
        if pa.post_type == 'Facebook':
            return pa.post.engagement_media_numfbshares
        elif pa.post_type == 'Twitter':
            return pa.post.engagement_media_numretweets
        elif pa.post_type == 'Pinterest':
            return pa.post.engagement_media_numrepins
        else:
            return sum([
                pa.count_tweets or 0,
                pa.count_fb_shares or 0,
                pa.count_fb_likes or 0,
                pa.count_fb_comments or 0,
                pa.count_gplus_plusone or 0,
                pa.count_pins or 0,
            ])

    origin_analytics.count_video_impressions = post.impressions
    origin_analytics.count_likes = post.engagement_media_numlikes
    origin_analytics.post_comments = post.ext_num_comments
    origin_analytics.count_shares = get_number_of_shares(origin_analytics)
    origin_analytics.save()

    contract = connect_post_analytics_to_contract(origin_analytics, False)

    for analytics in analytics_for_url:
        if analytics.post is None:
            analytics.post = post
            analytics.post_found = True
        # if analytics.post_comments is None:
        #     analytics.post_comments = post.engagement_media_numcomments
        if (contract and analytics.contract_id is None and
                analytics.collection_id == collection_id):
            analytics.contract_id = contract.id
        analytics.save()

    print("Collection_id=%r" % collection_id)
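
# Assumed behavior of the helper used above (not verified against
# xpathscraper.utils): post_to_blog_url appears to derive the blog root from
# a post permalink, e.g. something like
#
#   utils.post_to_blog_url('http://myblog.example.com/2015/01/a-post.html')
#   # -> 'http://myblog.example.com'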
def handle_events(events_data, to_save=True, events_backup=None):
    from debra.models import (MailProxyMessage, InfluencerJobMapping,
                              MandrillEvent)
    from debra import helpers

    log.info("Got %d events data units" % len(events_data))
    if len(events_data) == 0:
        log.info("Returning")
        return
    if events_backup is None:
        events_backup = [None] * len(events_data)

    event_2_type = {
        "send": MailProxyMessage.TYPE_SEND,
        "open": MailProxyMessage.TYPE_OPEN,
        "click": MailProxyMessage.TYPE_CLICK,
        "spam": MailProxyMessage.TYPE_SPAM,
        "hard_bounce": MailProxyMessage.TYPE_BOUNCE,
    }

    log.info("Got {} events".format(len(events_data)))
    events_backup = [
        y for x, y in zip(events_data, events_backup)
        if not exclude_from_events(x)
    ]
    events_data = [x for x in events_data if not exclude_from_events(x)]
    log.info("Got {} events after filtering".format(len(events_data)))
    if len(events_data) == 0:
        log.info("Returning as nothing needs to be done")
        return

    mandrill_ids = [x.get('_id') for x in events_data if x.get('_id')]
    log.info("Getting corresponding messages from DB for %r" % mandrill_ids)
    messages_from_db = list(
        MailProxyMessage.objects.filter(
            mandrill_id__in=mandrill_ids,
            type=MailProxyMessage.TYPE_EMAIL).values_list(
                'mandrill_id', 'thread'))
    mandrill_id_2_thread = dict(messages_from_db)
    log.info("Mandrill_id_2_thread: %r" % mandrill_id_2_thread)

    log.info("Performing checks...")
    # check #1: thread uniqueness
    thread_counts = Counter(
        [mandrill_id for mandrill_id, _ in messages_from_db])
    if any([x > 1 for x in thread_counts.values()]):
        log.info("[ADMIN NOTIFICATION] Thread uniqueness check")
        helpers.send_admin_email_via_mailsnake(
            "mandrill_id was found in multiple mailboxes",
            "('mandrill_id', 'thread_id') pairs: {},\n counts: {}".format(
                messages_from_db, thread_counts))
    # check #2: missing messages in DB
    diff = set(mandrill_ids) - set(
        [mandrill_id for mandrill_id, _ in messages_from_db])
    if diff:
        log.info("[ADMIN NOTIFICATION] Missing messages in DB check")
        helpers.send_admin_email_via_mailsnake(
            "Mandrill events fired for missing emails", str(diff))

    events_for_save = []
    for n, (data, backup) in enumerate(zip(events_data, events_backup)):
        mandrill_id = data.get('_id')
        events_for_save.append(
            MailProxyMessage(
                thread_id=mandrill_id_2_thread.get(mandrill_id),
                msg=json.dumps(data),
                ts=datetime.datetime.utcfromtimestamp(int(data.get('ts'))),
                direction=MailProxyMessage.DIRECTION_NONE,
                type=event_2_type.get(data.get('event')),
                mandrill_id=mandrill_id))
        if backup is None:
            log.info(
                "No backup found for #{} event, creating one...".format(n))
            events_backup[n] = MandrillEvent.objects.create(
                data=data, type=MandrillEvent.TYPE_EVENT)
        log.info("Event found MandrillID: {} ThreadID: {}".format(
            mandrill_id, mandrill_id_2_thread.get(mandrill_id)))

    if not to_save:
        log.info("to_save == False, so not creating events data in DB")
    else:
        # save to DB
        log.info("Saving events data to DB")
        _zipped = zip(events_for_save, events_backup)
        for n, (event, backup) in enumerate(_zipped):
            try:
                event.save()
            except Exception:
                log.info("Failed to save event #{}".format(n))
            log.info("Set backup.id %s to STATUS_SAVED" % backup.id)
            backup.status = MandrillEvent.STATUS_SAVED
            backup.save()
        # MailProxyMessage.objects.bulk_create(events_for_save)

    # handle 'open' and 'click' events
    opened_and_clicked_emails = [
        x.get('_id') for x in events_data
        if x.get('event') in ('open', 'click')
    ]
    log.info("Got {} 'open' and 'click' events".format(
        len(opened_and_clicked_emails)))
    opened_and_clicked_threads = set(
        [mandrill_id_2_thread.get(x) for x in opened_and_clicked_emails])

    # save notification to mongoDB
    # log.info("Saving brand notifications to mongoDB...")
    # for msg in opened_and_clicked_emails_from_db:
    #     if msg.direction == MailProxyMessage.DIRECTION_BRAND_2_INFLUENCER:
    #         notification = {
    #             'text': "%s opened email on %s UTC" % (
    #                 msg.thread.influencer.name,
    #                 msg.ts.strftime("%b. %e, %Y - %H:%M")),
    #             'thread': msg.thread_id
    #         }
    #         notify_brand(msg.thread.brand_id, "mail", notification)

    log.info("Getting associated campaigns...")
    job_mappings = InfluencerJobMapping.objects.filter(
        status=InfluencerJobMapping.STATUS_INVITED,
        mailbox__in=opened_and_clicked_threads,
    )
    log.info("Should change campaigns statuses to RECEIVED: {}".format(
        job_mappings.values_list('id', flat=True)))

    if not to_save:
        log.info("to_save == False, so not changing campaign statuses in DB")
    else:
        # save to DB
        log.info("Saving changes to campaign statuses in DB")
        job_mappings.update(status=InfluencerJobMapping.STATUS_EMAIL_RECEIVED)
        for backup in events_backup:
            log.info("Set backup.id %s to STATUS_PROCESSED" % backup.id)
            backup.status = MandrillEvent.STATUS_PROCESSED
            backup.save()

    print("Sleeping for 10s")
    time.sleep(10)
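
# Event shape, for reference: each Mandrill event dict carries at least the
# message id, event name, and timestamp consumed above, e.g. (illustrative
# values only):
#
#   {"_id": "7761629ea28f48f0a6e1b9f1a8d057d9", "event": "open",
#    "ts": 1421338800}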