def create_influencer_from_bad_brands(brand, to_save=True):
    '''
    This method creates influencers from Brands whose domains contain blogger urls.

    Example:
        blogspot = Brands.objects.filter(domain_name__icontains='blogspot.')
        blogspot.update(blacklisted=True)
        for b in blogspot:
            create_influencer_from_bad_brands(b, True)

    Double checks:
      - this function should be called only for those Brands that have not been passed
        through this function
      - we shouldn't run this for brands with domain_name in 'tumblr.com', because these
        influencers could have a separate blog (say on blogspot.com) and then we would
        end up with duplicates
    '''
    with platformutils.OpRecorder(operation='import_from_bad_brand', brand=brand) as opr:
        url = brand.domain_name
        domain = utils.domain_from_url(url)
        if domain in BLACKLISTED_DOMAINS:
            log.info('Domain %r is blacklisted', domain)
            return
        inf = helpers.create_influencer_and_blog_platform(
            url, 'discovered_from_brands', to_save, platform_name_fallback=True)
        if not inf:
            log.error('Blacklisted url: %r', url)
        if inf and inf.id is not None:
            opr.data = {'inf_id_created': [inf.id]}
        else:
            opr.data = {'inf_cnt_skipped': 1}
def _prepare_test_influencer(self, op='created_for_testing'):
    infs = models.Influencer.objects.filter(relevant_to_fashion=True,
                                            show_on_search=False,
                                            source__isnull=False,
                                            classification='blog',
                                            blacklisted=False).\
        exclude(validated_on__contains=constants.ADMIN_TABLE_INFLUENCER_INFORMATIONS).\
        exclude(validated_on__contains=constants.ADMIN_TABLE_INFLUENCER_SELF_MODIFIED).\
        order_by('-id')
    log.info('%d infs', infs.count())
    assert infs.exists()
    inf_to_recreate = infs[0]
    self.orig_inf = inf_to_recreate
    log.info('Recreating influencer %r', inf_to_recreate)
    orig_blog_url = inf_to_recreate.blog_url
    orig_source = inf_to_recreate.source
    self._disable_inf(inf_to_recreate)
    self.inf = helpers.create_influencer_and_blog_platform(
        orig_blog_url, orig_source, to_save=True, platform_name_fallback=True)
    assert self.inf is not None
    with platformutils.OpRecorder(operation=op, influencer=self.inf) as opr:
        opr.data = {'source_influencer_id': inf_to_recreate.id}
    log.info('New influencer for testing: %r', self.inf)
def search_infs_using_preloaded_urls(queries, pages=20):
    for q in queries:
        try:
            urls = collect_urls_from_google(q, pages)
        except:
            log.exception('While collect_urls_from_google(%r), going to the next query', q)
            continue
        print "Got urls: %s" % urls
        for url in urls:
            try:
                if utils.domain_from_url(url) in import_from_blog_post.exclude_domains_set:
                    log.warn('%r is blacklisted', url)
                    continue
                dups = models.Influencer.find_duplicates(url)
                log.info('%r dups: %s', url, dups)
                if not dups:
                    log.info('YES_CREATE %r', url)
                    new_inf = helpers.create_influencer_and_blog_platform(
                        url, 'google', platform_name_fallback=True)
                    log.info('Created influencer: %r', new_inf)
                else:
                    log.info('NO_CREATE %r', url)
            except:
                log.exception('While processing url %r, skipping', url)
def find_and_connect_user_to_influencer(user_prof, to_save=True, **kwargs):
    """
    Connects a userprofile with an influencer object and updates this data in intercom.
    Sends an email to admins in case of errors.

    :param user_prof: the UserProfile to connect
    :return: None
    """
    from debra.models import Influencer
    from debra.helpers import create_influencer_and_blog_platform, send_admin_email_via_mailsnake
    from platformdatafetcher import platformutils, postprocessing

    blog_url = user_prof.blog_page
    influencer = create_influencer_and_blog_platform(blog_url, 'blogger_signup', to_save, False)
    log.info("Found %r possible influencer for profile [%s %s]" % (
        influencer, user_prof.user, user_prof.blog_page))
    if not influencer:
        log.info("No influencer found for User_prof_id: %s" % (user_prof.id,))
        send_admin_email_via_mailsnake("No influencer found for user",
                                       "User_prof_id: %s" % (user_prof.id,))
        user_prof.error_when_connecting_to_influencer = "NO INFLUENCERS"
    else:
        log.info("Found %s influencer for signed up user %s" % (influencer, user_prof))
        influencer.name = user_prof.name
        influencer.email_for_advertising_or_collaborations = user_prof.user.email
        influencer.email = user_prof.user.email
        user_prof.influencer = influencer
        influencer.shelf_user = user_prof.user
        influencer.append_source('blogger_signup')
        log.info("Done connecting User: [%s, %s] with Influencer: [%s, %s]" % (
            user_prof.blog_page, user_prof.user.email,
            influencer.email_for_advertising_or_collaborations, influencer.blog_url))

    if to_save:
        user_prof.save()
        if influencer:
            influencer.save()
        user_prof.update_intercom()

    if influencer:
        # if the influencer is showing on search, their profile must be ok, so invite them
        if influencer.show_on_search and not influencer.ready_to_invite:
            influencer.ready_to_invite = True
            influencer.save()
            user_prof.update_intercom()
        # if they have already been QA-ed, invite them
        elif influencer.validated_on and 'info' in influencer.validated_on and not influencer.ready_to_invite:
            influencer.ready_to_invite = True
            influencer.save()
            user_prof.update_intercom()
        # now, if this influencer is not validated or not showing on search
        else:
            # issue the complete processing
            postprocessing.process_new_influencer_sequentially(influencer.id, assume_blog=True)

    check_user_prof_influencer_connectivity(user_prof.id)
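# Illustrative usage sketch (not part of the original module): connecting a freshly signed-up
# blogger's profile. It assumes UserProfile is the profile model in debra.models and that
# blog_page was filled in at signup; the helper name and the id below are hypothetical.
def _example_connect_signed_up_blogger(user_prof_id):
    from debra.models import UserProfile
    user_prof = UserProfile.objects.get(id=user_prof_id)
    # saves the influencer, updates intercom and, if needed, kicks off full post-processing
    find_and_connect_user_to_influencer(user_prof, to_save=True)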
def _start_processing(self):
    self.inf = helpers.create_influencer_and_blog_platform(
        TEST_BLOG_URL, INF_SOURCE, to_save=True, platform_name_fallback=True)
    self.processing_thread = threading.Thread(target=self._do_processing)
    self.processing_thread.start()
def import_network_bloggers(filename):
    with open(filename, 'rb') as f:
        lines = f.readlines()[1:]
    reader = csv.DictReader(lines, ('unusual', 'blog_name', 'url', 'persons_name',
                                    'location', 'source', 'description'))
    blogger_type = os.path.basename(filename).split('.')[0].split(' - ')[1]
    log.info('blogger_type: %r', blogger_type)
    for row in reader:
        try:
            log.info('row: %r', row)
            if not row['url'].startswith('http'):
                log.warn('Skipping row with invalid url %r', row['url'])
                continue
            source = utils.domain_from_url(row['source'])
            if not source.strip():
                log.warn('Skipping row with no source')
                continue
            if not row['url'].strip():
                log.warn('Skipping row with no url')
                continue
            inf = helpers.create_influencer_and_blog_platform(
                row['url'], source, to_save=True, platform_name_fallback=True)
            if not inf:
                log.warn('Skipping blacklisted url')
                continue
            if not inf.is_enabled_for_automated_edits():
                log.warn('Influencer is not enabled for automated edits, skipping')
                continue
            inf.blogname = row['blog_name']
            inf.blogger_type = blogger_type
            inf.name = row['persons_name']
            inf.demographics_location = row['location']
            inf.description = row['description']
            log.info('source, blogname, name, location, description: %r, %r, %r, %r, %r',
                     inf.source, inf.blogname, inf.name, inf.demographics_location,
                     inf.description[:100])
            inf.save()

            # update blogname for the blog platform
            blog_pl_q = inf.platform_set.filter(url=row['url'])
            if blog_pl_q.exists():
                blog_pl = blog_pl_q[0]
                log.info('Updating blogname of %r', blog_pl)
                blog_pl.blogname = row['blog_name']
                blog_pl.save()
        except:
            log.exception('While processing %s, skipping', row)
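# Illustrative note (an assumption inferred from the parsing code above, not part of the original
# module): import_network_bloggers expects a CSV whose first row is a header (it is skipped),
# whose columns are (unusual, blog_name, url, persons_name, location, source, description), and
# whose file name encodes the blogger type after ' - ', e.g. 'Networks - lifestyle.csv' yields
# blogger_type 'lifestyle'. The path below is hypothetical.
def _example_import_network_bloggers():
    import_network_bloggers('/tmp/Networks - lifestyle.csv')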
def import_from_blog_url(follower_id, to_save=True):
    follower = models.Follower.objects.get(id=follower_id)
    with platformutils.OpRecorder(operation='import_from_pi', follower=follower) as opr:
        url = utils.url_without_path(follower.url)
        log.info('Will check url %r', url)
        if any(invalid_s in url for invalid_s in ('@', '(', '..')):
            log.warn('Invalid follower url: %r', url)
            return
        log.info('import_from_blog_url runs for follower %r', follower)
        url = utils.resolve_http_redirect(url)
        domain = utils.domain_from_url(url)
        if domain in BLACKLISTED_DOMAINS:
            log.info('Domain %r is blacklisted', domain)
            return
        inf = helpers.create_influencer_and_blog_platform(url, 'comments_import', to_save)
        if not inf:
            log.error('Blacklisted url: %r', url)
        if inf and inf.id is not None:
            opr.data = {'inf_id_created': [inf.id]}
        else:
            opr.data = {'inf_cnt_skipped': 1}
def create_post(url):
    blog_url = utils.post_to_blog_url(url)
    inf = helpers.create_influencer_and_blog_platform(
        blog_url, 'import_from_post_analytics', True, True)
    if inf:
        platform = inf.blog_platform
        print("Inf.validated_on: %r" % inf.validated_on)
        if not inf.validated_on or 'info' not in inf.validated_on:
            # it's not QA-ed yet, so let's process this sequentially
            postprocessing.process_new_influencer_sequentially(inf.id, True)
        # at this point, we should have data for the influencer
        # now, let's check if we got the post
        # post = Posts.objects.filter(platform=platform, url__iexact=url)
        # print("Got post: %r" % post)
        # if post.exists():
        #     return post[0]
        post = find_post_by_url(url, True, platform=platform)
        if post is None:
            # here we just create a quick post artificially
            # (ideally we should have fetched this post)
            post = Posts.objects.create(platform=platform,
                                        influencer=inf,
                                        show_on_search=inf.show_on_search,
                                        url=url)
        return post
    print("No valid influencer found")
    helpers.send_admin_email_via_mailsnake(
        "Post Analytics: No valid influencer found %r" % url,
        "During our post analytics, we didn't find an influencer for this Post.url=%r" % url)
    return None
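# Illustrative usage sketch (not part of the original module): creating a Posts row for a single
# post url coming from post analytics. The url below is hypothetical; create_post() returns the
# existing or newly created Posts instance, or None when no influencer could be found.
def _example_create_post_for_analytics():
    post = create_post('http://myblog.example.com/2015/05/some-post.html')
    if post is not None:
        print("Post %s belongs to influencer %s" % (post.id, post.influencer_id))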
def search_infs_by_giveaways(pages=20):
    brands = models.Brands.objects.filter(supported=True).order_by('id')[12:13]
    for brand in brands:
        for q in GOOGLE_QUERIES:
            q = q.format(brand=brand)
            log.info('Searching: %r', q)
            try:
                with xbrowser.XBrowser(headless_display=settings.AUTOCREATE_HEADLESS_DISPLAY) as xb:
                    g = GoogleScraper(xb)
                    it = g.search(q, pages)
                    for results in it:
                        for url in results:
                            try:
                                if utils.domain_from_url(url) in import_from_blog_post.exclude_domains_set:
                                    log.warn('%r is blacklisted', url)
                                    continue
                                dups = models.Influencer.find_duplicates(url)
                                log.info('%r dups: %s', url, dups)
                                if not dups:
                                    log.info('YES_CREATE %r', url)
                                    new_inf = helpers.create_influencer_and_blog_platform(
                                        url, 'google', platform_name_fallback=True)
                                    log.info('Created influencer: %r', new_inf)
                                else:
                                    log.info('NO_CREATE %r', url)
                            except:
                                log.exception('While processing url %r, skipping', url)
            except Exception as e:
                log.exception('For brand %r got exception: %s' % (brand, e),
                              extra={'pages': pages})
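# Illustrative sketch (not part of the original module): GOOGLE_QUERIES is assumed to hold query
# templates with a '{brand}' placeholder, given the q.format(brand=brand) call above. A purely
# hypothetical example of such templates:
_EXAMPLE_GIVEAWAY_QUERIES = [
    '"{brand}" giveaway site:blogspot.com',
    '"{brand}" giveaway winner blog',
]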
def create_influencer_from_instagram(profile_id, to_save):
    profile = InstagramProfile.objects.get(id=profile_id)
    existing_infs, valid_urls = find_matching_influencers_for_profile(profile)

    # We don't handle the case when there are matching influencers
    if existing_infs:
        return False, existing_infs

    '''
    algorithm:
    1. Create an influencer with a fake blog url
    2. Then create a platform object for each of the platforms that we're able to discover
        - It could be a youtube or facebook or pinterest or twitter
        - Mark all these platforms as autovalidated
        - Use these platforms to discover other related platforms
        - These should be automatically validated also
        - Issue fetch tasks for these automatically validated platforms
    3. Extract email if given
    '''
    plats = []

    # creating a unique influencer blog url that is concurrency-safe
    blog_url = 'http://www.theshelf.com/artificial_blog/{}.html'.format(int(time.time()))
    inf = helpers.create_influencer_and_blog_platform(
        blog_url,
        influencer_source='discovered_via_instagram',
        to_save=to_save,
        platform_name_fallback=True)
    log.info('Influencer object %s created/fetched.', inf.id)
    if to_save:
        inf.save()
        _ = PlatformDataOp.objects.create(
            influencer=inf, operation='inf_articial_blog_from_instagram_crawl')

    for valid_url in valid_urls:
        platform = create_platform_for_influencer(url=valid_url, inf=inf,
                                                  profile=profile, to_save=to_save)
        if not platform:
            continue
        if to_save:
            field_name = Influencer.platform_name_to_field[platform.platform_name]
            admin_helpers.handle_social_handle_updates(inf, field_name, platform.url)
        plats.append((platform, 'discovered_via_instagram',))

    log.debug('After performing all urls, insta_url is: %s', inf.insta_url)

    # now, using the created platforms, see if we can create new platforms
    platformextractor.do_further_validation_using_validated_platforms(plats, [])
    log.debug('After do_further_validation, insta_url is: %s', inf.insta_url)

    profile.discovered_influencer = inf
    if to_save:
        profile.valid_influencer = True
        profile.save()
        for platform, _ in plats:
            fetchertasks.fetch_platform_data.apply_async([platform.id, ], queue='new_influencer')

    log.debug('Finally Influencer has insta_url: %s', inf.insta_url)
    log.debug(('And finally, profile with id %s should have discovered influencer '
               'with id: %s (to_save is %s)'), profile.id, inf.id, to_save)

    # Here we are fetching email, blogname, name, locations from platforms
    get_influencers_email_name_location_for_profile(profile_id, to_save=to_save)

    # TODO: links to other platforms using @ sign or just like (snapchat: blah)
    return True, inf
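# Illustrative usage sketch (not part of the original module): importing an influencer from a
# crawled InstagramProfile. The function returns (False, <matching influencers>) when duplicates
# already exist, or (True, <new influencer>) otherwise; profile_id is assumed to be a valid
# InstagramProfile id.
def _example_create_influencer_from_instagram(profile_id):
    created, result = create_influencer_from_instagram(profile_id, to_save=True)
    if created:
        log.info('Created influencer %r with artificial blog url %r', result, result.blog_url)
    else:
        log.info('Matching influencers already exist: %r', result)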
def detect_influencer(self):
    """
    Detects the influencer for this profile according to the diagram.

    :return: a status string describing the detection outcome
    """
    self.report_data = dict()

    # checking if this profile has been processed before (if it has any actual IC_* tags)
    tags = self.profile.tags.split()
    if any(t in self.TAGS for t in tags):
        # looks like this profile was already processed, skipping it
        return 'already_preformed'

    # removing the existing discovered_influencer if one is present
    present_influencer = self.profile.discovered_influencer
    if present_influencer is not None:
        self.profile.discovered_influencer = None
        if self.save is True:
            self.profile.save()

    # Getting profile's discovered platform ids
    existing_platform_ids = self.profile.get_platform_ids_detected()
    non_social_urls = self.profile.get_non_social_urls_detected()

    log.info('Detecting influencer for InstagramProfile %s ...' % self.profile.id)

    self.report_data['profile_id'] = self.profile.id
    self.report_data['existing_platform_ids_qty'] = len(existing_platform_ids)
    self.report_data['non_social_urls_qty'] = len(non_social_urls)

    if len(existing_platform_ids) >= 1:
        log.info('Found %s platform ids' % len(existing_platform_ids))

        # There is at least 1 discovered existing platform for this Profile:
        # fetching all platforms except those with url_not_found=True
        # UPDATE: and then detecting influencers of these platforms. If there is only one influencer - using it
        active_plats = Platform.objects.filter(
            id__in=existing_platform_ids).exclude(url_not_found=True)
        active_influencers_ids = set()
        for p in active_plats:
            if p.influencer is not None:
                active_influencers_ids.add(p.influencer.id)
        active_influencers_ids = list(active_influencers_ids)
        self.report_data['active_influencers_ids'] = active_influencers_ids

        log.info('Found %s existing platforms with %s distinctive influencers' % (
            len(existing_platform_ids), len(active_influencers_ids)))

        if len(active_influencers_ids) == 1:
            # Great! Only platforms with one distinctive influencer were found, working with it:
            # adding this influencer to the collection, connecting it to the InstagramProfile
            log.info('Found 1 influencer (%s), setting IC_one_inf_found tag, setting '
                     'influencer to InstagramProfile' % active_influencers_ids[0])
            candidate_influencer = Influencer.objects.get(id=active_influencers_ids[0])

            if candidate_influencer.blog_url is not None and \
                    candidate_influencer.blog_url.startswith('http://www.theshelf.com/artificial_blog/'):
                inf = Influencer.objects.get(id=active_influencers_ids[0])
                # TODO: connecting existing artificial influencer?
                self.profile.discovered_influencer = candidate_influencer
                if self.save is True:
                    self.profile.save()
                    self.add_influencer_to_discovered_collection(candidate_influencer)
                self.profile.append_mutual_exclusive_tag(
                    'IC_one_artificial_inf_found', self.TAGS + self.obsolete_tags)
                self.report_data['result'] = \
                    'One existing influencer found (artificial/osos): %s (osos: %s / sos: %s)' % (
                        active_influencers_ids[0], inf.old_show_on_search, inf.show_on_search,)
                return 'IC_one_artificial_inf_found'
            else:
                self.profile.discovered_influencer = candidate_influencer
                if self.save is True:
                    self.profile.save()
                    self.add_influencer_to_discovered_collection(candidate_influencer)
                self.profile.append_mutual_exclusive_tag(
                    'IC_one_inf_found', self.TAGS + self.obsolete_tags)
                self.report_data['result'] = \
                    'One existing influencer found and set to profile (non-artificial, non-osos): ' \
                    '%s (osos: %s / sos: %s)' % (
                        active_influencers_ids[0],
                        candidate_influencer.old_show_on_search,
                        candidate_influencer.show_on_search,)
                return 'IC_one_inf_found'

        elif len(active_influencers_ids) > 1:
            # We discovered more than one active platform with more than one distinctive influencer.
            log.info('Found more than 1 platform with more than 1 distinctive '
                     'Influencers, setting tag IC_many_plats_found')
            # self.profile.append_mutual_exclusive_tag('IC_many_infs_found', self.TAGS)
            infs = Influencer.objects.filter(
                id__in=active_influencers_ids,
                old_show_on_search=True).exclude(blacklisted=True)

            if infs.count() == 0:
                # None found, we pick the best with _select_influencer_to_stay(),
                # connect it to the profile and add it to the collection
                active_infs = Influencer.objects.filter(id__in=active_influencers_ids)
                best_one = active_infs[0]._select_influencer_to_stay(list(active_infs))
                self.profile.discovered_influencer = best_one
                if self.save is True:
                    self.profile.save()
                    # self.add_influencer_to_discovered_collection(best_one)
                self.profile.append_mutual_exclusive_tag(
                    'IC_best_from_several', self.TAGS + self.obsolete_tags)
                several_infs = ["%s (osos: %s / sos: %s)" % (inf.id, inf.old_show_on_search, inf.show_on_search)
                                for inf in active_infs]
                self.report_data['result'] = \
                    'Several existing influencers found (no osos=True): %s , taken best of them: ' \
                    '%s (osos: %s / sos: %s)' % (
                        several_infs, best_one.id, best_one.old_show_on_search, best_one.show_on_search)
                return 'IC_best_from_several'

            elif infs.count() == 1:
                # One Influencer with old_show_on_search=True found, using it
                candidate_influencer = infs[0]
                self.profile.discovered_influencer = candidate_influencer
                if self.save is True:
                    self.profile.save()
                    # self.add_influencer_to_discovered_collection(candidate_influencer)
                self.profile.append_mutual_exclusive_tag(
                    'IC_one_from_several', self.TAGS + self.obsolete_tags)
                several_infs = ["%s (osos: %s / sos: %s)" % (inf.id, inf.old_show_on_search, inf.show_on_search)
                                for inf in infs]
                self.report_data['result'] = \
                    'Several existing influencers found: %s , taken one of them with osos=True: ' \
                    '%s (osos: %s / sos: %s)' % (
                        several_infs,
                        candidate_influencer.id,
                        candidate_influencer.old_show_on_search,
                        candidate_influencer.show_on_search,)
                return 'IC_one_from_several'

            else:
                # Multiple found - adding these to the collection of duplicates
                if self.save is True:
                    self.add_influencers_to_duplicates_collection(influencers=infs)
                self.profile.append_mutual_exclusive_tag(
                    'IC_many_infs_found', self.TAGS + self.obsolete_tags)
                self.report_data['result'] = \
                    'Several existing influencers found: %s, taken those with osos=True ' \
                    'and putting them to duplicates collection.' % [
                        "%s (osos: %s / sos: %s)" % (inf.id, inf.old_show_on_search, inf.show_on_search)
                        for inf in infs]
                return 'IC_many_infs_found'

    # There are 0 discovered platforms, checking with non-social urls
    if len(non_social_urls) == 0:
        # Creating an influencer with an artificial url, adding it to the collection,
        # connecting it to the profile
        log.info('No non-social urls found, creating artificial Influencer and adding it to the profile')
        count_str = '%s' % (int(time.time()))
        blog_url = 'http://www.theshelf.com/artificial_blog/%s.html' % count_str
        inf = create_influencer_and_blog_platform(blog_url,
                                                  influencer_source='discovered_via_instagram',
                                                  to_save=True,
                                                  platform_name_fallback=True)
        self.profile.discovered_influencer = inf
        if self.save is True:
            self.profile.save()
        # TODO: Should we create here an instagram platform too?
        self.add_influencer_to_discovered_collection(inf)
        self.profile.append_mutual_exclusive_tag(
            'IC_artificial_inf_created', self.TAGS + self.obsolete_tags)
        log.info('Adding IC_artificial_inf_created tag')
        self.report_data['result'] = \
            'No social/non-social platforms found - creating artificial Influencer: ' \
            '%s (osos: %s / sos: %s).' % (inf.id, inf.old_show_on_search, inf.show_on_search)
        return 'IC_artificial_inf_created'

    else:
        # There are some non-social urls -- checking if there are unique non-social urls.
        # Special shortcut: if the non-social urls contain a liketoknow.it url and a blog url can
        # be retrieved from it, then we use it as the blog url for this future influencer.
        from platformdatafetcher.producturlsextractor import get_blog_url_from_liketoknowit

        # NEW logic to check for bloggy urls
        log.info('%s non-social urls found: %s, trying to find unique root domains' % (
            len(non_social_urls), non_social_urls))

        blog_urls_found = []

        from platformdatafetcher.platformextractor import collect_social_urls_from_blog_url, \
            substitute_instagram_post_urls

        # detecting if any of the non-social urls are blogs
        with xbrowsermod.XBrowser(headless_display=settings.AUTOCREATE_HEADLESS_DISPLAY,
                                  load_no_images=True,
                                  disable_cleanup=False,
                                  timeout=60) as xb:

            # social url chunks: we need to prepare social urls into detectable chunks
            # like 'www-less domain/path'
            social_chunks = []
            for url in self.profile.get_social_urls_detected():
                parsed = urlparse(url)
                chunk = '%s%s' % (parsed.netloc[4:] if parsed.netloc.startswith('www.') else parsed.netloc,
                                  parsed.path)
                chunk = chunk.strip('/')
                if chunk not in social_chunks:
                    social_chunks.append(chunk)
            log.info('Social url fragments for searching: %s' % social_chunks)

            # detecting if any found socials are in there
            non_social_urls = self.profile.get_non_social_urls_detected()
            unique_root_domains = self.get_unique_root_domains(non_social_urls)
            for k in unique_root_domains.keys():
                non_social_url_start = unique_root_domains[k][0]

                # checking if this url is a good liketoknow.it url and a blog url can be retrieved:
                parsed = urlparse(non_social_url_start)

                # checking if the domain is liketoknow.it
                if parsed.netloc.lower().strip().replace('www.', '', 1) == 'liketoknow.it' and \
                        parsed.path.lower().strip('/').strip() not in ['', 'login']:
                    log.info('Liketoknow.it url detected: %r , trying to get its blog url' % non_social_url_start)
                    # looks like it is a good liketoknow.it url, getting the blog url
                    blog_url = get_blog_url_from_liketoknowit(non_social_url_start, xb)
                    if blog_url is not None:
                        log.info('Blog url detected successfully: %r , considering it a good blog url' % blog_url)
                        # adding it to the detected blog urls
                        if blog_url not in blog_urls_found:
                            blog_urls_found.append(blog_url)
                        else:
                            log.info('Blog url %r is already detected' % blog_url)
                    else:
                        log.info('Blog url was not detected')
                else:
                    is_blog_url, non_social_url = self.is_url_a_blog(non_social_url_start, self.profile)
                    log.info('Checking if %r is a blog:' % non_social_url)
                    if is_blog_url is True and non_social_url is not None:
                        log.info('Perfect, %r is a blog' % non_social_url)
                        socials_detected = []
                        found_soc_urls = defaultdict(list)
                        collect_social_urls_from_blog_url(xb=xb,
                                                          by_pname=found_soc_urls,
                                                          platform=None,
                                                          non_social_url=non_social_url)
                        substitute_instagram_post_urls(found_soc_urls)
                        log.info('SOCIAL URLS COLLECTED: %s' % found_soc_urls)

                        # if no social urls were collected, we're checking if this non-social url has
                        # social urls in any form with regexps by its content and iframes.
                        if len(found_soc_urls) == 0:
                            scraped_social_urls = collect_any_social_urls(xb=xb, non_social_url=non_social_url)
                            log.info('Thorough search found %s candidate social urls '
                                     'to check' % len(scraped_social_urls))
                            found_soc_urls['Bruteforce'] = scraped_social_urls

                        # found_soc_urls is in the format
                        # {'Instagram': ['url1', 'url2', ...], 'Facebook': [...], ...}
                        for social_url_lst in found_soc_urls.values():
                            for social_url in social_url_lst:
                                if any([sc.lower() in social_url.lower() for sc in social_chunks]):
                                    # we found one of the social chunks in the detected social url
                                    if social_url not in socials_detected:
                                        socials_detected.append(social_url)
                        log.info('Positively matched social urls: %s' % socials_detected)

                        # if we found some matching social urls - then it is a blog url, TA-DAAAA!
                        if len(socials_detected) > 0:
                            if non_social_url not in blog_urls_found:
                                # TODO: should we use self.is_url_a_blog(url, self.profile) here for an extra blog check?
                                blog_urls_found.append(non_social_url)
                                log.info('Considering url %r to be a blog url for this profile' % non_social_url)
                    else:
                        log.info('Url %r considered as non-blog url or is unreachable' % non_social_url_start)

        if len(blog_urls_found) == 1:
            # we found 1 blog url
            log.info('Looks like it is a new single blog url!')
            self.report_data['unique_root_domain_is_blog'] = True

            # Here we have found 0 existing platforms, but we detected that a single non-social url
            # is a BLOG. So we create a blog platform with this url, create an influencer, connect
            # this blog platform to this influencer and connect the influencer to the profile.

            # creating a new blog platform
            inf = create_influencer_and_blog_platform(blog_url=blog_urls_found[0],
                                                      influencer_source='ic_from_insta_profile',
                                                      to_save=self.save,
                                                      platform_name_fallback=True)
            self.profile.discovered_influencer = inf
            log.info('A new influencer has been created: %s' % inf)
            if self.save is True:
                self.profile.save()
                self.add_influencer_to_discovered_collection(inf)
            self.profile.append_mutual_exclusive_tag(
                'IC_new_blog_new_inf', self.TAGS + self.obsolete_tags)
            self.report_data['result'] = \
                'New influencer %s (osos: %s / sos: %s) created by single non-social blog platform' % (
                    inf.id, inf.old_show_on_search, inf.show_on_search)
            return 'IC_new_blog_new_inf'

        elif len(blog_urls_found) == 0:
            # if none were found to be a blog:
            #  => check if the length of the url > 20 chars (typically identifies a product)
            #     => then this profile needs to be fetched again later
            #  => create a new field "date_to_fetch_later" in InstagramProfile and update this field
            #     with today + 10 days
            #  => need to create a celery task that checks if today is the day when they should be
            #     re-fetched and then clears this date_to_fetch_later back to None
            #  => after fetching the profile, compare the old url and description with the new ones;
            #     if they are different, pass it to the same pipeline it was originally part of
            log.info('No blog urls were detected within non_social_urls')

            # TODO: what should we do if this already has date_to_fetch_later != None ?
            long_url = False
            for non_social_url in non_social_urls:
                if len(non_social_url) > 20:
                    self.profile.date_to_fetch_later = datetime.now() + timedelta(days=10)
                    if self.save is True:
                        self.profile.save()
                    long_url = True
                    break

            if long_url is True:
                self.report_data['result'] = 'No blog urls were found, retrying in 10 days'
                return '10_days_later'
            else:
                # TODO: What should we do here, should we create an artificial url?
                if self.save is True:
                    self.profile.append_mutual_exclusive_tag(
                        'IC_possible_brand', self.TAGS + self.obsolete_tags)
                self.report_data['result'] = 'Profile considered to be possibly a brand.'
                return 'IC_possible_brand'

        else:
            # TODO: Skipping for now...
            log.info('We found many non-social blog domains, setting IC_many_nonsocial_found tag: %s' % blog_urls_found)
            if self.save is True:
                self.profile.append_mutual_exclusive_tag(
                    'IC_many_nonsocial_found', self.TAGS + self.obsolete_tags)
            self.report_data['result'] = 'Multiple unique root domains found, skipped for now'
            return 'IC_many_nonsocial_found'
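# Illustrative reference (derived from the return statements above, not part of the original
# module): the status strings detect_influencer() can return and what they mean.
DETECT_INFLUENCER_OUTCOMES_EXAMPLE = {
    'already_preformed': 'profile already carries an IC_* tag, skipped',
    'IC_one_inf_found': 'exactly one existing influencer found and connected to the profile',
    'IC_one_artificial_inf_found': 'the single influencer found has an artificial blog url',
    'IC_best_from_several': 'several influencers, none with old_show_on_search=True; best one picked',
    'IC_one_from_several': 'several influencers, exactly one with old_show_on_search=True',
    'IC_many_infs_found': 'several influencers with old_show_on_search=True, stored as duplicates',
    'IC_artificial_inf_created': 'no platforms and no non-social urls; artificial influencer created',
    'IC_new_blog_new_inf': 'a single non-social url looks like a blog; new influencer created from it',
    '10_days_later': 'no blog urls, but a long (likely product) url; profile re-fetched in 10 days',
    'IC_possible_brand': 'no blog urls and no long urls; profile looks like a brand',
    'IC_many_nonsocial_found': 'several candidate blog domains found; skipped for now',
}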