def check_moz_domain(m, cols, wait_time): """ Calls the Moz API for the url of the given URLMetric. Args: m (URLMetrics): The metrics object whose query_url to use in the call. cols (list): A list of fields the call should return. See the URLMetrics class for more details. wait_time (int): Number of seconds to wait before releasing the API lock after the call has returned. """ lock = MozAPILock() lock.acquire() params = AdminSetting.get_moz_params() params.append(('Cols', cols)) try: r = requests.get(AdminSetting.get_moz_api_url()+'url-metrics/'+m.query_url, params=params) rtext = r.text if r.status_code == 200: # Retrieve the JSON result rd = json.loads(rtext) # Store the fields in the URLMetrics object m.store_result(rd) # Update the status of the URLMetrics m.last_updated = timezone.now() m.save() r.close() print u'Done with %s, waiting (new)...' % m.query_url # Wait the specified time time.sleep(wait_time) except Exception as e: lock.release() raise lock.release()
def check_moz_update(): """ Calls the Moz API to determine when the Moz data was last updated. This information is recorded and used to determine whether URLMetrics are still up to date. """ if settings.DEBUG: logging.basicConfig() logging.getLogger().setLevel(logging.DEBUG) requests_log = logging.getLogger(u'requests.packages.urllib3') requests_log.setLevel(logging.DEBUG) requests_log.propagate = True params = AdminSetting.get_moz_params() r = requests.get(AdminSetting.get_moz_api_url() + 'metadata/last_update.json', params=params) status = r.status_code print status if status == 200: rtext = r.text print rtext rd = json.loads(rtext) mu = MozLastUpdate() mu.datetime = timezone.make_aware( datetime.datetime.fromtimestamp(int(rd['last_update'])), timezone.get_current_timezone()) mu.retrieved = timezone.now() mu.save() r.close()
def check_moz_domain(m, cols, wait_time): """ Calls the Moz API for the url of the given URLMetric. Args: m (URLMetrics): The metrics object whose query_url to use in the call. cols (list): A list of fields the call should return. See the URLMetrics class for more details. wait_time (int): Number of seconds to wait before releasing the API lock after the call has returned. """ lock = MozAPILock() lock.acquire() params = AdminSetting.get_moz_params() params.append(('Cols', cols)) try: r = requests.get(AdminSetting.get_moz_api_url() + 'url-metrics/' + m.query_url, params=params) rtext = r.text if r.status_code == 200: # Retrieve the JSON result rd = json.loads(rtext) # Store the fields in the URLMetrics object m.store_result(rd) # Update the status of the URLMetrics m.last_updated = timezone.now() m.save() r.close() print u'Done with %s, waiting (new)...' % m.query_url # Wait the specified time time.sleep(wait_time) except Exception as e: lock.release() raise lock.release()
def update_tlds(): """ Calls the Namecheap API to update the list of recognized and registerable top-level domains. This is currently initiated manually via the administration panel. """ params = AdminSetting.get_api_params() params.append((u'Command', u'namecheap.domains.gettldlist')) r = requests.get(AdminSetting.get_api_url(), params=params) rtext = r.text send_mail( u'Domain Checker - TLD Update', u'The following response was received from the TLD update (using %s):\n\n%s' % (AdminSetting.get_api_url(), rtext), AdminSetting.get_value(u'noreply_address'), [AdminSetting.get_value(u'admin_address')]) parser = etree.XMLParser(encoding=u'utf-8') header_len = len('<?xml version="1.0" encoding="utf-8"?>') rtext = rtext[header_len:] rtree = etree.fromstring(rtext, parser=parser) rels = rtree.findall( u'./{http://api.namecheap.com/xml.response}CommandResponse/{http://api.namecheap.com/xml.response}Tlds/{http://api.namecheap.com/xml.response}Tld' ) rels = dict([(r.attrib[u'Name'], r) for r in rels]) tlds = TLD.objects.all() with transaction.atomic(): for tld in tlds: if tld.domain in rels.keys(): rel = rels[tld.domain] tld.is_recognized = True tld.is_api_registerable = ( rel.attrib[u'IsApiRegisterable'] == u'true') tld.description = rel.text tld.type = rel.attrib[u'Type'] else: tld.is_recognized = False tld.is_api_registrable = False tld.type = u'unknown' tld.description = None tld.save() for ncd, rel in rels.items(): if len(TLD.objects.filter(domain=ncd)) == 0: new_tld = TLD(domain=ncd, is_recognized=True, is_api_registerable=( rel.attrib['IsApiRegisterable'] == True), description=rel.text, type=rel.attrib['Type']) new_tld.save() print u'New TLD added: %s' % ncd print u'Finished processing tlds.'
def update_project_metrics(project_id): """ Updates all the URLMetrics associated with the given project id through the Moz API. If the MozRank of a URL is over the set threshold, extension URLs are created and also checked. Args: project_id (int): The ID of the project to update. """ p = UserProject.objects.get(id=project_id) # Retrieve all fields available with free Moz API registration cols = URLMetrics.create_cols_bitflag([ 'Title', 'Canonical URL', 'External Links', 'Links', 'MozRank 10', 'MozRank Raw', 'Subdomain MozRank 10', 'Subdomain MozRank Raw', 'HTTP Status Code', 'Page Authority', 'Domain Authority']) wait_time = AdminSetting.get_moz_api_wait_time() mozrank_extension_threshold = AdminSetting.get_value('mozrank_extension_threshold') associate_project_metrics(p) pmetrics = ProjectMetrics.objects.filter(project=p, is_checked=False) for pm in pmetrics: with transaction.atomic(): if not pm.urlmetrics.is_uptodate(): check_moz_domain(pm.urlmetrics, cols, wait_time) if not pm.is_extension and pm.urlmetrics.mozrank_10 >= mozrank_extension_threshold: extensions = get_extensions(pm.urlmetrics) print u'Getting extensions (%d)' % len(extensions) for ex in extensions: print u' %s' % ex.query_url try: newpm = ProjectMetrics.objects.get(project=p, urlmetrics=ex) except ProjectMetrics.DoesNotExist: newpm = ProjectMetrics(project=p, urlmetrics=ex, is_checked=True, is_extension=True) if not ex.is_uptodate(): print u' Checking extension: %s' % ex.query_url check_moz_domain(ex, cols, wait_time) else: print u' Extension already checked: %s' % ex.query_url newpm.is_checked = True newpm.save() pm.is_checked=True pm.save() p.update_state() p.save()
def update_project_metrics(project_id): """ Updates all the URLMetrics associated with the given project id through the Moz API. If the MozRank of a URL is over the set threshold, extension URLs are created and also checked. Args: project_id (int): The ID of the project to update. """ p = UserProject.objects.get(id=project_id) # Retrieve all fields available with free Moz API registration cols = URLMetrics.create_cols_bitflag([ 'Title', 'Canonical URL', 'External Links', 'Links', 'MozRank 10', 'MozRank Raw', 'Subdomain MozRank 10', 'Subdomain MozRank Raw', 'HTTP Status Code', 'Page Authority', 'Domain Authority' ]) wait_time = AdminSetting.get_moz_api_wait_time() mozrank_extension_threshold = AdminSetting.get_value( 'mozrank_extension_threshold') associate_project_metrics(p) pmetrics = ProjectMetrics.objects.filter(project=p, is_checked=False) for pm in pmetrics: with transaction.atomic(): if not pm.urlmetrics.is_uptodate(): check_moz_domain(pm.urlmetrics, cols, wait_time) if not pm.is_extension and pm.urlmetrics.mozrank_10 >= mozrank_extension_threshold: extensions = get_extensions(pm.urlmetrics) print u'Getting extensions (%d)' % len(extensions) for ex in extensions: print u' %s' % ex.query_url try: newpm = ProjectMetrics.objects.get(project=p, urlmetrics=ex) except ProjectMetrics.DoesNotExist: newpm = ProjectMetrics(project=p, urlmetrics=ex, is_checked=True, is_extension=True) if not ex.is_uptodate(): print u' Checking extension: %s' % ex.query_url check_moz_domain(ex, cols, wait_time) else: print u' Extension already checked: %s' % ex.query_url newpm.is_checked = True newpm.save() pm.is_checked = True pm.save() p.update_state() p.save()
def register_user(request):
    """ View: Registers a new user.

    If registrations are disabled, the request is not a POST, or required
    fields are missing, the user is redirected back to the index page.
    Otherwise a new user is created and authenticated, and the user is then
    redirected to their new profile page.
    """
    if not AdminSetting.get_value('allow_new_registrations'):
        return redirect('index')
    if request.method != 'POST':
        return redirect('index')
    # Use .get() so a malformed submission cannot raise KeyError (which
    # previously surfaced as an HTTP 500).
    username = request.POST.get('username')
    first_name = request.POST.get('first_name', '')
    last_name = request.POST.get('last_name', '')
    email = request.POST.get('email')
    password = request.POST.get('password')
    if not (username and email and password):
        # Incomplete registration form; bounce back to the index page.
        return redirect('index')
    user = User.objects.create_user(username, email, password,
                                    first_name=first_name, last_name=last_name)
    user.save()
    # Authenticate immediately so the new user lands on their profile
    # already logged in.
    user = authenticate(username=username, password=password)
    login(request, user)
    return redirect('profile')
def index(request):
    """ View: The root page. """
    context = {
        'allow_new_registrations': AdminSetting.get_value('allow_new_registrations'),
    }
    return render(request, 'main/index.html', context)
def update_tlds(): """ Calls the Namecheap API to update the list of recognized and registerable top-level domains. This is currently initiated manually via the administration panel. """ params = AdminSetting.get_api_params() params.append((u'Command', u'namecheap.domains.gettldlist')) r = requests.get(AdminSetting.get_api_url(), params=params) rtext = r.text send_mail(u'Domain Checker - TLD Update', u'The following response was received from the TLD update (using %s):\n\n%s' % (AdminSetting.get_api_url(), rtext), AdminSetting.get_value(u'noreply_address'), [AdminSetting.get_value(u'admin_address')]) parser = etree.XMLParser(encoding=u'utf-8') rtree = etree.fromstring(rtext, parser=parser) rels = rtree.findall(u'./{http://api.namecheap.com/xml.response}CommandResponse/{http://api.namecheap.com/xml.response}Tlds/{http://api.namecheap.com/xml.response}Tld') rels = dict([(r.attrib[u'Name'], r) for r in rels]) tlds = TLD.objects.all() with transaction.atomic(): for tld in tlds: if tld.domain in rels.keys(): rel = rels[tld.domain] tld.is_recognized = True tld.is_api_registerable = (rel.attrib[u'IsApiRegisterable'] == u'true') tld.description = rel.text tld.type = rel.attrib[u'Type'] else: tld.is_recognized = False tld.is_api_registrable = False tld.type = u'unknown' tld.description = None tld.save() for ncd, rel in rels.items(): if len(TLD.objects.filter(domain=ncd)) == 0: new_tld = TLD(domain=ncd, is_recognized=True, is_api_registerable=(rel.attrib['IsApiRegisterable'] == True), description=rel.text, type=rel.attrib['Type']) new_tld.save() print u'New TLD added: %s' % ncd print u'Finished processing tlds.'
def check_moz_update(): """ Calls the Moz API to determine when the Moz data was last updated. This information is recorded and used to determine whether URLMetrics are still up to date. """ if settings.DEBUG: logging.basicConfig() logging.getLogger().setLevel(logging.DEBUG) requests_log = logging.getLogger(u'requests.packages.urllib3') requests_log.setLevel(logging.DEBUG) requests_log.propagate = True params = AdminSetting.get_moz_params() r = requests.get(AdminSetting.get_moz_api_url()+'metadata/last_update.json', params=params) status = r.status_code print status if status == 200: rtext = r.text print rtext rd = json.loads(rtext) mu = MozLastUpdate() mu.datetime = timezone.make_aware(datetime.datetime.fromtimestamp(int(rd['last_update'])), timezone.get_current_timezone()) mu.retrieved = timezone.now() mu.save() r.close()
def handle(self, *args, **options):
    """ Seeds the TLD, ExcludedDomain and AdminSetting tables from the
    bundled text fixtures, inserting only rows that do not already exist.
    """
    tld_filename = 'tld_list.txt'
    exclusion_filename = 'exclusion_domains.txt'
    settings_filename = 'clean_admin.txt'
    # Lines beginning with '/' are comments; blank lines are skipped.
    with open(tld_filename) as tldf:
        tlds = [line.strip() for line in tldf if line[0] not in '/\n']
    with open(exclusion_filename) as exf:
        exl = [line.strip() for line in exf]
    with open(settings_filename) as sf:
        ss = [line.strip() for line in sf]
    tic = 0
    for tld in tlds:
        try:
            t = TLD.objects.get(domain=tld)
        except TLD.DoesNotExist:
            t = TLD()
            t.domain = tld
            t.is_recognized = False
            t.is_api_registerable = False
            t.description = None
            t.type = ''
            t.save()
            tic += 1
    self.stdout.write('TLDs: Inserted %d row(s) (out of %d TLDs)' % (tic, len(tlds)))
    eic = 0
    for exd in exl:
        try:
            ed = ExcludedDomain.objects.get(domain=exd)
        # BUG FIX: the exception name was misspelled ("DoesNotExit"), which
        # raised AttributeError on any missing row instead of inserting it.
        except ExcludedDomain.DoesNotExist:
            ed = ExcludedDomain()
            ed.domain = exd
            # BUG FIX: previously called save() on the raw string ("exd")
            # instead of the new model instance.
            ed.save()
            eic += 1
    self.stdout.write('Excluded domains: Inserted %d row(s) (out of %d listed domains)' % (eic, len(exl)))
    sic = 0
    for s in ss:
        if len(s) == 0:
            continue
        # Each settings row is tab-separated: key, value, type[, choices].
        vals = s.split('\t')
        key = vals[0]
        value = vals[1]
        valtype = vals[2]
        # BUG FIX: the guard checks for a fourth column, so read index 3;
        # the old code indexed vals[4], an IndexError on four-column rows.
        choices = vals[3] if len(vals) > 3 else None
        try:
            aso = AdminSetting.objects.get(key=key)
        except AdminSetting.DoesNotExist:
            aso = AdminSetting()
            aso.key = key
            aso.value = value
            aso.type = valtype
            aso.choices = choices
            aso.save()
            sic += 1
    self.stdout.write('Admin settings: Inserted %d row(s) (out of %d listed settings)' % (sic, len(ss)))
def handle(self, *args, **options):
    """ Seeds the TLD, ExcludedDomain and AdminSetting tables from the
    bundled text fixtures, inserting only rows that do not already exist.
    """
    tld_filename = 'tld_list.txt'
    exclusion_filename = 'exclusion_domains.txt'
    settings_filename = 'clean_admin.txt'
    # Lines beginning with '/' are comments; blank lines are skipped.
    with open(tld_filename) as tldf:
        tlds = [line.strip() for line in tldf if line[0] not in '/\n']
    with open(exclusion_filename) as exf:
        exl = [line.strip() for line in exf]
    with open(settings_filename) as sf:
        ss = [line.strip() for line in sf]
    tic = 0
    for tld in tlds:
        try:
            t = TLD.objects.get(domain=tld)
        except TLD.DoesNotExist:
            t = TLD()
            t.domain = tld
            t.is_recognized = False
            t.is_api_registerable = False
            t.description = None
            t.type = ''
            t.save()
            tic += 1
    self.stdout.write('TLDs: Inserted %d row(s) (out of %d TLDs)' % (tic, len(tlds)))
    eic = 0
    for exd in exl:
        try:
            ed = ExcludedDomain.objects.get(domain=exd)
        # BUG FIX: the exception name was misspelled ("DoesNotExit"), which
        # raised AttributeError on any missing row instead of inserting it.
        except ExcludedDomain.DoesNotExist:
            ed = ExcludedDomain()
            ed.domain = exd
            # BUG FIX: previously called save() on the raw string ("exd")
            # instead of the new model instance.
            ed.save()
            eic += 1
    self.stdout.write('Excluded domains: Inserted %d row(s) (out of %d listed domains)' % (eic, len(exl)))
    sic = 0
    for s in ss:
        if len(s) == 0:
            continue
        # Each settings row is tab-separated: key, value, type[, choices].
        vals = s.split('\t')
        key = vals[0]
        value = vals[1]
        valtype = vals[2]
        # BUG FIX: the guard checks for a fourth column, so read index 3;
        # the old code indexed vals[4], an IndexError on four-column rows.
        choices = vals[3] if len(vals) > 3 else None
        try:
            aso = AdminSetting.objects.get(key=key)
        except AdminSetting.DoesNotExist:
            aso = AdminSetting()
            aso.key = key
            aso.value = value
            aso.type = valtype
            aso.choices = choices
            aso.save()
            sic += 1
    self.stdout.write('Admin settings: Inserted %d row(s) (out of %d listed settings)' % (sic, len(ss)))
def check_project_domains(project_id):
    """ Use the Namecheap API to update availability status for all the domains associated with the given project.

    Processes unchecked domains in batches (size set by the admin panel),
    holding a NamecheapLock around each API call. On any unrecoverable
    exception the project is flagged as errored and both the user and the
    site admin are emailed before the exception is re-raised to Celery.

    Args:
        project_id (int): The ID of the project to check domains for.
    """
    lock = NamecheapLock()
    project = UserProject.objects.get(id=project_id)
    # Enable debug output
    if settings.DEBUG:
        logging.basicConfig()
        logging.getLogger().setLevel(logging.DEBUG)
        requests_log = logging.getLogger(u'requests.packages.urllib3')
        requests_log.setLevel(logging.DEBUG)
        requests_log.propagate = True
    # Each iteration of this loop checks one batch of domains; the loop
    # exits when no unchecked domains remain.
    while True:
        lock.acquire()
        try:
            # Retrieve list of unchecked domains (limited by the set limit of domains per call)
            domain_list = project.projectdomain_set.filter(is_checked=False)[:AdminSetting.get_api_urls_per_request()]
            # If no domains unchecked, progress project to the next stage (usually metrics measuring)
            if domain_list.count() == 0:
                print u'No domains found.'
                project.update_state(save=False)
                project.save()
                lock.release()
                break
            # Fold the list into a dictionary for easy reference
            domains = dict([(d.domain, d) for d in domain_list])
            domain_str = u','.join(domains.keys())
            params = AdminSetting.get_api_params()
            params.append((u'Command', u'namecheap.domains.check'))
            params.append((u'DomainList', domain_str))
            print u'Domains that will be checked: %s' % domain_str
            print params
            # Make the call to the Namecheap API (retry 3 times then fail)
            retries = 0
            while True:
                try:
                    r = requests.get(AdminSetting.get_api_url(), params=params)
                    break
                except requests.exceptions.ConnectionError as ce:
                    retries += 1
                    if retries >= 3:
                        raise ce
                    time.sleep(5)
            sc = r.status_code
            print u'Status code: %d' % sc
            if sc == 200:
                rxml = r.text.encode(u'utf-8')
                (domain_results, error_results) = parse_namecheap_result(rxml)
                if len(domain_results) == 0 and len(error_results) > 0:
                    # Handle specific but rare Namecheap API errors gracefully
                    for er in error_results:
                        if int(er[u'number']) == 2030280:
                            # TLD not found - assume same result for all
                            for domain, d in domains.items():
                                d.state = u'error'
                                d.error = u'API unable to parse TLD for this domain (possible encoding issue)'
                                d.is_checked = True
                                d.last_checked = timezone.now()
                                d.save()
                            break
                        elif int(er[u'number']) == 3031510:
                            # Denied authorization for this domain
                            for domain, d in domains.items():
                                d.state = u'error'
                                d.error = u'API denies authorisation to check this domain (reason not given)'
                                d.is_checked = True
                                d.last_checked = timezone.now()
                                d.save()
                            break
                        else:
                            # Assume catastrophic error
                            error_str = u'the API backend returned the following unrecoverable error(s):\n\n'
                            error_str += u'\n'.join([u' %d: [%s] %s' % (i + 1, er[u'number'], er[u'description']) for i, er in enumerate(error_results)])
                            raise Exception(error_str)
                """ Match the call results to the domain list and store them. If appropriate, create and associate a metrics object for the project. """
                for dr in domain_results:
                    print u'Finding match for "%s"...' % (dr[u'domain'])
                    for key in domains.keys():
                        # We use endswith to handle mailto: addresses, TODO: These should be handled at the parsing stage
                        if key.endswith(dr[u'domain']):
                            d = domains[key]
                            if dr[u'errorno'] != 0:
                                d.state = u'error'
                                d.error = u'API error (%d): %s' % (dr[u'errorno'], dr[u'description'])
                                print dr
                            else:
                                d.state = u'available' if dr[u'available'] else u'unavailable'
                                d.description = None
                            d.is_checked = True
                            d.last_checked = timezone.now()
                            d.save()
                            if d.state == u'available':
                                # Available domains get a URLMetrics record
                                # (reused if one already exists) queued for
                                # the metrics-measuring stage.
                                try:
                                    um = URLMetrics.objects.get(query_url=d.domain)
                                except URLMetrics.DoesNotExist:
                                    um = URLMetrics(query_url=d.domain)
                                    um.save()
                                pm = ProjectMetrics(project=project, urlmetrics=um, is_checked=False, is_extension=False)
                                pm.save()
                            break
                # Make a debug note if a requested domain does not appear in the results (likely an error occurred)
                for domain, d in domains.items():
                    if d.state == u'unchecked':
                        print u'Domain result not found (will recheck later): %s' % domain
            else:
                print u'Warning: Unexpected response while calling API code: %d, will retry after delay' % sc
            r.close()
            # Rate-limit between batches while still holding the lock.
            time.sleep(AdminSetting.get_api_wait_time())
            lock.release()
        except Exception as e:
            lock.release()
            # A fatal error has occurred, set the project state appropriately and send an email to the user.
            project.state = u'error'
            project.error = u'Error occurred while checking domains - %s' % str(e).encode('utf-8')
            project.updated = timezone.now()
            project.completed_datetime = timezone.now()
            project.save()
            reply_address = AdminSetting.get_value(u'noreply_address')
            server_address = AdminSetting.get_value(u'server_address')
            messagebody = (u'The project "%s" has encountered an error:\n\n' + \
                u'%s\n\nYou can view the results at the following address:\n\n' + \
                u'%s/project?id=%d\n\n' + \
                u'Thank you for using Domain Checker.') % \
                (project.name(), project.error, server_address, project.id)
            user = User.objects.get(id=project.user_id)
            send_mail(u'Domain Checker - Project "%s" Error' % (project.name(),), messagebody, reply_address, [user.email])
            (exc_type, exc_value, exc_traceback) = sys.exc_info()
            admin_email = AdminSetting.get_value(u'admin_address')
            admin_messagebody = (u'The user "%s" has encountered an unrecoverable error for project id %d.\n\n%s') % \
                (user.username, project.id, '\n'.join(traceback.format_exception(exc_type, exc_value, exc_traceback)))
            print admin_email
            print admin_messagebody
            send_mail(u'Domain Checker - User Unrecoverable Error', admin_messagebody, reply_address, [admin_email])
            # Propagate error to Celery handler
            raise
    project.update_state()
    # If any domains require metrics retrieval, start the appropriate background task
    if project.state == u'measuring':
        update_project_metrics.delay(project.id)
def check_project_domains(project_id):
    """ Use the Namecheap API to update availability status for all the domains associated with the given project.

    Unchecked domains are processed batch-by-batch under a NamecheapLock.
    A fatal exception marks the project as errored, notifies the user and
    the admin by email, and is then re-raised for the Celery handler.

    Args:
        project_id (int): The ID of the project to check domains for.
    """
    lock = NamecheapLock()
    project = UserProject.objects.get(id=project_id)
    # Enable debug output
    if settings.DEBUG:
        logging.basicConfig()
        logging.getLogger().setLevel(logging.DEBUG)
        requests_log = logging.getLogger(u'requests.packages.urllib3')
        requests_log.setLevel(logging.DEBUG)
        requests_log.propagate = True
    # One batch of domains per iteration; exits when none remain unchecked.
    while True:
        lock.acquire()
        try:
            # Retrieve list of unchecked domains (limited by the set limit of domains per call)
            domain_list = project.projectdomain_set.filter(is_checked=False)[:AdminSetting.get_api_urls_per_request()]
            # If no domains unchecked, progress project to the next stage (usually metrics measuring)
            if domain_list.count() == 0:
                print u'No domains found.'
                project.update_state(save=False)
                project.save()
                lock.release()
                break
            # Fold the list into a dictionary for easy reference
            domains = dict([(d.domain, d) for d in domain_list])
            domain_str = u','.join(domains.keys())
            params = AdminSetting.get_api_params()
            params.append((u'Command', u'namecheap.domains.check'))
            params.append((u'DomainList', domain_str))
            print u'Domains that will be checked: %s' % domain_str
            print params
            # Make the call to the Namecheap API (retry 3 times then fail)
            retries = 0
            while True:
                try:
                    r = requests.get(AdminSetting.get_api_url(), params=params)
                    break
                except requests.exceptions.ConnectionError as ce:
                    retries += 1
                    if retries >= 3:
                        raise ce
                    time.sleep(5)
            sc = r.status_code
            print u'Status code: %d' % sc
            if sc == 200:
                rxml = r.text.encode(u'utf-8')
                (domain_results, error_results) = parse_namecheap_result(rxml)
                if len(domain_results) == 0 and len(error_results) > 0:
                    # Handle specific but rare Namecheap API errors gracefully
                    for er in error_results:
                        if int(er[u'number']) == 2030280:
                            # TLD not found - assume same result for all
                            for domain, d in domains.items():
                                d.state = u'error'
                                d.error = u'API unable to parse TLD for this domain (possible encoding issue)'
                                d.is_checked = True
                                d.last_checked = timezone.now()
                                d.save()
                            break
                        elif int(er[u'number']) == 3031510:
                            # Denied authorization for this domain
                            for domain, d in domains.items():
                                d.state = u'error'
                                d.error = u'API denies authorisation to check this domain (reason not given)'
                                d.is_checked = True
                                d.last_checked = timezone.now()
                                d.save()
                            break
                        else:
                            # Assume catastrophic error
                            error_str = u'the API backend returned the following unrecoverable error(s):\n\n'
                            error_str += u'\n'.join([u' %d: [%s] %s' % (i+1, er[u'number'], er[u'description']) for i, er in enumerate(error_results)])
                            raise Exception(error_str)
                """ Match the call results to the domain list and store them. If appropriate, create and associate a metrics object for the project. """
                for dr in domain_results:
                    print u'Finding match for "%s"...' % (dr[u'domain'])
                    for key in domains.keys():
                        # We use endswith to handle mailto: addresses, TODO: These should be handled at the parsing stage
                        if key.endswith(dr[u'domain']):
                            d = domains[key]
                            if dr[u'errorno'] != 0:
                                d.state = u'error'
                                d.error = u'API error (%d): %s' % (dr[u'errorno'], dr[u'description'])
                                print dr
                            else:
                                d.state = u'available' if dr[u'available'] else u'unavailable'
                                d.description = None
                            d.is_checked = True
                            d.last_checked = timezone.now()
                            d.save()
                            if d.state == u'available':
                                # Queue a URLMetrics record (reused when it
                                # already exists) for the metrics stage.
                                try:
                                    um = URLMetrics.objects.get(query_url=d.domain)
                                except URLMetrics.DoesNotExist:
                                    um = URLMetrics(query_url=d.domain)
                                    um.save()
                                pm = ProjectMetrics(project=project, urlmetrics=um, is_checked=False, is_extension=False)
                                pm.save()
                            break
                # Make a debug note if a requested domain does not appear in the results (likely an error occurred)
                for domain, d in domains.items():
                    if d.state == u'unchecked':
                        print u'Domain result not found (will recheck later): %s' % domain
            else:
                print u'Warning: Unexpected response while calling API code: %d, will retry after delay' % sc
            r.close()
            # Rate-limit between batches while still holding the lock.
            time.sleep(AdminSetting.get_api_wait_time())
            lock.release()
        except Exception as e:
            lock.release()
            # A fatal error has occurred, set the project state appropriately and send an email to the user.
            project.state = u'error'
            project.error = u'Error occurred while checking domains - %s' % str(e).encode('utf-8')
            project.updated = timezone.now()
            project.completed_datetime = timezone.now()
            project.save()
            reply_address = AdminSetting.get_value(u'noreply_address')
            server_address = AdminSetting.get_value(u'server_address')
            messagebody = (u'The project "%s" has encountered an error:\n\n' + \
                u'%s\n\nYou can view the results at the following address:\n\n' + \
                u'%s/project?id=%d\n\n' + \
                u'Thank you for using Domain Checker.') % \
                (project.name(), project.error, server_address, project.id)
            user = User.objects.get(id=project.user_id)
            send_mail(u'Domain Checker - Project "%s" Error' % (project.name(),), messagebody, reply_address, [user.email])
            (exc_type, exc_value, exc_traceback) = sys.exc_info()
            admin_email = AdminSetting.get_value(u'admin_address')
            admin_messagebody = (u'The user "%s" has encountered an unrecoverable error for project id %d.\n\n%s') % \
                (user.username, project.id, '\n'.join(traceback.format_exception(exc_type, exc_value, exc_traceback)))
            print admin_email
            print admin_messagebody
            send_mail(u'Domain Checker - User Unrecoverable Error', admin_messagebody, reply_address, [admin_email])
            # Propagate error to Celery handler
            raise
    project.update_state()
    # If any domains require metrics retrieval, start the appropriate background task
    if project.state == u'measuring':
        update_project_metrics.delay(project.id)
def extract_domains(file_content, fail_email, filename):
    """ Takes the contents of an uploaded file and returns a series a tuple of results representing parsed domains of different types.

    If any lines in the file cannot be parsed, an email notification is sent to the given address with details.

    Args:
        file_content (str): The raw content of the file.
        fail_email (str): Email address of user to notify if there is a fatal error
        filename (str): The original filename

    Returns:
        A tuple containing three items:
            1. Correctly parsed domains
            2. Domains that failed, are unrecognized or unregisterable
            3. A list of lines in the file that could not be parsed
    """
    tlds = load_tlds()
    exclusions = load_exclusions()
    preservations = load_preservations()
    domain_list = set()
    ln = 0
    failed_lines = []
    failed_domains = []
    # Tracks domains already reported as failed, so each one is only
    # appended to failed_domains once even if it appears on many lines.
    failed_set = set()
    for url in file_content.split('\n'):
        ln += 1
        logger.debug(type(url))
        # url = url.decode('utf-8')
        # url = unicode(url, errors='ignore')
        # Skip blank lines and comment lines starting with '/'.
        if len(url) == 0 or url[0] in '/\n':
            continue
        # logger.debug(url.strip())
        try:
            url = url.strip()
            if iponly_re.match(url) is not None:
                raise ValueError(u'IP only - no domain to extract')
            elif url.startswith('javascript:'):
                raise ValueError(u'Javascript hook')
            (tld_match, domain, full_domain) = remove_subdomains(url.strip(), tlds)
            tld = TLD.objects.get(domain=tld_match)
            if domain in failed_set:
                continue
            # Classification order matters: TLD checks first, then explicit
            # exclusions, then preservation list, else the domain is good.
            if not tld.is_recognized:
                failed_domains.append((domain, u'unregisterable', u'Unregisterable TLD (%s)' % tld_match))
                failed_set.add(domain)
            elif not tld.is_api_registerable:
                failed_domains.append((domain, u'unregisterable', u'TLD recognized but cannot be registered through the API (%s)'% tld_match))
                failed_set.add(domain)
            elif domain in exclusions:
                failed_domains.append((domain, u'unregisterable', u'Domain explicitly excluded (%s)' % domain))
                failed_set.add(domain)
            elif domain in preservations:
                # NOTE(review): preserved entries are recorded under the
                # full_domain (including subdomains) while the dedup check
                # above uses the bare domain — presumably intentional so each
                # distinct subdomain is preserved separately; confirm.
                failed_domains.append((full_domain, u'special', u'Domain is reserved for special processing (%s)' % domain))
                failed_set.add(full_domain)
            else:
                domain_list.add(domain)
        except ValueError as e:
            # remove_subdomains and the explicit raises above signal
            # unparseable lines via ValueError.
            failed_lines.append((ln, url.strip(), str(e)))
    if len(failed_lines) > 0:
        error_email = u'The following domains failed while reading the file "%s":\n\n' % filename.encode('utf-8')
        for fd in failed_lines:
            error_email += u'Line %d: %s (%s)\n' % (fd[0], fd[1], fd[2])
        logger.debug(error_email)
        send_mail(u'Domain Checker: Failed Domains', error_email, AdminSetting.get_value('noreply_address'), [fail_email])
    return (domain_list, failed_domains, failed_lines)
def index(request):
    """ View: The root page. """
    allow_new = AdminSetting.get_value('allow_new_registrations')
    return render(request, 'main/index.html',
                  {'allow_new_registrations': allow_new})
def extract_domains(file_content, fail_email, filename):
    """ Takes the contents of an uploaded file and returns a series a tuple of results representing parsed domains of different types.

    If any lines in the file cannot be parsed, an email notification is sent to the given address with details.

    Args:
        file_content (str): The raw content of the file.
        fail_email (str): Email address of user to notify if there is a fatal error
        filename (str): The original filename

    Returns:
        A tuple containing three items:
            1. Correctly parsed domains
            2. Domains that failed, are unrecognized or unregisterable
            3. A list of lines in the file that could not be parsed
    """
    tlds = load_tlds()
    exclusions = load_exclusions()
    preservations = load_preservations()
    domain_list = set()
    ln = 0
    failed_lines = []
    failed_domains = []
    # Dedup set so a failing domain appearing on several lines is only
    # reported once in failed_domains.
    failed_set = set()
    for url in file_content.split('\n'):
        ln += 1
        logger.debug(type(url))
        # url = url.decode('utf-8')
        # url = unicode(url, errors='ignore')
        # Skip blank lines and comment lines starting with '/'.
        if len(url) == 0 or url[0] in '/\n':
            continue
        # logger.debug(url.strip())
        try:
            url = url.strip()
            if iponly_re.match(url) is not None:
                raise ValueError(u'IP only - no domain to extract')
            elif url.startswith('javascript:'):
                raise ValueError(u'Javascript hook')
            (tld_match, domain, full_domain) = remove_subdomains(url.strip(), tlds)
            tld = TLD.objects.get(domain=tld_match)
            if domain in failed_set:
                continue
            # Ordered classification: TLD validity, API registerability,
            # explicit exclusion, preservation list, otherwise accepted.
            if not tld.is_recognized:
                failed_domains.append((domain, u'unregisterable', u'Unregisterable TLD (%s)' % tld_match))
                failed_set.add(domain)
            elif not tld.is_api_registerable:
                failed_domains.append((domain, u'unregisterable', u'TLD recognized but cannot be registered through the API (%s)' % tld_match))
                failed_set.add(domain)
            elif domain in exclusions:
                failed_domains.append((domain, u'unregisterable', u'Domain explicitly excluded (%s)' % domain))
                failed_set.add(domain)
            elif domain in preservations:
                # NOTE(review): preserved entries are keyed by full_domain
                # while the dedup check uses the bare domain — presumably so
                # each subdomain is preserved individually; confirm intent.
                failed_domains.append((full_domain, u'special', u'Domain is reserved for special processing (%s)' % domain))
                failed_set.add(full_domain)
            else:
                domain_list.add(domain)
        except ValueError as e:
            # Unparseable lines are signalled via ValueError (from the raises
            # above or from remove_subdomains).
            failed_lines.append((ln, url.strip(), str(e)))
    if len(failed_lines) > 0:
        error_email = u'The following domains failed while reading the file "%s":\n\n' % filename.encode(
            'utf-8')
        for fd in failed_lines:
            error_email += u'Line %d: %s (%s)\n' % (fd[0], fd[1], fd[2])
        logger.debug(error_email)
        send_mail(u'Domain Checker: Failed Domains', error_email, AdminSetting.get_value('noreply_address'), [fail_email])
    return (domain_list, failed_domains, failed_lines)