Example #1
def check_moz_domain(m, cols, wait_time):
    """
    Calls the Moz API for the url of the given URLMetric.

    Args:
      m (URLMetrics): The metrics object whose query_url to use in the call.
      cols (list): A list of fields the call should return.  See the URLMetrics class for more details.
      wait_time (int): Number of seconds to wait before releasing the API lock after the call has returned.
    """
    lock = MozAPILock()
    lock.acquire()
    params = AdminSetting.get_moz_params()
    params.append(('Cols', cols))
    try:
        r = requests.get(AdminSetting.get_moz_api_url() + 'url-metrics/' +
                         m.query_url,
                         params=params)
        rtext = r.text
        if r.status_code == 200:
            # Retrieve the JSON result
            rd = json.loads(rtext)
            # Store the fields in the URLMetrics object
            m.store_result(rd)
            # Update the status of the URLMetrics
            m.last_updated = timezone.now()
            m.save()
        r.close()
        print u'Done with %s, waiting (new)...' % m.query_url
        # Wait the specified time before releasing the API lock
        time.sleep(wait_time)
    finally:
        lock.release()
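A hedged usage sketch: this function is presumably called per URLMetrics row from a background task, with the Cols bitflag built as in update_project_metrics below.  The helper name and module path here are assumptions, not project code.

# Sketch only: the module path and helper name are assumptions.
from main.models import URLMetrics, AdminSetting

def refresh_single_url(query_url):
    # Fetch (or create) the metrics row, then refresh it if it is stale.
    m, _ = URLMetrics.objects.get_or_create(query_url=query_url)
    cols = URLMetrics.create_cols_bitflag(['Page Authority', 'Domain Authority'])
    if not m.is_uptodate():
        check_moz_domain(m, cols, AdminSetting.get_moz_api_wait_time())
    return m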
Example #2
def check_moz_update():
    """
    Calls the Moz API to determine when the Moz data was last updated.  This information is recorded and used to determine whether URLMetrics are still up to date.
    """
    if settings.DEBUG:
        logging.basicConfig()
        logging.getLogger().setLevel(logging.DEBUG)
        requests_log = logging.getLogger(u'requests.packages.urllib3')
        requests_log.setLevel(logging.DEBUG)
        requests_log.propagate = True
    params = AdminSetting.get_moz_params()
    # Ask Moz when its index was last refreshed
    r = requests.get(AdminSetting.get_moz_api_url() +
                     'metadata/last_update.json',
                     params=params)
    status = r.status_code
    print status
    if status == 200:
        rtext = r.text
        print rtext
        rd = json.loads(rtext)
        # Record the refresh time (converted from a Unix timestamp) and when
        # we retrieved it
        mu = MozLastUpdate()
        mu.datetime = timezone.make_aware(
            datetime.datetime.fromtimestamp(int(rd['last_update'])),
            timezone.get_current_timezone())
        mu.retrieved = timezone.now()
        mu.save()
    r.close()
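The timestamp recorded here is presumably what URLMetrics.is_uptodate() (used in the examples below) compares against.  A sketch of such a check, under that assumption rather than from the project's code:

# Hypothetical sketch of URLMetrics.is_uptodate(); the comparison rule is
# an assumption based on how MozLastUpdate is recorded above.
def is_uptodate(self):
    latest = MozLastUpdate.objects.order_by('-retrieved').first()
    if latest is None or self.last_updated is None:
        return False
    # Metrics are stale once Moz has refreshed its index after our last fetch.
    return self.last_updated >= latest.datetime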
Example #3
def update_tlds():
    """
    Calls the Namecheap API to update the list of recognized and registerable top-level domains.  This is currently initiated manually via the administration panel.
    """
    params = AdminSetting.get_api_params()
    params.append((u'Command', u'namecheap.domains.gettldlist'))
    r = requests.get(AdminSetting.get_api_url(), params=params)
    rtext = r.text

    send_mail(
        u'Domain Checker - TLD Update',
        u'The following response was received from the TLD update (using %s):\n\n%s'
        % (AdminSetting.get_api_url(), rtext),
        AdminSetting.get_value(u'noreply_address'),
        [AdminSetting.get_value(u'admin_address')])

    parser = etree.XMLParser(encoding=u'utf-8')
    # Strip the XML declaration: lxml refuses to parse unicode strings that
    # still carry an encoding declaration.
    header_len = len('<?xml version="1.0" encoding="utf-8"?>')
    rtext = rtext[header_len:]
    rtree = etree.fromstring(rtext, parser=parser)
    rels = rtree.findall(
        u'./{http://api.namecheap.com/xml.response}CommandResponse/{http://api.namecheap.com/xml.response}Tlds/{http://api.namecheap.com/xml.response}Tld'
    )

    # Map each TLD name to its XML element
    rels = dict([(el.attrib[u'Name'], el) for el in rels])
    tlds = TLD.objects.all()
    with transaction.atomic():
        for tld in tlds:
            if tld.domain in rels:
                rel = rels[tld.domain]
                tld.is_recognized = True
                tld.is_api_registerable = (
                    rel.attrib[u'IsApiRegisterable'] == u'true')
                tld.description = rel.text
                tld.type = rel.attrib[u'Type']
            else:
                tld.is_recognized = False
                tld.is_api_registerable = False
                tld.type = u'unknown'
                tld.description = None
            tld.save()

        # Add any TLDs returned by the API that are not yet in the database
        for ncd, rel in rels.items():
            if not TLD.objects.filter(domain=ncd).exists():
                new_tld = TLD(domain=ncd,
                              is_recognized=True,
                              is_api_registerable=(
                                  rel.attrib[u'IsApiRegisterable'] == u'true'),
                              description=rel.text,
                              type=rel.attrib[u'Type'])
                new_tld.save()
                print u'New TLD added: %s' % ncd
    print u'Finished processing tlds.'
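The fully qualified findall path above is hard to read; an equivalent lookup using an ElementTree namespace map (an alternative sketch, not the project's code) would be:

# Equivalent namespace-map form of the findall call above (sketch only).
NC_NS = {'nc': 'http://api.namecheap.com/xml.response'}
rels = rtree.findall('./nc:CommandResponse/nc:Tlds/nc:Tld', namespaces=NC_NS)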
Example #4
def update_project_metrics(project_id):
    """
    Updates all the URLMetrics associated with the given project id through the Moz API.  If the MozRank of a URL is over the set threshold, extension URLs are created and also checked.

    Args:
      project_id (int): The ID of the project to update.
    """
    p = UserProject.objects.get(id=project_id)
    # Retrieve all fields available with free Moz API registration
    cols = URLMetrics.create_cols_bitflag([
        'Title', 'Canonical URL', 'External Links', 'Links', 'MozRank 10',
        'MozRank Raw', 'Subdomain MozRank 10', 'Subdomain MozRank Raw',
        'HTTP Status Code', 'Page Authority', 'Domain Authority'
    ])
    wait_time = AdminSetting.get_moz_api_wait_time()
    mozrank_extension_threshold = AdminSetting.get_value(
        'mozrank_extension_threshold')
    associate_project_metrics(p)
    pmetrics = ProjectMetrics.objects.filter(project=p, is_checked=False)
    for pm in pmetrics:
        with transaction.atomic():
            if not pm.urlmetrics.is_uptodate():
                check_moz_domain(pm.urlmetrics, cols, wait_time)
            if not pm.is_extension and pm.urlmetrics.mozrank_10 >= mozrank_extension_threshold:
                extensions = get_extensions(pm.urlmetrics)
                print u'Getting extensions (%d)' % len(extensions)
                for ex in extensions:
                    print u'  %s' % ex.query_url
                    try:
                        newpm = ProjectMetrics.objects.get(project=p,
                                                           urlmetrics=ex)
                    except ProjectMetrics.DoesNotExist:
                        newpm = ProjectMetrics(project=p,
                                               urlmetrics=ex,
                                               is_checked=True,
                                               is_extension=True)
                    if not ex.is_uptodate():
                        print u'  Checking extension: %s' % ex.query_url
                        check_moz_domain(ex, cols, wait_time)
                    else:
                        print u'  Extension already checked: %s' % ex.query_url
                    newpm.is_checked = True
                    newpm.save()

            pm.is_checked = True
            pm.save()
    p.update_state()
    p.save()
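The cols value built at the top of this function is a bitflag, not a list of strings; a hypothetical sketch of what URLMetrics.create_cols_bitflag might do, with the individual bit values being illustrative assumptions:

# Hypothetical sketch of URLMetrics.create_cols_bitflag(); the bit values
# below are illustrative assumptions, not Moz's documented constants.
MOZ_COL_BITS = {
    'Title': 1,
    'Canonical URL': 4,
    'External Links': 32,
    # ... remaining columns would be listed here.
}

def create_cols_bitflag(names):
    flag = 0
    for name in names:
        flag |= MOZ_COL_BITS[name]
    return flag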
Example #5
def register_user(request):
    """
    View: Registers a new user.  If registrations are disabled or the request is not a POST, the user is redirected back to the index page.  Otherwise a new user is created and authenticated, and then redirected to their new profile page.
    """
    if not AdminSetting.get_value('allow_new_registrations'):
        return redirect('index')

    if request.method != 'POST':
        return redirect('index')

    username = request.POST['username']
    first_name = request.POST['first_name']
    last_name = request.POST['last_name']
    email = request.POST['email']
    password = request.POST['password']

    user = User.objects.create_user(username,
                                    email,
                                    password,
                                    first_name=first_name,
                                    last_name=last_name)
    user.save()

    user = authenticate(username=username, password=password)
    login(request, user)

    return redirect('profile')
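The redirect() calls in these views refer to URL pattern names ('index', 'profile'); a hedged sketch of the urlconf they imply, with the paths, module layout and profile view all assumed:

# Sketch of the implied urls.py; only the pattern names are grounded in the
# redirect() calls above, everything else is an assumption.
from django.conf.urls import url

from main import views

urlpatterns = [
    url(r'^$', views.index, name='index'),
    url(r'^register/$', views.register_user, name='register_user'),
    url(r'^profile/$', views.profile, name='profile'),
]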
Example #6
def index(request):
    """
    View: The root page.
    """
    return render(request, 'main/index.html', {
        'allow_new_registrations': AdminSetting.get_value('allow_new_registrations'),
    })
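Nearly every example on this page reads configuration through AdminSetting.get_value; the sketch below shows the kind of model those calls imply.  The field names come from the management command further down, while the type-conversion behaviour is an assumption.

# Hypothetical sketch of the AdminSetting model implied by get_value();
# the conversion rules keyed on 'type' are assumed.
from django.db import models

class AdminSetting(models.Model):
    key = models.CharField(max_length=255, unique=True)
    value = models.TextField()
    type = models.CharField(max_length=32)
    choices = models.TextField(null=True, blank=True)

    @classmethod
    def get_value(cls, key):
        setting = cls.objects.get(key=key)
        if setting.type == u'int':
            return int(setting.value)
        if setting.type == u'bool':
            return setting.value.lower() == u'true'
        return setting.value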
Example #12
0
    def handle(self, *args, **options):
        tld_filename = 'tld_list.txt'
        exclusion_filename = 'exclusion_domains.txt'
        settings_filename = 'clean_admin.txt'

        # Seed data files: recognized TLDs (one per line, '/' starts a
        # comment), excluded domains, and default admin settings as
        # tab-separated key/value/type[/choices] rows.
        tldf = open(tld_filename)
        tlds = [line.strip() for line in tldf if line[0] not in '/\n']
        tldf.close()
        exf = open(exclusion_filename)
        exl = [line.strip() for line in exf]
        exf.close()
        sf = open(settings_filename)
        ss = [line.strip() for line in sf]
        sf.close()

        tic = 0
        for tld in tlds:
            try:
                t = TLD.objects.get(domain=tld)
            except TLD.DoesNotExist:
                t = TLD()
                t.domain = tld
                t.is_recognized = False
                t.is_api_registerable = False
                t.description = None
                t.type = ''
                t.save()
                tic += 1
        self.stdout.write('TLDs: Inserted %d row(s) (out of %d TLDs)' %
                          (tic, len(tlds)))

        eic = 0
        for exd in exl:
            try:
                ed = ExcludedDomain.objects.get(domain=exd)
            except ExcludedDomain.DoesNotExist:
                ed = ExcludedDomain()
                ed.domain = exd
                ed.save()
                eic += 1
        self.stdout.write(
            'Excluded domains: Inserted %d row(s) (out of %d listed domains)' %
            (eic, len(exl)))

        sic = 0
        for s in ss:
            if len(s) == 0:
                continue
            vals = s.split('\t')
            key = vals[0]
            value = vals[1]
            valtype = vals[2]
            choices = None
            if len(vals) > 3:
                choices = vals[3]
            try:
                aso = AdminSetting.objects.get(key=key)
            except AdminSetting.DoesNotExist:
                aso = AdminSetting()
                aso.key = key
                aso.value = value
                aso.type = valtype
                aso.choices = choices
                aso.save()
                sic += 1
        self.stdout.write(
            'Admin settings: Inserted %d row(s) (out of %d listed settings)' %
            (sic, len(ss)))
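This handle() method writes to self.stdout, so it is evidently the body of a Django management command; a hedged sketch of the surrounding class, with the import path and help text assumed:

# Sketch of the management-command wrapper implied by handle() above; the
# import path and help text are assumptions.
from django.core.management.base import BaseCommand

from main.models import TLD, ExcludedDomain, AdminSetting


class Command(BaseCommand):
    help = 'Seed TLDs, excluded domains and admin settings from bundled text files.'

    # The handle() method shown above would be defined here.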
Example #8
def check_project_domains(project_id):
    """
    Use the Namecheap API to update availability status for all the domains associated with the given project.

    Args:
      project_id (int): The ID of the project to check domains for.
    """
    lock = NamecheapLock()
    project = UserProject.objects.get(id=project_id)
    # Enable debug output
    if settings.DEBUG:
        logging.basicConfig()
        logging.getLogger().setLevel(logging.DEBUG)
        requests_log = logging.getLogger(u'requests.packages.urllib3')
        requests_log.setLevel(logging.DEBUG)
        requests_log.propagate = True
    while True:
        lock.acquire()
        try:
            # Retrieve list of unchecked domains (limited by the set limit of domains per call)
            domain_list = project.projectdomain_set.filter(
                is_checked=False)[:AdminSetting.get_api_urls_per_request()]
            # If no domains unchecked, progress project to the next stage (usually metrics measuring)
            if domain_list.count() == 0:
                print u'No domains found.'
                project.update_state(save=False)
                project.save()

                lock.release()
                break

            # Fold the list into a dictionary for easy reference
            domains = dict([(d.domain, d) for d in domain_list])
            domain_str = u','.join(domains.keys())

            params = AdminSetting.get_api_params()
            params.append((u'Command', u'namecheap.domains.check'))
            params.append((u'DomainList', domain_str))

            print u'Domains that will be checked: %s' % domain_str
            print params

            # Make the call to the Namecheap API (retry 3 times then fail)
            retries = 0
            while True:
                try:
                    r = requests.get(AdminSetting.get_api_url(), params=params)
                    break
                except requests.exceptions.ConnectionError as ce:
                    retries += 1
                    if retries >= 3:
                        raise ce
                    time.sleep(5)

            sc = r.status_code
            print u'Status code: %d' % sc

            if sc == 200:
                rxml = r.text.encode(u'utf-8')
                (domain_results, error_results) = parse_namecheap_result(rxml)
                if len(domain_results) == 0 and len(error_results) > 0:
                    # Handle specific but rare Namecheap API errors gracefully
                    for er in error_results:
                        if int(er[u'number']) == 2030280:
                            # TLD not found - assume same result for all
                            for domain, d in domains.items():
                                d.state = u'error'
                                d.error = u'API unable to parse TLD for this domain (possible encoding issue)'
                                d.is_checked = True
                                d.last_checked = timezone.now()
                                d.save()
                            break
                        elif int(er[u'number']) == 3031510:
                            # Denied authorization for this domain
                            for domain, d in domains.items():
                                d.state = u'error'
                                d.error = u'API denies authorisation to check this domain (reason not given)'
                                d.is_checked = True
                                d.last_checked = timezone.now()
                                d.save()
                            break
                        else:
                            # Assume catastrophic error
                            error_str = u'the API backend returned the following unrecoverable error(s):\n\n'
                            error_str += u'\n'.join([
                                u'  %d: [%s] %s' %
                                (i + 1, er[u'number'], er[u'description'])
                                for i, er in enumerate(error_results)
                            ])
                            raise Exception(error_str)
                """
                Match the call results to the domain list and store them.  If appropriate, create and associate a metrics object for the project.
                """
                for dr in domain_results:
                    print u'Finding match for "%s"...' % (dr[u'domain'])
                    for key in domains.keys():
                        # We use endswith to handle mailto: addresses, TODO: These should be handled at the parsing stage
                        if key.endswith(dr[u'domain']):
                            d = domains[key]
                            if dr[u'errorno'] != 0:
                                d.state = u'error'
                                d.error = u'API error (%d): %s' % (
                                    dr[u'errorno'], dr[u'description'])
                                print dr
                            else:
                                d.state = u'available' if dr[
                                    u'available'] else u'unavailable'
                                d.description = None
                            d.is_checked = True
                            d.last_checked = timezone.now()
                            d.save()
                            if d.state == u'available':
                                try:
                                    um = URLMetrics.objects.get(
                                        query_url=d.domain)
                                except URLMetrics.DoesNotExist:
                                    um = URLMetrics(query_url=d.domain)
                                    um.save()
                                pm = ProjectMetrics(project=project,
                                                    urlmetrics=um,
                                                    is_checked=False,
                                                    is_extension=False)
                                pm.save()
                            break

                # Make a debug note if a requested domain does not appear in the results (likely an error occurred)
                for domain, d in domains.items():
                    if d.state == u'unchecked':
                        print u'Domain result not found (will recheck later): %s' % domain
            else:
                print u'Warning: Unexpected response while calling API code: %d, will retry after delay' % sc

            r.close()
            time.sleep(AdminSetting.get_api_wait_time())
            lock.release()
        except Exception as e:
            lock.release()

            # A fatal error has occurred, set the project state appropriately and send an email to the user.
            project.state = u'error'
            project.error = u'Error occurred while checking domains - %s' % str(
                e).encode('utf-8')
            project.updated = timezone.now()
            project.completed_datetime = timezone.now()
            project.save()
            reply_address = AdminSetting.get_value(u'noreply_address')
            server_address = AdminSetting.get_value(u'server_address')
            messagebody = (u'The project "%s" has encountered an error:\n\n' + \
                  u'%s\n\nYou can view the results at the following address:\n\n' + \
                  u'%s/project?id=%d\n\n' + \
                  u'Thank you for using Domain Checker.') % \
                  (project.name(), project.error, server_address, project.id)
            user = User.objects.get(id=project.user_id)
            send_mail(
                u'Domain Checker - Project "%s" Error' % (project.name(), ),
                messagebody, reply_address, [user.email])

            (exc_type, exc_value, exc_traceback) = sys.exc_info()
            admin_email = AdminSetting.get_value(u'admin_address')
            admin_messagebody = (u'The user "%s" has encountered an unrecoverable error for project id %d.\n\n%s') % \
                (user.username, project.id, '\n'.join(traceback.format_exception(exc_type, exc_value, exc_traceback)))
            print admin_email
            print admin_messagebody

            send_mail(u'Domain Checker - User Unrecoverable Error',
                      admin_messagebody, reply_address, [admin_email])

            # Propagate error to Celery handler
            raise

        project.update_state()
        # If any domains require metrics retrieval, start the appropriate background task
        if project.state == u'measuring':
            update_project_metrics.delay(project.id)
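parse_namecheap_result is not shown on this page; the sketch below reconstructs its probable shape from the keys the caller reads.  The element and attribute names follow the usual Namecheap response layout, but treat the whole function as an inference rather than the project's code.

# Hedged reconstruction of parse_namecheap_result(); inferred from the
# caller above, not taken from the project.
from lxml import etree

NC_NS = u'{http://api.namecheap.com/xml.response}'

def parse_namecheap_result(rxml):
    root = etree.fromstring(rxml)
    error_results = [{u'number': err.attrib.get(u'Number', u'0'),
                      u'description': err.text or u''}
                     for err in root.findall(u'./%sErrors/%sError' % (NC_NS, NC_NS))]
    domain_results = [{u'domain': el.attrib[u'Domain'],
                       u'available': el.attrib.get(u'Available') == u'true',
                       u'errorno': int(el.attrib.get(u'ErrorNo', u'0')),
                       u'description': el.attrib.get(u'Description', u'')}
                      for el in root.findall(u'./%sCommandResponse/%sDomainCheckResult'
                                             % (NC_NS, NC_NS))]
    return (domain_results, error_results)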
Example #9
def extract_domains(file_content, fail_email, filename):
    """
    Takes the contents of an uploaded file and returns a tuple of results representing parsed domains of different types.  If any lines in the file cannot be parsed, an email notification is sent to the given address with details.

    Args:
      file_content (str): The raw content of the file.
      fail_email (str): Email address of the user to notify if there is a fatal error.
      filename (str): The original filename.

    Returns:
      A tuple containing three items:
         1.  Correctly parsed domains
         2.  Domains that failed, are unrecognized, or are unregisterable
         3.  A list of lines in the file that could not be parsed
    """
    tlds = load_tlds()
    exclusions = load_exclusions()
    preservations = load_preservations()
    domain_list = set()
    ln = 0
    failed_lines = []
    failed_domains = []
    failed_set = set()
    for url in file_content.split('\n'):
        ln += 1
        logger.debug(type(url))
        # url = url.decode('utf-8')
        # url = unicode(url, errors='ignore')
        if len(url) == 0 or url[0] in '/\n':
            continue
        # logger.debug(url.strip())
        try:
            url = url.strip()
            if iponly_re.match(url) is not None:
                raise ValueError(u'IP only - no domain to extract')
            elif url.startswith('javascript:'):
                raise ValueError(u'Javascript hook')
            (tld_match, domain,
             full_domain) = remove_subdomains(url, tlds)
            tld = TLD.objects.get(domain=tld_match)
            if domain in failed_set:
                continue
            if not tld.is_recognized:
                failed_domains.append((domain, u'unregisterable',
                                       u'Unregisterable TLD (%s)' % tld_match))
                failed_set.add(domain)
            elif not tld.is_api_registerable:
                failed_domains.append((
                    domain, u'unregisterable',
                    u'TLD recognized but cannot be registered through the API (%s)'
                    % tld_match))
                failed_set.add(domain)
            elif domain in exclusions:
                failed_domains.append(
                    (domain, u'unregisterable',
                     u'Domain explicitly excluded (%s)' % domain))
                failed_set.add(domain)
            elif domain in preservations:
                failed_domains.append(
                    (full_domain, u'special',
                     u'Domain is reserved for special processing (%s)' %
                     domain))
                failed_set.add(full_domain)
            else:
                domain_list.add(domain)
        except ValueError as e:
            failed_lines.append((ln, url.strip(), str(e)))

    if len(failed_lines) > 0:
        error_email = u'The following domains failed while reading the file "%s":\n\n' % filename
        for fd in failed_lines:
            error_email += u'Line %d: %s (%s)\n' % (fd[0], fd[1], fd[2])
        logger.debug(error_email)
        send_mail(u'Domain Checker: Failed Domains', error_email,
                  AdminSetting.get_value('noreply_address'), [fail_email])
    return (domain_list, failed_domains, failed_lines)
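remove_subdomains is also not shown; from its call site it returns the matched TLD, the registerable domain and the full host.  The sketch below assumes load_tlds() yields a collection of TLD strings and uses a longest-suffix match, both of which are assumptions.

# Hedged sketch of remove_subdomains(); inferred from its call site above.
import urlparse

def remove_subdomains(url, tlds):
    # Normalize to a bare host name (handles scheme-less and mailto: inputs).
    host = urlparse.urlparse(url if u'//' in url else u'//' + url).netloc
    host = host.split(u'@')[-1].split(u':')[0].lower()
    parts = host.split(u'.')
    # Try the longest candidate suffix first (e.g. 'co.uk' before 'uk').
    for i in range(1, len(parts)):
        candidate = u'.'.join(parts[i:])
        if candidate in tlds:
            domain = u'.'.join(parts[i - 1:])
            return (candidate, domain, host)
    raise ValueError(u'No recognized TLD found in %s' % host)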