def html_render(domain):
    """Render and return the html report.

    Analyses `domain` and renders the 'www/report.html' template, passing it
    the analysis results (`reports`), a mapping of the domain to its encoded
    form (`atoms`), the available export formats (`exports`) and the encoded
    domain itself (`domain_encoded`).
    """
    # dict([...]) needs tools.analyse() to return a single key/value pair;
    # presumably (domain, report) - confirm against tools.analyse.
    reports = dict([tools.analyse(domain)])

    # Encode once; the same value feeds both `atoms` and `domain_encoded`.
    domain_encoded = tools.encode_domain(domain)

    return flask.render_template(
        'www/report.html',
        reports=reports,
        atoms={domain: domain_encoded},
        exports={'json': 'json', 'csv': 'csv'},
        domain_encoded=domain_encoded,
    )
def test_unregister_tidies_database(self):
    """Check that unregistering a domain leaves the repository empty."""
    repo = dnstwister.repository
    domain = u'www.\u0454xample.com'
    feed_url = '/atom/{}'.format(tools.encode_domain(domain))

    # Nothing is registered or stored to begin with.
    assert not repo.is_domain_registered(domain)
    assert repo.db.data == {}

    # Request the feed and store a delta report; the domain must then be
    # registered and the database must hold data.
    self.app.get(feed_url)
    delta = {
        'new': [('www.examp1e.com', '127.0.0.1')],
        'updated': [],
        'deleted': [],
    }
    repo.update_delta_report(domain, delta)
    assert repo.is_domain_registered(domain)
    assert repo.db.data != {}

    # Unregistering must remove every trace of the domain.
    repo.unregister_domain(domain)
    assert not repo.is_domain_registered(domain)
    assert repo.db.data == {}
def test_unicode_resolve(webapp):
    """Check that a unicode domain can be resolved through the IP API."""
    unicode_domain = 'xn--sterreich-z7a.icom.museum'.decode('idna')
    request_path = '/api/ip/{}'.format(tools.encode_domain(unicode_domain))

    response = webapp.get(request_path)
    assert response.status_code == 200

    # The 'ip' value is checked separately from the rest of the payload.
    payload = response.json
    ip_addr = payload.pop('ip')

    expected = {
        u'domain': u'xn--sterreich-z7a.icom.museum',
        u'domain_as_hexadecimal': u'786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d',
        u'error': False,
        u'fuzz_url': u'http://localhost/api/fuzz/786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d',
        u'parked_score_url': u'http://localhost/api/parked/786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d',
        u'url': u'http://localhost/api/ip/786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d'
    }
    assert payload == expected

    # Will throw socket.error exception if this is not a valid IP address.
    socket.inet_aton(ip_addr)
def test_feed_reading_is_tracked(self):
    """Check that requesting a feed records when it was last read."""
    repo = dnstwister.repository
    domain = u'www.\u0454xample.com'
    feed_url = '/atom/{}'.format(tools.encode_domain(domain))

    # No read date is recorded before the feed has been requested.
    assert repo.delta_report_last_read(domain) is None

    # The first request records a read date.
    self.app.get(feed_url)
    first_read = repo.delta_report_last_read(domain)
    assert type(first_read) is datetime.datetime

    # Back-date the read marker so the test does not have to sleep to see
    # the date move forward.
    long_ago = datetime.datetime(2000, 1, 1, 0, 0, 0)
    repo.mark_delta_report_as_read(domain, long_ago)

    # Clear the webapp cache.
    dnstwister.cache.clear()

    # A further request must advance the read date.
    before_request = repo.delta_report_last_read(domain)
    self.app.get(feed_url)
    after_request = repo.delta_report_last_read(domain)
    assert after_request > before_request
def test_unicode_atom(webapp):
    """Sanity check: a unicode domain gets the same 'disabled' 404 response."""
    # The punycode below is the IDNA form of u'pl\xe0nt.com'.
    unicode_domain = 'xn--plnt-1na.com'.decode('idna')
    feed_url = '/atom/{}'.format(tools.encode_domain(unicode_domain))

    with pytest.raises(webtest.app.AppError) as err:
        webapp.get(feed_url)

    message = err.value.message
    assert '404 NOT FOUND' in message
    assert 'New RSS feed generation currently disabled.' in message
def generate():
    """Yield one JSON document per fuzzed domain, each followed by a blank line.

    `domain` is read from the enclosing scope (not visible in this block).
    Each document carries the fuzzed domain ('d'), its encoded form ('ed')
    and its IDNA-encoded form ('pd').
    """
    for fuzzed in tools.fuzzy_domains_iter(domain):
        name = fuzzed.domain
        record = {
            'd': name,
            'ed': tools.encode_domain(name),
            'pd': name.encode('idna'),
        }
        yield json.dumps(record) + '\n\n'
def domain_to_hex(domain):
    """Return the API payload giving the hexadecimal form of `domain`.

    Aborts the request with a 400 when the encoded domain does not parse
    back into a domain.
    """
    encoded = tools.encode_domain(domain)

    parsed = tools.parse_domain(encoded)
    if parsed is None:
        flask.abort(400, 'Malformed domain.')

    response_values = standard_api_values(domain, skip='domain_to_hex')
    response_values['domain_as_hexadecimal'] = encoded
    return flask.jsonify(response_values)
def test_api_domain_validation(webapp):
    """Check that every API endpoint rejects a malformed domain with a 400."""
    malformed_domain = 'example'

    for endpoint in ('fuzz', 'to_hex', 'ip', 'parked', 'safebrowsing', 'whois'):
        request_path = '/api/{}/{}'.format(
            endpoint, tools.encode_domain(malformed_domain))

        with pytest.raises(webtest.app.AppError) as err:
            webapp.get(request_path)

        # Checked after the `with` so that it actually runs once the request
        # has raised.
        assert '400 BAD REQUEST' in err.value.message
def test_unicode_basics(webapp):
    """Check that a Unicode domain is accepted by every API endpoint."""
    unicode_domain = 'xn--sterreich-z7a.icom.museum'.decode('idna')

    # These endpoints are addressed with the encoded form of the domain.
    for endpoint in ('fuzz', 'ip', 'parked', 'safebrowsing', 'whois'):
        request_path = '/api/{}/{}'.format(
            endpoint,
            tools.encode_domain(unicode_domain),
        )
        webapp.get(request_path)

    # to_hex is addressed with the IDNA-encoded domain instead.
    idna_domain = unicode_domain.encode('idna')
    webapp.get('/api/to_hex/{}'.format(idna_domain))
def _base64_redirect(encoded_domain): """Try to parse a domain into base64, return a redirect to the hex version if successful, otherwise None. """ try: decoded_domain = base64.b64decode(encoded_domain) if dnstwist.validate_domain(decoded_domain): return '/atom/{}'.format(tools.encode_domain(decoded_domain)) except: pass
def test_new_feed(self):
    """Tests the registration of a new feed - currently disabled."""
    # We need a domain to get the feed for.
    domain = u'www.\u0454xample.com'

    # Requesting the feed of a domain would normally register it; while the
    # feature is disabled the request must fail with a 404 instead.
    with pytest.raises(webtest.app.AppError) as err:
        self.app.get('/atom/{}'.format(tools.encode_domain(domain)))

    assert '404 NOT FOUND' in err.value.message
    assert 'New RSS feed generation currently disabled.' in err.value.message
def search_post():
    """Handle the search form submission.

    Reads the 'domains' field from the POSTed form, encodes it and redirects
    to the matching '/search/<encoded>' page. Redirects to '/error/2' when
    the field is missing or blank, and to '/error/0' when the value cannot
    be encoded.
    """
    form = flask.request.form

    try:
        post_data = form['domains']
    except KeyError:
        app.logger.info('Missing "domains" key from POST: {}'.format(form))
        return flask.redirect('/error/2')

    if post_data is None or not post_data.strip():
        app.logger.info('No data in "domains" key in POST')
        return flask.redirect('/error/2')

    search_parameter = tools.encode_domain(post_data)
    if search_parameter is not None:
        return flask.redirect('/search/{}'.format(search_parameter))

    # encode_domain() signals un-encodable input by returning None.
    app.logger.info('Invalid POST Unicode data:{}'.format(repr(post_data)))
    return flask.redirect('/error/0')
def handle_invalid_domain(search_term_as_hex):
    """Redirect to the error page when the GET parameter is not a valid domain.

    When the parameter can be decoded and tools.suggest_domain() offers an
    alternative that can itself be encoded, the redirect carries that
    alternative as the `suggestion` query parameter. Otherwise a plain
    redirect to '/error/0' is returned.
    """
    decoded_search = None
    try:
        decoded_search = tools.decode_domain(search_term_as_hex)
    except Exception:
        # Undecodable input only means there is nothing to base a suggestion
        # on. Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate.
        pass

    if decoded_search is not None:
        suggestion = tools.suggest_domain(decoded_search)
        if suggestion is not None:
            # encode_domain() returns None for input it cannot encode; never
            # emit a '?suggestion=None' link in that case.
            encoded_suggestion = tools.encode_domain(suggestion)
            if encoded_suggestion is not None:
                app.logger.info(
                    'Not a valid domain in GET: {}, suggesting: {}'.format(
                        search_term_as_hex, suggestion))
                return flask.redirect(
                    '/error/0?suggestion={}'.format(encoded_suggestion))

    app.logger.info('Not a valid domain in GET: {}'.format(search_term_as_hex))
    return flask.redirect('/error/0')
def standard_api_values(domain, skip=''):
    """Return the key-value pairs that link the API endpoints together.

    For every endpoint in ENDPOINTS other than `skip`, the payload holds an
    '<endpoint>_url' entry with the absolute URL of that endpoint for
    `domain`. The payload also holds 'url' (the current request's base URL),
    'domain' (IDNA-encoded) and 'domain_as_hexadecimal', each of which is
    left out when `skip` names it.
    """
    hexdomain = tools.encode_domain(domain)
    payload = {}

    for endpoint in (name for name in ENDPOINTS if name != skip):
        # Endpoint views are resolved relative to the current blueprint.
        relative_path = flask.url_for(
            '.{}'.format(endpoint), hexdomain=hexdomain)
        payload['{}_url'.format(endpoint)] = urlparse.urljoin(
            flask.request.url_root, relative_path)

    if skip != 'url':
        payload['url'] = flask.request.base_url

    if skip != 'domain':
        payload['domain'] = domain.encode('idna')

    if skip != 'domain_as_hexadecimal':
        payload['domain_as_hexadecimal'] = hexdomain

    return payload
def test_unicode_atom(webapp):
    """Sanity check: the atom feed can be requested for a unicode domain."""
    # The punycode below is the IDNA form of u'pl\xe0nt.com'.
    unicode_domain = 'xn--plnt-1na.com'.decode('idna')
    feed_url = '/atom/{}'.format(tools.encode_domain(unicode_domain))

    # The request completing without an error is the whole check.
    webapp.get(feed_url)
def test_decode_encoded_invalid_ascii_domain():
    """Non-domain ASCII text must still round-trip through encode/decode."""
    plain = 'example'
    encoded = '6578616d706c65'

    assert tools.encode_domain(plain) == encoded

    # Decoding must accept the hex as both a byte string and a unicode string.
    for candidate in (encoded, u'6578616d706c65'):
        assert tools.decode_domain(candidate) == plain
def test_encode_ascii_domain():
    """A plain ASCII domain encodes to the hex of its characters."""
    expected = '7777772e6578616d706c652e636f6d'
    actual = tools.encode_domain('www.example.com')
    assert actual == expected
def process_sub(sub_id, detail):
    """Process a subscription, emailing its delta report when appropriate.

    `detail` must provide 'domain' and 'email_address'; a truthy
    'hide_noisy' asks for noisy domains to be filtered out (only applied
    while the noisy-domains feature flag is enabled).

    Nothing is sent when an email went out for this subscription less than
    PERIOD seconds ago, when there is no delta report, when the delta report
    was updated more than 23 hours ago, or when the (possibly filtered)
    delta holds no changes. Each outcome is printed together with the first
    ten characters of `sub_id`.
    """
    def _or_none(items):
        """Return `items`, or None when it is empty."""
        return items if len(items) > 0 else None

    def _analysis_link(dom):
        """Return the analysis URL for `dom`."""
        return ANALYSIS_ROOT.format(tools.encode_domain(dom))

    domain = detail['domain']
    email_address = detail['email_address']
    hide_noisy = bool(detail.get('hide_noisy', False))
    sub_log = sub_id[:10]

    # Ensure the domain is registered for reporting, register if not.
    repository.register_domain(domain)

    # Mark delta report as "read" so it's not unsubscribed.
    repository.mark_delta_report_as_read(domain)

    # Don't send more than once per PERIOD seconds.
    last_sent = repository.email_last_send_for_sub(sub_id)
    if last_sent is not None:
        since_last_email = datetime.datetime.now() - last_sent
        if since_last_email < datetime.timedelta(seconds=PERIOD):
            print('<24h: {}'.format(sub_log))
            return

    # Without a delta report there is nothing to send.
    delta = repository.get_delta_report(domain)
    if delta is None:
        print('No delta: {}'.format(sub_log))
        return

    # If the delta report was updated > 23 hours ago, we're too close to the
    # next delta report. Hold off so the same delta report is not sent twice.
    delta_updated = repository.delta_report_updated(domain)
    if delta_updated is not None:
        delta_age = datetime.datetime.now() - delta_updated
        if delta_age > datetime.timedelta(hours=23):
            print('>23h: {}'.format(sub_log))
            return

    # Filter out noisy domains if that's the user's preference.
    if hide_noisy and feature_flags.enable_noisy_domains():
        delta = remove_noisy(delta)

    # Empty sections become None; don't email if there are no changes.
    new = _or_none(delta['new'])
    updated = _or_none(delta['updated'])
    deleted = _or_none(delta['deleted'])
    if new is None and updated is None and deleted is None:
        print('Empty: {}'.format(sub_log))
        return

    # Add analysis links to the new and updated entries.
    if new is not None:
        new = [(dom, ip, _analysis_link(dom)) for (dom, ip) in new]
    if updated is not None:
        updated = [
            (dom, old_ip, new_ip, _analysis_link(dom))
            for (dom, old_ip, new_ip) in updated
        ]

    # The noisy link is only offered while filtering is in effect.
    noisy_link = None
    if hide_noisy and feature_flags.enable_noisy_domains():
        noisy_link = 'https://dnstwister.report/email/{}/noisy'.format(sub_id)

    unsubscribe_link = (
        'https://dnstwister.report/email/unsubscribe/{}'.format(sub_id))
    body = email_tools.render_email(
        'report.html',
        domain=domain,
        new=new,
        updated=updated,
        deleted=deleted,
        unsubscribe_link=unsubscribe_link,
        noisy_link=noisy_link)

    # Mark as emailed before sending, so a failure in or after the send
    # cannot cause the subscriber to be flooded with retries.
    repository.update_last_email_sub_sent_date(sub_id)

    subject = u'dnstwister report for {}'.format(
        template_tools.domain_renderer(domain))
    emailer.send(email_address, subject, body)
    print('Sent: {}'.format(sub_log))
def view(hexdomain):
    """Return an Atom feed of the changes in the domains resolved for a domain.

    `hexdomain` is parsed with tools.parse_domain(). When that fails the
    value is tried as a legacy base64 identifier (answered with a 302
    redirect to the current URL format) and otherwise the request is aborted
    with a 400.

    The domain is registered if it is not already. While no delta report
    exists the feed holds a single placeholder entry; otherwise it holds one
    entry per new, updated and deleted domain in the report. The delta
    report is marked as read once the response has been built.
    """
    domain = tools.parse_domain(hexdomain)
    if domain is None:
        # Redirect old base64 requests to the new format.
        legacy_target = _base64_redirect(hexdomain)
        if legacy_target is not None:
            return flask.redirect(legacy_target, code=302)

        flask.abort(
            400,
            'Malformed domain or domain not represented in hexadecimal format.'
        )

    # Prepare the (initially empty) feed.
    feed_title = u'dnstwister report for {}'.format(
        template_tools.domain_renderer(domain)
    )
    feed = werkzeug.contrib.atom.AtomFeed(
        title=feed_title,
        feed_url='{}atom/{}'.format(flask.request.url_root, hexdomain),
        url='{}search/{}'.format(flask.request.url_root, hexdomain),
    )

    # The placeholder's publish/update date is midnight of the current day.
    # NOTE(review): datetime.now() is naive local time, so this is midnight
    # UTC only when the server clock runs on UTC.
    today = datetime.datetime.now().replace(
        hour=0, minute=0, second=0, microsecond=0
    )

    # Ensure the domain is registered.
    already_registered = repository.is_domain_registered(domain)
    if not already_registered:
        repository.register_domain(domain)

    delta_report = repository.get_delta_report(domain)

    if delta_report is None:
        # No delta report yet: show the placeholder entry.
        placeholder_title = u'No report yet for {}'.format(
            template_tools.domain_renderer(domain))
        placeholder_body = flask.render_template(
            'syndication/atom/placeholder.html', domain=domain
        )
        feed.add(
            title=placeholder_title,
            title_type='text',
            content=placeholder_body,
            content_type='html',
            author='dnstwister',
            updated=today,
            published=today,
            id=u'waiting:{}'.format(template_tools.domain_renderer(domain)),
            url=feed.url,
        )
    else:
        # There is a delta report: build the entries from it, dated with the
        # report's own update time (falling back to today).
        updated = repository.delta_report_updated(domain)
        if updated is None:
            updated = today

        # Entry IDs embed the update time as seconds since 1970-01-01, so an
        # ID only changes when the report's update time changes.
        # NOTE(review): an earlier comment described this value as floored
        # per 24 hours, but no flooring is applied here.
        epoch = datetime.datetime(1970, 1, 1)
        id_24hr = (updated - epoch).total_seconds()

        # Arguments shared by every entry of the report.
        shared = {
            'title_type': 'text',
            'content_type': 'html',
            'author': 'dnstwister',
            'updated': updated,
            'published': updated,
            'url': feed.url,
        }

        for (dom, ip) in delta_report['new']:
            entry_title = u'NEW: {}'.format(
                template_tools.domain_renderer(dom))
            entry_body = flask.render_template(
                'syndication/atom/new.html',
                ip=ip, hexdomain=tools.encode_domain(dom)
            )
            entry_id = 'new:{}:{}:{}'.format(dom.encode('idna'), ip, id_24hr)
            feed.add(
                title=entry_title, content=entry_body, id=entry_id, **shared
            )

        for (dom, old_ip, new_ip) in delta_report['updated']:
            entry_title = u'UPDATED: {}'.format(
                template_tools.domain_renderer(dom))
            entry_body = flask.render_template(
                'syndication/atom/updated.html',
                new_ip=new_ip, old_ip=old_ip,
                hexdomain=tools.encode_domain(dom),
            )
            entry_id = 'updated:{}:{}:{}:{}'.format(
                dom.encode('idna'), old_ip, new_ip, id_24hr
            )
            feed.add(
                title=entry_title, content=entry_body, id=entry_id, **shared
            )

        for dom in delta_report['deleted']:
            entry_title = u'DELETED: {}'.format(
                template_tools.domain_renderer(dom))
            entry_body = flask.render_template(
                'syndication/atom/deleted.html',
            )
            entry_id = 'deleted:{}:{}'.format(dom.encode('idna'), id_24hr)
            feed.add(
                title=entry_title, content=entry_body, id=entry_id, **shared
            )

    # Build the response before recording the read.
    feed_response = feed.get_response()
    repository.mark_delta_report_as_read(domain)
    return feed_response
def test_encode_punycoded_domain():
    """An already punycode-style ASCII string is hex-encoded as it stands."""
    punycode_domain = 'www.xampl.com-ehlf'
    expected = '7777772e78616d706c2e636f6d2d65686c66'
    assert tools.encode_domain(punycode_domain) == expected
def test_new_feed(self): """Tests registering a new feed: a placeholder entry is served (and keeps being served) until a delta report exists, after which the report's NEW entries are served.""" repository = dnstwister.repository # We need a domain to get the feed for. domain = u'www.\u0454xample.com' # A feed is registered by trying to load it (and it not already being # registered). res = self.app.get('/atom/{}'.format(tools.encode_domain(domain))) # And only returns a single placeholder item. assert str(res) == textwrap.dedent(""" Response: 200 OK Content-Type: application/atom+xml; charset=utf-8 <?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> <title type="text">dnstwister report for www.\xd1\x94xample.com (www.xn--xample-9uf.com)</title> <id>http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d</id> <updated>{date_today}</updated> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <link href="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d" rel="self" /> <generator>Werkzeug</generator> <entry xml:base="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d"> <title type="text">No report yet for www.\xd1\x94xample.com (www.xn--xample-9uf.com)</title> <id>waiting:www.\xd1\x94xample.com (www.xn--xample-9uf.com)</id> <updated>{date_today}</updated> <published>{date_today}</published> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <author> <name>dnstwister</name> </author> <content type="html"><p> This is the placeholder for your dnstwister report for www.\xd1\x94xample.com (www.xn--xample-9uf.com). </p> <p> Your first report will be generated within 24 hours with all entries marked as "NEW". </p> <p> <strong>Important:</strong> The "delta" between each report is generated every 24 hours. If your feed reader polls this feed less often than that, you will miss out on changes. 
</p></content> </entry> </feed> """).strip().format(date_today=datetime.datetime.now().replace( hour=0, minute=0, second=0, microsecond=0).strftime( '%Y-%m-%dT%H:%M:%SZ')) # Clear the webapp cache dnstwister.cache.clear() # Until the first delta is actually created, this placeholder remains. res = self.app.get('/atom/{}'.format(tools.encode_domain(domain))) assert str(res) == textwrap.dedent(""" Response: 200 OK Content-Type: application/atom+xml; charset=utf-8 <?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> <title type="text">dnstwister report for www.\xd1\x94xample.com (www.xn--xample-9uf.com)</title> <id>http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d</id> <updated>{date_today}</updated> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <link href="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d" rel="self" /> <generator>Werkzeug</generator> <entry xml:base="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d"> <title type="text">No report yet for www.\xd1\x94xample.com (www.xn--xample-9uf.com)</title> <id>waiting:www.\xd1\x94xample.com (www.xn--xample-9uf.com)</id> <updated>{date_today}</updated> <published>{date_today}</published> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <author> <name>dnstwister</name> </author> <content type="html"><p> This is the placeholder for your dnstwister report for www.\xd1\x94xample.com (www.xn--xample-9uf.com). </p> <p> Your first report will be generated within 24 hours with all entries marked as "NEW". </p> <p> <strong>Important:</strong> The "delta" between each report is generated every 24 hours. If your feed reader polls this feed less often than that, you will miss out on changes. 
</p></content> </entry> </feed> """).strip().format(date_today=datetime.datetime.now().replace( hour=0, minute=0, second=0, microsecond=0).strftime( '%Y-%m-%dT%H:%M:%SZ')) # We can calculate a delta though. update_date = datetime.datetime(2016, 2, 28, 11, 10, 34) repository.update_delta_report( domain, { 'new': [(u'www.\u0454xampl\u0454.com', '127.0.0.1')], 'updated': [], 'deleted': [], }, update_date) # Clear the webapp cache dnstwister.cache.clear() res = self.app.get('/atom/{}'.format(tools.encode_domain(domain))) assert str(res) == textwrap.dedent(""" Response: 200 OK Content-Type: application/atom+xml; charset=utf-8 <?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> <title type="text">dnstwister report for www.\xd1\x94xample.com (www.xn--xample-9uf.com)</title> <id>http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d</id> <updated>2016-02-28T11:10:34Z</updated> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <link href="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d" rel="self" /> <generator>Werkzeug</generator> <entry xml:base="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d"> <title type="text">NEW: www.\xd1\x94xampl\xd1\x94.com (www.xn--xampl-91ef.com)</title> <id>new:www.xn--xampl-91ef.com:127.0.0.1:1456657834.0</id> <updated>2016-02-28T11:10:34Z</updated> <published>2016-02-28T11:10:34Z</published> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <author> <name>dnstwister</name> </author> <content type="html"><h1>IP: 127.0.0.1</h1> <a href="https://dnstwister.report/analyse/7777772e786e2d2d78616d706c2d393165662e636f6d">analyse</a></content> </entry> </feed> """).strip()
def test_updated_and_deleted_items_appear_in_rss(self): """Tests that the new, updated and deleted items of a delta report each appear as an entry in the Atom feed.""" repository = dnstwister.repository # We need a domain to get the feed for. domain = u'www.\u0454xample.com' # We can calculate a delta though. update_date = datetime.datetime(2016, 2, 28, 11, 10, 34) repository.update_delta_report( domain, { 'new': [('www.examp1e.com', '127.0.0.1')], 'updated': [(u'www\u0454xa.mple.com', '127.0.0.1', '127.0.0.2') ], 'deleted': [u'www.\u0454xampl\u0454.com', 'www2.example.com.au'], }, update_date) # Clear the webapp cache dnstwister.cache.clear() res = self.app.get('/atom/{}'.format(tools.encode_domain(domain))) assert str(res) == textwrap.dedent(""" Response: 200 OK Content-Type: application/atom+xml; charset=utf-8 <?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> <title type="text">dnstwister report for www.\xd1\x94xample.com (www.xn--xample-9uf.com)</title> <id>http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d</id> <updated>2016-02-28T11:10:34Z</updated> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <link href="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d" rel="self" /> <generator>Werkzeug</generator> <entry xml:base="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d"> <title type="text">NEW: www.examp1e.com</title> <id>new:www.examp1e.com:127.0.0.1:1456657834.0</id> <updated>2016-02-28T11:10:34Z</updated> <published>2016-02-28T11:10:34Z</published> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <author> <name>dnstwister</name> </author> <content type="html"><h1>IP: 127.0.0.1</h1> <a href="https://dnstwister.report/analyse/7777772e6578616d7031652e636f6d">analyse</a></content> </entry> <entry xml:base="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d"> <title type="text">UPDATED: 
www\xd1\x94xa.mple.com (xn--wwwxa-d2e.mple.com)</title> <id>updated:xn--wwwxa-d2e.mple.com:127.0.0.1:127.0.0.2:1456657834.0</id> <updated>2016-02-28T11:10:34Z</updated> <published>2016-02-28T11:10:34Z</published> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <author> <name>dnstwister</name> </author> <content type="html"><h1>IP: 127.0.0.1 &gt; 127.0.0.2</h1> <a href="https://dnstwister.report/analyse/786e2d2d77777778612d6432652e6d706c652e636f6d">analyse</a></content> </entry> <entry xml:base="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d"> <title type="text">DELETED: www.\xd1\x94xampl\xd1\x94.com (www.xn--xampl-91ef.com)</title> <id>deleted:www.xn--xampl-91ef.com:1456657834.0</id> <updated>2016-02-28T11:10:34Z</updated> <published>2016-02-28T11:10:34Z</published> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <author> <name>dnstwister</name> </author> </entry> <entry xml:base="http://localhost:80/atom/7777772e786e2d2d78616d706c652d3975662e636f6d"> <title type="text">DELETED: www2.example.com.au</title> <id>deleted:www2.example.com.au:1456657834.0</id> <updated>2016-02-28T11:10:34Z</updated> <published>2016-02-28T11:10:34Z</published> <link href="http://localhost:80/search/7777772e786e2d2d78616d706c652d3975662e636f6d" /> <author> <name>dnstwister</name> </author> </entry> </feed> """).strip()
def test_encode_bonkers_unicode():
    """Unicode that is not "valid" must encode to None."""
    # u'\uDFFF' is an unpaired surrogate code point.
    invalid_domain = u'a\uDFFFa.com'
    encoded = tools.encode_domain(invalid_domain)
    assert encoded is None
def test_encode_unicode_domain():
    """A unicode domain is encoded as the hex of its punycode form."""
    unicode_domain = u'www.\u0454xampl\u0454.com'

    # www.xn--xampl-91ef.com in hex
    expected = '7777772e786e2d2d78616d706c2d393165662e636f6d'

    assert tools.encode_domain(unicode_domain) == expected