def web_screenshot_extraction(sample_id, url=None, *args, **kwargs):
    """
    Captures a screenshot of a sample's page and stores it on the sample.

    :param sample_id: id of the `Sample` row to update.
    :param url: address to capture; when None, the sample's own url is used.
    :returns: False when the url is rejected by `is_proper_url` or when
        `get_web_screenshot` raises `BaseWebkitException`; None (implicitly)
        after a successful capture.

    Sends `EventSampleScreenshotDone` on success and
    `EventSampleScreenshotFail` (carrying the exception's `status_code`)
    on failure. Extra positional/keyword arguments are accepted and ignored.
    """
    sample = None
    if url is None:
        # Keep the fetched row so it is not queried a second time below.
        sample = Sample.objects.get(id=sample_id)
        url = sample.url

    if not is_proper_url(url):
        return False

    if sample is None:
        # An explicit url was passed in; the row is still needed for the
        # event payloads (sample url and job id).
        sample = Sample.objects.get(id=sample_id)

    try:
        screenshot = get_web_screenshot(url)
        Sample.objects.filter(id=sample_id).update(screenshot=screenshot)

        send_event(
            "EventSampleScreenshotDone",
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
        )
    except BaseWebkitException as e:
        # `as` form is valid on Python 2.6+ and Python 3 alike.
        send_event(
            "EventSampleScreenshotFail",
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
            error_code=e.status_code,
        )
        return False
def web_content_extraction(sample_id, url=None, *args, **kwargs):
    """
    Links/lynx required. Extracts the text of a sample's page using those
    browsers (via `get_web_text`) and stores it on the sample.

    :param sample_id: id of the `Sample` row to update.
    :param url: address to read; when None, the sample's own url is used.
    :returns: False when the url is rejected by `is_proper_url` or when
        `get_web_text` raises `subprocess.CalledProcessError`; None
        (implicitly) after a successful extraction.

    Sends `EventSampleContentDone` on success and `EventSampleContentFail`
    (carrying the process return code) on failure. Extra positional/keyword
    arguments are accepted and ignored.
    """
    sample = None
    if url is None:
        # Keep the fetched row so it is not queried a second time below.
        sample = Sample.objects.get(id=sample_id)
        url = sample.url

    if not is_proper_url(url):
        return False

    if sample is None:
        # An explicit url was passed in; the row is still needed for the
        # event payloads (sample url and job id).
        sample = Sample.objects.get(id=sample_id)

    try:
        text = get_web_text(url)
        Sample.objects.filter(id=sample_id).update(text=text)

        send_event(
            "EventSampleContentDone",
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
        )
    except subprocess.CalledProcessError as e:
        # Something wrong has happened to links. Couldn't find documentation on
        # error codes - assume bad stuff has happened that retrying won't fix.
        send_event(
            'EventSampleContentFail',
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
            error_code=e.returncode
        )
        return False
def testURLCheck(self):
    """
    Checks `is_proper_url` against a table of (address, expected) pairs.
    """
    # Each entry pairs a candidate address with the verdict that
    # `is_proper_url` is expected to return for it.
    cases = [
        ('127.0.0.1', False),
        (':10', False),
        (':', False),
        ('10.0.0.1:2414', False),
        ('172.16.0.1:21021', False),
        ('192.168.0.100', False),
        ('213.241.87.50', True),
        ('213.241.87.50:80', True),
        ('213.241.87.50:232232', True),
    ]

    for candidate, expected in cases:
        verdict = is_proper_url(candidate)
        self.assertEqual(verdict, expected)