Example #1
0
def web_screenshot_extraction(sample_id, url=None, *args, **kwargs):
    """Capture a screenshot of a sample's URL and store it on the sample.

    Emits ``EventSampleScreenshotDone`` once the screenshot has been saved,
    or ``EventSampleScreenshotFail`` (carrying the exception's
    ``status_code``) when a ``BaseWebkitException`` is raised.

    Args:
        sample_id: Primary key of the ``Sample`` row to update.
        url: Address to capture. When ``None``, the sample's stored ``url``
            is used instead.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        ``False`` when the URL is rejected by ``is_proper_url`` or when a
        ``BaseWebkitException`` is caught; ``None`` after a successful
        capture.
    """
    # Load the sample lazily so that it is queried at most once, and not at
    # all when an explicitly supplied URL is rejected.
    sample = None
    if url is None:
        sample = Sample.objects.get(id=sample_id)
        url = sample.url

    if not is_proper_url(url):
        return False

    if sample is None:
        sample = Sample.objects.get(id=sample_id)

    try:
        screenshot = get_web_screenshot(url)
        Sample.objects.filter(id=sample_id).update(screenshot=screenshot)

        # Events always report the URL stored on the sample, which may differ
        # from an explicitly passed ``url``.
        send_event(
            "EventSampleScreenshotDone",
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
        )
    except BaseWebkitException as e:
        send_event(
            "EventSampleScreenshotFail",
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
            error_code=e.status_code,
        )
        return False
Example #2
0
def web_content_extraction(sample_id, url=None, *args, **kwargs):
    """Extract the text content of a sample's URL and store it on the sample.

    Per the original note, the links/lynx browsers are required by the
    underlying extraction. Emits ``EventSampleContentDone`` once the text has
    been saved, or ``EventSampleContentFail`` (carrying the process return
    code) when a ``subprocess.CalledProcessError`` is raised.

    Args:
        sample_id: Primary key of the ``Sample`` row to update.
        url: Address to extract from. When ``None``, the sample's stored
            ``url`` is used instead.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        ``False`` when the URL is rejected by ``is_proper_url`` or when a
        ``subprocess.CalledProcessError`` is caught; ``None`` after a
        successful extraction.
    """
    # Load the sample lazily so that it is queried at most once, and not at
    # all when an explicitly supplied URL is rejected.
    sample = None
    if url is None:
        sample = Sample.objects.get(id=sample_id)
        url = sample.url

    if not is_proper_url(url):
        return False

    if sample is None:
        sample = Sample.objects.get(id=sample_id)

    try:
        text = get_web_text(url)

        Sample.objects.filter(id=sample_id).update(text=text)
        # Events always report the URL stored on the sample, which may differ
        # from an explicitly passed ``url``.
        send_event(
            "EventSampleContentDone",
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
        )
    except subprocess.CalledProcessError as e:
        # Something wrong has happened to links. Couldn't find documentation on
        # error codes - assume bad stuff has happened that retrying won't fix.
        send_event(
            'EventSampleContentFail',
            sample_id=sample_id,
            sample_url=sample.url,
            job_id=sample.job_id,
            error_code=e.returncode
        )
        return False
Example #3
0
    def testURLCheck(self):
        """Check ``is_proper_url`` against a table of addresses and verdicts."""
        cases = [
            # Loopback address.
            ('127.0.0.1', False),
            # No host part at all.
            (':10', False),
            (':', False),
            # Private-network ranges, with and without a port.
            ('10.0.0.1:2414', False),
            ('172.16.0.1:21021', False),
            ('192.168.0.100', False),
            # Public address, with and without a port.
            ('213.241.87.50', True),
            ('213.241.87.50:80', True),
            # NOTE(review): 232232 is above the valid port range yet is
            # expected to pass - presumably ports are not range-checked.
            ('213.241.87.50:232232', True),
        ]

        for address, expected in cases:
            # Include the address in the failure message; a bare
            # "False != True" does not say which case broke.
            self.assertEqual(
                is_proper_url(address),
                expected,
                "is_proper_url(%r) should be %r" % (address, expected),
            )
Example #4
0
    def testURLCheck(self):
        """Check ``is_proper_url`` against a table of addresses and verdicts."""
        cases = [
            # Loopback address.
            ('127.0.0.1', False),
            # No host part at all.
            (':10', False),
            (':', False),
            # Private-network ranges, with and without a port.
            ('10.0.0.1:2414', False),
            ('172.16.0.1:21021', False),
            ('192.168.0.100', False),
            # Public address, with and without a port.
            ('213.241.87.50', True),
            ('213.241.87.50:80', True),
            # NOTE(review): 232232 is above the valid port range yet is
            # expected to pass - presumably ports are not range-checked.
            ('213.241.87.50:232232', True),
        ]

        for address, expected in cases:
            # Include the address in the failure message; a bare
            # "False != True" does not say which case broke.
            self.assertEqual(
                is_proper_url(address),
                expected,
                "is_proper_url(%r) should be %r" % (address, expected),
            )