Example #1
0
def identify_rfu(tempdir):
    """
    Given a temporary directory, attempts to distinguish CMS' from non-CMS
    websites and from each other.

    If a single CMS file is identified, then no hashing is performed and the
    file is assumed to be of that particular CMS. False positives will be weeded
    during the version detection phase.

    If all files requested were responded with 200 OK, the site is discarded.
    This is a design decision I might reconsider if it results in too many false
    negatives.

    @param tempfile: as returned by download_rfu.
    @return: DeferredList
    """
    rfu = pu.get_rfu()
    plugins = pu.plugins_base_get()
    files_found = async.rfu_path(tempdir, plugins)

    if len(rfu) == len(files_found):
        msg = "Url responded 200 OK to everything"
        return defer.fail(UnknownCMSException(msg))

    cms_name = identify_rfu_easy(tempdir, files_found)
    if cms_name:
        return defer.succeed(cms_name)

    return defer.fail(UnknownCMSException("This shouldn't happen too often."))
Example #2
0
def download_rfu(base_url, host_header):
    """
    Download all "regular file urls" for all CMS.
    @param base_url:
    @param host_header:
    @return DeferredList
    """
    def ret_result(results, tempdir, location):
        succ = filter(lambda r: r[0], results)
        if len(succ) == 0:
            msg = "'%s' not identified as any CMS."
            return Failure(UnknownCMSException(msg % str(location)))
        else:
            return tempdir

    def clean(fail, tempdir):
        delete_tempdir(tempdir)
        return fail

    tempdir = mkdtemp(prefix='dscan') + "/"
    required_files = pu.get_rfu()

    ds = []
    for f in required_files:
        url = base_url + f
        download_location = tempdir + async.filename_encode(f)
        d = async.download_url(url, host_header, download_location)
        ds.append(d)

    dl = defer.DeferredList(ds, consumeErrors=True)
    dl.addCallback(ret_result, tempdir, (base_url, host_header))
    dl.addErrback(clean, tempdir)

    return dl
Example #3
0
def identify_rfu(tempdir):
    """
    Given a temporary directory, attempts to distinguish CMS' from non-CMS
    websites and from each other.

    If a single CMS file is identified, then no hashing is performed and the
    file is assumed to be of that particular CMS. False positives will be weeded
    during the version detection phase.

    If all files requested were responded with 200 OK, the site is discarded.
    This is a design decision I might reconsider if it results in too many false
    negatives.

    @param tempfile: as returned by download_rfu.
    @return: DeferredList
    """
    rfu = pu.get_rfu()
    plugins = pu.plugins_base_get()
    files_found = async .rfu_path(tempdir, plugins)

    if len(rfu) == len(files_found):
        msg = "Url responded 200 OK to everything"
        return defer.fail(UnknownCMSException(msg))

    cms_name = identify_rfu_easy(tempdir, files_found)
    if cms_name:
        return defer.succeed(cms_name)

    return defer.fail(UnknownCMSException("This shouldn't happen too often."))
Example #4
0
def download_rfu(base_url, host_header):
    """
    Download all "regular file urls" for all CMS.
    @param base_url:
    @param host_header:
    @return DeferredList
    """
    def ret_result(results, tempdir, location):
        succ = filter(lambda r: r[0], results)
        if len(succ) == 0:
            msg = "'%s' not identified as any CMS."
            return Failure(UnknownCMSException(msg % str(location)))
        else:
            return tempdir

    def clean(fail, tempdir):
        delete_tempdir(tempdir)
        return fail

    tempdir = mkdtemp(prefix='dscan') + "/"
    required_files = pu.get_rfu()

    ds = []
    for f in required_files:
        url = base_url + f
        download_location = tempdir + async .filename_encode(f)
        d = async .download_url(url, host_header, download_location)
        ds.append(d)

    dl = defer.DeferredList(ds, consumeErrors=True)
    dl.addCallback(ret_result, tempdir, (base_url, host_header))
    dl.addErrback(clean, tempdir)

    return dl
Example #5
0
    def test_identify_calls_all_rfu(self):
        rfu = pu.get_rfu()
        with patch(ASYNC + 'download_url', autospec=True) as du:
            identify_url(self.base_url, None)

            self.assertEquals(du.call_count, len(rfu))
            for i, call in enumerate(du.call_args_list):
                args, kwargs = call
                self.assertEquals(args[0], self.base_url + rfu[i])
                self.assertTrue(args[2].endswith(async.filename_encode(rfu[i])))
Example #6
0
    def test_identify_calls_all_rfu(self):
        rfu = pu.get_rfu()
        with patch(ASYNC + 'download_url', autospec=True) as du:
            identify_url(self.base_url, None)

            self.assertEquals(du.call_count, len(rfu))
            for i, call in enumerate(du.call_args_list):
                args, kwargs = call
                self.assertEquals(args[0], self.base_url + rfu[i])
                self.assertTrue(args[2].endswith(async .filename_encode(
                    rfu[i])))
Example #7
0
    def test_identify_rfu_single_file(self):
        rfu = pu.get_rfu()
        fake_dir = '/tmp/dsadasdadaa/'
        joomla_file = fake_dir + async.filename_encode("media/system/js/validate.js")
        def isfile(path):
            if path == joomla_file:
                return True
            else:
                return False

        with patch("os.path.isfile", side_effect=isfile, autospec=True) as if_mock:
            d = identify_rfu(fake_dir)
            cms_name = self.successResultOf(d)

            self.assertEquals(cms_name, "joomla")
            self.assertEquals(if_mock.call_count, len(rfu))
Example #8
0
    def test_identify_raises_when_none_found(self, rt, mt, isdir):
        # Every download fails, so identify_url must fail with
        # UnknownCMSException without ever reaching identify_rfu.
        # NOTE(review): rt, mt and isdir are presumably mocks injected by
        # patch decorators outside this excerpt (mt looks like mkdtemp and rt
        # like the temp-dir removal) -- confirm against the decorators.
        fake_tmp = '/tmp/lelelellee'
        mt.return_value = fake_tmp

        def failing_download(*args, **kwargs):
            return f()

        # Return value is not used by this test.
        unused_files = pu.get_rfu()
        download_patch = patch(ASYNC + 'download_url',
                               side_effect=failing_download, autospec=True)
        with download_patch as download_mock:
            with patch(ASYNC_SCAN + 'identify_rfu') as identify_mock:
                outcome = identify_url(self.base_url, None)
                self.assertFailure(outcome, UnknownCMSException)
                self.assertEquals(identify_mock.call_count, 0)

                positional, _keywords = rt.call_args
                self.assertEquals(rt.call_count, 1)
                self.assertEquals(mt.call_count, 1)
                # rt must have received the temp dir with a trailing slash.
                self.assertEquals(positional[0], fake_tmp + "/")
Example #9
0
    def test_identify_rfu_single_file(self):
        rfu = pu.get_rfu()
        fake_dir = '/tmp/dsadasdadaa/'
        joomla_file = fake_dir + async .filename_encode(
            "media/system/js/validate.js")

        def isfile(path):
            if path == joomla_file:
                return True
            else:
                return False

        with patch("os.path.isfile", side_effect=isfile,
                   autospec=True) as if_mock:
            d = identify_rfu(fake_dir)
            cms_name = self.successResultOf(d)

            self.assertEquals(cms_name, "joomla")
            self.assertEquals(if_mock.call_count, len(rfu))
Example #10
0
    def test_identify_raises_when_none_found(self, rt, mt, isdir):
        # Every download fails, so identify_url must fail with
        # UnknownCMSException without ever reaching identify_rfu.
        # NOTE(review): rt, mt and isdir are presumably mocks injected by
        # patch decorators outside this excerpt (mt looks like mkdtemp and rt
        # like the temp-dir removal) -- confirm against the decorators.
        fake_tmp = '/tmp/lelelellee'
        mt.return_value = fake_tmp

        def failing_download(*args, **kwargs):
            return f()

        # Return value is not used by this test.
        unused_files = pu.get_rfu()
        download_patch = patch(ASYNC + 'download_url',
                               side_effect=failing_download, autospec=True)
        with download_patch as download_mock:
            with patch(ASYNC_SCAN + 'identify_rfu') as identify_mock:
                outcome = identify_url(self.base_url, None)
                self.assertFailure(outcome, UnknownCMSException)
                self.assertEquals(identify_mock.call_count, 0)

                positional, _keywords = rt.call_args
                self.assertEquals(rt.call_count, 1)
                self.assertEquals(mt.call_count, 1)
                # rt must have received the temp dir with a trailing slash.
                self.assertEquals(positional[0], fake_tmp + "/")