Ejemplo n.º 1
0
    def test_single_file_combined(self):
        """Look up rules from the single ``list1.aclj`` file.

        The checker is created with ``default_access='block'``; each URL is
        resolved and both the matched ``urlkey`` and the resulting
        ``access`` value are verified.
        """
        acl_source = FileAccessIndexSource(TEST_EXCL_PATH + 'list1.aclj')
        checker = AccessChecker(SimpleAggregator({'source': acl_source}),
                                default_access='block')

        # (url to look up, expected rule urlkey, expected access value)
        # NOTE(review): an empty urlkey paired with the default access
        # presumably means no rule in the file matched -- confirm.
        expectations = [
            ('http://example.com/abc/page.html',
             'com,example)/abc/page.html', 'allow'),
            ('http://example.com/abc/page.htm', 'com,example)/abc', 'block'),
            ('http://example.com/abc/', 'com,example)/abc', 'block'),
            ('http://foo.example.com/', 'com,example,', 'exclude'),
            ('http://example.com/', 'com,', 'allow'),
            ('foo.net', '', 'block'),
            ('https://example.net/abc/path/other', '', 'block'),
        ]

        for url, expected_key, expected_access in expectations:
            rule = checker.find_access_rule(url)
            assert rule['urlkey'] == expected_key
            assert rule['access'] == expected_access
Ejemplo n.º 2
0
    def test_excludes_dir(self):
        """Look up rules through a ``DirectoryAccessSource`` over the test dir.

        The checker is created with ``default_access='block'``; each URL is
        resolved and both the matched ``urlkey`` and the resulting
        ``access`` value are verified.
        """
        checker = AccessChecker(DirectoryAccessSource(TEST_EXCL_PATH),
                                default_access='block')

        # (url to look up, expected rule urlkey, expected access value)
        expectations = [
            ('http://example.com/', 'com,example)/', 'allow'),
            ('http://example.bo', 'bo,example)/', 'exclude'),
            ('https://example.com/foo/path', 'com,example)/foo', 'exclude'),
            ('https://example.net/abc/path/other',
             'net,example)/abc/path', 'block'),
            # exact-only match
            ('https://www.iana.org/', 'org,iana)/###', 'allow'),
            ('https://www.iana.org/any/other', 'org,iana)/', 'exclude'),
            ('https://www.iana.org/x', 'org,iana)/', 'exclude'),
        ]

        for url, expected_key, expected_access in expectations:
            rule = checker.find_access_rule(url)
            assert rule['urlkey'] == expected_key
            assert rule['access'] == expected_access
Ejemplo n.º 3
0
    def test_blocks_only(self):
        """Look up rules from ``blocks.aclj`` with no explicit default access.

        ``AccessChecker`` is built without a ``default_access`` argument, so
        the last case pins whatever default the checker itself supplies
        (the test expects ``'allow'`` with an empty urlkey).
        """
        acl_source = FileAccessIndexSource(TEST_EXCL_PATH + 'blocks.aclj')
        checker = AccessChecker(SimpleAggregator({'source': acl_source}))

        # (url to look up, expected rule urlkey, expected access value)
        expectations = [
            ('https://example.com/foo', 'com,example)/foo', 'exclude'),
            ('https://example.com/food', 'com,example)/foo', 'exclude'),
            ('https://example.com/foo/path', 'com,example)/foo', 'exclude'),
            ('https://example.net/abc/path',
             'net,example)/abc/path', 'block'),
            ('https://example.net/abc/path/other',
             'net,example)/abc/path', 'block'),
            ('https://example.net/fo', '', 'allow'),
        ]

        for url, expected_key, expected_access in expectations:
            rule = checker.find_access_rule(url)
            assert rule['urlkey'] == expected_key
            assert rule['access'] == expected_access
Ejemplo n.º 4
0
    def load_auto_colls(self):
        """Create the resource handler for auto-discovered collections.

        Prints a notice and returns ``None`` when ``self.root_dir`` is not
        set. Otherwise builds a directory index source and an access checker
        from the instance settings and returns a ``DefaultResourceHandler``.
        When ``self.dedup_index_url`` is set, the directory source is
        combined with a Redis index source under a ``SimpleAggregator``.
        """
        if not self.root_dir:
            print('No Root Dir, Skip Auto Colls!')
            return

        dir_index = CacheDirectoryIndexSource(
            base_prefix=self.root_dir,
            base_dir=self.index_paths,
            config=self.config)

        acl_source = CacheDirectoryAccessSource(self.acl_paths)
        checker = AccessChecker(acl_source, self.default_access)

        # Use the directory index alone unless a dedup index is configured
        index_source = dir_index
        if self.dedup_index_url:
            dedup_index = RedisMultiKeyIndexSource(self.dedup_index_url)
            index_source = SimpleAggregator({'dedup': dedup_index,
                                             'dir': dir_index})

        return DefaultResourceHandler(
            index_source,
            self.archive_paths,
            rules_file=self.rules_file,
            access_checker=checker)
Ejemplo n.º 5
0
    def find_match(self, r):
        """Look up and print the acl rule matching a url/user pair

        The lookup uses ``self.acl_file`` with ``'<default>'`` as the
        default access value. A rule whose ``urlkey`` is the empty string
        is reported as "no match"; any other rule is printed via
        ``self.print_rule``.

        :param argparse.Namespace r: Parsed result from ArgumentParser
            (its ``url`` and ``user`` attributes are read)
        :rtype: None
        """
        checker = AccessChecker(self.acl_file, '<default>')
        matched = checker.find_access_rule(r.url, acl_user=r.user)

        print('Matched rule:')
        print('')

        if matched['urlkey'] == '':
            print('    <No Match, Using Default Rule>')
            print('')
            return

        self.print_rule(matched)
Ejemplo n.º 6
0
    def load_coll(self, name, coll_config):
        """Build the resource handler for one configured collection.

        :param str name: collection name; used as the key of the index
            mapping when a single index is configured
        :param coll_config: ``'$all'``, an index given as a string, or a
            dict of collection settings (``index`` / ``index_paths``,
            ``archive_paths``, ``acl_paths``, ``default_access``,
            ``surt_ordered``, ``sequence``, ``index_group``, ``timeout``)
        :return: ``self.auto_handler`` for ``'$all'`` when one is set, the
            result of ``self.init_sequence`` for a ``sequence`` config,
            otherwise a new ``DefaultResourceHandler``
        :raises Exception: if the config is neither a string nor a dict, or
            if no index, index_group or sequence can be found
        """
        if coll_config == '$all' and self.auto_handler:
            return self.auto_handler

        if isinstance(coll_config, str):
            index = coll_config
            archive_paths = None
            acl_paths = None
            default_access = self.default_access
            # A plain string carries no options, so use the same default
            # the dict form falls back to. Without this binding the
            # init_index_agg() call below fails with UnboundLocalError.
            surt_ordered = True
        elif isinstance(coll_config, dict):
            # 'index' takes precedence; 'index_paths' is the fallback key
            index = coll_config.get('index')
            if not index:
                index = coll_config.get('index_paths')
            archive_paths = coll_config.get('archive_paths')
            acl_paths = coll_config.get('acl_paths')
            default_access = coll_config.get('default_access',
                                             self.default_access)
            surt_ordered = coll_config.get('surt_ordered', True)
        else:
            raise Exception('collection config must be string or dict')

        # INDEX CONFIG
        if index:
            agg = init_index_agg({name: index}, surt_ordered=surt_ordered)
        else:
            # Only a dict config can supply 'sequence' or 'index_group'
            if not isinstance(coll_config, dict):
                raise Exception('collection config missing')

            sequence = coll_config.get('sequence')
            if sequence:
                return self.init_sequence(name, sequence)

            index_group = coll_config.get('index_group')
            if not index_group:
                raise Exception('no index, index_group or sequence found')

            timeout = int(coll_config.get('timeout', 0))
            agg = init_index_agg(index_group,
                                 True,
                                 timeout,
                                 surt_ordered=surt_ordered)

        # ARCHIVE CONFIG
        # Fall back to the global archive paths when the collection has none
        if not archive_paths:
            archive_paths = self.config.get('archive_paths')

        # ACCESS CONFIG
        # An access checker is only created when acl paths are configured
        access_checker = None
        if acl_paths:
            access_checker = AccessChecker(acl_paths, default_access)

        return DefaultResourceHandler(agg,
                                      archive_paths,
                                      rules_file=self.rules_file,
                                      access_checker=access_checker)
Ejemplo n.º 7
0
    def load_auto_colls(self):
        """Create the resource handler for auto-discovered collections.

        Prints a notice and returns ``None`` when ``self.root_dir`` is not
        set. Otherwise builds a directory index source and an access checker
        from the instance settings and returns a ``DefaultResourceHandler``
        wired to both.
        """
        if not self.root_dir:
            print('No Root Dir, Skip Auto Colls!')
            return

        index_source = CacheDirectoryIndexSource(
            base_prefix=self.root_dir,
            base_dir=self.index_paths,
            config=self.config)

        acl_source = CacheDirectoryAccessSource(self.acl_paths)
        checker = AccessChecker(acl_source, self.default_access)

        return DefaultResourceHandler(
            index_source,
            self.archive_paths,
            rules_file=self.rules_file,
            access_checker=checker)
Ejemplo n.º 8
0
    def test_allows_only_default_block(self):
        """Look up rules from ``allows.aclj`` with ``default_access='block'``.

        Every case verifies the matched ``urlkey``; the ``access`` value is
        verified only for the cases expected to come back with an empty
        urlkey.
        """
        acl_source = FileAccessIndexSource(TEST_EXCL_PATH + 'allows.aclj')
        checker = AccessChecker(SimpleAggregator({'source': acl_source}),
                                default_access='block')

        # (url to look up, expected rule urlkey, expected access or None
        # when the access value is not checked for that case)
        expectations = [
            ('http://example.net', 'net,', None),
            ('http://foo.example.net/abc', 'net,', None),
            ('https://example.net/test/', 'net,example)/test', None),
            ('https://example.org/', '', 'block'),
            ('https://abc.domain.net/path', 'net,domain,', None),
            ('https://domain.neta/path', '', 'block'),
        ]

        for url, expected_key, expected_access in expectations:
            rule = checker.find_access_rule(url)
            assert rule['urlkey'] == expected_key
            if expected_access is not None:
                assert rule['access'] == expected_access
Ejemplo n.º 9
0
    def test_excludes_dir(self):
        """Look up rules through a ``DirectoryAccessSource`` over the test dir.

        The checker is created with ``default_access='block'``; each URL is
        resolved and both the matched ``urlkey`` and the resulting
        ``access`` value are verified, including several exact-only
        (``###``-suffixed) rules.
        """
        checker = AccessChecker(DirectoryAccessSource(TEST_EXCL_PATH),
                                default_access='block')

        # (url to look up, expected rule urlkey, expected access value)
        expectations = [
            ('http://example.com/', 'com,example)/', 'allow'),
            ('http://example.bo', 'bo,example)/', 'exclude'),
            ('https://example.com/foo/path', 'com,example)/foo', 'exclude'),
            ('https://example.net/abc/path/other',
             'net,example)/abc/path', 'block'),
            # exact-only match
            ('https://www.iana.org/', 'org,iana)/###', 'allow'),
            ('https://www.iana.org/any/other', 'org,iana)/', 'exclude'),
            ('https://www.iana.org/x', 'org,iana)/', 'exclude'),
            # exact-only match, first line in *.aclj file
            ('https://www.iana.org/exact/match/first/line/aclj/',
             'org,iana)/exact/match/first/line/aclj###', 'allow'),
            # exact-only match, single rule in *.aclj file
            ('https://www.lonesome-rule.org/',
             'org,lonesome-rule)/###', 'allow'),
        ]

        for url, expected_key, expected_access in expectations:
            rule = checker.find_access_rule(url)
            assert rule['urlkey'] == expected_key
            assert rule['access'] == expected_access