def test_and_filter(ctx): from melk.util.dibject import Dibject, dibjectify from melkman.filters import NewsItemFilterFactory filter_factory = NewsItemFilterFactory(ctx.component_manager) cfg = { 'filters': [{'op': 'match_field', 'config': {'field': 'foo', 'match_type': 'substring', 'values': ['ap', 'ban']}}, {'op': 'match_field', 'config': {'field': 'bar', 'match_type': 'regex', 'values': ['^\d+$', '^foo.*bar$']}}]} filt = filter_factory.create_filter('and', cfg) assert filt(dummy_news_item({'details': {'foo': 'crabapple', 'bar': '123'}})) assert not filt(dummy_news_item({'details': {'foo': 'crabapple', 'bar': 'abc'}})) assert not filt(dummy_news_item({'details': {'foo': 'abc', 'bar': '123'}})) assert not filt(dummy_news_item({'details': {'foo': 'abc', 'bar': 'abc'}})) cfg = {'filters': []} filt = filter_factory.create_filter('and', cfg) assert not filt(dummy_news_item({}))
def test_filter_chain(ctx): from melk.util.dibject import Dibject, dibjectify from melkman.filters import NewsItemFilterFactory, ACCEPT_ITEM, REJECT_ITEM filter_factory = NewsItemFilterFactory(ctx.component_manager) chain = [ {'op': 'match_author', 'config': {'values': ['fred']}, 'action': 'reject'}, {'op': 'and', 'config': { 'filters': [ {'op': 'match_author', 'config': {'values': ['barney']}}, {'op': 'match_field', 'config': {'field': 'foo', 'values': ['bar']}}]}, 'action': 'accept'}, {'op': 'match_field', 'config': {'field': 'foo', 'values': ['bar']}, 'action': 'reject'}, {'op': 'match_all', 'config': {}, 'action': 'accept'} ] chain = [dibjectify(x) for x in chain] chain = filter_factory.create_chain(chain) assert chain(dummy_news_item({'author': 'fred'})) == REJECT_ITEM assert chain(dummy_news_item({})) == ACCEPT_ITEM assert chain(dummy_news_item({'details': {'foo': 'bar'}})) == REJECT_ITEM assert chain(dummy_news_item({'author': 'barney', 'details': {'foo': 'bar'}})) == ACCEPT_ITEM
def test_source_filter(ctx): from melk.util.dibject import Dibject from melkman.filters import NewsItemFilterFactory filter_factory = NewsItemFilterFactory(ctx.component_manager) cfg = {'values': ['http://example.org/Feed1', 'http://example.org/Feed2']} filt = filter_factory.create_filter('match_source', cfg) assert filt is not None assert filt(dummy_news_item({'source_url': 'http://example.org/Feed1'})) assert filt(dummy_news_item({'source_url': 'http://example.org/Feed2'})) assert filt(dummy_news_item({'source_url': 'http://example.org:80/FeEd1'})) assert not filt(dummy_news_item({'source_url': 'http://foo.example.org/Feed2'})) cfg = {'values': ['^http://(.*\.)?example.org/Feed\d$'], 'match_type': 'regex'} filt = filter_factory.create_filter('match_source', cfg) assert filt is not None for i in range(5): assert filt(dummy_news_item({'source_url': 'http://example.org/Feed%d' % i})) assert not filt(dummy_news_item({'source_url': 'http://example.org/FeedJ'})) for i in range(2): for j in range(2): assert filt(dummy_news_item({'source_url': 'http://server%d.example.org/Feed%d' % (j,i)}))
def _filtered_update(composite, news_items, ctx): from melkman.filters import NewsItemFilterFactory, ACCEPT_ITEM, REJECT_ITEM filter_factory = NewsItemFilterFactory(ctx.component_manager) filt = filter_factory.create_chain(composite.filters) accepts = [] rejects = [] for item in news_items: result = filt(item) if result == ACCEPT_ITEM: accepts.append(item) else: if result != REJECT_ITEM: log.warn('Unsupported filter action: %s -- rejecting' % result) rejects.append(item) updated_items = 0 for item in accepts: if composite.add_news_item(item): updated_items += 1 if len(rejects) > 0: reject_bucket = composite.get_rejected(ctx.db) if reject_bucket is not None: for item in rejects: reject_bucket.add_news_item(item) log.info("filtered update to %s accepted %d (%d new), rejected %d" % (composite.id, len(accepts), updated_items, len(rejects))) return updated_items
def test_title_filter(ctx): from melk.util.dibject import Dibject from melkman.filters import NewsItemFilterFactory filter_factory = NewsItemFilterFactory(ctx.component_manager) cfg = {'values': ['Fred', 'Barney']} filt = filter_factory.create_filter('match_title', cfg) assert filt is not None assert filt(dummy_news_item({'title': 'fred'})) assert filt(dummy_news_item({'title': 'frED'})) assert filt(dummy_news_item({'title': 'barney'})) assert filt(dummy_news_item({'title': 'bArnEy'})) assert not filt(dummy_news_item({'title': 'blurney'}))
def test_tag_filter(ctx): from melk.util.dibject import Dibject from melkman.filters import NewsItemFilterFactory filter_factory = NewsItemFilterFactory(ctx.component_manager) cfg = {'values': ['soup', 'nuts']} filt = filter_factory.create_filter('match_tag', cfg) assert filt is not None assert not filt(dummy_news_item({})) assert filt(dummy_news_item({'details': {'tags': [{'label': 'soup'}]}})) assert filt(dummy_news_item({'details': {'tags': [{'label': 'nuts'}]}})) assert filt(dummy_news_item({'details': {'tags': [{'term': 'soup'}]}})) assert filt(dummy_news_item({'details': {'tags': [{'term': 'nuts'}]}})) assert filt(dummy_news_item({'details': {'tags': [{'label': 'fruit'}, {'label': 'soup'}]}})) assert not filt(dummy_news_item({'details': {'tags': [{'label': 'fruit'}]}}))
def test_content_filter(ctx): from melk.util.dibject import Dibject from melkman.filters import NewsItemFilterFactory filter_factory = NewsItemFilterFactory(ctx.component_manager) test_content = """ <div> Here they are <ul> <li> Fred </li> <li> <a href="http://flintstones.net/br">Bar<i>ney</i></a>... </li> </ul> </div> """ content_field = { 'type': 'text/html', 'value': test_content } cfg = {'values': ['Fred'], 'match_type': 'substring'} filt = filter_factory.create_filter('match_content', cfg) assert not filt(dummy_news_item({})) assert filt(dummy_news_item({'details': {'content': [content_field]}})) assert filt(dummy_news_item({'details': {'summary': '', 'summary_detail': content_field}})) cfg = {'values': ['Barney'], 'match_type': 'substring'} filt = filter_factory.create_filter('match_content', cfg) assert filt(dummy_news_item({'details': {'content': [content_field]}})) assert filt(dummy_news_item({'details': {'summary': '', 'summary_detail': content_field}})) cfg = {'values': ['Wilma'], 'match_type': 'substring'} filt = filter_factory.create_filter('match_content', cfg) assert not filt(dummy_news_item({'details': {'content': [content_field]}})) assert not filt(dummy_news_item({'details': {'summary': '', 'summary_detail': content_field}}))
def test_match_filter(ctx): from melk.util.dibject import Dibject, dibjectify from melkman.filters import NewsItemFilterFactory filter_factory = NewsItemFilterFactory(ctx.component_manager) cfgs = [] cfgs.append({'field': 'foo.bar', 'match_type': 'exact', 'values': ['apple', 'banana']}) cfgs.append({'field': 'foo.bar', 'values': ['apple', 'banana']}) for cfg in cfgs: filt = filter_factory.create_filter('match_field', cfg) assert filt(dummy_news_item({'details': {'foo': {'bar': 'apple'}}})) assert filt(dummy_news_item({'details': {'foo': {'bar': 'banana'}}})) assert not filt(dummy_news_item({'details': {'foo': {'bar': 'applehead'}}})) assert not filt(dummy_news_item({'details': {'foo': {'bar': {'quux': 'zoo'}}}})) assert not filt(dummy_news_item({'details': {'foo': 'bar'}})) cfg = {'field': 'foo.bar', 'match_type': 'substring', 'values': ['ap', 'ban']} filt = filter_factory.create_filter('match_field', cfg) assert filt(dummy_news_item({'details': {'foo': {'bar': 'crabapple'}}})) assert filt(dummy_news_item({'details': {'foo': {'bar': 'nanbana'}}})) assert not filt(dummy_news_item({'details': {'foo': {'bar': 'cran'}}})) cfg = {'field': 'foo.bar', 'match_type': 'regex', 'values': ['^\d+$', '^foo.*bar$']} filt = filter_factory.create_filter('match_field', cfg) assert filt(dummy_news_item({'details': {'foo': {'bar': '123'}}})) assert filt(dummy_news_item({'details': {'foo': {'bar': 'fooqqqbar'}}})) assert not filt(dummy_news_item({'details': {'foo': {'bar': '123foo'}}}))
def test_filter_plugin(ctx): from giblets import Component, implements from melkman.filters import NewsItemFilterFactory, INewsItemFilterFactory filter_factory = NewsItemFilterFactory(ctx.component_manager) def dummy(item): return False class FilterMaker(Component): implements(INewsItemFilterFactory) def handles_type(self, filter_type): if filter_type in ['foo_filt', 'bar_filt']: return True return False def create_filter(self, filter_type, config): if filter_type in ['foo_filt', 'bar_filt']: return dummy assert filter_factory.create_filter('foo_filt', {}) is not None assert filter_factory.create_filter('bar_filt', {}) is not None assert filter_factory.create_filter('quux_filt', {}) is None