def test_replace_views():
    """Check that ``replace_views`` expands a reference to a view.

    A view named ``no_managers`` is registered on the dataset.  Loading
    that name, alone or on both sides of a ``JoinOp``, is expected to come
    back as the view's operation wrapped in an ``AliasOp`` carrying the
    view's name.
    """
    dataset = DataSet()
    # Only the registration matters here; the value returned by
    # add_adapter is never used by this test.
    dataset.add_adapter(MockAdapter())

    no_managers = SelectionOp(
        LoadOp('bogus'),
        IsOp(Var('manager_id'), NullConst()),
    )
    dataset.create_view('no_managers', no_managers)

    # Expected expansion of every LoadOp('no_managers').
    view = AliasOp('no_managers', no_managers)

    compare(replace_views(LoadOp('no_managers'), dataset), view)

    compare(
        replace_views(
            JoinOp(LoadOp('no_managers'), LoadOp('no_managers')),
            dataset,
        ),
        JoinOp(view, view),
    )
def test_relpace_views():
    """Check that ``replace_views`` substitutes a view's operation in place.

    Registers a ``no_managers`` view over ``employees`` and asserts with
    ``eq_`` that loading the view, alone or on both sides of a ``JoinOp``,
    yields the view's own operation.
    """
    # NOTE(review): the function name looks like a typo of "replace"; it is
    # left unchanged so the test's identifier stays stable.
    dataset = DataSet()
    dataset.add_adapter(EmployeeAdapter())

    employees = LoadOp('employees')
    without_manager = IsOp(Var('manager_id'), NullConst())
    view_op = SelectionOp(employees, without_manager)
    dataset.create_view('no_managers', view_op)

    # A bare reference to the view.
    expanded = replace_views(LoadOp('no_managers'), dataset)
    eq_(expanded, view_op)

    # The view referenced on both sides of a join.
    self_join = JoinOp(LoadOp('no_managers'), LoadOp('no_managers'))
    expanded_join = replace_views(self_join, dataset)
    eq_(expanded_join, JoinOp(view_op, view_op))
def test_replace_views():
    """Verify view expansion performed by ``replace_views``.

    After registering the ``no_managers`` view, each ``LoadOp`` naming it
    should be rewritten to ``AliasOp('no_managers', <view operation>)``,
    both at the top level and when nested inside a ``JoinOp``.
    """
    dataset = DataSet()
    # The adapter only needs to be registered; add_adapter's return value
    # is not used anywhere in this test.
    dataset.add_adapter(MockAdapter())

    no_managers = SelectionOp(
        LoadOp('bogus'), IsOp(Var('manager_id'), NullConst()))
    dataset.create_view('no_managers', no_managers)

    view = AliasOp('no_managers', no_managers)

    # Top-level reference to the view.
    compare(replace_views(LoadOp('no_managers'), dataset), view)

    # References nested on both sides of a join.
    compare(
        replace_views(
            JoinOp(LoadOp('no_managers'), LoadOp('no_managers')), dataset),
        JoinOp(view, view))
def test_replace_view_within_a_view():
    """Check that ``replace_views`` expands views defined in terms of views.

    ``view3`` selects from ``view2``, which loads ``view1``, which loads
    ``bogus``.  Expanding ``LoadOp('view3')`` is expected to produce nested
    ``AliasOp`` nodes, one per view, down to the underlying ``LoadOp``.
    """
    dataset = DataSet()
    # Registration is all that is required; the value returned by
    # add_adapter is not used by this test.
    dataset.add_adapter(MockAdapter())

    dataset.create_view('view1', LoadOp('bogus'))
    dataset.create_view('view2', LoadOp('view1'))
    dataset.create_view(
        'view3',
        SelectionOp(LoadOp('view2'), IsOp(Var('x'), NullConst())),
    )

    v1 = replace_views(LoadOp('view3'), dataset)

    compare(
        v1,
        AliasOp(
            'view3',
            SelectionOp(
                AliasOp('view2', AliasOp('view1', LoadOp('bogus'))),
                IsOp(Var('x'), NullConst()),
            ),
        ),
    )
def init(**tables):
    """
    Build and return a ``DataSet`` with its servers and views configured.

    The keyword arguments in ``tables`` are forwarded unchanged to
    ``DiscoDBServer``.  Two ``FileServer`` instances (``common_crawl`` and
    ``top_sites``) are added, reading from the ``data`` directory next to
    this module, and the views ``top_10``, ``outbound_links``, ``scripts``
    and ``servers`` are created before the dataset is returned.
    """
    dataset = DataSet()
    dataset.add_server(DiscoDBServer(**tables))

    # Description (currently disabled in the config):
    # Raw documents from http://commoncrawl.org
    common_crawl_spec = dict(
        root_dir=join(dirname(__file__), 'data'),
        pattern="sample.arc.gz",
        decode="application/x-arc",
    )
    dataset.add_server(FileServer(common_crawl=common_crawl_spec))

    # Description (currently disabled in the config):
    # Top Sites as reported by Alexa
    top_sites_fields = [
        dict(name="date", type="DATE"),
        dict(name="rank", type="STRING"),
        dict(name="site", type="STRING"),
    ]
    top_sites_spec = dict(
        root_dir=join(dirname(__file__), 'data'),
        pattern="alexa-top1m-{date}.csv",
        decode="auto",
        schema=dict(fields=top_sites_fields),
    )
    dataset.add_server(FileServer(top_sites=top_sites_spec))

    first_ten = dataset.frm('top_sites').limit(10)
    first_ten.create_view('top_10')

    # Views defined by query text, created in this order.
    query_views = (
        (
            'outbound_links',
            "select link_to, count() "
            "from flatten(docs, 'link_to') "
            "group by link_to order by count desc",
        ),
        (
            'scripts',
            "select scripts as script, count() "
            "from flatten(docs, 'scripts') "
            "group by script order by count desc",
        ),
        (
            'servers',
            "select headers_value as server_name, count() "
            "from flatten(docs, 'headers') "
            "where headers_name = 'Server' "
            "group by server_name order by count desc",
        ),
    )
    for view_name, query in query_views:
        dataset.create_view(view_name, query)

    return dataset
def test_replace_view_within_a_view():
    """Verify that chained views are expanded all the way down.

    Three views are registered, each built on the previous one
    (``view3`` -> ``view2`` -> ``view1`` -> ``bogus``).  The result of
    ``replace_views(LoadOp('view3'), dataset)`` is compared against the
    fully nested ``AliasOp`` tree.
    """
    dataset = DataSet()
    # add_adapter's return value is not needed; only the side effect of
    # registering the adapter is relied upon.
    dataset.add_adapter(MockAdapter())

    dataset.create_view('view1', LoadOp('bogus'))
    dataset.create_view('view2', LoadOp('view1'))
    dataset.create_view(
        'view3', SelectionOp(LoadOp('view2'), IsOp(Var('x'), NullConst())))

    v1 = replace_views(LoadOp('view3'), dataset)

    compare(
        v1,
        AliasOp(
            'view3',
            SelectionOp(
                AliasOp('view2', AliasOp('view1', LoadOp('bogus'))),
                IsOp(Var('x'), NullConst()))))