# Select over the aggregate's output that produces one 'name_age' string
# attribute, formatted from the 'name' and 'age' arguments.
aselect = Select(
    aggregate.output(),
    UniversalSelect(
        aggregate.output().schema(),
        {
            'name_age': {
                'type': str,
                'args': ['name', 'age'],
                'function': lambda name, age: '%s --> %d' % (name, age),
            },
        },
    ),
)

# Join qselect's output with the output of the selection built above.
joiner = Join(qselect.output(), aselect.output())

# Only the joined stream is handed to the result stack.  The author left
# these other candidates disabled: aggregate.output(),
# query_streamer.output(), query_grouper.output(), select.output().
result_stack = ResultStack(
    joiner.output(),
)

info_queue = Queue()


def manage(task):
    """Print a 'Running: ...' line for *task*, then call its run()."""
    print('Running: ' + str(task))
    task.run()
# the query. counties_select = Select( channel, UniversalSelect( channel.schema(), { 'geonames.location': { 'type': Geometry, 'args': ['counties.the_geom'], 'function': lambda v: intersection(v, query), } })) engines.append(counties_select) geonames_source = Rtree(geonames_file, 'geonames.location') # Data accessor for the geonames. geonames_accessor = DataAccessor(counties_select.output(), geonames_source, FindRange) engines.append(geonames_accessor) # XXX At this point no additional filter for the contraining the # geonames to the query region is required. # Send '1' for each retrieved geoname location. geonames_select = Select( geonames_accessor.output(), UniversalSelect( geonames_accessor.output().schema(), { 'count': { 'type': int, 'args': ['geonames.location'], 'function': lambda v: 1
family_accessor.output(), UniversalSelect( family_accessor.output().schema(), { 'genus.family_id': { 'type': int, 'args': ['family.id'], 'function': lambda v: v } })) engines.append(family_id_select) # Data source for the genera. genus_source = DBTable(input_file, 'genus', genus_schema) # Data accessor for the genera data source. genus_accessor = DataAccessor(family_id_select.output(), genus_source, FindIdentities) engines.append(genus_accessor) # A join mini-engine to associate families with genera. family_genus_joiner = Join( family_id_grouper.output(), genus_accessor.output(), ) engines.append(family_genus_joiner) # A group mini-engine to split the (family, genus) IDs into groups. family_genus_id_grouper = Group( family_genus_joiner.output(), { 'family.id': lambda a, b: a == b,
'type': Geometry, 'args': ['states.the_geom'], 'function': lambda v: intersection(v, query), }, # keep OID 'states.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, } })) engines.append(states_select) # Only keep the geometry for querying states_query = Select( states_select.output(), UniversalSelect( states_select.output().schema(), { 'counties.the_geom': { 'type': Geometry, 'args': ['states.the_geom'], 'function': lambda v: v, }, })) engines.append(states_query) # Finally query the counties counties_source = Rtree(counties_file, 'counties.the_geom') counties_accessor = DataAccessor( states_query.output(), counties_source,
UniversalSelect( channel.schema(), { "geonames.location": { "type": Geometry, "args": ["counties.the_geom"], "function": lambda v: intersection(v, query), } }, ), ) engines.append(counties_select) geonames_source = Rtree(geonames_file, "geonames.location") # Data accessor for the geonames. geonames_accessor = DataAccessor(counties_select.output(), geonames_source, FindRange) engines.append(geonames_accessor) # XXX At this point no additional filter for the contraining the # geonames to the query region is required. # Send '1' for each retrieved geoname location. geonames_select = Select( geonames_accessor.output(), UniversalSelect( geonames_accessor.output().schema(), {"count": {"type": int, "args": ["geonames.location"], "function": lambda v: 1}}, ), ) engines.append(geonames_select)
# Data accessor driven by the query stream; it is constructed with the
# FindRange strategy over data_source.
data_accessor = DataAccessor(
    query_streamer.output(),
    data_source,
    FindRange,
)

# Selection over the accessor's output, using a NameAgeCombiner built from
# the accessor's output schema.
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

# Group the query stream on 'age'.  The predicate compares by value (==):
# an identity test (is) is only true for objects the interpreter happens to
# share (e.g. CPython's cached small integers), so equal ages held in
# distinct objects would not have matched.
query_grouper = Group(
    query_streamer.output(),
    {'age': lambda a, b: a == b},
)

joiner = Join(query_grouper.output(), select.output())

# NOTE: this name shadows the builtin filter().  It is left unchanged
# because code outside this section may refer to it.
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

# Only the filtered stream is handed to the result stack.  The author left
# these other candidates disabled: joiner.output(),
# query_streamer.output(), query_grouper.output(), select.output().
result_stack = ResultStack(
    filter.output(),
)

info_queue = Queue()


def manage(task):
    """Print a 'Running: ...' line for *task*, then call its run()."""
    # Parenthesised single-argument form prints the same text whether print
    # is a statement or a function.
    print('Running: ' + str(task))
    task.run()
UniversalSelect( query_streamer.output().schema(), { 'states.geom': { 'type': Geometry, 'args': ['queries.geom'], 'function': lambda v: v, }, } ) ) engines.append(states_query) states_source = Rtree(states_file, 'states.geom') states_accessor = DataAccessor( states_query.output(), states_source, FindRange ) engines.append(states_accessor) states_select = Select( states_accessor.output(), UniversalSelect( states_accessor.output().schema(), { 'states.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, },
# Schema of the data stream: a string 'name', an integer 'age' and an
# integer 'rowid'.
data_schema = Schema()
data_schema.append(Attribute('name', str))
data_schema.append(Attribute('age', int))
# The third positional argument (True) is passed through as-is; presumably
# it marks 'rowid' as the key attribute -- TODO confirm against Attribute.
data_schema.append(Attribute('rowid', int, True))

data_source = DBTable('test.db', 'person', data_schema)

# Data accessor driven by the query stream; it is constructed with the
# FindRange strategy over data_source.
data_accessor = DataAccessor(query_streamer.output(), data_source, FindRange)

# Selection over the accessor's output, using a NameAgeCombiner built from
# the accessor's output schema.
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

# Group the query stream on 'age'.  The predicate compares by value (==):
# an identity test (is) is only true for objects the interpreter happens to
# share (e.g. CPython's cached small integers), so equal ages held in
# distinct objects would not have matched.
query_grouper = Group(
    query_streamer.output(),
    {'age': lambda a, b: a == b},
)

joiner = Join(query_grouper.output(), select.output())

# NOTE: this name shadows the builtin filter().  It is left unchanged
# because code outside this section may refer to it.
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

# Only the filtered stream is handed to the result stack.  The author left
# these other candidates disabled: joiner.output(),
# query_streamer.output(), query_grouper.output(), select.output().
result_stack = ResultStack(
    filter.output(),
)

info_queue = Queue()


def manage(task):
    """Print a 'Running: ...' line for *task*."""
    # Parenthesised single-argument form prints the same text whether print
    # is a statement or a function.
    print('Running: ' + str(task))
groups = [] sorts = [] # create a data accessor county_accessor = DataAccessor(query_streamer.output(), county_source, FindRange) data_accessors.append(county_accessor) demux = Demux(county_accessor.output()) sub_schema = SubSchema(demux.schema(), {'county': 'zip'}) for i in range(0, 2): select = Select(demux, sub_schema) county_selects.append(select) zip_accessor = DataAccessor(select.output(), zip_source, FindRange) sub_schema2 = SubSchema(zip_accessor.output().schema(), {'oid': 'zip'}) zip_select = Select(zip_accessor.output(), sub_schema2) zip_selects.append(zip_select) sort = Sort( zip_select.output(), { 'zip': lambda a, b: cmp(a / 100, b / 100) }, # {'zip': None }, # True ) sorts.append(sort) group = Group(sort.output(), { 'zip': lambda a, b: (a / 1000) == (b / 1000) }
'args': ['family.id'], 'function': lambda v: v } } ) ) engines.append(family_id_select) # Data source for the genera. genus_source = DBTable(input_file, 'genus', genus_schema) # Data accessor for the genera data source. genus_accessor = DataAccessor( family_id_select.output(), genus_source, FindIdentities ) engines.append(genus_accessor) # A join mini-engine to associate families with genera. family_genus_joiner = Join( family_id_grouper.output(), genus_accessor.output(), ) engines.append(family_genus_joiner) # A group mini-engine to split the (family, genus) IDs into groups.
# Select only the species ID for querying plants. species_id_select = Select( channel, UniversalSelect( species_accessor.output().schema(), { 'plants.species_id': { 'type': int, 'args': ['species.id'], 'function': lambda v: v } })) engines.append(species_id_select) # Data source for the plants. plants_source = DBTable(input_file, 'plants', plants_schema) # Data accessor for the plants data source. plants_accessor = DataAccessor(species_id_select.output(), plants_source, FindIdentities) engines.append(plants_accessor) plants_filter = Filter(plants_accessor.output(), FilterAge(plants_accessor.output().schema())) engines.append(plants_filter) # Select only the species ID for querying plants. plants_height_select = Select( plants_filter.output(), UniversalSelect( plants_filter.output().schema(), { 'plants.height': { 'type': int, 'args': ['plants.height'],
'function': lambda v: intersection(v, query), }, # keep OID 'states.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, } } ) ) engines.append(states_select) # Only keep the geometry for querying states_query = Select( states_select.output(), UniversalSelect( states_select.output().schema(), { 'counties.the_geom': { 'type': Geometry, 'args': ['states.the_geom'], 'function': lambda v: v, }, } ) ) engines.append(states_query) # Finally query the counties counties_source = Rtree(counties_file, 'counties.the_geom')
county_accessor = DataAccessor( query_streamer.output(), county_source, FindRange ) data_accessors.append(county_accessor) demux = Demux(county_accessor.output()) sub_schema = SubSchema(demux.schema(), {'county': 'zip'}) for i in range(0, 2): select = Select(demux, sub_schema) county_selects.append(select) zip_accessor = DataAccessor( select.output(), zip_source, FindRange ) sub_schema2 = SubSchema(zip_accessor.output().schema(), {'oid': 'zip'}) zip_select = Select(zip_accessor.output(), sub_schema2) zip_selects.append(zip_select) sort = Sort( zip_select.output(), {'zip': lambda a, b: cmp(a / 100, b / 100)}, # {'zip': None }, # True ) sorts.append(sort) group = Group( sort.output(),
############################################################# states_query = Select( query_streamer.output(), UniversalSelect( query_streamer.output().schema(), { 'states.geom': { 'type': Geometry, 'args': ['queries.geom'], 'function': lambda v: v, }, })) engines.append(states_query) states_source = Rtree(states_file, 'states.geom') states_accessor = DataAccessor(states_query.output(), states_source, FindRange) engines.append(states_accessor) states_select = Select( states_accessor.output(), UniversalSelect( states_accessor.output().schema(), { 'states.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, }, 'states.geom': { 'type': Geometry, 'args': ['states.geom'], 'function': lambda v: v,
species_accessor.output().schema(), { 'plants.species_id': { 'type': int, 'args': ['species.id'], 'function': lambda v: v } } ) ) engines.append(species_id_select) # Data source for the plants. plants_source = DBTable(input_file, 'plants', plants_schema) # Data accessor for the plants data source. plants_accessor = DataAccessor( species_id_select.output(), plants_source, FindIdentities ) engines.append(plants_accessor) plants_filter = Filter( plants_accessor.output(), FilterAge(plants_accessor.output().schema()) ) engines.append(plants_filter) # Select only the species ID for querying plants. plants_height_select = Select( plants_filter.output(), UniversalSelect(