# NOTE(review): the newlines of this chunk were collapsed onto one physical
# line, and it begins mid-expression -- the `... = Aggregate(` opening this
# argument list is cut off above this view.  Code left byte-identical;
# restore the original line breaks before running.
# What the fragment does: finishes a SumAggregator over geonames 'count',
# projects 'oid' from `channel`, groups by oid, joins the groups with the
# aggregate, feeds the join into a Mux, and writes the muxed stream to
# 'results.txt' via ResultFile, registering every engine in `engines`.
geonames_select.output(), SumAggregator(geonames_select.output().schema(), 'count')) engines.append(geonames_aggregate) select = Select( channel, UniversalSelect(channel.schema(), { 'oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v }, })) engines.append(select) counties_grouper = Group(select.output(), {'oid': lambda a, b: a == b}) engines.append(counties_grouper) joiner = Join(counties_grouper.output(), geonames_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) # mux_streams.append(counties_select.output()) mux = Mux(*mux_streams) engines.append(mux) result_stack = ResultFile( 'results.txt', mux.output(), ) engines.append(result_stack)
# NOTE(review): collapsed-newline fragment; begins mid-dict inside a
# projection of 'counties.oid'/'counties.the_geom' whose opening call is cut
# off above, and ends at a `for i in range(tracks):` header whose body is cut
# off below.  Code left byte-identical; reflow before running.
# What the fragment does: groups states by 'states.oid', joins the groups
# with the counties projection, then demuxes the joined stream across
# `tracks` parallel tracks (per the original comment, for CPU utilization).
'counties.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, }, 'counties.the_geom': { 'type': Geometry, 'args': ['counties.the_geom'], 'function': lambda v: v, }, })) engines.append(counties_oid_select) # Group states by OID states_group = Group(states_select.output(), { 'states.oid': lambda a, b: a == b }) engines.append(states_group) # Join counties and states states_counties_join = Join( states_group.output(), counties_oid_select.output(), ) engines.append(states_counties_join) # De-multiplex the joined stream across multiple tracks for better CPU core # utilization. demux = Demux(states_counties_join.output()) mux_streams = [] for i in range(tracks):
# Reconstructed formatting: this chunk's original line breaks were collapsed
# into a single physical line, which is not valid Python.  The token stream
# is unchanged; only whitespace/layout and comments differ.
# Builds a query pipeline over the 'person' table of test.db: a DataAccessor
# driven by `query_streamer` (defined above this chunk), a grouper over query
# ages, a rename of 'age' -> 'age_range', and an aggregate over the rows the
# accessor returns.

# Schema of the data stream coming out of the 'person' table.
data_schema = Schema()
data_schema.append(Attribute('name', str))
data_schema.append(Attribute('age', int))
data_schema.append(Attribute('rowid', int, True))

data_source = DBTable('test.db', 'person', data_schema)

# create a data accessor
data_accessor = DataAccessor(
    query_streamer.output(),
    data_source,
    FindRange
)

# Group queries by age.
# NOTE(review): `a is b` is an identity test; on ints it only holds via
# small-int interning.  `a == b` looks intended -- confirm before changing.
query_grouper = Group(
    query_streamer.output(),
    {'age': lambda a, b: a is b}
)

# Rename the grouped 'age' attribute to 'age_range' for downstream consumers.
qselect = Select(
    query_grouper.output(),
    AttributeRename(
        query_grouper.output().schema(),
        {'age': 'age_range'}
    )
)

# Fold accessed rows with SumAgeAggregator (defined elsewhere; presumably
# sums the 'age' attribute -- verify against its definition).
aggregate = Aggregate(
    data_accessor.output(),
    SumAgeAggregator(data_accessor.output().schema())
)
# Reconstructed formatting: the chunk's original line breaks were collapsed
# into one physical line (invalid Python).  Token stream unchanged; only
# layout and comments differ.
# Continues a 'person' schema started above this chunk, then wires up an
# accessor -> select -> group -> join -> filter -> result-stack pipeline.

data_schema.append(Attribute('age', int))
data_schema.append(Attribute('rowid', int, True))

data_source = DBTable('test.db', 'person', data_schema)

# create a data accessor
data_accessor = DataAccessor(
    query_streamer.output(),
    data_source,
    FindRange
)

# Combine name/age attributes of accessed rows (combiner defined elsewhere).
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

# NOTE(review): `a is b` is an identity test; on ints it only holds via
# small-int interning.  `a == b` looks intended -- confirm before changing.
query_grouper = Group(
    query_streamer.output(),
    {'age': lambda a, b: a is b}
)

joiner = Join(query_grouper.output(), select.output())

# NOTE(review): `filter` shadows the builtin; kept as-is because code outside
# this chunk may reference the name.
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

result_stack = ResultStack(
    filter.output(),
    # joiner.output(),
    # query_streamer.output(),
    # query_grouper.output(),
    # select.output(),
)

info_queue = Queue()
# Reconstructed formatting: the chunk's original line breaks were collapsed
# into one physical line (invalid Python).  Token stream unchanged; only
# layout and comments differ.

# The query stream contains only a single query.
query_streamer = ArrayStreamer(query_schema, [
    (IntInterval(0, int(1E10)), ),
])
engines.append(query_streamer)

# Create a family data source: a table in the input database.
family_source = DBTable(input_file, 'family', family_schema)

# Data accessor for the family data source.  (Original comment said
# "species"; this accessor reads `family_source`.)
family_accessor = DataAccessor(query_streamer.output(), family_source, FindRange)
engines.append(family_accessor)

# A group mini-engine to split the family IDs into groups.
family_id_grouper = Group(family_accessor.output(), {
    'family.id': lambda a, b: a == b
})
engines.append(family_id_grouper)

# Select only the family ID for querying genera.
family_id_select = Select(
    family_accessor.output(),
    UniversalSelect(
        family_accessor.output().schema(),
        {
            'genus.family_id': {
                'type': int,
                'args': ['family.id'],
                'function': lambda v: v
            }
        }))
engines.append(family_id_select)
# NOTE(review): collapsed-newline fragment; begins with a stray `])` that
# closes a call opened above this view.  Code left byte-identical; reflow
# before running.
# What the fragment does: defines the 'person' schema over test.db, builds a
# DataAccessor, a NameAgeCombiner select, an age grouper, a join, a
# FilterNameAge filter, and a ResultStack.  Two smells worth confirming:
# `filter` shadows the builtin, and `'age': lambda a, b: a is b` is an
# identity compare on ints (works only via small-int interning).
]) # schema definition of the data stream data_schema = Schema() data_schema.append(Attribute('name', str)) data_schema.append(Attribute('age', int)) data_schema.append(Attribute('rowid', int, True)) data_source = DBTable('test.db', 'person', data_schema) # create a data accessor data_accessor = DataAccessor(query_streamer.output(), data_source, FindRange) name_age_combiner = NameAgeCombiner(data_accessor.output().schema()) select = Select(data_accessor.output(), name_age_combiner) query_grouper = Group(query_streamer.output(), {'age': lambda a, b: a is b}) joiner = Join(query_grouper.output(), select.output()) filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema())) result_stack = ResultStack( filter.output(), # joiner.output(), # query_streamer.output(), # query_grouper.output(), # select.output(), ) info_queue = Queue()
# NOTE(review): collapsed-newline fragment; begins mid-call (this
# UniversalSelect is an argument to a Select whose opening is cut off above)
# and ends inside a commented-out ResultStack alternative.  Double-quoted
# variant of the same geonames pipeline seen elsewhere in this file.  Code
# left byte-identical; reflow before running.
# What the fragment does: maps each geonames location to count=1, sums the
# counts, projects 'oid' from `channel`, groups by oid, joins with the
# aggregate, muxes, and writes to "results.txt" via ResultFile.
UniversalSelect( geonames_accessor.output().schema(), {"count": {"type": int, "args": ["geonames.location"], "function": lambda v: 1}}, ), ) engines.append(geonames_select) geonames_aggregate = Aggregate(geonames_select.output(), SumAggregator(geonames_select.output().schema(), "count")) engines.append(geonames_aggregate) select = Select( channel, UniversalSelect(channel.schema(), {"oid": {"type": int, "args": ["oid"], "function": lambda v: v}}) ) engines.append(select) counties_grouper = Group(select.output(), {"oid": lambda a, b: a == b}) engines.append(counties_grouper) joiner = Join(counties_grouper.output(), geonames_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) # mux_streams.append(counties_select.output()) mux = Mux(*mux_streams) engines.append(mux) result_stack = ResultFile("results.txt", mux.output()) engines.append(result_stack) # result_stack = ResultStack( # mux.output(),
# NOTE(review): collapsed-newline fragment; begins with a stray `])` closing
# a call opened above this view, and ends mid-call -- the Select /
# UniversalSelect projecting 'genus.family_id' is never closed here.  Code
# left byte-identical; reflow before running.
# What the fragment does: builds the family-table accessor, groups rows by
# 'family.id', and starts a projection of 'family.id' as 'genus.family_id'.
# The "species data source" comment inside appears stale -- the accessor
# reads `family_source`; verify against the full file.
]) engines.append(query_streamer) # Create a family data source: a table in the input database. family_source = DBTable(input_file, 'family', family_schema) # Data accessor for the species data source. family_accessor = DataAccessor( query_streamer.output(), family_source, FindRange ) engines.append(family_accessor) # A group mini-engine to split the family IDs into groups. family_id_grouper = Group( family_accessor.output(), {'family.id': lambda a, b: a == b} ) engines.append(family_id_grouper) # Select only the family ID for querying genera. family_id_select = Select( family_accessor.output(), UniversalSelect( family_accessor.output().schema(), { 'genus.family_id': { 'type': int, 'args': ['family.id'], 'function': lambda v: v } }
# NOTE(review): collapsed-newline fragment; begins mid-dict (inside a
# UniversalSelect spec whose opening is cut off above) and ends mid-call at
# the start of the counties Select.  Code left byte-identical; reflow before
# running.
# What the fragment does: trims states geometry via
# intersection(queries.geom, states.geom) into `states_trim`, groups the
# result by 'states.oid', and begins a counties query derived from the
# trimmed states stream.
'args': ['states.oid'], 'function': lambda v: v, }, 'states.geom': { 'type': Geometry, 'args': ['queries.geom', 'states.geom'], 'function': lambda a, b: intersection(a, b), } } ) ) engines.append(states_trim) states_group = Group( states_trim.output(), { 'states.oid': lambda a, b: a == b, } ) engines.append(states_group) ############################################################# # # Counties # ############################################################# counties_query = Select( states_trim.output(), UniversalSelect( states_trim.output().schema(), {
# NOTE(review): collapsed-newline fragment.  The first part accumulates into
# zip_selects / sorts / groups / data_accessors, which strongly suggests it
# was a loop body whose `for` header and indentation are lost; the mux /
# ResultStack / tasks tail was presumably after the loop.  The loop's extent
# cannot be recovered from this view, so the code is left byte-identical --
# reflow against the original file.
# Also NOTE(review): `cmp(...)` and the integer-division semantics of `/`
# here are Python 2 only; porting to Python 3 needs `//` and a key function
# (e.g. functools.cmp_to_key) -- confirm target interpreter.
zip_accessor = DataAccessor(select.output(), zip_source, FindRange) sub_schema2 = SubSchema(zip_accessor.output().schema(), {'oid': 'zip'}) zip_select = Select(zip_accessor.output(), sub_schema2) zip_selects.append(zip_select) sort = Sort( zip_select.output(), { 'zip': lambda a, b: cmp(a / 100, b / 100) }, # {'zip': None }, # True ) sorts.append(sort) group = Group(sort.output(), { 'zip': lambda a, b: (a / 1000) == (b / 1000) } # {'zip': None } ) groups.append(group) data_accessors.append(zip_accessor) mux = Mux(*[s.output() for s in groups]) result_stack = ResultStack( # query_streamer.output(), mux.output(), # data_accessor.output(), ) tasks = []
# NOTE(review): collapsed-newline fragment; begins mid-call (this
# UniversalSelect belongs to a `plants_height_select = Select(` cut off
# above).  Code left byte-identical; reflow before running.
# What the fragment does: projects 'plants.height', aggregates it with
# MaxHeightAggregator, groups `channel` by 'species.id', joins groups with
# the aggregate, muxes the joined stream, and writes to 'results.txt'.
UniversalSelect( plants_filter.output().schema(), { 'plants.height': { 'type': int, 'args': ['plants.height'], 'function': lambda v: v } })) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema())) engines.append(plants_height_aggregate) species_id_grouper = Group(channel, {'species.id': lambda a, b: a == b}) engines.append(species_id_grouper) joiner = Join(species_id_grouper.output(), plants_height_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) result_stack = ResultFile( 'results.txt', mux.output(), ) info_queue = Queue()
# NOTE(review): collapsed-newline fragment; a differently formatted variant
# of the counties/states chunk seen earlier in this file.  Begins mid-dict
# (the projection's opening call is cut off above) and ends at a
# `for i in range(tracks):` header whose body is cut off below.  Code left
# byte-identical; reflow before running.
# What the fragment does: groups states by 'states.oid', joins with the
# counties projection, then demuxes the join across `tracks` tracks.
'args': ['oid'], 'function': lambda v: v, }, 'counties.the_geom': { 'type': Geometry, 'args': ['counties.the_geom'], 'function': lambda v: v, }, } ) ) engines.append(counties_oid_select) # Group states by OID states_group = Group( states_select.output(), {'states.oid': lambda a, b: a == b} ) engines.append(states_group) # Join counties and states states_counties_join = Join( states_group.output(), counties_oid_select.output(), ) engines.append(states_counties_join) # De-multiplex the joined stream across multiple tracks for better CPU core # utilization. demux = Demux(states_counties_join.output()) mux_streams = [] for i in range(tracks):
# NOTE(review): collapsed-newline fragment; compact variant of the
# states-trim chunk seen earlier.  Begins mid-call (argument list of a
# UniversalSelect over states_join's schema, opening cut off above) and ends
# mid-dict at 'counties.geom'.  Code left byte-identical; reflow before
# running.
# What the fragment does: trims states geometry via
# intersection(queries.geom, states.geom), groups by 'states.oid', and
# begins the counties query Select over the trimmed stream.
states_join.output().schema(), { 'states.oid': { 'type': int, 'args': ['states.oid'], 'function': lambda v: v, }, 'states.geom': { 'type': Geometry, 'args': ['queries.geom', 'states.geom'], 'function': lambda a, b: intersection(a, b), } })) engines.append(states_trim) states_group = Group(states_trim.output(), { 'states.oid': lambda a, b: a == b, }) engines.append(states_group) ############################################################# # # Counties # ############################################################# counties_query = Select( states_trim.output(), UniversalSelect( states_trim.output().schema(), { 'counties.geom': { 'type': Geometry,
# NOTE(review): collapsed-newline fragment; differently formatted variant of
# the plants-height chunk above.  Begins mid-dict (the 'plants.height'
# projection's opening call is cut off above this view).  Code left
# byte-identical; reflow before running.
# What the fragment does: finishes the height projection, aggregates with
# MaxHeightAggregator, groups `channel` by 'species.id', joins, muxes, and
# writes the result to 'results.txt' via ResultFile.
'args': ['plants.height'], 'function': lambda v: v } } ) ) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema()) ) engines.append(plants_height_aggregate) species_id_grouper = Group( channel, {'species.id': lambda a, b: a == b} ) engines.append(species_id_grouper) joiner = Join(species_id_grouper.output(), plants_height_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) result_stack = ResultFile( 'results.txt', mux.output(), ) info_queue = Queue()