# Group the query stream on its 'age' attribute.
# The comparator uses value equality (==). Identity (`is`) only appears to
# work for values the interpreter happens to cache (e.g. small ints) and
# silently fails to match equal values held in distinct objects.
query_grouper = Group(
    query_streamer.output(),
    {'age': lambda a, b: a == b},
)

# Pass the grouped stream through AttributeRename with the mapping
# 'age' -> 'age_range'.
qselect = Select(
    query_grouper.output(),
    AttributeRename(
        query_grouper.output().schema(),
        {'age': 'age_range'},
    ),
)

# Aggregate the accessed data with SumAgeAggregator, built from the
# accessor's output schema.
aggregate = Aggregate(
    data_accessor.output(),
    SumAgeAggregator(data_accessor.output().schema()),
)

# Produce a single str attribute 'name_age', formatted from the 'name' and
# 'age' attributes of the aggregate output.
aselect = Select(
    aggregate.output(),
    UniversalSelect(
        aggregate.output().schema(),
        {
            'name_age': {
                'type': str,
                'args': ['name', 'age'],
                'function': lambda name, age: '%s --> %d' % (name, age),
            },
        },
    ),
)
# Emit the constant 1 for every retrieved geoname location.
location_count_spec = {
    'count': {
        'type': int,
        'args': ['geonames.location'],
        'function': lambda _location: 1,
    },
}
geonames_select = Select(
    geonames_accessor.output(),
    UniversalSelect(
        geonames_accessor.output().schema(),
        location_count_spec,
    ),
)
engines.append(geonames_select)

# Feed the selected stream into an Aggregate driven by SumAggregator over
# the 'count' attribute.
geonames_aggregate = Aggregate(
    geonames_select.output(),
    SumAggregator(geonames_select.output().schema(), 'count'),
)
engines.append(geonames_aggregate)

# Project the incoming channel down to its 'oid' attribute, unchanged.
oid_spec = {
    'oid': {
        'type': int,
        'args': ['oid'],
        'function': lambda oid: oid,
    },
}
select = Select(channel, UniversalSelect(channel.schema(), oid_spec))
engines.append(select)

# Group the projected stream on 'oid', matching values by equality.
counties_grouper = Group(
    select.output(),
    {'oid': lambda left, right: left == right},
)
# Project the filtered plants down to their 'plants.height' attribute,
# passing the value through unchanged.
plant_height_spec = {
    'plants.height': {
        'type': int,
        'args': ['plants.height'],
        'function': lambda height: height,
    },
}
plants_height_select = Select(
    plants_filter.output(),
    UniversalSelect(
        plants_filter.output().schema(),
        plant_height_spec,
    ),
)
engines.append(plants_height_select)

# Aggregate the projected heights with MaxHeightAggregator.
plants_height_aggregate = Aggregate(
    plants_height_select.output(),
    MaxHeightAggregator(plants_height_select.output().schema()),
)
engines.append(plants_height_aggregate)

# Group the incoming channel on the family, genus and species IDs; each
# key is matched by equality.
taxonomy_id_comparators = {
    'family.id': lambda left, right: left == right,
    'genus.id': lambda left, right: left == right,
    'species.id': lambda left, right: left == right,
}
family_genus_species_id_grouper = Group(channel, taxonomy_id_comparators)
engines.append(family_genus_species_id_grouper)
# mux_streams.append(family_genus_species_id_grouper.output())

# Join the grouped channel with the height aggregate.
species_plants_joiner = Join(
    family_genus_species_id_grouper.output(),
    plants_height_aggregate.output(),
)
engines.append(species_plants_joiner)
# Send '1' for each retrieved geoname location. geonames_select = Select( geonames_accessor.output(), UniversalSelect( geonames_accessor.output().schema(), { 'count': { 'type': int, 'args': ['geonames.location'], 'function': lambda v: 1 } })) engines.append(geonames_select) # Aggregate the geonames geonames_aggregate = Aggregate( geonames_select.output(), SumAggregator(geonames_select.output().schema(), 'count')) engines.append(geonames_aggregate) # Select only the OIDs from each of the hierarchy levels. select = Select( channel, UniversalSelect( channel.schema(), { 'states.oid': { 'type': int, 'args': ['states.oid'], 'function': lambda v: v }, 'counties.oid': { 'type': int,
lambda a, b: intersection(a, b).geom().area / b.geom().area }) ] ) ) engines.append(cover_area) ############################################################# # # 1st level aggregation # ############################################################# cover_aggregate = Aggregate( cover_area.output(), SumAggregator(cover_area.output().schema(), 'area') ) engines.append(cover_aggregate) mux_streams.append(cover_aggregate.output()) mux = Mux(*mux_streams) engines.append(mux) ############################################################# # # 2nd level aggregation # ############################################################# counties_level_select = Select( mux.output(),
engines.append(geonames_accessor)

# XXX No additional filter constraining the geonames to the query region
# is required at this point.

# Emit the constant 1 for every retrieved geoname location.
location_count_spec = {
    "count": {
        "type": int,
        "args": ["geonames.location"],
        "function": lambda _location: 1,
    },
}
geonames_select = Select(
    geonames_accessor.output(),
    UniversalSelect(
        geonames_accessor.output().schema(),
        location_count_spec,
    ),
)
engines.append(geonames_select)

# Feed the selected stream into an Aggregate driven by SumAggregator over
# the "count" attribute.
geonames_aggregate = Aggregate(
    geonames_select.output(),
    SumAggregator(geonames_select.output().schema(), "count"),
)
engines.append(geonames_aggregate)

# Project the incoming channel down to its "oid" attribute, unchanged.
oid_spec = {
    "oid": {
        "type": int,
        "args": ["oid"],
        "function": lambda oid: oid,
    },
}
select = Select(channel, UniversalSelect(channel.schema(), oid_spec))
engines.append(select)

# Group the projected stream on "oid", matching values by equality.
counties_grouper = Group(
    select.output(),
    {"oid": lambda left, right: left == right},
)
engines.append(counties_grouper)

# Join the grouped stream with the geonames aggregate and register the
# joined output as one of the streams to be multiplexed.
joiner = Join(counties_grouper.output(), geonames_aggregate.output())
engines.append(joiner)
mux_streams.append(joiner.output())
# mux_streams.append(counties_select.output())
plants_filter.output(), UniversalSelect( plants_filter.output().schema(), { 'plants.height': { 'type': int, 'args': ['plants.height'], 'function': lambda v: v } } ) ) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema()) ) engines.append(plants_height_aggregate) family_genus_species_id_grouper = Group( channel, { 'family.id': lambda a, b: a == b, 'genus.id': lambda a, b: a == b, 'species.id': lambda a, b: a == b } ) engines.append(family_genus_species_id_grouper) # mux_streams.append(family_genus_species_id_grouper.output()) species_plants_joiner = Join(
# Select only the species ID for querying plants. plants_height_select = Select( plants_filter.output(), UniversalSelect( plants_filter.output().schema(), { 'plants.height': { 'type': int, 'args': ['plants.height'], 'function': lambda v: v } })) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema())) engines.append(plants_height_aggregate) species_id_grouper = Group(channel, {'species.id': lambda a, b: a == b}) engines.append(species_id_grouper) joiner = Join(species_id_grouper.output(), plants_height_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) result_stack = ResultFile( 'results.txt',
UniversalSelect( geonames_accessor.output().schema(), { 'count': { 'type': int, 'args': ['geonames.location'], 'function': lambda v: 1 } } ) ) engines.append(geonames_select) # Aggregate the geonames geonames_aggregate = Aggregate( geonames_select.output(), SumAggregator(geonames_select.output().schema(), 'count') ) engines.append(geonames_aggregate) # Select only the OIDs from each of the hierarchy levels. select = Select( channel, UniversalSelect( channel.schema(), { 'states.oid': { 'type': int, 'args': ['states.oid'], 'function': lambda v: v }, 'counties.oid': {
float, 'args': ['zip.geom', 'cover.geom'], 'function': lambda a, b: intersection(a, b).geom().area / b.geom().area }) ])) engines.append(cover_area) ############################################################# # # 1st level aggregation # ############################################################# cover_aggregate = Aggregate( cover_area.output(), SumAggregator(cover_area.output().schema(), 'area')) engines.append(cover_aggregate) mux_streams.append(cover_aggregate.output()) mux = Mux(*mux_streams) engines.append(mux) ############################################################# # # 2nd level aggregation # ############################################################# counties_level_select = Select( mux.output(),
plants_filter.output(), UniversalSelect( plants_filter.output().schema(), { 'plants.height': { 'type': int, 'args': ['plants.height'], 'function': lambda v: v } } ) ) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema()) ) engines.append(plants_height_aggregate) species_id_grouper = Group( channel, {'species.id': lambda a, b: a == b} ) engines.append(species_id_grouper) joiner = Join(species_id_grouper.output(), plants_height_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams)