# Select over the query grouper's output, configured with an
# AttributeRename that maps 'age' to 'age_range'.
qselect = Select(
    query_grouper.output(),
    AttributeRename(
        query_grouper.output().schema(),
        {'age': 'age_range'},
    ),
)

# Aggregate over the data accessor's output using SumAgeAggregator.
aggregate = Aggregate(
    data_accessor.output(),
    SumAgeAggregator(data_accessor.output().schema()),
)

# Select over the aggregate's output; the spec declares one str attribute,
# 'name_age', computed from the 'name' and 'age' arguments.
aselect = Select(
    aggregate.output(),
    UniversalSelect(
        aggregate.output().schema(),
        {
            'name_age': {
                'type': str,
                'args': ['name', 'age'],
                'function': lambda name, age: '%s --> %d' % (name, age),
            },
        },
    ),
)

# Both selections are handed to the Join.
joiner = Join(qselect.output(), aselect.output())
# Select over the input channel; the spec declares a single int attribute
# 'oid' whose function returns its argument unchanged.
select = Select(
    channel,
    UniversalSelect(
        channel.schema(),
        {
            'oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v
            },
        }
    )
)
engines.append(select)

# Group on 'oid' with an equality comparator.
counties_grouper = Group(
    select.output(),
    {'oid': lambda a, b: a == b}
)
engines.append(counties_grouper)

joiner = Join(
    counties_grouper.output(),
    geonames_aggregate.output()
)
engines.append(joiner)
mux_streams.append(joiner.output())

# mux_streams.append(counties_select.output())
mux = Mux(*mux_streams)
engines.append(mux)

# The muxed stream is handed to a ResultFile built with 'results.txt'.
result_stack = ResultFile(
    'results.txt',
    mux.output(),
)
engines.append(result_stack)

#result_stack = ResultStack(
# mux.output(),
plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema())) engines.append(plants_height_aggregate) family_genus_species_id_grouper = Group( channel, { 'family.id': lambda a, b: a == b, 'genus.id': lambda a, b: a == b, 'species.id': lambda a, b: a == b }) engines.append(family_genus_species_id_grouper) # mux_streams.append(family_genus_species_id_grouper.output()) species_plants_joiner = Join(family_genus_species_id_grouper.output(), plants_height_aggregate.output()) engines.append(species_plants_joiner) mux_streams.append(species_plants_joiner.output()) mux = Mux(*mux_streams) engines.append(mux) # First aggregation level output selection family_genus_species_select = Select( mux.output(), UniversalSelect(mux.output().schema(), [ ('family.id', { 'type': int, 'args': ['family.id'], 'function': lambda v: v }),
'type': int, 'args': ['counties.oid'], 'function': lambda v: v }, })) engines.append(select) # Generate appropriate groups states_counties_grouper = Group(select.output(), { 'states.oid': lambda a, b: a == b, 'counties.oid': lambda a, b: a == b }) engines.append(states_counties_grouper) joiner = Join(states_counties_grouper.output(), geonames_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) engines.append(mux) states_level_select = Select( mux.output(), UniversalSelect( mux.output().schema(), { 'states.oid': { 'type': int, 'args': ['states.oid'], 'function': lambda v: v, },
) ) engines.append(cover_area) ############################################################# # # 1st level aggregation # ############################################################# cover_aggregate = Aggregate( cover_area.output(), SumAggregator(cover_area.output().schema(), 'area') ) engines.append(cover_aggregate) mux_streams.append(cover_aggregate.output()) mux = Mux(*mux_streams) engines.append(mux) ############################################################# # # 2nd level aggregation # ############################################################# counties_level_select = Select( mux.output(), UniversalSelect( mux.output().schema(), [
), ) engines.append(geonames_select) geonames_aggregate = Aggregate(geonames_select.output(), SumAggregator(geonames_select.output().schema(), "count")) engines.append(geonames_aggregate) select = Select( channel, UniversalSelect(channel.schema(), {"oid": {"type": int, "args": ["oid"], "function": lambda v: v}}) ) engines.append(select) counties_grouper = Group(select.output(), {"oid": lambda a, b: a == b}) engines.append(counties_grouper) joiner = Join(counties_grouper.output(), geonames_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) # mux_streams.append(counties_select.output()) mux = Mux(*mux_streams) engines.append(mux) result_stack = ResultFile("results.txt", mux.output()) engines.append(result_stack) # result_stack = ResultStack( # mux.output(), # ) # engines.append(result_stack)
engines.append(plants_height_aggregate) family_genus_species_id_grouper = Group( channel, { 'family.id': lambda a, b: a == b, 'genus.id': lambda a, b: a == b, 'species.id': lambda a, b: a == b } ) engines.append(family_genus_species_id_grouper) # mux_streams.append(family_genus_species_id_grouper.output()) species_plants_joiner = Join( family_genus_species_id_grouper.output(), plants_height_aggregate.output() ) engines.append(species_plants_joiner) mux_streams.append(species_plants_joiner.output()) mux = Mux(*mux_streams) engines.append(mux) # First aggregation level output selection family_genus_species_select = Select( mux.output(), UniversalSelect( mux.output().schema(), [ ('family.id', {
'args': ['plants.height'], 'function': lambda v: v } })) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema())) engines.append(plants_height_aggregate) species_id_grouper = Group(channel, {'species.id': lambda a, b: a == b}) engines.append(species_id_grouper) joiner = Join(species_id_grouper.output(), plants_height_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) result_stack = ResultFile( 'results.txt', mux.output(), ) info_queue = Queue() def manage(task): print 'Running: ' + str(task)
) engines.append(select) # Generate appropriate groups states_counties_grouper = Group( select.output(), { 'states.oid': lambda a, b: a == b, 'counties.oid': lambda a, b: a == b } ) engines.append(states_counties_grouper) joiner = Join( states_counties_grouper.output(), geonames_aggregate.output() ) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) engines.append(mux) states_level_select = Select( mux.output(), UniversalSelect( mux.output().schema(), { 'states.oid': { 'type': int, 'args': ['states.oid'],
lambda a, b: intersection(a, b).geom().area / b.geom().area }) ])) engines.append(cover_area) ############################################################# # # 1st level aggregation # ############################################################# cover_aggregate = Aggregate( cover_area.output(), SumAggregator(cover_area.output().schema(), 'area')) engines.append(cover_aggregate) mux_streams.append(cover_aggregate.output()) mux = Mux(*mux_streams) engines.append(mux) ############################################################# # # 2nd level aggregation # ############################################################# counties_level_select = Select( mux.output(), UniversalSelect(mux.output().schema(), [ ('states.oid', { 'type': int,
) engines.append(plants_height_select) plants_height_aggregate = Aggregate( plants_height_select.output(), MaxHeightAggregator(plants_height_select.output().schema()) ) engines.append(plants_height_aggregate) species_id_grouper = Group( channel, {'species.id': lambda a, b: a == b} ) engines.append(species_id_grouper) joiner = Join(species_id_grouper.output(), plants_height_aggregate.output()) engines.append(joiner) mux_streams.append(joiner.output()) mux = Mux(*mux_streams) result_stack = ResultFile( 'results.txt', mux.output(), ) info_queue = Queue() def manage(task): print 'Running: ' + str(task) task.run()