# NOTE(review): this fragment is collapsed onto one physical line, so the
# leading '#' turns every statement below into comment text; restore the line
# breaks before relying on it.
# Visible steps: a DataAccessor (FindRange) and a Group on 'age' are both fed
# from query_streamer.output(); the grouped stream goes through a Select with
# AttributeRename({'age': 'age_range'}); the accessor output is aggregated
# with SumAgeAggregator.
# The 'age' grouping lambda compares with `is` (identity), not `==` --
# TODO confirm that identity comparison is intended.
# The final UniversalSelect mapping ('name_age') is cut off in this fragment.
# create a data accessor data_accessor = DataAccessor( query_streamer.output(), data_source, FindRange ) query_grouper = Group( query_streamer.output(), {'age': lambda a, b: a is b} ) qselect = Select( query_grouper.output(), AttributeRename( query_grouper.output().schema(), { 'age': 'age_range' } ) ) aggregate = Aggregate( data_accessor.output(), SumAgeAggregator(data_accessor.output().schema()) ) aselect = Select( aggregate.output(), UniversalSelect( aggregate.output().schema(), { 'name_age': {
# w = i.wkb
# print 'Got WKB'

# Data source for geonames
mux_streams = []
for i in range(tracks):
    channel = demux.channel()

    # To query the locations in the geonames layer, trim the counties to
    # the query: each 'counties.the_geom' value is intersected with `query`
    # and stored under 'geonames.location'.
    location_mapping = {
        'geonames.location': {
            'type': Geometry,
            'args': ['counties.the_geom'],
            'function': lambda v: intersection(v, query),
        },
    }
    counties_select = Select(
        channel,
        UniversalSelect(channel.schema(), location_mapping),
    )
    engines.append(counties_select)

    geonames_source = Rtree(geonames_file, 'geonames.location')

    # Data accessor for the geonames.
    geonames_accessor = DataAccessor(
        counties_select.output(),
        geonames_source,
        FindRange,
    )
    engines.append(geonames_accessor)

    # XXX At this point no additional filter for constraining the
    # geonames to the query region is required.
# Data accessor that runs the incoming queries against the family source.
family_accessor = DataAccessor(
    query_streamer.output(),
    family_source,
    FindRange,
)
engines.append(family_accessor)

# A group mini-engine to split the family IDs into groups: two records
# belong together when their 'family.id' values compare equal.
family_group_rules = {'family.id': lambda a, b: a == b}
family_id_grouper = Group(family_accessor.output(), family_group_rules)
engines.append(family_id_grouper)

# Select only the family ID for querying genera; the value of 'family.id'
# is passed through unchanged under the name 'genus.family_id'.
genus_key_mapping = {
    'genus.family_id': {
        'type': int,
        'args': ['family.id'],
        'function': lambda v: v,
    },
}
family_id_select = Select(
    family_accessor.output(),
    UniversalSelect(family_accessor.output().schema(), genus_key_mapping),
)
engines.append(family_id_select)

# Data source for the genera.
genus_source = DBTable(input_file, 'genus', genus_schema)

# Data accessor for the genera data source.
genus_accessor = DataAccessor(
    family_id_select.output(),
    genus_source,
    FindIdentities,
)
engines.append(genus_accessor)

# A join mini-engine to associate families with genera.
# NOTE(review): fragment collapsed onto one physical line -- because it starts
# with '#', the whole line is comment text and nothing below executes until
# the line breaks are restored.
# Visible steps: an Rtree source over states_file keyed 'states.the_geom' is
# queried through a DataAccessor (FindRange) fed by query_streamer.output();
# a Select then replaces 'states.the_geom' with intersection(v, query) and
# copies 'oid' through unchanged as 'states.oid'. Both engines are appended
# to `engines`.
# The trailing states_query Select is cut off inside its mapping dict.
# Query the states from the data source. states_source = Rtree(states_file, 'states.the_geom') states_accessor = DataAccessor(query_streamer.output(), states_source, FindRange) engines.append(states_accessor) # Trim the states to the query region. states_select = Select( states_accessor.output(), UniversalSelect( states_accessor.output().schema(), { # trim geometry 'states.the_geom': { 'type': Geometry, 'args': ['states.the_geom'], 'function': lambda v: intersection(v, query), }, # keep OID 'states.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, } })) engines.append(states_select) # Only keep the geometry for querying states_query = Select( states_select.output(), UniversalSelect( states_select.output().schema(), {
# Data source for geonames
mux_streams = []
for i in range(tracks):
    channel = demux.channel()

    # To query the locations in the geonames layer, trim the counties to
    # the query: each "counties.the_geom" value is intersected with `query`
    # and stored under "geonames.location".
    location_mapping = {
        "geonames.location": {
            "type": Geometry,
            "args": ["counties.the_geom"],
            "function": lambda v: intersection(v, query),
        },
    }
    counties_select = Select(
        channel,
        UniversalSelect(channel.schema(), location_mapping),
    )
    engines.append(counties_select)

    geonames_source = Rtree(geonames_file, "geonames.location")

    # Data accessor for the geonames.
    geonames_accessor = DataAccessor(
        counties_select.output(),
        geonames_source,
        FindRange,
    )
    engines.append(geonames_accessor)

    # XXX At this point no additional filter for constraining the
    # geonames to the query region is required.
# NOTE(review): fragment collapsed onto one physical line -- the leading '#'
# makes the entire line a comment, so none of these statements run until the
# line breaks are restored.
# Visible steps: declare a schema (name: str, age: int, rowid: int with a
# third positional argument True), bind it to table 'person' in 'test.db',
# feed query_streamer.output() into a DataAccessor (FindRange), apply
# NameAgeCombiner through Select, group the query stream on 'age', Join the
# two branches and Filter the result with FilterNameAge.
# - the 'age' grouping lambda uses `is` (identity) -- TODO confirm `==` is
#   not what was meant.
# - the name `filter` rebinds Python's builtin filter() in this scope.
# The ResultStack(...) call is cut off at the end of the fragment.
# schema definition of the data stream data_schema = Schema() data_schema.append(Attribute('name', str)) data_schema.append(Attribute('age', int)) data_schema.append(Attribute('rowid', int, True)) data_source = DBTable('test.db', 'person', data_schema) # create a data accessor data_accessor = DataAccessor( query_streamer.output(), data_source, FindRange ) name_age_combiner = NameAgeCombiner(data_accessor.output().schema()) select = Select(data_accessor.output(), name_age_combiner) query_grouper = Group( query_streamer.output(), {'age': lambda a, b: a is b} ) joiner = Join(query_grouper.output(), select.output()) filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema())) result_stack = ResultStack( filter.output(), # joiner.output(), # query_streamer.output(), # query_grouper.output(), # select.output(),
# NOTE(review): fragment collapsed onto one physical line. It opens with the
# closing `])` of an expression that starts outside this fragment, and
# everything after the first '#' (the "States" banner onwards) is comment
# text as written; restore the line breaks before relying on it.
# Visible steps: query_streamer is appended to `engines`; a Select copies
# 'queries.geom' unchanged into 'states.geom' (type Geometry); an Rtree over
# states_file keyed 'states.geom' is then queried through a DataAccessor
# (FindRange). Each engine is appended to `engines`.
]) engines.append(query_streamer) ############################################################# # # States # ############################################################# states_query = Select( query_streamer.output(), UniversalSelect( query_streamer.output().schema(), { 'states.geom': { 'type': Geometry, 'args': ['queries.geom'], 'function': lambda v: v, }, } ) ) engines.append(states_query) states_source = Rtree(states_file, 'states.geom') states_accessor = DataAccessor( states_query.output(), states_source, FindRange ) engines.append(states_accessor)
# NOTE(review): fragment collapsed onto one physical line. It begins inside a
# list literal opened outside this fragment (two one-element tuples holding
# IntInterval(1, 3) and IntInterval(2, 5)), and everything after the first
# '#' is comment text as written; restore the line breaks before relying on
# it.
# Visible steps: declare a schema (name: str, age: int, rowid: int with a
# third positional argument True), bind it to table 'person' in 'test.db',
# feed query_streamer.output() into a DataAccessor (FindRange), apply
# NameAgeCombiner through Select, group the query stream on 'age', Join the
# two branches, Filter with FilterNameAge, collect filter.output() in a
# ResultStack and create a Queue named info_queue.
# - the 'age' grouping lambda uses `is` (identity) -- TODO confirm `==` is
#   not what was meant.
# - the name `filter` rebinds Python's builtin filter() in this scope.
(IntInterval(1, 3), ), (IntInterval(2, 5), ), ]) # schema definition of the data stream data_schema = Schema() data_schema.append(Attribute('name', str)) data_schema.append(Attribute('age', int)) data_schema.append(Attribute('rowid', int, True)) data_source = DBTable('test.db', 'person', data_schema) # create a data accessor data_accessor = DataAccessor(query_streamer.output(), data_source, FindRange) name_age_combiner = NameAgeCombiner(data_accessor.output().schema()) select = Select(data_accessor.output(), name_age_combiner) query_grouper = Group(query_streamer.output(), {'age': lambda a, b: a is b}) joiner = Join(query_grouper.output(), select.output()) filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema())) result_stack = ResultStack( filter.output(), # joiner.output(), # query_streamer.output(), # query_grouper.output(), # select.output(), ) info_queue = Queue()
# NOTE(review): fragment collapsed onto one physical line. The statements
# before the first '#' are jammed together without separators, and everything
# after that '#' is comment text as written; restore the line breaks before
# relying on it.
# Visible steps: extend data_schema (name: str, age: int, rowid: int with a
# third positional argument True) and bind it to table 'person' in 'test.db';
# loop over range(0, 1) -- a single iteration -- creating a DataAccessor that
# is handed `demux` directly with FindRange, wrapping its output in a Select
# with NameAgeCombiner, and recording both in `selects` / `data_accessors`;
# finally build a Mux over every select's output().
# The remaining commented-out lines are disabled alternatives, and the
# ResultStack(...) call is cut off at the end of the fragment.
data_schema.append(Attribute('name', str)) data_schema.append(Attribute('age', int)) data_schema.append(Attribute('rowid', int, True)) data_source = DBTable('test.db', 'person', data_schema) # definition of the data source #data_source = CSVFile('test.csv', data_schema) data_accessors = [] selects = [] for i in range(0, 1): # create a data accessor data_accessor = DataAccessor(demux, data_source, FindRange) name_age_combiner = NameAgeCombiner(data_accessor.output().schema()) selects.append(Select(data_accessor.output(), name_age_combiner)) data_accessors.append(data_accessor) mux = Mux(*[s.output() for s in selects]) #name_age_combiner_reverse = NameAgeCombinerReverse(demux.schema()) #select2 = Select(demux, name_age_combiner_reverse) #name_age_combiner = NameAgeCombiner(data_accessor.output().schema()) #select = Select(data_accessor.output(), name_age_combiner) #name_age_combiner_reverse = NameAgeCombinerReverse(data_accessor.output().schema()) #select2 = Select(data_accessor.output(), name_age_combiner_reverse) result_stack = ResultStack( # query_streamer.output(), mux.output(),
# NOTE(review): fragment collapsed onto one physical line -- because it starts
# with '#', the whole line is comment text and nothing below executes until
# the line breaks are restored.
# Visible steps: a Group over family_accessor.output() treats two records as
# the same group when their 'family.id' values compare equal (`==`); a Select
# copies 'family.id' unchanged into 'genus.family_id' (type int); a DBTable
# source is created for table 'genus' in input_file. Both engines are
# appended to `engines`.
# The genus_accessor = DataAccessor( call is cut off at the end.
# A group mini-engine to split the family IDs into groups. family_id_grouper = Group( family_accessor.output(), {'family.id': lambda a, b: a == b} ) engines.append(family_id_grouper) # Select only the family ID for querying genera. family_id_select = Select( family_accessor.output(), UniversalSelect( family_accessor.output().schema(), { 'genus.family_id': { 'type': int, 'args': ['family.id'], 'function': lambda v: v } } ) ) engines.append(family_id_select) # Data source for the genera. genus_source = DBTable(input_file, 'genus', genus_schema) # Data accessor for the genera data source. genus_accessor = DataAccessor(
# Data accessor that runs the incoming queries against the species source;
# its output is fanned out through a demultiplexer, one channel per track.
species_accessor = DataAccessor(
    query_streamer.output(),
    species_source,
    FindRange,
)
demux = Demux(species_accessor.output())

engines = []
mux_streams = []
for i in range(tracks):
    channel = demux.channel()

    # Select only the species ID for querying plants; the value of
    # 'species.id' is passed through unchanged as 'plants.species_id'.
    plants_key_mapping = {
        'plants.species_id': {
            'type': int,
            'args': ['species.id'],
            'function': lambda v: v,
        },
    }
    species_id_select = Select(
        channel,
        UniversalSelect(
            species_accessor.output().schema(),
            plants_key_mapping,
        ),
    )
    engines.append(species_id_select)

    # Data source for the plants.
    plants_source = DBTable(input_file, 'plants', plants_schema)

    # Data accessor for the plants data source.
    plants_accessor = DataAccessor(
        species_id_select.output(),
        plants_source,
        FindIdentities,
    )
    engines.append(plants_accessor)

    # Filter the accessed plants with FilterAge.
    plants_filter = Filter(
        plants_accessor.output(),
        FilterAge(plants_accessor.output().schema()),
    )
    engines.append(plants_filter)
# NOTE(review): fragment collapsed onto one physical line. It begins in the
# middle of a call whose opening is outside this fragment
# (`states_source, FindRange )`), and everything after the first '#' is
# comment text as written; restore the line breaks before relying on it.
# Visible steps: states_accessor is appended to `engines`; a Select replaces
# 'states.the_geom' with intersection(v, query) and copies 'oid' through
# unchanged as 'states.oid'; that Select is appended to `engines` as well.
# The trailing states_query Select is cut off right after `UniversalSelect(`.
states_source, FindRange ) engines.append(states_accessor) # Trim the states to the query region. states_select = Select( states_accessor.output(), UniversalSelect( states_accessor.output().schema(), { # trim geometry 'states.the_geom': { 'type': Geometry, 'args': ['states.the_geom'], 'function': lambda v: intersection(v, query), }, # keep OID 'states.oid': { 'type': int, 'args': ['oid'], 'function': lambda v: v, } } ) ) engines.append(states_select) # Only keep the geometry for querying states_query = Select( states_select.output(), UniversalSelect(
# NOTE(review): fragment collapsed onto one physical line -- only
# `sorts = []` precedes the first '#', so everything after it is comment text
# as written; restore the line breaks before relying on it. Because the final
# `)` follows a '#', it is unclear from here whether the Sort( call is closed
# inside this fragment.
# Visible steps: a DataAccessor (FindRange) over county_source is fed by
# query_streamer.output() and wrapped in a Demux; SubSchema(demux.schema(),
# {'county': 'zip'}) is built once; the loop over range(0, 2) then creates a
# Select on `demux`, a DataAccessor (FindRange) over zip_source, a second
# SubSchema/Select with {'oid': 'zip'}, and a Sort on 'zip'.
# - the sort comparator calls cmp(), which exists only in Python 2, and
#   relies on `a / 100`; under Python 3 `/` is true division -- presumably
#   this code targets Python 2, TODO confirm before porting.
sorts = [] # create a data accessor county_accessor = DataAccessor( query_streamer.output(), county_source, FindRange ) data_accessors.append(county_accessor) demux = Demux(county_accessor.output()) sub_schema = SubSchema(demux.schema(), {'county': 'zip'}) for i in range(0, 2): select = Select(demux, sub_schema) county_selects.append(select) zip_accessor = DataAccessor( select.output(), zip_source, FindRange ) sub_schema2 = SubSchema(zip_accessor.output().schema(), {'oid': 'zip'}) zip_select = Select(zip_accessor.output(), sub_schema2) zip_selects.append(zip_select) sort = Sort( zip_select.output(), {'zip': lambda a, b: cmp(a / 100, b / 100)}, # {'zip': None }, # True )
# NOTE(review): fragment collapsed onto one physical line. It begins inside a
# list literal opened outside this fragment (a one-element tuple holding
# `query`, then StopWord()), and everything after the first '#' (the "States"
# banner onwards) is comment text as written; restore the line breaks before
# relying on it.
# Visible steps: query_streamer is appended to `engines`; a Select copies
# 'queries.geom' unchanged into 'states.geom' (type Geometry); an Rtree over
# states_file keyed 'states.geom' is queried through a DataAccessor
# (FindRange). Each engine is appended to `engines`.
# The trailing states_select mapping ('states.oid') is cut off at the end.
(query, ), StopWord(), ]) engines.append(query_streamer) ############################################################# # # States # ############################################################# states_query = Select( query_streamer.output(), UniversalSelect( query_streamer.output().schema(), { 'states.geom': { 'type': Geometry, 'args': ['queries.geom'], 'function': lambda v: v, }, })) engines.append(states_query) states_source = Rtree(states_file, 'states.geom') states_accessor = DataAccessor(states_query.output(), states_source, FindRange) engines.append(states_accessor) states_select = Select( states_accessor.output(), UniversalSelect( states_accessor.output().schema(), { 'states.oid': {
# Fan the species accessor output out through a demultiplexer, one channel
# per track.
demux = Demux(species_accessor.output())

engines = []
mux_streams = []
for i in range(tracks):
    channel = demux.channel()

    # Select only the species ID for querying plants; the value of
    # 'species.id' is passed through unchanged as 'plants.species_id'.
    plants_key_mapping = {
        'plants.species_id': {
            'type': int,
            'args': ['species.id'],
            'function': lambda v: v,
        },
    }
    species_id_select = Select(
        channel,
        UniversalSelect(
            species_accessor.output().schema(),
            plants_key_mapping,
        ),
    )
    engines.append(species_id_select)

    # Data source for the plants.
    plants_source = DBTable(input_file, 'plants', plants_schema)

    # Data accessor for the plants data source.
    plants_accessor = DataAccessor(
        species_id_select.output(),
        plants_source,
        FindIdentities,
    )