# The query stream contains only a single query box.
query_streamer = ArrayStreamer(query_schema, [
    (query,),
])
engines.append(query_streamer)

# Counties data source, built over the 'counties.the_geom' attribute.
counties_source = Rtree(counties_file, 'counties.the_geom')

# Data accessor that drives the counties source from the query stream using
# FindRange.
counties_accessor = DataAccessor(
    query_streamer.output(),
    counties_source,
    FindRange,
)
engines.append(counties_accessor)

demux = Demux(counties_accessor.output())


def intersection(a, b):
    """Return the intersection of *a* and *b* wrapped in a ``Geometry``.

    Both arguments must provide a ``geom()`` method; the objects it returns
    are checked with ``is_valid`` and combined with ``intersection``.

    Returns:
        A ``Geometry`` built from the intersection, or ``None`` when either
        underlying geometry is invalid or the computation raises.
    """
    g1 = a.geom()
    g2 = b.geom()
    try:
        if not (g1.is_valid and g2.is_valid):
            return None
        return Geometry(g1.intersection(g2))
    except Exception:
        # Deliberately best-effort: a failing geometry operation yields None.
        # Catching Exception instead of using a bare ``except:`` lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
# Data source for the species. species_source = DBTable(input_file, 'species', species_schema) # Data accessor for the species data source. species_accessor = DataAccessor(genus_id_select.output(), species_source, FindIdentities) engines.append(species_accessor) # A join mini-engine to associate families, genera and species. family_genus_species_joiner = Join( family_genus_id_grouper.output(), species_accessor.output(), ) engines.append(family_genus_species_joiner) demux = Demux(family_genus_species_joiner.output()) mux_streams = [] for i in range(tracks): channel = demux.channel() # Select only the species ID for querying plants. species_id_select = Select( channel, UniversalSelect( channel.schema(), { 'plants.species_id': { 'type': int, 'args': ['species.id'], 'function': lambda v: v }
# Group states by OID states_group = Group(states_select.output(), { 'states.oid': lambda a, b: a == b }) engines.append(states_group) # Join counties and states states_counties_join = Join( states_group.output(), counties_oid_select.output(), ) engines.append(states_counties_join) # De-multiplex the joined stream across multiple tracks for better CPU core # utilization. demux = Demux(states_counties_join.output()) mux_streams = [] for i in range(tracks): channel = demux.channel() # To query the locations in the geonames layer, trim the counties to # the state and query boundary. counties_select = Select( channel, UniversalSelect( channel.schema(), { 'geonames.location': { 'type': Geometry, 'args': ['states.the_geom', 'counties.the_geom'], 'function': lambda s, c: intersection(s, c), }
# NOTE(review): the next statement is the tail of a definition that begins
# before this excerpt. It is kept verbatim and must be re-indented to match
# its enclosing scope when merged back.
self._c[i] = self._af[i][1](c, r[i])

engines = []
counters = []

# The query stream contains only a single query box.
query_streamer = ArrayStreamer(query_schema, [(query,)])
engines.append(query_streamer)

# Counties data source, built over the "counties.the_geom" attribute.
counties_source = Rtree(counties_file, "counties.the_geom")

# Data accessor that drives the counties source from the query stream using
# FindRange.
counties_accessor = DataAccessor(query_streamer.output(), counties_source, FindRange)
engines.append(counties_accessor)

demux = Demux(counties_accessor.output())


def intersection(a, b):
    """Return the intersection of *a* and *b* wrapped in a ``Geometry``.

    Both arguments must provide a ``geom()`` method; the objects it returns
    are checked with ``is_valid`` and combined with ``intersection``.

    Returns:
        A ``Geometry`` built from the intersection, or ``None`` when either
        underlying geometry is invalid or the computation raises.
    """
    g1 = a.geom()
    g2 = b.geom()
    try:
        if not (g1.is_valid and g2.is_valid):
            return None
        return Geometry(g1.intersection(g2))
    except Exception:
        # Deliberately best-effort: a failing geometry operation yields None.
        # Catching Exception instead of using a bare ``except:`` lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
query_schema = Schema() query_schema.append(Attribute('age', IntInterval)) # query stream generator from array query_streamer = ArrayStreamer(query_schema, [ (IntInterval(1, 3), ), (IntInterval(2, 5), ), (IntInterval(1, 3), ), (IntInterval(1, 3), ), (IntInterval(2, 5), ), (IntInterval(2, 5), ), (IntInterval(1, 3), ), (IntInterval(2, 5), ), ]) demux = Demux(query_streamer.output()) # schema definition of the data stream data_schema = Schema() data_schema.append(Attribute('name', str)) data_schema.append(Attribute('age', int)) data_schema.append(Attribute('rowid', int, True)) data_source = DBTable('test.db', 'person', data_schema) # definition of the data source #data_source = CSVFile('test.csv', data_schema) data_accessors = [] selects = [] for i in range(0, 1):
for i, c in enumerate(self._c): self._c[i] = self._af[i][1](c, r[i]) # The query stream contains only a single query. query_streamer = ArrayStreamer(query_schema, [ (IntInterval(0, int(1E10)), ), ]) # Create a species data source: a table in the input database. species_source = DBTable(input_file, 'species', species_schema) # Data accessor for the species data source. species_accessor = DataAccessor(query_streamer.output(), species_source, FindRange) demux = Demux(species_accessor.output()) engines = [] mux_streams = [] for i in range(tracks): channel = demux.channel() # Select only the species ID for querying plants. species_id_select = Select( channel, UniversalSelect( species_accessor.output().schema(), { 'plants.species_id': { 'type': int, 'args': ['species.id'], 'function': lambda v: v
query_schema = Schema() query_schema.append(Attribute('age', IntInterval)) # query stream generator from array query_streamer = ArrayStreamer(query_schema, [ (IntInterval(1, 3),), (IntInterval(2, 5),), (IntInterval(1, 3),), (IntInterval(1, 3),), (IntInterval(2, 5),), (IntInterval(2, 5),), (IntInterval(1, 3),), (IntInterval(2, 5),), ]) demux = Demux(query_streamer.output()) # schema definition of the data stream data_schema = Schema() data_schema.append(Attribute('name', str)) data_schema.append(Attribute('age', int)) data_schema.append(Attribute('rowid', int, True)) data_source = DBTable('test.db', 'person', data_schema) # definition of the data source #data_source = CSVFile('test.csv', data_schema) data_accessors = [] selects = [] for i in range(0, 1):
'type': Geometry, 'args': ['counties.geom', 'zip.geom'], 'function': lambda a, b: intersection(a, b), } })) engines.append(zip_trim) zip_filter = Filter( zip_trim.output(), UniversalFilter(zip_trim.output().schema(), { 'zip.geom': lambda g: g and g.geom().is_valid and g.geom().area != 0, })) engines.append(zip_filter) demux = Demux(zip_filter.output()) mux_streams = [] for i in range(tracks): channel = demux.channel() zip_group = Group( channel, { 'states.oid': lambda a, b: a == b, 'counties.oid': lambda a, b: a == b, 'zip.oid': lambda a, b: a == b, }) engines.append(zip_group) cover_query = Select( channel,