def __init__(self, input_schema):
    '''
    Prepare an output schema holding the single string attribute
    'name_age' and record where the 'name' (str) and 'age' (int)
    attributes are located in `input_schema`.
    '''
    output_schema = Schema()
    output_schema.append(Attribute('name_age', str))
    self._schema = output_schema
    self._input_schema = input_schema

    # Look the two source attributes up once so later code can address
    # them by position.
    name_position = input_schema.index(Attribute('name', str))
    age_position = input_schema.index(Attribute('age', int))
    self._indices = {'name': name_position, 'age': age_position}
def __init__(self, input_schema, mapping):
    '''
    Build the output schema and the list of functions from `mapping`.

    mapping = {
        'name': {
            'type': type,
            'args': ['input', 'input', ...],
            'function': function
        },
        ...
    }

    or, as a sequence of (name, spec) pairs:

    mapping = [
        ('name', {
            'type': type,
            'args': ['input', 'input', ...],
            'function': function
        }),
        ...
    ]

    For every entry an Attribute(name, spec['type']) is appended to the
    output schema, and a pair (input indices of spec['args'],
    spec['function']) is appended to self._f.

    Raises:
        TypeError: if `mapping` is neither a dict nor a list/tuple of
            pairs.
        Exception: ('Incompatible schema.') if a name listed in 'args'
            is not contained in `input_schema`.
    '''
    self._input_schema = input_schema
    self._schema = Schema()
    self._f = []

    # Normalise both accepted shapes to (name, spec) pairs so there is a
    # single code path. isinstance() also accepts dict subclasses such
    # as OrderedDict, which the former `type(mapping) is dict` test
    # silently ignored.
    if isinstance(mapping, dict):
        entries = mapping.items()
    elif isinstance(mapping, (list, tuple)):
        entries = mapping
    else:
        raise TypeError(
            'mapping must be a dict or a list of (name, spec) pairs, '
            'got %s' % type(mapping).__name__
        )

    for name, spec in entries:
        # Create output schema type
        self._schema.append(Attribute(name, spec['type']))

        # Verify input schema and mapping
        for n in spec['args']:
            if n not in self._input_schema:
                raise Exception('Incompatible schema.')

        self._f.append((
            [input_schema.index(n) for n in spec['args']],
            spec['function'],
        ))
def __init__(self, input_schema, output_attributes):
    '''
    Build the output schema from `output_attributes`, whose keys are
    attribute names looked up in `input_schema` and whose values are the
    names given to the corresponding output attributes. The position of
    each looked-up input attribute is recorded in self._indices.
    '''
    self._input_schema = input_schema
    self._output_schema = Schema()
    self._indices = []

    source = self._input_schema
    for input_name in output_attributes.keys():
        position = source.index(input_name)
        self._indices.append(position)

        # The output attribute keeps the input attribute's type and only
        # takes the name supplied by the caller.
        output_name = output_attributes[input_name]
        attribute_type = source[position].type()
        self._output_schema.append(Attribute(output_name, attribute_type))
def __init__(self, input_schema, names):
    '''
    Build the output schema as a copy of `input_schema` in which some
    attributes are renamed.

    input_schema: iterable of attributes exposing name() and type().
    names: container mapping an input attribute name to its new name.
        Attributes whose name is not in `names` are carried over as-is.
    '''
    self._input_schema = input_schema
    self._schema = Schema()
    for a in input_schema:
        if a.name() in names:
            # NOTE(review): only the name and type are passed to the new
            # Attribute; confirm no other attribute options need copying.
            self._schema.append(Attribute(names[a.name()], a.type()))
        else:
            # Fixed typo: this used to append to `self._achema`, which
            # raised AttributeError for every attribute left unrenamed.
            self._schema.append(a)
# LEFT JOIN geonames ON CONTAINS(counties.the_geom, geonames.location) # WHERE # CONTAINS( # MakeBox2D( # MakePoint(-93.88, 49.81), # MakePoint(-65.39, 24.22) # ), # geonames.location # ) # GROUP BY counties.id; # ############################################################# # Schema definition of the query stream: an interval across all counties. query_schema = Schema() query_schema.append(Attribute('counties.the_geom', Geometry)) # Aggregation function for max height. class SumAggregator(object): def __init__(self, input_schema, f): self._input_schema = input_schema self._af = [] for a in self._input_schema: if a.name() == f: # Only keep the maximum self._af.append(( 0, lambda x, v: x + v, )) else:
def __call__(self, r):
    '''
    Fold the record `r` into the aggregate, one column at a time: each
    stored value is replaced by the result of its column's function
    applied to the stored value and the record's value for that column.
    '''
    for position, current in enumerate(self._c):
        combine = self._af[position][1]
        self._c[position] = combine(current, r[position])

#############################################################
#
# TEST 1
#
#############################################################

# Query stream schema: one IntInterval attribute named 'age'.
query_schema = Schema()
query_schema.append(Attribute('age', IntInterval))

# Query stream served from an in-memory array of one-field records.
query_streamer = ArrayStreamer(query_schema, [
    (IntInterval(1, 3),),
    (IntInterval(2, 5),),
    (IntInterval(1, 3),),
    (IntInterval(1, 3),),
    (IntInterval(2, 5),),
    (IntInterval(2, 5),),
    (IntInterval(1, 3),),
    (IntInterval(2, 5),),
])

# Schema of the data stream.
data_schema = Schema()
# LEFT JOIN geonames ON CONTAINS(us_counties.the_geom, geonames.location) # WHERE # CONTAINS( # MakeBox2D( # MakePoint(-93.88, 49.81), # MakePoint(-65.39, 24.22) # ), # geonames.location # ) # GROUP BY ROLLUP(us_states.gid, us_counties.gid); # ############################################################# # Schema definition of the query stream: an interval across all states. query_schema = Schema() query_schema.append(Attribute('states.the_geom', Geometry)) # Aggregation function for max height. class SumAggregator(object): def __init__(self, input_schema, f): self._input_schema = input_schema self._af = [] for a in self._input_schema: if a.name() == f: # Only keep the maximum self._af.append(( 0, lambda x, v: x + v, )) else:
# # Query 2 # # SELECT family.id, genus.id, species.id, MAX(plants.height) # FROM family # LEFT JOIN genus ON genus.family_id = family.id # LEFT JOIN species ON species.genus_id = genus.id # LEFT JOIN plants ON plants.species_id = species.id # WHERE plants.age >= 10 AND plants.age <= 50 # GROUP BY ROLLUP(family.id, genus.id, species.id) # ############################################################# # Schema definition of the query stream: an interval across all families. query_schema = Schema() query_schema.append(Attribute('family.id', IntInterval)) # Schema definition of the family record stream. family_schema = Schema() family_schema.append(Attribute('family.id', int)) # Schema definitions of the genus record stream. genus_schema = Schema() genus_schema.append(Attribute('genus.id', int)) genus_schema.append(Attribute('genus.family_id', int, True)) # Schema definitions of the species record stream. species_schema = Schema() species_schema.append(Attribute('species.id', int)) species_schema.append(Attribute('species.genus_id', int, True))
############################################################# # # Query 1 # # SELECT species.id, MAX(plants.height) # FROM species # LEFT JOIN plants ON plants.species_id = species.id # WHERE plants.age >= 10 AND plants.age <= 50 # GROUP BY species.id; # ############################################################# # Schema definition of the query stream: an interval across all species # IDs. query_schema = Schema() query_schema.append(Attribute('species.id', IntInterval)) # Schema definition of the species record stream. species_schema = Schema() species_schema.append(Attribute('species.id', int)) # Schema definition of the plant record stream. plants_schema = Schema() plants_schema.append(Attribute('plants.id', int)) plants_schema.append(Attribute('plants.height', int)) plants_schema.append(Attribute('plants.age', int)) plants_schema.append(Attribute('plants.species_id', int, True)) # Filter plants to only include those 10 years or older and 50 years or # younger.
cover_file = sys.argv[6] #states_file = 'data/spatial/states' #counties_file = 'data/spatial/counties' #zip_file = 'data/spatial/zip5' #cover_file = 'data/spatial/' + sys.argv[2] ############################################################# # # Query 5 # ############################################################# # Schema definition of the query stream. query_schema = Schema() query_schema.append(Attribute('queries.geom', Geometry)) # Aggregation function for max height. class SumAggregator(object): def __init__(self, input_schema, f): self._input_schema = input_schema self._af = [] for a in self._input_schema: if a.name() == f: # Only keep the maximum self._af.append(( 0, lambda x, v: x + v, )) else: