Esempio n. 1
0
    def __init__(self, input_schema, mapping):
        '''
        Build the output schema and the evaluation plan from a mapping.

        mapping = {
            'name': {
                'type': type,
                'args': ['input', 'input', ...],
                'function': function
            },
            ...
        }

        Each key of the mapping becomes one output attribute of the given
        type.  Every entry of 'args' must be contained in input_schema,
        otherwise Exception('Incompatible schema.') is raised.  For each
        output attribute the positions of its arguments in input_schema
        and its function are stored as a pair in self._f.
        '''
        self._input_schema = input_schema
        self._schema = Schema()
        self._mapping = mapping
        self._f = []
        for out_name in mapping:
            spec = mapping[out_name]
            # One output attribute per mapping entry.
            self._schema.append(Attribute(out_name, spec['type']))
            # Refuse mappings that refer to unknown input attributes.
            arg_names = spec['args']
            if any(arg not in self._input_schema for arg in arg_names):
                raise Exception('Incompatible schema.')

            positions = [input_schema.index(arg) for arg in arg_names]
            self._f.append((positions, spec['function']))
Esempio n. 2
0
 def __init__(self, input_schema):
     '''
     Create the one-attribute output schema ('name_age', str) and look up
     the positions of the 'name' (str) and 'age' (int) attributes in
     input_schema.
     '''
     self._schema = Schema()
     self._schema.append(Attribute('name_age', str))
     self._input_schema = input_schema
     # Resolve both positions once so they can be reused per record.
     name_pos = input_schema.index(Attribute('name', str))
     age_pos = input_schema.index(Attribute('age', int))
     self._indices = {'name': name_pos, 'age': age_pos}
Esempio n. 3
0
 def __init__(self, input_schema, output_attributes):
     '''
     Select attributes of input_schema and give them new names.

     output_attributes maps the name of an input attribute to the name
     it carries in the output schema; the attribute type is taken over
     from the input schema.  The positions of the selected attributes
     are collected in self._indices.
     '''
     self._input_schema = input_schema
     self._output_schema = Schema()
     self._indices = []
     for source_name in output_attributes:
         position = self._input_schema.index(source_name)
         self._indices.append(position)
         target_name = output_attributes[source_name]
         source_type = self._input_schema[position].type()
         self._output_schema.append(Attribute(target_name, source_type))
Esempio n. 4
0
 def __init__(self, input_schema, names):
     '''
     Build the output schema by renaming attributes of input_schema.

     input_schema -- iterable of attributes exposing name() and type().
     names        -- mapping from an existing attribute name to its new
                     name.

     An attribute whose name is a key of names is replaced by a new
     Attribute with the new name and the original type; every other
     attribute is carried over unchanged.
     '''
     self._schema = Schema()
     for a in input_schema:
         if a.name() in names:
             self._schema.append(Attribute(
                 names[a.name()],
                 a.type()
             ))
         else:
             # Fixed typo: this used to append to the never-assigned
             # self._achema, raising AttributeError for every attribute
             # that is not renamed.
             self._schema.append(a)
     self._input_schema = input_schema
Esempio n. 5
0
class NameAgeCombinerReverse(object):
    '''
    Record transformer that merges the age and name of a record into a
    single 'name_age' string of the form "<age>: <name>".
    '''

    def __init__(self, input_schema):
        '''
        Create the one-attribute output schema and look up the positions
        of the 'name' (str) and 'age' (int) attributes in input_schema.
        '''
        self._schema = Schema()
        self._schema.append(Attribute('name_age', str))
        self._input_schema = input_schema
        name_pos = input_schema.index(Attribute('name', str))
        age_pos = input_schema.index(Attribute('age', int))
        self._indices = {'name': name_pos, 'age': age_pos}

    def schema(self):
        '''Return the output schema.'''
        return self._schema

    def accepts(self, other_schema):
        '''Return whether other_schema equals the input schema.'''
        return self._input_schema == other_schema

    def __call__(self, r):
        '''Return a 1-tuple holding "<age>: <name>" for record r.'''
        age = r[self._indices['age']]
        name = r[self._indices['name']]
        return ('%d: %s' % (age, name),)
Esempio n. 6
0
class UniversalSelect(object):
    '''
    Record transformer that computes every output attribute by applying a
    function to selected attributes of the input record.
    '''

    def __init__(self, input_schema, mapping):
        '''
        Build the output schema and the evaluation plan from a mapping.

        mapping = {
            'name': {
                'type': type,
                'args': ['input', 'input', ...],
                'function': function
            },
            ...
        }

        Each key of the mapping becomes one output attribute of the given
        type.  Every entry of 'args' must be contained in input_schema,
        otherwise Exception('Incompatible schema.') is raised.
        '''
        self._input_schema = input_schema
        self._schema = Schema()
        self._mapping = mapping
        self._f = []
        for out_name in mapping:
            spec = mapping[out_name]
            # One output attribute per mapping entry.
            self._schema.append(Attribute(out_name, spec['type']))
            # Refuse mappings that refer to unknown input attributes.
            arg_names = spec['args']
            if any(arg not in self._input_schema for arg in arg_names):
                raise Exception('Incompatible schema.')

            positions = [input_schema.index(arg) for arg in arg_names]
            self._f.append((positions, spec['function']))

    def schema(self):
        '''Return the output schema.'''
        return self._schema

    def accepts(self, other):
        '''Return whether other equals the input schema.'''
        return self._input_schema == other

    def __call__(self, r):
        '''
        Return a tuple with one value per planned function, each called
        with the values of record r found at its argument positions.
        '''
        values = []
        for positions, function in self._f:
            arguments = [r[p] for p in positions]
            values.append(function(*arguments))
        return tuple(values)
Esempio n. 7
0
class SubSchema(object):
    '''
    Record transformer that keeps selected attributes of the input
    records and exposes them under new names.
    '''

    def __init__(self, input_schema, output_attributes):
        '''
        output_attributes maps the name of an input attribute to the name
        it carries in the output schema; the attribute type is taken over
        from input_schema.
        '''
        self._input_schema = input_schema
        self._output_schema = Schema()
        self._indices = []
        for source_name in output_attributes:
            position = self._input_schema.index(source_name)
            self._indices.append(position)
            target_name = output_attributes[source_name]
            source_type = self._input_schema[position].type()
            self._output_schema.append(Attribute(target_name, source_type))

    def schema(self):
        '''Return the output schema.'''
        return self._output_schema

    def accepts(self, other_schema):
        '''Return whether other_schema equals the input schema.'''
        return self._input_schema == other_schema

    def __call__(self, r):
        '''Return the values of r at the selected positions as a tuple.'''
        selected = [r[position] for position in self._indices]
        return tuple(selected)
Esempio n. 8
0
 def __init__(self, input_schema, output_attributes):
     '''
     Select attributes of input_schema and give them new names.

     output_attributes maps the name of an input attribute to the name
     it carries in the output schema; the attribute type is taken over
     from the input schema.  The positions of the selected attributes
     are collected in self._indices.
     '''
     self._input_schema = input_schema
     self._output_schema = Schema()
     self._indices = []
     for source_name in output_attributes:
         position = self._input_schema.index(source_name)
         self._indices.append(position)
         target_name = output_attributes[source_name]
         source_type = self._input_schema[position].type()
         self._output_schema.append(Attribute(target_name, source_type))
Esempio n. 9
0
class SubSchema(object):
    '''
    Record transformer that keeps selected attributes of the input
    records and exposes them under new names.
    '''

    def __init__(self, input_schema, output_attributes):
        '''
        output_attributes maps the name of an input attribute to the name
        it carries in the output schema; the attribute type is taken over
        from input_schema.
        '''
        self._input_schema = input_schema
        self._output_schema = Schema()
        self._indices = []
        for source_name in output_attributes:
            position = self._input_schema.index(source_name)
            self._indices.append(position)
            target_name = output_attributes[source_name]
            source_type = self._input_schema[position].type()
            self._output_schema.append(Attribute(target_name, source_type))

    def schema(self):
        '''Return the output schema.'''
        return self._output_schema

    def accepts(self, other_schema):
        '''Return whether other_schema equals the input schema.'''
        return self._input_schema == other_schema

    def __call__(self, r):
        '''Return the values of r at the selected positions as a tuple.'''
        selected = [r[position] for position in self._indices]
        return tuple(selected)
Esempio n. 10
0
class AttributeRename(object):
    '''
    Record transformer that renames attributes of the schema while
    passing the records themselves through unchanged.
    '''

    def __init__(self, input_schema, names):
        '''
        Build the output schema by renaming attributes of input_schema.

        input_schema -- iterable of attributes exposing name() and type().
        names        -- mapping from an existing attribute name to its
                        new name.

        An attribute whose name is a key of names is replaced by a new
        Attribute with the new name and the original type; every other
        attribute is carried over unchanged.
        '''
        self._schema = Schema()
        for a in input_schema:
            if a.name() in names:
                self._schema.append(Attribute(
                    names[a.name()],
                    a.type()
                ))
            else:
                # Fixed typo: this used to append to the never-assigned
                # self._achema, raising AttributeError for every
                # attribute that is not renamed.
                self._schema.append(a)
        self._input_schema = input_schema

    def schema(self):
        '''Return the output schema (with the renamed attributes).'''
        return self._schema

    def accepts(self, other_schema):
        '''Return whether other_schema equals the input schema.'''
        return self._input_schema == other_schema

    def __call__(self, r):
        '''Return record r unchanged; only the schema is renamed.'''
        return r
Esempio n. 11
0
class NameAgeCombinerReverse(object):
    '''
    Record transformer that merges the age and name of a record into a
    single 'name_age' string of the form "<age>: <name>".
    '''

    def __init__(self, input_schema):
        '''
        Create the one-attribute output schema and look up the positions
        of the 'name' (str) and 'age' (int) attributes in input_schema.
        '''
        self._schema = Schema()
        self._schema.append(Attribute('name_age', str))
        self._input_schema = input_schema
        name_pos = input_schema.index(Attribute('name', str))
        age_pos = input_schema.index(Attribute('age', int))
        self._indices = {'name': name_pos, 'age': age_pos}

    def schema(self):
        '''Return the output schema.'''
        return self._schema

    def accepts(self, other_schema):
        '''Return whether other_schema equals the input schema.'''
        return self._input_schema == other_schema

    def __call__(self, r):
        '''Return a 1-tuple holding "<age>: <name>" for record r.'''
        age = r[self._indices['age']]
        name = r[self._indices['name']]
        combined = '%d: %s' % (age, name)
        return (combined,)
Esempio n. 12
0
# Values taken from the command-line arguments at positions 5 and 6.
# NOTE(review): presumably paths to the zip-code and cover data files,
# judging by the disabled defaults below -- confirm with the caller.
zip_file = sys.argv[5]
cover_file = sys.argv[6]

# Hard-coded alternatives to the command-line arguments (disabled).
#states_file = 'data/spatial/states'
#counties_file = 'data/spatial/counties'
#zip_file = 'data/spatial/zip5'
#cover_file = 'data/spatial/' + sys.argv[2]

#############################################################
#
# Query 5
#
#############################################################

# Schema definition of the query stream: a single attribute named
# 'queries.geom' of type Geometry.
query_schema = Schema()
query_schema.append(Attribute('queries.geom', Geometry))

# Aggregation function that sums the values of one attribute.
class SumAggregator(object):
    def __init__(self, input_schema, f):
        self._input_schema = input_schema
        self._af = []
        for a in self._input_schema:
            if a.name() == f:
                # Accumulate the running sum
                self._af.append((
                    0,
                    lambda x, v: x + v,
                ))
            else:
Esempio n. 13
0
#   ON CONTAINS(us_states.the_geom, us_counties.the_geom)
# LEFT JOIN geonames ON CONTAINS(us_counties.the_geom, geonames.location)
# WHERE 
#   CONTAINS(
#       MakeBox2D(
#           MakePoint(-93.88, 49.81),
#           MakePoint(-65.39, 24.22)
#       ),
#       geonames.location
#   )
# GROUP BY ROLLUP(us_states.gid, us_counties.gid);
#
#############################################################

# Schema definition of the query stream: a single attribute named
# 'states.the_geom' of type Geometry.
query_schema = Schema()
query_schema.append(Attribute('states.the_geom', Geometry))

# Aggregation function that sums the values of one attribute.
class SumAggregator(object):
    def __init__(self, input_schema, f):
        self._input_schema = input_schema
        self._af = []
        for a in self._input_schema:
            if a.name() == f:
                # Accumulate the running sum
                self._af.append((
                    0,
                    lambda x, v: x + v,
                ))
            else:
Esempio n. 14
0
#############################################################
#
# Query 1
#
# SELECT species.id, MAX(plants.height)
# FROM species
# LEFT JOIN plants ON  plants.species_id = species.id
# WHERE plants.age >= 10 AND plants.age <= 50
# GROUP BY species.id;
#
#############################################################

# Schema definition of the query stream: a single 'species.id' attribute
# of type IntInterval, i.e. an interval across species IDs.
query_schema = Schema()
query_schema.append(Attribute('species.id', IntInterval))

# Schema definition of the species record stream: only the species ID.
species_schema = Schema()
species_schema.append(Attribute('species.id', int))

# Schema definition of the plant record stream: ID, height, age and the
# ID of the species the plant belongs to, all of type int.
# NOTE(review): 'plants.species_id' passes a third argument (True) to
# Attribute whose meaning is not visible here -- presumably it marks the
# column used to join against species.id; confirm against Attribute.
plants_schema = Schema()
plants_schema.append(Attribute('plants.id', int))
plants_schema.append(Attribute('plants.height', int))
plants_schema.append(Attribute('plants.age', int))
plants_schema.append(Attribute('plants.species_id', int, True))


# Filter plants to only include those 10 years or older and 50 years or
Esempio n. 15
0
#############################################################
#
# Query 2
#
# SELECT family.id, genus.id, species.id, MAX(plants.height) 
# FROM family
# LEFT JOIN genus ON genus.family_id = family.id 
# LEFT JOIN species ON species.genus_id = genus.id 
# LEFT JOIN plants ON plants.species_id =  species.id 
# WHERE plants.age >= 10 AND plants.age <= 50 
# GROUP BY ROLLUP(family.id, genus.id, species.id)
#
#############################################################

# Schema definition of the query stream: a single 'family.id' attribute
# of type IntInterval, i.e. an interval across family IDs.
query_schema = Schema()
query_schema.append(Attribute('family.id', IntInterval))

# Schema definition of the family record stream: only the family ID.
family_schema = Schema()
family_schema.append(Attribute('family.id', int))

# Schema definition of the genus record stream: the genus ID and the ID
# of the family it belongs to.
# NOTE(review): the third argument (True) on 'genus.family_id' is not
# explained here -- presumably it marks the column used to join against
# family.id; confirm against Attribute.
genus_schema = Schema()
genus_schema.append(Attribute('genus.id', int))
genus_schema.append(Attribute('genus.family_id', int, True))

# Schema definition of the species record stream: the species ID and the
# ID of the genus it belongs to (flagged with True like genus.family_id).
species_schema = Schema()
species_schema.append(Attribute('species.id', int))
species_schema.append(Attribute('species.genus_id', int, True))
Esempio n. 16
0
    def __call__(self, r):
        '''
        Fold record r into the aggregate value.

        For every position, the stored value in self._c is replaced by
        the result of calling the combine function self._af[position][1]
        with the stored value and the value of r at that position.
        '''
        for position, current in enumerate(self._c):
            combine = self._af[position][1]
            self._c[position] = combine(current, r[position])

#############################################################
#
# TEST 1
#
#############################################################

# Schema definition of the query stream: a single 'age' attribute of
# type IntInterval.
query_schema = Schema()
query_schema.append(Attribute('age', IntInterval))

# Query stream generator fed from an in-memory list of eight 1-tuples,
# each holding one IntInterval; only the intervals (1, 3) and (2, 5)
# occur, repeated in mixed order.
query_streamer = ArrayStreamer(query_schema, [
        (IntInterval(1, 3),),
        (IntInterval(2, 5),),
        (IntInterval(1, 3),),
        (IntInterval(1, 3),),
        (IntInterval(2, 5),),
        (IntInterval(2, 5),),
        (IntInterval(1, 3),),
        (IntInterval(2, 5),),
])

# schema definition of the data stream
Esempio n. 17
0
#############################################################
#
# Query 2
#
# SELECT family.id, genus.id, species.id, MAX(plants.height)
# FROM family
# LEFT JOIN genus ON genus.family_id = family.id
# LEFT JOIN species ON species.genus_id = genus.id
# LEFT JOIN plants ON plants.species_id =  species.id
# WHERE plants.age >= 10 AND plants.age <= 50
# GROUP BY ROLLUP(family.id, genus.id, species.id)
#
#############################################################

# Schema definition of the query stream: a single 'family.id' attribute
# of type IntInterval, i.e. an interval across family IDs.
query_schema = Schema()
query_schema.append(Attribute('family.id', IntInterval))

# Schema definition of the family record stream: only the family ID.
family_schema = Schema()
family_schema.append(Attribute('family.id', int))

# Schema definition of the genus record stream: the genus ID and the ID
# of the family it belongs to.
# NOTE(review): the third argument (True) on 'genus.family_id' is not
# explained here -- presumably it marks the column used to join against
# family.id; confirm against Attribute.
genus_schema = Schema()
genus_schema.append(Attribute('genus.id', int))
genus_schema.append(Attribute('genus.family_id', int, True))

# Schema definition of the species record stream: the species ID and the
# ID of the genus it belongs to (flagged with True like genus.family_id).
species_schema = Schema()
species_schema.append(Attribute('species.id', int))
species_schema.append(Attribute('species.genus_id', int, True))
Esempio n. 18
0
# SELECT counties.id, COUNT(geonames.*) FROM counties
# LEFT JOIN geonames ON CONTAINS(counties.the_geom, geonames.location)
# WHERE
#   CONTAINS(
#       MakeBox2D(
#           MakePoint(-93.88, 49.81),
#           MakePoint(-65.39, 24.22)
#       ),
#       geonames.location
#   )
# GROUP BY counties.id;
#
#############################################################

# Schema definition of the query stream: a single attribute named
# 'counties.the_geom' of type Geometry.
query_schema = Schema()
query_schema.append(Attribute("counties.the_geom", Geometry))

# Aggregation function that sums the values of one attribute.
class SumAggregator(object):
    '''
    Aggregation setup that sums the attribute named f and passes every
    other attribute through.
    '''

    def __init__(self, input_schema, f):
        '''
        input_schema -- iterable of attributes exposing name().
        f            -- name of the attribute whose values are summed.

        For each attribute a pair is stored in self._af.  The second
        item is a function of (x, v): for the attribute named f it
        returns x + v, for all others it returns v.  The first item is
        0 for the summed attribute and None otherwise (presumably the
        starting value of the aggregate -- confirm with the code that
        consumes self._af).
        '''
        self._input_schema = input_schema
        self._af = []
        for attribute in self._input_schema:
            if attribute.name() == f:
                # Summed attribute: add the new value to the total.
                entry = (0, lambda x, v: x + v)
            else:
                # Any other attribute: keep the latest value as is.
                entry = (None, lambda x, v: v)
            self._af.append(entry)
Esempio n. 19
0
# Values taken from the command-line arguments at positions 5 and 6.
# NOTE(review): presumably paths to the zip-code and cover data files,
# judging by the disabled defaults below -- confirm with the caller.
zip_file = sys.argv[5]
cover_file = sys.argv[6]

# Hard-coded alternatives to the command-line arguments (disabled).
#states_file = 'data/spatial/states'
#counties_file = 'data/spatial/counties'
#zip_file = 'data/spatial/zip5'
#cover_file = 'data/spatial/' + sys.argv[2]

#############################################################
#
# Query 5
#
#############################################################

# Schema definition of the query stream: a single attribute named
# 'queries.geom' of type Geometry.
query_schema = Schema()
query_schema.append(Attribute('queries.geom', Geometry))


# Aggregation function that sums the values of the attribute named f.
class SumAggregator(object):
    def __init__(self, input_schema, f):
        '''
        input_schema -- iterable of attributes exposing name().
        f            -- name of the attribute whose values are summed.

        For every attribute whose name() equals f, a pair
        (0, function) is appended to self._af, where the function
        returns x + v.  The 0 is presumably the starting value of the
        sum -- confirm with the code that consumes self._af.
        '''
        self._input_schema = input_schema
        self._af = []
        for a in self._input_schema:
            if a.name() == f:
                # Summed attribute: add the new value v to the total x
                self._af.append((
                    0,
                    lambda x, v: x + v,
                ))
Esempio n. 20
0
# SELECT counties.id, COUNT(geonames.*) FROM counties
# LEFT JOIN geonames ON CONTAINS(counties.the_geom, geonames.location)
# WHERE
#   CONTAINS(
#       MakeBox2D(
#           MakePoint(-93.88, 49.81),
#           MakePoint(-65.39, 24.22)
#       ),
#       geonames.location
#   )
# GROUP BY counties.id;
#
#############################################################

# Schema definition of the query stream: a single attribute named
# 'counties.the_geom' of type Geometry.
query_schema = Schema()
query_schema.append(Attribute('counties.the_geom', Geometry))


# Aggregation function that sums the values of the attribute named f.
class SumAggregator(object):
    def __init__(self, input_schema, f):
        '''
        input_schema -- iterable of attributes exposing name().
        f            -- name of the attribute whose values are summed.

        For every attribute whose name() equals f, a pair
        (0, function) is appended to self._af, where the function
        returns x + v.  The 0 is presumably the starting value of the
        sum -- confirm with the code that consumes self._af.
        '''
        self._input_schema = input_schema
        self._af = []
        for a in self._input_schema:
            if a.name() == f:
                # Summed attribute: add the new value v to the total x
                self._af.append((
                    0,
                    lambda x, v: x + v,
                ))
Esempio n. 21
0
#############################################################
#
# Query 1
#
# SELECT species.id, MAX(plants.height) 
# FROM species 
# LEFT JOIN plants ON  plants.species_id = species.id 
# WHERE plants.age >= 10 AND plants.age <= 50 
# GROUP BY species.id;
#
#############################################################

# Schema definition of the query stream: a single 'species.id' attribute
# of type IntInterval, i.e. an interval across species IDs.
query_schema = Schema()
query_schema.append(Attribute('species.id', IntInterval))

# Schema definition of the species record stream: only the species ID.
species_schema = Schema()
species_schema.append(Attribute('species.id', int))

# Schema definition of the plant record stream: ID, height, age and the
# ID of the species the plant belongs to, all of type int.
# NOTE(review): 'plants.species_id' passes a third argument (True) to
# Attribute whose meaning is not visible here -- presumably it marks the
# column used to join against species.id; confirm against Attribute.
plants_schema = Schema()
plants_schema.append(Attribute('plants.id', int))
plants_schema.append(Attribute('plants.height', int))
plants_schema.append(Attribute('plants.age', int))
plants_schema.append(Attribute('plants.species_id', int, True))

# Filter plants to only include those 10 years or older and 50 years or
# younger.