Ejemplo n.º 1
0
# Select stage over the aggregate stream.  It defines one derived string
# attribute, 'name_age', built from the 'name' and 'age' attributes and
# rendered as '<name> --> <age>'.
aselect = Select(
    aggregate.output(),
    UniversalSelect(aggregate.output().schema(), {
        'name_age': {
            'type': str,
            'args': ['name', 'age'],
            'function': lambda name, age: '%s --> %d' % (name, age),
        },
    }),
)

# Join the output of qselect (defined outside this section) with the
# output of the name/age select stage.
joiner = Join(qselect.output(), aselect.output())


# Feed the joined stream into the result stack.  The commented-out
# entries name other stream outputs that could be stacked instead --
# presumably debugging toggles (TODO confirm before removing them).
result_stack = ResultStack(
#    aggregate.output(),
    joiner.output(),
#    query_streamer.output(),
#    query_grouper.output(),
#    select.output(),
)

# NOTE(review): created with no arguments; its producers and consumers
# are not visible in this section.
info_queue = Queue()

def manage(task):
    """Announce *task* on standard output, then run it.

    task: any object providing a ``run()`` method; ``str(task)`` is used
        for the announcement line.

    Returns None.  Whatever ``task.run()`` raises propagates to the
    caller unchanged.
    """
    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print('Running: ' + str(task))
    task.run()
Ejemplo n.º 2
0
    # the query.
    counties_select = Select(
        channel,
        UniversalSelect(
            channel.schema(), {
                # Output attribute is named after the geonames index
                # attribute ('geonames.location').
                'geonames.location': {
                    'type': Geometry,
                    'args': ['counties.the_geom'],
                    # Replace the county geometry by its intersection
                    # with `query` (bound outside this section).
                    'function': lambda v: intersection(v, query),
                }
            }))
    engines.append(counties_select)

    # R-tree source over the geonames file.
    geonames_source = Rtree(geonames_file, 'geonames.location')
    # Data accessor for the geonames, driven by the geometries produced
    # by counties_select.
    geonames_accessor = DataAccessor(counties_select.output(), geonames_source,
                                     FindRange)
    engines.append(geonames_accessor)

    # XXX At this point no additional filter for constraining the
    # geonames to the query region is required (presumably because the
    # lookup geometry was already intersected with the query above).

    # Send '1' for each retrieved geoname location.
    geonames_select = Select(
        geonames_accessor.output(),
        UniversalSelect(
            geonames_accessor.output().schema(), {
                'count': {
                    'type': int,
                    'args': ['geonames.location'],
                    'function': lambda v: 1
Ejemplo n.º 3
0
    family_accessor.output(),
    UniversalSelect(
        family_accessor.output().schema(), {
            'genus.family_id': {
                'type': int,
                'args': ['family.id'],
                'function': lambda v: v
            }
        }))
engines.append(family_id_select)

# Data source backed by the 'genus' table of input_file.
genus_source = DBTable(input_file, 'genus', genus_schema)

# Accessor that queries the genera with the output of family_id_select,
# using the FindIdentities strategy.
genus_accessor = DataAccessor(
    family_id_select.output(),
    genus_source,
    FindIdentities,
)
engines.append(genus_accessor)

# Join mini-engine that associates families with genera.
family_genus_joiner = Join(family_id_grouper.output(), genus_accessor.output())
engines.append(family_genus_joiner)

# A group mini-engine to split the (family, genus) IDs into groups.
family_genus_id_grouper = Group(
    family_genus_joiner.output(),
    {
        'family.id': lambda a, b: a == b,
Ejemplo n.º 4
0
                'type': Geometry,
                'args': ['states.the_geom'],
                'function': lambda v: intersection(v, query),
            },
            # keep OID
            'states.oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v,
            }
        }))
engines.append(states_select)

# Pass the state geometry through unchanged, exposed under the name
# 'counties.the_geom'; it is the only attribute defined for the county
# query.
states_query = Select(
    states_select.output(),
    UniversalSelect(
        states_select.output().schema(),
        {
            'counties.the_geom': {
                'type': Geometry,
                'args': ['states.the_geom'],
                'function': lambda v: v,
            },
        },
    ),
)
engines.append(states_query)

# Source for the final step: an R-tree over the counties file.
counties_source = Rtree(counties_file, 'counties.the_geom')
counties_accessor = DataAccessor(
    states_query.output(),
    counties_source,
Ejemplo n.º 5
0
        UniversalSelect(
            channel.schema(),
            {
                "geonames.location": {
                    "type": Geometry,
                    "args": ["counties.the_geom"],
                    "function": lambda v: intersection(v, query),
                }
            },
        ),
    )
    engines.append(counties_select)

    # R-tree source over the geonames file.
    geonames_source = Rtree(geonames_file, "geonames.location")
    # Data accessor for the geonames, driven by the output of
    # counties_select.
    geonames_accessor = DataAccessor(counties_select.output(), geonames_source, FindRange)
    engines.append(geonames_accessor)

    # XXX At this point no additional filter for constraining the
    # geonames to the query region is required.

    # Send '1' for each retrieved geoname location: the 'count'
    # attribute ignores the location value and is always the integer 1.
    geonames_select = Select(
        geonames_accessor.output(),
        UniversalSelect(
            geonames_accessor.output().schema(),
            {"count": {"type": int, "args": ["geonames.location"], "function": lambda v: 1}},
        ),
    )
    engines.append(geonames_select)
Ejemplo n.º 6
0
# Data accessor: runs the streamed queries against data_source using the
# FindRange strategy.
data_accessor = DataAccessor(query_streamer.output(), data_source, FindRange)

# Select stage that applies a NameAgeCombiner (built from the accessor's
# output schema) to the accessor's output.
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

# Group the query stream on the 'age' attribute.  The predicate receives
# two 'age' values and reports whether they match (presumably deciding
# whether two records belong to the same group -- confirm against Group).
# Ages are compared by value: an identity test (`is`) is only true for
# integers the interpreter happens to share, so equal ages held in
# distinct objects would be treated as different.
query_grouper = Group(
    query_streamer.output(),
    {'age': lambda a, b: a == b}
)

# Join the grouped queries with the selected data records, then filter
# the joined stream with FilterNameAge.
joiner = Join(query_grouper.output(), select.output())
# NOTE(review): this module-level name shadows the builtin `filter`;
# renaming it would affect any code elsewhere that refers to it.
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

# Feed the filtered stream into the result stack.  The commented-out
# entries name other stream outputs that could be stacked instead --
# presumably debugging toggles (TODO confirm before removing them).
result_stack = ResultStack(
    filter.output(),
#    joiner.output(),
#    query_streamer.output(),
#    query_grouper.output(),
#    select.output(),
)

# NOTE(review): created with no arguments; its producers and consumers
# are not visible in this section.
info_queue = Queue()

def manage(task):
    """Announce *task* on standard output, then run it.

    task: any object providing a ``run()`` method; ``str(task)`` is used
        for the announcement line.

    Returns None.  Whatever ``task.run()`` raises propagates to the
    caller unchanged.
    """
    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print('Running: ' + str(task))
    task.run()
Ejemplo n.º 7
0
    UniversalSelect(
        query_streamer.output().schema(),
        {
            'states.geom': {
                'type': Geometry,
                'args': ['queries.geom'],
                'function': lambda v: v,
            },
        }
    )
)
engines.append(states_query)

# R-tree source over the states file, and the accessor that queries it
# with the output of states_query using the FindRange strategy.
states_source = Rtree(states_file, 'states.geom')
states_accessor = DataAccessor(states_query.output(), states_source, FindRange)
engines.append(states_accessor)

states_select = Select(
    states_accessor.output(),
    UniversalSelect(
        states_accessor.output().schema(),
        {
            'states.oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v,
            },
Ejemplo n.º 8
0
# Schema definition of the data stream: a string 'name' and an integer
# 'age' attribute.
data_schema = Schema()
data_schema.append(Attribute('name', str))
data_schema.append(Attribute('age', int))

# NOTE(review): the meaning of the third Attribute argument (True) is
# not visible in this section -- presumably it marks 'rowid' as a key
# attribute; confirm against the Attribute definition.
data_schema.append(Attribute('rowid', int, True))
# Data source bound to the 'person' table of the 'test.db' database file.
data_source = DBTable('test.db', 'person', data_schema)

# Create a data accessor that runs the streamed queries against the
# data source, then a select stage applying NameAgeCombiner to its output.
data_accessor = DataAccessor(query_streamer.output(), data_source, FindRange)
name_age_combiner = NameAgeCombiner(data_accessor.output().schema())
select = Select(data_accessor.output(), name_age_combiner)

# Group the query stream on the 'age' attribute.  The predicate receives
# two 'age' values and reports whether they match (presumably deciding
# whether two records belong to the same group -- confirm against Group).
# Ages are compared by value: an identity test (`is`) is only true for
# integers the interpreter happens to share, so equal ages held in
# distinct objects would be treated as different.
query_grouper = Group(query_streamer.output(), {'age': lambda a, b: a == b})

# Join the grouped queries with the selected data records, then filter
# the joined stream with FilterNameAge.
joiner = Join(query_grouper.output(), select.output())
# NOTE(review): this module-level name shadows the builtin `filter`;
# renaming it would affect any code elsewhere that refers to it.
filter = Filter(joiner.output(), FilterNameAge(joiner.output().schema()))

# Feed the filtered stream into the result stack.  The commented-out
# entries name other stream outputs that could be stacked instead --
# presumably debugging toggles (TODO confirm before removing them).
result_stack = ResultStack(
    filter.output(),
    #    joiner.output(),
    #    query_streamer.output(),
    #    query_grouper.output(),
    #    select.output(),
)

# NOTE(review): created with no arguments; its producers and consumers
# are not visible in this section.
info_queue = Queue()


def manage(task):
    print 'Running: ' + str(task)
Ejemplo n.º 9
0
# Lists collecting the sort engines and (presumably) the group engines
# built further down.
groups, sorts = [], []

# County lookup driven by the query stream, using the FindRange strategy.
county_accessor = DataAccessor(
    query_streamer.output(),
    county_source,
    FindRange,
)
data_accessors.append(county_accessor)

# Demultiplexer over the county results, plus a sub-schema built from the
# mapping {'county': 'zip'} (presumably an attribute rename -- confirm
# against SubSchema).
demux = Demux(county_accessor.output())
sub_schema = SubSchema(demux.schema(), {'county': 'zip'})

for i in range(0, 2):
    select = Select(demux, sub_schema)
    county_selects.append(select)
    zip_accessor = DataAccessor(select.output(), zip_source, FindRange)
    sub_schema2 = SubSchema(zip_accessor.output().schema(), {'oid': 'zip'})
    zip_select = Select(zip_accessor.output(), sub_schema2)
    zip_selects.append(zip_select)
    sort = Sort(
        zip_select.output(),
        {
            'zip': lambda a, b: cmp(a / 100, b / 100)
        },
        # {'zip': None },
        # True
    )
    sorts.append(sort)
    group = Group(sort.output(), {
        'zip': lambda a, b: (a / 1000) == (b / 1000)
    }
Ejemplo n.º 10
0
                'args': ['family.id'],
                'function': lambda v: v
            }
        }
    )
)
engines.append(family_id_select)


# Data source backed by the 'genus' table of input_file.
genus_source = DBTable(input_file, 'genus', genus_schema)

# Accessor that queries the genera with the output of family_id_select,
# using the FindIdentities strategy.
genus_accessor = DataAccessor(family_id_select.output(), genus_source,
                              FindIdentities)
engines.append(genus_accessor)

# Join mini-engine that associates families with genera.
family_genus_joiner = Join(
    family_id_grouper.output(), genus_accessor.output())
engines.append(family_genus_joiner)


# A group mini-engine to split the (family, genus) IDs into groups.
Ejemplo n.º 11
0
    # Select only the species ID for querying plants: 'species.id' is
    # passed through unchanged under the name 'plants.species_id'.
    # NOTE(review): the select reads from `channel` but takes its schema
    # from species_accessor.output() -- presumably the two share a
    # schema; confirm against the code that creates `channel`.
    species_id_select = Select(
        channel,
        UniversalSelect(
            species_accessor.output().schema(), {
                'plants.species_id': {
                    'type': int,
                    'args': ['species.id'],
                    'function': lambda v: v
                }
            }))
    engines.append(species_id_select)
    # Data source for the plants ('plants' table of input_file).
    plants_source = DBTable(input_file, 'plants', plants_schema)
    # Data accessor for the plants data source, queried with the
    # selected species IDs using the FindIdentities strategy.
    plants_accessor = DataAccessor(species_id_select.output(), plants_source,
                                   FindIdentities)
    engines.append(plants_accessor)

    # Filter the retrieved plants with FilterAge (its criterion is
    # defined outside this section).
    plants_filter = Filter(plants_accessor.output(),
                           FilterAge(plants_accessor.output().schema()))
    engines.append(plants_filter)

    # Select only the plant height from the filtered plants.
    plants_height_select = Select(
        plants_filter.output(),
        UniversalSelect(
            plants_filter.output().schema(), {
                'plants.height': {
                    'type': int,
                    'args': ['plants.height'],
Ejemplo n.º 12
0
                'function': lambda v: intersection(v, query),
            },
            # keep OID
            'states.oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v,
            }
        }
    )
)
engines.append(states_select)

# Pass the state geometry through unchanged, exposed under the name
# 'counties.the_geom'; it is the only attribute defined for the county
# query.
states_query = Select(
    states_select.output(),
    UniversalSelect(states_select.output().schema(), {
        'counties.the_geom': {
            'type': Geometry,
            'args': ['states.the_geom'],
            'function': lambda v: v,
        },
    }),
)
engines.append(states_query)

# Source for the final step: an R-tree over the counties file.
counties_source = Rtree(counties_file, 'counties.the_geom')
Ejemplo n.º 13
0
# County lookup driven by the query stream, using the FindRange strategy.
county_accessor = DataAccessor(query_streamer.output(), county_source,
                               FindRange)
data_accessors.append(county_accessor)

# Demultiplexer over the county results, plus a sub-schema built from the
# mapping {'county': 'zip'} (presumably an attribute rename -- confirm
# against SubSchema).
demux = Demux(county_accessor.output())
sub_schema = SubSchema(demux.schema(), {'county': 'zip'})
 
for i in range(0, 2):
    select = Select(demux, sub_schema)
    county_selects.append(select)
    zip_accessor = DataAccessor(
        select.output(),
        zip_source,
        FindRange
    )
    sub_schema2 = SubSchema(zip_accessor.output().schema(), {'oid': 'zip'})
    zip_select = Select(zip_accessor.output(), sub_schema2)
    zip_selects.append(zip_select)
    sort = Sort(
        zip_select.output(), 
        {'zip': lambda a, b: cmp(a / 100, b / 100)},
        # {'zip': None },
        # True
    )
    sorts.append(sort)
    group = Group(
        sort.output(), 
Ejemplo n.º 14
0
#############################################################

# Pass the query geometry through unchanged, exposed under the name
# 'states.geom' so it can drive the states lookup.
states_query = Select(
    query_streamer.output(),
    UniversalSelect(
        query_streamer.output().schema(),
        {
            'states.geom': {
                'type': Geometry,
                'args': ['queries.geom'],
                'function': lambda v: v,
            },
        },
    ),
)
engines.append(states_query)

# R-tree source over the states file, and the accessor that queries it
# with the output of states_query using the FindRange strategy.
states_source = Rtree(states_file, 'states.geom')
states_accessor = DataAccessor(
    states_query.output(),
    states_source,
    FindRange,
)
engines.append(states_accessor)

states_select = Select(
    states_accessor.output(),
    UniversalSelect(
        states_accessor.output().schema(), {
            'states.oid': {
                'type': int,
                'args': ['oid'],
                'function': lambda v: v,
            },
            'states.geom': {
                'type': Geometry,
                'args': ['states.geom'],
                'function': lambda v: v,
Ejemplo n.º 15
0
            species_accessor.output().schema(),
            {
                'plants.species_id': {
                    'type': int,
                    'args': ['species.id'],
                    'function': lambda v: v
                }
            }
        )
    )
    engines.append(species_id_select)
    # Data source for the plants ('plants' table of input_file).
    plants_source = DBTable(input_file, 'plants', plants_schema)
    # Data accessor for the plants data source, queried with the output
    # of species_id_select using the FindIdentities strategy.
    plants_accessor = DataAccessor(
        species_id_select.output(), 
        plants_source,
        FindIdentities
    )
    engines.append(plants_accessor)

    # Filter the retrieved plants with FilterAge (its criterion is
    # defined outside this section).
    plants_filter = Filter(
        plants_accessor.output(),
        FilterAge(plants_accessor.output().schema())
    )
    engines.append(plants_filter)

    # Select only the plant height from the filtered plants.
    plants_height_select = Select(
        plants_filter.output(),
        UniversalSelect(