l = df['pickup_longitude']
    return df[(l < -70) & (l > -80)]


def print_len(x):
    if x is not None:
        print(len(x))


#log_level()

try:
    s = scheduler
except:
    s = Scheduler()

csv = CSVLoader(get_dataset('bigfile'),
                header=None,
                index_col=False,
                force_valid_ids=True,
                scheduler=s)
pr = Every(scheduler=s)
pr.input.df = csv.output.table
scatterplot = ScatterPlot(x_column='_1', y_column='_2', scheduler=s)
scatterplot.create_dependent_modules(csv, 'table')

if __name__ == '__main__':
    csv.start()
    s.join()
    print(len(csv.df()))
"""
Clustering datasets may be found at
https://cs.joensuu.fi/sipu/datasets/
"""
from progressivis import Scheduler, Every#, log_level
from progressivis.cluster import MBKMeans
from progressivis.io import CSVLoader
from progressivis.vis import ScatterPlot
from progressivis.datasets import get_dataset

try:
    s = scheduler
except NameError:
    s = Scheduler()
    #log_level(package="progressivis.cluster")

data = CSVLoader(get_dataset('cluster:s1'),sep='\\s+',skipinitialspace=True,header=None,index_col=False,scheduler=s)
mbkmeans = MBKMeans(columns=['_0', '_1'], n_clusters=15, batch_size=100, is_input=False, scheduler=s)
mbkmeans.input.table = data.output.table
prn = Every(scheduler=s)
prn.input.df = mbkmeans.output.table
sp = ScatterPlot('_0','_1', scheduler=s)

sp.move_point = mbkmeans # for input management
sp.create_dependent_modules(data,'table', sample=None, select=mbkmeans)

if __name__ == '__main__':
    data.start()
    s.join()
from progressivis import Scheduler, Print
from progressivis.cluster import MBKMeans
from progressivis.stats import RandomTable
from progressivis.vis import ScatterPlot

try:
    s = scheduler
except:
    s = Scheduler()

table = RandomTable(columns=['a', 'b'], rows=50000, throttle=500, scheduler=s)
mbkmeans = MBKMeans(columns=['a', 'b'], n_clusters=8, batch_size=100, is_input=False, scheduler=s)
mbkmeans.input.table = table.output.table
prn = Print(scheduler=s)
prn.input.df = mbkmeans.output.table
sp = ScatterPlot('a', 'b', scheduler=s)
sp.create_dependent_modules(mbkmeans,'table')

if __name__ == '__main__':
    table.start()