def setUpStep(self, step):
    """Prepare one benchmark step: generate step * L random rows and keep a snapshot."""
    self.set_step_info("{} rows".format(step * L))
    s = Scheduler()
    random = RandomTable(10, rows=step * L, scheduler=s)
    s.start()
    # Snapshot the produced table as a pandas DataFrame for later checks.
    self.random_table = pd.DataFrame(
        random.output.table.output_module.table().to_dict())
def p10s_random_min_max(n):
    """Benchmark Min/Max over a RandomTable of n * L rows using hdf5 storage."""
    StorageEngine.default = "hdf5"
    s = Scheduler()
    random = RandomTable(10, rows=n * L, scheduler=s)
    min_ = Min(name='min_' + str(hash(random)), scheduler=s)
    min_.input.table = random.output.table
    max_ = Max(name='max_' + str(hash(random)), scheduler=s)
    max_.input.table = random.output.table
    s.start()
def test_input(self):
    """Feed an Input module from a background thread; expect 10 rows at the end."""
    s = Scheduler()
    inp = Input(scheduler=s)
    pr = Print(scheduler=s)
    pr.input.df = inp.output.df
    # do_line pushes lines into `inp` while the scheduler runs.
    t = threading.Thread(target=do_line, args=(inp, s))
    t.start()
    s.start()
    self.assertEqual(len(inp.df()), 10)
def p10s_random_min_max(self):
    """Benchmark Min/Max over a RandomTable of current_step * L rows (hdf5 storage).

    Fixed: the two module constructors used inconsistent keywords
    (``mid=`` and ``id=``); the sibling free function uses ``name=``,
    so both now use ``name=`` for consistency.
    """
    n = self.current_step
    StorageEngine.default = "hdf5"
    s = Scheduler()
    random = RandomTable(10, rows=n * L, scheduler=s)
    min_ = Min(name='min_' + str(hash(random)), scheduler=s)
    min_.input.table = random.output.table
    max_ = Max(name='max_' + str(hash(random)), scheduler=s)
    max_.input.table = random.output.table
    s.start()
def test_max(self):
    """Max module must agree with pandas' column-wise max.

    Fixed: the local variable was named ``max``, shadowing the builtin;
    renamed to ``max_``.
    """
    s = Scheduler()
    random = RandomTable(10, rows=10000, scheduler=s)
    max_ = Max(scheduler=s)
    max_.input.df = random.output.df
    pr = Print(scheduler=s)
    pr.input.df = max_.output.df
    s.start()
    res1 = random.df()[random.columns.difference([random.UPDATE_COLUMN])].max()
    res2 = last_row(max_.df(), remove_update=True)
    self.assertTrue(np.allclose(res1, res2))
def test_random_table(self):
    """RandomTable with explicit column names yields the expected columns and rows."""
    s = Scheduler()
    module = RandomTable(['a', 'b'], rows=10000, scheduler=s)
    self.assertEqual(module.df().columns[0], 'a')
    self.assertEqual(module.df().columns[1], 'b')
    # Two data columns plus the UPDATE_COLUMN.
    self.assertEqual(len(module.df().columns), 3)
    prlen = Every(proc=print_len, constant_time=True, scheduler=s)
    prlen.input.df = module.output.df
    s.start()
    self.assertEqual(len(module.df()), 10000)
    self.assertFalse(module.df()['a'].isnull().any())
    self.assertFalse(module.df()['b'].isnull().any())
def test_filter(self):
    """FilterMod must select exactly the rows where _1 > 0.5."""
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    filter_ = FilterMod(expr='_1 > 0.5', scheduler=s)
    filter_.input.table = random.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = filter_.output.table
    s.start()
    s.join()
    # Recompute the expected selection directly on the input data.
    idx = filter_.get_input_slot('table').data().eval(
        '_1>0.5', result_object='index')
    self.assertEqual(filter_._table.selection, bitmap(idx))
def test_mb_k_means(self):
    """MBKMeans must produce one label per row loaded by the CSV loader.

    Fixed: replaced the deprecated ``assertEquals`` alias with ``assertEqual``.
    """
    # log_level()
    s = Scheduler()
    n_clusters = 3
    csv = CSVLoader(get_dataset('cluster:s3'), sep=' ', skipinitialspace=True,
                    header=None, index_col=False, scheduler=s)
    km = MBKMeans(n_clusters=n_clusters, random_state=42, is_input=False,
                  scheduler=s)
    km.input.df = csv.output.df
    pr = Print(scheduler=s)
    pr.input.df = km.output.df
    e = Every(scheduler=s)
    e.input.df = km.output.labels
    s.start()
    self.assertEqual(len(csv.df()), len(km.labels()))
def test_random_table2(self):
    """Large RandomTable with force_valid_ids: columns are renamed _1, _2, ..."""
    s = Scheduler()
    # Produces more than 4M rows per second on my laptop.
    module = RandomTable(10, rows=10000000, force_valid_ids=True, scheduler=s)
    # Ten data columns plus the UPDATE_COLUMN.
    self.assertEqual(len(module.df().columns), 11)
    self.assertEqual(module.df().columns[0], '_1')
    self.assertEqual(module.df().columns[1], '_2')
    prlen = Every(proc=print_len, constant_time=True, scheduler=s)
    prlen.input.df = module.output.df
    s.start()
    self.assertEqual(len(module.df()), 10000000)
    self.assertFalse(module.df()['_1'].isnull().any())
    self.assertFalse(module.df()['_2'].isnull().any())
def test_var(self):
    """Var module must agree with pandas' variance on the single column."""
    s = Scheduler()
    random = RandomTable(1, rows=1000, scheduler=s)
    var = Var(scheduler=s)
    var.input.df = random.output.df
    pr = Print(scheduler=s)
    pr.input.df = var.output.df
    s.start()
    res1 = random.df()[1].var()
    res2 = last_row(var.df(), remove_update=True)
    self.assertTrue(np.allclose(res1, res2))
def test_idxmin(self):
    """IdxMin's min output must agree with the Min module.

    Fixed: the local variable was named ``min``, shadowing the builtin;
    renamed to ``min_``.
    """
    s = Scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    idxmin = IdxMin(scheduler=s)
    idxmin.input.df = random.output.df
    min_ = Min(scheduler=s)
    min_.input.df = random.output.df
    pr = Print(scheduler=s)
    pr.input.df = idxmin.output.min
    s.start()
    min1 = last_row(min_.df(), remove_update=True)
    min2 = last_row(idxmin.min(), remove_update=True)
    self.assertTrue((min1 == min2).all())
def test_idxmax(self):
    """IdxMax's max output must agree with the Max module.

    Fixed: the local variable was named ``max``, shadowing the builtin;
    renamed to ``max_``.
    """
    s = Scheduler()
    random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
    idxmax = IdxMax(scheduler=s)
    idxmax.input.df = random.output.df
    max_ = Max(scheduler=s)
    max_.input.df = random.output.df
    pr = Print(scheduler=s)
    pr.input.df = idxmax.output.max
    s.start()
    max1 = last_row(max_.df(), remove_update=True)
    max2 = last_row(idxmax.max(), remove_update=True)
    self.assertTrue((max1 == max2).all())
def test_dummy(self):
    """Smoke test: DummyMod feeding Max runs to completion."""
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    dummy_ = DummyMod(update_column='_1',
                      delete_rows=5,
                      update_rows=5,
                      fixed_step_size=100,
                      scheduler=s)
    dummy_.input.table = random.output.table
    max_ = Max(name='max_' + str(hash(random)), scheduler=s)
    max_.input.table = dummy_.output.table
    pr = Print(proc=self.terse, scheduler=s)
    pr.input.df = max_.output.table
    s.start()
    s.join()
def NOtest_vec_distances(self):
    """(Disabled) PairwiseDistances on VEC data must match sklearn.

    Fixed: replaced the deprecated ``assertEquals`` alias with ``assertEqual``.
    """
    s = Scheduler()
    vec = VECLoader(get_dataset('warlogs'), scheduler=s)
    dis = PairwiseDistances(metric='cosine', scheduler=s)
    dis.input.df = vec.output.df
    dis.input.array = vec.output.array
    cnt = Every(proc=print_len, constant_time=True, scheduler=s)
    cnt.input.df = dis.output.dist
    global times
    times = 0
    s.start()
    df = vec.df()
    computed = dis.dist()
    self.assertEqual(computed.shape[0], len(df))
    truth = pairwise_distances(vec.toarray(), metric=dis._metric)
    self.assertTrue(np.allclose(truth, computed))
def test_hub_if_else(self):
    """Hub merging the two Switch branches must expose the Min result."""
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    stirrer = Stirrer(
        update_column="_1",
        delete_rows=5,
        update_rows=5,
        fixed_step_size=100,
        scheduler=s,
    )
    stirrer.input[0] = random.output.result
    # Condition is always False, so data flows through result_else.
    switch = Switch(condition=lambda x: False, scheduler=s)
    switch.input[0] = stirrer.output.result
    max_ = Max(name="max_" + str(hash(random)), scheduler=s)
    max_.input[0] = switch.output.result
    min_ = Min(name="min_" + str(hash(random)), scheduler=s)
    min_.input[0] = switch.output.result_else
    hub = Hub(scheduler=s)
    hub.input.table = min_.output.result
    hub.input.table = max_.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = hub.output.result
    aio.run(s.start())
    res1 = stirrer.result.min()
    res2 = hub.result
    self.compare(res1, res2)
def test_filter(self) -> None:
    """FilterMod must keep exactly the index of rows where _1 > 0.5."""
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    filter_ = FilterMod(expr="_1 > 0.5", scheduler=s)
    filter_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = filter_.output.result
    aio.run(s.start())
    # Recompute the expected index directly on the input slot's data.
    idx = (filter_.get_input_slot("table").data().eval(
        "_1>0.5", result_object="index"))
    self.assertEqual(filter_.table.index, bitmap(idx))
def test_csv_distances(self):
    """Progressive pairwise distances must match sklearn on a 5000-row prefix."""
    s = Scheduler()
    vec = CSVLoader(get_dataset('smallfile'), index_col=False, header=None,
                    scheduler=s)
    dis = PairwiseDistances(metric='euclidean', scheduler=s)
    dis.input.df = vec.output.df
    cnt = Every(proc=print_len, constant_time=True, scheduler=s)
    cnt.input.df = dis.output.dist
    global times
    times = 0
    s.start(ten_times)
    df = vec.df()
    computed = dis.dist()
    # self.assertEqual(computed.shape[0], len(df))
    del df[CSVLoader.UPDATE_COLUMN]
    offset = 0
    size = offset + 5000
    truth = pairwise_distances(df.iloc[offset:size], metric=dis._metric)
    dist = computed[offset:size, offset:size]
    # Reduced tolerance for the progressive computation.
    self.assertTrue(np.allclose(truth, dist, atol=1e-7))
def test_select_delta(self):
    """SelectDelta keeps all 3 points while AddToRow nudges row 1.

    Fixed: the error report used a Python 2 print statement
    (``print 'Error: %s'%e``), a syntax error under Python 3; now a
    print() call.
    """
    # log_level()
    delta = np.array([0, 0.05])
    points = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0]]
    s = Scheduler()
    df = pd.DataFrame(points)
    add_to_row = AddToRow(df, scheduler=s)

    def tick_proc(s, run_number):
        # Stop after 100 runs; until then keep nudging row 1 by delta.
        if run_number > 100:
            s.stop()
        try:
            add_to_row.from_input({1: delta})
        except Exception as e:
            print('Error: %s' % e)

    q = SelectDelta(delta=0.5, scheduler=s)
    q.input.df = add_to_row.output.df
    prlen = Every(scheduler=s)
    prlen.input.df = q.output.df
    s.start(tick_proc=tick_proc)
    self.assertEqual(len(q.df()), 3)
def test_repair_min(self) -> None:
    """
    test_repair_min()
    min without deletes/updates
    """
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    min_ = ScalarMin(name="min_" + str(hash(random)), scheduler=s)
    min_.input[0] = random.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = min_.output.result
    aio.run(s.start())
    res1 = random.table.min()
    res2 = min_.psdict
    self.compare(res1, res2)
def test_repair_min2(self) -> None:
    """
    test_repair_min2()
    runs with sensitive ids deletion
    """
    s = Scheduler()
    ScalarMin._reset_calls_counter = 0  # type: ignore
    random = RandomTable(2, rows=100000, scheduler=s)
    min_ = ScalarMin(name="min_repair_test2", scheduler=s)
    stirrer = MyStirrer(watched="min_repair_test2", scheduler=s)
    stirrer.input[0] = random.output.result
    min_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = min_.output.result
    aio.run(s.start())
    # Deleting a sensitive id must trigger exactly one reset.
    self.assertEqual(ScalarMin._reset_calls_counter, 1)  # type: ignore
    res1 = stirrer.table.min()
    res2 = min_.psdict
    self.compare(res1, res2)
def test_stirrer(self) -> None:
    """Max over a stirred table must equal the table's max."""
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    stirrer = Stirrer(
        update_column="_1",
        delete_rows=5,
        update_rows=5,
        fixed_step_size=100,
        scheduler=s,
    )
    stirrer.input[0] = random.output.result
    max_ = Max(name="max_" + str(hash(random)), scheduler=s)
    max_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = max_.output.result
    aio.run(s.start())
    res1 = stirrer.table.max()
    res2 = max_.result
    self.compare(res1, res2)
def test_filter3(self) -> None:
    """FilterMod over a stirred (updated) table: index checked two ways."""
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    stirrer = Stirrer(update_column="_1",
                      update_rows=5,
                      fixed_step_size=100,
                      scheduler=s)
    stirrer.input[0] = random.output.result
    filter_ = FilterMod(expr="_1 > 0.5", scheduler=s)
    filter_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = filter_.output.result
    aio.run(s.start())
    tbl = filter_.get_input_slot("table").data()
    # Check against the table's own eval ...
    idx = tbl.eval("_1>0.5", result_object="index")
    self.assertEqual(filter_.table.index, bitmap(idx))
    # ... and against a pandas round-trip of the same data.
    df = pd.DataFrame(tbl.to_dict(), index=tbl.index.to_array())
    dfe = df.eval("_1>0.5")
    self.assertEqual(filter_.table.index, bitmap(df.index[dfe]))
def test_repair_max3(self) -> None:
    """
    test_repair_max3()
    runs with NON-sensitive ids deletion
    """
    s = Scheduler()
    ScalarMax._reset_calls_counter = 0  # type: ignore
    random = RandomTable(2, rows=100000, scheduler=s)
    max_ = ScalarMax(name="max_repair_test3", scheduler=s)
    stirrer = MyStirrer(watched="max_repair_test3",
                        proc_sensitive=False,
                        scheduler=s)
    stirrer.input[0] = random.output.result
    max_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = max_.output.result
    aio.run(s.start())
    # Non-sensitive deletions must not trigger any reset.
    self.assertEqual(ScalarMax._reset_calls_counter, 0)  # type: ignore
    res1 = stirrer.table.max()
    res2 = max_.psdict
    self.compare(res1, res2)
def test_scheduler(self) -> None:
    """End-to-end scheduler test: named lookup, dynamic module addition,
    running-state check, and JSON/HTML export.

    NOTE(review): block structure reconstructed from a whitespace-mangled
    source; statement order preserved as written.
    """
    with self.assertRaises(ProgressiveError):
        s = Scheduler(0)
    s = Scheduler()
    csv = CSVLoader(
        get_dataset("bigfile"),
        name="csv",
        index_col=False,
        header=None,
        scheduler=s,
    )
    self.assertIs(s["csv"], csv)
    sink = Sink(name="sink", scheduler=s)
    sink.input.inp = csv.output.result  # allow csv to start
    check_running = False

    async def _is_running() -> None:
        nonlocal check_running
        check_running = csv.scheduler().is_running()

    aio.run_gather(s.start(), _is_running())
    self.assertTrue(check_running)

    def add_min(s: Scheduler, r: int) -> None:
        # Dynamically extend the dataflow while the scheduler is live.
        with s:
            m = Min(scheduler=s)
            m.input.table = csv.output.result
            prt = Print(proc=self.terse, scheduler=s)
            prt.input.df = m.output.result

    s.on_loop(add_min, 10)
    s.on_loop(self._stop, 20)
    self.assertIs(s["csv"], csv)
    json = s.to_json(short=False)
    self.assertFalse(json["is_running"])
    self.assertTrue(json["is_terminated"])
    html = s._repr_html_()
    self.assertTrue(len(html) != 0)
def test_repair_max5(self) -> None:
    """
    test_repair_max5()
    runs with sensitive ids update (critical)
    """
    s = Scheduler()
    ScalarMax._reset_calls_counter = 0  # type: ignore
    random = RandomTable(2, rows=100000, scheduler=s)
    max_ = ScalarMax(name="max_repair_test4", scheduler=s)
    stirrer = MyStirrer(watched="max_repair_test4",
                        mode="update",
                        value=-9999.0,
                        scheduler=s)
    stirrer.input[0] = random.output.result
    max_.input[0] = stirrer.output.result
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = max_.output.result
    aio.run(s.start())
    # Updating a sensitive id downward must trigger exactly one reset.
    self.assertEqual(ScalarMax._reset_calls_counter, 1)  # type: ignore
    res1 = stirrer.table.max()
    res2 = max_.psdict
    self.compare(res1, res2)
def test_switch_if_then(self):
    """Switch with an always-True condition routes data to the then-branch Max."""
    s = Scheduler()
    random = RandomTable(2, rows=100000, scheduler=s)
    stirrer = Stirrer(
        update_column="_1",
        delete_rows=5,
        update_rows=5,
        fixed_step_size=100,
        scheduler=s,
    )
    stirrer.input[0] = random.output.result
    switch = Switch(condition=lambda x: True, scheduler=s)
    switch.input[0] = stirrer.output.result
    max_ = Max(name="max_" + str(hash(random)), scheduler=s)
    max_.input[0] = switch.output.result
    # The else branch still needs a consumer even though it stays empty.
    pr_else = Print(proc=self.terse, scheduler=s)
    pr_else.input[0] = switch.output.result_else
    pr = Print(proc=self.terse, scheduler=s)
    pr.input[0] = max_.output.result
    aio.run(s.start())
    res1 = stirrer.result.max()
    res2 = max_.result
    self.compare(res1, res2)
'sample': mbkmeans if i == 0 else None, 'input_module': filt, 'input_slot': 'table' }) sp = MCScatterPlot(scheduler=s, classes=classes) sp.create_dependent_modules() for i in range(n_clusters): cname = f"k{i}" sp[cname].min_value._table = PsDict({'_0': -np.inf, '_1': -np.inf}) sp[cname].max_value._table = PsDict({'_0': np.inf, '_1': np.inf}) mbkmeans.input.table = data.output.table mbkmeans.create_dependent_modules() sp.move_point = mbkmeans.moved_center # for input management def myprint(d): if d['convergence'] != 'unknown': print(d) else: print('.', end='') prn = Every(scheduler=s, proc=print) prn.input.df = mbkmeans.output.conv if __name__ == '__main__': #data.start() #s.join() aio.run(s.start())
def _common(
    self,
    rtol: float,
    threshold: Optional[int] = None,
    resetter: Optional[MyResetter] = None,
    resetter_func: Optional[Callable[[Slot], Any]] = None,
    scheduler: Optional[Scheduler] = None,
) -> float:
    """Run PPCA on mnist_784 and score a KNN classifier on the reconstruction.

    Returns 0 when the dataset cannot be downloaded.

    NOTE(review): reconstructed from a whitespace-mangled source; the extent
    of the ``if KNN is None:`` initialization block (through ``KNN.fit``) is
    an assumption — confirm against the original file.
    """
    global KNN, LABELS, INDICES
    s = Scheduler() if scheduler is None else scheduler
    try:
        dataset = get_dataset("mnist_784")
    except TimeoutError:
        print("Cannot download mnist")
        return 0
    data = CSVLoader(
        dataset,
        index_col=False,
        as_array="array",
        usecols=lambda x: x != "class",
        scheduler=s,
    )
    ppca = PPCA(scheduler=s)
    ppca.input[0] = data.output.result
    ppca.params.n_components = N_COMPONENTS
    if resetter:
        assert callable(resetter_func)
        resetter.input[0] = ppca.output.result
    ppca.create_dependent_modules(
        rtol=rtol,
        trace=TRACE,
        threshold=threshold,
        resetter=resetter,
        resetter_func=resetter_func,
    )
    prn = Every(scheduler=s, proc=_print)
    prn.input[0] = ppca.reduced.output.result
    aio.run(s.start())
    pca_ = ppca._transformer["inc_pca"]
    recovered = pca_.inverse_transform(_array(ppca.reduced.table))
    if KNN is None:
        print("Init KNN")
        KNN = KNeighborsClassifier(NNEIGHBOURS)
        arr = _array(data.table)
        df: pd.DataFrame = pd.read_csv(
            dataset, usecols=["class"]  # type: ignore
        )
        LABELS = df.values.reshape((-1,))
        indices_t = sample_without_replacement(
            n_population=len(data.table),
            n_samples=TRAIN_SAMPLE_SIZE,
            random_state=RANDOM_STATE,
        )
        KNN.fit(arr[indices_t], LABELS[indices_t])
    indices_p = sample_without_replacement(
        n_population=len(data.table),
        n_samples=PREDICT_SAMPLE_SIZE,
        random_state=RANDOM_STATE * 2 + 1,
    )
    return KNN.score(recovered[indices_p], LABELS[indices_p])  # type: ignore
def p10s_read_csv(f):
    """Benchmark: load CSV file *f* to completion under a fresh scheduler."""
    s = Scheduler()
    module = CSVLoader(f, index_col=False, header=None, scheduler=s)
    s.start()
# NYC yellow-taxi dropoff heatmap pipeline.
# Fixed: the Min/Max module variables shadowed the builtins `min` and `max`;
# renamed to `min_` and `max_`.
# SUFFIX = ''
PREFIX = '../nyc-taxi/'
SUFFIX = '.bz2'

URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]

filenames = pd.DataFrame({'filename': URLS})
cst = Constant(Table('filenames', data=filenames), scheduler=s)
csv = CSVLoader(index_col=False, skipinitialspace=True,
                usecols=['dropoff_longitude', 'dropoff_latitude'],
                filter_=filter_, scheduler=s)
csv.input.filenames = cst.output.table
min_ = Min(scheduler=s)
min_.input.table = csv.output.table
max_ = Max(scheduler=s)
max_.input.table = csv.output.table
histogram2d = Histogram2D('dropoff_longitude', 'dropoff_latitude',
                          xbins=RESOLUTION, ybins=RESOLUTION, scheduler=s)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min_.output.table
histogram2d.input.max = max_.output.table
heatmap = Heatmap(filename='nyc_dropoff_yellow%d.png', history=5, scheduler=s)
heatmap.input.array = histogram2d.output.table

if __name__ == '__main__':
    s.start()
"""Progressive MBKMeans visualized with an MCScatterPlot.

Fixed: the scheduler lookup used a bare ``except:``, which would also
swallow KeyboardInterrupt/SystemExit; narrowed to ``except NameError``,
the only exception the ``s = scheduler`` probe can legitimately raise.
"""
from progressivis import Scheduler, Print
from progressivis.cluster import MBKMeans
from progressivis.stats import RandomTable
from progressivis.vis import MCScatterPlot
import asyncio as aio

try:
    s = scheduler
except NameError:
    s = Scheduler()

table = RandomTable(columns=['a', 'b'], rows=50000, throttle=500, scheduler=s)
mbkmeans = MBKMeans(columns=['a', 'b'], n_clusters=8, batch_size=100,
                    is_input=False, scheduler=s)
mbkmeans.input.table = table.output.table
prn = Print(scheduler=s)
prn.input.df = mbkmeans.output.table
sp = MCScatterPlot(scheduler=s, classes=[('Scatterplot', 'a', 'b')],
                   approximate=True)
sp.create_dependent_modules(mbkmeans, 'table')
# Start refining the 2D range queries immediately.
sp['Scatterplot'].range_query_2d.hist_index_x.params.init_threshold = 1
sp['Scatterplot'].range_query_2d.hist_index_y.params.init_threshold = 1

if __name__ == '__main__':
    # table.start()
    aio.run(s.start(coros=[aio.sleep(3600)]))
def make_df(n, L):
    """Generate n * L random rows (10 columns) and return them as a DataFrame."""
    s = Scheduler()
    random = RandomTable(10, rows=n * L, scheduler=s)
    s.start()
    return pd.DataFrame(random.output.table.output_module.table().to_dict())
def p10s_zarr_random(n):
    """Benchmark RandomTable generation of n * L rows with zarr storage."""
    StorageEngine.default = "zarr"
    s = Scheduler()
    random = RandomTable(10, rows=n * L, scheduler=s)
    s.start()
def p10s_random(self):
    """Benchmark RandomTable generation of current_step * L rows with hdf5 storage."""
    n = self.current_step
    StorageEngine.default = "hdf5"
    s = Scheduler()
    random = RandomTable(10, rows=n * L, scheduler=s)
    s.start()
def test_read_csv(self):
    """CSVLoader starts with no data and loads all 1M rows of 'bigfile'.

    Fixed: replaced ``assertTrue(x is None)`` with the idiomatic
    ``assertIsNone``, which gives a clearer failure message.
    """
    s = Scheduler()
    module = CSVLoader(get_dataset('bigfile'), index_col=False, header=None,
                       scheduler=s)
    self.assertIsNone(module.df())
    s.start()
    self.assertEqual(len(module.df()), 1000000)
# NYC yellow-taxi pickup heatmap pipeline (fixed bounds via Constant modules).
# Fixed: the Constant module variables shadowed the builtins `min` and `max`;
# renamed to `min_` and `max_`.
SUFFIX = '.bz2'

URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]

filenames = pd.DataFrame({'filename': URLS})
cst = Constant(df=filenames, scheduler=s)
csv = CSVLoader(index_col=False, skipinitialspace=True,
                usecols=['pickup_longitude', 'pickup_latitude'],
                filter=filter, scheduler=s)
csv.input.filenames = cst.output.df
# min = Min(scheduler=s)
# min.input.df = csv.output.df
# max = Max(scheduler=s)
# max.input.df = csv.output.df
min_ = Constant(df=pd.DataFrame([bounds_min]), scheduler=s)
max_ = Constant(df=pd.DataFrame([bounds_max]), scheduler=s)
histogram2d = Histogram2D('pickup_longitude', 'pickup_latitude',
                          xbins=RESOLUTION, ybins=RESOLUTION, scheduler=s)
histogram2d.input.df = csv.output.df
histogram2d.input.min = min_.output.df
histogram2d.input.max = max_.output.df
heatmap = Heatmap(filename='nyc_pickup_yellow%d.png', history=5, scheduler=s)
heatmap.input.array = histogram2d.output.df

if __name__ == '__main__':
    s.start()