Ejemplo n.º 1
0
 def test_histogram1d1(self) -> None:
     """Check Histogram1D on column "_2" of the "bigfile" CSV against numpy.

     The "array" value of the last histogram row must equal what
     ``np.histogram`` returns for the same column, using the module's bin
     count and the (min, max) range stored in that same row.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     hist = Histogram1D("_2", scheduler=sched)
     hist.input[0] = loader.output.result
     hist.input.min = lower.output.result
     hist.input.max = upper.output.result
     every = Every(proc=self.terse, scheduler=sched)
     every.input[0] = hist.output.result
     aio.run(sched.start())
     hist.trace_stats()

     # Progressive result: last row of the histogram table.
     final_row = notNone(hist.table.last()).to_dict()
     observed = final_row["array"]
     value_range = (final_row["min"], final_row["max"])

     # Reference result: the same column read directly with pandas.
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[2]  # type: ignore
     )
     values = frame.to_numpy().reshape(-1)
     expected, _ = np.histogram(  # type: ignore
         values, bins=hist.params.bins, density=False, range=value_range
     )
     self.assertListEqual(observed.tolist(), expected.tolist())
Ejemplo n.º 2
0
    def t_histogram1d_impl(self, **kw: Any) -> None:
        """Run Histogram1D on column "_2" behind a Stirrer and verify it.

        Extra keyword arguments are forwarded to the ``Stirrer`` constructor.
        The final histogram is compared with ``np.histogram`` computed on the
        "_2" column of the stirrer's table, using the module's bin count and
        the (min, max) range stored in the last histogram row.
        """
        sched = self.scheduler()
        loader = CSVLoader(
            get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
        )
        mixer = Stirrer(
            update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
        )
        mixer.input[0] = loader.output.result
        lower = Min(scheduler=sched)
        lower.input[0] = mixer.output.result
        upper = Max(scheduler=sched)
        upper.input[0] = mixer.output.result
        hist = Histogram1D("_2", scheduler=sched)
        hist.input[0] = mixer.output.result
        hist.input.min = lower.output.result
        hist.input.max = upper.output.result

        every = Every(proc=self.terse, scheduler=sched)
        every.input[0] = hist.output.result
        aio.run(sched.start())
        hist.trace_stats()

        # Progressive result: last row of the histogram table.
        final_row = notNone(hist.table.last()).to_dict()
        observed = final_row["array"]
        value_range = (final_row["min"], final_row["max"])

        # Reference result: histogram of the stirrer's "_2" column.
        column = mixer.table.loc[:, ["_2"]]
        assert column is not None
        values = column.to_array().reshape(-1)
        expected, _ = np.histogram(  # type: ignore
            values, bins=hist.params.bins, density=False, range=value_range
        )
        self.assertEqual(np.sum(observed), np.sum(expected))
        self.assertListEqual(observed.tolist(), expected.tolist())
Ejemplo n.º 3
0
 def test_hub_if_else(self):
     """Feed a Hub from both Switch branches and compare with the table min.

     The switch condition always returns False. Max is attached to the
     switch's ``result`` output and Min to its ``result_else`` output; both
     are plugged into the hub, whose result is compared with
     ``stirrer.result.min()``.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     mixer = Stirrer(
         update_column="_1",
         delete_rows=5,
         update_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     mixer.input[0] = source.output.result
     router = Switch(condition=lambda x: False, scheduler=sched)
     router.input[0] = mixer.output.result
     then_max = Max(name="max_" + str(hash(source)), scheduler=sched)
     then_max.input[0] = router.output.result
     else_min = Min(name="min_" + str(hash(source)), scheduler=sched)
     else_min.input[0] = router.output.result_else
     merger = Hub(scheduler=sched)
     merger.input.table = else_min.output.result
     merger.input.table = then_max.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = merger.output.result
     aio.run(sched.start())
     expected = mixer.result.min()
     observed = merger.result
     self.compare(expected, observed)
Ejemplo n.º 4
0
 def test_idxmax2(self) -> None:
     """Compare Max and IdxMax when both are fed by a row-deleting Stirrer.

     The dict held by ``Max`` must match the last row of the table returned
     by ``IdxMax.max()``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     mixer = Stirrer(
         update_column="_1",
         delete_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     mixer.input[0] = source.output.result
     idx_mod = IdxMax(scheduler=sched)
     idx_mod.input[0] = mixer.output.result
     max_mod = Max(scheduler=sched)
     max_mod.input[0] = mixer.output.result
     idx_printer = Print(proc=self.terse, scheduler=sched)
     idx_printer.input[0] = idx_mod.output.result
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     from_max = max_mod.psdict
     idx_table = idx_mod.max()
     assert idx_table is not None
     from_idxmax = notNone(idx_table.last()).to_dict()
     self.compare(from_max, from_idxmax)
Ejemplo n.º 5
0
 def test_hist_index_min_max(self):
     """Test min_out and max_out on HistogramIndex.

     A RangeQuery on column '_1' with constant bounds 0.3 and 0.8 creates
     its dependent modules. Min and Max are plugged on the histogram index's
     ``min_out`` / ``max_out`` outputs and their last values are compared
     with the min and max of column '_1' of the random table.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     low_bound = Constant(
         table=Table(name=None, dshape='{_1: float64}', data={'_1': [0.3]}),
         scheduler=sched,
     )
     high_bound = Constant(
         table=Table(name=None, dshape='{_1: float64}', data={'_1': [0.8]}),
         scheduler=sched,
     )
     query = RangeQuery(column='_1', scheduler=sched)
     query.create_dependent_modules(
         source, 'table', min_value=low_bound, max_value=high_bound
     )
     query_printer = Print(proc=self.terse, scheduler=sched)
     query_printer.input.df = query.output.table
     index = query.hist_index
     min_mod = Min(name='min_' + str(hash(index)), scheduler=sched)
     min_mod.input.table = index.output.min_out
     min_printer = Print(proc=self.terse, scheduler=sched)
     min_printer.input.df = min_mod.output.table
     max_mod = Max(name='max_' + str(hash(index)), scheduler=sched)
     max_mod.input.table = index.output.max_out
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input.df = max_mod.output.table
     sched.start()
     sched.join()

     expected_min = source.table().min()['_1']
     observed_min = min_mod.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_min, observed_min)

     expected_max = source.table().max()['_1']
     observed_max = max_mod.table().last().to_dict()['_1']
     self.assertAlmostEqual(expected_max, observed_max)
Ejemplo n.º 6
0
 def test_hist_index_min_max(self) -> None:
     """Test min_out and max_out on HistogramIndex.

     The dataflow is built inside the scheduler's context: a RangeQuery on
     column "_1" with constant bounds 0.3 and 0.8, plus Min and Max plugged
     on the histogram index's ``min_out`` / ``max_out`` outputs. After the
     run their values are compared with the min and max of column "_1" of
     the random table.
     """
     sched = self.scheduler()
     with sched:
         source = RandomTable(2, rows=100000, scheduler=sched)
         low_bound = Constant(table=PsDict({"_1": 0.3}), scheduler=sched)
         high_bound = Constant(table=PsDict({"_1": 0.8}), scheduler=sched)
         query = RangeQuery(column="_1", scheduler=sched)
         query.create_dependent_modules(
             source, "result", min_value=low_bound, max_value=high_bound
         )
         query_printer = Print(proc=self.terse, scheduler=sched)
         query_printer.input[0] = query.output.result
         index = query.hist_index
         assert index is not None
         min_mod = Min(name="min_" + str(hash(index)), scheduler=sched)
         min_mod.input[0] = index.output.min_out
         min_printer = Print(proc=self.terse, scheduler=sched)
         min_printer.input[0] = min_mod.output.result
         max_mod = Max(name="max_" + str(hash(index)), scheduler=sched)
         max_mod.input[0] = index.output.max_out
         max_printer = Print(proc=self.terse, scheduler=sched)
         max_printer.input[0] = max_mod.output.result
     aio.run(sched.start())

     expected_min = cast(float, source.table.min()["_1"])
     observed_min = cast(float, min_mod.psdict["_1"])
     self.assertAlmostEqual(expected_min, observed_min)

     expected_max = cast(float, source.table.max()["_1"])
     observed_max = cast(float, max_mod.psdict["_1"])
     self.assertAlmostEqual(expected_max, observed_max)
Ejemplo n.º 7
0
 def t_histogram2d_impl(self, **kw: Any) -> None:
     """Run Histogram2D behind a Stirrer and verify it with ``fh.histogram2d``.

     Extra keyword arguments are forwarded to the ``Stirrer`` constructor.
     The last histogram array is compared with a reference computed on the
     "_1" and "_2" columns of the stirrer's table; the reference is flipped
     along axis 0 before the comparison.
     """
     sched = self.scheduler()
     source = RandomTable(3, rows=100000, scheduler=sched)
     mixer = Stirrer(
         update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
     )
     mixer.input[0] = source.output.result
     lower = Min(scheduler=sched)
     lower.input[0] = mixer.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = mixer.output.result
     hist2d = Histogram2D(0, 1, xbins=100, ybins=100, scheduler=sched)
     hist2d.input[0] = mixer.output.result
     hist2d.input.min = lower.output.result
     hist2d.input.max = upper.output.result
     heat = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heat.input.array = hist2d.output.result
     every = Every(proc=self.terse, scheduler=sched)
     every.input[0] = heat.output.result
     aio.run(sched.start())

     # Progressive result: last row of the histogram table.
     final_row = notNone(hist2d.table.last()).to_dict()
     observed = final_row["array"]
     value_range = [
         [final_row["ymin"], final_row["ymax"]],
         [final_row["xmin"], final_row["xmax"]],
     ]

     # Reference result: y values are passed first, matching the bin order.
     columns = mixer.table.loc[:, ["_1", "_2"]]
     assert columns is not None
     values = columns.to_array()
     bin_counts = [hist2d.params.ybins, hist2d.params.xbins]
     expected = fh.histogram2d(
         values[:, 1], values[:, 0], bins=bin_counts, range=value_range
     )
     expected = np.flip(expected, axis=0)  # type: ignore
     self.assertEqual(np.sum(observed), np.sum(expected))
     self.assertListEqual(
         observed.reshape(-1).tolist(), expected.reshape(-1).tolist()
     )
Ejemplo n.º 8
0
 def test_histogram2d1(self) -> None:
     """Check Histogram2D on the "bigfile" CSV against ``fh.histogram2d``.

     The reference is computed on CSV columns 1 and 2 read with pandas,
     flipped along axis 0, and compared with the last histogram array using
     ``np.allclose``.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     hist2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     hist2d.input[0] = loader.output.result
     hist2d.input.min = lower.output.result
     hist2d.input.max = upper.output.result
     heat = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heat.input.array = hist2d.output.result
     every = Every(proc=self.terse, scheduler=sched)
     every.input[0] = heat.output.result
     aio.run(loader.scheduler().start())

     # Progressive result: last row of the histogram table.
     final_row = notNone(hist2d.table.last()).to_dict()
     observed = final_row["array"]
     value_range = [
         [final_row["ymin"], final_row["ymax"]],
         [final_row["xmin"], final_row["xmax"]],
     ]

     # Reference result: y values are passed first, matching the bin order.
     frame = pd.read_csv(
         get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
     )
     values = frame.to_numpy()
     bin_counts = [hist2d.params.ybins, hist2d.params.xbins]
     expected = fh.histogram2d(
         values[:, 1], values[:, 0], bins=bin_counts, range=value_range
     )
     expected = np.flip(expected, axis=0)  # type: ignore
     self.assertTrue(np.allclose(observed, expected))
Ejemplo n.º 9
0
 def test_max(self):
     """Check Max on a 10-column RandomTable against the DataFrame maximum.

     The update column is excluded on both sides of the comparison.
     """
     sched = Scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     max_mod = Max(scheduler=sched)
     max_mod.input.df = source.output.df
     printer = Print(scheduler=sched)
     printer.input.df = max_mod.output.df
     sched.start()
     frame = source.df()
     data_columns = source.columns.difference([source.UPDATE_COLUMN])
     expected = frame[data_columns].max()
     observed = last_row(max_mod.df(), remove_update=True)
     self.assertTrue(np.allclose(expected, observed))
Ejemplo n.º 10
0
 def test_max(self) -> None:
     """Check that Max over a RandomTable equals the table's own maximum."""
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     max_mod = Max(name="max_" + str(hash(source)), scheduler=sched)
     max_mod.input[0] = source.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     expected = source.table.max()
     observed = max_mod.psdict
     self.compare(expected, observed)
Ejemplo n.º 11
0
 def test_max(self):
     """Check that the last row of Max equals the random table's maximum."""
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, scheduler=sched)
     max_mod = Max(name='max_' + str(hash(source)), scheduler=sched)
     max_mod.input.table = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = max_mod.output.table
     sched.start()
     sched.join()
     expected = source.table().max()
     observed = max_mod.table().last()
     self.compare(expected, observed)
Ejemplo n.º 12
0
 def test_max(self):
     """Check Max on the first 10,000 rows of "bigfile_multiscale".

     The module's result is compared with the maximum of the loader's result.
     """
     sched = self.scheduler()
     loader = SimpleCSVLoader(
         get_dataset("bigfile_multiscale"), nrows=10_000, scheduler=sched
     )
     max_mod = Max(name="max_" + str(hash(loader)), scheduler=sched)
     max_mod.input[0] = loader.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     expected = loader.result.max()
     observed = max_mod.result
     self.compare(expected, observed)
Ejemplo n.º 13
0
 def test_idxmax(self):
     """Check that IdxMax and Max report the same last row of maxima.

     Both modules read the same throttled RandomTable; the update column is
     removed from both rows before they are compared element-wise.
     """
     sched = Scheduler()
     source = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     idx_mod = IdxMax(scheduler=sched)
     idx_mod.input.df = source.output.df
     max_mod = Max(scheduler=sched)
     max_mod.input.df = source.output.df
     printer = Print(scheduler=sched)
     printer.input.df = idx_mod.output.max
     sched.start()
     from_max = last_row(max_mod.df(), remove_update=True)
     from_idxmax = last_row(idx_mod.max(), remove_update=True)
     self.assertTrue((from_max == from_idxmax).all())
Ejemplo n.º 14
0
 async def _add_max(scheduler: Scheduler, run_number: int) -> None:
     """Add a Max module named "max" and a Print module named "print_max".

     Both modules are created and connected inside the scheduler's context.
     ``run_number`` is not used by the body.
     """
     with scheduler:
         print("adding new modules")
         max_mod = Max(name="max", scheduler=scheduler)
         max_printer = Print(name="print_max", proc=proc, scheduler=scheduler)
         max_mod.input.table = table.output.result
         max_printer.input.df = max_mod.output.result
Ejemplo n.º 15
0
 def test_histogram2d(self):
     """Run a CSV -> Min/Max -> Histogram2D -> Heatmap dataflow to completion.

     No assertion is made on the result; the test only runs the scheduler and
     collects the histogram's trace statistics.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset('bigfile'), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input.table = loader.output.table
     upper = Max(scheduler=sched)
     upper.input.table = loader.output.table
     hist2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     hist2d.input.table = loader.output.table
     hist2d.input.min = lower.output.table
     hist2d.input.max = upper.output.table
     heat = Heatmap(filename='histo_%03d.png', scheduler=sched)
     heat.input.array = hist2d.output.table
     # The reporter watches the loader's table, not the heatmap output.
     every = Every(proc=self.terse, scheduler=sched)
     every.input.df = loader.output.table
     loader.scheduler().start()
     sched.join()
     _ = hist2d.trace_stats()
Ejemplo n.º 16
0
 def test_idxmax(self):
     """Check that Max and IdxMax report the same per-column maxima.

     Both modules read the same throttled RandomTable. After the run, the
     last row of the Max table and the last row of ``idxmax.max()`` are
     converted to dicts and compared column by column.
     """
     s = self.scheduler()
     random = RandomTable(10, rows=10000, throttle=1000, scheduler=s)
     idxmax = IdxMax(scheduler=s)
     idxmax.input.table = random.output.table
     max_ = Max(scheduler=s)
     max_.input.table = random.output.table
     pr = Print(proc=self.terse, scheduler=s)
     pr.input.df = idxmax.output.max
     s.start()
     s.join()
     max1 = max_.table().last().to_dict()
     max2 = idxmax.max().last().to_dict()
     # assertAlmostEqual cannot compare two dicts approximately: unless they
     # are exactly equal it evaluates `first - second` and raises TypeError.
     # Compare the key sets first, then each value with the usual tolerance.
     self.assertEqual(set(max1), set(max2))
     for column in max1:
         self.assertAlmostEqual(max1[column], max2[column])
Ejemplo n.º 17
0
 def test_histogram1d(self) -> None:
     """Run a CSV -> Min/Max -> Histogram1D dataflow on column "_2".

     No assertion is made on the result; the test only runs the scheduler and
     collects the histogram's trace statistics.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     hist = Histogram1D("_2", scheduler=sched)
     hist.input[0] = loader.output.result
     hist.input.min = lower.output.result
     hist.input.max = upper.output.result
     every = Every(proc=self.terse, scheduler=sched)
     every.input[0] = hist.output.result
     aio.run(sched.start())
     hist.trace_stats()
Ejemplo n.º 18
0
 def p10s_random_min_max(n):
     """Build and start a RandomTable -> Min / Max dataflow of ``n * L`` rows.

     ``StorageEngine.default`` is set to "hdf5" before any module is created.
     """
     StorageEngine.default = "hdf5"
     sched = Scheduler()
     source = RandomTable(10, rows=n * L, scheduler=sched)
     lower = Min(name='min_' + str(hash(source)), scheduler=sched)
     lower.input.table = source.output.table
     upper = Max(name='max_' + str(hash(source)), scheduler=sched)
     upper.input.table = source.output.table
     sched.start()
Ejemplo n.º 19
0
 def p10s_random_min_max(self):
     """Build and start a RandomTable -> Min / Max dataflow.

     The table has ``self.current_step * L`` rows. ``StorageEngine.default``
     is set to "hdf5" before any module is created.
     """
     step = self.current_step
     StorageEngine.default = "hdf5"
     sched = Scheduler()
     source = RandomTable(10, rows=step * L, scheduler=sched)
     # NOTE(review): Min receives `mid=` while Max receives `id=` -- confirm
     # that both keywords are intended by the module constructors.
     lower = Min(mid='min_' + str(hash(source)), scheduler=sched)
     lower.input.table = source.output.table
     upper = Max(id='max_' + str(hash(source)), scheduler=sched)
     upper.input.table = source.output.table
     sched.start()
Ejemplo n.º 20
0
 def test_stirrer(self) -> None:
     """Check Max downstream of a Stirrer that updates and deletes rows.

     The module's result is compared with the maximum of the stirrer's table.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     mixer = Stirrer(
         update_column="_1",
         delete_rows=5,
         update_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     mixer.input[0] = source.output.result
     max_mod = Max(name="max_" + str(hash(source)), scheduler=sched)
     max_mod.input[0] = mixer.output.result
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     expected = mixer.table.max()
     observed = max_mod.result
     self.compare(expected, observed)
Ejemplo n.º 21
0
 def test_idxmax(self) -> None:
     """Compare Max and IdxMax fed by the same throttled RandomTable.

     The dict held by ``Max`` must match the last row of the table returned
     by ``IdxMax.max()``.
     """
     sched = self.scheduler()
     source = RandomTable(10, rows=10000, throttle=1000, scheduler=sched)
     idx_mod = IdxMax(scheduler=sched)
     idx_mod.input[0] = source.output.result
     max_mod = Max(scheduler=sched)
     max_mod.input[0] = source.output.result
     idx_printer = Print(proc=self.terse, scheduler=sched)
     idx_printer.input[0] = idx_mod.output.result
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     from_max = max_mod.psdict
     idx_table = idx_mod.max()
     assert idx_table is not None
     from_idxmax = notNone(idx_table.last()).to_dict()
     self.compare(from_max, from_idxmax)
Ejemplo n.º 22
0
 async def _add_max_remove_min(scheduler: Scheduler,
                               run_number: int) -> None:
     """Add "max" / "print_max" modules, then delete "min" / "print_min".

     All changes are made inside the scheduler's context, using the object
     that context yields. ``run_number`` is not used by the body.
     """
     with scheduler as flow:
         print("adding new modules")
         max_mod = Max(name="max", scheduler=scheduler)
         max_printer = Print(name="print_max", proc=proc, scheduler=scheduler)
         max_mod.input.table = table.output.result
         max_printer.input.df = max_mod.output.result
         print("removing min module")
         flow.delete_modules("min", "print_min")
Ejemplo n.º 23
0
 def test_histogram2d(self) -> None:
     """Run a CSV -> Min/Max -> Histogram2D -> Heatmap dataflow to completion.

     No assertion is made on the result; the test only runs the scheduler and
     collects the histogram's trace statistics.
     """
     sched = self.scheduler()
     loader = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=sched
     )
     lower = Min(scheduler=sched)
     lower.input[0] = loader.output.result
     upper = Max(scheduler=sched)
     upper.input[0] = loader.output.result
     hist2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
     hist2d.input[0] = loader.output.result
     hist2d.input.min = lower.output.result
     hist2d.input.max = upper.output.result
     heat = Heatmap(filename="histo_%03d.png", scheduler=sched)
     heat.input.array = hist2d.output.result
     every = Every(proc=self.terse, scheduler=sched)
     every.input[0] = heat.output.result
     aio.run(loader.scheduler().start())
     hist2d.trace_stats()
Ejemplo n.º 24
0
 def test_switch_if_then(self):
     """Check Max plugged on the ``result`` output of an always-True Switch.

     A Print module is attached to the switch's ``result_else`` output. The
     Max result is compared with ``stirrer.result.max()``.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     mixer = Stirrer(
         update_column="_1",
         delete_rows=5,
         update_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     mixer.input[0] = source.output.result
     router = Switch(condition=lambda x: True, scheduler=sched)
     router.input[0] = mixer.output.result
     max_mod = Max(name="max_" + str(hash(source)), scheduler=sched)
     max_mod.input[0] = router.output.result
     else_printer = Print(proc=self.terse, scheduler=sched)
     else_printer.input[0] = router.output.result_else
     max_printer = Print(proc=self.terse, scheduler=sched)
     max_printer.input[0] = max_mod.output.result
     aio.run(sched.start())
     expected = mixer.result.max()
     observed = max_mod.result
     self.compare(expected, observed)
Ejemplo n.º 25
0
 def test_dummy(self):
     """Run a RandomTable -> DummyMod -> Max -> Print dataflow to completion.

     No assertion is made; the test only starts the scheduler and joins it.
     """
     sched = Scheduler()
     source = RandomTable(2, rows=100000, scheduler=sched)
     dummy_mod = DummyMod(
         update_column='_1',
         delete_rows=5,
         update_rows=5,
         fixed_step_size=100,
         scheduler=sched,
     )
     dummy_mod.input.table = source.output.table
     max_mod = Max(name='max_' + str(hash(source)), scheduler=sched)
     max_mod.input.table = dummy_mod.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = max_mod.output.table
     sched.start()
     sched.join()
Ejemplo n.º 26
0
def main():
    """Build the histogram dataflow on the global scheduler ``s``.

    A three-column ('a', 'b', 'c') random table feeds Min, Max and Histograms;
    an Every module is attached to the histograms output.

    Returns the RandomTable module at the head of the dataflow.
    """
    source = RandomTable(
        columns=['a', 'b', 'c'],
        rows=1000000,
        random=np.random.randn,
        throttle=1000,
        scheduler=s,
    )
    lower = Min(scheduler=s)
    lower.input.table = source.output.table
    upper = Max(scheduler=s)
    upper.input.table = source.output.table
    hists = Histograms(scheduler=s)
    hists.input.table = source.output.table
    hists.input.min = lower.output.table
    hists.input.max = upper.output.table
    reporter = Every(scheduler=s)
    reporter.input.df = hists.output.table
    return source
Ejemplo n.º 27
0
    def test_histogram1d(self):
        """Run a CSV -> Min/Max -> Histogram1D dataflow on column '_2'.

        The scheduler is started with a tick callback that calls ``stop()``
        once the CSV loader reports it is terminated. No assertion is made on
        the histogram; only its trace statistics are collected.
        """
        sched = self.scheduler()
        loader = CSVLoader(
            get_dataset('bigfile'), index_col=False, header=None, scheduler=sched
        )
        lower = Min(scheduler=sched)
        lower.input.table = loader.output.table
        upper = Max(scheduler=sched)
        upper.input.table = loader.output.table
        hist = Histogram1D('_2', scheduler=sched)
        hist.input.table = loader.output.table
        hist.input.min = lower.output.table
        hist.input.max = upper.output.table

        # The reporter watches the loader's table, not the histogram output.
        every = Every(proc=self.terse, scheduler=sched)
        every.input.df = loader.output.table
        sched.start(tick_proc=lambda s, r: loader.is_terminated() and s.stop())
        sched.join()
        _ = hist.trace_stats()
Ejemplo n.º 28
0
from progressivis.io import CSVLoader
from progressivis.stats import Histogram2D, Min, Max
from progressivis.datasets import get_dataset
from progressivis.vis import Heatmap

# Demo script: builds a CSV -> Min/Max -> Histogram2D -> Heatmap dataflow at
# import time. No scheduler is passed to any module below.
# NOTE(review): presumably the modules then register on Scheduler.default --
# confirm against the module constructors.
print("Loading test_histogram2d")
print("Type of default_scheduler is %s" % type(Scheduler.default))

# Source module: loads the 'bigfile' dataset, read without a header row.
csv = CSVLoader(get_dataset('bigfile'),
                index_col=False,
                header=None,
                engine='c')
# Every module attached to the loader's table output.
pr = Every()
pr.input.df = csv.output.table
# Min and Max both read the loader's table; they feed the histogram bounds.
min_ = Min()
min_.input.table = csv.output.table
max_ = Max()
max_.input.table = csv.output.table
# 128x128 two-dimensional histogram over columns '_1' and '_2'.
histogram2d = Histogram2D('_1', '_2', xbins=128, ybins=128)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min_.output.table
histogram2d.input.max = max_.output.table
# Heatmap fed by the histogram; filename is a %-pattern with a 3-digit index.
heatmap = Heatmap(filename='histo_%03d.png')
heatmap.input.array = histogram2d.output.table
# `pr` is rebound here: the Every module created above is no longer reachable
# through this name, and a Print module named 'print' is attached to the same
# loader output.
pr = Print(name='print')
pr.input.df = csv.output.table

# Only start the dataflow when the file is executed as a script.
if __name__ == '__main__':
    csv.start()
Ejemplo n.º 29
0
# Demo script: 2D histogram / heatmap of NYC yellow-taxi drop-off coordinates.
# Relies on names defined outside this fragment: `s` (scheduler), `filter_`,
# `RESOLUTION`, `pd`, `Constant`, `Table` and the module classes used below.

# Alternative setting kept for reference: an empty suffix.
#SUFFIX= ''
PREFIX= '../nyc-taxi/'
SUFFIX= '.bz2'

# Monthly trip files for January to June 2015, built as PREFIX + name + SUFFIX.
URLS = [
    PREFIX+'yellow_tripdata_2015-01.csv'+SUFFIX,
    PREFIX+'yellow_tripdata_2015-02.csv'+SUFFIX,
    PREFIX+'yellow_tripdata_2015-03.csv'+SUFFIX,
    PREFIX+'yellow_tripdata_2015-04.csv'+SUFFIX,
    PREFIX+'yellow_tripdata_2015-05.csv'+SUFFIX,
    PREFIX+'yellow_tripdata_2015-06.csv'+SUFFIX,
]

# The file list is wrapped in a constant table and fed to the loader's
# `filenames` input; only the two drop-off coordinate columns are read.
filenames = pd.DataFrame({'filename': URLS})
cst = Constant(Table('filenames', data=filenames), scheduler=s)
csv = CSVLoader(index_col=False,skipinitialspace=True,usecols=['dropoff_longitude', 'dropoff_latitude'], filter_=filter_, scheduler=s)
csv.input.filenames = cst.output.table
# NOTE(review): `min` and `max` shadow the Python builtins at module level;
# the names are kept because other code may import them from this module.
min = Min(scheduler=s)
min.input.table = csv.output.table
max = Max(scheduler=s)
max.input.table = csv.output.table
# RESOLUTION x RESOLUTION histogram of longitude (x) against latitude (y),
# bounded by the Min and Max modules above.
histogram2d = Histogram2D('dropoff_longitude', 'dropoff_latitude', xbins=RESOLUTION, ybins=RESOLUTION, scheduler=s)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min.output.table
histogram2d.input.max = max.output.table
# Heatmap fed by the histogram; filename is a %-pattern with an integer index.
heatmap = Heatmap(filename='nyc_dropoff_yellow%d.png', history=5, scheduler=s)
heatmap.input.array = histogram2d.output.table

# Only start the scheduler when the file is executed as a script.
if __name__=='__main__':
    s.start()