def test_histogram2d(self):
    """Smoke-test a CSV -> Min/Max -> Histogram2D -> Heatmap dataflow.

    The modules are created on the scheduler returned by
    ``self.scheduler()``, the scheduler is started and joined, and
    ``trace_stats()`` is finally called on the histogram module.  Nothing
    is asserted about the produced data.
    """
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"),
        index_col=False,
        header=None,
        scheduler=sched,
    )

    # Min and Max both consume the loaded table; their outputs feed the
    # histogram's ``min`` / ``max`` input slots below.
    lower = Min(scheduler=sched)
    lower.input.table = loader.output.table
    upper = Max(scheduler=sched)
    upper.input.table = loader.output.table

    # Columns 1 and 2 are binned on a 100x100 grid (original note: the
    # columns are called 1..30).
    histogram2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
    histogram2d.input.table = loader.output.table
    histogram2d.input.min = lower.output.table
    histogram2d.input.max = upper.output.table

    image = Heatmap(filename="histo_%03d.png", scheduler=sched)
    image.input.array = histogram2d.output.table

    # The reporter is attached to the loader output (not to the heatmap).
    reporter = Every(proc=self.terse, scheduler=sched)
    reporter.input.df = loader.output.table

    loader.scheduler().start()
    sched.join()

    # Only the call itself is exercised; the returned value is not used.
    histogram2d.trace_stats()
def t_histogram2d_impl(self, **kw: Any) -> None:
    """Check Histogram2D against ``fh.histogram2d`` on a stirred random table.

    A ``RandomTable`` is passed through a ``Stirrer`` (which receives the
    extra keyword arguments ``kw``), and the stirrer output feeds Min, Max
    and Histogram2D.  After the scheduler has run, the last histogram row
    is compared bin by bin with a reference histogram computed from the
    stirrer's ``_1`` / ``_2`` columns.
    """
    sched = self.scheduler()
    source = RandomTable(3, rows=100000, scheduler=sched)
    stirrer = Stirrer(
        update_column="_2", fixed_step_size=1000, scheduler=sched, **kw
    )
    stirrer.input[0] = source.output.result

    lower = Min(scheduler=sched)
    lower.input[0] = stirrer.output.result
    upper = Max(scheduler=sched)
    upper.input[0] = stirrer.output.result

    histogram2d = Histogram2D(0, 1, xbins=100, ybins=100, scheduler=sched)
    histogram2d.input[0] = stirrer.output.result
    histogram2d.input.min = lower.output.result
    histogram2d.input.max = upper.output.result

    image = Heatmap(filename="histo_%03d.png", scheduler=sched)
    image.input.array = histogram2d.output.result

    reporter = Every(proc=self.terse, scheduler=sched)
    reporter.input[0] = image.output.result

    aio.run(sched.start())

    # Histogram produced by the module: take the last row of its table.
    final_row = notNone(histogram2d.table.last()).to_dict()
    computed = final_row["array"]
    y_range = [final_row["ymin"], final_row["ymax"]]
    x_range = [final_row["xmin"], final_row["xmax"]]

    # Reference histogram over the same two columns.  The y values are
    # passed first and the result is flipped along axis 0 before comparing.
    selected = stirrer.table.loc[:, ["_1", "_2"]]
    assert selected is not None
    points = selected.to_array()
    grid = [histogram2d.params.ybins, histogram2d.params.xbins]
    expected = np.flip(  # type: ignore
        fh.histogram2d(
            points[:, 1], points[:, 0], bins=grid, range=[y_range, x_range]
        ),
        axis=0,
    )

    self.assertEqual(np.sum(computed), np.sum(expected))
    self.assertListEqual(
        computed.reshape(-1).tolist(), expected.reshape(-1).tolist()
    )
def test_histogram2d1(self) -> None:
    """Check Histogram2D on the 'bigfile' CSV against ``fh.histogram2d``.

    The CSV is loaded progressively and fed to Min, Max and Histogram2D.
    After the scheduler has run, the last histogram row is compared (with
    ``np.allclose``) to a reference histogram computed from columns 1 and 2
    of the same file read with pandas.
    """
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"),
        index_col=False,
        header=None,
        scheduler=sched,
    )

    lower = Min(scheduler=sched)
    lower.input[0] = loader.output.result
    upper = Max(scheduler=sched)
    upper.input[0] = loader.output.result

    # Original note: the columns are called 1..30.
    histogram2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
    histogram2d.input[0] = loader.output.result
    histogram2d.input.min = lower.output.result
    histogram2d.input.max = upper.output.result

    image = Heatmap(filename="histo_%03d.png", scheduler=sched)
    image.input.array = histogram2d.output.result

    reporter = Every(proc=self.terse, scheduler=sched)
    reporter.input[0] = image.output.result

    aio.run(loader.scheduler().start())

    # Histogram produced by the module: take the last row of its table.
    final_row = notNone(histogram2d.table.last()).to_dict()
    computed = final_row["array"]
    y_range = [final_row["ymin"], final_row["ymax"]]
    x_range = [final_row["xmin"], final_row["xmax"]]

    # Reference data: the same two columns, read in one go with pandas.
    frame = pd.read_csv(
        get_dataset("bigfile"), header=None, usecols=[1, 2]  # type: ignore
    )
    points = frame.to_numpy()
    grid = [histogram2d.params.ybins, histogram2d.params.xbins]

    # The y values are passed first and the result is flipped along axis 0
    # before it is compared with the module's array.
    expected = np.flip(  # type: ignore
        fh.histogram2d(
            points[:, 1], points[:, 0], bins=grid, range=[y_range, x_range]
        ),
        axis=0,
    )
    self.assertTrue(np.allclose(computed, expected))
def test_histogram2d(self) -> None:
    """Smoke-test the CSV -> Min/Max -> Histogram2D -> Heatmap dataflow.

    The scheduler is run through ``aio.run`` and ``trace_stats()`` is then
    called on the histogram module; no assertion is made on the output.
    """
    sched = self.scheduler()
    loader = CSVLoader(
        get_dataset("bigfile"),
        index_col=False,
        header=None,
        scheduler=sched,
    )

    lower = Min(scheduler=sched)
    lower.input[0] = loader.output.result
    upper = Max(scheduler=sched)
    upper.input[0] = loader.output.result

    # Original note: the columns are called 1..30.
    histogram2d = Histogram2D(1, 2, xbins=100, ybins=100, scheduler=sched)
    histogram2d.input[0] = loader.output.result
    histogram2d.input.min = lower.output.result
    histogram2d.input.max = upper.output.result

    image = Heatmap(filename="histo_%03d.png", scheduler=sched)
    image.input.array = histogram2d.output.result

    reporter = Every(proc=self.terse, scheduler=sched)
    reporter.input[0] = image.output.result

    aio.run(loader.scheduler().start())

    # Only the call is exercised; its result is deliberately discarded.
    _ = histogram2d.trace_stats()
# Dataflow: yellow-taxi CSV files (Jan-Jun 2015) -> drop-off coordinates
# -> Min/Max -> 2D histogram -> heatmap images.
# `s`, `filter_` and `RESOLUTION` are defined outside this section.

PREFIX = "../nyc-taxi/"
# An empty SUFFIX ('') was the commented-out alternative to '.bz2'.
SUFFIX = ".bz2"
URLS = [
    PREFIX + basename + SUFFIX
    for basename in (
        "yellow_tripdata_2015-01.csv",
        "yellow_tripdata_2015-02.csv",
        "yellow_tripdata_2015-03.csv",
        "yellow_tripdata_2015-04.csv",
        "yellow_tripdata_2015-05.csv",
        "yellow_tripdata_2015-06.csv",
    )
]

# The file names are handed to the loader as a table wrapped in a Constant.
filenames = pd.DataFrame({"filename": URLS})
cst = Constant(Table("filenames", data=filenames), scheduler=s)
csv = CSVLoader(
    index_col=False,
    skipinitialspace=True,
    usecols=["dropoff_longitude", "dropoff_latitude"],
    filter_=filter_,
    scheduler=s,
)
csv.input.filenames = cst.output.table

# NOTE(review): `min` and `max` rebind the builtins of the same name for
# the rest of this module.
min = Min(scheduler=s)
min.input.table = csv.output.table
max = Max(scheduler=s)
max.input.table = csv.output.table

histogram2d = Histogram2D(
    "dropoff_longitude",
    "dropoff_latitude",
    xbins=RESOLUTION,
    ybins=RESOLUTION,
    scheduler=s,
)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min.output.table
histogram2d.input.max = max.output.table

heatmap = Heatmap(
    filename="nyc_dropoff_yellow%d.png", history=5, scheduler=s
)
heatmap.input.array = histogram2d.output.table

if __name__ == "__main__":
    s.start()
# Dataflow: green-taxi CSV files (Jan-Jun 2015) -> pick-up coordinates
# -> Min/Max -> 2D histogram -> heatmap images.
# `s`, `filter_` and `RESOLUTION` are defined outside this section.

PREFIX = "../nyc-taxi/"
# An empty SUFFIX ('') was the commented-out alternative to '.bz2'.
SUFFIX = ".bz2"
URLS = [
    PREFIX + basename + SUFFIX
    for basename in (
        "green_tripdata_2015-01.csv",
        "green_tripdata_2015-02.csv",
        "green_tripdata_2015-03.csv",
        "green_tripdata_2015-04.csv",
        "green_tripdata_2015-05.csv",
        "green_tripdata_2015-06.csv",
    )
]

# The file names are handed to the loader as a table wrapped in a Constant.
filenames = pd.DataFrame({"filename": URLS})
cst = Constant(Table("filenames", data=filenames), scheduler=s)
csv = CSVLoader(
    index_col=False,
    skipinitialspace=True,
    usecols=["Pickup_longitude", "Pickup_latitude"],
    filter_=filter_,
    scheduler=s,
)
csv.input.filenames = cst.output.table

# NOTE(review): `min` and `max` rebind the builtins of the same name for
# the rest of this module.
min = Min(scheduler=s)
min.input.table = csv.output.table
max = Max(scheduler=s)
max.input.table = csv.output.table

histogram2d = Histogram2D(
    "Pickup_longitude",
    "Pickup_latitude",
    xbins=RESOLUTION,
    ybins=RESOLUTION,
    scheduler=s,
)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min.output.table
histogram2d.input.max = max.output.table

heatmap = Heatmap(filename="nyc_pickup_green%d.png", history=5, scheduler=s)
heatmap.input.array = histogram2d.output.table

if __name__ == "__main__":
    s.start()
# Dataflow: green-taxi CSV files (Jan-Jun 2015) -> drop-off coordinates
# -> Min/Max -> 2D histogram -> heatmap images.
# `PREFIX`, `s`, `filter_` and `RESOLUTION` are defined outside this section.

SUFFIX = ".bz2"
URLS = [
    PREFIX + basename + SUFFIX
    for basename in (
        "green_tripdata_2015-01.csv",
        "green_tripdata_2015-02.csv",
        "green_tripdata_2015-03.csv",
        "green_tripdata_2015-04.csv",
        "green_tripdata_2015-05.csv",
        "green_tripdata_2015-06.csv",
    )
]

# The Constant wraps a Table built from the DataFrame, not the DataFrame
# itself (the direct form was the commented-out variant).
filenames = pd.DataFrame({"filename": URLS})
cst = Constant(Table("filenames", data=filenames), scheduler=s)
csv = CSVLoader(
    index_col=False,
    skipinitialspace=True,
    usecols=["Dropoff_longitude", "Dropoff_latitude"],
    filter_=filter_,
    scheduler=s,
)
csv.input.filenames = cst.output.table

# NOTE(review): `min` and `max` rebind the builtins of the same name for
# the rest of this module.
min = Min(scheduler=s)
min.input.table = csv.output.table
max = Max(scheduler=s)
max.input.table = csv.output.table

histogram2d = Histogram2D(
    "Dropoff_longitude",
    "Dropoff_latitude",
    xbins=RESOLUTION,
    ybins=RESOLUTION,
    scheduler=s,
)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min.output.table
histogram2d.input.max = max.output.table

# No `history` argument is passed here; the variant with history=5 was
# commented out.
heatmap = Heatmap(filename="nyc_Dropoff_green%d.png", scheduler=s)
heatmap.input.array = histogram2d.output.table

if __name__ == "__main__":
    s.start()
from progressivis.io import CSVLoader
from progressivis.stats import Histogram2D, Min, Max
from progressivis.datasets import get_dataset
from progressivis.vis import Heatmap

# Demo dataflow: 'bigfile' CSV -> Min/Max -> 128x128 2D histogram -> heatmap.
# No scheduler is passed to any module here.  `Scheduler`, `Every` and
# `Print` are expected to be in scope from outside this section.

print("Loading test_histogram2d")
print("Type of default_scheduler is %s" % type(Scheduler.default))

csv = CSVLoader(
    get_dataset("bigfile"),
    index_col=False,
    header=None,
    engine="c",
)

# First consumer of the loader output; `pr` is rebound to a Print module
# further down.
pr = Every()
pr.input.df = csv.output.table

min_ = Min()
min_.input.table = csv.output.table
max_ = Max()
max_.input.table = csv.output.table

histogram2d = Histogram2D("_1", "_2", xbins=128, ybins=128)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min_.output.table
histogram2d.input.max = max_.output.table

# Heatmap images are written using the 'histo_%03d.png' file name pattern.
heatmap = Heatmap(filename="histo_%03d.png")
heatmap.input.array = histogram2d.output.table

# Second consumer of the loader output, replacing the `pr` binding above.
pr = Print(name="print")
pr.input.df = csv.output.table

if __name__ == "__main__":
    csv.start()
# Dataflow: CSV files listed in URLS -> pick-up coordinates -> 2D histogram
# with fixed bounds -> heatmap images.
# `URLS`, `s`, `filter_`, `RESOLUTION`, `bounds_min` and `bounds_max` are
# defined outside this section.

# The file names are handed to the loader as a table wrapped in a Constant.
filenames = pd.DataFrame({"filename": URLS})
cst = Constant(Table("filenames", data=filenames), scheduler=s)
csv = CSVLoader(
    index_col=False,
    skipinitialspace=True,
    usecols=["pickup_longitude", "pickup_latitude"],
    filter_=filter_,
    scheduler=s,
)
csv.input.filenames = cst.output.table

# The histogram bounds come from constant tables built from `bounds_min` /
# `bounds_max` instead of Min/Max modules computed over the data.
# Each table gets its own name: the upper-bound table was previously also
# created as 'bounds_min', duplicating the lower-bound table's name.
# NOTE(review): `min` and `max` rebind the builtins of the same name for
# the rest of this module.
min = Constant(
    table=Table("bounds_min", data=pd.DataFrame([bounds_min])), scheduler=s
)
max = Constant(
    table=Table("bounds_max", data=pd.DataFrame([bounds_max])), scheduler=s
)

histogram2d = Histogram2D(
    "pickup_longitude",
    "pickup_latitude",
    xbins=RESOLUTION,
    ybins=RESOLUTION,
    scheduler=s,
)
histogram2d.input.table = csv.output.table
histogram2d.input.min = min.output.table
histogram2d.input.max = max.output.table

heatmap = Heatmap(filename="nyc_pickup_yellow%d.png", history=5, scheduler=s)
heatmap.input.array = histogram2d.output.table

if __name__ == "__main__":
    s.start()