def test_join_simple(self):
     """Join two one-row constant tables and check the merged last row.

     Two ``Constant`` modules, each wrapping a one-row ``Table``, are
     assigned to a ``Reduce`` built over ``BinJoin``.  Once the scheduler
     has been started and joined, the last row of the table exposed by
     the module returned from ``expand()`` must hold the columns of both
     inputs.
     """
     s = self.scheduler()
     cst1 = Constant(Table(name='test_join_simple_cst1',
                           data=pd.DataFrame({
                               'xmin': [1],
                               'xmax': [2]
                           }),
                           create=True),
                     scheduler=s)
     cst2 = Constant(Table(name='test_join_simple_cst2',
                           data=pd.DataFrame({
                               'ymin': [3],
                               'ymax': [4]
                           }),
                           create=True),
                     scheduler=s)
     reduce_ = Reduce(BinJoin, "first", "second", "table", scheduler=s)
     # Both constants are assigned to the same "table" input of the Reduce
     # module, in this order, before expand() is asked for the join module.
     reduce_.input.table = cst1.output.table
     reduce_.input.table = cst2.output.table
     join = reduce_.expand()
     pr = Print(proc=self.terse, scheduler=s)
     pr.input.df = join.output.table
     s.start()
     s.join()
     res = join.trace_stats(max_runs=1)
     print(res)
     df = join.table()
     last = df.loc[df.index[-1]]
     # One assertion per column, checked in the original order, so that a
     # failure names the offending column and value instead of reporting
     # a bare "False is not true".
     expected = (('xmin', 1), ('xmax', 2), ('ymin', 3), ('ymax', 4))
     for column, value in expected:
         self.assertEqual(last[column], value, column)
Exemple #2
0
 def test_join(self):
     """Wire Stats modules to one CSV loader and join two of their outputs.

     Smoke test only: it builds the dataflow, starts the scheduler and
     prints the join module's trace statistics, without asserting
     anything about the joined data.
     """
     sched = self.scheduler()
     loader = CSVLoader(get_dataset('bigfile'),
                        index_col=False,
                        header=None,
                        scheduler=sched)
     # Three Stats modules (first argument 1, 2 and 3), each connected to
     # the loader's table output right after it is created.
     stats = []
     for column in (1, 2, 3):
         stat = Stats(column, reset_index=True, scheduler=sched)
         stat.input.table = loader.output.table
         stats.append(stat)
     # Only the first two Stats outputs are fed to the Reduce module.
     # NOTE(review): the third Stats module is created and connected but
     # never joined -- confirm whether that is intentional.
     reducer = Reduce(BinJoin, "first", "second", "table", scheduler=sched)
     for stat in stats[:2]:
         reducer.input.table = stat.output.stats
     joined = reducer.expand()
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = joined.output.table
     every = Every(proc=self.terse, constant_time=True, scheduler=sched)
     every.input.df = loader.output.table
     # NOTE(review): the scheduler is started but not joined here, unlike
     # in test_join_simple -- confirm against the scheduler's semantics.
     sched.start()
     print(joined.trace_stats(max_runs=1))
Exemple #3
0
 def test_join(self) -> None:
     """Join the ``stats`` outputs of two Stats modules fed by one CSV loader.

     Smoke test only: it builds the dataflow, runs the scheduler through
     ``aio.run`` and prints the join module's trace statistics, without
     asserting anything about the joined data.
     """
     s = self.scheduler()
     csv = CSVLoader(
         get_dataset("bigfile"), index_col=False, header=None, scheduler=s
     )
     stat1 = Stats(1, reset_index=True, scheduler=s)
     stat1.input[0] = csv.output.result
     stat2 = Stats(2, reset_index=True, scheduler=s)
     stat2.input[0] = csv.output.result
     # The slot name handed to Reduce.expand is the one read from the join
     # module below ("result"), matching test_join_simple.
     # NOTE(review): this call previously passed "table" while the code
     # read join.output.result; presumably a stale slot name -- confirm.
     join = Reduce.expand(
         BinJoin,
         "first",
         "second",
         "result",
         [stat1.output.stats, stat2.output.stats],
         scheduler=s,
     )
     pr = Print(proc=self.terse, scheduler=s)
     pr.input[0] = join.output.result
     prlen = Every(proc=self.terse, constant_time=True, scheduler=s)
     prlen.input[0] = csv.output.result
     aio.run(s.start())
     res = join.trace_stats(max_runs=1)
     print(res)
 def test_join_simple(self) -> None:
     """Join three one-row constant tables and check the merged last row.

     Three ``Constant`` modules, each wrapping a one-row ``Table``, are
     passed to ``Reduce.expand`` together with ``BinJoin``.  After the
     scheduler has run, the last row of the resulting module's table
     must hold the columns of all three inputs.
     """
     s = self.scheduler()
     cst1 = Constant(
         Table(
             name="test_join_simple_cst1",
             data=pd.DataFrame({"xmin": [1], "xmax": [2]}),
             create=True,
         ),
         scheduler=s,
     )
     cst2 = Constant(
         Table(
             name="test_join_simple_cst2",
             data=pd.DataFrame({"ymin": [3], "ymax": [4]}),
             create=True,
         ),
         scheduler=s,
     )
     cst3 = Constant(
         Table(
             name="test_join_simple_cst3",
             data=pd.DataFrame({"zmin": [5], "zmax": [6]}),
             create=True,
         ),
         scheduler=s,
     )
     join = Reduce.expand(
         BinJoin,
         "first",
         "second",
         "result",
         [cst1.output.result, cst2.output.result, cst3.output.result],
         scheduler=s,
     )
     pr = Print(proc=self.terse, scheduler=s)
     pr.input[0] = join.output.result
     aio.run(s.start())
     res = join.trace_stats(max_runs=1)
     print(res)
     df = join.table
     last = df.loc[df.index[-1]]
     assert last is not None
     # One assertion per column, checked in the original order, so that a
     # failure names the offending column and value instead of reporting
     # a bare "False is not true".
     expected = (
         ("xmin", 1),
         ("xmax", 2),
         ("ymin", 3),
         ("ymax", 4),
         ("zmin", 5),
         ("zmax", 6),
     )
     for column, value in expected:
         self.assertEqual(last[column], value, column)