예제 #1
0
 def test_01_read_http_csv_with_crash_and_counter(self):
     """Recover an interrupted HTTP CSV load and count the reloaded rows.

     Phase 1 runs a ``CSVLoader`` on the ``bigfile`` URL with ``Patch1``
     (``max_steps`` = 200000) attached to ``csv_loader_1``; judging by the
     test name the patch simulates a crash -- confirm against ``Patch1``.
     Phase 2 restarts the HTTP server, then runs a second loader created
     with ``recovery=True`` on a fresh scheduler and wired to a ``Counter``.
     The loader's table must end with 1000000 rows and the counter's
     ``counter`` column must hold 1000000 at label 0.
     """
     # NOTE: a Travis-only early exit (``if TRAVIS: return``) was disabled here.
     self._http_srv = _HttpSrv()
     s = self.scheduler()
     url = make_url('bigfile')
     module = CSVLoader(url, index_col=False, header=None, scheduler=s)
     # assertIsNone reports the unexpected value on failure, whereas
     # assertTrue(... is None) only says "False is not true".
     self.assertIsNone(module.table())
     Patch1.max_steps = 200000
     decorate(s, Patch1("csv_loader_1"))
     s.start()
     s.join()

     # Second phase: fresh scheduler, loader created in recovery mode.
     self._http_srv.restart()
     s = self.scheduler()
     # Named ``loader`` rather than ``csv`` so the stdlib module name is
     # never shadowed inside this test.
     loader = CSVLoader(url,
                        recovery=True,
                        index_col=False,
                        header=None,
                        scheduler=s)
     counter = Counter(scheduler=s)
     counter.input.table = loader.output.table
     self.assertIsNone(loader.table())
     s.start()
     s.join()
     self.assertEqual(len(loader.table()), 1000000)
     self.assertEqual(counter.table()['counter'].loc[0], 1000000)
 def test_scatterplot2(self):
     """Check that the scatter plot's sampled points stay inside the bounds.

     Builds a ``RandomTable`` (2000000 rows) feeding an ``MCScatterPlot``
     created with ``approximate=True`` and no sampling module, attaches the
     two variable patches and the scatter-plot patch to the scheduler, runs
     it to completion, and then asserts that the first two coordinates of
     every entry in ``to_json()['sample']['data']`` lie within
     ``[LOWER_X, UPPER_X]`` and ``[LOWER_Y, UPPER_Y]`` respectively.
     """
     sched = self.scheduler()
     source = RandomTable(2, rows=2000000, scheduler=sched)
     plot = MCScatterPlot(
         scheduler=sched,
         classes=[('Scatterplot', '_1', '_2')],
         approximate=True,
     )
     plot.create_dependent_modules(source, 'table', with_sampling=False)

     ticker = Every(proc=self.terse, constant_time=True, scheduler=sched)
     ticker.input.df = source.output.table
     printer = Print(proc=self.terse, scheduler=sched)
     printer.input.df = plot.output.table

     # Each patch is built and attached in turn, in the original order.
     patch_specs = (
         (VariablePatch1, "variable_1"),
         (VariablePatch2, "variable_2"),
         (ScatterPlotPatch, "mc_scatter_plot_1"),
     )
     for patch_cls, target_name in patch_specs:
         decorate(sched, patch_cls(target_name))

     plot.scheduler().start(idle_proc=idle_proc)
     sched.join()

     payload = plot.to_json()
     # Each sample entry unpacks into three components; the third is unused.
     xs, ys, _ = zip(*payload['sample']['data'])
     lo_x, hi_x = min(xs), max(xs)
     lo_y, hi_y = min(ys), max(ys)
     self.assertGreaterEqual(lo_x, LOWER_X)
     self.assertGreaterEqual(lo_y, LOWER_Y)
     self.assertLessEqual(hi_x, UPPER_X)
     self.assertLessEqual(hi_y, UPPER_Y)
예제 #3
0
 def _tst_10_read_multi_csv_file_compress_with_crash(self, file_list):
     """Load ``file_list`` with an interrupted run, then recover and verify.

     Phase 1 runs a ``CSVLoader`` on ``file_list`` with ``Patch1``
     (``max_steps`` = 1200000) attached to ``csv_loader_1``; judging by the
     helper's name the patch simulates a crash -- confirm against
     ``Patch1``.  The first loader is then released with ``_close`` and a
     second loader, created with ``recovery=True`` on a fresh scheduler, is
     run to completion.  Its table must contain 2000000 rows.

     Args:
         file_list: the inputs handed unchanged to both ``CSVLoader``
             instances (presumably paths to compressed CSV files, per the
             helper's name -- verify against callers).
     """
     s = self.scheduler()
     interrupted = CSVLoader(file_list,
                             index_col=False,
                             header=None,
                             scheduler=s)
     # assertIsNone reports the unexpected value on failure, whereas
     # assertTrue(... is None) only says "False is not true".
     self.assertIsNone(interrupted.table())
     Patch1.max_steps = 1200000
     decorate(s, Patch1("csv_loader_1"))
     s.start()
     s.join()
     _close(interrupted)

     # Second phase: fresh scheduler, loader created in recovery mode.
     s = self.scheduler()
     recovered = CSVLoader(file_list,
                           recovery=True,
                           index_col=False,
                           header=None,
                           scheduler=s)
     self.assertIsNone(recovered.table())
     s.start()
     s.join()
     self.assertEqual(len(recovered.table()), 2000000)
예제 #4
0
 def test_06_read_http_multi_csv_bz2_with_crash(self):
     """Recover an interrupted load of two BZ2 CSV URLs served over HTTP.

     The same ``bigfile`` URL (built with ``ext=BZ2``) is listed twice.
     Phase 1 runs a ``CSVLoader`` on that list with ``Patch1``
     (``max_steps`` = 1200000) attached to ``csv_loader_1``; judging by the
     test name the patch simulates a crash -- confirm against ``Patch1``.
     Phase 2 restarts the HTTP server and runs a second loader created with
     ``recovery=True`` on a fresh scheduler.  Its table must contain
     2000000 rows.
     """
     # NOTE: a Travis-only early exit (``if TRAVIS: return``) was disabled here.
     self._http_srv = _HttpSrv()
     s = self.scheduler()
     url_list = [make_url('bigfile', ext=BZ2)] * 2
     interrupted = CSVLoader(url_list,
                             index_col=False,
                             header=None,
                             scheduler=s)
     # assertIsNone reports the unexpected value on failure, whereas
     # assertTrue(... is None) only says "False is not true".
     self.assertIsNone(interrupted.table())
     Patch1.max_steps = 1200000
     decorate(s, Patch1("csv_loader_1"))
     s.start()
     s.join()

     # Second phase: fresh scheduler, loader created in recovery mode.
     self._http_srv.restart()
     s = self.scheduler()
     recovered = CSVLoader(url_list,
                           recovery=True,
                           index_col=False,
                           header=None,
                           scheduler=s)
     self.assertIsNone(recovered.table())
     s.start()
     s.join()
     self.assertEqual(len(recovered.table()), 2000000)
예제 #5
0
 def test_09_read_multi_csv_file_with_crash(self):
     """Recover an interrupted load of the ``bigfile`` dataset listed twice.

     Phase 1 runs a ``CSVLoader`` on the two-entry file list with
     ``Patch1`` (``max_steps`` = 1200000) attached to ``csv_loader_1``;
     judging by the test name the patch simulates a crash -- confirm
     against ``Patch1``.  The first loader is then released with ``_close``
     and a second loader, created with ``recovery=True`` on a fresh
     scheduler, is run to completion.  Its table must contain 2000000 rows.
     """
     s = self.scheduler()
     file_list = [get_dataset('bigfile'), get_dataset('bigfile')]
     interrupted = CSVLoader(file_list,
                             index_col=False,
                             header=None,
                             scheduler=s)
     # assertIsNone reports the unexpected value on failure, whereas
     # assertTrue(... is None) only says "False is not true".
     self.assertIsNone(interrupted.table())
     Patch1.max_steps = 1200000
     decorate(s, Patch1("csv_loader_1"))
     s.start()
     s.join()
     _close(interrupted)

     # Second phase: fresh scheduler, loader created in recovery mode.
     s = self.scheduler()
     recovered = CSVLoader(file_list,
                           recovery=True,
                           index_col=False,
                           header=None,
                           scheduler=s)
     self.assertIsNone(recovered.table())
     s.start()
     s.join()
     self.assertEqual(len(recovered.table()), 2000000)