def test_scheduler(self):
    """Check the run order when modules are added to a running scheduler.

    A CSV loader feeding a ``Sample`` module is started; while the
    scheduler is running, a second ``Sample`` is connected to the loader
    and a ``Min``/``Print`` pair is added through a one-shot tick
    procedure.  The test then verifies that both samplers appear after
    the loader in the scheduler's run order.
    """
    s = MTScheduler()
    csv = CSVLoader(get_dataset('bigfile'), index_col=False, header=None,
                    scheduler=s)
    smp = Sample(n=10, scheduler=s)
    smp.input.df = csv.output.df

    csv.scheduler().start()
    # From here on the scheduler has been started: make sure it is always
    # stopped and joined, even when one of the assertions below fails.
    try:
        sleep(1)
        self.assertTrue(csv.scheduler().is_running())

        # Connect a second sampler while the scheduler is already running.
        smp2 = Sample(n=15, scheduler=s)
        smp2.input.df = csv.output.df

        def add_min():
            """Add a Min module fed by smp2, and a Print module fed by Min."""
            m = Min(scheduler=s)
            # A sleep(1) could be inserted here to illustrate that add_min
            # is executed atomically by the scheduler (sleeping here is of
            # course a bad idea in real code).  Sleeping outside of
            # add_oneshot_tick_proc would lead to an inconsistent state.
            m.input.df = smp2.output.df
            prt = Print(scheduler=s)
            prt.input.df = m.output.df

        s.add_oneshot_tick_proc(add_min)

        # Give the scheduler time to run the one-shot procedure.
        sleep(1)
        run_order = s._runorder
        self.assertGreater(run_order.index(smp.id), run_order.index(csv.id))
        self.assertGreater(run_order.index(smp2.id), run_order.index(csv.id))
        # The Min module is local to add_min, so its position cannot be
        # checked from here:
        # self.assertTrue(s._runorder.index(m.id) > s._runorder.index(smp2.id))
    finally:
        s.stop()
        s.join()
def filter(df):
    """Return the rows of *df* whose drop-off point lies inside a bounding box.

    A row is kept when its ``dropoff_longitude`` is strictly between
    -74.10 and -73.7 and its ``dropoff_latitude`` is strictly between
    40.60 and 41.

    NOTE: this name shadows the ``filter`` builtin; it is kept unchanged
    because other code may refer to it by this name.
    """
    lon = df['dropoff_longitude']
    lat = df['dropoff_latitude']
    return df[(lon > -74.10) & (lon < -73.7) & (lat > 40.60) & (lat < 41)]


def print_len(x):
    """Print ``len(x)``; do nothing when *x* is None."""
    if x is not None:
        # Parenthesized form works both as a Python 2 print statement and
        # as a Python 3 function call, with the same output.
        print(len(x))


# Disabled logging setup:
# log_level()  # package='progressivis.stats.histogram2d')

# Reuse a scheduler already bound to the name ``scheduler`` if there is one;
# otherwise create a fresh one.  Only NameError is caught so that unrelated
# errors and interrupts are not silently swallowed.
try:
    s = scheduler
except NameError:
    s = MTScheduler()

# Alternative remote source (uncompressed files):
# PREFIX = 'https://storage.googleapis.com/tlc-trip-data/2015/'
# SUFFIX = ''
PREFIX = '../nyc-taxi/'
SUFFIX = '.bz2'

# Yellow-cab trip files for January through June 2015.
URLS = [
    PREFIX + 'yellow_tripdata_2015-01.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-02.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-03.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-04.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-05.csv' + SUFFIX,
    PREFIX + 'yellow_tripdata_2015-06.csv' + SUFFIX,
]