Ejemplo n.º 1
0
    def test_scheduler(self):
        """Check run ordering of modules attached while the scheduler is running.

        A CSV loader feeding one sampler is started; while the scheduler
        runs, a second sampler is connected to the loader and a one-shot
        tick procedure adds a ``Min`` and a ``Print`` module.  The test then
        asserts that both samplers come after the loader in ``s._runorder``.
        """
        s = MTScheduler()
        loader = CSVLoader(get_dataset('bigfile'), index_col=False, header=None, scheduler=s)

        smp = Sample(n=10, scheduler=s)
        smp.input.df = loader.output.df

        loader.scheduler().start()

        # Once started, the scheduler must always be stopped and joined, even
        # when an assertion below fails; otherwise a failing test would leave
        # it running.
        try:
            sleep(1)  # wait before checking that the scheduler is running
            self.assertTrue(loader.scheduler().is_running())

            # Attach a second sampler to the loader after the start.
            smp2 = Sample(n=15, scheduler=s)
            smp2.input.df = loader.output.df

            def add_min():
                # Per the original author's note: the scheduler executes
                # add_min atomically.  Sleeping in here would be a bad idea;
                # a sleep outside of add_oneshot_tick_proc would lead to an
                # inconsistent state.
                m = Min(scheduler=s)
                m.input.df = smp2.output.df
                prt = Print(scheduler=s)
                prt.input.df = m.output.df

            s.add_oneshot_tick_proc(add_min)

            sleep(1)  # wait before inspecting the run order
            loader_pos = s._runorder.index(loader.id)
            self.assertGreater(s._runorder.index(smp.id), loader_pos)
            self.assertGreater(s._runorder.index(smp2.id), loader_pos)
            # The ordering of the Min module relative to smp2 is not checked:
            # `m` is local to add_min and not reachable from here.
        finally:
            s.stop()
            s.join()
Ejemplo n.º 2
0
def filter(df):
    """Return the rows of *df* whose drop-off point lies inside a fixed box.

    A row is kept when -74.10 < dropoff_longitude < -73.7 and
    40.60 < dropoff_latitude < 41; all four bounds are exclusive.

    NOTE: this name shadows the builtin ``filter`` in this module.
    """
    longitude = df['dropoff_longitude']
    latitude = df['dropoff_latitude']
    lon_inside = (longitude > -74.10) & (longitude < -73.7)
    lat_inside = (latitude > 40.60) & (latitude < 41)
    return df[lon_inside & lat_inside]

def print_len(x):
    """Print the length of *x* to standard output; do nothing if *x* is None."""
    if x is None:
        return
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(len(x))

#log_level() #package='progressivis.stats.histogram2d')

# Reuse a `scheduler` already defined in this namespace when there is one;
# otherwise fall back to a fresh MTScheduler.
try:
    s = scheduler
except NameError:
    # Only "name is not defined" is expected here; anything else (for
    # example KeyboardInterrupt) should propagate instead of being swallowed.
    s = MTScheduler()

# Location of the monthly yellow-taxi trip files.  The remote alternative
# below is kept for reference but is disabled.
#PREFIX= 'https://storage.googleapis.com/tlc-trip-data/2015/'
#SUFFIX= ''
PREFIX = '../nyc-taxi/'
SUFFIX = '.bz2'

# One file per month, January through June 2015.
URLS = [
    PREFIX + 'yellow_tripdata_2015-%02d.csv' % _month + SUFFIX
    for _month in range(1, 7)
]