Example #1
def test():
    import time
    import numpy as np
    video_shape = (16, 112, 112)
    data = DataLayer("data/tinytraindb.lmdb", video_shape, 16, verbose=True)
    data2 = DataLayer("data/tinyvaldb.lmdb", video_shape, 16, verbose=True)

    synch_data = []
    # Retrieve data synchronously as a reference for correctness
    fetcher = DataFetcher("data/tinytraindb.lmdb",
                          video_shape,
                          16,
                          dtype='float32')
    for i in range(10):
        X, y, epoch = fetcher.load_data()
        synch_data.append(X)
        #print X

    for i in range(10):
        data.load_batch()
        data2.load_batch()
        tic = time.time()
        # Do some work
        a = np.random.randn(400, 600).dot(np.random.randn(600, 400))
        #print np.linalg.norm(a)
        toc = time.time()
        print "Work took %0.6f seconds" % (toc - tic)
        assert np.linalg.norm(data.X.get_value(borrow=True) -
                              synch_data[i]) < 1e-8
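
The test above exercises the intended usage pattern: load_batch() makes the next prefetched batch visible through the Theano shared variables, and the computation then reads data.X / data.y while the worker process fetches ahead. A minimal driver loop built on that pattern (the train_model function and num_batches argument are hypothetical, not part of the original code):

def run_training(data, train_model, num_batches):
    # Hypothetical driver: the DataLayer worker prefetches the next batch
    # while train_model consumes the one currently held in data.X / data.y
    # (e.g. a Theano function compiled with givens={x: data.X, y: data.y}).
    for i in range(num_batches):
        data.load_batch()
        cost = train_model()
        print "batch %d: cost %f" % (i, cost)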
Example #2
    def __init__(self,
                 db_name,
                 video_shape,
                 mem_batch_size,
                 verbose=False,
                 buffer_size=6):
        self.fetcher = DataFetcher(db_name,
                                   video_shape,
                                   mem_batch_size,
                                   dtype=theano.config.floatX)
        self.batch_size = mem_batch_size
        self.video_shape = video_shape
        self.current_batch = 0
        self.verbose = verbose

        # Could manage with a buffer size of exactly 2, but need to change
        # the interprocess communication somewhat
        assert buffer_size > 2

        # Theano shared variables that the compiled training function reads;
        # load_batch() refills them with the next prefetched memory batch
        X = np.empty((mem_batch_size, 3) + video_shape,
                     dtype=theano.config.floatX)
        y = np.empty((mem_batch_size, ), dtype=theano.config.floatX)
        self.shared_data = theano.shared(X, borrow=True)
        self.shared_label = theano.shared(y, borrow=True)
        self.X = self.shared_data
        self.y = T.cast(self.shared_label, 'int32')

        # Create shared memory object for async loading
        X_shared_array_base = multiprocessing.Array(
            ctypes.c_float,
            buffer_size * self.batch_size * 3 * np.prod(video_shape))
        X_shared_array = np.ctypeslib.as_array(X_shared_array_base.get_obj())
        self.X_shared_array = X_shared_array.reshape(buffer_size,
                                                     self.batch_size, 3,
                                                     *video_shape)

        y_shared_array_base = multiprocessing.Array(
            ctypes.c_float, buffer_size * self.batch_size)
        y_shared_array = np.ctypeslib.as_array(y_shared_array_base.get_obj())
        self.y_shared_array = y_shared_array.reshape(buffer_size,
                                                     self.batch_size)

        # Start up worker process
        self.queue = multiprocessing.Queue(maxsize=buffer_size - 2)
        self.worker = multiprocessing.Process(
            target=fetcher_loop,
            args=(self.fetcher, self.X_shared_array, self.y_shared_array,
                  self.queue))
        self.worker.start()
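
fetcher_loop and DataLayer.load_batch are not shown on this page. A plausible sketch of the producer/consumer protocol implied by the constructor, in which slot indices travel through the bounded queue while the frames themselves travel through the shared-memory ring buffer, could look like the following (the bodies below are assumptions, not the original implementation):

def fetcher_loop(fetcher, X_shared_array, y_shared_array, queue):
    # Hypothetical worker body: fill the ring-buffer slots in order and
    # announce each filled slot (plus the fetcher's epoch counter) on the
    # queue. put() blocks once buffer_size - 2 announcements are pending,
    # which throttles the producer so it cannot lap the consumer.
    buffer_size = X_shared_array.shape[0]
    slot = 0
    while True:
        X, y, epoch = fetcher.load_data()
        X_shared_array[slot] = X
        y_shared_array[slot] = y
        queue.put((slot, epoch))
        slot = (slot + 1) % buffer_size


def load_batch(self):
    # Hypothetical DataLayer.load_batch: wait for the next announced slot and
    # copy it into the Theano shared variables read by the training function.
    slot, epoch = self.queue.get()
    self.shared_data.set_value(self.X_shared_array[slot])
    self.shared_label.set_value(self.y_shared_array[slot])
    self.current_batch += 1
    return epoch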
Example #3
def evaluate_3d_conv():
    theano.config.exception_verbosity = "high"
    theano.config.optimizer = 'None'
    rng = np.random.RandomState(234)
    TT, HH, WW = 16,240,320
    N = 10
    num_classes = 5
    batch_size = 1
    num_filters = 4
    num_channels = 3
    
    if len(sys.argv) > 1:
        fetcher = DataFetcher("data/tinyvideodb.lmdb")
        X, y = fetcher.load_data(10,(16,240,320))
        y /= 21
    else:
        X = np.random.randint(-127,127,size=(N,3,16,240,320)).astype(theano.config.floatX)
        y = np.random.randint(0,num_classes,size=(N,))
        
    X_train = theano.shared(X.astype('float32'), borrow=True)
    y_train = theano.shared(y.astype('int32'), borrow=True)
    print y_train.get_value()
    
    params = []

    x = dtensor5('x')
    y = T.ivector('y')
    FT, FH, FW = 5, 5, 5

    ###########################################################################
    # CONV-RELU-POOL (Layer 1)
    ###########################################################################
    conv1 = ConvLayer(x,num_channels,num_filters,(FT,FH,FW),(TT,HH,WW),batch_size,relu,
                      layer_name="Conv1")
    params += conv1.params
    pool1 = PoolLayer(conv1.output,(2,2,2))

    ###########################################################################
    # CONV-RELU-POOL (Layer 2)
    ###########################################################################
    conv2 = ConvLayer(pool1.output,num_filters,num_filters,
                      (FT,FH,FW),
                      (TT/2,HH/2,WW/2),
                      batch_size,
                      relu,
                      layer_name="Conv2")
    params += conv2.params    
    pool2 = PoolLayer(conv2.output,(2,2,2))

    ###########################################################################
    # FULLY-CONNECTED (Layer 3)
    ###########################################################################
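    # Two 2x2x2 poolings shrink each of the T, H and W dimensions by a
    # factor of 4, so the flattened feature volume per clip is
    # num_filters * TT*HH*WW / 64 (assuming the conv layers preserve the
    # spatial extent of their input).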
    out_dim = num_filters*TT*HH*WW/64            
    num_hidden = 64
    fc3 = HiddenLayer(pool2.output.flatten(ndim=2),out_dim,num_hidden,relu)
    params += fc3.params    
    
    ###########################################################################
    # SOFTMAX (Layer 4)
    ###########################################################################
    softmax = LogRegr(fc3.output,num_hidden,num_classes,relu,rng)
    params += softmax.params
    
    reg = 0.01
    cost = softmax.negative_log_likelihood(y) + reg*T.sum(softmax.W*softmax.W)
    

    # compute the gradient of the cost with respect to the parameters
    # (stored in params); the resulting gradients are collected in gparams
    gparams = [T.grad(cost, param) for param in params]

    learning_rate = 1e-5
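    # Vanilla SGD: step each parameter against its gradient by learning_rate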
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(params, gparams)
    ]    
    
    index = T.lscalar()
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: X_train[index * batch_size: (index + 1) * batch_size],
            y: y_train[index * batch_size: (index + 1) * batch_size]
        }
    )
    
    for k in range(10):
        tic = time.time()
        cost = train_model(k % (N/batch_size))
        toc = time.time()
        print cost, "(%0.4f seconds)" % (toc - tic)