Ejemplos de IPCMemoryHandle en Python, ejemplos de pycuda.driver.IPCMemoryHandle en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: tensor_ops.py Proyecto: nagyist/NervanaSystems-ngraph

    def init_buffers(self):
        """
        Allocate the local output/scratch buffers and exchange IPC handles
        with every peer device.

        The local buffers' ``gpudata`` are stored in ``output_buff_dict`` and
        ``scratch_buff_dict`` under ``self.device_id``.  For each peer, the
        opened IPC memory handles and the event rebuilt from its IPC handle
        are stored in ``output_buff_dict``, ``scratch_buff_dict`` and
        ``event_buff_dict`` under the id that peer broadcast.
        """
        shape = self.op.args[0].tensor_description().shape
        dtype = self.op.args[0].tensor_description().dtype

        # Allocate output and scratch buffers
        self.output_buff = gpuarray.zeros(shape, dtype)
        self.scratch_buff = gpuarray.zeros(shape, dtype)

        self.output_buff_dict[self.device_id] = self.output_buff.gpudata
        self.scratch_buff_dict[self.device_id] = self.scratch_buff.gpudata

        # Allocate IPC handles
        output_ipc_hdl = drv.mem_get_ipc_handle(self.output_buff.gpudata)
        scratch_ipc_hdl = drv.mem_get_ipc_handle(self.scratch_buff.gpudata)
        event_ipc_hdl = self.event.ipc_handle()

        msg = (self.device_id, output_ipc_hdl, scratch_ipc_hdl, event_ipc_hdl)

        # A broadcast is a collective call: every process has to issue the
        # same sequence of bcast() calls with the same root each time.  So
        # all processes walk the device list in the same order; the owner of
        # the current id sends, everyone else receives.
        # NOTE(review): assumes self.comm is an MPI-style communicator and
        # that self.device_id appears in self.op.device_ids - confirm.
        for root_id in self.op.device_ids:
            if root_id == self.device_id:
                self.comm.bcast(msg, root=root_id)
                continue

            (peer_id, peer_output_ipc, peer_scratch_ipc, peer_event_ipc) = \
                self.comm.bcast(None, root=root_id)
            output_hdl = drv.IPCMemoryHandle(peer_output_ipc)
            scratch_hdl = drv.IPCMemoryHandle(peer_scratch_ipc)
            event_hdl = drv.Event.from_ipc_handle(peer_event_ipc)
            self.output_buff_dict[peer_id] = output_hdl
            self.scratch_buff_dict[peer_id] = scratch_hdl
            self.event_buff_dict[peer_id] = event_hdl

Ejemplo n.º 2

0

Mostrar archivo

Archivo: tensor_ops.py Proyecto: leonllm/ngraph

    def init_buffers(self):
        """
        Allocate the local output/scratch buffers and trade IPC handles with
        the other devices via broadcasts on ``self.comm``.

        The scratch buffer holds ``segment_size * n_devs`` elements, where
        ``segment_size`` comes from ``calculate_segment_size``.  Local
        ``gpudata`` objects and the peers' opened handles/events are stored
        in ``output_buff_dict``, ``scratch_buff_dict`` and
        ``event_buff_dict`` keyed by device id.
        """
        source = self.op.args[0]
        shape = source.tensor_description().shape
        dtype = source.tensor_description().dtype

        n_devs = len(self.op.device_ids)
        segment_size = calculate_segment_size(
            source.tensor_description().axes.size, n_devs)

        # Local buffers: full-shape output, segmented scratch space.
        self.output_buff = gpuarray.zeros(shape, dtype)
        self.scratch_buff = gpuarray.zeros(segment_size * n_devs, dtype)

        self.output_buff_dict[self.device_id] = self.output_buff.gpudata
        self.scratch_buff_dict[self.device_id] = self.scratch_buff.gpudata

        # Everything a peer needs in order to reach this device's buffers.
        local_msg = (
            self.device_id,
            drv.mem_get_ipc_handle(self.output_buff.gpudata),
            drv.mem_get_ipc_handle(self.scratch_buff.gpudata),
            self.event.ipc_handle(),
        )

        # One broadcast per device id: the owner sends, the others receive.
        for root in self.device_ids:
            if root == self.device_id:
                self.comm.bcast(local_msg, root=root)
                continue

            peer_id, peer_output, peer_scratch, peer_event = \
                self.comm.bcast(None, root=root)

            # Open all three handles before touching the lookup tables.
            opened = (drv.IPCMemoryHandle(peer_output),
                      drv.IPCMemoryHandle(peer_scratch),
                      drv.Event.from_ipc_handle(peer_event))
            tables = (self.output_buff_dict,
                      self.scratch_buff_dict,
                      self.event_buff_dict)
            for table, hdl in zip(tables, opened):
                table[peer_id] = hdl

Ejemplo n.º 3

0

Mostrar archivo

Archivo: tensor_ops.py Proyecto: psdurley/ngraph

    def init_buffers(self):
        """
        Create this device's output/scratch buffers and swap IPC handles
        with the other devices through ``self.op.shared_queues``.

        The local handles are put on every other device's queue; then one
        message per other queue is read from this device's own queue and
        the opened handles/events are stored in ``output_buff_dict``,
        ``scratch_buff_dict`` and ``event_buff_dict`` under the sender's id.
        """
        shape = self.op.args[0].tensor_description().shape
        dtype = self.op.args[0].tensor_description().dtype

        # Local output and scratch buffers share the argument's shape.
        self.output_buff = gpuarray.zeros(shape, dtype)
        self.scratch_buff = gpuarray.zeros(shape, dtype)

        self.output_buff_dict[self.device_id] = self.output_buff.gpudata
        self.scratch_buff_dict[self.device_id] = self.scratch_buff.gpudata

        # Exportable handles for the two buffers and the event.
        own_output_ipc = drv.mem_get_ipc_handle(self.output_buff.gpudata)
        own_scratch_ipc = drv.mem_get_ipc_handle(self.scratch_buff.gpudata)
        own_event_ipc = self.event.ipc_handle()

        # Announce the handles on every queue except our own.
        for dev, queue in self.op.shared_queues.items():
            if dev == self.device_id:
                continue
            queue.put((self.device_id, own_output_ipc,
                       own_scratch_ipc, own_event_ipc))

        # Collect one announcement from each of the other devices.
        inbox = self.op.shared_queues[self.device_id]
        for _ in range(len(self.op.shared_queues) - 1):
            peer_id, peer_output, peer_scratch, peer_event = inbox.get()
            opened = (drv.IPCMemoryHandle(peer_output),
                      drv.IPCMemoryHandle(peer_scratch),
                      drv.Event.from_ipc_handle(peer_event))
            tables = (self.output_buff_dict,
                      self.scratch_buff_dict,
                      self.event_buff_dict)
            for table, hdl in zip(tables, opened):
                table[peer_id] = hdl

Ejemplo n.º 4

0

Mostrar archivo

Archivo: tensor_ops.py Proyecto: ami-GS/ngraph

def open_ipc_handle(shared_queue):
    """
    Block until a ``(buffer_handle, lock_handle)`` pair arrives on
    *shared_queue*, then open both as IPC memory handles.

    The queue is polled with a ``SLEEP_S`` timeout; a timeout (``Empty``)
    just triggers another attempt, any other exception propagates.

    Returns:
        tuple: ``(buf_hdl, lock)``, both ``drv.IPCMemoryHandle`` objects.
    """
    while True:
        try:
            buf_ipc, lock_ipc = shared_queue.get(timeout=SLEEP_S)
            return (drv.IPCMemoryHandle(buf_ipc),
                    drv.IPCMemoryHandle(lock_ipc))
        except Empty:
            # Nothing queued yet; keep polling.
            continue

Ejemplo n.º 5

0

Mostrar archivo

Archivo: tensor_ops.py Proyecto: leonllm/ngraph

def bcast_ipc_handle(comm, handle=None):
    """
    Broadcast an IPC memory handle over *comm*.

    With *handle* given, its IPC handle is obtained via
    ``drv.mem_get_ipc_handle`` and broadcast, and the result of
    ``comm.bcast`` is returned.  Without it, the broadcast value is
    received and returned opened as a ``drv.IPCMemoryHandle``.
    """
    if handle is None:
        # Receiving side: open whatever was broadcast.
        received = comm.bcast(None)
        return drv.IPCMemoryHandle(received)

    # Sending side: export the buffer and broadcast the handle.
    return comm.bcast(drv.mem_get_ipc_handle(handle))

Ejemplo n.º 6

0

Mostrar archivo

def fun_load(config, sock_data=5000):

    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']
    # recv_queue and send_queue are multiprocessing.Queue
    # recv_queue is only for receiving
    # send_queue is only for sending

    # if need to do random crop and mirror
    flag_randproc = not config['use_data_layer']
    flag_batch = config['batch_crop_mirror']

    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()
    sock = zmq.Context().socket(zmq.PAIR)
    sock.bind('tcp://*:{0}'.format(sock_data))

    shape, dtype, h = sock.recv_pyobj()
    print 'shared_x information received'

    gpu_data_remote = gpuarray.GPUArray(shape,
                                        dtype,
                                        gpudata=drv.IPCMemoryHandle(h))
    gpu_data = gpuarray.GPUArray(shape, dtype)

    img_mean = recv_queue.get()
    print 'img_mean received'

    # The first time, do the set ups and other stuff

    # receive information for loading

    while True:
        # getting the hkl file name to load
        hkl_name = recv_queue.get()

        # print hkl_name
        data = hkl.load(hkl_name) - img_mean
        # print 'load ', time.time() - bgn_time

        if flag_randproc:
            param_rand = recv_queue.get()

            data = crop_and_mirror(data, param_rand, flag_batch=flag_batch)

        gpu_data.set(data)

        # wait for computation on last minibatch to finish
        msg = recv_queue.get()
        assert msg == 'calc_finished'

        drv.memcpy_peer(gpu_data_remote.ptr, gpu_data.ptr,
                        gpu_data.dtype.itemsize * gpu_data.size, ctx, ctx)

        ctx.synchronize()

        send_queue.put('copy_finished')

Ejemplo n.º 7

0

Mostrar archivo

Archivo: tensor_ops.py Proyecto: leonllm/ngraph

 def bind_buffers(self):
     """
     Resolve the tensor view if only a description is held, run the parent
     class's binding, then open the buffer whose IPC handle is received
     from ``self.source``.
     """
     if isinstance(self.tensor, TensorDescription):
         self.tensor = self.tensor_view_from_td(self.tensor)
     super(CudaRecvKernel, self).bind_buffers()
     # The handle arrives as a message tagged TAG_IPC.
     self.sender_buf = drv.IPCMemoryHandle(
         self.comm.recv(source=self.source, tag=TAG_IPC))

Ejemplo n.º 8

0

Mostrar archivo

Archivo: proc_load.py Proyecto: lanlianhuaer/Recurrent-Pose-Attention

def fun_load(config, sock_data_2=5001):
    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']
    # recv_queue and send_queue are multiprocessing.Queue
    # recv_queue is only for receiving
    # send_queue is only for sending

    num_timesteps = config['num_timesteps']
    num_seq = config['num_seq']
    img_scale_x = config['img_scale_x']
    img_scale_y = config['img_scale_y']
    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx_2 = dev.make_context()

    sock_2 = zmq.Context().socket(zmq.PAIR)
    sock_2.bind('tcp://*:{0}'.format(sock_data_2))
    shape_temporal, dtype_temporal, h_temporal = sock_2.recv_pyobj()
    print 'shared_x information received', shape_temporal
    gpu_data_remote_temporal = gpuarray.GPUArray(
        shape_temporal,
        dtype_temporal,
        gpudata=drv.IPCMemoryHandle(h_temporal))
    gpu_data_temporal = gpuarray.GPUArray(shape_temporal, dtype_temporal)
    # print 'img_mean received'
    # The first time, do the set ups and other stuff
    # receive information for loading
    while True:
        video_name_temporal = recv_queue.get()
        rand_param = recv_queue.get()
        if config['modal'] == 'rgb':
            data_temporal = prepare_data_rgb(video_name_temporal,
                                             num_timesteps,
                                             num_seq,
                                             rand_param,
                                             data_shape=(img_scale_x,
                                                         img_scale_y, 3))
        else:
            data_temporal = prepare_data_flow(video_name_temporal,
                                              num_timesteps,
                                              num_seq,
                                              rand_param,
                                              data_shape=(img_scale_x,
                                                          img_scale_y))

        gpu_data_temporal.set(data_temporal)
        # wait for computation on last minibatch to finish
        msg = recv_queue.get()
        assert msg == 'calc_finished'
        drv.memcpy_peer(
            gpu_data_remote_temporal.ptr, gpu_data_temporal.ptr,
            gpu_data_temporal.dtype.itemsize * gpu_data_temporal.size, ctx_2,
            ctx_2)

        ctx_2.synchronize()
        send_queue.put('copy_finished')

Ejemplo n.º 9

0

Mostrar archivo

Archivo: ipc-bz.py Proyecto: jofrantoba/blazingdb-protocol

    def controller(h):
        """
        Open the IPC memory handle *h* on GPU 0, print the buffer it points
        to as a ``(1, 32)`` int8 array, and return a fixed reply.

        Args:
            h: object convertible with ``bytes(h)`` to the raw handle bytes.

        Returns:
            bytes: always ``b'hi back!'``.
        """
        drv.init()
        dev = drv.Device(0)
        ctx_gpu = dev.make_context()

        # The context must come off the stack even if opening or reading
        # the shared buffer fails.
        try:
            print('receive handler')
            handle_bytes = bytearray(bytes(h))
            print(handle_bytes)
            x_ptr = drv.IPCMemoryHandle(handle_bytes)
            print('server gpu type: ')
            print(type(x_ptr))
            x_gpu = gpuarray.GPUArray((1, 32), numpy.int8, gpudata=x_ptr)
            print('gpu:  ', x_gpu.get())
        finally:
            ctx_gpu.pop()
        return b'hi back!'

Ejemplo n.º 10

0

Mostrar archivo

def proc2():
    sock = zmq.Context().socket(zmq.REP)
    sock.bind('tcp://*:5000')

    drv.init()
    dev = drv.Device(0)
    ctx = dev.make_context()

    shape, dtype, h = sock.recv_pyobj()
    sock.send_pyobj('')

    x_gpu = gpuarray.GPUArray(shape, dtype, gpudata=drv.IPCMemoryHandle(h))
    print x_gpu

    ctx.detach()

Ejemplo n.º 11

0

Mostrar archivo

Archivo: tensor_ops.py Proyecto: nagyist/NervanaSystems-ngraph

def setup_ipc_handle(op, comm, cmd, handle=None, dest=None):
    """
    Send or receive a buffer reference between devices over *comm*.

    With ``cmd == 'send'``, a ``(local, payload)`` message tagged
    ``TAG_IPC`` is sent to every entry of *dest*: for the destination equal
    to ``op.metadata['device_id']`` the payload is ``int(handle)``,
    otherwise it is the IPC handle of *handle*.  Nothing is returned.

    With any other *cmd*, one such message is received from
    ``op.source_id``; the payload is returned as is when flagged local and
    opened as a ``drv.IPCMemoryHandle`` otherwise.
    """
    if cmd != 'send':
        is_local, payload = comm.recv(source=op.source_id, tag=TAG_IPC)
        return payload if is_local else drv.IPCMemoryHandle(payload)

    for target in dest:
        if op.metadata['device_id'] == int(target):
            # Same device: the integer value of the handle is sent as is.
            message = (True, int(handle))
        else:
            message = (False, drv.mem_get_ipc_handle(handle))
        comm.send(message, dest=int(target), tag=TAG_IPC)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: ipc-zmq.py Proyecto: jofrantoba/blazingdb-protocol

def func2():
    """
    Receive an IPC memory handle on a zmq REP socket bound to port 6000
    and print the buffer it points to as a ``(1, 32)`` int8 array.
    """
    drv.init()
    dev = drv.Device(0)
    ctx_gpu = dev.make_context()

    # The context must come off the stack even if the socket setup,
    # the receive or the device read fails.
    try:
        ctx = zmq.Context()
        sock = ctx.socket(zmq.REP)
        sock.bind('tcp://*:6000')

        h = sock.recv_pyobj()

        x_ptr = drv.IPCMemoryHandle(h)
        x_gpu = gpuarray.GPUArray((1, 32), numpy.int8, gpudata=x_ptr)

        print('gpu:  ', x_gpu.get())
    finally:
        ctx_gpu.pop()

Ejemplo n.º 13

0

Mostrar archivo

Archivo: cuda.py Proyecto: skallumadi/chainer

    def get(self):
        """Build a GPUArray described by this IPC handle wrapper.

        Returns:
            ~pycuda.gpuarray.GPUArray: Array whose ``shape``, ``size`` and
            ``mem_size`` are copied from this object and which carries the
            raw handle in an ``ipc_handle`` attribute.

        .. note::

           Data races between multiple processes sharing the memory are
           not handled here.

        """
        # NOTE(review): the opened handle is not kept or attached to the
        # array - confirm whether it should become the array's gpudata.
        drv.IPCMemoryHandle(self.handle)
        array = gpuarray.GPUArray((0, ), dtype=self.dtype)
        # Overwrite the placeholder geometry with the recorded one.
        for attr in ('shape', 'size', 'mem_size'):
            setattr(array, attr, getattr(self, attr))
        array.ipc_handle = self.handle
        return array

Ejemplo n.º 14

0

Mostrar archivo

 def ipc_handle_wrap(self, handle):
     """Return *handle* wrapped in a ``cuda.IPCMemoryHandle``."""
     opened = cuda.IPCMemoryHandle(handle)
     return opened

Ejemplo n.º 15

0

Mostrar archivo

def train_net(config, private_config):

    # UNPACK CONFIGS
    (train_videos_spatial_jhmdb,val_videos_spatial_jhmdb,train_videos_temporal_jhmdb,val_videos_temporal_jhmdb,
     train_targets,val_targets,
           train_labels_jhmdb,val_labels_jhmdb) = unpack_configs_jhmdb(config,gpu_id=private_config['gpu_id'])
    # print('val_len',len(val_videos_spatial_jhmdb),'train_len',len(train_videos_spatial_jhmdb))
    if config['modal']=='rgb':
        train_videos = list(train_videos_spatial_jhmdb)
        test_videos = list(val_videos_spatial_jhmdb)
    else:
        train_videos = list(train_videos_temporal_jhmdb)
        test_videos = list(val_videos_temporal_jhmdb)
    print('jhmdb_len',len(train_videos),len(train_labels_jhmdb))#,len(tr_video_length_jhmdb))
    flag_para_load =config['para_load']
    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                        print 'training cost:', cost_ij,'cost_nll:',cost_nll,'cost_attention:',cost_att

                    if config['print_train_error']:
                        error_ij = train_error()

                        gpu_send_queue.put(error_ij)
                        that_error = gpu_recv_queue.get()
                        error_ij = (error_ij + that_error) / 2.

                        if private_config['flag_verbose']:
                            print 'training error rate:', error_ij

                if flag_para_load and (count < len(minibatch_range)):
                    load_send_queue.put('calc_finished')

                if count%20 == 0:
                    e = time.time()
                    print "time per 20 iter:", (e - s)
            # ############### Test on Validation Set ##################
            DropoutLayer.SetDropoutOff()
            this_val_error, this_val_loss = get_test_error(config,
                 shared_x, shared_mask, shared_y,shared_target,shared_use_noise,
                 shared_conv,test_videos,  val_labels_jhmdb,
                flag_para_load,
                batch_size,num_seq, validate_model_lstm,train_model,
                send_queue=load_send_queue, recv_queue=load_recv_queue)

            # report validation stats
            gpu_send_queue.put(this_val_error)
            that_val_error = gpu_recv_queue.get()
            this_val_error = (this_val_error + that_val_error) / 2.

            gpu_send_queue.put(this_val_loss)
            that_val_loss = gpu_recv_queue.get()
            this_val_loss = (this_val_loss + that_val_loss) / 2.

            if private_config['flag_verbose']:
                print('epoch %i: test loss of jhmdb %f ' %
                      (epoch, this_val_loss))
                print('epoch %i: test error of jhmdb %f %%' %
                      (epoch, this_val_error * 100.))
            val_record.append([this_val_error, this_val_loss])
            if private_config['flag_save']:
                np.save(config['weights_dir'] + 'test_record_jhmdb.npy', val_record)

            DropoutLayer.SetDropoutOn()
            ###########################################
            # Adapt Learning Rate
            step_idx = adjust_learning_rate(config, epoch, step_idx,
                                            val_record, learning_rate)
            # Save Weights, only one of them will do
            if private_config['flag_save'] :
                if epoch % config['snapshot_freq'] == 0:
                    save_weights(layers, config['weights_dir'], epoch)
                    np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                            learning_rate.get_value())
                    save_momentums(vels, config['weights_dir'], epoch)
        print('Optimization complete.')

Ejemplo n.º 16

0

Mostrar archivo

Archivo: py-connector.py Proyecto: jofrantoba/blazingdb-protocol

def main():
    """
    Demo client: connect, create the ``nation`` table, run one query whose
    column buffers are passed as CUDA IPC handles, print the result
    metadata and columns, then close the connection and pop the context.

    ``Error`` exceptions raised by the client calls are printed and
    otherwise ignored.
    """
    client = PyConnector('/tmp/orchestrator.socket', '/tmp/ral.socket')

    cuda.init()
    ctx_gpu = cuda.Device(0).make_context()

    try:
        client.connect()
    except Error as err:
        print(err)

    try:
        client.run_ddl_create_table('nation', ['id'], ['GDF_INT8'], 'main')
    except Error as err:
        print(err)

    # One sample device buffer each for the column's 'data' and 'valid'
    # fields; the size reported by the second call is the one that is used.
    data_gpu, data_sz = create_sample_device_data()
    data_handler = bytes(cuda.mem_get_ipc_handle(data_gpu))
    valid_gpu, data_sz = create_sample_device_data()
    valid_handler = bytes(cuda.mem_get_ipc_handle(valid_gpu))

    try:
        column_spec = {
            'data': data_handler,
            'valid': valid_handler,
            'size': data_sz,
            'dtype': 1,
            'null_count': 0,
            'dtype_info': 0,
        }
        table_spec = {
            'name': 'main.nation',
            'columns': [column_spec],
            'columnNames': ['id'],
        }
        table_group = {'tables': [table_spec], 'name': 'main'}

        result_set = client.run_dml_query('select id > 5 from main.nation',
                                          table_group)

        print("#RESULT_SET:")
        print('GetResult Response')
        print('  metadata:')
        metadata_rows = (
            ('     status: %s', 'status'),
            ('    message: %s', 'message'),
            ('       time: %s', 'time'),
            ('       rows: %s', 'rows'),
        )
        for template, field in metadata_rows:
            print(template % getattr(result_set.metadata, field))
        print('  columnNames: %s' % list(result_set.columnNames))

        for column in result_set.columns:
            # Open the returned handle and view it as a (1, size) int8 array.
            x_ptr = cuda.IPCMemoryHandle(column.data)
            x_gpu = gpuarray.GPUArray((1, column.size), numpy.int8,
                                      gpudata=x_ptr)
            print('\tgpu:  ', x_gpu.get())
        print("#RESULT_SET:")

        client.free_result(123456)

    except Error as err:
        print(err)

    client.close_connection()
    ctx_gpu.pop()

Ejemplo n.º 17

0

Mostrar archivo

def fun_mlp(shared_args, private_args, this_queue, that_queue):
    '''
    Train an MLP in one of two cooperating processes, averaging the
    parameters with the other process after each of its training steps.

    shared_args
    contains neural network parameters ('learning_rate', 'n_epochs',
    'dataset', 'batch_size', 'L1_reg', 'L2_reg', 'n_hidden')

    private_args
    contains parameters for process run on each gpu ('ind_gpu', 'gpu',
    'flag_client', 'mod', 'verbose')

    this_queue and that_queue are used for synchronization between processes:
    every handshake is a put('') on this_queue followed by a get() on
    that_queue.
    '''

    learning_rate = shared_args['learning_rate']
    n_epochs = shared_args['n_epochs']
    dataset = shared_args['dataset']
    batch_size = shared_args['batch_size']
    L1_reg = shared_args['L1_reg']
    L2_reg = shared_args['L2_reg']
    n_hidden = shared_args['n_hidden']

    ####
    # pycuda and zmq environment
    drv.init()
    dev = drv.Device(private_args['ind_gpu'])
    ctx = dev.make_context()
    sock = zmq.Context().socket(zmq.PAIR)

    # one process connects, the other binds, both on port 5000
    if private_args['flag_client']:
        sock.connect('tcp://localhost:5000')
    else:
        sock.bind('tcp://*:5000')
    ####

    ####
    # import theano related
    # (done here, inside the function, after the pycuda context exists and
    # with the device chosen through private_args['gpu'])
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(private_args['gpu'])

    import theano
    import theano.tensor as T

    from logistic_sgd import load_data
    from mlp import MLP

    import theano.misc.pycuda_init
    import theano.misc.pycuda_utils

    ####

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    # the test split is unpacked but not used below
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (this file is Python 2: `/` on ints is integer division, and the
    # results are passed to xrange below)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = np.random.RandomState(1234)

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10)

    # negative log likelihood plus L1 and squared-L2 regularisation terms
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # error of the classifier on validation minibatch `index`
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain gradient-descent update for every parameter
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # returns the cost on training minibatch `index` and applies `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    ####
    # setting pycuda and
    # pass handles, only done once

    param_ga_list = []
    # a list of pycuda gpuarrays which point to value of theano shared variable on this gpu

    param_other_list = []
    # a list of theano shared variables that are used to store values of theano shared variable from the other gpu

    param_ga_other_list = []
    # a list of pycuda gpuarrays which point to theano shared variables in param_other_list

    h_list = []
    # a list of pycuda IPC handles

    shape_list = []
    # a list containing shapes of variables in param_ga_list

    dtype_list = []
    # a list containing dtypes of variables in param_ga_list

    average_fun_list = []
    # a list containing theano functions for averaging parameters

    for param in classifier.params:
        # local copy that will receive the other process's value
        param_other = theano.shared(param.get_value())
        param_ga = \
            theano.misc.pycuda_utils.to_gpuarray(param.container.value)
        param_ga_other = \
            theano.misc.pycuda_utils.to_gpuarray(
                param_other.container.value)
        # IPC handle for this process's parameter memory
        h = drv.mem_get_ipc_handle(param_ga.ptr)
        # replaces param by the mean of param and param_other
        average_fun = \
            theano.function([], updates=[(param,
                                          (param + param_other) / 2.)])

        param_other_list.append(param_other)
        param_ga_list.append(param_ga)
        param_ga_other_list.append(param_ga_other)
        h_list.append(h)
        shape_list.append(param_ga.shape)
        dtype_list.append(param_ga.dtype)
        average_fun_list.append(average_fun)

    # pass shape, dtype and handles
    sock.send_pyobj((shape_list, dtype_list, h_list))
    shape_other_list, dtype_other_list, h_other_list = sock.recv_pyobj()

    param_ga_remote_list = []

    # create gpuarray point to the other gpu use the passed information
    for shape_other, dtype_other, h_other in zip(shape_other_list,
                                                 dtype_other_list,
                                                 h_other_list):
        param_ga_remote = \
            gpuarray.GPUArray(shape_other, dtype_other,
                              gpudata=drv.IPCMemoryHandle(h_other))

        param_ga_remote_list.append(param_ga_remote)
    ####

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # handshake with the other process before starting the timer
    this_queue.put('')
    that_queue.get()
    start_time = time.time()

    epoch = 0

    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            # this process only trains on minibatches whose index has the
            # parity given by private_args['mod']
            if minibatch_index % 2 == private_args['mod']:
                train_model(minibatch_index)

                # handshake after the training step, before copying
                this_queue.put('')
                that_queue.get()

                # exchanging weights
                # (copy the remote parameters into the local param_other
                # buffers)
                for param_ga, param_ga_other, param_ga_remote in \
                        zip(param_ga_list, param_ga_other_list,
                            param_ga_remote_list):

                    drv.memcpy_peer(
                        param_ga_other.ptr, param_ga_remote.ptr,
                        param_ga_remote.dtype.itemsize * param_ga_remote.size,
                        ctx, ctx)

                # handshake once the copies have been synchronized, before
                # the parameters are overwritten by the averaging step
                ctx.synchronize()
                this_queue.put('')
                that_queue.get()

                for average_fun in average_fun_list:
                    average_fun()

        # once per epoch, after the minibatch loop
        if private_args['verbose']:
            validation_losses = [
                validate_model(i) for i in xrange(n_valid_batches)
            ]
            this_validation_loss = np.mean(validation_losses)

            # minibatch_index is the last value left by the for-loop above
            print('epoch %i, minibatch %i/%i, validation error %f %%' %
                  (epoch, minibatch_index + 1, n_train_batches,
                   this_validation_loss * 100.))

    end_time = time.time()

    # final handshake after the timer has been stopped
    this_queue.put('')
    that_queue.get()

    if private_args['verbose']:
        print 'The code run for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] + ' ran for %.1fs' %
                              ((end_time - start_time)))

Ejemplo n.º 18

0

Mostrar archivo

def train_net(config, private_config):

    # UNPACK CONFIGS
    (flag_para_load, flag_datalayer, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = \
        unpack_configs(config, ext_data=private_config['ext_data'],
                       ext_label=private_config['ext_label'])

    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                    print 'training cost:', cost_ij

                if config['print_train_error']:
                    error_ij = train_error()

                    gpu_send_queue.put(error_ij)
                    that_error = gpu_recv_queue.get()
                    error_ij = (error_ij + that_error) / 2.

                    if private_config['flag_verbose']:
                        print 'training error rate:', error_ij

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_val_error, this_val_loss = get_val_error_loss(
            rand_arr,
            shared_x,
            shared_y,
            val_filenames,
            val_labels,
            flag_datalayer,
            flag_para_load,
            batch_size,
            validate_model,
            send_queue=load_send_queue,
            recv_queue=load_recv_queue)

        # report validation stats
        gpu_send_queue.put(this_val_error)
        that_val_error = gpu_recv_queue.get()
        this_val_error = (this_val_error + that_val_error) / 2.

        gpu_send_queue.put(this_val_loss)
        that_val_loss = gpu_recv_queue.get()
        this_val_loss = (this_val_loss + that_val_loss) / 2.

        if private_config['flag_verbose']:
            print('epoch %i: validation loss %f ' % (epoch, this_val_loss))
            print('epoch %i: validation error %f %%' %
                  (epoch, this_val_error * 100.))
        val_record.append([this_val_error, this_val_loss])

        if private_config['flag_save']:
            np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx, val_record,
                                        learning_rate)

        # Save Weights, only one of them will do
        if private_config['flag_save']:
            if epoch % config['snapshot_freq'] == 0:
                save_weights(layers, config['weights_dir'], epoch)
                np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                        learning_rate.get_value())
                save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')

Ejemplo n.º 19

0

Mostrar archivo

    sock = zmq.Context().socket(zmq.PAIR)
    try:
        sock.bind('tcp://*:{0}'.format(config['sock_data']))
    except zmq.error.ZMQError:
        print '[load] rank %d port %d zmq error' % (rank,config['sock_data'])
        sock.close()
        zmq.Context().term()
        raise
    finally:
        pass

    shape, dtype, h = sock.recv_pyobj()
    if verbose: print '[load] 1. shared_x information received'

    gpu_data_remote = gpuarray.GPUArray(shape, dtype,
                                        gpudata=drv.IPCMemoryHandle(h))
    gpu_data = gpuarray.GPUArray(shape, dtype)

    img_mean = icomm.recv(source=MPI.ANY_SOURCE, tag=66)
    if verbose: print '[load] 2. img_mean received'

    count=0
    mode=None
    import time
    while True:
        
        # 3. load the very first filename in 'train' or 'val' mode
        message = icomm.recv(source=0, tag=40)
        
        if message == 'stop':
            break