def proba(self, X, load_model=None):
        assert load_model != None, "load_model should be True of False"
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' %(dir))

            snapshots = glob.glob(dir+'*.npy')
            #e = os.path.basename(snapshots[0])[-5]
            e = self.config['e_snapshot']
            tools.load_weights(layers, dir, e)

        prob_model = theano.function(
                        inputs = [index],
                        outputs = self.model.proba,
                        givens={
                            x: X[index:(index + 1)],
                            }
                )
        y_prob = []
        n_test = X.get_value(borrow=True).shape[0]
        DropoutLayer.SetDropoutOff()
        print "getting probability on %d datas" %(int(n_test))
        for i in xrange(n_test):
            y_prob.append(prob_model(i))
        return np.asarray(y_prob).reshape(n_test,y_prob[0].shape[1])
    def load(self):
        """Load snapshot weights for epoch ``self.config['e_snapshot']`` into
        the model's layers, in place.

        Raises
        ------
        IOError if the snapshot directory does not exist.
        """
        layers = self.model.layers
        snapshot_dir = self.model.snapshot  # renamed: don't shadow builtin `dir`
        if not os.path.isdir(snapshot_dir):
            raise IOError('no such snapshot file: %s' % (snapshot_dir))
        # (removed dead `snapshots = glob.glob(...)` — result was never used)
        e = self.config['e_snapshot']
        tools.load_weights(layers, snapshot_dir, e)
    def predict_by_sentence(self, test_set_x, index_test, load_model=None, dropout=False):
        assert load_model != None, "load_model should be True of False"
        #batch_size = self.model.batch_size
        #n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
        #self.n_test_batches = n_test_batches
        test_set_x = self.share_var(test_set_x,testing=True)
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        predict_times = len(index_test)-1
        index_test= T.cast(theano.tensor._shared(np.asarray(index_test,dtype=theano.config.floatX),borrow=True),'int32')
        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' %(dir))

            snapshots = glob.glob(dir+'*.npy')
            #e = os.path.basename(snapshots[0])[-5]
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
                        inputs = [index],
                        outputs = self.model.y_pred,
                        givens={
                            x: test_set_x[index_test[index]:index_test[(index + 1)]],
                            }
                )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.array([])
        DropoutLayer.SetDropoutOff()
        print "predict on %d datas" %(int(n_test))
        for i in xrange(predict_times):
            y_pred = np.concatenate((y_pred,test_model(i)),axis=0)

        return y_pred
    def predict(self, test_set_x, load_model=None, dropout=False):
        assert load_model != None, "load_model should be True of False"
        #batch_size = self.model.batch_size
        #n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
        #self.n_test_batches = n_test_batches
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' %(dir))

            snapshots = glob.glob(dir+'*.npy')
            #e = os.path.basename(snapshots[0])[-5]
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
                        inputs = [index],
                        outputs = self.model.y_pred,
                        givens={
                            x: test_set_x[index:(index + 1)],
                            }
                )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.zeros(n_test)
        DropoutLayer.SetDropoutOff()
        print "predict on %d datas" %(int(n_test))
        for i in xrange(n_test):
            y_pred[i] = int(test_model(i))

        return y_pred
Example #5
0
def valid():
    rng = np.random.RandomState(23455)

    config = {'batch_size':256,
              'use_data_layer':True,
              'lib_conv':'cudnn',
              'rand_crop':False,
              'rng':rng
              }

    model = AlexNet(config)
    DropoutLayer.SetDropoutOff()
    load_weights(model.layers, model_folder, model_epoch)
    img_mean = np.load(img_mean_path)[:, :, :, np.newaxis]

    rep = np.zeros((195 * 256, 4096))
    batch_size = 256
    x = theano.shared(np.random.normal(0, 1 , (3, 227, 227, 256)).astype(theano.config.floatX))
    f = theano.function([], [model.proba, model.layers[-2].output], givens={model.x:x})
    accuracy = []
    tic = time.time()

    for i, filename in enumerate(sorted(os.listdir(data_folder))):
        val_img = hkl.load(os.path.join(data_folder, filename)) - img_mean
        x.set_value(val_img)
        probas, last_layer = f()
        pred = np.argmax(probas, axis=1)

        accuracy += [(pred == y[i * batch_size: (i + 1) * batch_size]).mean() * 100.]
        toc = time.time()
        print "filename %s in %.2f sec \taccuracy :: %.2f\r" % (filename, toc - tic, np.mean(accuracy)),
        sys.stdout.flush()
        tic = toc

        rep[i * batch_size:(i + 1) * batch_size, :] = last_layer
    print "\naccuracy :: ", np.array(accuracy).mean()
    pdb.set_trace()
    cPickle.dump(rep, open("representation.pkl", "w"))
Example #6
0
def test_speed_cpu():
    rng = np.random.RandomState(23455)

    config = {'batch_size':1,
              'use_data_layer':False,
              'lib_conv':'cpu',
              'rand_crop':False,
              'rng':rng
              }

    model = AlexNet(config)
    DropoutLayer.SetDropoutOff()
    load_weights(model.layers, model_folder, model_epoch)
    img_mean = np.load(img_mean_path)[:, :, :, np.newaxis]

    # testing time to predict per image
    tic = time.time()
    niters = 50
    for i in range(niters):
        x = np.random.normal(0, 1 , (3, 227, 227, 1)).astype(theano.config.floatX)   
        f = theano.function([model.x], model.outputs[-1])
        #print f(x).shape
    print "%.2f secs per images (averaged over %i iterations)" % ((time.time() - tic) / niters, niters)
def validate_performance(config):
    """Run one full validation pass over a trained AlexNet checkpoint.

    Loads epoch ``config['load_epoch']`` weights, disables dropout, and
    evaluates the validation set with top-1 and top-5 error plus loss.
    Returns (validation_error, validation_loss).

    NOTE(review): requires a CUDA device; when config['para_load'] is set,
    expects a parallel data-loading process listening on the paired zmq
    socket — confirm against the launcher script.
    """

    # UNPACK CONFIGS
    (flag_para_load,  train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)

    if flag_para_load:
        # pycuda and zmq set up
        drv.init()
        dev = drv.Device(int(config['gpu'][-1]))
        ctx = dev.make_context()
        sock = zmq.Context().socket(zmq.PAIR)
        sock.connect('tcp://localhost:{0}'.format(config['sock_data']))

        load_send_queue = config['queue_t2l']
        load_recv_queue = config['queue_l2t']
    else:
        load_send_queue = None
        load_recv_queue = None

    # Deliberately import theano AFTER selecting the GPU so the device
    # binding takes effect; do not hoist these imports to the top of file.
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(config['gpu'])
    import theano
    theano.config.on_unused_input = 'warn'

    from layers import DropoutLayer
    from alex_net import AlexNet, compile_models

    import theano.misc.pycuda_init
    import theano.misc.pycuda_utils

    # # BUILD NETWORK ##
    model = AlexNet(config)
    layers = model.layers
    batch_size = model.batch_size

    # # COMPILE FUNCTIONS ##
    # Only validate_model is used below; the training artifacts come along
    # because compile_models builds them as a bundle.
    (train_model, validate_model, train_error, learning_rate,
        shared_x, shared_y, rand_arr, vels) = compile_models(model, config,
                                                             flag_top_5=True)

    print '... training'

    if flag_para_load:
        # pass ipc handle and related information
        gpuarray_batch = theano.misc.pycuda_utils.to_gpuarray(
            shared_x.container.value)
        h = drv.mem_get_ipc_handle(gpuarray_batch.ptr)
        sock.send_pyobj((gpuarray_batch.shape, gpuarray_batch.dtype, h))
        load_send_queue.put(img_mean)
    

    load_epoch = config['load_epoch']
    load_weights(layers, config['weights_dir'], load_epoch)

    # Validation must run with dropout disabled.
    DropoutLayer.SetDropoutOff()

    
    this_validation_error, this_validation_error_top_5, this_validation_loss = \
        get_val_error_loss(rand_arr, shared_x, shared_y,
                           val_filenames, val_labels,
                           flag_para_load,img_mean,
                           batch_size, validate_model,
                           send_queue=load_send_queue,
                           recv_queue=load_recv_queue,
                           flag_top_5=True)

    print('validation error %f %%' %
          (this_validation_error * 100.))
    print('top 5 validation error %f %%' %
          (this_validation_error_top_5 * 100.))
    print('validation loss %f ' %
          (this_validation_loss))

    return this_validation_error, this_validation_loss
Example #8
0
def train_net(config, private_config):

    # UNPACK CONFIGS
    (train_videos_spatial_jhmdb,val_videos_spatial_jhmdb,train_videos_temporal_jhmdb,val_videos_temporal_jhmdb,
     train_targets,val_targets,
           train_labels_jhmdb,val_labels_jhmdb) = unpack_configs_jhmdb(config,gpu_id=private_config['gpu_id'])
    # print('val_len',len(val_videos_spatial_jhmdb),'train_len',len(train_videos_spatial_jhmdb))
    if config['modal']=='rgb':
        train_videos = list(train_videos_spatial_jhmdb)
        test_videos = list(val_videos_spatial_jhmdb)
    else:
        train_videos = list(train_videos_temporal_jhmdb)
        test_videos = list(val_videos_temporal_jhmdb)
    print('jhmdb_len',len(train_videos),len(train_labels_jhmdb))#,len(tr_video_length_jhmdb))
    flag_para_load =config['para_load']
    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                        print 'training cost:', cost_ij,'cost_nll:',cost_nll,'cost_attention:',cost_att

                    if config['print_train_error']:
                        error_ij = train_error()

                        gpu_send_queue.put(error_ij)
                        that_error = gpu_recv_queue.get()
                        error_ij = (error_ij + that_error) / 2.

                        if private_config['flag_verbose']:
                            print 'training error rate:', error_ij

                if flag_para_load and (count < len(minibatch_range)):
                    load_send_queue.put('calc_finished')

                if count%20 == 0:
                    e = time.time()
                    print "time per 20 iter:", (e - s)
            # ############### Test on Validation Set ##################
            DropoutLayer.SetDropoutOff()
            this_val_error, this_val_loss = get_test_error(config,
                 shared_x, shared_mask, shared_y,shared_target,shared_use_noise,
                 shared_conv,test_videos,  val_labels_jhmdb,
                flag_para_load,
                batch_size,num_seq, validate_model_lstm,train_model,
                send_queue=load_send_queue, recv_queue=load_recv_queue)

            # report validation stats
            gpu_send_queue.put(this_val_error)
            that_val_error = gpu_recv_queue.get()
            this_val_error = (this_val_error + that_val_error) / 2.

            gpu_send_queue.put(this_val_loss)
            that_val_loss = gpu_recv_queue.get()
            this_val_loss = (this_val_loss + that_val_loss) / 2.

            if private_config['flag_verbose']:
                print('epoch %i: test loss of jhmdb %f ' %
                      (epoch, this_val_loss))
                print('epoch %i: test error of jhmdb %f %%' %
                      (epoch, this_val_error * 100.))
            val_record.append([this_val_error, this_val_loss])
            if private_config['flag_save']:
                np.save(config['weights_dir'] + 'test_record_jhmdb.npy', val_record)

            DropoutLayer.SetDropoutOn()
            ###########################################
            # Adapt Learning Rate
            step_idx = adjust_learning_rate(config, epoch, step_idx,
                                            val_record, learning_rate)
            # Save Weights, only one of them will do
            if private_config['flag_save'] :
                if epoch % config['snapshot_freq'] == 0:
                    save_weights(layers, config['weights_dir'], epoch)
                    np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                            learning_rate.get_value())
                    save_momentums(vels, config['weights_dir'], epoch)
        print('Optimization complete.')
Example #9
0
def train(trn_data_generator, vld_data=None):
    """Train the model returned by `build_model`, logging TRN/VLD metrics.

    Parameters
    ----------
    trn_data_generator : iterator yielding (X_batch, Y_batch) pairs.
    vld_data : (X, Y) pair used for periodic evaluation. No None-guard is
        applied below, so it must be provided whenever `vld_iter` triggers
        — TODO confirm with callers.

    Side effects: saves checkpoints under `experiment_dir/checkpoints`,
    reports to train.log via tools.StatLogger, appends to loss.log.
    Reads hyper-parameters from the module-level `config` dict.
    """
    learning_rate = config['learning_rate']
    experiment_dir = config['experiment_dir']
    data_dims = config['data_dims']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    num_samples_per_epoch = config["num_samples_per_epoch"]
    steps_per_epoch = num_samples_per_epoch // batch_size
    num_steps = steps_per_epoch * num_epochs
    checkpoint_dir = pth.join(experiment_dir, 'checkpoints')
    train_log_fpath = pth.join(experiment_dir, 'train.log')
    vld_iter = config["vld_iter"]
    checkpoint_iter = config["checkpoint_iter"]
    pretrained_weights = config.get("pretrained_weights", None)

    # ========================
    # construct training graph
    # ========================
    G = tf.Graph()
    with G.as_default():
        input_data_tensor = tf.placeholder(tf.float32, [None] + data_dims)
        input_label_tensor = tf.placeholder(tf.int32, [None])
        model = build_model(input_data_tensor, input_label_tensor)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads = optimizer.compute_gradients(model["loss"])
        grad_step = optimizer.apply_gradients(grads)
        init = tf.initialize_all_variables()

    # ===================================
    # initialize and run training session
    # ===================================
    log = tools.StatLogger(train_log_fpath)
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(graph=G, config=config_proto)
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)
    with sess.as_default():
        if pretrained_weights:
            print("-- loading weights from %s" % pretrained_weights)
            tools.load_weights(G, pretrained_weights)

        # Start training loop
        for step in range(num_steps):
            batch_train = list(next(trn_data_generator))
            X_trn = np.array(batch_train[0])
            Y_trn = np.array(batch_train[1])

            # Run the gradient step plus every metric op, keyed in a
            # deterministic (sorted) order so results can be re-zipped.
            ops = [grad_step] + [model[k] for k in sorted(model.keys())]
            inputs = {input_data_tensor: X_trn, input_label_tensor: Y_trn}
            results = sess.run(ops, feed_dict=inputs)
            results = dict(zip(sorted(model.keys()), results[1:]))
            print("TRN step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s" % (step,
                                                                                 results["error_top1"],
                                                                                 results["error_top5"],
                                                                                 results["loss"]))
            # Append mode: the original opened with "w+", truncating the
            # file every step so loss.log only ever held the last value.
            with open("loss.log", "a") as f:
                f.write("{}\n".format(results["loss"]))

            log.report(step=step,
                       split="TRN",
                       error_top5=float(results["error_top5"]),
                       # fixed copy-paste bug: was results["error_top5"]
                       error_top1=float(results["error_top1"]),
                       loss=float(results["loss"]))

            # report evaluation metrics every `vld_iter` training steps
            if (step % vld_iter == 0):
                print("-- running evaluation on vld split")
                X_vld = vld_data[0]
                Y_vld = vld_data[1]
                inputs = [input_data_tensor, input_label_tensor]
                args = [X_vld, Y_vld]
                ops = [model[k] for k in sorted(model.keys())]
                results = tools.iterative_reduce(ops, inputs, args, batch_size=1, fn=lambda x: np.mean(x, axis=0))
                results = dict(zip(sorted(model.keys()), results))
                print("VLD step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s" % (step,
                                                                                     results["error_top1"],
                                                                                     results["error_top5"],
                                                                                     results["loss"]))
                log.report(step=step,
                           split="VLD",
                           error_top5=float(results["error_top5"]),
                           error_top1=float(results["error_top1"]),
                           loss=float(results["loss"]))

            if (step % checkpoint_iter == 0) or (step + 1 == num_steps):
                print("-- saving check point")
                tools.save_weights(G, pth.join(checkpoint_dir, "weights.%s" % step))
Example #10
0
######################### TRAIN MODEL ################################
print '... training'


# Start Training Loop
epoch = 0
step_idx = 0
val_record = []

while epoch < config['n_epochs']:
    epoch = epoch + 1

    if config['resume_train'] and epoch == 1:
        load_epoch = config['load_epoch']
        load_weights(layers, config['weights_dir'], load_epoch)
        lr_to_load = np.load(
            config['weights_dir'] + 'lr_' + str(load_epoch) + '.npy')
        val_record = list(
            np.load(config['weights_dir'] + 'val_record.npy'))
        learning_rate.set_value(lr_to_load)
        load_momentums(vels, config['weights_dir'], load_epoch)
        epoch = load_epoch + 1

    n_batches = len(train_set) / batch_size
    for it in range(n_batches):
        num_iter = (epoch - 1) * len(train_set) + it
        print 'epoch', epoch, 'num_iter', num_iter, '/', n_batches

        with Timer('sample minibatch'):
            (batch_x, batch_y) = sample_minibatch(train_set, batch_size)
Example #11
0
def train(train_data_generator):
    """Train a model replicated across `num_gpus` GPUs with averaged gradients.

    One model tower is built per GPU; each computes the loss/gradients on its
    slice of the batch, gradients are averaged on the CPU, and a single
    optimizer step is applied. Hyper-parameters come from the module-level
    `config` dict. Checkpoints are saved every `checkpoint_iter` steps.

    Parameters
    ----------
    train_data_generator : iterator yielding (data_batch, label_batch) sized
        batch_size * num_gpus along the first axis.
    """
    checkpoint_dir = config["checkpoint_dir"]
    learning_rate = config['learning_rate']
    data_dims = config['data_dims']
    batch_size = config['batch_size']
    num_gpus = config['num_gpus']
    num_epochs = config['num_epochs']
    num_samples_per_epoch = config["num_samples_per_epoch"]
    pretrained_weights = config["pretrained_weights"]
    steps_per_epoch = num_samples_per_epoch // (batch_size * num_gpus)
    num_steps = steps_per_epoch * num_epochs
    checkpoint_iter = config["checkpoint_iter"]
    experiment_dir = config['experiment_dir']
    train_log_fpath = pth.join(experiment_dir, 'train.log')
    log = tools.MetricsLogger(train_log_fpath)


    # =====================
    # define training graph
    # =====================
    G = tf.Graph()
    with G.as_default(), tf.device('/cpu:0'):
        full_data_dims = [batch_size * num_gpus] + data_dims
        data = tf.placeholder(dtype=tf.float32,
                              shape=full_data_dims,
                              name='data')
        labels = tf.placeholder(dtype=tf.int32,
                                shape=[batch_size * num_gpus],
                                name='labels')

        # we split the large batch into sub-batches to be distributed onto each gpu
        # NOTE(review): tf.split(axis, num, value) is the pre-TF-1.0 argument
        # order — confirm the pinned TensorFlow version before touching this.
        split_data = tf.split(0, num_gpus, data)
        split_labels = tf.split(0, num_gpus, labels)

        # setup optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate)

        # setup one model replica per gpu to compute loss and gradient
        replica_grads = []
        for i in range(num_gpus):
            with tf.name_scope('tower_%d' % i), tf.device('/gpu:%d' % i):
                model = build_model(split_data[i], split_labels[i])
                loss = model["loss"]
                grads = optimizer.compute_gradients(loss)
                replica_grads.append(grads)
                # share variables between towers: only tower_0 creates them
                tf.get_variable_scope().reuse_variables()

        # We must calculate the mean of each gradient. Note this is a
        # synchronization point across all towers.
        average_grad = L.average_gradients(replica_grads)
        grad_step = optimizer.apply_gradients(average_grad)
        train_step = tf.group(grad_step)
        init = tf.initialize_all_variables()

    # ==================
    # run training graph
    # ==================
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(graph=G, config=config_proto)
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)
    with sess.as_default():
        if pretrained_weights:
            print("-- loading weights from %s" % pretrained_weights)
            tools.load_weights(G, pretrained_weights)

        for step in range(num_steps):
            data_batch, label_batch = train_data_generator.next()
            inputs = {data: data_batch, labels: label_batch}
            # `loss` still refers to the LAST tower's loss tensor from the
            # build loop above, so the reported value covers only that
            # tower's sub-batch, not the full batch average.
            results = sess.run([train_step, loss], inputs)
            print("step:%s loss:%s" % (step, results[1]))
            log.report(step=step, split="TRN", loss=float(results[1]))


            if (step % checkpoint_iter == 0) or (step + 1 == num_steps):
                print("-- saving check point")
                tools.save_weights(G, pth.join(checkpoint_dir, "weights.%s" % step))
Example #12
0
def train_net(config, private_config):

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = \
        unpack_configs(config, ext_data=private_config['ext_data'],
                       ext_label=private_config['ext_label'])


    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                    log_iter.write("%d\n" % num_iter)
                    log_iter.flush()
                    print 'training cost:', cost_ij
                    log_err_cost.write("%f\n" % cost_ij)
                    log_err_cost.flush()

                if config['print_train_error']:
                    error_ij = train_error()

                    gpu_send_queue.put(error_ij)
                    that_error = gpu_recv_queue.get()
                    error_ij = (error_ij + that_error) / 2.

                    if private_config['flag_verbose']:
                        print 'training error rate:', error_ij
                        log_err_rate.write("%f\n" % error_ij)
                        log_err_rate.flush()


            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

            if count%20 == 0:
                e = time.time()
                print "time per 20 iter:", (e - s)
                
        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_val_error, this_val_loss = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue, recv_queue=load_recv_queue)

        # report validation stats
        gpu_send_queue.put(this_val_error)
        that_val_error = gpu_recv_queue.get()
        this_val_error = (this_val_error + that_val_error) / 2.

        gpu_send_queue.put(this_val_loss)
        that_val_loss = gpu_recv_queue.get()
        this_val_loss = (this_val_loss + that_val_loss) / 2.

        if private_config['flag_verbose']:
            print('epoch %i: validation loss %f ' %
                  (epoch, this_val_loss))
            print('epoch %i: validation error %f %%' %
                  (epoch, this_val_error * 100.))
        val_record.append([this_val_error, this_val_loss])

        if private_config['flag_save']:
            np.save(config['weights_dir'] + 'val_record.npy', val_record)
            np.savetxt(config['weights_dir'] + 'val_record_txt.txt', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save Weights, only one of them will do
        if private_config['flag_save']:
            if epoch % config['snapshot_freq'] == 0:
                save_weights(layers, config['weights_dir'], epoch)
                np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                        learning_rate.get_value())
                save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
Example #13
0
def train_net(config):
    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)
    if flag_para_load:
        #  zmq set up
        sock = zmq.Context().socket(zmq.PAIR)
        sock.connect('tcp://*****:*****@iter " + str(count)
            if count == 1:
                s = time.time()
            if count == 20:
                e = time.time()
                print "time per 20 iter:", (e - s)
                logger.info("time per 20 iter: %f" % (e - s)) 
            cost_ij = train_model_wrap(train_model, shared_x,
                                       shared_y, rand_arr, img_mean,
                                       count, minibatch_index,
                                       minibatch_range, batch_size,
                                       train_filenames, train_labels,
                                       flag_para_load,
                                       config['batch_crop_mirror'],
                                       send_queue=load_send_queue,
                                       recv_queue=load_recv_queue)

            if num_iter % config['print_freq'] == 0:
                #print 'training @ iter = ', num_iter
                #print 'training cost:', cost_ij
		logger.info("training @ iter = %i" % (num_iter)) 
		logger.info("training cost: %lf" % (cost_ij)) 
                if config['print_train_error']:
                    logger.info('training error rate: %lf' % train_error())
                    #print 'training error rate:', train_error()

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        #"""
        DropoutLayer.SetDropoutOff()

        # result_list = [ this_validation_error, this_validation_error_top5, this_validation_loss ]
        # or
        # result_list = [ this_validation_error, this_validation_loss ]
        result_list = get_val_error_loss(
        #this_validation_error, this_validation_loss = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue, 
            recv_queue=load_recv_queue,
            flag_top_5=flag_top5)


        logger.info(('epoch %i: validation loss %f ' %
              (epoch, result_list[-1])))
        #print('epoch %i: validation loss %f ' %
        #      (epoch, this_validation_loss))
        if flag_top5:
            logger.info(('epoch %i: validation error (top 1) %f %%, (top5) %f %%' %
                (epoch,  result_list[0] * 100., result_list[1] * 100.)))
        else:
            logger.info(('epoch %i: validation error %f %%' %
                (epoch, result_list[0] * 100.)))
        #print('epoch %i: validation error %f %%' %
        #      (epoch, this_validation_error * 100.))
        val_record.append(result_list)
        #val_record.append([this_validation_error, this_validation_loss])
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save weights
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                       learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)
        #"""

    print('Optimization complete.')
Example #14
0
def train_net(config, private_config):

    # UNPACK CONFIGS
    (flag_para_load, flag_datalayer, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = \
        unpack_configs(config, ext_data=private_config['ext_data'],
                       ext_label=private_config['ext_label'])

    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                    print 'training cost:', cost_ij

                if config['print_train_error']:
                    error_ij = train_error()

                    gpu_send_queue.put(error_ij)
                    that_error = gpu_recv_queue.get()
                    error_ij = (error_ij + that_error) / 2.

                    if private_config['flag_verbose']:
                        print 'training error rate:', error_ij

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_val_error, this_val_loss = get_val_error_loss(
            rand_arr,
            shared_x,
            shared_y,
            val_filenames,
            val_labels,
            flag_datalayer,
            flag_para_load,
            batch_size,
            validate_model,
            send_queue=load_send_queue,
            recv_queue=load_recv_queue)

        # report validation stats
        gpu_send_queue.put(this_val_error)
        that_val_error = gpu_recv_queue.get()
        this_val_error = (this_val_error + that_val_error) / 2.

        gpu_send_queue.put(this_val_loss)
        that_val_loss = gpu_recv_queue.get()
        this_val_loss = (this_val_loss + that_val_loss) / 2.

        if private_config['flag_verbose']:
            print('epoch %i: validation loss %f ' % (epoch, this_val_loss))
            print('epoch %i: validation error %f %%' %
                  (epoch, this_val_error * 100.))
        val_record.append([this_val_error, this_val_loss])

        if private_config['flag_save']:
            np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx, val_record,
                                        learning_rate)

        # Save Weights, only one of them will do
        if private_config['flag_save']:
            if epoch % config['snapshot_freq'] == 0:
                save_weights(layers, config['weights_dir'], epoch)
                np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                        learning_rate.get_value())
                save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
Example #15
0
def train_net(config):
    """Train the network described by `config` on a single GPU.

    NOTE(review): this scraped copy is corrupted — the text between the
    zmq socket set-up and the in-loop progress printing was lost (the
    'tcp://*****:*****@ iter = ' line below fuses two distant fragments),
    so the model construction and training-loop header are missing and
    this function does not parse as-is. Restore from the original
    theano-alexnet source before use.
    """

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames, train_labels, val_labels,
     img_mean) = unpack_configs(config)

    # pycuda set up
    # device index is taken from the last character of e.g. 'gpu0'
    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()

    if flag_para_load:
        #  zmq set up
        # parallel-loading mode: a separate loader process feeds batches
        sock = zmq.Context().socket(zmq.PAIR)
        # NOTE(review): corrupted line — redacted address fused with the
        # training loop's progress printing; code in between is missing.
        sock.connect('tcp://*****:*****@ iter = ', num_iter
                print 'training cost:', cost_ij
                if config['print_train_error']:
                    print 'training error rate:', train_error()

            if flag_para_load and (count < len(minibatch_range)):
                # tell the loader process it may prepare the next batch
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        # deterministic forward passes while validating
        DropoutLayer.SetDropoutOff()

        this_validation_error, this_validation_loss = get_val_error_loss(
            rand_arr,
            shared_x,
            shared_y,
            val_filenames,
            val_labels,
            flag_para_load,
            img_mean,
            batch_size,
            validate_model,
            send_queue=load_send_queue,
            recv_queue=load_recv_queue)

        print('epoch %i: validation loss %f ' % (epoch, this_validation_loss))
        print('epoch %i: validation error %f %%' %
              (epoch, this_validation_error * 100.))
        # persist the full validation history every epoch
        val_record.append([this_validation_error, this_validation_loss])
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx, val_record,
                                        learning_rate)

        # Save weights
        # snapshot weights, learning rate, and momenta together so training
        # can be resumed from any saved epoch
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                    learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
def validate_performance(config):
    """Evaluate a saved AlexNet snapshot on the validation set.

    Loads the weights of epoch config['load_epoch'], disables dropout,
    and prints top-1 error, top-5 error, and validation loss.

    Returns:
        (top-1 validation error, validation loss). The top-5 error is
        printed but not returned.
    """

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames, train_labels, val_labels,
     img_mean) = unpack_configs(config)

    if flag_para_load:
        # pycuda and zmq set up
        # parallel-loading mode: a separate loader process fills the GPU
        # batch buffer; the IPC handshake happens further down.
        drv.init()
        dev = drv.Device(int(config['gpu'][-1]))
        ctx = dev.make_context()
        sock = zmq.Context().socket(zmq.PAIR)
        sock.connect('tcp://localhost:{0}'.format(config['sock_data']))

        load_send_queue = config['queue_t2l']
        load_recv_queue = config['queue_l2t']
    else:
        load_send_queue = None
        load_recv_queue = None

    # theano must be imported *after* the GPU device is selected above
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(config['gpu'])
    import theano
    theano.config.on_unused_input = 'warn'

    from layers import DropoutLayer
    from alex_net import AlexNet, compile_models

    import theano.misc.pycuda_init
    import theano.misc.pycuda_utils

    # # BUILD NETWORK ##
    model = AlexNet(config)
    layers = model.layers
    batch_size = model.batch_size

    # # COMPILE FUNCTIONS ##
    # flag_top_5=True makes validate_model additionally report top-5 error
    (train_model, validate_model, train_error, learning_rate, shared_x,
     shared_y, rand_arr, vels) = compile_models(model, config, flag_top_5=True)

    print '... training'

    if flag_para_load:
        # pass ipc handle and related information
        # share the GPU-resident batch buffer with the loader process
        gpuarray_batch = theano.misc.pycuda_utils.to_gpuarray(
            shared_x.container.value)
        h = drv.mem_get_ipc_handle(gpuarray_batch.ptr)
        sock.send_pyobj((gpuarray_batch.shape, gpuarray_batch.dtype, h))
        load_send_queue.put(img_mean)

    # restore the snapshot to be evaluated
    load_epoch = config['load_epoch']
    load_weights(layers, config['weights_dir'], load_epoch)

    # deterministic forward passes during evaluation
    DropoutLayer.SetDropoutOff()


    this_validation_error, this_validation_error_top_5, this_validation_loss = \
        get_val_error_loss(rand_arr, shared_x, shared_y,
                           val_filenames, val_labels,
                           flag_para_load,img_mean,
                           batch_size, validate_model,
                           send_queue=load_send_queue,
                           recv_queue=load_recv_queue,
                           flag_top_5=True)

    print('validation error %f %%' % (this_validation_error * 100.))
    print('top 5 validation error %f %%' %
          (this_validation_error_top_5 * 100.))
    print('validation loss %f ' % (this_validation_loss))

    return this_validation_error, this_validation_loss
Example #17
0
def train(trn_data_generator, vld_data=None):
    """Build the model graph, dump per-op tensor sizes, and run a short
    profiled training loop.

    Reads hyperparameters from the module-level `config` dict. For every
    step it runs one optimizer step with full tracing enabled and writes a
    Chrome-trace timeline to profs/timeline_step<step>.json; tensor sizes
    of all graph operations are written once to tensors_sz.json.

    Args:
        trn_data_generator: iterator yielding (images, labels) minibatches
            via .next() (Python 2 protocol).
        vld_data: optional (X, Y) validation pair — currently unused; the
            evaluation code below is commented out.
    """
    learning_rate = config['learning_rate']
    experiment_dir = config['experiment_dir']
    data_dims = config['data_dims']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    num_samples_per_epoch = config["num_samples_per_epoch"]
    steps_per_epoch = num_samples_per_epoch // batch_size
    num_steps = steps_per_epoch * num_epochs
    checkpoint_dir = pth.join(experiment_dir, 'checkpoints')
    train_log_fpath = pth.join(experiment_dir, 'train.log')
    vld_iter = config["vld_iter"]
    checkpoint_iter = config["checkpoint_iter"]
    pretrained_weights = config.get("pretrained_weights", None)

    # ========================
    # construct training graph
    # ========================
    G = tf.Graph()
    with G.as_default():
        input_data_tensor = tf.placeholder(tf.float32, [None] + data_dims)
        input_label_tensor = tf.placeholder(tf.int32, [None])
        model = build_model(input_data_tensor, input_label_tensor)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads = optimizer.compute_gradients(model["loss"])
        grad_step = optimizer.apply_gradients(grads)
        # NOTE(review): initialize_all_variables is the deprecated TF1 name
        init = tf.initialize_all_variables()

        # Walk every op in the graph and record the byte size of its first
        # output tensor (-1 when the size is unknowable); dumped to JSON
        # below for offline memory analysis.
        operations_tensors = {}
        operations_names = G.get_operations()
        print(operations_names)
        count1 = 0  # ops whose first output has unknown shape/dtype size
        count2 = 0  # ops with no output tensors at all

        for operation in operations_names:
            operation_name = operation.name
            operations_info = G.get_operation_by_name(operation_name).values()
            if len(operations_info) > 0:
                if not (operations_info[0].shape.ndims is None):
                    operation_shape = operations_info[0].shape.as_list()
                    operation_dtype_size = operations_info[0].dtype.size
                    if not (operation_dtype_size is None):
                        # total bytes = product of known dims * dtype size;
                        # None dims are skipped (treated as 1)
                        operation_no_of_elements = 1
                        for dim in operation_shape:
                            if not (dim is None):
                                operation_no_of_elements = operation_no_of_elements * dim
                        total_size = operation_no_of_elements * operation_dtype_size
                        operations_tensors[operation_name] = total_size
                    else:
                        count1 = count1 + 1
                else:
                    count1 = count1 + 1
                    operations_tensors[operation_name] = -1

                #   print('no shape_1: ' + operation_name)
                #  print('no shape_2: ' + str(operations_info))
                #  operation_namee = operation_name + ':0'
                # tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
                # print('no shape_3:' + str(tf.shape(tensor)))
                # print('no shape:' + str(tensor.get_shape()))

            else:
                # print('no info :' + operation_name)
                # operation_namee = operation.name + ':0'
                count2 = count2 + 1
                operations_tensors[operation_name] = -1

                # try:
                #   tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
                # print(tensor)
                # print(tf.shape(tensor))
                # except:
                # print('no tensor: ' + operation_namee)
        print(count1)
        print(count2)

        with open('tensors_sz.json', 'w') as f:
            json.dump(operations_tensors, f)

    # ===================================
    # initialize and run training session
    # ===================================

    config_proto = tf.ConfigProto(log_device_placement=True,
                                  allow_soft_placement=True)
    sess = tf.Session(graph=G, config=config_proto)

    run_metadata = tf.RunMetadata()
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

    def profile(run_metadata, epoch=0):
        # write the traced step as a Chrome-trace JSON timeline
        with open('profs/timeline_step' + str(epoch) + '.json', 'w') as f:
            # Create the Timeline object, and write it to a json file
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            f.write(chrome_trace)

    # profile variable initialization as "step -1"
    sess.run(init, run_metadata=run_metadata, options=options)
    profile(run_metadata, -1)

    tf.train.start_queue_runners(sess=sess)
    with sess.as_default():
        if pretrained_weights:
            print("-- loading weights from %s" % pretrained_weights)
            tools.load_weights(G, pretrained_weights)

    # Start training loop
    # NOTE(review): hard-coded override discards the num_steps computed
    # above from epochs/batch size — presumably a profiling-run leftover;
    # confirm before reuse for full training.
    num_steps = 100
    tot_samples_p_second = 0
    for step in range(num_steps):
        batch_train = trn_data_generator.next()
        X_trn = np.array(batch_train[0])
        Y_trn = np.array(batch_train[1])
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        # run the gradient step plus every model output, in key order
        ops = [grad_step] + [model[k] for k in sorted(model.keys())]
        inputs = {input_data_tensor: X_trn, input_label_tensor: Y_trn}
        start_time = time.time()
        results = sess.run(ops,
                           feed_dict=inputs,
                           run_metadata=run_metadata,
                           options=options)
        elapsed = time.time() - start_time

        samples_p_second = float(batch_size) / float(elapsed)
        print("Ex/sec: %.1f" % samples_p_second)
        profile(run_metadata, step)
        tot_samples_p_second += samples_p_second
        # drop the grad_step result, re-key the rest by model output name
        results = dict(zip(sorted(model.keys()), results[1:]))
        print("TRN step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s" %
              (step, results["error_top1"], results["error_top5"],
               results["loss"]))

        # report evaluation metrics every 10 training steps
        '''
	    if (step % vld_iter == 0):
                print("-- running evaluation on vld split")
                X_vld = vld_data[0]
                Y_vld = vld_data[1]
                inputs = [input_data_tensor, input_label_tensor]
                args = [X_vld, Y_vld]
                ops = [model[k] for k in sorted(model.keys())]
                results = tools.iterative_reduce(ops, inputs, args, batch_size=1, fn=lambda x: np.mean(x, axis=0))
                results = dict(zip(sorted(model.keys()), results))
                print("VLD step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s" % (step,
                                                                                     results["error_top1"],
                                                                                     results["error_top5"],
                                                                                     results["loss"]))
            

            if (step % checkpoint_iter == 0) or (step + 1 == num_steps):
                print("-- saving check point")
                tools.save_weights(G, pth.join(checkpoint_dir, "weights.%s" % step))
	    '''
    print("Average ex/sec: %.1f" % float(tot_samples_p_second / num_steps))
Example #18
0
def train_net(config):
    """Train the network described by `config` on a single GPU.

    NOTE(review): this scraped copy is corrupted — the redacted
    'tcp://*****:*****@ iter = ' line below fuses the zmq set-up with the
    training loop's progress printing, so the model construction and loop
    header are missing and the function does not parse as-is.
    """

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)

    # pycuda set up
    # device index is taken from the last character of e.g. 'gpu0'
    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()
    
    if flag_para_load:
        #  zmq set up
        # parallel-loading mode: a separate loader process feeds batches
        sock = zmq.Context().socket(zmq.PAIR)
        # NOTE(review): corrupted line — code between the connect call and
        # the in-loop printing was lost during scraping.
        sock.connect('tcp://*****:*****@ iter = ', num_iter
                print 'training cost:', cost_ij
                if config['print_train_error']:
                    print 'training error rate:', train_error()

            if flag_para_load and (count < len(minibatch_range)):
                # tell the loader process it may prepare the next batch
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        # deterministic forward passes while validating
        DropoutLayer.SetDropoutOff()

        this_validation_error, this_validation_loss = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue, recv_queue=load_recv_queue)


        print('epoch %i: validation loss %f ' %
              (epoch, this_validation_loss))
        print('epoch %i: validation error %f %%' %
              (epoch, this_validation_error * 100.))
        # persist the full validation history every epoch
        val_record.append([this_validation_error, this_validation_loss])
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save weights
        # snapshot weights, learning rate, and momenta together so training
        # can be resumed from any saved epoch
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                       learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
Example #19
0
def train_net(config):
    """Train the network described by `config`, logging via `logger`.

    NOTE(review): this scraped copy is corrupted — the redacted
    'tcp://*****:*****@ iter = %i' line below fuses the zmq set-up with
    the training loop's logging, so the model construction and loop
    header (including the `flag_top5` definition used later) are missing
    and the function does not parse as-is.
    """
    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)
    if flag_para_load:
        #  zmq set up
        # parallel-loading mode: a separate loader process feeds batches
        sock = zmq.Context().socket(zmq.PAIR)
        # NOTE(review): corrupted line — code between the connect call and
        # the in-loop logging was lost during scraping.
        sock.connect('tcp://*****:*****@ iter = %i" % (num_iter))
                logger.info("training cost: %lf" % (cost_ij))
                if config['print_train_error']:
                    logger.info('training error rate: %lf' % train_error())

            if flag_para_load and (count < len(minibatch_range)):
                # tell the loader process it may prepare the next batch
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        #"""
        # deterministic forward passes while validating
        DropoutLayer.SetDropoutOff()

        # result_list is [top1, loss] or [top1, top5, loss] depending on
        # flag_top5 (defined in the lost portion above)
        result_list = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue,
            recv_queue=load_recv_queue,
            flag_top_5=flag_top5)


        logger.info(('epoch %i: validation loss %f ' %
              (epoch, result_list[-1])))

        if flag_top5:
            logger.info(('epoch %i: validation error (top 1) %f %%, (top5) %f %%' %
                (epoch,  result_list[0] * 100., result_list[1] * 100.)))
        else:
            logger.info(('epoch %i: validation error %f %%' %
                (epoch, result_list[0] * 100.)))

        # persist the full validation history every epoch
        val_record.append(result_list)
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save weights
        # snapshot weights, learning rate, and momenta together so training
        # can be resumed from any saved epoch
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                       learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)
        #"""

    print('Optimization complete.')
Example #20
0
def code_extraction(config):
    """Extract hash codes for the train (database) and validation (test)
    splits with a saved CNN snapshot and write them to .mat files.

    Loads epoch-8 weights from config['weights_dir'], runs the prediction
    model over every minibatch with dropout disabled, and saves
    'database_code.mat' and 'test_code.mat' under config['code_save_dir'].

    Args:
        config: dict with at least 'gpu', 'weights_dir', 'code_save_dir',
            plus whatever unpack_configs/CNN_model expect.
    """

    # UNPACK CONFIGS
    (train_filenames, val_filenames, img_mean) = unpack_configs(config)

    # theano must be imported after the GPU device is selected
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(config['gpu'])

    import theano
    theano.config.on_unused_input = 'warn'
    import theano.tensor as T

    from multilabel_layers import DropoutLayer
    from multilabel_net import CNN_model, compile_models

    import theano.misc.pycuda_init
    import theano.misc.pycuda_utils

    model = CNN_model(config)
    batch_size = model.batch_size
    layers = model.layers

    n_train_batches = len(train_filenames)
    n_val_batches = len(val_filenames)

    ## COMPILE FUNCTIONS ##
    (predict_model, shared_x) = compile_models(model, config)

    # NOTE(review): snapshot epoch is hard-coded; consider reading it from
    # config (e.g. config['load_epoch']) instead.
    load_weights_epoch = 8
    load_weights_dir = config['weights_dir']
    load_weights(layers, load_weights_dir, load_weights_epoch)

    code_save_dir = config['code_save_dir']

    # deterministic forward passes during extraction
    DropoutLayer.SetDropoutOff()

    def _collect_codes(filenames, n_batches):
        # Run the prediction model over every minibatch and stack the
        # per-batch codes into one array (None when there are no batches).
        batch_codes = []
        for minibatch_index in range(n_batches):
            label = get_prediction_labels(predict_model, shared_x, filenames,
                                          minibatch_index, img_mean)
            batch_codes.append(label[0])
        # single vstack instead of O(n^2) incremental stacking
        return np.vstack(batch_codes) if batch_codes else None

    train_predicted_code = _collect_codes(train_filenames, n_train_batches)
    sio.savemat(code_save_dir + 'database_code.mat',
                {'database_code': train_predicted_code})

    val_predicted_code = _collect_codes(val_filenames, n_val_batches)
    sio.savemat(code_save_dir + 'test_code.mat',
                {'test_code': val_predicted_code})

    print('code extraction complete.')
Example #21
0
import argparse
from skimage.transform import resize
from skimage.io import imread

# Build a single-image inference graph: fixed batch of one 224x224 RGB
# image through VGG, with softmax probabilities as the output tensor.
G = tf.Graph()
with G.as_default():
    images = tf.placeholder("float", [1, 224, 224, 3])
    # n_classes=10 matches the CIFAR-10 label list used by predict()
    logits = vgg.build(images, n_classes=10, training=False)
    probs = tf.nn.softmax(logits)

def predict(im):
    """Classify one image with the module-level graph and return its label.

    Resizes the input to 224x224 when its shape differs from the expected
    (224, 224, 3), adds a batch axis, and evaluates `probs` in the default
    TensorFlow session.
    """
    class_names = ('airplane', 'automobile', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck')
    if im.shape != (224, 224, 3):
        im = resize(im, (224, 224))
    batch = np.expand_dims(im, 0)
    session = tf.get_default_session()
    scores = session.run(probs, {images: batch})
    best = np.argmax(scores)
    return class_names[best]

if __name__ == '__main__':
    # CLI entry point: load weights into the inference graph and print the
    # predicted label for a single image file.
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--weights", required=True, help="path to weights.npz file")
    parser.add_argument("image", help="path to jpg image")
    args = parser.parse_args()
    im = imread(args.image)
    sess = tf.Session(graph=G)
    with sess.as_default():
        # weights must be loaded inside the session they will be used in
        tools.load_weights(G, args.weights)
        print predict(im)
Example #22
0
 rand_arr, vels) = compile_models(model, config)

######################### TRAIN MODEL ################################
print '... training'

# Start Training Loop
epoch = 0
step_idx = 0
val_record = []

while epoch < config['n_epochs']:
    epoch = epoch + 1

    if config['resume_train'] and epoch == 1:
        load_epoch = config['load_epoch']
        load_weights(layers, config['weights_dir'], load_epoch)
        lr_to_load = np.load(config['weights_dir'] + 'lr_' + str(load_epoch) +
                             '.npy')
        val_record = list(np.load(config['weights_dir'] + 'val_record.npy'))
        learning_rate.set_value(lr_to_load)
        load_momentums(vels, config['weights_dir'], load_epoch)
        epoch = load_epoch + 1

    n_batches = len(train_set) / batch_size
    for it in range(n_batches):
        num_iter = (epoch - 1) * len(train_set) + it
        print 'epoch', epoch, 'num_iter', num_iter, '/', n_batches

        with Timer('sample minibatch'):
            (batch_x, batch_y) = sample_minibatch(train_set, batch_size)
            shared_x.set_value(batch_x)