import sys
import pickle

import lasagne as nn
import theano.tensor as T

dump_path = sys.argv[1]
model_data = pickle.load(open(dump_path, 'rb'))  # pickle dumps must be opened in binary mode

# Set some vars for easier reference.
chunk_size = model_data['chunk_size'] * 2
batch_size = model_data['batch_size']
l_out = model_data['l_out']
l_ins = model_data['l_ins']

# Print some basic stuff about the model.
num_params = nn.layers.count_params(l_out)
print("\n\t\tNumber of parameters: %d" % num_params)

model_arch = architecture_string(model_data['l_out'])
print(model_arch)

# Set up Theano stuff to compute output.
output = nn.layers.get_output(l_out, deterministic=True)

input_ndims = [len(nn.layers.get_output_shape(l_in))
               for l_in in l_ins]
xs_shared = [nn.utils.shared_empty(dim=ndim)
             for ndim in input_ndims]
idx = T.lscalar('idx')

givens = {}
for l_in, x_shared in zip(l_ins, xs_shared):
    givens[l_in.input_var] = x_shared[
        idx * batch_size:(idx + 1) * batch_size]
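# A minimal sketch, not part of the dump script itself, of the step that
# usually follows: compiling the deterministic forward pass with the `givens`
# slicing above and evaluating it batch by batch. The name `compute_output`
# and the loop are illustrative assumptions; it also assumes the shared
# inputs `xs_shared` have been filled with a chunk of data via set_value().
import theano

compute_output = theano.function([idx], output, givens=givens)

num_batches = chunk_size // batch_size
chunk_predictions = [compute_output(b) for b in range(num_batches)]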
from time import gmtime, strftime

import numpy as np
import lasagne as nn
import theano.tensor as T

# The config doesn't have the lr attributes if lr decay is disabled.
if lr_decay:
    lr_init = model.lr_init
    lr_final = model.lr_final
else:
    lr_init = LEARNING_RATE_SCHEDULE[1]

np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

model_id = strftime("%Y_%m_%d_%H%M%S", gmtime())
dump_path = 'dumps/' + model_id + '_' + model.config_name + '.pkl'

# Print the architecture of the model.
model_arch = architecture_string(l_out)
print(model_arch)

num_params = nn.layers.count_params(l_out, trainable=True)
print("\n\t\tNumber of trainable parameters: %d\n" % num_params)
print("\t\tModel id: %s\n" % model_id)
print("\t\tModel name: %s\n" % model.config_name)

input_ndims = [len(nn.layers.get_output_shape(l_in))
               for l_in in l_ins]
xs_shared = [nn.utils.shared_empty(dim=ndim)
             for ndim in input_ndims]
y_shared = nn.utils.shared_empty(dim=2)
idx = T.lscalar('idx')

obj = model.build_objective(l_out)
train_loss = obj.get_loss()
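# A minimal sketch, under stated assumptions, of how the compiled training
# step typically follows from `train_loss` above: slice the shared arrays
# with `idx` via givens and apply an update rule. The names `batch_size`
# (taken from the config), `obj.target_var`, and the choice of
# nesterov_momentum are assumptions, not confirmed by the source.
import theano

batch_size = model.batch_size
learning_rate = theano.shared(np.float32(lr_init))
all_params = nn.layers.get_all_params(l_out, trainable=True)
updates = nn.updates.nesterov_momentum(
    train_loss, all_params, learning_rate, momentum=0.9)

givens = {l_in.input_var: x_shared[idx * batch_size:(idx + 1) * batch_size]
          for l_in, x_shared in zip(l_ins, xs_shared)}
givens[obj.target_var] = y_shared[idx * batch_size:(idx + 1) * batch_size]

iter_train = theano.function([idx], train_loss, givens=givens,
                             updates=updates)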
import os

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.objectives import aggregate


def main(data, cls_num, dim, pos_ratio, w=None, increase=True, n=2,
         num_epochs=500, strategy='momentum', model=None, thres=None,
         lb_num=None):
    x_train = data['training']
    x_val = data['val']

    # Prepare Theano variables for inputs and targets.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    w_var = T.vector('w')

    num_boxes_train = len(x_train['image_ids'])
    pos_idx = (x_train['boxes'][:, -1] == 1).nonzero()[0]
    print(num_boxes_train, 'pos:{}'.format(pos_idx.shape[0]))

    # Create the neural network model.
    print("Building model and compiling functions...")
    cls_network, kl_network = build_cnn(cls_num, input_var, n, dim)

    epsilon = 1e-35
    margin = 0.5
    batchsize = target_var.shape[0]

    cls_prediction = lasagne.layers.get_output(cls_network)
    # cls_prediction = T.clip(cls_prediction, 1e-7, 1 - 1e-7)
    kl_prediction = lasagne.layers.get_output(kl_network)

    # Add weight decay.
    all_layers = lasagne.layers.get_all_layers([cls_network, kl_network])
    l2_penalty = lasagne.regularization.regularize_layer_params(
        all_layers, lasagne.regularization.l2) * 0.0005

    cls_loss = lasagne.objectives.categorical_crossentropy(
        cls_prediction, target_var)
    cls_loss = aggregate(cls_loss, w_var)

    kl_prediction = kl_prediction + epsilon
    kl_loss = get_klloss(batchsize, margin, kl_prediction, target_var)
    # The KL term is disabled here (multiplied by zero).
    loss = l2_penalty + 0 * kl_loss + cls_loss

    lr = 0.1
    sh_lr = theano.shared(lasagne.utils.floatX(lr))
    params = lasagne.layers.get_all_params([cls_network, kl_network],
                                           trainable=True)
    if strategy == 'momentum':
        updates = lasagne.updates.momentum(
            loss, params, learning_rate=sh_lr,
            momentum=0.9)  # or lasagne.updates.nesterov_momentum
    else:
        updates = lasagne.updates.adam(loss, params, learning_rate=1e-4)

    train_fn = theano.function([input_var, target_var, w_var],
                               [loss, cls_prediction, kl_prediction],
                               updates=updates, allow_input_downcast=True)

    # Create a loss expression for validation/testing.
    test_cls_prediction = lasagne.layers.get_output(
        cls_network, deterministic=True)
    test_cls_prediction = T.clip(test_cls_prediction, 1e-7, 1 - 1e-7)
    test_cls_loss = lasagne.objectives.categorical_crossentropy(
        test_cls_prediction, target_var)
    test_cls_loss = test_cls_loss.mean()

    test_kl_prediction = lasagne.layers.get_output(
        kl_network, deterministic=True)
    test_kl_prediction = test_kl_prediction + epsilon
    test_kl_loss = get_klloss(batchsize, margin, test_kl_prediction,
                              target_var)
    test_loss = l2_penalty + test_kl_loss + test_cls_loss

    # Compile a second function computing the validation loss and accuracy.
    val_fn = theano.function(
        [input_var, target_var],
        [test_loss, test_cls_prediction, test_kl_prediction],
        allow_input_downcast=True)

    print("number of parameters in model: %d" %
          lasagne.layers.count_params([cls_network, kl_network],
                                      trainable=True))
    print(architecture_string([cls_network, kl_network]))

    if model is not None and os.path.exists(model):
        # Load network weights from the model file.
        with np.load(model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        with np.load('weighted_indexes_{}'.format(model)) as f:
            indexes = f['arr_0']
        lasagne.layers.set_all_param_values([cls_network, kl_network],
                                            param_values)
    else:
        # Launch the training loop.
        print("Starting training...")
        for epoch in np.arange(num_epochs):
            print("Epoch {} of {}:".format(epoch + 1, num_epochs))
            cls_preds, kl_preds, my_hat, indexes, cm = get_mtresults(
                x_train, 'train', 'train', train_fn, cls_num, dim,
                pos_ratio, True, w=w, batchsize=128)
            cls_preds, kl_preds, my_hat, indexes, cm = get_mtresults(
                x_val, 'val', 'train', val_fn, cls_num, dim, pos_ratio,
                indexes=indexes)
            # Decay the learning rate at fixed epochs.
            if (epoch + 1) == 30 or (epoch + 1) == 50 or (epoch + 1) == 70:
                new_lr = sh_lr.get_value() * 0.1
                print("New LR: " + str(new_lr))
                sh_lr.set_value(lasagne.utils.floatX(new_lr))

        # Dump the network weights to a file.
        if model is not None:
            np.savez(model, *lasagne.layers.get_all_param_values(
                [cls_network, kl_network]))
            np.savez('weighted_indexes_{}'.format(model), indexes)

    return input_var, cls_network, kl_network, val_fn, indexes
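# Illustrative invocation of the training entry point above. `data` is
# assumed to be a dict with 'training' and 'val' splits prepared upstream;
# the file name and hyperparameters below are placeholders, not values from
# the source.
input_var, cls_network, kl_network, val_fn, indexes = main(
    data, cls_num=2, dim=224, pos_ratio=0.3, num_epochs=70,
    strategy='momentum', model='cls_kl_weights.npz')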
import os
import sys

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.objectives import aggregate


def main(data, cls_num, dim, pos_ratio, w=None, increase=True, n=2,
         num_epochs=500, strategy='momentum', last_model=None, model=None,
         thres=None, lb_num=None):
    x_train = data['training']
    x_val = data['val']

    # Prepare Theano variables for inputs and targets.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    w_var = T.vector('w')

    neg_idx = (x_train['boxes'][:, -1] == 0).nonzero()[0]
    pos_idx = (x_train['boxes'][:, -1] == 1).nonzero()[0]
    print('neg:{}'.format(neg_idx.shape[0]),
          'pos:{}'.format(pos_idx.shape[0]))

    # Create the neural network model.
    print("Building model and compiling functions...")
    network = build_alexnet(cls_num, input_var, dim)
    fc8 = network['fc8']
    print(architecture_string(fc8))

    batchsize = target_var.shape[0]
    prediction = lasagne.layers.get_output(fc8)

    # Add weight decay.
    all_layers = lasagne.layers.get_all_layers(fc8)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        all_layers, lasagne.regularization.l2) * 0.0005

    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var)
    # loss = loss.mean()
    loss = aggregate(loss, w_var)
    loss = loss + l2_penalty

    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                 dtype=theano.config.floatX)

    lr = 0.001
    sh_lr = theano.shared(lasagne.utils.floatX(lr))
    params = lasagne.layers.get_all_params(fc8, trainable=True)
    if strategy == 'momentum':
        updates = lasagne.updates.momentum(
            loss, params, learning_rate=sh_lr,
            momentum=0.9)  # or lasagne.updates.nesterov_momentum
    else:
        updates = lasagne.updates.adam(loss, params, learning_rate=1e-4)

    train_fn = theano.function([input_var, target_var, w_var],
                               [loss, acc, prediction],
                               updates=updates, allow_input_downcast=True)

    # Create a loss expression for validation/testing.
    test_prediction = lasagne.layers.get_output(fc8, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a second function computing the validation loss and accuracy.
    val_fn = theano.function([input_var, target_var],
                             [test_loss, test_acc, test_prediction],
                             allow_input_downcast=True)

    print("number of parameters in model: %d" %
          lasagne.layers.count_params(fc8, trainable=True))
    # print(architecture_string(fc8))

    if model is not None and os.path.exists(model):
        # Load network weights from the model file.
        with np.load(model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        # with np.load('weighted_indexes_{}'.format(model)) as f:
        #     indexes = f['arr_0']
        lasagne.layers.set_all_param_values(fc8, param_values)
    else:
        if last_model is None:
            # First round of training: copy the weights from the Caffe
            # reference model.
            caffe_root = '/home/dl/caffe/'
            sys.path.insert(0, caffe_root + 'python')
            import caffe
            caffe.set_mode_gpu()
            caffe_net = caffe.Net(
                caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',
                caffe_root + 'models/bvlc_reference_caffenet/'
                'bvlc_reference_caffenet.caffemodel',
                caffe.TEST)
            layers_caffe = dict(zip(list(caffe_net._layer_names),
                                    caffe_net.layers))
            for name, layer in network.items():
                try:
                    # Caffe cross-correlates while Theano's conv2d flips the
                    # kernels, so copied filters are reversed along both
                    # spatial axes; grouped layers are split into two parts.
                    if name == 'conv2':
                        W = layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1]
                        b = layers_caffe[name].blobs[1].data
                        network['conv2_part1'].W.set_value(W[0:128, :, :, :])
                        network['conv2_part1'].b.set_value(b[0:128])
                        network['conv2_part2'].W.set_value(W[128:, :, :, :])
                        network['conv2_part2'].b.set_value(b[128:])
                    elif name == 'conv4':
                        W = layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1]
                        b = layers_caffe[name].blobs[1].data
                        network['conv4_part1'].W.set_value(W[0:192, :, :, :])
                        network['conv4_part1'].b.set_value(b[0:192])
                        network['conv4_part2'].W.set_value(W[192:, :, :, :])
                        network['conv4_part2'].b.set_value(b[192:])
                    elif name == 'conv5':
                        W = layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1]
                        b = layers_caffe[name].blobs[1].data
                        network['conv5_part1'].W.set_value(W[0:128, :, :, :])
                        network['conv5_part1'].b.set_value(b[0:128])
                        network['conv5_part2'].W.set_value(W[128:, :, :, :])
                        network['conv5_part2'].b.set_value(b[128:])
                    elif name == 'fc6' or name == 'fc7':
                        # Caffe stores dense weights as (out, in); Lasagne
                        # expects (in, out).
                        layer.W.set_value(
                            np.transpose(layers_caffe[name].blobs[0].data))
                        layer.b.set_value(layers_caffe[name].blobs[1].data)
                    elif name != 'fc8':
                        layer.W.set_value(
                            layers_caffe[name].blobs[0].data[:, :, ::-1, ::-1])
                        layer.b.set_value(layers_caffe[name].blobs[1].data)
                except AttributeError:
                    continue
                except KeyError:
                    continue
            print(architecture_string(fc8))
        elif os.path.exists(last_model):
            # Warm-start from the weights of the previous round.
            with np.load(last_model) as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            # with np.load('weighted_indexes_{}'.format(last_model)) as f:
            #     indexes = f['arr_0']
            lasagne.layers.set_all_param_values(fc8, param_values)

        print("Starting training...")
        for epoch in np.arange(num_epochs):
            print("Epoch {} of {}:".format(epoch + 1, num_epochs))
            preds = get_mtresults(x_train, 'train', 'train', train_fn,
                                  cls_num, dim, pos_ratio, True, w=w,
                                  batchsize=128)
            preds = get_mtresults(x_val, 'val', 'train', val_fn,
                                  cls_num, dim, pos_ratio)
            # Decay the learning rate at fixed epochs.
            if (epoch + 1) == 10 or (epoch + 1) == 20 or (epoch + 1) == 30:
                new_lr = sh_lr.get_value() * 0.1
                print("New LR: " + str(new_lr))
                sh_lr.set_value(lasagne.utils.floatX(new_lr))

        # Dump the network weights to a file.
        if model is not None:
            np.savez(model, *lasagne.layers.get_all_param_values(fc8))
            # np.savez('weighted_indexes_{}'.format(model), indexes)

    return input_var, fc8, val_fn
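# Illustrative two-round invocation of the AlexNet variant above: the first
# round initializes from the Caffe reference weights, and a later round can
# warm-start from the previous dump via `last_model`. File names and
# hyperparameters are placeholders, not values from the source.
input_var, fc8, val_fn = main(data, cls_num=2, dim=227, pos_ratio=0.3,
                              num_epochs=30, model='round0_weights.npz')
input_var, fc8, val_fn = main(data, cls_num=2, dim=227, pos_ratio=0.3,
                              num_epochs=30, last_model='round0_weights.npz',
                              model='round1_weights.npz')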