# --- Tail of a weight-scope scan whose enclosing function (and loop) begins
# --- before this chunk; indentation below is reconstructed from the
# --- whitespace-flattened source and should be verified against history.
max_val = abs(weights_tuned).max()
if max_val > weight_scope:
    weight_scope = max_val  # track the largest absolute tuned weight seen so far
return weight_scope  # NOTE(review): `return` implies an enclosing def not visible in this chunk

if __name__ == "__main__":
    # CLI entry: load an original caffemodel and a fine-tuned one under the
    # same prototxt; presumably they are compared in code after this chunk.
    parser = argparse.ArgumentParser()
    parser.add_argument('--prototxt', type=str, required=True)
    parser.add_argument('--origimodel', type=str, required=True)
    parser.add_argument('--tunedmodel', type=str, required=True)
    args = parser.parse_args()
    prototxt = args.prototxt #"models/eilab_reference_sparsenet/train_val_scnn.prototxt"
    original_caffemodel = args.origimodel # "models/eilab_reference_sparsenet/eilab_reference_sparsenet.caffemodel"
    fine_tuned_caffemodel = args.tunedmodel # "/home/wew57/2bincaffe/models/eilab_reference_sparsenet/sparsenet_train_iter_30000.caffemodel"
    net_parser = caffeparser.CaffeProtoParser(prototxt)
    net_msg = net_parser.readProtoNetFile()
    caffe.set_mode_cpu()
    # GPU mode
    #caffe.set_device(1)
    #caffe.set_mode_gpu()
    # Both nets share one prototxt; only the weight files differ.
    orig_net = caffe.Net(prototxt, original_caffemodel, caffe.TEST)
    tuned_net = caffe.Net(prototxt, fine_tuned_caffemodel, caffe.TEST)
    print("blobs {}\nparams {}".format(orig_net.blobs.keys(), orig_net.params.keys()))
    print("blobs {}\nparams {}".format(tuned_net.blobs.keys(), tuned_net.params.keys()))
def lowrank_netsolver(solverfile, caffemodel, ratio, rank_mat):
    """Train a net while iteratively shrinking `*_lowrank` conv layers via PCA.

    Trains with the given solver in chunks of `test_interval` iterations.
    After each chunk, every Convolution layer named `*_lowrank` is decomposed
    with caffe_apps.filter_pca; when a lower rank suffices for the requested
    energy `ratio`, the prototxt gets the smaller num_output, the weights of
    the following `*_linear` 1x1 layer are folded to match, and new network /
    solver / caffemodel snapshots are written.  The function then recurses on
    the new solver to continue training from the shrunk model.

    Args:
        solverfile: path to a solver prototxt; lr_policy must be "multistep".
        caffemodel: optional HDF5 weights to initialize from, or None.
        ratio: energy ratio forwarded to caffe_apps.filter_pca.
        rank_mat: running matrix of per-check ranks ([] on the first call).

    NOTE(review): the source file was whitespace-flattened; the indentation
    below (notably around the solver-release block) is reconstructed and
    should be verified against history.
    """
    solver_parser = caffeparser.CaffeProtoParser(solverfile)
    solver_msg = solver_parser.readProtoSolverFile()
    lr_policy = str(solver_msg.lr_policy)
    if lr_policy != "multistep":
        # The lr schedule is recomputed after each shrink (see left_steps
        # below), which is only implemented for the multistep policy.
        print "Only multistep lr_policy is supported in our lowrank_netsolver!"
        exit()
    max_iter = solver_msg.max_iter
    test_interval = solver_msg.test_interval
    # Deep-copy so later edits to solver_msg.stepvalue do not alias this list.
    stepvalues = copy.deepcopy(solver_msg.stepvalue)
    #stepvalues.append(max_iter)
    base_lr = solver_msg.base_lr
    net_parser = caffeparser.CaffeProtoParser(str(solver_msg.net))
    net_msg = net_parser.readProtoNetFile()
    loop_layers = net_msg.layer
    solver = caffe.get_solver(solverfile)
    if None != caffemodel:
        solver.net.load_hdf5(caffemodel)
    iter = 0  # NOTE(review): shadows the `iter` builtin
    filepath_solver = ""
    filepath_caffemodel = ""
    while iter < max_iter:
        # train for some steps
        solver.step(test_interval)
        # initialize the parameters in the new network
        new_parameters = {}
        for cur_layer in loop_layers:
            if cur_layer.name in solver.net.params:
                cur_param = {}
                for idx in range(0, len(solver.net.params[cur_layer.name])):
                    cur_param[idx] = solver.net.params[cur_layer.name][idx].data[:]
                new_parameters[cur_layer.name] = cur_param
        # check if a lower rank in each layer can be obtained
        # if so, update the network structure and weights
        layer_idx = -1
        new_net_flag = False
        rank_info = ""  # suffix like "_12_34" appended to snapshot names
        ranks = [[]]    # one row of ranks for this check, stacked into rank_mat
        for cur_layer in loop_layers:
            layer_idx += 1
            if 'Convolution' == cur_layer.type and re.match(".*(_lowrank)$", cur_layer.name):
                # Invariant: a "_lowrank" conv (single param blob, no bias) is
                # immediately followed by a "_linear" 1x1 conv that recombines
                # its outputs.
                assert len(solver.net.params[cur_layer.name]) == 1
                cur_weights = solver.net.params[cur_layer.name][0].data
                next_layer = net_msg.layer._values[layer_idx + 1]
                next_weights = solver.net.params[next_layer.name][0].data
                assert re.match(".*(_linear)$", next_layer.name)
                assert len(solver.net.params[next_layer.name]) == 2
                assert next_layer.convolution_param.kernel_size._values[0] == 1
                low_rank_filters, linear_combinations, rank = caffe_apps.filter_pca(cur_weights, ratio)
                rank_info = rank_info + "_{}".format(rank)
                ranks[0].append(rank)
                if rank < cur_weights.shape[0]:
                    # generate lower-rank network
                    new_net_flag = True
                    cur_layer.convolution_param.num_output = rank
                    new_parameters[cur_layer.name] = {0: low_rank_filters[:]}
                    # Fold the PCA basis into the following 1x1 layer:
                    # W_linear' = W_linear . P (flattened matmul), then reshape
                    # back to (out, rank, 1, 1).
                    new_linear_combinations = np.dot(
                        next_weights.reshape((next_weights.shape[0], -1)),
                        linear_combinations.reshape(
                            (linear_combinations.shape[0], -1)))
                    new_linear_combinations = new_linear_combinations.reshape(
                        (next_layer.convolution_param.num_output, rank, 1, 1))
                    if next_layer.convolution_param.bias_term:
                        new_parameters[next_layer.name] = {
                            0: new_linear_combinations[:],
                            1: solver.net.params[next_layer.name][1].data[:]
                        }
                    else:
                        new_parameters[next_layer.name] = {
                            0: new_linear_combinations[:]
                        }
        iter += test_interval
        # Append this round's ranks as a new row of rank_mat.
        if [] == rank_mat:
            rank_mat = ranks
        else:
            rank_mat = np.concatenate((rank_mat, ranks), axis=0)
        # snapshot network, caffemodel and solver
        if new_net_flag:
            # save the new network
            #file_split = os.path.splitext(str(solver_msg.net))
            filepath_network = solver_msg.snapshot_prefix + rank_info + "_net.prototxt" #file_split[0] + '_lowrank' + file_split[1]
            file = open(filepath_network, "w")
            if not file:
                raise IOError("ERROR (" + filepath_network + ")!")
            file.write(str(net_msg))
            file.close()
            print "Saved as {}".format(filepath_network)
            # save new soler
            solver_msg.net = filepath_network
            # Recompute the multistep lr schedule relative to the iterations
            # already run: advance base_lr past every crossed step and shift
            # remaining step values left by `iter`.
            next_lr = base_lr
            left_steps = copy.deepcopy(stepvalues)
            for idx, step_val in enumerate(stepvalues):
                if iter >= step_val:
                    next_lr = next_lr * solver_msg.gamma
                left_steps[idx] = step_val - iter
            solver_msg.base_lr = next_lr
            solver_msg.max_iter = max_iter - iter
            if -1 != solver_msg.force_iter and 0 != solver_msg.force_iter:
                solver_msg.force_iter = solver_msg.force_iter - iter
                if solver_msg.force_iter < 0:
                    solver_msg.force_iter = 0
            solver_msg.stepvalue._values = []
            for idx, step_val in enumerate(left_steps):
                if step_val > 0:
                    solver_msg.stepvalue.append(step_val)
            filepath_solver = solver_msg.snapshot_prefix + rank_info + "_solver.prototxt" # file_split[0] + '_lowrank' + file_split[1]
            file = open(filepath_solver, "w")
            if not file:
                raise IOError("ERROR (" + filepath_solver + ")!")
            file.write(str(solver_msg))
            file.close()
            print "Saved as {}".format(filepath_solver)
            # generate the caffemodel
            if iter == max_iter:
                solver.solve()
            solver = None # a weird bug if do not release it
            gc.collect()
            # Build a fresh net from the shrunk prototxt and copy the saved
            # (and folded) parameters into it, then snapshot as HDF5.
            dst_net = caffe.Net(str(filepath_network), caffe.TRAIN)
            for key, val in new_parameters.iteritems():
                for keykey, valval in val.iteritems():
                    dst_net.params[key][keykey].data[:] = valval[:]
            filepath_caffemodel = solver_msg.snapshot_prefix + rank_info + ".caffemodel.h5"
            dst_net.save_hdf5(str(filepath_caffemodel))
            print "Saved as {}".format(filepath_caffemodel)
            dst_net = None # a weird bug if do not release it
            gc.collect()
            break
    if iter >= max_iter:
        # Ran the full schedule: finish optimization (if the solver was not
        # already released above), then plot and save the rank history.
        if solver != None:
            solver.solve()
        print "Optimization done!"
        plt.plot(rank_mat)
        plt.savefig(str(solver_msg.snapshot_prefix) + "_ranks.png")
        np.savetxt(str(solver_msg.snapshot_prefix) + ".ranks", rank_mat, fmt="%d")
        #plt.show()
        return
    else:
        # A smaller network was snapshotted mid-run: recurse to keep training
        # on the shrunk net with the rewritten solver.
        lowrank_netsolver(str(filepath_solver), str(filepath_caffemodel), ratio, rank_mat)
# e.g. python python/nn_decomposer.py \
# --prototxt models/bvlc_alexnet/train_val.prototxt \
# --caffemodel models/bvlc_alexnet/bvlc_alexnet.caffemodel.h5 \
# --rank_config models/bvlc_alexnet/config.json
# Step: decompose the previously-saved network into a low-rank variant.
script_str2 = "python python/nn_decomposer.py " + \
              " --prototxt " + save_model + \
              " --caffemodel " + save_weights + \
              " --rank_config " + args.rank_config2
os.system(script_str2)
# nn_decomposer writes its outputs next to the inputs with fixed suffixes.
filepath_network = save_model + ".lowrank.prototxt"
filepath_caffemodel = save_weights + '.lowrank.caffemodel.h5'
# e.g. python python/netsolver.py \
# --solver models/bvlc_alexnet/solver.prototxt \
# --weights models/bvlc_alexnet/bvlc_alexnet.caffemodel.h5 \
# --device 0
# Point the solver at the decomposed network and rewrite it in place.
solver_parser = caffeparser.CaffeProtoParser(args.solver)
solver_msg = solver_parser.readProtoSolverFile()
solver_msg.net = filepath_network
# FIX: use a context manager instead of open()/close() plus a dead
# `if not file:` check -- open() raises IOError on failure rather than
# returning a falsy object, and `file` shadowed the Python 2 builtin.
# The `with` block also guarantees the handle is closed if write() raises.
with open(args.solver, "w") as solver_file:
    solver_file.write(str(solver_msg))
# Step: fine-tune the low-rank network starting from the decomposed weights.
script_str3 = "python python/netsolver.py " + \
              " --device {}".format(args.device) + \
              " --solver " + args.solver + \
              " --weights " + filepath_caffemodel
os.system(script_str3)
# --- Continuation of a parser.add_argument('--original_alexnet', ...) call
# --- whose opening is outside this chunk; layout reconstructed from the
# --- whitespace-flattened source.
                    required=True,
                    help="The original alexnet with group.")
parser.add_argument('--split_alexnet', type=str, required=True,
                    help="The split alexnet without group.")
parser.add_argument('--caffemodel', type=str, required=True,
                    help="The caffemodel of split alexnet.")
args = parser.parse_args()
original_alexnet = args.original_alexnet
caffemodel = args.caffemodel
split_alexnet = args.split_alexnet
# Parse both network definitions: the grouped original and the split variant.
net_parser = caffeparser.CaffeProtoParser(original_alexnet)
orig_net_msg = net_parser.readProtoNetFile()
net_parser = caffeparser.CaffeProtoParser(split_alexnet)
split_net_msg = net_parser.readProtoNetFile()
caffe.set_mode_cpu()
# GPU mode
#caffe.set_device(0)
#caffe.set_mode_gpu()
# Load the split network together with its weights; only its params are read.
src_net = caffe.Net(split_alexnet, caffemodel, caffe.TEST)
print("blobs {}\nparams {}".format(src_net.blobs.keys(), src_net.params.keys()))
loop_layers = orig_net_msg.layer[:] # adding : implicitly makes a copy to avoid being modified in the loop
# --- Continuation of a parser.add_argument(...) call begun before this chunk.
# NOTE(review): action='store_false' combined with set_defaults(False) means
# force_regularization can never become True via this flag -- confirm the
# intended default against the cut-off part of the add_argument call.
                    dest='force_regularization',
                    action='store_false')
parser.set_defaults(force_regularization=False)
args = parser.parse_args()
net_template = args.net_template
n = args.n
learn_depth = args.learndepth
g_sparsify_relu = args.sparsify
g_regularize_conv = args.regularize
if args.force_regularization:
    g_force_mult_conv = 1.0
connectivity_mode = 0 #args.connectivity_mode
caffe.set_mode_cpu()
net_parser = caffeparser.CaffeProtoParser(net_template)
net_msg = net_parser.readProtoNetFile()
# First conv block: conv -> (optional sparsify) -> batch-norm -> ReLU.
add_conv_layer(net_msg, name='conv1', bottom='data', num_output=16, pad=1, kernel_size=3, stride=1, connectivity_mode=connectivity_mode)
if g_regularize_conv:
    add_sparsify_layer(net_msg, 'conv1')
add_BN_layer(net_msg, name='conv1_bn', bottom='conv1')
add_relu_layer(net_msg, name='conv1_relu', bottom='conv1_bn')
# CLI: load a source net + weights, then scan its prototxt for quantized
# "convXq" Convolution layers (loop body is truncated at the end of this chunk).
parser.add_argument('--srcproto', type=str, required=True)
parser.add_argument('--srcmodel', type=str, required=True)
#parser.add_argument('--dstproto', type=str, required=False)
#parser.add_argument('--dstmodel', type=str, required=False)
args = parser.parse_args()
srcproto = args.srcproto
srcmodel = args.srcmodel
#dstproto = args.dstproto
#dstmodel = args.dstmodel
caffe.set_device(0)
caffe.set_mode_gpu()
src_net = caffe.Net(srcproto,srcmodel, caffe.TEST)
print("src net:\n blobs {}\nparams {}\n".format(src_net.blobs.keys(), src_net.params.keys()))
src_net_parser = caffeparser.CaffeProtoParser(srcproto)
net_msg = src_net_parser.readProtoNetFile()
layer_idx = 0
loop_layers = net_msg.layer[:] #adding : implicitly makes a copy to avoid being modified in the loop
# Accumulators for positions/metadata of matched convXq layers (filled in the
# truncated loop body below).
convxq_positions = []
convxq_m = []
convxq_add_layers = []
position_idx = 0
total_all_zero_counter = 0
# generate and save dst prototxt
# NOTE(review): the body of the following `if` is cut off by the chunk
# boundary; the statement is preserved as-is.
for cur_layer in loop_layers:
    if 'Convolution'==cur_layer.type and re.match("^conv.*q$",cur_layer.name):