def NCS_MP(crates, ncs_stepsize, masked_models, valid, corpus, acc_constraint, orig_fitvalue, num_runs=0):
    """Search per-layer pruning thresholds for a language model with NCS.

    Runs one Negatively Correlated Search over the threshold vector
    ``crates``: each individual of the population is evaluated in its own
    worker process (one per available GPU), the best threshold vector is
    written into a shared tensor by the workers, then applied to
    ``masked_models[0]`` and the pruned network is saved to disk.

    Args:
        crates: initial per-layer pruning thresholds (NCS start point).
        ncs_stepsize: NCS mutation step size.
        masked_models: maskable model wrappers; element 0 is the reference net.
        valid: validation set used for fitness evaluation.
        corpus: corpus object forwarded to ``evaluate_lm``.
        acc_constraint: maximum accuracy drop a candidate may incur.
        orig_fitvalue: fitness of the unpruned model, forwarded to workers
            as the reference fitness.  # assumes (ppl, acc) pair — TODO confirm
        num_runs: index of the current pruning round (logging + file name).

    Returns:
        Tuple ``(best_thresholds, saved_model_name, ref_net)``.
    """
    total_iteration = 100
    # One individual per GPU: the main device plus every id in other_GPU_IDs.
    popsize = len(other_GPU_IDs) + 1
    __C = edict()
    __C.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': [0.1, 0.99999999],
        'ftarget': 0,
        'tmax': total_iteration * popsize,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(__C.parameters)
    start_t = time.time()
    print('***************NCS initialization***************')
    ref_net = masked_models[0]
    # An all-zero mask prunes nothing, so this measures the unpruned model.
    ref_net.change_mask(len(crates) * [0.0], apply_MP_on_mask)
    ref_net.apply_mask()
    start_fit = evaluate_lm(ref_net.masked_model, valid, corpus, TEST_BATCH_SIZE)
    original_fit = orig_fitvalue
    print('start fit: {}'.format(start_fit))
    print('orig fit: {}'.format(original_fit))
    # Now evaluate the actual starting thresholds.
    ref_net = masked_models[0]
    ref_net.change_mask(crates, apply_MP_on_mask)
    ref_net.apply_mask()
    tmp_fit = evaluate_lm(ref_net.masked_model, valid, corpus, TEST_BATCH_SIZE)
    print("start init threshold:", crates)
    print('Start sparsity: {}%'.format(ref_net.get_sparsity() * 100))
    # Seed every individual of the population with the current sparsity.
    es.set_initFitness(es.popsize * [ref_net.get_sparsity()])
    end_t = time.time()
    total_time = end_t - start_t
    print('fit:{}'.format(tmp_fit))
    print('time {}min elapse'.format(total_time / 60.))
    print('***************NCS initialization***************')
    ref_net.clear_cache()
    # Shared tensor through which the worker processes report the winner.
    processes = []
    results = {'result_NCS': torch.FloatTensor(crates)}
    results['result_NCS'].share_memory_()
    # Evaluate individuals in parallel, one process per population member.
    for rank in range(popsize):
        p = Process(target=init_processes,
                    args=(rank, popsize, original_fit, acc_constraint,
                          prune_and_eval, valid, corpus, es, masked_models,
                          num_runs, results))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    # Apply the best thresholds found by the search and re-evaluate.
    ref_net.change_mask(results['result_NCS'].numpy(), apply_MP_on_mask)
    ref_net.apply_mask()
    best_prune = evaluate_lm(ref_net.masked_model, valid, corpus, TEST_BATCH_SIZE)
    print('Accuracy:{}=>{}, ppl:{}=>{}, sparsity: {}%'.format(
        original_fit[1], best_prune[1], original_fit[0], best_prune[0],
        ref_net.get_sparsity() * 100.))
    logger.scalar_summary('ncs_start_acc', tmp_fit[1], num_runs)
    logger.scalar_summary('ncs_start_ppl', tmp_fit[0], num_runs)
    logger.scalar_summary('ncs_best_acc', best_prune[1], num_runs)
    logger.scalar_summary('ncs_best_ppl', best_prune[0], num_runs)
    # The original wrapped this in a dead `if True:`; the save is unconditional
    # (the unused `itr_count` counter was dropped as well).
    saved_model_name = 'ncs_pruned_model_%s_iteration%s_%s_%s_acc_cons_%s.pt' % (
        name_mark, num_runs, Model_type, layer_group_type, str(acc_constraint))
    torch.save(ref_net, cfg.LM_MODEL_TMP_FOLDER + saved_model_name)
    return results['result_NCS'].numpy(), saved_model_name, ref_net
def NCS_MP(crates, ncs_stepsize, fields, masked_models, valid, acc_constraint, num_runs=0, checkpoint=None):
    """Search per-layer pruning thresholds for an NMT model with NCS.

    Same scheme as the LM variant: one NCS run over ``crates``, one worker
    process per population member, best thresholds applied to
    ``masked_models[0]``.  When ``checkpoint`` is given, its 'model' and
    'generator' entries are replaced by the pruned state dicts and the
    checkpoint is saved.

    Args:
        crates: initial per-layer pruning thresholds (NCS start point).
        ncs_stepsize: NCS mutation step size.
        fields: dataset fields; temporarily detached from ``valid`` so the
            dataset can be pickled to the worker processes.
        masked_models: maskable model wrappers; element 0 is the reference net.
        valid: validation set used for fitness evaluation.
        acc_constraint: maximum accuracy drop a candidate may incur.
        num_runs: index of the current pruning round (logging + file name).
        checkpoint: optional checkpoint dict to update and save.

    Returns:
        Tuple ``(best_thresholds, saved_model_name, pruned_model)``.
        NOTE(review): ``saved_model_name`` is only bound in the
        ``checkpoint is not None`` branch — calling with ``checkpoint=None``
        would raise NameError at the return; confirm against callers.
    """
    total_time = 0
    total_iteration = 100
    itr_count = 0  # NOTE(review): never used in this function
    # One individual per GPU: the main device plus every id in other_GPU_IDs.
    popsize = len(other_GPU_IDs) + 1
    __C = edict()
    __C.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': [0., 0.95],
        'ftarget': 0,
        'tmax': total_iteration * popsize,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(__C.parameters)
    start_t = time.time()
    print('***************NCS initialization***************')
    ref_net = masked_models[0]
    # 0.0 represents no parameters have been pruned, so this is the
    # original (unpruned) fitness.
    ref_net.change_mask(len(crates) * [0.0], apply_MP_on_mask)
    ref_net.apply_mask()
    orignal_fit = evaluate(ref_net, valid, fields)
    print('original fit: {}'.format(orignal_fit))
    # Evaluate the actual starting thresholds.
    ref_net = masked_models[0]
    ref_net.change_mask(crates, apply_MP_on_mask)
    ref_net.apply_mask()
    tmp_fit = evaluate(ref_net, valid, fields)
    print('Start sparsity: {}%'.format(ref_net.get_sparsity() * 100))
    # Seed every individual with the current sparsity as its fitness
    # (assumes the initial crates encode the size of each tensor).
    es.set_initFitness(es.popsize * [ref_net.get_sparsity()])
    #es.ask()
    #tmp_fit = torch.FloatTensor([0,0,0])
    end_t = time.time()
    total_time = (end_t - start_t)
    print('fit:{}'.format(tmp_fit))
    print('time {}min elapse'.format(total_time / 60.))
    print('***************NCS initialization***************')
    ref_net.clear_cache()
    # Clear fields so `valid` can be sent to the worker processes.
    valid.fields = []
    processes = []
    # Shared tensor through which the worker processes report the winner.
    results = {'result_NCS': torch.FloatTensor(crates)}
    results['result_NCS'].share_memory_()
    # Evaluate individuals in parallel, one process per population member.
    for rank in range(popsize):
        p = Process(target=init_processes,
                    args=(rank, popsize, orignal_fit, acc_constraint,
                          prune_and_eval, valid, es, masked_models,
                          num_runs, results))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    # Re-attach the fields and apply the best thresholds found.
    valid.fields = fields
    ref_net.change_mask(results['result_NCS'].numpy(), apply_MP_on_mask)
    ref_net.apply_mask()
    best_prune = evaluate(ref_net, valid, fields)
    print('Accuracy:{}=>{}, ppl:{}=>{}, sparsity: {}%'.format(
        orignal_fit[1], best_prune[1], orignal_fit[0], best_prune[0],
        ref_net.get_sparsity() * 100.))
    logger.scalar_summary('ncs_start_acc', tmp_fit[1], num_runs)
    logger.scalar_summary('ncs_start_ppl', tmp_fit[0], num_runs)
    logger.scalar_summary('ncs_best_acc', best_prune[1], num_runs)
    logger.scalar_summary('ncs_best_ppl', best_prune[0], num_runs)
    if checkpoint is not None:
        # Unwrap DataParallel (if present) before extracting state dicts.
        real_model = (ref_net.masked_model.module
                      if isinstance(ref_net.masked_model, nn.DataParallel)
                      else ref_net.masked_model)
        real_generator = (real_model.generator.module
                          if isinstance(real_model.generator, nn.DataParallel)
                          else real_model.generator)
        model_state_dict = real_model.state_dict()
        # Generator weights are stored separately in the checkpoint.
        model_state_dict = {
            k: v
            for k, v in model_state_dict.items() if 'generator' not in k
        }
        generator_state_dict = real_generator.state_dict()
        checkpoint['model'] = model_state_dict
        checkpoint['generator'] = generator_state_dict
        saved_model_name = 'ncs_pruned_model_%s_iteration%s_%s_%s_acc_cons_%s.pt' % (
            name_mark, num_runs, Model_type, layer_group_type,
            str(acc_constraint))
        torch.save(checkpoint, SAVE_MODEL_TMP_FOLDER + saved_model_name)
    return results['result_NCS'].numpy(), saved_model_name, ref_net.masked_model
def ncs_loop(tmp_crates, tmp_ind, the_input_batch, send_list, wait_list):
    """Distributed NCS loop over the threshold entries named in ``tmp_ind``.

    Candidate full threshold vectors are pushed to workers via
    ``set_solutions``/``send_list`` and their fitness collected with
    ``get_all``/``wait_list``.  Mutates the module-level ``crates_list`` and
    ``crates`` with the best-so-far solution and finally broadcasts the
    result as a type-35 ``message`` on ``send_list``.

    NOTE(review): block structure was reconstructed from a
    whitespace-mangled source — loop boundaries should be confirmed
    against the original file.  ``the_input_batch`` is never used here.
    """
    __C = edict()
    __C.parameters = {
        'reset_xl_to_pop': False,
        'init_value': tmp_crates,
        'stepsize': ncs_stepsize,
        'bounds': [0.0, 10.],
        'ftarget': 0,
        'tmax': 1600,
        'popsize': 10,
        'best_k': 1
    }
    es = ncs.NCS(__C.parameters)
    print('***************NCS initialization***************')
    # Scatter the searched entries of tmp_crates into a full-length vector.
    tmp_x_ = np.array(crates_list)
    tmp_input_x = tmp_crates
    for _ii in range(len(tmp_ind)):
        tmp_x_[layer_inds[tmp_ind[_ii]]] = tmp_input_x[_ii]
    set_solutions([tmp_x_], send_list)
    _, tmp_fit = get_all(len([tmp_x_]), wait_list)
    print('all fitness gotten.')
    es.set_initFitness(es.popsize * tmp_fit)
    print('fit:{}'.format(tmp_fit))
    print('***************NCS initialization***************')
    count = 0
    while not es.stop():
        print("now in the es loop.")
        count += 1
        # Hard cap of 15 iterations regardless of es.stop().
        if count == 15:
            break
        x = es.ask()
        # Expand each candidate (searched entries only) to a full vector.
        X = []
        for x_ in x:
            tmp_x_ = np.array(crates_list)
            for _ii in range(len(tmp_ind)):
                tmp_x_[layer_inds[tmp_ind[_ii]]] = x_[_ii]
            X.append(tmp_x_)
        set_solutions(X, send_list)
        X_arrange, fit = get_all(len(X), wait_list)
        # Project the (possibly reordered) full vectors back to the
        # searched entries before telling NCS.
        X = []
        for x_ in X_arrange:
            tmp_x_ = np.array(len(tmp_ind) * [0.])
            for _ii in range(len(tmp_ind)):
                tmp_x_[_ii] = x_[layer_inds[tmp_ind[_ii]]]
            X.append(tmp_x_)
        es.tell(X, fit)
        # Write the best-so-far solution into the global crates state.
        for _ii in range(len(tmp_ind)):
            crates_list[layer_inds[tmp_ind[_ii]]] = es.result()[0][_ii]
        for c_i in range(len(crates_list)):
            crates[layer_name[c_i]] = crates_list[c_i]
        #es_cache[itr]={'compression':-es.result()[1], 'crates':crates_list[:]}
        # NOTE(review): _tmp_c is computed but never read afterwards.
        _tmp_c = np.array(len(crates_list) * [-1.])
        for t_name in tmp_ind:
            _tmp_c[layer_inds[t_name]] = crates[t_name]
    # Broadcast the final crates_list to the workers (message type 35).
    msg = message(35, crates_list).msg_encode()
    send_list.put(msg)
# r = ncs_para["r"] # epoch = ncs_para["epoch"] __C.parameters = { 'reset_xl_to_pop': False, 'init_value': tmp_crates, 'stepsize': ncs_stepsize, 'bounds': [0.0, 10.], 'ftarget': 0, 'tmax': 1600, 'popsize': ncs_para["n"], 'best_k': 1, 'epoch': ncs_para["epoch"], 'lambda_': ncs_para["lambda"], 'r': ncs_para["r"] } es = ncs.NCS(__C.parameters) # print '***************NCS initialization***************' tmp_x_ = np.array(crates_list) tmp_input_x = tmp_crates for _ii in range(len(tmp_ind)): tmp_x_[layer_inds[tmp_ind[_ii]]] = tmp_input_x[_ii] _, tmp_fit = evaluate(solver.net, [tmp_x_], 1, accuracy_) es.set_initFitness(es.popsize * tmp_fit) print 'fit:{}'.format(tmp_fit) # print '***************NCS initialization***************' # while not es.stop(): if not es.stop(): x = es.ask() X = [] for x_ in x:
def NCSloop(tmp_crates, tmp_ind, accuracy_):
    '''
    Run one file-based NCS search loop.

    Waits for the input batch dropped by LoopTest1 in ./work/, then runs
    NCS over the threshold entries named in ``tmp_ind``, exchanging
    candidate solutions and fitness values through ``set_solutions`` /
    ``get_all``.  The best thresholds are written into the module-level
    ``crates_list`` / ``crates`` and finally persisted as
    'crates_list.npy', which LoopTest1.py uses to prune the solver net.

    :param tmp_crates: initial values for the searched threshold entries
    :param tmp_ind: names of the layers whose thresholds are searched
    :param accuracy_: accuracy reference (from accuracy.npy); currently
        unused here — TODO confirm it is consumed by the workers
    :return: None (creates crates_list.npy and uploads it via HDFS)

    NOTE(review): loop boundaries were reconstructed from a
    whitespace-mangled source.
    '''
    # Block until the data file is available (side effect we must keep).
    f = wait_file('./work/', 'data.npy')
    the_input_batch = np.load(f)  # loaded but not used below
    # the_input_batch=hdfs_load('/shared/work/','data.npy')
    es = {}
    if es_method == 'ncs':
        __C = edict()
        __C.parameters = {
            'reset_xl_to_pop': False,
            'init_value': tmp_crates,
            'stepsize': ncs_stepsize,
            'bounds': [0.0, 10.],
            'ftarget': 0,
            'tmax': 1600,
            'popsize': 10,
            'best_k': 1
        }
        es = ncs.NCS(__C.parameters)
    print('***************NCS initialization***************')
    # Scatter the searched entries into a full-length threshold vector.
    tmp_x_ = np.array(crates_list)
    tmp_input_x = tmp_crates
    for _ii in range(len(tmp_ind)):
        tmp_x_[layer_inds[tmp_ind[_ii]]] = tmp_input_x[_ii]
    # BUG FIX: the original passed a `send_list` argument here, but no
    # `send_list` exists in this function (NameError); every other call in
    # this function invokes set_solutions with the solution list only.
    set_solutions([tmp_x_])
    _, tmp_fit = get_all(len([tmp_x_]))
    print('all fitness gotten.')
    es.set_initFitness(es.popsize * tmp_fit)
    print('fit:{}'.format(tmp_fit))
    print('***************NCS initialization***************')
    count = 0
    while not es.stop():
        print("now in the es loop.")
        count += 1
        # Hard cap of 15 iterations regardless of es.stop().
        if count == 15:
            break
        x = es.ask()
        # Expand each candidate (searched entries only) to a full vector.
        X = []
        for x_ in x:
            tmp_x_ = np.array(crates_list)
            for _ii in range(len(tmp_ind)):
                tmp_x_[layer_inds[tmp_ind[_ii]]] = x_[_ii]
            X.append(tmp_x_)
        set_solutions(X)
        X_arrange, fit = get_all(len(X))
        # Project the returned full vectors back to the searched entries.
        X = []
        for x_ in X_arrange:
            tmp_x_ = np.array(len(tmp_ind) * [0.])
            for _ii in range(len(tmp_ind)):
                tmp_x_[_ii] = x_[layer_inds[tmp_ind[_ii]]]
            X.append(tmp_x_)
        es.tell(X, fit)
        # Write the best-so-far solution into the global crates state.
        for _ii in range(len(tmp_ind)):
            crates_list[layer_inds[tmp_ind[_ii]]] = es.result()[0][_ii]
        for c_i in range(len(crates_list)):
            crates[layer_name[c_i]] = crates_list[c_i]
        #es_cache[itr]={'compression':-es.result()[1], 'crates':crates_list[:]}
        # (a dead `_tmp_c` scatter that was never read has been removed)
    # Persist the final thresholds for LoopTest1.py, push to HDFS, clean up.
    np.save('crates_list.npy', crates_list)
    hdfs_set_file('./', '/shared/work/', 'crates_list.npy')
    os.remove('crates_list.npy')
def NCS_MP_trans(crates, ncs_stepsize, references, vali_data, vali_raw_data, ref_net, ref_model_dicts, sorted_weights, param_name, trans_opt, trans_opt_dummy, acc_constraint, num_runs=0):
    """Search pruning thresholds for a translation model with NCS.

    Each NCS iteration evaluates the whole population in parallel worker
    processes (ranks 0-4 on GPU slot 0, ranks 5-9 on slot 1).  A candidate's
    fitness is its remaining-parameter count, set to ``np.inf`` when its
    accuracy drop exceeds ``acc_constraint``.  The best solution is applied
    with ``apply_prune`` and the pruned model saved.

    Args:
        crates: initial thresholds; also used as the NCS `bounds`
            (NOTE(review): passing the start point as bounds looks
            suspicious — confirm against the NCS implementation).
        ncs_stepsize: NCS mutation step size.
        references: reference translations for evaluation.
        vali_data / vali_raw_data: validation data (tensorized / raw).
        ref_net: the unpruned reference network.
        ref_model_dicts, sorted_weights: per-GPU model state and sorted
            weight tables used by the workers.
        param_name: names of the parameters being pruned.
        trans_opt, trans_opt_dummy: translation options for the workers.
        acc_constraint: maximum accuracy drop a candidate may incur.
        num_runs: index of the pruning round (used in the file name).

    Returns:
        Tuple ``(es.result(), saved_model_name)``.
    """
    total_time = 0
    itr_count = 0
    popsize = 10
    __C = edict()
    __C.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': crates,
        'ftarget': 0,
        'tmax': 400,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(__C.parameters)
    start_t = time.time()
    print('***************NCS initialization***************')
    tmp_fit = evaluate_trans(ref_net, references, vali_data, vali_raw_data)
    # Seed every individual with the total parameter count implied by crates
    # (assumes the initial crates store the size of each tensor).
    es.set_initFitness(es.popsize * [sum(crates) + len(crates)])
    end_t = time.time()
    total_time = (end_t - start_t)
    print('fit:{}'.format(tmp_fit))
    print('time {}min elapse'.format(total_time / 60.))
    print('***************NCS initialization***************')
    while not es.stop():
        start_t = time.time()
        X = es.ask()
        processes = []
        results = {}
        for ind_i in range(popsize):
            results[ind_i] = torch.FloatTensor([0.0, 0., 0.])
        for rank in range(popsize):
            tmp_ref_model_dict = ref_model_dicts[0]
            tmp_sorted_weights = sorted_weights[0]
            if rank >= 5 and rank < popsize:
                # split tasks to different GPUs
                tmp_ref_model_dict = ref_model_dicts[1]
                tmp_sorted_weights = sorted_weights[1]
            p = Process(target=init_processes_trans,
                        args=(rank, popsize, param_name, references, X,
                              prune_and_eval_trans, tmp_ref_model_dict,
                              tmp_sorted_weights, trans_opt, trans_opt_dummy,
                              results))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        fit = []
        for i in range(len(X)):
            # Fitness = remaining parameters; constraint violation => inf.
            remain_num = sum(X[i])
            for j in range(len(results)):
                # results entry layout: 0:ppl, 1:acc, 2:rank of individual
                if int(results[j][2]) == i:
                    if tmp_fit[1] - results[j][1] > acc_constraint:
                        remain_num = np.inf
            fit.append(remain_num)
        es.tell(X, fit)
        es.disp(100)
        end_t = time.time()
        itr_count += 1
        itr_time = end_t - start_t
        total_time += itr_time
        print('total time {}min elapse, itr#{} cost {} min'.format(
            total_time / 60., itr_count, itr_time / 60.))
    pruned_model = apply_prune(ref_net, ref_model_dicts[1], sorted_weights[1],
                               param_name, es.result()[0][0])
    # BUG FIX: the original called evaluate(pruned_model, valid, fields, opt),
    # but `valid`, `fields` and `opt` are not defined in this function
    # (NameError).  Use the same evaluation as the initialization above.
    best_prune = evaluate_trans(pruned_model, references, vali_data, vali_raw_data)
    print('Accuracy:{}=>{}, ppl:{}=>{}'.format(tmp_fit[1], best_prune[1],
                                               tmp_fit[0], best_prune[0]))
    saved_model_name = 'the_pruned_deen_model_%s.pt' % num_runs
    torch.save(pruned_model, saved_model_name)
    return es.result(), saved_model_name
def NCS_MP(crates, ncs_stepsize, checkpoint, fields, opt, ref_net, ref_model_dicts, sorted_weights, param_name, train, valid, acc_constraint):
    """Search pruning thresholds for ``ref_net`` with a 4-member NCS run.

    Every iteration spawns one worker process per population member
    (ranks 0-1 use GPU slot 0, ranks 2-3 slot 1).  A candidate's fitness
    is its remaining-parameter count, forced to ``np.inf`` when its
    accuracy falls more than ``acc_constraint`` below the reference.
    The best solution is applied via ``apply_prune`` and re-evaluated.

    Returns ``es.result()``.  ``checkpoint`` and ``train`` are accepted
    for interface compatibility but not used in this function.
    """
    popsize = 4
    search_cfg = edict()
    search_cfg.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': crates,
        'ftarget': 0,
        'tmax': 100,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(search_cfg.parameters)
    print('***************NCS initialization***************')
    base_fit = evaluate(ref_net, valid, fields, opt)
    # Seed the population with the parameter count implied by `crates`.
    es.set_initFitness(es.popsize * [sum(crates) + len(crates)])
    print('fit:{}'.format(base_fit))
    print('***************NCS initialization***************')
    while not es.stop():
        candidates = es.ask()
        # One result slot per individual: [ppl, acc, individual rank].
        slots = {rank: torch.FloatTensor([0.0, 0., 0.]) for rank in range(popsize)}
        workers = []
        for rank in range(popsize):
            # Ranks 2-3 run on the second GPU slot.
            gpu = 1 if 2 <= rank < 4 else 0
            worker = Process(target=init_processes,
                             args=(rank, popsize, param_name, candidates,
                                   prune_and_eval, ref_model_dicts[gpu],
                                   sorted_weights[gpu], slots))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
        fit = []
        for idx, solution in enumerate(candidates):
            score = sum(solution)
            for slot in slots.values():
                # Constraint violation turns the fitness into +inf.
                if int(slot[2]) == idx and base_fit[1] - slot[1] > acc_constraint:
                    score = np.inf
            fit.append(score)
        es.tell(candidates, fit)
        es.disp(100)
    pruned_model = apply_prune(ref_net, ref_model_dicts[1], sorted_weights[1],
                               param_name, es.result()[0][0])
    best_prune = evaluate(pruned_model, valid, fields, opt)
    print('Accuracy:{}=>{}, ppl:{}=>{}'.format(base_fit[1], best_prune[1],
                                               base_fit[0], best_prune[0]))
    return es.result()