def NCS_MP(crates,
           ncs_stepsize,
           masked_models,
           valid,
           corpus,
           acc_constraint,
           orig_fitvalue,
           num_runs=0):
    """Prune a language model with Negatively Correlated Search (NCS).

    Runs one NCS session in which each individual of the population is
    evaluated in its own process; the best pruning-threshold vector found
    (written by the workers into a shared tensor) is applied to the
    reference network, which is then saved to disk.

    Args:
        crates: initial pruning-threshold vector, one entry per layer group.
        ncs_stepsize: NCS mutation step size.
        masked_models: list of maskable model wrappers; element 0 is the
            reference network evaluated in this process.
        valid: validation data used by ``evaluate_lm``.
        corpus: corpus object required by ``evaluate_lm``.
        acc_constraint: maximum accuracy drop tolerated for a pruned model.
        orig_fitvalue: (ppl, acc) fitness of the unpruned model.
        num_runs: iteration index used for logging and the saved file name.

    Returns:
        Tuple ``(best_thresholds_ndarray, saved_model_name, ref_net)``.
    """
    total_time = 0
    total_iteration = 100
    # One NCS individual per extra GPU plus the master process.
    popsize = len(other_GPU_IDs) + 1
    __C = edict()
    __C.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': [0.1, 0.99999999],
        'ftarget': 0,
        'tmax': total_iteration * popsize,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(__C.parameters)

    start_t = time.time()

    print('***************NCS initialization***************')
    ref_net = masked_models[0]
    # 0.0 represents no parameters have been pruned, so it's original fitness
    ref_net.change_mask(len(crates) * [0.0], apply_MP_on_mask)
    ref_net.apply_mask()
    start_fit = evaluate_lm(ref_net.masked_model, valid, corpus,
                            TEST_BATCH_SIZE)
    original_fit = orig_fitvalue
    print('start fit: {}'.format(start_fit))
    print('orig fit: {}'.format(original_fit))

    # Evaluate the fitness of the initial threshold vector.
    ref_net.change_mask(crates, apply_MP_on_mask)
    ref_net.apply_mask()
    tmp_fit = evaluate_lm(ref_net.masked_model, valid, corpus, TEST_BATCH_SIZE)
    print("start init threshold:", crates)
    print('Start sparsity: {}%'.format(ref_net.get_sparsity() * 100))
    es.set_initFitness(
        es.popsize *
        [ref_net.get_sparsity()
         ])  # assume the inital crates store the size of each tensor

    end_t = time.time()
    total_time = (end_t - start_t)

    print('fit:{}'.format(tmp_fit))
    print('time {}min elapse'.format(total_time / 60.))
    print('***************NCS initialization***************')

    ref_net.clear_cache()
    processes = []
    # Shared-memory tensor the worker processes write the best thresholds to.
    results = {'result_NCS': torch.FloatTensor(crates)}
    results['result_NCS'].share_memory_()

    # parallel individuals: one worker process per population member
    for rank in range(popsize):
        p = Process(target=init_processes,
                    args=(rank, popsize, original_fit, acc_constraint,
                          prune_and_eval, valid, corpus, es, masked_models,
                          num_runs, results))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    # Apply the best thresholds found and measure the final fitness.
    ref_net.change_mask(results['result_NCS'].numpy(), apply_MP_on_mask)
    ref_net.apply_mask()
    best_prune = evaluate_lm(ref_net.masked_model, valid, corpus,
                             TEST_BATCH_SIZE)
    print('Accuracy:{}=>{}, ppl:{}=>{}, sparsity: {}%'.format(
        original_fit[1], best_prune[1], original_fit[0], best_prune[0],
        ref_net.get_sparsity() * 100.))

    logger.scalar_summary('ncs_start_acc', tmp_fit[1], num_runs)
    logger.scalar_summary('ncs_start_ppl', tmp_fit[0], num_runs)
    logger.scalar_summary('ncs_best_acc', best_prune[1], num_runs)
    logger.scalar_summary('ncs_best_ppl', best_prune[0], num_runs)
    # The model is always saved (the original `if True:` guard was a no-op).
    saved_model_name = 'ncs_pruned_model_%s_iteration%s_%s_%s_acc_cons_%s.pt' % (
        name_mark, num_runs, Model_type, layer_group_type,
        str(acc_constraint))
    torch.save(ref_net, cfg.LM_MODEL_TMP_FOLDER + saved_model_name)

    return results['result_NCS'].numpy(), saved_model_name, ref_net
def NCS_MP(crates,
           ncs_stepsize,
           fields,
           masked_models,
           valid,
           acc_constraint,
           num_runs=0,
           checkpoint=None):
    """Prune an NMT model with Negatively Correlated Search (NCS).

    Runs one NCS session in which each individual of the population is
    evaluated in its own process; the best pruning-threshold vector found
    is applied to the reference network and, when a checkpoint dict is
    supplied, the pruned weights are saved to disk.

    Args:
        crates: initial pruning-threshold vector, one entry per layer group.
        ncs_stepsize: NCS mutation step size.
        fields: dataset fields required by ``evaluate``; temporarily cleared
            on ``valid`` so it can be sent to worker processes.
        masked_models: list of maskable model wrappers; element 0 is the
            reference network evaluated in this process.
        valid: validation data used by ``evaluate``.
        acc_constraint: maximum accuracy drop tolerated for a pruned model.
        num_runs: iteration index used for logging and the saved file name.
        checkpoint: optional checkpoint dict; when given, its 'model' and
            'generator' entries are replaced with the pruned state dicts
            and the checkpoint is saved.

    Returns:
        Tuple ``(best_thresholds_ndarray, saved_model_name, masked_model)``;
        ``saved_model_name`` is None when ``checkpoint`` is None.
    """
    total_time = 0
    total_iteration = 100
    # One NCS individual per extra GPU plus the master process.
    popsize = len(other_GPU_IDs) + 1
    __C = edict()
    __C.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': [0., 0.95],
        'ftarget': 0,
        'tmax': total_iteration * popsize,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(__C.parameters)

    start_t = time.time()

    print('***************NCS initialization***************')
    ref_net = masked_models[0]
    # 0.0 represents no parameters have been pruned, so it's original fitness
    ref_net.change_mask(len(crates) * [0.0], apply_MP_on_mask)
    ref_net.apply_mask()
    original_fit = evaluate(ref_net, valid, fields)
    print('original fit: {}'.format(original_fit))

    # Evaluate the fitness of the initial threshold vector.
    ref_net.change_mask(crates, apply_MP_on_mask)
    ref_net.apply_mask()
    tmp_fit = evaluate(ref_net, valid, fields)
    print('Start sparsity: {}%'.format(ref_net.get_sparsity() * 100))
    es.set_initFitness(
        es.popsize *
        [ref_net.get_sparsity()
         ])  # assume the inital crates store the size of each tensor

    end_t = time.time()
    total_time = (end_t - start_t)

    print('fit:{}'.format(tmp_fit))
    print('time {}min elapse'.format(total_time / 60.))
    print('***************NCS initialization***************')

    ref_net.clear_cache()
    valid.fields = []  # clear fields for send valid among thresholds
    processes = []
    # Shared-memory tensor the worker processes write the best thresholds to.
    results = {'result_NCS': torch.FloatTensor(crates)}
    results['result_NCS'].share_memory_()

    # parallel individuals: one worker process per population member
    for rank in range(popsize):
        p = Process(target=init_processes,
                    args=(rank, popsize, original_fit, acc_constraint,
                          prune_and_eval, valid, es, masked_models, num_runs,
                          results))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    # Restore the fields and measure the fitness of the best thresholds.
    valid.fields = fields
    ref_net.change_mask(results['result_NCS'].numpy(), apply_MP_on_mask)
    ref_net.apply_mask()
    best_prune = evaluate(ref_net, valid, fields)
    print('Accuracy:{}=>{}, ppl:{}=>{}, sparsity: {}%'.format(
        original_fit[1], best_prune[1], original_fit[0], best_prune[0],
        ref_net.get_sparsity() * 100.))

    logger.scalar_summary('ncs_start_acc', tmp_fit[1], num_runs)
    logger.scalar_summary('ncs_start_ppl', tmp_fit[0], num_runs)
    logger.scalar_summary('ncs_best_acc', best_prune[1], num_runs)
    logger.scalar_summary('ncs_best_ppl', best_prune[0], num_runs)
    # Fix: saved_model_name was unbound at the return when checkpoint is None.
    saved_model_name = None
    if checkpoint is not None:
        # Unwrap DataParallel (if any) before extracting state dicts.
        real_model = (ref_net.masked_model.module if isinstance(
            ref_net.masked_model, nn.DataParallel) else ref_net.masked_model)
        real_generator = (real_model.generator.module if isinstance(
            real_model.generator, nn.DataParallel) else real_model.generator)
        model_state_dict = real_model.state_dict()
        model_state_dict = {
            k: v
            for k, v in model_state_dict.items() if 'generator' not in k
        }
        generator_state_dict = real_generator.state_dict()
        checkpoint['model'] = model_state_dict
        checkpoint['generator'] = generator_state_dict
        saved_model_name = 'ncs_pruned_model_%s_iteration%s_%s_%s_acc_cons_%s.pt' % (
            name_mark, num_runs, Model_type, layer_group_type,
            str(acc_constraint))
        torch.save(checkpoint, SAVE_MODEL_TMP_FOLDER + saved_model_name)

    return results['result_NCS'].numpy(
    ), saved_model_name, ref_net.masked_model
# Example #3
def ncs_loop(tmp_crates, tmp_ind, the_input_batch, send_list, wait_list):
    """Run one NCS optimisation session over the layers named in *tmp_ind*.

    Candidate compression-rate vectors are embedded into a full-length copy
    of the global ``crates_list``, pushed to workers via *send_list*, and
    their fitness collected from *wait_list*.  The best rates found are
    written back into the module-level ``crates_list``/``crates`` and the
    final vector is sent out as a type-35 message.
    """
    config = edict()
    config.parameters = {
        'reset_xl_to_pop': False,
        'init_value': tmp_crates,
        'stepsize': ncs_stepsize,
        'bounds': [0.0, 10.],
        'ftarget': 0,
        'tmax': 1600,
        'popsize': 10,
        'best_k': 1
    }

    es = ncs.NCS(config.parameters)
    print('***************NCS initialization***************')
    # Embed the per-layer starting rates into a full-length crate vector.
    seed_vec = np.array(crates_list)
    for name, rate in zip(tmp_ind, tmp_crates):
        seed_vec[layer_inds[name]] = rate

    set_solutions([seed_vec], send_list)
    _, init_fit = get_all(1, wait_list)
    print('all fitness gotten.')

    es.set_initFitness(es.popsize * init_fit)
    print('fit:{}'.format(init_fit))
    print('***************NCS initialization***************')

    iteration = 0
    while not es.stop():
        print("now in the es loop.")
        iteration += 1
        if iteration == 15:
            break
        asked = es.ask()
        # Embed each candidate sub-vector into a full-length crate vector.
        candidates = []
        for cand in asked:
            embedded = np.array(crates_list)
            for pos, name in enumerate(tmp_ind):
                embedded[layer_inds[name]] = cand[pos]
            candidates.append(embedded)
        set_solutions(candidates, send_list)
        arranged, fitness = get_all(len(candidates), wait_list)
        # Project the evaluated full-length vectors back onto the tuned subset.
        projected = []
        for full_vec in arranged:
            sub_vec = np.array(len(tmp_ind) * [0.])
            for pos, name in enumerate(tmp_ind):
                sub_vec[pos] = full_vec[layer_inds[name]]
            projected.append(sub_vec)
        es.tell(projected, fitness)
        # Keep the global crate list in sync with the current best solution.
        for pos, name in enumerate(tmp_ind):
            crates_list[layer_inds[name]] = es.result()[0][pos]
    for pos, rate in enumerate(crates_list):
        crates[layer_name[pos]] = rate
    #es_cache[itr]={'compression':-es.result()[1], 'crates':crates_list[:]}
    scratch = np.array(len(crates_list) * [-1.])
    for tuned_name in tmp_ind:
        scratch[layer_inds[tuned_name]] = crates[tuned_name]

    send_list.put(message(35, crates_list).msg_encode())
# Example #4
            #   r = ncs_para["r"]
            #   epoch = ncs_para["epoch"]
            __C.parameters = {
                'reset_xl_to_pop': False,
                'init_value': tmp_crates,
                'stepsize': ncs_stepsize,
                'bounds': [0.0, 10.],
                'ftarget': 0,
                'tmax': 1600,
                'popsize': ncs_para["n"],
                'best_k': 1,
                'epoch': ncs_para["epoch"],
                'lambda_': ncs_para["lambda"],
                'r': ncs_para["r"]
            }
            es = ncs.NCS(__C.parameters)
            #   print '***************NCS initialization***************'
            tmp_x_ = np.array(crates_list)
            tmp_input_x = tmp_crates
            for _ii in range(len(tmp_ind)):
                tmp_x_[layer_inds[tmp_ind[_ii]]] = tmp_input_x[_ii]
            _, tmp_fit = evaluate(solver.net, [tmp_x_], 1, accuracy_)
            es.set_initFitness(es.popsize * tmp_fit)
            print 'fit:{}'.format(tmp_fit)
        #   print '***************NCS initialization***************'

        # while not es.stop():
        if not es.stop():
            x = es.ask()
            X = []
            for x_ in x:
# Example #5
def NCSloop(tmp_crates, tmp_ind, accuracy_):
    '''
    Run one NCS optimisation loop over the layers listed in ``tmp_ind``.

    Waits for the input batch produced by LoopTest1 (``./work/data.npy``),
    optimises the per-layer compression rates with NCS, writes the result
    to ``crates_list.npy`` and uploads it to the shared HDFS work folder;
    LoopTest1.py then uses this file to apply pruning to the solver net.
    :param tmp_crates: initial compression rates for the layers in tmp_ind
    :param tmp_ind: names of the layers tuned in this loop
    :param accuracy_: baseline from the accuracy.npy file
        (currently not referenced in this function body)
    :return: None; side effect is creating and uploading crates_list.npy
    '''
    # Block until LoopTest1 has published the data batch.
    f = wait_file('./work/', 'data.npy')
    the_input_batch = np.load(f)  # NOTE(review): loaded but never used below
    # the_input_batch=hdfs_load('/shared/work/','data.npy')
    # NOTE(review): if es_method != 'ncs', es stays a plain dict and the
    # es.stop() call below raises AttributeError — presumably other method
    # branches existed elsewhere; confirm.
    es = {}

    if es_method == 'ncs':
        __C = edict()
        __C.parameters = {
            'reset_xl_to_pop': False,
            'init_value': tmp_crates,
            'stepsize': ncs_stepsize,  # module-level step size
            'bounds': [0.0, 10.],
            'ftarget': 0,
            'tmax': 1600,
            'popsize': 10,
            'best_k': 1
        }
        es = ncs.NCS(__C.parameters)
        print('***************NCS initialization***************')
        # Embed the per-layer starting rates into a full-length crate vector.
        tmp_x_ = np.array(crates_list)
        tmp_input_x = tmp_crates
        for _ii in range(len(tmp_ind)):
            tmp_x_[layer_inds[tmp_ind[_ii]]] = tmp_input_x[_ii]

        # NOTE(review): send_list is not defined in this scope — presumably a
        # module-level queue; confirm. Also, unlike ncs_loop, get_all here is
        # called without a wait_list argument.
        set_solutions([tmp_x_], send_list)
        _, tmp_fit = get_all(len([tmp_x_]))
        print('all fitness gotten.')

        # Seed every individual with the initial fitness.
        es.set_initFitness(es.popsize * tmp_fit)
        print('fit:{}'.format(tmp_fit))
        print('***************NCS initialization***************')
    count = 0
    while not es.stop():
        print("now in the es loop.")
        count += 1
        if count == 15:  # hard cap on iterations per session
            break
        x = es.ask()
        # Embed each candidate sub-vector into a full-length crate vector.
        X = []
        for x_ in x:
            tmp_x_ = np.array(crates_list)
            for _ii in range(len(tmp_ind)):
                tmp_x_[layer_inds[tmp_ind[_ii]]] = x_[_ii]
            X.append(tmp_x_)
        set_solutions(X)
        X_arrange, fit = get_all(len(X))
        # Project the evaluated full-length vectors back to the tuned subset.
        X = []
        for x_ in X_arrange:
            tmp_x_ = np.array(len(tmp_ind) * [0.])
            for _ii in range(len(tmp_ind)):
                tmp_x_[_ii] = x_[layer_inds[tmp_ind[_ii]]]
            X.append(tmp_x_)
        es.tell(X, fit)
        # Keep the global crate list in sync with the current best solution.
        for _ii in range(len(tmp_ind)):
            crates_list[layer_inds[tmp_ind[_ii]]] = es.result()[0][_ii]
    for c_i in range(len(crates_list)):
        crates[layer_name[c_i]] = crates_list[c_i]
    #es_cache[itr]={'compression':-es.result()[1], 'crates':crates_list[:]}
    # NOTE(review): _tmp_c is computed but never used afterwards.
    _tmp_c = np.array(len(crates_list) * [-1.])
    for t_name in tmp_ind:
        _tmp_c[layer_inds[t_name]] = crates[t_name]
    # Publish the result file locally, to HDFS, then clean up the local copy.
    np.save('crates_list.npy', crates_list)
    hdfs_set_file('./', '/shared/work/', 'crates_list.npy')
    os.remove('crates_list.npy')
# Example #6
def NCS_MP_trans(crates,
                 ncs_stepsize,
                 references,
                 vali_data,
                 vali_raw_data,
                 ref_net,
                 ref_model_dicts,
                 sorted_weights,
                 param_name,
                 trans_opt,
                 trans_opt_dummy,
                 acc_constraint,
                 num_runs=0):
    total_time = 0
    itr_count = 0
    popsize = 10
    __C = edict()
    __C.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': crates,
        'ftarget': 0,
        'tmax': 400,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(__C.parameters)

    start_t = time.time()

    print('***************NCS initialization***************')
    tmp_fit = evaluate_trans(ref_net, references, vali_data, vali_raw_data)
    es.set_initFitness(
        es.popsize *
        [sum(crates) + len(crates)
         ])  # assume the inital crates store the size of each tensor
    #tmp_fit = torch.FloatTensor([0,0,0])

    end_t = time.time()
    total_time = (end_t - start_t)

    print('fit:{}'.format(tmp_fit))
    print('time {}min elapse'.format(total_time / 60.))
    print('***************NCS initialization***************')

    while not es.stop():
        start_t = time.time()

        X = es.ask()

        processes = []
        results = {}
        for ind_i in range(popsize):
            results[ind_i] = torch.FloatTensor([0.0, 0., 0.])

        for rank in range(popsize):
            tmp_ref_model_dict = ref_model_dicts[0]
            tmp_sorted_weights = sorted_weights[0]
            if rank >= 5 and rank < popsize:  # split tasks to different GPUs
                tmp_ref_model_dict = ref_model_dicts[1]
                tmp_sorted_weights = sorted_weights[1]
            p = Process(target=init_processes_trans,
                        args=(rank, popsize, param_name, references, X,
                              prune_and_eval_trans, tmp_ref_model_dict,
                              tmp_sorted_weights, trans_opt, trans_opt_dummy,
                              results))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

        fit = []
        for i in range(len(X)):
            remain_num = sum(X[i])
            for j in range(len(results)):  # results of fitness evaluation
                if int(results[j]
                       [2]) == i:  # 0:ppl, 1:acc, 2:rank of individual
                    if tmp_fit[1] - results[j][1] > acc_constraint:
                        remain_num = np.inf
            fit.append(remain_num)
        #print X,fit
        es.tell(X, fit)
        es.disp(100)

        end_t = time.time()
        itr_count += 1
        itr_time = end_t - start_t
        total_time += itr_time
        print('total time {}min elapse, itr#{} cost {} min'.format(
            total_time / 60., itr_count, itr_time / 60.))

    pruned_model = apply_prune(ref_net, ref_model_dicts[1], sorted_weights[1],
                               param_name,
                               es.result()[0][0])
    best_prune = evaluate(pruned_model, valid, fields, opt)
    print('Accuracy:{}=>{}, ppl:{}=>{}'.format(tmp_fit[1], best_prune[1],
                                               tmp_fit[0], best_prune[0]))
    saved_model_name = 'the_pruned_deen_model_%s.pt' % num_runs
    torch.save(pruned_model, saved_model_name)
    return es.result(), saved_model_name
# Example #7
def NCS_MP(crates, ncs_stepsize, checkpoint, fields, opt, ref_net,
           ref_model_dicts, sorted_weights, param_name, train, valid,
           acc_constraint):
    """Search pruning thresholds with NCS under an accuracy constraint.

    Each NCS individual is evaluated in its own worker process, split over
    two GPUs; a candidate's fitness is the number of remaining weights,
    pushed to infinity when its accuracy drop exceeds ``acc_constraint``.
    The best threshold is applied to the reference net and the final
    ``es.result()`` tuple is returned.
    """
    popsize = 4
    ncs_cfg = edict()
    ncs_cfg.parameters = {
        'reset_xl_to_pop': False,
        'init_value': crates,
        'stepsize': ncs_stepsize,
        'bounds': crates,
        'ftarget': 0,
        'tmax': 100,
        'popsize': popsize,
        'best_k': 1
    }
    es = ncs.NCS(ncs_cfg.parameters)
    print('***************NCS initialization***************')
    # Baseline (ppl, acc) fitness of the unpruned reference net.
    tmp_fit = evaluate(ref_net, valid, fields, opt)
    es.set_initFitness(
        es.popsize *
        [sum(crates) + len(crates)
         ])  # assume the inital crates store the size of each tensor
    print('fit:{}'.format(tmp_fit))
    print('***************NCS initialization***************')
    while not es.stop():
        X = es.ask()

        # One (ppl, acc, rank) result slot per individual, filled by workers.
        results = {slot: torch.FloatTensor([0.0, 0., 0.])
                   for slot in range(popsize)}

        workers = []
        for rank in range(popsize):
            # Split the population between the two GPUs.
            gpu = 1 if 2 <= rank < 4 else 0
            worker = Process(target=init_processes,
                             args=(rank, popsize, param_name, X,
                                   prune_and_eval, ref_model_dicts[gpu],
                                   sorted_weights[gpu], results))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()

        # Fitness = remaining weight count; infeasible candidates get +inf.
        fit = []
        for idx, cand in enumerate(X):
            remain_num = sum(cand)
            for res in results.values():  # 0:ppl, 1:acc, 2:rank of individual
                if int(res[2]) == idx and tmp_fit[1] - res[1] > acc_constraint:
                    remain_num = np.inf
            fit.append(remain_num)
        #print X,fit
        es.tell(X, fit)
        es.disp(100)

    # Apply the best threshold found to the reference net.
    pruned_model = apply_prune(ref_net, ref_model_dicts[1], sorted_weights[1],
                               param_name,
                               es.result()[0][0])
    best_prune = evaluate(pruned_model, valid, fields, opt)
    print('Accuracy:{}=>{}, ppl:{}=>{}'.format(tmp_fit[1], best_prune[1],
                                               tmp_fit[0], best_prune[0]))
    return es.result()