Example #1
0
def _subp_confirm_train(eva, network_item, pre_blk, gpuq):
    """Evaluate one network in the "confirm" stage on a GPU taken from ``gpuq``.

    A GPU id is taken from ``gpuq``, exported through the
    ``CUDA_VISIBLE_DEVICES`` environment variable, and put back on the queue
    once the evaluation has finished (successfully or not).

    Args:
        eva: evaluator object; ``eva.evaluate`` performs the actual run.
        network_item: network description forwarded to ``eva.evaluate``.
        pre_blk: forwarded to ``eva.evaluate`` as its second positional
            argument.
        gpuq: queue-like object (``get``/``put``) holding GPU ids.

    Returns:
        The value returned by ``eva.evaluate``.
    """
    ngpu = gpuq.get()
    try:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(ngpu)
        _epoch_ctrl(eva, stage="confirm")
        return eva.evaluate(network_item,
                            pre_blk,
                            is_bestNN=True,
                            update_pre_weight=True)
    finally:
        # Return the GPU even when the evaluation raises; otherwise the id is
        # lost from the queue and later tasks can block forever on get().
        gpuq.put(ngpu)
Example #2
0
def _confirm_train(eva, com, best_nn, best_index, ds, process_pl):
    """Re-evaluate the chosen item of ``best_nn`` in the "confirm" stage.

    A new ``NetworkItem`` is built from ``best_nn.item_list[best_index]``,
    evaluated through ``process_pl.apply`` with ``_subp_confirm_train``, given
    the resulting score, and appended to ``best_nn.item_list``.

    Args:
        eva: evaluator object forwarded to the worker function.
        com: object exposing ``idle_gpuq``, which is handed to the worker.
        best_nn: object whose ``item_list`` holds the candidate items.
        best_index: index into ``best_nn.item_list`` of the item to confirm.
        ds: object whose ``control(stage=...)`` is called for the stage.
        process_pl: pool-like object providing ``apply``.

    Returns:
        The newly created and scored ``NetworkItem``.
    """
    NAS_LOG << "confirm_train"

    source_item = best_nn.item_list[best_index]
    new_id = len(best_nn.item_list) + 1
    network_item = NetworkItem(new_id, source_item.graph,
                               source_item.cell_list, source_item.code)

    # Switch both the data controller and the epoch setting to "confirm".
    ds.control(stage="confirm")
    _epoch_ctrl(eva, stage="confirm")

    worker_args = (eva, network_item, Network.pre_block, com.idle_gpuq)
    network_item.score = process_pl.apply(_subp_confirm_train, worker_args)

    best_nn.item_list.append(network_item)
    return network_item
Example #3
0
def _game(eva, net_pool, ds, round):
    """Run one game round over ``net_pool`` and record its statistics.

    The round samples (from the second round on), evaluates, records,
    eliminates and updates the networks in ``net_pool``. Start time, data
    size, epoch and cost of the round are written into ``Stage_Info``.

    Args:
        eva: evaluator object forwarded to ``_epoch_ctrl`` and ``_eva_net``.
        net_pool: sequence of networks; ``net_pool[0].pre_block`` determines
            the block index used for ``Stage_Info``.
        ds: object whose ``control(stage=...)`` returns the data size.
        round: round number; values greater than 1 trigger sampling and a
            fresh ``round_info`` entry.
    """

    def _block_stats():
        # Always look the entry up again rather than caching it across calls
        # into other helpers.
        return Stage_Info['blk_info'][block_id]

    timer = TimeCnt()
    round_begin = timer.start()
    block_id = len(net_pool[0].pre_block)
    NAS_LOG << ('nas_round_start', block_id + 1, round, round_begin)
    data_size = ds.control(stage="game")
    epoch = _epoch_ctrl(eva, stage="game")

    later_round = round > 1
    if later_round:
        # Later rounds get their own record, cloned from the first one.
        round_records = _block_stats()['round_info']
        round_records.append(copy.deepcopy(round_records[0]))

    current_record = _block_stats()['round_info'][-1]
    current_record['round_start'] = round_begin
    current_record['round_data_size'] = data_size
    _block_stats()['search_epoch'] = epoch

    if later_round:
        _sample(net_pool)
    tasks = _spl_info_to_tasks(net_pool, round, epoch, data_size)
    outcome = _eva_net(tasks, eva)
    _record_result(net_pool, outcome)
    _eliminate(net_pool, round)
    _update(net_pool)

    round_cost = timer.stop()
    NAS_LOG << ('nas_round_over', round_cost)
    _block_stats()['round_info'][-1]['round_cost'] = round_cost
Example #4
0
def _train_winner(eva, net_pl, com, ds, pro_pl, round):
    """Train the winning network further and pick its best item.

    Args:
        eva: evaluator object forwarded to the training helpers.
        net_pl: list of NetworkUnit; ``net_pl[0]`` is used as the winner.
        com: communication object; in "Block" mode its ``net_pool`` and
            ``tw_count`` attributes are set here.
        ds: object whose ``control(stage=...)`` is called for the stage.
        pro_pl: process pool forwarded to the task helpers.
        round: the round number of game.
    Returns:
        The selected network item. In "Block" mode this is the item produced
        by ``_confirm_train``; otherwise it is the best-scoring item among the
        last ``MAIN_CONFIG['num_opt_best']`` entries of the winner's
        ``item_list``.
    """
    NAS_LOG << "config_ops_ing"
    started_at = time.time()
    ds.control(stage="game")
    _epoch_ctrl(eva, stage="game")

    pattern = MAIN_CONFIG['pattern']
    block_mode = pattern == "Block"

    if block_mode:
        _assign_task(net_pl, com, round,
                     batch_num=MAIN_CONFIG['num_gpu'],
                     block_winner=True)
        com.net_pool = net_pl
        main_cfg = NAS_CONFIG['nas_main']
        com.tw_count = main_cfg['num_opt_best'] - main_cfg['num_gpu']
        _do_task(pro_pl, com, eva)
        #_arrange_result(com, net_pl)
    elif pattern == "Global":
        _global_train(net_pl, com, pro_pl, eva)

    best_nn = net_pl[0]
    _save_net_info(best_nn,
                   len(Network.pre_block) + 1,
                   round,
                   len(net_pl),
                   best_nn.id,
                   len(best_nn.item_list))

    recent_items = best_nn.item_list[-MAIN_CONFIG['num_opt_best']:]
    scores = [candidate.score for candidate in recent_items]
    # Negative index: position of the best score counted from the list end.
    best_index = scores.index(max(scores)) - len(scores)

    if block_mode:
        network_item = _confirm_train(eva, com, best_nn, best_index, ds,
                                      pro_pl)
        _rm_other_model(network_item)
    else:
        network_item = best_nn.item_list[best_index]

    NAS_LOG << ("train_winner_tem", time.time() - started_at)
    return network_item
Example #5
0
def _retrain(eva, ds):
    """Run the "retrain" stage as a single scheduled task and log its stats.

    One ``EvaScheduleItem`` built on ``Network.pre_block`` is loaded into
    ``TSche`` and executed with ``_subproc_eva``. The start time, cost, epoch
    and data size of the retraining are stored in ``Stage_Info``.

    Args:
        eva: evaluator object forwarded to ``_epoch_ctrl`` and to the task
            executor.
        ds: object whose ``control(stage=...)`` returns the data size.
    """
    time_cnt = TimeCnt()
    start_time = time_cnt.start()
    NAS_LOG << ('nas_retrain', start_time)
    # Pass the evaluator here as every other stage does; the call previously
    # omitted it, unlike all the other _epoch_ctrl call sites.
    cur_epoch = _epoch_ctrl(eva, stage="retrain")
    cur_data_size = ds.control(stage="retrain")
    task_item = EvaScheduleItem(nn_id=-1, alig_id=-1, graph_template=[],
                                item=None, pre_blk=Network.pre_block,
                                ft_sign=True, bestNN=True, rd=0, nn_left=-1,
                                spl_batch_num=-1, epoch=cur_epoch,
                                data_size=cur_data_size)
    task_list = [task_item]
    TSche.load_tasks(task_list)
    TSche.exec_task(_subproc_eva, eva)
    result = TSche.get_result()
    retrain_score = result[0].score
    retrain_end = time_cnt.stop()
    NAS_LOG << ('nas_retrain_end', retrain_end, retrain_score)
    Stage_Info['retrain_start'] = start_time
    Stage_Info['retrain_cost'] = retrain_end
    Stage_Info['retrain_epoch'] = cur_epoch
    Stage_Info['retrain_data_size'] = cur_data_size
Example #6
0
def _confirm_train(eva, best_nn, best_index, ds):
    """Re-evaluate the chosen item of ``best_nn`` in the "confirm" stage.

    A new ``NetworkItem`` copied from ``best_nn.item_list[best_index]`` is
    wrapped in a single ``EvaScheduleItem`` and evaluated, either inline when
    ``MAIN_CONFIG['subp_eva_debug']`` is truthy or through ``TSche``. The item
    receives the resulting score and task info and is appended to
    ``best_nn.item_list``. Timing, epoch and data size are written into
    ``Stage_Info``.

    Args:
        eva: evaluator object forwarded to ``_epoch_ctrl`` and the executor.
        best_nn: network unit providing ``pre_block``, ``id``,
            ``graph_template`` and ``item_list``.
        best_index: index into ``best_nn.item_list`` of the item to confirm.
        ds: object whose ``control(stage=...)`` returns the data size.

    Returns:
        The newly created and scored ``NetworkItem``.
    """
    timer = TimeCnt()
    confirm_begin = timer.start()
    pre_blk = best_nn.pre_block
    blk_id = len(pre_blk)
    NAS_LOG << ("nas_confirm_train", blk_id + 1, confirm_begin)
    data_size = ds.control(stage="confirm")
    epoch = _epoch_ctrl(eva, stage="confirm")

    blk_stats = Stage_Info['blk_info'][blk_id]
    blk_stats['confirm_train_start'] = confirm_begin
    blk_stats['confirm_epoch'] = epoch
    blk_stats['confirm_data_size'] = data_size

    chosen = best_nn.item_list[best_index]
    network_item = NetworkItem(len(best_nn.item_list), chosen.graph,
                               chosen.cell_list, chosen.code)
    confirm_task = EvaScheduleItem(
        best_nn.id,
        0,
        best_nn.graph_template,
        network_item,
        pre_blk,
        ft_sign=True,
        bestNN=True,
        rd=-1,
        nn_left=0,
        spl_batch_num=1,
        epoch=epoch,
        data_size=data_size,
    )
    task_list = [confirm_task]

    if not MAIN_CONFIG['subp_eva_debug']:
        TSche.load_tasks(task_list)
        TSche.exec_task(_subproc_eva, eva)
        result = TSche.get_result()
    else:
        # Debug mode: evaluate directly in this process, bypassing TSche.
        result = [_subproc_eva(task, None, None, eva) for task in task_list]

    finished_task = result[0]
    network_item.score = finished_task.score
    network_item.task_info = finished_task
    best_nn.item_list.append(network_item)

    confirm_cost = timer.stop()
    NAS_LOG << ("nas_confirm_train_fin", confirm_cost)
    Stage_Info['blk_info'][blk_id]['confirm_trian_cost'] = confirm_cost
    return network_item
Example #7
0
def _retrain(eva, com, process_pool):
    """Run the "retrain" stage through ``process_pool`` and return its score.

    Args:
        eva: evaluator object forwarded to ``_epoch_ctrl`` and the worker.
        com: object exposing ``idle_gpuq``, which is handed to the worker.
        process_pool: pool-like object providing ``apply``.

    Returns:
        The value returned by ``_subp_retrain`` via ``process_pool.apply``.
    """
    _epoch_ctrl(eva, stage="retrain")
    worker_args = (eva, Network.pre_block, com.idle_gpuq)
    return process_pool.apply(_subp_retrain, worker_args)
Example #8
0
def _game(eva, net_pool, com, ds, round, process_pool):
    """Run one game round: assign tasks, execute them, and collect results.

    Args:
        eva: evaluator object forwarded to ``_epoch_ctrl`` and ``_do_task``.
        net_pool: pool of networks the tasks are assigned from and the
            results are arranged into.
        com: communication object shared by the task helpers.
        ds: object whose ``control(stage=...)`` is called for the stage.
        round: round number forwarded to ``_assign_task``.
        process_pool: process pool forwarded to ``_do_task``.
    """
    _assign_task(net_pool, com, round)
    # Switch the data controller and the epoch setting to the "game" stage
    # before any task is executed.
    ds.control(stage="game")
    _epoch_ctrl(eva, stage="game")
    _do_task(process_pool, com, eva)
    _arrange_result(com, net_pool)
Example #9
0
def _train_winner(eva, net_pl, ds, round, spl_num=MAIN_CONFIG['num_opt_best']):
    """Sample and evaluate the winning network, then confirm its best item.

    A first batch of ``MAIN_CONFIG['num_gpu']`` samples is evaluated. After
    that, each step updates the pool with the newly recorded results and
    evaluates follow-up samples until ``spl_num`` samples have been counted.
    The best of the last ``spl_num`` items is then passed to
    ``_confirm_train``.

    Args:
        eva: evaluator object forwarded to the evaluation helpers.
        net_pl: list of NetworkUnit; its first element is popped and used as
            the winner.
        ds: object whose ``control(stage=...)`` returns the data size.
        round: the round number of game.
        spl_num: number of samples to count before confirming.
    Returns:
        The network item returned by ``_confirm_train``.
    """
    timer = TimeCnt()
    started_at = timer.start()
    blk_id = len(net_pl[0].pre_block)
    NAS_LOG << ("nas_train_winner_start", blk_id + 1, round, started_at)
    data_size = ds.control(stage="game")
    epoch = _epoch_ctrl(eva, stage="game")

    blk_stats = Stage_Info["blk_info"][blk_id]
    blk_stats["train_winner_start"] = started_at
    blk_stats["train_winner_data_size"] = data_size

    sampled = 0

    # First batch: one sample per GPU, evaluated asynchronously.
    if sampled < spl_num:
        batch_num = MAIN_CONFIG['num_gpu']
        _sample(net_pl, batch_num=batch_num)
        tasks = _spl_info_to_tasks(net_pl, round, epoch, data_size,
                                   batch_num=batch_num)
        outcome = _eva_net(tasks, eva, async_exec=True)
        newly_added_id = _record_result(net_pl, outcome)
        sampled += batch_num

    # Follow-up steps: feed back the latest results and sample again.
    while sampled < spl_num:
        # Never count past spl_num in the final step.
        step = min(spl_num - sampled, len(newly_added_id))
        _update(net_pl, newly_added_id=newly_added_id)
        base_alig_id = [entry[0] for entry in newly_added_id]
        base_item_id = _sample(net_pl, batch_num=1,
                               base_alig_id=base_alig_id)
        # Stay asynchronous only while further steps are still to come.
        more_to_come = sampled + step < spl_num
        tasks = _spl_info_to_tasks(net_pl, round, epoch, data_size,
                                   base_item_id=base_item_id)
        outcome = _eva_net(tasks, eva, async_exec=more_to_come)
        newly_added_id = _record_result(net_pl, outcome)
        sampled += step

    best_nn = net_pl.pop(0)

    scores = [candidate.score for candidate in best_nn.item_list[-spl_num:]]
    # Negative index: position of the best score counted from the list end.
    best_index = scores.index(max(scores)) - len(scores)
    network_item = _confirm_train(eva, best_nn, best_index, ds)
    _save_net_info(best_nn, round, len(net_pl))

    winner_cost = timer.stop()
    NAS_LOG << ("nas_train_winner_tem", winner_cost)
    Stage_Info["blk_info"][blk_id]["train_winner_cost"] = winner_cost
    return network_item