Ejemplo n.º 1
0
def _game(eva, net_pool, ds, round):
    """Run one round of the game on ``net_pool`` and record its statistics.

    The block index is taken from the number of ``pre_block`` entries of the
    first network in the pool. Timing, data size and epoch of the round are
    written into ``Stage_Info['blk_info'][block_id]``.

    Args:
        eva: evaluator handed to ``_epoch_ctrl`` and ``_eva_net``.
        net_pool: non-empty list of networks; passed to (and presumably
            mutated by) ``_sample``/``_record_result``/``_eliminate``/
            ``_update`` -- TODO confirm against those helpers.
        ds: object whose ``control(stage=...)`` yields the data size.
        round: 1-based round number (name kept for caller compatibility
            although it shadows the builtin).
    """
    timer = TimeCnt()
    started_at = timer.start()
    block_id = len(net_pool[0].pre_block)
    NAS_LOG << ('nas_round_start', block_id + 1, round, started_at)
    data_size = ds.control(stage="game")
    epoch = _epoch_ctrl(eva, stage="game")

    blk_stats = Stage_Info['blk_info'][block_id]
    round_history = blk_stats['round_info']
    if round > 1:
        # Every round after the first gets its own record, cloned from the
        # first entry of this block's round_info list.
        round_history.append(copy.deepcopy(round_history[0]))
    current_round = round_history[-1]
    current_round['round_start'] = started_at
    current_round['round_data_size'] = data_size
    blk_stats['search_epoch'] = epoch

    if round > 1:
        # The first round skips sampling; later rounds sample before
        # building the task list.
        _sample(net_pool)
    tasks = _spl_info_to_tasks(net_pool, round, epoch, data_size)
    outcome = _eva_net(tasks, eva)
    _record_result(net_pool, outcome)
    _eliminate(net_pool, round)
    _update(net_pool)

    elapsed = timer.stop()
    NAS_LOG << ('nas_round_over', elapsed)
    # Resolve the record again instead of reusing the earlier reference, so
    # the write lands on whatever entry is last after the helpers ran.
    Stage_Info['blk_info'][block_id]['round_info'][-1]['round_cost'] = elapsed
Ejemplo n.º 2
0
def _subproc_eva(task_item, result_buffer, signal, eva):
    """Evaluate a single task and stamp pid, timing and score onto it.

    Args:
        task_item: the task to evaluate; it is updated in place with
            ``pid``, ``start_time``, ``score`` and ``cost_time``.
        result_buffer: object with ``put``; receives ``task_item`` when both
            it and ``signal`` are truthy. Callers in this file pass ``None``
            when running in-process.
        signal: object with ``set``; called after the result is queued.
        eva: evaluator whose ``evaluate(task_item)`` returns the score.

    Returns:
        The same ``task_item`` that was passed in.
    """
    # A fresh Logger is created here rather than using the module-level one.
    NAS_LOG = Logger()
    task_item.pid = os.getpid()
    timer = TimeCnt()
    task_item.start_time = timer.start()
    NAS_LOG << ('nas_eva_ing', len(task_item.pre_block) + 1,
                task_item.round, task_item.nn_id, task_item.network_item.id)

    if not MAIN_CONFIG['eva_mask']:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(task_item.gpu_info)
        try:
            task_item.score = eva.evaluate(task_item)
        except Exception as error:
            # Best effort: a failed evaluation is logged and scored 0 so the
            # caller still receives a result for this task.
            _err_log(NAS_LOG, task_item, error)
            task_item.score = 0
    else:
        # Evaluation is masked: assign a random score in [0, 0.1] instead.
        task_item.score = random.uniform(0, 0.1)

    task_item.cost_time = timer.stop()
    NAS_LOG << ('nas_eva_fin', len(task_item.pre_block) + 1,
                task_item.round, task_item.nn_id, task_item.network_item.id,
                task_item.score, task_item.cost_time, task_item.pid)

    if not (result_buffer and signal):
        return task_item
    # Hand the finished task back through the buffer and notify via signal.
    result_buffer.put(task_item)
    signal.set()
    return task_item
Ejemplo n.º 3
0
    def run(self):
        """Search every block in turn, retrain, and return the found blocks.

        Enumerates a network pool template via ``self.enu``, calls
        ``_search_blk`` once per block index in
        ``range(MAIN_CONFIG["block_num"])`` and appends each result to
        ``Network.pre_block``. Search timing is stored in ``Stage_Info`` and
        dumped with ``_dump_stage`` before ``_retrain`` is invoked.

        Returns:
            ``Network.pre_block`` after all blocks have been appended.
        """
        NAS_LOG << 'nas_enuming'
        pool_template = self.enu.enumerate()

        search_timer = TimeCnt()
        search_started = search_timer.start()
        NAS_LOG << ('nas_start_search', search_started)
        Stage_Info['nas_start'] = search_started

        for blk_idx in range(MAIN_CONFIG["block_num"]):
            found_block = _search_blk(blk_idx, self.eva, self.ds,
                                      pool_template)
            Network.pre_block.append(found_block)

        search_elapsed = search_timer.stop()
        NAS_LOG << ('nas_search_end', search_elapsed)
        Stage_Info['nas_cost'] = search_elapsed
        _dump_stage(Stage_Info)

        _retrain(self.eva, self.ds)
        for found_block in Network.pre_block:
            NAS_LOG << ('nas_pre_block', str(found_block.graph),
                        str(found_block.cell_list))
        return Network.pre_block
Ejemplo n.º 4
0
def _retrain(eva, ds):
    """Submit one retrain task over ``Network.pre_block`` and record stats.

    A single ``EvaScheduleItem`` is built with ``item=None`` and the current
    ``Network.pre_block``, executed through ``TSche``, and its score is
    logged. Start time, cost, epoch and data size are written to
    ``Stage_Info``.

    Args:
        eva: evaluator forwarded to ``TSche.exec_task``.
        ds: object whose ``control(stage="retrain")`` yields the data size.
    """
    timer = TimeCnt()
    started_at = timer.start()
    NAS_LOG << ('nas_retrain', started_at)

    # NOTE(review): the other call sites in this file pass ``eva`` to
    # _epoch_ctrl; here it is omitted -- confirm that is intended.
    epoch = _epoch_ctrl(stage="retrain")
    data_size = ds.control(stage="retrain")

    retrain_task = EvaScheduleItem(
        nn_id=-1, alig_id=-1, graph_template=[], item=None,
        pre_blk=Network.pre_block, ft_sign=True, bestNN=True, rd=0,
        nn_left=-1, spl_batch_num=-1, epoch=epoch, data_size=data_size)

    # NOTE(review): unlike _confirm_train, this always goes through TSche
    # and does not consult MAIN_CONFIG['subp_eva_debug'] -- confirm.
    TSche.load_tasks([retrain_task])
    TSche.exec_task(_subproc_eva, eva)
    finished = TSche.get_result()
    retrain_score = finished[0].score

    elapsed = timer.stop()
    NAS_LOG << ('nas_retrain_end', elapsed, retrain_score)
    for key, value in (('retrain_start', started_at),
                       ('retrain_cost', elapsed),
                       ('retrain_epoch', epoch),
                       ('retrain_data_size', data_size)):
        Stage_Info[key] = value
Ejemplo n.º 5
0
def _err_log(NAS_LOG, task_item, error):
    """Write three log records describing a task whose evaluation failed.

    Args:
        NAS_LOG: logger accepting records through the ``<<`` operator.
        task_item: the failed task; its ``pre_block`` entries and its
            ``network_item`` are read for the log records.
        error: the exception (or message) to log as ``err_info``.
    """
    # Snapshot each preceding block as a (graph, cell_list, code) triple.
    pre_block = [(blk.graph, blk.cell_list, blk.code)
                 for blk in task_item.pre_block]
    NAS_LOG << ('err_task_info', TimeCnt().start(), len(pre_block) + 1,
                task_item.nn_id, task_item.alig_id, str(pre_block),
                str(task_item.graph_template))

    net = task_item.network_item
    NAS_LOG << ('err_scheme_info', task_item.task_id, task_item.pid,
                task_item.start_time, task_item.cost_time,
                task_item.gpu_info, task_item.round, task_item.nn_left,
                task_item.spl_batch_num, str(net.graph), str(net.cell_list),
                str(net.code), net.score)

    NAS_LOG << ('err_info', error)
Ejemplo n.º 6
0
def _confirm_train(eva, best_nn, best_index, ds):
    """Re-evaluate the chosen item of ``best_nn`` in the "confirm" stage.

    A new ``NetworkItem`` is built from ``best_nn.item_list[best_index]``,
    evaluated as a single task, given the resulting score and task info, and
    appended to ``best_nn.item_list``. Start time, epoch, data size and cost
    are recorded in ``Stage_Info['blk_info'][blk_id]``.

    Args:
        eva: evaluator passed to ``_epoch_ctrl`` and to the evaluation call.
        best_nn: network whose ``pre_block``, ``id``, ``graph_template`` and
            ``item_list`` are read; ``item_list`` gains one entry.
        best_index: index into ``best_nn.item_list`` of the item to confirm.
        ds: object whose ``control(stage="confirm")`` yields the data size.

    Returns:
        The newly created and scored ``NetworkItem``.
    """
    timer = TimeCnt()
    started_at = timer.start()
    pre_blk = best_nn.pre_block
    blk_id = len(pre_blk)
    NAS_LOG << ("nas_confirm_train", blk_id + 1, started_at)
    data_size = ds.control(stage="confirm")
    epoch = _epoch_ctrl(eva, stage="confirm")

    blk_stats = Stage_Info['blk_info'][blk_id]
    blk_stats['confirm_train_start'] = started_at
    blk_stats['confirm_epoch'] = epoch
    blk_stats['confirm_data_size'] = data_size

    net_id = best_nn.id
    template = best_nn.graph_template
    chosen = best_nn.item_list[best_index]
    # The new item's id is the current length of item_list, i.e. the index
    # it will occupy once appended below.
    network_item = NetworkItem(len(best_nn.item_list), chosen.graph,
                               chosen.cell_list, chosen.code)
    task_list = [
        EvaScheduleItem(net_id, 0, template, network_item, pre_blk,
                        ft_sign=True, bestNN=True, rd=-1, nn_left=0,
                        spl_batch_num=1, epoch=epoch, data_size=data_size)
    ]

    if not MAIN_CONFIG['subp_eva_debug']:
        TSche.load_tasks(task_list)
        TSche.exec_task(_subproc_eva, eva)
        result = TSche.get_result()
    else:
        # Debug mode: evaluate in this process, without buffer or signal.
        result = [_subproc_eva(task, None, None, eva) for task in task_list]

    evaluated = result[0]
    network_item.score = evaluated.score
    network_item.task_info = evaluated
    best_nn.item_list.append(network_item)

    elapsed = timer.stop()
    NAS_LOG << ("nas_confirm_train_fin", elapsed)
    # NOTE(review): the key is spelled 'confirm_trian_cost' (sic); it is kept
    # unchanged because readers of Stage_Info may depend on it.
    Stage_Info['blk_info'][blk_id]['confirm_trian_cost'] = elapsed
    return network_item
Ejemplo n.º 7
0
def _search_blk(block_id, eva, ds, npool_tem):
    """Search one block: play rounds until one network is left, then train it.

    A deep copy of ``npool_tem`` is initialised with ``_init_npool_sampler``
    and ``_init_ops``; ``_game`` is then called with an increasing round
    number while the pool holds more than one network. The remaining pool is
    handed to ``_train_winner``. Timings and the number of rounds are stored
    in ``Stage_Info['blk_info'][block_id]``.

    :param block_id: zero-based index of the block being searched
    :param eva: evaluator forwarded to ``_game`` and ``_train_winner``
    :param ds: data-size controller forwarded to ``_game`` and
        ``_train_winner``
    :param npool_tem: network pool template; it is deep-copied, not modified
    :return: the network item returned by ``_train_winner``
    """
    blk_timer = TimeCnt()
    blk_started = blk_timer.start()
    NAS_LOG << ('nas_search_blk', block_id + 1, MAIN_CONFIG["block_num"],
                blk_started)
    Stage_Info['blk_info'][block_id]['blk_start'] = blk_started

    # Work on a private copy so the template can be reused for later blocks.
    net_pool = copy.deepcopy(npool_tem)
    _init_npool_sampler(net_pool, block_id)
    net_pool = _init_ops(net_pool)

    game_timer = TimeCnt()
    game_started = game_timer.start()
    NAS_LOG << ('nas_rounds_game_start', block_id + 1, game_started)
    Stage_Info['blk_info'][block_id]['rounds_game_start'] = game_started

    rounds_played = 0
    while len(net_pool) > 1:
        rounds_played += 1
        _game(eva, net_pool, ds, rounds_played)

    game_elapsed = game_timer.stop()
    NAS_LOG << ('nas_get_winner', game_elapsed)
    Stage_Info['blk_info'][block_id]['rounds_game_cost'] = game_elapsed
    Stage_Info['blk_info'][block_id]['round_num'] = rounds_played

    # Winner training is numbered as the round after the last game round.
    winner_item = _train_winner(eva, net_pool, ds, rounds_played + 1)

    blk_elapsed = blk_timer.stop()
    NAS_LOG << ('nas_search_blk_end', blk_elapsed)
    Stage_Info['blk_info'][block_id]['blk_cost'] = blk_elapsed
    return winner_item
Ejemplo n.º 8
0
def _train_winner(eva, net_pl, ds, round, spl_num=MAIN_CONFIG['num_opt_best']):
    """Sample and evaluate items for the winning network, then confirm the best.

    Sampling continues until the running sample count reaches ``spl_num``.
    The first batch uses ``MAIN_CONFIG['num_gpu']`` as its batch size; each
    later step builds on the ids returned by the preceding
    ``_record_result`` call. The first network is then popped from
    ``net_pl``, the highest-scoring of its last ``spl_num`` items is passed
    to ``_confirm_train``, and the network is saved with ``_save_net_info``.

    Args:
        eva: evaluator forwarded to ``_epoch_ctrl``, ``_eva_net`` and
            ``_confirm_train``.
        net_pl: list of networks; its first element is the winner and is
            removed from the list by this function.
        ds: object whose ``control(stage="game")`` yields the data size.
        round: the round number of the game (name kept for caller
            compatibility although it shadows the builtin).
        spl_num: total number of samples to draw; the default is read from
            ``MAIN_CONFIG['num_opt_best']`` once, at definition time.
    Returns:
        The network item returned by ``_confirm_train``.
    """
    timer = TimeCnt()
    started_at = timer.start()
    blk_id = len(net_pl[0].pre_block)
    NAS_LOG << ("nas_train_winner_start", blk_id + 1, round, started_at)
    data_size = ds.control(stage="game")
    epoch = _epoch_ctrl(eva, stage="game")
    Stage_Info["blk_info"][blk_id]["train_winner_start"] = started_at
    Stage_Info["blk_info"][blk_id]["train_winner_data_size"] = data_size

    sampled = 0

    # First batch: sample one item per GPU and evaluate asynchronously.
    if sampled < spl_num:
        batch_num = MAIN_CONFIG['num_gpu']
        _sample(net_pl, batch_num=batch_num)
        tasks = _spl_info_to_tasks(net_pl,
                                   round,
                                   epoch,
                                   data_size,
                                   batch_num=batch_num)
        outcome = _eva_net(tasks, eva, async_exec=True)
        newly_added_id = _record_result(net_pl, outcome)
        sampled += batch_num

    # Follow-up steps: each one is driven by the ids recorded in the step
    # before it, until the sample budget is used up.
    while sampled < spl_num:
        newly_num = len(newly_added_id)
        if sampled + newly_num >= spl_num:
            # Clamp so the running count lands exactly on spl_num.
            newly_num = spl_num - sampled
        _update(net_pl, newly_added_id=newly_added_id)
        base_alig_id = [entry[0] for entry in newly_added_id]
        base_item_id = _sample(net_pl,
                               batch_num=1,
                               base_alig_id=base_alig_id)
        # Stay asynchronous only while more samples remain after this step.
        async_exec = sampled + newly_num < spl_num
        tasks = _spl_info_to_tasks(net_pl,
                                   round,
                                   epoch,
                                   data_size,
                                   base_item_id=base_item_id)
        outcome = _eva_net(tasks, eva, async_exec=async_exec)
        newly_added_id = _record_result(net_pl, outcome)
        sampled += newly_num

    best_nn = net_pl.pop(0)

    recent_scores = [entry.score for entry in best_nn.item_list[-spl_num:]]
    # Express the best position as a negative index counted from the end of
    # item_list, since recent_scores is a tail slice of it.
    best_index = recent_scores.index(max(recent_scores)) - len(recent_scores)
    network_item = _confirm_train(eva, best_nn, best_index, ds)
    _save_net_info(best_nn, round, len(net_pl))

    elapsed = timer.stop()
    NAS_LOG << ("nas_train_winner_tem", elapsed)
    Stage_Info["blk_info"][blk_id]["train_winner_cost"] = elapsed
    return network_item