def __init__(self):
    self.config = SearchConfig()
    self.writer = None
    if self.config.tb_dir != "":
        from torch.utils.tensorboard import SummaryWriter
        self.writer = SummaryWriter(self.config.tb_dir, flush_secs=20)
    init_gpu_params(self.config)
    set_seed(self.config)
    self.logger = FileLogger('./log', self.config.is_master, self.config.is_master)
    self.load_data()
    self.logger.info(self.config)

    self.model = SearchCNNController(self.config, self.n_classes, self.output_mode)
    self.load_model()
    self.init_kd_component()
    if self.config.n_gpu > 0:
        self.model.to(device)  # `device` is expected to be defined at module level
    if self.config.n_gpu > 1:
        self.model = torch.nn.parallel.DistributedDataParallel(
            self.model, device_ids=[self.config.local_rank],
            find_unused_parameters=True)
    # DistributedDataParallel wraps the model, so keep an unwrapped handle for printing
    self.model_to_print = self.model if self.config.multi_gpu is False else self.model.module

    self.architect = Architect(self.model, self.teacher_model, self.config, self.emd_tool)
    mb_params = param_size(self.model)
    self.logger.info("Model size = {:.3f} MB".format(mb_params))
    self.eval_result_map = []
    self.init_optim()
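# A minimal sketch of the `set_seed` helper called above, assuming it follows
# the same seeding pattern the standalone scripts below use (numpy, torch, and
# CUDA seeds); the actual helper may also configure cudnn.
import numpy as np
import torch

def set_seed(config):
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)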
def run(args, myargs):
    my_config = getattr(myargs.config, args.command)
    config = SearchConfig()
    for k, v in args.items():
        assert not hasattr(config, k)
        setattr(config, k, v)
    for k, v in my_config.items():
        if not hasattr(config, k):
            print('* config does not have %s' % k)
        setattr(config, k, v)
    device = torch.device("cuda")

    writer = myargs.writer
    writer.add_text('all_config', config.as_markdown(), 0)

    logger = myargs.logger
    config.print_params(logger.info_msg)

    config.data_path = os.path.expanduser(config.data_path)
    config.plot_path = os.path.join(args.outdir, 'plot')
    config.path = args.outdir
    main(config=config, logger=logger, device=device, myargs=myargs)
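# `run` iterates args.items(), so `args` is assumed to be a dict-like container
# rather than a bare argparse.Namespace. A hedged adapter sketch; the easydict
# dependency is an assumption, not something this repo is known to use:
from easydict import EasyDict

def namespace_to_easydict(ns):
    # vars() flattens the Namespace into a dict; EasyDict keeps .items()
    # while also allowing attribute access like args.outdir
    return EasyDict(vars(ns))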
def main():
    config = SearchConfig(section='fine-tune')
    device = torch.device("cuda")

    # tensorboard
    writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}_tune.log".format(config.name)))
    config.print_params(logger.info)
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=True)

    logger.debug('loading checkpoint')
    best_path = os.path.join(config.path, 'best.pth.tar')
    model = torch.load(best_path)
    model.prune()
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(), config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    model.print_alphas(logger)
    first_top1 = validate(valid_loader, model, -1, 0, device, config, logger, writer)
    os.makedirs(config.fine_tune_path, exist_ok=True)  # was: os.system('mkdir -p ...')

    # training loop
    best_top1 = 0.
    for epoch in range(config.epochs):
        # NOTE: stepping the scheduler at the top of the epoch is the legacy
        # (pre-1.1) PyTorch ordering this script was written against
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, model, architect, w_optim, lr, epoch, writer,
              device, config, logger)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step, device, config,
                        logger, writer)

        # save checkpoint, tracking the best top-1 so far
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.fine_tune_path, is_best)
        print("")

    logger.info("Initial Prec@1 = {:.4%}".format(first_top1))
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
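# A minimal sketch of the `validate` helper these scripts call, assuming the
# usual DARTS-style evaluation loop built on utils.AverageMeter and
# utils.accuracy; the actual helper may log per-step detail as well.
def validate(valid_loader, model, epoch, cur_step, device, config, logger, writer):
    top1 = utils.AverageMeter()
    model.eval()
    with torch.no_grad():
        for X, y in valid_loader:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            logits = model(X)
            prec1, = utils.accuracy(logits, y, topk=(1,))
            top1.update(prec1.item(), X.size(0))
    writer.add_scalar('val/top1', top1.avg, cur_step)
    logger.info("Valid: [{:2d}] Prec@1 {:.4%}".format(epoch + 1, top1.avg))
    return top1.avg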
import os

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
# SummaryWriter source assumed to be tensorboardX, matching the sibling scripts
from tensorboardX import SummaryWriter

import utils
from config import SearchConfig


def check_error(output, k_model, input_np, epsilon=1e-5):
    """Compare a PyTorch output tensor against a Keras model's prediction."""
    pytorch_output = output.data.numpy()
    keras_output = k_model.predict(input_np)
    # compare absolute differences; the signed max used originally could miss
    # large negative deviations
    error = np.max(np.abs(pytorch_output - keras_output))
    print('Error:', error)
    assert error < epsilon
    return error


config = SearchConfig()
device = torch.device("cuda")

# tensorboard
writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    logger.info("Logger is set - training start")
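# Hypothetical usage of check_error when porting a model to Keras: feed both
# frameworks the same random input and compare. `torch_model`, `keras_model`,
# and the CIFAR-shaped input are placeholders, not objects defined in this repo.
def compare_models(torch_model, keras_model):
    input_np = np.random.rand(1, 3, 32, 32).astype(np.float32)
    torch_model.eval()
    with torch.no_grad():
        output = torch_model(torch.from_numpy(input_np))
    return check_error(output, keras_model, input_np)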
""" Search cell """ import os import torch import torch.nn as nn import numpy as np # from tensorboardX import SummaryWriter from config import SearchConfig import utils from models.search_cnn import SearchCNNController from architect import Architect # from visualize import plot config = SearchConfig() print(config.gpus) device = torch.device("cuda") # tensorboard # writer = SummaryWriter(log_dir=os.path.join(config.path, "tb")) # writer.add_text('config', config.as_markdown(), 0) logger = utils.get_logger( os.path.join(config.path, "{}.log".format(config.name))) # config.print_params(logger.info) def main(): logger.info("Logger is set - training start") # set default gpu device id torch.cuda.set_device(config.gpus[0])
def main():
    config = SearchConfig()
    device = torch.device("cuda")

    # tensorboard (clear any logs left over from a previous run)
    tb_path = os.path.join(config.path, "tb")
    os.system('rm -r ' + tb_path)
    writer = SummaryWriter(log_dir=tb_path)
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}_train.log".format(config.name)))
    config.print_params(logger.info)
    logger.info("Logger is set - training start")

    if int(config.profile) != 0:
        logger.info('entering profile mode')
        profile = True
        config.epochs = 1
        max_batches = config.print_freq
    else:
        profile = False
        max_batches = None

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    # resolve the controller class from its dotted path, e.g.
    # "models.search_cnn.SearchCNNController" (`import_module` comes from importlib)
    module_name, class_name = config.controller_class.rsplit('.', 1)
    controller_cls = getattr(import_module(module_name), class_name)
    model = controller_cls(device, **config.__dict__)
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(), config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(), config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = int(n_train * config.validate_split)
    indices = list(range(n_train))
    if split <= 0:
        logger.debug('using train as validation')
        valid_sampler = train_sampler = \
            torch.utils.data.sampler.SubsetRandomSampler(indices)
    else:
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
        valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best = 0
    best_genotype = None
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        if profile:
            with torch.autograd.profiler.profile(use_cuda=True) as prof:
                train_qual = train(train_loader, valid_loader, model, architect,
                                   w_optim, alpha_optim, lr, epoch, writer,
                                   device, config, logger, max_batches=max_batches)
            print('cpu')
            print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
            print(prof.key_averages().table(sort_by="cpu_time", row_limit=10))
            print('cuda')
            print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
            print(prof.key_averages().table(sort_by="cuda_time", row_limit=10))
            break  # profile only the first epoch

        model.new_epoch(epoch, writer)
        train_qual = train(train_loader, valid_loader, model, architect,
                           w_optim, alpha_optim, lr, epoch, writer,
                           device, config, logger, max_batches=max_batches)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        val_qual = validate(valid_loader, model, epoch, cur_step, device,
                            config, logger, writer)

        # log genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as an image
        plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        model.plot_genotype(plot_path, caption)
        # plot(genotype.normal, plot_path + "-normal", caption)
        # plot(genotype.reduce, plot_path + "-reduce", caption)

        if config.use_train_quality != 0:
            cur_qual = train_qual
        else:
            cur_qual = val_qual

        # save
        if best < cur_qual:
            best = cur_qual
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        logger.info("Quality{}: {} \n\n".format('*' if is_best else '', cur_qual))

    logger.info("Final best = {}".format(best))
    logger.info("Best Genotype = {}".format(best_genotype))
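# A minimal sketch of the assumed `utils.save_checkpoint` contract: persist the
# latest model and copy it to best.pth.tar when is_best is set (the fine-tune
# script above loads 'best.pth.tar' with torch.load). The real helper may save
# state_dicts instead of whole modules.
import shutil

def save_checkpoint(model, ckpt_dir, is_best=False):
    filename = os.path.join(ckpt_dir, 'checkpoint.pth.tar')
    torch.save(model, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(ckpt_dir, 'best.pth.tar'))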
import os
import pickle
import sys
import time

import torch
import torch.nn as nn
from tensorboardX import SummaryWriter

import genotypes as gts
import utils
from config import SearchConfig
from data_loader import load_dataset
from search_cnn import SearchCNN

config = SearchConfig()
config.alpha_dir = os.path.join(config.stage_dir, "alphas")
os.makedirs(config.alpha_dir, exist_ok=True)  # was: os.system("mkdir -p ...")
device = torch.device("cuda")

# tensorboard
writer = SummaryWriter(log_dir=config.log_dir)
writer.add_text("config", config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.log_dir, "{}_{}.log".format(config.name, config.stage)))
config.print_args(logger.info)
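# Hypothetical helper showing how the alphas directory set up above might be
# used: pickle the architecture parameters once per epoch. The name
# `dump_alphas` and the per-epoch file layout are assumptions, not part of
# this repo; `model.alphas()` is the iterator the search scripts already use.
def dump_alphas(model, alpha_dir, epoch):
    alphas = [a.detach().cpu().numpy() for a in model.alphas()]
    path = os.path.join(alpha_dir, "alphas_ep{:03d}.pkl".format(epoch))
    with open(path, "wb") as f:
        pickle.dump(alphas, f)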
import os
import random
import time

import numpy as np
import torch
import torch.nn as nn

import utils
from cdarts import CdartsTrainer
from config import SearchConfig
from datasets.mld import get_search_datasets
from model import Model

if __name__ == "__main__":
    config = SearchConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        # make sure the output dir exists on every rank before proceeding
        torch.distributed.barrier()

    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)
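# A minimal sketch of the assumed `utils.reset_seed` helper, mirroring the
# seeding done explicitly in the other entry points here; the real helper may
# also configure cudnn flags.
def reset_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)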
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import time

import numpy as np
import torch
import torch.nn as nn

import utils
from config import SearchConfig
from data_loader import load_dataset

config = SearchConfig()
config.model_dir = os.path.join(config.save_dir, "augment/models")
device = torch.device("cuda")

logger = utils.get_logger(
    os.path.join(config.log_dir, "{}_{}.log".format(config.name, config.stage)))


def test(data_loader, model, criterion):
    loss = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.eval()
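    # The original snippet breaks off here; below is a hedged sketch of the
    # standard evaluation loop the meters above imply. `utils.accuracy` is
    # assumed to follow the DARTS-style (prec1, prec5) contract, and the
    # return tuple is an assumption as well.
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            logits = model(X)
            err = criterion(logits, y)
            prec1, prec5 = utils.accuracy(logits, y, topk=(1, 5))
            n = X.size(0)
            loss.update(err.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
    return top1.avg, top5.avg, loss.avg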
def main():
    # init config
    config = SearchConfig()

    # set seed
    if config.seed is not None:
        np.random.seed(config.seed)
        torch.manual_seed(config.seed)
        torch.cuda.manual_seed_all(config.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably!')

    # For Slurm availability
    if config.world_size == -1 and "SLURM_NPROCS" in os.environ:
        # acquire world size from slurm
        config.world_size = int(os.environ["SLURM_NPROCS"])
        config.rank = int(os.environ["SLURM_PROCID"])
        jobid = os.environ["SLURM_JOBID"]
        hostfile = os.path.join(config.dist_path, "dist_url." + jobid + ".txt")
        if config.dist_file is not None:
            config.dist_url = "file://{}.{}".format(
                os.path.realpath(config.dist_file), jobid)
        elif config.rank == 0:
            if config.dist_backend == 'nccl' and config.infi_band:
                # only the NCCL backend supports InfiniBand
                interface_str = 'ib{:d}'.format(config.infi_band_interface)
                print("Use InfiniBand support on interface " + interface_str + '.')
                os.environ['NCCL_SOCKET_IFNAME'] = interface_str
                os.environ['GLOO_SOCKET_IFNAME'] = interface_str
                ip_str = os.popen('ip addr show ' + interface_str).read()
                ip = ip_str.split("inet ")[1].split("/")[0]
            else:
                if config.world_size == 1:
                    # use only one node
                    ip = '127.0.0.1'
                else:
                    ip = socket.gethostbyname(socket.gethostname())
            port = find_free_port()
            config.dist_url = "tcp://{}:{}".format(ip, port)
            with open(hostfile, "w") as f:
                f.write(config.dist_url)
        else:
            while not os.path.exists(hostfile):
                time.sleep(5)  # wait for the main process
            with open(hostfile, "r") as f:
                config.dist_url = f.read()
        print("dist-url:{} at PROCID {} / {}".format(
            config.dist_url, config.rank, config.world_size))

    # support multiple GPUs on one node; assume each node has an equal number of GPUs
    ngpus_per_node = torch.cuda.device_count()
    if config.mp_dist:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        config.world_size = ngpus_per_node * config.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes:
        # the worker process function
        mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, config))
    else:
        # Simply call the worker function on the first GPU device
        worker(None, ngpus_per_node, config)
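# A minimal sketch of the `find_free_port` helper used above, assuming the
# common trick of binding an ephemeral socket to port 0 and reading back the
# port the OS assigned; the real helper may differ.
import socket

def find_free_port():
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('', 0))
        return s.getsockname()[1]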
from __future__ import division
from __future__ import print_function

import os
import sys
import time

import numpy as np
import torch
import torch.nn as nn

import feature_map as fmp
import utils
from config import SearchConfig
from data_loader import load_dataset

config = SearchConfig()
config.feature_dir = os.path.join(config.stage_dir, "features")
os.makedirs(config.feature_dir, exist_ok=True)  # was: os.system("mkdir -p ...")
config.stage = "feature"
config.total_samples = 10000
device = torch.device("cuda")

logger = utils.get_logger(
    os.path.join(config.log_dir, "{}_{}.log".format(config.name, config.stage)))


def compute_offline(data_loader, model, feature_dir):
    logger.info("computing offline...")
    save_dir = os.path.join(feature_dir, "offline")
def get_current_node_count():
    if "PAI_CURRENT_TASK_ROLE_NAME" not in os.environ:
        return 1
    task_role = os.environ["PAI_CURRENT_TASK_ROLE_NAME"]
    return int(os.environ["PAI_TASK_ROLE_TASK_COUNT_" + task_role])


def get_current_node_index():
    if "PAI_CURRENT_TASK_ROLE_CURRENT_TASK_INDEX" not in os.environ:
        return 0
    return int(os.environ["PAI_CURRENT_TASK_ROLE_CURRENT_TASK_INDEX"])


if __name__ == "__main__":
    config = SearchConfig()
    if config.nni:
        if config.nni == "gt_mock":
            nni_tools.mock_result()
        else:
            config.designated_subgraph = [nni_tools.get_param()]
            config.path = nni_tools.get_output_dir()

    # tensorboard
    writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(os.path.join(config.path, "{}.log".format(config.name)))
    config.print_params(logger.info)
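# A hedged sketch of what the `nni_tools` wrapper above might look like, built
# on NNI's public API (nni.get_next_parameter and the NNI_OUTPUT_DIR env var).
# The wrapper's actual source is not shown here, and the "subgraph" parameter
# key is an assumption for illustration.
import nni

def get_param():
    # NNI hands each trial its hyper-parameter sample as a dict; this script
    # treats the sampled value as the designated subgraph id
    params = nni.get_next_parameter()
    return params["subgraph"]

def get_output_dir():
    return os.environ.get("NNI_OUTPUT_DIR", ".")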