""" Entry point wrapping augment training with an external config. """
import os

import torch

from config import AugmentConfig
import genotypes as gt


def run(args, myargs):
    my_config = getattr(myargs.config, args.command)
    config = AugmentConfig()

    # Copy command-line args into the config; they must not clash with
    # existing AugmentConfig attributes.
    for k, v in args.items():
        assert not hasattr(config, k)
        setattr(config, k, v)

    # Merge the command-specific config, warning on unknown keys.
    for k, v in my_config.items():
        if not hasattr(config, k):
            print('* config does not have %s' % k)
        setattr(config, k, v)

    device = torch.device("cuda")

    writer = myargs.writer
    writer.add_text('all_config', config.as_markdown(), 0)

    logger = myargs.logger
    config.print_params(logger.info_msg)

    config.genotype = gt.from_str(config.genotype)
    config.data_path = os.path.expanduser(config.data_path)
    config.plot_path = os.path.join(args.outdir, 'plot')
    config.path = args.outdir

    main(config=config, logger=logger, device=device, myargs=myargs)
""" Training augmented model """ import torch.nn as nn import torchvision from tensorboardX import SummaryWriter from config import AugmentConfig import utils from models.augment_cnn import AugmentCNN from utils import * config = AugmentConfig() device = torch.device("cuda") # tensorboard writer = SummaryWriter(log_dir=os.path.join(config.path, "tb")) writer.add_text('config', config.as_markdown(), 0) logger = utils.get_logger( os.path.join(config.path, "{}.log".format(config.name))) config.print_params(logger.info) def main(): logger.info("Logger is set - training start") logger.info("Torch version is: {}".format(torch.__version__)) logger.info("Torch_vision version is: {}".format(torchvision.__version__)) # set default gpu device id torch.cuda.set_device(config.gpus[0]) # set seed
import os
import socket
import time
import warnings

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.multiprocessing as mp

from config import AugmentConfig


def main():
    # init config
    config = AugmentConfig()

    # set seed
    if config.seed is not None:
        np.random.seed(config.seed)
        torch.manual_seed(config.seed)
        torch.cuda.manual_seed_all(config.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably!')

    # SLURM support
    if config.world_size == -1 and "SLURM_NPROCS" in os.environ:
        # acquire world size and rank from SLURM
        config.world_size = int(os.environ["SLURM_NPROCS"])
        config.rank = int(os.environ["SLURM_PROCID"])
        jobid = os.environ["SLURM_JOBID"]
        hostfile = os.path.join(config.dist_path, "dist_url." + jobid + ".txt")
        if config.dist_file is not None:
            config.dist_url = "file://{}.{}".format(
                os.path.realpath(config.dist_file), jobid)
        elif config.rank == 0:
            if config.dist_backend == 'nccl' and config.infi_band:
                # only the NCCL backend supports InfiniBand
                interface_str = 'ib{:d}'.format(config.infi_band_interface)
                print("Use InfiniBand support on interface "
                      + interface_str + '.')
                os.environ['NCCL_SOCKET_IFNAME'] = interface_str
                os.environ['GLOO_SOCKET_IFNAME'] = interface_str
                ip_str = os.popen('ip addr show ' + interface_str).read()
                ip = ip_str.split("inet ")[1].split("/")[0]
            else:
                ip = socket.gethostbyname(socket.gethostname())
            port = find_free_port()
            config.dist_url = "tcp://{}:{}".format(ip, port)
            with open(hostfile, "w") as f:
                f.write(config.dist_url)
        else:
            # wait for the main process to publish the rendezvous address
            while not os.path.exists(hostfile):
                time.sleep(5)
            with open(hostfile, "r") as f:
                config.dist_url = f.read()
        print("dist-url:{} at PROCID {} / {}".format(
            config.dist_url, config.rank, config.world_size))

    # support multiple GPUs on one node;
    # assume each node has an equal number of GPUs
    ngpus_per_node = torch.cuda.device_count()
    if config.mp_dist:
        # Since we have ngpus_per_node processes per node, the total
        # world_size needs to be adjusted accordingly.
        config.world_size = ngpus_per_node * config.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes:
        # one worker process per GPU.
        mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, config))
    else:
        # Simply call the worker function on the first GPU device.
        worker(None, ngpus_per_node, config)
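# main() above calls find_free_port() and worker(), neither of which is
# defined in this snippet; worker() is assumed to live elsewhere in the file.
# Below is a minimal sketch of what find_free_port() could look like,
# assuming the common trick of binding a socket to port 0 so the OS picks a
# free port. The project's actual helper may differ.
def find_free_port():
    """Ask the OS for a currently free TCP port (hypothetical helper)."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("", 0))  # port 0: let the OS choose any free port
        return s.getsockname()[1]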
""" Training augmented model """ import os import torch import torch.nn as nn import numpy as np # from tensorboardX import SummaryWriter from config import AugmentConfig import utils from models.augment_cnn import AugmentCNN config = AugmentConfig() device = torch.device("cuda") # tensorboard # writer = SummaryWriter(log_dir=os.path.join(config.path, "tb")) # writer.add_text('config', config.as_markdown(), 0) logger = utils.get_logger( os.path.join(config.path, "{}.log".format(config.name))) config.print_params(logger.info) def main(): logger.info("Logger is set - training start") # set default gpu device id torch.cuda.set_device(config.gpus[0]) # set seed np.random.seed(config.seed)
import os
import logging

import torch
import torch.nn as nn
import torch.utils
import torch.backends.cudnn as cudnn
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.autograd import Variable

import genotypes
import utils
from config import AugmentConfig
from model import NetworkImageNet as Network

config = AugmentConfig()

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))

CLASSES = 1000


class CrossEntropyLabelSmooth(nn.Module):
    """Cross-entropy loss with label smoothing."""

    def __init__(self, num_classes, epsilon):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        # one-hot encode the targets, then smooth them towards the
        # uniform distribution over the classes
        targets = torch.zeros_like(log_probs).scatter_(
            1, targets.unsqueeze(1), 1)
        targets = (1 - self.epsilon) * targets \
            + self.epsilon / self.num_classes
        loss = (-targets * log_probs).mean(0).sum()
        return loss
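# A quick sanity check of the smoothed loss on random data. The shapes
# (batch of 4, CLASSES logits) and the epsilon value are illustrative only,
# not values used by this repository.
if __name__ == '__main__':
    criterion = CrossEntropyLabelSmooth(num_classes=CLASSES, epsilon=0.1)
    logits = torch.randn(4, CLASSES)           # fake network outputs
    labels = torch.randint(0, CLASSES, (4,))   # fake ground-truth labels
    print(criterion(logits, labels))           # scalar smoothed CE loss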