Example #1
# Imports assumed from the enclosing module (not shown in this excerpt);
# `gt` follows the usual `import genotypes as gt` convention.
import os

import torch

from config import AugmentConfig
import genotypes as gt


def run(args, myargs):
    my_config = getattr(myargs.config, args.command)
    config = AugmentConfig()
    for k, v in args.items():
        assert not hasattr(config, k)
        setattr(config, k, v)

    for k, v in my_config.items():
        if not hasattr(config, k):
            print('* config does not have %s'%k)
        setattr(config, k, v)
    device = torch.device("cuda")
    writer = myargs.writer
    writer.add_text('all_config', config.as_markdown(), 0)
    logger = myargs.logger
    config.print_params(logger.info_msg)

    config.genotype = gt.from_str(config.genotype)
    config.data_path = os.path.expanduser(config.data_path)
    config.plot_path = os.path.join(args.outdir, 'plot')
    config.path = args.outdir
    main(config=config, logger=logger, device=device,
         myargs=myargs)
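
Every example on this page instantiates AugmentConfig from a project-local config module that the excerpts do not include. For running the snippets in isolation, the sketch below is a hypothetical stand-in, not the real class: the default field names and the as_markdown / print_params helpers are assumptions inferred from the attributes the examples touch.

# Hypothetical stand-in for the project-local AugmentConfig; all defaults and
# field names below are assumptions inferred from the examples, not the real class.
import argparse


class AugmentConfig(argparse.Namespace):
    def __init__(self, **kwargs):
        defaults = dict(
            name='augment', path='./searchs/augment', data_path='~/data',
            plot_path='', genotype='', gpus=[0], seed=2,
            world_size=-1, rank=0, mp_dist=False,
        )
        defaults.update(kwargs)
        super().__init__(**defaults)

    def as_markdown(self):
        # Render all parameters as a markdown bullet list.
        return '\n'.join('- {}: {}'.format(k, v) for k, v in sorted(vars(self).items()))

    def print_params(self, prtf=print):
        # Dump all parameters through the given print-like function (e.g. logger.info).
        prtf('# Parameters')
        for k, v in sorted(vars(self).items()):
            prtf('{} = {}'.format(k, v))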
Example #2
""" Training augmented model """
import os

import torch
import torch.nn as nn
import torchvision
from tensorboardX import SummaryWriter
from config import AugmentConfig
import utils
from models.augment_cnn import AugmentCNN
from utils import *

config = AugmentConfig()

device = torch.device("cuda")

# tensorboard
writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    logger.info("Logger is set - training start")
    logger.info("Torch version is: {}".format(torch.__version__))
    logger.info("Torch_vision version is: {}".format(torchvision.__version__))

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
Example #3
# Imports assumed from the enclosing module (not shown in this excerpt).
# `find_free_port` and `worker` are project-local helpers defined elsewhere.
import os
import socket
import time
import warnings

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.multiprocessing as mp

from config import AugmentConfig


def main():
    # init config
    config = AugmentConfig()

    # set seed
    if config.seed is not None:
        np.random.seed(config.seed)
        torch.manual_seed(config.seed)
        torch.cuda.manual_seed_all(config.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! ')

    # Set up distributed training parameters when running under SLURM
    if config.world_size == -1 and "SLURM_NPROCS" in os.environ:
        # acquire world size from slurm
        config.world_size = int(os.environ["SLURM_NPROCS"])
        config.rank = int(os.environ["SLURM_PROCID"])
        jobid = os.environ["SLURM_JOBID"]
        hostfile = os.path.join(config.dist_path, "dist_url." + jobid + ".txt")
        if config.dist_file is not None:
            config.dist_url = "file://{}.{}".format(
                os.path.realpath(config.dist_file), jobid)
        elif config.rank == 0:
            if config.dist_backend == 'nccl' and config.infi_band:
                # only the NCCL backend supports InfiniBand
                interface_str = 'ib{:d}'.format(config.infi_band_interface)
                print("Use infiniband support on interface " + interface_str +
                      '.')
                os.environ['NCCL_SOCKET_IFNAME'] = interface_str
                os.environ['GLOO_SOCKET_IFNAME'] = interface_str
                ip_str = os.popen('ip addr show ' + interface_str).read()
                ip = ip_str.split("inet ")[1].split("/")[0]
            else:
                ip = socket.gethostbyname(socket.gethostname())
            port = find_free_port()
            config.dist_url = "tcp://{}:{}".format(ip, port)
            with open(hostfile, "w") as f:
                f.write(config.dist_url)
        else:
            while not os.path.exists(hostfile):
                time.sleep(5)  # wait for the main process to write the hostfile
            with open(hostfile, "r") as f:
                config.dist_url = f.read()
        print("dist-url:{} at PROCID {} / {}".format(config.dist_url,
                                                     config.rank,
                                                     config.world_size))

    # Support multiple GPUs on one node;
    # assume every node has the same number of GPUs.
    ngpus_per_node = torch.cuda.device_count()
    if config.mp_dist:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        config.world_size = ngpus_per_node * config.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # worker process function
        mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, config))
    else:
        # Simply call the worker function on the first GPU device
        worker(None, ngpus_per_node, config)
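
Example #3 calls two project-local helpers that are not shown in the excerpt: worker (the per-process training entry point) and find_free_port. Below is a minimal sketch of the usual find_free_port pattern, i.e. binding a socket to port 0 so the OS picks an ephemeral port; it is an assumption, not necessarily the repository's own implementation.

import socket


def find_free_port():
    """Ask the OS for a currently unused TCP port (sketch of the assumed helper)."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('', 0))            # port 0 tells the OS to choose a free port
        return s.getsockname()[1]  # the port number that was assigned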
Example #4
""" Training augmented model """
import os
import torch
import torch.nn as nn
import numpy as np
# from tensorboardX import SummaryWriter
from config import AugmentConfig
import utils
from models.augment_cnn import AugmentCNN

config = AugmentConfig()

device = torch.device("cuda")

# tensorboard
# writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
# writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
Example #5
import os

import torch
import utils
import logging
import torch.nn as nn
import genotypes
import torch.utils
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

from config import AugmentConfig
from torch.autograd import Variable
from model import NetworkImageNet as Network


config = AugmentConfig()
logger = utils.get_logger(os.path.join(config.path, "{}.log".format(config.name)))
CLASSES = 1000

class CrossEntropyLabelSmooth(nn.Module):

    def __init__(self, num_classes, epsilon):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
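        # --- assumed continuation: the excerpt is cut off above ---
        # A standard label-smoothing loss finishes the forward pass by taking
        # the mean over the batch and the sum over classes (this is a sketch,
        # not necessarily the original code):
        loss = (-targets * log_probs).mean(0).sum()
        return loss


# Usage sketch with hypothetical variable names:
# criterion = CrossEntropyLabelSmooth(num_classes=CLASSES, epsilon=0.1).cuda()
# loss = criterion(logits, target_labels)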