Example 1
def run(gpu, config):
    cudnn.benchmark = True
    if config['distribute']:
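        # global rank = node index ('rank') * GPUs per node ('last_node_gpus') + local GPU index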
        rank = config['rank'] * config['last_node_gpus'] + gpu
        print("world_size: {}, rank: {}".format(config['world_size'], rank))
        dist.init_process_group(backend=config['backend'],
                                init_method=config['ip'],
                                world_size=config['world_size'],
                                rank=rank)
    assert cudnn.enabled, "Amp requires cudnn backend to be enabled."
    torch.cuda.set_device(gpu)

    # create model
    model = AlexNet(10)

    # define loss function
    criterion = nn.CrossEntropyLoss()

    # define optimizer strategy
    optimizer = torch.optim.SGD(model.parameters(),
                                config['lr'],
                                momentum=config['momentum'],
                                weight_decay=config['weight_decay'])

    # convert the PyTorch model into an Apex-wrapped model
    apexparallel = ApexDistributeModel(model, criterion, optimizer, config,
                                       gpu)
    apexparallel.convert()
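    # lars() presumably switches the optimizer to LARS (layer-wise adaptive rate scaling)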
    apexparallel.lars()

    # load data
    data_path = '~/datasets/cifar10/train'
    train_set = LoadClassifyDataSets(data_path, 227)
    train_sampler = None
    if config['distribute']:
        train_sampler = distributed.DistributedSampler(train_set)
    train_loader = DataLoader(train_set,
                              config['batch_size'],
                              shuffle=(train_sampler is None),
                              num_workers=config['num_workers'],
                              pin_memory=True,
                              sampler=train_sampler,
                              collate_fn=collate_fn)

    for epo in range(config['epoch']):
        if config['distribute']:
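            # reseed the sampler so every replica draws a different shuffle each epoch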
            train_sampler.set_epoch(epo)

        # train for one epoch
        apexparallel.train(epo, train_loader)
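Example 1 receives the local GPU index as the first argument of run(gpu, config), which matches the calling convention of torch.multiprocessing.spawn. A minimal launch sketch, assuming the config keys used in the snippet; the rendezvous address and the GPU/node counts are placeholders:

import torch.multiprocessing as mp

if __name__ == '__main__':
    config = {
        'distribute': True,
        'backend': 'nccl',
        'ip': 'tcp://127.0.0.1:23456',  # placeholder rendezvous address
        'rank': 0,                      # index of this node
        'last_node_gpus': 2,            # GPUs per node, as used in the rank computation
        'world_size': 2,
        'lr': 0.01,
        'momentum': 0.9,
        'weight_decay': 0.0001,
        'batch_size': 128,
        'num_workers': 4,
        'epoch': 100,
    }
    # spawn one process per local GPU; each process gets its GPU index as the first argument to run()
    mp.spawn(run, nprocs=2, args=(config,))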
Example 2
def run(gpu, config):
    cudnn.benchmark = True
    if config['distribute']:
        rank = config['rank'] * config['last_node_gpus'] + gpu
        print("world_size: {}, rank: {}".format(config['world_size'], rank))
        dist.init_process_group(backend=config['backend'], init_method=config['ip'],
                                world_size=config['world_size'], rank=rank)
    assert cudnn.enabled, "Amp requires cudnn backend to be enabled."
    # create model
    model = AlexNet(10)

    if config['sync_bn']:
        # synchronized batch normalization across processes
        model = apex.parallel.convert_syncbn_model(model)

    torch.cuda.set_device(gpu)
    model = model.cuda(gpu)

    # define loss function
    criterion = nn.CrossEntropyLoss().cuda(gpu)

    # define optimizer strategy
    optimizer = torch.optim.SGD(model.parameters(), config['lr'],
                                momentum=config['momentum'],
                                weight_decay=config['weight_decay'])

    # initialize Apex AMP (opt_level 'O0' runs in pure FP32; 'O1'/'O2' enable mixed precision)
    model, optimizer = apex.amp.initialize(model, optimizer, opt_level='O0')

    if config['distribute']:
        # model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
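        # delay_allreduce=True performs a single all-reduce after the whole backward pass instead of overlapping reductions with it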

    # load data
    data_path = '~/datasets/cifar10/train'
    train_set = LoadClassifyDataSets(data_path, 227)
    train_sampler = None
    if config['distribute']:
        train_sampler = distributed.DistributedSampler(train_set)
    train_loader = DataLoader(train_set, config['batch_size'], shuffle=(train_sampler is None),
                              num_workers=config['num_workers'], pin_memory=True, sampler=train_sampler,
                              collate_fn=collate_fn)

    for epo in range(config['epoch']):
        if config['distribute']:
            train_sampler.set_epoch(epo)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epo, gpu)
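Example 2 hands the inner loop to an external train() helper that is not part of the snippet. A minimal sketch of what one epoch could look like with the Apex-scaled backward pass, reusing the names from the call above; this is an assumption, not the author's implementation:

def train(train_loader, model, criterion, optimizer, epoch, gpu):
    model.train()
    for step, (inputs, target) in enumerate(train_loader):
        inputs = inputs.cuda(gpu, non_blocking=True)
        target = target.cuda(gpu, non_blocking=True)
        output = model(inputs)
        loss = criterion(output, target)
        optimizer.zero_grad()
        # scale the loss through amp so the backward pass matches the chosen opt_level
        with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        if step % 10 == 0:
            print("epoch {} step {} loss {:.4f}".format(epoch, step, loss.item()))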
Example 3
def main(is_distributed, rank, ip):
    world_size = 1
    if is_distributed:
        world_size = 2
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=ip,
                                             world_size=world_size,
                                             rank=rank)
    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."
    print("Connect")
    # set hyper parameters
    batch_size = 128
    lr = 0.01  # based on a batch size of 256
    momentum = 0.9
    weight_decay = 0.0001
    epochs = 100

    # recompute lr
    lr = lr * world_size
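    # linear scaling rule: the effective batch grows with world_size, so the base rate is scaled by the same factor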

    # create model
    model = AlexNet(10)
    model = model.cuda()
    if is_distributed:
        # for distributed training
        model = nn.parallel.DistributedDataParallel(model)

    # define loss function
    criterion = nn.CrossEntropyLoss().cuda()

    # define optimizer strategy
    optimizer = torch.optim.SGD(model.parameters(),
                                lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    # load train data
    data_path = '~/datasets/cifar10/train'
    train_set = LoadClassifyDataSets(data_path, 227)
    train_sampler = None
    if is_distributed:
        train_sampler = distributed.DistributedSampler(train_set)
    train_loader = DataLoader(train_set,
                              batch_size,
                              shuffle=(train_sampler is None),
                              num_workers=4,
                              pin_memory=True,
                              sampler=train_sampler,
                              collate_fn=collate_fn)

    for epoch in range(epochs):
        # for distributed training
        if is_distributed:
            train_sampler.set_epoch(epoch)

        model.train()
        train_iter = iter(train_loader)
        inputs, target = next(train_iter)

        step = 0
        print("Epoch is {}".format(epoch))
        while inputs is not None:
            step += 1
            print("Step is {}".format(step))
            if not is_distributed:
                inputs = inputs.cuda()
            time_model_1 = time.time()
            output = model(inputs)
            time_model_2 = time.time()
            print("model time: {}".format(time_model_2 - time_model_1))
            time_loss_1 = time.time()
            loss = criterion(output, target.cuda())
            time_loss_2 = time.time()
            print("loss time: {}".format(time_loss_2 - time_loss_1))
            optimizer.zero_grad()
            time_back_1 = time.time()
            loss.backward()
            time_back_2 = time.time()
            print("back time: {}".format(time_back_2 - time_back_1))
            optimizer.step()
            if step % 10 == 0:
                print("loss is : {}", loss.item())
            inputs, target = next(train_iter, (None, None))
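main(is_distributed, rank, ip) leaves the launch mechanics to the caller. A small sketch of a command-line entry point; the argument names, the script name train.py and the example address are assumptions, not part of the source:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--distributed', action='store_true')
    parser.add_argument('--rank', type=int, default=0)
    parser.add_argument('--ip', type=str, default='tcp://127.0.0.1:23456')
    args = parser.parse_args()
    # e.g. run "python train.py --distributed --rank 0 --ip tcp://<master>:23456" on the first node
    # and the same command with --rank 1 on the second node
    main(args.distributed, args.rank, args.ip)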
Example 4
def weight_init(m):
    if isinstance(m, nn.Conv2d):
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
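        # He-style initialization: zero-mean Gaussian with std sqrt(2 / n), suited to ReLU activations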
        m.weight.data.normal_(0, math.sqrt(2. / n))
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1)
        m.bias.data.zero_()

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=100, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

n_output = 10
net = AlexNet(10)

# use the GPU if it is available
if use_cuda:
    # move param and buffer to GPU
    net.cuda()
    # run data-parallel across all available GPUs
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    # speed up slightly
    cudnn.benchmark = True


# define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()                      # cross-entropy loss
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)      # stochastic gradient descent
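The weight_init hook defined at the top of this example is never attached to the network in the snippet. Applying it to the freshly built model before training starts would be a one-liner (the placement is an assumption):

# recursively apply the custom initialization to every Conv2d / BatchNorm2d submodule
net.apply(weight_init)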

Example 5
# read image
img = None
for i in range(1, 6):
    im = readimg('test/violent/' + str(i))
    if img is None:
        img = [im]
    else:
        img = np.append(img, [im], axis=0)


x = tf.placeholder(tf.float32, [5, IMAGE_SIZE, IMAGE_SIZE, 20])

# initialization
model = AlexNet(x, NUM_CLASSES)
score = model.fc8



with tf.name_scope('result') as scope:
    result = tf.argmax(tf.nn.softmax(score), 1)


saver = tf.train.Saver()

with tf.Session() as sess:
    # note the order: initialize the variables first, then restore the saved weights from the checkpoint
    sess.run(tf.global_variables_initializer())
    # restore saved weights
    ckpt = tf.train.get_checkpoint_state('check')
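    # hedged continuation (not part of the original snippet): restore the weights if a
    # checkpoint exists, then run the classifier on the stacked images read above
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
    predictions = sess.run(result, feed_dict={x: img})
    print(predictions)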
Example 6
def main(is_distributed, sync_bn, rank):
    world_size = 1
    if is_distributed:
        world_size = 2
        torch.distributed.init_process_group(
            backend='nccl',
            init_method='tcp://172.16.117.110:1234',
            world_size=world_size,
            rank=rank)
    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # set hyper parameters
    batch_size = 30
    lr = 0.01  # based on a batch size of 256
    momentum = 0.9
    weight_decay = 0.0001
    epochs = 100

    # recompute lr
    lr = lr * world_size

    # create model
    model = AlexNet(10)
    # use Apex to synchronize batch normalization across GPUs
    # if sync_bn:
    #     model = apex.parallel.convert_syncbn_model(model)
    model = model.cuda()

    # define loss function
    criterion = nn.CrossEntropyLoss().cuda()

    # define optimizer strategy
    optimizer = torch.optim.SGD(model.parameters(),
                                lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    # initialize Amp
    # model, optimizer = apex.amp.initialize(model, optimizer, opt_level='O0')
    if is_distributed:
        # for distributed training
        # model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
        model = nn.parallel.DistributedDataParallel(model)
    # load train data
    data_path = '~/datasets/cifar10/train'
    train_set = LoadClassifyDataSets(data_path, 227)
    train_sampler = None
    if is_distributed:
        train_sampler = distributed.DistributedSampler(train_set,
                                                       world_size,
                                                       rank=rank)
        # train_sampler = distributed.DistributedSampler(train_set)
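        # passing world_size and rank explicitly mirrors the defaults the sampler would read from the initialized process group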
    train_loader = DataLoader(train_set,
                              batch_size,
                              shuffle=(train_sampler is None),
                              num_workers=4,
                              pin_memory=True,
                              sampler=train_sampler,
                              collate_fn=collate_fn)

    for epoch in range(epochs):
        # for distributed training
        if is_distributed:
            train_sampler.set_epoch(epoch)

        model.train()
        train_iter = iter(train_loader)
        inputs, target = next(train_iter)

        step = 0
        print("Epoch is {}".format(epoch))
        while inputs is not None:
            step += 1
            print("test0")
            temp = inputs.cuda()
            print("test01")
            output = model(temp)
            print("test1")
            loss = criterion(output, target.cuda())
            print("test2")
            optimizer.zero_grad()
            print("test3")

            loss.backward()
            print("test4")
            # with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
            #     scaled_loss.backward()
            optimizer.step()
            print("test5")
            if step % 10 == 0:
                print("loss is : ", loss.item())
            inputs, target = next(train_iter, (None, None))
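The manual iterator pattern above (iter() plus next() with a (None, None) sentinel) can be written more idiomatically by looping over the DataLoader directly. A sketch of the equivalent body inside main, with the same names as the snippet:

    for epoch in range(epochs):
        if is_distributed:
            train_sampler.set_epoch(epoch)
        model.train()
        for step, (inputs, target) in enumerate(train_loader, start=1):
            output = model(inputs.cuda())
            loss = criterion(output, target.cuda())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % 10 == 0:
                print("loss is :", loss.item())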