Example #1
parser.add_argument('--lr', type=float, default=0.1)
parser.add_argument('--momentum', type=float, default=0.0)
parser.add_argument('--weightdecay', type=float, default=0.0)
parser.add_argument('--model', type=str, default='vgg')
parser.add_argument('--resume', type=str, default=None)
parser.add_argument('--datadir',
                    type=str,
                    default='/Users/wjf/datasets/SVHN/train25k_test70k')
parser.add_argument('--logdir', type=str, default='logs/SGDCov')

args = parser.parse_args()
logger = LogSaver(args.logdir)
logger.save(str(args), 'args')

# data
dataset = SVHN(args.datadir)
logger.save(str(dataset), 'dataset')
test_list = dataset.getTestList(1000, True)

# model
start_iter = 0
lr = args.lr
if args.model == 'resnet':
    from resnet import ResNet18
    model = ResNet18().cuda()
elif args.model == 'vgg':
    from vgg import vgg11
    model = vgg11().cuda()
else:
    raise NotImplementedError()
criterion = CEwithMask
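This snippet (like several below) starts mid-file: the argparse parser, the imports, and project helpers such as LogSaver, SVHN, and CEwithMask are defined earlier in the source. A minimal sketch of the assumed preamble, with only the standard-library part filled in:

# Hypothetical preamble assumed by the snippet above; LogSaver, SVHN, and
# CEwithMask are project-specific and not reproduced here.
import argparse

parser = argparse.ArgumentParser()
# ... the add_argument calls shown above follow here ...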
Example #2
parser.add_argument('--maxiter', type=int, default=int(2e5 + 1))
parser.add_argument('--lr', type=float, default=0.05)
parser.add_argument('--list-size', type=int, default=5000)
parser.add_argument('--sigma', type=float, default=1e-3)
parser.add_argument('--resume', type=str, default=None)
parser.add_argument('--datadir',
                    type=str,
                    default='/home/wjf/datasets/SVHN/train25000_test70000')
parser.add_argument('--logdir', type=str, default='logs/GLD')

args = parser.parse_args()
logger = LogSaver(args.logdir)
logger.save(str(args), 'args')

# data
dataset = SVHN(args.datadir)
logger.save(str(dataset), 'dataset')
train_list = dataset.getTrainList(args.list_size, True)
test_list = dataset.getTestList(1000, True)

# model
start_iter = 0
model = vgg11().cuda()
logger.save(str(model), 'classifier')
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
logger.save(str(optimizer), 'optimizer')

if args.resume:
    checkpoint = torch.load(args.resume)
    start_iter = checkpoint['iter']
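The resume branch is cut off above. A minimal sketch of how such a restore typically continues, assuming the checkpoint also stores model and optimizer state dicts (Example #13 below shows the 'model' key; 'optimizer' is an assumption):

# Sketch only: assumes these keys exist in the saved checkpoint.
if args.resume:
    checkpoint = torch.load(args.resume)
    start_iter = checkpoint['iter']
    model.load_state_dict(checkpoint['model'])          # restore weights
    optimizer.load_state_dict(checkpoint['optimizer'])  # restore optimizer state (assumed key)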
Example #3
    print "Force restoring..."
    ckpt_file = FLAGS.restore_path
    # data_directory = os.path.join(FLAGS.data_dir, "mnist")
else:
    # assert False
    data_directory = FLAGS.data_dir
    ckpt_file = os.path.join(data_directory, "svhn" + param_string + ".ckpt")
    # data_directory = os.path.join(FLAGS.data_dir, "mnist")
    if not os.path.exists(data_directory):
        os.makedirs(data_directory)

print "Using ckpt file:", ckpt_file

# train_data = mnist.input_data.read_data_sets(mnist_directory, one_hot=True).train # binarized (0-1) mnist train data
# test_data = mnist.input_data.read_data_sets(mnist_directory, one_hot=True).test # binarized (0-1) mnist test data
svhn_data = SVHN()
train_data = svhn_data.train()
test_data = svhn_data.test()

fetches = []
fetches.extend([Lx, Lz, train_op])
Lxs = [0] * train_iters
Lzs = [0] * train_iters

sess = tf.InteractiveSession()

saver = tf.train.Saver()  # saves variables learned during training
tf.global_variables_initializer().run()  # initialize_all_variables() is deprecated
#saver.restore(sess, "/tmp/draw/drawmodel.ckpt") # to restore from model, uncomment this line

# e_values = sess.run(my_e,feed_dict={})
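The snippet prepares fetches, Lxs, and Lzs but stops before the training loop. A minimal sketch of a loop consistent with that setup; the input placeholder x, the batch_size variable, and train_data.next_batch() are assumptions about the surrounding DRAW-style script:

# Hypothetical training loop; x, batch_size, and next_batch() are assumed to
# exist elsewhere in the script.
for i in range(train_iters):
    xtrain = train_data.next_batch(batch_size)                    # assumed batching API
    Lxs[i], Lzs[i], _ = sess.run(fetches, feed_dict={x: xtrain})  # Lx, Lz, train_op
    if i % 100 == 0:
        print("iter=%d : Lx: %f Lz: %f" % (i, Lxs[i], Lzs[i]))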
Example #4
    'As preprocessing: scale the image randomly between 2 numbers and crop randomly at the network input size'
)
tf.app.flags.DEFINE_string('train_root_dir', '../training',
                           'Root directory to put the training data')
tf.app.flags.DEFINE_integer('log_step', 10000,
                            'Logging period in terms of iteration')

NUM_CLASSES = 10

TRAIN_FILE = 'svhn'
TEST_FILE = 'mnist'
print(TRAIN_FILE + '  --------------------------------------->   ' + TEST_FILE)
print(TRAIN_FILE + '  --------------------------------------->   ' + TEST_FILE)
print(TRAIN_FILE + '  --------------------------------------->   ' + TEST_FILE)

TRAIN = SVHN('data/svhn', split='train', shuffle=True)
VALID = MNIST('data/mnist', split='test', shuffle=True)
TEST = MNIST('data/mnist', split='test', shuffle=False)

FLAGS = tf.app.flags.FLAGS
MAX_STEP = 10000


def decay(start_rate, epoch, num_epochs):
    return start_rate / pow(1 + 0.001 * epoch, 0.75)


def adaptation_factor(x):
    #return 1.0
    #return 0.25
    den = 1.0 + math.exp(-10 * x)
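adaptation_factor is truncated after computing the denominator. In SVHN-to-MNIST domain-adaptation code this factor commonly follows the DANN schedule 2 / (1 + exp(-10x)) - 1, capped at 1; a sketch under that assumption (the original body may differ):

# Assumed completion using the common DANN-style schedule.
def adaptation_factor(x):
    if x >= 1.0:
        return 1.0
    den = 1.0 + math.exp(-10 * x)
    return 2.0 / den - 1.0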
Example #5

def bias_variable(shape):
    return tf.Variable(tf.constant(1.0, shape=shape))


def convolution(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")


def max_pool(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


# Load data
svhn = SVHN("../res", n_classes, use_extra=True, gray=False)

# Create the model
X = tf.placeholder(tf.float32, [None, image_size, image_size, channels])
Y = tf.placeholder(tf.float32, [None, n_classes])


# Weights & Biases
weights = {
    "layer1": weight_variable([filter_size, filter_size, channels, depth_1]),
    "layer2": weight_variable([filter_size, filter_size, depth_1, depth_2]),
    "layer3": weight_variable([filter_size, filter_size, depth_2, depth_3]),
    "layer4": weight_variable([image_size // 8 * image_size // 8 * depth_3, hidden]),
    "layer5": weight_variable([hidden, n_classes])
}
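Given the helpers and weight tensors above, the forward pass would chain convolution, ReLU, and max_pool three times before the fully connected layers. A minimal sketch, assuming a biases dict with the same keys plus the hyperparameters named in the snippet:

# Sketch of the forward pass implied by the weights dict; a `biases` dict with
# matching keys is assumed but not shown in the snippet.
def forward(x):
    conv1 = tf.nn.relu(convolution(x, weights["layer1"]) + biases["layer1"])
    pool1 = max_pool(conv1)
    conv2 = tf.nn.relu(convolution(pool1, weights["layer2"]) + biases["layer2"])
    pool2 = max_pool(conv2)
    conv3 = tf.nn.relu(convolution(pool2, weights["layer3"]) + biases["layer3"])
    pool3 = max_pool(conv3)  # spatial size reduced 8x overall
    flat = tf.reshape(pool3, [-1, image_size // 8 * image_size // 8 * depth_3])
    fc = tf.nn.relu(tf.matmul(flat, weights["layer4"]) + biases["layer4"])
    return tf.matmul(fc, weights["layer5"]) + biases["layer5"]  # class logits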
Example #6
parser = argparse.ArgumentParser()
parser.add_argument('--maxiter', type=int, default=int(2e5+1))
parser.add_argument('--lr', type=float, default=0.05)
parser.add_argument('--batch-size', type=int, default=100)
parser.add_argument('--list-size', type=int, default=5000)
parser.add_argument('--update-noise', type=int, default=10)
parser.add_argument('--resume', type=str, default=None)
parser.add_argument('--datadir', type=str, default='/home/wjf/datasets/SVHN/train25000_test70000')
parser.add_argument('--logdir', type=str, default='logs/GLD_diag')

args = parser.parse_args()
logger = LogSaver(args.logdir)
logger.save(str(args), 'args')

# data
dataset = SVHN(args.datadir)
logger.save(str(dataset), 'dataset')
train_list = dataset.getTrainList(args.list_size, True)
test_list = dataset.getTestList(1000, True)

# model
start_iter = 0
model = vgg11().cuda()
logger.save(str(model), 'classifier')
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
logger.save(str(optimizer), 'optimizer')

if args.resume:
    checkpoint = torch.load(args.resume)
    start_iter = checkpoint['iter']
Example #7
def main(args):
    
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    init_seeds(seed=int(time.time()))
    kwargs = {'num_workers': 2, 'pin_memory': True} if args.cuda else {}
    print(args.dataset)

    if args.dataset == 'MNIST':
        test_dataloader = data.DataLoader(
            MNIST(args.data_path, args.run_folder, transform=mnist_transformer()),
                batch_size=10000, shuffle=False, **kwargs)

        train_dataset = MNIST(args.data_path, args.run_folder, train=True, transform=mnist_transformer(), imbalance_ratio=args.imbalance_ratio)

        if args.imbalance_ratio == 100:
            args.num_images = 25711
        else:
            args.num_images = 50000

        args.budget = 125
        args.initial_budget = 125
        args.num_classes = 10
        args.num_channels = 1
        args.arch_scaler = 2
    elif args.dataset == 'SVHN':
        test_dataloader = data.DataLoader(
            SVHN(args.data_path, args.run_folder, transform=svhn_transformer()),
                batch_size=5000, shuffle=False, **kwargs)

        train_dataset = SVHN(args.data_path, args.run_folder, train=True, transform=svhn_transformer(), imbalance_ratio=args.imbalance_ratio)

        if args.imbalance_ratio == 100:
            args.num_images = 318556
        else:
            args.num_images = 500000

        args.budget = 1250
        args.initial_budget = 1250
        args.num_classes = 10
        args.num_channels = 3
        args.arch_scaler = 1
    elif args.dataset == 'cifar10':
        test_dataloader = data.DataLoader(
                datasets.CIFAR10(args.data_path, download=True, transform=cifar_transformer(), train=False),
            batch_size=args.batch_size, drop_last=False)

        train_dataset = CIFAR10(args.data_path)

        args.num_images = 50000
        args.budget = 2500
        args.initial_budget = 5000
        args.num_classes = 10
        args.num_channels = 3
    elif args.dataset == 'cifar100':
        test_dataloader = data.DataLoader(
                datasets.CIFAR100(args.data_path, download=True, transform=cifar_transformer(), train=False),
             batch_size=args.batch_size, drop_last=False)

        train_dataset = CIFAR100(args.data_path)

        args.num_images = 50000
        args.budget = 2500
        args.initial_budget = 5000
        args.num_classes = 100
        args.num_channels = 3
    elif args.dataset == 'ImageNet':
        test_dataloader = data.DataLoader(
            ImageNet(args.data_path + '/val', transform=imagenet_test_transformer()),
                batch_size=args.batch_size, shuffle=False, drop_last=False, **kwargs)

        if args.imbalance_ratio == 100:
            train_dataset = ImageNet(args.data_path + '/train_ir_100', transform=imagenet_train_transformer())
            args.num_images = 645770
        else:
            train_dataset = ImageNet(args.data_path + '/train', transform=imagenet_train_transformer())
            args.num_images = 1281167

        args.budget = 64000
        args.initial_budget = 64000
        args.num_classes = 1000
        args.num_channels = 3
        args.arch_scaler = 1
    else:
        raise NotImplementedError

    all_indices = set(np.arange(args.num_images))
    initial_indices = random.sample(all_indices, args.initial_budget)
    sampler = data.sampler.SubsetRandomSampler(initial_indices)
    #print(args.batch_size, sampler)
    # dataset with labels available
    querry_dataloader = data.DataLoader(train_dataset, sampler=sampler,
            batch_size=args.batch_size, drop_last=False, **kwargs)
    print('Sampler size =', len(querry_dataloader))
    solver = Solver(args, test_dataloader)

    splits = range(1,11)

    current_indices = list(initial_indices)

    accuracies = []
    
    for split in splits:
        print("Split =", split)
        # need to retrain all the models on the new images
        # re-initialize and retrain the models
        #task_model = vgg.vgg16_bn(num_classes=args.num_classes)
        if args.dataset == 'MNIST':
            task_model = model.LeNet(num_classes=args.num_classes)
        elif args.dataset == 'SVHN':
            task_model = resnet.resnet10(num_classes=args.num_classes)
        elif args.dataset == 'ImageNet':
            task_model = resnet.resnet18(num_classes=args.num_classes)
        else:
            print('WRONG DATASET!')
        # loading pretrained
        if args.pretrained:
            print("Loading pretrained model", args.pretrained)
            checkpoint = torch.load(args.pretrained)
            task_model.load_state_dict({k: v for k, v in checkpoint['state_dict'].items() if 'fc' not in k}, strict=False) # copy all but last linear layers
        #
        vae = model.VAE(z_dim=args.latent_dim, nc=args.num_channels, s=args.arch_scaler)
        discriminator = model.Discriminator(z_dim=args.latent_dim, s=args.arch_scaler)
        #print("Sampling starts")
        unlabeled_indices = np.setdiff1d(list(all_indices), current_indices)
        unlabeled_sampler = data.sampler.SubsetRandomSampler(unlabeled_indices)
        unlabeled_dataloader = data.DataLoader(train_dataset, sampler=unlabeled_sampler,
                batch_size=args.batch_size, drop_last=False, **kwargs)
        #print("Train starts")
        # train the models on the current data
        acc, vae, discriminator = solver.train(querry_dataloader,
                                               task_model, 
                                               vae, 
                                               discriminator,
                                               unlabeled_dataloader)


        print('Final accuracy with {}% of data is: {:.2f}'.format(int(split*100.0*args.budget/args.num_images), acc))
        accuracies.append(acc)

        sampled_indices = solver.sample_for_labeling(vae, discriminator, unlabeled_dataloader)
        current_indices = list(current_indices) + list(sampled_indices)
        sampler = data.sampler.SubsetRandomSampler(current_indices)
        querry_dataloader = data.DataLoader(train_dataset, sampler=sampler,
                batch_size=args.batch_size, drop_last=False, **kwargs)

    torch.save(accuracies, os.path.join(args.out_path, args.log_name))
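main(args) expects a fully populated namespace. A hypothetical entry point is sketched below; the flag names are inferred from the attributes main() reads, and the defaults are assumptions rather than the project's real argument list:

# Hypothetical entry point; flags and defaults are inferred, not taken from the project.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='SVHN')
    parser.add_argument('--data_path', type=str, default='./data')
    parser.add_argument('--run_folder', type=str, default='./runs')
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--latent_dim', type=int, default=32)
    parser.add_argument('--imbalance_ratio', type=int, default=1)
    parser.add_argument('--pretrained', type=str, default=None)
    parser.add_argument('--no_cuda', action='store_true')
    parser.add_argument('--out_path', type=str, default='./results')
    parser.add_argument('--log_name', type=str, default='accuracies.log')
    main(parser.parse_args())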
Example #8

def bias_variable(shape):
    return tf.Variable(tf.constant(1.0, shape=shape))


def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")


def max_pool(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


# Load the dataset
svhn = SVHN(10, use_extra=False, gray=False)

# Build the model
X = tf.placeholder(tf.float32, [None, 32, 32, 3])
Y = tf.placeholder(tf.float32, [None, 10])




# Batch normalization

mean, variance = tf.nn.moments(X, [1, 2, 3], keep_dims=True)
x = tf.nn.batch_normalization(X, mean, variance, normalization_offset, normalization_scale, normalization_epsilon)

# First convolutional layer
W_conv1 = weight_variable([5, 5, 3, 16])
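The snippet ends right after declaring W_conv1. A minimal sketch of how the first layer would be applied with the helpers above; b_conv1 is an assumption consistent with the 16 filters:

# Sketch: first convolutional layer on the normalized input x.
b_conv1 = bias_variable([16])                       # assumed bias, one per filter
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool(h_conv1)                         # 32x32 -> 16x16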
Example #9
nb_classes = 10
nb_epoch = 20

path = '/home/...'  # change this path

# Input image dimensions; all images in the SVHN dataset are 32x32
img_rows, img_cols = 32, 32
# Number of convolution filters used in the convolutional layers
nb_filters = 32
# Pooling window size
pool_size = (2, 2)
# Convolution kernel size
kernel_size = (3, 3)
# Load images from the SVHN dataset and set up the training and test sets
svhn = SVHN('/home/cv503/Desktop/conv_visualization/svhn_dataset',
            nb_classes,
            gray=False)
X_train = svhn.train_data
Y_train = svhn.train_labels
X_test = svhn.test_data
Y_test = svhn.test_labels

# With the TensorFlow backend (tf dimension ordering),
# the first dimension is the sample axis (number of samples),
# the second and third dimensions are height and width,
# and the last dimension is the channel axis (number of color channels)
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)
input_shape = (img_rows, img_cols, 3)

# Convert X_train and X_test to float32
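The code is cut off right after this comment. A minimal sketch of the usual Keras-style preparation, assuming pixel values in [0, 255] and integer labels (if the SVHN loader already returns one-hot labels, the to_categorical step would be dropped):

# Sketch of the typical next steps; assumes raw pixel values and integer labels.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# One-hot encode the labels for a softmax output with nb_classes units.
from keras.utils import np_utils
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)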
Example #10
parser.add_argument('--ghostsize', type=int, default=100)
parser.add_argument('--numghost', type=int, default=100)
parser.add_argument('--update-noise', type=int, default=10)
parser.add_argument('--momentum', type=float, default=0.0)
parser.add_argument('--weightdecay', type=float, default=0.0)
parser.add_argument('--model', type=str, default='vgg')
parser.add_argument('--resume', type=str, default=None)
parser.add_argument('--datadir', type=str, default='/Users/wjf/datasets/SVHN/train25k_test70k')
parser.add_argument('--logdir', type=str, default='logs/GLD_Fisher')

args = parser.parse_args()
logger = LogSaver(args.logdir)
logger.save(str(args), 'args')

# data
dataset = SVHN(args.datadir)
logger.save(str(dataset), 'dataset')
train_list = dataset.getTrainList(args.batchsize, True)
test_list = dataset.getTestList(1000, True)

# model
start_iter = 0
lr = args.lr
if args.model == 'resnet':
    from resnet import ResNet18
    model = ResNet18().cuda()
elif args.model == 'vgg':
    from vgg import vgg11
    model = vgg11().cuda()
else:
    raise NotImplementedError()
Example #11
def main():
    global args, best_prec1
    args = parser.parse_args()

    # Use specific GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    devices = [int(s) for s in args.gpu.split(',') if s.isdigit()]
    nGPU = len(devices)
    devices = list(range(nGPU))

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True, r=args.se_reduce)
    else:
        print("=> creating model '{}'".format(args.arch))
        # model = models.__dict__[args.arch](r=args.se_reduce)
        model = models.__dict__[args.arch]()

    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            if (nGPU == 1):
                model.cuda()
            else:
                model = torch.nn.DataParallel(model, device_ids=devices).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    print(args)
    print(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # optimizer = torch.optim.SGD(model.parameters(),
    #                             args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    lr_policy = list()
    params_collect = dict(model.named_parameters())
    for k, v in params_collect.items():
        if 'st' in k:
            lr_policy.append({
                'params': v,
                'lr': 0.001,
                'weight_decay': args.weight_decay
            })
        else:
            lr_policy.append({'params': v})

    optimizer = torch.optim.SGD(lr_policy,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if (args.dataset == 'imagenet'):
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
        else:
            train_sampler = None

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    elif (args.dataset == 'cifar10'):
        to_normalized_tensor = [
            transforms.ToTensor(),
            # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2470, 0.2435, 0.2616))
        ]
        data_augmentation = [
            transforms.RandomCrop(28, padding=0),
            transforms.RandomHorizontalFlip()
        ]

        transform = transforms.Compose(data_augmentation +
                                       to_normalized_tensor)

        trainset = datasets.CIFAR10(root='./data',
                                    train=True,
                                    download=True,
                                    transform=transform)
        train_loader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=2)

        testset = datasets.CIFAR10(root='./data',
                                   train=False,
                                   download=True,
                                   transform=transform)
        val_loader = torch.utils.data.DataLoader(testset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=2)
    elif (args.dataset == 'svhn'):
        path_to_train_lmdb_dir = os.path.join(args.data, 'train.lmdb')
        path_to_val_lmdb_dir = os.path.join(args.data, 'val.lmdb')
        train_loader = torch.utils.data.DataLoader(
            SVHN(path_to_train_lmdb_dir),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=2,
            pin_memory=True)
        val_loader = torch.utils.data.DataLoader(SVHN(path_to_val_lmdb_dir),
                                                 batch_size=128,
                                                 shuffle=False)

        # to_normalized_tensor = [transforms.ToTensor(),
        #                         # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]
        #                         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470,  0.2435,  0.2616))]
        # data_augmentation = [transforms.RandomCrop(28, padding=0),
        #                      transforms.RandomHorizontalFlip()]
        #
        # transform = transforms.Compose(data_augmentation + to_normalized_tensor)
        #
        # trainset = datasets.CIFAR10(root='./data', train=True,
        #                                         download=True, transform=transform)
        # train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
        #                                           shuffle=True, num_workers=2)
        #
        # testset = datasets.CIFAR10(root='./data', train=False,
        #                                        download=True, transform=transform)
        # val_loader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
        #                                      shuffle=False, num_workers=2)
    else:
        print('No dataset named', args.dataset)
        exit(0)

    if args.evaluate:
        acc = validate(val_loader, model, criterion)
        print(acc)
        return

    print("Number of parameters: ",
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        print("Learning Rate: ", optimizer.param_groups[0]['lr'])
        # train for one epoch

        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.save + '/checkpoint.pth.tar')

        print("\nBest Model: ", best_prec1)
Example #12
    def __init__(self,
                 base_dataset='svhn',
                 take_amount=None,
                 take_amount_seed=13,
                 add_svhn_extra=True,
                 aux_data_filename=None,
                 add_aux_labels=False,
                 aux_take_amount=None,
                 train=False,
                 **kwargs):
        """A dataset with auxiliary pseudo-labeled data"""


        if base_dataset == 'svhn':
            if train:
                self.dataset = SVHN(split='train', **kwargs)
            else:
                self.dataset = SVHN(split='test', **kwargs)

        # elif base_dataset == 'mnist_m':
        #     if train:
        #         self.dataset = MNIST_M(split='train', **kwargs)
        #     else:
        #         self.dataset = MNIST_M(split='test', **kwargs)

        # elif base_dataset == "syndigit" :
        #     if train:
        #         self.dataset = SynDigit(split="train" , **kwargs)
        #     else:
        #         self.dataset = SynDigit(split="test" , **kwargs)

        else:
            raise ValueError('Dataset %s not supported' % base_dataset)

        # because torchvision is annoying
        self.dataset.targets = self.dataset.labels
        self.targets = list(self.targets)

        if train and add_svhn_extra:
            svhn_extra = SVHN(split='noise_20p', **kwargs)
            self.data = np.concatenate([self.data, svhn_extra.data])
            self.targets.extend(svhn_extra.labels)
        
        self.base_dataset = base_dataset
        self.train = train

        if self.train:
            if take_amount is not None:
                rng_state = np.random.get_state()
                np.random.seed(take_amount_seed)
                take_inds = np.random.choice(len(self.sup_indices),
                                             take_amount, replace=False)
                np.random.set_state(rng_state)

                logger = logging.getLogger()
                logger.info('Randomly taking only %d/%d examples from training'
                            ' set, seed=%d, indices=%s',
                            take_amount, len(self.sup_indices),
                            take_amount_seed, take_inds)
                self.targets = self.targets[take_inds]
                self.data = self.data[take_inds]

            self.sup_indices = list(range(len(self.targets)))
            self.unsup_indices = []

            if aux_data_filename is not None:
                aux_path = os.path.join(kwargs['root'], aux_data_filename)
                print("Loading data from %s" % aux_path)
                with open(aux_path, 'rb') as f:
                    aux = pickle.load(f)
                aux_data = aux['data']
                aux_targets = aux['extrapolated_targets']
                orig_len = len(self.data)

                if aux_take_amount is not None:
                    rng_state = np.random.get_state()
                    np.random.seed(take_amount_seed)
                    take_inds = np.random.choice(len(aux_data),
                                                 aux_take_amount, replace=False)
                    np.random.set_state(rng_state)

                    logger = logging.getLogger()
                    logger.info(
                        'Randomly taking only %d/%d examples from aux data'
                        ' set, seed=%d, indices=%s',
                        aux_take_amount, len(aux_data),
                        take_amount_seed, take_inds)
                    aux_data = aux_data[take_inds]
                    aux_targets = aux_targets[take_inds]

                self.data = np.concatenate((self.data, aux_data), axis=0)

                if not add_aux_labels:
                    self.targets.extend([-1] * len(aux_data))
                else:
                    self.targets.extend(aux_targets)
                # note that we use unsup indices to track the labeled datapoints
                # whose labels are "fake"
                self.unsup_indices.extend(
                    range(orig_len, orig_len+len(aux_data)))

            logger = logging.getLogger()
            logger.info("--Training set--")
            logger.info("Number of training samples: %d", len(self.targets))
            logger.info("Number of supervised samples: %d",
                        len(self.sup_indices))
            logger.info("Number of unsup samples: %d", len(self.unsup_indices))
            logger.info("Label (and pseudo-label) histogram: %s",
                        tuple(
                            zip(*np.unique(self.targets, return_counts=True))))
            logger.info("Shape of training data: %s", np.shape(self.data))

        # Test set
        else:
            self.sup_indices = list(range(len(self.targets)))
            self.unsup_indices = []

            logger = logging.getLogger()
            logger.info("--Test set--")
            logger.info("Number of samples: %d", len(self.targets))
            logger.info("Label histogram: %s",
                        tuple(
                            zip(*np.unique(self.targets, return_counts=True))))
            logger.info("Shape of data: %s", np.shape(self.data))
Example #13
parser = argparse.ArgumentParser()
parser.add_argument('--maxiter', type=int, default=int(1e5 + 1))
parser.add_argument('--lr', type=float, default=0.05)
parser.add_argument('--batch-size', type=int, default=100)
parser.add_argument('--resume', type=str, default=None)
parser.add_argument('--datadir',
                    type=str,
                    default='/home/wjf/datasets/SVHN/train25000_test70000')
parser.add_argument('--logdir', type=str, default='logs/SGD')

args = parser.parse_args()
logger = LogSaver(args.logdir)
logger.save(str(args), 'args')

# data
dataset = SVHN(args.datadir)
logger.save(str(dataset), 'dataset')
test_list = dataset.getTestList(1000, True)

# model
start_iter = 0
model = vgg11().cuda()
logger.save(str(model), 'classifier')
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
logger.save(str(optimizer), 'optimizer')

if args.resume:
    checkpoint = torch.load(args.resume)
    start_iter = checkpoint['iter']
    model.load_state_dict(checkpoint['model'])
Example #14
import tensorflow as tf
from svhn import SVHN

n_input = 3072  # input layer (32x32 pixels x 3 RGB channels)
n_hidden1 = 512  # first hidden layer
n_hidden2 = 256  # second hidden layer
n_hidden3 = 128  # third hidden layer
n_output = 10  # output layer (digits 0 through 9)

svhn = SVHN("../res", n_output, use_extra=False, gray=False)

learning_rate = 0.001
batch_size = 40
n_iterations = int(svhn.train_examples / batch_size)

normalization_offset = 0.0  # beta
normalization_scale = 1.0  # gamma
normalization_epsilon = 0.001  # epsilon

# placeholders for the input and output data
X = tf.placeholder("float", [None, 32, 32, 3], name="X")
Y = tf.placeholder("float", [None, n_output], name="Y")

# weights between the layers
weights = {
    'w1': tf.Variable(tf.truncated_normal([n_input, n_hidden1], stddev=0.1)),
    'w2': tf.Variable(tf.truncated_normal([n_hidden1, n_hidden2], stddev=0.1)),
    'w3': tf.Variable(tf.truncated_normal([n_hidden2, n_hidden3], stddev=0.1)),
    'out': tf.Variable(tf.truncated_normal([n_hidden3, n_output], stddev=0.1)),
}
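The weights above describe a fully connected network on flattened 3072-dimensional inputs, so X must be reshaped before the first matmul. A minimal sketch of the forward pass, assuming a biases dict with keys 'b1', 'b2', 'b3', and 'out':

# Sketch of the MLP forward pass implied by the weights dict; `biases` is assumed.
def forward(x):
    flat = tf.reshape(x, [-1, n_input])  # 32 * 32 * 3 = 3072
    layer1 = tf.nn.relu(tf.matmul(flat, weights['w1']) + biases['b1'])
    layer2 = tf.nn.relu(tf.matmul(layer1, weights['w2']) + biases['b2'])
    layer3 = tf.nn.relu(tf.matmul(layer2, weights['w3']) + biases['b3'])
    return tf.matmul(layer3, weights['out']) + biases['out']  # class logits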