Example #1
    def factory(organism):
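        """Return the model organism database handler matching the given organism name or alias."""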
        from sgd import SGD
        from zfin import ZFin
        from worm import WormBase
        from fly import FlyBase
        from mouse import MGI
        from rat import RGD
        from human import Human

        if organism in ("Saccharomyces cerevisiae", "S. cerevisiae", "YEAST"):
            return SGD()
        elif organism in ("Danio rerio", "D. rerio", "DANRE"):
            return ZFin()
        elif organism in ("Caenorhabditis elegans", "C. elegans", "CAEEL"):
            return WormBase()
        elif organism in ("Drosophila melanogaster", "D. melanogaster",
                          "DROME"):
            return FlyBase()
        elif organism in ("Mus musculus", "M. musculus", "MOUSE"):
            return MGI()
        elif organism in ("Rattus norvegicus", "R. norvegicus", "RAT"):
            return RGD()
        elif organism in ("H**o sapiens", "H. sapiens", "HUMAN"):
            return Human()
        else:
            return None
Example #2
def sgd_model(row_vectors, col_vectors, eta, iterations):
    '''
    Create and train an SGD model on the given row and column vectors.
    '''
    model = SGD(eta, iterations)
    model.learn(row_vectors, col_vectors)
    return model
Example #3
import sys
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

start_time = time.time()  # assumed: timer started at program launch, used in the elapsed-time prints below


def main():
    classifier_name=sys.argv[1]
    datapath=sys.argv[2]
    df=pd.read_csv(datapath)
    X=df.iloc[0:100,[0,2]].values
    y=df.iloc[0:100,4].values
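    # Binary labels for the first 100 Iris samples: +1 for Iris-setosa, -1 otherwise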
    y=np.where(y=='Iris-setosa',1,-1)
    
    if(classifier_name=='Perceptron'):
        from perceptron import Perceptron
        model=Perceptron(eta=0.01,n_iter=10) 
        model.learn(X,y)
        print('errors for this classification are:\n',model.errors)
        plt.plot(range(1,len(model.errors)+1), model.errors,marker='o')
        model.testdatairis('test.csv')
        print("Accuracy of Perceptron is:",model.accuracy)
        print("--- %s seconds ---" % (time.time() - start_time))
        
    elif(classifier_name=='Adaline'):
        from adaline import Adaline
        model=Adaline(eta=0.01,n_iter=20)
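        # Standardise both features (zero mean, unit variance) before training Adaline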
        X_std = np.copy(X)
        X_std[:,0] = (X[:,0] - X[:,0].mean()) / X[:,0].std()
        X_std[:,1] = (X[:,1] - X[:,1].mean()) / X[:,1].std()
        model.learn(X_std,y)
        print('sum of errors in each iteration for this classification are:\n',model.cost)
        plt.plot(range(1,len(model.cost)+1), model.cost,marker='o')
        model.testdatairis('test.csv')
        print("Accuracy of Adaline is:",model.accuracy)
        print("--- %s seconds ---" % (time.time() - start_time))
        
    elif(classifier_name=='SGD'):
        from sgd import SGD
        model=SGD(eta=0.01,n_iter=15)
        model.learn(X,y)
        print('sum of errors in each iteration for this classification are:\n', model.cost)
        plt.plot(range(1,len(model.cost)+1), model.cost,marker='o')
        model.testdatairis('test.csv')
        print("Accuracy of SGD is:",model.accuracy)
        print("--- %s seconds ---" % (time.time() - start_time))
        
        
    else:
        print("invalid classifier")
        return

    plt.title(classifier_name)
    plt.xlabel('iteration')
    plt.ylabel('errors')
    plt.show()
    return model
Example #4
def get_layer_trainer_sgd_rbm(layer, trainset):
    train_algo = SGD(
        learning_rate = 1e-1,
        batch_size =  5,
        #"batches_per_iter" : 2000,
        monitoring_batches =  20,
        monitoring_dataset =  trainset,
        cost = SMD(corruptor=GaussianCorruptor(stdev=0.4)),
        termination_criterion =  EpochCounter(max_epochs=MAX_EPOCHS),
        # another option:
        # MonitorBasedTermCrit(prop_decrease=0.01, N=10),
        )
    model = layer
    callbacks = [MonitorBasedLRAdjuster(), ModelSaver()]
    return Train(model = model, algorithm = train_algo,
            callbacks = callbacks, dataset = trainset)
Example #5
 def setUp(self):
     self.batch_size = 10
     wordvec_size    = 100
     hidden_size     = 100
     self.time_size  = 5
     learning_rate   = 0.1
     self.max_epoch  = 100
     corpus, word_to_id, id_to_word = load_data('train')
     corpus_size = 1000
     corpus = corpus[:corpus_size]
     vocab_size = int(max(corpus) + 1)
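     # Next-token prediction: inputs are the corpus, targets are the corpus shifted by one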
     self.xs = corpus[:-1]
     self.ts = corpus[1:]
     model = SimpleRNNLM(vocab_size, wordvec_size, hidden_size)
     optimiser = SGD(learning_rate)
     self.rnnlm_trainer = RNNLMTrainer(model, optimiser)
Example #6
def get_layer_trainer_sparse_denoising_autoencoder(layer, trainset):
    train_algo = SGD(
              learning_rate = 0.1,
              cost =  SampledMeanSquaredReconstructionError(),
              batch_size =  10,
              monitoring_batches = 10,
              monitoring_dataset = trainset,
              termination_criterion = EpochCounter(max_epochs=MAX_EPOCHS),
              update_callbacks = None
              )

    model = layer
    callbacks = [ModelSaver()]
    return Train(model = model,
            algorithm = train_algo,
            callbacks = callbacks,
            dataset = trainset)
Example #7
 def test_update(self):
     sgd = SGD()
     gnn = GraphNeuralNetwork(vector_size=2)
     expected = gnn.params
     sgd.update(gnn)
     actual = gnn.params
     self.assertEqual(expected, actual)
     params = copy.deepcopy(gnn.params)
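     # Repeatedly apply random gradients and verify the plain SGD rule: param -= lr * grad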
     for _ in range(100):
         gnn.grads["W"] = np.random.rand()
         gnn.grads["A"] = np.random.rand()
         gnn.grads["b"] = np.random.rand()
         sgd.update(gnn)
         for key, param in params.items():
             params[key] = param - gnn.grads[key] * sgd.lr
             expected = repr(params[key])
             actual = repr(gnn.params[key])
             self.assertEqual(expected, actual)
Example #8
network = models.Sequential()
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28, )))
network.add(layers.Dense(10, activation='softmax'))

results_acc = []
result_acc = []
results_loss = []
result_loss = []
test_acc_results = []
test_loss_results = []
nonzero_weights = []

l = [
    GRDA(lr=.005, c=.02),
    SGD(lr=.005, nesterov=False),
    SGD(lr=.005, nesterov=True),
    Adagrad(lr=.005),
    Adam(lr=.005, amsgrad=False),
    Adam(lr=.005, amsgrad=True)
]
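# Total number of weights across all layers of the network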
allcounts = np.sum([x.size for x in network.get_weights()])

for opt in l:
    network.compile(optimizer=opt,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    #network.save_weights('initial_weights.h5')
    #network.load_weights('initial_weights.h5')
    #initial_weights = network.get_weights()
    result_acc = []
Example #9
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), args.lr, momentum=0.9, nesterov=True)
#optimizer = Adagrad(model.parameters(), args.lr)
#optimizer = Adam(model.parameters(), betas=(0.9, 0.999))
#optimizer = RMSprop(model.parameters())

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
Example #10
from layers.tanh_layer import TanhLayer

np.random.seed(42)

# Load and clean data
x = None
y = None

# Split x and y into training and validation sets
x_train = None
y_train = None
x_val = None
y_val = None

# Create NN
nn = SGD()

nn.add_layer(InputLayer(10))
nn.add_layer(SigmoidLayer(10))
nn.add_layer(LinearLayer(10))
nn.add_layer(TanhLayer(10))
nn.add_layer(SoftmaxLoglossLayer(10))

nn.initialize_weights()

# Train network
nn.mini_batch_learning(x_train,
                       y_train,
                       x_val,
                       y_val,
                       n_epoch=100,
Example #11
        data = preprocess(args.dataset, makebinary=True)

    if data is None:
        print('Dataset unrecognized.')
        exit()

    X, y = data['train'] 
    Xs, ys = data['test']

    # Based on input, call classifiers
    if   args.classifier == 'perceptron':
        model = Perceptron(args.eta, args.iters)
    elif args.classifier == 'adaline':
        model = Adaline(args.eta, args.iters)
    elif args.classifier == 'sgd':
        model = SGD(args.eta, args.iters)
    elif args.classifier == 'ovr':
        model = OVR(data['classes'], args.eta, args.iters)

    model.fit(X, y)
    res = model.predict(Xs)

    matches = 0

    if args.classifier == 'ovr':
        accuracy = []
        res  = res[1]
        clas = res[0]
        print(res)
        print(np.where(ys == clas, 1, -1))
        for i in range(len(res)):
Example #12
for key, val in tparams.iteritems():
	print key
	# running means and running variances should not be included in the list for which gradients are computed
	if 'rm' in key or 'rv' in key:
		continue
	elif 'sg' in key:
		tparams_sg[key] = val
	else:
		tparams_net[key] = val

print "Setting up optimizers"
f_grad_shared, f_update = adam(lr, tparams_net, grads, inps_net, outs)
# f_grad_shared_sg, f_update_sg = adam(lr, tparams_sg, grads_sg, inps_sg, loss_sg)

# sgd with momentum updates
sgd = SGD(lr=args.learning_rate)
f_update_sg = theano.function(inps_sg, loss_sg, updates=sgd.get_grad_updates(loss_sg, param_sg), on_unused_input='ignore', profile=False)

print "Training"
cost_report = open('./Results/disc/SF/gradcomp_' + code_name + '_' + str(args.batch_size) + '_' + str(args.learning_rate) + '.txt', 'w')
id_order = range(len(trc))

iters = 0
min_cost = 100000.0
epoch = 0
condition = False

while condition == False:
	print "Epoch " + str(epoch + 1),
	np.random.shuffle(id_order)
	epoch_cost = 0.
Example #13
import sys
sys.path.append('./src')
sys.path.append('./src/concerns')
sys.path.append('./src/models')
sys.path.append('./src/layers')
sys.path.append('./src/optimisers')
from sgd import SGD
from trainer import Trainer
from spiral import *
from two_layer_net import TwoLayerNet

# Hyper Parameters
max_epoch     = 300
batch_size    = 30
hidden_size   = 10
learning_rate = 1.0

x, t = load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

trainer = Trainer(model, optimizer)
trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
file_path = '../img/training_plot.png'
trainer.save_plot_image(file_path)
Example #14
#        out = self.fc1(x)
#        out = F.relu(out)
#        out = self.fc2(out)
#        return out

#net = Net(input_size, hidden_size, num_classes)
from lenet3 import LeNet
net = LeNet()
net.cuda()
net.train()
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
optimizer = SGD(net.parameters(),
                lr=learning_rate,
                weight_decay=0.0001,
                momentum=0.9,
                nesterov=True)
# Train the Model
for epoch in range(num_epochs):

    train_loss_log = AverageMeter()
    train_acc_log = AverageMeter()
    val_loss_log = AverageMeter()
    val_acc_log = AverageMeter()
    for i, (images, labels) in enumerate(train_loader):
        # Convert torch tensor to Variable
        #        if i>0:
        #            break
        #        images = Variable(images.view(-1, 28*28).cuda())
        images = Variable(images.cuda())
Example #15
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=4096,
                        metavar='N',
                        help='input batch size for training (default: 4096)')
    parser.add_argument('--optimizer',
                        type=str,
                        default='lamb',
                        choices=['lamb', 'adam', 'lars', 'sgd'],
                        help='which optimizer to use')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=2048,
                        metavar='N',
                        help='input batch size for testing (default: 2048)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--wd',
                        type=float,
                        default=0.01,
                        metavar='WD',
                        help='weight decay (default: 0.01)')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--eta',
                        type=float,
                        default=0.001,
                        metavar='e',
                        help='LARS coefficient (default: 0.001)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')

    args = parser.parse_args()
    use_cuda = torch.cuda.is_available()
    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    print(device)
    print(args)
    print("*" * 50)

    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        # datasets.MNIST('../data', train=True, download=True,
        datasets.EMNIST('../data',
                        train=True,
                        download=True,
                        split='letters',
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307, ), (0.3081, ))
                        ])),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(
        # datasets.MNIST('../data', train=False, transform=transforms.Compose([
        datasets.EMNIST('../data',
                        train=False,
                        split='letters',
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307, ), (0.3081, ))
                        ])),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs)

    num_features = 26
    model = Net(num_outputs=num_features).to(device)

    writer = SummaryWriter(comment="_cv_%s_%s_%s" %
                           (args.optimizer, args.batch_size, args.lr))
    weight_decay = args.lr / args.epochs
    print(len(train_loader), len(test_loader))
    print(model)
    print(f'total params ---> {count_parameters(model)}')
    if args.optimizer == 'lamb':
        optimizer = Lamb(model.parameters(),
                         lr=args.lr,
                         weight_decay=args.wd,
                         betas=(.9, .999),
                         adam=False,
                         writer=writer)
    elif args.optimizer == 'lars':
        base_optimizer = torch.optim.SGD(model.parameters(),
                                         lr=args.lr,
                                         momentum=0.9)
        optimizer = LARS(optimizer=base_optimizer,
                         eps=1e-8,
                         trust_coef=0.001,
                         writer=writer)
    elif args.optimizer == 'sgd':
        optimizer = SGD(model.parameters(),
                        lr=args.lr,
                        momentum=0.9,
                        weight_decay=args.wd,
                        writer=writer)
    else:
        # use adam optimizer
        optimizer = Lamb(model.parameters(),
                         lr=args.lr,
                         weight_decay=args.wd,
                         betas=(.9, .999),
                         adam=True,
                         writer=writer)
    print(f'Currently using the {args.optimizer}\n\n')

    metrics = {"acc": [], "test_loss": []}
    os.makedirs("cv_results", exist_ok=True)

    for epoch in range(1, args.epochs + 1):
        print("Epoch #%s" % epoch)
        train(args, model, device, train_loader, optimizer, epoch, writer)
        acc, loss = test(args, model, device, test_loader, writer, epoch)
        metrics["acc"].append(acc)
        metrics["test_loss"].append(loss)
        pickle.dump(
            metrics,
            open(
                os.path.join(
                    "cv_results",
                    '%s_%s_metrics.p' % (args.optimizer, args.batch_size)),
                'wb'))
    #print("Epoch #$s: acc=%s, loss=%s" % (epoch, acc, loss))
    return optimizer
Example #16
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='../data',
                          train=True,
                          download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset,
                                  batch_size=args.train_batch,
                                  shuffle=True,
                                  num_workers=args.workers)

    testset = dataloader(root='../data',
                         train=False,
                         download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.endswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
        )
    else:
        model = models.__dict__[args.arch](num_classes=num_classes)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()

    if args.opt_method == "sgd":
        print("using SGD")
        optimizer = SGD(model.parameters(),
                        lr=args.lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        nesterov=False)
    elif args.opt_method == "adam":
        print("using Adam")
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    else:
        raise Exception("Optimizer not supported")

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(
            args.checkpoint,
            str(args.dataset) + '_' + str(args.arch) + '_mom_lr=' +
            str(args.lr) + '_m=' + str(args.momentum) + '_drop:' +
            str(args.drop) + '_seed=' + str(args.manualSeed) + '.txt'),
                        title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return
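    # flag3: log the initial losses only once (train_init_loss / test_init_loss are assumed to be defined elsewhere)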
    flag3 = 0
    # Train and val
    for epoch in range(start_epoch, args.epochs):

        if args.opt_method == "sgd":
            adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda)
        if flag3 == 0:
            logger.append([0, train_init_loss, test_init_loss, 0, 0])
            flag3 = 1
        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)

    logger.close()

    print('Best acc:')
    print(best_acc)
Example #17
# Load and prepare data
date, latitude, longitude, magnitude = Dataset.load_from_file("database.csv")
data_size = len(date)
vectorsX, vectorsY = Dataset.vectorize(date, latitude,
                                       longitude), magnitude.reshape(
                                           (data_size, 1))

# Get Batcher
batch_gen = Generator.gen_random_batch(batch_size, vectorsX, vectorsY)

# randomly initialize our weights with mean 0
syn0 = 2 * np.random.standard_normal((vectorsX.shape[1], 32)) / 10
syn1 = 2 * np.random.standard_normal((32, vectorsY.shape[1])) / 10

# Init trainer table and datalog
trainers = [SGD(syn0, syn1), Momentum(syn0, syn1), Adma(syn0, syn1)]
datalog = []

# Train model
x = np.arange(1, max_epochs)
for t in x:
    # Get Batch
    batch = next(batch_gen)

    for trainer in trainers:
        syn0, syn1 = trainer.get_weight()

        # feed forward
        l0 = batch[0]
        l1 = Math.sigmoid(np.dot(l0, syn0))
        l2 = Math.relu(np.dot(l1, syn1))
Example #18
def main(local_rank, world_size, init_method='tcp://127.0.0.1:23499'):
    dist.init_process_group(backend='nccl',
                            init_method=init_method,
                            rank=local_rank,
                            world_size=world_size)
    cfg.local_rank = local_rank
    torch.cuda.set_device(local_rank)
    cfg.rank = dist.get_rank()
    cfg.world_size = world_size
    print(cfg.rank, dist.get_world_size())
    trainset = MXFaceDataset(root_dir='/root/face_datasets/webface/',
                             local_rank=local_rank)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainset, shuffle=True)
    trainloader = DataLoaderX(local_rank=local_rank,
                              dataset=trainset,
                              batch_size=cfg.batch_size,
                              sampler=train_sampler,
                              num_workers=0,
                              pin_memory=True,
                              drop_last=False)
    backbone = iresnet50(False).to(cfg.local_rank)
    backbone.train()
    # backbone = nn.SyncBatchNorm.convert_sync_batchnorm(backbone)
    for ps in backbone.parameters():
        dist.broadcast(ps, 0)

    backbone = torch.nn.parallel.DistributedDataParallel(
        backbone, broadcast_buffers=False, device_ids=[dist.get_rank()])
    backbone.train()
    sub_start, sub_classnum = get_sub_class(cfg.rank, dist.get_world_size())
    print(sub_start, sub_classnum)
    classifier_head = classifier(cfg.embedding_size,
                                 sub_classnum,
                                 sample_rate=0.4)
    cosface = CosFace(s=64.0, m=0.4)
    optimizer = SGD([{
        'params': backbone.parameters()
    }, {
        'params': classifier_head.parameters()
    }],
                    0.1,
                    momentum=0.9,
                    weight_decay=cfg.weight_decay,
                    rescale=cfg.world_size)
    warm_up_with_multistep_lr = lambda epoch: (
        (epoch + 1) / (4 + 1))**2 if epoch < -1 else 0.1**len(
            [m for m in [20, 29] if m - 1 <= epoch])
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=warm_up_with_multistep_lr)
    n_epochs = 33
    start_epoch = 0

    if cfg.local_rank == 0:
        writer = SummaryWriter(log_dir='logs/shows')
    global_step = 0
    loss_fun = nn.CrossEntropyLoss()
    for epoch in range(start_epoch, n_epochs):
        train_sampler.set_epoch(epoch)
        for step, (img, label) in enumerate(trainloader):
            start = time.time()
            lable_gather, norm_weight = classifier_head.prepare(
                label, optimizer)
            x = F.normalize(backbone(img))
            x_gather = torch.zeros(x.size()[0] * cfg.world_size,
                                   cfg.embedding_size,
                                   device=cfg.local_rank)
            dist.all_gather(list(x_gather.chunk(cfg.world_size, dim=0)),
                            x.data)
            x_gather.requires_grad = True

            logits = classifier_head(x_gather, norm_weight)

            logits = cosface(logits, lable_gather)

            with torch.no_grad():
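                # Compute softmax probabilities and the cross-entropy gradient manually across all ranks (outside autograd)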
                max_v = torch.max(logits, dim=1, keepdim=True)[0]
                dist.all_reduce(max_v, dist.ReduceOp.MAX)
                exp = torch.exp(logits - max_v)
                sum_exp = exp.sum(dim=1, keepdims=True)
                dist.all_reduce(sum_exp, dist.ReduceOp.SUM)
                exp.div_(sum_exp.clamp_min(1e-20))
                grad = exp
                index = torch.where(lable_gather != -1)[0]
                one_hot = torch.zeros(index.size()[0],
                                      grad.size()[1],
                                      device=grad.device)
                one_hot.scatter_(1, lable_gather[index, None], 1)

                loss = torch.zeros(grad.size()[0], 1, device=grad.device)
                loss[index] = grad[index].gather(1, lable_gather[index, None])
                dist.all_reduce(loss, dist.ReduceOp.SUM)
                loss_v = loss.clamp_min_(1e-20).log_().mean() * (-1)

                grad[index] -= one_hot
                grad.div_(grad.size()[0])

            logits.backward(grad)
            if x_gather.grad is not None:
                x_gather.grad.detach_()
            x_grad = torch.zeros_like(x)
            dist.reduce_scatter(
                x_grad, list(x_gather.grad.chunk(cfg.world_size, dim=0)))
            x.backward(x_grad)
            optimizer.step()
            classifier_head.update()
            optimizer.zero_grad()
            if cfg.rank == 0:
                print(x_gather.grad.max(), x_gather.grad.min())
                print('loss_v', loss_v.item(), global_step)
                writer.add_scalar('loss', loss_v, global_step)
                print('lr',
                      optimizer.state_dict()['param_groups'][0]['lr'],
                      global_step)
                print(cfg.batch_size / (time.time() - start))

            global_step += 1
        scheduler.step()
        if cfg.rank == 0:
            torch.save(backbone.module.state_dict(),
                       "models/" + str(epoch) + 'backbone.pth')
    dist.destroy_process_group()
Example #19
                           args.layers,
                           NUM_DIRECTIONS,
                           args.dropout,
                           device).to(device)
    model = torch.nn.DataParallel(model)

    writer = SummaryWriter(comment="_nlp_%s_%s_%s" % (args.optimizer, args.batch_size, args.learning_rate))
    weight_decay = args.learning_rate / args.epochs
    if args.optimizer == 'lamb':
        optimizer = Lamb(model.parameters(), lr=args.learning_rate, weight_decay=weight_decay,
                         betas=(.9, .999), adam=False, writer=writer)
    elif args.optimizer == 'lars':
        base_optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9, weight_decay=weight_decay)
        optimizer = LARS(optimizer=base_optimizer, eps=1e-8, trust_coef=0.001, writer=writer)
    elif args.optimizer == 'sgd':
        optimizer = SGD(model.parameters(), lr=args.learning_rate, momentum=0.9,
                        weight_decay=weight_decay, writer=writer)
    else:
        # use adam optimizer
        optimizer = Lamb(model.parameters(), lr=args.learning_rate, weight_decay=weight_decay,
                         betas=(.9, .999), adam=True, writer=writer)
    print(f'The model has {count_parameters(model):,} trainable parameters')
    ckpt_dir_name = "%s_%s_%s" % (args.working_dir, args.optimizer, args.batch_size)
    model, optimizer = load_pretrained_model(model, optimizer,
                                             "%s/ckpt/%s" % (ckpt_dir_name, "best_weights.pt"))

    print(args)
    ckpt_dir = os.path.join(ckpt_dir_name, 'ckpt')
    os.makedirs(ckpt_dir, exist_ok=True)

    try:
        metrics = pickle.load(open(os.path.join(ckpt_dir, 'metrics.p'), 'rb'))
Example #20
 def __init__(self, classes, eta=0.01, iters=10, random_state=1):
     self.classifiers = {c: SGD(eta, iters, random_state) for c in classes}
     self.classes = classes
Example #21
def main(local_rank):
    dist.init_process_group(backend='nccl', init_method='env://')
    cfg.local_rank = local_rank
    torch.cuda.set_device(local_rank)
    cfg.rank = dist.get_rank()
    cfg.world_size = dist.get_world_size()
    trainset = MXFaceDataset(root_dir=cfg.rec, local_rank=local_rank)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainset, shuffle=True)
    train_loader = DataLoaderX(local_rank=local_rank,
                               dataset=trainset,
                               batch_size=cfg.batch_size,
                               sampler=train_sampler,
                               num_workers=0,
                               pin_memory=True,
                               drop_last=False)

    backbone = backbones.iresnet100(False).to(local_rank)
    backbone.train()

    # Broadcast init parameters
    for ps in backbone.parameters():
        dist.broadcast(ps, 0)

    # DDP
    backbone = torch.nn.parallel.DistributedDataParallel(
        module=backbone, broadcast_buffers=False, device_ids=[cfg.local_rank])
    backbone.train()

    # Memory classifier
    dist_sample_classifer = DistSampleClassifier(rank=dist.get_rank(),
                                                 local_rank=local_rank,
                                                 world_size=cfg.world_size)

    # Margin softmax
    margin_softmax = MarginSoftmax(s=64.0, m=0.4)

    # Optimizer for backbone and classifier
    optimizer = SGD([{
        'params': backbone.parameters()
    }, {
        'params': dist_sample_classifer.parameters()
    }],
                    lr=cfg.lr,
                    momentum=0.9,
                    weight_decay=cfg.weight_decay,
                    rescale=cfg.world_size)

    # Lr scheduler
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer,
                                                  lr_lambda=cfg.lr_func)
    n_epochs = cfg.num_epoch
    start_epoch = 0

    if local_rank == 0:
        writer = SummaryWriter(log_dir='logs/shows')

    #
    total_step = int(
        len(trainset) / cfg.batch_size / dist.get_world_size() * cfg.num_epoch)
    if dist.get_rank() == 0:
        print("Total Step is: %d" % total_step)

    losses = AverageMeter()
    global_step = 0
    train_start = time.time()
    for epoch in range(start_epoch, n_epochs):
        train_sampler.set_epoch(epoch)
        for step, (img, label) in enumerate(train_loader):
            total_label, norm_weight = dist_sample_classifer.prepare(
                label, optimizer)
            features = F.normalize(backbone(img))

            # Features all-gather
            total_features = torch.zeros(features.size()[0] * cfg.world_size,
                                         cfg.embedding_size,
                                         device=local_rank)
            dist.all_gather(list(total_features.chunk(cfg.world_size, dim=0)),
                            features.data)
            total_features.requires_grad = True

            # Calculate logits
            logits = dist_sample_classifer(total_features, norm_weight)
            logits = margin_softmax(logits, total_label)

            with torch.no_grad():
                max_fc = torch.max(logits, dim=1, keepdim=True)[0]
                dist.all_reduce(max_fc, dist.ReduceOp.MAX)

                # Calculate exp(logits) and all-reduce
                logits_exp = torch.exp(logits - max_fc)
                logits_sum_exp = logits_exp.sum(dim=1, keepdims=True)
                dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM)

                # Calculate prob
                logits_exp.div_(logits_sum_exp)

                # Get one-hot
                grad = logits_exp
                index = torch.where(total_label != -1)[0]
                one_hot = torch.zeros(index.size()[0],
                                      grad.size()[1],
                                      device=grad.device)
                one_hot.scatter_(1, total_label[index, None], 1)

                # Calculate loss
                loss = torch.zeros(grad.size()[0], 1, device=grad.device)
                loss[index] = grad[index].gather(1, total_label[index, None])
                dist.all_reduce(loss, dist.ReduceOp.SUM)
                loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1)

                # Calculate grad
                grad[index] -= one_hot
                grad.div_(features.size()[0])

            logits.backward(grad)
            if total_features.grad is not None:
                total_features.grad.detach_()
            x_grad = torch.zeros_like(features)

            # Feature gradient all-reduce
            dist.reduce_scatter(
                x_grad, list(total_features.grad.chunk(cfg.world_size, dim=0)))
            x_grad.mul_(cfg.world_size)
            # Backward backbone
            features.backward(x_grad)
            optimizer.step()

            # Update classifier
            dist_sample_classifer.update()
            optimizer.zero_grad()
            losses.update(loss_v, 1)
            if cfg.local_rank == 0 and step % 50 == 0:
                time_now = (time.time() - train_start) / 3600
                time_total = time_now / ((global_step + 1) / total_step)
                time_for_end = time_total - time_now
                writer.add_scalar('time_for_end', time_for_end, global_step)
                writer.add_scalar('loss', loss_v, global_step)
                print(
                    "Speed %d samples/sec   Loss %.4f   Epoch: %d   Global Step: %d   Required: %1.f hours"
                    % ((cfg.batch_size * global_step /
                        (time.time() - train_start) * cfg.world_size),
                       losses.avg, epoch, global_step, time_for_end))
                losses.reset()

            global_step += 1
        scheduler.step()
        if dist.get_rank() == 0:
            import os
            if not os.path.exists(cfg.output):
                os.makedirs(cfg.output)
            torch.save(backbone.module.state_dict(),
                       os.path.join(cfg.output,
                                    str(epoch) + 'backbone.pth'))
    dist.destroy_process_group()
Example #22
#!/bin/python3.5
# -*- coding: utf-8 -*-

"""
The run.py file produces our final submission into a "submission.csv"
in the data folder
"""

from als import ALS
from sgd import SGD
from helpers import create_csv_submission, load_data

if __name__ == '__main__':
  # Initializing dataset
  print("Loading dataset")
  path_dataset = "data/data_train.csv"
  ratings = load_data(path_dataset)

  # Creating the sub_file with the best prediction
  # prediction, test_rmse = ALS(ratings, None, 3, 0.2, 0.9)
  prediction, test_rmse = SGD(ratings, None, 0.04, 9, 0.1, 0.016)
  create_csv_submission(prediction)
  print("Submission created at data/submission.csv")
Example #23
import time

from sgd import SGD
from zfin import ZFin
from worm import WormBase
from fly import FlyBase
from mouse import MGI
from rat import RGD

from mod import MOD

sgd = SGD()
zfin = ZFin()
worm = WormBase()
fly = FlyBase()
mouse = MGI()
rat = RGD()

mod = MOD()

mods = [mouse, zfin, sgd, worm, fly, rat]

# Load genes for each model organism database, timing each load
for m in mods:
    start_time = time.time()
    m.load_genes()
    print(" --- %s seconds --- " % (time.time() - start_time))

mod.load_homologs()

for m in mods:
    start_time = time.time()
Example #24
def online_learning_example(it):

    # Initialize weights to random value
    weight_gt = [2, -3]
    weight_final = np.random.randint(-10, 10, size=2)
    # weight_final = np.array([0, 2])

    ground_truth_scm = StructuralCausalModel({
        "init_x":
        lambda n_samples: np.random.normal(loc=8., scale=2.0, size=n_samples),
        "init_y":
        lambda n_samples: np.random.normal(loc=8., scale=2.0, size=n_samples),
        "push_x":
        lambda n_samples: np.random.normal(loc=0., scale=1.0, size=n_samples),
        "push_y":
        lambda n_samples: np.random.normal(loc=0., scale=1.0, size=n_samples),
        "final_x":
        linear_model(["init_x", "push_x"], weight_gt, noise_scale=.1),
        "final_y":
        linear_model(["init_y", "push_y"], weight_gt, noise_scale=.1),
    })

    df_gt = ground_truth_scm.sample(n_samples=it)
    sgd = SGD(.001, 1, init_weights=weight_final)
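    # Online learning loop: intervene on the predicted SCM with each observed sample, compare its prediction to the ground truth, and update the weights via SGD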
    for i in range(it):
        gt_init_x = [df_gt.init_x[i]]
        gt_init_y = [df_gt.init_y[i]]
        gt_push_x = [df_gt.push_x[i]]
        gt_push_y = [df_gt.push_y[i]]
        gt_final_x = [df_gt.final_x[i]]
        gt_final_y = [df_gt.final_y[i]]

        intervention_vars = {
            "init_x": gt_init_x,
            "init_y": gt_init_y,
            "push_x": gt_push_x,
            "push_y": gt_push_y
        }
        pred_scm = StructuralCausalModel({
            "init_x":
            lambda n_samples: np.random.normal(
                loc=8., scale=2.0, size=n_samples),
            "init_y":
            lambda n_samples: np.random.normal(
                loc=8., scale=2.0, size=n_samples),
            "push_x":
            lambda n_samples: np.random.normal(
                loc=0., scale=3.0, size=n_samples),
            "push_y":
            lambda n_samples: np.random.normal(
                loc=0., scale=3.0, size=n_samples),
            "final_x":
            linear_model(["init_x", "push_x"], weight_final, noise_scale=.1),
            "final_y":
            linear_model(["init_y", "push_y"], weight_final, noise_scale=.1),
        })

        pred_scm_do = do_multiple(list(intervention_vars), pred_scm)
        df_pred = pred_scm_do.sample(n_samples=1, set_values=intervention_vars)

        pred_final_x = df_pred.final_x
        pred_final_y = df_pred.final_y

        plt.plot(df_gt.init_x[i], df_gt.init_y[i], 'bo')
        text = "True_weights: {}\n Predicted weights {}".format(
            weight_gt, weight_final)
        plt.text(df_gt.init_x[i] * (1 + 0.01),
                 df_gt.init_y[i] * (1 + 0.01),
                 text,
                 fontsize=12)
        # plt.plot(df_gt.final_x, df_gt.final_y, 'go')
        plt.quiver(gt_init_x, gt_init_y, gt_final_x, gt_final_y, color="b")
        plt.quiver(gt_init_x, gt_init_y, pred_final_x, pred_final_y, color="r")

        weight_final, rmse_x = sgd.fit(gt_final_x, pred_final_x,
                                       [gt_init_x, gt_push_x])
        # weight_final_y, rmse_y = sgd.fit(gt_final_y, gt_final_y)

        plt.pause(1.)
        plt.clf()
    plt.show()
Example #25
def main(local_rank):
    cfg.local_rank = local_rank
    # cfg.rank = dist.get_rank()
    # cfg.world_size = dist.get_world_size()

    backbone = backbones.iresnet50(False)

    weights = torch.load("pytorch/partial_fc_glint360k_r50/16backbone.pth",
                         map_location=torch.device('cpu'))
    backbone.load_state_dict(weights)
    backbone = backbone.float()
    backbone = backbone.eval()

    # embedding 512

    img1 = cv2.imread('boy_1.jpg')
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
    img1 = image_preprocessing(img1)
    img1 = np.ones([112, 112, 3], dtype=np.float32)
    img1 = img1.transpose([2, 0, 1])
    img1 = np.expand_dims(img1, axis=0)
    img1 = torch.from_numpy(img1).float()
    img1 = torch.autograd.Variable(img1, requires_grad=False).to('cpu')

    img2 = cv2.imread('man_2.jpg')
    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
    img2 = image_preprocessing(img2)
    img2 = img2.transpose([2, 0, 1])
    img2 = np.expand_dims(img2, axis=0)
    img2 = torch.from_numpy(img2).float()
    img2 = torch.autograd.Variable(img2, requires_grad=False).to('cpu')
    with torch.no_grad():
        v1 = backbone.forward(img1)
        v2 = backbone.forward(img2)

    v1 = np.asarray(v1)
    import pickle
    pickle.dump(v1, open("sample.pkl", "wb"))
    print(v1)

    result = cosine_dist(v1, v2)
    print(result)

    exit(0)
    # Broadcast init parameters
    for ps in backbone.parameters():
        dist.broadcast(ps, 0)

    # DDP
    backbone = torch.nn.parallel.DistributedDataParallel(
        module=backbone, broadcast_buffers=False, device_ids=[cfg.local_rank])
    backbone.train()

    # Memory classifier
    dist_sample_classifer = DistSampleClassifier(rank=dist.get_rank(),
                                                 local_rank=local_rank,
                                                 world_size=cfg.world_size)

    # Margin softmax
    margin_softmax = MarginSoftmax(s=64.0, m=0.4)

    # Optimizer for backbone and classifier
    optimizer = SGD([{
        'params': backbone.parameters()
    }, {
        'params': dist_sample_classifer.parameters()
    }],
                    lr=cfg.lr,
                    momentum=0.9,
                    weight_decay=cfg.weight_decay,
                    rescale=cfg.world_size)

    # Lr scheduler
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer,
                                                  lr_lambda=cfg.lr_func)
    n_epochs = cfg.num_epoch
    start_epoch = 0

    if local_rank == 0:
        writer = SummaryWriter(log_dir='logs/shows')

    #
    total_step = int(
        len(trainset) / cfg.batch_size / dist.get_world_size() * cfg.num_epoch)
    if dist.get_rank() == 0:
        print("Total Step is: %d" % total_step)

    losses = AverageMeter()
    global_step = 0
    train_start = time.time()
    for epoch in range(start_epoch, n_epochs):
        train_sampler.set_epoch(epoch)
        for step, (img, label) in enumerate(train_loader):
            total_label, norm_weight = dist_sample_classifer.prepare(
                label, optimizer)
            features = F.normalize(backbone(img))

            # Features all-gather
            total_features = torch.zeros(features.size()[0] * cfg.world_size,
                                         cfg.embedding_size,
                                         device=local_rank)
            dist.all_gather(list(total_features.chunk(cfg.world_size, dim=0)),
                            features.data)
            total_features.requires_grad = True

            # Calculate logits
            logits = dist_sample_classifer(total_features, norm_weight)
            logits = margin_softmax(logits, total_label)

            with torch.no_grad():
                max_fc = torch.max(logits, dim=1, keepdim=True)[0]
                dist.all_reduce(max_fc, dist.ReduceOp.MAX)

                # Calculate exp(logits) and all-reduce
                logits_exp = torch.exp(logits - max_fc)
                logits_sum_exp = logits_exp.sum(dim=1, keepdims=True)
                dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM)

                # Calculate prob
                logits_exp.div_(logits_sum_exp)

                # Get one-hot
                grad = logits_exp
                index = torch.where(total_label != -1)[0]
                one_hot = torch.zeros(index.size()[0],
                                      grad.size()[1],
                                      device=grad.device)
                one_hot.scatter_(1, total_label[index, None], 1)

                # Calculate loss
                loss = torch.zeros(grad.size()[0], 1, device=grad.device)
                loss[index] = grad[index].gather(1, total_label[index, None])
                dist.all_reduce(loss, dist.ReduceOp.SUM)
                loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1)

                # Calculate grad
                grad[index] -= one_hot
                grad.div_(features.size()[0])

            logits.backward(grad)
            if total_features.grad is not None:
                total_features.grad.detach_()
            x_grad = torch.zeros_like(features)

            # Feature gradient all-reduce
            dist.reduce_scatter(
                x_grad, list(total_features.grad.chunk(cfg.world_size, dim=0)))
            x_grad.mul_(cfg.world_size)
            # Backward backbone
            features.backward(x_grad)
            optimizer.step()

            # Update classifier
            dist_sample_classifer.update()
            optimizer.zero_grad()
            losses.update(loss_v, 1)
            if cfg.local_rank == 0 and step % 50 == 0:
                time_now = (time.time() - train_start) / 3600
                time_total = time_now / ((global_step + 1) / total_step)
                time_for_end = time_total - time_now
                writer.add_scalar('time_for_end', time_for_end, global_step)
                writer.add_scalar('loss', loss_v, global_step)
                print(
                    "Speed %d samples/sec   Loss %.4f   Epoch: %d   Global Step: %d   Required: %1.f hours"
                    % ((cfg.batch_size * global_step /
                        (time.time() - train_start) * cfg.world_size),
                       losses.avg, epoch, global_step, time_for_end))
                losses.reset()

            global_step += 1
        scheduler.step()
        if dist.get_rank() == 0:
            import os
            if not os.path.exists(cfg.output):
                os.makedirs(cfg.output)
            torch.save(backbone.module.state_dict(),
                       os.path.join(cfg.output,
                                    str(epoch) + 'backbone.pth'))
    dist.destroy_process_group()
Example #26
hidden_size = 100
time_size = 35
learning_rate = 20.0
max_epoch = 4
max_grad = 0.25

# Load training data
corpus, word_to_id, id_to_word = load_data('train')
corpus_test, *_ = load_data('test')
vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

# Generate a model, optimiser and trainer
model = RNNLM(vocab_size, wordvec_size, hidden_size)
optimiser = SGD(learning_rate)
trainer = RNNLMTrainer(model, optimiser)

# 1. Train applying gradients clipping
training_process = trainer.fit(xs,
                               ts,
                               max_epoch,
                               batch_size,
                               time_size,
                               max_grad,
                               eval_interval=20)
for iter in training_process:
    print(iter)
file_path = '../img/train_rnnlm.png'
trainer.save_plot_image(file_path, ylim=(0, 500))
Example #27
def main():
    print('=========================================')
    print('               Numpy DNN                 ')
    print('              26/Nov/2017                ')
    print('    By Thang Vu ([email protected])    ')
    print('=========================================')

    # load datasets
    path = 'data/mnist.pkl.gz'
    train_set, val_set, test_set = load_mnist_datasets(path)
    batch_size = 128
    X_train, y_train = train_set
    X_val, y_val = val_set
    X_test, y_test = test_set

    # bookkeeping for the best model based on validation accuracy
    best_val_acc = -1
    best_model = None

    # create model and optimization method
    dnn = DNN()
    sgd = SGD(lr=0.1, lr_decay=0.1, weight_decay=1e-3, momentum=0.9)
    
    # Train 
    batch_size = 128
    for epoch in range(20):
        dnn.train_mode() # set model to train mode (because of dropout)
        
        num_train = X_train.shape[0]
        num_batch = num_train//batch_size
        for batch in range(num_batch):
            # get batch data
            batch_mask = np.random.choice(num_train, batch_size)
            X_batch = X_train[batch_mask]
            y_batch = y_train[batch_mask]
           
            # forward
            output = dnn.forward(X_batch)
            loss, dout = softmax_cross_entropy_loss(output, y_batch)
            if batch%100 == 0:
                print("Epoch %2d Iter %3d Loss %.5f" %(epoch, batch, loss))

            # backward and update
            grads = dnn.backward(dout)
            sgd.step(dnn.params, grads)
                                
        sgd.decay_learning_rate() # decay learning rate after one epoch
        dnn.eval_mode() # set model to eval mode 
        train_acc = check_acc(dnn, X_train, y_train)
        val_acc = check_acc(dnn, X_val, y_val)

        if(best_val_acc < val_acc):
            best_val_acc = val_acc
            best_model = dnn

        # best model selected above based on val_acc
        print('Epoch finished.')
        print('Train acc %.3f' %train_acc)
        print('Val acc %.3f' %val_acc)
        print('-'*30)
        print('')

    print('Train finished. Best acc %.3f' %best_val_acc)
    test_acc = check_acc(best_model, X_test, y_test)
    print('Test acc %.3f' %test_acc)
Example #28
plot_model(model, to_file='model_imdb.png', show_shapes=True)

results_acc = []
result_acc = []
results_loss = []
result_loss = []
test_acc_results = []
test_loss_results = []
l = [
    Adam(lr=0.001, amsgrad=True),
    AAdam(lr=0.001, amsgrad=True),
    Adam(lr=0.001, amsgrad=False),
    AAdam(lr=0.001, amsgrad=False),
    Adagrad(),
    AAdagrad(),
    SGD(),
    ASGD()
]  #, Adam(lr=0.001, amsgrad = True), AAdam(lr=0.001, amsgrad = True)]

for opt in l:

    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    #model.save_weights('initial_weights_imdb.h5')
    model.load_weights('initial_weights_imdb.h5')
    initial_weights = model.get_weights()
    result_acc = []
    result_loss = []
    test_loss = []
    test_acc = []
Example #29
 def setUp(self):
     model = TwoLayerNet(input_size=2, hidden_size=10, output_size=3)
     optimizer = SGD(lr=1.0)
     self.trainer = Trainer(model, optimizer)
     self.x, self.t = load_data()
     self.data_size = len(self.x)