Esempio n. 1
0
    def test_calc_delta(self):
        """Compare losses and input deltas of softmax+NLL with CrossEntropyLoss.

        A softmax layer followed by ``NegativeLogLikelihoodLoss`` and a
        ``CrossEntropyLoss`` applied to the raw inputs are expected to give the
        same per-element loss and the same delta at the softmax input.
        """
        expected_loss = [0.0, 0.0, 13.0067176]
        expected_delta = [-0.9933049, -0.0066928, 0.9999978]

        # Path 1: explicit softmax layer, then negative log-likelihood.
        net = Sequential([SoftMaxLayer()])
        inputs = np.array([15.0, 10.0, 2.0])
        probs = net.forward(inputs)
        self.assertEqual(probs.shape, (3, ))

        nll_loss = NegativeLogLikelihoodLoss()
        target = np.array([0.0, 0.0, 1.0])
        self.assertEqual(probs.shape, target.shape)
        nll_value = nll_loss.loss(probs, target)
        self.assertEqual(nll_value.shape, (3, ))
        assert_almost_equal(nll_value, expected_loss, decimal=5)

        # Path 2: cross-entropy evaluated directly on the raw inputs, with a
        # freshly built target vector.
        ce_loss = CrossEntropyLoss()
        target = np.array([0.0, 0.0, 1.0])
        ce_value = ce_loss.loss(inputs, target)
        self.assertEqual(ce_value.shape, (3, ))
        assert_almost_equal(ce_value, expected_loss, decimal=5)

        # Backward pass: the negated NLL gradient is pushed through the
        # softmax layer ...
        nll_delta = -nll_loss.dJdy_gradient(probs, target)
        assert_almost_equal(nll_delta, [0.0, 0.0, 445395.349996])
        softmax_delta = net.backward(nll_delta)
        assert_almost_equal(softmax_delta, expected_delta, decimal=5)

        # ... and has to match the negated cross-entropy gradient.
        ce_delta = -ce_loss.dJdy_gradient(inputs, target)
        assert_almost_equal(ce_delta, expected_delta, decimal=5)
Esempio n. 2
0
    def fit(self, X, Y):
        """Fit the gradient-boosted ensemble on the training set ``(X, Y)``.

        Reads ``self.loss``, ``self.classifier``, ``self.n_iter``,
        ``self.learning_rate`` and ``self.max_depth``; stores its results in
        ``self.out_dims``, ``self.learners`` and ``self.weights``.

        :param X: 2-D array of inputs, one row per sample.
        :param Y: targets. Flattened and one-hot encoded when
            ``self.classifier`` is true; otherwise a 1-D ``Y`` is reshaped
            into a single column.
        :raises ValueError: if ``self.loss`` is neither ``"mse"`` nor
            ``"crossentropy"``.
        """
        if self.loss == "mse":
            loss = MSELoss()
        elif self.loss == "crossentropy":
            loss = CrossEntropyLoss()
        else:
            # Without this branch an unknown name only surfaced later, as an
            # UnboundLocalError on the first use of ``loss``.
            raise ValueError(
                "Unsupported loss {!r}; expected 'mse' or 'crossentropy'".
                format(self.loss))

        # convert Y to one-hot encoding
        if self.classifier:
            Y = to_one_hot(Y.flatten())
        else:
            # if the shape of Y is like (N,), then we need to convert it to be (N,1)
            Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

        # Unpacking two values keeps the requirement that X is 2-D.
        N, _ = X.shape
        # out_dims is the number of columns of Y after the conversion above
        self.out_dims = Y.shape[1]
        # One learner per (iteration, output column)
        self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
        # Weight of each learner's prediction. No line search is done, so
        # every row but the first simply holds the learning rate; the first
        # row (base estimator) keeps weight 1.
        self.weights = np.ones((self.n_iter, self.out_dims))
        self.weights[1:, :] *= self.learning_rate

        # Running prediction: N samples by self.out_dims columns
        Y_pred = np.zeros((N, self.out_dims))
        # Iteration 0: the loss-specific base estimator, one per column
        for k in range(self.out_dims):
            t = loss.base_estimator()
            t.fit(X, Y[:, k])
            Y_pred[:, k] = t.predict(X)
            self.learners[0, k] = t

        # Incrementally fit each learner on the negative gradient of the loss
        # wrt the previous fit (pseudo-residuals)
        for i in range(1, self.n_iter):
            for k in range(self.out_dims):
                y, y_pred = Y[:, k], Y_pred[:, k]
                neg_grad = -1 * loss.grad(y, y_pred)
                # use MSE as the surrogate loss when fitting to negative gradients
                t = DecisionTree(classifier=False,
                                 max_depth=self.max_depth,
                                 criterion="mse")
                # fit X to negative gradients of the current loss function
                t.fit(X, neg_grad)
                self.learners[i, k] = t

                # The line-search step is skipped, so the step stays at 1.
                step = 1.0
                h_pred = t.predict(X)

                # update weights and our overall prediction for Y
                self.weights[i, k] *= step
                Y_pred[:, k] += self.weights[i, k] * h_pred
Esempio n. 3
0
    def test_calc_loss(self):
        """Check that softmax+NLL and CrossEntropyLoss report the same loss.

        The first path runs the inputs through a softmax layer and then
        ``NegativeLogLikelihoodLoss``; the second hands the raw inputs to
        ``CrossEntropyLoss``.
        """
        expected_loss = [0.0, 0.0, 13.0067176]

        # Path 1: explicit softmax layer, then negative log-likelihood.
        net = Sequential([SoftMaxLayer()])
        inputs = np.array([15.0, 10.0, 2.0])

        probs = net.forward(inputs)
        self.assertEqual(probs.shape, (3, ))

        nll_loss = NegativeLogLikelihoodLoss()
        target = np.array([0.0, 0.0, 1.0])
        self.assertEqual(probs.shape, target.shape)
        nll_value = nll_loss.loss(probs, target)
        self.assertEqual(nll_value.shape, (3, ))
        assert_almost_equal(nll_value, expected_loss, decimal=5)

        # Path 2: cross-entropy on the raw inputs with a fresh target vector.
        ce_loss = CrossEntropyLoss()
        target = np.array([0.0, 0.0, 1.0])
        ce_value = ce_loss.loss(inputs, target)
        self.assertEqual(ce_value.shape, (3, ))
        assert_almost_equal(ce_value, expected_loss, decimal=5)

        # Both paths have to agree with each other as well.
        assert_almost_equal(nll_value, ce_value)
Esempio n. 4
0
    def fit(self, X, Y):
        """
        Fit the gradient boosted decision trees on a dataset

        Reads ``self.loss``, ``self.classifier``, ``self.n_iter``,
        ``self.learning_rate``, ``self.max_depth`` and ``self.step_size``;
        stores its results in ``self.out_dims``, ``self.learners`` and
        ``self.weights``.

        :param X: 2-D array of inputs, one row per sample.
        :param Y: targets. Flattened and one-hot encoded when
            ``self.classifier`` is true; otherwise a 1-D ``Y`` is reshaped
            into a single column.
        :return: None
        :raises ValueError: if ``self.loss`` is neither ``"mse"`` nor
            ``"crossentropy"``.
        """
        if self.loss == "mse":
            loss = MSELoss()
        elif self.loss == "crossentropy":
            loss = CrossEntropyLoss()
        else:
            # Without this branch an unknown name only surfaced later, as an
            # UnboundLocalError on the first use of ``loss``.
            raise ValueError(
                "Unsupported loss {!r}; expected 'mse' or 'crossentropy'".
                format(self.loss))

        if self.classifier:
            Y = to_one_hot(Y.flatten())
        else:
            Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

        # Unpacking two values keeps the requirement that X is 2-D.
        N, _ = X.shape
        self.out_dims = Y.shape[1]
        self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
        # Row 0 (base estimator) keeps weight 1; later rows start at the
        # learning rate and may be scaled by the line-search step below.
        self.weights = np.ones((self.n_iter, self.out_dims))
        self.weights[1:, :] = self.learning_rate

        # fit the base estimator
        Y_pred = np.zeros((N, self.out_dims))
        for k in range(self.out_dims):
            t = loss.base_estimator()
            t.fit(X, Y[:, k])
            Y_pred[:, k] += t.predict(X)
            self.learners[0, k] = t

        # incrementally fit each learner on the negative gradient of the loss
        for i in range(1, self.n_iter):
            for k in range(self.out_dims):
                y, y_pred = Y[:, k], Y_pred[:, k]
                neg_grad = -1 * loss.grad(y, y_pred)

                # regression tree fitted to the pseudo-residuals
                t = DecisionTree(classifier=False,
                                 max_depth=self.max_depth,
                                 criterion="mse")

                t.fit(X, neg_grad)
                self.learners[i, k] = t

                step = 1.0
                h_pred = t.predict(X)

                # the step is only searched for when explicitly requested
                if self.step_size == "adaptive":
                    step = loss.line_search(y, y_pred, h_pred)

                self.weights[i, k] *= step
                Y_pred[:, k] += self.weights[i, k] * h_pred
Esempio n. 5
0
    def fit(self, X, Y):
        """Fit the gradient-boosted ensemble on the training set ``(X, Y)``.

        Reads ``self.loss``, ``self.classifier``, ``self.n_iter``,
        ``self.learning_rate``, ``self.max_depth`` and ``self.step_size``;
        stores its results in ``self.out_dims``, ``self.learners`` and
        ``self.weights``.

        :param X: 2-D array of inputs, one row per sample.
        :param Y: targets. Flattened and one-hot encoded when
            ``self.classifier`` is true; otherwise a 1-D ``Y`` is reshaped
            into a single column.
        :raises ValueError: if ``self.loss`` is neither ``"mse"`` nor
            ``"crossentropy"``.
        """
        if self.loss == "mse":
            loss = MSELoss()
        elif self.loss == "crossentropy":
            loss = CrossEntropyLoss()
        else:
            # Without this branch an unknown name only surfaced later, as an
            # UnboundLocalError on the first use of ``loss``.
            raise ValueError(
                "Unsupported loss {!r}; expected 'mse' or 'crossentropy'".
                format(self.loss))

        # one-hot encode Y for classifiers, otherwise make sure it is 2-D
        if self.classifier:
            Y = to_one_hot(Y.flatten())
        else:
            Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

        # Unpacking two values keeps the requirement that X is 2-D.
        N, _ = X.shape
        self.out_dims = Y.shape[1]
        self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
        # Row 0 (base estimator) keeps weight 1; later rows start at the
        # learning rate and may be scaled by the line-search step below.
        self.weights = np.ones((self.n_iter, self.out_dims))
        self.weights[1:, :] *= self.learning_rate

        # fit the base estimator
        Y_pred = np.zeros((N, self.out_dims))
        for k in range(self.out_dims):
            t = loss.base_estimator()
            t.fit(X, Y[:, k])
            Y_pred[:, k] += t.predict(X)
            self.learners[0, k] = t

        # incrementally fit each learner on the negative gradient of the loss
        # wrt the previous fit (pseudo-residuals)
        for i in range(1, self.n_iter):
            for k in range(self.out_dims):
                y, y_pred = Y[:, k], Y_pred[:, k]
                neg_grad = -1 * loss.grad(y, y_pred)

                # use MSE as the surrogate loss when fitting to negative gradients
                t = DecisionTree(classifier=False,
                                 max_depth=self.max_depth,
                                 criterion="mse")

                # fit current learner to negative gradients
                t.fit(X, neg_grad)
                self.learners[i, k] = t

                # compute step size and weight for the current learner
                step = 1.0
                h_pred = t.predict(X)
                if self.step_size == "adaptive":
                    step = loss.line_search(y, y_pred, h_pred)

                # update weights and our overall prediction for Y
                self.weights[i, k] *= step
                Y_pred[:, k] += self.weights[i, k] * h_pred
from losses import CrossEntropyLoss
from optimizers import SGD

# Read the whole corpus into memory as one string.
with open('data/shakespear.txt', 'r') as f:
    raw = f.read()

# Character-level vocabulary: each distinct character of the corpus is
# assigned the position it has in `vocab`.
vocab = list(set(raw))
word2index = {word: i for i, word in enumerate(vocab)}
indices = np.array([word2index[ch] for ch in raw])

# Embedding layer feeding a recurrent cell that predicts over the vocabulary.
embed = Embedding(vocab_size=len(vocab), dim=512)
model = RNNCell(n_inputs=512, n_hidden=512, n_output=len(vocab))

criterion = CrossEntropyLoss()
optim = SGD(
    parameters=model.get_parameters() + embed.get_parameters(),
    alpha=0.01)

batch_size = 32
bptt = 16

# Drop the tail of the corpus that does not fill a complete batch row.
n_batches = int(len(indices) / batch_size)
trimmed_indices = indices[:n_batches * batch_size]
# batched_indices: after the transpose each column is one contiguous
# sub-sequence of `indices`.
batched_indices = trimmed_indices.reshape(batch_size, n_batches).transpose()

# Targets are the inputs shifted by one step.
input_batched_indices = batched_indices[:-1]
target_batched_indices = batched_indices[1:]

n_bptt = int((n_batches - 1) / bptt)
    def test_all(self):
        """Run a hand-written Adagrad RNN loop and compare a trained net to it.

        The loop executes for ``n`` from 0 to 400 inclusive. Each iteration
        takes a ``seq_length`` slice of ``data``, calls ``lossFun`` and applies
        an Adagrad update to ``Wxh``, ``Whh``, ``Why``, ``bh`` and ``by``.
        Whenever ``epoch == epochs`` it first trains ``vantr2`` through
        ``trainer2.learn_throughtime``, asserts that the weights of ``vantr2``
        equal those reference arrays, prints 200 characters sampled from each
        model and resets ``epoch`` to 0.

        Every name that is not assigned in this method (``Wxh``, ``data``,
        ``van``, ``vantr``, ``vantr2``, ``trainer2``, ``inputs_all``,
        ``epochs``, ``lossFun``, ``sample``, ...) has to be provided by an
        outer scope that is not part of this block.

        NOTE(review): the body mixes ``print(...)`` calls with ``print``
        statements, so it only parses under Python 2.
        """
        n, p, epoch = 0, 0, -1
        mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(
            Whh), np.zeros_like(Why)
        mbh, mby = np.zeros_like(bh), np.zeros_like(
            by)  # memory variables for Adagrad
        smooth_loss = -np.log(
            1.0 / vocab_size) * seq_length  # loss at iteration 0
        while n <= 400:
            print(n, p, epoch)
            # prepare inputs (we're sweeping from left to right in steps seq_length long)
            if p + seq_length + 1 > len(data) or n == 0:
                van.clear_memory()
                vantr.clear_memory()
                hprev = np.zeros((hidden_size, 1))  # reset RNN memory
                p = 0  # go from start of data
                epoch += 1
            # print (n,p,epoch)
            # targets are the inputs shifted one character to the right
            inputs = [char_to_ix[ch] for ch in data[p:p + seq_length]]
            targets = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]
            if epoch == epochs:
                # train the network under test on the full input/target
                # sequences, then compare its weights with the reference arrays
                trainer2.learn_throughtime(
                    vantr2,
                    zip(to_hot_vect(inputs_all, vocab_size),
                        to_hot_vect(targets_all, vocab_size)),
                    CrossEntropyLoss(),
                    AdaGrad(learning_rate=learning_rate, clip=5), epochs)
                assert_array_equal(
                    vantr2.statenet[0].net.elements[0].elements[0].elements[1].
                    W.get(), Wxh)
                assert_array_equal(
                    vantr2.statenet[0].net.elements[0].elements[1].elements[1].
                    W.get(), Whh)
                assert_array_equal(
                    vantr2.statenet[0].net.elements[0].elements[2].W.get(),
                    bh.T[0])
                assert_array_equal(
                    vantr2.outputnet[0].net.elements[0].elements[1].W.get(),
                    Why)
                assert_array_equal(vantr2.outputnet[0].net.elements[1].W.get(),
                                   by.T[0])

                # greedy (argmax) generation of 200 characters from vantr2,
                # starting from the first input of the current slice
                txtvan = ''
                x = to_one_hot_vect(inputs[0], vocab_size)
                for i in range(200):
                    y = soft.forward(vantr2.forward(x))
                    txtvan += ix_to_char[np.argmax(
                        y)]  #np.random.choice(range(vocab_size), p=y.ravel())]
                    x = to_one_hot_vect(np.argmax(y), vocab_size)
                vantr2.clear_memory()

                # 200 characters from the reference implementation
                sample_ix = sample(hprev, inputs[0], 200)
                txt = ''.join(ix_to_char[ix] for ix in sample_ix)
                print '----\n %s \n %s \n----' % (txt, txtvan)

                epoch = 0

            # sample from the model now and then
            # if n % epochs == 0:
            #   sample_ix = sample(hprev, inputs[0], 200)
            #   txt = ''.join(ix_to_char[ix] for ix in sample_ix)
            #   print '----\n %s \n %s ----' % (txt,txtvan )

            # forward seq_length characters through the net and fetch gradient
            loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(
                inputs, targets, hprev)

            # exponential moving average of the loss, used for reporting only
            smooth_loss = smooth_loss * 0.999 + loss * 0.001
            if n % epochs == 0:
                print 'iter %d, loss: %f' % (n, smooth_loss)  # print progress
            # print 'iter %d, loss: %f' % (n, smooth_loss) # print progress

            # perform parameter update with Adagrad
            # (augmented assignment on the zipped objects; presumably NumPy
            # arrays that are updated in place -- TODO confirm)
            for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                          [dWxh, dWhh, dWhy, dbh, dby],
                                          [mWxh, mWhh, mWhy, mbh, mby]):
                mem += dparam * dparam
                param += -learning_rate * dparam / np.sqrt(
                    mem + 1e-8)  # adagrad update

            p += seq_length  # move data pointer
            n += 1  # iteration counter
# Small random recurrent / read-out weights, zero biases.
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
Why = 0.01 * np.random.randn(vocab_size, hidden_size)  # hidden -> output
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias
by = np.zeros(shape=(vocab_size, 1))  # output bias

# First network: works on copies of the arrays above, with its input
# weights wrapped in SharedWeights.
van = Vanilla(
    vocab_size, vocab_size, hidden_size, seq_length,
    Wxh=SharedWeights(Wxh.copy()),
    Whh=Whh.copy(),
    Why=Why.copy(),
    bh=bh.copy(),
    by=by.copy())
# negLog = NegativeLogLikelihoodLoss()
cross = CrossEntropyLoss()
opt = AdaGrad(learning_rate=learning_rate, clip=5)
soft = SoftMaxLayer()

# Second network: same starting values, but plain (unwrapped) input weights.
vantr = Vanilla(
    vocab_size, vocab_size, hidden_size, seq_length,
    Wxh=Wxh.copy(),
    Whh=Whh.copy(),
    Why=Why.copy(),
    bh=bh.copy(),
    by=by.copy())

crosstr = CrossEntropyLoss()
opttr = AdaGrad(learning_rate=learning_rate, clip=5)
Esempio n. 9
0
    x = to_one_hot_vect(char_to_ix['c'], vocab_size)
    for i in range(200):
        y = sm.forward(lstm.forward(x))
        str += ix_to_char[np.random.choice(range(vocab_size), p=y.ravel())]
        x = to_one_hot_vect(np.argmax(y), vocab_size)
    print str
    display.show(*args)


# Trainer that reports progress through the plotting callback.
trainer = Trainer(show_training=True,
                  show_function=functionPlot)

# Inputs are all characters but the last one; targets are the same text
# shifted by one character, both one-hot encoded.
train = [to_one_hot_vect(char_to_ix[ch], vocab_size) for ch in data[:-1]]
target = [to_one_hot_vect(char_to_ix[ch], vocab_size) for ch in data[1:]]

# Single pass of training through time over the (input, target) pairs.
J, dJdy = trainer.learn_throughtime(
    lstm,
    zip(train, target),
    CrossEntropyLoss(),
    opt,
    1,
    window_size)

# J, dJdy = trainer.learn_window(
#     v,
#     zip(train[:5],target[:5]),
#     NegativeLogLikelihoodLoss(),
#     #CrossEntropyLoss(),
#     AdaGrad(learning_rate=1e-1),
# )
# print J

# J, dJdy = trainer.learn_window(
#     v,
#     zip(train[:5],target[:5]),
#     NegativeLogLikelihoodLoss(),
#     AdaGrad(learning_rate=0.001),
Esempio n. 10
0
def main(argv):
    """Build the data pipeline, model and optimizer, then start training.

    The command line in ``argv`` is parsed with ``cmd_args_parsing`` and
    ``args_parsing``. The dataset table under ``root`` is split into phases
    and read back, train/validation loaders are created, a ``UNetFourier``
    model is instantiated on the module-level ``device`` and everything is
    handed to ``train``.

    :param argv: command-line arguments forwarded to ``cmd_args_parsing``.
    """
    params = args_parsing(cmd_args_parsing(argv))
    root, experiment_name, image_size, batch_size, lr, n_epochs, log_dir, checkpoint_path = (
        params['root'], params['experiment_name'], params['image_size'],
        params['batch_size'], params['lr'], params['n_epochs'],
        params['log_dir'], params['checkpoint_path'])

    # The table is re-read after the split call because the 'phase' column
    # is used below to select the rows of each subset.
    train_val_split(os.path.join(root, DATASET_TABLE_PATH))
    dataset = pd.read_csv(os.path.join(root, DATASET_TABLE_PATH))

    # Per-sample transforms (applied by the dataset) and per-batch transforms
    # (applied inside the collate function).
    pre_transforms = torchvision.transforms.Compose(
        [Resize(size=image_size), ToTensor()])
    batch_transforms = torchvision.transforms.Compose(
        [BatchEncodeSegmentaionMap()])
    augmentation_batch_transforms = torchvision.transforms.Compose([
        BatchToPILImage(),
        BatchHorizontalFlip(p=0.5),
        BatchRandomRotation(degrees=10),
        BatchRandomScale(scale=(1.0, 2.0)),
        BatchBrightContrastJitter(brightness=(0.5, 2.0), contrast=(0.5, 2.0)),
        BatchToTensor(),
        BatchEncodeSegmentaionMap()
    ])

    train_dataset = SegmentationDataset(
        dataset=dataset[dataset['phase'] == 'train'], transform=pre_transforms)

    # NOTE(review): training batches are drawn sequentially, i.e. the
    # training set is not shuffled between epochs -- confirm this is intended.
    train_sampler = SequentialSampler(train_dataset)
    train_batch_sampler = BatchSampler(train_sampler, batch_size)
    train_collate = collate_transform(augmentation_batch_transforms)
    train_dataloader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_sampler=train_batch_sampler,
        collate_fn=train_collate)

    val_dataset = SegmentationDataset(
        dataset=dataset[dataset['phase'] == 'val'], transform=pre_transforms)

    # Validation batches skip the augmentations and are only encoded.
    val_sampler = SequentialSampler(val_dataset)
    val_batch_sampler = BatchSampler(val_sampler, batch_size)
    val_collate = collate_transform(batch_transforms)
    val_dataloader = torch.utils.data.DataLoader(
        dataset=val_dataset,
        batch_sampler=val_batch_sampler,
        collate_fn=val_collate)

    # model = Unet_with_attention(1, 2, image_size[0], image_size[1]).to(device)
    # model = UNet(1, 2).to(device)
    # model = UNetTC(1, 2).to(device)

    model = UNetFourier(1, 2, image_size, fourier_layer='linear').to(device)

    writer, experiment_name, best_model_path = setup_experiment(
        model.__class__.__name__, log_dir, experiment_name)

    new_checkpoint_path = os.path.join(root, 'checkpoints',
                                       experiment_name + '_latest.pth')
    best_checkpoint_path = os.path.join(root, 'checkpoints',
                                        experiment_name + '_best.pth')
    os.makedirs(os.path.dirname(new_checkpoint_path), exist_ok=True)

    if checkpoint_path is not None:
        checkpoint_path = os.path.join(root, 'checkpoints', checkpoint_path)
        print(f"\nLoading checkpoint from {checkpoint_path}.\n")
        # Remap stored tensors onto the device in use, so a checkpoint written
        # on another device (e.g. a GPU) still loads on this machine.
        checkpoint = torch.load(checkpoint_path, map_location=device)
    else:
        checkpoint = None
    best_model_path = os.path.join(root, best_model_path)
    print(f"Experiment name: {experiment_name}")
    print(f"Model has {count_parameters(model):,} trainable parameters")
    print()

    # Combination of cross-entropy and weighted generalized Dice, given the
    # coefficients 0.4 and 0.6 in that order.
    criterion = CombinedLoss(
        [CrossEntropyLoss(),
         GeneralizedDiceLoss(weighted=True)], [0.4, 0.6])
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Halve the learning rate after 5 epochs without a decrease of the
    # monitored value.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           factor=0.5,
                                                           patience=5)
    metric = DiceMetric()
    weighted_metric = DiceMetric(weighted=True)

    print(
        "To see the learning process, use command in the new terminal:\ntensorboard --logdir <path to log directory>"
    )
    print()
    train(model, train_dataloader, val_dataloader, criterion, optimizer,
          scheduler, metric, weighted_metric, n_epochs, device, writer,
          best_model_path, best_checkpoint_path, checkpoint,
          new_checkpoint_path)
Esempio n. 11
0
# Small random recurrent / read-out weights, zero biases.
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
Why = 0.01 * np.random.randn(vocab_size, hidden_size)  # hidden -> output
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias
by = np.zeros(shape=(vocab_size, 1))  # output bias

# The network receives copies, so the arrays above are not shared with it.
van = Vanilla(
    vocab_size, vocab_size, hidden_size, seq_length,
    Wxh=Wxh.copy(),
    Whh=Whh.copy(),
    Why=Why.copy(),
    bh=bh.copy(),
    by=by.copy())
# negLog = NegativeLogLikelihoodLoss()
cross = CrossEntropyLoss()
opt = AdaGrad(learning_rate=learning_rate)
soft = SoftMaxLayer()


def lossFun(inputs, targets, hprev):
    """
  inputs,targets are both list of integers.
  hprev is Hx1 array of initial hidden state
  returns the loss, gradients on model parameters, and last hidden state
  """
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0

    # forward pass
Esempio n. 12
0
    model.add_layer(
        Convolution(32, (3, 3),
                    input_shape=(batch_size, X_tr.shape[1], X_tr.shape[2],
                                 X_tr.shape[3]),
                    weight_initializer=NormalInitializer(std)))
    model.add_layer(ReLuActivation())
    model.add_layer(BatchNormalization())
    model.add_layer(
        Convolution(32, (3, 3),
                    weight_initializer=NormalInitializer(std),
                    padding='same'))

    model.add_layer(ReLuActivation())
    model.add_layer(MaxPool((2, 2)))
    model.add_layer(Flatten())

    model.add_layer(
        Affine(100, weight_initializer=NormalInitializer(std), reg=reg))
    model.add_layer(ReLuActivation())
    model.add_layer(DropoutLayer(drop_rate=0.3))
    model.add_layer(
        Affine(n_classes, weight_initializer=NormalInitializer(std), reg=reg))

    model.initialize(loss=CrossEntropyLoss(),
                     optimizer=Adam(learning_rate=0.001,
                                    decay_fst_mom=0.9,
                                    decay_sec_mom=0.999))
    # with open('model_90_49.14262959724404', 'rb') as file:
    #     model = pickle.load(file)
    model.fit(batch_size, X_tr, y_tr, n_epochs=100, metric=accuracy_metric)
Esempio n. 13
0
def main():
    """Train a model with cross-entropy and triplet criteria, evaluating periodically.

    All settings come from the module-level ``args``. After each training
    epoch the scheduler is stepped; on scheduled epochs and on the final one
    the model is evaluated on the query/gallery loaders and a checkpoint is
    written to ``args.save_dir``.
    """
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    # The GPU is used only if CUDA is available and CPU mode was not forced.
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    sys.stdout = Logger(osp.join(args.save_dir, "log.txt"))
    if not use_gpu:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')
    else:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True

    print('Initializing image data manager')
    data_manager = ImageDataManager(use_gpu, **trainset_kwargs(args))
    trainloader, testloader_dict = data_manager.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=data_manager.num_train_pids,
                              loss={'xent', 'htri'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    criterion_xent = CrossEntropyLoss(num_classes=data_manager.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    # Resuming replaces the configured start epoch with the stored one.
    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume,
                                                  model,
                                                  optimizer=optimizer)

    started_at = time.time()
    print('=> Start training')

    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)

        scheduler.step()

        # Evaluate on the configured schedule, and always after the last epoch.
        epochs_done = epoch + 1
        eval_due = (epochs_done > args.start_eval and args.eval_freq > 0
                    and epochs_done % args.eval_freq == 0)
        if not (eval_due or epochs_done == args.max_epoch):
            continue

        print('=> Validation')

        print('Evaluating {} ...'.format(args.test_set))
        queryloader = testloader_dict['query']
        galleryloader = testloader_dict['test']
        rank1 = test(model, queryloader, galleryloader, use_gpu)

        state = {
            'state_dict': model.state_dict(),
            'rank1': rank1,
            'epoch': epochs_done,
            'arch': args.arch,
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(state, args.save_dir)

    elapsed_seconds = round(time.time() - started_at)
    elapsed = str(datetime.timedelta(seconds=elapsed_seconds))
    print('Elapsed {}'.format(elapsed))
Esempio n. 14
0
    def __init__(self,
                 global_step,
                 learning_rate=0.01,
                 num_classes=10,
                 is_training=True):
        """Build the multi-tower classifier graph and its training op.

        The input batch and the labels are split into
        ``FLAGS.Classifier_gpu_num`` parts; one tower per part is built under
        the shared ``classifier`` variable scope, the per-tower gradients are
        averaged and applied together with an exponential moving average of
        the classifier variables.

        :param global_step: stored as ``self.global_step`` and passed to
            ``apply_gradients`` and ``ExponentialMovingAverage``.
        :param learning_rate: NOTE(review): not referenced in this body; the
            optimizer takes its rate from the ``self.lr`` placeholder.
        :param num_classes: width of the label placeholder and of the final
            fully connected layer.
        :param is_training: NOTE(review): not referenced in this body; ``cnn``
            is always called with ``is_training=True``.
        """
        # Placeholders: images of shape [None, 28, 28, 1], labels of shape
        # [None, num_classes] and a scalar learning rate.
        self.x = tf.placeholder("float", [None, 28, 28, 1])
        self.y_ = tf.placeholder("float", [None, num_classes])
        self.lr = tf.placeholder(tf.float32, shape=[])
        self.opt = tf.train.RMSPropOptimizer(self.lr,
                                             FLAGS.rmsprop_decay,
                                             momentum=FLAGS.rmsprop_momentum,
                                             epsilon=FLAGS.rmsprop_epsilon)
        self.global_step = global_step

        # Split the batch along axis 0, one slice per tower.
        num_split = FLAGS.Classifier_gpu_num
        x_splits = tf.split(self.x, num_split, 0)
        label_y_splits = tf.split(self.y_, num_split, 0)

        tower_grads = []
        tower_predictions = []
        each_sample_loss_list = []
        with tf.variable_scope("classifier") as scope:
            for i in xrange(num_split):
                with tf.device('/gpu:%d' % i):
                    with slim.arg_scope(fc_mnsit_arg_scope()):
                        # NOTE(review): num_classes and is_training are
                        # hard-coded here instead of using the constructor
                        # arguments -- confirm this is intended.
                        PreLogits, endpoints = cnn(x_splits[i],
                                                   num_classes=10,
                                                   is_training=True,
                                                   dropout_keep_prob=0.5)
                    with tf.variable_scope('Logits'):
                        logits_ = slim.flatten(PreLogits)
                        logits_ = slim.fully_connected(logits_,
                                                       num_classes,
                                                       activation_fn=None,
                                                       scope='logits')
                    predictions = tf.nn.softmax(logits_, name='predictions')
                    tower_predictions.append(predictions)

                    correct_prediction = tf.equal(
                        tf.argmax(predictions, 1),
                        tf.argmax(label_y_splits[i], 1))
                    # self.accuracy, self.batch_mean_loss and
                    # self._loss_summary are reassigned on every iteration, so
                    # after the loop they refer to the last tower only.
                    with tf.name_scope('accuracy'):
                        self.accuracy = tf.reduce_mean(
                            tf.cast(correct_prediction, tf.float32))
                    batch_mean_loss_calculator = CrossEntropyLoss()
                    with tf.name_scope('batch_mean_loss'):
                        self.batch_mean_loss = batch_mean_loss_calculator.calculate_loss(
                            predictions, label_y_splits[i])
                    self._loss_summary = tf.summary.scalar(
                        'batch_mean_loss', self.batch_mean_loss)

                    # loss between each prediction and each label
                    each_sample_loss_calculator = CrossEntropyBetweenEachSample(
                    )
                    each_sample_loss = each_sample_loss_calculator.calculate_loss(
                        predictions, label_y_splits[i])  # 3*1
                    each_sample_loss_list.append(each_sample_loss)

                    # Later towers reuse the variables created by the first one.
                    # tf.get_variable_scope().reuse_variables()
                    scope.reuse_variables()

                    grads = self.opt.compute_gradients(self.batch_mean_loss)
                    tower_grads.append(grads)

        # Combine the per-tower gradients and apply them in a single step.
        grads = average_gradients(tower_grads)
        apply_gradient_op = self.opt.apply_gradients(
            grads, global_step=self.global_step)

        # Moving averages (decay 0.99) of the trainable variables whose op
        # name starts with 'classifier'.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.99, self.global_step)
        trainable_variables = [
            v for v in tf.trainable_variables()
            if v.op.name.startswith('classifier')
        ]

        variables_averages_op = variable_averages.apply(trainable_variables)

        # Group all updates to into a single train op.
        self.train_op = tf.group(apply_gradient_op, variables_averages_op)
        # Per-tower results are concatenated along axis 0.
        self.each_sample_loss = tf.concat(each_sample_loss_list, axis=0)
        self.predictions = tf.concat(tower_predictions, axis=0)
Esempio n. 15
0
# )
# print J

# J, dJdy = trainer.learn_window(
#     v,
#     zip(train[:5],target[:5]),
#     NegativeLogLikelihoodLoss(),
#     AdaGrad(learning_rate=0.001),
# )
# print J

# Endless train / checkpoint / sample cycle; stop it by interrupting the
# process.
while True:
    # Train for `epochs` more passes and save the network.
    J, dJdy = trainer.learn_throughtime(
        v,
        zip(train, target),
        CrossEntropyLoss(),
        # NegativeLogLikelihoodLoss(),
        opt,
        epochs,
        window_size)
    v.save('vanilla.net')

    # Generate 200 characters starting from 'c'. The printed character is
    # drawn from the predicted distribution, while the next input is the
    # most likely character. The pieces are collected in a list so the
    # builtin `str` is no longer rebound, and joined once at the end.
    sampled_chars = []
    x = to_one_hot_vect(char_to_ix['c'], vocab_size)
    for _ in range(200):
        y = sm.forward(v.forward(x))
        sampled_chars.append(
            ix_to_char[np.random.choice(range(vocab_size), p=y.ravel())])
        x = to_one_hot_vect(np.argmax(y), vocab_size)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(''.join(sampled_chars))

# print [ix_to_char[np.argmax(t)] for t in train]
Esempio n. 16
0
def main():
    """Optionally validate pre-trained weights, then train and checkpoint.

    All settings come from the module-level ``args``. When
    ``args.load_weights`` points to an existing file, the weights are loaded
    and the model is evaluated once before training starts. During training
    the model is evaluated and a checkpoint is saved on scheduled epochs and
    after the final one.
    """
    # Purpose of this variant: check the mAP and rank-1 that the
    # repo-provided model.pth.tar-9 reaches on the validation set.
    # The tar-9 I trained myself only gets mAP: 15.1%; Rank-1: 23.3%, and I
    # do not know why.
    # Set args.load_weights = '/model/caohw9/track3_model/model.pth.tar-9'

    global args
    print(args)

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    # The GPU is used only if CUDA is available and CPU mode was not forced.
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    sys.stdout = Logger(osp.join(args.save_dir, "log.txt"))
    if not use_gpu:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')
    else:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True

    # Initialise the loaders: trainloader is used for training;
    # testloader_dict is indexed below with 'query' and 'test'.
    print('Initializing image data manager')
    data_manager = ImageDataManager(use_gpu, **trainset_kwargs(args))
    trainloader, testloader_dict = data_manager.return_dataloaders()
    print('suffessfully initialize loaders!')

    # Initialise the model (original note: args.arch default='resnet101').
    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=data_manager.num_train_pids,
                              loss={'xent', 'htri'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        # Load the pre-trained parameters and look at the validation result
        # straight away.
        load_pretrained_weights(model, args.load_weights)
        print('=> Validation')
        # args.test_set presumably names the validation set?
        print('Evaluating {} ...'.format(args.test_set))
        queryloader = testloader_dict['query']
        galleryloader = testloader_dict['test']
        if use_gpu:
            model = nn.DataParallel(model).cuda()
        rank1 = test(model, queryloader, galleryloader, use_gpu)
    else:
        # Multi-GPU training.
        if use_gpu:
            model = nn.DataParallel(model).cuda()

    # Losses, optimizer and learning-rate scheduler.
    criterion_xent = CrossEntropyLoss(num_classes=data_manager.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    # When resuming, continue from the epoch stored in the checkpoint.
    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(
            args.resume, model, optimizer=optimizer)

    # Start training.
    started_at = time.time()
    print('=> Start training')

    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)

        scheduler.step()  # update the learning rate

        # Validate and save a checkpoint every args.eval_freq epochs once
        # args.start_eval has been passed, and always after the last epoch.
        epochs_done = epoch + 1
        eval_due = (epochs_done > args.start_eval and args.eval_freq > 0
                    and epochs_done % args.eval_freq == 0)
        if not (eval_due or epochs_done == args.max_epoch):
            continue

        print('=> Validation')
        # args.test_set presumably names the validation set?
        print('Evaluating {} ...'.format(args.test_set))
        queryloader = testloader_dict['query']
        galleryloader = testloader_dict['test']
        rank1 = test(model, queryloader, galleryloader, use_gpu)

        # The checkpoint is written together with each validation run.
        state = {
            'state_dict': model.state_dict(),  # model state dictionary
            'rank1': rank1,
            'epoch': epochs_done,
            'arch': args.arch,  # original note: default='resnet101'
            'optimizer': optimizer.state_dict(),  # optimizer state dictionary
        }
        save_checkpoint(state, args.save_dir)

    # Training finished: report the elapsed wall-clock time.
    elapsed_seconds = round(time.time() - started_at)
    elapsed = str(datetime.timedelta(seconds=elapsed_seconds))
    print('Elapsed {}'.format(elapsed))
Esempio n. 17
0
def run_epoch(model,
              iterator,
              optimizer,
              metric,
              weighted_metric=None,
              phase='train',
              epoch=0,
              device='cpu',
              writer=None):
    """Run one pass over ``iterator`` and return the averaged loss/metrics.

    When ``phase == 'train'`` the model is put in training mode, gradients
    are enabled and the optimizer is stepped after every batch. The model is
    then expected to return nine tensors: eight auxiliary outputs followed by
    the final output. Each is passed through a sigmoid and scored with a
    weighted sum of BCE and Dice losses.

    For any other ``phase`` the model is put in eval mode, gradients are
    disabled, and the single model output is turned into a two-channel
    ``[1 - p, p]`` tensor that is scored with a combined
    cross-entropy / generalized-dice criterion.

    Args:
        model: Network to run; called as ``model(images)``.
        iterator: Sized iterable yielding ``(images, masks)`` batches.
        optimizer: Optimizer stepped once per batch (training phase only).
        metric: Callable ``metric(predicted_labels, masks)``; its result is
            accumulated and averaged over the batches.
        weighted_metric: Optional callable with the same signature as
            ``metric``; averaged the same way when given.
        phase: ``'train'`` selects the training path, anything else the
            evaluation path. Also used in the scalar tags sent to ``writer``.
        epoch: Epoch index used as the step for logged scalars
            (``epoch + 1`` for logged images).
        device: Device the batches are moved to.
        writer: Optional logger exposing ``add_scalar`` and ``add_images``
            (presumably a TensorBoard ``SummaryWriter`` - confirm with caller).

    Returns:
        Tuple ``(mean_loss, mean_metric, mean_weighted_metric)`` where the
        last element is ``None`` when ``weighted_metric`` is not supplied.

    Raises:
        ValueError: In the training phase, if the model does not return
            exactly nine outputs.
    """
    is_train = (phase == 'train')
    if is_train:
        model.train()
    else:
        model.eval()

    criterion_bce = torch.nn.BCELoss()
    criterion_dice = DiceLoss()

    # The validation criterion does not depend on the batch, so build it once
    # instead of on every iteration.
    eval_criterion = None
    if not is_train:
        eval_criterion = CombinedLoss(
            [CrossEntropyLoss(),
             GeneralizedDiceLoss(weighted=True)], [0.4, 0.6])

    # Per-head loss weights. Index 0 is the final output, 1-4 are the first
    # group of auxiliary outputs and 5-8 the second group.
    bce_weights = (1.0, 0.4, 0.5, 0.7, 0.8, 0.4, 0.5, 0.7, 0.8)
    # NOTE(review): the dice weights of the second auxiliary group
    # (0.4, 0.7, 0.8, 1.0) differ from the BCE ones (0.4, 0.5, 0.7, 0.8).
    # Kept exactly as in the original; confirm whether this is intentional.
    dice_weights = (1.0, 0.4, 0.5, 0.7, 0.8, 0.4, 0.7, 0.8, 1.0)

    num_batches = len(iterator)
    epoch_loss = 0.0
    epoch_metric = 0.0
    epoch_weighted_metric = 0.0

    with torch.set_grad_enabled(is_train):
        # Pick one random batch whose images/masks are sent to the writer.
        batch_to_plot = np.random.choice(range(num_batches))
        for i, (images, masks) in enumerate(tqdm(iterator)):
            images, masks = images.to(device), masks.to(device)

            if is_train:
                *aux_logits, final_logits = model(images)
                if len(aux_logits) != len(bce_weights) - 1:
                    raise ValueError(
                        'expected {} model outputs in training, got {}'.format(
                            len(bce_weights), len(aux_logits) + 1))

                # NOTE(review): the training metric below takes argmax over
                # dim=1 of these raw logits, whereas the validation path
                # builds an explicit two-channel tensor first. Behaviour is
                # kept as in the original; confirm the model's channel count.
                predicted_masks = final_logits

                heads = [torch.sigmoid(final_logits)]
                heads.extend(torch.sigmoid(logits) for logits in aux_logits)

                label = masks.to(torch.float)

                # All BCE terms first, then all Dice terms, summed left to
                # right - the same order as the original expression.
                terms = [
                    weight * criterion_bce(probs, label)
                    for weight, probs in zip(bce_weights, heads)
                ]
                terms.extend(
                    weight * criterion_dice(probs, label)
                    for weight, probs in zip(dice_weights, heads))
                loss = sum(terms[1:], terms[0])
            else:
                foreground = torch.sigmoid(model(images))
                # Build the [background, foreground] channels directly on the
                # current device; no CPU/NumPy round trip is needed.
                predicted_masks = torch.cat([1 - foreground, foreground],
                                            dim=1)
                loss = eval_criterion(predicted_masks.to(torch.float), masks)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            epoch_loss += loss.item()
            predicted_labels = torch.argmax(predicted_masks, dim=1)
            epoch_metric += metric(predicted_labels, masks)
            if weighted_metric is not None:
                epoch_weighted_metric += weighted_metric(
                    predicted_labels, masks)

            if i == batch_to_plot:
                images_to_plot, masks_to_plot, predicted_masks_to_plot = process_to_plot(
                    images, masks, predicted_masks)

        if writer is not None:
            writer.add_scalar(f"loss_epoch/{phase}",
                              epoch_loss / num_batches, epoch)
            writer.add_scalar(f"metric_epoch/{phase}",
                              epoch_metric / num_batches, epoch)
            if weighted_metric is not None:
                writer.add_scalar(f"weighted_metric_epoch/{phase}",
                                  epoch_weighted_metric / num_batches, epoch)

            # Send the images of the randomly selected batch to the writer.
            writer.add_images(tag='images',
                              img_tensor=images_to_plot,
                              global_step=epoch + 1)
            writer.add_images(tag='true masks',
                              img_tensor=masks_to_plot,
                              global_step=epoch + 1)
            writer.add_images(tag='predicted masks',
                              img_tensor=predicted_masks_to_plot,
                              global_step=epoch + 1)

        if weighted_metric is not None:
            return (epoch_loss / num_batches, epoch_metric / num_batches,
                    epoch_weighted_metric / num_batches)
        return epoch_loss / num_batches, epoch_metric / num_batches, None