def test_calc_delta(self):
    """Backward pass: softmax + NLL gradient must match CrossEntropy gradient on raw logits.

    Pushes one 3-vector of logits through SoftMaxLayer, checks the forward
    losses of both formulations agree, then checks that backpropagating the
    NLL gradient through the softmax yields the same delta as the combined
    CrossEntropyLoss gradient computed directly on the logits.
    """
    l1 = SoftMaxLayer()
    n = Sequential([l1])
    x = np.array([15.0, 10.0, 2.0])  # raw logits
    y = n.forward(x)
    self.assertEqual(y.shape, (3, ))
    nll = NegativeLogLikelihoodLoss()
    t = np.array([0.0, 0.0, 1.0])  # one-hot target: class 2
    self.assertEqual(y.shape, t.shape)
    J1 = nll.loss(y, t)
    self.assertEqual(J1.shape, (3, ))
    # only the target component carries loss: -log(softmax(x)[2])
    assert_almost_equal(J1, [0.0, 0.0, 13.0067176], decimal=5)
    cel = CrossEntropyLoss()
    t = np.array([0.0, 0.0, 1.0])
    J2 = cel.loss(x, t)  # cross-entropy takes the raw logits, not softmax output
    self.assertEqual(J2.shape, (3, ))
    assert_almost_equal(J2, [0.0, 0.0, 13.0067176], decimal=5)
    # gradient of NLL wrt the softmax output (large because y[2] is tiny)
    delta_in = -nll.dJdy_gradient(y, t)
    assert_almost_equal(delta_in, [0.0, 0.0, 445395.349996])
    # backprop through the softmax layer
    delta_out1 = n.backward(delta_in)
    assert_almost_equal(delta_out1, [-0.9933049, -0.0066928, 0.9999978],
                        decimal=5)
    # the fused CrossEntropy gradient on logits must equal the two-step path
    delta_out2 = -cel.dJdy_gradient(x, t)
    assert_almost_equal(delta_out2, [-0.9933049, -0.0066928, 0.9999978],
                        decimal=5)
def fit(self, X, Y):
    """Fit the gradient-boosted ensemble on a dataset.

    Round 0 fits the loss's base estimator directly on the targets; every
    subsequent round fits a regression tree to the negative gradient of the
    loss (the pseudo-residuals) and adds its prediction, scaled by
    ``self.learning_rate``, to the running ensemble prediction. No line
    search is performed here (each learner's step size is fixed at 1.0).

    :param X: training inputs, array of shape (N, M)
    :param Y: targets; class labels when ``self.classifier`` is truthy,
        otherwise real values (flat arrays are reshaped to (N, 1))
    :return: None; populates ``self.learners`` and ``self.weights``
    :raises ValueError: if ``self.loss`` names an unknown loss
    """
    if self.loss == "mse":
        loss = MSELoss()
    elif self.loss == "crossentropy":
        loss = CrossEntropyLoss()
    else:
        # fail fast with a clear message instead of a NameError on `loss` below
        raise ValueError("Unknown loss '{}'".format(self.loss))

    # convert Y to one-hot encoding for classification
    if self.classifier:
        Y = to_one_hot(Y.flatten())
    else:
        # if the shape of Y is like (N,), convert it to (N, 1)
        Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

    N, M = X.shape
    # out_dims is the number of classes in the outcome; 1 for continuous Y
    self.out_dims = Y.shape[1]
    # Record all the learners (all the trees): learners[i, k] is the tree
    # fit at boosting round i for output dimension k
    self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
    # Weight for each learner's prediction. Since we skip the line-search
    # step we just use the learning rate; the very first round (the base
    # estimator) keeps weight 1.
    self.weights = np.ones((self.n_iter, self.out_dims))
    self.weights[1:, :] *= self.learning_rate

    # Running ensemble prediction: N samples x self.out_dims dimensions
    Y_pred = np.zeros((N, self.out_dims))

    # Very first iteration: fit the base estimator (e.g. the mean) per dim
    for k in range(self.out_dims):
        t = loss.base_estimator()
        t.fit(X, Y[:, k])
        Y_pred[:, k] = t.predict(X)
        self.learners[0, k] = t

    # Incrementally fit each learner on the negative gradient of the loss
    # wrt the previous fit (pseudo-residuals)
    for i in range(1, self.n_iter):
        for k in range(self.out_dims):
            y, y_pred = Y[:, k], Y_pred[:, k]
            neg_grad = -1 * loss.grad(y, y_pred)

            # use MSE as the surrogate loss when fitting to negative gradients
            t = DecisionTree(classifier=False,
                             max_depth=self.max_depth,
                             criterion="mse")
            # fit X to negative gradients of the current loss function
            t.fit(X, neg_grad)
            self.learners[i, k] = t

            # fixed step size (we ignore the line-search step)
            step = 1.0
            h_pred = t.predict(X)

            # update weights and our overall prediction for Y
            self.weights[i, k] *= step
            Y_pred[:, k] += self.weights[i, k] * h_pred
def test_calc_loss(self):
    """Softmax + NLL on probabilities must equal CrossEntropy on raw logits."""
    net = Sequential([SoftMaxLayer()])
    logits = np.array([15.0, 10.0, 2.0])
    probs = net.forward(logits)
    self.assertEqual(probs.shape, (3, ))

    nll = NegativeLogLikelihoodLoss()
    target = np.array([0.0, 0.0, 1.0])  # one-hot: true class is index 2
    self.assertEqual(probs.shape, target.shape)
    loss_nll = nll.loss(probs, target)
    self.assertEqual(loss_nll.shape, (3, ))
    # only the target component is non-zero: -log(softmax(logits)[2])
    assert_almost_equal(loss_nll, [0.0, 0.0, 13.0067176], decimal=5)

    ce = CrossEntropyLoss()
    target = np.array([0.0, 0.0, 1.0])
    loss_ce = ce.loss(logits, target)  # fused form consumes raw logits
    self.assertEqual(loss_ce.shape, (3, ))
    assert_almost_equal(loss_ce, [0.0, 0.0, 13.0067176], decimal=5)

    # both formulations must agree elementwise
    assert_almost_equal(loss_nll, loss_ce)
def fit(self, X, Y):
    """
    Fit the gradient boosted decision trees on a dataset.

    Round 0 fits the loss's base estimator directly on the targets; each
    later round fits a regression tree to the negative gradient of the loss
    (pseudo-residuals) and adds its prediction, scaled by the learner
    weight, to the running ensemble prediction.

    :param X: training inputs, array of shape (N, M)
    :param Y: targets; class labels when ``self.classifier`` is truthy,
        otherwise real values (flat arrays are reshaped to (N, 1))
    :return: None; populates ``self.learners`` and ``self.weights``
    """
    if self.loss == "mse":
        loss = MSELoss()
    elif self.loss == "crossentropy":
        loss = CrossEntropyLoss()
    # NOTE(review): any other value of self.loss leaves `loss` unbound and
    # fails later with a NameError — consider raising ValueError here.

    # one-hot encode classification targets; make regression targets 2-D
    if self.classifier:
        Y = to_one_hot(Y.flatten())
    else:
        Y = Y.reshape(-1, 1) if len(Y.shape) == 1 else Y

    N, M = X.shape
    self.out_dims = Y.shape[1]  # number of classes, or 1 for regression
    # learners[i, k] = tree fit at boosting round i for output dim k
    self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
    # per-learner prediction weights; round 0 (base estimator) keeps 1
    self.weights = np.ones((self.n_iter, self.out_dims))
    self.weights[1:, :] = self.learning_rate

    # fit the base estimator
    Y_pred = np.zeros((N, self.out_dims))
    for k in range(self.out_dims):
        t = loss.base_estimator()
        t.fit(X, Y[:, k])
        Y_pred[:, k] += t.predict(X)
        self.learners[0, k] = t

    # incrementally fit each learner on the negative gradient of the loss
    for i in range(1, self.n_iter):
        for k in range(self.out_dims):
            y, y_pred = Y[:, k], Y_pred[:, k]
            neg_grad = -1 * loss.grad(y, y_pred)

            # MSE regression tree as the surrogate fit to pseudo-residuals
            t = DecisionTree(classifier=False,
                             max_depth=self.max_depth,
                             criterion="mse")
            t.fit(X, neg_grad)
            self.learners[i, k] = t

            step = 1.0
            h_pred = t.predict(X)
            if self.step_size == "adaptive":
                # line search for the step that most reduces the loss
                step = loss.line_search(y, y_pred, h_pred)

            self.weights[i, k] *= step
            Y_pred[:, k] += self.weights[i, k] * h_pred
def fit(self, X, Y):
    """Fit gradient-boosted trees: a base estimator plus trees fit to pseudo-residuals.

    Behaves exactly like the standard staged additive fit: round 0 fits the
    loss's base estimator; rounds 1..n_iter-1 fit MSE regression trees to
    the negative gradient of the loss and add their (weighted) predictions
    to the running ensemble output.
    """
    if self.loss == "mse":
        objective = MSELoss()
    elif self.loss == "crossentropy":
        objective = CrossEntropyLoss()

    # one-hot targets for classification, 2-D column targets for regression
    if self.classifier:
        Y = to_one_hot(Y.flatten())
    elif len(Y.shape) == 1:
        Y = Y.reshape(-1, 1)

    n_samples = X.shape[0]
    self.out_dims = Y.shape[1]
    self.learners = np.empty((self.n_iter, self.out_dims), dtype=object)
    # learner weights: the base round keeps 1, later rounds are shrunk
    self.weights = np.ones((self.n_iter, self.out_dims))
    self.weights[1:, :] *= self.learning_rate

    # round 0: fit the loss's base estimator directly on the targets
    Y_pred = np.zeros((n_samples, self.out_dims))
    for dim in range(self.out_dims):
        base = objective.base_estimator()
        base.fit(X, Y[:, dim])
        Y_pred[:, dim] += base.predict(X)
        self.learners[0, dim] = base

    # rounds 1..n_iter-1: fit trees to the negative gradient of the loss
    for round_idx in range(1, self.n_iter):
        for dim in range(self.out_dims):
            targets, current = Y[:, dim], Y_pred[:, dim]
            pseudo_residuals = -1 * objective.grad(targets, current)

            # MSE tree as the surrogate fit to the pseudo-residuals
            tree = DecisionTree(classifier=False,
                                max_depth=self.max_depth,
                                criterion="mse")
            tree.fit(X, pseudo_residuals)
            self.learners[round_idx, dim] = tree

            # step size: fixed at 1.0 unless an adaptive line search is requested
            step = 1.0
            contribution = tree.predict(X)
            if self.step_size == "adaptive":
                step = objective.line_search(targets, current, contribution)

            self.weights[round_idx, dim] *= step
            Y_pred[:, dim] += self.weights[round_idx, dim] * contribution
from losses import CrossEntropyLoss
from optimizers import SGD

# Character-level language-modeling setup on the Shakespeare corpus.
with open('data/shakespear.txt', 'r') as f:
    raw = f.read()

# Build the character vocabulary and a char -> index lookup table.
vocab = list(set(raw))
word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
# The whole corpus as a flat array of character indices.
indices = np.array(list(map(lambda x: word2index[x], raw)))

embed = Embedding(vocab_size=len(vocab), dim=512)
model = RNNCell(n_inputs=512, n_hidden=512, n_output=len(vocab))
criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(),
            alpha=0.01)

batch_size = 32
bptt = 16  # truncated-BPTT window length

# Trim the tail so the index stream divides evenly into batch_size rows.
n_batches = int((indices.shape[0] / batch_size))
trimmed_indices = indices[:n_batches * batch_size]
# batch_indices: each column represents a sub-sequence from indices -> continuous
batched_indices = trimmed_indices.reshape(batch_size, n_batches)
batched_indices = batched_indices.transpose()
# Next-character prediction: targets are the inputs shifted by one row.
input_batched_indices = batched_indices[:-1]
target_batched_indices = batched_indices[1:]
# Number of full truncated-BPTT windows available.
n_bptt = int((n_batches - 1) / bptt)
def test_all(self):
    """End-to-end parity test: this library's Vanilla RNN trained with AdaGrad
    must reproduce the reference char-RNN (lossFun + manual Adagrad) exactly.

    Runs the reference training loop for up to 400 steps; once `epochs`
    full passes complete, trains vantr2 through the library trainer on the
    same data and asserts every weight matrix matches the reference
    parameters bit-for-bit, then samples text from both models.

    NOTE(review): relies on module-level globals (Wxh, Whh, Why, bh, by,
    data, van, vantr, vantr2, trainer2, ...) defined elsewhere in the file.
    """
    n, p, epoch = 0, 0, -1
    # Adagrad accumulator ("memory") variables, one per parameter.
    mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(
        Whh), np.zeros_like(Why)
    mbh, mby = np.zeros_like(bh), np.zeros_like(
        by)  # memory variables for Adagrad
    smooth_loss = -np.log(
        1.0 / vocab_size) * seq_length  # loss at iteration 0
    while n <= 400:
        print(n, p, epoch)
        # prepare inputs (we're sweeping from left to right in steps seq_length long)
        if p + seq_length + 1 > len(data) or n == 0:
            van.clear_memory()
            vantr.clear_memory()
            hprev = np.zeros((hidden_size, 1))  # reset RNN memory
            p = 0  # go from start of data
            epoch += 1
            # print (n,p,epoch)
        inputs = [char_to_ix[ch] for ch in data[p:p + seq_length]]
        targets = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]
        if epoch == epochs:
            # Train the library model on the full data, then assert each of
            # its weights equals the reference implementation's parameters.
            trainer2.learn_throughtime(
                vantr2,
                zip(to_hot_vect(inputs_all, vocab_size),
                    to_hot_vect(targets_all, vocab_size)),
                CrossEntropyLoss(),
                AdaGrad(learning_rate=learning_rate, clip=5), epochs)
            assert_array_equal(
                vantr2.statenet[0].net.elements[0].elements[0].elements[1].
                W.get(), Wxh)
            assert_array_equal(
                vantr2.statenet[0].net.elements[0].elements[1].elements[1].
                W.get(), Whh)
            assert_array_equal(
                vantr2.statenet[0].net.elements[0].elements[2].W.get(),
                bh.T[0])
            assert_array_equal(
                vantr2.outputnet[0].net.elements[0].elements[1].W.get(), Why)
            assert_array_equal(vantr2.outputnet[0].net.elements[1].W.get(),
                               by.T[0])
            # Greedy-sample 200 characters from the library model.
            txtvan = ''
            x = to_one_hot_vect(inputs[0], vocab_size)
            for i in range(200):
                y = soft.forward(vantr2.forward(x))
                txtvan += ix_to_char[np.argmax(
                    y)]  #np.random.choice(range(vocab_size), p=y.ravel())]
                x = to_one_hot_vect(np.argmax(y), vocab_size)
            vantr2.clear_memory()
            # Sample from the reference model for visual comparison.
            sample_ix = sample(hprev, inputs[0], 200)
            txt = ''.join(ix_to_char[ix] for ix in sample_ix)
            print '----\n %s \n %s \n----' % (txt, txtvan)
            epoch = 0
        # sample from the model now and then
        # if n % epochs == 0:
        #     sample_ix = sample(hprev, inputs[0], 200)
        #     txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        #     print '----\n %s \n %s ----' % (txt,txtvan )
        # forward seq_length characters through the net and fetch gradient
        loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(
            inputs, targets, hprev)
        smooth_loss = smooth_loss * 0.999 + loss * 0.001
        if n % epochs == 0:
            print 'iter %d, loss: %f' % (n, smooth_loss)  # print progress
        # print 'iter %d, loss: %f' % (n, smooth_loss) # print progress
        # perform parameter update with Adagrad
        for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                      [dWxh, dWhh, dWhy, dbh, dby],
                                      [mWxh, mWhh, mWhy, mbh, mby]):
            mem += dparam * dparam
            param += -learning_rate * dparam / np.sqrt(
                mem + 1e-8)  # adagrad update
        p += seq_length  # move data pointer
        n += 1  # iteration counter
Whh = np.random.randn(hidden_size, hidden_size) * 0.01 # hidden to hidden Why = np.random.randn(vocab_size, hidden_size) * 0.01 # hidden to output bh = np.zeros((hidden_size, 1)) # hidden bias by = np.zeros((vocab_size, 1)) # output bias van = Vanilla(vocab_size, vocab_size, hidden_size, seq_length, Wxh=SharedWeights(Wxh.copy()), Whh=Whh.copy(), Why=Why.copy(), bh=bh.copy(), by=by.copy()) #negLog = NegativeLogLikelihoodLoss() cross = CrossEntropyLoss() opt = AdaGrad(learning_rate=learning_rate, clip=5) soft = SoftMaxLayer() vantr = Vanilla(vocab_size, vocab_size, hidden_size, seq_length, Wxh=Wxh.copy(), Whh=Whh.copy(), Why=Why.copy(), bh=bh.copy(), by=by.copy()) crosstr = CrossEntropyLoss() opttr = AdaGrad(learning_rate=learning_rate, clip=5)
# NOTE(review): this chunk appears to be extracted from a larger sampling /
# plotting routine (`args` and `str` are referenced but not defined here) —
# verify against the original file layout.
# Sample 200 characters from the LSTM, drawing each next char from the
# softmax distribution, then feeding back the argmax as the next input.
x = to_one_hot_vect(char_to_ix['c'], vocab_size)
for i in range(200):
    y = sm.forward(lstm.forward(x))
    str += ix_to_char[np.random.choice(range(vocab_size), p=y.ravel())]
    x = to_one_hot_vect(np.argmax(y), vocab_size)
print str
display.show(*args)

trainer = Trainer(show_training=True, show_function=functionPlot)

# Next-character prediction pairs: target is the input shifted by one.
train = [to_one_hot_vect(char_to_ix[ch], vocab_size) for ch in data[0:-1]]
target = [to_one_hot_vect(char_to_ix[ch], vocab_size) for ch in data[1:]]

J, dJdy = trainer.learn_throughtime(lstm, zip(train, target),
                                    CrossEntropyLoss(), opt, 1, window_size)
# J, dJdy = trainer.learn_window(
#     v,
#     zip(train[:5],target[:5]),
#     NegativeLogLikelihoodLoss(),
#     #CrossEntropyLoss(),
#     AdaGrad(learning_rate=1e-1),
# )
# print J
# J, dJdy = trainer.learn_window(
#     v,
#     zip(train[:5],target[:5]),
#     NegativeLogLikelihoodLoss(),
#     AdaGrad(learning_rate=0.001),
def main(argv):
    """Train a segmentation model end-to-end from command-line arguments.

    Parses args, builds train/val dataloaders with augmentation, constructs
    the model, loss, optimizer, scheduler and metrics, then hands off to
    `train(...)`. Checkpoints are written under <root>/checkpoints/.
    """
    params = args_parsing(cmd_args_parsing(argv))
    root, experiment_name, image_size, batch_size, lr, n_epochs, log_dir, checkpoint_path = (
        params['root'], params['experiment_name'], params['image_size'],
        params['batch_size'], params['lr'], params['n_epochs'],
        params['log_dir'], params['checkpoint_path'])

    # Split the dataset table into train/val phases, then reload it.
    train_val_split(os.path.join(root, DATASET_TABLE_PATH))
    dataset = pd.read_csv(os.path.join(root, DATASET_TABLE_PATH))

    # Deterministic per-sample preprocessing (resize + tensor conversion).
    pre_transforms = torchvision.transforms.Compose(
        [Resize(size=image_size), ToTensor()])
    # Validation batches only need the segmentation-map encoding.
    batch_transforms = torchvision.transforms.Compose(
        [BatchEncodeSegmentaionMap()])
    # Training batches get geometric + photometric augmentation first.
    augmentation_batch_transforms = torchvision.transforms.Compose([
        BatchToPILImage(),
        BatchHorizontalFlip(p=0.5),
        BatchRandomRotation(degrees=10),
        BatchRandomScale(scale=(1.0, 2.0)),
        BatchBrightContrastJitter(brightness=(0.5, 2.0), contrast=(0.5, 2.0)),
        BatchToTensor(),
        BatchEncodeSegmentaionMap()
    ])

    train_dataset = SegmentationDataset(
        dataset=dataset[dataset['phase'] == 'train'],
        transform=pre_transforms)
    train_sampler = SequentialSampler(train_dataset)
    train_batch_sampler = BatchSampler(train_sampler, batch_size)
    train_collate = collate_transform(augmentation_batch_transforms)
    train_dataloader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_sampler=train_batch_sampler,
        collate_fn=train_collate)

    val_dataset = SegmentationDataset(
        dataset=dataset[dataset['phase'] == 'val'], transform=pre_transforms)
    val_sampler = SequentialSampler(val_dataset)
    val_batch_sampler = BatchSampler(val_sampler, batch_size)
    val_collate = collate_transform(batch_transforms)
    val_dataloader = torch.utils.data.DataLoader(
        dataset=val_dataset,
        batch_sampler=val_batch_sampler,
        collate_fn=val_collate)

    # Alternative architectures kept for reference:
    # model = Unet_with_attention(1, 2, image_size[0], image_size[1]).to(device)
    # model = UNet(1, 2).to(device)
    # model = UNetTC(1, 2).to(device)
    model = UNetFourier(1, 2, image_size, fourier_layer='linear').to(device)
    writer, experiment_name, best_model_path = setup_experiment(
        model.__class__.__name__, log_dir, experiment_name)

    new_checkpoint_path = os.path.join(root, 'checkpoints',
                                       experiment_name + '_latest.pth')
    best_checkpoint_path = os.path.join(root, 'checkpoints',
                                        experiment_name + '_best.pth')
    os.makedirs(os.path.dirname(new_checkpoint_path), exist_ok=True)
    # Optionally resume from an existing checkpoint.
    if checkpoint_path is not None:
        checkpoint_path = os.path.join(root, 'checkpoints', checkpoint_path)
        print(f"\nLoading checkpoint from {checkpoint_path}.\n")
        checkpoint = torch.load(checkpoint_path)
    else:
        checkpoint = None
    best_model_path = os.path.join(root, best_model_path)

    print(f"Experiment name: {experiment_name}")
    print(f"Model has {count_parameters(model):,} trainable parameters")
    print()

    # Weighted combination: 0.4 cross-entropy + 0.6 generalized Dice.
    criterion = CombinedLoss(
        [CrossEntropyLoss(), GeneralizedDiceLoss(weighted=True)], [0.4, 0.6])
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           factor=0.5,
                                                           patience=5)
    metric = DiceMetric()
    weighted_metric = DiceMetric(weighted=True)

    print(
        "To see the learning process, use command in the new terminal:\ntensorboard --logdir <path to log directory>"
    )
    print()

    train(model, train_dataloader, val_dataloader, criterion, optimizer,
          scheduler, metric, weighted_metric, n_epochs, device, writer,
          best_model_path, best_checkpoint_path, checkpoint,
          new_checkpoint_path)
Whh = np.random.randn(hidden_size, hidden_size) * 0.01 # hidden to hidden Why = np.random.randn(vocab_size, hidden_size) * 0.01 # hidden to output bh = np.zeros((hidden_size, 1)) # hidden bias by = np.zeros((vocab_size, 1)) # output bias van = Vanilla(vocab_size, vocab_size, hidden_size, seq_length, Wxh=Wxh.copy(), Whh=Whh.copy(), Why=Why.copy(), bh=bh.copy(), by=by.copy()) #negLog = NegativeLogLikelihoodLoss() cross = CrossEntropyLoss() opt = AdaGrad(learning_rate=learning_rate) soft = SoftMaxLayer() def lossFun(inputs, targets, hprev): """ inputs,targets are both list of integers. hprev is Hx1 array of initial hidden state returns the loss, gradients on model parameters, and last hidden state """ xs, hs, ys, ps = {}, {}, {}, {} hs[-1] = np.copy(hprev) loss = 0 # forward pass
# Small CNN: two 3x3 conv blocks, max-pool, then a 100-unit dense head
# with dropout and a final softmax-sized affine layer.
model.add_layer(
    Convolution(32, (3, 3),
                input_shape=(batch_size, X_tr.shape[1], X_tr.shape[2],
                             X_tr.shape[3]),
                weight_initializer=NormalInitializer(std)))
model.add_layer(ReLuActivation())
model.add_layer(BatchNormalization())
model.add_layer(
    Convolution(32, (3, 3),
                weight_initializer=NormalInitializer(std),
                padding='same'))
model.add_layer(ReLuActivation())
model.add_layer(MaxPool((2, 2)))
model.add_layer(Flatten())
model.add_layer(
    Affine(100, weight_initializer=NormalInitializer(std), reg=reg))
model.add_layer(ReLuActivation())
model.add_layer(DropoutLayer(drop_rate=0.3))
# Output layer: one unit per class; reg applies L2 weight regularization.
model.add_layer(
    Affine(n_classes, weight_initializer=NormalInitializer(std), reg=reg))
# Adam with standard momentum decay rates (beta1=0.9, beta2=0.999).
model.initialize(loss=CrossEntropyLoss(),
                 optimizer=Adam(learning_rate=0.001,
                                decay_fst_mom=0.9,
                                decay_sec_mom=0.999))
# Resume-from-pickle path kept for reference:
# with open('model_90_49.14262959724404', 'rb') as file:
#     model = pickle.load(file)
model.fit(batch_size, X_tr, y_tr, n_epochs=100, metric=accuracy_metric)
def main():
    """Train a person re-ID model with combined cross-entropy + triplet loss.

    Sets up GPU/CPU, data loaders, model, losses, optimizer and scheduler
    from the module-level `args`, optionally resumes from a checkpoint,
    then trains for args.max_epoch epochs with periodic validation and
    checkpointing.
    """
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    # Mirror stdout into a log file.
    sys.stdout = Logger(osp.join(args.save_dir, "log.txt"))

    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    print('Initializing image data manager')
    dm = ImageDataManager(use_gpu, **trainset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent', 'htri'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    model = nn.DataParallel(model).cuda() if use_gpu else model

    # Identity (cross-entropy with label smoothing) + triplet losses.
    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    # Resume: recover the epoch at which the previous run stopped.
    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume,
                                                  model,
                                                  optimizer=optimizer)

    time_start = time.time()
    print('=> Start training')
    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)
        scheduler.step()
        # Validate + checkpoint every eval_freq epochs past start_eval, and
        # always at the last epoch.
        # NOTE(review): this condition mixes `and`/`or` without parentheses —
        # `A and B and C or D` — verify the intended grouping.
        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print('=> Validation')
            print('Evaluating {} ...'.format(args.test_set))
            queryloader = testloader_dict['query']
            galleryloader = testloader_dict['test']
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'rank1': rank1,
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'optimizer': optimizer.state_dict(),
                }, args.save_dir)

    elapsed = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
def __init__(self,
             global_step,
             learning_rate=0.01,
             num_classes=10,
             is_training=True):
    """Build a multi-GPU (tower-parallel) MNIST classifier graph (TF1/slim).

    Splits the input batch across FLAGS.Classifier_gpu_num GPUs, builds one
    CNN tower per GPU with shared variables, averages the tower gradients
    and applies them with RMSProp plus an exponential moving average over
    the classifier's trainable variables.

    NOTE(review): `learning_rate` and `is_training` parameters are unused —
    the graph feeds self.lr via placeholder and hard-codes is_training=True.
    """
    # Placeholders: 28x28x1 images, one-hot labels, and a scalar LR feed.
    self.x = tf.placeholder("float", [None, 28, 28, 1])
    self.y_ = tf.placeholder("float", [None, num_classes])
    self.lr = tf.placeholder(tf.float32, shape=[])
    self.opt = tf.train.RMSPropOptimizer(self.lr,
                                         FLAGS.rmsprop_decay,
                                         momentum=FLAGS.rmsprop_momentum,
                                         epsilon=FLAGS.rmsprop_epsilon)
    self.global_step = global_step

    # One input split (tower) per GPU.
    num_split = FLAGS.Classifier_gpu_num
    x_splits = tf.split(self.x, num_split, 0)
    label_y_splits = tf.split(self.y_, num_split, 0)
    tower_grads = []
    tower_predictions = []
    each_sample_loss_list = []
    with tf.variable_scope("classifier") as scope:
        for i in xrange(num_split):
            with tf.device('/gpu:%d' % i):
                with slim.arg_scope(fc_mnsit_arg_scope()):
                    PreLogits, endpoints = cnn(x_splits[i],
                                               num_classes=10,
                                               is_training=True,
                                               dropout_keep_prob=0.5)
                with tf.variable_scope('Logits'):
                    logits_ = slim.flatten(PreLogits)
                    logits_ = slim.fully_connected(logits_,
                                                   num_classes,
                                                   activation_fn=None,
                                                   scope='logits')
                predictions = tf.nn.softmax(logits_, name='predictions')
                tower_predictions.append(predictions)
                correct_prediction = tf.equal(
                    tf.argmax(predictions, 1),
                    tf.argmax(label_y_splits[i], 1))
                # NOTE(review): self.accuracy / self.batch_mean_loss are
                # overwritten each tower iteration, so they reflect the
                # LAST tower only — confirm this is intended.
                with tf.name_scope('accuracy'):
                    self.accuracy = tf.reduce_mean(
                        tf.cast(correct_prediction, tf.float32))
                batch_mean_loss_calculator = CrossEntropyLoss()
                with tf.name_scope('batch_mean_loss'):
                    self.batch_mean_loss = batch_mean_loss_calculator.calculate_loss(
                        predictions, label_y_splits[i])
                    self._loss_summary = tf.summary.scalar(
                        'batch_mean_loss', self.batch_mean_loss)
                # loss between each prediction and each label
                each_sample_loss_calculator = CrossEntropyBetweenEachSample(
                )
                each_sample_loss = each_sample_loss_calculator.calculate_loss(
                    predictions, label_y_splits[i])  # 3*1
                each_sample_loss_list.append(each_sample_loss)
                # tf.get_variable_scope().reuse_variables()
                # Share variables across towers after the first one.
                scope.reuse_variables()
                grads = self.opt.compute_gradients(self.batch_mean_loss)
                tower_grads.append(grads)
    # Average gradients across towers and apply them once.
    grads = average_gradients(tower_grads)
    apply_gradient_op = self.opt.apply_gradients(
        grads, global_step=self.global_step)
    # Exponential moving average over the classifier's trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        0.99, self.global_step)
    trainable_variables = [
        v for v in tf.trainable_variables()
        if v.op.name.startswith('classifier')
    ]
    variables_averages_op = variable_averages.apply(trainable_variables)
    # Group all updates into a single train op.
    self.train_op = tf.group(apply_gradient_op, variables_averages_op)
    # Concatenate per-tower outputs back into full-batch tensors.
    self.each_sample_loss = tf.concat(each_sample_loss_list, axis=0)
    self.predictions = tf.concat(tower_predictions, axis=0)
# ) # print J # J, dJdy = trainer.learn_window( # v, # zip(train[:5],target[:5]), # NegativeLogLikelihoodLoss(), # AdaGrad(learning_rate=0.001), # ) # print J while True: J, dJdy = trainer.learn_throughtime( v, zip(train, target), CrossEntropyLoss(), # NegativeLogLikelihoodLoss(), opt, epochs, window_size) v.save('vanilla.net') str = '' x = to_one_hot_vect(char_to_ix['c'], vocab_size) for i in range(200): y = sm.forward(v.forward(x)) str += ix_to_char[np.random.choice(range(vocab_size), p=y.ravel())] x = to_one_hot_vect(np.argmax(y), vocab_size) print str # print [ix_to_char[np.argmax(t)] for t in train]
def main():
    """Person re-ID training entry point (cross-entropy + triplet loss).

    Builds loaders, model, losses, optimizer and scheduler from the
    module-level `args`; if pretrained weights are given, runs a validation
    pass first; then trains with periodic validation + checkpointing.
    """
    # NOTE: used to check mAP/rank-1 of the repo-provided model.pth.tar-9 on
    # the validation set. A self-trained tar-9 only reached mAP 15.1% /
    # Rank-1 23.3%, reason unknown.
    # Set args.load_weights = '/model/caohw9/track3_model/model.pth.tar-9'
    global args
    print(args)
    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    sys.stdout = Logger(osp.join(args.save_dir, "log.txt"))
    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    # Initialize data loaders.
    print('Initializing image data manager')
    dm = ImageDataManager(use_gpu, **trainset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders(
    )  # trainloader is for training; testloader_dict holds the 'query' and 'gallery' loaders
    print('suffessfully initialize loaders!')

    # Initialize the model.
    print('Initializing model: {}'.format(
        args.arch))  # args.arch default='resnet101'
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent', 'htri'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    # Load pretrained weights if given.
    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)
        # After loading a trained model, run validation first.
        print('=> Validation')
        print('Evaluating {} ...'.format(
            args.test_set))  # args.test_set presumably the validation set
        queryloader = testloader_dict['query']
        galleryloader = testloader_dict['test']
        model = nn.DataParallel(model).cuda() if use_gpu else model
        rank1 = test(model, queryloader, galleryloader, use_gpu)  # validation!
    # Multi-GPU training.
    else:
        model = nn.DataParallel(model).cuda() if use_gpu else model

    # Define loss, optimizer, lr scheduler.
    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    # Resume training if a checkpoint is given.
    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(
            args.resume, model,
            optimizer=optimizer)  # recover the epoch training stopped at

    # Start training!
    time_start = time.time()
    print('=> Start training')
    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)  # train one epoch
        scheduler.step()  # update lr
        # Once past args.start_eval, validate + checkpoint every
        # args.eval_freq epochs, and always at the final epoch.
        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print('=> Validation')
            print('Evaluating {} ...'.format(
                args.test_set))  # args.test_set presumably the validation set
            queryloader = testloader_dict['query']
            galleryloader = testloader_dict['test']
            rank1 = test(model, queryloader, galleryloader,
                         use_gpu)  # validation!
            save_checkpoint(
                {
                    'state_dict': model.state_dict(),  # model state dict
                    'rank1': rank1,
                    'epoch': epoch + 1,
                    'arch': args.arch,  # default='resnet101'
                    'optimizer': optimizer.state_dict(
                    ),  # optimizer state dict: state + hyperparameters (lr, momentum, weight_decay, ...)
                }, args.save_dir)  # save checkpoint alongside validation

    # Training finished!
    elapsed = round(time.time() - time_start)  # elapsed seconds
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
def run_epoch(model,
              iterator,
              optimizer,
              metric,
              weighted_metric=None,
              phase='train',
              epoch=0,
              device='cpu',
              writer=None):
    """Run one train or eval epoch for a deep-supervision segmentation model.

    In the train phase the model returns 9 outputs (8 side outputs + the
    final one) and the loss is a weighted sum of BCE + Dice over all of
    them. In the eval phase the model returns a single sigmoid map which is
    expanded to 2-channel (background/foreground) probabilities and scored
    with the CombinedLoss.

    Returns (mean_loss, mean_metric, mean_weighted_metric_or_None).
    """
    is_train = (phase == 'train')
    if is_train:
        model.train()
    else:
        model.eval()

    criterion_bce = torch.nn.BCELoss()
    criterion_dice = DiceLoss()

    epoch_loss = 0.0
    epoch_metric = 0.0
    if weighted_metric is not None:
        epoch_weighted_metric = 0.0

    # Gradients enabled only in the train phase.
    with torch.set_grad_enabled(is_train):
        # One random batch per epoch is kept for tensorboard visualization.
        batch_to_plot = np.random.choice(range(len(iterator)))
        for i, (images, masks) in enumerate(tqdm(iterator)):
            images, masks = images.to(device), masks.to(device)
            # predicted_masks = model(images)
            # loss = criterion(predicted_masks, masks)
            if is_train:
                # Deep supervision: 4 decoder side outputs, 4 refinement
                # side outputs, and the final fused output.
                outputs1, outputs2, outputs3, outputs4, outputs1_1, outputs1_2, outputs1_3, outputs1_4, output = model(
                    images)
                predicted_masks = output
                output = F.sigmoid(output)
                outputs1 = F.sigmoid(outputs1)
                outputs2 = F.sigmoid(outputs2)
                outputs3 = F.sigmoid(outputs3)
                outputs4 = F.sigmoid(outputs4)
                outputs1_1 = F.sigmoid(outputs1_1)
                outputs1_2 = F.sigmoid(outputs1_2)
                outputs1_3 = F.sigmoid(outputs1_3)
                outputs1_4 = F.sigmoid(outputs1_4)
                label = masks.to(torch.float)
                # BCE on every output against the same mask.
                loss0_bce = criterion_bce(output, label)
                loss1_bce = criterion_bce(outputs1, label)
                loss2_bce = criterion_bce(outputs2, label)
                loss3_bce = criterion_bce(outputs3, label)
                loss4_bce = criterion_bce(outputs4, label)
                loss5_bce = criterion_bce(outputs1_1, label)
                loss6_bce = criterion_bce(outputs1_2, label)
                loss7_bce = criterion_bce(outputs1_3, label)
                loss8_bce = criterion_bce(outputs1_4, label)
                # Dice on every output against the same mask.
                loss0_dice = criterion_dice(output, label)
                loss1_dice = criterion_dice(outputs1, label)
                loss2_dice = criterion_dice(outputs2, label)
                loss3_dice = criterion_dice(outputs3, label)
                loss4_dice = criterion_dice(outputs4, label)
                loss5_dice = criterion_dice(outputs1_1, label)
                loss6_dice = criterion_dice(outputs1_2, label)
                loss7_dice = criterion_dice(outputs1_3, label)
                loss8_dice = criterion_dice(outputs1_4, label)
                # Weighted deep-supervision sum; deeper (coarser) side
                # outputs get smaller weights than the fused output.
                loss = loss0_bce + 0.4 * loss1_bce + 0.5 * loss2_bce + 0.7 * loss3_bce + 0.8 * loss4_bce + \
                    0.4 * loss5_bce + 0.5 * loss6_bce + 0.7 * loss7_bce + 0.8 * loss8_bce + \
                    loss0_dice + 0.4 * loss1_dice + 0.5 * loss2_dice + 0.7 * loss3_dice + 0.8 * loss4_dice + \
                    0.4 * loss5_dice + 0.7 * loss6_dice + 0.8 * loss7_dice + 1 * loss8_dice
            else:
                predict = model(images)
                predicted_masks = F.sigmoid(predict).cpu().numpy()
                # predicted_masks_0 = predicted_masks <= 0.5
                # predicted_masks_1 = predicted_masks > 0.5
                # Build a 2-channel (background, foreground) probability map.
                predicted_masks_0 = 1 - predicted_masks
                predicted_masks_1 = predicted_masks
                predicted_masks = np.concatenate(
                    [predicted_masks_0, predicted_masks_1], axis=1)
                # NOTE(review): the CombinedLoss is re-constructed on every
                # eval batch — could be hoisted out of the loop.
                criterion = CombinedLoss(
                    [CrossEntropyLoss(),
                     GeneralizedDiceLoss(weighted=True)], [0.4, 0.6])
                # print(predicted_masks.shape)
                # print(masks.shape)
                predicted_masks = torch.tensor(predicted_masks).to(device)
                loss = criterion(predicted_masks.to(torch.float), masks)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            epoch_loss += loss.item()
            epoch_metric += metric(torch.argmax(predicted_masks, dim=1),
                                   masks)
            if weighted_metric is not None:
                epoch_weighted_metric += weighted_metric(
                    torch.argmax(predicted_masks, dim=1), masks)

            if i == batch_to_plot:
                images_to_plot, masks_to_plot, predicted_masks_to_plot = process_to_plot(
                    images, masks, predicted_masks)

    if writer is not None:
        writer.add_scalar(f"loss_epoch/{phase}", epoch_loss / len(iterator),
                          epoch)
        writer.add_scalar(f"metric_epoch/{phase}",
                          epoch_metric / len(iterator), epoch)
        if weighted_metric is not None:
            writer.add_scalar(f"weighted_metric_epoch/{phase}",
                              epoch_weighted_metric / len(iterator), epoch)
        # show images from the randomly chosen batch
        # and send them to tensorboard
        writer.add_images(tag='images',
                          img_tensor=images_to_plot,
                          global_step=epoch + 1)
        writer.add_images(tag='true masks',
                          img_tensor=masks_to_plot,
                          global_step=epoch + 1)
        writer.add_images(tag='predicted masks',
                          img_tensor=predicted_masks_to_plot,
                          global_step=epoch + 1)

    if weighted_metric is not None:
        return epoch_loss / len(iterator), epoch_metric / len(
            iterator), epoch_weighted_metric / len(iterator)
    return epoch_loss / len(iterator), epoch_metric / len(iterator), None