def load_joint(
    self,
    task: int,
    batch_size: int,
    shuffle: Optional[bool] = True,
    num_workers: Optional[int] = 0,
    pin_memory: Optional[bool] = True
) -> Tuple[DataLoader, DataLoader]:
    """
    Makes dataloaders for joint/multitask settings, i.e., for task `t` it
    returns datasets for tasks `1, 2, ..., t-1, t`.

    Args:
        task: The task number.
        batch_size: The batch_size for dataloaders.
        shuffle: Should loaders be shuffled? Default: True.
        num_workers: corresponds to PyTorch's `num_workers` argument. Default: 0.
        pin_memory: corresponds to PyTorch's `pin_memory` argument. Default: True.

    Returns:
        a Tuple of dataloaders, i.e., (train_loader, validation_loader).

    Examples::

        >>> benchmark = Benchmark(num_tasks=2, per_task_joint_examples=128)
        >>> # task 1 loaders (single): returns 4 batches (i.e., 128 examples)
        >>> train_loader_1, val_loader_1 = benchmark.load(1, batch_size=32)
        >>> # task 1 loaders (joint): returns 4 batches (i.e., 128 examples)
        >>> joint_train_loader_1, joint_val_loader_1 = benchmark.load_joint(1, batch_size=32)
        >>> # task 2 loaders (single): returns 4 batches (i.e., 128 examples)
        >>> train_loader_2, val_loader_2 = benchmark.load(2, batch_size=32)
        >>> # task 2 loaders (joint): returns 8 batches (i.e., 256 examples)
        >>> joint_train_loader_2, joint_val_loader_2 = benchmark.load_joint(2, batch_size=32)

    .. warning::
        The method will throw an error if `Benchmark` is instantiated without
        `per_task_joint_examples`. The reason is that, behind the scenes, we compute
        the indices for joint examples in the `precompute_joint_indices()` method,
        and this method relies on those computations.
    """
    if not self.per_task_joint_examples:
        raise ValueError(
            "Called load_joint() but per_task_joint_examples is not set")
    if task > self.num_tasks:
        raise ValueError(
            f"Asked to load task {task} but the benchmark has {self.num_tasks} tasks")

    trains, tests = [], []
    for prev_task in range(1, task + 1):
        prev_train = Subset(self.trains[prev_task], self.joint_indices_train[prev_task])
        prev_test = Subset(self.tests[prev_task], self.joint_indices_test[prev_task])
        trains.append(prev_train)
        tests.append(prev_test)
    trains, tests = ConcatDataset(trains), ConcatDataset(tests)

    train_loader = DataLoader(trains, batch_size, shuffle,
                              num_workers=num_workers, pin_memory=pin_memory)
    test_loader = DataLoader(tests, batch_size, shuffle,
                             num_workers=num_workers, pin_memory=pin_memory)
    return train_loader, test_loader
def train_val_dataset(dataset, val_split=0.25):
    train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size=val_split)
    return Subset(dataset, train_idx), Subset(dataset, val_idx)
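# A minimal usage sketch for train_val_dataset(); the FakeData dataset, batch sizes,
# and the 25% split below are illustrative assumptions, not part of the original code.
from torchvision.datasets import FakeData
from torchvision import transforms
from torch.utils.data import DataLoader

_ds = FakeData(size=1000, image_size=(3, 32, 32), transform=transforms.ToTensor())
_train_ds, _val_ds = train_val_dataset(_ds, val_split=0.25)
_train_dl = DataLoader(_train_ds, batch_size=64, shuffle=True)
_val_dl = DataLoader(_val_ds, batch_size=64, shuffle=False)
print(len(_train_ds), len(_val_ds))  # 750 / 250 for a 25% split of 1000 examples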
def getData(name='cifar10', train_bs=128, test_bs=1000): if name == 'mnist': train_loader = datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), ])) val_loader = datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), ])) offset = 3000 rng = np.random.RandomState(1234) R = rng.permutation(len(train_loader)) lengths = (len(train_loader) - offset, offset) train_loader, val_loader = [ Subset(train_loader, R[offset - length:offset]) for offset, length in zip(_accumulate(lengths), lengths) ] train_loader = torch.utils.data.DataLoader(train_loader, batch_size=train_bs, shuffle=True) val_loader = torch.utils.data.DataLoader(val_loader, batch_size=test_bs, shuffle=False) test_loader = torch.utils.data.DataLoader(datasets.MNIST( './data', train=False, download=False, transform=transforms.Compose([ transforms.ToTensor(), ])), batch_size=test_bs, shuffle=False) if name == 'pmnist': trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), ])) testset = datasets.MNIST(root='./data', train=False, download=False, transform=transforms.Compose([ transforms.ToTensor(), ])) x_train = trainset.train_data y_train = trainset.targets x_test = testset.test_data y_test = testset.targets torch.manual_seed(12008) perm = torch.randperm(784) x_train_permuted = x_train.reshape(x_train.shape[0], -1) x_train_permuted = x_train_permuted[:, perm] x_train_permuted = x_train_permuted.reshape(x_train.shape[0], 28, 28) x_test_permuted = x_test.reshape(x_test.shape[0], -1) x_test_permuted = x_test_permuted[:, perm] x_test_permuted = x_test_permuted.reshape(x_test.shape[0], 28, 28) x_train_permuted = add_channels(x_train_permuted) x_test_permuted = add_channels(x_test_permuted) train_loader = torch.utils.data.TensorDataset(x_train_permuted.float(), y_train) offset = 3000 rng = np.random.RandomState(1234) R = rng.permutation(len(train_loader)) lengths = (len(train_loader) - offset, offset) train_loader, val_loader = [ Subset(train_loader, R[offset - length:offset]) for offset, length in zip(_accumulate(lengths), lengths) ] train_loader = torch.utils.data.DataLoader(train_loader, batch_size=train_bs, shuffle=True) val_loader = torch.utils.data.DataLoader(val_loader, batch_size=test_bs, shuffle=False) test_loader = torch.utils.data.DataLoader( torch.utils.data.TensorDataset(x_test_permuted.float(), y_test), batch_size=test_bs, shuffle=False) if name == 'cifar10': transform_train = transforms.Compose([ transforms.ToTensor(), ]) transform_test = transforms.Compose([ transforms.ToTensor(), ]) train_loader = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train) offset = 3000 rng = np.random.RandomState(1234) R = rng.permutation(len(train_loader)) lengths = (len(train_loader) - offset, offset) train_loader, val_loader = [ Subset(train_loader, R[offset - length:offset]) for offset, length in zip(_accumulate(lengths), lengths) ] train_loader = torch.utils.data.DataLoader(train_loader, batch_size=train_bs, shuffle=True) val_loader = torch.utils.data.DataLoader(val_loader, batch_size=test_bs, shuffle=False) testset = datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test) test_loader = torch.utils.data.DataLoader(testset, batch_size=test_bs, shuffle=False) if name == 'double_pendulum': # open a file, where you stored the pickled data file = open("./data/double_pendulum.pkl", 'rb') data = pickle.load(file) 
file.close() trainset = [] train_target = [] testset = [] test_target = [] for i in range(1, 400): trainset.append(data[i:i + 1000]) train_target.append(data[i + 1000 + 1]) for i in range(1501, 3000): testset.append(data[i:i + 1000]) test_target.append(data[i + 1000 + 1]) trainset = np.asarray(trainset) testset = np.asarray(testset) train_target = np.asarray(train_target) test_target = np.asarray(test_target) trainset = torch.tensor(trainset) testset = torch.tensor(testset) train_target = torch.tensor(train_target) test_target = torch.tensor(test_target) #trainset = add_channels(trainset) #testset = add_channels(testset) train_loader = torch.utils.data.DataLoader( torch.utils.data.TensorDataset(trainset.float(), train_target.float()), batch_size=train_bs, shuffle=True) test_loader = torch.utils.data.DataLoader( torch.utils.data.TensorDataset(testset.float(), test_target.float()), batch_size=test_bs, shuffle=False) return train_loader, test_loader, val_loader
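# Hedged example of calling getData() as defined above; the dataset name and batch
# sizes simply mirror the function's defaults. Note that the 'double_pendulum' branch
# only builds train/test loaders, so requesting it would leave val_loader undefined.
train_loader, test_loader, val_loader = getData(name='cifar10', train_bs=128, test_bs=1000)
print(len(train_loader.dataset), len(val_loader.dataset), len(test_loader.dataset))
# expected: 47000 3000 10000 (CIFAR-10 train split minus the 3000-example validation offset)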
np.random.seed(1) torch.manual_seed(1) class histoCancerDataset(Dataset): def __init__(self, data_dir, transform,data_type="train"): path2data=os.path.join(data_dir, data_type) self.filenames = os.listdir(path2data) self.full_filenames = [os.path.join(path2data, f) for f in self.filenames] csv_filename=data_type+"_labels.csv" path2csvLabels=os.path.join(data_dir,csv_filename) labels_df=pd.read_csv(path2csvLabels) labels_df.set_index("id", inplace=True) self.labels = [labels_df.loc[filename[:-4]].values[0] for filename in self.filenames] self.transform = transform def __len__(self): return len(self.full_filenames) def __getitem__(self, idx): image = Image.open(self.full_filenames[idx]) image = self.transform(image) return image, self.labels[idx] data_dir = "../chapter2/data/" data_transformer = transforms.Compose([transforms.ToTensor()]) hist_ds = histoCancerDataset(data_dir, data_transformer,data_type="train") test_index = np.random.randint(hist_ds.__len__(),size= 100) test_ds = Subset(hist_ds,test_index) test_dl = DataLoader(test_ds, batch_size=1, shuffle=False)
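# A small sanity check over the 100-image random subset built above; the printed
# shape depends on the histopathology patch size on disk (commonly 96x96), which
# is an assumption about the data rather than something defined in this code.
x, y = next(iter(test_dl))
print(x.shape, y)  # e.g. torch.Size([1, 3, 96, 96]) and the corresponding label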
def _get_datasets(dataset, dataroot, load_train:bool, load_test:bool, transform_train, transform_test, train_max_size:int, test_max_size:int)\ ->Tuple[DatasetLike, DatasetLike]: logger = get_logger() trainset, testset = None, None if dataset == 'cifar10': if load_train: # NOTE: train transforms will also be applied to validation set trainset = torchvision.datasets.CIFAR10(root=dataroot, train=True, download=True, transform=transform_train) if load_test: testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=transform_test) elif dataset == 'mnist': if load_train: trainset = torchvision.datasets.MNIST(root=dataroot, train=True, download=True, transform=transform_train) if load_test: testset = torchvision.datasets.MNIST(root=dataroot, train=False, download=True, transform=transform_test) elif dataset == 'fashionmnist': if load_train: trainset = torchvision.datasets.FashionMNIST( root=dataroot, train=True, download=True, transform=transform_train) if load_test: testset = torchvision.datasets.FashionMNIST( root=dataroot, train=False, download=True, transform=transform_test) elif dataset == 'reduced_cifar10': if load_train: trainset = torchvision.datasets.CIFAR10(root=dataroot, train=True, download=True, transform=transform_train) sss = StratifiedShuffleSplit(n_splits=1, test_size=46000) # 4000 sss = sss.split(list(range(len(trainset))), trainset.targets) train_idx, valid_idx = next(sss) targets = [trainset.targets[idx] for idx in train_idx] trainset = Subset(trainset, train_idx) trainset.targets = targets if load_test: testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=transform_test) elif dataset == 'cifar100': if load_train: trainset = torchvision.datasets.CIFAR100(root=dataroot, train=True, download=True, transform=transform_train) if load_test: testset = torchvision.datasets.CIFAR100(root=dataroot, train=False, download=True, transform=transform_test) elif dataset == 'svhn': if load_train: trainset = torchvision.datasets.SVHN(root=dataroot, split='train', download=True, transform=transform_train) extraset = torchvision.datasets.SVHN(root=dataroot, split='extra', download=True, transform=transform_train) trainset = ConcatDataset([trainset, extraset]) if load_test: testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test) elif dataset == 'reduced_svhn': if load_train: trainset = torchvision.datasets.SVHN(root=dataroot, split='train', download=True, transform=transform_train) sss = StratifiedShuffleSplit(n_splits=1, test_size=73257 - 1000) #1000 sss = sss.split(list(range(len(trainset))), trainset.targets) train_idx, valid_idx = next(sss) targets = [trainset.targets[idx] for idx in train_idx] trainset = Subset(trainset, train_idx) trainset.targets = targets if load_test: testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test) elif dataset == 'imagenet': if load_train: trainset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), transform=transform_train) # compatibility trainset.targets = [lb for _, lb in trainset.samples] if load_test: testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test) elif dataset == 'reduced_imagenet': # randomly chosen indices idx120 = [ 904, 385, 759, 884, 784, 844, 132, 214, 990, 786, 979, 582, 104, 288, 697, 480, 66, 943, 308, 282, 118, 926, 882, 478, 133, 884, 570, 964, 825, 656, 661, 289, 385, 448, 705, 609, 955, 5, 703, 713, 695, 
811, 958, 147, 6, 3, 59, 354, 315, 514, 741, 525, 685, 673, 657, 267, 575, 501, 30, 455, 905, 860, 355, 911, 24, 708, 346, 195, 660, 528, 330, 511, 439, 150, 988, 940, 236, 803, 741, 295, 111, 520, 856, 248, 203, 147, 625, 589, 708, 201, 712, 630, 630, 367, 273, 931, 960, 274, 112, 239, 463, 355, 955, 525, 404, 59, 981, 725, 90, 782, 604, 323, 418, 35, 95, 97, 193, 690, 869, 172 ] if load_train: trainset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), transform=transform_train) # compatibility trainset.targets = [lb for _, lb in trainset.samples] sss = StratifiedShuffleSplit(n_splits=1, test_size=len(trainset) - 500000, random_state=0) # 4000 sss = sss.split(list(range(len(trainset))), trainset.targets) train_idx, valid_idx = next(sss) # filter out train_idx = list( filter(lambda x: trainset.labels[x] in idx120, train_idx)) valid_idx = list( filter(lambda x: trainset.labels[x] in idx120, valid_idx)) targets = [ idx120.index(trainset.targets[idx]) for idx in train_idx ] for idx in range(len(trainset.samples)): if trainset.samples[idx][1] not in idx120: continue trainset.samples[idx] = (trainset.samples[idx][0], idx120.index( trainset.samples[idx][1])) trainset = Subset(trainset, train_idx) trainset.targets = targets logger.info('reduced_imagenet train={}'.format(len(trainset))) if load_test: testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test) test_idx = list(filter(lambda x: testset.samples[x][1] in \ idx120, range(len(testset)))) for idx in range(len(testset.samples)): if testset.samples[idx][1] not in idx120: continue testset.samples[idx] = (testset.samples[idx][0], idx120.index(testset.samples[idx][1])) testset = Subset(testset, test_idx) else: raise ValueError('invalid dataset name=%s' % dataset) if train_max_size > 0: logger.warn( 'Trainset trimmed to max_batches = {}'.format(train_max_size)) trainset = LimitDataset(trainset, train_max_size) if test_max_size > 0: logger.warn( 'Testset trimmed to max_batches = {}'.format(test_max_size)) testset = LimitDataset(testset, test_max_size) return trainset, testset
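# Hedged usage sketch for _get_datasets(); the plain ToTensor transforms below are
# placeholders (the real transforms are built elsewhere and passed in), and the
# zero max-size arguments disable the LimitDataset trimming.
import torchvision.transforms as T

transform_train = T.Compose([T.ToTensor()])
transform_test = T.Compose([T.ToTensor()])
trainset, testset = _get_datasets('cifar10', dataroot='./data',
                                  load_train=True, load_test=True,
                                  transform_train=transform_train,
                                  transform_test=transform_test,
                                  train_max_size=0, test_max_size=0)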
def train(opt): # log stuff if os.path.isdir(opt.log_path): shutil.rmtree(opt.log_path) os.makedirs(opt.log_path) writer = SummaryWriter(opt.log_path) dataset = MotBBImageSequence('dataset_utils/Mot17_test_single.txt', use_only_first_video=False, new_width=832, new_height=832) train_data = Subset(dataset, range(0, dataset.valid_begin)) valid_data = Subset(dataset, range(dataset.valid_begin, len(dataset))) train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, drop_last=True) valid_loader = DataLoader(valid_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, drop_last=True) obs_length = 10 pred_length = 9 epoch_len = len(train_loader) for epoch in range(opt.num_epoches): for img_num, (gt, b, img1, img2) in enumerate(train_loader): opt.model.train() opt.decoder.train() opt.model.lstm_part.reinit_lstm(opt.batch_size) opt.decoder.reset_hidden(opt.batch_size) seq_loss = 0 seq_ap = 0 seq_loss_coord, seq_loss_conf, seq_loss_pred = (0.0, 0.0, 0.0) pred_sequence = [] for i in range(obs_length): single_gt = b[:, i].to(opt.device()) single_img2 = img2[:, i].to(opt.device()) single_img2_normed = preprocess(img2[:, i]).to(opt.device()) single_img1_normed = preprocess(img1[:, i]).to(opt.device()) double_image = torch.cat( (single_img1_normed, single_img2_normed), dim=1) logits = opt.model((double_image, single_img2 * 255.)) loss, loss_coord, loss_conf = opt.yolo_loss(logits, single_gt) seq_ap += get_ap(logits, filter_gt_batch(single_gt), opt.image_size, opt.image_size, opt.model.anchors) seq_loss += loss seq_loss_conf += loss_conf.item() seq_loss_coord += loss_coord.item() prev_out = logits_to_box_params(logits.detach(), opt.model.anchors) # the box parameters are normalized to [0, 1] # at the moment [batch, anchors, ...., h* w] # rearrange tensor s.t [batch, h, w, anchors, ...] # ... == x, y, w, h, conf prev_out = prev_out.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size, opt.encoding_size) \ .permute(0, 3, 4, 1, 2) # be carefull the position of the conf/id in the targets is 0 not 4 # so we change it at this point to be consistent with the labels prev_out = torch.Tensor(np.roll(prev_out.cpu().numpy(), 1, axis=-1)).to(opt.device()) opt.decoder.set_hidden(opt.model.lstm_part.hidden, opt.model.lstm_part.cell) for i in range(pred_length): _, yolo_target = opt.pred_loss.to_yolo( input=gt[:, obs_length].numpy(), target=b[:, obs_length + i].numpy(), use_iou=True) yolo_target = torch.Tensor(yolo_target).to(opt.device()) # target boxes are in [0, grid_h] -> normalize to 1 # at this point i assume that the image is a square !!! 
yolo_target[:, :, :, :, 1:] = yolo_target[:, :, :, :, 1:] / opt.encoding_size input_tensor = prev_out[:, :, :, :, 1:].contiguous() \ .view(opt.batch_size, opt.encoding_size, opt.encoding_size, len(opt.model.anchors) * 4) \ .permute(0, 3, 1, 2) pred = opt.decoder(input_tensor) pred = pred.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size, opt.encoding_size) \ .permute(0, 3, 4, 1, 2) seq_loss_pred += opt.pred_loss.forward(pred, prev_out, yolo_target) pred_sequence.append((prev_out.detach().cpu().numpy(), pred.detach().cpu().numpy(), yolo_target.detach().cpu().numpy())) prev_out[:, :, :, :, 1:] = yolo_target[:, :, :, :, 1:] seq_loss += seq_loss_pred loss = seq_loss loss.backward() opt.optimizer_encoder.step() opt.optimizer_decoder.step() opt.optimizer_encoder.zero_grad() opt.optimizer_decoder.zero_grad() seq_loss = seq_loss.item() - seq_loss_pred.item() writeLossToSummary(writer, 'Train', seq_loss / obs_length, seq_loss_coord / obs_length, seq_loss_conf / obs_length, epoch * epoch_len + img_num) print(f"epoch:{epoch} it: {img_num}") print(f"loss_seq: {seq_loss/obs_length}, " f"loss_coord: {seq_loss_coord / obs_length}, " f"loss_conf: {seq_loss_conf / obs_length}, " f"mAP: {seq_ap/obs_length}") seq_loss_pred = seq_loss_pred.item() box_list = prediction_to_box_list(pred_sequence) dis_error = displacement_error(box_list, center_distance, image_size=832.0) writer.add_scalar('Train/loss_pred', seq_loss_pred / pred_length, epoch * epoch_len + img_num) writer.add_scalar('Train/dis_err', dis_error, epoch * epoch_len + img_num) writer.add_scalar('Train/AP', seq_ap / obs_length, epoch * epoch_len + img_num) print( f"loss_pred: {seq_loss_pred / pred_length}, dis_error: {dis_error}" ) # draws last batch draw_pred_sequence(box_list, img2[0], pred_length, obs_length, name='train_img.png', image_size=832) ############### # VALIDATION ############### print("###############") print("# VALIDATION BEGIN") print("###############") opt.model.eval() opt.decoder.eval() valid_len = len(valid_loader) loss_val = 0 loss_ap = 0 loss_coord_val = 0 loss_conf_val = 0 loss_pred_val = 0 dis_error_val = 0 for img_num, (gt, b, img1, img2) in enumerate(valid_loader): opt.model.lstm_part.reinit_lstm(opt.batch_size) opt.decoder.reset_hidden(opt.batch_size) opt.optimizer_encoder.zero_grad() opt.optimizer_decoder.zero_grad() seq_loss = 0 seq_loss_coord, seq_loss_conf, seq_loss_pred = (0.0, 0.0, 0.0) seq_ap = 0 pred_sequence = [] for i in range(obs_length): single_gt = b[:, i].to(opt.device()) single_img2 = img2[:, i].to(opt.device()) single_img2_normed = preprocess(img2[:, i]).to(opt.device()) single_img1_normed = preprocess(img1[:, i]).to(opt.device()) double_image = torch.cat( (single_img1_normed, single_img2_normed), dim=1) with torch.no_grad(): logits = opt.model((double_image, single_img2 * 255.)) loss, loss_coord, loss_conf = opt.yolo_loss( logits, single_gt) seq_ap += get_ap(logits.detach(), filter_gt_batch(single_gt), opt.image_size, opt.image_size, opt.model.anchors) seq_loss += loss seq_loss_conf += loss_conf.item() seq_loss_coord += loss_coord.item() with torch.no_grad(): prev_out = logits_to_box_params(logits.detach(), opt.model.anchors) # the box parameters are normalized to [0, 1] # at the moment [batch, anchors, ...., h* w] # rearrange tensor s.t [batch, h, w, anchors, ...] # ... 
== x, y, w, h, conf # print(f"origin mask {torch.sum(mask, dim=(0, 1))}") prev_out = prev_out.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size, opt.encoding_size) \ .permute(0, 3, 4, 1, 2) # be carefull the position of the conf/id in the targets is 0 not 4 # so we change it at this point to be consistent with the labels prev_out = torch.Tensor( np.roll(prev_out.cpu().numpy(), 1, axis=-1)).to(opt.device()) opt.decoder.set_hidden(opt.model.lstm_part.hidden, opt.model.lstm_part.cell) for i in range(pred_length): _, yolo_target = opt.pred_loss.to_yolo( input=gt[:, obs_length].numpy(), target=b[:, obs_length + i].numpy(), use_iou=True) yolo_target = torch.Tensor(yolo_target).to(opt.device()) # target boxes are in [0, grid_h] -> normalize to 1 # at this point i assume that the image is a square !!! yolo_target[:, :, :, :, 1:] = yolo_target[:, :, :, :, 1:] / opt.encoding_size input_tensor = prev_out[:, :, :, :, 1:].contiguous() \ .view(opt.batch_size, opt.encoding_size, opt.encoding_size, len(opt.model.anchors) * 4) \ .permute(0, 3, 1, 2) pred = opt.decoder(input_tensor) pred = pred.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size, opt.encoding_size) \ .permute(0, 3, 4, 1, 2) seq_loss_pred += opt.pred_loss.forward( pred, prev_out, yolo_target) pred_sequence.append((prev_out.detach().cpu().numpy(), pred.detach().cpu().numpy(), yolo_target.detach().cpu().numpy())) prev_out[:, :, :, :, 1:] += pred seq_loss_pred = seq_loss_pred.item() box_list = prediction_to_box_list(pred_sequence) dis_error = displacement_error(box_list, center_distance, image_size=832.0) loss_val += seq_loss.item() / valid_len / obs_length loss_coord_val += seq_loss_coord / valid_len / obs_length loss_conf_val += seq_loss_conf / valid_len / obs_length loss_ap += seq_ap / obs_length / valid_len loss_pred_val += seq_loss_pred / valid_len / pred_length dis_error_val += dis_error / valid_len draw_pred_sequence(box_list, img2[0], pred_length, obs_length, name=f'val_img.png', image_size=832) writeLossToSummary(writer, 'Val', loss_val, loss_coord_val, loss_conf_val, (epoch + 1) * epoch_len) print(f"epoch:{epoch}") print( f"loss_seq: {loss_val}, loss_coord: {loss_coord_val}, loss_conf: {loss_conf_val}, mAP: {loss_ap}" ) writer.add_scalar('Val/loss_pred', loss_pred_val, (epoch + 1) * epoch_len) writer.add_scalar('Val/dis_err', dis_error_val, (epoch + 1) * epoch_len) writer.add_scalar('Val/mAP', loss_ap, (epoch + 1) * epoch_len) print(f"loss_pred: {loss_pred_val}, dis_error: {dis_error_val}") print("###############") print("# VALIDATION END") print("###############") torch.save( { 'epoch': epoch, 'model_state_dict': opt.model.state_dict(), 'optimizer_state_dict': opt.optimizer_encoder.state_dict() }, opt.log_path + f'/snapshot_encoder{epoch}.tar') torch.save( { 'epoch': epoch, 'model_state_dict': opt.decoder.state_dict(), 'optimizer_state_dict': opt.optimizer_decoder.state_dict() }, opt.log_path + f'/snapshot_decoder{epoch}.tar') writer.close()
from datasets.mb_speech import MBSpeech as SpeechDataset, vocab # only 1 voice, so use much simpler train transform train_transform = Compose([LoadMagSpectrogram(), ComputeMelSpectrogramFromMagSpectrogram(num_features=num_features, normalize=args.normalize, eps=eps), ApplyAlbumentations(album.Compose([album.Cutout(num_holes=8)], p=1)), TimeScaleSpectrogram(max_scale=0.1, probability=0.5), MaskSpectrogram(frequency_mask_max_percentage=0.3, time_mask_max_percentage=0.1, probability=0.5)]) train_dataset = SpeechDataset(transform=train_transform) valid_dataset = SpeechDataset(transform=valid_transform) indices = list(range(len(train_dataset))) train_dataset = Subset(train_dataset, indices[:-args.valid_batch_size]) valid_dataset = Subset(valid_dataset, indices[-args.valid_batch_size:]) train_data_sampler, valid_data_sampler = None, None if args.distributed: train_data_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) valid_data_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset) train_data_loader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=(train_data_sampler is None), collate_fn=collate_fn, num_workers=args.dataload_workers_nums, sampler=train_data_sampler) valid_data_loader = DataLoader(valid_dataset, batch_size=args.valid_batch_size, shuffle=False, collate_fn=collate_fn, num_workers=args.dataload_workers_nums, sampler=None) if args.model == 'quartznet5x5': model = QuartzNet5x5(vocab=vocab, num_features=num_features)
def decode_classes_from_layers(gpu, inference, generator, image_size, n_filters, noise_dim, data_path, dataset, nonlinear=False, lr=0.001, folds=10, epochs=50, hidden_size=1000, wd=1e-4, opt='adam', lr_schedule=False, batch_size=128, workers=4, verbose=True): """ Trains a linear or nonlinear decoder from a given layer of the cortex, all layers at a time (including inputs) Does k-fold CV on the test set of this dataset. A random permutation is used. Returns a tensor of accuracies on each of the k folds and for each of the 6 decoders: Input --- Layer1 .... Layer4 --- Noise """ # ----- Get dataset ------ # # Data loading code valdir = os.path.join(data_path, 'val') normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) if dataset in ['imagenet', 'folder', 'lfw']: # folder dataset all_test_dataset = datasets.ImageFolder( valdir, transforms.Compose([ transforms.Resize(image_size), transforms.CenterCrop(image_size), transforms.ToTensor(), normalize, ])) nc = 3 n_classes = 1000 elif dataset == 'cifar10': all_test_dataset = datasets.CIFAR10(root=data_path, download=True, train=False, transform=transforms.Compose([ transforms.Resize(image_size), transforms.ToTensor(), transforms.Normalize( (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ])) nc = 3 n_classes = 10 elif dataset == 'mnist': all_test_dataset = datasets.MNIST(root=data_path, download=True, train=False, transform=transforms.Compose([ transforms.Resize(image_size), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )), ])) nc = 1 n_classes = 10 assert all_test_dataset perm = torch.randperm(len(all_test_dataset)) n_test_examples = len(all_test_dataset) // folds all_accuracies = [] all_reconstructions = [] for f in range(folds): # ---- Get CV indices ---- test_idx = perm[f * n_test_examples:(f + 1) * n_test_examples] if f == folds - 1: #last fold may be larger if len(all_test_dataset) % folds != 0 test_idx = perm[f * n_test_examples:] train_idx = torch.cat( (perm[:f * n_test_examples], perm[(f + 1) * n_test_examples:])) # ----- Make loaders ----- train_dataset = Subset(all_test_dataset, train_idx) test_dataset = Subset(all_test_dataset, test_idx) train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True, ) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True, ) # ----- Build decoder ------ if nonlinear: decoder = NonlinearDecoder(image_size, noise_dim, n_classes, nc, n_filters, hidden_size) else: decoder = LinearDecoder(image_size, noise_dim, n_classes, nc, n_filters) # get to proper GPU torch.cuda.set_device(gpu) inference = inference.cuda(gpu) generator = generator.cuda(gpu) decoder = decoder.cuda(gpu) # ------ Build optimizer ------ # if opt == 'adam': optimizer = optim.Adam(decoder.parameters(), lr=lr, betas=(.9, 0.999), weight_decay=wd) elif opt == 'sgd': optimizer = optim.SGD(decoder.parameters(), lr=lr, momentum=0.9, weight_decay=wd) else: raise AssertionError("This optimizer not implemented yet.") for epoch in range(epochs): if lr_schedule: adjust_lr(epoch, optimizer, epochs) train(inference, optimizer, decoder, train_loader, gpu) if verbose or (epoch == epochs - 1): accuracies, reconstructions = test(inference, generator, decoder, test_loader, gpu, epoch, len(test_idx), verbose) all_accuracies.append(accuracies) all_reconstructions.append(reconstructions) return torch.Tensor(all_accuracies), torch.stack(all_reconstructions)
def run(): args = parser.parse_args() nlayer = args.nlayer bidirection = args.bidirection file_path = args.file_path#'/content/drive/My Drive/Master_Final_Project/Genetic_attack/Code/nlp_adversarial_example_master_pytorch/glove.840B.300d.txt'#'/lustre/scratch/scratch/ucabdc3/lstm_attack' save_path = os.path.join(file_path, 'results') MAX_VOCAB_SIZE = 50000 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # with open(os.path.join(file_path, 'dataset_%d.pkl' %MAX_VOCAB_SIZE), 'rb') as f: # dataset = pickle.load(f) with open('aux_files/dataset_%d.pkl' %MAX_VOCAB_SIZE, 'rb') as f: dataset = pickle.load(f) # skip_list = np.load('aux_files/missed_embeddings_counter_%d.npy' %MAX_VOCAB_SIZE) embedding_matrix = np.load('aux_files/embeddings_glove_%d.npy' %(MAX_VOCAB_SIZE)) embedding_matrix = torch.tensor(embedding_matrix.T).to(device) # goog_lm = LM() # pytorch max_len = 100 # padded_train_raw = pad_sequences(dataset.train_seqs2, maxlen = max_len, padding = 'post') padded_test_raw = pad_sequences(dataset.test_seqs2, maxlen = max_len, padding = 'post') # # TrainSet # data_set = Data_infor(padded_train_raw, dataset.train_y) # num_train = len(data_set) # indx = list(range(num_train)) # train_set = Subset(data_set, indx) # TestSet batch_size = 1 SAMPLE_SIZE = args.sample_size data_set = Data_infor(padded_test_raw, dataset.test_y) num_test = len(data_set) indx = list(range(num_test)) all_test_set = Subset(data_set, indx) indx = random.sample(indx, SAMPLE_SIZE) test_set = Subset(data_set, indx) test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = False, pin_memory=True) all_test_loader = DataLoader(all_test_set, batch_size = 128, shuffle = True) lstm_size = 128 rnn_state_save = os.path.join(file_path,'best_lstm_0.7_0.001_test2') glove_len = args.glove_len model = SentimentAnalysis(batch_size=batch_size, embedding_matrix = embedding_matrix, hidden_size = lstm_size, kept_prob = 0.8, num_layers=nlayer, bidirection=bidirection, embedding_dim = glove_len) model.load_state_dict(torch.load(rnn_state_save)) model = model.to(device) model.eval() test_pred = torch.tensor([]) test_targets = torch.tensor([]) with torch.no_grad(): for batch_index, (seqs, length, target) in enumerate(all_test_loader): seqs, target, length = seqs.to(device), target.to(device), length.to(device) seqs = seqs.type(torch.LongTensor) len_order = torch.argsort(length, descending = True) length = length[len_order] seqs = seqs[len_order] target = target[len_order] output, pred_out = model.pred(seqs, length, False) test_pred = torch.cat((test_pred, pred_out.cpu()), dim = 0) test_targets = torch.cat((test_targets, target.type(torch.float).cpu())) accuracy = model.evaluate_accuracy(test_pred.numpy(), test_targets.numpy()) print('Test Accuracy:{:.4f}.'.format(accuracy)) n1 = 8 n2 = 4 pop_size = 60 max_iters = 20 n_prefix = 5 n_suffix = 5 batch_model = SentimentAnalysis(batch_size=pop_size, embedding_matrix = embedding_matrix, hidden_size = lstm_size, kept_prob = 0.8, num_layers=nlayer, bidirection=bidirection,embedding_dim = glove_len) batch_model.eval() batch_model.load_state_dict(torch.load(rnn_state_save)) batch_model.to(device) neighbour_model = SentimentAnalysis(batch_size=n1, embedding_matrix = embedding_matrix, hidden_size = lstm_size, kept_prob = 0.8, num_layers=nlayer, bidirection=bidirection, embedding_dim = glove_len) neighbour_model.eval() neighbour_model.load_state_dict(torch.load(rnn_state_save)) neighbour_model.to(device) lm_model = gpt_2_get_words_probs() use_lm = args.use_lm ga_attack = 
GeneticAttack_pytorch(model, batch_model, neighbour_model, compute_dis, lm_model, max_iters = max_iters, dataset = dataset, pop_size = pop_size, n1 = n1, n2 = n2, n_prefix = n_prefix, n_suffix = n_suffix, use_lm = use_lm, use_suffix = True) TEST_SIZE = args.test_size order_pre = 0 n = 0 seq_success = [] seq_orig = [] seq_orig_label = [] word_varied = [] orig_list = [] adv_list =[] dist_list = [] # if order_pre != 0: # seq_success = np.load(os.path.join(save_path,'seq_success.npy'), allow_pickle = True).tolist() # seq_orig = np.load(os.path.join(save_path,'seq_orig.npy')).tolist() # seq_orig_label = np.load(os.path.join(save_path,'seq_orig_label.npy')).tolist() # word_varied = np.load(os.path.join(save_path,'word_varied.npy'), allow_pickle = True).tolist() # n = len(seq_success) for order, (seq, l, target) in enumerate(test_loader): if order>=order_pre: # print('Sequence number:{}'.format(order)) seq_len = np.sum(np.sign(seq.numpy())) seq, l = seq.to(device), l.to(device) seq = seq.type(torch.LongTensor) model.eval() with torch.no_grad(): preds = model.pred(seq, l, False)[1] orig_pred = np.argmax(preds.cpu().detach().numpy()) if orig_pred != target.numpy()[0]: # print('Wrong original prediction') # print('----------------------') continue if seq_len > 100: # print('Sequence is too long') # print('----------------------') continue print('Sequence number:{}'.format(order)) print('Length of sentence: {}, Number of samples:{}'.format(l.item(), n+1)) print(preds) seq_orig.append(seq[0].numpy()) seq_orig_label.append(target.numpy()[0]) target = 1-target.numpy()[0] # seq_success.append(ga_attack.attack(seq, target, l)) # if None not in np.array(seq_success[n]): # w_be = [dataset.inv_dict[seq_orig[n][i]] for i in list(np.where(seq_success[n] != seq_orig[n])[0])] # w_to = [dataset.inv_dict[seq_success[n][i]] for i in list(np.where(seq_success[n] != seq_orig[n])[0])] # for i in range(len(w_be)): # print('{} ----> {}'.format(w_be[i], w_to[i])) # word_varied.append([w_be]+[w_to]) # else: # print('Fail') # print('----------------------') # n += 1 # np.save(os.path.join(save_path,'seq_success_1000.npy'), np.array(seq_success)) # np.save(os.path.join(save_path,'seq_orig_1000.npy'), np.array(seq_orig)) # np.save(os.path.join(save_path,'seq_orig_label_1000.npy'), np.array(seq_orig_label)) # np.save(os.path.join(save_path,'word_varied_1000.npy'), np.array(word_varied)) # if n>TEST_SIZE: # break orig_list.append(seq[0].numpy()) x_adv = ga_attack.attack( seq, target, l) adv_list.append(x_adv) if x_adv is None: print('%d failed' %(order)) dist_list.append(100000) else: num_changes = np.sum(seq[0].numpy() != x_adv) print('%d - %d changed.' 
%(order, num_changes)) dist_list.append(num_changes) # display_utils.visualize_attack(sess, model, dataset, x_orig, x_adv) # display_utils.visualize_attack(sess, model, dataset, x_orig, x_adv) w_be = [dataset.inv_dict[seq[0].numpy().tolist()[i]] for i in list(np.where(seq[0].numpy() != np.array(x_adv))[0])] w_to = [dataset.inv_dict[x_adv[i]] for i in list(np.where(seq[0].numpy() != np.array(x_adv))[0])] for i in range(len(w_be)): print('{} ----> {}'.format(w_be[i], w_to[i])) n += 1 if n>TEST_SIZE: break orig_len = [np.sum(np.sign(x)) for x in orig_list] normalized_dist_list = [dist_list[i]/orig_len[i] for i in range(len(orig_list)) ] SUCCESS_THRESHOLD = 0.25 successful_attacks = [x <= SUCCESS_THRESHOLD for x in normalized_dist_list] print('Attack success rate : {:.2f}%'.format(np.mean(successful_attacks)*100)) SUCCESS_THRESHOLD = 0.2 successful_attacks = [x <= SUCCESS_THRESHOLD for x in normalized_dist_list] print('Attack success rate : {:.2f}%'.format(np.mean(successful_attacks)*100)) print('--------------------------')
def create_dataloader(dataset_type, root):
    if dataset_type == 'mnist':
        mean = (0.1307, )
        std = (0.3081, )
        transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean=mean, std=std)])
        # load dataset
        train_set = datasets.MNIST(root, train=True, transform=transform, download=True)
        test_set = datasets.MNIST(root, train=False, transform=transform, download=False)
        val_set = test_set
        indices = np.arange(len(train_set))
        np.random.shuffle(indices)
        labeled_set = Subset(train_set, indices=indices[:args.num_labeled])
        train_set = datasets.MNIST(root, train=True, transform=TransformFixMatch(mean, std), download=False)
        unlabeled_set = Subset(train_set, indices=indices[:args.num_unlabeled])
    elif dataset_type == 'cifar10':
        mean = [0.49139968, 0.48215827, 0.44653124]
        std = [0.24703233, 0.24348505, 0.26158768]
        transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        test_transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(mean, std)])
        # load dataset
        train_set = datasets.CIFAR10(root, train=True, transform=transform, download=True)
        test_set = datasets.CIFAR10(root, train=False, transform=test_transform, download=False)
        val_set = test_set
        labeled_set = Subset(train_set, indices=np.random.permutation(len(train_set))[:args.num_labeled])
        train_set = datasets.CIFAR10(root, train=True, transform=TransformFixMatch(mean, std), download=False)
        unlabeled_set = Subset(train_set, indices=np.random.permutation(len(train_set))[:args.num_unlabeled])
    elif dataset_type == 'cifar100':
        mean = [0.5071, 0.4865, 0.4409]
        std = [0.2673, 0.2564, 0.2762]
        transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(20),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        test_transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(mean, std)])
        # load dataset
        train_set = datasets.CIFAR100(root, train=True, transform=transform, download=True)
        test_set = datasets.CIFAR100(root, train=False, transform=test_transform, download=False)
        val_set = test_set
        labeled_set = Subset(train_set, indices=np.random.permutation(len(train_set))[:args.num_labeled])
        # reload CIFAR-100 with the FixMatch transform for the unlabeled split
        train_set = datasets.CIFAR100(root, train=True, transform=TransformFixMatch(mean, std), download=False)
        unlabeled_set = Subset(train_set, indices=np.random.permutation(len(train_set))[:args.num_unlabeled])

    # generate DataLoader
    # train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    labeled_loader = DataLoader(labeled_set, batch_size=args.batch_size, shuffle=True)
    unlabeled_loader = DataLoader(unlabeled_set, batch_size=args.batch_size * args.mu, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)
    print('Labeled data:', len(labeled_set), 'Unlabeled data:', len(unlabeled_set))
    return labeled_loader, unlabeled_loader, val_loader, test_loader
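# Illustrative call, assuming the module-level `args` namespace (num_labeled,
# num_unlabeled, batch_size, mu) has already been parsed by the surrounding script.
labeled_loader, unlabeled_loader, val_loader, test_loader = create_dataloader('cifar10', root='./data')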
def get_dataloaders(dataset, batch, num_workers, dataroot, ops_names, magnitudes, cutout, cutout_length, split=0.5, split_idx=0, target_lb=-1): """ Args: dataset: str batch: int num_workers: int dataroot: the dataset dir ops_names: list[tuple], [N=105, K=2], str magnitudes: tensor, shape [N, k] cutout: boolean, cutout_length: int split: float, default 0.5 split_idx: int, the number of the next(StratifiedShuffleSplit.split) function is called is equal `split_idx` + 1 target_lb: int, target label, if `target_lb` > 0, the train_label only include the `target_lb` Returns: """ if 'cifar' in dataset or 'svhn' in dataset: transform_train_pre = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), ]) transform_train_after = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD), ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD), ]) elif 'imagenet' in dataset: transform_train_pre = transforms.Compose([ transforms.RandomResizedCrop(224, scale=(0.08, 1.0), interpolation=Image.BICUBIC), transforms.RandomHorizontalFlip(), transforms.ColorJitter( brightness=0.4, contrast=0.4, saturation=0.4, ), ]) transform_train_after = transforms.Compose([ transforms.ToTensor(), Lighting(0.1, _IMAGENET_PCA['eigval'], _IMAGENET_PCA['eigvec']), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) transform_test = transforms.Compose([ transforms.Resize(256, interpolation=Image.BICUBIC), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) else: raise ValueError('dataset=%s' % dataset) if cutout and cutout_length != 0: transform_train_after.transforms.append(CutoutDefault(cutout_length)) if dataset == 'cifar10': total_trainset = torchvision.datasets.CIFAR10(root=dataroot, train=True, download=True, transform=None) total_trainset.train_data = total_trainset.train_data[:100] total_trainset.train_labels = total_trainset.train_labels[:100] # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None) total_trainset.targets = total_trainset.train_labels elif dataset == 'reduced_cifar10': total_trainset = torchvision.datasets.CIFAR10(root=dataroot, train=True, download=True, transform=None) sss = StratifiedShuffleSplit(n_splits=1, test_size=46000, random_state=0) # 4000 trainset sss = sss.split(list(range(len(total_trainset))), total_trainset.train_labels) train_idx, valid_idx = next(sss) targets = [total_trainset.train_labels[idx] for idx in train_idx] total_trainset = Subset(total_trainset, train_idx) total_trainset.targets = targets # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None) elif dataset == 'cifar100': total_trainset = torchvision.datasets.CIFAR100(root=dataroot, train=True, download=True, transform=None) total_trainset.targets = total_trainset.test_labels # testset = torchvision.datasets.CIFAR100(root=dataroot, train=False, download=True, transform=transform_test) elif dataset == 'reduced_cifar100': total_trainset = torchvision.datasets.CIFAR100(root=dataroot, train=True, download=True, transform=None) sss = StratifiedShuffleSplit(n_splits=1, test_size=46000, random_state=0) # 4000 trainset sss = sss.split(list(range(len(total_trainset))), total_trainset.targets) train_idx, valid_idx = next(sss) targets = [total_trainset.targets[idx] for idx in train_idx] total_trainset = Subset(total_trainset, 
train_idx) total_trainset.targets = targets # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None) elif dataset == 'svhn': trainset = torchvision.datasets.SVHN(root=dataroot, split='train', download=True, transform=None) extraset = torchvision.datasets.SVHN(root=dataroot, split='extra', download=True, transform=None) total_trainset = ConcatDataset([trainset, extraset]) # testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test) elif dataset == 'reduced_svhn': total_trainset = torchvision.datasets.SVHN(root=dataroot, split='train', download=True, transform=None) sss = StratifiedShuffleSplit(n_splits=1, test_size=73257 - 1000, random_state=0) # 1000 trainset # sss = sss.split(list(range(len(total_trainset))), total_trainset.targets) sss = sss.split(list(range(len(total_trainset))), total_trainset.labels) train_idx, valid_idx = next(sss) # targets = [total_trainset.targets[idx] for idx in train_idx] targets = [total_trainset.labels[idx] for idx in train_idx] total_trainset = Subset(total_trainset, train_idx) # total_trainset.targets = targets total_trainset.labels = targets total_trainset.targets = targets # testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test) elif dataset == 'imagenet': total_trainset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), download=True, transform=None) # testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test) # compatibility total_trainset.targets = [lb for _, lb in total_trainset.samples] elif dataset == 'reduced_imagenet': # randomly chosen indices idx120 = [ 904, 385, 759, 884, 784, 844, 132, 214, 990, 786, 979, 582, 104, 288, 697, 480, 66, 943, 308, 282, 118, 926, 882, 478, 133, 884, 570, 964, 825, 656, 661, 289, 385, 448, 705, 609, 955, 5, 703, 713, 695, 811, 958, 147, 6, 3, 59, 354, 315, 514, 741, 525, 685, 673, 657, 267, 575, 501, 30, 455, 905, 860, 355, 911, 24, 708, 346, 195, 660, 528, 330, 511, 439, 150, 988, 940, 236, 803, 741, 295, 111, 520, 856, 248, 203, 147, 625, 589, 708, 201, 712, 630, 630, 367, 273, 931, 960, 274, 112, 239, 463, 355, 955, 525, 404, 59, 981, 725, 90, 782, 604, 323, 418, 35, 95, 97, 193, 690, 869, 172 ] total_trainset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), transform=None) # testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test) # compatibility total_trainset.targets = [lb for _, lb in total_trainset.samples] # sss = StratifiedShuffleSplit(n_splits=1, test_size=len(total_trainset) - 6000, random_state=0) # 4000 trainset # sss = StratifiedShuffleSplit(n_splits=1, test_size=0, random_state=0) # 4000 trainset # sss = sss.split(list(range(len(total_trainset))), total_trainset.targets) # train_idx, valid_idx = next(sss) # print(len(train_idx), len(valid_idx)) # filter out # train_idx = list(filter(lambda x: total_trainset.labels[x] in idx120, train_idx)) # valid_idx = list(filter(lambda x: total_trainset.labels[x] in idx120, valid_idx)) # # test_idx = list(filter(lambda x: testset.samples[x][1] in idx120, range(len(testset)))) train_idx = list(range(len(total_trainset))) filter_train_idx = list( filter(lambda x: total_trainset.targets[x] in idx120, train_idx)) # valid_idx = list(filter(lambda x: total_trainset.targets[x] in idx120, valid_idx)) # test_idx = list(filter(lambda x: testset.samples[x][1] in idx120, range(len(testset)))) # print(len(filter_train_idx)) 
targets = [ idx120.index(total_trainset.targets[idx]) for idx in filter_train_idx ] sss = StratifiedShuffleSplit(n_splits=1, test_size=len(filter_train_idx) - 6000, random_state=0) # 4000 trainset sss = sss.split(list(range(len(filter_train_idx))), targets) train_idx, valid_idx = next(sss) train_idx = [filter_train_idx[x] for x in train_idx] valid_idx = [filter_train_idx[x] for x in valid_idx] targets = [ idx120.index(total_trainset.targets[idx]) for idx in train_idx ] for idx in range(len(total_trainset.samples)): if total_trainset.samples[idx][1] not in idx120: continue total_trainset.samples[idx] = (total_trainset.samples[idx][0], idx120.index( total_trainset.samples[idx][1])) total_trainset = Subset(total_trainset, train_idx) total_trainset.targets = targets # for idx in range(len(testset.samples)): # if testset.samples[idx][1] not in idx120: # continue # testset.samples[idx] = (testset.samples[idx][0], idx120.index(testset.samples[idx][1])) # testset = Subset(testset, test_idx) print('reduced_imagenet train=', len(total_trainset)) else: raise ValueError('invalid dataset name=%s' % dataset) train_sampler = None if split > 0.0: sss = StratifiedShuffleSplit(n_splits=5, test_size=split, random_state=0) sss = sss.split(list(range(len(total_trainset))), total_trainset.targets) for _ in range(split_idx + 1): train_idx, valid_idx = next(sss) if target_lb >= 0: train_idx = [ i for i in train_idx if total_trainset.targets[i] == target_lb ] valid_idx = [ i for i in valid_idx if total_trainset.targets[i] == target_lb ] train_sampler = SubsetRandomSampler(train_idx) valid_sampler = SubsetSampler(valid_idx) # if horovod: # import horovod.torch as hvd # train_sampler = torch.utils.data.distributed.DistributedSampler(train_sampler, num_replicas=hvd.size(), rank=hvd.rank()) else: valid_sampler = SubsetSampler([]) # if horovod: # import horovod.torch as hvd # train_sampler = torch.utils.data.distributed.DistributedSampler(valid_sampler, num_replicas=hvd.size(), rank=hvd.rank()) train_data = AugmentDataset(total_trainset, transform_train_pre, transform_train_after, transform_test, ops_names, True, magnitudes) valid_data = AugmentDataset(total_trainset, transform_train_pre, transform_train_after, transform_test, ops_names, False, magnitudes) trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch, shuffle=False, sampler=train_sampler, drop_last=False, pin_memory=True, num_workers=num_workers) validloader = torch.utils.data.DataLoader( valid_data, batch_size=batch, # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), sampler=valid_sampler, drop_last=False, pin_memory=True, num_workers=num_workers) # trainloader = torch.utils.data.DataLoader( # total_trainset, batch_size=batch, shuffle=True if train_sampler is None else False, num_workers=32, pin_memory=True, # sampler=train_sampler, drop_last=True) # validloader = torch.utils.data.DataLoader( # total_trainset, batch_size=batch, shuffle=False, num_workers=16, pin_memory=True, # sampler=valid_sampler, drop_last=False) # testloader = torch.utils.data.DataLoader( # testset, batch_size=batch, shuffle=False, num_workers=32, pin_memory=True, # drop_last=False # ) print(len(train_data)) return trainloader, validloader
def train(self):
    data_path = self.config.get('TRAIN', 'data_path')
    batch_size = self.config.getint('TRAIN', 'batch_size')
    max_epoches = self.config.getint('TRAIN', 'max_epoches')
    checkpoint = self.config.getint('TRAIN', 'checkpoint')
    is_checkpoint = self.config.getboolean('TRAIN', 'is_checkpoint')
    gcn_path = data_path
    transformer = PreProcessing(is_train=True)
    trainval_dataset = NetworkDataset(
        data_path,
        gcn_path,
        # self.node_metrics, self.interface_metrics, self.gcn_metrics,
        # self.bgp_metrics,
        transformer,
        # is_train=True
    )
    train_indices, val_indices = train_test_split(
        list(range(len(trainval_dataset))),
        test_size=0.1,
        stratify=trainval_dataset.label,
        random_state=self.seed)
    train_dataset = Subset(trainval_dataset, train_indices)
    train_size = len(train_dataset)
    val_dataset = Subset(trainval_dataset, val_indices)
    val_size = len(val_dataset)
    print(f'train size : {train_size} val size: {val_size}')
    # train only on the train split (not the full trainval_dataset), so the
    # validation examples are held out
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  collate_fn=collate)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=val_size,
                                shuffle=True,
                                collate_fn=collate)
    input_dim = trainval_dataset.column_dim
    # input_dim2 = len(self.interface_metrics)
    # input_dim3 = 2
    # input_dim4 = len(self.bgp_metrics)
    target_dim = len(self.events.keys())
    model = GraphClassifier(input_dim, target_dim).to(self.device)
    # model = DataParallel(model)
    model.double()
    if is_checkpoint:
        print('./models/gcn_{:}.model'.format(checkpoint))
        model.load_state_dict(
            torch.load('./models/gcn_{:}.model'.format(checkpoint)))
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    li_times = []
    for epoch in range(1 + checkpoint, max_epoches + 1):
        if self.device == "cuda":
            torch.cuda.synchronize()
        since = int(round(time.time() * 1000))
        running_loss, correct, total = (0, 0, 0)
        for train_inputs, train_labels in train_dataloader:
            train_scores = model(train_inputs)
            train_labels = train_labels.to(self.device)
            loss = loss_function(train_scores, train_labels)
            optimizer.zero_grad()  # clear gradients accumulated from the previous step
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predict = torch.max(train_scores, 1)
            correct += (predict == train_labels).sum().item()
            total += train_labels.size(0)
        train_loss = running_loss / len(train_dataloader)
        train_acc = correct / total
        with torch.no_grad():
            val_inputs, val_labels = next(iter(val_dataloader))
            val_scores = model(val_inputs)
            val_labels = val_labels.to(self.device)
            val_loss = loss_function(val_scores, val_labels)
            bi_scores = torch.argmax(val_scores, dim=1).to('cpu')
            y_val_scores = val_labels.to('cpu').numpy()
            val_acc = accuracy_score(y_val_scores, bi_scores)
        if self.device == "cuda":
            torch.cuda.synchronize()
        time_elapsed = int(round(time.time() * 1000)) - since
        li_times.append(time_elapsed)
        print(
            'EPOCH [{}/{}] train loss: {} train acc: {} val loss: {} val acc: {}, elapsed: {}ms'
            .format(epoch, max_epoches, train_loss, train_acc, val_loss,
                    val_acc, time_elapsed))
        if epoch % 10 == 0:
            print("save model")
            torch.save(model.state_dict(),
                       "{:}/gcn_{:}.model".format(self.model_dir, epoch))
    print(np.sum(li_times))
def run(self): # Get Data & MetaData input_size, input_channels, num_classes, train_data, test_data = get_data( dataset_name=config.DATASET, data_path=config.DATAPATH, cutout_length=16, test=True) # Train, Test Data Loaders n_train = len(train_data) n_test = len(test_data) if config.PERCENTAGE_OF_DATA < 100: n_train = (n_train // 100) * config.PERCENTAGE_OF_DATA n_test = (n_test // 100) * config.PERCENTAGE_OF_DATA ''' train_data = train_data[:n_train] test_data = test_data[:n_test] test_data = test_data[:n_test] ''' # take a random sample of the indices train_data = Subset( train_data, np.random.choice(range(len(train_data)), size=n_train, replace=False)) test_data = Subset( test_data, np.random.choice(range(len(test_data)), size=n_test, replace=False)) train_loader = torch.utils.data.DataLoader( train_data, batch_size=config.BATCH_SIZE, num_workers=config.NUM_DOWNLOAD_WORKERS, pin_memory=config.PIN_MEMORY) test_loader = torch.utils.data.DataLoader( test_data, batch_size=config.BATCH_SIZE, num_workers=config.NUM_DOWNLOAD_WORKERS, pin_memory=config.PIN_MEMORY) # Create Model print("Alpha Normal") print_alpha(self.alpha_normal) print("Alpha Reduce") print_alpha(self.alpha_reduce) print("Creating Model from these Alpha\n\n") self.model = LearntModel(alpha_normal=self.alpha_normal, alpha_reduce=self.alpha_reduce, num_cells=config.NUM_CELLS, channels_in=input_channels, channels_start=config.CHANNELS_START, stem_multiplier=config.STEM_MULTIPLIER, num_classes=num_classes, primitives=OPS, auxiliary=(not config.NO_AUXILIARY)) # Port model to gpu if availabile if torch.cuda.is_available(): self.model = self.model.cuda() # cuDNN optimizations if possible torch.backends.cudnn.benchmark = True torch.backends.cudnn.enabled = True # Weights Optimizer w_optim = torch.optim.SGD(params=self.model.parameters(), lr=config.WEIGHTS_LR, momentum=config.WEIGHTS_MOMENTUM, weight_decay=config.WEIGHTS_WEIGHT_DECAY) # Learning Rate Scheduler lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( w_optim, config.EPOCHS, eta_min=config.WEIGHTS_LR_MIN) # Register Signal Handler for interrupts & kills signal.signal(signal.SIGINT, self.terminate) # Number of parameters print("# of Parameters (M)", count_parameters_in_millions(self.model)) # Training Loop best_top1 = 0. 
loss_criterion = nn.CrossEntropyLoss() for epoch in range(config.EPOCHS): lr = lr_scheduler.get_lr()[0] # Training (One epoch) self.train(train_loader=train_loader, model=self.model, w_optim=w_optim, epoch=epoch, lr=lr, gradient_clip=config.WEIGHTS_GRADIENT_CLIP, epochs=config.EPOCHS, loss_criterion=loss_criterion) # Learning Rate Step lr_scheduler.step() # Test (One epoch) cur_step = (epoch + 1) * len(train_loader) top1 = self.test(test_loader=test_loader, model=self.model, epoch=epoch, cur_step=cur_step, epochs=config.EPOCHS) # Save Checkpoint # Creates checkpoint directory if it doesn't exist if not os.path.exists(config.CHECKPOINT_PATH + "/" + config.DATASET + "/" + self.dt_string): os.makedirs(config.CHECKPOINT_PATH + "/" + config.DATASET + "/" + self.dt_string) # torch.save(self.model, config.CHECKPOINT_PATH + "/" + config.DATASET + "/" + self.dt_string + "/" + str(epoch) + ".pt") if best_top1 < top1: best_top1 = top1 torch.save( self.model, config.CHECKPOINT_PATH + "/" + config.DATASET + "/" + self.dt_string + "/" + "best.pt") # GPU Memory Allocated for Model in Weight Sharing Phase if epoch == 0: try: print( "Learnt Architecture Training: Max GPU Memory Used", torch.cuda.max_memory_allocated() / (1024 * 1024 * 1024), "GB") except: print("Unable to retrieve memory data") # Log Best Accuracy so far print("Final best Prec@1 = {:.4%}".format(best_top1)) self.terminate()
def load_memory(
    self,
    task: int,
    batch_size: int,
    shuffle: Optional[bool] = True,
    num_workers: Optional[int] = 0,
    pin_memory: Optional[bool] = True
) -> Tuple[DataLoader, DataLoader]:
    """
    Makes dataloaders for episodic memory/replay buffer.

    Args:
        task: The task number.
        batch_size: The batch_size for dataloaders.
        shuffle: Should loaders be shuffled? Default: True.
        num_workers: corresponds to PyTorch's `num_workers` argument. Default: 0.
        pin_memory: corresponds to PyTorch's `pin_memory` argument. Default: True.

    Returns:
        a Tuple of dataloaders, i.e., (train_loader, validation_loader).

    Examples::

        >>> benchmark = Benchmark(num_tasks=2, per_task_memory_examples=16)
        >>> # task 1 memory loaders: returns 2 batches (i.e., 16 examples)
        >>> mem_train_loader_1, mem_val_loader_1 = benchmark.load_memory(1, batch_size=8)
        >>> # task 2 memory loaders: returns 4 batches (i.e., 16 examples)
        >>> mem_train_loader_2, mem_val_loader_2 = benchmark.load_memory(2, batch_size=4)

    .. note::
        This method uses `class_uniform` sampling, i.e., if each task has 10 classes
        and `per_task_memory_examples=20`, then the returned samples have 2 examples
        per class.

    .. warning::
        The method will throw an error if `Benchmark` is instantiated without
        :attr:`per_task_memory_examples`. The reason is that, behind the scenes, we
        compute the indices for memory examples in the `precompute_memory_indices()`
        method, and this method relies on those computations.
    """
    if not self.per_task_memory_examples:
        raise ValueError(
            "Called load_memory() but per_task_memory_examples is not set")
    if task > self.num_tasks:
        raise ValueError(
            f"Asked for memory of task={task} while the benchmark has {self.num_tasks} tasks")

    train_indices = self.memory_indices_train[task]
    test_indices = self.memory_indices_test[task]
    train_dataset = Subset(self.trains[task], train_indices)
    test_dataset = Subset(self.tests[task], test_indices)

    train_loader = DataLoader(train_dataset, batch_size, shuffle,
                              num_workers=num_workers, pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset, batch_size, shuffle,
                             num_workers=num_workers, pin_memory=pin_memory)
    return train_loader, test_loader
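# Hedged sketch of how load_memory() is typically combined with the current task's
# loader for rehearsal; `benchmark`, `model`, `criterion`, and `optimizer` are assumed
# to exist, and the replay strategy shown is illustrative rather than prescribed.
for task in range(1, benchmark.num_tasks + 1):
    train_loader, _ = benchmark.load(task, batch_size=32)
    if task > 1:
        mem_loader, _ = benchmark.load_memory(task - 1, batch_size=32)
    for x, y in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        if task > 1:
            mem_x, mem_y = next(iter(mem_loader))  # one replay batch per step
            loss = loss + criterion(model(mem_x), mem_y)
        loss.backward()
        optimizer.step()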
########################## ### CIFAR-10 Dataset ########################## # Note transforms.ToTensor() scales input images # to 0-1 range train_indices = torch.arange(0, 49000) valid_indices = torch.arange(49000, 50000) train_and_valid = datasets.CIFAR10(root='data', train=True, transform=transforms.ToTensor(), download=True) train_dataset = Subset(train_and_valid, train_indices) valid_dataset = Subset(train_and_valid, valid_indices) test_dataset = datasets.CIFAR10(root='data', train=False, transform=transforms.ToTensor()) ##################################################### ### Data Loaders ##################################################### train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=8, shuffle=True)
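# Side note (illustrative, not from the original snippet): the arange-based split
# above uses the last 1,000 images of the CIFAR-10 training order as validation.
# A seeded random permutation gives the same split sizes while decoupling the
# validation set from the on-disk ordering; RANDOM_SEED is a hypothetical constant.
import torch
from torch.utils.data import Subset
from torchvision import datasets, transforms

RANDOM_SEED = 0

train_and_valid = datasets.CIFAR10(root='data',
                                   train=True,
                                   transform=transforms.ToTensor(),
                                   download=True)
perm = torch.randperm(len(train_and_valid),
                      generator=torch.Generator().manual_seed(RANDOM_SEED))
train_dataset = Subset(train_and_valid, perm[:49000].tolist())
valid_dataset = Subset(train_and_valid, perm[49000:].tolist())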
from torch import nn, optim
from torch.utils.data import Subset
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms
import numpy as np
from tqdm import tqdm, trange
from a2c_net import A2CNet
from rl.env_a2c import AttackEnv

if __name__ == '__main__':
    # Normalize takes per-channel mean and std tuples.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    test_dataset = CIFAR10('data', train=False, transform=transform, download=False)
    test_dataset = Subset(test_dataset, range(9000))

    image_size = 32 * 32
    n_classes = 10
    max_episodes = 20
    max_episode_len = 1000

    env = AttackEnv()
    obs_space = env.observation_space
    action_space = env.action_space
    obs_size = obs_space.low.size
    n_actions = action_space.n
def get_train_eval_loaders(path, batch_size=256): """Setup the dataflow: - load CIFAR100 train and test datasets - setup train/test image transforms - horizontally flipped randomly and augmented using cutout. - each mini-batch contained 256 examples - setup train/test data loaders Returns: train_loader, test_loader, eval_train_loader """ train_transform = Compose([ Pad(4), RandomCrop(32), RandomHorizontalFlip(), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), RandomErasing(), ]) test_transform = Compose([ ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) train_dataset = CIFAR100(root=path, train=True, transform=train_transform, download=True) test_dataset = CIFAR100(root=path, train=False, transform=test_transform, download=False) train_eval_indices = [ random.randint(0, len(train_dataset) - 1) for i in range(len(test_dataset)) ] train_eval_dataset = Subset(train_dataset, train_eval_indices) train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=12, shuffle=True, drop_last=True, pin_memory=True) test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=12, shuffle=False, drop_last=False, pin_memory=True) eval_train_loader = DataLoader(train_eval_dataset, batch_size=batch_size, num_workers=12, shuffle=False, drop_last=False, pin_memory=True) return train_loader, test_loader, eval_train_loader
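# Aside (sketch, not part of the original function): random.randint above samples
# train_eval_indices with replacement, so the evaluation subset can contain
# duplicate training images. If distinct indices are preferred, random.sample
# draws the same number of indices without replacement.
import random


def distinct_eval_indices(train_size, eval_size, seed=None):
    """Sample `eval_size` distinct indices from range(train_size)."""
    rng = random.Random(seed)
    return rng.sample(range(train_size), eval_size)

# e.g. distinct_eval_indices(50000, 10000) for the CIFAR100 train/test sizes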
def main(n_train, batch_train_size, n_test, batch_test_size): """ :param n_model: number of models for the comittee :param n_train: number of training data to be used, this decides how long the training process will be :param batch_train_size: batch size for training process, keep it under 20 :param idx_ratio: ratio of high entropy:ratio of random :return: """ # paths img_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'consensus_test', 'example.png') save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test') csv_name_train = 'train.csv' csv_name_test = 'test.csv' csv_name_index = 'index.csv' dir_name = 'consensus_bulk_40_from_90_005_' index_path_name = 'consensus_90_5_005' save_weights_flag = True cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes') cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'class_weights.pkl') cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscape_pretrain') inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'inference') color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'color') print('cityscape_path: ' + cityscape_path) print(dir_name) print(index_path_name) # arguments n_train = 2880 n_pretrain = 0 n_test = 500 n_epoch = 40 test_factor = 3 # committee only tested every test_factor-th batch batch_train_size = 3*max(torch.cuda.device_count(), 1) batch_train_size_pretrain = 4 batch_test_size = 25*max(torch.cuda.device_count(), 1) lr = 0.0001 loss_print = 2 idx_ratio = [0.0, 1.0] # proportion to qbc:random continue_flag = False poly_exp = 1.0 feature_extract = True manual_seed = 10 np.random.seed(manual_seed) # CUDA cuda_flag = torch.cuda.is_available() device = torch.device("cuda" if cuda_flag else "cpu") device_cpu = torch.device("cpu") dataloader_kwargs = {'pin_memory': True} if cuda_flag else {} print(torch.cuda.device_count(), "GPUs detected") torch.manual_seed(manual_seed) # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb') # get data and index library mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) transform = T.Compose([T.Resize((800, 800), Image.BICUBIC), T.ToTensor(), T.Normalize(*mean_std)]) train_dataset = dataset_preset.Dataset_Cityscapes_n(root=cityscape_path, split='train', mode='fine', target_type='semantic', transform=transform, target_transform=segmen_preset.label_id2label, n=n_train) # read used index csv_path_index_source = os.path.join(save_path, index_path_name, csv_name_index) with open(csv_path_index_source) as csv_file: data = csv_file.readlines() train_index = np.array(list(map(int, data[-1][3:str.find(data[-1], ';')].split(',')))) print(len(train_index)) # np.random.shuffle(train_index) train_index = train_index[int(n_train*0.1):int(n_train*0.5)] print(len(train_index)) train_dataset = Subset(train_dataset, indices=train_index) test_dataset = dataset_preset.Dataset_Cityscapes_n_i(root=cityscape_path, split='val', mode='fine', target_type='semantic', transform=transform, target_transform=segmen_preset.label_id2label, n=n_test) # only test on part of data train_dataloader = DataLoader(train_dataset, batch_size=batch_train_size, shuffle=True, num_workers=3*max(torch.cuda.device_count(), 1), drop_last=True) test_dataloader = DataLoader(test_dataset, batch_size=batch_test_size, shuffle=True, num_workers=3*max(torch.cuda.device_count(), 1), drop_last=True) print("Datasets 
loaded!") # create models, optimizers, scheduler, criterion # the models fcn_model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=False, progress=True, num_classes=segmen_preset.n_labels_valid, aux_loss=True) fcn_model = fcn_model.cuda() fcn_model = nn.DataParallel(fcn_model) # the optimizers params_to_update = fcn_model.parameters() print("Params to learn:") if feature_extract: params_to_update = [] for name, param in fcn_model.named_parameters(): if param.requires_grad == True: params_to_update.append(param) print("\t", name) else: for name, param in fcn_model.named_parameters(): if param.requires_grad == True: print("\t", name) params = add_weight_decay(fcn_model, l2_value=0.0001) '''optimizer = torch.optim.SGD([{'params': fcn_model.module.classifier.parameters()}, {'params': list(fcn_model.module.backbone.parameters()) + list(fcn_model.module.aux_classifier.parameters())} ], lr=lr, momentum=0.9)''' optimizer = torch.optim.Adam([{'params': fcn_model.module.classifier.parameters()}, {'params': list(fcn_model.module.backbone.parameters()) + list(fcn_model.module.aux_classifier.parameters())} ], lr=lr, weight_decay=0.0001) lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp) scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1) with open(cityscape_loss_weight_path, "rb") as file: # (needed for python3) class_weights = np.array(pickle.load(file)) class_weights = torch.from_numpy(class_weights) class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda() criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda() # report everything text = ('Model created' + (', n_train: ' + str(n_train)) + (', n_epoch: ' + str(n_epoch)) + (', batch_train_size: ' + str(batch_train_size)) + (', idx_ratio: ' + str(idx_ratio)) + (', n_test: ' + str(n_test)) + (', batch_test_size: ' + str(batch_test_size)) + (', test_factor: ' + str(test_factor)) + (', optimizer: ' + str(optimizer)) + (', model: ' + str(fcn_model))) print(text) # for documentation train_text = [str(x) for x in range(1, n_epoch+1)] test_text = [str(x) for x in range(1, n_epoch+1)] test_text_index = 0 # write text to csv dir_number = 1 while os.path.exists(os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))): dir_number += 1 run_path = os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number))) os.makedirs(run_path) # make run_* dir f = open(os.path.join(run_path, 'info.txt'), 'w+') # write .txt file f.write(text) f.close() copy(__file__, os.path.join(run_path, os.path.basename(__file__))) # write training progress csv_path_train = os.path.join(run_path, csv_name_train) title = ["Training progress for n_model = " + str(1) + ", idx_ratio: " + str(idx_ratio) + ', for multiple epoch, torch seed: ' + str(manual_seed)] with open(csv_path_train, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=',') test_writer.writerow(title) # write test progress csv_path_test = os.path.join(run_path, csv_name_test) title = ["Test progress for n_model = " + str(1) + ", idx_ratio: " + str(idx_ratio) + ', for multiple epoch, torch seed: ' + str(manual_seed) + 'run_path: ' + run_path + 'index_from: ' + index_path_name] with open(csv_path_test, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=',') test_writer.writerow(title) # load from previous run if requested if continue_flag: fcn_model.load_state_dict(torch.load( 
'C:\\Users\\steve\\Desktop\\projects\\al_kitti\\results\\first_test\\adam_run_005\\model_weight_epoch_10.pt')) print('weight loaded') # training process, n-th batch for i_epoch in range(n_epoch): loss_epoch = [] iou_epoch = [] time_epoch = [] for i_batch, (data_train, target_train) in enumerate(train_dataloader): t = Timer() t.start() # train batch output, loss, iou, fcn_model, optimizer = train_batch(fcn_model, data_train, target_train, optimizer, device, criterion) print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) + '/' + str(len(train_dataloader)) + '; model ' + str(0) + '; train loss avg: ' + "{:.3f}".format(loss) + '; train iou avg: ' + "{:.3f}".format(iou.mean())) for param_group in optimizer.param_groups: print(param_group['lr']) loss_epoch.append(loss) iou_epoch.append(iou.mean()) time_epoch.append(t.stop()) # document train result train_text[i_epoch] = train_text[i_epoch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \ ";{:.4f}".format(np.array(iou_epoch).mean()) + \ ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) + ';' + str(len(train_index)) # update train documentation text = train_text[i_epoch].split(";") with open(csv_path_train, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=';') test_writer.writerow(text) # one epoch ends here scheduler.step() print(optimizer) # save temporary model if i_epoch % 10 == 0 or (i_epoch+1) == n_epoch: fcn_model.train() torch.save(fcn_model.state_dict(), os.path.join(run_path, ('model_weight_epoch_train' + '{:03d}'.format(i_epoch) + '.pt'))) fcn_model.eval() torch.save(fcn_model.state_dict(), os.path.join(run_path, ('model_weight_epoch_' + '{:03d}'.format(i_epoch) + '.pt'))) # perform test create_pred_img(fcn_model, test_dataloader, inference_path, color_path) all_result_dict = cityscapes_eval() # average training time mean_time = np.array(time_epoch).mean() # document test result test_text[test_text_index] = test_text[test_text_index] + \ ";{:.4f}".format(all_result_dict['averageScoreClasses']) + \ ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \ + ";{:.4f}".format(mean_time) + ';' + str(len(train_index)) # update test documentation text = test_text[test_text_index].split(";") with open(csv_path_test, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=';') test_writer.writerow(text) test_text_index = test_text_index + 1
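# Sketch (an assumption, not the project's code) of the small Timer helper used in
# the loop above: start() records a timestamp and stop() returns the elapsed
# seconds. The real implementation may differ.
import time


class Timer:
    def start(self):
        self._t0 = time.perf_counter()

    def stop(self):
        return time.perf_counter() - self._t0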
plt.imshow(image) plt.show() if __name__ == "__main__": writer = SummaryWriter() name = "aölsdjfalök" saving_path = "models/" + name num_epochs = 100 learn_rate = 3e-5 dataset = MotBBSequence('dataset_utils/Mot17_test_single.txt', use_only_first_video=False) train_data = Subset(dataset, range(0, dataset.valid_begin)) valid_data = Subset(dataset, range(dataset.valid_begin, len(dataset))) obs_length = 10 pred_length = 9 loss_params = { "grid_shape": (16, 16), "image_shape": (416, 416), "path_anchors": "dataset_utils/anchors/anchors5.txt" } loss_function = NaiveLoss(loss_params) model = SequenceClassifier([16, 16, loss_function.num_anchors, 4], [16, 16, loss_function.num_anchors, 4], 16) optimizer = optim.Adam( model.parameters(),
def run(args): argstr = yaml.dump(args.__dict__, default_flow_style=False) print('arguments:') print(argstr) argfile = osp.join(osp.join(args.expdir), 'finetune_p_args.yaml') if osp.isfile(argfile): oldargs = yaml.load(open(argfile)) if oldargs != args.__dict__: print('WARNING: Changed configuration keys compared to stored experiment') utils.arguments.compare_dicts(oldargs, args.__dict__, verbose=True) args.cuda = not args.no_cuda args.validate_first = not args.no_validate_first args.validate = not args.no_validate if not args.dry: utils.ifmakedirs(args.expdir) logging.print_file(argstr, argfile) transforms = get_transforms(IN1K, args.input_size, crop=(args.input_crop == 'square'), need=('val',), backbone=args.backbone) datas = {} for split in ('train', 'val'): datas[split] = IdDataset(IN1K(args.imagenet_path, split, transform=transforms['val'])) loaders = {} collate_fn = dict(collate_fn=list_collate) if args.input_crop == 'rect' else {} selected = [] count = Counter() for i, label in enumerate(datas['train'].dataset.labels): if count[label] < args.images_per_class: selected.append(i) count[label] += 1 datas['train'].dataset = Subset(datas['train'].dataset, selected) loaders['train'] = DataLoader(datas['train'], batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True, **collate_fn) loaders['val'] = DataLoader(datas['val'], batch_size=args.batch_size, shuffle=args.shuffle_val, num_workers=args.workers, pin_memory=True, **collate_fn) model = get_multigrain(args.backbone, include_sampling=False, pretrained_backbone=args.pretrained_backbone, learn_p=True) criterion = torch.nn.CrossEntropyLoss() if args.cuda: criterion = utils.cuda(criterion) model = utils.cuda(model) optimizers = OD() p = model.pool.p optimizers['p'] = SGD([p], lr=args.learning_rate, momentum=args.momentum) optimizers = MultiOptim(optimizers) def training_step(batch): optimizers.zero_grad() output_dict = model(batch['input']) loss = criterion(output_dict['classifier_output'], batch['classifier_target']) top1, top5 = utils.accuracy(output_dict['classifier_output'].data, batch['classifier_target'].data, topk=(1, 5)) p.grad = torch.autograd.grad(loss, p)[0] # partial backward optimizers.step() return OD([ ('cross_entropy', loss.item()), ('p', p.item()), ('top1', top1), ('top5', top5), ]) def validation_step(batch): with torch.no_grad(): output_dict = model(batch['input']) target = batch['classifier_target'] xloss = criterion(output_dict['classifier_output'], target) top1, top5 = utils.accuracy(output_dict['classifier_output'], target, topk=(1, 5)) return OD([ ('cross_entropy', xloss.item()), ('top1', top1), ('top5', top5), ]) metrics_history = OD() checkpoints = utils.CheckpointHandler(args.expdir) if checkpoints.exists(args.resume_epoch, args.resume_from): epoch = checkpoints.resume(model, metrics_history=metrics_history, resume_epoch=args.resume_epoch, resume_from=args.resume_from) else: raise ValueError('Checkpoint ' + args.resume_from + ' not found') if args.init_pooling_exponent is not None: # overwrite stored pooling exponent p.data.fill_(args.init_pooling_exponent) print("Multigrain model with {} backbone and p={} pooling:".format(args.backbone, p.item())) print(model) def loop(loader, step, epoch, prefix=''): # Training or validation loop metrics = defaultdict(utils.HistoryMeter if prefix == 'train_' else utils.AverageMeter) tic() for i, batch in enumerate(loader): if prefix == 'train_': lr = args.learning_rate * (1 - i / len(loader)) ** args.learning_rate_decay_power 
optimizers['p'].param_groups[0]['lr'] = lr if args.cuda: batch = utils.cuda(batch) data_time = 1000 * toc(); tic() step_metrics = step(batch) step_metrics['data_time'] = data_time step_metrics['batch_time'] = 1000 * toc(); tic() for (k, v) in step_metrics.items(): metrics[prefix + k].update(v, len(batch['input'])) print(logging.str_metrics(metrics, iter=i, num_iters=len(loader), epoch=epoch, num_epochs=epoch)) print(logging.str_metrics(metrics, epoch=epoch, num_epochs=epoch)) toc() if prefix == 'val_': return OD((k, v.avg) for (k, v) in metrics.items()) return OD((k, v.hist) for (k, v) in metrics.items()) if args.validate_first and 0 not in metrics_history: model.eval() metrics_history[epoch] = loop(loaders['val'], validation_step, epoch, 'val_') checkpoints.save_metrics(metrics_history) model.eval() # freeze batch normalization metrics = loop(loaders['train'], training_step, epoch, 'train_') metrics['last_p'] = p.item() if args.validate: model.eval() metrics.update(loop(loaders['val'], validation_step, epoch + 1, 'val_')) metrics_history[epoch + 1] = metrics if not args.dry: utils.make_plots(metrics_history, args.expdir) checkpoints.save(model, epoch + 1, optimizers, metrics_history)
def main(): parser = argparse.ArgumentParser(description='Train a model') # Required arguments parser.add_argument('dataset', metavar='DS_NAME', type=str, help='Dataset name') parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str, help='Model name') # Optional arguments parser.add_argument('-o', '--out_path', metavar='PATH', type=str, help='Output path for model', default=cfg.MODEL_DIR) parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id. -1 for CPU.', default=0) parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N', help='input batch size for training (default: 64)') parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 100)') parser.add_argument('--lr', type=float, default=0.1, metavar='LR', help='learning rate (default: 0.1)') parser.add_argument('--momentum', type=float, default=0.5, metavar='M', help='SGD momentum (default: 0.5)') parser.add_argument( '--log-interval', type=int, default=100, metavar='N', help='how many batches to wait before logging training status') parser.add_argument('--resume', default=None, type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('--lr-step', type=int, default=30, metavar='N', help='Step sizes for LR') parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N', help='LR Decay Rate') parser.add_argument('-w', '--num_workers', metavar='N', type=int, help='# Worker threads to load data', default=10) parser.add_argument('--train_subset', type=int, help='Use a subset of train set', default=None) parser.add_argument('--pretrained', type=str, help='Use pretrained network', default=None) parser.add_argument('--weighted-loss', action='store_true', help='Use a weighted loss', default=None) args = parser.parse_args() params = vars(args) # torch.manual_seed(cfg.DEFAULT_SEED) if params['device_id'] >= 0: os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id']) device = torch.device('cuda') else: device = torch.device('cpu') # ----------- Set up dataset dataset_name = params['dataset'] valid_datasets = datasets.__dict__.keys() if dataset_name not in valid_datasets: raise ValueError( 'Dataset not found. Valid arguments = {}'.format(valid_datasets)) dataset = datasets.__dict__[dataset_name] modelfamily = datasets.dataset_to_modelfamily[dataset_name] train_transform = datasets.modelfamily_to_transforms[modelfamily]['train'] test_transform = datasets.modelfamily_to_transforms[modelfamily]['test'] trainset = dataset(train=True, transform=train_transform) testset = dataset(train=False, transform=test_transform) num_classes = len(trainset.classes) params['num_classes'] = num_classes if params['train_subset'] is not None: idxs = np.arange(len(trainset)) ntrainsubset = params['train_subset'] idxs = np.random.choice(idxs, size=ntrainsubset, replace=False) trainset = Subset(trainset, idxs) # ----------- Set up model model_name = params['model_arch'] pretrained = params['pretrained'] # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained) model = zoo.get_net(model_name, modelfamily, pretrained, num_classes=num_classes) model = model.to(device) # ----------- Train out_path = params['out_path'] model_utils.train_model(model, trainset, testset=testset, device=device, **params) # Store arguments params['created_on'] = str(datetime.now()) params_out_path = osp.join(out_path, 'params.json') with open(params_out_path, 'w') as jf: json.dump(params, jf, indent=True)
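# Sketch (assumption, not the script's current behaviour): because the
# torch.manual_seed call above is commented out, --train_subset draws a different
# random subset on every run. Seeding NumPy before the selection makes the subset
# reproducible; the seed value 42 here is arbitrary.
import numpy as np
from torch.utils.data import Subset


def reproducible_subset(trainset, n_examples, seed=42):
    """Return a reproducible random Subset of `n_examples` training examples."""
    rng = np.random.RandomState(seed)
    idxs = rng.choice(np.arange(len(trainset)), size=n_examples, replace=False)
    return Subset(trainset, idxs)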
def __init__(self, root: str, normal_class=0, tokenizer='spacy', use_tfidf_weights=False, append_sos=False, append_eos=False, clean_txt=False, max_seq_len_prior=None): super().__init__(root) self.n_classes = 2 # 0: normal, 1: outlier classes = list(range(6)) groups = [[ 'comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x' ], [ 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball', 'rec.sport.hockey' ], ['sci.crypt', 'sci.electronics', 'sci.med', 'sci.space'], ['misc.forsale'], [ 'talk.politics.misc', 'talk.politics.guns', 'talk.politics.mideast' ], [ 'talk.religion.misc', 'alt.atheism', 'soc.religion.christian' ]] short_group_names = ['comp', 'rec', 'sci', 'misc', 'pol', 'rel'] self.subset = short_group_names[normal_class] self.normal_classes = groups[normal_class] self.outlier_classes = [] del classes[normal_class] for i in classes: self.outlier_classes += groups[i] # Load the 20 Newsgroups dataset self.train_set, self.test_set = newsgroups20_dataset( directory=root, train=True, test=True, clean_txt=clean_txt, groups=groups, short_group_names=short_group_names) # Pre-process self.train_set.columns.add('index') self.test_set.columns.add('index') self.train_set.columns.add('weight') self.test_set.columns.add('weight') train_idx_normal = [] # for subsetting train_set to normal class for i, row in enumerate(self.train_set): if row['label'] in self.normal_classes: train_idx_normal.append(i) row['label'] = torch.tensor(0) else: row['label'] = torch.tensor(1) row['text'] = row['text'].lower() test_n_idx = [] # subsetting test_set to selected normal classes test_a_idx = [] # subsetting test_set to selected anomalous classes for i, row in enumerate(self.test_set): if row['label'] in self.normal_classes: test_n_idx.append(i) else: test_a_idx.append(i) row['label'] = torch.tensor( 0) if row['label'] in self.normal_classes else torch.tensor(1) row['text'] = row['text'].lower() # Subset train_set to normal class self.train_set = Subset(self.train_set, train_idx_normal) # Subset test_set to selected normal classes self.test_n_set = Subset(self.test_set, test_n_idx) # Subset test_set to selected anomalous classes self.test_a_set = Subset(self.test_set, test_a_idx) # Make corpus and set encoder text_corpus = [ row['text'] for row in datasets_iterator(self.train_set, self.test_set) ] if tokenizer == 'spacy': self.encoder = SpacyEncoder(text_corpus, min_occurrences=3, append_eos=append_eos) if tokenizer == 'bert': self.encoder = MyBertTokenizer.from_pretrained('bert-base-uncased', cache_dir=root) # Encode self.max_seq_len = 0 for row in datasets_iterator(self.train_set, self.test_set): if append_sos: sos_id = self.encoder.stoi[DEFAULT_SOS_TOKEN] row['text'] = torch.cat((torch.tensor(sos_id).unsqueeze(0), self.encoder.encode(row['text']))) else: row['text'] = self.encoder.encode(row['text']) if len(row['text']) > self.max_seq_len: self.max_seq_len = len(row['text']) # Compute tf-idf weights if use_tfidf_weights: compute_tfidf_weights(self.train_set, self.test_set, vocab_size=self.encoder.vocab_size) else: for row in datasets_iterator(self.train_set, self.test_set): row['weight'] = torch.empty(0) # Get indices after pre-processing for i, row in enumerate(self.train_set): row['index'] = i for i, row in enumerate(self.test_set): row['index'] = i # length prior sent_lengths = [len(row['text']) for row in self.train_set] sent_lengths_freq = np.bincount(np.array(sent_lengths)) sent_lengths_freq = np.concatenate( (sent_lengths_freq, 
np.array((max_seq_len_prior - max(sent_lengths)) * [0])), axis=0) sent_lengths_freq = sent_lengths_freq + 1 self.length_prior = np.log(sent_lengths_freq) - np.log( sent_lengths_freq.sum())
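# Toy walkthrough (illustrative only) of the length prior computed above: counts of
# training sentence lengths are padded out to max_seq_len_prior, add-one smoothed,
# and converted to log-probabilities.
import numpy as np

sent_lengths = [3, 3, 4, 7]          # hypothetical training sentence lengths
max_seq_len_prior = 10
freq = np.bincount(np.array(sent_lengths))                  # counts for lengths 0..7
freq = np.concatenate((freq, np.zeros(max_seq_len_prior - max(sent_lengths),
                                      dtype=freq.dtype)))   # pad to length 10
freq = freq + 1                                             # add-one smoothing
length_prior = np.log(freq) - np.log(freq.sum())            # log-probabilities
print(np.exp(length_prior).sum())  # 1.0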
parser.add_argument('--plot_freq', type=int, default=250) parser.add_argument('--save_freq', type=int, default=10) # eval setting parser.add_argument('--val_fraction', type=float, default=0.1) parser.add_argument('--eval_batch_size', type=int, default=32) parser.add_argument('--eval_plot_freq', type=int, default=10) args = parser.parse_args() model = DeepGMR(args) if torch.cuda.is_available(): model.cuda() data = TrainData(args.data_file, args) ids = np.random.permutation(len(data)) n_val = int(args.val_fraction * len(data)) train_data = Subset(data, ids[n_val:]) valid_data = Subset(data, ids[:n_val]) train_loader = DataLoader(train_data, args.batch_size, drop_last=True, shuffle=True) valid_loader = DataLoader(valid_data, args.eval_batch_size, drop_last=True) optimizer = torch.optim.Adam(model.parameters(), args.lr) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, min_lr=1e-6) writer = SummaryWriter(args.log_dir) for epoch in range(args.n_epochs):
def create_dataloaders(dataset_name, data_transforms, input_size, batch_size): dataloaders_dict = {} print("Initializing Datasets and Dataloaders...") if dataset_name == "imagenetv2": # Create training and validation datasets train_dataset = ImageNetV2Dataset(transform=data_transforms['train']) test_dataset = ImageNetV2Dataset(transform=data_transforms['val']) train_test_splits_file = 'split_indices.pkl' if os.path.exists(train_test_splits_file): indices_split = pickle.load(open(train_test_splits_file, 'rb')) else: index_to_class = { idx: cl for idx, (_, cl) in enumerate(train_dataset) } class_to_index = {idx: [] for idx in range(1000)} for idx, cl in index_to_class.items(): class_to_index[cl].append(idx) indices_split = {'train': [], 'val': [], 'test': []} for cl in class_to_index: shuffle(class_to_index[cl]) indices_split['train'].extend( class_to_index[cl][:int(0.7 * len(class_to_index[cl]))]) indices_split['val'].extend( class_to_index[cl][int(0.7 * len(class_to_index[cl]) ):int(0.9 * len(class_to_index[cl]))]) indices_split['test'].extend( class_to_index[cl][int(0.9 * len(class_to_index[cl])):]) pickle.dump(indices_split, open(train_test_splits_file, 'wb')) # Create training and validation dataloaders dataloaders_dict = { x: torch.utils.data.DataLoader(Subset(test_dataset, indices_split[x]), batch_size=batch_size, shuffle=True, num_workers=4) for x in ['val', 'test'] } dataloaders_dict['train'] = torch.utils.data.DataLoader( Subset(train_dataset, indices_split['train']), batch_size=batch_size, shuffle=True, num_workers=4) elif dataset_name == "imagenetv2cifar100": # Create training and validation datasets print("Custom imagenetv2cifar dataset") train_dataset = ImageNetV2Dataset( transform=data_transforms['train']['imagenetv2']) test_dataset = ImageNetV2Dataset( transform=data_transforms['val']['imagenetv2']) train_test_splits_file = 'split_indices.pkl' if os.path.exists(train_test_splits_file): indices_split = pickle.load(open(train_test_splits_file, 'rb')) else: index_to_class = { idx: cl for idx, (_, cl) in enumerate(train_dataset) } class_to_index = {idx: [] for idx in range(1000)} for idx, cl in index_to_class.items(): class_to_index[cl].append(idx) indices_split = {'train': [], 'val': [], 'test': []} for cl in class_to_index: shuffle(class_to_index[cl]) indices_split['train'].extend( class_to_index[cl][:int(0.7 * len(class_to_index[cl]))]) indices_split['val'].extend( class_to_index[cl][int(0.7 * len(class_to_index[cl]) ):int(0.9 * len(class_to_index[cl]))]) indices_split['test'].extend( class_to_index[cl][int(0.9 * len(class_to_index[cl])):]) pickle.dump(indices_split, open(train_test_splits_file, 'wb')) train_dataset1 = Subset(train_dataset, indices_split['train']) test_dataset1 = Subset(test_dataset, indices_split['val']) train_dataset2 = torchvision.datasets.CIFAR100( root='./data', train=True, download=True, transform=data_transforms['train']['cifar100']) test_dataset2 = torchvision.datasets.CIFAR100( root='./data', train=False, download=True, transform=data_transforms['val']['cifar100']) train_dataset2.targets = [x + 1000 for x in train_dataset2.targets] test_dataset2.targets = [x + 1000 for x in test_dataset2.targets] final_train_dataset = torch.utils.data.ConcatDataset( [train_dataset1, train_dataset2]) final_test_dataset = torch.utils.data.ConcatDataset( [test_dataset1, test_dataset2]) # Create training and validation dataloaders dataloaders_dict['train'] = torch.utils.data.DataLoader( final_train_dataset, batch_size=batch_size, shuffle=True, num_workers=4) 
dataloaders_dict['val'] = torch.utils.data.DataLoader( final_test_dataset, batch_size=batch_size, shuffle=True, num_workers=4) else: if dataset_name == "imagenet": train_dataset = torchvision.datasets.ImageNet( root='./data', split='train', download=True, transform=data_transforms['train']) test_dataset = torchvision.datasets.ImageNet( root='./data', split='val', download=True, transform=data_transforms['val']) elif dataset_name == "cifar10": train_dataset = torchvision.datasets.CIFAR10( root='./data', train=True, download=True, transform=data_transforms['train']) test_dataset = torchvision.datasets.CIFAR10( root='./data', train=False, download=True, transform=data_transforms['val']) elif dataset_name == "cifar100": train_dataset = torchvision.datasets.CIFAR100( root='./data', train=True, download=True, transform=data_transforms['train']) test_dataset = torchvision.datasets.CIFAR100( root='./data', train=False, download=True, transform=data_transforms['val']) else: print("Invalid dataset name, exiting...") sys.exit(0) dataloaders_dict['train'] = torch.utils.data.DataLoader( train_dataset, batch_size=batch_size, shuffle=True, num_workers=4) dataloaders_dict['val'] = torch.utils.data.DataLoader( test_dataset, batch_size=batch_size, shuffle=True, num_workers=4) return dataloaders_dict
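# Standalone sketch (an assumption, mirroring but not copied from the ImageNetV2
# branches above) of the per-class 70/10/20 split used to build indices_split:
# group indices by label, shuffle each group, and cut it at 70% and 90%.
from collections import defaultdict
from random import shuffle


def per_class_split(labels, fractions=(0.7, 0.9)):
    """Return {'train': [...], 'val': [...], 'test': [...]} index lists."""
    class_to_index = defaultdict(list)
    for idx, label in enumerate(labels):
        class_to_index[label].append(idx)
    splits = {'train': [], 'val': [], 'test': []}
    for indices in class_to_index.values():
        shuffle(indices)
        a, b = int(fractions[0] * len(indices)), int(fractions[1] * len(indices))
        splits['train'].extend(indices[:a])
        splits['val'].extend(indices[a:b])
        splits['test'].extend(indices[b:])
    return splits

# e.g. per_class_split([0, 0, 1, 1, 0, 1, 0, 1, 0, 1])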
def train(rawdata, charcounts, maxlens, unique_onehotvals): mb_size = 256 lr = 2.0e-4 cnt = 0 latent_dim = 32 recurrent_hidden_size = 24 epoch_len = 8 max_veclen = 0.0 patience = 12 * epoch_len patience_duration = 0 # mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) input_dict = {} input_dict['discrete'] = discrete_cols input_dict['continuous'] = continuous_cols input_dict['onehot'] = {} for k in onehot_cols: dim = int(np.ceil(np.log(len(unique_onehotvals[k])) / np.log(2.0))) input_dict['onehot'][k] = dim if len(charcounts) > 0: text_dim = int(np.ceil(np.log(len(charcounts)) / np.log(2.0))) input_dict['text'] = {t: text_dim for t in text_cols} else: text_dim = 0 input_dict['text'] = {} data = Dataseq(rawdata, charcounts, input_dict, unique_onehotvals, maxlens) data_idx = np.arange(data.__len__()) np.random.shuffle(data_idx) n_folds = 6 fold_size = 1.0 * data.__len__() / n_folds folds = [ data_idx[int(i * fold_size):int((i + 1) * fold_size)] for i in range(6) ] fold_groups = {} fold_groups[0] = {'train': [0, 1, 2, 4], 'es': [3], 'val': [5]} fold_groups[1] = {'train': [0, 2, 3, 5], 'es': [1], 'val': [4]} fold_groups[2] = {'train': [1, 3, 4, 5], 'es': [2], 'val': [0]} fold_groups[3] = {'train': [0, 2, 3, 4], 'es': [5], 'val': [1]} fold_groups[4] = {'train': [0, 1, 3, 5], 'es': [4], 'val': [2]} fold_groups[5] = {'train': [1, 2, 4, 5], 'es': [0], 'val': [3]} for fold in range(1): train_idx = np.array( list( itertools.chain.from_iterable( [folds[i] for i in fold_groups[fold]['train']]))) es_idx = np.array( list( itertools.chain.from_iterable( [folds[i] for i in fold_groups[fold]['es']]))) val_idx = np.array(folds[fold_groups[fold]['val'][0]]) train = Subset(data, train_idx) es = Subset(data, es_idx) val = Subset(data, val_idx) kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} train_iter = torch.utils.data.DataLoader(train, batch_size=mb_size, shuffle=True, **kwargs) es_iter = torch.utils.data.DataLoader(es, batch_size=mb_size, shuffle=True, **kwargs) val_iter = torch.utils.data.DataLoader(val, batch_size=mb_size, shuffle=True, **kwargs) embeddings = {} reverse_embeddings = {} onehot_embedding_weights = {} for k in onehot_cols: dim = input_dict['onehot'][k] onehot_embedding_weights[k] = net.get_embedding_weight( len(unique_onehotvals[k]), dim) #embeddings[k] = nn.Embedding(len(unique_onehotvals[k]), dim, max_norm=1.0) embeddings[k] = nn.Embedding(len(unique_onehotvals[k]), dim, _weight=onehot_embedding_weights[k]) reverse_embeddings[k] = net.EmbeddingToIndex( len(unique_onehotvals[k]), dim, _weight=onehot_embedding_weights[k]) if text_dim > 0: text_embedding_weights = net.get_embedding_weight( len(charcounts) + 1, text_dim) #text_embedding = nn.Embedding(len(charcounts)+1, text_dim, max_norm=1.0) text_embedding = nn.Embedding(len(charcounts) + 1, text_dim, _weight=text_embedding_weights) text_embeddingtoindex = net.EmbeddingToIndex( len(charcounts) + 1, text_dim, _weight=text_embedding_weights) for k in text_cols: embeddings[k] = text_embedding reverse_embeddings[k] = text_embeddingtoindex enc = net.Encoder(input_dict, dim=latent_dim, recurrent_hidden_size=recurrent_hidden_size) dec = net.Decoder(input_dict, maxlens, dim=latent_dim, recurrent_hidden_size=recurrent_hidden_size) if use_cuda: embeddings = {k: embeddings[k].cuda() for k in embeddings.keys()} enc.cuda() dec.cuda() #print(enc.parameters) #print(dec.parameters) #contrastivec = contrastive.ContrastiveLoss(margin=margin) logloss = contrastive.GaussianOverlap() #solver = optim.RMSprop([p for em in 
embeddings.values() for p in em.parameters()] + [p for p in enc.parameters()] + [p for p in dec.parameters()], lr=lr) solver = optim.Adam( [p for em in embeddings.values() for p in em.parameters()] + [p for p in enc.parameters()] + [p for p in dec.parameters()], lr=lr) Tsample = next(es_iter.__iter__()) if use_cuda: Tsample = { col: Variable(tt[0:128]).cuda() for col, tt in Tsample.items() } else: Tsample = {col: Variable(tt[0:128]) for col, tt in Tsample.items()} print({col: tt[0] for col, tt in Tsample.items()}) print('starting training') loss = 0.0 for it in range(1000000): # X = Variable(torch.tensor(np.array([[1,2,4], [4,1,9]]))).cuda() batch_idx, T = next(enumerate(train_iter)) if use_cuda: T = {col: Variable(tt).cuda() for col, tt in T.items()} else: T = {col: Variable(tt) for col, tt in T.items()} X = {} for col, tt in T.items(): if col in embeddings.keys(): X[col] = embeddings[col](tt) else: X[col] = tt.float() mu = enc(X) X2 = dec(mu) T2 = {} X2d = {col: (1.0 * tt).detach() for col, tt in X2.items()} for col, embedding in embeddings.items(): T2[col] = reverse_embeddings[col](X2[col]) X2[col] = embeddings[col](T2[col]) X2d[col] = embeddings[col](T2[col].detach()) ''' X2d = {col: (1.0*tt).detach() for col, tt in X2.items()} T2 = discretize(X2d, embeddings, maxlens) for col, embedding in embeddings.items(): X2d[col] = embeddings[col](T2[col].detach()) ''' ''' T2 = discretize(X2, embeddings, maxlens) X2d = {col: (1.0*tt).detach() for col, tt in X2.items()} for col, embedding in embeddings.items(): X2[col] = embeddings[col](T2[col]) #+0.05 X2[col] X2d[col] = embeddings[col](T2[col].detach()) ''' mu2 = enc(X2) mu2 = mu2.view(mb_size, -1) mu2d = enc(X2d) mu2d = mu2d.view(mb_size, -1) mu = mu.view(mb_size, -1) are_same = are_equal({col: x[::2] for col, x in T.items()}, {col: x[1::2] for col, x in T.items()}) #print('f same ', torch.mean(torch.mean(are_same, 1))) #enc_loss = contrastivec(mu2[::2], mu2[1::2], torch.zeros(int(mb_size / 2)).cuda()) enc_loss = logloss(torch.mean(torch.pow(mu[::2] - mu[1::2], 2), 1), are_same) #enc_loss += 0.5*contrastivec(mu2[::2], mu2[1::2], are_same) #enc_loss += 0.5 * contrastivec(mu[::2], mu2[1::2], are_same) enc_loss += 1.0 * logloss(torch.mean(torch.pow(mu - mu2, 2), 1), torch.ones(mb_size).cuda()) enc_loss += 2.0 * logloss(torch.mean(torch.pow(mu - mu2d, 2), 1), torch.zeros(mb_size).cuda()) #enc_loss += 1.0 * contrastivec(mu2d[0::2], mu2d[1::2], torch.ones(int(mb_size/2)).cuda()) #enc_loss += 1.0 * contrastivec(mu2d[::2], mu2d[1::2], torch.ones(int(mb_size / 2)).cuda()) #enc_loss += 0.5 * contrastivec(mu2d[::2], mu2d[1::2], torch.ones(int(mb_size/2)).cuda()) ''' adotb = torch.matmul(mu, mu.permute(1, 0)) # batch_size x batch_size adota = torch.matmul(mu.view(-1, 1, latent_dim), mu.view(-1, latent_dim, 1)) # batch_size x 1 x 1 diffsquares = (adota.view(-1, 1).repeat(1, mb_size) + adota.view(1, -1).repeat(mb_size, 1) - 2 * adotb) / latent_dim # did I f**k up something here? diffsquares can apparently be less than 0.... 
mdist = torch.sqrt(torch.clamp(torch.triu(diffsquares, diagonal=1), min=0.0)) mdist = torch.clamp(margin - mdist, min=0.0) number_of_pairs = mb_size * (mb_size - 1) / 2 enc_loss = 0.5 * torch.sum(torch.triu(torch.pow(mdist, 2), diagonal=1)) / number_of_pairs target = torch.ones(mu.size(0), 1) if use_cuda: target.cuda() enc_loss += contrastivec(mu, mu2, target.cuda()) target = torch.zeros(mu.size(0), 1) if use_cuda: target.cuda() enc_loss += 2.0 * contrastivec(mu, mu2d, target.cuda()) ''' enc_loss.backward() solver.step() enc.zero_grad() dec.zero_grad() for col in embeddings.keys(): embeddings[col].zero_grad() loss += enc_loss.data.cpu().numpy() veclen = torch.mean(torch.pow(mu, 2)) if it % epoch_len == 0: print(it, loss / epoch_len, veclen.data.cpu().numpy()) #enc_loss.data.cpu().numpy(), Xsample = {} for col, tt in Tsample.items(): if col in embeddings.keys(): Xsample[col] = embeddings[col](tt) else: Xsample[col] = tt.float() mu = enc(Xsample) X2sample = dec(mu) X2sampled = {col: tt.detach() for col, tt in X2sample.items()} T2sample = discretize(X2sample, embeddings, maxlens) mu2 = enc(X2sample) mu2d = enc(X2sampled) if 'Fare' in continuous_cols and 'Age' in continuous_cols: print([ np.mean( np.abs(Xsample[col].data.cpu().numpy() - X2sample[col].data.cpu().numpy())) for col in ['Fare', 'Age'] ]) print({ col: tt[0:2].data.cpu().numpy() for col, tt in T2sample.items() }) if 'Survived' in onehot_cols: print( '% survived correct: ', np.mean(T2sample['Survived'].data.cpu().numpy() == Tsample['Survived'].data.cpu().numpy()), np.mean( Tsample['Survived'].data.cpu().numpy() == np. ones_like(Tsample['Survived'].data.cpu().numpy()))) if 'Cabin' in text_cols: print(embeddings['Cabin'].weight[data.charindex['1']]) are_same = are_equal( {col: x[::2] for col, x in Tsample.items()}, {col: x[1::2] for col, x in Tsample.items()}) # print('f same ', torch.mean(torch.mean(are_same, 1))) # enc_loss = contrastivec(mu2[::2], mu2[1::2], torch.zeros(int(mb_size / 2)).cuda()) #es_loss = contrastivec(mu[::2], mu[1::2], are_same) # enc_loss += 0.25*contrastivec(mu2[::2], mu2[1::2], are_same) # enc_loss += 0.5 * contrastivec(mu[::2], mu2[1::2], are_same) es_loss = 1.0 * contrastivec(mu, mu2, torch.ones(mu.size(0)).cuda()) #es_loss += 2.0 * contrastivec(mu, mu2d, torch.zeros(mu.size(0)).cuda()) #print('mean mu ', torch.mean(torch.pow(mu, 2))) print('es loss ', es_loss) loss = 0.0
def setup_data(self): cfg = self.cfg batch_sz = cfg.solver.batch_sz num_workers = cfg.data.num_workers # download and unzip data if cfg.data.uri.startswith('s3://') or cfg.data.uri.startswith('/'): data_uri = cfg.data.uri else: data_uri = join(cfg.base_uri, cfg.data.uri) data_dirs = [] zip_uris = [data_uri] if data_uri.endswith('.zip') else list_paths( data_uri, 'zip') for zip_ind, zip_uri in enumerate(zip_uris): zip_path = get_local_path(zip_uri, self.data_cache_dir) if not isfile(zip_path): zip_path = download_if_needed(zip_uri, self.data_cache_dir) with zipfile.ZipFile(zip_path, 'r') as zipf: data_dir = join(self.tmp_dir, 'data', str(zip_ind)) data_dirs.append(data_dir) zipf.extractall(data_dir) # build datasets -- one per zip file and then merge them into a single dataset train_ds = [] valid_ds = [] test_ds = [] for data_dir in data_dirs: train_dir = join(data_dir, 'train') valid_dir = join(data_dir, 'valid') transform = Compose( [Resize((cfg.data.img_sz, cfg.data.img_sz)), ToTensor()]) aug_transform = Compose([ RandomHorizontalFlip(), RandomVerticalFlip(), ColorJitter(0.1, 0.1, 0.1, 0.1), Resize((cfg.data.img_sz, cfg.data.img_sz)), ToTensor() ]) if isdir(train_dir): if cfg.overfit_mode: train_ds.append( ImageRegressionDataset(train_dir, cfg.data.class_names, transform=transform)) else: train_ds.append( ImageRegressionDataset(train_dir, cfg.data.class_names, transform=aug_transform)) if isdir(valid_dir): valid_ds.append( ImageRegressionDataset(valid_dir, cfg.data.class_names, transform=transform)) test_ds.append( ImageRegressionDataset(valid_dir, cfg.data.class_names, transform=transform)) train_ds, valid_ds, test_ds = \ ConcatDataset(train_ds), ConcatDataset(valid_ds), ConcatDataset(test_ds) if cfg.overfit_mode: train_ds = Subset(train_ds, range(batch_sz)) valid_ds = train_ds test_ds = train_ds elif cfg.test_mode: train_ds = Subset(train_ds, range(batch_sz)) valid_ds = Subset(valid_ds, range(batch_sz)) test_ds = Subset(test_ds, range(batch_sz)) train_dl = DataLoader(train_ds, shuffle=True, batch_size=batch_sz, num_workers=num_workers, pin_memory=True) valid_dl = DataLoader(valid_ds, shuffle=True, batch_size=batch_sz, num_workers=num_workers, pin_memory=True) test_dl = DataLoader(test_ds, shuffle=True, batch_size=batch_sz, num_workers=num_workers, pin_memory=True) self.train_ds, self.valid_ds, self.test_ds = (train_ds, valid_ds, test_ds) self.train_dl, self.valid_dl, self.test_dl = (train_dl, valid_dl, test_dl)
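# Illustrative helper (not part of the original class): the overfit/test-mode
# branches above keep only the first `batch_sz` examples of each split so that a
# single batch can be pushed through the whole pipeline as a smoke test.
from torch.utils.data import Subset


def truncate_for_smoke_test(*datasets, batch_sz):
    """Return each dataset truncated to its first `batch_sz` examples."""
    return tuple(Subset(ds, range(min(batch_sz, len(ds)))) for ds in datasets)

# e.g. train_ds, valid_ds, test_ds = truncate_for_smoke_test(train_ds, valid_ds, test_ds, batch_sz=8)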
def create_dataloaders(lg, seed_base, agent, data_cfg): data_cfg.type = data_cfg.type.strip().lower() clz, (MEAN, STD) = get_dataset_settings(data_cfg.type) clz: Dataset.__class__ if 'dataset_root' not in data_cfg: data_cfg['dataset_root'] = os.path.abspath( os.path.join(os.path.expanduser('~'), 'datasets', data_cfg.type)) # build transformers last_t = time.time() baseline_train_trans = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), Cutout(n_holes=1, length=16), transforms.Normalize(MEAN, STD), ]) to_tensor = transforms.ToTensor() cutout = Cutout(n_holes=1, length=16) normalize = transforms.Normalize(MEAN, STD) autoaug_train_trans = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), agent, lambda tup: (to_tensor(tup[0]), tup[1]), lambda tup: (cutout(tup[0]), tup[1]), lambda tup: (normalize(tup[0]), tup[1]), ]) val_trans = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(MEAN, STD)]) test_trans = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(MEAN, STD)]) lg.info( f'=> after building transforms, time cost: {time.time() - last_t:.2f}s' ) # split data sets last_t = time.time() original_train_val_set = clz(root=data_cfg.dataset_root, train=True, download=False, transform=None) lg.info( f'=> after building original_train_val_set, time cost: {time.time() - last_t:.2f}s' ) last_t = time.time() targets_attr_name = 'targets' if hasattr(original_train_val_set, 'targets') else 'train_labels' reduced_size = data_cfg.train_set_size + data_cfg.val_set_size original_size = get_train_val_set_size(data_cfg.type) assert reduced_size <= original_size, f'too many images({reduced_size}) for the train_val_set of {data_cfg.type})' reduced = reduced_size < original_size def split(dataset, second_size) -> Tuple[np.ndarray, np.ndarray]: """ split a given dataset into two subsets (preserving the percentage of samples for each class) :param dataset: the origin dataset :param second_size: the length of second_idx :return: two indices (np.ndarray) of the two subsets len(second_idx) = second_size len(first_idx) + len(second_idx) = len(dataset) """ sss = StratifiedShuffleSplit(n_splits=1, test_size=second_size, random_state=seed_base) first_idx, second_idx = next( sss.split(X=list(range(len(dataset))), y=getattr(dataset, targets_attr_name))) return first_idx, second_idx train_val_set = original_train_val_set if reduced: lg.info(f'use a reduced set ({reduced_size} of {original_size})') _, reduced_train_val_idx = split(original_train_val_set, reduced_size) reduced_train_val_set = Subset(original_train_val_set, reduced_train_val_idx) setattr(reduced_train_val_set, targets_attr_name, [ getattr(original_train_val_set, targets_attr_name)[i] for i in reduced_train_val_idx ]) train_val_set = reduced_train_val_set train_idx, val_idx = split(train_val_set, data_cfg.val_set_size) lg.info(f'=> after splitting, time cost: {time.time() - last_t:.2f}s') # build datasets # data_cfg.dist_training last_t = time.time() auged_full_train_set = clz(root=data_cfg.dataset_root, train=True, download=False, transform=autoaug_train_trans) full_train_set = clz(root=data_cfg.dataset_root, train=True, download=False, transform=baseline_train_trans) auged_sub_train_set = Subset( dataset=clz(root=data_cfg.dataset_root, train=True, download=False, transform=autoaug_train_trans), indices=np.array([train_val_set.indices[i] for i in train_idx]) if reduced else train_idx) val_set = Subset( 
dataset=clz(root=data_cfg.dataset_root, train=True, download=False, transform=val_trans), indices=np.array([train_val_set.indices[i] for i in val_idx]) if reduced else val_idx) test_set = clz(root=data_cfg.dataset_root, train=False, download=False, transform=test_trans) set_sizes = len(full_train_set), len(auged_full_train_set), len( auged_sub_train_set), len(val_set), len(test_set) lg.info( f'=> after building sets, time cost: {time.time() - last_t:.2f}s, test_set[0][0].mean(): {test_set[0][0].mean():.4f} (expected: -0.2404)' ) # -0.24041180312633514 # build loaders from torch.utils.data._utils.collate import default_collate last_t = time.time() loaders = [ DataLoader(dataset=dataset, num_workers=data_cfg.num_workers, pin_memory=True, collate_fn=cf, batch_size=bs, shuffle=shuffle, drop_last=False) for dataset, cf, bs, shuffle in zip(( full_train_set, auged_full_train_set, auged_sub_train_set, val_set, test_set), (default_collate, collate_fn_for_autoaug, collate_fn_for_autoaug, default_collate, default_collate), ( data_cfg.batch_size, data_cfg.batch_size, data_cfg.batch_size, data_cfg.batch_size * 2, data_cfg.batch_size * 2), (True, True, True, False, False)) ] lg.info( f'=> after building loaders, time cost: {time.time() - last_t:.2f}s') return set_sizes, loaders
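# Toy demonstration (illustrative only) of the split() helper defined inside
# create_dataloaders above: StratifiedShuffleSplit carves off `second_size`
# examples while preserving the per-class proportions of the labels.
from sklearn.model_selection import StratifiedShuffleSplit

labels = [0] * 80 + [1] * 20            # hypothetical 80/20 class balance
sss = StratifiedShuffleSplit(n_splits=1, test_size=10, random_state=0)
first_idx, second_idx = next(sss.split(X=list(range(len(labels))), y=labels))
print(len(second_idx))                               # 10
print(sum(1 for i in second_idx if labels[i] == 1))  # 2 -> class balance preserved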
def train(self): """ #General Training Loop with Data Selection Strategies """ # Loading the Dataset if self.configdata['dataset']['feature'] == 'classimb': trainset, validset, testset, num_cls = load_dataset_custom( self.configdata['dataset']['datadir'], self.configdata['dataset']['name'], self.configdata['dataset']['feature'], classimb_ratio=self.configdata['dataset']['classimb_ratio']) else: trainset, validset, testset, num_cls = load_dataset_custom( self.configdata['dataset']['datadir'], self.configdata['dataset']['name'], self.configdata['dataset']['feature']) N = len(trainset) trn_batch_size = 20 val_batch_size = 1000 tst_batch_size = 1000 # Creating the Data Loaders trainloader = torch.utils.data.DataLoader(trainset, batch_size=trn_batch_size, shuffle=False, pin_memory=True) valloader = torch.utils.data.DataLoader(validset, batch_size=val_batch_size, shuffle=False, pin_memory=True) testloader = torch.utils.data.DataLoader(testset, batch_size=tst_batch_size, shuffle=False, pin_memory=True) # Budget for subset selection bud = int(self.configdata['dss_strategy']['fraction'] * N) print("Budget, fraction and N:", bud, self.configdata['dss_strategy']['fraction'], N) # Subset Selection and creating the subset data loader start_idxs = np.random.choice(N, size=bud, replace=False) idxs = start_idxs data_sub = Subset(trainset, idxs) subset_trnloader = torch.utils.data.DataLoader( data_sub, batch_size=self.configdata['dataloader']['batch_size'], shuffle=self.configdata['dataloader']['shuffle'], pin_memory=self.configdata['dataloader']['pin_memory']) # Variables to store accuracies gammas = torch.ones(len(idxs)).to( self.configdata['train_args']['device']) substrn_losses = list( ) #np.zeros(configdata['train_args']['num_epochs']) trn_losses = list() val_losses = list() #np.zeros(configdata['train_args']['num_epochs']) tst_losses = list() subtrn_losses = list() timing = np.zeros(self.configdata['train_args']['num_epochs']) trn_acc = list() val_acc = list() #np.zeros(configdata['train_args']['num_epochs']) tst_acc = list() #np.zeros(configdata['train_args']['num_epochs']) subtrn_acc = list() #np.zeros(configdata['train_args']['num_epochs']) # Results logging file print_every = self.configdata['train_args']['print_every'] results_dir = osp.abspath( osp.expanduser(self.configdata['train_args']['results_dir'])) all_logs_dir = os.path.join( results_dir, self.configdata['dss_strategy']['type'], self.configdata['dataset']['name'], str(self.configdata['dss_strategy']['fraction']), str(self.configdata['dss_strategy']['select_every'])) os.makedirs(all_logs_dir, exist_ok=True) path_logfile = os.path.join( all_logs_dir, self.configdata['dataset']['name'] + '.txt') logfile = open(path_logfile, 'w') # Model Creation model = self.create_model() model1 = self.create_model() # Loss Functions criterion, criterion_nored = self.loss_function() # Getting the optimizer and scheduler optimizer, scheduler = self.optimizer_with_scheduler(model) if self.configdata['dss_strategy']['type'] == 'GradMatch': # OMPGradMatch Selection strategy setf_model = OMPGradMatchStrategy( trainloader, valloader, model1, criterion_nored, self.configdata['optimizer']['lr'], self.configdata['train_args']['device'], num_cls, True, 'PerClassPerGradient', False, lam=self.configdata['dss_strategy']['lam'], eps=1e-100) elif self.configdata['dss_strategy']['type'] == 'GradMatchPB': setf_model = OMPGradMatchStrategy( trainloader, valloader, model1, criterion_nored, self.configdata['optimizer']['lr'], self.configdata['train_args']['device'], num_cls, 
True, 'PerBatch', False, lam=self.configdata['dss_strategy']['lam'], eps=1e-100) elif self.configdata['dss_strategy']['type'] == 'GLISTER': # GLISTER Selection strategy setf_model = GLISTERStrategy( trainloader, valloader, model1, criterion_nored, self.configdata['optimizer']['lr'], self.configdata['train_args']['device'], num_cls, False, 'Stochastic', r=int(bud)) elif self.configdata['dss_strategy']['type'] == 'CRAIG': # CRAIG Selection strategy setf_model = CRAIGStrategy(trainloader, valloader, model1, criterion_nored, self.configdata['train_args']['device'], num_cls, False, False, 'PerClass') elif self.configdata['dss_strategy']['type'] == 'CRAIGPB': # CRAIG Selection strategy setf_model = CRAIGStrategy(trainloader, valloader, model1, criterion_nored, self.configdata['train_args']['device'], num_cls, False, False, 'PerBatch') elif self.configdata['dss_strategy']['type'] == 'CRAIG-Warm': # CRAIG Selection strategy setf_model = CRAIGStrategy(trainloader, valloader, model1, criterion_nored, self.configdata['train_args']['device'], num_cls, False, False, 'PerClass') # Random-Online Selection strategy #rand_setf_model = RandomStrategy(trainloader, online=True) if 'kappa' in self.configdata['dss_strategy']: kappa_epochs = int(self.configdata['dss_strategy']['kappa'] * self.configdata['train_args']['num_epochs']) full_epochs = round( kappa_epochs * self.configdata['dss_strategy']['fraction']) else: raise KeyError("Specify a kappa value in the config file") elif self.configdata['dss_strategy']['type'] == 'CRAIGPB-Warm': # CRAIG Selection strategy setf_model = CRAIGStrategy(trainloader, valloader, model1, criterion_nored, self.configdata['train_args']['device'], num_cls, False, False, 'PerBatch') # Random-Online Selection strategy #rand_setf_model = RandomStrategy(trainloader, online=True) if 'kappa' in self.configdata['dss_strategy']: kappa_epochs = int(self.configdata['dss_strategy']['kappa'] * self.configdata['train_args']['num_epochs']) full_epochs = round( kappa_epochs * self.configdata['dss_strategy']['fraction']) else: raise KeyError("Specify a kappa value in the config file") elif self.configdata['dss_strategy']['type'] == 'Random': # Random Selection strategy setf_model = RandomStrategy(trainloader, online=False) elif self.configdata['dss_strategy']['type'] == 'Random-Online': # Random-Online Selection strategy setf_model = RandomStrategy(trainloader, online=True) elif self.configdata['dss_strategy']['type'] == 'GLISTER-Warm': # GLISTER Selection strategy setf_model = GLISTERStrategy( trainloader, valloader, model1, criterion_nored, self.configdata['optimizer']['lr'], self.configdata['train_args']['device'], num_cls, False, 'Stochastic', r=int(bud)) # Random-Online Selection strategy #rand_setf_model = RandomStrategy(trainloader, online=True) if 'kappa' in self.configdata['dss_strategy']: kappa_epochs = int(self.configdata['dss_strategy']['kappa'] * self.configdata['train_args']['num_epochs']) full_epochs = round( kappa_epochs * self.configdata['dss_strategy']['fraction']) else: raise KeyError("Specify a kappa value in the config file") elif self.configdata['dss_strategy']['type'] == 'GradMatch-Warm': # OMPGradMatch Selection strategy setf_model = OMPGradMatchStrategy( trainloader, valloader, model1, criterion_nored, self.configdata['optimizer']['lr'], self.configdata['train_args']['device'], num_cls, True, 'PerClassPerGradient', False, lam=self.configdata['dss_strategy']['lam'], eps=1e-100) # Random-Online Selection strategy #rand_setf_model = RandomStrategy(trainloader, online=True) 
if 'kappa' in self.configdata['dss_strategy']: kappa_epochs = int(self.configdata['dss_strategy']['kappa'] * self.configdata['train_args']['num_epochs']) full_epochs = round( kappa_epochs * self.configdata['dss_strategy']['fraction']) else: raise KeyError("Specify a kappa value in the config file") elif self.configdata['dss_strategy']['type'] == 'GradMatchPB-Warm': # OMPGradMatch Selection strategy setf_model = OMPGradMatchStrategy( trainloader, valloader, model1, criterion_nored, self.configdata['optimizer']['lr'], self.configdata['train_args']['device'], num_cls, True, 'PerBatch', False, lam=self.configdata['dss_strategy']['lam'], eps=1e-100) # Random-Online Selection strategy #rand_setf_model = RandomStrategy(trainloader, online=True) if 'kappa' in self.configdata['dss_strategy']: kappa_epochs = int(self.configdata['dss_strategy']['kappa'] * self.configdata['train_args']['num_epochs']) full_epochs = round( kappa_epochs * self.configdata['dss_strategy']['fraction']) else: raise KeyError("Specify a kappa value in the config file") elif self.configdata['dss_strategy']['type'] == 'Random-Warm': if 'kappa' in self.configdata['dss_strategy']: kappa_epochs = int(self.configdata['dss_strategy']['kappa'] * self.configdata['train_args']['num_epochs']) full_epochs = round( kappa_epochs * self.configdata['dss_strategy']['fraction']) else: raise KeyError("Specify a kappa value in the config file") print("=======================================", file=logfile) for i in range(self.configdata['train_args']['num_epochs']): subtrn_loss = 0 subtrn_correct = 0 subtrn_total = 0 subset_selection_time = 0 if self.configdata['dss_strategy']['type'] in ['Random-Online']: start_time = time.time() subset_idxs, gammas = setf_model.select(int(bud)) idxs = subset_idxs subset_selection_time += (time.time() - start_time) gammas = gammas.to(self.configdata['train_args']['device']) elif self.configdata['dss_strategy']['type'] in ['Random']: pass elif (self.configdata['dss_strategy']['type'] in [ 'GLISTER', 'GradMatch', 'GradMatchPB', 'CRAIG', 'CRAIGPB' ]) and (((i + 1) % self.configdata['dss_strategy']['select_every']) == 0): start_time = time.time() cached_state_dict = copy.deepcopy(model.state_dict()) clone_dict = copy.deepcopy(model.state_dict()) subset_idxs, gammas = setf_model.select(int(bud), clone_dict) model.load_state_dict(cached_state_dict) idxs = subset_idxs if self.configdata['dss_strategy']['type'] in [ 'GradMatch', 'GradMatchPB', 'CRAIG', 'CRAIGPB' ]: gammas = torch.from_numpy(np.array(gammas)).to( self.configdata['train_args']['device']).to( torch.float32) subset_selection_time += (time.time() - start_time) elif (self.configdata['dss_strategy']['type'] in [ 'GLISTER-Warm', 'GradMatch-Warm', 'GradMatchPB-Warm', 'CRAIG-Warm', 'CRAIGPB-Warm' ]): start_time = time.time() if ((i % self.configdata['dss_strategy']['select_every'] == 0) and (i >= kappa_epochs)): cached_state_dict = copy.deepcopy(model.state_dict()) clone_dict = copy.deepcopy(model.state_dict()) subset_idxs, gammas = setf_model.select( int(bud), clone_dict) model.load_state_dict(cached_state_dict) idxs = subset_idxs if self.configdata['dss_strategy']['type'] in [ 'GradMatch-Warm', 'GradMatchPB-Warm', 'CRAIG-Warm', 'CRAIGPB-Warm' ]: gammas = torch.from_numpy(np.array(gammas)).to( self.configdata['train_args']['device']).to( torch.float32) subset_selection_time += (time.time() - start_time) elif self.configdata['dss_strategy']['type'] in ['Random-Warm']: pass #print("selEpoch: %d, Selection Ended at:" % (i), str(datetime.datetime.now())) data_sub = 
Subset(trainset, idxs) subset_trnloader = torch.utils.data.DataLoader( data_sub, batch_size=trn_batch_size, shuffle=False, pin_memory=True) model.train() batch_wise_indices = list(subset_trnloader.batch_sampler) if self.configdata['dss_strategy']['type'] in [ 'CRAIG', 'CRAIGPB', 'GradMatch', 'GradMatchPB' ]: start_time = time.time() for batch_idx, (inputs, targets) in enumerate(subset_trnloader): inputs, targets = inputs.to( self.configdata['train_args']['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True ) # targets can have non_blocking=True. optimizer.zero_grad() outputs = model(inputs) losses = criterion_nored(outputs, targets) loss = torch.dot( losses, gammas[batch_wise_indices[batch_idx]]) / ( gammas[batch_wise_indices[batch_idx]].sum()) loss.backward() subtrn_loss += loss.item() optimizer.step() _, predicted = outputs.max(1) subtrn_total += targets.size(0) subtrn_correct += predicted.eq(targets).sum().item() train_time = time.time() - start_time elif self.configdata['dss_strategy']['type'] in [ 'CRAIGPB-Warm', 'CRAIG-Warm', 'GradMatch-Warm', 'GradMatchPB-Warm' ]: start_time = time.time() if i < full_epochs: for batch_idx, (inputs, targets) in enumerate(trainloader): inputs, targets = inputs.to( self.configdata['train_args'] ['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True ) # targets can have non_blocking=True. optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets) loss.backward() subtrn_loss += loss.item() optimizer.step() _, predicted = outputs.max(1) subtrn_total += targets.size(0) subtrn_correct += predicted.eq(targets).sum().item() elif i >= kappa_epochs: for batch_idx, (inputs, targets) in enumerate(subset_trnloader): inputs, targets = inputs.to( self.configdata['train_args'] ['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True ) # targets can have non_blocking=True. optimizer.zero_grad() outputs = model(inputs) losses = criterion_nored(outputs, targets) loss = torch.dot( losses, gammas[batch_wise_indices[batch_idx]]) / ( gammas[batch_wise_indices[batch_idx]].sum()) loss.backward() subtrn_loss += loss.item() optimizer.step() _, predicted = outputs.max(1) subtrn_total += targets.size(0) subtrn_correct += predicted.eq(targets).sum().item() train_time = time.time() - start_time elif self.configdata['dss_strategy']['type'] in [ 'GLISTER', 'Random', 'Random-Online' ]: start_time = time.time() for batch_idx, (inputs, targets) in enumerate(subset_trnloader): inputs, targets = inputs.to( self.configdata['train_args']['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True ) # targets can have non_blocking=True. optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets) loss.backward() subtrn_loss += loss.item() optimizer.step() _, predicted = outputs.max(1) subtrn_total += targets.size(0) subtrn_correct += predicted.eq(targets).sum().item() train_time = time.time() - start_time elif self.configdata['dss_strategy']['type'] in [ 'GLISTER-Warm', 'Random-Warm' ]: start_time = time.time() if i < full_epochs: for batch_idx, (inputs, targets) in enumerate(trainloader): inputs, targets = inputs.to( self.configdata['train_args'] ['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True ) # targets can have non_blocking=True. 
optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets) loss.backward() subtrn_loss += loss.item() optimizer.step() _, predicted = outputs.max(1) subtrn_total += targets.size(0) subtrn_correct += predicted.eq(targets).sum().item() elif i >= kappa_epochs: for batch_idx, (inputs, targets) in enumerate(subset_trnloader): inputs, targets = inputs.to( self.configdata['train_args'] ['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True ) # targets can have non_blocking=True. optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets) loss.backward() subtrn_loss += loss.item() optimizer.step() _, predicted = outputs.max(1) subtrn_total += targets.size(0) subtrn_correct += predicted.eq(targets).sum().item() train_time = time.time() - start_time elif self.configdata['dss_strategy']['type'] in ['Full']: start_time = time.time() for batch_idx, (inputs, targets) in enumerate(trainloader): inputs, targets = inputs.to( self.configdata['train_args']['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True ) # targets can have non_blocking=True. optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets) loss.backward() subtrn_loss += loss.item() optimizer.step() _, predicted = outputs.max(1) subtrn_total += targets.size(0) subtrn_correct += predicted.eq(targets).sum().item() train_time = time.time() - start_time scheduler.step() timing[i] = train_time + subset_selection_time print_args = self.configdata['train_args']['print_args'] # print("Epoch timing is: " + str(timing[i])) if ((i + 1) % self.configdata['train_args']['print_every'] == 0): trn_loss = 0 trn_correct = 0 trn_total = 0 val_loss = 0 val_correct = 0 val_total = 0 tst_correct = 0 tst_total = 0 tst_loss = 0 model.eval() if "trn_loss" in print_args: with torch.no_grad(): for batch_idx, (inputs, targets) in enumerate(trainloader): # print(batch_idx) inputs, targets = inputs.to( self.configdata['train_args'] ['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True) outputs = model(inputs) loss = criterion(outputs, targets) trn_loss += loss.item() trn_losses.append(trn_loss) if "trn_acc" in print_args: _, predicted = outputs.max(1) trn_total += targets.size(0) trn_correct += predicted.eq( targets).sum().item() trn_acc.append(trn_correct / trn_total) if "val_loss" in print_args: with torch.no_grad(): for batch_idx, (inputs, targets) in enumerate(valloader): # print(batch_idx) inputs, targets = inputs.to( self.configdata['train_args'] ['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True) outputs = model(inputs) loss = criterion(outputs, targets) val_loss += loss.item() val_losses.append(val_loss) if "val_acc" in print_args: _, predicted = outputs.max(1) val_total += targets.size(0) val_correct += predicted.eq( targets).sum().item() val_acc.append(val_correct / val_total) if "tst_loss" in print_args: for batch_idx, (inputs, targets) in enumerate(testloader): # print(batch_idx) inputs, targets = inputs.to( self.configdata['train_args'] ['device']), targets.to( self.configdata['train_args']['device'], non_blocking=True) outputs = model(inputs) loss = criterion(outputs, targets) tst_loss += loss.item() tst_losses.append(tst_loss) if "tst_acc" in print_args: _, predicted = outputs.max(1) tst_total += targets.size(0) tst_correct += predicted.eq(targets).sum().item() tst_acc.append(tst_correct / tst_total) if "subtrn_acc" in print_args: subtrn_acc.append(subtrn_correct / 
                        subtrn_total)
                if "subtrn_losses" in print_args:
                    subtrn_losses.append(subtrn_loss)
                print_str = "Epoch: " + str(i + 1)
                for arg in print_args:
                    if arg == "val_loss":
                        print_str += " , " + "Validation Loss: " + str(val_losses[-1])
                    if arg == "val_acc":
                        print_str += " , " + "Validation Accuracy: " + str(val_acc[-1])
                    if arg == "tst_loss":
                        print_str += " , " + "Test Loss: " + str(tst_losses[-1])
                    if arg == "tst_acc":
                        print_str += " , " + "Test Accuracy: " + str(tst_acc[-1])
                    if arg == "trn_loss":
                        print_str += " , " + "Training Loss: " + str(trn_losses[-1])
                    if arg == "trn_acc":
                        print_str += " , " + "Training Accuracy: " + str(trn_acc[-1])
                    if arg == "subtrn_loss":
                        print_str += " , " + "Subset Loss: " + str(subtrn_losses[-1])
                    if arg == "subtrn_acc":
                        print_str += " , " + "Subset Accuracy: " + str(subtrn_acc[-1])
                    if arg == "time":
                        print_str += " , " + "Timing: " + str(timing[i])
                # report metric to ray for hyperparameter optimization
                if 'report_tune' in self.configdata and self.configdata['report_tune']:
                    tune.report(mean_accuracy=val_acc[-1])
                print(print_str)
        print(self.configdata['dss_strategy']['type'] +
              " Selection Run---------------------------------")
        print("Final SubsetTrn:", subtrn_loss)
        if "val_loss" in print_args:
            if "val_acc" in print_args:
                print("Validation Loss and Accuracy: ", val_loss,
                      np.array(val_acc).max())
            else:
                print("Validation Loss: ", val_loss)
        if "tst_loss" in print_args:
            if "tst_acc" in print_args:
                print("Test Data Loss and Accuracy: ", tst_loss,
                      np.array(tst_acc).max())
            else:
                print("Test Data Loss: ", tst_loss)
        print('-----------------------------------')
        print(self.configdata['dss_strategy']['type'], file=logfile)
        print('---------------------------------------------------------------------',
              file=logfile)
        if "val_acc" in print_args:
            val_str = "Validation Accuracy, "
            for val in val_acc:
                val_str = val_str + " , " + str(val)
            print(val_str, file=logfile)
        if "tst_acc" in print_args:
            tst_str = "Test Accuracy, "
            for tst in tst_acc:
                tst_str = tst_str + " , " + str(tst)
            print(tst_str, file=logfile)
        if "time" in print_args:
            time_str = "Time, "
            for t in timing:
                time_str = time_str + " , " + str(t)
            print(time_str, file=logfile)
        omp_timing = np.array(timing)
        omp_cum_timing = list(self.generate_cumulative_timing(omp_timing))
        print("Total time taken by " + self.configdata['dss_strategy']['type'] +
              " = " + str(omp_cum_timing[-1]))
        logfile.close()
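# A minimal, self-contained sketch of the gamma-weighted subset training step used by the
# GradMatch/CRAIG branches above: per-sample losses from a reduction='none' criterion are
# combined with the per-example weights ("gammas") returned by the selection strategy,
# using the loader's batch_sampler to map each batch back to its weights. The toy dataset,
# model, and the randomly generated subset indices/weights are illustrative stand-ins for
# the real selection strategy output, not part of the original pipeline.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset, TensorDataset

torch.manual_seed(0)
full_train = TensorDataset(torch.randn(256, 10), torch.randint(0, 2, (256,)))

# Stand-in for a selection strategy's output: indices into the full training set
# plus one non-negative weight per selected example.
subset_idxs = torch.randperm(len(full_train))[:64]
gammas = torch.rand(len(subset_idxs))

subset_trnloader = DataLoader(Subset(full_train, subset_idxs.tolist()),
                              batch_size=32, shuffle=False)
batch_wise_indices = list(subset_trnloader.batch_sampler)  # maps batches back into `gammas`

model = nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 2))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion_nored = nn.CrossEntropyLoss(reduction='none')

for batch_idx, (inputs, targets) in enumerate(subset_trnloader):
    optimizer.zero_grad()
    losses = criterion_nored(model(inputs), targets)             # one loss per example
    batch_gammas = gammas[batch_wise_indices[batch_idx]]
    loss = torch.dot(losses, batch_gammas) / batch_gammas.sum()  # weighted mean over the batch
    loss.backward()
    optimizer.step()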
def __init__(self, opt): """ Modulate the data ratio in the batch. For example, when select_data is "MJ-ST" and batch_ratio is "0.5-0.5", the 50% of the batch is filled with MJ and the other 50% of the batch is filled with ST. """ log = open(f'./saved_models/{opt.experiment_name}/log_dataset.txt', 'a') dashed_line = '-' * 80 print(dashed_line) log.write(dashed_line + '\n') print( f'dataset_root: {opt.train_data}\nopt.select_data: {opt.select_data}\nopt.batch_ratio: {opt.batch_ratio}' ) log.write( f'dataset_root: {opt.train_data}\nopt.select_data: {opt.select_data}\nopt.batch_ratio: {opt.batch_ratio}\n' ) assert len(opt.select_data) == len(opt.batch_ratio) _AlignCollate = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD) self.data_loader_list = [] self.dataloader_iter_list = [] batch_size_list = [] Total_batch_size = 0 for selected_d, batch_ratio_d in zip(opt.select_data, opt.batch_ratio): _batch_size = max(round(opt.batch_size * float(batch_ratio_d)), 1) print(dashed_line) log.write(dashed_line + '\n') _dataset, _dataset_log = hierarchical_dataset( root=opt.train_data, opt=opt, select_data=[selected_d]) total_number_dataset = len(_dataset) log.write(_dataset_log) """ The total number of data can be modified with opt.total_data_usage_ratio. ex) opt.total_data_usage_ratio = 1 indicates 100% usage, and 0.2 indicates 20% usage. See 4.2 section in our paper. """ number_dataset = int(total_number_dataset * float(opt.total_data_usage_ratio)) dataset_split = [ number_dataset, total_number_dataset - number_dataset ] indices = range(total_number_dataset) _dataset, _ = [ Subset(_dataset, indices[offset - length:offset]) for offset, length in zip(_accumulate(dataset_split), dataset_split) ] selected_d_log = f'num total samples of {selected_d}: {total_number_dataset} x {opt.total_data_usage_ratio} (total_data_usage_ratio) = {len(_dataset)}\n' selected_d_log += f'num samples of {selected_d} per batch: {opt.batch_size} x {float(batch_ratio_d)} (batch_ratio) = {_batch_size}' print(selected_d_log) log.write(selected_d_log + '\n') batch_size_list.append(str(_batch_size)) Total_batch_size += _batch_size _data_loader = torch.utils.data.DataLoader( _dataset, batch_size=_batch_size, shuffle=True, num_workers=int(opt.workers), collate_fn=_AlignCollate, pin_memory=True) self.data_loader_list.append(_data_loader) self.dataloader_iter_list.append(iter(_data_loader)) Total_batch_size_log = f'{dashed_line}\n' batch_size_sum = '+'.join(batch_size_list) Total_batch_size_log += f'Total_batch_size: {batch_size_sum} = {Total_batch_size}\n' Total_batch_size_log += f'{dashed_line}' opt.batch_size = Total_batch_size print(Total_batch_size_log) log.write(Total_batch_size_log + '\n') log.close()
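# Sketch of how a batch-balanced loader like the one built above is typically consumed:
# each constituent DataLoader contributes a sub-batch sized by its batch ratio, exhausted
# iterators are restarted on StopIteration, and the sub-batches are concatenated into one
# mixed batch. The toy datasets, `loader_list`, `iter_list`, and `get_balanced_batch` are
# illustrative assumptions; the original class keeps the equivalent state in
# self.data_loader_list / self.dataloader_iter_list and does this in its own batch-fetching
# method (not shown here).
import torch
from torch.utils.data import DataLoader, TensorDataset

toy_datasets = [TensorDataset(torch.randn(100, 8), torch.zeros(100, dtype=torch.long)),
                TensorDataset(torch.randn(40, 8), torch.ones(40, dtype=torch.long))]
# e.g. batch_ratio "0.75-0.25" with a total batch size of 16 -> sub-batch sizes 12 and 4
loader_list = [DataLoader(ds, batch_size=bs, shuffle=True)
               for ds, bs in zip(toy_datasets, (12, 4))]
iter_list = [iter(dl) for dl in loader_list]

def get_balanced_batch():
    xs, ys = [], []
    for i, data_loader_iter in enumerate(iter_list):
        try:
            x, y = next(data_loader_iter)
        except StopIteration:                      # restart an exhausted loader
            iter_list[i] = iter(loader_list[i])
            x, y = next(iter_list[i])
        xs.append(x)
        ys.append(y)
    return torch.cat(xs, 0), torch.cat(ys, 0)      # one mixed batch of total size 16

images, labels = get_balanced_batch()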
def preTrainFeatureExtractor(feature_extractor,
                             dataset,
                             batch_size,
                             num_epochs,
                             optimizer=Adam,
                             cuda=True):
    # print("Starting pre-training of the feature extractor\n")
    if cuda:
        gpu = torch.device("cuda:0")
        feature_extractor = feature_extractor.to(gpu)
    feature_extractor.train()
    torch.manual_seed(0)
    idxs = torch.randperm(len(dataset))
    evaldataset = Subset(dataset, idxs[:int(len(dataset) / 6)])
    traindataset = Subset(dataset, idxs[int(len(dataset) / 6):])
    # Build the dataloaders
    traindataloader = DataLoader(traindataset,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 pin_memory=True,
                                 drop_last=False)
    evaldataloader = DataLoader(evaldataset,
                                batch_size=batch_size,
                                pin_memory=True,
                                drop_last=False)
    optimizer = optimizer(feature_extractor.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    training_losses = []
    eval_losses = []
    running_loss = 0
    total = 0
    for epoch in range(num_epochs):
        for i, data in enumerate(traindataloader):
            data1, label = data
            # Duplicate the data (since the network expects a two-channel input), but apply
            # a random mask so that all of the network's parameters receive learning signal.
            data2 = copy.deepcopy(data1)
            mask = torch.randint(0, 2, data1.shape)
            data1[mask == 0] = 0
            data2[mask == 1] = 0
            data = torch.cat((data1, data2), dim=1)
            if cuda:
                data = data.to(gpu)
                label = label.to(gpu)
            optimizer.zero_grad()
            output = feature_extractor(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            total += label.size(0)
            """
            if i % (10000//batch_size) == 10000//batch_size-1:
                print("{}.{} Mean loss over the last {} samples is {}".format(epoch, i//(10000//batch_size), total, running_loss/total))
                training_losses.append(running_loss/total)
                running_loss = 0
                total = 0
                with torch.no_grad():
                    for data1, label in evaldataloader:
                        data2 = copy.deepcopy(data1)
                        mask = torch.randint(0, 2, data1.shape)
                        data1[mask == 0] = 0
                        data2[mask == 1] = 0
                        data = torch.cat((data1, data2), dim=1)
                        if cuda:
                            data = data.cuda()
                            label = label.cuda()
                        output = feature_extractor(data)
                        running_loss += criterion(output, label)
                        total += label.size(0)
                print("{}.{} Mean loss over the validation set is {}".format(epoch, i // (10000 // batch_size), running_loss/total))
                eval_losses.append(running_loss/total)
                running_loss = 0
                total = 0
    # print("\nPre-training of the feature extractor has finished\n")
    plot1, = plt.plot(training_losses, 'r', label="train_loss")
    plot2, = plt.plot(eval_losses, 'b', label="eval_loss")
    plt.legend(handles=[plot1, plot2])
    plt.show()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs1, labels in evaldataloader:
            inputs2 = copy.deepcopy(inputs1)
            mask = torch.randint(0, 2, inputs1.shape)
            inputs1[mask == 0] = 0
            inputs2[mask == 1] = 0
            inputs = torch.cat((inputs1, inputs2), dim=1)
            if cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            predictions = feature_extractor(inputs)
            _, predictions = torch.max(predictions.data, 1)
            total += labels.size(0)
            correct += (predictions == labels).sum().item()
        print("Accuracy of the network over the eval data is: ",
              (100 * correct / total))
    """
    feature_extractor.eval()
    return feature_extractor.cpu()
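# A minimal sketch (with assumed tensor shapes) of the complementary-mask trick used in
# preTrainFeatureExtractor above to feed a two-channel network with single-channel data:
# the input is duplicated and a random binary mask zeroes each pixel in exactly one of the
# two copies, so both input channels receive gradient signal during pre-training.
import copy
import torch

original = torch.randn(4, 1, 28, 28)     # a batch of single-channel images (assumed shape)
x1 = original.clone()
x2 = copy.deepcopy(x1)
mask = torch.randint(0, 2, x1.shape)     # random 0/1 mask, same shape as the input
x1[mask == 0] = 0                        # copy 1 keeps pixels where mask == 1
x2[mask == 1] = 0                        # copy 2 keeps the complementary pixels
x = torch.cat((x1, x2), dim=1)           # shape (4, 2, 28, 28): two complementary channels
assert torch.equal(x[:, 0:1] + x[:, 1:2], original)  # every pixel survives in exactly one channel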