def create_loaders(test_dir, test_list, normalise_params, num_workers):
    # NOTE: relies on a module-level `args` (parsed command-line options)
    # and a module-level `logger`.
    # Torch libraries
    from torchvision import transforms
    from torch.utils.data import DataLoader
    # Custom libraries
    if args.data_name == 'nyu':
        from datasets import NYUDataset as Dataset
    elif args.data_name == 'cityscapes':
        from datasets import CSDataset as Dataset
    else:
        raise ValueError("Unknown dataset: {}".format(args.data_name))
    from datasets import ToTensor, Normalise

    composed_test = transforms.Compose([Normalise(*normalise_params),
                                        ToTensor()])
    ## Test Set ##
    testset = Dataset(data_file=test_list,
                      data_dir=test_dir,
                      transform_trn=None,
                      transform_val=composed_test)
    logger.info(" Created test set with {} examples".format(len(testset)))
    ## Test Loader ##
    test_loader = DataLoader(testset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=num_workers,
                             pin_memory=True)
    return test_loader
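# A minimal sketch (not from this repository) of how the module-level `args`
# and `logger` that create_loaders() relies on could be set up; the argument
# names other than `data_name` are assumptions for illustration only.
import argparse
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

parser = argparse.ArgumentParser(description="Evaluation settings (illustrative)")
parser.add_argument("--data-name", dest="data_name", type=str, default="nyu",
                    choices=["nyu", "cityscapes"],
                    help="which dataset class to load")
args = parser.parse_args([])  # empty list: fall back to defaults when run interactively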
def create_loaders(val_dir, val_list, shorter_side, crop_size, low_scale,
                   high_scale, normalise_params, batch_size, num_workers,
                   ignore_label):
    """Create a validation data loader for the NYU dataset."""
    # Torch libraries
    from torchvision import transforms
    from torch.utils.data import DataLoader
    # Custom libraries
    from datasets import NYUDataset as Dataset
    from datasets import Pad, RandomCrop, RandomMirror, ResizeShorterScale, \
        ToTensor, Normalise

    ## Transformations during training ##
    composed_trn = transforms.Compose([
        ResizeShorterScale(shorter_side, low_scale, high_scale),
        Pad(crop_size, [123.675, 116.28, 103.53], ignore_label),
        RandomMirror(),
        RandomCrop(crop_size),
        Normalise(*normalise_params),
        ToTensor()
    ])
    composed_val = transforms.Compose([
        ResizeShorterScale(shorter_side, low_scale, high_scale),
        Normalise(*normalise_params),
        ToTensor()
    ])
    ## Validation set ##
    valset = Dataset(data_file=val_list,
                     data_dir=val_dir,
                     transform_trn=None,
                     transform_val=composed_val)
    ## Validation loader ##
    val_loader = DataLoader(valset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True)
    return val_loader
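# A minimal sketch of consuming the returned loader for evaluation. It assumes
# (not verified here) that the dataset yields dicts with 'image' and 'mask'
# entries and that `segmenter` is an already-built model living on `device`.
import torch

def evaluate(segmenter, val_loader, device):
    segmenter.eval()
    with torch.no_grad():
        for sample in val_loader:
            image = sample['image'].to(device).float()
            target = sample['mask']
            logits = segmenter(image)
            # ... accumulate metrics (e.g. mean IoU) from `logits` vs `target`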
def __init__(self, args):
    # self.feature_mean = stat_data['feature_mean']
    # self.feature_variance = stat_data['feature_variance']
    self.num_test_sentences = args.num_test_sentences
    self.model_name = args.model_name
    self.frame_size = args.frame_size
    self.frame_shift = args.frame_size // 2
    self.get_frames = SignalToFrames(frame_size=self.frame_size,
                                     frame_shift=self.frame_shift)
    self.ola = OLA(frame_shift=self.frame_shift)
    self.to_tensor = ToTensor()
    self.width = args.width
    self.srate = 16000
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(self.device)
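# A minimal numpy sketch of the framing / overlap-add idea behind
# SignalToFrames and OLA above; illustrative only, not the implementation
# used by this codebase (no windowing or overlap normalisation is applied).
import numpy as np

def signal_to_frames(x, frame_size, frame_shift):
    """Slice a 1-D signal into overlapping frames, one row per frame."""
    n_frames = 1 + (len(x) - frame_size) // frame_shift
    idx = np.arange(frame_size)[None, :] + frame_shift * np.arange(n_frames)[:, None]
    return x[idx]

def overlap_add(frames, frame_shift):
    """Reconstruct a signal by summing frames back at their original offsets."""
    n_frames, frame_size = frames.shape
    out = np.zeros((n_frames - 1) * frame_shift + frame_size)
    for i, frame in enumerate(frames):
        out[i * frame_shift:i * frame_shift + frame_size] += frame
    return out

x = np.random.randn(16000)             # one second of audio at 16 kHz
frames = signal_to_frames(x, 512, 256)  # 50% overlap, as frame_shift = frame_size // 2
y = overlap_add(frames, 256)            # overlapping regions sum to roughly 2x here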
def create_loaders(train_dir, val_dir, train_list, val_list, shorter_side,
                   crop_size, low_scale, high_scale, normalise_params,
                   batch_size, num_workers, ignore_label):
    """
    Args:
      train_dir (str) : path to the root directory of the training set.
      val_dir (str) : path to the root directory of the validation set.
      train_list (str) : path to the training list.
      val_list (str) : path to the validation list.
      shorter_side (int) : parameter of the shorter_side resize transformation.
      crop_size (int) : square crop to apply during the training.
      low_scale (float) : lowest scale ratio for augmentations.
      high_scale (float) : highest scale ratio for augmentations.
      normalise_params (list / tuple) : img_scale, img_mean, img_std.
      batch_size (int) : training batch size.
      num_workers (int) : number of workers to parallelise data loading operations.
      ignore_label (int) : label to pad segmentation masks with.

    Returns:
      train_loader, val_loader

    """
    # Torch libraries
    from torchvision import transforms
    from torch.utils.data import DataLoader
    # Custom libraries
    from datasets import NYUDataset as Dataset
    from datasets import Pad, RandomCrop, RandomMirror, ResizeShorterScale, \
        ToTensor, Normalise

    ## Transformations during training ##
    composed_trn = transforms.Compose([
        ResizeShorterScale(shorter_side, low_scale, high_scale),
        Pad(crop_size, [123.675, 116.28, 103.53], ignore_label),
        RandomMirror(),
        RandomCrop(crop_size),
        Normalise(*normalise_params),
        ToTensor()
    ])
    composed_val = transforms.Compose([
        Normalise(*normalise_params),
        ToTensor()
    ])
    ## Training and validation sets ##
    trainset = Dataset(data_file=train_list,
                       data_dir=train_dir,
                       transform_trn=composed_trn,
                       transform_val=composed_val)
    valset = Dataset(data_file=val_list,
                     data_dir=val_dir,
                     transform_trn=None,
                     transform_val=composed_val)  # transform_val=None)
    logger.info(" Created train set = {} examples, val set = {} examples"
                .format(len(trainset), len(valset)))
    ## Training and validation loaders ##
    train_loader = DataLoader(trainset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers,
                              pin_memory=True,
                              drop_last=True)
    val_loader = DataLoader(valset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=num_workers,
                            pin_memory=True)
    return train_loader, val_loader
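# A minimal usage sketch of the full create_loaders() above. The paths,
# normalisation values, and augmentation settings are illustrative assumptions,
# not taken from this repository; a module-level `logger` and the `datasets`
# package are assumed to be available.
import numpy as np

normalise_params = [1. / 255,                                            # img_scale
                    np.array([0.485, 0.456, 0.406]).reshape((1, 1, 3)),  # img_mean
                    np.array([0.229, 0.224, 0.225]).reshape((1, 1, 3))]  # img_std
train_loader, val_loader = create_loaders(
    train_dir='./data/', val_dir='./data/',
    train_list='./data/train.lst', val_list='./data/val.lst',
    shorter_side=350, crop_size=500, low_scale=0.5, high_scale=2.0,
    normalise_params=normalise_params, batch_size=6, num_workers=4,
    ignore_label=255)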
use_cuda = False
ngpus = 0
#if gpus is not None:
#    use_cuda = True
#    ngpus = len(gpus.split(','))
seed = 16
torch.manual_seed(seed)
#if use_cuda:
#    device = "cuda"
#    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
#    torch.cuda.manual_seed(seed)

# Transforms data for train
transformed_dataset_train = FallDataset(csv_file=csv_file_train,
                                        transform=transforms.Compose([ToTensor()]))
dataloader_train = DataLoader(transformed_dataset_train, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)

# Transforms data for validation
transformed_dataset_val = FallDataset(csv_file=csv_file_valid,
                                      transform=transforms.Compose([ToTensor()]))
dataloader_val = DataLoader(transformed_dataset_val, batch_size=batch_size,
                            shuffle=True, num_workers=num_workers)

net = Net()
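# A minimal sketch of what a ToTensor transform for dict-style samples
# typically looks like. The actual FallDataset sample layout is not shown
# here, so the 'features' / 'label' keys are assumptions for illustration.
import torch

class ToTensor(object):
    """Convert numpy arrays in a sample dict to torch tensors."""
    def __call__(self, sample):
        features, label = sample['features'], sample['label']
        return {'features': torch.from_numpy(features).float(),
                'label': torch.tensor(label).long()}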
use_cuda = False
ngpus = 0
#if gpus is not None:
#    use_cuda = True
#    ngpus = len(gpus.split(','))
seed = 16
torch.manual_seed(seed)
#if use_cuda:
#    device = "cuda"
#    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
#    torch.cuda.manual_seed(seed)

transformed_dataset_val = FallDataset(csv_file=csv_file_test,
                                      transform=transforms.Compose([ToTensor()]))
dataloader_val = DataLoader(transformed_dataset_val, batch_size=batch_size,
                            shuffle=True, num_workers=num_workers)

net = Net()
outfile = open('test/out_test.txt', "w")
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=momentum)

# load the model
checkpoint = torch.load(model_name)
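# A minimal sketch of restoring state from the checkpoint loaded above; the
# key names ('state_dict', 'optimizer', 'epoch') are assumptions and must
# match whatever the training script actually saved.
net.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint.get('epoch', 0)
net.eval()  # switch to evaluation mode before testing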
def main():
    parser = argparse.ArgumentParser(
        description='text convolution-deconvolution auto-encoder model')
    # learning
    parser.add_argument('-lr', type=float, default=0.001,
                        help='initial learning rate')
    parser.add_argument('-epochs', type=int, default=60,
                        help='number of epochs for train')
    parser.add_argument('-batch_size', type=int, default=64,
                        help='batch size for training')
    parser.add_argument('-lr_decay_interval', type=int, default=20,
                        help='how many epochs to wait before decreasing the learning rate')
    parser.add_argument('-log_interval', type=int, default=16,
                        help='how many steps to wait before logging training status')
    parser.add_argument('-test_interval', type=int, default=100,
                        help='how many steps to wait before testing')
    parser.add_argument('-save_interval', type=int, default=5,
                        help='how many epochs to wait before saving')
    parser.add_argument('-save_dir', type=str, default='snapshot',
                        help='where to save the snapshot')
    # data
    parser.add_argument('-data_path', type=str, help='data path')
    parser.add_argument('-label_path', type=str, help='label path')
    parser.add_argument('-separated', type=str, default='sentencepiece',
                        help='how separated text data is')
    parser.add_argument('-shuffle', action='store_true', default=False,
                        help='shuffle the data every epoch')
    parser.add_argument('-sentence_len', type=int, default=60,
                        help='how many tokens in a sentence')
    # model
    parser.add_argument('-mlp_out', type=int, default=7,
                        help='number of classes')
    parser.add_argument('-dropout', type=float, default=0.5,
                        help='the probability for dropout')
    parser.add_argument('-embed_dim', type=int, default=300,
                        help='number of embedding dimensions')
    parser.add_argument('-kernel_sizes', type=int, default=2,
                        help='kernel size to use for convolution')
    parser.add_argument('-tau', type=float, default=0.01,
                        help='temperature parameter')
    parser.add_argument('-use_cuda', action='store_true', default=True,
                        help='whether to use cuda')
    # option
    parser.add_argument('-enc_snapshot', type=str, default=None,
                        help='filename of encoder snapshot')
    parser.add_argument('-dec_snapshot', type=str, default=None,
                        help='filename of decoder snapshot')
    parser.add_argument('-mlp_snapshot', type=str, default=None,
                        help='filename of mlp classifier snapshot')
    args = parser.parse_args()

    # NOTE: the 'transoform' spelling is kept so it matches the dataset's
    # constructor argument.
    dataset = TextClassificationDataset(args.data_path, args.label_path,
                                        args.separated,
                                        sentence_len=args.sentence_len,
                                        transoform=ToTensor())
    data_loader = DataLoader(dataset, batch_size=args.batch_size,
                             shuffle=args.shuffle)

    print("Vocab number")
    print(dataset.vocab_length())

    k = args.embed_dim
    v = dataset.vocab_length()
    if args.enc_snapshot is None or args.dec_snapshot is None or args.mlp_snapshot is None:
        print("Start from initial")
        embedding = nn.Embedding(v, k, max_norm=1.0, norm_type=2.0)

        encoder = model.ConvolutionEncoder(embedding)
        decoder = model.DeconvolutionDecoder(embedding, args.tau)
        mlp = model.MLPClassifier(args.mlp_out, args.dropout)
    else:
        print("Restart from snapshot")
        encoder = torch.load(args.enc_snapshot)
        decoder = torch.load(args.dec_snapshot)
        mlp = torch.load(args.mlp_snapshot)

    train_classification(data_loader, data_loader, encoder, decoder, mlp, args)
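# Illustrative invocation (script name and paths are placeholders, not taken
# from this repository); the standard entry-point guard is added for completeness.
#
#   python train_classifier.py -data_path data/corpus.txt \
#       -label_path data/labels.txt -batch_size 64 -epochs 60
#
if __name__ == '__main__':
    main()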
def main(file_train_path, stop_path, rare_len, epochs, embed_dim, batch_size,
         shuffle, sentence_len, filter_size, latent_size, n_class, LR,
         save_interval, save_dir, use_cuda):
    data_all, labels, rare_word, word2index, index2word = deal_with_data(
        file_path=file_train_path, stop_path=stop_path,
        sentence_len=sentence_len, rare_len=rare_len,
        rare_word=[]).word_to_id()
    # total number of words in the vocabulary
    counts_words_len = len(word2index)
    # total number of samples
    sample_len = len(labels)
    train_data = data_set(data_all[0:int(0.7 * sample_len)],
                          labels[0:int(0.7 * sample_len)],
                          transform=ToTensor())
    test_data = data_set(data_all[int(0.7 * sample_len):sample_len],
                         labels[int(0.7 * sample_len):sample_len],
                         transform=ToTensor())
    data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)
    # batch_size must be an int
    test_loader = DataLoader(test_data, batch_size=max(1, len(test_data) // 100),
                             shuffle=shuffle)
    # build the embedding
    embedding = nn.Embedding(counts_words_len, embed_dim, max_norm=1.0,
                             norm_type=2.0)
    # build the TextCNN model
    cnn = textcnn_me.TextCNN(embedding=embedding, sentence_len=sentence_len,
                             filter_size=filter_size, latent_size=latent_size,
                             n_class=n_class)
    cnn_opt = torch.optim.Adam(cnn.parameters(), lr=LR)
    # loss function
    loss_function = nn.CrossEntropyLoss()

    steps = 0
    for epoch in range(1, epochs + 1):
        print("=======Epoch========")
        print(epoch)
        for batch in data_loader:
            feature, target = Variable(batch["sentence"]), Variable(batch["label"])
            if use_cuda:
                cnn.cuda()
                feature, target = feature.cuda(), target.cuda()
            cnn_opt.zero_grad()
            output = cnn(feature)
            #print(output)
            #print(target.view(target.size()[0]))
            loss = loss_function(output, target.view(target.size()[0]))
            loss.backward()
            cnn_opt.step()
            steps += 1
            print("Epoch: {}".format(epoch))
            print("Steps: {}".format(steps))
            print("Loss: {}".format(loss.item()))  # loss.data[0] is deprecated
        if epoch % save_interval == 0:
            util.save_models(cnn, save_dir, "cnn", epoch)

    for batch in test_loader:
        test_feature, test_target = Variable(batch["sentence"]), Variable(batch["label"])
        test_output = cnn(test_feature)
        pred_y = torch.max(test_output, 1)[1]
        acc = (test_target.view(test_target.size()[0]) == pred_y)
        acc = acc.numpy().sum()
        accuracy = acc / test_target.size(0)
        print(len(pred_y))
        print('test_acc:{}'.format(accuracy))
    print("Finish!!!")
    return cnn
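# An alternative sketch of the 70/30 split above using
# torch.utils.data.random_split (shown for illustration only; the original
# code slices the data in order rather than splitting at random).
from torch.utils.data import random_split

def split_dataset(full_dataset, train_fraction=0.7):
    n_train = int(train_fraction * len(full_dataset))
    n_test = len(full_dataset) - n_train
    return random_split(full_dataset, [n_train, n_test])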
def Train(train_root, train_csv, test_csv):
    # parameters
    args = parse_args()
    record_params(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_order
    torch.manual_seed(args.torch_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.torch_seed)
    np.random.seed(args.torch_seed)
    random.seed(args.torch_seed)
    if args.cudnn == 0:
        cudnn.benchmark = False
    else:
        cudnn.benchmark = True
        cudnn.deterministic = True

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 4
    net = build_model(args.model_name, num_classes, args.pretrain)

    # resume
    checkpoint_name_loss = os.path.join(
        args.checkpoint,
        args.params_name.split('.')[0] + '_loss.' + args.params_name.split('.')[-1])
    checkpoint_name_acc = os.path.join(
        args.checkpoint,
        args.params_name.split('.')[0] + '_acc.' + args.params_name.split('.')[-1])
    if args.resume != 0:
        logging.info('Resuming from checkpoint...')
        checkpoint = torch.load(checkpoint_name_loss)
        best_loss = checkpoint['loss']
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        history = checkpoint['history']
        net.load_state_dict(checkpoint['net'])
    else:
        best_loss = float('inf')
        best_acc = 0.0
        start_epoch = 0
        history = {
            'train_loss': [],
            'train_acc': [],
            'test_loss': [],
            'test_acc': []
        }
    end_epoch = start_epoch + args.num_epoch

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(device)

    # data
    img_size = args.img_size
    ## train
    train_aug = Compose([
        Resize(size=(img_size, img_size)),
        RandomHorizontallyFlip(),
        RandomVerticallyFlip(),
        RandomRotate(90),
        ToTensor(),
        Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    ## test
    # test_aug = train_aug
    test_aug = Compose([
        Resize(size=(img_size, img_size)),
        ToTensor(),
        Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    train_dataset = breast_classify_inbreast(root=train_root,
                                             csv_file=train_csv,
                                             transform=train_aug)
    test_dataset = breast_classify_inbreast(root=train_root,
                                            csv_file=test_csv,
                                            transform=test_aug)
    if args.weighted_sampling == 1:
        weights = torch.FloatTensor([1.0, 1.0, 1.5, 5.0]).to(device)
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  num_workers=4, shuffle=True)
    else:
        weights = None
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  num_workers=4, shuffle=True)
    # train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
    #                           num_workers=4, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             num_workers=4, shuffle=True)

    # loss function, optimizer and scheduler
    # mean reduction (the size_average argument is deprecated)
    criterion = nn.NLLLoss(weight=weights).to(device)
    optimizer = Adam(net.parameters(), lr=args.lr, amsgrad=True)
    ## scheduler
    if args.lr_policy == 'StepLR':
        scheduler = StepLR(optimizer, step_size=30, gamma=0.5)
    if args.lr_policy == 'PolyLR':
        scheduler = PolyLR(optimizer, max_epoch=end_epoch, power=0.9)

    # training process
    logging.info('Start Training For Breast Density Classification')
    for epoch in range(start_epoch, end_epoch):
        ts = time.time()
        if args.lr_policy != 'None':
            scheduler.step()
        # train
        net.train()
        train_loss = 0.
        train_acc = 0.
        for batch_idx, (inputs, targets) in tqdm(enumerate(train_loader),
                                                 total=int(len(train_loader))):
            inputs = inputs.to(device)
            targets = targets.to(device)
            targets = targets.long()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(F.log_softmax(outputs, dim=1), targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            accuracy = float(sum(outputs.argmax(dim=1) == targets))
            train_acc += accuracy
        train_acc_epoch = train_acc / (len(train_loader.dataset))
        train_loss_epoch = train_loss / (batch_idx + 1)
        history['train_loss'].append(train_loss_epoch)
        history['train_acc'].append(train_acc_epoch)

        # test
        net.eval()
        test_loss = 0.
        test_acc = 0.
        for batch_idx, (inputs, targets) in tqdm(
                enumerate(test_loader),
                total=int(len(test_loader.dataset) / args.batch_size) + 1):
            with torch.no_grad():
                inputs = inputs.to(device)
                targets = targets.to(device)
                targets = targets.long()
                outputs = net(inputs)
                loss = criterion(F.log_softmax(outputs, dim=1), targets)
                accuracy = float(sum(outputs.argmax(dim=1) == targets))
                test_acc += accuracy
                test_loss += loss.item()
        test_loss_epoch = test_loss / (batch_idx + 1)
        test_acc_epoch = test_acc / (len(test_loader.dataset))
        history['test_loss'].append(test_loss_epoch)
        history['test_acc'].append(test_acc_epoch)

        time_cost = time.time() - ts
        logging.info(
            'epoch[%d/%d]: train_loss: %.3f | train_acc: %.3f | test_loss: %.3f | test_acc: %.3f || time: %.1f'
            % (epoch + 1, end_epoch, train_loss_epoch, train_acc_epoch,
               test_loss_epoch, test_acc_epoch, time_cost))

        # save checkpoint
        if test_loss_epoch < best_loss:
            logging.info('Loss checkpoint Saving...')
            save_model = net
            if torch.cuda.device_count() > 1:
                save_model = list(net.children())[0]
            state = {
                'net': save_model.state_dict(),
                'loss': test_loss_epoch,
                'acc': test_acc_epoch,
                'epoch': epoch + 1,
                'history': history
            }
            torch.save(state, checkpoint_name_loss)
            best_loss = test_loss_epoch
        if test_acc_epoch > best_acc:
            logging.info('Acc checkpoint Saving...')
            save_model = net
            if torch.cuda.device_count() > 1:
                save_model = list(net.children())[0]
            state = {
                'net': save_model.state_dict(),
                'loss': test_loss_epoch,
                'acc': test_acc_epoch,
                'epoch': epoch + 1,
                'history': history
            }
            torch.save(state, checkpoint_name_acc)
            best_acc = test_acc_epoch
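# A minimal sketch of reloading one of the checkpoints saved above for
# inference. The checkpoint keys ('net', 'epoch', ...) match the `state` dict
# saved above; the `build_model` call mirrors the training code, but the
# default argument values here are assumptions.
def load_for_inference(checkpoint_path, model_name, num_classes=4, pretrain=False):
    net = build_model(model_name, num_classes, pretrain)
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    net.load_state_dict(checkpoint['net'])
    net.eval()
    print('Loaded checkpoint from epoch', checkpoint['epoch'])
    return net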
def Train(train_root, train_csv, test_csv):
    # parameters
    args = parse_args()
    # record
    record_params(args)

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_order
    torch.manual_seed(args.torch_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.torch_seed)
    np.random.seed(args.torch_seed)
    random.seed(args.torch_seed)
    if args.cudnn == 0:
        cudnn.benchmark = False
    else:
        cudnn.benchmark = True
        cudnn.deterministic = True

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 2
    net = build_model(args.model_name, num_classes)
    params_name = '{}_r{}.pkl'.format(args.model_name, args.repetition)

    start_epoch = 0
    history = {
        'train_loss': [],
        'test_loss': [],
        'train_dice': [],
        'test_dice': []
    }
    end_epoch = start_epoch + args.num_epoch

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(device)

    # data
    train_aug = Compose([
        Resize(size=(args.img_size, args.img_size)),
        ToTensor(),
        Normalize(mean=args.data_mean, std=args.data_std)
    ])
    test_aug = Compose([
        Resize(size=(args.img_size, args.img_size)),
        ToTensor(),
        Normalize(mean=args.data_mean, std=args.data_std)
    ])
    train_dataset = breast_seg(root=train_root, csv_file=train_csv,
                               transform=train_aug)
    test_dataset = breast_seg(root=train_root, csv_file=test_csv,
                              transform=test_aug)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=4, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             num_workers=4, shuffle=False)

    # loss function, optimizer and scheduler
    cedice_weight = torch.tensor(args.cedice_weight)
    ceclass_weight = torch.tensor(args.ceclass_weight)
    diceclass_weight = torch.tensor(args.diceclass_weight)
    if args.loss == 'ce':
        criterion = CrossEntropyLoss2d(weight=ceclass_weight).to(device)
    elif args.loss == 'dice':
        criterion = MulticlassDiceLoss(weight=diceclass_weight).to(device)
    elif args.loss == 'cedice':
        criterion = CEMDiceLoss(cediceweight=cedice_weight,
                                ceclassweight=ceclass_weight,
                                diceclassweight=diceclass_weight).to(device)
    else:
        print('Do not have this loss')
    optimizer = Adam(net.parameters(), lr=args.lr, amsgrad=True)
    ## scheduler
    if args.lr_policy == 'StepLR':
        scheduler = StepLR(optimizer, step_size=30, gamma=0.5)
    if args.lr_policy == 'PolyLR':
        scheduler = PolyLR(optimizer, max_epoch=end_epoch, power=0.9)

    # training process
    logging.info('Start Training For Breast Seg')
    besttraindice = 0.
    for epoch in range(start_epoch, end_epoch):
        ts = time.time()
        net.train()
        for batch_idx, (imgs, _, targets) in tqdm(
                enumerate(train_loader),
                total=int(len(train_loader.dataset) / args.batch_size)):
            imgs = imgs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            outputs = net(imgs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        # test
        net.eval()
        test_loss = 0.
        test_dice = 0.
        test_count = 0
        for batch_idx, (imgs, _, targets) in tqdm(
                enumerate(test_loader),
                total=int(len(test_loader.dataset) / args.batch_size)):
            with torch.no_grad():
                imgs = imgs.to(device)
                targets = targets.to(device)
                outputs = net(imgs)
                loss = criterion(outputs, targets).mean()
                test_count += imgs.shape[0]
                test_loss += loss.item() * imgs.shape[0]
                test_dice += Dice_fn(outputs, targets).item()
        test_loss_epoch = test_loss / float(test_count)
        test_dice_epoch = test_dice / float(test_count)
        history['test_loss'].append(test_loss_epoch)
        history['test_dice'].append(test_dice_epoch)

        train_loss = 0.
        train_dice = 0.
        train_count = 0
        for batch_idx, (imgs, _, targets) in tqdm(
                enumerate(train_loader),
                total=int(len(train_loader.dataset) / args.batch_size)):
            with torch.no_grad():
                imgs = imgs.to(device)
                targets = targets.to(device)
                outputs = net(imgs)
                loss = criterion(outputs, targets).mean()
                train_count += imgs.shape[0]
                train_loss += loss.item() * imgs.shape[0]
                train_dice += Dice_fn(outputs, targets).item()
        train_loss_epoch = train_loss / float(train_count)
        train_dice_epoch = train_dice / float(train_count)
        history['train_loss'].append(train_loss_epoch)
        history['train_dice'].append(train_dice_epoch)

        time_cost = time.time() - ts
        logging.info(
            'epoch[%d/%d]: train_loss: %.3f | test_loss: %.3f | train_dice: %.3f | test_dice: %.3f || time: %.1f'
            % (epoch + 1, end_epoch, train_loss_epoch, test_loss_epoch,
               train_dice_epoch, test_dice_epoch, time_cost))

        if args.lr_policy != 'None':
            scheduler.step()

        # save checkpoint
        if train_dice_epoch > besttraindice:
            besttraindice = train_dice_epoch
            logging.info('Besttraindice Checkpoint {} Saving...'.format(epoch + 1))
            save_model = net
            if torch.cuda.device_count() > 1:
                save_model = list(net.children())[0]
            state = {
                'net': save_model.state_dict(),
                'loss': test_loss_epoch,
                'dice': test_dice_epoch,
                'epoch': epoch + 1,
                'history': history
            }
            savecheckname = os.path.join(
                args.checkpoint,
                params_name.split('.pkl')[0] + '_besttraindice.' +
                params_name.split('.')[-1])
            torch.save(state, savecheckname)
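# A minimal sketch of a Dice metric in the spirit of the Dice_fn used above
# (not necessarily the project's implementation): foreground Dice for a
# two-class segmentation, summed over the batch so it can be divided by the
# sample count as in the loops above.
import torch

def dice_score(outputs, targets, eps=1e-6):
    """outputs: (N, 2, H, W) logits; targets: (N, H, W) integer masks."""
    preds = outputs.argmax(dim=1)            # predicted class per pixel
    dice = outputs.new_zeros(())
    for pred, target in zip(preds, targets):
        pred_fg = (pred == 1).float()
        target_fg = (target == 1).float()
        inter = (pred_fg * target_fg).sum()
        dice = dice + (2.0 * inter + eps) / (pred_fg.sum() + target_fg.sum() + eps)
    return dice                               # sum of per-sample Dice scores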