def main(options):
    """Entry point: make every RNG deterministic, then build the dataset and train.

    Args:
        options: parsed CLI namespace; only ``options.seed`` is read here.
    """
    rng_seed = options.seed
    np.random.seed(rng_seed)
    torch.manual_seed(rng_seed)
    torch.cuda.manual_seed(rng_seed)
    # cuDNN autotuner is enabled; fastest kernels are picked per input shape.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    clips, labels = dataset.make_dataset()
    trainer.train_net(clips, labels)
def main(options):
    """Seed all RNGs from ``options.seed``, then train on a generated dataset.

    Args:
        options: parsed CLI namespace; reads ``seed``, ``dim`` and
            ``num_samples``.
    """
    for seed_fn in (torch.manual_seed, np.random.seed, torch.cuda.manual_seed):
        seed_fn(options.seed)
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    # The synthetic dataset is parameterised by dimensionality and sample count.
    samples, targets = dataset.make_dataset(options.dim, options.num_samples)
    trainer.train_net(samples, targets)
def visualize_data():
    """Show one processed sample in the Streamlit UI, then train the LSTM.

    Builds the train/test datasets and vocabulary, displays a single
    tokenised test example, constructs an LSTMNet with a zeroed padding
    embedding row, and runs one training epoch.
    """
    raw = dataset.get_data()
    builder = BuildDataset()
    train_ds, test_ds = builder.get_dataset(raw['train_data'], raw['test_data'])
    train_iter, test_iter = builder.create_vocalb(train_ds, test_ds)

    # Display one processed example (index 20) both in the UI and on stdout.
    st.subheader(
        'How the data looks after creating vocalb from the dataset . . ')
    sample = vars(test_ds[20])
    st.text(sample)
    print(sample)

    # Build the classifier; pad index comes from the TEXT field's vocab.
    pad_index = builder.TEXT.vocab.stoi[builder.TEXT.pad_token]
    model = LSTMNet(
        config.vocalb_size,
        config.embedding_dim,
        input_dim=len(builder.TEXT.vocab),
        hidden_dim=config.hidden_dim,
        output_dim=config.out_dim,
        n_layers=config.n_layers,
        dropout=config.dropout,
        pad=pad_index,
    )

    # Pre-trained vectors are fetched for inspection only — copying them into
    # the embedding layer is deliberately left disabled below; only the
    # padding row of the embedding matrix is zeroed out.
    pretrained_embeddings = builder.TEXT.vocab.vectors
    print(pretrained_embeddings.shape)
    # model.embedding_layer.weight.data.copy_(pretrained_embeddings)
    model.embedding_layer.weight.data[model.pad_idx] = torch.zeros(
        config.embedding_dim)

    model_trained = trainer.train_net(model, train_iter, test_iter, epochs=1)
def train():
    """Train a segmentation model, then score it on the validation patches.

    Uses the module-level ``args`` / folder constants (defined elsewhere in
    the file), trains via ``train_net`` and reports the Jaccard score and
    per-class thresholds from ``calc_jacc_img_msk``.
    """
    print("Train mode")
    print("run_prefix", RUN_PREFIX)
    config = Config()
    loss_mode = args.lossmode
    model = train_net(WEIGHTS_FLD, LOG_FLD, PRED_FLD, config, loss_mode)
    # BUG FIX: the original passed the *builtin* `dir` function to format(),
    # which printed "<built-in function dir>". The validation directory used
    # two lines below is what was intended here.
    print("calc_jacc for {}: N examples:: {}".format(config.VAL_DIR,
                                                     config.AMT_VAL))
    x_val, y_val = get_patches_dir(config.VAL_DIR,
                                   config,
                                   shuffleOn=False,
                                   amt=config.AMT_VAL)
    score, trs = calc_jacc_img_msk(model,
                                   x_val,
                                   y_val,
                                   batch_size=4,
                                   n_classes=config.NUM_CLASSES)
    print("score, trs", score, trs)
# Script fragment: build the test loader, choose the loss, then run the
# train/validate loop, checkpointing the model whenever validation loss
# improves.
# NOTE(review): `test_dataset`, `train_loader`, `net`, `args`, `device`,
# `sub`, `div` and `model_name` are defined earlier in the file (not visible
# here); `sub`/`div` look like normalisation statistics passed through to
# train_net/val_net — confirm against their definitions.
batch_size =args["batch_size"]
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
# Regression criterion is configurable; SmoothL1 is the fallback.
if args['loss'] == "MSE":
    criterion = nn.MSELoss()
else:
    criterion = nn.SmoothL1Loss()
net.to(device)
optimizer =torch.optim.Adam(net.parameters(), lr=1e-5, weight_decay=5e-4) #define optimizer
# Baseline validation pass before any training (epoch 0 of 0).
val_net(0, 0, net, criterion, optimizer, device, test_loader, sub, div)
n_epochs = args["epoch"]
min_loss = float('inf')
for epoch in range(n_epochs):
    print("=====Training=======")
    train_net(epoch, n_epochs, net, criterion, optimizer, device, train_loader, sub, div)
    print("=====Validation=======")
    loss = val_net(epoch, n_epochs, net, criterion, optimizer, device, test_loader, sub, div)
    # Keep only the best checkpoint seen so far (lowest validation loss).
    if loss < min_loss:
        print("=====Saving=======")
        model_dir = './saved_models/'
        name = args["dataset"]+'_'+model_name+'_'+str(loss)+'.pt'
        min_loss = loss
        # after training, save your model parameters in the dir 'saved_models'
        # NOTE(review): assumes './saved_models/' already exists — no
        # os.makedirs call is visible; torch.save fails if the dir is missing.
        torch.save(net.state_dict(), model_dir+name)
#ImageFolderでデータセットを読み込み、分割してデータローダーを作る from torchvision.datasets import ImageFolder train_loader, test_loader = load_dataset.GetDataLoader_withSplit(ImageFolder('./food-101/images',tf), 0.2, batch_size) print("データーローダー準備完了") #モデル構築 from modeldef import VGG19custom net = VGG19custom() print(net) #ネットワーク構造の表示 #自作ヘルパー関数のロード #評価処理と訓練処理 from trainer import eval_net, train_net #データをすべて転送する import torch #device_select = 'cpu' #デバッグ用 device_select = 'cuda' if torch.cuda.is_available() else 'cpu' #CUDAが使えるなら使う n_epoch = 5 net.to(device_select) #訓練実施 train_net(net, train_loader, test_loader, n_iter=n_epoch, device=device_select) print("モデル訓練完了") #モデルをシリアライズ import modelio modelio.SaveModelWeights(net,"model.pth") modelio.SaveOnnxModel(net, "model.onnx", (3,224,224)) print("モデル出力完了")
def main(options): #seed = options.seed #seeds = [4, 44, 3, 33, 333] seeds = [44, 3, 33, 333] size = options.size num_classes = options.num_classes width = options.width trials = options.trials run_idx = options.run_idx torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True #depths = [1, 2, 3, 4, 5, 6, 7, 8, 9] #depths = [10, 12, 14, 16, 18, 20] #depths = [22, 24, 26, 28, 30] if run_idx == 0: depths = [1, 2, 3] elif run_idx == 1: depths = [4, 5, 6] elif run_idx == 2: depths = [7, 8, 9] elif run_idx == 3: depths = [10, 12, 15] #train_loader, test_loader, classes = dataset.make_dataset(size, num_classes) all_classes = [0, 1, 2, 3, 4, 7, 9, 14, 16, 17] ''' 0 kit_fox 1 English_setter 2 Siberian_husky 3 Australian_terrier 4 English_springer 7 Egyptian_cat 9 Persian_cat 14 malamute 16 Great_Dane 17 Walker_hound ''' classes = all_classes[:num_classes] train_loader, transform = get_imagenet_trainloader(classes) test_loader = get_imagenet_testloader(classes, transform, batch_size=50 * num_classes) classes_arg = range( num_classes ) # the imagenet test loader already renumbers the classes, so we don't need to do this in the train script. file = open( "imagenet32_log_extratrials/size{}_width{}_{}classes_{}.txt".format( size, width, num_classes, run_idx), "w") file.write('depth trial ' + str(seeds) + '\n') for trial in range(trials): seed = seeds[trial] torch.manual_seed(seed) np.random.seed(seed) torch.cuda.manual_seed(seed) for depth in depths: results = trainer.train_net(train_loader, test_loader, depth, size, classes_arg, width, options.model) file.write( str(depth) + " " + str(trial) + " " + str(results) + "\n") file.close()
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training worker: set up data, model, optimizer and train.

    Supports three execution modes selected by ``args``:
    distributed (DDP, one process per GPU), DataParallel (``args.gpu`` is
    None), or single-GPU.

    Args:
        gpu: GPU index for this worker (or None for DataParallel).
        ngpus_per_node: number of GPUs on this node.
        args: parsed CLI namespace (mutated in place: gpu, rank, batch_size,
            workers, multigpu, save_path, max_depth, epoch_size).
    """
    args.gpu = gpu
    args.multigpu = False
    if args.distributed:
        # DDP: one process per GPU; shard batch size and workers per process.
        args.multigpu = True
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.workers = int(
            (args.num_workers + ngpus_per_node - 1) / ngpus_per_node)
        print("==> gpu:", args.gpu, ", rank:", args.rank, ", batch_size:",
              args.batch_size, ", workers:", args.workers)
        torch.cuda.set_device(args.gpu)
    elif args.gpu is None:
        print("==> DataParallel Training")
        args.multigpu = True
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    else:
        print("==> Single GPU Training")
        torch.cuda.set_device(args.gpu)

    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # `save_path` is a path-object (supports '/' and makedirs_p) built from
    # the run configuration.
    save_path = save_path_formatter(args, parser)
    args.save_path = 'checkpoints' / save_path
    if (args.rank == 0):
        print('=> number of GPU: ', args.gpu_num)
        print("=> information will be saved in {}".format(args.save_path))
    args.save_path.makedirs_p()
    torch.manual_seed(args.seed)

    ############################## Data loading part ################################
    # Dataset-specific depth clamp (metres).
    if args.dataset == 'KITTI':
        args.max_depth = 80.0
    elif args.dataset == 'NYU':
        args.max_depth = 10.0
    train_set = MyDataset(args, train=True)
    test_set = MyDataset(args, train=False)
    if (args.rank == 0):
        print("=> Dataset: ", args.dataset)
        print("=> Data height: {}, width: {} ".format(args.height, args.width))
        print('=> train samples_num: {} '.format(len(train_set)))
        print('=> test samples_num: {} '.format(len(test_set)))

    train_sampler = None
    test_sampler = None
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_set)
        test_sampler = torch.utils.data.distributed.DistributedSampler(
            test_set, shuffle=False)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    # BUG FIX: the original used shuffle=(train_sampler is None) here too,
    # needlessly shuffling the validation set in non-distributed runs.
    val_loader = torch.utils.data.DataLoader(test_set,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             sampler=test_sampler)
    if args.epoch_size == 0:
        args.epoch_size = len(train_loader)
    cudnn.benchmark = True
    #########################################################################################

    ###################### Setting Network, Loss, Optimizer part ###################
    if (args.rank == 0):
        print("=> creating model")
    Model = LDRN(args)

    ############################### Number of model parameters ##############################
    num_params_encoder = 0
    num_params_decoder = 0
    for p in Model.encoder.parameters():
        num_params_encoder += p.numel()
    for p in Model.decoder.parameters():
        num_params_decoder += p.numel()
    if (args.rank == 0):
        print("===============================================")
        print("model encoder parameters: ", num_params_encoder)
        print("model decoder parameters: ", num_params_decoder)
        print("Total parameters: {}".format(num_params_encoder +
                                            num_params_decoder))
        trainable_params = sum(
            [np.prod(p.shape) for p in Model.parameters() if p.requires_grad])
        print("Total trainable parameters: {}".format(trainable_params))
        print("===============================================")

    ############################### apex distributed package wrapping ########################
    if args.distributed:
        if args.norm == 'BN':
            Model = nn.SyncBatchNorm.convert_sync_batchnorm(Model)
            if (args.rank == 0):
                print("=> use SyncBatchNorm")
        Model = Model.cuda(args.gpu)
        Model = torch.nn.parallel.DistributedDataParallel(
            Model,
            device_ids=[args.gpu],
            output_device=args.gpu,
            find_unused_parameters=True)
        print("=> Model Initialized on GPU: {} - Distributed Traning".format(
            args.gpu))
        enc_param = Model.module.encoder.parameters()
        dec_param = Model.module.decoder.parameters()
    elif args.gpu is None:
        Model = Model.cuda()
        Model = torch.nn.DataParallel(Model)
        print("=> Model Initialized - DataParallel")
        enc_param = Model.module.encoder.parameters()
        dec_param = Model.module.decoder.parameters()
    else:
        Model = Model.cuda(args.gpu)
        print("=> Model Initialized on GPU: {} - Single GPU training".format(
            args.gpu))
        # No wrapper here, so the sub-modules live directly on Model.
        enc_param = Model.encoder.parameters()
        dec_param = Model.decoder.parameters()
    ###########################################################################################

    ################################ pretrained model loading #################################
    if args.model_dir != '':
        #Model.load_state_dict(torch.load(args.model_dir,map_location='cuda:'+args.gpu_num))
        Model.load_state_dict(torch.load(args.model_dir))
        if (args.rank == 0):
            print('=> pretrained model is created')
    #############################################################################################

    ############################## optimizer and criterion setting ##############################
    # BUG FIX: the original hardcoded Model.module.encoder/.decoder here,
    # which raises AttributeError in the single-GPU branch (no .module
    # wrapper) and ignored the enc_param/dec_param computed above. Weight
    # decay is applied to the encoder only.
    optimizer = torch.optim.AdamW([{
        'params': enc_param,
        'weight_decay': args.weight_decay,
        'lr': args.lr
    }, {
        'params': dec_param,
        'weight_decay': 0,
        'lr': args.lr
    }], eps=args.adam_eps)
    ##############################################################################################
    logger = None

    ####################################### Training part ##########################################
    if (args.rank == 0):
        print("training start!")
    loss = train_net(args, Model, optimizer, train_loader, val_loader,
                     args.epochs, logger)
    if (args.rank == 0):
        print("training is finished")