def main(args):
    # Dataloader
    dls, data_info = get_dataloaders(args)

    # Model
    if args.task == 'classification':
        model = classifier.get_model(args, data_info)
    elif args.task == 'autoencoding':
        model = autoencoder.get_model(args, data_info)
    else:
        raise Exception(f'Error. Task "{args.task}" is not supported.')

    # Trainer
    trainer = get_trainer(args)

    if args.mode in ['train', 'training']:
        # Train
        trainer.fit(model, dls['train'], dls['validation'])
        # Validate
        trainer.test(model=None, test_dataloaders=dls['validation'])
    elif args.mode in ['validate', 'validation']:
        trainer.test(model, dls['validation'])
    else:
        raise Exception(f'Error. Mode "{args.mode}" is not supported.')
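# The get_trainer factory is not shown in this excerpt. A minimal sketch,
# assuming PyTorch Lightning (the fit/test(..., test_dataloaders=...) calls
# above match its pre-1.5 API); args.max_epochs and args.gpus are assumed
# attributes, and the real factory likely also wires up loggers.
import torch
import pytorch_lightning as pl

def get_trainer(args):
    return pl.Trainer(
        max_epochs=args.max_epochs,  # assumed attribute on args
        gpus=args.gpus if torch.cuda.is_available() else 0,
        checkpoint_callback=True,    # keeps a best checkpoint for test(model=None)
    )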
def __init__(self, dataset):
    self.dataset = dataset

    ###################
    # training params #
    ###################
    self.args = get_params(dataset)
    torch.manual_seed(self.args.random_seed)

    ###################
    # get dataloaders #
    ###################
    kwargs = {'num_workers': 8, 'pin_memory': True}
    self.train_loader, self.test_loader = get_dataloaders(dataset, **kwargs)

    ######################
    # Initialize Network #
    ######################
    self.net = get_classifier(dataset)
    if self.args.cuda:
        self.net = torch.nn.DataParallel(self.net, device_ids=[0])
        self.net = self.net.cuda()

    ########################
    # Initialize Optimizer #
    ########################
    self.optimizer = optim.SGD(self.net.parameters(),
                               lr=self.args.learning_rate,
                               momentum=self.args.momentum)

    #####################
    # Initialize Losses #
    #####################
    self.train_losses = []
    self.train_counter = []
    self.test_losses = []
    self.test_counter = [
        i * len(self.train_loader.dataset)
        for i in range(self.args.n_epochs + 1)
    ]

    ###################
    # Checkpoint data #
    ###################
    self.curr_best = 0.0
    self.best_net_state = None
    self.best_optimizer_state = None
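# The get_dataloaders helper used above is defined elsewhere in the repo.
# A minimal sketch using torchvision MNIST as a stand-in dataset; batch
# sizes and normalization constants here are assumptions, and the real
# helper also supports other dataset names such as 'mnist_m'.
import torch
from torchvision import datasets, transforms

def get_dataloaders(dataset, **kwargs):
    tfm = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train = datasets.MNIST('./data', train=True, download=True, transform=tfm)
    test = datasets.MNIST('./data', train=False, transform=tfm)
    train_loader = torch.utils.data.DataLoader(train, batch_size=64,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(test, batch_size=1000,
                                              shuffle=False, **kwargs)
    return train_loader, test_loader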
def main(cfg, checkpoint, pretrained_exp_dir, hash_params):
    # torch.manual_seed(cfg.seed)
    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device("cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")

    # get the dataloaders
    dloader_train, _, dloader_test = dataloaders.get_dataloaders(
        cfg, val_split=None)

    # Load the model
    # params.num_classes = 4
    model = load_model(cfg, os.path.join(pretrained_exp_dir, checkpoint),
                       device)
    model = model.to(device)

    layer_name = model.avgpool
    sf = SaveFeatures(layer_name)  # output before the last FC layer

    # define a folder for saving all embeddings
    embedding_path = os.path.join(pretrained_exp_dir, 'embeddings')
    if not os.path.exists(embedding_path):
        os.makedirs(embedding_path)

    # save the feature embeddings for every image
    train_feat_path = os.path.join(embedding_path, 'train_features_dict.p')
    val_feat_path = os.path.join(embedding_path, 'val_features_dict.p')
    test_feat_path = os.path.join(embedding_path, 'test_features_dict.p')

    # img_names, features_dict = save_features_as_dict(
    #     model, dloader_train, sf, save_path=train_feat_path,
    #     num_batch='all', device=device)
    # img_names, features_dict = save_features_as_dict(
    #     model, dloader_val, sf, save_path=val_feat_path,
    #     num_batch='all', device=device)
    img_names, features_dict = save_features_as_dict(
        model, dloader_test, sf, save_path=test_feat_path,
        num_batch='all', device=device)

    hash_path = os.path.join(embedding_path, 'features_hash.p')
    save_embedding_hash(hash_params, hash_path, img_names, features_dict)
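# The SaveFeatures class is defined elsewhere in the repo. A plausible
# sketch of the standard forward-hook pattern it appears to implement:
# capture a module's output on each forward pass.
class SaveFeatures:
    def __init__(self, module):
        self.features = None
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        # detach so stored features do not keep the graph alive
        self.features = output.detach().cpu()

    def remove(self):
        self.hook.remove()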
def predict_and_write(task_name, path, data_dir, submit_subdir, batch_size):
    bert_model_name, max_seq_len = extract_from_cmd(path)
    msg = (f"Using {bert_model_name} and max_sequence_len={max_seq_len} "
           f"for task {task_name}")
    logger.info(msg)

    # Build model
    task = build_model[task_name](bert_model_name)
    model = EmmentalModel(name=f"SuperGLUE_{task_name}", tasks=[task])
    try:
        model.load(path)
    except UnboundLocalError:
        msg = ("Failed to load state dict; confirm that your model was saved "
               "with a command such as 'torch.save(model.state_dict(), PATH)'")
        logger.error(msg)
        raise

    # Build dataloaders
    dataloaders = get_dataloaders(
        data_dir,
        task_name=task_name,
        splits=["val", "test"],  # TODO: replace with ['split'] and update below
        max_data_samples=None,
        max_sequence_length=max_seq_len,
        tokenizer_name=bert_model_name,
        batch_size=batch_size,
        uid="uids",
    )

    # TEMP: sanity-check val performance
    logger.info(f"Valid score: {model.score(dataloaders[0])}")

    filename = f"{task_name}.jsonl"
    filepath = os.path.join(submit_subdir, filename)
    make_submission_file(model, dataloaders[-1], task_name, filepath)
def main(args):
    # Dataloaders
    dls = get_dataloaders(args)

    # Model
    model = Classifier(args)

    # Trainer
    trainer = get_trainer(args)

    if args.mode == 'train':
        trainer.fit(model, dls['train_aug'], dls['validation'])
        trainer.test(model=None, test_dataloaders=dls['validation'])
    elif args.mode == 'validation':
        trainer.test(model=model, test_dataloaders=dls['validation'])
    elif args.mode == 'test':
        # Note: 'test' mode also evaluates on the validation split here.
        trainer.test(model=model, test_dataloaders=dls['validation'])
    else:
        raise Exception(f'Mode "{args.mode}" not supported.')
def train(train_path, labels, boxes, output_dir, num_epochs, hard, verbose):
    df_train, df_val = _get_toy_dataset(labels, boxes)
    if verbose:
        logging.info("Train size: {}, validation size: {}".format(
            len(df_train), len(df_val)))

    sampler = None
    if hard:
        sampler = BalancedBatchSampler(df_train, n_classes=4, n_samples=4)
    train_dl, single_train_dl, val_dl = get_dataloaders(
        df_train, df_val, train_path, sampler)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if verbose:
        logging.info("Using device {}".format(device))

    net = TripletNetwork(embedding_size=128).to(device)
    criterion = TripletLoss()
    selector = None
    if hard:
        selector = HardBatchTripletSelector()
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    net, history = _train(
        model=net,
        optimizer=optimizer,
        criterion=criterion,
        train_dataloader=train_dl,
        single_train_dataloader=single_train_dl,
        val_dataloader=val_dl,
        num_epochs=num_epochs,
        save_path=output_dir,
        device=device,
        selector=selector,
    )
    _plot_history(history)
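# TripletLoss is defined elsewhere in the repo. A sketch of the standard
# triplet margin loss it plausibly implements (the margin value here is an
# assumption): pull anchor-positive pairs together, push anchor-negative
# pairs apart by at least the margin.
import torch
import torch.nn.functional as F

class TripletLoss(torch.nn.Module):
    def __init__(self, margin=0.2):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        d_ap = F.pairwise_distance(anchor, positive)
        d_an = F.pairwise_distance(anchor, negative)
        return F.relu(d_ap - d_an + self.margin).mean()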
config_file = os.path.join(experiment_dir, 'config_sl.yaml')
ckpt_name = 'resnet18_best.pth'
ckpt_path = os.path.join(experiment_dir, ckpt_name)
assert os.path.isfile(config_file), \
    "No parameters config file found at {}".format(config_file)

cfg = utils.load_yaml(config_file, config_type='object')
use_cuda = cfg.use_cuda and torch.cuda.is_available()
cfg.use_cuda = use_cuda
device = torch.device("cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")

# get the dataloaders
_, _, dloader_test = dataloaders.get_dataloaders(cfg, val_split=.2)

# Load the model
model = models.get_model(cfg)
state_dict = torch.load(ckpt_path, map_location=device)
model.load_state_dict(state_dict['state_dict'], strict=False)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
test_loss, test_acc = test(model, device, dloader_test, criterion,
                           experiment_dir)
print('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(test_loss, test_acc))
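# The test() helper is defined elsewhere in the repo. A minimal sketch of
# the eval loop it plausibly runs, assuming the loader yields
# (images, labels) pairs (the repo's loaders may return extra fields) and
# that the real version also writes logs under experiment_dir.
def test(model, device, loader, criterion, experiment_dir=None):
    model.eval()
    total_loss, correct, n = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            logits = model(images)
            total_loss += criterion(logits, labels).item() * labels.size(0)
            correct += (logits.argmax(dim=1) == labels).sum().item()
            n += labels.size(0)
    return total_loss / n, correct / n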
parser.add_argument("--unfoldings", type=int, dest="unfoldings", help="Number of LISTA unfoldings", default=12) parser.add_argument("--num-epochs", type=int, dest="num_epochs", help="Total number of epochs to train", default=250) parser.add_argument("--crop-size", type=int, dest="crop_size", help="Total number of epochs to train", default=128) parser.add_argument("--out-dir", type=str, dest="out_dir", help="Results' dir path", default='trained_models') parser.add_argument("--model-name", type=str, dest="model_name", help="The name of the model to be saved.", default=None) parser.add_argument("--data-path", type=str, dest="data_path", help="Path to the dir containing the training and testing datasets.", default="./datasets/") parser.add_argument("--batch-size", type=int, dest="batch_size", help="Number of images in a batch", default=1) args = parser.parse_args() args.test_path = [f'{args.data_path}/BSD68/'] args.train_path = [f'{args.data_path}/CBSD432/',f'{args.data_path}/waterloo/'] args.noise_std = args.noise_level / 255 args.guid = args.model_name if args.model_name is not None else uuid.uuid4() params = ListaParams(args.kernel_size, args.num_filters, args.stride, args.unfoldings) loaders = dataloaders.get_dataloaders(args.train_path, args.test_path, args.crop_size, args.batch_size) model = ConvLista_T(params).cuda() optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, eps=args.eps) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_decay) psnr = {x: np.zeros(args.num_epochs) for x in ['train', 'test']} print(args.__dict__) with open(f'{args.out_dir}/{args.guid}_config.json','w') as json_file: json.dump(args.__dict__, json_file, sort_keys=True, indent=4) print('Training model...') for epoch in tqdm(range(args.num_epochs), position=0, leave=False): for phase in ['train', 'test']: if phase == 'train': model.train() # Set model to training mode
def train_and_evaluate(cfg):
    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)
    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')

    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device("cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")

    # initialize the tensorboard summary writer
    writer = SummaryWriter(experiment_dir + '/tboard')

    # get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(
        cfg, val_split=.2)

    # Load the model
    model = models.get_model(cfg)

    if cfg.ssl_pretrained_exp_path:
        ssl_exp_dir = os.path.join('experiments', 'self-supervised',
                                   cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight),
                                map_location=device)
        # the stored dict has three entries: epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        del state_dict['fc.weight']
        del state_dict['fc.bias']
        model.load_state_dict(state_dict, strict=False)
        # Only finetune the fc layer
        for name, param in model.named_parameters():
            if 'fc' not in name:
                param.requires_grad = False

    model = model.to(device)

    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)

    # follow the same setting as the RotNet paper
    optimizer = optim.SGD(model.parameters(), lr=float(cfg.lr),
                          momentum=float(cfg.momentum), weight_decay=5e-4,
                          nesterov=True)
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[60, 120, 160, 200],
                                             gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()

    best_loss = 1000
    for epoch in range(cfg.num_epochs + 1):
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train,
                                      optimizer, scheduler, criterion,
                                      experiment_dir, writer)

        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val,
                                     criterion, experiment_dir, writer)
        logging.info('Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
            epoch + 1, val_loss, val_acc))

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if epoch % cfg.save_intermediate_weights == 0:
            utils.save_checkpoint(
                {'Epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_{}rot_epoch{}_checkpoint.pth'.format(
                    cfg.network.lower(), str(cfg.num_rot), str(epoch)),
                best_model='{}_{}rot_epoch{}_best.pth'.format(
                    cfg.network.lower(), str(cfg.num_rot), str(epoch)))
    writer.close()

    logging.info('\nEvaluate on test')
    test_loss, test_acc = test(model, device, dloader_test, criterion,
                               experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        test_loss, test_acc))

    # save the configuration file within that experiment directory
    utils.save_yaml(cfg, save_path=os.path.join(experiment_dir,
                                                'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
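# utils.save_checkpoint is defined elsewhere in the repo. A plausible sketch
# matching the call signature above: write the checkpoint and copy it to the
# best-model name when it is the best so far (exact file layout assumed).
import os
import shutil
import torch

def save_checkpoint(state, is_best, experiment_dir, checkpoint, best_model):
    path = os.path.join(experiment_dir, checkpoint)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(experiment_dir, best_model))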
def train_and_evaluate(cfg):
    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)
    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')

    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device("cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")

    # initialize the tensorboard summary writer
    # writer = SummaryWriter(experiment_dir + '/tboard')
    logs = os.path.join('experiments', cfg.exp_type, 'tboard_sup_demo')
    writer = SummaryWriter(logs + '/rotnet_without_pretrain')

    # get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(cfg)

    # Load the model
    model = models.get_model(cfg)
    # model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
    # model.fc = nn.Linear(in_features=512, out_features=5, bias=True)

    if cfg.use_pretrained:
        pretrained_path = os.path.join('experiments', 'supervised',
                                       cfg.pretrained_dir,
                                       cfg.pretrained_weights)
        state_dict = torch.load(pretrained_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        logging.info('loading pretrained_weights {}'.format(
            cfg.pretrained_weights))

    if cfg.use_ssl:
        ssl_exp_dir = os.path.join('experiments', 'self-supervised',
                                   cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight),
                                map_location=device)
        # the stored dict has three entries: epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        print(state_dict.keys())
        del state_dict['fc.weight']
        del state_dict['fc.bias']
        del state_dict['layer4.0.conv1.weight']
        del state_dict['layer4.0.conv2.weight']
        del state_dict['layer4.1.conv1.weight']
        del state_dict['layer4.1.conv2.weight']
        del state_dict['layer3.0.conv1.weight']
        del state_dict['layer3.0.conv2.weight']
        del state_dict['layer3.1.conv1.weight']
        del state_dict['layer3.1.conv2.weight']
        # del state_dict['layer2.0.conv1.weight']
        # del state_dict['layer2.0.conv2.weight']
        # del state_dict['layer2.1.conv1.weight']
        # del state_dict['layer2.1.conv2.weight']
        model.load_state_dict(state_dict, strict=False)

        # Only finetune the fc layer and the later conv blocks.
        # Note: the original `if 'fc' or 'layer3.0.conv' ... in name` was
        # always True; any() gives the intended membership check.
        finetune_layers = ['fc', 'layer3.0.conv', 'layer3.1.conv',
                           'layer4.0.conv', 'layer4.1.conv']
        for name, param in model.named_parameters():
            param.requires_grad = any(l in name for l in finetune_layers)

    model = model.to(device)

    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)

    # follow the same setting as the RotNet paper
    if cfg.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=float(cfg.lr),
                              momentum=float(cfg.momentum), weight_decay=5e-4,
                              nesterov=True)
    elif cfg.opt == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=float(cfg.lr))
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[60, 120, 160, 200],
                                             gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()

    global iter_cnt
    iter_cnt = 0
    best_loss = 1000
    for epoch in range(cfg.num_epochs):
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train,
                                      optimizer, scheduler, criterion,
                                      experiment_dir, writer)

        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(epoch,
                                                          cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val,
                                     criterion, experiment_dir, writer)
        logging.info('Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
            epoch, val_loss, val_acc))

        # for name, weight in model.named_parameters():
        #     writer.add_histogram(name, weight, epoch)
        #     writer.add_histogram(f'{name}.grad', weight.grad, epoch)

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if epoch % cfg.save_intermediate_weights == 0 or is_best:
            utils.save_checkpoint(
                {'Epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_epoch{}_checkpoint.pth'.format(
                    cfg.network.lower(), str(epoch)),
                best_model='{}_best.pth'.format(cfg.network.lower()))
    writer.close()

    logging.info('\nEvaluate test result on best ckpt')
    # the saved checkpoint wraps the weights, so unwrap 'state_dict' first
    state_dict = torch.load(
        os.path.join(experiment_dir,
                     '{}_best.pth'.format(cfg.network.lower())),
        map_location=device)['state_dict']
    model.load_state_dict(state_dict, strict=False)
    test_loss, test_acc = test(model, device, dloader_test, criterion,
                               experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        test_loss, test_acc))

    # save the configuration file within that experiment directory
    utils.save_yaml(cfg, save_path=os.path.join(experiment_dir,
                                                'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
from dataloaders import get_dataloaders
import matplotlib.pyplot as plt
import numpy as np

train_loader, test_loader = get_dataloaders('mnist_m')

# Per-channel mean/std computation over the training set (kept for reference):
# pixels = [0.0, 0.0, 0.0]
# count = 0
# for batch, _ in train_loader:
#     for i in range(batch.shape[0]):
#         img = batch[i].numpy().transpose(1, 2, 0)
#         curr_sum = np.mean(img, axis=(0, 1))
#         for c in range(3):
#             pixels[c] += curr_sum[c]
#         count += 1
# m = (pixels[0] / count, pixels[1] / count, pixels[2] / count)
# print('Mean:', m)
#
# pixels = [0.0, 0.0, 0.0]
# count = 0
# for batch, _ in train_loader:
#     for i in range(batch.shape[0]):
#         img = batch[i].numpy().transpose(1, 2, 0)
#         curr_sum = np.sum((img - m) ** 2, axis=(0, 1))
#         for c in range(3):
#             pixels[c] += curr_sum[c]
#         count += img.shape[0] * img.shape[1]
# s = (np.sqrt(pixels[0] / count), np.sqrt(pixels[1] / count),
#      np.sqrt(pixels[2] / count))
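# A vectorized alternative to the commented-out loops above, assuming the
# loader yields float tensors of shape (B, C, H, W): accumulate per-channel
# E[x] and E[x^2] and combine them at the end via std = sqrt(E[x^2] - E[x]^2).
import torch

def channel_stats(loader):
    n_pix, s, s2 = 0, torch.zeros(3), torch.zeros(3)
    for batch, _ in loader:
        n_pix += batch.numel() // batch.shape[1]   # B * H * W pixels
        s += batch.sum(dim=(0, 2, 3))
        s2 += (batch ** 2).sum(dim=(0, 2, 3))
    mean = s / n_pix
    std = (s2 / n_pix - mean ** 2).sqrt()
    return mean, std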
capability = torch.cuda.get_device_capability(
    0) if torch.cuda.is_available() else os.cpu_count()
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True

test_path = [f'{args.test_path}']
train_path = [f'{args.train_path}']
val_path = train_path
noise_std = args.noise_level / 255

loaders = dataloaders.get_dataloaders(train_path, test_path, val_path,
                                      crop_size=args.patch_size,
                                      batch_size=args.train_batch,
                                      downscale=args.aug_scale, concat=1,
                                      n_worker=args.n_worker,
                                      scale_max=args.scale_max,
                                      scale_min=args.scale_min)

if args.mode == 'group':
    print('group mode')
    from model.mosaic_group import ListaParams
    from model.mosaic_group import groupLista as Lista
    params = ListaParams(kernel_size=args.kernel_size,
                         num_filters=args.num_filters,
                         stride=args.stride,
                         unfoldings=args.unfoldings,
                         freq=args.freq_corr_update,
model_filename = args.uuid + '.model'
config_path = join(args.root_folder, config_filename)
with open(config_path) as conf_file:
    conf = conf_file.read()
conf = eval(conf)  # the config file is assumed to hold a Python dict literal

params = modules.ListaParams(conf['kernel_size'], conf['num_filters'],
                             conf['stride1'], conf['stride2'],
                             conf['stride3'], conf['unfoldings'])
model = modules.ConvLista_T(params)
model.load_state_dict(torch.load(join(args.root_folder, model_filename)))

# cpu is good enough for testing
test_path = [f'{args.data_path}/BSD68/']
loaders = dataloaders.get_dataloaders(test_path, test_path, 128, 1)
loaders['test'].dataset.verbose = True

model.eval()  # Set model to evaluate mode
model.cuda()

num_iters = 0
noise_std = conf['noise_std']
psnr = 0
print(f"Testing model: {args.uuid} with noise_std {noise_std * 255} "
      f"on test images...")

for batch, imagename in loaders['test']:
    batch = batch.cuda()
    noise = torch.randn_like(batch) * noise_std
    noisy_batch = batch + noise
    # forward
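    # A hedged sketch of how this loop plausibly continues: denoise, clamp
    # to [0, 1], and accumulate per-image PSNR (requires `import math`; the
    # repo's exact bookkeeping may differ).
    with torch.no_grad():
        output = model(noisy_batch)
    mse = torch.mean((output.clamp(0, 1) - batch) ** 2).item()
    psnr += 10 * math.log10(1.0 / mse)
    num_iters += 1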
def __init__(self, source_dataset):
    ###########################
    # Initialize Info Holders #
    ###########################
    self.args = get_params(source_dataset, experiment='adaptation')
    self.source_best_pred = 0.0
    self.target_best_pred = 0.0
    self.best_source_net_state = None
    self.best_target_net_state = None
    self.source_test_losses = []
    self.target_test_losses = []
    self.source_test_acc = []
    self.target_test_acc = []
    self.iters = 0

    #######################################
    # Initialize Source and target labels #
    #######################################
    self.source_disc_labels = torch.zeros(
        size=(self.args.batch_size_train, 1)).requires_grad_(False)
    self.target_disc_labels = torch.ones(
        size=(self.args.batch_size_train, 1)).requires_grad_(False)
    if self.args.cuda:
        self.source_disc_labels = self.source_disc_labels.cuda()
        self.target_disc_labels = self.target_disc_labels.cuda()

    ######################
    # Define DataLoaders #
    ######################
    kwargs = {'num_workers': 8, 'pin_memory': True}
    self.source_train_loader, self.source_test_loader = get_dataloaders(
        self.args.source_dataset, **kwargs)
    self.target_train_loader, self.target_test_loader = get_dataloaders(
        self.args.target_dataset, **kwargs)
    self.n_batch = min(len(self.target_train_loader),
                       len(self.source_train_loader))

    ##################
    # Define network #
    ##################
    self.net = get_classifier(source_dataset)
    if self.args.cuda:
        self.net = torch.nn.DataParallel(self.net, device_ids=[0])
        self.net = self.net.cuda()

    ###############
    # Set Encoder #
    ###############
    if self.args.cuda:
        self.encoder = self.net.module.encode
    else:
        self.encoder = self.net.encode

    ###################################################
    # Set Domain Classifier (Encoder + Discriminator) #
    ###################################################
    self.discriminator = get_discriminator(source_dataset)
    self.domain_classifier = DomainClassifier(self.encoder,
                                              self.discriminator)
    if self.args.cuda:
        self.domain_classifier = torch.nn.DataParallel(
            self.domain_classifier, device_ids=[0])
        self.domain_classifier = self.domain_classifier.cuda()

    #####################
    # Define Optimizers #
    #####################
    self.net_optimizer = torch.optim.SGD(self.net.parameters(),
                                         lr=self.args.learning_rate,
                                         momentum=self.args.momentum)
    self.encoder_optimizer = torch.optim.SGD(self.net.parameters(),
                                             lr=self.args.learning_rate,
                                             momentum=self.args.momentum)
    self.discriminator_optimizer = torch.optim.SGD(
        self.discriminator.parameters(),
        lr=self.args.learning_rate,
        momentum=self.args.momentum)
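# DomainClassifier is defined elsewhere in the repo. A plausible sketch:
# it chains the shared encoder with the domain discriminator, so the domain
# loss can flow back into the encoder (here the encoder may be a bound
# method rather than a Module, as constructed above).
import torch.nn as nn

class DomainClassifier(nn.Module):
    def __init__(self, encoder, discriminator):
        super().__init__()
        self.encoder = encoder
        self.discriminator = discriminator

    def forward(self, x):
        return self.discriminator(self.encoder(x))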
from skimage.metrics import structural_similarity as ssim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device_name = torch.cuda.get_device_name(
    0) if torch.cuda.is_available() else 'cpu'
capability = torch.cuda.get_device_capability(
    0) if torch.cuda.is_available() else os.cpu_count()

test_path = [args.testpath]
print(f'test data : {test_path}')
train_path = val_path = []

loaders = dataloaders.get_dataloaders(train_path, test_path, val_path,
                                      crop_size=args.patch_size,
                                      batch_size=args.train_batch,
                                      downscale=args.aug_scale, concat=1)

if args.mode == 'group':
    print('group mode')
    from model.mosaic_group import ListaParams
    from model.mosaic_group import groupLista as Lista
    params = ListaParams(kernel_size=args.kernel_size,
                         num_filters=args.num_filters,
                         stride=args.stride,
                         unfoldings=args.unfoldings,
                         freq=args.freq_corr_update,
                         corr_update=args.corr_update,
import torch
import torch.nn.functional as F

from dataloaders import get_dataloaders
from classifiers import get_classifier

cuda = True

###################
# get dataloaders #
###################
kwargs = {'num_workers': 8, 'pin_memory': True}
train_loader, test_loader = get_dataloaders('mnist_m', **kwargs)

######################
# Initialize Network #
######################
ckpt_path = '/home/ubuntu/nadav/GradientReversal/weights/mnist_class/99.35/model.pth'
net = get_classifier('mnist')
if cuda:
    net = torch.nn.DataParallel(net, device_ids=[0])
    net = net.cuda()
state = torch.load(ckpt_path)
net.load_state_dict(state)
net.eval()

test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
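        # Hedged continuation of the truncated loop above, following the
        # standard classification-eval pattern; the repo's version may
        # aggregate differently.
        if cuda:
            data, target = data.cuda(), target.cuda()
        output = net(data)
        test_loss += F.cross_entropy(output, target, reduction='sum').item()
        correct += output.argmax(dim=1).eq(target).sum().item()

print('Test loss: {:.4f}, accuracy: {}/{}'.format(
    test_loss / len(test_loader.dataset), correct, len(test_loader.dataset)))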
# Load the model
model = models.get_model(cfg)

# Reload weights from the saved file
print('restoring weights from :', restore_file)
ckpt_path = os.path.join(experiment_dir, restore_file)
utils.load_checkpoint(os.path.join(ckpt_path), model, device)
model.eval()
model.to(device)

# get the dataloaders
torch.manual_seed(0)
np.random.seed(0)
cfg.val_split = None
dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(cfg)

# candidate layers to hook for feature extraction
# layer_name = model.avgpool
# layer_name = model.layer4[1]  # model.layer2[1].bn2
# layer_name = model.layer3[1]
layer_name = model.layer2[1]
summary(model, (3, 128, 128))

# save the feature embeddings for every image
features_path = os.path.join(experiment_dir, 'features_3')
if not os.path.exists(features_path):
    os.makedirs(features_path)
train_feat_path = os.path.join(features_path, 'train_features_dict.p')
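# save_features_as_dict is defined elsewhere in the repo. A hedged sketch of
# what it plausibly does, assuming the loaders yield 4-tuples whose last
# element is the image name (matching the `images, _, _, _` unpacking used
# elsewhere in these scripts) and a SaveFeatures-style forward hook:
import pickle
import torch

def save_features_as_dict(model, loader, hook, save_path,
                          num_batch='all', device='cpu'):
    names, feats = [], {}
    model.eval()
    with torch.no_grad():
        for i, (images, _, _, img_names) in enumerate(loader):
            model(images.to(device))           # hook captures the features
            for n, f in zip(img_names, hook.features):
                feats[n] = f.flatten().numpy()
            names.extend(img_names)
            if num_batch != 'all' and i + 1 >= num_batch:
                break
    with open(save_path, 'wb') as f:
        pickle.dump(feats, f)
    return names, feats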
train_path = [f'{args.data_path}/CBSD432/', f'{args.data_path}/waterloo/']

kernel_size = args.kernel_size
stride = args.stride
num_filters = args.num_filters
lr = args.lr
eps = args.eps
unfoldings = args.unfoldings
lr_decay = args.lr_decay
lr_step = args.lr_step
patch_size = args.patch_size
num_epochs = args.num_epochs
noise_std = args.noise_level / 255
threshold = args.threshold

params = ListaParams(kernel_size, num_filters, stride, unfoldings)
loaders = dataloaders.get_dataloaders(train_path, test_path, patch_size, 1)

model = ConvLista_T(params).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, eps=eps)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step,
                                            gamma=lr_decay)
psnr = {x: np.zeros(num_epochs) for x in ['train', 'test']}

guid = args.model_name if args.model_name is not None else uuid.uuid4()
config_dict = {
    'uuid': guid,
    'kernel_size': kernel_size,
    'stride': stride,
    'num_filters': num_filters,
    'lr': lr,
    'unfoldings': unfoldings,
def main(args):
    config = vars(args)

    # Set random seed for reproducibility
    if config["seed"]:
        seed = config["seed"]
        logging.info(f"Setting seed: {seed}")
        set_seed(seed)

    # Full log path gets created in LogWriter
    log_writer = TensorBoardWriter(log_root=args.log_root,
                                   run_name=args.run_name)
    config["log_dir"] = log_writer.log_dir

    # Save command line argument into file
    cmd_msg = " ".join(sys.argv)
    logging.info(f"COMMAND: {cmd_msg}")
    log_writer.write_text(cmd_msg, "cmd.txt")

    # Save config into file
    logging.info(f"CONFIG: {config}")
    log_writer.write_config(config)

    # Construct dataloaders and tasks and load slices
    dataloaders = []
    tasks = []
    task_names = args.task
    for task_name in task_names:
        task_dataloaders = get_dataloaders(
            data_dir=args.data_dir,
            task_name=task_name,
            splits=["train", "valid", "test"],
            max_sequence_length=args.max_sequence_length,
            max_data_samples=args.max_data_samples,
            tokenizer_name=args.xlnet_model,
            batch_size=args.batch_size,
        )
        dataloaders.extend(task_dataloaders)
        task = superglue_tasks.task_funcs[task_name](
            args.xlnet_model,
            last_hidden_dropout_prob=args.last_hidden_dropout_prob)
        tasks.append(task)

    if args.slice_dict:
        slice_dict = json.loads(str(args.slice_dict))
        # Ensure this is a mapping from str to list
        for k, v in slice_dict.items():
            assert isinstance(k, str)
            assert isinstance(v, list)

        slice_tasks = []
        for task in tasks:
            # Update slicing tasks
            slice_names = slice_dict[task.name]
            slice_tasks.extend(convert_to_slice_tasks(task, slice_names))
            slicing_functions = [
                slice_func_dict[task_name][slice_name]
                for slice_name in slice_names
            ]
            applier = PandasSFApplier(slicing_functions)

            # Update slicing dataloaders
            for dl in dataloaders:
                df = task_dataset_to_dataframe(dl.dataset)
                S_matrix = applier.apply(df)
                add_slice_labels(dl, task, S_matrix, slice_names)
        tasks = slice_tasks

    # Build model
    model = MultitaskModel(name="SuperGLUE", tasks=tasks)

    # Load pretrained model if necessary
    if config["model_path"]:
        model.load(config["model_path"])

    # Training
    if args.train:
        trainer = Trainer(**config)
        trainer.train_model(model, dataloaders)

    scores = model.score(dataloaders)

    # Save metrics into file
    logging.info(f"Metrics: {scores}")
    log_writer.write_json(scores, "metrics.json")

    # Save best metrics into file
    if args.train and trainer.config["checkpointing"]:
        logging.info(f"Best metrics: "
                     f"{trainer.log_manager.checkpointer.best_metric_dict}")
        log_writer.write_json(
            trainer.log_manager.checkpointer.best_metric_dict,
            "best_metrics.json")
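# For reference, an example of what an entry in slice_func_dict might look
# like, using snorkel's @slicing_function decorator; the predicate and the
# `premise` field are illustrative, not the repo's actual slices.
from snorkel.slicing import slicing_function

@slicing_function()
def short_premise(x):
    # select examples whose premise is shorter than 10 tokens
    return len(x.premise.split()) < 10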
        self.mode = 'train'
        self.height = 224
        self.width = 224
        self.task = 'anticipation'
        self.t_buffer = 3.5
        self.t_ant = 1.0

    def __repr__(self):
        return 'Input Args: ' + json.dumps(self.__dict__, indent=4)


if __name__ == '__main__':
    # Get args
    args = Args()
    print(args)

    # Dataloaders
    dls = get_dataloaders(args)

    # Get sample
    sample = next(iter(dls['train'].dataset))
    for k, v in sample.items():
        if torch.is_tensor(v):
            print(f'{k}: {v.shape}')
        else:
            print(f'{k}: {v}')
def main():
    if not torch.cuda.is_available():
        sys.exit(1)

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.gpu = 0
    args.world_size = 1
    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    start_t = time.time()
    cudnn.benchmark = True
    cudnn.enabled = True

    # Build dataloaders
    num_classes, train_loader, train_iters, val_loader, val_iters = \
        get_dataloaders(args.dataset, args.data, args.batch_size, args.mixup,
                        workers=args.workers)

    # Setup logger
    logger = get_logger(args, train_iters, val_iters)

    # Build model
    model_student = get_model(args.arch, qa=args.qa, qw=args.qw,
                              num_classes=num_classes,
                              num_channels=args.channel)
    model_student = model_student.cuda()
    if args.distributed:
        model_student = DDP(model_student, device_ids=[args.gpu],
                            find_unused_parameters=False)
    print(model_student)

    # TODO hack: optionally freeze everything except step sizes
    # for name, param in model_student.named_parameters():
    #     if name.find('step_size') == -1:
    #         param.requires_grad = False

    # load model
    criterion_train, criterion_val, model_teacher = \
        get_criterion(args.dataset, args.teacher, num_classes, args.gpu,
                      args.mixup, args.label_smoothing)

    # Build optimizer: group parameters so weights get weight decay and
    # step sizes do not
    weight_parameters = []
    step_parameters = []
    other_parameters = []
    for pname, p in model_student.named_parameters():
        if p.ndimension() == 4 or 'conv' in pname:
            weight_parameters.append(p)
        elif 'step_size' in pname:
            step_parameters.append(p)
        else:
            other_parameters.append(p)
    # optimizer = torch.optim.SGD(
    #     [{'params': other_parameters},
    #      {'params': weight_parameters, 'weight_decay': args.weight_decay},
    #      {'params': step_parameters, 'momentum': 0.0}],
    #     lr=args.learning_rate, momentum=args.momentum)
    optimizer = torch.optim.Adam(
        [{'params': other_parameters},
         {'params': weight_parameters, 'weight_decay': args.weight_decay},
         {'params': step_parameters}],
        lr=args.learning_rate, betas=(args.momentum, 0.999))

    scheduler = lr_cosine_policy(args.learning_rate, args.warmup, args.epochs,
                                 logger=logger)

    # Resume
    start_epoch = 0
    best_top1_acc = 0
    checkpoint_tar = args.resume
    if os.path.exists(checkpoint_tar):
        print('loading checkpoint {} ..........'.format(checkpoint_tar))
        checkpoint = torch.load(
            checkpoint_tar,
            map_location=lambda storage, loc: storage.cuda(args.gpu))
        # start_epoch = checkpoint['epoch']
        best_top1_acc = checkpoint['best_top1_acc']
        state_dict = checkpoint['state_dict']
        if not args.distributed:
            state_dict = {k.replace('module.', ''): state_dict[k]
                          for k in state_dict}
        else:
            find_module = any('module' in k for k in state_dict)
            if not find_module:
                state_dict = {'module.' + k: state_dict[k]
                              for k in state_dict}

        # TODO hack
        # init_ea_model_from(model_student, state_dict, 2)
        if args.qw[0] == 'm' or args.qw[0] == 'a':
            initialized = args.qw[0] != 'a'
            init_model_from(model_student, state_dict, int(args.qw[1:]),
                            initialized=initialized)
        elif args.qw[0] == 'd' or args.qw[0] == 'e':
            init_binaryduo_from(model_student, state_dict)
        else:
            model_student.load_state_dict(state_dict, strict=False)

        # TODO: hack to load step size from another checkpoint
        # checkpoint_2 = torch.load('cifar100_64_sa2.pth.tar',
        #     map_location=lambda storage, loc: storage.cuda(args.gpu))
        # state_dict = checkpoint_2['state_dict']
        # for name, layer in model_student.named_modules():
        #     if isinstance(layer, BinaryDuo):
        #         with torch.no_grad():
        #             layer.step_size.copy_(2 * state_dict.get(name.replace(
        #                 'binary_conv', 'binary_activation.step_size'), None))
        #         layer.initialized = True
        #     if isinstance(layer, MultibitLSQNoScale):
        #         with torch.no_grad():
        #             layer.step_size.copy_(state_dict.get(name.replace(
        #                 'binary_conv.act_quantizer',
        #                 'binary_activation.step_size'), None))
        #         layer.initialized = True

        for k, v in model_student.named_parameters():
            print(k)
        print("loaded checkpoint {} epoch = {}".format(checkpoint_tar,
                                                       checkpoint['epoch']))

    # train the model
    epoch_iter = range(start_epoch, args.epochs)
    if logger is not None:
        epoch_iter = logger.epoch_generator_wrapper(epoch_iter)

    valid_top1_acc = validate(0, val_loader, model_student, criterion_val,
                              args, logger)
    print('Val acc', valid_top1_acc)

    for epoch in epoch_iter:
        train(epoch, train_loader, model_student, model_teacher,
              criterion_train, optimizer, scheduler, logger)
        # print('--------- Step size -------------')
        # for name, param in model_student.named_parameters():
        #     if name.find('step_size') != -1:
        #         print(name, param.mean().detach().cpu().numpy(),
        #               param.min().detach().cpu().numpy(),
        #               param.max().detach().cpu().numpy(),
        #               param.grad.mean().detach().cpu().numpy())
        valid_top1_acc = validate(epoch, val_loader, model_student,
                                  criterion_val, args, logger)

        is_best = False
        if valid_top1_acc > best_top1_acc:
            best_top1_acc = valid_top1_acc
            is_best = True

        if not torch.distributed.is_initialized() \
                or torch.distributed.get_rank() == 0:
            save_checkpoint(
                {'epoch': epoch,
                 'state_dict': model_student.state_dict(),
                 'best_top1_acc': best_top1_acc,
                 'optimizer': optimizer.state_dict()},
                is_best, args.save)

    training_time = (time.time() - start_t) / 3600
    print('total training time = {} hours'.format(training_time))
    if args.distributed:
        dist.destroy_process_group()
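# lr_cosine_policy is defined elsewhere in the repo. A hedged sketch of a
# scheduler factory matching its call signature above: linear warmup
# followed by cosine decay; the returned callable's exact interface is an
# assumption.
import math

def lr_cosine_policy(base_lr, warmup, epochs, logger=None):
    def set_lr(optimizer, epoch):
        if epoch < warmup:
            lr = base_lr * (epoch + 1) / warmup
        else:
            t = (epoch - warmup) / max(1, epochs - warmup)
            lr = 0.5 * base_lr * (1 + math.cos(math.pi * t))
        for group in optimizer.param_groups:
            group['lr'] = lr
        return lr
    return set_lr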
def main(args):
    # Ensure that global state is fresh
    Meta.reset()

    # Initialize Emmental
    config = parse_arg_to_config(args)
    emmental.init(config["meta_config"]["log_path"], config=config)

    # Save command line argument into file
    cmd_msg = " ".join(sys.argv)
    logger.info(f"COMMAND: {cmd_msg}")
    write_to_file(Meta.log_path, "cmd.txt", cmd_msg)

    # Save Emmental config into file
    logger.info(f"Config: {Meta.config}")
    write_to_file(Meta.log_path, "config.txt", Meta.config)

    Meta.config["learner_config"]["global_evaluation_metric_dict"] = {
        f"model/SuperGLUE/{split}/score": partial(superglue_scorer,
                                                  split=split)
        for split in ["val"]
    }

    # Construct dataloaders and tasks and load slices
    dataloaders = []
    tasks = []
    for task_name in args.task:
        task_dataloaders = get_dataloaders(
            data_dir=args.data_dir,
            task_name=task_name,
            splits=["train", "val", "test"],
            max_sequence_length=args.max_sequence_length,
            max_data_samples=args.max_data_samples,
            tokenizer_name=args.bert_model,
            batch_size=args.batch_size,
            augment=args.augmentations,
        )
        task = models.model[task_name](
            args.bert_model,
            last_hidden_dropout_prob=args.last_hidden_dropout_prob)

        if args.slices:
            logger.info("Initializing task-specific slices")
            slice_func_dict = slicing.slice_func_dict[task_name]
            # Include general purpose slices
            if args.general_slices:
                logger.info("Including general slices")
                slice_func_dict.update(slicing.slice_func_dict["general"])
            task_dataloaders = slicing.add_slice_labels(
                task_name, task_dataloaders, slice_func_dict)
            slice_tasks = slicing.add_slice_tasks(task_name, task,
                                                  slice_func_dict,
                                                  args.slice_hidden_dim)
            tasks.extend(slice_tasks)
        else:
            tasks.append(task)

        dataloaders.extend(task_dataloaders)

    # Build Emmental model
    model = EmmentalModel(name="SuperGLUE", tasks=tasks)

    # Load pretrained model if necessary
    if Meta.config["model_config"]["model_path"]:
        model.load(Meta.config["model_config"]["model_path"])

    # Training
    if args.train:
        emmental_learner = EmmentalLearner()
        emmental_learner.learn(model, dataloaders)

    # If the model is slice-aware, slice scores come from the slice heads;
    # otherwise, manually calculate performance on slices
    if not args.slices:
        slice_func_dict = {}
        slice_keys = args.task
        if args.general_slices:
            slice_keys.append("general")
        for k in slice_keys:
            slice_func_dict.update(slicing.slice_func_dict[k])
        scores = slicing.score_slices(model, dataloaders, args.task,
                                      slice_func_dict)
    else:
        scores = model.score(dataloaders)

    # Save metrics into file
    logger.info(f"Metrics: {scores}")
    write_to_file(Meta.log_path, "metrics.txt", scores)

    # Save best metrics into file
    if args.train:
        logger.info(
            f"Best metrics: "
            f"{emmental_learner.logging_manager.checkpointer.best_metric_dict}")
        write_to_file(
            Meta.log_path, "best_metrics.txt",
            emmental_learner.logging_manager.checkpointer.best_metric_dict)

    # Save submission file (use a local name so the full dataloader list is
    # not clobbered across tasks)
    for task_name in args.task:
        test_dataloaders = [d for d in dataloaders if d.split == "test"]
        assert len(test_dataloaders) == 1
        filepath = os.path.join(Meta.log_path, f"{task_name}.jsonl")
        make_submission_file(model, test_dataloaders[0], task_name, filepath)
# ckpt_file = 'cifar100_fp18_c64_sgd_wd5e-4_distill_qa/model_best.pth.tar'
# ckpt_file = 'cifar100_64_sa2.pth.tar'
ckpt_file = 'cifar100_fp18_c64_a2_mixup_nowd/checkpoint-1.pth.tar'
valdir = '~/data/'
batch_size = 1000

data = torch.load(ckpt_file)
checkpoint = data['state_dict']
state_dict = {}
for k in checkpoint.keys():
    print(k, checkpoint[k].shape)
    state_dict[k.replace('module.', '')] = checkpoint[k]

num_classes, _, _, val_loader, val_iters = \
    get_dataloaders('cifar100', valdir, batch_size, False, workers=32)

# model = birealnet18()
model = get_model('resnet18', num_classes=num_classes, num_channels=64,
                  qa='fp', qw='a2')
model = model.cuda()
model.load_state_dict(state_dict, strict=False)
# init_binaryduo_from(model, state_dict)
# for name, layer in model.named_modules():
#     if isinstance(layer, BinaryDuo):
#         weight = layer.weight