def __init__(self, run_name: str, run_root: str, model: nn.Module,
             optimizer: torch.optim.Optimizer, criterion: nn.MSELoss,
             data_loaders: Union[List, Dict], num_epoch: int = 100,
             log_int: int = 10, device: str = 'cpu', save: bool = False,
             save_int: int = 1, resume_epoch: int = None,
             gpu_id: int = None, target_names: List = None):
    """Set up a training run: model, optimizer, device and log directory.

    Args:
        run_name: Name of this run; appended to the log directory path.
        run_root: Root directory; logs go to '<run_root>/.log/<run_name>'.
        model: Network to train.
        optimizer: Optimizer updating the model parameters.
        criterion: Loss function (annotated nn.MSELoss in this codebase).
        data_loaders: Dict mapping phase name -> DataLoader, or a plain list.
        num_epoch: Number of epochs to train.
        log_int: Logging interval.
        device: 'cpu' or 'cuda'.
        save: Whether to write logs/checkpoints to disk.
        save_int: Checkpoint-saving interval (in epochs).
        resume_epoch: Epoch to resume from; None means start fresh.
        gpu_id: Specific GPU to claim when device == 'cuda'.
        target_names: Class-index names; only useful for discrete targets.
    """
    self.run_name = run_name
    self.run_root = run_root
    self.model = model
    self.optimizer = optimizer
    self.criterion = criterion
    self.num_epoch = num_epoch
    self.log_int = log_int
    self.save = save
    self.save_int = save_int
    # resume_epoch doubles as the resume flag: non-None means resume.
    # (Collapses the original if/else whose branches both stored resume_epoch.)
    self.resume = resume_epoch is not None
    self.resume_epoch = resume_epoch
    if device == 'cuda':
        # Claim a GPU (optionally a specific one) before resolving the device.
        gpu.get_gpu(gpu_id)
        device = torch.device(torch.cuda.current_device())
    self.device = torchDevice(device)
    self.log_root = os.path.join(run_root, '.log', run_name)
    self.data_loaders = data_loaders
    # Lengths are only well defined when a phase -> loader dict is supplied.
    if isinstance(data_loaders, dict):
        # BUGFIX: iterate the loaders (.values()), not the dict itself —
        # iterating a dict yields its KEYS, so the original recorded the
        # lengths of the key strings, not of the data loaders.
        self.data_lengths = {phase: len(loader)
                             for phase, loader in self.data_loaders.items()}
    else:
        self.data_lengths = {}
    if save:
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(self.log_root, exist_ok=True)
    # The class-index information is only useful for discrete targets.
    self.target_names = target_names
def __init__(self, run_name: str, run_root: str, model: nn.Module,
             optimizer: torch.optim.Optimizer, criterion: nn.MSELoss,
             data_loaders: Union[List, Dict], num_epoch: int = 100,
             log_int: int = 10, device: str = 'cpu', save: bool = False,
             save_int: int = 1, resume: bool = False,
             gpu_id: int = None, balance_factor: List = None):
    """Set up a training run with an optional per-epoch class-balance schedule.

    Args:
        run_name: Name of this run; appended to the log directory path.
        run_root: Root directory; logs go to '<run_root>/.log/<run_name>'.
        model: Network to train.
        optimizer: Optimizer updating the model parameters.
        criterion: Loss function (annotated nn.MSELoss in this codebase).
        data_loaders: Dict mapping phase name -> DataLoader, or a plain list.
        num_epoch: Number of epochs to train.
        log_int: Logging interval.
        device: 'cpu' or 'cuda'.
        save: Whether to write logs/checkpoints to disk.
        save_int: Checkpoint-saving interval (in epochs).
        resume: Whether to resume a previous run.
        gpu_id: Specific GPU to claim when device == 'cuda'.
        balance_factor: List containing the balance factor for each
            training epoch.
    """
    self.run_name = run_name
    self.run_root = run_root
    self.model = model
    self.optimizer = optimizer
    self.criterion = criterion
    self.num_epoch = num_epoch
    self.log_int = log_int
    self.save = save
    self.save_int = save_int
    self.resume = resume
    self.balance_factor = balance_factor
    if device == 'cuda':
        # Claim a GPU (optionally a specific one) before resolving the device.
        gpu.get_gpu(gpu_id)
        device = torch.device(torch.cuda.current_device())
    self.device = torchDevice(device)
    self.log_root = os.path.join(run_root, '.log', run_name)
    self.data_loaders = data_loaders
    # Lengths are only well defined when a phase -> loader dict is supplied.
    if isinstance(data_loaders, dict):
        # BUGFIX: iterate the loaders (.values()), not the dict itself —
        # iterating a dict yields its KEYS, so the original recorded the
        # lengths of the key strings, not of the data loaders.
        self.data_lengths = {phase: len(loader)
                             for phase, loader in self.data_loaders.items()}
    else:
        self.data_lengths = {}
    if save:
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(self.log_root, exist_ok=True)
def __init__(self, model: torch.nn.Module, dataloader: DataLoader,
             output_prob_fn: Callable = None, output_dtype_fn: Callable = None,
             output_dtype: np.dtype = None, output_label: str = None,
             output_wkw_root: str = None, output_wkw_compress: bool = False,
             device: str = 'cpu', gpu_id: int = None, interpolate: str = None):
    """Configure a prediction pass: model, data source, output handling, device.

    Args:
        model: Trained network used for inference.
        dataloader: Source of input batches.
        output_prob_fn: Maps raw model output to probabilities; defaults to
            exp of channel 1 at spatial position (0, 0).
        output_dtype_fn: Optional conversion applied before casting outputs.
        output_dtype: Target numpy dtype of the written output.
        output_label: Label/name attached to the output.
        output_wkw_root: Root path of the wkw output dataset.
        output_wkw_compress: Selects the wkw block type (see below).
        device: 'cpu' or 'cuda'.
        gpu_id: Specific GPU to claim when device == 'cuda'.
        interpolate: Optional interpolation mode identifier.
    """
    self.model = model
    self.dataloader = dataloader
    # Default probability mapping: exponentiate the class-1 logit at (0, 0).
    self.output_prob_fn = (output_prob_fn if output_prob_fn is not None
                           else (lambda x: np.exp(x[:, 1, 0, 0])))
    self.output_dtype_fn = output_dtype_fn
    self.output_dtype = output_dtype
    self.output_label = output_label
    self.output_wkw_root = output_wkw_root
    # Block type 1 when compression is off, 2 otherwise (presumably
    # uncompressed vs. compressed wkw blocks — the exact `is False` test
    # of the original is kept on purpose).
    self.output_wkw_block_type = 1 if output_wkw_compress is False else 2
    if device == 'cuda':
        # Claim a GPU (optionally a specific one) before resolving the device.
        gpu.get_gpu(gpu_id)
        device = torch.device(torch.cuda.current_device())
    self.device = torchDevice(device)
    self.interpolate = interpolate
def __init__(self, run_root: str, model: nn.Module,
             optimizer: torch.optim.Optimizer, criterion: nn.MSELoss,
             data_loaders: {}, num_epoch: int = 100, log_int: int = 10,
             device: str = 'cpu', save: bool = False, resume: bool = False,
             gpu_id: int = None):
    """Set up a training run whose loaders always come as a phase dict.

    Args:
        run_root: Root directory; logs go to '<run_root>/.log'.
        model: Network to train.
        optimizer: Optimizer updating the model parameters.
        criterion: Loss function (annotated nn.MSELoss in this codebase).
        data_loaders: Dict mapping phase name -> DataLoader.
        num_epoch: Number of epochs to train.
        log_int: Logging interval.
        device: 'cpu' or 'cuda'.
        save: Whether to write logs/checkpoints to disk.
        resume: Whether to resume a previous run.
        gpu_id: Specific GPU to claim when device == 'cuda'.
    """
    self.run_root = run_root
    self.model = model
    self.optimizer = optimizer
    self.criterion = criterion
    self.num_epoch = num_epoch
    self.log_int = log_int
    self.save = save
    self.resume = resume
    if device == 'cuda':
        # Claim a GPU (optionally a specific one) before resolving the device.
        gpu.get_gpu(gpu_id)
        device = torch.device(torch.cuda.current_device())
    self.device = torchDevice(device)
    self.log_root = os.path.join(run_root, '.log')
    self.data_loaders = data_loaders
    # BUGFIX: iterate the loaders (.values()), not the dict itself —
    # iterating a dict yields its KEYS, so the original recorded the
    # lengths of the key strings, not of the data loaders.
    self.data_lengths = {phase: len(loader)
                         for phase, loader in self.data_loaders.items()}
    if save:
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(self.log_root, exist_ok=True)
from nltk import word_tokenize
from torch import max as tmax
from collections import Counter

# NOTE(review): torchDevice, cuda and cudnn below are expected to be imported
# elsewhere in this file — confirm the import block provides them.

# Data parameters
data_folder = '../data/coco/'  # folder with data files saved by create_input_files.py
data_name = 'coco_imgs'  # base name shared by data files

# Model parameters
emb_dim = 512  # dimension of word embeddings
attention_dim = 512  # dimension of attention linear layers
decoder_dim = 512  # dimension of decoder RNN
dropout = 0.6
# sets device for model and PyTorch tensors
device = torchDevice("cuda:0" if cuda.is_available() else "cpu")
# set to true only if inputs to model are fixed size; otherwise lot of computational overhead
cudnn.benchmark = True

# Training parameters
start_epoch = 0
epochs = 10  # number of epochs to train for (if early stopping is not triggered)
# keeps track of number of epochs since there's been an improvement in validation BLEU
epochs_since_improvement = 0
batch_size = 20
workers = 1  # for data-loading; right now, only 1 works with h5py
encoder_lr = 1e-4  # learning rate for encoder if fine-tuning
decoder_lr = 4e-4  # learning rate for decoder
grad_clip = 5.  # clip gradients at this absolute value
alpha_c = 1.  # regularization parameter for 'doubly stochastic attention', as in the paper
from torch.utils import data from PIL import Image from torch.autograd import Variable from torchvision import transforms from torch import device as torchDevice, cuda, LongTensor import re img_size = 224 device = torchDevice("cuda:0" if cuda.is_available() else "cpu") # sets device for model and PyTorch tensors class ANPDataset(data.Dataset): def __init__(self, partition, split, captions): # 'Initialization' self.split = split assert self.split in {'train', 'validation', 'test'} imgs_addrs = partition[split] self.imgs_addrs = [] self.captions = {} if ('coco' in imgs_addrs[0]): for addr in imgs_addrs: for i, cap in enumerate(captions[addr]): i_addr = addr + str(i) self.imgs_addrs.append(i_addr) self.captions.update({i_addr: cap}) else: self.imgs_addrs = imgs_addrs self.captions = captions self.caplens = { key: len(value) for key, value in self.captions.items()