def __init__(self, name, render=False, cuda=True, n_steps=1, gamma=0.99,
             render_mode='rgb_array', norm_rewards='none', norm_states=False,
             clip_obs=0, clip_rew=0):
    super(BulletEnv, self).__init__()
    self.torch = torch.cuda if cuda else torch
    self.name = name
    self.render_mode = render_mode
    self.gamma = gamma
    # self.env = NormalizedActions(gym.make(name, render=render))
    # self.env = gym.make(name, render=render)
    self.env = gym.make(name)
    self.observation_space = self.env.observation_space
    self.action_space = self.env.action_space
    self.ns = self.observation_space.shape[0]

    # Support for state normalization or using time as a feature
    self.state_filter = Identity()
    if norm_states:
        self.state_filter = ZFilter(self.state_filter, shape=[self.ns],
                                    clip=clip_obs)

    # Support for reward normalization
    self.reward_filter = Identity()
    if norm_rewards == "rewards":
        self.reward_filter = ZFilter(self.reward_filter, shape=(),
                                     center=False, clip=clip_rew)
    elif norm_rewards == "returns":
        self.reward_filter = RewardFilter(self.reward_filter, shape=(),
                                          gamma=self.gamma, clip=clip_rew)

    self.n_steps = n_steps
    # Vector of per-step discount factors: gamma**(n_steps-1), ..., gamma**0
    self.gamma = gamma**(
        n_steps - self.torch.FloatTensor(n_steps).fill_(1).cumsum(0))
    self.render = render
    self.rewards = list(self.torch.FloatTensor(n_steps, 1).zero_())
    self.terminals = list(self.torch.FloatTensor(n_steps, 1).zero_())
    self.reset()
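# All of the snippets in this section rely on an `Identity` helper whose
# definition is not shown. A minimal sketch consistent with how it is used
# everywhere here (a no-op that returns its input unchanged, usable both as
# a filter and as a module; modern PyTorch ships torch.nn.Identity with the
# same behavior):
import torch.nn as nn

class Identity(nn.Module):
    """Pass-through module: forward(x) simply returns x."""

    def forward(self, x):
        return x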
def __init__(self, num_classes):
    super().__init__()
    self._resnet = torchvision.models.resnet18(pretrained=True)
    # Drop the classification head; the backbone now outputs 512-d features.
    self._resnet.fc = Identity()
    self._linear_means = torch.nn.Linear(512, num_classes)
    self._linear_log_vars = torch.nn.Linear(512, num_classes)
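# The forward pass for this two-headed (means / log-variances) module is not
# shown; assuming the standard pattern for such a variational head, it might
# look like this sketch:
def forward(self, x):
    features = self._resnet(x)  # (N, 512) now that fc is Identity
    return self._linear_means(features), self._linear_log_vars(features)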
def get_model(args):
    """Use model pretrained on UCF101."""
    model = resnet.resnet18(num_classes=args.num_classes,
                            shortcut_type='A',
                            sample_size=args.sample_size,
                            sample_duration=args.sample_duration)
    model = model.cuda()
    model = nn.DataParallel(model, device_ids=None)
    print('Use pretrained model {}'.format(args.pretrain_path))
    pretrain = torch.load(args.pretrain_path)
    if 'backbone' in pretrain.keys():
        model.module.fc = Identity()
        model.load_state_dict(pretrain['backbone'])
    else:
        model.load_state_dict(pretrain['state_dict'])
        model.module.fc = Identity()
    return model
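# Hypothetical invocation; every field value below is an illustrative
# assumption (including the checkpoint filename), not taken from the
# original code:
import argparse

args = argparse.Namespace(num_classes=101, sample_size=112,
                          sample_duration=16,
                          pretrain_path='pretrained_ucf101.pth')
model = get_model(args)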
def __init__(self, encoder_archi=None, decoder_archi=None,
             predictor_structure=[]):
    """Instantiate the class.

    Parameters
    ----------
    encoder_archi : None or dict
        If dict: used to build an LSTM module, so specify all mandatory
        parameters with keywords. Please check the torch.nn.LSTM
        documentation.
        If None: the encoder module is Identity (returns the input).
    decoder_archi : None or dict
        If dict: used to build an LSTM module, so specify all mandatory
        parameters with keywords. Please check the torch.nn.LSTM
        documentation.
        If None: the decoder module is Identity (returns the input).
    predictor_structure : list
        List of fully connected layer sizes. Check the utils.fc_block
        documentation.
    """
    super(KeyWordSelectionModel, self).__init__()
    self.encoder = Identity() if encoder_archi is None else nn.LSTM(
        **encoder_archi)
    self.decoder = Identity() if decoder_archi is None else nn.LSTM(
        **decoder_archi)
    self.predictor = nn.Sequential(
        *fc_block(predictor_structure[0], predictor_structure[1:]),
        nn.Sigmoid())
    self.encoder_archi = encoder_archi
    self.decoder_archi = decoder_archi
    self.predictor_structure = predictor_structure
    self.x_sizes = None
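# Hypothetical instantiation, assuming fc_block(in_dim, sizes) builds a stack
# of fully connected layers as the docstring suggests; the dimensions below
# are illustrative only:
model = KeyWordSelectionModel(
    encoder_archi=dict(input_size=300, hidden_size=128, batch_first=True),
    decoder_archi=None,  # decoder stays Identity
    predictor_structure=[128, 64, 1],
)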
def __init__(self, embedding, input_dim, num_softmax=1, dropout=0,
             padding_idx=-1):
    """Initialize output layer.

    Args:
        embedding (nn.Module): the word embedding module
        input_dim (int): input dimension of OutputLayer
        num_softmax (int): (default 1) number of softmaxes to calculate;
            see arxiv.org/abs/1711.03953 for more info. Increasing this can
            add more expressiveness to the prediction.
        dropout (float): (default 0.0) dropout ratio
        padding_idx (int): (default -1) the model should output a large
            negative number for the score at this index. If set to -1, it
            is disabled; if >= 0, the layer always outputs -1e20 at this
            index.
    """
    super().__init__()
    self.embedding = embedding
    self.num_vocab, self.emb_size = embedding.weight.size()
    self.input_dim = input_dim
    self.num_softmax = num_softmax
    self.dropout = dropout
    self.padding_idx = padding_idx

    if self.num_softmax > 1:
        self.prior_trans = nn.Linear(self.input_dim, self.num_softmax,
                                     bias=False)
        self.latent_trans = nn.Sequential(
            nn.Linear(self.input_dim, self.num_softmax * self.emb_size),
            nn.Tanh(), nn.Dropout(dropout))
    else:
        if self.input_dim != self.emb_size:
            self.output_trans = nn.Sequential(
                nn.Linear(self.input_dim, self.emb_size, bias=True),
                nn.Dropout(dropout))
        else:
            self.output_trans = nn.Sequential(Identity(),
                                              nn.Dropout(dropout))
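# A hedged sketch of the mixture-of-softmaxes forward pass this constructor
# sets up (see arxiv.org/abs/1711.03953). The real forward method is not
# shown above, so treat this as an illustration of the idea only:
import torch.nn.functional as F

def forward(self, x):
    if self.num_softmax > 1:
        prior = F.softmax(self.prior_trans(x), dim=-1)      # (N, K) mixture weights
        latent = self.latent_trans(x).view(
            -1, self.num_softmax, self.emb_size)            # (N, K, E)
        logits = latent.matmul(self.embedding.weight.t())   # (N, K, V)
        # mix the K softmaxes in probability space, then return to log space
        probs = (F.softmax(logits, dim=-1) * prior.unsqueeze(-1)).sum(dim=1)
        scores = probs.clamp_min(1e-20).log()
    else:
        hidden = self.output_trans(x)                       # (N, E)
        scores = hidden.matmul(self.embedding.weight.t())   # (N, V)
    if self.padding_idx >= 0:
        scores[..., self.padding_idx] = -1e20
    return scores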
def identity(self):
    # nn.Sequential takes modules as positional arguments (or an OrderedDict),
    # not a list, so pass Identity() directly.
    return torch.nn.Sequential(Identity())
def train():
    if not osp.exists(SAVED_MODEL_DIR):
        os.makedirs(SAVED_MODEL_DIR)
    if not osp.exists(LOG_PATH):
        os.makedirs(LOG_PATH)
    writer = SummaryWriter(log_dir=LOG_PATH)
    cudnn.enabled = True

    train_loader = data.DataLoader(WSI_Dataset(
        dir=TRAIN_VAL_DIR,
        id_list_file=TRAIN_ID_LIST_FILE,
        batch_size=BATCH_SIZE,
        transform=transform_pipe_train),
                                   batch_size=1)
    validation_loader = data.DataLoader(WSI_Dataset(
        dir=TRAIN_VAL_DIR,
        id_list_file=VAL_ID_LIST_FILE,
        batch_size=BATCH_SIZE,
        transform=transform_pipe_val_test),
                                        batch_size=1)
    print("Total training wsi: " + str(len(train_loader)))
    print("Total validation wsi: " + str(len(validation_loader)))

    resnet50_model = torchvision.models.resnet50(pretrained=True)
    # remove the final fully connected layer to suit the problem
    resnet50_model.fc = Identity()
    patch_transformer = PatchTransformationModule()
    multi_tag_attention = MultitagAttentionModule()

    # if torch.cuda.device_count() > 1:
    #     print("using " + str(torch.cuda.device_count()) + " GPUs...")
    #     resnet50_model = nn.DataParallel(resnet50_model)
    #     patch_transformer = nn.DataParallel(patch_transformer)
    #     multi_tag_attention = nn.DataParallel(multi_tag_attention)

    resnet50_model.train()
    patch_transformer.train()
    multi_tag_attention.train()
    resnet50_model.to(GPU_DEVICE)
    patch_transformer.to(GPU_DEVICE)
    multi_tag_attention.to(GPU_DEVICE)
    cudnn.benchmark = True

    optimizer_resnet = torch.optim.Adam(resnet50_model.parameters(),
                                        lr=LEARNING_RATE_RESNET)
    optimizer_patch_transformer = torch.optim.Adam(
        patch_transformer.parameters(), lr=LEARNING_RATE_PATCH_TRANSFORMER)
    optimizer_multi_tag_attention = torch.optim.Adam(
        multi_tag_attention.parameters(), lr=LEARNING_RATE_MULTI_TAG_ATTENTION)
    optimizer_resnet.zero_grad()
    optimizer_patch_transformer.zero_grad()
    optimizer_multi_tag_attention.zero_grad()

    # weight = np.array([1 / float(71), 1 / float(132), 1 / float(411), 1 / float(191)])
    # weight_tensor = torch.from_numpy(weight).float().to(GPU_DEVICE)
    # mce_loss = torch.nn.CrossEntropyLoss(weight=weight_tensor)
    mce_loss = torch.nn.CrossEntropyLoss()
    softmax = torch.nn.Softmax(dim=1)
    best_val_accuracy = 0.0

    for epoch in range(1, EPOCHS + 1):
        resnet50_model.train()
        patch_transformer.train()
        multi_tag_attention.train()
        for iter, batch in enumerate(train_loader):
            image, label, name = batch
            image = np.squeeze(image).float()
            image = image.to(GPU_DEVICE)
            label = label.to(GPU_DEVICE)
            optimizer_resnet.zero_grad()
            optimizer_patch_transformer.zero_grad()
            optimizer_multi_tag_attention.zero_grad()

            visual_features = resnet50_model(image)  # MxD
            print(visual_features.shape)
            patch_transformer_features = patch_transformer(
                visual_features)  # MxD
            print(patch_transformer_features.shape)
            prediction = multi_tag_attention(patch_transformer_features)  # 1x4
            print(prediction.shape)

            loss = mce_loss(prediction, label)
            loss.backward()
            loss_value = loss.data.cpu().numpy()
            optimizer_resnet.step()
            optimizer_patch_transformer.step()
            optimizer_multi_tag_attention.step()

            predicted_class = torch.max(softmax(prediction), 1)[1]
            print(
                "epoch:{0:3d}, iter:{1:4d}, loss:{2:.3f}, prediction/label:{3:1d}/{4:1d}"
                .format(epoch, iter + 1, loss_value,
                        predicted_class.data.cpu().numpy()[0],
                        label.data.cpu().numpy()[0]))

        # validation
        resnet50_model.eval()
        patch_transformer.eval()
        multi_tag_attention.eval()
        print("validating...")
        val_loss, val_accuracy = validate(resnet50_model, patch_transformer,
                                          multi_tag_attention,
                                          validation_loader, mce_loss)
        print("val_loss: {0:.3f}, val_accuracy: {1:.3f}".format(
            val_loss, val_accuracy))
        writer.add_scalar('validation loss', val_loss, epoch)
        writer.add_scalar('validation accuracy', val_accuracy, epoch)

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            print('saving best model so far...')
            torch.save(
                resnet50_model.state_dict(),
                osp.join(SAVED_MODEL_DIR,
                         'best_model_resnet50_' + str(epoch) + '.pth'))
            torch.save(
                patch_transformer.state_dict(),
                osp.join(SAVED_MODEL_DIR,
                         'best_model_patch_transformer_' + str(epoch) + '.pth'))
            torch.save(
                multi_tag_attention.state_dict(),
                osp.join(SAVED_MODEL_DIR,
                         'best_model_multi_tag_attention_' + str(epoch) + '.pth'))
def predict_and_evaluate():
    validation_loader = data.DataLoader(WSI_Dataset(
        dir=TRAIN_VAL_DIR,
        id_list_file=VAL_ID_LIST_FILE,
        batch_size=BATCH_SIZE,
        transform=transform_pipe_val_test),
                                        batch_size=1)

    resnet50_model = torchvision.models.resnet50()
    # remove the final fully connected layer to suit the problem
    resnet50_model.fc = Identity()
    patch_transformer = PatchTransformationModule()
    multi_tag_attention = MultitagAttentionModule()

    resnet50_model.load_state_dict(torch.load(RESNET_BEST_MODEL_PATH))
    patch_transformer.load_state_dict(
        torch.load(PATCH_TRANSFORMER_BEST_MODEL_PATH))
    multi_tag_attention.load_state_dict(
        torch.load(MULTI_TAG_ATTENTION_BEST_MODEL_PATH))

    resnet50_model.eval()
    patch_transformer.eval()
    multi_tag_attention.eval()
    resnet50_model.to(GPU_DEVICE)
    patch_transformer.to(GPU_DEVICE)
    multi_tag_attention.to(GPU_DEVICE)
    cudnn.benchmark = True

    mce_loss = torch.nn.CrossEntropyLoss()
    softmax = torch.nn.Softmax(dim=1)
    loss_sum = 0
    correct_sum = 0
    samples = len(validation_loader)

    for iter, batch in enumerate(validation_loader):
        image, label, name = batch
        image = np.squeeze(image).float()
        image = image.to(GPU_DEVICE)
        label = label.to(GPU_DEVICE)

        visual_features = resnet50_model(image)  # MxD
        patch_transformer_features = patch_transformer(visual_features)  # MxD
        prediction = multi_tag_attention(patch_transformer_features)  # 1x4

        loss = mce_loss(prediction, label)
        loss_value = loss.data.cpu().numpy()
        loss_sum += loss_value.sum()
        predicted_class = torch.max(softmax(prediction), 1)[1]
        num_corrects = torch.sum(predicted_class == label).data.cpu().numpy()
        correct_sum += num_corrects

        print("iter:{0:4d}, loss:{1:.3f}, prediction/label:{2:1d}/{3:1d}".format(
            iter + 1, loss_value,
            predicted_class.data.cpu().numpy()[0],
            label.data.cpu().numpy()[0]))

    val_loss = float(loss_sum) / float(samples)
    val_accuracy = float(correct_sum) / float(samples)
    print("Validation loss: " + str(val_loss))
    print("Validation accuracy: " + str(val_accuracy))
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import namedtuple

from utils import ReLUConvBN, Conv, Identity, FactorizedReduce

# this object will be needed to represent the discrete architecture extracted
# from the architectural parameters. See the method genotype() below.
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

# operations set
OPS = {
    'avg_pool_3x3': lambda C, stride: nn.AvgPool2d(
        3, stride=stride, padding=1, count_include_pad=False),
    'max_pool_3x3': lambda C, stride: nn.MaxPool2d(3, stride=stride, padding=1),
    'skip_connect': lambda C, stride: (
        Identity() if stride == 1 else FactorizedReduce(C, C)),
    'conv_3x3': lambda C, stride: Conv(C, C, 3, stride, 1),
}
PRIMITIVES = list(OPS.keys())  # operations set as list of strings


class MixedOp(nn.Module):
    """Base class for the mixed operation."""

    def __init__(self, C, stride):
        """
        :C: int; number of filters in each convolutional operation
        :stride: int; stride of the convolutional/pooling kernel
        """
        super(MixedOp, self).__init__()
        self._ops = nn.ModuleList()
        # iterate through the operation set and append each op to self._ops
        for primitive in PRIMITIVES:
            self._ops.append(OPS[primitive](C, stride))
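    # The original snippet ends mid-class. A minimal sketch of the usual
    # DARTS-style forward pass, assuming the standard continuous relaxation
    # where `weights` is a softmax over the architectural parameters:
    def forward(self, x, weights):
        # weighted sum of every candidate operation's output
        return sum(w * op(x) for w, op in zip(weights, self._ops))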
def __init__(self, num_classes):
    super().__init__()
    self._resnet = torchvision.models.resnet18(pretrained=True)
    self._resnet.fc = Identity()
    self._fc = torch.nn.Linear(in_features=512, out_features=num_classes)
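# As with the variational head earlier, the forward pass is not shown; a
# minimal sketch under the same assumption:
def forward(self, x):
    features = self._resnet(x)  # (N, 512) since fc was replaced with Identity
    return self._fc(features)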