Example #1
    def __init__(self,
                 name,
                 render=False,
                 cuda=True,
                 n_steps=1,
                 gamma=0.99,
                 render_mode='rgb_array',
                 norm_rewards='none',
                 norm_states=False,
                 clip_obs=0,
                 clip_rew=0):

        super(BulletEnv, self).__init__()

        self.torch = torch.cuda if cuda else torch
        self.name = name
        self.render_mode = render_mode
        self.gamma = gamma
        # self.env = NormalizedActions(gym.make(name, render=render))
        # self.env = gym.make(name, render=render)
        self.env = gym.make(name)

        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space

        self.ns = self.observation_space.shape[0]

        # Support for state normalization or using time as a feature
        self.state_filter = Identity()
        if norm_states:
            self.state_filter = ZFilter(self.state_filter,
                                        shape=[self.ns],
                                        clip=clip_obs)
        # Support for rewards normalization
        self.reward_filter = Identity()
        if norm_rewards == "rewards":
            self.reward_filter = ZFilter(self.reward_filter,
                                         shape=(),
                                         center=False,
                                         clip=clip_rew)
        elif norm_rewards == "returns":
            self.reward_filter = RewardFilter(self.reward_filter,
                                              shape=(),
                                              gamma=self.gamma,
                                              clip=clip_rew)

        self.n_steps = n_steps
        self.gamma = gamma**(
            n_steps - self.torch.FloatTensor(n_steps).fill_(1).cumsum(0))
        self.render = render
        self.rewards = list(self.torch.FloatTensor(n_steps, 1).zero_())
        self.terminals = list(self.torch.FloatTensor(n_steps, 1).zero_())
        self.reset()
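None of these snippets includes the Identity module they all rely on; a minimal sketch consistent with how every example uses it (a pass-through nn.Module) looks like this. Recent PyTorch versions ship an equivalent built-in, torch.nn.Identity.

import torch.nn as nn

class Identity(nn.Module):
    """Pass-through module: forward returns its input unchanged."""
    def forward(self, x):
        return x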
Example #2
    def __init__(self, num_classes):
        super().__init__()

        self._resnet = torchvision.models.resnet18(pretrained=True)
        self._resnet.fc = Identity()

        self._linear_means = torch.nn.Linear(512, num_classes)
        self._linear_log_vars = torch.nn.Linear(512, num_classes)
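A minimal forward-pass sketch for this head (forward_sketch is illustrative; the class's own forward method is not shown in the snippet): with fc replaced by Identity, resnet18 returns its 512-dimensional pooled features, which feed the two linear heads.

import torch

def forward_sketch(model, x):                      # x: (N, 3, 224, 224)
    features = model._resnet(x)                    # (N, 512) since fc is Identity
    means = model._linear_means(features)          # (N, num_classes)
    log_vars = model._linear_log_vars(features)    # (N, num_classes)
    return means, log_vars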
Example #3
def get_model(args):
    """Use model pretrained on UCF101
    """
    model = resnet.resnet18(num_classes=args.num_classes,
                            shortcut_type='A',
                            sample_size=args.sample_size,
                            sample_duration=args.sample_duration)
    model = model.cuda()
    model = nn.DataParallel(model, device_ids=None)
    print('Use pretrained model {}'.format(args.pretrain_path))
    pretrain = torch.load(args.pretrain_path)
    if 'backbone' in pretrain.keys():
        model.module.fc = Identity()
        model.load_state_dict(pretrain['backbone'])
    else:
        model.load_state_dict(pretrain['state_dict'])
        model.module.fc = Identity()

    return model
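The order of operations matters here: when the checkpoint stores only the backbone, fc is swapped for Identity before load_state_dict so the model's keys match the checkpoint's. A hedged alternative (not used in the original) is non-strict loading, which tolerates missing or unexpected keys at the cost of skipping them silently:

# assumption: same 'backbone' checkpoint layout as above
model.load_state_dict(pretrain['backbone'], strict=False)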
Example #4
    def __init__(self,
                 encoder_archi=None,
                 decoder_archi=None,
                 predictor_structure=[]):
        """Instantiate the class.
        
        Parameters
        ----------
            encoder_archi : None or dict
                If dict:
                    Used to build an LSTM module, so specify all required
                    parameters as keywords. See the torch.nn.LSTM documentation.
                If None:
                    The encoder module is Identity (returns the input).

            decoder_archi : None or dict
                If dict:
                    Used to build an LSTM module, so specify all required
                    parameters as keywords. See the torch.nn.LSTM documentation.
                If None:
                    The decoder module is Identity (returns the input).

            predictor_structure :
                List of fully connected layers. See the utils.fc_block documentation.

        """
        super(KeyWordSelectionModel, self).__init__()
        self.encoder = Identity() if encoder_archi is None else nn.LSTM(
            **encoder_archi)
        self.decoder = Identity() if decoder_archi is None else nn.LSTM(
            **decoder_archi)
        self.predictor = nn.Sequential(
            *fc_block(predictor_structure[0], predictor_structure[1:]),
            nn.Sigmoid())

        self.encoder_archi = encoder_archi
        self.decoder_archi = decoder_archi
        self.predictor_structure = predictor_structure

        self.x_sizes = None
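A hypothetical instantiation (the predictor_structure format is an assumption inferred from the call fc_block(predictor_structure[0], predictor_structure[1:]) above; all sizes are illustrative):

model = KeyWordSelectionModel(
    encoder_archi=dict(input_size=300, hidden_size=128, batch_first=True),
    decoder_archi=None,                 # decoder stays Identity
    predictor_structure=[128, 64, 1],   # first entry = predictor input width (assumed)
)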
Example #5
    def __init__(self,
                 embedding,
                 input_dim,
                 num_softmax=1,
                 dropout=0,
                 padding_idx=-1):
        """Initialize output layer.
        Args:
            embedding: (nn.Module)  : the word embedding module
            input_dim: (int)        : input dimension of OutputLayer
            num_softmax: (int)      : (default 1) number of softmaxes to
                                      calculate; see arxiv.org/abs/1711.03953
                                      for more info. Increasing this can make
                                      the prediction more expressive.
            dropout: (float)        : (default 0.0) dropout ratio
            padding_idx: (int)      : (default -1) the model should output a
                                      large negative score at this index. If
                                      set to -1, this is disabled; if >= 0,
                                      the layer always outputs -1e20 at this
                                      index.
        """
        super().__init__()
        self.embedding = embedding
        self.num_vocab, self.emb_size = embedding.weight.size()
        self.input_dim = input_dim
        self.num_softmax = num_softmax
        self.dropout = dropout
        self.padding_idx = padding_idx

        if self.num_softmax > 1:
            self.prior_trans = nn.Linear(self.input_dim,
                                         self.num_softmax,
                                         bias=False)
            self.latent_trans = nn.Sequential(
                nn.Linear(self.input_dim, self.num_softmax * self.emb_size),
                nn.Tanh(), nn.Dropout(dropout))
        else:
            if self.input_dim != self.emb_size:
                self.output_trans = nn.Sequential(
                    nn.Linear(self.input_dim, self.emb_size, bias=True),
                    nn.Dropout(dropout))
            else:
                self.output_trans = nn.Sequential(Identity(),
                                                  nn.Dropout(dropout))
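A sketch of the mixture-of-softmaxes forward pass this constructor sets up (following arXiv:1711.03953; mos_forward is illustrative, not a method of the original class, and padding_idx masking is omitted):

import torch.nn.functional as F

def mos_forward(layer, hidden):                           # hidden: (N, input_dim)
    latent = layer.latent_trans(hidden)                   # (N, K * emb_size)
    latent = latent.view(-1, layer.num_softmax, layer.emb_size)
    logits = latent.matmul(layer.embedding.weight.t())    # (N, K, num_vocab)
    prior = F.softmax(layer.prior_trans(hidden), dim=-1)  # (N, K) mixture weights
    probs = (prior.unsqueeze(-1) * F.softmax(logits, dim=-1)).sum(dim=1)
    return probs                                          # (N, num_vocab)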
Example #6
    def identity(self):
        return torch.nn.Sequential(Identity())
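The brackets in the original line were a bug: nn.Sequential accepts modules as positional arguments (or an OrderedDict), never a Python list.

import torch.nn as nn

seq = nn.Sequential(nn.Identity())       # ok
# seq = nn.Sequential([nn.Identity()])   # TypeError: a list is not a Module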
Example #7
def train():
    if not osp.exists(SAVED_MODEL_DIR):
        os.makedirs(SAVED_MODEL_DIR)

    if not osp.exists(LOG_PATH):
        os.makedirs(LOG_PATH)

    writer = SummaryWriter(log_dir=LOG_PATH)
    cudnn.enabled = True

    train_loader = data.DataLoader(WSI_Dataset(dir=TRAIN_VAL_DIR,
                                               id_list_file=TRAIN_ID_LIST_FILE,
                                               batch_size=BATCH_SIZE,
                                               transform=transform_pipe_train),
                                   batch_size=1)

    validation_loader = data.DataLoader(WSI_Dataset(
        dir=TRAIN_VAL_DIR,
        id_list_file=VAL_ID_LIST_FILE,
        batch_size=BATCH_SIZE,
        transform=transform_pipe_val_test),
                                        batch_size=1)

    print("Total training wsi: " + str(len(train_loader)))
    print("Total validation wsi: " + str(len(validation_loader)))

    resnet50_model = torchvision.models.resnet50(pretrained=True)

    # remove the final fully connected layer to suit the problem
    resnet50_model.fc = Identity()

    patch_transformer = PatchTransformationModule()
    multi_tag_attention = MultitagAttentionModule()

    # if torch.cuda.device_count() > 1:
    #     print("using " + str(torch.cuda.device_count()) + "GPUs...")
    #     resnet50_model = nn.DataParallel(resnet50_model)
    #     patch_transformer = nn.DataParallel(patch_transformer)
    #     multi_tag_attention = nn.DataParallel(multi_tag_attention)

    resnet50_model.train()
    patch_transformer.train()
    multi_tag_attention.train()

    resnet50_model.to(GPU_DEVICE)
    patch_transformer.to(GPU_DEVICE)
    multi_tag_attention.to(GPU_DEVICE)

    cudnn.benchmark = True

    optimizer_resnet = torch.optim.Adam(resnet50_model.parameters(),
                                        lr=LEARNING_RATE_RESNET)
    optimizer_patch_transformer = torch.optim.Adam(
        patch_transformer.parameters(), lr=LEARNING_RATE_PATCH_TRANSFORMER)
    optimizer_multi_tag_attention = torch.optim.Adam(
        multi_tag_attention.parameters(), lr=LEARNING_RATE_MULTI_TAG_ATTENTION)

    optimizer_resnet.zero_grad()
    optimizer_patch_transformer.zero_grad()
    optimizer_multi_tag_attention.zero_grad()

    # weight = np.array([1 / float(71), 1 / float(132), 1 / float(411), 1 / float(191)])
    # weight_tensor = torch.from_numpy(weight).float().to(GPU_DEVICE)
    # mce_loss = torch.nn.CrossEntropyLoss(weight=weight_tensor)
    mce_loss = torch.nn.CrossEntropyLoss()

    softmax = torch.nn.Softmax(dim=1)
    best_val_accuracy = 0.0

    for epoch in range(1, EPOCHS + 1):
        resnet50_model.train()
        patch_transformer.train()
        multi_tag_attention.train()

        for iter, batch in enumerate(train_loader):
            image, label, name = batch
            image = np.squeeze(image).float()
            image = image.to(GPU_DEVICE)
            label = label.to(GPU_DEVICE)

            optimizer_resnet.zero_grad()
            optimizer_patch_transformer.zero_grad()
            optimizer_multi_tag_attention.zero_grad()

            visual_features = resnet50_model(image)  # MxD
            print(visual_features.shape)

            patch_transformer_features = patch_transformer(
                visual_features)  # MxD
            print(patch_transformer_features.shape)

            prediction = multi_tag_attention(patch_transformer_features)  # 1x4
            print(prediction.shape)

            loss = mce_loss(prediction, label)
            loss.backward()
            loss_value = loss.data.cpu().numpy()

            optimizer_resnet.step()
            optimizer_patch_transformer.step()
            optimizer_multi_tag_attention.step()

            predicted_class = torch.max(softmax(prediction), 1)[1]

            print(
                "epoch:{0:3d}, iter:{1:4d}, loss:{2:.3f}, prediction/label:{3:1d}/{4:1d}"
                .format(epoch, iter + 1, loss_value,
                        predicted_class.data.cpu().numpy()[0],
                        label.data.cpu().numpy()[0]))

        # validation
        resnet50_model.eval()
        patch_transformer.eval()
        multi_tag_attention.eval()

        print("validating...")
        val_loss, val_accuracy = validate(resnet50_model, patch_transformer,
                                          multi_tag_attention,
                                          validation_loader, mce_loss)

        print("val_loss: {0:.3f}, val_accuracy: {1:.3f}".format(
            val_loss, val_accuracy))

        writer.add_scalar('validation loss', val_loss, epoch)
        writer.add_scalar('validation accuracy', val_accuracy, epoch)

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy

            print('saving best model so far...')
            torch.save(
                resnet50_model.state_dict(),
                osp.join(SAVED_MODEL_DIR,
                         'best_model_resnet50_' + str(epoch) + '.pth'))
            torch.save(
                patch_transformer.state_dict(),
                osp.join(SAVED_MODEL_DIR,
                         'best_model_patch_transformer_' + str(epoch) + '.pth'))
            torch.save(
                multi_tag_attention.state_dict(),
                osp.join(
                    SAVED_MODEL_DIR,
                    'best_model_multi_tag_attention_' + str(epoch) + '.pth'))
Example #8
from torch.autograd import Variable
from collections import namedtuple

# this object will be needed to represent the discrete architecture extracted
# from the architectural parameters. See the method genotype() below.
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

# operations set
OPS = {
    'avg_pool_3x3':
    lambda C, stride: nn.AvgPool2d(
        3, stride=stride, padding=1, count_include_pad=False),
    'max_pool_3x3':
    lambda C, stride: nn.MaxPool2d(3, stride=stride, padding=1),
    'skip_connect':
    lambda C, stride: Identity() if stride == 1 else FactorizedReduce(C, C),
    'conv_3x3':
    lambda C, stride: Conv(C, C, 3, stride, 1),
}

PRIMITIVES = list(OPS.keys())  # operations set as list of strings
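# Illustration (not part of the original snippet): each OPS entry is a
# factory keyed by primitive name, e.g.
#   OPS['skip_connect'](C=16, stride=1)   # -> Identity()
#   OPS['skip_connect'](C=16, stride=2)   # -> FactorizedReduce(16, 16)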


class MixedOp(nn.Module):
    """Base class for the mixed operation."""
    def __init__(self, C, stride):
        """
        :C: int; number of filters in each convolutional operation
        :stride: int; stride of the convolutional/pooling kernel
        """
        super(MixedOp, self).__init__()
Example #9
def predict_and_evaluate():
    validation_loader = data.DataLoader(WSI_Dataset(dir=TRAIN_VAL_DIR,
                                                    id_list_file=VAL_ID_LIST_FILE,
                                                    batch_size=BATCH_SIZE,
                                                    transform=transform_pipe_val_test),
                                        batch_size=1)

    resnet50_model = torchvision.models.resnet50()

    # remove the final fully connected layer to suit the problem
    resnet50_model.fc = Identity()

    patch_transformer = PatchTransformationModule()
    multi_tag_attention = MultitagAttentionModule()

    resnet50_model.load_state_dict(torch.load(RESNET_BEST_MODEL_PATH))
    patch_transformer.load_state_dict(torch.load(PATCH_TRANSFORMER_BEST_MODEL_PATH))
    multi_tag_attention.load_state_dict(torch.load(MULTI_TAG_ATTENTION_BEST_MODEL_PATH))

    resnet50_model.eval()
    patch_transformer.eval()
    multi_tag_attention.eval()

    resnet50_model.to(GPU_DEVICE)
    patch_transformer.to(GPU_DEVICE)
    multi_tag_attention.to(GPU_DEVICE)

    cudnn.benchmark = True

    mce_loss = torch.nn.CrossEntropyLoss()
    softmax = torch.nn.Softmax(dim=1)

    loss_sum = 0
    correct_sum = 0
    samples = len(validation_loader)

    for iter, batch in enumerate(validation_loader):
        image, label, name = batch
        image = np.squeeze(image).float()
        image = image.to(GPU_DEVICE)
        label = label.to(GPU_DEVICE)

        visual_features = resnet50_model(image)  # MxD
        patch_transformer_features = patch_transformer(visual_features)  # MxD
        prediction = multi_tag_attention(patch_transformer_features)  # 1x4

        loss = mce_loss(prediction, label)
        loss_value = loss.data.cpu().numpy()
        loss_sum += loss_value.sum()

        predicted_class = torch.max(softmax(prediction), 1)[1]
        num_corrects = torch.sum(predicted_class == label).data.cpu().numpy()
        correct_sum += num_corrects

        print("iter:{0:4d}, loss:{1:.3f}, prediction/label:{2:1d}/{3:1d}".format
              (iter + 1, loss_value, predicted_class.data.cpu().numpy()[0], label.data.cpu().numpy()[0]))

    val_loss = float(loss_sum) / float(samples)
    val_accuracy = float(correct_sum) / float(samples)

    print("Validation loss: " + str(val_loss))
    print("Validation accuracy: " + str(val_accuracy))
Example #10
import torch.nn as nn
import torch.nn.functional as F
from utils import ReLUConvBN, Conv, Identity, FactorizedReduce
from torch.autograd import Variable
from collections import namedtuple

# this object will be needed to represent the discrete architecture extracted
# from the architectural parameters. See the method genotype() below.
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')
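# Illustration (not in the original): a Genotype instance pairs
# (op_name, input_index) edges with the node indices whose outputs are
# concatenated, e.g.
#   Genotype(normal=[('conv_3x3', 0), ('skip_connect', 1)], normal_concat=[2, 3],
#            reduce=[('max_pool_3x3', 0), ('conv_3x3', 1)], reduce_concat=[2, 3])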

# operations set
OPS = {
    'avg_pool_3x3' : lambda C, stride: nn.AvgPool2d(3, stride=stride,
                                                    padding=1, count_include_pad=False),
    'max_pool_3x3' : lambda C, stride: nn.MaxPool2d(3, stride=stride, padding=1),
    'skip_connect' : lambda C, stride: Identity() if stride == 1 else FactorizedReduce(C, C),
    'conv_3x3'     : lambda C, stride: Conv(C, C, 3, stride, 1),
}

PRIMITIVES = list(OPS.keys()) # operations set as list of strings

class MixedOp(nn.Module):
    """Base class for the mixed operation."""
    def __init__(self, C, stride):
        """
        :C: int; number of filters in each convolutional operation
        :stride: int; stride of the convolutional/pooling kernel
        """
        super(MixedOp, self).__init__()
        self._ops = nn.ModuleList()
        # iterate through the operation set and append them to self._ops
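        # (a sketch of the usual DARTS-style continuation; an assumption,
        # since the original snippet is truncated here)
        for primitive in PRIMITIVES:
            self._ops.append(OPS[primitive](C, stride))

    def forward(self, x, weights):
        # Assumed forward: architecture-weighted sum of all candidate ops.
        return sum(w * op(x) for w, op in zip(weights, self._ops))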
Example #11
    def __init__(self, num_classes):
        super().__init__()

        self._resnet = torchvision.models.resnet18(pretrained=True)
        self._resnet.fc = Identity()
        self._fc = torch.nn.Linear(in_features=512, out_features=num_classes)