Beispiel #1
0
    def __init__(self):
        """Store the DirCNNh5 configuration and build every sub-module.

        Plain attributes hold the optimiser, data and training settings.
        Two ``DD`` stages, each driven by a small Linear/ReLU/BatchNorm1d
        MLP, are followed by a fully connected head and a log-softmax
        over dimension 1.
        """
        super(Net, self).__init__()

        # Model identifier.
        self.name = "DirCNNh5"

        # Optimiser settings.
        self.lr = 0.001
        self.optimizer_name = 'Adam-Exp'

        # Data settings ("ModelNet10" is the alternative data set name).
        self.data_name = "Geometry"
        self.batch_size = 20
        self.nr_points = 1024
        # 10 classes for 'ModelNet10', 40 for any other data set name.
        if self.data_name == 'ModelNet10':
            self.nr_classes = 10
        else:
            self.nr_classes = 40

        # Training schedule.
        self.max_epochs = 301
        self.save_every = 100

        # Forwarded to both DD stages as their ``k`` and ``l`` arguments.
        self.k = 20
        self.l = 7

        # First DD stage: three Linear -> ReLU -> BatchNorm1d groups
        # mapping 3 -> 64 -> 64 -> 64 features.
        self.in_size = 3
        self.out_size = 64
        hidden = 64
        first_mlp = Sequential(
            Linear(self.in_size, hidden),
            ReLU(),
            torch.nn.BatchNorm1d(hidden),
            Linear(hidden, hidden),
            ReLU(),
            torch.nn.BatchNorm1d(hidden),
            Linear(hidden, self.out_size),
            ReLU(),
            torch.nn.BatchNorm1d(self.out_size),
        )
        self.dd = DD(
            l=self.l,
            k=self.k,
            mlp=first_mlp,
            conv_p=True,
            conv_fc=False,
            conv_fn=False,
            out_3d=True,
        )

        # Second DD stage: a single Linear -> ReLU -> BatchNorm1d group
        # mapping 192 -> 128 features.
        self.in_size_2 = 64 * 3
        self.out_size_2 = 128
        second_mlp = Sequential(
            Linear(self.in_size_2, self.out_size_2),
            ReLU(),
            torch.nn.BatchNorm1d(self.out_size_2),
        )
        self.dd2 = DD(
            l=self.l,
            k=self.k,
            mlp=second_mlp,
            conv_p=False,
            conv_fc=False,
            conv_fn=True,
            out_3d=False,
        )

        # Fully connected head: 128 -> 1024 -> 512 -> 265 -> nr_classes.
        # NOTE(review): 265 looks like a typo for 256; it is kept because
        # changing it would alter the shapes of saved weights.
        width_1, width_2, width_3 = 1024, 512, 265
        self.nn1 = torch.nn.Linear(self.out_size_2, width_1)
        self.bn1 = torch.nn.BatchNorm1d(width_1)
        self.nn2 = torch.nn.Linear(width_1, width_2)
        self.bn2 = torch.nn.BatchNorm1d(width_2)
        self.nn3 = torch.nn.Linear(width_2, width_3)
        self.bn3 = torch.nn.BatchNorm1d(width_3)
        self.nn4 = torch.nn.Linear(width_3, self.nr_classes)

        self.sm = torch.nn.LogSoftmax(dim=1)
Beispiel #2
0
 def __init__(self, in_features, out_features):
     """Create two equally shaped linear layers, ``fc_r`` and ``fc_i``.

     NOTE(review): the attribute names suggest real and imaginary parts;
     confirm against the class's ``forward``.
     """
     super(ComplexLinear, self).__init__()
     shape = (in_features, out_features)
     self.fc_r = Linear(*shape)
     self.fc_i = Linear(*shape)
Beispiel #3
0
 def make_model_and_optim():
     """Return a bias-free CUDA linear model and its AdaScale-wrapped SGD optimizer.

     ``in_dim`` and ``accum_steps`` are read from names defined outside
     this function.
     """
     net = Linear(in_dim, 2, bias=False).cuda()
     sgd = SGD(net.parameters(), lr=0.1, momentum=0.9)
     scaled = AdaScale(sgd, num_gradients_to_accumulate=accum_steps)
     return net, scaled
Beispiel #4
0
    def __init__(
            self,
            embed_dim=None,  # type: Optional[int]
            num_heads=1,  # type: int
            dropout=0.0,  # type: float
            bias=True,  # type: bool
            add_bias_kv=False,  # type: bool
            add_zero_attn=False,  # type: bool
            kdim=None,  # type: Optional[int]
            vdim=None,  # type: Optional[int]
            head_dim=None,  # type: Optional[int]
            pattern_dim=None,  # type: Optional[int]
            out_dim=None,  # type: Optional[int]
            disable_out_projection=False,  # type: bool
            key_as_static=False,  # type: bool
            query_as_static=False,  # type: bool
            value_as_static=False,  # type: bool
            value_as_connected=False,  # type: bool
            normalize_pattern=False,  # type: bool
            normalize_pattern_affine=False  # type: bool
    ):
        """
        Validate the configuration, derive all dimensions and register the projection parameters.

        :param embed_dim: input size of the projection weights; reset to None in static execution
        :param num_heads: number of heads; multiplies head_dim / pattern_dim into the "virtual" dimensions
        :param dropout: stored unchanged as self.dropout
        :param bias: create in_proj_bias (if any input is non-static) and a bias in the output projection
        :param add_bias_kv: create bias_k / bias_v for whichever of key / value is not static
        :param add_zero_attn: stored unchanged as self.add_zero_attn
        :param kdim: key dimension; defaults to embed_dim
        :param vdim: value dimension; defaults to embed_dim
        :param head_dim: per-head association dimension; defaults to embed_dim // num_heads
        :param pattern_dim: per-head pattern dimension; defaults to the resolved head dimension
        :param out_dim: output projection size; defaults to embed_dim
        :param disable_out_projection: if set, no output projection module is created
        :param key_as_static: if set, no key projection weight is created
        :param query_as_static: if set, no query projection weight is created
        :param value_as_static: if set, no value projection weight is created
        :param value_as_connected: requires kdim == vdim; changes the input size of v_proj_weight
        :param normalize_pattern: stored unchanged; required when normalize_pattern_affine is set
        :param normalize_pattern_affine: create p_norm_weight / p_norm_bias parameters
        :raises AssertionError: if any of the consistency checks below fails
        """
        super(HopfieldCore, self).__init__()

        # The three *_as_static flags must be real bools because they are summed
        # and multiplied as integers further down.
        assert (type(key_as_static)
                == bool) and (type(query_as_static)
                              == bool) and (type(value_as_static) == bool)
        self.key_as_static, self.query_as_static, self.value_as_static = key_as_static, query_as_static, value_as_static
        # Number of inputs (query, key, value) that still need a projection.
        num_non_static = 3 - (self.key_as_static + self.query_as_static +
                              self.value_as_static)
        assert 0 <= num_non_static < 4

        self.value_as_connected = value_as_connected
        self.normalize_pattern, self.normalize_pattern_affine = normalize_pattern, normalize_pattern_affine
        self.disable_out_projection = disable_out_projection

        # In case of a static-only executions, check corresponding projections and normalizations.
        # NOTE(review): _check_execution_mode is defined outside this block; its exact criteria
        # are not visible here.
        self.static_execution = self._check_execution_mode()
        if self.static_execution:
            embed_dim, kdim, vdim = None, None, None
        if embed_dim is None:
            assert self.static_execution, r'static-only execution requires all projections to be deactivated.'

        # Check and set all other properties, conditioned on <static_execution>.
        self.embed_dim = embed_dim
        self.kdim = kdim if kdim is not None else embed_dim
        self.vdim = vdim if vdim is not None else embed_dim
        # A single packed in_proj_weight is only used when key/value sizes equal embed_dim,
        # no explicit pattern_dim was given and the value is not "connected".
        self._qkv_same_embed_dim = all(
            (self.kdim == embed_dim, self.vdim == embed_dim,
             pattern_dim is None, not self.value_as_connected))
        assert (not self.value_as_connected) or (
            self.kdim
            == self.vdim), r'key and value need to be of same dimension.'

        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = None
        self.pattern_dim = pattern_dim
        self.virtual_hopfield_dim = None
        self.virtual_pattern_dim = None
        # Dimensions are only resolved when at least one projection is active;
        # in static execution they all stay None (pattern_dim keeps the passed value).
        if not self.static_execution:
            if head_dim is None:
                self.head_dim = embed_dim // num_heads
                assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads."
            else:
                assert head_dim > 0, "dimension of the association space has to be positive."
                self.head_dim = head_dim
            if self.pattern_dim is None:
                self.pattern_dim = self.head_dim
            # "Virtual" dimensions are the per-head sizes multiplied by the number of heads.
            self.virtual_hopfield_dim = self.num_heads * self.head_dim
            self.virtual_pattern_dim = self.num_heads * self.pattern_dim

        self.out_dim = embed_dim if out_dim is None else out_dim
        assert disable_out_projection or (
            self.out_dim >
            0), "output projection dimension has to be positive."

        if normalize_pattern_affine:
            assert normalize_pattern, "affine pattern normalization without pattern normalization has no effect."
            # NOTE(review): these use the raw <head_dim> argument, which is still None when the
            # default was derived into self.head_dim above -- confirm whether self.head_dim was intended.
            self.p_norm_weight = Parameter(torch.Tensor(head_dim))
            self.p_norm_bias = Parameter(torch.Tensor(head_dim))
        else:
            self.register_parameter('p_norm_weight', None)
            self.register_parameter('p_norm_bias', None)

        if self._qkv_same_embed_dim is False:
            # Separate projection weights per input; static inputs get a None placeholder.
            if query_as_static:
                self.register_parameter('q_proj_weight', None)
            else:
                self.q_proj_weight = Parameter(
                    torch.Tensor(self.virtual_hopfield_dim, embed_dim))
            if key_as_static:
                self.register_parameter('k_proj_weight', None)
            else:
                self.k_proj_weight = Parameter(
                    torch.Tensor(self.virtual_hopfield_dim, self.kdim))
            if value_as_static:
                self.register_parameter('v_proj_weight', None)
            else:
                # A connected value with a projected key takes virtual_hopfield_dim as
                # input size; otherwise the input size is vdim.
                self.v_proj_weight = Parameter(
                    torch.Tensor(
                        self.virtual_pattern_dim, self.virtual_hopfield_dim if
                        (value_as_connected
                         and not key_as_static) else self.vdim))
            self.register_parameter('in_proj_weight', None)
        else:
            # Packed projection weight: one row block per non-static input.
            if num_non_static > 0:
                self.in_proj_weight = Parameter(
                    torch.empty(
                        (not query_as_static) * self.virtual_hopfield_dim +
                        (not key_as_static) * self.virtual_hopfield_dim +
                        (not value_as_static) * self.virtual_pattern_dim,
                        embed_dim))
            else:
                self.register_parameter('in_proj_weight', None)
            self.register_parameter('q_proj_weight', None)
            self.register_parameter('k_proj_weight', None)
            self.register_parameter('v_proj_weight', None)

        if bias and (num_non_static > 0):
            # NOTE(review): unlike in_proj_weight, the value part of the bias is not multiplied by
            # (not value_as_static) -- confirm this layout against the forward pass.
            self.in_proj_bias = Parameter(
                torch.empty((not query_as_static) * self.virtual_hopfield_dim +
                            (not key_as_static) * self.virtual_hopfield_dim +
                            self.virtual_pattern_dim))
        else:
            self.register_parameter('in_proj_bias', None)
        if disable_out_projection:
            self.register_parameter('out_proj', None)
        else:
            # _LinearWithBias is referenced from outside this block and may be None,
            # in which case a plain Linear is used instead.
            if bias and _LinearWithBias is not None:
                self.out_proj = _LinearWithBias(self.virtual_pattern_dim,
                                                self.out_dim)
            else:
                self.out_proj = Linear(self.virtual_pattern_dim,
                                       self.out_dim,
                                       bias=bias)

        self.bias_k, self.bias_v = None, None
        if add_bias_kv:
            if not key_as_static:
                self.bias_k = Parameter(
                    torch.empty(1, 1, self.virtual_hopfield_dim))
            if not value_as_static:
                # NOTE(review): bias_v is sized with virtual_hopfield_dim, like bias_k --
                # confirm that virtual_pattern_dim was not intended.
                self.bias_v = Parameter(
                    torch.empty(1, 1, self.virtual_hopfield_dim))
            assert not (self.bias_k is None and self.bias_v is None
                        ), r'cannot set key/value bias if both are static.'

        self.add_zero_attn = add_zero_attn
        # The tensors above are allocated uninitialised; reset_parameters is defined
        # outside this block and is called last, presumably to fill them -- TODO confirm.
        self.reset_parameters()
Beispiel #5
0
Compute the gradient with PyTorch and the gradient variance with BackPACK.
"""

from torch.nn import CrossEntropyLoss, Flatten, Linear, Sequential

from backpack import backpack, extend, extensions
from backpack.utils.examples import load_mnist_data

# Batch size requested from the MNIST loader.
B = 4
X, y = load_mnist_data(B)

print("# Gradient with PyTorch, gradient variance with BackPACK | B =", B)

# Both the model and the loss function are passed through BackPACK's ``extend``.
model = extend(Sequential(Flatten(), Linear(784, 10)))
lossfunc = extend(CrossEntropyLoss())

loss = lossfunc(model(X), y)

# Run the backward pass inside the ``backpack`` context with the Variance extension.
with backpack(extensions.Variance()):
    loss.backward()

# Report, per parameter, the shapes of ``.grad`` and ``.variance``.
for name, param in model.named_parameters():
    grad_shape = param.grad.shape
    variance_shape = param.variance.shape
    print(name)
    print(".grad.shape:             ", grad_shape)
    print(".variance.shape:         ", variance_shape)
    def __init__(self, D_key, D_query):
        """Create the two bias-free projections ``W_k`` and ``W_q``.

        ``W_k`` maps ``D_key`` features to ``D_query``; ``W_q`` maps
        ``D_key + D_query`` features to ``D_query``.
        """
        super(AttentionLayer, self).__init__()

        combined_width = D_key + D_query
        self.W_k = Linear(D_key, D_query, bias=False)
        self.W_q = Linear(combined_width, D_query, bias=False)
Beispiel #7
0
 def __init__(self, state_dim, action_dim):
     """Build an MLP mapping ``state_dim + action_dim`` inputs to one output.

     Layer widths are (state_dim + action_dim) -> 64 -> 32 -> 1 with
     LeakyReLU between the linear layers.
     """
     super().__init__()
     stack = [
         Linear(state_dim + action_dim, 64),
         LeakyReLU(),
         Linear(64, 32),
         LeakyReLU(),
         Linear(32, 1),
     ]
     self.model = Sequential(*stack)
Beispiel #8
0
import torch
from torch.optim import Adam
from torch.nn.functional import cross_entropy
from collections import OrderedDict
from torch.nn import Linear, ReLU, Sequential

def classify_target(x, y):
    """Return 1 where ``y`` lies strictly above ``sin(3 * x)``, else 0 (as int64)."""
    boundary = torch.sin(x * 3)
    is_above = y > boundary
    return is_above.long()

# Three named stages: 2 -> 20 -> 20 -> 2, with ReLU after the first two.
mlp = torch.nn.Sequential(OrderedDict(
    layer1=Sequential(Linear(2, 20), ReLU()),
    layer2=Sequential(Linear(20, 20), ReLU()),
    layer3=Sequential(Linear(20, 2)),
))

mlp.cuda()

optimizer = Adam(mlp.parameters(), lr=0.01)
for iteration in range(1024):
    # Draw a fresh random batch on the GPU and label it with the target rule.
    in_batch = torch.randn(10000, 2, device='cuda')
    target_batch = classify_target(in_batch[:, 0], in_batch[:, 1])
    out_batch = mlp(in_batch)
    loss = cross_entropy(out_batch, target_batch)
    # No parameter update happens on iteration 0.
    if iteration != 0:
        mlp.zero_grad()
        loss.backward()
        optimizer.step()
    # Report at iterations 0, 1, 3, 7, 15, ...: numbers whose binary form is
    # all ones, i.e. n & (n + 1) == 0.
    if (iteration & (iteration + 1)) == 0:
        pred_batch = out_batch.max(dim=1).indices
        hits = (pred_batch == target_batch).float().sum()
        accuracy = hits / in_batch.shape[0]
        print(f'Iteration {iteration} accuracy: {accuracy}')
 def __init__(self):
     """Create a 1 -> 20 hidden layer and a 20 -> 1 output layer."""
     super(Net, self).__init__()
     hidden_units = 20
     self.hidden_layer = Linear(1, hidden_units)
     self.out_layer = Linear(hidden_units, 1)
Beispiel #10
0
 def __init__(self, in_channels):
     """Create two square linear maps plus a final projection to one value.

     ``lin_src`` and ``lin_dst`` both map ``in_channels`` to
     ``in_channels``; ``lin_final`` maps ``in_channels`` to 1.
     """
     super().__init__()
     self.lin_src, self.lin_dst = [
         Linear(in_channels, in_channels) for _ in range(2)
     ]
     self.lin_final = Linear(in_channels, 1)
Beispiel #11
0
# Report the shape of ``b``, which is defined before this excerpt.
print("b shape", b.shape)


def forward(x):
    """Return the affine prediction ``w * x + b`` using the module-level ``w`` and ``b``."""
    return w * x + b


# Apply the hand-written ``forward`` to a batch of three single-feature rows.
x = torch.tensor([[1.0], [2.0], [3.0]])
yhat = forward(x)
print("The Prediction: ", yhat)
print("Y size", yhat.shape)

# Seed the RNG before the layer below is created.
torch.manual_seed(1)

# Same kind of model, now as a Linear layer with one input and one output feature.
lr = Linear(in_features=1, out_features=1, bias=True)
print("Parameters w and b: ", list(lr.parameters()))

# Inspect the layer's state dictionary, then its keys and values separately.
print("Python Dictionary", lr.state_dict())
print("keys:", lr.state_dict().keys())
print("values:", lr.state_dict().values())

# The same parameters, accessed directly as attributes.
print("weight:", lr.weight)
print("bias:", lr.bias)

# Prediction for a single row.
x = torch.tensor([[1.0]])
yhat = lr(x)
print("The prediction: ", yhat)

# Prediction for two rows at once.
x = torch.tensor([[1.0], [2.0]])
yhat = lr(x)
    def __init__(self,
                 pretrained="",
                 checkpoint_path=None,
                 freeze_nlayers=0,
                 round_at: float = None,
                 demo_mode=False):
        """Build the layer stack, then optionally load and/or freeze weights.

        Args:
            pretrained: ``"deconv_weights"`` loads only ``ct1``-``ct4`` and
                ``details``; ``"all"`` additionally loads ``c2``-``c7``,
                ``lin1`` and ``lin2``; any other value loads nothing.
            checkpoint_path: Passed as ``path`` to
                ``load_model_layers_from_path`` when weights are loaded.
            freeze_nlayers: Number of leading child modules, in
                registration order, whose parameters get
                ``requires_grad = False``. ``0`` freezes nothing; a value
                that never matches a position (e.g. negative) freezes
                every child.
            round_at: Stored on the instance unchanged.
            demo_mode: Stored on the instance unchanged.
        """
        super(S20DeconvToDrySpotEff2, self).__init__()

        # Transposed-convolution stage followed by the ``details`` convolution;
        # these five layers form the "deconv_weights" pretrained set.
        self.ct1 = ConvTranspose2d(1, 256, 3, stride=2)
        self.ct2 = ConvTranspose2d(256, 128, 5, stride=2)
        self.ct3 = ConvTranspose2d(128, 64, 10, stride=2)
        self.ct4 = ConvTranspose2d(64, 16, 17, stride=2)
        self.details = Conv2d(16, 8, 5)

        # Convolution stack.
        self.c2 = Conv2d(8, 16, 13)
        self.c3 = Conv2d(16, 64, 7)
        self.c4 = Conv2d(64, 128, 3)
        self.c5 = Conv2d(128, 256, 3)
        self.c6 = Conv2d(256, 512, 3)
        self.c7 = Conv2d(512, 512, 1)

        # Pooling, fully connected layers and dropout.
        self.maxpool = nn.MaxPool2d(2, 2)
        self.lin1 = Linear(1024, 256)
        self.lin2 = Linear(256, 1)
        self.dropout = nn.Dropout(0.3)

        self.round_at = round_at
        self.demo_mode = demo_mode

        # Pick the set of layer names to restore from the checkpoint, if any.
        deconv_layers = {'ct1', 'ct2', 'ct3', 'ct4', 'details'}
        if pretrained == "deconv_weights":
            wanted_layers = deconv_layers
        elif pretrained == "all":
            wanted_layers = deconv_layers | {
                'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'lin1', 'lin2'
            }
        else:
            wanted_layers = None

        if wanted_layers is not None:
            log = logging.getLogger(__name__)
            weights = load_model_layers_from_path(path=checkpoint_path,
                                                  layer_names=wanted_layers)
            # strict=False: keys absent from ``weights`` are reported, not raised.
            incomp = self.load_state_dict(weights, strict=False)
            log.debug(f'All layers: {self.state_dict().keys()}')
            log.debug(f'Loaded weights but the following: {incomp}')

        if freeze_nlayers != 0:
            # Walk the children in registration order and stop after the
            # ``freeze_nlayers``-th one.
            for position, child in enumerate(self.children(), start=1):
                logging.getLogger(__name__).info(f'Freezing: {child}')

                for weight in child.parameters():
                    weight.requires_grad = False
                if position == freeze_nlayers:
                    break
Beispiel #13
0
 def __init__(self,
              input_size: int,
              input_module_class: Callable,
              rnn_module_class: Callable,
              output_size: int,
              option_size: int,
              rnn_size: int,
              intra_option_policy: str,
              intra_option_kwargs: [dict, None] = None,
              input_module_kwargs: [dict, None] = None,
              use_interest: bool = False,
              use_diversity: bool = False,
              use_attention: bool = False,
              baselines_init: bool = True,
              prev_action: np.ndarray = np.ones(5, dtype=bool),
              prev_reward: np.ndarray = np.ones(5, dtype=bool),
              prev_option: np.ndarray = np.zeros(5, dtype=bool),
              NORM_EPS: float = 1e-6):
     """Build the recurrent option-critic: preprocessors, RNNs and output heads.

     Args:
         input_size: Forwarded to every input module as ``input_size``.
         input_module_class: Callable building one preprocessor; the
             instance must expose ``output_size``.
         rnn_module_class: Called as ``rnn_module_class(in_size, rnn_size)``.
         output_size: Passed to the intra-option policy; also the width
             added to an RNN input when the previous action is fed in.
         option_size: Output width of the beta, q, q_ent, pi_omega and
             interest heads.
         rnn_size: Hidden size of each RNN and input width of each head.
         intra_option_policy: ``'discrete'`` selects
             ``DiscreteIntraOptionPolicy``; anything else selects
             ``ContinuousIntraOptionPolicy``.
         intra_option_kwargs: Extra keyword arguments for the policy.
         input_module_kwargs: Extra keyword arguments for the input modules.
         use_interest: Build a real interest preprocessor, RNN and head
             instead of ``Dummy`` placeholders.
         use_diversity: Build a real ``q_ent`` head instead of a ``Dummy``.
         use_attention: Stored on the instance unchanged.
         baselines_init: Apply ``apply_init`` to the preprocessors, RNNs
             and heads.
         prev_action: Boolean flags; entry ``i`` adds ``output_size`` to
             the input width of RNN ``i``.
         prev_reward: Boolean flags; entry ``i`` adds 1 to the input
             width of RNN ``i``.
         prev_option: Boolean flags; entry ``i`` adds ``option_size`` to
             the input width of RNN ``i``.
         NORM_EPS: Stored on the instance unchanged.

     RNN index order for the three flag arrays is pi, beta, q, pi_omega
     (indices 0-3); the interest RNN uses the last entry (index -1).
     """
     super().__init__()
     if input_module_kwargs is None:
         input_module_kwargs = {
         }  # Assume module has all necessary arguments
     if intra_option_kwargs is None:
         intra_option_kwargs = {}
     # Work on copies so the caller's dicts are never modified.
     input_module_kwargs = {
         **input_module_kwargs,
         **{
             'input_size': input_size
         }
     }  # Add input size
     intra_option_kwargs = {**intra_option_kwargs}
     self.use_interest = use_interest
     self.use_diversity = use_diversity
     self.use_attention = use_attention
     self.NORM_EPS = NORM_EPS
     pi_class = DiscreteIntraOptionPolicy if intra_option_policy == 'discrete' else ContinuousIntraOptionPolicy
     # Instantiate independent preprocessors for pi, pi_omega, q (and entropy), interest, and termination heads
     self.pi_proc, self.pi_omega_proc, self.q_proc, self.beta_proc = [
         input_module_class(**input_module_kwargs) for _ in range(4)
     ]
     self.int_proc = input_module_class(
         **input_module_kwargs) if use_interest else Dummy(option_size)
     if baselines_init:
         self.pi_proc.apply(apply_init)
         self.pi_omega_proc.apply(apply_init)
         self.q_proc.apply(apply_init)
         self.int_proc.apply(apply_init)
         self.beta_proc.apply(apply_init)
     # From here on ``input_size`` is the preprocessor's output width.
     input_size = self.pi_proc.output_size
     # Each RNN input is the preprocessed observation, optionally widened by
     # the previous option, previous action and previous reward.
     rnn_input_sizes = [
         input_size + prev_option[i] * option_size +
         prev_action[i] * output_size + prev_reward[i] for i in range(4)
     ]
     self.pi_rnn, self.beta_rnn, self.q_rnn, self.pi_omega_rnn = [
         rnn_module_class(s, rnn_size) for s in rnn_input_sizes
     ]
     self.int_rnn = rnn_module_class(
         input_size + prev_option[-1] * option_size +
         prev_action[-1] * output_size +
         prev_reward[-1], rnn_size) if use_interest else None
     if baselines_init:
         lstm_init = partial(apply_init, gain=O_INIT_VALUES['lstm'])
         self.pi_rnn.apply(lstm_init)
         self.pi_omega_rnn.apply(lstm_init)
         self.q_rnn.apply(lstm_init)
         self.beta_rnn.apply(lstm_init)
         if use_interest:
             self.int_rnn.apply(lstm_init)
     # Output heads: each starts with a ReLU applied to the RNN output, so
     # the trainable layer sits at index 1 of every Sequential.
     self.pi = Sequential(
         nn.ReLU(),
         pi_class(rnn_size,
                  option_size,
                  output_size,
                  ortho_init=baselines_init,
                  **intra_option_kwargs))
     self.beta = Sequential(nn.ReLU(), Linear(rnn_size, option_size),
                            nn.Sigmoid())
     self.q = Sequential(nn.ReLU(), Linear(rnn_size, option_size))
     self.q_ent = Sequential(nn.ReLU(), Linear(
         rnn_size, option_size)) if use_diversity else Dummy(option_size,
                                                             out_value=0.)
     self.pi_omega = Sequential(nn.ReLU(), Linear(rnn_size, option_size),
                                nn.Softmax(-1))
     self.interest = Sequential(
         nn.ReLU(), Linear(rnn_size, option_size),
         nn.Sigmoid()) if use_interest else Dummy(option_size)
     self.p_a, self.p_o, self.p_r = prev_action, prev_option, prev_reward
     if baselines_init:
         init_v, init_pi = O_INIT_VALUES['v'], O_INIT_VALUES['pi']
         self.beta[1].apply(apply_init)
         self.pi_omega[1].apply(partial(apply_init, gain=init_pi))
         self.q[1].apply(partial(apply_init, gain=init_v))
         # Index 1 is the Linear layer; index 0 is the parameter-free ReLU,
         # so initialising it would leave the interest head untouched.
         if use_interest:
             self.interest[1].apply(apply_init)
         if use_diversity:
             self.q_ent.apply(apply_init)
Beispiel #14
0
 def __init__(self,
              input_size: int,
              input_module_class: Callable,
              output_size: int,
              option_size: int,
              intra_option_policy: str,
              intra_option_kwargs: [dict, None] = None,
              input_module_kwargs: [dict, None] = None,
              use_interest: bool = False,
              use_diversity: bool = False,
              use_attention: bool = False,
              baselines_init: bool = True,
              NORM_EPS: float = 1e-6):
     """Build the feed-forward option-critic heads.

     Five preprocessors are created from ``input_module_class``, one each
     for the pi, pi_omega, q, interest and beta heads. The ``q_ent`` head
     reuses the q preprocessor. Every head is a ``Sequential`` of its
     preprocessor followed by the head-specific layers.

     Args:
         input_size: Forwarded to every input module as ``input_size``.
         input_module_class: Callable building one preprocessor; the
             instance must expose ``output_size``.
         output_size: Passed to the intra-option policy.
         option_size: Output width of the beta, q, q_ent, pi_omega and
             interest heads.
         intra_option_policy: ``'discrete'`` selects
             ``DiscreteIntraOptionPolicy``; anything else selects
             ``ContinuousIntraOptionPolicy``.
         intra_option_kwargs: Extra keyword arguments for the policy.
         input_module_kwargs: Extra keyword arguments for the input modules.
         use_interest: Build a real interest head instead of a ``Dummy``.
         use_diversity: Build a real ``q_ent`` head instead of a ``Dummy``.
         use_attention: Stored on the instance unchanged.
         baselines_init: Apply ``apply_init`` to preprocessors and heads.
         NORM_EPS: Stored on the instance unchanged.
     """
     super().__init__()

     # Fresh dicts, so the caller's keyword dicts are never modified.
     if input_module_kwargs is None:
         input_module_kwargs = {}
     if intra_option_kwargs is None:
         intra_option_kwargs = {}
     input_module_kwargs = {**input_module_kwargs, 'input_size': input_size}
     intra_option_kwargs = {**intra_option_kwargs}

     self.use_interest = use_interest
     self.use_diversity = use_diversity
     self.use_attention = use_attention
     self.NORM_EPS = NORM_EPS

     if intra_option_policy == 'discrete':
         pi_class = DiscreteIntraOptionPolicy
     else:
         pi_class = ContinuousIntraOptionPolicy

     # Independent preprocessors, in the order pi, pi_omega, q, interest, beta.
     preprocessors = [
         input_module_class(**input_module_kwargs) for _ in range(5)
     ]
     pi_proc, pi_omega_proc, q_proc, int_proc, beta_proc = preprocessors
     if baselines_init:
         for proc in preprocessors:
             proc.apply(apply_init)

     # Width of the features every head receives from its preprocessor.
     feature_size = pi_proc.output_size

     self.pi = Sequential(
         pi_proc,
         pi_class(feature_size,
                  option_size,
                  output_size,
                  ortho_init=baselines_init,
                  **intra_option_kwargs))
     self.beta = Sequential(beta_proc, Linear(feature_size, option_size),
                            nn.Sigmoid())
     self.q = Sequential(q_proc, Linear(feature_size, option_size))
     if use_diversity:
         self.q_ent = Sequential(q_proc, Linear(feature_size, option_size))
     else:
         self.q_ent = Dummy(option_size, out_value=0.)
     self.pi_omega = Sequential(pi_omega_proc,
                                Linear(feature_size, option_size),
                                nn.Softmax(-1))
     if use_interest:
         self.interest = Sequential(int_proc,
                                    Linear(feature_size, option_size),
                                    nn.Sigmoid())
     else:
         self.interest = Dummy(option_size)

     if baselines_init:
         gain_v, gain_pi = O_INIT_VALUES['v'], O_INIT_VALUES['pi']
         # Index 1 of each head is the layer that follows the preprocessor.
         self.beta[1].apply(apply_init)
         self.pi_omega[1].apply(partial(apply_init, gain=gain_pi))
         self.q[1].apply(partial(apply_init, gain=gain_v))
         if use_interest:
             self.interest[1].apply(apply_init)
         if use_diversity:
             self.q_ent[1].apply(apply_init)
# Split ``x`` / ``y`` (defined before this excerpt) with 80% of the samples
# going to training; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    train_size=0.8,
                                                    random_state=1)

# Reorder the training samples by ascending target value, applying the same
# permutation to inputs and targets.
# NOTE(review): ``.values`` suggests pandas objects -- confirm against the loader.
order = y_train.argsort(axis=0)
y_train = y_train.values[order]
# Make the targets a column: one row per sample, one column.
y_train = np.reshape(y_train, newshape=(y_train.shape[0], 1))
x_train = x_train.values[order, :]

# Convert to float tensors for the network.
x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(y_train)

# Regression network: 2 input features -> 10 hidden units -> 1 output.
Net = Sequential(
    # BatchNorm1d(num_features=2),
    Linear(in_features=2, out_features=10),
    ReLU(inplace=True),
    Linear(in_features=10, out_features=1),
)

optimizer = RMSprop(Net.parameters(), lr=0.001)
loss_func = MSELoss()

# NOTE(review): ``Variable`` is presumably torch.autograd.Variable, a legacy
# wrapper -- confirm whether it is still needed.
x_data, y_data = Variable(x_train), Variable(y_train)
# ProgressBar and STEPS are defined outside this excerpt.
bar = ProgressBar(1, STEPS, "train_loss:%.9f")

predict = []
myloss = []

# Each step runs a forward pass over the whole training tensor.
for step in range(STEPS):
    prediction = Net(x_data)
 def __init__(self):
     """Create the single linear layer mapping 2 inputs to 1 output."""
     super(LinearClassifier, self).__init__()
     n_inputs, n_outputs = 2, 1
     self.fully_connected = Linear(n_inputs, n_outputs)
Beispiel #17
0
)

from backpack import convert_module_to_backpack
from backpack.custom_module.branching import Parallel

# Start from a copy of the shared settings. Binding the same list object and
# then using ``+=`` would extend SECONDORDER_SETTINGS in place and leak the
# cases below into every other module that imports it.
SQRT_GGN_SETTINGS = list(SECONDORDER_SETTINGS)

###############################################################################
#                               Embedding                                     #
###############################################################################
# Each entry supplies factories for the input, the module under test, the loss
# function and the targets; "seed" is optional.
SQRT_GGN_SETTINGS += [
    {
        # Six integer indices in [0, 5), embedded and mapped to 4 classes.
        "input_fn": lambda: randint(0, 5, (6, )),
        "module_fn": lambda: Sequential(
            Embedding(5, 3),
            Linear(3, 4),
        ),
        "loss_function_fn": lambda: CrossEntropyLoss(reduction="mean"),
        "target_fn": lambda: classification_targets((6, ), 4),
    },
    {
        # Index tensor of shape (3, 2, 2), embedded and flattened.
        "input_fn": lambda: randint(0, 3, (3, 2, 2)),
        "module_fn": lambda: Sequential(
            Embedding(3, 2),
            Flatten(),
        ),
        "loss_function_fn": lambda: CrossEntropyLoss(reduction="mean"),
        "target_fn": lambda: classification_targets((3, ), 2 * 2),
        "seed": 1,
    },
]
Beispiel #18
0
def _test_create_supervised_trainer(
    model_device: Optional[str] = None,
    trainer_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: Optional[str] = None,
    scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
) -> None:
    """Check ``create_supervised_trainer`` on a zero-initialised ``Linear(1, 1)``.

    The model is trained with SGD (learning rate 0.1) on a single two-sample
    batch, after which the reported loss and the updated weight and bias are
    compared against fixed expected values.

    Args:
        model_device: Device the model is moved to; left where it is if falsy.
        trainer_device: Device passed to ``create_supervised_trainer``.
        trace: If true, the model is replaced by its ``torch.jit.trace`` version.
        amp_mode: Mixed-precision mode forwarded to the trainer. ``"apex"``
            additionally runs ``amp.initialize`` when both devices are
            ``"cuda"``; ``"amp"`` enables the dtype and scaler assertions.
        scaler: Forwarded to the trainer unchanged.
    """
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    # Start from all-zero parameters so the post-step values are predictable.
    model.weight.data.zero_()
    model.bias.data.zero_()
    optimizer = SGD(model.parameters(), 0.1)

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    if amp_mode == "apex" and model_device == trainer_device == "cuda":
        from apex import amp

        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    trainer = create_supervised_trainer(
        model,
        optimizer,
        mse_loss,
        device=trainer_device,
        output_transform=lambda x, y, y_pred, loss: (y_pred, loss.item()),
        amp_mode=amp_mode,
        scaler=scaler,
    )

    x = torch.tensor([[0.1], [0.2]])
    y = torch.tensor([[0.3], [0.5]])
    data = [(x, y)]

    assert model.weight.data[0, 0].item() == approx(0.0)
    assert model.bias.item() == approx(0.0)

    # The run is expected to succeed when both devices are equal, or when
    # exactly one of them is the literal string "cpu".
    if model_device == trainer_device or ((model_device == "cpu") ^
                                          (trainer_device == "cpu")):
        state = trainer.run(data)

        # state.output is (y_pred, loss) as produced by output_transform above.
        assert state.output[-1] == approx(0.17), state.output[-1]
        assert round(model.weight.data[0, 0].item(),
                     3) == approx(0.013), model.weight.item()
        assert round(model.bias.item(), 3) == approx(0.08), model.bias.item()

        if amp_mode == "amp":
            assert state.output[0].dtype is torch.half
            # Only `scaler=True` is expected to attach a scaler to the state.
            if scaler and isinstance(scaler, bool):
                assert hasattr(state, "scaler")
            else:
                assert not hasattr(state, "scaler")
    else:
        if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
            # This is broken in 1.6.0 but will be probably fixed with 1.7.0
            with pytest.raises(
                    RuntimeError,
                    match=r"is on CPU, but expected them to be on GPU"):
                trainer.run(data)
Beispiel #19
0
 def __init__(self, observable_dim: int, delay: int,
              latent_dim: int) -> None:
     """Create the linear embedder.

     Args:
         observable_dim: First factor of the embedder's input width.
         delay: Second factor of the embedder's input width.
         latent_dim: Output width of the embedder.
     """
     super().__init__()
     # The embedder takes observable_dim * delay input features.
     embedder_in_features = observable_dim * delay
     self.linear_embedder = Linear(in_features=embedder_in_features,
                                   out_features=latent_dim)
Beispiel #20
0
    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int) -> None:
        """Create the two linear layers of the network.

        Args:
            input_dim: Number of input features of ``fc1``.
            hidden_dim: Output width of ``fc1`` and input width of ``fc2``.
            output_dim: Number of output features of ``fc2``.
        """
        super().__init__()

        self.fc1 = Linear(input_dim, hidden_dim)
        self.fc2 = Linear(hidden_dim, output_dim)
Beispiel #21
0
    def __init__(self, dim):
        """Build the input encoders, six pairs of GIN convolutions and the output layers.

        Every MLP in this model has the same layout: Linear -> BatchNorm1d ->
        ReLU -> Linear -> BatchNorm1d -> ReLU, with only the width of the first
        Linear varying.

        Args:
            dim: Hidden width used by every encoder, convolution and MLP.
        """
        super().__init__()

        def make_mlp(in_features):
            # Two Linear/BatchNorm1d/ReLU stages; the first maps
            # `in_features` to `dim`, the second keeps the width at `dim`.
            return Sequential(
                Linear(in_features, dim),
                torch.nn.BatchNorm1d(dim),
                ReLU(),
                Linear(dim, dim),
                torch.nn.BatchNorm1d(dim),
                ReLU(),
            )

        def make_gin_stage():
            # Both inner networks are created before either GINConv wraps
            # them, and the combining MLP comes last, so modules are built in
            # the same order as when written out by hand.
            inner_first = make_mlp(dim)
            inner_second = make_mlp(dim)
            conv_first = GINConv(inner_first, train_eps=True)
            conv_second = GINConv(inner_second, train_eps=True)
            return conv_first, conv_second, make_mlp(2 * dim)

        # Input encoders and the MLP that takes 3 * dim features.
        self.node_attribute_encoder = make_mlp(2 * 13)
        self.type_encoder = make_mlp(3)
        self.edge_encoder = make_mlp(4 + 1)
        self.mlp = make_mlp(3 * dim)

        # Six stages, each with two GIN convolutions and an MLP over 2 * dim
        # features. Tuple targets are assigned left to right, which keeps the
        # module registration order conv_1, conv_2, mlp within each stage.
        self.conv1_1, self.conv1_2, self.mlp_1 = make_gin_stage()
        self.conv2_1, self.conv2_2, self.mlp_2 = make_gin_stage()
        self.conv3_1, self.conv3_2, self.mlp_3 = make_gin_stage()
        self.conv4_1, self.conv4_2, self.mlp_4 = make_gin_stage()
        self.conv5_1, self.conv5_2, self.mlp_5 = make_gin_stage()
        self.conv6_1, self.conv6_2, self.mlp_6 = make_gin_stage()

        # Set2Set module followed by two Linear layers ending in 12 outputs.
        self.set2set = Set2Set(1 * dim, processing_steps=6)
        self.fc1 = Linear(2 * dim, dim)
        self.fc4 = Linear(dim, 12)
Beispiel #22
0
 def __init__(self, embedding_size, out_h, out_w):
     """Assemble the MobileFaceNet layers.

     Args:
         embedding_size: Output width of the final linear layer and of the
             batch norm applied after it.
         out_h: Kernel height of ``conv_6_dw``.
         out_w: Kernel width of ``conv_6_dw``.
     """
     super().__init__()

     # Shapes shared by several layers below.
     kernel_3x3 = (3, 3)
     kernel_1x1 = (1, 1)
     stride_1 = (1, 1)
     stride_2 = (2, 2)
     pad_1 = (1, 1)
     pad_0 = (0, 0)

     self.conv1 = Conv_block(
         3, 64, kernel=kernel_3x3, stride=stride_2, padding=pad_1)
     self.conv2_dw = Conv_block(
         64, 64, kernel=kernel_3x3, stride=stride_1, padding=pad_1, groups=64)

     # Each Depth_Wise layer uses stride 2; each is followed by a Residual stack.
     self.conv_23 = Depth_Wise(
         64, 64, kernel=kernel_3x3, stride=stride_2, padding=pad_1, groups=128)
     self.conv_3 = Residual(
         64, num_block=4, groups=128,
         kernel=kernel_3x3, stride=stride_1, padding=pad_1)
     self.conv_34 = Depth_Wise(
         64, 128, kernel=kernel_3x3, stride=stride_2, padding=pad_1, groups=256)
     self.conv_4 = Residual(
         128, num_block=6, groups=256,
         kernel=kernel_3x3, stride=stride_1, padding=pad_1)
     self.conv_45 = Depth_Wise(
         128, 128, kernel=kernel_3x3, stride=stride_2, padding=pad_1, groups=512)
     self.conv_5 = Residual(
         128, num_block=2, groups=256,
         kernel=kernel_3x3, stride=stride_1, padding=pad_1)

     self.conv_6_sep = Conv_block(
         128, 512, kernel=kernel_1x1, stride=stride_1, padding=pad_0)
     # The kernel size comes from the caller; earlier revisions hard-coded
     # (7, 7) and (4, 7) here. Presumably (out_h, out_w) is the spatial size
     # of the feature map at this point — confirm against forward().
     self.conv_6_dw = Linear_block(
         512, 512, groups=512,
         kernel=(out_h, out_w), stride=stride_1, padding=pad_0)
     self.conv_6_flatten = Flatten()
     self.linear = Linear(512, embedding_size, bias=False)
     self.bn = BatchNorm1d(embedding_size)
Beispiel #23
0
 def __init__(self):
     """Create the module's single 4-in / 4-out linear layer."""
     super().__init__()
     self.layer = Linear(4, 4)
Beispiel #24
0
    def __init__(self, in_channels, hidden_channels, out_channels):
        """Create two dense GCN convolutions and the linear output layer.

        Args:
            in_channels: Input width of the first convolution.
            hidden_channels: Output width of both convolutions.
            out_channels: Output width of the linear layer.
        """
        super().__init__()

        self.conv1 = DenseGCNConv(in_channels, hidden_channels)
        self.conv2 = DenseGCNConv(hidden_channels, hidden_channels)
        # The linear layer takes twice the hidden width — presumably the
        # concatenated outputs of conv1 and conv2 (forward() is not visible).
        self.lin = Linear(2 * hidden_channels, out_channels)
Beispiel #25
0
 def __init__(self, dim_in, dim_out, dim_ctx):
     """Create the main layer and the two context-driven layers.

     Args:
         dim_in: Input width of the main layer.
         dim_out: Output width of all three layers.
         dim_ctx: Input width of the gate and bias layers.
     """
     super().__init__()
     # Main transformation of the input.
     self._layer = Linear(dim_in, dim_out)
     # Both hyper layers map the context to dim_out; the bias layer has no
     # bias term of its own.
     self._hyper_bias = Linear(dim_ctx, dim_out, bias=False)
     self._hyper_gate = Linear(dim_ctx, dim_out)
 def __init__(self):
     """Create an FSDP-wrapped inner linear layer and a plain outer linear layer."""
     super().__init__()
     # `fsdp_config` is not defined in this method; it must come from the
     # enclosing scope, which is not visible in this excerpt.
     self.inner = FSDP(Linear(4, 4), **fsdp_config)
     self.outer = Linear(4, 5)
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 use_attention: bool,
                 seq2seq_encoder: Seq2SeqEncoder,
                 seq2vec_encoder: Seq2VecEncoder,
                 span_end_encoder_after: Seq2SeqEncoder,
                 use_decoder_trainer: bool,
                 decoder_beam_search: BeamSearch,
                 kb_configs: dict,
                 other_configs: dict,
                 initializer: InitializerApplicator) -> None:
        """Set up encoders, span predictors, metrics and the optional KB-based decoder.

        Args:
            vocab: Passed to the parent constructor.
            text_field_embedder: Stored as ``self.text_field_embedder``.
            use_attention: If true, use ``seq2seq_encoder`` plus a bilinear
                attention layer; otherwise use ``seq2vec_encoder``.
            seq2seq_encoder: Encoder used on the attention path. Its output
                dimension also sizes the span-start predictor on both paths.
            seq2vec_encoder: Encoder used when ``use_attention`` is false.
            span_end_encoder_after: Encoder feeding the span-end predictor.
            use_decoder_trainer: If true, build the decoder trainer, the
                knowledge base, the action generator and the decoder step.
            decoder_beam_search: Stored as ``self.beam_search``.
            kb_configs: Reads ``'kb_coefficient'`` (default 0.5) and, when the
                decoder trainer is used, ``'kb_to_use'``, ``'lexical_kb_path'``
                and ``'fullgrid_prompts_load_path'``.
            other_configs: Reads ``'constraint_rules_to_turn_on'`` (default
                ``'0,0,0,1'``), ``'rule_2_fraction_participants'`` and
                ``'rule_3_fraction_steps'`` (both default 0.5).
            initializer: Applied to the fully constructed model.

        Raises:
            ValueError: If the decoder trainer is enabled and
                ``kb_configs['kb_to_use']`` is not ``'lexicalkb'``.
        """
        super(ProStructModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.num_actions = len(Action)  # number of actions is hardcoded here.
        # They are defined in Action enum in propara_dataset_reader.py
        self.other_configs = other_configs

        # kb_coefficient * kb_score + (1-kb_coefficient) * model_score
        self.kb_coefficient = torch.nn.Parameter(torch.ones(1).mul(kb_configs.get('kb_coefficient', 0.5)))

        self.use_attention = use_attention
        self.use_decoder_trainer = use_decoder_trainer
        if self.use_attention:
            self.seq2seq_encoder = seq2seq_encoder
            self.time_distributed_seq2seq_encoder = TimeDistributed(TimeDistributed(self.seq2seq_encoder))
            self.time_distributed_attention_layer = \
                TimeDistributed(TimeDistributed(
                    Attention(similarity_function=BilinearSimilarity(2 * seq2seq_encoder.get_output_dim(),
                                                                     seq2seq_encoder.get_output_dim()),
                              normalize=True)))
            self.aggregate_feedforward = Linear(seq2seq_encoder.get_output_dim(),
                                                self.num_actions)
        else:
            self.seq2vec_encoder = seq2vec_encoder
            self.time_distributed_seq2vec_encoder = TimeDistributed(TimeDistributed(self.seq2vec_encoder))
            self.aggregate_feedforward = Linear(seq2vec_encoder.get_output_dim(),
                                                self.num_actions)

        self.span_end_encoder_after = span_end_encoder_after
        # per step per participant
        self.time_distributed_encoder_span_end_after = TimeDistributed(TimeDistributed(self.span_end_encoder_after))

        # Fixme: dimensions
        # NOTE(review): the span-start predictor is sized from seq2seq_encoder
        # even when use_attention is False — confirm this is intended.

        self._span_start_predictor_after = TimeDistributed(TimeDistributed(torch.nn.Linear(2 + 2*seq2seq_encoder.get_output_dim(), 1)))

        self._span_end_predictor_after = TimeDistributed(TimeDistributed(torch.nn.Linear(span_end_encoder_after.get_output_dim(), 1)))

        self._type_accuracy = BooleanAccuracy()
        # Fixme: This is less robust: the loss skips targets equal to -1, so it
        # relies on -1 being the masking value.
        self._loss = torch.nn.CrossEntropyLoss(ignore_index=-1)

        # Fixme: add a metric for location span strings
        self.span_metric = SquadEmAndF1()

        if self.use_decoder_trainer:
            self.decoder_trainer = MaximumMarginalLikelihood()
            if kb_configs['kb_to_use'] == 'lexicalkb':
                kb = KBLexical(
                    lexical_kb_path=kb_configs['lexical_kb_path'],
                    fullgrid_prompts_load_path=kb_configs['fullgrid_prompts_load_path']
                )
            else:
                # Without this branch `kb` would stay unbound and construction
                # would fail further down with an opaque UnboundLocalError.
                raise ValueError(
                    "Unsupported kb_configs['kb_to_use']: {!r}; only 'lexicalkb' is supported."
                    .format(kb_configs['kb_to_use']))

            # Makeshift arrangement to get number of participants in tiny.tsv .
            self.commonsense_based_action_generator = CommonsenseBasedActionGenerator(self.num_actions)
            # Comma-separated integers; a rule is active when its value is > 0.
            self.rules_activated = [int(rule_val.strip()) > 0
                                    for rule_val in self.other_configs.get('constraint_rules_to_turn_on', '0,0,0,1')
                                                        .split(",")]
            self.rule_2_fraction_participants = self.other_configs.get('rule_2_fraction_participants', 0.5)
            self.rule_3_fraction_steps = self.other_configs.get('rule_3_fraction_steps', 0.5)

            self.commonsense_based_action_generator.set_rules_used(self.rules_activated,
                                                                   self.rule_2_fraction_participants,
                                                                   self.rule_3_fraction_steps)
            # [self.rules_activated[0],  # C/D/C/D cannot happen
            #  self.rules_activated[1],  # > 1/2 partic
            #  self.rules_activated[2],  # > 1/2 steps cannot change
            #  self.rules_activated[3]  # until mentioned
            #  ])
            self.decoder_step = ProParaDecoderStep(KBBasedActionScorer(kb=kb, kb_coefficient=self.kb_coefficient),
                                                   valid_action_generator=self.commonsense_based_action_generator)

        self.beam_search = decoder_beam_search
        initializer(self)
Beispiel #28
0
                                          shuffle=True)

# Pull a single batch from the loader and inspect the first image.
images, label = next(iter(trainloader))
images.size()
im1 = images[0]
im1.size()
# Drop size-1 dimensions before plotting (presumably a single-channel image).
im1_plt = np.squeeze(im1)
plt.imshow(im1_plt)

# Iterate over the whole dataset one batch at a time.
for image, label in trainloader:
    pass
    # Apply your deep-learning model to the batch here.
############################################ Linear transformation
from torch.nn import Linear
# Linear layer mapping 10 input features to 5 output features.
l1 = Linear(in_features=10, out_features=5, bias=True)
# A single random input sample with 10 features.
inp = Variable(torch.randn(1, 10))
# Apply the linear transformation to the input; the result has size (1, 5).
l1(inp).size()
# Accessing the trainable parameters.

# The weight is stored as (out_features, in_features) = (5, 10); the layer
# computes inp @ weight.T + bias, so a (1, 10) input gives a (1, 5) output.
l1.weight
l1.weight.size()
l1.bias

# `super` is a shortcut to access a base class without having to know its type or name.
# Here `super` is used to pass the child class's arguments to the parent class.
# Sample network code:

Beispiel #29
0
def test_add_param_group(debias_ewma):
    """Test AdaScale supports add_param_group() API."""

    def fixed_linear(n_in, n_out, weight_values, bias_value):
        # Weights and bias are made deterministic, which is needed for
        # multi-layer models: there, the adascale gain is affected by
        # parameters from other layers.
        layer = Linear(n_in, n_out, bias=True)
        with torch.no_grad():
            layer.weight.copy_(Tensor(weight_values).reshape(n_out, n_in))
            layer.bias.fill_(bias_value)
        return layer

    def accumulate_two_passes(net):
        # Two forward/backward passes, matching num_gradients_to_accumulate=2.
        for sample in ([1.0, 2.0], [3.0, 4.0]):
            batch = Tensor(sample).cuda()
            result = net(batch)
            result.sum().backward()

    model1 = fixed_linear(2, 2, [1.0, 2.0, 3.0, 4.0], 0.1)
    optim = AdaScale(SGD(model1.parameters(), lr=0.1),
                     num_gradients_to_accumulate=2,
                     debias_ewma=debias_ewma)
    assert len(optim._hook_handles) == 2

    model2 = fixed_linear(2, 3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 0.2)
    optim.add_param_group({"params": model2.parameters()})
    assert len(optim._hook_handles) == 4

    # make sure we can run the model.
    model = Sequential(model1, model2).cuda()
    accumulate_two_passes(model)

    # make sure the gains are right and we can step.
    # since this is the first step, debias_ewma doesn't affect the value.
    assert np.allclose(optim.gain(), 1.1440223454935758), optim.gain()
    assert np.allclose(optim.gain(0), 1.1428571428571428), optim.gain(0)
    assert np.allclose(optim.gain(1), 1.1471258476157762), optim.gain(1)
    optim.step()
    optim.zero_grad()

    # make sure we can add a PG again after stepping.
    model3 = fixed_linear(3, 4, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] * 2, 0.2)
    optim.add_param_group({"params": model3.parameters()})
    assert len(optim._hook_handles) == 6

    # make sure we can run the model.
    model = Sequential(model1, model2, model3).cuda()
    accumulate_two_passes(model)

    # make sure gains are right and we can step.
    # the last PG's gain is not affected by debias_ewma since it is the first step for that PG.
    expected_overall = 1.1191193589460822 if debias_ewma else 1.1192783954732368
    expected_group_0 = 1.1428571880897151 if debias_ewma else 1.142857188085096
    expected_group_1 = 1.1167103578364508 if debias_ewma else 1.1167104954034948
    assert np.allclose(optim.gain(), expected_overall), optim.gain()
    assert np.allclose(optim.gain(0), expected_group_0), optim.gain(0)
    assert np.allclose(optim.gain(1), expected_group_1), optim.gain(1)
    assert np.allclose(optim.gain(2), 1.117381091722702), optim.gain(2)
    optim.step()
    optim.zero_grad()
Beispiel #30
0
"""
Compute the gradient with PyTorch and the KFLR approximation with BackPACK.
"""

from torch.nn import CrossEntropyLoss, Flatten, Linear, Sequential

from backpack import backpack, extend, extensions
from backpack.utils.examples import load_mnist_data

# Number of samples requested from the MNIST loader.
B = 4
X, y = load_mnist_data(B)

print("# Gradient with PyTorch, KFLR approximation with BackPACK | B =", B)

# Flatten the input and apply one linear layer: 784 features -> 10 outputs.
model = Sequential(Flatten(), Linear(784, 10),)
lossfunc = CrossEntropyLoss()

# Wrap both the model and the loss function with BackPACK's `extend`.
model = extend(model)
lossfunc = extend(lossfunc)

loss = lossfunc(model(X), y)

# Run the backward pass inside the BackPACK context with the KFLR extension.
with backpack(extensions.KFLR()):
    loss.backward()

# Each parameter now has `.grad` and a `.kflr` attribute; `.kflr` is iterated
# below as a collection of tensors.
for name, param in model.named_parameters():
    print(name)
    print(".grad.shape:             ", param.grad.shape)
    print(".kflr (shapes):          ", [kflr.shape for kflr in param.kflr])