Example #1
def get_categorical_model(input_neurons, output_neurons, layers=None):
    """
    creates a model with Categorical Crossentropy Loss
    :param input_neurons: input neuron number
    :param output_neurons: output neuron number
    :param layers: list of hidden layer sizes; defaults to [25, 25, 25]
    :return: network with Categorical Crossentropy loss
    """
    if layers is None:
        layers = [25, 25, 25]

    default_act = 'relu'
    model = Sequential()

    idx = 1
    layers.insert(0, input_neurons)  # prepend the input size so consecutive entries pair as (in, out)
    while idx < len(layers):
        model.add(Linear(out=layers[idx], input_size=layers[idx - 1], activation=default_act))
        idx += 1

    # model.add(Dropout(prob=0.2))
    model.add(Linear(out=output_neurons, activation='softmax'))

    # Set loss function to model: Sequential object
    ce = LossCrossEntropy()
    model.loss = ce
    return model
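A hypothetical usage sketch (not part of the original example; the sizes are illustrative):

# Hypothetical usage: 4 input features, 3 output classes,
# built with the default [25, 25, 25] hidden layers.
model = get_categorical_model(input_neurons=4, output_neurons=3)

# Custom hidden sizes can be passed explicitly.
small_model = get_categorical_model(4, 3, layers=[16, 16])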
Example #2
    def __init__(self,
           embed_dim,
           z_channels,
           s_channels,
           num_dilation_layer=10):
        super(Aligner, self).__init__()

        self.embed_dim = embed_dim
        self.z_channels = z_channels
        self.s_channels = s_channels

        self.pre_process = Conv1d(embed_dim, 256, kernel_size=3)

        self.dilated_conv_layers = nn.ModuleList()
        for i in range(num_dilation_layer):
            dilation = 2**i
            self.dilated_conv_layers.append(DilatedConvBlock(256, 256,
                        z_channels, s_channels, dilation))

        self.post_process = nn.Sequential(
            Linear(256, 256),
            nn.ReLU(inplace=False),
            Linear(256, 1),
            nn.ReLU(inplace=False),
        )
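A small sketch (illustrative, not from the source) of the dilation schedule the loop above produces, and the receptive field a stack of kernel-size-3 dilated convolutions with that schedule would cover, assuming each block widens the receptive field by 2 * dilation:

num_dilation_layer = 10
dilations = [2 ** i for i in range(num_dilation_layer)]   # 1, 2, 4, ..., 512
receptive_field = 1 + sum(2 * d for d in dilations)       # 2047 time steps
print(dilations, receptive_field)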
Example #3
    def test_init_not_compatible(self):

        with self.assertRaises(NotCompatibleError):
            model = Sequential([
                Linear(input_size=2, out=22, activation='tanh'),
                Linear(input_size=23, out=22, activation='tanh')
                # second layer's input_size is not compatible with previous layer output_size
            ])
Example #4
    def test_init_not_input_size(self):
        """

        :return:
        """
        with self.assertRaises(InputSizeNotFoundError):
            model = Sequential([
                Linear(out=22, activation='tanh'),  # NO input_size is given
                Linear(input_size=23, out=22, activation='tanh')
            ])
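For contrast with the two failing cases above, a sketch (illustrative sizes) of a stack this API should accept, since each layer's input_size matches the previous layer's out:

model = Sequential([
    Linear(input_size=2, out=22, activation='tanh'),
    Linear(input_size=22, out=10, activation='tanh'),  # input_size == previous layer's out
])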
Example #5
    def test_save_model(self):
        """

        :return:
        """
        model = Sequential()
        model.add(Linear(input_size=2, out=24, activation='tanh'))
        model.add(Linear(input_size=24, out=2, activation='tanh'))

        pass
Example #6
    def test_load_model(self):
        """

        :return:
        """
        model = Sequential()
        model.add(Linear(input_size=2, out=24, activation='tanh'))
        model.add(Linear(input_size=24, out=2, activation='tanh'))

        file_name = "model.h5py"
Example #7
 def _affine_backward(self, x, w, b, dout):
     layer = Linear(w.shape[0], w.shape[1])
     layer.weight = w
     layer.bias = b
     tmp = layer.forward(x)
     layer.backward(dout)
     return layer.dx, layer.dw, layer.db
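A sketch of how a gradient-check test might call this helper, assuming numpy is imported as np and the weight matrix is laid out as (in_features, out_features), which is what Linear(w.shape[0], w.shape[1]) above implies; the method name and shapes are hypothetical:

 def test_affine_backward_shapes(self):
     x = np.random.randn(8, 5)        # batch of 8, 5 input features
     w = np.random.randn(5, 3)        # 5 -> 3
     b = np.zeros(3)
     dout = np.random.randn(8, 3)     # upstream gradient
     dx, dw, db = self._affine_backward(x, w, b, dout)
     assert dx.shape == x.shape and dw.shape == w.shape and db.shape == b.shape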
Example #8
 def __init__(self, d_k, d_v, d_model, n_heads, dropout):
     super(MultiHeadAttention, self).__init__()
     self.attention = _MultiHeadAttention(d_k, d_v, d_model, n_heads,
                                          dropout)
     self.proj = Linear(n_heads * d_v, d_model)
     self.dropout = nn.Dropout(dropout)
     self.layer_norm = LayerNormalization(d_model)
Example #9
 def __init__(
         self,
         d_model: int = 512,  # dimension of model
         input_dim: int = 80,  # dimension of feature vector
         d_ff: int = 2048,  # dimension of feed forward network
         num_layers: int = 6,  # number of encoder layers
         num_heads: int = 8,  # number of attention heads
         ffnet_style: str = 'ff',  # style of feed forward network [ff, conv]
         dropout_p: float = 0.3,  # probability of dropout
         pad_id: int = 0,  # identification of pad token
 ) -> None:
     super(SpeechTransformerEncoder, self).__init__()
     self.d_model = d_model
     self.num_layers = num_layers
     self.num_heads = num_heads
     self.pad_id = pad_id
     self.input_proj = Linear(input_dim, d_model)
     self.input_norm = LayerNorm(d_model)
     self.input_dropout = nn.Dropout(p=dropout_p)
     self.positional_encoding = PositionalEncoding(d_model)
     self.layers = nn.ModuleList([
         SpeechTransformerEncoderLayer(d_model, num_heads, d_ff, dropout_p,
                                       ffnet_style)
         for _ in range(num_layers)
     ])
Example #10
 def __init__(self,
              feature_columns,
              hidden_units,
              activation='relu',
              dnn_dropout=0.,
              embed_reg=1e-6,
              w_reg=1e-6):
     """
     Wide&Deep
     :param feature_columns: A list. sparse column feature information.
     :param hidden_units: A list. Neural network hidden units.
     :param activation: A string. Activation function of dnn.
     :param dnn_dropout: A scalar. Dropout of dnn.
     :param embed_reg: A scalar. The regularizer of embedding.
     :param w_reg: A scalar. The regularizer of Linear.
     """
     super(WideDeep, self).__init__()
     self.sparse_feature_columns = feature_columns
     self.embed_layers = {
         'embed_' + str(i):
         Embedding(input_dim=feat['feat_num'],
                   input_length=1,
                   output_dim=feat['embed_dim'],
                   embeddings_initializer='random_uniform',
                   embeddings_regularizer=l2(embed_reg))
         for i, feat in enumerate(self.sparse_feature_columns)
     }
     self.index_mapping = []
     self.feature_length = 0
     for feat in self.sparse_feature_columns:
         self.index_mapping.append(self.feature_length)
         self.feature_length += feat['feat_num']
     self.dnn_network = DNN(hidden_units, activation, dnn_dropout)
     self.linear = Linear(self.feature_length, w_reg=w_reg)
     self.final_dense = Dense(1, activation=None)
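The only keys the code above reads from each entry of feature_columns are 'feat_num' and 'embed_dim', so a plausible (hypothetical) input is sketched below; the vocabulary sizes and hidden units are illustrative:

feature_columns = [
    {'feat_num': 10000, 'embed_dim': 8},   # e.g. a user-id field
    {'feat_num': 50000, 'embed_dim': 8},   # e.g. an item-id field
]
model = WideDeep(feature_columns, hidden_units=[256, 128, 64])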
Example #11
def main():
    # optimizer = SGD(lr, weight_decay, mu=mu)
    optimizer = Adam(lr, weight_decay)
    model = ListModel(net=[
        Linear(784, 400),
        ReLU(),
        Linear(400, 100),
        ReLU(),
        Linear(100, 10),
        Softmax()
    ],
                      loss=CrossEntropyLoss())
    for epoch in range(num_epochs):
        print('epoch number: {}'.format(epoch))
        train(model, optimizer)
        valid(model)
Example #12
def train(epochs, batch_size, hidden_size, learning_rate):
    """
	    Train a simple feed-forward network to classify MNIST digits,
	    using vanilla SGD to minimize the categorical cross entropy between
	    network outputs and ground truth labels.
	"""

    ff = Sequence(Linear(784, hidden_size), ReLU(),
                  Linear(hidden_size, hidden_size), ReLU(),
                  Linear(hidden_size, 10))

    loss = cross_entropy_loss_with_logits
    loss_grad = cross_entropy_loss_with_logits_grad

    val_set = mnist(val=True)

    def val():
        gen = val_set()
        val_sum = 0.0
        for i, data in enumerate(gen):
            input, label = data
            output = ff.forward(input)
            val_sum += np.argmax(output) == label
        print "Val", val_sum / float(i)

    optim = GradientDescentOptimizer(ff, lr=learning_rate)

    train_set = mnist()

    print "Training .."

    for epoch in range(epochs):
        loss_sum = 0.0
        gen = train_set()
        for i, data in enumerate(gen):
            input, label = data
            label = np.array(label, dtype=np.int32)
            output = ff.forward(input)
            ff.backward(loss_grad(label, output))

            if i > 0 and (i % batch_size == 0):
                optim.step()

            loss_sum += loss(label, output)
        print(epoch, "Loss", loss_sum / (i + 1))
        val()
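cross_entropy_loss_with_logits and its gradient are referenced above but not defined here; a minimal numpy sketch, assuming output is a 1-D logit vector and label an integer class index (np imported as in the rest of this example):

def _softmax(logits):
    e = np.exp(logits - np.max(logits))       # shift for numerical stability
    return e / e.sum()

def cross_entropy_loss_with_logits(label, logits):
    return -np.log(_softmax(logits)[int(label)] + 1e-12)   # NLL of the true class

def cross_entropy_loss_with_logits_grad(label, logits):
    grad = _softmax(logits)                   # dL/dlogits = softmax(logits) - one_hot(label)
    grad[int(label)] -= 1.0
    return grad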
Example #13
 def _create(self, hidden, k, layer, dropout=None):
     if layer == 1:
         return OrderedDict([('linear0', Linear(784, 10, 0))])  # OrderedDict expects (name, module) pairs
     d = OrderedDict()
     for i in range(layer):
         if i == 0:
             d['linear' + str(i)] = Linear(784, hidden, k, self.unified)
             d['relu' + str(i)] = nn.ReLU()
             if dropout:
                 d['dropout' + str(i)] = nn.Dropout(p=dropout)
         elif i == layer - 1:
             d['linear' + str(i)] = Linear(hidden, 10, 0, self.unified)
         else:
             d['linear' + str(i)] = Linear(hidden, hidden, k, self.unified)
             d['relu' + str(i)] = nn.ReLU()
             if dropout:
                 d['dropout' + str(i)] = nn.Dropout(p=dropout)
     return d
Example #14
 def _create(self, hidden, k, layer, dropout=None):
     if layer == 1:
         return OrderedDict([('linear0', Linear(784, 10, 0))])  # OrderedDict expects (name, module) pairs
     d = OrderedDict()
     for i in range(layer):
         if i == 0:  # input layer case
             d['linear' + str(i)] = Linear(784, hidden, k, self.unified)
             d['relu' + str(i)] = nn.ReLU()
             if dropout:
                 d['dropout' + str(i)] = nn.Dropout(p=dropout)
         elif i == layer - 1:  # final layer/readout layer.
             d['linear' + str(i)] = Linear(hidden, 10, 0, self.unified)
         else:  # standard middle layer
             d['linear' + str(i)] = Linear(hidden, hidden, k, self.unified)
             d['relu' + str(i)] = nn.ReLU()
             if dropout:
                 d['dropout' + str(i)] = nn.Dropout(p=dropout)
     return d
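A hedged usage note: the OrderedDict returned by _create is presumably handed to nn.Sequential, which accepts an OrderedDict of named modules and preserves their order; the argument values below are illustrative:

     # Inside the same class, e.g. in __init__:
     self.net = nn.Sequential(self._create(hidden=256, k=2, layer=3, dropout=0.5))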
Example #15
def _read_txt_old(path):
    print('loading plain text model from', path)

    with open(path, 'r') as f:  # read as text so the content splits on '\n'
        content = f.read().split('\n')

        modules = []
        c = 0
        line = content[c]
        while len(line) > 0:
            if line.startswith(
                    Linear.__name__
            ):  # @UndefinedVariable import error suppression for PyDev users
                lineparts = line.split()
                m = int(lineparts[1])
                n = int(lineparts[2])
                mod = Linear(m, n)
                for i in range(m):
                    c += 1
                    mod.W[i, :] = np.array([
                        float(val) for val in content[c].split()
                        if len(val) > 0
                    ])

                c += 1
                mod.B = np.array([float(val) for val in content[c].split()])
                modules.append(mod)

            elif line.startswith(
                    Rect.__name__
            ):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(Rect())
            elif line.startswith(
                    Tanh.__name__
            ):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(Tanh())
            elif line.startswith(
                    SoftMax.__name__
            ):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(SoftMax())
            elif line.startswith(
                    BinStep.__name__
            ):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(BinStep())
            elif line.startswith(
                    NegAbs.__name__
            ):  # @UndefinedVariable import error suppression for PyDev users
                modules.append(NegAbs())
            else:
                raise ValueError('Layer type ' +
                                 [s for s in line.split() if len(s) > 0][0] +
                                 ' not supported by legacy plain text format.')

            c += 1
            line = content[c]

        return Sequential(modules)
Example #16
    def __init__(self):
        super(FastSpeech2, self).__init__()

        self.encoder = Encoder()
        self.variance_adaptor = VarianceAdaptor()
        self.decoder = Decoder()

        self.mel_linear = Linear(hp.decoder_hidden, hp.n_mel_channels)
        self.postnet = PostNet()
Example #17
    def __init__(self, n_layers, d_k, d_v, d_model, d_ff, n_heads,
                 max_tgt_seq_len, tgt_vocab_size, dropout,
                 weighted_model, share_proj_weight, n_experts=10):
        super(LMTransformer, self).__init__()
        self.decoder = Decoder(n_layers, d_k, d_v, d_model, d_ff, n_heads,
                               max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model)
        self.tgt_proj = Linear(d_model, tgt_vocab_size, bias=False)
        self.weighted_model = weighted_model

        self.head = MoShead(tgt_vocab_size, d_model, self.decoder, share_proj_weight, n_experts)
Example #18
    def __init__(self, d_model: int = 512, d_ff: int = 2048,
                 dropout_p: float = 0.3, ffnet_style: str = 'ff') -> None:
        super(PositionWiseFeedForwardNet, self).__init__()
        self.ffnet_style = ffnet_style.lower()
        if self.ffnet_style == 'ff':
            self.feed_forward = nn.Sequential(
                Linear(d_model, d_ff),
                nn.Dropout(dropout_p),
                nn.ReLU(),
                Linear(d_ff, d_model),
                nn.Dropout(dropout_p),
            )

        elif self.ffnet_style == 'conv':
            self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
            self.relu = nn.ReLU()
            self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)

        else:
            raise ValueError("Unsupported mode: {0}".format(self.mode))
Example #19
    def __init__(
            self,
            num_classes: int,                      # number of classes
            max_length: int = 150,                 # a maximum allowed length for the sequence to be processed
            hidden_dim: int = 1024,                # dimension of RNN`s hidden state vector
            pad_id: int = 0,                       # pad token`s id
            sos_id: int = 1,                       # start of sentence token`s id
            eos_id: int = 2,                       # end of sentence token`s id
            attn_mechanism: str = 'multi-head',    # type of attention mechanism
            num_heads: int = 4,                    # number of attention heads
            num_layers: int = 2,                   # number of RNN layers
            rnn_type: str = 'lstm',                # type of RNN cell
            dropout_p: float = 0.3,                # dropout probability
            device: str = 'cuda'                   # device - 'cuda' or 'cpu'
    ) -> None:
        super(Speller, self).__init__(hidden_dim, hidden_dim, num_layers, rnn_type, dropout_p, False, device)
        self.num_classes = num_classes
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.max_length = max_length
        self.eos_id = eos_id
        self.sos_id = sos_id
        self.pad_id = pad_id
        self.attn_mechanism = attn_mechanism.lower()
        self.embedding = nn.Embedding(num_classes, hidden_dim)
        self.input_dropout = nn.Dropout(dropout_p)

        if self.attn_mechanism == 'loc':
            self.attention = AddNorm(LocationAwareAttention(hidden_dim, smoothing=True), hidden_dim)
        elif self.attn_mechanism == 'multi-head':
            self.attention = AddNorm(MultiHeadAttention(hidden_dim, num_heads), hidden_dim)
        elif self.attn_mechanism == 'additive':
            self.attention = AdditiveAttention(hidden_dim)
        elif self.attn_mechanism == 'scaled-dot':
            self.attention = AddNorm(ScaledDotProductAttention(hidden_dim), hidden_dim)
        else:
            raise ValueError("Unsupported attention: %s".format(attn_mechanism))

        self.projection = AddNorm(Linear(hidden_dim, hidden_dim, bias=True), hidden_dim)
        self.generator = Linear(hidden_dim, num_classes, bias=False)
Example #20
    def __init__(self, n_layers, d_k, d_v, d_model, d_ff, n_heads,
                 max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model,
                 share_proj_weight):
        super(LMTransformer, self).__init__()
        self.decoder = Decoder(n_layers, d_k, d_v, d_model, d_ff, n_heads,
                               max_tgt_seq_len, tgt_vocab_size, dropout,
                               weighted_model)
        self.tgt_proj = Linear(d_model, tgt_vocab_size, bias=False)
        self.weighted_model = weighted_model

        if share_proj_weight:
            print('Sharing target embedding and projection..')
            self.tgt_proj.weight = self.decoder.tgt_emb.weight
Example #21
    def __init__(
        self,
        input_size: int,  # size of input
        num_classes: int,  # number of classes
        rnn_type='gru',  # type of RNN cell
        num_rnn_layers: int = 5,  # number of RNN layers
        rnn_hidden_dim: int = 512,  # dimension of RNN`s hidden state
        dropout_p: float = 0.1,  # dropout probability
        bidirectional: bool = True,  # if True, becomes a bidirectional rnn
        activation: str = 'hardtanh',  # type of activation function
        device: str = 'cuda'  # device - 'cuda' or 'cpu'
    ):
        super(DeepSpeech2, self).__init__()
        self.rnn_layers = list()
        self.device = device

        input_size = int(math.floor(input_size + 2 * 20 - 41) / 2 + 1)
        input_size = int(math.floor(input_size + 2 * 10 - 21) / 2 + 1)
        input_size <<= 5
        rnn_output_size = rnn_hidden_dim << 1 if bidirectional else rnn_hidden_dim

        self.conv = DeepSpeech2Extractor(activation, mask_conv=True)

        for idx in range(num_rnn_layers):
            self.rnn_layers.append(
                BNReluRNN(
                    input_size=input_size if idx == 0 else rnn_output_size,
                    hidden_dim=rnn_hidden_dim,
                    rnn_type=rnn_type,
                    bidirectional=bidirectional,
                    dropout_p=dropout_p,
                    device=device))

        self.fc = nn.Sequential(
            Linear(rnn_output_size, rnn_hidden_dim), nn.ReLU(),
            Linear(rnn_hidden_dim, num_classes, bias=False))
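A worked example of the input_size arithmetic above for an 80-dimensional input (the numbers are simply the formula evaluated, not taken from the source); the two lines mirror the extractor's two frequency-axis convolutions and the final shift multiplies by its 32 output channels:

input_size = 80
input_size = int(math.floor(input_size + 2 * 20 - 41) / 2 + 1)   # -> 40
input_size = int(math.floor(input_size + 2 * 10 - 21) / 2 + 1)   # -> 20
input_size <<= 5                                                  # 20 * 32 channels = 640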
Example #22
    def _convert_to_nn(self, svm_model, y_train, x_val):
        #convert to linear NN
        print('converting {} model to linear NN'.format(
            self.__class__.__name__))
        W = svm_model.coef_.T
        B = svm_model.intercept_

        if numpy.unique(y_train).size == 2:
            linear_layer = Linear(W.shape[0], 2)
            linear_layer.W = numpy.concatenate([-W, W], axis=1)
            linear_layer.B = numpy.concatenate([-B, B], axis=0)
        else:
            linear_layer = Linear(*(W.shape))
            linear_layer.W = W
            linear_layer.B = B

        svm_model = self.model
        nn_model = Sequential([Flatten(), linear_layer])
        if not self.use_gpu: nn_model.to_numpy()

        #sanity check model conversion
        self._sanity_check_model_conversion(svm_model, nn_model, x_val)
        print('model conversion sanity check passed')
        return nn_model
Example #23
 def __init__(self,
              feature_columns,
              hidden_units,
              cin_size,
              dnn_dropout=0,
              dnn_activation='relu',
              embed_reg=1e-6,
              cin_reg=1e-6,
              w_reg=1e-6):
     """
     xDeepFM
     :param feature_columns: A list. sparse column feature information.
     :param hidden_units: A list. a list of dnn hidden units.
     :param cin_size: A list. a list of the number of CIN layers.
     :param dnn_dropout: A scalar. dropout of dnn.
     :param dnn_activation: A string. activation function of dnn.
     :param embed_reg: A scalar. The regularizer of embedding.
     :param cin_reg: A scalar. The regularizer of cin.
     :param w_reg: A scalar. The regularizer of Linear.
     """
     super(xDeepFM, self).__init__()
     self.sparse_feature_columns = feature_columns
     self.embed_dim = self.sparse_feature_columns[0]['embed_dim']
     self.embed_layers = {
         'embed_' + str(i):
         Embedding(input_dim=feat['feat_num'],
                   input_length=1,
                   output_dim=feat['embed_dim'],
                   embeddings_initializer='random_normal',
                   embeddings_regularizer=l2(embed_reg))
         for i, feat in enumerate(self.sparse_feature_columns)
     }
     self.index_mapping = []
     self.feature_length = 0
     for feat in self.sparse_feature_columns:
         self.index_mapping.append(self.feature_length)
         self.feature_length += feat['feat_num']
     self.linear = Linear(self.feature_length, w_reg)
     self.cin = CIN(cin_size=cin_size, l2_reg=cin_reg)
     self.dnn = DNN(hidden_units=hidden_units,
                    dnn_dropout=dnn_dropout,
                    dnn_activation=dnn_activation)
     self.cin_dense = Dense(1)
     self.dnn_dense = Dense(1)
     self.bias = self.add_weight(name='bias',
                                 shape=(1, ),
                                 initializer=tf.zeros_initializer())
Example #24
def main():
  current_state = STATE_SETUP
  worker_name = 'worker'
  if len(sys.argv) > 1:
    worker_name = sys.argv[1]

  print('Initializing worker ' + worker_name)
  while True:
    answer = socket_adapter.send_message(get_formated_message('setup', current_state), wait_answer=True)
    if answer['key'] == current_state and answer['code'] == code.CODE_OK:
        print("Worker successfully registered")
    else:
        print("Error on setup | message:{}".format(answer))

    learning_parameters = answer['data']
    if learning_parameters and answer['code'] == code.CODE_OK:
      input_size = learning_parameters['input_size']
      output_size = learning_parameters['output_size']
      eta = learning_parameters['eta']
      iterations = learning_parameters['iterations']
      break
    else:
      print("Waiting for setup data")
      time.sleep(2)


  print('Learning parameters are: {}'.format(learning_parameters))
  x,y = generate_data(input_size, output_size)
  X = standardize(x)
  Y = standardize(y)
  model = Linear(X.shape[1],Y.shape[1])
  optim = LossMSE()
  trainer = Trainer(model, optim)

  while True:
    current_state = STATE_LEARNING
    print("Waiting to start learning")
    answer = socket_adapter.send_message(get_formated_message('',current_state), wait_answer=True)
    if answer['code'] == code.CODE_OK:
      print("Start learning")
      break
    
    time.sleep(2)


  cost = trainer.trainGD(X,Y,iterations, eta=eta, update_func=on_params_update)
  plotCostAndData(model,X,Y,cost, fig_name=worker_name)
Example #25
    def test_Linear(self):
        np.random.seed(42)
        torch.manual_seed(42)

        batch_size, n_in, n_out = 2, 3, 4
        for _ in range(100):
            # layers initialization
            torch_layer = torch.nn.Linear(n_in, n_out)
            custom_layer = Linear(n_in, n_out)
            custom_layer.W = torch_layer.weight.data.numpy()
            custom_layer.b = torch_layer.bias.data.numpy()

            layer_input = np.random.uniform(
                -10, 10, (batch_size, n_in)).astype(np.float32)
            next_layer_grad = np.random.uniform(
                -10, 10, (batch_size, n_out)).astype(np.float32)

            # 1. check layer output
            custom_layer_output = custom_layer.updateOutput(layer_input)
            layer_input_var = Variable(torch.from_numpy(layer_input),
                                       requires_grad=True)
            torch_layer_output_var = torch_layer(layer_input_var)
            self.assertTrue(
                np.allclose(torch_layer_output_var.data.numpy(),
                            custom_layer_output,
                            atol=1e-6))

            # 2. check layer input grad
            custom_layer_grad = custom_layer.updateGradInput(
                layer_input, next_layer_grad)
            torch_layer_output_var.backward(torch.from_numpy(next_layer_grad))
            torch_layer_grad_var = layer_input_var.grad
            self.assertTrue(
                np.allclose(torch_layer_grad_var.data.numpy(),
                            custom_layer_grad,
                            atol=1e-6))

            # 3. check layer parameters grad
            custom_layer.accGradParameters(layer_input, next_layer_grad)
            weight_grad = custom_layer.gradW
            bias_grad = custom_layer.gradb
            torch_weight_grad = torch_layer.weight.grad.data.numpy()
            torch_bias_grad = torch_layer.bias.grad.data.numpy()
            self.assertTrue(
                np.allclose(torch_weight_grad, weight_grad, atol=1e-6))
            self.assertTrue(np.allclose(torch_bias_grad, bias_grad, atol=1e-6))
Example #26
# normalize inputs
train_input = (train_input - train_input.mean(dim=1)[:, None]
               ) / train_input.std(dim=1)[:, None]
test_input = (test_input -
              test_input.mean(dim=1)[:, None]) / test_input.std(dim=1)[:, None]

# In[]
# training

overallTestAcc = []
overallTrainAcc = []
for eva in range(evaluateIter):

    # create a model
    model = sequential(Linear(input_size=2, output_size=25), ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=25), ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=25), ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=2))

    # define criterion and optimizer
    criterion = MSELoss(method='mean')
    optimizer = SGD(model.parameters(), lr=learningRate)

    trainLossList = []
    trainNumList = []
    testLossList = []
    testNumList = []
Example #27
weights = weights[None, :, :]
weights.transpose(1,2).shape


input = torch.Tensor([[1, 2, 3, 4, 5],
                      [1, 2, 3, 0, 0],
                      [1, 1, 1, 1, 1]])

bias = torch.Tensor([1, 2, 3, 4])
bias.shape

'''
input = input[:, :, None]
weights.matmul(input).squeeze() + bias'''

lin = Linear(5, 4, ReLU())

output = lin.forward(input)
target = torch.Tensor([[0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [0, 0, 1, 0]])
d_loss = dloss(output, target)

prev_dl_dx = lin.backward(d_loss)

prev_dl_dx.shape

ex_dloss = torch.Tensor([[.1, .2, .2, .1],
                         [.1, .2, .2, .1],
                         [.1, .2, .2, .1]])
Example #28
    def _read_txt_helper(path):
        with open(path, 'r') as f:  # read as text so the content splits on '\n'
            content = f.read().split('\n')

            modules = []
            c = 0
            line = content[c]

            while len(line) > 0:
                if line.startswith(
                        Linear.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of linear layer
                    Linear <rows_of_W> <columns_of_W>
                    <flattened weight matrix W>
                    <flattened bias vector>
                    '''
                    _, m, n = line.split()
                    m = int(m)
                    n = int(n)
                    layer = Linear(m, n)
                    layer.W = np.array([
                        float(weightstring)
                        for weightstring in content[c + 1].split()
                        if len(weightstring) > 0
                    ]).reshape((m, n))
                    layer.B = np.array([
                        float(weightstring)
                        for weightstring in content[c + 2].split()
                        if len(weightstring) > 0
                    ])
                    modules.append(layer)
                    c += 3  # the description of a linear layer spans three lines

                elif line.startswith(
                        Convolution.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of convolution layer
                    Convolution <rows_of_W> <columns_of_W> <depth_of_W> <number_of_filters_W> <stride_axis_0> <stride_axis_1>
                    <flattened filter block W>
                    <flattened bias vector>
                    '''

                    _, h, w, d, n, s0, s1 = line.split()
                    h = int(h)
                    w = int(w)
                    d = int(d)
                    n = int(n)
                    s0 = int(s0)
                    s1 = int(s1)
                    layer = Convolution(filtersize=(h, w, d, n),
                                        stride=(s0, s1))
                    layer.W = np.array([
                        float(weightstring)
                        for weightstring in content[c + 1].split()
                        if len(weightstring) > 0
                    ]).reshape((h, w, d, n))
                    layer.B = np.array([
                        float(weightstring)
                        for weightstring in content[c + 2].split()
                        if len(weightstring) > 0
                    ])
                    modules.append(layer)
                    c += 3  #the description of a convolution layer spans three lines

                elif line.startswith(
                        SumPool.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of sum pooling layer
                    SumPool <mask_heigth> <mask_width> <stride_axis_0> <stride_axis_1>
                    '''

                    _, h, w, s0, s1 = line.split()
                    h = int(h)
                    w = int(w)
                    s0 = int(s0)
                    s1 = int(s1)
                    layer = SumPool(pool=(h, w), stride=(s0, s1))
                    modules.append(layer)
                    c += 1  # one line of parameterized layer description

                elif line.startswith(
                        MaxPool.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of max pooling layer
                    MaxPool <mask_heigth> <mask_width> <stride_axis_0> <stride_axis_1>
                    '''

                    _, h, w, s0, s1 = line.split()
                    h = int(h)
                    w = int(w)
                    s0 = int(s0)
                    s1 = int(s1)
                    layer = MaxPool(pool=(h, w), stride=(s0, s1))
                    modules.append(layer)
                    c += 1  # one line of parameterized layer description

                elif line.startswith(
                        Flatten.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(Flatten())
                    c += 1  #one line of parameterless layer description
                elif line.startswith(
                        Rect.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(Rect())
                    c += 1  #one line of parameterless layer description
                elif line.startswith(
                        Tanh.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(Tanh())
                    c += 1  #one line of parameterless layer description
                elif line.startswith(
                        SoftMax.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(SoftMax())
                    c += 1  #one line of parameterless layer description
                else:
                    raise ValueError(
                        'Layer type identifier ' +
                        [s for s in line.split() if len(s) > 0][0] +
                        ' not supported for reading from plain text file')

                #skip info of previous layers, read in next layer header
                line = content[c]

        return Sequential(modules)
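Putting the per-layer format strings above together, a tiny model file this reader would accept might look as follows (the weights are illustrative): each Linear block spans three lines, parameterless layers take one line, and parsing stops at the first empty line.

Linear 2 3
0.1 0.2 0.3 0.4 0.5 0.6
0.0 0.0 0.0
Rect
Linear 3 2
1.0 -1.0 0.5 0.5 -0.5 1.0
0.0 0.0
SoftMax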
Example #29
#  ----- Define the parameters for learning -----
nb_classes = train_labels.shape[0]
features = train_features.size(1)
nb_samples = train_features.size(0)
epsilon = 0.1
eta = .2  #nb_samples is now defined in Sequential()
batch_size = config.batch_size
epochs = int(config.epochs / (nb_samples / batch_size))

# Zeta is to make it work correctly with Sigma activation function.
# train_label = train_label.add(0.125).mul(0.8)
# test_label = test_label.add(0.125).mul(0.8)

# ----- Implementation of the architecture -----
architecture = Sequential(Linear(2, 25, ReLU()), Linear(25, 25, ReLU()),
                          Linear(25, 25, ReLU()), Linear(25, 2, Sigma()))

# ----- Training -----
round = 1
prev_loss = math.inf
prev_prev_loss = math.inf
errors = []
for epoch in range(epochs):
    for batch_start in range(0, nb_samples, batch_size):
        features = train_features[batch_start:batch_start + batch_size, :]
        labels = train_labels[batch_start:batch_start + batch_size]
        tr_loss, tr_error = architecture.forward(features, labels)  # train on the current mini-batch slice
        architecture.backward()
        architecture.update(eta)
        loss, error = architecture.forward(test_features, test_labels)
Example #30
    def __init__(
        self,
        num_classes: int,  # the number of classes
        d_model: int = 512,  # dimension of model
        input_dim: int = 80,  # dimension of input
        pad_id: int = 0,  # identification of <PAD_token>
        eos_id: int = 2,  # identification of <EOS_token>
        d_ff: int = 2048,  # dimension of feed forward network
        num_heads: int = 8,  # number of attention heads
        num_encoder_layers: int = 6,  # number of encoder layers
        num_decoder_layers: int = 6,  # number of decoder layers
        dropout_p: float = 0.3,  # dropout probability
        ffnet_style: str = 'ff',  # feed forward network style 'ff' or 'conv'
        extractor: str = 'vgg'  # CNN extractor [vgg, ds2]
    ) -> None:
        super(SpeechTransformer, self).__init__()

        assert d_model % num_heads == 0, "d_model % num_heads should be zero."

        if extractor.lower() == 'vgg':
            input_dim = (input_dim -
                         1) << 5 if input_dim % 2 else input_dim << 5
            self.conv = nn.Sequential(
                nn.Conv2d(1,
                          64,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False), nn.BatchNorm2d(num_features=64),
                nn.Hardtanh(0, 20, inplace=True),
                nn.Conv2d(64,
                          64,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False), nn.BatchNorm2d(num_features=64),
                nn.Hardtanh(0, 20, inplace=True), nn.MaxPool2d(2, stride=2),
                nn.Conv2d(64,
                          128,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False), nn.BatchNorm2d(num_features=128),
                nn.Hardtanh(0, 20, inplace=True),
                nn.Conv2d(128,
                          128,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=False), nn.BatchNorm2d(num_features=128),
                nn.Hardtanh(0, 20, inplace=True), nn.MaxPool2d(2, stride=2))

        elif extractor.lower() == 'ds2':
            input_dim = int(math.floor(input_dim + 2 * 20 - 41) / 2 + 1)
            input_dim = int(math.floor(input_dim + 2 * 10 - 21) / 2 + 1)
            input_dim <<= 5
            self.conv = nn.Sequential(
                nn.Conv2d(1,
                          32,
                          kernel_size=(41, 11),
                          stride=(2, 2),
                          padding=(20, 5),
                          bias=False),
                nn.BatchNorm2d(32),
                nn.Hardtanh(0, 20, inplace=True),
                nn.Conv2d(32,
                          32,
                          kernel_size=(21, 11),
                          stride=(2, 1),
                          padding=(10, 5),
                          bias=False),
                nn.BatchNorm2d(32),
                nn.Hardtanh(0, 20, inplace=True),
            )

        else:
            raise ValueError("Unsupported Extractor : {0}".format(extractor))

        self.encoder = SpeechTransformerEncoder(d_model=d_model,
                                                input_dim=input_dim,
                                                d_ff=d_ff,
                                                num_layers=num_encoder_layers,
                                                num_heads=num_heads,
                                                ffnet_style=ffnet_style,
                                                dropout_p=dropout_p,
                                                pad_id=pad_id)
        self.decoder = SpeechTransformerDecoder(num_classes=num_classes,
                                                d_model=d_model,
                                                d_ff=d_ff,
                                                num_layers=num_decoder_layers,
                                                num_heads=num_heads,
                                                ffnet_style=ffnet_style,
                                                dropout_p=dropout_p,
                                                pad_id=pad_id,
                                                eos_id=eos_id)

        self.eos_id = eos_id
        self.pad_id = pad_id
        self.generator = Linear(d_model, num_classes)
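A worked example (illustrative input_dim, not from the source) of the 'vgg' branch arithmetic above: the two MaxPool2d(2) layers quarter the frequency axis and the last convolution has 128 channels, so the flattened feature size is (input_dim // 4) * 128, which is exactly what the bit shift computes for an even input_dim:

input_dim = 80
input_dim = (input_dim - 1) << 5 if input_dim % 2 else input_dim << 5   # 80 << 5 == 2560 == (80 // 4) * 128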