def get_categorical_model(input_neurons, output_neurons, layers=None):
    """
    Create a feed-forward model with Categorical Crossentropy loss.

    :param input_neurons: input neuron number
    :param output_neurons: output neuron number
    :param layers: list of intermediate (hidden) layer sizes; when None,
        three hidden layers of 25 neurons are used. The list passed in is
        left untouched.
    :return: network with Categorical Crossentropy loss
    """
    default_act = 'relu'
    hidden_sizes = [25, 25, 25] if layers is None else list(layers)

    # Build the size chain on a private list. The previous implementation
    # inserted input_neurons into the caller's own list, so reusing that
    # list for a second model silently grew it by one entry per call.
    sizes = [input_neurons] + hidden_sizes

    model = Sequential()
    for in_size, out_size in zip(sizes, sizes[1:]):
        model.add(Linear(out=out_size, input_size=in_size, activation=default_act))
        # model.add(Dropout(prob=0.2))

    # No input_size is passed for the output layer (unchanged from the
    # original); presumably Sequential.add derives it from the previous
    # layer -- confirm against Sequential.
    model.add(Linear(out=output_neurons, activation='softmax'))

    # Set loss function to model: Sequential object
    model.loss = LossCrossEntropy()
    return model
def __init__(self, embed_dim, z_channels, s_channels, num_dilation_layer=10):
    """
    Build the aligner: a pre-processing convolution, a stack of dilated
    convolution blocks and a two-layer post-processing head.

    :param embed_dim: number of input channels of the first convolution
    :param z_channels: forwarded to every DilatedConvBlock
    :param s_channels: forwarded to every DilatedConvBlock
    :param num_dilation_layer: number of dilated blocks; block i uses
        dilation 2**i
    """
    super(Aligner, self).__init__()
    self.embed_dim = embed_dim
    self.z_channels = z_channels
    self.s_channels = s_channels

    width = 256  # channel width shared by all internal layers

    self.pre_process = Conv1d(embed_dim, width, kernel_size=3)

    # Dilation doubles with every block: 1, 2, 4, ...
    self.dilated_conv_layers = nn.ModuleList([
        DilatedConvBlock(width, width, z_channels, s_channels, 2**layer_idx)
        for layer_idx in range(num_dilation_layer)
    ])

    self.post_process = nn.Sequential(
        Linear(width, width),
        nn.ReLU(inplace=False),
        Linear(width, 1),
        nn.ReLU(inplace=False),
    )
def test_init_not_compatible(self):
    """Expect NotCompatibleError when consecutive layer sizes do not match."""
    with self.assertRaises(NotCompatibleError):
        # The first layer emits 22 values but the second one declares
        # input_size=23, so the stack is inconsistent.
        Sequential([
            Linear(input_size=2, out=22, activation='tanh'),
            Linear(input_size=23, out=22, activation='tanh'),
        ])
def test_init_not_input_size(self):
    """
    Expect InputSizeNotFoundError when the first layer is created without
    an input_size.

    :return:
    """
    with self.assertRaises(InputSizeNotFoundError):
        Sequential([
            # NO input_size is given for the first layer
            Linear(out=22, activation='tanh'),
            Linear(input_size=23, out=22, activation='tanh'),
        ])
def test_save_model(self):
    """
    Placeholder for the model-saving test.

    Only builds a 2 -> 24 -> 2 tanh network; nothing is saved or asserted.

    :return:
    """
    model = Sequential()
    for in_size, out_size in ((2, 24), (24, 2)):
        model.add(Linear(input_size=in_size, out=out_size, activation='tanh'))
def test_load_model(self):
    """
    Placeholder for the model-loading test.

    Builds a 2 -> 24 -> 2 tanh network and names a target file; nothing is
    loaded or asserted.

    :return:
    """
    model = Sequential()
    for in_size, out_size in ((2, 24), (24, 2)):
        model.add(Linear(input_size=in_size, out=out_size, activation='tanh'))

    file_name = "model.h5py"
def _affine_backward(self, x, w, b, dout):
    """
    Run one forward/backward pass through a Linear layer with the given
    parameters and return its gradients.

    :param x: input passed to the layer's forward()
    :param w: weight array; its first two shape entries size the layer
    :param b: bias assigned to the layer
    :param dout: upstream gradient passed to the layer's backward()
    :return: tuple (dx, dw, db) read from the layer after backward()
    """
    rows, cols = w.shape[0], w.shape[1]
    affine = Linear(rows, cols)
    affine.weight, affine.bias = w, b

    # The forward output itself is not needed; the call is kept because
    # backward() presumably relies on state cached by forward().
    affine.forward(x)
    affine.backward(dout)

    return affine.dx, affine.dw, affine.db
def __init__(self, d_k, d_v, d_model, n_heads, dropout):
    """
    Assemble the attention sub-layer: the inner multi-head attention, an
    output projection, dropout and layer normalization.

    :param d_k: forwarded to _MultiHeadAttention
    :param d_v: forwarded to _MultiHeadAttention; also sizes the projection
    :param d_model: model dimension (projection output, layer-norm size)
    :param n_heads: number of attention heads
    :param dropout: dropout probability
    """
    super(MultiHeadAttention, self).__init__()

    self.attention = _MultiHeadAttention(d_k, d_v, d_model, n_heads, dropout)

    # The projection takes n_heads * d_v input features and maps them to
    # d_model.
    heads_width = n_heads * d_v
    self.proj = Linear(heads_width, d_model)

    self.dropout = nn.Dropout(dropout)
    self.layer_norm = LayerNormalization(d_model)
def __init__(
        self,
        d_model: int = 512,       # dimension of model
        input_dim: int = 80,      # dimension of feature vector
        d_ff: int = 2048,         # dimension of feed forward network
        num_layers: int = 6,      # number of encoder layers
        num_heads: int = 8,       # number of attention heads
        ffnet_style: str = 'ff',  # style of feed forward network [ff, conv]
        dropout_p: float = 0.3,   # probability of dropout
        pad_id: int = 0,          # identification of pad token
) -> None:
    """
    Build the encoder: input projection, normalization, dropout,
    positional encoding and a stack of num_layers encoder layers.

    input_dim, d_ff, ffnet_style and dropout_p are only used to configure
    sub-modules; they are not stored on the instance.
    """
    super(SpeechTransformerEncoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    self.num_heads = num_heads
    self.pad_id = pad_id

    # Input pipeline: project features to d_model, normalize, drop out.
    self.input_proj = Linear(input_dim, d_model)
    self.input_norm = LayerNorm(d_model)
    self.input_dropout = nn.Dropout(p=dropout_p)
    self.positional_encoding = PositionalEncoding(d_model)

    encoder_stack = []
    for _ in range(num_layers):
        encoder_stack.append(
            SpeechTransformerEncoderLayer(d_model, num_heads, d_ff, dropout_p, ffnet_style))
    self.layers = nn.ModuleList(encoder_stack)
def __init__(self, feature_columns, hidden_units, activation='relu',
             dnn_dropout=0., embed_reg=1e-6, w_reg=1e-6):
    """
    Wide&Deep

    :param feature_columns: A list. sparse column feature information;
        each entry is read through its 'feat_num' and 'embed_dim' keys.
    :param hidden_units: A list. Neural network hidden units.
    :param activation: A string. Activation function of dnn.
    :param dnn_dropout: A scalar. Dropout of dnn.
    :param embed_reg: A scalar. The regularizer of embedding.
    :param w_reg: A scalar. The regularizer of Linear.
    """
    super(WideDeep, self).__init__()
    self.sparse_feature_columns = feature_columns

    # One embedding per sparse feature, keyed 'embed_<position>'.
    embeddings = {}
    for position, feat in enumerate(self.sparse_feature_columns):
        embeddings['embed_' + str(position)] = Embedding(
            input_dim=feat['feat_num'],
            input_length=1,
            output_dim=feat['embed_dim'],
            embeddings_initializer='random_uniform',
            embeddings_regularizer=l2(embed_reg))
    self.embed_layers = embeddings

    # index_mapping[i] is the running sum of 'feat_num' over the features
    # before i; feature_length is the sum over all features.
    offsets = []
    running_total = 0
    for feat in self.sparse_feature_columns:
        offsets.append(running_total)
        running_total += feat['feat_num']
    self.index_mapping = offsets
    self.feature_length = running_total

    self.dnn_network = DNN(hidden_units, activation, dnn_dropout)
    self.linear = Linear(self.feature_length, w_reg=w_reg)
    self.final_dense = Dense(1, activation=None)
def main():
    """
    Build a 784-400-100-10 classifier and run the train/valid cycle for
    num_epochs epochs.

    Reads the module-level names lr, weight_decay and num_epochs, and calls
    the module-level train() and valid() once per epoch.
    """
    # Alternative optimizer, kept for reference:
    # optimizer = SGD(lr, weight_decay, mu=mu)
    optimizer = Adam(lr, weight_decay)

    network = [
        Linear(784, 400),
        ReLU(),
        Linear(400, 100),
        ReLU(),
        Linear(100, 10),
        Softmax(),
    ]
    model = ListModel(net=network, loss=CrossEntropyLoss())

    for epoch in range(num_epochs):
        print('epoch number: {}'.format(epoch))
        train(model, optimizer)
        valid(model)
def train(epochs, batch_size, hidden_size, learning_rate):
    """
    Train a simple feed-forward network to classify MNIST digits, using
    vanilla SGD to minimize the categorical cross entropy between network
    outputs and ground truth labels.

    After every epoch the mean training loss and the validation accuracy
    are printed.

    :param epochs: number of passes over the training generator
    :param batch_size: the optimizer steps whenever the sample index is a
        positive multiple of this value
    :param hidden_size: width of the two hidden layers
    :param learning_rate: passed to GradientDescentOptimizer as lr
    """
    ff = Sequence(Linear(784, hidden_size), ReLU(),
                  Linear(hidden_size, hidden_size), ReLU(),
                  Linear(hidden_size, 10))
    loss = cross_entropy_loss_with_logits
    loss_grad = cross_entropy_loss_with_logits_grad

    val_set = mnist(val=True)

    def val():
        """Print the fraction of validation samples classified correctly."""
        correct = 0.0
        count = 0
        for sample, label in val_set():
            output = ff.forward(sample)
            correct += np.argmax(output) == label
            count += 1
        # Divide by the number of samples seen. The previous code divided
        # by the last enumerate index (count - 1), which overstated the
        # accuracy and failed when the set held zero or one sample.
        print("Val %s" % (correct / float(max(count, 1)),))

    optim = GradientDescentOptimizer(ff, lr=learning_rate)
    train_set = mnist()

    # print("...") with one pre-formatted string and range() are valid on
    # both Python 2 and Python 3 and emit the same text as the former
    # print statements.
    print("Training ..")
    for epoch in range(epochs):
        loss_sum = 0.0
        count = 0
        for i, (sample, label) in enumerate(train_set()):
            label = np.array(label, dtype=np.int32)
            output = ff.forward(sample)
            ff.backward(loss_grad(label, output))
            if i > 0 and (i % batch_size == 0):
                optim.step()
            loss_sum += loss(label, output)
            count += 1
        # Mean over the samples actually processed (was: last index).
        print("%s Loss %s" % (epoch, loss_sum / max(count, 1)))
        val()
def _create(self, hidden, k, layer, dropout=None):
    """
    Build the ordered name -> module mapping of an MLP with 784 inputs and
    10 outputs.

    :param hidden: width of every hidden layer
    :param k: third argument given to each non-final Linear layer (the
        final layer always receives 0)
    :param layer: total number of Linear layers
    :param dropout: dropout probability added after every hidden ReLU;
        falsy values (None, 0) disable dropout
    :return: OrderedDict with keys 'linear<i>', 'relu<i>', 'dropout<i>'
    """
    if layer == 1:
        # OrderedDict needs (name, module) pairs. The previous code passed
        # the bare module, which raises TypeError.
        # NOTE(review): unlike every other layer this one is built without
        # self.unified (unchanged from the original) -- confirm intended.
        return OrderedDict([('linear0', Linear(784, 10, 0))])

    d = OrderedDict()
    last = layer - 1
    for i in range(layer):
        suffix = str(i)
        in_features = 784 if i == 0 else hidden

        if i == last:
            # Read-out layer: no activation or dropout afterwards.
            d['linear' + suffix] = Linear(in_features, 10, 0, self.unified)
            continue

        d['linear' + suffix] = Linear(in_features, hidden, k, self.unified)
        d['relu' + suffix] = nn.ReLU()
        if dropout:
            d['dropout' + suffix] = nn.Dropout(p=dropout)
    return d
def _create(self, hidden, k, layer, dropout=None):
    """
    Assemble the layers of a 784 -> ... -> 10 MLP as an ordered mapping.

    :param hidden: number of units in each hidden layer
    :param k: third argument of every hidden Linear layer; the read-out
        layer is always built with 0 instead
    :param layer: number of Linear layers in the network
    :param dropout: if truthy, an nn.Dropout with this probability follows
        every hidden ReLU
    :return: OrderedDict mapping 'linear<i>' / 'relu<i>' / 'dropout<i>' to
        the corresponding module
    """
    if layer == 1:
        # A single-layer network is just the read-out layer. It must be
        # given as a (key, value) pair: OrderedDict([module]) raised
        # TypeError in the previous version.
        # NOTE(review): self.unified is not passed here, as in the
        # original; verify whether the read-out layer should receive it.
        return OrderedDict([('linear0', Linear(784, 10, 0))])

    d = OrderedDict()
    for i in range(layer):
        name = str(i)
        if i == layer - 1:
            # final layer/readout layer.
            d['linear' + name] = Linear(hidden, 10, 0, self.unified)
        else:
            # input layer (i == 0) reads the 784 inputs, standard middle
            # layers read the previous hidden layer.
            fan_in = hidden if i else 784
            d['linear' + name] = Linear(fan_in, hidden, k, self.unified)
            d['relu' + name] = nn.ReLU()
            if dropout:
                d['dropout' + name] = nn.Dropout(p=dropout)
    return d
def _read_txt_old(path):
    """
    Read a model stored in the legacy plain text format.

    Each layer starts with a header line naming the layer type. A Linear
    header has the form 'Linear <m> <n>' and is followed by m lines holding
    one row of the weight matrix each, then one line holding the bias.
    Rect, Tanh, SoftMax, BinStep and NegAbs layers consist of the header
    line only. Reading stops at the first empty line or at end of file.

    :param path: path of the text file to read
    :return: Sequential built from the parsed layers
    :raises ValueError: if a header names an unsupported layer type
    """
    print('loading plain text model from', path)

    # Text mode is required: in binary mode read() returns bytes on
    # Python 3 and bytes.split('\n') raises TypeError.
    with open(path, 'r') as f:
        content = f.read().split('\n')

    modules = []
    c = 0
    # The bounds check lets files without a trailing newline end cleanly
    # instead of raising IndexError after the last layer.
    while c < len(content) and len(content[c]) > 0:
        line = content[c]
        if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
            lineparts = line.split()
            m = int(lineparts[1])
            n = int(lineparts[2])
            mod = Linear(m, n)
            # m weight rows follow the header ...
            for i in range(m):
                c += 1
                mod.W[i, :] = np.array(
                    [float(val) for val in content[c].split() if len(val) > 0])
            # ... and then a single bias line.
            c += 1
            mod.B = np.array([float(val) for val in content[c].split()])
            modules.append(mod)
        elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Rect())
        elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Tanh())
        elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(SoftMax())
        elif line.startswith(BinStep.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(BinStep())
        elif line.startswith(NegAbs.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(NegAbs())
        else:
            raise ValueError('Layer type ' + line.split()[0] +
                             ' not supported by legacy plain text format.')
        c += 1

    return Sequential(modules)
def __init__(self):
    """
    Assemble the model from its sub-modules: encoder, variance adaptor,
    decoder, mel projection and post-net.

    The mel projection is sized from hp.decoder_hidden and
    hp.n_mel_channels.
    """
    super(FastSpeech2, self).__init__()

    self.encoder = Encoder()
    self.variance_adaptor = VarianceAdaptor()
    self.decoder = Decoder()

    mel_in, mel_out = hp.decoder_hidden, hp.n_mel_channels
    self.mel_linear = Linear(mel_in, mel_out)

    self.postnet = PostNet()
def __init__(self, n_layers, d_k, d_v, d_model, d_ff, n_heads,
             max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model,
             share_proj_weight, n_experts=10):
    """
    Build the language model: a decoder, a bias-free target projection and
    a MoShead output head.

    The first ten arguments are forwarded unchanged to Decoder.
    share_proj_weight and n_experts are forwarded to MoShead together with
    the decoder instance.
    """
    super(LMTransformer, self).__init__()

    decoder_args = (n_layers, d_k, d_v, d_model, d_ff, n_heads,
                    max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model)
    self.decoder = Decoder(*decoder_args)

    self.tgt_proj = Linear(d_model, tgt_vocab_size, bias=False)
    self.weighted_model = weighted_model

    self.head = MoShead(tgt_vocab_size, d_model, self.decoder,
                        share_proj_weight, n_experts)
def __init__(self, d_model: int = 512, d_ff: int = 2048,
             dropout_p: float = 0.3, ffnet_style: str = 'ff') -> None:
    """
    Build the position-wise feed-forward network.

    :param d_model: input and output dimension of the network
    :param d_ff: inner dimension
    :param dropout_p: dropout probability (used by the 'ff' style only)
    :param ffnet_style: 'ff' for Linear layers or 'conv' for 1x1
        convolutions; matched case-insensitively
    :raises ValueError: if ffnet_style is neither 'ff' nor 'conv'
    """
    super(PositionWiseFeedForwardNet, self).__init__()
    self.ffnet_style = ffnet_style.lower()

    if self.ffnet_style == 'ff':
        self.feed_forward = nn.Sequential(
            Linear(d_model, d_ff),
            nn.Dropout(dropout_p),
            nn.ReLU(),
            Linear(d_ff, d_model),
            nn.Dropout(dropout_p),
        )
    elif self.ffnet_style == 'conv':
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
    else:
        # Report the offending style. The previous code formatted
        # self.mode, an attribute that is never set, so this branch failed
        # before the intended ValueError could be raised.
        raise ValueError("Unsupported mode: {0}".format(self.ffnet_style))
def __init__(
        self,
        num_classes: int,                    # number of classification classes
        max_length: int = 150,               # a maximum allowed length for the sequence to be processed
        hidden_dim: int = 1024,              # dimension of RNN`s hidden state vector
        pad_id: int = 0,                     # pad token`s id
        sos_id: int = 1,                     # start of sentence token`s id
        eos_id: int = 2,                     # end of sentence token`s id
        attn_mechanism: str = 'multi-head',  # type of attention mechanism
        num_heads: int = 4,                  # number of attention heads
        num_layers: int = 2,                 # number of RNN layers
        rnn_type: str = 'lstm',              # type of RNN cell
        dropout_p: float = 0.3,              # dropout probability
        device: str = 'cuda'                 # device - 'cuda' or 'cpu'
) -> None:
    """
    Build the speller: embedding, input dropout, the selected attention
    mechanism, a projection and the output generator.

    attn_mechanism is matched case-insensitively against 'loc',
    'multi-head', 'additive' and 'scaled-dot'.

    :raises ValueError: if attn_mechanism names none of the supported
        attention types
    """
    super(Speller, self).__init__(hidden_dim, hidden_dim, num_layers, rnn_type, dropout_p, False, device)
    self.num_classes = num_classes
    self.num_heads = num_heads
    self.num_layers = num_layers
    self.max_length = max_length
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.pad_id = pad_id
    self.attn_mechanism = attn_mechanism.lower()
    self.embedding = nn.Embedding(num_classes, hidden_dim)
    self.input_dropout = nn.Dropout(dropout_p)

    # Only the selected attention module is instantiated. Every variant
    # except 'additive' is wrapped in AddNorm.
    if self.attn_mechanism == 'loc':
        self.attention = AddNorm(LocationAwareAttention(hidden_dim, smoothing=True), hidden_dim)
    elif self.attn_mechanism == 'multi-head':
        self.attention = AddNorm(MultiHeadAttention(hidden_dim, num_heads), hidden_dim)
    elif self.attn_mechanism == 'additive':
        self.attention = AdditiveAttention(hidden_dim)
    elif self.attn_mechanism == 'scaled-dot':
        self.attention = AddNorm(ScaledDotProductAttention(hidden_dim), hidden_dim)
    else:
        # The former message mixed a '%s' placeholder with str.format(), so
        # the offending value never appeared in the error text.
        raise ValueError("Unsupported attention: {0}".format(attn_mechanism))

    self.projection = AddNorm(Linear(hidden_dim, hidden_dim, bias=True), hidden_dim)
    self.generator = Linear(hidden_dim, num_classes, bias=False)
def __init__(self, n_layers, d_k, d_v, d_model, d_ff, n_heads,
             max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model,
             share_proj_weight):
    """
    Build the language model: a decoder plus a bias-free projection onto
    the target vocabulary.

    The first ten arguments are forwarded unchanged to Decoder. When
    share_proj_weight is truthy, the projection's weight is replaced by
    the weight of the decoder's target embedding (decoder.tgt_emb).
    """
    super(LMTransformer, self).__init__()

    decoder_args = (n_layers, d_k, d_v, d_model, d_ff, n_heads,
                    max_tgt_seq_len, tgt_vocab_size, dropout, weighted_model)
    self.decoder = Decoder(*decoder_args)

    self.tgt_proj = Linear(d_model, tgt_vocab_size, bias=False)
    self.weighted_model = weighted_model

    if not share_proj_weight:
        return

    # Tie the output projection to the target embedding.
    print('Sharing target embedding and projection..')
    self.tgt_proj.weight = self.decoder.tgt_emb.weight
def __init__(
        self,
        input_size: int,               # size of input
        num_classes: int,              # number of classification classes
        rnn_type='gru',                # type of RNN cell
        num_rnn_layers: int = 5,       # number of RNN layers
        rnn_hidden_dim: int = 512,     # dimension of RNN`s hidden state
        dropout_p: float = 0.1,        # dropout probability
        bidirectional: bool = True,    # if True, becomes a bidirectional rnn
        activation: str = 'hardtanh',  # type of activation function
        device: str = 'cuda'           # device - 'cuda' or 'cpu'
):
    """
    Build the model: a convolutional extractor, num_rnn_layers BNReluRNN
    layers and a two-layer fully connected classifier.

    The RNN layers are held in an nn.ModuleList so that they are registered
    as sub-modules; it supports append(), iteration and indexing like the
    plain list used before.
    """
    super(DeepSpeech2, self).__init__()

    # A plain Python list hides its modules from parameters(), state_dict()
    # and to(); nn.ModuleList registers every appended layer.
    # NOTE(review): state_dict() now also contains 'rnn_layers.*' entries;
    # checkpoints written by the previous version lack them and need
    # strict=False (or re-training) when loaded.
    self.rnn_layers = nn.ModuleList()
    self.device = device

    # Feature size after the extractor: two size reductions, then a
    # multiplication by 32 (<<= 5).
    input_size = int(math.floor(input_size + 2 * 20 - 41) / 2 + 1)
    input_size = int(math.floor(input_size + 2 * 10 - 21) / 2 + 1)
    input_size <<= 5

    # A bidirectional RNN emits twice the hidden size.
    rnn_output_size = rnn_hidden_dim << 1 if bidirectional else rnn_hidden_dim

    self.conv = DeepSpeech2Extractor(activation, mask_conv=True)

    for idx in range(num_rnn_layers):
        self.rnn_layers.append(
            BNReluRNN(
                # Only the first layer sees the extractor output.
                input_size=input_size if idx == 0 else rnn_output_size,
                hidden_dim=rnn_hidden_dim,
                rnn_type=rnn_type,
                bidirectional=bidirectional,
                dropout_p=dropout_p,
                device=device))

    self.fc = nn.Sequential(
        Linear(rnn_output_size, rnn_hidden_dim),
        nn.ReLU(),
        Linear(rnn_hidden_dim, num_classes, bias=False))
def _convert_to_nn(self, svm_model, y_train, x_val):
    """
    Convert a fitted linear SVM into an equivalent Flatten + Linear
    network.

    With exactly two distinct labels in y_train, the single weight column
    and intercept are expanded into two outputs ([-W, W] and [-B, B]);
    otherwise the coefficients are copied as they are.

    :param svm_model: model providing coef_ and intercept_
    :param y_train: training labels, used only to count distinct classes
    :param x_val: data handed to the conversion sanity check
    :return: the converted Sequential network
    """
    #convert to linear NN
    print('converting {} model to linear NN'.format(
        self.__class__.__name__))

    W = svm_model.coef_.T
    B = svm_model.intercept_

    two_class = numpy.unique(y_train).size == 2
    if not two_class:
        linear_layer = Linear(*(W.shape))
        linear_layer.W = W
        linear_layer.B = B
    else:
        linear_layer = Linear(W.shape[0], 2)
        linear_layer.W = numpy.concatenate([-W, W], axis=1)
        linear_layer.B = numpy.concatenate([-B, B], axis=0)

    # The sanity check compares against self.model, not the svm_model
    # argument.
    reference_model = self.model
    nn_model = Sequential([Flatten(), linear_layer])
    if not self.use_gpu:
        nn_model.to_numpy()

    #sanity check model conversion
    self._sanity_check_model_conversion(reference_model, nn_model, x_val)
    print('model conversion sanity check passed')
    return nn_model
def __init__(self, feature_columns, hidden_units, cin_size, dnn_dropout=0,
             dnn_activation='relu', embed_reg=1e-6, cin_reg=1e-6, w_reg=1e-6):
    """
    xDeepFM

    :param feature_columns: A list. sparse column feature information;
        each entry is read through its 'feat_num' and 'embed_dim' keys.
    :param hidden_units: A list. a list of dnn hidden units.
    :param cin_size: A list. a list of the number of CIN layers.
    :param dnn_dropout: A scalar. dropout of dnn.
    :param dnn_activation: A string. activation function of dnn.
    :param embed_reg: A scalar. The regularizer of embedding.
    :param cin_reg: A scalar. The regularizer of cin.
    :param w_reg: A scalar. The regularizer of Linear.
    """
    super(xDeepFM, self).__init__()
    self.sparse_feature_columns = feature_columns
    # The embedding size is taken from the first feature column.
    self.embed_dim = self.sparse_feature_columns[0]['embed_dim']

    # One embedding per sparse feature, keyed 'embed_<position>'.
    embeddings = {}
    for position, feat in enumerate(self.sparse_feature_columns):
        embeddings['embed_' + str(position)] = Embedding(
            input_dim=feat['feat_num'],
            input_length=1,
            output_dim=feat['embed_dim'],
            embeddings_initializer='random_normal',
            embeddings_regularizer=l2(embed_reg))
    self.embed_layers = embeddings

    # index_mapping[i] is the running sum of 'feat_num' over the features
    # before i; feature_length is the sum over all features.
    offsets = []
    running_total = 0
    for feat in self.sparse_feature_columns:
        offsets.append(running_total)
        running_total += feat['feat_num']
    self.index_mapping = offsets
    self.feature_length = running_total

    self.linear = Linear(self.feature_length, w_reg)
    self.cin = CIN(cin_size=cin_size, l2_reg=cin_reg)
    self.dnn = DNN(hidden_units=hidden_units,
                   dnn_dropout=dnn_dropout,
                   dnn_activation=dnn_activation)
    self.cin_dense = Dense(1)
    self.dnn_dense = Dense(1)
    self.bias = self.add_weight(name='bias',
                                shape=(1, ),
                                initializer=tf.zeros_initializer())
def main():
    """
    Run one worker: register with the server, wait for the learning
    parameters, wait for the start signal, then train a Linear model by
    gradient descent and plot the result.

    The worker name is taken from the first command line argument and
    defaults to 'worker'. Both waiting phases poll the server every two
    seconds.
    """
    current_state = STATE_SETUP
    worker_name = sys.argv[1] if len(sys.argv) > 1 else 'worker'
    print('Initializing worker ' + worker_name)

    # Phase 1: poll until the server answers with learning parameters.
    while True:
        answer = socket_adapter.send_message(get_formated_message('setup', current_state), wait_answer=True)

        registered = answer['key'] == current_state and answer['code'] == code.CODE_OK
        if registered:
            print("Worker successfully registered")
        else:
            print("Error on setup | message:{}".format(answer))

        learning_parameters = answer['data']
        if learning_parameters and answer['code'] == code.CODE_OK:
            input_size = learning_parameters['input_size']
            output_size = learning_parameters['output_size']
            eta = learning_parameters['eta']
            iterations = learning_parameters['iterations']
            break

        print("Waiting for setup data")
        time.sleep(2)

    print('Learning parameters are: {}'.format(learning_parameters))

    x, y = generate_data(input_size, output_size)
    X = standardize(x)
    Y = standardize(y)

    model = Linear(X.shape[1], Y.shape[1])
    loss_fn = LossMSE()
    trainer = Trainer(model, loss_fn)

    # Phase 2: poll until the server gives the start signal.
    while True:
        current_state = STATE_LEARNING
        print("Waiting to start learning")
        answer = socket_adapter.send_message(get_formated_message('', current_state), wait_answer=True)
        if answer['code'] == code.CODE_OK:
            print("Start learning")
            break
        time.sleep(2)

    cost = trainer.trainGD(X, Y, iterations, eta=eta, update_func=on_params_update)
    plotCostAndData(model, X, Y, cost, fig_name=worker_name)
def test_Linear(self):
    """
    Compare the custom Linear layer with torch.nn.Linear.

    For 100 randomly initialized layers sharing the same weights, the
    forward output, the gradient w.r.t. the input and the parameter
    gradients must agree within an absolute tolerance of 1e-6.
    """
    np.random.seed(42)
    torch.manual_seed(42)

    batch_size, n_in, n_out = 2, 3, 4
    for _ in range(100):
        # layers initialization: the custom layer takes over the torch
        # layer's weights and bias
        torch_layer = torch.nn.Linear(n_in, n_out)
        custom_layer = Linear(n_in, n_out)
        custom_layer.W = torch_layer.weight.data.numpy()
        custom_layer.b = torch_layer.bias.data.numpy()

        x = np.random.uniform(-10, 10, (batch_size, n_in)).astype(np.float32)
        grad_out = np.random.uniform(-10, 10, (batch_size, n_out)).astype(np.float32)

        # 1. check layer output
        custom_out = custom_layer.updateOutput(x)
        x_var = Variable(torch.from_numpy(x), requires_grad=True)
        torch_out_var = torch_layer(x_var)
        outputs_match = np.allclose(torch_out_var.data.numpy(), custom_out, atol=1e-6)
        self.assertTrue(outputs_match)

        # 2. check layer input grad
        custom_grad_in = custom_layer.updateGradInput(x, grad_out)
        torch_out_var.backward(torch.from_numpy(grad_out))
        torch_grad_in_var = x_var.grad
        input_grads_match = np.allclose(torch_grad_in_var.data.numpy(), custom_grad_in, atol=1e-6)
        self.assertTrue(input_grads_match)

        # 3. check layer parameters grad
        custom_layer.accGradParameters(x, grad_out)
        weight_grad = custom_layer.gradW
        bias_grad = custom_layer.gradb
        torch_weight_grad = torch_layer.weight.grad.data.numpy()
        torch_bias_grad = torch_layer.bias.grad.data.numpy()
        self.assertTrue(np.allclose(torch_weight_grad, weight_grad, atol=1e-6))
        self.assertTrue(np.allclose(torch_bias_grad, bias_grad, atol=1e-6))
# normalize inputs train_input = (train_input - train_input.mean(dim=1)[:, None] ) / train_input.std(dim=1)[:, None] test_input = (test_input - test_input.mean(dim=1)[:, None]) / test_input.std(dim=1)[:, None] # In[] # training overallTestAcc = [] overallTrainAcc = [] for eva in range(evaluateIter): # create a model model = sequential(Linear(input_size=2, output_size=25), ReLU(), batchNormalization(batchSize, input_size=25), Linear(input_size=25, output_size=25), ReLU(), batchNormalization(batchSize, input_size=25), Linear(input_size=25, output_size=25), ReLU(), batchNormalization(batchSize, input_size=25), Linear(input_size=25, output_size=2)) # define criterion and optimizer criterion = MSELoss(method='mean') optimizer = SGD(model.parameters(), lr=learningRate) trainLossList = [] trainNumList = [] testLossList = [] testNumList = []
# Scratch experiments with a single Linear layer on hand-written tensors.
# Several lines are bare expressions: they are evaluated and discarded, so
# their value is only visible in an interactive session.

# Add a leading axis of size 1 in front of the two existing ones.
weights = weights[None, :, :]
weights.transpose(1,2).shape
# Three input rows of five values each.
# NOTE: `input` shadows the Python builtin of the same name.
input = torch.Tensor([[1, 2, 3, 4, 5], [1, 2, 3, 0, 0], [1, 1, 1, 1, 1]])
bias = torch.Tensor([1, 2, 3, 4])
bias.shape
# The string below is disabled code (manual matmul + bias); it is a bare
# string expression and has no effect.
''' input = input[:, :, None] weights.matmul(input).squeeze() + bias'''
# Forward and backward pass through a 5 -> 4 Linear layer built with a
# ReLU instance as third argument.
lin = Linear(5, 4, ReLU())
output = lin.forward(input)
target = torch.Tensor([[0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 1, 0]])
d_loss = dloss(output, target)
prev_dl_dx = lin.backward(d_loss)
prev_dl_dx.shape
# Hand-written tensor with the same 3 x 4 layout as `target`.
ex_dloss = torch.Tensor([[.1, .2, .2, .1], [.1, .2, .2, .1], [.1, .2, .2, .1]])
def _read_txt_helper(path):
    """
    Read a model stored in the plain text format.

    The file is a sequence of layer descriptions, each starting with a
    header line that names the layer type. Linear and Convolution layers
    span three lines (header, flattened weights, flattened bias); all other
    supported layers span one line. Reading stops at the first empty line
    or at end of file.

    :param path: path of the text file to read
    :return: Sequential built from the parsed layers
    :raises ValueError: if a header names an unsupported layer type
    """

    def _floats(text):
        # Parse a whitespace-separated line of numbers into a 1-D array.
        return np.array([
            float(weightstring) for weightstring in text.split()
            if len(weightstring) > 0
        ])

    # Text mode is required: in binary mode read() returns bytes on
    # Python 3 and bytes.split('\n') raises TypeError.
    with open(path, 'r') as f:
        content = f.read().split('\n')

    modules = []
    c = 0
    # The bounds check lets files without a trailing newline end cleanly
    # instead of raising IndexError after the last layer.
    while c < len(content) and len(content[c]) > 0:
        line = content[c]

        if line.startswith(Linear.__name__):  # @UndefinedVariable import error suppression for PyDev users
            # Format of linear layer
            #   Linear <rows_of_W> <columns_of_W>
            #   <flattened weight matrix W>
            #   <flattened bias vector>
            _, m, n = line.split()
            m = int(m)
            n = int(n)
            layer = Linear(m, n)
            layer.W = _floats(content[c + 1]).reshape((m, n))
            layer.B = _floats(content[c + 2])
            modules.append(layer)
            c += 3  # the description of a linear layer spans three lines

        elif line.startswith(Convolution.__name__):  # @UndefinedVariable import error suppression for PyDev users
            # Format of convolution layer
            #   Convolution <rows_of_W> <columns_of_W> <depth_of_W> <number_of_filters_W> <stride_axis_0> <stride_axis_1>
            #   <flattened filter block W>
            #   <flattened bias vector>
            _, h, w, d, n, s0, s1 = line.split()
            h = int(h)
            w = int(w)
            d = int(d)
            n = int(n)
            s0 = int(s0)
            s1 = int(s1)
            layer = Convolution(filtersize=(h, w, d, n), stride=(s0, s1))
            layer.W = _floats(content[c + 1]).reshape((h, w, d, n))
            layer.B = _floats(content[c + 2])
            modules.append(layer)
            c += 3  # the description of a convolution layer spans three lines

        elif line.startswith(SumPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
            # Format of sum pooling layer
            #   SumPool <mask_height> <mask_width> <stride_axis_0> <stride_axis_1>
            _, h, w, s0, s1 = line.split()
            h = int(h)
            w = int(w)
            s0 = int(s0)
            s1 = int(s1)
            layer = SumPool(pool=(h, w), stride=(s0, s1))
            modules.append(layer)
            c += 1  # one line of parameterized layer description

        elif line.startswith(MaxPool.__name__):  # @UndefinedVariable import error suppression for PyDev users
            # Format of max pooling layer
            #   MaxPool <mask_height> <mask_width> <stride_axis_0> <stride_axis_1>
            _, h, w, s0, s1 = line.split()
            h = int(h)
            w = int(w)
            s0 = int(s0)
            s1 = int(s1)
            layer = MaxPool(pool=(h, w), stride=(s0, s1))
            modules.append(layer)
            c += 1  # one line of parameterized layer description

        elif line.startswith(Flatten.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Flatten())
            c += 1  # one line of parameterless layer description

        elif line.startswith(Rect.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Rect())
            c += 1  # one line of parameterless layer description

        elif line.startswith(Tanh.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(Tanh())
            c += 1  # one line of parameterless layer description

        elif line.startswith(SoftMax.__name__):  # @UndefinedVariable import error suppression for PyDev users
            modules.append(SoftMax())
            c += 1  # one line of parameterless layer description

        else:
            raise ValueError(
                'Layer type identifier' + line.split()[0] +
                ' not supported for reading from plain text file')

        # c now points at the header of the next layer (or past the end).

    return Sequential(modules)
# ----- Define the paramters for learning ----- nb_classes = train_labels.shape[0] features = train_features.size(1) nb_samples = train_features.size(0) epsilon = 0.1 eta = .2 #nb_samples is now defined in Sequential() batch_size = config.batch_size epochs = int(config.epochs / (nb_samples / batch_size)) # Zeta is to make it work correctly with Sigma activation function. # train_label = train_label.add(0.125).mul(0.8) # test_label = test_label.add(0.125).mul(0.8) # ----- Implementation of the architecture ----- architecture = Sequential(Linear(2, 25, ReLU()), Linear(25, 25, ReLU()), Linear(25, 25, ReLU()), Linear(25, 2, Sigma())) # ----- Training ----- round = 1 prev_loss = math.inf prev_prev_loss = math.inf errors = [] for epoch in range(epochs): for batch_start in range(0, nb_samples, batch_size): features = train_features[batch_start:batch_start + batch_size, :] labels = train_labels[batch_start:batch_start + batch_size] tr_loss, tr_error = architecture.forward(train_features, train_labels) architecture.backward() architecture.update(eta) loss, error = architecture.forward(test_features, test_labels)
def __init__(
        self,
        num_classes: int,             # the number of classification classes
        d_model: int = 512,           # dimension of model
        input_dim: int = 80,          # dimension of input
        pad_id: int = 0,              # identification of <PAD_token>
        eos_id: int = 2,              # identification of <EOS_token>
        d_ff: int = 2048,             # dimension of feed forward network
        num_heads: int = 8,           # number of attention heads
        num_encoder_layers: int = 6,  # number of encoder layers
        num_decoder_layers: int = 6,  # number of decoder layers
        dropout_p: float = 0.3,       # dropout probability
        ffnet_style: str = 'ff',      # feed forward network style 'ff' or 'conv'
        extractor: str = 'vgg'        # CNN extractor [vgg, ds2]
) -> None:
    """
    Build the model: a convolutional front end ('vgg' or 'ds2', matched
    case-insensitively), the transformer encoder and decoder, and the
    output generator.

    input_dim is recomputed for the chosen front end before it is handed
    to the encoder.

    :raises ValueError: if extractor is neither 'vgg' nor 'ds2'
    """
    super(SpeechTransformer, self).__init__()

    assert d_model % num_heads == 0, "d_model % num_heads should be zero."

    extractor_kind = extractor.lower()

    if extractor_kind == 'vgg':
        # Round an odd input size down to the next even value, then
        # multiply by 32 (<< 5).
        if input_dim % 2:
            input_dim = (input_dim - 1) << 5
        else:
            input_dim = input_dim << 5

        vgg_layers = [
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(num_features=64),
            nn.Hardtanh(0, 20, inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(num_features=64),
            nn.Hardtanh(0, 20, inplace=True),
            nn.MaxPool2d(2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(num_features=128),
            nn.Hardtanh(0, 20, inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(num_features=128),
            nn.Hardtanh(0, 20, inplace=True),
            nn.MaxPool2d(2, stride=2),
        ]
        self.conv = nn.Sequential(*vgg_layers)

    elif extractor_kind == 'ds2':
        # Two size reductions, then a multiplication by 32 (<<= 5).
        input_dim = int(math.floor(input_dim + 2 * 20 - 41) / 2 + 1)
        input_dim = int(math.floor(input_dim + 2 * 10 - 21) / 2 + 1)
        input_dim <<= 5

        ds2_layers = [
            nn.Conv2d(1, 32, kernel_size=(41, 11), stride=(2, 2), padding=(20, 5), bias=False),
            nn.BatchNorm2d(32),
            nn.Hardtanh(0, 20, inplace=True),
            nn.Conv2d(32, 32, kernel_size=(21, 11), stride=(2, 1), padding=(10, 5), bias=False),
            nn.BatchNorm2d(32),
            nn.Hardtanh(0, 20, inplace=True),
        ]
        self.conv = nn.Sequential(*ds2_layers)

    else:
        raise ValueError("Unsupported Extractor : {0}".format(extractor))

    self.encoder = SpeechTransformerEncoder(
        d_model=d_model,
        input_dim=input_dim,
        d_ff=d_ff,
        num_layers=num_encoder_layers,
        num_heads=num_heads,
        ffnet_style=ffnet_style,
        dropout_p=dropout_p,
        pad_id=pad_id)

    self.decoder = SpeechTransformerDecoder(
        num_classes=num_classes,
        d_model=d_model,
        d_ff=d_ff,
        num_layers=num_decoder_layers,
        num_heads=num_heads,
        ffnet_style=ffnet_style,
        dropout_p=dropout_p,
        pad_id=pad_id,
        eos_id=eos_id)

    self.eos_id = eos_id
    self.pad_id = pad_id
    self.generator = Linear(d_model, num_classes)