def test_load_weight(self):
    """A saved state_dict is an OrderedDict mapping parameter names to tensors."""
    state_dict = torch.load('./weights/gru_sequential_mnist_sample.dict')
    # Parameter names and shapes expected inside the checkpoint.
    expected = [
        ('gru.weight_ih_l0', [384, 1]),
        ('gru.weight_hh_l0', [384, 128]),
        ('gru.bias_ih_l0', [384]),
        ('gru.bias_hh_l0', [384]),
        ('linear.weight', [10, 128]),
        ('linear.bias', [10]),
    ]
    for position, (key, tensor) in enumerate(state_dict.items()):
        expected_name, expected_shape = expected[position]
        assert key == expected_name
        assert list(tensor.size()) == expected_shape
    # A model whose architecture matches the checkpoint can load it.
    torch.manual_seed(0)
    model = GRU(
        input_size=1,    # Sequential MNIST input is 1-dimensional
        output_size=10,  # Sequential MNIST output is 10 classes
        num_layers=1,    # number of stacked GRU blocks
        d_hidden=128)    # GRU hidden-state dimensionality
    # The first weight differs before vs. after loading the checkpoint.
    assert model.gru.weight_ih_l0.data[0, 0].item() == approx(-0.0006617, rel=1e-3)
    model.load_state_dict(state_dict)
    assert model.gru.weight_ih_l0.data[0, 0].item() == approx(0.2248, rel=1e-3)
def test_state_dict_note(self):
    """What happens if we scribble bookkeeping notes into a state_dict?"""
    model = GRU(
        input_size=1,    # Sequential MNIST input is 1-dimensional
        output_size=10,  # Sequential MNIST output is 10 classes
        num_layers=1,    # number of stacked GRU blocks
        d_hidden=128)    # GRU hidden-state dimensionality
    model.load_state_dict(
        torch.load('./weights/gru_sequential_mnist_sample.dict'))
    # Clone every tensor so the dict no longer aliases the live parameters.
    state_dict = model.state_dict()
    for name, tensor in state_dict.items():
        state_dict[name] = tensor.clone()
    # Attach some notes alongside the weights.
    state_dict['loss'] = 10.0
    state_dict['accuracy'] = 0.97
    state_dict['note'] = 'hoge'
    # A strict load rejects the dict because of the unexpected keys ...
    with pytest.raises(RuntimeError):
        model.load_state_dict(state_dict)
    # ... but strict=False simply ignores them.
    # https://github.com/pytorch/pytorch/blob/v1.7.1/torch/nn/modules/module.py#L1010-L1012
    # Deliberately clobber a weight first to prove the load really happened.
    model.gru.weight_ih_l0.data[0, 0] = 1.0
    model.load_state_dict(state_dict, strict=False)
    assert model.gru.weight_ih_l0.data[0, 0].item() == approx(0.2248, rel=1e-3)
def test_vanilla_gru(self):
    """Smoke-test forward propagation of the GRU and report wall-clock time."""
    gru = GRU(8000, 100)
    # All six weight matrices should be allocated as float64.
    for matrix in (gru.X[0], gru.X[1], gru.X[2],
                   gru.Y[0], gru.Y[1], gru.Y[2]):
        assert matrix.dtype == "float64"
    input_data = np.arange(8000)
    started = time.time()
    gru.forward_propagation(input_data)
    elapsed = time.time() - started
    print("\nGRU forward propagation %s sec\n" % str(elapsed))
def main(arch='gru', id='hoge', weight_dict=None, epochs=10, permute=None):
    """Train a TCN or GRU on (permuted) Sequential MNIST.

    Args:
        arch: architecture to train, either 'tcn' or 'gru'.
        id: tag embedded in the per-epoch checkpoint filenames.
        weight_dict: optional path of a state_dict to warm-start from.
        epochs: number of training epochs.
        permute: forwarded to the MNIST loader (pixel permutation).

    Raises:
        ValueError: if arch is neither 'tcn' nor 'gru'.
    """
    # Fix: an unrecognized arch previously fell through both branches and
    # crashed later with a confusing NameError on `model`; fail fast instead.
    if arch not in ('tcn', 'gru'):
        raise ValueError(f'unknown arch: {arch}')
    batch_size = 64
    # TCN consumes the flattened sequential form; the RNN path uses the
    # sequential_rnn form of the loader.
    train_loader, test_loader = MNIST(batch_size=batch_size,
                                      sequential=(arch == 'tcn'),
                                      sequential_rnn=(arch != 'tcn'),
                                      permute=permute)
    if arch == 'tcn':
        model = TCN(input_size=1, output_size=10, num_channels=[25]*8,
                    kernel_size=7, dropout=0.0)
        optimizer = optim.Adam(model.parameters(), lr=2e-3)
    else:  # arch == 'gru', guaranteed by the validation above
        model = GRU(input_size=1, output_size=10, num_layers=1, d_hidden=128,
                    initial_update_gate_bias=0.5, dropout=0.0)
        optimizer = optim.RMSprop(model.parameters(), lr=1e-3)
    # Optionally warm-start from a previously saved checkpoint.
    if weight_dict is not None:
        model.load_state_dict(torch.load(weight_dict))
    for epoch in range(epochs):
        print(f'エポック {epoch}')
        train(model, optimizer, train_loader)
        test(model, test_loader)
        # Checkpoint after every epoch.
        torch.save(model.state_dict(),
                   f'./weights/{arch}_sequential_mnist_{id}_{epoch}.dict')
    test(model, test_loader)
def test_make_gru(dim_in=31, dim_h=11, dim_out=None, i_net=None, a_net=None, o_net=None, c_net=None): print 'Testing GRU formation' if i_net is None: i_net = dict( dim_h=17, n_layers=2, h_act='T.tanh', weight_scale=0.1, ) if a_net is None: a_net = dict( dim_h=19, n_layers=2, h_act='T.tanh', weight_scale=0.1 ) if o_net is None: o_net = dict( dim_h=23, n_layers=2, weight_scale=0.1, distribution='binomial' ) nets = dict(i_net=i_net, a_net=a_net, o_net=o_net, c_net=c_net) trng = RandomStreams(101) rnn = GRU.factory(dim_in=dim_in, dim_hs=[dim_h], dim_out=dim_out, **nets) rnn.set_tparams() print 'GRU formed correctly' return rnn
def test_make_gru(dim_in=31, dim_h=11, dim_out=None, i_net=None, a_net=None, o_net=None, c_net=None): print 'Testing GRU formation' if i_net is None: i_net = dict( dim_h=17, n_layers=2, h_act='T.tanh', weight_scale=0.1, ) if a_net is None: a_net = dict(dim_h=19, n_layers=2, h_act='T.tanh', weight_scale=0.1) if o_net is None: o_net = dict(dim_h=23, n_layers=2, weight_scale=0.1, distribution='binomial') nets = dict(i_net=i_net, a_net=a_net, o_net=o_net, c_net=c_net) trng = RandomStreams(101) rnn = GRU.factory(dim_in=dim_in, dim_hs=[dim_h], dim_out=dim_out, **nets) rnn.set_tparams() print 'GRU formed correctly' return rnn
def test_state_dict_ref(self):
    """state_dict shares storage with the model until its tensors are cloned."""
    torch.manual_seed(0)
    model = GRU(input_size=1, output_size=4, num_layers=1, d_hidden=8)
    state_dict = model.state_dict()
    weight_key = 'gru.weight_ih_l0'
    assert state_dict[weight_key][0, 0].item() == approx(-0.002647, rel=1e-3)
    # Updating the parameter in place is visible through the state_dict ...
    model.gru.weight_ih_l0.data[0, 0] = 1.0
    assert state_dict[weight_key][0, 0].item() == approx(1.0, rel=1e-3)
    # ... until clone() severs the shared storage.
    state_dict[weight_key] = state_dict[weight_key].clone()
    model.gru.weight_ih_l0.data[0, 0] = 2.0
    assert state_dict[weight_key][0, 0].item() == approx(1.0, rel=1e-3)
def test(self):
    """Check parameter shapes and the total parameter count of a 2-layer GRU.

    192 = 3 gates x 64 hidden units per gate matrix/bias.
    """
    model = GRU(input_size=1, output_size=8, num_layers=2, d_hidden=64)
    expected = [
        (model.gru.weight_ih_l0, [192, 1]),
        (model.gru.weight_hh_l0, [192, 64]),
        (model.gru.bias_ih_l0, [192]),
        (model.gru.bias_hh_l0, [192]),
        (model.gru.weight_ih_l1, [192, 64]),
        (model.gru.weight_hh_l1, [192, 64]),
        (model.gru.bias_ih_l1, [192]),
        (model.gru.bias_hh_l1, [192]),
        (model.linear.weight, [8, 64]),
        (model.linear.bias, [8]),
    ]
    for tensor, shape in expected:
        assert list(tensor.size()) == shape
    # The total must equal the sum of element counts of every parameter above.
    total = sum(
        shape[0] * shape[1] if len(shape) == 2 else shape[0]
        for _, shape in expected
    )
    assert count_parameters(model) == total
def load(vocab_size, model_name='gru', **kwargs):
    """Instantiate the model named by model_name.

    Args:
        vocab_size: vocabulary size forwarded to the model constructor.
        model_name: name of the architecture; currently only 'gru'.
        **kwargs: extra keyword arguments for the model constructor.

    Returns:
        The constructed model instance.

    Raises:
        ValueError: if model_name is not recognized.
    """
    if model_name != 'gru':
        raise ValueError('Unknown model: %s' % model_name)
    return GRU(vocab_size, **kwargs)
def train(self, triplet=True):
    """Train a speaker model: softmax pretraining, then optional triplet fine-tuning.

    NOTE(review): assumes a CUDA device is available (models are moved
    with .cuda() unconditionally).
    """
    # generate dataset for PyTorch
    from data.dataset import TruncatedInputFromMFB, ToTensor, SpeakerDataset
    from torchvision import transforms
    from torch.utils.data import DataLoader
    import torch
    transform = transforms.Compose([TruncatedInputFromMFB(), ToTensor()])
    # The 3D-CNN consumes a 3D wrapper around the same base dataset.
    if Config.MODEL_TYPE == 'cnn3d':
        from data.dataset3d import SpeakerDataset3D
        initial_dataset = SpeakerDataset(transform=transform)
        train_dataset = SpeakerDataset3D(initial_dataset)
    else:
        train_dataset = SpeakerDataset(transform=transform)
    # instantiate a model (architecture selected via Config.MODEL_TYPE)
    if Config.MODEL_TYPE == 'rescnn':
        from models.rescnn import ResNet
        model_ = ResNet(layers=Config.RESCNN_LAYERS,
                        num_classes=Config.NUM_CLASSES)
    elif Config.MODEL_TYPE == 'gru':
        from models.gru import GRU
        model_ = GRU(layers=Config.GRU_LAYERS,
                     num_classes=Config.NUM_CLASSES)
    elif Config.MODEL_TYPE == 'cnn3d':
        from models.cnn3d import CNN3D
        model_ = CNN3D(num_classes=Config.NUM_CLASSES)
    from utils.train import Trainer
    model_ = model_.cuda()
    # Stage 1: softmax (classification) pretraining.
    epoch = Config.SOFTMAX_TRAINING_EPOCH
    for i in range(epoch):
        # NOTE(review): a fresh Adam optimizer (and thus fresh optimizer
        # state) is created on every epoch — confirm this reset is
        # intentional rather than a loop-hoisting oversight.
        optimizer = torch.optim.Adam(model_.parameters())
        train_loader = DataLoader(train_dataset,
                                  batch_size=Config.PRETRAIN_BATCH_SIZE,
                                  shuffle=True)
        Trainer.train(train_loader, model_, optimizer, i)
    if triplet:
        # Stage 2: fine-tune a deep copy with semi-hard triplet loss and a
        # linearly decaying SGD learning rate.
        from copy import deepcopy
        model_tri = deepcopy(model_)
        model_tri = model_tri.cuda()
        epoch_ = Config.TRIPLET_TRAINING_EPOCH
        for i in range(epoch_):
            optimizer_ = torch.optim.SGD(
                model_tri.parameters(),
                lr=Config.TRIPLET_LR - i * Config.TRIPLET_LR_DECAY,
                momentum=Config.TRIPLET_MOMENTUM)
            train_loader = DataLoader(
                train_dataset,
                batch_size=Config.FINETUNE_BATCH_SIZE,
                shuffle=True)
            Trainer.train_tri(train_loader, model_tri, optimizer_, i,
                              semi_hard=True,
                              triplet_margin=Config.TRIPLET_MARGIN)
def test_make_gru(dim_in=31, dim_h=11, dim_out=None, i_net=None, a_net=None, o_net=None, c_net=None, out_act='T.nnet.sigmoid'): print 'Testing GRU formation' if i_net is None: i_net = dict( dim_h=17, n_layers=2, h_act='T.tanh', out_act='T.tanh', weight_scale=0.1, ) if a_net is None: a_net = dict(dim_h=19, n_layers=2, h_act='T.tanh', out_act='T.tanh', weight_scale=0.1) if o_net is None: o_net = dict(dim_h=23, n_layers=2, weight_scale=0.1, out_act=out_act) nets = dict(i_net=i_net, a_net=a_net, o_net=o_net, c_net=c_net) trng = RandomStreams(101) mlps = GRU.mlp_factory(dim_in, dim_h, dim_out=dim_out, **nets) rnn = GRU(dim_in, dim_h, dim_out=dim_out, trng=trng, **mlps) rnn.set_tparams() print 'GRU formed correctly' return rnn
def export_onnx():
    """Export the model selected by Config.MODEL_TYPE to ./checkpoints/<type>.onnx.

    Raises:
        ValueError: if Config.MODEL_TYPE is not a known architecture.
    """
    if Config.MODEL_TYPE == 'rescnn':
        from models.rescnn import ResNet
        model_ = ResNet(layers=Config.RESCNN_LAYERS,
                        num_classes=Config.NUM_CLASSES)
        # TODO(review): placeholder checkpoint path — point at a real file.
        model_.load_state_dict(torch.load('./checkpoints/...'))
        # Fix: this branch previously left dummy_input undefined (the
        # assignment was commented out), so torch.onnx.export crashed with
        # a NameError whenever MODEL_TYPE == 'rescnn'. Shape mirrors the
        # gru branch — confirm against ResNet's actual input shape.
        dummy_input = Variable(torch.randn(1, 3, 64, 64))
    elif Config.MODEL_TYPE == 'gru':
        from models.gru import GRU
        model_ = GRU(layers=Config.GRU_LAYERS,
                     num_classes=Config.NUM_CLASSES)
        dummy_input = Variable(torch.randn(1, 3, 64, 64))
    elif Config.MODEL_TYPE == 'cnn3d':
        from models.cnn3d import CNN3D
        model_ = CNN3D(num_classes=Config.NUM_CLASSES)
        dummy_input = Variable(torch.randn(1, 3, 20, 80, 40))
    else:
        # Previously an unknown type fell through to a NameError on model_.
        raise ValueError('Unknown MODEL_TYPE: %s' % Config.MODEL_TYPE)
    model_name = Config.MODEL_TYPE + '.onnx'
    torch.onnx.export(model_, dummy_input,
                      os.path.join('./checkpoints', model_name))
if __name__ == "__main__":
    # Entry point: set up data and models for text classification with a
    # CNN or GRU encoder followed by an FFNN classifier head.
    # NOTE(review): `model` and `field` are defined elsewhere in this file
    # (outside this view) — presumably command-line options; confirm.
    print(model, field)
    database = Database()
    batch_size = 40
    dataloader = data.DataLoader(database.train_set, batch_size=batch_size,
                                 shuffle=True, drop_last=True)
    # Encoder maps 300-d inputs to a 200-d representation.
    if model == "cnn":
        encoder = CNN(300, 200, 3)
    else:
        encoder = GRU(300, 200)
    # Classifier head: 200 -> 50 -> 8 output classes.
    classifier = FFNN(200, 50, 8)
    learning_rate = 1e-3
    n_epochs = 10
    # Separate optimizers; only the encoder gets weight decay.
    optimizer_encoder = torch.optim.Adam(encoder.parameters(),
                                         lr=learning_rate, weight_decay=1e-4)
    optimizer_classifier = torch.optim.Adam(classifier.parameters(),
                                            lr=learning_rate)
    # Error trackers — presumably appended to by training code that follows
    # beyond this view.
    train_errors = []
    dev_errors = []
    test_errors = []