Example #1
    def test_dropout_WITH_PROB_ZERO(self):
        rnn = EncoderRNN(self.vocab_size, 10, 50, 16, dropout_p=0)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)
        output1, _ = rnn(self.input_var, self.lengths)
        output2, _ = rnn(self.input_var, self.lengths)
        self.assertTrue(torch.equal(output1.data, output2.data))
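
The fixtures referenced here (self.vocab_size, self.input_var, self.lengths) come from the suite's setUp, which is not shown. A minimal sketch of such a fixture, assuming torch and EncoderRNN are already imported and that the input is a (batch, max_len) LongTensor of token ids with matching lengths (the original suite's exact values may differ), could be:

    def setUp(self):
        # Hypothetical fixture: shapes and values are illustrative assumptions.
        self.vocab_size = 100
        batch_size, max_len = 8, 10
        # token ids in [1, vocab_size) so index 0 can stay reserved for padding
        self.input_var = torch.LongTensor(batch_size, max_len).random_(1, self.vocab_size)
        self.lengths = [max_len] * batch_size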
Example #2
    def test_input_dropout_WITH_NON_ZERO_PROB(self):
        rnn = EncoderRNN(self.vocab_size, 10, 50, 16, input_dropout_p=0.5)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)

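        # With a non-zero input dropout probability, two forward passes over the
        # same input are expected to differ at least once across 50 trials.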
        equal = True
        for _ in range(50):
            output1, _ = rnn(self.input_var, self.lengths)
            output2, _ = rnn(self.input_var, self.lengths)
            if not torch.equal(output1.data, output2.data):
                equal = False
                break
        self.assertFalse(equal)
Example #3
    def test_dropout_WITH_NON_ZERO_PROB(self):
        # It's critical to set n_layers=2 here: per PyTorch's docs, the RNN's
        # internal dropout is a no-op when the RNN has only one layer.
        rnn = EncoderRNN(self.vocab_size, 10, 50, 16, n_layers=2, dropout_p=0.5)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)

        equal = True
        for _ in range(50):
            output1, _ = rnn(self.input_var, self.lengths)
            output2, _ = rnn(self.input_var, self.lengths)
            if not torch.equal(output1.data, output2.data):
                equal = False
                break
        self.assertFalse(equal)
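
A complementary sketch (not part of the suite above): dropout is stochastic only while the module is in training mode, so switching the encoder to eval mode should make repeated forward passes deterministic again even with dropout_p > 0. This hypothetical companion test reuses the same fixture as the tests above:

    def test_dropout_DISABLED_IN_EVAL_MODE(self):
        rnn = EncoderRNN(self.vocab_size, 10, 50, 16, n_layers=2, dropout_p=0.5)
        for param in rnn.parameters():
            param.data.uniform_(-1, 1)
        rnn.eval()  # eval mode disables dropout
        output1, _ = rnn(self.input_var, self.lengths)
        output2, _ = rnn(self.input_var, self.lengths)
        self.assertTrue(torch.equal(output1.data, output2.data))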
Example #4
    def setUp(self):
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        tgt = TargetField()
        self.dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_path, 'data/eng-fra.txt'), format='tsv',
            fields=[('src', src), ('tgt', tgt)],
        )
        src.build_vocab(self.dataset)
        tgt.build_vocab(self.dataset)

        encoder = EncoderRNN(len(src.vocab), 10, 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(tgt.vocab), 10, 10,
                             tgt.sos_id, tgt.eos_id, rnn_cell='lstm')
        self.seq2seq = Seq2seq(encoder, decoder)

        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
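
A typical follow-up to this fixture is a forward pass over one batch. The sketch below is a hypothetical test that assumes the legacy torchtext BucketIterator API, that SourceField yields (tokens, lengths) pairs, and that Seq2seq.forward accepts (src, lengths, tgt); these details vary across versions of the library:

    def test_forward_pass(self):
        # Hypothetical test: run one batch through the model end to end.
        batch_iterator = torchtext.data.BucketIterator(
            dataset=self.dataset, batch_size=4,
            sort_key=lambda x: len(x.src), sort_within_batch=True, repeat=False)
        batch = next(iter(batch_iterator))
        input_var, input_lengths = batch.src
        target_var = batch.tgt
        decoder_outputs, _, _ = self.seq2seq(input_var, input_lengths.tolist(), target_var)
        self.assertTrue(len(decoder_outputs) > 0)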
Example #5
    @classmethod
    def setUpClass(self):
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        trg = TargetField()
        dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_path, 'data/eng-fra.txt'),
            format='tsv',
            fields=[('src', src), ('trg', trg)],
        )
        src.build_vocab(dataset)
        trg.build_vocab(dataset)

        encoder = EncoderRNN(len(src.vocab), 5, 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(trg.vocab),
                             10,
                             10,
                             trg.sos_id,
                             trg.eos_id,
                             rnn_cell='lstm')
        seq2seq = Seq2seq(encoder, decoder)
        self.predictor = Predictor(seq2seq, src.vocab, trg.vocab)
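
A hedged usage sketch for the fixture above, assuming Predictor.predict takes a list of source tokens and returns the predicted target tokens (the test name and input sentence are illustrative, not from the original suite):

    def test_predict(self):
        src_seq = ['I', 'am', 'fat']
        tgt_seq = self.predictor.predict(src_seq)
        # the predictor should hand back a list of target tokens
        self.assertIsInstance(tgt_seq, list)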
Example #6
    def __init__(self, opt, shared=None):
        """Set up model if shared params not set, otherwise no work to do."""
        super().__init__(opt, shared)
        opt = self.opt  # there is a deepcopy in the init

        # all instances may need some params
        self.truncate = opt['truncate'] if opt['truncate'] > 0 else None
        self.metrics = {'loss': 0, 'num_tokens': 0}
        self.history = {}
        self.batch_idx = shared and shared.get('batchindex') or 0
        self.states = {}

        # check for cuda
        self.use_cuda = not opt.get('no_cuda') and torch.cuda.is_available()

        if shared:
            # set up shared properties
            self.dict = shared['dict']
            self.START_IDX = shared['START_IDX']
            self.END_IDX = shared['END_IDX']
            self.NULL_IDX = shared['NULL_IDX']
            # answers contains a batch_size list of the last answer produced
            self.answers = shared['answers']

            if 'model' in shared:
                # model is shared during hogwild
                self.model = shared['model']
        else:
            # this is not a shared instance of this class, so do full init
            # answers contains a batch_size list of the last answer produced
            self.answers = [None] * opt['batchsize']

            if self.use_cuda:
                torch.cuda.set_device(opt['gpu'])

            # check first for 'init_model' for loading model from file
            if opt.get('init_model') and os.path.isfile(opt['init_model']):
                init_model = opt['init_model']
            # next check for 'model_file'
            elif opt.get('model_file') and os.path.isfile(opt['model_file']):
                init_model = opt['model_file']
            else:
                init_model = None

            if init_model is not None:
                # load model parameters if available
                print('Loading existing model params from ' + init_model)
                new_opt, self.states = self.load(init_model)
                # override model-specific options with stored ones
                opt = self.override_opt(new_opt)

            if opt['dict_file'] is None:
                if init_model is not None and os.path.isfile(init_model + '.dict'):
                    # check first to see if a dictionary exists
                    opt['dict_file'] = init_model + '.dict'
                elif opt.get('model_file'):
                    # otherwise, set default dict-file if it is not set
                    opt['dict_file'] = opt['model_file'] + '.dict'

            # load dictionary and basic tokens & vectors
            self.dict = DictionaryAgent(opt)
            self.id = 'Seq2Seq'
            # we use START markers to start our output
            self.START_IDX = self.dict[self.dict.start_token]
            # we use END markers to end our output
            self.END_IDX = self.dict[self.dict.end_token]
            # get index of null token from dictionary (probably 0)
            self.NULL_IDX = self.dict[self.dict.null_token]

            encoder = EncoderRNN(
                len(self.dict),
                opt['maxlength_in'],
                opt['hiddensize'],
                dropout_p=opt['dropout'],
                input_dropout_p=opt['dropout'],
                n_layers=opt['numlayers'],
                rnn_cell=opt['rnncell'],
                bidirectional=opt['bidirectional'],
                variable_lengths=True,
            )
            decoder = DecoderRNN(
                len(self.dict),
                opt['maxlength_out'],
                opt['hiddensize'] * 2 if opt['bidirectional'] else opt['hiddensize'],
                dropout_p=opt['dropout'],
                input_dropout_p=opt['dropout'],
                n_layers=opt['numlayers'],
                rnn_cell=opt['rnncell'],
                bidirectional=opt['bidirectional'],
                sos_id=self.START_IDX,
                eos_id=self.END_IDX,
                use_attention=opt['attention'],
            )
            self.model = Seq2seq(encoder, decoder)

            if self.states:
                # set loaded states if applicable
                self.model.load_state_dict(self.states['model'])

            if self.use_cuda:
                self.model.cuda()

        # set up criteria
        self.criterion = nn.NLLLoss(ignore_index=self.NULL_IDX, size_average=False)
        if self.use_cuda:
            self.criterion.cuda()

        if 'train' in opt.get('datatype', ''):
            # if model was built, do more setup
            self.clip = opt['gradient_clip']

            # set up tensors once
            self.START = torch.LongTensor([self.START_IDX])

            if self.use_cuda:
                # push to cuda
                self.START = self.START.cuda()

            # set up optimizer
            lr = opt['learningrate']
            optim_class = IbmSeq2seqAgent.OPTIM_OPTS[opt['optimizer']]
            kwargs = {'lr': lr}
            if opt['optimizer'] == 'sgd':
                kwargs['momentum'] = 0.95
                kwargs['nesterov'] = True

            self.optimizer = optim_class(
                [p for p in self.model.parameters() if p.requires_grad], **kwargs
            )
            if self.states:
                if self.states['optimizer_type'] != opt['optimizer']:
                    print('WARNING: not loading optim state since optim class changed.')
                else:
                    self.optimizer.load_state_dict(self.states['optimizer'])
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer, 'min', factor=0.5, patience=3, verbose=True
            )

        self.reset()
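
The __init__ above only wires the training pieces together. A minimal standalone sketch of how NLLLoss with ignore_index, gradient clipping, and ReduceLROnPlateau typically interact during one update (dummy model and tensors, not the agent's actual training loop) looks like this:

import torch
from torch import nn, optim

model = nn.Linear(16, 8)                    # stand-in for the seq2seq model
criterion = nn.NLLLoss(ignore_index=0)      # ignore the padding index, as above
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.95, nesterov=True)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3)

scores = nn.functional.log_softmax(model(torch.randn(4, 16)), dim=1)
target = torch.randint(0, 8, (4,))
step_loss = criterion(scores, target)
optimizer.zero_grad()
step_loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.1)  # cf. opt['gradient_clip']
optimizer.step()
scheduler.step(step_loss.item())  # in practice, step on a validation loss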
Example #7
        # NOTE: the snippet begins mid-conditional; the `if` branch that selects
        # WFMSELoss over plain nn.MSELoss is truncated in the source.
        loss = WFMSELoss(delay=36,
                         loss_weights=((0.25, 0.75), (0.3, 0.5, 0.2)))
    else:
        loss = nn.MSELoss()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        input_dim = 38 + 10
        hidden_dim = 128
        bidirectional = False
        encoder = EncoderRNN(input_seq_len=seqence_len,
                             input_dim=input_dim,
                             hidden_dim=hidden_dim,
                             bidirectional=bidirectional,
                             n_layers=opt.rnn_layers,
                             rnn_cell=opt.rnn_cell_type,
                             dropout_p=opt.rnn_dropout)
        decoder = DecoderRNN(input_seq_len=seqence_len,
                             output_seq_len=delay,
                             output_dim=output_dim,
                             hidden_dim=hidden_dim * 2 if bidirectional else hidden_dim,
                             dropout_p=opt.rnn_dropout,
                             bidirectional=bidirectional,
                             n_layers=opt.rnn_layers,
                             rnn_cell=opt.rnn_cell_type,
                             use_attention=opt.use_attention)
        seq2seq = Seq2Seq(encoder, decoder,
                          decode_function=torch.tanh).to(device)
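
The snippet stops after constructing the model and loss. A hedged continuation sketch of the training step it is setting up (optimizer creation plus one update) might look as follows; opt.lr, train_loader, and the tensor shapes are illustrative assumptions, not part of the original script:

optimizer = torch.optim.Adam(seq2seq.parameters(), lr=opt.lr)  # hypothetical lr option
seq2seq.train()
for inputs, targets in train_loader:       # hypothetical DataLoader of (input, target) batches
    inputs, targets = inputs.to(device), targets.to(device)
    preds = seq2seq(inputs)                # forward pass through encoder + decoder
    batch_loss = loss(preds, targets)      # WFMSELoss or nn.MSELoss from above
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()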