import torch
import torch.nn as nn
from datetime import datetime
# Module paths below are assumptions inferred from how these names are used
# in this file; adjust them to the repo's actual layout.
from config import SEQ_LENGTH, DEVICE
from utils import openLog
from lstmCore import pretrain_LSTMCore, read_sampleFile

def sanityCheck_GeneratorLoss(pretrain_result=None, batch_size=5):
    '''test custom loss function'''
    # x and reverse_vocab are needed below even when a pretrain_result is
    # supplied, so read the sample file unconditionally.
    x, _, reverse_vocab, _ = read_sampleFile()
    if pretrain_result is None:
        pretrain_result = pretrain_LSTMCore(x, vocab_size=len(reverse_vocab))
    model = pretrain_result[0]
    y_pred_pretrain = pretrain_result[1].view(
        [-1, SEQ_LENGTH, len(reverse_vocab)])
    test_reward = y_pred_pretrain.sum(dim=2).data
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = torch.optim.SGD(params, lr=0.01)
    optimizer.zero_grad()
    log = openLog('test.txt')
    log.write('\n\nTest generator.sanityCheck_GeneratorLoss: {}\n'.format(
        datetime.now()))
    criterion = GeneratorLoss()
    g_loss = criterion(y_pred_pretrain[0:batch_size, :, :],
                       x[0:batch_size, :],
                       test_reward[0:batch_size, :])
    g_loss.backward()
    optimizer.step()
    log.write(' generator.sanityCheck_GeneratorLoss SUCCESSFUL: '
              + str(g_loss) + '\n')
    log.close()
    return g_loss
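# GeneratorLoss itself is defined elsewhere in this file. For reference, a
# minimal sketch of the SeqGAN-style policy-gradient objective it is assumed
# to compute; this is a hypothetical re-implementation, not the repo's class.
class GeneratorLossSketch(nn.Module):
    def forward(self, y_pred, x, reward):
        # y_pred: (batch, seq_len, vocab) per-step token probabilities
        # x:      (batch, seq_len) ids of the tokens actually taken
        # reward: (batch, seq_len) per-step rewards
        chosen = torch.gather(y_pred, dim=2,
                              index=x.long().unsqueeze(2)).squeeze(2)
        # negative reward-weighted log-probability of the chosen tokens
        return -torch.sum(torch.log(chosen.clamp(min=1e-8)) * reward)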
class Generator(nn.Module):
    '''wraps a pretrained lstmCore model for sampling and policy-gradient training'''
    def __init__(self, pretrain_model=None, start_token=0, ignored_tokens=None):
        super().__init__()
        self.start_token = start_token
        self.ignored_tokens = ignored_tokens
        if pretrain_model is None:
            # fall back to pretraining on the bundled sample file
            x, _, reverse_vocab, _ = read_sampleFile()
            self.pretrain_model, _ = pretrain_LSTMCore(
                train_x=x, vocab_size=len(reverse_vocab))
        else:
            self.pretrain_model = pretrain_model
        self.softmax = nn.Softmax(dim=2)
        self.loss = GeneratorLoss()
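    # The code that consumes start_token/ignored_tokens lives in the sampling
    # logic elsewhere in this class. Below is a minimal sketch of one masked
    # sampling step, assuming ignored_tokens is a list of token ids to forbid;
    # this is a hypothetical helper, not the repo's actual method.
    def _sample_step_sketch(self, probs):
        # probs: (batch, 1, vocab) softmax output for the current step
        if self.ignored_tokens is not None:
            probs = probs.clone()
            probs[:, :, self.ignored_tokens] = 0.0           # mask forbidden ids
            probs = probs / probs.sum(dim=2, keepdim=True)   # renormalize
        return torch.multinomial(probs.squeeze(1), num_samples=1)  # (batch, 1)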
def sanityCheck_generator(model=None):
    '''test Generator instantiation and train_generator function'''
    log = openLog('test.txt')
    log.write('\n\nTest generator.sanityCheck_generator: {}\n'.format(
        datetime.now()))
    x, vocabulary, reverse_vocab, _ = read_sampleFile()
    if model is None:
        pretrain_result = pretrain_LSTMCore(x, vocab_size=len(vocabulary))
        model = Generator(pretrain_model=pretrain_result[0])
        log.write(' generator instantiated: {}\n'.format(datetime.now()))
    model.to(DEVICE)
    model, y_prob_all, y_output_all = train_generator(model, x, reward=None)
    log.write(' trained generator outputs:\n')
    log.write('   y_output_all shape: ' + str(y_output_all.shape) + '\n')
    log.write('   y_prob_all shape: ' + str(y_prob_all.shape) + '\n')
    log.close()
    return model, y_prob_all, y_output_all
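# train_generator is defined elsewhere in this repo. A minimal sketch of one
# update step consistent with how it is called above; the forward signature
# model(x) and the all-ones fallback reward are assumptions, not the repo's
# actual code.
def train_generator_sketch(model, x, reward=None):
    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.SGD(params, lr=0.01)
    y_prob_all, y_output_all = model(x)      # probabilities and sampled ids
    if reward is None:
        # a neutral reward reduces the update to maximum-likelihood training
        reward = torch.ones(y_output_all.shape, device=y_output_all.device)
    loss = model.loss(y_prob_all, y_output_all, reward)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return model, y_prob_all, y_output_all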
def pretrain_generator(x, start_token, end_token, ignored_tokens=None,
                       sentence_lengths=None, batch_size=1, vocab_size=10):
    pretrain_result = pretrain_LSTMCore(train_x=x,
                                        sentence_lengths=sentence_lengths,
                                        batch_size=batch_size,
                                        end_token=end_token,
                                        vocab_size=vocab_size)
    generator = Generator(pretrain_model=pretrain_result[0],
                          start_token=start_token,
                          ignored_tokens=ignored_tokens)
    # generator itself is not wrapped in DataParallel; only the lstmCore
    # inside it is. If generator were also DataParallel, its call into
    # lstmCore would raise "RuntimeError: all tensors must be on devices[0]",
    # because the generator instance may not live on devices[0].
    generator.to(DEVICE)
    return generator
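# Example usage, a minimal sketch: the start/end token ids passed here are
# placeholder assumptions, not values confirmed by this file.
if __name__ == '__main__':
    x, vocabulary, _, _ = read_sampleFile()
    generator = pretrain_generator(x, start_token=0, end_token=1,
                                   vocab_size=len(vocabulary))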