def load_model(file, cuda):
    with open(file) as f:
        config = json.load(f)

    model_type = config['model_type']

    if model_type == 'wgan-gp':
        model = build_standard_wgan_gp(config, cuda)

    elif model_type == 'ae':
        model = build_standard_ae(config, cuda)

    elif model_type == 'vae':
        model = build_standard_vae(config, cuda)

    elif model_type == 'labeled_gan':
        model = build_labeled_gan(config, cuda)

    elif model_type == 'encoder':
        model = Encoder(config)

    elif model_type == 'kl-encoder':
        model = KLEncoder(config)

    elif model_type == 'decoder':
        model = Decoder(config)

    elif model_type == 'dual-encoder':
        model = DualEncoder(config)

    elif model_type == 'img2img-decoder':
        model = ImgToImgDecoder(config)

    elif model_type == 'img2img-gan':
        model = build_img2img_gan(config, cuda)

    else:
        raise ValueError(f"Unknown model type '{model_type}'!")

    if cuda:
        model.cuda()

    return model
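
# A minimal usage sketch (hypothetical config path; assumes a JSON config
# file containing the 'model_type' key dispatched above):
#
#   model = load_model('configs/vae.json', cuda=torch.cuda.is_available())
#   model.eval()
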
# min-max normalize all numeric attributes to the range [0, 1]
ori_dataset_numeric_attr = (numeric_attr - numeric_attr.min()) / (
    numeric_attr.max() - numeric_attr.min())

## merge categorical and numeric attribute subsets
ori_subset_transformed = pd.concat(
    [ori_dataset_categ_transformed, ori_dataset_numeric_attr], axis=1)
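
# A minimal sketch of the categorical transform assumed above (hypothetical;
# the original one-hot encoding step is not part of this excerpt):
#
#   ori_dataset_categ_transformed = pd.get_dummies(ori_dataset[categ_attr_names])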

## ADVERSARIAL NEURAL NETWORK IMPLEMENTATION

# Encoder/Generator network instantiation
# init training network classes / architectures
encoder_train = Encoder(input_size=ori_subset_transformed.shape[1],
                        hidden_size=[256, 64, 16, 4, 2])
# push to cuda if cudnn is available
if torch.backends.cudnn.version() is not None and USE_CUDA:
    encoder_train = encoder_train.cuda()
# print the initialized architectures
now = datetime.utcnow().strftime("%Y%m%d-%H:%M:%S")
print('[LOG {}] encoder-generator architecture:\n\n{}\n'.format(
    now, encoder_train))

# Decoder network instantiation
# init training network classes / architectures
decoder_train = Decoder(output_size=ori_subset_transformed.shape[1],
                        hidden_size=[2, 4, 16, 64, 256])
# push to cuda if cudnn is available
if torch.backends.cudnn.version() is not None and USE_CUDA:
    decoder_train = decoder_train.cuda()
# print the initialized architectures
now = datetime.utcnow().strftime("%Y%m%d-%H:%M:%S")
print('[LOG {}] decoder architecture:\n\n{}\n'.format(now, decoder_train))
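
# A minimal sketch of how the two networks compose into an autoencoder
# (assumed from the mirrored hidden sizes; `x` denotes a mini-batch of the
# transformed journal entries):
#
#   z = encoder_train(x)        # compress to the 2-dimensional bottleneck
#   x_hat = decoder_train(z)    # reconstruct the original attribute vector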
    data = dataloader.gen_data()
    vocab_size = vocab.index
    hidden_dim = 512

    learning_rate = 1e-3
    embedding_dim = 512

    # Initialize the Encoder and Decoder networks with the appropriate arguments
    encoder = Encoder()
    decoder = DecoderRNN(embedding_dim=embedding_dim,
                         hidden_dim=hidden_dim,
                         vocab_size=vocab_size)

    # Move the encoder and decoder to the GPU if CUDA is available
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Concatenate the decoder parameters with the encoder's final linear
    # layer (only that layer of the encoder is fine-tuned)
    params = list(encoder.linear.parameters()) + list(decoder.parameters())

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=params, lr=learning_rate)

    num_epoch = 1000
    save_every = 10

    print('-' * 100)
    print('Starting training network')
    print('-' * 100)
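
    # A minimal sketch of the epoch loop that would follow (assumed; the
    # original loop is not part of this excerpt):
    #
    #   for epoch in range(num_epoch):
    #       for images, captions in data:
    #           features = encoder(images)
    #           outputs = decoder(features, captions)
    #           loss = criterion(outputs.view(-1, vocab_size),
    #                            captions.view(-1))
    #           optimizer.zero_grad()
    #           loss.backward()
    #           optimizer.step()
    #       if epoch % save_every == 0:
    #           torch.save(decoder.state_dict(),
    #                      'decoder-{}.pth'.format(epoch))
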
encoder_bytes = urllib.request.urlopen(encoder_model_name)
decoder_bytes = urllib.request.urlopen(decoder_model_name)

# wrap the downloaded bytes in io.BytesIO objects so torch.load can read them
encoder_buffer = io.BytesIO(encoder_bytes.read())
decoder_buffer = io.BytesIO(decoder_bytes.read())

# init evaluation network classes / architectures
encoder_eval = Encoder(input_size=ori_subset_transformed.shape[1],
                       hidden_size=[256, 64, 16, 4, 2])
decoder_eval = Decoder(output_size=ori_subset_transformed.shape[1],
                       hidden_size=[2, 4, 16, 64, 256])

# push to cuda if cudnn is available
if torch.backends.cudnn.version() is not None and USE_CUDA:
    encoder_eval = encoder_eval.cuda()
    decoder_eval = decoder_eval.cuda()

# load trained models
# since the model was trained on a GPU and will be restored on a CPU, we need to provide map_location='cpu'
encoder_eval.load_state_dict(torch.load(encoder_buffer, map_location='cpu'))
decoder_eval.load_state_dict(torch.load(decoder_buffer, map_location='cpu'))

## specify a dataloader that provides the ability to evaluate the journal entries in an "unshuffled" batch-wise manner:
# convert pre-processed data to pytorch tensor
torch_dataset = torch.from_numpy(ori_subset_transformed.values).float()

# init the evaluation dataloader (non-CUDA enabled)
dataloader_eval = DataLoader(torch_dataset,
                             batch_size=mini_batch_size,
                             shuffle=False)
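
# A minimal sketch of the batch-wise evaluation this dataloader enables
# (assumed; per-entry reconstruction error of the trained autoencoder):
#
#   with torch.no_grad():
#       for mini_batch in dataloader_eval:
#           reconstruction = decoder_eval(encoder_eval(mini_batch))
#           rec_error = torch.mean((mini_batch - reconstruction) ** 2, dim=1)
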
Example #5
class Mem2SeqRunner(ExperimentRunnerBase):
    def __init__(self, args):
        super(Mem2SeqRunner, self).__init__(args)

        # Model parameters
        self.gru_size = 128
        self.emb_size = 128
        # TODO: Try hops 4 with task 3
        self.hops = 3
        self.dropout = 0.2

        self.encoder = Encoder(self.hops, self.nwords, self.gru_size)
        self.decoder = Decoder(self.emb_size, self.hops, self.gru_size,
                               self.nwords)
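        # Mem2Seq-style setup: a multi-hop memory encoder plus a decoder that
        # mixes a vocabulary distribution with a pointer over the encoded
        # dialog memory (consumed in train_batch / evaluate_batch below)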

        self.optim_enc = torch.optim.Adam(self.encoder.parameters(), lr=0.001)
        self.optim_dec = torch.optim.Adam(self.decoder.parameters(), lr=0.001)
        if self.loss_weighting:
            self.optim_loss_weights = torch.optim.Adam([self.loss_weights],
                                                       lr=0.0001)
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optim_dec,
                                                        mode='max',
                                                        factor=0.5,
                                                        patience=1,
                                                        min_lr=0.0001,
                                                        verbose=True)

        if self.use_cuda:
            self.cross_entropy = self.cross_entropy.cuda()
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            if self.loss_weighting:
                self.loss_weights = self.loss_weights.cuda()

    def train_batch_wrapper(self, batch, new_epoch, clip_grads):
        context = batch[0].transpose(0, 1)
        responses = batch[1].transpose(0, 1)
        index = batch[2].transpose(0, 1)
        sentinel = batch[3].transpose(0, 1)
        context_lengths = batch[4]
        target_lengths = batch[5]
        return self.train_batch(context, responses, index, sentinel, new_epoch,
                                context_lengths, target_lengths, clip_grads)

    def train_batch(self, context, responses, index, sentinel, new_epoch,
                    context_lengths, target_lengths, clip_grads):

        # (TODO): remove transpose
        if new_epoch:  # (TODO): Change this part
            self.loss = 0
            self.ploss = 0
            self.vloss = 0
            self.n = 1

        context = context.type(self.TYPE)
        responses = responses.type(self.TYPE)
        index = index.type(self.TYPE)
        sentinel = sentinel.type(self.TYPE)

        self.optim_enc.zero_grad()
        self.optim_dec.zero_grad()
        if self.loss_weighting:
            self.optim_loss_weights.zero_grad()

        h = self.encoder(context.transpose(0, 1))
        self.decoder.load_memory(context.transpose(0, 1))
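        # seed the decoder with token id 2 for every batch element
        # (presumably the SOS / start-of-sequence index of this vocabulary)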
        y = torch.from_numpy(np.array([2] * context.size(1),
                                      dtype=int)).type(self.TYPE)
        y_len = 0

        h = h.unsqueeze(0)
        output_vocab = torch.zeros(max(target_lengths), context.size(1),
                                   self.nwords)
        output_ptr = torch.zeros(max(target_lengths), context.size(1),
                                 context.size(0))
        if self.use_cuda:
            output_vocab = output_vocab.cuda()
            output_ptr = output_ptr.cuda()
        while y_len < responses.size(0):  # TODO: Add EOS condition
            p_ptr, p_vocab, h = self.decoder(context, y, h)
            output_vocab[y_len] = p_vocab
            output_ptr[y_len] = p_ptr
            # TODO: Add teacher forcing ratio
            y = responses[y_len].type(self.TYPE)
            y_len += 1

        # print(loss)
        mask_v = torch.ones(output_vocab.size())
        mask_p = torch.ones(output_ptr.size())
        if self.use_cuda:
            mask_p = mask_p.cuda()
            mask_v = mask_v.cuda()
        for i in range(responses.size(1)):
            mask_v[target_lengths[i]:, i, :] = 0
            mask_p[target_lengths[i]:, i, :] = 0
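
        # NOTE: the masks above zero out time steps beyond each target length,
        # but they are not applied to the losses below, so padded positions
        # still contribute to the loss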

        loss_v = self.cross_entropy(
            output_vocab.contiguous().view(-1, self.nwords),
            responses.contiguous().view(-1))

        loss_ptr = self.cross_entropy(
            output_ptr.contiguous().view(-1, context.size(0)),
            index.contiguous().view(-1))
        if self.loss_weighting:
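            # homoscedastic uncertainty weighting (in the style of Kendall et
            # al., 2018): self.loss_weights act as learned noise scales s1, s2,
            # giving loss = L_ptr/(2*s1^2) + L_v/(2*s2^2) + log(s1*s2)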
            loss = (loss_ptr / (2 * self.loss_weights[0] * self.loss_weights[0])
                    + loss_v / (2 * self.loss_weights[1] * self.loss_weights[1])
                    + torch.log(self.loss_weights[0] * self.loss_weights[1]))
            loss_ptr = loss_ptr / (2 * self.loss_weights[0] *
                                   self.loss_weights[0])
            loss_v = loss_v / (2 * self.loss_weights[1] * self.loss_weights[1])
        else:
            loss = loss_ptr + loss_v
        loss.backward()
        ec = torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), 10.0)
        dc = torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), 10.0)
        self.optim_enc.step()
        self.optim_dec.step()
        if self.loss_weighting:
            self.optim_loss_weights.step()

        self.loss += loss.item()
        self.vloss += loss_v.item()
        self.ploss += loss_ptr.item()

        return loss.item(), loss_v.item(), loss_ptr.item()

    def evaluate_batch(self,
                       batch_size,
                       input_batches,
                       input_lengths,
                       target_batches,
                       target_lengths,
                       target_index,
                       target_gate,
                       src_plain,
                       profile_memory=None):

        # Set to not-training mode to disable dropout
        self.encoder.train(False)
        self.decoder.train(False)
        # Run words through encoder
        decoder_hidden = self.encoder(input_batches.transpose(0,
                                                              1)).unsqueeze(0)
        self.decoder.load_memory(input_batches.transpose(0, 1))

        # Prepare input and output variables
        decoder_input = Variable(torch.LongTensor([2] * batch_size))

        decoded_words = []
        all_decoder_outputs_vocab = Variable(
            torch.zeros(max(target_lengths), batch_size, self.nwords))
        all_decoder_outputs_ptr = Variable(
            torch.zeros(max(target_lengths), batch_size,
                        input_batches.size(0)))
        # all_decoder_outputs_gate = Variable(torch.zeros(self.max_r, batch_size))
        # Move new Variables to CUDA

        if self.use_cuda:
            all_decoder_outputs_vocab = all_decoder_outputs_vocab.cuda()
            all_decoder_outputs_ptr = all_decoder_outputs_ptr.cuda()
            # all_decoder_outputs_gate = all_decoder_outputs_gate.cuda()
            decoder_input = decoder_input.cuda()

        p = []
        for elm in src_plain:
            elm_temp = [word_triple[0] for word_triple in elm]
            p.append(elm_temp)

        self.from_whichs = []
        acc_gate, acc_ptr, acc_vac = 0.0, 0.0, 0.0
        # Run through decoder one time step at a time
        for t in range(max(target_lengths)):
            decoder_ptr, decoder_vacab, decoder_hidden = self.decoder(
                input_batches, decoder_input, decoder_hidden)
            all_decoder_outputs_vocab[t] = decoder_vacab
            topv, topvi = decoder_vacab.data.topk(1)
            all_decoder_outputs_ptr[t] = decoder_ptr
            topp, toppi = decoder_ptr.data.topk(1)
            top_ptr_i = torch.gather(input_batches[:, :, 0], 0,
                                     Variable(toppi.view(1, -1))).transpose(0, 1)
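            # pointer-vs-vocabulary selection: copy the pointed-to source token
            # when the pointer falls within the actual input (not the sentinel),
            # otherwise emit the top vocabulary prediction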
            next_in = [
                top_ptr_i[i].item() if
                (toppi[i].item() < input_lengths[i] - 1) else topvi[i].item()
                for i in range(batch_size)
            ]
            # if next_in in self.kb_entry.keys():
            #     ptr_distr.append([next_in, decoder_vacab.data])

            decoder_input = Variable(
                torch.LongTensor(next_in))  # Chosen word is next input
            if self.use_cuda:
                decoder_input = decoder_input.cuda()

            temp = []
            from_which = []
            for i in range(batch_size):
                if (toppi[i].item() < len(p[i]) - 1):
                    temp.append(p[i][toppi[i].item()])
                    from_which.append('p')
                else:
                    if target_index[t][i] != toppi[i].item():
                        self.incorrect_sentinel += 1
                    ind = topvi[i].item()
                    if ind == 3:
                        temp.append('<eos>')
                    else:
                        temp.append(self.i2w[ind])
                    from_which.append('v')
            decoded_words.append(temp)
            self.from_whichs.append(from_which)
        self.from_whichs = np.array(self.from_whichs)

        loss_v = self.cross_entropy(
            all_decoder_outputs_vocab.contiguous().view(-1, self.nwords),
            target_batches.contiguous().view(-1))
        loss_ptr = self.cross_entropy(
            all_decoder_outputs_ptr.contiguous().view(-1,
                                                      input_batches.size(0)),
            target_index.contiguous().view(-1))

        if self.loss_weighting:
            loss = (loss_ptr / (2 * self.loss_weights[0] * self.loss_weights[0])
                    + loss_v / (2 * self.loss_weights[1] * self.loss_weights[1])
                    + torch.log(self.loss_weights[0] * self.loss_weights[1]))
        else:
            loss = loss_ptr + loss_v

        self.loss += loss.item()
        self.vloss += loss_v.item()
        self.ploss += loss_ptr.item()
        self.n += 1

        # Set back to training mode
        self.encoder.train(True)
        self.decoder.train(True)
        return decoded_words, self.from_whichs  # , acc_ptr, acc_vac

    def save_models(self, path):
        torch.save(self.encoder.state_dict(),
                   os.path.join(path, 'encoder.pth'))
        torch.save(self.decoder.state_dict(),
                   os.path.join(path, 'decoder.pth'))

    def load_models(self, path: str = '.'):
        self.encoder.load_state_dict(
            torch.load(os.path.join(path, 'encoder.pth')))
        self.decoder.load_state_dict(
            torch.load(os.path.join(path, 'decoder.pth')))
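
# A minimal usage sketch (hypothetical `args` namespace and `train_loader`;
# mirrors the runner API defined above):
#
#   runner = Mem2SeqRunner(args)
#   for i, batch in enumerate(train_loader):
#       runner.train_batch_wrapper(batch, new_epoch=(i == 0), clip_grads=True)
#   runner.save_models('./checkpoints')
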
Example #6
    '''
        seqs to id
    '''
    # train
    text_id_list = seq2id(text_alpha, text_sent_list)
    label_id_list = seq2id(label_alpha, label_sent_list)

    # test
    # text_test_id_list = seq2id(text_alpha, text_sent_list)
    # label_test_id_list = seq2id(label_alpha, label_sent_list)

    encoder = Encoder(text_alpha.m_size, config)
    decoder = AttnDecoderRNN(label_alpha.m_size, config)

    if config.use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # print(encoder)
    # print(decoder)
    lr = config.lr
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=lr)
    criterion = nn.NLLLoss()

    n_epochs = config.Steps
    plot_every = 200
    print_every = 1

    start = time.time()
    plot_losses = []
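
    # A minimal sketch of the step loop that would follow (assumed; `train`
    # is a hypothetical helper performing one encoder/decoder update):
    #
    #   for step in range(1, n_epochs + 1):
    #       text_ids, label_ids = random.choice(
    #           list(zip(text_id_list, label_id_list)))
    #       loss = train(text_ids, label_ids, encoder, decoder,
    #                    encoder_optimizer, decoder_optimizer, criterion)
    #       plot_losses.append(loss)
    #       if step % print_every == 0:
    #           print('step %d / %d, loss %.4f' % (step, n_epochs, loss))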