Example #1
def pretrain_LSTMCore(train_x=None,
                      sentence_lengths=None,
                      batch_size=1,
                      end_token=None,
                      vocab_size=10):
    if train_x is None:
        x = gen_record(vocab_size=vocab_size)
    else:
        x = train_x
    if len(x.shape) == 1:
        x = x.view(1, x.shape[0])
    if sentence_lengths is None:
        sentence_lengths = [x.shape[1]] * len(x)
    if len(sentence_lengths) < len(x):
        sentence_lengths.extend([x.shape[1]] *
                                (len(x) - len(sentence_lengths)))
    if end_token is None:
        end_token = vocab_size - 1

    model = LSTMCore(vocab_size)
    model.to(DEVICE)
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(params, lr=0.01)
    y_pred_all = []
    log = openLog()
    log.write('\n\ntraining lstmCore: {}\n'.format(datetime.now()))
    for epoch in range(GEN_NUM_EPOCH_PRETRAIN):
        pointer = 0
        y_pred_all = []
        epoch_loss = []
        while pointer + batch_size <= len(x):
            x_batch = x[pointer:pointer + batch_size]
            x0_length = sentence_lengths[pointer:pointer + batch_size]
            # target y: the input shifted left by one token, with end_token appended;
            # build the appended column with the same dtype/device as x_batch so
            # torch.cat does not hit a CPU/GPU or dtype mismatch
            y = torch.cat(
                (x_batch[:, 1:],
                 torch.tensor([end_token] * x_batch.shape[0],
                              dtype=x_batch.dtype,
                              device=x_batch.device).view(x_batch.shape[0], 1)),
                dim=1)
            model.hidden = model.init_hidden(batch_size)
            y_pred = model(x_batch, x0_length)
            loss = criterion(y_pred.view(-1, y_pred.shape[-1]),
                             y.long().view(-1))
            optimizer.zero_grad()
            loss.backward(retain_graph=True)
            optimizer.step()
            y_prob = F.softmax(model.tag_space, dim=2)
            y_pred_all.append(y_prob)
            epoch_loss.append(loss.item())
            pointer = pointer + batch_size
        log.write('epoch: ' + str(epoch) + ' loss: ' +
                  str(sum(epoch_loss) / len(epoch_loss)) + '\n')
    log.close()
    return model, torch.cat(y_pred_all)


def sanityCheck_discriminator(batch_size=1, vocab_size=10):
    '''Test discriminator instantiation and pretraining.'''
    log = openLog('test.txt')
    log.write('\n\nTest discriminator.sanityCheck_discriminator: {}\n'.format(
        datetime.now()))
    model = train_discriminator(vocab_size=vocab_size)
    with torch.no_grad():
        x = gen_record(num=batch_size, vocab_size=vocab_size)
        y_pred = model(x)
    log.write('  y_pred shape: ' + str(y_pred.shape) + '\n')
    log.close()
    return model, y_pred
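
Both helpers above rely on module-level pieces of the surrounding SeqGAN-style code base (gen_record, LSTMCore, openLog, DEVICE, GEN_NUM_EPOCH_PRETRAIN, train_discriminator, ...). Inside the pretraining loop the target y is simply the input shifted left by one token with end_token appended, e.g. input [3, 7, 2] with end_token 9 yields target [7, 2, 9]. As a rough, hypothetical driver (not part of the original example; the vocab_size value and variable names are illustrative only), the two functions could be exercised like this:

# Hypothetical driver (assumes the imports and config of the surrounding module):
# pretrain the LSTM generator core on synthetic records, then run the
# discriminator sanity check on a one-sample batch.
lstm_core, gen_probs = pretrain_LSTMCore(vocab_size=10)
dis_model, dis_pred = sanityCheck_discriminator(batch_size=1, vocab_size=10)
print('generator token probabilities:', gen_probs.shape)
print('discriminator output:', dis_pred.shape)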
Example #3
def pretrain_LSTMCore(train_x=None, sentence_lengths=None, batch_size=1, end_token=None, vocab_size=10):
    if train_x is None:
        x = gen_record(vocab_size=vocab_size)
    else:
        x = train_x
    if len(x.shape) == 1:
        x = x.view(1,x.shape[0])
    if sentence_lengths is None:
        sentence_lengths = [x.shape[1]] * len(x)
    if len(sentence_lengths) < len(x):
        sentence_lengths.extend([x.shape[1]] * (len(x)-len(sentence_lengths)))
    if end_token is None:
        end_token = vocab_size - 1
    
    model = LSTMCore(vocab_size)
    model = nn.DataParallel(model)  # optionally: nn.DataParallel(model, device_ids=[0])
    model.to(DEVICE)
    params = list(filter(lambda p: p.requires_grad, model.parameters()))       
    criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(params, lr=0.01)
    y_pred_all = []
    log = openLog()
    log.write('    training lstmCore: {}\n'.format(datetime.now()))
    for epoch in range(GEN_NUM_EPOCH_PRETRAIN):
        pointer = 0
        y_pred_all = []
        epoch_loss = []
        while pointer + batch_size <= len(x):
            x_batch = x[pointer:pointer+batch_size]
            x0_length = torch.tensor(sentence_lengths[pointer:pointer+batch_size]).to(device=DEVICE)
            y = torch.cat((x_batch[:,1:],
                           torch.tensor([end_token]*x_batch.shape[0],device=DEVICE)
                           .int().view(x_batch.shape[0],1)),dim=1)
            # The hidden state has to be passed into the model as a GPU tensor so that
            # DataParallel can slice it correctly across multiple GPUs. DataParallel
            # scatters its inputs along dim=0 by default, so the hidden tensors are
            # created batch-first here and have to be permuted back to batch-size-second
            # inside the forward pass before they are fed to the LSTM layer.
            # When the model is wrapped in DataParallel, its attributes are reached via .module.
            hidden = model.module.init_hidden(batch_size)            
            y_pred, tag_space = model(x_batch, hidden, x0_length)
            loss = criterion(y_pred.view(-1,y_pred.shape[-1]), y.long().view(-1))
            optimizer.zero_grad()
            loss.backward(retain_graph=True)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            optimizer.step()
            y_prob = F.softmax(tag_space, dim=2)
            y_pred_all.append(y_prob)
            epoch_loss.append(loss.item())
            pointer = pointer + batch_size
        log.write('      epoch: '+str(epoch)+' loss: '+str(sum(epoch_loss)/len(epoch_loss))+'\n')
    log.close()
    return model, torch.cat(y_pred_all)
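
The inline comments above describe the calling convention that DataParallel imposes on the model: the hidden state is created batch-first so it can be scattered along dim 0, the forward pass permutes it back before the LSTM call, and the model returns both log-probabilities and the raw tag_space. Purely as an illustration of that convention (a sketch, not the repo's actual LSTMCore; the layer sizes, the DEVICE definition and the omission of sequence packing are assumptions):

import torch
import torch.nn as nn
import torch.nn.functional as F

# Assumption: same device convention as the surrounding module.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class LSTMCoreSketch(nn.Module):
    '''Sketch of an LSTM core whose hidden state is passed batch-first for DataParallel.'''
    def __init__(self, vocab_size, embedding_dim=32, hidden_dim=32):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)

    def init_hidden(self, batch_size):
        # batch-first shape (batch, num_layers, hidden_dim) so DataParallel scatters on dim 0
        return (torch.zeros(batch_size, 1, self.hidden_dim, device=DEVICE),
                torch.zeros(batch_size, 1, self.hidden_dim, device=DEVICE))

    def forward(self, x, hidden, sentence_lengths):
        # permute the hidden tensors back to (num_layers, batch, hidden_dim) for nn.LSTM;
        # sentence_lengths would drive pack_padded_sequence in the real model (omitted here)
        h0 = hidden[0].permute(1, 0, 2).contiguous()
        c0 = hidden[1].permute(1, 0, 2).contiguous()
        emb = self.embedding(x.long())
        lstm_out, _ = self.lstm(emb, (h0, c0))
        tag_space = self.hidden2tag(lstm_out)       # (batch, seq_len, vocab_size)
        y_pred = F.log_softmax(tag_space, dim=2)    # log-probabilities, matching nn.NLLLoss
        return y_pred, tag_space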
Example #4
def train_discriminator(train_x=None,
                        train_y=None,
                        batch_size=1,
                        vocab_size=10):
    if train_x is None:
        x = gen_record(num=batch_size, vocab_size=vocab_size)
    else:
        x = train_x
    if train_y is None:
        y = gen_label()
    else:
        y = train_y

    model = Discriminator(filter_size=FILTER_SIZE,
                          num_filter=NUM_FILTER,
                          vocab_size=vocab_size)
    model = nn.DataParallel(model)
    model.to(DEVICE)
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(params, lr=0.01)

    log = openLog()
    log.write('    training discriminator: {}\n'.format(datetime.now()))
    for epoch in range(DIS_NUM_EPOCH_PRETRAIN):
        pointer = 0
        epoch_loss = []
        while pointer + batch_size <= len(x):
            x_batch = x[pointer:pointer + batch_size]
            y_batch = y[pointer:pointer + batch_size]
            # y_pred dim: (batch_size, num_classes)
            y_pred = model(x_batch)
            loss = criterion(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            pointer = pointer + batch_size
            epoch_loss.append(loss.item())
        log.write('      epoch: ' + str(epoch) + ' loss: ' +
                  str(sum(epoch_loss) / len(epoch_loss)) + '\n')
    log.close()
    return model
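
A short, hypothetical usage sketch for the discriminator pretraining above: gen_record, gen_label and the config constants (FILTER_SIZE, NUM_FILTER, DIS_NUM_EPOCH_PRETRAIN) are assumed to come from the surrounding module, and the output shape follows the y_pred comment in the training loop.

# Hypothetical usage (assumes the surrounding module's imports and config):
# pretrain the CNN discriminator on generated data, then score a fresh batch.
dis_model = train_discriminator(batch_size=1, vocab_size=10)
with torch.no_grad():
    sample = gen_record(num=1, vocab_size=10)
    scores = dis_model(sample)    # shape: (batch_size, num_classes)
print('discriminator scores:', scores)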