Code example #1
File: serve.py Project: carlward/short-answer-ml
def model_fn(model_dir):
    model = BiLSTMModel(torch.zeros((41299, 300)),
                        nClasses=4,
                        hiddenSizeEncoder=2048,
                        hiddenSizeCls=512,
                        layers=1,
                        dropProb=0.0)
    weights = torch.load(Path(model_dir) / '{}.pt'.format(MODEL_NAME),
                         map_location=DEVICE)
    model.load_state_dict(weights)
    model.to(DEVICE)
    model.eval()

    tokenizer = Tokenizer(Vocab())
    tokenizer.from_disk(Path(model_dir) / '{}'.format(TOKENIZER))
    return {'model': model, 'tokenizer': tokenizer}
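
Note: model_fn above follows the SageMaker PyTorch inference convention, in which the returned object is handed on to the prediction handler. Purely as an illustration, a matching predict_fn could look like the minimal sketch below; the tokenizer call and the model's forward signature are assumptions, not taken from the project.

import torch

def predict_fn(input_data, artifacts):
    # Hypothetical handler consuming the dict returned by model_fn above.
    model = artifacts['model']
    tokenizer = artifacts['tokenizer']
    with torch.no_grad():
        tokens = tokenizer(input_data)        # assumed: raw text -> LongTensor of token ids
        logits = model(tokens.to(DEVICE))     # assumed: token ids -> class logits
    return logits.argmax(dim=-1).cpu().tolist()
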
Code example #2
# Create reverse dicts
i2w = {v: k for k, v in w2i.items()}
i2w[UNK] = "<unk>"
i2t = {v: k for k, v in t2i.items()}


ntags = len(t2i)

log.pr_cyan("The vocabulary size is %d" % nwords)

if MODEL_TYPE == 'emb-att':
    model = EmbAttModel(nwords, EMB_SIZE, ntags)
elif MODEL_TYPE == 'emb-lstm-att':
    model = BiLSTMAttModel(nwords, EMB_SIZE, HID_SIZE, ntags)
elif MODEL_TYPE == 'no-att-only-lstm':
    model = BiLSTMModel(nwords, EMB_SIZE, HID_SIZE, ntags)
else:
    raise ValueError("model type not compatible")

calc_ce_loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
type = torch.LongTensor        # default CPU tensor types; note `type` shadows the Python built-in
float_type = torch.FloatTensor
use_cuda = torch.cuda.is_available()

if use_cuda:
    type = torch.cuda.LongTensor
    float_type = torch.cuda.FloatTensor
    model.cuda()

print("evaluating without any training ...")
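
For reference, a single supervised update with the objects created above would look roughly like the sketch below. The batch layout and the model's forward signature (a LongTensor of word ids mapped to tag logits) are assumptions, not code from the original script.

def train_step(words, tag):
    # Hypothetical minimal step reusing model, calc_ce_loss, optimizer and type from above.
    model.train()
    words_t = torch.tensor([words]).type(type)   # shape (1, seq_len): word ids
    tag_t = torch.tensor([tag]).type(type)       # shape (1,): gold tag id
    logits = model(words_t)                      # assumed to return (1, ntags) scores
    loss = calc_ce_loss(logits, tag_t)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()
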
Code example #3
    # Model
    #######################################
    # LSTM only baseline model
    if args.model == 'lstm':
        lstm_kwargs = {
            'vocab_size': vocab_size,
            'pad_idx': pad_idx,
            'rnn_emb_dim': args.rnn_emb_dim,
            'rnn_dim': args.rnn_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'fc_dims': args.fc_dims,
            'fc_use_batchnorm': args.fc_use_batchnorm,
            'fc_dropout': args.fc_dropout,
        }
        model = BiLSTMModel(**lstm_kwargs)

    # LSTM + CNN baseline model
    elif args.model == 'cnn':
        cnnlstm_kwargs = {
            'vocab_size': vocab_size,
            'pad_idx': pad_idx,
            'rnn_emb_dim': args.rnn_emb_dim,
            'rnn_dim': args.rnn_dim,
            'rnn_num_layers': args.rnn_num_layers,
            'rnn_dropout': args.rnn_dropout,
            'cnn_feat_dim': args.cnn_feat_dim,
            'cnn_res_block_dim': args.cnn_res_block_dim,
            'cnn_num_res_blocks': args.cnn_num_res_blocks,
            'cnn_proj_dim': args.cnn_proj_dim,
            'cnn_pooling': args.cnn_pooling,
Code example #4
                               opt.length,
                               word2id=train_dataset.word2id,
                               id2word=train_dataset.id2word)

    if opt.model == 'GRU':
        model = GRUModel(
            vocab_size=train_dataset.vocab_size,
            embedding_size=opt.embedding_size,
            output_size=opt.output_dim,
            hidden_dim=opt.hidden_dim,
            n_layers=opt.n_layer,
        )
    elif opt.model == 'BiLSTM':
        model = BiLSTMModel(vocab_size=train_dataset.vocab_size,
                            embedding_size=opt.embedding_size,
                            output_size=opt.output_dim,
                            hidden_dim=opt.hidden_dim,
                            n_layers=opt.n_layer,
                            bidirectional=True)
    else:
        model = RNNModel(
            vocab_size=train_dataset.vocab_size,
            embedding_size=opt.embedding_size,
            output_size=opt.output_dim,
            hidden_dim=opt.hidden_dim,
            n_layers=opt.n_layer,
        )

    optimizer = optim.Adam(model.parameters(), lr=opt.lr, betas=opt.betas)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    trainer = Trainer(model,
Code example #5
File: train.py Project: carlward/short-answer-ml
def train(params):
    print('=' * 89)
    print('Initializing...')

    # Initialize NLP tools
    vectorPath = Path(params.vector_cache_dir) / 'snli_1.0' / params.word_embeds
    vocab = VocabBuilder(Path(vectorPath))
    if params.rebuild_vocab or not (Path.is_file(vocab.tokenizerPath) and Path.is_dir(vocab.vectorPath)):
        print('No vocabulary found. Rebuilding from dataset')
        if not Path.is_dir(Path.cwd() / vectorPath):
            Path.mkdir(Path.cwd() / vectorPath, parents=True)
        nlp = spacy.load(params.word_embeds)
        fullTokenizer = nlp.tokenizer
        fullVectors = nlp.vocab.vectors

        # Combine all sequences in SNLI dataset
        sequences = loadSequences(
            Path.cwd() / params.data_dir / 'snli_1.0',
            filenames=('snli_1.0_train.txt', 'snli_1.0_test.txt', 'snli_1.0_dev.txt'),
            seq1Col='sentence1',
            seq2Col='sentence2',
            filterPred=lambda r: r['gold_label'] != '-',
            sep='\t')

        # Learn vocabulary from sequences
        vocab.learnVocab(sequences, fullTokenizer, fullVectors)
        vocab.toDisk()
    vocab.fromDisk()
    torch.save(vocab.tokenizer, Path.cwd() / params.model_dir / '{0}Tokenizer.pt'.format(params.word_embeds))

    # Preprocess datasets
    print('Preprocessing dataset...')
    datasetConsMap = {
        'snli_1.0': SNLIDataset,
        'asap-sas': ASAPSASDataset,
        'MRPC': MRPCDataset
    }
    datasetDir = Path.cwd() / params.data_dir / params.dataset
    datasetCons = datasetConsMap[params.dataset]
    trainDataLoader = DataLoader(
        datasetCons(datasetDir, tokenizer=vocab.tokenizer, split='train'),
        batch_size=params.batch_size,
        shuffle=True,
        num_workers=4)
    evalLoader = DataLoader(
        datasetCons(datasetDir, tokenizer=vocab.tokenizer, split='dev'),
        batch_size=params.batch_size,
        shuffle=True,
        num_workers=4)
    trainSize = len(trainDataLoader.dataset)
    evalSize = len(evalLoader.dataset)
    nClasses = trainDataLoader.dataset.nClasses

    # Model parameters
    modelName = 'encoder' if params.mode == 'train_encoder' else 'cls'
    if params.mode == 'train_encoder':
        model = BiLSTMModel(
            torch.Tensor(vocab.vectors.data),
            nClasses=nClasses,
            hiddenSizeEncoder=params.hidden_size_encoder,
            hiddenSizeCls=params.hidden_size_cls,
            layers=params.lstm_layers,
            dropProb=params.dropout)

    elif params.mode == 'train_cls':
        model = BiLSTMModel(
            torch.zeros(vocab.vectors.data.shape),
            nClasses=3,
            hiddenSizeEncoder=params.hidden_size_encoder,
            hiddenSizeCls=params.hidden_size_cls,
            layers=params.lstm_layers,
            dropProb=0.0)
        bestWeights = torch.load(Path.cwd() / params.model_dir / 'encoderParams.pt', map_location=DEVICE)
        model.load_state_dict(bestWeights)
        model.classifier = SeqClassifier(
            params.hidden_size_encoder*8,
            hiddenSize=params.hidden_size_cls,
            nClasses=nClasses,
            dropProb=params.dropout)
        model.freezeEncoder()
    model = model.to(DEVICE)

    # Training parameters
    lossFunc = nn.CrossEntropyLoss()
    optimizerCons = optim.SGD if params.optimizer == 'sgd' else optim.Adam
    optimizer = optimizerCons(model.parameters(), lr=params.learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=params.lr_patience, factor=0.2)

    # Train/test loop
    bestAcc = 0
    bestLoss = 1E9
    startTime = time.time()
    bestWeights = copy.deepcopy(model.state_dict())
    stopTraining = False
    stopCount = 0

    print('-' * 89)
    print('Beginning training...')
    for epoch in range(1, params.epochs+1):
        epochStartTime = time.time()
        trainLoss, trainCorrect = trainEpoch(model, trainDataLoader, optimizer, lossFunc)
        evalLoss, evalCorrect, scores, preds = evalEpoch(model, evalLoader, optimizer, lossFunc)

        # Epoch Loss
        epochLossTrain = trainLoss / trainSize
        epochAccTrain = trainCorrect.double() / trainSize
        epochLossEval = evalLoss / evalSize
        epochAccEval = evalCorrect.double() / evalSize
        print('[Epoch:\t{}/{}] | time {:5.2f}s | train loss: {:.4f} acc: {:.4f}\t'
              '| eval loss: {:.4f} acc: {:.4f} nCorrect: {:d}'.format(epoch, params.epochs, time.time()-epochStartTime,
                                                                      epochLossTrain, epochAccTrain, epochLossEval,
                                                                      epochAccEval, evalCorrect))

        # Update learning rate
        scheduler.step(epochLossEval)
        if epochLossEval < bestLoss:
            bestLoss = epochLossEval
            stopCount = 0
        elif params.optimizer == 'sgd':
            optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / 5
            if optimizer.param_groups[0]['lr'] < 1e-5:
                stopTraining = True
        else:
            stopCount += 1
            if stopCount >= 4:
                stopTraining = True
        optimizer.param_groups[0]['lr'] *= (1 if params.optimizer != 'sgd' else 0.99)

        # Save state
        if epochAccEval > bestAcc:
            bestAcc = epochAccEval
            bestWeights = copy.deepcopy(model.state_dict())
            torch.save(bestWeights, Path.cwd() / params.model_dir / '{0}Params.pt'.format(modelName))
            torch.save(model, Path.cwd() / params.model_dir / '{0}Model.pt'.format(modelName))

        # Check for early stopping
        if stopTraining:
            break

    trainTime = time.time() - startTime
    print('Training complete in {:.0f}m {:.0f}s'.format(
        trainTime // 60, trainTime % 60))
    print('Best eval Acc: {:4f}'.format(bestAcc))

    # Load best model weights
    model.load_state_dict(bestWeights)
    return model, vocab.tokenizer
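
Since train reads everything from the params namespace, it can be driven by an argparse parser whose flags mirror the attributes accessed above. The sketch below lists those attributes; the default values (including the spaCy model name) are placeholders, not the project's actual CLI.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Paths and data (defaults are placeholders)
    parser.add_argument('--data_dir', default='data')
    parser.add_argument('--model_dir', default='models')
    parser.add_argument('--vector_cache_dir', default='vectors')
    parser.add_argument('--word_embeds', default='en_core_web_lg')
    parser.add_argument('--dataset', default='snli_1.0', choices=['snli_1.0', 'asap-sas', 'MRPC'])
    parser.add_argument('--rebuild_vocab', action='store_true')
    # Model and training hyperparameters (encoder/cls sizes mirror code example #1; the rest are placeholders)
    parser.add_argument('--mode', default='train_encoder', choices=['train_encoder', 'train_cls'])
    parser.add_argument('--hidden_size_encoder', type=int, default=2048)
    parser.add_argument('--hidden_size_cls', type=int, default=512)
    parser.add_argument('--lstm_layers', type=int, default=1)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--optimizer', default='sgd', choices=['sgd', 'adam'])
    parser.add_argument('--learning_rate', type=float, default=0.1)
    parser.add_argument('--lr_patience', type=int, default=1)
    model, tokenizer = train(parser.parse_args())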