def deep_rnn_model(input_dim, units, recur_layers, output_dim=29):
    """ Build a deep recurrent network for speech 
    """
    return M.RNNModel(bd_merge=None,
                      rnn_type=M.RNNType.LSTM,
                      rnn_layers=recur_layers,
                      rnn_units=units).model(input_shape=(None, input_dim), output_dim=output_dim)
def rnn_model(input_dim, units, activation, output_dim=29):
    """ Build a recurrent network for speech 
    """
    return M.RNNModel(bd_merge=None,
                      rnn_type=M.RNNType.LSTM,
                      rnn_units=units,
                      activation=activation).model(input_shape=(None, input_dim), output_dim=output_dim)
def cnn_rnn_model(input_dim, filters, kernel_size, conv_stride,
                  conv_border_mode, units, output_dim=29):
    """ Build a recurrent + convolutional network for speech 
    """
    return M.RNNModel(cnn_config=M.CNNConfig(filters=filters, kernel_size=kernel_size, conv_stride=conv_stride,
                                             conv_border_mode=conv_border_mode),
                      bd_merge=None,
                      rnn_type=M.RNNType.LSTM,
                      rnn_units=units).model(input_shape=(None, input_dim), output_dim=output_dim)
def final_model():
    """ Build a deep network for speech 
    """
    return M.RNNModel(cnn_config=M.CNNConfig(kernel_size=3, conv_stride=1, conv_border_mode="same",
                                             cnn_layers=12, cnn_dropout_rate=0.25,
                                             cnn_activation_before_bn_do=True,
                                             cnn_do_bn_order=True),
                      bd_merge=M.BidirectionalMerge.concat,
                      rnn_type=M.RNNType.GRU,
                      rnn_dense=True, rnn_units=250, rnn_layers=4, rnn_dropout_rate=0.2,
                      dropout_rate=0.3, name_suffix="Final").model(input_shape=(None, 26), output_dim=29)
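
A minimal usage sketch for the builder functions above, assuming M.RNNModel(...).model(...) returns a Keras-style model; the 161-dimensional input and all unit counts below are illustrative placeholders, not values taken from the snippets.

# Hypothetical usage of the builders above (placeholder hyper-parameters).
model_1 = rnn_model(input_dim=161, units=200, activation='relu')
model_2 = deep_rnn_model(input_dim=161, units=200, recur_layers=2)
model_3 = cnn_rnn_model(input_dim=161, filters=200, kernel_size=11,
                        conv_stride=2, conv_border_mode='valid', units=200)
model_4 = final_model()
model_4.summary()  # assumes a Keras Model, so summary() prints the layer stack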
Example #5
def main():
    preprocessing = PreProcessing()
    rnn_model = models.RNNModel()

    if config.char_or_word == config.character_model:
        data = None
        if config.data_type == "cmu_dict":
            cmu_data = datasets.getCMUDictData(config.data_src_cmu)
            data = cmu_data
        preprocessing.loadDataCharacter(data=data)
    else:
        preprocessing.loadData()
    preprocessing.prepareLMdata()

    # get model
    params = {}
    params['embeddings_dim'] = config.embeddings_dim
    params['lstm_cell_size'] = config.lstm_cell_size
    if config.char_or_word == config.character_model:
        params['vocab_size'] = preprocessing.vocab_size
    else:
        params['vocab_size'] = len(preprocessing.word_index)
    params['inp_length'] = config.inp_length - 1
    model = rnn_model.getModel(params)

    x_train, y_train = preprocessing.x_train, preprocessing.y_train
    x_val, y_val = preprocessing.x_val, preprocessing.y_val
    x_test, y_test = preprocessing.x_test, preprocessing.y_test
    # train
    checkpointer = ModelCheckpoint(
        filepath="./checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
        verbose=1,
        save_best_only=True)
    model.fit(x_train,
              y_train,
              validation_data=(x_val, y_val),
              nb_epoch=config.num_epochs,
              batch_size=config.batch_size,
              callbacks=[checkpointer])  #config.num_epochs
    saveEmbeddings(model, preprocessing.word_index)

    #evaluate
    scores = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))

    print "--- Sampling few sequences.. "
    for i in range(5):
        pred = utilities.generateSentence(model, preprocessing.word_index,
                                          preprocessing.sent_start,
                                          preprocessing.sent_end,
                                          preprocessing.unknown_word)
        sent = [preprocessing.index_word[i] for i in pred]
        if config.char_or_word == config.character_model:
            print(''.join(sent))
        else:
            print(' '.join(sent))
Example #6
def main():

    ###############################################################################
    # Load command line options.
    ###############################################################################

    opts = options()
    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)

    if opts.temperature < 1e-3:
        parser.error("--temperature has to be greater than or equal to 1e-3")

    ###############################################################################
    # Load the dictionary
    ###############################################################################

    with open(opts.dict, "rb") as f:
        dictionary = pickle.load(f)

    ###############################################################################
    # Build a model
    ###############################################################################

    with open(opts.load + ".params", 'rb') as f:
        params = pickle.load(f)

    # Model check
    if params["direction"] == "both":
        print(
            "WARNING: Bidirectional language model is not supproted by this generator."
        )
        assert (False)
    model = models.RNNModel(params)
    model.load_state_dict(torch.load(opts.load + ".pt"))
    if torch.cuda.is_available():
        if not opts.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
    device = torch.device("cuda" if opts.cuda else "cpu")
    model.to(device)
    model.eval()
    generate(opts, params, dictionary, model, device)
Example #7
def predict(testdata, path_to_weights, vocab_size, n_classes):
    '''
    Creates, loads and initiates the model and making predictions on the test data

    Input: testdata - data loader of the test data (type: DataLoader)
           path_to_weights - relative path and file name of the saved model weights with .pth extension (type:string)
           vocab_size - size of the vocabulary (type: int)
           n_classes - number of labels/classes that can be predicted (type: int)

    Output: preds_prob_list - list of all the probabilities of which the model predicted
                              the corresponding label (type: list of floats)
            preds_status_list - list of all the reencoded labels that were predicted (type: list of strings)
    '''
    rnn_params = train.rnn_params
    model = models.RNNModel(rnn_type=rnn_params.rnn_type,
                            nr_layers=rnn_params.nr_layers,
                            voc_size=vocab_size,
                            emb_dim=rnn_params.emb_dim,
                            rnn_size=rnn_params.rnn_size,
                            dropout=rnn_params.dropout,
                            n_classes=n_classes)
    models.ModelUtils.load_model(path_to_weights, model)
    model.to(rnn_params.device)
    batch_size = 1
    h = model.init_hidden(batch_size, device=rnn_params.device)
    model.zero_grad()

    preds_prob_list, preds_status_list = [], []
    for x_test in testdata:
        x_test = x_test.to(train.rnn_params.device)
        h = tuple([each.data for each in h])
        out, h = model(x_test, h)
        pred = torch.round(out.squeeze()).item()
        pred_status = "depressive" if pred < 0.5 else "non-depressive"
        prob = (1 - pred) if pred_status == "depressive" else pred
        preds_status_list.append(pred_status)
        preds_prob_list.append(prob)

    return preds_prob_list, preds_status_list
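
A hedged example of calling predict(); test_loader, the weights path, the vocabulary size and n_classes below are placeholders and depend on how the accompanying training script encodes the test posts.

# Assumed: test_loader yields integer-encoded sequences shaped like the
# training batches; every value below is a placeholder.
probs, statuses = predict(test_loader,
                          path_to_weights="./weights/rnn_best.pth",
                          vocab_size=20000,
                          n_classes=1)
for prob, status in zip(probs[:5], statuses[:5]):
    print("%s (%.2f)" % (status, prob))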
Example #8
def data_model(args):
    """
    Load data and model
    """
    train_data, val_data, test_data, corpus = data.get_data(args)

    print("Dataset: {}".format(args.dataset))
    print("Dataset path: {}".format(args.data))
    print("Dataset stats:")
    print("Train samples/tokens: {}".format(len(corpus.train)))
    print("Dev samples/tokens: {}".format(len(corpus.valid)))
    print("Test samples/tokens: {}".format(len(corpus.valid)))
    print("Vocabulary size: {}".format(len(corpus.dictionary.idx2word)))

    # Build or load the model
    ntokens = len(corpus.dictionary)
    model = models.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                            args.nlayers, args.dropout, args.tied).to(device)
    if args.load_model:
        model = torch.load(args.saved_model_path)
        print("Loaded saved model from: {}".format(args.saved_model_path))

    return train_data, val_data, test_data, corpus, model
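
A hedged sketch of the argparse-style namespace data_model() expects; the attribute names are taken from the calls above, while the values (and the dataset name) are placeholders only.

from types import SimpleNamespace

# Placeholder arguments; only the attributes read by data_model() are set.
args = SimpleNamespace(dataset='wikitext-2', data='./data/wikitext-2',
                       model='LSTM', emsize=200, nhid=200, nlayers=2,
                       dropout=0.2, tied=False,
                       load_model=False, saved_model_path='')
train_data, val_data, test_data, corpus, model = data_model(args)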
Example #9
    def __init__(self,args):
        option, checkpoint_fname, action = args
        rnn_model = models.RNNModel()
        preprocessing = PreProcessing()

        if config.char_or_word == config.character_model:
            data = None
            if config.data_type == "cmu_dict":
                cmu_data = datasets.getCMUDictData(config.data_src_cmu)
                data = cmu_data
            preprocessing.loadDataCharacter(data=data)
        else:
            preprocessing.loadData()
        preprocessing.prepareLMdata()
        self.preprocessing = preprocessing
        # get model
        params = {}
        params['embeddings_dim'] = config.embeddings_dim
        params['lstm_cell_size'] = config.lstm_cell_size
        if config.char_or_word == config.character_model:
            params['vocab_size'] = preprocessing.vocab_size
        else:
            params['vocab_size'] = len(preprocessing.word_index)
        params['inp_length'] = config.inp_length - 1
        model = rnn_model.getModel(params)
        
        if option=="train":
            x_train, y_train = preprocessing.x_train, preprocessing.y_train
            x_val, y_val = preprocessing.x_val, preprocessing.y_val
            x_test, y_test = preprocessing.x_test, preprocessing.y_test
            # train
            checkpointer = ModelCheckpoint(filepath="./checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5", verbose=1, save_best_only=True)
            model.fit(x_train, y_train, validation_data=(x_val, y_val),
                nb_epoch=config.num_epochs, batch_size=config.batch_size, callbacks=[checkpointer]) #config.num_epochs
            # evaluate
            scores = model.evaluate(x_test, y_test, verbose=0)
            print("Accuracy: %.2f%%" % (scores[1]*100))
            #Sample sequences
            print "--- Sampling few sequences.. "
            for i in range(5):
                pred = utilities.generateSentence(model, preprocessing.word_index, preprocessing.sent_start, 
                    preprocessing.sent_end, preprocessing.unknown_word)
                sent = [preprocessing.index_word[i] for i in pred]
                if config.char_or_word == config.character_model:
                    print(''.join(sent))
                else:
                    print(' '.join(sent))
        else:
            model.load_weights(checkpoint_fname)
            try:
                cache = pickle.load(open('lm_cache', 'rb'))
                print("Loaded cache")
            except:
                cache = {}
                print("cache not found. Starting with empty cache")
            if 'cache_clean' in args:
                self.cache={}
            else:
                self.cache=cache    

        self.model = model
        #Action
        if action=="save_embeddings":
            saveEmbeddings(model, preprocessing.word_index)
        else:
            pass
Example #10
def train_language_model(CFG, train_nums, test_nums, valid_nums, num_tokens):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Using device: {device}')

    if CFG.seed is not None:
        torch.manual_seed(CFG.seed)
        print(f'using seed {CFG.seed}')

    train_data = batchify(train_nums, CFG.batch_size, device=device)
    val_data = batchify(valid_nums, bsz=1, device=device)
    test_data = batchify(test_nums, bsz=1, device=device)

    if CFG.model == 'Transformer':
        model = models.TransformerModel(num_tokens, CFG.emb_size, CFG.nhead,
                                        CFG.n_hid, CFG.n_layers,
                                        CFG.dropout_p).to(device)
    else:
        model = models.RNNModel(CFG.model, num_tokens, CFG.emb_size, CFG.n_hid,
                                CFG.n_layers, CFG.dropout_p,
                                CFG.tied).to(device)

    criterion = nn.CrossEntropyLoss()

    lr = CFG.lr
    best_val_loss = None

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, CFG.epochs + 1):
            epoch_start_time = time.time()
            train_epoch(epoch, CFG, model, num_tokens, train_data, criterion,
                        lr)
            val_loss = evaluate(CFG, model, num_tokens, criterion, val_data)

            _saved = False

            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                with open(CFG.save_path, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss
                _saved = True
            else:
                # Anneal the learning rate if no improvement has been seen in the validation dataset.
                lr = 0.9 * lr

            if epoch % 20 == 0:
                print('-' * 120)
                _s = '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:8.5f} | valid ppl {:10.5f}'.format(
                    epoch, (time.time() - epoch_start_time), val_loss,
                    np.exp(val_loss))
                if _saved:
                    _s += ' | * saved best model'
                print(_s)
                print('-' * 120)

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    with open(CFG.save_path, 'rb') as f:
        model = torch.load(f)
        # after load the rnn params are not a continuous chunk of memory
        # this makes them a continuous chunk, and will speed up forward pass
        # Currently, only rnn model supports flatten_parameters function.
        if CFG.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
            model.rnn.flatten_parameters()

    # Run on test data.
    test_loss = evaluate(CFG, model, num_tokens, criterion, test_data)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, np.exp(test_loss)))
    print('=' * 89)
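
For illustration, a hedged call to train_language_model(); CFG only needs the attributes the function reads, every value below is a placeholder, and train_nums, valid_nums and test_nums are assumed to be the token-id tensors produced by whatever tokenization pipeline accompanies this script.

from types import SimpleNamespace

# Placeholder configuration; attribute names mirror those read inside
# train_language_model() and its helpers.
CFG = SimpleNamespace(seed=1111, batch_size=20, model='LSTM',
                      emb_size=200, nhead=2, n_hid=200, n_layers=2,
                      dropout_p=0.2, tied=False, lr=20.0, epochs=40,
                      save_path='model.pt')
train_language_model(CFG, train_nums, test_nums, valid_nums, num_tokens=10000)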
def bidirectional_rnn_model(input_dim, units, output_dim=29):
    """ Build a bidirectional recurrent network for speech
    """
    return M.RNNModel(bd_merge=M.BidirectionalMerge.concat,
                      rnn_type=M.RNNType.LSTM,
                      rnn_units=units).model(input_shape=(None, input_dim), output_dim=output_dim)
# Build a matrix of size num_batch * args.bsz containing the index of observation.
np.random.seed(args.seed)
index = data.subsample_index(train_data[1], args.bptt, args.nsample)
train_batch = data.batch_index(index, args.bsz)
valid_batch = data.batch_index(np.arange(args.bptt - 1, len(valid_data[1])),
                               args.bsz)
test_batch = data.batch_index(np.arange(args.bptt - 1, len(test_data[1])),
                              args.bsz)

classes = ['Downward', 'Stationary', 'Upward']

###############################################################################
# Build the model
###############################################################################

model = models.RNNModel(args.model, args.ninp, args.ntag, args.nhid,
                        args.nlayers, args.dropout).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
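
A small self-contained check of repackage_hidden (the shapes are arbitrary): it detaches an LSTM-style (h, c) tuple from the autograd graph while leaving the values untouched.

import torch

# 2 layers, batch of 4, hidden size 8 -- arbitrary illustrative shapes.
h = torch.zeros(2, 4, 8, requires_grad=True)
state = (h, torch.zeros(2, 4, 8, requires_grad=True))
detached = repackage_hidden(state)
assert all(not t.requires_grad for t in detached)  # history is cut
assert torch.equal(detached[0], h)                 # values are unchanged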
def simple_rnn_model(input_dim, output_dim=29):
    """ Build a recurrent network for speech 
    """
    return M.RNNModel(bd_merge=None,
                      rnn_type=M.RNNType.SimpleRNN,
                      time_distributed_dense=False).model(input_shape=(None, input_dim), output_dim=output_dim)
def run(
    batch_size: int,
    bptt: int,
    clip: float,
    cuda: bool,
    data: Path,
    dry_run: bool,
    em_size: int,
    epochs: int,
    log_interval: int,
    model: str,
    n_heads: int,
    report: callable,
    save: Path,
    lr: float,
    seed: int,
    tied: bool,
    warmup: int,
    load: Optional[Path] = None,
    onnx_export: Optional[Path] = None,
    **kwargs,
):
    # Set the random seed manually for reproducibility.
    torch.manual_seed(seed)
    cuda = cuda and torch.cuda.is_available()

    device = torch.device("cuda" if cuda else "cpu")
    print("Running with device:", device)

    ###############################################################################
    # Load data
    ###############################################################################

    eval_batch_size = 10
    if data.name == "debug.npz":
        if not data.exists():
            DebugDataset.generate(data,
                                  seed=seed,
                                  n_seq=10000,
                                  seq_len=bptt,
                                  n_tokens=10,
                                  p=0.8)
        dataset = DebugDataset(data, device)
        assert bptt == dataset.bptt
        ntokens = dataset.n_tokens + 1
        n_seq = len(dataset)
        size_valid = int(n_seq * 0.2)
        size_test = int(n_seq * 0.1)
        train_data, val_data, test_data = torch.utils.data.random_split(
            dataset, [n_seq - size_test - size_valid, size_valid, size_test])
    else:
        corpus = Corpus(data)
        train_data = LMDataset(corpus.train,
                               bptt,
                               batch_size=batch_size,
                               device=device)  # [104431, 20]
        val_data = LMDataset(corpus.valid,
                             bptt,
                             batch_size=batch_size,
                             device=device)  # [21764, 10]
        test_data = LMDataset(corpus.test,
                              bptt,
                              batch_size=batch_size,
                              device=device)  # [24556, 10]
        ntokens = len(corpus.dictionary)

    train_data = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_data = DataLoader(val_data, batch_size=batch_size, shuffle=True)
    test_data = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    ###############################################################################
    # Build the model
    ###############################################################################
    em_size = (em_size // n_heads) * n_heads
    kwargs.update(n_tokens=ntokens, em_size=em_size)
    recurrent = False
    if model == "transformer":
        model = models.TransformerModel(n_head=n_heads, **kwargs).to(device)
    elif model == "ours":
        model = ours.TransformerModel(n_head=n_heads, **kwargs).to(device)
    else:
        model = models.RNNModel(model, tied, **kwargs).to(device)
        recurrent = True
    if load is not None:
        with load.open("rb") as f:
            model.load_state_dict(torch.load(f))
            # after load the rnn params are not a continuous chunk of memory
            # this makes them a continuous chunk, and will speed up forward pass
            # Currently, only rnn model supports flatten_parameters function.
            if recurrent:
                model.rnn.flatten_parameters()

    ###############################################################################
    # Training code
    ###############################################################################

    def evaluate(data_source):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        hidden = model.init_hidden(eval_batch_size) if recurrent else None
        with torch.no_grad():
            for (inputs, targets) in data_source:
                targets = targets.flatten()
                if hidden is None:
                    output = model(inputs)
                    output = output.reshape(-1, ntokens)
                else:
                    output, hidden = model(inputs, hidden)
                    hidden = repackage_hidden(hidden)
                yield len(inputs) * criterion(output, targets).item()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = (None if warmup is None else optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda e: em_size**(-0.5) * min(e**(-0.5), e * warmup**(-1.5)),
    ))

    def train():
        # Turn on training mode which enables dropout.
        model.train()
        hidden = model.init_hidden(batch_size) if recurrent else None
        for i, (inputs, targets) in enumerate(train_data):
            targets = targets.flatten()
            optimizer.zero_grad()

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            model.zero_grad()
            if hidden is None:
                outputs = model(inputs)
                outputs = outputs.reshape(-1, ntokens)
            else:
                hidden = repackage_hidden(hidden)
                outputs, hidden = model(inputs, hidden)
            is_accurate = outputs.max(-1).indices == targets
            assert isinstance(is_accurate, torch.Tensor)
            accuracy = torch.mean(is_accurate.float())
            loss = criterion(outputs, targets)
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            if scheduler is not None:
                scheduler.step()

            logs = dict(epoch=epoch, batches=i)
            if scheduler is not None:
                logs.update(lr=scheduler.get_lr())
            means = dict(accuracy=accuracy.item(), loss=loss.item())
            writes = dict(inputs=inputs[0],
                          outputs=outputs[0],
                          targets=targets[0])
            yield logs, means, writes
            if dry_run:
                break

    def export_onnx(path, bsz, seq_len):
        print("The model is also exported in ONNX format at {}".format(
            onnx_export.absolute()))
        model.eval()
        dummy_input = torch.LongTensor(seq_len * bsz).zero_().view(
            -1, bsz).to(device)
        hidden = model.init_hidden(bsz)
        torch.onnx.export(model, (dummy_input, hidden), str(path))

    # Loop over epochs.
    best_val_loss = None
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, epochs + 1):
            aggregator = MeanAggregator()
            for batch, (to_log, to_mean, to_write) in enumerate(train()):
                aggregator.update(**to_mean)
                if batch % log_interval == 0 and batch > 0:
                    report(**to_log, **dict(aggregator.items()))
                    aggregator = MeanAggregator()

            val_loss = np.mean(list(evaluate(val_data)))
            report(val_loss=val_loss)
            if not best_val_loss or val_loss < best_val_loss:
                with save.open("wb") as f:
                    torch.save(model.state_dict(), f)
                best_val_loss = val_loss
    except KeyboardInterrupt:
        print("-" * 89)
        print("Exiting from training early")
    # Load the best saved model.
    with save.open("rb") as f:
        model.load_state_dict(torch.load(f))
        # after load the rnn params are not a continuous chunk of memory
        # this makes them a continuous chunk, and will speed up forward pass
        # Currently, only rnn model supports flatten_parameters function.
        if recurrent:
            model.rnn.flatten_parameters()

    # Run on test data.
    test_loss = np.mean(list(evaluate(test_data)))
    report(test_loss=test_loss, test_ppl=math.exp(test_loss))
    if onnx_export:
        # Export the model in ONNX format.
        export_onnx(onnx_export, bsz=1, seq_len=bptt)
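
For reference, a hypothetical invocation of run() with small settings; every argument below is a placeholder, passing data=Path('data/debug.npz') exercises the synthetic DebugDataset branch so no external corpus is needed, and report can be as simple as printing its keyword arguments.

from pathlib import Path

# Placeholder settings only; dry_run=True stops after one training batch.
run(batch_size=20, bptt=35, clip=0.25, cuda=False,
    data=Path('data/debug.npz'), dry_run=True,
    em_size=64, epochs=1, log_interval=10,
    model='LSTM', n_heads=2,
    report=lambda **kw: print(kw),
    save=Path('model.pt'), lr=1e-3, seed=1111,
    tied=False, warmup=None)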
Example #15
if args.output_file == 'gospel_output.txt':
    '''
    corpus = data.Corpus(train_path='../../data/version2/gospel_dataset_single_file_v2_train.txt',
                         test_path='../../data/version2/gospel_dataset_single_file_v2_test.txt',
                         valid_path='../../data/version2/gospel_dataset_single_file_v2_valid.txt')
    ntokens = len(corpus.dictionary)
    filehandler = open('gospel_corpus.txt', 'wb')
    pickle.dump(corpus, filehandler)
    '''
    filehandler = open('gospel_corpus.txt', 'rb')
    corpus = pickle.load(filehandler)
    print('corpus loaded')
    ntokens = len(corpus.dictionary)


    model = models.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)
    model.load_state_dict(torch.load('gospel_lm_v2.pt'))

elif args.output_file == 'hiphop_output.txt':
    '''
    corpus = data.Corpus(train_path='../../data/version2/hip_hop_dataset_single_file_v2_train.txt',
                         test_path='../../data/version2/hip_hop_dataset_single_file_v2_test.txt',
                         valid_path='../../data/version2/hip_hop_dataset_single_file_v2_valid.txt')
    ntokens = len(corpus.dictionary)
    '''
    filehandler = open('hip_hop_corpus.txt', 'rb')
    corpus = pickle.load(filehandler)
    print('corpus loaded')
    ntokens = len(corpus.dictionary)
    model = models.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)
    model.load_state_dict(torch.load('hiphop_lm_v2.pt'))
Example #16
def main():

    ###############################################################################
    # Load command line options.
    ###############################################################################

    opts = options()
    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)

    ###############################################################################
    # Load data
    ###############################################################################

    corpus = data.Corpus(opts)
    if opts.pretrain == "":
        corpus.make_dict(opts.data)
    else:
        corpus.load_dict()

    corpus.load_data(opts.data)
    with open(opts.dict, mode='wb') as f:
        pickle.dump(corpus.dictionary, f)

    ###############################################################################
    # Build a model
    ###############################################################################

    if opts.pretrain == "":
        # convert to parameters
        params = models.opts2params(opts, corpus.dictionary)
        # construct model
        model = models.RNNModel(params)
    # For fine-tuning
    else:
        # load parameters
        with open(opts.pretrain + ".params", 'rb') as f:
            params = pickle.load(f)
        # construct model
        model = models.RNNModel(params)
        # load pretrained model
        model.load_state_dict(torch.load(opts.pretrain + ".pt"))
        model.freeze_emb()

    # save parameters
    with open(opts.save + ".params", mode='wb') as f:
        pickle.dump(params, f)

    if torch.cuda.is_available():
        if not opts.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
    else:
        if opts.cuda:
            print("Error: No CUDA device. Remove the option --cuda")
    device = torch.device("cuda" if opts.cuda else "cpu")
    model = model.to(device)

    # loss function (ignore padding id)
    criterion = nn.CrossEntropyLoss(ignore_index=corpus.dictionary.pad_id())

    ###############################################################################
    # Train the model
    ###############################################################################

    # Loop over epochs.
    lr = opts.lr
    best_val_loss = None

    # Select an optimizer
    try:
        optimizer = getattr(torch.optim, opts.optim_type)(model.parameters(),
                                                          lr=lr)
    except:
        raise ValueError(
            """An invalid option for `--optim_type` was supplied.""")

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, opts.epochs + 1):
            train(opts, device, corpus, model, criterion, optimizer, lr, epoch)
            val_loss = evaluate(opts, device, corpus, model, criterion, epoch)
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                torch.save(model.state_dict(), opts.save + ".pt")
                best_val_loss = val_loss
            else:
                # Anneal the learning rate if no improvement has been seen in the validation dataset.
                lr /= 4.0
            for group in optimizer.param_groups:
                group['lr'] = lr
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
Example #17
def main():

    ###############################################################################
    # Load command line options.
    ###############################################################################

    opts = options()
    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)

    ###############################################################################
    # Build a model
    ###############################################################################

    with open(opts.load + ".params", 'rb') as f:
        params = pickle.load(f)
    model = models.RNNModel(params)
    model.load_state_dict(torch.load(opts.load + ".pt"))
    if torch.cuda.is_available():
        if not opts.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
    device = torch.device("cuda" if opts.cuda else "cpu")
    model.to(device)
    model.eval()

    ###############################################################################
    # Load dictionary
    ###############################################################################

    corpus = data.Corpus(opts)
    corpus.load_dict()

    criterion = nn.CrossEntropyLoss(reduction="none",
                                    ignore_index=corpus.dictionary.pad_id())

    ###############################################################################
    # Run as a server
    ###############################################################################

    if opts.server:

        app = FastAPI()

        @app.post('/lm',
                  response_model=List[Score],
                  description="get several scores with POST method")
        def predict(req: InSentences):
            print(req)
            stream = []
            for sent in req.sentences:
                seq = ["<s>"] + sent.split(" ") + ["</s>"]
                stream.append(corpus.sent2ids(seq))
            return [
                Score(lm_score=(ppl)) for ppl in evaluate(
                    opts, corpus, stream, model, criterion, device)
            ]

        logzero.loglevel(10)  # log_level = DEBUG
        uvicorn.run(app,
                    host=opts.host,
                    port=opts.port,
                    workers=1,
                    logger=logger,
                    debug=True)

    ###############################################################################
    # Calculates perplexities for sentences in the input file
    ###############################################################################

    else:
        input_texts = corpus.tokenize(opts.input_text)
        with open(opts.outf, 'w') as f_out:
            for ppl in evaluate(opts, corpus, input_texts, model, criterion,
                                device):
                f_out.write(str(ppl) + "\n")
Example #18
def main():

    ###############################################################################
    # Load command line options.
    ###############################################################################
    global verbose, opts

    opts = options()
    # Set the random seed manually for reproducibility.
    torch.manual_seed(opts.seed)

    hvd.init()

    if opts.cuda:
        # Horovod: pin GPU to local rank.
        torch.cuda.set_device(hvd.local_rank())
        #torch.cuda.manual_seed(opts.seed)

    cudnn.benchmark = True

    # Horovod: print logs on the first worker.
    verbose = 1 if hvd.rank() == 0 else 0

    if opts.lms:
        torch.cuda.set_enabled_lms(True)
        if verbose:
            print('LMS is enabled')

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    for try_epoch in range(opts.epochs, 0, -1):
        filepath = opts.save + "checkpoint-" + str(try_epoch) + ".pth.tar"
        if os.path.exists(filepath):
            resume_from_epoch = try_epoch
            break

    # Horovod: broadcast resume_from_epoch from rank 0 (which will have
    # checkpoints) to other ranks.
    resume_from_epoch = hvd.broadcast(torch.tensor(resume_from_epoch),
                                      root_rank=0,
                                      name='resume_from_epoch').item()

    ###############################################################################
    # Load data
    ###############################################################################

    corpus = data.Corpus(opts)
    if opts.pretrain == "":
        corpus.make_dict(opts.data)
    else:
        corpus.load_dict()

    corpus.load_data(opts.data)
    with open(opts.dict, mode='wb') as f:
        pickle.dump(corpus.dictionary, f)

    ###############################################################################
    # Build a model
    ###############################################################################

    if opts.pretrain == "":
        # convert to parameters
        params = models.opts2params(opts, corpus.dictionary)
        # construct model
        model = models.RNNModel(params)
    # For fine-tuning
    else:
        # load parameters
        with open(opts.pretrain + ".params", 'rb') as f:
            params = pickle.load(f)
        # construct model
        model = models.RNNModel(params)
        # load pretrained model
        model.load_state_dict(torch.load(opts.pretrain + ".pt"))
        model.freeze_emb()

    # save parameters
    #with open(opts.save + ".params", mode='wb') as f:
    #    pickle.dump(params, f)
    save_params(params, opts.save)

    if torch.cuda.is_available():
        if not opts.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
    else:
        if opts.cuda:
            print("Error: No CUDA device. Remove the option --cuda")
    device = torch.device("cuda" if opts.cuda else "cpu")
    model = model.to(device)

    # loss function (ignore padding id)
    criterion = nn.CrossEntropyLoss(ignore_index=corpus.dictionary.pad_id())

    ###############################################################################
    # Train the model
    ###############################################################################

    # Loop over epochs.
    lr = opts.lr
    best_val_loss = None

    # Select an optimizer
    try:
        optimizer = getattr(torch.optim, opts.optim_type)(model.parameters(),
                                                          lr=lr)
    except:
        raise ValueError(
            """An invalid option for `--optim_type` was supplied.""")

    # Horovod: (optional) compression algorithm.
    compression = hvd.Compression.fp16 if opts.fp16_allreduce else hvd.Compression.none

    # Horovod: wrap optimizer with DistributedOptimizer.
    try:
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            compression=compression,
            backward_passes_per_step=opts.batches_per_allreduce,
            op=hvd.Adasum if opts.use_adasum else hvd.Average)
    except:
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            compression=compression,
            backward_passes_per_step=opts.batches_per_allreduce)

    # Restore from a previous checkpoint, if initial_epoch is specified.
    # Horovod: restore on the first worker which will broadcast weights to other workers.
    if (resume_from_epoch > 0) and (hvd.rank() == 0):
        filepath = opts.save + "checkpoint-" + str(
            resume_from_epoch) + ".pth.tar"
        #filepath = args.checkpoint_format.format(epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Horovod: broadcast parameters & optimizer state.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(resume_from_epoch, opts.epochs):
            train(opts, device, corpus, model, criterion, optimizer, lr, epoch)
            val_loss = evaluate(opts, device, corpus, model, criterion, epoch)
            save_checkpoint(model, optimizer, epoch)
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                #torch.save(model.state_dict(), opts.save + ".pt")
                save_checkpoint(model, optimizer, -1)
                best_val_loss = val_loss
            #else:
            #    # Anneal the learning rate if no improvement has been seen in the validation dataset.
            #    lr /= 4.0
            #optimizer.lr = lr
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')