Example 1
def eval(X, Y, model):
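    # Run the recurrent model over each sentence in X on the CPU, keep the
    # final-step predictions for both outputs, and score them against the two
    # targets in Y with CCC and MSE before moving the model back to the GPU.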
    model.cpu()
    y_pred = [[],[]]
    for i in range(len(X)):
        x = X[i]
        if len(x) == 0:
            y_pred[0].append(0.)
            y_pred[1].append(0.)
        else:
            sent_tensor = Variable(sent_to_tensor(x))
            hidden = model.init_hidden()
            cell = model.init_hidden()
            # feed the sentence one token at a time; keep the last step's logits
            for t in range(len(x)):
                logit1, logit2, hidden, cell = model(sent_tensor[t], hidden, cell)
            y_pred[0].append(logit1.data.numpy()[0])
            y_pred[1].append(logit2.data.numpy()[0])
    y1 = np.array(y_pred[0])
    y_t1 = [Y[i][0] for i in range(len(Y))]
    y_t1 = np.array(y_t1)
    ccc1, _ = ccc(y_t1, y1)
    mse1 = mse(y_t1, y1)
    y2 = np.array(y_pred[1])
    y_t2 = [Y[i][1] for i in range(len(Y))]
    y_t2 = np.array(y_t2)
    ccc2, _ = ccc(y_t2, y2)
    mse2 = mse(y_t2, y2)
    model.cuda()
    return ccc1, ccc2, mse1, mse2
Example 2
def load_model():
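    # Deserialize the saved checkpoint and place the model on GPU or CPU
    # according to the USE_CUDA flag.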
    model = torch.load(MODEL_CHECKPOINT)
    if USE_CUDA:
        model.cuda()
    else:
        model.cpu()
    return model
Example 3
def predict(model, valid_data):
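    # Run the model on the GPU over valid_data without tracking gradients,
    # take the argmax class for each sample, and return all predictions as a
    # single NumPy array; the model is moved back to the CPU before returning.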

    model = model.cuda()
    model = model.eval()

    preds = []

    with torch.no_grad():
        for img in valid_data:
            img = img.cuda()

            out = model(img)
            pred = out.argmax(dim=1)

            preds.append(pred.cpu().numpy())

    preds = np.concatenate(preds, axis=0)

    model = model.cpu()

    return preds
Example 4
    else:
        # Load GPU model on CPU
        model = torch.load(args.checkpoint,
                           map_location=lambda storage, loc: storage)

if args.finetune:
    assert args.pretrained, "you must specify a pre-trained model"

    with open(args.pretrained, 'rb') as f:
        model = torch.load(f)
    print("loaded pre-trained model...")

if args.cuda:
    model.cuda()
else:
    model.cpu()
print(model)

criterion = nn.CrossEntropyLoss()
if args.cuda:
    criterion.cuda()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)
Example 5
eval_batch_size = args.batch_size // 8
train_data = batchify(corpus.train, args.batch_size)
dev_data = batchify(corpus.dev, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)
if args.cuda:
    model.cuda()
else:
    model.cpu()

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)

Example 6
def evaluate(data_dir, model_path, batch_size, chunk_size, use_cuda):
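    # Score every line of data_dir/test.txt with a saved language model:
    # sentences are sorted by length, batched, fed through the model in
    # chunks, and each entry receives its per-token log-probabilities
    # under "scores".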
    # =====
    printing(f"Evaluating {data_dir} & {model_path}")
    # read corpus, especially for vocab
    model_dir = os.path.dirname(model_path)
    # fn = 'corpus.{}.data'.format(hashlib.md5(data_dir.encode()).hexdigest())
    fn = glob.glob(f"{model_dir}/corpus.*.data")
    assert len(fn) == 1
    fn = fn[0]
    printing(f"Loading dataset from {fn}")
    corpus = torch.load(fn)
    word2idx = corpus.dictionary.word2idx
    # load model
    with open(model_path, 'rb') as f:
        printing(f"Loading model from {model_path}")
        model, criterion, optimizer = torch.load(f)
    if use_cuda:
        model = model.cuda()
    else:
        model = model.cpu()
    # Turn on evaluation mode which disables dropout.
    model.eval()
    # =====
    # read test and eval
    test_file = os.path.join(data_dir, "test.txt")
    test_data = []
    with open(test_file) as fd:
        for sid, line in enumerate(fd):
            tokens = line.split() + ['<eos>']
            # there will be no oov since test is included in vocab when building?
            idxes = [word2idx[w] for w in tokens]
            one = {"sid": sid, "tokens": tokens, "idxes": idxes}
            test_data.append(one)
    # start to decode
    printing(f"Decoding with {len(test_data)} lines of data")
    # sort by length
    test_data.sort(key=lambda x: len(x["idxes"]))
    # batched evaluation
    bidx = 0
    while bidx < len(test_data):
        next_bidx = min(len(test_data), bidx + batch_size)
        cur_data = test_data[bidx:next_bidx]
        bsize = len(cur_data)
        max_length = max([len(x["idxes"]) for x in cur_data])
        # batch, 0 as padding
        cur_data_t = torch.full([bsize, max_length], 0, dtype=torch.long)
        for b in range(bsize):
            one_input = cur_data[b]["idxes"]
            cur_data_t[b][:len(one_input)] = torch.as_tensor(one_input)
        cur_data_t = cur_data_t.t().contiguous()  # [max-length, bsize]
        if use_cuda:
            cur_data_t = cur_data_t.cuda()
        # loop
        # start from the first token, but do not predict it; keep the initial
        # zero scores on the same device as the batch so torch.cat succeeds
        # when running on CUDA
        logprobs = [torch.full([1, bsize], 0., dtype=torch.float32,
                               device=cur_data_t.device)]
        hidden = model.init_hidden(bsize)
        for start_idx in range(0, cur_data_t.size(0) - 1, chunk_size):
            end_idx = min(start_idx + chunk_size, cur_data_t.size(0) - 1)
            cur_input_t = cur_data_t[start_idx:end_idx]
            cur_target_t = cur_data_t[start_idx + 1:end_idx + 1]
            output, hidden = model(cur_input_t, hidden)
            output = model.decoder(output)
            hidden = repackage_hidden(hidden)
            # get log probs
            output_logprobs = torch.nn.functional.log_softmax(
                output.view(end_idx - start_idx, bsize, -1), -1)
            cur_logprobs = output_logprobs.gather(
                -1, cur_target_t.unsqueeze(-1)).squeeze(-1)  # [len, bsize]
            logprobs.append(cur_logprobs)
        bidx = next_bidx
        # get the scores back
        final_logprobs = torch.cat(logprobs,
                                   0).t().contiguous()  # [bsize, max-length]
        if use_cuda:
            final_logprobs = final_logprobs.cpu()
        for v, d in zip(final_logprobs, cur_data):
            d["scores"] = v[:len(d["idxes"])].tolist()
    # return
    test_data.sort(key=lambda x: x["sid"])
    return test_data
Example 7
                                 def_arr,
                                 test_corpus.dictionary,
                                 set_zero=True)
    logging("Vocab size pre-change: {}".format(len(model.old_dict.word2idx)))
    logging("Vocab size post-change: {}".format(len(model.dict.word2idx)))
else:
    raise AssertionError(
        "new vocabulary provided but model vocab not changed or interpolated")

test_data = batchify(test_corpus.test, args.test_batch_size, args)

if args.cuda:
    model = model.cuda()
    criterion = criterion.cuda()
else:
    model = model.cpu()
    criterion = criterion.cpu()

# Run on test data.
logging("Evaluating...")
with torch.no_grad():
    if args.hyp_search is not None:
        best_score = (np.inf, 0.0, 0.0)
        scores = np.zeros((5, 6))
        import pickle
        # grid search is fine here because there are few hyperparameters and k
        # is small, which helps minimize gaps; based on Grave et al. (2016) we
        # expect lam and theta to be roughly equally important/sensitive here
        for i, lam in enumerate([0.833, 0.866, 0.9, 0.933, 0.966]):
            for j, theta in enumerate([0, 0.1, 0.3, 0.5, 0.7, 0.9]):
                args.lam = lam
Example 8
def run(args, config, min_test_loss):
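    # One full experiment: set up file logging, load and batch the corpus,
    # build the RNN language model, train with learning-rate annealing, then
    # evaluate on the test set and save the model if it improves on
    # min_test_loss.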
    # Change log file
    fileh = logging.FileHandler(args.logfile, 'w')
    formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
    fileh.setFormatter(formatter)

    logger = logging.getLogger('')  # root logger
    logger.setLevel(logging.INFO)
    # Second handler is the file logger.
    for hdlr in logger.handlers[1:]:  # remove all old handlers
        logger.removeHandler(hdlr)
    logger.addHandler(fileh)  # set the new handler
    logger = logging.getLogger('run')

    logger.info('CONFIGURATION: %s', json.dumps(config, indent=2))

    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    init_state = torch.get_rng_state()

    logger.info('rng state: %s', init_state)

    ###############################################################################
    # Load data
    ###############################################################################

    corpus = data.Corpus(args.data, args.vocab_size)

    def batchify(data, bsz):
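        # Trim the corpus to a whole number of batches and reshape it into
        # [sequence, batch] columns, optionally moving it to the GPU.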
        nbatch = data.size(0) // bsz
        data = data.narrow(0, 0, nbatch * bsz)
        data = data.view(bsz, -1).t().contiguous()
        if args.cuda:
            data = data.cuda()
        return data

    eval_batch_size = 10
    train_data = batchify(corpus.train, args.batch_size)
    val_data = batchify(corpus.valid, eval_batch_size)
    test_data = batchify(corpus.test, eval_batch_size)

    ###############################################################################
    # Build the model
    ###############################################################################
    def load_embedding(corpus,
                       glove_file="data/glove/glove.6B.{0}d.txt",
                       line_to_load=100000):
        """
        Function that populates a dictionary with word embedding vectors
        """
        # resolve glove file
        glove_file = glove_file.format(args.emsize)
        if not os.path.exists(glove_file):
            logger.error("glove_file {0} does not exist!".format(glove_file))
            raise ValueError("glove_file {0} does not exist!".format(glove_file))
        ctr = 0
        # This is the thing to return
        word_emb = np.random.uniform(-0.1,
                                     0.1,
                                     size=(len(corpus.dictionary),
                                           args.emsize))
        found_words = 0
        with open(glove_file, "r") as f:
            for i, line in enumerate(f):
                ctr += 1
                contents = line.split()
                word = contents[0].lower()
                if word in corpus.dictionary.word2idx:
                    idx = corpus.dictionary.word2idx[word]
                    word_emb[idx, :] = np.asarray(contents[1:]).astype(float)
                    found_words += 1
                if ctr >= line_to_load:
                    break
        logger.info('found: %d', found_words)
        return torch.Tensor(word_emb)

    ntokens = len(corpus.dictionary)
    preload_emb = load_embedding(
        corpus) if args.initialization["word_embedding"] == "glove" else None
    model = RNNModel(args.model,
                     ntokens,
                     args.emsize,
                     args.nhid,
                     args.nlayers,
                     emb_init_method=args.initialization["word_embedding"],
                     weight_init_method=args.initialization["weights"],
                     preload_emb=preload_emb,
                     dropout=args.dropout)
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        model = model.cuda()
    if args.optim == 'adam':
        opt = O.Adam(model.parameters(),
                     lr=args.lr,
                     weight_decay=args.weight_decay)
    else:
        opt = O.SGD(model.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)

    ###############################################################################
    # Training code
    ###############################################################################

    def clip_gradient(model, clip):
        """Computes a gradient clipping coefficient based on gradient norm."""
        totalnorm = 0
        for p in model.parameters():
            modulenorm = p.grad.data.norm()
            totalnorm += modulenorm**2
        totalnorm = math.sqrt(totalnorm)
        return min(1, clip / (totalnorm + 1e-6))

    def repackage_hidden(h):
        """Wraps hidden states in new Variables, to detach them from their history."""
        if type(h) == Variable:
            return Variable(h.data)
        else:
            return tuple(repackage_hidden(v) for v in h)

    def get_batch(source, i, evaluation=False):
        seq_len = min(args.sequence_length, len(source) - 1 - i)
        data = Variable(source[i:i + seq_len], volatile=evaluation)
        target = Variable(source[i + 1:i + 1 + seq_len].view(-1))
        return data, target

    def evaluate(data_source):
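        # Average cross-entropy per position over data_source with the model
        # in eval mode (dropout disabled).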
        total_loss = 0
        model.eval()
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(eval_batch_size)
        for i in range(0, data_source.size(0) - 1, args.sequence_length):
            data, targets = get_batch(data_source, i, evaluation=True)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).data
            hidden = repackage_hidden(hidden)
        return total_loss[0] / len(data_source)

    def train():
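        # One epoch over train_data using truncated backpropagation through
        # time with windows of length args.sequence_length.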
        model.train()
        total_loss = 0
        start_time = time.time()
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(
            args.batch_size,
            hidden_init_method=args.initialization["hidden_state"])
        # materialize the indices as a list so np.random.shuffle can permute
        # them in place (a range object is immutable)
        iter_idx = list(range(0, train_data.size(0) - 1, args.sequence_length))
        if args.shuffle:
            np.random.shuffle(iter_idx)
        for batch, i in enumerate(iter_idx):
            data, targets = get_batch(train_data, i)
            hidden = repackage_hidden(hidden)
            model.zero_grad()
            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets)
            loss.backward()

            clipped_lr = lr * clip_gradient(model, args.clip)
            for param_group in opt.param_groups:
                param_group['lr'] = clipped_lr
            opt.step()

            total_loss += loss.data

            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss[0] / args.log_interval
                elapsed = time.time() - start_time
                ppl = 0
                try:
                    ppl = math.exp(cur_loss)
                except OverflowError:
                    ppl = float('inf')
                logger.info(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, batch,
                        len(train_data) // args.sequence_length, lr,
                        elapsed * 1000 / args.log_interval, cur_loss, ppl))
                total_loss = 0
                start_time = time.time()

    # Loop over epochs.
    lr = args.lr
    prev_val_loss = None
    epoch_logs = []
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train()
        val_loss = evaluate(val_data)
        logger.info('-' * 89)
        time_s = time.time() - epoch_start_time
        logger.info(
            '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
            'valid ppl {:8.2f}'.format(epoch, time_s, val_loss,
                                       math.exp(val_loss)))
        logger.info('-' * 89)
        epoch_logs.append({
            'epoch': epoch,
            'time_s': time_s,
            'val_loss': val_loss,
            'val_ppl': math.exp(val_loss)
        })
        # Anneal the learning rate.
        if prev_val_loss and val_loss > prev_val_loss:
            lr /= 4.0
            logger.info('new learning rate: {}'.format(lr))
            if lr < args.min_lr:
                logger.info('learning rate too small')
                break
        prev_val_loss = val_loss

        if epoch % 6 == 0:
            with open('models/snapshot.pt', 'wb') as f:
                torch.save(model, f)
            logger.info('saved snapshot model.')

    # Run on test data and save the model.
    test_loss = evaluate(test_data)
    logger.info('=' * 89)
    logger.info(
        '| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
            test_loss, math.exp(test_loss)))
    logger.info('=' * 89)
    if args.save != '' and test_loss < min_test_loss:
        mcpu = model.cpu()
        with open(args.save, 'wb') as f:
            torch.save(mcpu, f)
        with open('models/best_model.pt', 'wb') as f:
            torch.save(mcpu, f)

    # Log results in a machine-readable JSON.
    result = {}
    result['config'] = config
    result['epoch_logs'] = epoch_logs
    result['test_loss'] = test_loss
    result['test_ppl'] = math.exp(test_loss)
    with open(args.results, 'w') as r:
        json.dump(result, r, indent=2)

    # Revert random state.
    torch.set_rng_state(init_state)
    return test_loss