Ejemplo n.º 1
0
def best_arch_search():
    """Sample architectures, score each on validation data, and save a ranking.

    For each of ``search_arch_num`` iterations a new architecture is sampled
    on ``parallel_model``, its loss on one batch of ``val_data`` is computed,
    and the reward ``architect.reward_c / exp(loss)`` is recorded together
    with the sampled genotype. The table, sorted by descending reward, is
    written to ``search_result.csv`` in the current working directory.

    Relies on module-level state: ``model``, ``parallel_model``, ``architect``,
    ``val_data``, ``args``, ``eval_batch_size`` and ``search_arch_num``.
    """
    model.eval()
    hidden = model.init_hidden(eval_batch_size)

    # Collect plain rows and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    i = 0
    # Only the scalar reward is kept, so no autograd graph is needed.
    with torch.no_grad():
        for _ in range(search_arch_num):
            parallel_model.sample_new_architecture()

            data, targets = get_batch(val_data, i, args)
            targets = targets.view(-1)

            hidden = repackage_hidden(hidden)
            loss, hidden = parallel_model._loss(hidden, data, targets)

            reward = architect.reward_c / torch.exp(loss)
            rows.append([parallel_model.genotype(), reward.item()])

            i += args.bptt
            # Wrap around based on the tensor that is actually being read
            # (the original compared against search_data here).
            if i >= val_data.size(0) - 2:
                i = 0

    result_df = pd.DataFrame(rows, columns=['Genotype', 'Val_reward'])
    result_df = result_df.sort_values(by='Val_reward', ascending=False)
    result_df.to_csv('search_result.csv')
def evaluate(data_source, batch_size=10):
    """Return the length-weighted average NLL loss over ``data_source``.

    The data is walked in steps of ``args.bptt``; each batch loss is weighted
    by ``len(data)`` and the sum is divided by ``len(data_source)``.

    Args:
        data_source: Tensor-like object supporting ``size(0)`` and ``len()``,
            consumed through ``get_batch``.
        batch_size: Batch size used to initialise the hidden state.

    Returns:
        The averaged loss as a Python float. If the loop body never runs
        (``data_source.size(0) <= 1``) the accumulated loss is 0.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            targets = targets.view(-1)
            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(
                log_prob.view(-1, log_prob.size(2)), targets)
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
    # float() accepts both the initial int 0 and a one-element tensor, so an
    # empty loop no longer fails on ``int.item()``.
    return float(total_loss) / len(data_source)
Ejemplo n.º 3
0
def evaluate(data_source, batch_size=10, data_name='dev'):
    """Evaluate the model on ``dev.json`` and log precision / recall / F1.

    Args:
        data_source: Ignored. It is immediately replaced by a fresh
            ``DataLoader`` over ``args.data_dir + '/dev.json'``; the parameter
            is kept so existing callers keep working.
        batch_size: Batch size handed to the ``DataLoader``.
        data_name: Label used in the log line only. It does not select the
            file that is loaded, nor the gold labels used for scoring.

    Returns:
        The accumulated weighted loss divided by ``len(data_source)``, i.e.
        by the number of ``next_batch()`` calls made.
    """
    data_source = DataLoader(args.data_dir + '/dev.json',
                             batch_size,
                             opt,
                             vocab,
                             evaluation=True)
    print('Evaluating Model!')
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    predictions = []
    # Pure inference: skip autograd bookkeeping, as the language-model
    # evaluate() does.
    with torch.no_grad():
        for _ in range(len(data_source)):
            batch = data_source.next_batch()
            # The hidden state is rebuilt per batch, sized to this batch.
            cur_batch_size = len(batch['relation'])
            hidden = model.init_hidden(cur_batch_size)[0]
            targets = batch['relation'].view(-1)

            log_prob, hidden = parallel_model(batch, hidden)
            loss = nn.functional.nll_loss(log_prob, targets)

            # NOTE(review): len(batch) is the length of the batch object
            # itself, which may not be the number of examples
            # (cur_batch_size). Kept as in the original; confirm the
            # intended weighting.
            total_loss += loss * len(batch)

            batch_predictions = torch.argmax(log_prob, dim=-1).cpu().numpy()
            predictions.extend(
                id2label[prediction] for prediction in batch_predictions)

    # NOTE(review): gold labels always come from the module-level dev_data,
    # regardless of data_name.
    precision, recall, f1 = scorer.score(dev_data.gold(), predictions)
    logging.info('{} set | Precision: {} | Recall: {} | F1: {}'.format(
        data_name, precision, recall, f1))
    print('total loss: {}'.format(total_loss))
    return total_loss / len(data_source)
Ejemplo n.º 4
0
def train_arch():
    """Run one pass of architecture optimisation over ``search_data``.

    Each outer iteration fetches one batch from ``search_data`` and splits it
    along dimension 1 into chunks of ``args.small_batch_size``. For every
    chunk a new architecture is sampled, ``architect.step`` is called, and the
    loss is then recomputed with ``model._loss`` for bookkeeping. Progress is
    logged every ``args.log_interval`` batches.

    Relies on module-level state: ``args``, ``model``, ``parallel_model``,
    ``architect``, ``search_data``, ``arch_opt_step``, ``epoch`` and
    ``optimizer``.

    Returns:
        The sum of ``raw_loss * len(chunk)`` over all chunks divided by
        ``len(search_data)``, as a Python float.

    Raises:
        AssertionError: If ``args.batch_size`` is not a multiple of
            ``args.small_batch_size``.
    """
    assert args.batch_size % args.small_batch_size == 0, 'batch_size must be divisible by small_batch_size'

    total_loss = 0
    start_time = time.time()
    # One hidden state per small-batch slot of the full batch.
    hidden_valid = [
        model.init_hidden(args.small_batch_size)
        for _ in range(args.batch_size // args.small_batch_size)
    ]
    batch, i = 0, 0
    ep_loss = 0
    # Start as "nothing pending" so that an empty search_data neither raises
    # NameError below nor triggers a stray optimizer step.
    is_opt_step = True
    # The model is kept in evaluation mode during architecture search.
    model.eval()
    while i < search_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        seq_len = int(bptt)

        # NOTE(review): seq_len only controls how far i advances; it is not
        # passed to get_batch. Confirm this is intended.
        data_valid, targets_valid = get_batch(search_data, i, args)

        start, end, s_id = 0, args.small_batch_size, 0
        while start < args.batch_size:
            cur_data_valid = data_valid[:, start:end]
            cur_targets_valid = (
                targets_valid[:, start:end].contiguous().view(-1))

            hidden_valid[s_id] = repackage_hidden(hidden_valid[s_id])

            parallel_model.sample_new_architecture()
            if i == 0:
                # Dump the current architecture distributions at the start
                # of the pass.
                for e in model.edge_weights:
                    print(F.softmax(e, dim=-1))

                print(F.softmax(model.weights, dim=-1))
                print(model.baseline)

            is_opt_step = (batch + 1) % arch_opt_step == 0

            if i == 0:
                architect.optimizer.zero_grad()

            hidden_valid[s_id], raw_loss = architect.step(
                hidden_valid[s_id], cur_data_valid, cur_targets_valid,
                is_opt_step)
            # NOTE(review): the loss is recomputed on the same chunk using
            # the hidden state returned by architect.step, replacing the
            # raw_loss from that call. Confirm the second forward pass is
            # intended.
            raw_loss, hidden_valid[s_id] = model._loss(hidden_valid[s_id],
                                                       cur_data_valid,
                                                       cur_targets_valid)
            raw_loss = raw_loss.detach()

            total_loss += raw_loss.data * args.small_batch_size / args.batch_size
            ep_loss += raw_loss * len(cur_data_valid)

            s_id += 1
            start = end
            end = start + args.small_batch_size

            gc.collect()

        if batch % args.log_interval == 0 and batch > 0:
            logging.info(parallel_model.genotype())
            print(F.softmax(parallel_model.weights, dim=-1))
            cur_loss = float(total_loss) / args.log_interval
            elapsed = time.time() - start_time
            logging.info(
                '| arch_epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(search_data) // args.bptt,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        batch += 1
        i += seq_len

    # Optimizer step for the residual of the valid queue.
    if not is_opt_step:
        architect.optimizer.step()

    # float() also accepts the initial int 0 when no batch was processed.
    return float(ep_loss) / len(search_data)