Example no. 1
def evaluation(args,
               data,
               split,
               model,
               domain,
               epoch,
               str_res='results',
               ner_model=True,
               predictor=None):
    # evaluate performance on data
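    # aggregates per-batch attachment statistics (UAS/LAS counts, complete
    # matches, root accuracy) over the whole split and reports them via
    # print_results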
    model.eval()

    eval_dict = initialize_eval_dict()
    eval_dict['epoch'] = epoch
    #for batch in prepare_data.iterate_batch(data, args.batch_size, args.device):
    for batch in prepare_data.iterate_batch_rand_bucket_choosing(
            data,
            args.batch_size,
            args.device,
            ner_model=ner_model,
            predictor=predictor):
        if ner_model:
            word, char, pos, ner, heads, arc_tags, auto_label, masks, lengths, f_f, f_p, b_f, b_p, w_f, mask_v, file_no = batch
            out_arc, out_arc_tag, masks, lengths = model.forward(
                word,
                char,
                pos,
                mask=masks,
                length=lengths,
                f_f=f_f,
                f_p=f_p,
                b_f=b_f,
                b_p=b_p,
                w_f=w_f,
                file_no=file_no,
                mask_v=mask_v)
        else:
            word, char, pos, ner, heads, arc_tags, auto_label, masks, lengths = batch
            out_arc, out_arc_tag, masks, lengths = model.forward(
                word, char, pos, mask=masks, length=lengths)

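        # decode the highest-scoring head and arc tag for every token,
        # skipping the leading symbolic tags reserved by prepare_data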
        heads_pred, arc_tags_pred, _ = model.decode(
            out_arc,
            out_arc_tag,
            mask=masks,
            length=lengths,
            leading_symbolic=prepare_data.NUM_SYMBOLIC_TAGS)
        lengths = lengths.cpu().numpy()
        word = word.data.cpu().numpy()
        pos = pos.data.cpu().numpy()
        ner = ner.data.cpu().numpy()
        heads = heads.data.cpu().numpy()
        arc_tags = arc_tags.data.cpu().numpy()
        heads_pred = heads_pred.data.cpu().numpy()
        arc_tags_pred = arc_tags_pred.data.cpu().numpy()
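        # accumulate attachment statistics for this batch: with punctuation,
        # without punctuation, and root attachments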
        stats, stats_nopunc, stats_root, num_inst = parse.eval_(
            word,
            pos,
            heads_pred,
            arc_tags_pred,
            heads,
            arc_tags,
            args.alphabets['word_alphabet'],
            args.alphabets['pos_alphabet'],
            lengths,
            punct_set=args.punct_set,
            symbolic_root=True)
        ucorr, lcorr, total, ucm, lcm = stats
        ucorr_nopunc, lcorr_nopunc, total_nopunc, ucm_nopunc, lcm_nopunc = stats_nopunc
        corr_root, total_root = stats_root
        eval_dict['dp_ucorrect'] += ucorr
        eval_dict['dp_lcorrect'] += lcorr
        eval_dict['dp_total'] += total
        eval_dict['dp_ucomplete_match'] += ucm
        eval_dict['dp_lcomplete_match'] += lcm
        eval_dict['dp_ucorrect_nopunc'] += ucorr_nopunc
        eval_dict['dp_lcorrect_nopunc'] += lcorr_nopunc
        eval_dict['dp_total_nopunc'] += total_nopunc
        eval_dict['dp_ucomplete_match_nopunc'] += ucm_nopunc
        eval_dict['dp_lcomplete_match_nopunc'] += lcm_nopunc
        eval_dict['dp_root_correct'] += corr_root
        eval_dict['dp_total_root'] += total_root
        eval_dict['dp_total_inst'] += num_inst

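    # UAS counts tokens whose predicted head is correct; LAS additionally
    # requires the predicted arc label to be correct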
    eval_dict['dp_uas'] = eval_dict['dp_ucorrect'] * 100 / eval_dict[
        'dp_total']  # UAS, punctuation included
    eval_dict['dp_las'] = eval_dict['dp_lcorrect'] * 100 / eval_dict[
        'dp_total']  # LAS, punctuation included
    print_results(eval_dict, split, domain, str_res)
    return eval_dict


def main():
    logger.info("Reading and creating arguments")
    args = read_arguments()
    logger.info("Reading Data")
    datasets = {}
    for split in args.splits:
        dataset = prepare_data.read_data_to_variable(args.data_paths[split],
                                                     args.alphabets,
                                                     args.device,
                                                     symbolic_root=True)
        datasets[split] = dataset
    if args.set_num_training_samples is not None:
        print('Setting train and dev to %d samples' %
              args.set_num_training_samples)
        datasets = rearrange_splits.rearranging_splits(
            datasets, args.set_num_training_samples)
    logger.info("Creating Networks")
    num_data = sum(datasets['train'][1])
    model, optimizer, dev_eval_dict, test_eval_dict, start_epoch = build_model_and_optimizer(
        args)
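    # keep copies of the best model/optimizer seen so far; they are updated by
    # in_domain_evaluation and used for the final evaluation below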
    best_model = deepcopy(model)
    best_optimizer = deepcopy(optimizer)

    logger.info('Training INFO of in domain %s' % args.domain)
    logger.info('Training on Dependency Parsing')
    logger.info("train: gamma: %f, batch: %d, clip: %.2f, unk replace: %.2f" %
                (args.gamma, args.batch_size, args.clip, args.unk_replace))
    logger.info('number of training samples for %s is: %d' %
                (args.domain, num_data))
    logger.info("dropout(in, out, rnn): (%.2f, %.2f, %s)" %
                (args.p_in, args.p_out, args.p_rnn))
    logger.info("num_epochs: %d" % (args.num_epochs))
    print('\n')

    if not args.eval_mode:
        logger.info("Training")
        num_batches = prepare_data.calc_num_batches(datasets['train'],
                                                    args.batch_size)
        lr = args.learning_rate
        patient = 0
        decay = 0
        for epoch in range(start_epoch + 1, args.num_epochs + 1):
            print(
                'Epoch %d (Training: rnn mode: %s, optimizer: %s, learning rate=%.6f, eps=%.1e, decay rate=%.2f (schedule=%d, decay=%d)): '
                % (epoch, args.rnn_mode, args.opt, lr, args.epsilon,
                   args.decay_rate, args.schedule, decay))
            model.train()
            total_loss = 0.0
            total_arc_loss = 0.0
            total_arc_tag_loss = 0.0
            total_train_inst = 0.0

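            # draw training batches from length buckets in random order;
            # unk_replace controls random replacement of words with the
            # unknown token during training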
            train_iter = prepare_data.iterate_batch_rand_bucket_choosing(
                datasets['train'],
                args.batch_size,
                args.device,
                unk_replace=args.unk_replace)
            start_time = time.time()
            batch_num = 0
            for batch_num, batch in enumerate(train_iter, start=1):
                optimizer.zero_grad()
                # compute loss of main task
                word, char, pos, ner_tags, heads, arc_tags, auto_label, masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths)
                loss_arc, loss_arc_tag = model.loss(out_arc,
                                                    out_arc_tag,
                                                    heads,
                                                    arc_tags,
                                                    mask=masks,
                                                    length=lengths)
                loss = loss_arc + loss_arc_tag

                # update losses
                num_insts = masks.data.sum() - word.size(0)
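                # masks.sum() counts non-padding tokens; subtracting the batch
                # size excludes one token per sentence (the symbolic ROOT)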
                total_arc_loss += loss_arc.item() * num_insts
                total_arc_tag_loss += loss_arc_tag.item() * num_insts
                total_loss += loss.item() * num_insts
                total_train_inst += num_insts
                # optimize parameters
                loss.backward()
                clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()

                time_ave = (time.time() - start_time) / batch_num
                time_left = (num_batches - batch_num) * time_ave

                # update log
                if batch_num % 50 == 0:
                    log_info = 'train: %d/%d, domain: %s, total loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time left: %.2fs' % \
                               (batch_num, num_batches, args.domain, total_loss / total_train_inst, total_arc_loss / total_train_inst,
                                total_arc_tag_loss / total_train_inst, time_left)
                    sys.stdout.write(log_info)
                    sys.stdout.write('\n')
                    sys.stdout.flush()
            print('\n')
            print(
                'train: %d/%d, domain: %s, total_loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time: %.2fs'
                % (batch_num, num_batches, args.domain,
                   total_loss / total_train_inst, total_arc_loss /
                   total_train_inst, total_arc_tag_loss / total_train_inst,
                   time.time() - start_time))

            dev_eval_dict, test_eval_dict, best_model, best_optimizer, patient = in_domain_evaluation(
                args, datasets, model, optimizer, dev_eval_dict,
                test_eval_dict, epoch, best_model, best_optimizer, patient)
            if patient >= args.schedule:
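                # patience exhausted: decay the base learning rate with an
                # inverse-time schedule and rebuild the optimizer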
                lr = args.learning_rate / (1.0 + epoch * args.decay_rate)
                optimizer = generate_optimizer(args, lr, model.parameters())
                print('updated learning rate to %.6f' % lr)
                patient = 0
            print_results(test_eval_dict['in_domain'], 'test', args.domain,
                          'best_results')
            print('\n')
        for split in datasets.keys():
            evaluation(args, datasets[split], split, best_model, args.domain,
                       epoch, 'best_results')

    else:
        logger.info("Evaluating")
        epoch = start_epoch
        #for split in ['train', 'dev', 'test']:
        for split in ['test']:
            eval_dict = evaluation(args, datasets[split], split, model,
                                   args.domain, epoch, 'best_results')
            write_results(args, datasets[split], args.domain, split, model,
                          args.domain, eval_dict)
Example no. 3
def main():
    logger.info("Reading and creating arguments")
    args = read_arguments()
    logger.info("Reading Data")
    datasets = {}

    for split in args.splits:
        dataset = prepare_data.read_data_to_variable(args.data_paths[split],
                                                     args.alphabets,
                                                     args.device,
                                                     symbolic_root=True)
        datasets[split] = dataset
    if args.set_num_training_samples is not None:
        print('Note the change here:')
        print('the dev set is left untouched, just like the test set')
        # print('Setting train and dev to %d samples' % args.set_num_training_samples)
        datasets = rearrange_splits.rearranging_splits(
            datasets, args.set_num_training_samples)
    logger.info("Creating Networks")
    num_data = sum(datasets['train'][1])
    #########################################################
    # Here constraints need to be added.
    model, optimizer, dev_eval_dict, test_eval_dict, start_epoch = build_model_and_optimizer(
        args)
    best_model = deepcopy(model)
    best_optimizer = deepcopy(optimizer)
    #########################################################

    logger.info('Training INFO of in domain %s' % args.domain)
    logger.info('Training on Dependency Parsing')
    logger.info("train: gamma: %f, batch: %d, clip: %.2f, unk replace: %.2f" %
                (args.gamma, args.batch_size, args.clip, args.unk_replace))
    logger.info('number of training samples for %s is: %d' %
                (args.domain, num_data))
    logger.info("dropout(in, out, rnn): (%.2f, %.2f, %s)" %
                (args.p_in, args.p_out, args.p_rnn))
    logger.info("num_epochs: %d" % (args.num_epochs))
    print('\n')

    if not args.eval_mode:
        logger.info("Training")
        num_batches = prepare_data.calc_num_batches(datasets['train'],
                                                    args.batch_size)
        lr = args.learning_rate
        patient = 0
        decay = 0
        for epoch in range(start_epoch + 1, args.num_epochs + 1):
            print(
                'Epoch %d (Training: rnn mode: %s, optimizer: %s, learning rate=%.6f, eps=%.1e, decay rate=%.2f (schedule=%d, decay=%d)): '
                % (epoch, args.rnn_mode, args.opt, lr, args.epsilon,
                   args.decay_rate, args.schedule, decay))
            model.train()
            total_loss = 0.0
            total_arc_loss = 0.0
            total_arc_tag_loss = 0.0
            total_train_inst = 0.0

            train_iter = prepare_data.iterate_batch_rand_bucket_choosing(
                datasets['train'],
                args.batch_size,
                args.device,
                unk_replace=args.unk_replace)
            start_time = time.time()
            batch_num = 0
            for batch_num, batch in enumerate(train_iter, start=1):
                optimizer.zero_grad()
                # compute loss of main task

                # word, pos, ner_tags, heads, arc_tags, auto_label, masks: [batch, seq_len], e.g. [16, 25]
                # char: [batch, seq_len, max_word_len], e.g. [16, 25, 29]
                # lengths: [batch], e.g. [16]
                # TODO: why do we need auto_label?
                word, char, pos, ner_tags, heads, arc_tags, auto_label, masks, lengths = batch

                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths)
                # The decoder outputs a score s_ij indicating the model's
                # belief that token j should be the head of token i
                # out_arc: [batch, seq, seq], e.g. torch.Size([16, 24, 24])
                # out_arc_tag = (out_arc_tag_h, out_arc_tag_c), each of shape
                # [batch, seq, tag_space], e.g. torch.Size([16, 24, 128])
                loss_arc, loss_arc_tag = model.loss(out_arc,
                                                    out_arc_tag,
                                                    heads,
                                                    arc_tags,
                                                    mask=masks,
                                                    length=lengths)
                loss = loss_arc + loss_arc_tag

                # update losses
                num_insts = masks.data.sum() - word.size(0)

                total_arc_loss += loss_arc.item() * num_insts
                total_arc_tag_loss += loss_arc_tag.item() * num_insts
                total_loss += loss.item() * num_insts
                total_train_inst += num_insts
                # optimize parameters
                loss.backward()
                clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()

                time_ave = (time.time() - start_time) / batch_num
                time_left = (num_batches - batch_num) * time_ave

                # update log
                if batch_num % 50 == 0:
                    log_info = 'train: %d/%d, domain: %s, total loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time left: %.2fs' % \
                               (batch_num, num_batches, args.domain, total_loss / total_train_inst, total_arc_loss / total_train_inst,
                                total_arc_tag_loss / total_train_inst, time_left)
                    sys.stdout.write(log_info)
                    sys.stdout.write('\n')
                    sys.stdout.flush()
            print('\n')
            print(
                'train: %d/%d, domain: %s, total_loss: %.2f, arc_loss: %.2f, arc_tag_loss: %.2f, time: %.2fs'
                % (batch_num, num_batches, args.domain,
                   total_loss / total_train_inst, total_arc_loss /
                   total_train_inst, total_arc_tag_loss / total_train_inst,
                   time.time() - start_time))
            dev_eval_dict, test_eval_dict, best_model, best_optimizer, patient, curr_dev_eval_dict = in_domain_evaluation(
                args, datasets, model, optimizer, dev_eval_dict,
                test_eval_dict, epoch, best_model, best_optimizer, patient)
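            # record this epoch's average training losses together with the
            # current dev metrics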
            store = {
                'total_loss': str(total_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                'arc_loss': str(total_arc_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                'arc_tag_loss': str(total_arc_tag_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                'eval': curr_dev_eval_dict,
            }
            #############################################
            str_file = args.full_model_name + '_' + 'all_epochs'
            with open(str_file, 'a') as f:
                f.write(str(store) + '\n')
            ###############################################
            if patient >= args.schedule:
                lr = args.learning_rate / (1.0 + epoch * args.decay_rate)
                optimizer = generate_optimizer(args, lr, model.parameters())
                print('updated learning rate to %.6f' % lr)
                patient = 0
            print_results(test_eval_dict['in_domain'], 'test', args.domain,
                          'best_results')
            print('\n')

        for split in datasets.keys():
            flag = False
            eval_dict = evaluation(args, datasets[split], split, best_model,
                                   args.domain, epoch, flag, 'best_results')

    else:
        logger.info("Evaluating")
        epoch = start_epoch
        # epoch = 70
        # print('Start epoch is',start_epoch)
        for split in ['train', 'dev', 'test']:
            flag = False
            eval_dict = evaluation(args, datasets[split], split, model,
                                   args.domain, epoch, flag, 'best_results')
            # print_results(eval_dict, split, args.domain, 'results')
            print('\n')