Ejemplo n.º 1
0
        def valid(data_processor, max_batches=None, batch_size=1024):
            """Score the model on batches pulled from ``data_processor``.

            Batches are fetched with ``data_processor.get_batch(batch_size)``
            and run through ``sess`` until either the processor raises its
            ``end_flag`` (it is then refreshed) or ``max_batches`` batches
            have been counted. Predicted and gold sequences are converted to
            key/value pairs with ``extract_kvpairs_in_bioes`` and compared by
            ``cal_f1_score``.

            Args:
                data_processor: object exposing ``get_batch``, ``end_flag``
                    and ``refresh``.
                max_batches: optional cap on the number of batches; ``None``
                    means run until ``end_flag`` is set.
                batch_size: value forwarded to ``get_batch``.

            Returns:
                The tuple ``(p, r, f1)`` unpacked from ``cal_f1_score``
                (logged as "P/R/F1").
            """
            predicted_pairs, reference_pairs = [], []
            batches_done = 0

            def to_symbols(ids, vocab, length):
                # Drop the first and last positions, keep at most `length`
                # entries, and map each id through `vocab`.
                return [vocab[idx] for idx in ids[1:-1][:length]]

            while True:
                batch = data_processor.get_batch(batch_size)
                (seq_ids_batch, mask_batch, segment_batch,
                 gold_bio_batch, gold_attr_batch) = batch

                feeds = {
                    model.inputs_seq: seq_ids_batch,
                    model.inputs_mask: mask_batch,
                    model.inputs_segment: segment_batch,
                }
                pred_bio_batch, pred_attr_batch = sess.run(
                    model.outputs, feeds)

                samples = zip(pred_bio_batch, gold_bio_batch,
                              pred_attr_batch, gold_attr_batch,
                              seq_ids_batch, mask_batch)
                for (pred_bio, gold_bio, pred_attr, gold_attr,
                     seq_ids, mask) in samples:
                    # NOTE(review): presumably the mask also covers two
                    # special boundary tokens, hence the "- 2" -- confirm.
                    length = sum(mask) - 2

                    pred_bio_tags = to_symbols(pred_bio, i2w_bio, length)
                    gold_bio_tags = to_symbols(gold_bio, i2w_bio, length)
                    chars = to_symbols(seq_ids, i2w_char, length)
                    pred_attr_tags = to_symbols(pred_attr, i2w_attr, length)
                    gold_attr_tags = to_symbols(gold_attr, i2w_attr, length)

                    predicted = extract_kvpairs_in_bioes(
                        pred_bio_tags, chars, pred_attr_tags)
                    reference = extract_kvpairs_in_bioes(
                        gold_bio_tags, chars, gold_attr_tags)

                    predicted_pairs.append(predicted)
                    reference_pairs.append(reference)

                # The batch that sets end_flag is still scored above; only
                # then is the processor reset and the loop left.
                if data_processor.end_flag:
                    data_processor.refresh()
                    break

                batches_done += 1
                if max_batches is not None and batches_done >= max_batches:
                    break

            p, r, f1 = cal_f1_score(predicted_pairs, reference_pairs)

            logger.info("Valid Samples: {}".format(len(predicted_pairs)))
            percentages = [round(score * 100, 2) for score in (p, r, f1)]
            logger.info("Valid P/R/F1: {} / {} / {}".format(*percentages))

            return (p, r, f1)
Ejemplo n.º 2
0
        def valid(data_processor, max_batches=None, batch_size=1024):
            """Run validation and return the scores from ``cal_f1_score``.

            Batches are pulled with ``data_processor.get_batch(batch_size)``
            until the processor sets ``end_flag`` (it is then refreshed) or
            until ``max_batches`` batches have been counted. For every sample
            the predicted and gold tag sequences are truncated to the sample's
            length, converted to key/value pairs with
            ``extract_kvpairs_in_bio`` and printed for inspection.

            Args:
                data_processor: object exposing ``get_batch``, ``end_flag``
                    and ``refresh``.
                max_batches: optional cap on the number of batches; ``None``
                    means run until ``end_flag`` is set.
                batch_size: value forwarded to ``get_batch``.

            Returns:
                The tuple ``(p, r, f1)`` unpacked from ``cal_f1_score``
                (logged as "P/R/F1").
            """
            preds_kvpair = []
            golds_kvpair = []
            batches_sample = 0

            while True:
                (inputs_seq_batch,
                 inputs_seq_len_batch,
                 outputs_seq_batch) = data_processor.get_batch(batch_size)

                # NOTE(review): the gold sequences are fed as well;
                # presumably the graph requires this placeholder even when
                # only predicting -- confirm against the model definition.
                feed_dict = {
                    model.inputs_seq: inputs_seq_batch,
                    model.inputs_seq_len: inputs_seq_len_batch,
                    model.outputs_seq: outputs_seq_batch
                }

                preds_seq_batch = sess.run(model.outputs, feed_dict)

                for pred_seq, gold_seq, input_seq, seq_len in zip(
                        preds_seq_batch, outputs_seq_batch,
                        inputs_seq_batch, inputs_seq_len_batch):
                    # Keep only the first `seq_len` positions of each
                    # sequence before mapping ids to symbols.
                    pred_seq = [i2w_bio[i] for i in pred_seq[:seq_len]]
                    gold_seq = [i2w_bio[i] for i in gold_seq[:seq_len]]
                    char_seq = [i2w_char[i] for i in input_seq[:seq_len]]
                    pred_kvpair = extract_kvpairs_in_bio(pred_seq, char_seq)
                    gold_kvpair = extract_kvpairs_in_bio(gold_seq, char_seq)

                    # Print this sample's prediction next to its gold pairs.
                    # (Previously the whole accumulated list was printed
                    # here, which never included the current sample.)
                    print('预测结果:')
                    print('preds_kvpair', pred_kvpair)
                    print('gold_kvpair', gold_kvpair)

                    preds_kvpair.append(pred_kvpair)
                    golds_kvpair.append(gold_kvpair)

                # The batch that sets end_flag is still scored above; only
                # then is the processor reset and the loop left.
                if data_processor.end_flag:
                    data_processor.refresh()
                    break

                batches_sample += 1
                if (max_batches is not None) and (batches_sample >= max_batches):
                    break

            p, r, f1 = cal_f1_score(preds_kvpair, golds_kvpair)

            logger.info("Valid Samples: {}".format(len(preds_kvpair)))
            logger.info("Valid P/R/F1: {} / {} / {}".format(
                round(p * 100, 2), round(r * 100, 2), round(f1 * 100, 2)))

            return (p, r, f1)
Ejemplo n.º 3
0
# Training-loop excerpt: runs one training step per item of `train_set`,
# reports training metrics periodically, and starts a validation pass every
# `val_frequency` iterations. The excerpt ends inside the validation loop.

# batch_size is used as a divisor below, so it must be positive.
assert(params_dict['batch_size'] > 0)

# Start iterating through the training set
epoch_no = 0

for idx, data in enumerate(train_set):
    train_output = train_computation(data)
    predictions = train_output['logits']
    label_batch = train_output['labels']
    # 1-based iteration counter (idx is 0-based).
    niter = idx + 1
    # Print the training loss and the F1 and EM scores on every 20th
    # iteration (idx 0, 20, 40, ...)
    if (idx % 20 == 0):

        print('iteration = {}, train loss = {}'.format(
            niter, train_output['batch_cost']))
        f1_score_int, em_score_int = cal_f1_score(
            params_dict, label_batch, predictions)
        print("F1_Score and EM_score are", f1_score_int, em_score_int)

    # Number of batch_size-sized chunks needed to cover dev['para']['data'],
    # rounded up. Recomputed on every iteration; not used in the visible part
    # of this excerpt.
    divide_val = math.ceil(
        len(dev['para']['data']) / params_dict['batch_size'])

    # Every `val_frequency` iterations is reported as a finished epoch and
    # triggers a validation pass.
    if niter % val_frequency == 0:
        print('Epoch done:', epoch_no)
        epoch_no += 1
        # Reset before each validation pass.
        # NOTE(review): presumably accumulated over the validation batches in
        # code beyond this excerpt -- confirm.
        f1_score_req = 0
        em_score_req = 0

        # Compute validation scores
        for idx_val, data_val in enumerate(valid_set):
            eval_output = valid_computation(data_val)
            predictions_val = eval_output['logits']
Ejemplo n.º 4
0
# Training-loop excerpt: runs one training step per item of `train_set`,
# reports training metrics periodically, and starts a validation pass every
# `val_frequency` iterations. The excerpt ends inside the validation loop.

# batch_size is used as a divisor below, so it must be positive.
assert (params_dict['batch_size'] > 0)

# Start iterating through the training set
epoch_no = 0

for idx, data in enumerate(train_set):
    train_output = train_computation(data)
    predictions = train_output['logits']
    label_batch = train_output['labels']
    # 1-based iteration counter (idx is 0-based).
    niter = idx + 1
    # Print the training loss and the F1 and EM scores on every 20th
    # iteration (idx 0, 20, 40, ...)
    if (idx % 20 == 0):

        print('iteration = {}, train loss = {}'.format(
            niter, train_output['batch_cost']))
        f1_score_int, em_score_int = cal_f1_score(params_dict, label_batch,
                                                  predictions)
        print("F1_Score and EM_score are", f1_score_int, em_score_int)

    # Number of batch_size-sized chunks needed to cover dev['para']['data'],
    # rounded up. Recomputed on every iteration; not used in the visible part
    # of this excerpt.
    divide_val = math.ceil(
        len(dev['para']['data']) / params_dict['batch_size'])

    # Every `val_frequency` iterations is reported as a finished epoch and
    # triggers a validation pass.
    if niter % val_frequency == 0:
        print('Epoch done:', epoch_no)
        epoch_no += 1
        # Reset before each validation pass.
        # NOTE(review): presumably accumulated over the validation batches in
        # code beyond this excerpt -- confirm.
        f1_score_req = 0
        em_score_req = 0

        # Compute validation scores
        for idx_val, data_val in enumerate(valid_set):
            eval_output = valid_computation(data_val)
            predictions_val = eval_output['logits']