Example #1
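All five variants below are NeuroNER-style implementations and omit their module-level imports. A minimal sketch of the preamble they rely on is shown here for orientation; the project-local modules (utils, utils_nlp, remap_labels, report_fscore) are assumptions about where those helpers live, not something the snippets themselves confirm.

# Typical module-level imports assumed by the functions below (TensorFlow 1.x,
# since tf.contrib is used); project-local helpers are marked as assumptions.
import codecs
import json
import os

import numpy as np
import sklearn.metrics
import tensorflow as tf
from tqdm import tqdm

import utils        # project-local batching helpers (assumed; used in Example #3)
import utils_nlp    # project-local BIOES/BIO conversion helpers (assumed)
from evaluate import remap_labels   # project-local; the actual module name may differ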
def prediction_step(sess, dataset, dataset_type, model, transition_params_trained, stats_graph_folder, epoch_number, parameters, dataset_filepaths):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(stats_graph_folder, '{1:03d}_{0}.txt'.format(dataset_type,epoch_number))
    output_file = codecs.open(output_filepath, 'w', 'UTF-8')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', 'UTF-8')

    for i in range(len(dataset.token_indices[dataset_type])):
        feed_dict = {
          model.input_token_indices: dataset.token_indices[dataset_type][i],
          model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
          model.input_token_lengths: dataset.token_lengths[dataset_type][i],
          model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
          model.dropout_keep_prob: 1.
        }
        unary_scores, predictions = sess.run([model.unary_scores, model.predictions], feed_dict)
        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(unary_scores, transition_params_trained)
            # Drop the artificial sequence-start and sequence-end positions added for the CRF.
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        assert(len(predictions) == len(dataset.tokens[dataset_type][i]))
        output_string = ''
        prediction_labels = [dataset.index_to_label[prediction] for prediction in predictions]
        gold_labels = dataset.labels[dataset_type][i]
        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)
        for prediction, token, gold_label in zip(prediction_labels, dataset.tokens[dataset_type][i], gold_labels):
            # Re-read the original CoNLL file to recover the full line for this
            # token, skipping document separators and blank lines.
            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')
                if '-DOCSTART-' in split_line[0] or len(split_line) == 0 or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]
                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()
                    gold_label_original = split_line[-1]
                    assert(token == token_original and gold_label == gold_label_original)
                    break
            split_line.append(prediction)
            output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string+'\n')

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])

    output_file.close()
    original_conll_file.close()

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script, output_filepath, conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(all_predictions, all_y_true, dataset, parameters['main_evaluation_mode'])
            print(sklearn.metrics.classification_report(new_y_true, new_y_pred, digits=4, labels=new_label_indices, target_names=new_label_names))

    return all_predictions, all_y_true, output_filepath
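For context, a call to this function during evaluation looks roughly like the sketch below. The surrounding session, trained model, CRF transition parameters, and parameters dictionary are assumptions inferred from the argument names above, not part of the original snippet.

# Hypothetical invocation after an epoch of training (all names are assumptions):
all_predictions, all_y_true, output_filepath = prediction_step(
    sess, dataset, 'valid', model, transition_params_trained,
    stats_graph_folder, epoch_number, parameters, dataset_filepaths)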
Example #2
def prediction_step(sess,
                    dataset,
                    dataset_type,
                    model,
                    transition_params_trained,
                    stats_graph_folder,
                    epoch_number,
                    parameters,
                    dataset_filepaths,
                    for_adapter=False):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    if for_adapter:
        all_predictions_per_sentence = []
        all_y_true_per_sentence = []
        all_prediction_labels_per_sentence = []

    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(
        stats_graph_folder, '{1:03d}_{0}.txt'.format(dataset_type,
                                                     epoch_number))
    output_file = codecs.open(output_filepath,
                              'w',
                              'latin-1',
                              errors='replace')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type],
                                      'r',
                                      'latin-1',
                                      errors='replace')
    for i in range(len(dataset.token_indices[dataset_type])):
        if parameters['use_adapter']:
            feed_dict = {
                model.input_token_indices: dataset.token_indices[dataset_type][i],
                model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
                model.input_token_lengths: dataset.token_lengths[dataset_type][i],
                model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
                model.input_label_adapter_indices_vector: dataset.label_adapter_vector_indices[dataset_type][i],
                model.dropout_keep_prob: 1.,
                model.adapter_keep_prob: 1.
            }
            if parameters['include_pos']:
                feed_dict[model.input_label_pos_indices_vector] = \
                    dataset.label_pos_vector_indices[dataset_type][i]
        elif for_adapter:
            # Used for the prediction/evaluation step, so the gold labels from
            # the dataset are not fed to the model.
            feed_dict = {
                model.input_token_indices: dataset.token_indices[dataset_type][i],
                model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
                model.input_token_lengths: dataset.token_lengths[dataset_type][i],
                model.dropout_keep_prob: 1.
            }
        else:
            feed_dict = {
                model.input_token_indices: dataset.token_indices[dataset_type][i],
                model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
                model.input_token_lengths: dataset.token_lengths[dataset_type][i],
                model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
                model.dropout_keep_prob: 1.
            }
        unary_scores, predictions = sess.run(
            [model.unary_scores, model.predictions], feed_dict)
        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(
                unary_scores, transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        assert (len(predictions) == len(dataset.tokens[dataset_type][i]))
        output_string = ''
        prediction_labels = [
            dataset.index_to_label[prediction] for prediction in predictions
        ]
        gold_labels = dataset.labels[dataset_type][i]
        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)
        try:
            assert len(prediction_labels) == len(gold_labels)
        except AssertionError:
            print(dataset.tokens[dataset_type][i])
            print(gold_labels)
            print(prediction_labels)

        for z, (prediction, token, gold_label) in enumerate(
                zip(prediction_labels, dataset.tokens[dataset_type][i],
                    gold_labels)):
            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')
                if '-DOCSTART-' in split_line[0] or len(
                        split_line) == 0 or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]
                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()
                    gold_label_original = split_line[-1]
                    try:
                        assert (token == dataset.tokenize(token_original)
                                and gold_label == gold_label_original)
                    except AssertionError:
                        print(' '.join([
                            dataset.tokens[dataset_type][i][x] + '/' +
                            gold_labels[x] for x in range(len(gold_labels))
                        ]))
                        print(
                            'token: {:s} - gold_label: {:s} - gold_label_original: {:s}'
                            .format(dataset.tokens[dataset_type][i][z],
                                    gold_label, gold_label_original))

                    break
            split_line.append(prediction)
            output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string + '\n')

        if for_adapter:
            all_predictions_per_sentence.append(predictions)
            all_y_true_per_sentence.append(
                dataset.label_indices[dataset_type][i])
            all_prediction_labels_per_sentence.append(prediction_labels)

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])

    output_file.close()
    original_conll_file.close()

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(
                output_filepath)
            shell_command = '/usr/bin/perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath,
                conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset,
                parameters['main_evaluation_mode'])

            print(
                sklearn.metrics.classification_report(
                    new_y_true,
                    new_y_pred,
                    digits=4,
                    labels=new_label_indices,
                    target_names=new_label_names))

    if for_adapter:
        return all_prediction_labels_per_sentence, all_predictions, all_y_true, output_filepath
    else:
        return all_predictions, all_y_true, output_filepath
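Note that for_adapter=True changes the return arity from three values to four. A caller would unpack it roughly as follows (hypothetical call; all names are assumptions):

# With for_adapter=True the per-sentence prediction labels are returned as well:
all_prediction_labels_per_sentence, all_predictions, all_y_true, output_filepath = \
    prediction_step(sess, dataset, 'train', model, transition_params_trained,
                    stats_graph_folder, epoch_number, parameters,
                    dataset_filepaths, for_adapter=True)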
Example #3
def prediction_step(sess, dataset, dataset_type, model, transition_params_trained, stats_graph_folder, epoch_number, parameters, dataset_filepaths, demo=False):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    all_predictions = []
    all_predictions_label = []
    all_y_true = []
    all_y_true_label = []
    output_filepath = os.path.join(stats_graph_folder, '{1:07.3f}_{0}.txt'.format(dataset_type,epoch_number))

    encoding = "utf-8"
    output_file = codecs.open(output_filepath, 'w', encoding)
    if "combined" in dataset_filepaths[dataset_type]:
        label_idx = 2
    else:
        label_idx = -1
    #original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', encoding=encoding)
    sequence_numbers = list(range(len(dataset.token_indices[dataset_type])))

    # Iterate over the dataset in batches rather than one sentence at a time.
    for i in tqdm(range(0, len(dataset.token_indices[dataset_type]), parameters['batch_size']), "Testing on {} at epoch {}".format(dataset_type, epoch_number)):
        sequence_number = sequence_numbers[i: i + parameters['batch_size']]
        batch = utils.pad_batch(dataset, sequence_number, dataset_type)

        feed_dict = {
            model.input_token_indices: batch['token_indices_padded'],
            model.input_sequence_lengths: batch['sequence_lengths'],
            model.input_token_character_indices: batch['character_indices_padded'],
            model.input_token_lengths: batch['token_lengths'],
            model.input_label_indices_flat: batch['label_indices'],
            model.input_label_indices_vector: batch['label_vector_indices'],
            model.dropout_keep_prob: 1
        }

        batch_unary_scores, batch_predictions = sess.run([model.unary_scores, model.predictions], feed_dict)

        for unary_score, y, sequence_length, predictions, j in zip(batch_unary_scores, batch['label_indices'],
                                                          batch['sequence_lengths'], batch_predictions, sequence_number):
            if parameters['use_crf']:
                # Remove padding from the scores and tag sequence.
                unary_score = unary_score[:sequence_length]
                y = y[:sequence_length]

                # Compute the highest scoring sequence.
                predictions, _ = tf.contrib.crf.viterbi_decode(unary_score, transition_params_trained)
            else:
                predictions = predictions[:sequence_length].tolist()

            assert(len(predictions) == len(np.array(dataset.tokens[dataset_type])[j]))
            output_string = []
            prediction_labels = [dataset.index_to_label[pred] for pred in predictions]
            gold_labels = np.array(dataset.labels[dataset_type])[j]
            if parameters['tagging_format'] == 'bioes':
                prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
                gold_labels = utils_nlp.bioes_to_bio(gold_labels)
            for prediction, token, gold_label in zip(prediction_labels, np.array(dataset.tokens[dataset_type])[j], gold_labels):
                # The realignment against the original CoNLL file is disabled in
                # this variant; output lines are rebuilt directly from the
                # in-memory token, gold label, and prediction.
                output_string.append(' '.join([token, gold_label, prediction]))
            output_file.write("\n".join(output_string))
            output_file.write("\n\n")

            all_predictions.extend(predictions)
            all_y_true.extend(np.array(dataset.label_indices[dataset_type])[j])

            all_predictions_label.append(prediction_labels)
            all_y_true_label.append(np.array(dataset.labels[dataset_type])[j])
    output_file.close()
    #original_conll_file.close()

    if demo:
        return all_predictions, all_y_true, output_filepath

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            #if "labelled_yelp_tips_th06" in parameters["dataset_train"]:
            #    shell_command = 'perl {0} -r < {1} > {2}'.format(conll_evaluation_script, output_filepath, conll_output_filepath)
            #else:
            shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script, output_filepath,
                                                                 conll_output_filepath)

            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            # Only the CoNLL evaluation mode is implemented in this variant; the
            # remap_labels path below is therefore unreachable.
            raise AssertionError("Not implemented")
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(all_predictions, all_y_true, dataset, parameters['main_evaluation_mode'])
            print(sklearn.metrics.classification_report(new_y_true, new_y_pred, digits=4, labels=new_label_indices, target_names=new_label_names))

    exact_score, inexact_score = report_fscore(all_y_true_label, all_predictions_label)
    exact_inexact_evaluation = '{0}_exact_inexact_evaluation.txt'.format(output_filepath)
    with open(exact_inexact_evaluation, "w") as file:
        file.write("Exact score\n")
        file.write(json.dumps(exact_score) + "\n")
        file.write(json.dumps(inexact_score) + "\n")

    return all_predictions, all_y_true, output_filepath
Example #4
def prediction_step(sess, dataset, dataset_type, model,
                    transition_params_trained, stats_graph_folder,
                    epoch_number, parameters, dataset_filepaths):
    """
    Predict.
    """
    if dataset_type == 'deploy':
        print('=> Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    """
    Comment out writing to file only for efficiency experiment
    """

    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(
        stats_graph_folder, '{1:03d}_{0}.txt'.format(dataset_type,
                                                     epoch_number))
    mentions_output_filepath = os.path.join(stats_graph_folder,
                                            'mentions_output.txt')

    print('output_file: ', output_filepath)
    print('mentions_output_file: ', mentions_output_filepath)

    output_file = codecs.open(output_filepath, 'w', 'UTF-8')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r',
                                      'UTF-8')
    # mentions_file= codecs.open(mentions_output_filepath, 'w', 'UTF-8')

    output_tokens_list = []
    token_list_outer = []
    token_list_inner = []

    for i in range(len(dataset.token_indices[dataset_type])):
        feed_dict = {
            model.input_token_indices:
            dataset.token_indices[dataset_type][i],
            model.input_token_character_indices:
            dataset.character_indices_padded[dataset_type][i],
            model.input_token_lengths:
            dataset.token_lengths[dataset_type][i],
            model.input_label_indices_vector:
            dataset.label_vector_indices[dataset_type][i],
            model.dropout_keep_prob:
            1.
        }

        unary_scores, predictions = sess.run(
            [model.unary_scores, model.predictions], feed_dict)

        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(
                unary_scores, transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        assert (len(predictions) == len(dataset.tokens[dataset_type][i]))

        output_string = ''
        prediction_labels = [
            dataset.index_to_label[prediction] for prediction in predictions
        ]
        unary_score_list = unary_scores.tolist()[1:-1]

        gold_labels = dataset.labels[dataset_type][i]

        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)

        for prediction, token, gold_label, scores in zip(
                prediction_labels, dataset.tokens[dataset_type][i],
                gold_labels, unary_score_list):

            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')

                if '-DOCSTART-' in split_line[0] or len(split_line) == 0 \
                or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]

                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()

                    gold_label_original = split_line[-1]

                    assert (token == token_original
                            and gold_label == gold_label_original)
                    # print('prediction and label: ', prediction, token)
                    if (token != '--eosc'):
                        token_list_inner.append(token)
                        if (prediction != 'O'):
                            output_tokens_list.append(token + '//' +
                                                      prediction)
                    else:
                        token_list_inner.append(token)
                        token_list_outer.append(token_list_inner)
                        token_list_inner = []
                        output_tokens_list.append(token + '//' + prediction)
                    break

            split_line.append(prediction)
            # print('========================')

            if parameters['output_scores']:
                # space separated scores
                scores = ' '.join([str(s) for s in scores])
                split_line.append('{}'.format(scores))
            # print(split_line)
            output_string += ' '.join(split_line) + '\n'

        output_file.write(output_string + '\n')

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])

    output_file.close()
    original_conll_file.close()

    mention_count = 0
    mentions_list_inner = []
    mentions_list_outer = []
    candidateMention = ""
    for outputStr in output_tokens_list:
        #outputStr=output[index]
        candidate = " ".join((outputStr.split("//"))[:-1])
        tag = (outputStr.split("//"))[-1]
        #text="".join(candidate)
        if (candidate != '--eosc'):
            if (tag.startswith('B-')):
                #print candidateMention
                if (candidateMention != ""):
                    mentions_list_inner.append(candidateMention)
                candidateMention = candidate
            else:
                candidateMention += " " + candidate
        else:
            if (candidateMention != ""):
                mentions_list_inner.append(candidateMention)
            candidateMention = ""
            mentions_list_outer.append(mentions_list_inner)
            mentions_list_inner = []

    # print('sentence list: ', token_list_outer)
    # print('mentions: ', mentions_list_outer)
    for inner_list in mentions_list_outer:
        mention_count += len(inner_list)
        mentions_string = ','.join(inner_list) + '\n'
        # mentions_file.write(mentions_string)

    # mentions_string=''
    # mentions_string += ' '.join(mentions_list_outer) + '\n'

    # mentions_file.close()

    print('tally:', len(token_list_outer), len(mentions_list_outer),
          'total mentions discovered:', mention_count)

    if dataset_type != 'deploy':

        if parameters['main_evaluation_mode'] == 'conll':

            # run perl evaluation script in python package
            # conll_evaluation_script = os.path.join('.', 'conlleval')
            package_name = 'neuroner'
            root_dir = os.path.dirname(
                pkg_resources.resource_filename(package_name, '__init__.py'))
            conll_evaluation_script = os.path.join(root_dir, 'conlleval')

            conll_output_filepath = '{0}_conll_evaluation.txt'.format(
                output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath,
                conll_output_filepath)
            os.system(shell_command)

            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)

        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset,
                parameters['main_evaluation_mode'])

            print(
                sklearn.metrics.classification_report(
                    new_y_true,
                    new_y_pred,
                    digits=4,
                    labels=new_label_indices,
                    target_names=new_label_names))

    return all_predictions, all_y_true, output_filepath
Example #5
def prediction_step(sess, dataset, dataset_type, model,
                    transition_params_trained, stats_graph_folder,
                    epoch_number, parameters, dataset_filepaths):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(
        stats_graph_folder, '{1:03d}_{0}.txt'.format(dataset_type,
                                                     epoch_number))
    output_file = codecs.open(output_filepath, 'w', 'UTF-8')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r',
                                      'UTF-8')

    # Collect per-sentence unary scores (A) and predictions (C) so they can be
    # saved to disk after the loop.
    A = []
    C = []
    for i in range(len(dataset.token_indices[dataset_type])):
        feed_dict = {
            model.input_token_indices:
            dataset.token_indices[dataset_type][i],
            model.input_token_character_indices:
            dataset.character_indices_padded[dataset_type][i],
            model.input_token_lengths:
            dataset.token_lengths[dataset_type][i],
            model.input_label_indices_vector:
            dataset.label_vector_indices[dataset_type][i],
            model.dropout_keep_prob:
            1.
        }
        unary_scores, predictions = sess.run(
            [model.unary_scores, model.predictions], feed_dict)
        # Keep the unary scores for the actual tokens (dropping the first/last
        # rows and the last two label columns used for CRF padding).
        A.append(unary_scores[1:, :-2][:-1])
        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(
                unary_scores, transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        C.append(predictions)
        assert (len(predictions) == len(dataset.tokens[dataset_type][i]))
        output_string = ''
        prediction_labels = [
            dataset.index_to_label[prediction] for prediction in predictions
        ]
        gold_labels = dataset.labels[dataset_type][i]
        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)
        for prediction, token, gold_label in zip(
                prediction_labels, dataset.tokens[dataset_type][i],
                gold_labels):
            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')
                if '-DOCSTART-' in split_line[0] or len(
                        split_line) == 0 or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]
                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()
                    gold_label_original = split_line[-1]
                    assert (token == token_original
                            and gold_label == gold_label_original)
                    break
            split_line.append(prediction)
            output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string + '\n')

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])


    # Save the collected unary scores and predictions for offline analysis.
    np.save('SalmanTest/NAMEMr1mainSalmanUnary_scores%s' % dataset_type, A)
    np.save('SalmanTest/NAMEMr1mainSalmanCCC%s' % dataset_type, C)
    output_file.close()
    original_conll_file.close()

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(
                output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath,
                conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset,
                parameters['main_evaluation_mode'])
            print(
                sklearn.metrics.classification_report(
                    new_y_true,
                    new_y_pred,
                    digits=4,
                    labels=new_label_indices,
                    target_names=new_label_names))

    return all_predictions, all_y_true, output_filepath
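All five variants decode the CRF with tf.contrib.crf.viterbi_decode, which exists only in TensorFlow 1.x. If the code is ported to TensorFlow 2, the same decode is available from the TensorFlow Addons package; a minimal sketch, assuming tensorflow_addons is installed and that unary_scores and transition_params_trained are plain NumPy arrays as in the snippets above:

import tensorflow_addons as tfa

# Equivalent of the TF1-only tf.contrib.crf.viterbi_decode call used above:
predictions, _ = tfa.text.viterbi_decode(unary_scores, transition_params_trained)
predictions = predictions[1:-1]  # strip the padded start/end positions, as in the snippets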