Ejemplo n.º 1
0
def save_train_sentiment_value_result(y_pred, y_data, out_file_path):
    """Write per-sample sentiment predictions for training data to a file.

    Every line produced by ``merge(y_data, predict_labels)`` is split on
    commas, extended with the formatted prediction scores, reordered and
    written to ``out_file_path`` below a fixed header line.

    Rows whose first field holds ``'1'`` at the position selected by the
    last merged field are listed first (most recently seen first); all
    other rows follow in input order.

    Args:
        y_pred: Per-sample sequences of numeric scores. A ``np.ndarray`` is
            converted to nested lists before use.
        y_data: Training records handed to ``merge``. Presumably
            comma-separated lines whose first field is a space-separated
            one-hot label -- verify against the caller.
        out_file_path: Destination passed to ``file_utils.write_lines``.
    """
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()

    predict_labels = convert_sentiment_value_predict(y_pred)
    id_labels = merge(y_data, predict_labels)

    # Two buckets instead of repeated ``insert(0, ...)`` on one list, which
    # shifts every element on each call and makes the loop quadratic.
    front_rows = []
    back_rows = []
    for i, id_label in enumerate(id_labels):
        parts = id_label.split(',')
        # The last merged field is read as an integer (presumably the
        # predicted label); +1 turns it into an index into the tokens of
        # the first field.
        p = int(parts[-1]) + 1
        flags = parts[0].split()
        parts.append(' '.join('%.2f' % num for num in y_pred[i]))
        ordered = [
            sentiment_onehot_label[parts[0]], parts[4], parts[5], parts[2],
            parts[1], parts[3]
        ]
        bucket = back_rows if flags[p] != '1' else front_rows
        bucket.append(','.join(ordered))

    # Reversing reproduces the order that front-insertion used to create.
    front_rows.reverse()
    result = [',,,,,id'] + front_rows + back_rows
    file_utils.write_lines(result, out_file_path)
Ejemplo n.º 2
0
def convert_subject_sentiment_value_predict_result(
        subject_subject_sentiment_value_file_path, result_file_path):
    """Expand combined prediction lines into one output row per entry.

    Each input line is split on ','. Its second field is split on '|' into
    individual entries, and for every entry a row made of the line's first
    field, the entry with '_' replaced by ',' and a trailing ',' is written
    below a fixed header.

    Args:
        subject_subject_sentiment_value_file_path: File read through
            ``file_utils.read_all_lines``.
        result_file_path: Destination passed to ``file_utils.write_lines``.
    """
    rows = ['content_id,subject,sentiment_value,sentiment_word']
    source_lines = file_utils.read_all_lines(
        subject_subject_sentiment_value_file_path)
    for source_line in source_lines:
        fields = source_line.split(',')
        entries = fields[1].split('|')
        content_id = fields[0]
        rows.extend('%s,%s,' % (content_id, entry.replace('_', ','))
                    for entry in entries)
    file_utils.write_lines(rows, result_file_path)
Ejemplo n.º 3
0
def save_subject_result(y_pred, id_test, model_name, is_val=False):
    """Save subject predictions as a probability file and a label file.

    Does nothing when ``y_pred`` is ``None``. Otherwise two files are
    written: a probability table (header plus merged rows) whose path is
    suffixed with ``'.' + model_name``, and a label file obtained by
    thresholding with ``thresholds.topic_positive_threshold``.

    Args:
        y_pred: Model output; a ``np.ndarray`` is converted to lists.
        id_test: Sample identifiers handed to ``merge``.
        model_name: Suffix appended to the probability file path only.
        is_val: Select the validation paths instead of the test paths.
    """
    if y_pred is None:
        return
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()

    # Probability table.
    probability_rows = merge(id_test,
                             convert_predict_for_probability_output(y_pred))
    header = 'id,' + ','.join(label_mapping.subject_mapping_list)
    probability_table = [header] + probability_rows
    probability_base = (data_path.val_subject_probability_result_file_path
                        if is_val else
                        data_path.test_subject_probability_result_file_path)
    file_utils.write_lines(probability_table,
                           probability_base + '.' + model_name)

    # Thresholded labels.
    labels = convert_subject_predict(
        y_pred, threshold=thresholds.topic_positive_threshold)
    label_rows = merge(id_test, labels)
    label_path = (data_path.val_subject_result_file_path
                  if is_val else data_path.test_subject_result_file_path)
    file_utils.write_lines(label_rows, label_path)
Ejemplo n.º 4
0
def save_sentiment_value_result(y_pred, id_test, model_name, is_val=False):
    """Save sentiment predictions as a probability file and a label file.

    Does nothing when ``y_pred`` is ``None``. Otherwise two files are
    written: a probability table (header plus merged rows) whose path is
    suffixed with ``'.' + model_name``, and a label file built from
    ``convert_sentiment_value_predict``.

    Args:
        y_pred: Model output; a ``np.ndarray`` is converted to lists.
        id_test: Sample identifiers handed to ``merge``.
        model_name: Suffix appended to the probability file path only.
        is_val: Select the validation paths instead of the public-test
            paths.
    """
    if y_pred is None:
        return
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()

    # Probability table.
    probability_rows = merge(id_test,
                             convert_predict_for_probability_output(y_pred))
    header = 'id,' + ','.join(label_mapping.sentiment_value_mapping_list)
    probability_table = [header] + probability_rows
    if is_val:
        probability_base = (
            data_path.val_sentiment_value_probability_result_file_path)
    else:
        probability_base = (
            data_path.test_public_sentiment_value_probability_result_file_path)
    file_utils.write_lines(probability_table,
                           probability_base + '.' + model_name)

    # Predicted labels.
    label_rows = merge(id_test, convert_sentiment_value_predict(y_pred))
    if is_val:
        label_path = data_path.val_sentiment_value_result_file_path
    else:
        label_path = data_path.test_public_sentiment_value_result_file_path
    file_utils.write_lines(label_rows, label_path)
Ejemplo n.º 5
0
def merge_subject_sentiment_value(subject_file_path, sentiment_file_path,
                                  result_file_path):
    """Join a subject file and a sentiment file line by line.

    Line ``i`` of the subject file is paired with line ``i`` of the
    sentiment file. Each output row holds the first and third
    comma-separated fields of the subject line, the second field of the
    sentiment line and a trailing empty field, below a fixed header.

    Args:
        subject_file_path: File read through ``file_utils.read_all_lines``.
        sentiment_file_path: File read the same way; it is indexed by the
            subject line number, so it needs at least as many lines.
        result_file_path: Destination passed to ``file_utils.write_lines``.
    """
    subject_rows = file_utils.read_all_lines(subject_file_path)
    sentiment_rows = file_utils.read_all_lines(sentiment_file_path)

    merged = ['content_id,subject,sentiment_value,sentiment_word']
    for index, subject_row in enumerate(subject_rows):
        subject_fields = subject_row.split(',')
        sentiment_fields = sentiment_rows[index].split(',')
        # The empty last element yields the trailing ','.
        merged.append(','.join([
            subject_fields[0], subject_fields[2], sentiment_fields[1], ''
        ]))

    file_utils.write_lines(merged, result_file_path)
Ejemplo n.º 6
0
def save_train_subject_result(y_pred, y_data, model_name):
    """Write the training samples whose subject prediction is wrong.

    Predicted labels are obtained by thresholding ``y_pred``; true labels
    by thresholding the numbers parsed from the first comma-separated field
    of each ``y_data`` entry. Samples whose two labels agree are skipped.
    For each remaining sample the merged line is rewritten as: true label,
    the line's last field, the per-subject probabilities formatted as
    ``name:probability`` pairs, then the remaining original fields.

    The data rows are sorted and written below the header line to
    ``data_path.train_subject_result_file_path``.

    Args:
        y_pred: Model output; a ``np.ndarray`` is converted to lists.
        y_data: Training records; the first comma-separated field of each
            must be a whitespace-separated list of numbers.
        model_name: Unused; kept so existing callers keep working.
    """
    if isinstance(y_pred, np.ndarray):
        y_pred = y_pred.tolist()

    predict_labels = convert_subject_predict(
        y_pred, threshold=thresholds.topic_positive_threshold)

    id_labels = merge(y_data, predict_labels)

    y_true = [[float(p) for p in data.split(',')[0].split()]
              for data in y_data]
    true_labels = convert_subject_predict(
        y_true, threshold=thresholds.topic_positive_threshold)

    y_pred_probability = convert_predict_for_probability_output(y_pred)
    rows = []
    for i, id_label in enumerate(id_labels):
        if true_labels[i] == predict_labels[i]:
            continue
        parts = id_label.split(',')
        parts[0] = true_labels[i]

        named_probabilities = [
            label_mapping.subject_mapping_reverse[str(j)] + ':' + probability
            for j, probability in enumerate(y_pred_probability[i].split(','))
        ]
        parts.insert(1, ' '.join(named_probabilities))
        # Move the last field to position 1, right after the true label.
        parts.insert(1, parts.pop())
        rows.append(','.join(parts))

    # Sort only the data rows: sorting the header together with them would
    # leave it first merely by accident of character ordering.
    rows.sort()
    file_utils.write_lines([',,,,id'] + rows,
                           data_path.train_subject_result_file_path)
Ejemplo n.º 7
0
    parts = train_data_line.split(',')
    if parts[1] not in train_data_content_line_map:
        train_data_content_line_map[parts[1]] = []
    train_data_content_line_map[parts[1]].append(train_data_line)

# Build two outputs from the public test lines:
#   * 'all_zero.result': rows copied from matching training lines when the
#     second field of the test line is a key of train_data_content_line_map,
#     otherwise a row with sentiment value 0;
#   * 'in_train_data_for_submit': only the rows copied from training lines.
test_public_for_sentiment_lines = file_utils.read_all_lines(
    data_path.test_public_for_sentiment_value_file_path)
result = ['content_id,subject,sentiment_value,sentiment_word']
in_train_data = set()
in_train_data_for_submit = []
for test_line in test_public_for_sentiment_lines:
    parts = test_line.split(',')
    lookup_key = parts[1]
    if lookup_key not in train_data_content_line_map:
        result.append(parts[0] + ',' + parts[2] + ',0,')
        continue
    # Each key contributes its training rows only the first time it is seen.
    if lookup_key in in_train_data:
        continue
    in_train_data.add(lookup_key)
    for train_line in train_data_content_line_map[lookup_key]:
        train_parts = train_line.split(',')
        # The empty last element yields the trailing ','.
        row = ','.join([parts[0], train_parts[2], train_parts[3], ''])
        result.append(row)
        in_train_data_for_submit.append(row)
file_utils.write_lines(result, data_path.data_base_dir + 'all_zero.result')
file_utils.write_lines(in_train_data_for_submit,
                       data_path.data_base_dir + 'in_train_data_for_submit')
Ejemplo n.º 8
0
# -*- coding: utf-8 -*-
"""

Date:    2018/10/12 15:32
"""

from nlp_tasks.absa.conf import data_path
from nlp_tasks.absa.preprocess import label_mapping
from nlp_tasks.absa.utils import file_utils

if __name__ == '__main__':
    # Concatenate the per-topic result files ('<base path>.<topic>') into
    # the single file at the base path, in the order the topics are listed.
    base_path = data_path.test_public_sentiment_value_result_file_path
    result = []
    for topic in label_mapping.subject_mapping.keys():
        topic_lines = file_utils.read_all_lines(base_path + '.' + topic)
        result.extend(topic_lines)
    file_utils.write_lines(result, base_path)
Ejemplo n.º 9
0
from nlp_tasks.absa.conf import data_path
from nlp_tasks.absa.utils import file_utils

# Rows that were resolved directly from the training data; the first
# comma-separated field of each row is its id.
in_train_data_for_submit = file_utils.read_all_lines(
    data_path.data_base_dir + 'in_train_data_for_submit')
in_train_data_for_submit_id = [
    line.split(',')[0] for line in in_train_data_for_submit
]
# Set copy of the ids so each membership test below is O(1) rather than a
# scan of the whole list.
in_train_id_set = set(in_train_data_for_submit_id)

result_file_name = 'test_public.result_20181028232554_caokong_xingneng.csv'
result = file_utils.read_all_lines(data_path.data_base_dir + result_file_name)
# The first line is carried over unfiltered (presumably the CSV header).
merge_result = [result.pop(0)]
# Drop every predicted row whose id is already covered by the training rows.
merge_result.extend(
    line for line in result if line.split(',')[0] not in in_train_id_set)

# Append the training-derived rows in their place.
merge_result.extend(in_train_data_for_submit)
file_utils.write_lines(
    merge_result,
    data_path.data_base_dir + result_file_name + '.merge_result_and_in_train')