Example #1
from nlp_tasks.absa.utils import file_utils


def merge_subject_sentiment_value(subject_file_path, sentiment_file_path,
                                  result_file_path):
    """Merge line-aligned subject and sentiment-value prediction files into
    one submission CSV (content_id,subject,sentiment_value,sentiment_word)."""
    subject_file_lines = file_utils.read_all_lines(subject_file_path)
    sentiment_file_lines = file_utils.read_all_lines(sentiment_file_path)
    result = ['content_id,subject,sentiment_value,sentiment_word']
    for i, subject_line in enumerate(subject_file_lines):
        subject_line_parts = subject_line.split(',')

        sentiment_value = sentiment_file_lines[i].split(',')[1]

        result.append(subject_line_parts[0] + ',' + subject_line_parts[2] +
                      ',' + sentiment_value + ',')

    file_utils.write_lines(result, result_file_path)
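The merge pairs the two input files line by line. Below is a minimal, self-contained sketch of the same logic on in-memory lines; the data and field layouts are hypothetical (subject lines as content_id,content,subject, sentiment lines as content_id,sentiment_value):

# Hypothetical, line-aligned inputs (no file I/O).
subject_lines = ['c1,great engine,power', 'c2,too loud,noise']
sentiment_lines = ['c1,1', 'c2,-1']
result = ['content_id,subject,sentiment_value,sentiment_word']
for subject_line, sentiment_line in zip(subject_lines, sentiment_lines):
    subject_parts = subject_line.split(',')
    sentiment_value = sentiment_line.split(',')[1]
    # sentiment_word stays empty, hence the trailing comma
    result.append(subject_parts[0] + ',' + subject_parts[2] + ',' +
                  sentiment_value + ',')
print(result[1])  # c1,power,1,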
Example #2
from nlp_tasks.absa.utils import file_utils


def read_test_labels(file_path):
    """Read test labels.

    Args:
        file_path: path to a file with a header line followed by lines of
            the form ``id,label1,label2,...``
    Returns:
        list of lists of int, the labels of each non-header line
    """
    lines = file_utils.read_all_lines(file_path)
    labels = [[int(label) for label in line.split(',')[1:]] for line in lines[1:]]
    return labels
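For reference, the list comprehension on hypothetical input (a header row, then id,label,... rows):

# Hypothetical file contents.
lines = ['id,l1,l2,l3', 'a1,0,1,0', 'a2,1,0,1']
labels = [[int(label) for label in line.split(',')[1:]] for line in lines[1:]]
print(labels)  # [[0, 1, 0], [1, 0, 1]]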
Example #3
from nlp_tasks.absa.utils import file_utils


def read_labels(file_path):
    """Read labels.

    Args:
        file_path: path to a file whose first comma-separated field holds
            space-separated labels, e.g. ``label1 label2 ...,text``
    Returns:
        list of lists of int, the labels of each line
    """
    lines = file_utils.read_all_lines(file_path)
    labels = [[int(label) for label in line.split(',')[0].split(' ')] for line in lines]
    return labels
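Here the labels sit in the first comma-separated field, space-separated inside it; a quick check with made-up lines:

# Hypothetical file contents: 'label1 label2 label3,text'.
lines = ['1 0 1,some text', '0 0 1,other text']
labels = [[int(label) for label in line.split(',')[0].split(' ')] for line in lines]
print(labels)  # [[1, 0, 1], [0, 0, 1]]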
Example #4
from nlp_tasks.absa.utils import file_utils


def read_subject_train_ids(file_path):
    """Read sample ids.

    Args:
        file_path: path to a file whose third comma-separated field holds
            the id
    Returns:
        list of str, the id of each line
    """
    lines = file_utils.read_all_lines(file_path)
    ids = [line.split(',')[2] for line in lines]
    return ids
Example #5
from nlp_tasks.absa.utils import file_utils


def read_features(file_path):
    """Read features.

    Args:
        file_path: path to a file whose second comma-separated field holds
            the feature
    Returns:
        list of str, the feature of each line
    """
    lines = file_utils.read_all_lines(file_path)
    features = [line.split(',')[1] for line in lines]
    return features
Example #6
from nlp_tasks.absa.utils import file_utils


def read_subject_of_sentiment_value(file_path):
    """Read subjects.

    Args:
        file_path: path to a file whose third comma-separated field holds
            the subject
    Returns:
        list of str, the subject of each line
    """
    lines = file_utils.read_all_lines(file_path)
    subjects = [line.split(',')[2] for line in lines]
    return subjects
Example #7
from nlp_tasks.absa.utils import file_utils


def read_field(file_path, field_index, separator=',', has_head=True):
    """Read one field from every line of a delimited file.

    Args:
        file_path: path to the file to read
        field_index: zero-based index of the field to extract
        separator: field separator, ``,`` by default
        has_head: if True, skip the first (header) line
    Returns:
        list of str, the extracted field of each line
    """
    lines = file_utils.read_all_lines(file_path)
    if has_head:
        lines = lines[1:]
    values = [line.split(separator)[field_index] for line in lines]
    return values
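read_field generalizes the fixed-index readers in Examples 4-6. An inline replica of its logic on hypothetical CSV lines:

# Hypothetical file contents: header, then content_id,content,subject rows.
lines = ['content_id,content,subject',
         '1,good acceleration,power',
         '2,cabin is noisy,comfort']
field_index, separator, has_head = 2, ',', True
rows = lines[1:] if has_head else lines
values = [row.split(separator)[field_index] for row in rows]
print(values)  # ['power', 'comfort']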
Example #8
from nlp_tasks.absa.utils import file_utils


def convert_subject_sentiment_value_predict_result(
        subject_subject_sentiment_value_file_path, result_file_path):
    """Expand ``id,subject_value|subject_value|...`` prediction lines into
    one ``content_id,subject,sentiment_value,sentiment_word`` row each."""
    subject_sentiment_value_file_lines = file_utils.read_all_lines(
        subject_subject_sentiment_value_file_path)
    result = ['content_id,subject,sentiment_value,sentiment_word']
    for i, subject_line in enumerate(subject_sentiment_value_file_lines):
        id_subjects = subject_line.split(',')
        subjects = id_subjects[1].split('|')
        for subject in subjects:
            result.append(id_subjects[0] + ',' + subject.replace('_', ',') +
                          ',')
    file_utils.write_lines(result, result_file_path)
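Each prediction line packs several subject/value pairs into one field: pairs are joined with '_' and separated by '|'. Decoding a hypothetical line:

# Hypothetical prediction line: 'content_id,subject_value|subject_value'.
line = 'c1,power_1|price_-1'
id_subjects = line.split(',')
rows = [id_subjects[0] + ',' + s.replace('_', ',') + ','
        for s in id_subjects[1].split('|')]
print(rows)  # ['c1,power,1,', 'c1,price,-1,']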
Example #9
from nlp_tasks.absa.conf import data_path
from nlp_tasks.absa.utils import file_utils

train_data_lines = file_utils.read_all_lines(data_path.train_file_path)[1:]
train_data_content_line_map = {}  # content (field 1) -> training lines with that content
for train_data_line in train_data_lines:
    parts = train_data_line.split(',')
    if parts[1] not in train_data_content_line_map:
        train_data_content_line_map[parts[1]] = []
    train_data_content_line_map[parts[1]].append(train_data_line)

test_public_for_sentiment_lines = file_utils.read_all_lines(
    data_path.test_public_for_sentiment_value_file_path)
result = ['content_id,subject,sentiment_value,sentiment_word']
in_train_data = set()             # contents already copied from the train data
in_train_data_for_submit = []     # rows reused verbatim for the submission
for test_public_for_sentiment_line in test_public_for_sentiment_lines:
    parts = test_public_for_sentiment_line.split(',')
    if parts[1] in train_data_content_line_map:
        if parts[1] not in in_train_data:
            in_train_data.add(parts[1])
            in_train_data_samples = train_data_content_line_map[parts[1]]
            for in_train_data_sample in in_train_data_samples:
                in_train_data_sample_parts = in_train_data_sample.split(',')
                result.append(parts[0] + ',' + in_train_data_sample_parts[2] +
                              ',' + in_train_data_sample_parts[3] + ',')
                in_train_data_for_submit.append(parts[0] + ',' +
                                                in_train_data_sample_parts[2] +
                                                ',' +
                                                in_train_data_sample_parts[3] +
                                                ',')
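The loop copies labels for any test content that also appears verbatim in the training data. Condensed, with hypothetical rows (train as id,content,subject,value; test as id,content):

# Hypothetical data illustrating the copy-from-train shortcut above.
train_map = {'great engine': ['t1,great engine,power,1']}
test_lines = ['x9,great engine']
copied = ['content_id,subject,sentiment_value,sentiment_word']
for test_line in test_lines:
    test_id, content = test_line.split(',')[:2]
    for train_line in train_map.get(content, []):
        _, _, subject, value = train_line.split(',')
        copied.append(test_id + ',' + subject + ',' + value + ',')
print(copied[1])  # x9,power,1,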

Example #10
def predict(data, model):
    """Predict a sentiment value for each ``id,text,subject`` sample."""
    predict_labels = []
    for sample in data:
        parts = sample.split(',')
        text = parts[1]
        x_train = tokenizer.texts_to_sequences([text])
        x_train_len = [len(element) for element in x_train]
        subject = parts[2]
        subject_repeated = data_utils.repeat_element_in_list([subject],
                                                             x_train_len)
        x_subject = tokenizer.texts_to_sequences(subject_repeated)

        x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
        x_subject = sequence.pad_sequences(x_subject, maxlen=maxlen)

        prob = model.predict([x_train, x_subject])
        predict_label = result_utils.convert_sentiment_value_predict(prob)
        print(predict_label)
        predict_labels.append(predict_label)
    return predict_labels


train_data = file_utils.read_all_lines(train_file_path)
test_data = file_utils.read_all_lines(test_file_path)

print('train')
train_predictions = predict(train_data, model)

print('test')
test_predictions = predict(test_data, model)
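The snippet assumes a fitted Keras Tokenizer, a maxlen constant, a two-input model, and project helpers (data_utils, result_utils) defined elsewhere. The Keras preprocessing part in isolation, as a minimal sketch:

# Minimal sketch of the tokenize-and-pad step; the texts are made up.
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence

maxlen = 10
tokenizer = Tokenizer()
tokenizer.fit_on_texts(['the engine is powerful', 'the seats are comfortable'])
x = tokenizer.texts_to_sequences(['the engine is powerful'])
x = sequence.pad_sequences(x, maxlen=maxlen)  # left-pads with zeros by default
print(x.shape)  # (1, 10)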
Example #11
from nlp_tasks.absa.conf import data_path
from nlp_tasks.absa.utils import file_utils

in_train_data_for_submit = file_utils.read_all_lines(
    data_path.data_base_dir + 'in_train_data_for_submit')
in_train_data_for_submit_ids = {
    line.split(',')[0] for line in in_train_data_for_submit
}

result_file_name = 'test_public.result_20181028232554_caokong_xingneng.csv'
result = file_utils.read_all_lines(data_path.data_base_dir + result_file_name)
merge_result = [result.pop(0)]  # keep the header row
for line in result:
    content_id = line.split(',')[0]
    if content_id not in in_train_data_for_submit_ids:
        merge_result.append(line)

merge_result.extend(in_train_data_for_submit)
file_utils.write_lines(
    merge_result,
    data_path.data_base_dir + result_file_name + '.merge_result_and_in_train')
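The same filter-then-extend merge, condensed onto hypothetical in-memory rows:

# Hypothetical rows illustrating the merge above.
in_train_ids = {'c2'}                     # ids already answered from train data
predicted = ['content_id,subject,sentiment_value,sentiment_word',
             'c1,power,1,', 'c2,price,0,']
from_train = ['c2,price,-1,']
merged = [predicted[0]]
merged += [row for row in predicted[1:] if row.split(',')[0] not in in_train_ids]
merged.extend(from_train)
print(merged)  # header, then 'c1,power,1,' and 'c2,price,-1,'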