Beispiel #1
0
def output_csv(not_mapping_ids, sorted_many_mapping, all_documents):
    """Write the "rare" and "many" mapping reports as CSV files.

    Two files are written under ``base_path/output/wmd_map``:
    ``rare<suffix>.csv`` with one row per id in ``not_mapping_ids`` and
    ``many<suffix>.csv`` with one row per entry of ``sorted_many_mapping``.
    The suffix is built from the ``is_include_self`` and ``is_limit_hinshi``
    flags, which (like ``base_path`` and ``correspondence_student_number``)
    are read from the enclosing module scope.

    Args:
        not_mapping_ids: Iterable of keys into ``all_documents``.
        sorted_many_mapping: Iterable of dicts with an ``'id'`` key (a key
            into ``all_documents``) and a ``'map'`` key (a sized iterable);
            rows are written in the order given.
        all_documents: Container indexable by the ids above, yielding dicts
            with the keys ``'student'``, ``'day'``, ``'origin'``, ``'id'``
            and ``'KPT'``.
    """
    suffix = '{}{}'.format('_include_self' if is_include_self else '',
                           '_hinshi' if is_limit_hinshi else '')
    rare_path = os.path.normpath(
        os.path.join(base_path, 'output/wmd_map/rare{}.csv'.format(suffix)))
    many_path = os.path.normpath(
        os.path.join(base_path, 'output/wmd_map/many{}.csv'.format(suffix)))

    # Context managers guarantee both handles are closed even when a lookup
    # or a write below raises part-way through.
    with open(rare_path, 'w') as rare_file, open(many_path, 'w') as many_file:
        rare_writer = csv.writer(rare_file, lineterminator='\n')
        rare_writer.writerow(['student', 'date', 'origin', 'id', 'KPT'])

        for not_mapping_id in not_mapping_ids:
            doc = all_documents[not_mapping_id]
            student_name = correspondence_student_number.get_name(
                doc['student'])
            rare_writer.writerow([
                student_name, doc['day'], doc['origin'], doc['id'], doc['KPT']
            ])

        many_writer = csv.writer(many_file, lineterminator='\n')
        many_writer.writerow(
            ['student', 'date', 'origin', 'count', 'KPT', 'id', 'mappings'])

        for mapping_dict in sorted_many_mapping:
            doc = all_documents[mapping_dict['id']]
            student_name = correspondence_student_number.get_name(
                doc['student'])
            # The mapped ids are stored in one cell, separated by spaces.
            mappings = ' '.join(str(x) for x in mapping_dict['map'])

            many_writer.writerow([
                student_name, doc['day'], doc['origin'],
                len(mapping_dict['map']), doc['KPT'], doc['id'], mappings
            ])
Beispiel #2
0
def output_csv(documents):
    """Write the documents, ranked by their tf-idf score, to a CSV file.

    The file is ``base_path/output/tfidf/<mode><suffix>.csv`` where the
    suffix is built from the ``is_top3`` and ``is_limit_hinshi`` flags;
    these, ``base_path`` and ``correspondence_student_number`` are read from
    the enclosing module scope.

    Args:
        documents: List of dicts with the keys ``'student'``, ``'date'``,
            ``'origin'``, ``'id'``, ``'KPT'`` and the score key selected by
            ``mode``. The list is sorted in place, highest score first.
    """
    # Available score keys: sum, ave, min, max.
    # With is_top3 enabled, ave and sum give the same result for roughly the
    # top 100 entries.
    mode = 'sum'

    is_top3_name = '_top3' if is_top3 else ''
    is_hinshi_name = '_hinshi' if is_limit_hinshi else ''

    out_path = os.path.normpath(
        os.path.join(
            base_path,
            'output/tfidf/{}{}{}.csv'.format(mode, is_top3_name,
                                             is_hinshi_name)))

    # The context manager closes the file even if sorting or a row lookup
    # raises (e.g. a document lacks the score key).
    with open(out_path, 'w') as out_file:
        writer = csv.writer(out_file, lineterminator='\n')
        documents.sort(key=lambda x: x[mode], reverse=True)
        writer.writerow(['student', 'day', 'origin', 'id', 'tfidf', 'KPT'])

        for document in documents:
            student_name = correspondence_student_number.get_name(
                document['student'])
            writer.writerow([
                student_name, document['date'], document['origin'],
                document['id'], document[mode], document['KPT']
            ])
Beispiel #3
0
def output_csv(documents):
    """Write the documents, ranked by their BM25 score, to a CSV file.

    The file is ``base_path/output/bm25/<mode>.csv``; ``base_path`` and
    ``correspondence_student_number`` are read from the enclosing module
    scope.

    Args:
        documents: List of dicts with the keys ``'student'``, ``'date'``,
            ``'origin'``, ``'id'``, ``'KPT'`` and the score key selected by
            ``mode``. The list is sorted in place, highest score first.
    """
    mode = 'sum'

    out_path = os.path.normpath(
        os.path.join(base_path, 'output/bm25/{}.csv'.format(mode)))

    # The context manager closes the file even if sorting or a row lookup
    # raises (e.g. a document lacks the score key).
    with open(out_path, 'w') as out_file:
        writer = csv.writer(out_file, lineterminator='\n')
        documents.sort(key=lambda x: x[mode], reverse=True)
        writer.writerow(['student', 'day', 'origin', 'id', 'bm25', 'KPT'])

        for document in documents:
            student_name = correspondence_student_number.get_name(
                document['student'])
            writer.writerow([
                student_name, document['date'], document['origin'],
                document['id'], document[mode], document['KPT']
            ])
Beispiel #4
0
def main():
    """Count characters and K/P/T entries per student and write two CSVs.

    Reads ``../後期.json`` relative to ``base_path``. The JSON is indexed as
    ``data[student_number][day][section]`` with sections ``'K'``, ``'P'``
    and ``'T'``. For each student the function accumulates the character
    count reported by ``get_char_count_from_sentences`` over all sections and
    days, plus the number of entries in each section.

    Output, ordered by character count descending:
      * ``output/count/char.csv`` - name, character count
      * ``output/count/kpt.csv``  - name, K count, P count, T count
    """
    json_path = os.path.normpath(os.path.join(base_path, '../後期.json'))
    # NOTE(review): the file is opened with the platform default encoding,
    # as before - confirm this matches how the JSON was saved.
    with open(json_path) as json_file:
        data = json.load(json_file)

    results = []

    for student_number, days in data.items():
        char_count = 0
        k_count = 0
        p_count = 0
        t_count = 0

        for entry in days.values():
            char_count += get_char_count_from_sentences(entry['K'])
            char_count += get_char_count_from_sentences(entry['P'])
            char_count += get_char_count_from_sentences(entry['T'])

            k_count += len(entry['K'])
            p_count += len(entry['P'])
            t_count += len(entry['T'])

        results.append({
            'id': student_number,
            'name': correspondence_student_number.get_name(student_number),
            'char': char_count,
            'k': k_count,
            'p': p_count,
            't': t_count
        })

    # File output
    char_path = os.path.normpath(
        os.path.join(base_path, 'output/count/char.csv'))
    kpt_path = os.path.normpath(
        os.path.join(base_path, 'output/count/kpt.csv'))

    # Context managers close both handles even if a write fails midway.
    with open(char_path, 'w') as char_count_file, \
            open(kpt_path, 'w') as kpt_count_file:
        char_count_writer = csv.writer(char_count_file, lineterminator='\n')
        kpt_count_writer = csv.writer(kpt_count_file, lineterminator='\n')

        for result_dict in sorted(results,
                                  key=lambda x: x['char'],
                                  reverse=True):
            char_count_writer.writerow(
                [result_dict['name'], result_dict['char']])
            kpt_count_writer.writerow([
                result_dict['name'], result_dict['k'], result_dict['p'],
                result_dict['t']
            ])
Beispiel #5
0
def many_mapping_sort(mapping, all_documents):
    """Select the documents with at least the average mapping count, sorted.

    The average is the total number of mapped entries divided by
    ``len(all_documents)`` (not by the number of keys in ``mapping``).

    Args:
        mapping: Dict from document id (convertible with ``int``) to a sized
            collection of mapped items.
        all_documents: Container indexable by the integer document id,
            yielding dicts that have a ``'student'`` key.

    Returns:
        A list of ``{'id': int, 'map': <collection>}`` dicts ordered by
        student name (as returned by
        ``correspondence_student_number.get_name``) and, within one student,
        by mapping count descending. An empty ``mapping`` yields ``[]``.
    """
    # Nothing to rank; this also avoids dividing by zero when both the
    # mapping and the document collection are empty.
    if not mapping:
        return []

    sum_mapping_count = sum(len(mapping[doc_id]) for doc_id in mapping)
    ave_mapping_count = sum_mapping_count / len(all_documents)

    candidates = [{
        'id': int(doc_id),
        'map': mapping[doc_id]
    } for doc_id in mapping if len(mapping[doc_id]) >= ave_mapping_count]

    # One stable sort on (name, -count) gives the same order as sorting by
    # count descending first and then stably by name.
    return sorted(
        candidates,
        key=lambda item: (correspondence_student_number.get_name(
            all_documents[item['id']]['student']), -len(item['map'])))