예제 #1
0
def write_topics_dictionary():
    """Build the topics dictionary and save it as ``topics_dictionary.json``.

    For every item yielded by ``retrieve_all_relevant_reuters()``, the result
    of ``_get_topics`` is handed to ``_update_topics_dictionary`` together
    with one accumulating dict (presumably updated in place by that helper --
    confirm against its definition). A 1-based running count is printed after
    each item as progress output. The dict is finally dumped as JSON with
    sorted keys and 4-space indentation.
    """
    collected = {}
    for position, item in enumerate(retrieve_all_relevant_reuters(), start=1):
        item_topics = _get_topics(item)
        _update_topics_dictionary(item_topics, collected)
        print(position)

    with open('topics_dictionary.json', 'w') as out_file:
        json.dump(collected, out_file, indent=4, sort_keys=True)
예제 #2
0
def write_topic_nonverted_index():
    """Build the topic index and save it as ``topic_nonverted_index.json``.

    For every item yielded by ``retrieve_all_relevant_reuters()``, its topics
    (``_get_topics``) and preprocessed content (``_preprocess_content``) are
    passed to ``_update_topic_nonverted_index`` along with one accumulating
    dict (presumably mutated in place by that helper -- confirm against its
    definition). A 1-based running count is printed after each item. The dict
    is finally dumped as JSON with sorted keys and 4-space indentation.
    """
    index = {}
    for position, item in enumerate(retrieve_all_relevant_reuters(), start=1):
        # Keep the helper call order: topics first, then content.
        item_topics = _get_topics(item)
        item_content = _preprocess_content(item)
        _update_topic_nonverted_index(item_topics, item_content, index)
        print(position)

    with open('topic_nonverted_index.json', 'w') as out_file:
        json.dump(index, out_file, indent=4, sort_keys=True)
예제 #3
0
def write_document_topic_dictionary():
    """Map each document id to its topics; save to ``document_topic_dictionary.json``.

    For every item yielded by ``retrieve_all_relevant_reuters()``, the id from
    ``_get_id`` becomes a key whose value is a fresh list holding the elements
    of ``_get_topics`` for that item, in iteration order. A 1-based running
    count is printed after each item. The mapping is finally dumped as JSON
    with sorted keys and 4-space indentation.
    """
    topics_by_document = {}
    for seen, item in enumerate(retrieve_all_relevant_reuters(), start=1):
        doc_id = _get_id(item)
        item_topics = _get_topics(item)
        # Copy into a new list so the stored value is independent of the
        # object returned by the helper.
        topics_by_document[doc_id] = list(item_topics)
        print(seen)

    with open('document_topic_dictionary.json', 'w') as out_file:
        json.dump(topics_by_document, out_file, indent=4, sort_keys=True)
예제 #4
0
def write_targets():
    """Write one 0/1 topic target vector per item under ``training_nn/``.

    The topic vocabulary is the set of keys in ``topics_dictionary.json``,
    taken in the order they appear in that file; the values stored there are
    ignored. For every item yielded by ``retrieve_all_relevant_reuters()`` a
    list is built holding 1 at the position of each vocabulary topic that is
    contained in ``_get_topics(item)`` and 0 elsewhere. The list is written as
    JSON to ``training_nn/<id>.json``, where ``<id>`` is ``_get_id(item)``.
    A 1-based running count is printed after each item as progress output.

    The ``training_nn`` directory is created when missing, so the function
    also works on a fresh checkout.

    Raises:
        OSError: if ``topics_dictionary.json`` cannot be read or an output
            file cannot be written.
    """
    import os  # function-scope import: no other code here relies on it

    with open('topics_dictionary.json', 'r') as f:
        # Only the key order matters; freeze it once instead of reusing the
        # loaded dict as per-item scratch space.
        topic_names = list(json.load(f))

    os.makedirs('training_nn', exist_ok=True)

    for count, reuter in enumerate(retrieve_all_relevant_reuters(), start=1):
        newid = _get_id(reuter)
        topics = _get_topics(reuter)
        vector = [1 if name in topics else 0 for name in topic_names]
        with open(f'training_nn/{newid}.json', 'w') as f:
            json.dump(vector, f)
        print(count)