def write_topics_dictionary():
    """Build the topics dictionary and save it to ``topics_dictionary.json``.

    For every item yielded by ``retrieve_all_relevant_reuters()``, the result
    of ``_get_topics`` is handed to ``_update_topics_dictionary`` together
    with one shared dict that accumulates across all items. A 1-based running
    count is printed after each item as progress output. The accumulated dict
    is then dumped as JSON with sorted keys and a 4-space indent.
    """
    collected = {}
    for position, document in enumerate(retrieve_all_relevant_reuters(), start=1):
        _update_topics_dictionary(_get_topics(document), collected)
        print(position)

    with open('topics_dictionary.json', 'w') as out_file:
        json.dump(collected, out_file, indent=4, sort_keys=True)
def write_topic_nonverted_index():
    """Build the topic index and save it to ``topic_nonverted_index.json``.

    For every item yielded by ``retrieve_all_relevant_reuters()``, its topics
    (``_get_topics``) and its preprocessed content (``_preprocess_content``)
    are passed to ``_update_topic_nonverted_index`` along with one shared
    dict that accumulates across all items. A 1-based running count is
    printed after each item as progress output. The accumulated dict is then
    dumped as JSON with sorted keys and a 4-space indent.
    """
    index = {}
    for position, document in enumerate(retrieve_all_relevant_reuters(), start=1):
        # Topics are computed before the content, same as the call-argument
        # order of the helper.
        doc_topics = _get_topics(document)
        doc_content = _preprocess_content(document)
        _update_topic_nonverted_index(doc_topics, doc_content, index)
        print(position)

    with open('topic_nonverted_index.json', 'w') as out_file:
        json.dump(index, out_file, indent=4, sort_keys=True)
def write_document_topic_dictionary():
    """Map each document id to its topics and save the mapping as JSON.

    For every item yielded by ``retrieve_all_relevant_reuters()``, the value
    of ``_get_id`` becomes a key whose value is a fresh list of that item's
    topics (``_get_topics``), in iteration order. If the same id occurs more
    than once, the later item's topics replace the earlier ones. A 1-based
    running count is printed after each item as progress output. The result
    is written to ``document_topic_dictionary.json`` with sorted keys and a
    4-space indent.
    """
    topics_by_document = {}
    for position, document in enumerate(retrieve_all_relevant_reuters(), start=1):
        document_id = _get_id(document)
        # list(...) copies the topics into a new list, one element at a time.
        topics_by_document[document_id] = list(_get_topics(document))
        print(position)

    with open('document_topic_dictionary.json', 'w') as out_file:
        json.dump(topics_by_document, out_file, indent=4, sort_keys=True)
def write_targets():
    """Write one 0/1 target vector per document into ``training_nn/``.

    The set of known topics is read from the keys of ``topics_dictionary.json``
    (the values stored in that file are not used). For every item yielded by
    ``retrieve_all_relevant_reuters()`` a list is built with one entry per
    known topic, in the key order of the loaded JSON object: ``1`` when the
    topic is among the item's topics (``_get_topics``), ``0`` otherwise. The
    list is written to ``training_nn/<id>.json`` where ``<id>`` comes from
    ``_get_id``. A 1-based running count is printed after each item as
    progress output.

    The ``training_nn`` directory is created if it does not exist yet;
    ``open(..., 'w')`` does not create parent directories, so without this
    the first write would raise ``FileNotFoundError`` on a fresh checkout.

    Raises:
        FileNotFoundError: if ``topics_dictionary.json`` is missing.
    """
    import os

    with open('topics_dictionary.json', 'r') as topics_file:
        # Iterating the loaded dict yields its keys, i.e. the known topics.
        known_topics = list(json.load(topics_file))

    os.makedirs('training_nn', exist_ok=True)

    for position, document in enumerate(retrieve_all_relevant_reuters(), start=1):
        document_id = _get_id(document)
        document_topics = _get_topics(document)
        vector = [1 if topic in document_topics else 0 for topic in known_topics]
        with open(f'training_nn/{document_id}.json', 'w') as target_file:
            json.dump(vector, target_file)
        print(position)