def get_test_data():
    """
    Gather the test reviews and return them grouped by sentiment.

    The file lists are obtained with ``get_filelist`` from the positive and
    negative test sub-folders below ``main.get_path()``. Every file is passed
    to ``get_words(path=...)`` and the result is stored under a running
    integer key, so ``len()`` of each inner dict is the number of reviews.

    Files are taken with ``pop()`` (no argument), i.e. from the end of the
    file list, so key 0 belongs to the last entry of the list.

    :return: dict with positive and negative reviews
        Keys:
            pos_reviews - dict {index: review} of the positive reviews
            neg_reviews - dict {index: review} of the negative reviews
    """
    pos_train_files = get_filelist(main.get_path() + "\\test\\pos\\")  # list of files
    neg_train_files = get_filelist(main.get_path() + "\\test\\neg\\")  # list of files

    pos_reviews = {}
    neg_reviews = {}

    # Loop until the list is empty. A truthiness check is used instead of
    # `__len__() is not 0`: `is` compares identity, not value, and comparing
    # against an int literal raises a SyntaxWarning on modern Python.
    i = 0
    while pos_train_files:
        pos_reviews[i] = get_words(path=pos_train_files.pop())
        i += 1

    i = 0
    while neg_train_files:
        neg_reviews[i] = get_words(path=neg_train_files.pop())
        i += 1

    return {"pos_reviews": pos_reviews, "neg_reviews": neg_reviews}
def get_training_words():
    """
    Collect the words of the training set together with the review counts.

    The positive and negative training file lists are fetched with
    ``get_filelist`` and each list is handed to ``get_words``.

    :return: a 4-tuple
        [0] = result of get_words for the positive training files
        [1] = result of get_words for the negative training files
        [2] = number of positive training files
        [3] = number of negative training files
    """
    positive_files = get_filelist(main.get_path() + "\\train\\pos\\")
    negative_files = get_filelist(main.get_path() + "\\train\\neg\\")

    positive_words = get_words(positive_files)
    negative_words = get_words(negative_files)

    return (
        positive_words,
        negative_words,
        len(positive_files),
        len(negative_files),
    )
def tests_mono():
    """
    Start a ``Loader`` with temporary test settings and fail on logged problems.

    The loader is started, left running for 10 seconds and stopped again.
    Afterwards the temporary log file is inspected with ``check_log``; if it
    returns anything truthy, the entries are joined with ', ' and reported.
    The temporary settings and log files are removed in every case.

    :raises RuntimeError: when ``check_log`` reports entries for the test log
    """
    def dummy(*_, **__):
        # No-op callback passed to the loader as ``die_in``.
        pass

    home = main.HOME
    path = main.get_path(home)
    cfg = main.CFG.copy()

    # Change the settings: point the settings and log paths at throw-away
    # '.test' files and log at 'warn' level.
    test_settings = '{}.test'.format(path['settings'])
    path['settings'] = test_settings
    test_log_file = os.path.join(home, 'mdmt2.log.test')
    # NOTE(review): if main.CFG.copy() is a shallow copy, this update also
    # changes main.CFG['log'] - confirm whether that is intended.
    cfg['log'].update({
        'file_lvl': 'warn',
        'print_lvl': 'warn',
        'file': test_log_file
    })

    try:
        loader = Loader(init_cfg=cfg, path=path, die_in=dummy)
        loader.start()
        time.sleep(10)
        loader.stop()
        err = check_log(test_log_file)
        if err:
            # The exception used to be constructed but never raised, so
            # problems found in the log were silently ignored.
            raise RuntimeError('{}'.format(', '.join(err)))
    finally:
        # Always clean up the temporary files, even when the run failed.
        for target in [test_settings, test_log_file]:
            if os.path.isfile(target):
                os.remove(target)
def test_path_with_green_express_stations(self):
    """The path from 'A' to 'F' with the 'Green' express train has 5 entries."""
    route = main.get_path('A', 'F', train_adj, express_stations, 'Green')
    self.assertEqual(len(route), 5)
    print('The path', route)
def get_test_data(use_training_data=False):
    """
    Gather the reviews to evaluate on and return them grouped by sentiment.

    Each file is passed to ``get_words(path=...)`` and stored under a running
    integer key. Progress is printed as a percentage on a single console
    line that is rewritten in place (``end="\\r"``).

    :param use_training_data: when truthy, read the files from the training
        folders instead of the test folders
    :return: dict with positive and negative reviews
        Keys:
            pos_reviews - dict {index: review} of the positive reviews
            neg_reviews - dict {index: review} of the negative reviews
    """
    if use_training_data:
        pos_files = get_filelist(main.get_path() + "\\train\\pos\\")  # list of files
        neg_files = get_filelist(main.get_path() + "\\train\\neg\\")  # list of files
    else:
        pos_files = get_filelist(main.get_path() + "\\test\\pos\\")  # list of files
        neg_files = get_filelist(main.get_path() + "\\test\\neg\\")  # list of files

    pos_reviews = {}
    neg_reviews = {}

    # The progress labels used to be swapped ("negative" was printed while
    # loading the positive reviews and vice versa).
    for done, file_path in enumerate(pos_files, start=1):
        pos_reviews[done - 1] = get_words(path=file_path)
        print(
            f"Loading positive test reviews... {done / len(pos_files):.0%}",
            end="\r")
    print()

    # The percentage is relative to the negative list; it used to be divided
    # by the number of positive files.
    for done, file_path in enumerate(neg_files, start=1):
        neg_reviews[done - 1] = get_words(path=file_path)
        print(
            f"Loading negative test reviews... {done / len(neg_files):.0%}",
            end="\r")
    print()

    return {"pos_reviews": pos_reviews, "neg_reviews": neg_reviews}
def get_initialized_train_data():
    """
    Build word frequencies and class probabilities from the training set.

    The positive and negative training file lists are turned into words with
    ``get_words`` and counted with ``count_text``. The class probabilities are
    the share of positive / negative files among all training files.

    :return: A dictionary with the following keys as strings:
        pos_freq - result of count_text for the words of the positive reviews
        neg_freq - result of count_text for the words of the negative reviews
        pos_prob - number of positive reviews / total number of reviews
        neg_prob - number of negative reviews / total number of reviews
    """
    positive_files = get_filelist(main.get_path() + "\\train\\pos\\")
    negative_files = get_filelist(main.get_path() + "\\train\\neg\\")

    positive_words = get_words(positive_files)
    negative_words = get_words(negative_files)

    positive_counts = count_text(positive_words)
    negative_counts = count_text(negative_words)

    # Baseline probabilities derived from the number of files per class.
    n_positive = len(positive_files)
    n_negative = len(negative_files)
    positive_share = n_positive / (n_positive + n_negative)
    negative_share = n_negative / (n_positive + n_negative)

    return {
        "pos_freq": positive_counts,
        "neg_freq": negative_counts,
        "pos_prob": positive_share,
        "neg_prob": negative_share,
    }
def get_training_data(use_testing_data=False):
    """
    Collect the words of a data set together with its review counts.

    :param use_testing_data: when truthy, the test folders are read instead
        of the training folders
    :return: a 4-tuple
        [0] = result of get_words for the positive files
        [1] = result of get_words for the negative files
        [2] = number of positive files
        [3] = number of negative files
    """
    # Only the sub-folders differ between the two modes.
    if use_testing_data:
        pos_subdir, neg_subdir = "\\test\\pos\\", "\\test\\neg\\"
    else:
        pos_subdir, neg_subdir = "\\train\\pos\\", "\\train\\neg\\"

    positive_paths = get_filelist(main.get_path() + pos_subdir)
    negative_paths = get_filelist(main.get_path() + neg_subdir)

    positive_words = get_words(positive_paths)
    negative_words = get_words(negative_paths)

    return (
        positive_words,
        negative_words,
        len(positive_paths),
        len(negative_paths),
    )
def test_path_without_express_stations(self):
    """The path from 'A' to 'E' without an express train has 5 entries."""
    route = main.get_path('A', 'E', train_adj, express_stations)
    print('The path', route)
    self.assertEqual(len(route), 5)