Esempio n. 1
0
def get_test_data():
    """
    Test data is gathered, processed and put in dictionaries.

    The file lists for the "\\test\\pos\\" and "\\test\\neg\\" folders under
    main.get_path() are fetched with get_filelist(), and every file is passed
    to get_words(path=...). Files are consumed from the END of each list, so
    key 0 holds the review of the last file in the list.

    :return: dict with positive and negative reviews
    Keys:
    pos_reviews - the positive reviews, as a dict {0: review, 1: review, ...}
    neg_reviews - the negative reviews, same layout
    """
    pos_train_files = get_filelist(main.get_path() + "\\test\\pos\\")  # list of files
    neg_train_files = get_filelist(main.get_path() + "\\test\\neg\\")  # list of files

    # Keys are just running numbers; use len() on the dict to find the number
    # of reviews later.
    pos_reviews = {}
    # Truthiness instead of "__len__() is not 0": identity comparison against
    # an int literal only works by accident of small-int caching and triggers
    # a SyntaxWarning on Python 3.8+.
    while pos_train_files:
        pos_reviews[len(pos_reviews)] = get_words(path=pos_train_files.pop())

    neg_reviews = {}
    while neg_train_files:
        neg_reviews[len(neg_reviews)] = get_words(path=neg_train_files.pop())

    return {"pos_reviews": pos_reviews, "neg_reviews": neg_reviews}
Esempio n. 2
0
def get_training_words():
    """
    Collect the training words for both classes.

    The file lists of the "\\train\\pos\\" and "\\train\\neg\\" folders under
    main.get_path() are fetched with get_filelist() and each list is handed
    to get_words().

    :return: a 4-tuple
    [0] - result of get_words() for the positive training files
    [1] - result of get_words() for the negative training files
    [2] - number of positive training files
    [3] - number of negative training files
    """
    positive_files = get_filelist(main.get_path() + "\\train\\pos\\")
    negative_files = get_filelist(main.get_path() + "\\train\\neg\\")
    positive_words = get_words(positive_files)
    negative_words = get_words(negative_files)
    # The counts are taken after get_words() has run, as in the tuple contract.
    return (
        positive_words,
        negative_words,
        len(positive_files),
        len(negative_files),
    )
Esempio n. 3
0
def tests_mono():
    """
    Start a Loader with test settings, let it run, and fail on logged problems.

    A Loader is created with a copy of main.CFG whose log section writes to a
    temporary '<home>/mdmt2.log.test' file at 'warn' level, and with the
    settings path redirected to '<settings>.test'. It is started, left running
    for 10 seconds and stopped; the log file is then handed to check_log().

    The temporary settings and log files are removed afterwards, whether or
    not the run succeeded.

    :raises RuntimeError: if check_log() returns a non-empty result; the
        message is its items joined with ', '.
    """
    def dummy(*_, **__):
        # No-op stand-in passed to Loader as die_in.
        pass

    home = main.HOME
    path = main.get_path(home)
    cfg = main.CFG.copy()
    # Change the settings
    test_settings = '{}.test'.format(path['settings'])
    path['settings'] = test_settings
    test_log_file = os.path.join(home, 'mdmt2.log.test')
    # CFG.copy() is shallow, so cfg['log'] is still the object held by
    # main.CFG. Rebind it to a fresh dict instead of update()-ing in place,
    # otherwise the test log settings leak into the shared configuration.
    cfg['log'] = dict(
        cfg['log'],
        file_lvl='warn',
        print_lvl='warn',
        file=test_log_file,
    )
    try:
        loader = Loader(init_cfg=cfg, path=path, die_in=dummy)
        loader.start()
        time.sleep(10)
        loader.stop()
        err = check_log(test_log_file)
        if err:
            # The exception must actually be raised; merely constructing it
            # would let a failing run pass silently.
            raise RuntimeError('{}'.format(', '.join(err)))
    finally:
        for target in [test_settings, test_log_file]:
            if os.path.isfile(target):
                os.remove(target)
Esempio n. 4
0
 def test_path_with_green_express_stations(self):
     """Path from 'A' to 'F' using the 'Green' express train must have 5 entries."""
     start, goal = 'A', 'F'
     route = main.get_path(start, goal, train_adj,
                           express_stations, 'Green')
     # Assert first: the route is only printed when the length check passes.
     self.assertEqual(len(route), 5)
     print('The path', route)
Esempio n. 5
0
def get_test_data(use_training_data=False):
    """
    Test data is gathered, processed and put in dictionaries.
    Progress is printed on a single, carriage-return-refreshed line per class.

    :param use_training_data: when true, read the reviews from the
        "\\train\\..." folders instead of the "\\test\\..." folders
    :return: dict with positive and negative reviews
    Keys:
    pos_reviews - the positive reviews, as a dict {0: review, 1: review, ...}
    neg_reviews - the negative reviews, same layout
    """
    if use_training_data:
        pos_dir, neg_dir = "\\train\\pos\\", "\\train\\neg\\"
    else:
        pos_dir, neg_dir = "\\test\\pos\\", "\\test\\neg\\"
    pos_train_files = get_filelist(main.get_path() + pos_dir)  # list of files
    neg_train_files = get_filelist(main.get_path() + neg_dir)  # list of files

    def load_reviews(files, label):
        # Read every file in order, reporting progress against THIS list's
        # own length (the negative pass previously divided by the positive
        # count and both passes printed the other class's label).
        total = len(files)
        reviews = {}
        for index, file_path in enumerate(files):
            reviews[index] = get_words(path=file_path)
            print(
                f"Loading {label} test reviews... {(index + 1) / total:.0%}",
                end="\r")
        # Finish the progress line so later output starts on a fresh line.
        print()
        return reviews

    pos_reviews = load_reviews(pos_train_files, "positive")
    neg_reviews = load_reviews(neg_train_files, "negative")

    return {"pos_reviews": pos_reviews, "neg_reviews": neg_reviews}
Esempio n. 6
0
def get_initialized_train_data():
    """
    Go through the training set and return the positive and negative word
    frequencies together with the baseline probability of each class.

    :return: A dictionary with the following keys as strings:
    pos_freq - count_text() result for the words of the positive reviews
    neg_freq - count_text() result for the words of the negative reviews
    pos_prob - number of positive review files / total number of review files
    neg_prob - number of negative review files / total number of review files
    """
    positive_files = get_filelist(main.get_path() + "\\train\\pos\\")
    negative_files = get_filelist(main.get_path() + "\\train\\neg\\")
    positive_words = get_words(positive_files)
    negative_words = get_words(negative_files)
    positive_counts = count_text(positive_words)
    negative_counts = count_text(negative_words)

    # Baseline (prior) probabilities: each class's share of all review files.
    n_positive = len(positive_files)
    n_negative = len(negative_files)
    n_total = n_positive + n_negative

    return {
        "pos_freq": positive_counts,
        "neg_freq": negative_counts,
        "pos_prob": n_positive / n_total,
        "neg_prob": n_negative / n_total,
    }
Esempio n. 7
0
def get_training_data(use_testing_data=False):
    """
    Gather the words of one dataset and return them as a tuple.

    :param use_testing_data: when true, read the "\\test\\..." folders instead
        of the "\\train\\..." folders
    :return: a 4-tuple
    [0] - result of get_words() for the positive files
    [1] - result of get_words() for the negative files
    [2] - number of positive files
    [3] - number of negative files
    """
    # Only the folder differs between the two datasets; the loading steps are
    # the same.
    if use_testing_data:
        pos_dir, neg_dir = "\\test\\pos\\", "\\test\\neg\\"
    else:
        pos_dir, neg_dir = "\\train\\pos\\", "\\train\\neg\\"

    pos_paths = get_filelist(main.get_path() + pos_dir)
    neg_paths = get_filelist(main.get_path() + neg_dir)
    pos_words = get_words(pos_paths)
    neg_words = get_words(neg_paths)

    return pos_words, neg_words, len(pos_paths), len(neg_paths)
0
 def test_path_without_express_stations(self):
     """Path from 'A' to 'E' with no express train given must have 5 entries."""
     start, goal = 'A', 'E'
     route = main.get_path(start, goal, train_adj, express_stations)
     # Printed before the check so the route is visible even when it fails.
     print('The path', route)
     self.assertEqual(len(route), 5)