def tests():
    """Run some tests on the classifier class."""
    # Load both sample datasets.
    text_data = pd.read_csv('input_texts.csv')
    tweet_data = pd.read_csv('input_tweet.csv')

    # Train/test split ratio used for every classifier below.
    split_ratio = .7

    text_clf = c.Classifier(text_data, train_split=split_ratio)
    tweet_clf = c.Classifier(tweet_data, train_split=split_ratio)

    # Run both models with internal test data.
    print('\ntext data')
    text_clf.test_model()
    print('\ntweet data')
    tweet_clf.test_model()

    # Classify one single piece of text.
    content = 'input some tweet data here'
    print('testing a single line input on tweet data:', content)
    print(tweet_clf.find_label(content))
    print('SUCCESS')

    # Classify a whole frame of texts with no labels.
    print('testing an input of unclassified texts')
    unlabeled = pd.read_csv('input_texts.csv').drop(['label'], axis=1)
    print(text_clf.classify_texts(unlabeled).head())
    print('SUCCESS')

    # Make sure custom column names are honoured.
    print('testing with different column name inputs')
    renamed = pd.read_csv('input_tweet.csv').rename(columns={
        'label': 'feeling',
        'content': 'tweet_text'
    })
    renamed_clf = c.Classifier(renamed,
                               train_split=split_ratio,
                               label_column='feeling',
                               text_column='tweet_text')
    renamed_clf.test_model()
    print('SUCCESS')
def test_reset_training_set():
    """Test that resetting the training set empties it and stores the goal size.

    (The original docstring said "load training set", which describes a
    different function.)
    """
    classifier = classifier_module.Classifier(None)

    classifier.reset_training_set(117, "a")

    # A reset discards any collected samples...
    assert classifier.training_set == []
    assert classifier.training_size == 0
    # ...but remembers how many samples are ultimately expected.
    assert classifier.ultimate_training_size == 117
def test_ignore_newer_than_should_ignore_new_files(self):
    """Freshly created files must survive a run with --ignore-newer-than."""
    self.classifier = clf.Classifier(['--ignore-newer-than=10m'])
    self.assertTrue(self.classifier.run())
    # Every temp file was just created, so none should have been removed.
    for name in self.__tmp_files:
        self.assertTrue(os.path.exists(os.path.join(self.__location, name)))
def _repeat(args):
    """Wrap up the repeat subcommand to make main() less complex.

    Args:
        args (dict): Parsed command line arguments.
    """
    # Fixed typo in the user-facing message ("traning" -> "training").
    print('Repeating the learning process from training-set file.')
    classifier = classifier_module.Classifier()
    classifier.learn(True, args.symbol_name)
    sys.exit(0)
def _delete(args):
    """Wrap up the delete subcommand to make main() less complex.

    Args:
        args (dict): Parsed command line arguments.
    """
    # Remove the symbols from both the classifier and the databox.
    classifier = classifier_module.Classifier()
    classifier.delete_symbols(args.symbols)
    databox_instance.delete_symbols(args.symbols)
    sys.exit(0)
def test__delete_symbol():
    """Test deleting one symbol"""
    classifier = classifier_module.Classifier(None)

    # Register a symbol, persist the list, then delete the symbol again.
    classifier.symbol_list.append("test2")
    classifier._save_symbol_list()
    classifier._delete_symbol('test2')

    # Add a different symbol so the saved list has known content.
    classifier.symbol_list.append("test3")
    classifier._save_symbol_list()

    assert filecmp.cmp(TEST_LOCATION + 'symbol-list.dat',
                       TEST_LOCATION + 'expected_test_delete_symbol.dat')
def test_classify():
    """Test classifying the given list of points to a symbol"""
    # The stored model fixtures are only expected to match on x86_64.
    if platform.machine() != 'x86_64':
        return
    classifier = classifier_module.Classifier(None)
    for step in range(5):
        points = [
            Signal_test(1.0 + step * 0.028,
                        1.00 - step * step * 0.20 * 0.30),
            Signal_test(2.0 - step * 0.011, 2.00 - step * 0.020),
        ]
        assert classifier.classify(points) == 'test'
def test__load_training_set():
    """Test loading training set from file"""
    classifier = classifier_module.Classifier(None)
    # Renamed from 'set' to avoid shadowing the builtin of the same name.
    training_set = classifier._load_training_set('test')
    for i in range(0, 5):
        signal_list = training_set[i]
        # Values must match the fixture generated with the same formulas.
        assert signal_list[0].get_x() == 1.0 + i * 0.028
        assert signal_list[0].get_y() == 1.00 - i * i * 0.20 * 0.30
        assert signal_list[1].get_x() == 2.0 - i * 0.011
        assert signal_list[1].get_y() == 2.00 - i * 0.020
def test_add_to_training_set():
    """Test if added list of points is in the set"""
    classifier = classifier_module.Classifier(None)
    classifier.add_to_training_set(SIGNAL_LIST_TEST)
    training_size = classifier.training_size
    added_list = classifier.training_set[training_size - 1]
    # Compare every point, including the last one: the original loop used
    # range(0, len(...) - 1) and silently skipped the final signal.
    for i in range(len(SIGNAL_LIST_TEST)):
        assert SIGNAL_LIST_TEST[i].get_x() == added_list[i].get_x()
        assert SIGNAL_LIST_TEST[i].get_y() == added_list[i].get_y()
def _import_settings(args):
    """Wrap up the import subcommand to make main() less complex.

    Args:
        args (dict): Parsed command line arguments.
    """
    name = args.settings_name
    # Propagate the imported settings to every subsystem.
    classifier = classifier_module.Classifier()
    classifier.import_files(name)
    databox_instance.import_settings(name)
    signalcollection.SignalCollection().import_settings(name)
def _activate(args):
    """Wrap up the activate subcommand to make main() less complex.

    Args:
        args (dict): Parsed command line arguments.
    """
    classifier = classifier_module.Classifier()
    # Guard clause: bail out with a non-zero exit code on failure.
    if not classifier.activate_symbols(args.symbols):
        print('activation failed')
        sys.exit(1)
    databox_instance.activate(args.symbols)
    sys.exit(0)
def setUp(self):
    """Create the scratch directory populated with temp files and dirs."""
    if not os.path.exists(self.__location):
        os.mkdir(self.__location)
    os.chdir(self.__location)
    # Touch every temp file so it exists with zero length.
    for name in self.__tmp_files:
        open(name, 'w').close()
    for directory in self.__tmp_dirs:
        if not os.path.exists(directory):
            os.mkdir(directory)
    self.classifier = clf.Classifier()
    super(ClassifierTest, self).setUp()
def test_ignore_newer_than_should_not_ignore_old_files(self):
    """Files older than the threshold must be removed by the run."""
    self.classifier = clf.Classifier(['--ignore-newer-than=10m'])
    # Back-date every file to 11 minutes ago, past the 10m threshold.
    for name in self.__tmp_files:
        moment = datetime.datetime.now() - datetime.timedelta(minutes=11)
        stamp = time.mktime(moment.timetuple())
        os.utime(os.path.join(self.__location, name), (stamp, stamp))
    self.assertTrue(self.classifier.run())
    # All back-dated files should now be gone.
    for name in self.__tmp_files:
        self.assertFalse(
            os.path.exists(os.path.join(self.__location, name)))
def test_delete_symbols():
    """Test deleting all symbols"""
    classifier = classifier_module.Classifier(None)
    # Persist three symbols, then delete all of them in one call.
    names = ['test2', 'test3', 'test4']
    for name in names:
        classifier._save_training_set(name)
    classifier.delete_symbols(names)
    # Re-add one symbol so the saved list has known content to compare.
    classifier.symbol_list.append("test2")
    classifier._save_symbol_list()
    assert filecmp.cmp(TEST_LOCATION + 'symbol-list.dat',
                       TEST_LOCATION + 'expected_test_delete_symbols.dat')
def test__learn_one_symbol():
    """Test learning specific symbol"""
    if platform.machine() == 'x86_64':
        classifier = classifier_module.Classifier(None)
        tolerance = classifier._learn_one_symbol('test')
        # Context manager guarantees the model file is closed
        # (the original leaked the file handle).
        with open(TEST_LOCATION + 'test_nn_model.dat', 'rb') as model_file:
            nbrs_from_file = pickle.load(model_file)
        # Hyper-parameters of the persisted nearest-neighbours model.
        assert 'ball_tree' == nbrs_from_file.algorithm
        assert 30 == nbrs_from_file.leaf_size
        assert 'minkowski' == nbrs_from_file.metric
        assert nbrs_from_file.metric_params is None
        assert 2 == nbrs_from_file.n_neighbors
        assert 2 == nbrs_from_file.p
        assert 1.0 == nbrs_from_file.radius
        # The computed tolerance must match the fixture within epsilon.
        assert tolerance < 398.85960989443032 + epsilon
        assert tolerance > 398.85960989443032 - epsilon
def test__save_training_set():
    """Test learning symbol given the specific training set"""
    classifier = classifier_module.Classifier(None)
    classifier._delete_symbol('test')
    classifier.reset_training_set(7, 'test')
    # Build the same five two-point samples the fixture files were made from.
    for i in range(5):
        classifier.add_to_training_set([
            Signal_test(1.0 + i * 0.028, 1.00 - i * i * 0.20 * 0.30),
            Signal_test(2.0 - i * 0.011, 2.00 - i * 0.020),
        ])
    classifier._save_training_set("test")
    assert filecmp.cmp(TEST_LOCATION + 'symbol-list.dat',
                       TEST_LOCATION + 'expected_symbol-list.dat')
    # Either of the two expected fixture variants is acceptable.
    assert (filecmp.cmp(TEST_LOCATION + 'training-set_test.dat',
                        TEST_LOCATION + 'expected_training-set_test.dat')
            or filecmp.cmp(TEST_LOCATION + 'training-set_test.dat',
                           TEST_LOCATION + 'expected2_training-set_test.dat'))
def test__compute_tolerance_distance():
    """Test for computing distance.

    We put some list of list of features to calculate fixed
    distance, and check if it's same.
    """
    classifier = classifier_module.Classifier(None)
    L1 = [11.2, 41.43, 1.33]
    L2 = [10.9, 41.45, 1.34]
    L3 = [12.0, 41.4412, 1.001]
    L4 = [11.3, 41.15, 1.12]
    L5 = [11.223, 41.0, 1.31]
    AL = [L1, L2, L3, L4, L5]
    symbol = "a"
    classifier._compute_tolerance_distance(AL, symbol)
    tolerance_distance_path = \
        classifier_module.Classifier._get_file_path(
            classifier.files[classifier_module.DISTANCE_TOLERANCE_FILE],
            symbol)
    # Context manager replaces the manual open/close pair so the file is
    # closed even if readline()/float() raises.
    with open(tolerance_distance_path, 'r') as tolerance_file:
        tolerance_distance = float(tolerance_file.readline())
    assert fabs(tolerance_distance - 0.5506099238118276) < epsilon
def application_thread(queue, condition, learning_mode=False,
                       training_size=0, system_bitness=None,
                       symbol_name=None):
    """The application thread function.

    Every iteration of the while loop one signal is read from the queue.
    The signals are sent to the interpreter if there is a longer pause
    between signals.

    Args:
        queue (Queue): An inter-thread queue to pass signals between the
            listener and the application.
        condition (Condition): A condition which allows the threads to
            notify each other and wait if there is nothing to do.
        learning_mode (bool): A variable which stores the information if
            the app is in the learning mode or not.
        training_size (int): A number of the learning samples of the
            symbol that the user is asked to draw.
        system_bitness (int): A bitness of the system. The only legal
            values are {None, 32, 64}. If the value is 32 or 64 then set
            of hardcoded symbols (with respect to the provided bitness)
            will be recognized instead of the user defined symbols.
        symbol_name (str): A name of the symbol provided by the user with
            a command line option.

    Variables:
        collection (SignalCollection): A collection of signals sent by
            the listener thread. Mostly touchpad events but not
            necessarily.
    """
    classifier = classifier_module.Classifier(system_bitness=system_bitness)
    if learning_mode:
        classifier.reset_training_set(training_size, symbol_name)
    if learning_mode:
        print("Welcome to learning mode.\n"
              "Think of a symbol you want the application to learn "
              "and draw it {0} times.".format(training_size))
    else:
        print("Use your touchpad as usual. Have a nice day!")
    collection = signalcollection.SignalCollection()
    while True:
        # Sleep until notified by the listener or until the collection's
        # oldest signal would be too old to keep accumulating.
        condition.acquire()
        condition.wait(collection.get_time_when_old_enough(time.time()))
        condition.release()
        if not collection.is_recent_enough(time.time()):
            # Pause between signals detected: flush the collected points
            # to the interpreter and start a fresh collection.
            send_points_to_interpreter(collection.as_list(),
                                       learning_mode, classifier)
            collection.reset()
        if queue.empty():
            continue
        signal = queue.get()
        if signal.is_stop_signal():
            # Explicit stop: flush immediately instead of waiting for a
            # timeout-driven flush.
            send_points_to_interpreter(collection.as_list(),
                                       learning_mode, classifier)
            collection.reset()
        elif signal.is_proper_signal_of_point() \
                or signal.is_raising_finger_signal():
            # Only point and finger-raise signals are accumulated; other
            # signal types are dropped.
            collection.add_and_maintain(signal)
def test_ignore_newer_than_with_wrong_input(self):
    """An unparsable --ignore-newer-than value must make run() fail."""
    self.classifier = clf.Classifier(['--ignore-newer-than=wrong_input'])
    self.assertFalse(self.classifier.run())