def testEvaluateBetweenDates(self):
    """Between should select date strings whose time lies in the asked range.

    The query asks for values between 2 pm and 6 pm. The checks cover a
    15:00 entry (expected True), a 12:45 entry (expected False) and the
    trailing empty string (expected False).
    """
    command = Between()
    timestamps = np.array([
        '13 Jul 1930 - 15:00', '13 Jul 1930 - 15:00',
        '14 Jul 1930 - 12:45', '14 Jul 1930 - 14:50',
        '15 Jul 1930 - 16:00', '16 Jul 1930 - 14:45',
        '17 Jul 1930 - 12:45', '17 Jul 1930 - 14:45',
        '18 Jul 1930 - 14:30', '19 Jul 1930 - 12:50',
        '19 Jul 1930 - 15:00', '20 Jul 1930 - 13:00',
        '20 Jul 1930 - 15:00', '21 Jul 1930 - 14:50',
        '22 Jul 1930 - 14:45', '26 Jul 1930 - 14:45',
        '27 Jul 1930 - 14:45', '30 Jul 1930 - 14:15',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '31 May 1934 - 16:30', '31 May 1934 - 16:30',
        '31 May 1934 - 16:30', '31 May 1934 - 16:30',
        '',
    ])
    query_words = 'Find all array values between 2 pm and 6 pm'.lower().split(' ')
    target = DataObject(query_words, ['user', 'conversation'])
    array_object = DataObject(timestamps, ['date', 'time'])

    outcome = command.evaluate(array_data=array_object, target=target)

    self.assertEqual(outcome.command_status, CommandStatus.Success)
    for position, expected in ((0, True), (2, False), (-1, False)):
        self.assertEqual(outcome.data[position], expected)
def testEvaluateLessThanDates(self):
    """LessThan should accept both a year bound and a time-of-day bound.

    The year query is checked element-wise against a mask computed with
    pandas; the time-of-day query is only checked for a Success status.
    """
    comparator = LessThan()
    timestamps = np.array([
        '13 Jul 1930 - 15:00', '13 Jul 1930 - 15:00',
        '14 Jul 1930 - 12:45', '14 Jul 1930 - 14:50',
        '15 Jul 1930 - 16:00', '16 Jul 1930 - 14:45',
        '17 Jul 1930 - 12:45', '17 Jul 1930 - 14:45',
        '18 Jul 1930 - 14:30', '19 Jul 1930 - 12:50',
        '19 Jul 1930 - 15:00', '20 Jul 1930 - 13:00',
        '20 Jul 1930 - 15:00', '21 Jul 1930 - 14:50',
        '22 Jul 1930 - 14:45', '26 Jul 1930 - 14:45',
        '27 Jul 1930 - 14:45', '30 Jul 1930 - 14:15',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '27 May 1934 - 16:30', '27 May 1934 - 16:30',
        '31 May 1934 - 16:30', '31 May 1934 - 16:30',
        '31 May 1934 - 16:30', '31 May 1934 - 16:30',
        '',
    ])

    # First query: compare against a calendar year
    year_words = 'Find all array values before 1933'.lower().split(' ')
    target = DataObject(year_words, ['user', 'conversation'])
    array_object = DataObject(timestamps, ['date', 'time'])
    arguments = dict(array_data=array_object, target=target)
    outcome = comparator.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Success)
    parsed = pd.to_datetime(timestamps)
    npt.assert_almost_equal(parsed.year < 1933, outcome.data)

    # Second query: compare against a time of day, same array
    time_words = 'Find all array values before 2 pm'.lower().split(' ')
    arguments['target'] = DataObject(time_words, ['user'])
    outcome = comparator.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Success)
def testCompareDataObjects(self):
    """DataObject comparison: same keywords compare equal in any order,
    different keywords compare unequal."""
    reference = DataObject(1, ['data', '1'])
    different_keywords = DataObject(1, ['data', '2'])
    identical_keywords = DataObject(1, ['data', '1'])
    reordered_keywords = DataObject(1, ['1', 'data'])

    self.assertTrue(reference == identical_keywords)
    self.assertTrue(reference != different_keywords)
    self.assertTrue(reference == reordered_keywords)
def testEvaluate(self):
    """StatListColumns succeeds on the test CSV and reports an error once
    the data is replaced by a plain list."""
    command = StatListColumns()
    frame_argument = command.argumentTypes()[0]
    csv_path = os.path.join(package_directory, 'test_data', 'data.csv')
    frame_object = DataObject(pd.read_csv(csv_path), ['random', 'dataset'])
    conversation = DataObject('list categorical columns'.split(' '),
                              ['user', 'conv'])
    arguments = {
        frame_argument.keyword: frame_object,
        'user_conv': conversation,
    }

    outcome = command.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Success)

    # Try false data: the same DataObject now carries a list
    frame_object.data = [1, 2, 3]
    outcome = command.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Error)
def testWorstN(self):
    """FilterBottomN: asking for the last 3 values flags the first three
    entries of the array and leaves the trailing NaN unflagged."""
    command = FilterBottomN()
    values = np.array([1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5, np.nan])
    array_data = DataObject(values, keyword_list=['array'])
    query_words = 'Find last 3 values in array'.lower().split(' ')
    target = DataObject(query_words, ['user', 'conversation'])

    outcome = command.evaluate(array_data=array_data, target=target)

    self.assertEqual(outcome.command_status, CommandStatus.Success)
    for position, expected in ((0, True), (1, True), (2, True), (-1, False)):
        self.assertEqual(outcome.data[position], expected)
def extractArgFromUser(self, key_words, argument):
    """
    Extract argument from user input if possible

    Parameters:
        key_words - Sequence of words from the user input (searched with
            ``index`` and sliced, so presumably a list of strings)
        argument - Argument description; its ``tags``, ``argument_type``
            and ``number`` attributes are read

    Returns a list with the extracted values; the list is empty when no
    tag of the argument occurs in ``key_words`` or nothing could be
    extracted.
    """
    data_res = []
    for tag in argument.tags:
        try:
            index = key_words.index(tag.name)
        except ValueError:
            # The tag word does not occur in the user input; try the next
            continue
        if tag.position == Argument.TagPosition.After:
            search_scope = key_words[(index + 1):]
        elif tag.position == Argument.TagPosition.Before:
            # Reverse list to be consistent with search order
            search_scope = key_words[:index][::-1]
        else:
            search_scope = key_words
        if argument.argument_type is DataType.number:
            res = findNumbers(search_scope, argument.number)
            # Only stop once numbers were found; otherwise the next tag
            # gets a chance
            if len(res) != 0:
                data_res = data_res + res
                break
        elif argument.argument_type is DataType.user_string:
            res = DataObject(' '.join(search_scope), search_scope)
            data_res.append(res)
            break
        else:
            Printer.Print("Can only extract numbers and strings from user "
                          "currently")
            break
    return data_res
def testEvaluateLessThanNumbers(self):
    """LessThan on a numeric array: a plain number bound selects the
    smaller values, a time-of-day bound is reported as an error."""
    comparator = LessThan()
    array_data = DataObject(np.arange(10), keyword_list=['array'])

    number_words = 'Find all array values less than 5'.lower().split(' ')
    target = DataObject(number_words, ['user', 'conversation'])
    arguments = dict(array_data=array_data, target=target)
    outcome = comparator.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Success)
    selected = array_data.data[outcome.data]
    npt.assert_almost_equal(selected, np.arange(5))

    # A time bound makes no sense for plain numbers
    time_words = 'Find all array values less than 5 pm'.lower().split(' ')
    arguments['target'] = DataObject(time_words, ['user', 'conversation'])
    outcome = comparator.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Error)
def test_execute_command(self):
    """Executing the dummy command stores its input in history and puts
    the parser back into the command_unknown state."""
    dummy_command = DummyCommand()
    question = DataObject("How are you", ["question"])
    self.parser.executeCommand(dummy_command, {"dummy": question})

    stored = self.checkResult(self.history, "How are you", ["dummy"],
                              DataType.string)
    self.assertTrue(stored)
    self.assertEqual(self.parser.currentState,
                     ParserStates.command_unknown)
def read(self, file_path, keyword_list, pre_evaluate_results):
    """
    Announce the loaded data and list its contents

    Parameters:
        file_path - Not used by this method
        keyword_list - List of strings; joined with spaces for the
            "Loaded ..." message
        pre_evaluate_results - List of results computed beforehand; its
            first entry is handed to ``self.list_command``

    Returns ``pre_evaluate_results`` unchanged, or a ResultObject with
    CommandStatus.Error when it is not a non-empty list.
    """
    # isinstance also accepts list subclasses; an empty list has no first
    # entry to list, so it is reported as an error instead of raising
    # IndexError below.
    if not isinstance(pre_evaluate_results, list) or not pre_evaluate_results:
        Printer.Print("No preevaluation done!",
                      " Please file a bug report with the chat")
        return ResultObject(None, None, None, CommandStatus.Error)
    # List the information about csv
    Printer.Print("Loaded " + " ".join(keyword_list))
    self.list_command.evaluate(pre_evaluate_results[0],
                               DataObject([''], []))
    return pre_evaluate_results
def testEvaluate(self):
    """ListHistory succeeds on a populated TypeDatabase and reports an
    error when no history is passed."""
    command = ListHistory()
    history_argument = command.argumentTypes()[0]
    conversation_argument = command.argumentTypes()[1]

    database = TypeDatabase()
    database.add(DataType.number, ['my', 'lucky', 'number'], 10)
    database.add(DataType.string, ['my', 'favorite', 'quote'],
                 'Pen is sharper than knife')
    database.add(DataType.array, ['zero', 'array'], np.zeros(10))

    populated = {
        history_argument.keyword: DataObject(database, 'history'),
        conversation_argument.keyword: DataObject([], 'user_conv'),
    }
    outcome = command.evaluate(**populated)
    self.assertEqual(outcome.command_status, CommandStatus.Success)

    # Try no data
    missing = {
        history_argument.keyword: None,
        conversation_argument.keyword: DataObject([], ''),
    }
    outcome = command.evaluate(**missing)
    self.assertEqual(outcome.command_status, CommandStatus.Error)
def testTopN(self):
    """FilterTopN on a numeric array (with a trailing NaN) and then on an
    array of strings, reusing the same argument objects."""
    command = FilterTopN()
    numbers = np.array([1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5, np.nan])
    array_data = DataObject(numbers, keyword_list=['array'])
    query_words = 'Find top 2 values in array'.lower().split(' ')
    target = DataObject(query_words, ['user', 'conversation'])
    arguments = dict(array_data=array_data, target=target)

    outcome = command.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Success)
    for position, expected in ((-2, True), (-3, True), (-1, False)):
        self.assertEqual(outcome.data[position], expected)

    # Same query, but the array now holds words
    array_data.data = np.array(
        ['Hello', 'how', 'are', 'you', 'how', 'are'])
    outcome = command.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Success)
    for position, expected in ((0, False), (1, True), (-1, True)):
        self.assertEqual(outcome.data[position], expected)
def testEvaluate(self):
    """Load reports an error for an empty file object and succeeds once
    the object points at the test CSV."""
    loader = Load()
    file_argument = loader.argumentTypes()[0]
    file_info = FileObject('', DataType.file_name, '', False)
    file_name_object = DataObject(file_info, ['random', 'file'])
    arguments = {file_argument.keyword: file_name_object}

    def run_load():
        # preEvaluate must not see results of an earlier run
        arguments.pop('pre_evaluate_results', None)
        arguments['pre_evaluate_results'] = loader.preEvaluate(**arguments)
        return loader.evaluate(**arguments)

    outcome = run_load()
    self.assertEqual(outcome.command_status, CommandStatus.Error)

    file_info.path = os.path.join(package_directory, 'test_data',
                                  'data.csv')
    file_info.data_type = DataType.csv
    file_name_object.data = file_info
    outcomes = run_load()
    self.assertEqual(outcomes[0].command_status, CommandStatus.Success)
def testEvaluate(self):
    """ImageDisplay reports an error for a missing argument and for
    non-image data, and succeeds for an image read from disk."""
    command = ImageDisplay()
    image_argument = command.argumentTypes()[0]
    keyword = image_argument.keyword

    # Use None when varstore does not have anything stored
    outcome = command.evaluate(**{keyword: None})
    self.assertEqual(outcome.command_status, CommandStatus.Error)

    # Use false data
    data_object = DataObject(123, ['random', 'image'])
    arguments = {keyword: data_object}
    data_object.data = 123
    outcome = command.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Error)

    # No more optional image should be passed by parser:
    outcome = command.evaluate(**{keyword: None})
    self.assertEqual(outcome.command_status, CommandStatus.Error)

    # Use real image
    image_path = os.path.join(package_directory, 'test_data', 'image.jpg')
    data_object.data = imread(image_path)
    outcome = command.evaluate(**arguments)
    self.assertEqual(outcome.command_status, CommandStatus.Success)
    plt.close()
def test_found_command(self):
    """With two candidate inputs in history, the user text "my input"
    makes the dummy command run on the "my" entry only."""
    for owner in ("my", "your"):
        self.history.add(DataType.string, ["input", owner],
                         owner + " dummy input")
    self.parser.keyword_list = (
        "Call the dummy function with my input".split(' '))
    self.parser.foundCommand(DataObject(DummyCommand(), ["dummy", "test"]))

    result_keywords = ["dummy", "result"]
    found_mine = self.checkResult(self.history, "my dummy input",
                                  result_keywords, DataType.string)
    self.assertTrue(found_mine)
    found_yours = self.checkResult(self.history, "your dummy input",
                                   result_keywords, DataType.string)
    self.assertFalse(found_yours)
def add(self, keyword_list, data_object, add_to_cache=True, data_type=None,
        name=None):
    """
    Store data in the database under the given keywords

    Parameters:
        keyword_list - A list of strings to identify data object
        data_object - The data to store; it gets wrapped in a DataObject
        add_to_cache - When true, the wrapped object is appended to the
            cache as well
        data_type - Handed to the DataObject constructor
        name - Optional name; when given it is mapped to the position of
            the new entry

    Returns the DataObject that was stored
    """
    position = len(self.data_objects)
    self.keyword_search.add(keyword_list, position)
    if name is not None:
        self.name_dict[name] = position
    wrapped = DataObject(data_object, keyword_list, data_type, name)
    self.data_objects.append(wrapped)
    if add_to_cache:
        self.cache.append(wrapped)
    return wrapped
def test_execute_command_update_cache(self):
    """After running DummyCommandWithFillCache, the command output is in
    history and is also the last string object returned by history."""
    cache_filling_command = DummyCommandWithFillCache()
    # Add a result to history and cache
    self.history.add(DataType.string, ["previous", "result"],
                     "previous result")
    question = DataObject("How are you", ["question"])
    self.parser.executeCommand(cache_filling_command, {"dummy": question})

    # Check result in history
    stored = self.checkResult(self.history, "How are you",
                              ["dummy", "result"], DataType.string)
    self.assertTrue(stored)
    self.assertEqual(self.parser.currentState,
                     ParserStates.command_unknown)

    # Check cache is updated
    latest = self.history.getLastObject(DataType.string)
    self.assertEqual(latest.data, "How are you")
def add_basic_database(history):
    """
    Load the startup file database into history

    The database CSV is looked up in ``AlfaDatabase`` under the user's home
    directory first and in the package ``resources`` directory second.
    The database itself and every entry the reader returns for it are
    added to ``history``.

    Parameters:
        history - Object whose ``add(data_type, keyword_list, data,
            name=...)`` method receives the entries

    Nothing is returned; success or failure is reported with ``print``.
    """
    data_base_path = os.path.join(str(Path.home()),
                                  'AlfaDatabase/file_database.csv')
    if not os.path.isfile(data_base_path):
        data_base_path = os.path.join(package_directory,
                                      'resources/file_database.csv')
    if not os.path.isfile(data_base_path):
        # Neither location has a database; say so instead of returning
        # silently
        print("Failed to load file database")
        return
    reader = ReadDatabase()
    data_base_object = DataObject(data_base_path, ['startup', 'database'])
    history.add(DataType.data_base, data_base_object.keyword_list,
                data_base_object)
    results = reader.read(data_base_object.data,
                          data_base_object.keyword_list)
    # Anything other than a list is treated as a failed read
    if not isinstance(results, list):
        print("Failed to load file database")
        return
    for result in results:
        history.add(result.data_type, result.keyword_list, result.data,
                    name=result.name)
    print("Loaded basic file database")
def transformArray_to_dataFrame(self, array_datas, useCategorical=False,
                                expand_single=False, remove_nan=False):
    """
    Combine array data objects into a single pandas DataFrame

    Parameters:
        array_datas - One data object or an iterable of them; each needs
            a ``data`` array and a ``keyword_list``
        useCategorical - When false, non numeric arrays are skipped. When
            true and more than one array is given, their values are
            mapped to evenly spaced numbers in [0, 1]
        expand_single - When true, an array of size one is repeated to
            the common size instead of being skipped
        remove_nan - When true, rows containing NaN are dropped

    Returns a tuple (command_status, data_frame, column_names,
    common_name). command_status is CommandStatus.Error when the frame
    ends up empty.
    """
    # Local import: the ABC aliases were removed from ``collections`` in
    # Python 3.10
    from collections.abc import Iterable

    # Create a combined array and keyword list
    array_sizes = []
    # Check if array_datas is of length 1 or not
    if not isinstance(array_datas, Iterable):
        array_datas = [array_datas]
    for array_data in array_datas:
        if array_data.data.size != 1:
            array_sizes.append(array_data.data.size)
    if len(array_sizes) == 0:
        array_sizes = [1]
    # Most frequent size; ties go to the smallest size because np.unique
    # sorts and argmax returns the first maximum. Computed with numpy so
    # the result does not depend on the return shape of scipy's mode.
    unique_sizes, size_counts = np.unique(array_sizes, return_counts=True)
    array_size = unique_sizes[np.argmax(size_counts)]
    df = pd.DataFrame()
    command_status = CommandStatus.Success
    kl1 = [" ".join(array_data.keyword_list) for array_data in array_datas]
    truncated_kl1, common_name = StatContainer.removeCommonNames(kl1)
    # Conditional filter (presumably a boolean mask; it is only applied
    # when its length matches the common array size)
    if StatContainer.conditional_array is not None and len(
            StatContainer.conditional_array.data) == array_size:
        inds = StatContainer.conditional_array.data
        Nfiltered = np.sum(inds)
        Printer.Print("Nfiltered: ", Nfiltered)
    else:
        Nfiltered = array_size
        inds = np.full(Nfiltered, True)
    for i, array_data in enumerate(array_datas):
        # Check if the array is a numeric type or not
        if not np.issubdtype(array_data.data.dtype, np.number):
            if not useCategorical:
                Printer.Print("Skipping ",
                              " ".join(array_data.keyword_list),
                              "\nThe array is not of numeric type")
                continue
            if len(array_datas) > 1:
                # Map the array to numeric quantity
                arr_data = pd.Series(array_data.data[inds])
                lut = dict(
                    zip(arr_data.unique(),
                        np.linspace(0, 1, arr_data.unique().size)))
                # Creating a new data object by mapping strings to numbers
                # NOTE(review): this data is already filtered with inds
                # and goes through the size check and filter below again
                array_data = DataObject(arr_data.map(lut),
                                        array_data.keyword_list)
        # Check if all the arrays have the same size or not. Pick the
        # largest set of arrays that have the same size
        if array_size != array_data.data.size:
            if array_data.data.size == 1 and expand_single:
                data = np.ones(Nfiltered) * array_data.data
            else:
                Printer.Print("Skipping array ",
                              " ".join(array_data.keyword_list),
                              " since its size does not match with",
                              " other arrays in the frame")
                continue
        elif array_data.data.size == 1:
            data = [array_data.data]
        else:
            data = array_data.data[inds]
        df[truncated_kl1[i]] = pd.Series(data)
    if df.size == 0:
        Printer.Print("No arrays found in the arguments provided")
        command_status = CommandStatus.Error
    if remove_nan:
        df.dropna(inplace=True)
    return command_status, df, df.columns.values.tolist(), common_name
def test_print_commands(self):
    """Smoke test: printCommands is called with a list of data objects
    that carry only keywords; no result is checked."""
    keyword_lists = (['load'], ['Random Forrest'])
    commands = [DataObject(None, keywords) for keywords in keyword_lists]
    self.parser.printCommands(commands)
def resolveArguments(self, key_words):
    """
    Try to find every argument of the current command and run it

    Arguments already present in ``self.argumentsFound`` are kept. The
    user conversation and the history are filled in directly; all other
    arguments are searched in history (and, for file names, in folders).
    Partial matches are remembered in ``self.argument_search_result`` and
    combined with the matches of the next call. When every argument is
    found the command is executed, otherwise the found, ambiguous and
    missing arguments are reported to the user.

    Parameters:
        key_words - List of words from the user input
    """
    all_arg_names = set()
    argumentTypes = self.currentCommand.argumentTypes()
    for argument in argumentTypes:
        # TODO Try to use information from user when command gives error
        # TODO If user wants to substitute arguments in the process of
        # resolution then ask him for confirmation.
        # TODO Handle composite commands (resolveCommands similar to
        # resolveArguments)
        assert (argument.number != 0)
        arg_type = argument.argument_type
        arg_name = argument.keyword
        if arg_name in self.argumentsFound:
            continue
        if arg_type is DataType.user_conversation:
            # NOTE(review): 'coversation' looks like a typo of
            # 'conversation'; left untouched because it is a runtime
            # keyword
            self.argumentsFound[arg_name] = DataObject(
                key_words, ['user', 'coversation'])
            continue
        elif arg_type is DataType.history:
            self.argumentsFound[arg_name] = DataObject(
                self.history, ['history'])
            continue
        elif arg_name == "parent_parser":
            # NOTE(review): unlike the branches above there is no
            # continue here, so the history search below still runs and
            # may overwrite this entry - confirm this is intended
            self.argumentsFound[arg_name] = DataObject(
                self, ['parent', 'parser'])
        data_res = self.searchHistory(argument, key_words)
        all_arg_names.add(arg_name)
        # If file name, try searching folders
        if DataType.file_name in self.wrap(arg_type) and len(
                data_res) == 0:
            Printer.Print("Searching for file from folders")
            data_res = searchFileFromFolder(key_words, self.history)
        # If infinite args allowed and we found some args or
        # if finite args allowed and we found exactly those
        # many arguments
        # TODO print intelligent responses as in which arguments
        # are missing or more?
        if self.checkArgumentNumber(argument.number, len(data_res)):
            self.argumentsFound[arg_name] = self.unwrap(
                data_res, argument.number)
        elif len(data_res) != argument.number and len(data_res) > 0:
            if arg_name in self.argument_search_result:
                previous_result = self.argument_search_result[arg_name]
                # Too many candidates before: narrow them down;
                # otherwise collect more
                if len(previous_result) > argument.number:
                    res_set = self.findIntersection(
                        previous_result, data_res)
                else:
                    res_set = self.findUnion(previous_result, data_res)
                if len(res_set) == argument.number:
                    self.argumentsFound[arg_name] = self.unwrap(
                        list(res_set), argument.number)
                elif len(res_set) == 0:
                    self.argument_search_result[arg_name] = data_res
                else:
                    self.argument_search_result[arg_name] = list(res_set)
            else:
                self.argument_search_result[arg_name] = data_res
    self.fillClosestArguments(self.argument_search_result,
                              self.argumentsFound, argumentTypes)
    # Fill all the optional arguments
    self.fillOptionalArguments(self.argumentsFound, argumentTypes)
    self.fillUsingFileDialog(self.argumentsFound, argumentTypes)
    if self.checkArgumentsFound(self.argumentsFound, argumentTypes):
        self.currentState = ParserStates.command_known_data_known
        self.argument_search_result = {}
        self.executeCommand(self.currentCommand, self.argumentsFound)
        self.clearCommandSearchResults()
    else:
        self.currentState = ParserStates.command_known_data_unknown
        unknown_args = all_arg_names.difference(
            set(self.argumentsFound.keys()))
        # Get a list of unknown arguments
        unknownList = list(unknown_args)
        # The response hooks on the command are optional: any ordinary
        # failure falls back to a generic message. Exception (not a bare
        # except) keeps KeyboardInterrupt and SystemExit propagating.
        for arg in self.argumentsFound:
            try:
                self.currentCommand.ArgFoundResponse(arg)
                self.printArguments(self.argumentsFound[arg])
            except Exception:
                Printer.Print("Argument ", arg, "found")
                Printer.Print(
                    "Matching argument: ",
                    self.printArguments(self.argumentsFound[arg]))
        for arg in unknown_args:
            if arg in self.argument_search_result:
                try:
                    self.currentCommand.MultipleArgsFoundResponse(arg)
                except Exception:
                    Printer.Print("\nMultiple arguments found for ", arg)
                self.printArguments(self.argument_search_result[arg])
            else:
                try:
                    self.currentCommand.ArgNotFoundResponse(arg)
                except Exception:
                    Printer.Print("Could not find any match for ", arg)
        if len(unknownList) > 0:
            Printer.Print("\nPlease provide more clues to help me resolve",
                          "these arguments! or 'quit' to abort",
                          "command execution")