def test_latest_diff_json(self, json_flag):
    """ Test "recipy latest --diff -j|--json". """
    def latest_diff_log():
        # Run "recipy latest --diff" with the JSON flag and parse
        # its standard output as a single JSON document.
        _, out = helpers.execute(
            ["recipy", "latest", "--diff", json_flag], 0)
        assert len(out) > 0, "Expected stdout"
        return json.loads(" ".join(out))

    # Before any modification the latest log has an empty diff.
    log = latest_diff_log()
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    helpers.assert_equal_json_logs(log, stored)
    assert log["diff"] == "", "Expected 'diff' to be empty"
    # Change the script and re-run it; the new log must carry a
    # non-empty diff matching the expected patterns.
    self.modify_script()
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log = latest_diff_log()
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    helpers.assert_equal_json_logs(log, stored)
    assert log["diff"] != "", "Expected 'diff' to be non-empty"
    helpers.assert_matches_regexps(
        log["diff"], regexps.get_diff(TestRecipy.SCRIPT_NAME))
def test_data_file_diff_outputs(self):
    """
    If [data].file_diff_outputs is present then:

    * If output files are created, then there are no 'filediffs'
      for the run.
    * If output files with the same content are created, then
      there are 'filediffs' for the run, with an empty 'diffs'
      value.
    """
    helpers.update_recipyrc(recipyenv.get_recipyrc(),
                            "data", "file_diff_outputs")
    # First run creates the output file from scratch, so no file
    # diff record is stored.
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    _, filediffs = helpers.get_log(recipyenv.get_recipydb())
    assert filediffs is None, "Expected filediffs to be None"
    # Second run rewrites identical content: a filediffs record
    # exists for the output file, but its diff is empty.
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    _, filediffs = helpers.get_log(recipyenv.get_recipydb())
    assert filediffs is not None, "Expected filediffs not to be None"
    assert filediffs["filename"] == self.output_file,\
        ("Expected filediffs['filename'] to be " + self.output_file)
    assert filediffs["diff"] == "",\
        "Expected filediffs['diff'] to be empty"
def test_latest_diff(self):
    """ Test "recipy latest --diff". """
    _, stdout = helpers.execute(["recipy", "latest", "--diff"], 0)
    assert len(stdout) > 0, "Expected stdout"
    before_log, _ = helpers.get_log(recipyenv.get_recipydb())
    # Validate standard output against the stored log.
    helpers.assert_matches_regexps(" ".join(stdout),
                                   regexps.get_stdout(before_log))
    # An unmodified script yields an empty diff.
    assert before_log["diff"] == "", "Expected 'diff' to be empty"
    # Modify the script and re-run so the next log carries a diff.
    self.modify_script()
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    _, stdout = helpers.execute(["recipy", "latest", "--diff"], 0)
    assert len(stdout) > 0, "Expected stdout"
    after_log, _ = helpers.get_log(recipyenv.get_recipydb())
    # Validate standard output, including the diff section.
    helpers.assert_matches_regexps(" ".join(stdout),
                                   regexps.get_stdout(after_log))
    helpers.assert_matches_regexps(
        " ".join(stdout), regexps.get_diff(TestRecipy.SCRIPT_NAME))
    # Validate logged data: the diff is now non-empty.
    assert after_log["diff"] != "", "Expected 'diff' to be non-empty"
    helpers.assert_matches_regexps(
        after_log["diff"], regexps.get_diff(TestRecipy.SCRIPT_NAME))
    # Compare original log to diff log: same files by local name.
    for key in ["inputs", "outputs"]:
        assert len(before_log[key]) == len(after_log[key]),\
            ("Expected same number of " + key + " files")
        for before_entry, after_entry in zip(before_log[key],
                                             after_log[key]):
            [original_file, _] = before_entry
            [diff_file, _] = after_entry
            assert os.path.basename(original_file) ==\
                os.path.basename(diff_file),\
                "Expected local file names to be equal"
    # Remove fields that are specific to a run, then the two logs
    # must be identical.
    for key in ["unique_id", "diff", "date", "exit_date",
                "command_args", "inputs", "outputs"]:
        del before_log[key]
        del after_log[key]
    assert before_log == after_log,\
        ("Expected " + str(before_log) + " to equal " + str(after_log))
def test_ignored_metadata_diff(self):
    """
    If [ignored metadata].diff is present then no 'diff'
    information is in the log.
    """
    # Without the setting, a 'diff' entry is recorded.
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    assert "diff" in log, "Expected 'diff' in log"
    # With the setting enabled, the entry is omitted.
    helpers.update_recipyrc(recipyenv.get_recipyrc(),
                            "ignored metadata", "diff")
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    assert "diff" not in log, "Unexpected 'diff' in log"
def test_latest_json(self, json_flag):
    """ Test "recipy latest -j|--json". """
    _, output_lines = helpers.execute(
        ["recipy", "latest", json_flag], 0)
    assert len(output_lines) > 0, "Expected stdout"
    # The printed JSON must match the latest database entry.
    parsed = json.loads(" ".join(output_lines))
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    helpers.assert_equal_json_logs(parsed, stored)
def test_ignored_metadata_git(self):
    """
    If [ignored metadata].git is present then no 'gitrepo',
    'gitorigin', 'gitcommit' information is in the log.
    """
    git_keys = ("gitrepo", "gitorigin", "gitcommit")
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    # By default every git-related field is recorded.
    for key in git_keys:
        assert key in log, ("Expected " + key + " in log")
    # With the setting enabled, none of them appear.
    helpers.update_recipyrc(recipyenv.get_recipyrc(),
                            "ignored metadata", "git")
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    for key in git_keys:
        assert key not in log, ("Unexpected " + key + " in log")
def test_latest(self):
    """ Test "recipy latest". """
    _, output_lines = helpers.execute(["recipy", "latest"], 0)
    assert len(output_lines) > 0, "Expected stdout"
    # Validate the printed text against the stored database log.
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    helpers.assert_matches_regexps(" ".join(output_lines),
                                   regexps.get_stdout(stored))
def test_m_recipy(self):
    """
    Running 'python -m recipy script' and the same script that
    includes 'import recipy' should give the same results in the
    log (aside from their 'unique_id', 'diff', 'date',
    'exit_date', 'command_args', 'inputs' and 'outputs').
    """
    input_file = os.path.join(TestMflag.directory, "input.csv")
    with open(input_file, "w") as csv_file:
        csv_file.write("1,4,9,16\n")
    output_file = os.path.join(TestMflag.directory, "output.csv")
    # Run via "python -m recipy" with the unmodified script.
    exit_code, _ = helpers.execute_python(
        ["-m", "recipy", TestMflag.script, input_file, output_file])
    assert exit_code == 0, ("Unexpected exit code " + str(exit_code))
    module_log, _ = helpers.get_log(recipyenv.get_recipydb())
    # Re-run after adding 'import recipy' to the script itself.
    helpers.enable_recipy(TestMflag.original_script, TestMflag.script)
    exit_code, _ = helpers.execute_python(
        ["-m", "recipy", TestMflag.script, input_file, output_file])
    assert exit_code == 0, ("Unexpected exit code " + str(exit_code))
    import_log, _ = helpers.get_log(recipyenv.get_recipydb())
    for key in ["inputs", "outputs"]:
        assert len(module_log[key]) == len(import_log[key]),\
            ("Expected same number of " + key + " files")
        for index in range(0, len(module_log[key])):
            # Bug fix: the names were previously swapped (the file
            # from module_log was called 'import_file' and vice
            # versa). Harmless for this symmetric comparison, but
            # misleading - each name now matches its source log.
            [module_file, _] = module_log[key][index]
            [import_file, _] = import_log[key][index]
            assert os.path.basename(module_file) ==\
                os.path.basename(import_file),\
                "Expected local file names to be equal"
    # Remove fields that are specific to a run.
    for key in ["unique_id", "diff", "date", "exit_date",
                "command_args", "inputs", "outputs"]:
        # Bug fix: the original deleted import_log[key] only when
        # key was present in module_log, which could raise KeyError
        # (key in module_log only) or leave a stale key in
        # import_log (key in import_log only). Guard each log
        # independently.
        module_log.pop(key, None)
        import_log.pop(key, None)
    assert module_log == import_log,\
        ("Expected " + str(module_log) + " to equal " + str(import_log))
def test_recipyrc(self):
    """
    If neither .recipyrc, recipyrc nor ~/recipyrc exist then
    recipy should use its default configuration. A check is also
    done to see that the database is created in
    ~/recipy/recipyDB.json.
    """
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    # The default database file must have been created.
    database_path = recipyenv.get_recipydb()
    assert os.path.isfile(database_path),\
        ("Expected to find " + database_path)
def test_unknown_parameter(self):
    """
    If recipyrc has a section with an unknown key then the key is
    ignored and does not prevent recipy from running.
    """
    # Inject an unrecognised key into the [database] section.
    helpers.update_recipyrc(recipyenv.get_recipyrc(),
                            "database", "unknown", "unknown")
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    # Recipy must still run and create its database.
    database_path = recipyenv.get_recipydb()
    assert os.path.isfile(database_path),\
        ("Expected to find " + database_path)
def test_search_bad_syntax(self, search_flag, json_flag):
    """
    Test "recipy search -p|--filepath|-f|--fuzzy
    |-r|--regex PATTERN VALUE -j|--json".
    """
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    # Append a surplus value after the search pattern; this is a
    # syntax error and recipy must exit with code 1.
    cmd = (["recipy", "search"]
           + self.get_search(search_flag, stored["unique_id"])
           + ["value", json_flag])
    _, _ = helpers.execute(cmd, 1)
def test_search_id_hash_prefix(self, id_flag, json_flag):
    """
    Test "recipy search -i|--id HASH_PREFIX [-j|--json]".
    """
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    unique_id = stored["unique_id"]
    # The first half of the hash should suffice to find exactly
    # one matching log.
    prefix = unique_id[:len(unique_id) // 2]
    _, stdout = helpers.execute(
        ["recipy", "search", id_flag, str(prefix), json_flag], 0)
    assert len(stdout) > 0, "Expected stdout"
    matches = json.loads(" ".join(stdout))
    assert len(matches) == 1, "Expected a single JSON log"
    helpers.assert_equal_json_logs(matches[0], stored)
def test_ignored_inputs_outputs(self, ignores):
    """
    If [ignored inputs] or [ignored outputs] entries are present,
    with a package name, then no 'inputs' or 'outputs' are present
    in logs when the package is used.

    :param ignores: (recipyrc configuration key, recipy log key),
    if a package is in the former, then the latter should not
    record files input/output by that package.
    :type ignores: (str or unicode, str or unicode)
    """
    recipyrc_key, log_key = ignores
    # Baseline: the package's file accesses are logged.
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    assert len(log[log_key]) > 0, "Expected functions to be logged"
    # After ignoring the package, nothing is logged for it.
    helpers.update_recipyrc(recipyenv.get_recipyrc(),
                            recipyrc_key, TestRecipyrc.LIBRARY)
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    assert len(log[log_key]) == 0, "Expected no functions to be logged"
def test_search(self, search_flag, json_flag):
    """
    Test "recipy search [-p|--filepath|-f|--fuzzy
    |-r|--regex] VALUE -j|--json".
    """
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    cmd = (["recipy", "search"]
           + self.get_search(search_flag, stored["unique_id"])
           + [json_flag])
    _, stdout = helpers.execute(cmd, 0)
    assert len(stdout) > 0, "Expected stdout"
    result = json.loads(" ".join(stdout))
    # Handle case where 'recipy search HASH' returns a list
    if isinstance(result, list):
        result = result[0]
    helpers.assert_equal_json_logs(result, stored)
def test_data_file_diff_outputs_diff(self):
    """
    If [data].file_diff_outputs is present, if output files are
    changed, then there will be 'filediffs' for that run, with a
    'diff' value describing changes to the output files.
    """
    helpers.update_recipyrc(recipyenv.get_recipyrc(),
                            "data", "file_diff_outputs")
    # Pre-create an empty output file so the run changes existing
    # content rather than creating a new file.
    open(self.output_file, 'w').close()
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    _, filediffs = helpers.get_log(recipyenv.get_recipydb())
    assert filediffs is not None, "Expected filediffs not to be None"
    assert filediffs["filename"] == self.output_file,\
        ("Expected filediffs['filename'] to be " + self.output_file)
    # The recorded diff must describe the content change.
    helpers.assert_matches_regexps(filediffs['diff'],
                                   regexps.get_filediffs())
def test_ignored_metadata_hashes(self, ignores):
    """
    If [ignored metadata].input_hashes or output_hashes are
    present then no hashes are recorded for input/output files.

    :param ignores: (recipyrc configuration key, recipy log key),
    if the former is in [ignored metadata] the latter should not
    be in the log
    :type ignores: (str or unicode, str or unicode)
    """
    recipyrc_key, log_key = ignores
    helpers.update_recipyrc(recipyenv.get_recipyrc(),
                            "ignored metadata", recipyrc_key)
    helpers.execute_python(
        [self.script, self.input_file, self.output_file])
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    logged_files = log[log_key]
    assert len(logged_files) >= 1, "Unexpected number of files"
    # Without hashing, entries are bare paths rather than
    # [path, hash] pairs.
    assert not isinstance(logged_files[0], list), "Unexpected list"
def test_search_all(self, search_flag, all_flag, json_flag):
    """
    Test "recipy search [-p|--filepath|-f|--fuzzy
    |-r|--regex] VALUE -a|--all -j|--json".
    """
    num_runs = 3
    # Add extra runs so --all has several matching logs to return.
    for _ in range(num_runs):
        helpers.execute_python(
            [self.script, self.input_file, self.output_file])
    stored, _ = helpers.get_log(recipyenv.get_recipydb())
    cmd = (["recipy", "search"]
           + self.get_search(search_flag, stored["unique_id"])
           + [all_flag, json_flag])
    _, stdout = helpers.execute(cmd, 0)
    assert len(stdout) > 0, "Expected stdout"
    json_logs = json.loads(" ".join(stdout))
    # One pre-existing run plus the num_runs added above.
    assert num_runs + 1 == len(json_logs),\
        "Unexpected number of JSON logs"
def run_test_case(script_path, command, test_case):
    """
    Run a single test case. This runs a script using arguments in
    test_case and validates that recipy has logged information
    about the script, also using data in test_case.

    test_case is assumed to have the following entries:

    * 'libraries': a list of one or more libraries e.g. ['numpy'].
    * 'arguments': a list of script arguments e.g. ['loadtxt'],
      ['savetxt']. If none, then this can be omitted.
    * 'inputs': a list of zero or more input files which running
      the script with the argument will read e.g. ['data.csv'].
      If none, then this can be omitted.
    * 'outputs': a list of zero or more output files which running
      the script with the argument will write e.g. ['data.csv'].
      If none, then this can be omitted.

    :param script_path: Path to the script.
    :type script_path: str or unicode
    :param command: Command-line invocation used to run the script
    (not including "python" or any arguments, which are test-case
    specific).
    :type command: str or unicode
    :param test_case: Test case configuration.
    :type test_case: dict
    """
    number_of_logs = 0
    try:
        number_of_logs =\
            helpers.get_number_of_logs(recipyenv.get_recipydb())
    except DatabaseError:
        # Database may not exist if running tests for first time so
        # give benefit of doubt at this stage and assume running
        # script will bring it into life.
        pass
    libraries = test_case[LIBRARIES]
    if ARGUMENTS in test_case:
        arguments = test_case[ARGUMENTS]
    else:
        arguments = []
    # Execute script
    _, _ = helpers.execute_python(command + arguments, 0)
    # Validate recipy database
    log, _ = helpers.get_log(recipyenv.get_recipydb())
    # Number of logs: exactly one new entry must have been added.
    new_number_of_logs =\
        helpers.get_number_of_logs(recipyenv.get_recipydb())
    # Bug fix: the message previously concatenated str + int,
    # raising TypeError instead of showing the assertion message
    # when the count was wrong; convert the count to str first.
    assert new_number_of_logs == (number_of_logs + 1),\
        ("Unexpected number of logs " + str(new_number_of_logs))
    # Script that was invoked
    check_script(script_path, log["script"],
                 arguments, log["command_args"])
    # Libraries
    check_libraries(libraries, log["libraries"])
    # Inputs and outputs (local filenames only)
    check_input_outputs(test_case, INPUTS, log["inputs"])
    check_input_outputs(test_case, OUTPUTS, log["outputs"])
    # Dates
    check_dates(log["date"], log["exit_date"])
    # Execution environment
    check_environment(log["command"], log["environment"])
    # Miscellaneous
    assert environment.get_user() == log["author"], "Unexpected author"
    assert log["description"] == "", "Unexpected description"