def main():
    """Entry point to this script."""
    # Parse all CLI arguments.
    args = cli_arguments()
    verbose = args.verbose
    input_file = args.input

    # Validate the provided Parquet file.
    report = validate_parquet_file(schema, input_file, verbose)

    # Print the report from schema validation.
    print_report(report, args.nocolors)
def main():
    """Entry point to this script."""
    # Parse all CLI arguments.
    args = cli_arguments()
    verbose = args.verbose
    multiple = args.multiple
    input_file = args.input

    if multiple:
        # Process multiple messages stored in one input file.
        report = validate_multiple_messages(schema, input_file, verbose)
    else:
        # Process a single message stored in one input file.
        report = validate_single_message(schema, input_file, verbose)

    # Print the report from schema validation.
    print_report(report, args.nocolors)
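# Note: a minimal sketch of the cli_arguments() helper used by both variants of
# main() above, inferred from the attributes they read (input, verbose, multiple,
# nocolors). The short flags and help texts are assumptions, not the script's
# actual definitions.
import argparse


def cli_arguments():
    """Parse all CLI arguments used by this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", required=True,
                        help="input file with message(s) to validate")
    parser.add_argument("-m", "--multiple", action="store_true",
                        help="input file contains multiple messages")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="make the output verbose")
    parser.add_argument("--nocolors", action="store_true",
                        help="disable colored output in the report")
    return parser.parse_args()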
def test_print_report_use_colors():
    """Test the function print_report with colors enabled."""
    result = {"processed": 2, "valid": 2, "invalid": 0, "error": 0}
    f = io.StringIO()

    # Capture everything print_report writes to standard output.
    with contextlib.redirect_stdout(f):
        print_report(result, False)

    output = f.getvalue()
    print(output)
    assert "Status:" in output
    assert "Processed messages:" in output
    assert "Valid messages:" in output
    assert "Invalid messages:" in output
    assert "Errors detected:" in output
    assert "Summary:" in output
    assert "[OK]" in output
    assert "all messages have proper format" in output
def test_print_report_in_case_of_error():
    """Test the function print_report when errors were detected."""
    result = {"processed": 1, "valid": 2, "invalid": 3, "error": 4}
    f = io.StringIO()

    # Capture everything print_report writes to standard output.
    with contextlib.redirect_stdout(f):
        print_report(result, True)

    output = f.getvalue()
    print(output)
    expected = """
Status:
Processed messages: 1
Valid messages: 2
Invalid messages: 3
Errors detected: 4
Summary:
[FAIL]: invalid JSON(s) detected
"""
    assert output == expected
def test_print_report_in_case_of_invalid_data():
    """Test the function print_report when invalid messages were detected."""
    result = {"processed": 2, "valid": 1, "invalid": 1, "error": 0}
    f = io.StringIO()

    # Capture everything print_report writes to standard output.
    with contextlib.redirect_stdout(f):
        print_report(result, True)

    output = f.getvalue()
    print(output)
    expected = """
Status:
Processed messages: 2
Valid messages: 1
Invalid messages: 1
Errors detected: 0
Summary:
[WARN]: invalid messages detected
"""
    assert output == expected
def test_print_report_in_case_of_no_error():
    """Test the function print_report when all messages are valid."""
    result = {"processed": 2, "valid": 2, "invalid": 0, "error": 0}
    f = io.StringIO()

    # Capture everything print_report writes to standard output.
    with contextlib.redirect_stdout(f):
        print_report(result, True)

    output = f.getvalue()
    print(output)
    expected = """
Status:
Processed messages: 2
Valid messages: 2
Invalid messages: 0
Errors detected: 0
Summary:
[OK]: all messages have proper format
"""
    assert output == expected
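# Note: a minimal sketch of print_report consistent with the four tests above.
# Only the plain output (nocolors=True) is pinned down by the expected strings,
# so the ANSI escape codes used on the colored path are an assumption.
def print_report(report, nocolors):
    """Print a summary of the schema validation run."""
    print("\nStatus:")
    print("Processed messages: {}".format(report["processed"]))
    print("Valid messages: {}".format(report["valid"]))
    print("Invalid messages: {}".format(report["invalid"]))
    print("Errors detected: {}".format(report["error"]))
    print("Summary:")
    # Pick the severity tag, message, and (assumed) ANSI color for the summary.
    if report["error"] > 0:
        tag, message, color = "[FAIL]", "invalid JSON(s) detected", "\033[91m"
    elif report["invalid"] > 0:
        tag, message, color = "[WARN]", "invalid messages detected", "\033[93m"
    else:
        tag, message, color = "[OK]", "all messages have proper format", "\033[92m"
    if nocolors:
        print("{}: {}".format(tag, message))
    else:
        print("{}{}\033[0m: {}".format(color, tag, message))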
def evaluator():
    """Evaluate a saved model against user and impostor samples."""
    existing_files = get_file_list("model", "mdl")
    if not existing_files:
        print("No model files found. Come back when you have models.")
        exit()

    target_filename = get_existing_filename(existing_files)
    if len(target_filename) == 0:
        print("No files selected, quitting...")
        exit()

    # Load the previously trained model from disk.
    model = None
    print(target_filename)
    with open("model/" + target_filename, 'rb') as file:
        model = pickle.load(file)
    if model is None:
        return

    existing_files = get_file_list("sequence", "smp")
    if not existing_files:
        print("No sample files found. Come back when you have samples.")
        return

    target_filenames = get_existing_filename(existing_files, True)
    if len(target_filenames) == 0:
        print("No files selected, quitting...")
        return

    # Concatenate the samples stored in every selected file.
    sequence = []
    for target_filename in target_filenames:
        samples = get_sequence_from_file(target_filename)
        sequence.extend(samples)

    # Parse the raw samples and drop those whose size differs from the first one.
    timings_sequences = []
    compared_size = None
    print("")
    for raw_sample in sequence:
        parser = SampleParser(raw_sample)
        timings_sequences.append(parser.timings)
        if compared_size is None:
            compared_size = parser.timings[-1]
        elif parser.timings[-1] != compared_size:
            print("Error, one sample has a different size ({}), removing it"
                  .format(parser.timings[-1]))
            del timings_sequences[-1]

    print("{} samples".format(len(timings_sequences)))

    # Build the data: the next-to-last field flags user (1) vs. impostor (0)
    # samples, and the last field holds the sample length.
    trueData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 1]
    fakeData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 0]

    # Split for training/optimization and final evaluation.
    train, test = train_test_split(trueData, train_size=0.8, test_size=None)
    print("{} samples from user".format(len(trueData)))
    print("{} samples from impostor\n".format(len(fakeData)))

    # Print a final evaluation of the model against impostor data.
    report = Model.report(model, train, test, fakeData)
    print_report(report)
def trainer():
    """Train a model on recorded samples and optionally save it."""
    existing_files = get_file_list()
    if not existing_files:
        print("No sample files found. Come back when you have samples.")
        exit()

    target_filenames = get_existing_filename(existing_files, True)
    if len(target_filenames) == 0:
        print("No files selected, quitting...")
        exit()

    # Concatenate the samples stored in every selected file.
    sequence = []
    for target_filename in target_filenames:
        samples = get_sequence_from_file(target_filename)
        sequence.extend(samples)

    # Parse the raw samples and drop those whose size differs from the first one.
    timings_sequences = []
    compared_size = None
    print("")
    for raw_sample in sequence:
        parser = SampleParser(raw_sample)
        timings_sequences.append(parser.timings)
        if compared_size is None:
            compared_size = parser.timings[-1]
        elif parser.timings[-1] != compared_size:
            print("Error, one sample has a different size ({}), removing it"
                  .format(parser.timings[-1]))
            del timings_sequences[-1]

    model = Model()
    print("{} samples".format(len(timings_sequences)))

    # Build the data: the next-to-last field flags user (1) vs. impostor (0)
    # samples, and the last field holds the sample length.
    trueData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 1]
    fakeData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 0]

    # Split for training/optimization and final evaluation.
    train, test = train_test_split(trueData, train_size=0.8, test_size=None)
    print("{} samples from user".format(len(trueData)))
    print(" {:3d} samples for training".format(len(train)))
    print(" {:3d} samples for testing".format(len(test)))
    print("{} samples from impostor\n".format(len(fakeData)))

    # Show a spinner in a background thread while the model is fitted.
    spinner = PixelSpinner("Fitting data to the model... ")
    spinner.start()
    t = Thread(target=spinner_loop, args=(spinner,))
    t.do_run = True
    t.start()

    # Train and optimize.
    params = Model.findParameters(model, train)
    t.do_run = False
    t.join()
    print("")

    # Print a report on the training/optimization phase.
    # evaluate = Model.evaluate(params["model"], train, test)

    # Print a final evaluation of the model against impostor data.
    report = Model.report(params["model"], train, test, fakeData)
    print_report(report)

    save_model = get_binary_validation("Do you want to keep this model?", True)
    if save_model:
        filename = get_custom_filename(target_filenames)
        os.makedirs("model", exist_ok=True)
        with open("model/" + filename, 'wb') as file:
            pickle.dump(params["model"], file, pickle.HIGHEST_PROTOCOL)
        print("Model saved in model/" + filename)
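# Note: trainer() stops the spinner thread by clearing its do_run attribute and
# then joining it. A helper matching that protocol could look like the sketch
# below; the polling interval and the call to spinner.next() are assumptions,
# only the do_run flag is taken from the code above.
import time
from threading import current_thread


def spinner_loop(spinner):
    """Advance the spinner until the owning thread's do_run flag is cleared."""
    while getattr(current_thread(), "do_run", True):
        spinner.next()
        time.sleep(0.1)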