def main():
    """Entry point to this script."""
    # Read all command-line options.
    args = cli_arguments()

    # Validate the provided Parquet file against the expected schema.
    report = validate_parquet_file(schema, args.input, args.verbose)

    # Display the outcome of the schema validation.
    print_report(report, args.nocolors)
def main():
    """Entry point to this script."""
    # Read all command-line options.
    args = cli_arguments()

    # One input file may hold a single message or a whole batch of them;
    # pick the matching validator accordingly.
    if args.multiple:
        validator = validate_multiple_messages
    else:
        validator = validate_single_message

    report = validator(schema, args.input, args.verbose)

    # Display the outcome of the schema validation.
    print_report(report, args.nocolors)
Example #3
0
def test_print_report_use_colors():
    """Test the function print_report."""
    stats = {"processed": 2, "valid": 2, "invalid": 0, "error": 0}

    # Capture everything print_report writes to standard output.
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        print_report(stats, False)
    captured = buffer.getvalue()

    print(captured)

    # All section headers and the success summary must appear in the output.
    for fragment in (
        "Status:",
        "Processed messages:",
        "Valid messages:",
        "Invalid messages:",
        "Errors detected:",
        "Summary:",
        "[OK]",
        "all messages have proper format",
    ):
        assert fragment in captured
Example #4
0
def test_print_report_in_case_of_error():
    """Test the function print_report."""
    # error > 0, so the report must end with the [FAIL] summary.
    stats = {"processed": 1, "valid": 2, "invalid": 3, "error": 4}

    # Capture everything print_report writes to standard output.
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        print_report(stats, True)
    captured = buffer.getvalue()

    print(captured)

    expected = """
Status:
Processed messages: 1

Valid messages:     2
Invalid messages:   3
Errors detected:    4

Summary:
[FAIL]: invalid JSON(s) detected
"""
    assert captured == expected
Example #5
0
def test_print_report_in_case_of_invalid_data():
    """Test the function print_report."""
    # invalid > 0 but error == 0, so the report must end with [WARN].
    stats = {"processed": 2, "valid": 1, "invalid": 1, "error": 0}

    # Capture everything print_report writes to standard output.
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        print_report(stats, True)
    captured = buffer.getvalue()

    print(captured)

    expected = """
Status:
Processed messages: 2

Valid messages:     1
Invalid messages:   1
Errors detected:    0

Summary:
[WARN]: invalid messages detected
"""
    assert captured == expected
Example #6
0
def test_print_report_in_case_of_no_error():
    """Test the function print_report."""
    # All messages valid and no errors, so the report must end with [OK].
    stats = {"processed": 2, "valid": 2, "invalid": 0, "error": 0}

    # Capture everything print_report writes to standard output.
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        print_report(stats, True)
    captured = buffer.getvalue()

    print(captured)

    expected = """
Status:
Processed messages: 2

Valid messages:     2
Invalid messages:   0
Errors detected:    0

Summary:
[OK]: all messages have proper format
"""
    assert captured == expected
def evaluator():
    """Evaluate a previously saved model against recorded samples.

    Interactively asks the user to pick a pickled model file ("model/*.mdl")
    and one or more sample files ("sequence/*.smp"), parses the timing
    sequences, discards samples whose size differs from the first one, and
    prints a report of the model's performance against impostor data.
    """
    existing_files = get_file_list("model", "mdl")

    if not existing_files:
        print("No models files found. Come back when you have models.")
        exit()

    target_filename = get_existing_filename(existing_files)
    if len(target_filename) == 0:
        print("No files selected, quitting...")
        exit()

    model = None
    print(target_filename)
    # NOTE(review): pickle.load executes arbitrary code from the file —
    # only open model files you trust.
    with open("model/" + target_filename, 'rb') as file:
        model = pickle.load(file)

    if model is None:
        return

    existing_files = get_file_list("sequence", "smp")

    if not existing_files:
        # Fixed copy-paste message: samples (not models) are missing here,
        # matching the wording used by trainer() for the same situation.
        print("No samples files found. Come back when you have samples.")
        return

    target_filenames = get_existing_filename(existing_files, True)
    if len(target_filenames) == 0:
        print("No files selected, quitting...")
        return

    # Concatenate the raw samples from every selected file.
    sequence = []
    for target_filename in target_filenames:
        samples = get_sequence_from_file(target_filename)
        sequence.extend(samples)

    timings_sequences = []
    # Size of the first sample; every other sample must match or is dropped.
    compared_size = None
    print("")

    for raw_sample in sequence:
        parser = SampleParser(raw_sample)
        if compared_size is None:
            compared_size = parser.timings[-1]
            timings_sequences.append(parser.timings)
        elif parser.timings[-1] == compared_size:
            timings_sequences.append(parser.timings)
        else:
            # Mismatching sample: report it and keep it out of the data set.
            print(
                "Error, one sample has a different size ({}), removing it"
                .format(parser.timings[-1])
            )

    print("{} samples".format(len(timings_sequences)))

    # Build the data: the last element is the sample length, the one before
    # it flags genuine user (1) vs impostor (0) — presumably; confirm
    # against SampleParser.
    trueData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 1]
    fakeData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 0]

    # Split for training/optimization and final evaluation
    train, test = train_test_split(trueData, train_size=0.8, test_size=None)

    print("{} samples from user".format(len(trueData)))
    print("{} samples from impostor\n".format(len(fakeData)))

    # Print a final evaluation of the model against impostors data
    report = Model.report(model, train, test, fakeData)

    print_report(report)
Example #8
0
def trainer():
    """Train a model from recorded samples and optionally save it.

    Interactively asks the user to pick one or more sample files, parses the
    timing sequences, discards samples whose size differs from the first one,
    fits/optimizes a Model on the genuine-user data (with a progress spinner
    running in a background thread), prints an evaluation report, and can
    pickle the resulting model into the ``model/`` directory.
    """
    existing_files = get_file_list()

    if not existing_files:
        print("No samples files found. Come back when you have samples.")
        exit()

    # Let the user pick one or more sample files to train on.
    target_filenames = get_existing_filename(existing_files, True)
    if len(target_filenames) == 0:
        print("No files selected, quitting...")
        exit()

    # Concatenate the raw samples from every selected file.
    sequence = []
    for target_filename in target_filenames:
        samples = get_sequence_from_file(target_filename)
        sequence.extend(samples)

    timings_sequences = []
    # Size of the first sample; every other sample must match or is dropped.
    compared_size = None
    print("")

    for raw_sample in sequence:
        parser = SampleParser(raw_sample)
        timings_sequences.append(parser.timings)
        if compared_size is None:
            compared_size = parser.timings[-1]
        else:
            if parser.timings[-1] != compared_size:
                print(
                    "Error, one sample has a different size ({}), removing it".
                    format(parser.timings[-1]))
                # Drop the mismatching sample that was just appended.
                del timings_sequences[-1]

    model = Model()

    print("{} samples".format(len(timings_sequences)))

    # Build the data: the last element is the sample length, the one before
    # it flags genuine user (1) vs impostor (0) — presumably; confirm
    # against SampleParser.
    trueData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 1]
    fakeData = [smp[:smp[-1]] for smp in timings_sequences if smp[-2] == 0]

    # Split for training/optimization and final evaluation
    train, test = train_test_split(trueData, train_size=0.8, test_size=None)

    print("{} samples from user".format(len(trueData)))
    print("    {:3d} samples for training".format(len(train)))
    print("    {:3d} samples for testing".format(len(test)))
    print("{} samples from impostor\n".format(len(fakeData)))

    spinner = PixelSpinner("Fitting data to the model... ", )
    spinner.start()

    # Create a thread for the spinner; `do_run` is polled by spinner_loop
    # as its stop flag.
    t = Thread(target=spinner_loop, args=(spinner, ))
    t.do_run = True
    t.start()

    # Train and optimize (blocks until the parameter search is done).
    params = Model.findParameters(model, train)

    # Stop the spinner thread before printing the report.
    t.do_run = False
    t.join()
    print("")

    # Print a report on the training/optimization phase
    # evaluate = Model.evaluate(params["model"], train, test)

    # Print a final evaluation of the model against impostors data
    report = Model.report(params["model"], train, test, fakeData)

    print_report(report)

    save_model = get_binary_validation("Do you want to keep this model ?",
                                       True)

    if save_model:
        filename = get_custom_filename(target_filenames)
        os.makedirs("model", exist_ok=True)
        with open("model/" + filename, 'wb') as file:
            pickle.dump(params["model"], file, pickle.HIGHEST_PROTOCOL)
            print("Model saved in model/" + filename)