Ejemplo n.º 1
0
def helper_extract_leading_digits_payments(num_payments, col, num_digits):
    '''
    Check extract_leading_digits_from_list against stored payments data.

    Inputs:
        num_payments: value embedded in the names of the test files
            ("payments_<num_payments>_...")
        col: column argument handed to read_column_from_csv for the
            input file
        num_digits: number of leading digits to extract; also part of
            the name of the file holding the expected values

    Fails (via assert) if the extracted digits differ from the expected
    integers read from disk, or if the list given to the function was
    modified.
    '''
    base = TEST_DATA_DIR + "/payments_" + str(num_payments) + "_"

    # keep a snapshot so an in-place modification can be detected later
    original = read_column_from_csv(base + "input.csv", col, True)
    snapshot = original[:]

    expected_path = base + str(num_digits) + "_leading_digits.txt"
    expected_digits = [
        int(value) for value in read_column_from_csv(expected_path, 0, True)]

    result = extract_leading_digits_from_list(original, num_digits)
    assert result == expected_digits
    assert original == snapshot, "Do not change the list that is passed to your function!"
Ejemplo n.º 2
0
def go():
    '''
    Process the command-line arguments and do the work.

    Expects sys.argv to hold, after the program name:
        <input filename> <column number> <num digits>

    If the argument count is wrong, the file does not exist, or one of
    the numeric arguments is not an integer, the usage message (followed
    by a specific error message, where there is one) is printed and the
    function returns.  All arguments are validated before the input file
    is read.

    Otherwise the requested column is read, the Benford distribution is
    plotted, and the MAD is printed.
    '''
    usage = "usage: python benford.py <input filename> <column number>  <num digits>"
    if len(sys.argv) != 4:
        print(usage)
        return

    input_filename = sys.argv[1]
    if not os.path.isfile(input_filename):
        print(usage)
        print("error: file not found: {}".format(input_filename))
        return

    # convert column number argument to an integer
    try:
        col_num = int(sys.argv[2])
    except ValueError:
        print(usage)
        print("error: column number must be an integer: {}".format(sys.argv[2]))
        return

    # convert number of digits argument to an integer
    try:
        num_digits = int(sys.argv[3])
    except ValueError:
        # Format the message exactly once: formatting the already
        # formatted text again would raise if the offending argument
        # itself contains braces (e.g. "{x}").
        print(usage)
        print("error: number of digits must be an integer: {}".format(sys.argv[3]))
        return

    # every argument is valid, so it is now worth reading the data
    data = util.read_column_from_csv(input_filename, col_num, True)

    plot_benford_dist(data, num_digits)

    # "{:.4}" limits the value to four significant digits
    print("MAD: {:.4}".format(compute_benford_MAD(data, num_digits)))
Ejemplo n.º 3
0
def compare_actual_expected_from_file(actual, expected_filename):
    '''
    Compare a list of floats with the reference values stored in a file.

    Inputs:
        actual: value returned by the function under test; it must be a
            flat list of floats
        expected_filename: file whose column 0 holds the expected values

    Calls pytest.fail if actual is not a list, if its first element is
    itself a list, if the lengths differ, or if any pair of values
    differs by more than EPS.
    '''
    # load the reference values
    expected = [
        float(value)
        for value in read_column_from_csv(expected_filename, 0, True)]

    if not isinstance(actual, list):
        pytest.fail(
            "Actual value returned from the function must be a list of floats.")

    if actual and isinstance(actual[0], list):
        pytest.fail(
            "Actual value returned from the function must be a list of floats,"
            " not a list containing a list.")

    if len(actual) != len(expected):
        msg = "Length of expected ({0}) and actual results ({1}) do not match"
        pytest.fail(msg.format(len(expected), len(actual)))

    # values read from disk and computed values may differ slightly, so
    # compare with a tolerance rather than for exact equality
    for idx, (want, got) in enumerate(zip(expected, actual)):
        if abs(want - got) > EPS:
            msg = "actual and expected values do not match at element {0}"
            pytest.fail(msg.format(idx))
Ejemplo n.º 4
0
def helper_test_compute_benford_MAD(prefix, col, num_digits):
    '''
    Run compute_benford_MAD on stored data and check the result.

    Inputs:
        prefix: common prefix of the input and expected-output file names
        col: column argument handed to read_column_from_csv for the
            input file
        num_digits: number of leading digits; also part of the name of
            the expected-output file
    '''
    expected_filename = prefix + "computed_benford_mad_{0}_output.txt".format(num_digits)

    values = read_column_from_csv(prefix + "input.csv", col, True)
    mad = compute_benford_MAD(values, num_digits)

    # the comparison helper works on lists, so wrap the single value
    compare_actual_expected_from_file([mad], expected_filename)
Ejemplo n.º 5
0
def compare_actual_expected_from_file(actual, expected_filename):
    '''
    Compare actual with the expected values stored in a file.

    Inputs:
        actual: value produced by the function under test
        expected_filename: file whose column 0 holds the expected
            values; each entry is converted to a float

    The comparison itself is delegated to compare_actual_expected.
    '''
    reference = [
        float(value)
        for value in read_column_from_csv(expected_filename, 0, True)]

    compare_actual_expected(actual, reference)
Ejemplo n.º 6
0
def helper_test_compute_benford_dist(prefix, col, num_digits):
    '''
    Run compute_benford_dist on stored data and check the result.

    Inputs:
        prefix: common prefix of the input and expected-output file names
        col: column argument handed to read_column_from_csv for the
            input file
        num_digits: number of leading digits; also part of the name of
            the expected-output file

    Fails (via assert) if the function modified the list it was given.
    '''
    expected_filename = prefix + "computed_benford_dist_{0}_output.txt".format(num_digits)

    values = read_column_from_csv(prefix + "input.csv", col, True)
    # snapshot taken before the call so a modification can be detected
    snapshot = values[:]

    distribution = compute_benford_dist(values, num_digits)
    compare_actual_expected_from_file(distribution, expected_filename)

    assert values == snapshot, "Do not change the list that is passed to your function!"
Ejemplo n.º 7
0
def helper_test_compute_benford_dist(prefix, col, num_digits):
    '''
    Run compute_benford_dist on stored data and check the result.

    Inputs:
        prefix: common prefix of the input and expected-output file names
        col: column argument handed to read_column_from_csv for the
            input file
        num_digits: number of leading digits; also part of the name of
            the expected-output file

    Calls pytest.fail if the function modified the list it was given.
    '''
    values = read_column_from_csv(prefix + "input.csv", col, True)
    expected_filename = (
        prefix + "computed_benford_dist_{0}_output.txt".format(num_digits))

    # snapshot taken before the call so a modification can be detected
    snapshot = values[:]
    distribution = compute_benford_dist(values, num_digits)
    compare_actual_expected_from_file(distribution, expected_filename)

    if values != snapshot:
        pytest.fail("Do not change the list that is passed to your function!")
Ejemplo n.º 8
0
def helper_test_compute_benford_dist(prefix, col, currency_symbol, num_digits):
    '''
    Run benford.compute_benford_dist on stored data and check the result.

    Inputs:
        prefix: common prefix of the input and expected-output file
            names inside TEST_DATA_DIR
        col: column argument handed to read_column_from_csv for the
            input file
        currency_symbol: passed through to compute_benford_dist
        num_digits: number of leading digits; also part of the name of
            the expected-output file

    Calls pytest.fail if the function modified the list it was given.
    '''
    input_name = prefix + "input.csv"
    values = read_column_from_csv(
        os.path.join(TEST_DATA_DIR, input_name), col, True)

    expected_name = prefix + "computed_benford_dist_{0}_output.txt".format(num_digits)
    expected_filename = os.path.join(TEST_DATA_DIR, expected_name)

    # snapshot taken before the call so a modification can be detected
    snapshot = values[:]
    distribution = benford.compute_benford_dist(
        currency_symbol, values, num_digits)
    compare_actual_expected_from_file(distribution, expected_filename)

    if values != snapshot:
        pytest.fail("Do not change the list that is passed to your function!")
Ejemplo n.º 9
0
def helper_test_compute_benford_MAD(prefix, col, currency_symbol, num_digits):
    '''
    Run benford.compute_benford_MAD on stored data and check the result.

    Inputs:
        prefix: common prefix of the input and expected-output file names
        col: column argument handed to read_column_from_csv for the
            input file
        currency_symbol: passed through to compute_benford_MAD
        num_digits: number of leading digits; also part of the name of
            the expected-output file

    Calls pytest.fail if the function modified the list it was given,
    if the result is not a float, or if it differs from the expected
    value by more than EPS.
    '''
    values = read_column_from_csv(prefix + "input.csv", col, True)
    snapshot = values[:]
    actual = benford.compute_benford_MAD(currency_symbol, values, num_digits)
    if values != snapshot:
        pytest.fail("Do not change the list that is passed to your function!")

    # the expected MAD is the first entry of column 0 in the output file
    expected_filename = (
        prefix + "computed_benford_mad_{0}_output.txt".format(num_digits))
    expected_column = read_column_from_csv(expected_filename, 0, True)
    expected = float(expected_column[0])

    if not isinstance(actual, float):
        pytest.fail("Actual value returned from the function must be a float.")

    if abs(expected - actual) > EPS:
        msg = "actual ({:f}) and expected ({:f}) values do not match"
        pytest.fail(msg.format(actual, expected))

    # the list-based helper is run as well, on the single wrapped value
    compare_actual_expected_from_file([actual], expected_filename)
Ejemplo n.º 10
0
def go():
    '''
    Process the arguments and do the work.

    Expects sys.argv to hold, after the program name:
        <input filename> <column number> <currency symbol> <num digits>
    optionally followed by an output filename that is handed to
    plot_benford_dist (None is passed when it is absent).

    If the argument count is wrong, the file does not exist, or one of
    the numeric arguments is not an integer, the usage message (followed
    by a specific error message, where there is one) is printed and the
    function returns.  All arguments are validated before the input file
    is read.
    '''
    usage = ("usage: python benford.py <input filename> <column number> "
             "<currency symbol> <num digits> [<output filename>]")

    if len(sys.argv) < 5 or len(sys.argv) > 6:
        print(usage)
        return

    input_filename = sys.argv[1]
    if not os.path.isfile(input_filename):
        print(usage)
        print("error: file not found: {}".format(input_filename))
        return

    # convert column number argument to an integer
    try:
        col_num = int(sys.argv[2])
    except ValueError:
        print(usage)
        print("error: column number must be an integer: {}".format(sys.argv[2]))
        return

    currency_symbol = sys.argv[3]

    # convert number of digits argument to an integer
    try:
        num_digits = int(sys.argv[4])
    except ValueError:
        # Format the message exactly once: formatting the already
        # formatted text again would raise if the offending argument
        # itself contains braces (e.g. "{x}").
        print(usage)
        print("error: number of digits must be an integer: {}".format(sys.argv[4]))
        return

    # grab the name for the output file, if one was given
    output_filename = sys.argv[5] if len(sys.argv) == 6 else None

    # every argument is valid, so it is now worth reading the data
    data = util.read_column_from_csv(input_filename, col_num, True)

    plot_benford_dist(currency_symbol, data, num_digits, output_filename)

    # "{:.4}" limits the value to four significant digits
    print("MAD: {:.4}".format(compute_benford_MAD(currency_symbol, data, num_digits)))
Ejemplo n.º 11
0
    plt.xticks(range(lb, ub, 10**(num_digits-1)))

    # compute limits for the y axis
    max_val = max(max(expected), max(actual))
    y_ub = max_val + max_val * .1
    plt.ylim(0,y_ub)

    # add labels
    plt.title("Actual (blue) and expected (red) Benford distributions")
    if num_digits ==1: 
        plt.xlabel("Leading digit")
    else:
        plt.xlabel("Leading digits")
    plt.ylabel("Proportion")

    plt.savefig(output_filename)


if __name__ == "__main__":
    # Script entry point: with exactly four command-line arguments, read
    # the requested column, plot its Benford distribution to the given
    # output file and print the MAD; otherwise show the usage message.
    if len(sys.argv) == 5:
        input_filename = sys.argv[1]
        data = util.read_column_from_csv(input_filename, int(sys.argv[2]), True)
        num_digits = int(sys.argv[3])
        output_filename = sys.argv[4]
        plot_benford_dist(data, num_digits, output_filename)
        # "{0:.4}" limits the value to four significant digits
        print("MAD: {0:.4}".format(benford.compute_benford_MAD(data, num_digits)))
    else:
        print("usage: python benford.py <input filename> <column number>  <num digits> <output filename>")