Example #1
0
def main():
    """Print the grouped-data mean and sample variance of a CSV data set.

    The CSV file named by ``sys.argv[1]`` is read as rows whose first two
    columns are floats.  The rows are binned with ``stats.gen_histogram``
    and every histogram entry is treated as ``(bucket id, frequency)``.
    Each bucket is represented by the midpoint of the interval returned by
    ``stats.bucket_interval``; the mean and the (n - 1)-denominator variance
    are computed from those midpoints weighted by the frequencies.

    Prints an error message and returns early when no filename was given or
    when the histogram holds too few elements for the formulas to be
    defined.  Always returns ``None``.
    """
    # Validate the command line first so that a bad invocation does not
    # modify any state inside the ``stats`` module.
    if len(sys.argv) < 2:
        print("Error: Filename required")
        return

    buckets = 10
    bucket_max = 5
    stats.set_bucket_width(bucket_max / buckets)

    # newline="" is the mode the csv module asks for.  Blank lines are
    # yielded as empty rows; skip them instead of failing on row[0].
    with open(sys.argv[1], "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        items = [(float(row[0]), float(row[1])) for row in reader if row]
    h = stats.gen_histogram(buckets + 1, items)

    # NOTE(review): assumes stats.total_elements() and
    # stats.bucket_interval() are pure, so calling each once is enough.
    total = stats.total_elements(h)
    if total in (0, 1):
        # The mean divides by ``total`` and the variance by ``total - 1``.
        print("Error: Not enough data to compute mean and variance")
        return

    # Frequencies
    freqs = [entry[1] for entry in h]

    # Bucket centers
    centers = []
    for entry in h:
        interval = stats.bucket_interval(entry[0])
        low, high = interval[0], interval[1]
        centers.append((high - low) / 2 + low)

    # Sum of f * x and sum of f * x^2 over all buckets.
    weighted_sum = sum(f * c for f, c in zip(freqs, centers))
    weighted_sq_sum = sum(f * (c * c) for f, c in zip(freqs, centers))

    mean = weighted_sum / total
    vari = (weighted_sq_sum - (total * mean * mean)) / (total - 1)

    print("Mean:", mean)
    print("Variance:", vari)
Example #2
0
def main():
    """Print histogram, CDF or quantile data for a CSV data set.

    The CSV file named by ``sys.argv[1]`` is read as rows whose first two
    columns are floats and binned with ``stats.gen_histogram``.  The
    optional second argument selects what is printed, always as ``x, y``
    lines preceded by an ``x, y`` header:

    * ``h`` (also the default when the argument is missing): for every
      histogram entry, the count divided by the element total and scaled
      by ``buckets / bucket_max``.
    * ``c``: for every histogram entry, the running sum of
      ``count / total`` over ``h[:bucket + 1]``.
    * ``q``: for every value produced by ``quantiles(buckets)``, the
      rounded result of ``stats.exp_inv`` and the lower bound of the
      bucket that value falls into.

    Any other mode prints nothing.  Prints an error message and returns
    early when the filename is missing or, for ``h``/``c``, when the
    element total is zero.  Always returns ``None``.
    """
    # Validate the command line first so that a bad invocation does not
    # modify any state inside the ``stats`` module.
    if len(sys.argv) < 2:
        print("Error: Filename required")
        return

    buckets = 10
    bucket_max = 5
    stats.set_bucket_width(bucket_max / buckets)

    # newline="" is the mode the csv module asks for.  Blank lines are
    # yielded as empty rows; skip them instead of failing on row[0].
    with open(sys.argv[1], "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        items = [(float(row[0]), float(row[1])) for row in reader if row]
    h = stats.gen_histogram(buckets + 1, items)

    # A missing mode argument behaves exactly like "h".
    mode = sys.argv[2] if len(sys.argv) >= 3 else "h"

    if mode in ("h", "c"):
        # NOTE(review): assumes stats.total_elements() is pure, so one
        # call can replace the per-row calls.
        total = stats.total_elements(h)
        if total == 0:
            # Both outputs divide by the total.
            print("Error: No data points in histogram")
            return

        if mode == "h":
            print("x, y")
            for x, y in h:
                print(str(x) + ",", y / total * buckets / bucket_max)
            return

        # Build the whole CDF before printing anything.
        # NOTE(review): the slice uses the bucket id as a position in
        # ``h``; presumably ids are 0..len(h) - 1 in order -- confirm.
        CDF = [
            (bucket, sum([c[1] / total for c in h[:bucket + 1]]))
            for bucket, _count in h
        ]
        print("x, y")
        for x, y in CDF:
            print(str(x) + ",", y)
        return

    if mode == "q":
        print("x, y")
        for q in quantiles(buckets):
            q = round(q, 10)
            x = stats.exp_inv(q)
            f_val = round(x, 4)
            j = stats.bucket(f_val)

            # f_val: The function value
            # j: which bucket we are working with
            print(str(f_val) + ",", stats.bucket_interval(j)[0])