def main():
    """Print the mean and sample variance estimated from a histogram.

    Reads the CSV file named by ``sys.argv[1]``, converts the first two
    columns of every row to floats and passes the resulting pairs to
    ``stats.gen_histogram(buckets + 1, items)``. The statistics are then
    computed from the grouped data: every bucket is represented by the
    midpoint of its ``stats.bucket_interval`` and weighted by its
    frequency.

    ``stats.total_elements(h)`` is used as the sample size ``n``
    (presumably the sum of the bucket frequencies; confirm against
    ``stats``).

    Prints an error message and returns early when no filename is given,
    when the histogram holds no elements, or (after printing the mean)
    when it holds a single element, for which the sample variance is
    undefined.
    """
    buckets = 10
    bucket_max = 5
    stats.set_bucket_width(bucket_max / buckets)

    if len(sys.argv) < 2:
        print("Error: Filename required")
        return

    # newline="" is what the csv module expects of the file object it reads.
    with open(sys.argv[1], "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        # csv.reader yields [] for blank lines; skip them instead of
        # failing with IndexError on row[0].
        items = [(float(row[0]), float(row[1])) for row in reader if row]

    h = stats.gen_histogram(buckets + 1, items)

    n = stats.total_elements(h)
    if not n:
        print("Error: No data points found")
        return

    # Frequencies
    freqs = [entry[1] for entry in h]

    # Bucket centers: midpoint of each bucket's interval.
    centers = []
    for entry in h:
        interval = stats.bucket_interval(entry[0])
        low, high = interval[0], interval[1]
        centers.append((high - low) / 2 + low)

    weighted_sum = sum(f * c for f, c in zip(freqs, centers))
    weighted_sq_sum = sum(f * (c * c) for f, c in zip(freqs, centers))

    mean = weighted_sum / n
    print("Mean:", mean)

    if n == 1:
        # The n - 1 denominator below would be zero.
        print("Error: Variance requires at least two data points")
        return

    # Sample variance of grouped data: (sum(f * m^2) - n * mean^2) / (n - 1)
    vari = (weighted_sq_sum - (n * mean * mean)) / (n - 1)
    print("Variance:", vari)
def main():
    """Print histogram, CDF or quantile data as ``x, y`` CSV lines.

    Reads the CSV file named by ``sys.argv[1]``, converts the first two
    columns of every row to floats and passes the resulting pairs to
    ``stats.gen_histogram(buckets + 1, items)``. The optional second
    argument ``sys.argv[2]`` selects what is printed:

    * ``h`` (default): for every bucket, its frequency divided by
      ``stats.total_elements(h)`` and scaled by ``buckets / bucket_max``
      (the reciprocal of the bucket width set at the top).
    * ``c``: for every bucket, the cumulative sum of those relative
      frequencies up to and including that bucket.
    * ``q``: for every value produced by ``quantiles(buckets)``, the
      rounded result of ``stats.exp_inv`` and the lower bound of the
      bucket that ``stats.bucket`` assigns to it.

    Prints an error message and returns when the filename is missing, the
    mode is not one of the above, or the histogram holds no elements in
    the ``h``/``c`` modes.
    """
    buckets = 10
    bucket_max = 5
    stats.set_bucket_width(bucket_max / buckets)

    if len(sys.argv) < 2:
        print("Error: Filename required")
        return

    mode = sys.argv[2] if len(sys.argv) >= 3 else "h"
    if mode not in ("h", "c", "q"):
        # Previously an unknown mode silently produced no output at all.
        print("Error: Unknown mode:", mode)
        return

    # newline="" is what the csv module expects of the file object it reads.
    with open(sys.argv[1], "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        # csv.reader yields [] for blank lines; skip them instead of
        # failing with IndexError on row[0].
        items = [(float(row[0]), float(row[1])) for row in reader if row]

    h = stats.gen_histogram(buckets + 1, items)

    if mode == "q":
        print("x, y")
        # `quantiles` is defined outside this function.
        for q in quantiles(buckets):
            q = round(q, 10)
            # f_val: the function value
            f_val = round(stats.exp_inv(q), 4)
            # j: which bucket we are working with
            j = stats.bucket(f_val)
            print(str(f_val) + ",", stats.bucket_interval(j)[0])
        return

    total = stats.total_elements(h)
    if not total:
        # Both remaining modes divide by the total.
        print("Error: No data points found")
        return

    print("x, y")
    if mode == "h":
        for bucket, count in h:
            print(str(bucket) + ",", count / total * buckets / bucket_max)
        return

    # mode == "c"
    # NOTE(review): the slice uses the bucket id as a position in h, so this
    # assumes bucket ids equal their index in h - confirm against
    # stats.gen_histogram.
    for bucket, _count in h:
        cumulative = sum(entry[1] / total for entry in h[:bucket + 1])
        print(str(bucket) + ",", cumulative)