def get_stats(results):
    """Summarise every metric across the repeated runs of each id.

    INPUT:
        results is a dictionary: {id : [{}, {}, {}]}, i.e. each id maps to
        a list of per-run dictionaries of metric -> value. The metric names
        are taken from the first run of each id.
    OUTPUT:
        OrderedDict mapping each id to an OrderedDict of metric -> Stats,
        where Stats is built positionally from the min, max, mean, median
        and stddev of the metric (each passed through intigerize). When a
        ValueError is raised while computing them, the raw value from the
        first run is stored instead. An id with no runs maps to an empty
        OrderedDict.
    """
    ret = OrderedDict()
    for id_, res_list in results.items():
        average_res = OrderedDict()
        # An id without any recorded run has no metrics to summarise;
        # indexing res_list[0] would raise IndexError.
        metrics = res_list[0].keys() if res_list else ()
        for metric in metrics:
            try:
                metric_results = [float(result[metric]) for result in res_list]
                average_res[metric] = Stats(
                    intigerize(min(metric_results)),
                    intigerize(max(metric_results)),
                    intigerize(mean(metric_results)),
                    intigerize(median(metric_results)),
                    intigerize(stddev(metric_results)),
                )
            except ValueError:
                # We fall back here if every run was executed only once
                # (original author's note); float() also raises ValueError
                # for non-numeric strings, which takes the same path.
                average_res[metric] = res_list[0][metric]
        ret[id_] = average_res
    return ret
def create_main_node(tableT, table):
    """Build a Node whose ``vec`` holds one labelled tuple per row of *table*.

    Args:
        tableT: indexable container; items 2 and 3 are passed to
            ``helper.mean`` and item 4 to ``helper.seAk`` (presumably the
            column-wise view of *table* -- confirm against the caller).
        table: iterable of rows; each row is read at indices 0, 1, 3 and 4.

    Returns:
        A new ``Node`` with ``(city, affil, income, working)`` appended to
        ``node.vec`` for every row.
    """
    # NOTE(review): this value is computed but never read below.
    income_low_average = helper.mean(tableT[2])
    income_hgh_average = helper.mean(tableT[3])
    working_seAk = helper.seAk(tableT[4])

    node = Node()
    for row in table:
        city = row[0]
        affil = row[1]

        if row[3] < income_hgh_average:
            income = 'low'
        else:
            income = 'high'

        # NOTE(review): values below the second threshold are labelled
        # 'high' and everything else 'medium' -- confirm this ordering is
        # intended.
        if row[4] < working_seAk[0]:
            working = 'low'
        elif row[4] < working_seAk[1]:
            working = 'high'
        else:
            working = 'medium'

        node.vec.append((city, affil, income, working))
    return node
# __init__(self, data): Python 2 constructor (it uses the `print` statement).
# Reading the statements left to right, it:
#   1. appends hp.mean(data[i]) to self.Mean for every entry of data;
#   2. walks a grid with step 0.1 over np.arange(min1, max) and
#      np.arange(miny, may), evaluates self.get_Gx(i, j, k) for k = 0..2 and
#      files each grid point in X[C] / Y[C], where C is the index of the
#      largest of the three values; `sum` counts the grid points;
#   3. prints the count and writes len(X[i])/sum into prob[i];
#   4. adds the diagonal entries of hp.get_mat(data[0..2]) into self.matrix,
#      zeroes the off-diagonal entries, averages the two diagonal entries
#      into self.var and writes that value back to both diagonal cells;
#   5. sets self.W[i] to Mean[i][0]/var, Mean[i][1]/var and
#      log(self.prob[i]) - (Mean[i][0]^2 + Mean[i][1]^2) / (2*var).
# NOTE(review): this line has lost its line breaks, so the nesting described
#   above is inferred -- confirm against the original file.
# NOTE(review): min1, max, miny, may and prob are not defined in this method;
#   `max` is used both as a range bound and as a function (max(G)), and step 3
#   writes prob[i] while step 5 reads self.prob[i] -- confirm where these
#   names come from.
# NOTE(review): the expression statement `self.matrix[i][j]/3` discards its
#   result, so the summed diagonal is never divided by 3 -- presumably an
#   assignment was intended.
# NOTE(review): len(X[i])/sum divides two ints; under Python 2 without
#   `from __future__ import division` that is floor division.
# NOTE(review): self.Mean, self.matrix, self.var and self.W are used without
#   being initialised here; `sum` shadows the builtin.
def __init__(self,data): for i in range(len(data)): self.Mean.append(hp.mean(data[i])) X = [[] for i in range(3)] Y = [[] for i in range(3)] sum=0 for i in np.arange(min1,max,0.1): for j in np.arange(miny,may,0.1): sum=sum+1 G = [ 0 for i in range(3)] for k in range(3): G[k] = self.get_Gx(i,j,k); C=G.index(max(G)); X[C].append(i) Y[C].append(j) print sum for i in range(3): prob[i]=len(X[i])/sum print(prob[i]) # plt.plot(X[0],Y[0],colour='Red',label= "Class_1") # plt.plot(X[1],Y[1],colour='Blue',label= "Class_1") # plt.plot(X[2],Y[2],colour='Black',label= "Class_1") # plt.show() c1_mat = hp.get_mat(data[0]) c2_mat = hp.get_mat(data[1]) c3_mat = hp.get_mat(data[2]) for i in range(2): for j in range(2): if i==j: self.matrix[i][j]=c1_mat[i][j]+c2_mat[i][j]+c3_mat[i][j] self.matrix[i][j]/3 else: self.matrix[i][j]=0 for i in range(2): self.var = self.var+self.matrix[i][i] self.var=self.var/2; self.matrix[0][0]=self.var self.matrix[1][1]=self.var print "variance:",self.var # setting line parameters for i in range(len(data)): self.W[i][0] = self.Mean[i][0]/self.var self.W[i][1] = self.Mean[i][1]/self.var self.W[i][2] = np.log(self.prob[i])-((self.Mean[i][0]*self.Mean[i][0]+self.Mean[i][1]*self.Mean[i][1]))/(2*self.var)
# main_loop(app): coroutine that resets state['buffer'], state['client'] and
# state['current_run'] on app['state'], then loops forever polling
# helper.update_client(state, timeout=1). For each non-None result `outs` it
# records one entry in `rates` and one line in `msg`:
#   * outs.timeout truthy  -> points / STATIONS, "- Timeout (partial run possible)"
#   * outs.code != 0       -> helper.eprint(outs, current_run), points / STATIONS,
#                             "- Runtime error (partial run possible)\n"
#   * no current run       -> 0, "- Session wasn't started"
#   * otherwise            -> current_run[0] / STATIONS,
#                             "- Normal run (either good or bad)"
# where points is 0 when state['current_run'] is None and
# state['current_run'][0] otherwise. When `outs` is None but
# state['current_run'] is truthy, a second path reports "early_bird" via
# helper.eprint, records a normal run, calls helper.kill_client(state) and
# clears the current run. The loop awaits asyncio.sleep(0.005) and, once at
# least 20 rates exist, calls helper.exit_grader(helper.mean(rates),
# "\n".join(msg)). asyncio.CancelledError is swallowed; any other Exception
# is handed to helper.bail_exception().
# NOTE(review): this line has lost its line breaks; it is not possible to tell
#   from here whether the record/kill statements of the "early_bird" path sit
#   inside `if state['current_run'][1] is None` or after it, nor exactly where
#   the sleep and the `len(rates) >= 20` check are nested -- confirm against
#   the original file.
# NOTE(review): helper.update_client is called without `await`; if it blocks
#   for up to `timeout` seconds it would stall the event loop -- confirm.
# NOTE(review): the "Runtime error" message is an f-string without
#   placeholders and, unlike the other messages, ends with a newline.
async def main_loop(app): state = app['state'] state['buffer'] = [] state['client'] = None state['current_run'] = None rates = [] msg = ["Results:"] try: while True: outs = helper.update_client(state, timeout=1) if outs is not None: if outs.timeout: points = (0 if state['current_run'] is None else state['current_run'][0]) rates.append(points / STATIONS) msg.append("- Timeout (partial run possible)") state['current_run'] = None elif outs.code != 0: helper.eprint(outs, state['current_run']) points = (0 if state['current_run'] is None else state['current_run'][0]) rates.append(points / STATIONS) msg.append(f"- Runtime error (partial run possible)\n") state['current_run'] = None elif state['current_run'] is None: msg.append("- Session wasn't started") rates.append(0) else: msg.append("- Normal run (either good or bad)") rates.append(state['current_run'][0] / STATIONS) state['current_run'] = None elif state['current_run']: if state['current_run'][1] is None: helper.eprint("early_bird", state['current_run']) msg.append("- Normal run (either good or bad)") rates.append(state['current_run'][0] / STATIONS) helper.kill_client(state) state['current_run'] = None await asyncio.sleep(0.005) if len(rates) >= 20: score = helper.mean(rates) helper.exit_grader(score, "\n".join(msg)) except asyncio.CancelledError: pass except Exception as e: helper.bail_exception()
def __init__(self, data):
    """Derive per-class weight terms from one matrix shared by all classes.

    Steps, in order:
      * keep *data* on ``self.class_data`` and append ``hp.mean(data[i])``
        to ``self.Mean`` for every entry;
      * average the top-left 2x2 entries of ``hp.get_mat(data[0..2])``
        element-wise into ``self.matrix`` (presumably covariance matrices
        -- confirm against ``hp.get_mat``);
      * invert ``self.matrix`` and, for each class ``i``, append to
        ``self.W[i]``: ``-0.5 * inv``, ``(inv . mean)^T`` and
        ``-0.5 * (log det(matrix) + mean^T . inv . mean)``.

    NOTE(review): ``self.Mean``, ``self.matrix`` and ``self.W`` must already
    exist when this runs; if they are class-level attributes they are shared
    between instances -- confirm.
    """
    self.class_data = data
    n_classes = len(data)

    for idx in range(n_classes):
        class_mean = hp.mean(data[idx])
        self.Mean.append(class_mean)

    first = hp.get_mat(data[0])
    second = hp.get_mat(data[1])
    third = hp.get_mat(data[2])

    for row in range(2):
        for col in range(2):
            # Sum first, then divide the stored value, exactly as two steps.
            self.matrix[row][col] = first[row][col] + second[row][col] + third[row][col]
            self.matrix[row][col] = self.matrix[row][col] / 3

    inv = np.linalg.inv(self.matrix)

    for idx in range(n_classes):
        mean_col = np.matrix(self.Mean[idx]).transpose()
        inv_mean = np.dot(inv, mean_col)
        weights = self.W[idx]
        weights.append(-0.5 * inv)
        weights.append(inv_mean.transpose())
        log_det = np.log(np.linalg.det(self.matrix))
        weights.append(-0.5 * (log_det + np.dot(mean_col.transpose(), inv_mean)))
def evaluate(self, query_id='all', metric='F', method='ltn-lnn', verbose=False):
    """Score the retrieval index on stored queries and return the mean score.

    For every ``(query_file, result_file)`` pair yielded by
    ``self.q_and_res_path(query_id)`` the query file is read (first line is
    the title, second line the text; the title is reused when the second
    line is missing or blank), ``self.retrieval_index.query`` is run, and
    its result is compared with the whitespace-separated ids of the result
    file (commas removed) using ``IREvaluator.funcs[metric]``.

    Args:
        query_id: forwarded unchanged to ``self.q_and_res_path``.
        metric: key into ``IREvaluator.funcs``.
        method: only its third character is inspected here; ``'c'`` makes
            ``should_divide`` true for the index query.
        verbose: when true, print per-query details and the running mean.

    Returns:
        The arithmetic mean of the per-query scores.

    Raises:
        ValueError: if a query file is empty.
        AssertionError: if no query was evaluated.
    """
    should_divide = (method[2] == 'c')
    scores = []
    for i, (q, res) in enumerate(self.q_and_res_path(query_id)):
        with open(q, 'r') as f:
            content = f.read()
        # str.split always returns at least one element, so checking the
        # length of the split result could never detect an empty file;
        # test the raw text instead.
        if not content:
            raise ValueError('file has no lines')
        lines = content.split('\n')
        query_title = lines[0]
        if len(lines) > 1 and lines[1].replace(' ', ''):
            query_text = lines[1]
        else:
            query_text = query_title
        top_k = self.retrieval_index.query(query_title, query_text, should_divide)
        with open(res, 'r') as f:
            real_best = f.read().replace(',', '').split()
        scores.append(IREvaluator.funcs[metric](real_best, top_k))
        if verbose:
            print(top_k)
            print(query_text)
            print(query_title)
            print(real_best)
            print("i = %d, q = %s" % (i, q))
            print("metric = %s, Element %.2f, running mean = %.2f"
                  % (metric, scores[-1], mean(scores)))
            print()
    # Kept as an assert so callers still see AssertionError when nothing
    # was evaluated.
    assert len(scores) > 0
    return sum(scores) / len(scores)
def __init__(self, data):
    """Derive per-class weight terms from one diagonalised matrix per class.

    Steps, in order:
      * keep *data* on ``self.class_data`` and append ``hp.mean(data[i])``
        to ``self.Mean`` for every entry;
      * store ``hp.get_mat(data[c])`` in ``self.matrix[c]`` for c = 0..2
        (presumably covariance matrices -- confirm against ``hp.get_mat``)
        and zero their ``[0][1]`` and ``[1][0]`` entries;
      * for each class ``i``, invert ``self.matrix[i]`` and append to
        ``self.W[i]``: ``-0.5 * inv``, ``(inv . mean)^T`` and
        ``-0.5 * (log det(matrix[i]) + mean^T . inv . mean)``.

    NOTE(review): ``self.Mean``, ``self.matrix`` and ``self.W`` must already
    exist when this runs; if they are class-level attributes they are shared
    between instances -- confirm.
    """
    self.class_data = data
    n_classes = len(data)

    for idx in range(n_classes):
        class_mean = hp.mean(data[idx])
        self.Mean.append(class_mean)

    for cls in range(3):
        self.matrix[cls] = hp.get_mat(data[cls])

    # Only the two off-diagonal cells of each 2x2 block are cleared.
    for row, col in ((0, 1), (1, 0)):
        for cls in range(3):
            self.matrix[cls][row][col] = 0

    for idx in range(n_classes):
        inv = np.linalg.inv(self.matrix[idx])
        mean_col = np.matrix(self.Mean[idx]).transpose()
        inv_mean = np.dot(inv, mean_col)
        weights = self.W[idx]
        weights.append(-0.5 * inv)
        weights.append(inv_mean.transpose())
        log_det = np.log(np.linalg.det(self.matrix[idx]))
        weights.append(-0.5 * (log_det + np.dot(mean_col.transpose(), inv_mean)))
# K points are chosen randomly from data as mean
mean = fun.set_random_mean(data, k)
itr = 0
final_distortion = 999999.9
init_distortion = 0.0
# Defined up front so the plotting code below still works if the loop body
# never runs (i.e. `error` is not smaller than the initial distortion gap).
cluster = [[] for _ in range(k)]
# Repeat assignment + mean update until the total distortion changes by no
# more than `error` between two consecutive iterations.
while abs(final_distortion - init_distortion) > error:
    cluster = [[] for _ in range(k)]
    itr = itr + 1
    init_distortion = final_distortion
    final_distortion = 0
    # Assignment step: fun.get_Cluster returns the chosen cluster index and
    # the distortion contributed by this point.
    for point in data:
        assigned_cluster, min_distortion = fun.get_Cluster(point, mean, k)
        cluster[assigned_cluster].append(point)
        final_distortion = final_distortion + min_distortion
    # Update step: recompute every mean from its current members.
    for i in range(k):
        mean[i] = fun.mean(cluster[i])
    print("Iteration:", itr, " Distortion measure Error:", abs(final_distortion - init_distortion))
colour = fun.get_random_colour(k)
for t in range(k):
    # The loop variable is deliberately not called `data`, so the dataset
    # bound to that name is not overwritten by the last plotted point.
    for member in cluster[t]:
        plt.plot(member[0], member[1], color=colour[t], marker='o', markersize=1)
plt.savefig(sys.argv[1] + "_cluster_" + str(k) + ".png")
# Q8: What are the minimum, mean, median, and maximum salaries?
import helper

data = helper.read_salaries()
salaries = []
# TODO: get non-empty salaries from data

# NOTE(review): until the TODO above is filled in, `salaries` is empty and
# the first min() call below raises ValueError.
# Each statistic is computed immediately before it is printed.
minimum = min(salaries)
print('Minimum:', minimum)
average = helper.mean(salaries)
print('Mean:', average)
middle = helper.median(salaries)
print('Median:', middle)
maximum = max(salaries)
print('Maximum:', maximum)
#!/usr/bin/env python
"""Print summary statistics of random samples using the local helper module.

Two lists of 100 values drawn uniformly from [0, 100] are generated; the
summary statistics of ``xs`` are printed, followed by the covariance of
``xs`` and ``ys``.
"""
from collections import Counter
import random

from matplotlib import pyplot as plt

import helper

xs = [random.uniform(0, 100) for _ in range(100)]
ys = [random.uniform(0, 100) for _ in range(100)]

min_xs = min(xs)
max_xs = max(xs)
length = len(xs)
mean_xs = helper.mean(xs)
variance_xs = helper.variance(xs)
std_xs = helper.std(xs)
median_xs = helper.median(xs)
data_range_xs = helper.data_range(xs)

print("min: %s" % min_xs)
print("max: %s" % max_xs)
print("variance: %s" % variance_xs)
print("std: %s" % std_xs)
print("length: %s" % length)
print("mean: %s" % mean_xs)
print("median: %s" % median_xs)
print("range: %s" % data_range_xs)

covariance_xsys = helper.covariance(xs, ys)
# NOTE(review): the correlation is computed but not printed in the part of
# the script visible here.
correlation_xsys = helper.correlation(xs, ys)
print("covariance: %s" % covariance_xsys)