def top_bgp_data(flows):
    "Return the top ASes of a GVB flow recarray in the form of a dictionary."
    resume = {}
    flows_down = flows.compress(flows.direction == INDEX_VALUES.DOWN)
    # Top ASes over all downstream traffic
    flows_down_as = aggregate.aggregate(flows_down, 'asBGP', 'l3Bytes', sum)
    flows_down_as.sort(order='aggregation')
    for i in range(11):
        (resume['name_as_down_%d' % i],
         resume['vol_as_down_%d' % i]) = flows_down_as[-(i + 1)]
    resume['total_other_down_as'] = np.sum(flows_down_as[:-10].aggregation)
    # Top ASes for web traffic
    flows_down_web = flows_down.compress(flows_down.dscp == INDEX_VALUES.DSCP_WEB)
    flows_down_as_web = aggregate.aggregate(flows_down_web, 'asBGP', 'l3Bytes', sum)
    flows_down_as_web.sort(order='aggregation')
    for i in range(11):
        (resume['name_as_down_web_%d' % i],
         resume['vol_as_down_web_%d' % i]) = flows_down_as_web[-(i + 1)]
    resume['total_other_as_down_web'] = np.sum(flows_down_as_web[:-10].aggregation)
    # Top ASes for other (non-HTTP) streaming traffic
    flows_down_other_stream = flows_down.compress(
        flows_down.dscp == INDEX_VALUES.DSCP_OTHER_STREAM)
    flows_down_as_other_stream = aggregate.aggregate(
        flows_down_other_stream, 'asBGP', 'l3Bytes', sum)
    flows_down_as_other_stream.sort(order='aggregation')
    for i in range(11):
        (resume['name_as_down_other_stream_%d' % i],
         resume['vol_as_down_other_stream_%d' % i]) \
            = flows_down_as_other_stream[-(i + 1)]
    resume['total_other_as_down_other_stream'] = np.sum(
        flows_down_as_other_stream[:-10].aggregation)
    # Top ASes for HTTP streaming traffic
    flows_down_http_stream = flows_down.compress(
        flows_down.dscp == INDEX_VALUES.DSCP_HTTP_STREAM)
    flows_down_as_http_stream = aggregate.aggregate(
        flows_down_http_stream, 'asBGP', 'l3Bytes', sum)
    flows_down_as_http_stream.sort(order='aggregation')
    for i in range(11):
        (resume['name_as_down_http_stream_%d' % i],
         resume['vol_as_down_http_stream_%d' % i]) \
            = flows_down_as_http_stream[-(i + 1)]
    resume['total_other_as_down_http_stream'] = np.sum(
        flows_down_as_http_stream[:-10].aggregation)
    return resume
def fetch_data(flows):
    "Return a summary of a GVB flow recarray in the form of a dictionary."
    resume = {}
    vol_dir = aggregate.aggregate(flows, 'direction', 'l3Bytes', sum)
    resume['vol_up'] = vol_dir[0][1]
    resume['vol_down'] = vol_dir[1][1]
    resume['vol_tot'] = resume['vol_down'] + resume['vol_up']
    vol_dscp = aggregate.aggregate(flows, 'dscp', 'l3Bytes', sum)
    resume['vol_down_web'] = extract_aggregated_field(
        vol_dscp, 'dscp', INDEX_VALUES.DSCP_WEB)
    resume['vol_down_http_stream'] = extract_aggregated_field(
        vol_dscp, 'dscp', INDEX_VALUES.DSCP_HTTP_STREAM)
    resume['vol_down_other_stream'] = extract_aggregated_field(
        vol_dscp, 'dscp', INDEX_VALUES.DSCP_OTHER_STREAM)
    # To check the number-of-flows values:
    # nb_flows_dir = aggregate.aggregate(flows, 'direction', 'client_id', len)
    # nb_flows_up = list(nb_flows_dir[0])[1]
    # nb_flows_down = list(nb_flows_dir[1])[1]
    flows_down = flows.compress(flows.direction == INDEX_VALUES.DOWN)
    resume['nb_down_flows_tot'] = np.shape(flows_down)[0]
    flows_down_web = flows_down.compress(flows_down.dscp == INDEX_VALUES.DSCP_WEB)
    flows_down_other_stream = flows_down.compress(
        flows_down.dscp == INDEX_VALUES.DSCP_OTHER_STREAM)
    flows_down_http_stream = flows_down.compress(
        flows_down.dscp == INDEX_VALUES.DSCP_HTTP_STREAM)
    resume['nb_down_flows_web'] = np.shape(flows_down_web)[0]
    resume['nb_down_flows_http_stream'] = np.shape(flows_down_http_stream)[0]
    resume['nb_down_flows_other_stream'] = np.shape(flows_down_other_stream)[0]
    resume['nb_clients_tot'] = np.shape(np.unique(flows_down.client_id))[0]
    resume['nb_clients_web'] = np.shape(np.unique(flows_down_web.client_id))[0]
    resume['nb_clients_http_stream'] = np.shape(
        np.unique(flows_down_http_stream.client_id))[0]
    resume['nb_clients_other_stream'] = np.shape(
        np.unique(flows_down_other_stream.client_id))[0]
    # Flows larger than 1 MB
    flows_down_1MB = flows_down.compress(flows_down.l3Bytes > 10**6)
    flows_down_1MB_dscp = aggregate.aggregate(flows_down_1MB, 'dscp', 'l3Bytes', len)
    flows_down_1MB_web = flows_down_1MB.compress(
        flows_down_1MB.dscp == INDEX_VALUES.DSCP_WEB)
    flows_down_1MB_http_stream = flows_down_1MB.compress(
        flows_down_1MB.dscp == INDEX_VALUES.DSCP_HTTP_STREAM)
    flows_down_1MB_other_stream = flows_down_1MB.compress(
        flows_down_1MB.dscp == INDEX_VALUES.DSCP_OTHER_STREAM)
    resume['nb_clients_1MB_tot'] = np.shape(np.unique(flows_down_1MB.client_id))[0]
    resume['nb_clients_1MB_web'] = np.shape(
        np.unique(flows_down_1MB_web.client_id))[0]
    resume['nb_clients_1MB_http_stream'] = np.shape(
        np.unique(flows_down_1MB_http_stream.client_id))[0]
    resume['nb_clients_1MB_other_stream'] = np.shape(
        np.unique(flows_down_1MB_other_stream.client_id))[0]
    resume['nb_down_flows_1MB_tot'] = np.shape(flows_down_1MB)[0]
    resume['nb_down_flows_1MB_web'] = extract_aggregated_field(
        flows_down_1MB_dscp, 'dscp', INDEX_VALUES.DSCP_WEB)
    resume['nb_down_flows_1MB_http_stream'] = extract_aggregated_field(
        flows_down_1MB_dscp, 'dscp', INDEX_VALUES.DSCP_HTTP_STREAM)
    resume['nb_down_flows_1MB_other_stream'] = extract_aggregated_field(
        flows_down_1MB_dscp, 'dscp', INDEX_VALUES.DSCP_OTHER_STREAM)
    return resume
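# The helper extract_aggregated_field used above is not part of this excerpt.
# Below is a minimal sketch of what it might look like, assuming the
# aggregated recarray carries the key column plus an 'aggregation' column
# (as used in top_bgp_data) and that a missing key should count as zero.
import numpy as np


def extract_aggregated_field(aggregated, key_field, key_value):
    "Return the aggregated value matching key_value, or 0 if the key is absent."
    matches = aggregated[aggregated[key_field] == key_value]
    return matches['aggregation'][0] if len(matches) > 0 else 0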
def process(input_file, output_name, var_map, calc=None, agg_areas=True):

    def _add_pct(data_frame):
        var_list = data_frame.columns.tolist()
        for var in GEO_COLUMNS + ['area']:
            if var in var_list:
                var_list.remove(var)
        return pct.add_percentages(data_frame, var_list, var_list[0])

    def _export(data_frame, suffix, include_index=False):
        full_name = output_name + '_' + suffix + '.csv'
        data_frame.to_csv(full_name, index=include_index)
        print('Saved file: ' + full_name)
        return

    # Clean municipality data
    data = cd.clean_data(input_file)
    data_new = data[GEO_COLUMNS + sorted(var_map.keys())]
    data_new = data_new.rename(columns=var_map)

    # Perform any extra necessary calculations
    if calc:
        data_new = calc(data_new)

    # Aggregate
    if agg_areas:
        data_agg = agg.aggregate(data_new)
        data_ri = agg.aggregate(data_new, agg_var=(lambda x: True))

    # Calculate percentages
    data_new_w_pct = _add_pct(data_new)
    if agg_areas:
        data_agg_w_pct = _add_pct(data_agg)
        data_ri_w_pct = _add_pct(data_ri.drop('area', axis=1))

    # Export to CSV
    _export(data_new_w_pct, 'munis')
    if agg_areas:
        _export(data_agg_w_pct, 'areas', include_index=True)
        _export(data_ri_w_pct, 'state')
        return (data_new_w_pct, data_agg_w_pct, data_ri_w_pct)
    else:
        return (data_new_w_pct,)
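# A hypothetical call to process(). The input path, output prefix, and the
# raw-to-friendly column mapping in var_map are illustrative only; the real
# column names depend on the extract cleaned by cd.clean_data and on
# GEO_COLUMNS. With agg_areas left at its default, three data frames come back
# and three CSV files are written alongside them.
var_map = {'B01001_001E': 'total_population'}  # assumed raw ACS column name
munis, areas, state = process('raw_municipal_data.csv', 'population', var_map)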
def evaluate():
    if args.evaluate == "slurm" and args.evaluate_checkpoint is None:
        # Save a checkpoint and resubmit this script as a SLURM evaluation job
        ckpt = "./checkpoint-%d.p" % model.iteration
        save(ckpt)
        slurmcmd = "bash -c 'p=$(./slurmparams) sbatch $p --time=240 -J evaluate -o evaluate_" + str(
            model.iteration) + ".out om-run python " + " ".join(
                sys.argv) + " --evaluate-checkpoint=" + ckpt + "'"
        print(slurmcmd)
        os.system(slurmcmd)
    else:
        M.evaluate(model)
        evaluate_start = time.time()
        elbo, kl = getELBo()
        print("elbo:", elbo)
        print("KL:", kl)
        n_classification_samples = args.classification_samples
        classification_20_way, predictive = getClassification(
            20, n_classification_samples)
        print("20-way Accuracy:",
              "%5.2f" % (classification_20_way * 100) + "%", flush=True)
        precision_20_way = math.sqrt(classification_20_way *
                                     (1 - classification_20_way) /
                                     n_classification_samples)
        #classification_100_way, _ = getClassification(100, n_classification_samples)
        #print("100-way Accuracy:", "%5.2f" % (classification_100_way*100) + "%", flush=True)
        #precision_100_way = math.sqrt(classification_100_way * (1-classification_100_way) / n_classification_samples)
        print("Evaluate took a total of:",
              int((time.time() - evaluate_start) / 60), "minutes")
        ev = {
            '20-way': classification_20_way,
            #'100-way': classification_100_way,
            'precision-20-way': precision_20_way,
            #'precision-100-way': precision_100_way,
            #'predictive': predictive,
            'ELBo': elbo,
            'kl': kl,
            'time': model.wallclock,
            'iteration': model.iteration
        }
        with open("evaluate_%d.p" % model.iteration, "wb") as f:
            pickle.dump(ev, f)
        aggregate()
        #model.history.append(ev)
        return ev
def aggregate(self):
    '''
    Aggregate/merge individual sample GTF files
    '''
    r = self.results
    a = self.args
    samples = self.samples
    aggregate(samples,
              ref_gtf_file=a.ref_gtf_file,
              gtf_expr_attr=a.gtf_expr_attr,
              tmp_dir=r.tmp_dir,
              output_gtf_file=r.transfrags_gtf_file,
              stats_file=r.aggregate_stats_file)
    # update status and write to file
    self.status.aggregate = True
    self.status.write(self.results.status_file)
def calculate_statistics(results):
    """Calculates aggregate statistics for a set of NDT results.

    Calculates aggregate statistics (e.g. mean, median, std dev) for each
    relevant NDT metric (e.g. total test duration, s2c throughput).

    Args:
        results: A list of NdtResult instances for which to calculate
            aggregate statistics.

    Returns:
        A ResultStatistics instance that contains aggregate statistics for
        each NDT metric.
    """
    total_duration = aggregate.aggregate(
        map(result_metrics.total_duration, results))
    c2s_duration = aggregate.aggregate(
        map(result_metrics.c2s_duration, results))
    s2c_duration = aggregate.aggregate(
        map(result_metrics.s2c_duration, results))
    c2s_throughput = aggregate.aggregate(
        map(lambda result: result.c2s_result.throughput, results))
    s2c_throughput = aggregate.aggregate(
        map(lambda result: result.s2c_result.throughput, results))
    latency = aggregate.aggregate(map(lambda result: result.latency, results))
    return ResultStatistics(total_duration, c2s_duration, s2c_duration,
                            c2s_throughput, s2c_throughput, latency)
def perform_aggregation(df, freq):
    log.info("Generating summary tables")
    # Limit to values during working hours
    df = ag.limit_by_hours(df)
    # Perform multi-column aggregation and extract interesting stats
    # from the aggregate table
    stats = ag.extract_stats(ag.aggregate(df, freq=freq))
    # Iterate over each month and tabulate each stats set
    table_list = [ag.tabulate(stats[month]) for month in list(stats.keys())]
    return zip(list(stats.keys()), table_list)
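# A hypothetical use of perform_aggregation. The CSV path and its columns are
# illustrative only, and freq is assumed to follow the pandas offset-alias
# convention ('M' for month-end) since the stats dict appears to be keyed by
# month; the ag helper module itself is not shown in this excerpt.
import pandas as pd

df = pd.read_csv('usage_log.csv', parse_dates=['timestamp'])  # assumed input file
for month, table in perform_aggregation(df, freq='M'):
    print(month)
    print(table)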
def fetch_data_http_stream_down(flow):
    "Return a summary of interesting HTTP streaming down flow characteristics."
    flow = flow.view(np.recarray)
    resume = {}
    flows_1MB = flow.compress(flow.l3Bytes > 10**6)
    vol_dir = aggregate.aggregate(flow, 'direction', 'l3Bytes', sum)
    #resume['vol_up'] = vol_dir[0][1]
    resume['vol_down'] = vol_dir[0][1]
    #resume['total_vol'] = resume['vol_down'] + resume['vol_up']
    resume['nb_client'] = len(np.unique(flow.client_id))
    resume['nb_flow'] = len(flow)
    resume['nb_client_1MB'] = len(np.unique(flows_1MB.client_id))
    resume['nb_flow_1MB'] = len(flows_1MB)
    resume['mean_flow_size'] = np.mean(flow.l3Bytes)
    resume['median_flow_size'] = np.median(flow.l3Bytes)
    resume['max_flow_size'] = np.int64(np.max(flow.l3Bytes))
    resume['mean_flow_duration'] = np.mean(flow.duration)
    resume['median_flow_duration'] = np.median(flow.duration)
    resume['max_flow_duration'] = np.max(flow.duration)
    resume['mean_flow_peak_rate'] = np.mean(80.0 * flow.peakRate)
    resume['median_flow_peak_rate'] = np.median(80.0 * flow.peakRate)
    resume['max_flow_peak_rate'] = np.max(80.0 * flow.peakRate)
    mean_rate = [8 * x['l3Bytes'] / (1000.0 * x['duration'])
                 for x in flow if x['duration'] > 0]
    resume['mean_flow_mean_rate'] = np.mean(mean_rate)
    resume['median_flow_mean_rate'] = np.median(mean_rate)
    resume['max_flow_mean_rate'] = np.max(mean_rate)
    mean_rate_1MB = [8 * x['l3Bytes'] / (1000.0 * x['duration'])
                     for x in flow
                     if x['duration'] > 0 and x['l3Bytes'] > 10**6]
    resume['mean_flow_mean_rate_1MB'] = np.mean(mean_rate_1MB)
    resume['median_flow_mean_rate_1MB'] = np.median(mean_rate_1MB)
    resume['max_flow_mean_rate_1MB'] = np.max(mean_rate_1MB)
    resume['mean_flow_AR'] = compute_AT.compute_AT(flow.initTime)[0]
    resume['mean_flow_100_AR_per_cl'] = (
        100 * resume['mean_flow_AR'] / resume['nb_client'])
    return resume
def evaluateQuery(query, metadataDict):
    for stmnt_unformated in sqlparse.parse(query):
        statement = sqlparse.parse(sqlparse.format(str(stmnt_unformated)))[0]
        query_tokens = []
        for x in statement.tokens:
            if re.match('([\s]+)', str(x)):
                continue
            else:
                query_tokens.append(str(x))
        #print query_tokens

        distinct_flag = 0
        distinct_flag2 = 0
        if str(query_tokens[1]).lower() == "distinct":
            distinct_flag = 1
        elif "distinct(" in query:
            distinct_flag2 = 1
        #print distinct_flag2
        colNames = query_tokens[1 + distinct_flag].split(",")
        #print colNames
        tableNames = query_tokens[3 + distinct_flag].split(",")
        #print tableNames

        # Error handling
        error_handling(query, colNames, tableNames)

        # Check for an aggregate function
        func = ["min", "max", "count", "sum", "avg"]
        if any(x in query for x in func):
            aggregate(colNames[0], tableNames[0])
            return

        # Read table data from file (cross product of all listed tables)
        temp_table_data = []
        table_data = []
        cross = []
        for t in tableNames:
            f = open(t + ".csv", 'r')
            temp_table_data = [line.replace('"', '').strip() for line in f]
            if len(table_data) == 0:
                table_data = temp_table_data
            else:
                for y in temp_table_data:
                    for z in table_data:
                        cross.append(z + "," + y)
                table_data = cross
                cross = []
        #print table_data

        # Check for a WHERE condition
        index = 4 + distinct_flag
        if len(query_tokens) > index:
            whereCond = query_tokens[index][6:]
            #print whereCond
            table_data = whereEvaluate(whereCond, tableNames, table_data)

        # Projection
        table_data = project(colNames, tableNames, table_data)
        if distinct_flag == 1 or distinct_flag2 == 1:
            table_data = [table_data[0], distinct(table_data[1])]
        # for x in table_data:
        #     print table_data

        # Print output
        print "Output:"
        header = ""
        flag = 0
        for i in table_data[0]:
            if flag == 0:
                header += str(i)
                flag = 1
            else:
                header = header + "," + str(i)
        print header
        for x in table_data[1]:
            flag = 0
            valstr = ""
            if isinstance(x, list):
                for y in x:
                    #print y
                    if flag == 0:
                        valstr = valstr + str(y)
                        flag = 1
                    else:
                        valstr = valstr + "," + str(y)
                #print valstr
            else:
                if flag == 0:
                    valstr = valstr + str(x)
                    flag = 1
                else:
                    valstr = valstr + "," + str(x)
            print valstr
def modify_and_fetch_data_named(resume, flows, name):
    "Extend the summary dictionary 'resume' with data from a GVB array under the given name."
    vol_dir = aggregate.aggregate(flows, 'direction', 'l3Bytes', sum)
    resume['vol_up_%s' % name] = vol_dir[0][1]
    resume['vol_down_%s' % name] = vol_dir[1][1]
    resume['vol_tot_%s' % name] = (resume['vol_down_%s' % name]
                                   + resume['vol_up_%s' % name])
    vol_dscp = aggregate.aggregate(flows, 'dscp', 'l3Bytes', sum)
    resume['vol_down_web_%s' % name] = extract_aggregated_field(
        vol_dscp, 'dscp', INDEX_VALUES.DSCP_WEB)
    resume['vol_down_http_stream_%s' % name] = extract_aggregated_field(
        vol_dscp, 'dscp', INDEX_VALUES.DSCP_HTTP_STREAM)
    resume['vol_down_other_stream_%s' % name] = extract_aggregated_field(
        vol_dscp, 'dscp', INDEX_VALUES.DSCP_OTHER_STREAM)
    flows_down = flows.compress(flows.direction == INDEX_VALUES.DOWN)
    resume['nb_down_flows_tot_%s' % name] = np.shape(flows_down)[0]
    flows_down_web = flows_down.compress(flows_down.dscp == INDEX_VALUES.DSCP_WEB)
    flows_down_other_stream = flows_down.compress(
        flows_down.dscp == INDEX_VALUES.DSCP_OTHER_STREAM)
    flows_down_http_stream = flows_down.compress(
        flows_down.dscp == INDEX_VALUES.DSCP_HTTP_STREAM)
    resume['nb_down_flows_web_%s' % name] = np.shape(flows_down_web)[0]
    resume['nb_down_flows_http_stream_%s' % name] = np.shape(flows_down_http_stream)[0]
    resume['nb_down_flows_other_stream_%s' % name] = np.shape(flows_down_other_stream)[0]
    resume['vol_down_per_flow_tot_%s' % name] = (
        resume['vol_down_%s' % name] / resume['nb_down_flows_tot_%s' % name])
    resume['vol_down_per_flow_web_%s' % name] = (
        resume['vol_down_web_%s' % name] / resume['nb_down_flows_web_%s' % name])
    resume['vol_down_per_flow_http_stream_%s' % name] = (
        resume['vol_down_http_stream_%s' % name]
        / resume['nb_down_flows_http_stream_%s' % name])
    resume['vol_down_per_flow_other_stream_%s' % name] = (
        resume['vol_down_other_stream_%s' % name]
        / resume['nb_down_flows_other_stream_%s' % name])
    resume['nb_clients_tot_%s' % name] = np.shape(np.unique(flows_down.client_id))[0]
    resume['nb_clients_web_%s' % name] = np.shape(np.unique(flows_down_web.client_id))[0]
    resume['nb_clients_http_stream_%s' % name] = np.shape(
        np.unique(flows_down_http_stream.client_id))[0]
    resume['nb_clients_other_stream_%s' % name] = np.shape(
        np.unique(flows_down_other_stream.client_id))[0]
    resume['vol_down_per_client_tot_%s' % name] = (
        resume['vol_down_%s' % name] / resume['nb_clients_tot_%s' % name])
    resume['vol_down_per_client_web_%s' % name] = (
        resume['vol_down_web_%s' % name] / resume['nb_clients_web_%s' % name])
    resume['vol_down_per_client_http_stream_%s' % name] = (
        resume['vol_down_http_stream_%s' % name]
        / resume['nb_clients_http_stream_%s' % name])
    resume['vol_down_per_client_other_stream_%s' % name] = (
        resume['vol_down_other_stream_%s' % name]
        / resume['nb_clients_other_stream_%s' % name])
    # Flows larger than 1 MB
    flows_down_1MB = flows_down.compress(flows_down.l3Bytes > 10**6)
    flows_down_1MB_dscp = aggregate.aggregate(flows_down_1MB, 'dscp', 'l3Bytes', len)
    flows_down_1MB_web = flows_down_1MB.compress(
        flows_down_1MB.dscp == INDEX_VALUES.DSCP_WEB)
    flows_down_1MB_http_stream = flows_down_1MB.compress(
        flows_down_1MB.dscp == INDEX_VALUES.DSCP_HTTP_STREAM)
    flows_down_1MB_other_stream = flows_down_1MB.compress(
        flows_down_1MB.dscp == INDEX_VALUES.DSCP_OTHER_STREAM)
    resume['nb_clients_1MB_tot_%s' % name] = np.shape(
        np.unique(flows_down_1MB.client_id))[0]
    resume['nb_clients_1MB_web_%s' % name] = np.shape(
        np.unique(flows_down_1MB_web.client_id))[0]
    resume['nb_clients_1MB_http_stream_%s' % name] = np.shape(
        np.unique(flows_down_1MB_http_stream.client_id))[0]
    resume['nb_clients_1MB_other_stream_%s' % name] = np.shape(
        np.unique(flows_down_1MB_other_stream.client_id))[0]
    resume['nb_down_flows_1MB_tot_%s' % name] = np.shape(flows_down_1MB)[0]
    resume['nb_down_flows_1MB_web_%s' % name] = extract_aggregated_field(
        flows_down_1MB_dscp, 'dscp', INDEX_VALUES.DSCP_WEB)
    resume['nb_down_flows_1MB_http_stream_%s' % name] = extract_aggregated_field(
        flows_down_1MB_dscp, 'dscp', INDEX_VALUES.DSCP_HTTP_STREAM)
    resume['nb_down_flows_1MB_other_stream_%s' % name] = extract_aggregated_field(
        flows_down_1MB_dscp, 'dscp', INDEX_VALUES.DSCP_OTHER_STREAM)
    return resume
assessor = pd.read_sql(
    'select * from aux.assessor_summary b join aux.addresses a using(address)',
    engine)
acs = pd.read_sql('select geo_id2 as census_tract_id, * from aux.acs', engine)
wt = pd.read_sql('select * from aux.ward_tracts', engine)

for level in ['tracts', 'wards']:
    if level == 'wards':
        acs_level = wt.merge(acs, on='census_tract_id', how='left')
        index = 'ward_id'
        weight = acs_level['area']
    else:
        acs_level = acs
        index = 'census_tract_id'
        weight = None

    acs_ag = a.aggregate(acs_level, columns.acs, weight, index)
    buildings_ag = a.aggregate(buildings, columns.building, index=index)
    assessor_ag = a.aggregate(assessor, columns.assessor, index=index)

    acs_ag.columns = ['acs_' + c for c in acs_ag.columns]
    assessor_ag.columns = ['assessor_' + c for c in assessor_ag.columns]
    buildings_ag.columns = ['buildings_' + c for c in buildings_ag.columns]

    ag = acs_ag.join(assessor_ag, how='outer')
    ag = ag.join(buildings_ag, how='outer')

    # to_sql uses the wrong datatype when writing the index as such;
    # a dtype can be specified with pandas 0.15.2
    ag.reset_index(inplace=True)
    ag.to_sql(level, engine, if_exists='replace', schema='output', index=False)
def fetch_data_general(in_flow, filtered=False):
    "Return a summary of interesting flow characteristics."
    # ('nb_down_flows_1MB%s', 'Nb', '.4g'),
    # ('avg_vol_down_per_flow%s', 'Bytes', '.4g'),
    # ('avg_vol_down_per_client%s', 'Bytes', '.4g')]
    #new_flows = {}
    resume = {}
    for app in ('', '_WEB', '_HTTP_STREAM', '_OTHER_STREAM'):
        if app == '':
            new_flow = in_flow  #['data%s' % app]
        else:
            dscp = get_dscp(app, in_flow, filtered=filtered)
            new_flow = in_flow.compress(in_flow['dscp'] == dscp)  #['data%s' % app]
        resume['App: %s' % app] = ''
        new_flow = new_flow.view(np.recarray)
        new_flow_down = new_flow.compress(new_flow.direction == INDEX_VALUES.DOWN)
        new_flow_1MB = new_flow.compress(new_flow.l3Bytes > 10**6)
        new_flow_down_1MB = new_flow_down.compress(new_flow_down.l3Bytes > 10**6)
        vol_dir = aggregate.aggregate(new_flow, 'direction', 'l3Bytes', sum)
        # resume['vol_up%s' % app] = vol_dir[0][1]
        # resume['vol_down%s' % app] = vol_dir[1][1]
        try:
            resume['vol_up%s' % app] = vol_u = vol_dir[0][1]
        except IndexError:
            resume['vol_up%s' % app] = vol_u = float(0)
        try:
            resume['vol_down%s' % app] = vol_d = vol_dir[1][1]
        except IndexError:
            resume['vol_down%s' % app] = vol_d = float(0)
        resume['total_vol%s' % app] = vol_u + vol_d
        resume['nb_flows_down%s' % app] = nb_fl = len(new_flow_down)
        resume['nb_flows_down_1MB%s' % app] = nb_fl_1mb = len(new_flow_down_1MB)
        resume['ratio_nb_flows%s' % app] = (
            int(100 * nb_fl_1mb / float(nb_fl))
            if (resume['nb_flows_down%s' % app] != 0) else 0)
        resume['nb_client_down%s' % app] = nb_cl = len(np.unique(
            new_flow_down.client_id))
        resume['avg_vol_down_per_client%s' % app] = (vol_d / nb_cl
                                                     if (nb_cl != 0) else 0)
        resume['avg_vol_down_per_flow%s' % app] = (vol_d / nb_fl
                                                   if (nb_fl != 0) else 0)
        resume['avg_vol_up_per_client%s' % app] = (vol_u / nb_cl
                                                   if (nb_cl != 0) else 0)
        resume['avg_vol_up_per_flow%s' % app] = (vol_u / nb_fl
                                                 if (nb_fl != 0) else 0)
        resume['nb_client_down_1MB%s' % app] = len(np.unique(
            new_flow_down_1MB.client_id))
        resume['avg_nb_flows_per_client%s' % app] = (nb_fl / float(nb_cl)
                                                     if nb_cl != 0 else 0)
        resume['avg_nb_flows_1MB_per_client%s' % app] = (nb_fl_1mb / float(nb_cl)
                                                         if nb_cl != 0 else 0)
    # Further per-flow statistics kept for reference:
    # resume['nb_flow'] = len(flow)
    # resume['nb_client_1MB'] = len(np.unique(flows_1MB.client_id))
    # resume['nb_flow_1MB'] = len(flows_1MB)
    # resume['mean_flow_size'] = np.mean(flow.l3Bytes)
    # resume['median_flow_size'] = np.median(flow.l3Bytes)
    # resume['max_flow_size'] = np.int64(np.max(flow.l3Bytes))
    # resume['mean_flow_duration'] = np.mean(flow.duration)
    # resume['median_flow_duration'] = np.median(flow.duration)
    # resume['max_flow_duration'] = np.max(flow.duration)
    # resume['mean_flow_peak_rate'] = np.mean(80.0 * flow.peakRate)
    # resume['median_flow_peak_rate'] = np.median(80.0 * flow.peakRate)
    # resume['max_flow_peak_rate'] = np.max(80.0 * flow.peakRate)
    # mean_rate = [8*x['l3Bytes']/(1000.0*x['duration'])
    #              for x in flow if x['duration'] > 0]
    # resume['mean_flow_mean_rate'] = np.mean(mean_rate)
    # resume['median_flow_mean_rate'] = np.median(mean_rate)
    # resume['max_flow_mean_rate'] = np.max(mean_rate)
    # mean_rate_1MB = [8*x['l3Bytes']/(1000.0*x['duration'])
    #                  for x in flow if x['duration'] > 0
    #                  and x['l3Bytes'] > 10**6]
    # resume['mean_flow_mean_rate_1MB'] = np.mean(mean_rate_1MB)
    # resume['median_flow_mean_rate_1MB'] = np.median(mean_rate_1MB)
    # resume['max_flow_mean_rate_1MB'] = np.max(mean_rate_1MB)
    # resume['mean_flow_AR'] = \
    #     compute_AT.compute_AT(flow.initTime)[0]
    # resume['mean_flow_100_AR_per_cl'] = \
    #     100 * resume['mean_flow_AR'] / resume['nb_client']
    return resume
import sys

from aggregate import aggregate

bucket_size = 1
line_filter = None
if len(sys.argv) > 1:
    bucket_size = int(sys.argv[1])
if len(sys.argv) > 2:
    line_filter = sys.argv[2]

res = aggregate(sys.stdin, bucket_size, line_filter)
for datetime, count in res:
    print(datetime.strftime("%s"), count, sep="\t")
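# A hypothetical way to drive the script above from another Python process.
# The script filename, the log file, and the filter string are assumptions for
# illustration; the line format expected by aggregate() is not shown in this
# excerpt.
import subprocess

with open('events.log') as logfile:  # assumed newline-delimited timestamped log
    out = subprocess.run(['python', 'aggregate_counts.py', '60', 'ERROR'],
                         stdin=logfile, capture_output=True, text=True)
print(out.stdout)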
def strollr2d_imagedenoising(data, param):
    """
    Entry point for STROLLR-2D image denoising.

    Args:
        data: A dict containing the image data with two fields:
            - noisy: an (a, b) gray-scale image matrix to denoise.
            - oracle (optional): an (a, b) gray-scale matrix used as ground
              truth for PSNR computation.
        param: A dict of parameters for the algorithm.
    """
    try:
        noisy = data['noisy']
        oracle = data.get('oracle', None)
        sig = param['sig']
        dim = param['dim']
        # Kronecker product: dct(np.eye(dim), axis=0) is the DCT of order dim,
        # so W is the 2D separable DCT acting on vectorized dim x dim patches.
        W = np.kron(dct(np.eye(dim), axis=0, norm='ortho'),
                    dct(np.eye(dim), axis=0, norm='ortho'))
        threshold = param['TLthr0'] * sig
        param['threshold'] = threshold
        thr = param['thr0'] * sig
        param['thr'] = thr
        print('[+] Parameters loaded')

        noisy, param = image_enlarge_tl(noisy, param)
        print('[+] Image enlarged for TL only')

        patchNoisy = image_patch(noisy, dim)
        print('[+] Image patch done')

        patches = patchNoisy  # patchNoisy is a 2D numpy array
        numTensorPatch = patchNoisy.shape[1]
        param['numTensorPatch'] = numTensorPatch

        W, sparseCode, nonZeroTable = tl_approximation(patches, W, param)
        print('[+] Module TL approx done')

        nonZeroTable[nonZeroTable == 0] = param['zeroWeight']
        TLsparsityWeight = np.divide(1, nonZeroTable)

        blk_arr, _, blk_pSize = bm_fix(patches, param)
        print('[+] Module BM fix done')
        blk_arr = np.asarray(blk_arr)
        blk_pSize = np.asarray(blk_pSize)

        LRpatch, LRweights, LRrankWeight = lr_approximation(patches, blk_arr,
                                                            blk_pSize, param)
        print('[+] Module LR approx done')

        nonZerosLR = LRweights > 0
        LRrankWeight[nonZerosLR] = np.divide(LRrankWeight[nonZerosLR],
                                             LRweights[nonZerosLR])

        patchRecon = f1_reconstruction(sparseCode, W, LRpatch, LRweights,
                                       patches, param, TLsparsityWeight,
                                       LRrankWeight)
        print('[+] Module F1 reconstruction done')

        Xr = aggregate(patchRecon, TLsparsityWeight, param)
        plt.imshow(Xr, cmap='gray', vmin=0, vmax=255)
        plt.show()

        psnrXr = PSNR(Xr - oracle)
        print('[+] PSNR value is: {}'.format(psnrXr))
        return Xr, psnrXr
    except KeyError as e:
        print('The parameters provided to strollr2d are not valid: {}'.format(e))
        sys.exit(1)
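# A hypothetical invocation of strollr2d_imagedenoising. The parameter keys
# below match the ones read inside the function, but the numeric values are
# illustrative only, and the helper modules (tl_approximation, bm_fix,
# lr_approximation, ...) may expect additional keys not visible here.
import numpy as np

clean = np.tile(np.linspace(0, 255, 256), (256, 1))   # synthetic gradient test image
noisy = clean + 20.0 * np.random.randn(*clean.shape)  # additive Gaussian noise, sigma = 20
param = {'sig': 20.0, 'dim': 8, 'TLthr0': 1.9, 'thr0': 2.7, 'zeroWeight': 1e-4}
denoised, psnr = strollr2d_imagedenoising({'noisy': noisy, 'oracle': clean}, param)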
import sys

import matplotlib.pyplot as plt

from aggregate import aggregate

bucket_size = 1
if len(sys.argv) > 1:
    bucket_size = int(sys.argv[1])

res = aggregate(sys.stdin, bucket_size)
plt.plot(*zip(*res))
plt.show()
def execution(result):
    # Parse the start and end dates
    result = result.split('~')
    date_begin = result[0]
    date_end = result[-1]
    date_end = datetime.datetime.strptime(date_end, "%Y-%m-%d")

    # Build the list of all selected dates
    day_range = []
    date = datetime.datetime.strptime(date_begin, "%Y-%m-%d")
    while date <= date_end:
        day_range.append(date.strftime("%Y-%m-%d"))
        date = date + datetime.timedelta(days=1)

    # Download the raw data, one request per week
    date = datetime.datetime.strptime(date_begin, "%Y-%m-%d")
    web.update(date.strftime("%Y-%m-%d"))
    while (date + datetime.timedelta(days=7)) <= date_end:
        date = date + datetime.timedelta(days=7)
        web.update(date.strftime("%Y-%m-%d"))

    # Delete files outside the selected date range
    path = '../Download/'
    word = 'BusLocation'
    Bus_dir = web.search_dir(path, word)
    for i in Bus_dir:
        web.del_file(i, day_range)
    word = 'Session'
    Session_dir = web.search_dir(path, word)
    for i in Session_dir:
        web.del_file(i, day_range)

    # Service recognition
    word = 'BusLocation'
    Bus_dir = web.search_dir(path, word)
    for filedir_bus in Bus_dir:
        #### Modify folderpath ####
        aggregate.aggregate(filedir_bus)

    # Ridership analysis
    database = "ridership"
    user = "******"
    password = "******"
    host = "localhost"
    port = "5432"
    ridership.ridership(database, user, password, host, port)

    # Round-trip time
    folderpath = '../output'
    database = "RTT"
    extractMobilityInfo.run_RTT(folderpath)
    upload.RTT_upload(database, user, password, host, port)

    # Inter-arrival times
    database = "inter_arrival"
    mobilityInterval.run_interarrival(folderpath)
    upload.inter_arrival_upload(database, user, password, host, port)

    web.clean()
    print('COMPLETED!')
    return
"""This module pulls data from each of the sources and generates the aggregate projections for the week.""" import sys import aggregate from scrapers import dailyfantasynerd, espn, nfl, numberfire, rotogrinders if __name__ == "__main__": year = sys.argv[1] week = sys.argv[2] nfl.scrape(week, year) espn.scrape(week, year) numberfire.scrape(week, year) rotogrinders.scrape(week, year) dailyfantasynerd.scrape(week, year) aggregate.aggregate(week, year)
def vol_per_client(data, as_list=None, as_excluded=None, on_list=False,
                   field='l3Bytes', func=sum, output_path='rapport/client_ok',
                   title='', prefix=''  # ,
                   # trace_list = ('ADSL_2008', 'FTTH_2008', 'ADSL_nov_2009',
                   #               'FTTH_nov_2009', 'ADSL_dec_2009',
                   #               'FTTH_dec_2009')
                   ):
    """Plot volumes per client according to an AS match list: use * for all ASes.

    The 'on_list' flag works only on as_list (included ASes); as_list elements
    are filters and names: see the examples.

    Use as:
    data = tools.load_hdf5_data.load_h5_file('hdf5/lzf_data.h5')
    tools.plot_per_client.vol_per_client(data)
    tools.plot_per_client.vol_per_client(data,
        ('*', tools.INDEX_VALUES.AS_YOUTUBE))
    tools.plot_per_client.vol_per_client(data,
        as_excluded=tools.INDEX_VALUES.AS_YOUTUBE
        + tools.INDEX_VALUES.AS_YOUTUBE_EU,
        title='Other Streams', prefix='OTHER_')
    tools.plot_per_client.vol_per_client(data_streaming,
        as_list=((tools.INDEX_VALUES.AS_YOUTUBE, 'YOUTUBE'),
                 (tools.INDEX_VALUES.AS_YOUTUBE_EU, 'YOUTUBE_EU')),
        title='YT and YT-EU Streams', prefix='YT_YT_EU_', on_list=True,
        output_path='rapport/client_ok')
    tools.plot_per_client.vol_per_client(data,
        as_list=((tools.INDEX_VALUES.AS_YOUTUBE, 'YOUTUBE'),
                 (tools.INDEX_VALUES.AS_YOUTUBE_EU, 'YOUTUBE_EU'),
                 (tools.INDEX_VALUES.AS_GOOGLE, 'GOOGLE')),
        title='YT and GOO Streams', prefix='YT_GOO', on_list=True,
        output_path='rapport/client_ok')
    """
    client_vol = {}
    # data collection
    args = []
    # TODO: AS list
    for trace in sorted([x for x in data.keys() if '_GVB' in x]):
        print 'process trace: ', trace
        filtered_data_dict = defaultdict(dict)
        if on_list:
            filtered_data_dict[trace] = filter_array_list(data, trace,
                                                          as_list, as_excluded)
        else:
            filtered_data_dict[trace][trace] = filter_array(data[trace], 'asBGP',
                                                            as_list, as_excluded)
        for name in sorted(filtered_data_dict[trace]):
            filtered_data = filtered_data_dict[trace][name]
            # require at least MIN_NB_FLOWS flows per data set to plot
            if len(filtered_data) < MIN_NB_FLOWS:
                continue
            client_vol[name] = aggregate.aggregate(filtered_data, 'client_id',
                                                   field, func)
            # construct plot args
            if as_list:
                title_name = format_as_title(name)
            else:
                title_name = format_title(name).rstrip(' GVB')
            args.append((title_name, client_vol[name]['aggregation']))
            # plot individual repartitions
            pylab.clf()
            cdfplot.repartplotdata(client_vol[name]['aggregation'],
                                   _title='%s Volume per Client for %s'
                                   % (title, trace),
                                   _ylabel='Percentage of Downstream Volume',
                                   _loc=0)
            cdfplot.setgraph_loglog()
            pylab.savefig(output_path + '/%s%s_repart_volume_per_client.pdf'
                          % (prefix, trace))
    # plot CDF
    pylab.clf()
    cdfplot.cdfplotdataN(args, _title='%s Volume per Client' % title,
                         _xlabel='Downstream Volume in Bytes', _loc=0)
    pylab.savefig(output_path + '/%sCDF_volume_per_client.pdf' % prefix)
    # plot global repartition
    pylab.clf()
    cdfplot.repartplotdataN(args, _title='%s Volume per Client' % title,
                            _ylabel='Percentage of Downstream Volume', _loc=0)
    pylab.savefig(output_path + '/%srepart_volume_per_client.pdf' % prefix)