def process_backend_data(self):
    """
    Question 2: merge the per-entry histograms into one set of bin counts.

    Each entry of ``self.backend_database`` is binned on its own with
    ``compute_histogram_bins`` and the resulting counts are summed key by
    key. The merged counts are printed and then returned.
    """
    # Seed the running totals with the histogram of the first entry.
    totals = compute_histogram_bins(self.backend_database[0], BINS)

    for entry in self.backend_database[1:]:
        entry_counts = compute_histogram_bins(entry, BINS)
        merged = {}
        for bin_key in totals.keys():
            merged[bin_key] = totals[bin_key] + entry_counts[bin_key]
        totals = merged

    print(totals)
    return totals
def process_backend_data(self):
    """
    Question 2: This method should do all necessary processing to return
    the connection durations histogram bins counts.

    Don't call `plot_histogram` in this method, we just want to compute the
    histogram bins counts!

    Returns:
        The value produced by `compute_histogram_bins` for the flattened
        durations and `BINS`.
    """
    # self.backend_database is iterated as a collection of iterables;
    # flatten it into a single list of durations.
    flat_dur = [
        item for sublist in self.backend_database for item in sublist
    ]
    # The histogram used to be computed and then discarded; hand it back.
    return compute_histogram_bins(flat_dur, BINS)
def process_backend_data(self):
    """
    Question 4: gather what every peer sends and bin it.

    Each element of ``self.network`` is asked for its backend payload via
    ``send_data_to_backend()``; the payloads are concatenated and passed to
    ``compute_histogram_bins`` together with ``BINS``.
    """
    collected = []
    for peer in self.network:
        collected.extend(peer.send_data_to_backend())
    return compute_histogram_bins(data=collected, bins=BINS)
def process_backend_data(self):
    """
    Question 2: compute the histogram of all stored connection times.

    ``self.backend_database`` is a list of lists, so it is flattened into a
    single list before being handed to ``compute_histogram_bins``.
    """
    all_times = []
    for peer_times in self.backend_database:
        all_times.extend(peer_times)
    return compute_histogram_bins(all_times, BINS)
def send_data_to_backend(self):
    """
    Question 4: send only the binned connection times of this peer.

    The values of ``self.peer_pool`` are binned with
    ``compute_histogram_bins`` and the first element of the two-item result
    is returned.
    """
    pool_times = list(self.peer_pool.values())
    # We suppose that every peer and the backend use the same BINS
    histogram = compute_histogram_bins(pool_times, BINS)
    counts, _ignored = histogram
    return counts
def process_backend_data(self):
    """
    Question 4: build the histogram from a random half of the peers.

    ``int(self.number_of_peers / 2)`` entries are drawn from
    ``self.backend_database``; each drawn entry is decoded with
    ``json.loads`` and its values are pooled before binning.
    """
    sample_size = int(self.number_of_peers / 2)
    sampled_entries = random.sample(self.backend_database, sample_size)

    pooled_values = []
    for raw_entry in sampled_entries:
        pooled_values.extend(json.loads(raw_entry).values())

    data, bins, counts = compute_histogram_bins(pooled_values, BINS)
    return data, bins, counts
def process_backend_data(self):
    """
    Question 4: compute the histogram from a random sample of peers.

    At most 10000 peers are drawn from ``self.network``; their
    ``send_data_to_backend()`` payloads are concatenated and binned with
    ``compute_histogram_bins``. Every entry of the first element of the
    result is then halved in place.

    Returns:
        The (halved) result of ``compute_histogram_bins``.
    """
    datas = []
    # We just take at most 10000 peers to work with their peer pools.
    for peer in random.sample(self.network, min(len(self.network), 10000)):
        datas += peer.send_data_to_backend()

    result = compute_histogram_bins(datas, BINS)

    # NOTE(review): presumably halved because each connection is reported by
    # both of its endpoints -- confirm.
    counts = result[0]
    for i in range(len(counts)):
        counts[i] = counts[i] / 2

    # The processed histogram was previously dropped (implicit None).
    return result
def process_backend_data(self):
    """
    Question 2: bin all durations using a data-dependent number of bins.

    The lists in ``self.backend_database`` are concatenated into one array
    of ``n`` samples. The number of bins is ``int(2 * n ** (1 / 3))`` (at
    least 1) and the bin edges are evenly spaced over ``[0, 600]``.

    Returns:
        The result of ``compute_histogram_bins`` for that data and edges.
    """
    data = []
    # Named so that the builtin ``list`` is not shadowed.
    for peer_durations in self.backend_database:
        data.extend(peer_durations)
    data = np.array(data)

    # Guard against k == 0 (empty database), which would otherwise divide
    # by zero when building the bin edges below.
    k = max(1, int(2 * np.power(data.size, 1 / 3)))
    # NOTE(review): 600 looks like the maximum duration -- confirm.
    bins = [i * 600 / k for i in range(k + 1)]
    return compute_histogram_bins(data=data, bins=bins)
def process_backend_data(self):
    """
    Question 4: compute the histogram from a random half of the input data.

    This method randomly samples half of the input data. This might not be
    the EXACT distribution, but it preserves the shape of the data and so
    gives a GOOD representation. A distribution plot of the sampled data is
    shown, and the histogram bin counts are returned so they can be used
    with ``plot_histogram``.

    Returns:
        The ``(data, bins, counts)`` tuple from ``compute_histogram_bins``.
    """
    sample_size = int(self.number_of_peers / 2)
    # Sampling half of the data: with millions of peers a sample still
    # gives a good picture of the distribution.
    durations_ = random.sample(self.backend_database, sample_size)
    k = [i for j in durations_ for i in (json.loads(j).values())]
    data, bins, counts = compute_histogram_bins(k, BINS)

    sns.distplot(data, hist=False)
    # The title must be set before show(); setting it afterwards does not
    # affect the figure that was displayed.
    plt.title('Peers:{} - Pool Size:{}'.format(sample_size,
                                               self.max_peer_pool_size))
    plt.show()
    return (data, bins, counts)
def process_backend_data(self):
    """
    Question 2: return the connection durations histogram bins counts.

    No plotting happens here (``plot_histogram`` is not called); the method
    only flattens the stored durations and bins them.
    """
    # self.backend_database is a list of lists (one list of connection
    # durations per peer); gather every value into a single flat list.
    all_durations = []
    for peer_durations in self.backend_database:
        all_durations.extend(peer_durations)

    # Bin the durations with the shared BINS.
    histogram = compute_histogram_bins(all_durations, BINS)
    data, bins, counts = histogram
    return data, bins, counts
def process_backend_data(self):
    """
    Question 2: This method should do all necessary processing to return
    the connection durations histogram bins counts.

    Don't call `plot_histogram` in this method, we just want to compute the
    histogram bins counts!

    Returns:
        The result of ``compute_histogram_bins`` with every entry of its
        first element halved.
    """
    datas = []
    for peer in self.network:
        datas += peer.send_data_to_backend()

    result = compute_histogram_bins(datas, BINS)

    # Halve the counts to avoid counting a connection twice: a connection
    # between peers "a" and "b" shows up in the data of both "a" and "b".
    counts = result[0]
    for i in range(len(counts)):
        counts[i] = counts[i] / 2

    # The docstring asks for the counts to be returned; they were
    # previously computed and then dropped (implicit None).
    return result
def process_backend_data(self):
    """
    Question 2: return the connection durations histogram bins counts.

    The stored durations are flattened and binned; a distribution plot of
    the data is also shown. ``plot_histogram`` itself is not called here.
    """
    # self.backend_database is a list of lists (one list of connection
    # durations per peer); gather every value into a single flat list.
    all_durations = []
    for peer_durations in self.backend_database:
        all_durations.extend(peer_durations)

    # Bin the durations with the shared BINS.
    data, bins, counts = compute_histogram_bins(all_durations, BINS)

    # Plot the distribution of the data, titled from the network settings.
    sns.distplot(data, hist=False)
    plot_title = 'Peers:{} - Pool Size:{}'.format(self.number_of_peers,
                                                  self.max_peer_pool_size)
    plt.title(plot_title)
    plt.show()

    # This tuple can be passed to plot_histogram.
    return data, bins, counts
def send_data_to_backend(self):
    """
    Question 4: send this peer's binned connection durations.

    The values of ``self.peer_pool`` are binned with
    ``compute_histogram_bins`` using the shared ``BINS``.

    Returns:
        The result of ``compute_histogram_bins`` for this peer's values.
    """
    # Pass a concrete list rather than the live ``.values()`` view, which is
    # not an indexable sequence.
    # NOTE(review): assumes the helper does not rely on receiving the view.
    return compute_histogram_bins(list(self.peer_pool.values()), BINS)
def process_backend_data(self):
    """
    Compute histogram bins and collect the second item of each entry.

    ``compute_histogram_bins`` is called with the results of
    ``generate_network(self)`` and ``PeerQ2().send_data_to_backend(self)``.

    NOTE(review): nothing is returned and the collected list is never used
    afterwards -- confirm whether a ``return`` is missing.
    """
    network = generate_network(self)
    peer_payload = PeerQ2().send_data_to_backend(self)
    histogram_bins = compute_histogram_bins(network, peer_payload)

    sending_backend_data = []
    for histo_bin in histogram_bins:
        sending_backend_data.append(histo_bin[1])
def process_backend_data(self): compute_histogram_bins() """