def test_k(file_name, k_list):
    """Plot the average cluster information loss for a sweep over k.

    For every value in ``k_list`` a fresh ``CASTLE`` stream is built (with
    delta, beta and mu all fixed at 10), every row of the CSV is inserted,
    and ``information_loss`` is averaged over the clusters found in
    ``stream.big_gamma``. The per-k averages are then handed to
    ``plot_average_loss_1D``.

    Args:
        file_name: CSV input, in any form accepted by ``pd.read_csv``.
        k_list: Sequence of k values to evaluate. It is iterated once here
            and also passed to the plotting function as the x-axis values.

    Returns:
        None. The result is only plotted.
    """
    frame = pd.read_csv(file_name)
    # The first and last CSV columns are excluded from the headers given to
    # CASTLE (presumably an index column and a trailing non-QI column —
    # TODO confirm against the dataset).
    headers = list(frame.columns.values)[1:-1]

    avg_loss_list = []
    for k in k_list:
        params = Parameters()
        params.k = k
        params.delta = 10
        params.beta = 10
        params.mu = 10

        stream = CASTLE(handler, headers, "FareAmount", params)
        for _, row in frame.iterrows():
            stream.insert(row)

        clusters = stream.big_gamma
        cluster_count = len(clusters)
        if cluster_count == 0:
            # No clusters to average over: dividing by zero here would abort
            # the whole sweep, so record NaN for this k instead.
            avg_loss_list.append(float("nan"))
            continue

        cum_loss = sum(
            cluster.information_loss(stream.global_ranges)
            for cluster in clusters
        )
        avg_loss_list.append(cum_loss / cluster_count)

    plot_average_loss_1D(avg_loss_list, k_list, "k")
def generate_parameters(args):
    """Build a ``Parameters`` object, randomising any value not supplied.

    Each of ``k``, ``delta``, ``beta``, ``mu`` and ``l`` is taken from
    ``args`` when that attribute is truthy; otherwise it is drawn with
    ``np.random.randint``, whose upper bound is exclusive (so 1-99 for
    ``k``, ``delta``, ``beta`` and ``mu``, and 1-9 for ``l``).

    Args:
        args: The arguments supplied to the program

    Returns:
        The ``Parameters`` instance with all five fields set
    """
    # (attribute name, exclusive upper bound for the random fallback),
    # listed in the order the values are assigned.
    fallback_bounds = (
        ("k", 100),
        ("delta", 100),
        ("beta", 100),
        ("mu", 100),
        ("l", 10),
    )

    generated = Parameters(args)
    for field, upper in fallback_bounds:
        # `or` short-circuits, so a random number is only drawn when the
        # supplied value is falsy (None, 0, ...).
        value = getattr(args, field) or np.random.randint(1, upper)
        setattr(generated, field, value)
    return generated
def test_beta_mu(file_name, beta_list, mu_list):
    """Plot the average cluster information loss over a beta x mu grid.

    For every (mu, beta) pair a fresh ``CASTLE`` stream is built with
    k=10, delta=200, l=1 and ``dp`` disabled, every row of the CSV is
    inserted, and ``information_loss`` is averaged over the clusters found
    in ``stream.big_gamma``. The resulting grid is handed to
    ``plot_average_loss_2D``.

    Args:
        file_name: CSV input, in any form accepted by ``pd.read_csv``.
        beta_list: Sequence of beta values (inner loop, grid columns).
        mu_list: Sequence of mu values (outer loop, grid rows).

    Returns:
        None. The result is only plotted.
    """
    frame = pd.read_csv(file_name)
    # The first and last CSV columns are excluded from the headers given to
    # CASTLE (presumably an index column and a trailing non-QI column —
    # TODO confirm against the dataset).
    headers = list(frame.columns.values)[1:-1]

    info_loss = []
    for mu in mu_list:
        print("mu: {}".format(mu))
        avg_loss_list = []

        for beta in beta_list:
            print("beta: {}".format(beta))
            params = Parameters()
            params.k = 10
            params.delta = 200
            params.beta = beta
            params.mu = mu
            params.l = 1
            params.dp = False

            stream = CASTLE(handler, headers, "FareAmount", params)
            for _, row in frame.iterrows():
                stream.insert(row)

            clusters = stream.big_gamma
            cluster_count = len(clusters)
            if cluster_count == 0:
                # No clusters to average over: dividing by zero here would
                # throw away the whole grid, so record NaN for this cell.
                avg_loss_list.append(float("nan"))
                continue

            cum_loss = sum(
                cluster.information_loss(stream.global_ranges)
                for cluster in clusters
            )
            avg_loss_list.append(cum_loss / cluster_count)

        info_loss.append(np.array(avg_loss_list))

    # With meshgrid's default 'xy' indexing, X and Y have shape
    # (len(mu_list), len(beta_list)), matching the one-row-per-mu layout
    # of info_loss built above.
    X, Y = np.meshgrid(beta_list, mu_list)
    plot_average_loss_2D(np.array(info_loss), X, "Beta", Y, "Mu")