def main():
    """Sweep LSH hyperparameters on the kosarak-jaccard dataset.

    For every (bands, rows, candidates) combination, runs ``lsh`` and records
    the resulting (x, y) loss pair, then re-plots all points together with the
    Pareto-optimal subset and rewrites ``out/<dataset>.txt`` with the optimal
    parameter settings after each successful run.
    """
    dsname = "kosarak-jaccard"
    # Load the dataset. This assumes you've run the download.sh script.
    dataset = open_dataset(os.path.join(ANNB_ROOT, f"{dsname}.hdf5"))
    print(f"Loaded {len(dataset.corpus)} vectors and {len(dataset.queries)} queries")

    # Full cartesian grid of keyword arguments passed through to lsh().
    bands = [('bands', b) for b in range(10, 601, 10)]
    rows = [('rows', r) for r in range(1, 2)]
    candidates = [('candidates', c) for c in np.linspace(0, 10, 21)]
    combinations = list(map(dict, itertools.product(bands, rows, candidates)))

    metrics = np.zeros((len(combinations), 2))
    for i, params in enumerate(combinations):
        print(f"Running {i + 1} of {len(combinations)}: {params}...")
        try:
            (x, y) = lsh(dataset, **params)
            print(f"Loss = {(x, y)}")
            metrics[i] = [x, y]
            # Re-plot and re-dump after every run so partial results
            # survive an interruption of the (long) sweep.
            pmax = pareto_max(metrics)
            plt.title(f"{dsname} results")
            plt.scatter(metrics[:, 0], metrics[:, 1], label='All')
            plt.scatter(metrics[pmax, 0], metrics[pmax, 1], label='Optimal')
            plt.legend()
            plt.savefig(f"out/{dsname}.png")
            plt.clf()
            with open(f"out/{dsname}.txt", "w") as fp:
                for j in pmax:
                    d, m = combinations[j], metrics[j]
                    # BUG FIX: the grid uses keys 'bands'/'rows', not
                    # 'num_bands'/'num_rows' — the old keys raised KeyError
                    # on every write (silently eaten by the except below),
                    # so the results file was never produced.
                    fp.write(f"{d['bands']}, {d['rows']}, {m[0]}, {m[1]}\n")
        except Exception as e:
            # Best-effort sweep: log the failing combination and keep going.
            print(e, file=sys.stderr)
            continue
        finally:
            print('-' * 100)
def initialize_dataset(self, data_source):
    """Load the training dataset and its SLC variant, then cache the class count.

    Resolves the 'train' split path for *data_source*, opens both the plain
    and the SLC (covariate-aware) dataset from it, and records the number of
    classes reported by the SLC dataset.
    """
    path = utils.get_filepath(data_source, 'train')
    self.dataset = utils.open_dataset(path)
    self.slc_dataset = utils.open_slc_dataset(path, self.num_covs)
    self.num_classes = self.slc_dataset.num_classes()
def frequency_count_class():
    """Plot the 50 most common "Class Name" values as a horizontal count plot."""
    plt.subplots(figsize=(9, 5))
    top_classes = df["Class Name"].value_counts()[:50].index
    ax = sns.countplot(y="Class Name", data=df, order=top_classes)
    show_values_on_bars(ax, "h", 0.3)
    plt.title("Frequency Count of Class Name")
    plt.xlabel("Count")
    plt.show()


if __name__ == '__main__':
    df = open_dataset()
    # many_trials()
    # age_plot()
    # department_age_plot()
    # counts_department_plot()
    # rating_age_plot()
    # rating_age_box_plot()
    # review_length_plot
    # clothing_recommended_boxplot()
    # pair_plot_department()
    missing_values_plot()
    distribution_ratings()
    distribution_of_reviews('Division Name', "Reviews in each Division")
    distribution_of_reviews('Department Name', "Reviews in each Department")
    distribution_of_reviews('Class Name', "Reviews in each Class")
def initialize_dataset(self, data_source):
    """Open the training dataset and its SLC counterpart for *data_source*."""
    train_path = utils.get_filepath(data_source, 'train')
    self.dataset = utils.open_dataset(train_path)
    self.slc_dataset = utils.open_slc_dataset(train_path)
"instance": "36Guadalajara30.txt", "obj": 57476, "time": "1.16" }, { "instance": "37Guadalajara20.txt", "obj": 59493, "time": "2.29" }] datasets = [{"instance": "1Bari30.txt", "obj": 14600, "time": "0.06"}] for dataset in datasets: print(dataset["instance"]) # read dataset n, c, q, Q = utils.open_dataset("dataset/" + dataset["instance"]) N = [i for i in range(1, n)] V = [0] + N A = [(i, j) for i in V for j in V] #m = 80 # build initial solution source = Node(0, q[0]) nodes = [Node(i, q[i]) for i in range(1, n)] network = Network(source, c, Q) network.add_nodes(nodes) routes, total_cost = network.build_route()