Example #1
import itertools
import os
import sys

import matplotlib.pyplot as plt
import numpy as np

# ANNB_ROOT, open_dataset, exact, indexed, lsh, and pareto_max are
# assumed to come from this project's own modules; a sketch of
# pareto_max follows the example.

def main():

    dsname = "kosarak-jaccard"

    # Load the dataset. This assumes you've run the download.sh script.
    dataset = open_dataset(os.path.join(ANNB_ROOT, f"{dsname}.hdf5"))
    print(
        f"Loaded {len(dataset.corpus)} vectors and {len(dataset.queries)} queries"
    )

    # for _ in range(3):
    #     loss = exact(dataset)
    #     print(f"exact: {loss}")
    #
    # for _ in range(3):
    #     loss = indexed(dataset)
    #     print(f"jaccard indexed: {loss}")
    #
    # for _ in range(3):
    #     loss = lsh(dataset, 165, 1, 1.5)
    #     print(f"lsh: {loss}")

    os.makedirs("out", exist_ok=True)  # plot and result files land here

    # Parameter grid: each axis is a list of (name, value) pairs, so
    # itertools.product + dict yields one kwargs dict per combination.
    bands = [('bands', b) for b in range(10, 601, 10)]
    rows = [('rows', r) for r in range(1, 2)]
    candidates = [('candidates', c) for c in np.linspace(0, 10, 21)]

    combinations = list(map(dict, itertools.product(bands, rows, candidates)))
    metrics = np.zeros((len(combinations), 2))

    for i, params in enumerate(combinations):
        print(f"Running {i + 1} of {len(combinations)}: {params}...")
        try:
            (x, y) = lsh(dataset, **params)
            print(f"Loss = {(x, y)}")
            metrics[i] = [x, y]
            pmax = pareto_max(metrics)

            plt.title(f"{dsname} results")
            plt.scatter(metrics[:, 0], metrics[:, 1], label='All')
            plt.scatter(metrics[pmax, 0], metrics[pmax, 1], label='Optimal')
            plt.legend()
            plt.savefig(f"out/{dsname}.png")
            plt.clf()

            with open(f"out/{dsname}.txt", "w") as fp:
                for j in pmax:
                    d, m = combinations[j], metrics[j]
                    fp.write(
                        f"{d['bands']}, {d['rows']}, {m[0]}, {m[1]}\n")
        except Exception as e:
            print(e, file=sys.stderr)
            continue
        finally:
            print('-' * 100)
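
pareto_max is not defined in the snippet above. A minimal sketch that is
consistent with how it is used here (returning the indices of metric rows
that are not dominated in either column, assuming both metrics are
maximized) might look like this; the project's own implementation may
differ:

def pareto_max(metrics):
    # Keep row i if no other row is at least as good in both columns
    # and strictly better in at least one (a maximizing Pareto front).
    front = []
    for i, row in enumerate(metrics):
        dominated = any(
            np.all(other >= row) and np.any(other > row)
            for j, other in enumerate(metrics) if j != i
        )
        if not dominated:
            front.append(i)
    return front
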
Example #2
    def initialize_dataset(self, data_source):
        # Resolve the training-split file for this source, then open the
        # dataset and its slc counterpart (built with self.num_covs).
        filepath = utils.get_filepath(data_source, 'train')
        self.dataset = utils.open_dataset(filepath)
        self.slc_dataset = utils.open_slc_dataset(filepath, self.num_covs)

        self.num_classes = self.slc_dataset.num_classes()
Example #3
import matplotlib.pyplot as plt
import seaborn as sns

# open_dataset, show_values_on_bars, and the various *_plot helpers are
# defined elsewhere in this project; a sketch of show_values_on_bars
# follows the example.

def frequency_count_class():
    # Plot the frequency of the 50 most common class names as a
    # horizontal bar chart, with counts annotated on the bars.
    plt.subplots(figsize=(9, 5))
    z = sns.countplot(y="Class Name",
                      data=df,
                      order=df["Class Name"].value_counts()[:50].index)
    show_values_on_bars(z, "h", 0.3)
    plt.title("Frequency Count of Class Name")
    plt.xlabel("Count")
    plt.show()


if __name__ == '__main__':
    df = open_dataset()
    # many_trials()
    # age_plot()
    # department_age_plot()
    # counts_department_plot()
    # rating_age_plot()
    # rating_age_box_plot()
    # review_length_plot()
    # clothing_recommended_boxplot()

    # pair_plot_department()
    missing_values_plot()
    distribution_ratings()
    distribution_of_reviews('Division Name', "Reviews in each Division")
    distribution_of_reviews('Department Name', "Reviews in each Department")
    distribution_of_reviews('Class Name', "Reviews in each Class")
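
show_values_on_bars is likewise defined elsewhere in the project. A common
shape for such a helper (hypothetical here) annotates each bar of an Axes
with its value; "h" selects horizontal bars and the last argument is the
label offset:

def show_values_on_bars(ax, orient="v", space=0.4):
    # Write each bar's value just past the end of the bar.
    for p in ax.patches:
        if orient == "v":
            x = p.get_x() + p.get_width() / 2
            y = p.get_y() + p.get_height() + space
            ax.text(x, y, f"{p.get_height():.0f}", ha="center")
        else:  # "h", as used in frequency_count_class above
            x = p.get_x() + p.get_width() + space
            y = p.get_y() + p.get_height() / 2
            ax.text(x, y, f"{p.get_width():.0f}", va="center")
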
Example #4
# utils, Node, and Network come from this project's own modules.

# Reference results per instance; the entries before 36Guadalajara30.txt
# were truncated in the original snippet.
datasets = [{
    "instance": "36Guadalajara30.txt",
    "obj": 57476,
    "time": "1.16"
}, {
    "instance": "37Guadalajara20.txt",
    "obj": 59493,
    "time": "2.29"
}]

# Only this single instance is run; the assignment overrides the list above.
datasets = [{"instance": "1Bari30.txt", "obj": 14600, "time": "0.06"}]

for dataset in datasets:
    print(dataset["instance"])

    # Read the instance: n, c, q, Q (presumably node count, cost
    # matrix, demands, and vehicle capacity).
    n, c, q, Q = utils.open_dataset("dataset/" + dataset["instance"])

    N = list(range(1, n))               # non-depot nodes
    V = [0] + N                         # all nodes; 0 is the source/depot
    A = [(i, j) for i in V for j in V]  # full arc set
    # m = 80

    # build initial solution
    source = Node(0, q[0])
    nodes = [Node(i, q[i]) for i in range(1, n)]

    network = Network(source, c, Q)
    network.add_nodes(nodes)

    routes, total_cost = network.build_route()
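
The loop stops after constructing the routes. A natural follow-up inside
the loop body (hypothetical, using only names already present in the
snippet) is to report the gap to the instance's reference objective:

    # Hypothetical follow-up: compare the heuristic cost to the
    # reference objective shipped with the instance.
    gap = 100.0 * (total_cost - dataset["obj"]) / dataset["obj"]
    print(f"cost = {total_cost}, reference = {dataset['obj']}, "
          f"gap = {gap:+.2f}%")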