Example #1
def test_node(cores, d, m_out, L_out, m_in, L_in, k, alpha):
    """
    Test that a single node executes correctly in the distributed setting.
    It assumes middleware/tests/system_test.py is already running (configured
    for a single node). This function tests the system in prediction mode.

    :param cores: number of cores to use
    :param d: dimensionality of a point
    :param m_out: number of outer hash functions
    :param L_out: number of outer hash tables
    :param m_in: number of inner hash functions
    :param L_in: number of inner hash tables
    :param k: number of neighbors
    :param alpha: SLSH threshold
    """

    # SLSH parameters.
    H_out = L1LSH([(-5, 5)] * d)
    H_in = COSLSH(d)

    # Execute parallel code.
    table_log, queries = execute_node(
        cores,
        k,
        m_out,
        L_out,
        m_in,
        L_in,
        H_out,
        H_in,
        alpha,
        distributed=True,
        prediction=True)

    execute_node_logging(("testlog", cores), queries, table_log)
Example #2
def node_distributed_labeled_abp(node_id,
                                 n,
                                 d,
                                 port,
                                 cores,
                                 m_out,
                                 L_out,
                                 m_in,
                                 L_in,
                                 alpha,
                                 k,
                                 dataparallel=False):
    """
    Execute ABP prediction in a distributed fashion. This function runs a node.

    :param node_id: this node's ID (used for naming)
    :param n: number of datapoints (used for naming the log)
    :param d: dimensionality of a point
    :param port: the port to receive connections on
    :param cores: number of cores to use
    :param m_out: number of outer hash functions
    :param L_out: number of outer hash tables
    :param m_in: number of inner hash functions
    :param L_in: number of inner hash tables
    :param alpha: SLSH threshold
    :param k: number of neighbors
    :param dataparallel: if True, run in data-parallel mode (also prefixes the log name)
    """

    if dataparallel:
        base = "dataparallel-"
    else:
        base = ""

    # SLSH parameters.
    H_out = L1LSH([(40, 120)] * d)
    H_in = COSLSH(d)

    # Execute algorithm.
    table_log, queries = execute_node(cores,
                                      k,
                                      m_out,
                                      L_out,
                                      m_in,
                                      L_in,
                                      H_out,
                                      H_in,
                                      alpha,
                                      prediction=True,
                                      distributed=True,
                                      port=port,
                                      dataparallel=dataparallel)

    execute_node_logging(
        ("{}abp-mout{}-Lout{}-min{}-Lin{}-alpha{}-n{}-k{}".format(
            base, m_out, L_out, m_in, L_in, alpha, n, k), cores),
        queries,
        table_log,
        accparameters=(n, k, cores))
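
A hedged launch sketch for this node function; node_id and port are deployment-specific placeholders, and the SLSH values simply mirror the settings hard-coded in the Gaussian speed test of Example #3:

if __name__ == "__main__":
    # Placeholder deployment: node 0 listening on port 5000;
    # n, d and all SLSH parameters are illustrative only.
    node_distributed_labeled_abp(node_id=0, n=10000, d=64, port=5000,
                                 cores=8, m_out=100, L_out=96,
                                 m_in=30, L_in=10, alpha=0.1, k=1,
                                 dataparallel=False)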
Example #3
# numpy is required for the data generation below; the SLSH helpers used
# throughout (L1LSH, COSLSH, Query, execute_node, execute_node_logging)
# are assumed to be importable from the surrounding project.
import numpy as np


def parallel_gaussian_test(d, n, cores):
    '''
    Execute a speed test on a single node using `cores` cores.
    d is the dimensionality of the dataset, n the number of points to generate.
    The generated dataset is an isotropic Gaussian.

    :param d: dataset dimensionality
    :param n: number of datapoints
    :param cores: number of cores to use

    :return: nothing
    '''

    # Data generation.
    mean = 0
    std = 20
    X_shape = (d, n)
    X = np.random.normal(mean, std, X_shape)  # Generate dataset.

    # SLSH parameters.
    m_out = 50 * 2
    L_out = 24 * 4  # L_out has to be a multiple of 24 to scale decently.
    m_in = 30
    L_in = 10
    k = 1
    alpha = 0.1
    H_out = L1LSH([(-100, 100)] * d)
    H_in = COSLSH(d)

    # Queries generation.
    n_queries = 100
    queries = [
        Query(np.random.normal(mean, std, d)) for _ in range(n_queries)
    ]  # Random Gaussian queries drawn from the same distribution as the dataset.

    # Execute algorithm.
    table_log, queries = execute_node(cores,
                                      k,
                                      m_out,
                                      L_out,
                                      m_in,
                                      L_in,
                                      H_out,
                                      H_in,
                                      alpha,
                                      X=X,
                                      queries=queries)

    # Log output.
    execute_node_logging(("gaussian{}x{}_scaling_test".format(d, n), cores),
                         queries, table_log)

    return
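
For instance, a hypothetical single-node run on a 50-dimensional, 100,000-point Gaussian using 8 cores:

if __name__ == "__main__":
    # d, n and cores are illustrative; everything else is hard-coded above.
    parallel_gaussian_test(d=50, n=100000, cores=8)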
Example #4
def node_distributed_test(node_id, port, d, n, cores):
    """
    Execute the node for a distributed SLSH system.
    The dataset is chosen at the middleware.

    :param node_id: this node's ID (used for naming)
    :param port: the port to receive connections on
    :param d: the dimensionality of a point
    :param n: number of datapoints (used for naming the log)
    :param cores: number of cores to use

    :return: nothing.
    """

    # SLSH parameters.
    m_out = 50 * 2
    L_out = 24 * 4  # L_out has to be a multiple of 24 to scale decently.
    m_in = 20
    L_in = 10
    k = 1
    alpha = 0.01
    H_out = L1LSH([(-100, 100)] * d)
    H_in = COSLSH(d)

    # Execute parallel code.
    table_log, queries = execute_node(cores,
                                      k,
                                      m_out,
                                      L_out,
                                      m_in,
                                      L_in,
                                      H_out,
                                      H_in,
                                      alpha,
                                      distributed=True,
                                      port=port)
    # Log output.
    execute_node_logging(
        ("distributed-node{}-gaussian{}x{}_scaling_test".format(
            node_id, d, n), cores), queries, table_log)

    return
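
A launch sketch with placeholder values; node_id and port must match the middleware's configuration, and n here only affects the log name:

if __name__ == "__main__":
    # Placeholders: node 0 on port 5000, 100-dimensional data,
    # nominally 1,000,000 points, 8 cores.
    node_distributed_test(node_id=0, port=5000, d=100, n=1000000, cores=8)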
Example #5
def parallel_labeled_abp_test(filename,
                              n,
                              cores,
                              m_out,
                              L_out,
                              m_in,
                              L_in,
                              alpha,
                              k,
                              exhaustive=False):
    '''
    Execute the test on a single node using `cores` cores. The dataset must
    fit into memory as-is. filename is the name of the file in the dataset
    folder; the dataset, its labels and its queries are loaded from pickles.

    :param filename: the name of the file
    :param n: number of datapoints (overridden by the size of the loaded dataset)
    :param cores: number of cores to use
    :param m_out: number of outer hash functions
    :param L_out: number of outer tables
    :param m_in: number of inner hash functions
    :param L_in: number of inner tables
    :param alpha: SLSH ratio
    :param k: number of neighbors to use
    :param exhaustive: if True, run in exhaustive mode (changes the logging)

    :return: nothing
    '''

    X, labels, queries, query_labels = get_dataset_and_queries_from_pickles(
        filename)

    # Infer the dimensionality and the dataset size from the loaded data.
    d = len(queries[0].point)
    n = len(labels)

    # SLSH parameters.
    H_out = L1LSH([(40, 120)] * d)
    H_in = COSLSH(d)

    # Execute algorithm.
    table_log, queries = execute_node(
        cores,
        k,
        m_out,
        L_out,
        m_in,
        L_in,
        H_out,
        H_in,
        alpha,
        X=X,
        queries=queries,
        labels=labels,
        exhaustive=exhaustive)

    accuracy = compute_accuracy(queries, query_labels)
    print("The prediction accuracy is: {}".format(accuracy))

    recall = compute_recall(queries, query_labels)
    print("The recall is: {}".format(recall))

    mcc = compute_mcc(queries, query_labels)
    print("The mcc is: {}".format(mcc))

    # Log output.
    if not exhaustive:
        execute_node_logging(
            ("abp-partial{}x{}_scaling_test".format(d, n), cores),
            queries,
            table_log,
            accuracy=accuracy,
            recall=recall,
            mcc=mcc,
            accparameters=(m_out, L_out, m_in, L_in, alpha, n, k))
    else:
        execute_node_logging(
            ("exhaustive-abp-partial{}x{}_scaling_test".format(d, n), cores),
            queries,
            table_log,
            accuracy=accuracy,
            recall=recall,
            mcc=mcc,
            accparameters=(n, k, cores),
            exhaustive=exhaustive)

    return
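
A usage sketch; the pickle name "abp.pickle" is hypothetical (it only needs to be resolvable by get_dataset_and_queries_from_pickles), the SLSH values mirror the other examples, and n is recomputed inside the function from the loaded labels:

if __name__ == "__main__":
    # Hypothetical pickle name and illustrative SLSH parameters;
    # n is overridden inside the function by len(labels).
    parallel_labeled_abp_test(filename="abp.pickle", n=0, cores=8,
                              m_out=100, L_out=96, m_in=30, L_in=10,
                              alpha=0.1, k=1, exhaustive=False)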