Example #1
def gemm_recompute(A, B, thresh, s3_key):
    """    
    Compute A * B.T via speculative execution (i.e., recompute straggling workers).

    Params
    ======
    A : numpywren.matrix.BigMatrix
        First input matrix.
        
    B : numpywren.matrix.BigMatrix
        Second input matrix.
        
    thresh : float (in [0, 1])
        Fraction of workers that should finish before recomputing.
        
    s3_key : str
        Storage key for output matrix.

    Returns
    =======
    C : matrix.BigMatrix
        Resultant matrix product.
        
    t_comp : float
        Time for a thresh fraction of the workers to finish.
        
    t_straggle : float
        Time for the remaining (1 - thresh) fraction of the workers to finish after
        we begin recomputing.
    """
    if not (0 <= thresh <= 1):
        raise ValueError("thresh must be in the interval [0, 1]")
        
    """Initialize output matrix"""
    num_col_blocks = A.shape[1] // A.shard_sizes[1]
    shard_sizes = (A.shard_sizes[0], B.shard_sizes[0])
    C = matrix.BigMatrix(s3_key, shape=(A.shape[0], B.shape[0]), shard_sizes=shard_sizes, autosqueeze=False, write_header=True)
    C.delete() # Only needed if you reuse the same s3_key (if the blocks already exist, no work will be done here)

    """Stage 1: Compute "thresh" percentage of the results"""
    t_comp_start = time.time()
    pwex = pywren.lambda_executor()
    futures = pwex.map(lambda x: pywren_gemm(x, A, B, C, num_col_blocks), C.block_idxs)
    num_done = 0
    while num_done < thresh * len(futures):
        fs_dones, _ = pywren.wait(futures, return_when=ANY_COMPLETED)
        num_done = len(fs_dones)
    t_comp = time.time() - t_comp_start # Total stage 1 time

    """Stage 2: Recompute straggling workers (the last 1-thresh percent of jobs)"""
    t_straggle_start = time.time()
    futures_stragglers = pwex.map(lambda x: pywren_gemm(x, A, B, C, num_col_blocks), C.block_idxs_not_exist)
    while len(C.block_idxs_not_exist) > 0: 
        pywren.wait(futures, return_when=ALWAYS)
        pywren.wait(futures_stragglers, return_when=ALWAYS)
    t_straggle = time.time() - t_straggle_start # Total stage 2 time
    
    return C, t_comp, t_straggle
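A hypothetical usage sketch (not part of the original snippet); it assumes A and B are numpywren BigMatrix objects already sharded to S3 with compatible shard sizes, and that the output key below is free to reuse.

# Hypothetical call: recompute stragglers once 90% of the workers have finished.
C, t_comp, t_straggle = gemm_recompute(A, B, thresh=0.9, s3_key="gemm_recompute_output")
print("Time until 90% of the workers finished:", t_comp)
print("Time to recompute the stragglers:", t_straggle)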
Example #2
    parser.add_argument('test_key', type=str, help="test_key")
    parser.add_argument('--train_labels',
                        type=str,
                        help="train_labels",
                        default="y_train_fishervector.npy")
    parser.add_argument('--test_labels',
                        type=str,
                        help="test_labels",
                        default="y_test_fishervector.npy")
    args = parser.parse_args()

    y_train = np.load(args.train_labels)
    y_test = np.load(args.test_labels)

    K_train = matrix.BigSymmetricMatrix(args.train_key, bucket="pictureweb")
    K_test = matrix.BigMatrix(args.test_key, bucket="pictureweb")
    model = matrix.BigMatrix(args.model_key,
                             bucket="pictureweb",
                             shape=(K_train.shape[0],
                                    int(np.max(y_train) + 1)),
                             shard_sizes=(4096, 1000),
                             write_header=True)

    config = wc.default()
    config['runtime']['s3_bucket'] = 'pictureweb'
    config['runtime']['s3_key'] = 'pywren.runtime/pywren_runtime-3.6-pictureweb.tar.gz'
    config['standalone']['sqs_queue_name'] = 'pictureweb'
    print("please launch some standalone instances for this script....")
    pwex = pywren.standalone_executor(config=config)
    print("Evaluating Train")
def code_2D(A, num_parity_blocks, thres=1):
    assert (len(A._block_idxs(0)) % num_parity_blocks == 0)
    shard_size = A.shard_sizes[0]
    coded_shape = (A.shape[0] + num_parity_blocks * A.shard_sizes[0],
                   A.shape[1])
    coding_length = int(np.ceil(len(A._block_idxs(0)) / num_parity_blocks))
    coding_fn2D = make_coding_function2D(A, coding_length)

    coded_2D_shape = (
        A.shape[0] +
        (coding_length + 1 + num_parity_blocks) * A.shard_sizes[0], A.shape[1])
    A_coded_2D = matrix.BigMatrix(A.key + "CODED2D_{0}_{1}_{2}".format(
        A.shape[0], shard_size, num_parity_blocks),
                                  shape=coded_2D_shape,
                                  shard_sizes=A.shard_sizes,
                                  write_header=True,
                                  parent_fn=coding_fn2D)

    # if list(set(A_coded_2D.block_idxs_not_exist) - set(A.block_idxs_exist)) == []:
    #     return A_coded_2D

    last_block = max(A._block_idxs(0))
    columns = A_coded_2D._block_idxs(1)
    rows = A_coded_2D._block_idxs(0)
    to_read = []
    blocks_exist = A_coded_2D.block_idxs_exist
    for row in rows:
        if (row <= last_block): continue
        for column in columns:
            if (row, column) in blocks_exist:
                continue
            else:
                to_read.append((row, column))

    print("Number of parity blocks", len(to_read))

    num_parities_1D = coding_length * len(A._block_idxs(1))
    to_read_phase1 = to_read[0:num_parities_1D]
    to_read_phase2 = to_read[num_parities_1D:]

    def get_block_wrapper(x):
        A_coded_2D.get_block(*x)
        return 0

    #### 2D encoding of A
    pwex = pywren.lambda_executor()
    t_enc1 = time.time()
    futures2 = pwex.map(get_block_wrapper, to_read_phase1)
    result_count = 0
    fs_dones = []
    while (result_count < thres * len(to_read_phase1)):
        fs_dones, fs_notdones = pywren.wait(futures2, 2)
        result_count = len(fs_dones)
        print(result_count)
        time.sleep(3)
    for f in fs_dones:
        try:
            f.result()
        except Exception as e:
            print(e)
            pass
    t_enc1 = time.time() - t_enc1
    print("Encoding phase 1 time", t_enc1)

    t_enc2 = time.time()
    futures2 = pwex.map(get_block_wrapper, to_read_phase2)
    result_count = 0
    while (result_count < thres * len(to_read_phase2)):
        fs_dones, fs_notdones = pywren.wait(futures2, 2)
        result_count = len(fs_dones)
        print(result_count)
        time.sleep(3)
    for f in fs_dones:
        try:
            f.result()
        except Exception as e:
            print(e)
            pass
    t_enc2 = time.time() - t_enc2
    print("Encoding phase 2 time", t_enc2)
    print("Total ENCODING time", t_enc1 + t_enc2)

    # a = list(set(A_coded_2D.block_idxs_not_exist) - set(A.block_idxs_exist))
    # print("Still to encode", a)
    return A_coded_2D
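A minimal usage sketch (not in the original snippet), assuming A is a BigMatrix whose number of row blocks is a multiple of the chosen num_parity_blocks and that make_coding_function2D is available as in the surrounding module.

# Hypothetical call: 2D-encode A with 4 parity blocks, waiting for 95% of the encoders.
A_coded_2D = code_2D(A, num_parity_blocks=4, thres=0.95)
print("Coded matrix key:", A_coded_2D.key)
print("Parity blocks still missing:", len(A_coded_2D.block_idxs_not_exist))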
Example #4
def start_encode_mtx(M, blocks_per_parity, s3_key):
    """ 
    Apply a (blocks_per_parity + 1, blocks_per_parity) MDS code to the matrix M every 
    blocks_per_parity rows by summing up the previous blocks_per_parity rows. 
    
    Params
    ======
    M : numpywren.matrix.BigMatrix
        The matrix to encode.
    
    blocks_per_parity : int
        The number of input blocks summed up to create each parity block. Note that as
        this number increases, less redundancy is provided.
        
    s3_key : str
        The storage key for Amazon S3.
        
    Returns
    =======
    M_coded : numpywren.matrix.BigMatrix
        The encoded matrix.
        
    futures : list
        List of the pywren futures.
        
    num_workers : int
        The number of workers.
    """
    # Useful definitions
    num_row_blocks = M.shape[0] // M.shard_sizes[0]
    num_col_blocks = M.shape[1] // M.shard_sizes[1]
    num_parity = num_row_blocks // blocks_per_parity  # total number of parity blocks that will be added
    coded_shape = (M.shape[0] + num_parity * M.shard_sizes[0], M.shape[1])

    # Ensure no blocks will go uncoded
    if num_row_blocks % blocks_per_parity != 0:
        raise ValueError("Number of row blocks ({0}) is not divisible "
                         "by number of blocks per parity ({1})".format(
                             num_row_blocks, blocks_per_parity))

    # Create the coded matrix object
    coding_fn = make_coding_function(M, blocks_per_parity)
    M_coded = matrix.BigMatrix(s3_key,
                               shape=coded_shape,
                               shard_sizes=M.shard_sizes,
                               write_header=True,
                               parent_fn=coding_fn)
    M_coded.delete()  # Only needed if you reuse the same s3_key (if the blocks already exist, no work will be done here)

    # Generate encoding indices
    encode_idx = []
    for j in range(num_col_blocks):
        for i in range(1, num_parity + 1):
            encode_idx.append((i * (blocks_per_parity + 1) - 1, j))
    num_workers = len(encode_idx)

    # Encode the matrix
    pwex = pywren.lambda_executor()
    futures = pwex.map(lambda x: get_block_wrapper(M_coded, x), encode_idx)
    return M_coded, futures, num_workers
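A hypothetical usage sketch (not in the original): launch the encoding of a sharded BigMatrix M and block until every encoding worker has reported back, reusing the ANY_COMPLETED constant that the other examples assume is imported.

# Hypothetical call: add one parity block for every 4 row blocks of M.
M_coded, futures, num_workers = start_encode_mtx(M, blocks_per_parity=4, s3_key="M_coded")
fs_done = []
while len(fs_done) < num_workers:
    fs_done, _ = pywren.wait(futures, return_when=ANY_COMPLETED)
print("All", num_workers, "encoding workers finished")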
Example #5
## Define number of processors to use while calculating the gradient

n_procs = 60

## Define number of parity blocks to use for coded computation
num_parity_blocks = 6  # Make num_parity_blocks close to sqrt(n_procs) for efficiency
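# Illustrative sanity check (an addition, not in the original script): the coded schemes
# in these examples (e.g. code_2D) require the number of row blocks, which equals n_procs
# here since each processor owns one row block, to be a multiple of num_parity_blocks.
# sqrt(60) is roughly 7.7, and 6 is a nearby divisor of 60, hence the choice above.
assert n_procs % num_parity_blocks == 0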
"""
Define numpywren BigMatrix and make sure the data
has been uploaded to S3 cloud storage
(done using the script upload_data_to_s3)
"""

X_s3_conv = matrix.BigMatrix("logistic_synthetic_data_{0}_{1}_{2}".format(
    n_samples, n_features, n_procs),
                             shape=(n_samples, n_features),
                             shard_sizes=(n_samples // n_procs, n_features),
                             write_header=True)

X_s3_test = matrix.BigMatrix("logistic_epsilon_test_data_{0}_{1}".format(
    n_samples_test, n_features),
                             shape=(n_samples_test, n_features),
                             shard_sizes=(n_samples_test, n_features),
                             write_header=True)

y_s3_conv = matrix.BigMatrix("logistic_synthetic_data_y_{0}_{1}".format(
    n_samples, n_procs),
                             shape=(n_samples, ),
                             shard_sizes=(n_samples // n_procs, ),
                             write_header=True)
Example #6
n_features = 3000
n_samples = 300000
n_samples_test = int(0.25 * n_samples)
X, X_test, y2, y_test = toy_logistic_data(n_samples, n_samples_test,
                                          n_features)
y = y2

## Define number of processors to use while calculating the gradient

n_procs = 60

## Define numpywren BigMatrix and upload data to S3 cloud storage

X_s3_conv = matrix.BigMatrix("logistic_synthetic_data_{0}_{1}_{2}".format(
    n_samples, n_features, n_procs),
                             shape=(n_samples, n_features),
                             shard_sizes=(n_samples // n_procs, n_features),
                             write_header=True)
shard_matrix(X_s3_conv, X, overwrite=True)

X_s3_unconv = matrix.BigMatrix(
    "logistic_synthetic_data_{0}_{1}_{2}".format(n_samples, n_features,
                                                 n_procs),
    shape=(n_samples, n_features),
    shard_sizes=(n_samples, int(np.ceil(n_features / n_procs))),
    write_header=True)
shard_matrix(X_s3_unconv, X, overwrite=True)

X_s3_test = matrix.BigMatrix("logistic_epsilon_test_data_{0}_{1}".format(
    n_samples_test, n_features),
                             shape=(n_samples_test, n_features),
Example #7
    pca_dpi = args.pca_sample_descs_per_image
    num_sample_descs = pca_dpi * pca_sample_images
    sifts_hash = utils.hash_string(
        utils.hash_args((train_keys, args.pca_dim, pca_sample_images,
                         args.pca_sample_descs_per_image, args.random_seed,
                         args.pca_dim)) +
        utils.hash_function(calculate_sifts) + utils.hash_function(sift.sift))
    lcs_hash = utils.hash_string(
        utils.hash_args((train_keys, args.pca_dim, pca_sample_images,
                         args.pca_sample_descs_per_image, args.random_seed,
                         args.pca_dim)) + utils.hash_function(calculate_lcs) +
        utils.hash_function(lcs.lcs))

    sift_sample_descs = matrix.BigMatrix(sifts_hash,
                                         shape=(num_sample_descs,
                                                SIFT_DESC_LENGTH),
                                         shard_sizes=(pca_dpi**2,
                                                      SIFT_DESC_LENGTH),
                                         write_header=True)

    lcs_sample_descs = matrix.BigMatrix(lcs_hash,
                                        shape=(num_sample_descs,
                                               LCS_DESC_LENGTH),
                                        shard_sizes=(pca_dpi**2,
                                                     LCS_DESC_LENGTH),
                                        write_header=True)

    block_idxs_not_exist = sift_sample_descs.block_idxs_not_exist
    print("Sample Descs Blocks not exist", len(block_idxs_not_exist))
    print("Sample Descs Blocks total", len(sift_sample_descs.block_idxs))
    pca_sample_train_keys = train_keys[idxs_sample]
    chunked_train_keys = list(utils.chunk(pca_sample_train_keys, pca_dpi))
Example #8
import numpy as np

from numpywren import matrix, matrix_utils
from numpywren import binops
from numpywren.matrix_init import shard_matrix
from OverSketch import OverSketchFunc

m = 2000
n = 10000
b = 1000
l = 3000
d = int(4 * b)

A_loc = np.asarray(range(m * n))
A_loc = A_loc.reshape(m, n)
B_loc = np.random.rand(n, l)
A = matrix.BigMatrix("oversketch_A_{0}_{1}_{2}".format(m, n, b),
                     shape=(m, n),
                     shard_sizes=(b, n),
                     write_header=True)
shard_matrix(A, A_loc)
B = matrix.BigMatrix("oversketch_B_{0}_{1}_{2}".format(n, l, b),
                     shape=(n, l),
                     shard_sizes=(n, b),
                     write_header=True)
shard_matrix(B, B_loc)

print("A and B done")

AB = OverSketchFunc(A, B, d)

print("OverSketch done")

c = AB.numpy()
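Since OverSketch produces a randomized approximation of A * B, a quick local comparison against the exact product can gauge the sketching error; this check is an addition, not part of the original script.

# Optional sanity check: relative Frobenius-norm error of the sketched product.
exact = A_loc.dot(B_loc)
rel_err = np.linalg.norm(c - exact) / np.linalg.norm(exact)
print("Relative error of OverSketch product:", rel_err)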
Example #9
def OverSketchFunc(A, B, d, thres = 0.95):

    m = A.shape[0]
    n = A.shape[1]
    l = B.shape[1]
    b = A.shard_sizes[0]

    assert (d % b == 0)
    assert (m % b == 0)
    assert (l % b == 0)
    assert (b == B.shard_sizes[1])

    N = int(d/b)

    sketch_A = matrix.BigMatrix("sketch_A_{0}_{1}".format(m, d), shape=(m, d), shard_sizes=(b, b))
    sketch_BT = matrix.BigMatrix("sketch_B_{0}_{1}".format(l, d), shape=(l, d), shard_sizes=(b, b))

    hashes = np.random.randint(0, b, size=(N, n))
    flips = np.random.choice([-1,1], size=(N, n))

    def OverSketchMatrix(id, X, hashes, flips, b, sketch):
        """
        Calculates OverSketch AS for a row-block of a fat matrix A with block-size b
        """
        x = id[0]
        y = id[1]
        A = X.get_block(x,0)
        m,n = A.shape
        hash_local = hashes[y,:]
        flip_local = flips[y,:]
        sketch_block = np.zeros((m, b))
        for i in range(n):
            sketch_block[:, hash_local[i]] += flip_local[i]*A[:,i]
        sketch.put_block(sketch_block, x, y)
        return 0

    pwex = pywren.lambda_executor()

    t1 = time.time()
    futuresA = pwex.map(lambda x: OverSketchMatrix(x, A, hashes, flips, b, sketch_A), sketch_A.block_idxs)
    futuresB = pwex.map(lambda x: OverSketchMatrix(x, B.T, hashes, flips, b, sketch_BT), sketch_BT.block_idxs)
    fs_donesA = pywren.wait(futuresA, 2)[0]
    fs_donesB = pywren.wait(futuresB, 2)[0]
    # Wait until both sketch_A and sketch_BT reach the completion threshold
    while len(fs_donesA) < thres*len(futuresA) or len(fs_donesB) < thres*len(futuresB):
        fs_donesA = pywren.wait(futuresA, 2)[0]
        fs_donesB = pywren.wait(futuresB, 2)[0]
    print("Sketching time", time.time() - t1)

    ## Computation phase
    def blockMatMul(A, B, tensorAB, id):
        """
        Multiplies A and B.T in a blocked fashion
        """
        i = id[0]
        j = id[1]
        k = id[2]
        X = A.get_block(i,k)
        Y = B.get_block(j,k)
        tensorAB[k].put_block(X.dot(Y.T), i, j)
        return 0

    tensorAB = []
    for x in range(N):
        tensorAB.append(matrix.BigMatrix("AxB_outer_{0}_{1}_{2}".format(m, l, x), shape=(m, l), shard_sizes=(b, b)))

    computeArr = [(i,j,k) for (i,k) in sketch_A.block_idxs for j in sketch_BT._block_idxs(0)]

    t1 = time.time()
    futures = pwex.map(lambda x: blockMatMul(sketch_A, sketch_BT, tensorAB, x), computeArr)
    fs_dones = pywren.wait(futures, 2)[0]
    while len(fs_dones)<thres*len(futures):
        fs_dones = pywren.wait(futures, 2)[0]
    print("Computation time", time.time() - t1)

    ## Reduction phase

    def blockMatMulReduction(tensorAB, AB, id):
        """
        Reduces the output from computation phase to get A*B
        Variable 'count' keeps track of number of blocks that have returned 
        """
        i = id[0]
        j = id[1]
        X = None
        count = 1
        for k in range(N):
            if X is None:
                try:
                    X = tensorAB[k].get_block(i,j)
                except Exception as e:
                    print(e)
                    pass
            else:
                try:
                    X = X + tensorAB[k].get_block(i,j)
                    count = count+1
                except Exception as e:
                    print(e)
                    pass
        AB.put_block(X/count, i, j)  
        return 0

    AB = matrix.BigMatrix("AxB_{0}_{1}".format(m, l), shape=(m, l), shard_sizes=(b, b))
    reduceArr = [(i,j) for i in sketch_A._block_idxs(0) for j in sketch_BT._block_idxs(0)]

    t1 = time.time()
    futures_red = pwex.map(lambda x: blockMatMulReduction(tensorAB, AB, x), reduceArr)
    fs_dones = pywren.wait(futures_red)[0]
    print("Reduction time", time.time() - t1)

    return AB
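To make the sketching step concrete, here is a small, purely local numpy illustration (an addition, not part of the original code) of what OverSketchMatrix computes for one block: every column of A receives a random sign and is added into one of b hash buckets, which is exactly multiplication by an n x b count-sketch matrix S.

import numpy as np

n, b = 8, 4
A_small = np.random.rand(3, n)
hash_local = np.random.randint(0, b, size=n)    # bucket index for each column
flip_local = np.random.choice([-1, 1], size=n)  # random sign for each column

# Bucket-and-sign accumulation, as in OverSketchMatrix
sketch_block = np.zeros((3, b))
for i in range(n):
    sketch_block[:, hash_local[i]] += flip_local[i] * A_small[:, i]

# Equivalent dense formulation: A_small @ S for an explicit count-sketch matrix S
S = np.zeros((n, b))
S[np.arange(n), hash_local] = flip_local
assert np.allclose(sketch_block, A_small @ S)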
Example #10
def gemm_coded(A, B, blocks_per_parity, s3_key, completion_pct=.7, encode_A=True, encode_B=True, np_A=-1, np_B=-1):
    """
    Compute A * B.T using a locally recoverable product code for redundancy.

    Params
    ======
    A : numpywren.matrix.BigMatrix
        First input matrix.
        
    B : numpywren.matrix.BigMatrix
        Second input matrix.
        
    blocks_per_parity : int
        Number of blocks to sum up when creating each parity block.
        
    s3_key : str
        Storage key for output matrix.
        
    completion_pct : float (in [0, 1])
        The fraction of multiplication workers that must finish before moving on to decoding.
        
    encode_A : bool
        Whether or not A needs to be encoded. 
        Allows for the user to pre-encode A if it will be used multiple times.
    
    encode_B : bool
        Whether or not B needs to be encoded.
        Allows for the user to pre-encode B if it will be used multiple times.
    
    np_A : int
        Number of parity blocks in the matrix A. Should be provided if and only if
        encode_A is set to false.
    
    np_B : int
        Number of parity blocks in the matrix B. Should be provided if and only if
        encode_B is set to false. 

    Returns
    =======
    C : numpywren.matrix.BigMatrix
        Resultant matrix product.
        
    t_enc : float
        Encoding time.
        
    t_comp : float
        Computation time.
        
    t_dec : float
        Decoding time.        
    """
    if (not encode_A) and np_A == -1:
        raise ValueError("You must provide the number of parity blocks in A if you pre-encoded it.")
    if (not encode_B) and np_B == -1:
        raise ValueError("You must provide the number of parity blocks in B if you pre-encoded it.")
    
    """Stage 1: Encoding"""
    start = time.time()
    if encode_A or encode_B:
        # Spin up encoding workers
        num_workers = 0
        if encode_A:
            A_coded, futures_encode_A, num_workers_A = start_encode_mtx(A, blocks_per_parity, "A_coded")
            num_workers += num_workers_A
        if encode_B:
            B_coded, futures_encode_B, num_workers_B = start_encode_mtx(B, blocks_per_parity, "B_coded")
            num_workers += num_workers_B
        
        # Wait until enough encoding workers are done to move on
        num_done = 0
        while num_done < MIN_ENCODING_COMPLETION_PCT * num_workers:
            fs_A, fs_B = [], []
            if encode_A:
                fs_A, _ = pywren.wait(futures_encode_A, return_when=ANY_COMPLETED)
            if encode_B:
                fs_B, _ = pywren.wait(futures_encode_B, return_when=ANY_COMPLETED)
            num_done = len(fs_A) + len(fs_B)    
    if not encode_A:
        A_coded = A
    if not encode_B:
        B_coded = B
    t_enc = time.time() - start # Total encoding time
    
    """Intermediate step: Initialize output matrix (untimed for consistency with gemm_recompute)."""
    # Determine coded dimensions of A, B
    if encode_A:
        num_parity_A = (A.shape[0] // A.shard_sizes[0]) // blocks_per_parity
        coded_shape_A = (A.shape[0] + num_parity_A * A.shard_sizes[0], A.shape[1])
    else:
        num_parity_A = np_A
        coded_shape_A = A_coded.shape    
    if encode_B:
        num_parity_B = (B.shape[0] // B.shard_sizes[0]) // blocks_per_parity
        coded_shape_B = (B.shape[0] + num_parity_B * B.shard_sizes[0], B.shape[1])
    else:
        num_parity_B = np_B
        coded_shape_B = B_coded.shape
    
    # Create (encoded) output matrix
    shard_sizes_C = (A.shard_sizes[0], B.shard_sizes[0])
    C_coded = matrix.BigMatrix(s3_key + "coded", shape=(A_coded.shape[0], B_coded.shape[0]), \
                               shard_sizes=shard_sizes_C, \
                               autosqueeze=False, \
                               write_header=True)
    C_coded.delete() # Only needed if you reuse the same s3_key (if the blocks already exist, no work will be done here)
        
    # Generate indices for the output matrix
    num_row_blocks_C = C_coded.shape[0] // C_coded.shard_sizes[0] 
    num_col_blocks_C = C_coded.shape[1] // C_coded.shard_sizes[1]
    num_cols_coded = A_coded.shape[1] // A_coded.shard_sizes[1] # Inner dimension of the coded multiplication
    block_idx_C = C_coded.block_idxs
    num_workers = len(block_idx_C)
    np.random.shuffle(block_idx_C) # Randomize jobs to avoid bad straggler locality
    
    """Stage 2: Multiply"""
    t_comp_start = time.time()
    pwex = pywren.lambda_executor()
    futures_matmul = pwex.map(lambda x: pywren_gemm(x, A_coded, B_coded, C_coded, num_cols_coded), block_idx_C)
    fs_done_matmul, num_done = [], 0
    while num_done < completion_pct * num_workers:
        fs_done_matmul, _ = pywren.wait(futures_matmul, return_when=ANY_COMPLETED)
        num_done = len(fs_done_matmul)
    t_comp = time.time() - t_comp_start # Total stage 2 time
        
    """Stage 3: Decoding"""
    t_dec_start = time.time()
    decode_idx = [(i, j) for i in range(num_parity_A) for j in range(num_parity_B)]
    num_workers = len(decode_idx)    
    futures_decode = pwex.map(lambda x: decode_gemm(num_row_blocks_C, num_parity_A, C_coded, x), decode_idx)
    fs_done_decode, num_done = [], 0
    while num_done < num_workers and len(C_coded.block_idxs_not_exist) > 0:
        fs_done_decode, _ = pywren.wait(futures_decode, return_when=ANY_COMPLETED)
        num_done = len(fs_done_decode)
    t_dec = time.time() - t_dec_start # Total stage 3 time
    
    """Final step: Specify the systematic part (i.e., all non-parity blocks) of the result"""
    # Determine output dimensions
    if encode_A:
        C_num_rows = A.shape[0]
    else:
        C_num_rows = A.shape[0] - np_A * A.shard_sizes[0]
    if encode_B:
        C_num_cols = B.shape[0]
    else:
        C_num_cols = B.shape[0] - np_B * B.shard_sizes[0]
    
    # Create the output matrix containing only the systematic part of the result
    get_systematic_part = systematicize(C_coded, blocks_per_parity)
    C_shard_sizes = (A.shard_sizes[0], B.shard_sizes[0])
    C = matrix.BigMatrix(s3_key, shape=(C_num_rows, C_num_cols), shard_sizes=C_shard_sizes, parent_fn=get_systematic_part)
    C.delete() # Only needed if you reuse the same s3_key (if the blocks already exist, no work will be done here)
    return C, t_enc, t_comp, t_dec
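A hypothetical end-to-end call (not from the original source), assuming A and B are pre-sharded BigMatrix inputs whose row-block counts are divisible by blocks_per_parity.

# Hypothetical call: coded A * B.T with 4 blocks per parity, decoding once 85% of workers finish.
C, t_enc, t_comp, t_dec = gemm_coded(A, B, blocks_per_parity=4,
                                     s3_key="gemm_coded_output", completion_pct=0.85)
print("Encoding time:", t_enc)
print("Multiplication time:", t_comp)
print("Decoding time:", t_dec)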