Example #1
def get_train_features(src_domain, tgt_domain):
    srcfile_loc = osp.join(DATA_DIR, src_domain + '.txt')
    tgtfile_loc = osp.join(DATA_DIR, tgt_domain + '.txt')

    # First build the vocabulary, used to generate the feature vectors
    vocab_all = create_vocab_all(srcfile_loc, tgtfile_loc)

    src_authors, src_author_paper = get_train_author_paper(srcfile_loc)
    tgt_authors, tgt_author_paper = get_train_author_paper(tgtfile_loc)

    src_paper_token = get_train_paper_token(srcfile_loc, vocab_all)
    tgt_paper_token = get_train_paper_token(tgtfile_loc, vocab_all)

    src_features = blas.sgemm(alpha=1.0,
                              a=src_author_paper,
                              b=src_paper_token.T,
                              trans_b=True)
    tgt_features = blas.sgemm(alpha=1.0,
                              a=tgt_author_paper,
                              b=tgt_paper_token.T,
                              trans_b=True)

    feature_dict = {}
    feature_dict['src_authors'] = src_authors
    feature_dict['tgt_authors'] = tgt_authors
    feature_dict['src_paper_author'] = src_author_paper.T
    feature_dict['tgt_paper_author'] = tgt_author_paper.T
    feature_dict['src_features'] = src_features
    feature_dict['tgt_features'] = tgt_features
    feature_dict['vocab_all'] = vocab_all

    return feature_dict
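A note on the call pattern above: sgemm receives b=X.T together with trans_b=True, which multiplies by (X.T).T == X while letting BLAS walk the C-ordered transpose view without an extra copy. A minimal sketch checking the equivalence (array names are illustrative):

import numpy as np
from scipy.linalg import blas

A = np.random.rand(4, 3).astype(np.float32)
B = np.random.rand(3, 5).astype(np.float32)
# (B.T).T == B, so this is just A @ B
res = blas.sgemm(alpha=1.0, a=A, b=B.T, trans_b=True)
assert np.allclose(res, A @ B, atol=1e-5)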
Example #2
    def test_gemm_323(self):
        A = numpy.arange(6).reshape((2, 3)) + 1
        B = numpy.arange(6).reshape((3, 2)) + 10
        for dtype in [numpy.float32, numpy.float64, numpy.int64]:
            a = A.astype(dtype)
            b = B.astype(dtype)
            for t1 in [False, True]:
                for t2 in [False, True]:
                    with self.subTest(dtype=dtype,
                                      transA=t1,
                                      transB=t2,
                                      shapeA=a.shape,
                                      shapeB=b.shape):
                        ta = a.T if t1 else a
                        tb = b.T if t2 else b
                        try:
                            exp = ta @ tb
                        except ValueError:
                            continue

                        if t1:
                            M = a.shape[1]
                            lda = a.shape[0]
                            K = a.shape[0]
                        else:
                            M = a.shape[0]
                            lda = a.shape[0]
                            K = a.shape[1]

                        if t2:
                            N = b.shape[0]
                            ldb = b.shape[1]
                        else:
                            N = b.shape[1]
                            ldb = b.shape[1]
                        ldc = N

                        c = numpy.empty(M * N, dtype=a.dtype)
                        pygemm(t2, t1, N, M, K, 1., b.ravel(), ldb, a.ravel(),
                               lda, 0., c, ldc)
                        cc = c.reshape((M, N))
                        # self.assertEqualArray(exp, cc)

                        if dtype == numpy.float32:
                            res = sgemm(1, a, b, 0, cc, t1, t2)
                            self.assertEqualArray(exp, res)

                            cc[:, :] = 0
                            sgemm(1, a, b, 0, cc, t1, t2, 1)
                            try:
                                self.assertEqualArray(exp, cc)
                            except AssertionError:
                                # Overwriting the result does not seem
                                # to work.
                                pass

                        got = gemm_dot(a, b, t1, t2)
                        self.assertEqualArray(exp, got)
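The swapped operand order in the pygemm call above (b before a, with N and M exchanged) looks like the usual device for driving a column-major GEMM from row-major NumPy buffers; assuming pygemm follows that BLAS convention, the trick rests on the identity (A.B)^T = B^T.A^T:

import numpy

A = numpy.arange(6.0).reshape((2, 3))
B = numpy.arange(6.0).reshape((3, 2))
# Computing B^T @ A^T in column-major order leaves A @ B laid out
# row-major in memory, so no final transpose is needed.
assert numpy.allclose((B.T @ A.T).T, A @ B)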
Example #3
def _compute_reprs(net_in, net, layers_style, layers_content, gram_scale=1):
    """
        Computes representation matrices for an image.
        :param net_in: content image or style image
        :param net: caffe network
        :param layers_style: layers selected for the style target.
            If net_in is the content image, this should be []
        :param layers_content: layers selected for the content target.
            If net_in is the style image, this should be []
    """
    # input data and forward pass
    (repr_s, repr_c) = ({}, {})
    net.blobs["data"].data[0] = net_in
    net.forward()
    # decide if net_in is content image or style image
    if layers_style == []:
        repr_s = {}
    if layers_content == []:
        repr_c = {}
    """
    TODO #6
    Calculate representations for content and style
    """
    for layer in (set(layers_style) | set(layers_content)):
        f_sc = net.blobs[layer].data[0].copy()
        (a, b, c) = f_sc.shape
        f_sc = np.reshape(f_sc, (a, b * c))
        repr_c[layer] = f_sc
        if layer in layers_style:
            repr_s[layer] = sgemm(gram_scale, f_sc, f_sc.T)
    return repr_s, repr_c
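The Gram matrix built above is just the product of the reshaped feature map with its own transpose; a small float32 sketch of the equivalence (sgemm is single precision):

import numpy as np
from scipy.linalg.blas import sgemm

F = np.random.rand(8, 20).astype(np.float32)
gram = sgemm(1.0, F, F.T)  # same result as 1.0 * (F @ F.T)
assert np.allclose(gram, F @ F.T, atol=1e-4)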
Example #4
def matrix_mult(i,len_i,j,len_j,mat_size,name_A,name_B,name_C):
    #The different cores often have separate timers, so we record a time before any calculation so that the results can be offset to a common start
    stabiliser_time = time.time()
    #Identifies the shared memory blocks of A,B,C
    existing_shm_A = shared_memory.SharedMemory(name=name_A)
    existing_shm_B = shared_memory.SharedMemory(name=name_B)
    existing_shm_C = shared_memory.SharedMemory(name=name_C)
    #Calculates the (i,j) coordinates of the submatrices that must be worked on
    i1 = i*len_i
    i2 = (i+1)*len_i
    j1 = j*len_j
    j2 = (j+1)*len_j
    #Reads the relevant block of A,B,C from shared memory
    sub_mat_A = np.ndarray((mat_size,mat_size), dtype=np.float32, buffer=existing_shm_A.buf)[i1:i2,:]
    sub_mat_B = np.ndarray((mat_size,mat_size), dtype=np.float32, buffer=existing_shm_B.buf)[:,j1:j2]
    sub_mat_C = np.ndarray((mat_size,mat_size), dtype=np.float32, buffer=existing_shm_C.buf)
    #Marks the start of the calculation time
    calc_start = time.time()
    #----------------------------------------------
    #Calculates the submatrix C' using sgemm and saves it to shared memory
    sub_mat_C[i1:i2,j1:j2] = FB.sgemm(alpha=1.0, a=sub_mat_A, b=sub_mat_B)
    #----------------------------------------------
    #Marks the end of the calculation time
    calc_finish = time.time()
    #Closes the link to the shared memory blocks
    existing_shm_A.close()
    existing_shm_B.close()
    existing_shm_C.close()
    #Returns all the timing results
    return stabiliser_time, calc_start, calc_finish
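For context, a hypothetical single-process driver for the worker above; the block names ('demo_A' etc.), the 2x2 block grid and the matrix size are assumptions for illustration, and the worker's own np/FB/time imports must be in place:

from multiprocessing import shared_memory
import numpy as np

mat_size, blocks = 4, 2
len_i = len_j = mat_size // blocks
nbytes = mat_size * mat_size * np.dtype(np.float32).itemsize
shm_A = shared_memory.SharedMemory(create=True, size=nbytes, name='demo_A')
shm_B = shared_memory.SharedMemory(create=True, size=nbytes, name='demo_B')
shm_C = shared_memory.SharedMemory(create=True, size=nbytes, name='demo_C')
np.ndarray((mat_size, mat_size), np.float32, shm_A.buf)[:] = np.random.rand(mat_size, mat_size)
np.ndarray((mat_size, mat_size), np.float32, shm_B.buf)[:] = np.random.rand(mat_size, mat_size)
for i in range(blocks):      # each (i, j) pair would normally run in its own process
    for j in range(blocks):
        matrix_mult(i, len_i, j, len_j, mat_size, 'demo_A', 'demo_B', 'demo_C')
for shm in (shm_A, shm_B, shm_C):
    shm.close()
    shm.unlink()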
Example #5
    def test_gemm_1(self):
        A = numpy.arange(1).reshape((1, 1)) + 1
        B = numpy.arange(1).reshape((1, 1)) + 10
        for dtype in [numpy.float32, numpy.float64, numpy.int64]:
            a = A.astype(dtype)
            b = B.astype(dtype)
            for t1 in [False, True]:
                for t2 in [False, True]:
                    with self.subTest(dtype=dtype,
                                      transA=t1,
                                      transB=t2,
                                      shapeA=a.shape,
                                      shapeB=b.shape):
                        ta = a.T if t1 else a
                        tb = b.T if t2 else b
                        exp = ta @ tb
                        got = gemm_dot(a, b, t1, t2)
                        self.assertEqualArray(exp, got)

                        M, N, K = 1, 1, 1
                        lda, ldb, ldc = 1, 1, 1

                        c = numpy.empty(M * N, dtype=a.dtype)
                        pygemm(t2, t1, M, N, K, 1., b.ravel(), ldb, a.ravel(),
                               lda, 0., c, ldc)
                        cc = c.reshape((M, N))
                        self.assertEqualArray(exp, cc)

                        if dtype == numpy.float32:
                            res = sgemm(1, a, b, 0, cc, t1, t2)
                            self.assertEqualArray(exp, res)
Example #6
def matrix_mult(i, len_i, j, len_j, mat_size, name_A, name_B, name_C):
    stabiliser_time = time.time()
    existing_shm_A = shared_memory.SharedMemory(name=name_A)
    existing_shm_B = shared_memory.SharedMemory(name=name_B)
    existing_shm_C = shared_memory.SharedMemory(name=name_C)
    i1 = i * len_i
    i2 = (i + 1) * len_i
    j1 = j * len_j
    j2 = (j + 1) * len_j
    sub_mat_A = np.ndarray((mat_size, mat_size),
                           dtype=np.float32,
                           buffer=existing_shm_A.buf)[i1:i2, :]
    sub_mat_B = np.ndarray((mat_size, mat_size),
                           dtype=np.float32,
                           buffer=existing_shm_B.buf)[:, j1:j2]
    sub_mat_C = np.ndarray((mat_size, mat_size),
                           dtype=np.float32,
                           buffer=existing_shm_C.buf)
    calc_start = time.time()
    sub_mat_C[i1:i2, j1:j2] = FB.sgemm(alpha=1.0, a=sub_mat_A, b=sub_mat_B)
    calc_finish = time.time()
    existing_shm_A.close()
    existing_shm_B.close()
    existing_shm_C.close()
    return stabiliser_time, calc_start, calc_finish
Example #7
 def _gram(self, layer):
     """
     Compute gram matrix; just the dot product of the layer and its
     transpose
     """
     gram = blas.sgemm(1.0, layer, layer.T)
     return gram
Example #8
 def _gram(self, layer):
     """
     Compute gram matrix; just the dot product of the layer and its
     transpose
     """
     gram = blas.sgemm(1.0, layer, layer.T)
     return gram
Example #9
    def style_lag(self, noisies, grams, i, compute_grad=False):
        """
        Compute style losses and gradients for all gram matrices

        This is compressed into one function to save intermediate computations.

        It is assumed that the gram matrices and self.style_targets correspond to
        identical layers.
        """
        # Get everything.
        style_noisy = noisies[i]
        style_gram = grams[i]
        style_target = self.style_targets[i]
        weight = STYLE_WEIGHTS[i]

        diff = (style_gram - style_target)
        size_c = (1. / ((style_noisy.shape[0] ** 2) *
                  (style_noisy.shape[1] ** 2)))
        loss = (size_c / 4) * (diff**2).sum() * weight

        if compute_grad:
            gradient = (size_c * blas.sgemm(1.0, diff, style_noisy) *
                        (style_noisy > 0) * weight)
            return loss, gradient

        return loss, None
Example #10
    def style_lag(self, noisies, grams, i, compute_grad=False):
        """
        Compute style losses and gradients for all gram matrices

        This is compressed into one function to save intermediate computations.

        It is assumed that the gram matrices and self.style_targets correspond to
        identical layers.
        """
        # Get everything.
        style_noisy = noisies[i]
        style_gram = grams[i]
        style_target = self.style_targets[i]
        weight = STYLE_WEIGHTS[i]

        diff = (style_gram - style_target)
        size_c = (1. / ((style_noisy.shape[0]**2) * (style_noisy.shape[1]**2)))
        loss = (size_c / 4) * (diff**2).sum() * weight

        if compute_grad:
            gradient = (size_c * blas.sgemm(1.0, diff, style_noisy) *
                        (style_noisy > 0) * weight)
            return loss, gradient

        return loss, None
Example #11
def test_scipy_sgemm():
    pair_domain = four_pair_of_domains[0]
    src_domain, tgt_domain = pair_domain[0], pair_domain[1]
    srcfile_loc = osp.join(DATA_DIR, src_domain + '.txt')
    tgtfile_loc = osp.join(DATA_DIR, tgt_domain + '.txt')

    src_names, src_name_line_count = get_names_counts(srcfile_loc)
    tgt_names, tgt_name_line_count = get_names_counts(tgtfile_loc)
    common_tokens = get_common_tokens(srcfile_loc, tgtfile_loc)

    print('src', src_name_line_count.shape)
    print('tgt', tgt_name_line_count.shape)
    print('common', len(common_tokens))

    src_line_token_count = get_tokens_counts(srcfile_loc, common_tokens)
    tgt_line_token_count = get_tokens_counts(tgtfile_loc, common_tokens)

    print('src token', src_line_token_count.shape)
    print('tgt token', tgt_line_token_count.shape)

    res = blas.sgemm(alpha=1.0,
                     a=src_name_line_count,
                     b=src_line_token_count.T,
                     trans_b=True)

    print('we need <dot result> == <sgemm result>')
    for i in range(10):
        X, Y = np.nonzero(res)
        rand_ind = random.choice(range(len(X)))
        xx = X[rand_ind]
        yy = Y[rand_ind]
        print('dot res:',
              np.dot(src_name_line_count[xx, :], src_line_token_count[:, yy]))
        print('sgemm res:', res[xx, yy])
        print()
Example #12
def matrix_mult(i1, i2, j1, j2, mat_size):
    A_np = np.frombuffer(var_dict['A'], dtype=np.float32).reshape(
        (mat_size, mat_size))
    B_np = np.frombuffer(var_dict['B'], dtype=np.float32).reshape(
        (mat_size, mat_size))
    #mat_C = np.zeros((i2-i1,j2-j1))
    mat_C = FB.sgemm(alpha=1.0, a=A_np[i1:i2, :], b=B_np[:, j1:j2])
    return mat_C
Example #13
    def gram_matrix(X, y=None, kernel="linear", bandwidth=1, centered=False):
        k = None

        if kernel == "linear":
            if y is None:
                k = sgemm(alpha=1.0, a=X, b=X, trans_b=True)
            else:
                k = sgemm(alpha=1.0, a=X, b=y, trans_b=True)

        elif kernel == "rbf":
            # print("rbf kernel: ")
            euc_dist = np.einsum('ij,ij->i', X, X)
            if y is not None:
                euc_dist_y = np.einsum('ij,ij->i', y, y)
            else:
                euc_dist_y = euc_dist
                y = X

            k = ne.evaluate(
                'exp(-b * (A + B - 2 * C))', {
                    'A': euc_dist[:, None],
                    'B': euc_dist_y[None, :],
                    'C': sgemm(alpha=1.0, a=X, b=y, trans_b=True),
                    'b': bandwidth,
                })

        # elif kernel == "rbf":
        #     print("rbf kernel: ")
        #     euc_dist = np.einsum('ij,ij->i', X, X)
        #     k = ne.evaluate('exp(-b * (A + B - 2 * C))', {
        #         'A': euc_dist[:, None],
        #         'B': euc_dist[None, :],
        #         'C': sgemm(alpha=1.0, a=X, b=X, trans_b=True),
        #         'b': bandwidth,
        #     })

        if centered and k.shape[0] == k.shape[1]:
            N = X.shape[0]
            identity_n = np.ones((N, N)) / N
            first_term = k
            second_term = np.dot(identity_n, k)
            third_term = np.dot(k, identity_n)
            fourth_term = np.dot(identity_n, third_term)
            k = first_term - second_term - third_term + fourth_term

        return k
Example #14
def main():
    lock = int(sys.argv[1])
    size_list = [2**i for i in range(8,16)]
    no_runs = 10
    time_df = pd.DataFrame(columns=["My function (Python)",
                                    "My function (32 Cores Python)",
                                    "matmul (NumPy Python)",
                                    "dgemm (Python)",
                                    "sgemm (Python)"])
    for mat_size in size_list:
        print(f"Mat size: {mat_size}")
        for i in range(no_runs):
            print(f"i: {i}")

            m1 = np.random.rand(mat_size,mat_size).astype(np.float32)
            m2 = np.random.rand(mat_size,mat_size).astype(np.float32)
            new_times=[]

            time.sleep(10)
            
            if mat_size <= 2048:
                my_func_start = time.perf_counter()
                m_myfunc = matrix_mult(m1,m2)
                my_func_finish = time.perf_counter()
                new_times.append(round(my_func_finish-my_func_start,8))
                
                my_func_32cores_start = time.perf_counter()
                time_taken, m_myfunc32 = gen_time_results(mat_size,32,m1,m2)
                my_func_32cores_finish = time.perf_counter()
                new_times.append(round(my_func_32cores_finish-my_func_32cores_start,8))

            else:
                new_times.append(None)
                new_times.append(None)
            
            numpy_start = time.perf_counter()
            mn = np.matmul(m1,m2)
            numpy_finish = time.perf_counter()
            new_times.append(round(numpy_finish-numpy_start,8))
            
            dgemm_start = time.perf_counter()
            md = FB.dgemm(alpha=1.0, a=m1, b=m2)
            dgemm_finish = time.perf_counter()
            new_times.append(round(dgemm_finish-dgemm_start,8))
            
            sgemm_start = time.perf_counter()
            ms = FB.sgemm(alpha=1.0, a=m1, b=m2)
            sgemm_finish = time.perf_counter()
            new_times.append(round(sgemm_finish-sgemm_start,8))

            print(new_times)
            
            time_df = time_df.append(
                pd.DataFrame([new_times],
                             columns=["My function (Python)",
                                      "My function (32 Cores Python)",
                                      "matmul (NumPy Python)",
                                      "dgemm (Python)",
                                      "sgemm (Python)"],
                             index=[mat_size]))
            if lock:
                time_df.to_pickle("time_df_libraries_lock.pkl")
            else:
                time_df.to_pickle("time_df_libraries_no_lock.pkl")
Example #15
def style_loss(F, A, layer, style_layers):
    idx = style_layers.index(layer)+1

    Fl = np.squeeze(F[idx])
    Al = np.squeeze(A[idx])

    channel, row, col = Fl.shape
    Fl = Fl.reshape((channel, row*col))
    Al = Al.reshape((channel, row*col))

    gram_F = sgemm(1, Fl, Fl.T)
    gram_A = sgemm(1, Al, Al.T)

    denom = (2*channel*row*col)**2
    loss = np.sum((gram_F-gram_A)**2) / denom
    grad = 4 * sgemm(1, gram_F-gram_A, Fl) * (Fl > 0) / denom

    return loss, grad
Example #16
def rbf_kernel_fast(X, precision):
    gamma = precision / 2
    X_norm = -gamma * np.einsum('ij,ij->i', X, X)
    return ne.evaluate(
        'exp(A + B + C)', {
            'A': X_norm[:, None],
            'B': X_norm[None, :],
            'C': sgemm(alpha=2.0 * gamma, a=X, b=X, trans_b=True),
        })
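A quick numerical check of the fused numexpr expression against the direct definition exp(-(precision/2) * ||x - y||^2), assuming rbf_kernel_fast above is in scope with its np, ne and sgemm imports:

import numpy as np

X = np.random.rand(5, 3).astype(np.float32)
precision = 2.0
K = rbf_kernel_fast(X, precision)
D = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
assert np.allclose(K, np.exp(-(precision / 2) * D), atol=1e-4)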
Example #17
def _compute_style_grad(F, G, G_style, layer):
    """
        Computes style gradient and loss from activation features.
       """
    # compute loss and gradient
    (Fl, Gl) = (F[layer], G[layer])
    c = Fl.shape[0]**-2 * Fl.shape[1]**-2
    El = Gl - G_style[layer]
    loss = c / 4 * (El**2).sum()
    grad = c * sgemm(1.0, El, Fl) * (Fl > 0)
    return loss, grad
Example #18
def _compute_style_grad(F, G, G_style, layer):
    """
        Computes style gradient and loss from activation features.
    """

    # compute loss and gradient
    (Fl, Gl) = (F[layer], G[layer])
    c = Fl.shape[0]**-2 * Fl.shape[1]**-2
    El = Gl - G_style[layer]
    loss = c/4 * (El**2).sum()
    grad = c * sgemm(1.0, El, Fl) * (Fl>0)

    return loss, grad
Example #19
def rbf(X, Y=None, gamma=1.0, gradient=False):
    """
    Compute the row-wise kernel matrix of X and Y.
    
    Parameters
    ----------
    X : numpy array
        first array of size (n x m).
    Y : numpy array, optional
        second array of size (o x m). The default is None.
    gamma : float, optional
        Length scale. The default is 1.0.

    Returns
    -------
    kernel matrix as numpy array of size (n x o).
    """

    XX = np.einsum('ij,ij -> i', X, X)

    if Y is None:
        Y = X
        YY = XX
        Y_flag = True
    else:
        YY = np.einsum('ij,ij -> i', Y, Y)
        Y_flag = False

    dist = ne.evaluate(
        '(A + B - C) / g**2', {
            'A': XX[:, None],
            'B': YY[None, :],
            'C': sgemm(alpha=2, a=X, b=Y, trans_b=True),
            'g': gamma
        })

    if Y_flag: np.fill_diagonal(dist, 0)

    K = np.exp(-0.5 * dist)

    if gradient:
        grad = K * dist
        np.fill_diagonal(grad, 0)

        return K, grad

    return K
Example #20
def gen_time_results(mat_size, no_runs):
    mat_A = np.random.rand(mat_size, mat_size)
    mat_B = np.random.rand(mat_size, mat_size)
    time_list_d = []
    time_list_s = []
    for _ in range(no_runs):
        start = time.perf_counter()
        result = FB.dgemm(alpha=1, a=mat_A, b=mat_B)
        finish = time.perf_counter()
        time_taken_d = round(finish - start, 10)
        time_list_d.append(time_taken_d)

        start = time.perf_counter()
        result = FB.sgemm(alpha=1, a=mat_A, b=mat_B)
        finish = time.perf_counter()
        time_taken_s = round(finish - start, 10)
        time_list_s.append(time_taken_s)
    return time_list_d, time_list_s
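Worth noting when reading these timings: np.random.rand returns float64, so FB.sgemm must copy-convert both operands to float32 on every call, and that conversion is charged to sgemm's time. A fairer single-precision measurement casts once outside the timed region (this is exactly what the casting experiment further down checks):

mat_A32 = mat_A.astype(np.float32)
mat_B32 = mat_B.astype(np.float32)
start = time.perf_counter()
result = FB.sgemm(alpha=1, a=mat_A32, b=mat_B32)
finish = time.perf_counter()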
Example #21
def _compute_reprs(net, layers_style, layers_content, net_in, scale_gram=1):
    """
        Computes representation matrices for an image.
    """

    # copy activations to output from forward pass
    (repr_s, repr_c) = ({}, {})
    net.blobs["data"].data[0] = net_in
    net.forward(end=net.params.keys()[-1])

    # loop through combined set of layers
    for layer in set(layers_style) | set(layers_content):
        F = net.blobs[layer].data[0].copy()
        F.shape = (F.shape[0], -1)
        repr_c[layer] = F
        if layer in layers_style:
            repr_s[layer] = sgemm(scale_gram, F, F.T)

    return repr_s, repr_c
Example #22
def _compute_reprs(net, layers_style, layers_content, net_in, scale_gram=1):
    """
        Computes representation matrices for an image.
    """

    # copy activations to output from forward pass
    (repr_s, repr_c) = ({}, {})
    net.blobs["data"].data[0] = net_in
    net.forward(end=net.params.keys()[-1])

    # loop through combined set of layers
    for layer in set(layers_style)|set(layers_content):
        F = net.blobs[layer].data[0].copy()
        F.shape = (F.shape[0], -1)
        repr_c[layer] = F
        if layer in layers_style:
            repr_s[layer] = sgemm(scale_gram, F, F.T)

    return repr_s, repr_c
Example #23
def _compute_reprs(net_in, net, layers_style, layers_content, gram_scale=1):
    """
        Computes representation matrices for an image.
    """

    # input data and forward pass
    (repr_s, repr_c) = ({}, {})
    net.blobs["data"].data[0] = net_in
    net.forward()

    # loop through combined set of layers
    for layer in set(layers_style)|set(layers_content):
        F = net.blobs[layer].data[0].copy()
        F.shape = (F.shape[0], -1)
        repr_c[layer] = F
        if layer in layers_style:
            repr_s[layer] = sgemm(gram_scale, F, F.T)

    return repr_s, repr_c
Example #24
def _compute_reprs(net_in, net, layers_style, layers_content, gram_scale=1):
    """
        Computes representation matrices for an image.
    """

    # input data and forward pass
    (repr_s, repr_c) = ({}, {})
    net.blobs["data"].data[0] = net_in
    net.forward()

    # loop through combined set of layers
    for layer in set(layers_style) | set(layers_content):
        F = net.blobs[layer].data[0].copy()
        F.shape = (F.shape[0], -1)
        repr_c[layer] = F
        if layer in layers_style:
            repr_s[layer] = sgemm(gram_scale, F, F.T)

    return repr_s, repr_c
Example #25
def matrix_mult(mat_A, mat_B):
    calc_start = time.perf_counter()
    mat_C = FB.sgemm(alpha=1.0, a=mat_A, b=mat_B)
    calc_finish = time.perf_counter()
    return mat_C, calc_start, calc_finish
Example #26
    for mat_size in mat_sizes:
        print(f"Mat size: {mat_size}")
        total_time_DGEMM = 0
        for _ in range(no_runs):
            m1 = np.random.rand(mat_size, mat_size)
            m2 = np.random.rand(mat_size, mat_size)
            start = time.perf_counter()
            md = FB.dgemm(alpha=1, a=m1, b=m2)
            finish = time.perf_counter()
            time_taken = round(finish - start, 8)
            total_time_DGEMM += time_taken
            #assert md.all() == ans.all()
        print(total_time_DGEMM / no_runs)
    print("\n")

    print("---- LAPACK SGEMM----")
    for mat_size in mat_sizes:
        print(f"Mat size: {mat_size}")
        total_time_SGEMM = 0
        for _ in range(no_runs):
            m1 = np.random.rand(mat_size, mat_size)
            m2 = np.random.rand(mat_size, mat_size)
            start = time.perf_counter()
            ms = FB.sgemm(alpha=1, a=m1, b=m2)
            finish = time.perf_counter()
            time_taken = round(finish - start, 8)
            total_time_SGEMM += time_taken
            #assert ms.all() == ans.all()
        print(total_time_SGEMM / no_runs)
    print("\n")
Example #27
    send_list = []
    for i in range(i_len):
        for j in range(j_len):
            send_list.append([send_list_A[i], send_list_B[j]])
    #mat_A = None
    #mat_B = None
else:
    mat_A = None
    mat_B = None
    send_list = None

mats = comm.scatter(send_list, root=0)

calc_start = MPI.Wtime()

mat_C = FB.sgemm(alpha=1.0, a=mats[0], b=mats[1])

calc_finish = MPI.Wtime()

res_list = comm.gather(mat_C, root=0)

if rank == 0:
    res = np.vstack(np.split(np.concatenate(res_list, axis=1), i_len, axis=1))

total_finish = MPI.Wtime()

scatter_time = calc_start - total_start
calc_time = calc_finish - calc_start
gather_time = total_finish - calc_finish

scatter_sum = np.zeros(0)
Example #28
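#Opening and reading matrix A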
fh_A = MPI.File.Open(comm, f"mat_A/mat_A_{mat_size}_{iteration}.txt", amode_A)
buf_mat_A = np.empty((i_size, mat_size), dtype=np.float32)
offset_A = i_coord * buf_mat_A.nbytes
fh_A.Read_at_all(offset_A, buf_mat_A)
fh_A.Close()
#Opening and reading matrix B
fh_B = MPI.File.Open(comm, f"mat_B/mat_B_{mat_size}_{iteration}.txt", amode_B)
buf_mat_B = np.empty((j_size, mat_size), dtype=np.float32)
offset_B = j_coord * buf_mat_B.nbytes
fh_B.Read_at_all(offset_B, buf_mat_B)
mat_B = np.transpose(buf_mat_B)
fh_B.Close()

calc_start = MPI.Wtime()

buf_mat_C = FB.sgemm(alpha=1.0, a=buf_mat_A, b=mat_B)

calc_time = MPI.Wtime() - calc_start

fh_C = MPI.File.Open(comm, f"mat_C/mat_C_{mat_size}_{iteration}.txt", amode_C)
filetype = MPI.FLOAT.Create_vector(j_size, i_size, mat_size)
filetype.Commit()
offset_C = (mat_size * i_coord * i_size +
            j_coord * j_size) * MPI.FLOAT.Get_size()
fh_C.Set_view(offset_C, filetype=filetype)
fh_C.Write_all(buf_mat_C)
filetype.Free()
fh_C.Close()

total_time = MPI.Wtime() - t_start
Example #29
################################################################################

func = 'posv'

for prefix in ['s', 'd']:
    funcname = prefix + func
    dtype = get_dtype(funcname)

    m = 8192
    n = 100
    a = np.random.uniform(size=m * m).reshape((m, m)).astype(dtype)
    b = np.ones((m, n), dtype=dtype)
    alpha = 1.
    if dtype == np.float32:
        c = bl.sgemm(alpha, a, b)
    elif dtype == np.float64:
        c = bl.dgemm(alpha, a, b)

    get_time(funcname, [a, c], df)

################################################################################

func = 'potrf'

for prefix in ['s', 'd']:
    funcname = prefix + func
    dtype = get_dtype(funcname)

    m = 8192
    a = np.random.uniform(size=m * m).reshape((m, m)).astype(dtype)
Example #30
#Checking whether the runtime of LAPACK SGEMM changes drastically
#if the NumPy arrays are cast to single precision before the call

import numpy as np
import time
from scipy.linalg import blas as FB

mat_size = 8192

mat_A = np.random.rand(mat_size, mat_size)
mat_B = np.random.rand(mat_size, mat_size)

t0 = time.time()

mat_C = FB.sgemm(alpha=1.0, a=mat_A, b=mat_B)

t1 = time.time()

mat_A_new = mat_A.astype(np.float32)
mat_B_new = mat_B.astype(np.float32)

t2 = time.time()

mat_C_new = FB.sgemm(alpha=1.0, a=mat_A_new, b=mat_B_new)

t3 = time.time()

print(t1 - t0)
print(t3 - t2)
Example #31
import numpy as np
from scipy.linalg import blas as FB
import sys

#Reads the Matrix Size from the command line
mat_size = int(sys.argv[1])
iteration = int(sys.argv[2])

mat_A = np.loadtxt(f"mat_A/mat_A_{mat_size}_{iteration}.txt")
mat_B = np.loadtxt(f"mat_B/mat_B_{mat_size}_{iteration}.txt")
mat_B = np.transpose(mat_B)

answer = FB.sgemm(alpha=1.0, a=mat_A, b=mat_B)

res = np.loadtxt(f"mat_C/mat_C_{mat_size}_{iteration}.txt")

print(np.allclose(answer, res))
Example #32
                  dest=i,
                  tag=25)
    #Defines the matrices that the master core will be operating on
    sub_mat_A = mat_A[displ_A[0]:displ_A[0] + len_i]
    sub_mat_B = mat_B[displ_B[0]:displ_B[0] + len_j]
else:
    #Every worker core receives their submatrices of A,B
    comm.Recv([sub_mat_A, MPI.FLOAT], source=0)
    comm.Recv([sub_mat_B, MPI.FLOAT], source=0)

#Starts the timer for the beginning of the "calculation" portion
comm.Barrier()
calc_start = MPI.Wtime()

#Each core calculates their submatrix C' using sgemm. In the handwritten version of this, the "matrix_mult" function will be called instead
sub_mat_C = FB.sgemm(alpha=1.0, a=sub_mat_A, b=sub_mat_B, trans_b=True)

#Stops the timer for the "calculation" portion, starting the timer for the "gather" portion
comm.Barrier()
calc_finish = MPI.Wtime()

#Creates an empty matrix for the submatrices C' to be gathered into
mat_C = None
if rank == 0:
    mat_C = np.empty(mat_size * mat_size, dtype=np.float32)

#Gathers all of the submatrices C'
count_C = [len_i * len_j for _ in range(size)]
displ_C = [len_i * len_j * list_rank for list_rank in range(size)]
sub_mat_C = np.ascontiguousarray(sub_mat_C, dtype=np.float32)
comm.Gatherv(sub_mat_C, [mat_C, count_C, displ_C, MPI.FLOAT], root=0)
Example #33
print("Enter matrix size, m x n x l")
m = int(input("\n"))
n = int(input("\n"))
l = int(input("\n"))

a = np.random.random((m, l)).astype('float32')

b = np.identity(l).astype('float32')

c = np.zeros((m, n)).astype('float32')

itermax = 10

ts = time.time()
for iteration in range(itermax):
    c = blas.sgemm(1.0, a, b)

#c = blas.sgemm(1.0,a,b)

te = time.time()
duration = te - ts
flops = 2.0 * (np.double(m) * np.double(n) * np.double(l)) - (np.double(m) *
                                                              np.double(n))

gflops = (itermax * flops / duration) * 1.0e-9

print("c")
print(c)

print("a")
print(a)
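For reference, the FLOP count in the benchmark above follows from each of the m*n output elements needing l multiplications and l-1 additions, i.e. m*n*(2l - 1) = 2*m*n*l - m*n; note it only matches the actual multiply when n == l, since b is an l x l identity here. A tiny sanity check of the algebra:

m, n, l = 2, 3, 4  # hypothetical sizes
assert 2*m*n*l - m*n == m*n*(2*l - 1)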
Example #34
def check_answer(mat_A,mat_B,mat_C):
    answer = FB.sgemm(alpha=1.0, a=mat_A, b=mat_B)
    #rounded_answer = np.around(answer,decimals=5)
    #rounded_mat_C = np.around(mat_C,decimals=5)
    return np.allclose(answer,mat_C)
Example #35
def test(model, queryloader, galleryloader, pool, use_gpu, ranks=[1, 5, 10, 20]):
    with torch.no_grad():
        model.eval()

        qf, q_pids, q_camids = [], [], []
        for batch_idx, (imgs, pids, camids) in tqdm(enumerate(queryloader), total=len(queryloader)):
            if use_gpu:
                imgs = imgs.cuda()
            # imgs = Variable(imgs, volatile=True)
            # b=1, n=number of clips, s=16
            b, n, s, c, h, w = imgs.size()
            assert (b == 1)
            imgs = imgs.view(b * n, s, c, h, w)
            features = model(imgs)
            features = features.view(n, -1)
            features = torch.mean(features, 0)
            features = features.data.cpu().numpy()
            qf.append(features)
            q_pids.extend(pids)
            q_camids.extend(camids)
            if batch_idx % 20 == 0:
                gc.collect()
        qf = np.asarray(qf, dtype=np.float32)
        q_pids = np.asarray(q_pids)
        q_camids = np.asarray(q_camids)
        gc.collect()
        print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.shape[0], qf.shape[1]))

        gf, g_pids, g_camids = [], [], []
        for batch_idx, (imgs, pids, camids) in tqdm(enumerate(galleryloader), total=len(galleryloader)):
            if use_gpu:
                imgs = imgs.cuda()
            # imgs = Variable(imgs, volatile=True)
            b, n, s, c, h, w = imgs.size()
            imgs = imgs.view(b * n, s, c, h, w)
            assert (b == 1)
            features = model(imgs)
            features = features.view(n, -1)
            if pool == 'avg':
                features = torch.mean(features, 0)
            else:
                features, _ = torch.max(features, 0)
            features = features.data.cpu().numpy()
            gf.append(features)
            g_pids.extend(pids)
            g_camids.extend(camids)
            if batch_idx % 20 == 0:
                gc.collect()

        gf = np.asarray(gf, dtype=np.float32)
        g_pids = np.asarray(g_pids)
        g_camids = np.asarray(g_camids)
        gc.collect()
        print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.shape[0], gf.shape[1]))
        print("Computing distance matrix")

        m, n = qf.shape[0], gf.shape[0]
        distmat = np.tile(np.sum(np.power(qf, 2), axis=1, keepdims=True), (1, n)) + \
                  np.tile(np.sum(np.power(gf, 2), axis=1, keepdims=True), (1, m)).T
        distmat -= 2 * blas.sgemm(1, qf, gf.T)

        # distmat = np.power(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
        #           torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
        # distmat.addmm_(1, -2, qf, gf.t())
        # distmat = distmat.numpy()

        print("Computing CMC and mAP")
        cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids)

        print("Results ----------")
        print("mAP: {:.1%}".format(mAP))
        print("CMC curve")
        for r in ranks:
            print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
        print("------------------")

        return cmc[0]
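The distance computation above expands ||q - g||^2 = ||q||^2 + ||g||^2 - 2*q.g row-pair-wise, handing the cross term to sgemm; a small check of the identity:

import numpy as np

q = np.random.rand(4, 8).astype(np.float32)
g = np.random.rand(6, 8).astype(np.float32)
d = (q**2).sum(1, keepdims=True) + (g**2).sum(1, keepdims=True).T - 2 * q @ g.T
ref = ((q[:, None, :] - g[None, :, :]) ** 2).sum(-1)
assert np.allclose(d, ref, atol=1e-4)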