Example #1
    def test_cp_decomposition(self):
        test_data1 = T.array([[[0, 1, 3, 4], [4, 0, 2, 1], [4, 2, 3, 4]],
                              [[2, 4, 2, 3], [3, 3, 2, 4], [2, 3, 0, 2]]])
        test_data2 = T.array([[[3, 1, 1, 2], [1, 0, 3, 2], [3, 4, 0, 2]],
                              [[1, 2, 3, 3], [2, 3, 1, 0], [1, 2, 0, 2]]])
        factors, lamda = cp(test_data1,
                            r=3,
                            stop_iter=500,
                            tol=1e-6,
                            normalize_factor=True,
                            random_seed=44)
        P, fit, itr = cp_als(dtensor(test_data1), 3, init='random')
        T.testing.assert_array_almost_equal(
            reconstruct_tensor(factors, lamda, (2, 3, 4)),
            P.toarray(),
            decimal=0)

        factors, lamda = cp(test_data2,
                            r=3,
                            stop_iter=500,
                            tol=1e-6,
                            normalize_factor=True,
                            random_seed=44)
        P, fit, itr = cp_als(dtensor(test_data2), 3, init='random')
        T.testing.assert_array_almost_equal(
            reconstruct_tensor(factors, lamda, (2, 3, 4)),
            P.toarray(),
            decimal=0)
Example #2
def get_cp_factors(layer, rank, cin, cout, kernel_size, **kwargs):
    weights, bias = get_weights_and_bias(layer)
    w_h = None
    w_w = None
    w_cin = None
    w_cout = None

    if isinstance(layer, keras.Sequential):
        w_cout, w_cin, w_h, w_w = recompress_ncpd_tensor(weights, new_rank=rank, max_cycle=500, return_fit=False,
                                                         tensor_format="cpd")
    elif isinstance(layer, keras.layers.Conv2D):
        P, _, _ = cp_als(dtensor(weights), rank, init="random")
        w_cin, w_cout, w_h, w_w = extract_weights_tensors(P)

    # Reshape to proper kernel sizes.
    w_h = w_h.T.reshape((rank, 1, kernel_size[0], 1))
    w_w = w_w.T.reshape((rank, 1, 1, kernel_size[1]))
    w_cin = w_cin.T.reshape((rank, cin, 1, 1))
    w_cout = w_cout.reshape([cout, rank, 1, 1])

    # Reorder to TensorFlow order.
    w_cin, w_cout = [to_tf_kernel_order(w) for w in [w_cin, w_cout]]

    # The middle layers are depthwise, so each factor should have order
    # [rank, 1, kernel_size, kernel_size]. To reorder from TensorFlow
    # order to PyTorch order one would instead use
    # w_h, w_w = [depthwise_to_pytorch_kernel_order(w) for w in [w_h, w_w]].
    w_h, w_w = [to_tf_kernel_order(w) for w in [w_h, w_w]]

    return [w_cin, w_h, w_w, w_cout], [None, None, None, bias]
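For reference, the four factor matrices returned by cp_als recombine into the full 4-way kernel as a sum of rank-1 outer products. A minimal NumPy sketch with hypothetical shapes (not taken from the example above):

import numpy as np

# Hypothetical factors for a (cout, cin, kh, kw) kernel at CP rank r.
r, cout, cin, kh, kw = 4, 16, 8, 3, 3
U = [np.random.rand(d, r) for d in (cout, cin, kh, kw)]
lmbda = np.ones(r)

# W[o, i, h, w] = sum_r lmbda[r] * U0[o, r] * U1[i, r] * U2[h, r] * U3[w, r]
W = np.einsum('r,or,ir,hr,wr->oihw', lmbda, *U)
print(W.shape)  # (16, 8, 3, 3)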
Example #3
def get_cp_factors(layer, rank, cin, cout, kernel_size, **kwargs):
    weights, bias = get_weights_and_bias(layer)
    w_cin = None
    w_z = None
    w_cout = None

    if isinstance(layer, keras.Sequential):
        w_cout, w_cin, w_z = recompress_ncpd_tensor(weights,
                                                    new_rank=rank,
                                                    max_cycle=500,
                                                    return_fit=False,
                                                    tensor_format="cpd")
    elif isinstance(layer, keras.layers.Conv2D):
        P, _, _ = cp_als(dtensor(weights), rank, init="random")
        w_cin, w_cout, w_z = extract_weights_tensors(P)

    if w_cin is None or w_z is None or w_cout is None:
        raise CompressionError()

    # Reshape to the proper PyTorch shape order.
    w_cin = w_cin.T.reshape((rank, cin, 1, 1))
    w_z = w_z.T.reshape((rank, 1, *kernel_size))
    w_cout = w_cout.reshape((cout, rank, 1, 1))

    # Reorder to TensorFlow order.
    w_cin, w_z, w_cout = [to_tf_kernel_order(w) for w in [w_cin, w_z, w_cout]]

    return [w_cin, w_z, w_cout], [None, None, bias]
Example #4
 def test_cp_reconstruction(self):
     data = T.array([[[3, 1, 1, 2], [1, 0, 3, 2], [3, 4, 0, 2]],
                     [[1, 2, 3, 3], [2, 3, 1, 0], [1, 2, 0, 2]]])
     tensor = dtensor(data)
     P, fit, itr = cp_als(tensor, 3, init='random')
     T.testing.assert_array_almost_equal(
         P.toarray(), reconstruct_tensor(P.U, P.lmbda, (2, 3, 4)))
Example #5
def tensor_decomp(XX):
    print("CP-ALS Decomposition.")
    T = dtensor(XX)
    P, fit, itr, exectimes = cp_als(T, 2, init='nvecs')
    proj = P.U[2]
    fproj = np.abs(np.fft.fft(proj, axis=0))[:XX.shape[-1] // 2, :]
    return fproj, proj
Example #6
    def get_cp_factors(self):

        if self.pretrained is not None:
            mat_dict = scipy.io.loadmat(self.pretrained)

            if mat_dict['R'][0][0] != self.rank:
                print('WRONG FACTORS, do not correspond to desired rank')

            PU_z, PU_cout, PU_cin = [Ui[0] for Ui in mat_dict['P_bals_epc_U']]
            Plmbda = mat_dict['P_bals_epc_lambda'].ravel()

            f_cin = np.array(PU_cin)
            f_cout = np.array(PU_cout)
            f_z = (np.array(PU_z) * (Plmbda))

        else:
            if '__getitem__' in dir(self.layer):
                f_cout_old, f_cin_old, f_z_old = self.weight

                f_cout_old = np.array(f_cout_old)
                f_cin_old = np.array(f_cin_old)
                f_z_old = np.array(f_z_old)

                bias = self.bias

                f_cout, f_cin, f_z = recompress_ncpd_tensor(
                    [f_cout_old, f_cin_old, f_z_old],
                    new_rank=self.rank,
                    max_cycle=500,
                    return_fit=False,
                    tensor_format='cpd')
            else:
                if self.weight.is_cuda:
                    self.weight = self.weight.cpu()
                    if self.bias is not None:
                        self.bias = self.bias.cpu()
                bias = self.bias

                T = dtensor(self.weight)
                P, fit, itr, exectimes = cp_als(T, self.rank, init='random')

                f_cout = np.array(P.U[0])
                f_cin = np.array(P.U[1])
                f_z = (np.array(P.U[2]) * (P.lmbda))

        f_cin = torch.FloatTensor(
            np.reshape(f_cin.T, [self.rank, self.cin, 1, 1])).contiguous()
        f_z = torch.FloatTensor(
            np.reshape(f_z.T, [self.rank, 1, *self.kernel_size])).contiguous()
        f_cout = torch.FloatTensor(
            np.reshape(f_cout, [self.cout, self.rank, 1, 1])).contiguous()

        return [f_cin, f_z, f_cout], [None, None, bias]
Example #7
 def __init__(self, data, rank=8):
     """
     Args:
         data: np.ndarray : the underlying multi-dimensional array
         rank: 
     Returns:
         BaseTensor object
     """
     self.rank = rank
     self.shape = data.shape
     self.ktensor = cp_als(dtensor(data), (self.rank, ) * 3)[0]
Example #8
def _splitrank3(h, verbose=False):
    # this fixes a bug in scikit-tensor
    if np.sum(np.abs(h)) < 1.e-30:
        return tuple(np.zeros(s) for s in h.shape) + (np.zeros_like(h), )

    P, fit, itr, exectimes = cp_als(dtensor(h.copy()), 1)
    hx, hy, hz = [(P.lmbda[0])**(1. / 3) * np.array(P.U[i])[:, 0]
                  for i in range(3)]
    if verbose:
        print("lambdas= %s \nfit = %s \niterations= %s " % (P.lmbda, fit, itr))
    return hx, hy, hz, P.toarray()
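A quick sanity check for _splitrank3: since the component weight is split evenly as P.lmbda[0]**(1./3) across the three modes, an exactly rank-1 input should be reproduced by the outer product of the returned vectors. A sketch, assuming _splitrank3 is in scope:

import numpy as np

a, b, c = np.random.rand(4), np.random.rand(5), np.random.rand(6)
h = np.einsum('i,j,k->ijk', a, b, c)  # exactly rank-1 by construction

hx, hy, hz, h_full = _splitrank3(h)
# Equal up to ALS tolerance.
print(np.allclose(np.einsum('i,j,k->ijk', hx, hy, hz), h, atol=1e-5))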
Example #9
def load_data():
    all_subjects = range(1, 24)

    X = []
    y = []
    ids_test = []
    label_count = []

    n_basis = 10
    all_U0 = []
    all_U2 = []
    for n, subject in enumerate(all_subjects):
        if subject < 17:
            filename = 'data/train_subject%02d.mat' % subject
        else:
            filename = 'data/test_subject%02d.mat' % subject
        print("Loading", filename)
        data = loadmat(filename, squeeze_me=True)
        XX = window_filter_baseline(data['X'])
        mask = get_outlier_mask(XX)
        T = dtensor(XX)
        r = cp_als(T, rank=n_basis)
        U0 = r[0].U[0]
        U1 = r[0].U[1]
        U2 = r[0].U[2]
        X.append(XX)
        all_U0.append(U0)
        all_U2.append(U2)
        if subject < 17:
            yy = data['y'].ravel()
            y.append(yy)
        else:
            ids = data['Id']
            ids_test.append(ids)
        label_count += [subject] * len(XX)

    all_U0 = np.vstack(all_U0)
    all_U2 = np.vstack(all_U2)
    X = np.vstack(X)
    y = np.concatenate(y)

    cv = StratifiedShuffleSplit(y, n_iter=50, test_size=.1)
    selection_pipe = Pipeline([('scaler', StandardScaler()),
                               ('estimator', LassoLarsCV(cv=cv))])
    selection_pipe.fit(all_U0[:y.shape[0]], y * 2 - 1)
    weights = selection_pipe.steps[1][1].coef_
    selected = np.where(weights != 0)[0]
    proj = all_U2[:, selected].T
    ids_test = np.concatenate(ids_test)
    return np.dot(X, proj), y, ids_test, label_count
Example #10
def decompose_tensor(filters):
    """ filters is of type input feature maps, output feature maps, wxh of filter
        Output is a structure P which contains lambda, U{1}, U{2}, U{3}    
    """
    # Set logging to DEBUG to see CP-ALS information
    logging.basicConfig(level=logging.DEBUG)
    print(filters.shape)
    filters = np.array(filters)
    print(filters.shape)
    print(filters.dtype)
    nbr_filters = filters.shape[0]
    fwidth = filters.shape[2]
    fheight = filters.shape[3]
    Pstruct = []
    for chanel in range(filters.shape[1]):
        filter_for_channel = filters[:, chanel, :, :]
        filter_for_channel = filter_for_channel.reshape(nbr_filters, fwidth, fheight)
        filter_for_channel = np.swapaxes(filter_for_channel, 0, 2)
        print('Number of filters ', nbr_filters)
        print('filter_for_channel shape ', filter_for_channel.shape)
        fig, axes = plt.subplots(nrows=5, ncols=4)
        fig.tight_layout()

        for f in range(nbr_filters):
            img = filter_for_channel[:, :, f]
            plt.subplot(5, 4, f + 1)  # subplot indices start at 1
            plt.imshow(img)
        plt.show(block=False)
        T = dtensor(filter_for_channel)
        rank = int(np.floor(nbr_filters * 0.6))  # cp_als expects an integer rank
        print('rank is ', rank)
        P, fit, itr, exectimes = cp_als(T, rank, init='random')
        ## P.U, P.lmbda
        print('P U0,U1,U2, lambda sizes: ', P.U[0].size, P.U[1].size, P.U[2].size, P.lmbda)
        print('fit was ', fit)
        Pstruct.append(P)
        #dtensor(ktensor(U).toarray())
        print(np.allclose(T, P.totensor()))
    
    
    U = [np.random.rand(i, 3) for i in (20, 10, 14)]

    Tn = dtensor(ktensor(U).toarray())
    P, fit, itr, _ = cp_als(Tn, 10)
    print('P U0,U1,U2, lambda sizes: ', P.U[0].size, P.U[1].size, P.U[2].size, P.lmbda)
    print('fit was ', fit)
    print(np.allclose(Tn, P.totensor()))
    
    return Pstruct
Example #11
def approx_CP_R(value, R):
    if value.ndim < 2:
        return value
    T = dtensor(value)
    P, fit, itr, exetimes = cp_als(T, R, init='random')
    Y = None
    for i in range(R):
        y = P.lmbda[i]
        o = None
        for l in range(T.ndim):
            o = P.U[l][:,i] if o is None else np.outer(o, P.U[l][:,i])
        y = y * o
        Y = y if Y is None else Y+y
    # np.outer flattens as it accumulates, so restore the original shape.
    return Y.reshape(value.shape)
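The rank-wise sum of outer products assembled in approx_CP_R is the same tensor that sktensor materializes with P.toarray(); a small check of that identity (a sketch using random data):

import numpy as np
from sktensor import dtensor, cp_als

T = dtensor(np.random.rand(5, 6, 7))
P, fit, itr, _ = cp_als(T, 3, init='random')

# Sum of lmbda-weighted rank-1 terms equals the ktensor reconstruction.
Y = sum(P.lmbda[i] * np.einsum('a,b,c->abc',
                               P.U[0][:, i], P.U[1][:, i], P.U[2][:, i])
        for i in range(3))
print(np.allclose(Y, P.toarray()))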
Example #12
def corcondia(tensor, components=1):
    k = components
    X = tensor
    X_approx_ks, fit, itr, exectimes = cp_als(X, k, init='random')

    A = X_approx_ks.U[0]
    B = X_approx_ks.U[1]
    C = X_approx_ks.U[2]

    x = X_approx_ks.totensor()

    Ua, Sa, Va = np.linalg.svd(A)
    Ub, Sb, Vb = np.linalg.svd(B)
    Uc, Sc, Vc = np.linalg.svd(C)

    SaI = np.zeros((Ua.shape[0], Va.shape[0]), float)
    np.fill_diagonal(SaI, Sa)

    SbI = np.zeros((Ub.shape[0], Vb.shape[0]), float)
    np.fill_diagonal(SbI, Sb)

    ScI = np.zeros((Uc.shape[0], Vc.shape[0]), float)
    np.fill_diagonal(ScI, Sc)

    SaI = np.linalg.pinv(SaI)
    SbI = np.linalg.pinv(SbI)
    ScI = np.linalg.pinv(ScI)

    y = kronecker([Ua.transpose(), Ub.transpose(), Uc.transpose()], x)
    z = kronecker([SaI, SbI, ScI], y)
    G = kronecker([Va.transpose(), Vb.transpose(), Vc.transpose()], z)


    C = np.full((k, k, k), 0)
    for i in range(k):
        for j in range(k):
            for l in range(k):
                if i == j == l:
                    C[i][j][l] = 1

    c = 0
    for i in range(k):
        for j in range(k):
            for l in range(k):
                c += float(G[i][j][l] - C[i][j][l])**2.0

    cc = 100 * (1 - (c / float(k)))

    return round(cc)
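A possible usage sketch (assuming the kronecker helper used above is in scope): the core consistency diagnostic sits near 100 when the chosen number of components matches the trilinear structure of the data, and drops sharply once the model is over-factored.

import numpy as np
from sktensor import dtensor

X = dtensor(np.random.rand(10, 8, 6))
for k in (1, 2, 3):
    print(k, corcondia(X, components=k))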
Example #13
    def get_cp_factors(self):

        if '__getitem__' in dir(self.layer):
            f_cout_old, f_cin_old, f_h_old, f_w_old = self.weight

            f_cout_old = np.array(f_cout_old)
            f_cin_old = np.array(f_cin_old)
            f_h_old = np.array(f_h_old)
            f_w_old = np.array(f_w_old)

            bias = self.bias

            f_cout, f_cin, f_h, f_w = recompress_ncpd_tensor(
                [f_cout_old, f_cin_old, f_h_old, f_w_old],
                new_rank=self.rank,
                max_cycle=500,
                return_fit=False,
                tensor_format='cpd')

        else:
            if self.weight.is_cuda:
                self.weight = self.weight.cpu()
                if self.bias is not None:
                    self.bias = self.bias.cpu()
            bias = self.bias

            T = dtensor(self.weight)
            P, fit, itr, exectimes = cp_als(T, self.rank, init='random')

            f_w = (np.array(P.U[3]) * (P.lmbda))
            f_h = np.array(P.U[2])
            f_cin = np.array(P.U[1])
            f_cout = np.array(P.U[0])

        f_h = torch.FloatTensor(
            np.reshape(f_h.T,
                       (self.rank, 1, self.kernel_size[0], 1))).contiguous()
        f_w = torch.FloatTensor(
            np.reshape(f_w.T,
                       [self.rank, 1, 1, self.kernel_size[1]])).contiguous()
        f_cin = torch.FloatTensor(
            np.reshape(f_cin.T, [self.rank, self.cin, 1, 1])).contiguous()
        f_cout = torch.FloatTensor(
            np.reshape(f_cout, [self.cout, self.rank, 1, 1])).contiguous()

        return [f_cin, f_h, f_w, f_cout], [None, None, None, bias]
Example #14
def prepare_models(LAYER, R, NET_PATH, NET_NAME, INPUT_DIM):
    PATH = NET_PATH
    NET_PREFIX = PATH + NET_NAME
    input_dim = INPUT_DIM

    model = load_model(NET_PREFIX + '.prototxt')
    ind = find_layer_by_name(model, LAYER)
    new_model = accelerate_model(model, ind, R)
    save_model(new_model, NET_PREFIX + '_accelerated.prototxt')
    new_deploy = create_deploy_model(new_model, input_dim)
    save_model(new_deploy, NET_PREFIX + '_accelerated_deploy.prototxt')
    deploy = create_deploy_model(model, input_dim)
    save_model(deploy, NET_PREFIX + '_deploy.prototxt')

    net = caffe.Classifier(NET_PREFIX + '_deploy.prototxt',
                           NET_PREFIX + '.caffemodel')
    fast_net = caffe.Classifier(NET_PREFIX + '_accelerated_deploy.prototxt',
                                NET_PREFIX + '.caffemodel')

    l = ind - 1  #layer index in deploy version
    weights = net.layers[l].blobs[0].data
    bias = net.layers[l].blobs[1]

    T = dtensor(weights)
    P, fit, itr, exectimes = cp_als(T, R, init='random')
    f_x = (np.array(P.U[3]) * (P.lmbda)).T
    f_y = np.array(P.U[2]).T
    f_c = np.array(P.U[1]).T
    f_n = np.array(P.U[0])

    n = model.layer[ind].convolution_param.num_output
    d = model.layer[ind].convolution_param.kernel_size[0]
    c = weights.shape[1]

    f_y = np.reshape(f_y, [R, 1, d, 1])
    f_x = np.reshape(f_x, [R, 1, 1, d])
    f_c = np.reshape(f_c, [R, c, 1, 1])
    f_n = np.reshape(f_n, [n, R, 1, 1])

    np.copyto(fast_net.layers[l].blobs[0].data, f_c)
    np.copyto(fast_net.layers[l + 1].blobs[0].data, f_y)
    np.copyto(fast_net.layers[l + 2].blobs[0].data, f_x)
    np.copyto(fast_net.layers[l + 3].blobs[0].data, f_n)
    np.copyto(fast_net.layers[l + 3].blobs[1].data, bias.data)

    fast_net.save(NET_PREFIX + '_accelerated.caffemodel')
Example #15
File: CP.py Project: ALSAREM/ENsEN
 def claculateCP(self, T, rank):
     logging.warning("claculateCP")
     P, fit, itr, exectimes = cp_als(T, rank, init='random', fit_method='full')
     logging.warning("*****************Tensor*****************************")
     logging.warning(T.shape)
     logging.warning("******************** U1 represents the subjects *************************")
     logging.warning(P.U[0].shape)
     #print(P.U[0])
     logging.warning("******************** U2 represents the objects **************************")
     logging.warning(P.U[1].shape)
     #print(P.U[1])
     logging.warning("******************** U3 represents the predicates **************************")
     logging.warning(P.U[2].shape)
     #print(P.U[2])
     logging.warning("******************** L represents the lambdas **************************")
     #print(P.lmbda)
     logging.warning("Done with " + str(itr) + " iterations")
     return P
Example #16
def cp_conv(weights, img, iter, w_shp, size, N_i, N_j, rank):

    # Define parameters
    (F, C, X, Y) = w_shp
    (N, C, H, W) = size

    # Instantiate 4D tensor for input
    input = T.tensor4(name='input')

    # Initialize shared variable for weights.
    weights = weights.eval()
    W_tensor =  dtensor(weights)

    # Apply CP-Decomposition on the clustered weight tensor
    P, fit, itr, exectimes = cp_als(W_tensor, rank, init='random')

    output = None
    for k in range(rank):

        # Fold the component weight lmbda[k] into the filter-mode factor.
        T_F = theano.shared(np.reshape(P.U[0][:,k] * P.lmbda[k], (F, 1, 1, 1)), name='F_{0}'.format(k))
        T_C = theano.shared(np.reshape(P.U[1][:,k], (1, C, 1, 1)), name='C_{0}'.format(k))
        T_X = theano.shared(np.reshape(P.U[2][:,k], (1, 1, X, 1)), name='X_{0}'.format(k))
        T_Y = theano.shared(np.reshape(P.U[3][:,k], (1, 1, 1, Y)), name='Y_{0}'.format(k))

        # Apply convolution on each dimension individually
        conv_C = conv.conv2d(input, T_C)
        conv_X = conv.conv2d(conv_C, T_X)
        conv_Y = conv.conv2d(conv_X, T_Y)
        conv_F = conv.conv2d(conv_Y, T_F)

        # 'if output' on a symbolic variable is ambiguous; test against None.
        output = conv_F if output is None else output + conv_F

    # Map Theano function
    f = theano.function([input], output, profile=False)

    # Execute Theano function
    times = []
    for i in range(iter):
        start = time.time()
        filtered_img = f(img)
        done = time.time()
        times.append(done-start)
    avg1 = np.mean(times)
    return filtered_img, avg1
Example #17
def prepare_models(LAYER, R, NET_PATH, NET_NAME, INPUT_DIM):
    PATH = NET_PATH
    NET_PREFIX = PATH + NET_NAME
    input_dim = INPUT_DIM
    
    model = load_model(NET_PREFIX + '.prototxt')
    ind = find_layer_by_name(model, LAYER)
    new_model = accelerate_model(model, ind, R)
    save_model(new_model, NET_PREFIX + '_accelerated.prototxt')
    new_deploy = create_deploy_model(new_model, input_dim)
    save_model(new_deploy, NET_PREFIX + '_accelerated_deploy.prototxt')
    deploy = create_deploy_model(model, input_dim)
    save_model(deploy, NET_PREFIX + '_deploy.prototxt')

    net = caffe.Classifier(NET_PREFIX + '_deploy.prototxt', NET_PREFIX + '.caffemodel')
    fast_net = caffe.Classifier(NET_PREFIX + '_accelerated_deploy.prototxt', NET_PREFIX + '.caffemodel')

    l = ind - 1  # layer index in deploy version
    weights = net.layers[l].blobs[0].data
    bias = net.layers[l].blobs[1]

    T = dtensor(weights)
    P, fit, itr, exectimes = cp_als(T, R, init='random')
    f_x = (np.array(P.U[3])*(P.lmbda)).T
    f_y = np.array(P.U[2]).T
    f_c = np.array(P.U[1]).T
    f_n = np.array(P.U[0]) 

    n = model.layer[ind].convolution_param.num_output # OUTPUT
    d = model.layer[ind].convolution_param.kernel_size[0] # KERNEL SIZE
    c = weights.shape[1] # INPUT 
    
    f_c = np.reshape(f_c, [R, c, 1, 1]) # 1
    f_y = np.reshape(f_y, [R, 1, d, 1]) # 2
    f_x = np.reshape(f_x, [R, 1, 1, d]) # 3
    f_n = np.reshape(f_n, [n, R, 1, 1]) # 4 

    np.copyto(fast_net.layers[l].blobs[0].data, f_c)
    np.copyto(fast_net.layers[l+1].blobs[0].data, f_y)
    np.copyto(fast_net.layers[l+2].blobs[0].data, f_x)
    np.copyto(fast_net.layers[l+3].blobs[0].data, f_n)
    np.copyto(fast_net.layers[l+3].blobs[1].data, bias.data)

    fast_net.save(NET_PREFIX + '_accelerated.caffemodel')
Example #18
    def tcca(self, Views, var_matrix, cov_ten, reduce_to_dim):

        var_matrix_inverse = list()
        for v in range(self.number_of_views):
            var_matrix_inverse.append(
                self.root_inverse(var_matrix[v]) +
                np.eye(var_matrix[v].shape[0]))

        M_ten = self.ttm(cov_ten, var_matrix_inverse)
        M_ten = dtensor(M_ten)
        P, fit, itr = cp_als(M_ten, reduce_to_dim, max_iter=self.max_iter)

        H = list()
        for v in range(self.number_of_views):
            H.append(np.dot(var_matrix_inverse[v], P.U[v]))

        self.H = H

        return H
Example #19
def cp_decomposition(weights, bias, rank):

    # Define parameters
    (F, C, X, Y) = weights.shape

    # Initialize shared variable for weights.
    W_tensor =  dtensor(weights)

    # Apply CP-Decomposition on the clustered weight tensor
    P, fit, itr, exectimes = cp_als(W_tensor, rank, init='random')

    output = []
    for k in range(rank):

        # Fold the component weight lmbda[k] into the filter-mode factor.
        T_F = np.reshape(P.U[0][:,k] * P.lmbda[k], (F, 1, 1, 1))
        T_C = np.reshape(P.U[1][:,k], (1, C, 1, 1))
        T_X = np.reshape(P.U[2][:,k], (1, 1, X, 1))
        T_Y = np.reshape(P.U[3][:,k], (1, 1, 1, Y))
        output.append([T_C, T_X, T_Y, T_F, bias])
    return output
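Because the component weight lmbda[k] is folded into T_F, broadcasting the four per-rank pieces together and summing over the rank rebuilds the approximate kernel. A hedged check with hypothetical shapes:

import numpy as np

weights = np.random.rand(8, 4, 3, 3)  # hypothetical (F, C, X, Y) kernel
terms = cp_decomposition(weights, bias=None, rank=5)
W_hat = sum(T_C * T_X * T_Y * T_F for (T_C, T_X, T_Y, T_F, _) in terms)
# Broadcasts back to (8, 4, 3, 3); the relative error depends on the rank.
print(W_hat.shape, np.linalg.norm(W_hat - weights) / np.linalg.norm(weights))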
Example #20
def get_basis(labels, data, n_components):
    # Generate decompositions
    print("Performing tensor decomposition of training data.")
    all_basis = []
    for n in np.unique(labels):
        idx = np.where(labels == n)[0]
        X = data[idx]
        grad1, grad2, mag1 = separate_grad_mag(X)
        grad = np.concatenate((grad1, grad2))
        # Magnetometers look real rough
        for idx in [grad, mag1]:
            Xi = X[:, idx, :]
            r = cp_als(dtensor(Xi), n_components, init="nvecs")
            r_good_idx = drop_fifty_and_ten_hz(r[0].U[2])
            basis = r[0].U[2][:, r_good_idx]
            all_basis.append(basis)

    basis = np.hstack(all_basis)
    del all_basis
    return basis
Example #21
def get_UDV_decomposition(W, method='svd'):
    # current implementation is svd
    c_out, khkw, c_in = W.shape
    method = method.lower()
    with torch.no_grad():
        if method == 'svd':
            m_W = W.mean(1)
            U, _, V = torch.svd(m_W)
            D = []
            for r in range(khkw):
                W_r = W[:, r, :]  # c_out x c_in slice at spatial position r
                c = min(c_out, c_in)
                D_w = torch.diag(U.t() @ W_r @ V).view(c, 1)
                D.append(D_w)
            S = torch.stack(D, dim=1)
        elif method == 'svd_avg':
            raise NotImplementedError("svd_avg is not implemented yet")
        elif method == 'als':
            # m = min(c_out, c_in)
            # U: c_out * m
            # S: k^2 * m
            # V: c_in * m
            rank = min(c_out, c_in)

            tic = time.clock()
            T = dtensor(W.data.cpu().numpy())
            P, fit, itr, exectimes = cp_als(T, rank, init='random')
            U = np.array(P.U[0])  # c_out * rank
            S = np.array(P.U[1]).T  # k^2 * rank --> rank * k^2
            V = np.array(P.U[2] * P.lmbda)  # c_in * rank
            print('CP decomposition done. It cost %.5f secs. fit: %f' % (time.clock() - tic, fit[0]))
            V = torch.FloatTensor(V).cuda()
            S = torch.FloatTensor(S).cuda()
            U = torch.FloatTensor(U).cuda()

        else:
            raise NotImplementedError("Method {} not supported!".format(method))

    return U, S, V
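In the 'als' branch the three factors relate back to W (shape c_out x k^2 x c_in, with P.lmbda folded into V) through a rank-wise product; a NumPy sketch of the reconstruction with stand-in arrays:

import numpy as np

c_out, khkw, c_in, rank = 16, 9, 8, 8
U = np.random.rand(c_out, rank)  # stand-ins for the factors returned above
S = np.random.rand(rank, khkw)
V = np.random.rand(c_in, rank)

# W[o, k, i] ~ sum_r U[o, r] * S[r, k] * V[i, r]
W_hat = np.einsum('or,rk,ir->oki', U, S, V)
print(W_hat.shape)  # (16, 9, 8)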
Example #22
def valid_3ord():
    print('\nCorrectness benchmark for 3 order tensor CP decomposition.\n')
    
    shape = (2, 3, 4)
    max_iter = 30
    print('----------TensorNP----------')
    norm_errors = 0
    for _ in range(max_iter):
        tensor = tnp.randn(2, 3, 4)
        factors, lamda = tnp.cp(tensor, r=3, stop_iter=500, tol=1e-5, normalize_factor=True)
        rec_tensor = tnp.reconstruct_cp(factors, lamda, shape)
        norm_error = tnp.linalg.norm(rec_tensor - tensor) / tnp.linalg.norm(tensor)
        norm_errors += norm_error
    print(f'error ({norm_errors/max_iter})')

    print('----------scikit-tensor----------')
    norm_errors = 0
    for _ in range(max_iter):
        tensor = tnp.randn(2, 3, 4)
        skt_tensor = skt.dtensor(tensor)
        P, _, _ = skt.cp_als(skt_tensor, rank=3, init='random')
        rec_tensor = P.toarray()
        norm_error = tnp.linalg.norm(rec_tensor - skt_tensor) / tnp.linalg.norm(tensor)
        norm_errors += norm_error
    print(f'error ({norm_errors/max_iter})')


    print('----------Tensorly----------')
    norm_errors = 0
    for _ in range(max_iter):
        tensor = tnp.randn(2, 3, 4)
        tl_tensor = tl.tensor(tensor)
        cp_tensor = tensorly.decomposition.parafac(
            tl_tensor, rank=3, n_iter_max=500, tol=1e-6, normalize_factors=True, init='random')
        rec_tensor = tnp.reconstruct_cp(cp_tensor.factors, cp_tensor.weights, shape)
        norm_error = tnp.linalg.norm(rec_tensor - tensor) / tnp.linalg.norm(tensor)
        norm_errors += norm_error
    print(f'error ({norm_errors/max_iter})')
Example #23
#!/usr/bin/env python

import logging
from scipy.io.matlab import loadmat
from sktensor import dtensor, cp_als

# Set logging to DEBUG to see CP-ALS information
logging.basicConfig(level=logging.DEBUG)

# Load Matlab data and convert it to dense tensor format
mat = loadmat('../data/sensory-bread/brod.mat')
T = dtensor(mat['X'])

# Decompose tensor using CP-ALS
P, fit, itr, exectimes = cp_als(T, 3, init='random')
Example #24
all_scores = []


for subject in train_subject_names:
    print(subject)
    f = loadmat(data_dir / subject)
    X = f['X'][:, 160:, 125:250]
    y = f['y'].ravel() * 2 - 1

    cv = StratifiedShuffleSplit(y, n_iter=50, test_size=.1)

    pipeline = Pipeline([('scaler', StandardScaler()),
                         ('estimator', LassoLarsCV(cv=cv))])

    T = dtensor(X)
    r = cp_als(T, rank=10)

    sample_axes = r[0].U[0]

    pipeline.fit(sample_axes, y)
    weights = pipeline.steps[1][1].coef_
    all_weights.append(weights)

    selected = np.where(weights != 0)[0]
    all_selected_timecourses.append(r[0].U[2][:, selected])

    pipeline_global_eval = Pipeline([
            ('scaler', StandardScaler()),
            ('estimator', LogisticRegression(C=1., penalty="l1"))])
    global_scores = cross_val_score(pipeline_global_eval,
                                    sample_axes, y, cv=cv,
Example #25
def task1cFunc(userid):
    # read mltags, mlrating, mlmovies, movie-actor
    mltagsFile = pd.read_csv('mltags.csv')
    mlratingsFile = pd.read_csv('mlratings.csv')
    genomeFile = pd.read_csv('genome-tags.csv')
    movieFile = pd.read_csv('smallmlmovies.csv')

    # Extract tag from tagid
    genomeFile['tagid'] = genomeFile['tagId']
    del genomeFile['tagId']
    mltagsFile = pd.merge(mltagsFile, genomeFile, on='tagid')

    s = movieFile["genres"].str.split('|', expand=True).stack()
    i = s.index.get_level_values(0)
    movieFile = movieFile.loc[i].copy()
    movieFile["genres"] = s.values

    # Extract movie from movieid
    del movieFile['year']
    mlratingsFile = pd.merge(mlratingsFile, movieFile, on='movieid')
    mltagsFile = pd.merge(mltagsFile, movieFile, on='movieid')

    mltagsFileUser = mltagsFile.loc[mltagsFile['userid'] == userid]
    tagUserMovies = mltagsFileUser['moviename'].values
    mlratingsFileUser = mlratingsFile.loc[mlratingsFile['userid'] == userid]
    ratingUserMovies = mlratingsFileUser['moviename'].values
    tagRatingUserMovies = list(set(tagUserMovies) | set(ratingUserMovies))

    mltagsFileUser['timestamp'] = pd.to_datetime(mltagsFileUser['timestamp'])
    mltagsFileUser['timestamp'] = (mltagsFileUser['timestamp'] -
                                   dt.datetime(1970, 1, 1)).dt.total_seconds()
    mltagsFileUser['timestamp'] = \
        ((mltagsFileUser['timestamp'] - mltagsFileUser['timestamp'].min()) / (mltagsFileUser['timestamp'].max() - mltagsFileUser['timestamp'].min()+1))+1

    mlratingsFileUser['timestamp'] = pd.to_datetime(
        mlratingsFileUser['timestamp'])
    mlratingsFileUser['timestamp'] = (
        mlratingsFileUser['timestamp'] -
        dt.datetime(1970, 1, 1)).dt.total_seconds()
    mlratingsFileUser['timestamp'] = \
        ((mlratingsFileUser['timestamp'] - mlratingsFileUser['timestamp'].min()) / (mlratingsFileUser['timestamp'].max() - mlratingsFileUser['timestamp'].min()+1))+1

    commonTagRating = list(set(tagUserMovies) & set(ratingUserMovies))
    uncommonTag = list(set(tagUserMovies) ^ set(commonTagRating))
    uncommonRating = list(set(ratingUserMovies) ^ set(commonTagRating))

    timeWeights = {}
    for i in range(len(commonTagRating)):
        tag = mltagsFileUser.loc[mltagsFileUser['moviename'] ==
                                 commonTagRating[i]]['timestamp'].values[0]
        rating = mlratingsFileUser.loc[
            mlratingsFileUser['moviename'] ==
            commonTagRating[i]]['timestamp'].values[0]
        if tag > rating:
            timeWeights[commonTagRating[i]] = tag
        else:
            timeWeights[commonTagRating[i]] = rating

    for i in range(len(uncommonRating)):
        rating = mlratingsFileUser.loc[
            mlratingsFileUser['moviename'] ==
            uncommonRating[i]]['timestamp'].values[0]
        timeWeights[uncommonRating[i]] = rating

    for i in range(len(uncommonTag)):
        tag = mltagsFileUser.loc[mltagsFileUser['moviename'] ==
                                 uncommonTag[i]]['timestamp'].values[0]
        timeWeights[uncommonTag[i]] = tag
    #
    # deleting columns that are not required
    del mlratingsFile['timestamp']
    del mlratingsFile['imdbid']
    del mlratingsFile['userid']
    del mltagsFile['timestamp']
    del mltagsFile['userid']
    del mltagsFile['tagid']

    # creating a dictionary with moviename as key and a list of all genres associated with that movie, removing duplicates
    movieGenreDict = {
        k: g['genres'].tolist()
        for k, g in movieFile.groupby('moviename')
    }
    movieGenreDict = {k: list(set(j)) for k, j in movieGenreDict.items()}

    # creating a dictionary with movieid as key and a list of all ratings given by a user for that particular movie and removing duplicates
    movieRatingDict = {
        k: g['rating'].tolist()
        for k, g in mlratingsFile.groupby('moviename')
    }
    movieRatingDict = {k: list(set(j)) for k, j in movieRatingDict.items()}

    # computing the average rating for all movies and storing in a dictionary
    avgRating = mlratingsFile.groupby('moviename').mean().reset_index()
    avgRatingDict = {
        k: g['rating'].tolist()
        for k, g in avgRating.groupby('moviename')
    }

    # List of unique movies, genres and ratings
    movieList = mlratingsFile.moviename.unique()
    movieList = np.asarray(movieList)
    movieListDict = dict(enumerate(movieList))
    genreList = movieFile.genres.unique()
    genreList = np.asarray(genreList)
    genreListDict = dict(enumerate(genreList))
    ratingList = mlratingsFile.rating.unique()
    ratingList = np.asarray(ratingList)
    ratingListDict = dict(enumerate(ratingList))

    movieListDictInverse = invertDictionary(movieListDict)
    genreListDictInverse = invertDictionary(genreListDict)
    ratingListDictInverse = invertDictionary(ratingListDict)

    movieNotWatched = list(set(movieList) ^ set(tagRatingUserMovies))

    # declaring a tensor with three modes - with movie, tags and ratings
    T = np.zeros((movieList.shape[0], genreList.shape[0], ratingList.shape[0]))
    arrayofvalues = []

    for i in movieList:
        if i in movieRatingDict:
            if i in movieGenreDict:
                movieTags = movieGenreDict[i]
                rList = movieRatingDict[i]
                for j in movieTags:
                    for k in rList:
                        mIndex = movieListDictInverse[i]
                        gIndex = genreListDictInverse[j]
                        rIndex = ratingListDictInverse[k]
                        avgRatingValue = avgRatingDict[i][0]
                        if k >= avgRatingValue:
                            T[mIndex, gIndex, rIndex] = 1
                            arrayofvalues.append([mIndex, gIndex, rIndex])
                        else:
                            T[mIndex, gIndex, rIndex] = 0

    # building the tensor using sktensor
    tensor = dtensor(T)

    # applying CP-decomposition with ALS(Alternating Least Squares)
    U, fit, itr, exectimes, P = cp_als(tensor, 5, init='random')

    latent_semantics_movie = pd.DataFrame(
        columns=['movie', 'ls1', 'ls2', 'ls3', 'ls4', 'ls5'])
    latent_semantics_movie['movie'] = movieList
    latent_semantics_movie['ls1'] = U[0][:, 0]
    latent_semantics_movie['ls2'] = U[0][:, 1]
    latent_semantics_movie['ls3'] = U[0][:, 2]
    latent_semantics_movie['ls4'] = U[0][:, 3]
    latent_semantics_movie['ls5'] = U[0][:, 4]

    x = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
        tagRatingUserMovies)].values
    for i in range(len(x)):
        for j in range(1, len(x[0])):
            x[i][j] = x[i][j] * timeWeights.get(x[i][0])
    y = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
        movieNotWatched)].values

    cossim = cosine_similarity(x[:, 1:], y[:, 1:])
    simDF = pd.DataFrame(cossim,
                         index=tagRatingUserMovies,
                         columns=movieNotWatched)
    simDF.to_csv('cos.csv')

    temp = simDF.values.tolist()
    sorted_movies_for_each_watched_movieDict = []
    for i in range(len(temp)):
        sorted_movies_for_each_watched_movie = np.argsort(temp[i])
        sorted_movies_for_each_watched_movieDict.append(
            sorted_movies_for_each_watched_movie.tolist()[:10])

    sortedMoviesRavel = [
        item for sublist in sorted_movies_for_each_watched_movieDict
        for item in sublist
    ]
    freq = {}
    for i in range(len(sorted_movies_for_each_watched_movieDict)):
        for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
            freq[sorted_movies_for_each_watched_movieDict[i][j]] = 0

    for i in range(len(sorted_movies_for_each_watched_movieDict)):
        for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
            freq[sorted_movies_for_each_watched_movieDict[i][j]] += (10 - j)

    freq = OrderedDict(sorted(freq.items(), reverse=True, key=lambda t: t[1]))
    freq = list(freq.items())

    recommendedMovies = []
    for i in range(5):
        index = freq[i][0]
        recommendedMovies.append(y[index][0])

    relevant = []
    notRelevant = []

    choice = 'y'
    while choice != 'n':
        rel_dict = {}
        selected_dict = {}
        N = 5
        R = 0
        for i in range(len(recommendedMovies)):
            print "If ", recommendedMovies[
                i], " is relevant, enter 1. If it is not relevant, enter 0"
            relevant.append(int(raw_input()))
            rel_dict[recommendedMovies[i]] = relevant[i]
            if relevant[i] == 1:
                R = R + 1
            else:
                notRelevant.append(recommendedMovies[i])

        genreset = set()
        for movie in recommendedMovies:
            genres_list = movieGenreDict[movie]
            selected_dict[movie] = genres_list
            genreset = genreset.union(set(genres_list))

        genreTop5 = list(genreset)
        ri = []
        ni = []
        for i in range(0, len(genreTop5)):
            ri.append(0)
            ni.append(0)
        for m in recommendedMovies:
            for i in range(0, len(genreTop5)):
                l1 = selected_dict[m]
                rval = rel_dict[m]
                if genreTop5[i] in l1:
                    ni[i] = ni[i] + 1
                    if rval == 1:
                        ri[i] = ri[i] + 1

        pr_feedback = {}

        for i in range(0, len(genreTop5)):
            try:
                numerator = ri[i] / (R - ri[i])
                denominator = (ni[i] - ri[i]) / (N - R - ni[i] + ri[i])
                pr = math.log((numerator / denominator), 2)
            except:
                numerator = (ri[i] + 0.5) / (R - ri[i] + 1)
                denominator = (ni[i] - ri[i] + 0.5) / (N - R - ni[i] + ri[i] +
                                                       1)
                pr = math.log((numerator / denominator), 2)

            pr_feedback[genreTop5[i]] = pr

        for key, value in pr_feedback.items():
            pr_feedback[key] = (pr_feedback[key] - min(
                pr_feedback.values())) / max(pr_feedback.values())

        pr_dict = {}
        for i in movieList:
            if i in movieRatingDict:
                if i in movieGenreDict:
                    movieTags = movieGenreDict[i]
                    rList = movieRatingDict[i]
                    for j in movieTags:
                        for k in rList:
                            mIndex = movieListDictInverse[i]
                            tIndex = genreListDictInverse[j]
                            rIndex = ratingListDictInverse[k]
                            avgRatingValue = avgRatingDict[i][0]
                            if k >= avgRatingValue:
                                if j in genreTop5:
                                    T[mIndex, tIndex, rIndex] *= pr_feedback[j]

        tensor = dtensor(T)

        # applying CP-decomposition with ALS(Alternating Least Squares)
        U, fit, itr, exectimes, P = cp_als(tensor, 5, init='random')

        latent_semantics_movie = pd.DataFrame(
            columns=['movie', 'ls1', 'ls2', 'ls3', 'ls4', 'ls5'])
        latent_semantics_movie['movie'] = movieList
        latent_semantics_movie['ls1'] = U[0][:, 0]
        latent_semantics_movie['ls2'] = U[0][:, 1]
        latent_semantics_movie['ls3'] = U[0][:, 2]
        latent_semantics_movie['ls4'] = U[0][:, 3]
        latent_semantics_movie['ls5'] = U[0][:, 4]

        x = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
            tagRatingUserMovies)].values
        for i in range(len(x)):
            for j in range(1, len(x[0])):
                x[i][j] = x[i][j] * timeWeights.get(x[i][0])
        y = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
            movieNotWatched)].values
        cossim = cosine_similarity(x[:, 1:], y[:, 1:])
        simDF = pd.DataFrame(cossim,
                             index=tagRatingUserMovies,
                             columns=movieNotWatched)

        temp = simDF.values.tolist()
        sorted_movies_for_each_watched_movieDict = []
        for i in range(len(temp)):
            sorted_movies_for_each_watched_movie = np.argsort(temp[i])
            sorted_movies_for_each_watched_movieDict.append(
                sorted_movies_for_each_watched_movie.tolist()[:10])

        sortedMoviesRavel = [
            item for sublist in sorted_movies_for_each_watched_movieDict
            for item in sublist
        ]
        freq = {}
        for i in range(len(sorted_movies_for_each_watched_movieDict)):
            for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
                freq[sorted_movies_for_each_watched_movieDict[i][j]] = 0

        for i in range(len(sorted_movies_for_each_watched_movieDict)):
            for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
                freq[sorted_movies_for_each_watched_movieDict[i][j]] += (10 -
                                                                         j)

        freq = OrderedDict(
            sorted(freq.items(), reverse=True, key=lambda t: t[1]))
        freq = list(freq.items())

        recommendedMovies = []
        for i in range(5):
            index = freq[i][0]
            recommendedMovies.append(y[index][0])
        print(recommendedMovies)
        relevant = []

        print('Do you want to continue? Enter Y for yes and N for No')
        choice = input()
        while choice not in ['y', 'n']:
            print('invalid input')
            choice = input()
Example #26
def task2c(recommendedMovies, T):
    relevant = []
    notRelevant = []

    choice = 'y'
    while choice != 'n':
        rel_dict = {}
        selected_dict = {}
        N = 5
        R = 0
        for i in range(len(recommendedMovies)):
            print "If ", recommendedMovies[
                i], " is relevant, enter 1. If it is not relevant, enter 0"
            relevant.append(int(raw_input()))
            rel_dict[recommendedMovies[i]] = relevant[i]
            if relevant[i] == 1:
                R = R + 1
            else:
                notRelevant.append(recommendedMovies[i])

        genreset = set()
        for movie in recommendedMovies:
            genres_list = movieGenreDict[movie]
            selected_dict[movie] = genres_list
            genreset = genreset.union(set(genres_list))

        genreTop5 = list(genreset)
        ri = []
        ni = []
        for i in range(0, len(genreTop5)):
            ri.append(0)
            ni.append(0)
        for m in recommendedMovies:
            for i in range(0, len(genreTop5)):
                l1 = selected_dict[m]
                rval = rel_dict[m]
                if genreTop5[i] in l1:
                    ni[i] = ni[i] + 1
                    if rval == 1:
                        ri[i] = ri[i] + 1

        pr_feedback = {}

        for i in range(0, len(genreTop5)):
            try:
                numerator = ri[i] / (R - ri[i])
                denominator = (ni[i] - ri[i]) / (N - R - ni[i] + ri[i])
                pr = math.log((numerator / denominator), 2)
            except:
                numerator = (ri[i] + 0.5) / (R - ri[i] + 1)
                denominator = (ni[i] - ri[i] + 0.5) / (N - R - ni[i] + ri[i] +
                                                       1)
                pr = math.log((numerator / denominator), 2)

            pr_feedback[genreTop5[i]] = pr

        for key, value in pr_feedback.items():
            pr_feedback[key] = (pr_feedback[key] - min(
                pr_feedback.values())) / max(pr_feedback.values())

        pr_dict = {}
        for i in movieList:
            if i in movieRatingDict:
                if i in movieGenreDict:
                    movieTags = movieGenreDict[i]
                    rList = movieRatingDict[i]
                    for j in movieTags:
                        for k in rList:
                            mIndex = movieListDictInverse[i]
                            tIndex = genreListDictInverse[j]
                            rIndex = ratingListDictInverse[k]
                            avgRatingValue = avgRatingDict[i][0]
                            if k >= avgRatingValue:
                                if j in genreTop5:
                                    T[mIndex, tIndex, rIndex] *= pr_feedback[j]

        tensor = dtensor(T)

        # applying CP-decomposition with ALS(Alternating Least Squares)
        U, fit, itr, exectimes, P = cp_als(tensor, 5, init='random')

        latent_semantics_movie = pd.DataFrame(
            columns=['movie', 'ls1', 'ls2', 'ls3', 'ls4', 'ls5'])
        latent_semantics_movie['movie'] = movieList
        latent_semantics_movie['ls1'] = U[0][:, 0]
        latent_semantics_movie['ls2'] = U[0][:, 1]
        latent_semantics_movie['ls3'] = U[0][:, 2]
        latent_semantics_movie['ls4'] = U[0][:, 3]
        latent_semantics_movie['ls5'] = U[0][:, 4]

        x = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
            tagRatingUserMovies)].values
        for i in range(len(x)):
            for j in range(1, len(x[0])):
                x[i][j] = x[i][j] * timeWeights.get(x[i][0])
        y = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
            movieNotWatched)].values
        cossim = cosine_similarity(x[:, 1:], y[:, 1:])
        simDF = pd.DataFrame(cossim,
                             index=tagRatingUserMovies,
                             columns=movieNotWatched)

        temp = simDF.values.tolist()
        sorted_movies_for_each_watched_movieDict = []
        for i in range(len(temp)):
            sorted_movies_for_each_watched_movie = np.argsort(temp[i])
            sorted_movies_for_each_watched_movieDict.append(
                sorted_movies_for_each_watched_movie.tolist()[:10])

        sortedMoviesRavel = [
            item for sublist in sorted_movies_for_each_watched_movieDict
            for item in sublist
        ]
        freq = {}
        for i in range(len(sorted_movies_for_each_watched_movieDict)):
            for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
                freq[sorted_movies_for_each_watched_movieDict[i][j]] = 0

        for i in range(len(sorted_movies_for_each_watched_movieDict)):
            for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
                freq[sorted_movies_for_each_watched_movieDict[i][j]] += (10 -
                                                                         j)

        freq = OrderedDict(
            sorted(freq.items(), reverse=True, key=lambda t: t[1]))
        freq = list(freq.items())
        movieFreq = {}
        for i in range(len(freq)):
            index = freq[i][0]
            movieFreq[y[index][0]] = freq[i][1]

        recommendedMovies = []
        for i in range(5):
            index = freq[i][0]
            recommendedMovies.append(y[index][0])
        print(recommendedMovies)
        relevant = []

        print('Do you want to continue? Enter Y for yes and N for No')
        choice = input()
        while choice not in ['y', 'n']:
            print('invalid input')
            choice = input()
Example #27
def create_features(XX, tmin, tmax,
                    sfreq, tmin_original=-0.5,
                    perform_baseline_correction=True,
                    plot_name=""):
    """
    Creation of the feature space.

    - restricting the time window of MEG data to [tmin, tmax]sec.
    - Concatenating the 306 timeseries of each trial in one long
      vector.
    - Normalizing each feature independently (z-scoring).

    - optional: "baseline correction", a data centering concept often
                used in M/EEG, will calculate a mean value per sensor
                from pre-stimulus measurements, and subtract this from
                the relevant measurement. Replaces centering based on
                post-stimulus data

    Returns a feature vector XX,

    """
    print("Applying the desired time window and dropping sensors.")
    lower_limit = 240
    XX = XX[:, lower_limit:, :]
    # instead of post-stimulus centering
    baseline = XX[..., :125].mean(-1)

    beginning = np.round((tmin - tmin_original) * sfreq).astype(int)
    end = np.round((tmax - tmin_original) * sfreq).astype(int)
    XX = XX[:, :, beginning:end].copy()
    XX /= np.linalg.norm(XX, axis=2)[..., np.newaxis]

    #Assuming 250Hz == fs, 125Hz == fs/2, 50Hz = 50/125 = .4
    #5 Hz bw = 5/125 = .04
    print("Applying notch filter for powerline.")
    bw = .04
    freq = .4
    b, a = notch(freq, bw)
    XX = sig.lfilter(b, a, XX)

    #Assuming 250Hz == fs, 125Hz == fs/2, 10Hz = 10/125 = .08
    #5 Hz bw = 5/125 = .04
    print("Applying filter for alpha wave.")
    bw = .04
    freq = .08
    b, a = notch(freq, bw)
    XX = sig.lfilter(b, a, XX)

    XX -= baseline[..., np.newaxis]

    print("CP-ALS Decomposition.")
    T = dtensor(XX)
    P, fit, itr, exectimes = cp_als(T, 2, init='nvecs')
    #P, fit, itr, exectimes = cp_als(T, 8, init='random')
    proj = P.U[2]
    fproj = np.abs(np.fft.fft(proj, axis=0))[:XX.shape[-1] // 2, :]

    plt.figure()
    plt.plot(proj)
    plt.title(plot_name)

    print("Projecting.")
    XX = np.dot(XX, proj)

    print("New shape is %sx%sx%s" % XX.shape)

    print("2D Reshaping: concatenating all 306 timeseries.")
    XX = XX.reshape(XX.shape[0], XX.shape[1] * XX.shape[2])

    print("Features Normalization.")
    XX -= XX.mean(0)
    XX = np.nan_to_num(XX / XX.std(0))
    return XX
Example #28
def CPDecomposition(tensor, rank):
    T = dtensor(tensor)
    # Decompose tensor using CP-ALS
    P, fit, itr, exectimes = cp_als(T, rank, init='random')
    u = P.U
    return u
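A possible usage sketch, assuming a NumPy input: each entry of the returned list is one factor matrix of shape (mode length, rank).

import numpy as np

u = CPDecomposition(np.random.rand(6, 7, 8), rank=4)
print([ui.shape for ui in u])  # [(6, 4), (7, 4), (8, 4)]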
Example #29
def task1cFunc(userid):

    arrayofvalues = []

    for i in movieList:
        if i in movieRatingDict:
            if i in movieGenreDict:
                movieTags = movieGenreDict[i]
                rList = movieRatingDict[i]
                for j in movieTags:
                    for k in rList:
                        mIndex = movieListDictInverse[i]
                        gIndex = genreListDictInverse[j]
                        rIndex = ratingListDictInverse[k]
                        avgRatingValue = avgRatingDict[i][0]
                        if k >= avgRatingValue:
                            T[mIndex, gIndex, rIndex] = 1
                            arrayofvalues.append([mIndex, gIndex, rIndex])
                        else:
                            T[mIndex, gIndex, rIndex] = 0

    # building the tensor using sktensor
    tensor = dtensor(T)

    # applying CP-decomposition with ALS(Alternating Least Squares)
    U, fit, itr, exectimes, P = cp_als(tensor, 5, init='random')

    latent_semantics_movie = pd.DataFrame(
        columns=['movie', 'ls1', 'ls2', 'ls3', 'ls4', 'ls5'])
    latent_semantics_movie['movie'] = movieList
    latent_semantics_movie['ls1'] = U[0][:, 0]
    latent_semantics_movie['ls2'] = U[0][:, 1]
    latent_semantics_movie['ls3'] = U[0][:, 2]
    latent_semantics_movie['ls4'] = U[0][:, 3]
    latent_semantics_movie['ls5'] = U[0][:, 4]

    x = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
        tagRatingUserMovies)].values
    for i in range(len(x)):
        for j in range(1, len(x[0])):
            x[i][j] = x[i][j] * timeWeights.get(x[i][0])
    y = latent_semantics_movie.loc[latent_semantics_movie['movie'].isin(
        movieNotWatched)].values

    cossim = cosine_similarity(x[:, 1:], y[:, 1:])
    simDF = pd.DataFrame(cossim,
                         index=tagRatingUserMovies,
                         columns=movieNotWatched)
    simDF.to_csv('cos.csv')

    temp = simDF.values.tolist()
    sorted_movies_for_each_watched_movieDict = []
    for i in range(len(temp)):
        sorted_movies_for_each_watched_movie = np.argsort(temp[i])
        sorted_movies_for_each_watched_movieDict.append(
            sorted_movies_for_each_watched_movie.tolist()[:10])

    sortedMoviesRavel = [
        item for sublist in sorted_movies_for_each_watched_movieDict
        for item in sublist
    ]
    freq = {}
    for i in range(len(sorted_movies_for_each_watched_movieDict)):
        for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
            freq[sorted_movies_for_each_watched_movieDict[i][j]] = 0

    for i in range(len(sorted_movies_for_each_watched_movieDict)):
        for j in range(len(sorted_movies_for_each_watched_movieDict[0])):
            freq[sorted_movies_for_each_watched_movieDict[i][j]] += (10 - j)

    freq = OrderedDict(sorted(freq.items(), reverse=True, key=lambda t: t[1]))
    freq = freq.items()
    movieFreq = {}
    for i in range(len(freq)):
        index = freq[i][0]
        movieFreq[y[index][0]] = freq[i][1]

    recommendedMovies = []
    for i in range(5):
        index = freq[i][0]
        recommendedMovies.append(y[index][0])

    task2c(recommendedMovies, T)
Example #30
def decompose_model(model_def_path, model_weights_path, layer_ranks):
    """ CREATE DECOMPOSED MODEL DEFINITION """

    with open(model_def_path) as f:
        model_def = caffe.proto.caffe_pb2.NetParameter()
        google.protobuf.text_format.Merge(f.read(), model_def)

    new_model_def = caffe.proto.caffe_pb2.NetParameter()
    new_model_def.name = model_def.name + '_decomposed'

    if model_def.input:
        new_model_def.input.extend(['data'])
        new_model_def.input_dim.extend(model_def.input_dim)

    new_layers = [
    ]  #Keeping track of new layers helps renaming nodes in the future

    for layer in model_def.layer:
        if layer.name not in layer_ranks.keys() or layer.type != 'Convolution':
            new_model_def.layer.extend([layer])
        else:
            decomposed_layer = decompose_layer(layer, layer_ranks[layer.name])
            for i in range(4):
                new_layers.append(decomposed_layer[i].name)
            new_model_def.layer.extend(decomposed_layer)

    #Rename bottom/top nodes for some layers !!!
    layer_index = len(new_model_def.layer)
    for i in range(layer_index):
        #Label Decomposed layers nodes
        if new_model_def.layer[i].name in new_layers:
            if i == 0:
                new_model_def.layer[i].bottom.extend(['data'])
            elif new_model_def.layer[i - 1].type == 'ReLU':
                new_model_def.layer[i].bottom.extend(
                    [new_model_def.layer[i - 2].name])
            elif new_model_def.layer[i - 1].type in ['Convolution', 'Pooling']:
                new_model_def.layer[i].bottom.extend(
                    [new_model_def.layer[i - 1].name])
            new_model_def.layer[i].top.extend([new_model_def.layer[i].name])
        #Rename Convolution layers nodes
        elif new_model_def.layer[i].type == 'Convolution':
            if new_model_def.layer[i - 2].name in new_layers:
                new_model_def.layer[i].bottom[0] = new_model_def.layer[i -
                                                                       2].name
        #Rename ReLU layers nodes
        elif new_model_def.layer[i].type == 'ReLU':
            if new_model_def.layer[i - 1].name in new_layers:
                new_model_def.layer[i].bottom[0] = new_model_def.layer[i -
                                                                       1].name
                new_model_def.layer[i].top[0] = new_model_def.layer[i - 1].name
        #Rename Pooling layers nodes
        elif new_model_def.layer[i].type == 'Pooling':
            if new_model_def.layer[i - 2].name in new_layers:
                new_model_def.layer[i].bottom[0] = new_model_def.layer[i -
                                                                       2].name

    new_model_def_path = model_def_path[:-9] + '_decomposed.prototxt'
    with open(new_model_def_path, 'w') as f:
        google.protobuf.text_format.PrintMessage(new_model_def, f)
    """ CREATE DECOMPOSED MODEL WEIGHTS """

    net = caffe.Net(model_def_path, model_weights_path, caffe.TEST)
    decomposed_net = caffe.Net(new_model_def_path, model_weights_path,
                               caffe.TEST)

    layers = net.params.items()
    net_dict = {}

    for name, params in layers:
        net_dict[name] = {
            'weights': params[0].data,
            'bias': params[1].data,
        }

    log_dict = {}  #Log containing fit, n_itr and exectimes for each layer

    for conv_layer in layer_ranks.keys():
        log_dict[conv_layer] = {}
        rank = layer_ranks[conv_layer]
        T = dtensor(net_dict[conv_layer]['weights'])
        print('\nDecomposing %s...' % conv_layer)
        P, fit, n_itr, exectimes = cp_als(T, rank, init='random')
        print('Reconstruction: %f%%' % (fit * 100))
        print('Elapsed time: %.2fs' % sum(exectimes))

        log_dict[conv_layer]['fit'] = fit[0]
        log_dict[conv_layer]['n_itr'] = n_itr
        log_dict[conv_layer]['exectime'] = sum(exectimes)

        num_output = net_dict[conv_layer]['weights'].shape[0]
        channels = net_dict[conv_layer]['weights'].shape[1]
        kernel_size = net_dict[conv_layer]['weights'].shape[2]
        bias = net_dict[conv_layer]['bias']

        P_x = (P.U[3] * P.lmbda).T
        P_y = P.U[2].T
        P_c = P.U[1].T
        P_n = P.U[0]

        P_x = np.reshape(P_x, [rank, 1, 1, kernel_size]).astype(np.float32)
        P_y = np.reshape(P_y, [rank, 1, kernel_size, 1]).astype(np.float32)
        P_c = np.reshape(P_c, [rank, channels, 1, 1]).astype(np.float32)
        P_n = np.reshape(P_n, [num_output, rank, 1, 1]).astype(np.float32)

        np.copyto(decomposed_net.params[conv_layer + '_x'][0].data, P_x)
        np.copyto(decomposed_net.params[conv_layer + '_y'][0].data, P_y)
        np.copyto(decomposed_net.params[conv_layer + '_c'][0].data, P_c)
        np.copyto(decomposed_net.params[conv_layer + '_n'][0].data, P_n)
        np.copyto(decomposed_net.params[conv_layer + '_n'][1].data, bias)

    new_model_weights_path = model_weights_path[:-11] + '_decomposed.caffemodel'
    decomposed_net.save(new_model_weights_path)

    return log_dict, [new_model_def_path, new_model_weights_path]
Example #31
import logging
import numpy as np
from scipy.io.matlab import loadmat
from sktensor import dtensor, cp_als

# Set logging to DEBUG to see CP-ALS information
logging.basicConfig(level=logging.DEBUG)

# Load Matlab data and convert it to dense tensor format
# mat = loadmat('/Users/monty/Julia/dNTF.py/brod.mat')

X = np.zeros((2, 10, 5))
X[:, 0:2, 0] = 1
X[:, 2:4, 1] = 1
X[:, 4:6, 2] = 1
X[:, 6:8, 3] = 1
X[:, 8:10, 4] = 1

T = dtensor(X)

# Decompose tensor using CP-ALS
P, fit, itr, exectimes = cp_als(T, 4, init='random')
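Since X stacks five disjoint rank-1 blocks (one per slab of the last mode), a rank-4 model cannot reproduce it exactly; a quick fit check for the script above:

# Relative reconstruction error of the rank-4 model.
rel_err = np.linalg.norm(P.toarray() - X) / np.linalg.norm(X)
print('fit: %s, relative error: %.3f' % (fit, rel_err))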
Example #32
for i in range(len(year)):
    for j in range(len(movie)):
        if ((merged['year'] == year[i]) &
            (merged['moviename'] == movie[j])).any():
            for k in range(len(actor)):
                if ((merged['moviename'] == movie[j]) &
                    (merged['name'] == actor[k])).any():
                    T[i, j, k] = 1
                else:
                    T[i, j, k] = 0

tensor = dtensor(T)

# Decompose tensor using CP-ALS
U, fit, itr, exectimes = cp_als(tensor, 5, init='random')
print(U)

# Latent Semantics for Year
latent_semantics_year = pd.DataFrame(
    columns=['year', 'ls1', 'ls2', 'ls3', 'ls4', 'ls5'])
latent_semantics_year['year'] = year
latent_semantics_year['ls1'] = U[0][:, 0]
latent_semantics_year['ls2'] = U[0][:, 1]
latent_semantics_year['ls3'] = U[0][:, 2]
latent_semantics_year['ls4'] = U[0][:, 3]
latent_semantics_year['ls5'] = U[0][:, 4]

print('Latent Semantics for Year sorted by LS1')
ls1 = latent_semantics_year.sort_values(by='ls1', ascending=False)
print(ls1)
Example #33
    for j in range(0, 192, 2):
        X[i][j][:] = faketnsr[i][k][:]
        X[i][j + 1][:] = realtnsr[i][k][:]
        k = k + 1

print('K is:', k)
print(X[0].shape)
print('Densifying X tensor..')
T1 = dtensor(X)
print('Shape of tensor:', T1.shape)

rnk = 45
print('Rank is:', rnk)
print(T1.shape[0], T1.shape[1], T1.shape[2])
print('CP decomposition for tensor..')
P1, fit1, itr1, exectimes1 = cp_als(T1, rnk, init='random')

print('End of 1st Decomposition')
X_train = P1.U[1]

print((P1.U[0]).shape, (P1.U[1]).shape, (P1.U[2]).shape)
print('Shape of decomposed array:', X_train.shape)

print('Creating label array, 1 means fake, 0 means real..')

y_train = []
for i in range(96):
    y_train.append(1)
    y_train.append(0)

y_test = []
Example #34
def fast_conv(weights, img, iter, w_shp, size, N_i, N_j, rank):

    # Define parameters
    (F, C, X, Y) = w_shp
    (N, C, H, W) = size

    # Instantiate 4D tensor for input
    input = T.tensor4(name='input')

    # Initialize shared variable for weights.
    weights = weights.eval().transpose(1, 2, 3, 0)

    # Rows Clustering
    kmeans_rows = KMeans(init='k-means++', n_clusters=N_i , n_init=10)
    W_C = np.reshape(weights, (C, X*Y*F))
    kmeans_rows.fit(W_C)
    [clusters_rows, pos_c] = map(list, zip(*sorted(zip(kmeans_rows.labels_, range(C)), key=lambda item:item[0])))
    new_W_C = W_C[pos_c, :]

    # Cols Clustering
    kmeans_cols = KMeans(init='k-means++', n_clusters=N_j , n_init=10)
    W_F = np.reshape(new_W_C, (C*X*Y, F))
    kmeans_cols.fit(W_F.T)
    [clusters_cols, pos_f] = map(list, zip(*sorted(zip(kmeans_cols.labels_, range(F)), key=lambda item:item[0])))
    new_W_F = np.reshape(W_F[:, pos_f], (C, X, Y, F))

    # Breakdown into cubes
    counter_cols = Counter(clusters_cols)
    counter_rows = Counter(clusters_rows)

    c_sum = 0
    D_CF = []
    for i, c in enumerate(list(counter_rows.values())):
        f_sum = 0
        D_F = []
        for j, f in enumerate(list(counter_cols.values())):

            # Crop cluster
            W_tensor =  dtensor(new_W_F[c_sum:c+c_sum, :, :, f_sum:f+f_sum])

            # Apply CP-Decomposition on the clustered weight tensor
            P, fit, itr, exectimes = cp_als(W_tensor, rank, init='random')
            D_F.append(P)
            f_sum += f
        D_CF.append(D_F)
        c_sum += c

    # Compute Convolution
    conv_out = theano.shared(np.zeros((N, F, H-X+1, W-Y+1)), name='out')
    c_sum = 0
    for i, c in enumerate(list(counter_rows.values())):
        f_sum = 0
        for j, f in enumerate(list(counter_cols.values())):

            # Crop cluster
            f_out = pos_f[f_sum:f+f_sum]
            c_in = pos_c[c_sum:c+c_sum]

            # Apply CP-Decomposition on the clustered weight tensor
            P = D_CF[i][j]
            for k in range(rank):

                # Create shared variables
                T_C = theano.shared(np.reshape(P.U[0][:,k], (1, c, 1, 1)), name='C_{0}_{1}_{2}'.format(k, i, j))
                T_X = theano.shared(np.reshape(P.U[1][:,k], (1, 1, X, 1)), name='X_{0}_{1}_{2}'.format(k, i, j))
                T_Y = theano.shared(np.reshape(P.U[2][:,k], (1, 1, 1, Y)), name='Y_{0}_{1}_{2}'.format(k, i, j))
                T_F = theano.shared(np.reshape(P.U[3][:,k], (f, 1, 1, 1)), name='F_{0}_{1}_{2}'.format(k, i, j))

                # Apply convolution on each dimension individually
                conv_C = conv.conv2d(input[:, c_in, :, :], T_C)
                conv_X = conv.conv2d(conv_C, T_X)
                conv_Y = conv.conv2d(conv_X, T_Y)
                conv_F = conv.conv2d(conv_Y, T_F)
                if f == 1:
                    conv_out = T.set_subtensor(conv_out[:, f_out[0], :, :], np.add(conv_out[:, f_out[0], :, :], conv_F[:, 0, :, :]))
                else:
                    conv_out = T.set_subtensor(conv_out[:, f_out, :, :], np.add(conv_out[:, f_out, :, :], conv_F))
            f_sum += f
        c_sum += c

    # Map Theano function
    f = theano.function([input], conv_out, profile=False)

    # Execute Theano function
    times = []
    for i in range(iter):
        start = time.time()
        filtered_img = f(img)
        done = time.time()
        times.append(done-start)
    avg1 = np.mean(times)
    return filtered_img, avg1